5 #include "packager/media/formats/webm/webm_cluster_parser.h"
9 #include "packager/base/logging.h"
10 #include "packager/base/sys_byteorder.h"
11 #include "packager/media/base/decrypt_config.h"
12 #include "packager/media/base/timestamp.h"
13 #include "packager/media/filters/vp8_parser.h"
14 #include "packager/media/filters/vp9_parser.h"
15 #include "packager/media/filters/webvtt_util.h"
16 #include "packager/media/formats/webm/webm_constants.h"
17 #include "packager/media/formats/webm/webm_crypto_helpers.h"
18 #include "packager/media/formats/webm/webm_webvtt_parser.h"
20 namespace edash_packager {
// Number of microseconds in one millisecond. Used in this file to convert
// the hardcoded kDefault*BufferDurationInMs values into microseconds.
24 const int64_t kMicrosecondsPerMillisecond = 1000;
// Keyframe test helper (only a fragment of the definition is visible in this
// excerpt; the remaining parameters and the per-codec logic are not shown).
30 bool IsKeyframe(
bool is_video,
// Presumably the fallback branch for a codec with no dedicated handling:
// logs the offending codec value via NOTIMPLEMENTED().
45 NOTIMPLEMENTED() <<
"Unsupported codec " << codec;
// WebMClusterParser constructor (the first line of the signature and some
// parameters/initializers are not visible in this excerpt).
//
// Stores the audio/video stream infos, the ignored-track set and the
// per-stream encryption key ids, creates the audio and video Track helpers,
// and registers one Track per entry of |text_tracks|.
53 int64_t timecode_scale,
54 scoped_refptr<AudioStreamInfo> audio_stream_info,
55 scoped_refptr<VideoStreamInfo> video_stream_info,
56 int64_t audio_default_duration,
57 int64_t video_default_duration,
58 const WebMTracksParser::TextTracks& text_tracks,
59 const std::set<int64_t>& ignored_tracks,
60 const std::string& audio_encryption_key_id,
61 const std::string& video_encryption_key_id,
// timecode_multiplier_ is the factor applied to cluster/block timecodes when
// timestamps and durations are computed elsewhere in this file.
65 : timecode_multiplier_(timecode_scale / 1000.0),
66 audio_stream_info_(audio_stream_info),
67 video_stream_info_(video_stream_info),
68 ignored_tracks_(ignored_tracks),
69 audio_encryption_key_id_(audio_encryption_key_id),
70 video_encryption_key_id_(video_encryption_key_id),
// The list parser is rooted at the Cluster element and reports back to this
// object.
71 parser_(kWebMIdCluster, this),
74 cluster_start_time_(kNoTimestamp),
// A track number of -1 is used when the corresponding stream info is null.
75 audio_(audio_stream_info ? audio_stream_info->track_id() : -1,
77 audio_default_duration,
79 video_(video_stream_info ? video_stream_info->track_id() : -1,
81 video_default_duration,
// NOTE(review): the statement controlled by this check is not visible here.
83 if (decryption_key_source)
// Register a Track for every text track, keyed by its track number and
// constructed with kNoTimestamp as the default duration.
85 for (WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin();
86 it != text_tracks.end();
88 text_track_map_.insert(std::make_pair(
89 it->first, Track(it->first,
false, kNoTimestamp, new_sample_cb)));
// Destructor; the body is empty, so no explicit cleanup is performed here.
93 WebMClusterParser::~WebMClusterParser() {}
// Fragment: the enclosing function header is not visible in this excerpt
// (presumably WebMClusterParser::Reset() - TODO confirm).
// Puts the cluster-level state back to its "unset" values: -1 for the
// timecodes, kNoTimestamp for the start time, and "cluster not ended".
96 last_block_timecode_ = -1;
97 cluster_timecode_ = -1;
98 cluster_start_time_ = kNoTimestamp;
99 cluster_ended_ =
false;
// Fragment: the enclosing function header is not visible in this excerpt
// (presumably WebMClusterParser::Flush() - TODO confirm).
// Lets the audio and video tracks emit any buffer still held back for lack
// of a duration, using an estimated duration.
108 audio_.ApplyDurationEstimateIfNeeded();
109 video_.ApplyDurationEstimateIfNeeded();
// Fragment: the enclosing function header is not visible in this excerpt
// (presumably the cluster parse entry point taking |buf| and |size|).
// Feeds the bytes to the list parser and finalizes per-cluster state once
// the cluster has ended.
114 int result = parser_.
Parse(buf, size);
// NOTE(review): the condition guarding this assignment is not visible here.
117 cluster_ended_ =
false;
122 if (cluster_ended_) {
// No start time was recorded while handling blocks, so fall back to the
// cluster timecode.
125 if (cluster_start_time_ == kNoTimestamp) {
// cluster_timecode_ is -1 while unset; the action taken in that case is not
// visible in this excerpt.
128 if (cluster_timecode_ < 0)
131 cluster_start_time_ = cluster_timecode_ * timecode_multiplier_;
// Clear the per-cluster timecode state.
139 last_block_timecode_ = -1;
140 cluster_timecode_ = -1;
// Fragment: the enclosing function header is not visible in this excerpt
// (presumably the list-start counterpart of OnListEnd() - TODO confirm).
// Clears the state associated with the list element identified by |id|.
147 if (
id == kWebMIdCluster) {
// New cluster: forget the previous cluster's timecode and start time.
148 cluster_timecode_ = -1;
149 cluster_start_time_ = kNoTimestamp;
150 }
else if (
id == kWebMIdBlockGroup) {
// New BlockGroup: mark block size, duration and discard padding as unset.
152 block_data_size_ = -1;
153 block_duration_ = -1;
154 discard_padding_ = -1;
155 discard_padding_set_ =
false;
156 }
else if (
id == kWebMIdBlockAdditions) {
// New BlockAdditions: drop any previously stored additional data.
158 block_additional_data_.reset();
159 block_additional_data_size_ = 0;
// Called when a list element identified by |id| ends. Only BlockGroup ends
// are processed here: the Block collected for the group is parsed together
// with its additional data, duration and discard padding, and the
// per-BlockGroup state is then cleared.
// (Return statements are not visible in this excerpt.)
165 bool WebMClusterParser::OnListEnd(
int id) {
166 if (
id != kWebMIdBlockGroup)
// block_data_size_ is still -1 when no Block was stored for this group.
170 if (block_data_size_ == -1) {
171 LOG(ERROR) <<
"Block missing from BlockGroup.";
// Parse as a non-simple block; a discard padding of 0 is passed when none
// was set for this group.
175 bool result = ParseBlock(
false, block_data_.get(), block_data_size_,
176 block_additional_data_.get(),
177 block_additional_data_size_, block_duration_,
178 discard_padding_set_ ? discard_padding_ : 0);
// Reset the per-BlockGroup state.
180 block_data_size_ = -1;
181 block_duration_ = -1;
183 block_additional_data_.reset();
184 block_additional_data_size_ = 0;
185 discard_padding_ = -1;
186 discard_padding_set_ =
false;
// Handles an unsigned-integer element: selects the member that stores the
// value for the given element |id| (cluster timecode, block duration or
// BlockAddID). How |val| is written through |dst|, and the handling of other
// ids, is not visible in this excerpt.
190 bool WebMClusterParser::OnUInt(
int id, int64_t val) {
193 case kWebMIdTimecode:
194 dst = &cluster_timecode_;
196 case kWebMIdBlockDuration:
197 dst = &block_duration_;
199 case kWebMIdBlockAddID:
200 dst = &block_add_id_;
// Decodes the 4-byte header of a (Simple)Block held in |buf| and forwards the
// payload to OnBlock(). Some parameters and checks of this function are not
// visible in this excerpt.
211 bool WebMClusterParser::ParseBlock(
bool is_simple_block,
214 const uint8_t* additional,
217 int64_t discard_padding) {
// The 0x80 bit of the first byte must be set; otherwise the block is
// rejected because the track number would not fit in the low 7 bits.
223 if (!(buf[0] & 0x80)) {
224 LOG(ERROR) <<
"TrackNumber over 127 not supported";
// Low 7 bits of byte 0: track number. Bytes 1-2: 16-bit timecode, most
// significant byte first. Byte 3: flags, with the lacing mode in bits 1-2.
228 int track_num = buf[0] & 0x7f;
229 int timecode = buf[1] << 8 | buf[2];
230 int flags = buf[3] & 0xff;
231 int lacing = (flags >> 1) & 0x3;
234 LOG(ERROR) <<
"Lacing " << lacing <<
" is not supported yet.";
// Bit 15 set means the 16-bit value is negative; the adjustment applied is
// not visible in this excerpt (presumably sign extension - TODO confirm).
239 if (timecode & 0x8000)
// The frame payload starts right after the 4 header bytes.
242 const uint8_t* frame_data = buf + 4;
243 int frame_size = size - (frame_data - buf);
244 return OnBlock(is_simple_block, track_num, timecode, duration, flags,
245 frame_data, frame_size, additional, additional_size,
// Handles a binary element identified by |id| whose payload is |data| /
// |size|. SimpleBlocks are parsed immediately; Block, BlockAdditional and
// DiscardPadding payloads are stored until the enclosing BlockGroup ends.
// (Several case labels and return statements are not visible in this excerpt.)
249 bool WebMClusterParser::OnBinary(
int id,
const uint8_t* data,
int size) {
251 case kWebMIdSimpleBlock:
// SimpleBlock: no additional data, duration -1 and discard padding 0.
252 return ParseBlock(
true, data, size, NULL, 0, -1, 0);
256 LOG(ERROR) <<
"More than 1 Block in a BlockGroup is not "
// Keep a private copy of the Block payload for later parsing.
260 block_data_.reset(
new uint8_t[size]);
261 memcpy(block_data_.get(), data, size);
262 block_data_size_ = size;
265 case kWebMIdBlockAdditional: {
// The BlockAddID is converted with HostToNet64 and stored in front of the
// BlockAdditional payload.
266 uint64_t block_add_id = base::HostToNet64(block_add_id_);
267 if (block_additional_data_) {
272 LOG(ERROR) <<
"More than 1 BlockAdditional in a "
273 "BlockGroup is not supported.";
279 block_additional_data_size_ = size +
sizeof(block_add_id);
280 block_additional_data_.reset(
new uint8_t[block_additional_data_size_]);
281 memcpy(block_additional_data_.get(), &block_add_id,
282 sizeof(block_add_id));
// The offset 8 equals sizeof(block_add_id) (a uint64_t).
283 memcpy(block_additional_data_.get() + 8, data, size);
286 case kWebMIdDiscardPadding: {
// Only one DiscardPadding of 1 to 8 bytes is accepted; the action taken when
// this check fails is not visible in this excerpt.
287 if (discard_padding_set_ || size <= 0 || size > 8)
289 discard_padding_set_ =
true;
// The first byte is cast through int8_t so its sign carries into the result;
// the remaining bytes are shifted in, most significant first.
292 discard_padding_ =
static_cast<int8_t
>(data[0]);
293 for (
int i = 1; i < size; ++i)
294 discard_padding_ = (discard_padding_ << 8) | data[i];
// Turns one decoded block into a MediaSample and hands it to the Track that
// owns |track_num|. Visible steps: validate timecodes, pick the stream type
// and encryption key id, build (and if needed decrypt) the sample, set its
// timestamps and duration, run the init callback once, and emit the buffer.
// Many lines of this function (parameters, return statements, some
// conditions) are not visible in this excerpt.
303 bool WebMClusterParser::OnBlock(
bool is_simple_block,
310 const uint8_t* additional,
312 int64_t discard_padding) {
// cluster_timecode_ is -1 until the cluster's Timecode element is seen.
314 if (cluster_timecode_ == -1) {
315 LOG(ERROR) <<
"Got a block before cluster timecode.";
322 LOG(ERROR) <<
"Got a block with negative timecode offset " << timecode;
// Block timecodes must not go backwards within a cluster.
326 if (last_block_timecode_ != -1 && timecode < last_block_timecode_) {
327 LOG(ERROR) <<
"Got a block with a timecode before the previous block.";
// Classify the block by track number: audio, video, ignored, or text.
332 StreamType stream_type = kStreamUnknown;
333 std::string encryption_key_id;
334 if (track_num == audio_.track_num()) {
336 encryption_key_id = audio_encryption_key_id_;
337 stream_type = kStreamAudio;
338 }
else if (track_num == video_.track_num()) {
340 encryption_key_id = video_encryption_key_id_;
341 stream_type = kStreamVideo;
342 }
else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) {
344 }
else if (Track*
const text_track = FindTextTrack(track_num)) {
// NOTE(review): the action taken for a text block with negative duration is
// not visible here.
347 if (block_duration < 0)
350 stream_type = kStreamText;
352 LOG(ERROR) <<
"Unexpected track number " << track_num;
355 DCHECK_NE(stream_type, kStreamUnknown);
357 last_block_timecode_ = timecode;
// Absolute timestamp: cluster timecode plus block offset, scaled by the
// timecode multiplier.
359 int64_t timestamp = (cluster_timecode_ + timecode) * timecode_multiplier_;
361 scoped_refptr<MediaSample> buffer;
362 if (stream_type != kStreamText) {
// Keyframe flag: either bit 0x80 of |flags| or the result of IsKeyframe();
// the condition selecting between the two is not visible in this excerpt.
369 ? (flags & 0x80) != 0
370 : IsKeyframe(stream_type == kStreamVideo,
371 video_stream_info_ ? video_stream_info_->codec()
372 : kUnknownVideoCodec,
378 scoped_ptr<DecryptConfig> decrypt_config;
// A decrypt config is only built when the track has an encryption key id.
380 if (!encryption_key_id.empty() &&
381 !WebMCreateDecryptConfig(
383 reinterpret_cast<const uint8_t*>(encryption_key_id.data()),
384 encryption_key_id.size(),
385 &decrypt_config, &data_offset)) {
390 additional, additional_size, is_keyframe);
// Decrypt in place only when a decrypt config with a non-empty IV exists.
393 if (decrypt_config && !decrypt_config->iv().empty()) {
394 if (!decryptor_source_) {
395 LOG(ERROR) <<
"Encrypted media sample encountered, but decryption is "
399 if (!decryptor_source_->DecryptSampleBuffer(decrypt_config.get(),
400 buffer->writable_data(),
401 buffer->data_size())) {
402 LOG(ERROR) <<
"Cannot decrypt samples";
// Text path: id/settings are packed into side data, and the content is
// passed as the sample data.
407 std::string id, settings, content;
410 std::vector<uint8_t> side_data;
411 MakeSideData(
id.begin(),
id.end(),
412 settings.begin(), settings.end(),
416 reinterpret_cast<const uint8_t*>(content.data()), content.length(),
// NOTE(review): &side_data[0] is undefined behavior if side_data is empty;
// confirm MakeSideData always produces at least one byte.
417 &side_data[0], side_data.size(),
true);
420 buffer->set_dts(timestamp);
421 buffer->set_pts(timestamp);
// The first block seen in a cluster defines the cluster start time.
422 if (cluster_start_time_ == kNoTimestamp)
423 cluster_start_time_ = timestamp;
// A positive block duration is scaled like timestamps; the fallback value
// used otherwise is not visible in this excerpt.
424 buffer->set_duration(block_duration > 0
425 ? (block_duration * timecode_multiplier_)
// One-time initialization: collect the stream infos and run init_cb_.
428 if (!init_cb_.is_null() && !initialized_) {
429 std::vector<scoped_refptr<StreamInfo>> streams;
430 if (audio_stream_info_)
431 streams.push_back(audio_stream_info_);
432 if (video_stream_info_) {
433 if (stream_type == kStreamVideo) {
// Parse the video frame with the codec-specific VPx parser to obtain the
// codec configuration.
434 scoped_ptr<VPxParser> vpx_parser;
435 switch (video_stream_info_->codec()) {
437 vpx_parser.reset(
new VP8Parser);
440 vpx_parser.reset(
new VP9Parser);
// NOTE(review): confirm this default branch returns before vpx_parser is
// dereferenced below - the return is not visible in this excerpt.
443 NOTIMPLEMENTED() <<
"Unsupported codec "
444 << video_stream_info_->codec();
447 std::vector<VPxFrameInfo> vpx_frames;
448 if (!vpx_parser->Parse(buffer->data(), buffer->data_size(),
450 LOG(ERROR) <<
"Failed to parse vpx frame.";
// Exactly one parsed frame, and it must be a keyframe.
453 if (vpx_frames.size() != 1u || !vpx_frames[0].is_keyframe) {
454 LOG(ERROR) <<
"The first frame should be a key frame.";
// Publish the codec string and serialized codec configuration on the video
// stream info before announcing the streams.
458 const VPCodecConfiguration* codec_config = &vpx_parser->codec_config();
459 video_stream_info_->set_codec_string(
460 codec_config->GetCodecString(video_stream_info_->codec()));
461 std::vector<uint8_t> extra_data;
462 codec_config->Write(&extra_data);
463 video_stream_info_->set_extra_data(extra_data);
464 streams.push_back(video_stream_info_);
465 init_cb_.Run(streams);
// Second init_cb_ call site; the branch it belongs to is not visible here.
469 init_cb_.Run(streams);
// |track| is assigned in lines not visible in this excerpt.
474 return track->EmitBuffer(buffer);
// Track constructor: records the track number, the default frame duration
// and the callback used to deliver samples; the next-frame duration estimate
// starts out unset (kNoTimestamp). Some parameters and initializers are not
// visible in this excerpt.
477 WebMClusterParser::Track::Track(
int track_num,
479 int64_t default_duration,
481 : track_num_(track_num),
483 default_duration_(default_duration),
484 estimated_next_frame_duration_(kNoTimestamp),
485 new_sample_cb_(new_sample_cb) {
// A default duration is either unset (kNoTimestamp) or strictly positive.
486 DCHECK(default_duration_ == kNoTimestamp || default_duration_ > 0);
// Destructor; the body is empty, so no explicit cleanup is performed here.
489 WebMClusterParser::Track::~Track() {}
// Emits |buffer| for this track, holding it back when it has no duration.
//
// If an earlier buffer is being held back, its duration is derived from the
// difference between |buffer|'s pts and its own pts, and it is emitted first.
// A |buffer| whose duration is kNoTimestamp is then stored as the new
// held-back buffer; otherwise it is emitted via EmitBufferHelp().
// (Some return statements are not visible in this excerpt.)
491 bool WebMClusterParser::Track::EmitBuffer(
492 const scoped_refptr<MediaSample>& buffer) {
493 DVLOG(2) <<
"EmitBuffer() : " << track_num_
494 <<
" ts " << buffer->pts()
495 <<
" dur " << buffer->duration()
496 <<
" kf " << buffer->is_key_frame()
497 <<
" size " << buffer->data_size();
499 if (last_added_buffer_missing_duration_.get()) {
500 int64_t derived_duration =
501 buffer->pts() - last_added_buffer_missing_duration_->pts();
502 last_added_buffer_missing_duration_->set_duration(derived_duration);
504 DVLOG(2) <<
"EmitBuffer() : applied derived duration to held-back buffer : "
506 << last_added_buffer_missing_duration_->pts()
508 << last_added_buffer_missing_duration_->duration()
509 <<
" kf " << last_added_buffer_missing_duration_->is_key_frame()
510 <<
" size " << last_added_buffer_missing_duration_->data_size();
// Clear the held-back slot before emitting: EmitBufferHelp() DCHECKs that
// the slot is empty.
511 scoped_refptr<MediaSample> updated_buffer =
512 last_added_buffer_missing_duration_;
513 last_added_buffer_missing_duration_ = NULL;
514 if (!EmitBufferHelp(updated_buffer))
518 if (buffer->duration() == kNoTimestamp) {
519 last_added_buffer_missing_duration_ = buffer;
520 DVLOG(2) <<
"EmitBuffer() : holding back buffer that is missing duration";
524 return EmitBufferHelp(buffer);
// If a buffer is being held back because it lacks a duration, assigns it the
// value returned by GetDurationEstimate(), passes it to new_sample_cb_ and
// clears the held-back slot. The action taken when no buffer is held back is
// not visible in this excerpt (presumably an early return).
527 void WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() {
528 if (!last_added_buffer_missing_duration_.get())
531 int64_t estimated_duration = GetDurationEstimate();
532 last_added_buffer_missing_duration_->set_duration(estimated_duration);
// The estimate is divided by 1000 for the "ms" figure in the log message.
534 VLOG(1) <<
"Track " << track_num_ <<
": Estimating WebM block duration to be "
535 << estimated_duration / 1000
536 <<
"ms for the last (Simple)Block in the Cluster for this Track. Use "
537 "BlockGroups with BlockDurations at the end of each Track in a "
538 "Cluster to avoid estimation.";
540 DVLOG(2) <<
" new dur : ts " << last_added_buffer_missing_duration_->pts()
541 <<
" dur " << last_added_buffer_missing_duration_->duration()
542 <<
" kf " << last_added_buffer_missing_duration_->is_key_frame()
543 <<
" size " << last_added_buffer_missing_duration_->data_size();
// The sample goes straight to the callback here, not through
// EmitBufferHelp().
547 new_sample_cb_.Run(track_num_, last_added_buffer_missing_duration_);
548 last_added_buffer_missing_duration_ = NULL;
// Drops any buffer currently held back for lack of a duration without
// emitting it.
551 void WebMClusterParser::Track::Reset() {
552 last_added_buffer_missing_duration_ = NULL;
// Validates |buffer|'s duration, updates the track's next-frame duration
// estimate, and passes the buffer to new_sample_cb_. Must only be called
// while no buffer is held back (DCHECKed below).
// (Return statements are not visible in this excerpt.)
555 bool WebMClusterParser::Track::EmitBufferHelp(
556 const scoped_refptr<MediaSample>& buffer) {
557 DCHECK(!last_added_buffer_missing_duration_.get());
559 int64_t duration = buffer->duration();
// A negative or unset duration is reported as an error.
560 if (duration < 0 || duration == kNoTimestamp) {
561 LOG(ERROR) <<
"Invalid buffer duration: " << duration;
// The estimate is the first duration seen, and from then on the larger of
// the current estimate and this buffer's duration.
568 int64_t orig_duration_estimate = estimated_next_frame_duration_;
569 if (estimated_next_frame_duration_ == kNoTimestamp) {
570 estimated_next_frame_duration_ = duration;
572 estimated_next_frame_duration_ =
573 std::max(duration, estimated_next_frame_duration_);
576 if (orig_duration_estimate != estimated_next_frame_duration_) {
577 DVLOG(3) <<
"Updated duration estimate:"
578 << orig_duration_estimate
580 << estimated_next_frame_duration_
586 new_sample_cb_.Run(track_num_, buffer);
// Chooses a duration for a buffer that has none, in this order of preference:
// the track's default duration, then the estimate maintained by
// EmitBufferHelp(), then a hardcoded video or audio default converted from
// milliseconds with kMicrosecondsPerMillisecond. The condition selecting
// between the video and audio defaults, and the return statement, are not
// visible in this excerpt.
590 int64_t WebMClusterParser::Track::GetDurationEstimate() {
591 int64_t duration = kNoTimestamp;
592 if (default_duration_ != kNoTimestamp) {
593 duration = default_duration_;
594 DVLOG(3) << __FUNCTION__ <<
" : using track default duration " << duration;
595 }
else if (estimated_next_frame_duration_ != kNoTimestamp) {
596 duration = estimated_next_frame_duration_;
597 DVLOG(3) << __FUNCTION__ <<
" : using estimated duration " << duration;
600 duration = kDefaultVideoBufferDurationInMs * kMicrosecondsPerMillisecond;
602 duration = kDefaultAudioBufferDurationInMs * kMicrosecondsPerMillisecond;
604 DVLOG(3) << __FUNCTION__ <<
" : using hardcoded default duration "
// Whichever source was used, the result must be a real, positive duration.
608 DCHECK_GT(duration, 0);
609 DCHECK_NE(duration, kNoTimestamp);
// Walks every entry of text_track_map_. The loop body is not visible in this
// excerpt (presumably it resets each text Track - TODO confirm).
613 void WebMClusterParser::ResetTextTracks() {
614 for (TextTrackMap::iterator it = text_track_map_.begin();
615 it != text_track_map_.end();
621 WebMClusterParser::Track*
622 WebMClusterParser::FindTextTrack(
int track_num) {
623 const TextTrackMap::iterator it = text_track_map_.find(track_num);
625 if (it == text_track_map_.end())