5 #include "packager/media/formats/webm/webm_cluster_parser.h"
9 #include "packager/base/logging.h"
10 #include "packager/base/sys_byteorder.h"
11 #include "packager/media/base/decrypt_config.h"
12 #include "packager/media/base/timestamp.h"
13 #include "packager/media/filters/webvtt_util.h"
14 #include "packager/media/formats/webm/webm_constants.h"
15 #include "packager/media/formats/webm/webm_crypto_helpers.h"
16 #include "packager/media/formats/webm/webm_webvtt_parser.h"
// LIMITED_LOG / LIMITED_DLOG: rate-limited logging helpers.
// Both forward to LOG_IF / DLOG_IF with the condition (count) < (max), so an
// entry is only emitted while |count| is still below |max|. When the entry
// being emitted is the last one allowed ((count) + 1 == (max)), the message is
// prefixed with a notice that further similar entries may be suppressed.
// The macros only read |count|; nothing in the visible expansion increments it.
20 #define LIMITED_LOG(level, count, max) \
21 LOG_IF(level, (count) < (max)) \
22 << (((count) + 1 == (max)) \
23 ? "(Log limit reached. Further similar entries " \
24 "may be suppressed): " \
26 #define LIMITED_DLOG(level, count, max) \
27 DLOG_IF(level, (count) < (max)) \
28 << (((count) + 1 == (max)) \
29 ? "(Log limit reached. Further similar entries " \
30 "may be suppressed): " \
// Number of microseconds in one millisecond. Track::GetDurationEstimate()
// multiplies the hardcoded default buffer durations (named "...InMs") by this
// factor.
34 const int64_t kMicrosecondsPerMillisecond = 1000;
37 namespace edash_packager {
41 10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000,
42 60000, 10000, 20000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000,
43 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000};
48 kMaxDurationErrorLogs = 10,
51 kMaxDurationEstimateLogs = 10,
// Constructs a cluster parser.
//
// - timecode_scale is divided by 1000.0 to obtain timecode_multiplier_, the
//   factor later applied to cluster/block timecodes to produce timestamps.
// - audio_default_duration / video_default_duration are handed to the audio_
//   and video_ Track objects as their default durations.
// - text_tracks: one Track is registered in text_track_map_ per entry, keyed
//   by the entry's key.
// - ignored_tracks, the two encryption key ids and audio_codec are stored
//   as-is in the corresponding members.
// - The same new_sample_cb is passed to every Track that is created here.
54 WebMClusterParser::WebMClusterParser(
55 int64_t timecode_scale,
57 int64_t audio_default_duration,
59 int64_t video_default_duration,
60 const WebMTracksParser::TextTracks& text_tracks,
61 const std::set<int64_t>& ignored_tracks,
62 const std::string& audio_encryption_key_id,
63 const std::string& video_encryption_key_id,
64 const AudioCodec audio_codec,
// NOTE(review): presumably timecode_scale is in nanoseconds per tick, making
// the multiplier microseconds per tick -- confirm against the caller.
66 : timecode_multiplier_(timecode_scale / 1000.0),
67 ignored_tracks_(ignored_tracks),
68 audio_encryption_key_id_(audio_encryption_key_id),
69 video_encryption_key_id_(video_encryption_key_id),
70 audio_codec_(audio_codec),
// The embedded parser reports the elements of a Cluster back to this object.
71 parser_(kWebMIdCluster, this),
72 cluster_start_time_(kNoTimestamp),
// The second Track argument is false for audio (and text, below) and true for
// video -- presumably an "is video" flag; confirm against Track's declaration.
73 audio_(audio_track_num, false, audio_default_duration, new_sample_cb),
74 video_(video_track_num, true, video_default_duration, new_sample_cb) {
75 for (WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin();
76 it != text_tracks.end();
// Text tracks are created with kNoTimestamp as their default duration.
78 text_track_map_.insert(std::make_pair(
79 it->first, Track(it->first,
false, kNoTimestamp, new_sample_cb)));
// The destructor body is empty; no explicit cleanup is performed here.
83 WebMClusterParser::~WebMClusterParser() {}
86 last_block_timecode_ = -1;
87 cluster_timecode_ = -1;
88 cluster_start_time_ = kNoTimestamp;
89 cluster_ended_ =
false;
98 audio_.ApplyDurationEstimateIfNeeded();
99 video_.ApplyDurationEstimateIfNeeded();
104 int result = parser_.
Parse(buf, size);
107 cluster_ended_ =
false;
112 if (cluster_ended_) {
115 if (cluster_start_time_ == kNoTimestamp) {
118 if (cluster_timecode_ < 0)
121 cluster_start_time_ = cluster_timecode_ * timecode_multiplier_;
129 last_block_timecode_ = -1;
130 cluster_timecode_ = -1;
// Attempts to obtain an audio frame's duration from the encoded payload.
// When the configured audio codec is Opus, the work is delegated to
// ReadOpusDuration() with the same data/size.
136 int64_t WebMClusterParser::TryGetEncodedAudioDuration(
146 if (audio_codec_ == kCodecOpus) {
147 return ReadOpusDuration(data, size);
// Derives the duration of an Opus packet from its leading bytes.
//
// data/size describe the encoded packet. The first byte supplies both the
// configuration index (upper five bits) and the frame-count code (lower two
// bits); for the "Code 3" case the frame count is read from the second byte.
// Malformed packets are reported through rate-limited debug logs that are
// gated on num_duration_errors_ / kMaxDurationErrorLogs.
156 int64_t WebMClusterParser::ReadOpusDuration(
const uint8_t* data,
int size) {
// Upper five bits of the first byte: configuration index (shifted down by 3
// where it is extracted below).
159 static const uint8_t kTocConfigMask = 0xf8;
// Lower two bits of the first byte: frame-count code.
160 static const uint8_t kTocFrameCountCodeMask = 0x03;
// Lower six bits of the second byte: explicit frame count ("Code 3" packets).
161 static const uint8_t kFrameCountMask = 0x3f;
// NOTE(review): the name says milliseconds but the value is 120000. Below it
// is multiplied by 1000 for the comparison and printed with an "ms" suffix,
// while |duration| is divided by 1000 before being printed as ms. If
// |duration| is in microseconds, the threshold is effectively 120 seconds and
// the message reads "120000ms". RFC 6716 caps an Opus packet at 120 ms, so the
// intended value may be 120 -- confirm.
162 static const int64_t kPacketDurationMaxMs = 120000;
165 LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
166 <<
"Invalid zero-byte Opus packet; demuxed block duration may be "
172 int frame_count_type = data[0] & kTocFrameCountCodeMask;
175 switch (frame_count_type) {
186 LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
187 <<
"Second byte missing from 'Code 3' Opus packet; demuxed block "
188 "duration may be imprecise.";
192 frame_count = data[1] & kFrameCountMask;
// A "Code 3" packet that declares zero frames is reported as illegal.
194 if (frame_count == 0) {
195 LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
196 <<
"Illegal 'Code 3' Opus packet with frame count zero; demuxed "
197 "block duration may be imprecise.";
203 LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
204 <<
"Unexpected Opus frame count type: " << frame_count_type <<
"; "
205 <<
"demuxed block duration may be imprecise.";
// After masking with 0xf8 and shifting right by 3 the index is in [0, 31].
209 int opusConfig = (data[0] & kTocConfigMask) >> 3;
210 CHECK_GE(opusConfig, 0);
213 DCHECK_GT(frame_count, 0);
216 if (duration > kPacketDurationMaxMs * 1000) {
220 LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
221 <<
"Warning, demuxed Opus packet with encoded duration: "
222 << duration / 1000 <<
"ms. Should be no greater than "
223 << kPacketDurationMaxMs <<
"ms.";
// Parser callback invoked when a list (master) element with the given id
// starts. Resets the state that belongs to that element so values left over
// from a previous element are not reused.
229 WebMParserClient* WebMClusterParser::OnListStart(
int id) {
// New Cluster: forget the previous cluster's timecode and start time.
230 if (
id == kWebMIdCluster) {
231 cluster_timecode_ = -1;
232 cluster_start_time_ = kNoTimestamp;
233 }
// New BlockGroup: -1 marks "no block data / duration / discard padding yet".
else if (
id == kWebMIdBlockGroup) {
235 block_data_size_ = -1;
236 block_duration_ = -1;
237 discard_padding_ = -1;
238 discard_padding_set_ =
false;
239 }
// New BlockAdditions: drop any previously collected additional data.
else if (
id == kWebMIdBlockAdditions) {
241 block_additional_data_.reset();
242 block_additional_data_size_ = 0;
// Parser callback invoked when a list (master) element with the given id
// ends. For a BlockGroup, the block data collected while the group was open
// is parsed via ParseBlock() and the per-group state is reset afterwards.
248 bool WebMClusterParser::OnListEnd(
int id) {
// Lists other than BlockGroup take the early branch below.
249 if (
id != kWebMIdBlockGroup)
// block_data_size_ still being -1 means no Block was seen inside the group.
253 if (block_data_size_ == -1) {
254 LOG(ERROR) <<
"Block missing from BlockGroup.";
// Not a SimpleBlock (first argument false). The discard padding is passed as 0
// when the group did not carry one.
258 bool result = ParseBlock(
false, block_data_.get(), block_data_size_,
259 block_additional_data_.get(),
260 block_additional_data_size_, block_duration_,
261 discard_padding_set_ ? discard_padding_ : 0);
// Return the per-BlockGroup members to their "unset" values.
263 block_data_size_ = -1;
264 block_duration_ = -1;
266 block_additional_data_.reset();
267 block_additional_data_size_ = 0;
268 discard_padding_ = -1;
269 discard_padding_set_ =
false;
// Parser callback for unsigned-integer elements. The element id selects which
// member |dst| points at:
//   kWebMIdTimecode      -> cluster_timecode_
//   kWebMIdBlockDuration -> block_duration_
//   kWebMIdBlockAddID    -> block_add_id_
273 bool WebMClusterParser::OnUInt(
int id, int64_t val) {
276 case kWebMIdTimecode:
277 dst = &cluster_timecode_;
279 case kWebMIdBlockDuration:
280 dst = &block_duration_;
282 case kWebMIdBlockAddID:
283 dst = &block_add_id_;
// Decodes the fixed header at the start of a (Simple)Block payload and hands
// the remaining frame bytes to OnBlock().
//
// Header layout as read below:
//   buf[0]    track number; the high bit must be set and the low 7 bits hold
//             the value
//   buf[1..2] 16-bit timecode, most significant byte first
//   buf[3]    flags; bits 1-2 hold the lacing mode
// The frame data starts at buf + 4.
294 bool WebMClusterParser::ParseBlock(
bool is_simple_block,
297 const uint8_t* additional,
300 int64_t discard_padding) {
// Only track numbers that fit in the low 7 bits of the first byte are
// accepted, hence the "over 127" message.
306 if (!(buf[0] & 0x80)) {
307 LOG(ERROR) <<
"TrackNumber over 127 not supported";
311 int track_num = buf[0] & 0x7f;
312 int timecode = buf[1] << 8 | buf[2];
313 int flags = buf[3] & 0xff;
314 int lacing = (flags >> 1) & 0x3;
317 LOG(ERROR) <<
"Lacing " << lacing <<
" is not supported yet.";
// Bit 15 set means the 16-bit timecode is negative when read as signed.
322 if (timecode & 0x8000)
// The payload is everything after the 4 header bytes.
325 const uint8_t* frame_data = buf + 4;
326 int frame_size = size - (frame_data - buf);
327 return OnBlock(is_simple_block, track_num, timecode, duration, flags,
328 frame_data, frame_size, additional, additional_size,
// Parser callback for binary elements.
//   - SimpleBlock: parsed right away through ParseBlock().
//   - Block data: copied into block_data_ so it can be parsed later.
//   - BlockAdditional: stored in block_additional_data_, prefixed with the
//     current block_add_id_.
//   - DiscardPadding: decoded into discard_padding_ as a signed integer.
332 bool WebMClusterParser::OnBinary(
int id,
const uint8_t* data,
int size) {
334 case kWebMIdSimpleBlock:
// SimpleBlocks carry no additional data, no explicit duration (-1) and no
// discard padding (0).
335 return ParseBlock(
true, data, size, NULL, 0, -1, 0);
339 LOG(ERROR) <<
"More than 1 Block in a BlockGroup is not "
// Keep a private copy of the payload in block_data_.
343 block_data_.reset(
new uint8_t[size]);
344 memcpy(block_data_.get(), data, size);
345 block_data_size_ = size;
348 case kWebMIdBlockAdditional: {
// The id is converted with HostToNet64 before being prepended to the data.
349 uint64_t block_add_id = base::HostToNet64(block_add_id_);
350 if (block_additional_data_) {
355 LOG(ERROR) <<
"More than 1 BlockAdditional in a "
356 "BlockGroup is not supported.";
// Resulting buffer layout: [converted block_add_id][additional data].
362 block_additional_data_size_ = size +
sizeof(block_add_id);
363 block_additional_data_.reset(
new uint8_t[block_additional_data_size_]);
364 memcpy(block_additional_data_.get(), &block_add_id,
365 sizeof(block_add_id));
// NOTE(review): the literal 8 equals sizeof(block_add_id) for uint64_t;
// using sizeof here would keep the two in sync.
366 memcpy(block_additional_data_.get() + 8, data, size);
369 case kWebMIdDiscardPadding: {
// A second DiscardPadding, or a payload that is not 1..8 bytes long, takes
// the guarded branch below.
370 if (discard_padding_set_ || size <= 0 || size > 8)
372 discard_padding_set_ =
true;
// The first byte is sign-extended via int8_t; each following byte is then
// shifted in, so the payload is read most significant byte first.
// NOTE(review): left-shifting a negative signed value is not well defined
// before C++20 -- confirm this is acceptable for the targeted toolchains.
375 discard_padding_ =
static_cast<int8_t
>(data[0]);
376 for (
int i = 1; i < size; ++i)
377 discard_padding_ = (discard_padding_ << 8) | data[i];
// Turns one decoded block into a MediaSample and passes it to the Track that
// matches track_num.
//
// Visible steps:
//   1. Validate the block against the cluster state (cluster timecode known,
//      block timecodes not going backwards).
//   2. Pick the stream type and encryption key id from the track number.
//   3. Compute the timestamp from cluster timecode + block timecode.
//   4. Build the sample (media path or text path) and set its pts.
//   5. Choose the duration: encoded duration first, then the BlockDuration,
//      then the track's default duration.
//   6. Emit the sample through Track::EmitBuffer().
386 bool WebMClusterParser::OnBlock(
bool is_simple_block,
393 const uint8_t* additional,
395 int64_t discard_padding) {
// cluster_timecode_ is -1 until the cluster's Timecode element has been seen.
397 if (cluster_timecode_ == -1) {
398 LOG(ERROR) <<
"Got a block before cluster timecode.";
405 LOG(ERROR) <<
"Got a block with negative timecode offset " << timecode;
// Block timecodes must not decrease relative to the previous block.
409 if (last_block_timecode_ != -1 && timecode < last_block_timecode_) {
410 LOG(ERROR) <<
"Got a block with a timecode before the previous block.";
// Defaults; overridden below for video and text tracks.
415 StreamType stream_type = kStreamAudio;
416 std::string encryption_key_id;
417 int64_t encoded_duration = kNoTimestamp;
418 if (track_num == audio_.track_num()) {
420 encryption_key_id = audio_encryption_key_id_;
// The payload is only inspected for an encoded duration when the audio track
// has no encryption key id.
421 if (encryption_key_id.empty()) {
422 encoded_duration = TryGetEncodedAudioDuration(data, size);
424 }
else if (track_num == video_.track_num()) {
426 encryption_key_id = video_encryption_key_id_;
427 stream_type = kStreamVideo;
428 }
else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) {
430 }
else if (Track*
const text_track = FindTextTrack(track_num)) {
433 if (block_duration < 0)
436 stream_type = kStreamText;
438 LOG(ERROR) <<
"Unexpected track number " << track_num;
442 last_block_timecode_ = timecode;
// Absolute block time = (cluster timecode + block offset) scaled by the
// timecode multiplier.
444 int64_t timestamp = (cluster_timecode_ + timecode) * timecode_multiplier_;
446 scoped_refptr<MediaSample> buffer;
447 if (stream_type != kStreamText) {
// SimpleBlocks carry the keyframe flag in bit 0x80 of |flags|; for other
// blocks the track inspects the payload.
453 is_simple_block ? (flags & 0x80) != 0 : track->IsKeyframe(data, size);
458 scoped_ptr<DecryptConfig> decrypt_config;
// A decrypt config is only built when the track has an encryption key id.
460 if (!encryption_key_id.empty() &&
461 !WebMCreateDecryptConfig(
463 reinterpret_cast<const uint8_t*>(encryption_key_id.data()),
464 encryption_key_id.size(),
465 &decrypt_config, &data_offset)) {
470 additional, additional_size, is_keyframe);
472 if (decrypt_config) {
474 buffer->set_is_encrypted(
true);
// Text path: id, settings and content are handled as separate strings; id and
// settings are passed to MakeSideData() to fill |side_data|.
477 std::string id, settings, content;
480 std::vector<uint8_t> side_data;
481 MakeSideData(
id.begin(),
id.end(),
482 settings.begin(), settings.end(),
486 reinterpret_cast<const uint8_t*>(content.data()), content.length(),
487 &side_data[0], side_data.size(),
true);
490 buffer->set_pts(timestamp);
// The first block seen in a cluster defines the cluster start time.
491 if (cluster_start_time_ == kNoTimestamp)
492 cluster_start_time_ = timestamp;
// Convert the BlockDuration (if one was given, i.e. >= 0) with the same
// multiplier used for timestamps.
494 int64_t block_duration_time_delta = kNoTimestamp;
495 if (block_duration >= 0) {
496 block_duration_time_delta = block_duration * timecode_multiplier_;
// Duration precedence: encoded duration, then BlockDuration, then the track's
// default duration.
508 if (encoded_duration != kNoTimestamp) {
509 DCHECK(encoded_duration != kInfiniteDuration);
510 DCHECK(encoded_duration > 0);
511 buffer->set_duration(encoded_duration);
513 DVLOG(3) << __FUNCTION__ <<
" : "
514 <<
"Using encoded duration " << encoded_duration;
516 if (block_duration_time_delta != kNoTimestamp) {
517 int64_t duration_difference =
518 block_duration_time_delta - encoded_duration;
// Warn when the two durations disagree by more than two timecode units.
// NOTE(review): the comparison is one-sided -- a BlockDuration that is much
// SMALLER than the encoded duration yields a negative difference and is not
// reported. If "differs significantly" is meant in both directions, the
// absolute value should be compared -- confirm.
520 const auto kWarnDurationDiff = timecode_multiplier_ * 2;
521 if (duration_difference > kWarnDurationDiff) {
522 LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
523 <<
"BlockDuration (" << block_duration_time_delta / 1000
524 <<
"ms) differs significantly from encoded duration ("
525 << encoded_duration / 1000 <<
"ms).";
528 }
else if (block_duration_time_delta != kNoTimestamp) {
529 buffer->set_duration(block_duration_time_delta);
531 buffer->set_duration(track->default_duration());
534 return track->EmitBuffer(buffer);
// Constructs the per-track state. The duration estimate starts out unset
// (kNoTimestamp). default_duration must be either kNoTimestamp or strictly
// positive; this is only enforced by a DCHECK.
537 WebMClusterParser::Track::Track(
int track_num,
539 int64_t default_duration,
541 : track_num_(track_num),
543 default_duration_(default_duration),
544 estimated_next_frame_duration_(kNoTimestamp),
545 new_sample_cb_(new_sample_cb) {
546 DCHECK(default_duration_ == kNoTimestamp || default_duration_ > 0);
// The destructor body is empty; no explicit cleanup is performed here.
549 WebMClusterParser::Track::~Track() {}
// Accepts the next sample for this track.
//
// A sample whose duration is unknown (kNoTimestamp) is held back in
// last_added_buffer_missing_duration_ rather than emitted. When the following
// sample arrives, the held-back sample's duration is derived from the pts
// difference between the two and it is emitted first. Samples that already
// have a duration go straight to EmitBufferHelp().
551 bool WebMClusterParser::Track::EmitBuffer(
552 const scoped_refptr<MediaSample>& buffer) {
553 DVLOG(2) <<
"EmitBuffer() : " << track_num_
554 <<
" ts " << buffer->pts()
555 <<
" dur " << buffer->duration()
556 <<
" kf " << buffer->is_key_frame()
557 <<
" size " << buffer->data_size();
// A previous sample is waiting for its duration: derive it from the gap
// between its pts and the new sample's pts.
559 if (last_added_buffer_missing_duration_.get()) {
560 int64_t derived_duration =
561 buffer->pts() - last_added_buffer_missing_duration_->pts();
562 last_added_buffer_missing_duration_->set_duration(derived_duration);
564 DVLOG(2) <<
"EmitBuffer() : applied derived duration to held-back buffer : "
566 << last_added_buffer_missing_duration_->pts()
568 << last_added_buffer_missing_duration_->duration()
569 <<
" kf " << last_added_buffer_missing_duration_->is_key_frame()
570 <<
" size " << last_added_buffer_missing_duration_->data_size();
// Clear the member before emitting: EmitBufferHelp() DCHECKs that no sample is
// being held back.
571 scoped_refptr<MediaSample> updated_buffer =
572 last_added_buffer_missing_duration_;
573 last_added_buffer_missing_duration_ = NULL;
574 if (!EmitBufferHelp(updated_buffer))
// The new sample has no duration yet: hold it until the next one arrives.
578 if (buffer->duration() == kNoTimestamp) {
579 last_added_buffer_missing_duration_ = buffer;
580 DVLOG(2) <<
"EmitBuffer() : holding back buffer that is missing duration";
584 return EmitBufferHelp(buffer);
// If a sample is still being held back because its duration is unknown,
// assigns it the value returned by GetDurationEstimate(), logs the estimation
// (rate-limited), hands the sample to new_sample_cb_ and clears the held-back
// slot.
587 void WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() {
// The early branch below is taken when nothing is held back.
588 if (!last_added_buffer_missing_duration_.get())
591 int64_t estimated_duration = GetDurationEstimate();
592 last_added_buffer_missing_duration_->set_duration(estimated_duration);
594 LIMITED_LOG(INFO, num_duration_estimates_, kMaxDurationEstimateLogs)
595 <<
"Estimating WebM block duration to be " << estimated_duration / 1000
596 <<
"ms for the last (Simple)Block in the Cluster for this Track. Use "
597 "BlockGroups with BlockDurations at the end of each Track in a "
598 "Cluster to avoid estimation.";
600 DVLOG(2) << __FUNCTION__ <<
" new dur : ts "
601 << last_added_buffer_missing_duration_->pts()
603 << last_added_buffer_missing_duration_->duration()
604 <<
" kf " << last_added_buffer_missing_duration_->is_key_frame()
605 <<
" size " << last_added_buffer_missing_duration_->data_size();
// The callback is run directly here rather than through EmitBufferHelp().
609 new_sample_cb_.Run(track_num_, last_added_buffer_missing_duration_);
610 last_added_buffer_missing_duration_ = NULL;
// Drops any sample that is being held back for a missing duration; the sample
// is released without being passed to the callback.
613 void WebMClusterParser::Track::Reset() {
614 last_added_buffer_missing_duration_ = NULL;
// Decides from the leading payload bytes whether the frame is a keyframe.
// Used by OnBlock() for blocks that are not SimpleBlocks.
617 bool WebMClusterParser::Track::IsKeyframe(
const uint8_t* data,
int size)
const {
// Bit 0 of the first byte is tested first.
// NOTE(review): in a VP8 frame header this bit is 0 for key frames --
// confirm that VP8 is the codec assumed here.
629 if ((data[0] & 0x01) != 0)
// Bytes 3..5 are compared against 0x9d 0x01 0x2a.
// NOTE(review): this matches the VP8 key frame start code (RFC 6386) --
// confirm.
634 if (data[3] != 0x9d || data[4] != 0x01 || data[5] != 0x2a)
// Final emission step for a sample that has a duration.
// Validates the duration, updates the running duration estimate used by
// GetDurationEstimate(), and passes the sample to new_sample_cb_.
// Must not be called while a sample is still being held back (DCHECK).
640 bool WebMClusterParser::Track::EmitBufferHelp(
641 const scoped_refptr<MediaSample>& buffer) {
642 DCHECK(!last_added_buffer_missing_duration_.get());
// Negative or unset durations are reported as an error.
644 int64_t duration = buffer->duration();
645 if (duration < 0 || duration == kNoTimestamp) {
646 LOG(ERROR) <<
"Invalid buffer duration: " << duration;
// The estimate is seeded with the first duration seen and afterwards kept at
// the maximum duration observed.
653 int64_t orig_duration_estimate = estimated_next_frame_duration_;
654 if (estimated_next_frame_duration_ == kNoTimestamp) {
655 estimated_next_frame_duration_ = duration;
657 estimated_next_frame_duration_ =
658 std::max(duration, estimated_next_frame_duration_);
// Only log when the estimate actually changed.
661 if (orig_duration_estimate != estimated_next_frame_duration_) {
662 DVLOG(3) <<
"Updated duration estimate:"
663 << orig_duration_estimate
665 << estimated_next_frame_duration_
671 new_sample_cb_.Run(track_num_, buffer);
// Returns the duration to assign to a sample whose real duration is unknown.
// The running estimate (estimated_next_frame_duration_) is used when it has
// been set; otherwise a hardcoded default is used, either the video or the
// audio one, converted from milliseconds via kMicrosecondsPerMillisecond.
675 int64_t WebMClusterParser::Track::GetDurationEstimate() {
676 int64_t duration = estimated_next_frame_duration_;
677 if (duration != kNoTimestamp) {
678 DVLOG(3) << __FUNCTION__ <<
" : using estimated duration";
680 DVLOG(3) << __FUNCTION__ <<
" : using hardcoded default duration";
682 duration = kDefaultVideoBufferDurationInMs * kMicrosecondsPerMillisecond;
684 duration = kDefaultAudioBufferDurationInMs * kMicrosecondsPerMillisecond;
// Whichever source was used, the result must be a real, positive duration.
688 DCHECK(duration > 0);
689 DCHECK(duration != kNoTimestamp);
693 void WebMClusterParser::ResetTextTracks() {
694 for (TextTrackMap::iterator it = text_track_map_.begin();
695 it != text_track_map_.end();
701 WebMClusterParser::Track*
702 WebMClusterParser::FindTextTrack(
int track_num) {
703 const TextTrackMap::iterator it = text_track_map_.find(track_num);
705 if (it == text_track_map_.end())