5 #include "packager/media/formats/webm/webm_cluster_parser.h"
9 #include "packager/base/logging.h"
10 #include "packager/base/sys_byteorder.h"
11 #include "packager/media/base/decrypt_config.h"
12 #include "packager/media/base/timestamp.h"
13 #include "packager/media/filters/webvtt_util.h"
14 #include "packager/media/formats/webm/webm_constants.h"
15 #include "packager/media/formats/webm/webm_crypto_helpers.h"
16 #include "packager/media/formats/webm/webm_webvtt_parser.h"
// Rate-limited logging helpers (LIMITED_LOG and its debug-only sibling
// LIMITED_DLOG). Each one logs through LOG_IF / DLOG_IF only while
// (count) < (max); when (count) + 1 == (max) the message is prefixed with a
// notice that further similar entries may be suppressed.
// NOTE(review): the visible macro text never modifies `count`; advancing the
// counter is presumably done elsewhere -- confirm.
20 #define LIMITED_LOG(level, count, max) \
21 LOG_IF(level, (count) < (max)) \
22 << (((count) + 1 == (max)) \
23 ? "(Log limit reached. Further similar entries " \
24 "may be suppressed): " \
26 #define LIMITED_DLOG(level, count, max) \
27 DLOG_IF(level, (count) < (max)) \
28 << (((count) + 1 == (max)) \
29 ? "(Log limit reached. Further similar entries " \
30 "may be suppressed): " \
34 const int64_t kMicrosecondsPerMillisecond = 1000;  // Scales the kDefault*BufferDurationInMs constants in Track::GetDurationEstimate().
37 namespace edash_packager {
// Per-frame duration lookup table for Opus. ReadOpusDuration() indexes it with
// the config value taken from the top five bits of the packet's first byte,
// (data[0] & 0xf8) >> 3, which is why it holds 32 entries.
// NOTE(review): the "Mu" suffix and the magnitudes (2500..60000) suggest the
// values are microseconds -- confirm against the frame sizes in RFC 6716.
40 const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = {
41 10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000,
42 60000, 10000, 20000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000,
43 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000};
// Cap passed as `max` to the LIMITED_DLOG calls that count against
// num_duration_errors_.
48 kMaxDurationErrorLogs = 10,
// Cap passed as `max` to the LIMITED_LOG call that counts against
// num_duration_estimates_.
51 kMaxDurationEstimateLogs = 10,
// Constructs a cluster parser.
//   timecode_scale: divided by 1000.0 to form timecode_multiplier_, the factor
//       later applied to raw cluster/block timecodes and block durations.
//   audio_default_duration / video_default_duration: forwarded to the audio_
//       and video_ Track objects as their default durations.
//   text_tracks: one Track is created in text_track_map_ per entry, keyed by
//       the entry's track number.
//   ignored_tracks: copied into ignored_tracks_, which OnBlock() consults when
//       a block belongs to neither the audio nor the video track.
//   audio_encryption_key_id / video_encryption_key_id: stored per stream; a
//       non-empty id makes OnBlock() request a DecryptConfig for the block.
//   audio_codec: stored and consulted by TryGetEncodedAudioDuration().
// new_sample_cb (its declaration is not visible in this excerpt) is handed to
// every Track, which runs it for each emitted sample.
54 WebMClusterParser::WebMClusterParser(
55 int64_t timecode_scale,
57 int64_t audio_default_duration,
59 int64_t video_default_duration,
60 const WebMTracksParser::TextTracks& text_tracks,
61 const std::set<int64_t>& ignored_tracks,
62 const std::string& audio_encryption_key_id,
63 const std::string& video_encryption_key_id,
64 const AudioCodec audio_codec,
66 : timecode_multiplier_(timecode_scale / 1000.0),
67 ignored_tracks_(ignored_tracks),
68 audio_encryption_key_id_(audio_encryption_key_id),
69 video_encryption_key_id_(video_encryption_key_id),
70 audio_codec_(audio_codec),
// The list parser is set up for Cluster elements with this object as the
// second constructor argument (presumably its client -- confirm).
71 parser_(kWebMIdCluster, this),
72 cluster_start_time_(kNoTimestamp),
// NOTE(review): the literal false/true is presumably the Track's is-video
// flag (QueueBuffer() branches on is_video_) -- confirm against the header.
73 audio_(audio_track_num, false, audio_default_duration, new_sample_cb),
74 video_(video_track_num, true, video_default_duration, new_sample_cb) {
// Text tracks are created with kNoTimestamp as their default duration.
75 for (WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin();
76 it != text_tracks.end();
78 text_track_map_.insert(std::make_pair(
79 it->first, Track(it->first,
false, kNoTimestamp, new_sample_cb)));
// Empty body: no explicit teardown is performed here.
83 WebMClusterParser::~WebMClusterParser() {}
// Returns the parser to its pre-cluster state: both timecodes go back to the
// -1 "unset" sentinel that OnBlock() tests for, the cluster start time goes
// back to kNoTimestamp, and the cluster-ended flag is cleared.
85 void WebMClusterParser::Reset() {
86 last_block_timecode_ = -1;
87 cluster_timecode_ = -1;
88 cluster_start_time_ = kNoTimestamp;
89 cluster_ended_ =
false;
// Feeds `size` bytes starting at `buf` to parser_ and records whether the
// current cluster has been fully parsed.
//   buf:  input bytes.
//   size: number of bytes available at buf.
// NOTE(review): the return statements are not visible in this excerpt; the
// value is presumably derived from `result` -- confirm.
96 int WebMClusterParser::Parse(
const uint8_t* buf,
int size) {
97 int result = parser_.Parse(buf, size);
// NOTE(review): this clear is presumably on an early-exit path whose guarding
// condition is not visible here -- confirm.
100 cluster_ended_ =
false;
104 cluster_ended_ = parser_.IsParsingComplete();
// Once the cluster is complete, any audio/video buffer still held back for
// lack of a duration is given an estimated one and emitted.
105 if (cluster_ended_) {
106 audio_.ApplyDurationEstimateIfNeeded();
107 video_.ApplyDurationEstimateIfNeeded();
// No block supplied a start time (OnBlock() sets it from the first block), so
// fall back to the scaled cluster timecode.
111 if (cluster_start_time_ == kNoTimestamp) {
114 if (cluster_timecode_ < 0)
117 cluster_start_time_ = cluster_timecode_ * timecode_multiplier_;
// Clear the per-cluster timecodes back to the -1 "unset" sentinel.
125 last_block_timecode_ = -1;
126 cluster_timecode_ = -1;
// Attempts to read a block's duration from the encoded audio payload itself.
// In the visible code only Opus is handled, by delegating to
// ReadOpusDuration(); the result for other codecs is not visible here.
132 int64_t WebMClusterParser::TryGetEncodedAudioDuration(
142 if (audio_codec_ == kCodecOpus) {
143 return ReadOpusDuration(data, size);
// Derives an Opus packet's duration from its leading bytes.
//   data: packet bytes; data[0] is read as the TOC byte.
//   size: number of bytes at data.
// The config bits of data[0] select a per-frame duration from
// kOpusFrameDurationsMu, which is multiplied by the packet's frame count. The
// frame count depends on the frame-count code in the low two bits of data[0];
// for a 'Code 3' packet it is read from data[1] & 0x3f.
// Malformed packets (zero bytes, 'Code 3' without a second byte, a frame
// count of zero, an unexpected frame-count code) are reported through
// LIMITED_DLOG; what is returned in those cases is not visible here.
152 int64_t WebMClusterParser::ReadOpusDuration(
const uint8_t* data,
int size) {
// Bit masks for the first (TOC) byte and for the 'Code 3' frame-count byte.
155 static const uint8_t kTocConfigMask = 0xf8;
156 static const uint8_t kTocFrameCountCodeMask = 0x03;
157 static const uint8_t kFrameCountMask = 0x3f;
// NOTE(review): the warning below prints this limit (120) as "ms", but
// `duration` is built from kOpusFrameDurationsMu entries such as 10000 and
// 20000, so `duration > kPacketDurationMax` looks like a unit mismatch that
// would fire for every packet -- confirm the intended unit.
158 static const int64_t kPacketDurationMax = 120;
161 LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
162 <<
"Invalid zero-byte Opus packet; demuxed block duration may be "
168 int frame_count_type = data[0] & kTocFrameCountCodeMask;
171 switch (frame_count_type) {
182 LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
183 <<
"Second byte missing from 'Code 3' Opus packet; demuxed block "
184 "duration may be imprecise.";
188 frame_count = data[1] & kFrameCountMask;
190 if (frame_count == 0) {
191 LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
192 <<
"Illegal 'Code 3' Opus packet with frame count zero; demuxed "
193 "block duration may be imprecise.";
199 LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
200 <<
"Unexpected Opus frame count type: " << frame_count_type <<
"; "
201 <<
"demuxed block duration may be imprecise.";
// The masked-and-shifted config is a 5-bit value; the CHECKs assert that it
// indexes inside the duration table.
205 int opusConfig = (data[0] & kTocConfigMask) >> 3;
206 CHECK_GE(opusConfig, 0);
207 CHECK_LT(opusConfig, static_cast<int>(arraysize(kOpusFrameDurationsMu)));
209 DCHECK_GT(frame_count, 0);
210 int64_t duration = kOpusFrameDurationsMu[opusConfig] * frame_count;
// An over-long packet is only logged here.
212 if (duration > kPacketDurationMax) {
216 LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
217 <<
"Warning, demuxed Opus packet with encoded duration: "
218 << duration <<
"ms. Should be no greater than "
219 << kPacketDurationMax <<
"ms.";
// Handles the start of a list element identified by `id` by clearing the
// state that is accumulated while that list is parsed:
//   - Cluster:        cluster timecode and cluster start time.
//   - BlockGroup:     buffered block size, block duration, discard padding.
//   - BlockAdditions: any previously stored BlockAdditional payload.
// The returned client pointer is not visible in this excerpt.
225 WebMParserClient* WebMClusterParser::OnListStart(
int id) {
226 if (
id == kWebMIdCluster) {
227 cluster_timecode_ = -1;
228 cluster_start_time_ = kNoTimestamp;
229 }
else if (
id == kWebMIdBlockGroup) {
231 block_data_size_ = -1;
232 block_duration_ = -1;
233 discard_padding_ = -1;
234 discard_padding_set_ =
false;
235 }
else if (
id == kWebMIdBlockAdditions) {
237 block_additional_data_.reset();
238 block_additional_data_size_ = 0;
// Handles the end of a list element. Only BlockGroup is acted on: the Block
// bytes buffered by OnBinary() are passed to ParseBlock() (as a non-simple
// block) together with any BlockAdditional data, the BlockDuration, and the
// DiscardPadding (0 when none was set). All per-BlockGroup state is cleared
// afterwards.
// A BlockGroup in which no Block was buffered (block_data_size_ still -1) is
// logged as an error.
244 bool WebMClusterParser::OnListEnd(
int id) {
245 if (
id != kWebMIdBlockGroup)
249 if (block_data_size_ == -1) {
250 LOG(ERROR) <<
"Block missing from BlockGroup.";
254 bool result = ParseBlock(
false, block_data_.get(), block_data_size_,
255 block_additional_data_.get(),
256 block_additional_data_size_, block_duration_,
257 discard_padding_set_ ? discard_padding_ : 0);
// Reset the BlockGroup accumulators so the next group starts clean.
259 block_data_size_ = -1;
260 block_duration_ = -1;
262 block_additional_data_.reset();
263 block_additional_data_size_ = 0;
264 discard_padding_ = -1;
265 discard_padding_set_ =
false;
// Selects the member that receives an unsigned-integer element's value:
//   Timecode      -> cluster_timecode_
//   BlockDuration -> block_duration_
//   BlockAddID    -> block_add_id_
// NOTE(review): the store through `dst` and the handling of other ids are not
// visible in this excerpt.
269 bool WebMClusterParser::OnUInt(
int id, int64_t val) {
272 case kWebMIdTimecode:
273 dst = &cluster_timecode_;
275 case kWebMIdBlockDuration:
276 dst = &block_duration_;
278 case kWebMIdBlockAddID:
279 dst = &block_add_id_;
// Decodes the 4-byte header of a (Simple)Block and forwards the remaining
// payload to OnBlock().
// Header layout as read here:
//   buf[0]    track number in its one-byte form; bit 0x80 must be set,
//             otherwise the block is rejected with an error log
//   buf[1..2] 16-bit timecode, high byte first
//   buf[3]    flags; the lacing mode is taken from bits 1-2
// Per the error message, some lacing value is reported as unsupported; the
// condition guarding that log is not visible here.
290 bool WebMClusterParser::ParseBlock(
bool is_simple_block,
293 const uint8_t* additional,
296 int64_t discard_padding) {
302 if (!(buf[0] & 0x80)) {
303 LOG(ERROR) <<
"TrackNumber over 127 not supported";
307 int track_num = buf[0] & 0x7f;
308 int timecode = buf[1] << 8 | buf[2];
309 int flags = buf[3] & 0xff;
310 int lacing = (flags >> 1) & 0x3;
313 LOG(ERROR) <<
"Lacing " << lacing <<
" is not supported yet.";
// Bit 15 set means the 16-bit timecode is negative.
// NOTE(review): the statement executed for this case is not visible;
// presumably it sign-extends `timecode` -- confirm.
318 if (timecode & 0x8000)
// The frame payload begins right after the 4-byte header.
321 const uint8_t* frame_data = buf + 4;
322 int frame_size = size - (frame_data - buf);
323 return OnBlock(is_simple_block, track_num, timecode, duration, flags,
324 frame_data, frame_size, additional, additional_size,
// Handles binary elements found inside a cluster.
//   id:   element id.
//   data: element payload.
//   size: payload length in bytes.
// Visible cases:
//   - SimpleBlock: parsed immediately via ParseBlock() with no additional
//     data, duration -1 and discard padding 0.
//   - The branch that logs "More than 1 Block in a BlockGroup" copies the
//     payload into block_data_ (presumably the kWebMIdBlock case; its label is
//     not visible -- confirm).
//   - BlockAdditional: stored as the BlockAddID passed through HostToNet64
//     followed by the payload; a second BlockAdditional in the same group is
//     logged as unsupported.
//   - DiscardPadding: a 1..8 byte payload decoded as a signed integer, most
//     significant byte first.
328 bool WebMClusterParser::OnBinary(
int id,
const uint8_t* data,
int size) {
330 case kWebMIdSimpleBlock:
331 return ParseBlock(
true, data, size, NULL, 0, -1, 0);
335 LOG(ERROR) <<
"More than 1 Block in a BlockGroup is not "
// Keep a private copy of the Block bytes; OnListEnd() hands block_data_ to
// ParseBlock() when the BlockGroup ends.
339 block_data_.reset(
new uint8_t[size]);
340 memcpy(block_data_.get(), data, size);
341 block_data_size_ = size;
344 case kWebMIdBlockAdditional: {
345 uint64_t block_add_id = base::HostToNet64(block_add_id_);
346 if (block_additional_data_) {
351 LOG(ERROR) <<
"More than 1 BlockAdditional in a "
352 "BlockGroup is not supported.";
// Layout of the stored buffer: [converted BlockAddID][payload].
358 block_additional_data_size_ = size +
sizeof(block_add_id);
359 block_additional_data_.reset(
new uint8_t[block_additional_data_size_]);
360 memcpy(block_additional_data_.get(), &block_add_id,
361 sizeof(block_add_id));
// NOTE(review): the literal 8 equals sizeof(block_add_id) for uint64_t; using
// the sizeof expression here would keep the two in sync.
362 memcpy(block_additional_data_.get() + 8, data, size);
365 case kWebMIdDiscardPadding: {
366 if (discard_padding_set_ || size <= 0 || size > 8)
368 discard_padding_set_ =
true;
// The first byte is cast to int8_t so the sign carries into the wider value;
// the remaining bytes are shifted in one at a time.
// NOTE(review): left-shifting a negative signed value is not well-defined
// before C++20 -- confirm the toolchain's behaviour.
371 discard_padding_ =
static_cast<int8_t
>(data[0]);
372 for (
int i = 1; i < size; ++i)
373 discard_padding_ = (discard_padding_ << 8) | data[i];
// Turns one decoded block into a MediaSample and hands it to the owning Track.
// Steps visible in this excerpt:
//   1. Validate ordering: a cluster timecode must already be known, a negative
//      timecode offset is logged as an error, and a block timecode must not
//      precede the previous block's.
//   2. Route by track number: audio, video, an ignored track, or a text track;
//      any other track number is logged as unexpected.
//   3. Compute the timestamp as
//      (cluster_timecode_ + timecode) * timecode_multiplier_.
//   4. Build the sample. For text blocks the payload is parsed as WebVTT and
//      the id/settings are packed into side data; for the other streams a
//      DecryptConfig is requested when an encryption key id is present.
//   5. Choose the duration: the encoded duration if one was read, otherwise
//      the BlockDuration, otherwise the track's default duration.
// Returns the result of Track::AddBuffer() on the visible success path.
382 bool WebMClusterParser::OnBlock(
bool is_simple_block,
389 const uint8_t* additional,
391 int64_t discard_padding) {
393 if (cluster_timecode_ == -1) {
394 LOG(ERROR) <<
"Got a block before cluster timecode.";
401 LOG(ERROR) <<
"Got a block with negative timecode offset " << timecode;
405 if (last_block_timecode_ != -1 && timecode < last_block_timecode_) {
406 LOG(ERROR) <<
"Got a block with a timecode before the previous block.";
411 StreamType stream_type = kStreamAudio;
412 std::string encryption_key_id;
413 int64_t encoded_duration = kNoTimestamp;
414 if (track_num == audio_.track_num()) {
416 encryption_key_id = audio_encryption_key_id_;
// The payload is only inspected for an encoded duration when the audio track
// has no encryption key id.
417 if (encryption_key_id.empty()) {
418 encoded_duration = TryGetEncodedAudioDuration(data, size);
420 }
else if (track_num == video_.track_num()) {
422 encryption_key_id = video_encryption_key_id_;
423 stream_type = kStreamVideo;
424 }
else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) {
426 }
else if (Track*
const text_track = FindTextTrack(track_num)) {
// NOTE(review): the statement run for a text block without a BlockDuration
// is not visible here.
429 if (block_duration < 0)
432 stream_type = kStreamText;
434 LOG(ERROR) <<
"Unexpected track number " << track_num;
438 last_block_timecode_ = timecode;
440 int64_t timestamp = (cluster_timecode_ + timecode) * timecode_multiplier_;
442 scoped_refptr<MediaSample> buffer;
443 if (stream_type != kStreamText) {
// SimpleBlocks carry the keyframe bit in flags (0x80); for other blocks the
// track inspects the payload.
449 is_simple_block ? (flags & 0x80) != 0 : track->IsKeyframe(data, size);
454 scoped_ptr<DecryptConfig> decrypt_config;
456 if (!encryption_key_id.empty() &&
457 !WebMCreateDecryptConfig(
459 reinterpret_cast<const uint8_t*>(encryption_key_id.data()),
460 encryption_key_id.size(),
461 &decrypt_config, &data_offset)) {
466 additional, additional_size, is_keyframe);
468 if (decrypt_config) {
470 buffer->set_is_encrypted(
true);
// Text path: split the WebVTT payload into id, settings and content.
473 std::string id, settings, content;
474 WebMWebVTTParser::Parse(data, size, &
id, &settings, &content);
476 std::vector<uint8_t> side_data;
477 MakeSideData(
id.begin(),
id.end(),
478 settings.begin(), settings.end(),
482 reinterpret_cast<const uint8_t*>(content.data()), content.length(),
483 &side_data[0], side_data.size(),
true);
486 buffer->set_pts(timestamp);
// The first block seen in a cluster defines the cluster start time.
487 if (cluster_start_time_ == kNoTimestamp)
488 cluster_start_time_ = timestamp;
490 int64_t block_duration_time_delta = kNoTimestamp;
491 if (block_duration >= 0) {
492 block_duration_time_delta = block_duration * timecode_multiplier_;
// Duration preference: encoded duration, then BlockDuration, then the track
// default.
506 if (encoded_duration != kNoTimestamp) {
507 DCHECK(encoded_duration != kInfiniteDuration);
508 DCHECK(encoded_duration > 0);
509 buffer->set_duration(encoded_duration);
511 DVLOG(3) << __FUNCTION__ <<
" : "
512 <<
"Using encoded duration " << encoded_duration;
514 if (block_duration_time_delta != kNoTimestamp) {
515 int64_t duration_difference =
516 block_duration_time_delta - encoded_duration;
// Only a BlockDuration that exceeds the encoded duration by more than two
// timecode ticks is reported.
// NOTE(review): the message labels both values "ms"; whether that matches
// the unit produced by timecode_multiplier_ is not evident here -- confirm.
518 const auto kWarnDurationDiff = timecode_multiplier_ * 2;
519 if (duration_difference > kWarnDurationDiff) {
520 LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
521 <<
"BlockDuration (" << block_duration_time_delta
522 <<
"ms) differs significantly from encoded duration ("
523 << encoded_duration <<
"ms).";
526 }
else if (block_duration_time_delta != kNoTimestamp) {
527 buffer->set_duration(block_duration_time_delta);
529 buffer->set_duration(track->default_duration());
532 return track->AddBuffer(buffer);
// Creates a track that has no duration estimate yet
// (estimated_next_frame_duration_ starts as kNoTimestamp).
//   track_num:        track number this Track represents.
//   default_duration: must be kNoTimestamp or positive (DCHECKed below).
// new_sample_cb is stored and run for each sample the track emits.
535 WebMClusterParser::Track::Track(
int track_num,
537 int64_t default_duration,
539 : track_num_(track_num),
541 default_duration_(default_duration),
542 estimated_next_frame_duration_(kNoTimestamp),
543 new_sample_cb_(new_sample_cb) {
544 DCHECK(default_duration_ == kNoTimestamp || default_duration_ > 0);
// Empty body: no explicit teardown is performed here.
547 WebMClusterParser::Track::~Track() {}
// Accepts the next buffer for this track.
// If an earlier buffer is being held back because it had no duration, its
// duration is derived as (this buffer's pts - held buffer's pts) and it is
// queued first. The incoming buffer is then either held back itself (when its
// duration is kNoTimestamp) or passed to QueueBuffer(), whose result is
// returned.
549 bool WebMClusterParser::Track::AddBuffer(
550 const scoped_refptr<MediaSample>& buffer) {
551 DVLOG(2) <<
"AddBuffer() : " << track_num_
552 <<
" ts " << buffer->pts()
553 <<
" dur " << buffer->duration()
554 <<
" kf " << buffer->is_key_frame()
555 <<
" size " << buffer->data_size();
557 if (last_added_buffer_missing_duration_.get()) {
558 int64_t derived_duration =
559 buffer->pts() - last_added_buffer_missing_duration_->pts();
560 last_added_buffer_missing_duration_->set_duration(derived_duration);
562 DVLOG(2) <<
"AddBuffer() : applied derived duration to held-back buffer : "
564 << last_added_buffer_missing_duration_->pts()
566 << last_added_buffer_missing_duration_->duration()
567 <<
" kf " << last_added_buffer_missing_duration_->is_key_frame()
568 <<
" size " << last_added_buffer_missing_duration_->data_size();
// Clear the held-back slot before queueing; QueueBuffer() DCHECKs that it is
// empty.
569 scoped_refptr<MediaSample> updated_buffer =
570 last_added_buffer_missing_duration_;
571 last_added_buffer_missing_duration_ = NULL;
572 if (!QueueBuffer(updated_buffer))
576 if (buffer->duration() == kNoTimestamp) {
577 last_added_buffer_missing_duration_ = buffer;
578 DVLOG(2) <<
"AddBuffer() : holding back buffer that is missing duration";
582 return QueueBuffer(buffer);
// If a buffer is still being held back for lack of a duration, assigns it the
// value from GetDurationEstimate(), logs the estimate (rate-limited), emits the
// buffer through new_sample_cb_, and clears the held-back slot.
// The emit goes straight to new_sample_cb_ rather than through QueueBuffer().
585 void WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() {
586 if (!last_added_buffer_missing_duration_.get())
589 int64_t estimated_duration = GetDurationEstimate();
590 last_added_buffer_missing_duration_->set_duration(estimated_duration);
// NOTE(review): the message labels the estimate "ms"; confirm that this
// matches the unit GetDurationEstimate() returns.
599 LIMITED_LOG(INFO, num_duration_estimates_, kMaxDurationEstimateLogs)
600 <<
"Estimating WebM block duration to be "
601 << estimated_duration
602 <<
"ms for the last (Simple)Block in the Cluster for this Track. Use "
603 "BlockGroups with BlockDurations at the end of each Track in a "
604 "Cluster to avoid estimation.";
606 DVLOG(2) << __FUNCTION__ <<
" new dur : ts "
607 << last_added_buffer_missing_duration_->pts()
609 << last_added_buffer_missing_duration_->duration()
610 <<
" kf " << last_added_buffer_missing_duration_->is_key_frame()
611 <<
" size " << last_added_buffer_missing_duration_->data_size();
615 new_sample_cb_.Run(track_num_, last_added_buffer_missing_duration_);
616 last_added_buffer_missing_duration_ = NULL;
// Drops any buffer that is being held back while waiting for a duration; the
// held buffer is not emitted.
619 void WebMClusterParser::Track::Reset() {
620 last_added_buffer_missing_duration_ = NULL;
// Inspects a frame's leading bytes to decide whether it is a keyframe.
//   data: frame payload.
//   size: payload length in bytes.
// Visible checks: bit 0 of data[0], and whether data[3..5] equals
// 0x9d 0x01 0x2a.
// NOTE(review): these look like VP8 frame-header checks, and the value
// returned by each branch is not visible in this excerpt -- confirm.
623 bool WebMClusterParser::Track::IsKeyframe(
const uint8_t* data,
int size)
const {
635 if ((data[0] & 0x01) != 0)
640 if (data[3] != 0x9d || data[4] != 0x01 || data[5] != 0x2a)
// Validates a buffer's duration, folds it into the running estimate for the
// next frame's duration, and emits the buffer through new_sample_cb_.
// Must only be called while no buffer is being held back (DCHECKed).
// Estimate policy: the first duration seen seeds the estimate; afterwards a
// video track keeps the maximum seen and the other branch keeps the minimum.
// A negative or kNoTimestamp duration is logged as an error.
646 bool WebMClusterParser::Track::QueueBuffer(
647 const scoped_refptr<MediaSample>& buffer) {
648 DCHECK(!last_added_buffer_missing_duration_.get());
650 int64_t duration = buffer->duration();
651 if (duration < 0 || duration == kNoTimestamp) {
652 LOG(ERROR) <<
"Invalid buffer duration: " << duration;
// Remember the previous estimate so a change can be logged below.
666 int64_t orig_duration_estimate = estimated_next_frame_duration_;
667 if (estimated_next_frame_duration_ == kNoTimestamp) {
668 estimated_next_frame_duration_ = duration;
669 }
else if (is_video_) {
670 estimated_next_frame_duration_ =
671 std::max(duration, estimated_next_frame_duration_);
673 estimated_next_frame_duration_ =
674 std::min(duration, estimated_next_frame_duration_);
677 if (orig_duration_estimate != estimated_next_frame_duration_) {
678 DVLOG(3) <<
"Updated duration estimate:"
679 << orig_duration_estimate
681 << estimated_next_frame_duration_
687 new_sample_cb_.Run(track_num_, buffer);
// Picks the duration to assign to a buffer that never received one.
// Uses estimated_next_frame_duration_ when it has been set; otherwise falls
// back to a hardcoded default, kDefaultVideoBufferDurationInMs or
// kDefaultAudioBufferDurationInMs, scaled by kMicrosecondsPerMillisecond.
// The result is DCHECKed to be positive and not kNoTimestamp.
// NOTE(review): the condition choosing between the video and audio default is
// not visible here; presumably is_video_ -- confirm.
691 int64_t WebMClusterParser::Track::GetDurationEstimate() {
692 int64_t duration = estimated_next_frame_duration_;
693 if (duration != kNoTimestamp) {
694 DVLOG(3) << __FUNCTION__ <<
" : using estimated duration";
696 DVLOG(3) << __FUNCTION__ <<
" : using hardcoded default duration";
698 duration = kDefaultVideoBufferDurationInMs * kMicrosecondsPerMillisecond;
700 duration = kDefaultAudioBufferDurationInMs * kMicrosecondsPerMillisecond;
704 DCHECK(duration > 0);
705 DCHECK(duration != kNoTimestamp);
// Walks every entry of text_track_map_.
// NOTE(review): the loop body is not visible in this excerpt; presumably it
// calls Reset() on each text Track -- confirm.
709 void WebMClusterParser::ResetTextTracks() {
710 for (TextTrackMap::iterator it = text_track_map_.begin();
711 it != text_track_map_.end();
// Looks up the text Track registered for `track_num` in text_track_map_.
// OnBlock() treats a null result as "not a text track".
// NOTE(review): the return statements are not visible here; presumably NULL
// when the lookup hits end() and a pointer to the mapped Track otherwise --
// confirm.
717 WebMClusterParser::Track*
718 WebMClusterParser::FindTextTrack(
int track_num) {
719 const TextTrackMap::iterator it = text_track_map_.find(track_num);
721 if (it == text_track_map_.end())