5 #include "packager/media/formats/webm/webm_cluster_parser.h"
9 #include "packager/base/logging.h"
10 #include "packager/base/sys_byteorder.h"
11 #include "packager/media/base/decrypt_config.h"
12 #include "packager/media/base/timestamp.h"
13 #include "packager/media/filters/vp8_parser.h"
14 #include "packager/media/filters/vp9_parser.h"
15 #include "packager/media/filters/webvtt_util.h"
16 #include "packager/media/formats/webm/webm_constants.h"
17 #include "packager/media/formats/webm/webm_crypto_helpers.h"
18 #include "packager/media/formats/webm/webm_webvtt_parser.h"
// LIMITED_LOG(level, count, max) expands to LOG_IF(level, (count) < (max)), so
// an entry is produced only while `count` is still below `max`. When
// (count) + 1 == (max) the "(Log limit reached. ...)" notice is streamed in
// front of the caller's message. LIMITED_DLOG is the same construction built
// on DLOG_IF.
// NOTE(review): the closing arm of each conditional expression is not present
// in this excerpt, and neither macro visibly increments `count`.
22 #define LIMITED_LOG(level, count, max) \
23 LOG_IF(level, (count) < (max)) \
24 << (((count) + 1 == (max)) \
25 ? "(Log limit reached. Further similar entries " \
26 "may be suppressed): " \
28 #define LIMITED_DLOG(level, count, max) \
29 DLOG_IF(level, (count) < (max)) \
30 << (((count) + 1 == (max)) \
31 ? "(Log limit reached. Further similar entries " \
32 "may be suppressed): " \
35 namespace edash_packager {
// Multiplier used below to turn the hard-coded millisecond defaults into
// microseconds (see Track::GetDurationEstimate).
39 const int64_t kMicrosecondsPerMillisecond = 1000;
// Passed as the `max` argument of LIMITED_DLOG for duration-error messages.
// NOTE(review): the enclosing enum declaration is not present in this excerpt.
44 kMaxDurationErrorLogs = 10,
// Passed as the `max` argument of LIMITED_LOG for duration-estimate messages.
47 kMaxDurationEstimateLogs = 10,
// Boolean helper that OnBlock calls with (stream_type == kStreamVideo, codec,
// ...) as the alternative to testing bit 0x80 of the block flags.
// NOTE(review): only the first parameter and the unsupported-codec report are
// present in this excerpt; the actual keyframe test is not visible.
54 bool IsKeyframe(
bool is_video,
// Reached for a codec this helper has no handling for.
69 NOTIMPLEMENTED() <<
"Unsupported codec " << codec;
// NOTE(review): the declaration that opens this initializer list is not
// present in this excerpt. The 32 entries are presumably per-configuration
// Opus frame durations in microseconds (2500 = 2.5 ms up to 60000 = 60 ms),
// indexed by the 5-bit config number read in ReadOpusDuration — confirm.
77 10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000,
78 60000, 10000, 20000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000,
79 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000};
// Constructs a cluster parser from the segment's timecode scale, the optional
// audio and video stream infos, per-track default durations, the text tracks,
// the set of ignored track numbers and the audio/video encryption key ids.
// NOTE(review): some parameters (for example the `new_sample_cb` used below)
// and some member initializers are not present in this excerpt.
81 WebMClusterParser::WebMClusterParser(
82 int64_t timecode_scale,
83 scoped_refptr<AudioStreamInfo> audio_stream_info,
84 scoped_refptr<VideoStreamInfo> video_stream_info,
85 int64_t audio_default_duration,
86 int64_t video_default_duration,
87 const WebMTracksParser::TextTracks& text_tracks,
88 const std::set<int64_t>& ignored_tracks,
89 const std::string& audio_encryption_key_id,
90 const std::string& video_encryption_key_id,
// Block timecodes are multiplied by this factor to obtain sample timestamps.
// NOTE(review): presumably converts a nanosecond scale to microseconds — confirm.
93 : timecode_multiplier_(timecode_scale / 1000.0),
94 audio_stream_info_(audio_stream_info),
95 video_stream_info_(video_stream_info),
96 ignored_tracks_(ignored_tracks),
97 audio_encryption_key_id_(audio_encryption_key_id),
98 video_encryption_key_id_(video_encryption_key_id),
// The list parser is rooted at the Cluster element and reports back to `this`.
99 parser_(kWebMIdCluster, this),
102 cluster_start_time_(kNoTimestamp),
// A null stream info gives the corresponding Track the track number -1.
103 audio_(audio_stream_info ? audio_stream_info->track_id() : -1,
105 audio_default_duration,
107 video_(video_stream_info ? video_stream_info->track_id() : -1,
109 video_default_duration,
// Register one Track per text track, keyed by the text track's number and
// created with kNoTimestamp in place of a default duration.
111 for (WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin();
112 it != text_tracks.end();
114 text_track_map_.insert(std::make_pair(
115 it->first, Track(it->first,
false, kNoTimestamp, new_sample_cb)));
// Destructor with an empty body; no explicit cleanup is performed here.
119 WebMClusterParser::~WebMClusterParser() {}
// NOTE(review): the header of the enclosing function is not present in this
// excerpt (presumably WebMClusterParser::Reset — confirm).
// Puts the per-cluster bookkeeping back to its "unset" values: -1 for both
// timecodes, kNoTimestamp for the cluster start time, and a cleared
// cluster-ended flag.
122 last_block_timecode_ = -1;
123 cluster_timecode_ = -1;
124 cluster_start_time_ = kNoTimestamp;
125 cluster_ended_ =
false;
// NOTE(review): the header of the enclosing function is not present in this
// excerpt (presumably WebMClusterParser::Flush — confirm).
// Asks the audio and video tracks to finalize any buffer they are holding
// back for lack of a duration (see Track::ApplyDurationEstimateIfNeeded).
134 audio_.ApplyDurationEstimateIfNeeded();
135 video_.ApplyDurationEstimateIfNeeded();
// NOTE(review): the header of the enclosing function is not present in this
// excerpt (presumably WebMClusterParser::Parse(buf, size) — confirm), and the
// statements between the visible lines are missing.
// Feeds the bytes to the cluster-level list parser.
140 int result = parser_.
Parse(buf, size);
143 cluster_ended_ =
false;
148 if (cluster_ended_) {
// No block supplied a start time for this cluster.
151 if (cluster_start_time_ == kNoTimestamp) {
// A negative cluster timecode means no Timecode element was recorded.
154 if (cluster_timecode_ < 0)
// Derive the start time from the cluster's own timecode.
157 cluster_start_time_ = cluster_timecode_ * timecode_multiplier_;
// Clear the per-cluster timecodes before the next cluster.
165 last_block_timecode_ = -1;
166 cluster_timecode_ = -1;
// Returns a duration read from the audio payload itself when the codec allows
// it; for Opus the work is delegated to ReadOpusDuration(data, size).
// NOTE(review): the parameter list and the non-Opus path are not present in
// this excerpt.
172 int64_t WebMClusterParser::TryGetEncodedAudioDuration(
// Must only be called when an audio stream info exists.
182 DCHECK(audio_stream_info_);
183 if (audio_stream_info_->codec() == kCodecOpus) {
184 return ReadOpusDuration(data, size);
// Derives a packet duration from the Opus TOC byte(s) at the start of `data`.
// The low two bits of data[0] select the frame-count code; for code 3 the frame
// count is the low six bits of data[1]; the top five bits of data[0] give the
// configuration number. Each malformed-packet case visible here is reported
// through LIMITED_DLOG.
// NOTE(review): statements that are not present in this excerpt (size guards,
// case labels, returns and the duration computation) are not described here.
193 int64_t WebMClusterParser::ReadOpusDuration(
const uint8_t* data,
int size) {
// Top five bits of the TOC byte: configuration number.
196 static const uint8_t kTocConfigMask = 0xf8;
// Low two bits of the TOC byte: frame-count code.
197 static const uint8_t kTocFrameCountCodeMask = 0x03;
// Low six bits of the second byte: explicit frame count for 'Code 3' packets.
198 static const uint8_t kFrameCountMask = 0x3f;
// RFC 6716 limits an Opus packet to 120 ms of audio. The value is in
// milliseconds: it is scaled by 1000 for the comparison against `duration`
// and printed with an "ms" suffix below. The previous value of 120000 made the
// limit 120 seconds, so the over-long-packet warning could never be reached.
199 static const int64_t kPacketDurationMaxMs = 120;
202 LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
203 <<
"Invalid zero-byte Opus packet; demuxed block duration may be "
209 int frame_count_type = data[0] & kTocFrameCountCodeMask;
212 switch (frame_count_type) {
223 LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
224 <<
"Second byte missing from 'Code 3' Opus packet; demuxed block "
225 "duration may be imprecise.";
229 frame_count = data[1] & kFrameCountMask;
231 if (frame_count == 0) {
232 LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
233 <<
"Illegal 'Code 3' Opus packet with frame count zero; demuxed "
234 "block duration may be imprecise.";
240 LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
241 <<
"Unexpected Opus frame count type: " << frame_count_type <<
"; "
242 <<
"demuxed block duration may be imprecise.";
// Shift the masked config bits down to a 0..31 index.
246 int opusConfig = (data[0] & kTocConfigMask) >> 3;
247 CHECK_GE(opusConfig, 0);
250 DCHECK_GT(frame_count, 0);
// `duration` is divided by 1000 to print milliseconds below, so the limit is
// scaled by 1000 to match its unit.
253 if (duration > kPacketDurationMaxMs * 1000) {
257 LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
258 <<
"Warning, demuxed Opus packet with encoded duration: "
259 << duration / 1000 <<
"ms. Should be no greater than "
260 << kPacketDurationMaxMs <<
"ms.";
// Parser callback invoked when a list element with the given `id` begins.
// Clears whatever state belongs to that element so that values left over from
// a previous Cluster, BlockGroup or BlockAdditions are not reused.
// NOTE(review): the return statement is not present in this excerpt.
266 WebMParserClient* WebMClusterParser::OnListStart(
int id) {
267 if (
id == kWebMIdCluster) {
// New cluster: its timecode and start time are not known yet.
268 cluster_timecode_ = -1;
269 cluster_start_time_ = kNoTimestamp;
270 }
else if (
id == kWebMIdBlockGroup) {
// New block group: -1 / false mark "not seen yet" for each child value.
272 block_data_size_ = -1;
273 block_duration_ = -1;
274 discard_padding_ = -1;
275 discard_padding_set_ =
false;
276 }
else if (
id == kWebMIdBlockAdditions) {
// Drop any additional data kept from an earlier BlockAdditions element.
278 block_additional_data_.reset();
279 block_additional_data_size_ = 0;
// Parser callback invoked when a list element with the given `id` ends. At the
// end of a BlockGroup the buffered Block is handed to ParseBlock together with
// the collected additional data, duration and discard padding, after which the
// per-group state is cleared again.
// NOTE(review): the early-return statements are not present in this excerpt.
285 bool WebMClusterParser::OnListEnd(
int id) {
// Only the end of a BlockGroup is handled by the code below.
286 if (
id != kWebMIdBlockGroup)
// -1 is the value OnListStart assigns; it is still -1 if no Block was stored.
290 if (block_data_size_ == -1) {
291 LOG(ERROR) <<
"Block missing from BlockGroup.";
// Blocks inside a BlockGroup are not SimpleBlocks, hence `false`. When no
// DiscardPadding element was seen, 0 is passed instead.
295 bool result = ParseBlock(
false, block_data_.get(), block_data_size_,
296 block_additional_data_.get(),
297 block_additional_data_size_, block_duration_,
298 discard_padding_set_ ? discard_padding_ : 0);
// Return the group state to "not seen yet".
300 block_data_size_ = -1;
301 block_duration_ = -1;
303 block_additional_data_.reset();
304 block_additional_data_size_ = 0;
305 discard_padding_ = -1;
306 discard_padding_set_ =
false;
// Parser callback for unsigned-integer elements. Selects the member that
// corresponds to `id`: the cluster timecode, the block duration or the
// block-additional id.
// NOTE(review): the switch header, the default case and the code that stores
// `val` through `dst` are not present in this excerpt.
310 bool WebMClusterParser::OnUInt(
int id, int64_t val) {
313 case kWebMIdTimecode:
314 dst = &cluster_timecode_;
316 case kWebMIdBlockDuration:
317 dst = &block_duration_;
319 case kWebMIdBlockAddID:
320 dst = &block_add_id_;
// Decodes the four-byte header at the start of a (Simple)Block payload (track
// number, 16-bit timecode, flags) and forwards the remaining bytes to OnBlock
// as the frame data.
// NOTE(review): several parameters, any size check on `buf` and the return
// statements of the error paths are not present in this excerpt.
331 bool WebMClusterParser::ParseBlock(
bool is_simple_block,
334 const uint8_t* additional,
337 int64_t discard_padding) {
// Only the form with bit 0x80 set in the first byte is accepted; the track
// number is then the low seven bits of that byte.
343 if (!(buf[0] & 0x80)) {
344 LOG(ERROR) <<
"TrackNumber over 127 not supported";
348 int track_num = buf[0] & 0x7f;
// Bytes 1-2 hold the timecode, most significant byte first.
349 int timecode = buf[1] << 8 | buf[2];
350 int flags = buf[3] & 0xff;
// Bits 1-2 of the flags byte give the lacing mode.
351 int lacing = (flags >> 1) & 0x3;
354 LOG(ERROR) <<
"Lacing " << lacing <<
" is not supported yet.";
// Bit 15 set: the 16-bit timecode is negative.
// NOTE(review): the statement guarded by this test is not visible here.
359 if (timecode & 0x8000)
// The frame payload begins right after the four header bytes.
362 const uint8_t* frame_data = buf + 4;
363 int frame_size = size - (frame_data - buf);
364 return OnBlock(is_simple_block, track_num, timecode, duration, flags,
365 frame_data, frame_size, additional, additional_size,
// Parser callback for binary elements. SimpleBlocks are parsed immediately;
// a Block's payload, BlockAdditional data and DiscardPadding are stored in
// members for OnListEnd to consume.
// NOTE(review): the switch header, some case labels (including the one for the
// Block payload), the break/return statements and the default case are not
// present in this excerpt.
369 bool WebMClusterParser::OnBinary(
int id,
const uint8_t* data,
int size) {
371 case kWebMIdSimpleBlock:
// A SimpleBlock carries no additional data, no duration (-1) and no padding.
372 return ParseBlock(
true, data, size, NULL, 0, -1, 0);
376 LOG(ERROR) <<
"More than 1 Block in a BlockGroup is not "
// Keep a private copy of the payload for OnListEnd.
380 block_data_.reset(
new uint8_t[size]);
381 memcpy(block_data_.get(), data, size);
382 block_data_size_ = size;
385 case kWebMIdBlockAdditional: {
// The BlockAddID is stored in network byte order ahead of the additional data.
386 uint64_t block_add_id = base::HostToNet64(block_add_id_);
387 if (block_additional_data_) {
392 LOG(ERROR) <<
"More than 1 BlockAdditional in a "
393 "BlockGroup is not supported.";
// Buffer layout: the 8-byte id first, then `size` bytes of additional data.
399 block_additional_data_size_ = size +
sizeof(block_add_id);
400 block_additional_data_.reset(
new uint8_t[block_additional_data_size_]);
401 memcpy(block_additional_data_.get(), &block_add_id,
402 sizeof(block_add_id));
// The offset 8 equals sizeof(block_add_id), which is a uint64_t.
403 memcpy(block_additional_data_.get() + 8, data, size);
406 case kWebMIdDiscardPadding: {
// Only one DiscardPadding of 1 to 8 bytes is accepted.
407 if (discard_padding_set_ || size <= 0 || size > 8)
409 discard_padding_set_ =
true;
// Read the value as a signed integer, most significant byte first: the first
// byte is sign-extended through int8_t, then the remaining bytes are shifted in.
412 discard_padding_ =
static_cast<int8_t
>(data[0]);
413 for (
int i = 1; i < size; ++i)
414 discard_padding_ = (discard_padding_ << 8) | data[i];
// Turns one decoded block into a MediaSample and emits it on the matching
// track. Visible steps: validate the timecodes, pick the stream type and
// encryption key id from the track number, build the sample, set its pts and
// duration, run the one-time init callback, then call Track::EmitBuffer.
// NOTE(review): many statements (several parameters, the `track` assignments,
// sample creation calls, returns of the error paths, closing braces) are not
// present in this excerpt, so only the visible lines are described.
423 bool WebMClusterParser::OnBlock(
bool is_simple_block,
430 const uint8_t* additional,
432 int64_t discard_padding) {
// A block needs the cluster timecode in order to be timestamped.
434 if (cluster_timecode_ == -1) {
435 LOG(ERROR) <<
"Got a block before cluster timecode.";
442 LOG(ERROR) <<
"Got a block with negative timecode offset " << timecode;
// Within a cluster, block timecodes must not go backwards.
446 if (last_block_timecode_ != -1 && timecode < last_block_timecode_) {
447 LOG(ERROR) <<
"Got a block with a timecode before the previous block.";
452 StreamType stream_type = kStreamAudio;
453 std::string encryption_key_id;
454 int64_t encoded_duration = kNoTimestamp;
455 if (track_num == audio_.track_num()) {
457 encryption_key_id = audio_encryption_key_id_;
// The payload is inspected for an encoded duration only when the audio track
// has no encryption key id.
458 if (encryption_key_id.empty()) {
459 encoded_duration = TryGetEncodedAudioDuration(data, size);
461 }
else if (track_num == video_.track_num()) {
463 encryption_key_id = video_encryption_key_id_;
464 stream_type = kStreamVideo;
465 }
else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) {
467 }
else if (Track*
const text_track = FindTextTrack(track_num)) {
470 if (block_duration < 0)
473 stream_type = kStreamText;
475 LOG(ERROR) <<
"Unexpected track number " << track_num;
479 last_block_timecode_ = timecode;
// Absolute block time, scaled into the timestamp unit used for samples.
481 int64_t timestamp = (cluster_timecode_ + timecode) * timecode_multiplier_;
483 scoped_refptr<MediaSample> buffer;
484 if (stream_type != kStreamText) {
// Keyframe flag: either bit 0x80 of the block flags or the IsKeyframe helper.
// NOTE(review): the condition choosing between the two is not visible here.
491 ? (flags & 0x80) != 0
492 : IsKeyframe(stream_type == kStreamVideo,
493 video_stream_info_ ? video_stream_info_->codec()
494 : kUnknownVideoCodec,
500 scoped_ptr<DecryptConfig> decrypt_config;
// A decrypt config is built only for tracks that have an encryption key id.
502 if (!encryption_key_id.empty() &&
503 !WebMCreateDecryptConfig(
505 reinterpret_cast<const uint8_t*>(encryption_key_id.data()),
506 encryption_key_id.size(),
507 &decrypt_config, &data_offset)) {
512 additional, additional_size, is_keyframe);
514 if (decrypt_config) {
516 buffer->set_is_encrypted(
true);
// Text path: id/settings are packed into side data and `content` is the payload.
519 std::string id, settings, content;
522 std::vector<uint8_t> side_data;
523 MakeSideData(
id.begin(),
id.end(),
524 settings.begin(), settings.end(),
528 reinterpret_cast<const uint8_t*>(content.data()), content.length(),
529 &side_data[0], side_data.size(),
true);
532 buffer->set_pts(timestamp);
// The first block seen in a cluster defines the cluster start time.
533 if (cluster_start_time_ == kNoTimestamp)
534 cluster_start_time_ = timestamp;
// A negative block_duration means none was supplied for this block.
536 int64_t block_duration_time_delta = kNoTimestamp;
537 if (block_duration >= 0) {
538 block_duration_time_delta = block_duration * timecode_multiplier_;
// Duration precedence: encoded duration, then BlockDuration, then the track's
// default duration.
550 if (encoded_duration != kNoTimestamp) {
551 DCHECK(encoded_duration != kInfiniteDuration);
552 DCHECK(encoded_duration > 0);
553 buffer->set_duration(encoded_duration);
555 DVLOG(3) << __FUNCTION__ <<
" : "
556 <<
"Using encoded duration " << encoded_duration;
558 if (block_duration_time_delta != kNoTimestamp) {
559 int64_t duration_difference =
560 block_duration_time_delta - encoded_duration;
// Warn when BlockDuration exceeds the encoded duration by more than two
// timecode ticks.
// NOTE(review): the comparison is one-sided, so a BlockDuration much shorter
// than the encoded duration is not reported although the message says
// "differs" — confirm whether an absolute difference was intended.
562 const auto kWarnDurationDiff = timecode_multiplier_ * 2;
563 if (duration_difference > kWarnDurationDiff) {
564 LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
565 <<
"BlockDuration (" << block_duration_time_delta / 1000
566 <<
"ms) differs significantly from encoded duration ("
567 << encoded_duration / 1000 <<
"ms).";
570 }
else if (block_duration_time_delta != kNoTimestamp) {
571 buffer->set_duration(block_duration_time_delta);
573 buffer->set_duration(track->default_duration());
// One-time stream initialization, run only while an init callback is set and
// initialization has not happened yet.
576 if (!init_cb_.is_null() && !initialized_) {
577 std::vector<scoped_refptr<StreamInfo>> streams;
578 if (audio_stream_info_)
579 streams.push_back(audio_stream_info_);
580 if (video_stream_info_) {
// The video codec string and extra data are derived from a video frame, so
// this path runs only when the current block is video.
581 if (stream_type == kStreamVideo) {
582 scoped_ptr<VPxParser> vpx_parser;
583 switch (video_stream_info_->codec()) {
585 vpx_parser.reset(
new VP8Parser);
588 vpx_parser.reset(
new VP9Parser);
591 NOTIMPLEMENTED() <<
"Unsupported codec "
592 << video_stream_info_->codec();
595 std::vector<VPxFrameInfo> vpx_frames;
596 if (!vpx_parser->Parse(buffer->data(), buffer->data_size(),
598 LOG(ERROR) <<
"Failed to parse vpx frame.";
// Exactly one frame is expected, and it must be a keyframe.
601 if (vpx_frames.size() != 1u || !vpx_frames[0].is_keyframe) {
602 LOG(ERROR) <<
"The first frame should be a key frame.";
// Publish the codec string and serialized codec configuration on the stream info.
606 const VPCodecConfiguration* codec_config = &vpx_parser->codec_config();
607 video_stream_info_->set_codec_string(
608 codec_config->GetCodecString(video_stream_info_->codec()));
609 std::vector<uint8_t> extra_data;
610 codec_config->Write(&extra_data);
611 video_stream_info_->set_extra_data(extra_data);
612 streams.push_back(video_stream_info_);
613 init_cb_.Run(streams);
617 init_cb_.Run(streams);
622 return track->EmitBuffer(buffer);
// Constructs the per-track state: the track number, the default duration, the
// callback that receives finished samples, and a duration estimate that starts
// out unset (kNoTimestamp).
// NOTE(review): some parameters and initializers are not present in this excerpt.
625 WebMClusterParser::Track::Track(
int track_num,
627 int64_t default_duration,
629 : track_num_(track_num),
631 default_duration_(default_duration),
632 estimated_next_frame_duration_(kNoTimestamp),
633 new_sample_cb_(new_sample_cb) {
// A default duration is either absent (kNoTimestamp) or strictly positive.
634 DCHECK(default_duration_ == kNoTimestamp || default_duration_ > 0);
// Destructor with an empty body; no explicit cleanup is performed here.
637 WebMClusterParser::Track::~Track() {}
// Emits `buffer` on this track. If an earlier buffer is being held back for
// lack of a duration, that buffer first receives a duration derived from the
// pts difference and is emitted. A new buffer whose duration is kNoTimestamp
// is held back in turn; any other buffer is emitted through EmitBufferHelp.
// NOTE(review): the return statements of the failure and hold-back paths are
// not present in this excerpt.
639 bool WebMClusterParser::Track::EmitBuffer(
640 const scoped_refptr<MediaSample>& buffer) {
641 DVLOG(2) <<
"EmitBuffer() : " << track_num_
642 <<
" ts " << buffer->pts()
643 <<
" dur " << buffer->duration()
644 <<
" kf " << buffer->is_key_frame()
645 <<
" size " << buffer->data_size();
647 if (last_added_buffer_missing_duration_.get()) {
// The held-back buffer lasts until the incoming buffer starts.
648 int64_t derived_duration =
649 buffer->pts() - last_added_buffer_missing_duration_->pts();
650 last_added_buffer_missing_duration_->set_duration(derived_duration);
652 DVLOG(2) <<
"EmitBuffer() : applied derived duration to held-back buffer : "
654 << last_added_buffer_missing_duration_->pts()
656 << last_added_buffer_missing_duration_->duration()
657 <<
" kf " << last_added_buffer_missing_duration_->is_key_frame()
658 <<
" size " << last_added_buffer_missing_duration_->data_size();
// Clear the member before emitting: EmitBufferHelp DCHECKs that nothing is
// being held back.
659 scoped_refptr<MediaSample> updated_buffer =
660 last_added_buffer_missing_duration_;
661 last_added_buffer_missing_duration_ = NULL;
662 if (!EmitBufferHelp(updated_buffer))
666 if (buffer->duration() == kNoTimestamp) {
667 last_added_buffer_missing_duration_ = buffer;
668 DVLOG(2) <<
"EmitBuffer() : holding back buffer that is missing duration";
672 return EmitBufferHelp(buffer);
// If a buffer is being held back for lack of a duration, assigns it the value
// returned by GetDurationEstimate(), logs the estimation (rate-limited by
// kMaxDurationEstimateLogs), passes the buffer to the new-sample callback and
// clears the held-back slot.
// NOTE(review): the early return for the "nothing held back" case is not
// present in this excerpt.
675 void WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() {
676 if (!last_added_buffer_missing_duration_.get())
679 int64_t estimated_duration = GetDurationEstimate();
680 last_added_buffer_missing_duration_->set_duration(estimated_duration);
682 LIMITED_LOG(INFO, num_duration_estimates_, kMaxDurationEstimateLogs)
683 <<
"Estimating WebM block duration to be " << estimated_duration / 1000
684 <<
"ms for the last (Simple)Block in the Cluster for this Track. Use "
685 "BlockGroups with BlockDurations at the end of each Track in a "
686 "Cluster to avoid estimation.";
688 DVLOG(2) << __FUNCTION__ <<
" new dur : ts "
689 << last_added_buffer_missing_duration_->pts()
691 << last_added_buffer_missing_duration_->duration()
692 <<
" kf " << last_added_buffer_missing_duration_->is_key_frame()
693 <<
" size " << last_added_buffer_missing_duration_->data_size();
// The callback is invoked directly here rather than through EmitBufferHelp.
697 new_sample_cb_.Run(track_num_, last_added_buffer_missing_duration_);
698 last_added_buffer_missing_duration_ = NULL;
// Drops any buffer that is being held back for lack of a duration; the buffer
// is not emitted.
// NOTE(review): the end of this function is not present in this excerpt.
701 void WebMClusterParser::Track::Reset() {
702 last_added_buffer_missing_duration_ = NULL;
// Validates the duration of `buffer`, updates the running estimate used for
// buffers that lack a duration, and passes the buffer to the new-sample
// callback.
// NOTE(review): the return statements and the `else` between the two estimate
// assignments are not present in this excerpt.
706 bool WebMClusterParser::Track::EmitBufferHelp(
707 const scoped_refptr<MediaSample>& buffer) {
// Callers must have cleared the held-back buffer before calling this helper.
708 DCHECK(!last_added_buffer_missing_duration_.get());
710 int64_t duration = buffer->duration();
711 if (duration < 0 || duration == kNoTimestamp) {
712 LOG(ERROR) <<
"Invalid buffer duration: " << duration;
// The first valid duration seeds the estimate; the other assignment keeps the
// larger of the new duration and the current estimate.
719 int64_t orig_duration_estimate = estimated_next_frame_duration_;
720 if (estimated_next_frame_duration_ == kNoTimestamp) {
721 estimated_next_frame_duration_ = duration;
723 estimated_next_frame_duration_ =
724 std::max(duration, estimated_next_frame_duration_);
727 if (orig_duration_estimate != estimated_next_frame_duration_) {
728 DVLOG(3) <<
"Updated duration estimate:"
729 << orig_duration_estimate
731 << estimated_next_frame_duration_
737 new_sample_cb_.Run(track_num_, buffer);
// Returns the duration to assign to a buffer that has none: the running
// estimate when one exists, otherwise a hard-coded video or audio default
// converted from milliseconds to microseconds.
// NOTE(review): the else branch, the condition choosing between the video and
// audio defaults, and the return statement are not present in this excerpt.
741 int64_t WebMClusterParser::Track::GetDurationEstimate() {
742 int64_t duration = estimated_next_frame_duration_;
743 if (duration != kNoTimestamp) {
744 DVLOG(3) << __FUNCTION__ <<
" : using estimated duration";
746 DVLOG(3) << __FUNCTION__ <<
" : using hardcoded default duration";
748 duration = kDefaultVideoBufferDurationInMs * kMicrosecondsPerMillisecond;
750 duration = kDefaultAudioBufferDurationInMs * kMicrosecondsPerMillisecond;
// The result must be a real, positive duration.
754 DCHECK(duration > 0);
755 DCHECK(duration != kNoTimestamp);
// Walks every entry of text_track_map_.
// NOTE(review): the loop increment and body are not present in this excerpt;
// presumably each text Track is reset — confirm.
759 void WebMClusterParser::ResetTextTracks() {
760 for (TextTrackMap::iterator it = text_track_map_.begin();
761 it != text_track_map_.end();
767 WebMClusterParser::Track*
768 WebMClusterParser::FindTextTrack(
int track_num) {
769 const TextTrackMap::iterator it = text_track_map_.find(track_num);
771 if (it == text_track_map_.end())