5 #include "packager/media/formats/webm/webm_cluster_parser.h"
10 #include "packager/base/logging.h"
11 #include "packager/base/sys_byteorder.h"
12 #include "packager/media/base/decrypt_config.h"
13 #include "packager/media/base/timestamp.h"
14 #include "packager/media/codecs/vp8_parser.h"
15 #include "packager/media/codecs/vp9_parser.h"
16 #include "packager/media/codecs/webvtt_util.h"
17 #include "packager/media/formats/webm/webm_constants.h"
18 #include "packager/media/formats/webm/webm_crypto_helpers.h"
19 #include "packager/media/formats/webm/webm_webvtt_parser.h"
// Number of microseconds in one millisecond. Used further down to convert the
// hardcoded default buffer durations, which are expressed in milliseconds.
25 const int64_t kMicrosecondsPerMillisecond = 1000;
// Constructor fragment. The start of the signature, some parameters, some
// member initializers and the closing braces are not visible in this excerpt.
//
// Visible parameters:
//   timecode_scale                - divided by 1000.0 to form
//                                   timecode_multiplier_.
//   audio/video_stream_info       - may be null; when null the matching track
//                                   is given the track number -1.
//   audio/video_default_duration  - forwarded to the audio_/video_ tracks.
//   text_tracks                   - one Track is registered per entry.
//   ignored_tracks                - copied into ignored_tracks_.
//   audio/video_encryption_key_id - copied into the matching members.
30 int64_t timecode_scale,
31 std::shared_ptr<AudioStreamInfo> audio_stream_info,
32 std::shared_ptr<VideoStreamInfo> video_stream_info,
33 int64_t audio_default_duration,
34 int64_t video_default_duration,
35 const WebMTracksParser::TextTracks& text_tracks,
36 const std::set<int64_t>& ignored_tracks,
37 const std::string& audio_encryption_key_id,
38 const std::string& video_encryption_key_id,
// timecode_multiplier_ is later multiplied with block and cluster timecodes to
// produce sample timestamps and durations.
// NOTE(review): presumably this converts a nanosecond based scale into
// microseconds per timecode tick - confirm against the tracks parser.
42 : timecode_multiplier_(timecode_scale / 1000.0),
43 audio_stream_info_(audio_stream_info),
44 video_stream_info_(video_stream_info),
45 ignored_tracks_(ignored_tracks),
46 audio_encryption_key_id_(audio_encryption_key_id),
47 video_encryption_key_id_(video_encryption_key_id),
// parser_ is set up for the Cluster element id, with this object passed as its
// second argument.
48 parser_(kWebMIdCluster, this),
51 cluster_start_time_(kNoTimestamp),
// A track number of -1 is used when the corresponding stream info is absent.
52 audio_(audio_stream_info ? audio_stream_info->track_id() : -1,
54 audio_default_duration,
56 video_(video_stream_info ? video_stream_info->track_id() : -1,
58 video_default_duration,
// When a decryption key source is supplied, the stream infos that exist are
// flagged as not encrypted. The remaining statements of this branch and its
// closing brace are not visible in this excerpt.
60 if (decryption_key_source) {
62 if (audio_stream_info_)
63 audio_stream_info_->set_is_encrypted(
false);
64 if (video_stream_info_)
65 video_stream_info_->set_is_encrypted(
false);
// Register one Track per text track entry, keyed by the entry key (it->first).
// Each such Track is built with false, kNoTimestamp and new_sample_cb as its
// remaining arguments.
67 for (WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin();
68 it != text_tracks.end();
70 text_track_map_.insert(std::make_pair(
71 it->first, Track(it->first,
false, kNoTimestamp, new_sample_cb)));
// Destructor. The body is empty, so no explicit teardown is performed here.
75 WebMClusterParser::~WebMClusterParser() {}
// Fragment of a state-reset routine whose signature and remaining statements
// are not visible in this excerpt (presumably WebMClusterParser::Reset - TODO
// confirm).
// Returns the cluster bookkeeping to its initial values: -1 marks a timecode
// that has not been seen yet and kNoTimestamp marks an unknown cluster start
// time.
78 last_block_timecode_ = -1;
79 cluster_timecode_ = -1;
80 cluster_start_time_ = kNoTimestamp;
81 cluster_ended_ =
false;
// Fragment of a flush routine whose signature is not visible in this excerpt
// (presumably WebMClusterParser::Flush - TODO confirm).
// Both tracks are asked to apply a duration estimate before the results are
// combined, so a failure on the audio track does not skip the video track.
90 bool audio_result = audio_.ApplyDurationEstimateIfNeeded();
91 bool video_result = video_.ApplyDurationEstimateIfNeeded();
// Success is reported only when both tracks succeeded.
93 return audio_result && video_result;
// Fragment of the cluster parsing entry point. Its signature, its return
// statements and its closing braces are not visible in this excerpt.
// Feed the input bytes to the list parser.
97 int result = parser_.
Parse(buf, size);
100 cluster_ended_ =
false;
// NOTE(review): the statements between the assignment above and the check
// below are not visible here, so the exact condition under which the flag is
// cleared and later set cannot be read from this excerpt.
105 if (cluster_ended_) {
// No start time has been recorded for this cluster yet, so fall back to the
// cluster timecode scaled by timecode_multiplier_.
108 if (cluster_start_time_ == kNoTimestamp) {
// The handling of a negative (unset) cluster timecode is not visible here.
111 if (cluster_timecode_ < 0)
114 cluster_start_time_ = cluster_timecode_ * timecode_multiplier_;
// Forget the per-cluster timecodes.
122 last_block_timecode_ = -1;
123 cluster_timecode_ = -1;
// Fragment of the list-start callback. Its signature, return statement and
// closing braces are not visible in this excerpt (presumably
// WebMClusterParser::OnListStart - TODO confirm).
// Clears the state that belongs to the list element identified by id.
130 if (
id == kWebMIdCluster) {
// A new Cluster starts without a known timecode or start time.
131 cluster_timecode_ = -1;
132 cluster_start_time_ = kNoTimestamp;
133 }
else if (
id == kWebMIdBlockGroup) {
// A new BlockGroup starts with no block payload (-1 size), no duration, no
// discard padding and no reference block.
135 block_data_size_ = -1;
136 block_duration_ = -1;
137 discard_padding_ = -1;
138 discard_padding_set_ =
false;
139 reference_block_set_ =
false;
140 }
else if (
id == kWebMIdBlockAdditions) {
// A new BlockAdditions element drops any previously stored additional data.
142 block_additional_data_.reset();
143 block_additional_data_size_ = 0;
// Called when a list element ends. Only the end of a BlockGroup is acted on:
// the block payload collected for the group is passed to ParseBlock and the
// per-BlockGroup state is cleared afterwards.
// The return statements and closing braces are not visible in this excerpt.
149 bool WebMClusterParser::OnListEnd(
int id) {
// Any element other than a BlockGroup takes the early path (its statement is
// not visible here).
150 if (
id != kWebMIdBlockGroup)
// block_data_size_ is reset to -1 when a BlockGroup starts and only set when a
// block payload is stored, so -1 here means the group carried no block.
154 if (block_data_size_ == -1) {
155 LOG(ERROR) <<
"Block missing from BlockGroup.";
// The first argument (is_simple_block) is false for a block inside a
// BlockGroup. The discard padding is passed as 0 unless a DiscardPadding
// element was seen in this group.
159 bool result = ParseBlock(
160 false, block_data_.get(), block_data_size_, block_additional_data_.get(),
161 block_additional_data_size_, block_duration_,
162 discard_padding_set_ ? discard_padding_ : 0, reference_block_set_);
// Clear the per-BlockGroup state regardless of the ParseBlock outcome stored
// in result.
164 block_data_size_ = -1;
165 block_duration_ = -1;
167 block_additional_data_.reset();
168 block_additional_data_size_ = 0;
169 discard_padding_ = -1;
170 discard_padding_set_ =
false;
171 reference_block_set_ =
false;
// Handles an unsigned integer element. The visible cases select the member
// that corresponds to the element id:
//   kWebMIdTimecode      -> cluster_timecode_
//   kWebMIdBlockDuration -> block_duration_
//   kWebMIdBlockAddID    -> block_add_id_
// The switch header, any remaining cases and the code that stores val through
// dst are not visible in this excerpt.
175 bool WebMClusterParser::OnUInt(
int id, int64_t val) {
178 case kWebMIdTimecode:
179 dst = &cluster_timecode_;
181 case kWebMIdBlockDuration:
182 dst = &block_duration_;
184 case kWebMIdBlockAddID:
185 dst = &block_add_id_;
// Parses the fixed 4 byte header of a block and forwards the frame to OnBlock.
//
// Visible parameters:
//   is_simple_block     - true when called for a SimpleBlock, false when
//                         called for the block of a BlockGroup.
//   additional          - block additional data; callers pass NULL when there
//                         is none.
//   discard_padding     - forwarded unchanged to OnBlock.
//   reference_block_set - whether a ReferenceBlock was seen; used for key
//                         frame detection on blocks that are not simple.
// Several parameters, return statements and closing braces are not visible in
// this excerpt.
//
// Header layout as read by the visible code: byte 0 holds the track number,
// bytes 1-2 the timecode (most significant byte first), byte 3 the flags, and
// the frame data starts at byte 4.
196 bool WebMClusterParser::ParseBlock(
bool is_simple_block,
199 const uint8_t* additional,
202 int64_t discard_padding,
203 bool reference_block_set) {
// Only a track number whose first byte has bit 0x80 set is accepted; the low
// seven bits then carry the number, which limits it to 127.
209 if (!(buf[0] & 0x80)) {
210 LOG(ERROR) <<
"TrackNumber over 127 not supported";
214 int track_num = buf[0] & 0x7f;
215 int timecode = buf[1] << 8 | buf[2];
216 int flags = buf[3] & 0xff;
// The lacing mode is held in bits 1-2 of the flags byte.
217 int lacing = (flags >> 1) & 0x3;
220 LOG(ERROR) <<
"Lacing " << lacing <<
" is not supported yet.";
// Bit 15 of the 16 bit timecode is tested here; the statement it guards is not
// visible (presumably sign extension of the value - TODO confirm).
225 if (timecode & 0x8000)
// Key frame decision (the left-hand side of this expression is not visible):
// flag bit 0x80 for a SimpleBlock, otherwise the absence of a ReferenceBlock.
233 is_simple_block ? (flags & 0x80) != 0 : !reference_block_set;
// Skip the 4 header bytes; the rest of the buffer is the frame.
235 const uint8_t* frame_data = buf + 4;
236 int frame_size = size - (frame_data - buf);
237 return OnBlock(is_simple_block, track_num, timecode, duration, frame_data,
238 frame_size, additional, additional_size, discard_padding,
// Handles a binary element of size bytes located at data. The visible cases
// cover SimpleBlock, a block payload inside a BlockGroup, BlockAdditional,
// DiscardPadding and ReferenceBlock. The switch header, some case labels, the
// return statements and the closing braces are not visible in this excerpt.
242 bool WebMClusterParser::OnBinary(
int id,
const uint8_t* data,
int size) {
// A SimpleBlock is parsed immediately: no additional data (NULL, 0), a block
// duration of -1, a discard padding of 0 and no reference block.
244 case kWebMIdSimpleBlock:
245 return ParseBlock(
true, data, size, NULL, 0, -1, 0,
false);
249 LOG(ERROR) <<
"More than 1 Block in a BlockGroup is not "
// Keep a private copy of the block payload; OnListEnd hands it to ParseBlock
// when the enclosing BlockGroup ends. (The case label for this branch is not
// visible; the log text above indicates the Block element.)
253 block_data_.reset(
new uint8_t[size]);
254 memcpy(block_data_.get(), data, size);
255 block_data_size_ = size;
258 case kWebMIdBlockAdditional: {
// The stored BlockAddID is converted with HostToNet64 and placed in front of
// the additional payload.
259 uint64_t block_add_id = base::HostToNet64(block_add_id_);
260 if (block_additional_data_) {
265 LOG(ERROR) <<
"More than 1 BlockAdditional in a "
266 "BlockGroup is not supported.";
// Buffer layout: sizeof(block_add_id) bytes of id followed by size bytes of
// payload.
272 block_additional_data_size_ = size +
sizeof(block_add_id);
273 block_additional_data_.reset(
new uint8_t[block_additional_data_size_]);
274 memcpy(block_additional_data_.get(), &block_add_id,
275 sizeof(block_add_id));
// NOTE(review): the literal 8 equals sizeof(block_add_id) because
// block_add_id is a uint64_t; using the sizeof expression would keep the two
// in step.
276 memcpy(block_additional_data_.get() + 8, data, size);
279 case kWebMIdDiscardPadding: {
// DiscardPadding is accepted at most once per group and must be 1 to 8 bytes
// long. The statement taken on rejection is not visible here.
280 if (discard_padding_set_ || size <= 0 || size > 8)
282 discard_padding_set_ =
true;
// Assemble the value starting with the most significant byte. The int8_t cast
// of the first byte carries its sign into the result.
285 discard_padding_ =
static_cast<int8_t
>(data[0]);
// NOTE(review): if discard_padding_ is a signed integer (its declaration is
// not visible here) and the first byte was negative, the left shift below is
// undefined behaviour before C++20 - confirm the language level in use.
286 for (
int i = 1; i < size; ++i)
287 discard_padding_ = (discard_padding_ << 8) | data[i];
// A ReferenceBlock only needs to be remembered as present; ParseBlock uses the
// flag for key frame detection. Statements between the label and the
// assignment are not visible here.
291 case kWebMIdReferenceBlock:
295 reference_block_set_ =
true;
// Handles one parsed block: validates its timecode, selects the target track,
// builds the MediaSample (media or text), deals with encryption, runs the init
// callback the first time and finally emits the sample on the track.
//
// Visible parameters:
//   is_simple_block - whether the block came from a SimpleBlock element.
//   additional      - block additional data passed on to the sample.
//   discard_padding - received from ParseBlock; its use is not visible here.
// Several parameters, many return statements and the closing braces are not
// visible in this excerpt, so the notes below describe only what can be read.
302 bool WebMClusterParser::OnBlock(
bool is_simple_block,
308 const uint8_t* additional,
310 int64_t discard_padding,
// A block is only valid once the cluster timecode has been seen.
313 if (cluster_timecode_ == -1) {
314 LOG(ERROR) <<
"Got a block before cluster timecode.";
// The condition guarding this message is not visible here.
321 LOG(ERROR) <<
"Got a block with negative timecode offset " << timecode;
// Timecodes must not go backwards relative to the previous block.
325 if (last_block_timecode_ != -1 && timecode < last_block_timecode_) {
326 LOG(ERROR) <<
"Got a block with a timecode before the previous block.";
// Select the stream type and, for audio and video, the encryption key id that
// belongs to the track number of this block.
331 StreamType stream_type = kStreamUnknown;
332 std::string encryption_key_id;
333 if (track_num == audio_.track_num()) {
335 encryption_key_id = audio_encryption_key_id_;
336 stream_type = kStreamAudio;
337 }
else if (track_num == video_.track_num()) {
339 encryption_key_id = video_encryption_key_id_;
340 stream_type = kStreamVideo;
341 }
// Blocks on ignored tracks take this branch; its body is not visible here.
else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) {
343 }
else if (Track*
const text_track = FindTextTrack(track_num)) {
// The statement taken for a negative text block duration is not visible here.
346 if (block_duration < 0)
349 stream_type = kStreamText;
351 LOG(ERROR) <<
"Unexpected track number " << track_num;
354 DCHECK_NE(stream_type, kStreamUnknown);
356 last_block_timecode_ = timecode;
// Sample timestamp: cluster timecode plus the block offset, scaled by
// timecode_multiplier_.
358 int64_t timestamp = (cluster_timecode_ + timecode) * timecode_multiplier_;
360 std::shared_ptr<MediaSample> buffer;
361 if (stream_type != kStreamText) {
// Audio and video path. When an encryption key id is configured for the track,
// a DecryptConfig is created from the key id; the branch taken when that
// fails is not visible here.
365 std::unique_ptr<DecryptConfig> decrypt_config;
367 if (!encryption_key_id.empty() &&
368 !WebMCreateDecryptConfig(
370 reinterpret_cast<const uint8_t*>(encryption_key_id.data()),
371 encryption_key_id.size(),
372 &decrypt_config, &data_offset)) {
// Tail of the call that creates the sample (its beginning is not visible).
377 additional, additional_size, is_key_frame);
// With a decrypt config but no decryptor source, the config is attached and
// the sample is flagged as encrypted. Otherwise the sample data is passed to
// DecryptSampleBuffer through its writable pointer.
379 if (decrypt_config) {
380 if (!decryptor_source_) {
383 buffer->set_decrypt_config(std::move(decrypt_config));
384 buffer->set_is_encrypted(
true);
385 }
else if (!decryptor_source_->DecryptSampleBuffer(
386 decrypt_config.get(), buffer->writable_data(),
387 buffer->data_size())) {
388 LOG(ERROR) <<
"Cannot decrypt samples";
// Text path. id, settings and content are filled by code that is not visible
// here; id and settings are packed into side_data by MakeSideData.
393 std::string id, settings, content;
396 std::vector<uint8_t> side_data;
397 MakeSideData(
id.begin(),
id.end(),
398 settings.begin(), settings.end(),
// Tail of the call that creates the text sample from content and side_data.
// NOTE(review): &side_data[0] requires side_data to be non-empty;
// side_data.data() would also be valid for an empty vector.
402 reinterpret_cast<const uint8_t*>(content.data()), content.length(),
403 &side_data[0], side_data.size(),
true);
// Decode and presentation timestamps are set to the same value.
406 buffer->set_dts(timestamp);
407 buffer->set_pts(timestamp);
// The first block timestamp becomes the cluster start time when none is set.
408 if (cluster_start_time_ == kNoTimestamp)
409 cluster_start_time_ = timestamp;
// A positive block duration is scaled like the timestamp; the alternative
// value of this conditional expression is not visible here.
410 buffer->set_duration(block_duration > 0
411 ? (block_duration * timecode_multiplier_)
// Initialization step, taken while an init callback exists and initialized_ is
// still false. The statement that sets initialized_ is not visible here.
414 if (!init_cb_.is_null() && !initialized_) {
415 std::vector<std::shared_ptr<StreamInfo>> streams;
416 if (audio_stream_info_)
417 streams.push_back(audio_stream_info_);
418 if (video_stream_info_) {
// With a video stream present, the codec configuration is completed from a
// video block before the streams are reported.
419 if (stream_type == kStreamVideo) {
420 std::unique_ptr<VPxParser> vpx_parser;
// Pick the frame parser matching the video codec (case labels not visible).
421 switch (video_stream_info_->codec()) {
423 vpx_parser.reset(
new VP8Parser);
426 vpx_parser.reset(
new VP9Parser);
429 NOTIMPLEMENTED() <<
"Unsupported codec "
430 << video_stream_info_->codec();
433 std::vector<VPxFrameInfo> vpx_frames;
434 if (!vpx_parser->Parse(buffer->data(), buffer->data_size(),
436 LOG(ERROR) <<
"Failed to parse vpx frame.";
// The sample must contain exactly one frame and it must be a key frame.
439 if (vpx_frames.size() != 1u || !vpx_frames[0].is_keyframe) {
440 LOG(ERROR) <<
"The first frame should be a key frame.";
// Start from the codec config already stored on the stream (if any), merge in
// what the frame parser found, then store the codec string and the config
// serialized by WriteMP4 back on the stream info.
444 VPCodecConfigurationRecord codec_config;
445 if (!video_stream_info_->codec_config().empty())
446 codec_config.ParseWebM(video_stream_info_->codec_config());
447 codec_config.MergeFrom(vpx_parser->codec_config());
449 video_stream_info_->set_codec_string(
450 codec_config.GetCodecString(video_stream_info_->codec()));
451 std::vector<uint8_t> config_serialized;
452 codec_config.WriteMP4(&config_serialized);
453 video_stream_info_->set_codec_config(config_serialized);
454 streams.push_back(video_stream_info_);
455 init_cb_.Run(streams);
// Second call site of the init callback; the branch it belongs to is not
// visible here.
459 init_cb_.Run(streams);
// Hand the finished sample to the selected track (the assignment of track is
// not visible here).
464 return track->EmitBuffer(buffer);
// Builds the per-track state.
//   track_num        - track number this object represents.
//   default_duration - default sample duration for the track, or kNoTimestamp
//                      when none is available.
// The duration estimate starts out as kNoTimestamp (no estimate yet). Some
// parameters and initializers are not visible in this excerpt.
467 WebMClusterParser::Track::Track(
int track_num,
469 int64_t default_duration,
471 : track_num_(track_num),
473 default_duration_(default_duration),
474 estimated_next_frame_duration_(kNoTimestamp),
475 new_sample_cb_(new_sample_cb) {
// A default duration is either absent or strictly positive.
476 DCHECK(default_duration_ == kNoTimestamp || default_duration_ > 0);
// Destructor. The body is empty, so no explicit teardown is performed here.
479 WebMClusterParser::Track::~Track() {}
// Emits buffer on this track, holding back a sample that has no duration.
//
// If an earlier sample is being held back because it had no duration, its
// duration is derived from the distance between its pts and the pts of
// buffer, and it is emitted first. If buffer itself has no duration
// (kNoTimestamp) it becomes the new held-back sample; otherwise it is emitted
// through EmitBufferHelp.
// Some return statements and closing braces are not visible in this excerpt.
481 bool WebMClusterParser::Track::EmitBuffer(
482 const std::shared_ptr<MediaSample>& buffer) {
483 DVLOG(2) <<
"EmitBuffer() : " << track_num_
484 <<
" ts " << buffer->pts()
485 <<
" dur " << buffer->duration()
486 <<
" kf " << buffer->is_key_frame()
487 <<
" size " << buffer->data_size();
489 if (last_added_buffer_missing_duration_.get()) {
// Duration of the held-back sample is the pts distance to the new sample.
490 int64_t derived_duration =
491 buffer->pts() - last_added_buffer_missing_duration_->pts();
492 last_added_buffer_missing_duration_->set_duration(derived_duration);
494 DVLOG(2) <<
"EmitBuffer() : applied derived duration to held-back buffer : "
496 << last_added_buffer_missing_duration_->pts()
498 << last_added_buffer_missing_duration_->duration()
499 <<
" kf " << last_added_buffer_missing_duration_->is_key_frame()
500 <<
" size " << last_added_buffer_missing_duration_->data_size();
// Take a local reference and clear the member before emitting, because
// EmitBufferHelp checks that no sample is being held back.
501 std::shared_ptr<MediaSample> updated_buffer =
502 last_added_buffer_missing_duration_;
503 last_added_buffer_missing_duration_ = NULL;
504 if (!EmitBufferHelp(updated_buffer))
// A sample without duration is held back until the next one arrives.
508 if (buffer->duration() == kNoTimestamp) {
509 last_added_buffer_missing_duration_ = buffer;
510 DVLOG(2) <<
"EmitBuffer() : holding back buffer that is missing duration";
514 return EmitBufferHelp(buffer);
// Gives a held-back sample (one emitted without duration) an estimated
// duration from GetDurationEstimate, passes it to the new-sample callback and
// releases it. When no sample is held back the early path is taken; its return
// statement, like the other return statements, is not visible in this excerpt.
517 bool WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() {
518 if (!last_added_buffer_missing_duration_.get())
521 int64_t estimated_duration = GetDurationEstimate();
522 last_added_buffer_missing_duration_->set_duration(estimated_duration);
// The estimate is divided by 1000 and logged with an ms suffix, i.e. the log
// statement treats the duration as microseconds.
524 VLOG(1) <<
"Track " << track_num_ <<
": Estimating WebM block duration to be "
525 << estimated_duration / 1000
526 <<
"ms for the last (Simple)Block in the Cluster for this Track. Use "
527 "BlockGroups with BlockDurations at the end of each Track in a "
528 "Cluster to avoid estimation.";
530 DVLOG(2) <<
" new dur : ts " << last_added_buffer_missing_duration_->pts()
531 <<
" dur " << last_added_buffer_missing_duration_->duration()
532 <<
" kf " << last_added_buffer_missing_duration_->is_key_frame()
533 <<
" size " << last_added_buffer_missing_duration_->data_size();
// The sample goes straight to the callback here rather than through
// EmitBufferHelp. The statement taken when the callback fails is not visible.
537 if (!new_sample_cb_.Run(track_num_, last_added_buffer_missing_duration_))
539 last_added_buffer_missing_duration_ = NULL;
// Drops any held-back sample without emitting it. The closing brace is not
// visible in this excerpt.
543 void WebMClusterParser::Track::Reset() {
544 last_added_buffer_missing_duration_ = NULL;
// Validates the duration of buffer, updates the running duration estimate and
// passes the sample to the new-sample callback, whose result is returned.
// Must only be called while no sample is being held back (checked by DCHECK).
// Some conditions, return statements and closing braces are not visible in
// this excerpt.
547 bool WebMClusterParser::Track::EmitBufferHelp(
548 const std::shared_ptr<MediaSample>& buffer) {
549 DCHECK(!last_added_buffer_missing_duration_.get());
// A negative or missing duration is reported as an error.
551 int64_t duration = buffer->duration();
552 if (duration < 0 || duration == kNoTimestamp) {
553 LOG(ERROR) <<
"Invalid buffer duration: " << duration;
// Update the estimate used later by GetDurationEstimate: the first duration
// seeds it, and the max() below keeps the larger of the old estimate and the
// new duration. (The branch structure around these statements is only partly
// visible here.)
560 int64_t orig_duration_estimate = estimated_next_frame_duration_;
561 if (estimated_next_frame_duration_ == kNoTimestamp) {
562 estimated_next_frame_duration_ = duration;
564 estimated_next_frame_duration_ =
565 std::max(duration, estimated_next_frame_duration_);
// Log only when the estimate actually changed.
568 if (orig_duration_estimate != estimated_next_frame_duration_) {
569 DVLOG(3) <<
"Updated duration estimate:"
570 << orig_duration_estimate
572 << estimated_next_frame_duration_
578 return new_sample_cb_.Run(track_num_, buffer);
// Chooses a duration for a sample that has none. Order of preference:
//   1. the track default duration, when one was provided;
//   2. the running estimate maintained by EmitBufferHelp, when one exists;
//   3. a hardcoded default (a video and an audio constant are visible; the
//      condition that selects between them is not visible in this excerpt).
// The hardcoded defaults are given in milliseconds and multiplied by
// kMicrosecondsPerMillisecond. The result is checked to be positive and not
// kNoTimestamp; the return statement is not visible here.
581 int64_t WebMClusterParser::Track::GetDurationEstimate() {
582 int64_t duration = kNoTimestamp;
583 if (default_duration_ != kNoTimestamp) {
584 duration = default_duration_;
585 DVLOG(3) << __FUNCTION__ <<
" : using track default duration " << duration;
586 }
else if (estimated_next_frame_duration_ != kNoTimestamp) {
587 duration = estimated_next_frame_duration_;
588 DVLOG(3) << __FUNCTION__ <<
" : using estimated duration " << duration;
// Fallback values for when neither of the sources above is available.
591 duration = kDefaultVideoBufferDurationInMs * kMicrosecondsPerMillisecond;
593 duration = kDefaultAudioBufferDurationInMs * kMicrosecondsPerMillisecond;
595 DVLOG(3) << __FUNCTION__ <<
" : using hardcoded default duration "
599 DCHECK_GT(duration, 0);
600 DCHECK_NE(duration, kNoTimestamp);
// Walks every entry of text_track_map_. The loop increment, the loop body and
// the closing braces are not visible in this excerpt (presumably each text
// track is reset - TODO confirm).
604 void WebMClusterParser::ResetTextTracks() {
605 for (TextTrackMap::iterator it = text_track_map_.begin();
606 it != text_track_map_.end();
// Looks up track_num in text_track_map_ and returns a pointer to a Track.
// The return statements are not visible in this excerpt. OnBlock tests the
// returned pointer in a condition, so a null result presumably means that no
// text track has this number - TODO confirm.
612 WebMClusterParser::Track*
613 WebMClusterParser::FindTextTrack(
int track_num) {
614 const TextTrackMap::iterator it = text_track_map_.find(track_num);
616 if (it == text_track_map_.end())