5 #include "packager/media/formats/mp4/mp4_media_parser.h"
10 #include "packager/base/callback.h"
11 #include "packager/base/callback_helpers.h"
12 #include "packager/base/logging.h"
13 #include "packager/base/memory/ref_counted.h"
14 #include "packager/base/strings/string_number_conversions.h"
15 #include "packager/media/base/audio_stream_info.h"
16 #include "packager/media/base/buffer_reader.h"
17 #include "packager/media/base/decrypt_config.h"
18 #include "packager/media/base/key_source.h"
19 #include "packager/media/base/macros.h"
20 #include "packager/media/base/media_sample.h"
21 #include "packager/media/base/rcheck.h"
22 #include "packager/media/base/video_stream_info.h"
23 #include "packager/media/codecs/avc_decoder_configuration_record.h"
24 #include "packager/media/codecs/es_descriptor.h"
25 #include "packager/media/codecs/hevc_decoder_configuration_record.h"
26 #include "packager/media/codecs/vp_codec_configuration_record.h"
27 #include "packager/media/file/file.h"
28 #include "packager/media/file/file_closer.h"
29 #include "packager/media/formats/mp4/box_definitions.h"
30 #include "packager/media/formats/mp4/box_reader.h"
31 #include "packager/media/formats/mp4/track_run_iterator.h"
// Converts a time value from one timescale to another. The visible return
// statement divides the input by old_scale in double precision, multiplies by
// new_scale, and converts the result to the declared uint64_t return type.
// Because the arithmetic goes through a double, inputs above 2^53 cannot be
// represented exactly.
// NOTE(review): the remaining parameter declarations are not visible in this
// excerpt; old_scale is presumably expected to be non-zero -- confirm.
38 uint64_t Rescale(uint64_t time_in_old_scale,
41 return (static_cast<double>(time_in_old_scale) / old_scale) * new_scale;
// Maps a FourCC value to a Codec value.
// NOTE(review): the body of this function is not visible in this excerpt, so
// the concrete mapping cannot be documented from here.
44 Codec FourCCToCodec(FourCC fourcc) {
// Channel count that a DTS audio entry carried in esds is compared against
// while parsing the sample description; any other count is logged as an
// unsupported channel count.
82 const uint8_t kDtsAudioNumChannels = 6;
// Number of nanoseconds in one second; used when converting sample counts to
// nanosecond durations (codec delay and seek pre-roll).
83 const uint64_t kNanosecondsPerSecond = 1000000000ull;
// Constructs the parser in the kWaitingForInit state with no decryption key
// source set.
// NOTE(review): the rest of the initializer list and the constructor body are
// not visible in this excerpt.
87 MP4MediaParser::MP4MediaParser()
88 : state_(kWaitingForInit),
89 decryption_key_source_(NULL),
// Destructor; the body is empty.
93 MP4MediaParser::~MP4MediaParser() {}
// NOTE(review): the opening line of this definition is not visible in this
// excerpt. Judging by the callbacks and key source it stores, this is
// presumably the initialization entry point of the parser -- confirm against
// the full file.
96 const NewSampleCB& new_sample_cb,
// Preconditions (DCHECK): the parser is still waiting for initialization, no
// init callback has been stored yet, and both supplied callbacks are non-null.
98 DCHECK_EQ(state_, kWaitingForInit);
99 DCHECK(init_cb_.is_null());
100 DCHECK(!init_cb.is_null());
101 DCHECK(!new_sample_cb.is_null());
// From here on the parser expects top-level boxes.
103 ChangeState(kParsingBoxes);
// Store the sample callback and the key source; the key source is tested for
// null right below, so a null value is tolerated here.
105 new_sample_cb_ = new_sample_cb;
106 decryption_key_source_ = decryption_key_source;
// NOTE(review): the statement guarded by this check is not visible in this
// excerpt.
107 if (decryption_key_source)
// Reset entry point of the parser.
// NOTE(review): the embedded source numbering jumps from 111 to 119 below, so
// the body of Reset is not visible and the two statements that follow may
// belong to a different definition whose signature is missing from this
// excerpt -- confirm against the full file.
111 void MP4MediaParser::Reset() {
// Precondition (DCHECK): the parser has already been initialized.
119 DCHECK_NE(state_, kWaitingForInit);
// Go back to expecting top-level boxes.
121 ChangeState(kParsingBoxes);
// NOTE(review): the signature of this definition is not visible in this
// excerpt. The visible body appends buf/size to queue_ and then alternates
// between parsing boxes and emitting samples, depending on state_, for as long
// as each step succeeds and no error is flagged.
// Precondition (DCHECK): the parser has already been initialized.
126 DCHECK_NE(state_, kWaitingForInit);
// NOTE(review): the statement guarded by this error-state check is not visible
// in this excerpt.
128 if (state_ == kError)
// Append the incoming bytes to the queue.
131 queue_.Push(buf, size);
// Only err is initialized by this declaration; result is left uninitialized
// until one of the assignments below runs.
133 bool result, err =
false;
// Dispatch on the current state: parse one box, or emit one sample.
136 if (state_ == kParsingBoxes) {
137 result = ParseBox(&err);
139 DCHECK_EQ(kEmittingSamples, state_);
140 result = EnqueueSample(&err);
// Discard buffered data up to the largest offset the run iterator reports as
// clearable. That offset is added to moof_head_, i.e. it is relative to the
// queue position recorded when the current moof box was parsed.
142 int64_t max_clear = runs_->GetMaxClearOffset() + moof_head_;
143 err = !ReadAndDiscardMDATsUntil(max_clear);
146 }
while (result && !err);
// NOTE(review): the condition under which this error is logged is not visible
// in this excerpt.
149 DLOG(ERROR) <<
"Error while parsing MP4";
// NOTE(review): the signature of this definition is not visible in this
// excerpt. The visible body opens a media file, reads box headers from it,
// feeds a moov box (header first, then the remaining contents) to Parse(), and
// seeks past boxes by advancing file_position by the box size. Several
// statements, including the enclosing loop and the returns, are not visible.
// The file handle is owned by a unique_ptr with a FileCloser deleter.
160 std::unique_ptr<File, FileCloser> file(
163 LOG(ERROR) <<
"Unable to open media file '" << file_path <<
"'";
// Seek to offset 0; a failure is logged as a warning about seek support.
166 if (!file->Seek(0)) {
167 LOG(WARNING) <<
"Filesystem does not support seeking on file '" << file_path
// Byte offset of the box currently being examined.
172 uint64_t file_position(0);
173 bool mdat_seen(
false);
// Number of bytes read from the start of each box to identify it.
175 const uint32_t kBoxHeaderReadSize(16);
176 std::vector<uint8_t> buffer(kBoxHeaderReadSize);
177 int64_t bytes_read = file->Read(&buffer[0], kBoxHeaderReadSize);
// A zero-byte read is reported as the moov box not being found.
178 if (bytes_read == 0) {
179 LOG(ERROR) <<
"Could not find 'moov' box in file '" << file_path <<
"'";
// Any other read shorter than the header size is reported as a read error.
182 if (bytes_read < kBoxHeaderReadSize) {
183 LOG(ERROR) <<
"Error reading media file '" << file_path <<
"'";
// NOTE(review): the check that produces this error is not visible in this
// excerpt.
191 LOG(ERROR) <<
"Could not start box from file '" << file_path <<
"'";
// NOTE(review): the body of the mdat branch is not visible in this excerpt.
194 if (box_type == FOURCC_mdat) {
196 }
else if (box_type == FOURCC_moov) {
// Feed the already-read header bytes of the moov box to the parser.
202 if (!
Parse(&buffer[0], bytes_read)) {
203 LOG(ERROR) <<
"Error parsing mp4 file '" << file_path <<
"'";
// Remaining bytes of the moov box beyond what has already been read.
206 uint64_t bytes_to_read = box_size - bytes_read;
207 buffer.resize(bytes_to_read);
// Read and parse the remainder. Each read may deliver fewer bytes than
// requested, so keep going until everything has been consumed; a read that
// returns zero or a negative value is treated as an error.
208 while (bytes_to_read > 0) {
209 bytes_read = file->Read(&buffer[0], bytes_to_read);
210 if (bytes_read <= 0) {
211 LOG(ERROR) <<
"Error reading 'moov' contents from file '" << file_path
215 if (!
Parse(&buffer[0], bytes_read)) {
216 LOG(ERROR) <<
"Error parsing mp4 file '" << file_path <<
"'";
219 bytes_to_read -= bytes_read;
// Skip to the start of the next box.
225 file_position += box_size;
226 if (!file->Seek(file_position)) {
227 LOG(ERROR) <<
"Error skipping box in mp4 file '" << file_path <<
"'";
// Handles one top-level box taken from the front of queue_: moov and moof
// boxes are handed to their dedicated parsers, other box types are logged and
// skipped, and the box is then popped from the queue.
// |err| is set to true when the moov or moof parser fails.
// NOTE(review): several lines of this definition (reader creation, returns)
// are not visible in this excerpt.
234 bool MP4MediaParser::ParseBox(
bool* err) {
// Look at the bytes currently buffered in the queue.
237 queue_.Peek(&buf, &size);
// NOTE(review): the statement guarded by this null-reader check is not visible
// in this excerpt.
242 if (reader.get() == NULL)
245 if (reader->type() == FOURCC_mdat) {
249 NOTIMPLEMENTED() <<
" Files with MDAT before MOOV is not supported yet.";
// Record where this box ends: the current queue head plus the box size.
255 mdat_tail_ = queue_.
head() + reader->size();
257 if (reader->type() == FOURCC_moov) {
258 *err = !ParseMoov(reader.get());
259 }
else if (reader->type() == FOURCC_moof) {
260 moof_head_ = queue_.
head();
261 *err = !ParseMoof(reader.get());
269 VLOG(2) <<
"Skipping top-level box: " << FourCCToString(reader->type());
// Remove the box from the queue.
272 queue_.Pop(static_cast<int>(reader->size()));
// Parses the moov box from |reader|, builds a stream description for every
// audio and video track it finds, reports the collected streams through
// init_cb_, creates the track run iterator, and switches the parser to the
// kEmittingSamples state.
// NOTE(review): many lines of this definition (case labels, returns, closing
// braces, some declarations) are not visible in this excerpt; RCHECK is a
// project macro that presumably makes the function return false when its
// condition fails -- confirm.
276 bool MP4MediaParser::ParseMoov(BoxReader* reader) {
// Parse the box into a fresh Movie object.
280 moov_.reset(
new Movie);
281 RCHECK(moov_->Parse(reader));
// Stream descriptions collected across all tracks; handed to init_cb_ below.
284 std::vector<scoped_refptr<StreamInfo> > streams;
286 for (std::vector<Track>::const_iterator track = moov_->tracks.begin();
287 track != moov_->tracks.end(); ++track) {
288 const uint32_t timescale = track->media.header.timescale;
// Choose the track duration. Preference order: the media header duration of
// the track; else the movie-extends fragment duration, rescaled with the movie
// timescale as the source scale; else the movie header duration (when it is
// positive and not the maximum uint64_t value), rescaled from the movie
// timescale to the track timescale. If none applies, duration stays 0.
291 uint64_t duration = 0;
292 if (track->media.header.duration > 0) {
293 duration = track->media.header.duration;
294 }
else if (moov_->extends.header.fragment_duration > 0) {
295 DCHECK(moov_->header.timescale != 0);
296 duration = Rescale(moov_->extends.header.fragment_duration,
297 moov_->header.timescale,
299 }
else if (moov_->header.duration > 0 &&
300 moov_->header.duration != std::numeric_limits<uint64_t>::max()) {
301 DCHECK(moov_->header.timescale != 0);
303 Rescale(moov_->header.duration, moov_->header.timescale, timescale);
306 const SampleDescription& samp_descr =
307 track->media.information.sample_table.description;
// Determine the sample description index. When movie-extends tracks exist, the
// entry whose track_id matches this track supplies its default sample
// description index.
// NOTE(review): the declaration of desc_idx and the branch structure joining
// this source with the chunk_info source below are not visible in this
// excerpt.
313 if (moov_->extends.tracks.size() > 0) {
314 for (
size_t t = 0; t < moov_->extends.tracks.size(); t++) {
315 const TrackExtends& trex = moov_->extends.tracks[t];
316 if (trex.track_id == track->header.track_id) {
317 desc_idx = trex.default_sample_description_index;
// Alternative source: the first sample-to-chunk entry, which must exist.
322 const std::vector<ChunkInfo>& chunk_info =
323 track->media.information.sample_table.sample_to_chunk.chunk_info;
324 RCHECK(chunk_info.size() > 0);
325 desc_idx = chunk_info[0].sample_description_index;
// The index must be positive.
// NOTE(review): desc_idx is later compared against the entry vector sizes and
// used as a subscript; any adjustment made between this check and those uses
// is not visible in this excerpt.
327 RCHECK(desc_idx > 0);
// Audio track: gather codec parameters from the selected audio sample entry.
330 if (samp_descr.type == kAudio) {
331 RCHECK(!samp_descr.audio_entries.empty());
// NOTE(review): the statement guarded by this bounds check is not visible in
// this excerpt.
335 if (desc_idx >= samp_descr.audio_entries.size())
338 const AudioSampleEntry& entry = samp_descr.audio_entries[desc_idx];
339 const FourCC actual_format = entry.GetActualFormat();
340 Codec codec = FourCCToCodec(actual_format);
341 uint8_t num_channels = 0;
342 uint32_t sampling_frequency = 0;
343 uint64_t codec_delay_ns = 0;
344 uint8_t audio_object_type = 0;
345 uint32_t max_bitrate = 0;
346 uint32_t avg_bitrate = 0;
347 std::vector<uint8_t> codec_config;
// Fill in the codec-specific fields according to the actual format.
// NOTE(review): the case labels of this switch are not visible in this
// excerpt, so which assignments belong to which format is inferred only from
// the entry fields they read (esds, ddts, dac3, dec3, dops).
349 switch (actual_format) {
// esds carrying AAC: take parameters from the AAC audio specific config.
353 if (entry.esds.es_descriptor.IsAAC()) {
355 const AACAudioSpecificConfig& aac_audio_specific_config =
356 entry.esds.aac_audio_specific_config;
357 num_channels = aac_audio_specific_config.num_channels();
358 sampling_frequency = aac_audio_specific_config.frequency();
359 audio_object_type = aac_audio_specific_config.audio_object_type();
360 codec_config = entry.esds.es_descriptor.decoder_specific_info();
362 }
else if (entry.esds.es_descriptor.IsDTS()) {
363 ObjectType audio_type = entry.esds.es_descriptor.object_type();
364 switch (audio_type) {
378 LOG(ERROR) <<
"Unsupported audio type " << audio_type
382 num_channels = entry.esds.aac_audio_specific_config.num_channels();
// Only kDtsAudioNumChannels channels are accepted on this path.
385 if (num_channels != kDtsAudioNumChannels) {
386 LOG(ERROR) <<
"Unsupported channel count " << num_channels
387 <<
" for audio type " << audio_type <<
".";
390 sampling_frequency = entry.samplerate;
391 max_bitrate = entry.esds.es_descriptor.max_bitrate();
392 avg_bitrate = entry.esds.es_descriptor.avg_bitrate();
394 LOG(ERROR) <<
"Unsupported audio format 0x" << std::hex
395 << actual_format <<
" in stsd box.";
400 FALLTHROUGH_INTENDED;
402 FALLTHROUGH_INTENDED;
404 FALLTHROUGH_INTENDED;
406 FALLTHROUGH_INTENDED;
// Parameters taken from the ddts box of the entry.
408 codec_config = entry.ddts.extra_data;
409 max_bitrate = entry.ddts.max_bitrate;
410 avg_bitrate = entry.ddts.avg_bitrate;
411 num_channels = entry.channelcount;
412 sampling_frequency = entry.samplerate;
// Parameters taken from the dac3 box of the entry.
415 codec_config = entry.dac3.data;
416 num_channels = entry.channelcount;
417 sampling_frequency = entry.samplerate;
// Parameters taken from the dec3 box of the entry.
420 codec_config = entry.dec3.data;
421 num_channels = entry.channelcount;
422 sampling_frequency = entry.samplerate;
// Parameters taken from the dops box of the entry.
425 codec_config = entry.dops.opus_identification_header;
426 num_channels = entry.channelcount;
427 sampling_frequency = entry.samplerate;
// sampling_frequency is the divisor in the pre-skip conversion that follows,
// so it must be non-zero.
428 RCHECK(sampling_frequency != 0);
430 entry.dops.preskip * kNanosecondsPerSecond / sampling_frequency;
433 LOG(ERROR) <<
"Unsupported audio format 0x" << std::hex
434 << actual_format <<
" in stsd box.";
// Derive the seek pre-roll from the sample group descriptions of the track.
439 uint64_t seek_preroll_ns = 0;
440 for (
const auto& sample_group_description :
441 track->media.information.sample_table.sample_group_descriptions) {
// NOTE(review): the statement guarded by this grouping-type check is not
// visible in this excerpt.
442 if (sample_group_description.grouping_type != FOURCC_roll)
444 const auto& audio_roll_recovery_entries =
445 sample_group_description.audio_roll_recovery_entries;
// A table with other than exactly one entry triggers a warning.
446 if (audio_roll_recovery_entries.size() != 1) {
447 LOG(WARNING) <<
"Unexpected number of entries in "
448 "SampleGroupDescription table with grouping type "
// A negative roll distance, expressed in samples, is negated and converted to
// nanoseconds using the sampling frequency.
452 const int16_t roll_distance_in_samples =
453 audio_roll_recovery_entries[0].roll_distance;
454 if (roll_distance_in_samples < 0) {
455 RCHECK(sampling_frequency != 0);
456 seek_preroll_ns = kNanosecondsPerSecond *
457 (-roll_distance_in_samples) / sampling_frequency;
460 <<
"Roll distance is supposed to be negative, but seeing "
461 << roll_distance_in_samples;
// The track counts as encrypted when default_is_protected equals 1.
466 const bool is_encrypted =
467 entry.sinf.info.track_encryption.default_is_protected == 1;
468 DVLOG(1) <<
"is_audio_track_encrypted_: " << is_encrypted;
// Append the audio stream description.
469 streams.push_back(
new AudioStreamInfo(
470 track->header.track_id, timescale, duration, codec,
472 codec_config.data(), codec_config.size(), entry.samplesize,
473 num_channels, sampling_frequency, seek_preroll_ns, codec_delay_ns,
474 max_bitrate, avg_bitrate, track->media.header.language.code,
// Video track: gather dimensions, pixel aspect ratio, and codec details from
// the selected video sample entry.
478 if (samp_descr.type == kVideo) {
479 RCHECK(!samp_descr.video_entries.empty());
// NOTE(review): the statement guarded by this bounds check is not visible in
// this excerpt.
480 if (desc_idx >= samp_descr.video_entries.size())
482 const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx];
484 uint32_t coded_width = entry.width;
485 uint32_t coded_height = entry.height;
486 uint32_t pixel_width = entry.pixel_aspect.h_spacing;
487 uint32_t pixel_height = entry.pixel_aspect.v_spacing;
// NOTE(review): the handling for both spacings being zero is not visible in
// this excerpt.
488 if (pixel_width == 0 && pixel_height == 0) {
492 std::string codec_string;
493 uint8_t nalu_length_size = 0;
495 const FourCC actual_format = entry.GetActualFormat();
496 const Codec video_codec = FourCCToCodec(actual_format);
// Parse the codec configuration record matching the actual format.
// NOTE(review): the case labels of this switch are not visible in this
// excerpt.
497 switch (actual_format) {
499 AVCDecoderConfigurationRecord avc_config;
500 if (!avc_config.Parse(entry.codec_configuration.data)) {
501 LOG(ERROR) <<
"Failed to parse avcc.";
504 codec_string = avc_config.GetCodecString();
505 nalu_length_size = avc_config.nalu_length_size();
// When the resolution in the sample entry disagrees with the AVC configuration
// record, warn and use the values from the record.
507 if (coded_width != avc_config.coded_width() ||
508 coded_height != avc_config.coded_height()) {
509 LOG(WARNING) <<
"Resolution in VisualSampleEntry (" << coded_width
510 <<
"," << coded_height
511 <<
") does not match with resolution in "
512 "AVCDecoderConfigurationRecord ("
513 << avc_config.coded_width() <<
","
514 << avc_config.coded_height()
515 <<
"). Use AVCDecoderConfigurationRecord.";
516 coded_width = avc_config.coded_width();
517 coded_height = avc_config.coded_height();
// Likewise for the pixel aspect ratio. The warning is only emitted when the
// sample entry values are not 1 and 1; the record values are used either way.
520 if (pixel_width != avc_config.pixel_width() ||
521 pixel_height != avc_config.pixel_height()) {
522 LOG_IF(WARNING, pixel_width != 1 || pixel_height != 1)
523 <<
"Pixel aspect ratio in PASP box (" << pixel_width <<
","
525 <<
") does not match with SAR in AVCDecoderConfigurationRecord "
527 << avc_config.pixel_width() <<
"," << avc_config.pixel_height()
528 <<
"). Use AVCDecoderConfigurationRecord.";
529 pixel_width = avc_config.pixel_width();
530 pixel_height = avc_config.pixel_height();
// HEVC configuration record: supplies the codec string and NALU length size.
536 HEVCDecoderConfigurationRecord hevc_config;
537 if (!hevc_config.Parse(entry.codec_configuration.data)) {
538 LOG(ERROR) <<
"Failed to parse hevc.";
541 codec_string = hevc_config.GetCodecString(video_codec);
542 nalu_length_size = hevc_config.nalu_length_size();
// VP codec configuration record: supplies the codec string.
548 VPCodecConfigurationRecord vp_config;
549 if (!vp_config.ParseMP4(entry.codec_configuration.data)) {
550 LOG(ERROR) <<
"Failed to parse vpcc.";
553 codec_string = vp_config.GetCodecString(video_codec);
557 LOG(ERROR) <<
"Unsupported video format "
558 << FourCCToString(actual_format) <<
" in stsd box.";
// The track counts as encrypted when default_is_protected equals 1.
562 const bool is_encrypted =
563 entry.sinf.info.track_encryption.default_is_protected == 1;
564 DVLOG(1) <<
"is_video_track_encrypted_: " << is_encrypted;
565 scoped_refptr<VideoStreamInfo> video_stream_info(
new VideoStreamInfo(
566 track->header.track_id, timescale, duration, video_codec,
567 codec_string, entry.codec_configuration.data.data(),
568 entry.codec_configuration.data.size(), coded_width, coded_height,
569 pixel_width, pixel_height,
571 nalu_length_size, track->media.header.language.code, is_encrypted));
// Concatenate the raw bytes of all pssh boxes found in the moov box and attach
// them to the video stream as EME init data.
574 if (moov_->pssh.size() > 0) {
575 std::vector<uint8_t> pssh_raw_data;
576 for (
const auto& pssh : moov_->pssh) {
577 pssh_raw_data.insert(pssh_raw_data.end(), pssh.raw_box.begin(),
580 video_stream_info->set_eme_init_data(pssh_raw_data.data(),
581 pssh_raw_data.size());
584 streams.push_back(video_stream_info);
// Hand the collected streams to the init callback.
588 init_cb_.Run(streams);
// NOTE(review): the statement guarded by a key-fetch failure is not visible in
// this excerpt.
589 if (!FetchKeysIfNecessary(moov_->pssh))
// Create and initialize the run iterator for this movie, then start emitting
// samples.
591 runs_.reset(
new TrackRunIterator(moov_.get()));
592 RCHECK(runs_->Init());
593 ChangeState(kEmittingSamples);
// Parses a moof box from |reader|, rebuilds the track run iterator and
// initializes it from the parsed fragment, fetches decryption keys from the
// pssh boxes of the fragment if needed, and switches the parser to the
// kEmittingSamples state.
// NOTE(review): the declaration of moof and the return statements are not
// visible in this excerpt.
597 bool MP4MediaParser::ParseMoof(BoxReader* reader) {
601 RCHECK(moof.Parse(reader));
// A new iterator is created for every fragment.
603 runs_.reset(
new TrackRunIterator(moov_.get()));
604 RCHECK(runs_->Init(moof));
// NOTE(review): the statement guarded by a key-fetch failure is not visible in
// this excerpt.
605 if (!FetchKeysIfNecessary(moof.pssh))
607 ChangeState(kEmittingSamples);
// Asks decryption_key_source_ to fetch keys using the raw box bytes of the
// given pssh headers, trying the headers one after another.
// NOTE(review): the early-exit statements, the declaration of status, and the
// return values are not visible in this excerpt, so the exact success and
// failure conditions cannot be documented from here.
611 bool MP4MediaParser::FetchKeysIfNecessary(
612 const std::vector<ProtectionSystemSpecificHeader>& headers) {
// NOTE(review): the statement guarded by the missing-key-source check is not
// visible in this excerpt.
617 if (!decryption_key_source_)
621 for (std::vector<ProtectionSystemSpecificHeader>::const_iterator iter =
622 headers.begin(); iter != headers.end(); ++iter) {
623 status = decryption_key_source_->
FetchKeys(iter->raw_box);
// A failed attempt is logged at verbose level before moving to the next box.
627 VLOG(1) <<
"Unable to fetch decryption keys: " << status
628 <<
", trying the next PSSH box";
635 LOG(ERROR) <<
"Error fetching decryption keys: " << status;
639 LOG(ERROR) <<
"No viable 'pssh' box found for content decryption.";
// Emits the sample the run iterator currently points at: locates its bytes in
// queue_, decrypts it or tags it with its decrypt config when the run is
// encrypted, fills in timestamps, passes it to new_sample_cb_, and advances
// the iterator.
// |err| is written with the negated result of CacheAuxInfo on the aux-info
// path.
// NOTE(review): many lines of this definition (returns, the construction of
// stream_sample, further |err| assignments) are not visible in this excerpt.
643 bool MP4MediaParser::EnqueueSample(
bool* err) {
// No valid run left: the queue is trimmed up to mdat_tail_ and the parser goes
// back to parsing boxes.
644 if (!runs_->IsRunValid()) {
647 if (!queue_.
Trim(mdat_tail_))
650 ChangeState(kParsingBoxes);
// NOTE(review): the handling of an invalid sample is not visible in this
// excerpt.
654 if (!runs_->IsSampleValid()) {
663 queue_.Peek(&buf, &buf_size);
// NOTE(review): the statement guarded by this check for runs that are neither
// audio nor video is not visible in this excerpt.
668 if (!runs_->is_audio() && !runs_->is_video())
// Cache auxiliary info when the iterator asks for it. The aux-info offset is
// added to moof_head_ to obtain a queue position.
678 if (runs_->AuxInfoNeedsToBeCached()) {
679 queue_.
PeekAt(runs_->aux_info_offset() + moof_head_, &buf, &buf_size);
// NOTE(review): the statement guarded by this insufficient-data check is not
// visible in this excerpt.
680 if (buf_size < runs_->aux_info_size())
682 *err = !runs_->CacheAuxInfo(buf, buf_size);
// The sample offset is likewise added to moof_head_.
686 int64_t sample_offset = runs_->sample_offset() + moof_head_;
687 queue_.
PeekAt(sample_offset, &buf, &buf_size);
// Fewer bytes than the sample size are available at that position. An offset
// that lies before the queue head is reported as an error.
688 if (buf_size < runs_->sample_size()) {
689 if (sample_offset < queue_.
head()) {
690 LOG(ERROR) <<
"Incorrect sample offset " << sample_offset
691 <<
" < " << queue_.
head();
698 buf, runs_->sample_size(), runs_->is_keyframe()));
// Encrypted run: a decrypt config is required. Without a decryptor source the
// sample is passed on still encrypted, carrying its decrypt config; otherwise
// the sample buffer is decrypted in place.
699 if (runs_->is_encrypted()) {
700 std::unique_ptr<DecryptConfig> decrypt_config = runs_->GetDecryptConfig();
701 if (!decrypt_config) {
703 LOG(ERROR) <<
"Missing decrypt config.";
707 if (!decryptor_source_) {
710 stream_sample->set_decrypt_config(std::move(decrypt_config));
711 stream_sample->set_is_encrypted(
true);
712 }
else if (!decryptor_source_->DecryptSampleBuffer(
713 decrypt_config.get(), stream_sample->writable_data(),
714 stream_sample->data_size())) {
716 LOG(ERROR) <<
"Cannot decrypt samples.";
// Timestamps come from the run iterator; the presentation timestamp is set
// from the cts value.
721 stream_sample->set_dts(runs_->dts());
722 stream_sample->set_pts(runs_->cts());
723 stream_sample->set_duration(runs_->duration());
725 DVLOG(3) <<
"Pushing frame: "
726 <<
", key=" << runs_->is_keyframe()
727 <<
", dur=" << runs_->duration()
728 <<
", dts=" << runs_->dts()
729 <<
", cts=" << runs_->cts()
730 <<
", size=" << runs_->sample_size();
// Deliver the sample; a false result from the callback is logged as an error.
732 if (!new_sample_cb_.Run(runs_->track_id(), stream_sample)) {
734 LOG(ERROR) <<
"Failed to process the sample.";
// Move on to the next sample of the run.
738 runs_->AdvanceSample();
// Advances mdat_tail_ box by box while it is below |offset|, then trims the
// queue up to the smaller of mdat_tail_ and |offset|.
// NOTE(review): the code that reads each box header and derives box_sz, along
// with the return statements, is not visible in this excerpt.
742 bool MP4MediaParser::ReadAndDiscardMDATsUntil(
const int64_t offset) {
744 while (mdat_tail_ < offset) {
// Look at the bytes that start at the current tail position.
747 queue_.
PeekAt(mdat_tail_, &buf, &size);
// Step over the box found there.
754 mdat_tail_ += box_sz;
// Never trim beyond the requested offset, even if the tail moved past it.
756 queue_.
Trim(std::min(mdat_tail_, offset));
// Logs the requested state at debug verbose level 2.
// NOTE(review): the statement that actually stores |new_state| is not visible
// in this excerpt.
760 void MP4MediaParser::ChangeState(State new_state) {
761 DVLOG(2) <<
"Changing state: " << new_state;