5 #include "packager/media/formats/mp4/mp4_media_parser.h" 10 #include "packager/base/callback.h" 11 #include "packager/base/callback_helpers.h" 12 #include "packager/base/logging.h" 13 #include "packager/base/strings/string_number_conversions.h" 14 #include "packager/file/file.h" 15 #include "packager/file/file_closer.h" 16 #include "packager/media/base/audio_stream_info.h" 17 #include "packager/media/base/buffer_reader.h" 18 #include "packager/media/base/decrypt_config.h" 19 #include "packager/media/base/key_source.h" 20 #include "packager/media/base/macros.h" 21 #include "packager/media/base/media_sample.h" 22 #include "packager/media/base/rcheck.h" 23 #include "packager/media/base/video_stream_info.h" 24 #include "packager/media/codecs/ac3_audio_util.h" 25 #include "packager/media/codecs/avc_decoder_configuration_record.h" 26 #include "packager/media/codecs/ec3_audio_util.h" 27 #include "packager/media/codecs/es_descriptor.h" 28 #include "packager/media/codecs/hevc_decoder_configuration_record.h" 29 #include "packager/media/codecs/vp_codec_configuration_record.h" 30 #include "packager/media/formats/mp4/box_definitions.h" 31 #include "packager/media/formats/mp4/box_reader.h" 32 #include "packager/media/formats/mp4/track_run_iterator.h" 39 uint64_t Rescale(uint64_t time_in_old_scale,
// Rescale: divides the input time by old_scale in double precision, multiplies
// by new_scale, and returns the result converted to uint64_t.
// NOTE(review): the remaining parameter declarations are not visible in this
// excerpt. A double carries 53 bits of mantissa, so inputs above 2^53 lose
// precision here.
42 return (static_cast<double>(time_in_old_scale) / old_scale) * new_scale;
// Maps a FourCC to an H26xStreamFormat value.
// NOTE(review): the conditions (case labels) selecting each return are not
// visible in this excerpt; only the possible results are. The last return
// yields kUnSpecified.
45 H26xStreamFormat GetH26xStreamFormat(FourCC fourcc) {
48 return H26xStreamFormat::kNalUnitStreamWithoutParameterSetNalus;
50 return H26xStreamFormat::kNalUnitStreamWithParameterSetNalus;
52 return H26xStreamFormat::kNalUnitStreamWithParameterSetNalus;
54 return H26xStreamFormat::kNalUnitStreamWithoutParameterSetNalus;
56 return H26xStreamFormat::kUnSpecified;
// Converts a FourCC into a Codec value.
// NOTE(review): the body of this function is not visible in this excerpt.
60 Codec FourCCToCodec(FourCC fourcc) {
// Channel count that ParseMoov compares entry.channelcount against on the
// esds path; a mismatch is logged as "Unsupported channel count".
98 const uint8_t kDtsAudioNumChannels = 6;
// Multiplier used in ParseMoov when converting sample counts (Opus pre-skip,
// audio roll distance) to nanoseconds via the sampling frequency.
99 const uint64_t kNanosecondsPerSecond = 1000000000ull;
// Constructor: the parser starts in kWaitingForInit with no decryption key
// source attached.
// NOTE(review): any further member initializers are not visible in this excerpt.
103 MP4MediaParser::MP4MediaParser()
104 : state_(kWaitingForInit),
105 decryption_key_source_(NULL),
// Destructor with an empty body; no explicit cleanup is performed here.
109 MP4MediaParser::~MP4MediaParser() {}
// Initialization routine (its header is only partly visible in this excerpt).
// Debug-asserts that the parser is still in kWaitingForInit, that no init
// callback has been stored yet, and that both supplied callbacks are non-null.
// It then switches to kParsingBoxes and stores the sample callback and the
// decryption key source.
112 const NewSampleCB& new_sample_cb,
114 DCHECK_EQ(state_, kWaitingForInit);
115 DCHECK(init_cb_.is_null());
116 DCHECK(!init_cb.is_null());
117 DCHECK(!new_sample_cb.is_null());
119 ChangeState(kParsingBoxes);
121 new_sample_cb_ = new_sample_cb;
122 decryption_key_source_ = decryption_key_source;
// NOTE(review): the statement guarded by this check is not visible here.
123 if (decryption_key_source)
// Reset entry point.
// NOTE(review): the lines between this header and the statements below are
// missing from this excerpt, so the statements below may belong to a separate
// function whose header is not shown.
127 void MP4MediaParser::Reset() {
// Must not be called before initialization; afterwards the parser returns to
// the box-parsing state.
135 DCHECK_NE(state_, kWaitingForInit);
137 ChangeState(kParsingBoxes);
// NOTE(review): the header of this function is not visible in this excerpt.
// The body consumes `buf` and `size`, which matches the Parse(buffer, length)
// calls made elsewhere in this file.
142 DCHECK_NE(state_, kWaitingForInit);
// NOTE(review): the action taken in the error state is not visible here.
144 if (state_ == kError)
// Append the incoming bytes to the internal queue before any parsing.
147 queue_.Push(buf, size);
// Only `err` is initialized by this declaration; `result` is assigned by the
// ParseBox / EnqueueSample calls below before the loop condition reads it.
149 bool result, err =
false;
// While parsing boxes, handle the next top-level box; otherwise the state is
// expected to be kEmittingSamples and the next sample is emitted.
152 if (state_ == kParsingBoxes) {
153 result = ParseBox(&err);
155 DCHECK_EQ(kEmittingSamples, state_);
156 result = EnqueueSample(&err);
// The run iterator's max clear offset is made absolute by adding moof_head_,
// then mdat data up to that offset is discarded.
158 int64_t max_clear = runs_->GetMaxClearOffset() + moof_head_;
159 err = !ReadAndDiscardMDATsUntil(max_clear);
162 }
// Repeat while the last step returned true and no error was flagged.
while (result && !err);
165 DLOG(ERROR) <<
"Error while parsing MP4";
// NOTE(review): the header of this function is not visible in this excerpt.
// The visible body opens the media file at `file_path`, walks it box by box,
// feeds the 'moov' box to Parse(), and seeks past other boxes.
176 std::unique_ptr<File, FileCloser> file(
179 LOG(ERROR) <<
"Unable to open media file '" << file_path <<
"'";
// A failed seek to offset 0 is logged as a warning (not an error).
182 if (!file->Seek(0)) {
183 LOG(WARNING) <<
"Filesystem does not support seeking on file '" << file_path
188 uint64_t file_position(0);
189 bool mdat_seen(
false);
// Read a fixed 16-byte prefix for each box header.
191 const uint32_t kBoxHeaderReadSize(16);
192 std::vector<uint8_t> buffer(kBoxHeaderReadSize);
193 int64_t bytes_read = file->Read(&buffer[0], kBoxHeaderReadSize);
// A zero-byte read means the file ended without a 'moov' box being found.
194 if (bytes_read == 0) {
195 LOG(ERROR) <<
"Could not find 'moov' box in file '" << file_path <<
"'";
// A short (or negative) read count is treated as a read error.
198 if (bytes_read < kBoxHeaderReadSize) {
199 LOG(ERROR) <<
"Error reading media file '" << file_path <<
"'";
207 LOG(ERROR) <<
"Could not start box from file '" << file_path <<
"'";
210 if (box_type == FOURCC_mdat) {
212 }
else if (box_type == FOURCC_moov) {
// Parse the header bytes already read, then stream the rest of the box
// (box_size minus what was read) through Parse().
218 if (!Parse(&buffer[0], bytes_read)) {
219 LOG(ERROR) <<
"Error parsing mp4 file '" << file_path <<
"'";
222 uint64_t bytes_to_read = box_size - bytes_read;
223 buffer.resize(bytes_to_read);
224 while (bytes_to_read > 0) {
225 bytes_read = file->Read(&buffer[0], bytes_to_read);
226 if (bytes_read <= 0) {
227 LOG(ERROR) <<
"Error reading 'moov' contents from file '" << file_path
231 if (!Parse(&buffer[0], bytes_read)) {
232 LOG(ERROR) <<
"Error parsing mp4 file '" << file_path <<
"'";
235 bytes_to_read -= bytes_read;
// Advance to the start of the next box and seek there.
241 file_position += box_size;
242 if (!file->Seek(file_position)) {
243 LOG(ERROR) <<
"Error skipping box in mp4 file '" << file_path <<
"'";
// Handles the next top-level box available in the queue. `*err` is set from
// the result of ParseMoov / ParseMoof for 'moov' / 'moof' boxes.
// NOTE(review): return statements are not visible in this excerpt.
250 bool MP4MediaParser::ParseBox(
bool* err) {
253 queue_.Peek(&buf, &size);
// NOTE(review): the action taken when no reader could be created is not
// visible here.
258 if (reader.get() == NULL)
261 if (reader->type() == FOURCC_mdat) {
267 NOTIMPLEMENTED() <<
" Non-seekable Files with 'mdat' box before 'moov' " 268 "box is not supported.";
275 <<
"Ignore unused 'mdat' box - this could be as a result of extra " 276 "not usable 'mdat' or 'mdat' associated with unrecognized track.";
// The end of the current box is recorded as the queue head plus the box size.
281 mdat_tail_ = queue_.head() + reader->size();
283 if (reader->type() == FOURCC_moov) {
284 *err = !ParseMoov(reader.get());
285 }
else if (reader->type() == FOURCC_moof) {
// Record where this 'moof' starts; sample and aux-info offsets are later
// added to moof_head_.
286 moof_head_ = queue_.head();
287 *err = !ParseMoof(reader.get());
295 VLOG(2) <<
"Skipping top-level box: " << FourCCToString(reader->type());
// Drop the box's bytes from the queue.
298 queue_.Pop(static_cast<int>(reader->size()));
// Parses the 'moov' box, builds stream info for the tracks it describes,
// passes the stream list to init_cb_, fetches keys for the movie-level PSSH
// boxes, initializes the run iterator, and switches to kEmittingSamples.
// NOTE(review): many lines of this function (case labels, else branches,
// continue/return statements) are not visible in this excerpt.
302 bool MP4MediaParser::ParseMoov(
BoxReader* reader) {
306 moov_.reset(
new Movie);
307 RCHECK(moov_->Parse(reader));
310 std::vector<std::shared_ptr<StreamInfo>> streams;
312 for (std::vector<Track>::const_iterator track = moov_->tracks.begin();
313 track != moov_->tracks.end(); ++track) {
314 const uint32_t timescale = track->media.header.timescale;
// Duration selection, in priority order: the track's media header duration
// if positive; else the movie-extends fragment_duration, rescaled from the
// movie timescale; else the movie header duration when positive and not the
// all-ones value, rescaled from the movie timescale to the track timescale.
317 uint64_t duration = 0;
318 if (track->media.header.duration > 0) {
319 duration = track->media.header.duration;
320 }
else if (moov_->extends.header.fragment_duration > 0) {
321 DCHECK(moov_->header.timescale != 0);
322 duration = Rescale(moov_->extends.header.fragment_duration,
323 moov_->header.timescale,
325 }
else if (moov_->header.duration > 0 &&
326 moov_->header.duration != std::numeric_limits<uint64_t>::max()) {
327 DCHECK(moov_->header.timescale != 0);
329 Rescale(moov_->header.duration, moov_->header.timescale, timescale);
333 track->media.information.sample_table.description;
// Sample description index: when track-extends entries exist, take the
// default_sample_description_index of the entry whose track_id matches this
// track; the other visible path reads it from the first sample-to-chunk entry.
339 if (moov_->extends.tracks.size() > 0) {
340 for (
size_t t = 0; t < moov_->extends.tracks.size(); t++) {
342 if (trex.track_id == track->header.track_id) {
343 desc_idx = trex.default_sample_description_index;
348 const std::vector<ChunkInfo>& chunk_info =
349 track->media.information.sample_table.sample_to_chunk.chunk_info;
350 RCHECK(chunk_info.size() > 0);
351 desc_idx = chunk_info[0].sample_description_index;
// The index must be positive here.
353 RCHECK(desc_idx > 0);
// ---- Audio track ----
356 if (samp_descr.type == kAudio) {
357 RCHECK(!samp_descr.audio_entries.empty());
// NOTE(review): the handling of an out-of-range index is not visible here.
361 if (desc_idx >= samp_descr.audio_entries.size())
365 const FourCC actual_format = entry.GetActualFormat();
366 Codec codec = FourCCToCodec(actual_format);
367 uint8_t num_channels = 0;
368 uint32_t sampling_frequency = 0;
369 uint64_t codec_delay_ns = 0;
370 uint8_t audio_object_type = 0;
371 uint32_t max_bitrate = 0;
372 uint32_t avg_bitrate = 0;
373 std::vector<uint8_t> codec_config;
// Per-format extraction of codec config, channel count, sampling frequency
// and bitrates.
375 switch (actual_format) {
379 if (entry.esds.es_descriptor.
IsAAC()) {
382 entry.esds.aac_audio_specific_config;
387 codec_config = entry.esds.es_descriptor.decoder_specific_info();
389 }
else if (entry.esds.es_descriptor.IsDTS()) {
390 ObjectType audio_type = entry.esds.es_descriptor.object_type();
391 switch (audio_type) {
405 LOG(ERROR) <<
"Unsupported audio type " << audio_type
409 num_channels = entry.channelcount;
// Any channel count other than kDtsAudioNumChannels is reported as
// unsupported.
412 if (num_channels != kDtsAudioNumChannels) {
413 LOG(ERROR) <<
"Unsupported channel count " << num_channels
414 <<
" for audio type " << audio_type <<
".";
417 sampling_frequency = entry.samplerate;
418 max_bitrate = entry.esds.es_descriptor.max_bitrate();
419 avg_bitrate = entry.esds.es_descriptor.avg_bitrate();
421 LOG(ERROR) <<
"Unsupported audio format 0x" << std::hex
422 << actual_format <<
" in stsd box.";
427 FALLTHROUGH_INTENDED;
429 FALLTHROUGH_INTENDED;
431 FALLTHROUGH_INTENDED;
433 FALLTHROUGH_INTENDED;
// Formats reaching here via the fall-throughs above take their config and
// bitrates from the ddts box.
435 codec_config = entry.ddts.extra_data;
436 max_bitrate = entry.ddts.max_bitrate;
437 avg_bitrate = entry.ddts.avg_bitrate;
438 num_channels = entry.channelcount;
439 sampling_frequency = entry.samplerate;
// Channel count is derived from the dac3 payload rather than from
// entry.channelcount.
442 codec_config = entry.dac3.data;
443 num_channels =
static_cast<uint8_t
>(GetAc3NumChannels(codec_config));
444 sampling_frequency = entry.samplerate;
// Same approach for the dec3 payload.
447 codec_config = entry.dec3.data;
448 num_channels =
static_cast<uint8_t
>(GetEc3NumChannels(codec_config));
449 sampling_frequency = entry.samplerate;
452 codec_config = entry.dops.opus_identification_header;
453 num_channels = entry.channelcount;
454 sampling_frequency = entry.samplerate;
// Guard the division below. The pre-skip value (presumably a sample count)
// is scaled to nanoseconds.
// NOTE(review): the assignment target of the expression is not visible here.
455 RCHECK(sampling_frequency != 0);
457 entry.dops.preskip * kNanosecondsPerSecond / sampling_frequency;
460 LOG(ERROR) <<
"Unsupported audio format 0x" << std::hex
461 << actual_format <<
" in stsd box.";
// Seek pre-roll: look through the sample group descriptions for the 'roll'
// grouping type; a negative roll distance is negated and scaled to
// nanoseconds.
466 uint64_t seek_preroll_ns = 0;
467 for (
const auto& sample_group_description :
468 track->media.information.sample_table.sample_group_descriptions) {
469 if (sample_group_description.grouping_type != FOURCC_roll)
471 const auto& audio_roll_recovery_entries =
472 sample_group_description.audio_roll_recovery_entries;
473 if (audio_roll_recovery_entries.size() != 1) {
474 LOG(WARNING) <<
"Unexpected number of entries in " 475 "SampleGroupDescription table with grouping type " 479 const int16_t roll_distance_in_samples =
480 audio_roll_recovery_entries[0].roll_distance;
481 if (roll_distance_in_samples < 0) {
482 RCHECK(sampling_frequency != 0);
483 seek_preroll_ns = kNanosecondsPerSecond *
484 (-roll_distance_in_samples) / sampling_frequency;
487 <<
"Roll distance is supposed to be negative, but seeing " 488 << roll_distance_in_samples;
// NOTE(review): only one arm of this conditional is visible; it reads
// default_is_protected from the entry's protection scheme info.
494 const bool is_encrypted =
497 : entry.sinf.info.track_encryption.default_is_protected == 1;
498 DVLOG(1) <<
"is_audio_track_encrypted_: " << is_encrypted;
500 track->header.track_id, timescale, duration, codec,
502 codec_config.data(), codec_config.size(), entry.samplesize,
503 num_channels, sampling_frequency, seek_preroll_ns, codec_delay_ns,
504 max_bitrate, avg_bitrate, track->media.header.language.code,
// ---- Video track ----
508 if (samp_descr.type == kVideo) {
509 RCHECK(!samp_descr.video_entries.empty());
// NOTE(review): the handling of an out-of-range index is not visible here.
510 if (desc_idx >= samp_descr.video_entries.size())
// Start from the dimensions and pixel aspect ratio stored in the sample entry.
514 uint32_t coded_width = entry.width;
515 uint32_t coded_height = entry.height;
516 uint32_t pixel_width = entry.pixel_aspect.h_spacing;
517 uint32_t pixel_height = entry.pixel_aspect.v_spacing;
// NOTE(review): the values substituted when both spacings are zero are not
// visible here.
518 if (pixel_width == 0 && pixel_height == 0) {
522 std::string codec_string;
523 uint8_t nalu_length_size = 0;
525 const FourCC actual_format = entry.GetActualFormat();
526 const Codec video_codec = FourCCToCodec(actual_format);
527 switch (actual_format) {
531 if (!avc_config.
Parse(entry.codec_configuration.data)) {
532 LOG(ERROR) <<
"Failed to parse avcc.";
// On a resolution mismatch the values from the parsed AVC configuration
// record override the sample entry's, with a warning.
538 if (coded_width != avc_config.coded_width() ||
539 coded_height != avc_config.coded_height()) {
540 LOG(WARNING) <<
"Resolution in VisualSampleEntry (" << coded_width
541 <<
"," << coded_height
542 <<
") does not match with resolution in " 543 "AVCDecoderConfigurationRecord (" 544 << avc_config.coded_width() <<
"," 545 << avc_config.coded_height()
546 <<
"). Use AVCDecoderConfigurationRecord.";
547 coded_width = avc_config.coded_width();
548 coded_height = avc_config.coded_height();
// Same override for the pixel aspect ratio; the warning is suppressed when
// the sample entry's ratio is 1:1.
551 if (pixel_width != avc_config.pixel_width() ||
552 pixel_height != avc_config.pixel_height()) {
553 LOG_IF(WARNING, pixel_width != 1 || pixel_height != 1)
554 <<
"Pixel aspect ratio in PASP box (" << pixel_width <<
"," 556 <<
") does not match with SAR in AVCDecoderConfigurationRecord " 558 << avc_config.pixel_width() <<
"," << avc_config.pixel_height()
559 <<
"). Use AVCDecoderConfigurationRecord.";
560 pixel_width = avc_config.pixel_width();
561 pixel_height = avc_config.pixel_height();
568 if (!hevc_config.
Parse(entry.codec_configuration.data)) {
569 LOG(ERROR) <<
"Failed to parse hevc.";
580 if (!vp_config.
ParseMP4(entry.codec_configuration.data)) {
581 LOG(ERROR) <<
"Failed to parse vpcc.";
588 LOG(ERROR) <<
"Unsupported video format " 589 << FourCCToString(actual_format) <<
" in stsd box.";
// NOTE(review): only one arm of this conditional is visible; it reads
// default_is_protected from the entry's protection scheme info.
594 const bool is_encrypted =
597 : entry.sinf.info.track_encryption.default_is_protected == 1;
598 DVLOG(1) <<
"is_video_track_encrypted_: " << is_encrypted;
599 std::shared_ptr<VideoStreamInfo> video_stream_info(
new VideoStreamInfo(
600 track->header.track_id, timescale, duration, video_codec,
601 GetH26xStreamFormat(actual_format), codec_string,
602 entry.codec_configuration.data.data(),
603 entry.codec_configuration.data.size(), coded_width, coded_height,
604 pixel_width, pixel_height,
606 nalu_length_size, track->media.header.language.code, is_encrypted));
// When movie-level PSSH boxes exist, their raw boxes are concatenated and
// attached to the video stream as EME init data.
609 if (moov_->pssh.size() > 0) {
610 std::vector<uint8_t> pssh_raw_data;
611 for (
const auto& pssh : moov_->pssh) {
612 pssh_raw_data.insert(pssh_raw_data.end(), pssh.raw_box.begin(),
615 video_stream_info->set_eme_init_data(pssh_raw_data.data(),
616 pssh_raw_data.size());
619 streams.push_back(video_stream_info);
// Hand the collected streams to the init callback, fetch keys for the
// movie-level PSSH boxes, initialize the run iterator, and start emitting
// samples.
623 init_cb_.Run(streams);
624 if (!FetchKeysIfNecessary(moov_->pssh))
627 RCHECK(runs_->Init());
628 ChangeState(kEmittingSamples);
// Parses a 'moof' box, initializes the run iterator from it, fetches keys for
// the fragment's PSSH boxes, and switches to kEmittingSamples.
// NOTE(review): the declaration of `moof` and the return statements are not
// visible in this excerpt.
632 bool MP4MediaParser::ParseMoof(
BoxReader* reader) {
636 RCHECK(moof.Parse(reader));
639 RCHECK(runs_->Init(moof));
640 if (!FetchKeysIfNecessary(moof.pssh))
642 ChangeState(kEmittingSamples);
// Concatenates the raw bytes of all given PSSH boxes and passes them to the
// decryption key source as CENC init data; a failure is logged with its status.
// NOTE(review): the early-exit conditions' outcomes and the return statements
// are not visible in this excerpt.
646 bool MP4MediaParser::FetchKeysIfNecessary(
647 const std::vector<ProtectionSystemSpecificHeader>& headers) {
// NOTE(review): what happens without a key source is not visible here.
652 if (!decryption_key_source_)
655 std::vector<uint8_t> pssh_raw_data;
656 for (
const auto& header : headers) {
657 pssh_raw_data.insert(pssh_raw_data.end(), header.raw_box.begin(),
658 header.raw_box.end());
661 decryption_key_source_->FetchKeys(EmeInitDataType::CENC, pssh_raw_data);
663 LOG(ERROR) <<
"Error fetching decryption keys: " << status;
// Emits the next sample of the current track run through new_sample_cb_ and
// then advances the run iterator.
// NOTE(review): the return statements and several branch bodies are not
// visible in this excerpt.
669 bool MP4MediaParser::EnqueueSample(
bool* err) {
// No valid run: trim the queue up to mdat_tail_ and go back to parsing boxes.
670 if (!runs_->IsRunValid()) {
673 if (!queue_.Trim(mdat_tail_))
676 ChangeState(kParsingBoxes);
// NOTE(review): the handling of an invalid sample is not visible here.
680 if (!runs_->IsSampleValid()) {
689 queue_.Peek(&buf, &buf_size);
// NOTE(review): the handling of runs that are neither audio nor video is not
// visible here.
694 if (!runs_->is_audio() && !runs_->is_video())
// Auxiliary info is located relative to moof_head_; at least aux_info_size()
// bytes must be buffered before it is cached.
704 if (runs_->AuxInfoNeedsToBeCached()) {
705 queue_.PeekAt(runs_->aux_info_offset() + moof_head_, &buf, &buf_size);
706 if (buf_size < runs_->aux_info_size())
708 *err = !runs_->CacheAuxInfo(buf, buf_size);
// The sample offset is likewise relative to moof_head_.
712 int64_t sample_offset = runs_->sample_offset() + moof_head_;
713 queue_.PeekAt(sample_offset, &buf, &buf_size);
// Fewer bytes buffered than the sample needs; an offset that lies before the
// queue head is reported as an error.
714 if (buf_size < runs_->sample_size()) {
715 if (sample_offset < queue_.head()) {
716 LOG(ERROR) <<
"Incorrect sample offset " << sample_offset
717 <<
" < " << queue_.head();
723 const uint8_t* media_data = buf;
724 const size_t media_data_size = runs_->sample_size();
727 const size_t kDummyDataSize = 0;
728 std::shared_ptr<MediaSample> stream_sample(
731 if (runs_->is_encrypted()) {
// Buffer sized to the sample; it is the output argument of
// DecryptSampleBuffer below.
732 std::shared_ptr<uint8_t> decrypted_media_data(
733 new uint8_t[media_data_size], std::default_delete<uint8_t[]>());
734 std::unique_ptr<DecryptConfig> decrypt_config = runs_->GetDecryptConfig();
735 if (!decrypt_config) {
737 LOG(ERROR) <<
"Missing decrypt config.";
// Without a decryptor the sample data is passed through as-is, with the
// decrypt config attached and the encrypted flag set.
741 if (!decryptor_source_) {
742 stream_sample->SetData(media_data, media_data_size);
745 stream_sample->set_decrypt_config(std::move(decrypt_config));
746 stream_sample->set_is_encrypted(
true);
// NOTE(review): the std::move above and the .get() below are presumably in
// different branches (the `else` line is not visible in this excerpt);
// confirm decrypt_config is never used after being moved from.
748 if (!decryptor_source_->DecryptSampleBuffer(decrypt_config.get(),
749 media_data, media_data_size,
750 decrypted_media_data.get())) {
752 LOG(ERROR) <<
"Cannot decrypt samples.";
755 stream_sample->TransferData(std::move(decrypted_media_data),
759 stream_sample->SetData(media_data, media_data_size);
// Timing comes from the run iterator; the run's cts is used as the pts.
762 stream_sample->set_dts(runs_->dts());
763 stream_sample->set_pts(runs_->cts());
764 stream_sample->set_duration(runs_->duration());
766 DVLOG(3) <<
"Pushing frame: " 767 <<
", key=" << runs_->is_keyframe()
768 <<
", dur=" << runs_->duration()
769 <<
", dts=" << runs_->dts()
770 <<
", cts=" << runs_->cts()
771 <<
", size=" << runs_->sample_size();
// Deliver the sample; a false return from the callback is logged as a failure.
773 if (!new_sample_cb_.Run(runs_->track_id(), stream_sample)) {
775 LOG(ERROR) <<
"Failed to process the sample.";
779 runs_->AdvanceSample();
// Advances mdat_tail_ box by box while it is below `offset`, then trims the
// queue up to the smaller of mdat_tail_ and `offset`.
// NOTE(review): how box_sz is obtained and the return statements are not
// visible in this excerpt.
783 bool MP4MediaParser::ReadAndDiscardMDATsUntil(
const int64_t offset) {
785 while (mdat_tail_ < offset) {
788 queue_.PeekAt(mdat_tail_, &buf, &size);
795 mdat_tail_ += box_sz;
797 queue_.Trim(std::min(mdat_tail_, offset));
// Logs the requested state at debug verbosity level 2.
// NOTE(review): the rest of the body is not visible in this excerpt.
801 void MP4MediaParser::ChangeState(State new_state) {
802 DVLOG(2) <<
"Changing state: " << new_state;
All the methods that are virtual are virtual for mocking.
static File * OpenWithNoBuffering(const char *file_name, const char *mode)