5 #include "packager/media/formats/mp4/mp4_media_parser.h" 9 #include "packager/base/callback.h" 10 #include "packager/base/callback_helpers.h" 11 #include "packager/base/logging.h" 12 #include "packager/base/strings/string_number_conversions.h" 13 #include "packager/file/file.h" 14 #include "packager/file/file_closer.h" 15 #include "packager/media/base/audio_stream_info.h" 16 #include "packager/media/base/buffer_reader.h" 17 #include "packager/media/base/decrypt_config.h" 18 #include "packager/media/base/key_source.h" 19 #include "packager/media/base/macros.h" 20 #include "packager/media/base/media_sample.h" 21 #include "packager/media/base/rcheck.h" 22 #include "packager/media/base/video_stream_info.h" 23 #include "packager/media/base/video_util.h" 24 #include "packager/media/codecs/ac3_audio_util.h" 25 #include "packager/media/codecs/av1_codec_configuration_record.h" 26 #include "packager/media/codecs/avc_decoder_configuration_record.h" 27 #include "packager/media/codecs/dovi_decoder_configuration_record.h" 28 #include "packager/media/codecs/ec3_audio_util.h" 29 #include "packager/media/codecs/es_descriptor.h" 30 #include "packager/media/codecs/hevc_decoder_configuration_record.h" 31 #include "packager/media/codecs/vp_codec_configuration_record.h" 32 #include "packager/media/formats/mp4/box_definitions.h" 33 #include "packager/media/formats/mp4/box_reader.h" 34 #include "packager/media/formats/mp4/track_run_iterator.h" 41 uint64_t Rescale(uint64_t time_in_old_scale,
44 return (static_cast<double>(time_in_old_scale) / old_scale) * new_scale;
47 H26xStreamFormat GetH26xStreamFormat(FourCC fourcc) {
52 return H26xStreamFormat::kNalUnitStreamWithoutParameterSetNalus;
56 return H26xStreamFormat::kNalUnitStreamWithParameterSetNalus;
58 return H26xStreamFormat::kUnSpecified;
62 Codec FourCCToCodec(FourCC fourcc) {
71 return kCodecH265DolbyVision;
100 return kUnknownCodec;
104 Codec ObjectTypeToCodec(ObjectType object_type) {
105 switch (object_type) {
106 case ObjectType::kISO_14496_3:
107 case ObjectType::kISO_13818_7_AAC_LC:
109 case ObjectType::kDTSC:
111 case ObjectType::kDTSE:
113 case ObjectType::kDTSH:
115 case ObjectType::kDTSL:
118 return kUnknownCodec;
122 std::vector<uint8_t> GetDOVIDecoderConfig(
123 const std::vector<CodecConfiguration>& configs) {
124 for (
const CodecConfiguration& config : configs) {
125 if (config.box_type == FOURCC_dvcC || config.box_type == FOURCC_dvvC) {
129 return std::vector<uint8_t>();
132 bool UpdateCodecStringForDolbyVision(
133 FourCC actual_format,
134 const std::vector<CodecConfiguration>& configs,
135 std::string* codec_string) {
136 DOVIDecoderConfigurationRecord dovi_config;
137 if (!dovi_config.Parse(GetDOVIDecoderConfig(configs))) {
138 LOG(ERROR) <<
"Failed to parse Dolby Vision decoder " 139 "configuration record.";
142 if (actual_format == FOURCC_dvh1 || actual_format == FOURCC_dvhe) {
145 *codec_string = dovi_config.GetCodecString(actual_format);
// Count of nanoseconds in one second (10^9). Used as the scale factor when
// sample counts divided by a sampling frequency are expressed in nanoseconds.
constexpr uint64_t kNanosecondsPerSecond = 1000ULL * 1000ULL * 1000ULL;
156 MP4MediaParser::MP4MediaParser()
157 : state_(kWaitingForInit),
158 decryption_key_source_(NULL),
162 MP4MediaParser::~MP4MediaParser() {}
165 const NewSampleCB& new_sample_cb,
167 DCHECK_EQ(state_, kWaitingForInit);
168 DCHECK(init_cb_.is_null());
169 DCHECK(!init_cb.is_null());
170 DCHECK(!new_sample_cb.is_null());
172 ChangeState(kParsingBoxes);
174 new_sample_cb_ = new_sample_cb;
175 decryption_key_source_ = decryption_key_source;
176 if (decryption_key_source)
180 void MP4MediaParser::Reset() {
188 DCHECK_NE(state_, kWaitingForInit);
190 ChangeState(kParsingBoxes);
195 DCHECK_NE(state_, kWaitingForInit);
197 if (state_ == kError)
200 queue_.Push(buf, size);
202 bool result, err =
false;
205 if (state_ == kParsingBoxes) {
206 result = ParseBox(&err);
208 DCHECK_EQ(kEmittingSamples, state_);
209 result = EnqueueSample(&err);
211 int64_t max_clear = runs_->GetMaxClearOffset() + moof_head_;
212 err = !ReadAndDiscardMDATsUntil(max_clear);
215 }
while (result && !err);
218 DLOG(ERROR) <<
"Error while parsing MP4";
229 std::unique_ptr<File, FileCloser> file(
232 LOG(ERROR) <<
"Unable to open media file '" << file_path <<
"'";
235 if (!file->Seek(0)) {
236 LOG(WARNING) <<
"Filesystem does not support seeking on file '" << file_path
241 uint64_t file_position(0);
242 bool mdat_seen(
false);
244 const uint32_t kBoxHeaderReadSize(16);
245 std::vector<uint8_t> buffer(kBoxHeaderReadSize);
246 int64_t bytes_read = file->Read(&buffer[0], kBoxHeaderReadSize);
247 if (bytes_read == 0) {
248 LOG(ERROR) <<
"Could not find 'moov' box in file '" << file_path <<
"'";
251 if (bytes_read < kBoxHeaderReadSize) {
252 LOG(ERROR) <<
"Error reading media file '" << file_path <<
"'";
260 LOG(ERROR) <<
"Could not start box from file '" << file_path <<
"'";
263 if (box_type == FOURCC_mdat) {
265 }
else if (box_type == FOURCC_moov) {
271 if (!Parse(&buffer[0], bytes_read)) {
272 LOG(ERROR) <<
"Error parsing mp4 file '" << file_path <<
"'";
275 uint64_t bytes_to_read = box_size - bytes_read;
276 buffer.resize(bytes_to_read);
277 while (bytes_to_read > 0) {
278 bytes_read = file->Read(&buffer[0], bytes_to_read);
279 if (bytes_read <= 0) {
280 LOG(ERROR) <<
"Error reading 'moov' contents from file '" << file_path
284 if (!Parse(&buffer[0], bytes_read)) {
285 LOG(ERROR) <<
"Error parsing mp4 file '" << file_path <<
"'";
288 bytes_to_read -= bytes_read;
294 file_position += box_size;
295 if (!file->Seek(file_position)) {
296 LOG(ERROR) <<
"Error skipping box in mp4 file '" << file_path <<
"'";
303 bool MP4MediaParser::ParseBox(
bool* err) {
306 queue_.Peek(&buf, &size);
311 if (reader.get() == NULL)
314 if (reader->type() == FOURCC_mdat) {
320 NOTIMPLEMENTED() <<
" Non-seekable Files with 'mdat' box before 'moov' " 321 "box is not supported.";
328 <<
"Ignore unused 'mdat' box - this could be as a result of extra " 329 "not usable 'mdat' or 'mdat' associated with unrecognized track.";
334 mdat_tail_ = queue_.head() + reader->size();
336 if (reader->type() == FOURCC_moov) {
337 *err = !ParseMoov(reader.get());
338 }
else if (reader->type() == FOURCC_moof) {
339 moof_head_ = queue_.head();
340 *err = !ParseMoof(reader.get());
348 VLOG(2) <<
"Skipping top-level box: " << FourCCToString(reader->type());
351 queue_.Pop(static_cast<int>(reader->size()));
355 bool MP4MediaParser::ParseMoov(
BoxReader* reader) {
359 moov_.reset(
new Movie);
360 RCHECK(moov_->Parse(reader));
363 std::vector<std::shared_ptr<StreamInfo>> streams;
365 for (std::vector<Track>::const_iterator track = moov_->tracks.begin();
366 track != moov_->tracks.end(); ++track) {
367 const uint32_t timescale = track->media.header.timescale;
370 uint64_t duration = 0;
371 if (track->media.header.duration > 0) {
372 duration = track->media.header.duration;
373 }
else if (moov_->extends.header.fragment_duration > 0) {
374 DCHECK(moov_->header.timescale != 0);
375 duration = Rescale(moov_->extends.header.fragment_duration,
376 moov_->header.timescale,
378 }
else if (moov_->header.duration > 0 &&
379 moov_->header.duration != std::numeric_limits<uint64_t>::max()) {
380 DCHECK(moov_->header.timescale != 0);
382 Rescale(moov_->header.duration, moov_->header.timescale, timescale);
386 track->media.information.sample_table.description;
392 if (moov_->extends.tracks.size() > 0) {
393 for (
size_t t = 0; t < moov_->extends.tracks.size(); t++) {
395 if (trex.track_id == track->header.track_id) {
396 desc_idx = trex.default_sample_description_index;
401 const std::vector<ChunkInfo>& chunk_info =
402 track->media.information.sample_table.sample_to_chunk.chunk_info;
403 RCHECK(chunk_info.size() > 0);
404 desc_idx = chunk_info[0].sample_description_index;
406 RCHECK(desc_idx > 0);
409 if (samp_descr.type == kAudio) {
410 RCHECK(!samp_descr.audio_entries.empty());
414 if (desc_idx >= samp_descr.audio_entries.size())
418 const FourCC actual_format = entry.GetActualFormat();
419 Codec codec = FourCCToCodec(actual_format);
420 uint8_t num_channels = entry.channelcount;
421 uint32_t sampling_frequency = entry.samplerate;
422 uint64_t codec_delay_ns = 0;
423 uint8_t audio_object_type = 0;
424 uint32_t max_bitrate = 0;
425 uint32_t avg_bitrate = 0;
426 std::vector<uint8_t> codec_config;
428 switch (actual_format) {
431 entry.esds.es_descriptor.decoder_config_descriptor();
432 max_bitrate = decoder_config.max_bitrate();
433 avg_bitrate = decoder_config.avg_bitrate();
435 codec = ObjectTypeToCodec(decoder_config.object_type());
436 if (codec == kCodecAAC) {
438 entry.esds.aac_audio_specific_config;
444 decoder_config.decoder_specific_info_descriptor().data();
445 }
else if (codec == kUnknownCodec) {
450 LOG(WARNING) <<
"Unsupported audio object type " 451 <<
static_cast<int>(decoder_config.object_type())
452 <<
" in stsd.es_desriptor.";
457 FALLTHROUGH_INTENDED;
459 FALLTHROUGH_INTENDED;
461 FALLTHROUGH_INTENDED;
463 FALLTHROUGH_INTENDED;
465 codec_config = entry.ddts.extra_data;
466 max_bitrate = entry.ddts.max_bitrate;
467 avg_bitrate = entry.ddts.avg_bitrate;
470 codec_config = entry.dac3.data;
471 num_channels =
static_cast<uint8_t
>(GetAc3NumChannels(codec_config));
474 codec_config = entry.dec3.data;
475 num_channels =
static_cast<uint8_t
>(GetEc3NumChannels(codec_config));
478 codec_config = entry.dfla.data;
481 codec_config = entry.dops.opus_identification_header;
483 entry.dops.preskip * kNanosecondsPerSecond / sampling_frequency;
491 LOG(WARNING) <<
"Unsupported audio format '" 492 << FourCCToString(actual_format) <<
"' in stsd box.";
497 uint64_t seek_preroll_ns = 0;
498 for (
const auto& sample_group_description :
499 track->media.information.sample_table.sample_group_descriptions) {
500 if (sample_group_description.grouping_type != FOURCC_roll)
502 const auto& audio_roll_recovery_entries =
503 sample_group_description.audio_roll_recovery_entries;
504 if (audio_roll_recovery_entries.size() != 1) {
505 LOG(WARNING) <<
"Unexpected number of entries in " 506 "SampleGroupDescription table with grouping type " 510 const int16_t roll_distance_in_samples =
511 audio_roll_recovery_entries[0].roll_distance;
512 if (roll_distance_in_samples < 0) {
513 RCHECK(sampling_frequency != 0);
514 seek_preroll_ns = kNanosecondsPerSecond *
515 (-roll_distance_in_samples) / sampling_frequency;
518 <<
"Roll distance is supposed to be negative, but seeing " 519 << roll_distance_in_samples;
525 const bool is_encrypted =
528 : entry.sinf.info.track_encryption.default_is_protected == 1;
529 DVLOG(1) <<
"is_audio_track_encrypted_: " << is_encrypted;
531 track->header.track_id, timescale, duration, codec,
533 codec_config.data(), codec_config.size(), entry.samplesize,
534 num_channels, sampling_frequency, seek_preroll_ns, codec_delay_ns,
535 max_bitrate, avg_bitrate, track->media.header.language.code,
539 if (samp_descr.type == kVideo) {
540 RCHECK(!samp_descr.video_entries.empty());
541 if (desc_idx >= samp_descr.video_entries.size())
544 std::vector<uint8_t> codec_configuration_data =
545 entry.codec_configuration.data;
547 uint32_t coded_width = entry.width;
548 uint32_t coded_height = entry.height;
549 uint32_t pixel_width = entry.pixel_aspect.h_spacing;
550 uint32_t pixel_height = entry.pixel_aspect.v_spacing;
551 if (pixel_width == 0 && pixel_height == 0) {
552 DerivePixelWidthHeight(coded_width, coded_height, track->header.width,
553 track->header.height, &pixel_width,
556 std::string codec_string;
557 uint8_t nalu_length_size = 0;
558 uint8_t transfer_characteristics = 0;
560 const FourCC actual_format = entry.GetActualFormat();
561 const Codec video_codec = FourCCToCodec(actual_format);
562 switch (actual_format) {
565 if (!av1_config.
Parse(codec_configuration_data)) {
566 LOG(ERROR) <<
"Failed to parse av1c.";
575 if (!avc_config.
Parse(codec_configuration_data)) {
576 LOG(ERROR) <<
"Failed to parse avcc.";
584 if (avc_config.coded_width() != 0) {
585 DCHECK_NE(avc_config.coded_height(), 0u);
586 if (coded_width != avc_config.coded_width() ||
587 coded_height != avc_config.coded_height()) {
588 LOG(WARNING) <<
"Resolution in VisualSampleEntry (" << coded_width
589 <<
"," << coded_height
590 <<
") does not match with resolution in " 591 "AVCDecoderConfigurationRecord (" 592 << avc_config.coded_width() <<
"," 593 << avc_config.coded_height()
594 <<
"). Use AVCDecoderConfigurationRecord.";
595 coded_width = avc_config.coded_width();
596 coded_height = avc_config.coded_height();
599 DCHECK_NE(avc_config.pixel_width(), 0u);
600 DCHECK_NE(avc_config.pixel_height(), 0u);
601 if (pixel_width != avc_config.pixel_width() ||
602 pixel_height != avc_config.pixel_height()) {
603 LOG_IF(WARNING, pixel_width != 1 || pixel_height != 1)
604 <<
"Pixel aspect ratio in PASP box (" << pixel_width <<
"," 606 <<
") does not match with SAR in " 607 "AVCDecoderConfigurationRecord " 609 << avc_config.pixel_width() <<
"," 610 << avc_config.pixel_height()
611 <<
"). Use AVCDecoderConfigurationRecord.";
612 pixel_width = avc_config.pixel_width();
613 pixel_height = avc_config.pixel_height();
623 if (!hevc_config.
Parse(codec_configuration_data)) {
624 LOG(ERROR) <<
"Failed to parse hevc.";
631 if (!entry.extra_codec_configs.empty()) {
632 if (!UpdateCodecStringForDolbyVision(
633 actual_format, entry.extra_codec_configs, &codec_string)) {
642 if (!vp_config.
ParseMP4(codec_configuration_data)) {
643 LOG(ERROR) <<
"Failed to parse vpcc.";
646 if (actual_format == FOURCC_vp09 &&
647 (!vp_config.is_level_set() || vp_config.level() == 0)) {
648 const double kUnknownSampleDuration = 0.0;
650 kUnknownSampleDuration);
651 vp_config.
WriteMP4(&codec_configuration_data);
662 LOG(WARNING) <<
"Unsupported video format '" 663 << FourCCToString(actual_format) <<
"' in stsd box.";
668 const bool is_encrypted =
671 : entry.sinf.info.track_encryption.default_is_protected == 1;
672 DVLOG(1) <<
"is_video_track_encrypted_: " << is_encrypted;
673 std::shared_ptr<VideoStreamInfo> video_stream_info(
new VideoStreamInfo(
674 track->header.track_id, timescale, duration, video_codec,
675 GetH26xStreamFormat(actual_format), codec_string,
676 codec_configuration_data.data(), codec_configuration_data.size(),
677 coded_width, coded_height, pixel_width, pixel_height,
678 transfer_characteristics,
680 nalu_length_size, track->media.header.language.code, is_encrypted));
681 video_stream_info->set_extra_config(entry.ExtraCodecConfigsAsVector());
684 if (moov_->pssh.size() > 0) {
685 std::vector<uint8_t> pssh_raw_data;
686 for (
const auto& pssh : moov_->pssh) {
687 pssh_raw_data.insert(pssh_raw_data.end(), pssh.raw_box.begin(),
690 video_stream_info->set_eme_init_data(pssh_raw_data.data(),
691 pssh_raw_data.size());
694 streams.push_back(video_stream_info);
698 init_cb_.Run(streams);
699 if (!FetchKeysIfNecessary(moov_->pssh))
702 RCHECK(runs_->Init());
703 ChangeState(kEmittingSamples);
707 bool MP4MediaParser::ParseMoof(
BoxReader* reader) {
711 RCHECK(moof.Parse(reader));
714 RCHECK(runs_->Init(moof));
715 if (!FetchKeysIfNecessary(moof.pssh))
717 ChangeState(kEmittingSamples);
721 bool MP4MediaParser::FetchKeysIfNecessary(
722 const std::vector<ProtectionSystemSpecificHeader>& headers) {
727 if (!decryption_key_source_)
730 std::vector<uint8_t> pssh_raw_data;
731 for (
const auto& header : headers) {
732 pssh_raw_data.insert(pssh_raw_data.end(), header.raw_box.begin(),
733 header.raw_box.end());
736 decryption_key_source_->FetchKeys(EmeInitDataType::CENC, pssh_raw_data);
738 LOG(ERROR) <<
"Error fetching decryption keys: " << status;
744 bool MP4MediaParser::EnqueueSample(
bool* err) {
745 if (!runs_->IsRunValid()) {
748 if (!queue_.Trim(mdat_tail_))
751 ChangeState(kParsingBoxes);
755 if (!runs_->IsSampleValid()) {
764 queue_.Peek(&buf, &buf_size);
769 if (!runs_->is_audio() && !runs_->is_video())
779 if (runs_->AuxInfoNeedsToBeCached()) {
780 queue_.PeekAt(runs_->aux_info_offset() + moof_head_, &buf, &buf_size);
781 if (buf_size < runs_->aux_info_size())
783 *err = !runs_->CacheAuxInfo(buf, buf_size);
787 int64_t sample_offset = runs_->sample_offset() + moof_head_;
788 queue_.PeekAt(sample_offset, &buf, &buf_size);
789 if (buf_size < runs_->sample_size()) {
790 if (sample_offset < queue_.head()) {
791 LOG(ERROR) <<
"Incorrect sample offset " << sample_offset
792 <<
" < " << queue_.head();
798 const uint8_t* media_data = buf;
799 const size_t media_data_size = runs_->sample_size();
802 const size_t kDummyDataSize = 0;
803 std::shared_ptr<MediaSample> stream_sample(
806 if (runs_->is_encrypted()) {
807 std::shared_ptr<uint8_t> decrypted_media_data(
808 new uint8_t[media_data_size], std::default_delete<uint8_t[]>());
809 std::unique_ptr<DecryptConfig> decrypt_config = runs_->GetDecryptConfig();
810 if (!decrypt_config) {
812 LOG(ERROR) <<
"Missing decrypt config.";
816 if (!decryptor_source_) {
817 stream_sample->SetData(media_data, media_data_size);
820 stream_sample->set_decrypt_config(std::move(decrypt_config));
821 stream_sample->set_is_encrypted(
true);
823 if (!decryptor_source_->DecryptSampleBuffer(decrypt_config.get(),
824 media_data, media_data_size,
825 decrypted_media_data.get())) {
827 LOG(ERROR) <<
"Cannot decrypt samples.";
830 stream_sample->TransferData(std::move(decrypted_media_data),
834 stream_sample->SetData(media_data, media_data_size);
837 stream_sample->set_dts(runs_->dts());
838 stream_sample->set_pts(runs_->cts());
839 stream_sample->set_duration(runs_->duration());
841 DVLOG(3) <<
"Pushing frame: " 842 <<
", key=" << runs_->is_keyframe()
843 <<
", dur=" << runs_->duration()
844 <<
", dts=" << runs_->dts()
845 <<
", cts=" << runs_->cts()
846 <<
", size=" << runs_->sample_size();
848 if (!new_sample_cb_.Run(runs_->track_id(), stream_sample)) {
850 LOG(ERROR) <<
"Failed to process the sample.";
854 runs_->AdvanceSample();
858 bool MP4MediaParser::ReadAndDiscardMDATsUntil(
const int64_t offset) {
860 while (mdat_tail_ < offset) {
863 queue_.PeekAt(mdat_tail_, &buf, &size);
870 mdat_tail_ += box_sz;
872 queue_.Trim(std::min(mdat_tail_, offset));
876 void MP4MediaParser::ChangeState(State new_state) {
877 DVLOG(2) <<
"Changing state: " << new_state;
Methods are declared virtual solely so that they can be mocked.
static File * OpenWithNoBuffering(const char *file_name, const char *mode)