5 #include "packager/media/formats/mp4/mp4_media_parser.h" 10 #include "packager/base/callback.h" 11 #include "packager/base/callback_helpers.h" 12 #include "packager/base/logging.h" 13 #include "packager/base/strings/string_number_conversions.h" 14 #include "packager/file/file.h" 15 #include "packager/file/file_closer.h" 16 #include "packager/media/base/audio_stream_info.h" 17 #include "packager/media/base/buffer_reader.h" 18 #include "packager/media/base/decrypt_config.h" 19 #include "packager/media/base/key_source.h" 20 #include "packager/media/base/macros.h" 21 #include "packager/media/base/media_sample.h" 22 #include "packager/media/base/rcheck.h" 23 #include "packager/media/base/video_stream_info.h" 24 #include "packager/media/codecs/ac3_audio_util.h" 25 #include "packager/media/codecs/av1_codec_configuration_record.h" 26 #include "packager/media/codecs/avc_decoder_configuration_record.h" 27 #include "packager/media/codecs/ec3_audio_util.h" 28 #include "packager/media/codecs/es_descriptor.h" 29 #include "packager/media/codecs/hevc_decoder_configuration_record.h" 30 #include "packager/media/codecs/vp_codec_configuration_record.h" 31 #include "packager/media/formats/mp4/box_definitions.h" 32 #include "packager/media/formats/mp4/box_reader.h" 33 #include "packager/media/formats/mp4/track_run_iterator.h" 40 uint64_t Rescale(uint64_t time_in_old_scale,
// Rescale a time value: divide by old_scale and multiply by new_scale.
// The arithmetic is done in double, so inputs wider than double's mantissa
// lose precision, and the result is converted back to the uint64_t return
// type (fractional part dropped).
// NOTE(review): no visible guard against old_scale == 0 - confirm callers.
43 return (static_cast<double>(time_in_old_scale) / old_scale) * new_scale;
// Maps a FourCC to the H26xStreamFormat used for that sample entry type.
// The visible return paths choose between the NAL-unit-stream formats with
// and without parameter set NALUs; kUnSpecified is the last return
// (presumably the fallback for FourCCs that are not H.26x - confirm).
46 H26xStreamFormat GetH26xStreamFormat(FourCC fourcc) {
49 return H26xStreamFormat::kNalUnitStreamWithoutParameterSetNalus;
51 return H26xStreamFormat::kNalUnitStreamWithParameterSetNalus;
53 return H26xStreamFormat::kNalUnitStreamWithParameterSetNalus;
55 return H26xStreamFormat::kNalUnitStreamWithoutParameterSetNalus;
57 return H26xStreamFormat::kUnSpecified;
61 Codec FourCCToCodec(FourCC fourcc) {
// Translates an ES-descriptor ObjectType into a Codec value.
// The two ISO AAC-related object types are adjacent case labels, followed by
// the DTS object types (kDTSC, kDTSE, kDTSH, kDTSL). kUnknownCodec is the
// final return (presumably for any object type not listed - confirm).
100 Codec ObjectTypeToCodec(ObjectType object_type) {
101 switch (object_type) {
102 case ObjectType::kISO_14496_3:
103 case ObjectType::kISO_13818_7_AAC_LC:
105 case ObjectType::kDTSC:
107 case ObjectType::kDTSE:
109 case ObjectType::kDTSH:
111 case ObjectType::kDTSL:
114 return kUnknownCodec;
118 const uint64_t kNanosecondsPerSecond = 1000000000ull;
// A new parser starts in kWaitingForInit and has no decryption key source
// until one is supplied later.
122 MP4MediaParser::MP4MediaParser()
123 : state_(kWaitingForInit),
124 decryption_key_source_(NULL),
// The destructor has an empty body.
128 MP4MediaParser::~MP4MediaParser() {}
131 const NewSampleCB& new_sample_cb,
// Debug-only preconditions: the parser is still waiting for init, no init
// callback has been stored yet, and both supplied callbacks are non-null.
133 DCHECK_EQ(state_, kWaitingForInit);
134 DCHECK(init_cb_.is_null());
135 DCHECK(!init_cb.is_null());
136 DCHECK(!new_sample_cb.is_null());
// Switch to box parsing and remember the sample callback and key source.
138 ChangeState(kParsingBoxes);
140 new_sample_cb_ = new_sample_cb;
141 decryption_key_source_ = decryption_key_source;
// Additional setup is conditional on a key source having been supplied.
142 if (decryption_key_source)
146 void MP4MediaParser::Reset() {
// Debug-only precondition: the parser has left kWaitingForInit.
154 DCHECK_NE(state_, kWaitingForInit);
// Return to the box-parsing state.
156 ChangeState(kParsingBoxes);
// Debug-only precondition: the parser has left kWaitingForInit.
161 DCHECK_NE(state_, kWaitingForInit);
// The error state is checked first; the incoming bytes are then appended to
// the pending-data queue.
163 if (state_ == kError)
166 queue_.Push(buf, size);
// Only err gets an initial value here; on the visible paths result is
// assigned from ParseBox()/EnqueueSample() before the loop condition reads it.
168 bool result, err =
false;
// Dispatch on the current state: parse the next box, or emit the next sample
// (in which case the state is expected to be kEmittingSamples).
171 if (state_ == kParsingBoxes) {
172 result = ParseBox(&err);
174 DCHECK_EQ(kEmittingSamples, state_);
175 result = EnqueueSample(&err);
// Discard mdat data up to the runs' max clear offset, shifted by moof_head_.
// A failure to do so is recorded as an error.
177 int64_t max_clear = runs_->GetMaxClearOffset() + moof_head_;
178 err = !ReadAndDiscardMDATsUntil(max_clear);
// Keep iterating while the last step succeeded and no error was flagged.
181 }
while (result && !err);
184 DLOG(ERROR) <<
"Error while parsing MP4";
// The file handle is owned by a unique_ptr that uses FileCloser as deleter.
195 std::unique_ptr<File, FileCloser> file(
198 LOG(ERROR) <<
"Unable to open media file '" << file_path <<
"'";
// Seek to the start of the file; a failed seek is logged at WARNING level.
201 if (!file->Seek(0)) {
202 LOG(WARNING) <<
"Filesystem does not support seeking on file '" << file_path
// file_position is advanced by each box's size further down; mdat_seen
// starts out false.
207 uint64_t file_position(0);
208 bool mdat_seen(
false);
// Read kBoxHeaderReadSize (16) bytes into a buffer of exactly that size.
210 const uint32_t kBoxHeaderReadSize(16);
211 std::vector<uint8_t> buffer(kBoxHeaderReadSize);
212 int64_t bytes_read = file->Read(&buffer[0], kBoxHeaderReadSize);
// A zero-byte read is reported as "no 'moov' box found".
213 if (bytes_read == 0) {
214 LOG(ERROR) <<
"Could not find 'moov' box in file '" << file_path <<
"'";
// Any other read shorter than the requested size is reported as a read error.
217 if (bytes_read < kBoxHeaderReadSize) {
218 LOG(ERROR) <<
"Error reading media file '" << file_path <<
"'";
226 LOG(ERROR) <<
"Could not start box from file '" << file_path <<
"'";
// 'mdat' and 'moov' boxes get dedicated handling.
229 if (box_type == FOURCC_mdat) {
231 }
else if (box_type == FOURCC_moov) {
// Feed the bytes already read to the parser first.
237 if (!Parse(&buffer[0], bytes_read)) {
238 LOG(ERROR) <<
"Error parsing mp4 file '" << file_path <<
"'";
// Then read what is left of the box and feed it to the parser as it arrives.
// NOTE(review): if box_size can be smaller than bytes_read, this unsigned
// subtraction wraps around - confirm that case is excluded earlier.
241 uint64_t bytes_to_read = box_size - bytes_read;
242 buffer.resize(bytes_to_read);
243 while (bytes_to_read > 0) {
244 bytes_read = file->Read(&buffer[0], bytes_to_read);
// A read returning zero or a negative value is reported as an error.
245 if (bytes_read <= 0) {
246 LOG(ERROR) <<
"Error reading 'moov' contents from file '" << file_path
250 if (!Parse(&buffer[0], bytes_read)) {
251 LOG(ERROR) <<
"Error parsing mp4 file '" << file_path <<
"'";
254 bytes_to_read -= bytes_read;
// Advance the running offset by the box size and seek there.
260 file_position += box_size;
261 if (!file->Seek(file_position)) {
262 LOG(ERROR) <<
"Error skipping box in mp4 file '" << file_path <<
"'";
// Handles one top-level box taken from the front of the queue.
// *err is set from the result of ParseMoov()/ParseMoof() for those box types.
269 bool MP4MediaParser::ParseBox(
bool* err) {
// Look at the queued bytes without consuming them.
272 queue_.Peek(&buf, &size);
// A null reader is checked for first; 'mdat' boxes are then handled specially.
277 if (reader.get() == NULL)
280 if (reader->type() == FOURCC_mdat) {
286 NOTIMPLEMENTED() <<
" Non-seekable Files with 'mdat' box before 'moov' " 287 "box is not supported.";
294 <<
"Ignore unused 'mdat' box - this could be as a result of extra " 295 "not usable 'mdat' or 'mdat' associated with unrecognized track.";
// Record the end of this box as the queue head plus the box size.
300 mdat_tail_ = queue_.head() + reader->size();
// 'moov' and 'moof' are parsed; for 'moof' the current queue head is
// remembered in moof_head_ before parsing.
302 if (reader->type() == FOURCC_moov) {
303 *err = !ParseMoov(reader.get());
304 }
else if (reader->type() == FOURCC_moof) {
305 moof_head_ = queue_.head();
306 *err = !ParseMoof(reader.get());
314 VLOG(2) <<
"Skipping top-level box: " << FourCCToString(reader->type());
// Remove the box from the queue.
// NOTE(review): the box size is narrowed to int here - confirm that sizes
// beyond the int range cannot reach this point.
317 queue_.Pop(static_cast<int>(reader->size()));
// Parses the 'moov' box into moov_, derives per-track stream information from
// it, reports the collected streams through init_cb_, fetches keys for the
// moov-level pssh boxes and switches to sample emission.
321 bool MP4MediaParser::ParseMoov(
BoxReader* reader) {
325 moov_.reset(
new Movie);
326 RCHECK(moov_->Parse(reader));
329 std::vector<std::shared_ptr<StreamInfo>> streams;
331 for (std::vector<Track>::const_iterator track = moov_->tracks.begin();
332 track != moov_->tracks.end(); ++track) {
333 const uint32_t timescale = track->media.header.timescale;
// Duration, in order of preference:
//   1. the track's media header duration, when positive;
//   2. the movie-extends fragment duration, rescaled from the movie timescale;
//   3. the movie header duration, when positive and not the uint64_t maximum,
//      rescaled from the movie timescale to the track timescale.
// It stays 0 when none of these applies.
336 uint64_t duration = 0;
337 if (track->media.header.duration > 0) {
338 duration = track->media.header.duration;
339 }
else if (moov_->extends.header.fragment_duration > 0) {
340 DCHECK(moov_->header.timescale != 0);
341 duration = Rescale(moov_->extends.header.fragment_duration,
342 moov_->header.timescale,
344 }
else if (moov_->header.duration > 0 &&
345 moov_->header.duration != std::numeric_limits<uint64_t>::max()) {
346 DCHECK(moov_->header.timescale != 0);
348 Rescale(moov_->header.duration, moov_->header.timescale, timescale);
352 track->media.information.sample_table.description;
// Choose the sample description index. When movie-extends track entries
// exist, the entry whose track_id matches this track supplies its
// default_sample_description_index; the other visible path takes the index
// from the first sample-to-chunk entry. The index is required to be positive.
358 if (moov_->extends.tracks.size() > 0) {
359 for (
size_t t = 0; t < moov_->extends.tracks.size(); t++) {
361 if (trex.track_id == track->header.track_id) {
362 desc_idx = trex.default_sample_description_index;
367 const std::vector<ChunkInfo>& chunk_info =
368 track->media.information.sample_table.sample_to_chunk.chunk_info;
369 RCHECK(chunk_info.size() > 0);
370 desc_idx = chunk_info[0].sample_description_index;
372 RCHECK(desc_idx > 0);
// Audio track: at least one audio entry is required, and desc_idx is compared
// against the number of entries before an entry is used.
375 if (samp_descr.type == kAudio) {
376 RCHECK(!samp_descr.audio_entries.empty());
380 if (desc_idx >= samp_descr.audio_entries.size())
384 const FourCC actual_format = entry.GetActualFormat();
385 Codec codec = FourCCToCodec(actual_format);
386 uint8_t num_channels = entry.channelcount;
387 uint32_t sampling_frequency = entry.samplerate;
388 uint64_t codec_delay_ns = 0;
389 uint8_t audio_object_type = 0;
390 uint32_t max_bitrate = 0;
391 uint32_t avg_bitrate = 0;
392 std::vector<uint8_t> codec_config;
// The values above are starting points taken from the sample entry; the
// format-specific handling below overrides some of them.
394 switch (actual_format) {
// ES-descriptor path: bitrates come from the descriptor and the codec is
// re-derived from its object type.
396 max_bitrate = entry.esds.es_descriptor.max_bitrate();
397 avg_bitrate = entry.esds.es_descriptor.avg_bitrate();
399 codec = ObjectTypeToCodec(entry.esds.es_descriptor.object_type());
400 if (codec == kCodecAAC) {
402 entry.esds.aac_audio_specific_config;
407 codec_config = entry.esds.es_descriptor.decoder_specific_info();
408 }
else if (codec == kUnknownCodec) {
413 LOG(WARNING) <<
"Unsupported audio object type " 415 entry.esds.es_descriptor.object_type())
416 <<
" in stsd.es_desriptor.";
// Several case labels fall through to the shared 'ddts' handling, which
// supplies codec_config and both bitrates.
420 FALLTHROUGH_INTENDED;
422 FALLTHROUGH_INTENDED;
424 FALLTHROUGH_INTENDED;
426 FALLTHROUGH_INTENDED;
428 codec_config = entry.ddts.extra_data;
429 max_bitrate = entry.ddts.max_bitrate;
430 avg_bitrate = entry.ddts.avg_bitrate;
// 'dac3' data: the channel count is derived from it by GetAc3NumChannels().
433 codec_config = entry.dac3.data;
434 num_channels =
static_cast<uint8_t
>(GetAc3NumChannels(codec_config));
// 'dec3' data: the channel count is derived from it by GetEc3NumChannels().
437 codec_config = entry.dec3.data;
438 num_channels =
static_cast<uint8_t
>(GetEc3NumChannels(codec_config));
// 'dfla' data is used as the codec configuration as-is.
441 codec_config = entry.dfla.data;
// 'dops': the identification header is the codec configuration, and the
// pre-skip sample count is converted to nanoseconds.
// NOTE(review): this divides by sampling_frequency with no visible zero
// check, unlike the roll-distance conversion further down - confirm.
444 codec_config = entry.dops.opus_identification_header;
446 entry.dops.preskip * kNanosecondsPerSecond / sampling_frequency;
454 LOG(WARNING) <<
"Unsupported audio format '" 455 << FourCCToString(actual_format) <<
"' in stsd box.";
// Seek pre-roll: look at the sample group descriptions with grouping type
// 'roll'. Exactly one roll-recovery entry is expected; a negative roll
// distance (in samples) is converted to nanoseconds, which requires a
// non-zero sampling frequency. Other cases are logged.
460 uint64_t seek_preroll_ns = 0;
461 for (
const auto& sample_group_description :
462 track->media.information.sample_table.sample_group_descriptions) {
463 if (sample_group_description.grouping_type != FOURCC_roll)
465 const auto& audio_roll_recovery_entries =
466 sample_group_description.audio_roll_recovery_entries;
467 if (audio_roll_recovery_entries.size() != 1) {
468 LOG(WARNING) <<
"Unexpected number of entries in " 469 "SampleGroupDescription table with grouping type " 473 const int16_t roll_distance_in_samples =
474 audio_roll_recovery_entries[0].roll_distance;
475 if (roll_distance_in_samples < 0) {
476 RCHECK(sampling_frequency != 0);
477 seek_preroll_ns = kNanosecondsPerSecond *
478 (-roll_distance_in_samples) / sampling_frequency;
481 <<
"Roll distance is supposed to be negative, but seeing " 482 << roll_distance_in_samples;
// Encryption flag for the audio track; one arm of the conditional compares
// the track encryption default_is_protected field with 1.
488 const bool is_encrypted =
491 : entry.sinf.info.track_encryption.default_is_protected == 1;
492 DVLOG(1) <<
"is_audio_track_encrypted_: " << is_encrypted;
494 track->header.track_id, timescale, duration, codec,
496 codec_config.data(), codec_config.size(), entry.samplesize,
497 num_channels, sampling_frequency, seek_preroll_ns, codec_delay_ns,
498 max_bitrate, avg_bitrate, track->media.header.language.code,
// Video track: at least one video entry is required, and desc_idx is compared
// against the number of entries before an entry is used.
502 if (samp_descr.type == kVideo) {
503 RCHECK(!samp_descr.video_entries.empty());
504 if (desc_idx >= samp_descr.video_entries.size())
// Working copy of the codec configuration; the VP path below may rewrite it.
507 std::vector<uint8_t> codec_configuration_data =
508 entry.codec_configuration.data;
// Coded size and pixel aspect ratio start from the sample entry values; the
// case where both pixel dimensions are 0 is handled separately.
510 uint32_t coded_width = entry.width;
511 uint32_t coded_height = entry.height;
512 uint32_t pixel_width = entry.pixel_aspect.h_spacing;
513 uint32_t pixel_height = entry.pixel_aspect.v_spacing;
514 if (pixel_width == 0 && pixel_height == 0) {
518 std::string codec_string;
519 uint8_t nalu_length_size = 0;
521 const FourCC actual_format = entry.GetActualFormat();
522 const Codec video_codec = FourCCToCodec(actual_format);
// Per-format parsing of the codec configuration record.
523 switch (actual_format) {
526 if (!av1_config.
Parse(codec_configuration_data)) {
527 LOG(ERROR) <<
"Failed to parse av1c.";
536 if (!avc_config.
Parse(codec_configuration_data)) {
537 LOG(ERROR) <<
"Failed to parse avcc.";
// When the sample entry resolution disagrees with the
// AVCDecoderConfigurationRecord, warn and use the record's values.
543 if (coded_width != avc_config.coded_width() ||
544 coded_height != avc_config.coded_height()) {
545 LOG(WARNING) <<
"Resolution in VisualSampleEntry (" << coded_width
546 <<
"," << coded_height
547 <<
") does not match with resolution in " 548 "AVCDecoderConfigurationRecord (" 549 << avc_config.coded_width() <<
"," 550 << avc_config.coded_height()
551 <<
"). Use AVCDecoderConfigurationRecord.";
552 coded_width = avc_config.coded_width();
553 coded_height = avc_config.coded_height();
// Same policy for the pixel aspect ratio: the record's values win. The
// warning is only emitted when the current values are not 1:1.
556 if (pixel_width != avc_config.pixel_width() ||
557 pixel_height != avc_config.pixel_height()) {
558 LOG_IF(WARNING, pixel_width != 1 || pixel_height != 1)
559 <<
"Pixel aspect ratio in PASP box (" << pixel_width <<
"," 561 <<
") does not match with SAR in AVCDecoderConfigurationRecord " 563 << avc_config.pixel_width() <<
"," << avc_config.pixel_height()
564 <<
"). Use AVCDecoderConfigurationRecord.";
565 pixel_width = avc_config.pixel_width();
566 pixel_height = avc_config.pixel_height();
573 if (!hevc_config.
Parse(codec_configuration_data)) {
574 LOG(ERROR) <<
"Failed to parse hevc.";
584 if (!vp_config.
ParseMP4(codec_configuration_data)) {
585 LOG(ERROR) <<
"Failed to parse vpcc.";
// vp09 with no usable level (unset or 0): the configuration is adjusted and
// then serialized back into codec_configuration_data.
588 if (actual_format == FOURCC_vp09 &&
589 (!vp_config.is_level_set() || vp_config.level() == 0)) {
590 const double kUnknownSampleDuration = 0.0;
592 kUnknownSampleDuration);
593 vp_config.
WriteMP4(&codec_configuration_data);
604 LOG(WARNING) <<
"Unsupported video format '" 605 << FourCCToString(actual_format) <<
"' in stsd box.";
// Encryption flag for the video track; one arm of the conditional compares
// the track encryption default_is_protected field with 1.
610 const bool is_encrypted =
613 : entry.sinf.info.track_encryption.default_is_protected == 1;
614 DVLOG(1) <<
"is_video_track_encrypted_: " << is_encrypted;
615 std::shared_ptr<VideoStreamInfo> video_stream_info(
new VideoStreamInfo(
616 track->header.track_id, timescale, duration, video_codec,
617 GetH26xStreamFormat(actual_format), codec_string,
618 codec_configuration_data.data(), codec_configuration_data.size(),
619 coded_width, coded_height, pixel_width, pixel_height,
621 nalu_length_size, track->media.header.language.code, is_encrypted));
// When the movie carries pssh boxes, their raw bytes are concatenated and
// attached to the video stream info as EME init data.
624 if (moov_->pssh.size() > 0) {
625 std::vector<uint8_t> pssh_raw_data;
626 for (
const auto& pssh : moov_->pssh) {
627 pssh_raw_data.insert(pssh_raw_data.end(), pssh.raw_box.begin(),
630 video_stream_info->set_eme_init_data(pssh_raw_data.data(),
631 pssh_raw_data.size());
634 streams.push_back(video_stream_info);
// Report the collected streams, fetch keys for the moov-level pssh boxes,
// initialize the runs and start emitting samples.
638 init_cb_.Run(streams);
639 if (!FetchKeysIfNecessary(moov_->pssh))
642 RCHECK(runs_->Init());
643 ChangeState(kEmittingSamples);
// Parses a 'moof' box, initializes the runs from it, fetches keys for the
// fragment's pssh boxes and switches to sample emission. A failed parse or
// runs initialization is rejected via RCHECK.
647 bool MP4MediaParser::ParseMoof(
BoxReader* reader) {
651 RCHECK(moof.Parse(reader));
654 RCHECK(runs_->Init(moof));
655 if (!FetchKeysIfNecessary(moof.pssh))
657 ChangeState(kEmittingSamples);
// Requests keys from the decryption key source for the given pssh headers.
// The presence of a key source is checked first. The raw bytes of every pssh
// box are then concatenated and passed to FetchKeys() as CENC init data; a
// failure status is logged.
661 bool MP4MediaParser::FetchKeysIfNecessary(
662 const std::vector<ProtectionSystemSpecificHeader>& headers) {
667 if (!decryption_key_source_)
670 std::vector<uint8_t> pssh_raw_data;
671 for (
const auto& header : headers) {
672 pssh_raw_data.insert(pssh_raw_data.end(), header.raw_box.begin(),
673 header.raw_box.end());
676 decryption_key_source_->FetchKeys(EmeInitDataType::CENC, pssh_raw_data);
678 LOG(ERROR) <<
"Error fetching decryption keys: " << status;
// Emits the sample the run iterator currently points at: locates its bytes in
// the queue, decrypts them when possible, fills in timing and hands the
// sample to new_sample_cb_, then advances the iterator.
684 bool MP4MediaParser::EnqueueSample(
bool* err) {
// No valid run: the queue is trimmed up to mdat_tail_ and the parser goes
// back to parsing boxes.
685 if (!runs_->IsRunValid()) {
688 if (!queue_.Trim(mdat_tail_))
691 ChangeState(kParsingBoxes);
// Sample validity is checked before the queued data is examined.
695 if (!runs_->IsSampleValid()) {
704 queue_.Peek(&buf, &buf_size);
// Runs that are neither audio nor video are singled out here. After that,
// auxiliary info is cached when required: it is peeked at
// aux_info_offset() + moof_head_, and the buffered size is compared with
// aux_info_size() before CacheAuxInfo() is called.
709 if (!runs_->is_audio() && !runs_->is_video())
719 if (runs_->AuxInfoNeedsToBeCached()) {
720 queue_.PeekAt(runs_->aux_info_offset() + moof_head_, &buf, &buf_size);
721 if (buf_size < runs_->aux_info_size())
723 *err = !runs_->CacheAuxInfo(buf, buf_size);
// Locate the sample at sample_offset() + moof_head_. When fewer bytes than
// sample_size() are available, an offset that lies before the queue head is
// logged as an error.
727 int64_t sample_offset = runs_->sample_offset() + moof_head_;
728 queue_.PeekAt(sample_offset, &buf, &buf_size);
729 if (buf_size < runs_->sample_size()) {
730 if (sample_offset < queue_.head()) {
731 LOG(ERROR) <<
"Incorrect sample offset " << sample_offset
732 <<
" < " << queue_.head();
738 const uint8_t* media_data = buf;
739 const size_t media_data_size = runs_->sample_size();
742 const size_t kDummyDataSize = 0;
743 std::shared_ptr<MediaSample> stream_sample(
// Encrypted sample: allocate an output buffer of the same size and obtain the
// decrypt config, which must be present.
746 if (runs_->is_encrypted()) {
747 std::shared_ptr<uint8_t> decrypted_media_data(
748 new uint8_t[media_data_size], std::default_delete<uint8_t[]>());
749 std::unique_ptr<DecryptConfig> decrypt_config = runs_->GetDecryptConfig();
750 if (!decrypt_config) {
752 LOG(ERROR) <<
"Missing decrypt config.";
// Without a decryptor source the sample keeps its original bytes, takes
// ownership of the decrypt config and is flagged as encrypted.
756 if (!decryptor_source_) {
757 stream_sample->SetData(media_data, media_data_size);
760 stream_sample->set_decrypt_config(std::move(decrypt_config));
761 stream_sample->set_is_encrypted(
true);
// Otherwise decrypt into the freshly allocated buffer and hand that buffer
// over to the sample.
// NOTE(review): decrypt_config is moved from on the path above; this call
// presumably sits in the alternative branch - confirm.
763 if (!decryptor_source_->DecryptSampleBuffer(decrypt_config.get(),
764 media_data, media_data_size,
765 decrypted_media_data.get())) {
767 LOG(ERROR) <<
"Cannot decrypt samples.";
770 stream_sample->TransferData(std::move(decrypted_media_data),
774 stream_sample->SetData(media_data, media_data_size);
// Timing comes from the run iterator; the sample's pts is set from cts().
777 stream_sample->set_dts(runs_->dts());
778 stream_sample->set_pts(runs_->cts());
779 stream_sample->set_duration(runs_->duration());
781 DVLOG(3) <<
"Pushing frame: " 782 <<
", key=" << runs_->is_keyframe()
783 <<
", dur=" << runs_->duration()
784 <<
", dts=" << runs_->dts()
785 <<
", cts=" << runs_->cts()
786 <<
", size=" << runs_->sample_size();
// Deliver the sample for its track; a false return from the callback is
// logged as an error.
788 if (!new_sample_cb_.Run(runs_->track_id(), stream_sample)) {
790 LOG(ERROR) <<
"Failed to process the sample.";
// Move the iterator on to the next sample.
794 runs_->AdvanceSample();
// Advances mdat_tail_ one box at a time while it is below `offset`, peeking
// at the queue at mdat_tail_ for each step, and finally trims the queue up to
// the smaller of mdat_tail_ and `offset`.
798 bool MP4MediaParser::ReadAndDiscardMDATsUntil(
const int64_t offset) {
800 while (mdat_tail_ < offset) {
803 queue_.PeekAt(mdat_tail_, &buf, &size);
810 mdat_tail_ += box_sz;
812 queue_.Trim(std::min(mdat_tail_, offset));
// State transition helper; the new state is logged at debug verbosity 2.
816 void MP4MediaParser::ChangeState(State new_state) {
817 DVLOG(2) <<
"Changing state: " << new_state;
All the methods that are virtual are virtual for mocking.
static File * OpenWithNoBuffering(const char *file_name, const char *mode)