5 #include "packager/media/formats/mp4/mp4_media_parser.h" 10 #include "packager/base/callback.h" 11 #include "packager/base/callback_helpers.h" 12 #include "packager/base/logging.h" 13 #include "packager/base/strings/string_number_conversions.h" 14 #include "packager/file/file.h" 15 #include "packager/file/file_closer.h" 16 #include "packager/media/base/audio_stream_info.h" 17 #include "packager/media/base/buffer_reader.h" 18 #include "packager/media/base/decrypt_config.h" 19 #include "packager/media/base/key_source.h" 20 #include "packager/media/base/macros.h" 21 #include "packager/media/base/media_sample.h" 22 #include "packager/media/base/rcheck.h" 23 #include "packager/media/base/video_stream_info.h" 24 #include "packager/media/codecs/ac3_audio_util.h" 25 #include "packager/media/codecs/avc_decoder_configuration_record.h" 26 #include "packager/media/codecs/ec3_audio_util.h" 27 #include "packager/media/codecs/es_descriptor.h" 28 #include "packager/media/codecs/hevc_decoder_configuration_record.h" 29 #include "packager/media/codecs/vp_codec_configuration_record.h" 30 #include "packager/media/formats/mp4/box_definitions.h" 31 #include "packager/media/formats/mp4/box_reader.h" 32 #include "packager/media/formats/mp4/track_run_iterator.h" 39 uint64_t Rescale(uint64_t time_in_old_scale,
// Converts a tick count between two timescales: the value is divided by
// old_scale and multiplied by new_scale in double precision, and the result
// is converted back to the function's uint64_t return type.
// NOTE(review): going through double may lose precision for very large tick
// counts -- confirm this is acceptable for the callers.
42 return (static_cast<double>(time_in_old_scale) / old_scale) * new_scale;
// Selects the H26xStreamFormat that corresponds to a sample-entry FourCC.
// The case labels are not visible in this excerpt; each visible return picks
// either the "with" or the "without parameter-set NALUs" variant.
45 H26xStreamFormat GetH26xStreamFormat(FourCC fourcc) {
48 return H26xStreamFormat::kNalUnitStreamWithoutParameterSetNalus;
50 return H26xStreamFormat::kNalUnitStreamWithParameterSetNalus;
52 return H26xStreamFormat::kNalUnitStreamWithParameterSetNalus;
54 return H26xStreamFormat::kNalUnitStreamWithoutParameterSetNalus;
// NOTE(review): presumably the result for FourCCs that none of the cases
// above handle -- confirm against the full switch.
56 return H26xStreamFormat::kUnSpecified;
// Translates a FourCC into a Codec value. The mapping itself lies outside
// this excerpt.
60 Codec FourCCToCodec(FourCC fourcc) {
// Translates an ES-descriptor ObjectType into a Codec value.
99 Codec ObjectTypeToCodec(ObjectType object_type) {
100 switch (object_type) {
// These two object types share a single case body (body not visible here).
101 case ObjectType::kISO_14496_3:
102 case ObjectType::kISO_13818_7_AAC_LC:
// Each DTS object type has its own case body (bodies not visible here).
104 case ObjectType::kDTSC:
106 case ObjectType::kDTSE:
108 case ObjectType::kDTSH:
110 case ObjectType::kDTSL:
// NOTE(review): presumably the default branch for object types not listed
// above -- confirm against the full switch.
113 return kUnknownCodec;
// Number of nanoseconds in one second; used in this file to turn sample
// counts divided by a sampling frequency into nanosecond durations.
117 const uint64_t kNanosecondsPerSecond = 1000000000ull;
// Constructs a parser that starts in the kWaitingForInit state with no
// decryption key source attached. Any further member initializers are not
// visible in this excerpt.
121 MP4MediaParser::MP4MediaParser()
122 : state_(kWaitingForInit),
123 decryption_key_source_(NULL),
// Destructor with an empty body; no explicit cleanup is performed here.
127 MP4MediaParser::~MP4MediaParser() {}
// Initialization routine (its opening signature line is outside this
// excerpt). Debug checks require that the parser is still in kWaitingForInit,
// that no init callback has been stored yet, and that both supplied callbacks
// are non-null.
130 const NewSampleCB& new_sample_cb,
132 DCHECK_EQ(state_, kWaitingForInit);
133 DCHECK(init_cb_.is_null());
134 DCHECK(!init_cb.is_null());
135 DCHECK(!new_sample_cb.is_null());
// From here on the parser expects top-level boxes.
137 ChangeState(kParsingBoxes);
139 new_sample_cb_ = new_sample_cb;
// The key source is stored as given and may be null.
140 decryption_key_source_ = decryption_key_source;
// Extra setup that only happens when a key source was supplied (the guarded
// statement is not visible in this excerpt).
141 if (decryption_key_source)
// Reset entry point; its body is not visible in this excerpt.
145 void MP4MediaParser::Reset() {
// NOTE(review): the embedded numbering jumps from 145 to 153, so the two
// statements below may belong to a different member function than Reset() --
// confirm against the full file. They require an initialized parser and
// return it to the box-parsing state.
153 DCHECK_NE(state_, kWaitingForInit);
155 ChangeState(kParsingBoxes);
// Body of the incremental parse routine (signature line not in this excerpt).
// It appends buf/size to the byte queue, then repeatedly runs either box
// parsing or sample emission, depending on state_, until a step returns false
// or reports an error through err.
160 DCHECK_NE(state_, kWaitingForInit);
// A parser already in the error state is handled separately (the guarded
// statement is not visible here).
162 if (state_ == kError)
165 queue_.Push(buf, size);
// Only err receives an initializer in this declaration; result appears to be
// assigned by one of the two branches below before the loop condition reads
// it.
167 bool result, err =
false;
170 if (state_ == kParsingBoxes) {
171 result = ParseBox(&err);
173 DCHECK_EQ(kEmittingSamples, state_);
174 result = EnqueueSample(&err);
// Discard data up to the run iterator's maximum clear offset, expressed
// relative to the start of the current moof (moof_head_).
176 int64_t max_clear = runs_->GetMaxClearOffset() + moof_head_;
177 err = !ReadAndDiscardMDATsUntil(max_clear);
180 }
while (result && !err);
183 DLOG(ERROR) <<
"Error while parsing MP4";
// Body of the routine that walks the top-level boxes of the file at file_path
// and feeds the 'moov' box through Parse() (signature line not in this
// excerpt). Errors are reported through LOG(ERROR) with the file path.
194 std::unique_ptr<File, FileCloser> file(
197 LOG(ERROR) <<
"Unable to open media file '" << file_path <<
"'";
// Probe whether the file can seek; a failure is logged at WARNING level.
200 if (!file->Seek(0)) {
201 LOG(WARNING) <<
"Filesystem does not support seeking on file '" << file_path
206 uint64_t file_position(0);
207 bool mdat_seen(
false);
// Every iteration starts by reading a fixed 16-byte chunk that is expected
// to hold the next box header.
209 const uint32_t kBoxHeaderReadSize(16);
210 std::vector<uint8_t> buffer(kBoxHeaderReadSize);
211 int64_t bytes_read = file->Read(&buffer[0], kBoxHeaderReadSize);
// A zero-byte read is reported as "no 'moov' box found".
212 if (bytes_read == 0) {
213 LOG(ERROR) <<
"Could not find 'moov' box in file '" << file_path <<
"'";
// bytes_read is signed, so this branch covers short reads as well as
// negative return values.
216 if (bytes_read < kBoxHeaderReadSize) {
217 LOG(ERROR) <<
"Error reading media file '" << file_path <<
"'";
225 LOG(ERROR) <<
"Could not start box from file '" << file_path <<
"'";
228 if (box_type == FOURCC_mdat) {
230 }
else if (box_type == FOURCC_moov) {
// Hand the header bytes that were already read to the parser first.
236 if (!Parse(&buffer[0], bytes_read)) {
237 LOG(ERROR) <<
"Error parsing mp4 file '" << file_path <<
"'";
// Remainder of the 'moov' box after the header bytes consumed above; it is
// read and parsed in as many chunks as Read() delivers.
240 uint64_t bytes_to_read = box_size - bytes_read;
241 buffer.resize(bytes_to_read);
242 while (bytes_to_read > 0) {
243 bytes_read = file->Read(&buffer[0], bytes_to_read);
244 if (bytes_read <= 0) {
245 LOG(ERROR) <<
"Error reading 'moov' contents from file '" << file_path
249 if (!Parse(&buffer[0], bytes_read)) {
250 LOG(ERROR) <<
"Error parsing mp4 file '" << file_path <<
"'";
253 bytes_to_read -= bytes_read;
// Advance to the start of the next top-level box.
259 file_position += box_size;
260 if (!file->Seek(file_position)) {
261 LOG(ERROR) <<
"Error skipping box in mp4 file '" << file_path <<
"'";
// Handles the box at the front of the byte queue. 'mdat' boxes get special
// treatment, 'moov' and 'moof' are parsed, and any other top-level box is
// skipped.
// |err| is written with the negated result of ParseMoov()/ParseMoof().
268 bool MP4MediaParser::ParseBox(
bool* err) {
271 queue_.Peek(&buf, &size);
// No reader could be created for the queued bytes (the handling statement is
// not visible in this excerpt).
276 if (reader.get() == NULL)
279 if (reader->type() == FOURCC_mdat) {
285 NOTIMPLEMENTED() <<
" Non-seekable Files with 'mdat' box before 'moov' " 286 "box is not supported.";
293 <<
"Ignore unused 'mdat' box - this could be as a result of extra " 294 "not usable 'mdat' or 'mdat' associated with unrecognized track.";
// End position of the current box: the queue head plus the box size.
299 mdat_tail_ = queue_.head() + reader->size();
301 if (reader->type() == FOURCC_moov) {
302 *err = !ParseMoov(reader.get());
303 }
else if (reader->type() == FOURCC_moof) {
// Remember where this moof starts; offsets elsewhere in this file are
// computed by adding moof_head_.
304 moof_head_ = queue_.head();
305 *err = !ParseMoof(reader.get());
313 VLOG(2) <<
"Skipping top-level box: " << FourCCToString(reader->type());
// Drop the handled box from the queue.
316 queue_.Pop(static_cast<int>(reader->size()));
// Parses the 'moov' box into moov_, walks its tracks to collect stream
// descriptions, reports them through init_cb_, fetches keys for the movie's
// pssh boxes, initializes the run iterator and switches to kEmittingSamples.
// RCHECK guards reject malformed input along the way.
320 bool MP4MediaParser::ParseMoov(
BoxReader* reader) {
324 moov_.reset(
new Movie);
325 RCHECK(moov_->Parse(reader));
328 std::vector<std::shared_ptr<StreamInfo>> streams;
330 for (std::vector<Track>::const_iterator track = moov_->tracks.begin();
331 track != moov_->tracks.end(); ++track) {
332 const uint32_t timescale = track->media.header.timescale;
// Duration preference order: the track's own media header first, then the
// movie-extends fragment duration rescaled from the movie timescale, then the
// movie header duration (skipped when it equals the uint64_t maximum).
335 uint64_t duration = 0;
336 if (track->media.header.duration > 0) {
337 duration = track->media.header.duration;
338 }
else if (moov_->extends.header.fragment_duration > 0) {
339 DCHECK(moov_->header.timescale != 0);
340 duration = Rescale(moov_->extends.header.fragment_duration,
341 moov_->header.timescale,
343 }
else if (moov_->header.duration > 0 &&
344 moov_->header.duration != std::numeric_limits<uint64_t>::max()) {
345 DCHECK(moov_->header.timescale != 0);
347 Rescale(moov_->header.duration, moov_->header.timescale, timescale);
351 track->media.information.sample_table.description;
// Sample description index: taken from the trex entry whose track_id matches
// when movie-extends tracks exist; the sample-to-chunk table's first entry is
// the other source (the branch structure is not fully visible here).
357 if (moov_->extends.tracks.size() > 0) {
358 for (
size_t t = 0; t < moov_->extends.tracks.size(); t++) {
360 if (trex.track_id == track->header.track_id) {
361 desc_idx = trex.default_sample_description_index;
366 const std::vector<ChunkInfo>& chunk_info =
367 track->media.information.sample_table.sample_to_chunk.chunk_info;
368 RCHECK(chunk_info.size() > 0);
369 desc_idx = chunk_info[0].sample_description_index;
// An index of zero is rejected.
371 RCHECK(desc_idx > 0);
// ---- Audio track ----
374 if (samp_descr.type == kAudio) {
375 RCHECK(!samp_descr.audio_entries.empty());
379 if (desc_idx >= samp_descr.audio_entries.size())
// Defaults come from the sample entry; the format switch below may override
// them.
383 const FourCC actual_format = entry.GetActualFormat();
384 Codec codec = FourCCToCodec(actual_format);
385 uint8_t num_channels = entry.channelcount;
386 uint32_t sampling_frequency = entry.samplerate;
387 uint64_t codec_delay_ns = 0;
388 uint8_t audio_object_type = 0;
389 uint32_t max_bitrate = 0;
390 uint32_t avg_bitrate = 0;
391 std::vector<uint8_t> codec_config;
393 switch (actual_format) {
// esds-based entries: bitrates and the codec come from the ES descriptor.
395 max_bitrate = entry.esds.es_descriptor.max_bitrate();
396 avg_bitrate = entry.esds.es_descriptor.avg_bitrate();
398 codec = ObjectTypeToCodec(entry.esds.es_descriptor.object_type());
399 if (codec == kCodecAAC) {
401 entry.esds.aac_audio_specific_config;
406 codec_config = entry.esds.es_descriptor.decoder_specific_info();
407 }
else if (codec == kUnknownCodec) {
412 LOG(WARNING) <<
"Unsupported audio object type " 414 entry.esds.es_descriptor.object_type())
415 <<
" in stsd.es_desriptor.";
// Several formats fall through to the shared ddts handling below.
419 FALLTHROUGH_INTENDED;
421 FALLTHROUGH_INTENDED;
423 FALLTHROUGH_INTENDED;
425 FALLTHROUGH_INTENDED;
427 codec_config = entry.ddts.extra_data;
428 max_bitrate = entry.ddts.max_bitrate;
429 avg_bitrate = entry.ddts.avg_bitrate;
// AC-3 / E-AC-3: the channel count is derived from the codec configuration
// instead of the sample entry.
432 codec_config = entry.dac3.data;
433 num_channels =
static_cast<uint8_t
>(GetAc3NumChannels(codec_config));
436 codec_config = entry.dec3.data;
437 num_channels =
static_cast<uint8_t
>(GetEc3NumChannels(codec_config));
440 codec_config = entry.dfla.data;
443 codec_config = entry.dops.opus_identification_header;
// NOTE(review): this divides by sampling_frequency with no visible zero
// check, whereas the roll-distance computation further down RCHECKs
// sampling_frequency != 0 first -- confirm a zero sample rate cannot reach
// this statement.
445 entry.dops.preskip * kNanosecondsPerSecond / sampling_frequency;
453 LOG(WARNING) <<
"Unsupported audio format '" 454 << FourCCToString(actual_format) <<
"' in stsd box.";
// Seek preroll: derived from sample group descriptions whose grouping type is
// 'roll'. A negative roll distance (in samples) is converted to nanoseconds.
459 uint64_t seek_preroll_ns = 0;
460 for (
const auto& sample_group_description :
461 track->media.information.sample_table.sample_group_descriptions) {
462 if (sample_group_description.grouping_type != FOURCC_roll)
464 const auto& audio_roll_recovery_entries =
465 sample_group_description.audio_roll_recovery_entries;
466 if (audio_roll_recovery_entries.size() != 1) {
467 LOG(WARNING) <<
"Unexpected number of entries in " 468 "SampleGroupDescription table with grouping type " 472 const int16_t roll_distance_in_samples =
473 audio_roll_recovery_entries[0].roll_distance;
474 if (roll_distance_in_samples < 0) {
475 RCHECK(sampling_frequency != 0);
476 seek_preroll_ns = kNanosecondsPerSecond *
477 (-roll_distance_in_samples) / sampling_frequency;
480 <<
"Roll distance is supposed to be negative, but seeing " 481 << roll_distance_in_samples;
487 const bool is_encrypted =
490 : entry.sinf.info.track_encryption.default_is_protected == 1;
491 DVLOG(1) <<
"is_audio_track_encrypted_: " << is_encrypted;
493 track->header.track_id, timescale, duration, codec,
495 codec_config.data(), codec_config.size(), entry.samplesize,
496 num_channels, sampling_frequency, seek_preroll_ns, codec_delay_ns,
497 max_bitrate, avg_bitrate, track->media.header.language.code,
// ---- Video track ----
501 if (samp_descr.type == kVideo) {
502 RCHECK(!samp_descr.video_entries.empty());
503 if (desc_idx >= samp_descr.video_entries.size())
507 uint32_t coded_width = entry.width;
508 uint32_t coded_height = entry.height;
509 uint32_t pixel_width = entry.pixel_aspect.h_spacing;
510 uint32_t pixel_height = entry.pixel_aspect.v_spacing;
// Special case for a pixel aspect ratio whose spacings are both zero (the
// replacement values are not visible in this excerpt).
511 if (pixel_width == 0 && pixel_height == 0) {
515 std::string codec_string;
516 uint8_t nalu_length_size = 0;
518 const FourCC actual_format = entry.GetActualFormat();
519 const Codec video_codec = FourCCToCodec(actual_format);
520 switch (actual_format) {
524 if (!avc_config.
Parse(entry.codec_configuration.data)) {
525 LOG(ERROR) <<
"Failed to parse avcc.";
// When the sample entry and the AVCDecoderConfigurationRecord disagree on
// resolution, the record's values replace the sample entry's.
531 if (coded_width != avc_config.coded_width() ||
532 coded_height != avc_config.coded_height()) {
533 LOG(WARNING) <<
"Resolution in VisualSampleEntry (" << coded_width
534 <<
"," << coded_height
535 <<
") does not match with resolution in " 536 "AVCDecoderConfigurationRecord (" 537 << avc_config.coded_width() <<
"," 538 << avc_config.coded_height()
539 <<
"). Use AVCDecoderConfigurationRecord.";
540 coded_width = avc_config.coded_width();
541 coded_height = avc_config.coded_height();
// Same rule for the pixel aspect ratio; the warning is only emitted when the
// PASP values are not 1:1.
544 if (pixel_width != avc_config.pixel_width() ||
545 pixel_height != avc_config.pixel_height()) {
546 LOG_IF(WARNING, pixel_width != 1 || pixel_height != 1)
547 <<
"Pixel aspect ratio in PASP box (" << pixel_width <<
"," 549 <<
") does not match with SAR in AVCDecoderConfigurationRecord " 551 << avc_config.pixel_width() <<
"," << avc_config.pixel_height()
552 <<
"). Use AVCDecoderConfigurationRecord.";
553 pixel_width = avc_config.pixel_width();
554 pixel_height = avc_config.pixel_height();
561 if (!hevc_config.
Parse(entry.codec_configuration.data)) {
562 LOG(ERROR) <<
"Failed to parse hevc.";
573 if (!vp_config.
ParseMP4(entry.codec_configuration.data)) {
574 LOG(ERROR) <<
"Failed to parse vpcc.";
586 LOG(WARNING) <<
"Unsupported video format '" 587 << FourCCToString(actual_format) <<
"' in stsd box.";
592 const bool is_encrypted =
595 : entry.sinf.info.track_encryption.default_is_protected == 1;
596 DVLOG(1) <<
"is_video_track_encrypted_: " << is_encrypted;
597 std::shared_ptr<VideoStreamInfo> video_stream_info(
new VideoStreamInfo(
598 track->header.track_id, timescale, duration, video_codec,
599 GetH26xStreamFormat(actual_format), codec_string,
600 entry.codec_configuration.data.data(),
601 entry.codec_configuration.data.size(), coded_width, coded_height,
602 pixel_width, pixel_height,
604 nalu_length_size, track->media.header.language.code, is_encrypted));
// Concatenate the raw bytes of every pssh box in the movie and attach them to
// the video stream as EME init data.
607 if (moov_->pssh.size() > 0) {
608 std::vector<uint8_t> pssh_raw_data;
609 for (
const auto& pssh : moov_->pssh) {
610 pssh_raw_data.insert(pssh_raw_data.end(), pssh.raw_box.begin(),
613 video_stream_info->set_eme_init_data(pssh_raw_data.data(),
614 pssh_raw_data.size());
617 streams.push_back(video_stream_info);
// Publish the collected streams, then prepare for sample emission.
621 init_cb_.Run(streams);
622 if (!FetchKeysIfNecessary(moov_->pssh))
625 RCHECK(runs_->Init());
626 ChangeState(kEmittingSamples);
// Parses a 'moof' box, initializes the run iterator from it, fetches keys for
// the fragment's pssh boxes and switches to kEmittingSamples. The RCHECK
// guards reject a fragment that fails to parse or to initialize the runs.
630 bool MP4MediaParser::ParseMoof(
BoxReader* reader) {
634 RCHECK(moof.Parse(reader));
637 RCHECK(runs_->Init(moof));
638 if (!FetchKeysIfNecessary(moof.pssh))
640 ChangeState(kEmittingSamples);
// Passes the concatenated raw bytes of the given pssh headers to the
// decryption key source as CENC init data. A failure is logged together with
// the returned status.
644 bool MP4MediaParser::FetchKeysIfNecessary(
645 const std::vector<ProtectionSystemSpecificHeader>& headers) {
// Without a key source there is nothing to fetch (the statement taken in that
// case is not visible in this excerpt).
650 if (!decryption_key_source_)
653 std::vector<uint8_t> pssh_raw_data;
654 for (
const auto& header : headers) {
655 pssh_raw_data.insert(pssh_raw_data.end(), header.raw_box.begin(),
656 header.raw_box.end());
659 decryption_key_source_->FetchKeys(EmeInitDataType::CENC, pssh_raw_data);
661 LOG(ERROR) <<
"Error fetching decryption keys: " << status;
// Emits the current sample of the run iterator through new_sample_cb_ and
// advances the iterator. Encrypted samples are either decrypted here or
// passed on with their decrypt config attached.
// |err| receives the negated result of CacheAuxInfo(); other writes to it, if
// any, are not visible in this excerpt.
667 bool MP4MediaParser::EnqueueSample(
bool* err) {
// No valid run left: trim the queue up to mdat_tail_ and go back to parsing
// boxes.
668 if (!runs_->IsRunValid()) {
671 if (!queue_.Trim(mdat_tail_))
674 ChangeState(kParsingBoxes);
678 if (!runs_->IsSampleValid()) {
687 queue_.Peek(&buf, &buf_size);
// Runs that are neither audio nor video are handled separately (the
// statement is not visible here).
692 if (!runs_->is_audio() && !runs_->is_video())
// Auxiliary info is located relative to the start of the current moof; it is
// cached only once enough bytes are available.
702 if (runs_->AuxInfoNeedsToBeCached()) {
703 queue_.PeekAt(runs_->aux_info_offset() + moof_head_, &buf, &buf_size);
704 if (buf_size < runs_->aux_info_size())
706 *err = !runs_->CacheAuxInfo(buf, buf_size);
// Sample offsets are relative to the start of the current moof as well.
710 int64_t sample_offset = runs_->sample_offset() + moof_head_;
711 queue_.PeekAt(sample_offset, &buf, &buf_size);
712 if (buf_size < runs_->sample_size()) {
// An offset that lies before the queue head is logged as incorrect.
713 if (sample_offset < queue_.head()) {
714 LOG(ERROR) <<
"Incorrect sample offset " << sample_offset
715 <<
" < " << queue_.head();
721 const uint8_t* media_data = buf;
722 const size_t media_data_size = runs_->sample_size();
725 const size_t kDummyDataSize = 0;
726 std::shared_ptr<MediaSample> stream_sample(
729 if (runs_->is_encrypted()) {
// Output buffer for the decrypted payload, same size as the encrypted sample.
730 std::shared_ptr<uint8_t> decrypted_media_data(
731 new uint8_t[media_data_size], std::default_delete<uint8_t[]>());
732 std::unique_ptr<DecryptConfig> decrypt_config = runs_->GetDecryptConfig();
733 if (!decrypt_config) {
735 LOG(ERROR) <<
"Missing decrypt config.";
// Without a decryptor source the sample keeps its encrypted payload and
// carries the decrypt config along, marked as encrypted.
739 if (!decryptor_source_) {
740 stream_sample->SetData(media_data, media_data_size);
743 stream_sample->set_decrypt_config(std::move(decrypt_config));
744 stream_sample->set_is_encrypted(
true);
// NOTE(review): presumably the else branch of the check above (the else line
// is not visible here), so decrypt_config has not been moved from when
// .get() is called -- confirm against the full file.
746 if (!decryptor_source_->DecryptSampleBuffer(decrypt_config.get(),
747 media_data, media_data_size,
748 decrypted_media_data.get())) {
750 LOG(ERROR) <<
"Cannot decrypt samples.";
753 stream_sample->TransferData(std::move(decrypted_media_data),
757 stream_sample->SetData(media_data, media_data_size);
// Timing comes from the run iterator; the sample's pts is set from cts().
760 stream_sample->set_dts(runs_->dts());
761 stream_sample->set_pts(runs_->cts());
762 stream_sample->set_duration(runs_->duration());
764 DVLOG(3) <<
"Pushing frame: " 765 <<
", key=" << runs_->is_keyframe()
766 <<
", dur=" << runs_->duration()
767 <<
", dts=" << runs_->dts()
768 <<
", cts=" << runs_->cts()
769 <<
", size=" << runs_->sample_size();
771 if (!new_sample_cb_.Run(runs_->track_id(), stream_sample)) {
773 LOG(ERROR) <<
"Failed to process the sample.";
777 runs_->AdvanceSample();
// Advances mdat_tail_ box by box while it is below |offset|, then trims the
// byte queue up to the smaller of mdat_tail_ and |offset|.
781 bool MP4MediaParser::ReadAndDiscardMDATsUntil(
const int64_t offset) {
783 while (mdat_tail_ < offset) {
786 queue_.PeekAt(mdat_tail_, &buf, &size);
// box_sz is the size of the box found at mdat_tail_ (how it is obtained is
// not visible in this excerpt).
793 mdat_tail_ += box_sz;
// Never trim past |offset|, even if the last box extends beyond it.
795 queue_.Trim(std::min(mdat_tail_, offset));
// State transition helper; logs the requested state at verbose level 2. The
// assignment itself is not visible in this excerpt.
799 void MP4MediaParser::ChangeState(State new_state) {
800 DVLOG(2) <<
"Changing state: " << new_state;
All virtual methods are declared virtual solely so that they can be mocked.
static File * OpenWithNoBuffering(const char *file_name, const char *mode)