5 #include "packager/media/formats/mp4/mp4_media_parser.h"
10 #include "packager/base/callback.h"
11 #include "packager/base/callback_helpers.h"
12 #include "packager/base/logging.h"
13 #include "packager/base/strings/string_number_conversions.h"
14 #include "packager/file/file.h"
15 #include "packager/file/file_closer.h"
16 #include "packager/media/base/audio_stream_info.h"
17 #include "packager/media/base/buffer_reader.h"
18 #include "packager/media/base/decrypt_config.h"
19 #include "packager/media/base/key_source.h"
20 #include "packager/media/base/macros.h"
21 #include "packager/media/base/media_sample.h"
22 #include "packager/media/base/rcheck.h"
23 #include "packager/media/base/video_stream_info.h"
24 #include "packager/media/codecs/avc_decoder_configuration_record.h"
25 #include "packager/media/codecs/es_descriptor.h"
26 #include "packager/media/codecs/hevc_decoder_configuration_record.h"
27 #include "packager/media/codecs/vp_codec_configuration_record.h"
28 #include "packager/media/formats/mp4/box_definitions.h"
29 #include "packager/media/formats/mp4/box_reader.h"
30 #include "packager/media/formats/mp4/track_run_iterator.h"
// Converts a time value expressed in one timescale into another timescale.
// The computation divides by old_scale and multiplies by new_scale in double
// precision, and the result is converted to the uint64_t return type.
// NOTE(review): only the first parameter line is visible in this view;
// old_scale and new_scale are presumably the remaining parameters.
// NOTE(review): routing a uint64_t through double cannot represent every
// value above 2^53 exactly - confirm that is acceptable for the inputs used.
37 uint64_t Rescale(uint64_t time_in_old_scale,
40 return (static_cast<double>(time_in_old_scale) / old_scale) * new_scale;
// Maps a FourCC to the H26xStreamFormat that describes whether parameter set
// NAL units are carried in the stream.
// NOTE(review): the switch/case labels are not visible in this view, so which
// FourCC selects which return below cannot be stated from here.
43 H26xStreamFormat GetH26xStreamFormat(FourCC fourcc) {
46 return H26xStreamFormat::kNalUnitStreamWithoutParameterSetNalus;
48 return H26xStreamFormat::kNalUnitStreamWithParameterSetNalus;
50 return H26xStreamFormat::kNalUnitStreamWithParameterSetNalus;
52 return H26xStreamFormat::kNalUnitStreamWithoutParameterSetNalus;
// Last visible return; presumably the fallback for any other FourCC.
54 return H26xStreamFormat::kUnSpecified;
// Maps a FourCC to a Codec value. ParseMoov calls it with the actual sample
// entry format of each audio and video track.
// NOTE(review): the body is not visible in this view.
58 Codec FourCCToCodec(FourCC fourcc) {
// Channel count that ParseMoov compares against for audio carried through an
// esds descriptor identified as DTS; a different count is logged as
// unsupported.
96 const uint8_t kDtsAudioNumChannels = 6;
// Nanoseconds in one second; used to convert sample counts divided by the
// sampling frequency into nanoseconds (codec delay and seek preroll).
97 const uint64_t kNanosecondsPerSecond = 1000000000ull;
// Constructs a parser that starts in the kWaitingForInit state with no
// decryption key source attached.
// NOTE(review): the rest of the initializer list and the constructor body are
// not visible in this view.
101 MP4MediaParser::MP4MediaParser()
102 : state_(kWaitingForInit),
103 decryption_key_source_(NULL),
// The destructor has an empty body.
107 MP4MediaParser::~MP4MediaParser() {}
// Parser initialization: records the callbacks and the optional decryption
// key source, then moves the parser into the kParsingBoxes state.
// NOTE(review): the first line of the signature is not visible in this view;
// init_cb and decryption_key_source are presumably the other parameters.
110 const NewSampleCB& new_sample_cb,
// Debug-only preconditions: initialization has not happened yet, no init
// callback is stored so far, and both supplied callbacks are non-null.
112 DCHECK_EQ(state_, kWaitingForInit);
113 DCHECK(init_cb_.is_null());
114 DCHECK(!init_cb.is_null());
115 DCHECK(!new_sample_cb.is_null());
117 ChangeState(kParsingBoxes);
119 new_sample_cb_ = new_sample_cb;
120 decryption_key_source_ = decryption_key_source;
// Extra setup happens only when a key source was supplied; the statement
// guarded by this condition is not visible in this view.
121 if (decryption_key_source)
// Reset() takes no arguments and returns nothing.
// NOTE(review): its body is not visible in this view. The embedded line
// numbers jump from 125 to 133, so the two statements below may belong to a
// different function whose header is missing - confirm against the full file.
125 void MP4MediaParser::Reset() {
// Debug-only check that the parser has been initialized.
133 DCHECK_NE(state_, kWaitingForInit);
// Return to box parsing.
135 ChangeState(kParsingBoxes);
// Main parse loop: appends the incoming bytes (buf, size) to queue_ and then
// repeatedly parses boxes or emits samples until a step reports no progress
// or an error.
// NOTE(review): the function header is not visible in this view; buf and size
// are presumably its parameters.
140 DCHECK_NE(state_, kWaitingForInit);
// A parser already in the error state takes an early path (the guarded
// statement is not visible in this view).
142 if (state_ == kError)
145 queue_.Push(buf, size);
// Only err is initialized here; result is assigned by one of the branches
// below before the loop condition reads it.
147 bool result, err =
false;
150 if (state_ == kParsingBoxes) {
151 result = ParseBox(&err);
// Otherwise the parser is expected to be emitting samples.
153 DCHECK_EQ(kEmittingSamples, state_);
154 result = EnqueueSample(&err);
// Discard mdat data up to the run iterator's max clear offset, which is
// relative to the start of the current moof.
156 int64_t max_clear = runs_->GetMaxClearOffset() + moof_head_;
157 err = !ReadAndDiscardMDATsUntil(max_clear);
160 }
while (result && !err);
163 DLOG(ERROR) <<
"Error while parsing MP4";
// Scans a media file box by box looking for the 'moov' box and feeds it to
// Parse(); other boxes are skipped by seeking past them.
// NOTE(review): the function header and several return/else lines are not
// visible in this view; file_path is presumably the function's parameter.
// The file handle is owned by a unique_ptr with a FileCloser deleter.
174 std::unique_ptr<File, FileCloser> file(
177 LOG(ERROR) <<
"Unable to open media file '" << file_path <<
"'";
// A failed Seek(0) is only logged as a warning about missing seek support.
180 if (!file->Seek(0)) {
181 LOG(WARNING) <<
"Filesystem does not support seeking on file '" << file_path
// file_position is advanced by each skipped box's size and used as the next
// seek target; mdat_seen starts out false.
186 uint64_t file_position(0);
187 bool mdat_seen(
false);
// Read a fixed 16 bytes for the box header.
189 const uint32_t kBoxHeaderReadSize(16);
190 std::vector<uint8_t> buffer(kBoxHeaderReadSize);
191 int64_t bytes_read = file->Read(&buffer[0], kBoxHeaderReadSize);
// Zero bytes read is reported as 'moov' not found (presumably end of file).
192 if (bytes_read == 0) {
193 LOG(ERROR) <<
"Could not find 'moov' box in file '" << file_path <<
"'";
// Any other short read is reported as a read error.
196 if (bytes_read < kBoxHeaderReadSize) {
197 LOG(ERROR) <<
"Error reading media file '" << file_path <<
"'";
205 LOG(ERROR) <<
"Could not start box from file '" << file_path <<
"'";
// Dispatch on the box type; the 'mdat' branch body is not visible here.
208 if (box_type == FOURCC_mdat) {
210 }
else if (box_type == FOURCC_moov) {
// Feed the header bytes that were already read to the parser first.
216 if (!
Parse(&buffer[0], bytes_read)) {
217 LOG(ERROR) <<
"Error parsing mp4 file '" << file_path <<
"'";
// Then read the remainder of the 'moov' box and feed each chunk that Read()
// returns to the parser until the whole box has been consumed.
220 uint64_t bytes_to_read = box_size - bytes_read;
221 buffer.resize(bytes_to_read);
222 while (bytes_to_read > 0) {
223 bytes_read = file->Read(&buffer[0], bytes_to_read);
224 if (bytes_read <= 0) {
225 LOG(ERROR) <<
"Error reading 'moov' contents from file '" << file_path
229 if (!
Parse(&buffer[0], bytes_read)) {
230 LOG(ERROR) <<
"Error parsing mp4 file '" << file_path <<
"'";
233 bytes_to_read -= bytes_read;
// Skip the current box by seeking to the start of the next one.
239 file_position += box_size;
240 if (!file->Seek(file_position)) {
241 LOG(ERROR) <<
"Error skipping box in mp4 file '" << file_path <<
"'";
// Parses one top-level box from the front of queue_.
// 'moov' and 'moof' boxes are handed to ParseMoov/ParseMoof, with *err set
// when that parsing fails; 'mdat' gets dedicated handling; a skip message is
// logged for other boxes. The box is popped from the queue afterwards.
// NOTE(review): the return statements and some branches are not visible in
// this view, so the exact return contract cannot be stated from here.
248 bool MP4MediaParser::ParseBox(
bool* err) {
251 queue_.Peek(&buf, &size);
// No reader could be produced from the buffered bytes (the guarded statement
// is not visible in this view).
256 if (reader.get() == NULL)
259 if (reader->type() == FOURCC_mdat) {
265 NOTIMPLEMENTED() <<
" Non-seekable Files with 'mdat' box before 'moov' "
266 "box is not supported.";
273 <<
"Ignore unused 'mdat' box - this could be as a result of extra "
274 "not usable 'mdat' or 'mdat' associated with unrecognized track.";
// Record where the current box ends in queue offsets.
279 mdat_tail_ = queue_.
head() + reader->size();
281 if (reader->type() == FOURCC_moov) {
282 *err = !ParseMoov(reader.get());
283 }
else if (reader->type() == FOURCC_moof) {
// Remember where this moof starts; sample and aux-info offsets used in
// EnqueueSample are added to this value.
284 moof_head_ = queue_.
head();
285 *err = !ParseMoof(reader.get());
293 VLOG(2) <<
"Skipping top-level box: " << FourCCToString(reader->type());
// Drop the bytes of the box that was just handled.
296 queue_.Pop(static_cast<int>(reader->size()));
// Parses the 'moov' box, builds one StreamInfo per audio/video track, reports
// the streams through init_cb_, fetches decryption keys if needed, and
// prepares the track run iterator before switching to kEmittingSamples.
// NOTE(review): many lines (case labels, else/return lines, closing braces)
// are not visible in this view; comments below describe only visible code.
// RCHECK comes from rcheck.h and presumably makes the function fail when its
// condition is false - confirm.
300 bool MP4MediaParser::ParseMoov(BoxReader* reader) {
304 moov_.reset(
new Movie);
305 RCHECK(moov_->Parse(reader));
// Collects the stream descriptions that are handed to init_cb_ at the end.
308 std::vector<std::shared_ptr<StreamInfo>> streams;
310 for (std::vector<Track>::const_iterator track = moov_->tracks.begin();
311 track != moov_->tracks.end(); ++track) {
312 const uint32_t timescale = track->media.header.timescale;
// Duration is taken from the first positive source, in this order: the
// track's media header, the movie-extends fragment duration, then the movie
// header duration (ignored when it equals the uint64_t maximum). The latter
// two are rescaled from the movie timescale.
315 uint64_t duration = 0;
316 if (track->media.header.duration > 0) {
317 duration = track->media.header.duration;
318 }
else if (moov_->extends.header.fragment_duration > 0) {
319 DCHECK(moov_->header.timescale != 0);
320 duration = Rescale(moov_->extends.header.fragment_duration,
321 moov_->header.timescale,
323 }
else if (moov_->header.duration > 0 &&
324 moov_->header.duration != std::numeric_limits<uint64_t>::max()) {
325 DCHECK(moov_->header.timescale != 0);
327 Rescale(moov_->header.duration, moov_->header.timescale, timescale);
330 const SampleDescription& samp_descr =
331 track->media.information.sample_table.description;
// Choose the sample description index. When movie-extends track entries
// exist, use the default index of the entry whose track_id matches this
// track.
337 if (moov_->extends.tracks.size() > 0) {
338 for (
size_t t = 0; t < moov_->extends.tracks.size(); t++) {
339 const TrackExtends& trex = moov_->extends.tracks[t];
340 if (trex.track_id == track->header.track_id) {
341 desc_idx = trex.default_sample_description_index;
// The index can also come from the first sample-to-chunk entry (presumably
// the else branch of the check above; that line is not visible here).
346 const std::vector<ChunkInfo>& chunk_info =
347 track->media.information.sample_table.sample_to_chunk.chunk_info;
348 RCHECK(chunk_info.size() > 0);
349 desc_idx = chunk_info[0].sample_description_index;
// The index must be positive.
351 RCHECK(desc_idx > 0);
// ---- Audio track ----
354 if (samp_descr.type == kAudio) {
355 RCHECK(!samp_descr.audio_entries.empty());
// Out-of-range index check; the guarded statement is not visible here.
359 if (desc_idx >= samp_descr.audio_entries.size())
362 const AudioSampleEntry& entry = samp_descr.audio_entries[desc_idx];
363 const FourCC actual_format = entry.GetActualFormat();
364 Codec codec = FourCCToCodec(actual_format);
365 uint8_t num_channels = 0;
366 uint32_t sampling_frequency = 0;
367 uint64_t codec_delay_ns = 0;
368 uint8_t audio_object_type = 0;
369 uint32_t max_bitrate = 0;
370 uint32_t avg_bitrate = 0;
371 std::vector<uint8_t> codec_config;
// Fill the audio parameters according to the sample entry format (the case
// labels are not visible in this view).
373 switch (actual_format) {
// esds descriptor identified as AAC: take parameters from the parsed AAC
// audio specific config and use the decoder specific info as codec config.
377 if (entry.esds.es_descriptor.IsAAC()) {
379 const AACAudioSpecificConfig& aac_audio_specific_config =
380 entry.esds.aac_audio_specific_config;
381 num_channels = aac_audio_specific_config.GetNumChannels();
383 aac_audio_specific_config.GetSamplesPerSecond();
384 audio_object_type = aac_audio_specific_config.GetAudioObjectType();
385 codec_config = entry.esds.es_descriptor.decoder_specific_info();
387 }
else if (entry.esds.es_descriptor.IsDTS()) {
// esds descriptor identified as DTS: dispatch on the descriptor object type.
388 ObjectType audio_type = entry.esds.es_descriptor.object_type();
389 switch (audio_type) {
403 LOG(ERROR) <<
"Unsupported audio type " << audio_type
407 num_channels = entry.channelcount;
// A channel count other than kDtsAudioNumChannels is logged as unsupported.
410 if (num_channels != kDtsAudioNumChannels) {
411 LOG(ERROR) <<
"Unsupported channel count " << num_channels
412 <<
" for audio type " << audio_type <<
".";
415 sampling_frequency = entry.samplerate;
416 max_bitrate = entry.esds.es_descriptor.max_bitrate();
417 avg_bitrate = entry.esds.es_descriptor.avg_bitrate();
419 LOG(ERROR) <<
"Unsupported audio format 0x" << std::hex
420 << actual_format <<
" in stsd box.";
// Several case labels (not visible here) fall through to the shared handling
// below, which reads the 'ddts' box fields.
425 FALLTHROUGH_INTENDED;
427 FALLTHROUGH_INTENDED;
429 FALLTHROUGH_INTENDED;
431 FALLTHROUGH_INTENDED;
433 codec_config = entry.ddts.extra_data;
434 max_bitrate = entry.ddts.max_bitrate;
435 avg_bitrate = entry.ddts.avg_bitrate;
436 num_channels = entry.channelcount;
437 sampling_frequency = entry.samplerate;
// Codec config taken from the 'dac3' box data.
440 codec_config = entry.dac3.data;
441 num_channels = entry.channelcount;
442 sampling_frequency = entry.samplerate;
// Codec config taken from the 'dec3' box data.
445 codec_config = entry.dec3.data;
446 num_channels = entry.channelcount;
447 sampling_frequency = entry.samplerate;
// Codec config taken from the 'dops' box Opus identification header.
450 codec_config = entry.dops.opus_identification_header;
451 num_channels = entry.channelcount;
452 sampling_frequency = entry.samplerate;
// Guard the division below against a zero sampling frequency.
453 RCHECK(sampling_frequency != 0);
455 entry.dops.preskip * kNanosecondsPerSecond / sampling_frequency;
458 LOG(ERROR) <<
"Unsupported audio format 0x" << std::hex
459 << actual_format <<
" in stsd box.";
// Derive the seek preroll from a sample group description whose grouping
// type is 'roll'; other grouping types are not used for this.
464 uint64_t seek_preroll_ns = 0;
465 for (
const auto& sample_group_description :
466 track->media.information.sample_table.sample_group_descriptions) {
467 if (sample_group_description.grouping_type != FOURCC_roll)
469 const auto& audio_roll_recovery_entries =
470 sample_group_description.audio_roll_recovery_entries;
// Anything other than exactly one entry is logged as unexpected.
471 if (audio_roll_recovery_entries.size() != 1) {
472 LOG(WARNING) <<
"Unexpected number of entries in "
473 "SampleGroupDescription table with grouping type "
477 const int16_t roll_distance_in_samples =
478 audio_roll_recovery_entries[0].roll_distance;
// A negative roll distance (in samples) is negated and converted to
// nanoseconds; the other case is logged.
479 if (roll_distance_in_samples < 0) {
480 RCHECK(sampling_frequency != 0);
481 seek_preroll_ns = kNanosecondsPerSecond *
482 (-roll_distance_in_samples) / sampling_frequency;
485 <<
"Roll distance is supposed to be negative, but seeing "
486 << roll_distance_in_samples;
// NOTE(review): the condition selecting between the two is_encrypted
// alternatives is not visible; the visible alternative checks whether the
// track encryption box marks samples as protected by default.
492 const bool is_encrypted =
495 : entry.sinf.info.track_encryption.default_is_protected == 1;
496 DVLOG(1) <<
"is_audio_track_encrypted_: " << is_encrypted;
497 streams.emplace_back(
new AudioStreamInfo(
498 track->header.track_id, timescale, duration, codec,
500 codec_config.data(), codec_config.size(), entry.samplesize,
501 num_channels, sampling_frequency, seek_preroll_ns, codec_delay_ns,
502 max_bitrate, avg_bitrate, track->media.header.language.code,
// ---- Video track ----
506 if (samp_descr.type == kVideo) {
507 RCHECK(!samp_descr.video_entries.empty());
// Out-of-range index check; the guarded statement is not visible here.
508 if (desc_idx >= samp_descr.video_entries.size())
510 const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx];
512 uint32_t coded_width = entry.width;
513 uint32_t coded_height = entry.height;
514 uint32_t pixel_width = entry.pixel_aspect.h_spacing;
515 uint32_t pixel_height = entry.pixel_aspect.v_spacing;
// Both spacings being zero is handled specially (body not visible here).
516 if (pixel_width == 0 && pixel_height == 0) {
520 std::string codec_string;
521 uint8_t nalu_length_size = 0;
523 const FourCC actual_format = entry.GetActualFormat();
524 const Codec video_codec = FourCCToCodec(actual_format);
// Parse the codec configuration according to the sample entry format (the
// case labels are not visible in this view).
525 switch (actual_format) {
// AVC decoder configuration record.
528 AVCDecoderConfigurationRecord avc_config;
529 if (!avc_config.Parse(entry.codec_configuration.data)) {
530 LOG(ERROR) <<
"Failed to parse avcc.";
533 codec_string = avc_config.GetCodecString(actual_format);
534 nalu_length_size = avc_config.nalu_length_size();
// When the sample entry resolution disagrees with the configuration record,
// warn and use the configuration record's values.
536 if (coded_width != avc_config.coded_width() ||
537 coded_height != avc_config.coded_height()) {
538 LOG(WARNING) <<
"Resolution in VisualSampleEntry (" << coded_width
539 <<
"," << coded_height
540 <<
") does not match with resolution in "
541 "AVCDecoderConfigurationRecord ("
542 << avc_config.coded_width() <<
","
543 << avc_config.coded_height()
544 <<
"). Use AVCDecoderConfigurationRecord.";
545 coded_width = avc_config.coded_width();
546 coded_height = avc_config.coded_height();
// Same rule for the pixel aspect ratio; the warning is emitted only when
// the current values are not 1:1.
549 if (pixel_width != avc_config.pixel_width() ||
550 pixel_height != avc_config.pixel_height()) {
551 LOG_IF(WARNING, pixel_width != 1 || pixel_height != 1)
552 <<
"Pixel aspect ratio in PASP box (" << pixel_width <<
","
554 <<
") does not match with SAR in AVCDecoderConfigurationRecord "
556 << avc_config.pixel_width() <<
"," << avc_config.pixel_height()
557 <<
"). Use AVCDecoderConfigurationRecord.";
558 pixel_width = avc_config.pixel_width();
559 pixel_height = avc_config.pixel_height();
// HEVC decoder configuration record.
565 HEVCDecoderConfigurationRecord hevc_config;
566 if (!hevc_config.Parse(entry.codec_configuration.data)) {
567 LOG(ERROR) <<
"Failed to parse hevc.";
570 codec_string = hevc_config.GetCodecString(actual_format);
571 nalu_length_size = hevc_config.nalu_length_size();
// VP codec configuration record; nalu_length_size is not assigned in the
// visible lines of this branch.
577 VPCodecConfigurationRecord vp_config;
578 if (!vp_config.ParseMP4(entry.codec_configuration.data)) {
579 LOG(ERROR) <<
"Failed to parse vpcc.";
582 codec_string = vp_config.GetCodecString(video_codec);
586 LOG(ERROR) <<
"Unsupported video format "
587 << FourCCToString(actual_format) <<
" in stsd box.";
// NOTE(review): as for audio, the condition selecting between the two
// is_encrypted alternatives is not visible in this view.
592 const bool is_encrypted =
595 : entry.sinf.info.track_encryption.default_is_protected == 1;
596 DVLOG(1) <<
"is_video_track_encrypted_: " << is_encrypted;
597 std::shared_ptr<VideoStreamInfo> video_stream_info(
new VideoStreamInfo(
598 track->header.track_id, timescale, duration, video_codec,
599 GetH26xStreamFormat(actual_format), codec_string,
600 entry.codec_configuration.data.data(),
601 entry.codec_configuration.data.size(), coded_width, coded_height,
602 pixel_width, pixel_height,
604 nalu_length_size, track->media.header.language.code, is_encrypted));
// Concatenate the raw bytes of every 'pssh' box found in the moov and attach
// them to the video stream as EME init data.
607 if (moov_->pssh.size() > 0) {
608 std::vector<uint8_t> pssh_raw_data;
609 for (
const auto& pssh : moov_->pssh) {
610 pssh_raw_data.insert(pssh_raw_data.end(), pssh.raw_box.begin(),
613 video_stream_info->set_eme_init_data(pssh_raw_data.data(),
614 pssh_raw_data.size());
617 streams.push_back(video_stream_info);
// Report the discovered streams, fetch keys for the moov-level pssh boxes,
// then set up the run iterator and start emitting samples.
621 init_cb_.Run(streams);
622 if (!FetchKeysIfNecessary(moov_->pssh))
624 runs_.reset(
new TrackRunIterator(moov_.get()));
625 RCHECK(runs_->Init());
626 ChangeState(kEmittingSamples);
// Parses a 'moof' box, rebuilds the track run iterator for this fragment,
// fetches keys for any pssh boxes carried in the fragment, and switches to
// kEmittingSamples.
// NOTE(review): the declaration of moof and the return statements are not
// visible in this view.
630 bool MP4MediaParser::ParseMoof(BoxReader* reader) {
634 RCHECK(moof.Parse(reader));
// A fresh iterator is created from the stored moov and initialized with the
// fragment that was just parsed.
636 runs_.reset(
new TrackRunIterator(moov_.get()));
637 RCHECK(runs_->Init(moof));
638 if (!FetchKeysIfNecessary(moof.pssh))
640 ChangeState(kEmittingSamples);
// Asks the decryption key source to fetch keys for the given 'pssh' headers.
// The raw bytes of all headers are concatenated and passed to FetchKeys as
// CENC init data; a failure is logged together with the returned status.
// NOTE(review): the early-exit statements and the return values are not
// visible in this view.
644 bool MP4MediaParser::FetchKeysIfNecessary(
645 const std::vector<ProtectionSystemSpecificHeader>& headers) {
// Without a key source there is nothing to fetch from (the guarded statement
// is not visible here).
650 if (!decryption_key_source_)
653 std::vector<uint8_t> pssh_raw_data;
654 for (
const auto& header : headers) {
655 pssh_raw_data.insert(pssh_raw_data.end(), header.raw_box.begin(),
656 header.raw_box.end());
659 decryption_key_source_->
FetchKeys(EmeInitDataType::CENC, pssh_raw_data);
661 LOG(ERROR) <<
"Error fetching decryption keys: " << status;
// Emits the sample the run iterator currently points at: locates its bytes in
// queue_, decrypts them when a decryptor is available, fills in timestamps,
// passes the sample to new_sample_cb_, and advances the iterator.
// NOTE(review): return statements, several guarded bodies and else lines are
// not visible in this view; *err handling is only partly visible.
667 bool MP4MediaParser::EnqueueSample(
bool* err) {
// When the current run is no longer valid: trim the queue up to mdat_tail_
// and go back to parsing boxes.
668 if (!runs_->IsRunValid()) {
671 if (!queue_.
Trim(mdat_tail_))
674 ChangeState(kParsingBoxes);
// Invalid current sample; the handling is not visible here.
678 if (!runs_->IsSampleValid()) {
687 queue_.Peek(&buf, &buf_size);
// Runs that are neither audio nor video get separate handling (not visible).
692 if (!runs_->is_audio() && !runs_->is_video())
// Auxiliary info must be cached before samples of this run can be handled.
// Its offset is relative to the start of the moof.
702 if (runs_->AuxInfoNeedsToBeCached()) {
703 queue_.
PeekAt(runs_->aux_info_offset() + moof_head_, &buf, &buf_size);
// Fewer bytes buffered than the aux info needs (guarded statement not
// visible here).
704 if (buf_size < runs_->aux_info_size())
706 *err = !runs_->CacheAuxInfo(buf, buf_size);
// Locate the sample bytes; the sample offset is also relative to the moof.
710 int64_t sample_offset = runs_->sample_offset() + moof_head_;
711 queue_.
PeekAt(sample_offset, &buf, &buf_size);
712 if (buf_size < runs_->sample_size()) {
// An offset that lies before the queue head is logged as incorrect.
713 if (sample_offset < queue_.
head()) {
714 LOG(ERROR) <<
"Incorrect sample offset " << sample_offset
715 <<
" < " << queue_.
head();
721 const uint8_t* media_data = buf;
722 const size_t media_data_size = runs_->sample_size();
725 const size_t kDummyDataSize = 0;
726 std::shared_ptr<MediaSample> stream_sample(
729 if (runs_->is_encrypted()) {
// Output buffer for decrypted bytes, sized like the encrypted sample and
// released with the array form of delete.
730 std::shared_ptr<uint8_t> decrypted_media_data(
731 new uint8_t[media_data_size], std::default_delete<uint8_t[]>());
732 std::unique_ptr<DecryptConfig> decrypt_config = runs_->GetDecryptConfig();
733 if (!decrypt_config) {
735 LOG(ERROR) <<
"Missing decrypt config.";
// No decryptor available: pass the sample bytes through unchanged, attach
// the decrypt config, and mark the sample as encrypted.
739 if (!decryptor_source_) {
740 stream_sample->SetData(media_data, media_data_size);
743 stream_sample->set_decrypt_config(std::move(decrypt_config));
744 stream_sample->set_is_encrypted(
true);
// NOTE(review): decrypt_config is moved from at 743 and read via get() at
// 746, so this call presumably sits in an else branch whose line is not
// visible in this view - confirm against the full file.
746 if (!decryptor_source_->DecryptSampleBuffer(decrypt_config.get(),
747 media_data, media_data_size,
748 decrypted_media_data.get())) {
750 LOG(ERROR) <<
"Cannot decrypt samples.";
753 stream_sample->TransferData(std::move(decrypted_media_data),
// Presumably the path for samples that are not encrypted - confirm.
757 stream_sample->SetData(media_data, media_data_size);
// The run iterator's dts is used as DTS and its cts as PTS.
760 stream_sample->set_dts(runs_->dts());
761 stream_sample->set_pts(runs_->cts());
762 stream_sample->set_duration(runs_->duration());
764 DVLOG(3) <<
"Pushing frame: "
765 <<
", key=" << runs_->is_keyframe()
766 <<
", dur=" << runs_->duration()
767 <<
", dts=" << runs_->dts()
768 <<
", cts=" << runs_->cts()
769 <<
", size=" << runs_->sample_size();
// Deliver the sample; a false return from the callback is logged as failure.
771 if (!new_sample_cb_.Run(runs_->track_id(), stream_sample)) {
773 LOG(ERROR) <<
"Failed to process the sample.";
777 runs_->AdvanceSample();
// Advances mdat_tail_ box by box until it reaches offset, then trims queue_
// up to the smaller of mdat_tail_ and offset.
// NOTE(review): how box_sz is obtained and the return statements are not
// visible in this view.
781 bool MP4MediaParser::ReadAndDiscardMDATsUntil(
const int64_t offset) {
783 while (mdat_tail_ < offset) {
// Look at the bytes that start at the current tail position.
786 queue_.
PeekAt(mdat_tail_, &buf, &size);
793 mdat_tail_ += box_sz;
795 queue_.
Trim(std::min(mdat_tail_, offset));
// Logs the requested state at debug verbosity level 2.
// NOTE(review): the assignment to the state member is not visible in this
// view; presumably it follows the log statement.
799 void MP4MediaParser::ChangeState(State new_state) {
800 DVLOG(2) <<
"Changing state: " << new_state;
static File * OpenWithNoBuffering(const char *file_name, const char *mode)