5 #include "packager/media/formats/mp4/mp4_media_parser.h"
10 #include "packager/base/callback.h"
11 #include "packager/base/callback_helpers.h"
12 #include "packager/base/logging.h"
13 #include "packager/base/strings/string_number_conversions.h"
14 #include "packager/file/file.h"
15 #include "packager/file/file_closer.h"
16 #include "packager/media/base/audio_stream_info.h"
17 #include "packager/media/base/buffer_reader.h"
18 #include "packager/media/base/decrypt_config.h"
19 #include "packager/media/base/key_source.h"
20 #include "packager/media/base/macros.h"
21 #include "packager/media/base/media_sample.h"
22 #include "packager/media/base/rcheck.h"
23 #include "packager/media/base/video_stream_info.h"
24 #include "packager/media/codecs/avc_decoder_configuration_record.h"
25 #include "packager/media/codecs/es_descriptor.h"
26 #include "packager/media/codecs/hevc_decoder_configuration_record.h"
27 #include "packager/media/codecs/vp_codec_configuration_record.h"
28 #include "packager/media/formats/mp4/box_definitions.h"
29 #include "packager/media/formats/mp4/box_reader.h"
30 #include "packager/media/formats/mp4/track_run_iterator.h"
// Converts a time value expressed in old_scale units into new_scale units.
// The division and multiplication are carried out in double and the result is
// converted back to the uint64_t return type.
// NOTE(review): double has a 53-bit mantissa, so very large inputs may lose
// precision here — confirm that is acceptable for the durations involved.
37 uint64_t Rescale(uint64_t time_in_old_scale,
40 return (static_cast<double>(time_in_old_scale) / old_scale) * new_scale;
// Selects the H26xStreamFormat that corresponds to a FourCC.
// The visible returns choose between NAL unit streams with and without
// parameter set NAL units; the last return yields kUnSpecified.
// NOTE(review): the conditions guarding each return are not visible in this
// view — confirm which FourCC maps to which format.
43 H26xStreamFormat GetH26xStreamFormat(FourCC fourcc) {
46 return H26xStreamFormat::kNalUnitStreamWithoutParameterSetNalus;
48 return H26xStreamFormat::kNalUnitStreamWithParameterSetNalus;
50 return H26xStreamFormat::kNalUnitStreamWithParameterSetNalus;
52 return H26xStreamFormat::kNalUnitStreamWithoutParameterSetNalus;
54 return H26xStreamFormat::kUnSpecified;
// Translates a FourCC into a Codec value.
58 Codec FourCCToCodec(FourCC fourcc) {
// Channel count that num_channels is compared against in the DTS handling of
// ParseMoov; a mismatch is logged as an unsupported channel count.
96 const uint8_t kDtsAudioNumChannels = 6;
// Number of nanoseconds in one second; used together with the sampling
// frequency to express sample counts as nanosecond values.
97 const uint64_t kNanosecondsPerSecond = 1000000000ull;
// Constructs a parser that is waiting for initialization and has no
// decryption key source yet.
101 MP4MediaParser::MP4MediaParser()
102 : state_(kWaitingForInit),
103 decryption_key_source_(NULL),
// Destructor with an empty body.
107 MP4MediaParser::~MP4MediaParser() {}
110 const NewSampleCB& new_sample_cb,
// DCHECK preconditions: the parser must still be waiting for initialization,
// no init callback may be stored yet, and both callbacks passed in must be
// non-null.
112 DCHECK_EQ(state_, kWaitingForInit);
113 DCHECK(init_cb_.is_null());
114 DCHECK(!init_cb.is_null());
115 DCHECK(!new_sample_cb.is_null());
// Switch to box parsing, then remember the sample callback and the key source.
117 ChangeState(kParsingBoxes);
119 new_sample_cb_ = new_sample_cb;
120 decryption_key_source_ = decryption_key_source;
// NOTE(review): the statement guarded by this check is not visible in this
// view.
121 if (decryption_key_source)
// NOTE(review): the embedded line numbers jump from 125 to 133, so the two
// statements below may belong to a different function than Reset() — confirm
// against the full file.
125 void MP4MediaParser::Reset() {
// The parser must already have left the kWaitingForInit state.
133 DCHECK_NE(state_, kWaitingForInit);
// Return to box parsing.
135 ChangeState(kParsingBoxes);
// The parser must already have left the kWaitingForInit state before data is
// fed to it.
140 DCHECK_NE(state_, kWaitingForInit);
// A parser already in the error state is handled separately.
// NOTE(review): the guarded statement is not visible in this view.
142 if (state_ == kError)
// Append the incoming bytes to the queue that the parsing steps read from.
145 queue_.Push(buf, size);
// err starts out false; result is assigned by whichever step runs below.
147 bool result, err =
false;
// While in kParsingBoxes, parse the next box; otherwise the parser is expected
// to be in kEmittingSamples and enqueues the next sample.
150 if (state_ == kParsingBoxes) {
151 result = ParseBox(&err);
153 DCHECK_EQ(kEmittingSamples, state_);
154 result = EnqueueSample(&err);
// Discard mdat data up to the run iterator's maximum clear offset shifted by
// moof_head_; a failure there is reported through err.
156 int64_t max_clear = runs_->GetMaxClearOffset() + moof_head_;
157 err = !ReadAndDiscardMDATsUntil(max_clear);
// Repeat while the last step returned true and no error was flagged.
160 }
while (result && !err);
163 DLOG(ERROR) <<
"Error while parsing MP4";
// Walks the boxes of a media file looking for the 'moov' box and feeds it to
// Parse(). The file handle is held in a unique_ptr that uses FileCloser as its
// deleter.
174 std::unique_ptr<File, FileCloser> file(
177 LOG(ERROR) <<
"Unable to open media file '" << file_path <<
"'";
// A failed seek to offset 0 is logged as a warning that the filesystem does
// not support seeking.
180 if (!file->Seek(0)) {
181 LOG(WARNING) <<
"Filesystem does not support seeking on file '" << file_path
// file_position is advanced by each box's size further down and used as the
// seek target for the next box.
// NOTE(review): mdat_seen is presumably set when an 'mdat' box is encountered;
// the assignment is not visible in this view.
186 uint64_t file_position(0);
187 bool mdat_seen(
false);
// Read a fixed 16-byte chunk from the current position.
189 const uint32_t kBoxHeaderReadSize(16);
190 std::vector<uint8_t> buffer(kBoxHeaderReadSize);
191 int64_t bytes_read = file->Read(&buffer[0], kBoxHeaderReadSize);
// Reading zero bytes is reported as the 'moov' box not being found.
192 if (bytes_read == 0) {
193 LOG(ERROR) <<
"Could not find 'moov' box in file '" << file_path <<
"'";
// Anything shorter than the full 16 bytes is reported as a read error.
196 if (bytes_read < kBoxHeaderReadSize) {
197 LOG(ERROR) <<
"Error reading media file '" << file_path <<
"'";
205 LOG(ERROR) <<
"Could not start box from file '" << file_path <<
"'";
// Dispatch on the box type: 'mdat' and 'moov' get dedicated handling.
208 if (box_type == FOURCC_mdat) {
210 }
else if (box_type == FOURCC_moov) {
// Feed the bytes that were already read to the parser.
216 if (!
Parse(&buffer[0], bytes_read)) {
217 LOG(ERROR) <<
"Error parsing mp4 file '" << file_path <<
"'";
// Read the rest of the box (its size minus what was already read) and feed
// each chunk to the parser until nothing is left.
220 uint64_t bytes_to_read = box_size - bytes_read;
221 buffer.resize(bytes_to_read);
222 while (bytes_to_read > 0) {
223 bytes_read = file->Read(&buffer[0], bytes_to_read);
224 if (bytes_read <= 0) {
225 LOG(ERROR) <<
"Error reading 'moov' contents from file '" << file_path
229 if (!
Parse(&buffer[0], bytes_read)) {
230 LOG(ERROR) <<
"Error parsing mp4 file '" << file_path <<
"'";
233 bytes_to_read -= bytes_read;
// Move on to the next box by seeking past the current one.
239 file_position += box_size;
240 if (!file->Seek(file_position)) {
241 LOG(ERROR) <<
"Error skipping box in mp4 file '" << file_path <<
"'";
// Handles one top-level box from the front of the queue. The outcome of the
// 'moov' and 'moof' handlers is reported through *err (true on failure).
248 bool MP4MediaParser::ParseBox(
bool* err) {
// Look at the buffered bytes without consuming them.
251 queue_.Peek(&buf, &size);
// NOTE(review): the statement guarded by this null check is not visible in
// this view.
256 if (reader.get() == NULL)
// 'mdat' boxes: the visible message reports that files with MDAT before MOOV
// are not supported yet.
259 if (reader->type() == FOURCC_mdat) {
263 NOTIMPLEMENTED() <<
" Files with MDAT before MOOV is not supported yet.";
// mdat_tail_ becomes the queue head offset plus the size of the current box.
269 mdat_tail_ = queue_.
head() + reader->size();
// 'moov' goes to ParseMoov(); for 'moof' the queue head is remembered in
// moof_head_ before ParseMoof() runs.
271 if (reader->type() == FOURCC_moov) {
272 *err = !ParseMoov(reader.get());
273 }
else if (reader->type() == FOURCC_moof) {
274 moof_head_ = queue_.
head();
275 *err = !ParseMoof(reader.get());
283 VLOG(2) <<
"Skipping top-level box: " << FourCCToString(reader->type());
// Remove the box from the queue.
// NOTE(review): the box size is narrowed to int here — confirm that sizes
// beyond the int range cannot reach this call.
286 queue_.Pop(static_cast<int>(reader->size()));
// Parses the 'moov' box into moov_, builds stream descriptions for the audio
// and video tracks it contains, passes them to init_cb_, fetches keys for the
// movie-level pssh boxes, and then sets up the track run iterator and switches
// to kEmittingSamples.
290 bool MP4MediaParser::ParseMoov(BoxReader* reader) {
294 moov_.reset(
new Movie);
295 RCHECK(moov_->Parse(reader));
// Stream descriptions collected for the init callback.
298 std::vector<std::shared_ptr<StreamInfo>> streams;
300 for (std::vector<Track>::const_iterator track = moov_->tracks.begin();
301 track != moov_->tracks.end(); ++track) {
302 const uint32_t timescale = track->media.header.timescale;
// Duration sources, checked in this order: the track's media header, the
// fragment duration from the movie extends header, and the movie header
// duration (skipped when it is 0 or the uint64_t maximum). The two movie-level
// values are passed through Rescale() together with the movie timescale.
305 uint64_t duration = 0;
306 if (track->media.header.duration > 0) {
307 duration = track->media.header.duration;
308 }
else if (moov_->extends.header.fragment_duration > 0) {
309 DCHECK(moov_->header.timescale != 0);
310 duration = Rescale(moov_->extends.header.fragment_duration,
311 moov_->header.timescale,
313 }
else if (moov_->header.duration > 0 &&
314 moov_->header.duration != std::numeric_limits<uint64_t>::max()) {
315 DCHECK(moov_->header.timescale != 0);
317 Rescale(moov_->header.duration, moov_->header.timescale, timescale);
320 const SampleDescription& samp_descr =
321 track->media.information.sample_table.description;
// Sample description index: when movie extends tracks exist, take the default
// index from the trex entry whose track_id matches this track.
327 if (moov_->extends.tracks.size() > 0) {
328 for (
size_t t = 0; t < moov_->extends.tracks.size(); t++) {
329 const TrackExtends& trex = moov_->extends.tracks[t];
330 if (trex.track_id == track->header.track_id) {
331 desc_idx = trex.default_sample_description_index;
// The other source is the first entry of the sample-to-chunk table, which must
// not be empty.
// NOTE(review): the branch structure joining the two sources is not visible in
// this view.
336 const std::vector<ChunkInfo>& chunk_info =
337 track->media.information.sample_table.sample_to_chunk.chunk_info;
338 RCHECK(chunk_info.size() > 0);
339 desc_idx = chunk_info[0].sample_description_index;
// An index of 0 is rejected.
341 RCHECK(desc_idx > 0);
// Audio track: gather codec parameters from the selected audio sample entry.
344 if (samp_descr.type == kAudio) {
345 RCHECK(!samp_descr.audio_entries.empty());
// NOTE(review): what happens for an out-of-range index is not visible in this
// view.
349 if (desc_idx >= samp_descr.audio_entries.size())
352 const AudioSampleEntry& entry = samp_descr.audio_entries[desc_idx];
353 const FourCC actual_format = entry.GetActualFormat();
354 Codec codec = FourCCToCodec(actual_format);
355 uint8_t num_channels = 0;
356 uint32_t sampling_frequency = 0;
357 uint64_t codec_delay_ns = 0;
358 uint8_t audio_object_type = 0;
359 uint32_t max_bitrate = 0;
360 uint32_t avg_bitrate = 0;
361 std::vector<uint8_t> codec_config;
// Fill in the parameters according to the actual sample format.
363 switch (actual_format) {
// AAC: channel count, sample rate and audio object type come from the AAC
// audio specific config; the codec config is the decoder specific info.
367 if (entry.esds.es_descriptor.IsAAC()) {
369 const AACAudioSpecificConfig& aac_audio_specific_config =
370 entry.esds.aac_audio_specific_config;
371 num_channels = aac_audio_specific_config.GetNumChannels();
373 aac_audio_specific_config.GetSamplesPerSecond();
374 audio_object_type = aac_audio_specific_config.GetAudioObjectType();
375 codec_config = entry.esds.es_descriptor.decoder_specific_info();
377 }
else if (entry.esds.es_descriptor.IsDTS()) {
// DTS described through the ES descriptor: the object type is inspected and
// the channel count is compared against kDtsAudioNumChannels.
378 ObjectType audio_type = entry.esds.es_descriptor.object_type();
379 switch (audio_type) {
393 LOG(ERROR) <<
"Unsupported audio type " << audio_type
397 num_channels = entry.channelcount;
400 if (num_channels != kDtsAudioNumChannels) {
401 LOG(ERROR) <<
"Unsupported channel count " << num_channels
402 <<
" for audio type " << audio_type <<
".";
405 sampling_frequency = entry.samplerate;
406 max_bitrate = entry.esds.es_descriptor.max_bitrate();
407 avg_bitrate = entry.esds.es_descriptor.avg_bitrate();
409 LOG(ERROR) <<
"Unsupported audio format 0x" << std::hex
410 << actual_format <<
" in stsd box.";
415 FALLTHROUGH_INTENDED;
417 FALLTHROUGH_INTENDED;
419 FALLTHROUGH_INTENDED;
421 FALLTHROUGH_INTENDED;
// Formats sharing the ddts data: configuration and bitrates come from ddts,
// channel count and sample rate from the sample entry.
423 codec_config = entry.ddts.extra_data;
424 max_bitrate = entry.ddts.max_bitrate;
425 avg_bitrate = entry.ddts.avg_bitrate;
426 num_channels = entry.channelcount;
427 sampling_frequency = entry.samplerate;
// Configuration taken from the dac3 data.
430 codec_config = entry.dac3.data;
431 num_channels = entry.channelcount;
432 sampling_frequency = entry.samplerate;
// Configuration taken from the dec3 data.
435 codec_config = entry.dec3.data;
436 num_channels = entry.channelcount;
437 sampling_frequency = entry.samplerate;
// Configuration taken from the dops identification header. The pre-skip is
// scaled by kNanosecondsPerSecond / sampling_frequency, which is why a zero
// sampling frequency is rejected first.
440 codec_config = entry.dops.opus_identification_header;
441 num_channels = entry.channelcount;
442 sampling_frequency = entry.samplerate;
443 RCHECK(sampling_frequency != 0);
445 entry.dops.preskip * kNanosecondsPerSecond / sampling_frequency;
448 LOG(ERROR) <<
"Unsupported audio format 0x" << std::hex
449 << actual_format <<
" in stsd box.";
// Seek preroll: look through the sample group descriptions for the 'roll'
// grouping type. A negative roll distance (in samples) is negated and converted
// to nanoseconds using the sampling frequency.
454 uint64_t seek_preroll_ns = 0;
455 for (
const auto& sample_group_description :
456 track->media.information.sample_table.sample_group_descriptions) {
457 if (sample_group_description.grouping_type != FOURCC_roll)
459 const auto& audio_roll_recovery_entries =
460 sample_group_description.audio_roll_recovery_entries;
461 if (audio_roll_recovery_entries.size() != 1) {
462 LOG(WARNING) <<
"Unexpected number of entries in "
463 "SampleGroupDescription table with grouping type "
467 const int16_t roll_distance_in_samples =
468 audio_roll_recovery_entries[0].roll_distance;
469 if (roll_distance_in_samples < 0) {
470 RCHECK(sampling_frequency != 0);
471 seek_preroll_ns = kNanosecondsPerSecond *
472 (-roll_distance_in_samples) / sampling_frequency;
475 <<
"Roll distance is supposed to be negative, but seeing "
476 << roll_distance_in_samples;
// NOTE(review): only the fallback arm of the is_encrypted expression is
// visible; it reads default_is_protected from the track encryption info.
482 const bool is_encrypted =
485 : entry.sinf.info.track_encryption.default_is_protected == 1;
486 DVLOG(1) <<
"is_audio_track_encrypted_: " << is_encrypted;
// Record the audio stream description.
487 streams.emplace_back(
new AudioStreamInfo(
488 track->header.track_id, timescale, duration, codec,
490 codec_config.data(), codec_config.size(), entry.samplesize,
491 num_channels, sampling_frequency, seek_preroll_ns, codec_delay_ns,
492 max_bitrate, avg_bitrate, track->media.header.language.code,
// Video track: gather dimensions, pixel aspect ratio and codec details from
// the selected video sample entry.
496 if (samp_descr.type == kVideo) {
497 RCHECK(!samp_descr.video_entries.empty());
498 if (desc_idx >= samp_descr.video_entries.size())
500 const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx];
502 uint32_t coded_width = entry.width;
503 uint32_t coded_height = entry.height;
504 uint32_t pixel_width = entry.pixel_aspect.h_spacing;
505 uint32_t pixel_height = entry.pixel_aspect.v_spacing;
// NOTE(review): the handling of an all-zero pixel aspect ratio is not visible
// in this view.
506 if (pixel_width == 0 && pixel_height == 0) {
510 std::string codec_string;
511 uint8_t nalu_length_size = 0;
513 const FourCC actual_format = entry.GetActualFormat();
514 const Codec video_codec = FourCCToCodec(actual_format);
515 switch (actual_format) {
// AVC: parse the decoder configuration record for the codec string and NAL
// unit length size.
518 AVCDecoderConfigurationRecord avc_config;
519 if (!avc_config.Parse(entry.codec_configuration.data)) {
520 LOG(ERROR) <<
"Failed to parse avcc.";
523 codec_string = avc_config.GetCodecString(actual_format);
524 nalu_length_size = avc_config.nalu_length_size();
// When the sample entry's resolution disagrees with the configuration record,
// warn and use the values from the configuration record.
526 if (coded_width != avc_config.coded_width() ||
527 coded_height != avc_config.coded_height()) {
528 LOG(WARNING) <<
"Resolution in VisualSampleEntry (" << coded_width
529 <<
"," << coded_height
530 <<
") does not match with resolution in "
531 "AVCDecoderConfigurationRecord ("
532 << avc_config.coded_width() <<
","
533 << avc_config.coded_height()
534 <<
"). Use AVCDecoderConfigurationRecord.";
535 coded_width = avc_config.coded_width();
536 coded_height = avc_config.coded_height();
// Same for the pixel aspect ratio; the warning is suppressed when the sample
// entry's pixel width and height are both 1.
539 if (pixel_width != avc_config.pixel_width() ||
540 pixel_height != avc_config.pixel_height()) {
541 LOG_IF(WARNING, pixel_width != 1 || pixel_height != 1)
542 <<
"Pixel aspect ratio in PASP box (" << pixel_width <<
","
544 <<
") does not match with SAR in AVCDecoderConfigurationRecord "
546 << avc_config.pixel_width() <<
"," << avc_config.pixel_height()
547 <<
"). Use AVCDecoderConfigurationRecord.";
548 pixel_width = avc_config.pixel_width();
549 pixel_height = avc_config.pixel_height();
// HEVC: parse the decoder configuration record for the codec string and NAL
// unit length size.
555 HEVCDecoderConfigurationRecord hevc_config;
556 if (!hevc_config.Parse(entry.codec_configuration.data)) {
557 LOG(ERROR) <<
"Failed to parse hevc.";
560 codec_string = hevc_config.GetCodecString(actual_format);
561 nalu_length_size = hevc_config.nalu_length_size();
// VP codecs: parse the codec configuration record for the codec string.
567 VPCodecConfigurationRecord vp_config;
568 if (!vp_config.ParseMP4(entry.codec_configuration.data)) {
569 LOG(ERROR) <<
"Failed to parse vpcc.";
572 codec_string = vp_config.GetCodecString(video_codec);
576 LOG(ERROR) <<
"Unsupported video format "
577 << FourCCToString(actual_format) <<
" in stsd box.";
// NOTE(review): only the fallback arm of the is_encrypted expression is
// visible; it reads default_is_protected from the track encryption info.
582 const bool is_encrypted =
585 : entry.sinf.info.track_encryption.default_is_protected == 1;
586 DVLOG(1) <<
"is_video_track_encrypted_: " << is_encrypted;
587 std::shared_ptr<VideoStreamInfo> video_stream_info(
new VideoStreamInfo(
588 track->header.track_id, timescale, duration, video_codec,
589 GetH26xStreamFormat(actual_format), codec_string,
590 entry.codec_configuration.data.data(),
591 entry.codec_configuration.data.size(), coded_width, coded_height,
592 pixel_width, pixel_height,
594 nalu_length_size, track->media.header.language.code, is_encrypted));
// When the movie carries pssh boxes, concatenate their raw bytes and attach
// them to the video stream as EME init data.
597 if (moov_->pssh.size() > 0) {
598 std::vector<uint8_t> pssh_raw_data;
599 for (
const auto& pssh : moov_->pssh) {
600 pssh_raw_data.insert(pssh_raw_data.end(), pssh.raw_box.begin(),
603 video_stream_info->set_eme_init_data(pssh_raw_data.data(),
604 pssh_raw_data.size());
607 streams.push_back(video_stream_info);
// Report the collected streams, fetch keys for the movie-level pssh boxes, and
// set up the run iterator before switching to sample emission.
611 init_cb_.Run(streams);
612 if (!FetchKeysIfNecessary(moov_->pssh))
614 runs_.reset(
new TrackRunIterator(moov_.get()));
615 RCHECK(runs_->Init());
616 ChangeState(kEmittingSamples);
// Parses a 'moof' box, rebuilds the track run iterator from moov_ and
// initializes it with the parsed fragment, fetches keys for the fragment's
// pssh boxes, and switches to kEmittingSamples.
620 bool MP4MediaParser::ParseMoof(BoxReader* reader) {
624 RCHECK(moof.Parse(reader));
// A fresh iterator is created for every fragment.
626 runs_.reset(
new TrackRunIterator(moov_.get()));
627 RCHECK(runs_->Init(moof));
// NOTE(review): the statement guarded by this check is not visible in this
// view.
628 if (!FetchKeysIfNecessary(moof.pssh))
630 ChangeState(kEmittingSamples);
// Asks the decryption key source to fetch keys for the given pssh headers.
// The raw bytes of all headers are concatenated and passed to FetchKeys() with
// the CENC init data type.
634 bool MP4MediaParser::FetchKeysIfNecessary(
635 const std::vector<ProtectionSystemSpecificHeader>& headers) {
// NOTE(review): the statement guarded by this check is not visible in this
// view; presumably nothing is fetched without a key source.
640 if (!decryption_key_source_)
// Concatenate the raw boxes of every header into one buffer.
643 std::vector<uint8_t> pssh_raw_data;
644 for (
const auto& header : headers) {
645 pssh_raw_data.insert(pssh_raw_data.end(), header.raw_box.begin(),
646 header.raw_box.end());
649 decryption_key_source_->
FetchKeys(EmeInitDataType::CENC, pssh_raw_data);
651 LOG(ERROR) <<
"Error fetching decryption keys: " << status;
// Emits the next sample of the current track run through new_sample_cb_ and
// advances the run iterator. The visible code sets *err from the result of
// caching auxiliary info.
657 bool MP4MediaParser::EnqueueSample(
bool* err) {
// No valid run: trim the queue up to mdat_tail_ and go back to box parsing.
658 if (!runs_->IsRunValid()) {
661 if (!queue_.
Trim(mdat_tail_))
664 ChangeState(kParsingBoxes);
// NOTE(review): the handling of an invalid sample is not visible in this view.
668 if (!runs_->IsSampleValid()) {
677 queue_.Peek(&buf, &buf_size);
// Runs that are neither audio nor video get separate handling (not visible
// here).
682 if (!runs_->is_audio() && !runs_->is_video())
// Auxiliary info is read at its offset shifted by moof_head_; the check below
// compares the buffered byte count against the required aux info size.
692 if (runs_->AuxInfoNeedsToBeCached()) {
693 queue_.
PeekAt(runs_->aux_info_offset() + moof_head_, &buf, &buf_size);
694 if (buf_size < runs_->aux_info_size())
696 *err = !runs_->CacheAuxInfo(buf, buf_size);
// Locate the sample data: the run's sample offset shifted by moof_head_.
700 int64_t sample_offset = runs_->sample_offset() + moof_head_;
701 queue_.
PeekAt(sample_offset, &buf, &buf_size);
// Fewer bytes buffered than the sample needs. An offset that lies before the
// queue head is logged as an incorrect sample offset.
702 if (buf_size < runs_->sample_size()) {
703 if (sample_offset < queue_.
head()) {
704 LOG(ERROR) <<
"Incorrect sample offset " << sample_offset
705 <<
" < " << queue_.
head();
711 const uint8_t* media_data = buf;
712 const size_t media_data_size = runs_->sample_size();
715 const size_t kDummyDataSize = 0;
716 std::shared_ptr<MediaSample> stream_sample(
// Encrypted samples need a decrypt config. Without a decryptor source the
// sample keeps its original bytes, takes ownership of the decrypt config and
// is flagged as encrypted; the DecryptSampleBuffer() call writes into a
// freshly allocated buffer that is then transferred to the sample.
// NOTE(review): decrypt_config is moved from at line 733 and read again at
// line 736; these are presumably in mutually exclusive branches — the branch
// structure is not visible in this view, confirm.
719 if (runs_->is_encrypted()) {
720 std::shared_ptr<uint8_t> decrypted_media_data(
721 new uint8_t[media_data_size], std::default_delete<uint8_t[]>());
722 std::unique_ptr<DecryptConfig> decrypt_config = runs_->GetDecryptConfig();
723 if (!decrypt_config) {
725 LOG(ERROR) <<
"Missing decrypt config.";
729 if (!decryptor_source_) {
730 stream_sample->SetData(media_data, media_data_size);
733 stream_sample->set_decrypt_config(std::move(decrypt_config));
734 stream_sample->set_is_encrypted(
true);
736 if (!decryptor_source_->DecryptSampleBuffer(decrypt_config.get(),
737 media_data, media_data_size,
738 decrypted_media_data.get())) {
740 LOG(ERROR) <<
"Cannot decrypt samples.";
743 stream_sample->TransferData(std::move(decrypted_media_data),
747 stream_sample->SetData(media_data, media_data_size);
// Timing comes from the run iterator: dts, cts (stored as pts) and duration.
750 stream_sample->set_dts(runs_->dts());
751 stream_sample->set_pts(runs_->cts());
752 stream_sample->set_duration(runs_->duration());
754 DVLOG(3) <<
"Pushing frame: "
755 <<
", key=" << runs_->is_keyframe()
756 <<
", dur=" << runs_->duration()
757 <<
", dts=" << runs_->dts()
758 <<
", cts=" << runs_->cts()
759 <<
", size=" << runs_->sample_size();
// Hand the sample to the callback; a false result is logged as a failure.
761 if (!new_sample_cb_.Run(runs_->track_id(), stream_sample)) {
763 LOG(ERROR) <<
"Failed to process the sample.";
767 runs_->AdvanceSample();
// Advances mdat_tail_ one box at a time while it is below offset, then trims
// the queue up to the smaller of mdat_tail_ and offset.
771 bool MP4MediaParser::ReadAndDiscardMDATsUntil(
const int64_t offset) {
773 while (mdat_tail_ < offset) {
// Look at the bytes located at the current mdat tail.
776 queue_.
PeekAt(mdat_tail_, &buf, &size);
// Step over the box that starts there.
783 mdat_tail_ += box_sz;
785 queue_.
Trim(std::min(mdat_tail_, offset));
// State transition helper; logs the new state at verbosity level 2.
// NOTE(review): the assignment to the state member is not visible in this
// view.
789 void MP4MediaParser::ChangeState(State new_state) {
790 DVLOG(2) <<
"Changing state: " << new_state;
static File * OpenWithNoBuffering(const char *file_name, const char *mode)