5 #include "packager/media/formats/mp4/mp4_media_parser.h"
10 #include "packager/base/callback.h"
11 #include "packager/base/callback_helpers.h"
12 #include "packager/base/logging.h"
13 #include "packager/base/strings/string_number_conversions.h"
14 #include "packager/media/base/audio_stream_info.h"
15 #include "packager/media/base/buffer_reader.h"
16 #include "packager/media/base/decrypt_config.h"
17 #include "packager/media/base/key_source.h"
18 #include "packager/media/base/macros.h"
19 #include "packager/media/base/media_sample.h"
20 #include "packager/media/base/rcheck.h"
21 #include "packager/media/base/video_stream_info.h"
22 #include "packager/media/codecs/avc_decoder_configuration_record.h"
23 #include "packager/media/codecs/es_descriptor.h"
24 #include "packager/media/codecs/hevc_decoder_configuration_record.h"
25 #include "packager/media/codecs/vp_codec_configuration_record.h"
26 #include "packager/media/file/file.h"
27 #include "packager/media/file/file_closer.h"
28 #include "packager/media/formats/mp4/box_definitions.h"
29 #include "packager/media/formats/mp4/box_reader.h"
30 #include "packager/media/formats/mp4/track_run_iterator.h"
// Converts a time value from one timescale to another: the input is divided
// by |old_scale| in double precision and multiplied by |new_scale|, and the
// result is converted back to uint64_t on return.
// NOTE(review): the remaining parameter declarations are not visible in this
// view. Presumably the round-trip through double can lose precision for very
// large inputs -- confirm whether callers care.
37 uint64_t Rescale(uint64_t time_in_old_scale,
40 return (static_cast<double>(time_in_old_scale) / old_scale) * new_scale;
// Maps a sample-entry FourCC to the H26xStreamFormat value it implies: a NAL
// unit stream with or without parameter set NALUs, or kUnSpecified as the
// last visible return.
// NOTE(review): the case labels that select each return are not visible in
// this view, so the FourCC-to-format pairing cannot be confirmed from here.
43 H26xStreamFormat GetH26xStreamFormat(FourCC fourcc) {
46 return H26xStreamFormat::kNalUnitStreamWithoutParameterSetNalus;
48 return H26xStreamFormat::kNalUnitStreamWithParameterSetNalus;
50 return H26xStreamFormat::kNalUnitStreamWithParameterSetNalus;
52 return H26xStreamFormat::kNalUnitStreamWithoutParameterSetNalus;
54 return H26xStreamFormat::kUnSpecified;
// Returns the Codec value for a sample-entry FourCC. Used by the moov parsing
// code for both audio and video entries.
// NOTE(review): the body is not visible in this view.
58 Codec FourCCToCodec(FourCC fourcc) {
// Channel count that the DTS branch of the moov parsing code compares the
// parsed channel count against; a mismatch is logged as unsupported.
96 const uint8_t kDtsAudioNumChannels = 6;
// Scale factor used when turning "samples / sampling frequency" into
// nanoseconds (Opus pre-skip codec delay and roll-distance seek pre-roll).
97 const uint64_t kNanosecondsPerSecond = 1000000000ull;
// Constructs the parser in the kWaitingForInit state with no decryption key
// source attached.
// NOTE(review): the rest of the initializer list and the constructor body are
// not visible in this view.
101 MP4MediaParser::MP4MediaParser()
102 : state_(kWaitingForInit),
103 decryption_key_source_(NULL),
// Destructor with an empty body; no explicit teardown is performed here.
107 MP4MediaParser::~MP4MediaParser() {}
// Fragment of the parser initialization routine; its opening signature line
// is not visible in this view. It expects the parser to still be in
// kWaitingForInit with no init callback stored yet, and both callbacks to be
// non-null. These preconditions are enforced with DCHECKs only.
110 const NewSampleCB& new_sample_cb,
112 DCHECK_EQ(state_, kWaitingForInit);
113 DCHECK(init_cb_.is_null());
114 DCHECK(!init_cb.is_null());
115 DCHECK(!new_sample_cb.is_null());
// Switch to box parsing, then remember the sample callback and key source.
117 ChangeState(kParsingBoxes);
119 new_sample_cb_ = new_sample_cb;
120 decryption_key_source_ = decryption_key_source;
// The key source may be null; the statement guarded by this check is not
// visible in this view.
121 if (decryption_key_source)
// Start of Reset(); its body is not visible in this view.
125 void MP4MediaParser::Reset() {
// NOTE(review): source lines are elided between the line above and the lines
// below, so these statements may belong to a following definition rather than
// to Reset(). They require that initialization has already happened and
// return the parser to the kParsingBoxes state.
133 DCHECK_NE(state_, kWaitingForInit);
135 ChangeState(kParsingBoxes);
// Fragment of the incremental parse routine; its signature line is not visible
// in this view. It appends |buf|/|size| to the internal byte queue and then
// loops, parsing boxes or emitting samples depending on state_, for as long as
// a step reports progress and no error is flagged.
140 DCHECK_NE(state_, kWaitingForInit);
// The error state is checked before any new data is queued; the statement
// guarded by this check is not visible in this view.
142 if (state_ == kError)
145 queue_.Push(buf, size);
// Only |err| is initialized here; |result| is assigned by ParseBox() or
// EnqueueSample() in the visible branches below before the loop condition
// reads it.
147 bool result, err =
false;
150 if (state_ == kParsingBoxes) {
151 result = ParseBox(&err);
153 DCHECK_EQ(kEmittingSamples, state_);
154 result = EnqueueSample(&err);
// Offsets reported by the track runs are made absolute by adding moof_head_
// (the queue head recorded when the moof box was parsed), then MDAT data up
// to that offset is discarded.
156 int64_t max_clear = runs_->GetMaxClearOffset() + moof_head_;
157 err = !ReadAndDiscardMDATsUntil(max_clear);
160 }
while (result && !err);
163 DLOG(ERROR) <<
"Error while parsing MP4";
// Fragment of a file-based pre-scan; its signature line is not visible in this
// view. It opens |file_path|, reads top-level box headers 16 bytes at a time,
// feeds the header and contents of the 'moov' box to Parse(), and seeks past
// boxes by advancing file_position by the box size.
// NOTE(review): the loop structure, the return statements, and the code that
// derives box_type/box_size are elided here.
174 std::unique_ptr<File, FileCloser> file(
177 LOG(ERROR) <<
"Unable to open media file '" << file_path <<
"'";
// Failure to seek is only a warning at this point.
180 if (!file->Seek(0)) {
181 LOG(WARNING) <<
"Filesystem does not support seeking on file '" << file_path
186 uint64_t file_position(0);
187 bool mdat_seen(
false);
// Read just enough bytes to cover a box header.
189 const uint32_t kBoxHeaderReadSize(16);
190 std::vector<uint8_t> buffer(kBoxHeaderReadSize);
191 int64_t bytes_read = file->Read(&buffer[0], kBoxHeaderReadSize);
// Zero bytes read is reported as "no moov found".
192 if (bytes_read == 0) {
193 LOG(ERROR) <<
"Could not find 'moov' box in file '" << file_path <<
"'";
// Any other short (or negative) read is reported as a read error.
196 if (bytes_read < kBoxHeaderReadSize) {
197 LOG(ERROR) <<
"Error reading media file '" << file_path <<
"'";
205 LOG(ERROR) <<
"Could not start box from file '" << file_path <<
"'";
208 if (box_type == FOURCC_mdat) {
210 }
else if (box_type == FOURCC_moov) {
// Feed the already-read header bytes to the parser first.
216 if (!
Parse(&buffer[0], bytes_read)) {
217 LOG(ERROR) <<
"Error parsing mp4 file '" << file_path <<
"'";
// Then read and parse the remainder of the moov box, possibly across
// several reads.
// NOTE(review): if box_size were smaller than the bytes already read this
// subtraction would wrap to a huge value before resize() -- confirm that
// box_size is validated in the elided lines.
220 uint64_t bytes_to_read = box_size - bytes_read;
221 buffer.resize(bytes_to_read);
222 while (bytes_to_read > 0) {
223 bytes_read = file->Read(&buffer[0], bytes_to_read);
224 if (bytes_read <= 0) {
225 LOG(ERROR) <<
"Error reading 'moov' contents from file '" << file_path
229 if (!
Parse(&buffer[0], bytes_read)) {
230 LOG(ERROR) <<
"Error parsing mp4 file '" << file_path <<
"'";
233 bytes_to_read -= bytes_read;
// Advance to the start of the next top-level box.
239 file_position += box_size;
240 if (!file->Seek(file_position)) {
241 LOG(ERROR) <<
"Error skipping box in mp4 file '" << file_path <<
"'";
// Parses one top-level box from the front of the byte queue and pops it.
// 'moov' and 'moof' boxes are dispatched to ParseMoov()/ParseMoof(), whose
// failure is reported through |*err|. Boxes reaching the VLOG below are only
// logged as skipped.
// NOTE(review): the reader construction and the return statements are elided
// in this view.
248 bool MP4MediaParser::ParseBox(
bool* err) {
251 queue_.Peek(&buf, &size);
// No reader could be produced; the guarded statement is not visible here.
256 if (reader.get() == NULL)
259 if (reader->type() == FOURCC_mdat) {
263 NOTIMPLEMENTED() <<
" Files with MDAT before MOOV is not supported yet.";
// Track where the current box ends in queue coordinates.
269 mdat_tail_ = queue_.
head() + reader->size();
271 if (reader->type() == FOURCC_moov) {
272 *err = !ParseMoov(reader.get());
273 }
else if (reader->type() == FOURCC_moof) {
// Remember where this moof starts; sample and aux-info offsets are later
// added to this value.
274 moof_head_ = queue_.
head();
275 *err = !ParseMoof(reader.get());
283 VLOG(2) <<
"Skipping top-level box: " << FourCCToString(reader->type());
// NOTE(review): the box size is narrowed to int here -- confirm that sizes
// above INT_MAX cannot reach this point.
286 queue_.Pop(static_cast<int>(reader->size()));
// Parses the 'moov' box, builds a StreamInfo for each audio or video track it
// can describe, passes the list to init_cb_, asks FetchKeysIfNecessary() to
// handle the movie-level 'pssh' boxes, initializes the track run iterator and
// switches the parser to kEmittingSamples.
// NOTE(review): many lines (case labels, returns, closing braces) are elided
// in this view; RCHECK is defined elsewhere and presumably aborts the parse
// when its condition is false.
290 bool MP4MediaParser::ParseMoov(BoxReader* reader) {
294 moov_.reset(
new Movie);
295 RCHECK(moov_->Parse(reader));
298 std::vector<std::shared_ptr<StreamInfo>> streams;
300 for (std::vector<Track>::const_iterator track = moov_->tracks.begin();
301 track != moov_->tracks.end(); ++track) {
302 const uint32_t timescale = track->media.header.timescale;
// Duration preference: the media header duration; otherwise the movie-extends
// fragment duration rescaled from the movie timescale; otherwise the movie
// header duration when it is non-zero and not the maximum uint64_t value.
305 uint64_t duration = 0;
306 if (track->media.header.duration > 0) {
307 duration = track->media.header.duration;
308 }
else if (moov_->extends.header.fragment_duration > 0) {
309 DCHECK(moov_->header.timescale != 0);
310 duration = Rescale(moov_->extends.header.fragment_duration,
311 moov_->header.timescale,
313 }
else if (moov_->header.duration > 0 &&
314 moov_->header.duration != std::numeric_limits<uint64_t>::max()) {
315 DCHECK(moov_->header.timescale != 0);
317 Rescale(moov_->header.duration, moov_->header.timescale, timescale);
320 const SampleDescription& samp_descr =
321 track->media.information.sample_table.description;
// Pick the sample description index: from the matching 'trex' entry when
// movie-extends tracks are present, otherwise from the first chunk of the
// sample-to-chunk table.
327 if (moov_->extends.tracks.size() > 0) {
328 for (
size_t t = 0; t < moov_->extends.tracks.size(); t++) {
329 const TrackExtends& trex = moov_->extends.tracks[t];
330 if (trex.track_id == track->header.track_id) {
331 desc_idx = trex.default_sample_description_index;
336 const std::vector<ChunkInfo>& chunk_info =
337 track->media.information.sample_table.sample_to_chunk.chunk_info;
338 RCHECK(chunk_info.size() > 0);
339 desc_idx = chunk_info[0].sample_description_index;
// A zero index is rejected.
341 RCHECK(desc_idx > 0);
// ---- Audio track ----
344 if (samp_descr.type == kAudio) {
345 RCHECK(!samp_descr.audio_entries.empty());
// Out-of-range index; the handling statement is not visible in this view.
349 if (desc_idx >= samp_descr.audio_entries.size())
352 const AudioSampleEntry& entry = samp_descr.audio_entries[desc_idx];
353 const FourCC actual_format = entry.GetActualFormat();
354 Codec codec = FourCCToCodec(actual_format);
// Per-codec parameters, filled in by the switch below.
355 uint8_t num_channels = 0;
356 uint32_t sampling_frequency = 0;
357 uint64_t codec_delay_ns = 0;
358 uint8_t audio_object_type = 0;
359 uint32_t max_bitrate = 0;
360 uint32_t avg_bitrate = 0;
361 std::vector<uint8_t> codec_config;
363 switch (actual_format) {
// ES-descriptor based entries: AAC takes its parameters from the parsed
// AudioSpecificConfig.
367 if (entry.esds.es_descriptor.IsAAC()) {
369 const AACAudioSpecificConfig& aac_audio_specific_config =
370 entry.esds.aac_audio_specific_config;
371 num_channels = aac_audio_specific_config.num_channels();
372 sampling_frequency = aac_audio_specific_config.frequency();
373 audio_object_type = aac_audio_specific_config.audio_object_type();
374 codec_config = entry.esds.es_descriptor.decoder_specific_info();
376 }
else if (entry.esds.es_descriptor.IsDTS()) {
// DTS signalled through the ES descriptor object type.
377 ObjectType audio_type = entry.esds.es_descriptor.object_type();
378 switch (audio_type) {
392 LOG(ERROR) <<
"Unsupported audio type " << audio_type
396 num_channels = entry.esds.aac_audio_specific_config.num_channels();
// Only the channel count in kDtsAudioNumChannels is accepted here.
399 if (num_channels != kDtsAudioNumChannels) {
400 LOG(ERROR) <<
"Unsupported channel count " << num_channels
401 <<
" for audio type " << audio_type <<
".";
404 sampling_frequency = entry.samplerate;
405 max_bitrate = entry.esds.es_descriptor.max_bitrate();
406 avg_bitrate = entry.esds.es_descriptor.avg_bitrate();
408 LOG(ERROR) <<
"Unsupported audio format 0x" << std::hex
409 << actual_format <<
" in stsd box.";
414 FALLTHROUGH_INTENDED;
416 FALLTHROUGH_INTENDED;
418 FALLTHROUGH_INTENDED;
420 FALLTHROUGH_INTENDED;
// Entries configured from the 'ddts' box.
422 codec_config = entry.ddts.extra_data;
423 max_bitrate = entry.ddts.max_bitrate;
424 avg_bitrate = entry.ddts.avg_bitrate;
425 num_channels = entry.channelcount;
426 sampling_frequency = entry.samplerate;
// Entry configured from the 'dac3' box.
429 codec_config = entry.dac3.data;
430 num_channels = entry.channelcount;
431 sampling_frequency = entry.samplerate;
// Entry configured from the 'dec3' box.
434 codec_config = entry.dec3.data;
435 num_channels = entry.channelcount;
436 sampling_frequency = entry.samplerate;
// Opus: configuration comes from the 'dops' box; the pre-skip sample count
// is converted to nanoseconds using the sampling frequency.
439 codec_config = entry.dops.opus_identification_header;
440 num_channels = entry.channelcount;
441 sampling_frequency = entry.samplerate;
442 RCHECK(sampling_frequency != 0);
444 entry.dops.preskip * kNanosecondsPerSecond / sampling_frequency;
447 LOG(ERROR) <<
"Unsupported audio format 0x" << std::hex
448 << actual_format <<
" in stsd box.";
// Derive the seek pre-roll from the 'roll' sample group description, if one
// is present. Only a negative roll distance yields a pre-roll value.
453 uint64_t seek_preroll_ns = 0;
454 for (
const auto& sample_group_description :
455 track->media.information.sample_table.sample_group_descriptions) {
456 if (sample_group_description.grouping_type != FOURCC_roll)
458 const auto& audio_roll_recovery_entries =
459 sample_group_description.audio_roll_recovery_entries;
460 if (audio_roll_recovery_entries.size() != 1) {
461 LOG(WARNING) <<
"Unexpected number of entries in "
462 "SampleGroupDescription table with grouping type "
466 const int16_t roll_distance_in_samples =
467 audio_roll_recovery_entries[0].roll_distance;
468 if (roll_distance_in_samples < 0) {
469 RCHECK(sampling_frequency != 0);
470 seek_preroll_ns = kNanosecondsPerSecond *
471 (-roll_distance_in_samples) / sampling_frequency;
474 <<
"Roll distance is supposed to be negative, but seeing "
475 << roll_distance_in_samples;
// Encryption flag; the condition of this conditional expression is elided in
// this view. The visible arm reads the track encryption box.
481 const bool is_encrypted =
484 : entry.sinf.info.track_encryption.default_is_protected == 1;
485 DVLOG(1) <<
"is_audio_track_encrypted_: " << is_encrypted;
486 streams.emplace_back(
new AudioStreamInfo(
487 track->header.track_id, timescale, duration, codec,
489 codec_config.data(), codec_config.size(), entry.samplesize,
490 num_channels, sampling_frequency, seek_preroll_ns, codec_delay_ns,
491 max_bitrate, avg_bitrate, track->media.header.language.code,
// ---- Video track ----
495 if (samp_descr.type == kVideo) {
496 RCHECK(!samp_descr.video_entries.empty());
// Out-of-range index; the handling statement is not visible in this view.
497 if (desc_idx >= samp_descr.video_entries.size())
499 const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx];
// Start from the dimensions and pixel aspect ratio in the sample entry; the
// AVC branch below may override them from the decoder configuration.
501 uint32_t coded_width = entry.width;
502 uint32_t coded_height = entry.height;
503 uint32_t pixel_width = entry.pixel_aspect.h_spacing;
504 uint32_t pixel_height = entry.pixel_aspect.v_spacing;
// Both spacings are zero; the body of this branch is elided in this view.
505 if (pixel_width == 0 && pixel_height == 0) {
509 std::string codec_string;
510 uint8_t nalu_length_size = 0;
512 const FourCC actual_format = entry.GetActualFormat();
513 const Codec video_codec = FourCCToCodec(actual_format);
514 switch (actual_format) {
// AVC: parse the decoder configuration record and prefer its resolution and
// sample aspect ratio over the sample entry values when they disagree.
517 AVCDecoderConfigurationRecord avc_config;
518 if (!avc_config.Parse(entry.codec_configuration.data)) {
519 LOG(ERROR) <<
"Failed to parse avcc.";
522 codec_string = avc_config.GetCodecString();
523 nalu_length_size = avc_config.nalu_length_size();
525 if (coded_width != avc_config.coded_width() ||
526 coded_height != avc_config.coded_height()) {
527 LOG(WARNING) <<
"Resolution in VisualSampleEntry (" << coded_width
528 <<
"," << coded_height
529 <<
") does not match with resolution in "
530 "AVCDecoderConfigurationRecord ("
531 << avc_config.coded_width() <<
","
532 << avc_config.coded_height()
533 <<
"). Use AVCDecoderConfigurationRecord.";
534 coded_width = avc_config.coded_width();
535 coded_height = avc_config.coded_height();
538 if (pixel_width != avc_config.pixel_width() ||
539 pixel_height != avc_config.pixel_height()) {
// The warning is suppressed when the sample entry values are 1:1.
540 LOG_IF(WARNING, pixel_width != 1 || pixel_height != 1)
541 <<
"Pixel aspect ratio in PASP box (" << pixel_width <<
","
543 <<
") does not match with SAR in AVCDecoderConfigurationRecord "
545 << avc_config.pixel_width() <<
"," << avc_config.pixel_height()
546 <<
"). Use AVCDecoderConfigurationRecord.";
547 pixel_width = avc_config.pixel_width();
548 pixel_height = avc_config.pixel_height();
// HEVC: the codec string and NALU length size come from the decoder
// configuration record.
554 HEVCDecoderConfigurationRecord hevc_config;
555 if (!hevc_config.Parse(entry.codec_configuration.data)) {
556 LOG(ERROR) <<
"Failed to parse hevc.";
559 codec_string = hevc_config.GetCodecString(actual_format);
560 nalu_length_size = hevc_config.nalu_length_size();
// VP codecs: only the codec string is taken from the configuration record.
566 VPCodecConfigurationRecord vp_config;
567 if (!vp_config.ParseMP4(entry.codec_configuration.data)) {
568 LOG(ERROR) <<
"Failed to parse vpcc.";
571 codec_string = vp_config.GetCodecString(video_codec);
575 LOG(ERROR) <<
"Unsupported video format "
576 << FourCCToString(actual_format) <<
" in stsd box.";
// Encryption flag; the condition of this conditional expression is elided in
// this view.
581 const bool is_encrypted =
584 : entry.sinf.info.track_encryption.default_is_protected == 1;
585 DVLOG(1) <<
"is_video_track_encrypted_: " << is_encrypted;
586 std::shared_ptr<VideoStreamInfo> video_stream_info(
new VideoStreamInfo(
587 track->header.track_id, timescale, duration, video_codec,
588 GetH26xStreamFormat(actual_format), codec_string,
589 entry.codec_configuration.data.data(),
590 entry.codec_configuration.data.size(), coded_width, coded_height,
591 pixel_width, pixel_height,
593 nalu_length_size, track->media.header.language.code, is_encrypted));
// Concatenate the raw bytes of every movie-level 'pssh' box and attach them
// to the video stream as EME init data.
596 if (moov_->pssh.size() > 0) {
597 std::vector<uint8_t> pssh_raw_data;
598 for (
const auto& pssh : moov_->pssh) {
599 pssh_raw_data.insert(pssh_raw_data.end(), pssh.raw_box.begin(),
602 video_stream_info->set_eme_init_data(pssh_raw_data.data(),
603 pssh_raw_data.size());
606 streams.push_back(video_stream_info);
// Publish the stream list, then fetch keys and prepare the run iterator.
610 init_cb_.Run(streams);
611 if (!FetchKeysIfNecessary(moov_->pssh))
613 runs_.reset(
new TrackRunIterator(moov_.get()));
614 RCHECK(runs_->Init());
615 ChangeState(kEmittingSamples);
// Parses a 'moof' box, rebuilds the track run iterator from it, passes the
// fragment-level 'pssh' boxes to FetchKeysIfNecessary(), and switches the
// parser to kEmittingSamples.
// NOTE(review): the declaration of |moof| and the return statements are
// elided in this view.
619 bool MP4MediaParser::ParseMoof(BoxReader* reader) {
623 RCHECK(moof.Parse(reader));
// A fresh iterator is created for every fragment, built on the stored moov.
625 runs_.reset(
new TrackRunIterator(moov_.get()));
626 RCHECK(runs_->Init(moof));
627 if (!FetchKeysIfNecessary(moof.pssh))
629 ChangeState(kEmittingSamples);
// Offers each 'pssh' box in |headers| to the decryption key source, in order,
// by passing its raw box bytes to FetchKeys(). A failed fetch is logged at
// VLOG(1) and the next box is tried.
// NOTE(review): the early-exit conditions and return values are elided in this
// view; the two error logs below presumably precede failure returns.
633 bool MP4MediaParser::FetchKeysIfNecessary(
634 const std::vector<ProtectionSystemSpecificHeader>& headers) {
// No key source configured; the guarded statement is not visible here.
639 if (!decryption_key_source_)
643 for (std::vector<ProtectionSystemSpecificHeader>::const_iterator iter =
644 headers.begin(); iter != headers.end(); ++iter) {
645 status = decryption_key_source_->
FetchKeys(iter->raw_box);
649 VLOG(1) <<
"Unable to fetch decryption keys: " << status
650 <<
", trying the next PSSH box";
657 LOG(ERROR) <<
"Error fetching decryption keys: " << status;
661 LOG(ERROR) <<
"No viable 'pssh' box found for content decryption.";
// Emits the current sample of the track run iterator through new_sample_cb_,
// decrypting it first when a decryptor source is available, and then advances
// the iterator. When the current run is no longer valid the queue is trimmed
// to mdat_tail_ and the parser goes back to kParsingBoxes.
// NOTE(review): the return statements and most assignments to |*err| are
// elided in this view.
665 bool MP4MediaParser::EnqueueSample(
bool* err) {
// All runs consumed: drop data up to the end of the mdat and resume box
// parsing.
666 if (!runs_->IsRunValid()) {
669 if (!queue_.
Trim(mdat_tail_))
672 ChangeState(kParsingBoxes);
676 if (!runs_->IsSampleValid()) {
685 queue_.Peek(&buf, &buf_size);
// Tracks that are neither audio nor video; the guarded statement is elided.
690 if (!runs_->is_audio() && !runs_->is_video())
// Cache the auxiliary info for the run once enough bytes are queued. Its
// offset is made absolute by adding moof_head_.
700 if (runs_->AuxInfoNeedsToBeCached()) {
701 queue_.
PeekAt(runs_->aux_info_offset() + moof_head_, &buf, &buf_size);
702 if (buf_size < runs_->aux_info_size())
704 *err = !runs_->CacheAuxInfo(buf, buf_size);
// Locate the sample bytes in the queue.
708 int64_t sample_offset = runs_->sample_offset() + moof_head_;
709 queue_.
PeekAt(sample_offset, &buf, &buf_size);
// Not enough bytes available for the sample. An offset before the queue head
// is logged as an incorrect sample offset.
710 if (buf_size < runs_->sample_size()) {
711 if (sample_offset < queue_.
head()) {
712 LOG(ERROR) <<
"Incorrect sample offset " << sample_offset
713 <<
" < " << queue_.
head();
719 std::shared_ptr<MediaSample> stream_sample(
721 if (runs_->is_encrypted()) {
722 std::unique_ptr<DecryptConfig> decrypt_config = runs_->GetDecryptConfig();
723 if (!decrypt_config) {
725 LOG(ERROR) <<
"Missing decrypt config.";
// Without a decryptor source the sample is passed on still encrypted, with
// its decrypt config attached; otherwise it is decrypted in place.
729 if (!decryptor_source_) {
732 stream_sample->set_decrypt_config(std::move(decrypt_config));
733 stream_sample->set_is_encrypted(
true);
734 }
else if (!decryptor_source_->DecryptSampleBuffer(
735 decrypt_config.get(), stream_sample->writable_data(),
736 stream_sample->data_size())) {
738 LOG(ERROR) <<
"Cannot decrypt samples.";
// Timing comes from the run iterator; the presentation timestamp is the
// iterator's cts().
743 stream_sample->set_dts(runs_->dts());
744 stream_sample->set_pts(runs_->cts());
745 stream_sample->set_duration(runs_->duration());
747 DVLOG(3) <<
"Pushing frame: "
748 <<
", key=" << runs_->is_keyframe()
749 <<
", dur=" << runs_->duration()
750 <<
", dts=" << runs_->dts()
751 <<
", cts=" << runs_->cts()
752 <<
", size=" << runs_->sample_size();
// A false return from the callback is logged as a processing failure.
754 if (!new_sample_cb_.Run(runs_->track_id(), stream_sample)) {
756 LOG(ERROR) <<
"Failed to process the sample.";
760 runs_->AdvanceSample();
// Advances mdat_tail_ box by box while it is below |offset|, then trims the
// queue to the smaller of mdat_tail_ and |offset|.
// NOTE(review): the box header parsing that produces |box_sz|, and the return
// statements, are elided in this view.
764 bool MP4MediaParser::ReadAndDiscardMDATsUntil(
const int64_t offset) {
766 while (mdat_tail_ < offset) {
// Peek at the bytes that follow the current mdat tail.
769 queue_.
PeekAt(mdat_tail_, &buf, &size);
776 mdat_tail_ += box_sz;
// Never trim past the requested offset.
778 queue_.
Trim(std::min(mdat_tail_, offset));
// Logs the requested state at DVLOG(2).
// NOTE(review): the assignment to state_ is presumably on a line elided from
// this view.
782 void MP4MediaParser::ChangeState(State new_state) {
783 DVLOG(2) <<
"Changing state: " << new_state;