5 #include "packager/media/formats/mp4/mp4_media_parser.h"
9 #include "packager/base/callback.h"
10 #include "packager/base/callback_helpers.h"
11 #include "packager/base/logging.h"
12 #include "packager/base/memory/ref_counted.h"
13 #include "packager/base/strings/string_number_conversions.h"
14 #include "packager/media/base/audio_stream_info.h"
15 #include "packager/media/base/buffer_reader.h"
16 #include "packager/media/base/decrypt_config.h"
17 #include "packager/media/base/key_source.h"
18 #include "packager/media/base/macros.h"
19 #include "packager/media/base/media_sample.h"
20 #include "packager/media/base/rcheck.h"
21 #include "packager/media/base/video_stream_info.h"
22 #include "packager/media/file/file.h"
23 #include "packager/media/file/file_closer.h"
24 #include "packager/media/filters/avc_decoder_configuration.h"
25 #include "packager/media/filters/hevc_decoder_configuration.h"
26 #include "packager/media/filters/vp_codec_configuration.h"
27 #include "packager/media/formats/mp4/box_definitions.h"
28 #include "packager/media/formats/mp4/box_reader.h"
29 #include "packager/media/formats/mp4/es_descriptor.h"
30 #include "packager/media/formats/mp4/track_run_iterator.h"
32 namespace edash_packager {
// Rescales |time_in_old_scale| from |old_scale| units into |new_scale| units.
// The division and the multiplication are carried out in double precision and
// the result is converted back to uint64_t on return.
// NOTE(review): very large inputs may lose precision through the double
// intermediate -- confirm this is acceptable for the durations handled here.
// The remaining parameter lines are not visible in this excerpt.
37 uint64_t Rescale(uint64_t time_in_old_scale,
40 return (static_cast<double>(time_in_old_scale) / old_scale) * new_scale;
// Maps a FourCC to the corresponding VideoCodec value.
// Only the kUnknownVideoCodec return is visible in this excerpt; the
// individual FourCC cases are not shown.
43 VideoCodec FourCCToVideoCodec(FourCC fourcc) {
58 return kUnknownVideoCodec;
// Maps a FourCC to the corresponding AudioCodec value.
// Only the kUnknownAudioCodec return is visible in this excerpt; the
// individual FourCC cases are not shown.
62 AudioCodec FourCCToAudioCodec(FourCC fourcc) {
81 return kUnknownAudioCodec;
// Channel count that ParseMoov compares |num_channels| against for DTS audio
// described through an esds box; any other count is logged as unsupported.
86 const uint8_t kDtsAudioNumChannels = 6;
// Nanoseconds per second. ParseMoov multiplies by this and divides by the
// sampling frequency when deriving nanosecond values (Opus pre-skip, roll
// distance).
87 const uint64_t kNanosecondsPerSecond = 1000000000ull;
// Constructs a parser in the kWaitingForInit state with no decryption key
// source attached. Any further member initializers and the constructor body
// are not visible in this excerpt.
91 MP4MediaParser::MP4MediaParser()
92 : state_(kWaitingForInit),
93 decryption_key_source_(NULL),
// The destructor has an empty body.
97 MP4MediaParser::~MP4MediaParser() {}
// Fragment of the parser initialization routine (its header line is not
// visible in this excerpt; presumably MP4MediaParser::Init -- TODO confirm).
// It takes an init callback, a new-sample callback and a decryption key
// source, and moves the parser into the kParsingBoxes state.
100 const NewSampleCB& new_sample_cb,
// Initialization is only expected once, from the kWaitingForInit state, and
// both callbacks must be set.
102 DCHECK_EQ(state_, kWaitingForInit);
103 DCHECK(init_cb_.is_null());
104 DCHECK(!init_cb.is_null());
105 DCHECK(!new_sample_cb.is_null());
107 ChangeState(kParsingBoxes);
// Keep the sample callback and the key source for later use.
109 new_sample_cb_ = new_sample_cb;
110 decryption_key_source_ = decryption_key_source;
// Extra setup happens only when a key source was supplied (the statement
// guarded by this condition is not visible in this excerpt).
111 if (decryption_key_source)
// Reset() -- its body is not visible in this excerpt.
115 void MP4MediaParser::Reset() {
// NOTE(review): the embedded line numbers jump from 115 to 123, so the two
// statements below likely belong to a following function whose header is not
// visible here -- TODO confirm. They assert that the parser has been
// initialized and return it to the kParsingBoxes state.
123 DCHECK_NE(state_, kWaitingForInit);
125 ChangeState(kParsingBoxes);
// Fragment of the incremental parse entry point (its signature is not visible
// in this excerpt; presumably MP4MediaParser::Parse(buf, size) -- TODO
// confirm). It appends the incoming bytes to queue_ and then alternates
// between box parsing and sample emission in a loop.
130 DCHECK_NE(state_, kWaitingForInit);
// The error state is checked before any new data is queued.
132 if (state_ == kError)
135 queue_.Push(buf, size);
// |result| is assigned inside the loop below; |err| starts out false.
137 bool result, err =
false;
// Dispatch on the current state: ParseBox while in kParsingBoxes, otherwise
// the parser is expected to be in kEmittingSamples and EnqueueSample runs.
140 if (state_ == kParsingBoxes) {
141 result = ParseBox(&err);
143 DCHECK_EQ(kEmittingSamples, state_);
144 result = EnqueueSample(&err);
// Discard mdat data up to the clear offset reported by the track runs, offset
// by moof_head_; a failure to do so is recorded in |err|.
146 int64_t max_clear = runs_->GetMaxClearOffset() + moof_head_;
147 err = !ReadAndDiscardMDATsUntil(max_clear);
150 }
while (result && !err);
153 DLOG(ERROR) <<
"Error while parsing MP4";
// Fragment of a routine that walks the top-level boxes of the file at
// |file_path|, looking for the 'moov' box and feeding it to Parse(). The
// function header is not visible in this excerpt (presumably
// MP4MediaParser::LoadMoov -- TODO confirm).
164 scoped_ptr<File, FileCloser> file(
167 LOG(ERROR) <<
"Unable to open media file '" << file_path <<
"'";
// Seeking to offset 0 doubles as a probe for seek support on this file.
170 if (!file->Seek(0)) {
171 LOG(WARNING) <<
"Filesystem does not support seeking on file '" << file_path
176 uint64_t file_position(0);
177 bool mdat_seen(
false);
// Box headers are read in fixed 16-byte chunks.
179 const uint32_t kBoxHeaderReadSize(16);
180 std::vector<uint8_t> buffer(kBoxHeaderReadSize);
181 int64_t bytes_read = file->Read(&buffer[0], kBoxHeaderReadSize);
// A zero-length read is reported as the 'moov' box not having been found.
182 if (bytes_read == 0) {
183 LOG(ERROR) <<
"Could not find 'moov' box in file '" << file_path <<
"'";
// Anything shorter than a full header read is reported as a read error.
186 if (bytes_read < kBoxHeaderReadSize) {
187 LOG(ERROR) <<
"Error reading media file '" << file_path <<
"'";
195 LOG(ERROR) <<
"Could not start top level box from file '" << file_path
// Branch on the box type. For 'moov', the header bytes already read are
// parsed first and the remainder of the box is then read and parsed below.
199 if (box_type == FOURCC_mdat) {
201 }
else if (box_type == FOURCC_moov) {
207 if (!
Parse(&buffer[0], bytes_read)) {
208 LOG(ERROR) <<
"Error parsing mp4 file '" << file_path <<
"'";
// Remaining 'moov' payload: box size minus what the header read consumed.
211 uint64_t bytes_to_read = box_size - bytes_read;
212 buffer.resize(bytes_to_read);
// Read() may return fewer bytes than requested, so keep reading and parsing
// until the whole box has been consumed; a non-positive read is an error.
213 while (bytes_to_read > 0) {
214 bytes_read = file->Read(&buffer[0], bytes_to_read);
215 if (bytes_read <= 0) {
216 LOG(ERROR) <<
"Error reading 'moov' contents from file '" << file_path
220 if (!
Parse(&buffer[0], bytes_read)) {
221 LOG(ERROR) <<
"Error parsing mp4 file '" << file_path <<
"'";
224 bytes_to_read -= bytes_read;
// Advance past the current box and seek to the start of the next one.
230 file_position += box_size;
231 if (!file->Seek(file_position)) {
232 LOG(ERROR) <<
"Error skipping box in mp4 file '" << file_path <<
"'";
// Parses one top-level box from the front of queue_.
// 'moov' and 'moof' boxes are handed to ParseMoov()/ParseMoof(), and |*err|
// is set when they fail. Other box types are logged as skipped. The box is
// then popped from queue_.
// Several lines of this function (including its return statements) are not
// visible in this excerpt.
239 bool MP4MediaParser::ParseBox(
bool* err) {
242 queue_.Peek(&buf, &size);
// A NULL reader is special-cased (the handling is not visible here).
247 if (reader.get() == NULL)
250 if (reader->type() == FOURCC_mdat) {
254 NOTIMPLEMENTED() <<
" Files with MDAT before MOOV is not supported yet.";
// mdat_tail_ is set to the queue head plus the size of the mdat box.
260 mdat_tail_ = queue_.
head() + reader->size();
262 if (reader->type() == FOURCC_moov) {
263 *err = !ParseMoov(reader.get());
264 }
else if (reader->type() == FOURCC_moof) {
// Record the queue head as moof_head_ before parsing the fragment; other
// functions in this file add moof_head_ to offsets obtained from runs_.
265 moof_head_ = queue_.
head();
266 *err = !ParseMoof(reader.get());
274 VLOG(2) <<
"Skipping top-level box: " << FourCCToString(reader->type());
277 queue_.Pop(reader->size());
// Parses the 'moov' box from |reader| into moov_, builds one StreamInfo per
// audio/video track, reports the streams through init_cb_, fetches decryption
// keys from the movie-level pssh boxes, and switches to kEmittingSamples.
// Many lines of this function (case labels, return statements, closing
// braces) are not visible in this excerpt.
281 bool MP4MediaParser::ParseMoov(BoxReader* reader) {
285 moov_.reset(
new Movie);
286 RCHECK(moov_->Parse(reader));
289 std::vector<scoped_refptr<StreamInfo> > streams;
291 for (std::vector<Track>::const_iterator track = moov_->tracks.begin();
292 track != moov_->tracks.end(); ++track) {
293 const uint32_t timescale = track->media.header.timescale;
// Pick the track duration: prefer the media header duration, then the
// fragment duration from the movie extends header, then the movie header
// duration (unless it is the all-ones uint64_t value). The latter two are
// passed through Rescale() from the movie timescale.
296 uint64_t duration = 0;
297 if (track->media.header.duration > 0) {
298 duration = track->media.header.duration;
299 }
else if (moov_->extends.header.fragment_duration > 0) {
300 DCHECK(moov_->header.timescale != 0);
301 duration = Rescale(moov_->extends.header.fragment_duration,
302 moov_->header.timescale,
304 }
else if (moov_->header.duration > 0 &&
305 moov_->header.duration != std::numeric_limits<uint64_t>::max()) {
306 DCHECK(moov_->header.timescale != 0);
308 Rescale(moov_->header.duration, moov_->header.timescale, timescale);
311 const SampleDescription& samp_descr =
312 track->media.information.sample_table.description;
// Choose the sample description index: when the movie carries track extends,
// use the default index of the TrackExtends entry matching this track id.
318 if (moov_->extends.tracks.size() > 0) {
319 for (
size_t t = 0; t < moov_->extends.tracks.size(); t++) {
320 const TrackExtends& trex = moov_->extends.tracks[t];
321 if (trex.track_id == track->header.track_id) {
322 desc_idx = trex.default_sample_description_index;
// Alternative source for the index: the first sample-to-chunk entry
// (presumably the branch taken when there are no track extends -- the
// enclosing condition is not visible here).
327 const std::vector<ChunkInfo>& chunk_info =
328 track->media.information.sample_table.sample_to_chunk.chunk_info;
329 RCHECK(chunk_info.size() > 0);
330 desc_idx = chunk_info[0].sample_description_index;
// The index must be positive at this point.
332 RCHECK(desc_idx > 0);
// ---- Audio track ----
335 if (samp_descr.type == kAudio) {
336 RCHECK(!samp_descr.audio_entries.empty());
// An out-of-range index is special-cased (the handling is not visible here).
340 if (desc_idx >= samp_descr.audio_entries.size())
343 const AudioSampleEntry& entry = samp_descr.audio_entries[desc_idx];
344 const FourCC actual_format = entry.GetActualFormat();
345 AudioCodec codec = FourCCToAudioCodec(actual_format);
// Per-codec parameters filled in by the switch below; all default to zero or
// empty.
346 uint8_t num_channels = 0;
347 uint32_t sampling_frequency = 0;
348 uint64_t codec_delay_ns = 0;
349 uint8_t audio_object_type = 0;
350 uint32_t max_bitrate = 0;
351 uint32_t avg_bitrate = 0;
352 std::vector<uint8_t> extra_data;
354 switch (actual_format) {
// esds carrying AAC: channel count, frequency and object type come from the
// AAC audio specific config; the decoder specific info becomes the extra
// data.
358 if (entry.esds.es_descriptor.IsAAC()) {
360 const AACAudioSpecificConfig& aac_audio_specific_config =
361 entry.esds.aac_audio_specific_config;
362 num_channels = aac_audio_specific_config.num_channels();
363 sampling_frequency = aac_audio_specific_config.frequency();
364 audio_object_type = aac_audio_specific_config.audio_object_type();
365 extra_data = entry.esds.es_descriptor.decoder_specific_info();
367 }
else if (entry.esds.es_descriptor.IsDTS()) {
// esds carrying DTS: the object type selects the codec (cases not visible).
368 ObjectType audio_type = entry.esds.es_descriptor.object_type();
369 switch (audio_type) {
383 LOG(ERROR) <<
"Unsupported audio type " << audio_type
// NOTE(review): the channel count is read from aac_audio_specific_config on
// what appears to be the DTS path -- confirm this is intended.
387 num_channels = entry.esds.aac_audio_specific_config.num_channels();
// Only kDtsAudioNumChannels channels are accepted here.
390 if (num_channels != kDtsAudioNumChannels) {
391 LOG(ERROR) <<
"Unsupported channel count " << num_channels
392 <<
" for audio type " << audio_type <<
".";
395 sampling_frequency = entry.samplerate;
396 max_bitrate = entry.esds.es_descriptor.max_bitrate();
397 avg_bitrate = entry.esds.es_descriptor.avg_bitrate();
399 LOG(ERROR) <<
"Unsupported audio format 0x" << std::hex
400 << actual_format <<
" in stsd box.";
// Several case labels (not visible here) fall through to the shared ddts
// handling below.
405 FALLTHROUGH_INTENDED;
407 FALLTHROUGH_INTENDED;
409 FALLTHROUGH_INTENDED;
411 FALLTHROUGH_INTENDED;
// Parameters taken from the ddts box and the sample entry.
413 extra_data = entry.ddts.extra_data;
414 max_bitrate = entry.ddts.max_bitrate;
415 avg_bitrate = entry.ddts.avg_bitrate;
416 num_channels = entry.channelcount;
417 sampling_frequency = entry.samplerate;
// Extra data taken from the dac3 box.
420 extra_data = entry.dac3.data;
421 num_channels = entry.channelcount;
422 sampling_frequency = entry.samplerate;
// Extra data taken from the dec3 box.
425 extra_data = entry.dec3.data;
426 num_channels = entry.channelcount;
427 sampling_frequency = entry.samplerate;
// Extra data is the Opus identification header from the dops box. The
// sampling frequency is checked for zero because it is used as a divisor when
// converting the pre-skip value with kNanosecondsPerSecond.
430 extra_data = entry.dops.opus_identification_header;
431 num_channels = entry.channelcount;
432 sampling_frequency = entry.samplerate;
433 RCHECK(sampling_frequency != 0);
435 entry.dops.preskip * kNanosecondsPerSecond / sampling_frequency;
438 LOG(ERROR) <<
"Unsupported audio format 0x" << std::hex
439 << actual_format <<
" in stsd box.";
// Derive the seek pre-roll from sample group descriptions whose grouping type
// is FOURCC_roll; other grouping types are special-cased by the check below.
444 uint64_t seek_preroll_ns = 0;
445 for (
const auto& sample_group_description :
446 track->media.information.sample_table.sample_group_descriptions) {
447 if (sample_group_description.grouping_type != FOURCC_roll)
449 const auto& audio_roll_recovery_entries =
450 sample_group_description.audio_roll_recovery_entries;
// Exactly one roll recovery entry is expected; anything else is warned about.
451 if (audio_roll_recovery_entries.size() != 1) {
452 LOG(WARNING) <<
"Unexpected number of entries in "
453 "SampleGroupDescription table with grouping type "
457 const int16_t roll_distance_in_samples =
458 audio_roll_recovery_entries[0].roll_distance;
// A negative roll distance is negated and converted to nanoseconds using the
// sampling frequency; a non-negative one is reported in the log message
// below.
459 if (roll_distance_in_samples < 0) {
460 RCHECK(sampling_frequency != 0);
461 seek_preroll_ns = kNanosecondsPerSecond *
462 (-roll_distance_in_samples) / sampling_frequency;
465 <<
"Roll distance is supposed to be negative, but seeing "
466 << roll_distance_in_samples;
// The track counts as encrypted when the track encryption box marks it as
// protected by default.
471 const bool is_encrypted =
472 entry.sinf.info.track_encryption.default_is_protected == 1;
473 DVLOG(1) <<
"is_audio_track_encrypted_: " << is_encrypted;
474 streams.push_back(
new AudioStreamInfo(
475 track->header.track_id,
480 track->media.header.language.code,
// ---- Video track ----
493 if (samp_descr.type == kVideo) {
494 RCHECK(!samp_descr.video_entries.empty());
// An out-of-range index is special-cased (the handling is not visible here).
495 if (desc_idx >= samp_descr.video_entries.size())
497 const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx];
// Start from the dimensions and pixel aspect ratio in the sample entry; the
// codec configuration may override them below.
499 uint32_t coded_width = entry.width;
500 uint32_t coded_height = entry.height;
501 uint32_t pixel_width = entry.pixel_aspect.h_spacing;
502 uint32_t pixel_height = entry.pixel_aspect.v_spacing;
// Both spacings being zero is special-cased (the handling is not visible
// here).
503 if (pixel_width == 0 && pixel_height == 0) {
507 std::string codec_string;
508 uint8_t nalu_length_size = 0;
510 const FourCC actual_format = entry.GetActualFormat();
511 const VideoCodec video_codec = FourCCToVideoCodec(actual_format);
512 switch (actual_format) {
// AVC: parse the codec configuration record for the codec string and NALU
// length size.
514 AVCDecoderConfiguration avc_config;
515 if (!avc_config.Parse(entry.codec_config_record.data)) {
516 LOG(ERROR) <<
"Failed to parse avcc.";
519 codec_string = avc_config.GetCodecString();
520 nalu_length_size = avc_config.nalu_length_size();
// When the sample entry resolution disagrees with the configuration record,
// warn and use the values from the record.
522 if (coded_width != avc_config.coded_width() ||
523 coded_height != avc_config.coded_height()) {
524 LOG(WARNING) <<
"Resolution in VisualSampleEntry (" << coded_width
525 <<
"," << coded_height
526 <<
") does not match with resolution in "
527 "AVCDecoderConfigurationRecord ("
528 << avc_config.coded_width() <<
","
529 << avc_config.coded_height()
530 <<
"). Use AVCDecoderConfigurationRecord.";
531 coded_width = avc_config.coded_width();
532 coded_height = avc_config.coded_height();
// Same policy for the pixel aspect ratio; the warning is emitted only when
// the sample entry ratio is something other than 1:1.
535 if (pixel_width != avc_config.pixel_width() ||
536 pixel_height != avc_config.pixel_height()) {
537 LOG_IF(WARNING, pixel_width != 1 || pixel_height != 1)
538 <<
"Pixel aspect ratio in PASP box (" << pixel_width <<
","
540 <<
") does not match with SAR in AVCDecoderConfigurationRecord "
542 << avc_config.pixel_width() <<
"," << avc_config.pixel_height()
543 <<
"). Use AVCDecoderConfigurationRecord.";
544 pixel_width = avc_config.pixel_width();
545 pixel_height = avc_config.pixel_height();
// HEVC: the codec string and NALU length size come from the parsed
// configuration.
551 HEVCDecoderConfiguration hevc_config;
552 if (!hevc_config.Parse(entry.codec_config_record.data)) {
553 LOG(ERROR) <<
"Failed to parse hevc.";
556 codec_string = hevc_config.GetCodecString(video_codec);
557 nalu_length_size = hevc_config.nalu_length_size();
// VP codecs: only the codec string is taken from the parsed configuration.
563 VPCodecConfiguration vp_config;
564 if (!vp_config.Parse(entry.codec_config_record.data)) {
565 LOG(ERROR) <<
"Failed to parse vpcc.";
568 codec_string = vp_config.GetCodecString(video_codec);
572 LOG(ERROR) <<
"Unsupported video format "
573 << FourCCToString(actual_format) <<
" in stsd box.";
// The track counts as encrypted when the track encryption box marks it as
// protected by default.
577 const bool is_encrypted =
578 entry.sinf.info.track_encryption.default_is_protected == 1;
579 DVLOG(1) <<
"is_video_track_encrypted_: " << is_encrypted;
580 streams.push_back(
new VideoStreamInfo(
581 track->header.track_id, timescale, duration, video_codec,
582 codec_string, track->media.header.language.code, coded_width,
583 coded_height, pixel_width, pixel_height,
585 nalu_length_size, entry.codec_config_record.data.data(),
586 entry.codec_config_record.data.size(), is_encrypted));
// Report the collected streams, fetch keys from the movie-level pssh boxes,
// create the track run iterator for moov_, and start emitting samples.
590 init_cb_.Run(streams);
591 if (!FetchKeysIfNecessary(moov_->pssh))
593 runs_.reset(
new TrackRunIterator(moov_.get()));
594 RCHECK(runs_->Init());
595 ChangeState(kEmittingSamples);
// Parses a 'moof' box from |reader|, rebuilds the track run iterator for the
// parsed fragment, fetches decryption keys from the fragment-level pssh
// boxes, and switches to kEmittingSamples.
// Some lines (including the |moof| declaration and the return statements) are
// not visible in this excerpt.
599 bool MP4MediaParser::ParseMoof(BoxReader* reader) {
603 RCHECK(moof.Parse(reader));
// A fresh iterator is created from moov_ and initialized with this fragment.
605 runs_.reset(
new TrackRunIterator(moov_.get()));
606 RCHECK(runs_->Init(moof));
607 if (!FetchKeysIfNecessary(moof.pssh))
609 ChangeState(kEmittingSamples);
// Fetches decryption keys through decryption_key_source_ using the pssh boxes
// in |headers|. Each box's raw data is passed to FetchKeys() in turn; the
// log messages indicate that a failure on one box moves on to the next.
// The early-exit conditions and return statements are not visible in this
// excerpt.
613 bool MP4MediaParser::FetchKeysIfNecessary(
614 const std::vector<ProtectionSystemSpecificHeader>& headers) {
// The case of no key source is special-cased (handling not visible here).
619 if (!decryption_key_source_)
623 for (std::vector<ProtectionSystemSpecificHeader>::const_iterator iter =
624 headers.begin(); iter != headers.end(); ++iter) {
625 status = decryption_key_source_->
FetchKeys(iter->raw_box);
629 VLOG(1) <<
"Unable to fetch decryption keys: " << status
630 <<
", trying the next PSSH box";
637 LOG(ERROR) <<
"Error fetching decryption keys: " << status;
641 LOG(ERROR) <<
"No viable 'pssh' box found for content decryption.";
// Emits the current sample of the active track run through new_sample_cb_.
// The sample is decrypted first when the run is marked encrypted, and the run
// iterator is advanced afterwards. Failures are reported through the log
// messages below.
// Several lines (return statements, the sample construction call, closing
// braces) are not visible in this excerpt.
645 bool MP4MediaParser::EnqueueSample(
bool* err) {
// No valid run: trim the queue up to mdat_tail_ and go back to parsing boxes.
646 if (!runs_->IsRunValid()) {
649 if (!queue_.
Trim(mdat_tail_))
652 ChangeState(kParsingBoxes);
// An invalid sample within a valid run is special-cased (not visible here).
656 if (!runs_->IsSampleValid()) {
665 queue_.Peek(&buf, &buf_size);
// Runs that are neither audio nor video are special-cased (not visible here).
670 if (!runs_->is_audio() && !runs_->is_video())
// Cache auxiliary info first when the iterator asks for it. Its offset is
// taken relative to moof_head_, and the whole aux info must be available in
// the queue.
680 if (runs_->AuxInfoNeedsToBeCached()) {
681 queue_.
PeekAt(runs_->aux_info_offset() + moof_head_, &buf, &buf_size);
682 if (buf_size < runs_->aux_info_size())
684 *err = !runs_->CacheAuxInfo(buf, buf_size);
// Locate the sample data; the sample offset is also relative to moof_head_.
688 int64_t sample_offset = runs_->sample_offset() + moof_head_;
689 queue_.
PeekAt(sample_offset, &buf, &buf_size);
// Fewer bytes than the sample size are available. An offset that lies before
// the queue head is reported as an error.
690 if (buf_size < runs_->sample_size()) {
691 if (sample_offset < queue_.
head()) {
692 LOG(ERROR) <<
"Incorrect sample offset " << sample_offset
693 <<
" < " << queue_.
head();
700 buf, runs_->sample_size(), runs_->is_keyframe()));
// Encrypted samples need a decryptor source and a decrypt config; the sample
// buffer is decrypted through its writable data pointer.
701 if (runs_->is_encrypted()) {
702 if (!decryptor_source_) {
704 LOG(ERROR) <<
"Encrypted media sample encountered, but decryption is not "
709 scoped_ptr<DecryptConfig> decrypt_config = runs_->GetDecryptConfig();
710 if (!decrypt_config ||
711 !decryptor_source_->DecryptSampleBuffer(decrypt_config.get(),
712 stream_sample->writable_data(),
713 stream_sample->data_size())) {
715 LOG(ERROR) <<
"Cannot decrypt samples.";
// Timestamps: dts from the run's dts, pts from the run's cts.
720 stream_sample->set_dts(runs_->dts());
721 stream_sample->set_pts(runs_->cts());
722 stream_sample->set_duration(runs_->duration());
724 DVLOG(3) <<
"Pushing frame: "
725 <<
", key=" << runs_->is_keyframe()
726 <<
", dur=" << runs_->duration()
727 <<
", dts=" << runs_->dts()
728 <<
", cts=" << runs_->cts()
729 <<
", size=" << runs_->sample_size();
// Hand the sample to the callback; a false return is logged as a failure.
731 if (!new_sample_cb_.Run(runs_->track_id(), stream_sample)) {
733 LOG(ERROR) <<
"Failed to process the sample.";
737 runs_->AdvanceSample();
// Advances mdat_tail_ one box at a time while it is below |offset|, then
// trims queue_ up to the smaller of mdat_tail_ and |offset|.
// The box-size parsing and the return statements are not visible in this
// excerpt.
741 bool MP4MediaParser::ReadAndDiscardMDATsUntil(
const int64_t offset) {
743 while (mdat_tail_ < offset) {
// Peek at the data located at the current mdat tail.
746 queue_.
PeekAt(mdat_tail_, &buf, &size);
753 mdat_tail_ += box_sz;
755 queue_.
Trim(std::min(mdat_tail_, offset));
// Logs the requested state transition at debug verbosity level 2. The rest of
// the body (presumably the assignment to state_ -- TODO confirm) is not
// visible in this excerpt.
759 void MP4MediaParser::ChangeState(State new_state) {
760 DVLOG(2) <<
"Changing state: " << new_state;