5 #include "packager/media/formats/mp4/mp4_media_parser.h"
9 #include "packager/base/callback.h"
10 #include "packager/base/callback_helpers.h"
11 #include "packager/base/logging.h"
12 #include "packager/base/memory/ref_counted.h"
13 #include "packager/base/strings/string_number_conversions.h"
14 #include "packager/media/base/audio_stream_info.h"
15 #include "packager/media/base/buffer_reader.h"
16 #include "packager/media/base/decrypt_config.h"
17 #include "packager/media/base/key_source.h"
18 #include "packager/media/base/macros.h"
19 #include "packager/media/base/media_sample.h"
20 #include "packager/media/base/rcheck.h"
21 #include "packager/media/base/video_stream_info.h"
22 #include "packager/media/file/file.h"
23 #include "packager/media/file/file_closer.h"
24 #include "packager/media/filters/avc_decoder_configuration.h"
25 #include "packager/media/filters/hevc_decoder_configuration.h"
26 #include "packager/media/filters/vp_codec_configuration.h"
27 #include "packager/media/formats/mp4/box_definitions.h"
28 #include "packager/media/formats/mp4/box_reader.h"
29 #include "packager/media/formats/mp4/es_descriptor.h"
30 #include "packager/media/formats/mp4/track_run_iterator.h"
32 namespace edash_packager {
// Converts a time value from one timescale to another. The visible return
// statement divides |time_in_old_scale| by |old_scale| in double precision and
// multiplies the quotient by |new_scale|; the result is converted back to
// uint64_t by the return type.
// NOTE(review): a double intermediate cannot represent every uint64_t exactly,
// so very large inputs may lose precision, and no guard against a zero
// |old_scale| is visible in this view — confirm against callers.
37 uint64_t Rescale(uint64_t time_in_old_scale,
40 return (static_cast<double>(time_in_old_scale) / old_scale) * new_scale;
// Maps a FourCC to a VideoCodec value. Only the fallback is visible in this
// view: kUnknownVideoCodec is returned, presumably when no known FourCC
// matched (the matching cases are on lines not shown here).
43 VideoCodec FourCCToVideoCodec(FourCC fourcc) {
58 return kUnknownVideoCodec;
// Maps a FourCC to an AudioCodec value. Only the fallback is visible in this
// view: kUnknownAudioCodec is returned, presumably when no known FourCC
// matched (the matching cases are on lines not shown here).
62 AudioCodec FourCCToAudioCodec(FourCC fourcc) {
81 return kUnknownAudioCodec;
// Channel count that ParseMoov compares against in its DTS branch; any other
// value is reported there as "Unsupported channel count".
86 const uint8_t kDtsAudioNumChannels = 6;
// Constructs a parser that starts in the kWaitingForInit state with no
// decryption key source attached. Any further member initializers and the
// constructor body are on lines outside this view.
90 MP4MediaParser::MP4MediaParser()
91 : state_(kWaitingForInit),
92 decryption_key_source_(NULL),
// Destructor with an empty body; nothing is released explicitly here.
96 MP4MediaParser::~MP4MediaParser() {}
// Parser initialization. The first line of the signature is outside this view;
// the visible body uses |init_cb|, |new_sample_cb| and |decryption_key_source|.
99 const NewSampleCB& new_sample_cb,
// Debug-only preconditions: the parser has not been initialized yet, no init
// callback has been stored so far, and both supplied callbacks are non-null.
101 DCHECK_EQ(state_, kWaitingForInit);
102 DCHECK(init_cb_.is_null());
103 DCHECK(!init_cb.is_null());
104 DCHECK(!new_sample_cb.is_null());
// From here on the parser expects top-level boxes.
106 ChangeState(kParsingBoxes);
// Remember the sample callback and the (possibly null) key source for later.
108 new_sample_cb_ = new_sample_cb;
109 decryption_key_source_ = decryption_key_source;
// Extra setup happens only when a key source was supplied; the statement
// guarded by this condition is not visible here.
110 if (decryption_key_source)
// Returns an initialized parser to the kParsingBoxes state. Any clearing of
// buffered data or per-fragment members happens on lines outside this view.
114 void MP4MediaParser::Reset() {
// Debug-only precondition: the parser must already have been initialized.
122 DCHECK_NE(state_, kWaitingForInit);
124 ChangeState(kParsingBoxes);
// Body of the data-append entry point; its signature line is outside this
// view. It buffers |buf|/|size| and then repeatedly parses boxes or emits
// samples until a step reports no progress or an error.
// Debug-only precondition: the parser must already have been initialized.
129 DCHECK_NE(state_, kWaitingForInit);
// A parser that is already in kError is special-cased; the statement guarded
// by this condition is not visible here.
131 if (state_ == kError)
// Append the new bytes; everything below consumes from queue_.
134 queue_.Push(buf, size);
// |result| is left uninitialized here; both visible branches below assign it
// before the loop condition reads it.
136 bool result, err =
false;
// While in kParsingBoxes, consume the next top-level box.
139 if (state_ == kParsingBoxes) {
140 result = ParseBox(&err);
// Otherwise the only other expected state is kEmittingSamples.
142 DCHECK_EQ(kEmittingSamples, state_);
143 result = EnqueueSample(&err);
// Discard mdat data up to the max clear offset reported by runs_ plus
// moof_head_; a failure there is reported through |err|.
// NOTE(review): the condition guarding these two statements is not visible.
145 int64_t max_clear = runs_->GetMaxClearOffset() + moof_head_;
146 err = !ReadAndDiscardMDATsUntil(max_clear);
149 }
// Keep going while the last step succeeded and no error was flagged.
while (result && !err);
// Debug-build log for the failure path; the enclosing condition and the
// return statements are not visible here.
152 DLOG(ERROR) <<
"Error while parsing MP4";
// Body of a routine that scans the file at |file_path| box by box, feeds the
// 'moov' box to Parse(), and seeks past other boxes. The signature, the loop
// header and all return statements are on lines outside this view.
// The file handle is owned by a scoped_ptr with a FileCloser deleter; the open
// call itself is not visible.
163 scoped_ptr<File, FileCloser> file(
166 LOG(ERROR) <<
"Unable to open media file '" << file_path <<
"'";
// Probe whether the file can be repositioned; a failed seek is reported with a
// WARNING. What happens after the warning is not visible here.
169 if (!file->Seek(0)) {
170 LOG(WARNING) <<
"Filesystem does not support seeking on file '" << file_path
// file_position is advanced by box_size further down and used as the next
// seek target.
175 uint64_t file_position(0);
// mdat_seen starts false; the lines that set or read it are not visible here.
176 bool mdat_seen(
false);
// Read a fixed 16-byte prefix of each top-level box into |buffer|.
178 const uint32_t kBoxHeaderReadSize(16);
179 std::vector<uint8_t> buffer(kBoxHeaderReadSize);
180 int64_t bytes_read = file->Read(&buffer[0], kBoxHeaderReadSize);
// Zero bytes read is reported as "no 'moov' box found".
181 if (bytes_read == 0) {
182 LOG(ERROR) <<
"Could not find 'moov' box in file '" << file_path <<
"'";
// Any other read shorter than the header size (including a negative return
// value) is reported as a read error.
185 if (bytes_read < kBoxHeaderReadSize) {
186 LOG(ERROR) <<
"Error reading media file '" << file_path <<
"'";
// box_type and box_size are obtained on lines outside this view; this message
// covers the failure of that step.
194 LOG(ERROR) <<
"Could not start top level box from file '" << file_path
// The body of the 'mdat' branch is not visible here.
198 if (box_type == FOURCC_mdat) {
200 }
else if (box_type == FOURCC_moov) {
// Feed the header bytes that were already read to Parse() first...
206 if (!
Parse(&buffer[0], bytes_read)) {
207 LOG(ERROR) <<
"Error parsing mp4 file '" << file_path <<
"'";
// ...then read the rest of the 'moov' box and feed each chunk to Parse() as it
// arrives.
// NOTE(review): the subtraction assumes box_size >= bytes_read; no check for
// that is visible in this view — confirm.
210 uint64_t bytes_to_read = box_size - bytes_read;
211 buffer.resize(bytes_to_read);
212 while (bytes_to_read > 0) {
213 bytes_read = file->Read(&buffer[0], bytes_to_read);
// A zero or negative read is reported as an error.
214 if (bytes_read <= 0) {
215 LOG(ERROR) <<
"Error reading 'moov' contents from file '" << file_path
219 if (!
Parse(&buffer[0], bytes_read)) {
220 LOG(ERROR) <<
"Error parsing mp4 file '" << file_path <<
"'";
223 bytes_to_read -= bytes_read;
// Advance to the start of the next box and reposition the file there.
229 file_position += box_size;
230 if (!file->Seek(file_position)) {
231 LOG(ERROR) <<
"Error skipping box in mp4 file '" << file_path <<
"'";
// Examines the top-level box at the front of queue_ and dispatches on its
// type. |err| is set from the result of ParseMoov / ParseMoof. The return
// statements are on lines outside this view.
238 bool MP4MediaParser::ParseBox(
bool* err) {
// Look at the buffered bytes without consuming them.
241 queue_.Peek(&buf, &size);
// No box reader is available; the statement guarded by this condition is not
// visible here.
246 if (reader.get() == NULL)
249 if (reader->type() == FOURCC_mdat) {
// Reported as not implemented; the condition that leads to this message is not
// visible here.
253 NOTIMPLEMENTED() <<
" Files with MDAT before MOOV is not supported yet.";
// Record the stream offset at which this box ends (queue head + box size).
259 mdat_tail_ = queue_.
head() + reader->size();
261 if (reader->type() == FOURCC_moov) {
262 *err = !ParseMoov(reader.get());
263 }
else if (reader->type() == FOURCC_moof) {
// Remember where this moof starts; EnqueueSample adds moof_head_ to the sample
// and aux-info offsets it gets from runs_.
264 moof_head_ = queue_.
head();
265 *err = !ParseMoof(reader.get());
// Other top-level boxes are only logged at verbose level 2.
273 VLOG(2) <<
"Skipping top-level box: " << FourCCToString(reader->type());
// Consume the box from the queue.
276 queue_.Pop(reader->size());
// Parses the 'moov' box into moov_, builds one StreamInfo per audio/video
// track, hands the list to init_cb_, fetches decryption keys if needed and
// prepares the track run iterator. Many lines of this function (case labels,
// returns, closing braces) are outside this view.
280 bool MP4MediaParser::ParseMoov(BoxReader* reader) {
// Start from a fresh Movie; RCHECK handles a failed parse.
284 moov_.reset(
new Movie);
285 RCHECK(moov_->Parse(reader));
// Collected stream descriptions, passed to init_cb_ at the end.
288 std::vector<scoped_refptr<StreamInfo> > streams;
290 for (std::vector<Track>::const_iterator track = moov_->tracks.begin();
291 track != moov_->tracks.end(); ++track) {
292 const uint32_t timescale = track->media.header.timescale;
// Duration is picked in this order: the track's media header duration, then
// the movie-extends fragment_duration, then the movie header duration. The
// latter two are rescaled from the movie timescale via Rescale(). It stays 0
// if none applies.
295 uint64_t duration = 0;
296 if (track->media.header.duration > 0) {
297 duration = track->media.header.duration;
298 }
else if (moov_->extends.header.fragment_duration > 0) {
299 DCHECK(moov_->header.timescale != 0);
300 duration = Rescale(moov_->extends.header.fragment_duration,
301 moov_->header.timescale,
303 }
// A movie header duration equal to the uint64_t maximum is ignored.
else if (moov_->header.duration > 0 &&
304 moov_->header.duration != std::numeric_limits<uint64_t>::max()) {
305 DCHECK(moov_->header.timescale != 0);
307 Rescale(moov_->header.duration, moov_->header.timescale, timescale);
310 const SampleDescription& samp_descr =
311 track->media.information.sample_table.description;
// Choose the sample description index: when the movie has track-extends
// entries, take default_sample_description_index from the entry whose track_id
// matches this track.
317 if (moov_->extends.tracks.size() > 0) {
318 for (
size_t t = 0; t < moov_->extends.tracks.size(); t++) {
319 const TrackExtends& trex = moov_->extends.tracks[t];
320 if (trex.track_id == track->header.track_id) {
321 desc_idx = trex.default_sample_description_index;
// Otherwise use the first chunk's sample_description_index; RCHECK requires at
// least one chunk entry.
326 const std::vector<ChunkInfo>& chunk_info =
327 track->media.information.sample_table.sample_to_chunk.chunk_info;
328 RCHECK(chunk_info.size() > 0);
329 desc_idx = chunk_info[0].sample_description_index;
// RCHECK requires desc_idx to be positive.
// NOTE(review): the later bounds checks against entries.size() suggest the
// index is converted to zero-based on a line not visible here — confirm.
331 RCHECK(desc_idx > 0);
// ---- Audio track ----
334 if (samp_descr.type == kAudio) {
335 RCHECK(!samp_descr.audio_entries.empty());
// Out-of-range index; the statement guarded by this condition is not visible.
339 if (desc_idx >= samp_descr.audio_entries.size())
342 const AudioSampleEntry& entry = samp_descr.audio_entries[desc_idx];
343 const FourCC actual_format = entry.GetActualFormat();
344 AudioCodec codec = FourCCToAudioCodec(actual_format);
// Zero/empty defaults; the branches below fill in what each format provides.
345 uint8_t num_channels = 0;
346 uint32_t sampling_frequency = 0;
347 uint8_t audio_object_type = 0;
348 uint32_t max_bitrate = 0;
349 uint32_t avg_bitrate = 0;
350 std::vector<uint8_t> extra_data;
352 switch (actual_format) {
// ESDS-described audio, AAC flavour: channel count, frequency and object type
// come from the AAC audio-specific config, extra data from the ES descriptor's
// decoder-specific info.
356 if (entry.esds.es_descriptor.IsAAC()) {
358 const AACAudioSpecificConfig& aac_audio_specific_config =
359 entry.esds.aac_audio_specific_config;
360 num_channels = aac_audio_specific_config.num_channels();
361 sampling_frequency = aac_audio_specific_config.frequency();
362 audio_object_type = aac_audio_specific_config.audio_object_type();
363 extra_data = entry.esds.es_descriptor.decoder_specific_info();
365 }
// ESDS-described audio, DTS flavour: dispatch on the descriptor's object type
// (the individual cases are not visible here).
else if (entry.esds.es_descriptor.IsDTS()) {
366 ObjectType audio_type = entry.esds.es_descriptor.object_type();
367 switch (audio_type) {
381 LOG(ERROR) <<
"Unsupported audio type " << audio_type
// Even in the DTS branch the channel count is read from
// aac_audio_specific_config.
385 num_channels = entry.esds.aac_audio_specific_config.num_channels();
// Channel counts other than kDtsAudioNumChannels are reported as unsupported.
// NOTE(review): num_channels is declared uint8_t above, so streaming it into
// the log prints it as a character rather than as a number.
388 if (num_channels != kDtsAudioNumChannels) {
389 LOG(ERROR) <<
"Unsupported channel count " << num_channels
390 <<
" for audio type " << audio_type <<
".";
393 sampling_frequency = entry.samplerate;
394 max_bitrate = entry.esds.es_descriptor.max_bitrate();
395 avg_bitrate = entry.esds.es_descriptor.avg_bitrate();
// ES descriptor that matched neither of the branches above.
397 LOG(ERROR) <<
"Unsupported audio format 0x" << std::hex
398 << actual_format <<
" in stsd box.";
// Several case labels (not visible here) fall through to the shared handling
// below, which takes extra data and bitrates from the ddts box and channel
// count / sample rate from the sample entry.
403 FALLTHROUGH_INTENDED;
405 FALLTHROUGH_INTENDED;
407 FALLTHROUGH_INTENDED;
409 FALLTHROUGH_INTENDED;
411 extra_data = entry.ddts.extra_data;
412 max_bitrate = entry.ddts.max_bitrate;
413 avg_bitrate = entry.ddts.avg_bitrate;
414 num_channels = entry.channelcount;
415 sampling_frequency = entry.samplerate;
// Format whose extra data comes from the dac3 box.
418 extra_data = entry.dac3.data;
419 num_channels = entry.channelcount;
420 sampling_frequency = entry.samplerate;
// Format whose extra data comes from the dec3 box.
423 extra_data = entry.dec3.data;
424 num_channels = entry.channelcount;
425 sampling_frequency = entry.samplerate;
// Formats not handled by any case above.
428 LOG(ERROR) <<
"Unsupported audio format 0x" << std::hex
429 << actual_format <<
" in stsd box.";
// The track is treated as encrypted when the track-encryption box inside
// 'sinf' has default_is_protected == 1.
433 const bool is_encrypted =
434 entry.sinf.info.track_encryption.default_is_protected == 1;
435 DVLOG(1) <<
"is_audio_track_encrypted_: " << is_encrypted;
// Most AudioStreamInfo constructor arguments are on lines not visible here.
436 streams.push_back(
new AudioStreamInfo(
437 track->header.track_id,
442 track->media.header.language.code,
// ---- Video track ----
453 if (samp_descr.type == kVideo) {
454 RCHECK(!samp_descr.video_entries.empty());
// Out-of-range index; the statement guarded by this condition is not visible.
455 if (desc_idx >= samp_descr.video_entries.size())
457 const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx];
// Start from the values in the sample entry; the AVC branch below may replace
// them with values from the decoder configuration.
459 uint32_t coded_width = entry.width;
460 uint32_t coded_height = entry.height;
461 uint32_t pixel_width = entry.pixel_aspect.h_spacing;
462 uint32_t pixel_height = entry.pixel_aspect.v_spacing;
// Both spacing values are zero; the handling for that case is not visible.
463 if (pixel_width == 0 && pixel_height == 0) {
467 std::string codec_string;
468 uint8_t nalu_length_size = 0;
470 const FourCC actual_format = entry.GetActualFormat();
471 const VideoCodec video_codec = FourCCToVideoCodec(actual_format);
472 switch (actual_format) {
// AVC: the codec string and NAL unit length size come from the parsed
// AVCDecoderConfiguration.
474 AVCDecoderConfiguration avc_config;
475 if (!avc_config.Parse(entry.codec_config_record.data)) {
476 LOG(ERROR) <<
"Failed to parse avcc.";
479 codec_string = avc_config.GetCodecString();
480 nalu_length_size = avc_config.nalu_length_size();
// If the sample entry and the decoder configuration disagree on resolution,
// warn and use the decoder configuration's values.
482 if (coded_width != avc_config.coded_width() ||
483 coded_height != avc_config.coded_height()) {
484 LOG(WARNING) <<
"Resolution in VisualSampleEntry (" << coded_width
485 <<
"," << coded_height
486 <<
") does not match with resolution in "
487 "AVCDecoderConfigurationRecord ("
488 << avc_config.coded_width() <<
","
489 << avc_config.coded_height()
490 <<
"). Use AVCDecoderConfigurationRecord.";
491 coded_width = avc_config.coded_width();
492 coded_height = avc_config.coded_height();
// Same policy for the pixel aspect ratio; the warning is skipped when the
// sample entry's values are 1:1.
495 if (pixel_width != avc_config.pixel_width() ||
496 pixel_height != avc_config.pixel_height()) {
497 LOG_IF(WARNING, pixel_width != 1 || pixel_height != 1)
498 <<
"Pixel aspect ratio in PASP box (" << pixel_width <<
","
500 <<
") does not match with SAR in AVCDecoderConfigurationRecord "
502 << avc_config.pixel_width() <<
"," << avc_config.pixel_height()
503 <<
"). Use AVCDecoderConfigurationRecord.";
504 pixel_width = avc_config.pixel_width();
505 pixel_height = avc_config.pixel_height();
// HEVC: the codec string and NAL unit length size come from the parsed
// HEVCDecoderConfiguration.
511 HEVCDecoderConfiguration hevc_config;
512 if (!hevc_config.Parse(entry.codec_config_record.data)) {
513 LOG(ERROR) <<
"Failed to parse hevc.";
516 codec_string = hevc_config.GetCodecString(video_codec);
517 nalu_length_size = hevc_config.nalu_length_size();
// VP codecs: only the codec string is taken from the parsed configuration, so
// nalu_length_size keeps its 0 default in the visible lines.
523 VPCodecConfiguration vp_config;
524 if (!vp_config.Parse(entry.codec_config_record.data)) {
525 LOG(ERROR) <<
"Failed to parse vpcc.";
528 codec_string = vp_config.GetCodecString(video_codec);
// Formats not handled by any case above.
532 LOG(ERROR) <<
"Unsupported video format "
533 << FourCCToString(actual_format) <<
" in stsd box.";
// Same encryption test as for audio tracks.
537 const bool is_encrypted =
538 entry.sinf.info.track_encryption.default_is_protected == 1;
539 DVLOG(1) <<
"is_video_track_encrypted_: " << is_encrypted;
// The raw codec configuration record is passed along as pointer + size.
540 streams.push_back(
new VideoStreamInfo(
541 track->header.track_id, timescale, duration, video_codec,
542 codec_string, track->media.header.language.code, coded_width,
543 coded_height, pixel_width, pixel_height,
545 nalu_length_size, entry.codec_config_record.data.data(),
546 entry.codec_config_record.data.size(), is_encrypted));
// Hand the collected streams to the init callback.
550 init_cb_.Run(streams);
// Fetch decryption keys using the movie-level 'pssh' boxes; the failure
// handling is not visible here.
551 if (!FetchKeysIfNecessary(moov_->pssh))
// Create the run iterator for this movie and switch to sample emission.
553 runs_.reset(
new TrackRunIterator(moov_.get()));
554 RCHECK(runs_->Init());
555 ChangeState(kEmittingSamples);
// Parses a 'moof' box, builds a fresh track run iterator for it and switches
// the parser to sample emission. The declaration of |moof| and the return
// statements are on lines outside this view.
559 bool MP4MediaParser::ParseMoof(BoxReader* reader) {
563 RCHECK(moof.Parse(reader));
// A new iterator is created per fragment, bound to the already-parsed moov_.
565 runs_.reset(
new TrackRunIterator(moov_.get()));
566 RCHECK(runs_->Init(moof));
// Fetch keys using the 'pssh' boxes of this fragment; the failure handling is
// not visible here.
567 if (!FetchKeysIfNecessary(moof.pssh))
569 ChangeState(kEmittingSamples);
// Asks decryption_key_source_ to fetch keys for the given 'pssh' headers,
// trying them one after another. The return statements and the conditions
// around the log messages are on lines outside this view.
573 bool MP4MediaParser::FetchKeysIfNecessary(
574 const std::vector<ProtectionSystemSpecificHeader>& headers) {
// Without a key source there is nothing to fetch; the statement guarded by
// this condition is not visible here.
579 if (!decryption_key_source_)
// Try each header's raw box in order.
583 for (std::vector<ProtectionSystemSpecificHeader>::const_iterator iter =
584 headers.begin(); iter != headers.end(); ++iter) {
585 status = decryption_key_source_->
FetchKeys(iter->raw_box);
// A failed fetch is logged at verbose level; per the message, the next header
// is tried.
589 VLOG(1) <<
"Unable to fetch decryption keys: " << status
590 <<
", trying the next PSSH box";
// Error messages for the failure paths; their guarding conditions are not
// visible here.
597 LOG(ERROR) <<
"Error fetching decryption keys: " << status;
601 LOG(ERROR) <<
"No viable 'pssh' box found for content decryption.";
// Emits the next sample described by runs_ to new_sample_cb_, decrypting it
// first when the run is encrypted. |err| is written in the visible lines from
// the CacheAuxInfo result. The return statements and several branch bodies are
// on lines outside this view.
605 bool MP4MediaParser::EnqueueSample(
bool* err) {
// No valid run left: trim the queue up to mdat_tail_ and go back to parsing
// boxes.
606 if (!runs_->IsRunValid()) {
609 if (!queue_.
Trim(mdat_tail_))
612 ChangeState(kParsingBoxes);
// The current sample is not valid; the handling is not visible here.
616 if (!runs_->IsSampleValid()) {
625 queue_.Peek(&buf, &buf_size);
// Runs that are neither audio nor video are special-cased; the statement
// guarded by this condition is not visible here.
630 if (!runs_->is_audio() && !runs_->is_video())
// Auxiliary info offsets are added to moof_head_. Caching is attempted only
// once enough bytes are buffered; the not-enough-bytes path is not visible.
640 if (runs_->AuxInfoNeedsToBeCached()) {
641 queue_.
PeekAt(runs_->aux_info_offset() + moof_head_, &buf, &buf_size);
642 if (buf_size < runs_->aux_info_size())
644 *err = !runs_->CacheAuxInfo(buf, buf_size);
// Locate the sample bytes; the sample offset is likewise added to moof_head_.
648 int64_t sample_offset = runs_->sample_offset() + moof_head_;
649 queue_.
PeekAt(sample_offset, &buf, &buf_size);
// Fewer bytes are available than the sample needs. If the offset also lies
// before the queue head, it is logged as an incorrect offset.
650 if (buf_size < runs_->sample_size()) {
651 if (sample_offset < queue_.
head()) {
652 LOG(ERROR) <<
"Incorrect sample offset " << sample_offset
653 <<
" < " << queue_.
head();
// Tail of the call that creates |stream_sample|; its first line is not visible
// here.
660 buf, runs_->sample_size(), runs_->is_keyframe()));
// Encrypted runs need a decryptor source; a missing one is logged as an error.
661 if (runs_->is_encrypted()) {
662 if (!decryptor_source_) {
664 LOG(ERROR) <<
"Encrypted media sample encountered, but decryption is not "
// The sample's writable data is passed to the decryptor together with the
// run's decrypt config; a missing config or failed decryption is logged.
669 scoped_ptr<DecryptConfig> decrypt_config = runs_->GetDecryptConfig();
670 if (!decrypt_config ||
671 !decryptor_source_->DecryptSampleBuffer(decrypt_config.get(),
672 stream_sample->writable_data(),
673 stream_sample->data_size())) {
675 LOG(ERROR) <<
"Cannot decrypt samples.";
// Copy timing from the run iterator; pts is set from the run's cts().
680 stream_sample->set_dts(runs_->dts());
681 stream_sample->set_pts(runs_->cts());
682 stream_sample->set_duration(runs_->duration());
684 DVLOG(3) <<
"Pushing frame: "
685 <<
", key=" << runs_->is_keyframe()
686 <<
", dur=" << runs_->duration()
687 <<
", dts=" << runs_->dts()
688 <<
", cts=" << runs_->cts()
689 <<
", size=" << runs_->sample_size();
// Deliver the sample; a false return from the callback is logged as an error.
691 if (!new_sample_cb_.Run(runs_->track_id(), stream_sample)) {
693 LOG(ERROR) <<
"Failed to process the sample.";
// Move on to the next sample of the run.
697 runs_->AdvanceSample();
// Advances mdat_tail_ box by box until it reaches |offset|, then trims queue_
// up to the smaller of mdat_tail_ and |offset|. How |box_sz| is obtained and
// the return statements are on lines outside this view.
701 bool MP4MediaParser::ReadAndDiscardMDATsUntil(
const int64_t offset) {
703 while (mdat_tail_ < offset) {
// Peek at the bytes starting at the current tail.
706 queue_.
PeekAt(mdat_tail_, &buf, &size);
// Step over the box just examined.
713 mdat_tail_ += box_sz;
// Trim no further than |offset|, even if mdat_tail_ has moved past it.
715 queue_.
Trim(std::min(mdat_tail_, offset));
// Logs the requested state at debug verbose level 2. The assignment to the
// state member is presumably on a line outside this view — confirm.
719 void MP4MediaParser::ChangeState(State new_state) {
720 DVLOG(2) <<
"Changing state: " << new_state;