5 #include "packager/media/formats/mp4/mp4_media_parser.h"
9 #include "packager/base/callback.h"
10 #include "packager/base/callback_helpers.h"
11 #include "packager/base/logging.h"
12 #include "packager/base/memory/ref_counted.h"
13 #include "packager/base/strings/string_number_conversions.h"
14 #include "packager/media/base/aes_encryptor.h"
15 #include "packager/media/base/audio_stream_info.h"
16 #include "packager/media/base/buffer_reader.h"
17 #include "packager/media/base/decrypt_config.h"
18 #include "packager/media/base/key_source.h"
19 #include "packager/media/base/media_sample.h"
20 #include "packager/media/base/video_stream_info.h"
21 #include "packager/media/file/file.h"
22 #include "packager/media/file/file_closer.h"
23 #include "packager/media/filters/avc_decoder_configuration.h"
24 #include "packager/media/filters/hevc_decoder_configuration.h"
25 #include "packager/media/filters/vp_codec_configuration.h"
26 #include "packager/media/formats/mp4/box_definitions.h"
27 #include "packager/media/formats/mp4/box_reader.h"
28 #include "packager/media/formats/mp4/es_descriptor.h"
29 #include "packager/media/formats/mp4/rcheck.h"
30 #include "packager/media/formats/mp4/track_run_iterator.h"
32 namespace edash_packager {
// Converts a time value from one timescale to another: |time_in_old_scale| is
// divided by |old_scale| in double precision and multiplied by |new_scale|;
// the result is converted to the uint64_t return type.
// (The rest of the parameter list is not visible in this listing.)
// NOTE(review): a double cannot represent every uint64_t value exactly, so
// very large inputs may lose precision here - confirm this is acceptable.
37 uint64_t Rescale(uint64_t time_in_old_scale,
40 return (static_cast<double>(time_in_old_scale) / old_scale) * new_scale;
// Maps a FourCC to a VideoCodec value. Only the fallback is visible in this
// listing: kUnknownVideoCodec is returned (presumably when no recognised
// FourCC matched - the intermediate lines are not shown).
43 VideoCodec FourCCToVideoCodec(FourCC fourcc) {
58 return kUnknownVideoCodec;
// Maps a FourCC to an AudioCodec value. Only the fallback is visible in this
// listing: kUnknownAudioCodec is returned (presumably when no recognised
// FourCC matched - the intermediate lines are not shown).
62 AudioCodec FourCCToAudioCodec(FourCC fourcc) {
79 return kUnknownAudioCodec;
// Hex-encoded key system id. FetchKeysIfNecessary() converts this string to
// bytes and compares it against the system_id of each 'pssh' header.
83 const char kWidevineKeySystemId[] =
"edef8ba979d64acea3c827dcd51d21ed";
// Constructs a parser in the kWaitingForInit state with moof_head_ and
// mdat_tail_ both set to zero.
87 MP4MediaParser::MP4MediaParser()
88 : state_(kWaitingForInit), moof_head_(0), mdat_tail_(0) {}
// Destructor: passes decryptor_map_ to STLDeleteValues. The map holds the raw
// AesCtrEncryptor pointers stored by DecryptSampleBuffer(); presumably this
// call frees them - confirm against the STLDeleteValues definition.
90 MP4MediaParser::~MP4MediaParser() {
91 STLDeleteValues(&decryptor_map_);
// Fragment of the initialization routine; its opening line is not shown in
// this listing (presumably MP4MediaParser::Init - confirm). It stores the
// sample callback and the decryption key source and moves the parser to
// kParsingBoxes.
95 const NewSampleCB& new_sample_cb,
// Assertions: the parser has not been initialized yet, no init callback is
// stored yet, and both supplied callbacks are non-null.
97 DCHECK_EQ(state_, kWaitingForInit);
98 DCHECK(init_cb_.is_null());
99 DCHECK(!init_cb.is_null());
100 DCHECK(!new_sample_cb.is_null());
102 ChangeState(kParsingBoxes);
// Keep the callback and key source for later use while parsing.
104 new_sample_cb_ = new_sample_cb;
105 decryption_key_source_ = decryption_key_source;
// Reset(): only the signature line is visible in this listing.
108 void MP4MediaParser::Reset() {
// NOTE(review): the embedded line numbers jump from 108 to 116, so the two
// statements below may belong to a different function whose header is not
// shown - confirm against the full file. They assert that the parser has left
// kWaitingForInit and then switch it to kParsingBoxes.
116 DCHECK_NE(state_, kWaitingForInit);
118 ChangeState(kParsingBoxes);
// Fragment of the incremental parse routine; its header is not shown in this
// listing (presumably MP4MediaParser::Parse(buf, size), which the file-loading
// code calls - confirm). The parser must already have been initialized.
122 DCHECK_NE(state_, kWaitingForInit);
// A parser already in the error state is handled up front (the action taken
// is not visible here).
124 if (state_ == kError)
// Append the incoming bytes to the internal queue before parsing.
127 queue_.Push(buf, size);
129 bool result, err =
false;
// Dispatch on the current state: either parse the next top-level box or emit
// the next sample. |result| is assigned by whichever branch runs.
132 if (state_ == kParsingBoxes) {
133 result = ParseBox(&err);
135 DCHECK_EQ(kEmittingSamples, state_);
136 result = EnqueueSample(&err);
// Discard 'mdat' data up to the run iterator's maximum clear offset, taken
// relative to the current moof head.
138 int64_t max_clear = runs_->GetMaxClearOffset() + moof_head_;
139 err = !ReadAndDiscardMDATsUntil(max_clear);
// Keep looping while the last step reported progress and no error occurred.
142 }
while (result && !err);
145 DLOG(ERROR) <<
"Error while parsing MP4";
// Fragment of a routine (header not shown in this listing) that opens the
// media file at |file_path|, walks its top-level boxes and feeds the 'moov'
// box to Parse(). The file handle is held in a scoped_ptr with a FileCloser
// deleter.
156 scoped_ptr<File, FileCloser> file(
159 LOG(ERROR) <<
"Unable to open media file '" << file_path <<
"'";
// Probe whether the file supports seeking; a failure is logged as a warning.
162 if (!file->Seek(0)) {
163 LOG(WARNING) <<
"Filesystem does not support seeking on file '" << file_path
168 uint64_t file_position(0);
169 bool mdat_seen(
false);
// Read 16 bytes at a time as the box header probe.
171 const uint32_t kBoxHeaderReadSize(16);
172 std::vector<uint8_t> buffer(kBoxHeaderReadSize);
173 int64_t bytes_read = file->Read(&buffer[0], kBoxHeaderReadSize);
// A zero-byte read is reported as "no 'moov' box found".
174 if (bytes_read == 0) {
175 LOG(ERROR) <<
"Could not find 'moov' box in file '" << file_path <<
"'";
// A read shorter than the header probe size is reported as a read error.
178 if (bytes_read < kBoxHeaderReadSize) {
179 LOG(ERROR) <<
"Error reading media file '" << file_path <<
"'";
187 LOG(ERROR) <<
"Could not start top level box from file '" << file_path
// Branch on the type of the top-level box just read (box_type and box_size
// are computed in lines not shown here).
191 if (box_type == FOURCC_MDAT) {
193 }
else if (box_type == FOURCC_MOOV) {
// Feed the already-read header bytes of the 'moov' box to the parser.
199 if (!
Parse(&buffer[0], bytes_read)) {
200 LOG(ERROR) <<
"Error parsing mp4 file '" << file_path <<
"'";
// Read the remainder of the 'moov' box and feed it to the parser as it
// arrives.
// NOTE(review): this subtraction is unsigned; if box_size could be smaller
// than bytes_read it would wrap - check whether a validation in the lines not
// shown rules that out.
203 uint64_t bytes_to_read = box_size - bytes_read;
204 buffer.resize(bytes_to_read);
205 while (bytes_to_read > 0) {
206 bytes_read = file->Read(&buffer[0], bytes_to_read);
207 if (bytes_read <= 0) {
208 LOG(ERROR) <<
"Error reading 'moov' contents from file '" << file_path
212 if (!
Parse(&buffer[0], bytes_read)) {
213 LOG(ERROR) <<
"Error parsing mp4 file '" << file_path <<
"'";
216 bytes_to_read -= bytes_read;
// Advance the file position by the box size and seek there.
222 file_position += box_size;
223 if (!file->Seek(file_position)) {
224 LOG(ERROR) <<
"Error skipping box in mp4 file '" << file_path <<
"'";
// Examines the next top-level box at the front of the queue and dispatches on
// its type. |err| is set from the outcome of the 'moov' / 'moof' handlers.
// (Several lines, including the reader construction and return statements,
// are not visible in this listing.)
231 bool MP4MediaParser::ParseBox(
bool* err) {
234 queue_.Peek(&buf, &size);
// A null reader is handled here (the action taken is not shown).
239 if (reader.get() == NULL)
242 if (reader->type() == FOURCC_MDAT) {
246 NOTIMPLEMENTED() <<
" Files with MDAT before MOOV is not supported yet.";
// Record where the current box ends: queue head plus the box size.
252 mdat_tail_ = queue_.
head() + reader->size();
254 if (reader->type() == FOURCC_MOOV) {
255 *err = !ParseMoov(reader.get());
256 }
else if (reader->type() == FOURCC_MOOF) {
// Remember where this 'moof' starts; EnqueueSample() adds this value to the
// run iterator's offsets.
257 moof_head_ = queue_.
head();
258 *err = !ParseMoof(reader.get());
266 VLOG(2) <<
"Skipping top-level box: " << FourCCToString(reader->type());
// Remove the box from the queue.
269 queue_.Pop(reader->size());
// Parses the 'moov' box into a new Movie, builds a StreamInfo for each audio
// and video track, passes the list to init_cb_, fetches decryption keys if
// needed, initializes the track run iterator and switches the parser to
// kEmittingSamples.
// RCHECK comes from rcheck.h; presumably it aborts the function with a failure
// result when its condition is false - confirm against that header.
// (Many lines - case labels, return statements, closing braces - are not
// visible in this listing.)
273 bool MP4MediaParser::ParseMoov(BoxReader* reader) {
277 moov_.reset(
new Movie);
278 RCHECK(moov_->Parse(reader));
281 std::vector<scoped_refptr<StreamInfo> > streams;
283 for (std::vector<Track>::const_iterator track = moov_->tracks.begin();
284 track != moov_->tracks.end(); ++track) {
285 const uint32_t timescale = track->media.header.timescale;
// Choose the track duration: the track's own media header duration when it is
// positive; otherwise the movie-extends fragment duration rescaled from the
// movie timescale; otherwise the movie header duration (when positive and not
// the uint64_t maximum) rescaled to the track timescale. Defaults to 0.
288 uint64_t duration = 0;
289 if (track->media.header.duration > 0) {
290 duration = track->media.header.duration;
291 }
else if (moov_->extends.header.fragment_duration > 0) {
292 DCHECK(moov_->header.timescale != 0);
293 duration = Rescale(moov_->extends.header.fragment_duration,
294 moov_->header.timescale,
296 }
else if (moov_->header.duration > 0 &&
297 moov_->header.duration != std::numeric_limits<uint64_t>::max()) {
298 DCHECK(moov_->header.timescale != 0);
300 Rescale(moov_->header.duration, moov_->header.timescale, timescale);
303 const SampleDescription& samp_descr =
304 track->media.information.sample_table.description;
// Determine the sample description index. When the movie has track-extends
// entries, use the default index of the entry whose track_id matches this
// track.
310 if (moov_->extends.tracks.size() > 0) {
311 for (
size_t t = 0; t < moov_->extends.tracks.size(); t++) {
312 const TrackExtends& trex = moov_->extends.tracks[t];
313 if (trex.track_id == track->header.track_id) {
314 desc_idx = trex.default_sample_description_index;
// Alternative source of the index: the first sample-to-chunk entry (the
// branching between the two sources is not fully visible in this listing).
319 const std::vector<ChunkInfo>& chunk_info =
320 track->media.information.sample_table.sample_to_chunk.chunk_info;
321 RCHECK(chunk_info.size() > 0);
322 desc_idx = chunk_info[0].sample_description_index;
// The index must be positive at this point.
324 RCHECK(desc_idx > 0);
// ---- Audio track ----
327 if (track->media.handler.type == kAudio) {
328 RCHECK(!samp_descr.audio_entries.empty());
// An index beyond the available audio entries is handled here (the action is
// not shown).
332 if (desc_idx >= samp_descr.audio_entries.size())
335 const AudioSampleEntry& entry = samp_descr.audio_entries[desc_idx];
336 const FourCC actual_format = entry.GetActualFormat();
337 AudioCodec codec = FourCCToAudioCodec(actual_format);
338 uint8_t num_channels = 0;
339 uint32_t sampling_frequency = 0;
340 uint8_t audio_object_type = 0;
341 std::vector<uint8_t> extra_data;
// Fill in the codec-specific parameters (the case labels are not shown).
343 switch (actual_format) {
// For an AAC elementary stream descriptor: channel count, frequency and audio
// object type come from the AAC audio specific config; the extra data is the
// decoder specific info.
347 if (entry.esds.es_descriptor.IsAAC()) {
349 const AACAudioSpecificConfig& aac_audio_specific_config =
350 entry.esds.aac_audio_specific_config;
351 num_channels = aac_audio_specific_config.num_channels();
352 sampling_frequency = aac_audio_specific_config.frequency();
353 audio_object_type = aac_audio_specific_config.audio_object_type();
354 extra_data = entry.esds.es_descriptor.decoder_specific_info();
357 LOG(ERROR) <<
"Unsupported audio format 0x" << std::hex
358 << actual_format <<
" in stsd box.";
// Other formats take their values straight from the sample entry.
367 extra_data = entry.extra_data;
368 sampling_frequency = entry.samplerate;
371 num_channels = entry.channelcount;
372 sampling_frequency = entry.samplerate;
375 LOG(ERROR) <<
"Unsupported audio format 0x" << std::hex
376 << actual_format <<
" in stsd box.";
// The encryption flag is read from the entry's protection scheme info.
380 bool is_encrypted = entry.sinf.info.track_encryption.is_encrypted;
381 DVLOG(1) <<
"is_audio_track_encrypted_: " << is_encrypted;
382 streams.push_back(
new AudioStreamInfo(
383 track->header.track_id,
388 track->media.header.language,
392 extra_data.size() ? &extra_data[0] : NULL,
// ---- Video track ----
397 if (track->media.handler.type == kVideo) {
398 RCHECK(!samp_descr.video_entries.empty());
// An index beyond the available video entries is handled here (the action is
// not shown).
399 if (desc_idx >= samp_descr.video_entries.size())
401 const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx];
// Start from the dimensions and pixel aspect spacing stored in the sample
// entry; a codec configuration may override them below.
403 uint32_t coded_width = entry.width;
404 uint32_t coded_height = entry.height;
405 uint32_t pixel_width = entry.pixel_aspect.h_spacing;
406 uint32_t pixel_height = entry.pixel_aspect.v_spacing;
// Both spacings being zero is handled here (the action is not shown).
407 if (pixel_width == 0 && pixel_height == 0) {
411 std::string codec_string;
412 uint8_t nalu_length_size = 0;
414 const FourCC actual_format = entry.GetActualFormat();
415 const VideoCodec video_codec = FourCCToVideoCodec(actual_format);
// Parse the codec configuration record per format (the case labels are not
// shown).
416 switch (actual_format) {
// AVC: codec string and NAL unit length size come from the parsed
// configuration record.
418 AVCDecoderConfiguration avc_config;
419 if (!avc_config.Parse(entry.codec_config_record.data)) {
420 LOG(ERROR) <<
"Failed to parse avcc.";
423 codec_string = avc_config.GetCodecString();
424 nalu_length_size = avc_config.length_size();
// When the sample entry resolution disagrees with the configuration record,
// warn and use the record's values.
426 if (coded_width != avc_config.coded_width() ||
427 coded_height != avc_config.coded_height()) {
428 LOG(WARNING) <<
"Resolution in VisualSampleEntry (" << coded_width
429 <<
"," << coded_height
430 <<
") does not match with resolution in "
431 "AVCDecoderConfigurationRecord ("
432 << avc_config.coded_width() <<
","
433 << avc_config.coded_height()
434 <<
"). Use AVCDecoderConfigurationRecord.";
435 coded_width = avc_config.coded_width();
436 coded_height = avc_config.coded_height();
// Same for the pixel aspect ratio; the warning is only emitted when the
// sample entry value is not 1:1.
439 if (pixel_width != avc_config.pixel_width() ||
440 pixel_height != avc_config.pixel_height()) {
441 LOG_IF(WARNING, pixel_width != 1 || pixel_height != 1)
442 <<
"Pixel aspect ratio in PASP box (" << pixel_width <<
","
444 <<
") does not match with SAR in AVCDecoderConfigurationRecord "
446 << avc_config.pixel_width() <<
"," << avc_config.pixel_height()
447 <<
"). Use AVCDecoderConfigurationRecord.";
448 pixel_width = avc_config.pixel_width();
449 pixel_height = avc_config.pixel_height();
// HEVC: codec string and NAL unit length size come from the parsed
// configuration record.
455 HEVCDecoderConfiguration hevc_config;
456 if (!hevc_config.Parse(entry.codec_config_record.data)) {
457 LOG(ERROR) <<
"Failed to parse hevc.";
460 codec_string = hevc_config.GetCodecString(video_codec);
461 nalu_length_size = hevc_config.length_size();
// VP codecs: only the codec string is taken from the configuration record.
467 VPCodecConfiguration vp_config;
468 if (!vp_config.Parse(entry.codec_config_record.data)) {
469 LOG(ERROR) <<
"Failed to parse vpcc.";
472 codec_string = vp_config.GetCodecString(video_codec);
476 LOG(ERROR) <<
"Unsupported video format "
477 << FourCCToString(actual_format) <<
" in stsd box.";
// The encryption flag is read from the entry's protection scheme info.
481 bool is_encrypted = entry.sinf.info.track_encryption.is_encrypted;
482 DVLOG(1) <<
"is_video_track_encrypted_: " << is_encrypted;
483 streams.push_back(
new VideoStreamInfo(
484 track->header.track_id, timescale, duration, video_codec,
485 codec_string, track->media.header.language, coded_width, coded_height,
486 pixel_width, pixel_height,
488 nalu_length_size, vector_as_array(&entry.codec_config_record.data),
489 entry.codec_config_record.data.size(), is_encrypted));
// Report the collected streams, fetch keys for any 'pssh' boxes in the
// movie, then prepare the run iterator and start emitting samples.
493 init_cb_.Run(streams);
494 if (!FetchKeysIfNecessary(moov_->pssh))
496 runs_.reset(
new TrackRunIterator(moov_.get()));
497 RCHECK(runs_->Init());
498 ChangeState(kEmittingSamples);
// Parses a 'moof' box, rebuilds the track run iterator from the stored movie
// and initializes it with the parsed fragment, fetches decryption keys for any
// 'pssh' boxes in the fragment, and switches the parser to kEmittingSamples.
// (The declaration of |moof| and the return statements are not visible in this
// listing.)
502 bool MP4MediaParser::ParseMoof(BoxReader* reader) {
506 RCHECK(moof.Parse(reader));
508 runs_.reset(
new TrackRunIterator(moov_.get()));
509 RCHECK(runs_->Init(moof));
510 if (!FetchKeysIfNecessary(moof.pssh))
512 ChangeState(kEmittingSamples);
// Looks through the supplied 'pssh' headers for one whose system_id equals the
// bytes of kWidevineKeySystemId and asks the decryption key source to fetch
// keys using that header's data.
// (Return statements and some conditions are not visible in this listing.)
516 bool MP4MediaParser::FetchKeysIfNecessary(
517 const std::vector<ProtectionSystemSpecificHeader>& headers) {
// The case of no configured decryption key source is handled here (the action
// is not shown).
522 if (!decryption_key_source_)
// Decode the hex system id into bytes so it can be compared with each header.
527 std::vector<uint8_t> widevine_system_id;
528 base::HexStringToBytes(kWidevineKeySystemId, &widevine_system_id);
529 for (std::vector<ProtectionSystemSpecificHeader>::const_iterator iter =
530 headers.begin(); iter != headers.end(); ++iter) {
531 if (iter->system_id == widevine_system_id) {
532 Status status = decryption_key_source_->
FetchKeys(iter->data);
534 LOG(ERROR) <<
"Error fetching decryption keys: " << status;
541 LOG(ERROR) <<
"No viable 'pssh' box found for content decryption.";
// Emits the next sample of the current track run: locates the sample bytes in
// the queue, decrypts them when the run is encrypted, stamps dts/pts/duration
// and passes the sample to new_sample_cb_, then advances the run iterator.
// (Return statements and several intermediate lines are not visible in this
// listing.)
545 bool MP4MediaParser::EnqueueSample(
bool* err) {
// No valid run left: trim the queue up to mdat_tail_ and go back to parsing
// boxes.
546 if (!runs_->IsRunValid()) {
549 if (!queue_.
Trim(mdat_tail_))
552 ChangeState(kParsingBoxes);
// The case of the current sample not being valid is handled here (the action
// is not shown).
556 if (!runs_->IsSampleValid()) {
565 queue_.Peek(&buf, &buf_size);
// Runs that are neither audio nor video are handled here (the action is not
// shown).
570 if (!runs_->is_audio() && !runs_->is_video())
// Cache auxiliary info when the iterator asks for it. Its offset is relative
// to the moof head; a buffer smaller than the aux info size is handled before
// caching (the action is not shown).
580 if (runs_->AuxInfoNeedsToBeCached()) {
581 queue_.
PeekAt(runs_->aux_info_offset() + moof_head_, &buf, &buf_size);
582 if (buf_size < runs_->aux_info_size())
584 *err = !runs_->CacheAuxInfo(buf, buf_size);
// Locate the sample data; its offset is also relative to the moof head.
588 int64_t sample_offset = runs_->sample_offset() + moof_head_;
589 queue_.
PeekAt(sample_offset, &buf, &buf_size);
// Fewer bytes available than the sample size. A sample offset that lies
// before the queue head is logged as an error.
590 if (buf_size < runs_->sample_size()) {
591 if (sample_offset < queue_.
head()) {
592 LOG(ERROR) <<
"Incorrect sample offset " << sample_offset
593 <<
" < " << queue_.
head();
// Tail of the expression that creates |stream_sample| from the sample bytes
// (the start of the statement is not shown).
600 buf, runs_->sample_size(), runs_->is_keyframe()));
// Decrypt the sample in place when the run is marked encrypted; a missing
// decrypt config or a decryption failure is logged as an error.
601 if (runs_->is_encrypted()) {
602 scoped_ptr<DecryptConfig> decrypt_config = runs_->GetDecryptConfig();
603 if (!decrypt_config ||
604 !DecryptSampleBuffer(decrypt_config.get(),
605 stream_sample->writable_data(),
606 stream_sample->data_size())) {
608 LOG(ERROR) <<
"Cannot decrypt samples.";
// Copy the timing information from the run iterator; pts is set from the
// iterator's cts value.
613 stream_sample->set_dts(runs_->dts());
614 stream_sample->set_pts(runs_->cts());
615 stream_sample->set_duration(runs_->duration());
617 DVLOG(3) <<
"Pushing frame: "
618 <<
", key=" << runs_->is_keyframe()
619 <<
", dur=" << runs_->duration()
620 <<
", dts=" << runs_->dts()
621 <<
", cts=" << runs_->cts()
622 <<
", size=" << runs_->sample_size();
// Deliver the sample; a false result from the callback is logged as an error.
624 if (!new_sample_cb_.Run(runs_->track_id(), stream_sample)) {
626 LOG(ERROR) <<
"Failed to process the sample.";
// Move on to the next sample of the run.
630 runs_->AdvanceSample();
// Decrypts |buffer| (of |buffer_size| bytes) in place according to
// |decrypt_config|, using an AesCtrEncryptor looked up in decryptor_map_ by
// key id. A sample without subsample entries is decrypted as a whole;
// otherwise only the cipher bytes of each subsample are decrypted.
// (Return statements and a few intermediate lines are not visible in this
// listing.)
634 bool MP4MediaParser::DecryptSampleBuffer(
const DecryptConfig* decrypt_config,
636 size_t buffer_size) {
637 DCHECK(decrypt_config);
// Without a key source an encrypted sample is reported as an error.
640 if (!decryption_key_source_) {
641 LOG(ERROR) <<
"Encrypted media sample encountered, but decryption is not "
// Look up the encryptor for this key id.
647 AesCtrEncryptor* encryptor;
648 DecryptorMap::iterator found = decryptor_map_.find(decrypt_config->key_id());
649 if (found == decryptor_map_.end()) {
// Not found: ask the key source for the key, create a new encryptor
// initialized with that key and the sample IV, and store it in the map.
652 Status status(decryption_key_source_->
GetKey(decrypt_config->key_id(),
655 LOG(ERROR) <<
"Error retrieving decryption key: " << status;
658 scoped_ptr<AesCtrEncryptor> new_encryptor(
new AesCtrEncryptor);
659 if (!new_encryptor->InitializeWithIv(key.key, decrypt_config->iv())) {
660 LOG(ERROR) <<
"Failed to initialize AesCtrEncryptor for decryption.";
// The scoped_ptr gives up ownership here; the raw pointer is kept in
// decryptor_map_, which the destructor passes to STLDeleteValues.
663 encryptor = new_encryptor.release();
664 decryptor_map_[decrypt_config->key_id()] = encryptor;
// Found: use the stored encryptor and set the IV for this sample.
666 encryptor = found->second;
668 if (!encryptor->SetIv(decrypt_config->iv())) {
669 LOG(ERROR) <<
"Invalid initialization vector.";
// No subsample entries: decrypt the whole buffer in place.
673 if (decrypt_config->subsamples().empty()) {
675 if (!encryptor->Decrypt(buffer, buffer_size, buffer)) {
676 LOG(ERROR) <<
"Error during bulk sample decryption.";
// Subsample decryption: start at data_offset, then for each entry skip its
// clear bytes and decrypt its cipher bytes in place.
683 const std::vector<SubsampleEntry>& subsamples = decrypt_config->subsamples();
684 uint8_t* current_ptr = buffer;
685 const uint8_t* buffer_end = buffer + buffer_size;
// NOTE(review): the bounds checks below advance the pointer before comparing
// it with buffer_end; for very large offsets this may form a pointer outside
// the buffer - consider comparing byte counts instead. Confirm.
686 current_ptr += decrypt_config->data_offset();
687 if (current_ptr > buffer_end) {
688 LOG(ERROR) <<
"Subsample data_offset too large.";
691 for (std::vector<SubsampleEntry>::const_iterator iter = subsamples.begin();
692 iter != subsamples.end();
// Report an error if this subsample would extend past the end of the buffer.
694 if ((current_ptr + iter->clear_bytes + iter->cipher_bytes) > buffer_end) {
695 LOG(ERROR) <<
"Subsamples overflow sample buffer.";
698 current_ptr += iter->clear_bytes;
699 if (!encryptor->Decrypt(current_ptr, iter->cipher_bytes, current_ptr)) {
700 LOG(ERROR) <<
"Error decrypting subsample buffer.";
703 current_ptr += iter->cipher_bytes;
// Advances mdat_tail_ box by box until it reaches |offset|, then trims the
// queue up to the smaller of mdat_tail_ and |offset|.
// (The computation of |box_sz| and the return statements are not visible in
// this listing.)
708 bool MP4MediaParser::ReadAndDiscardMDATsUntil(
const int64_t offset) {
710 while (mdat_tail_ < offset) {
// Peek at the bytes located at the current tail position.
713 queue_.
PeekAt(mdat_tail_, &buf, &size);
// Step the tail over the box.
720 mdat_tail_ += box_sz;
722 queue_.
Trim(std::min(mdat_tail_, offset));
// Logs the requested state at verbose level 2. The statement that stores
// |new_state| is not visible in this listing.
726 void MP4MediaParser::ChangeState(State new_state) {
727 DVLOG(2) <<
"Changing state: " << new_state;