5 #include "packager/media/formats/mp4/mp4_media_parser.h"
9 #include "packager/base/callback.h"
10 #include "packager/base/callback_helpers.h"
11 #include "packager/base/logging.h"
12 #include "packager/base/memory/ref_counted.h"
13 #include "packager/base/strings/string_number_conversions.h"
14 #include "packager/media/base/aes_encryptor.h"
15 #include "packager/media/base/audio_stream_info.h"
16 #include "packager/media/base/buffer_reader.h"
17 #include "packager/media/base/decrypt_config.h"
18 #include "packager/media/base/key_source.h"
19 #include "packager/media/base/media_sample.h"
20 #include "packager/media/base/video_stream_info.h"
21 #include "packager/media/file/file.h"
22 #include "packager/media/file/file_closer.h"
23 #include "packager/media/filters/avc_decoder_configuration.h"
24 #include "packager/media/filters/vp_codec_configuration.h"
25 #include "packager/media/formats/mp4/box_definitions.h"
26 #include "packager/media/formats/mp4/box_reader.h"
27 #include "packager/media/formats/mp4/es_descriptor.h"
28 #include "packager/media/formats/mp4/rcheck.h"
29 #include "packager/media/formats/mp4/track_run_iterator.h"
31 namespace edash_packager {
// Converts a time value from one timescale to another: the input is divided by
// old_scale in double precision and the quotient is multiplied by new_scale.
// The floating-point product is converted back to the uint64_t return type.
// NOTE(review): the remaining parameter declarations and the closing brace are
// not present in this extract (embedded numbering jumps from 36 to 39).
// NOTE(review): routing a 64-bit integer through double presumably loses
// precision for very large inputs, and old_scale == 0 is not guarded here —
// confirm whether callers can hit either case.
36 uint64_t Rescale(uint64_t time_in_old_scale,
39 return (static_cast<double>(time_in_old_scale) / old_scale) * new_scale;
// Maps a FourCC to a VideoCodec. Only the fallback path is visible here, which
// yields kUnknownVideoCodec.
// NOTE(review): the per-format mapping (embedded lines 43-52) is missing from
// this extract, so the set of recognized FourCC values cannot be documented.
42 VideoCodec FourCCToCodec(FourCC fourcc) {
53 return kUnknownVideoCodec;
// Widevine key system id as a hex string. FetchKeysIfNecessary converts it to
// bytes with base::HexStringToBytes and compares it against each 'pssh'
// header's system_id.
57 const char kWidevineKeySystemId[] =
"edef8ba979d64acea3c827dcd51d21ed";
// Constructs a parser in the kWaitingForInit state with both moof_head_ and
// mdat_tail_ set to zero.
61 MP4MediaParser::MP4MediaParser()
62 : state_(kWaitingForInit), moof_head_(0), mdat_tail_(0) {}
// Destructor. decryptor_map_ holds raw AesCtrEncryptor pointers (they are
// released from a scoped_ptr when stored in DecryptSampleBuffer), so the mapped
// values are handed to STLDeleteValues here — presumably to delete them.
64 MP4MediaParser::~MP4MediaParser() {
65 STLDeleteValues(&decryptor_map_);
// NOTE(review): the first line of this signature, the remaining parameters and
// some body lines are absent from this extract; presumably this is the parser's
// initialization entry point — confirm against the full file.
//
// Visible behavior: DCHECKs that the parser is still in kWaitingForInit, that
// no init callback has been stored yet, and that both supplied callbacks are
// non-null. It then switches to kParsingBoxes and stores the sample callback
// and the decryption key source in the corresponding members.
69 const NewSampleCB& new_sample_cb,
71 DCHECK_EQ(state_, kWaitingForInit);
72 DCHECK(init_cb_.is_null());
73 DCHECK(!init_cb.is_null());
74 DCHECK(!new_sample_cb.is_null());
76 ChangeState(kParsingBoxes);
78 new_sample_cb_ = new_sample_cb;
79 decryption_key_source_ = decryption_key_source;
// Reset entry point. Its body is not visible in this extract.
82 void MP4MediaParser::Reset() {
// NOTE(review): the embedded numbering jumps from 82 to 90, so the statements
// below may belong to a separate routine whose header is missing here — confirm
// against the full file. They DCHECK that the parser has left kWaitingForInit
// and then move it to kParsingBoxes.
90 DCHECK_NE(state_, kWaitingForInit);
92 ChangeState(kParsingBoxes);
// NOTE(review): the header of this routine is not present in this extract;
// presumably it is the incremental parse entry point taking (buf, size) —
// confirm against the full file.
//
// Visible behavior: appends the incoming bytes to queue_, then repeatedly
// processes the queue for as long as the last step reported progress (result)
// and no error (err).
96 DCHECK_NE(state_, kWaitingForInit);
101 queue_.Push(buf, size);
// Only err is initialized by this declaration; result is assigned in each of
// the branches visible below before the loop condition reads it.
103 bool result, err =
false;
// While in kParsingBoxes, consume one top-level box per iteration.
106 if (state_ == kParsingBoxes) {
107 result = ParseBox(&err);
// Otherwise the parser is expected to be in kEmittingSamples: emit one sample.
109 DCHECK_EQ(kEmittingSamples, state_);
110 result = EnqueueSample(&err);
// The run iterator's max clear offset is taken relative to the start of the
// current 'moof' (moof_head_); mdat data before that point is discarded.
112 int64_t max_clear = runs_->GetMaxClearOffset() + moof_head_;
113 err = !ReadAndDiscardMDATsUntil(max_clear);
116 }
while (result && !err);
119 DLOG(ERROR) <<
"Error while parsing MP4";
// NOTE(review): the header of this routine and several body lines (returns,
// loop header, box header parsing) are not present in this extract; presumably
// it scans a file for its 'moov' box — confirm against the full file.
//
// Visible behavior: opens the media file, walks its top-level boxes by reading
// a fixed-size header for each one, feeds a 'moov' box (header plus contents)
// to Parse(), and seeks past the boxes it does not consume.
130 scoped_ptr<File, FileCloser> file(
133 LOG(ERROR) <<
"Unable to open media file '" << file_path <<
"'";
// A failed seek to offset 0 is only logged as a warning here.
136 if (!file->Seek(0)) {
137 LOG(WARNING) <<
"Filesystem does not support seeking on file '" << file_path
// file_position tracks the offset of the box currently being examined.
142 uint64_t file_position(0);
143 bool mdat_seen(
false);
// Read 16 bytes per top-level box to learn its type and size.
145 const uint32_t kBoxHeaderReadSize(16);
146 std::vector<uint8_t> buffer(kBoxHeaderReadSize);
147 int64_t bytes_read = file->Read(&buffer[0], kBoxHeaderReadSize);
// A zero-byte read is reported as "no 'moov' box found".
148 if (bytes_read == 0) {
149 LOG(ERROR) <<
"Could not find 'moov' box in file '" << file_path <<
"'";
// Any shorter read is reported as a read error; a negative return from Read
// also compares less than the header size and lands here.
152 if (bytes_read < kBoxHeaderReadSize) {
153 LOG(ERROR) <<
"Error reading media file '" << file_path <<
"'";
161 LOG(ERROR) <<
"Could not start top level box from file '" << file_path
// NOTE(review): the body of the 'mdat' branch is not visible; presumably it
// records that an 'mdat' was seen (see mdat_seen above) — confirm.
165 if (box_type == FOURCC_MDAT) {
167 }
else if (box_type == FOURCC_MOOV) {
// Feed the already-read header bytes of the 'moov' box to the parser first.
173 if (!
Parse(&buffer[0], bytes_read)) {
174 LOG(ERROR) <<
"Error parsing mp4 file '" << file_path <<
"'";
// Then read the rest of the box and feed it to the parser, looping until all
// of it is consumed. The buffer is sized to the full remainder, but Read may
// return fewer bytes than requested.
177 uint64_t bytes_to_read = box_size - bytes_read;
178 buffer.resize(bytes_to_read);
179 while (bytes_to_read > 0) {
180 bytes_read = file->Read(&buffer[0], bytes_to_read);
181 if (bytes_read <= 0) {
182 LOG(ERROR) <<
"Error reading 'moov' contents from file '" << file_path
186 if (!
Parse(&buffer[0], bytes_read)) {
187 LOG(ERROR) <<
"Error parsing mp4 file '" << file_path <<
"'";
190 bytes_to_read -= bytes_read;
// Advance to the start of the next top-level box.
196 file_position += box_size;
197 if (!file->Seek(file_position)) {
198 LOG(ERROR) <<
"Error skipping box in mp4 file '" << file_path <<
"'";
// Examines the top-level box at the front of queue_ and dispatches on its type.
// Failures of the 'moov'/'moof' handlers are reported through *err.
// NOTE(review): the reader construction and every return statement are missing
// from this extract, so the meaning of the bool return value cannot be
// confirmed from the visible lines.
205 bool MP4MediaParser::ParseBox(
bool* err) {
208 queue_.Peek(&buf, &size);
// A null reader is handled on a line not visible here (presumably: not enough
// data buffered yet — confirm).
213 if (reader.get() == NULL)
// An 'mdat' box reaching this point is reported as not implemented.
216 if (reader->type() == FOURCC_MDAT) {
220 NOTIMPLEMENTED() <<
" Files with MDAT before MOOV is not supported yet.";
// Record where the current box ends: queue head offset plus box size.
226 mdat_tail_ = queue_.
head() + reader->size();
228 if (reader->type() == FOURCC_MOOV) {
229 *err = !ParseMoov(reader.get());
230 }
else if (reader->type() == FOURCC_MOOF) {
// Remember where this 'moof' starts; Parse and EnqueueSample add moof_head_ to
// the offsets reported by runs_.
231 moof_head_ = queue_.
head();
232 *err = !ParseMoof(reader.get());
240 VLOG(2) <<
"Skipping top-level box: " << FourCCToString(reader->type());
// Drop the box from the queue.
243 queue_.Pop(reader->size());
// Parses the 'moov' box and builds a StreamInfo for each audio or video track
// it can describe. It then passes the list to init_cb_, fetches decryption
// keys for the movie-level 'pssh' boxes, creates the track run iterator and
// switches the parser to kEmittingSamples.
// NOTE(review): many lines (returns, else/case labels, closing braces, some
// constructor arguments) are missing from this extract; comments below describe
// only what the visible lines establish. RCHECK comes from rcheck.h and
// presumably returns false from the function when its condition fails.
247 bool MP4MediaParser::ParseMoov(BoxReader* reader) {
251 moov_.reset(
new Movie);
252 RCHECK(moov_->Parse(reader));
255 std::vector<scoped_refptr<StreamInfo> > streams;
257 for (std::vector<Track>::const_iterator track = moov_->tracks.begin();
258 track != moov_->tracks.end(); ++track) {
259 const uint32_t timescale = track->media.header.timescale;
// Duration selection, in order of preference:
//   1. the track's own media header duration, when positive;
//   2. the fragment duration from the movie extends header, rescaled from the
//      movie timescale (target-scale argument is on a line not visible here);
//   3. the movie header duration, when positive and not the uint64_t maximum,
//      rescaled from the movie timescale to the track timescale.
// Otherwise the duration stays 0.
262 uint64_t duration = 0;
263 if (track->media.header.duration > 0) {
264 duration = track->media.header.duration;
265 }
else if (moov_->extends.header.fragment_duration > 0) {
266 DCHECK(moov_->header.timescale != 0);
267 duration = Rescale(moov_->extends.header.fragment_duration,
268 moov_->header.timescale,
270 }
else if (moov_->header.duration > 0 &&
271 moov_->header.duration != std::numeric_limits<uint64_t>::max()) {
272 DCHECK(moov_->header.timescale != 0);
274 Rescale(moov_->header.duration, moov_->header.timescale, timescale);
277 const SampleDescription& samp_descr =
278 track->media.information.sample_table.description;
// Sample description index: when track-extends entries exist, use the default
// index from the entry whose track_id matches this track.
284 if (moov_->extends.tracks.size() > 0) {
285 for (
size_t t = 0; t < moov_->extends.tracks.size(); t++) {
286 const TrackExtends& trex = moov_->extends.tracks[t];
287 if (trex.track_id == track->header.track_id) {
288 desc_idx = trex.default_sample_description_index;
// Otherwise (the else line is not visible here) take the index from the first
// sample-to-chunk entry, which must exist.
293 const std::vector<ChunkInfo>& chunk_info =
294 track->media.information.sample_table.sample_to_chunk.chunk_info;
295 RCHECK(chunk_info.size() > 0);
296 desc_idx = chunk_info[0].sample_description_index;
// NOTE(review): the index must be positive here and is later used directly to
// subscript audio_entries/video_entries; any conversion between the two
// conventions would be on a line missing from this extract — confirm.
298 RCHECK(desc_idx > 0);
// ---- Audio track ----
301 if (track->media.handler.type == kAudio) {
302 RCHECK(!samp_descr.audio_entries.empty());
// Out-of-range index handling is on a line not visible here.
306 if (desc_idx >= samp_descr.audio_entries.size())
308 const AudioSampleEntry& entry = samp_descr.audio_entries[desc_idx];
// Accepted entry formats: MP4A, EAC3, or ENCA whose sinf format is MP4A.
310 if (!(entry.format == FOURCC_MP4A || entry.format == FOURCC_EAC3 ||
311 (entry.format == FOURCC_ENCA &&
312 entry.sinf.format.format == FOURCC_MP4A))) {
313 LOG(ERROR) <<
"Unsupported audio format 0x"
314 << std::hex << entry.format <<
" in stsd box.";
318 ObjectType audio_type = entry.esds.es_descriptor.object_type();
319 DVLOG(1) <<
"audio_type " << std::hex << audio_type;
// An EAC3 entry whose esds object type is kForbidden is special-cased; the body
// of this branch is not visible (presumably it sets audio_type to kEAC3 —
// confirm).
320 if (audio_type == kForbidden && entry.format == FOURCC_EAC3) {
324 AudioCodec codec = kUnknownAudioCodec;
325 uint8_t num_channels = 0;
326 uint32_t sampling_frequency = 0;
327 uint8_t audio_object_type = 0;
328 std::vector<uint8_t> extra_data;
// AAC: channel count, frequency and object type come from the parsed
// AACAudioSpecificConfig; extra data is the decoder-specific info.
331 if (entry.esds.es_descriptor.IsAAC()) {
333 const AACAudioSpecificConfig& aac_audio_specific_config =
334 entry.esds.aac_audio_specific_config;
335 num_channels = aac_audio_specific_config.num_channels();
336 sampling_frequency = aac_audio_specific_config.frequency();
337 audio_object_type = aac_audio_specific_config.audio_object_type();
338 extra_data = entry.esds.es_descriptor.decoder_specific_info();
339 }
else if (audio_type == kEAC3) {
// EAC3: channel count and sample rate come straight from the sample entry.
341 num_channels = entry.channelcount;
342 sampling_frequency = entry.samplerate;
344 LOG(ERROR) <<
"Unsupported audio object type 0x"
345 << std::hex << audio_type <<
" in esds.";
349 bool is_encrypted = entry.sinf.info.track_encryption.is_encrypted;
350 DVLOG(1) <<
"is_audio_track_encrypted_: " << is_encrypted;
// Several AudioStreamInfo constructor arguments are on lines not visible here.
351 streams.push_back(
new AudioStreamInfo(
352 track->header.track_id,
357 track->media.header.language,
361 extra_data.size() ? &extra_data[0] : NULL,
// ---- Video track ----
366 if (track->media.handler.type == kVideo) {
367 RCHECK(!samp_descr.video_entries.empty());
// Out-of-range index handling is on a line not visible here.
368 if (desc_idx >= samp_descr.video_entries.size())
370 const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx];
372 uint32_t coded_width = entry.width;
373 uint32_t coded_height = entry.height;
374 uint32_t pixel_width = entry.pixel_aspect.h_spacing;
375 uint32_t pixel_height = entry.pixel_aspect.v_spacing;
// Both spacings zero is special-cased; the body is not visible (presumably a
// default aspect ratio is applied — confirm).
376 if (pixel_width == 0 && pixel_height == 0) {
380 std::string codec_string;
381 uint8_t nalu_length_size = 0;
383 const FourCC actual_format = entry.GetActualFormat();
384 const VideoCodec video_codec = FourCCToCodec(actual_format);
// Per-codec handling; the case labels are on lines not visible here.
385 switch (actual_format) {
387 AVCDecoderConfiguration avc_config;
388 if (!avc_config.Parse(entry.codec_config_record.data)) {
389 LOG(ERROR) <<
"Failed to parse avcc.";
392 codec_string = avc_config.GetCodecString();
393 nalu_length_size = avc_config.length_size();
// When the sample entry and the AVC configuration disagree on resolution, warn
// and use the values from the AVC configuration.
395 if (coded_width != avc_config.coded_width() ||
396 coded_height != avc_config.coded_height()) {
397 LOG(WARNING) <<
"Resolution in VisualSampleEntry (" << coded_width
398 <<
"," << coded_height
399 <<
") does not match with resolution in "
400 "AVCDecoderConfigurationRecord ("
401 << avc_config.coded_width() <<
","
402 << avc_config.coded_height()
403 <<
"). Use AVCDecoderConfigurationRecord.";
404 coded_width = avc_config.coded_width();
405 coded_height = avc_config.coded_height();
// Same policy for the pixel aspect ratio, except that the warning is only
// logged when the entry's values are not both 1.
408 if (pixel_width != avc_config.pixel_width() ||
409 pixel_height != avc_config.pixel_height()) {
410 LOG_IF(WARNING, pixel_width != 1 || pixel_height != 1)
411 <<
"Pixel aspect ratio in PASP box (" << pixel_width <<
","
413 <<
") does not match with SAR in AVCDecoderConfigurationRecord "
415 << avc_config.pixel_width() <<
"," << avc_config.pixel_height()
416 <<
"). Use AVCDecoderConfigurationRecord.";
417 pixel_width = avc_config.pixel_width();
418 pixel_height = avc_config.pixel_height();
// VP codecs: only the codec string is derived from the parsed configuration.
425 VPCodecConfiguration vp_config;
426 if (!vp_config.Parse(entry.codec_config_record.data)) {
427 LOG(ERROR) <<
"Failed to parse vpcc.";
430 codec_string = vp_config.GetCodecString(video_codec);
434 LOG(ERROR) <<
"Unsupported video format "
435 << FourCCToString(actual_format) <<
" in stsd box.";
439 bool is_encrypted = entry.sinf.info.track_encryption.is_encrypted;
440 DVLOG(1) <<
"is_video_track_encrypted_: " << is_encrypted;
441 streams.push_back(
new VideoStreamInfo(
442 track->header.track_id, timescale, duration, video_codec,
443 codec_string, track->media.header.language, coded_width, coded_height,
444 pixel_width, pixel_height,
446 nalu_length_size, vector_as_array(&entry.codec_config_record.data),
447 entry.codec_config_record.data.size(), is_encrypted));
// Publish the discovered streams, then fetch keys for the movie-level 'pssh'
// boxes and start iterating track runs.
451 init_cb_.Run(streams);
452 if (!FetchKeysIfNecessary(moov_->pssh))
454 runs_.reset(
new TrackRunIterator(moov_.get()));
455 RCHECK(runs_->Init());
456 ChangeState(kEmittingSamples);
// Parses a 'moof' box, rebuilds the track run iterator for it from the stored
// movie (moov_), fetches decryption keys for any fragment-level 'pssh' boxes
// and switches the parser to kEmittingSamples.
// NOTE(review): the declaration of `moof` and the return statements are
// missing from this extract.
460 bool MP4MediaParser::ParseMoof(BoxReader* reader) {
464 RCHECK(moof.Parse(reader));
466 runs_.reset(
new TrackRunIterator(moov_.get()));
467 RCHECK(runs_->Init(moof));
468 if (!FetchKeysIfNecessary(moof.pssh))
470 ChangeState(kEmittingSamples);
// Scans the given 'pssh' headers for one whose system_id equals the Widevine
// key system id and asks decryption_key_source_ to fetch keys using that
// header's data.
// NOTE(review): the early-exit bodies and return statements are missing from
// this extract, so the exact return value on each path cannot be confirmed.
474 bool MP4MediaParser::FetchKeysIfNecessary(
475 const std::vector<ProtectionSystemSpecificHeader>& headers) {
// Without a key source there is nothing to fetch (branch body not visible).
480 if (!decryption_key_source_)
// Decode the hex constant once so it can be compared byte-for-byte.
485 std::vector<uint8_t> widevine_system_id;
486 base::HexStringToBytes(kWidevineKeySystemId, &widevine_system_id);
487 for (std::vector<ProtectionSystemSpecificHeader>::const_iterator iter =
488 headers.begin(); iter != headers.end(); ++iter) {
489 if (iter->system_id == widevine_system_id) {
490 Status status = decryption_key_source_->
FetchKeys(iter->data);
492 LOG(ERROR) <<
"Error fetching decryption keys: " << status;
499 LOG(ERROR) <<
"No viable 'pssh' box found for content decryption.";
// Emits the next sample of the current track run to new_sample_cb_: locates the
// sample bytes in queue_, decrypts them when the run is encrypted, sets
// dts/pts/duration and advances the run iterator.
// NOTE(review): most return statements and several branch bodies are missing
// from this extract; the comments below cover only the visible lines.
503 bool MP4MediaParser::EnqueueSample(
bool* err) {
// No valid run left: trim the queue up to mdat_tail_ and go back to parsing
// boxes.
504 if (!runs_->IsRunValid()) {
507 if (!queue_.
Trim(mdat_tail_))
510 ChangeState(kParsingBoxes);
// Handling for an invalid sample within a valid run is not visible here.
514 if (!runs_->IsSampleValid()) {
523 queue_.Peek(&buf, &buf_size);
// Runs that are neither audio nor video take a separate path (body not
// visible).
528 if (!runs_->is_audio() && !runs_->is_video())
// Auxiliary info must be cached before samples are read. Its offset is
// relative to the start of the current 'moof'.
538 if (runs_->AuxInfoNeedsToBeCached()) {
539 queue_.
PeekAt(runs_->aux_info_offset() + moof_head_, &buf, &buf_size);
// Fewer bytes buffered than the aux info needs: handled on a line not visible
// here (presumably by waiting for more data — confirm).
540 if (buf_size < runs_->aux_info_size())
542 *err = !runs_->CacheAuxInfo(buf, buf_size);
// Sample offsets are likewise relative to the start of the current 'moof'.
546 int64_t sample_offset = runs_->sample_offset() + moof_head_;
547 queue_.
PeekAt(sample_offset, &buf, &buf_size);
548 if (buf_size < runs_->sample_size()) {
// An offset before the queue head is logged as an incorrect sample offset.
549 if (sample_offset < queue_.
head()) {
550 LOG(ERROR) <<
"Incorrect sample offset " << sample_offset
551 <<
" < " << queue_.
head();
// Tail of the stream_sample construction (its first line is not visible): the
// sample is built from buf with the run's sample size and keyframe flag.
558 buf, runs_->sample_size(), runs_->is_keyframe()));
// Encrypted runs are decrypted in place inside the sample's writable buffer.
559 if (runs_->is_encrypted()) {
560 scoped_ptr<DecryptConfig> decrypt_config = runs_->GetDecryptConfig();
561 if (!decrypt_config ||
562 !DecryptSampleBuffer(decrypt_config.get(),
563 stream_sample->writable_data(),
564 stream_sample->data_size())) {
566 LOG(ERROR) <<
"Cannot decrypt samples.";
// The sample's pts is taken from the run iterator's cts().
571 stream_sample->set_dts(runs_->dts());
572 stream_sample->set_pts(runs_->cts());
573 stream_sample->set_duration(runs_->duration());
575 DVLOG(3) <<
"Pushing frame: "
576 <<
", key=" << runs_->is_keyframe()
577 <<
", dur=" << runs_->duration()
578 <<
", dts=" << runs_->dts()
579 <<
", cts=" << runs_->cts()
580 <<
", size=" << runs_->sample_size();
// Hand the sample to the consumer; a false result is logged as a failure.
582 if (!new_sample_cb_.Run(runs_->track_id(), stream_sample)) {
584 LOG(ERROR) <<
"Failed to process the sample.";
588 runs_->AdvanceSample();
// Decrypts a sample in place with an AesCtrEncryptor. decrypt_config supplies
// the key id, IV, data offset and subsample layout; buffer and buffer_size
// describe the sample bytes (the buffer parameter's declaration line is not
// visible in this extract).
// NOTE(review): return statements, an else line and some closing braces are
// missing from this extract.
592 bool MP4MediaParser::DecryptSampleBuffer(
const DecryptConfig* decrypt_config,
594 size_t buffer_size) {
595 DCHECK(decrypt_config);
// Decryption needs a key source.
598 if (!decryption_key_source_) {
599 LOG(ERROR) <<
"Encrypted media sample encountered, but decryption is not "
// Encryptors are looked up in decryptor_map_ by key id.
605 AesCtrEncryptor* encryptor;
606 DecryptorMap::iterator found = decryptor_map_.find(decrypt_config->key_id());
607 if (found == decryptor_map_.end()) {
// Not cached yet: obtain the key from the key source and build a new encryptor
// initialized with that key and the sample's IV.
610 Status status(decryption_key_source_->
GetKey(decrypt_config->key_id(),
613 LOG(ERROR) <<
"Error retrieving decryption key: " << status;
616 scoped_ptr<AesCtrEncryptor> new_encryptor(
new AesCtrEncryptor);
617 if (!new_encryptor->InitializeWithIv(key.key, decrypt_config->iv())) {
618 LOG(ERROR) <<
"Failed to initialize AesCtrEncryptor for decryption.";
// The raw pointer is released into decryptor_map_, which holds it from here on;
// the destructor passes the map to STLDeleteValues.
621 encryptor = new_encryptor.release();
622 decryptor_map_[decrypt_config->key_id()] = encryptor;
// Cached encryptor (the else line is not visible here).
624 encryptor = found->second;
// Set this sample's IV on the encryptor before decrypting.
626 if (!encryptor->SetIv(decrypt_config->iv())) {
627 LOG(ERROR) <<
"Invalid initialization vector.";
// No subsample entries: decrypt the whole buffer in one call.
631 if (decrypt_config->subsamples().empty()) {
633 if (!encryptor->Decrypt(buffer, buffer_size, buffer)) {
634 LOG(ERROR) <<
"Error during bulk sample decryption.";
// Subsample decryption: start at data_offset, then for each entry skip its
// clear bytes and decrypt its cipher bytes in place. Both the starting offset
// and every entry are bounds-checked against the end of the buffer.
641 const std::vector<SubsampleEntry>& subsamples = decrypt_config->subsamples();
642 uint8_t* current_ptr = buffer;
643 const uint8_t* buffer_end = buffer + buffer_size;
644 current_ptr += decrypt_config->data_offset();
645 if (current_ptr > buffer_end) {
646 LOG(ERROR) <<
"Subsample data_offset too large.";
649 for (std::vector<SubsampleEntry>::const_iterator iter = subsamples.begin();
650 iter != subsamples.end();
652 if ((current_ptr + iter->clear_bytes + iter->cipher_bytes) > buffer_end) {
653 LOG(ERROR) <<
"Subsamples overflow sample buffer.";
656 current_ptr += iter->clear_bytes;
657 if (!encryptor->Decrypt(current_ptr, iter->cipher_bytes, current_ptr)) {
658 LOG(ERROR) <<
"Error decrypting subsample buffer.";
661 current_ptr += iter->cipher_bytes;
// Advances mdat_tail_ one box at a time until it reaches `offset`, then trims
// queue_ up to the smaller of mdat_tail_ and offset.
// NOTE(review): the lines that read the box header (producing box_sz), any
// error handling and the return statements are missing from this extract.
666 bool MP4MediaParser::ReadAndDiscardMDATsUntil(
const int64_t offset) {
668 while (mdat_tail_ < offset) {
// Look at the data starting at the current tail.
671 queue_.
PeekAt(mdat_tail_, &buf, &size);
// Step past that box.
678 mdat_tail_ += box_sz;
// The trim point is capped at `offset`, so it never goes beyond the requested
// position even when mdat_tail_ has moved past it.
680 queue_.
Trim(std::min(mdat_tail_, offset));
// State transition helper. The visible part logs the requested state at
// DVLOG level 2.
// NOTE(review): the rest of the body (presumably the assignment to state_) is
// not present in this extract — confirm against the full file.
684 void MP4MediaParser::ChangeState(State new_state) {
685 DVLOG(2) <<
"Changing state: " << new_state;