Shaka Packager SDK
mp4_media_parser.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "packager/media/formats/mp4/mp4_media_parser.h"
6 
7 #include <algorithm>
8 #include <limits>
9 
10 #include "packager/base/callback.h"
11 #include "packager/base/callback_helpers.h"
12 #include "packager/base/logging.h"
13 #include "packager/base/strings/string_number_conversions.h"
14 #include "packager/file/file.h"
15 #include "packager/file/file_closer.h"
16 #include "packager/media/base/audio_stream_info.h"
17 #include "packager/media/base/buffer_reader.h"
18 #include "packager/media/base/decrypt_config.h"
19 #include "packager/media/base/key_source.h"
20 #include "packager/media/base/macros.h"
21 #include "packager/media/base/media_sample.h"
22 #include "packager/media/base/rcheck.h"
23 #include "packager/media/base/video_stream_info.h"
24 #include "packager/media/codecs/ac3_audio_util.h"
25 #include "packager/media/codecs/avc_decoder_configuration_record.h"
26 #include "packager/media/codecs/ec3_audio_util.h"
27 #include "packager/media/codecs/es_descriptor.h"
28 #include "packager/media/codecs/hevc_decoder_configuration_record.h"
29 #include "packager/media/codecs/vp_codec_configuration_record.h"
30 #include "packager/media/formats/mp4/box_definitions.h"
31 #include "packager/media/formats/mp4/box_reader.h"
32 #include "packager/media/formats/mp4/track_run_iterator.h"
33 
34 namespace shaka {
35 namespace media {
36 namespace mp4 {
37 namespace {
38 
// Converts |time_in_old_scale|, expressed in ticks of |old_scale| per second,
// into ticks of |new_scale| per second, rounding towards zero.
//
// The conversion is done with integer arithmetic only, so inputs larger than
// 2^53 are not rounded by an intermediate floating point value. The time is
// split into whole and fractional parts of |old_scale| so that the only
// multiplication that can overflow is the one whose true result does not fit
// in 64 bits anyway.
//
// Returns 0 if |old_scale| is 0, as no meaningful conversion exists.
uint64_t Rescale(uint64_t time_in_old_scale,
                 uint32_t old_scale,
                 uint32_t new_scale) {
  if (old_scale == 0)
    return 0;

  const uint64_t whole_units = time_in_old_scale / old_scale;
  const uint64_t leftover_ticks = time_in_old_scale % old_scale;
  // |leftover_ticks| < 2^32 and |new_scale| < 2^32, so the product below
  // always fits in 64 bits.
  return whole_units * new_scale + leftover_ticks * new_scale / old_scale;
}
44 
45 H26xStreamFormat GetH26xStreamFormat(FourCC fourcc) {
46  switch (fourcc) {
47  case FOURCC_avc1:
48  return H26xStreamFormat::kNalUnitStreamWithoutParameterSetNalus;
49  case FOURCC_avc3:
50  return H26xStreamFormat::kNalUnitStreamWithParameterSetNalus;
51  case FOURCC_hev1:
52  return H26xStreamFormat::kNalUnitStreamWithParameterSetNalus;
53  case FOURCC_hvc1:
54  return H26xStreamFormat::kNalUnitStreamWithoutParameterSetNalus;
55  default:
56  return H26xStreamFormat::kUnSpecified;
57  }
58 }
59 
60 Codec FourCCToCodec(FourCC fourcc) {
61  switch (fourcc) {
62  case FOURCC_avc1:
63  case FOURCC_avc3:
64  return kCodecH264;
65  case FOURCC_hev1:
66  case FOURCC_hvc1:
67  return kCodecH265;
68  case FOURCC_vp08:
69  return kCodecVP8;
70  case FOURCC_vp09:
71  return kCodecVP9;
72  case FOURCC_vp10:
73  return kCodecVP10;
74  case FOURCC_Opus:
75  return kCodecOpus;
76  case FOURCC_dtsc:
77  return kCodecDTSC;
78  case FOURCC_dtsh:
79  return kCodecDTSH;
80  case FOURCC_dtsl:
81  return kCodecDTSL;
82  case FOURCC_dtse:
83  return kCodecDTSE;
84  case FOURCC_dtsp:
85  return kCodecDTSP;
86  case FOURCC_dtsm:
87  return kCodecDTSM;
88  case FOURCC_ac_3:
89  return kCodecAC3;
90  case FOURCC_ec_3:
91  return kCodecEAC3;
92  default:
93  return kUnknownCodec;
94  }
95 }
96 
// Channel count accepted for DTS audio signalled through 'esds'; this is the
// channel count of a 5.1 layout.
constexpr uint8_t kDtsAudioNumChannels = 6;
// Number of nanoseconds in one second, used to express sample counts as
// nanosecond durations.
constexpr uint64_t kNanosecondsPerSecond = 1000000000ull;
100 
101 } // namespace
102 
103 MP4MediaParser::MP4MediaParser()
104  : state_(kWaitingForInit),
105  decryption_key_source_(NULL),
106  moof_head_(0),
107  mdat_tail_(0) {}
108 
109 MP4MediaParser::~MP4MediaParser() {}
110 
111 void MP4MediaParser::Init(const InitCB& init_cb,
112  const NewSampleCB& new_sample_cb,
113  KeySource* decryption_key_source) {
114  DCHECK_EQ(state_, kWaitingForInit);
115  DCHECK(init_cb_.is_null());
116  DCHECK(!init_cb.is_null());
117  DCHECK(!new_sample_cb.is_null());
118 
119  ChangeState(kParsingBoxes);
120  init_cb_ = init_cb;
121  new_sample_cb_ = new_sample_cb;
122  decryption_key_source_ = decryption_key_source;
123  if (decryption_key_source)
124  decryptor_source_.reset(new DecryptorSource(decryption_key_source));
125 }
126 
127 void MP4MediaParser::Reset() {
128  queue_.Reset();
129  runs_.reset();
130  moof_head_ = 0;
131  mdat_tail_ = 0;
132 }
133 
135  DCHECK_NE(state_, kWaitingForInit);
136  Reset();
137  ChangeState(kParsingBoxes);
138  return true;
139 }
140 
141 bool MP4MediaParser::Parse(const uint8_t* buf, int size) {
142  DCHECK_NE(state_, kWaitingForInit);
143 
144  if (state_ == kError)
145  return false;
146 
147  queue_.Push(buf, size);
148 
149  bool result, err = false;
150 
151  do {
152  if (state_ == kParsingBoxes) {
153  result = ParseBox(&err);
154  } else {
155  DCHECK_EQ(kEmittingSamples, state_);
156  result = EnqueueSample(&err);
157  if (result) {
158  int64_t max_clear = runs_->GetMaxClearOffset() + moof_head_;
159  err = !ReadAndDiscardMDATsUntil(max_clear);
160  }
161  }
162  } while (result && !err);
163 
164  if (err) {
165  DLOG(ERROR) << "Error while parsing MP4";
166  moov_.reset();
167  Reset();
168  ChangeState(kError);
169  return false;
170  }
171 
172  return true;
173 }
174 
175 bool MP4MediaParser::LoadMoov(const std::string& file_path) {
176  std::unique_ptr<File, FileCloser> file(
177  File::OpenWithNoBuffering(file_path.c_str(), "r"));
178  if (!file) {
179  LOG(ERROR) << "Unable to open media file '" << file_path << "'";
180  return false;
181  }
182  if (!file->Seek(0)) {
183  LOG(WARNING) << "Filesystem does not support seeking on file '" << file_path
184  << "'";
185  return false;
186  }
187 
188  uint64_t file_position(0);
189  bool mdat_seen(false);
190  while (true) {
191  const uint32_t kBoxHeaderReadSize(16);
192  std::vector<uint8_t> buffer(kBoxHeaderReadSize);
193  int64_t bytes_read = file->Read(&buffer[0], kBoxHeaderReadSize);
194  if (bytes_read == 0) {
195  LOG(ERROR) << "Could not find 'moov' box in file '" << file_path << "'";
196  return false;
197  }
198  if (bytes_read < kBoxHeaderReadSize) {
199  LOG(ERROR) << "Error reading media file '" << file_path << "'";
200  return false;
201  }
202  uint64_t box_size;
203  FourCC box_type;
204  bool err;
205  if (!BoxReader::StartBox(&buffer[0], kBoxHeaderReadSize, &box_type,
206  &box_size, &err)) {
207  LOG(ERROR) << "Could not start box from file '" << file_path << "'";
208  return false;
209  }
210  if (box_type == FOURCC_mdat) {
211  mdat_seen = true;
212  } else if (box_type == FOURCC_moov) {
213  if (!mdat_seen) {
214  // 'moov' is before 'mdat'. Nothing to do.
215  break;
216  }
217  // 'mdat' before 'moov'. Read and parse 'moov'.
218  if (!Parse(&buffer[0], bytes_read)) {
219  LOG(ERROR) << "Error parsing mp4 file '" << file_path << "'";
220  return false;
221  }
222  uint64_t bytes_to_read = box_size - bytes_read;
223  buffer.resize(bytes_to_read);
224  while (bytes_to_read > 0) {
225  bytes_read = file->Read(&buffer[0], bytes_to_read);
226  if (bytes_read <= 0) {
227  LOG(ERROR) << "Error reading 'moov' contents from file '" << file_path
228  << "'";
229  return false;
230  }
231  if (!Parse(&buffer[0], bytes_read)) {
232  LOG(ERROR) << "Error parsing mp4 file '" << file_path << "'";
233  return false;
234  }
235  bytes_to_read -= bytes_read;
236  }
237  queue_.Reset(); // So that we don't need to adjust data offsets.
238  mdat_tail_ = 0; // So it will skip boxes until mdat.
239  break; // Done.
240  }
241  file_position += box_size;
242  if (!file->Seek(file_position)) {
243  LOG(ERROR) << "Error skipping box in mp4 file '" << file_path << "'";
244  return false;
245  }
246  }
247  return true;
248 }
249 
250 bool MP4MediaParser::ParseBox(bool* err) {
251  const uint8_t* buf;
252  int size;
253  queue_.Peek(&buf, &size);
254  if (!size)
255  return false;
256 
257  std::unique_ptr<BoxReader> reader(BoxReader::ReadBox(buf, size, err));
258  if (reader.get() == NULL)
259  return false;
260 
261  if (reader->type() == FOURCC_mdat) {
262  if (!moov_) {
263  // For seekable files, we seek to the 'moov' and load the 'moov' first
264  // then seek back (see LoadMoov function for details); we do not support
265  // having 'mdat' before 'moov' for non-seekable files. The code ends up
266  // here only if it is a non-seekable file.
267  NOTIMPLEMENTED() << " Non-seekable Files with 'mdat' box before 'moov' "
268  "box is not supported.";
269  *err = true;
270  return false;
271  } else {
272  // This can happen if there are unused 'mdat' boxes, which is unusual
273  // but allowed by the spec. Ignore the 'mdat' and proceed.
274  LOG(INFO)
275  << "Ignore unused 'mdat' box - this could be as a result of extra "
276  "not usable 'mdat' or 'mdat' associated with unrecognized track.";
277  }
278  }
279 
280  // Set up mdat offset for ReadMDATsUntil().
281  mdat_tail_ = queue_.head() + reader->size();
282 
283  if (reader->type() == FOURCC_moov) {
284  *err = !ParseMoov(reader.get());
285  } else if (reader->type() == FOURCC_moof) {
286  moof_head_ = queue_.head();
287  *err = !ParseMoof(reader.get());
288 
289  // Return early to avoid evicting 'moof' data from queue. Auxiliary info may
290  // be located anywhere in the file, including inside the 'moof' itself.
291  // (Since 'default-base-is-moof' is mandated, no data references can come
292  // before the head of the 'moof', so keeping this box around is sufficient.)
293  return !(*err);
294  } else {
295  VLOG(2) << "Skipping top-level box: " << FourCCToString(reader->type());
296  }
297 
298  queue_.Pop(static_cast<int>(reader->size()));
299  return !(*err);
300 }
301 
302 bool MP4MediaParser::ParseMoov(BoxReader* reader) {
303  if (moov_)
304  return true; // Already parsed the 'moov' box.
305 
306  moov_.reset(new Movie);
307  RCHECK(moov_->Parse(reader));
308  runs_.reset();
309 
310  std::vector<std::shared_ptr<StreamInfo>> streams;
311 
312  for (std::vector<Track>::const_iterator track = moov_->tracks.begin();
313  track != moov_->tracks.end(); ++track) {
314  const uint32_t timescale = track->media.header.timescale;
315 
316  // Calculate duration (based on timescale).
317  uint64_t duration = 0;
318  if (track->media.header.duration > 0) {
319  duration = track->media.header.duration;
320  } else if (moov_->extends.header.fragment_duration > 0) {
321  DCHECK(moov_->header.timescale != 0);
322  duration = Rescale(moov_->extends.header.fragment_duration,
323  moov_->header.timescale,
324  timescale);
325  } else if (moov_->header.duration > 0 &&
326  moov_->header.duration != std::numeric_limits<uint64_t>::max()) {
327  DCHECK(moov_->header.timescale != 0);
328  duration =
329  Rescale(moov_->header.duration, moov_->header.timescale, timescale);
330  }
331 
332  const SampleDescription& samp_descr =
333  track->media.information.sample_table.description;
334 
335  size_t desc_idx = 0;
336 
337  // Read sample description index from mvex if it exists otherwise read
338  // from the first entry in Sample To Chunk box.
339  if (moov_->extends.tracks.size() > 0) {
340  for (size_t t = 0; t < moov_->extends.tracks.size(); t++) {
341  const TrackExtends& trex = moov_->extends.tracks[t];
342  if (trex.track_id == track->header.track_id) {
343  desc_idx = trex.default_sample_description_index;
344  break;
345  }
346  }
347  } else {
348  const std::vector<ChunkInfo>& chunk_info =
349  track->media.information.sample_table.sample_to_chunk.chunk_info;
350  RCHECK(chunk_info.size() > 0);
351  desc_idx = chunk_info[0].sample_description_index;
352  }
353  RCHECK(desc_idx > 0);
354  desc_idx -= 1; // BMFF descriptor index is one-based
355 
356  if (samp_descr.type == kAudio) {
357  RCHECK(!samp_descr.audio_entries.empty());
358 
359  // It is not uncommon to find otherwise-valid files with incorrect sample
360  // description indices, so we fail gracefully in that case.
361  if (desc_idx >= samp_descr.audio_entries.size())
362  desc_idx = 0;
363 
364  const AudioSampleEntry& entry = samp_descr.audio_entries[desc_idx];
365  const FourCC actual_format = entry.GetActualFormat();
366  Codec codec = FourCCToCodec(actual_format);
367  uint8_t num_channels = 0;
368  uint32_t sampling_frequency = 0;
369  uint64_t codec_delay_ns = 0;
370  uint8_t audio_object_type = 0;
371  uint32_t max_bitrate = 0;
372  uint32_t avg_bitrate = 0;
373  std::vector<uint8_t> codec_config;
374 
375  switch (actual_format) {
376  case FOURCC_mp4a:
377  // Check if it is MPEG4 AAC defined in ISO 14496 Part 3 or
378  // supported MPEG2 AAC variants.
379  if (entry.esds.es_descriptor.IsAAC()) {
380  codec = kCodecAAC;
381  const AACAudioSpecificConfig& aac_audio_specific_config =
382  entry.esds.aac_audio_specific_config;
383  num_channels = aac_audio_specific_config.GetNumChannels();
384  sampling_frequency =
385  aac_audio_specific_config.GetSamplesPerSecond();
386  audio_object_type = aac_audio_specific_config.GetAudioObjectType();
387  codec_config = entry.esds.es_descriptor.decoder_specific_info();
388  break;
389  } else if (entry.esds.es_descriptor.IsDTS()) {
390  ObjectType audio_type = entry.esds.es_descriptor.object_type();
391  switch (audio_type) {
392  case kDTSC:
393  codec = kCodecDTSC;
394  break;
395  case kDTSE:
396  codec = kCodecDTSE;
397  break;
398  case kDTSH:
399  codec = kCodecDTSH;
400  break;
401  case kDTSL:
402  codec = kCodecDTSL;
403  break;
404  default:
405  LOG(ERROR) << "Unsupported audio type " << audio_type
406  << " in stsd box.";
407  return false;
408  }
409  num_channels = entry.channelcount;
410  // For dts audio in esds, current supported number of channels is 6
411  // as the only supported channel layout is 5.1.
412  if (num_channels != kDtsAudioNumChannels) {
413  LOG(ERROR) << "Unsupported channel count " << num_channels
414  << " for audio type " << audio_type << ".";
415  return false;
416  }
417  sampling_frequency = entry.samplerate;
418  max_bitrate = entry.esds.es_descriptor.max_bitrate();
419  avg_bitrate = entry.esds.es_descriptor.avg_bitrate();
420  } else {
421  LOG(ERROR) << "Unsupported audio format 0x" << std::hex
422  << actual_format << " in stsd box.";
423  return false;
424  }
425  break;
426  case FOURCC_dtsc:
427  FALLTHROUGH_INTENDED;
428  case FOURCC_dtsh:
429  FALLTHROUGH_INTENDED;
430  case FOURCC_dtsl:
431  FALLTHROUGH_INTENDED;
432  case FOURCC_dtse:
433  FALLTHROUGH_INTENDED;
434  case FOURCC_dtsm:
435  codec_config = entry.ddts.extra_data;
436  max_bitrate = entry.ddts.max_bitrate;
437  avg_bitrate = entry.ddts.avg_bitrate;
438  num_channels = entry.channelcount;
439  sampling_frequency = entry.samplerate;
440  break;
441  case FOURCC_ac_3:
442  codec_config = entry.dac3.data;
443  num_channels = static_cast<uint8_t>(GetAc3NumChannels(codec_config));
444  sampling_frequency = entry.samplerate;
445  break;
446  case FOURCC_ec_3:
447  codec_config = entry.dec3.data;
448  num_channels = static_cast<uint8_t>(GetEc3NumChannels(codec_config));
449  sampling_frequency = entry.samplerate;
450  break;
451  case FOURCC_Opus:
452  codec_config = entry.dops.opus_identification_header;
453  num_channels = entry.channelcount;
454  sampling_frequency = entry.samplerate;
455  RCHECK(sampling_frequency != 0);
456  codec_delay_ns =
457  entry.dops.preskip * kNanosecondsPerSecond / sampling_frequency;
458  break;
459  default:
460  LOG(ERROR) << "Unsupported audio format 0x" << std::hex
461  << actual_format << " in stsd box.";
462  return false;
463  }
464 
465  // Extract possible seek preroll.
466  uint64_t seek_preroll_ns = 0;
467  for (const auto& sample_group_description :
468  track->media.information.sample_table.sample_group_descriptions) {
469  if (sample_group_description.grouping_type != FOURCC_roll)
470  continue;
471  const auto& audio_roll_recovery_entries =
472  sample_group_description.audio_roll_recovery_entries;
473  if (audio_roll_recovery_entries.size() != 1) {
474  LOG(WARNING) << "Unexpected number of entries in "
475  "SampleGroupDescription table with grouping type "
476  "'roll'.";
477  break;
478  }
479  const int16_t roll_distance_in_samples =
480  audio_roll_recovery_entries[0].roll_distance;
481  if (roll_distance_in_samples < 0) {
482  RCHECK(sampling_frequency != 0);
483  seek_preroll_ns = kNanosecondsPerSecond *
484  (-roll_distance_in_samples) / sampling_frequency;
485  } else {
486  LOG(WARNING)
487  << "Roll distance is supposed to be negative, but seeing "
488  << roll_distance_in_samples;
489  }
490  break;
491  }
492 
493  // The stream will be decrypted if a |decryptor_source_| is available.
494  const bool is_encrypted =
495  decryptor_source_
496  ? false
497  : entry.sinf.info.track_encryption.default_is_protected == 1;
498  DVLOG(1) << "is_audio_track_encrypted_: " << is_encrypted;
499  streams.emplace_back(new AudioStreamInfo(
500  track->header.track_id, timescale, duration, codec,
501  AudioStreamInfo::GetCodecString(codec, audio_object_type),
502  codec_config.data(), codec_config.size(), entry.samplesize,
503  num_channels, sampling_frequency, seek_preroll_ns, codec_delay_ns,
504  max_bitrate, avg_bitrate, track->media.header.language.code,
505  is_encrypted));
506  }
507 
508  if (samp_descr.type == kVideo) {
509  RCHECK(!samp_descr.video_entries.empty());
510  if (desc_idx >= samp_descr.video_entries.size())
511  desc_idx = 0;
512  const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx];
513 
514  uint32_t coded_width = entry.width;
515  uint32_t coded_height = entry.height;
516  uint32_t pixel_width = entry.pixel_aspect.h_spacing;
517  uint32_t pixel_height = entry.pixel_aspect.v_spacing;
518  if (pixel_width == 0 && pixel_height == 0) {
519  pixel_width = 1;
520  pixel_height = 1;
521  }
522  std::string codec_string;
523  uint8_t nalu_length_size = 0;
524 
525  const FourCC actual_format = entry.GetActualFormat();
526  const Codec video_codec = FourCCToCodec(actual_format);
527  switch (actual_format) {
528  case FOURCC_avc1:
529  case FOURCC_avc3: {
531  if (!avc_config.Parse(entry.codec_configuration.data)) {
532  LOG(ERROR) << "Failed to parse avcc.";
533  return false;
534  }
535  codec_string = avc_config.GetCodecString(actual_format);
536  nalu_length_size = avc_config.nalu_length_size();
537 
538  if (coded_width != avc_config.coded_width() ||
539  coded_height != avc_config.coded_height()) {
540  LOG(WARNING) << "Resolution in VisualSampleEntry (" << coded_width
541  << "," << coded_height
542  << ") does not match with resolution in "
543  "AVCDecoderConfigurationRecord ("
544  << avc_config.coded_width() << ","
545  << avc_config.coded_height()
546  << "). Use AVCDecoderConfigurationRecord.";
547  coded_width = avc_config.coded_width();
548  coded_height = avc_config.coded_height();
549  }
550 
551  if (pixel_width != avc_config.pixel_width() ||
552  pixel_height != avc_config.pixel_height()) {
553  LOG_IF(WARNING, pixel_width != 1 || pixel_height != 1)
554  << "Pixel aspect ratio in PASP box (" << pixel_width << ","
555  << pixel_height
556  << ") does not match with SAR in AVCDecoderConfigurationRecord "
557  "("
558  << avc_config.pixel_width() << "," << avc_config.pixel_height()
559  << "). Use AVCDecoderConfigurationRecord.";
560  pixel_width = avc_config.pixel_width();
561  pixel_height = avc_config.pixel_height();
562  }
563  break;
564  }
565  case FOURCC_hev1:
566  case FOURCC_hvc1: {
567  HEVCDecoderConfigurationRecord hevc_config;
568  if (!hevc_config.Parse(entry.codec_configuration.data)) {
569  LOG(ERROR) << "Failed to parse hevc.";
570  return false;
571  }
572  codec_string = hevc_config.GetCodecString(actual_format);
573  nalu_length_size = hevc_config.nalu_length_size();
574  break;
575  }
576  case FOURCC_vp08:
577  case FOURCC_vp09:
578  case FOURCC_vp10: {
579  VPCodecConfigurationRecord vp_config;
580  if (!vp_config.ParseMP4(entry.codec_configuration.data)) {
581  LOG(ERROR) << "Failed to parse vpcc.";
582  return false;
583  }
584  codec_string = vp_config.GetCodecString(video_codec);
585  break;
586  }
587  default:
588  LOG(ERROR) << "Unsupported video format "
589  << FourCCToString(actual_format) << " in stsd box.";
590  return false;
591  }
592 
593  // The stream will be decrypted if a |decryptor_source_| is available.
594  const bool is_encrypted =
595  decryptor_source_
596  ? false
597  : entry.sinf.info.track_encryption.default_is_protected == 1;
598  DVLOG(1) << "is_video_track_encrypted_: " << is_encrypted;
599  std::shared_ptr<VideoStreamInfo> video_stream_info(new VideoStreamInfo(
600  track->header.track_id, timescale, duration, video_codec,
601  GetH26xStreamFormat(actual_format), codec_string,
602  entry.codec_configuration.data.data(),
603  entry.codec_configuration.data.size(), coded_width, coded_height,
604  pixel_width, pixel_height,
605  0, // trick_play_factor
606  nalu_length_size, track->media.header.language.code, is_encrypted));
607 
608  // Set pssh raw data if it has.
609  if (moov_->pssh.size() > 0) {
610  std::vector<uint8_t> pssh_raw_data;
611  for (const auto& pssh : moov_->pssh) {
612  pssh_raw_data.insert(pssh_raw_data.end(), pssh.raw_box.begin(),
613  pssh.raw_box.end());
614  }
615  video_stream_info->set_eme_init_data(pssh_raw_data.data(),
616  pssh_raw_data.size());
617  }
618 
619  streams.push_back(video_stream_info);
620  }
621  }
622 
623  init_cb_.Run(streams);
624  if (!FetchKeysIfNecessary(moov_->pssh))
625  return false;
626  runs_.reset(new TrackRunIterator(moov_.get()));
627  RCHECK(runs_->Init());
628  ChangeState(kEmittingSamples);
629  return true;
630 }
631 
632 bool MP4MediaParser::ParseMoof(BoxReader* reader) {
633  // Must already have initialization segment.
634  RCHECK(moov_.get());
635  MovieFragment moof;
636  RCHECK(moof.Parse(reader));
637  if (!runs_)
638  runs_.reset(new TrackRunIterator(moov_.get()));
639  RCHECK(runs_->Init(moof));
640  if (!FetchKeysIfNecessary(moof.pssh))
641  return false;
642  ChangeState(kEmittingSamples);
643  return true;
644 }
645 
646 bool MP4MediaParser::FetchKeysIfNecessary(
647  const std::vector<ProtectionSystemSpecificHeader>& headers) {
648  if (headers.empty())
649  return true;
650 
651  // An error will be returned later if the samples need to be decrypted.
652  if (!decryption_key_source_)
653  return true;
654 
655  std::vector<uint8_t> pssh_raw_data;
656  for (const auto& header : headers) {
657  pssh_raw_data.insert(pssh_raw_data.end(), header.raw_box.begin(),
658  header.raw_box.end());
659  }
660  Status status =
661  decryption_key_source_->FetchKeys(EmeInitDataType::CENC, pssh_raw_data);
662  if (!status.ok()) {
663  LOG(ERROR) << "Error fetching decryption keys: " << status;
664  return false;
665  }
666  return true;
667 }
668 
669 bool MP4MediaParser::EnqueueSample(bool* err) {
670  if (!runs_->IsRunValid()) {
671  // Remain in kEnqueueingSamples state, discarding data, until the end of
672  // the current 'mdat' box has been appended to the queue.
673  if (!queue_.Trim(mdat_tail_))
674  return false;
675 
676  ChangeState(kParsingBoxes);
677  return true;
678  }
679 
680  if (!runs_->IsSampleValid()) {
681  runs_->AdvanceRun();
682  return true;
683  }
684 
685  DCHECK(!(*err));
686 
687  const uint8_t* buf;
688  int buf_size;
689  queue_.Peek(&buf, &buf_size);
690  if (!buf_size)
691  return false;
692 
693  // Skip this entire track if it is not audio nor video.
694  if (!runs_->is_audio() && !runs_->is_video())
695  runs_->AdvanceRun();
696 
697  // Attempt to cache the auxiliary information first. Aux info is usually
698  // placed in a contiguous block before the sample data, rather than being
699  // interleaved. If we didn't cache it, this would require that we retain the
700  // start of the segment buffer while reading samples. Aux info is typically
701  // quite small compared to sample data, so this pattern is useful on
702  // memory-constrained devices where the source buffer consumes a substantial
703  // portion of the total system memory.
704  if (runs_->AuxInfoNeedsToBeCached()) {
705  queue_.PeekAt(runs_->aux_info_offset() + moof_head_, &buf, &buf_size);
706  if (buf_size < runs_->aux_info_size())
707  return false;
708  *err = !runs_->CacheAuxInfo(buf, buf_size);
709  return !*err;
710  }
711 
712  int64_t sample_offset = runs_->sample_offset() + moof_head_;
713  queue_.PeekAt(sample_offset, &buf, &buf_size);
714  if (buf_size < runs_->sample_size()) {
715  if (sample_offset < queue_.head()) {
716  LOG(ERROR) << "Incorrect sample offset " << sample_offset
717  << " < " << queue_.head();
718  *err = true;
719  }
720  return false;
721  }
722 
723  const uint8_t* media_data = buf;
724  const size_t media_data_size = runs_->sample_size();
725  // Use a dummy data size of 0 to avoid copying overhead.
726  // Actual media data is set later.
727  const size_t kDummyDataSize = 0;
728  std::shared_ptr<MediaSample> stream_sample(
729  MediaSample::CopyFrom(media_data, kDummyDataSize, runs_->is_keyframe()));
730 
731  if (runs_->is_encrypted()) {
732  std::shared_ptr<uint8_t> decrypted_media_data(
733  new uint8_t[media_data_size], std::default_delete<uint8_t[]>());
734  std::unique_ptr<DecryptConfig> decrypt_config = runs_->GetDecryptConfig();
735  if (!decrypt_config) {
736  *err = true;
737  LOG(ERROR) << "Missing decrypt config.";
738  return false;
739  }
740 
741  if (!decryptor_source_) {
742  stream_sample->SetData(media_data, media_data_size);
743  // If the demuxer does not have the decryptor_source_, store
744  // decrypt_config so that the demuxed sample can be decrypted later.
745  stream_sample->set_decrypt_config(std::move(decrypt_config));
746  stream_sample->set_is_encrypted(true);
747  } else {
748  if (!decryptor_source_->DecryptSampleBuffer(decrypt_config.get(),
749  media_data, media_data_size,
750  decrypted_media_data.get())) {
751  *err = true;
752  LOG(ERROR) << "Cannot decrypt samples.";
753  return false;
754  }
755  stream_sample->TransferData(std::move(decrypted_media_data),
756  media_data_size);
757  }
758  } else {
759  stream_sample->SetData(media_data, media_data_size);
760  }
761 
762  stream_sample->set_dts(runs_->dts());
763  stream_sample->set_pts(runs_->cts());
764  stream_sample->set_duration(runs_->duration());
765 
766  DVLOG(3) << "Pushing frame: "
767  << ", key=" << runs_->is_keyframe()
768  << ", dur=" << runs_->duration()
769  << ", dts=" << runs_->dts()
770  << ", cts=" << runs_->cts()
771  << ", size=" << runs_->sample_size();
772 
773  if (!new_sample_cb_.Run(runs_->track_id(), stream_sample)) {
774  *err = true;
775  LOG(ERROR) << "Failed to process the sample.";
776  return false;
777  }
778 
779  runs_->AdvanceSample();
780  return true;
781 }
782 
783 bool MP4MediaParser::ReadAndDiscardMDATsUntil(const int64_t offset) {
784  bool err = false;
785  while (mdat_tail_ < offset) {
786  const uint8_t* buf;
787  int size;
788  queue_.PeekAt(mdat_tail_, &buf, &size);
789 
790  FourCC type;
791  uint64_t box_sz;
792  if (!BoxReader::StartBox(buf, size, &type, &box_sz, &err))
793  break;
794 
795  mdat_tail_ += box_sz;
796  }
797  queue_.Trim(std::min(mdat_tail_, offset));
798  return !err;
799 }
800 
801 void MP4MediaParser::ChangeState(State new_state) {
802  DVLOG(2) << "Changing state: " << new_state;
803  state_ = new_state;
804 }
805 
806 } // namespace mp4
807 } // namespace media
808 } // namespace shaka
Class for parsing or writing VP codec configuration record.
bool Flush() override WARN_UNUSED_RESULT
All the methods that are virtual are virtual for mocking.
bool Parse(const uint8_t *buf, int size) override WARN_UNUSED_RESULT
bool Parse(const std::vector< uint8_t > &data)
std::string GetCodecString(FourCC codec_fourcc) const
Class for reading MP4 boxes.
Definition: box_reader.h:25
bool ParseMP4(const std::vector< uint8_t > &data)
Class for parsing HEVC decoder configuration record.
static File * OpenWithNoBuffering(const char *file_name, const char *mode)
Definition: file.cc:187
static std::string GetCodecString(Codec codec, uint8_t audio_object_type)
bool LoadMoov(const std::string &file_path)
Class for parsing AVC decoder configuration record.
static std::shared_ptr< MediaSample > CopyFrom(const uint8_t *data, size_t size, bool is_key_frame)
Definition: media_sample.cc:42
KeySource is responsible for encryption key acquisition.
Definition: key_source.h:45
void Init(const InitCB &init_cb, const NewSampleCB &new_sample_cb, KeySource *decryption_key_source) override
Holds video stream information.
Holds audio stream information.
DecryptorSource wraps KeySource and is responsible for decryptor management.
static bool StartBox(const uint8_t *buf, const size_t buf_size, FourCC *type, uint64_t *box_size, bool *err) WARN_UNUSED_RESULT
Definition: box_reader.cc:54
static BoxReader * ReadBox(const uint8_t *buf, const size_t buf_size, bool *err)
Definition: box_reader.cc:36