Shaka Packager SDK
mp4_media_parser.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "packager/media/formats/mp4/mp4_media_parser.h"
6 
7 #include <algorithm>
8 
9 #include "packager/base/callback.h"
10 #include "packager/base/callback_helpers.h"
11 #include "packager/base/logging.h"
12 #include "packager/base/strings/string_number_conversions.h"
13 #include "packager/file/file.h"
14 #include "packager/file/file_closer.h"
15 #include "packager/media/base/audio_stream_info.h"
16 #include "packager/media/base/buffer_reader.h"
17 #include "packager/media/base/decrypt_config.h"
18 #include "packager/media/base/key_source.h"
19 #include "packager/media/base/macros.h"
20 #include "packager/media/base/media_sample.h"
21 #include "packager/media/base/rcheck.h"
22 #include "packager/media/base/video_stream_info.h"
23 #include "packager/media/base/video_util.h"
24 #include "packager/media/codecs/ac3_audio_util.h"
25 #include "packager/media/codecs/av1_codec_configuration_record.h"
26 #include "packager/media/codecs/avc_decoder_configuration_record.h"
27 #include "packager/media/codecs/dovi_decoder_configuration_record.h"
28 #include "packager/media/codecs/ec3_audio_util.h"
29 #include "packager/media/codecs/ac4_audio_util.h"
30 #include "packager/media/codecs/es_descriptor.h"
31 #include "packager/media/codecs/hevc_decoder_configuration_record.h"
32 #include "packager/media/codecs/vp_codec_configuration_record.h"
33 #include "packager/media/formats/mp4/box_definitions.h"
34 #include "packager/media/formats/mp4/box_reader.h"
35 #include "packager/media/formats/mp4/track_run_iterator.h"
36 
37 namespace shaka {
38 namespace media {
39 namespace mp4 {
40 namespace {
41 
// Converts |time_in_old_scale|, a tick count in a timescale of |old_scale|
// ticks per second, to the equivalent tick count in a timescale of
// |new_scale| ticks per second, rounding down.
//
// The conversion is done entirely in integer arithmetic so that tick counts
// above 2^53 (which a double cannot represent exactly) are converted without
// precision loss. Returns 0 when |old_scale| is 0, since no meaningful
// conversion exists for a zero timescale.
uint64_t Rescale(uint64_t time_in_old_scale,
                 uint32_t old_scale,
                 uint32_t new_scale) {
  if (old_scale == 0)
    return 0;

  // Split into whole and fractional parts of the old timescale so that the
  // intermediate product |remainder * new_scale| always fits in 64 bits
  // (both factors are below 2^32).
  const uint64_t whole = time_in_old_scale / old_scale;
  const uint64_t remainder = time_in_old_scale % old_scale;
  return whole * new_scale + remainder * new_scale / old_scale;
}
47 
48 H26xStreamFormat GetH26xStreamFormat(FourCC fourcc) {
49  switch (fourcc) {
50  case FOURCC_avc1:
51  case FOURCC_dvh1:
52  case FOURCC_hvc1:
53  return H26xStreamFormat::kNalUnitStreamWithoutParameterSetNalus;
54  case FOURCC_avc3:
55  case FOURCC_dvhe:
56  case FOURCC_hev1:
57  return H26xStreamFormat::kNalUnitStreamWithParameterSetNalus;
58  default:
59  return H26xStreamFormat::kUnSpecified;
60  }
61 }
62 
63 Codec FourCCToCodec(FourCC fourcc) {
64  switch (fourcc) {
65  case FOURCC_av01:
66  return kCodecAV1;
67  case FOURCC_avc1:
68  case FOURCC_avc3:
69  return kCodecH264;
70  case FOURCC_dvh1:
71  case FOURCC_dvhe:
72  return kCodecH265DolbyVision;
73  case FOURCC_hev1:
74  case FOURCC_hvc1:
75  return kCodecH265;
76  case FOURCC_vp08:
77  return kCodecVP8;
78  case FOURCC_vp09:
79  return kCodecVP9;
80  case FOURCC_Opus:
81  return kCodecOpus;
82  case FOURCC_dtsc:
83  return kCodecDTSC;
84  case FOURCC_dtsh:
85  return kCodecDTSH;
86  case FOURCC_dtsl:
87  return kCodecDTSL;
88  case FOURCC_dtse:
89  return kCodecDTSE;
90  case FOURCC_dtsp:
91  return kCodecDTSP;
92  case FOURCC_dtsm:
93  return kCodecDTSM;
94  case FOURCC_ac_3:
95  return kCodecAC3;
96  case FOURCC_ec_3:
97  return kCodecEAC3;
98  case FOURCC_ac_4:
99  return kCodecAC4;
100  case FOURCC_fLaC:
101  return kCodecFlac;
102  default:
103  return kUnknownCodec;
104  }
105 }
106 
107 Codec ObjectTypeToCodec(ObjectType object_type) {
108  switch (object_type) {
109  case ObjectType::kISO_14496_3:
110  case ObjectType::kISO_13818_7_AAC_LC:
111  return kCodecAAC;
112  case ObjectType::kDTSC:
113  return kCodecDTSC;
114  case ObjectType::kDTSE:
115  return kCodecDTSE;
116  case ObjectType::kDTSH:
117  return kCodecDTSH;
118  case ObjectType::kDTSL:
119  return kCodecDTSL;
120  default:
121  return kUnknownCodec;
122  }
123 }
124 
125 std::vector<uint8_t> GetDOVIDecoderConfig(
126  const std::vector<CodecConfiguration>& configs) {
127  for (const CodecConfiguration& config : configs) {
128  if (config.box_type == FOURCC_dvcC || config.box_type == FOURCC_dvvC) {
129  return config.data;
130  }
131  }
132  return std::vector<uint8_t>();
133 }
134 
135 bool UpdateCodecStringForDolbyVision(
136  FourCC actual_format,
137  const std::vector<CodecConfiguration>& configs,
138  std::string* codec_string) {
139  DOVIDecoderConfigurationRecord dovi_config;
140  if (!dovi_config.Parse(GetDOVIDecoderConfig(configs))) {
141  LOG(ERROR) << "Failed to parse Dolby Vision decoder "
142  "configuration record.";
143  return false;
144  }
145  switch (actual_format) {
146  case FOURCC_dvh1:
147  case FOURCC_dvhe:
148  // Non-Backward compatibility mode. Replace the code string with
149  // Dolby Vision only.
150  *codec_string = dovi_config.GetCodecString(actual_format);
151  break;
152  case FOURCC_hev1:
153  // Backward compatibility mode. Two codecs are signalled: base codec
154  // without Dolby Vision and HDR with Dolby Vision.
155  *codec_string += ";" + dovi_config.GetCodecString(FOURCC_dvhe);
156  break;
157  case FOURCC_hvc1:
158  // See above.
159  *codec_string += ";" + dovi_config.GetCodecString(FOURCC_dvh1);
160  break;
161  default:
162  LOG(ERROR) << "Unsupported format with extra codec "
163  << FourCCToString(actual_format);
164  return false;
165  }
166  return true;
167 }
168 
// Number of nanoseconds in one second; used to convert sample counts at a
// given sampling frequency into nanosecond durations.
constexpr uint64_t kNanosecondsPerSecond = 1000ull * 1000ull * 1000ull;
170 
171 } // namespace
172 
173 MP4MediaParser::MP4MediaParser()
174  : state_(kWaitingForInit),
175  decryption_key_source_(NULL),
176  moof_head_(0),
177  mdat_tail_(0) {}
178 
179 MP4MediaParser::~MP4MediaParser() {}
180 
181 void MP4MediaParser::Init(const InitCB& init_cb,
182  const NewMediaSampleCB& new_media_sample_cb,
183  const NewTextSampleCB& new_text_sample_cb,
184  KeySource* decryption_key_source) {
185  DCHECK_EQ(state_, kWaitingForInit);
186  DCHECK(init_cb_.is_null());
187  DCHECK(!init_cb.is_null());
188  DCHECK(!new_media_sample_cb.is_null());
189 
190  ChangeState(kParsingBoxes);
191  init_cb_ = init_cb;
192  new_sample_cb_ = new_media_sample_cb;
193  decryption_key_source_ = decryption_key_source;
194  if (decryption_key_source)
195  decryptor_source_.reset(new DecryptorSource(decryption_key_source));
196 }
197 
198 void MP4MediaParser::Reset() {
199  queue_.Reset();
200  runs_.reset();
201  moof_head_ = 0;
202  mdat_tail_ = 0;
203 }
204 
205 bool MP4MediaParser::Flush() {
206  DCHECK_NE(state_, kWaitingForInit);
207  Reset();
208  ChangeState(kParsingBoxes);
209  return true;
210 }
211 
212 bool MP4MediaParser::Parse(const uint8_t* buf, int size) {
213  DCHECK_NE(state_, kWaitingForInit);
214 
215  if (state_ == kError)
216  return false;
217 
218  queue_.Push(buf, size);
219 
220  bool result, err = false;
221 
222  do {
223  if (state_ == kParsingBoxes) {
224  result = ParseBox(&err);
225  } else {
226  DCHECK_EQ(kEmittingSamples, state_);
227  result = EnqueueSample(&err);
228  if (result) {
229  int64_t max_clear = runs_->GetMaxClearOffset() + moof_head_;
230  err = !ReadAndDiscardMDATsUntil(max_clear);
231  }
232  }
233  } while (result && !err);
234 
235  if (err) {
236  DLOG(ERROR) << "Error while parsing MP4";
237  moov_.reset();
238  Reset();
239  ChangeState(kError);
240  return false;
241  }
242 
243  return true;
244 }
245 
246 bool MP4MediaParser::LoadMoov(const std::string& file_path) {
247  std::unique_ptr<File, FileCloser> file(
248  File::OpenWithNoBuffering(file_path.c_str(), "r"));
249  if (!file) {
250  LOG(ERROR) << "Unable to open media file '" << file_path << "'";
251  return false;
252  }
253  if (!file->Seek(0)) {
254  LOG(WARNING) << "Filesystem does not support seeking on file '" << file_path
255  << "'";
256  return false;
257  }
258 
259  uint64_t file_position(0);
260  bool mdat_seen(false);
261  while (true) {
262  const uint32_t kBoxHeaderReadSize(16);
263  std::vector<uint8_t> buffer(kBoxHeaderReadSize);
264  int64_t bytes_read = file->Read(&buffer[0], kBoxHeaderReadSize);
265  if (bytes_read == 0) {
266  LOG(ERROR) << "Could not find 'moov' box in file '" << file_path << "'";
267  return false;
268  }
269  if (bytes_read < kBoxHeaderReadSize) {
270  LOG(ERROR) << "Error reading media file '" << file_path << "'";
271  return false;
272  }
273  uint64_t box_size;
274  FourCC box_type;
275  bool err;
276  if (!BoxReader::StartBox(&buffer[0], kBoxHeaderReadSize, &box_type,
277  &box_size, &err)) {
278  LOG(ERROR) << "Could not start box from file '" << file_path << "'";
279  return false;
280  }
281  if (box_type == FOURCC_mdat) {
282  mdat_seen = true;
283  } else if (box_type == FOURCC_moov) {
284  if (!mdat_seen) {
285  // 'moov' is before 'mdat'. Nothing to do.
286  break;
287  }
288  // 'mdat' before 'moov'. Read and parse 'moov'.
289  if (!Parse(&buffer[0], bytes_read)) {
290  LOG(ERROR) << "Error parsing mp4 file '" << file_path << "'";
291  return false;
292  }
293  uint64_t bytes_to_read = box_size - bytes_read;
294  buffer.resize(bytes_to_read);
295  while (bytes_to_read > 0) {
296  bytes_read = file->Read(&buffer[0], bytes_to_read);
297  if (bytes_read <= 0) {
298  LOG(ERROR) << "Error reading 'moov' contents from file '" << file_path
299  << "'";
300  return false;
301  }
302  if (!Parse(&buffer[0], bytes_read)) {
303  LOG(ERROR) << "Error parsing mp4 file '" << file_path << "'";
304  return false;
305  }
306  bytes_to_read -= bytes_read;
307  }
308  queue_.Reset(); // So that we don't need to adjust data offsets.
309  mdat_tail_ = 0; // So it will skip boxes until mdat.
310  break; // Done.
311  }
312  file_position += box_size;
313  if (!file->Seek(file_position)) {
314  LOG(ERROR) << "Error skipping box in mp4 file '" << file_path << "'";
315  return false;
316  }
317  }
318  return true;
319 }
320 
321 bool MP4MediaParser::ParseBox(bool* err) {
322  const uint8_t* buf;
323  int size;
324  queue_.Peek(&buf, &size);
325  if (!size)
326  return false;
327 
328  std::unique_ptr<BoxReader> reader(BoxReader::ReadBox(buf, size, err));
329  if (reader.get() == NULL)
330  return false;
331 
332  if (reader->type() == FOURCC_mdat) {
333  if (!moov_) {
334  // For seekable files, we seek to the 'moov' and load the 'moov' first
335  // then seek back (see LoadMoov function for details); we do not support
336  // having 'mdat' before 'moov' for non-seekable files. The code ends up
337  // here only if it is a non-seekable file.
338  NOTIMPLEMENTED() << " Non-seekable Files with 'mdat' box before 'moov' "
339  "box is not supported.";
340  *err = true;
341  return false;
342  } else {
343  // This can happen if there are unused 'mdat' boxes, which is unusual
344  // but allowed by the spec. Ignore the 'mdat' and proceed.
345  LOG(INFO)
346  << "Ignore unused 'mdat' box - this could be as a result of extra "
347  "not usable 'mdat' or 'mdat' associated with unrecognized track.";
348  }
349  }
350 
351  // Set up mdat offset for ReadMDATsUntil().
352  mdat_tail_ = queue_.head() + reader->size();
353 
354  if (reader->type() == FOURCC_moov) {
355  *err = !ParseMoov(reader.get());
356  } else if (reader->type() == FOURCC_moof) {
357  moof_head_ = queue_.head();
358  *err = !ParseMoof(reader.get());
359 
360  // Return early to avoid evicting 'moof' data from queue. Auxiliary info may
361  // be located anywhere in the file, including inside the 'moof' itself.
362  // (Since 'default-base-is-moof' is mandated, no data references can come
363  // before the head of the 'moof', so keeping this box around is sufficient.)
364  return !(*err);
365  } else {
366  VLOG(2) << "Skipping top-level box: " << FourCCToString(reader->type());
367  }
368 
369  queue_.Pop(static_cast<int>(reader->size()));
370  return !(*err);
371 }
372 
373 bool MP4MediaParser::ParseMoov(BoxReader* reader) {
374  if (moov_)
375  return true; // Already parsed the 'moov' box.
376 
377  moov_.reset(new Movie);
378  RCHECK(moov_->Parse(reader));
379  runs_.reset();
380 
381  std::vector<std::shared_ptr<StreamInfo>> streams;
382 
383  for (std::vector<Track>::const_iterator track = moov_->tracks.begin();
384  track != moov_->tracks.end(); ++track) {
385  const uint32_t timescale = track->media.header.timescale;
386 
387  // Calculate duration (based on timescale).
388  uint64_t duration = 0;
389  if (track->media.header.duration > 0) {
390  duration = track->media.header.duration;
391  } else if (moov_->extends.header.fragment_duration > 0) {
392  DCHECK(moov_->header.timescale != 0);
393  duration = Rescale(moov_->extends.header.fragment_duration,
394  moov_->header.timescale,
395  timescale);
396  } else if (moov_->header.duration > 0 &&
397  moov_->header.duration != std::numeric_limits<uint64_t>::max()) {
398  DCHECK(moov_->header.timescale != 0);
399  duration =
400  Rescale(moov_->header.duration, moov_->header.timescale, timescale);
401  }
402 
403  const SampleDescription& samp_descr =
404  track->media.information.sample_table.description;
405 
406  size_t desc_idx = 0;
407 
408  // Read sample description index from mvex if it exists otherwise read
409  // from the first entry in Sample To Chunk box.
410  if (moov_->extends.tracks.size() > 0) {
411  for (size_t t = 0; t < moov_->extends.tracks.size(); t++) {
412  const TrackExtends& trex = moov_->extends.tracks[t];
413  if (trex.track_id == track->header.track_id) {
414  desc_idx = trex.default_sample_description_index;
415  break;
416  }
417  }
418  } else {
419  const std::vector<ChunkInfo>& chunk_info =
420  track->media.information.sample_table.sample_to_chunk.chunk_info;
421  RCHECK(chunk_info.size() > 0);
422  desc_idx = chunk_info[0].sample_description_index;
423  }
424  RCHECK(desc_idx > 0);
425  desc_idx -= 1; // BMFF descriptor index is one-based
426 
427  if (samp_descr.type == kAudio) {
428  RCHECK(!samp_descr.audio_entries.empty());
429 
430  // It is not uncommon to find otherwise-valid files with incorrect sample
431  // description indices, so we fail gracefully in that case.
432  if (desc_idx >= samp_descr.audio_entries.size())
433  desc_idx = 0;
434 
435  const AudioSampleEntry& entry = samp_descr.audio_entries[desc_idx];
436  const FourCC actual_format = entry.GetActualFormat();
437  Codec codec = FourCCToCodec(actual_format);
438  uint8_t num_channels = entry.channelcount;
439  uint32_t sampling_frequency = entry.samplerate;
440  uint64_t codec_delay_ns = 0;
441  uint8_t audio_object_type = 0;
442  uint32_t max_bitrate = 0;
443  uint32_t avg_bitrate = 0;
444  std::vector<uint8_t> codec_config;
445 
446  switch (actual_format) {
447  case FOURCC_mp4a: {
448  const DecoderConfigDescriptor& decoder_config =
449  entry.esds.es_descriptor.decoder_config_descriptor();
450  max_bitrate = decoder_config.max_bitrate();
451  avg_bitrate = decoder_config.avg_bitrate();
452 
453  codec = ObjectTypeToCodec(decoder_config.object_type());
454  if (codec == kCodecAAC) {
455  const AACAudioSpecificConfig& aac_audio_specific_config =
456  entry.esds.aac_audio_specific_config;
457  num_channels = aac_audio_specific_config.GetNumChannels();
458  sampling_frequency =
459  aac_audio_specific_config.GetSamplesPerSecond();
460  audio_object_type = aac_audio_specific_config.GetAudioObjectType();
461  codec_config =
462  decoder_config.decoder_specific_info_descriptor().data();
463  } else if (codec == kUnknownCodec) {
464  // Intentionally not to fail in the parser as there may be multiple
465  // streams in the source content, which allows the supported stream
466  // to be packaged. An error will be returned if the unsupported
467  // stream is passed to the muxer.
468  LOG(WARNING) << "Unsupported audio object type "
469  << static_cast<int>(decoder_config.object_type())
470  << " in stsd.es_desriptor.";
471  }
472  break;
473  }
474  case FOURCC_dtsc:
475  FALLTHROUGH_INTENDED;
476  case FOURCC_dtse:
477  FALLTHROUGH_INTENDED;
478  case FOURCC_dtsh:
479  FALLTHROUGH_INTENDED;
480  case FOURCC_dtsl:
481  FALLTHROUGH_INTENDED;
482  case FOURCC_dtsm:
483  codec_config = entry.ddts.extra_data;
484  max_bitrate = entry.ddts.max_bitrate;
485  avg_bitrate = entry.ddts.avg_bitrate;
486  break;
487  case FOURCC_ac_3:
488  codec_config = entry.dac3.data;
489  num_channels = static_cast<uint8_t>(GetAc3NumChannels(codec_config));
490  break;
491  case FOURCC_ec_3:
492  codec_config = entry.dec3.data;
493  num_channels = static_cast<uint8_t>(GetEc3NumChannels(codec_config));
494  break;
495  case FOURCC_ac_4:
496  codec_config = entry.dac4.data;
497  // Stop the process if have errors when parsing AC-4 dac4 box,
498  // bitstream version 0 (has beed deprecated) and contains multiple
499  // presentations in single AC-4 stream (only used for broadcast).
500  if (!GetAc4CodecInfo(codec_config, &audio_object_type)) {
501  LOG(ERROR) << "Failed to parse dac4.";
502  return false;
503  }
504  break;
505  case FOURCC_fLaC:
506  codec_config = entry.dfla.data;
507  break;
508  case FOURCC_Opus:
509  codec_config = entry.dops.opus_identification_header;
510  codec_delay_ns =
511  entry.dops.preskip * kNanosecondsPerSecond / sampling_frequency;
512  break;
513  default:
514  // Intentionally not to fail in the parser as there may be multiple
515  // streams in the source content, which allows the supported stream to
516  // be packaged.
517  // An error will be returned if the unsupported stream is passed to
518  // the muxer.
519  LOG(WARNING) << "Unsupported audio format '"
520  << FourCCToString(actual_format) << "' in stsd box.";
521  break;
522  }
523 
524  // Extract possible seek preroll.
525  uint64_t seek_preroll_ns = 0;
526  for (const auto& sample_group_description :
527  track->media.information.sample_table.sample_group_descriptions) {
528  if (sample_group_description.grouping_type != FOURCC_roll)
529  continue;
530  const auto& audio_roll_recovery_entries =
531  sample_group_description.audio_roll_recovery_entries;
532  if (audio_roll_recovery_entries.size() != 1) {
533  LOG(WARNING) << "Unexpected number of entries in "
534  "SampleGroupDescription table with grouping type "
535  "'roll'.";
536  break;
537  }
538  const int16_t roll_distance_in_samples =
539  audio_roll_recovery_entries[0].roll_distance;
540  if (roll_distance_in_samples < 0) {
541  RCHECK(sampling_frequency != 0);
542  seek_preroll_ns = kNanosecondsPerSecond *
543  (-roll_distance_in_samples) / sampling_frequency;
544  } else {
545  LOG(WARNING)
546  << "Roll distance is supposed to be negative, but seeing "
547  << roll_distance_in_samples;
548  }
549  break;
550  }
551 
552  // The stream will be decrypted if a |decryptor_source_| is available.
553  const bool is_encrypted =
554  decryptor_source_
555  ? false
556  : entry.sinf.info.track_encryption.default_is_protected == 1;
557  DVLOG(1) << "is_audio_track_encrypted_: " << is_encrypted;
558  streams.emplace_back(new AudioStreamInfo(
559  track->header.track_id, timescale, duration, codec,
560  AudioStreamInfo::GetCodecString(codec, audio_object_type),
561  codec_config.data(), codec_config.size(), entry.samplesize,
562  num_channels, sampling_frequency, seek_preroll_ns, codec_delay_ns,
563  max_bitrate, avg_bitrate, track->media.header.language.code,
564  is_encrypted));
565  }
566 
567  if (samp_descr.type == kVideo) {
568  RCHECK(!samp_descr.video_entries.empty());
569  if (desc_idx >= samp_descr.video_entries.size())
570  desc_idx = 0;
571  const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx];
572  std::vector<uint8_t> codec_configuration_data =
573  entry.codec_configuration.data;
574 
575  uint32_t coded_width = entry.width;
576  uint32_t coded_height = entry.height;
577  uint32_t pixel_width = entry.pixel_aspect.h_spacing;
578  uint32_t pixel_height = entry.pixel_aspect.v_spacing;
579  if (pixel_width == 0 && pixel_height == 0) {
580  DerivePixelWidthHeight(coded_width, coded_height, track->header.width,
581  track->header.height, &pixel_width,
582  &pixel_height);
583  }
584  std::string codec_string;
585  uint8_t nalu_length_size = 0;
586  uint8_t transfer_characteristics = 0;
587 
588  const FourCC actual_format = entry.GetActualFormat();
589  const Codec video_codec = FourCCToCodec(actual_format);
590  switch (actual_format) {
591  case FOURCC_av01: {
592  AV1CodecConfigurationRecord av1_config;
593  if (!av1_config.Parse(codec_configuration_data)) {
594  LOG(ERROR) << "Failed to parse av1c.";
595  return false;
596  }
597  codec_string = av1_config.GetCodecString();
598  break;
599  }
600  case FOURCC_avc1:
601  case FOURCC_avc3: {
602  AVCDecoderConfigurationRecord avc_config;
603  if (!avc_config.Parse(codec_configuration_data)) {
604  LOG(ERROR) << "Failed to parse avcc.";
605  return false;
606  }
607  codec_string = avc_config.GetCodecString(actual_format);
608  nalu_length_size = avc_config.nalu_length_size();
609  transfer_characteristics = avc_config.transfer_characteristics();
610 
611  // Use configurations from |avc_config| if it is valid.
612  if (avc_config.coded_width() != 0) {
613  DCHECK_NE(avc_config.coded_height(), 0u);
614  if (coded_width != avc_config.coded_width() ||
615  coded_height != avc_config.coded_height()) {
616  LOG(WARNING) << "Resolution in VisualSampleEntry (" << coded_width
617  << "," << coded_height
618  << ") does not match with resolution in "
619  "AVCDecoderConfigurationRecord ("
620  << avc_config.coded_width() << ","
621  << avc_config.coded_height()
622  << "). Use AVCDecoderConfigurationRecord.";
623  coded_width = avc_config.coded_width();
624  coded_height = avc_config.coded_height();
625  }
626 
627  DCHECK_NE(avc_config.pixel_width(), 0u);
628  DCHECK_NE(avc_config.pixel_height(), 0u);
629  if (pixel_width != avc_config.pixel_width() ||
630  pixel_height != avc_config.pixel_height()) {
631  LOG_IF(WARNING, pixel_width != 1 || pixel_height != 1)
632  << "Pixel aspect ratio in PASP box (" << pixel_width << ","
633  << pixel_height
634  << ") does not match with SAR in "
635  "AVCDecoderConfigurationRecord "
636  "("
637  << avc_config.pixel_width() << ","
638  << avc_config.pixel_height()
639  << "). Use AVCDecoderConfigurationRecord.";
640  pixel_width = avc_config.pixel_width();
641  pixel_height = avc_config.pixel_height();
642  }
643  }
644  break;
645  }
646  case FOURCC_dvh1:
647  case FOURCC_dvhe:
648  case FOURCC_hev1:
649  case FOURCC_hvc1: {
650  HEVCDecoderConfigurationRecord hevc_config;
651  if (!hevc_config.Parse(codec_configuration_data)) {
652  LOG(ERROR) << "Failed to parse hevc.";
653  return false;
654  }
655  codec_string = hevc_config.GetCodecString(actual_format);
656  nalu_length_size = hevc_config.nalu_length_size();
657  transfer_characteristics = hevc_config.transfer_characteristics();
658 
659  if (!entry.extra_codec_configs.empty()) {
660  // |extra_codec_configs| is present only for Dolby Vision.
661  if (!UpdateCodecStringForDolbyVision(
662  actual_format, entry.extra_codec_configs, &codec_string)) {
663  return false;
664  }
665  }
666  break;
667  }
668  case FOURCC_vp08:
669  case FOURCC_vp09: {
670  VPCodecConfigurationRecord vp_config;
671  if (!vp_config.ParseMP4(codec_configuration_data)) {
672  LOG(ERROR) << "Failed to parse vpcc.";
673  return false;
674  }
675  if (actual_format == FOURCC_vp09 &&
676  (!vp_config.is_level_set() || vp_config.level() == 0)) {
677  const double kUnknownSampleDuration = 0.0;
678  vp_config.SetVP9Level(coded_width, coded_height,
679  kUnknownSampleDuration);
680  vp_config.WriteMP4(&codec_configuration_data);
681  }
682  codec_string = vp_config.GetCodecString(video_codec);
683  break;
684  }
685  default:
686  // Intentionally not to fail in the parser as there may be multiple
687  // streams in the source content, which allows the supported stream to
688  // be packaged.
689  // An error will be returned if the unsupported stream is passed to
690  // the muxer.
691  LOG(WARNING) << "Unsupported video format '"
692  << FourCCToString(actual_format) << "' in stsd box.";
693  break;
694  }
695 
696  // The stream will be decrypted if a |decryptor_source_| is available.
697  const bool is_encrypted =
698  decryptor_source_
699  ? false
700  : entry.sinf.info.track_encryption.default_is_protected == 1;
701  DVLOG(1) << "is_video_track_encrypted_: " << is_encrypted;
702  std::shared_ptr<VideoStreamInfo> video_stream_info(new VideoStreamInfo(
703  track->header.track_id, timescale, duration, video_codec,
704  GetH26xStreamFormat(actual_format), codec_string,
705  codec_configuration_data.data(), codec_configuration_data.size(),
706  coded_width, coded_height, pixel_width, pixel_height,
707  transfer_characteristics,
708  0, // trick_play_factor
709  nalu_length_size, track->media.header.language.code, is_encrypted));
710  video_stream_info->set_extra_config(entry.ExtraCodecConfigsAsVector());
711 
712  // Set pssh raw data if it has.
713  if (moov_->pssh.size() > 0) {
714  std::vector<uint8_t> pssh_raw_data;
715  for (const auto& pssh : moov_->pssh) {
716  pssh_raw_data.insert(pssh_raw_data.end(), pssh.raw_box.begin(),
717  pssh.raw_box.end());
718  }
719  video_stream_info->set_eme_init_data(pssh_raw_data.data(),
720  pssh_raw_data.size());
721  }
722 
723  streams.push_back(video_stream_info);
724  }
725  }
726 
727  init_cb_.Run(streams);
728  if (!FetchKeysIfNecessary(moov_->pssh))
729  return false;
730  runs_.reset(new TrackRunIterator(moov_.get()));
731  RCHECK(runs_->Init());
732  ChangeState(kEmittingSamples);
733  return true;
734 }
735 
736 bool MP4MediaParser::ParseMoof(BoxReader* reader) {
737  // Must already have initialization segment.
738  RCHECK(moov_.get());
739  MovieFragment moof;
740  RCHECK(moof.Parse(reader));
741  if (!runs_)
742  runs_.reset(new TrackRunIterator(moov_.get()));
743  RCHECK(runs_->Init(moof));
744  if (!FetchKeysIfNecessary(moof.pssh))
745  return false;
746  ChangeState(kEmittingSamples);
747  return true;
748 }
749 
750 bool MP4MediaParser::FetchKeysIfNecessary(
751  const std::vector<ProtectionSystemSpecificHeader>& headers) {
752  if (headers.empty())
753  return true;
754 
755  // An error will be returned later if the samples need to be decrypted.
756  if (!decryption_key_source_)
757  return true;
758 
759  std::vector<uint8_t> pssh_raw_data;
760  for (const auto& header : headers) {
761  pssh_raw_data.insert(pssh_raw_data.end(), header.raw_box.begin(),
762  header.raw_box.end());
763  }
764  Status status =
765  decryption_key_source_->FetchKeys(EmeInitDataType::CENC, pssh_raw_data);
766  if (!status.ok()) {
767  LOG(ERROR) << "Error fetching decryption keys: " << status;
768  return false;
769  }
770  return true;
771 }
772 
773 bool MP4MediaParser::EnqueueSample(bool* err) {
774  if (!runs_->IsRunValid()) {
775  // Remain in kEnqueueingSamples state, discarding data, until the end of
776  // the current 'mdat' box has been appended to the queue.
777  if (!queue_.Trim(mdat_tail_))
778  return false;
779 
780  ChangeState(kParsingBoxes);
781  return true;
782  }
783 
784  if (!runs_->IsSampleValid()) {
785  runs_->AdvanceRun();
786  return true;
787  }
788 
789  DCHECK(!(*err));
790 
791  const uint8_t* buf;
792  int buf_size;
793  queue_.Peek(&buf, &buf_size);
794  if (!buf_size)
795  return false;
796 
797  // Skip this entire track if it is not audio nor video.
798  if (!runs_->is_audio() && !runs_->is_video())
799  runs_->AdvanceRun();
800 
801  // Attempt to cache the auxiliary information first. Aux info is usually
802  // placed in a contiguous block before the sample data, rather than being
803  // interleaved. If we didn't cache it, this would require that we retain the
804  // start of the segment buffer while reading samples. Aux info is typically
805  // quite small compared to sample data, so this pattern is useful on
806  // memory-constrained devices where the source buffer consumes a substantial
807  // portion of the total system memory.
808  if (runs_->AuxInfoNeedsToBeCached()) {
809  queue_.PeekAt(runs_->aux_info_offset() + moof_head_, &buf, &buf_size);
810  if (buf_size < runs_->aux_info_size())
811  return false;
812  *err = !runs_->CacheAuxInfo(buf, buf_size);
813  return !*err;
814  }
815 
816  int64_t sample_offset = runs_->sample_offset() + moof_head_;
817  queue_.PeekAt(sample_offset, &buf, &buf_size);
818  if (buf_size < runs_->sample_size()) {
819  if (sample_offset < queue_.head()) {
820  LOG(ERROR) << "Incorrect sample offset " << sample_offset
821  << " < " << queue_.head();
822  *err = true;
823  }
824  return false;
825  }
826 
827  const uint8_t* media_data = buf;
828  const size_t media_data_size = runs_->sample_size();
829  // Use a dummy data size of 0 to avoid copying overhead.
830  // Actual media data is set later.
831  const size_t kDummyDataSize = 0;
832  std::shared_ptr<MediaSample> stream_sample(
833  MediaSample::CopyFrom(media_data, kDummyDataSize, runs_->is_keyframe()));
834 
835  if (runs_->is_encrypted()) {
836  std::shared_ptr<uint8_t> decrypted_media_data(
837  new uint8_t[media_data_size], std::default_delete<uint8_t[]>());
838  std::unique_ptr<DecryptConfig> decrypt_config = runs_->GetDecryptConfig();
839  if (!decrypt_config) {
840  *err = true;
841  LOG(ERROR) << "Missing decrypt config.";
842  return false;
843  }
844 
845  if (!decryptor_source_) {
846  stream_sample->SetData(media_data, media_data_size);
847  // If the demuxer does not have the decryptor_source_, store
848  // decrypt_config so that the demuxed sample can be decrypted later.
849  stream_sample->set_decrypt_config(std::move(decrypt_config));
850  stream_sample->set_is_encrypted(true);
851  } else {
852  if (!decryptor_source_->DecryptSampleBuffer(decrypt_config.get(),
853  media_data, media_data_size,
854  decrypted_media_data.get())) {
855  *err = true;
856  LOG(ERROR) << "Cannot decrypt samples.";
857  return false;
858  }
859  stream_sample->TransferData(std::move(decrypted_media_data),
860  media_data_size);
861  }
862  } else {
863  stream_sample->SetData(media_data, media_data_size);
864  }
865 
866  stream_sample->set_dts(runs_->dts());
867  stream_sample->set_pts(runs_->cts());
868  stream_sample->set_duration(runs_->duration());
869 
870  DVLOG(3) << "Pushing frame: "
871  << ", key=" << runs_->is_keyframe()
872  << ", dur=" << runs_->duration()
873  << ", dts=" << runs_->dts()
874  << ", cts=" << runs_->cts()
875  << ", size=" << runs_->sample_size();
876 
877  if (!new_sample_cb_.Run(runs_->track_id(), stream_sample)) {
878  *err = true;
879  LOG(ERROR) << "Failed to process the sample.";
880  return false;
881  }
882 
883  runs_->AdvanceSample();
884  return true;
885 }
886 
887 bool MP4MediaParser::ReadAndDiscardMDATsUntil(const int64_t offset) {
888  bool err = false;
889  while (mdat_tail_ < offset) {
890  const uint8_t* buf;
891  int size;
892  queue_.PeekAt(mdat_tail_, &buf, &size);
893 
894  FourCC type;
895  uint64_t box_sz;
896  if (!BoxReader::StartBox(buf, size, &type, &box_sz, &err))
897  break;
898 
899  mdat_tail_ += box_sz;
900  }
901  queue_.Trim(std::min(mdat_tail_, offset));
902  return !err;
903 }
904 
905 void MP4MediaParser::ChangeState(State new_state) {
906  DVLOG(2) << "Changing state: " << new_state;
907  state_ = new_state;
908 }
909 
910 } // namespace mp4
911 } // namespace media
912 } // namespace shaka
DecryptorSource wraps KeySource and is responsible for decryptor management.
KeySource is responsible for encryption key acquisition.
Definition: key_source.h:51
base::Callback< bool(uint32_t track_id, std::shared_ptr< TextSample > text_sample)> NewTextSampleCB
Definition: media_parser.h:53
base::Callback< bool(uint32_t track_id, std::shared_ptr< MediaSample > media_sample)> NewMediaSampleCB
Definition: media_parser.h:44
base::Callback< void(const std::vector< std::shared_ptr< StreamInfo > > &stream_info)> InitCB
Definition: media_parser.h:35
All the methods that are virtual are virtual for mocking.