DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator
wvm_media_parser.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "packager/media/formats/wvm/wvm_media_parser.h"
6 
7 #include <map>
8 #include <sstream>
9 #include <vector>
10 
11 #include "packager/base/strings/string_number_conversions.h"
12 #include "packager/media/base/aes_decryptor.h"
13 #include "packager/media/base/audio_stream_info.h"
14 #include "packager/media/base/key_source.h"
15 #include "packager/media/base/media_sample.h"
16 #include "packager/media/base/status.h"
17 #include "packager/media/base/video_stream_info.h"
18 #include "packager/media/codecs/aac_audio_specific_config.h"
19 #include "packager/media/codecs/avc_decoder_configuration_record.h"
20 #include "packager/media/codecs/es_descriptor.h"
21 #include "packager/media/formats/mp2t/adts_header.h"
22 
// Evaluates to true when a PES packet with stream id |x| is followed by the
// optional PES header extension (two flag bytes plus a header-data-length
// byte) rather than going straight to the payload. The listed stream ids are
// the ones that never carry the extension.
// NOTE: |x| is evaluated several times; do not pass an expression with side
// effects.
#define HAS_HEADER_EXTENSION(x)                                         \
  (((x) != 0xBC) && ((x) != 0xBE) && ((x) != 0xBF) && ((x) != 0xF0) && \
   ((x) != 0xF2) && ((x) != 0xF8) && ((x) != 0xFF))
26 
namespace {

// Clock rate used as the time scale of every stream.
const uint32_t kMpeg2ClockRate = 90000;

// Bits tested in the second PES flags byte: PTS present / DTS present.
const uint32_t kPesOptPts = 0x80;
const uint32_t kPesOptDts = 0x40;
// Bit tested in the first PES flags byte; used to mark a sample as key frame.
const uint32_t kPesOptAlign = 0x04;

// Stream ids that receive dedicated handling in the parser.
const uint32_t kPsmStreamId = 0xBC;
const uint32_t kPaddingStreamId = 0xBE;
// Magic number expected at the start of index data ('IDMi' in ASCII).
const uint32_t kIndexMagic = 0x49444d69;
const uint32_t kIndexStreamId = 0xBF;  // private_stream_2
// Size of the magic + version + size fields that prefix a version 4 index.
const uint32_t kIndexVersion4HeaderSize = 12;
const uint32_t kEcmStreamId = 0xF0;
const uint32_t kV2MetadataStreamId = 0xF1;  // EMM_stream

// Bits of the first PES flags byte that indicate a scrambled payload.
const uint32_t kScramblingBitsMask = 0x30;

// The four bytes of a start code: 00 00 01 <type>.
const uint32_t kStartCode1 = 0x00;
const uint32_t kStartCode2 = 0x00;
const uint32_t kStartCode3 = 0x01;
const uint32_t kStartCode4Pack = 0xBA;
const uint32_t kStartCode4System = 0xBB;
const uint32_t kStartCode4ProgramEnd = 0xB9;

// A PES stream id is video when (id & video mask) == video id, and audio when
// (id & audio mask) == audio id.
const uint32_t kPesStreamIdVideoMask = 0xF0;
const uint32_t kPesStreamIdVideo = 0xE0;
const uint32_t kPesStreamIdAudioMask = 0xE0;
const uint32_t kPesStreamIdAudio = 0xC0;

// The only index version whose metadata is parsed.
const uint32_t kVersion4 = 4;

const size_t kAdtsHeaderMinSize = 7;
const uint8_t kAacSampleSizeBits = 16;
// Applies to all video streams.
const uint8_t kNaluLengthSize = 4;  // unit is bytes.
// Placeholder sampling frequency for all audio streams, which
// will be overwritten after filter parsing.
const uint32_t kDefaultSamplingFrequency = 100;

const uint16_t kEcmSizeBytes = 80;
const uint32_t kInitializationVectorSizeBytes = 16;
// ECM fields for processing.
const uint32_t kEcmContentKeySizeBytes = 16;
const uint32_t kEcmDCPFlagsSizeBytes = 3;
const uint32_t kEcmCCIFlagsSizeBytes = 1;
const uint32_t kEcmFlagsSizeBytes =
    kEcmCCIFlagsSizeBytes + kEcmDCPFlagsSizeBytes;
const uint32_t kEcmPaddingSizeBytes = 12;
const uint32_t kAssetKeySizeBytes = 16;

// Default audio and video PES stream IDs.
const uint8_t kDefaultAudioStreamId = kPesStreamIdAudio;
const uint8_t kDefaultVideoStreamId = kPesStreamIdVideo;

// Value type codes carried by index metadata entries.
enum Type {
  Type_void = 0,
  Type_uint8 = 1,
  Type_int8 = 2,
  Type_uint16 = 3,
  Type_int16 = 4,
  Type_uint32 = 5,
  Type_int32 = 6,
  Type_uint64 = 7,
  Type_int64 = 8,
  Type_string = 9,
  Type_BinaryData = 10
};

}  // namespace
86 
87 namespace shaka {
88 namespace media {
89 namespace wvm {
90 
91 WvmMediaParser::WvmMediaParser()
92  : is_initialized_(false),
93  parse_state_(StartCode1),
94  skip_bytes_(0),
95  metadata_is_complete_(false),
96  current_program_id_(0),
97  pes_stream_id_(0),
98  prev_pes_stream_id_(0),
99  pes_packet_bytes_(0),
100  pes_flags_1_(0),
101  pes_flags_2_(0),
102  prev_pes_flags_1_(0),
103  pes_header_data_bytes_(0),
104  timestamp_(0),
105  pts_(0),
106  dts_(0),
107  index_program_id_(0),
108  media_sample_(NULL),
109  crypto_unit_start_pos_(0),
110  stream_id_count_(0),
111  decryption_key_source_(NULL) {
112 }
113 
114 WvmMediaParser::~WvmMediaParser() {}
115 
116 void WvmMediaParser::Init(const InitCB& init_cb,
117  const NewSampleCB& new_sample_cb,
118  KeySource* decryption_key_source) {
119  DCHECK(!is_initialized_);
120  DCHECK(!init_cb.is_null());
121  DCHECK(!new_sample_cb.is_null());
122  decryption_key_source_ = decryption_key_source;
123  init_cb_ = init_cb;
124  new_sample_cb_ = new_sample_cb;
125 }
126 
127 bool WvmMediaParser::Parse(const uint8_t* buf, int size) {
128  size_t num_bytes = 0;
129  size_t prev_size = 0;
130  const uint8_t* read_ptr = buf;
131  const uint8_t* end = read_ptr + size;
132 
133  while (read_ptr < end) {
134  switch (parse_state_) {
135  case StartCode1:
136  if (*read_ptr == kStartCode1) {
137  parse_state_ = StartCode2;
138  }
139  break;
140  case StartCode2:
141  if (*read_ptr == kStartCode2) {
142  parse_state_ = StartCode3;
143  } else {
144  parse_state_ = StartCode1;
145  }
146  break;
147  case StartCode3:
148  if (*read_ptr == kStartCode3) {
149  parse_state_ = StartCode4;
150  } else {
151  parse_state_ = StartCode1;
152  }
153  break;
154  case StartCode4:
155  switch (*read_ptr) {
156  case kStartCode4Pack:
157  parse_state_ = PackHeader1;
158  break;
159  case kStartCode4System:
160  parse_state_ = SystemHeader1;
161  break;
162  case kStartCode4ProgramEnd:
163  parse_state_ = ProgramEnd;
164  continue;
165  default:
166  parse_state_ = PesStreamId;
167  continue;
168  }
169  break;
170  case PackHeader1:
171  parse_state_ = PackHeader2;
172  break;
173  case PackHeader2:
174  parse_state_ = PackHeader3;
175  break;
176  case PackHeader3:
177  parse_state_ = PackHeader4;
178  break;
179  case PackHeader4:
180  parse_state_ = PackHeader5;
181  break;
182  case PackHeader5:
183  parse_state_ = PackHeader6;
184  break;
185  case PackHeader6:
186  parse_state_ = PackHeader7;
187  break;
188  case PackHeader7:
189  parse_state_ = PackHeader8;
190  break;
191  case PackHeader8:
192  parse_state_ = PackHeader9;
193  break;
194  case PackHeader9:
195  parse_state_ = PackHeader10;
196  break;
197  case PackHeader10:
198  skip_bytes_ = *read_ptr & 0x07;
199  parse_state_ = PackHeaderStuffingSkip;
200  break;
201  case SystemHeader1:
202  skip_bytes_ = *read_ptr;
203  skip_bytes_ <<= 8;
204  parse_state_ = SystemHeader2;
205  break;
206  case SystemHeader2:
207  skip_bytes_ |= *read_ptr;
208  parse_state_ = SystemHeaderSkip;
209  break;
210  case PackHeaderStuffingSkip:
211  if ((end - read_ptr) >= skip_bytes_) {
212  read_ptr += skip_bytes_;
213  skip_bytes_ = 0;
214  parse_state_ = StartCode1;
215  } else {
216  skip_bytes_ -= (end - read_ptr);
217  read_ptr = end;
218  }
219  continue;
220  case SystemHeaderSkip:
221  if ((end - read_ptr) >= skip_bytes_) {
222  read_ptr += skip_bytes_;
223  skip_bytes_ = 0;
224  parse_state_ = StartCode1;
225  } else {
226  uint32_t remaining_size = end - read_ptr;
227  skip_bytes_ -= remaining_size;
228  read_ptr = end;
229  }
230  continue;
231  case PesStreamId:
232  pes_stream_id_ = *read_ptr;
233  if (!metadata_is_complete_ &&
234  (pes_stream_id_ != kPsmStreamId) &&
235  (pes_stream_id_ != kIndexStreamId) &&
236  (pes_stream_id_ != kEcmStreamId) &&
237  (pes_stream_id_ != kV2MetadataStreamId) &&
238  (pes_stream_id_ != kPaddingStreamId)) {
239  metadata_is_complete_ = true;
240  }
241  parse_state_ = PesPacketLength1;
242  break;
243  case PesPacketLength1:
244  pes_packet_bytes_ = *read_ptr;
245  pes_packet_bytes_ <<= 8;
246  parse_state_ = PesPacketLength2;
247  break;
248  case PesPacketLength2:
249  pes_packet_bytes_ |= *read_ptr;
250  if (HAS_HEADER_EXTENSION(pes_stream_id_)) {
251  parse_state_ = PesExtension1;
252  } else {
253  pes_flags_1_ = pes_flags_2_ = 0;
254  pes_header_data_bytes_ = 0;
255  parse_state_ = PesPayload;
256  }
257  break;
258  case PesExtension1:
259  prev_pes_flags_1_ = pes_flags_1_;
260  pes_flags_1_ = *read_ptr;
261  --pes_packet_bytes_;
262  parse_state_ = PesExtension2;
263  break;
264  case PesExtension2:
265  pes_flags_2_ = *read_ptr;
266  --pes_packet_bytes_;
267  parse_state_ = PesExtension3;
268  break;
269  case PesExtension3:
270  pes_header_data_bytes_ = *read_ptr;
271  --pes_packet_bytes_;
272  if (pes_flags_2_ & kPesOptPts) {
273  parse_state_ = Pts1;
274  } else {
275  parse_state_ = PesHeaderData;
276  }
277  break;
278  case Pts1:
279  timestamp_ = (*read_ptr & 0x0E);
280  --pes_header_data_bytes_;
281  --pes_packet_bytes_;
282  parse_state_ = Pts2;
283  break;
284  case Pts2:
285  timestamp_ <<= 7;
286  timestamp_ |= *read_ptr;
287  --pes_header_data_bytes_;
288  --pes_packet_bytes_;
289  parse_state_ = Pts3;
290  break;
291  case Pts3:
292  timestamp_ <<= 7;
293  timestamp_ |= *read_ptr >> 1;
294  --pes_header_data_bytes_;
295  --pes_packet_bytes_;
296  parse_state_ = Pts4;
297  break;
298  case Pts4:
299  timestamp_ <<= 8;
300  timestamp_ |= *read_ptr;
301  --pes_header_data_bytes_;
302  --pes_packet_bytes_;
303  parse_state_ = Pts5;
304  break;
305  case Pts5:
306  timestamp_ <<= 7;
307  timestamp_ |= *read_ptr >> 1;
308  pts_ = timestamp_;
309  --pes_header_data_bytes_;
310  --pes_packet_bytes_;
311  if (pes_flags_2_ & kPesOptDts) {
312  parse_state_ = Dts1;
313  } else {
314  dts_ = pts_;
315  parse_state_ = PesHeaderData;
316  }
317  break;
318  case Dts1:
319  timestamp_ = (*read_ptr & 0x0E);
320  --pes_header_data_bytes_;
321  --pes_packet_bytes_;
322  parse_state_ = Dts2;
323  break;
324  case Dts2:
325  timestamp_ <<= 7;
326  timestamp_ |= *read_ptr;
327  --pes_header_data_bytes_;
328  --pes_packet_bytes_;
329  parse_state_ = Dts3;
330  break;
331  case Dts3:
332  timestamp_ <<= 7;
333  timestamp_ |= *read_ptr >> 1;
334  --pes_header_data_bytes_;
335  --pes_packet_bytes_;
336  parse_state_ = Dts4;
337  break;
338  case Dts4:
339  timestamp_ <<= 8;
340  timestamp_ |= *read_ptr;
341  --pes_header_data_bytes_;
342  --pes_packet_bytes_;
343  parse_state_ = Dts5;
344  break;
345  case Dts5:
346  timestamp_ <<= 7;
347  timestamp_ |= *read_ptr >> 1;
348  dts_ = timestamp_;
349  --pes_header_data_bytes_;
350  --pes_packet_bytes_;
351  parse_state_ = PesHeaderData;
352  break;
353  case PesHeaderData:
354  num_bytes = end - read_ptr;
355  if (num_bytes >= pes_header_data_bytes_) {
356  num_bytes = pes_header_data_bytes_;
357  parse_state_ = PesPayload;
358  }
359  pes_header_data_bytes_ -= num_bytes;
360  pes_packet_bytes_ -= num_bytes;
361  read_ptr += num_bytes;
362  continue;
363  case PesPayload:
364  switch (pes_stream_id_) {
365  case kPsmStreamId:
366  psm_data_.clear();
367  parse_state_ = PsmPayload;
368  continue;
369  case kPaddingStreamId:
370  parse_state_ = Padding;
371  continue;
372  case kEcmStreamId:
373  ecm_.clear();
374  parse_state_ = EcmPayload;
375  continue;
376  case kIndexStreamId:
377  parse_state_ = IndexPayload;
378  continue;
379  default:
380  if (!DemuxNextPes(false)) {
381  return false;
382  }
383  parse_state_ = EsPayload;
384  }
385  continue;
386  case PsmPayload:
387  num_bytes = end - read_ptr;
388  if (num_bytes >= pes_packet_bytes_) {
389  num_bytes = pes_packet_bytes_;
390  parse_state_ = StartCode1;
391  }
392  if (num_bytes > 0) {
393  pes_packet_bytes_ -= num_bytes;
394  prev_size = psm_data_.size();
395  psm_data_.resize(prev_size + num_bytes);
396  memcpy(&psm_data_[prev_size], read_ptr, num_bytes);
397  }
398  read_ptr += num_bytes;
399  continue;
400  case EcmPayload:
401  num_bytes = end - read_ptr;
402  if (num_bytes >= pes_packet_bytes_) {
403  num_bytes = pes_packet_bytes_;
404  parse_state_ = StartCode1;
405  }
406  if (num_bytes > 0) {
407  pes_packet_bytes_ -= num_bytes;
408  prev_size = ecm_.size();
409  ecm_.resize(prev_size + num_bytes);
410  memcpy(&ecm_[prev_size], read_ptr, num_bytes);
411  }
412  if ((pes_packet_bytes_ == 0) && !ecm_.empty()) {
413  if (!ProcessEcm()) {
414  return(false);
415  }
416  }
417  read_ptr += num_bytes;
418  continue;
419  case IndexPayload:
420  num_bytes = end - read_ptr;
421  if (num_bytes >= pes_packet_bytes_) {
422  num_bytes = pes_packet_bytes_;
423  parse_state_ = StartCode1;
424  }
425  if (num_bytes > 0) {
426  pes_packet_bytes_ -= num_bytes;
427  prev_size = index_data_.size();
428  index_data_.resize(prev_size + num_bytes);
429  memcpy(&index_data_[prev_size], read_ptr, num_bytes);
430  }
431  if (pes_packet_bytes_ == 0 && !index_data_.empty()) {
432  if (!metadata_is_complete_) {
433  if (!ParseIndexEntry()) {
434  return false;
435  }
436  }
437  }
438  read_ptr += num_bytes;
439  continue;
440  case EsPayload:
441  num_bytes = end - read_ptr;
442  if (num_bytes >= pes_packet_bytes_) {
443  num_bytes = pes_packet_bytes_;
444  parse_state_ = StartCode1;
445  }
446  pes_packet_bytes_ -= num_bytes;
447  if (pes_stream_id_ != kV2MetadataStreamId) {
448  sample_data_.resize(sample_data_.size() + num_bytes);
449  memcpy(&sample_data_[sample_data_.size() - num_bytes], read_ptr,
450  num_bytes);
451  }
452  prev_pes_stream_id_ = pes_stream_id_;
453  read_ptr += num_bytes;
454  continue;
455  case Padding:
456  num_bytes = end - read_ptr;
457  if (num_bytes >= pes_packet_bytes_) {
458  num_bytes = pes_packet_bytes_;
459  parse_state_ = StartCode1;
460  }
461  pes_packet_bytes_ -= num_bytes;
462  read_ptr += num_bytes;
463  continue;
464  case ProgramEnd:
465  parse_state_ = StartCode1;
466  metadata_is_complete_ = true;
467  if (!DemuxNextPes(true)) {
468  return false;
469  }
470  if (!Flush()) {
471  return false;
472  }
473  // Reset.
474  dts_ = pts_ = 0;
475  parse_state_ = StartCode1;
476  prev_media_sample_data_.Reset();
477  current_program_id_++;
478  ecm_.clear();
479  index_data_.clear();
480  psm_data_.clear();
481  break;
482  default:
483  break;
484  }
485  ++read_ptr;
486  }
487  return true;
488 }
489 
490 bool WvmMediaParser::EmitLastSample(uint32_t stream_id,
491  scoped_refptr<MediaSample>& new_sample) {
492  std::string key = base::UintToString(current_program_id_)
493  .append(":")
494  .append(base::UintToString(stream_id));
495  std::map<std::string, uint32_t>::iterator it =
496  program_demux_stream_map_.find(key);
497  if (it == program_demux_stream_map_.end())
498  return false;
499  return EmitSample(stream_id, (*it).second, new_sample, true);
500 }
501 
502 bool WvmMediaParser::EmitPendingSamples() {
503  // Emit queued samples which were built when not initialized.
504  while (!media_sample_queue_.empty()) {
505  DemuxStreamIdMediaSample& demux_stream_media_sample =
506  media_sample_queue_.front();
507  if (!EmitSample(demux_stream_media_sample.parsed_audio_or_video_stream_id,
508  demux_stream_media_sample.demux_stream_id,
509  demux_stream_media_sample.media_sample,
510  false)) {
511  return false;
512  }
513  media_sample_queue_.pop_front();
514  }
515  return true;
516 }
517 
518 bool WvmMediaParser::Flush() {
519  // Flush the last audio and video sample for current program.
520  // Reset the streamID when successfully emitted.
521  if (prev_media_sample_data_.audio_sample != NULL) {
522  if (!EmitLastSample(prev_pes_stream_id_,
523  prev_media_sample_data_.audio_sample)) {
524  LOG(ERROR) << "Did not emit last sample for audio stream with ID = "
525  << prev_pes_stream_id_;
526  return false;
527  }
528  }
529  if (prev_media_sample_data_.video_sample != NULL) {
530  if (!EmitLastSample(prev_pes_stream_id_,
531  prev_media_sample_data_.video_sample)) {
532  LOG(ERROR) << "Did not emit last sample for video stream with ID = "
533  << prev_pes_stream_id_;
534  return false;
535  }
536  }
537  return true;
538 }
539 
540 bool WvmMediaParser::ParseIndexEntry() {
541  // Do not parse index entry at the beginning of any track *after* the first
542  // track.
543  if (current_program_id_ > 0) {
544  return true;
545  }
546  uint32_t index_size = 0;
547  if (index_data_.size() < kIndexVersion4HeaderSize) {
548  return false;
549  }
550 
551  const uint8_t* read_ptr = index_data_.data();
552  if (ntohlFromBuffer(read_ptr) != kIndexMagic) {
553  index_data_.clear();
554  return false;
555  }
556  read_ptr += 4;
557 
558  uint32_t version = ntohlFromBuffer(read_ptr);
559  read_ptr += 4;
560  if (version == kVersion4) {
561  index_size = kIndexVersion4HeaderSize + ntohlFromBuffer(read_ptr);
562  if (index_data_.size() < index_size) {
563  // We do not yet have the full index. Keep accumulating index data.
564  return true;
565  }
566  read_ptr += sizeof(uint32_t);
567 
568  // Index metadata
569  uint32_t index_metadata_max_size = index_size - kIndexVersion4HeaderSize;
570  if (index_metadata_max_size < sizeof(uint8_t)) {
571  index_data_.clear();
572  return false;
573  }
574 
575  uint64_t track_duration = 0;
576  int16_t trick_play_rate = 0;
577  uint32_t sampling_frequency = kDefaultSamplingFrequency;
578  uint32_t time_scale = kMpeg2ClockRate;
579  uint16_t video_width = 0;
580  uint16_t video_height = 0;
581  uint32_t pixel_width = 0;
582  uint32_t pixel_height = 0;
583  uint8_t nalu_length_size = kNaluLengthSize;
584  uint8_t num_channels = 0;
585  int audio_pes_stream_id = 0;
586  int video_pes_stream_id = 0;
587  bool has_video = false;
588  bool has_audio = false;
589  std::vector<uint8_t> audio_codec_config;
590  std::vector<uint8_t> video_codec_config;
591  uint8_t num_index_entries = *read_ptr;
592  ++read_ptr;
593  --index_metadata_max_size;
594 
595  for (uint8_t idx = 0; idx < num_index_entries; ++idx) {
596  if (index_metadata_max_size < (2 * sizeof(uint8_t)) + sizeof(uint32_t)) {
597  return false;
598  }
599  uint8_t tag = *read_ptr;
600  ++read_ptr;
601  uint8_t type = *read_ptr;
602  ++read_ptr;
603  uint32_t length = ntohlFromBuffer(read_ptr);
604  read_ptr += sizeof(uint32_t);
605  index_metadata_max_size -= (2 * sizeof(uint8_t)) + sizeof(uint32_t);
606  if (index_metadata_max_size < length) {
607  return false;
608  }
609  int64_t value = 0;
610  Tag tagtype = Unset;
611  std::vector<uint8_t> binary_data;
612  switch (Type(type)) {
613  case Type_uint8:
614  if (length == sizeof(uint8_t)) {
615  tagtype = GetTag(tag, length, read_ptr, &value);
616  } else {
617  return false;
618  }
619  break;
620  case Type_int8:
621  if (length == sizeof(int8_t)) {
622  tagtype = GetTag(tag, length, read_ptr, &value);
623  } else {
624  return false;
625  }
626  break;
627  case Type_uint16:
628  if (length == sizeof(uint16_t)) {
629  tagtype = GetTag(tag, length, read_ptr, &value);
630  } else {
631  return false;
632  }
633  break;
634  case Type_int16:
635  if (length == sizeof(int16_t)) {
636  tagtype = GetTag(tag, length, read_ptr, &value);
637  } else {
638  return false;
639  }
640  break;
641  case Type_uint32:
642  if (length == sizeof(uint32_t)) {
643  tagtype = GetTag(tag, length, read_ptr, &value);
644  } else {
645  return false;
646  }
647  break;
648  case Type_int32:
649  if (length == sizeof(int32_t)) {
650  tagtype = GetTag(tag, length, read_ptr, &value);
651  } else {
652  return false;
653  }
654  break;
655  case Type_uint64:
656  if (length == sizeof(uint64_t)) {
657  tagtype = GetTag(tag, length, read_ptr, &value);
658  } else {
659  return false;
660  }
661  break;
662  case Type_int64:
663  if (length == sizeof(int64_t)) {
664  tagtype = GetTag(tag, length, read_ptr, &value);
665  } else {
666  return false;
667  }
668  break;
669  case Type_string:
670  case Type_BinaryData:
671  binary_data.assign(read_ptr, read_ptr + length);
672  tagtype = Tag(tag);
673  break;
674  default:
675  break;
676  }
677 
678  switch (tagtype) {
679  case TrackDuration:
680  track_duration = value;
681  break;
682  case TrackTrickPlayRate:
683  trick_play_rate = value;
684  break;
685  case VideoStreamId:
686  video_pes_stream_id = value;
687  break;
688  case AudioStreamId:
689  audio_pes_stream_id = value;
690  break;
691  case VideoWidth:
692  video_width = (uint16_t)value;
693  break;
694  case VideoHeight:
695  video_height = (uint16_t)value;
696  break;
697  case AudioNumChannels:
698  num_channels = (uint8_t)value;
699  break;
700  case VideoType:
701  has_video = true;
702  break;
703  case AudioType:
704  has_audio = true;
705  break;
706  case VideoPixelWidth:
707  pixel_width = static_cast<uint32_t>(value);
708  break;
709  case VideoPixelHeight:
710  pixel_height = static_cast<uint32_t>(value);
711  break;
712  case Audio_EsDescriptor: {
713  ESDescriptor descriptor;
714  if (!descriptor.Parse(binary_data)) {
715  LOG(ERROR) <<
716  "Could not extract AudioSpecificConfig from ES_Descriptor";
717  return false;
718  }
719  audio_codec_config = descriptor.decoder_specific_info();
720  break;
721  }
722  case Audio_EC3SpecificData:
723  case Audio_DtsSpecificData:
724  case Audio_AC3SpecificData:
725  LOG(ERROR) << "Audio type not supported.";
726  return false;
727  case Video_AVCDecoderConfigurationRecord:
728  video_codec_config = binary_data;
729  break;
730  default:
731  break;
732  }
733 
734  read_ptr += length;
735  index_metadata_max_size -= length;
736  }
737  // End Index metadata
738  index_size = read_ptr - index_data_.data();
739 
740  if (has_video) {
741  Codec video_codec = kCodecH264;
742  stream_infos_.push_back(new VideoStreamInfo(
743  stream_id_count_, time_scale, track_duration, video_codec,
744  std::string(), video_codec_config.data(), video_codec_config.size(),
745  video_width, video_height, pixel_width, pixel_height, trick_play_rate,
746  nalu_length_size, std::string(), true));
747  program_demux_stream_map_[base::UintToString(index_program_id_) + ":" +
748  base::UintToString(
749  video_pes_stream_id
750  ? video_pes_stream_id
751  : kDefaultVideoStreamId)] =
752  stream_id_count_++;
753  }
754  if (has_audio) {
755  const Codec audio_codec = kCodecAAC;
756  // TODO(beil): Pass in max and average bitrate in wvm container.
757  stream_infos_.push_back(new AudioStreamInfo(
758  stream_id_count_, time_scale, track_duration, audio_codec,
759  std::string(), audio_codec_config.data(), audio_codec_config.size(),
760  kAacSampleSizeBits, num_channels, sampling_frequency,
761  0 /* seek preroll */, 0 /* codec delay */, 0 /* max bitrate */,
762  0 /* avg bitrate */, std::string(), true));
763  program_demux_stream_map_[base::UintToString(index_program_id_) + ":" +
764  base::UintToString(
765  audio_pes_stream_id
766  ? audio_pes_stream_id
767  : kDefaultAudioStreamId)] =
768  stream_id_count_++;
769  }
770  }
771 
772  index_program_id_++;
773  index_data_.clear();
774  return true;
775 }
776 
777 bool WvmMediaParser::DemuxNextPes(bool is_program_end) {
778  bool output_encrypted_sample = false;
779  if (!sample_data_.empty() && (prev_pes_flags_1_ & kScramblingBitsMask)) {
780  // Decrypt crypto unit.
781  if (!content_decryptor_) {
782  output_encrypted_sample = true;
783  } else {
784  content_decryptor_->Crypt(&sample_data_[crypto_unit_start_pos_],
785  sample_data_.size() - crypto_unit_start_pos_,
786  &sample_data_[crypto_unit_start_pos_]);
787  }
788  }
789  // Demux media sample if we are at program end or if we are not at a
790  // continuation PES.
791  if ((pes_flags_2_ & kPesOptPts) || is_program_end) {
792  if (!sample_data_.empty()) {
793  if (!Output(output_encrypted_sample)) {
794  return false;
795  }
796  }
797  StartMediaSampleDemux();
798  }
799 
800  crypto_unit_start_pos_ = sample_data_.size();
801  return true;
802 }
803 
804 void WvmMediaParser::StartMediaSampleDemux() {
805  bool is_key_frame = ((pes_flags_1_ & kPesOptAlign) != 0);
806  media_sample_ = MediaSample::CreateEmptyMediaSample();
807  media_sample_->set_dts(dts_);
808  media_sample_->set_pts(pts_);
809  media_sample_->set_is_key_frame(is_key_frame);
810 
811  sample_data_.clear();
812 }
813 
814 bool WvmMediaParser::Output(bool output_encrypted_sample) {
815  if (output_encrypted_sample) {
816  media_sample_->set_data(sample_data_.data(), sample_data_.size());
817  media_sample_->set_is_encrypted(true);
818  } else {
819  if ((prev_pes_stream_id_ & kPesStreamIdVideoMask) == kPesStreamIdVideo) {
820  // Convert video stream to unit stream and get config.
821  std::vector<uint8_t> nal_unit_stream;
822  if (!byte_to_unit_stream_converter_.ConvertByteStreamToNalUnitStream(
823  sample_data_.data(), sample_data_.size(), &nal_unit_stream)) {
824  LOG(ERROR) << "Could not convert h.264 byte stream sample";
825  return false;
826  }
827  media_sample_->set_data(nal_unit_stream.data(), nal_unit_stream.size());
828  if (!is_initialized_) {
829  // Set extra data for video stream from AVC Decoder Config Record.
830  // Also, set codec string from the AVC Decoder Config Record.
831  std::vector<uint8_t> decoder_config_record;
832  byte_to_unit_stream_converter_.GetDecoderConfigurationRecord(
833  &decoder_config_record);
834  for (uint32_t i = 0; i < stream_infos_.size(); i++) {
835  if (stream_infos_[i]->stream_type() == kStreamVideo &&
836  stream_infos_[i]->codec_string().empty()) {
837  const std::vector<uint8_t>* stream_config;
838  if (stream_infos_[i]->codec_config().empty()) {
839  // Decoder config record not available for stream. Use the one
840  // computed from the first video stream.
841  stream_infos_[i]->set_codec_config(decoder_config_record);
842  stream_config = &decoder_config_record;
843  } else {
844  // Use stream-specific config record.
845  stream_config = &stream_infos_[i]->codec_config();
846  }
847  DCHECK(stream_config);
848 
849  VideoStreamInfo* video_stream_info =
850  reinterpret_cast<VideoStreamInfo*>(stream_infos_[i].get());
851  AVCDecoderConfigurationRecord avc_config;
852  if (!avc_config.Parse(*stream_config)) {
853  LOG(WARNING) << "Failed to parse AVCDecoderConfigurationRecord. "
854  "Using computed configuration record instead.";
855  video_stream_info->set_codec_config(decoder_config_record);
856  if (!avc_config.Parse(decoder_config_record)) {
857  LOG(ERROR) << "Failed to parse AVCDecoderConfigurationRecord.";
858  return false;
859  }
860  }
861  video_stream_info->set_codec_string(avc_config.GetCodecString());
862 
863  if (avc_config.pixel_width() != video_stream_info->pixel_width() ||
864  avc_config.pixel_height() !=
865  video_stream_info->pixel_height()) {
866  LOG_IF(WARNING, video_stream_info->pixel_width() != 0 ||
867  video_stream_info->pixel_height() != 0)
868  << "Pixel aspect ratio in WVM metadata ("
869  << video_stream_info->pixel_width() << ","
870  << video_stream_info->pixel_height()
871  << ") does not match with SAR in "
872  "AVCDecoderConfigurationRecord ("
873  << avc_config.pixel_width() << ","
874  << avc_config.pixel_height()
875  << "). Use AVCDecoderConfigurationRecord.";
876  video_stream_info->set_pixel_width(avc_config.pixel_width());
877  video_stream_info->set_pixel_height(avc_config.pixel_height());
878  }
879  if (avc_config.coded_width() != video_stream_info->width() ||
880  avc_config.coded_height() != video_stream_info->height()) {
881  LOG(WARNING) << "Resolution in WVM metadata ("
882  << video_stream_info->width() << ","
883  << video_stream_info->height()
884  << ") does not match with resolution in "
885  "AVCDecoderConfigurationRecord ("
886  << avc_config.coded_width() << ","
887  << avc_config.coded_height()
888  << "). Use AVCDecoderConfigurationRecord.";
889  video_stream_info->set_width(avc_config.coded_width());
890  video_stream_info->set_height(avc_config.coded_height());
891  }
892  }
893  }
894  }
895  } else if ((prev_pes_stream_id_ & kPesStreamIdAudioMask) ==
896  kPesStreamIdAudio) {
897  // Set data on the audio stream.
898  int frame_size = static_cast<int>(mp2t::AdtsHeader::GetAdtsFrameSize(
899  sample_data_.data(), kAdtsHeaderMinSize));
900  mp2t::AdtsHeader adts_header;
901  const uint8_t* frame_ptr = sample_data_.data();
902  if (!adts_header.Parse(frame_ptr, frame_size)) {
903  LOG(ERROR) << "Could not parse ADTS header";
904  return false;
905  }
906  size_t header_size = adts_header.GetAdtsHeaderSize(frame_ptr,
907  frame_size);
908  media_sample_->set_data(frame_ptr + header_size,
909  frame_size - header_size);
910  if (!is_initialized_) {
911  for (uint32_t i = 0; i < stream_infos_.size(); i++) {
912  if (stream_infos_[i]->stream_type() == kStreamAudio &&
913  stream_infos_[i]->codec_string().empty()) {
914  AudioStreamInfo* audio_stream_info =
915  reinterpret_cast<AudioStreamInfo*>(stream_infos_[i].get());
916  if (audio_stream_info->codec_config().empty()) {
917  // Set AudioStreamInfo fields using information from the ADTS
918  // header.
919  audio_stream_info->set_sampling_frequency(
920  adts_header.GetSamplingFrequency());
921  std::vector<uint8_t> audio_specific_config;
922  if (!adts_header.GetAudioSpecificConfig(&audio_specific_config)) {
923  LOG(ERROR) << "Could not compute AACaudiospecificconfig";
924  return false;
925  }
926  audio_stream_info->set_codec_config(audio_specific_config);
927  audio_stream_info->set_codec_string(
929  kCodecAAC, adts_header.GetObjectType()));
930  } else {
931  // Set AudioStreamInfo fields using information from the
932  // AACAudioSpecificConfig record.
933  AACAudioSpecificConfig aac_config;
934  if (!aac_config.Parse(stream_infos_[i]->codec_config())) {
935  LOG(ERROR) << "Could not parse AACAudioSpecificconfig";
936  return false;
937  }
938  audio_stream_info->set_sampling_frequency(aac_config.frequency());
939  audio_stream_info->set_codec_string(
941  kCodecAAC, aac_config.audio_object_type()));
942  }
943  }
944  }
945  }
946  }
947  }
948 
949  if (!is_initialized_) {
950  bool all_streams_have_config = true;
951  // Check if all collected stream infos have codec_config set.
952  for (uint32_t i = 0; i < stream_infos_.size(); i++) {
953  if (stream_infos_[i]->codec_string().empty()) {
954  all_streams_have_config = false;
955  break;
956  }
957  }
958  if (all_streams_have_config) {
959  init_cb_.Run(stream_infos_);
960  is_initialized_ = true;
961  }
962  }
963 
964  DCHECK_GT(media_sample_->data_size(), 0UL);
965  std::string key = base::UintToString(current_program_id_).append(":")
966  .append(base::UintToString(prev_pes_stream_id_));
967  std::map<std::string, uint32_t>::iterator it =
968  program_demux_stream_map_.find(key);
969  if (it == program_demux_stream_map_.end()) {
970  // TODO(ramjic): Log error message here and in other error cases through
971  // this method.
972  return false;
973  }
974  DemuxStreamIdMediaSample demux_stream_media_sample;
975  demux_stream_media_sample.parsed_audio_or_video_stream_id =
976  prev_pes_stream_id_;
977  demux_stream_media_sample.demux_stream_id = (*it).second;
978  demux_stream_media_sample.media_sample = media_sample_;
979  // Check if sample can be emitted.
980  if (!is_initialized_) {
981  media_sample_queue_.push_back(demux_stream_media_sample);
982  } else {
983  // flush the sample queue and emit all queued samples.
984  while (!media_sample_queue_.empty()) {
985  if (!EmitPendingSamples())
986  return false;
987  }
988  // Emit current sample.
989  if (!EmitSample(prev_pes_stream_id_, (*it).second, media_sample_, false))
990  return false;
991  }
992  return true;
993 }
994 
995 bool WvmMediaParser::EmitSample(uint32_t parsed_audio_or_video_stream_id,
996  uint32_t stream_id,
997  scoped_refptr<MediaSample>& new_sample,
998  bool isLastSample) {
999  DCHECK(new_sample);
1000  if (isLastSample) {
1001  if ((parsed_audio_or_video_stream_id & kPesStreamIdVideoMask) ==
1002  kPesStreamIdVideo) {
1003  new_sample->set_duration(prev_media_sample_data_.video_sample_duration);
1004  } else if ((parsed_audio_or_video_stream_id & kPesStreamIdAudioMask) ==
1005  kPesStreamIdAudio) {
1006  new_sample->set_duration(prev_media_sample_data_.audio_sample_duration);
1007  }
1008  if (!new_sample_cb_.Run(stream_id, new_sample)) {
1009  LOG(ERROR) << "Failed to process the last sample.";
1010  return false;
1011  }
1012  return true;
1013  }
1014 
1015  // Cannot emit current sample. Compute duration first and then,
1016  // emit previous sample.
1017  if ((parsed_audio_or_video_stream_id & kPesStreamIdVideoMask) ==
1018  kPesStreamIdVideo) {
1019  if (prev_media_sample_data_.video_sample == NULL) {
1020  prev_media_sample_data_.video_sample = new_sample;
1021  prev_media_sample_data_.video_stream_id = stream_id;
1022  return true;
1023  }
1024  prev_media_sample_data_.video_sample->set_duration(
1025  new_sample->dts() - prev_media_sample_data_.video_sample->dts());
1026  prev_media_sample_data_.video_sample_duration =
1027  prev_media_sample_data_.video_sample->duration();
1028  if (!new_sample_cb_.Run(prev_media_sample_data_.video_stream_id,
1029  prev_media_sample_data_.video_sample)) {
1030  LOG(ERROR) << "Failed to process the video sample.";
1031  return false;
1032  }
1033  prev_media_sample_data_.video_sample = new_sample;
1034  prev_media_sample_data_.video_stream_id = stream_id;
1035  } else if ((parsed_audio_or_video_stream_id & kPesStreamIdAudioMask) ==
1036  kPesStreamIdAudio) {
1037  if (prev_media_sample_data_.audio_sample == NULL) {
1038  prev_media_sample_data_.audio_sample = new_sample;
1039  prev_media_sample_data_.audio_stream_id = stream_id;
1040  return true;
1041  }
1042  prev_media_sample_data_.audio_sample->set_duration(
1043  new_sample->dts() - prev_media_sample_data_.audio_sample->dts());
1044  prev_media_sample_data_.audio_sample_duration =
1045  prev_media_sample_data_.audio_sample->duration();
1046  if (!new_sample_cb_.Run(prev_media_sample_data_.audio_stream_id,
1047  prev_media_sample_data_.audio_sample)) {
1048  LOG(ERROR) << "Failed to process the audio sample.";
1049  return false;
1050  }
1051  prev_media_sample_data_.audio_sample = new_sample;
1052  prev_media_sample_data_.audio_stream_id = stream_id;
1053  }
1054  return true;
1055 }
1056 
1057 bool WvmMediaParser::GetAssetKey(const uint32_t asset_id,
1058  EncryptionKey* encryption_key) {
1059  DCHECK(decryption_key_source_);
1060  Status status = decryption_key_source_->FetchKeys(asset_id);
1061  if (!status.ok()) {
1062  LOG(ERROR) << "Fetch Key(s) failed for AssetID = " << asset_id
1063  << ", error = " << status;
1064  return false;
1065  }
1066 
1067  status = decryption_key_source_->GetKey(KeySource::TRACK_TYPE_HD,
1068  encryption_key);
1069  if (!status.ok()) {
1070  LOG(ERROR) << "Fetch Key(s) failed for AssetID = " << asset_id
1071  << ", error = " << status;
1072  return false;
1073  }
1074 
1075  return true;
1076 }
1077 
1078 bool WvmMediaParser::ProcessEcm() {
1079  // An error will be returned later if the samples need to be decrypted.
1080  if (!decryption_key_source_)
1081  return true;
1082 
1083  if (current_program_id_ > 0) {
1084  return true;
1085  }
1086  if (ecm_.size() != kEcmSizeBytes) {
1087  LOG(ERROR) << "Unexpected ECM size = " << ecm_.size()
1088  << ", expected size = " << kEcmSizeBytes;
1089  return false;
1090  }
1091  const uint8_t* ecm_data = ecm_.data();
1092  DCHECK(ecm_data);
1093  ecm_data += sizeof(uint32_t); // old version field - skip.
1094  ecm_data += sizeof(uint32_t); // clear lead - skip.
1095  ecm_data += sizeof(uint32_t); // system id(includes ECM version) - skip.
1096  uint32_t asset_id = ntohlFromBuffer(ecm_data);
1097  if (asset_id == 0) {
1098  LOG(ERROR) << "AssetID in ECM is not valid.";
1099  return false;
1100  }
1101  ecm_data += sizeof(uint32_t); // asset_id.
1102  EncryptionKey encryption_key;
1103  if (!GetAssetKey(asset_id, &encryption_key)) {
1104  return false;
1105  }
1106  if (encryption_key.key.size() < kAssetKeySizeBytes) {
1107  LOG(ERROR) << "Asset Key size of " << encryption_key.key.size()
1108  << " for AssetID = " << asset_id
1109  << " is less than minimum asset key size.";
1110  return false;
1111  }
1112  // Legacy WVM content may have asset keys > 16 bytes.
1113  // Use only the first 16 bytes of the asset key to get
1114  // the content key.
1115  std::vector<uint8_t> asset_key(
1116  encryption_key.key.begin(),
1117  encryption_key.key.begin() + kAssetKeySizeBytes);
1118  // WVM format always uses all zero IV.
1119  std::vector<uint8_t> zero_iv(kInitializationVectorSizeBytes, 0);
1120  AesCbcDecryptor asset_decryptor(kCtsPadding, AesCryptor::kUseConstantIv);
1121  if (!asset_decryptor.InitializeWithIv(asset_key, zero_iv)) {
1122  LOG(ERROR) << "Failed to initialize asset_decryptor.";
1123  return false;
1124  }
1125 
1126  const size_t content_key_buffer_size =
1127  kEcmFlagsSizeBytes + kEcmContentKeySizeBytes +
1128  kEcmPaddingSizeBytes; // flags + contentKey + padding.
1129  std::vector<uint8_t> content_key_buffer(content_key_buffer_size);
1130  CHECK(asset_decryptor.Crypt(ecm_data, content_key_buffer_size,
1131  content_key_buffer.data()));
1132 
1133  std::vector<uint8_t> decrypted_content_key_vec(
1134  content_key_buffer.begin() + 4,
1135  content_key_buffer.begin() + 20);
1136  std::unique_ptr<AesCbcDecryptor> content_decryptor(
1137  new AesCbcDecryptor(kCtsPadding, AesCryptor::kUseConstantIv));
1138  if (!content_decryptor->InitializeWithIv(decrypted_content_key_vec,
1139  zero_iv)) {
1140  LOG(ERROR) << "Failed to initialize content decryptor.";
1141  return false;
1142  }
1143 
1144  content_decryptor_ = std::move(content_decryptor);
1145  return true;
1146 }
1147 
1148 DemuxStreamIdMediaSample::DemuxStreamIdMediaSample() :
1149  demux_stream_id(0),
1150  parsed_audio_or_video_stream_id(0) {}
1151 
1152 DemuxStreamIdMediaSample::~DemuxStreamIdMediaSample() {}
1153 
1154 PrevSampleData::PrevSampleData() {
1155  Reset();
1156 }
1157 
1158 PrevSampleData::~PrevSampleData() {}
1159 
1160 void PrevSampleData::Reset() {
1161  audio_sample = NULL;
1162  video_sample = NULL;
1163  audio_stream_id = 0;
1164  video_stream_id = 0;
1165  audio_sample_duration = 0;
1166  video_sample_duration = 0;
1167 }
1168 
1169 } // namespace wvm
1170 } // namespace media
1171 } // namespace shaka
static size_t GetAdtsFrameSize(const uint8_t *data, size_t num_bytes)
Definition: adts_header.cc:23
static std::string GetCodecString(Codec codec, uint8_t audio_object_type)
KeySource is responsible for encryption key acquisition.
Definition: key_source.h:30
static scoped_refptr< MediaSample > CreateEmptyMediaSample()
Create a MediaSample object with default members.
Definition: media_sample.cc:74