Shaka Packager SDK
wvm_media_parser.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "packager/media/formats/wvm/wvm_media_parser.h"
6 
7 #include <map>
8 #include <sstream>
9 #include <vector>
10 
11 #include "packager/base/strings/string_number_conversions.h"
12 #include "packager/media/base/aes_decryptor.h"
13 #include "packager/media/base/audio_stream_info.h"
14 #include "packager/media/base/key_source.h"
15 #include "packager/media/base/media_sample.h"
16 #include "packager/media/base/video_stream_info.h"
17 #include "packager/media/codecs/aac_audio_specific_config.h"
18 #include "packager/media/codecs/avc_decoder_configuration_record.h"
19 #include "packager/media/codecs/es_descriptor.h"
20 #include "packager/media/formats/mp2t/adts_header.h"
21 #include "packager/status.h"
22 
// Evaluates to true when a PES packet with stream id |x| is followed by the
// optional PES header extension (two flag bytes plus a header-data length
// byte). For the stream ids listed here the parser goes straight from the
// packet length field to the payload.
//
// The argument is parenthesized so that an expression such as `id & 0xFF` is
// compared as a whole. |x| is still evaluated several times, so it must not
// have side effects.
#define HAS_HEADER_EXTENSION(x)                                         \
  (((x) != 0xBC) && ((x) != 0xBE) && ((x) != 0xBF) && ((x) != 0xF0) && \
   ((x) != 0xF2) && ((x) != 0xF8) && ((x) != 0xFF))
26 
namespace {

// Time scale assigned to every stream described by the index.
constexpr uint32_t kMpeg2ClockRate = 90000;

// Bits tested in the second PES flags byte (PTS / DTS present).
constexpr uint32_t kPesOptPts = 0x80;
constexpr uint32_t kPesOptDts = 0x40;
// Bit tested in the first PES flags byte when deciding the key-frame flag.
constexpr uint32_t kPesOptAlign = 0x04;
// Bits tested in the first PES flags byte to detect scrambled payload.
constexpr uint32_t kScramblingBitsMask = 0x30;

// PES stream ids that receive dedicated handling.
constexpr uint32_t kPsmStreamId = 0xBC;
constexpr uint32_t kPaddingStreamId = 0xBE;
constexpr uint32_t kIndexStreamId = 0xBF;  // private_stream_2
constexpr uint32_t kEcmStreamId = 0xF0;
constexpr uint32_t kV2MetadataStreamId = 0xF1;  // EMM_stream

// Index payload: expected magic number and size of the version 4 header.
constexpr uint32_t kIndexMagic = 0x49444d69;
constexpr uint32_t kIndexVersion4HeaderSize = 12;
constexpr uint32_t kVersion4 = 4;

// Start code prefix bytes followed by the start code value.
constexpr uint32_t kStartCode1 = 0x00;
constexpr uint32_t kStartCode2 = 0x00;
constexpr uint32_t kStartCode3 = 0x01;
constexpr uint32_t kStartCode4Pack = 0xBA;
constexpr uint32_t kStartCode4System = 0xBB;
constexpr uint32_t kStartCode4ProgramEnd = 0xB9;

// Masks / values used to classify a PES stream id as video or audio.
constexpr uint32_t kPesStreamIdVideoMask = 0xF0;
constexpr uint32_t kPesStreamIdVideo = 0xE0;
constexpr uint32_t kPesStreamIdAudioMask = 0xE0;
constexpr uint32_t kPesStreamIdAudio = 0xC0;

constexpr uint8_t kAacSampleSizeBits = 16;
// Applies to all video streams.
constexpr uint8_t kNaluLengthSize = 4;  // unit is bytes.
// Placeholder sampling frequency for all audio streams, which
// will be overwritten after filter parsing.
constexpr uint32_t kDefaultSamplingFrequency = 100;

constexpr uint16_t kEcmSizeBytes = 80;
constexpr uint32_t kInitializationVectorSizeBytes = 16;
// ECM fields for processing.
constexpr uint32_t kEcmContentKeySizeBytes = 16;
constexpr uint32_t kEcmDCPFlagsSizeBytes = 3;
constexpr uint32_t kEcmCCIFlagsSizeBytes = 1;
constexpr uint32_t kEcmFlagsSizeBytes =
    kEcmCCIFlagsSizeBytes + kEcmDCPFlagsSizeBytes;
constexpr uint32_t kEcmPaddingSizeBytes = 12;
constexpr uint32_t kAssetKeySizeBytes = 16;

// Default audio and video PES stream IDs, used when the index does not name
// one explicitly.
constexpr uint8_t kDefaultAudioStreamId = kPesStreamIdAudio;
constexpr uint8_t kDefaultVideoStreamId = kPesStreamIdVideo;

// Value type codes carried by each index metadata entry.
enum Type {
  Type_void = 0,
  Type_uint8 = 1,
  Type_int8 = 2,
  Type_uint16 = 3,
  Type_int16 = 4,
  Type_uint32 = 5,
  Type_int32 = 6,
  Type_uint64 = 7,
  Type_int64 = 8,
  Type_string = 9,
  Type_BinaryData = 10
};

}  // namespace
85 
86 namespace shaka {
87 namespace media {
88 namespace wvm {
89 
90 WvmMediaParser::WvmMediaParser()
91  : is_initialized_(false),
92  parse_state_(StartCode1),
93  skip_bytes_(0),
94  metadata_is_complete_(false),
95  current_program_id_(0),
96  pes_stream_id_(0),
97  prev_pes_stream_id_(0),
98  pes_packet_bytes_(0),
99  pes_flags_1_(0),
100  pes_flags_2_(0),
101  prev_pes_flags_1_(0),
102  pes_header_data_bytes_(0),
103  timestamp_(0),
104  pts_(0),
105  dts_(0),
106  index_program_id_(0),
107  media_sample_(NULL),
108  crypto_unit_start_pos_(0),
109  stream_id_count_(0),
110  decryption_key_source_(NULL) {}
111 
112 WvmMediaParser::~WvmMediaParser() {}
113 
114 void WvmMediaParser::Init(const InitCB& init_cb,
115  const NewSampleCB& new_sample_cb,
116  KeySource* decryption_key_source) {
117  DCHECK(!is_initialized_);
118  DCHECK(!init_cb.is_null());
119  DCHECK(!new_sample_cb.is_null());
120  decryption_key_source_ = decryption_key_source;
121  init_cb_ = init_cb;
122  new_sample_cb_ = new_sample_cb;
123 }
124 
125 bool WvmMediaParser::Parse(const uint8_t* buf, int size) {
126  size_t num_bytes = 0;
127  size_t prev_size = 0;
128  const uint8_t* read_ptr = buf;
129  const uint8_t* end = read_ptr + size;
130 
131  while (read_ptr < end) {
132  switch (parse_state_) {
133  case StartCode1:
134  if (*read_ptr == kStartCode1) {
135  parse_state_ = StartCode2;
136  }
137  break;
138  case StartCode2:
139  if (*read_ptr == kStartCode2) {
140  parse_state_ = StartCode3;
141  } else {
142  parse_state_ = StartCode1;
143  }
144  break;
145  case StartCode3:
146  if (*read_ptr == kStartCode3) {
147  parse_state_ = StartCode4;
148  } else {
149  parse_state_ = StartCode1;
150  }
151  break;
152  case StartCode4:
153  switch (*read_ptr) {
154  case kStartCode4Pack:
155  parse_state_ = PackHeader1;
156  break;
157  case kStartCode4System:
158  parse_state_ = SystemHeader1;
159  break;
160  case kStartCode4ProgramEnd:
161  parse_state_ = ProgramEnd;
162  continue;
163  default:
164  parse_state_ = PesStreamId;
165  continue;
166  }
167  break;
168  case PackHeader1:
169  parse_state_ = PackHeader2;
170  break;
171  case PackHeader2:
172  parse_state_ = PackHeader3;
173  break;
174  case PackHeader3:
175  parse_state_ = PackHeader4;
176  break;
177  case PackHeader4:
178  parse_state_ = PackHeader5;
179  break;
180  case PackHeader5:
181  parse_state_ = PackHeader6;
182  break;
183  case PackHeader6:
184  parse_state_ = PackHeader7;
185  break;
186  case PackHeader7:
187  parse_state_ = PackHeader8;
188  break;
189  case PackHeader8:
190  parse_state_ = PackHeader9;
191  break;
192  case PackHeader9:
193  parse_state_ = PackHeader10;
194  break;
195  case PackHeader10:
196  skip_bytes_ = *read_ptr & 0x07;
197  parse_state_ = PackHeaderStuffingSkip;
198  break;
199  case SystemHeader1:
200  skip_bytes_ = *read_ptr;
201  skip_bytes_ <<= 8;
202  parse_state_ = SystemHeader2;
203  break;
204  case SystemHeader2:
205  skip_bytes_ |= *read_ptr;
206  parse_state_ = SystemHeaderSkip;
207  break;
208  case PackHeaderStuffingSkip:
209  if (end >= skip_bytes_ + read_ptr) {
210  read_ptr += skip_bytes_;
211  skip_bytes_ = 0;
212  parse_state_ = StartCode1;
213  } else {
214  skip_bytes_ -= (end - read_ptr);
215  read_ptr = end;
216  }
217  continue;
218  case SystemHeaderSkip:
219  if (end >= skip_bytes_ + read_ptr) {
220  read_ptr += skip_bytes_;
221  skip_bytes_ = 0;
222  parse_state_ = StartCode1;
223  } else {
224  uint32_t remaining_size = end - read_ptr;
225  skip_bytes_ -= remaining_size;
226  read_ptr = end;
227  }
228  continue;
229  case PesStreamId:
230  pes_stream_id_ = *read_ptr;
231  if (!metadata_is_complete_ &&
232  (pes_stream_id_ != kPsmStreamId) &&
233  (pes_stream_id_ != kIndexStreamId) &&
234  (pes_stream_id_ != kEcmStreamId) &&
235  (pes_stream_id_ != kV2MetadataStreamId) &&
236  (pes_stream_id_ != kPaddingStreamId)) {
237  metadata_is_complete_ = true;
238  }
239  parse_state_ = PesPacketLength1;
240  break;
241  case PesPacketLength1:
242  pes_packet_bytes_ = *read_ptr;
243  pes_packet_bytes_ <<= 8;
244  parse_state_ = PesPacketLength2;
245  break;
246  case PesPacketLength2:
247  pes_packet_bytes_ |= *read_ptr;
248  if (HAS_HEADER_EXTENSION(pes_stream_id_)) {
249  parse_state_ = PesExtension1;
250  } else {
251  prev_pes_flags_1_ = pes_flags_1_;
252  pes_flags_1_ = pes_flags_2_ = 0;
253  pes_header_data_bytes_ = 0;
254  parse_state_ = PesPayload;
255  }
256  break;
257  case PesExtension1:
258  prev_pes_flags_1_ = pes_flags_1_;
259  pes_flags_1_ = *read_ptr;
260  --pes_packet_bytes_;
261  parse_state_ = PesExtension2;
262  break;
263  case PesExtension2:
264  pes_flags_2_ = *read_ptr;
265  --pes_packet_bytes_;
266  parse_state_ = PesExtension3;
267  break;
268  case PesExtension3:
269  pes_header_data_bytes_ = *read_ptr;
270  --pes_packet_bytes_;
271  if (pes_flags_2_ & kPesOptPts) {
272  parse_state_ = Pts1;
273  } else {
274  parse_state_ = PesHeaderData;
275  }
276  break;
277  case Pts1:
278  timestamp_ = (*read_ptr & 0x0E);
279  --pes_header_data_bytes_;
280  --pes_packet_bytes_;
281  parse_state_ = Pts2;
282  break;
283  case Pts2:
284  timestamp_ <<= 7;
285  timestamp_ |= *read_ptr;
286  --pes_header_data_bytes_;
287  --pes_packet_bytes_;
288  parse_state_ = Pts3;
289  break;
290  case Pts3:
291  timestamp_ <<= 7;
292  timestamp_ |= *read_ptr >> 1;
293  --pes_header_data_bytes_;
294  --pes_packet_bytes_;
295  parse_state_ = Pts4;
296  break;
297  case Pts4:
298  timestamp_ <<= 8;
299  timestamp_ |= *read_ptr;
300  --pes_header_data_bytes_;
301  --pes_packet_bytes_;
302  parse_state_ = Pts5;
303  break;
304  case Pts5:
305  timestamp_ <<= 7;
306  timestamp_ |= *read_ptr >> 1;
307  pts_ = timestamp_;
308  --pes_header_data_bytes_;
309  --pes_packet_bytes_;
310  if (pes_flags_2_ & kPesOptDts) {
311  parse_state_ = Dts1;
312  } else {
313  dts_ = pts_;
314  parse_state_ = PesHeaderData;
315  }
316  break;
317  case Dts1:
318  timestamp_ = (*read_ptr & 0x0E);
319  --pes_header_data_bytes_;
320  --pes_packet_bytes_;
321  parse_state_ = Dts2;
322  break;
323  case Dts2:
324  timestamp_ <<= 7;
325  timestamp_ |= *read_ptr;
326  --pes_header_data_bytes_;
327  --pes_packet_bytes_;
328  parse_state_ = Dts3;
329  break;
330  case Dts3:
331  timestamp_ <<= 7;
332  timestamp_ |= *read_ptr >> 1;
333  --pes_header_data_bytes_;
334  --pes_packet_bytes_;
335  parse_state_ = Dts4;
336  break;
337  case Dts4:
338  timestamp_ <<= 8;
339  timestamp_ |= *read_ptr;
340  --pes_header_data_bytes_;
341  --pes_packet_bytes_;
342  parse_state_ = Dts5;
343  break;
344  case Dts5:
345  timestamp_ <<= 7;
346  timestamp_ |= *read_ptr >> 1;
347  dts_ = timestamp_;
348  --pes_header_data_bytes_;
349  --pes_packet_bytes_;
350  parse_state_ = PesHeaderData;
351  break;
352  case PesHeaderData:
353  num_bytes = end - read_ptr;
354  if (num_bytes >= pes_header_data_bytes_) {
355  num_bytes = pes_header_data_bytes_;
356  parse_state_ = PesPayload;
357  }
358  pes_header_data_bytes_ -= num_bytes;
359  pes_packet_bytes_ -= num_bytes;
360  read_ptr += num_bytes;
361  continue;
362  case PesPayload:
363  switch (pes_stream_id_) {
364  case kPsmStreamId:
365  psm_data_.clear();
366  parse_state_ = PsmPayload;
367  continue;
368  case kPaddingStreamId:
369  parse_state_ = Padding;
370  continue;
371  case kEcmStreamId:
372  ecm_.clear();
373  parse_state_ = EcmPayload;
374  continue;
375  case kIndexStreamId:
376  parse_state_ = IndexPayload;
377  continue;
378  default:
379  if (!DemuxNextPes(false)) {
380  return false;
381  }
382  parse_state_ = EsPayload;
383  }
384  continue;
385  case PsmPayload:
386  num_bytes = end - read_ptr;
387  if (num_bytes >= pes_packet_bytes_) {
388  num_bytes = pes_packet_bytes_;
389  parse_state_ = StartCode1;
390  }
391  if (num_bytes > 0) {
392  pes_packet_bytes_ -= num_bytes;
393  prev_size = psm_data_.size();
394  psm_data_.resize(prev_size + num_bytes);
395  memcpy(&psm_data_[prev_size], read_ptr, num_bytes);
396  }
397  read_ptr += num_bytes;
398  continue;
399  case EcmPayload:
400  num_bytes = end - read_ptr;
401  if (num_bytes >= pes_packet_bytes_) {
402  num_bytes = pes_packet_bytes_;
403  parse_state_ = StartCode1;
404  }
405  if (num_bytes > 0) {
406  pes_packet_bytes_ -= num_bytes;
407  prev_size = ecm_.size();
408  ecm_.resize(prev_size + num_bytes);
409  memcpy(&ecm_[prev_size], read_ptr, num_bytes);
410  }
411  if ((pes_packet_bytes_ == 0) && !ecm_.empty()) {
412  if (!ProcessEcm()) {
413  return(false);
414  }
415  }
416  read_ptr += num_bytes;
417  continue;
418  case IndexPayload:
419  num_bytes = end - read_ptr;
420  if (num_bytes >= pes_packet_bytes_) {
421  num_bytes = pes_packet_bytes_;
422  parse_state_ = StartCode1;
423  }
424  if (num_bytes > 0) {
425  pes_packet_bytes_ -= num_bytes;
426  prev_size = index_data_.size();
427  index_data_.resize(prev_size + num_bytes);
428  memcpy(&index_data_[prev_size], read_ptr, num_bytes);
429  }
430  if (pes_packet_bytes_ == 0 && !index_data_.empty()) {
431  if (!metadata_is_complete_) {
432  if (!ParseIndexEntry()) {
433  return false;
434  }
435  }
436  }
437  read_ptr += num_bytes;
438  continue;
439  case EsPayload:
440  num_bytes = end - read_ptr;
441  if (num_bytes >= pes_packet_bytes_) {
442  num_bytes = pes_packet_bytes_;
443  parse_state_ = StartCode1;
444  }
445  pes_packet_bytes_ -= num_bytes;
446  if (pes_stream_id_ != kV2MetadataStreamId) {
447  sample_data_.resize(sample_data_.size() + num_bytes);
448  memcpy(&sample_data_[sample_data_.size() - num_bytes], read_ptr,
449  num_bytes);
450  }
451  prev_pes_stream_id_ = pes_stream_id_;
452  read_ptr += num_bytes;
453  continue;
454  case Padding:
455  num_bytes = end - read_ptr;
456  if (num_bytes >= pes_packet_bytes_) {
457  num_bytes = pes_packet_bytes_;
458  parse_state_ = StartCode1;
459  }
460  pes_packet_bytes_ -= num_bytes;
461  read_ptr += num_bytes;
462  continue;
463  case ProgramEnd:
464  parse_state_ = StartCode1;
465  metadata_is_complete_ = true;
466  if (!DemuxNextPes(true)) {
467  return false;
468  }
469  if (!Flush()) {
470  return false;
471  }
472  // Reset.
473  dts_ = pts_ = 0;
474  parse_state_ = StartCode1;
475  prev_media_sample_data_.Reset();
476  current_program_id_++;
477  ecm_.clear();
478  index_data_.clear();
479  psm_data_.clear();
480  break;
481  default:
482  break;
483  }
484  ++read_ptr;
485  }
486  return true;
487 }
488 
489 bool WvmMediaParser::EmitLastSample(
490  uint32_t stream_id,
491  const std::shared_ptr<MediaSample>& new_sample) {
492  std::string key = base::UintToString(current_program_id_)
493  .append(":")
494  .append(base::UintToString(stream_id));
495  std::map<std::string, uint32_t>::iterator it =
496  program_demux_stream_map_.find(key);
497  if (it == program_demux_stream_map_.end())
498  return false;
499  return EmitSample(stream_id, (*it).second, new_sample, true);
500 }
501 
502 bool WvmMediaParser::EmitPendingSamples() {
503  // Emit queued samples which were built when not initialized.
504  while (!media_sample_queue_.empty()) {
505  DemuxStreamIdMediaSample& demux_stream_media_sample =
506  media_sample_queue_.front();
507  if (!EmitSample(demux_stream_media_sample.parsed_audio_or_video_stream_id,
508  demux_stream_media_sample.demux_stream_id,
509  demux_stream_media_sample.media_sample,
510  false)) {
511  return false;
512  }
513  media_sample_queue_.pop_front();
514  }
515  return true;
516 }
517 
518 bool WvmMediaParser::Flush() {
519  // Flush the last audio and video sample for current program.
520  // Reset the streamID when successfully emitted.
521  if (prev_media_sample_data_.audio_sample != NULL) {
522  if (!EmitLastSample(prev_pes_stream_id_,
523  prev_media_sample_data_.audio_sample)) {
524  LOG(ERROR) << "Did not emit last sample for audio stream with ID = "
525  << prev_pes_stream_id_;
526  return false;
527  }
528  }
529  if (prev_media_sample_data_.video_sample != NULL) {
530  if (!EmitLastSample(prev_pes_stream_id_,
531  prev_media_sample_data_.video_sample)) {
532  LOG(ERROR) << "Did not emit last sample for video stream with ID = "
533  << prev_pes_stream_id_;
534  return false;
535  }
536  }
537  return true;
538 }
539 
540 bool WvmMediaParser::ParseIndexEntry() {
541  // Do not parse index entry at the beginning of any track *after* the first
542  // track.
543  if (current_program_id_ > 0) {
544  return true;
545  }
546  uint32_t index_size = 0;
547  if (index_data_.size() < kIndexVersion4HeaderSize) {
548  return false;
549  }
550 
551  const uint8_t* read_ptr = index_data_.data();
552  if (ntohlFromBuffer(read_ptr) != kIndexMagic) {
553  index_data_.clear();
554  return false;
555  }
556  read_ptr += 4;
557 
558  uint32_t version = ntohlFromBuffer(read_ptr);
559  read_ptr += 4;
560  if (version == kVersion4) {
561  index_size = kIndexVersion4HeaderSize + ntohlFromBuffer(read_ptr);
562  if (index_data_.size() < index_size) {
563  // We do not yet have the full index. Keep accumulating index data.
564  return true;
565  }
566  read_ptr += sizeof(uint32_t);
567 
568  // Index metadata
569  uint32_t index_metadata_max_size = index_size - kIndexVersion4HeaderSize;
570  if (index_metadata_max_size < sizeof(uint8_t)) {
571  index_data_.clear();
572  return false;
573  }
574 
575  uint64_t track_duration = 0;
576  uint32_t trick_play_factor = 0;
577  uint32_t sampling_frequency = kDefaultSamplingFrequency;
578  uint32_t time_scale = kMpeg2ClockRate;
579  uint16_t video_width = 0;
580  uint16_t video_height = 0;
581  uint32_t pixel_width = 0;
582  uint32_t pixel_height = 0;
583  uint8_t nalu_length_size = kNaluLengthSize;
584  uint8_t num_channels = 0;
585  int audio_pes_stream_id = 0;
586  int video_pes_stream_id = 0;
587  bool has_video = false;
588  bool has_audio = false;
589  std::vector<uint8_t> audio_codec_config;
590  std::vector<uint8_t> video_codec_config;
591  uint8_t num_index_entries = *read_ptr;
592  ++read_ptr;
593  --index_metadata_max_size;
594 
595  for (uint8_t idx = 0; idx < num_index_entries; ++idx) {
596  if (index_metadata_max_size < (2 * sizeof(uint8_t)) + sizeof(uint32_t)) {
597  return false;
598  }
599  uint8_t tag = *read_ptr;
600  ++read_ptr;
601  uint8_t type = *read_ptr;
602  ++read_ptr;
603  uint32_t length = ntohlFromBuffer(read_ptr);
604  read_ptr += sizeof(uint32_t);
605  index_metadata_max_size -= (2 * sizeof(uint8_t)) + sizeof(uint32_t);
606  if (index_metadata_max_size < length) {
607  return false;
608  }
609  int64_t value = 0;
610  Tag tagtype = Unset;
611  std::vector<uint8_t> binary_data;
612  switch (Type(type)) {
613  case Type_uint8:
614  if (length == sizeof(uint8_t)) {
615  tagtype = GetTag(tag, length, read_ptr, &value);
616  } else {
617  return false;
618  }
619  break;
620  case Type_int8:
621  if (length == sizeof(int8_t)) {
622  tagtype = GetTag(tag, length, read_ptr, &value);
623  } else {
624  return false;
625  }
626  break;
627  case Type_uint16:
628  if (length == sizeof(uint16_t)) {
629  tagtype = GetTag(tag, length, read_ptr, &value);
630  } else {
631  return false;
632  }
633  break;
634  case Type_int16:
635  if (length == sizeof(int16_t)) {
636  tagtype = GetTag(tag, length, read_ptr, &value);
637  } else {
638  return false;
639  }
640  break;
641  case Type_uint32:
642  if (length == sizeof(uint32_t)) {
643  tagtype = GetTag(tag, length, read_ptr, &value);
644  } else {
645  return false;
646  }
647  break;
648  case Type_int32:
649  if (length == sizeof(int32_t)) {
650  tagtype = GetTag(tag, length, read_ptr, &value);
651  } else {
652  return false;
653  }
654  break;
655  case Type_uint64:
656  if (length == sizeof(uint64_t)) {
657  tagtype = GetTag(tag, length, read_ptr, &value);
658  } else {
659  return false;
660  }
661  break;
662  case Type_int64:
663  if (length == sizeof(int64_t)) {
664  tagtype = GetTag(tag, length, read_ptr, &value);
665  } else {
666  return false;
667  }
668  break;
669  case Type_string:
670  case Type_BinaryData:
671  binary_data.assign(read_ptr, read_ptr + length);
672  tagtype = Tag(tag);
673  break;
674  default:
675  break;
676  }
677 
678  switch (tagtype) {
679  case TrackDuration:
680  track_duration = value;
681  break;
682  case TrackTrickPlayFactor:
683  trick_play_factor = value;
684  break;
685  case VideoStreamId:
686  video_pes_stream_id = value;
687  break;
688  case AudioStreamId:
689  audio_pes_stream_id = value;
690  break;
691  case VideoWidth:
692  video_width = (uint16_t)value;
693  break;
694  case VideoHeight:
695  video_height = (uint16_t)value;
696  break;
697  case AudioNumChannels:
698  num_channels = (uint8_t)value;
699  break;
700  case VideoType:
701  has_video = true;
702  break;
703  case AudioType:
704  has_audio = true;
705  break;
706  case VideoPixelWidth:
707  pixel_width = static_cast<uint32_t>(value);
708  break;
709  case VideoPixelHeight:
710  pixel_height = static_cast<uint32_t>(value);
711  break;
712  case Audio_EsDescriptor: {
713  ESDescriptor descriptor;
714  if (!descriptor.Parse(binary_data)) {
715  LOG(ERROR) <<
716  "Could not extract AudioSpecificConfig from ES_Descriptor";
717  return false;
718  }
719  audio_codec_config = descriptor.decoder_config_descriptor()
720  .decoder_specific_info_descriptor()
721  .data();
722  break;
723  }
724  case Audio_EC3SpecificData:
725  case Audio_DtsSpecificData:
726  case Audio_AC3SpecificData:
727  LOG(ERROR) << "Audio type not supported.";
728  return false;
729  case Video_AVCDecoderConfigurationRecord:
730  video_codec_config = binary_data;
731  break;
732  default:
733  break;
734  }
735 
736  read_ptr += length;
737  index_metadata_max_size -= length;
738  }
739  // End Index metadata
740  index_size = read_ptr - index_data_.data();
741 
742  if (has_video) {
743  stream_infos_.emplace_back(new VideoStreamInfo(
744  stream_id_count_, time_scale, track_duration, kCodecH264,
745  byte_to_unit_stream_converter_.stream_format(), std::string(),
746  video_codec_config.data(), video_codec_config.size(), video_width,
747  video_height, pixel_width, pixel_height,
748  0 /* transfer_characteristics */, trick_play_factor, nalu_length_size,
749  std::string(), decryption_key_source_ ? false : true));
750  program_demux_stream_map_[base::UintToString(index_program_id_) + ":" +
751  base::UintToString(
752  video_pes_stream_id
753  ? video_pes_stream_id
754  : kDefaultVideoStreamId)] =
755  stream_id_count_++;
756  }
757  if (has_audio) {
758  const Codec audio_codec = kCodecAAC;
759  // TODO(beil): Pass in max and average bitrate in wvm container.
760  stream_infos_.emplace_back(new AudioStreamInfo(
761  stream_id_count_, time_scale, track_duration, audio_codec,
762  std::string(), audio_codec_config.data(), audio_codec_config.size(),
763  kAacSampleSizeBits, num_channels, sampling_frequency,
764  0 /* seek preroll */, 0 /* codec delay */, 0 /* max bitrate */,
765  0 /* avg bitrate */, std::string(),
766  decryption_key_source_ ? false : true));
767  program_demux_stream_map_[base::UintToString(index_program_id_) + ":" +
768  base::UintToString(
769  audio_pes_stream_id
770  ? audio_pes_stream_id
771  : kDefaultAudioStreamId)] =
772  stream_id_count_++;
773  }
774  }
775 
776  index_program_id_++;
777  index_data_.clear();
778  return true;
779 }
780 
781 bool WvmMediaParser::DemuxNextPes(bool is_program_end) {
782  bool output_encrypted_sample = false;
783  if (!sample_data_.empty() && (prev_pes_flags_1_ & kScramblingBitsMask)) {
784  // Decrypt crypto unit.
785  if (!content_decryptor_) {
786  output_encrypted_sample = true;
787  } else {
788  content_decryptor_->Crypt(&sample_data_[crypto_unit_start_pos_],
789  sample_data_.size() - crypto_unit_start_pos_,
790  &sample_data_[crypto_unit_start_pos_]);
791  }
792  }
793  // Demux media sample if we are at program end or if we are not at a
794  // continuation PES.
795  if ((pes_flags_2_ & kPesOptPts) || is_program_end) {
796  if (!sample_data_.empty()) {
797  if (!Output(output_encrypted_sample)) {
798  return false;
799  }
800  }
801  StartMediaSampleDemux();
802  }
803 
804  crypto_unit_start_pos_ = sample_data_.size();
805  return true;
806 }
807 
808 void WvmMediaParser::StartMediaSampleDemux() {
809  bool is_key_frame = ((pes_flags_1_ & kPesOptAlign) != 0);
810  media_sample_ = MediaSample::CreateEmptyMediaSample();
811  media_sample_->set_dts(dts_);
812  media_sample_->set_pts(pts_);
813  media_sample_->set_is_key_frame(is_key_frame);
814 
815  sample_data_.clear();
816 }
817 
818 bool WvmMediaParser::Output(bool output_encrypted_sample) {
819  if (output_encrypted_sample) {
820  media_sample_->SetData(sample_data_.data(), sample_data_.size());
821  media_sample_->set_is_encrypted(true);
822  } else {
823  if ((prev_pes_stream_id_ & kPesStreamIdVideoMask) == kPesStreamIdVideo) {
824  // Convert video stream to unit stream and get config.
825  std::vector<uint8_t> nal_unit_stream;
826  if (!byte_to_unit_stream_converter_.ConvertByteStreamToNalUnitStream(
827  sample_data_.data(), sample_data_.size(), &nal_unit_stream)) {
828  LOG(ERROR) << "Could not convert h.264 byte stream sample";
829  return false;
830  }
831  media_sample_->SetData(nal_unit_stream.data(), nal_unit_stream.size());
832  if (!is_initialized_) {
833  // Set extra data for video stream from AVC Decoder Config Record.
834  // Also, set codec string from the AVC Decoder Config Record.
835  std::vector<uint8_t> decoder_config_record;
836  byte_to_unit_stream_converter_.GetDecoderConfigurationRecord(
837  &decoder_config_record);
838  for (uint32_t i = 0; i < stream_infos_.size(); i++) {
839  if (stream_infos_[i]->stream_type() == kStreamVideo &&
840  stream_infos_[i]->codec_string().empty()) {
841  const std::vector<uint8_t>* stream_config;
842  if (stream_infos_[i]->codec_config().empty()) {
843  // Decoder config record not available for stream. Use the one
844  // computed from the first video stream.
845  stream_infos_[i]->set_codec_config(decoder_config_record);
846  stream_config = &decoder_config_record;
847  } else {
848  // Use stream-specific config record.
849  stream_config = &stream_infos_[i]->codec_config();
850  }
851  DCHECK(stream_config);
852 
853  VideoStreamInfo* video_stream_info =
854  reinterpret_cast<VideoStreamInfo*>(stream_infos_[i].get());
856  if (!avc_config.Parse(*stream_config)) {
857  LOG(WARNING) << "Failed to parse AVCDecoderConfigurationRecord. "
858  "Using computed configuration record instead.";
859  video_stream_info->set_codec_config(decoder_config_record);
860  if (!avc_config.Parse(decoder_config_record)) {
861  LOG(ERROR) << "Failed to parse AVCDecoderConfigurationRecord.";
862  return false;
863  }
864  }
865  const FourCC codec_fourcc =
866  byte_to_unit_stream_converter_.stream_format() ==
867  H26xStreamFormat::kNalUnitStreamWithParameterSetNalus
868  ? FOURCC_avc3
869  : FOURCC_avc1;
870  video_stream_info->set_codec_string(
871  avc_config.GetCodecString(codec_fourcc));
872 
873  if (avc_config.pixel_width() != video_stream_info->pixel_width() ||
874  avc_config.pixel_height() !=
875  video_stream_info->pixel_height()) {
876  LOG_IF(WARNING, video_stream_info->pixel_width() != 0 ||
877  video_stream_info->pixel_height() != 0)
878  << "Pixel aspect ratio in WVM metadata ("
879  << video_stream_info->pixel_width() << ","
880  << video_stream_info->pixel_height()
881  << ") does not match with SAR in "
882  "AVCDecoderConfigurationRecord ("
883  << avc_config.pixel_width() << ","
884  << avc_config.pixel_height()
885  << "). Use AVCDecoderConfigurationRecord.";
886  video_stream_info->set_pixel_width(avc_config.pixel_width());
887  video_stream_info->set_pixel_height(avc_config.pixel_height());
888  }
889  if (avc_config.coded_width() != video_stream_info->width() ||
890  avc_config.coded_height() != video_stream_info->height()) {
891  LOG(WARNING) << "Resolution in WVM metadata ("
892  << video_stream_info->width() << ","
893  << video_stream_info->height()
894  << ") does not match with resolution in "
895  "AVCDecoderConfigurationRecord ("
896  << avc_config.coded_width() << ","
897  << avc_config.coded_height()
898  << "). Use AVCDecoderConfigurationRecord.";
899  video_stream_info->set_width(avc_config.coded_width());
900  video_stream_info->set_height(avc_config.coded_height());
901  }
902  }
903  }
904  }
905  } else if ((prev_pes_stream_id_ & kPesStreamIdAudioMask) ==
906  kPesStreamIdAudio) {
907  // Set data on the audio stream.
908  mp2t::AdtsHeader adts_header;
909  const uint8_t* frame_ptr = sample_data_.data();
910  if (!adts_header.Parse(frame_ptr, sample_data_.size())) {
911  LOG(ERROR) << "Could not parse ADTS header";
912  return false;
913  }
914  media_sample_->SetData(
915  frame_ptr + adts_header.GetHeaderSize(),
916  adts_header.GetFrameSize() - adts_header.GetHeaderSize());
917  if (!is_initialized_) {
918  for (uint32_t i = 0; i < stream_infos_.size(); i++) {
919  if (stream_infos_[i]->stream_type() == kStreamAudio &&
920  stream_infos_[i]->codec_string().empty()) {
921  AudioStreamInfo* audio_stream_info =
922  reinterpret_cast<AudioStreamInfo*>(stream_infos_[i].get());
923  if (audio_stream_info->codec_config().empty()) {
924  // Set AudioStreamInfo fields using information from the ADTS
925  // header.
926  audio_stream_info->set_sampling_frequency(
927  adts_header.GetSamplingFrequency());
928  std::vector<uint8_t> audio_specific_config;
929  adts_header.GetAudioSpecificConfig(&audio_specific_config);
930  audio_stream_info->set_codec_config(audio_specific_config);
931  audio_stream_info->set_codec_string(
933  kCodecAAC, adts_header.GetObjectType()));
934  } else {
935  // Set AudioStreamInfo fields using information from the
936  // AACAudioSpecificConfig record.
937  AACAudioSpecificConfig aac_config;
938  if (!aac_config.Parse(stream_infos_[i]->codec_config())) {
939  LOG(ERROR) << "Could not parse AACAudioSpecificconfig";
940  return false;
941  }
942  audio_stream_info->set_sampling_frequency(
943  aac_config.GetSamplesPerSecond());
944  audio_stream_info->set_codec_string(
946  kCodecAAC, aac_config.GetAudioObjectType()));
947  }
948  }
949  }
950  }
951  }
952  }
953 
954  if (!is_initialized_) {
955  bool all_streams_have_config = true;
956  // Check if all collected stream infos have codec_config set.
957  for (uint32_t i = 0; i < stream_infos_.size(); i++) {
958  if (stream_infos_[i]->codec_string().empty()) {
959  all_streams_have_config = false;
960  break;
961  }
962  }
963  if (all_streams_have_config) {
964  init_cb_.Run(stream_infos_);
965  is_initialized_ = true;
966  }
967  }
968 
969  DCHECK_GT(media_sample_->data_size(), 0UL);
970  std::string key = base::UintToString(current_program_id_).append(":")
971  .append(base::UintToString(prev_pes_stream_id_));
972  std::map<std::string, uint32_t>::iterator it =
973  program_demux_stream_map_.find(key);
974  if (it == program_demux_stream_map_.end()) {
975  // TODO(ramjic): Log error message here and in other error cases through
976  // this method.
977  return false;
978  }
979  DemuxStreamIdMediaSample demux_stream_media_sample;
980  demux_stream_media_sample.parsed_audio_or_video_stream_id =
981  prev_pes_stream_id_;
982  demux_stream_media_sample.demux_stream_id = (*it).second;
983  demux_stream_media_sample.media_sample = media_sample_;
984  // Check if sample can be emitted.
985  if (!is_initialized_) {
986  media_sample_queue_.push_back(demux_stream_media_sample);
987  } else {
988  // flush the sample queue and emit all queued samples.
989  while (!media_sample_queue_.empty()) {
990  if (!EmitPendingSamples())
991  return false;
992  }
993  // Emit current sample.
994  if (!EmitSample(prev_pes_stream_id_, (*it).second, media_sample_, false))
995  return false;
996  }
997  return true;
998 }
999 
1000 bool WvmMediaParser::EmitSample(uint32_t parsed_audio_or_video_stream_id,
1001  uint32_t stream_id,
1002  const std::shared_ptr<MediaSample>& new_sample,
1003  bool isLastSample) {
1004  DCHECK(new_sample);
1005  if (isLastSample) {
1006  if ((parsed_audio_or_video_stream_id & kPesStreamIdVideoMask) ==
1007  kPesStreamIdVideo) {
1008  new_sample->set_duration(prev_media_sample_data_.video_sample_duration);
1009  } else if ((parsed_audio_or_video_stream_id & kPesStreamIdAudioMask) ==
1010  kPesStreamIdAudio) {
1011  new_sample->set_duration(prev_media_sample_data_.audio_sample_duration);
1012  }
1013  if (!new_sample_cb_.Run(stream_id, new_sample)) {
1014  LOG(ERROR) << "Failed to process the last sample.";
1015  return false;
1016  }
1017  return true;
1018  }
1019 
1020  // Cannot emit current sample. Compute duration first and then,
1021  // emit previous sample.
1022  if ((parsed_audio_or_video_stream_id & kPesStreamIdVideoMask) ==
1023  kPesStreamIdVideo) {
1024  if (prev_media_sample_data_.video_sample == NULL) {
1025  prev_media_sample_data_.video_sample = new_sample;
1026  prev_media_sample_data_.video_stream_id = stream_id;
1027  return true;
1028  }
1029  prev_media_sample_data_.video_sample->set_duration(
1030  new_sample->dts() - prev_media_sample_data_.video_sample->dts());
1031  prev_media_sample_data_.video_sample_duration =
1032  prev_media_sample_data_.video_sample->duration();
1033  if (!new_sample_cb_.Run(prev_media_sample_data_.video_stream_id,
1034  prev_media_sample_data_.video_sample)) {
1035  LOG(ERROR) << "Failed to process the video sample.";
1036  return false;
1037  }
1038  prev_media_sample_data_.video_sample = new_sample;
1039  prev_media_sample_data_.video_stream_id = stream_id;
1040  } else if ((parsed_audio_or_video_stream_id & kPesStreamIdAudioMask) ==
1041  kPesStreamIdAudio) {
1042  if (prev_media_sample_data_.audio_sample == NULL) {
1043  prev_media_sample_data_.audio_sample = new_sample;
1044  prev_media_sample_data_.audio_stream_id = stream_id;
1045  return true;
1046  }
1047  prev_media_sample_data_.audio_sample->set_duration(
1048  new_sample->dts() - prev_media_sample_data_.audio_sample->dts());
1049  prev_media_sample_data_.audio_sample_duration =
1050  prev_media_sample_data_.audio_sample->duration();
1051  if (!new_sample_cb_.Run(prev_media_sample_data_.audio_stream_id,
1052  prev_media_sample_data_.audio_sample)) {
1053  LOG(ERROR) << "Failed to process the audio sample.";
1054  return false;
1055  }
1056  prev_media_sample_data_.audio_sample = new_sample;
1057  prev_media_sample_data_.audio_stream_id = stream_id;
1058  }
1059  return true;
1060 }
1061 
1062 bool WvmMediaParser::GetAssetKey(const uint8_t* asset_id,
1063  EncryptionKey* encryption_key) {
1064  DCHECK(decryption_key_source_);
1065  Status status = decryption_key_source_->FetchKeys(
1066  EmeInitDataType::WIDEVINE_CLASSIC,
1067  std::vector<uint8_t>(asset_id, asset_id + sizeof(uint32_t)));
1068  if (!status.ok()) {
1069  LOG(ERROR) << "Fetch Key(s) failed for AssetID = "
1070  << ntohlFromBuffer(asset_id) << ", error = " << status;
1071  return false;
1072  }
1073 
1074  const char kHdStreamLabel[] = "HD";
1075  status = decryption_key_source_->GetKey(kHdStreamLabel, encryption_key);
1076  if (!status.ok()) {
1077  LOG(ERROR) << "Fetch Key(s) failed for AssetID = "
1078  << ntohlFromBuffer(asset_id) << ", error = " << status;
1079  return false;
1080  }
1081 
1082  return true;
1083 }
1084 
1085 bool WvmMediaParser::ProcessEcm() {
1086  // An error will be returned later if the samples need to be decrypted.
1087  if (!decryption_key_source_)
1088  return true;
1089 
1090  if (current_program_id_ > 0) {
1091  return true;
1092  }
1093  if (ecm_.size() != kEcmSizeBytes) {
1094  LOG(ERROR) << "Unexpected ECM size = " << ecm_.size()
1095  << ", expected size = " << kEcmSizeBytes;
1096  return false;
1097  }
1098  const uint8_t* ecm_data = ecm_.data();
1099  DCHECK(ecm_data);
1100  ecm_data += sizeof(uint32_t); // old version field - skip.
1101  ecm_data += sizeof(uint32_t); // clear lead - skip.
1102  ecm_data += sizeof(uint32_t); // system id(includes ECM version) - skip.
1103  EncryptionKey encryption_key;
1104  if (!GetAssetKey(ecm_data, &encryption_key)) {
1105  return false;
1106  }
1107  if (encryption_key.key.size() < kAssetKeySizeBytes) {
1108  LOG(ERROR) << "Asset Key size of " << encryption_key.key.size()
1109  << " for AssetID = " << ntohlFromBuffer(ecm_data)
1110  << " is less than minimum asset key size.";
1111  return false;
1112  }
1113  ecm_data += sizeof(uint32_t); // asset_id.
1114  // Legacy WVM content may have asset keys > 16 bytes.
1115  // Use only the first 16 bytes of the asset key to get
1116  // the content key.
1117  std::vector<uint8_t> asset_key(
1118  encryption_key.key.begin(),
1119  encryption_key.key.begin() + kAssetKeySizeBytes);
1120  // WVM format always uses all zero IV.
1121  std::vector<uint8_t> zero_iv(kInitializationVectorSizeBytes, 0);
1122  AesCbcDecryptor asset_decryptor(kCtsPadding, AesCryptor::kUseConstantIv);
1123  if (!asset_decryptor.InitializeWithIv(asset_key, zero_iv)) {
1124  LOG(ERROR) << "Failed to initialize asset_decryptor.";
1125  return false;
1126  }
1127 
1128  const size_t content_key_buffer_size =
1129  kEcmFlagsSizeBytes + kEcmContentKeySizeBytes +
1130  kEcmPaddingSizeBytes; // flags + contentKey + padding.
1131  std::vector<uint8_t> content_key_buffer(content_key_buffer_size);
1132  CHECK(asset_decryptor.Crypt(ecm_data, content_key_buffer_size,
1133  content_key_buffer.data()));
1134 
1135  std::vector<uint8_t> decrypted_content_key_vec(
1136  content_key_buffer.begin() + 4,
1137  content_key_buffer.begin() + 20);
1138  std::unique_ptr<AesCbcDecryptor> content_decryptor(
1139  new AesCbcDecryptor(kCtsPadding, AesCryptor::kUseConstantIv));
1140  if (!content_decryptor->InitializeWithIv(decrypted_content_key_vec,
1141  zero_iv)) {
1142  LOG(ERROR) << "Failed to initialize content decryptor.";
1143  return false;
1144  }
1145 
1146  content_decryptor_ = std::move(content_decryptor);
1147  return true;
1148 }
1149 
1150 DemuxStreamIdMediaSample::DemuxStreamIdMediaSample() :
1151  demux_stream_id(0),
1152  parsed_audio_or_video_stream_id(0) {}
1153 
1154 DemuxStreamIdMediaSample::~DemuxStreamIdMediaSample() {}
1155 
1156 PrevSampleData::PrevSampleData() {
1157  Reset();
1158 }
1159 
1160 PrevSampleData::~PrevSampleData() {}
1161 
1162 void PrevSampleData::Reset() {
1163  audio_sample = NULL;
1164  video_sample = NULL;
1165  audio_stream_id = 0;
1166  video_stream_id = 0;
1167  audio_sample_duration = 0;
1168  video_sample_duration = 0;
1169 }
1170 
1171 } // namespace wvm
1172 } // namespace media
1173 } // namespace shaka
size_t GetHeaderSize() const override
Definition: adts_header.cc:82
Class which implements AES-CBC (Cipher block chaining) decryption.
Definition: aes_decryptor.h:25
virtual bool Parse(const std::vector< uint8_t > &data)
static std::shared_ptr< MediaSample > CreateEmptyMediaSample()
Create a MediaSample object with default members.
Definition: media_sample.cc:71
uint8_t GetObjectType() const override
Definition: adts_header.cc:108
All the methods that are virtual are virtual for mocking.
bool InitializeWithIv(const std::vector< uint8_t > &key, const std::vector< uint8_t > &iv) override
uint32_t GetSamplingFrequency() const override
Definition: adts_header.cc:112
bool Parse(const std::vector< uint8_t > &data)
void GetAudioSpecificConfig(std::vector< uint8_t > *buffer) const override
Definition: adts_header.cc:98
std::string GetCodecString(FourCC codec_fourcc) const
static std::string GetCodecString(Codec codec, uint8_t audio_object_type)
bool Parse(const std::vector< uint8_t > &data)
Class for parsing AVC decoder configuration record.
bool Parse(const uint8_t *adts_frame, size_t adts_frame_size) override
Definition: adts_header.cc:46
KeySource is responsible for encryption key acquisition.
Definition: key_source.h:48
Holds video stream information.
Holds audio stream information.
size_t GetFrameSize() const override
Definition: adts_header.cc:87