Shaka Packager SDK
wvm_media_parser.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "packager/media/formats/wvm/wvm_media_parser.h"
6 
7 #include <map>
8 #include <sstream>
9 #include <vector>
10 
11 #include "packager/base/strings/string_number_conversions.h"
12 #include "packager/media/base/aes_decryptor.h"
13 #include "packager/media/base/audio_stream_info.h"
14 #include "packager/media/base/key_source.h"
15 #include "packager/media/base/media_sample.h"
16 #include "packager/media/base/video_stream_info.h"
17 #include "packager/media/codecs/aac_audio_specific_config.h"
18 #include "packager/media/codecs/avc_decoder_configuration_record.h"
19 #include "packager/media/codecs/es_descriptor.h"
20 #include "packager/media/formats/mp2t/adts_header.h"
21 #include "packager/status.h"
22 
// Evaluates to true when a PES packet with stream id |x| is followed by the
// optional PES header extension (two flag bytes plus a header-data length
// byte); for the stream ids listed below the parser goes straight from the
// packet length to the payload.
// The argument is parenthesized so that expression arguments (e.g. `a | b`)
// are compared as a whole. Note that |x| is evaluated more than once, so it
// must not have side effects.
#define HAS_HEADER_EXTENSION(x)                                          \
  (((x) != 0xBC) && ((x) != 0xBE) && ((x) != 0xBF) && ((x) != 0xF0) &&   \
   ((x) != 0xF2) && ((x) != 0xF8) && ((x) != 0xFF))
26 
namespace {

// Timescale assigned to every stream created from the index.
constexpr uint32_t kMpeg2ClockRate = 90000;

// Bits tested in the PES header flag bytes.
constexpr uint32_t kPesOptPts = 0x80;
constexpr uint32_t kPesOptDts = 0x40;
constexpr uint32_t kPesOptAlign = 0x04;
constexpr uint32_t kScramblingBitsMask = 0x30;

// PES stream ids that receive dedicated handling.
constexpr uint32_t kPsmStreamId = 0xBC;
constexpr uint32_t kPaddingStreamId = 0xBE;
constexpr uint32_t kIndexStreamId = 0xBF;  // private_stream_2
constexpr uint32_t kEcmStreamId = 0xF0;
constexpr uint32_t kV2MetadataStreamId = 0xF1;  // EMM_stream

// Index payload layout.
constexpr uint32_t kIndexMagic = 0x49444d69;
constexpr uint32_t kIndexVersion4HeaderSize = 12;
constexpr uint32_t kVersion4 = 4;

// Start code bytes, in the order they are matched.
constexpr uint32_t kStartCode1 = 0x00;
constexpr uint32_t kStartCode2 = 0x00;
constexpr uint32_t kStartCode3 = 0x01;
constexpr uint32_t kStartCode4Pack = 0xBA;
constexpr uint32_t kStartCode4System = 0xBB;
constexpr uint32_t kStartCode4ProgramEnd = 0xB9;

// Mask/value pairs used to classify a PES stream id as video or audio.
constexpr uint32_t kPesStreamIdVideoMask = 0xF0;
constexpr uint32_t kPesStreamIdVideo = 0xE0;
constexpr uint32_t kPesStreamIdAudioMask = 0xE0;
constexpr uint32_t kPesStreamIdAudio = 0xC0;

constexpr uint8_t kAacSampleSizeBits = 16;
// Applies to all video streams.
constexpr uint8_t kNaluLengthSize = 4;  // unit is bytes.
// Placeholder sampling frequency for all audio streams, which
// will be overwritten after filter parsing.
constexpr uint32_t kDefaultSamplingFrequency = 100;

constexpr uint16_t kEcmSizeBytes = 80;
constexpr uint32_t kInitializationVectorSizeBytes = 16;
// ECM fields for processing.
constexpr uint32_t kEcmContentKeySizeBytes = 16;
constexpr uint32_t kEcmDCPFlagsSizeBytes = 3;
constexpr uint32_t kEcmCCIFlagsSizeBytes = 1;
constexpr uint32_t kEcmFlagsSizeBytes =
    kEcmCCIFlagsSizeBytes + kEcmDCPFlagsSizeBytes;
constexpr uint32_t kEcmPaddingSizeBytes = 12;
constexpr uint32_t kAssetKeySizeBytes = 16;

// Default audio and video PES stream IDs.
constexpr uint8_t kDefaultAudioStreamId = kPesStreamIdAudio;
constexpr uint8_t kDefaultVideoStreamId = kPesStreamIdVideo;

// Value type codes carried by each index metadata entry.
enum Type {
  Type_void = 0,
  Type_uint8 = 1,
  Type_int8 = 2,
  Type_uint16 = 3,
  Type_int16 = 4,
  Type_uint32 = 5,
  Type_int32 = 6,
  Type_uint64 = 7,
  Type_int64 = 8,
  Type_string = 9,
  Type_BinaryData = 10
};

}  // namespace
85 
86 namespace shaka {
87 namespace media {
88 namespace wvm {
89 
90 WvmMediaParser::WvmMediaParser()
91  : is_initialized_(false),
92  parse_state_(StartCode1),
93  skip_bytes_(0),
94  metadata_is_complete_(false),
95  current_program_id_(0),
96  pes_stream_id_(0),
97  prev_pes_stream_id_(0),
98  pes_packet_bytes_(0),
99  pes_flags_1_(0),
100  pes_flags_2_(0),
101  prev_pes_flags_1_(0),
102  pes_header_data_bytes_(0),
103  timestamp_(0),
104  pts_(0),
105  dts_(0),
106  index_program_id_(0),
107  media_sample_(NULL),
108  crypto_unit_start_pos_(0),
109  stream_id_count_(0),
110  decryption_key_source_(NULL) {}
111 
112 WvmMediaParser::~WvmMediaParser() {}
113 
114 void WvmMediaParser::Init(const InitCB& init_cb,
115  const NewMediaSampleCB& new_media_sample_cb,
116  const NewTextSampleCB& new_text_sample_cb,
117  KeySource* decryption_key_source) {
118  DCHECK(!is_initialized_);
119  DCHECK(!init_cb.is_null());
120  DCHECK(!new_media_sample_cb.is_null());
121  decryption_key_source_ = decryption_key_source;
122  init_cb_ = init_cb;
123  new_sample_cb_ = new_media_sample_cb;
124 }
125 
126 bool WvmMediaParser::Parse(const uint8_t* buf, int size) {
127  size_t num_bytes = 0;
128  size_t prev_size = 0;
129  const uint8_t* read_ptr = buf;
130  const uint8_t* end = read_ptr + size;
131 
132  while (read_ptr < end) {
133  switch (parse_state_) {
134  case StartCode1:
135  if (*read_ptr == kStartCode1) {
136  parse_state_ = StartCode2;
137  }
138  break;
139  case StartCode2:
140  if (*read_ptr == kStartCode2) {
141  parse_state_ = StartCode3;
142  } else {
143  parse_state_ = StartCode1;
144  }
145  break;
146  case StartCode3:
147  if (*read_ptr == kStartCode3) {
148  parse_state_ = StartCode4;
149  } else {
150  parse_state_ = StartCode1;
151  }
152  break;
153  case StartCode4:
154  switch (*read_ptr) {
155  case kStartCode4Pack:
156  parse_state_ = PackHeader1;
157  break;
158  case kStartCode4System:
159  parse_state_ = SystemHeader1;
160  break;
161  case kStartCode4ProgramEnd:
162  parse_state_ = ProgramEnd;
163  continue;
164  default:
165  parse_state_ = PesStreamId;
166  continue;
167  }
168  break;
169  case PackHeader1:
170  parse_state_ = PackHeader2;
171  break;
172  case PackHeader2:
173  parse_state_ = PackHeader3;
174  break;
175  case PackHeader3:
176  parse_state_ = PackHeader4;
177  break;
178  case PackHeader4:
179  parse_state_ = PackHeader5;
180  break;
181  case PackHeader5:
182  parse_state_ = PackHeader6;
183  break;
184  case PackHeader6:
185  parse_state_ = PackHeader7;
186  break;
187  case PackHeader7:
188  parse_state_ = PackHeader8;
189  break;
190  case PackHeader8:
191  parse_state_ = PackHeader9;
192  break;
193  case PackHeader9:
194  parse_state_ = PackHeader10;
195  break;
196  case PackHeader10:
197  skip_bytes_ = *read_ptr & 0x07;
198  parse_state_ = PackHeaderStuffingSkip;
199  break;
200  case SystemHeader1:
201  skip_bytes_ = *read_ptr;
202  skip_bytes_ <<= 8;
203  parse_state_ = SystemHeader2;
204  break;
205  case SystemHeader2:
206  skip_bytes_ |= *read_ptr;
207  parse_state_ = SystemHeaderSkip;
208  break;
209  case PackHeaderStuffingSkip:
210  if (end >= skip_bytes_ + read_ptr) {
211  read_ptr += skip_bytes_;
212  skip_bytes_ = 0;
213  parse_state_ = StartCode1;
214  } else {
215  skip_bytes_ -= (end - read_ptr);
216  read_ptr = end;
217  }
218  continue;
219  case SystemHeaderSkip:
220  if (end >= skip_bytes_ + read_ptr) {
221  read_ptr += skip_bytes_;
222  skip_bytes_ = 0;
223  parse_state_ = StartCode1;
224  } else {
225  uint32_t remaining_size = end - read_ptr;
226  skip_bytes_ -= remaining_size;
227  read_ptr = end;
228  }
229  continue;
230  case PesStreamId:
231  pes_stream_id_ = *read_ptr;
232  if (!metadata_is_complete_ &&
233  (pes_stream_id_ != kPsmStreamId) &&
234  (pes_stream_id_ != kIndexStreamId) &&
235  (pes_stream_id_ != kEcmStreamId) &&
236  (pes_stream_id_ != kV2MetadataStreamId) &&
237  (pes_stream_id_ != kPaddingStreamId)) {
238  metadata_is_complete_ = true;
239  }
240  parse_state_ = PesPacketLength1;
241  break;
242  case PesPacketLength1:
243  pes_packet_bytes_ = *read_ptr;
244  pes_packet_bytes_ <<= 8;
245  parse_state_ = PesPacketLength2;
246  break;
247  case PesPacketLength2:
248  pes_packet_bytes_ |= *read_ptr;
249  if (HAS_HEADER_EXTENSION(pes_stream_id_)) {
250  parse_state_ = PesExtension1;
251  } else {
252  prev_pes_flags_1_ = pes_flags_1_;
253  pes_flags_1_ = pes_flags_2_ = 0;
254  pes_header_data_bytes_ = 0;
255  parse_state_ = PesPayload;
256  }
257  break;
258  case PesExtension1:
259  prev_pes_flags_1_ = pes_flags_1_;
260  pes_flags_1_ = *read_ptr;
261  --pes_packet_bytes_;
262  parse_state_ = PesExtension2;
263  break;
264  case PesExtension2:
265  pes_flags_2_ = *read_ptr;
266  --pes_packet_bytes_;
267  parse_state_ = PesExtension3;
268  break;
269  case PesExtension3:
270  pes_header_data_bytes_ = *read_ptr;
271  --pes_packet_bytes_;
272  if (pes_flags_2_ & kPesOptPts) {
273  parse_state_ = Pts1;
274  } else {
275  parse_state_ = PesHeaderData;
276  }
277  break;
278  case Pts1:
279  timestamp_ = (*read_ptr & 0x0E);
280  --pes_header_data_bytes_;
281  --pes_packet_bytes_;
282  parse_state_ = Pts2;
283  break;
284  case Pts2:
285  timestamp_ <<= 7;
286  timestamp_ |= *read_ptr;
287  --pes_header_data_bytes_;
288  --pes_packet_bytes_;
289  parse_state_ = Pts3;
290  break;
291  case Pts3:
292  timestamp_ <<= 7;
293  timestamp_ |= *read_ptr >> 1;
294  --pes_header_data_bytes_;
295  --pes_packet_bytes_;
296  parse_state_ = Pts4;
297  break;
298  case Pts4:
299  timestamp_ <<= 8;
300  timestamp_ |= *read_ptr;
301  --pes_header_data_bytes_;
302  --pes_packet_bytes_;
303  parse_state_ = Pts5;
304  break;
305  case Pts5:
306  timestamp_ <<= 7;
307  timestamp_ |= *read_ptr >> 1;
308  pts_ = timestamp_;
309  --pes_header_data_bytes_;
310  --pes_packet_bytes_;
311  if (pes_flags_2_ & kPesOptDts) {
312  parse_state_ = Dts1;
313  } else {
314  dts_ = pts_;
315  parse_state_ = PesHeaderData;
316  }
317  break;
318  case Dts1:
319  timestamp_ = (*read_ptr & 0x0E);
320  --pes_header_data_bytes_;
321  --pes_packet_bytes_;
322  parse_state_ = Dts2;
323  break;
324  case Dts2:
325  timestamp_ <<= 7;
326  timestamp_ |= *read_ptr;
327  --pes_header_data_bytes_;
328  --pes_packet_bytes_;
329  parse_state_ = Dts3;
330  break;
331  case Dts3:
332  timestamp_ <<= 7;
333  timestamp_ |= *read_ptr >> 1;
334  --pes_header_data_bytes_;
335  --pes_packet_bytes_;
336  parse_state_ = Dts4;
337  break;
338  case Dts4:
339  timestamp_ <<= 8;
340  timestamp_ |= *read_ptr;
341  --pes_header_data_bytes_;
342  --pes_packet_bytes_;
343  parse_state_ = Dts5;
344  break;
345  case Dts5:
346  timestamp_ <<= 7;
347  timestamp_ |= *read_ptr >> 1;
348  dts_ = timestamp_;
349  --pes_header_data_bytes_;
350  --pes_packet_bytes_;
351  parse_state_ = PesHeaderData;
352  break;
353  case PesHeaderData:
354  num_bytes = end - read_ptr;
355  if (num_bytes >= pes_header_data_bytes_) {
356  num_bytes = pes_header_data_bytes_;
357  parse_state_ = PesPayload;
358  }
359  pes_header_data_bytes_ -= num_bytes;
360  pes_packet_bytes_ -= num_bytes;
361  read_ptr += num_bytes;
362  continue;
363  case PesPayload:
364  switch (pes_stream_id_) {
365  case kPsmStreamId:
366  psm_data_.clear();
367  parse_state_ = PsmPayload;
368  continue;
369  case kPaddingStreamId:
370  parse_state_ = Padding;
371  continue;
372  case kEcmStreamId:
373  ecm_.clear();
374  parse_state_ = EcmPayload;
375  continue;
376  case kIndexStreamId:
377  parse_state_ = IndexPayload;
378  continue;
379  default:
380  if (!DemuxNextPes(false)) {
381  return false;
382  }
383  parse_state_ = EsPayload;
384  }
385  continue;
386  case PsmPayload:
387  num_bytes = end - read_ptr;
388  if (num_bytes >= pes_packet_bytes_) {
389  num_bytes = pes_packet_bytes_;
390  parse_state_ = StartCode1;
391  }
392  if (num_bytes > 0) {
393  pes_packet_bytes_ -= num_bytes;
394  prev_size = psm_data_.size();
395  psm_data_.resize(prev_size + num_bytes);
396  memcpy(&psm_data_[prev_size], read_ptr, num_bytes);
397  }
398  read_ptr += num_bytes;
399  continue;
400  case EcmPayload:
401  num_bytes = end - read_ptr;
402  if (num_bytes >= pes_packet_bytes_) {
403  num_bytes = pes_packet_bytes_;
404  parse_state_ = StartCode1;
405  }
406  if (num_bytes > 0) {
407  pes_packet_bytes_ -= num_bytes;
408  prev_size = ecm_.size();
409  ecm_.resize(prev_size + num_bytes);
410  memcpy(&ecm_[prev_size], read_ptr, num_bytes);
411  }
412  if ((pes_packet_bytes_ == 0) && !ecm_.empty()) {
413  if (!ProcessEcm()) {
414  return(false);
415  }
416  }
417  read_ptr += num_bytes;
418  continue;
419  case IndexPayload:
420  num_bytes = end - read_ptr;
421  if (num_bytes >= pes_packet_bytes_) {
422  num_bytes = pes_packet_bytes_;
423  parse_state_ = StartCode1;
424  }
425  if (num_bytes > 0) {
426  pes_packet_bytes_ -= num_bytes;
427  prev_size = index_data_.size();
428  index_data_.resize(prev_size + num_bytes);
429  memcpy(&index_data_[prev_size], read_ptr, num_bytes);
430  }
431  if (pes_packet_bytes_ == 0 && !index_data_.empty()) {
432  if (!metadata_is_complete_) {
433  if (!ParseIndexEntry()) {
434  return false;
435  }
436  }
437  }
438  read_ptr += num_bytes;
439  continue;
440  case EsPayload:
441  num_bytes = end - read_ptr;
442  if (num_bytes >= pes_packet_bytes_) {
443  num_bytes = pes_packet_bytes_;
444  parse_state_ = StartCode1;
445  }
446  pes_packet_bytes_ -= num_bytes;
447  if (pes_stream_id_ != kV2MetadataStreamId) {
448  sample_data_.resize(sample_data_.size() + num_bytes);
449  memcpy(&sample_data_[sample_data_.size() - num_bytes], read_ptr,
450  num_bytes);
451  }
452  prev_pes_stream_id_ = pes_stream_id_;
453  read_ptr += num_bytes;
454  continue;
455  case Padding:
456  num_bytes = end - read_ptr;
457  if (num_bytes >= pes_packet_bytes_) {
458  num_bytes = pes_packet_bytes_;
459  parse_state_ = StartCode1;
460  }
461  pes_packet_bytes_ -= num_bytes;
462  read_ptr += num_bytes;
463  continue;
464  case ProgramEnd:
465  parse_state_ = StartCode1;
466  metadata_is_complete_ = true;
467  if (!DemuxNextPes(true)) {
468  return false;
469  }
470  if (!Flush()) {
471  return false;
472  }
473  // Reset.
474  dts_ = pts_ = 0;
475  parse_state_ = StartCode1;
476  prev_media_sample_data_.Reset();
477  current_program_id_++;
478  ecm_.clear();
479  index_data_.clear();
480  psm_data_.clear();
481  break;
482  default:
483  break;
484  }
485  ++read_ptr;
486  }
487  return true;
488 }
489 
490 bool WvmMediaParser::EmitLastSample(
491  uint32_t stream_id,
492  const std::shared_ptr<MediaSample>& new_sample) {
493  std::string key = base::UintToString(current_program_id_)
494  .append(":")
495  .append(base::UintToString(stream_id));
496  std::map<std::string, uint32_t>::iterator it =
497  program_demux_stream_map_.find(key);
498  if (it == program_demux_stream_map_.end())
499  return false;
500  return EmitSample(stream_id, (*it).second, new_sample, true);
501 }
502 
503 bool WvmMediaParser::EmitPendingSamples() {
504  // Emit queued samples which were built when not initialized.
505  while (!media_sample_queue_.empty()) {
506  DemuxStreamIdMediaSample& demux_stream_media_sample =
507  media_sample_queue_.front();
508  if (!EmitSample(demux_stream_media_sample.parsed_audio_or_video_stream_id,
509  demux_stream_media_sample.demux_stream_id,
510  demux_stream_media_sample.media_sample,
511  false)) {
512  return false;
513  }
514  media_sample_queue_.pop_front();
515  }
516  return true;
517 }
518 
519 bool WvmMediaParser::Flush() {
520  // Flush the last audio and video sample for current program.
521  // Reset the streamID when successfully emitted.
522  if (prev_media_sample_data_.audio_sample != NULL) {
523  if (!EmitLastSample(prev_pes_stream_id_,
524  prev_media_sample_data_.audio_sample)) {
525  LOG(ERROR) << "Did not emit last sample for audio stream with ID = "
526  << prev_pes_stream_id_;
527  return false;
528  }
529  }
530  if (prev_media_sample_data_.video_sample != NULL) {
531  if (!EmitLastSample(prev_pes_stream_id_,
532  prev_media_sample_data_.video_sample)) {
533  LOG(ERROR) << "Did not emit last sample for video stream with ID = "
534  << prev_pes_stream_id_;
535  return false;
536  }
537  }
538  return true;
539 }
540 
541 bool WvmMediaParser::ParseIndexEntry() {
542  // Do not parse index entry at the beginning of any track *after* the first
543  // track.
544  if (current_program_id_ > 0) {
545  return true;
546  }
547  uint32_t index_size = 0;
548  if (index_data_.size() < kIndexVersion4HeaderSize) {
549  return false;
550  }
551 
552  const uint8_t* read_ptr = index_data_.data();
553  if (ntohlFromBuffer(read_ptr) != kIndexMagic) {
554  index_data_.clear();
555  return false;
556  }
557  read_ptr += 4;
558 
559  uint32_t version = ntohlFromBuffer(read_ptr);
560  read_ptr += 4;
561  if (version == kVersion4) {
562  index_size = kIndexVersion4HeaderSize + ntohlFromBuffer(read_ptr);
563  if (index_data_.size() < index_size) {
564  // We do not yet have the full index. Keep accumulating index data.
565  return true;
566  }
567  read_ptr += sizeof(uint32_t);
568 
569  // Index metadata
570  uint32_t index_metadata_max_size = index_size - kIndexVersion4HeaderSize;
571  if (index_metadata_max_size < sizeof(uint8_t)) {
572  index_data_.clear();
573  return false;
574  }
575 
576  uint64_t track_duration = 0;
577  uint32_t trick_play_factor = 0;
578  uint32_t sampling_frequency = kDefaultSamplingFrequency;
579  uint32_t time_scale = kMpeg2ClockRate;
580  uint16_t video_width = 0;
581  uint16_t video_height = 0;
582  uint32_t pixel_width = 0;
583  uint32_t pixel_height = 0;
584  uint8_t nalu_length_size = kNaluLengthSize;
585  uint8_t num_channels = 0;
586  int audio_pes_stream_id = 0;
587  int video_pes_stream_id = 0;
588  bool has_video = false;
589  bool has_audio = false;
590  std::vector<uint8_t> audio_codec_config;
591  std::vector<uint8_t> video_codec_config;
592  uint8_t num_index_entries = *read_ptr;
593  ++read_ptr;
594  --index_metadata_max_size;
595 
596  for (uint8_t idx = 0; idx < num_index_entries; ++idx) {
597  if (index_metadata_max_size < (2 * sizeof(uint8_t)) + sizeof(uint32_t)) {
598  return false;
599  }
600  uint8_t tag = *read_ptr;
601  ++read_ptr;
602  uint8_t type = *read_ptr;
603  ++read_ptr;
604  uint32_t length = ntohlFromBuffer(read_ptr);
605  read_ptr += sizeof(uint32_t);
606  index_metadata_max_size -= (2 * sizeof(uint8_t)) + sizeof(uint32_t);
607  if (index_metadata_max_size < length) {
608  return false;
609  }
610  int64_t value = 0;
611  Tag tagtype = Unset;
612  std::vector<uint8_t> binary_data;
613  switch (Type(type)) {
614  case Type_uint8:
615  if (length == sizeof(uint8_t)) {
616  tagtype = GetTag(tag, length, read_ptr, &value);
617  } else {
618  return false;
619  }
620  break;
621  case Type_int8:
622  if (length == sizeof(int8_t)) {
623  tagtype = GetTag(tag, length, read_ptr, &value);
624  } else {
625  return false;
626  }
627  break;
628  case Type_uint16:
629  if (length == sizeof(uint16_t)) {
630  tagtype = GetTag(tag, length, read_ptr, &value);
631  } else {
632  return false;
633  }
634  break;
635  case Type_int16:
636  if (length == sizeof(int16_t)) {
637  tagtype = GetTag(tag, length, read_ptr, &value);
638  } else {
639  return false;
640  }
641  break;
642  case Type_uint32:
643  if (length == sizeof(uint32_t)) {
644  tagtype = GetTag(tag, length, read_ptr, &value);
645  } else {
646  return false;
647  }
648  break;
649  case Type_int32:
650  if (length == sizeof(int32_t)) {
651  tagtype = GetTag(tag, length, read_ptr, &value);
652  } else {
653  return false;
654  }
655  break;
656  case Type_uint64:
657  if (length == sizeof(uint64_t)) {
658  tagtype = GetTag(tag, length, read_ptr, &value);
659  } else {
660  return false;
661  }
662  break;
663  case Type_int64:
664  if (length == sizeof(int64_t)) {
665  tagtype = GetTag(tag, length, read_ptr, &value);
666  } else {
667  return false;
668  }
669  break;
670  case Type_string:
671  case Type_BinaryData:
672  binary_data.assign(read_ptr, read_ptr + length);
673  tagtype = Tag(tag);
674  break;
675  default:
676  break;
677  }
678 
679  switch (tagtype) {
680  case TrackDuration:
681  track_duration = value;
682  break;
683  case TrackTrickPlayFactor:
684  trick_play_factor = value;
685  break;
686  case VideoStreamId:
687  video_pes_stream_id = value;
688  break;
689  case AudioStreamId:
690  audio_pes_stream_id = value;
691  break;
692  case VideoWidth:
693  video_width = (uint16_t)value;
694  break;
695  case VideoHeight:
696  video_height = (uint16_t)value;
697  break;
698  case AudioNumChannels:
699  num_channels = (uint8_t)value;
700  break;
701  case VideoType:
702  has_video = true;
703  break;
704  case AudioType:
705  has_audio = true;
706  break;
707  case VideoPixelWidth:
708  pixel_width = static_cast<uint32_t>(value);
709  break;
710  case VideoPixelHeight:
711  pixel_height = static_cast<uint32_t>(value);
712  break;
713  case Audio_EsDescriptor: {
714  ESDescriptor descriptor;
715  if (!descriptor.Parse(binary_data)) {
716  LOG(ERROR) <<
717  "Could not extract AudioSpecificConfig from ES_Descriptor";
718  return false;
719  }
720  audio_codec_config = descriptor.decoder_config_descriptor()
721  .decoder_specific_info_descriptor()
722  .data();
723  break;
724  }
725  case Audio_EC3SpecificData:
726  case Audio_DtsSpecificData:
727  case Audio_AC3SpecificData:
728  LOG(ERROR) << "Audio type not supported.";
729  return false;
730  case Video_AVCDecoderConfigurationRecord:
731  video_codec_config = binary_data;
732  break;
733  default:
734  break;
735  }
736 
737  read_ptr += length;
738  index_metadata_max_size -= length;
739  }
740  // End Index metadata
741  index_size = read_ptr - index_data_.data();
742 
743  if (has_video) {
744  stream_infos_.emplace_back(new VideoStreamInfo(
745  stream_id_count_, time_scale, track_duration, kCodecH264,
746  byte_to_unit_stream_converter_.stream_format(), std::string(),
747  video_codec_config.data(), video_codec_config.size(), video_width,
748  video_height, pixel_width, pixel_height,
749  0 /* transfer_characteristics */, trick_play_factor, nalu_length_size,
750  std::string(), decryption_key_source_ ? false : true));
751  program_demux_stream_map_[base::UintToString(index_program_id_) + ":" +
752  base::UintToString(
753  video_pes_stream_id
754  ? video_pes_stream_id
755  : kDefaultVideoStreamId)] =
756  stream_id_count_++;
757  }
758  if (has_audio) {
759  const Codec audio_codec = kCodecAAC;
760  // TODO(beil): Pass in max and average bitrate in wvm container.
761  stream_infos_.emplace_back(new AudioStreamInfo(
762  stream_id_count_, time_scale, track_duration, audio_codec,
763  std::string(), audio_codec_config.data(), audio_codec_config.size(),
764  kAacSampleSizeBits, num_channels, sampling_frequency,
765  0 /* seek preroll */, 0 /* codec delay */, 0 /* max bitrate */,
766  0 /* avg bitrate */, std::string(),
767  decryption_key_source_ ? false : true));
768  program_demux_stream_map_[base::UintToString(index_program_id_) + ":" +
769  base::UintToString(
770  audio_pes_stream_id
771  ? audio_pes_stream_id
772  : kDefaultAudioStreamId)] =
773  stream_id_count_++;
774  }
775  }
776 
777  index_program_id_++;
778  index_data_.clear();
779  return true;
780 }
781 
782 bool WvmMediaParser::DemuxNextPes(bool is_program_end) {
783  bool output_encrypted_sample = false;
784  if (!sample_data_.empty() && (prev_pes_flags_1_ & kScramblingBitsMask)) {
785  // Decrypt crypto unit.
786  if (!content_decryptor_) {
787  output_encrypted_sample = true;
788  } else {
789  content_decryptor_->Crypt(&sample_data_[crypto_unit_start_pos_],
790  sample_data_.size() - crypto_unit_start_pos_,
791  &sample_data_[crypto_unit_start_pos_]);
792  }
793  }
794  // Demux media sample if we are at program end or if we are not at a
795  // continuation PES.
796  if ((pes_flags_2_ & kPesOptPts) || is_program_end) {
797  if (!sample_data_.empty()) {
798  if (!Output(output_encrypted_sample)) {
799  return false;
800  }
801  }
802  StartMediaSampleDemux();
803  }
804 
805  crypto_unit_start_pos_ = sample_data_.size();
806  return true;
807 }
808 
809 void WvmMediaParser::StartMediaSampleDemux() {
810  bool is_key_frame = ((pes_flags_1_ & kPesOptAlign) != 0);
811  media_sample_ = MediaSample::CreateEmptyMediaSample();
812  media_sample_->set_dts(dts_);
813  media_sample_->set_pts(pts_);
814  media_sample_->set_is_key_frame(is_key_frame);
815 
816  sample_data_.clear();
817 }
818 
819 bool WvmMediaParser::Output(bool output_encrypted_sample) {
820  if (output_encrypted_sample) {
821  media_sample_->SetData(sample_data_.data(), sample_data_.size());
822  media_sample_->set_is_encrypted(true);
823  } else {
824  if ((prev_pes_stream_id_ & kPesStreamIdVideoMask) == kPesStreamIdVideo) {
825  // Convert video stream to unit stream and get config.
826  std::vector<uint8_t> nal_unit_stream;
827  if (!byte_to_unit_stream_converter_.ConvertByteStreamToNalUnitStream(
828  sample_data_.data(), sample_data_.size(), &nal_unit_stream)) {
829  LOG(ERROR) << "Could not convert h.264 byte stream sample";
830  return false;
831  }
832  media_sample_->SetData(nal_unit_stream.data(), nal_unit_stream.size());
833  if (!is_initialized_) {
834  // Set extra data for video stream from AVC Decoder Config Record.
835  // Also, set codec string from the AVC Decoder Config Record.
836  std::vector<uint8_t> decoder_config_record;
837  byte_to_unit_stream_converter_.GetDecoderConfigurationRecord(
838  &decoder_config_record);
839  for (uint32_t i = 0; i < stream_infos_.size(); i++) {
840  if (stream_infos_[i]->stream_type() == kStreamVideo &&
841  stream_infos_[i]->codec_string().empty()) {
842  const std::vector<uint8_t>* stream_config;
843  if (stream_infos_[i]->codec_config().empty()) {
844  // Decoder config record not available for stream. Use the one
845  // computed from the first video stream.
846  stream_infos_[i]->set_codec_config(decoder_config_record);
847  stream_config = &decoder_config_record;
848  } else {
849  // Use stream-specific config record.
850  stream_config = &stream_infos_[i]->codec_config();
851  }
852  DCHECK(stream_config);
853 
854  VideoStreamInfo* video_stream_info =
855  reinterpret_cast<VideoStreamInfo*>(stream_infos_[i].get());
856  AVCDecoderConfigurationRecord avc_config;
857  if (!avc_config.Parse(*stream_config)) {
858  LOG(WARNING) << "Failed to parse AVCDecoderConfigurationRecord. "
859  "Using computed configuration record instead.";
860  video_stream_info->set_codec_config(decoder_config_record);
861  if (!avc_config.Parse(decoder_config_record)) {
862  LOG(ERROR) << "Failed to parse AVCDecoderConfigurationRecord.";
863  return false;
864  }
865  }
866  const FourCC codec_fourcc =
867  byte_to_unit_stream_converter_.stream_format() ==
868  H26xStreamFormat::kNalUnitStreamWithParameterSetNalus
869  ? FOURCC_avc3
870  : FOURCC_avc1;
871  video_stream_info->set_codec_string(
872  avc_config.GetCodecString(codec_fourcc));
873 
874  if (avc_config.pixel_width() != video_stream_info->pixel_width() ||
875  avc_config.pixel_height() !=
876  video_stream_info->pixel_height()) {
877  LOG_IF(WARNING, video_stream_info->pixel_width() != 0 ||
878  video_stream_info->pixel_height() != 0)
879  << "Pixel aspect ratio in WVM metadata ("
880  << video_stream_info->pixel_width() << ","
881  << video_stream_info->pixel_height()
882  << ") does not match with SAR in "
883  "AVCDecoderConfigurationRecord ("
884  << avc_config.pixel_width() << ","
885  << avc_config.pixel_height()
886  << "). Use AVCDecoderConfigurationRecord.";
887  video_stream_info->set_pixel_width(avc_config.pixel_width());
888  video_stream_info->set_pixel_height(avc_config.pixel_height());
889  }
890  if (avc_config.coded_width() != video_stream_info->width() ||
891  avc_config.coded_height() != video_stream_info->height()) {
892  LOG(WARNING) << "Resolution in WVM metadata ("
893  << video_stream_info->width() << ","
894  << video_stream_info->height()
895  << ") does not match with resolution in "
896  "AVCDecoderConfigurationRecord ("
897  << avc_config.coded_width() << ","
898  << avc_config.coded_height()
899  << "). Use AVCDecoderConfigurationRecord.";
900  video_stream_info->set_width(avc_config.coded_width());
901  video_stream_info->set_height(avc_config.coded_height());
902  }
903  }
904  }
905  }
906  } else if ((prev_pes_stream_id_ & kPesStreamIdAudioMask) ==
907  kPesStreamIdAudio) {
908  // Set data on the audio stream.
909  mp2t::AdtsHeader adts_header;
910  const uint8_t* frame_ptr = sample_data_.data();
911  if (!adts_header.Parse(frame_ptr, sample_data_.size())) {
912  LOG(ERROR) << "Could not parse ADTS header";
913  return false;
914  }
915  media_sample_->SetData(
916  frame_ptr + adts_header.GetHeaderSize(),
917  adts_header.GetFrameSize() - adts_header.GetHeaderSize());
918  if (!is_initialized_) {
919  for (uint32_t i = 0; i < stream_infos_.size(); i++) {
920  if (stream_infos_[i]->stream_type() == kStreamAudio &&
921  stream_infos_[i]->codec_string().empty()) {
922  AudioStreamInfo* audio_stream_info =
923  reinterpret_cast<AudioStreamInfo*>(stream_infos_[i].get());
924  if (audio_stream_info->codec_config().empty()) {
925  // Set AudioStreamInfo fields using information from the ADTS
926  // header.
927  audio_stream_info->set_sampling_frequency(
928  adts_header.GetSamplingFrequency());
929  std::vector<uint8_t> audio_specific_config;
930  adts_header.GetAudioSpecificConfig(&audio_specific_config);
931  audio_stream_info->set_codec_config(audio_specific_config);
932  audio_stream_info->set_codec_string(
933  AudioStreamInfo::GetCodecString(
934  kCodecAAC, adts_header.GetObjectType()));
935  } else {
936  // Set AudioStreamInfo fields using information from the
937  // AACAudioSpecificConfig record.
938  AACAudioSpecificConfig aac_config;
939  if (!aac_config.Parse(stream_infos_[i]->codec_config())) {
940  LOG(ERROR) << "Could not parse AACAudioSpecificconfig";
941  return false;
942  }
943  audio_stream_info->set_sampling_frequency(
944  aac_config.GetSamplesPerSecond());
945  audio_stream_info->set_codec_string(
946  AudioStreamInfo::GetCodecString(
947  kCodecAAC, aac_config.GetAudioObjectType()));
948  }
949  }
950  }
951  }
952  }
953  }
954 
955  if (!is_initialized_) {
956  bool all_streams_have_config = true;
957  // Check if all collected stream infos have codec_config set.
958  for (uint32_t i = 0; i < stream_infos_.size(); i++) {
959  if (stream_infos_[i]->codec_string().empty()) {
960  all_streams_have_config = false;
961  break;
962  }
963  }
964  if (all_streams_have_config) {
965  init_cb_.Run(stream_infos_);
966  is_initialized_ = true;
967  }
968  }
969 
970  DCHECK_GT(media_sample_->data_size(), 0UL);
971  std::string key = base::UintToString(current_program_id_).append(":")
972  .append(base::UintToString(prev_pes_stream_id_));
973  std::map<std::string, uint32_t>::iterator it =
974  program_demux_stream_map_.find(key);
975  if (it == program_demux_stream_map_.end()) {
976  // TODO(ramjic): Log error message here and in other error cases through
977  // this method.
978  return false;
979  }
980  DemuxStreamIdMediaSample demux_stream_media_sample;
981  demux_stream_media_sample.parsed_audio_or_video_stream_id =
982  prev_pes_stream_id_;
983  demux_stream_media_sample.demux_stream_id = (*it).second;
984  demux_stream_media_sample.media_sample = media_sample_;
985  // Check if sample can be emitted.
986  if (!is_initialized_) {
987  media_sample_queue_.push_back(demux_stream_media_sample);
988  } else {
989  // flush the sample queue and emit all queued samples.
990  while (!media_sample_queue_.empty()) {
991  if (!EmitPendingSamples())
992  return false;
993  }
994  // Emit current sample.
995  if (!EmitSample(prev_pes_stream_id_, (*it).second, media_sample_, false))
996  return false;
997  }
998  return true;
999 }
1000 
1001 bool WvmMediaParser::EmitSample(uint32_t parsed_audio_or_video_stream_id,
1002  uint32_t stream_id,
1003  const std::shared_ptr<MediaSample>& new_sample,
1004  bool isLastSample) {
1005  DCHECK(new_sample);
1006  if (isLastSample) {
1007  if ((parsed_audio_or_video_stream_id & kPesStreamIdVideoMask) ==
1008  kPesStreamIdVideo) {
1009  new_sample->set_duration(prev_media_sample_data_.video_sample_duration);
1010  } else if ((parsed_audio_or_video_stream_id & kPesStreamIdAudioMask) ==
1011  kPesStreamIdAudio) {
1012  new_sample->set_duration(prev_media_sample_data_.audio_sample_duration);
1013  }
1014  if (!new_sample_cb_.Run(stream_id, new_sample)) {
1015  LOG(ERROR) << "Failed to process the last sample.";
1016  return false;
1017  }
1018  return true;
1019  }
1020 
1021  // Cannot emit current sample. Compute duration first and then,
1022  // emit previous sample.
1023  if ((parsed_audio_or_video_stream_id & kPesStreamIdVideoMask) ==
1024  kPesStreamIdVideo) {
1025  if (prev_media_sample_data_.video_sample == NULL) {
1026  prev_media_sample_data_.video_sample = new_sample;
1027  prev_media_sample_data_.video_stream_id = stream_id;
1028  return true;
1029  }
1030  prev_media_sample_data_.video_sample->set_duration(
1031  new_sample->dts() - prev_media_sample_data_.video_sample->dts());
1032  prev_media_sample_data_.video_sample_duration =
1033  prev_media_sample_data_.video_sample->duration();
1034  if (!new_sample_cb_.Run(prev_media_sample_data_.video_stream_id,
1035  prev_media_sample_data_.video_sample)) {
1036  LOG(ERROR) << "Failed to process the video sample.";
1037  return false;
1038  }
1039  prev_media_sample_data_.video_sample = new_sample;
1040  prev_media_sample_data_.video_stream_id = stream_id;
1041  } else if ((parsed_audio_or_video_stream_id & kPesStreamIdAudioMask) ==
1042  kPesStreamIdAudio) {
1043  if (prev_media_sample_data_.audio_sample == NULL) {
1044  prev_media_sample_data_.audio_sample = new_sample;
1045  prev_media_sample_data_.audio_stream_id = stream_id;
1046  return true;
1047  }
1048  prev_media_sample_data_.audio_sample->set_duration(
1049  new_sample->dts() - prev_media_sample_data_.audio_sample->dts());
1050  prev_media_sample_data_.audio_sample_duration =
1051  prev_media_sample_data_.audio_sample->duration();
1052  if (!new_sample_cb_.Run(prev_media_sample_data_.audio_stream_id,
1053  prev_media_sample_data_.audio_sample)) {
1054  LOG(ERROR) << "Failed to process the audio sample.";
1055  return false;
1056  }
1057  prev_media_sample_data_.audio_sample = new_sample;
1058  prev_media_sample_data_.audio_stream_id = stream_id;
1059  }
1060  return true;
1061 }
1062 
1063 bool WvmMediaParser::GetAssetKey(const uint8_t* asset_id,
1064  EncryptionKey* encryption_key) {
1065  DCHECK(decryption_key_source_);
1066  Status status = decryption_key_source_->FetchKeys(
1067  EmeInitDataType::WIDEVINE_CLASSIC,
1068  std::vector<uint8_t>(asset_id, asset_id + sizeof(uint32_t)));
1069  if (!status.ok()) {
1070  LOG(ERROR) << "Fetch Key(s) failed for AssetID = "
1071  << ntohlFromBuffer(asset_id) << ", error = " << status;
1072  return false;
1073  }
1074 
1075  const char kHdStreamLabel[] = "HD";
1076  status = decryption_key_source_->GetKey(kHdStreamLabel, encryption_key);
1077  if (!status.ok()) {
1078  LOG(ERROR) << "Fetch Key(s) failed for AssetID = "
1079  << ntohlFromBuffer(asset_id) << ", error = " << status;
1080  return false;
1081  }
1082 
1083  return true;
1084 }
1085 
1086 bool WvmMediaParser::ProcessEcm() {
1087  // An error will be returned later if the samples need to be decrypted.
1088  if (!decryption_key_source_)
1089  return true;
1090 
1091  if (current_program_id_ > 0) {
1092  return true;
1093  }
1094  if (ecm_.size() != kEcmSizeBytes) {
1095  LOG(ERROR) << "Unexpected ECM size = " << ecm_.size()
1096  << ", expected size = " << kEcmSizeBytes;
1097  return false;
1098  }
1099  const uint8_t* ecm_data = ecm_.data();
1100  DCHECK(ecm_data);
1101  ecm_data += sizeof(uint32_t); // old version field - skip.
1102  ecm_data += sizeof(uint32_t); // clear lead - skip.
1103  ecm_data += sizeof(uint32_t); // system id(includes ECM version) - skip.
1104  EncryptionKey encryption_key;
1105  if (!GetAssetKey(ecm_data, &encryption_key)) {
1106  return false;
1107  }
1108  if (encryption_key.key.size() < kAssetKeySizeBytes) {
1109  LOG(ERROR) << "Asset Key size of " << encryption_key.key.size()
1110  << " for AssetID = " << ntohlFromBuffer(ecm_data)
1111  << " is less than minimum asset key size.";
1112  return false;
1113  }
1114  ecm_data += sizeof(uint32_t); // asset_id.
1115  // Legacy WVM content may have asset keys > 16 bytes.
1116  // Use only the first 16 bytes of the asset key to get
1117  // the content key.
1118  std::vector<uint8_t> asset_key(
1119  encryption_key.key.begin(),
1120  encryption_key.key.begin() + kAssetKeySizeBytes);
1121  // WVM format always uses all zero IV.
1122  std::vector<uint8_t> zero_iv(kInitializationVectorSizeBytes, 0);
1123  AesCbcDecryptor asset_decryptor(kCtsPadding, AesCryptor::kUseConstantIv);
1124  if (!asset_decryptor.InitializeWithIv(asset_key, zero_iv)) {
1125  LOG(ERROR) << "Failed to initialize asset_decryptor.";
1126  return false;
1127  }
1128 
1129  const size_t content_key_buffer_size =
1130  kEcmFlagsSizeBytes + kEcmContentKeySizeBytes +
1131  kEcmPaddingSizeBytes; // flags + contentKey + padding.
1132  std::vector<uint8_t> content_key_buffer(content_key_buffer_size);
1133  CHECK(asset_decryptor.Crypt(ecm_data, content_key_buffer_size,
1134  content_key_buffer.data()));
1135 
1136  std::vector<uint8_t> decrypted_content_key_vec(
1137  content_key_buffer.begin() + 4,
1138  content_key_buffer.begin() + 20);
1139  std::unique_ptr<AesCbcDecryptor> content_decryptor(
1140  new AesCbcDecryptor(kCtsPadding, AesCryptor::kUseConstantIv));
1141  if (!content_decryptor->InitializeWithIv(decrypted_content_key_vec,
1142  zero_iv)) {
1143  LOG(ERROR) << "Failed to initialize content decryptor.";
1144  return false;
1145  }
1146 
1147  content_decryptor_ = std::move(content_decryptor);
1148  return true;
1149 }
1150 
1151 DemuxStreamIdMediaSample::DemuxStreamIdMediaSample() :
1152  demux_stream_id(0),
1153  parsed_audio_or_video_stream_id(0) {}
1154 
1155 DemuxStreamIdMediaSample::~DemuxStreamIdMediaSample() {}
1156 
1157 PrevSampleData::PrevSampleData() {
1158  Reset();
1159 }
1160 
1161 PrevSampleData::~PrevSampleData() {}
1162 
1163 void PrevSampleData::Reset() {
1164  audio_sample = NULL;
1165  video_sample = NULL;
1166  audio_stream_id = 0;
1167  video_stream_id = 0;
1168  audio_sample_duration = 0;
1169  video_sample_duration = 0;
1170 }
1171 
1172 } // namespace wvm
1173 } // namespace media
1174 } // namespace shaka
KeySource is responsible for encryption key acquisition.
Definition: key_source.h:51
base::Callback< bool(uint32_t track_id, std::shared_ptr< TextSample > text_sample)> NewTextSampleCB
Definition: media_parser.h:53
base::Callback< bool(uint32_t track_id, std::shared_ptr< MediaSample > media_sample)> NewMediaSampleCB
Definition: media_parser.h:44
base::Callback< void(const std::vector< std::shared_ptr< StreamInfo > > &stream_info)> InitCB
Definition: media_parser.h:35
All the methods that are virtual are virtual for mocking.