DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs Enumerator
wvm_media_parser.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "packager/media/formats/wvm/wvm_media_parser.h"
6 
7 #include <map>
8 #include <sstream>
9 #include <vector>
10 
11 #include "packager/base/strings/string_number_conversions.h"
12 #include "packager/media/base/aes_encryptor.h"
13 #include "packager/media/base/audio_stream_info.h"
14 #include "packager/media/base/key_source.h"
15 #include "packager/media/base/media_sample.h"
16 #include "packager/media/base/status.h"
17 #include "packager/media/base/video_stream_info.h"
18 #include "packager/media/filters/avc_decoder_configuration.h"
19 #include "packager/media/formats/mp2t/adts_header.h"
20 #include "packager/media/formats/mp4/aac_audio_specific_config.h"
21 #include "packager/media/formats/mp4/es_descriptor.h"
22 
// Evaluates to true when the PES stream id |x| is NOT one of the ids
// 0xBC, 0xBE, 0xBF, 0xF0, 0xF2, 0xF8 or 0xFF. Parse() reads the optional PES
// header extension bytes (flags, header data length, PTS/DTS) only when this
// is true; for the listed ids the payload follows the packet length directly.
// The argument is fully parenthesized so that expressions such as |a | b| are
// compared as a whole. Note that |x| is evaluated more than once.
#define HAS_HEADER_EXTENSION(x)                                        \
  (((x) != 0xBC) && ((x) != 0xBE) && ((x) != 0xBF) && ((x) != 0xF0) && \
   ((x) != 0xF2) && ((x) != 0xF8) && ((x) != 0xFF))
26 
namespace {

// Time scale assigned to every stream built from the index metadata.
const uint32_t kMpeg2ClockRate = 90000;

// Bit masks applied to the two PES header flag bytes.
const uint32_t kPesOptPts = 0x80;
const uint32_t kPesOptDts = 0x40;
const uint32_t kPesOptAlign = 0x04;

// Stream ids that receive dedicated handling in the parser.
const uint32_t kPsmStreamId = 0xBC;
const uint32_t kPaddingStreamId = 0xBE;
// Magic expected at the start of the index payload (ASCII bytes "IDMi").
const uint32_t kIndexMagic = 0x49444d69;
const uint32_t kIndexStreamId = 0xBF;  // private_stream_2
const uint32_t kIndexVersion4HeaderSize = 12;
const uint32_t kEcmStreamId = 0xF0;
const uint32_t kV2MetadataStreamId = 0xF1;  // EMM_stream
const uint32_t kScramblingBitsMask = 0x30;

// The four start-code bytes: 00 00 01 followed by a selector byte.
const uint32_t kStartCode1 = 0x00;
const uint32_t kStartCode2 = 0x00;
const uint32_t kStartCode3 = 0x01;
const uint32_t kStartCode4Pack = 0xBA;
const uint32_t kStartCode4System = 0xBB;
const uint32_t kStartCode4ProgramEnd = 0xB9;

// Mask/value pairs used to classify a PES stream id as video or audio.
const uint32_t kPesStreamIdVideoMask = 0xF0;
const uint32_t kPesStreamIdVideo = 0xE0;
const uint32_t kPesStreamIdAudioMask = 0xE0;
const uint32_t kPesStreamIdAudio = 0xC0;

// Index layout version understood by the index parser.
const uint32_t kVersion4 = 4;

const int kAdtsHeaderMinSize = 7;
const uint8_t kAacSampleSizeBits = 16;
// NAL unit length field size, in bytes; applies to all video streams.
const uint8_t kNaluLengthSize = 4;
// Placeholder sampling frequency for all audio streams; it is overwritten
// once the real value has been parsed from the stream.
const uint32_t kDefaultSamplingFrequency = 100;

const uint16_t kEcmSizeBytes = 80;
const uint32_t kInitializationVectorSizeBytes = 16;
// Sizes of the individual ECM fields.
const uint32_t kEcmContentKeySizeBytes = 16;
const uint32_t kEcmDCPFlagsSizeBytes = 3;
const uint32_t kEcmCCIFlagsSizeBytes = 1;
const uint32_t kEcmFlagsSizeBytes = kEcmCCIFlagsSizeBytes + kEcmDCPFlagsSizeBytes;
const uint32_t kEcmPaddingSizeBytes = 12;
const uint32_t kAssetKeySizeBytes = 16;

// PES stream ids assumed when the index metadata does not name one.
const uint8_t kDefaultAudioStreamId = kPesStreamIdAudio;
const uint8_t kDefaultVideoStreamId = kPesStreamIdVideo;

// Value-type codes stored in the "type" byte of each index metadata entry.
enum Type {
  Type_void = 0,
  Type_uint8 = 1,
  Type_int8 = 2,
  Type_uint16 = 3,
  Type_int16 = 4,
  Type_uint32 = 5,
  Type_int32 = 6,
  Type_uint64 = 7,
  Type_int64 = 8,
  Type_string = 9,
  Type_BinaryData = 10
};

}  // namespace
86 
87 namespace edash_packager {
88 namespace media {
89 namespace wvm {
90 
91 WvmMediaParser::WvmMediaParser()
92  : is_initialized_(false),
93  parse_state_(StartCode1),
94  is_psm_needed_(true),
95  skip_bytes_(0),
96  metadata_is_complete_(false),
97  current_program_id_(0),
98  pes_stream_id_(0),
99  prev_pes_stream_id_(0),
100  pes_packet_bytes_(0),
101  pes_flags_1_(0),
102  pes_flags_2_(0),
103  prev_pes_flags_1_(0),
104  pes_header_data_bytes_(0),
105  timestamp_(0),
106  pts_(0),
107  dts_(0),
108  index_program_id_(0),
109  media_sample_(NULL),
110  crypto_unit_start_pos_(0),
111  stream_id_count_(0),
112  decryption_key_source_(NULL) {
113 }
114 
115 WvmMediaParser::~WvmMediaParser() {}
116 
117 void WvmMediaParser::Init(const InitCB& init_cb,
118  const NewSampleCB& new_sample_cb,
119  KeySource* decryption_key_source) {
120  DCHECK(!is_initialized_);
121  DCHECK(!init_cb.is_null());
122  DCHECK(!new_sample_cb.is_null());
123  decryption_key_source_ = decryption_key_source;
124  init_cb_ = init_cb;
125  new_sample_cb_ = new_sample_cb;
126 }
127 
128 bool WvmMediaParser::Parse(const uint8_t* buf, int size) {
129  uint32_t num_bytes, prev_size;
130  num_bytes = prev_size = 0;
131  const uint8_t* read_ptr = buf;
132  const uint8_t* end = read_ptr + size;
133 
134  while (read_ptr < end) {
135  switch (parse_state_) {
136  case StartCode1:
137  if (*read_ptr == kStartCode1) {
138  parse_state_ = StartCode2;
139  }
140  break;
141  case StartCode2:
142  if (*read_ptr == kStartCode2) {
143  parse_state_ = StartCode3;
144  } else {
145  parse_state_ = StartCode1;
146  }
147  break;
148  case StartCode3:
149  if (*read_ptr == kStartCode3) {
150  parse_state_ = StartCode4;
151  } else {
152  parse_state_ = StartCode1;
153  }
154  break;
155  case StartCode4:
156  switch (*read_ptr) {
157  case kStartCode4Pack:
158  parse_state_ = PackHeader1;
159  break;
160  case kStartCode4System:
161  parse_state_ = SystemHeader1;
162  break;
163  case kStartCode4ProgramEnd:
164  parse_state_ = ProgramEnd;
165  continue;
166  default:
167  parse_state_ = PesStreamId;
168  continue;
169  }
170  break;
171  case PackHeader1:
172  parse_state_ = PackHeader2;
173  break;
174  case PackHeader2:
175  parse_state_ = PackHeader3;
176  break;
177  case PackHeader3:
178  parse_state_ = PackHeader4;
179  break;
180  case PackHeader4:
181  parse_state_ = PackHeader5;
182  break;
183  case PackHeader5:
184  parse_state_ = PackHeader6;
185  break;
186  case PackHeader6:
187  parse_state_ = PackHeader7;
188  break;
189  case PackHeader7:
190  parse_state_ = PackHeader8;
191  break;
192  case PackHeader8:
193  parse_state_ = PackHeader9;
194  break;
195  case PackHeader9:
196  parse_state_ = PackHeader10;
197  break;
198  case PackHeader10:
199  skip_bytes_ = *read_ptr & 0x07;
200  parse_state_ = PackHeaderStuffingSkip;
201  break;
202  case SystemHeader1:
203  skip_bytes_ = *read_ptr;
204  skip_bytes_ <<= 8;
205  parse_state_ = SystemHeader2;
206  break;
207  case SystemHeader2:
208  skip_bytes_ |= *read_ptr;
209  parse_state_ = SystemHeaderSkip;
210  break;
211  case PackHeaderStuffingSkip:
212  if ((end - read_ptr) >= (int32_t)skip_bytes_) {
213  read_ptr += skip_bytes_;
214  skip_bytes_ = 0;
215  parse_state_ = StartCode1;
216  } else {
217  skip_bytes_ -= (end - read_ptr);
218  read_ptr = end;
219  }
220  continue;
221  case SystemHeaderSkip:
222  if ((end - read_ptr) >= (int32_t)skip_bytes_) {
223  read_ptr += skip_bytes_;
224  skip_bytes_ = 0;
225  parse_state_ = StartCode1;
226  } else {
227  uint32_t remaining_size = end - read_ptr;
228  skip_bytes_ -= remaining_size;
229  read_ptr = end;
230  }
231  continue;
232  case PesStreamId:
233  pes_stream_id_ = *read_ptr;
234  if (!metadata_is_complete_ &&
235  (pes_stream_id_ != kPsmStreamId) &&
236  (pes_stream_id_ != kIndexStreamId) &&
237  (pes_stream_id_ != kEcmStreamId) &&
238  (pes_stream_id_ != kV2MetadataStreamId) &&
239  (pes_stream_id_ != kPaddingStreamId)) {
240  metadata_is_complete_ = true;
241  }
242  parse_state_ = PesPacketLength1;
243  break;
244  case PesPacketLength1:
245  pes_packet_bytes_ = *read_ptr;
246  pes_packet_bytes_ <<= 8;
247  parse_state_ = PesPacketLength2;
248  break;
249  case PesPacketLength2:
250  pes_packet_bytes_ |= *read_ptr;
251  if (HAS_HEADER_EXTENSION(pes_stream_id_)) {
252  parse_state_ = PesExtension1;
253  } else {
254  pes_flags_1_ = pes_flags_2_ = 0;
255  pes_header_data_bytes_ = 0;
256  parse_state_ = PesPayload;
257  }
258  break;
259  case PesExtension1:
260  prev_pes_flags_1_ = pes_flags_1_;
261  pes_flags_1_ = *read_ptr;
262  --pes_packet_bytes_;
263  parse_state_ = PesExtension2;
264  break;
265  case PesExtension2:
266  pes_flags_2_ = *read_ptr;
267  --pes_packet_bytes_;
268  parse_state_ = PesExtension3;
269  break;
270  case PesExtension3:
271  pes_header_data_bytes_ = *read_ptr;
272  --pes_packet_bytes_;
273  if (pes_flags_2_ & kPesOptPts) {
274  parse_state_ = Pts1;
275  } else {
276  parse_state_ = PesHeaderData;
277  }
278  break;
279  case Pts1:
280  timestamp_ = (*read_ptr & 0x0E);
281  --pes_header_data_bytes_;
282  --pes_packet_bytes_;
283  parse_state_ = Pts2;
284  break;
285  case Pts2:
286  timestamp_ <<= 7;
287  timestamp_ |= *read_ptr;
288  --pes_header_data_bytes_;
289  --pes_packet_bytes_;
290  parse_state_ = Pts3;
291  break;
292  case Pts3:
293  timestamp_ <<= 7;
294  timestamp_ |= *read_ptr >> 1;
295  --pes_header_data_bytes_;
296  --pes_packet_bytes_;
297  parse_state_ = Pts4;
298  break;
299  case Pts4:
300  timestamp_ <<= 8;
301  timestamp_ |= *read_ptr;
302  --pes_header_data_bytes_;
303  --pes_packet_bytes_;
304  parse_state_ = Pts5;
305  break;
306  case Pts5:
307  timestamp_ <<= 7;
308  timestamp_ |= *read_ptr >> 1;
309  pts_ = timestamp_;
310  --pes_header_data_bytes_;
311  --pes_packet_bytes_;
312  if (pes_flags_2_ & kPesOptDts) {
313  parse_state_ = Dts1;
314  } else {
315  dts_ = pts_;
316  parse_state_ = PesHeaderData;
317  }
318  break;
319  case Dts1:
320  timestamp_ = (*read_ptr & 0x0E);
321  --pes_header_data_bytes_;
322  --pes_packet_bytes_;
323  parse_state_ = Dts2;
324  break;
325  case Dts2:
326  timestamp_ <<= 7;
327  timestamp_ |= *read_ptr;
328  --pes_header_data_bytes_;
329  --pes_packet_bytes_;
330  parse_state_ = Dts3;
331  break;
332  case Dts3:
333  timestamp_ <<= 7;
334  timestamp_ |= *read_ptr >> 1;
335  --pes_header_data_bytes_;
336  --pes_packet_bytes_;
337  parse_state_ = Dts4;
338  break;
339  case Dts4:
340  timestamp_ <<= 8;
341  timestamp_ |= *read_ptr;
342  --pes_header_data_bytes_;
343  --pes_packet_bytes_;
344  parse_state_ = Dts5;
345  break;
346  case Dts5:
347  timestamp_ <<= 7;
348  timestamp_ |= *read_ptr >> 1;
349  dts_ = timestamp_;
350  --pes_header_data_bytes_;
351  --pes_packet_bytes_;
352  parse_state_ = PesHeaderData;
353  break;
354  case PesHeaderData:
355  num_bytes = end - read_ptr;
356  if (num_bytes >= pes_header_data_bytes_) {
357  num_bytes = pes_header_data_bytes_;
358  parse_state_ = PesPayload;
359  }
360  pes_header_data_bytes_ -= num_bytes;
361  pes_packet_bytes_ -= num_bytes;
362  read_ptr += num_bytes;
363  continue;
364  case PesPayload:
365  switch (pes_stream_id_) {
366  case kPsmStreamId:
367  psm_data_.clear();
368  parse_state_ = PsmPayload;
369  continue;
370  case kPaddingStreamId:
371  parse_state_ = Padding;
372  continue;
373  case kEcmStreamId:
374  ecm_.clear();
375  parse_state_ = EcmPayload;
376  continue;
377  case kIndexStreamId:
378  parse_state_ = IndexPayload;
379  continue;
380  default:
381  if (!DemuxNextPes(false)) {
382  return false;
383  }
384  parse_state_ = EsPayload;
385  }
386  continue;
387  case PsmPayload:
388  num_bytes = end - read_ptr;
389  if (num_bytes >= pes_packet_bytes_) {
390  num_bytes = pes_packet_bytes_;
391  parse_state_ = StartCode1;
392  }
393  if (num_bytes > 0) {
394  pes_packet_bytes_ -= num_bytes;
395  prev_size = psm_data_.size();
396  psm_data_.resize(prev_size + num_bytes);
397  memcpy(&psm_data_[prev_size], read_ptr, num_bytes);
398  }
399  read_ptr += num_bytes;
400  continue;
401  case EcmPayload:
402  num_bytes = end - read_ptr;
403  if (num_bytes >= pes_packet_bytes_) {
404  num_bytes = pes_packet_bytes_;
405  parse_state_ = StartCode1;
406  }
407  if (num_bytes > 0) {
408  pes_packet_bytes_ -= num_bytes;
409  prev_size = ecm_.size();
410  ecm_.resize(prev_size + num_bytes);
411  memcpy(&ecm_[prev_size], read_ptr, num_bytes);
412  }
413  if ((pes_packet_bytes_ == 0) && !ecm_.empty()) {
414  if (!ProcessEcm()) {
415  return(false);
416  }
417  }
418  read_ptr += num_bytes;
419  continue;
420  case IndexPayload:
421  num_bytes = end - read_ptr;
422  if (num_bytes >= pes_packet_bytes_) {
423  num_bytes = pes_packet_bytes_;
424  parse_state_ = StartCode1;
425  }
426  if (num_bytes > 0) {
427  pes_packet_bytes_ -= num_bytes;
428  prev_size = index_data_.size();
429  index_data_.resize(prev_size + num_bytes);
430  memcpy(&index_data_[prev_size], read_ptr, num_bytes);
431  }
432  if (pes_packet_bytes_ == 0 && !index_data_.empty()) {
433  if (!metadata_is_complete_) {
434  if (!ParseIndexEntry()) {
435  return false;
436  }
437  }
438  }
439  read_ptr += num_bytes;
440  continue;
441  case EsPayload:
442  num_bytes = end - read_ptr;
443  if (num_bytes >= pes_packet_bytes_) {
444  num_bytes = pes_packet_bytes_;
445  parse_state_ = StartCode1;
446  }
447  pes_packet_bytes_ -= num_bytes;
448  if (pes_stream_id_ != kV2MetadataStreamId) {
449  sample_data_.resize(sample_data_.size() + num_bytes);
450  memcpy(&sample_data_[sample_data_.size() - num_bytes], read_ptr,
451  num_bytes);
452  }
453  prev_pes_stream_id_ = pes_stream_id_;
454  read_ptr += num_bytes;
455  continue;
456  case Padding:
457  num_bytes = end - read_ptr;
458  if (num_bytes >= pes_packet_bytes_) {
459  num_bytes = pes_packet_bytes_;
460  parse_state_ = StartCode1;
461  }
462  pes_packet_bytes_ -= num_bytes;
463  read_ptr += num_bytes;
464  continue;
465  case ProgramEnd:
466  parse_state_ = StartCode1;
467  metadata_is_complete_ = true;
468  if (!DemuxNextPes(true)) {
469  return false;
470  }
471  if (!Flush()) {
472  return false;
473  }
474  // Reset.
475  dts_ = pts_ = 0;
476  parse_state_ = StartCode1;
477  prev_media_sample_data_.Reset();
478  current_program_id_++;
479  ecm_.clear();
480  index_data_.clear();
481  psm_data_.clear();
482  break;
483  default:
484  break;
485  }
486  ++read_ptr;
487  }
488  return true;
489 }
490 
491 bool WvmMediaParser::EmitLastSample(uint32_t stream_id,
492  scoped_refptr<MediaSample>& new_sample) {
493  std::string key = base::UintToString(current_program_id_)
494  .append(":")
495  .append(base::UintToString(stream_id));
496  std::map<std::string, uint32_t>::iterator it =
497  program_demux_stream_map_.find(key);
498  if (it == program_demux_stream_map_.end())
499  return false;
500  return EmitSample(stream_id, (*it).second, new_sample, true);
501 }
502 
503 bool WvmMediaParser::EmitPendingSamples() {
504  // Emit queued samples which were built when not initialized.
505  while (!media_sample_queue_.empty()) {
506  DemuxStreamIdMediaSample& demux_stream_media_sample =
507  media_sample_queue_.front();
508  if (!EmitSample(demux_stream_media_sample.parsed_audio_or_video_stream_id,
509  demux_stream_media_sample.demux_stream_id,
510  demux_stream_media_sample.media_sample,
511  false)) {
512  return false;
513  }
514  media_sample_queue_.pop_front();
515  }
516  return true;
517 }
518 
519 bool WvmMediaParser::Flush() {
520  // Flush the last audio and video sample for current program.
521  // Reset the streamID when successfully emitted.
522  if (prev_media_sample_data_.audio_sample != NULL) {
523  if (!EmitLastSample(prev_pes_stream_id_,
524  prev_media_sample_data_.audio_sample)) {
525  LOG(ERROR) << "Did not emit last sample for audio stream with ID = "
526  << prev_pes_stream_id_;
527  return false;
528  }
529  }
530  if (prev_media_sample_data_.video_sample != NULL) {
531  if (!EmitLastSample(prev_pes_stream_id_,
532  prev_media_sample_data_.video_sample)) {
533  LOG(ERROR) << "Did not emit last sample for video stream with ID = "
534  << prev_pes_stream_id_;
535  return false;
536  }
537  }
538  return true;
539 }
540 
541 bool WvmMediaParser::ParseIndexEntry() {
542  // Do not parse index entry at the beginning of any track *after* the first
543  // track.
544  if (current_program_id_ > 0) {
545  return true;
546  }
547  uint32_t index_size = 0;
548  if (index_data_.size() < kIndexVersion4HeaderSize) {
549  return false;
550  }
551 
552  const uint8_t* read_ptr = index_data_.data();
553  if (ntohlFromBuffer(read_ptr) != kIndexMagic) {
554  index_data_.clear();
555  return false;
556  }
557  read_ptr += 4;
558 
559  uint32_t version = ntohlFromBuffer(read_ptr);
560  read_ptr += 4;
561  if (version == kVersion4) {
562  index_size = kIndexVersion4HeaderSize + ntohlFromBuffer(read_ptr);
563  if (index_data_.size() < index_size) {
564  // We do not yet have the full index. Keep accumulating index data.
565  return true;
566  }
567  read_ptr += sizeof(uint32_t);
568 
569  // Index metadata
570  uint32_t index_metadata_max_size = index_size - kIndexVersion4HeaderSize;
571  if (index_metadata_max_size < sizeof(uint8_t)) {
572  index_data_.clear();
573  return false;
574  }
575 
576  uint64_t track_duration = 0;
577  int16_t trick_play_rate = 0;
578  uint32_t sampling_frequency = kDefaultSamplingFrequency;
579  uint32_t time_scale = kMpeg2ClockRate;
580  uint16_t video_width = 0;
581  uint16_t video_height = 0;
582  uint32_t pixel_width = 0;
583  uint32_t pixel_height = 0;
584  uint8_t nalu_length_size = kNaluLengthSize;
585  uint8_t num_channels = 0;
586  int audio_pes_stream_id = 0;
587  int video_pes_stream_id = 0;
588  bool has_video = false;
589  bool has_audio = false;
590  std::vector<uint8_t> audio_codec_config;
591  std::vector<uint8_t> video_codec_config;
592  uint8_t num_index_entries = *read_ptr;
593  ++read_ptr;
594  --index_metadata_max_size;
595 
596  for (uint8_t idx = 0; idx < num_index_entries; ++idx) {
597  if (index_metadata_max_size < (2 * sizeof(uint8_t)) + sizeof(uint32_t)) {
598  return false;
599  }
600  uint8_t tag = *read_ptr;
601  ++read_ptr;
602  uint8_t type = *read_ptr;
603  ++read_ptr;
604  uint32_t length = ntohlFromBuffer(read_ptr);
605  read_ptr += sizeof(uint32_t);
606  index_metadata_max_size -= (2 * sizeof(uint8_t)) + sizeof(uint32_t);
607  if (index_metadata_max_size < length) {
608  return false;
609  }
610  int64_t value = 0;
611  Tag tagtype = Unset;
612  std::vector<uint8_t> binary_data;
613  switch (Type(type)) {
614  case Type_uint8:
615  if (length == sizeof(uint8_t)) {
616  tagtype = GetTag(tag, length, read_ptr, &value);
617  } else {
618  return false;
619  }
620  break;
621  case Type_int8:
622  if (length == sizeof(int8_t)) {
623  tagtype = GetTag(tag, length, read_ptr, &value);
624  } else {
625  return false;
626  }
627  break;
628  case Type_uint16:
629  if (length == sizeof(uint16_t)) {
630  tagtype = GetTag(tag, length, read_ptr, &value);
631  } else {
632  return false;
633  }
634  break;
635  case Type_int16:
636  if (length == sizeof(int16_t)) {
637  tagtype = GetTag(tag, length, read_ptr, &value);
638  } else {
639  return false;
640  }
641  break;
642  case Type_uint32:
643  if (length == sizeof(uint32_t)) {
644  tagtype = GetTag(tag, length, read_ptr, &value);
645  } else {
646  return false;
647  }
648  break;
649  case Type_int32:
650  if (length == sizeof(int32_t)) {
651  tagtype = GetTag(tag, length, read_ptr, &value);
652  } else {
653  return false;
654  }
655  break;
656  case Type_uint64:
657  if (length == sizeof(uint64_t)) {
658  tagtype = GetTag(tag, length, read_ptr, &value);
659  } else {
660  return false;
661  }
662  break;
663  case Type_int64:
664  if (length == sizeof(int64_t)) {
665  tagtype = GetTag(tag, length, read_ptr, &value);
666  } else {
667  return false;
668  }
669  break;
670  case Type_string:
671  case Type_BinaryData:
672  binary_data.assign(read_ptr, read_ptr + length);
673  tagtype = Tag(tag);
674  break;
675  default:
676  break;
677  }
678 
679  switch (tagtype) {
680  case TrackDuration:
681  track_duration = value;
682  break;
683  case TrackTrickPlayRate:
684  trick_play_rate = value;
685  break;
686  case VideoStreamId:
687  video_pes_stream_id = value;
688  break;
689  case AudioStreamId:
690  audio_pes_stream_id = value;
691  break;
692  case VideoWidth:
693  video_width = (uint16_t)value;
694  break;
695  case VideoHeight:
696  video_height = (uint16_t)value;
697  break;
698  case AudioNumChannels:
699  num_channels = (uint8_t)value;
700  break;
701  case VideoType:
702  has_video = true;
703  break;
704  case AudioType:
705  has_audio = true;
706  break;
707  case VideoPixelWidth:
708  pixel_width = static_cast<uint32_t>(value);
709  break;
710  case VideoPixelHeight:
711  pixel_height = static_cast<uint32_t>(value);
712  break;
713  case Audio_EsDescriptor: {
714  mp4::ESDescriptor descriptor;
715  if (!descriptor.Parse(binary_data)) {
716  LOG(ERROR) <<
717  "Could not extract AudioSpecificConfig from ES_Descriptor";
718  return false;
719  }
720  audio_codec_config = descriptor.decoder_specific_info();
721  break;
722  }
723  case Audio_EC3SpecificData:
724  case Audio_DtsSpecificData:
725  case Audio_AC3SpecificData:
726  LOG(ERROR) << "Audio type not supported.";
727  return false;
728  case AVCDecoderConfigurationRecord:
729  video_codec_config = binary_data;
730  break;
731  default:
732  break;
733  }
734 
735  read_ptr += length;
736  index_metadata_max_size -= length;
737  }
738  // End Index metadata
739  index_size = read_ptr - index_data_.data();
740 
741  if (has_video) {
742  VideoCodec video_codec = kCodecH264;
743  stream_infos_.push_back(new VideoStreamInfo(
744  stream_id_count_, time_scale, track_duration, video_codec,
745  std::string(), std::string(), video_width, video_height, pixel_width,
746  pixel_height, trick_play_rate, nalu_length_size,
747  video_codec_config.data(), video_codec_config.size(), true));
748  program_demux_stream_map_[base::UintToString(index_program_id_) + ":" +
749  base::UintToString(video_pes_stream_id ?
750  video_pes_stream_id :
751  kDefaultVideoStreamId)] =
752  stream_id_count_++;
753  }
754  if (has_audio) {
755  AudioCodec audio_codec = kCodecAAC;
756  // TODO(beil): Pass in max and average bitrate in wvm container.
757  stream_infos_.push_back(new AudioStreamInfo(
758  stream_id_count_, time_scale, track_duration, audio_codec,
759  std::string(), std::string(), kAacSampleSizeBits, num_channels,
760  sampling_frequency, 0, 0, audio_codec_config.data(),
761  audio_codec_config.size(), true));
762  program_demux_stream_map_[base::UintToString(index_program_id_) + ":" +
763  base::UintToString(audio_pes_stream_id ?
764  audio_pes_stream_id :
765  kDefaultAudioStreamId)] =
766  stream_id_count_++;
767  }
768  }
769 
770  index_program_id_++;
771  index_data_.clear();
772  return true;
773 }
774 
775 bool WvmMediaParser::DemuxNextPes(bool is_program_end) {
776  bool output_encrypted_sample = false;
777  if (!sample_data_.empty() && (prev_pes_flags_1_ & kScramblingBitsMask)) {
778  // Decrypt crypto unit.
779  if (!content_decryptor_) {
780  output_encrypted_sample = true;
781  } else {
782  content_decryptor_->Decrypt(&sample_data_[crypto_unit_start_pos_],
783  sample_data_.size() - crypto_unit_start_pos_,
784  &sample_data_[crypto_unit_start_pos_]);
785  }
786  }
787  // Demux media sample if we are at program end or if we are not at a
788  // continuation PES.
789  if ((pes_flags_2_ & kPesOptPts) || is_program_end) {
790  if (!sample_data_.empty()) {
791  if (!Output(output_encrypted_sample)) {
792  return false;
793  }
794  }
795  StartMediaSampleDemux();
796  }
797 
798  crypto_unit_start_pos_ = sample_data_.size();
799  return true;
800 }
801 
802 void WvmMediaParser::StartMediaSampleDemux() {
803  bool is_key_frame = ((pes_flags_1_ & kPesOptAlign) != 0);
804  media_sample_ = MediaSample::CreateEmptyMediaSample();
805  media_sample_->set_dts(dts_);
806  media_sample_->set_pts(pts_);
807  media_sample_->set_is_key_frame(is_key_frame);
808 
809  sample_data_.clear();
810 }
811 
812 bool WvmMediaParser::Output(bool output_encrypted_sample) {
813  if (output_encrypted_sample) {
814  media_sample_->set_data(sample_data_.data(), sample_data_.size());
815  media_sample_->set_is_encrypted(true);
816  } else {
817  if ((prev_pes_stream_id_ & kPesStreamIdVideoMask) == kPesStreamIdVideo) {
818  // Convert video stream to unit stream and get config.
819  std::vector<uint8_t> nal_unit_stream;
820  if (!byte_to_unit_stream_converter_.ConvertByteStreamToNalUnitStream(
821  sample_data_.data(), sample_data_.size(), &nal_unit_stream)) {
822  LOG(ERROR) << "Could not convert h.264 byte stream sample";
823  return false;
824  }
825  media_sample_->set_data(nal_unit_stream.data(), nal_unit_stream.size());
826  if (!is_initialized_) {
827  // Set extra data for video stream from AVC Decoder Config Record.
828  // Also, set codec string from the AVC Decoder Config Record.
829  std::vector<uint8_t> decoder_config_record;
830  byte_to_unit_stream_converter_.GetAVCDecoderConfigurationRecord(
831  &decoder_config_record);
832  for (uint32_t i = 0; i < stream_infos_.size(); i++) {
833  if (stream_infos_[i]->stream_type() == media::kStreamVideo &&
834  stream_infos_[i]->codec_string().empty()) {
835  const std::vector<uint8_t>* stream_config;
836  if (stream_infos_[i]->extra_data().empty()) {
837  // Decoder config record not available for stream. Use the one
838  // computed from the first video stream.
839  stream_infos_[i]->set_extra_data(decoder_config_record);
840  stream_config = &decoder_config_record;
841  } else {
842  // Use stream-specific config record.
843  stream_config = &stream_infos_[i]->extra_data();
844  }
845  DCHECK(stream_config);
846 
847  VideoStreamInfo* video_stream_info =
848  reinterpret_cast<VideoStreamInfo*>(stream_infos_[i].get());
849  AVCDecoderConfiguration avc_config;
850  if (!avc_config.Parse(*stream_config)) {
851  LOG(WARNING) << "Failed to parse AVCDecoderConfigurationRecord. "
852  "Using computed configuration record instead.";
853  video_stream_info->set_extra_data(decoder_config_record);
854  if (!avc_config.Parse(decoder_config_record)) {
855  LOG(ERROR) << "Failed to parse AVCDecoderConfigurationRecord.";
856  return false;
857  }
858  }
859  video_stream_info->set_codec_string(avc_config.GetCodecString());
860 
861  if (avc_config.pixel_width() != video_stream_info->pixel_width() ||
862  avc_config.pixel_height() !=
863  video_stream_info->pixel_height()) {
864  LOG_IF(WARNING, video_stream_info->pixel_width() != 0 ||
865  video_stream_info->pixel_height() != 0)
866  << "Pixel aspect ratio in WVM metadata ("
867  << video_stream_info->pixel_width() << ","
868  << video_stream_info->pixel_height()
869  << ") does not match with SAR in "
870  "AVCDecoderConfigurationRecord ("
871  << avc_config.pixel_width() << ","
872  << avc_config.pixel_height()
873  << "). Use AVCDecoderConfigurationRecord.";
874  video_stream_info->set_pixel_width(avc_config.pixel_width());
875  video_stream_info->set_pixel_height(avc_config.pixel_height());
876  }
877  if (avc_config.coded_width() != video_stream_info->width() ||
878  avc_config.coded_height() != video_stream_info->height()) {
879  LOG(WARNING) << "Resolution in WVM metadata ("
880  << video_stream_info->width() << ","
881  << video_stream_info->height()
882  << ") does not match with resolution in "
883  "AVCDecoderConfigurationRecord ("
884  << avc_config.coded_width() << ","
885  << avc_config.coded_height()
886  << "). Use AVCDecoderConfigurationRecord.";
887  video_stream_info->set_width(avc_config.coded_width());
888  video_stream_info->set_height(avc_config.coded_height());
889  }
890  }
891  }
892  }
893  } else if ((prev_pes_stream_id_ & kPesStreamIdAudioMask) ==
894  kPesStreamIdAudio) {
895  // Set data on the audio stream.
897  sample_data_.data(), kAdtsHeaderMinSize);
898  media::mp2t::AdtsHeader adts_header;
899  const uint8_t* frame_ptr = sample_data_.data();
900  if (!adts_header.Parse(frame_ptr, frame_size)) {
901  LOG(ERROR) << "Could not parse ADTS header";
902  return false;
903  }
904  size_t header_size = adts_header.GetAdtsHeaderSize(frame_ptr,
905  frame_size);
906  media_sample_->set_data(frame_ptr + header_size,
907  frame_size - header_size);
908  if (!is_initialized_) {
909  for (uint32_t i = 0; i < stream_infos_.size(); i++) {
910  if (stream_infos_[i]->stream_type() == media::kStreamAudio &&
911  stream_infos_[i]->codec_string().empty()) {
912  AudioStreamInfo* audio_stream_info =
913  reinterpret_cast<AudioStreamInfo*>(stream_infos_[i].get());
914  if (audio_stream_info->extra_data().empty()) {
915  // Set AudioStreamInfo fields using information from the ADTS
916  // header.
917  audio_stream_info->set_sampling_frequency(
918  adts_header.GetSamplingFrequency());
919  std::vector<uint8_t> audio_specific_config;
920  if (!adts_header.GetAudioSpecificConfig(&audio_specific_config)) {
921  LOG(ERROR) << "Could not compute AACaudiospecificconfig";
922  return false;
923  }
924  audio_stream_info->set_extra_data(audio_specific_config);
925  audio_stream_info->set_codec_string(
927  kCodecAAC, adts_header.GetObjectType()));
928  } else {
929  // Set AudioStreamInfo fields using information from the
930  // AACAudioSpecificConfig record.
931  mp4::AACAudioSpecificConfig aac_config;
932  if (!aac_config.Parse(stream_infos_[i]->extra_data())) {
933  LOG(ERROR) << "Could not parse AACAudioSpecificconfig";
934  return false;
935  }
936  audio_stream_info->set_sampling_frequency(aac_config.frequency());
937  audio_stream_info->set_codec_string(
939  kCodecAAC, aac_config.audio_object_type()));
940  }
941  }
942  }
943  }
944  }
945  }
946 
947  if (!is_initialized_) {
948  bool all_streams_have_config = true;
949  // Check if all collected stream infos have extra_data set.
950  for (uint32_t i = 0; i < stream_infos_.size(); i++) {
951  if (stream_infos_[i]->codec_string().empty()) {
952  all_streams_have_config = false;
953  break;
954  }
955  }
956  if (all_streams_have_config) {
957  init_cb_.Run(stream_infos_);
958  is_initialized_ = true;
959  }
960  }
961 
962  DCHECK_GT(media_sample_->data_size(), 0UL);
963  std::string key = base::UintToString(current_program_id_).append(":")
964  .append(base::UintToString(prev_pes_stream_id_));
965  std::map<std::string, uint32_t>::iterator it =
966  program_demux_stream_map_.find(key);
967  if (it == program_demux_stream_map_.end()) {
968  // TODO(ramjic): Log error message here and in other error cases through
969  // this method.
970  return false;
971  }
972  DemuxStreamIdMediaSample demux_stream_media_sample;
973  demux_stream_media_sample.parsed_audio_or_video_stream_id =
974  prev_pes_stream_id_;
975  demux_stream_media_sample.demux_stream_id = (*it).second;
976  demux_stream_media_sample.media_sample = media_sample_;
977  // Check if sample can be emitted.
978  if (!is_initialized_) {
979  media_sample_queue_.push_back(demux_stream_media_sample);
980  } else {
981  // flush the sample queue and emit all queued samples.
982  while (!media_sample_queue_.empty()) {
983  if (!EmitPendingSamples())
984  return false;
985  }
986  // Emit current sample.
987  if (!EmitSample(prev_pes_stream_id_, (*it).second, media_sample_, false))
988  return false;
989  }
990  return true;
991 }
992 
993 bool WvmMediaParser::EmitSample(uint32_t parsed_audio_or_video_stream_id,
994  uint32_t stream_id,
995  scoped_refptr<MediaSample>& new_sample,
996  bool isLastSample) {
997  DCHECK(new_sample);
998  if (isLastSample) {
999  if ((parsed_audio_or_video_stream_id & kPesStreamIdVideoMask) ==
1000  kPesStreamIdVideo) {
1001  new_sample->set_duration(prev_media_sample_data_.video_sample_duration);
1002  } else if ((parsed_audio_or_video_stream_id & kPesStreamIdAudioMask) ==
1003  kPesStreamIdAudio) {
1004  new_sample->set_duration(prev_media_sample_data_.audio_sample_duration);
1005  }
1006  if (!new_sample_cb_.Run(stream_id, new_sample)) {
1007  LOG(ERROR) << "Failed to process the last sample.";
1008  return false;
1009  }
1010  return true;
1011  }
1012 
1013  // Cannot emit current sample. Compute duration first and then,
1014  // emit previous sample.
1015  if ((parsed_audio_or_video_stream_id & kPesStreamIdVideoMask) ==
1016  kPesStreamIdVideo) {
1017  if (prev_media_sample_data_.video_sample == NULL) {
1018  prev_media_sample_data_.video_sample = new_sample;
1019  prev_media_sample_data_.video_stream_id = stream_id;
1020  return true;
1021  }
1022  prev_media_sample_data_.video_sample->set_duration(
1023  new_sample->dts() - prev_media_sample_data_.video_sample->dts());
1024  prev_media_sample_data_.video_sample_duration =
1025  prev_media_sample_data_.video_sample->duration();
1026  if (!new_sample_cb_.Run(prev_media_sample_data_.video_stream_id,
1027  prev_media_sample_data_.video_sample)) {
1028  LOG(ERROR) << "Failed to process the video sample.";
1029  return false;
1030  }
1031  prev_media_sample_data_.video_sample = new_sample;
1032  prev_media_sample_data_.video_stream_id = stream_id;
1033  } else if ((parsed_audio_or_video_stream_id & kPesStreamIdAudioMask) ==
1034  kPesStreamIdAudio) {
1035  if (prev_media_sample_data_.audio_sample == NULL) {
1036  prev_media_sample_data_.audio_sample = new_sample;
1037  prev_media_sample_data_.audio_stream_id = stream_id;
1038  return true;
1039  }
1040  prev_media_sample_data_.audio_sample->set_duration(
1041  new_sample->dts() - prev_media_sample_data_.audio_sample->dts());
1042  prev_media_sample_data_.audio_sample_duration =
1043  prev_media_sample_data_.audio_sample->duration();
1044  if (!new_sample_cb_.Run(prev_media_sample_data_.audio_stream_id,
1045  prev_media_sample_data_.audio_sample)) {
1046  LOG(ERROR) << "Failed to process the audio sample.";
1047  return false;
1048  }
1049  prev_media_sample_data_.audio_sample = new_sample;
1050  prev_media_sample_data_.audio_stream_id = stream_id;
1051  }
1052  return true;
1053 }
1054 
1055 bool WvmMediaParser::GetAssetKey(const uint32_t asset_id,
1056  EncryptionKey* encryption_key) {
1057  DCHECK(decryption_key_source_);
1058  Status status = decryption_key_source_->FetchKeys(asset_id);
1059  if (!status.ok()) {
1060  LOG(ERROR) << "Fetch Key(s) failed for AssetID = " << asset_id
1061  << ", error = " << status;
1062  return false;
1063  }
1064 
1065  status = decryption_key_source_->GetKey(KeySource::TRACK_TYPE_HD,
1066  encryption_key);
1067  if (!status.ok()) {
1068  LOG(ERROR) << "Fetch Key(s) failed for AssetID = " << asset_id
1069  << ", error = " << status;
1070  return false;
1071  }
1072 
1073  return true;
1074 }
1075 
1076 bool WvmMediaParser::ProcessEcm() {
1077  // An error will be returned later if the samples need to be decrypted.
1078  if (!decryption_key_source_)
1079  return true;
1080 
1081  if (current_program_id_ > 0) {
1082  return true;
1083  }
1084  if (ecm_.size() != kEcmSizeBytes) {
1085  LOG(ERROR) << "Unexpected ECM size = " << ecm_.size()
1086  << ", expected size = " << kEcmSizeBytes;
1087  return false;
1088  }
1089  const uint8_t* ecm_data = ecm_.data();
1090  DCHECK(ecm_data);
1091  ecm_data += sizeof(uint32_t); // old version field - skip.
1092  ecm_data += sizeof(uint32_t); // clear lead - skip.
1093  ecm_data += sizeof(uint32_t); // system id(includes ECM version) - skip.
1094  uint32_t asset_id = ntohlFromBuffer(ecm_data);
1095  if (asset_id == 0) {
1096  LOG(ERROR) << "AssetID in ECM is not valid.";
1097  return false;
1098  }
1099  ecm_data += sizeof(uint32_t); // asset_id.
1100  EncryptionKey encryption_key;
1101  if (!GetAssetKey(asset_id, &encryption_key)) {
1102  return false;
1103  }
1104  if (encryption_key.key.size() < kAssetKeySizeBytes) {
1105  LOG(ERROR) << "Asset Key size of " << encryption_key.key.size()
1106  << " for AssetID = " << asset_id
1107  << " is less than minimum asset key size.";
1108  return false;
1109  }
1110  // Legacy WVM content may have asset keys > 16 bytes.
1111  // Use only the first 16 bytes of the asset key to get
1112  // the content key.
1113  std::vector<uint8_t> asset_key(
1114  encryption_key.key.begin(),
1115  encryption_key.key.begin() + kAssetKeySizeBytes);
1116  std::vector<uint8_t> iv(kInitializationVectorSizeBytes);
1117  AesCbcCtsDecryptor asset_decryptor;
1118  if (!asset_decryptor.InitializeWithIv(asset_key, iv)) {
1119  LOG(ERROR) << "Failed to initialize asset_decryptor.";
1120  return false;
1121  }
1122 
1123  const size_t content_key_buffer_size =
1124  kEcmFlagsSizeBytes + kEcmContentKeySizeBytes +
1125  kEcmPaddingSizeBytes; // flags + contentKey + padding.
1126  std::vector<uint8_t> content_key_buffer(content_key_buffer_size);
1127  asset_decryptor.Decrypt(ecm_data, content_key_buffer_size,
1128  content_key_buffer.data());
1129 
1130  std::vector<uint8_t> decrypted_content_key_vec(
1131  content_key_buffer.begin() + 4,
1132  content_key_buffer.begin() + 20);
1133  scoped_ptr<AesCbcCtsDecryptor> content_decryptor(new AesCbcCtsDecryptor);
1134  if (!content_decryptor->InitializeWithIv(decrypted_content_key_vec, iv)) {
1135  LOG(ERROR) << "Failed to initialize content decryptor.";
1136  return false;
1137  }
1138 
1139  content_decryptor_ = content_decryptor.Pass();
1140  return true;
1141 }
1142 
1143 DemuxStreamIdMediaSample::DemuxStreamIdMediaSample() :
1144  demux_stream_id(0),
1145  parsed_audio_or_video_stream_id(0) {}
1146 
1147 DemuxStreamIdMediaSample::~DemuxStreamIdMediaSample() {}
1148 
1149 PrevSampleData::PrevSampleData() {
1150  Reset();
1151 }
1152 
1153 PrevSampleData::~PrevSampleData() {}
1154 
1155 void PrevSampleData::Reset() {
1156  audio_sample = NULL;
1157  video_sample = NULL;
1158  audio_stream_id = 0;
1159  video_stream_id = 0;
1160  audio_sample_duration = 0;
1161  video_sample_duration = 0;
1162 }
1163 
1164 } // namespace wvm
1165 } // namespace media
1166 } // namespace edash_packager
static size_t GetAdtsFrameSize(const uint8_t *data, size_t num_bytes)
Definition: adts_header.cc:23
KeySource is responsible for encryption key acquisition.
Definition: key_source.h:35
static scoped_refptr< MediaSample > CreateEmptyMediaSample()
Create a MediaSample object with default members.
Definition: media_sample.cc:74
static std::string GetCodecString(AudioCodec codec, uint8_t audio_object_type)