DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs Enumerator
wvm_media_parser.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "packager/media/formats/wvm/wvm_media_parser.h"
6 
7 #include <map>
8 #include <sstream>
9 #include <vector>
10 
11 #include "packager/base/stl_util.h"
12 #include "packager/base/strings/string_number_conversions.h"
13 #include "packager/media/base/aes_encryptor.h"
14 #include "packager/media/base/audio_stream_info.h"
15 #include "packager/media/base/key_source.h"
16 #include "packager/media/base/media_sample.h"
17 #include "packager/media/base/status.h"
18 #include "packager/media/base/video_stream_info.h"
19 #include "packager/media/filters/avc_decoder_configuration.h"
20 #include "packager/media/formats/mp2t/adts_header.h"
21 #include "packager/media/formats/mp4/aac_audio_specific_config.h"
22 #include "packager/media/formats/mp4/es_descriptor.h"
23 
// Evaluates to true when PES stream id |x| is none of 0xBC, 0xBE, 0xBF, 0xF0,
// 0xF2, 0xF8 or 0xFF. Parse() uses it after reading the PES packet length to
// decide whether the flag/header-length bytes follow (PesExtension1) or the
// payload starts immediately (PesPayload).
//
// Every use of |x| is parenthesized so compound arguments (e.g. `a | b` or
// `c ? a : b`) compare as a whole. Note that |x| is evaluated several times,
// so it must not have side effects.
#define HAS_HEADER_EXTENSION(x)                                          \
  (((x) != 0xBC) && ((x) != 0xBE) && ((x) != 0xBF) && ((x) != 0xF0) && \
   ((x) != 0xF2) && ((x) != 0xF8) && ((x) != 0xFF))
27 
namespace {

// Time scale handed to the stream infos built from the index.
const uint32_t kMpeg2ClockRate = 90000;

// Bits tested in the second PES flags byte (PTS / DTS present) and in the
// first PES flags byte (alignment, scrambling).
const uint32_t kPesOptPts = 0x80;
const uint32_t kPesOptDts = 0x40;
const uint32_t kPesOptAlign = 0x04;

// PES stream ids that receive dedicated handling in the parser.
const uint32_t kPsmStreamId = 0xBC;
const uint32_t kPaddingStreamId = 0xBE;

// Index payload: magic word, carrying stream id and version-4 header size.
const uint32_t kIndexMagic = 0x49444d69;
const uint32_t kIndexStreamId = 0xBF;  // private_stream_2
const uint32_t kIndexVersion4HeaderSize = 12;

const uint32_t kEcmStreamId = 0xF0;
const uint32_t kV2MetadataStreamId = 0xF1;  // EMM_stream
const uint32_t kScramblingBitsMask = 0x30;

// Bytes of the start code prefix, followed by the possible fourth bytes.
const uint32_t kStartCode1 = 0x00;
const uint32_t kStartCode2 = 0x00;
const uint32_t kStartCode3 = 0x01;
const uint32_t kStartCode4Pack = 0xBA;
const uint32_t kStartCode4System = 0xBB;
const uint32_t kStartCode4ProgramEnd = 0xB9;

// Mask/value pairs used to classify a PES stream id as video or audio.
const uint32_t kPesStreamIdVideoMask = 0xF0;
const uint32_t kPesStreamIdVideo = 0xE0;
const uint32_t kPesStreamIdAudioMask = 0xE0;
const uint32_t kPesStreamIdAudio = 0xC0;

const uint32_t kVersion4 = 4;
const int kAdtsHeaderMinSize = 7;
const uint8_t kAacSampleSizeBits = 16;

// Applies to all video streams.
const uint8_t kNaluLengthSize = 4;  // unit is bytes.

// Placeholder sampling frequency for all audio streams, which
// will be overwritten after filter parsing.
const uint32_t kDefaultSamplingFrequency = 100;

const uint16_t kEcmSizeBytes = 80;
const uint32_t kInitializationVectorSizeBytes = 16;

// ECM fields for processing.
const uint32_t kEcmContentKeySizeBytes = 16;
const uint32_t kEcmDCPFlagsSizeBytes = 3;
const uint32_t kEcmCCIFlagsSizeBytes = 1;
const uint32_t kEcmFlagsSizeBytes =
    kEcmCCIFlagsSizeBytes + kEcmDCPFlagsSizeBytes;
const uint32_t kEcmPaddingSizeBytes = 12;
const uint32_t kAssetKeySizeBytes = 16;

// Default audio and video PES stream IDs.
const uint8_t kDefaultAudioStreamId = kPesStreamIdAudio;
const uint8_t kDefaultVideoStreamId = kPesStreamIdVideo;

// Value-type codes read from the "type" byte of each index metadata entry.
enum Type {
  Type_void = 0,
  Type_uint8 = 1,
  Type_int8 = 2,
  Type_uint16 = 3,
  Type_int16 = 4,
  Type_uint32 = 5,
  Type_int32 = 6,
  Type_uint64 = 7,
  Type_int64 = 8,
  Type_string = 9,
  Type_BinaryData = 10
};

}  // namespace
87 
88 namespace edash_packager {
89 namespace media {
90 namespace wvm {
91 
92 WvmMediaParser::WvmMediaParser()
93  : is_initialized_(false),
94  parse_state_(StartCode1),
95  is_psm_needed_(true),
96  skip_bytes_(0),
97  metadata_is_complete_(false),
98  current_program_id_(0),
99  pes_stream_id_(0),
100  prev_pes_stream_id_(0),
101  pes_packet_bytes_(0),
102  pes_flags_1_(0),
103  pes_flags_2_(0),
104  prev_pes_flags_1_(0),
105  pes_header_data_bytes_(0),
106  timestamp_(0),
107  pts_(0),
108  dts_(0),
109  index_program_id_(0),
110  media_sample_(NULL),
111  crypto_unit_start_pos_(0),
112  stream_id_count_(0),
113  decryption_key_source_(NULL) {
114 }
115 
// Empty destructor body: no explicit cleanup is performed here.
116 WvmMediaParser::~WvmMediaParser() {}
117 
118 void WvmMediaParser::Init(const InitCB& init_cb,
119  const NewSampleCB& new_sample_cb,
120  KeySource* decryption_key_source) {
121  DCHECK(!is_initialized_);
122  DCHECK(!init_cb.is_null());
123  DCHECK(!new_sample_cb.is_null());
124  decryption_key_source_ = decryption_key_source;
125  init_cb_ = init_cb;
126  new_sample_cb_ = new_sample_cb;
127 }
128 
// Feeds |size| bytes starting at |buf| through a byte-wise state machine
// driven by |parse_state_|. All parsing state (current state, remaining byte
// counts, partially accumulated payloads) lives in members, so a structure may
// be split across successive calls.
//
// Returns false as soon as DemuxNextPes(), ProcessEcm(), ParseIndexEntry() or
// Flush() reports failure; returns true once every byte of |buf| has been
// consumed.
129 bool WvmMediaParser::Parse(const uint8_t* buf, int size) {
130  uint32_t num_bytes, prev_size;
131  num_bytes = prev_size = 0;
132  const uint8_t* read_ptr = buf;
133  const uint8_t* end = read_ptr + size;
134 
  // Loop convention: a state that ends with `break` consumes exactly one byte
  // through the ++read_ptr at the bottom of the loop. A state that ends with
  // `continue` skips that increment, either because it advanced read_ptr
  // itself or because the current byte must be re-examined in the new state.
135  while (read_ptr < end) {
136  switch (parse_state_) {
  // StartCode1..3: scan for the byte sequence kStartCode1, kStartCode2,
  // kStartCode3 (0x00 0x00 0x01).
137  case StartCode1:
138  if (*read_ptr == kStartCode1) {
139  parse_state_ = StartCode2;
140  }
141  break;
142  case StartCode2:
143  if (*read_ptr == kStartCode2) {
144  parse_state_ = StartCode3;
145  } else {
146  parse_state_ = StartCode1;
147  }
148  break;
  // NOTE(review): any byte other than 0x01 here, including another 0x00,
  // restarts the scan at StartCode1, so 00 00 00 01 is not recognized as a
  // start code - confirm this is intended.
149  case StartCode3:
150  if (*read_ptr == kStartCode3) {
151  parse_state_ = StartCode4;
152  } else {
153  parse_state_ = StartCode1;
154  }
155  break;
  // StartCode4: the byte after the prefix selects what follows. Pack and
  // system headers consume this byte; program end and PES packets re-read it
  // in their own state (hence `continue`).
156  case StartCode4:
157  switch (*read_ptr) {
158  case kStartCode4Pack:
159  parse_state_ = PackHeader1;
160  break;
161  case kStartCode4System:
162  parse_state_ = SystemHeader1;
163  break;
164  case kStartCode4ProgramEnd:
165  parse_state_ = ProgramEnd;
166  continue;
167  default:
168  parse_state_ = PesStreamId;
169  continue;
170  }
171  break;
  // PackHeader1..9: nine pack header bytes are skipped one at a time without
  // being interpreted.
172  case PackHeader1:
173  parse_state_ = PackHeader2;
174  break;
175  case PackHeader2:
176  parse_state_ = PackHeader3;
177  break;
178  case PackHeader3:
179  parse_state_ = PackHeader4;
180  break;
181  case PackHeader4:
182  parse_state_ = PackHeader5;
183  break;
184  case PackHeader5:
185  parse_state_ = PackHeader6;
186  break;
187  case PackHeader6:
188  parse_state_ = PackHeader7;
189  break;
190  case PackHeader7:
191  parse_state_ = PackHeader8;
192  break;
193  case PackHeader8:
194  parse_state_ = PackHeader9;
195  break;
196  case PackHeader9:
197  parse_state_ = PackHeader10;
198  break;
  // PackHeader10: the low three bits give the number of stuffing bytes to
  // skip next.
199  case PackHeader10:
200  skip_bytes_ = *read_ptr & 0x07;
201  parse_state_ = PackHeaderStuffingSkip;
202  break;
  // SystemHeader1/2: read a 16-bit big-endian count of bytes to skip.
203  case SystemHeader1:
204  skip_bytes_ = *read_ptr;
205  skip_bytes_ <<= 8;
206  parse_state_ = SystemHeader2;
207  break;
208  case SystemHeader2:
209  skip_bytes_ |= *read_ptr;
210  parse_state_ = SystemHeaderSkip;
211  break;
  // Both skip states drop |skip_bytes_| bytes. If the buffer ends first, the
  // remaining count is kept so the skip resumes on the next call.
212  case PackHeaderStuffingSkip:
213  if ((end - read_ptr) >= (int32_t)skip_bytes_) {
214  read_ptr += skip_bytes_;
215  skip_bytes_ = 0;
216  parse_state_ = StartCode1;
217  } else {
218  skip_bytes_ -= (end - read_ptr);
219  read_ptr = end;
220  }
221  continue;
222  case SystemHeaderSkip:
223  if ((end - read_ptr) >= (int32_t)skip_bytes_) {
224  read_ptr += skip_bytes_;
225  skip_bytes_ = 0;
226  parse_state_ = StartCode1;
227  } else {
228  uint32_t remaining_size = end - read_ptr;
229  skip_bytes_ -= remaining_size;
230  read_ptr = end;
231  }
232  continue;
  // PesStreamId: record the stream id. The first id that is not PSM, index,
  // ECM, V2 metadata or padding marks the metadata as complete.
233  case PesStreamId:
234  pes_stream_id_ = *read_ptr;
235  if (!metadata_is_complete_ &&
236  (pes_stream_id_ != kPsmStreamId) &&
237  (pes_stream_id_ != kIndexStreamId) &&
238  (pes_stream_id_ != kEcmStreamId) &&
239  (pes_stream_id_ != kV2MetadataStreamId) &&
240  (pes_stream_id_ != kPaddingStreamId)) {
241  metadata_is_complete_ = true;
242  }
243  parse_state_ = PesPacketLength1;
244  break;
  // PesPacketLength1/2: 16-bit big-endian packet length. Stream ids without a
  // header extension go straight to the payload with cleared flags.
245  case PesPacketLength1:
246  pes_packet_bytes_ = *read_ptr;
247  pes_packet_bytes_ <<= 8;
248  parse_state_ = PesPacketLength2;
249  break;
250  case PesPacketLength2:
251  pes_packet_bytes_ |= *read_ptr;
252  if (HAS_HEADER_EXTENSION(pes_stream_id_)) {
253  parse_state_ = PesExtension1;
254  } else {
255  pes_flags_1_ = pes_flags_2_ = 0;
256  pes_header_data_bytes_ = 0;
257  parse_state_ = PesPayload;
258  }
259  break;
  // PesExtension1..3: two flag bytes and the header-data length. Each header
  // byte consumed from here on is subtracted from |pes_packet_bytes_| so that
  // it ends up counting payload bytes only.
  // NOTE(review): nothing checks that the declared packet length covers the
  // header; if these counters are unsigned a too-short length would wrap -
  // confirm against the member declarations.
260  case PesExtension1:
261  prev_pes_flags_1_ = pes_flags_1_;
262  pes_flags_1_ = *read_ptr;
263  --pes_packet_bytes_;
264  parse_state_ = PesExtension2;
265  break;
266  case PesExtension2:
267  pes_flags_2_ = *read_ptr;
268  --pes_packet_bytes_;
269  parse_state_ = PesExtension3;
270  break;
271  case PesExtension3:
272  pes_header_data_bytes_ = *read_ptr;
273  --pes_packet_bytes_;
274  if (pes_flags_2_ & kPesOptPts) {
275  parse_state_ = Pts1;
276  } else {
277  parse_state_ = PesHeaderData;
278  }
279  break;
  // Pts1..5: assemble the PTS from five header bytes. Byte 1 contributes the
  // bits under mask 0x0E, bytes 2 and 4 contribute all eight bits, and bytes
  // 3 and 5 contribute their upper seven bits (3 + 8 + 7 + 8 + 7 = 33 bits).
280  case Pts1:
281  timestamp_ = (*read_ptr & 0x0E);
282  --pes_header_data_bytes_;
283  --pes_packet_bytes_;
284  parse_state_ = Pts2;
285  break;
286  case Pts2:
287  timestamp_ <<= 7;
288  timestamp_ |= *read_ptr;
289  --pes_header_data_bytes_;
290  --pes_packet_bytes_;
291  parse_state_ = Pts3;
292  break;
293  case Pts3:
294  timestamp_ <<= 7;
295  timestamp_ |= *read_ptr >> 1;
296  --pes_header_data_bytes_;
297  --pes_packet_bytes_;
298  parse_state_ = Pts4;
299  break;
300  case Pts4:
301  timestamp_ <<= 8;
302  timestamp_ |= *read_ptr;
303  --pes_header_data_bytes_;
304  --pes_packet_bytes_;
305  parse_state_ = Pts5;
306  break;
  // Pts5: the PTS is complete. Without a DTS flag the DTS is set equal to it.
307  case Pts5:
308  timestamp_ <<= 7;
309  timestamp_ |= *read_ptr >> 1;
310  pts_ = timestamp_;
311  --pes_header_data_bytes_;
312  --pes_packet_bytes_;
313  if (pes_flags_2_ & kPesOptDts) {
314  parse_state_ = Dts1;
315  } else {
316  dts_ = pts_;
317  parse_state_ = PesHeaderData;
318  }
319  break;
  // Dts1..5: same five-byte assembly as the PTS, stored into |dts_|.
320  case Dts1:
321  timestamp_ = (*read_ptr & 0x0E);
322  --pes_header_data_bytes_;
323  --pes_packet_bytes_;
324  parse_state_ = Dts2;
325  break;
326  case Dts2:
327  timestamp_ <<= 7;
328  timestamp_ |= *read_ptr;
329  --pes_header_data_bytes_;
330  --pes_packet_bytes_;
331  parse_state_ = Dts3;
332  break;
333  case Dts3:
334  timestamp_ <<= 7;
335  timestamp_ |= *read_ptr >> 1;
336  --pes_header_data_bytes_;
337  --pes_packet_bytes_;
338  parse_state_ = Dts4;
339  break;
340  case Dts4:
341  timestamp_ <<= 8;
342  timestamp_ |= *read_ptr;
343  --pes_header_data_bytes_;
344  --pes_packet_bytes_;
345  parse_state_ = Dts5;
346  break;
347  case Dts5:
348  timestamp_ <<= 7;
349  timestamp_ |= *read_ptr >> 1;
350  dts_ = timestamp_;
351  --pes_header_data_bytes_;
352  --pes_packet_bytes_;
353  parse_state_ = PesHeaderData;
354  break;
  // PesHeaderData: skip whatever header-data bytes remain, resuming on the
  // next call if the buffer ends first.
355  case PesHeaderData:
356  num_bytes = end - read_ptr;
357  if (num_bytes >= pes_header_data_bytes_) {
358  num_bytes = pes_header_data_bytes_;
359  parse_state_ = PesPayload;
360  }
361  pes_header_data_bytes_ -= num_bytes;
362  pes_packet_bytes_ -= num_bytes;
363  read_ptr += num_bytes;
364  continue;
  // PesPayload: pick the payload handler for this stream id without consuming
  // a byte. Elementary-stream payloads first call DemuxNextPes(false).
365  case PesPayload:
366  switch (pes_stream_id_) {
367  case kPsmStreamId:
368  psm_data_.clear();
369  parse_state_ = PsmPayload;
370  continue;
371  case kPaddingStreamId:
372  parse_state_ = Padding;
373  continue;
374  case kEcmStreamId:
375  ecm_.clear();
376  parse_state_ = EcmPayload;
377  continue;
378  case kIndexStreamId:
379  parse_state_ = IndexPayload;
380  continue;
381  default:
382  if (!DemuxNextPes(false)) {
383  return false;
384  }
385  parse_state_ = EsPayload;
386  }
387  continue;
  // The payload states below share one pattern: take as many bytes as are
  // available, capped at |pes_packet_bytes_|; when the cap is reached the
  // packet is finished and scanning restarts at StartCode1.
  // PsmPayload: append the bytes to |psm_data_|.
388  case PsmPayload:
389  num_bytes = end - read_ptr;
390  if (num_bytes >= pes_packet_bytes_) {
391  num_bytes = pes_packet_bytes_;
392  parse_state_ = StartCode1;
393  }
394  if (num_bytes > 0) {
395  pes_packet_bytes_ -= num_bytes;
396  prev_size = psm_data_.size();
397  psm_data_.resize(prev_size + num_bytes);
398  memcpy(&psm_data_[prev_size], read_ptr, num_bytes);
399  }
400  read_ptr += num_bytes;
401  continue;
  // EcmPayload: append to |ecm_| and run ProcessEcm() once the packet is
  // complete.
402  case EcmPayload:
403  num_bytes = end - read_ptr;
404  if (num_bytes >= pes_packet_bytes_) {
405  num_bytes = pes_packet_bytes_;
406  parse_state_ = StartCode1;
407  }
408  if (num_bytes > 0) {
409  pes_packet_bytes_ -= num_bytes;
410  prev_size = ecm_.size();
411  ecm_.resize(prev_size + num_bytes);
412  memcpy(&ecm_[prev_size], read_ptr, num_bytes);
413  }
414  if ((pes_packet_bytes_ == 0) && !ecm_.empty()) {
415  if (!ProcessEcm()) {
416  return(false);
417  }
418  }
419  read_ptr += num_bytes;
420  continue;
  // IndexPayload: append to |index_data_|. The index is only parsed while the
  // metadata is still incomplete. |index_data_| is not cleared on entry, so
  // index bytes from several packets accumulate.
421  case IndexPayload:
422  num_bytes = end - read_ptr;
423  if (num_bytes >= pes_packet_bytes_) {
424  num_bytes = pes_packet_bytes_;
425  parse_state_ = StartCode1;
426  }
427  if (num_bytes > 0) {
428  pes_packet_bytes_ -= num_bytes;
429  prev_size = index_data_.size();
430  index_data_.resize(prev_size + num_bytes);
431  memcpy(&index_data_[prev_size], read_ptr, num_bytes);
432  }
433  if (pes_packet_bytes_ == 0 && !index_data_.empty()) {
434  if (!metadata_is_complete_) {
435  if (!ParseIndexEntry()) {
436  return false;
437  }
438  }
439  }
440  read_ptr += num_bytes;
441  continue;
  // EsPayload: append to |sample_data_|, except for the V2 metadata stream
  // whose bytes are dropped. The stream id is remembered in
  // |prev_pes_stream_id_|.
442  case EsPayload:
443  num_bytes = end - read_ptr;
444  if (num_bytes >= pes_packet_bytes_) {
445  num_bytes = pes_packet_bytes_;
446  parse_state_ = StartCode1;
447  }
448  pes_packet_bytes_ -= num_bytes;
449  if (pes_stream_id_ != kV2MetadataStreamId) {
450  sample_data_.resize(sample_data_.size() + num_bytes);
451  memcpy(&sample_data_[sample_data_.size() - num_bytes], read_ptr,
452  num_bytes);
453  }
454  prev_pes_stream_id_ = pes_stream_id_;
455  read_ptr += num_bytes;
456  continue;
  // Padding: discard the payload.
457  case Padding:
458  num_bytes = end - read_ptr;
459  if (num_bytes >= pes_packet_bytes_) {
460  num_bytes = pes_packet_bytes_;
461  parse_state_ = StartCode1;
462  }
463  pes_packet_bytes_ -= num_bytes;
464  read_ptr += num_bytes;
465  continue;
  // ProgramEnd: output the sample in progress, flush the held-back last
  // samples, then reset per-program state and advance to the next program id.
  // The `break` consumes the program-end byte that StartCode4 left unread.
466  case ProgramEnd:
467  parse_state_ = StartCode1;
468  metadata_is_complete_ = true;
469  if (!DemuxNextPes(true)) {
470  return false;
471  }
472  if (!Flush()) {
473  return false;
474  }
475  // Reset.
476  dts_ = pts_ = 0;
477  parse_state_ = StartCode1;
478  prev_media_sample_data_.Reset();
479  current_program_id_++;
480  ecm_.clear();
481  index_data_.clear();
482  psm_data_.clear();
483  break;
484  default:
485  break;
486  }
487  ++read_ptr;
488  }
489  return true;
490 }
491 
492 bool WvmMediaParser::EmitLastSample(uint32_t stream_id,
493  scoped_refptr<MediaSample>& new_sample) {
494  std::string key = base::UintToString(current_program_id_)
495  .append(":")
496  .append(base::UintToString(stream_id));
497  std::map<std::string, uint32_t>::iterator it =
498  program_demux_stream_map_.find(key);
499  if (it == program_demux_stream_map_.end())
500  return false;
501  return EmitSample(stream_id, (*it).second, new_sample, true);
502 }
503 
504 bool WvmMediaParser::EmitPendingSamples() {
505  // Emit queued samples which were built when not initialized.
506  while (!media_sample_queue_.empty()) {
507  DemuxStreamIdMediaSample& demux_stream_media_sample =
508  media_sample_queue_.front();
509  if (!EmitSample(demux_stream_media_sample.parsed_audio_or_video_stream_id,
510  demux_stream_media_sample.demux_stream_id,
511  demux_stream_media_sample.media_sample,
512  false)) {
513  return false;
514  }
515  media_sample_queue_.pop_front();
516  }
517  return true;
518 }
519 
520 bool WvmMediaParser::Flush() {
521  // Flush the last audio and video sample for current program.
522  // Reset the streamID when successfully emitted.
523  if (prev_media_sample_data_.audio_sample != NULL) {
524  if (!EmitLastSample(prev_pes_stream_id_,
525  prev_media_sample_data_.audio_sample)) {
526  LOG(ERROR) << "Did not emit last sample for audio stream with ID = "
527  << prev_pes_stream_id_;
528  return false;
529  }
530  }
531  if (prev_media_sample_data_.video_sample != NULL) {
532  if (!EmitLastSample(prev_pes_stream_id_,
533  prev_media_sample_data_.video_sample)) {
534  LOG(ERROR) << "Did not emit last sample for video stream with ID = "
535  << prev_pes_stream_id_;
536  return false;
537  }
538  }
539  return true;
540 }
541 
542 bool WvmMediaParser::ParseIndexEntry() {
543  // Do not parse index entry at the beginning of any track *after* the first
544  // track.
545  if (current_program_id_ > 0) {
546  return true;
547  }
548  uint32_t index_size = 0;
549  if (index_data_.size() < kIndexVersion4HeaderSize) {
550  return false;
551  }
552 
553  const uint8_t* read_ptr = vector_as_array(&index_data_);
554  if (ntohlFromBuffer(read_ptr) != kIndexMagic) {
555  index_data_.clear();
556  return false;
557  }
558  read_ptr += 4;
559 
560  uint32_t version = ntohlFromBuffer(read_ptr);
561  read_ptr += 4;
562  if (version == kVersion4) {
563  index_size = kIndexVersion4HeaderSize + ntohlFromBuffer(read_ptr);
564  if (index_data_.size() < index_size) {
565  // We do not yet have the full index. Keep accumulating index data.
566  return true;
567  }
568  read_ptr += sizeof(uint32_t);
569 
570  // Index metadata
571  uint32_t index_metadata_max_size = index_size - kIndexVersion4HeaderSize;
572  if (index_metadata_max_size < sizeof(uint8_t)) {
573  index_data_.clear();
574  return false;
575  }
576 
577  uint64_t track_duration = 0;
578  int16_t trick_play_rate = 0;
579  uint32_t sampling_frequency = kDefaultSamplingFrequency;
580  uint32_t time_scale = kMpeg2ClockRate;
581  uint16_t video_width = 0;
582  uint16_t video_height = 0;
583  uint32_t pixel_width = 0;
584  uint32_t pixel_height = 0;
585  uint8_t nalu_length_size = kNaluLengthSize;
586  uint8_t num_channels = 0;
587  int audio_pes_stream_id = 0;
588  int video_pes_stream_id = 0;
589  bool has_video = false;
590  bool has_audio = false;
591  std::vector<uint8_t> audio_codec_config;
592  std::vector<uint8_t> video_codec_config;
593  uint8_t num_index_entries = *read_ptr;
594  ++read_ptr;
595  --index_metadata_max_size;
596 
597  for (uint8_t idx = 0; idx < num_index_entries; ++idx) {
598  if (index_metadata_max_size < (2 * sizeof(uint8_t)) + sizeof(uint32_t)) {
599  return false;
600  }
601  uint8_t tag = *read_ptr;
602  ++read_ptr;
603  uint8_t type = *read_ptr;
604  ++read_ptr;
605  uint32_t length = ntohlFromBuffer(read_ptr);
606  read_ptr += sizeof(uint32_t);
607  index_metadata_max_size -= (2 * sizeof(uint8_t)) + sizeof(uint32_t);
608  if (index_metadata_max_size < length) {
609  return false;
610  }
611  int64_t value = 0;
612  Tag tagtype = Unset;
613  std::vector<uint8_t> binary_data;
614  switch (Type(type)) {
615  case Type_uint8:
616  if (length == sizeof(uint8_t)) {
617  tagtype = GetTag(tag, length, read_ptr, &value);
618  } else {
619  return false;
620  }
621  break;
622  case Type_int8:
623  if (length == sizeof(int8_t)) {
624  tagtype = GetTag(tag, length, read_ptr, &value);
625  } else {
626  return false;
627  }
628  break;
629  case Type_uint16:
630  if (length == sizeof(uint16_t)) {
631  tagtype = GetTag(tag, length, read_ptr, &value);
632  } else {
633  return false;
634  }
635  break;
636  case Type_int16:
637  if (length == sizeof(int16_t)) {
638  tagtype = GetTag(tag, length, read_ptr, &value);
639  } else {
640  return false;
641  }
642  break;
643  case Type_uint32:
644  if (length == sizeof(uint32_t)) {
645  tagtype = GetTag(tag, length, read_ptr, &value);
646  } else {
647  return false;
648  }
649  break;
650  case Type_int32:
651  if (length == sizeof(int32_t)) {
652  tagtype = GetTag(tag, length, read_ptr, &value);
653  } else {
654  return false;
655  }
656  break;
657  case Type_uint64:
658  if (length == sizeof(uint64_t)) {
659  tagtype = GetTag(tag, length, read_ptr, &value);
660  } else {
661  return false;
662  }
663  break;
664  case Type_int64:
665  if (length == sizeof(int64_t)) {
666  tagtype = GetTag(tag, length, read_ptr, &value);
667  } else {
668  return false;
669  }
670  break;
671  case Type_string:
672  case Type_BinaryData:
673  binary_data.assign(read_ptr, read_ptr + length);
674  tagtype = Tag(tag);
675  break;
676  default:
677  break;
678  }
679 
680  switch (tagtype) {
681  case TrackDuration:
682  track_duration = value;
683  break;
684  case TrackTrickPlayRate:
685  trick_play_rate = value;
686  break;
687  case VideoStreamId:
688  video_pes_stream_id = value;
689  break;
690  case AudioStreamId:
691  audio_pes_stream_id = value;
692  break;
693  case VideoWidth:
694  video_width = (uint16_t)value;
695  break;
696  case VideoHeight:
697  video_height = (uint16_t)value;
698  break;
699  case AudioNumChannels:
700  num_channels = (uint8_t)value;
701  break;
702  case VideoType:
703  has_video = true;
704  break;
705  case AudioType:
706  has_audio = true;
707  break;
708  case VideoPixelWidth:
709  pixel_width = static_cast<uint32_t>(value);
710  break;
711  case VideoPixelHeight:
712  pixel_height = static_cast<uint32_t>(value);
713  break;
714  case Audio_EsDescriptor: {
715  mp4::ESDescriptor descriptor;
716  if (!descriptor.Parse(binary_data)) {
717  LOG(ERROR) <<
718  "Could not extract AudioSpecificConfig from ES_Descriptor";
719  return false;
720  }
721  audio_codec_config = descriptor.decoder_specific_info();
722  break;
723  }
724  case Audio_EC3SpecificData:
725  case Audio_DtsSpecificData:
726  case Audio_AC3SpecificData:
727  LOG(ERROR) << "Audio type not supported.";
728  return false;
729  case AVCDecoderConfigurationRecord:
730  video_codec_config = binary_data;
731  break;
732  default:
733  break;
734  }
735 
736  read_ptr += length;
737  index_metadata_max_size -= length;
738  }
739  // End Index metadata
740  index_size = read_ptr - vector_as_array(&index_data_);
741 
742  if (has_video) {
743  VideoCodec video_codec = kCodecH264;
744  stream_infos_.push_back(new VideoStreamInfo(
745  stream_id_count_, time_scale, track_duration, video_codec,
746  std::string(), std::string(), video_width, video_height,
747  pixel_width, pixel_height, trick_play_rate, nalu_length_size,
748  vector_as_array(&video_codec_config), video_codec_config.size(),
749  true));
750  program_demux_stream_map_[base::UintToString(index_program_id_) + ":" +
751  base::UintToString(video_pes_stream_id ?
752  video_pes_stream_id :
753  kDefaultVideoStreamId)] =
754  stream_id_count_++;
755  }
756  if (has_audio) {
757  AudioCodec audio_codec = kCodecAAC;
758  // TODO(beil): Pass in max and average bitrate in wvm container.
759  stream_infos_.push_back(new AudioStreamInfo(
760  stream_id_count_, time_scale, track_duration, audio_codec,
761  std::string(), std::string(), kAacSampleSizeBits, num_channels,
762  sampling_frequency, 0, 0, vector_as_array(&audio_codec_config),
763  audio_codec_config.size(), true));
764  program_demux_stream_map_[base::UintToString(index_program_id_) + ":" +
765  base::UintToString(audio_pes_stream_id ?
766  audio_pes_stream_id :
767  kDefaultAudioStreamId)] =
768  stream_id_count_++;
769  }
770  }
771 
772  index_program_id_++;
773  index_data_.clear();
774  return true;
775 }
776 
777 bool WvmMediaParser::DemuxNextPes(bool is_program_end) {
778  bool output_encrypted_sample = false;
779  if (!sample_data_.empty() && (prev_pes_flags_1_ & kScramblingBitsMask)) {
780  // Decrypt crypto unit.
781  if (!content_decryptor_) {
782  output_encrypted_sample = true;
783  } else {
784  content_decryptor_->Decrypt(&sample_data_[crypto_unit_start_pos_],
785  sample_data_.size() - crypto_unit_start_pos_,
786  &sample_data_[crypto_unit_start_pos_]);
787  }
788  }
789  // Demux media sample if we are at program end or if we are not at a
790  // continuation PES.
791  if ((pes_flags_2_ & kPesOptPts) || is_program_end) {
792  if (!sample_data_.empty()) {
793  if (!Output(output_encrypted_sample)) {
794  return false;
795  }
796  }
797  StartMediaSampleDemux();
798  }
799 
800  crypto_unit_start_pos_ = sample_data_.size();
801  return true;
802 }
803 
804 void WvmMediaParser::StartMediaSampleDemux() {
805  bool is_key_frame = ((pes_flags_1_ & kPesOptAlign) != 0);
806  media_sample_ = MediaSample::CreateEmptyMediaSample();
807  media_sample_->set_dts(dts_);
808  media_sample_->set_pts(pts_);
809  media_sample_->set_is_key_frame(is_key_frame);
810 
811  sample_data_.clear();
812 }
813 
// Finalizes |media_sample_| from the bytes in |sample_data_| and either queues
// it (parser not yet initialized) or emits it.
//
// When |output_encrypted_sample| is true the raw bytes are attached unchanged
// and the sample is flagged encrypted. Otherwise the stream type is taken from
// |prev_pes_stream_id_|: video data is converted from byte stream to NAL unit
// stream, audio data has its ADTS header removed. While the parser is not yet
// initialized, the matching stream infos are completed (extra data, codec
// string, dimensions / sampling frequency), and |init_cb_| is run once every
// stream info has a codec string.
//
// Returns false on a conversion or parse failure, when the current
// (program, PES stream) pair has no registered demux stream, or when emitting
// fails.
814 bool WvmMediaParser::Output(bool output_encrypted_sample) {
815  if (output_encrypted_sample) {
816  media_sample_->set_data(vector_as_array(&sample_data_),
817  sample_data_.size());
818  media_sample_->set_is_encrypted(true);
819  } else {
820  if ((prev_pes_stream_id_ & kPesStreamIdVideoMask) == kPesStreamIdVideo) {
821  // Convert video stream to unit stream and get config.
822  std::vector<uint8_t> nal_unit_stream;
823  if (!byte_to_unit_stream_converter_.ConvertByteStreamToNalUnitStream(
824  vector_as_array(&sample_data_), sample_data_.size(),
825  &nal_unit_stream)) {
826  LOG(ERROR) << "Could not convert h.264 byte stream sample";
827  return false;
828  }
829  media_sample_->set_data(nal_unit_stream.data(), nal_unit_stream.size());
830  if (!is_initialized_) {
831  // Set extra data for video stream from AVC Decoder Config Record.
832  // Also, set codec string from the AVC Decoder Config Record.
833  std::vector<uint8_t> decoder_config_record;
834  byte_to_unit_stream_converter_.GetAVCDecoderConfigurationRecord(
835  &decoder_config_record);
  // Only video stream infos that still lack a codec string are updated.
836  for (uint32_t i = 0; i < stream_infos_.size(); i++) {
837  if (stream_infos_[i]->stream_type() == media::kStreamVideo &&
838  stream_infos_[i]->codec_string().empty()) {
839  const std::vector<uint8_t>* stream_config;
840  if (stream_infos_[i]->extra_data().empty()) {
841  // Decoder config record not available for stream. Use the one
842  // computed from the first video stream.
843  stream_infos_[i]->set_extra_data(decoder_config_record);
844  stream_config = &decoder_config_record;
845  } else {
846  // Use stream-specific config record.
847  stream_config = &stream_infos_[i]->extra_data();
848  }
849  DCHECK(stream_config);
850 
  // The stream_type() check above is what justifies this downcast.
851  VideoStreamInfo* video_stream_info =
852  reinterpret_cast<VideoStreamInfo*>(stream_infos_[i].get());
853  AVCDecoderConfiguration avc_config;
854  if (!avc_config.Parse(*stream_config)) {
855  LOG(WARNING) << "Failed to parse AVCDecoderConfigurationRecord. "
856  "Using computed configuration record instead.";
857  video_stream_info->set_extra_data(decoder_config_record);
858  if (!avc_config.Parse(decoder_config_record)) {
859  LOG(ERROR) << "Failed to parse AVCDecoderConfigurationRecord.";
860  return false;
861  }
862  }
863  video_stream_info->set_codec_string(avc_config.GetCodecString());
864 
  // Values from the decoder configuration record override the index metadata.
  // The pixel-size warning is only logged when the metadata held a non-zero
  // value.
865  if (avc_config.pixel_width() != video_stream_info->pixel_width() ||
866  avc_config.pixel_height() !=
867  video_stream_info->pixel_height()) {
868  LOG_IF(WARNING, video_stream_info->pixel_width() != 0 ||
869  video_stream_info->pixel_height() != 0)
870  << "Pixel aspect ratio in WVM metadata ("
871  << video_stream_info->pixel_width() << ","
872  << video_stream_info->pixel_height()
873  << ") does not match with SAR in "
874  "AVCDecoderConfigurationRecord ("
875  << avc_config.pixel_width() << ","
876  << avc_config.pixel_height()
877  << "). Use AVCDecoderConfigurationRecord.";
878  video_stream_info->set_pixel_width(avc_config.pixel_width());
879  video_stream_info->set_pixel_height(avc_config.pixel_height());
880  }
881  if (avc_config.coded_width() != video_stream_info->width() ||
882  avc_config.coded_height() != video_stream_info->height()) {
883  LOG(WARNING) << "Resolution in WVM metadata ("
884  << video_stream_info->width() << ","
885  << video_stream_info->height()
886  << ") does not match with resolution in "
887  "AVCDecoderConfigurationRecord ("
888  << avc_config.coded_width() << ","
889  << avc_config.coded_height()
890  << "). Use AVCDecoderConfigurationRecord.";
891  video_stream_info->set_width(avc_config.coded_width());
892  video_stream_info->set_height(avc_config.coded_height());
893  }
894  }
895  }
896  }
897  } else if ((prev_pes_stream_id_ & kPesStreamIdAudioMask) ==
898  kPesStreamIdAudio) {
899  // Set data on the audio stream.
  // NOTE(review): listing line 900 is missing here. The next line is the tail
  // of a call whose result is presumably stored in |frame_size| (used below
  // but not declared anywhere in view) - restore it from the original file.
901  vector_as_array(&sample_data_), kAdtsHeaderMinSize);
902  media::mp2t::AdtsHeader adts_header;
903  const uint8_t* frame_ptr = vector_as_array(&sample_data_);
904  if (!adts_header.Parse(frame_ptr, frame_size)) {
905  LOG(ERROR) << "Could not parse ADTS header";
906  return false;
907  }
  // The sample keeps only the bytes that follow the ADTS header.
908  size_t header_size = adts_header.GetAdtsHeaderSize(frame_ptr,
909  frame_size);
910  media_sample_->set_data(frame_ptr + header_size,
911  frame_size - header_size);
912  if (!is_initialized_) {
913  for (uint32_t i = 0; i < stream_infos_.size(); i++) {
914  if (stream_infos_[i]->stream_type() == media::kStreamAudio &&
915  stream_infos_[i]->codec_string().empty()) {
916  AudioStreamInfo* audio_stream_info =
917  reinterpret_cast<AudioStreamInfo*>(stream_infos_[i].get());
918  if (audio_stream_info->extra_data().empty()) {
919  // Set AudioStreamInfo fields using information from the ADTS
920  // header.
921  audio_stream_info->set_sampling_frequency(
922  adts_header.GetSamplingFrequency());
923  std::vector<uint8_t> audio_specific_config;
924  if (!adts_header.GetAudioSpecificConfig(&audio_specific_config)) {
925  LOG(ERROR) << "Could not compute AACaudiospecificconfig";
926  return false;
927  }
928  audio_stream_info->set_extra_data(audio_specific_config);
929  audio_stream_info->set_codec_string(
  // NOTE(review): listing line 930 is missing here (the callee that takes
  // the two arguments on the next line) - restore it from the original file.
931  kCodecAAC, adts_header.GetObjectType()));
932  } else {
933  // Set AudioStreamInfo fields using information from the
934  // AACAudioSpecificConfig record.
935  mp4::AACAudioSpecificConfig aac_config;
936  if (!aac_config.Parse(stream_infos_[i]->extra_data())) {
937  LOG(ERROR) << "Could not parse AACAudioSpecificconfig";
938  return false;
939  }
940  audio_stream_info->set_sampling_frequency(aac_config.frequency());
941  audio_stream_info->set_codec_string(
  // NOTE(review): listing line 942 is missing here as well, same as above.
943  kCodecAAC, aac_config.audio_object_type()));
944  }
945  }
946  }
947  }
948  }
949  }
950 
951  if (!is_initialized_) {
952  bool all_streams_have_config = true;
953  // Check if all collected stream infos have a codec string set.
954  for (uint32_t i = 0; i < stream_infos_.size(); i++) {
955  if (stream_infos_[i]->codec_string().empty()) {
956  all_streams_have_config = false;
957  break;
958  }
959  }
  // Initialization completes, and |init_cb_| runs, the first time no stream
  // info is left without a codec string.
960  if (all_streams_have_config) {
961  init_cb_.Run(stream_infos_);
962  is_initialized_ = true;
963  }
964  }
965 
966  DCHECK_GT(media_sample_->data_size(), 0UL);
  // Demux stream ids are registered under "<program id>:<PES stream id>".
967  std::string key = base::UintToString(current_program_id_).append(":")
968  .append(base::UintToString(prev_pes_stream_id_));
969  std::map<std::string, uint32_t>::iterator it =
970  program_demux_stream_map_.find(key);
971  if (it == program_demux_stream_map_.end()) {
972  // TODO(ramjic): Log error message here and in other error cases through
973  // this method.
974  return false;
975  }
976  DemuxStreamIdMediaSample demux_stream_media_sample;
977  demux_stream_media_sample.parsed_audio_or_video_stream_id =
978  prev_pes_stream_id_;
979  demux_stream_media_sample.demux_stream_id = (*it).second;
980  demux_stream_media_sample.media_sample = media_sample_;
981  // Check if sample can be emitted.
982  if (!is_initialized_) {
983  media_sample_queue_.push_back(demux_stream_media_sample);
984  } else {
985  // flush the sample queue and emit all queued samples.
  // EmitPendingSamples() itself loops until the queue is empty or an emit
  // fails, so this outer loop body runs at most once.
986  while (!media_sample_queue_.empty()) {
987  if (!EmitPendingSamples())
988  return false;
989  }
990  // Emit current sample.
991  if (!EmitSample(prev_pes_stream_id_, (*it).second, media_sample_, false))
992  return false;
993  }
994  return true;
995 }
996 
997 bool WvmMediaParser::EmitSample(uint32_t parsed_audio_or_video_stream_id,
998  uint32_t stream_id,
999  scoped_refptr<MediaSample>& new_sample,
1000  bool isLastSample) {
1001  DCHECK(new_sample);
1002  if (isLastSample) {
1003  if ((parsed_audio_or_video_stream_id & kPesStreamIdVideoMask) ==
1004  kPesStreamIdVideo) {
1005  new_sample->set_duration(prev_media_sample_data_.video_sample_duration);
1006  } else if ((parsed_audio_or_video_stream_id & kPesStreamIdAudioMask) ==
1007  kPesStreamIdAudio) {
1008  new_sample->set_duration(prev_media_sample_data_.audio_sample_duration);
1009  }
1010  if (!new_sample_cb_.Run(stream_id, new_sample)) {
1011  LOG(ERROR) << "Failed to process the last sample.";
1012  return false;
1013  }
1014  return true;
1015  }
1016 
1017  // Cannot emit current sample. Compute duration first and then,
1018  // emit previous sample.
1019  if ((parsed_audio_or_video_stream_id & kPesStreamIdVideoMask) ==
1020  kPesStreamIdVideo) {
1021  if (prev_media_sample_data_.video_sample == NULL) {
1022  prev_media_sample_data_.video_sample = new_sample;
1023  prev_media_sample_data_.video_stream_id = stream_id;
1024  return true;
1025  }
1026  prev_media_sample_data_.video_sample->set_duration(
1027  new_sample->dts() - prev_media_sample_data_.video_sample->dts());
1028  prev_media_sample_data_.video_sample_duration =
1029  prev_media_sample_data_.video_sample->duration();
1030  if (!new_sample_cb_.Run(prev_media_sample_data_.video_stream_id,
1031  prev_media_sample_data_.video_sample)) {
1032  LOG(ERROR) << "Failed to process the video sample.";
1033  return false;
1034  }
1035  prev_media_sample_data_.video_sample = new_sample;
1036  prev_media_sample_data_.video_stream_id = stream_id;
1037  } else if ((parsed_audio_or_video_stream_id & kPesStreamIdAudioMask) ==
1038  kPesStreamIdAudio) {
1039  if (prev_media_sample_data_.audio_sample == NULL) {
1040  prev_media_sample_data_.audio_sample = new_sample;
1041  prev_media_sample_data_.audio_stream_id = stream_id;
1042  return true;
1043  }
1044  prev_media_sample_data_.audio_sample->set_duration(
1045  new_sample->dts() - prev_media_sample_data_.audio_sample->dts());
1046  prev_media_sample_data_.audio_sample_duration =
1047  prev_media_sample_data_.audio_sample->duration();
1048  if (!new_sample_cb_.Run(prev_media_sample_data_.audio_stream_id,
1049  prev_media_sample_data_.audio_sample)) {
1050  LOG(ERROR) << "Failed to process the audio sample.";
1051  return false;
1052  }
1053  prev_media_sample_data_.audio_sample = new_sample;
1054  prev_media_sample_data_.audio_stream_id = stream_id;
1055  }
1056  return true;
1057 }
1058 
1059 bool WvmMediaParser::GetAssetKey(const uint32_t asset_id,
1060  EncryptionKey* encryption_key) {
1061  DCHECK(decryption_key_source_);
1062  Status status = decryption_key_source_->FetchKeys(asset_id);
1063  if (!status.ok()) {
1064  LOG(ERROR) << "Fetch Key(s) failed for AssetID = " << asset_id
1065  << ", error = " << status;
1066  return false;
1067  }
1068 
1069  status = decryption_key_source_->GetKey(KeySource::TRACK_TYPE_HD,
1070  encryption_key);
1071  if (!status.ok()) {
1072  LOG(ERROR) << "Fetch Key(s) failed for AssetID = " << asset_id
1073  << ", error = " << status;
1074  return false;
1075  }
1076 
1077  return true;
1078 }
1079 
1080 bool WvmMediaParser::ProcessEcm() {
1081  // An error will be returned later if the samples need to be decrypted.
1082  if (!decryption_key_source_)
1083  return true;
1084 
1085  if (current_program_id_ > 0) {
1086  return true;
1087  }
1088  if (ecm_.size() != kEcmSizeBytes) {
1089  LOG(ERROR) << "Unexpected ECM size = " << ecm_.size()
1090  << ", expected size = " << kEcmSizeBytes;
1091  return false;
1092  }
1093  const uint8_t* ecm_data = ecm_.data();
1094  DCHECK(ecm_data);
1095  ecm_data += sizeof(uint32_t); // old version field - skip.
1096  ecm_data += sizeof(uint32_t); // clear lead - skip.
1097  ecm_data += sizeof(uint32_t); // system id(includes ECM version) - skip.
1098  uint32_t asset_id = ntohlFromBuffer(ecm_data);
1099  if (asset_id == 0) {
1100  LOG(ERROR) << "AssetID in ECM is not valid.";
1101  return false;
1102  }
1103  ecm_data += sizeof(uint32_t); // asset_id.
1104  EncryptionKey encryption_key;
1105  if (!GetAssetKey(asset_id, &encryption_key)) {
1106  return false;
1107  }
1108  if (encryption_key.key.size() < kAssetKeySizeBytes) {
1109  LOG(ERROR) << "Asset Key size of " << encryption_key.key.size()
1110  << " for AssetID = " << asset_id
1111  << " is less than minimum asset key size.";
1112  return false;
1113  }
1114  // Legacy WVM content may have asset keys > 16 bytes.
1115  // Use only the first 16 bytes of the asset key to get
1116  // the content key.
1117  std::vector<uint8_t> asset_key(
1118  encryption_key.key.begin(),
1119  encryption_key.key.begin() + kAssetKeySizeBytes);
1120  std::vector<uint8_t> iv(kInitializationVectorSizeBytes);
1121  AesCbcCtsDecryptor asset_decryptor;
1122  if (!asset_decryptor.InitializeWithIv(asset_key, iv)) {
1123  LOG(ERROR) << "Failed to initialize asset_decryptor.";
1124  return false;
1125  }
1126 
1127  const size_t content_key_buffer_size =
1128  kEcmFlagsSizeBytes + kEcmContentKeySizeBytes +
1129  kEcmPaddingSizeBytes; // flags + contentKey + padding.
1130  std::vector<uint8_t> content_key_buffer(content_key_buffer_size);
1131  asset_decryptor.Decrypt(
1132  ecm_data, content_key_buffer_size, vector_as_array(&content_key_buffer));
1133 
1134  std::vector<uint8_t> decrypted_content_key_vec(
1135  content_key_buffer.begin() + 4,
1136  content_key_buffer.begin() + 20);
1137  scoped_ptr<AesCbcCtsDecryptor> content_decryptor(new AesCbcCtsDecryptor);
1138  if (!content_decryptor->InitializeWithIv(decrypted_content_key_vec, iv)) {
1139  LOG(ERROR) << "Failed to initialize content decryptor.";
1140  return false;
1141  }
1142 
1143  content_decryptor_ = content_decryptor.Pass();
1144  return true;
1145 }
1146 
1147 DemuxStreamIdMediaSample::DemuxStreamIdMediaSample() :
1148  demux_stream_id(0),
1149  parsed_audio_or_video_stream_id(0) {}
1150 
1151 DemuxStreamIdMediaSample::~DemuxStreamIdMediaSample() {}
1152 
1153 PrevSampleData::PrevSampleData() {
1154  Reset();
1155 }
1156 
1157 PrevSampleData::~PrevSampleData() {}
1158 
1159 void PrevSampleData::Reset() {
1160  audio_sample = NULL;
1161  video_sample = NULL;
1162  audio_stream_id = 0;
1163  video_stream_id = 0;
1164  audio_sample_duration = 0;
1165  video_sample_duration = 0;
1166 }
1167 
1168 } // namespace wvm
1169 } // namespace media
1170 } // namespace edash_packager
static size_t GetAdtsFrameSize(const uint8_t *data, size_t num_bytes)
Definition: adts_header.cc:23
KeySource is responsible for encryption key acquisition.
Definition: key_source.h:29
static scoped_refptr< MediaSample > CreateEmptyMediaSample()
Create a MediaSample object with default members.
Definition: media_sample.cc:74
static std::string GetCodecString(AudioCodec codec, uint8_t audio_object_type)