DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs Enumerator
wvm_media_parser.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "packager/media/formats/wvm/wvm_media_parser.h"
6 
7 #include <map>
8 #include <sstream>
9 #include <vector>
10 
11 #include "packager/base/stl_util.h"
12 #include "packager/base/strings/string_number_conversions.h"
13 #include "packager/media/base/aes_encryptor.h"
14 #include "packager/media/base/audio_stream_info.h"
15 #include "packager/media/base/key_source.h"
16 #include "packager/media/base/media_sample.h"
17 #include "packager/media/base/status.h"
18 #include "packager/media/base/video_stream_info.h"
19 #include "packager/media/filters/h264_parser.h"
20 #include "packager/media/formats/mp2t/adts_header.h"
21 #include "packager/media/formats/mp4/aac_audio_specific_config.h"
22 #include "packager/media/formats/mp4/es_descriptor.h"
23 
// Evaluates to true when stream id |x| is none of the ids listed below, in
// which case Parse() goes on to read the PES extension bytes (flags and
// header-data length) after the packet length; for the listed ids Parse()
// treats everything after the packet length as payload.
// |x| is parenthesized so that an expression argument such as (a | b) is
// compared as a whole. |x| is evaluated several times and therefore must not
// have side effects.
#define HAS_HEADER_EXTENSION(x)                          \
  (((x) != 0xBC) && ((x) != 0xBE) && ((x) != 0xBF) &&    \
   ((x) != 0xF0) && ((x) != 0xF2) && ((x) != 0xF8) &&    \
   ((x) != 0xFF))
27 
namespace {

// ---- Timing ---------------------------------------------------------------
// Used as the time scale of every stream info created from the index.
const uint32_t kMpeg2ClockRate = 90000;
// Placeholder sampling frequency for all audio streams, which
// will be overwritten after filter parsing.
const uint32_t kDefaultSamplingFrequency = 100;

// ---- Start code bytes -----------------------------------------------------
// Parse() looks for the byte sequence kStartCode1, kStartCode2, kStartCode3
// and then dispatches on the fourth byte.
const uint32_t kStartCode1 = 0x00;
const uint32_t kStartCode2 = 0x00;
const uint32_t kStartCode3 = 0x01;
const uint32_t kStartCode4Pack = 0xBA;
const uint32_t kStartCode4System = 0xBB;
const uint32_t kStartCode4ProgramEnd = 0xB9;

// ---- PES stream ids -------------------------------------------------------
const uint32_t kPsmStreamId = 0xBC;
const uint32_t kPaddingStreamId = 0xBE;
const uint32_t kIndexStreamId = 0xBF;  // private_stream_2
const uint32_t kEcmStreamId = 0xF0;
const uint32_t kV2MetadataStreamId = 0xF1;  // EMM_stream
// A stream id is treated as video/audio when (id & mask) equals the id below.
const uint32_t kPesStreamIdVideoMask = 0xF0;
const uint32_t kPesStreamIdVideo = 0xE0;
const uint32_t kPesStreamIdAudioMask = 0xE0;
const uint32_t kPesStreamIdAudio = 0xC0;
// Default audio and video PES stream IDs.
const uint8_t kDefaultAudioStreamId = kPesStreamIdAudio;
const uint8_t kDefaultVideoStreamId = kPesStreamIdVideo;

// ---- PES header flag bits -------------------------------------------------
// Tested against the second PES flags byte.
const uint32_t kPesOptPts = 0x80;
const uint32_t kPesOptDts = 0x40;
// Tested against the first PES flags byte.
const uint32_t kPesOptAlign = 0x04;
const uint32_t kScramblingBitsMask = 0x30;

// ---- Index ----------------------------------------------------------------
const uint32_t kIndexMagic = 0x49444d69;
const uint32_t kIndexVersion4HeaderSize = 12;
const uint32_t kVersion4 = 4;

// ---- Elementary stream properties ------------------------------------------
const int kAdtsHeaderMinSize = 7;
const uint8_t kAacSampleSizeBits = 16;
// Applies to all video streams.
const uint8_t kNaluLengthSize = 4;  // unit is bytes.

// ---- ECM / key material sizes -----------------------------------------------
const uint16_t kEcmSizeBytes = 80;
const uint32_t kInitializationVectorSizeBytes = 16;
// ECM fields for processing.
const uint32_t kEcmContentKeySizeBytes = 16;
const uint32_t kEcmDCPFlagsSizeBytes = 3;
const uint32_t kEcmCCIFlagsSizeBytes = 1;
const uint32_t kEcmFlagsSizeBytes =
    kEcmCCIFlagsSizeBytes + kEcmDCPFlagsSizeBytes;
const uint32_t kEcmPaddingSizeBytes = 12;
const uint32_t kAssetKeySizeBytes = 16;

// Value-type codes of index metadata entries; ParseIndexEntry() switches on
// them to validate the entry length before reading the value.
enum Type {
  Type_void = 0,
  Type_uint8 = 1,
  Type_int8 = 2,
  Type_uint16 = 3,
  Type_int16 = 4,
  Type_uint32 = 5,
  Type_int32 = 6,
  Type_uint64 = 7,
  Type_int64 = 8,
  Type_string = 9,
  Type_BinaryData = 10
};

}  // namespace
87 
88 namespace edash_packager {
89 namespace media {
90 namespace wvm {
91 
92 WvmMediaParser::WvmMediaParser()
93  : is_initialized_(false),
94  parse_state_(StartCode1),
95  is_psm_needed_(true),
96  skip_bytes_(0),
97  metadata_is_complete_(false),
98  current_program_id_(0),
99  pes_stream_id_(0),
100  prev_pes_stream_id_(0),
101  pes_packet_bytes_(0),
102  pes_flags_1_(0),
103  pes_flags_2_(0),
104  prev_pes_flags_1_(0),
105  pes_header_data_bytes_(0),
106  timestamp_(0),
107  pts_(0),
108  dts_(0),
109  index_program_id_(0),
110  media_sample_(NULL),
111  crypto_unit_start_pos_(0),
112  stream_id_count_(0),
113  decryption_key_source_(NULL) {
114 }
115 
116 WvmMediaParser::~WvmMediaParser() {}
117 
118 void WvmMediaParser::Init(const InitCB& init_cb,
119  const NewSampleCB& new_sample_cb,
120  KeySource* decryption_key_source) {
121  DCHECK(!is_initialized_);
122  DCHECK(!init_cb.is_null());
123  DCHECK(!new_sample_cb.is_null());
124  decryption_key_source_ = decryption_key_source;
125  init_cb_ = init_cb;
126  new_sample_cb_ = new_sample_cb;
127 }
128 
129 bool WvmMediaParser::Parse(const uint8_t* buf, int size) {
130  uint32_t num_bytes, prev_size;
131  num_bytes = prev_size = 0;
132  uint8_t* read_ptr = (uint8_t*)(&buf[0]);
133  uint8_t* end = read_ptr + size;
134 
135  while (read_ptr < end) {
136  switch(parse_state_) {
137  case StartCode1:
138  if (*read_ptr == kStartCode1) {
139  parse_state_ = StartCode2;
140  }
141  break;
142  case StartCode2:
143  if (*read_ptr == kStartCode2) {
144  parse_state_ = StartCode3;
145  } else {
146  parse_state_ = StartCode1;
147  }
148  break;
149  case StartCode3:
150  if (*read_ptr == kStartCode3) {
151  parse_state_ = StartCode4;
152  } else {
153  parse_state_ = StartCode1;
154  }
155  break;
156  case StartCode4:
157  switch (*read_ptr) {
158  case kStartCode4Pack:
159  parse_state_ = PackHeader1;
160  break;
161  case kStartCode4System:
162  parse_state_ = SystemHeader1;
163  break;
164  case kStartCode4ProgramEnd:
165  parse_state_ = ProgramEnd;
166  continue;
167  default:
168  parse_state_ = PesStreamId;
169  continue;
170  }
171  break;
172  case PackHeader1:
173  parse_state_ = PackHeader2;
174  break;
175  case PackHeader2:
176  parse_state_ = PackHeader3;
177  break;
178  case PackHeader3:
179  parse_state_ = PackHeader4;
180  break;
181  case PackHeader4:
182  parse_state_ = PackHeader5;
183  break;
184  case PackHeader5:
185  parse_state_ = PackHeader6;
186  break;
187  case PackHeader6:
188  parse_state_ = PackHeader7;
189  break;
190  case PackHeader7:
191  parse_state_ = PackHeader8;
192  break;
193  case PackHeader8:
194  parse_state_ = PackHeader9;
195  break;
196  case PackHeader9:
197  parse_state_ = PackHeader10;
198  break;
199  case PackHeader10:
200  skip_bytes_ = *read_ptr & 0x07;
201  parse_state_ = PackHeaderStuffingSkip;
202  break;
203  case SystemHeader1:
204  skip_bytes_ = *read_ptr;
205  skip_bytes_ <<= 8;
206  parse_state_ = SystemHeader2;
207  break;
208  case SystemHeader2:
209  skip_bytes_ |= *read_ptr;
210  parse_state_ = SystemHeaderSkip;
211  break;
212  case PackHeaderStuffingSkip:
213  if ((end - read_ptr) >= (int32_t)skip_bytes_) {
214  read_ptr += skip_bytes_;
215  skip_bytes_ = 0;
216  parse_state_ = StartCode1;
217  } else {
218  skip_bytes_ -= (end - read_ptr);
219  read_ptr = end;
220  }
221  continue;
222  case SystemHeaderSkip:
223  if ((end - read_ptr) >= (int32_t)skip_bytes_) {
224  read_ptr += skip_bytes_;
225  skip_bytes_ = 0;
226  parse_state_ = StartCode1;
227  } else {
228  uint32_t remaining_size = end - read_ptr;
229  skip_bytes_ -= remaining_size;
230  read_ptr = end;
231  }
232  continue;
233  case PesStreamId:
234  pes_stream_id_ = *read_ptr;
235  if (!metadata_is_complete_ &&
236  (pes_stream_id_ != kPsmStreamId) &&
237  (pes_stream_id_ != kIndexStreamId) &&
238  (pes_stream_id_ != kEcmStreamId) &&
239  (pes_stream_id_ != kV2MetadataStreamId) &&
240  (pes_stream_id_ != kPaddingStreamId)) {
241  metadata_is_complete_ = true;
242  }
243  parse_state_ = PesPacketLength1;
244  break;
245  case PesPacketLength1:
246  pes_packet_bytes_ = *read_ptr;
247  pes_packet_bytes_ <<= 8;
248  parse_state_ = PesPacketLength2;
249  break;
250  case PesPacketLength2:
251  pes_packet_bytes_ |= *read_ptr;
252  if (HAS_HEADER_EXTENSION(pes_stream_id_)) {
253  parse_state_ = PesExtension1;
254  } else {
255  pes_flags_1_ = pes_flags_2_ = 0;
256  pes_header_data_bytes_ = 0;
257  parse_state_ = PesPayload;
258  }
259  break;
260  case PesExtension1:
261  prev_pes_flags_1_ = pes_flags_1_;
262  pes_flags_1_ = *read_ptr;
263  *read_ptr &= ~kScramblingBitsMask;
264  --pes_packet_bytes_;
265  parse_state_ = PesExtension2;
266  break;
267  case PesExtension2:
268  pes_flags_2_ = *read_ptr;
269  --pes_packet_bytes_;
270  parse_state_ = PesExtension3;
271  break;
272  case PesExtension3:
273  pes_header_data_bytes_ = *read_ptr;
274  --pes_packet_bytes_;
275  if (pes_flags_2_ & kPesOptPts) {
276  parse_state_ = Pts1;
277  } else {
278  parse_state_ = PesHeaderData;
279  }
280  break;
281  case Pts1:
282  timestamp_ = (*read_ptr & 0x0E);
283  --pes_header_data_bytes_;
284  --pes_packet_bytes_;
285  parse_state_ = Pts2;
286  break;
287  case Pts2:
288  timestamp_ <<= 7;
289  timestamp_ |= *read_ptr;
290  --pes_header_data_bytes_;
291  --pes_packet_bytes_;
292  parse_state_ = Pts3;
293  break;
294  case Pts3:
295  timestamp_ <<= 7;
296  timestamp_ |= *read_ptr >> 1;
297  --pes_header_data_bytes_;
298  --pes_packet_bytes_;
299  parse_state_ = Pts4;
300  break;
301  case Pts4:
302  timestamp_ <<= 8;
303  timestamp_ |= *read_ptr;
304  --pes_header_data_bytes_;
305  --pes_packet_bytes_;
306  parse_state_ = Pts5;
307  break;
308  case Pts5:
309  timestamp_ <<= 7;
310  timestamp_ |= *read_ptr >> 1;
311  pts_ = timestamp_;
312  --pes_header_data_bytes_;
313  --pes_packet_bytes_;
314  if (pes_flags_2_ & kPesOptDts) {
315  parse_state_ = Dts1;
316  } else {
317  dts_ = pts_;
318  parse_state_ = PesHeaderData;
319  }
320  break;
321  case Dts1:
322  timestamp_ = (*read_ptr & 0x0E);
323  --pes_header_data_bytes_;
324  --pes_packet_bytes_;
325  parse_state_ = Dts2;
326  break;
327  case Dts2:
328  timestamp_ <<= 7;
329  timestamp_ |= *read_ptr;
330  --pes_header_data_bytes_;
331  --pes_packet_bytes_;
332  parse_state_ = Dts3;
333  break;
334  case Dts3:
335  timestamp_ <<= 7;
336  timestamp_ |= *read_ptr >> 1;
337  --pes_header_data_bytes_;
338  --pes_packet_bytes_;
339  parse_state_ = Dts4;
340  break;
341  case Dts4:
342  timestamp_ <<= 8;
343  timestamp_ |= *read_ptr;
344  --pes_header_data_bytes_;
345  --pes_packet_bytes_;
346  parse_state_ = Dts5;
347  break;
348  case Dts5:
349  timestamp_ <<= 7;
350  timestamp_ |= *read_ptr >> 1;
351  dts_ = timestamp_;
352  --pes_header_data_bytes_;
353  --pes_packet_bytes_;
354  parse_state_ = PesHeaderData;
355  break;
356  case PesHeaderData:
357  num_bytes = end - read_ptr;
358  if (num_bytes >= pes_header_data_bytes_) {
359  num_bytes = pes_header_data_bytes_;
360  parse_state_ = PesPayload;
361  }
362  pes_header_data_bytes_ -= num_bytes;
363  pes_packet_bytes_ -= num_bytes;
364  read_ptr += num_bytes;
365  continue;
366  case PesPayload:
367  switch (pes_stream_id_) {
368  case kPsmStreamId:
369  psm_data_.clear();
370  parse_state_ = PsmPayload;
371  continue;
372  case kPaddingStreamId:
373  parse_state_ = Padding;
374  continue;
375  case kEcmStreamId:
376  ecm_.clear();
377  parse_state_ = EcmPayload;
378  continue;
379  case kIndexStreamId:
380  parse_state_ = IndexPayload;
381  continue;
382  default:
383  if (!DemuxNextPes(false)) {
384  return false;
385  }
386  parse_state_ = EsPayload;
387  }
388  continue;
389  case PsmPayload:
390  num_bytes = end - read_ptr;
391  if (num_bytes >= pes_packet_bytes_) {
392  num_bytes = pes_packet_bytes_;
393  parse_state_ = StartCode1;
394  }
395  if (num_bytes > 0) {
396  pes_packet_bytes_ -= num_bytes;
397  prev_size = psm_data_.size();
398  psm_data_.resize(prev_size + num_bytes);
399  memcpy(&psm_data_[prev_size], read_ptr, num_bytes);
400  }
401  read_ptr += num_bytes;
402  continue;
403  case EcmPayload:
404  num_bytes = end - read_ptr;
405  if (num_bytes >= pes_packet_bytes_) {
406  num_bytes = pes_packet_bytes_;
407  parse_state_ = StartCode1;
408  }
409  if (num_bytes > 0) {
410  pes_packet_bytes_ -= num_bytes;
411  prev_size = ecm_.size();
412  ecm_.resize(prev_size + num_bytes);
413  memcpy(&ecm_[prev_size], read_ptr, num_bytes);
414  }
415  if ((pes_packet_bytes_ == 0) && !ecm_.empty()) {
416  if (!ProcessEcm()) {
417  return(false);
418  }
419  }
420  read_ptr += num_bytes;
421  continue;
422  case IndexPayload:
423  num_bytes = end - read_ptr;
424  if (num_bytes >= pes_packet_bytes_) {
425  num_bytes = pes_packet_bytes_;
426  parse_state_ = StartCode1;
427  }
428  if (num_bytes > 0) {
429  pes_packet_bytes_ -= num_bytes;
430  prev_size = index_data_.size();
431  index_data_.resize(prev_size + num_bytes);
432  memcpy(&index_data_[prev_size], read_ptr, num_bytes);
433  }
434  if (pes_packet_bytes_ == 0 && !index_data_.empty()) {
435  if (!metadata_is_complete_) {
436  if (!ParseIndexEntry()) {
437  return false;
438  }
439  }
440  }
441  read_ptr += num_bytes;
442  continue;
443  case EsPayload:
444  num_bytes = end - read_ptr;
445  if (num_bytes >= pes_packet_bytes_) {
446  num_bytes = pes_packet_bytes_;
447  parse_state_ = StartCode1;
448  }
449  pes_packet_bytes_ -= num_bytes;
450  if (pes_stream_id_ != kV2MetadataStreamId) {
451  sample_data_.resize(sample_data_.size() + num_bytes);
452  memcpy(&sample_data_[sample_data_.size() - num_bytes], read_ptr,
453  num_bytes);
454  }
455  prev_pes_stream_id_ = pes_stream_id_;
456  read_ptr += num_bytes;
457  continue;
458  case Padding:
459  num_bytes = end - read_ptr;
460  if (num_bytes >= pes_packet_bytes_) {
461  num_bytes = pes_packet_bytes_;
462  parse_state_ = StartCode1;
463  }
464  pes_packet_bytes_ -= num_bytes;
465  read_ptr += num_bytes;
466  continue;
467  case ProgramEnd:
468  parse_state_ = StartCode1;
469  metadata_is_complete_ = true;
470  if (!DemuxNextPes(true)) {
471  return false;
472  }
473  Flush();
474  // Reset.
475  dts_ = pts_ = 0;
476  parse_state_ = StartCode1;
477  prev_media_sample_data_.Reset();
478  current_program_id_++;
479  ecm_.clear();
480  index_data_.clear();
481  psm_data_.clear();
482  break;
483  default:
484  break;
485  }
486  ++read_ptr;
487  }
488  return true;
489 }
490 
491 bool WvmMediaParser::EmitLastSample(uint32_t stream_id,
492  scoped_refptr<MediaSample>& new_sample) {
493  std::string key = base::UintToString(current_program_id_)
494  .append(":")
495  .append(base::UintToString(stream_id));
496  std::map<std::string, uint32_t>::iterator it =
497  program_demux_stream_map_.find(key);
498  if (it == program_demux_stream_map_.end())
499  return false;
500  return EmitSample(stream_id, (*it).second, new_sample, true);
501 }
502 
503 bool WvmMediaParser::EmitPendingSamples() {
504  // Emit queued samples which were built when not initialized.
505  while (!media_sample_queue_.empty()) {
506  DemuxStreamIdMediaSample& demux_stream_media_sample =
507  media_sample_queue_.front();
508  if (!EmitSample(demux_stream_media_sample.parsed_audio_or_video_stream_id,
509  demux_stream_media_sample.demux_stream_id,
510  demux_stream_media_sample.media_sample,
511  false)) {
512  return false;
513  }
514  media_sample_queue_.pop_front();
515  }
516  return true;
517 }
518 
519 void WvmMediaParser::Flush() {
520  // Flush the last audio and video sample for current program.
521  // Reset the streamID when successfully emitted.
522  if (prev_media_sample_data_.audio_sample != NULL) {
523  if (!EmitLastSample(prev_pes_stream_id_,
524  prev_media_sample_data_.audio_sample)) {
525  LOG(ERROR) << "Did not emit last sample for audio stream with ID = "
526  << prev_pes_stream_id_;
527  }
528  }
529  if (prev_media_sample_data_.video_sample != NULL) {
530  if (!EmitLastSample(prev_pes_stream_id_,
531  prev_media_sample_data_.video_sample)) {
532  LOG(ERROR) << "Did not emit last sample for video stream with ID = "
533  << prev_pes_stream_id_;
534  }
535  }
536 }
537 
538 bool WvmMediaParser::ParseIndexEntry() {
539  // Do not parse index entry at the beginning of any track *after* the first
540  // track.
541  if (current_program_id_ > 0) {
542  return true;
543  }
544  uint32_t index_size = 0;
545  if (index_data_.size() < kIndexVersion4HeaderSize) {
546  return false;
547  }
548 
549  const uint8_t* read_ptr = vector_as_array(&index_data_);
550  if (ntohlFromBuffer(read_ptr) != kIndexMagic) {
551  index_data_.clear();
552  return false;
553  }
554  read_ptr += 4;
555 
556  uint32_t version = ntohlFromBuffer(read_ptr);
557  read_ptr += 4;
558  if (version == kVersion4) {
559  index_size = kIndexVersion4HeaderSize + ntohlFromBuffer(read_ptr);
560  if (index_data_.size() < index_size) {
561  // We do not yet have the full index. Keep accumulating index data.
562  return true;
563  }
564  read_ptr += sizeof(uint32_t);
565 
566  // Index metadata
567  uint32_t index_metadata_max_size = index_size - kIndexVersion4HeaderSize;
568  if (index_metadata_max_size < sizeof(uint8_t)) {
569  index_data_.clear();
570  return false;
571  }
572 
573  uint64_t track_duration = 0;
574  int16_t trick_play_rate = 0;
575  uint32_t sampling_frequency = kDefaultSamplingFrequency;
576  uint32_t time_scale = kMpeg2ClockRate;
577  uint16_t video_width = 0;
578  uint16_t video_height = 0;
579  uint32_t pixel_width = 0;
580  uint32_t pixel_height = 0;
581  uint8_t nalu_length_size = kNaluLengthSize;
582  uint8_t num_channels = 0;
583  int audio_pes_stream_id = 0;
584  int video_pes_stream_id = 0;
585  bool has_video = false;
586  bool has_audio = false;
587  std::vector<uint8_t> audio_codec_config;
588  std::vector<uint8_t> video_codec_config;
589  uint8_t num_index_entries = *read_ptr;
590  ++read_ptr;
591  --index_metadata_max_size;
592 
593  for (uint8_t idx = 0; idx < num_index_entries; ++idx) {
594  if (index_metadata_max_size < (2 * sizeof(uint8_t)) + sizeof(uint32_t)) {
595  return false;
596  }
597  uint8_t tag = *read_ptr;
598  ++read_ptr;
599  uint8_t type = *read_ptr;
600  ++read_ptr;
601  uint32_t length = ntohlFromBuffer(read_ptr);
602  read_ptr += sizeof(uint32_t);
603  index_metadata_max_size -= (2 * sizeof(uint8_t)) + sizeof(uint32_t);
604  if (index_metadata_max_size < length) {
605  return false;
606  }
607  int64_t value = 0;
608  Tag tagtype = Unset;
609  std::vector<uint8_t> binary_data;
610  switch (Type(type)) {
611  case Type_uint8:
612  if (length == sizeof(uint8_t)) {
613  tagtype = GetTag(tag, length, read_ptr, &value);
614  } else {
615  return false;
616  }
617  break;
618  case Type_int8:
619  if (length == sizeof(int8_t)) {
620  tagtype = GetTag(tag, length, read_ptr, &value);
621  } else {
622  return false;
623  }
624  break;
625  case Type_uint16:
626  if (length == sizeof(uint16_t)) {
627  tagtype = GetTag(tag, length, read_ptr, &value);
628  } else {
629  return false;
630  }
631  break;
632  case Type_int16:
633  if (length == sizeof(int16_t)) {
634  tagtype = GetTag(tag, length, read_ptr, &value);
635  } else {
636  return false;
637  }
638  break;
639  case Type_uint32:
640  if (length == sizeof(uint32_t)) {
641  tagtype = GetTag(tag, length, read_ptr, &value);
642  } else {
643  return false;
644  }
645  break;
646  case Type_int32:
647  if (length == sizeof(int32_t)) {
648  tagtype = GetTag(tag, length, read_ptr, &value);
649  } else {
650  return false;
651  }
652  break;
653  case Type_uint64:
654  if (length == sizeof(uint64_t)) {
655  tagtype = GetTag(tag, length, read_ptr, &value);
656  } else {
657  return false;
658  }
659  break;
660  case Type_int64:
661  if (length == sizeof(int64_t)) {
662  tagtype = GetTag(tag, length, read_ptr, &value);
663  } else {
664  return false;
665  }
666  break;
667  case Type_string:
668  case Type_BinaryData:
669  binary_data.assign(read_ptr, read_ptr + length);
670  tagtype = Tag(tag);
671  break;
672  default:
673  break;
674  }
675 
676  switch (tagtype) {
677  case TrackDuration:
678  track_duration = value;
679  break;
680  case TrackTrickPlayRate:
681  trick_play_rate = value;
682  break;
683  case VideoStreamId:
684  video_pes_stream_id = value;
685  break;
686  case AudioStreamId:
687  audio_pes_stream_id = value;
688  break;
689  case VideoWidth:
690  video_width = (uint16_t)value;
691  break;
692  case VideoHeight:
693  video_height = (uint16_t)value;
694  break;
695  case AudioNumChannels:
696  num_channels = (uint8_t)value;
697  break;
698  case VideoType:
699  has_video = true;
700  break;
701  case AudioType:
702  has_audio = true;
703  break;
704  case VideoPixelWidth:
705  pixel_width = static_cast<uint32_t>(value);
706  break;
707  case VideoPixelHeight:
708  pixel_height = static_cast<uint32_t>(value);
709  break;
710  case Audio_EsDescriptor: {
711  mp4::ESDescriptor descriptor;
712  if (!descriptor.Parse(binary_data)) {
713  LOG(ERROR) <<
714  "Could not extract AudioSpecificConfig from ES_Descriptor";
715  return false;
716  }
717  audio_codec_config = descriptor.decoder_specific_info();
718  break;
719  }
720  case Audio_EC3SpecificData:
721  case Audio_DtsSpecificData:
722  case Audio_AC3SpecificData:
723  LOG(ERROR) << "Audio type not supported.";
724  return false;
725  case AVCDecoderConfigurationRecord:
726  video_codec_config = binary_data;
727  break;
728  default:
729  break;
730  }
731 
732  read_ptr += length;
733  index_metadata_max_size -= length;
734  }
735  // End Index metadata
736  index_size = read_ptr - vector_as_array(&index_data_);
737 
738  if (has_video) {
739  VideoCodec video_codec = kCodecH264;
740  stream_infos_.push_back(new VideoStreamInfo(
741  stream_id_count_, time_scale, track_duration, video_codec,
742  std::string(), std::string(), video_width, video_height,
743  pixel_width, pixel_height, trick_play_rate, nalu_length_size,
744  vector_as_array(&video_codec_config), video_codec_config.size(),
745  true));
746  program_demux_stream_map_[base::UintToString(index_program_id_) + ":" +
747  base::UintToString(video_pes_stream_id ?
748  video_pes_stream_id :
749  kDefaultVideoStreamId)] =
750  stream_id_count_++;
751  }
752  if (has_audio) {
753  AudioCodec audio_codec = kCodecAAC;
754  stream_infos_.push_back(new AudioStreamInfo(
755  stream_id_count_, time_scale, track_duration, audio_codec,
756  std::string(), std::string(), kAacSampleSizeBits, num_channels,
757  sampling_frequency, vector_as_array(&audio_codec_config),
758  audio_codec_config.size(), true));
759  program_demux_stream_map_[base::UintToString(index_program_id_) + ":" +
760  base::UintToString(audio_pes_stream_id ?
761  audio_pes_stream_id :
762  kDefaultAudioStreamId)] =
763  stream_id_count_++;
764  }
765  }
766 
767  index_program_id_++;
768  index_data_.clear();
769  return true;
770 }
771 
772 bool WvmMediaParser::DemuxNextPes(bool is_program_end) {
773  bool output_encrypted_sample = false;
774  if (!sample_data_.empty() && (prev_pes_flags_1_ & kScramblingBitsMask)) {
775  // Decrypt crypto unit.
776  if (!content_decryptor_) {
777  output_encrypted_sample = true;
778  } else {
779  content_decryptor_->Decrypt(&sample_data_[crypto_unit_start_pos_],
780  sample_data_.size() - crypto_unit_start_pos_,
781  &sample_data_[crypto_unit_start_pos_]);
782  }
783  }
784  // Demux media sample if we are at program end or if we are not at a
785  // continuation PES.
786  if ((pes_flags_2_ & kPesOptPts) || is_program_end) {
787  if (!sample_data_.empty()) {
788  if (!Output(output_encrypted_sample)) {
789  return false;
790  }
791  }
792  StartMediaSampleDemux();
793  }
794 
795  crypto_unit_start_pos_ = sample_data_.size();
796  return true;
797 }
798 
799 void WvmMediaParser::StartMediaSampleDemux() {
800  bool is_key_frame = ((pes_flags_1_ & kPesOptAlign) != 0);
801  media_sample_ = MediaSample::CreateEmptyMediaSample();
802  media_sample_->set_dts(dts_);
803  media_sample_->set_pts(pts_);
804  media_sample_->set_is_key_frame(is_key_frame);
805 
806  sample_data_.clear();
807 }
808 
809 bool WvmMediaParser::Output(bool output_encrypted_sample) {
810  if (output_encrypted_sample) {
811  media_sample_->set_data(vector_as_array(&sample_data_),
812  sample_data_.size());
813  media_sample_->set_is_encrypted(true);
814  } else {
815  if ((prev_pes_stream_id_ & kPesStreamIdVideoMask) == kPesStreamIdVideo) {
816  // Convert video stream to unit stream and get config.
817  std::vector<uint8_t> nal_unit_stream;
818  if (!byte_to_unit_stream_converter_.ConvertByteStreamToNalUnitStream(
819  vector_as_array(&sample_data_), sample_data_.size(),
820  &nal_unit_stream)) {
821  LOG(ERROR) << "Could not convert h.264 byte stream sample";
822  return false;
823  }
824  media_sample_->set_data(nal_unit_stream.data(), nal_unit_stream.size());
825  if (!is_initialized_) {
826  // Set extra data for video stream from AVC Decoder Config Record.
827  // Also, set codec string from the AVC Decoder Config Record.
828  std::vector<uint8_t> decoder_config_record;
829  byte_to_unit_stream_converter_.GetAVCDecoderConfigurationRecord(
830  &decoder_config_record);
831  for (uint32_t i = 0; i < stream_infos_.size(); i++) {
832  if (stream_infos_[i]->stream_type() == media::kStreamVideo &&
833  stream_infos_[i]->codec_string().empty()) {
834  const std::vector<uint8_t>* stream_config;
835  if (stream_infos_[i]->extra_data().empty()) {
836  // Decoder config record not available for stream. Use the one
837  // computed from the first video stream.
838  stream_infos_[i]->set_extra_data(decoder_config_record);
839  stream_config = &decoder_config_record;
840  } else {
841  // Use stream-specific config record.
842  stream_config = &stream_infos_[i]->extra_data();
843  }
844  DCHECK(stream_config);
845  stream_infos_[i]->set_codec_string(VideoStreamInfo::GetCodecString(
846  kCodecH264, (*stream_config)[1], (*stream_config)[2],
847  (*stream_config)[3]));
848 
849  VideoStreamInfo* video_stream_info =
850  reinterpret_cast<VideoStreamInfo*>(stream_infos_[i].get());
851  uint32_t coded_width = 0;
852  uint32_t coded_height = 0;
853  uint32_t pixel_width = 0;
854  uint32_t pixel_height = 0;
855  if (!ExtractResolutionFromDecoderConfig(
856  vector_as_array(stream_config), stream_config->size(),
857  &coded_width, &coded_height, &pixel_width, &pixel_height)) {
858  LOG(WARNING) << "Failed to parse AVCDecoderConfigurationRecord. "
859  "Using computed configuration record instead.";
860  video_stream_info->set_extra_data(decoder_config_record);
861  if (!ExtractResolutionFromDecoderConfig(
862  vector_as_array(&decoder_config_record),
863  decoder_config_record.size(),
864  &coded_width, &coded_height, &pixel_width, &pixel_height)) {
865  LOG(ERROR) << "Failed to parse AVCDecoderConfigurationRecord.";
866  return false;
867  }
868  }
869  if (pixel_width != video_stream_info->pixel_width() ||
870  pixel_height != video_stream_info->pixel_height()) {
871  LOG_IF(WARNING, video_stream_info->pixel_width() != 0 ||
872  video_stream_info->pixel_height() != 0)
873  << "Pixel aspect ratio in WVM metadata ("
874  << video_stream_info->pixel_width() << ","
875  << video_stream_info->pixel_height()
876  << ") does not match with SAR in "
877  "AVCDecoderConfigurationRecord ("
878  << pixel_width << "," << pixel_height
879  << "). Use AVCDecoderConfigurationRecord.";
880  video_stream_info->set_pixel_width(pixel_width);
881  video_stream_info->set_pixel_height(pixel_height);
882  }
883  if (coded_width != video_stream_info->width() ||
884  coded_height != video_stream_info->height()) {
885  LOG(WARNING) << "Resolution in WVM metadata ("
886  << video_stream_info->width() << ","
887  << video_stream_info->height()
888  << ") does not match with resolution in "
889  "AVCDecoderConfigurationRecord ("
890  << coded_width << "," << coded_height
891  << "). Use AVCDecoderConfigurationRecord.";
892  video_stream_info->set_width(coded_width);
893  video_stream_info->set_height(coded_height);
894  }
895  }
896  }
897  }
898  } else if ((prev_pes_stream_id_ & kPesStreamIdAudioMask) ==
899  kPesStreamIdAudio) {
900  // Set data on the audio stream.
902  vector_as_array(&sample_data_), kAdtsHeaderMinSize);
903  media::mp2t::AdtsHeader adts_header;
904  const uint8_t* frame_ptr = vector_as_array(&sample_data_);
905  if (!adts_header.Parse(frame_ptr, frame_size)) {
906  LOG(ERROR) << "Could not parse ADTS header";
907  return false;
908  }
909  size_t header_size = adts_header.GetAdtsHeaderSize(frame_ptr,
910  frame_size);
911  media_sample_->set_data(frame_ptr + header_size,
912  frame_size - header_size);
913  if (!is_initialized_) {
914  for (uint32_t i = 0; i < stream_infos_.size(); i++) {
915  if (stream_infos_[i]->stream_type() == media::kStreamAudio &&
916  stream_infos_[i]->codec_string().empty()) {
917  AudioStreamInfo* audio_stream_info =
918  reinterpret_cast<AudioStreamInfo*>(stream_infos_[i].get());
919  if (audio_stream_info->extra_data().empty()) {
920  // Set AudioStreamInfo fields using information from the ADTS
921  // header.
922  audio_stream_info->set_sampling_frequency(
923  adts_header.GetSamplingFrequency());
924  std::vector<uint8_t> audio_specific_config;
925  if (!adts_header.GetAudioSpecificConfig(&audio_specific_config)) {
926  LOG(ERROR) << "Could not compute AACaudiospecificconfig";
927  return false;
928  }
929  audio_stream_info->set_extra_data(audio_specific_config);
930  audio_stream_info->set_codec_string(
932  kCodecAAC, adts_header.GetObjectType()));
933  } else {
934  // Set AudioStreamInfo fields using information from the
935  // AACAudioSpecificConfig record.
936  mp4::AACAudioSpecificConfig aac_config;
937  if (!aac_config.Parse(stream_infos_[i]->extra_data())) {
938  LOG(ERROR) << "Could not parse AACAudioSpecificconfig";
939  return false;
940  }
941  audio_stream_info->set_sampling_frequency(aac_config.frequency());
942  audio_stream_info->set_codec_string(
944  kCodecAAC, aac_config.audio_object_type()));
945  }
946  }
947  }
948  }
949  }
950  }
951 
952  if (!is_initialized_) {
953  bool all_streams_have_config = true;
954  // Check if all collected stream infos have extra_data set.
955  for (uint32_t i = 0; i < stream_infos_.size(); i++) {
956  if (stream_infos_[i]->codec_string().empty()) {
957  all_streams_have_config = false;
958  break;
959  }
960  }
961  if (all_streams_have_config) {
962  init_cb_.Run(stream_infos_);
963  is_initialized_ = true;
964  }
965  }
966 
967  DCHECK_GT(media_sample_->data_size(), 0UL);
968  std::string key = base::UintToString(current_program_id_).append(":")
969  .append(base::UintToString(prev_pes_stream_id_));
970  std::map<std::string, uint32_t>::iterator it =
971  program_demux_stream_map_.find(key);
972  if (it == program_demux_stream_map_.end()) {
973  // TODO(ramjic): Log error message here and in other error cases through
974  // this method.
975  return false;
976  }
977  DemuxStreamIdMediaSample demux_stream_media_sample;
978  demux_stream_media_sample.parsed_audio_or_video_stream_id =
979  prev_pes_stream_id_;
980  demux_stream_media_sample.demux_stream_id = (*it).second;
981  demux_stream_media_sample.media_sample = media_sample_;
982  // Check if sample can be emitted.
983  if (!is_initialized_) {
984  media_sample_queue_.push_back(demux_stream_media_sample);
985  } else {
986  // flush the sample queue and emit all queued samples.
987  while (!media_sample_queue_.empty()) {
988  if (!EmitPendingSamples())
989  return false;
990  }
991  // Emit current sample.
992  if (!EmitSample(prev_pes_stream_id_, (*it).second, media_sample_, false))
993  return false;
994  }
995  return true;
996 }
997 
998 bool WvmMediaParser::EmitSample(uint32_t parsed_audio_or_video_stream_id,
999  uint32_t stream_id,
1000  scoped_refptr<MediaSample>& new_sample,
1001  bool isLastSample) {
1002  DCHECK(new_sample);
1003  if (isLastSample) {
1004  if ((parsed_audio_or_video_stream_id & kPesStreamIdVideoMask) ==
1005  kPesStreamIdVideo) {
1006  new_sample->set_duration(prev_media_sample_data_.video_sample_duration);
1007  } else if ((parsed_audio_or_video_stream_id & kPesStreamIdAudioMask) ==
1008  kPesStreamIdAudio) {
1009  new_sample->set_duration(prev_media_sample_data_.audio_sample_duration);
1010  }
1011  if (!new_sample_cb_.Run(stream_id, new_sample)) {
1012  LOG(ERROR) << "Failed to process the last sample.";
1013  return false;
1014  }
1015  return true;
1016  }
1017 
1018  // Cannot emit current sample. Compute duration first and then,
1019  // emit previous sample.
1020  if ((parsed_audio_or_video_stream_id & kPesStreamIdVideoMask) ==
1021  kPesStreamIdVideo) {
1022  if (prev_media_sample_data_.video_sample == NULL) {
1023  prev_media_sample_data_.video_sample = new_sample;
1024  prev_media_sample_data_.video_stream_id = stream_id;
1025  return true;
1026  }
1027  prev_media_sample_data_.video_sample->set_duration(
1028  new_sample->dts() - prev_media_sample_data_.video_sample->dts());
1029  prev_media_sample_data_.video_sample_duration =
1030  prev_media_sample_data_.video_sample->duration();
1031  if (!new_sample_cb_.Run(prev_media_sample_data_.video_stream_id,
1032  prev_media_sample_data_.video_sample)) {
1033  LOG(ERROR) << "Failed to process the video sample.";
1034  return false;
1035  }
1036  prev_media_sample_data_.video_sample = new_sample;
1037  prev_media_sample_data_.video_stream_id = stream_id;
1038  } else if ((parsed_audio_or_video_stream_id & kPesStreamIdAudioMask) ==
1039  kPesStreamIdAudio) {
1040  if (prev_media_sample_data_.audio_sample == NULL) {
1041  prev_media_sample_data_.audio_sample = new_sample;
1042  prev_media_sample_data_.audio_stream_id = stream_id;
1043  return true;
1044  }
1045  prev_media_sample_data_.audio_sample->set_duration(
1046  new_sample->dts() - prev_media_sample_data_.audio_sample->dts());
1047  prev_media_sample_data_.audio_sample_duration =
1048  prev_media_sample_data_.audio_sample->duration();
1049  if (!new_sample_cb_.Run(prev_media_sample_data_.audio_stream_id,
1050  prev_media_sample_data_.audio_sample)) {
1051  LOG(ERROR) << "Failed to process the audio sample.";
1052  return false;
1053  }
1054  prev_media_sample_data_.audio_sample = new_sample;
1055  prev_media_sample_data_.audio_stream_id = stream_id;
1056  }
1057  return true;
1058 }
1059 
1060 bool WvmMediaParser::GetAssetKey(const uint32_t asset_id,
1061  EncryptionKey* encryption_key) {
1062  DCHECK(decryption_key_source_);
1063  Status status = decryption_key_source_->FetchKeys(asset_id);
1064  if (!status.ok()) {
1065  LOG(ERROR) << "Fetch Key(s) failed for AssetID = " << asset_id
1066  << ", error = " << status;
1067  return false;
1068  }
1069 
1070  status = decryption_key_source_->GetKey(KeySource::TRACK_TYPE_HD,
1071  encryption_key);
1072  if (!status.ok()) {
1073  LOG(ERROR) << "Fetch Key(s) failed for AssetID = " << asset_id
1074  << ", error = " << status;
1075  return false;
1076  }
1077 
1078  return true;
1079 }
1080 
1081 bool WvmMediaParser::ProcessEcm() {
1082  // An error will be returned later if the samples need to be decrypted.
1083  if (!decryption_key_source_)
1084  return true;
1085 
1086  if (current_program_id_ > 0) {
1087  return true;
1088  }
1089  if (ecm_.size() != kEcmSizeBytes) {
1090  LOG(ERROR) << "Unexpected ECM size = " << ecm_.size()
1091  << ", expected size = " << kEcmSizeBytes;
1092  return false;
1093  }
1094  const uint8_t* ecm_data = ecm_.data();
1095  DCHECK(ecm_data);
1096  ecm_data += sizeof(uint32_t); // old version field - skip.
1097  ecm_data += sizeof(uint32_t); // clear lead - skip.
1098  ecm_data += sizeof(uint32_t); // system id(includes ECM version) - skip.
1099  uint32_t asset_id = ntohlFromBuffer(ecm_data);
1100  if (asset_id == 0) {
1101  LOG(ERROR) << "AssetID in ECM is not valid.";
1102  return false;
1103  }
1104  ecm_data += sizeof(uint32_t); // asset_id.
1105  EncryptionKey encryption_key;
1106  if (!GetAssetKey(asset_id, &encryption_key)) {
1107  return false;
1108  }
1109  if (encryption_key.key.size() < kAssetKeySizeBytes) {
1110  LOG(ERROR) << "Asset Key size of " << encryption_key.key.size()
1111  << " for AssetID = " << asset_id
1112  << " is less than minimum asset key size.";
1113  return false;
1114  }
1115  // Legacy WVM content may have asset keys > 16 bytes.
1116  // Use only the first 16 bytes of the asset key to get
1117  // the content key.
1118  std::vector<uint8_t> asset_key(
1119  encryption_key.key.begin(),
1120  encryption_key.key.begin() + kAssetKeySizeBytes);
1121  std::vector<uint8_t> iv(kInitializationVectorSizeBytes);
1122  AesCbcCtsDecryptor asset_decryptor;
1123  if (!asset_decryptor.InitializeWithIv(asset_key, iv)) {
1124  LOG(ERROR) << "Failed to initialize asset_decryptor.";
1125  return false;
1126  }
1127 
1128  const size_t content_key_buffer_size =
1129  kEcmFlagsSizeBytes + kEcmContentKeySizeBytes +
1130  kEcmPaddingSizeBytes; // flags + contentKey + padding.
1131  std::vector<uint8_t> content_key_buffer(content_key_buffer_size);
1132  asset_decryptor.Decrypt(
1133  ecm_data, content_key_buffer_size, vector_as_array(&content_key_buffer));
1134 
1135  std::vector<uint8_t> decrypted_content_key_vec(
1136  content_key_buffer.begin() + 4,
1137  content_key_buffer.begin() + 20);
1138  scoped_ptr<AesCbcCtsDecryptor> content_decryptor(new AesCbcCtsDecryptor);
1139  if (!content_decryptor->InitializeWithIv(decrypted_content_key_vec, iv)) {
1140  LOG(ERROR) << "Failed to initialize content decryptor.";
1141  return false;
1142  }
1143 
1144  content_decryptor_ = content_decryptor.Pass();
1145  return true;
1146 }
1147 
1148 DemuxStreamIdMediaSample::DemuxStreamIdMediaSample() :
1149  demux_stream_id(0),
1150  parsed_audio_or_video_stream_id(0) {}
1151 
1152 DemuxStreamIdMediaSample::~DemuxStreamIdMediaSample() {}
1153 
1154 PrevSampleData::PrevSampleData() {
1155  Reset();
1156 }
1157 
1158 PrevSampleData::~PrevSampleData() {}
1159 
1160 void PrevSampleData::Reset() {
1161  audio_sample = NULL;
1162  video_sample = NULL;
1163  audio_stream_id = 0;
1164  video_stream_id = 0;
1165  audio_sample_duration = 0;
1166  video_sample_duration = 0;
1167 }
1168 
1169 } // namespace wvm
1170 } // namespace media
1171 } // namespace edash_packager
static size_t GetAdtsFrameSize(const uint8_t *data, size_t num_bytes)
Definition: adts_header.cc:23
KeySource is responsible for encryption key acquisition.
Definition: key_source.h:29
static std::string GetCodecString(VideoCodec codec, uint8_t profile, uint8_t compatible_profiles, uint8_t level)
static scoped_refptr< MediaSample > CreateEmptyMediaSample()
Create a MediaSample object with default members.
Definition: media_sample.cc:69
static std::string GetCodecString(AudioCodec codec, uint8_t audio_object_type)