DASH Media Packaging SDK
webm_cluster_parser.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "packager/media/formats/webm/webm_cluster_parser.h"
6 
7 #include <vector>
8 
9 #include "packager/base/logging.h"
10 #include "packager/base/sys_byteorder.h"
11 #include "packager/media/base/decrypt_config.h"
12 #include "packager/media/base/timestamp.h"
13 #include "packager/media/filters/vp8_parser.h"
14 #include "packager/media/filters/vp9_parser.h"
15 #include "packager/media/filters/webvtt_util.h"
16 #include "packager/media/formats/webm/webm_constants.h"
17 #include "packager/media/formats/webm/webm_crypto_helpers.h"
18 #include "packager/media/formats/webm/webm_webvtt_parser.h"
19 
20 namespace edash_packager {
21 namespace media {
22 namespace {
23 
24 const int64_t kMicrosecondsPerMillisecond = 1000;
25 
26 // Helper function used to inspect block data to determine if the
27 // block is a keyframe.
28 // |data| contains the bytes in the block.
29 // |size| indicates the number of bytes in |data|.
30 bool IsKeyframe(bool is_video,
31  VideoCodec codec,
32  const uint8_t* data,
33  int size) {
34  // For now, assume that all blocks are keyframes for datatypes other than
35  // video. This is a valid assumption for Vorbis, WebVTT, & Opus.
36  if (!is_video)
37  return true;
38 
39  switch (codec) {
40  case kCodecVP8:
41  return VP8Parser::IsKeyframe(data, size);
42  case kCodecVP9:
43  return VP9Parser::IsKeyframe(data, size);
44  default:
45  NOTIMPLEMENTED() << "Unsupported codec " << codec;
46  return false;
47  }
48 }
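// For reference, the VP8 half of this check is cheap: per RFC 6386 the first
// byte of the frame tag carries the frame type in its least-significant bit
// (0 = key frame). A minimal sketch of just that bit test, assuming the RFC
// layout and skipping the extra validation VP8Parser::IsKeyframe performs:
//
//   bool LooksLikeVp8Keyframe(const uint8_t* data, int size) {
//     return size > 0 && (data[0] & 0x01) == 0;
//   }
//
// VP9 offers no single-bit shortcut; its uncompressed header has to be parsed,
// which is why the VP9 case defers to VP9Parser::IsKeyframe.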
49 
50 } // namespace
51 
52 WebMClusterParser::WebMClusterParser(
53  int64_t timecode_scale,
54  scoped_refptr<AudioStreamInfo> audio_stream_info,
55  scoped_refptr<VideoStreamInfo> video_stream_info,
56  int64_t audio_default_duration,
57  int64_t video_default_duration,
58  const WebMTracksParser::TextTracks& text_tracks,
59  const std::set<int64_t>& ignored_tracks,
60  const std::string& audio_encryption_key_id,
61  const std::string& video_encryption_key_id,
62  const MediaParser::NewSampleCB& new_sample_cb,
63  const MediaParser::InitCB& init_cb)
64  : timecode_multiplier_(timecode_scale / 1000.0),
65  audio_stream_info_(audio_stream_info),
66  video_stream_info_(video_stream_info),
67  ignored_tracks_(ignored_tracks),
68  audio_encryption_key_id_(audio_encryption_key_id),
69  video_encryption_key_id_(video_encryption_key_id),
70  parser_(kWebMIdCluster, this),
71  initialized_(false),
72  init_cb_(init_cb),
73  cluster_start_time_(kNoTimestamp),
74  audio_(audio_stream_info ? audio_stream_info->track_id() : -1,
75  false,
76  audio_default_duration,
77  new_sample_cb),
78  video_(video_stream_info ? video_stream_info->track_id() : -1,
79  true,
80  video_default_duration,
81  new_sample_cb) {
82  for (WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin();
83  it != text_tracks.end();
84  ++it) {
85  text_track_map_.insert(std::make_pair(
86  it->first, Track(it->first, false, kNoTimestamp, new_sample_cb)));
87  }
88 }
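// |timecode_scale| is WebM's TimecodeScale, i.e. nanoseconds per timecode tick
// (1,000,000 by default), so timecode_scale / 1000.0 converts ticks to the
// microsecond timestamps used throughout this file. A worked example with the
// default scale:
//
//   timecode_multiplier_ = 1,000,000 / 1000.0 = 1000 us per tick
//   cluster timecode 2000 ticks, block timecode +40 ticks
//     => timestamp = (2000 + 40) * 1000 = 2,040,000 us
//
// (See the timestamp computation in OnBlock() below.)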
89 
90 WebMClusterParser::~WebMClusterParser() {}
91 
92 void WebMClusterParser::Reset() {
93  last_block_timecode_ = -1;
94  cluster_timecode_ = -1;
95  cluster_start_time_ = kNoTimestamp;
96  cluster_ended_ = false;
97  parser_.Reset();
98  audio_.Reset();
99  video_.Reset();
100  ResetTextTracks();
101 }
102 
103 void WebMClusterParser::Flush() {
104  // Estimate the duration of the last frame if necessary.
105  audio_.ApplyDurationEstimateIfNeeded();
106  video_.ApplyDurationEstimateIfNeeded();
107  Reset();
108 }
109 
110 int WebMClusterParser::Parse(const uint8_t* buf, int size) {
111  int result = parser_.Parse(buf, size);
112 
113  if (result < 0) {
114  cluster_ended_ = false;
115  return result;
116  }
117 
118  cluster_ended_ = parser_.IsParsingComplete();
119  if (cluster_ended_) {
120  // If there were no buffers in this cluster, set the cluster start time to
121  // be the |cluster_timecode_|.
122  if (cluster_start_time_ == kNoTimestamp) {
123  // If the cluster did not even have a |cluster_timecode_|, signal parse
124  // error.
125  if (cluster_timecode_ < 0)
126  return -1;
127 
128  cluster_start_time_ = cluster_timecode_ * timecode_multiplier_;
129  }
130 
131  // Reset the parser if we're done parsing so that
132  // it is ready to accept another cluster on the next
133  // call.
134  parser_.Reset();
135 
136  last_block_timecode_ = -1;
137  cluster_timecode_ = -1;
138  }
139 
140  return result;
141 }
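// A hypothetical driver for Parse()/Flush() (names invented for illustration;
// the real caller is the surrounding WebM media parser):
//
//   bool FeedCluster(WebMClusterParser* parser, const uint8_t* buf, int size) {
//     int consumed = 0;
//     while (consumed < size) {
//       int result = parser->Parse(buf + consumed, size - consumed);
//       if (result < 0)
//         return false;  // Parse error inside the cluster.
//       if (result == 0)
//         break;         // The parser needs more data than is buffered.
//       consumed += result;
//     }
//     parser->Flush();   // Emit any sample still waiting on a duration.
//     return true;
//   }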
142 
143 WebMParserClient* WebMClusterParser::OnListStart(int id) {
144  if (id == kWebMIdCluster) {
145  cluster_timecode_ = -1;
146  cluster_start_time_ = kNoTimestamp;
147  } else if (id == kWebMIdBlockGroup) {
148  block_data_.reset();
149  block_data_size_ = -1;
150  block_duration_ = -1;
151  discard_padding_ = -1;
152  discard_padding_set_ = false;
153  } else if (id == kWebMIdBlockAdditions) {
154  block_add_id_ = -1;
155  block_additional_data_.reset();
156  block_additional_data_size_ = 0;
157  }
158 
159  return this;
160 }
161 
162 bool WebMClusterParser::OnListEnd(int id) {
163  if (id != kWebMIdBlockGroup)
164  return true;
165 
166  // Make sure the BlockGroup actually had a Block.
167  if (block_data_size_ == -1) {
168  LOG(ERROR) << "Block missing from BlockGroup.";
169  return false;
170  }
171 
172  bool result = ParseBlock(false, block_data_.get(), block_data_size_,
173  block_additional_data_.get(),
174  block_additional_data_size_, block_duration_,
175  discard_padding_set_ ? discard_padding_ : 0);
176  block_data_.reset();
177  block_data_size_ = -1;
178  block_duration_ = -1;
179  block_add_id_ = -1;
180  block_additional_data_.reset();
181  block_additional_data_size_ = 0;
182  discard_padding_ = -1;
183  discard_padding_set_ = false;
184  return result;
185 }
186 
187 bool WebMClusterParser::OnUInt(int id, int64_t val) {
188  int64_t* dst;
189  switch (id) {
190  case kWebMIdTimecode:
191  dst = &cluster_timecode_;
192  break;
193  case kWebMIdBlockDuration:
194  dst = &block_duration_;
195  break;
196  case kWebMIdBlockAddID:
197  dst = &block_add_id_;
198  break;
199  default:
200  return true;
201  }
202  if (*dst != -1)
203  return false;
204  *dst = val;
205  return true;
206 }
207 
208 bool WebMClusterParser::ParseBlock(bool is_simple_block,
209  const uint8_t* buf,
210  int size,
211  const uint8_t* additional,
212  int additional_size,
213  int duration,
214  int64_t discard_padding) {
215  if (size < 4)
216  return false;
217 
218  // Return an error if the trackNum > 127. We just aren't
219  // going to support large track numbers right now.
220  if (!(buf[0] & 0x80)) {
221  LOG(ERROR) << "TrackNumber over 127 not supported";
222  return false;
223  }
224 
225  int track_num = buf[0] & 0x7f;
226  int timecode = buf[1] << 8 | buf[2];
227  int flags = buf[3] & 0xff;
228  int lacing = (flags >> 1) & 0x3;
229 
230  if (lacing) {
231  LOG(ERROR) << "Lacing " << lacing << " is not supported yet.";
232  return false;
233  }
234 
235  // Sign extend negative timecode offsets.
236  if (timecode & 0x8000)
237  timecode |= ~0xffff;
238 
239  const uint8_t* frame_data = buf + 4;
240  int frame_size = size - (frame_data - buf);
241  return OnBlock(is_simple_block, track_num, timecode, duration, flags,
242  frame_data, frame_size, additional, additional_size,
243  discard_padding);
244 }
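// Layout of the four header bytes consumed above, assuming the one-byte track
// number that is the only form this parser accepts:
//
//   buf[0]  1xxxxxxx   EBML-coded track number (MSB set = 1-byte form)
//   buf[1]  hhhhhhhh   \ signed 16-bit timecode, big-endian, relative to the
//   buf[2]  llllllll   / enclosing cluster's timecode
//   buf[3]  flags      bit 7 (0x80) = keyframe (SimpleBlocks only),
//                      bits 1-2     = lacing mode (rejected above)
//
// Everything from buf[4] onward is the frame payload handed to OnBlock().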
245 
246 bool WebMClusterParser::OnBinary(int id, const uint8_t* data, int size) {
247  switch (id) {
248  case kWebMIdSimpleBlock:
249  return ParseBlock(true, data, size, NULL, 0, -1, 0);
250 
251  case kWebMIdBlock:
252  if (block_data_) {
253  LOG(ERROR) << "More than 1 Block in a BlockGroup is not "
254  "supported.";
255  return false;
256  }
257  block_data_.reset(new uint8_t[size]);
258  memcpy(block_data_.get(), data, size);
259  block_data_size_ = size;
260  return true;
261 
262  case kWebMIdBlockAdditional: {
263  uint64_t block_add_id = base::HostToNet64(block_add_id_);
264  if (block_additional_data_) {
265  // TODO: Technically, more than 1 BlockAdditional is allowed as per
266  // matroska spec. But for now we don't have a use case to support
267  // parsing of such files. Take a look at this again when such a case
268  // arises.
269  LOG(ERROR) << "More than 1 BlockAdditional in a "
270  "BlockGroup is not supported.";
271  return false;
272  }
273  // The first 8 bytes of the sample's side data hold the BlockAddID
274  // element's value in big-endian format. This mimics the ffmpeg
275  // demuxer's behavior.
276  block_additional_data_size_ = size + sizeof(block_add_id);
277  block_additional_data_.reset(new uint8_t[block_additional_data_size_]);
278  memcpy(block_additional_data_.get(), &block_add_id,
279  sizeof(block_add_id));
280  memcpy(block_additional_data_.get() + 8, data, size);
281  return true;
282  }
283  case kWebMIdDiscardPadding: {
284  if (discard_padding_set_ || size <= 0 || size > 8)
285  return false;
286  discard_padding_set_ = true;
287 
288  // Read in the big-endian integer.
289  discard_padding_ = static_cast<int8_t>(data[0]);
290  for (int i = 1; i < size; ++i)
291  discard_padding_ = (discard_padding_ << 8) | data[i];
292 
293  return true;
294  }
295  default:
296  return true;
297  }
298 }
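// Worked example for the DiscardPadding decode above: a two-byte payload of
// {0xFF, 0x00} starts as static_cast<int8_t>(0xFF) == -1, and one shift/OR
// step yields (-1 << 8) | 0x00 == -256, i.e. -256 ns of discard padding; the
// int8_t cast on the first byte is what makes the value sign-extended. For
// kWebMIdBlockAdditional, the stored side data is laid out as
// [8-byte big-endian BlockAddID][raw BlockAdditional payload], as noted in the
// comment about matching ffmpeg's demuxer.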
299 
300 bool WebMClusterParser::OnBlock(bool is_simple_block,
301  int track_num,
302  int timecode,
303  int block_duration,
304  int flags,
305  const uint8_t* data,
306  int size,
307  const uint8_t* additional,
308  int additional_size,
309  int64_t discard_padding) {
310  DCHECK_GE(size, 0);
311  if (cluster_timecode_ == -1) {
312  LOG(ERROR) << "Got a block before cluster timecode.";
313  return false;
314  }
315 
316  // TODO: Should relative negative timecode offsets be rejected? Or only when
317  // the absolute timecode is negative? See http://crbug.com/271794
318  if (timecode < 0) {
319  LOG(ERROR) << "Got a block with negative timecode offset " << timecode;
320  return false;
321  }
322 
323  if (last_block_timecode_ != -1 && timecode < last_block_timecode_) {
324  LOG(ERROR) << "Got a block with a timecode before the previous block.";
325  return false;
326  }
327 
328  Track* track = NULL;
329  StreamType stream_type = kStreamAudio;
330  std::string encryption_key_id;
331  if (track_num == audio_.track_num()) {
332  track = &audio_;
333  encryption_key_id = audio_encryption_key_id_;
334  } else if (track_num == video_.track_num()) {
335  track = &video_;
336  encryption_key_id = video_encryption_key_id_;
337  stream_type = kStreamVideo;
338  } else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) {
339  return true;
340  } else if (Track* const text_track = FindTextTrack(track_num)) {
341  if (is_simple_block) // BlockGroup is required for WebVTT cues
342  return false;
343  if (block_duration < 0) // not specified
344  return false;
345  track = text_track;
346  stream_type = kStreamText;
347  } else {
348  LOG(ERROR) << "Unexpected track number " << track_num;
349  return false;
350  }
351 
352  last_block_timecode_ = timecode;
353 
354  int64_t timestamp = (cluster_timecode_ + timecode) * timecode_multiplier_;
355 
356  scoped_refptr<MediaSample> buffer;
357  if (stream_type != kStreamText) {
358  // The first bit of the flags is set when a SimpleBlock contains only
359  // keyframes. If this is a Block, then inspection of the payload is
360  // necessary to determine whether it contains a keyframe or not.
361  // http://www.matroska.org/technical/specs/index.html
362  bool is_keyframe =
363  is_simple_block
364  ? (flags & 0x80) != 0
365  : IsKeyframe(stream_type == kStreamVideo,
366  video_stream_info_ ? video_stream_info_->codec()
367  : kUnknownVideoCodec,
368  data, size);
369 
370  // Every encrypted Block has a signal byte and IV prepended to it. Current
371  // encrypted WebM request for comments specification is here
372  // http://wiki.webmproject.org/encryption/webm-encryption-rfc
373  scoped_ptr<DecryptConfig> decrypt_config;
374  int data_offset = 0;
375  if (!encryption_key_id.empty() &&
376  !WebMCreateDecryptConfig(
377  data, size,
378  reinterpret_cast<const uint8_t*>(encryption_key_id.data()),
379  encryption_key_id.size(),
380  &decrypt_config, &data_offset)) {
381  return false;
382  }
383 
384  buffer = MediaSample::CopyFrom(data + data_offset, size - data_offset,
385  additional, additional_size, is_keyframe);
386 
387  if (decrypt_config) {
388  // TODO(kqyang): Decrypt it if it is encrypted.
389  buffer->set_is_encrypted(true);
390  }
391  } else {
392  std::string id, settings, content;
393  WebMWebVTTParser::Parse(data, size, &id, &settings, &content);
394 
395  std::vector<uint8_t> side_data;
396  MakeSideData(id.begin(), id.end(),
397  settings.begin(), settings.end(),
398  &side_data);
399 
400  buffer = MediaSample::CopyFrom(
401  reinterpret_cast<const uint8_t*>(content.data()), content.length(),
402  &side_data[0], side_data.size(), true);
403  }
404 
405  buffer->set_dts(timestamp);
406  buffer->set_pts(timestamp);
407  if (cluster_start_time_ == kNoTimestamp)
408  cluster_start_time_ = timestamp;
409  buffer->set_duration(block_duration > 0
410  ? (block_duration * timecode_multiplier_)
411  : kNoTimestamp);
412 
413  if (!init_cb_.is_null() && !initialized_) {
414  std::vector<scoped_refptr<StreamInfo>> streams;
415  if (audio_stream_info_)
416  streams.push_back(audio_stream_info_);
417  if (video_stream_info_) {
418  if (stream_type == kStreamVideo) {
419  scoped_ptr<VPxParser> vpx_parser;
420  switch (video_stream_info_->codec()) {
421  case kCodecVP8:
422  vpx_parser.reset(new VP8Parser);
423  break;
424  case kCodecVP9:
425  vpx_parser.reset(new VP9Parser);
426  break;
427  default:
428  NOTIMPLEMENTED() << "Unsupported codec "
429  << video_stream_info_->codec();
430  return false;
431  }
432  std::vector<VPxFrameInfo> vpx_frames;
433  if (!vpx_parser->Parse(buffer->data(), buffer->data_size(),
434  &vpx_frames)) {
435  LOG(ERROR) << "Failed to parse vpx frame.";
436  return false;
437  }
438  if (vpx_frames.size() != 1u || !vpx_frames[0].is_keyframe) {
439  LOG(ERROR) << "The first frame should be a key frame.";
440  return false;
441  }
442 
443  const VPCodecConfiguration* codec_config = &vpx_parser->codec_config();
444  video_stream_info_->set_codec_string(
445  codec_config->GetCodecString(video_stream_info_->codec()));
446  std::vector<uint8_t> extra_data;
447  codec_config->Write(&extra_data);
448  video_stream_info_->set_extra_data(extra_data);
449  streams.push_back(video_stream_info_);
450  init_cb_.Run(streams);
451  initialized_ = true;
452  }
453  } else {
454  init_cb_.Run(streams);
455  initialized_ = true;
456  }
457  }
458 
459  return track->EmitBuffer(buffer);
460 }
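// Summary of the per-frame framing that WebMCreateDecryptConfig() is expected
// to strip for encrypted tracks, per the WebM encryption RFC linked above
// (paraphrased here, not quoted from the spec):
//
//   byte 0     signal byte; bit 0 set => this frame is encrypted
//   bytes 1-8  8-byte big-endian IV (present only when the encrypted bit is set)
//   bytes 9-   AES-CTR encrypted frame payload
//
// For clear frames inside an encrypted track only the signal byte is present,
// so |data_offset| comes back as 1 instead of 9.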
461 
462 WebMClusterParser::Track::Track(int track_num,
463  bool is_video,
464  int64_t default_duration,
465  const MediaParser::NewSampleCB& new_sample_cb)
466  : track_num_(track_num),
467  is_video_(is_video),
468  default_duration_(default_duration),
469  estimated_next_frame_duration_(kNoTimestamp),
470  new_sample_cb_(new_sample_cb) {
471  DCHECK(default_duration_ == kNoTimestamp || default_duration_ > 0);
472 }
473 
474 WebMClusterParser::Track::~Track() {}
475 
476 bool WebMClusterParser::Track::EmitBuffer(
477  const scoped_refptr<MediaSample>& buffer) {
478  DVLOG(2) << "EmitBuffer() : " << track_num_
479  << " ts " << buffer->pts()
480  << " dur " << buffer->duration()
481  << " kf " << buffer->is_key_frame()
482  << " size " << buffer->data_size();
483 
484  if (last_added_buffer_missing_duration_.get()) {
485  int64_t derived_duration =
486  buffer->pts() - last_added_buffer_missing_duration_->pts();
487  last_added_buffer_missing_duration_->set_duration(derived_duration);
488 
489  DVLOG(2) << "EmitBuffer() : applied derived duration to held-back buffer : "
490  << " ts "
491  << last_added_buffer_missing_duration_->pts()
492  << " dur "
493  << last_added_buffer_missing_duration_->duration()
494  << " kf " << last_added_buffer_missing_duration_->is_key_frame()
495  << " size " << last_added_buffer_missing_duration_->data_size();
496  scoped_refptr<MediaSample> updated_buffer =
497  last_added_buffer_missing_duration_;
498  last_added_buffer_missing_duration_ = NULL;
499  if (!EmitBufferHelp(updated_buffer))
500  return false;
501  }
502 
503  if (buffer->duration() == kNoTimestamp) {
504  last_added_buffer_missing_duration_ = buffer;
505  DVLOG(2) << "EmitBuffer() : holding back buffer that is missing duration";
506  return true;
507  }
508 
509  return EmitBufferHelp(buffer);
510 }
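// Example of the hold-back logic above: a block B1 at pts 0 arrives with no
// known duration and is stored in |last_added_buffer_missing_duration_|. When
// B2 arrives at pts 33,000 us, B1 is emitted with the derived duration
// 33,000 - 0 = 33,000 us; B2 is then emitted directly if it carries a
// BlockDuration, or becomes the new held-back buffer if it does not. A buffer
// still held when the cluster ends is finished by
// ApplyDurationEstimateIfNeeded().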
511 
512 void WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() {
513  if (!last_added_buffer_missing_duration_.get())
514  return;
515 
516  int64_t estimated_duration = GetDurationEstimate();
517  last_added_buffer_missing_duration_->set_duration(estimated_duration);
518 
519  VLOG(1) << "Track " << track_num_ << ": Estimating WebM block duration to be "
520  << estimated_duration / 1000
521  << "ms for the last (Simple)Block in the Cluster for this Track. Use "
522  "BlockGroups with BlockDurations at the end of each Track in a "
523  "Cluster to avoid estimation.";
524 
525  DVLOG(2) << " new dur : ts " << last_added_buffer_missing_duration_->pts()
526  << " dur " << last_added_buffer_missing_duration_->duration()
527  << " kf " << last_added_buffer_missing_duration_->is_key_frame()
528  << " size " << last_added_buffer_missing_duration_->data_size();
529 
530  // Don't use the applied duration as a future estimation (don't use
531  // EmitBufferHelp() here.)
532  new_sample_cb_.Run(track_num_, last_added_buffer_missing_duration_);
533  last_added_buffer_missing_duration_ = NULL;
534 }
535 
536 void WebMClusterParser::Track::Reset() {
537  last_added_buffer_missing_duration_ = NULL;
538 }
539 
540 bool WebMClusterParser::Track::EmitBufferHelp(
541  const scoped_refptr<MediaSample>& buffer) {
542  DCHECK(!last_added_buffer_missing_duration_.get());
543 
544  int64_t duration = buffer->duration();
545  if (duration < 0 || duration == kNoTimestamp) {
546  LOG(ERROR) << "Invalid buffer duration: " << duration;
547  return false;
548  }
549 
550  // The estimated frame duration is the maximum non-zero duration since the
551  // last initialization segment.
552  if (duration > 0) {
553  int64_t orig_duration_estimate = estimated_next_frame_duration_;
554  if (estimated_next_frame_duration_ == kNoTimestamp) {
555  estimated_next_frame_duration_ = duration;
556  } else {
557  estimated_next_frame_duration_ =
558  std::max(duration, estimated_next_frame_duration_);
559  }
560 
561  if (orig_duration_estimate != estimated_next_frame_duration_) {
562  DVLOG(3) << "Updated duration estimate:"
563  << orig_duration_estimate
564  << " -> "
565  << estimated_next_frame_duration_
566  << " at timestamp: "
567  << buffer->dts();
568  }
569  }
570 
571  new_sample_cb_.Run(track_num_, buffer);
572  return true;
573 }
574 
575 int64_t WebMClusterParser::Track::GetDurationEstimate() {
576  int64_t duration = kNoTimestamp;
577  if (default_duration_ != kNoTimestamp) {
578  duration = default_duration_;
579  DVLOG(3) << __FUNCTION__ << " : using track default duration " << duration;
580  } else if (estimated_next_frame_duration_ != kNoTimestamp) {
581  duration = estimated_next_frame_duration_;
582  DVLOG(3) << __FUNCTION__ << " : using estimated duration " << duration;
583  } else {
584  if (is_video_) {
585  duration = kDefaultVideoBufferDurationInMs * kMicrosecondsPerMillisecond;
586  } else {
587  duration = kDefaultAudioBufferDurationInMs * kMicrosecondsPerMillisecond;
588  }
589  DVLOG(3) << __FUNCTION__ << " : using hardcoded default duration "
590  << duration;
591  }
592 
593  DCHECK_GT(duration, 0);
594  DCHECK_NE(duration, kNoTimestamp);
595  return duration;
596 }
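// The precedence above, illustrated: a track constructed with a
// |default_duration_| of 33,000 us always estimates 33,000 us; without one, a
// track that has emitted frames of 30,000 us and 33,333 us estimates the
// running maximum, 33,333 us; a track that has emitted nothing yet falls back
// to the hardcoded kDefault{Audio,Video}BufferDurationInMs constants.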
597 
598 void WebMClusterParser::ResetTextTracks() {
599  for (TextTrackMap::iterator it = text_track_map_.begin();
600  it != text_track_map_.end();
601  ++it) {
602  it->second.Reset();
603  }
604 }
605 
606 WebMClusterParser::Track*
607 WebMClusterParser::FindTextTrack(int track_num) {
608  const TextTrackMap::iterator it = text_track_map_.find(track_num);
609 
610  if (it == text_track_map_.end())
611  return NULL;
612 
613  return &it->second;
614 }
615 
616 } // namespace media
617 } // namespace edash_packager