DASH Media Packaging SDK
webm_cluster_parser.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "packager/media/formats/webm/webm_cluster_parser.h"
6 
7 #include <vector>
8 
9 #include "packager/base/logging.h"
10 #include "packager/base/sys_byteorder.h"
11 #include "packager/media/base/decrypt_config.h"
12 #include "packager/media/base/timestamp.h"
13 #include "packager/media/filters/webvtt_util.h"
14 #include "packager/media/formats/webm/webm_constants.h"
15 #include "packager/media/formats/webm/webm_crypto_helpers.h"
16 #include "packager/media/formats/webm/webm_webvtt_parser.h"
17 
18 // Logs only while |count| < |max|, increments |count| for each log, and warns
19 // in the log if |count| has just reached |max|.
20 #define LIMITED_LOG(level, count, max) \
21  LOG_IF(level, (count) < (max)) \
22  << (((count) + 1 == (max)) \
23  ? "(Log limit reached. Further similar entries " \
24  "may be suppressed): " \
25  : "")
26 #define LIMITED_DLOG(level, count, max) \
27  DLOG_IF(level, (count) < (max)) \
28  << (((count) + 1 == (max)) \
29  ? "(Log limit reached. Further similar entries " \
30  "may be suppressed): " \
31  : "")
32 
33 namespace {
34 const int64_t kMicrosecondsPerMillisecond = 1000;
35 } // namespace
36 
37 namespace edash_packager {
38 namespace media {
39 
40 const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = {
41  10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000,
42  60000, 10000, 20000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000,
43  10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000};
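// Note on the table above: its index is the 5-bit "config" field of an Opus
// TOC byte, and each entry is that configuration's per-frame duration in
// microseconds, matching Table 2 of RFC 6716 (e.g. config 3 is a 60 ms
// SILK-NB frame, config 16 a 2.5 ms CELT-NB frame).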
44 
45 enum {
46  // Limits the number of LOG() calls in the path of reading encoded
47  // duration to avoid spamming for corrupted data.
48  kMaxDurationErrorLogs = 10,
49  // Limits the number of LOG() calls warning the user that buffer
50  // durations have been estimated.
51  kMaxDurationEstimateLogs = 10,
52 };
53 
54 WebMClusterParser::WebMClusterParser(
55  int64_t timecode_scale,
56  int audio_track_num,
57  int64_t audio_default_duration,
58  int video_track_num,
59  int64_t video_default_duration,
60  const WebMTracksParser::TextTracks& text_tracks,
61  const std::set<int64_t>& ignored_tracks,
62  const std::string& audio_encryption_key_id,
63  const std::string& video_encryption_key_id,
64  const AudioCodec audio_codec,
65  const MediaParser::NewSampleCB& new_sample_cb)
66  : timecode_multiplier_(timecode_scale / 1000.0),
67  ignored_tracks_(ignored_tracks),
68  audio_encryption_key_id_(audio_encryption_key_id),
69  video_encryption_key_id_(video_encryption_key_id),
70  audio_codec_(audio_codec),
71  parser_(kWebMIdCluster, this),
72  cluster_start_time_(kNoTimestamp),
73  audio_(audio_track_num, false, audio_default_duration, new_sample_cb),
74  video_(video_track_num, true, video_default_duration, new_sample_cb) {
75  for (WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin();
76  it != text_tracks.end();
77  ++it) {
78  text_track_map_.insert(std::make_pair(
79  it->first, Track(it->first, false, kNoTimestamp, new_sample_cb)));
80  }
81 }
82 
83 WebMClusterParser::~WebMClusterParser() {}
84 
85 void WebMClusterParser::Reset() {
86  last_block_timecode_ = -1;
87  cluster_timecode_ = -1;
88  cluster_start_time_ = kNoTimestamp;
89  cluster_ended_ = false;
90  parser_.Reset();
91  audio_.Reset();
92  video_.Reset();
93  ResetTextTracks();
94 }
95 
96 int WebMClusterParser::Parse(const uint8_t* buf, int size) {
97  int result = parser_.Parse(buf, size);
98 
99  if (result < 0) {
100  cluster_ended_ = false;
101  return result;
102  }
103 
104  cluster_ended_ = parser_.IsParsingComplete();
105  if (cluster_ended_) {
106  audio_.ApplyDurationEstimateIfNeeded();
107  video_.ApplyDurationEstimateIfNeeded();
108 
109  // If there were no buffers in this cluster, set the cluster start time to
110  // be the |cluster_timecode_|.
111  if (cluster_start_time_ == kNoTimestamp) {
112  // If the cluster did not even have a |cluster_timecode_|, signal parse
113  // error.
114  if (cluster_timecode_ < 0)
115  return -1;
116 
117  cluster_start_time_ = cluster_timecode_ * timecode_multiplier_;
118  }
119 
120  // Reset the parser if we're done parsing so that
121  // it is ready to accept another cluster on the next
122  // call.
123  parser_.Reset();
124 
125  last_block_timecode_ = -1;
126  cluster_timecode_ = -1;
127  }
128 
129  return result;
130 }
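// Usage sketch (not part of the original source): a caller would typically
// feed raw WebM bytes in a loop, assuming the usual WebM parser return
// convention of a negative value on error and the number of bytes consumed
// otherwise:
//
//   int consumed = cluster_parser.Parse(data, size);  // |cluster_parser| is hypothetical
//   if (consumed < 0) {
//     // Signal a parse error for this stream.
//   } else {
//     data += consumed;
//     size -= consumed;
//   }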
131 
132 int64_t WebMClusterParser::TryGetEncodedAudioDuration(
133  const uint8_t* data,
134  int size) {
135 
136  // Duration is currently read assuming the *entire* stream is unencrypted.
137  // The special "Signal Byte" prepended to Blocks in encrypted streams is
138  // assumed to not be present.
139  // TODO: Consider parsing "Signal Byte" for encrypted streams to return
140  // duration for any unencrypted blocks.
141 
142  if (audio_codec_ == kCodecOpus) {
143  return ReadOpusDuration(data, size);
144  }
145 
146  // TODO: Implement duration reading for Vorbis. See motivations in
147  // http://crbug.com/396634.
148 
149  return kNoTimestamp;
150 }
151 
152 int64_t WebMClusterParser::ReadOpusDuration(const uint8_t* data, int size) {
153  // Masks and constants for Opus packets. See
154  // https://tools.ietf.org/html/rfc6716#page-14
155  static const uint8_t kTocConfigMask = 0xf8;
156  static const uint8_t kTocFrameCountCodeMask = 0x03;
157  static const uint8_t kFrameCountMask = 0x3f;
158  static const int64_t kPacketDurationMax = 120;
159 
160  if (size < 1) {
161  LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
162  << "Invalid zero-byte Opus packet; demuxed block duration may be "
163  "imprecise.";
164  return kNoTimestamp;
165  }
166 
167  // Frame count type described by last 2 bits of Opus TOC byte.
168  int frame_count_type = data[0] & kTocFrameCountCodeMask;
169 
170  int frame_count = 0;
171  switch (frame_count_type) {
172  case 0:
173  frame_count = 1;
174  break;
175  case 1:
176  case 2:
177  frame_count = 2;
178  break;
179  case 3:
180  // Type 3 indicates an arbitrary frame count described in the next byte.
181  if (size < 2) {
182  LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
183  << "Second byte missing from 'Code 3' Opus packet; demuxed block "
184  "duration may be imprecise.";
185  return kNoTimestamp;
186  }
187 
188  frame_count = data[1] & kFrameCountMask;
189 
190  if (frame_count == 0) {
191  LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
192  << "Illegal 'Code 3' Opus packet with frame count zero; demuxed "
193  "block duration may be imprecise.";
194  return kNoTimestamp;
195  }
196 
197  break;
198  default:
199  LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
200  << "Unexpected Opus frame count type: " << frame_count_type << "; "
201  << "demuxed block duration may be imprecise.";
202  return kNoTimestamp;
203  }
204 
205  int opusConfig = (data[0] & kTocConfigMask) >> 3;
206  CHECK_GE(opusConfig, 0);
207  CHECK_LT(opusConfig, static_cast<int>(arraysize(kOpusFrameDurationsMu)));
208 
209  DCHECK_GT(frame_count, 0);
210  int64_t duration = kOpusFrameDurationsMu[opusConfig] * frame_count;
211 
212  if (duration > kPacketDurationMax) {
213  // Intentionally allowing packet to pass through for now. Decoder should
214  // either handle or fail gracefully. LOG as breadcrumbs in case
215  // things go sideways.
216  LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
217  << "Warning, demuxed Opus packet with encoded duration: "
218  << duration << "ms. Should be no greater than "
219  << kPacketDurationMax << "ms.";
220  }
221 
222  return duration;
223 }
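// Worked example for ReadOpusDuration() above: a packet whose TOC byte is
// 0x0C has config = (0x0C & 0xf8) >> 3 = 1 and frame count code
// 0x0C & 0x03 = 0 (a single frame), so the duration returned is
// kOpusFrameDurationsMu[1] * 1 = 20000 microseconds, i.e. one 20 ms SILK-NB
// frame.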
224 
225 WebMParserClient* WebMClusterParser::OnListStart(int id) {
226  if (id == kWebMIdCluster) {
227  cluster_timecode_ = -1;
228  cluster_start_time_ = kNoTimestamp;
229  } else if (id == kWebMIdBlockGroup) {
230  block_data_.reset();
231  block_data_size_ = -1;
232  block_duration_ = -1;
233  discard_padding_ = -1;
234  discard_padding_set_ = false;
235  } else if (id == kWebMIdBlockAdditions) {
236  block_add_id_ = -1;
237  block_additional_data_.reset();
238  block_additional_data_size_ = 0;
239  }
240 
241  return this;
242 }
243 
244 bool WebMClusterParser::OnListEnd(int id) {
245  if (id != kWebMIdBlockGroup)
246  return true;
247 
248  // Make sure the BlockGroup actually had a Block.
249  if (block_data_size_ == -1) {
250  LOG(ERROR) << "Block missing from BlockGroup.";
251  return false;
252  }
253 
254  bool result = ParseBlock(false, block_data_.get(), block_data_size_,
255  block_additional_data_.get(),
256  block_additional_data_size_, block_duration_,
257  discard_padding_set_ ? discard_padding_ : 0);
258  block_data_.reset();
259  block_data_size_ = -1;
260  block_duration_ = -1;
261  block_add_id_ = -1;
262  block_additional_data_.reset();
263  block_additional_data_size_ = 0;
264  discard_padding_ = -1;
265  discard_padding_set_ = false;
266  return result;
267 }
268 
269 bool WebMClusterParser::OnUInt(int id, int64_t val) {
270  int64_t* dst;
271  switch (id) {
272  case kWebMIdTimecode:
273  dst = &cluster_timecode_;
274  break;
275  case kWebMIdBlockDuration:
276  dst = &block_duration_;
277  break;
278  case kWebMIdBlockAddID:
279  dst = &block_add_id_;
280  break;
281  default:
282  return true;
283  }
284  if (*dst != -1)
285  return false;
286  *dst = val;
287  return true;
288 }
289 
290 bool WebMClusterParser::ParseBlock(bool is_simple_block,
291  const uint8_t* buf,
292  int size,
293  const uint8_t* additional,
294  int additional_size,
295  int duration,
296  int64_t discard_padding) {
297  if (size < 4)
298  return false;
299 
300  // Return an error if the trackNum > 127. We just aren't
301  // going to support large track numbers right now.
302  if (!(buf[0] & 0x80)) {
303  LOG(ERROR) << "TrackNumber over 127 not supported";
304  return false;
305  }
306 
307  int track_num = buf[0] & 0x7f;
308  int timecode = buf[1] << 8 | buf[2];
309  int flags = buf[3] & 0xff;
310  int lacing = (flags >> 1) & 0x3;
311 
312  if (lacing) {
313  LOG(ERROR) << "Lacing " << lacing << " is not supported yet.";
314  return false;
315  }
316 
317  // Sign extend negative timecode offsets.
318  if (timecode & 0x8000)
319  timecode |= ~0xffff;
320 
321  const uint8_t* frame_data = buf + 4;
322  int frame_size = size - (frame_data - buf);
323  return OnBlock(is_simple_block, track_num, timecode, duration, flags,
324  frame_data, frame_size, additional, additional_size,
325  discard_padding);
326 }
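// For reference, the 4-byte header consumed by ParseBlock() above follows the
// Matroska Block/SimpleBlock layout: byte 0 is the track number as a one-byte
// EBML varint (hence the 0x80 marker bit and the 127-track limit), bytes 1-2
// are the signed 16-bit big-endian timecode offset relative to the Cluster
// timecode, and byte 3 carries the flags (0x80 = keyframe for SimpleBlocks,
// bits 0x06 = lacing type).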
327 
328 bool WebMClusterParser::OnBinary(int id, const uint8_t* data, int size) {
329  switch (id) {
330  case kWebMIdSimpleBlock:
331  return ParseBlock(true, data, size, NULL, 0, -1, 0);
332 
333  case kWebMIdBlock:
334  if (block_data_) {
335  LOG(ERROR) << "More than 1 Block in a BlockGroup is not "
336  "supported.";
337  return false;
338  }
339  block_data_.reset(new uint8_t[size]);
340  memcpy(block_data_.get(), data, size);
341  block_data_size_ = size;
342  return true;
343 
344  case kWebMIdBlockAdditional: {
345  uint64_t block_add_id = base::HostToNet64(block_add_id_);
346  if (block_additional_data_) {
347  // TODO: Technically, more than 1 BlockAdditional is allowed as per
348  // matroska spec. But for now we don't have a use case to support
349  // parsing of such files. Take a look at this again when such a case
350  // arises.
351  LOG(ERROR) << "More than 1 BlockAdditional in a "
352  "BlockGroup is not supported.";
353  return false;
354  }
355  // First 8 bytes of side_data in DecoderBuffer is the BlockAddID
356  // element's value in Big Endian format. This is done to mimic ffmpeg
357  // demuxer's behavior.
358  block_additional_data_size_ = size + sizeof(block_add_id);
359  block_additional_data_.reset(new uint8_t[block_additional_data_size_]);
360  memcpy(block_additional_data_.get(), &block_add_id,
361  sizeof(block_add_id));
362  memcpy(block_additional_data_.get() + 8, data, size);
363  return true;
364  }
365  case kWebMIdDiscardPadding: {
366  if (discard_padding_set_ || size <= 0 || size > 8)
367  return false;
368  discard_padding_set_ = true;
369 
370  // Read in the big-endian integer.
371  discard_padding_ = static_cast<int8_t>(data[0]);
372  for (int i = 1; i < size; ++i)
373  discard_padding_ = (discard_padding_ << 8) | data[i];
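 // Example: a two-byte DiscardPadding payload of {0xff, 0x00} sign-extends
 // to -256 (the value is in nanoseconds per the Matroska specification).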
374 
375  return true;
376  }
377  default:
378  return true;
379  }
380 }
381 
382 bool WebMClusterParser::OnBlock(bool is_simple_block,
383  int track_num,
384  int timecode,
385  int block_duration,
386  int flags,
387  const uint8_t* data,
388  int size,
389  const uint8_t* additional,
390  int additional_size,
391  int64_t discard_padding) {
392  DCHECK_GE(size, 0);
393  if (cluster_timecode_ == -1) {
394  LOG(ERROR) << "Got a block before cluster timecode.";
395  return false;
396  }
397 
398  // TODO: Should relative negative timecode offsets be rejected? Or only when
399  // the absolute timecode is negative? See http://crbug.com/271794
400  if (timecode < 0) {
401  LOG(ERROR) << "Got a block with negative timecode offset " << timecode;
402  return false;
403  }
404 
405  if (last_block_timecode_ != -1 && timecode < last_block_timecode_) {
406  LOG(ERROR) << "Got a block with a timecode before the previous block.";
407  return false;
408  }
409 
410  Track* track = NULL;
411  StreamType stream_type = kStreamAudio;
412  std::string encryption_key_id;
413  int64_t encoded_duration = kNoTimestamp;
414  if (track_num == audio_.track_num()) {
415  track = &audio_;
416  encryption_key_id = audio_encryption_key_id_;
417  if (encryption_key_id.empty()) {
418  encoded_duration = TryGetEncodedAudioDuration(data, size);
419  }
420  } else if (track_num == video_.track_num()) {
421  track = &video_;
422  encryption_key_id = video_encryption_key_id_;
423  stream_type = kStreamVideo;
424  } else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) {
425  return true;
426  } else if (Track* const text_track = FindTextTrack(track_num)) {
427  if (is_simple_block) // BlockGroup is required for WebVTT cues
428  return false;
429  if (block_duration < 0) // not specified
430  return false;
431  track = text_track;
432  stream_type = kStreamText;
433  } else {
434  LOG(ERROR) << "Unexpected track number " << track_num;
435  return false;
436  }
437 
438  last_block_timecode_ = timecode;
439 
440  int64_t timestamp = (cluster_timecode_ + timecode) * timecode_multiplier_;
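 // Example of the timestamp math above: with the default WebM TimecodeScale
 // of 1,000,000 ns per tick, timecode_multiplier_ is 1000, so a Cluster
 // timecode of 5 plus a Block timecode offset of 3 gives (5 + 3) * 1000 =
 // 8000, i.e. an 8 ms presentation timestamp expressed in microseconds.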
441 
442  scoped_refptr<MediaSample> buffer;
443  if (stream_type != kStreamText) {
444  // The first bit of the flags is set when a SimpleBlock contains only
445  // keyframes. If this is a Block, then inspection of the payload is
446  // necessary to determine whether it contains a keyframe or not.
447  // http://www.matroska.org/technical/specs/index.html
448  bool is_keyframe =
449  is_simple_block ? (flags & 0x80) != 0 : track->IsKeyframe(data, size);
450 
451  // Every encrypted Block has a signal byte and IV prepended to it. Current
452  // encrypted WebM request for comments specification is here
453  // http://wiki.webmproject.org/encryption/webm-encryption-rfc
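 // (Per that RFC, the least-significant bit of the signal byte says whether
 // the frame is encrypted; if it is, an 8-byte IV follows, so |data_offset|
 // below ends up skipping either 1 or 9 bytes of the Block payload.)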
454  scoped_ptr<DecryptConfig> decrypt_config;
455  int data_offset = 0;
456  if (!encryption_key_id.empty() &&
457  !WebMCreateDecryptConfig(
458  data, size,
459  reinterpret_cast<const uint8_t*>(encryption_key_id.data()),
460  encryption_key_id.size(),
461  &decrypt_config, &data_offset)) {
462  return false;
463  }
464 
465  buffer = MediaSample::CopyFrom(data + data_offset, size - data_offset,
466  additional, additional_size, is_keyframe);
467 
468  if (decrypt_config) {
469  // TODO(kqyang): Decrypt it if it is encrypted.
470  buffer->set_is_encrypted(true);
471  }
472  } else {
473  std::string id, settings, content;
474  WebMWebVTTParser::Parse(data, size, &id, &settings, &content);
475 
476  std::vector<uint8_t> side_data;
477  MakeSideData(id.begin(), id.end(),
478  settings.begin(), settings.end(),
479  &side_data);
480 
481  buffer = MediaSample::CopyFrom(
482  reinterpret_cast<const uint8_t*>(content.data()), content.length(),
483  &side_data[0], side_data.size(), true);
484  }
485 
486  buffer->set_pts(timestamp);
487  if (cluster_start_time_ == kNoTimestamp)
488  cluster_start_time_ = timestamp;
489 
490  int64_t block_duration_time_delta = kNoTimestamp;
491  if (block_duration >= 0) {
492  block_duration_time_delta = block_duration * timecode_multiplier_;
493  }
494 
495  // Prefer encoded duration over BlockGroup->BlockDuration or
496  // TrackEntry->DefaultDuration when available. This layering violation is a
497  // workaround for http://crbug.com/396634, decreasing the likelihood of
498  // fall-back to rough estimation techniques for Blocks that lack a
499  // BlockDuration at the end of a cluster. Cross cluster durations are not
500  // feasible given flexibility of cluster ordering and MSE APIs. Duration
501  // estimation may still apply in cases of encryption and codecs for which
502  // we do not extract encoded duration. Within a cluster, estimates are applied
503  // as Block Timecode deltas, or once the whole cluster is parsed in the case
504  // of the last Block in the cluster. See Track::EmitBuffer and
505  // ApplyDurationEstimateIfNeeded().
506  if (encoded_duration != kNoTimestamp) {
507  DCHECK(encoded_duration != kInfiniteDuration);
508  DCHECK(encoded_duration > 0);
509  buffer->set_duration(encoded_duration);
510 
511  DVLOG(3) << __FUNCTION__ << " : "
512  << "Using encoded duration " << encoded_duration;
513 
514  if (block_duration_time_delta != kNoTimestamp) {
515  int64_t duration_difference =
516  block_duration_time_delta - encoded_duration;
517 
518  const auto kWarnDurationDiff = timecode_multiplier_ * 2;
519  if (duration_difference > kWarnDurationDiff) {
520  LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
521  << "BlockDuration (" << block_duration_time_delta
522  << "ms) differs significantly from encoded duration ("
523  << encoded_duration << "ms).";
524  }
525  }
526  } else if (block_duration_time_delta != kNoTimestamp) {
527  buffer->set_duration(block_duration_time_delta);
528  } else {
529  buffer->set_duration(track->default_duration());
530  }
531 
532  return track->EmitBuffer(buffer);
533 }
534 
535 WebMClusterParser::Track::Track(int track_num,
536  bool is_video,
537  int64_t default_duration,
538  const MediaParser::NewSampleCB& new_sample_cb)
539  : track_num_(track_num),
540  is_video_(is_video),
541  default_duration_(default_duration),
542  estimated_next_frame_duration_(kNoTimestamp),
543  new_sample_cb_(new_sample_cb) {
544  DCHECK(default_duration_ == kNoTimestamp || default_duration_ > 0);
545 }
546 
547 WebMClusterParser::Track::~Track() {}
548 
549 bool WebMClusterParser::Track::EmitBuffer(
550  const scoped_refptr<MediaSample>& buffer) {
551  DVLOG(2) << "EmitBuffer() : " << track_num_
552  << " ts " << buffer->pts()
553  << " dur " << buffer->duration()
554  << " kf " << buffer->is_key_frame()
555  << " size " << buffer->data_size();
556 
557  if (last_added_buffer_missing_duration_.get()) {
558  int64_t derived_duration =
559  buffer->pts() - last_added_buffer_missing_duration_->pts();
560  last_added_buffer_missing_duration_->set_duration(derived_duration);
561 
562  DVLOG(2) << "EmitBuffer() : applied derived duration to held-back buffer : "
563  << " ts "
564  << last_added_buffer_missing_duration_->pts()
565  << " dur "
566  << last_added_buffer_missing_duration_->duration()
567  << " kf " << last_added_buffer_missing_duration_->is_key_frame()
568  << " size " << last_added_buffer_missing_duration_->data_size();
569  scoped_refptr<MediaSample> updated_buffer =
570  last_added_buffer_missing_duration_;
571  last_added_buffer_missing_duration_ = NULL;
572  if (!EmitBufferHelp(updated_buffer))
573  return false;
574  }
575 
576  if (buffer->duration() == kNoTimestamp) {
577  last_added_buffer_missing_duration_ = buffer;
578  DVLOG(2) << "EmitBuffer() : holding back buffer that is missing duration";
579  return true;
580  }
581 
582  return EmitBufferHelp(buffer);
583 }
584 
585 void WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() {
586  if (!last_added_buffer_missing_duration_.get())
587  return;
588 
589  int64_t estimated_duration = GetDurationEstimate();
590  last_added_buffer_missing_duration_->set_duration(estimated_duration);
591 
592  if (is_video_) {
593  // Exposing estimation so splicing/overlap frame processing can make
594  // informed decisions downstream.
595  // TODO(kqyang): Should we wait for the next cluster to set the duration?
596  // last_added_buffer_missing_duration_->set_is_duration_estimated(true);
597  }
598 
599  LIMITED_LOG(INFO, num_duration_estimates_, kMaxDurationEstimateLogs)
600  << "Estimating WebM block duration to be "
601  << estimated_duration
602  << "ms for the last (Simple)Block in the Cluster for this Track. Use "
603  "BlockGroups with BlockDurations at the end of each Track in a "
604  "Cluster to avoid estimation.";
605 
606  DVLOG(2) << __FUNCTION__ << " new dur : ts "
607  << last_added_buffer_missing_duration_->pts()
608  << " dur "
609  << last_added_buffer_missing_duration_->duration()
610  << " kf " << last_added_buffer_missing_duration_->is_key_frame()
611  << " size " << last_added_buffer_missing_duration_->data_size();
612 
613  // Don't use the applied duration as a future estimation (don't use
614  // EmitBufferHelp() here.)
615  new_sample_cb_.Run(track_num_, last_added_buffer_missing_duration_);
616  last_added_buffer_missing_duration_ = NULL;
617 }
618 
619 void WebMClusterParser::Track::Reset() {
620  last_added_buffer_missing_duration_ = NULL;
621 }
622 
623 bool WebMClusterParser::Track::IsKeyframe(const uint8_t* data, int size) const {
624  // For now, assume that all blocks are keyframes for datatypes other than
625  // video. This is a valid assumption for Vorbis, WebVTT, & Opus.
626  if (!is_video_)
627  return true;
628 
629  // Make sure the block is big enough for the minimal keyframe header size.
630  if (size < 7)
631  return false;
632 
633  // The LSb of the first byte must be a 0 for a keyframe.
634  // http://tools.ietf.org/html/rfc6386 Section 19.1
635  if ((data[0] & 0x01) != 0)
636  return false;
637 
638  // Verify VP8 keyframe startcode.
639  // http://tools.ietf.org/html/rfc6386 Section 19.1
640  if (data[3] != 0x9d || data[4] != 0x01 || data[5] != 0x2a)
641  return false;
642 
643  return true;
644 }
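// Background for IsKeyframe() above, per RFC 6386: a VP8 frame begins with a
// 3-byte frame tag whose least-significant bit is 0 for key frames, and key
// frames then continue with the start code 0x9d 0x01 0x2a followed by the
// 16-bit width and height fields; the checks above look only at the tag bit
// and the start code.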
645 
646 bool WebMClusterParser::Track::EmitBufferHelp(
647  const scoped_refptr<MediaSample>& buffer) {
648  DCHECK(!last_added_buffer_missing_duration_.get());
649 
650  int64_t duration = buffer->duration();
651  if (duration < 0 || duration == kNoTimestamp) {
652  LOG(ERROR) << "Invalid buffer duration: " << duration;
653  return false;
654  }
655 
656  // The estimated frame duration is the minimum (for audio) or the maximum
657  // (for video) non-zero duration since the last initialization segment. The
658  // minimum is used for audio to ensure frame durations aren't overestimated,
659  // triggering unnecessary frame splicing. For video, splicing does not apply,
660  // so maximum is used and overlap is simply resolved by showing the
661  // later of the overlapping frames at its given PTS, effectively trimming down
662  // the over-estimated duration of the previous frame.
663  // TODO: Use max for audio and disable splicing whenever estimated buffers are
664  // encountered.
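 // For example, audio frames of 20 ms, 23 ms and 20 ms leave an estimate of
 // 20 ms (the minimum), while video frames of 33 ms and 34 ms leave 34 ms
 // (the maximum); that value is what ApplyDurationEstimateIfNeeded() later
 // assigns to a final Block that arrives without any duration information.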
665  if (duration > 0) {
666  int64_t orig_duration_estimate = estimated_next_frame_duration_;
667  if (estimated_next_frame_duration_ == kNoTimestamp) {
668  estimated_next_frame_duration_ = duration;
669  } else if (is_video_) {
670  estimated_next_frame_duration_ =
671  std::max(duration, estimated_next_frame_duration_);
672  } else {
673  estimated_next_frame_duration_ =
674  std::min(duration, estimated_next_frame_duration_);
675  }
676 
677  if (orig_duration_estimate != estimated_next_frame_duration_) {
678  DVLOG(3) << "Updated duration estimate:"
679  << orig_duration_estimate
680  << " -> "
681  << estimated_next_frame_duration_
682  << " at timestamp: "
683  << buffer->dts();
684  }
685  }
686 
687  new_sample_cb_.Run(track_num_, buffer);
688  return true;
689 }
690 
691 int64_t WebMClusterParser::Track::GetDurationEstimate() {
692  int64_t duration = estimated_next_frame_duration_;
693  if (duration != kNoTimestamp) {
694  DVLOG(3) << __FUNCTION__ << " : using estimated duration";
695  } else {
696  DVLOG(3) << __FUNCTION__ << " : using hardcoded default duration";
697  if (is_video_) {
698  duration = kDefaultVideoBufferDurationInMs * kMicrosecondsPerMillisecond;
699  } else {
700  duration = kDefaultAudioBufferDurationInMs * kMicrosecondsPerMillisecond;
701  }
702  }
703 
704  DCHECK(duration > 0);
705  DCHECK(duration != kNoTimestamp);
706  return duration;
707 }
708 
709 void WebMClusterParser::ResetTextTracks() {
710  for (TextTrackMap::iterator it = text_track_map_.begin();
711  it != text_track_map_.end();
712  ++it) {
713  it->second.Reset();
714  }
715 }
716 
717 WebMClusterParser::Track*
718 WebMClusterParser::FindTextTrack(int track_num) {
719  const TextTrackMap::iterator it = text_track_map_.find(track_num);
720 
721  if (it == text_track_map_.end())
722  return NULL;
723 
724  return &it->second;
725 }
726 
727 } // namespace media
728 } // namespace edash_packager