shaka-packager/packager/media/formats/webm/webm_cluster_parser.cc

// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "packager/media/formats/webm/webm_cluster_parser.h"

#include <vector>

#include "packager/base/logging.h"
#include "packager/base/sys_byteorder.h"
#include "packager/media/base/decrypt_config.h"
#include "packager/media/base/timestamp_constants.h"
#include "packager/media/filters/webvtt_util.h"
#include "packager/media/formats/webm/webm_constants.h"
#include "packager/media/formats/webm/webm_crypto_helpers.h"
#include "packager/media/formats/webm/webm_webvtt_parser.h"

// Logs only while |count| < |max|, increments |count| for each log, and warns
// in the log if |count| has just reached |max|.
//
// The increment is part of the LOG_IF/DLOG_IF condition so that it happens
// exactly once per emitted entry; |count| must therefore be a modifiable
// lvalue. The `|| true` keeps the condition true when the pre-increment value
// of |count| is 0. Because the increment has already happened by the time the
// message is streamed, the "limit reached" prefix is selected by comparing
// |count| against |max| directly.
// NOTE(review): if the underlying logging macro does not evaluate its
// condition (e.g. logging compiled out), |count| is left untouched.
#define LIMITED_LOG(level, count, max)                         \
  LOG_IF(level, (count) < (max) && ((count)++ || true))        \
      << (((count) == (max))                                   \
              ? "(Log limit reached. Further similar entries " \
                "may be suppressed): "                         \
              : "")
#define LIMITED_DLOG(level, count, max)                        \
  DLOG_IF(level, (count) < (max) && ((count)++ || true))       \
      << (((count) == (max))                                   \
              ? "(Log limit reached. Further similar entries " \
                "may be suppressed): "                         \
              : "")

namespace edash_packager {
namespace media {

// Duration, in microseconds, of a single Opus frame for each of the 32
// possible TOC "config" values. ReadOpusDuration() indexes this table with the
// upper five bits of the packet's first byte and multiplies the entry by the
// packet's frame count.
const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = {
    10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000,
    60000, 10000, 20000, 10000, 20000, 2500,  5000,  10000, 20000, 2500,  5000,
    10000, 20000, 2500,  5000,  10000, 20000, 2500,  5000,  10000, 20000};

// Upper bounds passed as |max| to the LIMITED_LOG / LIMITED_DLOG macros in
// this file.
enum {
  // Limits the number of LOG() calls in the path of reading encoded
  // duration to avoid spamming for corrupted data.
  kMaxDurationErrorLogs = 10,
  // Limits the number of LOG() calls warning the user that buffer
  // durations have been estimated.
  kMaxDurationEstimateLogs = 10,
};

// Sets up parsing state for one audio track, one video track and every text
// track listed in |text_tracks|. |timecode_scale| is divided by 1000 to obtain
// the factor that converts cluster/block timecodes into microseconds.
WebMClusterParser::WebMClusterParser(
    int64_t timecode_scale,
    int audio_track_num,
    base::TimeDelta audio_default_duration,
    int video_track_num,
    base::TimeDelta video_default_duration,
    const WebMTracksParser::TextTracks& text_tracks,
    const std::set<int64_t>& ignored_tracks,
    const std::string& audio_encryption_key_id,
    const std::string& video_encryption_key_id,
    const AudioCodec audio_codec)
    : timecode_multiplier_(timecode_scale / 1000.0),
      ignored_tracks_(ignored_tracks),
      audio_encryption_key_id_(audio_encryption_key_id),
      video_encryption_key_id_(video_encryption_key_id),
      audio_codec_(audio_codec),
      parser_(kWebMIdCluster, this),
      cluster_start_time_(kNoTimestamp()),
      audio_(audio_track_num, false, audio_default_duration),
      video_(video_track_num, true, video_default_duration),
      ready_buffer_upper_bound_(kNoDecodeTimestamp()) {
  // Every text track gets its own non-video Track without a default duration.
  for (const auto& text_track : text_tracks) {
    const auto& text_track_num = text_track.first;
    text_track_map_.insert(std::make_pair(
        text_track_num, Track(text_track_num, false, kNoTimestamp())));
  }
}

// Nothing to release by hand; members clean up after themselves.
WebMClusterParser::~WebMClusterParser() = default;

// Returns the parser to its "no cluster seen yet" state and drops every
// buffered or ready buffer on all tracks.
void WebMClusterParser::Reset() {
  // Per-cluster bookkeeping.
  cluster_ended_ = false;
  cluster_timecode_ = -1;
  last_block_timecode_ = -1;
  cluster_start_time_ = kNoTimestamp();
  ready_buffer_upper_bound_ = kNoDecodeTimestamp();

  // Underlying list parser and per-track queues.
  parser_.Reset();
  audio_.Reset();
  video_.Reset();
  ResetTextTracks();
}

// Feeds |size| bytes at |buf| to the cluster list parser. Buffers made ready by
// a previous call are discarded first. Returns the list parser's result, or -1
// when a completed cluster had neither blocks nor a cluster timecode.
int WebMClusterParser::Parse(const uint8_t* buf, int size) {
  audio_.ClearReadyBuffers();
  video_.ClearReadyBuffers();
  ClearTextTrackReadyBuffers();
  ready_buffer_upper_bound_ = kNoDecodeTimestamp();

  const int parse_result = parser_.Parse(buf, size);
  if (parse_result < 0) {
    cluster_ended_ = false;
    return parse_result;
  }

  cluster_ended_ = parser_.IsParsingComplete();
  if (!cluster_ended_)
    return parse_result;

  // A cluster without any buffers starts at its |cluster_timecode_|.
  if (cluster_start_time_ == kNoTimestamp()) {
    // Without even a |cluster_timecode_| there is nothing to anchor the
    // cluster to, so report a parse error.
    if (cluster_timecode_ < 0)
      return -1;

    cluster_start_time_ = base::TimeDelta::FromMicroseconds(
        cluster_timecode_ * timecode_multiplier_);
  }

  // The cluster is complete: get the list parser ready for the next cluster
  // and forget the per-cluster timecodes.
  parser_.Reset();
  last_block_timecode_ = -1;
  cluster_timecode_ = -1;

  return parse_result;
}

// Returns the audio buffers that are ready for retrieval, computing the ready
// set first if it has not been computed yet.
const WebMClusterParser::BufferQueue& WebMClusterParser::GetAudioBuffers() {
  const bool ready_set_pending =
      (ready_buffer_upper_bound_ == kNoDecodeTimestamp());
  if (ready_set_pending) {
    UpdateReadyBuffers();
  }
  return audio_.ready_buffers();
}

// Returns the video buffers that are ready for retrieval, computing the ready
// set first if it has not been computed yet.
const WebMClusterParser::BufferQueue& WebMClusterParser::GetVideoBuffers() {
  const bool ready_set_pending =
      (ready_buffer_upper_bound_ == kNoDecodeTimestamp());
  if (ready_set_pending) {
    UpdateReadyBuffers();
  }
  return video_.ready_buffers();
}

// Returns a map from text track number to that track's ready buffers. Tracks
// whose ready queue is empty are left out of the map.
const WebMClusterParser::TextBufferQueueMap&
WebMClusterParser::GetTextBuffers() {
  if (ready_buffer_upper_bound_ == kNoDecodeTimestamp())
    UpdateReadyBuffers();

  // Rebuild |text_buffers_map_| from scratch out of |text_track_map_|.
  text_buffers_map_.clear();
  for (const auto& entry : text_track_map_) {
    const BufferQueue& ready_queue = entry.second.ready_buffers();
    if (ready_queue.empty())
      continue;
    text_buffers_map_.insert(std::make_pair(entry.first, ready_queue));
  }

  return text_buffers_map_;
}

// Attempts to read the duration encoded in an audio block's payload. Only Opus
// is handled; every other codec yields kNoTimestamp().
base::TimeDelta WebMClusterParser::TryGetEncodedAudioDuration(
    const uint8_t* data,
    int size) {
  // Duration is currently read assuming the *entire* stream is unencrypted.
  // The special "Signal Byte" prepended to Blocks in encrypted streams is
  // assumed to not be present.
  // TODO(chcunningham): Consider parsing "Signal Byte" for encrypted streams
  // to return duration for any unencrypted blocks.

  // TODO(wolenetz/chcunningham): Implement duration reading for Vorbis. See
  // motivations in http://crbug.com/396634.
  if (audio_codec_ != kCodecOpus)
    return kNoTimestamp();

  return ReadOpusDuration(data, size);
}

// Derives the duration of an Opus packet from its TOC byte (and, for 'Code 3'
// packets, the frame count byte that follows). Returns kNoTimestamp() when the
// packet is too short or declares zero frames.
base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data,
                                                    int size) {
  // Bit layout of the Opus TOC byte and of the 'Code 3' frame count byte. See
  // https://tools.ietf.org/html/rfc6716#page-14
  static const uint8_t kTocConfigMask = 0xf8;
  static const uint8_t kTocFrameCountCodeMask = 0x03;
  static const uint8_t kFrameCountMask = 0x3f;
  static const base::TimeDelta kPacketDurationMax =
      base::TimeDelta::FromMilliseconds(120);

  if (size < 1) {
    LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
        << "Invalid zero-byte Opus packet; demuxed block duration may be "
           "imprecise.";
    return kNoTimestamp();
  }

  const uint8_t toc_byte = data[0];

  // The two least significant TOC bits select how the frame count is encoded.
  const int frame_count_type = toc_byte & kTocFrameCountCodeMask;

  int frame_count = 0;
  if (frame_count_type == 0) {
    frame_count = 1;
  } else if (frame_count_type == 1 || frame_count_type == 2) {
    frame_count = 2;
  } else if (frame_count_type == 3) {
    // 'Code 3': the frame count is carried in the byte after the TOC.
    if (size < 2) {
      LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
          << "Second byte missing from 'Code 3' Opus packet; demuxed block "
             "duration may be imprecise.";
      return kNoTimestamp();
    }

    frame_count = data[1] & kFrameCountMask;

    if (frame_count == 0) {
      LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
          << "Illegal 'Code 3' Opus packet with frame count zero; demuxed "
             "block duration may be imprecise.";
      return kNoTimestamp();
    }
  } else {
    LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
        << "Unexpected Opus frame count type: " << frame_count_type << "; "
        << "demuxed block duration may be imprecise.";
    return kNoTimestamp();
  }

  // The five most significant TOC bits index the per-frame duration table.
  const int config_index = (toc_byte & kTocConfigMask) >> 3;
  CHECK_GE(config_index, 0);
  CHECK_LT(config_index, static_cast<int>(arraysize(kOpusFrameDurationsMu)));

  DCHECK_GT(frame_count, 0);
  const base::TimeDelta packet_duration = base::TimeDelta::FromMicroseconds(
      kOpusFrameDurationsMu[config_index] * frame_count);

  if (packet_duration > kPacketDurationMax) {
    // Over-long packets are deliberately passed through; the decoder is
    // expected to either cope or fail gracefully. The log entry is left as a
    // breadcrumb in case things go sideways.
    LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
        << "Warning, demuxed Opus packet with encoded duration: "
        << packet_duration.InMilliseconds() << "ms. Should be no greater than "
        << kPacketDurationMax.InMilliseconds() << "ms.";
  }

  return packet_duration;
}

// Called when a list element begins. Clears the state that the children of a
// Cluster, BlockGroup or BlockAdditions element are about to fill in, and
// keeps this object as the client for the nested elements.
WebMParserClient* WebMClusterParser::OnListStart(int id) {
  switch (id) {
    case kWebMIdCluster:
      cluster_timecode_ = -1;
      cluster_start_time_ = kNoTimestamp();
      break;

    case kWebMIdBlockGroup:
      block_data_.reset();
      block_data_size_ = -1;
      block_duration_ = -1;
      discard_padding_ = -1;
      discard_padding_set_ = false;
      break;

    case kWebMIdBlockAdditions:
      block_add_id_ = -1;
      block_additional_data_.reset();
      block_additional_data_size_ = 0;
      break;

    default:
      break;
  }

  return this;
}

// Called when a list element ends. Only the end of a BlockGroup matters: the
// Block collected while inside the group is parsed and all BlockGroup state is
// cleared afterwards. Returns false if the group had no Block or the Block
// failed to parse.
bool WebMClusterParser::OnListEnd(int id) {
  if (id != kWebMIdBlockGroup)
    return true;

  // A BlockGroup without a Block is malformed.
  if (block_data_size_ == -1) {
    LOG(ERROR) << "Block missing from BlockGroup.";
    return false;
  }

  const int64_t effective_discard_padding =
      discard_padding_set_ ? discard_padding_ : 0;
  const bool parsed_ok = ParseBlock(
      false, block_data_.get(), block_data_size_, block_additional_data_.get(),
      block_additional_data_size_, block_duration_, effective_discard_padding);

  // Drop everything gathered for this BlockGroup.
  block_data_.reset();
  block_data_size_ = -1;
  block_duration_ = -1;
  discard_padding_ = -1;
  discard_padding_set_ = false;
  block_add_id_ = -1;
  block_additional_data_.reset();
  block_additional_data_size_ = 0;

  return parsed_ok;
}

// Stores the value of a Timecode, BlockDuration or BlockAddID element. Other
// ids are accepted and ignored. Returns false when the target field has
// already been set (i.e. is no longer -1).
bool WebMClusterParser::OnUInt(int id, int64_t val) {
  int64_t* target = NULL;
  if (id == kWebMIdTimecode) {
    target = &cluster_timecode_;
  } else if (id == kWebMIdBlockDuration) {
    target = &block_duration_;
  } else if (id == kWebMIdBlockAddID) {
    target = &block_add_id_;
  } else {
    return true;
  }

  // -1 marks "not set yet"; anything else means a duplicate element.
  const bool already_set = (*target != -1);
  if (already_set)
    return false;

  *target = val;
  return true;
}

// Decodes the 4-byte header of a (Simple)Block -- track number, 16-bit signed
// timecode offset and flags -- and forwards the remaining payload to OnBlock().
// Returns false for blocks shorter than the header, track numbers above 127
// and laced blocks.
bool WebMClusterParser::ParseBlock(bool is_simple_block,
                                   const uint8_t* buf,
                                   int size,
                                   const uint8_t* additional,
                                   int additional_size,
                                   int duration,
                                   int64_t discard_padding) {
  static const int kBlockHeaderSize = 4;
  if (size < kBlockHeaderSize)
    return false;

  // Only single-byte track numbers (marker bit set) are handled, which caps
  // the track number at 127.
  const uint8_t track_byte = buf[0];
  if ((track_byte & 0x80) == 0) {
    LOG(ERROR) << "TrackNumber over 127 not supported";
    return false;
  }
  const int track_num = track_byte & 0x7f;

  int timecode = (buf[1] << 8) | buf[2];
  const int flags = buf[3] & 0xff;

  const int lacing = (flags >> 1) & 0x3;
  if (lacing != 0) {
    LOG(ERROR) << "Lacing " << lacing << " is not supported yet.";
    return false;
  }

  // The timecode offset is a signed 16-bit value; extend its sign bit.
  if (timecode & 0x8000)
    timecode |= ~0xffff;

  const uint8_t* const frame_data = buf + kBlockHeaderSize;
  const int frame_size = size - kBlockHeaderSize;
  return OnBlock(is_simple_block, track_num, timecode, duration, flags,
                 frame_data, frame_size, additional, additional_size,
                 discard_padding);
}

// Handles binary child elements of a Cluster / BlockGroup.
//  - SimpleBlock: parsed immediately.
//  - Block: copied and kept until the enclosing BlockGroup ends.
//  - BlockAdditional: copied, prefixed with the big-endian BlockAddID.
//  - DiscardPadding: decoded as a signed big-endian integer of 1 to 8 bytes.
// All other ids are accepted and ignored. Returns false on duplicate
// Block / BlockAdditional / DiscardPadding elements, on a DiscardPadding with
// an invalid size, or when a SimpleBlock fails to parse.
bool WebMClusterParser::OnBinary(int id, const uint8_t* data, int size) {
  switch (id) {
    case kWebMIdSimpleBlock:
      return ParseBlock(true, data, size, NULL, 0, -1, 0);

    case kWebMIdBlock:
      if (block_data_) {
        LOG(ERROR) << "More than 1 Block in a BlockGroup is not "
                      "supported.";
        return false;
      }
      block_data_.reset(new uint8_t[size]);
      memcpy(block_data_.get(), data, size);
      block_data_size_ = size;
      return true;

    case kWebMIdBlockAdditional: {
      uint64_t block_add_id = base::HostToNet64(block_add_id_);
      if (block_additional_data_) {
        // TODO(vigneshv): Technically, more than 1 BlockAdditional is allowed
        // as per matroska spec. But for now we don't have a use case to
        // support parsing of such files. Take a look at this again when such a
        // case arises.
        LOG(ERROR) << "More than 1 BlockAdditional in a "
                      "BlockGroup is not supported.";
        return false;
      }
      // First 8 bytes of side_data in DecoderBuffer is the BlockAddID
      // element's value in Big Endian format. This is done to mimic ffmpeg
      // demuxer's behavior.
      block_additional_data_size_ = size + sizeof(block_add_id);
      block_additional_data_.reset(new uint8_t[block_additional_data_size_]);
      memcpy(block_additional_data_.get(), &block_add_id,
             sizeof(block_add_id));
      // The payload follows directly after the BlockAddID prefix.
      memcpy(block_additional_data_.get() + sizeof(block_add_id), data, size);
      return true;
    }
    case kWebMIdDiscardPadding: {
      if (discard_padding_set_ || size <= 0 || size > 8)
        return false;
      discard_padding_set_ = true;

      // Read in the big-endian integer. The first byte is sign-extended; the
      // accumulation is done on an unsigned value because left-shifting a
      // negative signed integer is undefined behavior prior to C++20.
      uint64_t padding_bits = static_cast<uint64_t>(
          static_cast<int64_t>(static_cast<int8_t>(data[0])));
      for (int i = 1; i < size; ++i)
        padding_bits = (padding_bits << 8) | data[i];
      discard_padding_ = static_cast<int64_t>(padding_bits);

      return true;
    }
    default:
      return true;
  }
}

// Turns one decoded (Simple)Block into a StreamParserBuffer and hands it to
// the matching track.
//
// |timecode| is the block's offset relative to the cluster timecode and
// |block_duration| is the BlockGroup's BlockDuration (negative when absent),
// both in timecode units. |data|/|size| is the frame payload and
// |additional|/|additional_size| the optional BlockAdditional side data.
//
// Returns true when the block was queued, or belongs to an ignored track.
// Returns false when no cluster timecode has been seen, the timecode offset is
// negative or goes backwards, the track number is unknown, a text block is not
// a BlockGroup with a duration, the decrypt config cannot be created, or the
// track rejects the buffer.
bool WebMClusterParser::OnBlock(bool is_simple_block,
                                int track_num,
                                int timecode,
                                int block_duration,
                                int flags,
                                const uint8_t* data,
                                int size,
                                const uint8_t* additional,
                                int additional_size,
                                int64_t discard_padding) {
  DCHECK_GE(size, 0);
  if (cluster_timecode_ == -1) {
    LOG(ERROR) << "Got a block before cluster timecode.";
    return false;
  }

  // TODO(acolwell): Should relative negative timecode offsets be rejected?  Or
  // only when the absolute timecode is negative?  See http://crbug.com/271794
  if (timecode < 0) {
    LOG(ERROR) << "Got a block with negative timecode offset " << timecode;
    return false;
  }

  if (last_block_timecode_ != -1 && timecode < last_block_timecode_) {
    LOG(ERROR) << "Got a block with a timecode before the previous block.";
    return false;
  }

  // Work out which track the block belongs to and what kind of buffer to
  // build for it.
  Track* track = NULL;
  StreamParserBuffer::Type buffer_type = DemuxerStream::AUDIO;
  std::string encryption_key_id;
  base::TimeDelta encoded_duration = kNoTimestamp();
  if (track_num == audio_.track_num()) {
    track = &audio_;
    encryption_key_id = audio_encryption_key_id_;
    // The encoded duration is only read when the audio track has no key id.
    if (encryption_key_id.empty()) {
      encoded_duration = TryGetEncodedAudioDuration(data, size);
    }
  } else if (track_num == video_.track_num()) {
    track = &video_;
    encryption_key_id = video_encryption_key_id_;
    buffer_type = DemuxerStream::VIDEO;
  } else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) {
    return true;
  } else if (Track* const text_track = FindTextTrack(track_num)) {
    if (is_simple_block)  // BlockGroup is required for WebVTT cues
      return false;
    if (block_duration < 0)  // not specified
      return false;
    track = text_track;
    buffer_type = DemuxerStream::TEXT;
  } else {
    LOG(ERROR) << "Unexpected track number " << track_num;
    return false;
  }

  last_block_timecode_ = timecode;

  // Absolute timestamp: cluster timecode plus block offset, scaled to
  // microseconds.
  base::TimeDelta timestamp = base::TimeDelta::FromMicroseconds(
      (cluster_timecode_ + timecode) * timecode_multiplier_);

  scoped_refptr<StreamParserBuffer> buffer;
  if (buffer_type != DemuxerStream::TEXT) {
    // The first bit of the flags is set when a SimpleBlock contains only
    // keyframes. If this is a Block, then inspection of the payload is
    // necessary to determine whether it contains a keyframe or not.
    // http://www.matroska.org/technical/specs/index.html
    bool is_keyframe =
        is_simple_block ? (flags & 0x80) != 0 : track->IsKeyframe(data, size);

    // Every encrypted Block has a signal byte and IV prepended to it. Current
    // encrypted WebM request for comments specification is here
    // http://wiki.webmproject.org/encryption/webm-encryption-rfc
    scoped_ptr<DecryptConfig> decrypt_config;
    int data_offset = 0;
    if (!encryption_key_id.empty() &&
        !WebMCreateDecryptConfig(
             data, size,
             reinterpret_cast<const uint8_t*>(encryption_key_id.data()),
             encryption_key_id.size(),
             &decrypt_config, &data_offset)) {
      return false;
    }

    // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId
    // type with remapped bytestream track numbers and allow multiple tracks as
    // applicable. See https://crbug.com/341581.
    buffer = StreamParserBuffer::CopyFrom(
        data + data_offset, size - data_offset,
        additional, additional_size,
        is_keyframe, buffer_type, track_num);

    if (decrypt_config)
      buffer->set_decrypt_config(decrypt_config.Pass());
  } else {
    // Text block: split the payload into cue id, settings and content. The id
    // and settings travel as side data; the content becomes the buffer data.
    std::string id, settings, content;
    WebMWebVTTParser::Parse(data, size, &id, &settings, &content);

    std::vector<uint8_t> side_data;
    MakeSideData(id.begin(), id.end(),
                 settings.begin(), settings.end(),
                 &side_data);

    // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId
    // type with remapped bytestream track numbers and allow multiple tracks as
    // applicable. See https://crbug.com/341581.
    // NOTE(review): &side_data[0] assumes MakeSideData() never leaves
    // |side_data| empty -- confirm.
    buffer = StreamParserBuffer::CopyFrom(
        reinterpret_cast<const uint8_t*>(content.data()),
        content.length(),
        &side_data[0],
        side_data.size(),
        true, buffer_type, track_num);
  }

  buffer->set_timestamp(timestamp);
  // The first block seen in a cluster defines the cluster start time.
  if (cluster_start_time_ == kNoTimestamp())
    cluster_start_time_ = timestamp;

  base::TimeDelta block_duration_time_delta = kNoTimestamp();
  if (block_duration >= 0) {
    block_duration_time_delta = base::TimeDelta::FromMicroseconds(
        block_duration * timecode_multiplier_);
  }

  // Prefer encoded duration over BlockGroup->BlockDuration or
  // TrackEntry->DefaultDuration when available. This layering violation is a
  // workaround for http://crbug.com/396634, decreasing the likelihood of
  // fall-back to rough estimation techniques for Blocks that lack a
  // BlockDuration at the end of a cluster. Cross cluster durations are not
  // feasible given flexibility of cluster ordering and MSE APIs. Duration
  // estimation may still apply in cases of encryption and codecs for which
  // we do not extract encoded duration. Within a cluster, estimates are applied
  // as Block Timecode deltas, or once the whole cluster is parsed in the case
  // of the last Block in the cluster. See Track::AddBuffer and
  // ApplyDurationEstimateIfNeeded().
  if (encoded_duration != kNoTimestamp()) {
    DCHECK(encoded_duration != kInfiniteDuration());
    DCHECK(encoded_duration > base::TimeDelta());
    buffer->set_duration(encoded_duration);

    DVLOG(3) << __FUNCTION__ << " : "
             << "Using encoded duration " << encoded_duration.InSecondsF();

    if (block_duration_time_delta != kNoTimestamp()) {
      base::TimeDelta duration_difference =
          block_duration_time_delta - encoded_duration;

      // Differences of up to two timecode units are tolerated without a log
      // entry.
      const auto kWarnDurationDiff =
          base::TimeDelta::FromMicroseconds(timecode_multiplier_ * 2);
      if (duration_difference.magnitude() > kWarnDurationDiff) {
        LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
            << "BlockDuration (" << block_duration_time_delta.InMilliseconds()
            << "ms) differs significantly from encoded duration ("
            << encoded_duration.InMilliseconds() << "ms).";
      }
    }
  } else if (block_duration_time_delta != kNoTimestamp()) {
    buffer->set_duration(block_duration_time_delta);
  } else {
    // Text blocks were required to carry a BlockDuration above, so only audio
    // and video can end up here. The track default may itself be
    // kNoTimestamp(), in which case Track::AddBuffer() holds the buffer back.
    DCHECK_NE(buffer_type, DemuxerStream::TEXT);
    buffer->set_duration(track->default_duration());
  }

  if (discard_padding != 0) {
    // NOTE(review): the division by 1000 suggests |discard_padding| is in
    // nanoseconds -- confirm against the DiscardPadding element definition.
    buffer->set_discard_padding(std::make_pair(
        base::TimeDelta(),
        base::TimeDelta::FromMicroseconds(discard_padding / 1000)));
  }

  return track->AddBuffer(buffer);
}

// Creates the per-track state for track |track_num|. |default_duration| is
// either kNoTimestamp() (no default) or a strictly positive duration. No frame
// duration estimate is available until buffers have been queued.
WebMClusterParser::Track::Track(int track_num,
                                bool is_video,
                                base::TimeDelta default_duration)
    : track_num_(track_num),
      is_video_(is_video),
      default_duration_(default_duration),
      estimated_next_frame_duration_(kNoTimestamp()) {
  DCHECK(default_duration_ == kNoTimestamp() ||
         default_duration_ > base::TimeDelta());
}

// Nothing to release by hand; members clean up after themselves.
WebMClusterParser::Track::~Track() = default;

// Returns the exclusive upper bound below which this track's buffers may be
// released: the decode timestamp of the buffer held back for lack of a
// duration, or the maximum representable time when nothing is held back.
DecodeTimestamp WebMClusterParser::Track::GetReadyUpperBound() {
  DCHECK(ready_buffers_.empty());

  const bool holding_back_buffer =
      last_added_buffer_missing_duration_.get() != NULL;
  if (!holding_back_buffer)
    return DecodeTimestamp::FromPresentationTime(base::TimeDelta::Max());

  return last_added_buffer_missing_duration_->GetDecodeTimestamp();
}

// Moves every queued buffer whose decode timestamp is strictly before
// |before_timestamp| from |buffers_| into |ready_buffers_|, preserving order.
void WebMClusterParser::Track::ExtractReadyBuffers(
    const DecodeTimestamp before_timestamp) {
  DCHECK(ready_buffers_.empty());
  DCHECK(DecodeTimestamp() <= before_timestamp);
  DCHECK(kNoDecodeTimestamp() != before_timestamp);

  if (buffers_.empty())
    return;

  // Fast path: the newest buffer is below the bound, so all of them are.
  const bool everything_ready =
      buffers_.back()->GetDecodeTimestamp() < before_timestamp;
  if (everything_ready) {
    ready_buffers_.swap(buffers_);
    DVLOG(3) << __FUNCTION__ << " : " << track_num_ << " All "
             << ready_buffers_.size() << " are ready: before upper bound ts "
             << before_timestamp.InSecondsF();
    return;
  }

  // Otherwise peel buffers off the front until the bound is reached. The
  // newest buffer is at or after the bound, so |buffers_| never runs empty.
  while (buffers_.front()->GetDecodeTimestamp() < before_timestamp) {
    ready_buffers_.push_back(buffers_.front());
    buffers_.pop_front();
    DCHECK(!buffers_.empty());
  }

  DVLOG(3) << __FUNCTION__ << " : " << track_num_ << " Only "
           << ready_buffers_.size() << " ready, " << buffers_.size()
           << " at or after upper bound ts " << before_timestamp.InSecondsF();
}

// Adds |buffer| to this track.
//
// If an earlier buffer is being held back because it had no duration, that
// buffer's duration is first set to the timestamp difference between |buffer|
// and it, and it is queued. |buffer| itself is then either queued, or held
// back in turn when its duration is kNoTimestamp().
//
// Returns false if QueueBuffer() rejects either buffer.
bool WebMClusterParser::Track::AddBuffer(
    const scoped_refptr<StreamParserBuffer>& buffer) {
  DVLOG(2) << "AddBuffer() : " << track_num_
           << " ts " << buffer->timestamp().InSecondsF()
           << " dur " << buffer->duration().InSecondsF()
           << " kf " << buffer->is_key_frame()
           << " size " << buffer->data_size();

  if (last_added_buffer_missing_duration_.get()) {
    base::TimeDelta derived_duration =
        buffer->timestamp() - last_added_buffer_missing_duration_->timestamp();
    last_added_buffer_missing_duration_->set_duration(derived_duration);

    DVLOG(2) << "AddBuffer() : applied derived duration to held-back buffer : "
             << " ts "
             << last_added_buffer_missing_duration_->timestamp().InSecondsF()
             << " dur "
             << last_added_buffer_missing_duration_->duration().InSecondsF()
             << " kf " << last_added_buffer_missing_duration_->is_key_frame()
             << " size " << last_added_buffer_missing_duration_->data_size();
    // Release the held-back slot before queueing: QueueBuffer() DCHECKs that
    // no buffer is being held back.
    scoped_refptr<StreamParserBuffer> updated_buffer =
        last_added_buffer_missing_duration_;
    last_added_buffer_missing_duration_ = NULL;
    if (!QueueBuffer(updated_buffer))
      return false;
  }

  if (buffer->duration() == kNoTimestamp()) {
    last_added_buffer_missing_duration_ = buffer;
    DVLOG(2) << "AddBuffer() : holding back buffer that is missing duration";
    return true;
  }

  return QueueBuffer(buffer);
}

// If a buffer is still being held back for lack of a duration, gives it the
// duration returned by GetDurationEstimate() and appends it to |buffers_|.
// Does nothing when no buffer is held back. For video tracks the buffer is
// additionally flagged as having an estimated duration.
void WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() {
  if (!last_added_buffer_missing_duration_.get())
    return;

  base::TimeDelta estimated_duration = GetDurationEstimate();
  last_added_buffer_missing_duration_->set_duration(estimated_duration);

  if (is_video_) {
    // Exposing estimation so splicing/overlap frame processing can make
    // informed decisions downstream.
    // TODO(chcunningham): Set this for audio as well in later change where
    // audio is switched to max estimation and splicing is disabled.
    last_added_buffer_missing_duration_->set_is_duration_estimated(true);
  }

  LIMITED_LOG(INFO, num_duration_estimates_, kMaxDurationEstimateLogs)
      << "Estimating WebM block duration to be "
      << estimated_duration.InMilliseconds()
      << "ms for the last (Simple)Block in the Cluster for this Track. Use "
         "BlockGroups with BlockDurations at the end of each Track in a "
         "Cluster to avoid estimation.";

  DVLOG(2) << __FUNCTION__ << " new dur : ts "
           << last_added_buffer_missing_duration_->timestamp().InSecondsF()
           << " dur "
           << last_added_buffer_missing_duration_->duration().InSecondsF()
           << " kf " << last_added_buffer_missing_duration_->is_key_frame()
           << " size " << last_added_buffer_missing_duration_->data_size();

  // Don't use the applied duration as a future estimation (don't use
  // QueueBuffer() here.)
  buffers_.push_back(last_added_buffer_missing_duration_);
  last_added_buffer_missing_duration_ = NULL;
}

// Discards the buffers previously made ready for retrieval.
void WebMClusterParser::Track::ClearReadyBuffers() {
  // Note that |buffers_| are kept and |estimated_next_frame_duration_| is not
  // reset here.
  ready_buffers_.clear();
}

// Drops every buffer this track owns: the held-back one, the queued ones and
// the ready ones. |estimated_next_frame_duration_| is left as it is.
void WebMClusterParser::Track::Reset() {
  last_added_buffer_missing_duration_ = NULL;
  buffers_.clear();
  ClearReadyBuffers();
}

// Reports whether the block payload |data| of |size| bytes is a keyframe.
// Non-video tracks always answer true; video payloads are checked against the
// VP8 keyframe header.
bool WebMClusterParser::Track::IsKeyframe(const uint8_t* data, int size) const {
  // Every block of a non-video track is treated as a keyframe. This is a
  // valid assumption for Vorbis, WebVTT, & Opus.
  if (!is_video_)
    return true;

  // Anything shorter than the minimal keyframe header cannot be a keyframe.
  static const int kMinKeyframeHeaderSize = 7;
  if (size < kMinKeyframeHeaderSize)
    return false;

  // A keyframe has the least significant bit of its first byte cleared.
  // http://tools.ietf.org/html/rfc6386 Section 19.1
  const bool low_bit_set = (data[0] & 0x01) != 0;
  if (low_bit_set)
    return false;

  // The VP8 keyframe start code must be present.
  // http://tools.ietf.org/html/rfc6386 Section 19.1
  return data[3] == 0x9d && data[4] == 0x01 && data[5] == 0x2a;
}

bool WebMClusterParser::Track::QueueBuffer(
    const scoped_refptr<StreamParserBuffer>& buffer) {
  DCHECK(!last_added_buffer_missing_duration_.get());

  // WebMClusterParser::OnBlock() gives LOG and parse error on decreasing
  // block timecode detection within a cluster. Therefore, we should not see
  // those here.
  DecodeTimestamp previous_buffers_timestamp = buffers_.empty() ?
      DecodeTimestamp() : buffers_.back()->GetDecodeTimestamp();
  CHECK(previous_buffers_timestamp <= buffer->GetDecodeTimestamp());

  base::TimeDelta duration = buffer->duration();
  if (duration < base::TimeDelta() || duration == kNoTimestamp()) {
    LOG(ERROR) << "Invalid buffer duration: " << duration.InSecondsF();
    return false;
  }

  // The estimated frame duration is the minimum (for audio) or the maximum
  // (for video) non-zero duration since the last initialization segment. The
  // minimum is used for audio to ensure frame durations aren't overestimated,
  // triggering unnecessary frame splicing. For video, splicing does not apply,
  // so maximum is used and overlap is simply resolved by showing the
  // later of the overlapping frames at its given PTS, effectively trimming down
  // the over-estimated duration of the previous frame.
  // TODO(chcunningham): Use max for audio and disable splicing whenever
  // estimated buffers are encountered.
  if (duration > base::TimeDelta()) {
    base::TimeDelta orig_duration_estimate = estimated_next_frame_duration_;
    if (estimated_next_frame_duration_ == kNoTimestamp()) {
      estimated_next_frame_duration_ = duration;
    } else if (is_video_) {
      estimated_next_frame_duration_ =
          std::max(duration, estimated_next_frame_duration_);
    } else {
      estimated_next_frame_duration_ =
          std::min(duration, estimated_next_frame_duration_);
    }

    if (orig_duration_estimate != estimated_next_frame_duration_) {
      DVLOG(3) << "Updated duration estimate:"
               << orig_duration_estimate
               << " -> "
               << estimated_next_frame_duration_
               << " at timestamp: "
               << buffer->GetDecodeTimestamp().InSecondsF();
    }
  }

  buffers_.push_back(buffer);
  return true;
}

// Returns the duration to assign to a buffer that has none: the running
// estimate when one exists, otherwise the hardcoded per-media-type default.
base::TimeDelta WebMClusterParser::Track::GetDurationEstimate() {
  base::TimeDelta estimate = estimated_next_frame_duration_;
  if (estimate == kNoTimestamp()) {
    DVLOG(3) << __FUNCTION__ << " : using hardcoded default duration";
    if (is_video_) {
      estimate = base::TimeDelta::FromMilliseconds(
          kDefaultVideoBufferDurationInMs);
    } else {
      estimate = base::TimeDelta::FromMilliseconds(
          kDefaultAudioBufferDurationInMs);
    }
  } else {
    DVLOG(3) << __FUNCTION__ << " : using estimated duration";
  }

  DCHECK(estimate > base::TimeDelta());
  DCHECK(estimate != kNoTimestamp());
  return estimate;
}

// Empties the text buffer map and the ready queue of every text track.
void WebMClusterParser::ClearTextTrackReadyBuffers() {
  text_buffers_map_.clear();
  for (auto& entry : text_track_map_) {
    entry.second.ClearReadyBuffers();
  }
}

// Clears the ready buffers of all text tracks, then resets each text track.
void WebMClusterParser::ResetTextTracks() {
  ClearTextTrackReadyBuffers();
  for (auto& entry : text_track_map_) {
    entry.second.Reset();
  }
}

// Computes |ready_buffer_upper_bound_| and moves every buffer below that bound
// into its track's ready queue. Must only be called while the bound is unset.
void WebMClusterParser::UpdateReadyBuffers() {
  DCHECK(ready_buffer_upper_bound_ == kNoDecodeTimestamp());
  DCHECK(text_buffers_map_.empty());

  if (!cluster_ended_) {
    // Mid-cluster: nothing at or after the earliest held-back audio/video
    // buffer may be released yet.
    ready_buffer_upper_bound_ = std::min(audio_.GetReadyUpperBound(),
                                         video_.GetReadyUpperBound());
    DCHECK(DecodeTimestamp() <= ready_buffer_upper_bound_);
    DCHECK(kNoDecodeTimestamp() != ready_buffer_upper_bound_);
  } else {
    // The cluster is complete: give any held-back audio/video buffer an
    // estimated duration so that everything can be released.
    audio_.ApplyDurationEstimateIfNeeded();
    video_.ApplyDurationEstimateIfNeeded();
    // Per OnBlock(), all text buffers should already have valid durations, so
    // there is no need to call ApplyDurationEstimateIfNeeded() on text tracks
    // here.
    ready_buffer_upper_bound_ =
        DecodeTimestamp::FromPresentationTime(base::TimeDelta::Max());
    DCHECK(ready_buffer_upper_bound_ == audio_.GetReadyUpperBound());
    DCHECK(ready_buffer_upper_bound_ == video_.GetReadyUpperBound());
  }

  // Let every track publish the buffers that fall below the bound.
  audio_.ExtractReadyBuffers(ready_buffer_upper_bound_);
  video_.ExtractReadyBuffers(ready_buffer_upper_bound_);
  for (auto& entry : text_track_map_) {
    entry.second.ExtractReadyBuffers(ready_buffer_upper_bound_);
  }
}

// Looks up the text track registered under |track_num|. Returns a pointer to
// it, or a null pointer when there is no such text track.
WebMClusterParser::Track*
WebMClusterParser::FindTextTrack(int track_num) {
  TextTrackMap::iterator found = text_track_map_.find(track_num);
  return found != text_track_map_.end() ? &found->second : nullptr;
}

}  // namespace media
}  // namespace edash_packager