From 5a4234f4dabcedcd83745b7dfbfcc5f66b01ae2c Mon Sep 17 00:00:00 2001
From: KongQun Yang <kqyang@google.com>
Date: Wed, 14 Oct 2015 16:10:12 -0700
Subject: [PATCH] Update webm_cluster_parser to emit samples

Change-Id: I02436cfcb53b96210d6f683227cdabb994f4c01f
---
 .../media/formats/webm/webm_cluster_parser.cc | 355 +++++-------------
 .../media/formats/webm/webm_cluster_parser.h  | 129 ++-----
 .../webm/webm_cluster_parser_unittest.cc      | 314 ++++++++--------
 3 files changed, 283 insertions(+), 515 deletions(-)

diff --git a/packager/media/formats/webm/webm_cluster_parser.cc b/packager/media/formats/webm/webm_cluster_parser.cc
index afdfc4c943..7d5f7306a6 100644
--- a/packager/media/formats/webm/webm_cluster_parser.cc
+++ b/packager/media/formats/webm/webm_cluster_parser.cc
@@ -9,7 +9,7 @@
 #include "packager/base/logging.h"
 #include "packager/base/sys_byteorder.h"
 #include "packager/media/base/decrypt_config.h"
-#include "packager/media/base/timestamp_constants.h"
+#include "packager/media/base/timestamp.h"
 #include "packager/media/filters/webvtt_util.h"
 #include "packager/media/formats/webm/webm_constants.h"
 #include "packager/media/formats/webm/webm_crypto_helpers.h"
@@ -30,6 +30,10 @@
                 "may be suppressed): "                         \
               : "")
 
+namespace {
+const int64_t kMicrosecondsPerMillisecond = 1000;
+}  // namespace
+
 namespace edash_packager {
 namespace media {
 
@@ -50,29 +54,29 @@ enum {
 WebMClusterParser::WebMClusterParser(
     int64_t timecode_scale,
     int audio_track_num,
-    base::TimeDelta audio_default_duration,
+    int64_t audio_default_duration,
     int video_track_num,
-    base::TimeDelta video_default_duration,
+    int64_t video_default_duration,
     const WebMTracksParser::TextTracks& text_tracks,
     const std::set<int64_t>& ignored_tracks,
     const std::string& audio_encryption_key_id,
     const std::string& video_encryption_key_id,
-    const AudioCodec audio_codec)
+    const AudioCodec audio_codec,
+    const MediaParser::NewSampleCB& new_sample_cb)
     : timecode_multiplier_(timecode_scale / 1000.0),
       ignored_tracks_(ignored_tracks),
       audio_encryption_key_id_(audio_encryption_key_id),
       video_encryption_key_id_(video_encryption_key_id),
       audio_codec_(audio_codec),
       parser_(kWebMIdCluster, this),
-      cluster_start_time_(kNoTimestamp()),
-      audio_(audio_track_num, false, audio_default_duration),
-      video_(video_track_num, true, video_default_duration),
-      ready_buffer_upper_bound_(kNoDecodeTimestamp()) {
+      cluster_start_time_(kNoTimestamp),
+      audio_(audio_track_num, false, audio_default_duration, new_sample_cb),
+      video_(video_track_num, true, video_default_duration, new_sample_cb) {
   for (WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin();
        it != text_tracks.end();
        ++it) {
-    text_track_map_.insert(
-        std::make_pair(it->first, Track(it->first, false, kNoTimestamp())));
+    text_track_map_.insert(std::make_pair(
+        it->first, Track(it->first, false, kNoTimestamp, new_sample_cb)));
   }
 }
 
@@ -81,21 +85,15 @@ WebMClusterParser::~WebMClusterParser() {}
 void WebMClusterParser::Reset() {
   last_block_timecode_ = -1;
   cluster_timecode_ = -1;
-  cluster_start_time_ = kNoTimestamp();
+  cluster_start_time_ = kNoTimestamp;
   cluster_ended_ = false;
   parser_.Reset();
   audio_.Reset();
   video_.Reset();
   ResetTextTracks();
-  ready_buffer_upper_bound_ = kNoDecodeTimestamp();
 }
 
 int WebMClusterParser::Parse(const uint8_t* buf, int size) {
-  audio_.ClearReadyBuffers();
-  video_.ClearReadyBuffers();
-  ClearTextTrackReadyBuffers();
-  ready_buffer_upper_bound_ = kNoDecodeTimestamp();
-
   int result = parser_.Parse(buf, size);
 
   if (result < 0) {
@@ -105,16 +103,18 @@ int WebMClusterParser::Parse(const uint8_t* buf, int size) {
 
   cluster_ended_ = parser_.IsParsingComplete();
   if (cluster_ended_) {
+    audio_.ApplyDurationEstimateIfNeeded();
+    video_.ApplyDurationEstimateIfNeeded();
+
     // If there were no buffers in this cluster, set the cluster start time to
     // be the |cluster_timecode_|.
-    if (cluster_start_time_ == kNoTimestamp()) {
+    if (cluster_start_time_ == kNoTimestamp) {
       // If the cluster did not even have a |cluster_timecode_|, signal parse
       // error.
       if (cluster_timecode_ < 0)
         return -1;
 
-      cluster_start_time_ = base::TimeDelta::FromMicroseconds(
-          cluster_timecode_ * timecode_multiplier_);
+      cluster_start_time_ = cluster_timecode_ * timecode_multiplier_;
     }
 
     // Reset the parser if we're done parsing so that
@@ -129,40 +129,7 @@ int WebMClusterParser::Parse(const uint8_t* buf, int size) {
   return result;
 }
 
-const WebMClusterParser::BufferQueue& WebMClusterParser::GetAudioBuffers() {
-  if (ready_buffer_upper_bound_ == kNoDecodeTimestamp())
-    UpdateReadyBuffers();
-
-  return audio_.ready_buffers();
-}
-
-const WebMClusterParser::BufferQueue& WebMClusterParser::GetVideoBuffers() {
-  if (ready_buffer_upper_bound_ == kNoDecodeTimestamp())
-    UpdateReadyBuffers();
-
-  return video_.ready_buffers();
-}
-
-const WebMClusterParser::TextBufferQueueMap&
-WebMClusterParser::GetTextBuffers() {
-  if (ready_buffer_upper_bound_ == kNoDecodeTimestamp())
-    UpdateReadyBuffers();
-
-  // Translate our |text_track_map_| into |text_buffers_map_|, inserting rows in
-  // the output only for non-empty ready_buffer() queues in |text_track_map_|.
-  text_buffers_map_.clear();
-  for (TextTrackMap::const_iterator itr = text_track_map_.begin();
-       itr != text_track_map_.end();
-       ++itr) {
-    const BufferQueue& text_buffers = itr->second.ready_buffers();
-    if (!text_buffers.empty())
-      text_buffers_map_.insert(std::make_pair(itr->first, text_buffers));
-  }
-
-  return text_buffers_map_;
-}
-
-base::TimeDelta WebMClusterParser::TryGetEncodedAudioDuration(
+int64_t WebMClusterParser::TryGetEncodedAudioDuration(
     const uint8_t* data,
     int size) {
 
@@ -179,24 +146,22 @@ base::TimeDelta WebMClusterParser::TryGetEncodedAudioDuration(
   // TODO(wolenetz/chcunningham): Implement duration reading for Vorbis. See
   // motivations in http://crbug.com/396634.
 
-  return kNoTimestamp();
+  return kNoTimestamp;
 }
 
-base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data,
-                                                    int size) {
+int64_t WebMClusterParser::ReadOpusDuration(const uint8_t* data, int size) {
   // Masks and constants for Opus packets. See
   // https://tools.ietf.org/html/rfc6716#page-14
   static const uint8_t kTocConfigMask = 0xf8;
   static const uint8_t kTocFrameCountCodeMask = 0x03;
   static const uint8_t kFrameCountMask = 0x3f;
-  static const base::TimeDelta kPacketDurationMax =
-      base::TimeDelta::FromMilliseconds(120);
+  static const int64_t kPacketDurationMax = 120 * kMicrosecondsPerMillisecond;
 
   if (size < 1) {
     LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
         << "Invalid zero-byte Opus packet; demuxed block duration may be "
            "imprecise.";
-    return kNoTimestamp();
+    return kNoTimestamp;
   }
 
   // Frame count type described by last 2 bits of Opus TOC byte.
@@ -217,7 +182,7 @@ base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data,
         LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
             << "Second byte missing from 'Code 3' Opus packet; demuxed block "
                "duration may be imprecise.";
-        return kNoTimestamp();
+        return kNoTimestamp;
       }
 
       frame_count = data[1] & kFrameCountMask;
@@ -226,7 +191,7 @@ base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data,
         LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
             << "Illegal 'Code 3' Opus packet with frame count zero; demuxed "
                "block duration may be imprecise.";
-        return kNoTimestamp();
+        return kNoTimestamp;
       }
 
       break;
@@ -234,7 +199,7 @@ base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data,
       LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
           << "Unexpected Opus frame count type: " << frame_count_type << "; "
           << "demuxed block duration may be imprecise.";
-      return kNoTimestamp();
+      return kNoTimestamp;
   }
 
   int opusConfig = (data[0] & kTocConfigMask) >> 3;
@@ -242,8 +207,7 @@ base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data,
   CHECK_LT(opusConfig, static_cast<int>(arraysize(kOpusFrameDurationsMu)));
 
   DCHECK_GT(frame_count, 0);
-  base::TimeDelta duration = base::TimeDelta::FromMicroseconds(
-      kOpusFrameDurationsMu[opusConfig] * frame_count);
+  int64_t duration = kOpusFrameDurationsMu[opusConfig] * frame_count;
 
   if (duration > kPacketDurationMax) {
     // Intentionally allowing packet to pass through for now. Decoder should
@@ -251,8 +215,8 @@ base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data,
     // things go sideways.
     LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
         << "Warning, demuxed Opus packet with encoded duration: "
-        << duration.InMilliseconds() << "ms. Should be no greater than "
-        << kPacketDurationMax.InMilliseconds() << "ms.";
+        << duration << "us. Should be no greater than "
+        << kPacketDurationMax << "us.";
   }
 
   return duration;
@@ -261,7 +225,7 @@ base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data,
 WebMParserClient* WebMClusterParser::OnListStart(int id) {
   if (id == kWebMIdCluster) {
     cluster_timecode_ = -1;
-    cluster_start_time_ = kNoTimestamp();
+    cluster_start_time_ = kNoTimestamp;
   } else if (id == kWebMIdBlockGroup) {
     block_data_.reset();
     block_data_size_ = -1;
@@ -444,9 +408,9 @@ bool WebMClusterParser::OnBlock(bool is_simple_block,
   }
 
   Track* track = NULL;
-  StreamParserBuffer::Type buffer_type = DemuxerStream::AUDIO;
+  StreamType stream_type = kStreamAudio;
   std::string encryption_key_id;
-  base::TimeDelta encoded_duration = kNoTimestamp();
+  int64_t encoded_duration = kNoTimestamp;
   if (track_num == audio_.track_num()) {
     track = &audio_;
     encryption_key_id = audio_encryption_key_id_;
@@ -456,7 +420,7 @@ bool WebMClusterParser::OnBlock(bool is_simple_block,
   } else if (track_num == video_.track_num()) {
     track = &video_;
     encryption_key_id = video_encryption_key_id_;
-    buffer_type = DemuxerStream::VIDEO;
+    stream_type = kStreamVideo;
   } else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) {
     return true;
   } else if (Track* const text_track = FindTextTrack(track_num)) {
@@ -465,7 +429,7 @@ bool WebMClusterParser::OnBlock(bool is_simple_block,
     if (block_duration < 0)  // not specified
       return false;
     track = text_track;
-    buffer_type = DemuxerStream::TEXT;
+    stream_type = kStreamText;
   } else {
     LOG(ERROR) << "Unexpected track number " << track_num;
     return false;
@@ -473,11 +437,10 @@ bool WebMClusterParser::OnBlock(bool is_simple_block,
 
   last_block_timecode_ = timecode;
 
-  base::TimeDelta timestamp = base::TimeDelta::FromMicroseconds(
-      (cluster_timecode_ + timecode) * timecode_multiplier_);
+  int64_t timestamp = (cluster_timecode_ + timecode) * timecode_multiplier_;
 
-  scoped_refptr<StreamParserBuffer> buffer;
-  if (buffer_type != DemuxerStream::TEXT) {
+  scoped_refptr<MediaSample> buffer;
+  if (stream_type != kStreamText) {
     // The first bit of the flags is set when a SimpleBlock contains only
     // keyframes. If this is a Block, then inspection of the payload is
     // necessary to determine whether it contains a keyframe or not.
@@ -499,16 +462,13 @@ bool WebMClusterParser::OnBlock(bool is_simple_block,
       return false;
     }
 
-    // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId
-    // type with remapped bytestream track numbers and allow multiple tracks as
-    // applicable. See https://crbug.com/341581.
-    buffer = StreamParserBuffer::CopyFrom(
-        data + data_offset, size - data_offset,
-        additional, additional_size,
-        is_keyframe, buffer_type, track_num);
+    buffer = MediaSample::CopyFrom(data + data_offset, size - data_offset,
+                                   additional, additional_size, is_keyframe);
 
-    if (decrypt_config)
-      buffer->set_decrypt_config(decrypt_config.Pass());
+    if (decrypt_config) {
+      // TODO(kqyang): Decrypt it if it is encrypted.
+      buffer->set_is_encrypted(true);
+    }
   } else {
     std::string id, settings, content;
     WebMWebVTTParser::Parse(data, size, &id, &settings, &content);
@@ -518,25 +478,18 @@ bool WebMClusterParser::OnBlock(bool is_simple_block,
                  settings.begin(), settings.end(),
                  &side_data);
 
-    // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId
-    // type with remapped bytestream track numbers and allow multiple tracks as
-    // applicable. See https://crbug.com/341581.
-    buffer = StreamParserBuffer::CopyFrom(
-        reinterpret_cast<const uint8_t*>(content.data()),
-        content.length(),
-        &side_data[0],
-        side_data.size(),
-        true, buffer_type, track_num);
+    buffer = MediaSample::CopyFrom(
+        reinterpret_cast<const uint8_t*>(content.data()), content.length(),
+        &side_data[0], side_data.size(), true);
   }
 
-  buffer->set_timestamp(timestamp);
-  if (cluster_start_time_ == kNoTimestamp())
+  buffer->set_pts(timestamp);
+  if (cluster_start_time_ == kNoTimestamp)
     cluster_start_time_ = timestamp;
 
-  base::TimeDelta block_duration_time_delta = kNoTimestamp();
+  int64_t block_duration_time_delta = kNoTimestamp;
   if (block_duration >= 0) {
-    block_duration_time_delta = base::TimeDelta::FromMicroseconds(
-        block_duration * timecode_multiplier_);
+    block_duration_time_delta = block_duration * timecode_multiplier_;
   }
 
   // Prefer encoded duration over BlockGroup->BlockDuration or
@@ -550,126 +503,77 @@ bool WebMClusterParser::OnBlock(bool is_simple_block,
   // as Block Timecode deltas, or once the whole cluster is parsed in the case
   // of the last Block in the cluster. See Track::AddBuffer and
   // ApplyDurationEstimateIfNeeded().
-  if (encoded_duration != kNoTimestamp()) {
-    DCHECK(encoded_duration != kInfiniteDuration());
-    DCHECK(encoded_duration > base::TimeDelta());
+  if (encoded_duration != kNoTimestamp) {
+    DCHECK(encoded_duration != kInfiniteDuration);
+    DCHECK(encoded_duration > 0);
     buffer->set_duration(encoded_duration);
 
     DVLOG(3) << __FUNCTION__ << " : "
-             << "Using encoded duration " << encoded_duration.InSecondsF();
+             << "Using encoded duration " << encoded_duration;
 
-    if (block_duration_time_delta != kNoTimestamp()) {
-      base::TimeDelta duration_difference =
+    if (block_duration_time_delta != kNoTimestamp) {
+      int64_t duration_difference =
           block_duration_time_delta - encoded_duration;
 
-      const auto kWarnDurationDiff =
-          base::TimeDelta::FromMicroseconds(timecode_multiplier_ * 2);
-      if (duration_difference.magnitude() > kWarnDurationDiff) {
+      const auto kWarnDiff = timecode_multiplier_ * 2;
+      if (duration_difference > kWarnDiff || duration_difference < -kWarnDiff) {
         LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
-            << "BlockDuration (" << block_duration_time_delta.InMilliseconds()
+            << "BlockDuration (" << block_duration_time_delta
             << "ms) differs significantly from encoded duration ("
-            << encoded_duration.InMilliseconds() << "ms).";
+            << encoded_duration << "ms).";
       }
     }
-  } else if (block_duration_time_delta != kNoTimestamp()) {
+  } else if (block_duration_time_delta != kNoTimestamp) {
     buffer->set_duration(block_duration_time_delta);
   } else {
-    DCHECK_NE(buffer_type, DemuxerStream::TEXT);
     buffer->set_duration(track->default_duration());
   }
 
-  if (discard_padding != 0) {
-    buffer->set_discard_padding(std::make_pair(
-        base::TimeDelta(),
-        base::TimeDelta::FromMicroseconds(discard_padding / 1000)));
-  }
-
   return track->AddBuffer(buffer);
 }
 
 WebMClusterParser::Track::Track(int track_num,
                                 bool is_video,
-                                base::TimeDelta default_duration)
+                                int64_t default_duration,
+                                const MediaParser::NewSampleCB& new_sample_cb)
     : track_num_(track_num),
       is_video_(is_video),
       default_duration_(default_duration),
-      estimated_next_frame_duration_(kNoTimestamp()) {
-  DCHECK(default_duration_ == kNoTimestamp() ||
-         default_duration_ > base::TimeDelta());
+      estimated_next_frame_duration_(kNoTimestamp),
+      new_sample_cb_(new_sample_cb) {
+  DCHECK(default_duration_ == kNoTimestamp || default_duration_ > 0);
 }
 
 WebMClusterParser::Track::~Track() {}
 
-DecodeTimestamp WebMClusterParser::Track::GetReadyUpperBound() {
-  DCHECK(ready_buffers_.empty());
-  if (last_added_buffer_missing_duration_.get())
-    return last_added_buffer_missing_duration_->GetDecodeTimestamp();
-
-  return DecodeTimestamp::FromPresentationTime(base::TimeDelta::Max());
-}
-
-void WebMClusterParser::Track::ExtractReadyBuffers(
-    const DecodeTimestamp before_timestamp) {
-  DCHECK(ready_buffers_.empty());
-  DCHECK(DecodeTimestamp() <= before_timestamp);
-  DCHECK(kNoDecodeTimestamp() != before_timestamp);
-
-  if (buffers_.empty())
-    return;
-
-  if (buffers_.back()->GetDecodeTimestamp() < before_timestamp) {
-    // All of |buffers_| are ready.
-    ready_buffers_.swap(buffers_);
-    DVLOG(3) << __FUNCTION__ << " : " << track_num_ << " All "
-             << ready_buffers_.size() << " are ready: before upper bound ts "
-             << before_timestamp.InSecondsF();
-    return;
-  }
-
-  // Not all of |buffers_| are ready yet. Move any that are ready to
-  // |ready_buffers_|.
-  while (true) {
-    const scoped_refptr<StreamParserBuffer>& buffer = buffers_.front();
-    if (buffer->GetDecodeTimestamp() >= before_timestamp)
-      break;
-    ready_buffers_.push_back(buffer);
-    buffers_.pop_front();
-    DCHECK(!buffers_.empty());
-  }
-
-  DVLOG(3) << __FUNCTION__ << " : " << track_num_ << " Only "
-           << ready_buffers_.size() << " ready, " << buffers_.size()
-           << " at or after upper bound ts " << before_timestamp.InSecondsF();
-}
-
 bool WebMClusterParser::Track::AddBuffer(
-    const scoped_refptr<StreamParserBuffer>& buffer) {
+    const scoped_refptr<MediaSample>& buffer) {
   DVLOG(2) << "AddBuffer() : " << track_num_
-           << " ts " << buffer->timestamp().InSecondsF()
-           << " dur " << buffer->duration().InSecondsF()
+           << " ts " << buffer->pts()
+           << " dur " << buffer->duration()
            << " kf " << buffer->is_key_frame()
            << " size " << buffer->data_size();
 
   if (last_added_buffer_missing_duration_.get()) {
-    base::TimeDelta derived_duration =
-        buffer->timestamp() - last_added_buffer_missing_duration_->timestamp();
+    int64_t derived_duration =
+        buffer->pts() - last_added_buffer_missing_duration_->pts();
     last_added_buffer_missing_duration_->set_duration(derived_duration);
 
     DVLOG(2) << "AddBuffer() : applied derived duration to held-back buffer : "
              << " ts "
-             << last_added_buffer_missing_duration_->timestamp().InSecondsF()
+             << last_added_buffer_missing_duration_->pts()
              << " dur "
-             << last_added_buffer_missing_duration_->duration().InSecondsF()
+             << last_added_buffer_missing_duration_->duration()
              << " kf " << last_added_buffer_missing_duration_->is_key_frame()
              << " size " << last_added_buffer_missing_duration_->data_size();
-    scoped_refptr<StreamParserBuffer> updated_buffer =
+    scoped_refptr<MediaSample> updated_buffer =
         last_added_buffer_missing_duration_;
     last_added_buffer_missing_duration_ = NULL;
     if (!QueueBuffer(updated_buffer))
       return false;
   }
 
-  if (buffer->duration() == kNoTimestamp()) {
+  if (buffer->duration() == kNoTimestamp) {
     last_added_buffer_missing_duration_ = buffer;
     DVLOG(2) << "AddBuffer() : holding back buffer that is missing duration";
     return true;
@@ -682,46 +586,37 @@ void WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() {
   if (!last_added_buffer_missing_duration_.get())
     return;
 
-  base::TimeDelta estimated_duration = GetDurationEstimate();
+  int64_t estimated_duration = GetDurationEstimate();
   last_added_buffer_missing_duration_->set_duration(estimated_duration);
 
   if (is_video_) {
     // Exposing estimation so splicing/overlap frame processing can make
     // informed decisions downstream.
-    // TODO(chcunningham): Set this for audio as well in later change where
-    // audio is switched to max estimation and splicing is disabled.
-    last_added_buffer_missing_duration_->set_is_duration_estimated(true);
+    // TODO(kqyang): Should we wait for the next cluster to set the duration?
+    // last_added_buffer_missing_duration_->set_is_duration_estimated(true);
   }
 
   LIMITED_LOG(INFO, num_duration_estimates_, kMaxDurationEstimateLogs)
       << "Estimating WebM block duration to be "
-      << estimated_duration.InMilliseconds()
+      << estimated_duration / kMicrosecondsPerMillisecond
       << "ms for the last (Simple)Block in the Cluster for this Track. Use "
          "BlockGroups with BlockDurations at the end of each Track in a "
          "Cluster to avoid estimation.";
 
   DVLOG(2) << __FUNCTION__ << " new dur : ts "
-           << last_added_buffer_missing_duration_->timestamp().InSecondsF()
+           << last_added_buffer_missing_duration_->pts()
            << " dur "
-           << last_added_buffer_missing_duration_->duration().InSecondsF()
+           << last_added_buffer_missing_duration_->duration()
            << " kf " << last_added_buffer_missing_duration_->is_key_frame()
            << " size " << last_added_buffer_missing_duration_->data_size();
 
   // Don't use the applied duration as a future estimation (don't use
   // QueueBuffer() here.)
-  buffers_.push_back(last_added_buffer_missing_duration_);
+  new_sample_cb_.Run(track_num_, last_added_buffer_missing_duration_);
   last_added_buffer_missing_duration_ = NULL;
 }
 
-void WebMClusterParser::Track::ClearReadyBuffers() {
-  // Note that |buffers_| are kept and |estimated_next_frame_duration_| is not
-  // reset here.
-  ready_buffers_.clear();
-}
-
 void WebMClusterParser::Track::Reset() {
-  ClearReadyBuffers();
-  buffers_.clear();
   last_added_buffer_missing_duration_ = NULL;
 }
 
@@ -749,19 +644,12 @@ bool WebMClusterParser::Track::IsKeyframe(const uint8_t* data, int size) const {
 }
 
 bool WebMClusterParser::Track::QueueBuffer(
-    const scoped_refptr<StreamParserBuffer>& buffer) {
+    const scoped_refptr<MediaSample>& buffer) {
   DCHECK(!last_added_buffer_missing_duration_.get());
 
-  // WebMClusterParser::OnBlock() gives LOG and parse error on decreasing
-  // block timecode detection within a cluster. Therefore, we should not see
-  // those here.
-  DecodeTimestamp previous_buffers_timestamp = buffers_.empty() ?
-      DecodeTimestamp() : buffers_.back()->GetDecodeTimestamp();
-  CHECK(previous_buffers_timestamp <= buffer->GetDecodeTimestamp());
-
-  base::TimeDelta duration = buffer->duration();
-  if (duration < base::TimeDelta() || duration == kNoTimestamp()) {
-    LOG(ERROR) << "Invalid buffer duration: " << duration.InSecondsF();
+  int64_t duration = buffer->duration();
+  if (duration < 0 || duration == kNoTimestamp) {
+    LOG(ERROR) << "Invalid buffer duration: " << duration;
     return false;
   }
 
@@ -774,9 +662,9 @@ bool WebMClusterParser::Track::QueueBuffer(
   // the over-estimated duration of the previous frame.
   // TODO(chcunningham): Use max for audio and disable splicing whenever
   // estimated buffers are encountered.
-  if (duration > base::TimeDelta()) {
-    base::TimeDelta orig_duration_estimate = estimated_next_frame_duration_;
-    if (estimated_next_frame_duration_ == kNoTimestamp()) {
+  if (duration > 0) {
+    int64_t orig_duration_estimate = estimated_next_frame_duration_;
+    if (estimated_next_frame_duration_ == kNoTimestamp) {
       estimated_next_frame_duration_ = duration;
     } else if (is_video_) {
       estimated_next_frame_duration_ =
@@ -792,45 +680,33 @@ bool WebMClusterParser::Track::QueueBuffer(
                << " -> "
                << estimated_next_frame_duration_
                << " at timestamp: "
-               << buffer->GetDecodeTimestamp().InSecondsF();
+               << buffer->pts();
     }
   }
 
-  buffers_.push_back(buffer);
+  new_sample_cb_.Run(track_num_, buffer);
   return true;
 }
 
-base::TimeDelta WebMClusterParser::Track::GetDurationEstimate() {
-  base::TimeDelta duration = estimated_next_frame_duration_;
-  if (duration != kNoTimestamp()) {
+int64_t WebMClusterParser::Track::GetDurationEstimate() {
+  int64_t duration = estimated_next_frame_duration_;
+  if (duration != kNoTimestamp) {
     DVLOG(3) << __FUNCTION__ << " : using estimated duration";
   } else {
     DVLOG(3) << __FUNCTION__ << " : using hardcoded default duration";
     if (is_video_) {
-      duration = base::TimeDelta::FromMilliseconds(
-          kDefaultVideoBufferDurationInMs);
+      duration = kDefaultVideoBufferDurationInMs * kMicrosecondsPerMillisecond;
     } else {
-      duration = base::TimeDelta::FromMilliseconds(
-          kDefaultAudioBufferDurationInMs);
+      duration = kDefaultAudioBufferDurationInMs * kMicrosecondsPerMillisecond;
     }
   }
 
-  DCHECK(duration > base::TimeDelta());
-  DCHECK(duration != kNoTimestamp());
+  DCHECK(duration > 0);
+  DCHECK(duration != kNoTimestamp);
   return duration;
 }
 
-void WebMClusterParser::ClearTextTrackReadyBuffers() {
-  text_buffers_map_.clear();
-  for (TextTrackMap::iterator it = text_track_map_.begin();
-       it != text_track_map_.end();
-       ++it) {
-    it->second.ClearReadyBuffers();
-  }
-}
-
 void WebMClusterParser::ResetTextTracks() {
-  ClearTextTrackReadyBuffers();
   for (TextTrackMap::iterator it = text_track_map_.begin();
        it != text_track_map_.end();
        ++it) {
@@ -838,37 +714,6 @@ void WebMClusterParser::ResetTextTracks() {
   }
 }
 
-void WebMClusterParser::UpdateReadyBuffers() {
-  DCHECK(ready_buffer_upper_bound_ == kNoDecodeTimestamp());
-  DCHECK(text_buffers_map_.empty());
-
-  if (cluster_ended_) {
-    audio_.ApplyDurationEstimateIfNeeded();
-    video_.ApplyDurationEstimateIfNeeded();
-    // Per OnBlock(), all text buffers should already have valid durations, so
-    // there is no need to call ApplyDurationEstimateIfNeeded() on text tracks
-    // here.
-    ready_buffer_upper_bound_ =
-        DecodeTimestamp::FromPresentationTime(base::TimeDelta::Max());
-    DCHECK(ready_buffer_upper_bound_ == audio_.GetReadyUpperBound());
-    DCHECK(ready_buffer_upper_bound_ == video_.GetReadyUpperBound());
-  } else {
-    ready_buffer_upper_bound_ = std::min(audio_.GetReadyUpperBound(),
-                                         video_.GetReadyUpperBound());
-    DCHECK(DecodeTimestamp() <= ready_buffer_upper_bound_);
-    DCHECK(kNoDecodeTimestamp() != ready_buffer_upper_bound_);
-  }
-
-  // Prepare each track's ready buffers for retrieval.
-  audio_.ExtractReadyBuffers(ready_buffer_upper_bound_);
-  video_.ExtractReadyBuffers(ready_buffer_upper_bound_);
-  for (TextTrackMap::iterator itr = text_track_map_.begin();
-       itr != text_track_map_.end();
-       ++itr) {
-    itr->second.ExtractReadyBuffers(ready_buffer_upper_bound_);
-  }
-}
-
 WebMClusterParser::Track*
 WebMClusterParser::FindTextTrack(int track_num) {
   const TextTrackMap::iterator it = text_track_map_.find(track_num);
diff --git a/packager/media/formats/webm/webm_cluster_parser.h b/packager/media/formats/webm/webm_cluster_parser.h
index dec1ea966c..5ee6ecdda8 100644
--- a/packager/media/formats/webm/webm_cluster_parser.h
+++ b/packager/media/formats/webm/webm_cluster_parser.h
@@ -11,9 +11,8 @@
 #include <string>
 
 #include "packager/base/memory/scoped_ptr.h"
-#include "packager/media/base/audio_decoder_config.h"
-#include "packager/media/base/stream_parser.h"
-#include "packager/media/base/stream_parser_buffer.h"
+#include "packager/media/base/media_parser.h"
+#include "packager/media/base/media_sample.h"
 #include "packager/media/formats/webm/webm_parser.h"
 #include "packager/media/formats/webm/webm_tracks_parser.h"
 
@@ -22,10 +21,6 @@ namespace media {
 
 class WebMClusterParser : public WebMParserClient {
  public:
-  typedef StreamParser::TrackId TrackId;
-  typedef std::deque<scoped_refptr<StreamParserBuffer> > BufferQueue;
-  typedef std::map<TrackId, const BufferQueue> TextBufferQueueMap;
-
   // Numbers chosen to estimate the duration of a buffer if none is set and
   // there is not enough information to get a better estimate.
   enum {
@@ -49,29 +44,18 @@ class WebMClusterParser : public WebMParserClient {
    public:
     Track(int track_num,
           bool is_video,
-          base::TimeDelta default_duration);
+          int64_t default_duration,
+          const MediaParser::NewSampleCB& new_sample_cb);
     ~Track();
 
     int track_num() const { return track_num_; }
 
-    // If a buffer is currently held aside pending duration calculation, returns
-    // its decode timestamp. Otherwise, returns kInfiniteDuration().
-    DecodeTimestamp GetReadyUpperBound();
-
-    // Prepares |ready_buffers_| for retrieval. Prior to calling,
-    // |ready_buffers_| must be empty. Moves all |buffers_| with decode
-    // timestamp before |before_timestamp| to |ready_buffers_|, preserving their
-    // order.
-    void ExtractReadyBuffers(const DecodeTimestamp before_timestamp);
-
-    const BufferQueue& ready_buffers() const { return ready_buffers_; }
-
     // If |last_added_buffer_missing_duration_| is set, updates its duration
     // relative to |buffer|'s timestamp, and adds it to |buffers_| and unsets
     // |last_added_buffer_missing_duration_|. Then, if |buffer| is missing
     // duration, saves |buffer| into |last_added_buffer_missing_duration_|, or
     // otherwise adds |buffer| to |buffers_|.
-    bool AddBuffer(const scoped_refptr<StreamParserBuffer>& buffer);
+    bool AddBuffer(const scoped_refptr<MediaSample>& buffer);
 
     // If |last_added_buffer_missing_duration_| is set, updates its duration to
     // be non-kNoTimestamp() value of |estimated_next_frame_duration_| or a
@@ -80,14 +64,8 @@ class WebMClusterParser : public WebMParserClient {
     // emit all buffers in a media segment before signaling end of segment.)
     void ApplyDurationEstimateIfNeeded();
 
-    // Clears |ready_buffers_| (use ExtractReadyBuffers() to fill it again).
-    // Leaves as-is |buffers_| and any possibly held-aside buffer that is
-    // missing duration.
-    void ClearReadyBuffers();
-
     // Clears all buffer state, including any possibly held-aside buffer that
-    // was missing duration, and all contents of |buffers_| and
-    // |ready_buffers_|.
+    // was missing duration.
     void Reset();
 
     // Helper function used to inspect block data to determine if the
@@ -96,18 +74,18 @@ class WebMClusterParser : public WebMParserClient {
     // |size| indicates the number of bytes in |data|.
     bool IsKeyframe(const uint8_t* data, int size) const;
 
-    base::TimeDelta default_duration() const { return default_duration_; }
+    int64_t default_duration() const { return default_duration_; }
 
    private:
     // Helper that sanity-checks |buffer| duration, updates
     // |estimated_next_frame_duration_|, and adds |buffer| to |buffers_|.
     // Returns false if |buffer| failed sanity check and therefore was not added
     // to |buffers_|. Returns true otherwise.
-    bool QueueBuffer(const scoped_refptr<StreamParserBuffer>& buffer);
+    bool QueueBuffer(const scoped_refptr<MediaSample>& buffer);
 
     // Helper that calculates the buffer duration to use in
     // ApplyDurationEstimateIfNeeded().
-    base::TimeDelta GetDurationEstimate();
+    int64_t GetDurationEstimate();
 
     // Counts the number of estimated durations used in this track. Used to
     // prevent log spam for LOG()s about estimated duration.
@@ -120,26 +98,19 @@ class WebMClusterParser : public WebMParserClient {
     // that have not yet been extracted into |ready_buffers_|. Note that up to
     // one additional buffer missing duration may be tracked by
     // |last_added_buffer_missing_duration_|.
-    BufferQueue buffers_;
-    scoped_refptr<StreamParserBuffer> last_added_buffer_missing_duration_;
-
-    // Buffers in (decode) timestamp order that were previously parsed into and
-    // extracted from |buffers_|. Buffers are moved from |buffers_| to
-    // |ready_buffers_| by ExtractReadyBuffers() if they are below a specified
-    // upper bound timestamp. Track users can therefore extract only those
-    // parsed buffers which are "ready" for emission (all before some maximum
-    // timestamp).
-    BufferQueue ready_buffers_;
+    scoped_refptr<MediaSample> last_added_buffer_missing_duration_;
 
     // If kNoTimestamp(), then |estimated_next_frame_duration_| will be used.
-    base::TimeDelta default_duration_;
+    int64_t default_duration_;
 
     // If kNoTimestamp(), then a default value will be used. This estimate is
     // the maximum (for video), or minimum (for audio) duration seen so far for
     // this track, and is used only if |default_duration_| is kNoTimestamp().
     // TODO(chcunningham): Use maximum for audio too, adding checks to disable
     // splicing when these estimates are observed in SourceBufferStream.
-    base::TimeDelta estimated_next_frame_duration_;
+    int64_t estimated_next_frame_duration_;
+
+    MediaParser::NewSampleCB new_sample_cb_;
   };
 
   typedef std::map<int, Track> TextTrackMap;
@@ -147,14 +118,15 @@ class WebMClusterParser : public WebMParserClient {
  public:
   WebMClusterParser(int64_t timecode_scale,
                     int audio_track_num,
-                    base::TimeDelta audio_default_duration,
+                    int64_t audio_default_duration,
                     int video_track_num,
-                    base::TimeDelta video_default_duration,
+                    int64_t video_default_duration,
                     const WebMTracksParser::TextTracks& text_tracks,
                     const std::set<int64_t>& ignored_tracks,
                     const std::string& audio_encryption_key_id,
                     const std::string& video_encryption_key_id,
-                    const AudioCodec audio_codec);
+                    const AudioCodec audio_codec,
+                    const MediaParser::NewSampleCB& new_sample_cb);
   ~WebMClusterParser() override;
 
   // Resets the parser state so it can accept a new cluster.
@@ -167,35 +139,7 @@ class WebMClusterParser : public WebMParserClient {
   // Returns the number of bytes parsed on success.
   int Parse(const uint8_t* buf, int size);
 
-  base::TimeDelta cluster_start_time() const { return cluster_start_time_; }
-
-  // Get the current ready buffers resulting from Parse().
-  // If the parse reached the end of cluster and the last buffer was held aside
-  // due to missing duration, the buffer is given an estimated duration and
-  // included in the result.
-  // Otherwise, if there are is a buffer held aside due to missing duration for
-  // any of the tracks, no buffers with same or greater (decode) timestamp will
-  // be included in the buffers.
-  // The returned deques are cleared by Parse() or Reset() and updated by the
-  // next calls to Get{Audio,Video}Buffers().
-  // If no Parse() or Reset() has occurred since the last call to Get{Audio,
-  // Video,Text}Buffers(), then the previous BufferQueue& is returned again
-  // without any recalculation.
-  const BufferQueue& GetAudioBuffers();
-  const BufferQueue& GetVideoBuffers();
-
-  // Constructs and returns a subset of |text_track_map_| containing only
-  // tracks with non-empty buffer queues produced by the last Parse() and
-  // filtered to exclude any buffers that have (decode) timestamp same or
-  // greater than the lowest (decode) timestamp across all tracks of any buffer
-  // held aside due to missing duration (unless the end of cluster has been
-  // reached).
-  // The returned map is cleared by Parse() or Reset() and updated by the next
-  // call to GetTextBuffers().
-  // If no Parse() or Reset() has occurred since the last call to
-  // GetTextBuffers(), then the previous TextBufferQueueMap& is returned again
-  // without any recalculation.
-  const TextBufferQueueMap& GetTextBuffers();
+  int64_t cluster_start_time() const { return cluster_start_time_; }
 
   // Returns true if the last Parse() call stopped at the end of a cluster.
   bool cluster_ended() const { return cluster_ended_; }
@@ -228,22 +172,6 @@ class WebMClusterParser : public WebMParserClient {
   // Resets the Track objects associated with each text track.
   void ResetTextTracks();
 
-  // Clears the the ready buffers associated with each text track.
-  void ClearTextTrackReadyBuffers();
-
-  // Helper method for Get{Audio,Video,Text}Buffers() that recomputes
-  // |ready_buffer_upper_bound_| and calls ExtractReadyBuffers() on each track.
-  // If |cluster_ended_| is true, first applies duration estimate if needed for
-  // |audio_| and |video_| and sets |ready_buffer_upper_bound_| to
-  // kInfiniteDuration(). Otherwise, sets |ready_buffer_upper_bound_| to the
-  // minimum upper bound across |audio_| and |video_|. (Text tracks can have no
-  // buffers missing duration, so they are not involved in calculating the upper
-  // bound.)
-  // Parse() or Reset() must be called between calls to UpdateReadyBuffers() to
-  // clear each track's ready buffers and to reset |ready_buffer_upper_bound_|
-  // to kNoDecodeTimestamp().
-  void UpdateReadyBuffers();
-
   // Search for the indicated track_num among the text tracks.  Returns NULL
   // if that track num is not a text track.
   Track* FindTextTrack(int track_num);
@@ -256,11 +184,11 @@ class WebMClusterParser : public WebMParserClient {
   // Cluster we parse, so we can't simply use the delta of the first Block in
   // the next Cluster). Avoid calling if encrypted; may produce unexpected
   // output. See implementation for supported codecs.
-  base::TimeDelta TryGetEncodedAudioDuration(const uint8_t* data, int size);
+  int64_t TryGetEncodedAudioDuration(const uint8_t* data, int size);
 
   // Reads Opus packet header to determine packet duration. Duration returned
   // as TimeDelta or kNoTimestamp() upon failure to read duration from packet.
-  base::TimeDelta ReadOpusDuration(const uint8_t* data, int size);
+  int64_t ReadOpusDuration(const uint8_t* data, int size);
 
   // Tracks the number of LOGs made in process of reading encoded
   // duration. Useful to prevent log spam.
@@ -290,26 +218,13 @@ class WebMClusterParser : public WebMParserClient {
   bool discard_padding_set_ = false;
 
   int64_t cluster_timecode_ = -1;
-  base::TimeDelta cluster_start_time_;
+  int64_t cluster_start_time_;
   bool cluster_ended_ = false;
 
   Track audio_;
   Track video_;
   TextTrackMap text_track_map_;
 
-  // Subset of |text_track_map_| maintained by GetTextBuffers(), and cleared by
-  // ClearTextTrackReadyBuffers(). Callers of GetTextBuffers() get a const-ref
-  // to this member.
-  TextBufferQueueMap text_buffers_map_;
-
-  // Limits the range of buffers returned by Get{Audio,Video,Text}Buffers() to
-  // this exclusive upper bound. Set to kNoDecodeTimestamp(), meaning not yet
-  // calculated, by Reset() and Parse(). If kNoDecodeTimestamp(), then
-  // Get{Audio,Video,Text}Buffers() will calculate it to be the minimum (decode)
-  // timestamp across all tracks' |last_buffer_missing_duration_|, or
-  // kInfiniteDuration() if no buffers are currently missing duration.
-  DecodeTimestamp ready_buffer_upper_bound_;
-
   DISALLOW_IMPLICIT_CONSTRUCTORS(WebMClusterParser);
 };
 
diff --git a/packager/media/formats/webm/webm_cluster_parser_unittest.cc b/packager/media/formats/webm/webm_cluster_parser_unittest.cc
index 2b7c008f6d..7a3c821890 100644
--- a/packager/media/formats/webm/webm_cluster_parser_unittest.cc
+++ b/packager/media/formats/webm/webm_cluster_parser_unittest.cc
@@ -15,9 +15,8 @@
 #include "packager/base/bind.h"
 #include "packager/base/logging.h"
 #include "packager/base/strings/string_number_conversions.h"
-#include "packager/media/base/audio_decoder_config.h"
 #include "packager/media/base/decrypt_config.h"
-#include "packager/media/base/timestamp_constants.h"
+#include "packager/media/base/timestamp.h"
 #include "packager/media/formats/webm/cluster_builder.h"
 #include "packager/media/formats/webm/opus_packet_builder.h"
 #include "packager/media/formats/webm/webm_constants.h"
@@ -29,10 +28,15 @@ using ::testing::StrictMock;
 using ::testing::Mock;
 using ::testing::_;
 
+namespace {
+const int64_t kMicrosecondsPerMillisecond = 1000;
+}  // namespace
+
 namespace edash_packager {
 namespace media {
 
 typedef WebMTracksParser::TextTracks TextTracks;
+typedef std::map<uint32_t, BufferQueue> TextBufferQueueMap;
 
 // Matchers for verifying common media log entry strings.
 MATCHER_P(OpusPacketDurationTooHigh, actual_duration_ms, "") {
@@ -115,9 +119,11 @@ const BlockInfo kDefaultBlockInfo[] = {
 const uint8_t kEncryptedFrame[] = {
     // Block is encrypted
     0x01,
-
     // IV
-    0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08};
+    0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+    // Some dummy encrypted data
+    0x01,
+};
 
 scoped_ptr<Cluster> CreateCluster(int timecode,
                                   const BlockInfo* block_info,
@@ -125,7 +131,8 @@ scoped_ptr<Cluster> CreateCluster(int timecode,
   ClusterBuilder cb;
   cb.SetClusterTimecode(0);
 
-  uint8_t kDefaultBlockData[] = { 0x00 };
+  // Default block data for audio, video and text.
+  uint8_t kDefaultBlockData[] = {0x00, 0x0A, 0x01, 0x0D, 0x02};
 
   for (int i = 0; i < block_count; i++) {
     const uint8_t* data;
@@ -171,16 +178,16 @@ scoped_ptr<Cluster> CreateEncryptedCluster(int bytes_to_write) {
   return cb.Finish();
 }
 
-bool VerifyBuffers(const WebMClusterParser::BufferQueue& audio_buffers,
-                   const WebMClusterParser::BufferQueue& video_buffers,
-                   const WebMClusterParser::BufferQueue& text_buffers,
-                   const BlockInfo* block_info,
-                   int block_count) {
+bool VerifyBuffersHelper(const BufferQueue& audio_buffers,
+                         const BufferQueue& video_buffers,
+                         const BufferQueue& text_buffers,
+                         const BlockInfo* block_info,
+                         int block_count) {
   int buffer_count = audio_buffers.size() + video_buffers.size() +
       text_buffers.size();
   if (block_count != buffer_count) {
-    DVLOG(1) << __FUNCTION__ << " : block_count (" << block_count
-             << ") mismatches buffer_count (" << buffer_count << ")";
+    LOG(ERROR) << __FUNCTION__ << " : block_count (" << block_count
+               << ") mismatches buffer_count (" << buffer_count << ")";
     return false;
   }
 
@@ -188,73 +195,48 @@ bool VerifyBuffers(const WebMClusterParser::BufferQueue& audio_buffers,
   size_t video_offset = 0;
   size_t text_offset = 0;
   for (int i = 0; i < block_count; i++) {
-    const WebMClusterParser::BufferQueue* buffers = NULL;
+    const BufferQueue* buffers = NULL;
     size_t* offset;
-    StreamParserBuffer::Type expected_type = DemuxerStream::UNKNOWN;
 
     if (block_info[i].track_num == kAudioTrackNum) {
       buffers = &audio_buffers;
       offset = &audio_offset;
-      expected_type = DemuxerStream::AUDIO;
     } else if (block_info[i].track_num == kVideoTrackNum) {
       buffers = &video_buffers;
       offset = &video_offset;
-      expected_type = DemuxerStream::VIDEO;
     } else if (block_info[i].track_num == kTextTrackNum) {
       buffers = &text_buffers;
       offset = &text_offset;
-      expected_type = DemuxerStream::TEXT;
     } else {
       LOG(ERROR) << "Unexpected track number " << block_info[i].track_num;
       return false;
     }
 
     if (*offset >= buffers->size()) {
-      DVLOG(1) << __FUNCTION__ << " : Too few buffers (" << buffers->size()
-               << ") for track_num (" << block_info[i].track_num
-               << "), expected at least " << *offset + 1 << " buffers";
+      LOG(ERROR) << __FUNCTION__ << " : Too few buffers (" << buffers->size()
+                 << ") for track_num (" << block_info[i].track_num
+                 << "), expected at least " << *offset + 1 << " buffers";
       return false;
     }
 
-    scoped_refptr<StreamParserBuffer> buffer = (*buffers)[(*offset)++];
+    scoped_refptr<MediaSample> buffer = (*buffers)[(*offset)++];
 
-    EXPECT_EQ(block_info[i].timestamp, buffer->timestamp().InMilliseconds());
-    EXPECT_EQ(std::abs(block_info[i].duration),
-              buffer->duration().InMillisecondsF());
-    EXPECT_EQ(expected_type, buffer->type());
-    EXPECT_EQ(block_info[i].track_num, buffer->track_id());
+    EXPECT_EQ(block_info[i].timestamp * kMicrosecondsPerMillisecond,
+              buffer->pts());
+    EXPECT_EQ(std::abs(block_info[i].duration) * kMicrosecondsPerMillisecond,
+              buffer->duration());
   }
 
   return true;
 }
 
-bool VerifyBuffers(const scoped_ptr<WebMClusterParser>& parser,
-                   const BlockInfo* block_info,
-                   int block_count) {
-  const WebMClusterParser::TextBufferQueueMap& text_map =
-      parser->GetTextBuffers();
-  const WebMClusterParser::BufferQueue* text_buffers;
-  const WebMClusterParser::BufferQueue no_text_buffers;
-  if (!text_map.empty())
-    text_buffers = &(text_map.rbegin()->second);
-  else
-    text_buffers = &no_text_buffers;
-
-  return VerifyBuffers(parser->GetAudioBuffers(),
-                       parser->GetVideoBuffers(),
-                       *text_buffers,
-                       block_info,
-                       block_count);
-}
-
-bool VerifyTextBuffers(const scoped_ptr<WebMClusterParser>& parser,
-                       const BlockInfo* block_info_ptr,
+bool VerifyTextBuffers(const BlockInfo* block_info_ptr,
                        int block_count,
                        int text_track_num,
-                       const WebMClusterParser::BufferQueue& text_buffers) {
+                       const BufferQueue& text_buffers) {
   const BlockInfo* const block_info_end = block_info_ptr + block_count;
 
-  typedef WebMClusterParser::BufferQueue::const_iterator TextBufferIter;
+  typedef BufferQueue::const_iterator TextBufferIter;
   TextBufferIter buffer_iter = text_buffers.begin();
   const TextBufferIter buffer_end = text_buffers.end();
 
@@ -267,30 +249,19 @@ bool VerifyTextBuffers(const scoped_ptr<WebMClusterParser>& parser,
     EXPECT_FALSE(block_info.use_simple_block);
     EXPECT_FALSE(buffer_iter == buffer_end);
 
-    const scoped_refptr<StreamParserBuffer> buffer = *buffer_iter++;
-    EXPECT_EQ(block_info.timestamp, buffer->timestamp().InMilliseconds());
-    EXPECT_EQ(std::abs(block_info.duration),
-              buffer->duration().InMillisecondsF());
-    EXPECT_EQ(DemuxerStream::TEXT, buffer->type());
-    EXPECT_EQ(text_track_num, buffer->track_id());
+    const scoped_refptr<MediaSample> buffer = *buffer_iter++;
+    EXPECT_EQ(block_info.timestamp * kMicrosecondsPerMillisecond,
+              buffer->pts());
+    EXPECT_EQ(std::abs(block_info.duration) * kMicrosecondsPerMillisecond,
+              buffer->duration());
   }
 
   EXPECT_TRUE(buffer_iter == buffer_end);
   return true;
 }
 
-void VerifyEncryptedBuffer(scoped_refptr<StreamParserBuffer> buffer) {
-  EXPECT_TRUE(buffer->decrypt_config());
-  EXPECT_EQ(static_cast<unsigned long>(DecryptConfig::kDecryptionKeySize),
-            buffer->decrypt_config()->iv().length());
-}
-
-void AppendToEnd(const WebMClusterParser::BufferQueue& src,
-                 WebMClusterParser::BufferQueue* dest) {
-  for (WebMClusterParser::BufferQueue::const_iterator itr = src.begin();
-       itr != src.end(); ++itr) {
-    dest->push_back(*itr);
-  }
+void VerifyEncryptedBuffer(scoped_refptr<MediaSample> buffer) {
+  EXPECT_TRUE(buffer->is_encrypted());
 }
 
 }  // namespace
@@ -301,23 +272,43 @@ class WebMClusterParserTest : public testing::Test {
 
  protected:
   void ResetParserToHaveDefaultDurations() {
-    base::TimeDelta default_audio_duration = base::TimeDelta::FromMilliseconds(
-        kTestAudioFrameDefaultDurationInMs);
-    base::TimeDelta default_video_duration = base::TimeDelta::FromMilliseconds(
-        kTestVideoFrameDefaultDurationInMs);
-    ASSERT_GE(default_audio_duration, base::TimeDelta());
-    ASSERT_GE(default_video_duration, base::TimeDelta());
-    ASSERT_NE(kNoTimestamp(), default_audio_duration);
-    ASSERT_NE(kNoTimestamp(), default_video_duration);
+    int64_t default_audio_duration =
+        kTestAudioFrameDefaultDurationInMs * kMicrosecondsPerMillisecond;
+    int64_t default_video_duration =
+        kTestVideoFrameDefaultDurationInMs * kMicrosecondsPerMillisecond;
+    ASSERT_GE(default_audio_duration, 0);
+    ASSERT_GE(default_video_duration, 0);
+    ASSERT_NE(kNoTimestamp, default_audio_duration);
+    ASSERT_NE(kNoTimestamp, default_video_duration);
 
     parser_.reset(CreateParserWithDefaultDurationsAndOptionalTextTracks(
         default_audio_duration, default_video_duration));
   }
 
+  bool NewSampleEvent(uint32_t track_id,
+                      const scoped_refptr<MediaSample>& sample) {
+    switch (track_id) {
+      case kAudioTrackNum:
+        audio_buffers_.push_back(sample);
+        break;
+      case kVideoTrackNum:
+        video_buffers_.push_back(sample);
+        break;
+      case kTextTrackNum:
+      case kTextTrackNum + 1:
+        text_buffers_map_[track_id].push_back(sample);
+        break;
+      default:
+        LOG(ERROR) << "Unexpected track number " << track_id;
+        return false;
+    }
+    return true;
+  }
+
   // Helper that hard-codes some non-varying constructor parameters.
   WebMClusterParser* CreateParserHelper(
-      base::TimeDelta audio_default_duration,
-      base::TimeDelta video_default_duration,
+      int64_t audio_default_duration,
+      int64_t video_default_duration,
       const WebMTracksParser::TextTracks& text_tracks,
       const std::set<int64_t>& ignored_tracks,
       const std::string& audio_encryption_key_id,
@@ -326,12 +317,14 @@ class WebMClusterParserTest : public testing::Test {
     return new WebMClusterParser(
         kTimecodeScale, kAudioTrackNum, audio_default_duration, kVideoTrackNum,
         video_default_duration, text_tracks, ignored_tracks,
-        audio_encryption_key_id, video_encryption_key_id, audio_codec);
+        audio_encryption_key_id, video_encryption_key_id, audio_codec,
+        base::Bind(&WebMClusterParserTest::NewSampleEvent,
+                   base::Unretained(this)));
   }
 
   // Create a default version of the parser for test.
   WebMClusterParser* CreateDefaultParser() {
-    return CreateParserHelper(kNoTimestamp(), kNoTimestamp(), TextTracks(),
+    return CreateParserHelper(kNoTimestamp, kNoTimestamp, TextTracks(),
                               std::set<int64_t>(), std::string(), std::string(),
                               kUnknownAudioCodec);
   }
@@ -339,8 +332,8 @@ class WebMClusterParserTest : public testing::Test {
   // Create a parser for test with custom audio and video default durations, and
   // optionally custom text tracks.
   WebMClusterParser* CreateParserWithDefaultDurationsAndOptionalTextTracks(
-      base::TimeDelta audio_default_duration,
-      base::TimeDelta video_default_duration,
+      int64_t audio_default_duration,
+      int64_t video_default_duration,
       const WebMTracksParser::TextTracks& text_tracks = TextTracks()) {
     return CreateParserHelper(audio_default_duration, video_default_duration,
                               text_tracks, std::set<int64_t>(), std::string(),
@@ -350,7 +343,7 @@ class WebMClusterParserTest : public testing::Test {
   // Create a parser for test with custom ignored tracks.
   WebMClusterParser* CreateParserWithIgnoredTracks(
       std::set<int64_t>& ignored_tracks) {
-    return CreateParserHelper(kNoTimestamp(), kNoTimestamp(), TextTracks(),
+    return CreateParserHelper(kNoTimestamp, kNoTimestamp, TextTracks(),
                               ignored_tracks, std::string(), std::string(),
                               kUnknownAudioCodec);
   }
@@ -360,22 +353,31 @@ class WebMClusterParserTest : public testing::Test {
       const std::string& audio_encryption_key_id,
       const std::string& video_encryption_key_id,
       const AudioCodec audio_codec) {
-    return CreateParserHelper(kNoTimestamp(), kNoTimestamp(), TextTracks(),
+    return CreateParserHelper(kNoTimestamp, kNoTimestamp, TextTracks(),
                               std::set<int64_t>(), audio_encryption_key_id,
                               video_encryption_key_id, audio_codec);
   }
 
+  bool VerifyBuffers(const BlockInfo* block_info, int block_count) {
+    bool result = VerifyBuffersHelper(audio_buffers_, video_buffers_,
+                                      text_buffers_map_[kTextTrackNum],
+                                      block_info, block_count);
+    audio_buffers_.clear();
+    video_buffers_.clear();
+    text_buffers_map_.clear();
+    return result;
+  }
+
   scoped_ptr<WebMClusterParser> parser_;
+  BufferQueue audio_buffers_;
+  BufferQueue video_buffers_;
+  TextBufferQueueMap text_buffers_map_;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(WebMClusterParserTest);
 };
 
-TEST_F(WebMClusterParserTest, HeldBackBufferHoldsBackAllTracks) {
-  // If a buffer is missing duration and is being held back, then all other
-  // tracks' buffers that have same or higher (decode) timestamp should be held
-  // back too to keep the timestamps emitted for a cluster monotonically
-  // non-decreasing and in same order as parsed.
+TEST_F(WebMClusterParserTest, TracksWithSampleMissingDuration) {
   InSequence s;
 
   // Reset the parser to have 3 tracks: text, video (no default frame duration),
@@ -384,12 +386,12 @@ TEST_F(WebMClusterParserTest, HeldBackBufferHoldsBackAllTracks) {
   text_tracks.insert(std::make_pair(TextTracks::key_type(kTextTrackNum),
                                     TextTrackConfig(kTextSubtitles, "", "",
                                                     "")));
-  base::TimeDelta default_audio_duration =
-      base::TimeDelta::FromMilliseconds(kTestAudioFrameDefaultDurationInMs);
-  ASSERT_GE(default_audio_duration, base::TimeDelta());
-  ASSERT_NE(kNoTimestamp(), default_audio_duration);
+  int64_t default_audio_duration = kTestAudioFrameDefaultDurationInMs;
+  ASSERT_GE(default_audio_duration, 0);
+  ASSERT_NE(kNoTimestamp, default_audio_duration);
   parser_.reset(CreateParserWithDefaultDurationsAndOptionalTextTracks(
-      default_audio_duration, kNoTimestamp(), text_tracks));
+      default_audio_duration * kMicrosecondsPerMillisecond, kNoTimestamp,
+      text_tracks));
 
   const int kExpectedVideoEstimationInMs = 33;
 
@@ -405,15 +407,28 @@ TEST_F(WebMClusterParserTest, HeldBackBufferHoldsBackAllTracks) {
       {kAudioTrackNum, 83, kTestAudioFrameDefaultDurationInMs, true, NULL, 0},
   };
 
+  // Samples are not emitted in the same order as |kBlockInfo| because some
+  // samples lack a duration and are held back until it can be determined.
+  const BlockInfo kExpectedBlockInfo[] = {
+      {kAudioTrackNum, 0, 23, false, NULL, 0},
+      {kTextTrackNum, 10, 42, false, NULL, 0},
+      {kAudioTrackNum, 23, kTestAudioFrameDefaultDurationInMs, true, NULL, 0},
+      {kVideoTrackNum, 0, 33, true, NULL, 0},
+      {kAudioTrackNum, 36, kTestAudioFrameDefaultDurationInMs, true, NULL, 0},
+      {kVideoTrackNum, 33, 33, true, NULL, 0},
+      {kAudioTrackNum, 70, kTestAudioFrameDefaultDurationInMs, true, NULL, 0},
+      {kVideoTrackNum, 66, kExpectedVideoEstimationInMs, true, NULL, 0},
+      {kAudioTrackNum, 83, kTestAudioFrameDefaultDurationInMs, true, NULL, 0},
+  };
   const int kExpectedBuffersOnPartialCluster[] = {
     0,  // Video simple block without DefaultDuration should be held back
-    0,  // Audio buffer ready, but not emitted because its TS >= held back video
-    0,  // Text buffer ready, but not emitted because its TS >= held back video
-    0,  // 2nd audio buffer ready, also not emitted for same reason as first
-    4,  // All previous buffers emitted, 2nd video held back with no duration
-    4,  // 2nd video still has no duration, 3rd audio ready but not emitted
-    6,  // All previous buffers emitted, 3rd video held back with no duration
-    6,  // 3rd video still has no duration, 4th audio ready but not emitted
+    1,  // Audio buffer ready
+    2,  // Text buffer ready
+    3,  // 2nd audio buffer ready
+    4,  // 1st video emitted, 2nd video held back with no duration
+    5,  // 3rd audio ready
+    6,  // 2nd video emitted, 3rd video held back with no duration
+    7,  // 4th audio ready
     9,  // Cluster end emits all buffers and 3rd video's duration is estimated
   };
 
@@ -451,8 +466,8 @@ TEST_F(WebMClusterParserTest, HeldBackBufferHoldsBackAllTracks) {
       EXPECT_LT(0, result);
     }
 
-    EXPECT_TRUE(VerifyBuffers(parser_, kBlockInfo,
-                              kExpectedBuffersOnPartialCluster[i]));
+    EXPECT_TRUE(
+        VerifyBuffers(kExpectedBlockInfo, kExpectedBuffersOnPartialCluster[i]));
   }
 }
 
@@ -468,13 +483,13 @@ TEST_F(WebMClusterParserTest, Reset) {
   EXPECT_GT(result, 0);
   EXPECT_LT(result, cluster->size());
 
-  ASSERT_TRUE(VerifyBuffers(parser_, kDefaultBlockInfo, block_count - 1));
+  ASSERT_TRUE(VerifyBuffers(kDefaultBlockInfo, block_count - 1));
   parser_->Reset();
 
   // Now parse a whole cluster to verify that all the blocks will get parsed.
   result = parser_->Parse(cluster->data(), cluster->size());
   EXPECT_EQ(cluster->size(), result);
-  ASSERT_TRUE(VerifyBuffers(parser_, kDefaultBlockInfo, block_count));
+  ASSERT_TRUE(VerifyBuffers(kDefaultBlockInfo, block_count));
 }
 
 TEST_F(WebMClusterParserTest, ParseClusterWithSingleCall) {
@@ -483,16 +498,16 @@ TEST_F(WebMClusterParserTest, ParseClusterWithSingleCall) {
 
   int result = parser_->Parse(cluster->data(), cluster->size());
   EXPECT_EQ(cluster->size(), result);
-  ASSERT_TRUE(VerifyBuffers(parser_, kDefaultBlockInfo, block_count));
+  ASSERT_TRUE(VerifyBuffers(kDefaultBlockInfo, block_count));
 }
 
 TEST_F(WebMClusterParserTest, ParseClusterWithMultipleCalls) {
   int block_count = arraysize(kDefaultBlockInfo);
   scoped_ptr<Cluster> cluster(CreateCluster(0, kDefaultBlockInfo, block_count));
 
-  WebMClusterParser::BufferQueue audio_buffers;
-  WebMClusterParser::BufferQueue video_buffers;
-  const WebMClusterParser::BufferQueue no_text_buffers;
+  BufferQueue audio_buffers;
+  BufferQueue video_buffers;
+  const BufferQueue no_text_buffers;
 
   const uint8_t* data = cluster->data();
   int size = cluster->size();
@@ -511,17 +526,12 @@ TEST_F(WebMClusterParserTest, ParseClusterWithMultipleCalls) {
       continue;
     }
 
-    AppendToEnd(parser_->GetAudioBuffers(), &audio_buffers);
-    AppendToEnd(parser_->GetVideoBuffers(), &video_buffers);
-
     parse_size = default_parse_size;
 
     data += result;
     size -= result;
   }
-  ASSERT_TRUE(VerifyBuffers(audio_buffers, video_buffers,
-                            no_text_buffers, kDefaultBlockInfo,
-                            block_count));
+  ASSERT_TRUE(VerifyBuffers(kDefaultBlockInfo, block_count));
 }
 
 // Verify that both BlockGroups with the BlockDuration before the Block
@@ -552,7 +562,7 @@ TEST_F(WebMClusterParserTest, ParseBlockGroup) {
 
   int result = parser_->Parse(kClusterData, kClusterSize);
   EXPECT_EQ(kClusterSize, result);
-  ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo, block_count));
+  ASSERT_TRUE(VerifyBuffers(kBlockInfo, block_count));
 }
 
 TEST_F(WebMClusterParserTest, ParseSimpleBlockAndBlockGroupMixture) {
@@ -568,7 +578,7 @@ TEST_F(WebMClusterParserTest, ParseSimpleBlockAndBlockGroupMixture) {
 
   int result = parser_->Parse(cluster->data(), cluster->size());
   EXPECT_EQ(cluster->size(), result);
-  ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo, block_count));
+  ASSERT_TRUE(VerifyBuffers(kBlockInfo, block_count));
 }
 
 TEST_F(WebMClusterParserTest, IgnoredTracks) {
@@ -601,7 +611,7 @@ TEST_F(WebMClusterParserTest, IgnoredTracks) {
 
   int result = parser_->Parse(cluster->data(), cluster->size());
   EXPECT_EQ(cluster->size(), result);
-  ASSERT_TRUE(VerifyBuffers(parser_, kOutputBlockInfo, output_block_count));
+  ASSERT_TRUE(VerifyBuffers(kOutputBlockInfo, output_block_count));
 }
 
 TEST_F(WebMClusterParserTest, ParseTextTracks) {
@@ -612,7 +622,7 @@ TEST_F(WebMClusterParserTest, ParseTextTracks) {
                                                     "")));
 
   parser_.reset(CreateParserWithDefaultDurationsAndOptionalTextTracks(
-      kNoTimestamp(), kNoTimestamp(), text_tracks));
+      kNoTimestamp, kNoTimestamp, text_tracks));
 
   const BlockInfo kInputBlockInfo[] = {
       {kAudioTrackNum, 0, 23, true, NULL, 0},
@@ -630,7 +640,7 @@ TEST_F(WebMClusterParserTest, ParseTextTracks) {
 
   int result = parser_->Parse(cluster->data(), cluster->size());
   EXPECT_EQ(cluster->size(), result);
-  ASSERT_TRUE(VerifyBuffers(parser_, kInputBlockInfo, input_block_count));
+  ASSERT_TRUE(VerifyBuffers(kInputBlockInfo, input_block_count));
 }
 
 TEST_F(WebMClusterParserTest, TextTracksSimpleBlock) {
@@ -641,7 +651,7 @@ TEST_F(WebMClusterParserTest, TextTracksSimpleBlock) {
                                                     "")));
 
   parser_.reset(CreateParserWithDefaultDurationsAndOptionalTextTracks(
-      kNoTimestamp(), kNoTimestamp(), text_tracks));
+      kNoTimestamp, kNoTimestamp, text_tracks));
 
   const BlockInfo kInputBlockInfo[] = {
     { kTextTrackNum,  33, 42, true },
@@ -670,7 +680,7 @@ TEST_F(WebMClusterParserTest, ParseMultipleTextTracks) {
                                                     "")));
 
   parser_.reset(CreateParserWithDefaultDurationsAndOptionalTextTracks(
-      kNoTimestamp(), kNoTimestamp(), text_tracks));
+      kNoTimestamp, kNoTimestamp, text_tracks));
 
   const BlockInfo kInputBlockInfo[] = {
       {kAudioTrackNum, 0, 23, true, NULL, 0},
@@ -690,16 +700,12 @@ TEST_F(WebMClusterParserTest, ParseMultipleTextTracks) {
   int result = parser_->Parse(cluster->data(), cluster->size());
   EXPECT_EQ(cluster->size(), result);
 
-  const WebMClusterParser::TextBufferQueueMap& text_map =
-      parser_->GetTextBuffers();
-  for (WebMClusterParser::TextBufferQueueMap::const_iterator itr =
-           text_map.begin();
-       itr != text_map.end();
-       ++itr) {
+  for (TextBufferQueueMap::const_iterator itr = text_buffers_map_.begin();
+       itr != text_buffers_map_.end(); ++itr) {
     const TextTracks::const_iterator find_result =
         text_tracks.find(itr->first);
     ASSERT_TRUE(find_result != text_tracks.end());
-    ASSERT_TRUE(VerifyTextBuffers(parser_, kInputBlockInfo, input_block_count,
+    ASSERT_TRUE(VerifyTextBuffers(kInputBlockInfo, input_block_count,
                                   itr->first, itr->second));
   }
 }
@@ -712,14 +718,14 @@ TEST_F(WebMClusterParserTest, ParseEncryptedBlock) {
 
   int result = parser_->Parse(cluster->data(), cluster->size());
   EXPECT_EQ(cluster->size(), result);
-  ASSERT_EQ(1UL, parser_->GetVideoBuffers().size());
-  scoped_refptr<StreamParserBuffer> buffer = parser_->GetVideoBuffers()[0];
+  ASSERT_EQ(1UL, video_buffers_.size());
+  scoped_refptr<MediaSample> buffer = video_buffers_[0];
   VerifyEncryptedBuffer(buffer);
 }
 
 TEST_F(WebMClusterParserTest, ParseBadEncryptedBlock) {
   scoped_ptr<Cluster> cluster(
-      CreateEncryptedCluster(sizeof(kEncryptedFrame) - 1));
+      CreateEncryptedCluster(sizeof(kEncryptedFrame) - 2));
 
   parser_.reset(CreateParserWithKeyIdsAndAudioCodec(
       std::string(), "video_key_id", kUnknownAudioCodec));
@@ -753,7 +759,7 @@ TEST_F(WebMClusterParserTest, ParseInvalidTextBlockGroupWithoutDuration) {
                                                     "")));
 
   parser_.reset(CreateParserWithDefaultDurationsAndOptionalTextTracks(
-      kNoTimestamp(), kNoTimestamp(), text_tracks));
+      kNoTimestamp, kNoTimestamp, text_tracks));
 
   const BlockInfo kBlockInfo[] = {
     { kTextTrackNum,  33, -42, false },
@@ -791,14 +797,14 @@ TEST_F(WebMClusterParserTest, ParseWithDefaultDurationsSimpleBlocks) {
   int result = parser_->Parse(cluster->data(), cluster->size() - 1);
   EXPECT_GT(result, 0);
   EXPECT_LT(result, cluster->size());
-  ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo, block_count - 1));
+  ASSERT_TRUE(VerifyBuffers(kBlockInfo, block_count - 1));
 
   parser_->Reset();
 
   // Now parse a whole cluster to verify that all the blocks will get parsed.
   result = parser_->Parse(cluster->data(), cluster->size());
   EXPECT_EQ(cluster->size(), result);
-  ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo, block_count));
+  ASSERT_TRUE(VerifyBuffers(kBlockInfo, block_count));
 }
 
 TEST_F(WebMClusterParserTest, ParseWithoutAnyDurationsSimpleBlocks) {
@@ -809,7 +815,7 @@ TEST_F(WebMClusterParserTest, ParseWithoutAnyDurationsSimpleBlocks) {
   // last block in a cluster is estimated independently for each track in the
   // cluster. For video tracks we use the maximum seen so far. For audio we use
-  // the the minimum.
+  // the minimum.
-  // TODO(chcunningham): Move audio over to use the maximum.
+  // TODO: Move audio over to use the maximum.
 
   const int kExpectedAudioEstimationInMs = 22;
   const int kExpectedVideoEstimationInMs = 34;
@@ -834,16 +840,16 @@ TEST_F(WebMClusterParserTest, ParseWithoutAnyDurationsSimpleBlocks) {
   int result = parser_->Parse(cluster1->data(), cluster1->size() - 1);
   EXPECT_GT(result, 0);
   EXPECT_LT(result, cluster1->size());
-  ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo1, block_count1 - 3));
-  EXPECT_EQ(3UL, parser_->GetAudioBuffers().size());
-  EXPECT_EQ(1UL, parser_->GetVideoBuffers().size());
+  EXPECT_EQ(3UL, audio_buffers_.size());
+  EXPECT_EQ(1UL, video_buffers_.size());
+  ASSERT_TRUE(VerifyBuffers(kBlockInfo1, block_count1 - 3));
 
   parser_->Reset();
 
   // Now parse the full first cluster and verify all the blocks are parsed.
   result = parser_->Parse(cluster1->data(), cluster1->size());
   EXPECT_EQ(cluster1->size(), result);
-  ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo1, block_count1));
+  ASSERT_TRUE(VerifyBuffers(kBlockInfo1, block_count1));
 
   // Verify that the estimated frame duration is tracked across clusters for
   // each track.
@@ -858,7 +864,7 @@ TEST_F(WebMClusterParserTest, ParseWithoutAnyDurationsSimpleBlocks) {
   scoped_ptr<Cluster> cluster2(CreateCluster(0, kBlockInfo2, block_count2));
   result = parser_->Parse(cluster2->data(), cluster2->size());
   EXPECT_EQ(cluster2->size(), result);
-  ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo2, block_count2));
+  ASSERT_TRUE(VerifyBuffers(kBlockInfo2, block_count2));
 }
 
 TEST_F(WebMClusterParserTest, ParseWithoutAnyDurationsBlockGroups) {
@@ -894,16 +900,16 @@ TEST_F(WebMClusterParserTest, ParseWithoutAnyDurationsBlockGroups) {
   int result = parser_->Parse(cluster1->data(), cluster1->size() - 1);
   EXPECT_GT(result, 0);
   EXPECT_LT(result, cluster1->size());
-  ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo1, block_count1 - 3));
-  EXPECT_EQ(3UL, parser_->GetAudioBuffers().size());
-  EXPECT_EQ(1UL, parser_->GetVideoBuffers().size());
+  EXPECT_EQ(3UL, audio_buffers_.size());
+  EXPECT_EQ(1UL, video_buffers_.size());
+  ASSERT_TRUE(VerifyBuffers(kBlockInfo1, block_count1 - 3));
 
   parser_->Reset();
 
   // Now parse the full first cluster and verify all the blocks are parsed.
   result = parser_->Parse(cluster1->data(), cluster1->size());
   EXPECT_EQ(cluster1->size(), result);
-  ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo1, block_count1));
+  ASSERT_TRUE(VerifyBuffers(kBlockInfo1, block_count1));
 
   // Verify that the estimated frame duration is tracked across clusters for
   // each track.
@@ -916,7 +922,7 @@ TEST_F(WebMClusterParserTest, ParseWithoutAnyDurationsBlockGroups) {
   scoped_ptr<Cluster> cluster2(CreateCluster(0, kBlockInfo2, block_count2));
   result = parser_->Parse(cluster2->data(), cluster2->size());
   EXPECT_EQ(cluster2->size(), result);
-  ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo2, block_count2));
+  ASSERT_TRUE(VerifyBuffers(kBlockInfo2, block_count2));
 }
 
 // TODO(wolenetz): Is parser behavior correct? See http://crbug.com/363433.
@@ -952,14 +958,14 @@ TEST_F(WebMClusterParserTest,
   int result = parser_->Parse(cluster->data(), cluster->size() - 1);
   EXPECT_GT(result, 0);
   EXPECT_LT(result, cluster->size());
-  ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo, block_count - 1));
+  ASSERT_TRUE(VerifyBuffers(kBlockInfo, block_count - 1));
 
   parser_->Reset();
 
   // Now parse a whole cluster to verify that all the blocks will get parsed.
   result = parser_->Parse(cluster->data(), cluster->size());
   EXPECT_EQ(cluster->size(), result);
-  ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo, block_count));
+  ASSERT_TRUE(VerifyBuffers(kBlockInfo, block_count));
 }
 
 TEST_F(WebMClusterParserTest,
@@ -982,7 +988,7 @@ TEST_F(WebMClusterParserTest,
   scoped_ptr<Cluster> cluster(CreateCluster(0, kBlockInfo, block_count));
   int result = parser_->Parse(cluster->data(), cluster->size());
   EXPECT_EQ(cluster->size(), result);
-  ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo, block_count));
+  ASSERT_TRUE(VerifyBuffers(kBlockInfo, block_count));
 }
 
 TEST_F(WebMClusterParserTest,
@@ -998,7 +1004,7 @@ TEST_F(WebMClusterParserTest,
   scoped_ptr<Cluster> cluster(CreateCluster(0, kBlockInfo, block_count));
   int result = parser_->Parse(cluster->data(), cluster->size());
   EXPECT_EQ(cluster->size(), result);
-  ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo, block_count));
+  ASSERT_TRUE(VerifyBuffers(kBlockInfo, block_count));
 }
 
 TEST_F(WebMClusterParserTest, ReadOpusDurationsSimpleBlockAtEndOfCluster) {
@@ -1022,7 +1028,7 @@ TEST_F(WebMClusterParserTest, ReadOpusDurationsSimpleBlockAtEndOfCluster) {
 
     int result = parser_->Parse(cluster->data(), cluster->size());
     EXPECT_EQ(cluster->size(), result);
-    ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo, block_count));
+    ASSERT_TRUE(VerifyBuffers(kBlockInfo, block_count));
 
     loop_count++;
   }
@@ -1040,9 +1046,11 @@ TEST_F(WebMClusterParserTest, PreferOpusDurationsOverBlockDurations) {
     parser_.reset(CreateParserWithKeyIdsAndAudioCodec(
         std::string(), std::string(), kCodecOpus));
 
+    // Setting BlockDuration != Opus duration to see which one the parser uses.
+    int block_duration_ms = packet_ptr->duration_ms() + 10;
     BlockInfo block_infos[] = {{kAudioTrackNum,
                                 0,
-                                block_duration_ms,
+                                static_cast<double>(block_duration_ms),
                                 false,  // Not a SimpleBlock.
                                 packet_ptr->data(),
                                 packet_ptr->size()}};
@@ -1056,7 +1064,7 @@ TEST_F(WebMClusterParserTest, PreferOpusDurationsOverBlockDurations) {
     // duration to be that of the Opus packet to verify it was preferred.
     block_infos[0].duration = packet_ptr->duration_ms();
 
-    ASSERT_TRUE(VerifyBuffers(parser_, block_infos, block_count));
+    ASSERT_TRUE(VerifyBuffers(block_infos, block_count));
 
     loop_count++;
   }
@@ -1090,7 +1098,7 @@ TEST_F(WebMClusterParserTest, DontReadEncodedDurationWhenEncrypted) {
   EXPECT_EQ(cluster->size(), result);
 
   // Will verify that duration of buffer matches that of BlockDuration.
-  ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo, block_count));
+  ASSERT_TRUE(VerifyBuffers(kBlockInfo, block_count));
 }
 
 }  // namespace media