From af7d6a792174a48cd8f033add7683c47de5012a1 Mon Sep 17 00:00:00 2001
From: KongQun Yang <kqyang@google.com>
Date: Mon, 14 Dec 2015 12:33:18 -0800
Subject: [PATCH] Estimate duration of last sample in cluster from next cluster

Change-Id: I7dbc4045d366bbfb0c12f9652ffe97b8fcf447cf
---
 .../media/formats/webm/webm_cluster_parser.cc |  60 +++----
 .../media/formats/webm/webm_cluster_parser.h  |  10 +-
 .../webm/webm_cluster_parser_unittest.cc      | 161 ++++++++----------
 .../media/formats/webm/webm_media_parser.cc   |   2 +-
 4 files changed, 98 insertions(+), 135 deletions(-)

diff --git a/packager/media/formats/webm/webm_cluster_parser.cc b/packager/media/formats/webm/webm_cluster_parser.cc
index 7d4e547170..fdf8a93238 100644
--- a/packager/media/formats/webm/webm_cluster_parser.cc
+++ b/packager/media/formats/webm/webm_cluster_parser.cc
@@ -93,6 +93,13 @@ void WebMClusterParser::Reset() {
   ResetTextTracks();
 }
 
+void WebMClusterParser::Flush() {
+  // Estimate each track's last frame duration if necessary, then reset state.
+  audio_.ApplyDurationEstimateIfNeeded();
+  video_.ApplyDurationEstimateIfNeeded();
+  Reset();
+}
+
 int WebMClusterParser::Parse(const uint8_t* buf, int size) {
   int result = parser_.Parse(buf, size);
 
@@ -103,9 +110,6 @@ int WebMClusterParser::Parse(const uint8_t* buf, int size) {
 
   cluster_ended_ = parser_.IsParsingComplete();
   if (cluster_ended_) {
-    audio_.ApplyDurationEstimateIfNeeded();
-    video_.ApplyDurationEstimateIfNeeded();
-
     // If there were no buffers in this cluster, set the cluster start time to
     // be the |cluster_timecode_|.
     if (cluster_start_time_ == kNoTimestamp) {
@@ -155,7 +159,7 @@ int64_t WebMClusterParser::ReadOpusDuration(const uint8_t* data, int size) {
   static const uint8_t kTocConfigMask = 0xf8;
   static const uint8_t kTocFrameCountCodeMask = 0x03;
   static const uint8_t kFrameCountMask = 0x3f;
-  static const int64_t kPacketDurationMax = 120;
+  static const int64_t kPacketDurationMaxMs = 120;  // Opus max, in ms.
 
   if (size < 1) {
     LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
@@ -209,14 +213,14 @@ int64_t WebMClusterParser::ReadOpusDuration(const uint8_t* data, int size) {
   DCHECK_GT(frame_count, 0);
   int64_t duration = kOpusFrameDurationsMu[opusConfig] * frame_count;
 
-  if (duration > kPacketDurationMax) {
+  if (duration > kPacketDurationMaxMs * 1000) {
     // Intentionally allowing packet to pass through for now. Decoder should
     // either handle or fail gracefully. LOG as breadcrumbs in case
     // things go sideways.
     LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
         << "Warning, demuxed Opus packet with encoded duration: "
-        << duration << "ms. Should be no greater than "
-        << kPacketDurationMax << "ms.";
+        << duration / 1000 << "ms. Should be no greater than "
+        << kPacketDurationMaxMs << "ms.";
   }
 
   return duration;
@@ -496,13 +500,11 @@ bool WebMClusterParser::OnBlock(bool is_simple_block,
   // TrackEntry->DefaultDuration when available. This layering violation is a
   // workaround for http://crbug.com/396634, decreasing the likelihood of
   // fall-back to rough estimation techniques for Blocks that lack a
-  // BlockDuration at the end of a cluster. Cross cluster durations are not
-  // feasible given flexibility of cluster ordering and MSE APIs. Duration
-  // estimation may still apply in cases of encryption and codecs for which
-  // we do not extract encoded duration. Within a cluster, estimates are applied
-  // as Block Timecode deltas, or once the whole cluster is parsed in the case
-  // of the last Block in the cluster. See Track::EmitBuffer and
-  // ApplyDurationEstimateIfNeeded().
+  // BlockDuration at the end of a cluster. Duration estimation may still apply
+  // in cases of encryption and codecs for which we do not extract encoded
+  // duration. Estimates are applied as Block Timecode deltas, or once the whole
+  // stream is parsed in the case of the last Block in the stream. See
+  // Track::EmitBuffer and ApplyDurationEstimateIfNeeded().
   if (encoded_duration != kNoTimestamp) {
     DCHECK(encoded_duration != kInfiniteDuration);
     DCHECK(encoded_duration > 0);
@@ -518,9 +520,9 @@ bool WebMClusterParser::OnBlock(bool is_simple_block,
       const auto kWarnDurationDiff = timecode_multiplier_ * 2;
       if (duration_difference > kWarnDurationDiff) {
         LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs)
-            << "BlockDuration (" << block_duration_time_delta
+            << "BlockDuration (" << block_duration_time_delta / 1000
             << "ms) differs significantly from encoded duration ("
-            << encoded_duration << "ms).";
+            << encoded_duration / 1000 << "ms).";
       }
     }
   } else if (block_duration_time_delta != kNoTimestamp) {
@@ -589,16 +591,8 @@ void WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() {
   int64_t estimated_duration = GetDurationEstimate();
   last_added_buffer_missing_duration_->set_duration(estimated_duration);
 
-  if (is_video_) {
-    // Exposing estimation so splicing/overlap frame processing can make
-    // informed decisions downstream.
-    // TODO(kqyang): Should we wait for the next cluster to set the duration?
-    // last_added_buffer_missing_duration_->set_is_duration_estimated(true);
-  }
-
   LIMITED_LOG(INFO, num_duration_estimates_, kMaxDurationEstimateLogs)
-      << "Estimating WebM block duration to be "
-      << estimated_duration
+      << "Estimating WebM block duration to be " << estimated_duration / 1000
       << "ms for the last (Simple)Block in the Cluster for this Track. Use "
          "BlockGroups with BlockDurations at the end of each Track in a "
          "Cluster to avoid estimation.";
@@ -653,25 +647,15 @@ bool WebMClusterParser::Track::EmitBufferHelp(
     return false;
   }
 
-  // The estimated frame duration is the minimum (for audio) or the maximum
-  // (for video) non-zero duration since the last initialization segment. The
-  // minimum is used for audio to ensure frame durations aren't overestimated,
-  // triggering unnecessary frame splicing. For video, splicing does not apply,
-  // so maximum is used and overlap is simply resolved by showing the
-  // later of the overlapping frames at its given PTS, effectively trimming down
-  // the over-estimated duration of the previous frame.
-  // TODO: Use max for audio and disable splicing whenever estimated buffers are
-  // encountered.
+  // The estimated frame duration is the maximum non-zero duration since the
+  // last initialization segment.
   if (duration > 0) {
     int64_t orig_duration_estimate = estimated_next_frame_duration_;
     if (estimated_next_frame_duration_ == kNoTimestamp) {
       estimated_next_frame_duration_ = duration;
-    } else if (is_video_) {
-      estimated_next_frame_duration_ =
-          std::max(duration, estimated_next_frame_duration_);
     } else {
       estimated_next_frame_duration_ =
-          std::min(duration, estimated_next_frame_duration_);
+          std::max(duration, estimated_next_frame_duration_);
     }
 
     if (orig_duration_estimate != estimated_next_frame_duration_) {
diff --git a/packager/media/formats/webm/webm_cluster_parser.h b/packager/media/formats/webm/webm_cluster_parser.h
index cc7524af59..2f73b524f9 100644
--- a/packager/media/formats/webm/webm_cluster_parser.h
+++ b/packager/media/formats/webm/webm_cluster_parser.h
@@ -102,10 +102,8 @@ class WebMClusterParser : public WebMParserClient {
     int64_t default_duration_;
 
     // If kNoTimestamp, then a default value will be used. This estimate is the
-    // maximum (for video), or minimum (for audio) duration seen so far for this
-    // track, and is used only if |default_duration_| is kNoTimestamp.
-    // TODO: Use maximum for audio too, adding checks to disable splicing when
-    // these estimates are observed in SourceBufferStream.
+    // maximum duration seen so far for this track, and is used only if
+    // |default_duration_| is kNoTimestamp.
     int64_t estimated_next_frame_duration_;
 
     MediaParser::NewSampleCB new_sample_cb_;
@@ -130,6 +128,10 @@ class WebMClusterParser : public WebMParserClient {
   /// Resets the parser state so it can accept a new cluster.
   void Reset();
 
+  /// Flush data currently in the parser and reset the parser so it can accept a
+  /// new cluster.
+  void Flush();
+
   /// Parses a WebM cluster element in |buf|.
   /// @return -1 if the parse fails.
   /// @return 0 if more data is needed.
diff --git a/packager/media/formats/webm/webm_cluster_parser_unittest.cc b/packager/media/formats/webm/webm_cluster_parser_unittest.cc
index 7f9111197e..83f91a186f 100644
--- a/packager/media/formats/webm/webm_cluster_parser_unittest.cc
+++ b/packager/media/formats/webm/webm_cluster_parser_unittest.cc
@@ -417,8 +417,8 @@ TEST_F(WebMClusterParserTest, TracksWithSampleMissingDuration) {
       {kAudioTrackNum, 36, kTestAudioFrameDefaultDurationInMs, true, NULL, 0},
       {kVideoTrackNum, 33, 33, true, NULL, 0},
       {kAudioTrackNum, 70, kTestAudioFrameDefaultDurationInMs, true, NULL, 0},
-      {kVideoTrackNum, 66, kExpectedVideoEstimationInMs, true, NULL, 0},
       {kAudioTrackNum, 83, kTestAudioFrameDefaultDurationInMs, true, NULL, 0},
+      {kVideoTrackNum, 66, kExpectedVideoEstimationInMs, true, NULL, 0},
   };
   const int kExpectedBuffersOnPartialCluster[] = {
     0,  // Video simple block without DefaultDuration should be held back
@@ -429,46 +429,32 @@ TEST_F(WebMClusterParserTest, TracksWithSampleMissingDuration) {
     5,  // 3rd audio ready
     6,  // 2nd video emitted, 3rd video held back with no duration
     7,  // 4th audio ready
-    9,  // Cluster end emits all buffers and 3rd video's duration is estimated
+    8,  // 5th audio ready
   };
 
   ASSERT_EQ(arraysize(kBlockInfo), arraysize(kExpectedBuffersOnPartialCluster));
   int block_count = arraysize(kBlockInfo);
 
-  // Iteratively create a cluster containing the first N+1 blocks and parse all
-  // but the last byte of the cluster (except when N==|block_count|, just parse
-  // the whole cluster). Verify that the corresponding entry in
+  // Iteratively create a cluster containing the first N+1 blocks and parse the
+  // cluster. Verify that the corresponding entry in
   // |kExpectedBuffersOnPartialCluster| identifies the exact subset of
   // |kBlockInfo| returned by the parser.
   for (int i = 0; i < block_count; ++i) {
-    if (i > 0)
-      parser_->Reset();
-    // Since we don't know exactly the offsets of each block in the full
-    // cluster, build a cluster with exactly one additional block so that
-    // parse of all but one byte should deterministically parse all but the
-    // last full block. Don't |exceed block_count| blocks though.
-    int blocks_in_cluster = std::min(i + 2, block_count);
-    scoped_ptr<Cluster> cluster(CreateCluster(0, kBlockInfo,
-                                              blocks_in_cluster));
-    // Parse all but the last byte unless we need to parse the full cluster.
-    bool parse_full_cluster = i == (block_count - 1);
+    parser_->Reset();
 
-    int result = parser_->Parse(cluster->data(), parse_full_cluster ?
-                                cluster->size() : cluster->size() - 1);
-    if (parse_full_cluster) {
-      DVLOG(1) << "Verifying parse result of full cluster of "
-               << blocks_in_cluster << " blocks";
-      EXPECT_EQ(cluster->size(), result);
-    } else {
-      DVLOG(1) << "Verifying parse result of cluster of "
-               << blocks_in_cluster << " blocks with last block incomplete";
-      EXPECT_GT(cluster->size(), result);
-      EXPECT_LT(0, result);
-    }
+    const int blocks_in_cluster = i + 1;
+    scoped_ptr<Cluster> cluster(
+        CreateCluster(0, kBlockInfo, blocks_in_cluster));
 
+    EXPECT_EQ(cluster->size(),
+              parser_->Parse(cluster->data(), cluster->size()));
     EXPECT_TRUE(
         VerifyBuffers(kExpectedBlockInfo, kExpectedBuffersOnPartialCluster[i]));
   }
+
+  // The last (3rd) video is emitted on flush with duration estimated.
+  parser_->Flush();
+  EXPECT_TRUE(VerifyBuffers(&kExpectedBlockInfo[block_count - 1], 1));
 }
 
 TEST_F(WebMClusterParserTest, Reset) {
@@ -611,6 +597,7 @@ TEST_F(WebMClusterParserTest, IgnoredTracks) {
 
   int result = parser_->Parse(cluster->data(), cluster->size());
   EXPECT_EQ(cluster->size(), result);
+  parser_->Flush();
   ASSERT_TRUE(VerifyBuffers(kOutputBlockInfo, output_block_count));
 }
 
@@ -640,6 +627,7 @@ TEST_F(WebMClusterParserTest, ParseTextTracks) {
 
   int result = parser_->Parse(cluster->data(), cluster->size());
   EXPECT_EQ(cluster->size(), result);
+  parser_->Flush();
   ASSERT_TRUE(VerifyBuffers(kInputBlockInfo, input_block_count));
 }
 
@@ -718,6 +706,7 @@ TEST_F(WebMClusterParserTest, ParseEncryptedBlock) {
 
   int result = parser_->Parse(cluster->data(), cluster->size());
   EXPECT_EQ(cluster->size(), result);
+  parser_->Flush();
   ASSERT_EQ(1UL, video_buffers_.size());
   scoped_refptr<MediaSample> buffer = video_buffers_[0];
   VerifyEncryptedBuffer(buffer);
@@ -811,59 +800,51 @@ TEST_F(WebMClusterParserTest, ParseWithoutAnyDurationsSimpleBlocks) {
   InSequence s;
 
   // Absent DefaultDuration information, SimpleBlock durations are derived from
-  // inter-buffer track timestamp delta if within the cluster. Duration for the
-  // last block in a cluster is estimated independently for each track in the
-  // cluster. For video tracks we use the maximum seen so far. For audio we use
-  // the the minimum.
-  // TODO: Move audio over to use the maximum.
+  // inter-buffer track timestamp delta either within or across clusters.
+  // Duration for the last block is estimated independently for each track when
+  // Flush() is called. We use the maximum seen so far for estimation.
 
-  const int kExpectedAudioEstimationInMs = 22;
-  const int kExpectedVideoEstimationInMs = 34;
   const BlockInfo kBlockInfo1[] = {
       {kAudioTrackNum, 0, 23, true, NULL, 0},
       {kAudioTrackNum, 23, 22, true, NULL, 0},
       {kVideoTrackNum, 33, 33, true, NULL, 0},
       {kAudioTrackNum, 45, 23, true, NULL, 0},
       {kVideoTrackNum, 66, 34, true, NULL, 0},
-      {kAudioTrackNum, 68, kExpectedAudioEstimationInMs, true, NULL, 0},
-      {kVideoTrackNum, 100, kExpectedVideoEstimationInMs, true, NULL, 0},
+      {kAudioTrackNum, 68, 24, true, NULL, 0},
+      {kVideoTrackNum, 100, 35, true, NULL, 0},
   };
 
   int block_count1 = arraysize(kBlockInfo1);
   scoped_ptr<Cluster> cluster1(CreateCluster(0, kBlockInfo1, block_count1));
 
-  // Send slightly less than the first full cluster so all but the last video
-  // block is parsed. Verify the last fully parsed audio and video buffer are
-  // both missing from the result (parser should hold them aside for duration
-  // estimation prior to end of cluster detection in the absence of
-  // DefaultDurations.)
-  int result = parser_->Parse(cluster1->data(), cluster1->size() - 1);
-  EXPECT_GT(result, 0);
-  EXPECT_LT(result, cluster1->size());
+  // Verify the last fully parsed audio and video buffer are both missing from
+  // the result (parser should hold them aside for duration estimation until
+  // Flush() is called in the absence of DefaultDurations).
+  EXPECT_EQ(cluster1->size(),
+            parser_->Parse(cluster1->data(), cluster1->size()));
   EXPECT_EQ(3UL, audio_buffers_.size());
-  EXPECT_EQ(1UL, video_buffers_.size());
-  ASSERT_TRUE(VerifyBuffers(kBlockInfo1, block_count1 - 3));
-
-  parser_->Reset();
-
-  // Now parse the full first cluster and verify all the blocks are parsed.
-  result = parser_->Parse(cluster1->data(), cluster1->size());
-  EXPECT_EQ(cluster1->size(), result);
-  ASSERT_TRUE(VerifyBuffers(kBlockInfo1, block_count1));
+  EXPECT_EQ(2UL, video_buffers_.size());
+  ASSERT_TRUE(VerifyBuffers(kBlockInfo1, block_count1 - 2));
 
   // Verify that the estimated frame duration is tracked across clusters for
   // each track.
+  const int kExpectedAudioEstimationInMs = 24;
+  const int kExpectedVideoEstimationInMs = 35;
   const BlockInfo kBlockInfo2[] = {
-      // Estimate carries over across clusters
-      {kAudioTrackNum, 200, kExpectedAudioEstimationInMs, true, NULL, 0},
-      // Estimate carries over across clusters
-      {kVideoTrackNum, 201, kExpectedVideoEstimationInMs, true, NULL, 0},
+      {kAudioTrackNum, 92, kExpectedAudioEstimationInMs, true, NULL, 0},
+      {kVideoTrackNum, 135, kExpectedVideoEstimationInMs, true, NULL, 0},
   };
 
   int block_count2 = arraysize(kBlockInfo2);
   scoped_ptr<Cluster> cluster2(CreateCluster(0, kBlockInfo2, block_count2));
-  result = parser_->Parse(cluster2->data(), cluster2->size());
-  EXPECT_EQ(cluster2->size(), result);
+  EXPECT_EQ(cluster2->size(),
+            parser_->Parse(cluster2->data(), cluster2->size()));
+
+  // Verify that remaining blocks of cluster1 are emitted.
+  ASSERT_TRUE(VerifyBuffers(&kBlockInfo1[block_count1 - 2], 2));
+
+  // Now flush and verify blocks in cluster2 are emitted.
+  parser_->Flush();
   ASSERT_TRUE(VerifyBuffers(kBlockInfo2, block_count2));
 }
 
@@ -871,57 +852,51 @@ TEST_F(WebMClusterParserTest, ParseWithoutAnyDurationsBlockGroups) {
   InSequence s;
 
   // Absent DefaultDuration and BlockDuration information, BlockGroup block
-  // durations are derived from inter-buffer track timestamp delta if within the
-  // cluster. Duration for the last block in a cluster is estimated
-  // independently for each track in the cluster. For video tracks we use the
-  // maximum seen so far. For audio we use the the minimum.
-  // TODO: Move audio over to use the maximum.
+  // durations are derived from inter-buffer track timestamp delta either within
+  // or across clusters. Duration for the last block is estimated independently
+  // for each track when Flush() is called. We use the maximum seen so far.
 
-  const int kExpectedAudioEstimationInMs = 22;
-  const int kExpectedVideoEstimationInMs = 34;
   const BlockInfo kBlockInfo1[] = {
       {kAudioTrackNum, 0, -23, false, NULL, 0},
       {kAudioTrackNum, 23, -22, false, NULL, 0},
       {kVideoTrackNum, 33, -33, false, NULL, 0},
       {kAudioTrackNum, 45, -23, false, NULL, 0},
       {kVideoTrackNum, 66, -34, false, NULL, 0},
-      {kAudioTrackNum, 68, -kExpectedAudioEstimationInMs, false, NULL, 0},
-      {kVideoTrackNum, 100, -kExpectedVideoEstimationInMs, false, NULL, 0},
+      {kAudioTrackNum, 68, -24, false, NULL, 0},
+      {kVideoTrackNum, 100, -35, false, NULL, 0},
   };
 
   int block_count1 = arraysize(kBlockInfo1);
   scoped_ptr<Cluster> cluster1(CreateCluster(0, kBlockInfo1, block_count1));
 
-  // Send slightly less than the first full cluster so all but the last video
-  // block is parsed. Verify the last fully parsed audio and video buffer are
-  // both missing from the result (parser should hold them aside for duration
-  // estimation prior to end of cluster detection in the absence of
-  // DefaultDurations.)
-  int result = parser_->Parse(cluster1->data(), cluster1->size() - 1);
-  EXPECT_GT(result, 0);
-  EXPECT_LT(result, cluster1->size());
+  // Verify the last fully parsed audio and video buffer are both missing from
+  // the result (parser should hold them aside for duration estimation until
+  // Flush() is called in the absence of DefaultDurations).
+  EXPECT_EQ(cluster1->size(),
+            parser_->Parse(cluster1->data(), cluster1->size()));
   EXPECT_EQ(3UL, audio_buffers_.size());
-  EXPECT_EQ(1UL, video_buffers_.size());
-  ASSERT_TRUE(VerifyBuffers(kBlockInfo1, block_count1 - 3));
-
-  parser_->Reset();
-
-  // Now parse the full first cluster and verify all the blocks are parsed.
-  result = parser_->Parse(cluster1->data(), cluster1->size());
-  EXPECT_EQ(cluster1->size(), result);
-  ASSERT_TRUE(VerifyBuffers(kBlockInfo1, block_count1));
+  EXPECT_EQ(2UL, video_buffers_.size());
+  ASSERT_TRUE(VerifyBuffers(kBlockInfo1, block_count1 - 2));
 
   // Verify that the estimated frame duration is tracked across clusters for
   // each track.
+  const int kExpectedAudioEstimationInMs = 24;
+  const int kExpectedVideoEstimationInMs = 35;
   const BlockInfo kBlockInfo2[] = {
-      {kAudioTrackNum, 200, -kExpectedAudioEstimationInMs, false, NULL, 0},
-      {kVideoTrackNum, 201, -kExpectedVideoEstimationInMs, false, NULL, 0},
+      {kAudioTrackNum, 92, -kExpectedAudioEstimationInMs, false, NULL, 0},
+      {kVideoTrackNum, 135, -kExpectedVideoEstimationInMs, false, NULL, 0},
   };
 
   int block_count2 = arraysize(kBlockInfo2);
   scoped_ptr<Cluster> cluster2(CreateCluster(0, kBlockInfo2, block_count2));
-  result = parser_->Parse(cluster2->data(), cluster2->size());
-  EXPECT_EQ(cluster2->size(), result);
+  EXPECT_EQ(cluster2->size(),
+            parser_->Parse(cluster2->data(), cluster2->size()));
+
+  // Verify that remaining blocks of cluster1 are emitted.
+  ASSERT_TRUE(VerifyBuffers(&kBlockInfo1[block_count1 - 2], 2));
+
+  // Now flush and verify blocks in cluster2 are emitted.
+  parser_->Flush();
   ASSERT_TRUE(VerifyBuffers(kBlockInfo2, block_count2));
 }
 
@@ -958,13 +933,13 @@ TEST_F(WebMClusterParserTest,
   int result = parser_->Parse(cluster->data(), cluster->size() - 1);
   EXPECT_GT(result, 0);
   EXPECT_LT(result, cluster->size());
+  parser_->Flush();
   ASSERT_TRUE(VerifyBuffers(kBlockInfo, block_count - 1));
 
-  parser_->Reset();
-
   // Now parse a whole cluster to verify that all the blocks will get parsed.
   result = parser_->Parse(cluster->data(), cluster->size());
   EXPECT_EQ(cluster->size(), result);
+  parser_->Flush();
   ASSERT_TRUE(VerifyBuffers(kBlockInfo, block_count));
 }
 
@@ -988,6 +963,7 @@ TEST_F(WebMClusterParserTest,
   scoped_ptr<Cluster> cluster(CreateCluster(0, kBlockInfo, block_count));
   int result = parser_->Parse(cluster->data(), cluster->size());
   EXPECT_EQ(cluster->size(), result);
+  parser_->Flush();
   ASSERT_TRUE(VerifyBuffers(kBlockInfo, block_count));
 }
 
@@ -1004,6 +980,7 @@ TEST_F(WebMClusterParserTest,
   scoped_ptr<Cluster> cluster(CreateCluster(0, kBlockInfo, block_count));
   int result = parser_->Parse(cluster->data(), cluster->size());
   EXPECT_EQ(cluster->size(), result);
+  parser_->Flush();
   ASSERT_TRUE(VerifyBuffers(kBlockInfo, block_count));
 }
 
diff --git a/packager/media/formats/webm/webm_media_parser.cc b/packager/media/formats/webm/webm_media_parser.cc
index 25297e39b6..63c2d6e7bd 100644
--- a/packager/media/formats/webm/webm_media_parser.cc
+++ b/packager/media/formats/webm/webm_media_parser.cc
@@ -44,7 +44,7 @@ void WebMMediaParser::Flush() {
 
   byte_queue_.Reset();
   if (cluster_parser_)
-    cluster_parser_->Reset();
+    cluster_parser_->Flush();
   if (state_ == kParsingClusters) {
     ChangeState(kParsingHeaders);
   }