VP9 codec string from bitstream and subsample encryption support
- Parse the VP9 bitstream to get the VPx codec configuration.
- Add subsample encryption for VP9.
- Also fix a bug in the VP9 parser when segmentation update_map is enabled.

Change-Id: I69dc97088aa38c94c6d37fdbcf3d9cfc942a3df6
parent 9c95309c12
commit 94401d750a
@@ -71,11 +71,11 @@ class AudioStreamInfo : public StreamInfo {
return static_cast<uint32_t>(num_channels_) * sample_bits_ / 8;
}

void set_codec(AudioCodec codec) { codec_ = codec; }
void set_sampling_frequency(const uint32_t sampling_frequency) {
sampling_frequency_ = sampling_frequency;
}

/// @param audio_object_type is only used by AAC Codec, ignored otherwise.
/// @return The codec string.
static std::string GetCodecString(AudioCodec codec,

@@ -10,7 +10,10 @@ namespace edash_packager {
namespace media {

BitReader::BitReader(const uint8_t* data, off_t size)
: data_(data), bytes_left_(size), num_remaining_bits_in_curr_byte_(0) {
: data_(data),
initial_size_(size),
bytes_left_(size),
num_remaining_bits_in_curr_byte_(0) {
DCHECK(data_ != NULL && bytes_left_ > 0);

UpdateCurrByte();

@@ -50,10 +53,6 @@ bool BitReader::SkipBits(int num_bits) {
return ReadBitsInternal(num_bits, &not_needed);
}

int BitReader::bits_available() const {
return 8 * bytes_left_ + num_remaining_bits_in_curr_byte_;
}

bool BitReader::ReadBitsInternal(int num_bits, uint64_t* out) {
DCHECK_LE(num_bits, 64);

@@ -49,7 +49,12 @@ class BitReader {
bool SkipBits(int num_bits);

/// @return The number of bits available for reading.
int bits_available() const;
int bits_available() const {
return 8 * bytes_left_ + num_remaining_bits_in_curr_byte_;
}

/// @return The current bit position.
int bit_position() const { return 8 * initial_size_ - bits_available(); }

private:
// Helper function used by ReadBits to avoid inlining the bit reading logic.

@@ -63,6 +68,10 @@ class BitReader {
// Pointer to the next unread (not in curr_byte_) byte in the stream.
const uint8_t* data_;

// Initial size of the input data.
// TODO(kqyang): Use size_t instead of off_t.
off_t initial_size_;

// Bytes left in the stream (without the curr_byte_).
off_t bytes_left_;

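Note: the bit_position() accessor added above is pure bookkeeping: total input size in bits minus whatever bits_available() reports. A minimal, self-contained sketch of that arithmetic (a hypothetical tracker struct, not the packager's BitReader):

#include <cassert>
#include <cstdint>

// Hypothetical tracker mirroring the two counters used in the diff above;
// the real class additionally holds curr_byte_ and the actual read logic.
struct BitPositionTracker {
  int64_t initial_size;                 // total bytes handed to the reader
  int64_t bytes_left;                   // bytes not yet loaded into curr_byte_
  int num_remaining_bits_in_curr_byte;  // unread bits of the loaded byte

  int64_t bits_available() const {
    return 8 * bytes_left + num_remaining_bits_in_curr_byte;
  }
  int64_t bit_position() const { return 8 * initial_size - bits_available(); }
};

int main() {
  // Two-byte input, first byte loaded, 3 bits already consumed from it.
  BitPositionTracker t{2, 1, 5};
  assert(t.bits_available() == 13);
  assert(t.bit_position() == 3);
  return 0;
}
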
@@ -67,6 +67,7 @@ class VideoStreamInfo : public StreamInfo {
uint8_t nalu_length_size() const { return nalu_length_size_; }
int16_t trick_play_rate() const { return trick_play_rate_; }

void set_codec(VideoCodec codec) { codec_ = codec; }
void set_width(uint32_t width) { width_ = width; }
void set_height(uint32_t height) { height_ = height; }
void set_pixel_width(uint32_t pixel_width) { pixel_width_ = pixel_width; }

@@ -184,6 +184,10 @@ VPCodecConfiguration::ColorSpace GetColorSpace(uint8_t color_space) {
return VPCodecConfiguration::COLOR_SPACE_BT_601;
case VPX_COLOR_SPACE_BT_709:
return VPCodecConfiguration::COLOR_SPACE_BT_709;
case VPX_COLOR_SPACE_SMPTE_170:
return VPCodecConfiguration::COLOR_SPACE_SMPTE_170;
case VPX_COLOR_SPACE_SMPTE_240:
return VPCodecConfiguration::COLOR_SPACE_SMPTE_240;
case VPX_COLOR_SPACE_BT_2020:
// VP9 does not specify if it is in the form of “constant luminance” or
// “non-constant luminance”. As such, application should rely on the

@@ -368,15 +372,14 @@ bool ReadSegmentation(VP9BitReader* reader) {
bool update_map;
RCHECK(reader->ReadBits(1, &update_map));
if (update_map) {
for (uint32_t i = 0; i < SEG_TREE_PROBS; ++i) {
for (uint32_t i = 0; i < SEG_TREE_PROBS; ++i)
RCHECK(reader->SkipBitsConditional(8));

bool temporal_update;
RCHECK(reader->ReadBits(1, &temporal_update));
if (temporal_update) {
for (uint32_t j = 0; j < PREDICTION_PROBS; ++j)
RCHECK(reader->SkipBitsConditional(8));
}
bool temporal_update;
RCHECK(reader->ReadBits(1, &temporal_update));
if (temporal_update) {
for (uint32_t j = 0; j < PREDICTION_PROBS; ++j)
RCHECK(reader->SkipBitsConditional(8));
}
}

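Note: the hunk above is the update_map fix called out in the commit message: the stray brace after the SEG_TREE_PROBS loop is removed, and after the fix the temporal_update flag and its PREDICTION_PROBS probes are read exactly once, inside the update_map branch. A standalone sketch of that read order, with an assumed callable bit source rather than the packager's VP9BitReader, and the spec constants (7 and 3) written out explicitly:

#include <functional>

// NextBit is any callable returning the next bit of the uncompressed header.
// 7 (SEG_TREE_PROBS) and 3 (PREDICTION_PROBS) are the VP9 spec values assumed
// here; this is a sketch, not the packager API.
inline void SkipConditionalByte(const std::function<int()>& next_bit) {
  if (next_bit()) {                // "coded" flag
    for (int i = 0; i < 8; ++i)    // the 8-bit probability that follows
      next_bit();
  }
}

inline void ReadSegmentationMapUpdate(const std::function<int()>& next_bit) {
  const bool update_map = next_bit() != 0;
  if (!update_map)
    return;                        // no probs and no temporal_update flag

  for (int i = 0; i < 7; ++i)      // SEG_TREE_PROBS
    SkipConditionalByte(next_bit);

  const bool temporal_update = next_bit() != 0;
  if (temporal_update) {
    for (int j = 0; j < 3; ++j)    // PREDICTION_PROBS
      SkipConditionalByte(next_bit);
  }
}
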
@@ -449,23 +452,23 @@ bool VP9Parser::Parse(const uint8_t* data,
// End of current frame data. There should be no more bytes available.
RCHECK(reader.bits_available() < 8);

vpx_frame.is_key_frame = false;
vpx_frame.is_keyframe = false;
vpx_frame.uncompressed_header_size = vpx_frame.frame_size;
vpx_frame.width = width_;
vpx_frame.height = height_;
continue;
}

bool is_inter_frame;
RCHECK(reader.ReadBits(1, &is_inter_frame));
vpx_frame.is_key_frame = !is_inter_frame;
bool is_interframe;
RCHECK(reader.ReadBits(1, &is_interframe));
vpx_frame.is_keyframe = !is_interframe;

bool show_frame;
RCHECK(reader.ReadBits(1, &show_frame));
bool error_resilient_mode;
RCHECK(reader.ReadBits(1, &error_resilient_mode));

if (vpx_frame.is_key_frame) {
if (vpx_frame.is_keyframe) {
RCHECK(ReadSyncCode(&reader));
RCHECK(ReadBitDepthAndColorSpace(&reader, &codec_config_));
RCHECK(ReadFrameSizes(&reader, &width_, &height_));

@@ -516,8 +519,7 @@ bool VP9Parser::Parse(const uint8_t* data,
}
RCHECK(reader.SkipBits(FRAME_CONTEXTS_LOG2)); // frame_context_idx

VLOG(4) << "bit offset: "
<< vpx_frame.frame_size * 8 - reader.bits_available();
VLOG(4) << "Bits read before ReadLoopFilter: " << reader.bit_position();
RCHECK(ReadLoopFilter(&reader));
RCHECK(ReadQuantization(&reader));
RCHECK(ReadSegmentation(&reader));

@@ -532,8 +534,7 @@ bool VP9Parser::Parse(const uint8_t* data,

VLOG(3) << "\n frame_size: " << vpx_frame.frame_size
<< "\n header_size: " << vpx_frame.uncompressed_header_size
<< "\n bits_read: "
<< vpx_frame.frame_size * 8 - reader.bits_available()
<< "\n Bits read: " << reader.bit_position()
<< "\n first_partition_size: " << first_partition_size;

RCHECK(first_partition_size > 0);

@@ -544,5 +545,30 @@ bool VP9Parser::Parse(const uint8_t* data,
return true;
}

bool VP9Parser::IsKeyframe(const uint8_t* data, size_t data_size) {
VP9BitReader reader(data, data_size);
uint8_t frame_marker;
RCHECK(reader.ReadBits(2, &frame_marker));
RCHECK(frame_marker == VP9_FRAME_MARKER);

VPCodecConfiguration codec_config;
RCHECK(ReadProfile(&reader, &codec_config));

bool show_existing_frame;
RCHECK(reader.ReadBits(1, &show_existing_frame));
if (show_existing_frame)
return false;

bool is_interframe;
RCHECK(reader.ReadBits(1, &is_interframe));
if (is_interframe)
return false;

RCHECK(reader.SkipBits(2)); // show_frame, error_resilient_mode.

RCHECK(ReadSyncCode(&reader));
return true;
}

} // namespace media
} // namespace edash_packager

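Note: a rough usage sketch of the parser API added above (the declarations appear in the header hunks that follow); data/size stand for one WebM block or MP4 sample carrying VP9, and error handling is elided:

#include <cstddef>
#include <cstdint>
#include <vector>

#include "packager/media/filters/vp9_parser.h"

void DescribeVp9Sample(const uint8_t* data, size_t size) {
  using edash_packager::media::VP9Parser;
  using edash_packager::media::VPxFrameInfo;

  // Cheap keyframe probe; per the header comment it does not parse the whole
  // uncompressed header, so it is lighter than Parse().
  const bool is_keyframe = VP9Parser::IsKeyframe(data, size);

  // Full parse: one VPxFrameInfo per frame (a superframe yields several).
  VP9Parser parser;
  std::vector<VPxFrameInfo> frames;
  if (!parser.Parse(data, size, &frames))
    return;

  for (const VPxFrameInfo& frame : frames) {
    // frame.uncompressed_header_size bytes stay in the clear for subsample
    // encryption; the remaining bytes of frame.frame_size may be encrypted.
    (void)frame;
  }

  // codec_config() is only meaningful after a keyframe or intra frame has
  // been parsed successfully.
  if (is_keyframe)
    parser.codec_config();
}
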
@@ -11,7 +11,6 @@
#include <stdlib.h>

#include "packager/base/macros.h"
#include "packager/base/memory/scoped_ptr.h"
#include "packager/media/filters/vp_codec_configuration.h"

namespace edash_packager {

@@ -20,7 +19,7 @@ namespace media {
struct VPxFrameInfo {
size_t frame_size;
size_t uncompressed_header_size;
bool is_key_frame;
bool is_keyframe;
uint32_t width;
uint32_t height;
};

@@ -42,9 +41,16 @@ class VP9Parser {
std::vector<VPxFrameInfo>* vpx_frames);

/// @return VPx codec configuration extracted. Note that it is only valid
/// after parsing a key frame or intra frame successfully.
/// after parsing a keyframe or intra frame successfully.
const VPCodecConfiguration& codec_config() { return codec_config_; }

/// A convenient utility function to check whether the frame is a keyframe.
/// Note that this function does not do a full parse of the frame header, so
/// should be more efficient than Parse().
/// @param data_size Size of the sample in bytes.
/// @return true if it is, false if it is not or if there is parsing error.
static bool IsKeyframe(const uint8_t* data, size_t data_size);

private:
// Keep track of the current width and height. Note that they may change from
// frame to frame.

@@ -17,16 +17,16 @@ namespace {
MATCHER_P5(EqualVPxFrame,
frame_size,
uncompressed_header_size,
is_key_frame,
is_keyframe,
width,
height,
"") {
*result_listener << "which is (" << arg.frame_size << ", "
<< arg.uncompressed_header_size << ", " << arg.is_key_frame
<< arg.uncompressed_header_size << ", " << arg.is_keyframe
<< ", " << arg.width << ", " << arg.height << ").";
return arg.frame_size == frame_size &&
arg.uncompressed_header_size == uncompressed_header_size &&
arg.is_key_frame == is_key_frame && arg.width == width &&
arg.is_keyframe == is_keyframe && arg.width == width &&
arg.height == height;
}
} // namespace

@@ -47,6 +47,8 @@ TEST(VP9ParserTest, Superframe) {
0xc9, 0x3c, 0x00, 0x48, 0x00, 0xc9,
};

EXPECT_FALSE(VP9Parser::IsKeyframe(data, arraysize(data)));

VP9Parser parser;
std::vector<VPxFrameInfo> frames;
ASSERT_TRUE(parser.Parse(data, arraysize(data), &frames));

@@ -69,6 +71,8 @@ TEST(VP9ParserTest, KeyframeChroma420) {
0x35, 0x7a, 0x88, 0x69, 0xf7, 0x1f, 0x26, 0x8b,
};

EXPECT_TRUE(VP9Parser::IsKeyframe(kData, arraysize(kData)));

VP9Parser parser;
std::vector<VPxFrameInfo> frames;
ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames));

@@ -89,6 +93,8 @@ TEST(VP9ParserTest, KeyframeProfile1Chroma422) {
0xa0, 0x96, 0xa7, 0xb8, 0xf4, 0xb4, 0x65, 0xff,
};

EXPECT_TRUE(VP9Parser::IsKeyframe(kData, arraysize(kData)));

VP9Parser parser;
std::vector<VPxFrameInfo> frames;
ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames));

@@ -109,6 +115,8 @@ TEST(VP9ParserTest, KeyframeProfile2Chroma420) {
0xa4, 0xdf, 0x05, 0xaf, 0x6f, 0xff, 0xd1, 0x74,
};

EXPECT_TRUE(VP9Parser::IsKeyframe(kData, arraysize(kData)));

VP9Parser parser;
std::vector<VPxFrameInfo> frames;
ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames));

@@ -119,7 +127,7 @@ TEST(VP9ParserTest, KeyframeProfile2Chroma420) {
}

TEST(VP9ParserTest, KeyframeProfile3Chroma444) {
uint8_t kData[] = {
const uint8_t kData[] = {
0xb1, 0x24, 0xc1, 0xa1, 0x40, 0x00, 0x4f, 0x80, 0x2c, 0xa0, 0x41, 0xc1,
0x20, 0xe0, 0xc3, 0xf0, 0x00, 0x09, 0x00, 0x7c, 0x57, 0x77, 0x3f, 0x67,
0x99, 0x3e, 0x1f, 0xfb, 0xdf, 0x0f, 0x02, 0x0a, 0x37, 0x81, 0x53, 0x80,

@@ -129,6 +137,8 @@ TEST(VP9ParserTest, KeyframeProfile3Chroma444) {
0xe1, 0xe6, 0xef, 0xff, 0xfd, 0xf7, 0x4f, 0x0f,
};

EXPECT_TRUE(VP9Parser::IsKeyframe(kData, arraysize(kData)));

VP9Parser parser;
std::vector<VPxFrameInfo> frames;
ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames));

@@ -146,8 +156,11 @@ TEST(VP9ParserTest, Intra) {
0xe2, 0xbd, 0x53, 0xd9, 0x00, 0x3a, 0x70, 0xe0, 0x00, 0x78, 0xea, 0xa5,
0x61, 0x08, 0xb7, 0x9f, 0x33, 0xe5, 0xf8, 0xa5, 0x82, 0x32, 0xbb, 0xa3,
0x75, 0xb4, 0x60, 0xf3, 0x39, 0x75, 0x1f, 0x2b,

};

EXPECT_FALSE(VP9Parser::IsKeyframe(kData, arraysize(kData)));

VP9Parser parser;
std::vector<VPxFrameInfo> frames;
ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames));

@@ -159,6 +172,7 @@ TEST(VP9ParserTest, Intra) {

TEST(VP9ParserTest, ShowExisting) {
const uint8_t kData[] = {0x88};
EXPECT_FALSE(VP9Parser::IsKeyframe(kData, arraysize(kData)));
VP9Parser parser;
std::vector<VPxFrameInfo> frames;
ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames));

@@ -177,6 +191,8 @@ TEST(VP9ParserTest, Interframe) {
0x90, 0xeb, 0x8c, 0xad, 0x5f, 0x69, 0xb7, 0x9b,
};

EXPECT_FALSE(VP9Parser::IsKeyframe(kData, arraysize(kData)));

VP9Parser parser;
std::vector<VPxFrameInfo> frames;
ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames));

@@ -186,6 +202,7 @@ TEST(VP9ParserTest, Interframe) {

TEST(VP9ParserTest, CorruptedFrameMarker) {
const uint8_t kData[] = {0xc8};
EXPECT_FALSE(VP9Parser::IsKeyframe(kData, arraysize(kData)));
VP9Parser parser;
std::vector<VPxFrameInfo> frames;
ASSERT_FALSE(parser.Parse(kData, arraysize(kData), &frames));

@@ -202,6 +219,8 @@ TEST(VP9ParserTest, CorruptedSynccode) {
0x35, 0x7a, 0x88, 0x69, 0xf7, 0x1f, 0x26, 0x8b,
};

EXPECT_FALSE(VP9Parser::IsKeyframe(kData, arraysize(kData)));

VP9Parser parser;
std::vector<VPxFrameInfo> frames;
ASSERT_FALSE(parser.Parse(kData, arraysize(kData), &frames));

@@ -218,6 +237,10 @@ TEST(VP9ParserTest, NotEnoughBytesForFirstPartitionSize) {
0x07, 0xf4, 0x7f, 0xc7, 0xff, 0x6d, 0xff, 0xeb,
};

// IsKeyframe only parses the bytes that are necessary to determine whether it
// is a keyframe.
EXPECT_TRUE(VP9Parser::IsKeyframe(kData, arraysize(kData)));

VP9Parser parser;
std::vector<VPxFrameInfo> frames;
EXPECT_FALSE(parser.Parse(kData, arraysize(kData), &frames));

@@ -24,9 +24,11 @@ class VPCodecConfiguration {
COLOR_SPACE_UNSPECIFIED = 0,
COLOR_SPACE_BT_601 = 1,
COLOR_SPACE_BT_709 = 2,
COLOR_SPACE_BT_2020_NON_CONSTANT_LUMINANCE = 3,
COLOR_SPACE_BT_2020_CONSTANT_LUMINANCE = 4,
COLOR_SPACE_SRGB = 5,
COLOR_SPACE_SMPTE_170 = 3,
COLOR_SPACE_SMPTE_240 = 4,
COLOR_SPACE_BT_2020_NON_CONSTANT_LUMINANCE = 5,
COLOR_SPACE_BT_2020_CONSTANT_LUMINANCE = 6,
COLOR_SPACE_SRGB = 7,
};

enum ChromaSubsampling {

@@ -91,7 +93,9 @@ class VPCodecConfiguration {
bool video_full_range_flag_;
std::vector<uint8_t> codec_initialization_data_;

DISALLOW_COPY_AND_ASSIGN(VPCodecConfiguration);
// Not using DISALLOW_COPY_AND_ASSIGN here intentionally to allow the compiler
// generated copy constructor and assignment operator. Since the internal data
// is small, the performance impact is minimal.
};

} // namespace media

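Note: the ParseVP9 unit test later in this diff expects the codec string "vp09.03.00.12.00.03.00.00". Assuming the string simply concatenates the VPCodecConfiguration fields, in the same order as the constructor call removed near the end of this diff (profile, level, bit depth, color space, chroma subsampling, transfer function, full-range flag), as two-digit decimal fields after a prefix, a sketch of that formatting would look like the function below. The real logic lives in VPCodecConfiguration::GetCodecString(), which is not shown in this diff, so treat the exact field order as an assumption.

#include <cstdint>
#include <cstdio>
#include <string>

// Hypothetical helper for illustration only; not part of the packager API.
std::string MakeVpCodecString(const char* prefix, uint8_t profile,
                              uint8_t level, uint8_t bit_depth,
                              uint8_t color_space, uint8_t chroma_subsampling,
                              uint8_t transfer_function, bool full_range) {
  char buf[64];
  std::snprintf(buf, sizeof(buf), "%s.%02d.%02d.%02d.%02d.%02d.%02d.%02d",
                prefix, profile, level, bit_depth, color_space,
                chroma_subsampling, transfer_function, full_range ? 1 : 0);
  return buf;
}
// Under that assumption, MakeVpCodecString("vp09", 3, 0, 12, 0, 3, 0, false)
// reproduces the "vp09.03.00.12.00.03.00.00" string expected by the test.
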
@@ -10,6 +10,7 @@
#include "packager/media/base/buffer_reader.h"
#include "packager/media/base/key_source.h"
#include "packager/media/base/media_sample.h"
#include "packager/media/filters/vp9_parser.h"
#include "packager/media/formats/mp4/box_definitions.h"
#include "packager/media/formats/mp4/cenc.h"

@@ -26,15 +27,19 @@ EncryptingFragmenter::EncryptingFragmenter(
TrackFragment* traf,
scoped_ptr<EncryptionKey> encryption_key,
int64_t clear_time,
VideoCodec video_codec,
uint8_t nalu_length_size)
: Fragmenter(traf),
encryption_key_(encryption_key.Pass()),
video_codec_(video_codec),
nalu_length_size_(nalu_length_size),
clear_time_(clear_time) {
DCHECK(encryption_key_);
if (video_codec == kCodecVP9)
vp9_parser_.reset(new VP9Parser);
}
EncryptingFragmenter::~EncryptingFragmenter() {}

EncryptingFragmenter::~EncryptingFragmenter() {}

Status EncryptingFragmenter::AddSample(scoped_refptr<MediaSample> sample) {
DCHECK(sample);

@@ -134,30 +139,48 @@ Status EncryptingFragmenter::EncryptSample(scoped_refptr<MediaSample> sample) {

FrameCENCInfo cenc_info(encryptor_->iv());
uint8_t* data = sample->writable_data();
if (!IsSubsampleEncryptionRequired()) {
EncryptBytes(data, sample->data_size());
} else {
BufferReader reader(data, sample->data_size());
while (reader.HasBytes(1)) {
uint64_t nalu_length;
if (!reader.ReadNBytesInto8(&nalu_length, nalu_length_size_))
return Status(error::MUXER_FAILURE, "Fail to read nalu_length.");

SubsampleEntry subsample;
subsample.clear_bytes = nalu_length_size_ + 1;
subsample.cipher_bytes = nalu_length - 1;
if (!reader.SkipBytes(nalu_length)) {
return Status(error::MUXER_FAILURE,
"Sample size does not match nalu_length.");
if (IsSubsampleEncryptionRequired()) {
if (video_codec_ == kCodecVP9) {
std::vector<VPxFrameInfo> vpx_frames;
if (!vp9_parser_->Parse(sample->data(), sample->data_size(),
&vpx_frames)) {
return Status(error::MUXER_FAILURE, "Failed to parse vp9 frame.");
}
for (const VPxFrameInfo& frame : vpx_frames) {
SubsampleEntry subsample;
subsample.clear_bytes = frame.uncompressed_header_size;
subsample.cipher_bytes =
frame.frame_size - frame.uncompressed_header_size;
cenc_info.AddSubsample(subsample);
if (subsample.cipher_bytes > 0)
EncryptBytes(data + subsample.clear_bytes, subsample.cipher_bytes);
data += frame.frame_size;
}
} else {
BufferReader reader(data, sample->data_size());
while (reader.HasBytes(1)) {
uint64_t nalu_length;
if (!reader.ReadNBytesInto8(&nalu_length, nalu_length_size_))
return Status(error::MUXER_FAILURE, "Fail to read nalu_length.");

EncryptBytes(data + subsample.clear_bytes, subsample.cipher_bytes);
cenc_info.AddSubsample(subsample);
data += nalu_length_size_ + nalu_length;
SubsampleEntry subsample;
subsample.clear_bytes = nalu_length_size_ + 1;
subsample.cipher_bytes = nalu_length - 1;
if (!reader.SkipBytes(nalu_length)) {
return Status(error::MUXER_FAILURE,
"Sample size does not match nalu_length.");
}

EncryptBytes(data + subsample.clear_bytes, subsample.cipher_bytes);
cenc_info.AddSubsample(subsample);
data += nalu_length_size_ + nalu_length;
}
}

// The length of per-sample auxiliary datum, defined in CENC ch. 7.
traf()->auxiliary_size.sample_info_sizes.push_back(cenc_info.ComputeSize());
} else {
EncryptBytes(data, sample->data_size());
}

cenc_info.Write(aux_data());

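Note: the net effect of the VP9 branch above, in isolation, is that each parsed frame becomes one subsample whose clear part is the uncompressed header and whose encrypted part is everything after it. A plain-struct sketch of that mapping (SubsampleSpan and FrameSpan are stand-ins for the real SubsampleEntry and VPxFrameInfo types):

#include <cstddef>
#include <cstdint>
#include <vector>

struct SubsampleSpan {
  uint32_t clear_bytes;
  uint32_t cipher_bytes;
};

struct FrameSpan {  // mirrors VPxFrameInfo's two size fields
  size_t frame_size;
  size_t uncompressed_header_size;
};

std::vector<SubsampleSpan> Vp9SubsamplesForSample(
    const std::vector<FrameSpan>& frames) {
  std::vector<SubsampleSpan> subsamples;
  for (const FrameSpan& frame : frames) {
    SubsampleSpan s;
    // The uncompressed header must stay readable by downstream parsers.
    s.clear_bytes = static_cast<uint32_t>(frame.uncompressed_header_size);
    // The compressed frame data after the header is what gets encrypted.
    s.cipher_bytes = static_cast<uint32_t>(frame.frame_size -
                                           frame.uncompressed_header_size);
    subsamples.push_back(s);  // a superframe contributes one entry per frame
  }
  return subsamples;
}
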
@@ -7,6 +7,8 @@
#ifndef MEDIA_FORMATS_MP4_ENCRYPTING_FRAGMENTER_H_
#define MEDIA_FORMATS_MP4_ENCRYPTING_FRAGMENTER_H_

#include "packager/base/memory/scoped_ptr.h"
#include "packager/media/filters/vp9_parser.h"
#include "packager/media/formats/mp4/fragmenter.h"

namespace edash_packager {

@@ -24,11 +26,15 @@ class EncryptingFragmenter : public Fragmenter {
/// @param encryption_key contains the encryption parameters.
/// @param clear_time specifies clear lead duration in units of the current
/// track's timescale.
/// @param video_codec specifies the codec if input is a video stream; it
/// should be set to kUnknownVideoCodec for audio stream. This
/// parameter is used for proper subsample encryption.
/// @param nalu_length_size specifies the size of NAL unit length, in bytes,
/// for subsample encryption.
EncryptingFragmenter(TrackFragment* traf,
scoped_ptr<EncryptionKey> encryption_key,
int64_t clear_time,
VideoCodec video_codec,
uint8_t nalu_length_size);

~EncryptingFragmenter() override;

@@ -64,16 +70,23 @@ class EncryptingFragmenter : public Fragmenter {
Status EncryptSample(scoped_refptr<MediaSample> sample);

// Should we enable subsample encryption?
bool IsSubsampleEncryptionRequired() { return nalu_length_size_ != 0; }
bool IsSubsampleEncryptionRequired() {
return video_codec_ == kCodecVP9 || nalu_length_size_ != 0;
}

scoped_ptr<EncryptionKey> encryption_key_;
scoped_ptr<AesCtrEncryptor> encryptor_;
// For VP8/VP9, uncompressed_header should not be encrypted; for AVC/HEVC,
// the size and type of NAL units should not be encrypted.
VideoCodec video_codec_;
// If this stream contains AVC, subsample encryption specifies that the size
// and type of NAL units remain unencrypted. This field specifies the size of
// the size field. Can be 1, 2 or 4 bytes.
const uint8_t nalu_length_size_;
int64_t clear_time_;

scoped_ptr<VP9Parser> vp9_parser_;

DISALLOW_COPY_AND_ASSIGN(EncryptingFragmenter);
};

@@ -23,11 +23,13 @@ KeyRotationFragmenter::KeyRotationFragmenter(MovieFragment* moof,
KeySource::TrackType track_type,
int64_t crypto_period_duration,
int64_t clear_time,
VideoCodec video_codec,
uint8_t nalu_length_size,
MuxerListener* muxer_listener)
: EncryptingFragmenter(traf,
scoped_ptr<EncryptionKey>(new EncryptionKey()),
clear_time,
video_codec,
nalu_length_size),
moof_(moof),
encryption_key_source_(encryption_key_source),

@@ -31,6 +31,9 @@ class KeyRotationFragmenter : public EncryptingFragmenter {
/// of the current track's timescale.
/// @param clear_time specifies clear lead duration in units of the current
/// track's timescale.
/// @param video_codec specifies the codec if input is a video stream; it
/// should be set to kUnknownVideoCodec for audio stream. This
/// parameter is used for proper subsample encryption.
/// @param nalu_length_size NAL unit length size, in bytes, for subsample
/// encryption.
/// @param muxer_listener is a pointer to MuxerListener for notifying

@@ -41,6 +44,7 @@ class KeyRotationFragmenter : public EncryptingFragmenter {
KeySource::TrackType track_type,
int64_t crypto_period_duration,
int64_t clear_time,
VideoCodec video_codec,
uint8_t nalu_length_size,
MuxerListener* muxer_listener);
~KeyRotationFragmenter() override;

@@ -89,6 +89,14 @@ void GenerateEncryptedSampleEntry(const EncryptionKey& encryption_key,
}
}

VideoCodec GetVideoCodec(const StreamInfo& stream_info) {
if (stream_info.stream_type() != kStreamVideo)
return kUnknownVideoCodec;
const VideoStreamInfo& video_stream_info =
static_cast<const VideoStreamInfo&>(stream_info);
return video_stream_info.codec();
}

uint8_t GetNaluLengthSize(const StreamInfo& stream_info) {
if (stream_info.stream_type() != kStreamVideo)
return 0;

@@ -160,6 +168,7 @@ Status Segmenter::Initialize(const std::vector<MediaStream*>& streams,
continue;
}

VideoCodec video_codec = GetVideoCodec(*streams[i]->info());
uint8_t nalu_length_size = GetNaluLengthSize(*streams[i]->info());
KeySource::TrackType track_type =
GetTrackTypeForEncryption(*streams[i]->info(), max_sd_pixels);

@@ -182,14 +191,10 @@ Status Segmenter::Initialize(const std::vector<MediaStream*>& streams,
}

fragmenters_[i] = new KeyRotationFragmenter(
moof_.get(),
&moof_->tracks[i],
encryption_key_source,
track_type,
moof_.get(), &moof_->tracks[i], encryption_key_source, track_type,
crypto_period_duration_in_seconds * streams[i]->info()->time_scale(),
clear_lead_in_seconds * streams[i]->info()->time_scale(),
nalu_length_size,
muxer_listener_);
clear_lead_in_seconds * streams[i]->info()->time_scale(), video_codec,
nalu_length_size, muxer_listener_);
continue;
}

@@ -217,9 +222,8 @@ Status Segmenter::Initialize(const std::vector<MediaStream*>& streams,
}

fragmenters_[i] = new EncryptingFragmenter(
&moof_->tracks[i],
encryption_key.Pass(),
clear_lead_in_seconds * streams[i]->info()->time_scale(),
&moof_->tracks[i], encryption_key.Pass(),
clear_lead_in_seconds * streams[i]->info()->time_scale(), video_codec,
nalu_length_size);
}

@@ -10,6 +10,7 @@
#include "packager/base/sys_byteorder.h"
#include "packager/media/base/decrypt_config.h"
#include "packager/media/base/timestamp.h"
#include "packager/media/filters/vp9_parser.h"
#include "packager/media/filters/webvtt_util.h"
#include "packager/media/formats/webm/webm_constants.h"
#include "packager/media/formats/webm/webm_crypto_helpers.h"

@@ -30,17 +31,11 @@
"may be suppressed): " \
: "")

namespace {
const int64_t kMicrosecondsPerMillisecond = 1000;
} // namespace

namespace edash_packager {
namespace media {
namespace {

const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = {
10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000,
60000, 10000, 20000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000,
10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000};
const int64_t kMicrosecondsPerMillisecond = 1000;

enum {
// Limits the number of LOG() calls in the path of reading encoded

@@ -51,27 +46,78 @@ enum {
kMaxDurationEstimateLogs = 10,
};

// Helper function used to inspect block data to determine if the
// block is a keyframe.
// |data| contains the bytes in the block.
// |size| indicates the number of bytes in |data|.
bool IsKeyframe(bool is_video,
VideoCodec codec,
const uint8_t* data,
int size) {
// For now, assume that all blocks are keyframes for datatypes other than
// video. This is a valid assumption for Vorbis, WebVTT, & Opus.
if (!is_video)
return true;

if (codec == kCodecVP9)
return VP9Parser::IsKeyframe(data, size);

CHECK_EQ(kCodecVP8, codec);

// Make sure the block is big enough for the minimal keyframe header size.
if (size < 7)
return false;

// The LSb of the first byte must be a 0 for a keyframe.
// http://tools.ietf.org/html/rfc6386 Section 19.1
if ((data[0] & 0x01) != 0)
return false;

// Verify VP8 keyframe startcode.
// http://tools.ietf.org/html/rfc6386 Section 19.1
if (data[3] != 0x9d || data[4] != 0x01 || data[5] != 0x2a)
return false;

return true;
}

} // namespace

const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = {
10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000,
60000, 10000, 20000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000,
10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000};

WebMClusterParser::WebMClusterParser(
int64_t timecode_scale,
int audio_track_num,
scoped_refptr<AudioStreamInfo> audio_stream_info,
scoped_refptr<VideoStreamInfo> video_stream_info,
int64_t audio_default_duration,
int video_track_num,
int64_t video_default_duration,
const WebMTracksParser::TextTracks& text_tracks,
const std::set<int64_t>& ignored_tracks,
const std::string& audio_encryption_key_id,
const std::string& video_encryption_key_id,
const AudioCodec audio_codec,
const MediaParser::NewSampleCB& new_sample_cb)
const MediaParser::NewSampleCB& new_sample_cb,
const MediaParser::InitCB& init_cb)
: timecode_multiplier_(timecode_scale / 1000.0),
audio_stream_info_(audio_stream_info),
video_stream_info_(video_stream_info),
ignored_tracks_(ignored_tracks),
audio_encryption_key_id_(audio_encryption_key_id),
video_encryption_key_id_(video_encryption_key_id),
audio_codec_(audio_codec),
parser_(kWebMIdCluster, this),
initialized_(false),
init_cb_(init_cb),
cluster_start_time_(kNoTimestamp),
audio_(audio_track_num, false, audio_default_duration, new_sample_cb),
video_(video_track_num, true, video_default_duration, new_sample_cb) {
audio_(audio_stream_info ? audio_stream_info->track_id() : -1,
false,
audio_default_duration,
new_sample_cb),
video_(video_stream_info ? video_stream_info->track_id() : -1,
true,
video_default_duration,
new_sample_cb) {
for (WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin();
it != text_tracks.end();
++it) {

@@ -143,7 +189,8 @@ int64_t WebMClusterParser::TryGetEncodedAudioDuration(
// TODO: Consider parsing "Signal Byte" for encrypted streams to return
// duration for any unencrypted blocks.

if (audio_codec_ == kCodecOpus) {
DCHECK(audio_stream_info_);
if (audio_stream_info_->codec() == kCodecOpus) {
return ReadOpusDuration(data, size);
}

@@ -450,7 +497,12 @@ bool WebMClusterParser::OnBlock(bool is_simple_block,
// necessary to determine whether it contains a keyframe or not.
// http://www.matroska.org/technical/specs/index.html
bool is_keyframe =
is_simple_block ? (flags & 0x80) != 0 : track->IsKeyframe(data, size);
is_simple_block
? (flags & 0x80) != 0
: IsKeyframe(stream_type == kStreamVideo,
video_stream_info_ ? video_stream_info_->codec()
: kUnknownVideoCodec,
data, size);

// Every encrypted Block has a signal byte and IV prepended to it. Current
// encrypted WebM request for comments specification is here

@@ -531,6 +583,44 @@ bool WebMClusterParser::OnBlock(bool is_simple_block,
buffer->set_duration(track->default_duration());
}

if (!initialized_) {
std::vector<scoped_refptr<StreamInfo>> streams;
if (audio_stream_info_)
streams.push_back(audio_stream_info_);
if (video_stream_info_) {
if (stream_type == kStreamVideo) {
VPCodecConfiguration codec_config;
if (video_stream_info_->codec() == kCodecVP9) {
VP9Parser vp9_parser;
std::vector<VPxFrameInfo> vpx_frames;
if (!vp9_parser.Parse(buffer->data(), buffer->data_size(),
&vpx_frames)) {
LOG(ERROR) << "Failed to parse vp9 frame.";
return false;
}
if (vpx_frames.size() != 1u || !vpx_frames[0].is_keyframe) {
LOG(ERROR) << "The first frame should be a key frame.";
return false;
}
codec_config = vp9_parser.codec_config();
}
// TODO(kqyang): Support VP8.

video_stream_info_->set_codec_string(
codec_config.GetCodecString(video_stream_info_->codec()));
std::vector<uint8_t> extra_data;
codec_config.Write(&extra_data);
video_stream_info_->set_extra_data(extra_data);
streams.push_back(video_stream_info_);
init_cb_.Run(streams);
initialized_ = true;
}
} else {
init_cb_.Run(streams);
initialized_ = true;
}
}

return track->EmitBuffer(buffer);
}

@@ -614,28 +704,6 @@ void WebMClusterParser::Track::Reset() {
last_added_buffer_missing_duration_ = NULL;
}

bool WebMClusterParser::Track::IsKeyframe(const uint8_t* data, int size) const {
// For now, assume that all blocks are keyframes for datatypes other than
// video. This is a valid assumption for Vorbis, WebVTT, & Opus.
if (!is_video_)
return true;

// Make sure the block is big enough for the minimal keyframe header size.
if (size < 7)
return false;

// The LSb of the first byte must be a 0 for a keyframe.
// http://tools.ietf.org/html/rfc6386 Section 19.1
if ((data[0] & 0x01) != 0)
return false;

// Verify VP8 keyframe startcode.
// http://tools.ietf.org/html/rfc6386 Section 19.1
if (data[3] != 0x9d || data[4] != 0x01 || data[5] != 0x2a)
return false;

return true;
}

bool WebMClusterParser::Track::EmitBufferHelp(
const scoped_refptr<MediaSample>& buffer) {

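Note: the OnBlock hunk above defers the init event until the first video block, because for VP9 the codec string and codec private data only become available after VP9Parser has seen a keyframe. A condensed, self-contained sketch of that once-only pattern (the type and method names below are illustrative stand-ins, not the parser's real members):

#include <cstdint>
#include <functional>
#include <string>
#include <vector>

struct VideoInfoStub {  // stand-in for VideoStreamInfo
  std::string codec_string;
  std::vector<uint8_t> extra_data;
};

class DeferredInit {
 public:
  explicit DeferredInit(std::function<void(const VideoInfoStub&)> init_cb)
      : init_cb_(std::move(init_cb)) {}

  // Called for every video block; fires init_cb only once, after the first
  // block has yielded a codec configuration (e.g. via VP9Parser::Parse).
  void OnVideoBlock(const std::string& derived_codec_string,
                    const std::vector<uint8_t>& derived_extra_data) {
    if (initialized_)
      return;
    info_.codec_string = derived_codec_string;  // e.g. "vp09.03.00.12..."
    info_.extra_data = derived_extra_data;      // serialized VPCodecConfiguration
    init_cb_(info_);
    initialized_ = true;
  }

 private:
  VideoInfoStub info_;
  bool initialized_ = false;
  std::function<void(const VideoInfoStub&)> init_cb_;
};
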
@@ -67,12 +67,6 @@ class WebMClusterParser : public WebMParserClient {
// was missing duration.
void Reset();

// Helper function used to inspect block data to determine if the
// block is a keyframe.
// |data| contains the bytes in the block.
// |size| indicates the number of bytes in |data|.
bool IsKeyframe(const uint8_t* data, int size) const;

int64_t default_duration() const { return default_duration_; }

private:

@@ -113,16 +107,16 @@ class WebMClusterParser : public WebMParserClient {

public:
WebMClusterParser(int64_t timecode_scale,
int audio_track_num,
scoped_refptr<AudioStreamInfo> audio_stream_info,
scoped_refptr<VideoStreamInfo> video_stream_info,
int64_t audio_default_duration,
int video_track_num,
int64_t video_default_duration,
const WebMTracksParser::TextTracks& text_tracks,
const std::set<int64_t>& ignored_tracks,
const std::string& audio_encryption_key_id,
const std::string& video_encryption_key_id,
const AudioCodec audio_codec,
const MediaParser::NewSampleCB& new_sample_cb);
const MediaParser::NewSampleCB& new_sample_cb,
const MediaParser::InitCB& init_cb);
~WebMClusterParser() override;

/// Resets the parser state so it can accept a new cluster.

@@ -191,13 +185,20 @@ class WebMClusterParser : public WebMParserClient {

double timecode_multiplier_; // Multiplier used to convert timecodes into
// microseconds.
scoped_refptr<AudioStreamInfo> audio_stream_info_;
scoped_refptr<VideoStreamInfo> video_stream_info_;
std::set<int64_t> ignored_tracks_;
std::string audio_encryption_key_id_;
std::string video_encryption_key_id_;
const AudioCodec audio_codec_;

WebMListParser parser_;

// Indicates whether init_cb has been executed. |init_cb| is executed when we
// have the codec configuration of the video stream, which is extracted from
// the first video sample.
bool initialized_;
MediaParser::InitCB init_cb_;

int64_t last_block_timecode_ = -1;
scoped_ptr<uint8_t[]> block_data_;
int block_data_size_ = -1;

@@ -28,10 +28,6 @@ using ::testing::StrictMock;
using ::testing::Mock;
using ::testing::_;

namespace {
const int64_t kMicrosecondsPerMillisecond = 1000;
} // namespace

namespace edash_packager {
namespace media {

@@ -67,6 +63,7 @@ MATCHER_P2(WebMBlockDurationMismatchesOpusDuration,

namespace {

const int64_t kMicrosecondsPerMillisecond = 1000;
// Timecode scale for millisecond timestamps.
const int kTimecodeScale = 1000000;

@@ -76,6 +73,23 @@ const int kTextTrackNum = 3;
const int kTestAudioFrameDefaultDurationInMs = 13;
const int kTestVideoFrameDefaultDurationInMs = 17;

// Constants for AudioStreamInfo and VideoStreamInfo. Most are not used.
const uint32_t kTimeScale = 1000000u;
const uint64_t kDuration = 10000000u;
const char kCodecString[] = "codec_string";
const char kLanguage[] = "eng";
const uint8_t kBitsPerSample = 8u;
const uint8_t kNumChannels = 2u;
const uint32_t kSamplingFrequency = 48000u;
const size_t kExtraDataSize = 0u;
const bool kEncrypted = true;
const uint16_t kWidth = 320u;
const uint16_t kHeight = 180u;
const uint32_t kPixelWidth = 1u;
const uint32_t kPixelHeight = 1u;
const int16_t kTrickPlayRate = 0u;
const uint8_t kNaluLengthSize = 0u;

// Test duration defaults must differ from parser estimation defaults to know
// which durations parser used when emitting buffers.
static_assert(

@@ -125,6 +139,16 @@ const uint8_t kEncryptedFrame[] = {
0x01,
};

const uint8_t kVP9Frame[] = {
0xb1, 0x24, 0xc1, 0xa1, 0x40, 0x00, 0x4f, 0x80, 0x2c, 0xa0, 0x41, 0xc1,
0x20, 0xe0, 0xc3, 0xf0, 0x00, 0x09, 0x00, 0x7c, 0x57, 0x77, 0x3f, 0x67,
0x99, 0x3e, 0x1f, 0xfb, 0xdf, 0x0f, 0x02, 0x0a, 0x37, 0x81, 0x53, 0x80,
0x00, 0x7e, 0x6f, 0xfe, 0x74, 0x31, 0xc6, 0x4f, 0x23, 0x9d, 0x6e, 0x5f,
0xfc, 0xa8, 0xef, 0x67, 0xdc, 0xac, 0xf7, 0x3e, 0x31, 0x07, 0xab, 0xc7,
0x0c, 0x74, 0x48, 0x8b, 0x95, 0x30, 0xc9, 0xf0, 0x37, 0x3b, 0xe6, 0x11,
0xe1, 0xe6, 0xef, 0xff, 0xfd, 0xf7, 0x4f, 0x0f,
};

scoped_ptr<Cluster> CreateCluster(int timecode,
const BlockInfo* block_info,
int block_count) {

@@ -178,6 +202,14 @@ scoped_ptr<Cluster> CreateEncryptedCluster(int bytes_to_write) {
return cb.Finish();
}

// Creates a Cluster with one vp9 frame (keyframe).
scoped_ptr<Cluster> CreateVP9Cluster() {
ClusterBuilder cb;
cb.SetClusterTimecode(0);
cb.AddSimpleBlock(kVideoTrackNum, 0, 0, kVP9Frame, arraysize(kVP9Frame));
return cb.Finish();
}

bool VerifyBuffersHelper(const BufferQueue& audio_buffers,
const BufferQueue& video_buffers,
const BufferQueue& text_buffers,

@@ -268,7 +300,35 @@ void VerifyEncryptedBuffer(scoped_refptr<MediaSample> buffer) {

class WebMClusterParserTest : public testing::Test {
public:
WebMClusterParserTest() : parser_(CreateDefaultParser()) {}
WebMClusterParserTest()
: audio_stream_info_(new AudioStreamInfo(kAudioTrackNum,
kTimeScale,
kDuration,
kUnknownAudioCodec,
kCodecString,
kLanguage,
kBitsPerSample,
kNumChannels,
kSamplingFrequency,
NULL,
kExtraDataSize,
!kEncrypted)),
video_stream_info_(new VideoStreamInfo(kVideoTrackNum,
kTimeScale,
kDuration,
kCodecVP8,
kCodecString,
kLanguage,
kWidth,
kHeight,
kPixelWidth,
kPixelHeight,
kTrickPlayRate,
kNaluLengthSize,
NULL,
kExtraDataSize,
!kEncrypted)),
parser_(CreateDefaultParser()) {}

protected:
void ResetParserToHaveDefaultDurations() {

@@ -285,6 +345,10 @@ class WebMClusterParserTest : public testing::Test {
default_audio_duration, default_video_duration));
}

void InitEvent(const std::vector<scoped_refptr<StreamInfo>>& stream_info) {
streams_from_init_event_ = stream_info;
}

bool NewSampleEvent(uint32_t track_id,
const scoped_refptr<MediaSample>& sample) {
switch (track_id) {

@@ -313,20 +377,24 @@ class WebMClusterParserTest : public testing::Test {
const std::set<int64_t>& ignored_tracks,
const std::string& audio_encryption_key_id,
const std::string& video_encryption_key_id,
const AudioCodec audio_codec) {
const AudioCodec audio_codec,
const VideoCodec video_codec) {
audio_stream_info_->set_codec(audio_codec);
video_stream_info_->set_codec(video_codec);
return new WebMClusterParser(
kTimecodeScale, kAudioTrackNum, audio_default_duration, kVideoTrackNum,
video_default_duration, text_tracks, ignored_tracks,
audio_encryption_key_id, video_encryption_key_id, audio_codec,
kTimecodeScale, audio_stream_info_, video_stream_info_,
audio_default_duration, video_default_duration, text_tracks,
ignored_tracks, audio_encryption_key_id, video_encryption_key_id,
base::Bind(&WebMClusterParserTest::NewSampleEvent,
base::Unretained(this)));
base::Unretained(this)),
base::Bind(&WebMClusterParserTest::InitEvent, base::Unretained(this)));
}

// Create a default version of the parser for test.
WebMClusterParser* CreateDefaultParser() {
return CreateParserHelper(kNoTimestamp, kNoTimestamp, TextTracks(),
std::set<int64_t>(), std::string(), std::string(),
kUnknownAudioCodec);
kUnknownAudioCodec, kCodecVP8);
}

// Create a parser for test with custom audio and video default durations, and

@@ -337,7 +405,7 @@ class WebMClusterParserTest : public testing::Test {
const WebMTracksParser::TextTracks& text_tracks = TextTracks()) {
return CreateParserHelper(audio_default_duration, video_default_duration,
text_tracks, std::set<int64_t>(), std::string(),
std::string(), kUnknownAudioCodec);
std::string(), kUnknownAudioCodec, kCodecVP8);
}

// Create a parser for test with custom ignored tracks.

@@ -345,7 +413,7 @@ class WebMClusterParserTest : public testing::Test {
std::set<int64_t>& ignored_tracks) {
return CreateParserHelper(kNoTimestamp, kNoTimestamp, TextTracks(),
ignored_tracks, std::string(), std::string(),
kUnknownAudioCodec);
kUnknownAudioCodec, kCodecVP8);
}

// Create a parser for test with custom encryption key ids and audio codec.

@@ -355,7 +423,14 @@ class WebMClusterParserTest : public testing::Test {
const AudioCodec audio_codec) {
return CreateParserHelper(kNoTimestamp, kNoTimestamp, TextTracks(),
std::set<int64_t>(), audio_encryption_key_id,
video_encryption_key_id, audio_codec);
video_encryption_key_id, audio_codec, kCodecVP8);
}

// Create a parser for test with custom video codec.
WebMClusterParser* CreateParserWithVideoCodec(const VideoCodec video_codec) {
return CreateParserHelper(kNoTimestamp, kNoTimestamp, TextTracks(),
std::set<int64_t>(), std::string(), std::string(),
kUnknownAudioCodec, video_codec);
}

bool VerifyBuffers(const BlockInfo* block_info, int block_count) {

@@ -368,7 +443,10 @@ class WebMClusterParserTest : public testing::Test {
return result;
}

scoped_refptr<AudioStreamInfo> audio_stream_info_;
scoped_refptr<VideoStreamInfo> video_stream_info_;
scoped_ptr<WebMClusterParser> parser_;
std::vector<scoped_refptr<StreamInfo>> streams_from_init_event_;
BufferQueue audio_buffers_;
BufferQueue video_buffers_;
TextBufferQueueMap text_buffers_map_;

@@ -485,6 +563,10 @@ TEST_F(WebMClusterParserTest, ParseClusterWithSingleCall) {
int result = parser_->Parse(cluster->data(), cluster->size());
EXPECT_EQ(cluster->size(), result);
ASSERT_TRUE(VerifyBuffers(kDefaultBlockInfo, block_count));
// Verify init event called.
ASSERT_EQ(2u, streams_from_init_event_.size());
EXPECT_EQ(kStreamAudio, streams_from_init_event_[0]->stream_type());
EXPECT_EQ(kStreamVideo, streams_from_init_event_[1]->stream_type());
}

TEST_F(WebMClusterParserTest, ParseClusterWithMultipleCalls) {

@@ -698,6 +780,19 @@ TEST_F(WebMClusterParserTest, ParseMultipleTextTracks) {
}
}

TEST_F(WebMClusterParserTest, ParseVP9) {
scoped_ptr<Cluster> cluster(CreateVP9Cluster());
parser_.reset(CreateParserWithVideoCodec(kCodecVP9));

EXPECT_EQ(cluster->size(), parser_->Parse(cluster->data(), cluster->size()));

ASSERT_EQ(2u, streams_from_init_event_.size());
EXPECT_EQ(kStreamAudio, streams_from_init_event_[0]->stream_type());
EXPECT_EQ(kStreamVideo, streams_from_init_event_[1]->stream_type());
EXPECT_EQ("vp09.03.00.12.00.03.00.00",
streams_from_init_event_[1]->codec_string());
}

TEST_F(WebMClusterParserTest, ParseEncryptedBlock) {
scoped_ptr<Cluster> cluster(CreateEncryptedCluster(sizeof(kEncryptedFrame)));

@@ -728,6 +823,8 @@ TEST_F(WebMClusterParserTest, ParseInvalidZeroSizedCluster) {
};

EXPECT_EQ(-1, parser_->Parse(kBuffer, sizeof(kBuffer)));
// Verify init event not called.
ASSERT_EQ(0u, streams_from_init_event_.size());
}

TEST_F(WebMClusterParserTest, ParseInvalidUnknownButActuallyZeroSizedCluster) {

@@ -181,37 +181,33 @@ int WebMMediaParser::ParseInfoAndTracks(const uint8_t* data, int size) {
double timecode_scale_in_us = info_parser.timecode_scale() / 1000.0;
int64_t duration_in_us = info_parser.duration() * timecode_scale_in_us;

std::vector<scoped_refptr<StreamInfo>> streams;
AudioCodec audio_codec = kCodecOpus;
if (tracks_parser.audio_stream_info()) {
streams.push_back(tracks_parser.audio_stream_info());
streams.back()->set_duration(duration_in_us);
if (streams.back()->is_encrypted())
scoped_refptr<AudioStreamInfo> audio_stream_info =
tracks_parser.audio_stream_info();
if (audio_stream_info) {
audio_stream_info->set_duration(duration_in_us);
if (audio_stream_info->is_encrypted())
OnEncryptedMediaInitData(tracks_parser.audio_encryption_key_id());
audio_codec = tracks_parser.audio_stream_info()->codec();
} else {
VLOG(1) << "No audio track info found.";
}

if (tracks_parser.video_stream_info()) {
streams.push_back(tracks_parser.video_stream_info());
streams.back()->set_duration(duration_in_us);
if (streams.back()->is_encrypted())
scoped_refptr<VideoStreamInfo> video_stream_info =
tracks_parser.video_stream_info();
if (video_stream_info) {
video_stream_info->set_duration(duration_in_us);
if (video_stream_info->is_encrypted())
OnEncryptedMediaInitData(tracks_parser.video_encryption_key_id());
} else {
VLOG(1) << "No video track info found.";
}

init_cb_.Run(streams);

cluster_parser_.reset(new WebMClusterParser(
info_parser.timecode_scale(), tracks_parser.audio_track_num(),
info_parser.timecode_scale(), audio_stream_info, video_stream_info,
tracks_parser.GetAudioDefaultDuration(timecode_scale_in_us),
tracks_parser.video_track_num(),
tracks_parser.GetVideoDefaultDuration(timecode_scale_in_us),
tracks_parser.text_tracks(), tracks_parser.ignored_tracks(),
tracks_parser.audio_encryption_key_id(),
tracks_parser.video_encryption_key_id(), audio_codec, new_sample_cb_));
tracks_parser.video_encryption_key_id(), new_sample_cb_, init_cb_));

return bytes_parsed;
}

@@ -6,7 +6,6 @@

#include "packager/base/logging.h"
#include "packager/base/stl_util.h"
#include "packager/media/filters/vp_codec_configuration.h"
#include "packager/media/formats/webm/webm_constants.h"

namespace {

@@ -106,25 +105,10 @@ scoped_refptr<VideoStreamInfo> WebMVideoClient::GetVideoStreamInfo(
sar_x /= gcd;
sar_y /= gcd;

// TODO(kqyang): Fill in the values for vp codec configuration.
const uint8_t profile = 0;
const uint8_t level = 0;
const uint8_t bit_depth = 8;
const uint8_t color_space = 0;
const uint8_t chroma_subsampling = 0;
const uint8_t transfer_function = 0;
const bool video_full_range_flag = false;
VPCodecConfiguration vp_config(profile, level, bit_depth, color_space,
chroma_subsampling, transfer_function,
video_full_range_flag, codec_private);
std::vector<uint8_t> extra_data;
vp_config.Write(&extra_data);

return scoped_refptr<VideoStreamInfo>(new VideoStreamInfo(
track_num, kWebMTimeScale, 0, video_codec,
vp_config.GetCodecString(video_codec), std::string(), width_after_crop,
height_after_crop, sar_x, sar_y, 0, 0, vector_as_array(&extra_data),
extra_data.size(), is_encrypted));
track_num, kWebMTimeScale, 0, video_codec, std::string(), std::string(),
width_after_crop, height_after_crop, sar_x, sar_y, 0, 0, NULL, 0,
is_encrypted));
}

bool WebMVideoClient::OnUInt(int id, int64_t val) {