From 94401d750a72752c68f2494cb832f8370a8f12be Mon Sep 17 00:00:00 2001 From: KongQun Yang Date: Wed, 18 Nov 2015 11:51:15 -0800 Subject: [PATCH] VP9 codec string from bitstream and subsample encryption support - Parse vp9 bitstream to get vpx codec configuration - Add subsample encryption for vp9 - Also fixed a bug in VP9 parser if segmentation update_map is enabled Change-Id: I69dc97088aa38c94c6d37fdbcf3d9cfc942a3df6 --- packager/media/base/audio_stream_info.h | 2 +- packager/media/base/bit_reader.cc | 9 +- packager/media/base/bit_reader.h | 11 +- packager/media/base/video_stream_info.h | 1 + packager/media/filters/vp9_parser.cc | 58 +++++-- packager/media/filters/vp9_parser.h | 12 +- packager/media/filters/vp9_parser_unittest.cc | 31 +++- .../media/filters/vp_codec_configuration.h | 12 +- .../formats/mp4/encrypting_fragmenter.cc | 61 +++++--- .../media/formats/mp4/encrypting_fragmenter.h | 15 +- .../formats/mp4/key_rotation_fragmenter.cc | 2 + .../formats/mp4/key_rotation_fragmenter.h | 4 + packager/media/formats/mp4/segmenter.cc | 24 +-- .../media/formats/webm/webm_cluster_parser.cc | 146 +++++++++++++----- .../media/formats/webm/webm_cluster_parser.h | 23 +-- .../webm/webm_cluster_parser_unittest.cc | 125 +++++++++++++-- .../media/formats/webm/webm_media_parser.cc | 28 ++-- .../media/formats/webm/webm_video_client.cc | 22 +-- 18 files changed, 423 insertions(+), 163 deletions(-) diff --git a/packager/media/base/audio_stream_info.h b/packager/media/base/audio_stream_info.h index 71e3d79995..0f5ff088e9 100644 --- a/packager/media/base/audio_stream_info.h +++ b/packager/media/base/audio_stream_info.h @@ -71,11 +71,11 @@ class AudioStreamInfo : public StreamInfo { return static_cast(num_channels_) * sample_bits_ / 8; } + void set_codec(AudioCodec codec) { codec_ = codec; } void set_sampling_frequency(const uint32_t sampling_frequency) { sampling_frequency_ = sampling_frequency; } - /// @param audio_object_type is only used by AAC Codec, ignored otherwise. 
/// @return The codec string. static std::string GetCodecString(AudioCodec codec, diff --git a/packager/media/base/bit_reader.cc b/packager/media/base/bit_reader.cc index 5b1824e519..50fc6c3c70 100644 --- a/packager/media/base/bit_reader.cc +++ b/packager/media/base/bit_reader.cc @@ -10,7 +10,10 @@ namespace edash_packager { namespace media { BitReader::BitReader(const uint8_t* data, off_t size) - : data_(data), bytes_left_(size), num_remaining_bits_in_curr_byte_(0) { + : data_(data), + initial_size_(size), + bytes_left_(size), + num_remaining_bits_in_curr_byte_(0) { DCHECK(data_ != NULL && bytes_left_ > 0); UpdateCurrByte(); @@ -50,10 +53,6 @@ bool BitReader::SkipBits(int num_bits) { return ReadBitsInternal(num_bits, &not_needed); } -int BitReader::bits_available() const { - return 8 * bytes_left_ + num_remaining_bits_in_curr_byte_; -} - bool BitReader::ReadBitsInternal(int num_bits, uint64_t* out) { DCHECK_LE(num_bits, 64); diff --git a/packager/media/base/bit_reader.h b/packager/media/base/bit_reader.h index 65a494056b..bf00f9b560 100644 --- a/packager/media/base/bit_reader.h +++ b/packager/media/base/bit_reader.h @@ -49,7 +49,12 @@ class BitReader { bool SkipBits(int num_bits); /// @return The number of bits available for reading. - int bits_available() const; + int bits_available() const { + return 8 * bytes_left_ + num_remaining_bits_in_curr_byte_; + } + + /// @return The current bit position. + int bit_position() const { return 8 * initial_size_ - bits_available(); } private: // Help function used by ReadBits to avoid inlining the bit reading logic. @@ -63,6 +68,10 @@ class BitReader { // Pointer to the next unread (not in curr_byte_) byte in the stream. const uint8_t* data_; + // Initial size of the input data. + // TODO(kqyang): Use size_t instead of off_t. + off_t initial_size_; + // Bytes left in the stream (without the curr_byte_). 
off_t bytes_left_; diff --git a/packager/media/base/video_stream_info.h b/packager/media/base/video_stream_info.h index 6786d5457f..91292e696d 100644 --- a/packager/media/base/video_stream_info.h +++ b/packager/media/base/video_stream_info.h @@ -67,6 +67,7 @@ class VideoStreamInfo : public StreamInfo { uint8_t nalu_length_size() const { return nalu_length_size_; } int16_t trick_play_rate() const { return trick_play_rate_; } + void set_codec(VideoCodec codec) { codec_ = codec; } void set_width(uint32_t width) { width_ = width; } void set_height(uint32_t height) { height_ = height; } void set_pixel_width(uint32_t pixel_width) { pixel_width_ = pixel_width; } diff --git a/packager/media/filters/vp9_parser.cc b/packager/media/filters/vp9_parser.cc index 5db7079bf7..f04d70618a 100644 --- a/packager/media/filters/vp9_parser.cc +++ b/packager/media/filters/vp9_parser.cc @@ -184,6 +184,10 @@ VPCodecConfiguration::ColorSpace GetColorSpace(uint8_t color_space) { return VPCodecConfiguration::COLOR_SPACE_BT_601; case VPX_COLOR_SPACE_BT_709: return VPCodecConfiguration::COLOR_SPACE_BT_709; + case VPX_COLOR_SPACE_SMPTE_170: + return VPCodecConfiguration::COLOR_SPACE_SMPTE_170; + case VPX_COLOR_SPACE_SMPTE_240: + return VPCodecConfiguration::COLOR_SPACE_SMPTE_240; case VPX_COLOR_SPACE_BT_2020: // VP9 does not specify if it is in the form of “constant luminance” or // “non-constant luminance”. 
As such, application should rely on the @@ -368,15 +372,14 @@ bool ReadSegmentation(VP9BitReader* reader) { bool update_map; RCHECK(reader->ReadBits(1, &update_map)); if (update_map) { - for (uint32_t i = 0; i < SEG_TREE_PROBS; ++i) { + for (uint32_t i = 0; i < SEG_TREE_PROBS; ++i) RCHECK(reader->SkipBitsConditional(8)); - bool temporal_update; - RCHECK(reader->ReadBits(1, &temporal_update)); - if (temporal_update) { - for (uint32_t j = 0; j < PREDICTION_PROBS; ++j) - RCHECK(reader->SkipBitsConditional(8)); - } + bool temporal_update; + RCHECK(reader->ReadBits(1, &temporal_update)); + if (temporal_update) { + for (uint32_t j = 0; j < PREDICTION_PROBS; ++j) + RCHECK(reader->SkipBitsConditional(8)); } } @@ -449,23 +452,23 @@ bool VP9Parser::Parse(const uint8_t* data, // End of current frame data. There should be no more bytes available. RCHECK(reader.bits_available() < 8); - vpx_frame.is_key_frame = false; + vpx_frame.is_keyframe = false; vpx_frame.uncompressed_header_size = vpx_frame.frame_size; vpx_frame.width = width_; vpx_frame.height = height_; continue; } - bool is_inter_frame; - RCHECK(reader.ReadBits(1, &is_inter_frame)); - vpx_frame.is_key_frame = !is_inter_frame; + bool is_interframe; + RCHECK(reader.ReadBits(1, &is_interframe)); + vpx_frame.is_keyframe = !is_interframe; bool show_frame; RCHECK(reader.ReadBits(1, &show_frame)); bool error_resilient_mode; RCHECK(reader.ReadBits(1, &error_resilient_mode)); - if (vpx_frame.is_key_frame) { + if (vpx_frame.is_keyframe) { RCHECK(ReadSyncCode(&reader)); RCHECK(ReadBitDepthAndColorSpace(&reader, &codec_config_)); RCHECK(ReadFrameSizes(&reader, &width_, &height_)); @@ -516,8 +519,7 @@ bool VP9Parser::Parse(const uint8_t* data, } RCHECK(reader.SkipBits(FRAME_CONTEXTS_LOG2)); // frame_context_idx - VLOG(4) << "bit offset: " - << vpx_frame.frame_size * 8 - reader.bits_available(); + VLOG(4) << "Bits read before ReadLoopFilter: " << reader.bit_position(); RCHECK(ReadLoopFilter(&reader)); 
RCHECK(ReadQuantization(&reader)); RCHECK(ReadSegmentation(&reader)); @@ -532,8 +534,7 @@ bool VP9Parser::Parse(const uint8_t* data, VLOG(3) << "\n frame_size: " << vpx_frame.frame_size << "\n header_size: " << vpx_frame.uncompressed_header_size - << "\n bits_read: " - << vpx_frame.frame_size * 8 - reader.bits_available() + << "\n Bits read: " << reader.bit_position() << "\n first_partition_size: " << first_partition_size; RCHECK(first_partition_size > 0); @@ -544,5 +545,30 @@ bool VP9Parser::Parse(const uint8_t* data, return true; } +bool VP9Parser::IsKeyframe(const uint8_t* data, size_t data_size) { + VP9BitReader reader(data, data_size); + uint8_t frame_marker; + RCHECK(reader.ReadBits(2, &frame_marker)); + RCHECK(frame_marker == VP9_FRAME_MARKER); + + VPCodecConfiguration codec_config; + RCHECK(ReadProfile(&reader, &codec_config)); + + bool show_existing_frame; + RCHECK(reader.ReadBits(1, &show_existing_frame)); + if (show_existing_frame) + return false; + + bool is_interframe; + RCHECK(reader.ReadBits(1, &is_interframe)); + if (is_interframe) + return false; + + RCHECK(reader.SkipBits(2)); // show_frame, error_resilient_mode. + + RCHECK(ReadSyncCode(&reader)); + return true; +} + } // namespace media } // namespace edash_packager diff --git a/packager/media/filters/vp9_parser.h b/packager/media/filters/vp9_parser.h index cd9e532d2b..effd7769b2 100644 --- a/packager/media/filters/vp9_parser.h +++ b/packager/media/filters/vp9_parser.h @@ -11,7 +11,6 @@ #include #include "packager/base/macros.h" -#include "packager/base/memory/scoped_ptr.h" #include "packager/media/filters/vp_codec_configuration.h" namespace edash_packager { @@ -20,7 +19,7 @@ namespace media { struct VPxFrameInfo { size_t frame_size; size_t uncompressed_header_size; - bool is_key_frame; + bool is_keyframe; uint32_t width; uint32_t height; }; @@ -42,9 +41,16 @@ class VP9Parser { std::vector* vpx_frames); /// @return VPx codec configuration extracted. 
Note that it is only valid - /// after parsing a key frame or intra frame successfully. + /// after parsing a keyframe or intra frame successfully. const VPCodecConfiguration& codec_config() { return codec_config_; } + /// A convenient utility function to check whether the frame is a keyframe. + /// Note that this function does not do a full parse of the frame header, so + /// should be more efficient than Parse(). + /// @param data_size Size of the sample in bytes. + /// @return true if it is, false if it is not or if there is a parsing error. + static bool IsKeyframe(const uint8_t* data, size_t data_size); + private: // Keep track of the current width and height. Note that they may change from // frame to frame. diff --git a/packager/media/filters/vp9_parser_unittest.cc b/packager/media/filters/vp9_parser_unittest.cc index 4c5eef64c5..ae1607391e 100644 --- a/packager/media/filters/vp9_parser_unittest.cc +++ b/packager/media/filters/vp9_parser_unittest.cc @@ -17,16 +17,16 @@ namespace { MATCHER_P5(EqualVPxFrame, frame_size, uncompressed_header_size, - is_key_frame, + is_keyframe, width, height, "") { *result_listener << "which is (" << arg.frame_size << ", " - << arg.uncompressed_header_size << ", " << arg.is_key_frame + << arg.uncompressed_header_size << ", " << arg.is_keyframe << ", " << arg.width << ", " << arg.height << ")."; return arg.frame_size == frame_size && arg.uncompressed_header_size == uncompressed_header_size && - arg.is_key_frame == is_key_frame && arg.width == width && + arg.is_keyframe == is_keyframe && arg.width == width && arg.height == height; } } // namespace @@ -47,6 +47,8 @@ TEST(VP9ParserTest, Superframe) { 0xc9, 0x3c, 0x00, 0x48, 0x00, 0xc9, }; + EXPECT_FALSE(VP9Parser::IsKeyframe(data, arraysize(data))); + VP9Parser parser; std::vector frames; ASSERT_TRUE(parser.Parse(data, arraysize(data), &frames)); @@ -69,6 +71,8 @@ TEST(VP9ParserTest, KeyframeChroma420) { 0x35, 0x7a, 0x88, 0x69, 0xf7, 0x1f, 0x26, 0x8b, }; + 
EXPECT_TRUE(VP9Parser::IsKeyframe(kData, arraysize(kData))); + VP9Parser parser; std::vector frames; ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames)); @@ -89,6 +93,8 @@ TEST(VP9ParserTest, KeyframeProfile1Chroma422) { 0xa0, 0x96, 0xa7, 0xb8, 0xf4, 0xb4, 0x65, 0xff, }; + EXPECT_TRUE(VP9Parser::IsKeyframe(kData, arraysize(kData))); + VP9Parser parser; std::vector frames; ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames)); @@ -109,6 +115,8 @@ TEST(VP9ParserTest, KeyframeProfile2Chroma420) { 0xa4, 0xdf, 0x05, 0xaf, 0x6f, 0xff, 0xd1, 0x74, }; + EXPECT_TRUE(VP9Parser::IsKeyframe(kData, arraysize(kData))); + VP9Parser parser; std::vector frames; ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames)); @@ -119,7 +127,7 @@ TEST(VP9ParserTest, KeyframeProfile2Chroma420) { } TEST(VP9ParserTest, KeyframeProfile3Chroma444) { - uint8_t kData[] = { + const uint8_t kData[] = { 0xb1, 0x24, 0xc1, 0xa1, 0x40, 0x00, 0x4f, 0x80, 0x2c, 0xa0, 0x41, 0xc1, 0x20, 0xe0, 0xc3, 0xf0, 0x00, 0x09, 0x00, 0x7c, 0x57, 0x77, 0x3f, 0x67, 0x99, 0x3e, 0x1f, 0xfb, 0xdf, 0x0f, 0x02, 0x0a, 0x37, 0x81, 0x53, 0x80, @@ -129,6 +137,8 @@ TEST(VP9ParserTest, KeyframeProfile3Chroma444) { 0xe1, 0xe6, 0xef, 0xff, 0xfd, 0xf7, 0x4f, 0x0f, }; + EXPECT_TRUE(VP9Parser::IsKeyframe(kData, arraysize(kData))); + VP9Parser parser; std::vector frames; ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames)); @@ -146,8 +156,11 @@ TEST(VP9ParserTest, Intra) { 0xe2, 0xbd, 0x53, 0xd9, 0x00, 0x3a, 0x70, 0xe0, 0x00, 0x78, 0xea, 0xa5, 0x61, 0x08, 0xb7, 0x9f, 0x33, 0xe5, 0xf8, 0xa5, 0x82, 0x32, 0xbb, 0xa3, 0x75, 0xb4, 0x60, 0xf3, 0x39, 0x75, 0x1f, 0x2b, + }; + EXPECT_FALSE(VP9Parser::IsKeyframe(kData, arraysize(kData))); + VP9Parser parser; std::vector frames; ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames)); @@ -159,6 +172,7 @@ TEST(VP9ParserTest, Intra) { TEST(VP9ParserTest, ShowExisting) { const uint8_t kData[] = {0x88}; + EXPECT_FALSE(VP9Parser::IsKeyframe(kData, arraysize(kData))); 
VP9Parser parser; std::vector frames; ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames)); @@ -177,6 +191,8 @@ TEST(VP9ParserTest, Interframe) { 0x90, 0xeb, 0x8c, 0xad, 0x5f, 0x69, 0xb7, 0x9b, }; + EXPECT_FALSE(VP9Parser::IsKeyframe(kData, arraysize(kData))); + VP9Parser parser; std::vector frames; ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames)); @@ -186,6 +202,7 @@ TEST(VP9ParserTest, Interframe) { TEST(VP9ParserTest, CorruptedFrameMarker) { const uint8_t kData[] = {0xc8}; + EXPECT_FALSE(VP9Parser::IsKeyframe(kData, arraysize(kData))); VP9Parser parser; std::vector frames; ASSERT_FALSE(parser.Parse(kData, arraysize(kData), &frames)); @@ -202,6 +219,8 @@ TEST(VP9ParserTest, CorruptedSynccode) { 0x35, 0x7a, 0x88, 0x69, 0xf7, 0x1f, 0x26, 0x8b, }; + EXPECT_FALSE(VP9Parser::IsKeyframe(kData, arraysize(kData))); + VP9Parser parser; std::vector frames; ASSERT_FALSE(parser.Parse(kData, arraysize(kData), &frames)); @@ -218,6 +237,10 @@ TEST(VP9ParserTest, NotEnoughBytesForFirstPartitionSize) { 0x07, 0xf4, 0x7f, 0xc7, 0xff, 0x6d, 0xff, 0xeb, }; + // IsKeyframe only parses the bytes that are necessary to determine whether it + // is a keyframe. 
+ EXPECT_TRUE(VP9Parser::IsKeyframe(kData, arraysize(kData))); + VP9Parser parser; std::vector frames; EXPECT_FALSE(parser.Parse(kData, arraysize(kData), &frames)); diff --git a/packager/media/filters/vp_codec_configuration.h b/packager/media/filters/vp_codec_configuration.h index 398c1bdd84..4fe04fd03e 100644 --- a/packager/media/filters/vp_codec_configuration.h +++ b/packager/media/filters/vp_codec_configuration.h @@ -24,9 +24,11 @@ class VPCodecConfiguration { COLOR_SPACE_UNSPECIFIED = 0, COLOR_SPACE_BT_601 = 1, COLOR_SPACE_BT_709 = 2, - COLOR_SPACE_BT_2020_NON_CONSTANT_LUMINANCE = 3, - COLOR_SPACE_BT_2020_CONSTANT_LUMINANCE = 4, - COLOR_SPACE_SRGB = 5, + COLOR_SPACE_SMPTE_170 = 3, + COLOR_SPACE_SMPTE_240 = 4, + COLOR_SPACE_BT_2020_NON_CONSTANT_LUMINANCE = 5, + COLOR_SPACE_BT_2020_CONSTANT_LUMINANCE = 6, + COLOR_SPACE_SRGB = 7, }; enum ChromaSubsampling { @@ -91,7 +93,9 @@ class VPCodecConfiguration { bool video_full_range_flag_; std::vector codec_initialization_data_; - DISALLOW_COPY_AND_ASSIGN(VPCodecConfiguration); + // Not using DISALLOW_COPY_AND_ASSIGN here intentionally to allow the compiler + // generated copy constructor and assignment operator. Since the internal data + // is small, the performance impact is minimal. 
}; } // namespace media diff --git a/packager/media/formats/mp4/encrypting_fragmenter.cc b/packager/media/formats/mp4/encrypting_fragmenter.cc index 5ee7484fbd..8113c89a7f 100644 --- a/packager/media/formats/mp4/encrypting_fragmenter.cc +++ b/packager/media/formats/mp4/encrypting_fragmenter.cc @@ -10,6 +10,7 @@ #include "packager/media/base/buffer_reader.h" #include "packager/media/base/key_source.h" #include "packager/media/base/media_sample.h" +#include "packager/media/filters/vp9_parser.h" #include "packager/media/formats/mp4/box_definitions.h" #include "packager/media/formats/mp4/cenc.h" @@ -26,15 +27,19 @@ EncryptingFragmenter::EncryptingFragmenter( TrackFragment* traf, scoped_ptr encryption_key, int64_t clear_time, + VideoCodec video_codec, uint8_t nalu_length_size) : Fragmenter(traf), encryption_key_(encryption_key.Pass()), + video_codec_(video_codec), nalu_length_size_(nalu_length_size), clear_time_(clear_time) { DCHECK(encryption_key_); + if (video_codec == kCodecVP9) + vp9_parser_.reset(new VP9Parser); } -EncryptingFragmenter::~EncryptingFragmenter() {} +EncryptingFragmenter::~EncryptingFragmenter() {} Status EncryptingFragmenter::AddSample(scoped_refptr sample) { DCHECK(sample); @@ -134,30 +139,48 @@ Status EncryptingFragmenter::EncryptSample(scoped_refptr sample) { FrameCENCInfo cenc_info(encryptor_->iv()); uint8_t* data = sample->writable_data(); - if (!IsSubsampleEncryptionRequired()) { - EncryptBytes(data, sample->data_size()); - } else { - BufferReader reader(data, sample->data_size()); - while (reader.HasBytes(1)) { - uint64_t nalu_length; - if (!reader.ReadNBytesInto8(&nalu_length, nalu_length_size_)) - return Status(error::MUXER_FAILURE, "Fail to read nalu_length."); - - SubsampleEntry subsample; - subsample.clear_bytes = nalu_length_size_ + 1; - subsample.cipher_bytes = nalu_length - 1; - if (!reader.SkipBytes(nalu_length)) { - return Status(error::MUXER_FAILURE, - "Sample size does not match nalu_length."); + if 
(IsSubsampleEncryptionRequired()) { + if (video_codec_ == kCodecVP9) { + std::vector vpx_frames; + if (!vp9_parser_->Parse(sample->data(), sample->data_size(), + &vpx_frames)) { + return Status(error::MUXER_FAILURE, "Failed to parse vp9 frame."); } + for (const VPxFrameInfo& frame : vpx_frames) { + SubsampleEntry subsample; + subsample.clear_bytes = frame.uncompressed_header_size; + subsample.cipher_bytes = + frame.frame_size - frame.uncompressed_header_size; + cenc_info.AddSubsample(subsample); + if (subsample.cipher_bytes > 0) + EncryptBytes(data + subsample.clear_bytes, subsample.cipher_bytes); + data += frame.frame_size; + } + } else { + BufferReader reader(data, sample->data_size()); + while (reader.HasBytes(1)) { + uint64_t nalu_length; + if (!reader.ReadNBytesInto8(&nalu_length, nalu_length_size_)) + return Status(error::MUXER_FAILURE, "Fail to read nalu_length."); - EncryptBytes(data + subsample.clear_bytes, subsample.cipher_bytes); - cenc_info.AddSubsample(subsample); - data += nalu_length_size_ + nalu_length; + SubsampleEntry subsample; + subsample.clear_bytes = nalu_length_size_ + 1; + subsample.cipher_bytes = nalu_length - 1; + if (!reader.SkipBytes(nalu_length)) { + return Status(error::MUXER_FAILURE, + "Sample size does not match nalu_length."); + } + + EncryptBytes(data + subsample.clear_bytes, subsample.cipher_bytes); + cenc_info.AddSubsample(subsample); + data += nalu_length_size_ + nalu_length; + } } // The length of per-sample auxiliary datum, defined in CENC ch. 7. 
traf()->auxiliary_size.sample_info_sizes.push_back(cenc_info.ComputeSize()); + } else { + EncryptBytes(data, sample->data_size()); } cenc_info.Write(aux_data()); diff --git a/packager/media/formats/mp4/encrypting_fragmenter.h b/packager/media/formats/mp4/encrypting_fragmenter.h index f0c772adff..75b4531e81 100644 --- a/packager/media/formats/mp4/encrypting_fragmenter.h +++ b/packager/media/formats/mp4/encrypting_fragmenter.h @@ -7,6 +7,8 @@ #ifndef MEDIA_FORMATS_MP4_ENCRYPTING_FRAGMENTER_H_ #define MEDIA_FORMATS_MP4_ENCRYPTING_FRAGMENTER_H_ +#include "packager/base/memory/scoped_ptr.h" +#include "packager/media/filters/vp9_parser.h" #include "packager/media/formats/mp4/fragmenter.h" namespace edash_packager { @@ -24,11 +26,15 @@ class EncryptingFragmenter : public Fragmenter { /// @param encryption_key contains the encryption parameters. /// @param clear_time specifies clear lead duration in units of the current /// track's timescale. + /// @param video_codec specifies the codec if input is a video stream; it + /// should be set to kUnknownVideoCodec for audio stream. This + /// parameter is used for proper subsample encryption. /// @param nalu_length_size specifies the size of NAL unit length, in bytes, /// for subsample encryption. EncryptingFragmenter(TrackFragment* traf, scoped_ptr encryption_key, int64_t clear_time, + VideoCodec video_codec, uint8_t nalu_length_size); ~EncryptingFragmenter() override; @@ -64,16 +70,23 @@ class EncryptingFragmenter : public Fragmenter { Status EncryptSample(scoped_refptr sample); // Should we enable subsample encryption? - bool IsSubsampleEncryptionRequired() { return nalu_length_size_ != 0; } + bool IsSubsampleEncryptionRequired() { + return video_codec_ == kCodecVP9 || nalu_length_size_ != 0; + } scoped_ptr encryption_key_; scoped_ptr encryptor_; + // For VP8/VP9, uncompressed_header should not be encrypted; for AVC/HEVC, + // the size and type of NAL units should not be encrypted. 
+ VideoCodec video_codec_; // If this stream contains AVC, subsample encryption specifies that the size // and type of NAL units remain unencrypted. This field specifies the size of // the size field. Can be 1, 2 or 4 bytes. const uint8_t nalu_length_size_; int64_t clear_time_; + scoped_ptr vp9_parser_; + DISALLOW_COPY_AND_ASSIGN(EncryptingFragmenter); }; diff --git a/packager/media/formats/mp4/key_rotation_fragmenter.cc b/packager/media/formats/mp4/key_rotation_fragmenter.cc index 3e4a389bcb..d8cbe135db 100644 --- a/packager/media/formats/mp4/key_rotation_fragmenter.cc +++ b/packager/media/formats/mp4/key_rotation_fragmenter.cc @@ -23,11 +23,13 @@ KeyRotationFragmenter::KeyRotationFragmenter(MovieFragment* moof, KeySource::TrackType track_type, int64_t crypto_period_duration, int64_t clear_time, + VideoCodec video_codec, uint8_t nalu_length_size, MuxerListener* muxer_listener) : EncryptingFragmenter(traf, scoped_ptr(new EncryptionKey()), clear_time, + video_codec, nalu_length_size), moof_(moof), encryption_key_source_(encryption_key_source), diff --git a/packager/media/formats/mp4/key_rotation_fragmenter.h b/packager/media/formats/mp4/key_rotation_fragmenter.h index 06c68146f5..2ec49f7228 100644 --- a/packager/media/formats/mp4/key_rotation_fragmenter.h +++ b/packager/media/formats/mp4/key_rotation_fragmenter.h @@ -31,6 +31,9 @@ class KeyRotationFragmenter : public EncryptingFragmenter { /// of the current track's timescale. /// @param clear_time specifies clear lead duration in units of the current /// track's timescale. + /// @param video_codec specifies the codec if input is a video stream; it + /// should be set to kUnknownVideoCodec for audio stream. This + /// parameter is used for proper subsample encryption. /// @param nalu_length_size NAL unit length size, in bytes, for subsample /// encryption. 
/// @param muxer_listener is a pointer to MuxerListener for notifying @@ -41,6 +44,7 @@ class KeyRotationFragmenter : public EncryptingFragmenter { KeySource::TrackType track_type, int64_t crypto_period_duration, int64_t clear_time, + VideoCodec video_codec, uint8_t nalu_length_size, MuxerListener* muxer_listener); ~KeyRotationFragmenter() override; diff --git a/packager/media/formats/mp4/segmenter.cc b/packager/media/formats/mp4/segmenter.cc index e4cc16b76b..4be256b454 100644 --- a/packager/media/formats/mp4/segmenter.cc +++ b/packager/media/formats/mp4/segmenter.cc @@ -89,6 +89,14 @@ void GenerateEncryptedSampleEntry(const EncryptionKey& encryption_key, } } +VideoCodec GetVideoCodec(const StreamInfo& stream_info) { + if (stream_info.stream_type() != kStreamVideo) + return kUnknownVideoCodec; + const VideoStreamInfo& video_stream_info = + static_cast(stream_info); + return video_stream_info.codec(); +} + uint8_t GetNaluLengthSize(const StreamInfo& stream_info) { if (stream_info.stream_type() != kStreamVideo) return 0; @@ -160,6 +168,7 @@ Status Segmenter::Initialize(const std::vector& streams, continue; } + VideoCodec video_codec = GetVideoCodec(*streams[i]->info()); uint8_t nalu_length_size = GetNaluLengthSize(*streams[i]->info()); KeySource::TrackType track_type = GetTrackTypeForEncryption(*streams[i]->info(), max_sd_pixels); @@ -182,14 +191,10 @@ Status Segmenter::Initialize(const std::vector& streams, } fragmenters_[i] = new KeyRotationFragmenter( - moof_.get(), - &moof_->tracks[i], - encryption_key_source, - track_type, + moof_.get(), &moof_->tracks[i], encryption_key_source, track_type, crypto_period_duration_in_seconds * streams[i]->info()->time_scale(), - clear_lead_in_seconds * streams[i]->info()->time_scale(), - nalu_length_size, - muxer_listener_); + clear_lead_in_seconds * streams[i]->info()->time_scale(), video_codec, + nalu_length_size, muxer_listener_); continue; } @@ -217,9 +222,8 @@ Status Segmenter::Initialize(const std::vector& streams, } 
fragmenters_[i] = new EncryptingFragmenter( - &moof_->tracks[i], - encryption_key.Pass(), - clear_lead_in_seconds * streams[i]->info()->time_scale(), + &moof_->tracks[i], encryption_key.Pass(), + clear_lead_in_seconds * streams[i]->info()->time_scale(), video_codec, nalu_length_size); } diff --git a/packager/media/formats/webm/webm_cluster_parser.cc b/packager/media/formats/webm/webm_cluster_parser.cc index fdf8a93238..ba074432f9 100644 --- a/packager/media/formats/webm/webm_cluster_parser.cc +++ b/packager/media/formats/webm/webm_cluster_parser.cc @@ -10,6 +10,7 @@ #include "packager/base/sys_byteorder.h" #include "packager/media/base/decrypt_config.h" #include "packager/media/base/timestamp.h" +#include "packager/media/filters/vp9_parser.h" #include "packager/media/filters/webvtt_util.h" #include "packager/media/formats/webm/webm_constants.h" #include "packager/media/formats/webm/webm_crypto_helpers.h" @@ -30,17 +31,11 @@ "may be suppressed): " \ : "") -namespace { -const int64_t kMicrosecondsPerMillisecond = 1000; -} // namespace - namespace edash_packager { namespace media { +namespace { -const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = { - 10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000, - 60000, 10000, 20000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, - 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000}; +const int64_t kMicrosecondsPerMillisecond = 1000; enum { // Limits the number of LOG() calls in the path of reading encoded @@ -51,27 +46,78 @@ enum { kMaxDurationEstimateLogs = 10, }; +// Helper function used to inspect block data to determine if the +// block is a keyframe. +// |data| contains the bytes in the block. +// |size| indicates the number of bytes in |data|. +bool IsKeyframe(bool is_video, + VideoCodec codec, + const uint8_t* data, + int size) { + // For now, assume that all blocks are keyframes for datatypes other than + // video. 
This is a valid assumption for Vorbis, WebVTT, & Opus. + if (!is_video) + return true; + + if (codec == kCodecVP9) + return VP9Parser::IsKeyframe(data, size); + + CHECK_EQ(kCodecVP8, codec); + + // Make sure the block is big enough for the minimal keyframe header size. + if (size < 7) + return false; + + // The LSb of the first byte must be a 0 for a keyframe. + // http://tools.ietf.org/html/rfc6386 Section 19.1 + if ((data[0] & 0x01) != 0) + return false; + + // Verify VP8 keyframe startcode. + // http://tools.ietf.org/html/rfc6386 Section 19.1 + if (data[3] != 0x9d || data[4] != 0x01 || data[5] != 0x2a) + return false; + + return true; +} + +} // namespace + +const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = { + 10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000, + 60000, 10000, 20000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, + 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000}; + WebMClusterParser::WebMClusterParser( int64_t timecode_scale, - int audio_track_num, + scoped_refptr audio_stream_info, + scoped_refptr video_stream_info, int64_t audio_default_duration, - int video_track_num, int64_t video_default_duration, const WebMTracksParser::TextTracks& text_tracks, const std::set& ignored_tracks, const std::string& audio_encryption_key_id, const std::string& video_encryption_key_id, - const AudioCodec audio_codec, - const MediaParser::NewSampleCB& new_sample_cb) + const MediaParser::NewSampleCB& new_sample_cb, + const MediaParser::InitCB& init_cb) : timecode_multiplier_(timecode_scale / 1000.0), + audio_stream_info_(audio_stream_info), + video_stream_info_(video_stream_info), ignored_tracks_(ignored_tracks), audio_encryption_key_id_(audio_encryption_key_id), video_encryption_key_id_(video_encryption_key_id), - audio_codec_(audio_codec), parser_(kWebMIdCluster, this), + initialized_(false), + init_cb_(init_cb), cluster_start_time_(kNoTimestamp), - audio_(audio_track_num, false, audio_default_duration, 
new_sample_cb), - video_(video_track_num, true, video_default_duration, new_sample_cb) { + audio_(audio_stream_info ? audio_stream_info->track_id() : -1, + false, + audio_default_duration, + new_sample_cb), + video_(video_stream_info ? video_stream_info->track_id() : -1, + true, + video_default_duration, + new_sample_cb) { for (WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin(); it != text_tracks.end(); ++it) { @@ -143,7 +189,8 @@ int64_t WebMClusterParser::TryGetEncodedAudioDuration( // TODO: Consider parsing "Signal Byte" for encrypted streams to return // duration for any unencrypted blocks. - if (audio_codec_ == kCodecOpus) { + DCHECK(audio_stream_info_); + if (audio_stream_info_->codec() == kCodecOpus) { return ReadOpusDuration(data, size); } @@ -450,7 +497,12 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, // necessary to determine whether it contains a keyframe or not. // http://www.matroska.org/technical/specs/index.html bool is_keyframe = - is_simple_block ? (flags & 0x80) != 0 : track->IsKeyframe(data, size); + is_simple_block + ? (flags & 0x80) != 0 + : IsKeyframe(stream_type == kStreamVideo, + video_stream_info_ ? video_stream_info_->codec() + : kUnknownVideoCodec, + data, size); // Every encrypted Block has a signal byte and IV prepended to it. 
Current // encrypted WebM request for comments specification is here @@ -531,6 +583,44 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, buffer->set_duration(track->default_duration()); } + if (!initialized_) { + std::vector> streams; + if (audio_stream_info_) + streams.push_back(audio_stream_info_); + if (video_stream_info_) { + if (stream_type == kStreamVideo) { + VPCodecConfiguration codec_config; + if (video_stream_info_->codec() == kCodecVP9) { + VP9Parser vp9_parser; + std::vector vpx_frames; + if (!vp9_parser.Parse(buffer->data(), buffer->data_size(), + &vpx_frames)) { + LOG(ERROR) << "Failed to parse vp9 frame."; + return false; + } + if (vpx_frames.size() != 1u || !vpx_frames[0].is_keyframe) { + LOG(ERROR) << "The first frame should be a key frame."; + return false; + } + codec_config = vp9_parser.codec_config(); + } + // TODO(kqyang): Support VP8. + + video_stream_info_->set_codec_string( + codec_config.GetCodecString(video_stream_info_->codec())); + std::vector extra_data; + codec_config.Write(&extra_data); + video_stream_info_->set_extra_data(extra_data); + streams.push_back(video_stream_info_); + init_cb_.Run(streams); + initialized_ = true; + } + } else { + init_cb_.Run(streams); + initialized_ = true; + } + } + return track->EmitBuffer(buffer); } @@ -614,28 +704,6 @@ void WebMClusterParser::Track::Reset() { last_added_buffer_missing_duration_ = NULL; } -bool WebMClusterParser::Track::IsKeyframe(const uint8_t* data, int size) const { - // For now, assume that all blocks are keyframes for datatypes other than - // video. This is a valid assumption for Vorbis, WebVTT, & Opus. - if (!is_video_) - return true; - - // Make sure the block is big enough for the minimal keyframe header size. - if (size < 7) - return false; - - // The LSb of the first byte must be a 0 for a keyframe. - // http://tools.ietf.org/html/rfc6386 Section 19.1 - if ((data[0] & 0x01) != 0) - return false; - - // Verify VP8 keyframe startcode. 
- // http://tools.ietf.org/html/rfc6386 Section 19.1 - if (data[3] != 0x9d || data[4] != 0x01 || data[5] != 0x2a) - return false; - - return true; -} bool WebMClusterParser::Track::EmitBufferHelp( const scoped_refptr& buffer) { diff --git a/packager/media/formats/webm/webm_cluster_parser.h b/packager/media/formats/webm/webm_cluster_parser.h index 2f73b524f9..3bcb0aa63c 100644 --- a/packager/media/formats/webm/webm_cluster_parser.h +++ b/packager/media/formats/webm/webm_cluster_parser.h @@ -67,12 +67,6 @@ class WebMClusterParser : public WebMParserClient { // was missing duration. void Reset(); - // Helper function used to inspect block data to determine if the - // block is a keyframe. - // |data| contains the bytes in the block. - // |size| indicates the number of bytes in |data|. - bool IsKeyframe(const uint8_t* data, int size) const; - int64_t default_duration() const { return default_duration_; } private: @@ -113,16 +107,16 @@ class WebMClusterParser : public WebMParserClient { public: WebMClusterParser(int64_t timecode_scale, - int audio_track_num, + scoped_refptr audio_stream_info, + scoped_refptr video_stream_info, int64_t audio_default_duration, - int video_track_num, int64_t video_default_duration, const WebMTracksParser::TextTracks& text_tracks, const std::set& ignored_tracks, const std::string& audio_encryption_key_id, const std::string& video_encryption_key_id, - const AudioCodec audio_codec, - const MediaParser::NewSampleCB& new_sample_cb); + const MediaParser::NewSampleCB& new_sample_cb, + const MediaParser::InitCB& init_cb); ~WebMClusterParser() override; /// Resets the parser state so it can accept a new cluster. @@ -191,13 +185,20 @@ class WebMClusterParser : public WebMParserClient { double timecode_multiplier_; // Multiplier used to convert timecodes into // microseconds. 
+ scoped_refptr audio_stream_info_; + scoped_refptr video_stream_info_; std::set ignored_tracks_; std::string audio_encryption_key_id_; std::string video_encryption_key_id_; - const AudioCodec audio_codec_; WebMListParser parser_; + // Indicates whether init_cb has been executed. |init_cb| is executed when we + // have codec configuration of video stream, which is extracted from the first + // video sample. + bool initialized_; + MediaParser::InitCB init_cb_; + int64_t last_block_timecode_ = -1; scoped_ptr block_data_; int block_data_size_ = -1; diff --git a/packager/media/formats/webm/webm_cluster_parser_unittest.cc b/packager/media/formats/webm/webm_cluster_parser_unittest.cc index 83f91a186f..6f33b4716f 100644 --- a/packager/media/formats/webm/webm_cluster_parser_unittest.cc +++ b/packager/media/formats/webm/webm_cluster_parser_unittest.cc @@ -28,10 +28,6 @@ using ::testing::StrictMock; using ::testing::Mock; using ::testing::_; -namespace { -const int64_t kMicrosecondsPerMillisecond = 1000; -} // namespace - namespace edash_packager { namespace media { @@ -67,6 +63,7 @@ MATCHER_P2(WebMBlockDurationMismatchesOpusDuration, namespace { +const int64_t kMicrosecondsPerMillisecond = 1000; // Timecode scale for millisecond timestamps. const int kTimecodeScale = 1000000; @@ -76,6 +73,23 @@ const int kTextTrackNum = 3; const int kTestAudioFrameDefaultDurationInMs = 13; const int kTestVideoFrameDefaultDurationInMs = 17; +// Constants for AudioStreamInfo and VideoStreamInfo. Most are not used. 
+const uint32_t kTimeScale = 1000000u; +const uint64_t kDuration = 10000000u; +const char kCodecString[] = "codec_string"; +const char kLanguage[] = "eng"; +const uint8_t kBitsPerSample = 8u; +const uint8_t kNumChannels = 2u; +const uint32_t kSamplingFrequency = 48000u; +const size_t kExtraDataSize = 0u; +const bool kEncrypted = true; +const uint16_t kWidth = 320u; +const uint16_t kHeight = 180u; +const uint32_t kPixelWidth = 1u; +const uint32_t kPixelHeight = 1u; +const int16_t kTrickPlayRate = 0u; +const uint8_t kNaluLengthSize = 0u; + // Test duration defaults must differ from parser estimation defaults to know // which durations parser used when emitting buffers. static_assert( @@ -125,6 +139,16 @@ const uint8_t kEncryptedFrame[] = { 0x01, }; +const uint8_t kVP9Frame[] = { + 0xb1, 0x24, 0xc1, 0xa1, 0x40, 0x00, 0x4f, 0x80, 0x2c, 0xa0, 0x41, 0xc1, + 0x20, 0xe0, 0xc3, 0xf0, 0x00, 0x09, 0x00, 0x7c, 0x57, 0x77, 0x3f, 0x67, + 0x99, 0x3e, 0x1f, 0xfb, 0xdf, 0x0f, 0x02, 0x0a, 0x37, 0x81, 0x53, 0x80, + 0x00, 0x7e, 0x6f, 0xfe, 0x74, 0x31, 0xc6, 0x4f, 0x23, 0x9d, 0x6e, 0x5f, + 0xfc, 0xa8, 0xef, 0x67, 0xdc, 0xac, 0xf7, 0x3e, 0x31, 0x07, 0xab, 0xc7, + 0x0c, 0x74, 0x48, 0x8b, 0x95, 0x30, 0xc9, 0xf0, 0x37, 0x3b, 0xe6, 0x11, + 0xe1, 0xe6, 0xef, 0xff, 0xfd, 0xf7, 0x4f, 0x0f, +}; + scoped_ptr CreateCluster(int timecode, const BlockInfo* block_info, int block_count) { @@ -178,6 +202,14 @@ scoped_ptr CreateEncryptedCluster(int bytes_to_write) { return cb.Finish(); } +// Creates a Cluster with one vp9 frame (keyframe). 
+scoped_ptr CreateVP9Cluster() { + ClusterBuilder cb; + cb.SetClusterTimecode(0); + cb.AddSimpleBlock(kVideoTrackNum, 0, 0, kVP9Frame, arraysize(kVP9Frame)); + return cb.Finish(); +} + bool VerifyBuffersHelper(const BufferQueue& audio_buffers, const BufferQueue& video_buffers, const BufferQueue& text_buffers, @@ -268,7 +300,35 @@ void VerifyEncryptedBuffer(scoped_refptr buffer) { class WebMClusterParserTest : public testing::Test { public: - WebMClusterParserTest() : parser_(CreateDefaultParser()) {} + WebMClusterParserTest() + : audio_stream_info_(new AudioStreamInfo(kAudioTrackNum, + kTimeScale, + kDuration, + kUnknownAudioCodec, + kCodecString, + kLanguage, + kBitsPerSample, + kNumChannels, + kSamplingFrequency, + NULL, + kExtraDataSize, + !kEncrypted)), + video_stream_info_(new VideoStreamInfo(kVideoTrackNum, + kTimeScale, + kDuration, + kCodecVP8, + kCodecString, + kLanguage, + kWidth, + kHeight, + kPixelWidth, + kPixelHeight, + kTrickPlayRate, + kNaluLengthSize, + NULL, + kExtraDataSize, + !kEncrypted)), + parser_(CreateDefaultParser()) {} protected: void ResetParserToHaveDefaultDurations() { @@ -285,6 +345,10 @@ class WebMClusterParserTest : public testing::Test { default_audio_duration, default_video_duration)); } + void InitEvent(const std::vector>& stream_info) { + streams_from_init_event_ = stream_info; + } + bool NewSampleEvent(uint32_t track_id, const scoped_refptr& sample) { switch (track_id) { @@ -313,20 +377,24 @@ class WebMClusterParserTest : public testing::Test { const std::set& ignored_tracks, const std::string& audio_encryption_key_id, const std::string& video_encryption_key_id, - const AudioCodec audio_codec) { + const AudioCodec audio_codec, + const VideoCodec video_codec) { + audio_stream_info_->set_codec(audio_codec); + video_stream_info_->set_codec(video_codec); return new WebMClusterParser( - kTimecodeScale, kAudioTrackNum, audio_default_duration, kVideoTrackNum, - video_default_duration, text_tracks, ignored_tracks, - 
audio_encryption_key_id, video_encryption_key_id, audio_codec, + kTimecodeScale, audio_stream_info_, video_stream_info_, + audio_default_duration, video_default_duration, text_tracks, + ignored_tracks, audio_encryption_key_id, video_encryption_key_id, base::Bind(&WebMClusterParserTest::NewSampleEvent, - base::Unretained(this))); + base::Unretained(this)), + base::Bind(&WebMClusterParserTest::InitEvent, base::Unretained(this))); } // Create a default version of the parser for test. WebMClusterParser* CreateDefaultParser() { return CreateParserHelper(kNoTimestamp, kNoTimestamp, TextTracks(), std::set(), std::string(), std::string(), - kUnknownAudioCodec); + kUnknownAudioCodec, kCodecVP8); } // Create a parser for test with custom audio and video default durations, and @@ -337,7 +405,7 @@ class WebMClusterParserTest : public testing::Test { const WebMTracksParser::TextTracks& text_tracks = TextTracks()) { return CreateParserHelper(audio_default_duration, video_default_duration, text_tracks, std::set(), std::string(), - std::string(), kUnknownAudioCodec); + std::string(), kUnknownAudioCodec, kCodecVP8); } // Create a parser for test with custom ignored tracks. @@ -345,7 +413,7 @@ class WebMClusterParserTest : public testing::Test { std::set& ignored_tracks) { return CreateParserHelper(kNoTimestamp, kNoTimestamp, TextTracks(), ignored_tracks, std::string(), std::string(), - kUnknownAudioCodec); + kUnknownAudioCodec, kCodecVP8); } // Create a parser for test with custom encryption key ids and audio codec. @@ -355,7 +423,14 @@ class WebMClusterParserTest : public testing::Test { const AudioCodec audio_codec) { return CreateParserHelper(kNoTimestamp, kNoTimestamp, TextTracks(), std::set(), audio_encryption_key_id, - video_encryption_key_id, audio_codec); + video_encryption_key_id, audio_codec, kCodecVP8); + } + + // Create a parser for test with custom video codec. 
+ WebMClusterParser* CreateParserWithVideoCodec(const VideoCodec video_codec) { + return CreateParserHelper(kNoTimestamp, kNoTimestamp, TextTracks(), + std::set(), std::string(), std::string(), + kUnknownAudioCodec, video_codec); } bool VerifyBuffers(const BlockInfo* block_info, int block_count) { @@ -368,7 +443,10 @@ class WebMClusterParserTest : public testing::Test { return result; } + scoped_refptr audio_stream_info_; + scoped_refptr video_stream_info_; scoped_ptr parser_; + std::vector> streams_from_init_event_; BufferQueue audio_buffers_; BufferQueue video_buffers_; TextBufferQueueMap text_buffers_map_; @@ -485,6 +563,10 @@ TEST_F(WebMClusterParserTest, ParseClusterWithSingleCall) { int result = parser_->Parse(cluster->data(), cluster->size()); EXPECT_EQ(cluster->size(), result); ASSERT_TRUE(VerifyBuffers(kDefaultBlockInfo, block_count)); + // Verify init event called. + ASSERT_EQ(2u, streams_from_init_event_.size()); + EXPECT_EQ(kStreamAudio, streams_from_init_event_[0]->stream_type()); + EXPECT_EQ(kStreamVideo, streams_from_init_event_[1]->stream_type()); } TEST_F(WebMClusterParserTest, ParseClusterWithMultipleCalls) { @@ -698,6 +780,19 @@ TEST_F(WebMClusterParserTest, ParseMultipleTextTracks) { } } +TEST_F(WebMClusterParserTest, ParseVP9) { + scoped_ptr cluster(CreateVP9Cluster()); + parser_.reset(CreateParserWithVideoCodec(kCodecVP9)); + + EXPECT_EQ(cluster->size(), parser_->Parse(cluster->data(), cluster->size())); + + ASSERT_EQ(2u, streams_from_init_event_.size()); + EXPECT_EQ(kStreamAudio, streams_from_init_event_[0]->stream_type()); + EXPECT_EQ(kStreamVideo, streams_from_init_event_[1]->stream_type()); + EXPECT_EQ("vp09.03.00.12.00.03.00.00", + streams_from_init_event_[1]->codec_string()); +} + TEST_F(WebMClusterParserTest, ParseEncryptedBlock) { scoped_ptr cluster(CreateEncryptedCluster(sizeof(kEncryptedFrame))); @@ -728,6 +823,8 @@ TEST_F(WebMClusterParserTest, ParseInvalidZeroSizedCluster) { }; EXPECT_EQ(-1, parser_->Parse(kBuffer, 
sizeof(kBuffer))); + // Verify init event not called. + ASSERT_EQ(0u, streams_from_init_event_.size()); } TEST_F(WebMClusterParserTest, ParseInvalidUnknownButActuallyZeroSizedCluster) { diff --git a/packager/media/formats/webm/webm_media_parser.cc b/packager/media/formats/webm/webm_media_parser.cc index 63c2d6e7bd..14b2823cd8 100644 --- a/packager/media/formats/webm/webm_media_parser.cc +++ b/packager/media/formats/webm/webm_media_parser.cc @@ -181,37 +181,33 @@ int WebMMediaParser::ParseInfoAndTracks(const uint8_t* data, int size) { double timecode_scale_in_us = info_parser.timecode_scale() / 1000.0; int64_t duration_in_us = info_parser.duration() * timecode_scale_in_us; - std::vector> streams; - AudioCodec audio_codec = kCodecOpus; - if (tracks_parser.audio_stream_info()) { - streams.push_back(tracks_parser.audio_stream_info()); - streams.back()->set_duration(duration_in_us); - if (streams.back()->is_encrypted()) + scoped_refptr audio_stream_info = + tracks_parser.audio_stream_info(); + if (audio_stream_info) { + audio_stream_info->set_duration(duration_in_us); + if (audio_stream_info->is_encrypted()) OnEncryptedMediaInitData(tracks_parser.audio_encryption_key_id()); - audio_codec = tracks_parser.audio_stream_info()->codec(); } else { VLOG(1) << "No audio track info found."; } - if (tracks_parser.video_stream_info()) { - streams.push_back(tracks_parser.video_stream_info()); - streams.back()->set_duration(duration_in_us); - if (streams.back()->is_encrypted()) + scoped_refptr video_stream_info = + tracks_parser.video_stream_info(); + if (video_stream_info) { + video_stream_info->set_duration(duration_in_us); + if (video_stream_info->is_encrypted()) OnEncryptedMediaInitData(tracks_parser.video_encryption_key_id()); } else { VLOG(1) << "No video track info found."; } - init_cb_.Run(streams); - cluster_parser_.reset(new WebMClusterParser( - info_parser.timecode_scale(), tracks_parser.audio_track_num(), + info_parser.timecode_scale(), audio_stream_info, 
video_stream_info, tracks_parser.GetAudioDefaultDuration(timecode_scale_in_us), - tracks_parser.video_track_num(), tracks_parser.GetVideoDefaultDuration(timecode_scale_in_us), tracks_parser.text_tracks(), tracks_parser.ignored_tracks(), tracks_parser.audio_encryption_key_id(), - tracks_parser.video_encryption_key_id(), audio_codec, new_sample_cb_)); + tracks_parser.video_encryption_key_id(), new_sample_cb_, init_cb_)); return bytes_parsed; } diff --git a/packager/media/formats/webm/webm_video_client.cc b/packager/media/formats/webm/webm_video_client.cc index 373f8ae2f1..b99888cd6f 100644 --- a/packager/media/formats/webm/webm_video_client.cc +++ b/packager/media/formats/webm/webm_video_client.cc @@ -6,7 +6,6 @@ #include "packager/base/logging.h" #include "packager/base/stl_util.h" -#include "packager/media/filters/vp_codec_configuration.h" #include "packager/media/formats/webm/webm_constants.h" namespace { @@ -106,25 +105,10 @@ scoped_refptr WebMVideoClient::GetVideoStreamInfo( sar_x /= gcd; sar_y /= gcd; - // TODO(kqyang): Fill in the values for vp codec configuration. 
- const uint8_t profile = 0; - const uint8_t level = 0; - const uint8_t bit_depth = 8; - const uint8_t color_space = 0; - const uint8_t chroma_subsampling = 0; - const uint8_t transfer_function = 0; - const bool video_full_range_flag = false; - VPCodecConfiguration vp_config(profile, level, bit_depth, color_space, - chroma_subsampling, transfer_function, - video_full_range_flag, codec_private); - std::vector extra_data; - vp_config.Write(&extra_data); - return scoped_refptr(new VideoStreamInfo( - track_num, kWebMTimeScale, 0, video_codec, - vp_config.GetCodecString(video_codec), std::string(), width_after_crop, - height_after_crop, sar_x, sar_y, 0, 0, vector_as_array(&extra_data), - extra_data.size(), is_encrypted)); + track_num, kWebMTimeScale, 0, video_codec, std::string(), std::string(), + width_after_crop, height_after_crop, sar_x, sar_y, 0, 0, NULL, 0, + is_encrypted)); } bool WebMVideoClient::OnUInt(int id, int64_t val) {