From 6550868574a941d6a087291d21dda1bef8ab90b8 Mon Sep 17 00:00:00 2001 From: Jacob Trimble Date: Wed, 25 May 2016 11:34:43 -0700 Subject: [PATCH] Add codec private data to VP9 in WebM. b/29009350 Change-Id: Iaafc87340043eff77c3ef7e1c1135d8c4c4287ae --- .../testdata/bear-320x240-vp9-golden.webm | Bin 69533 -> 69545 bytes .../bear-320x240-vp9-opus-webm-golden.mpd | 6 +- .../codecs/vp_codec_configuration_record.cc | 174 +++++++++++++++++- .../codecs/vp_codec_configuration_record.h | 63 +++++-- .../vp_codec_configuration_record_unittest.cc | 27 ++- .../media/formats/mp4/mp4_media_parser.cc | 2 +- packager/media/formats/webm/segmenter.cc | 18 +- .../media/formats/webm/webm_cluster_parser.cc | 11 +- .../media/formats/webm/webm_video_client.cc | 16 +- 9 files changed, 276 insertions(+), 41 deletions(-) diff --git a/packager/app/test/testdata/bear-320x240-vp9-golden.webm b/packager/app/test/testdata/bear-320x240-vp9-golden.webm index b6d4bb46fa8ef6058b2708505e70d263ac96bae2..3b18d7666fb9a718f0af2d19b4dd2678e4537901 100644 GIT binary patch delta 108 zcmbO`pJnBImI?Zdl@ks01b_SP?hRhuA``qjJ9teqBfsaw{4Rx!>(*ayWGp_~$k^P- z*wz{r9~NMlyr`3rk%5_!gN2cynU`@pFC$|ipZxFLy_=g@__nuyZ)E(~6!ZziW&aN4 H9&`f$?A9ys delta 96 zcmZ2EpJncRmI?Zdxf2ca1mFAa?hRhuA``qjJ9teqBfss${4SBD>lR;cWGp_~$k^P- v*wz{r9~NNQEXlZCl992HPyYSx-px%ceB0Z - + output_video.webm - - + + diff --git a/packager/media/codecs/vp_codec_configuration_record.cc b/packager/media/codecs/vp_codec_configuration_record.cc index be670c24d0..93c651aa86 100644 --- a/packager/media/codecs/vp_codec_configuration_record.cc +++ b/packager/media/codecs/vp_codec_configuration_record.cc @@ -9,6 +9,7 @@ #include "packager/base/strings/string_number_conversions.h" #include "packager/base/strings/string_util.h" #include "packager/media/base/bit_reader.h" +#include "packager/media/base/buffer_reader.h" #include "packager/media/base/buffer_writer.h" #include "packager/media/base/rcheck.h" #include "packager/base/strings/stringprintf.h" @@ -16,6 +17,12 @@ namespace shaka { namespace media { namespace { +enum VP9CodecFeatures { + kFeatureProfile = 1, + kFeatureLevel = 2, + kFeatureBitDepth = 3, + kFeatureChromaSubsampling = 4, +}; std::string VPCodecAsString(VideoCodec codec) { switch (codec) { @@ -33,14 +40,7 @@ std::string VPCodecAsString(VideoCodec codec) { } // namespace -VPCodecConfigurationRecord::VPCodecConfigurationRecord() - : profile_(0), - level_(0), - bit_depth_(0), - color_space_(0), - chroma_subsampling_(0), - transfer_function_(0), - video_full_range_flag_(false) {} +VPCodecConfigurationRecord::VPCodecConfigurationRecord() {} VPCodecConfigurationRecord::VPCodecConfigurationRecord( uint8_t profile, @@ -58,12 +58,26 @@ VPCodecConfigurationRecord::VPCodecConfigurationRecord( chroma_subsampling_(chroma_subsampling), transfer_function_(transfer_function), video_full_range_flag_(video_full_range_flag), + profile_is_set_(true), + level_is_set_(true), + bit_depth_is_set_(true), + color_space_is_set_(true), + chroma_subsampling_is_set_(true), + transfer_function_is_set_(true), + video_full_range_flag_is_set_(true), codec_initialization_data_(codec_initialization_data) {} VPCodecConfigurationRecord::~VPCodecConfigurationRecord(){}; -bool VPCodecConfigurationRecord::Parse(const std::vector& data) { +bool VPCodecConfigurationRecord::ParseMP4(const std::vector& data) { BitReader reader(data.data(), data.size()); + profile_is_set_ = true; + level_is_set_ = true; + bit_depth_is_set_ = true; + color_space_is_set_ = true; + chroma_subsampling_is_set_ = true; + transfer_function_is_set_ = true; + video_full_range_flag_is_set_ = true; RCHECK(reader.ReadBits(8, &profile_)); RCHECK(reader.ReadBits(8, &level_)); RCHECK(reader.ReadBits(4, &bit_depth_)); @@ -81,7 +95,47 @@ bool VPCodecConfigurationRecord::Parse(const std::vector& data) { return true; } -void VPCodecConfigurationRecord::Write(std::vector* data) const { +bool VPCodecConfigurationRecord::ParseWebM(const std::vector& data) { + BufferReader reader(data.data(), data.size()); + + while (reader.HasBytes(1)) { + uint8_t id; + uint8_t size; + RCHECK(reader.Read1(&id)); + RCHECK(reader.Read1(&size)); + + switch (id) { + case kFeatureProfile: + RCHECK(size == 1); + RCHECK(reader.Read1(&profile_)); + profile_is_set_ = true; + break; + case kFeatureLevel: + RCHECK(size == 1); + RCHECK(reader.Read1(&level_)); + level_is_set_ = true; + break; + case kFeatureBitDepth: + RCHECK(size == 1); + RCHECK(reader.Read1(&bit_depth_)); + bit_depth_is_set_ = true; + break; + case kFeatureChromaSubsampling: + RCHECK(size == 1); + RCHECK(reader.Read1(&chroma_subsampling_)); + chroma_subsampling_is_set_ = true; + break; + default: { + LOG(WARNING) << "Skipping unknown VP9 codec feature " << id; + RCHECK(reader.SkipBytes(size)); + } + } + } + + return true; +} + +void VPCodecConfigurationRecord::WriteMP4(std::vector* data) const { BufferWriter writer; writer.AppendInt(profile_); writer.AppendInt(level_); @@ -96,6 +150,36 @@ void VPCodecConfigurationRecord::Write(std::vector* data) const { writer.SwapBuffer(data); } +void VPCodecConfigurationRecord::WriteWebM(std::vector* data) const { + BufferWriter writer; + + writer.AppendInt(static_cast(kFeatureProfile)); // ID = 1 + writer.AppendInt(static_cast(1)); // Length = 1 + writer.AppendInt(static_cast(profile_)); + + if (level_ != 0) { + writer.AppendInt(static_cast(kFeatureLevel)); // ID = 2 + writer.AppendInt(static_cast(1)); // Length = 1 + writer.AppendInt(static_cast(level_)); + } + + writer.AppendInt(static_cast(kFeatureBitDepth)); // ID = 3 + writer.AppendInt(static_cast(1)); // Length = 1 + writer.AppendInt(static_cast(bit_depth_)); + + // WebM doesn't differentiate whether it is vertical or collocated with luma + // for 4:2:0. + const uint8_t subsampling = + chroma_subsampling_ == CHROMA_420_COLLOCATED_WITH_LUMA + ? CHROMA_420_VERTICAL + : chroma_subsampling_; + writer.AppendInt(static_cast(kFeatureChromaSubsampling)); // ID = 4 + writer.AppendInt(static_cast(1)); // Length = 1 + writer.AppendInt(subsampling); + + writer.SwapBuffer(data); +} + std::string VPCodecConfigurationRecord::GetCodecString(VideoCodec codec) const { const std::string fields[] = { base::IntToString(profile_), @@ -117,5 +201,75 @@ std::string VPCodecConfigurationRecord::GetCodecString(VideoCodec codec) const { return codec_string; } +void VPCodecConfigurationRecord::MergeFrom( + const VPCodecConfigurationRecord& other) { + if (!profile_is_set_ || other.profile_is_set_) { + profile_ = other.profile(); + profile_is_set_ = true; + } + if (!level_is_set_ || other.level_is_set_) { + if (level_is_set_ && other.level() != level_) { + LOG(WARNING) << "VPx level is inconsistent, " << level_ << " vs " + << other.level(); + } + level_ = other.level(); + level_is_set_ = true; + } + if (!bit_depth_is_set_ || other.bit_depth_is_set_) { + if (bit_depth_is_set_ && bit_depth_ != other.bit_depth()) { + LOG(WARNING) << "VPx bit depth is inconsistent, " << bit_depth_ << " vs " + << other.bit_depth(); + } + bit_depth_ = other.bit_depth(); + bit_depth_is_set_ = true; + } + if (!color_space_is_set_ || other.color_space_is_set_) { + if (color_space_is_set_ && color_space_ != other.color_space()) { + LOG(WARNING) << "VPx color space is inconsistent, " << color_space_ + << " vs " << other.color_space(); + } + color_space_ = other.color_space(); + color_space_is_set_ = true; + } + if (!chroma_subsampling_is_set_ || other.chroma_subsampling_is_set_) { + if (chroma_subsampling_is_set_ && + chroma_subsampling_ != other.chroma_subsampling_) { + LOG(WARNING) << "VPx chroma subsampling is inconsistent, " + << chroma_subsampling_ << " vs " + << other.chroma_subsampling(); + } + chroma_subsampling_ = other.chroma_subsampling(); + chroma_subsampling_is_set_ = true; + } + if (!transfer_function_is_set_ || other.transfer_function_is_set_) { + if (transfer_function_is_set_ && + transfer_function_ != other.transfer_function_) { + LOG(WARNING) << "VPx transfer function is inconsistent, " + << transfer_function_ << " vs " + << other.transfer_function(); + } + transfer_function_ = other.transfer_function(); + transfer_function_is_set_ = true; + } + if (!video_full_range_flag_is_set_ || other.video_full_range_flag_is_set_) { + if (video_full_range_flag_is_set_ && + video_full_range_flag_ != other.video_full_range_flag_) { + LOG(WARNING) << "VPx video full-range flag is inconsistent, " + << video_full_range_flag_<< " vs " + << other.video_full_range_flag(); + } + video_full_range_flag_ = other.video_full_range_flag(); + video_full_range_flag_is_set_ = true; + } + if (codec_initialization_data_.empty() || + !other.codec_initialization_data_.empty()) { + if (!codec_initialization_data_.empty() && + codec_initialization_data_ != other.codec_initialization_data_) { + LOG(WARNING) << "VPx codec initialization data is inconsistent"; + } + codec_initialization_data_ = other.codec_initialization_data_; + } +} + } // namespace media } // namespace shaka diff --git a/packager/media/codecs/vp_codec_configuration_record.h b/packager/media/codecs/vp_codec_configuration_record.h index fc6363a284..b298bdd9d1 100644 --- a/packager/media/codecs/vp_codec_configuration_record.h +++ b/packager/media/codecs/vp_codec_configuration_record.h @@ -51,26 +51,52 @@ class VPCodecConfigurationRecord { const std::vector& codec_initialization_data); ~VPCodecConfigurationRecord(); - /// Parses input to extract VP codec configuration record. + /// Parses input (in MP4 format) to extract VP codec configuration record. /// @return false if there is parsing errors. - bool Parse(const std::vector& data); + bool ParseMP4(const std::vector& data); + + /// Parses input (in WebM format) to extract VP codec configuration record. + /// @return false if there is parsing errors. + bool ParseWebM(const std::vector& data); /// @param data should not be null. - /// Writes VP codec configuration record to buffer. - void Write(std::vector* data) const; + /// Writes VP codec configuration record to buffer using MP4 format. + void WriteMP4(std::vector* data) const; + + /// @param data should not be null. + /// Writes VP codec configuration record to buffer using WebM format. + void WriteWebM(std::vector* data) const; /// @return The codec string. std::string GetCodecString(VideoCodec codec) const; - void set_profile(uint8_t profile) { profile_ = profile; } - void set_level(uint8_t level) { level_ = level; } - void set_bit_depth(uint8_t bit_depth) { bit_depth_ = bit_depth; } - void set_color_space(uint8_t color_space) { color_space_ = color_space; } + // Merges the values from the given configuration. If there are values in + // both |*this| and |other|, the values in |other| take precedence. + void MergeFrom(const VPCodecConfigurationRecord& other); + + void set_profile(uint8_t profile) { + profile_ = profile; + profile_is_set_ = true; + } + void set_level(uint8_t level) { + level_ = level; + level_is_set_ = true; + } + void set_bit_depth(uint8_t bit_depth) { + bit_depth_ = bit_depth; + bit_depth_is_set_ = true; + } + void set_color_space(uint8_t color_space) { + color_space_ = color_space; + color_space_is_set_ = true; + } void set_chroma_subsampling(uint8_t chroma_subsampling) { chroma_subsampling_ = chroma_subsampling; + chroma_subsampling_is_set_ = true; } void set_transfer_function(uint8_t transfer_function) { transfer_function_ = transfer_function; + transfer_function_is_set_ = true; } void set_video_full_range_flag(bool video_full_range_flag) { video_full_range_flag_ = video_full_range_flag; @@ -85,13 +111,20 @@ class VPCodecConfigurationRecord { bool video_full_range_flag() const { return video_full_range_flag_; } private: - uint8_t profile_; - uint8_t level_; - uint8_t bit_depth_; - uint8_t color_space_; - uint8_t chroma_subsampling_; - uint8_t transfer_function_; - bool video_full_range_flag_; + uint8_t profile_ = 0; + uint8_t level_ = 0; + uint8_t bit_depth_ = 0; + uint8_t color_space_ = 0; + uint8_t chroma_subsampling_ = 0; + uint8_t transfer_function_ = 0; + bool video_full_range_flag_ = false; + bool profile_is_set_ = false; + bool level_is_set_ = false; + bool bit_depth_is_set_ = false; + bool color_space_is_set_ = false; + bool chroma_subsampling_is_set_ = false; + bool transfer_function_is_set_ = false; + bool video_full_range_flag_is_set_ = false; std::vector codec_initialization_data_; // Not using DISALLOW_COPY_AND_ASSIGN here intentionally to allow the compiler diff --git a/packager/media/codecs/vp_codec_configuration_record_unittest.cc b/packager/media/codecs/vp_codec_configuration_record_unittest.cc index caea6993cd..2dac9227d9 100644 --- a/packager/media/codecs/vp_codec_configuration_record_unittest.cc +++ b/packager/media/codecs/vp_codec_configuration_record_unittest.cc @@ -17,7 +17,7 @@ TEST(VPCodecConfigurationRecordTest, Parse) { }; VPCodecConfigurationRecord vp_config; - ASSERT_TRUE(vp_config.Parse(std::vector( + ASSERT_TRUE(vp_config.ParseMP4(std::vector( kVpCodecConfigurationData, kVpCodecConfigurationData + arraysize(kVpCodecConfigurationData)))); @@ -38,19 +38,38 @@ TEST(VPCodecConfigurationRecordTest, ParseWithInsufficientData) { }; VPCodecConfigurationRecord vp_config; - ASSERT_FALSE(vp_config.Parse(std::vector( + ASSERT_FALSE(vp_config.ParseMP4(std::vector( kVpCodecConfigurationData, kVpCodecConfigurationData + arraysize(kVpCodecConfigurationData)))); } -TEST(VPCodecConfigurationRecordTest, Write) { +TEST(VPCodecConfigurationRecordTest, WriteMP4) { const uint8_t kExpectedVpCodecConfigurationData[] = { 0x02, 0x01, 0x80, 0x21, 0x00, 0x00, }; VPCodecConfigurationRecord vp_config(0x02, 0x01, 0x08, 0x00, 0x02, 0x00, true, std::vector()); std::vector data; - vp_config.Write(&data); + vp_config.WriteMP4(&data); + + EXPECT_EQ( + std::vector(kExpectedVpCodecConfigurationData, + kExpectedVpCodecConfigurationData + + arraysize(kExpectedVpCodecConfigurationData)), + data); +} + +TEST(VPCodecConfigurationRecordTest, WriteWebM) { + const uint8_t kExpectedVpCodecConfigurationData[] = { + 0x01, 0x01, 0x02, + 0x02, 0x01, 0x01, + 0x03, 0x01, 0x08, + 0x04, 0x01, 0x03 + }; + VPCodecConfigurationRecord vp_config(0x02, 0x01, 0x08, 0x00, 0x03, 0x00, true, + std::vector()); + std::vector data; + vp_config.WriteWebM(&data); EXPECT_EQ( std::vector(kExpectedVpCodecConfigurationData, diff --git a/packager/media/formats/mp4/mp4_media_parser.cc b/packager/media/formats/mp4/mp4_media_parser.cc index 75b73bf824..5f1dbbec4a 100644 --- a/packager/media/formats/mp4/mp4_media_parser.cc +++ b/packager/media/formats/mp4/mp4_media_parser.cc @@ -562,7 +562,7 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { case FOURCC_vp09: case FOURCC_vp10: { VPCodecConfigurationRecord vp_config; - if (!vp_config.Parse(entry.codec_configuration.data)) { + if (!vp_config.ParseMP4(entry.codec_configuration.data)) { LOG(ERROR) << "Failed to parse vpcc."; return false; } diff --git a/packager/media/formats/webm/segmenter.cc b/packager/media/formats/webm/segmenter.cc index b6a231239d..8f63e7f97d 100644 --- a/packager/media/formats/webm/segmenter.cc +++ b/packager/media/formats/webm/segmenter.cc @@ -14,6 +14,7 @@ #include "packager/media/base/muxer_util.h" #include "packager/media/base/stream_info.h" #include "packager/media/base/video_stream_info.h" +#include "packager/media/codecs/vp_codec_configuration_record.h" #include "packager/media/event/muxer_listener.h" #include "packager/media/event/progress_listener.h" #include "packager/third_party/libwebm/src/mkvmuxerutil.hpp" @@ -271,8 +272,23 @@ Status Segmenter::CreateVideoTrack(VideoStreamInfo* info) { track->set_codec_id(mkvmuxer::Tracks::kVp8CodecId); } else if (info->codec() == kCodecVP9) { track->set_codec_id(mkvmuxer::Tracks::kVp9CodecId); + + // The |StreamInfo::extra_data| field is stored using the MP4 format; we + // need to convert it to the WebM format. + VPCodecConfigurationRecord vp_config; + if (!vp_config.ParseMP4(info->extra_data())) { + return Status(error::INTERNAL_ERROR, + "Unable to parse VP9 codec configuration"); + } + + std::vector extra_data; + vp_config.WriteWebM(&extra_data); + if (!track->SetCodecPrivate(extra_data.data(), extra_data.size())) { + return Status(error::INTERNAL_ERROR, + "Private codec data required for VP9 streams"); + } } else { - LOG(ERROR) << "Only VP8 and VP9 video codec is supported."; + LOG(ERROR) << "Only VP8 and VP9 video codecs are supported."; return Status(error::UNIMPLEMENTED, "Only VP8 and VP9 video codecs are supported."); } diff --git a/packager/media/formats/webm/webm_cluster_parser.cc b/packager/media/formats/webm/webm_cluster_parser.cc index fdfb2e74fa..946c6a3aa9 100644 --- a/packager/media/formats/webm/webm_cluster_parser.cc +++ b/packager/media/formats/webm/webm_cluster_parser.cc @@ -456,12 +456,15 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, return false; } - const VPCodecConfigurationRecord* codec_config = - &vpx_parser->codec_config(); + VPCodecConfigurationRecord codec_config; + if (!video_stream_info_->extra_data().empty()) + codec_config.ParseMP4(video_stream_info_->extra_data()); + codec_config.MergeFrom(vpx_parser->codec_config()); + video_stream_info_->set_codec_string( - codec_config->GetCodecString(video_stream_info_->codec())); + codec_config.GetCodecString(video_stream_info_->codec())); std::vector extra_data; - codec_config->Write(&extra_data); + codec_config.WriteMP4(&extra_data); video_stream_info_->set_extra_data(extra_data); streams.push_back(video_stream_info_); init_cb_.Run(streams); diff --git a/packager/media/formats/webm/webm_video_client.cc b/packager/media/formats/webm/webm_video_client.cc index dc832e23c1..200442f90e 100644 --- a/packager/media/formats/webm/webm_video_client.cc +++ b/packager/media/formats/webm/webm_video_client.cc @@ -6,6 +6,7 @@ #include "packager/base/logging.h" #include "packager/base/stl_util.h" +#include "packager/media/codecs/vp_codec_configuration_record.h" #include "packager/media/formats/webm/webm_constants.h" namespace { @@ -50,13 +51,22 @@ void WebMVideoClient::Reset() { scoped_refptr WebMVideoClient::GetVideoStreamInfo( int64_t track_num, const std::string& codec_id, - const std::vector& codec_private, + const std::vector& codec_private_in, bool is_encrypted) { + std::vector codec_private = codec_private_in; VideoCodec video_codec = kUnknownVideoCodec; if (codec_id == "V_VP8") { video_codec = kCodecVP8; } else if (codec_id == "V_VP9") { video_codec = kCodecVP9; + + // Need to parse and convert the codec private data to MP4 format. + VPCodecConfigurationRecord vp_config; + if (!vp_config.ParseWebM(codec_private)) { + LOG(ERROR) << "Unable to parse VP9 codec configuration"; + return scoped_refptr(); + } + vp_config.WriteMP4(&codec_private); } else if (codec_id == "V_VP10") { video_codec = kCodecVP10; } else { @@ -107,8 +117,8 @@ scoped_refptr WebMVideoClient::GetVideoStreamInfo( return scoped_refptr(new VideoStreamInfo( track_num, kWebMTimeScale, 0, video_codec, std::string(), std::string(), - width_after_crop, height_after_crop, sar_x, sar_y, 0, 0, NULL, 0, - is_encrypted)); + width_after_crop, height_after_crop, sar_x, sar_y, 0, 0, + codec_private.data(), codec_private.size(), is_encrypted)); } bool WebMVideoClient::OnUInt(int id, int64_t val) {