diff --git a/packager/app/test/packager_test.py b/packager/app/test/packager_test.py index c7c5c0ad1f..6ebce76ce8 100755 --- a/packager/app/test/packager_test.py +++ b/packager/app/test/packager_test.py @@ -252,22 +252,22 @@ class PackagerAppTest(unittest.TestCase): self.packager.Package( self._GetStreams(['video'], output_format='webm', - test_files=['bear-640x360-vp9-altref.webm']), + test_files=['bear-320x180-vp9-altref.webm']), self._GetFlags(encryption=True)) - self._DiffGold(self.output[0], 'bear-640x360-vp9-altref-enc-golden.webm') + self._DiffGold(self.output[0], 'bear-320x180-vp9-altref-enc-golden.webm') self._VerifyDecryption(self.output[0], - 'bear-640x360-vp9-altref-dec-golden.webm') + 'bear-320x180-vp9-altref-dec-golden.webm') def testPackageWithWebmVp9FullSampleEncryption(self): self.packager.Package( self._GetStreams(['video'], output_format='webm', - test_files=['bear-640x360-vp9-altref.webm']), + test_files=['bear-320x180-vp9-altref.webm']), self._GetFlags(encryption=True, vp9_subsample_encryption=False)) self._DiffGold(self.output[0], - 'bear-640x360-vp9-fullsample-enc-golden.webm') + 'bear-320x180-vp9-fullsample-enc-golden.webm') self._VerifyDecryption(self.output[0], - 'bear-640x360-vp9-altref-dec-golden.webm') + 'bear-320x180-vp9-altref-dec-golden.webm') def testPackageAvcTsWithEncryption(self): # Currently we only support live packaging for ts. 
diff --git a/packager/app/test/testdata/bear-320x180-vp9-altref-dec-golden.webm b/packager/app/test/testdata/bear-320x180-vp9-altref-dec-golden.webm new file mode 100644 index 0000000000..f53aee8719 Binary files /dev/null and b/packager/app/test/testdata/bear-320x180-vp9-altref-dec-golden.webm differ diff --git a/packager/app/test/testdata/bear-320x180-vp9-altref-enc-golden.webm b/packager/app/test/testdata/bear-320x180-vp9-altref-enc-golden.webm new file mode 100644 index 0000000000..85a7f30c15 Binary files /dev/null and b/packager/app/test/testdata/bear-320x180-vp9-altref-enc-golden.webm differ diff --git a/packager/app/test/testdata/bear-320x180-vp9-fullsample-enc-golden.webm b/packager/app/test/testdata/bear-320x180-vp9-fullsample-enc-golden.webm new file mode 100644 index 0000000000..85a7f30c15 Binary files /dev/null and b/packager/app/test/testdata/bear-320x180-vp9-fullsample-enc-golden.webm differ diff --git a/packager/app/test/testdata/bear-640x360-vp9-altref-dec-golden.webm b/packager/app/test/testdata/bear-640x360-vp9-altref-dec-golden.webm deleted file mode 100644 index ef57017720..0000000000 Binary files a/packager/app/test/testdata/bear-640x360-vp9-altref-dec-golden.webm and /dev/null differ diff --git a/packager/app/test/testdata/bear-640x360-vp9-altref-enc-golden.webm b/packager/app/test/testdata/bear-640x360-vp9-altref-enc-golden.webm deleted file mode 100644 index 997c263a72..0000000000 Binary files a/packager/app/test/testdata/bear-640x360-vp9-altref-enc-golden.webm and /dev/null differ diff --git a/packager/app/test/testdata/bear-640x360-vp9-fullsample-enc-golden.webm b/packager/app/test/testdata/bear-640x360-vp9-fullsample-enc-golden.webm deleted file mode 100644 index 83046b2036..0000000000 Binary files a/packager/app/test/testdata/bear-640x360-vp9-fullsample-enc-golden.webm and /dev/null differ diff --git a/packager/media/codecs/vp8_parser.cc b/packager/media/codecs/vp8_parser.cc index 6a7b0f4c8b..430fac4f23 100644 --- 
a/packager/media/codecs/vp8_parser.cc +++ b/packager/media/codecs/vp8_parser.cc @@ -152,7 +152,7 @@ bool VP8Parser::Parse(const uint8_t* data, // VP8 uses an 8-bit YUV 4:2:0 format. // http://tools.ietf.org/html/rfc6386 Section 2. writable_codec_config()->set_bit_depth(8); - writable_codec_config()->set_chroma_subsampling( + writable_codec_config()->SetChromaSubsampling( VPCodecConfigurationRecord::CHROMA_420_COLLOCATED_WITH_LUMA); VPxFrameInfo vpx_frame; diff --git a/packager/media/codecs/vp9_parser.cc b/packager/media/codecs/vp9_parser.cc index cacf6d6232..006096ffc9 100644 --- a/packager/media/codecs/vp9_parser.cc +++ b/packager/media/codecs/vp9_parser.cc @@ -269,7 +269,7 @@ bool ReadBitDepthAndColorSpace(BitReader* reader, if (chroma_subsampling == VPCodecConfigurationRecord::CHROMA_420_COLLOCATED_WITH_LUMA) { LOG(ERROR) << "4:2:0 color not supported in profile " - << codec_config->profile(); + << static_cast(codec_config->profile()); return false; } @@ -293,7 +293,7 @@ bool ReadBitDepthAndColorSpace(BitReader* reader, } } codec_config->set_video_full_range_flag(yuv_full_range); - codec_config->set_chroma_subsampling(chroma_subsampling); + codec_config->SetChromaSubsampling(chroma_subsampling); VLOG(3) << "\n profile " << static_cast(codec_config->profile()) << "\n bit depth " << static_cast(codec_config->bit_depth()) @@ -511,7 +511,7 @@ bool VP9Parser::Parse(const uint8_t* data, // specification of either the color format or color sub-sampling in // profile 0. VP9 specifies that the default color format should be // YUV 4:2:0 in this case (normative). 
- writable_codec_config()->set_chroma_subsampling( + writable_codec_config()->SetChromaSubsampling( VPCodecConfigurationRecord::CHROMA_420_COLLOCATED_WITH_LUMA); writable_codec_config()->set_bit_depth(8); } diff --git a/packager/media/codecs/vp_codec_configuration_record.cc b/packager/media/codecs/vp_codec_configuration_record.cc index f4803c4294..9a8fc28c2a 100644 --- a/packager/media/codecs/vp_codec_configuration_record.cc +++ b/packager/media/codecs/vp_codec_configuration_record.cc @@ -112,6 +112,7 @@ bool VPCodecConfigurationRecord::ParseMP4(const std::vector& data) { return true; } +// http://wiki.webmproject.org/vp9-codecprivate bool VPCodecConfigurationRecord::ParseWebM(const std::vector& data) { BufferReader reader(data.data(), data.size()); @@ -246,6 +247,70 @@ void VPCodecConfigurationRecord::MergeFrom( } codec_initialization_data_ = other.codec_initialization_data_; } + + MergeField("chroma location", other.chroma_location_, &chroma_location_); + UpdateChromaSubsamplingIfNeeded(); +} + +void VPCodecConfigurationRecord::SetChromaSubsampling(uint8_t subsampling_x, + uint8_t subsampling_y) { + VLOG(3) << "Set Chroma subsampling " << static_cast(subsampling_x) << " " + << static_cast(subsampling_y); + if (subsampling_x == 0 && subsampling_y == 0) { + chroma_subsampling_ = CHROMA_444; + } else if (subsampling_x == 0 && subsampling_y == 1) { + chroma_subsampling_ = CHROMA_440; + } else if (subsampling_x == 1 && subsampling_y == 0) { + chroma_subsampling_ = CHROMA_422; + } else if (subsampling_x == 1 && subsampling_y == 1) { + // VP9 assumes that chroma samples are collocated with luma samples if + // there is no explicit signaling outside of VP9 bitstream. 
+ chroma_subsampling_ = CHROMA_420_COLLOCATED_WITH_LUMA; + } else { + LOG(WARNING) << "Unexpected chroma subsampling values: " + << static_cast(subsampling_x) << " " + << static_cast(subsampling_y); + } + UpdateChromaSubsamplingIfNeeded(); +} + +void VPCodecConfigurationRecord::SetChromaSubsampling( + ChromaSubsampling chroma_subsampling) { + chroma_subsampling_ = chroma_subsampling; + UpdateChromaSubsamplingIfNeeded(); +} + +void VPCodecConfigurationRecord::SetChromaLocation(uint8_t chroma_siting_x, + uint8_t chroma_siting_y) { + VLOG(3) << "Set Chroma Location " << static_cast(chroma_siting_x) << " " + << static_cast(chroma_siting_y); + if (chroma_siting_x == kLeftCollocated && chroma_siting_y == kTopCollocated) { + chroma_location_ = AVCHROMA_LOC_TOPLEFT; + } else if (chroma_siting_x == kLeftCollocated && chroma_siting_y == kHalf) { + chroma_location_ = AVCHROMA_LOC_LEFT; + } else if (chroma_siting_x == kHalf && chroma_siting_y == kTopCollocated) { + chroma_location_ = AVCHROMA_LOC_TOP; + } else if (chroma_siting_x == kHalf && chroma_siting_y == kHalf) { + chroma_location_ = AVCHROMA_LOC_CENTER; + } else { + LOG(WARNING) << "Unexpected chroma siting values: " + << static_cast(chroma_siting_x) << " " + << static_cast(chroma_siting_y); + } + UpdateChromaSubsamplingIfNeeded(); +} + +void VPCodecConfigurationRecord::UpdateChromaSubsamplingIfNeeded() { + // Use chroma location to fix the chroma subsampling format. 
+ if (chroma_location_ && chroma_subsampling_ && + (*chroma_subsampling_ == CHROMA_420_VERTICAL || + *chroma_subsampling_ == CHROMA_420_COLLOCATED_WITH_LUMA)) { + if (*chroma_location_ == AVCHROMA_LOC_TOPLEFT) + chroma_subsampling_ = CHROMA_420_COLLOCATED_WITH_LUMA; + else if (*chroma_location_ == AVCHROMA_LOC_LEFT) + chroma_subsampling_ = CHROMA_420_VERTICAL; + VLOG(3) << "Chroma subsampling " << static_cast(*chroma_subsampling_); + } } } // namespace media diff --git a/packager/media/codecs/vp_codec_configuration_record.h b/packager/media/codecs/vp_codec_configuration_record.h index 91aa7075be..9433c42a16 100644 --- a/packager/media/codecs/vp_codec_configuration_record.h +++ b/packager/media/codecs/vp_codec_configuration_record.h @@ -121,6 +121,34 @@ enum AVColorSpace { AVCOL_SPC_NB }; +/// Location of chroma samples. +/// +/// Illustration showing the location of the first (top left) chroma sample of +/// the image, the left shows only luma, the right shows the location of the +/// chroma sample, the 2 could be imagined to overlay each other but are drawn +/// separately due to limitations of ASCII +/// +/// 1st 2nd 1st 2nd horizontal luma sample positions +/// v v v v +/// ______ ______ +/// 1st luma line > |X X ... |3 4 X ... X are luma samples, +/// | |1 2 1-6 are possible chroma positions +/// 2nd luma line > |X X ... |5 6 X ... 0 is undefined/unknown position +enum AVChromaLocation { + AVCHROMA_LOC_UNSPECIFIED = 0, + /// MPEG-2/4 4:2:0, H.264 default for 4:2:0 + AVCHROMA_LOC_LEFT = 1, + /// MPEG-1 4:2:0, JPEG 4:2:0, H.263 4:2:0 + AVCHROMA_LOC_CENTER = 2, + /// ITU-R 601, SMPTE 274M 296M S314M(DV 4:1:1), mpeg2 4:2:2 + AVCHROMA_LOC_TOPLEFT = 3, + AVCHROMA_LOC_TOP = 4, + AVCHROMA_LOC_BOTTOMLEFT = 5, + AVCHROMA_LOC_BOTTOM = 6, + /// Not part of ABI + AVCHROMA_LOC_NB +}; + /// Class for parsing or writing VP codec configuration record. 
class VPCodecConfigurationRecord { public: @@ -131,6 +159,12 @@ class VPCodecConfigurationRecord { CHROMA_444 = 3, CHROMA_440 = 4, }; + enum ChromaSitingValues { + kUnspecified = 0, + kLeftCollocated = 1, + kTopCollocated = kLeftCollocated, + kHalf = 2, + }; VPCodecConfigurationRecord(); VPCodecConfigurationRecord( @@ -164,16 +198,17 @@ class VPCodecConfigurationRecord { /// @return The codec string. std::string GetCodecString(Codec codec) const; - // Merges the values from the given configuration. If there are values in - // both |*this| and |other|, the values in |other| take precedence. + /// Merges the values from the given configuration. If there are values in + /// both |*this| and |other|, |*this| is not updated. void MergeFrom(const VPCodecConfigurationRecord& other); + void SetChromaSubsampling(uint8_t subsampling_x, uint8_t subsampling_y); + void SetChromaSubsampling(ChromaSubsampling chroma_subsampling); + void SetChromaLocation(uint8_t chroma_siting_x, uint8_t chroma_siting_y); + void set_profile(uint8_t profile) { profile_ = profile; } void set_level(uint8_t level) { level_ = level; } void set_bit_depth(uint8_t bit_depth) { bit_depth_ = bit_depth; } - void set_chroma_subsampling(uint8_t chroma_subsampling) { - chroma_subsampling_ = chroma_subsampling; - } void set_video_full_range_flag(bool video_full_range_flag) { video_full_range_flag_ = video_full_range_flag; } @@ -187,6 +222,28 @@ class VPCodecConfigurationRecord { matrix_coefficients_ = matrix_coefficients; } + bool is_profile_set() const { return static_cast(profile_); } + bool is_level_set() const { return static_cast(level_); } + bool is_bit_depth_set() const { return static_cast(bit_depth_); } + bool is_chroma_subsampling_set() const { + return static_cast(chroma_subsampling_); + } + bool is_video_full_range_flag_set() const { + return static_cast(video_full_range_flag_); + } + bool is_color_primaries_set() const { + return static_cast(color_primaries_); + } + bool 
is_transfer_characteristics_set() const { + return static_cast(transfer_characteristics_); + } + bool is_matrix_coefficients_set() const { + return static_cast(matrix_coefficients_); + } + bool is_chroma_location_set() const { + return static_cast(chroma_location_); + } + uint8_t profile() const { return profile_.value_or(0); } uint8_t level() const { return level_.value_or(10); } uint8_t bit_depth() const { return bit_depth_.value_or(8); } @@ -205,8 +262,13 @@ class VPCodecConfigurationRecord { uint8_t matrix_coefficients() const { return matrix_coefficients_.value_or(AVCOL_SPC_UNSPECIFIED); } + uint8_t chroma_location() const { + return chroma_location_ ? *chroma_location_ : AVCHROMA_LOC_UNSPECIFIED; + } private: + void UpdateChromaSubsamplingIfNeeded(); + base::Optional profile_; base::Optional level_; base::Optional bit_depth_; @@ -217,6 +279,9 @@ class VPCodecConfigurationRecord { base::Optional matrix_coefficients_; std::vector codec_initialization_data_; + // Not in the decoder config. It is there to help determine chroma subsampling + // format. + base::Optional chroma_location_; // Not using DISALLOW_COPY_AND_ASSIGN here intentionally to allow the compiler // generated copy constructor and assignment operator. Since the internal data // is small, the performance impact is minimal. diff --git a/packager/media/codecs/vp_codec_configuration_record_unittest.cc b/packager/media/codecs/vp_codec_configuration_record_unittest.cc index dea8998052..a8f41a747b 100644 --- a/packager/media/codecs/vp_codec_configuration_record_unittest.cc +++ b/packager/media/codecs/vp_codec_configuration_record_unittest.cc @@ -76,5 +76,90 @@ TEST(VPCodecConfigurationRecordTest, WriteWebM) { data); } +TEST(VPCodecConfigurationRecordTest, SetAttributes) { + VPCodecConfigurationRecord vp_config; + // None of the members are set. 
+ EXPECT_FALSE(vp_config.is_profile_set()); + EXPECT_FALSE(vp_config.is_level_set()); + EXPECT_FALSE(vp_config.is_bit_depth_set()); + EXPECT_FALSE(vp_config.is_chroma_subsampling_set()); + EXPECT_FALSE(vp_config.is_video_full_range_flag_set()); + EXPECT_FALSE(vp_config.is_color_primaries_set()); + EXPECT_FALSE(vp_config.is_transfer_characteristics_set()); + EXPECT_FALSE(vp_config.is_matrix_coefficients_set()); + + const uint8_t kProfile = 2; + vp_config.set_profile(kProfile); + EXPECT_TRUE(vp_config.is_profile_set()); + EXPECT_EQ(kProfile, vp_config.profile()); +} + +TEST(VPCodecConfigurationRecordTest, SetChromaSubsampling) { + VPCodecConfigurationRecord vp_config; + vp_config.SetChromaSubsampling(1, 1); + EXPECT_TRUE(vp_config.is_chroma_subsampling_set()); + EXPECT_FALSE(vp_config.is_chroma_location_set()); + EXPECT_EQ(VPCodecConfigurationRecord::CHROMA_420_COLLOCATED_WITH_LUMA, + vp_config.chroma_subsampling()); + + vp_config.SetChromaLocation(VPCodecConfigurationRecord::kLeftCollocated, + VPCodecConfigurationRecord::kHalf); + EXPECT_TRUE(vp_config.is_chroma_location_set()); + EXPECT_EQ(VPCodecConfigurationRecord::CHROMA_420_VERTICAL, + vp_config.chroma_subsampling()); +} + +TEST(VPCodecConfigurationRecordTest, Merge) { + const uint8_t kProfile = 2; + const uint8_t kLevel = 20; + + VPCodecConfigurationRecord vp_config; + vp_config.set_profile(kProfile); + + VPCodecConfigurationRecord vp_config2; + vp_config2.set_profile(kProfile - 1); + vp_config2.set_level(kLevel); + + vp_config.MergeFrom(vp_config2); + EXPECT_TRUE(vp_config.is_profile_set()); + EXPECT_TRUE(vp_config.is_level_set()); + EXPECT_FALSE(vp_config.is_bit_depth_set()); + EXPECT_FALSE(vp_config.is_chroma_subsampling_set()); + EXPECT_FALSE(vp_config.is_video_full_range_flag_set()); + EXPECT_FALSE(vp_config.is_color_primaries_set()); + EXPECT_FALSE(vp_config.is_transfer_characteristics_set()); + EXPECT_FALSE(vp_config.is_matrix_coefficients_set()); + + // Profile is set in the original config, so not 
changed. + EXPECT_EQ(kProfile, vp_config.profile()); + // Merge level from the other config. + EXPECT_EQ(kLevel, vp_config.level()); +} + +TEST(VPCodecConfigurationRecordTest, MergeChromaSubsampling) { + VPCodecConfigurationRecord vp_config; + vp_config.SetChromaSubsampling( + VPCodecConfigurationRecord::CHROMA_420_VERTICAL); + + VPCodecConfigurationRecord vp_config2; + vp_config2.SetChromaLocation(VPCodecConfigurationRecord::kLeftCollocated, + VPCodecConfigurationRecord::kTopCollocated); + + vp_config.MergeFrom(vp_config2); + EXPECT_FALSE(vp_config.is_profile_set()); + EXPECT_FALSE(vp_config.is_level_set()); + EXPECT_FALSE(vp_config.is_bit_depth_set()); + EXPECT_TRUE(vp_config.is_chroma_subsampling_set()); + EXPECT_TRUE(vp_config.is_chroma_location_set()); + EXPECT_FALSE(vp_config.is_video_full_range_flag_set()); + EXPECT_FALSE(vp_config.is_color_primaries_set()); + EXPECT_FALSE(vp_config.is_transfer_characteristics_set()); + EXPECT_FALSE(vp_config.is_matrix_coefficients_set()); + + EXPECT_EQ(VPCodecConfigurationRecord::CHROMA_420_COLLOCATED_WITH_LUMA, + vp_config.chroma_subsampling()); + EXPECT_EQ(AVCHROMA_LOC_TOPLEFT, vp_config.chroma_location()); +} + } // namespace media } // namespace shaka diff --git a/packager/media/formats/webm/segmenter.cc b/packager/media/formats/webm/segmenter.cc index 5b7721aae3..f04de8db8f 100644 --- a/packager/media/formats/webm/segmenter.cc +++ b/packager/media/formats/webm/segmenter.cc @@ -255,11 +255,26 @@ Status Segmenter::InitializeVideoTrack(const VideoStreamInfo* info, "Unable to parse VP9 codec configuration"); } + mkvmuxer::Colour colour; + if (vp_config.matrix_coefficients() != AVCOL_SPC_UNSPECIFIED) { + colour.set_matrix_coefficients(vp_config.matrix_coefficients()); + } + if (vp_config.transfer_characteristics() != AVCOL_TRC_UNSPECIFIED) { + colour.set_transfer_characteristics(vp_config.transfer_characteristics()); + } + if (vp_config.color_primaries() != AVCOL_PRI_UNSPECIFIED) { + 
colour.set_primaries(vp_config.color_primaries()); + } + if (!track->SetColour(colour)) { + return Status(error::INTERNAL_ERROR, + "Failed to setup color element for VPx streams"); + } + std::vector codec_config; vp_config.WriteWebM(&codec_config); if (!track->SetCodecPrivate(codec_config.data(), codec_config.size())) { return Status(error::INTERNAL_ERROR, - "Private codec data required for VP9 streams"); + "Private codec data required for VPx streams"); } } else { LOG(ERROR) << "Only VP8 and VP9 video codecs are supported."; diff --git a/packager/media/formats/webm/webm_cluster_parser.cc b/packager/media/formats/webm/webm_cluster_parser.cc index 9126435488..023c6526da 100644 --- a/packager/media/formats/webm/webm_cluster_parser.cc +++ b/packager/media/formats/webm/webm_cluster_parser.cc @@ -30,6 +30,7 @@ WebMClusterParser::WebMClusterParser( int64_t timecode_scale, std::shared_ptr audio_stream_info, std::shared_ptr video_stream_info, + const VPCodecConfigurationRecord& vp_config, int64_t audio_default_duration, int64_t video_default_duration, const WebMTracksParser::TextTracks& text_tracks, @@ -39,9 +40,11 @@ WebMClusterParser::WebMClusterParser( const MediaParser::NewSampleCB& new_sample_cb, const MediaParser::InitCB& init_cb, KeySource* decryption_key_source) - : timecode_multiplier_(timecode_scale / 1000.0), + : timecode_multiplier_(timecode_scale / + static_cast(kMicrosecondsPerMillisecond)), audio_stream_info_(audio_stream_info), video_stream_info_(video_stream_info), + vp_config_(vp_config), ignored_tracks_(ignored_tracks), audio_encryption_key_id_(audio_encryption_key_id), video_encryption_key_id_(video_encryption_key_id), @@ -441,15 +444,11 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, return false; } - VPCodecConfigurationRecord codec_config; - if (!video_stream_info_->codec_config().empty()) - codec_config.ParseWebM(video_stream_info_->codec_config()); - codec_config.MergeFrom(vpx_parser->codec_config()); - + 
vp_config_.MergeFrom(vpx_parser->codec_config()); video_stream_info_->set_codec_string( - codec_config.GetCodecString(video_stream_info_->codec())); + vp_config_.GetCodecString(video_stream_info_->codec())); std::vector config_serialized; - codec_config.WriteMP4(&config_serialized); + vp_config_.WriteMP4(&config_serialized); video_stream_info_->set_codec_config(config_serialized); streams.push_back(video_stream_info_); init_cb_.Run(streams); diff --git a/packager/media/formats/webm/webm_cluster_parser.h b/packager/media/formats/webm/webm_cluster_parser.h index 5a1a09229d..47664d38aa 100644 --- a/packager/media/formats/webm/webm_cluster_parser.h +++ b/packager/media/formats/webm/webm_cluster_parser.h @@ -101,6 +101,8 @@ class WebMClusterParser : public WebMParserClient { /// be NULL if there are no audio tracks available. /// @param video_stream_info references video stream information. It will /// be NULL if there are no video tracks available. + /// @param vp_config references vp configuration record. Only useful for + /// video. /// @param audio_default_duration indicates default duration for audio /// samples. 
/// @param video_default_duration indicates default duration for video @@ -120,6 +122,7 @@ class WebMClusterParser : public WebMParserClient { WebMClusterParser(int64_t timecode_scale, std::shared_ptr audio_stream_info, std::shared_ptr video_stream_info, + const VPCodecConfigurationRecord& vp_config, int64_t audio_default_duration, int64_t video_default_duration, const WebMTracksParser::TextTracks& text_tracks, @@ -188,6 +191,7 @@ class WebMClusterParser : public WebMParserClient { std::shared_ptr audio_stream_info_; std::shared_ptr video_stream_info_; + VPCodecConfigurationRecord vp_config_; std::set ignored_tracks_; std::unique_ptr decryptor_source_; diff --git a/packager/media/formats/webm/webm_cluster_parser_unittest.cc b/packager/media/formats/webm/webm_cluster_parser_unittest.cc index 249d2953d1..65694f7427 100644 --- a/packager/media/formats/webm/webm_cluster_parser_unittest.cc +++ b/packager/media/formats/webm/webm_cluster_parser_unittest.cc @@ -407,8 +407,9 @@ class WebMClusterParserTest : public testing::Test { video_stream_info_->set_codec(video_codec); return new WebMClusterParser( kTimecodeScale, audio_stream_info_, video_stream_info_, - audio_default_duration, video_default_duration, text_tracks, - ignored_tracks, audio_encryption_key_id, video_encryption_key_id, + VPCodecConfigurationRecord(), audio_default_duration, + video_default_duration, text_tracks, ignored_tracks, + audio_encryption_key_id, video_encryption_key_id, base::Bind(&WebMClusterParserTest::NewSampleEvent, base::Unretained(this)), init_cb, &mock_key_source_); diff --git a/packager/media/formats/webm/webm_constants.h b/packager/media/formats/webm/webm_constants.h index 214aeb00f5..154e5b8a0b 100644 --- a/packager/media/formats/webm/webm_constants.h +++ b/packager/media/formats/webm/webm_constants.h @@ -63,6 +63,21 @@ const int kWebMIdCodecID = 0x86; const int kWebMIdCodecName = 0x258688; const int kWebMIdCodecPrivate = 0x63A2; const int kWebMIdCodecState = 0xA4; +const int kWebMIdColor 
= 0x55B0; +const int kWebMIdColorMatrixCoefficients = 0x55B1; +const int kWebMIdColorBitsPerChannel = 0x55B2; +const int kWebMIdColorChromaSubsamplingHorz = 0x55B3; +const int kWebMIdColorChromaSubsamplingVert = 0x55B4; +const int kWebMIdColorCbSamplingHorz = 0x55B5; +const int kWebMIdColorCbSamplingVert = 0x55B6; +const int kWebMIdColorChromaSitingHorz = 0x55B7; +const int kWebMIdColorChromaSitingVert = 0x55B8; +const int kWebMIdColorRange = 0x55B9; +const int kWebMIdColorTransferCharacteristics = 0x55BA; +const int kWebMIdColorPrimaries = 0x55BB; +const int kWebMIdColorMaxCLL = 0x55BC; +const int kWebMIdColorMaxFALL = 0x55BD; +const int kWebMIdColorMasteringMetadata = 0x55D0; const int kWebMIdColorSpace = 0x2EB524; const int kWebMIdContentCompAlgo = 0x4254; const int kWebMIdContentCompression = 0x5034; diff --git a/packager/media/formats/webm/webm_media_parser.cc b/packager/media/formats/webm/webm_media_parser.cc index be231eb797..fc13148a61 100644 --- a/packager/media/formats/webm/webm_media_parser.cc +++ b/packager/media/formats/webm/webm_media_parser.cc @@ -207,6 +207,7 @@ int WebMMediaParser::ParseInfoAndTracks(const uint8_t* data, int size) { cluster_parser_.reset(new WebMClusterParser( info_parser.timecode_scale(), audio_stream_info, video_stream_info, + tracks_parser.vp_config(), tracks_parser.GetAudioDefaultDuration(timecode_scale_in_us), tracks_parser.GetVideoDefaultDuration(timecode_scale_in_us), tracks_parser.text_tracks(), tracks_parser.ignored_tracks(), diff --git a/packager/media/formats/webm/webm_parser.cc b/packager/media/formats/webm/webm_parser.cc index 27a8db457a..62c66de39c 100644 --- a/packager/media/formats/webm/webm_parser.cc +++ b/packager/media/formats/webm/webm_parser.cc @@ -197,6 +197,24 @@ static const ElementIdInfo kVideoIds[] = { {UINT, kWebMIdAspectRatioType}, {BINARY, kWebMIdColorSpace}, {FLOAT, kWebMIdFrameRate}, + {LIST, kWebMIdColor}, +}; + +static const ElementIdInfo kColorIds[] = { + {UINT, kWebMIdColorMatrixCoefficients}, + 
{UINT, kWebMIdColorBitsPerChannel}, + {UINT, kWebMIdColorChromaSubsamplingHorz}, + {UINT, kWebMIdColorChromaSubsamplingVert}, + {UINT, kWebMIdColorCbSamplingHorz}, + {UINT, kWebMIdColorCbSamplingVert}, + {UINT, kWebMIdColorChromaSitingHorz}, + {UINT, kWebMIdColorChromaSitingVert}, + {UINT, kWebMIdColorRange}, + {UINT, kWebMIdColorTransferCharacteristics}, + {UINT, kWebMIdColorPrimaries}, + {UINT, kWebMIdColorMaxCLL}, + {UINT, kWebMIdColorMaxFALL}, + {LIST, kWebMIdColorMasteringMetadata}, }; static const ElementIdInfo kAudioIds[] = { @@ -382,6 +400,7 @@ static const ListElementInfo kListElementInfo[] = { LIST_ELEMENT_INFO(kWebMIdTrackEntry, 2, kTrackEntryIds), LIST_ELEMENT_INFO(kWebMIdTrackTranslate, 3, kTrackTranslateIds), LIST_ELEMENT_INFO(kWebMIdVideo, 3, kVideoIds), + LIST_ELEMENT_INFO(kWebMIdColor, 4, kColorIds), LIST_ELEMENT_INFO(kWebMIdAudio, 3, kAudioIds), LIST_ELEMENT_INFO(kWebMIdTrackOperation, 3, kTrackOperationIds), LIST_ELEMENT_INFO(kWebMIdTrackCombinePlanes, 4, kTrackCombinePlanesIds), diff --git a/packager/media/formats/webm/webm_tracks_parser.cc b/packager/media/formats/webm/webm_tracks_parser.cc index f496f0b6e2..246bcb1513 100644 --- a/packager/media/formats/webm/webm_tracks_parser.cc +++ b/packager/media/formats/webm/webm_tracks_parser.cc @@ -224,9 +224,9 @@ bool WebMTracksParser::OnListEnd(int id) { video_default_duration_ = default_duration_; DCHECK(!video_stream_info_); + vp_config_ = video_client_.GetVpCodecConfig(codec_private_); video_stream_info_ = video_client_.GetVideoStreamInfo( - video_track_num_, codec_id_, codec_private_, - !video_encryption_key_id_.empty()); + video_track_num_, codec_id_, !video_encryption_key_id_.empty()); if (!video_stream_info_) return false; } else { diff --git a/packager/media/formats/webm/webm_tracks_parser.h b/packager/media/formats/webm/webm_tracks_parser.h index ecd001d113..71d7096ae2 100644 --- a/packager/media/formats/webm/webm_tracks_parser.h +++ b/packager/media/formats/webm/webm_tracks_parser.h @@ -69,6 
+69,8 @@ class WebMTracksParser : public WebMParserClient { return text_tracks_; } + const VPCodecConfigurationRecord& vp_config() const { return vp_config_; } + private: // WebMParserClient implementation. WebMParserClient* OnListStart(int id) override; @@ -103,6 +105,7 @@ class WebMTracksParser : public WebMParserClient { std::shared_ptr audio_stream_info_; WebMVideoClient video_client_; + VPCodecConfigurationRecord vp_config_; std::shared_ptr video_stream_info_; DISALLOW_COPY_AND_ASSIGN(WebMTracksParser); diff --git a/packager/media/formats/webm/webm_video_client.cc b/packager/media/formats/webm/webm_video_client.cc index e742951d6c..d9fda12e9a 100644 --- a/packager/media/formats/webm/webm_video_client.cc +++ b/packager/media/formats/webm/webm_video_client.cc @@ -45,12 +45,17 @@ void WebMVideoClient::Reset() { display_height_ = -1; display_unit_ = -1; alpha_mode_ = -1; + + vp_config_ = VPCodecConfigurationRecord(); + chroma_subsampling_horz_ = -1; + chroma_subsampling_vert_ = -1; + chroma_siting_horz_ = -1; + chroma_siting_vert_ = -1; } std::shared_ptr WebMVideoClient::GetVideoStreamInfo( int64_t track_num, const std::string& codec_id, - const std::vector& codec_private, bool is_encrypted) { Codec video_codec = kUnknownCodec; if (codec_id == "V_VP8") { @@ -110,12 +115,33 @@ std::shared_ptr WebMVideoClient::GetVideoStreamInfo( return std::make_shared( track_num, kWebMTimeScale, 0, video_codec, H26xStreamFormat::kUnSpecified, - std::string(), codec_private.data(), codec_private.size(), - width_after_crop, height_after_crop, sar_x, sar_y, 0, 0, std::string(), - is_encrypted); + std::string(), nullptr, 0, width_after_crop, height_after_crop, sar_x, + sar_y, 0, 0, std::string(), is_encrypted); +} + +const VPCodecConfigurationRecord& WebMVideoClient::GetVpCodecConfig( + const std::vector& codec_private) { + vp_config_.ParseWebM(codec_private); + if (chroma_subsampling_horz_ != -1 && chroma_subsampling_vert_ != -1) { + 
vp_config_.SetChromaSubsampling(chroma_subsampling_horz_, + chroma_subsampling_vert_); + } + if (chroma_siting_horz_ != -1 && chroma_siting_vert_ != -1) { + vp_config_.SetChromaLocation(chroma_siting_horz_, chroma_siting_vert_); + } + return vp_config_; +} + +WebMParserClient* WebMVideoClient::OnListStart(int id) { + return id == kWebMIdColor ? this : WebMParserClient::OnListStart(id); +} + +bool WebMVideoClient::OnListEnd(int id) { + return id == kWebMIdColor ? true : WebMParserClient::OnListEnd(id); } bool WebMVideoClient::OnUInt(int id, int64_t val) { + VPCodecConfigurationRecord vp_config; int64_t* dst = NULL; switch (id) { @@ -149,6 +175,41 @@ bool WebMVideoClient::OnUInt(int id, int64_t val) { case kWebMIdAlphaMode: dst = &alpha_mode_; break; + case kWebMIdColorMatrixCoefficients: + vp_config.set_matrix_coefficients(static_cast(val)); + break; + case kWebMIdColorBitsPerChannel: + vp_config.set_bit_depth(static_cast(val)); + break; + case kWebMIdColorChromaSubsamplingHorz: + dst = &chroma_subsampling_horz_; + break; + case kWebMIdColorChromaSubsamplingVert: + dst = &chroma_subsampling_vert_; + break; + case kWebMIdColorChromaSitingHorz: + dst = &chroma_siting_horz_; + break; + case kWebMIdColorChromaSitingVert: + dst = &chroma_siting_vert_; + break; + case kWebMIdColorRange: + if (val == 0) + vp_config.set_video_full_range_flag(false); + else if (val == 1) + vp_config.set_video_full_range_flag(true); + // Ignore for other values of val. 
+ break; + case kWebMIdColorTransferCharacteristics: + vp_config.set_transfer_characteristics(static_cast(val)); + break; + case kWebMIdColorPrimaries: + vp_config.set_color_primaries(static_cast(val)); + break; + case kWebMIdColorMaxCLL: + case kWebMIdColorMaxFALL: + NOTIMPLEMENTED() << "HDR is not supported yet."; + return true; default: return true; } diff --git a/packager/media/formats/webm/webm_video_client.h b/packager/media/formats/webm/webm_video_client.h index 6b43d6fc10..3a146d91a9 100644 --- a/packager/media/formats/webm/webm_video_client.h +++ b/packager/media/formats/webm/webm_video_client.h @@ -10,6 +10,7 @@ #include #include "packager/media/base/video_stream_info.h" +#include "packager/media/codecs/vp_codec_configuration_record.h" #include "packager/media/formats/webm/webm_parser.h" namespace shaka { @@ -26,19 +27,25 @@ class WebMVideoClient : public WebMParserClient { void Reset(); /// Create a VideoStreamInfo with the data in |track_num|, |codec_id|, - /// |codec_private|, |is_encrypted| and the fields parsed from the last video - /// track element this object was used to parse. + /// |is_encrypted| and the fields parsed from the last video track element + /// this object was used to parse. /// @return A VideoStreamInfo if successful. /// @return An empty pointer if there was unexpected values in the /// provided parameters or video track element fields. std::shared_ptr GetVideoStreamInfo( int64_t track_num, const std::string& codec_id, - const std::vector& codec_private, bool is_encrypted); + /// Extracts VPCodecConfigurationRecord parsed from codec private data and + /// Colour element. + const VPCodecConfigurationRecord& GetVpCodecConfig( + const std::vector& codec_private); + private: // WebMParserClient implementation. 
+ WebMParserClient* OnListStart(int id) override; + bool OnListEnd(int id) override; bool OnUInt(int id, int64_t val) override; bool OnBinary(int id, const uint8_t* data, int size) override; bool OnFloat(int id, double val) override; @@ -54,6 +61,12 @@ class WebMVideoClient : public WebMParserClient { int64_t display_unit_; int64_t alpha_mode_; + VPCodecConfigurationRecord vp_config_; + int64_t chroma_subsampling_horz_; + int64_t chroma_subsampling_vert_; + int64_t chroma_siting_horz_; + int64_t chroma_siting_vert_; + DISALLOW_COPY_AND_ASSIGN(WebMVideoClient); }; diff --git a/packager/media/test/data/README b/packager/media/test/data/README index ec06ff427a..fc6cc634bb 100644 --- a/packager/media/test/data/README +++ b/packager/media/test/data/README @@ -4,6 +4,12 @@ bear-320x240.webm - WebM encode of bear.1280x720.mp4 resized to 320x240. bear-320x240-vp9-opus.webm - Same as above, but with vp9 and opus codec. +bear-320x180-vp9-altref.webm - VP9 encode of bear-320x180.mp4 (video only) with altref enabled: + ffmpeg -i bear-320x180.mp4 -c:v libvpx-vp9 -pass 1 -threads 8 -speed 4 \ + -frame-parallel 1 -an -f webm /dev/null + ffmpeg -i bear-320x180.mp4 -c:v libvpx-vp9 -pass 2 -threads 8 -speed 1 \ + -frame-parallel 1 -auto-alt-ref 1 -lag-in-frames 25 -an -f webm \ + bear-320x180-vp9-altref.webm no_streams.webm - Header, Info, & Tracks element from bear-320x240.webm slightly corrupted so it looks like there are no tracks. 
nonzero-start-time.webm - Has the same headers as bear-320x240.webm but the first cluster of this file diff --git a/packager/media/test/data/bear-320x180-vp9-altref.webm b/packager/media/test/data/bear-320x180-vp9-altref.webm new file mode 100644 index 0000000000..b77e519e4a Binary files /dev/null and b/packager/media/test/data/bear-320x180-vp9-altref.webm differ diff --git a/packager/media/test/data/bear-640x360-vp9-altref.webm b/packager/media/test/data/bear-640x360-vp9-altref.webm deleted file mode 100644 index 060f8705b8..0000000000 Binary files a/packager/media/test/data/bear-640x360-vp9-altref.webm and /dev/null differ