From 59f393779c9288aa1bdcb7266c2498cc2a058881 Mon Sep 17 00:00:00 2001 From: KongQun Yang Date: Mon, 24 Apr 2017 11:50:49 -0700 Subject: [PATCH] Support v1 vp9 in iso-bmff - Implemented according to v1.0 spec @ https://www.webmproject.org/vp9/mp4/ - v0 is no longer supported Change-Id: I189c813d788400beda797eea7da943a83dfa7d79 --- .../bear-320x240-opus-vp9-cenc-golden.mpd | 6 +- .../testdata/bear-320x240-vp9-cenc-golden.mp4 | Bin 71886 -> 71890 bytes .../test/testdata/bear-320x240-vp9-golden.mp4 | Bin 70696 -> 70698 bytes .../testdata/bear-320x240-vp9-golden.webm | Bin 69546 -> 69549 bytes .../bear-320x240-vp9-opus-webm-golden.mpd | 6 +- .../testdata/bear-640x360-vp8-cenc-golden.mp4 | Bin 116624 -> 116628 bytes .../testdata/bear-640x360-vp8-cenc-golden.mpd | 6 +- .../test/testdata/bear-640x360-vp8-golden.mp4 | Bin 115866 -> 115868 bytes .../bear-640x360-vp9-altref-dec-golden.webm | Bin 91693 -> 91696 bytes .../bear-640x360-vp9-altref-enc-golden.webm | Bin 92562 -> 92565 bytes ...ear-640x360-vp9-fullsample-enc-golden.webm | Bin 92242 -> 92245 bytes .../testdata/bear-vp9-blockgroup-golden.webm | Bin 67363 -> 67366 bytes packager/media/codecs/vp8_parser.cc | 4 - packager/media/codecs/vp8_parser_unittest.cc | 2 +- packager/media/codecs/vp9_parser.cc | 61 ++++-- packager/media/codecs/vp9_parser_unittest.cc | 10 +- .../codecs/vp_codec_configuration_record.cc | 162 +++++++------- .../codecs/vp_codec_configuration_record.h | 200 +++++++++++++----- .../vp_codec_configuration_record_unittest.cc | 54 +++-- packager/media/formats/mp4/box_definitions.cc | 7 +- .../webm/webm_cluster_parser_unittest.cc | 4 +- 21 files changed, 319 insertions(+), 203 deletions(-) diff --git a/packager/app/test/testdata/bear-320x240-opus-vp9-cenc-golden.mpd b/packager/app/test/testdata/bear-320x240-opus-vp9-cenc-golden.mpd index 95bb75d440..7d3209934b 100644 --- a/packager/app/test/testdata/bear-320x240-opus-vp9-cenc-golden.mpd +++ b/packager/app/test/testdata/bear-320x240-opus-vp9-cenc-golden.mpd @@ -20,10 +20,10 @@ AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAExMjM0NTY3ODkwMTIzNDU2AAAAAA== - + output_video.mp4 - - + + diff --git a/packager/app/test/testdata/bear-320x240-vp9-cenc-golden.mp4 b/packager/app/test/testdata/bear-320x240-vp9-cenc-golden.mp4 index 32e999226927a3fd6cf135a81ad6a291d7c68a9c..3b3ccc2494418bb5b33ffc4714568ccbfddd54b7 100644 GIT binary patch delta 137 zcmX@Nk>%1xmI-Q1EX)%%{xZ&-tj3tkXgaxzF@*8KAxfT~DYL9#O=NS3RKi3udnz#veNSX=@~W^5D%08b_%m;e9( delta 117 zcmcb#k>%V*mI-Q1%>O59{AHXvS&cE7(QtAXV+iA&$@dwxW#<%^B;_zLFa{Kt6sIsi z027cpk(!rWHu)n{;N%LX0A8W8f@EiiWP`xuKxR?Kipg)8LQ%z=1DUr6GBY-c0swV1 BAg=%b diff --git a/packager/app/test/testdata/bear-320x240-vp9-golden.mp4 b/packager/app/test/testdata/bear-320x240-vp9-golden.mp4 index 9c5af5948a14c7e4554a1414afdd60dc5b9fb3eb..eb37055756482856be84d925082b703ca5f21e29 100644 GIT binary patch delta 86 zcmZ3nf@ReTmI>;NtP?f!U delta 84 zcmZ3rf@Q@DmI>;N%o8>KGM=5R#+b~QIk}55gi&PjeMW7`E5#*AISdR89mOTZDL@(o g7=gHAvNBUBuTWV*vNKqWp+TUzooRbJ6XRk*04+KeKmY&$ diff --git a/packager/app/test/testdata/bear-320x240-vp9-golden.webm b/packager/app/test/testdata/bear-320x240-vp9-golden.webm index 0618a8009c5c36b7ab4022af60daf5ce9c70db08..bf9c2ee34cbeda7f2e4157dc031cb00e2a5353c7 100644 GIT binary patch delta 119 zcmZ2ApJnZQmI?ZdwG$2Wg#P*N?g?JqBJaGdG_DfZerov-u}Ij@nci48i>pO S9m+je)f~gPJ%*9dj}HI|K`wa! delta 116 zcmZ2GpJmm2mI?ZdRTB;Lgns+(?g?JqBJaGdG - + output_video.webm - - + + diff --git a/packager/app/test/testdata/bear-640x360-vp8-cenc-golden.mp4 b/packager/app/test/testdata/bear-640x360-vp8-cenc-golden.mp4 index 81c04f212c3aaa9022296a7dd2ce14538fda198f..6608e03f6472596f864eeb5830ee84476dccb2ca 100644 GIT binary patch delta 120 zcmbQx&pxG}eS$jU AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAExMjM0NTY3ODkwMTIzNDU2AAAAAA== - + output_video.mp4 - - + + diff --git a/packager/app/test/testdata/bear-640x360-vp8-golden.mp4 b/packager/app/test/testdata/bear-640x360-vp8-golden.mp4 index 3c7843fb39f830a82a1b7dd5a98988c2627ae841..8c51125bac835e62cda0c903fbd3d87096d25a68 100644 GIT binary patch delta 86 zcmbQ$$v&r(eS$jU*NGZ`8TU_CV@zg@p4`P4!pJuHKBKnevEq`X90mr4s^XI36d(-( jj6j?+TO~Zl190mr4isF*u6d(-( gj6j?*S(z!6SE#HY*%>Ux&?wN{&a}OqiBa?r07Qxwpa1{> diff --git a/packager/app/test/testdata/bear-640x360-vp9-altref-dec-golden.webm b/packager/app/test/testdata/bear-640x360-vp9-altref-dec-golden.webm index ae1c97b480560a7cf9b2ed4846c214c2136a8eb0..ef570177200d09dbc8bac17d8c7f76f57ca7e9e2 100644 GIT binary patch delta 161 zcmZ2`hIPXk)(LvfX$*qCvrBgQ?(Pm=-6HC_GA(#bV`>meZp-yX#^R%mjLnUVZLMMPVF8xOi+UIt8JHNkm>D@( z7#Sx^GO8&U?e5*&#Qbu5`}ao1k4+OIcK2;=V!yP#1H^4+3)(Lt}sec81XP4~s-Q69$x<%A;Wm@o>#z+WHEM#3akarZqlL_7p;{EmA z-2+r9@4T%vcufCMV*X{49tui zEQ}13Wf;{I^mq4eZeo79z5RP5y3=XM;jTN8yVYL z!{WmNERz@YFfuYQF>*08a> delta 111 zcmbPwnRU`-)(QHINfQn91q^(5_XMwQk$2u!8oZ_nD3;%)@NC_a>y3=XM;jTN8yVYL z!{WmNERz>?GBPqSGjgynGEA0Xj8sV4-MhJo`Q`TZ?~RNfnM5Hn4?j8m(GA49fvQ>=mkWsz@+Bk8W^*MR|Z#eo5X zfdPhvR$o?7Ib))X0RaF40SW^F2m}EElNkY7AZ5Favw{QZw}=mkVv%o(Am*;**MR|Z#eo5X zfdPhvR$o?7Ib))U0RaF50SE*E0FxX6Ss-G&jkAIS>9>dXfdTP?kfytivw{cGw}|(F S0r7(Y5N?B&0k@R_0l;Pk1};bd diff --git a/packager/app/test/testdata/bear-vp9-blockgroup-golden.webm b/packager/app/test/testdata/bear-vp9-blockgroup-golden.webm index aeda2e9b368f47be7a5d5b7ac124b0054823512b..d9a6961ca63e236871aabb6a9f830f882c81fc37 100644 GIT binary patch delta 89 zcmV-f0H*(=js&KT1duNQ_K`3y68=rQj8m(GA49fvQ>=mk5$u_faTt+^aU-;@ve$tD vbH#xHgMk5tg;rlyP&s3wi~#`v0s#sG0SE*E0h1X4BmohFJ^{Br0RiO&NZcN% delta 86 zcmV-c0IC0`js&BQ1duNQ^N}zv68lZNj8m(GA49fvQ>=mk4(yqcaTt+^aUij-uh)SA sbH#xHgMk5tg;rlyP&s3wi2(rs0|5vG0RWR60VDwqgFykeK>-2f1@d$rivR!s diff --git a/packager/media/codecs/vp8_parser.cc b/packager/media/codecs/vp8_parser.cc index b925e54c2d..6a7b0f4c8b 100644 --- a/packager/media/codecs/vp8_parser.cc +++ b/packager/media/codecs/vp8_parser.cc @@ -154,10 +154,6 @@ bool VP8Parser::Parse(const uint8_t* data, writable_codec_config()->set_bit_depth(8); writable_codec_config()->set_chroma_subsampling( VPCodecConfigurationRecord::CHROMA_420_COLLOCATED_WITH_LUMA); - // VP8 uses YCrCb color space defined in ITU-R_BT.601. - // http://tools.ietf.org/html/rfc6386 Section 9.2. - writable_codec_config()->set_color_space( - VPCodecConfigurationRecord::COLOR_SPACE_BT_601); VPxFrameInfo vpx_frame; vpx_frame.frame_size = data_size; diff --git a/packager/media/codecs/vp8_parser_unittest.cc b/packager/media/codecs/vp8_parser_unittest.cc index 357000fda8..c96b2051c4 100644 --- a/packager/media/codecs/vp8_parser_unittest.cc +++ b/packager/media/codecs/vp8_parser_unittest.cc @@ -43,7 +43,7 @@ TEST(VP8ParserTest, Keyframe) { VP8Parser parser; std::vector frames; ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames)); - EXPECT_EQ("vp08.02.00.08.01.01.00.00", + EXPECT_EQ("vp08.02.10.08.01.02.02.02.00", parser.codec_config().GetCodecString(kCodecVP8)); EXPECT_THAT(frames, ElementsAre(EqualVPxFrame(arraysize(kData), 22u, true, 320u, 240u))); diff --git a/packager/media/codecs/vp9_parser.cc b/packager/media/codecs/vp9_parser.cc index 1ab53811fe..cacf6d6232 100644 --- a/packager/media/codecs/vp9_parser.cc +++ b/packager/media/codecs/vp9_parser.cc @@ -160,30 +160,66 @@ bool ReadSyncCode(BitReader* reader) { return sync_code == VP9_SYNC_CODE; } -VPCodecConfigurationRecord::ColorSpace GetColorSpace(uint8_t color_space) { +void SetColorAttributes(uint8_t bit_depth, + uint8_t color_space, + VPCodecConfigurationRecord* codec_config) { switch (color_space) { case VPX_COLOR_SPACE_UNKNOWN: - return VPCodecConfigurationRecord::COLOR_SPACE_UNSPECIFIED; + codec_config->set_color_primaries(AVCOL_PRI_UNSPECIFIED); + codec_config->set_matrix_coefficients(AVCOL_SPC_UNSPECIFIED); + codec_config->set_transfer_characteristics(AVCOL_TRC_UNSPECIFIED); + break; case VPX_COLOR_SPACE_BT_601: - return VPCodecConfigurationRecord::COLOR_SPACE_BT_601; + // Don't know if it is 525 line or 625 line. + codec_config->set_color_primaries(AVCOL_PRI_UNSPECIFIED); + codec_config->set_matrix_coefficients(AVCOL_SPC_UNSPECIFIED); + codec_config->set_transfer_characteristics(AVCOL_TRC_SMPTE170M); + break; case VPX_COLOR_SPACE_BT_709: - return VPCodecConfigurationRecord::COLOR_SPACE_BT_709; + codec_config->set_color_primaries(AVCOL_PRI_BT709); + codec_config->set_matrix_coefficients(AVCOL_SPC_BT709); + codec_config->set_transfer_characteristics(AVCOL_TRC_BT709); + break; case VPX_COLOR_SPACE_SMPTE_170: - return VPCodecConfigurationRecord::COLOR_SPACE_SMPTE_170; + codec_config->set_color_primaries(AVCOL_PRI_SMPTE170M); + codec_config->set_matrix_coefficients(AVCOL_SPC_SMPTE170M); + codec_config->set_transfer_characteristics(AVCOL_TRC_SMPTE170M); + break; case VPX_COLOR_SPACE_SMPTE_240: - return VPCodecConfigurationRecord::COLOR_SPACE_SMPTE_240; + codec_config->set_color_primaries(AVCOL_PRI_SMPTE240M); + codec_config->set_matrix_coefficients(AVCOL_SPC_SMPTE240M); + codec_config->set_transfer_characteristics(AVCOL_TRC_SMPTE240M); + break; case VPX_COLOR_SPACE_BT_2020: + codec_config->set_color_primaries(AVCOL_PRI_BT2020); // VP9 does not specify if it is in the form of “constant luminance” or // “non-constant luminance”. As such, application should rely on the // signaling outside of VP9 bitstream. If there is no such signaling, // application may assume non-constant luminance for BT.2020. - return VPCodecConfigurationRecord:: - COLOR_SPACE_BT_2020_NON_CONSTANT_LUMINANCE; + codec_config->set_matrix_coefficients(AVCOL_SPC_BT2020_NCL); + switch (bit_depth) { + case 10: + codec_config->set_transfer_characteristics(AVCOL_TRC_BT2020_10); + break; + case 12: + codec_config->set_transfer_characteristics(AVCOL_TRC_BT2020_12); + break; + default: + codec_config->set_transfer_characteristics(AVCOL_TRC_UNSPECIFIED); + break; + } + break; case VPX_COLOR_SPACE_SRGB: - return VPCodecConfigurationRecord::COLOR_SPACE_SRGB; + codec_config->set_color_primaries(AVCOL_PRI_UNSPECIFIED); + codec_config->set_matrix_coefficients(AVCOL_SPC_RGB); + codec_config->set_transfer_characteristics(AVCOL_TRC_UNSPECIFIED); + break; default: LOG(WARNING) << "Unknown color space: " << static_cast(color_space); - return VPCodecConfigurationRecord::COLOR_SPACE_UNSPECIFIED; + codec_config->set_color_primaries(AVCOL_PRI_UNSPECIFIED); + codec_config->set_matrix_coefficients(AVCOL_SPC_UNSPECIFIED); + codec_config->set_transfer_characteristics(AVCOL_TRC_UNSPECIFIED); + break; } } @@ -219,7 +255,7 @@ bool ReadBitDepthAndColorSpace(BitReader* reader, uint8_t color_space; RCHECK(reader->ReadBits(3, &color_space)); - codec_config->set_color_space(GetColorSpace(color_space)); + SetColorAttributes(bit_depth, color_space, codec_config); bool yuv_full_range = false; auto chroma_subsampling = VPCodecConfigurationRecord::CHROMA_444; @@ -261,7 +297,8 @@ bool ReadBitDepthAndColorSpace(BitReader* reader, VLOG(3) << "\n profile " << static_cast(codec_config->profile()) << "\n bit depth " << static_cast(codec_config->bit_depth()) - << "\n color space " << static_cast(codec_config->color_space()) + << "\n matrix coefficients " + << static_cast(codec_config->matrix_coefficients()) << "\n full_range " << static_cast(codec_config->video_full_range_flag()) << "\n chroma subsampling " diff --git a/packager/media/codecs/vp9_parser_unittest.cc b/packager/media/codecs/vp9_parser_unittest.cc index f7d3517525..c1358d6d7f 100644 --- a/packager/media/codecs/vp9_parser_unittest.cc +++ b/packager/media/codecs/vp9_parser_unittest.cc @@ -76,7 +76,7 @@ TEST(VP9ParserTest, KeyframeChroma420) { VP9Parser parser; std::vector frames; ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames)); - EXPECT_EQ("vp09.00.00.08.00.01.00.00", + EXPECT_EQ("vp09.00.10.08.01.02.02.02.00", parser.codec_config().GetCodecString(kCodecVP9)); EXPECT_THAT(frames, ElementsAre(EqualVPxFrame(arraysize(kData), 18u, true, 32u, 8u))); @@ -98,7 +98,7 @@ TEST(VP9ParserTest, KeyframeProfile1Chroma422) { VP9Parser parser; std::vector frames; ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames)); - EXPECT_EQ("vp09.01.00.08.00.02.00.00", + EXPECT_EQ("vp09.01.10.08.02.02.02.02.00", parser.codec_config().GetCodecString(kCodecVP9)); EXPECT_THAT(frames, ElementsAre(EqualVPxFrame(arraysize(kData), 18u, true, 160u, 90u))); @@ -120,7 +120,7 @@ TEST(VP9ParserTest, KeyframeProfile2Chroma420) { VP9Parser parser; std::vector frames; ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames)); - EXPECT_EQ("vp09.02.00.10.00.01.00.00", + EXPECT_EQ("vp09.02.10.10.01.02.02.02.00", parser.codec_config().GetCodecString(kCodecVP9)); EXPECT_THAT(frames, ElementsAre(EqualVPxFrame(arraysize(kData), 18u, true, 160u, 90u))); @@ -142,7 +142,7 @@ TEST(VP9ParserTest, KeyframeProfile3Chroma444) { VP9Parser parser; std::vector frames; ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames)); - EXPECT_EQ("vp09.03.00.12.00.03.00.00", + EXPECT_EQ("vp09.03.10.12.03.02.02.02.00", parser.codec_config().GetCodecString(kCodecVP9)); EXPECT_THAT(frames, ElementsAre(EqualVPxFrame(arraysize(kData), 19u, true, 160u, 90u))); } @@ -164,7 +164,7 @@ TEST(VP9ParserTest, Intra) { VP9Parser parser; std::vector frames; ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames)); - EXPECT_EQ("vp09.00.00.08.00.01.00.00", + EXPECT_EQ("vp09.00.10.08.01.02.02.02.00", parser.codec_config().GetCodecString(kCodecVP9)); EXPECT_THAT(frames, ElementsAre(EqualVPxFrame(arraysize(kData), 19u, false, 352u, 288u))); diff --git a/packager/media/codecs/vp_codec_configuration_record.cc b/packager/media/codecs/vp_codec_configuration_record.cc index 2709c5054e..f4803c4294 100644 --- a/packager/media/codecs/vp_codec_configuration_record.cc +++ b/packager/media/codecs/vp_codec_configuration_record.cc @@ -40,18 +40,17 @@ std::string VPCodecAsString(Codec codec) { template void MergeField(const std::string& name, - T source_value, - bool source_is_set, - T* dest_value, - bool* dest_is_set) { - if (!*dest_is_set || source_is_set) { - if (*dest_is_set && source_value != *dest_value) { + const base::Optional& source_value, + base::Optional* dest_value) { + if (*dest_value) { + if (source_value && *source_value != **dest_value) { LOG(WARNING) << "VPx " << name << " is inconsistent, " - << static_cast(*dest_value) << " vs " - << static_cast(source_value); + << static_cast(**dest_value) << " vs " + << static_cast(*source_value); } + } else { + // Only set dest_value if it is not set. *dest_value = source_value; - *dest_is_set = true; } } @@ -63,45 +62,46 @@ VPCodecConfigurationRecord::VPCodecConfigurationRecord( uint8_t profile, uint8_t level, uint8_t bit_depth, - uint8_t color_space, uint8_t chroma_subsampling, - uint8_t transfer_function, bool video_full_range_flag, + uint8_t color_primaries, + uint8_t transfer_characteristics, + uint8_t matrix_coefficients, const std::vector& codec_initialization_data) : profile_(profile), level_(level), bit_depth_(bit_depth), - color_space_(color_space), chroma_subsampling_(chroma_subsampling), - transfer_function_(transfer_function), video_full_range_flag_(video_full_range_flag), - profile_is_set_(true), - level_is_set_(true), - bit_depth_is_set_(true), - color_space_is_set_(true), - chroma_subsampling_is_set_(true), - transfer_function_is_set_(true), - video_full_range_flag_is_set_(true), + color_primaries_(color_primaries), + transfer_characteristics_(transfer_characteristics), + matrix_coefficients_(matrix_coefficients), codec_initialization_data_(codec_initialization_data) {} VPCodecConfigurationRecord::~VPCodecConfigurationRecord(){}; +// https://www.webmproject.org/vp9/mp4/ bool VPCodecConfigurationRecord::ParseMP4(const std::vector& data) { BitReader reader(data.data(), data.size()); - profile_is_set_ = true; - level_is_set_ = true; - bit_depth_is_set_ = true; - color_space_is_set_ = true; - chroma_subsampling_is_set_ = true; - transfer_function_is_set_ = true; - video_full_range_flag_is_set_ = true; - RCHECK(reader.ReadBits(8, &profile_)); - RCHECK(reader.ReadBits(8, &level_)); - RCHECK(reader.ReadBits(4, &bit_depth_)); - RCHECK(reader.ReadBits(4, &color_space_)); - RCHECK(reader.ReadBits(4, &chroma_subsampling_)); - RCHECK(reader.ReadBits(3, &transfer_function_)); - RCHECK(reader.ReadBits(1, &video_full_range_flag_)); + uint8_t value; + RCHECK(reader.ReadBits(8, &value)); + profile_ = value; + RCHECK(reader.ReadBits(8, &value)); + level_ = value; + RCHECK(reader.ReadBits(4, &value)); + bit_depth_ = value; + RCHECK(reader.ReadBits(3, &value)); + chroma_subsampling_ = value; + bool bool_value; + RCHECK(reader.ReadBits(1, &bool_value)); + video_full_range_flag_ = bool_value; + RCHECK(reader.ReadBits(8, &value)); + color_primaries_ = value; + RCHECK(reader.ReadBits(8, &value)); + transfer_characteristics_ = value; + RCHECK(reader.ReadBits(8, &value)); + matrix_coefficients_ = value; + uint16_t codec_initialization_data_size = 0; RCHECK(reader.ReadBits(16, &codec_initialization_data_size)); RCHECK(reader.bits_available() >= codec_initialization_data_size * 8u); @@ -121,26 +121,27 @@ bool VPCodecConfigurationRecord::ParseWebM(const std::vector& data) { RCHECK(reader.Read1(&id)); RCHECK(reader.Read1(&size)); + uint8_t value = 0; switch (id) { case kFeatureProfile: RCHECK(size == 1); - RCHECK(reader.Read1(&profile_)); - profile_is_set_ = true; + RCHECK(reader.Read1(&value)); + profile_ = value; break; case kFeatureLevel: RCHECK(size == 1); - RCHECK(reader.Read1(&level_)); - level_is_set_ = true; + RCHECK(reader.Read1(&value)); + level_ = value; break; case kFeatureBitDepth: RCHECK(size == 1); - RCHECK(reader.Read1(&bit_depth_)); - bit_depth_is_set_ = true; + RCHECK(reader.Read1(&value)); + bit_depth_ = value; break; case kFeatureChromaSubsampling: RCHECK(size == 1); - RCHECK(reader.Read1(&chroma_subsampling_)); - chroma_subsampling_is_set_ = true; + RCHECK(reader.Read1(&value)); + chroma_subsampling_ = value; break; default: { LOG(WARNING) << "Skipping unknown VP9 codec feature " << id; @@ -154,13 +155,14 @@ bool VPCodecConfigurationRecord::ParseWebM(const std::vector& data) { void VPCodecConfigurationRecord::WriteMP4(std::vector* data) const { BufferWriter writer; - writer.AppendInt(profile_); - writer.AppendInt(level_); - uint8_t bit_depth_color_space = (bit_depth_ << 4) | color_space_; - writer.AppendInt(bit_depth_color_space); - uint8_t chroma = (chroma_subsampling_ << 4) | (transfer_function_ << 1) | - (video_full_range_flag_ ? 1 : 0); - writer.AppendInt(chroma); + writer.AppendInt(profile()); + writer.AppendInt(level()); + uint8_t bit_depth_chroma = (bit_depth() << 4) | (chroma_subsampling() << 1) | + (video_full_range_flag() ? 1 : 0); + writer.AppendInt(bit_depth_chroma); + writer.AppendInt(color_primaries()); + writer.AppendInt(transfer_characteristics()); + writer.AppendInt(matrix_coefficients()); uint16_t codec_initialization_data_size = static_cast(codec_initialization_data_.size()); writer.AppendInt(codec_initialization_data_size); @@ -171,35 +173,29 @@ void VPCodecConfigurationRecord::WriteMP4(std::vector* data) const { void VPCodecConfigurationRecord::WriteWebM(std::vector* data) const { BufferWriter writer; - if (profile_is_set_) { + if (profile_) { writer.AppendInt(static_cast(kFeatureProfile)); // ID = 1 writer.AppendInt(static_cast(1)); // Length = 1 - writer.AppendInt(static_cast(profile_)); + writer.AppendInt(*profile_); } - if (level_is_set_ && level_ != 0) { + if (level_) { writer.AppendInt(static_cast(kFeatureLevel)); // ID = 2 writer.AppendInt(static_cast(1)); // Length = 1 - writer.AppendInt(static_cast(level_)); + writer.AppendInt(*level_); } - if (bit_depth_is_set_) { + if (bit_depth_) { writer.AppendInt(static_cast(kFeatureBitDepth)); // ID = 3 writer.AppendInt(static_cast(1)); // Length = 1 - writer.AppendInt(static_cast(bit_depth_)); + writer.AppendInt(*bit_depth_); } - if (chroma_subsampling_is_set_) { - // WebM doesn't differentiate whether it is vertical or collocated with luma - // for 4:2:0. - const uint8_t subsampling = - chroma_subsampling_ == CHROMA_420_COLLOCATED_WITH_LUMA - ? CHROMA_420_VERTICAL - : chroma_subsampling_; + if (chroma_subsampling_) { // ID = 4, Length = 1 writer.AppendInt(static_cast(kFeatureChromaSubsampling)); writer.AppendInt(static_cast(1)); - writer.AppendInt(subsampling); + writer.AppendInt(*chroma_subsampling_); } writer.SwapBuffer(data); @@ -207,13 +203,14 @@ void VPCodecConfigurationRecord::WriteWebM(std::vector* data) const { std::string VPCodecConfigurationRecord::GetCodecString(Codec codec) const { const std::string fields[] = { - base::IntToString(profile_), - base::IntToString(level_), - base::IntToString(bit_depth_), - base::IntToString(color_space_), - base::IntToString(chroma_subsampling_), - base::IntToString(transfer_function_), - (video_full_range_flag_ ? "01" : "00"), + base::IntToString(profile()), + base::IntToString(level()), + base::IntToString(bit_depth()), + base::IntToString(chroma_subsampling()), + base::IntToString(color_primaries()), + base::IntToString(transfer_characteristics()), + base::IntToString(matrix_coefficients()), + (video_full_range_flag_ && *video_full_range_flag_) ? "01" : "00", }; std::string codec_string = VPCodecAsString(codec); @@ -228,23 +225,18 @@ std::string VPCodecConfigurationRecord::GetCodecString(Codec codec) const { void VPCodecConfigurationRecord::MergeFrom( const VPCodecConfigurationRecord& other) { - MergeField("profile", other.profile_, other.profile_is_set_, &profile_, - &profile_is_set_); - MergeField("level", other.level_, other.level_is_set_, &level_, - &level_is_set_); - MergeField("bit depth", other.bit_depth_, other.bit_depth_is_set_, - &bit_depth_, &bit_depth_is_set_); - MergeField("color space", other.color_space_, other.color_space_is_set_, - &color_space_, &color_space_is_set_); + MergeField("profile", other.profile_, &profile_); + MergeField("level", other.level_, &level_); + MergeField("bit depth", other.bit_depth_, &bit_depth_); MergeField("chroma subsampling", other.chroma_subsampling_, - other.chroma_subsampling_is_set_, &chroma_subsampling_, - &chroma_subsampling_is_set_); - MergeField("transfer function", other.transfer_function_, - other.transfer_function_is_set_, &transfer_function_, - &transfer_function_is_set_); + &chroma_subsampling_); MergeField("video full range flag", other.video_full_range_flag_, - other.video_full_range_flag_is_set_, &video_full_range_flag_, - &video_full_range_flag_is_set_); + &video_full_range_flag_); + MergeField("color primaries", other.color_primaries_, &color_primaries_); + MergeField("transfer characteristics", other.transfer_characteristics_, + &transfer_characteristics_); + MergeField("matrix coefficients", other.matrix_coefficients_, + &matrix_coefficients_); if (codec_initialization_data_.empty() || !other.codec_initialization_data_.empty()) { diff --git a/packager/media/codecs/vp_codec_configuration_record.h b/packager/media/codecs/vp_codec_configuration_record.h index 8b07b48d1b..91aa7075be 100644 --- a/packager/media/codecs/vp_codec_configuration_record.h +++ b/packager/media/codecs/vp_codec_configuration_record.h @@ -12,25 +12,118 @@ #include #include "packager/base/macros.h" +#include "packager/base/optional.h" #include "packager/media/base/video_stream_info.h" namespace shaka { namespace media { +/// The below enums are from ffmpeg/libavutil/pixfmt.h. +/// Chromaticity coordinates of the source primaries. +enum AVColorPrimaries { + AVCOL_PRI_RESERVED0 = 0, + /// Also ITU-R BT1361 / IEC 61966-2-4 / SMPTE RP177 Annex B + AVCOL_PRI_BT709 = 1, + AVCOL_PRI_UNSPECIFIED = 2, + AVCOL_PRI_RESERVED = 3, + /// Also FCC Title 47 Code of Federal Regulations 73.682 (a)(20) + AVCOL_PRI_BT470M = 4, + /// Also ITU-R BT601-6 625 / ITU-R BT1358 625 / ITU-R BT1700 625 PAL & SECAM + AVCOL_PRI_BT470BG = 5, + /// Also ITU-R BT601-6 525 / ITU-R BT1358 525 / ITU-R BT1700 NTSC + AVCOL_PRI_SMPTE170M = 6, + /// Functionally identical to above + AVCOL_PRI_SMPTE240M = 7, + /// Colour filters using Illuminant C + AVCOL_PRI_FILM = 8, + /// ITU-R BT2020 + AVCOL_PRI_BT2020 = 9, + /// SMPTE ST 428-1 (CIE 1931 XYZ) + AVCOL_PRI_SMPTE428 = 10, + AVCOL_PRI_SMPTEST428_1 = AVCOL_PRI_SMPTE428, + /// SMPTE ST 431-2 (2011) + AVCOL_PRI_SMPTE431 = 11, + /// SMPTE ST 432-1 D65 (2010) + AVCOL_PRI_SMPTE432 = 12, + ///< Not part of ABI + AVCOL_PRI_NB +}; + +/// Color Transfer Characteristic. +enum AVColorTransferCharacteristic { + AVCOL_TRC_RESERVED0 = 0, + /// Also ITU-R BT1361 + AVCOL_TRC_BT709 = 1, + AVCOL_TRC_UNSPECIFIED = 2, + AVCOL_TRC_RESERVED = 3, + /// Also ITU-R BT470M / ITU-R BT1700 625 PAL & SECAM + AVCOL_TRC_GAMMA22 = 4, + /// Also ITU-R BT470BG + AVCOL_TRC_GAMMA28 = 5, + /// Also ITU-R BT601-6 525 or 625 / ITU-R BT1358 525 or 625 / ITU-R BT1700 + /// NTSC + AVCOL_TRC_SMPTE170M = 6, + AVCOL_TRC_SMPTE240M = 7, + /// "Linear transfer characteristics" + AVCOL_TRC_LINEAR = 8, + /// "Logarithmic transfer characteristic (100:1 range)" + AVCOL_TRC_LOG = 9, + /// "Logarithmic transfer characteristic (100 * Sqrt(10) : 1 range)" + AVCOL_TRC_LOG_SQRT = 10, + /// IEC 61966-2-4 + AVCOL_TRC_IEC61966_2_4 = 11, + /// ITU-R BT1361 Extended Colour Gamut + AVCOL_TRC_BT1361_ECG = 12, + /// IEC 61966-2-1 (sRGB or sYCC) + AVCOL_TRC_IEC61966_2_1 = 13, + /// ITU-R BT2020 for 10-bit system + AVCOL_TRC_BT2020_10 = 14, + /// ITU-R BT2020 for 12-bit system + AVCOL_TRC_BT2020_12 = 15, + /// SMPTE ST 2084 for 10-, 12-, 14- and 16-bit systems + AVCOL_TRC_SMPTE2084 = 16, + AVCOL_TRC_SMPTEST2084 = AVCOL_TRC_SMPTE2084, + /// SMPTE ST 428-1 + AVCOL_TRC_SMPTE428 = 17, + AVCOL_TRC_SMPTEST428_1 = AVCOL_TRC_SMPTE428, + /// ARIB STD-B67, known as "Hybrid log-gamma" + AVCOL_TRC_ARIB_STD_B67 = 18, + /// Not part of ABI + AVCOL_TRC_NB +}; + +/// YUV colorspace type (a.c.a matrix coefficients in 23001-8:2016). +enum AVColorSpace { + /// Order of coefficients is actually GBR, also IEC 61966-2-1 (sRGB) + AVCOL_SPC_RGB = 0, + /// Also ITU-R BT1361 / IEC 61966-2-4 xvYCC709 / SMPTE RP177 Annex B + AVCOL_SPC_BT709 = 1, + AVCOL_SPC_UNSPECIFIED = 2, + AVCOL_SPC_RESERVED = 3, + /// FCC Title 47 Code of Federal Regulations 73.682 (a)(20) + AVCOL_SPC_FCC = 4, + /// Also ITU-R BT601-6 625 / ITU-R BT1358 625 / ITU-R BT1700 625 PAL & SECAM / + /// IEC 61966-2-4 xvYCC601 + AVCOL_SPC_BT470BG = 5, + /// Also ITU-R BT601-6 525 / ITU-R BT1358 525 / ITU-R BT1700 NTSC + AVCOL_SPC_SMPTE170M = 6, + /// Functionally identical to above + AVCOL_SPC_SMPTE240M = 7, + /// Used by Dirac / VC-2 and H.264 FRext, see ITU-T SG16 + AVCOL_SPC_YCOCG = 8, + /// ITU-R BT2020 non-constant luminance system + AVCOL_SPC_BT2020_NCL = 9, + /// ITU-R BT2020 constant luminance system + AVCOL_SPC_BT2020_CL = 10, + /// SMPTE 2085, Y'D'zD'x + AVCOL_SPC_SMPTE2085 = 11, + /// Not part of ABI + AVCOL_SPC_NB +}; + /// Class for parsing or writing VP codec configuration record. class VPCodecConfigurationRecord { public: - enum ColorSpace { - COLOR_SPACE_UNSPECIFIED = 0, - COLOR_SPACE_BT_601 = 1, - COLOR_SPACE_BT_709 = 2, - COLOR_SPACE_SMPTE_170 = 3, - COLOR_SPACE_SMPTE_240 = 4, - COLOR_SPACE_BT_2020_NON_CONSTANT_LUMINANCE = 5, - COLOR_SPACE_BT_2020_CONSTANT_LUMINANCE = 6, - COLOR_SPACE_SRGB = 7, - }; - enum ChromaSubsampling { CHROMA_420_VERTICAL = 0, CHROMA_420_COLLOCATED_WITH_LUMA = 1, @@ -44,10 +137,11 @@ class VPCodecConfigurationRecord { uint8_t profile, uint8_t level, uint8_t bit_depth, - uint8_t color_space, uint8_t chroma_subsampling, - uint8_t transfer_function, bool video_full_range_flag, + uint8_t color_primaries, + uint8_t transfer_characteristics, + uint8_t matrix_coefficients, const std::vector& codec_initialization_data); ~VPCodecConfigurationRecord(); @@ -74,57 +168,53 @@ class VPCodecConfigurationRecord { // both |*this| and |other|, the values in |other| take precedence. void MergeFrom(const VPCodecConfigurationRecord& other); - void set_profile(uint8_t profile) { - profile_ = profile; - profile_is_set_ = true; - } - void set_level(uint8_t level) { - level_ = level; - level_is_set_ = true; - } - void set_bit_depth(uint8_t bit_depth) { - bit_depth_ = bit_depth; - bit_depth_is_set_ = true; - } - void set_color_space(uint8_t color_space) { - color_space_ = color_space; - color_space_is_set_ = true; - } + void set_profile(uint8_t profile) { profile_ = profile; } + void set_level(uint8_t level) { level_ = level; } + void set_bit_depth(uint8_t bit_depth) { bit_depth_ = bit_depth; } void set_chroma_subsampling(uint8_t chroma_subsampling) { chroma_subsampling_ = chroma_subsampling; - chroma_subsampling_is_set_ = true; - } - void set_transfer_function(uint8_t transfer_function) { - transfer_function_ = transfer_function; - transfer_function_is_set_ = true; } void set_video_full_range_flag(bool video_full_range_flag) { video_full_range_flag_ = video_full_range_flag; } + void set_color_primaries(uint8_t color_primaries) { + color_primaries_ = color_primaries; + } + void set_transfer_characteristics(uint8_t transfer_characteristics) { + transfer_characteristics_ = transfer_characteristics; + } + void set_matrix_coefficients(uint8_t matrix_coefficients) { + matrix_coefficients_ = matrix_coefficients; + } - uint8_t profile() const { return profile_; } - uint8_t level() const { return level_; } - uint8_t bit_depth() const { return bit_depth_; } - uint8_t color_space() const { return color_space_; } - uint8_t chroma_subsampling() const { return chroma_subsampling_; } - uint8_t transfer_function() const { return transfer_function_; } - bool video_full_range_flag() const { return video_full_range_flag_; } + uint8_t profile() const { return profile_.value_or(0); } + uint8_t level() const { return level_.value_or(10); } + uint8_t bit_depth() const { return bit_depth_.value_or(8); } + uint8_t chroma_subsampling() const { + return chroma_subsampling_.value_or(CHROMA_420_COLLOCATED_WITH_LUMA); + } + bool video_full_range_flag() const { + return video_full_range_flag_.value_or(false); + } + uint8_t color_primaries() const { + return color_primaries_.value_or(AVCOL_PRI_UNSPECIFIED); + } + uint8_t transfer_characteristics() const { + return transfer_characteristics_.value_or(AVCOL_TRC_UNSPECIFIED); + } + uint8_t matrix_coefficients() const { + return matrix_coefficients_.value_or(AVCOL_SPC_UNSPECIFIED); + } private: - uint8_t profile_ = 0; - uint8_t level_ = 0; - uint8_t bit_depth_ = 0; - uint8_t color_space_ = 0; - uint8_t chroma_subsampling_ = 0; - uint8_t transfer_function_ = 0; - bool video_full_range_flag_ = false; - bool profile_is_set_ = false; - bool level_is_set_ = false; - bool bit_depth_is_set_ = false; - bool color_space_is_set_ = false; - bool chroma_subsampling_is_set_ = false; - bool transfer_function_is_set_ = false; - bool video_full_range_flag_is_set_ = false; + base::Optional profile_; + base::Optional level_; + base::Optional bit_depth_; + base::Optional chroma_subsampling_; + base::Optional video_full_range_flag_; + base::Optional color_primaries_; + base::Optional transfer_characteristics_; + base::Optional matrix_coefficients_; std::vector codec_initialization_data_; // Not using DISALLOW_COPY_AND_ASSIGN here intentionally to allow the compiler diff --git a/packager/media/codecs/vp_codec_configuration_record_unittest.cc b/packager/media/codecs/vp_codec_configuration_record_unittest.cc index 2dac9227d9..dea8998052 100644 --- a/packager/media/codecs/vp_codec_configuration_record_unittest.cc +++ b/packager/media/codecs/vp_codec_configuration_record_unittest.cc @@ -13,50 +13,50 @@ namespace media { TEST(VPCodecConfigurationRecordTest, Parse) { const uint8_t kVpCodecConfigurationData[] = { - 0x01, 0x00, 0xA2, 0x14, 0x00, 0x01, 0x00, + 0x01, 0x14, 0xA2, 0x02, 0x03, 0x04, 0x00, 0x00, }; VPCodecConfigurationRecord vp_config; - ASSERT_TRUE(vp_config.ParseMP4(std::vector( - kVpCodecConfigurationData, - kVpCodecConfigurationData + arraysize(kVpCodecConfigurationData)))); + ASSERT_TRUE(vp_config.ParseMP4( + std::vector(std::begin(kVpCodecConfigurationData), + std::end(kVpCodecConfigurationData)))); EXPECT_EQ(1u, vp_config.profile()); - EXPECT_EQ(0u, vp_config.level()); + EXPECT_EQ(20u, vp_config.level()); EXPECT_EQ(10u, vp_config.bit_depth()); - EXPECT_EQ(2u, vp_config.color_space()); EXPECT_EQ(1u, vp_config.chroma_subsampling()); - EXPECT_EQ(2u, vp_config.transfer_function()); EXPECT_FALSE(vp_config.video_full_range_flag()); + EXPECT_EQ(2u, vp_config.color_primaries()); + EXPECT_EQ(3u, vp_config.transfer_characteristics()); + EXPECT_EQ(4u, vp_config.matrix_coefficients()); - EXPECT_EQ("vp09.01.00.10.02.01.02.00", vp_config.GetCodecString(kCodecVP9)); + EXPECT_EQ("vp09.01.20.10.01.02.03.04.00", + vp_config.GetCodecString(kCodecVP9)); } TEST(VPCodecConfigurationRecordTest, ParseWithInsufficientData) { const uint8_t kVpCodecConfigurationData[] = { - 0x01, 0x00, 0xA2, 0x14, + 0x01, 0x14, 0xA2, 0x02, }; VPCodecConfigurationRecord vp_config; - ASSERT_FALSE(vp_config.ParseMP4(std::vector( - kVpCodecConfigurationData, - kVpCodecConfigurationData + arraysize(kVpCodecConfigurationData)))); + ASSERT_FALSE(vp_config.ParseMP4( + std::vector(std::begin(kVpCodecConfigurationData), + std::end(kVpCodecConfigurationData)))); } TEST(VPCodecConfigurationRecordTest, WriteMP4) { const uint8_t kExpectedVpCodecConfigurationData[] = { - 0x02, 0x01, 0x80, 0x21, 0x00, 0x00, + 0x02, 0x01, 0x85, 0x03, 0x04, 0x05, 0x00, 0x00, }; - VPCodecConfigurationRecord vp_config(0x02, 0x01, 0x08, 0x00, 0x02, 0x00, true, - std::vector()); + VPCodecConfigurationRecord vp_config(0x02, 0x01, 0x08, 0x02, true, 0x03, 0x04, + 0x05, std::vector()); std::vector data; vp_config.WriteMP4(&data); - EXPECT_EQ( - std::vector(kExpectedVpCodecConfigurationData, - kExpectedVpCodecConfigurationData + - arraysize(kExpectedVpCodecConfigurationData)), - data); + EXPECT_EQ(std::vector(std::begin(kExpectedVpCodecConfigurationData), + std::end(kExpectedVpCodecConfigurationData)), + data); } TEST(VPCodecConfigurationRecordTest, WriteWebM) { @@ -64,18 +64,16 @@ TEST(VPCodecConfigurationRecordTest, WriteWebM) { 0x01, 0x01, 0x02, 0x02, 0x01, 0x01, 0x03, 0x01, 0x08, - 0x04, 0x01, 0x03 + 0x04, 0x01, 0x02, }; - VPCodecConfigurationRecord vp_config(0x02, 0x01, 0x08, 0x00, 0x03, 0x00, true, - std::vector()); + VPCodecConfigurationRecord vp_config(0x02, 0x01, 0x08, 0x02, true, 0x03, 0x04, + 0x05, std::vector()); std::vector data; vp_config.WriteWebM(&data); - EXPECT_EQ( - std::vector(kExpectedVpCodecConfigurationData, - kExpectedVpCodecConfigurationData + - arraysize(kExpectedVpCodecConfigurationData)), - data); + EXPECT_EQ(std::vector(std::begin(kExpectedVpCodecConfigurationData), + std::end(kExpectedVpCodecConfigurationData)), + data); } } // namespace media diff --git a/packager/media/formats/mp4/box_definitions.cc b/packager/media/formats/mp4/box_definitions.cc index 95ff6e2e34..bf94410b8b 100644 --- a/packager/media/formats/mp4/box_definitions.cc +++ b/packager/media/formats/mp4/box_definitions.cc @@ -1446,9 +1446,12 @@ bool CodecConfiguration::ReadWriteInternal(BoxBuffer* buffer) { // VPCodecConfiguration box inherits from FullBox instead of Box. The extra 4 // bytes are handled here. if (box_type == FOURCC_vpcC) { - uint32_t version_flags = 0; + // Only version 1 box is supported. + uint8_t vpcc_version = 1; + uint32_t version_flags = vpcc_version << 24; RCHECK(buffer->ReadWriteUInt32(&version_flags)); - RCHECK(version_flags == 0); + vpcc_version = version_flags >> 24; + RCHECK(vpcc_version == 1); } if (buffer->Reading()) { diff --git a/packager/media/formats/webm/webm_cluster_parser_unittest.cc b/packager/media/formats/webm/webm_cluster_parser_unittest.cc index 800e34a9ec..249d2953d1 100644 --- a/packager/media/formats/webm/webm_cluster_parser_unittest.cc +++ b/packager/media/formats/webm/webm_cluster_parser_unittest.cc @@ -821,7 +821,7 @@ TEST_F(WebMClusterParserTest, ParseVP8) { ASSERT_EQ(2u, streams_from_init_event_.size()); EXPECT_EQ(kStreamAudio, streams_from_init_event_[0]->stream_type()); EXPECT_EQ(kStreamVideo, streams_from_init_event_[1]->stream_type()); - EXPECT_EQ("vp08.01.00.08.01.01.00.00", + EXPECT_EQ("vp08.01.10.08.01.02.02.02.00", streams_from_init_event_[1]->codec_string()); } @@ -835,7 +835,7 @@ TEST_F(WebMClusterParserTest, ParseVP9) { ASSERT_EQ(2u, streams_from_init_event_.size()); EXPECT_EQ(kStreamAudio, streams_from_init_event_[0]->stream_type()); EXPECT_EQ(kStreamVideo, streams_from_init_event_[1]->stream_type()); - EXPECT_EQ("vp09.03.00.12.00.03.00.00", + EXPECT_EQ("vp09.03.10.12.03.02.02.02.00", streams_from_init_event_[1]->codec_string()); }