From d0978b3937d65fd93d587f6f2cfe7f110f0b2bc6 Mon Sep 17 00:00:00 2001 From: KongQun Yang Date: Tue, 11 Sep 2018 14:53:55 -0700 Subject: [PATCH] Compute and set VP9 Level if it is not already set The VP9 level is computed when the container is missing a codec config or if the level is missing from the codec config. This fixes VP9 in ISO-BMFF files generated by FFmpeg v4.0.2 or earlier which does not have level set in the codec config. Fixes #469. Change-Id: I685bfd48be16ee6b2209da1c3173f7d6bb02b36a --- .../bear-320x240-vp9-opus-video.mp4 | Bin 71890 -> 71890 bytes ...ecrypted-bear-320x240-vp9-opus-video-0.mp4 | Bin 70698 -> 70698 bytes .../opus-vp9-mp4-with-encryption/output.mpd | 2 +- .../bear-vp9-blockgroup-video.webm | Bin 67366 -> 67366 bytes .../vp9-webm-with-blockgroup/output.mpd | 2 +- .../vp9-webm/bear-320x240-vp9-opus-video.webm | Bin 69549 -> 69549 bytes .../app/test/testdata/vp9-webm/output.mpd | 2 +- .../bear-320x180-vp9-altref-video.webm | Bin 64062 -> 64062 bytes ...ypted-bear-320x180-vp9-altref-video-0.webm | Bin 63928 -> 63928 bytes .../webm-subsample-encryption/output.mpd | 2 +- .../bear-320x180-vp9-altref-video.webm | Bin 64062 -> 64062 bytes ...ypted-bear-320x180-vp9-altref-video-0.webm | Bin 63928 -> 63928 bytes .../output.mpd | 2 +- .../codecs/vp_codec_configuration_record.cc | 93 ++++++++++++++++++ .../codecs/vp_codec_configuration_record.h | 5 + .../vp_codec_configuration_record_unittest.cc | 52 ++++++++++ .../media/formats/mp4/mp4_media_parser.cc | 22 +++-- .../media/formats/webm/webm_tracks_parser.cc | 16 ++- 18 files changed, 182 insertions(+), 16 deletions(-) diff --git a/packager/app/test/testdata/opus-vp9-mp4-with-encryption/bear-320x240-vp9-opus-video.mp4 b/packager/app/test/testdata/opus-vp9-mp4-with-encryption/bear-320x240-vp9-opus-video.mp4 index 199276410d5c369c63bf974f4c2c33b5a88c9ed2..eca0bdb0f27b963f687201b85b6a10f66e3fe451 100644 GIT binary patch delta 30 mcmcb#k>%1xmJLlzj3SeLnI$LBVcNhb((KE;-ItlMRTKcOGYP){ delta 30 
mcmcb#k>%1xmJLlzj9im_nI$LBVcNjR)$Gf>-ItlMRTKcN2MMbH diff --git a/packager/app/test/testdata/opus-vp9-mp4-with-encryption/decrypted-bear-320x240-vp9-opus-video-0.mp4 b/packager/app/test/testdata/opus-vp9-mp4-with-encryption/decrypted-bear-320x240-vp9-opus-video-0.mp4 index a7190fb2018ad2bb77d447705da91b49af3a16d0..987cf6ed2db856221656ad1148f5366e9aad199d 100644 GIT binary patch delta 20 ccmZ3rf@ReTmJLlzj3Uj=Oxv587?%qI08&c^^8f$< delta 20 ccmZ3rf@ReTmJLlzj9kslOxv587?%qI08!@#=>Px# diff --git a/packager/app/test/testdata/opus-vp9-mp4-with-encryption/output.mpd b/packager/app/test/testdata/opus-vp9-mp4-with-encryption/output.mpd index fcf3d57d14..b2547407cf 100644 --- a/packager/app/test/testdata/opus-vp9-mp4-with-encryption/output.mpd +++ b/packager/app/test/testdata/opus-vp9-mp4-with-encryption/output.mpd @@ -20,7 +20,7 @@ AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAExMjM0NTY3ODkwMTIzNDU2AAAAAA== - + bear-320x240-vp9-opus-video.mp4 diff --git a/packager/app/test/testdata/vp9-webm-with-blockgroup/bear-vp9-blockgroup-video.webm b/packager/app/test/testdata/vp9-webm-with-blockgroup/bear-vp9-blockgroup-video.webm index d9a6961ca63e236871aabb6a9f830f882c81fc37..4371b71385e72275ebbefcb6be743aed7fa05ac4 100644 GIT binary patch delta 20 bcmZ41$Fi)CWrHXqqe!zD<90DdMgcYeLB|Bk delta 20 bcmZ41$Fi)CWrHXqBUiH+<90DdMgcYeL1P5L diff --git a/packager/app/test/testdata/vp9-webm-with-blockgroup/output.mpd b/packager/app/test/testdata/vp9-webm-with-blockgroup/output.mpd index 99aad3ec97..69803111d3 100644 --- a/packager/app/test/testdata/vp9-webm-with-blockgroup/output.mpd +++ b/packager/app/test/testdata/vp9-webm-with-blockgroup/output.mpd @@ -3,7 +3,7 @@ - + bear-vp9-blockgroup-video.webm diff --git a/packager/app/test/testdata/vp9-webm/bear-320x240-vp9-opus-video.webm b/packager/app/test/testdata/vp9-webm/bear-320x240-vp9-opus-video.webm index bf9c2ee34cbeda7f2e4157dc031cb00e2a5353c7..74942ee6d2a92569c1d8b7937f73f6cef7f2573a 100644 GIT binary patch delta 20 
ccmZ2GpJnZQmJOnej3UiqjN8Q+8K?6B07f$fiU0rr delta 20 ccmZ2GpJnZQmJOnej9krPjN8Q+8K?6B07cIQfB*mh diff --git a/packager/app/test/testdata/vp9-webm/output.mpd b/packager/app/test/testdata/vp9-webm/output.mpd index 4a1df9ba95..6564a760a3 100644 --- a/packager/app/test/testdata/vp9-webm/output.mpd +++ b/packager/app/test/testdata/vp9-webm/output.mpd @@ -12,7 +12,7 @@ - + bear-320x240-vp9-opus-video.webm diff --git a/packager/app/test/testdata/webm-subsample-encryption/bear-320x180-vp9-altref-video.webm b/packager/app/test/testdata/webm-subsample-encryption/bear-320x180-vp9-altref-video.webm index 85a7f30c15f53fd2e87afd08dad2ab631df86864..b1bc299d75e8557a711113b3e0004ea453ea7f12 100644 GIT binary patch delta 16 Ycmdn@g?Zl><_)5ZjNF^W7&U$Z06eY+C;$Ke delta 16 Ycmdn@g?Zl><_)5Zj9i<=7&U$Z06eG$CjbBd diff --git a/packager/app/test/testdata/webm-subsample-encryption/decrypted-bear-320x180-vp9-altref-video-0.webm b/packager/app/test/testdata/webm-subsample-encryption/decrypted-bear-320x180-vp9-altref-video-0.webm index f53aee8719e39512cd2bfc227dc8c19aaadd9664..4de594691ef01497ee867a0fe0f512109639d3ad 100644 GIT binary patch delta 16 Ycmdn-nR&-&<_)5ZjNF^W7#IBj06f?RlK=n! 
delta 16 Ycmdn-nR&-&<_)5Zj9i<=7#IBj06fwLk^lez diff --git a/packager/app/test/testdata/webm-subsample-encryption/output.mpd b/packager/app/test/testdata/webm-subsample-encryption/output.mpd index f2c8a061ae..6050e51404 100644 --- a/packager/app/test/testdata/webm-subsample-encryption/output.mpd +++ b/packager/app/test/testdata/webm-subsample-encryption/output.mpd @@ -6,7 +6,7 @@ AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAExMjM0NTY3ODkwMTIzNDU2AAAAAA== - + bear-320x180-vp9-altref-video.webm diff --git a/packager/app/test/testdata/webm-vp9-full-sample-encryption/bear-320x180-vp9-altref-video.webm b/packager/app/test/testdata/webm-vp9-full-sample-encryption/bear-320x180-vp9-altref-video.webm index 85a7f30c15f53fd2e87afd08dad2ab631df86864..b1bc299d75e8557a711113b3e0004ea453ea7f12 100644 GIT binary patch delta 16 Ycmdn@g?Zl><_)5ZjNF^W7&U$Z06eY+C;$Ke delta 16 Ycmdn@g?Zl><_)5Zj9i<=7&U$Z06eG$CjbBd diff --git a/packager/app/test/testdata/webm-vp9-full-sample-encryption/decrypted-bear-320x180-vp9-altref-video-0.webm b/packager/app/test/testdata/webm-vp9-full-sample-encryption/decrypted-bear-320x180-vp9-altref-video-0.webm index f53aee8719e39512cd2bfc227dc8c19aaadd9664..4de594691ef01497ee867a0fe0f512109639d3ad 100644 GIT binary patch delta 16 Ycmdn-nR&-&<_)5ZjNF^W7#IBj06f?RlK=n! 
delta 16 Ycmdn-nR&-&<_)5Zj9i<=7#IBj06fwLk^lez diff --git a/packager/app/test/testdata/webm-vp9-full-sample-encryption/output.mpd b/packager/app/test/testdata/webm-vp9-full-sample-encryption/output.mpd index f2c8a061ae..6050e51404 100644 --- a/packager/app/test/testdata/webm-vp9-full-sample-encryption/output.mpd +++ b/packager/app/test/testdata/webm-vp9-full-sample-encryption/output.mpd @@ -6,7 +6,7 @@ AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAExMjM0NTY3ODkwMTIzNDU2AAAAAA== - + bear-320x180-vp9-altref-video.webm diff --git a/packager/media/codecs/vp_codec_configuration_record.cc b/packager/media/codecs/vp_codec_configuration_record.cc index ec6c26cd6f..c5c0c31714 100644 --- a/packager/media/codecs/vp_codec_configuration_record.cc +++ b/packager/media/codecs/vp_codec_configuration_record.cc @@ -52,6 +52,79 @@ void MergeField(const std::string& name, } } +enum VP9Level { + LEVEL_UNKNOWN = 0, + LEVEL_1 = 10, + LEVEL_1_1 = 11, + LEVEL_2 = 20, + LEVEL_2_1 = 21, + LEVEL_3 = 30, + LEVEL_3_1 = 31, + LEVEL_4 = 40, + LEVEL_4_1 = 41, + LEVEL_5 = 50, + LEVEL_5_1 = 51, + LEVEL_5_2 = 52, + LEVEL_6 = 60, + LEVEL_6_1 = 61, + LEVEL_6_2 = 62, + LEVEL_MAX = 255 +}; + +struct VP9LevelCharacteristics { + uint64_t max_luma_sample_rate; + uint32_t max_luma_picture_size; + double max_avg_bitrate; + double max_cpb_size; + double min_compression_ratio; + uint8_t max_num_column_tiles; + uint32_t min_altref_distance; + uint8_t max_ref_frame_buffers; +}; + +struct VP9LevelDefinition { + VP9Level level; + VP9LevelCharacteristics characteristics; +}; + +VP9Level LevelFromCharacteristics(uint64_t luma_sample_rate, + uint32_t luma_picture_size) { + // https://www.webmproject.org/vp9/levels/. 
+ const VP9LevelDefinition vp9_level_definitions[] = { + {LEVEL_1, {829440, 36864, 200, 400, 2, 1, 4, 8}}, + {LEVEL_1_1, {2764800, 73728, 800, 1000, 2, 1, 4, 8}}, + {LEVEL_2, {4608000, 122880, 1800, 1500, 2, 1, 4, 8}}, + {LEVEL_2_1, {9216000, 245760, 3600, 2800, 2, 2, 4, 8}}, + {LEVEL_3, {20736000, 552960, 7200, 6000, 2, 4, 4, 8}}, + {LEVEL_3_1, {36864000, 983040, 12000, 10000, 2, 4, 4, 8}}, + {LEVEL_4, {83558400, 2228224, 18000, 16000, 4, 4, 4, 8}}, + {LEVEL_4_1, {160432128, 2228224, 30000, 18000, 4, 4, 5, 6}}, + {LEVEL_5, {311951360, 8912896, 60000, 36000, 6, 8, 6, 4}}, + {LEVEL_5_1, {588251136, 8912896, 120000, 46000, 8, 8, 10, 4}}, + {LEVEL_5_2, {1176502272, 8912896, 180000, 90000, 8, 8, 10, 4}}, + {LEVEL_6, {1176502272, 35651584, 180000, 90000, 8, 16, 10, 4}}, + {LEVEL_6_1, {2353004544u, 35651584, 240000, 180000, 8, 16, 10, 4}}, + {LEVEL_6_2, {4706009088u, 35651584, 480000, 360000, 8, 16, 10, 4}}, + }; + + for (const VP9LevelDefinition& def : vp9_level_definitions) { + // All the characteristic fields except max_luma_sample_rate and + // max_luma_picture_size are ignored to avoid the extra complexities of + // computing those values. It may result in incorrect level being returned. + // If this is a problem, please file a bug to + // https://github.com/google/shaka-packager/issues. + if (luma_sample_rate <= def.characteristics.max_luma_sample_rate && + luma_picture_size <= def.characteristics.max_luma_picture_size) { + return def.level; + } + } + + LOG(WARNING) << "Cannot determine VP9 level for luma_sample_rate (" + << luma_sample_rate << ") or luma_picture_size (" + << luma_picture_size << "). 
Returning LEVEL_1."; + return LEVEL_1; +} + } // namespace VPCodecConfigurationRecord::VPCodecConfigurationRecord() {} @@ -152,6 +225,26 @@ bool VPCodecConfigurationRecord::ParseWebM(const std::vector& data) { return true; } +void VPCodecConfigurationRecord::SetVP9Level(uint16_t width, + uint16_t height, + double sample_duration_seconds) { + // https://www.webmproject.org/vp9/levels/. + + const uint32_t luma_picture_size = width * height; + // Alt-Ref frames are not taken into consideration intentionally to avoid the + // extra complexities. It may result in a smaller luma_sample_rate than the + // actual luma_sample_rate, leading to an incorrect level being returned. + // If this is a problem, please file a bug to + // https://github.com/google/shaka-packager/issues. + const double kUnknownSampleDuration = 0.0; + // The decision is based on luma_picture_size only if duration is unknown. + uint64_t luma_sample_rate = 0; + if (sample_duration_seconds != kUnknownSampleDuration) + luma_sample_rate = luma_picture_size / sample_duration_seconds; + + level_ = LevelFromCharacteristics(luma_sample_rate, luma_picture_size); +} + void VPCodecConfigurationRecord::WriteMP4(std::vector* data) const { BufferWriter writer; writer.AppendInt(profile()); diff --git a/packager/media/codecs/vp_codec_configuration_record.h b/packager/media/codecs/vp_codec_configuration_record.h index 798c3dfda5..ab6df911c7 100644 --- a/packager/media/codecs/vp_codec_configuration_record.h +++ b/packager/media/codecs/vp_codec_configuration_record.h @@ -187,6 +187,11 @@ class VPCodecConfigurationRecord { /// @return false if there is parsing errors. bool ParseWebM(const std::vector& data); + /// Compute and set VP9 Level based on the input attributes. + void SetVP9Level(uint16_t width, + uint16_t height, + double sample_duration_seconds); + /// @param data should not be null. /// Writes VP codec configuration record to buffer using MP4 format. 
void WriteMP4(std::vector* data) const; diff --git a/packager/media/codecs/vp_codec_configuration_record_unittest.cc b/packager/media/codecs/vp_codec_configuration_record_unittest.cc index a8f41a747b..a180f633b2 100644 --- a/packager/media/codecs/vp_codec_configuration_record_unittest.cc +++ b/packager/media/codecs/vp_codec_configuration_record_unittest.cc @@ -161,5 +161,57 @@ TEST(VPCodecConfigurationRecordTest, MergeChromaSubsampling) { EXPECT_EQ(AVCHROMA_LOC_TOPLEFT, vp_config.chroma_location()); } +TEST(VPCodecConfigurationRecordTest, SetLevel) { + const uint8_t kUnknownLevel = 0; + VPCodecConfigurationRecord vp_config(0x02, kUnknownLevel, 0x08, 0x02, true, + 0x03, 0x04, 0x05, + std::vector()); + ASSERT_EQ(kUnknownLevel, vp_config.level()); + + // kExamples are copied from https://www.webmproject.org/vp9/levels/. + struct { + int expected_level; + int width; + int height; + int frame_rate; + } kExamples[] = { + {10, 256, 144, 15}, {11, 384, 192, 30}, {20, 480, 256, 30}, + {21, 640, 384, 30}, {30, 1080, 512, 30}, {31, 1280, 768, 30}, + {40, 2048, 1088, 30}, {41, 2048, 1088, 60}, {50, 4096, 2176, 30}, + {51, 4096, 2176, 60}, {52, 4096, 2176, 120}, {60, 8192, 4352, 30}, + {61, 8192, 4352, 60}, {62, 8192, 4352, 120}, + }; + for (const auto& example : kExamples) { + vp_config.SetVP9Level(example.width, example.height, + 1.0 / example.frame_rate); + ASSERT_EQ(example.expected_level, vp_config.level()); + } +} + +TEST(VPCodecConfigurationRecordTest, SetLevelWithUnknownFrameDuration) { + const uint8_t kUnknownLevel = 0; + VPCodecConfigurationRecord vp_config(0x02, kUnknownLevel, 0x08, 0x02, true, + 0x03, 0x04, 0x05, + std::vector()); + ASSERT_EQ(kUnknownLevel, vp_config.level()); + + // kExamples are modified from https://www.webmproject.org/vp9/levels/ with + // frame rate removed. 
+ struct { + int expected_level; + int width; + int height; + } kExamples[] = { + {10, 256, 144}, {11, 384, 192}, {20, 480, 256}, + {21, 640, 384}, {30, 1080, 512}, {31, 1280, 768}, + {40, 2048, 1088}, {50, 4096, 2176}, {60, 8192, 4352}, + }; + for (const auto& example : kExamples) { + const int kUnknownFrameDuration = 0; + vp_config.SetVP9Level(example.width, example.height, kUnknownFrameDuration); + ASSERT_EQ(example.expected_level, vp_config.level()); + } +} + } // namespace media } // namespace shaka diff --git a/packager/media/formats/mp4/mp4_media_parser.cc b/packager/media/formats/mp4/mp4_media_parser.cc index 1e41ff0d71..8d16d0ad0f 100644 --- a/packager/media/formats/mp4/mp4_media_parser.cc +++ b/packager/media/formats/mp4/mp4_media_parser.cc @@ -504,6 +504,8 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { if (desc_idx >= samp_descr.video_entries.size()) desc_idx = 0; const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx]; + std::vector codec_configuration_data = + entry.codec_configuration.data; uint32_t coded_width = entry.width; uint32_t coded_height = entry.height; @@ -521,7 +523,7 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { switch (actual_format) { case FOURCC_av01: { AV1CodecConfigurationRecord av1_config; - if (!av1_config.Parse(entry.codec_configuration.data)) { + if (!av1_config.Parse(codec_configuration_data)) { LOG(ERROR) << "Failed to parse av1c."; return false; } @@ -531,7 +533,7 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { case FOURCC_avc1: case FOURCC_avc3: { AVCDecoderConfigurationRecord avc_config; - if (!avc_config.Parse(entry.codec_configuration.data)) { + if (!avc_config.Parse(codec_configuration_data)) { LOG(ERROR) << "Failed to parse avcc."; return false; } @@ -568,7 +570,7 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { case FOURCC_hev1: case FOURCC_hvc1: { HEVCDecoderConfigurationRecord hevc_config; - if (!hevc_config.Parse(entry.codec_configuration.data)) { + if 
(!hevc_config.Parse(codec_configuration_data)) { LOG(ERROR) << "Failed to parse hevc."; return false; } @@ -579,10 +581,17 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { case FOURCC_vp08: case FOURCC_vp09: { VPCodecConfigurationRecord vp_config; - if (!vp_config.ParseMP4(entry.codec_configuration.data)) { + if (!vp_config.ParseMP4(codec_configuration_data)) { LOG(ERROR) << "Failed to parse vpcc."; return false; } + if (actual_format == FOURCC_vp09 && + (!vp_config.is_level_set() || vp_config.level() == 0)) { + const double kUnknownSampleDuration = 0.0; + vp_config.SetVP9Level(coded_width, coded_height, + kUnknownSampleDuration); + vp_config.WriteMP4(&codec_configuration_data); + } codec_string = vp_config.GetCodecString(video_codec); break; } @@ -606,9 +615,8 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { std::shared_ptr video_stream_info(new VideoStreamInfo( track->header.track_id, timescale, duration, video_codec, GetH26xStreamFormat(actual_format), codec_string, - entry.codec_configuration.data.data(), - entry.codec_configuration.data.size(), coded_width, coded_height, - pixel_width, pixel_height, + codec_configuration_data.data(), codec_configuration_data.size(), + coded_width, coded_height, pixel_width, pixel_height, 0, // trick_play_factor nalu_length_size, track->media.header.language.code, is_encrypted)); diff --git a/packager/media/formats/webm/webm_tracks_parser.cc b/packager/media/formats/webm/webm_tracks_parser.cc index 84b5c26e5b..f9073d8d6f 100644 --- a/packager/media/formats/webm/webm_tracks_parser.cc +++ b/packager/media/formats/webm/webm_tracks_parser.cc @@ -223,16 +223,24 @@ bool WebMTracksParser::OnListEnd(int id) { } video_default_duration_ = default_duration_; - // |vp_config_| is only useful for VP8 and VP9. 
- if (codec_id_ == "V_VP8" || codec_id_ == "V_VP9") - vp_config_ = video_client_.GetVpCodecConfig(codec_private_); - DCHECK(!video_stream_info_); video_stream_info_ = video_client_.GetVideoStreamInfo( video_track_num_, codec_id_, codec_private_, !video_encryption_key_id_.empty()); if (!video_stream_info_) return false; + + if (codec_id_ == "V_VP8" || codec_id_ == "V_VP9") { + vp_config_ = video_client_.GetVpCodecConfig(codec_private_); + const double kNanosecondsPerSecond = 1000000000.0; + if (codec_id_ == "V_VP9" && + (!vp_config_.is_level_set() || vp_config_.level() == 0)) { + vp_config_.SetVP9Level( + video_stream_info_->width(), video_stream_info_->height(), + video_default_duration_ / kNanosecondsPerSecond); + } + } + } else { DLOG(INFO) << "Ignoring video track " << track_num_; ignored_tracks_.insert(track_num_);