Compute and set VP9 Level if it is not already set

The VP9 level is computed when the container is missing a codec config
or if the level is missing from the codec config.

This fixes VP9 in ISO-BMFF files generated by FFmpeg v4.0.2 or earlier,
which does not set the level in the codec config.

Fixes #469.

Change-Id: I685bfd48be16ee6b2209da1c3173f7d6bb02b36a
This commit is contained in:
KongQun Yang 2018-09-11 14:53:55 -07:00
parent ea9379dc54
commit ad805c804c
18 changed files with 181 additions and 11 deletions

View File

@ -20,7 +20,7 @@
<ContentProtection schemeIdUri="urn:uuid:1077efec-c0b2-4d02-ace3-3c1e52e2fb4b"> <ContentProtection schemeIdUri="urn:uuid:1077efec-c0b2-4d02-ace3-3c1e52e2fb4b">
<cenc:pssh>AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAExMjM0NTY3ODkwMTIzNDU2AAAAAA==</cenc:pssh> <cenc:pssh>AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAExMjM0NTY3ODkwMTIzNDU2AAAAAA==</cenc:pssh>
</ContentProtection> </ContentProtection>
<Representation id="1" bandwidth="231745" codecs="vp09.00.10.08.01.02.02.02.00" mimeType="video/mp4" sar="427:320"> <Representation id="1" bandwidth="231745" codecs="vp09.00.20.08.01.02.02.02.00" mimeType="video/mp4" sar="427:320">
<BaseURL>bear-320x240-vp9-opus-video.mp4</BaseURL> <BaseURL>bear-320x240-vp9-opus-video.mp4</BaseURL>
<SegmentBase indexRange="1063-1130" timescale="1000000" presentationTimeOffset="37000"> <SegmentBase indexRange="1063-1130" timescale="1000000" presentationTimeOffset="37000">
<Initialization range="0-1062"/> <Initialization range="0-1062"/>

View File

@ -3,7 +3,7 @@
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 DASH-MPD.xsd" xmlns:cenc="urn:mpeg:cenc:2013" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" minBufferTime="PT2S" type="static" mediaPresentationDuration="PT2.7360000610351562S"> <MPD xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 DASH-MPD.xsd" xmlns:cenc="urn:mpeg:cenc:2013" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" minBufferTime="PT2S" type="static" mediaPresentationDuration="PT2.7360000610351562S">
<Period id="0"> <Period id="0">
<AdaptationSet id="0" contentType="video" width="320" height="240" frameRate="1000000/33000" subsegmentAlignment="true" par="4:3"> <AdaptationSet id="0" contentType="video" width="320" height="240" frameRate="1000000/33000" subsegmentAlignment="true" par="4:3">
<Representation id="0" bandwidth="196039" codecs="vp09.00.10.08.01.02.02.02.00" mimeType="video/webm" sar="1:1"> <Representation id="0" bandwidth="196039" codecs="vp09.00.20.08.01.02.02.02.00" mimeType="video/webm" sar="1:1">
<BaseURL>bear-vp9-blockgroup-video.webm</BaseURL> <BaseURL>bear-vp9-blockgroup-video.webm</BaseURL>
<SegmentBase indexRange="302-320" timescale="1000000"> <SegmentBase indexRange="302-320" timescale="1000000">
<Initialization range="0-301"/> <Initialization range="0-301"/>

View File

@ -12,7 +12,7 @@
</Representation> </Representation>
</AdaptationSet> </AdaptationSet>
<AdaptationSet id="1" contentType="video" width="320" height="240" frameRate="1000000/34000" subsegmentAlignment="true" par="16:9"> <AdaptationSet id="1" contentType="video" width="320" height="240" frameRate="1000000/34000" subsegmentAlignment="true" par="16:9">
<Representation id="1" bandwidth="225727" codecs="vp09.00.10.08.01.02.02.02.00" mimeType="video/webm" sar="427:320"> <Representation id="1" bandwidth="225727" codecs="vp09.00.20.08.01.02.02.02.00" mimeType="video/webm" sar="427:320">
<BaseURL>bear-320x240-vp9-opus-video.webm</BaseURL> <BaseURL>bear-320x240-vp9-opus-video.webm</BaseURL>
<SegmentBase indexRange="302-350" timescale="1000000" presentationTimeOffset="37000"> <SegmentBase indexRange="302-350" timescale="1000000" presentationTimeOffset="37000">
<Initialization range="0-301"/> <Initialization range="0-301"/>

View File

@ -6,7 +6,7 @@
<ContentProtection schemeIdUri="urn:uuid:1077efec-c0b2-4d02-ace3-3c1e52e2fb4b" cenc:default_KID="31323334-3536-3738-3930-313233343536"> <ContentProtection schemeIdUri="urn:uuid:1077efec-c0b2-4d02-ace3-3c1e52e2fb4b" cenc:default_KID="31323334-3536-3738-3930-313233343536">
<cenc:pssh>AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAExMjM0NTY3ODkwMTIzNDU2AAAAAA==</cenc:pssh> <cenc:pssh>AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAExMjM0NTY3ODkwMTIzNDU2AAAAAA==</cenc:pssh>
</ContentProtection> </ContentProtection>
<Representation id="0" bandwidth="184009" codecs="vp09.00.10.08.00.02.02.02.00" mimeType="video/webm" sar="1:1"> <Representation id="0" bandwidth="184009" codecs="vp09.00.11.08.00.02.02.02.00" mimeType="video/webm" sar="1:1">
<BaseURL>bear-320x180-vp9-altref-video.webm</BaseURL> <BaseURL>bear-320x180-vp9-altref-video.webm</BaseURL>
<SegmentBase indexRange="353-371" timescale="1000000"> <SegmentBase indexRange="353-371" timescale="1000000">
<Initialization range="0-352"/> <Initialization range="0-352"/>

View File

@ -6,7 +6,7 @@
<ContentProtection schemeIdUri="urn:uuid:1077efec-c0b2-4d02-ace3-3c1e52e2fb4b" cenc:default_KID="31323334-3536-3738-3930-313233343536"> <ContentProtection schemeIdUri="urn:uuid:1077efec-c0b2-4d02-ace3-3c1e52e2fb4b" cenc:default_KID="31323334-3536-3738-3930-313233343536">
<cenc:pssh>AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAExMjM0NTY3ODkwMTIzNDU2AAAAAA==</cenc:pssh> <cenc:pssh>AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAExMjM0NTY3ODkwMTIzNDU2AAAAAA==</cenc:pssh>
</ContentProtection> </ContentProtection>
<Representation id="0" bandwidth="184009" codecs="vp09.00.10.08.00.02.02.02.00" mimeType="video/webm" sar="1:1"> <Representation id="0" bandwidth="184009" codecs="vp09.00.11.08.00.02.02.02.00" mimeType="video/webm" sar="1:1">
<BaseURL>bear-320x180-vp9-altref-video.webm</BaseURL> <BaseURL>bear-320x180-vp9-altref-video.webm</BaseURL>
<SegmentBase indexRange="353-371" timescale="1000000"> <SegmentBase indexRange="353-371" timescale="1000000">
<Initialization range="0-352"/> <Initialization range="0-352"/>

View File

@ -54,6 +54,79 @@ void MergeField(const std::string& name,
} }
} }
// VP9 levels, see https://www.webmproject.org/vp9/levels/.
// Each enumerator's value is the level number with the decimal point removed,
// e.g. level 2.1 is represented as 21.
enum VP9Level {
// Value 0 is what callers treat as "level not set".
LEVEL_UNKNOWN = 0,
LEVEL_1 = 10,
LEVEL_1_1 = 11,
LEVEL_2 = 20,
LEVEL_2_1 = 21,
LEVEL_3 = 30,
LEVEL_3_1 = 31,
LEVEL_4 = 40,
LEVEL_4_1 = 41,
LEVEL_5 = 50,
LEVEL_5_1 = 51,
LEVEL_5_2 = 52,
LEVEL_6 = 60,
LEVEL_6_1 = 61,
LEVEL_6_2 = 62,
// NOTE(review): presumably the largest value that fits the 8-bit level field
// of the codec configuration record -- confirm.
LEVEL_MAX = 255
};
// Limits that a stream must stay within to conform to one VP9 level. The
// fields appear to mirror the columns of the level table at
// https://www.webmproject.org/vp9/levels/ (units as in that table -- TODO
// confirm). The field order matters: the level table in
// LevelFromCharacteristics initializes this struct positionally.
// Only max_luma_sample_rate and max_luma_picture_size are consulted when a
// level is selected; the remaining fields are currently informational.
struct VP9LevelCharacteristics {
// Upper bound on luma samples decoded per second.
uint64_t max_luma_sample_rate;
// Upper bound on luma samples per picture (width * height).
uint32_t max_luma_picture_size;
double max_avg_bitrate;
double max_cpb_size;
double min_compression_ratio;
uint8_t max_num_column_tiles;
uint32_t min_altref_distance;
uint8_t max_ref_frame_buffers;
};
// Associates a VP9 level with the limits that define it; one row of the level
// table used by LevelFromCharacteristics.
struct VP9LevelDefinition {
// The level this row describes.
VP9Level level;
// The limits a stream must not exceed to qualify for |level|.
VP9LevelCharacteristics characteristics;
};
// Picks the lowest VP9 level whose luma sample rate and luma picture size
// limits are both large enough for the given stream characteristics.
// Falls back to LEVEL_1 (and logs a warning) if no level in the table can
// accommodate the inputs.
VP9Level LevelFromCharacteristics(uint64_t luma_sample_rate,
                                  uint32_t luma_picture_size) {
  // Rows are taken from https://www.webmproject.org/vp9/levels/ and are
  // ordered from the lowest level to the highest, so the first row that fits
  // is the answer.
  const VP9LevelDefinition level_table[] = {
      {LEVEL_1, {829440, 36864, 200, 400, 2, 1, 4, 8}},
      {LEVEL_1_1, {2764800, 73728, 800, 1000, 2, 1, 4, 8}},
      {LEVEL_2, {4608000, 122880, 1800, 1500, 2, 1, 4, 8}},
      {LEVEL_2_1, {9216000, 245760, 3600, 2800, 2, 2, 4, 8}},
      {LEVEL_3, {20736000, 552960, 7200, 6000, 2, 4, 4, 8}},
      {LEVEL_3_1, {36864000, 983040, 12000, 10000, 2, 4, 4, 8}},
      {LEVEL_4, {83558400, 2228224, 18000, 16000, 4, 4, 4, 8}},
      {LEVEL_4_1, {160432128, 2228224, 30000, 18000, 4, 4, 5, 6}},
      {LEVEL_5, {311951360, 8912896, 60000, 36000, 6, 8, 6, 4}},
      {LEVEL_5_1, {588251136, 8912896, 120000, 46000, 8, 8, 10, 4}},
      {LEVEL_5_2, {1176502272, 8912896, 180000, 90000, 8, 8, 10, 4}},
      {LEVEL_6, {1176502272, 35651584, 180000, 90000, 8, 16, 10, 4}},
      {LEVEL_6_1, {2353004544u, 35651584, 240000, 180000, 8, 16, 10, 4}},
      {LEVEL_6_2, {4706009088u, 35651584, 480000, 360000, 8, 16, 10, 4}},
  };

  for (const VP9LevelDefinition& candidate : level_table) {
    const VP9LevelCharacteristics& limits = candidate.characteristics;
    // Only the sample rate and picture size limits are checked; the other
    // characteristics are skipped to avoid the complexity of computing them.
    // This may yield an incorrect level. If this is a problem, please file a
    // bug to https://github.com/google/shaka-packager/issues.
    if (luma_sample_rate > limits.max_luma_sample_rate)
      continue;
    if (luma_picture_size > limits.max_luma_picture_size)
      continue;
    return candidate.level;
  }

  LOG(WARNING) << "Cannot determine VP9 level for luma_sample_rate ("
               << luma_sample_rate << ") or luma_picture_size ("
               << luma_picture_size << "). Returning LEVEL_1.";
  return LEVEL_1;
}
} // namespace } // namespace
VPCodecConfigurationRecord::VPCodecConfigurationRecord() {} VPCodecConfigurationRecord::VPCodecConfigurationRecord() {}
@ -154,6 +227,26 @@ bool VPCodecConfigurationRecord::ParseWebM(const std::vector<uint8_t>& data) {
return true; return true;
} }
// Computes the VP9 level from the frame dimensions and the duration of one
// sample, and stores the result in |level_|.
// |width| and |height| are multiplied to obtain the luma picture size.
// |sample_duration_seconds| is the duration of a single sample in seconds;
// 0 means the duration is unknown, in which case the level is derived from
// the picture size alone. Negative and NaN durations are handled the same
// way, as they cannot yield a meaningful sample rate.
void VPCodecConfigurationRecord::SetVP9Level(uint16_t width,
                                             uint16_t height,
                                             double sample_duration_seconds) {
  // https://www.webmproject.org/vp9/levels/.

  // Widen before multiplying: two uint16_t operands are promoted to signed
  // int, and the product overflows int (undefined behavior) for very large
  // dimensions. The product of two 16-bit values always fits in 32 bits.
  const uint32_t luma_picture_size = static_cast<uint32_t>(width) * height;

  // Alt-Ref frames are not taken into consideration intentionally to avoid the
  // extra complexities. It may result in a smaller luma_sample_rate than the
  // actual luma_sample_rate, leading to an incorrect level being returned.
  // If this is a problem, please file a bug to
  // https://github.com/google/shaka-packager/issues.

  // The decision is based on luma_picture_size only if the duration is
  // unknown (zero) or otherwise unusable (negative or NaN); the comparison
  // below is false for all of those.
  uint64_t luma_sample_rate = 0;
  if (sample_duration_seconds > 0.0) {
    const double rate = luma_picture_size / sample_duration_seconds;
    // Converting a double that does not fit in uint64_t is undefined
    // behavior, so saturate instead.
    const double kUint64Range = 18446744073709551616.0;  // 2^64.
    luma_sample_rate = rate < kUint64Range ? static_cast<uint64_t>(rate)
                                           : ~static_cast<uint64_t>(0);
  }
  level_ = LevelFromCharacteristics(luma_sample_rate, luma_picture_size);
}
void VPCodecConfigurationRecord::WriteMP4(std::vector<uint8_t>* data) const { void VPCodecConfigurationRecord::WriteMP4(std::vector<uint8_t>* data) const {
BufferWriter writer; BufferWriter writer;
writer.AppendInt(profile()); writer.AppendInt(profile());

View File

@ -187,6 +187,11 @@ class VPCodecConfigurationRecord {
/// @return false if there is parsing errors. /// @return false if there is parsing errors.
bool ParseWebM(const std::vector<uint8_t>& data); bool ParseWebM(const std::vector<uint8_t>& data);
/// Compute and set VP9 Level based on the input attributes.
/// @param width is the frame width; it is multiplied by @a height to obtain
///        the luma picture size.
/// @param height is the frame height.
/// @param sample_duration_seconds is the duration of one sample in seconds.
///        Pass 0 if the duration is unknown, in which case the level is
///        decided from the luma picture size only.
void SetVP9Level(uint16_t width,
uint16_t height,
double sample_duration_seconds);
/// @param data should not be null. /// @param data should not be null.
/// Writes VP codec configuration record to buffer using MP4 format. /// Writes VP codec configuration record to buffer using MP4 format.
void WriteMP4(std::vector<uint8_t>* data) const; void WriteMP4(std::vector<uint8_t>* data) const;

View File

@ -161,5 +161,57 @@ TEST(VPCodecConfigurationRecordTest, MergeChromaSubsampling) {
EXPECT_EQ(AVCHROMA_LOC_TOPLEFT, vp_config.chroma_location()); EXPECT_EQ(AVCHROMA_LOC_TOPLEFT, vp_config.chroma_location());
} }
// Checks that SetVP9Level() selects the expected level for each example
// resolution / frame rate pair.
TEST(VPCodecConfigurationRecordTest, SetLevel) {
  const uint8_t kUnknownLevel = 0;
  VPCodecConfigurationRecord vp_config(0x02, kUnknownLevel, 0x08, 0x02, true,
                                       0x03, 0x04, 0x05,
                                       std::vector<uint8_t>());
  // The record starts out without a level.
  ASSERT_EQ(kUnknownLevel, vp_config.level());

  struct LevelExample {
    int expected_level;
    int width;
    int height;
    int frame_rate;
  };
  // The examples are copied from https://www.webmproject.org/vp9/levels/.
  const LevelExample kLevelExamples[] = {
      {10, 256, 144, 15},    {11, 384, 192, 30},   {20, 480, 256, 30},
      {21, 640, 384, 30},    {30, 1080, 512, 30},  {31, 1280, 768, 30},
      {40, 2048, 1088, 30},  {41, 2048, 1088, 60}, {50, 4096, 2176, 30},
      {51, 4096, 2176, 60},  {52, 4096, 2176, 120}, {60, 8192, 4352, 30},
      {61, 8192, 4352, 60},  {62, 8192, 4352, 120},
  };

  for (const LevelExample& sample : kLevelExamples) {
    // One frame lasts the reciprocal of the frame rate.
    const double frame_duration_seconds = 1.0 / sample.frame_rate;
    vp_config.SetVP9Level(sample.width, sample.height,
                          frame_duration_seconds);
    ASSERT_EQ(sample.expected_level, vp_config.level());
  }
}
// Checks that SetVP9Level() falls back to a picture-size-only decision when
// the frame duration is passed as 0 (unknown).
TEST(VPCodecConfigurationRecordTest, SetLevelWithUnknownFrameDuration) {
  const uint8_t kUnknownLevel = 0;
  VPCodecConfigurationRecord vp_config(0x02, kUnknownLevel, 0x08, 0x02, true,
                                       0x03, 0x04, 0x05,
                                       std::vector<uint8_t>());
  // The record starts out without a level.
  ASSERT_EQ(kUnknownLevel, vp_config.level());

  struct SizeOnlyExample {
    int expected_level;
    int width;
    int height;
  };
  // The examples are modified from https://www.webmproject.org/vp9/levels/
  // with the frame rate removed.
  const SizeOnlyExample kSizeOnlyExamples[] = {
      {10, 256, 144},   {11, 384, 192},   {20, 480, 256},
      {21, 640, 384},   {30, 1080, 512},  {31, 1280, 768},
      {40, 2048, 1088}, {50, 4096, 2176}, {60, 8192, 4352},
  };

  const int kUnknownFrameDuration = 0;
  for (const SizeOnlyExample& sample : kSizeOnlyExamples) {
    vp_config.SetVP9Level(sample.width, sample.height, kUnknownFrameDuration);
    ASSERT_EQ(sample.expected_level, vp_config.level());
  }
}
} // namespace media } // namespace media
} // namespace shaka } // namespace shaka

View File

@ -503,6 +503,8 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
if (desc_idx >= samp_descr.video_entries.size()) if (desc_idx >= samp_descr.video_entries.size())
desc_idx = 0; desc_idx = 0;
const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx]; const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx];
std::vector<uint8_t> codec_configuration_data =
entry.codec_configuration.data;
uint32_t coded_width = entry.width; uint32_t coded_width = entry.width;
uint32_t coded_height = entry.height; uint32_t coded_height = entry.height;
@ -521,7 +523,7 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
case FOURCC_avc1: case FOURCC_avc1:
case FOURCC_avc3: { case FOURCC_avc3: {
AVCDecoderConfigurationRecord avc_config; AVCDecoderConfigurationRecord avc_config;
if (!avc_config.Parse(entry.codec_configuration.data)) { if (!avc_config.Parse(codec_configuration_data)) {
LOG(ERROR) << "Failed to parse avcc."; LOG(ERROR) << "Failed to parse avcc.";
return false; return false;
} }
@ -558,7 +560,7 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
case FOURCC_hev1: case FOURCC_hev1:
case FOURCC_hvc1: { case FOURCC_hvc1: {
HEVCDecoderConfigurationRecord hevc_config; HEVCDecoderConfigurationRecord hevc_config;
if (!hevc_config.Parse(entry.codec_configuration.data)) { if (!hevc_config.Parse(codec_configuration_data)) {
LOG(ERROR) << "Failed to parse hevc."; LOG(ERROR) << "Failed to parse hevc.";
return false; return false;
} }
@ -570,10 +572,17 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
case FOURCC_vp09: case FOURCC_vp09:
case FOURCC_vp10: { case FOURCC_vp10: {
VPCodecConfigurationRecord vp_config; VPCodecConfigurationRecord vp_config;
if (!vp_config.ParseMP4(entry.codec_configuration.data)) { if (!vp_config.ParseMP4(codec_configuration_data)) {
LOG(ERROR) << "Failed to parse vpcc."; LOG(ERROR) << "Failed to parse vpcc.";
return false; return false;
} }
if (actual_format == FOURCC_vp09 &&
(!vp_config.is_level_set() || vp_config.level() == 0)) {
const double kUnknownSampleDuration = 0.0;
vp_config.SetVP9Level(coded_width, coded_height,
kUnknownSampleDuration);
vp_config.WriteMP4(&codec_configuration_data);
}
codec_string = vp_config.GetCodecString(video_codec); codec_string = vp_config.GetCodecString(video_codec);
break; break;
} }
@ -597,9 +606,8 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
std::shared_ptr<VideoStreamInfo> video_stream_info(new VideoStreamInfo( std::shared_ptr<VideoStreamInfo> video_stream_info(new VideoStreamInfo(
track->header.track_id, timescale, duration, video_codec, track->header.track_id, timescale, duration, video_codec,
GetH26xStreamFormat(actual_format), codec_string, GetH26xStreamFormat(actual_format), codec_string,
entry.codec_configuration.data.data(), codec_configuration_data.data(), codec_configuration_data.size(),
entry.codec_configuration.data.size(), coded_width, coded_height, coded_width, coded_height, pixel_width, pixel_height,
pixel_width, pixel_height,
0, // trick_play_factor 0, // trick_play_factor
nalu_length_size, track->media.header.language.code, is_encrypted)); nalu_length_size, track->media.header.language.code, is_encrypted));

View File

@ -229,6 +229,18 @@ bool WebMTracksParser::OnListEnd(int id) {
video_track_num_, codec_id_, !video_encryption_key_id_.empty()); video_track_num_, codec_id_, !video_encryption_key_id_.empty());
if (!video_stream_info_) if (!video_stream_info_)
return false; return false;
if (codec_id_ == "V_VP8" || codec_id_ == "V_VP9") {
vp_config_ = video_client_.GetVpCodecConfig(codec_private_);
const double kNanosecondsPerSecond = 1000000000.0;
if (codec_id_ == "V_VP9" &&
(!vp_config_.is_level_set() || vp_config_.level() == 0)) {
vp_config_.SetVP9Level(
video_stream_info_->width(), video_stream_info_->height(),
video_default_duration_ / kNanosecondsPerSecond);
}
}
} else { } else {
DLOG(INFO) << "Ignoring video track " << track_num_; DLOG(INFO) << "Ignoring video track " << track_num_;
ignored_tracks_.insert(track_num_); ignored_tracks_.insert(track_num_);