Properly handle AVC profiles with SPS extension

Part of #755 to improve DASH spec compliance.
This commit is contained in:
koln67 2020-08-06 00:08:53 +00:00 committed by GitHub
parent cbc251c621
commit caa47e374d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 301 additions and 42 deletions

View File

@ -15,18 +15,18 @@
<AdaptationSet id="1" contentType="video" maxWidth="1280" maxHeight="720" frameRate="30000/1001" subsegmentAlignment="true" par="16:9">
<SupplementalProperty schemeIdUri="urn:mpeg:dash:adaptation-set-switching:2016" value="0"/>
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="main"/>
<Representation id="1" bandwidth="973483" codecs="avc1.64001e" mimeType="video/mp4" sar="1:1" width="640" height="360">
<BaseURL>bear-640x360-video.mp4</BaseURL>
<SegmentBase indexRange="859-926" timescale="30000">
<Initialization range="0-858"/>
</SegmentBase>
</Representation>
<Representation id="2" bandwidth="2627285" codecs="avc1.64001f" mimeType="video/mp4" sar="1:1" width="1280" height="720">
<Representation id="1" bandwidth="2627285" codecs="avc1.64001f" mimeType="video/mp4" sar="1:1" width="1280" height="720">
<BaseURL>bear-1280x720-video.mp4</BaseURL>
<SegmentBase indexRange="858-925" timescale="30000">
<Initialization range="0-857"/>
</SegmentBase>
</Representation>
<Representation id="2" bandwidth="973483" codecs="avc1.64001e" mimeType="video/mp4" sar="1:1" width="640" height="360">
<BaseURL>bear-640x360-video.mp4</BaseURL>
<SegmentBase indexRange="859-926" timescale="30000">
<Initialization range="0-858"/>
</SegmentBase>
</Representation>
</AdaptationSet>
<AdaptationSet id="2" contentType="audio" subsegmentAlignment="true">
<Representation id="3" bandwidth="133334" codecs="mp4a.40.2" mimeType="audio/mp4" audioSamplingRate="44100">

View File

@ -4,14 +4,14 @@
#EXT-X-TARGETDURATION:2
#EXT-X-PLAYLIST-TYPE:VOD
#EXT-X-I-FRAMES-ONLY
#EXT-X-MAP:URI="bear-640x360-ac3-video.mp4",BYTERANGE="859@0"
#EXT-X-MAP:URI="bear-640x360-ac3-video.mp4",BYTERANGE="863@0"
#EXTINF:1.001,
#EXT-X-BYTERANGE:15581@927
#EXT-X-BYTERANGE:15581@931
bear-640x360-ac3-video.mp4
#EXTINF:1.001,
#EXT-X-BYTERANGE:18221@100240
#EXT-X-BYTERANGE:18221@100244
bear-640x360-ac3-video.mp4
#EXTINF:0.734,
#EXT-X-BYTERANGE:19663@222047
#EXT-X-BYTERANGE:19663@222051
bear-640x360-ac3-video.mp4
#EXT-X-ENDLIST

View File

@ -3,9 +3,9 @@
## Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test>
#EXT-X-TARGETDURATION:2
#EXT-X-PLAYLIST-TYPE:VOD
#EXT-X-MAP:URI="bear-640x360-ac3-video.mp4",BYTERANGE="859@0"
#EXT-X-MAP:URI="bear-640x360-ac3-video.mp4",BYTERANGE="863@0"
#EXTINF:1.001,
#EXT-X-BYTERANGE:99313@927
#EXT-X-BYTERANGE:99313@931
bear-640x360-ac3-video.mp4
#EXTINF:1.001,
#EXT-X-BYTERANGE:121807

View File

@ -2,18 +2,8 @@
<!--Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test>-->
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 DASH-MPD.xsd" profiles="urn:mpeg:dash:profile:isoff-live:2011" minBufferTime="PT2S" type="dynamic" publishTime="some_time" availabilityStartTime="some_time" minimumUpdatePeriod="PT5S" timeShiftBufferDepth="PT1800S">
<Period id="0" start="PT0S">
<AdaptationSet id="0" contentType="text" segmentAlignment="true">
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="subtitle"/>
<Representation id="0" bandwidth="2024" codecs="wvtt" mimeType="application/mp4">
<SegmentTemplate timescale="1000" initialization="bear-english-text-init.mp4" media="bear-english-text-$Number$.m4s" startNumber="1">
<SegmentTimeline>
<S t="0" d="1000" r="4"/>
</SegmentTimeline>
</SegmentTemplate>
</Representation>
</AdaptationSet>
<AdaptationSet id="1" contentType="video" width="640" height="360" frameRate="30000/1001" segmentAlignment="true" par="16:9">
<Representation id="1" bandwidth="974122" codecs="avc1.64001e" mimeType="video/mp4" sar="1:1">
<AdaptationSet id="0" contentType="video" width="640" height="360" frameRate="30000/1001" segmentAlignment="true" par="16:9">
<Representation id="0" bandwidth="974122" codecs="avc1.64001e" mimeType="video/mp4" sar="1:1">
<SegmentTemplate timescale="30000" initialization="bear-640x360-video-init.mp4" media="bear-640x360-video-$Number$.m4s" startNumber="1">
<SegmentTimeline>
<S t="0" d="30030" r="1"/>
@ -22,6 +12,16 @@
</SegmentTemplate>
</Representation>
</AdaptationSet>
<AdaptationSet id="1" contentType="text" segmentAlignment="true">
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="subtitle"/>
<Representation id="1" bandwidth="2024" codecs="wvtt" mimeType="application/mp4">
<SegmentTemplate timescale="1000" initialization="bear-english-text-init.mp4" media="bear-english-text-$Number$.m4s" startNumber="1">
<SegmentTimeline>
<S t="0" d="1000" r="4"/>
</SegmentTimeline>
</SegmentTemplate>
</Representation>
</AdaptationSet>
<AdaptationSet id="2" contentType="audio" segmentAlignment="true">
<Representation id="2" bandwidth="133929" codecs="mp4a.40.2" mimeType="audio/mp4" audioSamplingRate="44100">
<AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/>

View File

@ -7,8 +7,8 @@
<ContentProtection schemeIdUri="urn:uuid:1077efec-c0b2-4d02-ace3-3c1e52e2fb4b">
<cenc:pssh>AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAExMjM0NTY3ODkwMTIzNDU2AAAAAA==</cenc:pssh>
</ContentProtection>
<Representation id="0" bandwidth="978382" codecs="avc1.64001e" mimeType="video/mp4" sar="1:1" width="640" height="360">
<SegmentTemplate timescale="30000" initialization="bear-640x360-video-init.mp4" media="bear-640x360-video-$Number$.m4s" startNumber="1">
<Representation id="0" bandwidth="2632184" codecs="avc1.64001f" mimeType="video/mp4" sar="1:1" width="1280" height="720">
<SegmentTemplate timescale="30000" initialization="bear-1280x720-video-init.mp4" media="bear-1280x720-video-$Number$.m4s" startNumber="1">
<SegmentTimeline>
<S t="0" d="30030" r="1"/>
<S t="60060" d="22022"/>
@ -23,8 +23,8 @@
</SegmentTimeline>
</SegmentTemplate>
</Representation>
<Representation id="2" bandwidth="2632184" codecs="avc1.64001f" mimeType="video/mp4" sar="1:1" width="1280" height="720">
<SegmentTemplate timescale="30000" initialization="bear-1280x720-video-init.mp4" media="bear-1280x720-video-$Number$.m4s" startNumber="1">
<Representation id="2" bandwidth="978382" codecs="avc1.64001e" mimeType="video/mp4" sar="1:1" width="640" height="360">
<SegmentTemplate timescale="30000" initialization="bear-640x360-video-init.mp4" media="bear-640x360-video-$Number$.m4s" startNumber="1">
<SegmentTimeline>
<S t="0" d="30030" r="1"/>
<S t="60060" d="22022"/>
@ -37,13 +37,13 @@
<ContentProtection schemeIdUri="urn:uuid:1077efec-c0b2-4d02-ace3-3c1e52e2fb4b">
<cenc:pssh>AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAExMjM0NTY3ODkwMTIzNDU2AAAAAA==</cenc:pssh>
</ContentProtection>
<Representation id="3" bandwidth="134272" codecs="mp4a.40.2" mimeType="audio/mp4" audioSamplingRate="44100">
<Representation id="3" bandwidth="134015" codecs="mp4a.40.2" mimeType="audio/mp4" audioSamplingRate="44100">
<AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/>
<SegmentTemplate timescale="44100" initialization="bear-640x360-audio-init.mp4" media="bear-640x360-audio-$Number$.m4s" startNumber="1">
<SegmentTemplate timescale="44100" initialization="bear-320x180-audio-init.mp4" media="bear-320x180-audio-$Number$.m4s" startNumber="1">
<SegmentTimeline>
<S t="0" d="45056"/>
<S t="45056" d="44032"/>
<S t="89088" d="31744"/>
<S t="89088" d="33792"/>
</SegmentTimeline>
</SegmentTemplate>
</Representation>
@ -57,13 +57,13 @@
</SegmentTimeline>
</SegmentTemplate>
</Representation>
<Representation id="5" bandwidth="134015" codecs="mp4a.40.2" mimeType="audio/mp4" audioSamplingRate="44100">
<Representation id="5" bandwidth="134272" codecs="mp4a.40.2" mimeType="audio/mp4" audioSamplingRate="44100">
<AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/>
<SegmentTemplate timescale="44100" initialization="bear-320x180-audio-init.mp4" media="bear-320x180-audio-$Number$.m4s" startNumber="1">
<SegmentTemplate timescale="44100" initialization="bear-640x360-audio-init.mp4" media="bear-640x360-audio-$Number$.m4s" startNumber="1">
<SegmentTimeline>
<S t="0" d="45056"/>
<S t="45056" d="44032"/>
<S t="89088" d="33792"/>
<S t="89088" d="31744"/>
</SegmentTimeline>
</SegmentTemplate>
</Representation>

View File

@ -82,6 +82,30 @@ bool AVCDecoderConfigurationRecord::ParseInternal() {
AddNalu(nalu);
}
if (profile_indication_ == 100 || profile_indication_ == 110 ||
profile_indication_ == 122 || profile_indication_ == 144) {
uint8_t sps_ext_count;
if (!reader.Read1(&chroma_format_) || !reader.Read1(&bit_depth_luma_minus8_) ||
!reader.Read1(&bit_depth_chroma_minus8_) || !reader.Read1(&sps_ext_count)) {
LOG(WARNING) << "Insufficient bits in bitstream for given AVC profile";
return true;
}
chroma_format_ &= 0x3;
bit_depth_luma_minus8_ &= 0x7;
bit_depth_chroma_minus8_ &= 0x7;
for (uint8_t i = 0; i < sps_ext_count; i++) {
uint16_t size = 0;
RCHECK(reader.Read2(&size));
const uint8_t* nalu_data = reader.data() + reader.pos();
RCHECK(reader.SkipBytes(size));
Nalu nalu;
RCHECK(nalu.Initialize(Nalu::kH264, nalu_data, size));
RCHECK(nalu.type() == Nalu::H264_SPSExtension);
AddNalu(nalu);
}
}
return true;
}

View File

@ -35,6 +35,9 @@ class AVCDecoderConfigurationRecord : public DecoderConfigurationRecord {
uint32_t coded_height() const { return coded_height_; }
uint32_t pixel_width() const { return pixel_width_; }
uint32_t pixel_height() const { return pixel_height_; }
uint8_t chroma_format() const { return chroma_format_; }
uint8_t bit_depth_luma_minus8() const { return bit_depth_luma_minus8_; }
uint8_t bit_depth_chroma_minus8() const { return bit_depth_chroma_minus8_; }
/// Static version of GetCodecString.
/// @return The codec string.
@ -56,6 +59,12 @@ class AVCDecoderConfigurationRecord : public DecoderConfigurationRecord {
uint32_t coded_height_ = 0;
uint32_t pixel_width_ = 0;
uint32_t pixel_height_ = 0;
// Only should be present for special case profile values.
// Refer to ISO/IEC 14496-15 Section 5.3.3.1.1.
uint8_t chroma_format_ = 0;
uint8_t bit_depth_luma_minus8_ = 0;
uint8_t bit_depth_chroma_minus8_ = 0;
DISALLOW_COPY_AND_ASSIGN(AVCDecoderConfigurationRecord);
};

View File

@ -45,11 +45,59 @@ TEST(AVCDecoderConfigurationRecordTest, Success) {
EXPECT_EQ(8u, avc_config.pixel_width());
EXPECT_EQ(9u, avc_config.pixel_height());
EXPECT_EQ(0u, avc_config.transfer_characteristics());
EXPECT_EQ(0u, avc_config.chroma_format());
EXPECT_EQ(0u, avc_config.bit_depth_luma_minus8());
EXPECT_EQ(0u, avc_config.bit_depth_chroma_minus8());
EXPECT_EQ("avc1.64001e", avc_config.GetCodecString(FOURCC_avc1));
EXPECT_EQ("avc3.64001e", avc_config.GetCodecString(FOURCC_avc3));
}
TEST(AVCDecoderConfigurationRecordTest, SuccessWithSPSExtension) {
// clang-format off
const uint8_t kAvcDecoderConfigurationData[] = {
0x01, // version
0x64, // profile_indication
0x00, // profile_compatibility
0x1E, // avc_level
0xFF, // Least significant 3 bits is length_size_minus_one
0xE1, // Least significant 5 bits is num_sps
// sps 1
0x00, 0x1D, // size
0x67, 0x64, 0x00, 0x1E, 0xAC, 0xD9, 0x40, 0xB4, 0x2F, 0xF9, 0x7F, 0xF0,
0x00, 0x80, 0x00, 0x91, 0x00, 0x00, 0x03, 0x03, 0xE9, 0x00, 0x00, 0xEA,
0x60, 0x0F, 0x16, 0x2D, 0x96,
0x01, // num_pps
0x00, 0x06, // size
0x68, 0xEB, 0xE3, 0xCB, 0x22, 0xC0,
0xFC, // Least significant 2 bits is chroma_format
0xF9, // Least significant 3 bits is bit_depth_luma_minus8
0xFF, // Least significant 3 bits is bit_depth_chroma_minus8
0x01, // num_sps_ext
0x00, 0x05, // size
0x6D, 0x33, 0x01, 0x57, 0x78
};
// clang-format on
AVCDecoderConfigurationRecord avc_config;
ASSERT_TRUE(avc_config.Parse(kAvcDecoderConfigurationData,
arraysize(kAvcDecoderConfigurationData)));
EXPECT_EQ(1u, avc_config.version());
EXPECT_EQ(0x64, avc_config.profile_indication());
EXPECT_EQ(0u, avc_config.profile_compatibility());
EXPECT_EQ(0x1E, avc_config.avc_level());
EXPECT_EQ(4u, avc_config.nalu_length_size());
EXPECT_EQ(720u, avc_config.coded_width());
EXPECT_EQ(360u, avc_config.coded_height());
EXPECT_EQ(8u, avc_config.pixel_width());
EXPECT_EQ(9u, avc_config.pixel_height());
EXPECT_EQ(0u, avc_config.transfer_characteristics());
EXPECT_EQ(0u, avc_config.chroma_format());
EXPECT_EQ(1u, avc_config.bit_depth_luma_minus8());
EXPECT_EQ(7u, avc_config.bit_depth_chroma_minus8());
}
TEST(AVCDecoderConfigurationRecordTest, SuccessWithTransferCharacteristics) {
// clang-format off
const uint8_t kAvcDecoderConfigurationData[] = {

View File

@ -15,6 +15,21 @@
namespace shaka {
namespace media {
namespace {
// utility helper function to get an sps
const H264Sps* ParseSpsFromBytes(const std::vector<uint8_t> sps,
H264Parser* parser) {
Nalu nalu;
if (!nalu.Initialize(Nalu::kH264, sps.data(), sps.size()))
return nullptr;
int sps_id = 0;
if (parser->ParseSps(nalu, &sps_id) != H264Parser::kOk)
return nullptr;
return parser->GetSps(sps_id);
}
} // namespace
H264ByteToUnitStreamConverter::H264ByteToUnitStreamConverter()
: H26xByteToUnitStreamConverter(Nalu::kH264) {}
@ -27,15 +42,14 @@ H264ByteToUnitStreamConverter::~H264ByteToUnitStreamConverter() {}
bool H264ByteToUnitStreamConverter::GetDecoderConfigurationRecord(
std::vector<uint8_t>* decoder_config) const {
DCHECK(decoder_config);
if ((last_sps_.size() < 4) || last_pps_.empty()) {
// No data available to construct AVCDecoderConfigurationRecord.
return false;
}
// Construct an AVCDecoderConfigurationRecord containing a single SPS and a
// single PPS NALU. Please refer to ISO/IEC 14496-15 for format specifics.
BufferWriter buffer(last_sps_.size() + last_pps_.size() + 11);
// Construct an AVCDecoderConfigurationRecord containing a single SPS, a
// single PPS, and if available, a single SPS Extension NALU.
// Please refer to ISO/IEC 14496-15 for format specifics.
BufferWriter buffer;
uint8_t version(1);
buffer.AppendInt(version);
buffer.AppendInt(last_sps_[1]);
@ -51,6 +65,32 @@ bool H264ByteToUnitStreamConverter::GetDecoderConfigurationRecord(
buffer.AppendInt(num_pps);
buffer.AppendInt(static_cast<uint16_t>(last_pps_.size()));
buffer.AppendVector(last_pps_);
// handle profile special cases, refer to ISO/IEC 14496-15 Section 5.3.3.1.2
uint8_t profile_indication = last_sps_[1];
if (profile_indication == 100 || profile_indication == 110 ||
profile_indication == 122 || profile_indication == 144) {
H264Parser parser;
const H264Sps* sps = ParseSpsFromBytes(last_sps_, &parser);
if (sps == nullptr)
return false;
uint8_t reserved_chroma_format = 0xfc | (sps->chroma_format_idc);
buffer.AppendInt(reserved_chroma_format);
uint8_t reserved_bit_depth_luma_minus8 = 0xf8 | sps->bit_depth_luma_minus8;
buffer.AppendInt(reserved_bit_depth_luma_minus8);
uint8_t reserved_bit_depth_chroma_minus8 = 0xf8 | sps->bit_depth_chroma_minus8;
buffer.AppendInt(reserved_bit_depth_chroma_minus8);
if (last_sps_ext_.empty()) {
uint8_t num_sps_ext(0);
buffer.AppendInt(num_sps_ext);
} else {
uint8_t num_sps_ext(1);
buffer.AppendInt(num_sps_ext);
buffer.AppendVector(last_sps_ext_);
}
}
buffer.SwapBuffer(decoder_config);
return true;
@ -70,6 +110,12 @@ bool H264ByteToUnitStreamConverter::ProcessNalu(const Nalu& nalu) {
// Grab SPS NALU.
last_sps_.assign(nalu_ptr, nalu_ptr + nalu_size);
return strip_parameter_set_nalus();
case Nalu::H264_SPSExtension:
if (strip_parameter_set_nalus())
WarnIfNotMatch(nalu.type(), nalu_ptr, nalu_size, last_sps_ext_);
// Grab SPSExtension NALU.
last_sps_ext_.assign(nalu_ptr, nalu_ptr + nalu_size);
return strip_parameter_set_nalus();
case Nalu::H264_PPS:
if (strip_parameter_set_nalus())
WarnIfNotMatch(nalu.type(), nalu_ptr, nalu_size, last_pps_);

View File

@ -42,6 +42,7 @@ class H264ByteToUnitStreamConverter : public H26xByteToUnitStreamConverter {
std::vector<uint8_t> last_sps_;
std::vector<uint8_t> last_pps_;
std::vector<uint8_t> last_sps_ext_;
DISALLOW_COPY_AND_ASSIGN(H264ByteToUnitStreamConverter);
};

View File

@ -79,5 +79,64 @@ TEST(H264ByteToUnitStreamConverter, ConversionFailure) {
EXPECT_FALSE(converter.GetDecoderConfigurationRecord(&decoder_config));
}
TEST(H264ByteToUnitStreamConverter, NaluConversionWithSpsExtension) {
const uint8_t kByteStreamWithSpsExtension[] = {
0x00, 0x00, 0x00, 0x01, // Start code
0x09, // AUD Type
0xF0, // Primary pic type
0x00, 0x00, 0x00, 0x01, // Start code
// Some SPS Data
0x67, 0x64, 0x00, 0x1E, 0xAC, 0xD9, 0x40, 0xB4, 0x2F,
0xF9, 0x7F, 0xF0, 0x00, 0x80, 0x00, 0x91, 0x00, 0x00,
0x03, 0x03, 0xE9, 0x00, 0x00, 0xEA, 0x60, 0x0F, 0x16,
0x2D, 0x96,
0x00, 0x00, 0x00, 0x01, // Start code
// Some PPS Data
0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x11, 0x12, 0x13, 0x14, 0x15,
0x00, 0x00, 0x00, 0x01, // Start code
// Some SPS Extension data
0x6D, 0x33, 0x01, 0x57, 0x78,
0x00, 0x00, 0x00, 0x01, // Start code
// The input NALU
0x06, // NALU type
0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77,
};
std::vector<uint8_t> byte_stream_with_sps_extension(
std::begin(kByteStreamWithSpsExtension),
std::end(kByteStreamWithSpsExtension));
H264ByteToUnitStreamConverter converter(
H26xStreamFormat::kNalUnitStreamWithoutParameterSetNalus);
std::vector<uint8_t> unit_stream;
ASSERT_TRUE(converter.ConvertByteStreamToNalUnitStream(
byte_stream_with_sps_extension.data(),
byte_stream_with_sps_extension.size(), &unit_stream));
const uint8_t kExpectedUnitStream[] = {
0x00, 0x00, 0x00, 0x0A, 0x06, 0xFD, 0x78,
0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77
};
EXPECT_EQ(std::vector<uint8_t>(std::begin(kExpectedUnitStream),
std::end(kExpectedUnitStream)), unit_stream);
std::cout << std::endl << std::endl;
std::vector<uint8_t> decoder_config;
EXPECT_TRUE(converter.GetDecoderConfigurationRecord(&decoder_config));
const uint8_t kExpectedDecoderConfig[] = {
0x01, 0x64, 0x00, 0x1E, 0xFF, 0xE1, 0x00, 0x1D,
0x67, 0x64, 0x00, 0x1E, 0xAC, 0xD9, 0x40, 0xB4,
0x2F, 0xF9, 0x7F, 0xF0, 0x00, 0x80, 0x00, 0x91,
0x00, 0x00, 0x03, 0x03, 0xE9, 0x00, 0x00, 0xEA,
0x60, 0x0F, 0x16, 0x2D, 0x96, 0x01, 0x00, 0x0A,
0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x11, 0x12, 0x13,
0x14, 0x15, 0xFD, 0xF8, 0xF8, 0x01, 0x6D, 0x33,
0x01, 0x57, 0x78
};
EXPECT_EQ(std::vector<uint8_t>(std::begin(kExpectedDecoderConfig),
std::end(kExpectedDecoderConfig)), decoder_config);
}
} // namespace media
} // namespace shaka

View File

@ -242,6 +242,9 @@ bool NalUnitToByteStreamConverter::Initialize(
buffer_writer.AppendArray(kNaluStartCode, arraysize(kNaluStartCode));
AppendNalu(nalu, nalu_length_size_, !kEscapeData, &buffer_writer);
found_pps = true;
} else if (nalu.type() == Nalu::H264NaluType::H264_SPSExtension) {
buffer_writer.AppendArray(kNaluStartCode, arraysize(kNaluStartCode));
AppendNalu(nalu, nalu_length_size_, !kEscapeData, &buffer_writer);
}
}
if (!found_sps || !found_pps) {
@ -313,6 +316,8 @@ bool NalUnitToByteStreamConverter::ConvertUnitToByteStreamWithSubsamples(
break;
case Nalu::H264_SPS:
FALLTHROUGH_INTENDED;
case Nalu::H264_SPSExtension:
FALLTHROUGH_INTENDED;
case Nalu::H264_PPS: {
// Also write this SPS/PPS if it is not the same as SPS/PPS in decoder
// configuration, which is already written.

View File

@ -201,6 +201,74 @@ TEST(NalUnitToByteStreamConverterTest, ConvertUnitToByteStream) {
output);
}
// Expect a valid AVCDecoderConfigurationRecord with SPSExtension to pass.
TEST(NalUnitToByteStreamConverterTest, ConvertUnitToByteStreamWithSPSExtension) {
NalUnitToByteStreamConverter converter;
const uint8_t kDecoderConfigWithSpsExt[] = {
0x01, // configuration version (must be 1)
0x64, // AVCProfileIndication (100: sps special case)
0x00, // profile_compatibility (bogus)
0x00, // AVCLevelIndication (bogus)
0xFF, // Length size minus 1 == 3
0xE1, // 1 sps.
0x00, 0x1D, // SPS length == 29
// Some valid SPS data.
0x67, 0x64, 0x00, 0x1E, 0xAC, 0xD9, 0x40, 0xB4,
0x2F, 0xF9, 0x7F, 0xF0, 0x00, 0x80, 0x00, 0x91,
0x00, 0x00, 0x03, 0x03, 0xE9, 0x00, 0x00, 0xEA,
0x60, 0x0F, 0x16, 0x2D, 0x96,
0x01, // 1 pps.
0x00, 0x0A, // PPS length == 10
0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x11, 0x12, 0x13, 0x14, 0x15,
0xFC, // chroma_format == 0
0xF9, // bit_depth_luma_minus8 == 1
0xFF, // bit_depth_chroma_minus8 == 7
0x01, // 1 SPS Extension
0x00, 0x05, // SPSExtension length = 5
0x6D, 0x33, 0x01, 0x57, 0x78
};
// Only the type of the NAL units are checked.
// This does not contain AUD, SPS, nor PPS.
const uint8_t kUnitStreamLikeMediaSample[] = {
0x00, 0x00, 0x00, 0x0A, // Size 10 NALU.
0x06, // NAL unit type.
0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77
};
const uint8_t kByteStreamWithSpsExtension[] = {
0x00, 0x00, 0x00, 0x01, // Start code
0x09, // AUD Type
0xF0, // Primary pic type
0x00, 0x00, 0x00, 0x01, // Start code
// Some SPS Data
0x67, 0x64, 0x00, 0x1E, 0xAC, 0xD9, 0x40, 0xB4, 0x2F,
0xF9, 0x7F, 0xF0, 0x00, 0x80, 0x00, 0x91, 0x00, 0x00,
0x03, 0x03, 0xE9, 0x00, 0x00, 0xEA, 0x60, 0x0F, 0x16,
0x2D, 0x96,
0x00, 0x00, 0x00, 0x01, // Start code
// Some PPS Data
0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x11, 0x12, 0x13, 0x14, 0x15,
0x00, 0x00, 0x00, 0x01, // Start code
// Some SPS Extension data
0x6D, 0x33, 0x01, 0x57, 0x78,
0x00, 0x00, 0x00, 0x01, // Start code
// The input NALU
0x06, // NALU type
0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77,
};
EXPECT_TRUE(converter.Initialize(kDecoderConfigWithSpsExt,
arraysize(kDecoderConfigWithSpsExt)));
std::vector<uint8_t> output;
EXPECT_TRUE(converter.ConvertUnitToByteStream(
kUnitStreamLikeMediaSample,
arraysize(kUnitStreamLikeMediaSample), kIsKeyFrame, &output));
EXPECT_EQ(std::vector<uint8_t>(std::begin(kByteStreamWithSpsExtension),
std::end(kByteStreamWithSpsExtension)),
output);
}
// Verify that if it is not a key frame then SPS and PPS from decoder
// configuration is not used.
TEST(NalUnitToByteStreamConverterTest, NonKeyFrameSample) {

View File

@ -35,8 +35,7 @@ bool H264VideoSliceHeaderParser::Initialize(
const Nalu& nalu = config.nalu(i);
if (nalu.type() == Nalu::H264_SPS) {
RCHECK(parser_.ParseSps(nalu, &id) == H264Parser::kOk);
} else {
DCHECK_EQ(Nalu::H264_PPS, nalu.type());
} else if (nalu.type() == Nalu::H264_PPS) {
RCHECK(parser_.ParsePps(nalu, &id) == H264Parser::kOk);
}
}