From 594d1f0de4a22ee838e7e5bd09aa2850655166bb Mon Sep 17 00:00:00 2001 From: KongQun Yang Date: Thu, 8 Feb 2018 22:03:47 -0800 Subject: [PATCH] Copy over parameter sets in the sample if different to sample entry When converting from NAL unit stream to byte stream. The packager should not assume the parameter sets in the sample is the same as the parameter sets in sample entry (decoder configuration). Fixes #327. Change-Id: I7e84d28a296f4b33db0523cca5eabd62f623e852 --- .../nal_unit_to_byte_stream_converter.cc | 52 +++++-- .../nal_unit_to_byte_stream_converter.h | 2 + ..._unit_to_byte_stream_converter_unittest.cc | 128 ++++++++++++++---- 3 files changed, 141 insertions(+), 41 deletions(-) diff --git a/packager/media/codecs/nal_unit_to_byte_stream_converter.cc b/packager/media/codecs/nal_unit_to_byte_stream_converter.cc index 1f36deca3b..fb2936dce2 100644 --- a/packager/media/codecs/nal_unit_to_byte_stream_converter.cc +++ b/packager/media/codecs/nal_unit_to_byte_stream_converter.cc @@ -13,7 +13,6 @@ #include "packager/media/base/buffer_reader.h" #include "packager/media/base/buffer_writer.h" #include "packager/media/base/macros.h" -#include "packager/media/codecs/avc_decoder_configuration_record.h" #include "packager/media/codecs/nalu_reader.h" namespace shaka { @@ -28,6 +27,16 @@ const uint8_t kEmulationPreventionByte = 0x03; const uint8_t kAccessUnitDelimiterRbspAnyPrimaryPicType = 0xF0; +bool IsNaluEqual(const Nalu& left, const Nalu& right) { + if (left.type() != right.type()) + return false; + const size_t left_size = left.header_size() + left.payload_size(); + const size_t right_size = right.header_size() + right.payload_size(); + if (left_size != right_size) + return false; + return memcmp(left.data(), right.data(), left_size) == 0; +} + void AppendNalu(const Nalu& nalu, int nalu_length_size, bool escape_data, @@ -207,25 +216,24 @@ bool NalUnitToByteStreamConverter::Initialize( return false; } - AVCDecoderConfigurationRecord decoder_config; - if 
(!decoder_config.Parse(std::vector( + if (!decoder_config_.Parse(std::vector( decoder_configuration_data, decoder_configuration_data + decoder_configuration_data_size))) { return false; } - if (decoder_config.nalu_count() < 2) { + if (decoder_config_.nalu_count() < 2) { LOG(ERROR) << "Cannot find SPS or PPS."; return false; } - nalu_length_size_ = decoder_config.nalu_length_size(); + nalu_length_size_ = decoder_config_.nalu_length_size(); BufferWriter buffer_writer(decoder_configuration_data_size); bool found_sps = false; bool found_pps = false; - for (uint32_t i = 0; i < decoder_config.nalu_count(); ++i) { - const Nalu& nalu = decoder_config.nalu(i); + for (uint32_t i = 0; i < decoder_config_.nalu_count(); ++i) { + const Nalu& nalu = decoder_config_.nalu(i); if (nalu.type() == Nalu::H264NaluType::H264_SPS) { buffer_writer.AppendArray(kNaluStartCode, arraysize(kNaluStartCode)); AppendNalu(nalu, nalu_length_size_, !kEscapeData, &buffer_writer); @@ -245,8 +253,6 @@ bool NalUnitToByteStreamConverter::Initialize( return true; } -// This ignores all AUD, SPS, and PPS in the sample. Instead uses the data -// parsed in Initialize(). bool NalUnitToByteStreamConverter::ConvertUnitToByteStream( const uint8_t* sample, size_t sample_size, @@ -258,7 +264,8 @@ bool NalUnitToByteStreamConverter::ConvertUnitToByteStream( } // This ignores all AUD, SPS, and PPS in the sample. Instead uses the data -// parsed in Initialize(). +// parsed in Initialize(). However, if the SPS and PPS are different to +// those parsed in Initialize(), they are kept. 
bool NalUnitToByteStreamConverter::ConvertUnitToByteStreamWithSubsamples( const uint8_t* sample, size_t sample_size, @@ -303,11 +310,30 @@ bool NalUnitToByteStreamConverter::ConvertUnitToByteStreamWithSubsamples( } switch (nalu.type()) { case Nalu::H264_AUD: - FALLTHROUGH_INTENDED; + break; case Nalu::H264_SPS: FALLTHROUGH_INTENDED; - case Nalu::H264_PPS: - break; + case Nalu::H264_PPS: { + // Also write this SPS/PPS if it is not the same as SPS/PPS in decoder + // configuration, which is already written. + // + // For more information see: + // - github.com/google/shaka-packager/issues/327 + // - ISO/IEC 14496-15 5.4.5 Sync Sample + // + // TODO(kqyang): Parse sample data to figure out which SPS/PPS the + // sample actually uses and include that only. + bool new_decoder_config = true; + for (size_t i = 0; i < decoder_config_.nalu_count(); ++i) { + if (IsNaluEqual(decoder_config_.nalu(i), nalu)) { + new_decoder_config = false; + break; + } + } + if (!new_decoder_config) + break; + FALLTHROUGH_INTENDED; + } default: bool escape_data = false; if (subsamples && !subsamples->empty()) { diff --git a/packager/media/codecs/nal_unit_to_byte_stream_converter.h b/packager/media/codecs/nal_unit_to_byte_stream_converter.h index 9dc5a116c9..b4f746b6a5 100644 --- a/packager/media/codecs/nal_unit_to_byte_stream_converter.h +++ b/packager/media/codecs/nal_unit_to_byte_stream_converter.h @@ -12,6 +12,7 @@ #include "packager/base/macros.h" #include "packager/media/base/decrypt_config.h" +#include "packager/media/codecs/avc_decoder_configuration_record.h" namespace shaka { namespace media { @@ -83,6 +84,7 @@ class NalUnitToByteStreamConverter { friend class NalUnitToByteStreamConverterTest; int nalu_length_size_; + AVCDecoderConfigurationRecord decoder_config_; std::vector decoder_configuration_in_byte_stream_; DISALLOW_COPY_AND_ASSIGN(NalUnitToByteStreamConverter); diff --git a/packager/media/codecs/nal_unit_to_byte_stream_converter_unittest.cc 
b/packager/media/codecs/nal_unit_to_byte_stream_converter_unittest.cc index 576032ea2d..abed910f5c 100644 --- a/packager/media/codecs/nal_unit_to_byte_stream_converter_unittest.cc +++ b/packager/media/codecs/nal_unit_to_byte_stream_converter_unittest.cc @@ -33,7 +33,6 @@ const uint8_t kTestAVCDecoderConfigurationRecord[] = { 0x60, 0x0F, 0x16, 0x2D, 0x96, 0x01, // 1 pps. 0x00, 0x0A, // PPS length == 10 - // The content of PPS is not checked except the type. 0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x11, 0x12, 0x13, 0x14, 0x15, }; const uint8_t kTestAVCDecoderConfigurationRecordNaluLengthSize2[] = { @@ -51,7 +50,6 @@ const uint8_t kTestAVCDecoderConfigurationRecordNaluLengthSize2[] = { 0x60, 0x0F, 0x16, 0x2D, 0x96, 0x01, // 1 pps. 0x00, 0x0A, // PPS length == 10 - // The content of PPS is not checked except the type. 0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x11, 0x12, 0x13, 0x14, 0x15, }; @@ -566,21 +564,21 @@ TEST(NalUnitToByteStreamConverterTest, EncryptedNaluEndingWithZero) { EXPECT_THAT(subsamples, ::testing::ElementsAre(SubsampleEntry(13, 3))); } -// corresponding subsample needs to be removed. +// Not supposed to happen, just in case, make sure it is properly supported. TEST(NalUnitToByteStreamConverterTest, EncryptedPps) { // Only the type of the NAL units are checked. // This does not contain AUD, SPS, nor PPS. const uint8_t kUnitStreamLikeMediaSample[] = { - 0x00, 0x00, 0x00, 0x0A, // Size 10 NALU. - 0x06, // NAL unit type. - 0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, // clear - 0x00, 0x00, 0x00, 0x0B, // Size 11 NALU. - 0x68, // PPS, will be removed after convertion - // The content of PPS is not checked. - 0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x12, 0x12, 0x13, 0x14, 0x15, // cipher - 0x00, 0x00, 0x00, 0x08, // Size 8 NALU. - 0x02, // NAL unit type. - 0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77, // Slice data, cipher + 0x00, 0x00, 0x00, 0x0A, // Size 10 NALU. + 0x06, // NAL unit type. 
+ 0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, // clear + 0x00, 0x00, 0x00, 0x0B, // Size 11 NALU. + 0x68, // PPS, will remain as it is different to the one in decoder + // configuration. + 0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x12, 0x12, 0x13, 0x14, 0x15, // cipher + 0x00, 0x00, 0x00, 0x08, // Size 8 NALU. + 0x02, // NAL unit type. + 0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77, // Slice data, cipher }; std::vector subsamples{SubsampleEntry(19, 10), @@ -596,6 +594,7 @@ TEST(NalUnitToByteStreamConverterTest, EncryptedPps) { kUnitStreamLikeMediaSample, arraysize(kUnitStreamLikeMediaSample), kIsKeyFrame, !kEscapeEncryptedNalu, &output, &subsamples)); + // clang-format off const uint8_t kExpectedOutput[] = { 0x00, 0x00, 0x00, 0x01, // Start code. 0x09, // AUD type. @@ -613,13 +612,17 @@ TEST(NalUnitToByteStreamConverterTest, EncryptedPps) { 0x06, // NALU type. 0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, 0x00, 0x00, 0x00, 0x01, // Start code. + // PPS from sample. + 0x68, 0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x12, 0x12, 0x13, 0x14, 0x15, // cipher + 0x00, 0x00, 0x00, 0x01, // Start code. // The input NALU 2. 0x02, // NALU type. 0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77, }; + // clang-format on const std::vector kExpectedOutputSubsamples{ - SubsampleEntry(72, 7)}; + SubsampleEntry(72, 10), SubsampleEntry(5, 7)}; EXPECT_EQ(std::vector(kExpectedOutput, kExpectedOutput + arraysize(kExpectedOutput)), @@ -627,22 +630,23 @@ TEST(NalUnitToByteStreamConverterTest, EncryptedPps) { EXPECT_THAT(kExpectedOutputSubsamples, subsamples); } -// A clear PPS NALU follows a clear NALU, the PPS will be removed. So the -// corresponding subsample's clear bytes may be reduced. -TEST(NalUnitToByteStreamConverterTest, ClearPps) { +// A clear PPS NALU follows a clear NALU, the PPS in the sample is the same as +// the PPS in decoder configuration, the PPS is dropped and subsample size is +// adjusted. 
+TEST(NalUnitToByteStreamConverterTest, ClearPpsSame) { // Only the type of the NAL units are checked. // This does not contain AUD, SPS, nor PPS. const uint8_t kUnitStreamLikeMediaSample[] = { - 0x00, 0x00, 0x00, 0x0A, // Size 10 NALU. - 0x06, // NAL unit type. - 0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, // clear 0x00, 0x00, 0x00, 0x0B, // Size 11 NALU. - 0x68, // PPS, will be removed after convertion - // The content of PPS is not checked. - 0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x12, 0x12, 0x13, 0x14, 0x15, // clear - 0x00, 0x00, 0x00, 0x08, // Size 8 NALU. - 0x02, // NAL unit type. - 0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77, // Slice data, cipher + 0x06, // NAL unit type. + 0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, 0x88, // clear + 0x00, 0x00, 0x00, 0x0A, // Size 10 NALU. + 0x68, // PPS, same as in decoder configuration, so is + // removed. + 0xFE, 0xFD, 0xFC, 0xFB, 0x11, 0x12, 0x13, 0x14, 0x15, // PPS. + 0x00, 0x00, 0x00, 0x08, // Size 8 NALU. + 0x02, // NAL unit type. + 0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77, // Slice data, cipher }; std::vector subsamples{SubsampleEntry(34, 7)}; @@ -657,6 +661,7 @@ TEST(NalUnitToByteStreamConverterTest, ClearPps) { kUnitStreamLikeMediaSample, arraysize(kUnitStreamLikeMediaSample), kIsKeyFrame, !kEscapeEncryptedNalu, &output, &subsamples)); + // clang-format off const uint8_t kExpectedOutput[] = { 0x00, 0x00, 0x00, 0x01, // Start code. 0x09, // AUD type. @@ -672,15 +677,82 @@ TEST(NalUnitToByteStreamConverterTest, ClearPps) { 0x00, 0x00, 0x00, 0x01, // Start code. // The input NALU 1. 0x06, // NALU type. - 0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, + 0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, 0x88, 0x00, 0x00, 0x00, 0x01, // Start code. // The input NALU 2. 0x02, // NALU type. 
0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77, }; + // clang-format on const std::vector kExpectedOutputSubsamples{ - SubsampleEntry(72, 7)}; + SubsampleEntry(73, 7)}; + + EXPECT_EQ(std::vector(kExpectedOutput, + kExpectedOutput + arraysize(kExpectedOutput)), + output); + EXPECT_EQ(kExpectedOutputSubsamples, subsamples); +} + +// A clear PPS NALU follows a clear NALU, the PPS in the sample is different to +// the PPS in decoder configuration, so both the PPS in the sample and the PPS +// in decoder configuration are written to output. +TEST(NalUnitToByteStreamConverterTest, ClearPpsDifferent) { + // Only the type of the NAL units are checked. + const uint8_t kUnitStreamLikeMediaSample[] = { + 0x00, 0x00, 0x00, 0x0B, // Size 11 NALU. + 0x06, // NAL unit type. + 0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, 0x88, // clear + 0x00, 0x00, 0x00, 0x0A, // Size 10 NALU. + 0x68, // PPS, different to the PPS in the decoder + // configuration, is also written to output. + 0xFE, 0xFD, 0xFC, 0xFB, 0x12, 0x12, 0x13, 0x14, 0x15, // clear + 0x00, 0x00, 0x00, 0x08, // Size 8 NALU. + 0x02, // NAL unit type. + 0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77, // Slice data, cipher + }; + + std::vector subsamples{SubsampleEntry(34, 7)}; + + NalUnitToByteStreamConverter converter; + EXPECT_TRUE( + converter.Initialize(kTestAVCDecoderConfigurationRecord, + arraysize(kTestAVCDecoderConfigurationRecord))); + + std::vector output; + EXPECT_TRUE(converter.ConvertUnitToByteStreamWithSubsamples( + kUnitStreamLikeMediaSample, arraysize(kUnitStreamLikeMediaSample), + kIsKeyFrame, !kEscapeEncryptedNalu, &output, &subsamples)); + + // clang-format off + const uint8_t kExpectedOutput[] = { + 0x00, 0x00, 0x00, 0x01, // Start code. + 0x09, // AUD type. + 0xF0, // primary pic type is anything. + 0x00, 0x00, 0x00, 0x01, // Start code. + // Some valid SPS data. 
+ 0x67, 0x64, 0x00, 0x1E, 0xAC, 0xD9, 0x40, 0xB4, + 0x2F, 0xF9, 0x7F, 0xF0, 0x00, 0x80, 0x00, 0x91, + 0x00, 0x00, 0x03, 0x03, 0xE9, 0x00, 0x00, 0xEA, + 0x60, 0x0F, 0x16, 0x2D, 0x96, + 0x00, 0x00, 0x00, 0x01, // Start code. + 0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x11, 0x12, 0x13, 0x14, 0x15, // PPS. + 0x00, 0x00, 0x00, 0x01, // Start code. + // The input NALU 1. + 0x06, // NALU type. + 0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, 0x88, + 0x00, 0x00, 0x00, 0x01, // Start code. + // PPS from the sample; it differs from the PPS above, so it is kept. + 0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x12, 0x12, 0x13, 0x14, 0x15, + 0x00, 0x00, 0x00, 0x01, // Start code. + // The input NALU 2. + 0x02, // NALU type. + 0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77, + }; + // clang-format on + + const std::vector kExpectedOutputSubsamples{ + SubsampleEntry(87, 7)}; EXPECT_EQ(std::vector(kExpectedOutput, kExpectedOutput + arraysize(kExpectedOutput)),