diff --git a/packager/media/codecs/nal_unit_to_byte_stream_converter.cc b/packager/media/codecs/nal_unit_to_byte_stream_converter.cc index 1f36deca3b..fb2936dce2 100644 --- a/packager/media/codecs/nal_unit_to_byte_stream_converter.cc +++ b/packager/media/codecs/nal_unit_to_byte_stream_converter.cc @@ -13,7 +13,6 @@ #include "packager/media/base/buffer_reader.h" #include "packager/media/base/buffer_writer.h" #include "packager/media/base/macros.h" -#include "packager/media/codecs/avc_decoder_configuration_record.h" #include "packager/media/codecs/nalu_reader.h" namespace shaka { @@ -28,6 +27,16 @@ const uint8_t kEmulationPreventionByte = 0x03; const uint8_t kAccessUnitDelimiterRbspAnyPrimaryPicType = 0xF0; +bool IsNaluEqual(const Nalu& left, const Nalu& right) { + if (left.type() != right.type()) + return false; + const size_t left_size = left.header_size() + left.payload_size(); + const size_t right_size = right.header_size() + right.payload_size(); + if (left_size != right_size) + return false; + return memcmp(left.data(), right.data(), left_size) == 0; +} + void AppendNalu(const Nalu& nalu, int nalu_length_size, bool escape_data, @@ -207,25 +216,24 @@ bool NalUnitToByteStreamConverter::Initialize( return false; } - AVCDecoderConfigurationRecord decoder_config; - if (!decoder_config.Parse(std::vector( + if (!decoder_config_.Parse(std::vector( decoder_configuration_data, decoder_configuration_data + decoder_configuration_data_size))) { return false; } - if (decoder_config.nalu_count() < 2) { + if (decoder_config_.nalu_count() < 2) { LOG(ERROR) << "Cannot find SPS or PPS."; return false; } - nalu_length_size_ = decoder_config.nalu_length_size(); + nalu_length_size_ = decoder_config_.nalu_length_size(); BufferWriter buffer_writer(decoder_configuration_data_size); bool found_sps = false; bool found_pps = false; - for (uint32_t i = 0; i < decoder_config.nalu_count(); ++i) { - const Nalu& nalu = decoder_config.nalu(i); + for (uint32_t i = 0; i < 
decoder_config_.nalu_count(); ++i) { + const Nalu& nalu = decoder_config_.nalu(i); if (nalu.type() == Nalu::H264NaluType::H264_SPS) { buffer_writer.AppendArray(kNaluStartCode, arraysize(kNaluStartCode)); AppendNalu(nalu, nalu_length_size_, !kEscapeData, &buffer_writer); @@ -245,8 +253,6 @@ bool NalUnitToByteStreamConverter::Initialize( return true; } -// This ignores all AUD, SPS, and PPS in the sample. Instead uses the data -// parsed in Initialize(). bool NalUnitToByteStreamConverter::ConvertUnitToByteStream( const uint8_t* sample, size_t sample_size, @@ -258,7 +264,8 @@ bool NalUnitToByteStreamConverter::ConvertUnitToByteStream( } // This ignores all AUD, SPS, and PPS in the sample. Instead uses the data -// parsed in Initialize(). +// parsed in Initialize(). However, if the SPS and PPS are different to +// those parsed in Initialize(), they are kept. bool NalUnitToByteStreamConverter::ConvertUnitToByteStreamWithSubsamples( const uint8_t* sample, size_t sample_size, @@ -303,11 +310,30 @@ bool NalUnitToByteStreamConverter::ConvertUnitToByteStreamWithSubsamples( } switch (nalu.type()) { case Nalu::H264_AUD: - FALLTHROUGH_INTENDED; + break; case Nalu::H264_SPS: FALLTHROUGH_INTENDED; - case Nalu::H264_PPS: - break; + case Nalu::H264_PPS: { + // Also write this SPS/PPS if it is not the same as SPS/PPS in decoder + // configuration, which is already written. + // + // For more information see: + // - github.com/google/shaka-packager/issues/327 + // - ISO/IEC 14496-15 5.4.5 Sync Sample + // + // TODO(kqyang): Parse sample data to figure out which SPS/PPS the + // sample actually uses and include that only.
+ bool new_decoder_config = true; + for (size_t i = 0; i < decoder_config_.nalu_count(); ++i) { + if (IsNaluEqual(decoder_config_.nalu(i), nalu)) { + new_decoder_config = false; + break; + } + } + if (!new_decoder_config) + break; + FALLTHROUGH_INTENDED; + } default: bool escape_data = false; if (subsamples && !subsamples->empty()) { diff --git a/packager/media/codecs/nal_unit_to_byte_stream_converter.h b/packager/media/codecs/nal_unit_to_byte_stream_converter.h index 9dc5a116c9..b4f746b6a5 100644 --- a/packager/media/codecs/nal_unit_to_byte_stream_converter.h +++ b/packager/media/codecs/nal_unit_to_byte_stream_converter.h @@ -12,6 +12,7 @@ #include "packager/base/macros.h" #include "packager/media/base/decrypt_config.h" +#include "packager/media/codecs/avc_decoder_configuration_record.h" namespace shaka { namespace media { @@ -83,6 +84,7 @@ class NalUnitToByteStreamConverter { friend class NalUnitToByteStreamConverterTest; int nalu_length_size_; + AVCDecoderConfigurationRecord decoder_config_; std::vector decoder_configuration_in_byte_stream_; DISALLOW_COPY_AND_ASSIGN(NalUnitToByteStreamConverter); diff --git a/packager/media/codecs/nal_unit_to_byte_stream_converter_unittest.cc b/packager/media/codecs/nal_unit_to_byte_stream_converter_unittest.cc index 576032ea2d..abed910f5c 100644 --- a/packager/media/codecs/nal_unit_to_byte_stream_converter_unittest.cc +++ b/packager/media/codecs/nal_unit_to_byte_stream_converter_unittest.cc @@ -33,7 +33,6 @@ const uint8_t kTestAVCDecoderConfigurationRecord[] = { 0x60, 0x0F, 0x16, 0x2D, 0x96, 0x01, // 1 pps. 0x00, 0x0A, // PPS length == 10 - // The content of PPS is not checked except the type. 0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x11, 0x12, 0x13, 0x14, 0x15, }; const uint8_t kTestAVCDecoderConfigurationRecordNaluLengthSize2[] = { @@ -51,7 +50,6 @@ const uint8_t kTestAVCDecoderConfigurationRecordNaluLengthSize2[] = { 0x60, 0x0F, 0x16, 0x2D, 0x96, 0x01, // 1 pps. 
0x00, 0x0A, // PPS length == 10 - // The content of PPS is not checked except the type. 0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x11, 0x12, 0x13, 0x14, 0x15, }; @@ -566,21 +564,21 @@ TEST(NalUnitToByteStreamConverterTest, EncryptedNaluEndingWithZero) { EXPECT_THAT(subsamples, ::testing::ElementsAre(SubsampleEntry(13, 3))); } -// corresponding subsample needs to be removed. +// Not supposed to happen, just in case, make sure it is properly supported. TEST(NalUnitToByteStreamConverterTest, EncryptedPps) { // Only the type of the NAL units are checked. // This does not contain AUD, SPS, nor PPS. const uint8_t kUnitStreamLikeMediaSample[] = { - 0x00, 0x00, 0x00, 0x0A, // Size 10 NALU. - 0x06, // NAL unit type. - 0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, // clear - 0x00, 0x00, 0x00, 0x0B, // Size 11 NALU. - 0x68, // PPS, will be removed after convertion - // The content of PPS is not checked. - 0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x12, 0x12, 0x13, 0x14, 0x15, // cipher - 0x00, 0x00, 0x00, 0x08, // Size 8 NALU. - 0x02, // NAL unit type. - 0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77, // Slice data, cipher + 0x00, 0x00, 0x00, 0x0A, // Size 10 NALU. + 0x06, // NAL unit type. + 0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, // clear + 0x00, 0x00, 0x00, 0x0B, // Size 11 NALU. + 0x68, // PPS, will remain as it is different to the one in decoder + // configuration. + 0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x12, 0x12, 0x13, 0x14, 0x15, // cipher + 0x00, 0x00, 0x00, 0x08, // Size 8 NALU. + 0x02, // NAL unit type. + 0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77, // Slice data, cipher }; std::vector subsamples{SubsampleEntry(19, 10), @@ -596,6 +594,7 @@ TEST(NalUnitToByteStreamConverterTest, EncryptedPps) { kUnitStreamLikeMediaSample, arraysize(kUnitStreamLikeMediaSample), kIsKeyFrame, !kEscapeEncryptedNalu, &output, &subsamples)); + // clang-format off const uint8_t kExpectedOutput[] = { 0x00, 0x00, 0x00, 0x01, // Start code. 0x09, // AUD type. 
@@ -613,13 +612,17 @@ TEST(NalUnitToByteStreamConverterTest, EncryptedPps) { 0x06, // NALU type. 0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, 0x00, 0x00, 0x00, 0x01, // Start code. + // PPS from sample. + 0x68, 0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x12, 0x12, 0x13, 0x14, 0x15, // cipher + 0x00, 0x00, 0x00, 0x01, // Start code. // The input NALU 2. 0x02, // NALU type. 0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77, }; + // clang-format on const std::vector kExpectedOutputSubsamples{ - SubsampleEntry(72, 7)}; + SubsampleEntry(72, 10), SubsampleEntry(5, 7)}; EXPECT_EQ(std::vector(kExpectedOutput, kExpectedOutput + arraysize(kExpectedOutput)), @@ -627,22 +630,23 @@ TEST(NalUnitToByteStreamConverterTest, EncryptedPps) { EXPECT_THAT(kExpectedOutputSubsamples, subsamples); } -// A clear PPS NALU follows a clear NALU, the PPS will be removed. So the -// corresponding subsample's clear bytes may be reduced. -TEST(NalUnitToByteStreamConverterTest, ClearPps) { +// A clear PPS NALU follows a clear NALU, the PPS in the sample is the same as +// the PPS in decoder configuration, the PPS is dropped and subsample size is +// adjusted. +TEST(NalUnitToByteStreamConverterTest, ClearPpsSame) { // Only the type of the NAL units are checked. // This does not contain AUD, SPS, nor PPS. const uint8_t kUnitStreamLikeMediaSample[] = { - 0x00, 0x00, 0x00, 0x0A, // Size 10 NALU. - 0x06, // NAL unit type. - 0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, // clear 0x00, 0x00, 0x00, 0x0B, // Size 11 NALU. - 0x68, // PPS, will be removed after convertion - // The content of PPS is not checked. - 0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x12, 0x12, 0x13, 0x14, 0x15, // clear - 0x00, 0x00, 0x00, 0x08, // Size 8 NALU. - 0x02, // NAL unit type. - 0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77, // Slice data, cipher + 0x06, // NAL unit type. + 0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, 0x88, // clear + 0x00, 0x00, 0x00, 0x0A, // Size 10 NALU. 
+ 0x68, // PPS, same as in decoder configuration, so is + // removed. + 0xFE, 0xFD, 0xFC, 0xFB, 0x11, 0x12, 0x13, 0x14, 0x15, // PPS. + 0x00, 0x00, 0x00, 0x08, // Size 8 NALU. + 0x02, // NAL unit type. + 0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77, // Slice data, cipher }; std::vector subsamples{SubsampleEntry(34, 7)}; @@ -657,6 +661,7 @@ TEST(NalUnitToByteStreamConverterTest, ClearPps) { kUnitStreamLikeMediaSample, arraysize(kUnitStreamLikeMediaSample), kIsKeyFrame, !kEscapeEncryptedNalu, &output, &subsamples)); + // clang-format off const uint8_t kExpectedOutput[] = { 0x00, 0x00, 0x00, 0x01, // Start code. 0x09, // AUD type. @@ -672,15 +677,82 @@ TEST(NalUnitToByteStreamConverterTest, ClearPps) { 0x00, 0x00, 0x00, 0x01, // Start code. // The input NALU 1. 0x06, // NALU type. - 0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, + 0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, 0x88, 0x00, 0x00, 0x00, 0x01, // Start code. // The input NALU 2. 0x02, // NALU type. 0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77, }; + // clang-format on const std::vector kExpectedOutputSubsamples{ - SubsampleEntry(72, 7)}; + SubsampleEntry(73, 7)}; + + EXPECT_EQ(std::vector(kExpectedOutput, + kExpectedOutput + arraysize(kExpectedOutput)), + output); + EXPECT_EQ(kExpectedOutputSubsamples, subsamples); +} + +// A clear PPS NALU follows a clear NALU, the PPS in the sample is different to +// the PPS in decoder configuration, so both the PPS in the sample and the PPS +// in decoder configuration are written to output. +TEST(NalUnitToByteStreamConverterTest, ClearPpsDifferent) { + // Only the type of the NAL units are checked. + const uint8_t kUnitStreamLikeMediaSample[] = { + 0x00, 0x00, 0x00, 0x0B, // Size 11 NALU. + 0x06, // NAL unit type. + 0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, 0x88, // clear + 0x00, 0x00, 0x00, 0x0A, // Size 10 NALU. + 0x68, // PPS, different to the PPS in the decoder + // configuration, is also written to output. 
+ 0xFE, 0xFD, 0xFC, 0xFB, 0x12, 0x12, 0x13, 0x14, 0x15, // clear + 0x00, 0x00, 0x00, 0x08, // Size 8 NALU. + 0x02, // NAL unit type. + 0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77, // Slice data, cipher + }; + + std::vector subsamples{SubsampleEntry(34, 7)}; + + NalUnitToByteStreamConverter converter; + EXPECT_TRUE( + converter.Initialize(kTestAVCDecoderConfigurationRecord, + arraysize(kTestAVCDecoderConfigurationRecord))); + + std::vector output; + EXPECT_TRUE(converter.ConvertUnitToByteStreamWithSubsamples( + kUnitStreamLikeMediaSample, arraysize(kUnitStreamLikeMediaSample), + kIsKeyFrame, !kEscapeEncryptedNalu, &output, &subsamples)); + + // clang-format off + const uint8_t kExpectedOutput[] = { + 0x00, 0x00, 0x00, 0x01, // Start code. + 0x09, // AUD type. + 0xF0, // primary pic type is anything. + 0x00, 0x00, 0x00, 0x01, // Start code. + // Some valid SPS data. + 0x67, 0x64, 0x00, 0x1E, 0xAC, 0xD9, 0x40, 0xB4, + 0x2F, 0xF9, 0x7F, 0xF0, 0x00, 0x80, 0x00, 0x91, + 0x00, 0x00, 0x03, 0x03, 0xE9, 0x00, 0x00, 0xEA, + 0x60, 0x0F, 0x16, 0x2D, 0x96, + 0x00, 0x00, 0x00, 0x01, // Start code. + 0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x11, 0x12, 0x13, 0x14, 0x15, // PPS. + 0x00, 0x00, 0x00, 0x01, // Start code. + // The input NALU 1. + 0x06, // NALU type. + 0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, 0x88, + 0x00, 0x00, 0x00, 0x01, // Start code. + // PPS from the sample; it differs from the decoder configuration PPS above, so it is kept. + 0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x12, 0x12, 0x13, 0x14, 0x15, + 0x00, 0x00, 0x00, 0x01, // Start code. + // The input NALU 2. + 0x02, // NALU type. + 0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77, + }; + // clang-format on + + const std::vector kExpectedOutputSubsamples{ + SubsampleEntry(87, 7)}; EXPECT_EQ(std::vector(kExpectedOutput, kExpectedOutput + arraysize(kExpectedOutput)),