Copy over parameter sets in the sample if different to sample entry

This applies when converting from NAL unit stream to byte stream.

The packager should not assume the parameter sets in the sample are the
same as the parameter sets in the sample entry (decoder configuration).

Fixes #327.

Change-Id: I7e84d28a296f4b33db0523cca5eabd62f623e852
This commit is contained in:
KongQun Yang 2018-02-08 22:03:47 -08:00 committed by Aaron Vaage
parent 11cbbd86fb
commit 594d1f0de4
3 changed files with 141 additions and 41 deletions

View File

@ -13,7 +13,6 @@
#include "packager/media/base/buffer_reader.h" #include "packager/media/base/buffer_reader.h"
#include "packager/media/base/buffer_writer.h" #include "packager/media/base/buffer_writer.h"
#include "packager/media/base/macros.h" #include "packager/media/base/macros.h"
#include "packager/media/codecs/avc_decoder_configuration_record.h"
#include "packager/media/codecs/nalu_reader.h" #include "packager/media/codecs/nalu_reader.h"
namespace shaka { namespace shaka {
@ -28,6 +27,16 @@ const uint8_t kEmulationPreventionByte = 0x03;
const uint8_t kAccessUnitDelimiterRbspAnyPrimaryPicType = 0xF0; const uint8_t kAccessUnitDelimiterRbspAnyPrimaryPicType = 0xF0;
// Reports whether two NAL units are identical.
//
// The units are considered equal only when their types match, their total
// sizes (header_size() + payload_size()) match, and the bytes starting at
// data() compare equal over that total size.
bool IsNaluEqual(const Nalu& left, const Nalu& right) {
  if (left.type() == right.type()) {
    const size_t lhs_bytes = left.header_size() + left.payload_size();
    const size_t rhs_bytes = right.header_size() + right.payload_size();
    // Only compare contents once the lengths are known to agree.
    if (lhs_bytes == rhs_bytes) {
      const int diff = memcmp(left.data(), right.data(), lhs_bytes);
      return diff == 0;
    }
  }
  return false;
}
void AppendNalu(const Nalu& nalu, void AppendNalu(const Nalu& nalu,
int nalu_length_size, int nalu_length_size,
bool escape_data, bool escape_data,
@ -207,25 +216,24 @@ bool NalUnitToByteStreamConverter::Initialize(
return false; return false;
} }
AVCDecoderConfigurationRecord decoder_config; if (!decoder_config_.Parse(std::vector<uint8_t>(
if (!decoder_config.Parse(std::vector<uint8_t>(
decoder_configuration_data, decoder_configuration_data,
decoder_configuration_data + decoder_configuration_data_size))) { decoder_configuration_data + decoder_configuration_data_size))) {
return false; return false;
} }
if (decoder_config.nalu_count() < 2) { if (decoder_config_.nalu_count() < 2) {
LOG(ERROR) << "Cannot find SPS or PPS."; LOG(ERROR) << "Cannot find SPS or PPS.";
return false; return false;
} }
nalu_length_size_ = decoder_config.nalu_length_size(); nalu_length_size_ = decoder_config_.nalu_length_size();
BufferWriter buffer_writer(decoder_configuration_data_size); BufferWriter buffer_writer(decoder_configuration_data_size);
bool found_sps = false; bool found_sps = false;
bool found_pps = false; bool found_pps = false;
for (uint32_t i = 0; i < decoder_config.nalu_count(); ++i) { for (uint32_t i = 0; i < decoder_config_.nalu_count(); ++i) {
const Nalu& nalu = decoder_config.nalu(i); const Nalu& nalu = decoder_config_.nalu(i);
if (nalu.type() == Nalu::H264NaluType::H264_SPS) { if (nalu.type() == Nalu::H264NaluType::H264_SPS) {
buffer_writer.AppendArray(kNaluStartCode, arraysize(kNaluStartCode)); buffer_writer.AppendArray(kNaluStartCode, arraysize(kNaluStartCode));
AppendNalu(nalu, nalu_length_size_, !kEscapeData, &buffer_writer); AppendNalu(nalu, nalu_length_size_, !kEscapeData, &buffer_writer);
@ -245,8 +253,6 @@ bool NalUnitToByteStreamConverter::Initialize(
return true; return true;
} }
// This ignores all AUD, SPS, and PPS in the sample. Instead uses the data
// parsed in Initialize().
bool NalUnitToByteStreamConverter::ConvertUnitToByteStream( bool NalUnitToByteStreamConverter::ConvertUnitToByteStream(
const uint8_t* sample, const uint8_t* sample,
size_t sample_size, size_t sample_size,
@ -258,7 +264,8 @@ bool NalUnitToByteStreamConverter::ConvertUnitToByteStream(
} }
// This ignores all AUD, SPS, and PPS in the sample. Instead uses the data // This ignores all AUD, SPS, and PPS in the sample. Instead uses the data
// parsed in Initialize(). // parsed in Initialize(). However, if the SPS and PPS are different to
// those parsed in Initialize(), they are kept.
bool NalUnitToByteStreamConverter::ConvertUnitToByteStreamWithSubsamples( bool NalUnitToByteStreamConverter::ConvertUnitToByteStreamWithSubsamples(
const uint8_t* sample, const uint8_t* sample,
size_t sample_size, size_t sample_size,
@ -303,11 +310,30 @@ bool NalUnitToByteStreamConverter::ConvertUnitToByteStreamWithSubsamples(
} }
switch (nalu.type()) { switch (nalu.type()) {
case Nalu::H264_AUD: case Nalu::H264_AUD:
FALLTHROUGH_INTENDED; break;
case Nalu::H264_SPS: case Nalu::H264_SPS:
FALLTHROUGH_INTENDED; FALLTHROUGH_INTENDED;
case Nalu::H264_PPS: case Nalu::H264_PPS: {
// Also write this SPS/PPS if it is not the same as SPS/PPS in decoder
// configuration, which is already written.
//
// For more information see:
// - github.com/google/shaka-packager/issues/327
// - ISO/IEC 14496-15 5.4.5 Sync Sample
//
// TODO(kqyang): Parse sample data to figure out which SPS/PPS the
// sample actually uses and include that only.
bool new_decoder_config = true;
for (size_t i = 0; i < decoder_config_.nalu_count(); ++i) {
if (IsNaluEqual(decoder_config_.nalu(i), nalu)) {
new_decoder_config = false;
break; break;
}
}
if (!new_decoder_config)
break;
FALLTHROUGH_INTENDED;
}
default: default:
bool escape_data = false; bool escape_data = false;
if (subsamples && !subsamples->empty()) { if (subsamples && !subsamples->empty()) {

View File

@ -12,6 +12,7 @@
#include "packager/base/macros.h" #include "packager/base/macros.h"
#include "packager/media/base/decrypt_config.h" #include "packager/media/base/decrypt_config.h"
#include "packager/media/codecs/avc_decoder_configuration_record.h"
namespace shaka { namespace shaka {
namespace media { namespace media {
@ -83,6 +84,7 @@ class NalUnitToByteStreamConverter {
friend class NalUnitToByteStreamConverterTest; friend class NalUnitToByteStreamConverterTest;
int nalu_length_size_; int nalu_length_size_;
AVCDecoderConfigurationRecord decoder_config_;
std::vector<uint8_t> decoder_configuration_in_byte_stream_; std::vector<uint8_t> decoder_configuration_in_byte_stream_;
DISALLOW_COPY_AND_ASSIGN(NalUnitToByteStreamConverter); DISALLOW_COPY_AND_ASSIGN(NalUnitToByteStreamConverter);

View File

@ -33,7 +33,6 @@ const uint8_t kTestAVCDecoderConfigurationRecord[] = {
0x60, 0x0F, 0x16, 0x2D, 0x96, 0x60, 0x0F, 0x16, 0x2D, 0x96,
0x01, // 1 pps. 0x01, // 1 pps.
0x00, 0x0A, // PPS length == 10 0x00, 0x0A, // PPS length == 10
// The content of PPS is not checked except the type.
0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x11, 0x12, 0x13, 0x14, 0x15, 0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x11, 0x12, 0x13, 0x14, 0x15,
}; };
const uint8_t kTestAVCDecoderConfigurationRecordNaluLengthSize2[] = { const uint8_t kTestAVCDecoderConfigurationRecordNaluLengthSize2[] = {
@ -51,7 +50,6 @@ const uint8_t kTestAVCDecoderConfigurationRecordNaluLengthSize2[] = {
0x60, 0x0F, 0x16, 0x2D, 0x96, 0x60, 0x0F, 0x16, 0x2D, 0x96,
0x01, // 1 pps. 0x01, // 1 pps.
0x00, 0x0A, // PPS length == 10 0x00, 0x0A, // PPS length == 10
// The content of PPS is not checked except the type.
0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x11, 0x12, 0x13, 0x14, 0x15, 0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x11, 0x12, 0x13, 0x14, 0x15,
}; };
@ -566,7 +564,7 @@ TEST(NalUnitToByteStreamConverterTest, EncryptedNaluEndingWithZero) {
EXPECT_THAT(subsamples, ::testing::ElementsAre(SubsampleEntry(13, 3))); EXPECT_THAT(subsamples, ::testing::ElementsAre(SubsampleEntry(13, 3)));
} }
// corresponding subsample needs to be removed. // Not supposed to happen, just in case, make sure it is properly supported.
TEST(NalUnitToByteStreamConverterTest, EncryptedPps) { TEST(NalUnitToByteStreamConverterTest, EncryptedPps) {
// Only the type of the NAL units are checked. // Only the type of the NAL units are checked.
// This does not contain AUD, SPS, nor PPS. // This does not contain AUD, SPS, nor PPS.
@ -575,8 +573,8 @@ TEST(NalUnitToByteStreamConverterTest, EncryptedPps) {
0x06, // NAL unit type. 0x06, // NAL unit type.
0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, // clear 0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, // clear
0x00, 0x00, 0x00, 0x0B, // Size 11 NALU. 0x00, 0x00, 0x00, 0x0B, // Size 11 NALU.
0x68, // PPS, will be removed after convertion 0x68, // PPS, will remain as it is different to the one in decoder
// The content of PPS is not checked. // configuration.
0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x12, 0x12, 0x13, 0x14, 0x15, // cipher 0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x12, 0x12, 0x13, 0x14, 0x15, // cipher
0x00, 0x00, 0x00, 0x08, // Size 8 NALU. 0x00, 0x00, 0x00, 0x08, // Size 8 NALU.
0x02, // NAL unit type. 0x02, // NAL unit type.
@ -596,6 +594,7 @@ TEST(NalUnitToByteStreamConverterTest, EncryptedPps) {
kUnitStreamLikeMediaSample, arraysize(kUnitStreamLikeMediaSample), kUnitStreamLikeMediaSample, arraysize(kUnitStreamLikeMediaSample),
kIsKeyFrame, !kEscapeEncryptedNalu, &output, &subsamples)); kIsKeyFrame, !kEscapeEncryptedNalu, &output, &subsamples));
// clang-format off
const uint8_t kExpectedOutput[] = { const uint8_t kExpectedOutput[] = {
0x00, 0x00, 0x00, 0x01, // Start code. 0x00, 0x00, 0x00, 0x01, // Start code.
0x09, // AUD type. 0x09, // AUD type.
@ -613,13 +612,17 @@ TEST(NalUnitToByteStreamConverterTest, EncryptedPps) {
0x06, // NALU type. 0x06, // NALU type.
0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, 0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77,
0x00, 0x00, 0x00, 0x01, // Start code. 0x00, 0x00, 0x00, 0x01, // Start code.
// PPS from sample.
0x68, 0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x12, 0x12, 0x13, 0x14, 0x15, // cipher
0x00, 0x00, 0x00, 0x01, // Start code.
// The input NALU 2. // The input NALU 2.
0x02, // NALU type. 0x02, // NALU type.
0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77, 0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77,
}; };
// clang-format on
const std::vector<SubsampleEntry> kExpectedOutputSubsamples{ const std::vector<SubsampleEntry> kExpectedOutputSubsamples{
SubsampleEntry(72, 7)}; SubsampleEntry(72, 10), SubsampleEntry(5, 7)};
EXPECT_EQ(std::vector<uint8_t>(kExpectedOutput, EXPECT_EQ(std::vector<uint8_t>(kExpectedOutput,
kExpectedOutput + arraysize(kExpectedOutput)), kExpectedOutput + arraysize(kExpectedOutput)),
@ -627,19 +630,20 @@ TEST(NalUnitToByteStreamConverterTest, EncryptedPps) {
EXPECT_THAT(kExpectedOutputSubsamples, subsamples); EXPECT_THAT(kExpectedOutputSubsamples, subsamples);
} }
// A clear PPS NALU follows a clear NALU, the PPS will be removed. So the // A clear PPS NALU follows a clear NALU, the PPS in the sample is the same as
// corresponding subsample's clear bytes may be reduced. // the PPS in decoder configuration, the PPS is dropped and subsample size is
TEST(NalUnitToByteStreamConverterTest, ClearPps) { // adjusted.
TEST(NalUnitToByteStreamConverterTest, ClearPpsSame) {
// Only the type of the NAL units are checked. // Only the type of the NAL units are checked.
// This does not contain AUD, SPS, nor PPS. // This does not contain AUD, SPS, nor PPS.
const uint8_t kUnitStreamLikeMediaSample[] = { const uint8_t kUnitStreamLikeMediaSample[] = {
0x00, 0x00, 0x00, 0x0A, // Size 10 NALU.
0x06, // NAL unit type.
0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, // clear
0x00, 0x00, 0x00, 0x0B, // Size 11 NALU. 0x00, 0x00, 0x00, 0x0B, // Size 11 NALU.
0x68, // PPS, will be removed after convertion 0x06, // NAL unit type.
// The content of PPS is not checked. 0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, 0x88, // clear
0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x12, 0x12, 0x13, 0x14, 0x15, // clear 0x00, 0x00, 0x00, 0x0A, // Size 10 NALU.
0x68, // PPS, same as in decoder configuration, so is
// removed.
0xFE, 0xFD, 0xFC, 0xFB, 0x11, 0x12, 0x13, 0x14, 0x15, // PPS.
0x00, 0x00, 0x00, 0x08, // Size 8 NALU. 0x00, 0x00, 0x00, 0x08, // Size 8 NALU.
0x02, // NAL unit type. 0x02, // NAL unit type.
0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77, // Slice data, cipher 0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77, // Slice data, cipher
@ -657,6 +661,7 @@ TEST(NalUnitToByteStreamConverterTest, ClearPps) {
kUnitStreamLikeMediaSample, arraysize(kUnitStreamLikeMediaSample), kUnitStreamLikeMediaSample, arraysize(kUnitStreamLikeMediaSample),
kIsKeyFrame, !kEscapeEncryptedNalu, &output, &subsamples)); kIsKeyFrame, !kEscapeEncryptedNalu, &output, &subsamples));
// clang-format off
const uint8_t kExpectedOutput[] = { const uint8_t kExpectedOutput[] = {
0x00, 0x00, 0x00, 0x01, // Start code. 0x00, 0x00, 0x00, 0x01, // Start code.
0x09, // AUD type. 0x09, // AUD type.
@ -672,15 +677,82 @@ TEST(NalUnitToByteStreamConverterTest, ClearPps) {
0x00, 0x00, 0x00, 0x01, // Start code. 0x00, 0x00, 0x00, 0x01, // Start code.
// The input NALU 1. // The input NALU 1.
0x06, // NALU type. 0x06, // NALU type.
0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, 0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, 0x88,
0x00, 0x00, 0x00, 0x01, // Start code. 0x00, 0x00, 0x00, 0x01, // Start code.
// The input NALU 2. // The input NALU 2.
0x02, // NALU type. 0x02, // NALU type.
0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77, 0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77,
}; };
// clang-format on
const std::vector<SubsampleEntry> kExpectedOutputSubsamples{ const std::vector<SubsampleEntry> kExpectedOutputSubsamples{
SubsampleEntry(72, 7)}; SubsampleEntry(73, 7)};
EXPECT_EQ(std::vector<uint8_t>(kExpectedOutput,
kExpectedOutput + arraysize(kExpectedOutput)),
output);
EXPECT_EQ(kExpectedOutputSubsamples, subsamples);
}
// A clear PPS NALU follows a clear NALU. The PPS in the sample differs from
// the PPS in decoder configuration, so both the PPS in the sample and the PPS
// in decoder configuration are written to output.
TEST(NalUnitToByteStreamConverterTest, ClearPpsDifferent) {
// Only the type of the NAL units are checked.
const uint8_t kUnitStreamLikeMediaSample[] = {
0x00, 0x00, 0x00, 0x0B, // Size 11 NALU.
0x06, // NAL unit type.
0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, 0x88, // clear
0x00, 0x00, 0x00, 0x0A, // Size 10 NALU.
0x68, // PPS, different to the PPS in the decoder
// configuration, is also written to output.
0xFE, 0xFD, 0xFC, 0xFB, 0x12, 0x12, 0x13, 0x14, 0x15, // clear
0x00, 0x00, 0x00, 0x08, // Size 8 NALU.
0x02, // NAL unit type.
0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77, // Slice data, cipher
};
std::vector<SubsampleEntry> subsamples{SubsampleEntry(34, 7)};
NalUnitToByteStreamConverter converter;
EXPECT_TRUE(
converter.Initialize(kTestAVCDecoderConfigurationRecord,
arraysize(kTestAVCDecoderConfigurationRecord)));
std::vector<uint8_t> output;
EXPECT_TRUE(converter.ConvertUnitToByteStreamWithSubsamples(
kUnitStreamLikeMediaSample, arraysize(kUnitStreamLikeMediaSample),
kIsKeyFrame, !kEscapeEncryptedNalu, &output, &subsamples));
// clang-format off
const uint8_t kExpectedOutput[] = {
0x00, 0x00, 0x00, 0x01, // Start code.
0x09, // AUD type.
0xF0, // primary pic type is anything.
0x00, 0x00, 0x00, 0x01, // Start code.
// Some valid SPS data.
0x67, 0x64, 0x00, 0x1E, 0xAC, 0xD9, 0x40, 0xB4,
0x2F, 0xF9, 0x7F, 0xF0, 0x00, 0x80, 0x00, 0x91,
0x00, 0x00, 0x03, 0x03, 0xE9, 0x00, 0x00, 0xEA,
0x60, 0x0F, 0x16, 0x2D, 0x96,
0x00, 0x00, 0x00, 0x01, // Start code.
0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x11, 0x12, 0x13, 0x14, 0x15, // PPS.
0x00, 0x00, 0x00, 0x01, // Start code.
// The input NALU 1.
0x06, // NALU type.
0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, 0x88,
0x00, 0x00, 0x00, 0x01, // Start code.
// PPS should match the PPS above.
0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x12, 0x12, 0x13, 0x14, 0x15,
0x00, 0x00, 0x00, 0x01, // Start code.
// The input NALU 2.
0x02, // NALU type.
0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77,
};
// clang-format on
const std::vector<SubsampleEntry> kExpectedOutputSubsamples{
SubsampleEntry(87, 7)};
EXPECT_EQ(std::vector<uint8_t>(kExpectedOutput, EXPECT_EQ(std::vector<uint8_t>(kExpectedOutput,
kExpectedOutput + arraysize(kExpectedOutput)), kExpectedOutput + arraysize(kExpectedOutput)),