diff --git a/packager/media/codecs/nal_unit_to_byte_stream_converter.cc b/packager/media/codecs/nal_unit_to_byte_stream_converter.cc
index 42903183d6..1f36deca3b 100644
--- a/packager/media/codecs/nal_unit_to_byte_stream_converter.cc
+++ b/packager/media/codecs/nal_unit_to_byte_stream_converter.cc
@@ -47,27 +47,6 @@ void AddAccessUnitDelimiter(BufferWriter* buffer_writer) {
   buffer_writer->AppendInt(kAccessUnitDelimiterRbspAnyPrimaryPicType);
 }
 
-bool CheckIsClearNalu(const std::vector<SubsampleEntry>* subsamples,
-                      size_t subsample_id,
-                      size_t nalu_size,
-                      bool* is_nalu_all_clear) {
-  if (subsample_id >= subsamples->size()) {
-    LOG(ERROR) << "Subsample index exceeds subsamples' size.";
-    return false;
-  }
-  const SubsampleEntry& subsample = subsamples->at(subsample_id);
-  if (nalu_size == subsample.clear_bytes + subsample.cipher_bytes) {
-    *is_nalu_all_clear = false;
-  } else if (nalu_size < subsample.clear_bytes) {
-    *is_nalu_all_clear = true;
-  } else {
-    LOG(ERROR) << "Unexpected subsample entry " << subsample.clear_bytes << ":"
-               << subsample.cipher_bytes << " nalu size: " << nalu_size;
-    return false;
-  }
-  return true;
-}
-
 }  // namespace
 
 void EscapeNalByteSequence(const uint8_t* input,
@@ -86,6 +86,115 @@ void EscapeNalByteSequence(const uint8_t* input,
   }
 }
 
+// This function creates a new subsample entry (|clear_bytes|, |cipher_bytes|)
+// and appends it to |subsamples|. It splits the oversized (64KB) clear_bytes
+// into smaller ones.
+void AppendSubsamples(uint32_t clear_bytes,
+                      uint32_t cipher_bytes,
+                      std::vector<SubsampleEntry>* subsamples) {
+  while (clear_bytes > UINT16_MAX) {
+    subsamples->emplace_back(UINT16_MAX, 0);
+    clear_bytes -= UINT16_MAX;
+  }
+  subsamples->emplace_back(clear_bytes, cipher_bytes);
+}
+
+// TODO(hmchen): Wrap methods of processing subsamples into a separate class,
+// e.g., SubsampleReader.
+// This function finds the range of the subsamples corresponding to a NAL unit
+// size. If a subsample crosses the boundary of two NAL units, it is split into
+// smaller subsamples. Each call processes one NAL unit and it assumes the input
+// NAL unit is already aligned with subsamples->at(start_subsample_id).
+//
+// An example of calling multiple times on each NAL unit is as follows:
+//
+// Input:
+//
+//   Nalu 0                      Nalu 1  Nalu 2
+//     |                           |       |
+//     v                           v       v
+//     | clear | cipher |  clear   | clear | clear | cipher |
+//
+//     |  Subsample 0   |            Subsample 1            |
+//
+// Output:
+//
+//     |  Subsample 0   | Subsample 1 | Subsample 2 | Subsample 3 |
+//
+// Nalu 0: start_subsample_id = 0, next_subsample_id = 2
+// Nalu 1: start_subsample_id = 2, next_subsample_id = 3
+// Nalu 2: start_subsample_id = 3, next_subsample_id = 4
+bool AlignSubsamplesWithNalu(size_t nalu_size,
+                             size_t start_subsample_id,
+                             std::vector<SubsampleEntry>* subsamples,
+                             size_t* next_subsample_id) {
+  DCHECK(subsamples && !subsamples->empty());
+  size_t subsample_id = start_subsample_id;
+  size_t nalu_size_remain = nalu_size;
+  size_t subsample_bytes = 0;
+  while (subsample_id < subsamples->size()) {
+    subsample_bytes = subsamples->at(subsample_id).clear_bytes +
+                      subsamples->at(subsample_id).cipher_bytes;
+    if (nalu_size_remain <= subsample_bytes) {
+      break;
+    }
+    nalu_size_remain -= subsample_bytes;
+    subsample_id++;
+  }
+
+  if (subsample_id == subsamples->size()) {
+    DCHECK_GT(nalu_size_remain, 0u);
+    LOG(ERROR)
+        << "Total size of NAL unit is larger than the size of subsamples.";
+    return false;
+  }
+
+  if (nalu_size_remain == subsample_bytes) {
+    *next_subsample_id = subsample_id + 1;
+    return true;
+  }
+
+  DCHECK_GT(subsample_bytes, nalu_size_remain);
+  size_t clear_bytes = subsamples->at(subsample_id).clear_bytes;
+  size_t new_clear_bytes = 0;
+  size_t new_cipher_bytes = 0;
+  if (nalu_size_remain < clear_bytes) {
+    new_clear_bytes = nalu_size_remain;
+  } else {
+    new_clear_bytes = clear_bytes;
+    new_cipher_bytes = nalu_size_remain - clear_bytes;
+  }
+  subsamples->insert(subsamples->begin() + subsample_id,
+                     SubsampleEntry(static_cast<uint16_t>(new_clear_bytes),
+                                    static_cast<uint32_t>(new_cipher_bytes)));
+  subsample_id++;
+  subsamples->at(subsample_id).clear_bytes -=
+      static_cast<uint16_t>(new_clear_bytes);
+  subsamples->at(subsample_id).cipher_bytes -=
+      static_cast<uint32_t>(new_cipher_bytes);
+  *next_subsample_id = subsample_id;
+  return true;
+}
+
+// This function tries to merge clear-only into clear+cipher subsamples. This
+// merge makes sure the clear_bytes will not exceed the clear size limits
+// (2^16 bytes).
+std::vector<SubsampleEntry> MergeSubsamples(
+    const std::vector<SubsampleEntry>& subsamples) {
+  std::vector<SubsampleEntry> new_subsamples;
+  uint32_t clear_bytes = 0;
+  for (size_t i = 0; i < subsamples.size(); ++i) {
+    clear_bytes += subsamples[i].clear_bytes;
+    // Add new subsample(s).
+    if (subsamples[i].cipher_bytes > 0 || i == subsamples.size() - 1) {
+      AppendSubsamples(clear_bytes, subsamples[i].cipher_bytes,
+                       &new_subsamples);
+      clear_bytes = 0;
+    }
+  }
+  return new_subsamples;
+}
+
 NalUnitToByteStreamConverter::NalUnitToByteStreamConverter()
     : nalu_length_size_(0) {}
 NalUnitToByteStreamConverter::~NalUnitToByteStreamConverter() {}
@@ -183,80 +271,88 @@ bool NalUnitToByteStreamConverter::ConvertUnitToByteStreamWithSubsamples(
     return true;
   }
 
+  std::vector<SubsampleEntry> temp_subsamples;
+
   BufferWriter buffer_writer(sample_size);
   buffer_writer.AppendArray(kNaluStartCode, arraysize(kNaluStartCode));
   AddAccessUnitDelimiter(&buffer_writer);
   if (is_key_frame)
     buffer_writer.AppendVector(decoder_configuration_in_byte_stream_);
 
-  int adjustment = static_cast<int>(buffer_writer.Size());
-  const int start_code_size_adjustment =
-      arraysize(kNaluStartCode) - nalu_length_size_;
-  size_t subsample_id = 0;
+  if (subsamples && !subsamples->empty()) {
+    // The inserted part in buffer_writer is all clear. Add a corresponding
+    // all-clear subsample.
+    AppendSubsamples(static_cast<uint32_t>(buffer_writer.Size()), 0u,
+                     &temp_subsamples);
+  }
 
   NaluReader nalu_reader(Nalu::kH264, nalu_length_size_, sample, sample_size);
   Nalu nalu;
   NaluReader::Result result = nalu_reader.Advance(&nalu);
+  size_t start_subsample_id = 0;
+  size_t next_subsample_id = 0;
   while (result == NaluReader::kOk) {
+    const size_t old_nalu_size =
+        nalu_length_size_ + nalu.header_size() + nalu.payload_size();
+    if (subsamples && !subsamples->empty()) {
+      if (!AlignSubsamplesWithNalu(old_nalu_size, start_subsample_id,
+                                   subsamples, &next_subsample_id)) {
+        return false;
+      }
+    }
     switch (nalu.type()) {
       case Nalu::H264_AUD:
         FALLTHROUGH_INTENDED;
       case Nalu::H264_SPS:
         FALLTHROUGH_INTENDED;
       case Nalu::H264_PPS:
-        if (subsamples && !subsamples->empty()) {
-          const size_t old_nalu_size =
-              nalu_length_size_ + nalu.header_size() + nalu.payload_size();
-          bool is_nalu_all_clear;
-          if (!CheckIsClearNalu(subsamples, subsample_id, old_nalu_size,
-                                &is_nalu_all_clear)) {
-            return false;
-          }
-          if (is_nalu_all_clear) {
-            // If AUD/SPS/PPS is all clear, reduce the clear bytes.
-            DCHECK_LT(old_nalu_size, subsamples->at(subsample_id).clear_bytes);
-            subsamples->at(subsample_id).clear_bytes -=
-                static_cast<uint16_t>(old_nalu_size);
-          } else {
-            // If AUD/SPS/PPS has cipher, drop the corresponding subsample.
-            subsamples->erase(subsamples->begin() + subsample_id);
-          }
-        }
         break;
       default:
         bool escape_data = false;
         if (subsamples && !subsamples->empty()) {
-          const size_t old_nalu_size =
-              nalu_length_size_ + nalu.header_size() + nalu.payload_size();
-          bool is_nalu_all_clear;
-          if (!CheckIsClearNalu(subsamples, subsample_id, old_nalu_size,
-                                &is_nalu_all_clear)) {
-            return false;
-          }
-          if (is_nalu_all_clear) {
-            // Add this nalu to the adjustment and remove it from clear_bytes.
-            DCHECK_LT(old_nalu_size, subsamples->at(subsample_id).clear_bytes);
-            subsamples->at(subsample_id).clear_bytes -=
-                static_cast<uint16_t>(old_nalu_size);
-            adjustment +=
-                static_cast<int>(old_nalu_size) + start_code_size_adjustment;
-          } else {
-            if (escape_encrypted_nalu)
-              escape_data = subsamples->at(subsample_id).cipher_bytes != 0;
-            // Apply the adjustment on the current subsample, reset the
-            // adjustment and move to the next subsample.
-            subsamples->at(subsample_id).clear_bytes +=
-                adjustment + start_code_size_adjustment;
-            subsample_id++;
-            adjustment = 0;
+          if (escape_encrypted_nalu) {
+            for (size_t i = start_subsample_id; i < next_subsample_id; ++i) {
+              if (subsamples->at(i).cipher_bytes != 0) {
+                escape_data = true;
+                break;
+              }
+            }
           }
         }
 
         buffer_writer.AppendArray(kNaluStartCode, arraysize(kNaluStartCode));
         AppendNalu(nalu, nalu_length_size_, escape_data, &buffer_writer);
+
+        if (subsamples && !subsamples->empty()) {
+          temp_subsamples.emplace_back(
+              static_cast<uint16_t>(arraysize(kNaluStartCode)), 0u);
+          // Update the first subsample of each NAL unit, which replaces NAL
+          // unit length field with start code. Note that if the escape_data is
+          // true, the total data size and the cipher_bytes may be changed.
+          // However, since the escape_data for encrypted nalu is only used in
+          // Sample-AES, which means the subsample is not really used,
+          // inaccurate subsamples should not be a big deal.
+          if (subsamples->at(start_subsample_id).clear_bytes <
+              nalu_length_size_) {
+            LOG(ERROR) << "Clear bytes ("
+                       << subsamples->at(start_subsample_id).clear_bytes
+                       << ") in start subsample of NAL unit is less than NAL "
+                          "unit length size ("
+                       << nalu_length_size_
+                       << "). The NAL unit length size is (partially) "
+                          "encrypted. In that case, it cannot be "
+                          "converted to byte stream.";
+            return false;
+          }
+          subsamples->at(start_subsample_id).clear_bytes -= nalu_length_size_;
+          temp_subsamples.insert(temp_subsamples.end(),
+                                 subsamples->begin() + start_subsample_id,
+                                 subsamples->begin() + next_subsample_id);
+        }
         break;
     }
 
+    start_subsample_id = next_subsample_id;
     result = nalu_reader.Advance(&nalu);
   }
 
@@ -267,6 +363,17 @@ bool NalUnitToByteStreamConverter::ConvertUnitToByteStreamWithSubsamples(
   }
 
   buffer_writer.SwapBuffer(output);
+  if (subsamples && !subsamples->empty()) {
+    if (next_subsample_id < subsamples->size()) {
+      LOG(ERROR)
+          << "The total size of NAL unit is shorter than the subsample size.";
+      return false;
+    }
+    // This function may modify the new_subsamples. But since it creates a
+    // merged version and assigns it to the output subsamples, the input one
+    // is no longer used.
+    *subsamples = MergeSubsamples(temp_subsamples);
+  }
   return true;
 }
 
diff --git a/packager/media/codecs/nal_unit_to_byte_stream_converter_unittest.cc b/packager/media/codecs/nal_unit_to_byte_stream_converter_unittest.cc
index 078ea0520e..576032ea2d 100644
--- a/packager/media/codecs/nal_unit_to_byte_stream_converter_unittest.cc
+++ b/packager/media/codecs/nal_unit_to_byte_stream_converter_unittest.cc
@@ -271,7 +271,7 @@ TEST(NalUnitToByteStreamConverterTest, DispersedZeros) {
             output);
 }
 
-// Verify that CnovertUnitToByteStream() with escape_data = false works.
+// Verify that ConvertUnitToByteStream() with escape_data = false works.
 TEST(NalUnitToByteStreamConverterTest, DoNotEscape) {
   // This has sequences that should be escaped if escape_data = true.
   const uint8_t kNotEscaped[] = {
@@ -490,8 +490,8 @@ TEST(NalUnitToByteStreamConverterTest, EscapeEncryptedNalu) {
       0xFD, 0x01, 0x02, 0x00, 0x00, 0x01, 0x02, 0x03,
   };
 
-  std::vector<SubsampleEntry> subsamples{
-      SubsampleEntry(19, 7), SubsampleEntry(9, 4), SubsampleEntry(7, 3)};
+  std::vector<SubsampleEntry> subsamples{SubsampleEntry(19, 7),
+                                         SubsampleEntry(9, 4)};
 
   NalUnitToByteStreamConverter converter;
   EXPECT_TRUE(
@@ -525,8 +525,7 @@ TEST(NalUnitToByteStreamConverterTest, EscapeEncryptedNalu) {
             output);
   // The result subsample does not include emulation prevention bytes.
   EXPECT_THAT(subsamples, ::testing::ElementsAre(SubsampleEntry(25, 7),
-                                                 SubsampleEntry(9, 4),
-                                                 SubsampleEntry(7, 3)));
+                                                 SubsampleEntry(9, 4)));
 }
 
 TEST(NalUnitToByteStreamConverterTest, EncryptedNaluEndingWithZero) {
@@ -689,5 +688,230 @@ TEST(NalUnitToByteStreamConverterTest, ClearPps) {
   EXPECT_EQ(kExpectedOutputSubsamples, subsamples);
 }
 
+// One NAL unit has more than one subsample. All subsamples except the last
+// are all-clear subsamples. This case is possible when the clear part is
+// larger than 16-bit (64KB), so that the clear part is split into two
+// subsamples.
+TEST(NalUnitToByteStreamConverterTest,
+     MultipleSubsamplesInSingleNaluOnlyLastEncrypted) {
+  // Only the types of the NAL units are checked.
+  // This does not contain AUD, SPS, nor PPS.
+  const uint8_t kUnitStreamLikeMediaSample[] = {
+      0x00, 0x00, 0x00, 0x0A,  // Size 10 NALU.
+      0x06,                    // NAL unit type.
+      0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77,
+      0x00, 0x00, 0x00, 0x08,  // Size 8 NALU.
+      0x02,                    // NAL unit type.
+      0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77,  // Slice data
+  };
+
+  std::vector<SubsampleEntry> subsamples{
+      SubsampleEntry(6, 0), SubsampleEntry(8, 0), SubsampleEntry(5, 7)};
+
+  NalUnitToByteStreamConverter converter;
+  EXPECT_TRUE(
+      converter.Initialize(kTestAVCDecoderConfigurationRecord,
+                           arraysize(kTestAVCDecoderConfigurationRecord)));
+
+  std::vector<uint8_t> output;
+  EXPECT_TRUE(converter.ConvertUnitToByteStreamWithSubsamples(
+      kUnitStreamLikeMediaSample, arraysize(kUnitStreamLikeMediaSample),
+      kIsKeyFrame, !kEscapeEncryptedNalu, &output, &subsamples));
+
+  const uint8_t kExpectedOutput[] = {
+      0x00, 0x00, 0x00, 0x01,  // Start code.
+      0x09,                    // AUD type.
+      0xF0,                    // primary pic type is anything.
+      0x00, 0x00, 0x00, 0x01,  // Start code.
+      // Some valid SPS data.
+      0x67, 0x64, 0x00, 0x1E, 0xAC, 0xD9, 0x40, 0xB4, 0x2F, 0xF9, 0x7F, 0xF0,
+      0x00, 0x80, 0x00, 0x91, 0x00, 0x00, 0x03, 0x03, 0xE9, 0x00, 0x00, 0xEA,
+      0x60, 0x0F, 0x16, 0x2D, 0x96, 0x00, 0x00, 0x00, 0x01,  // Start code.
+      0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x11, 0x12, 0x13, 0x14, 0x15,  // PPS.
+      0x00, 0x00, 0x00, 0x01,  // Start code.
+      // The input NALU 1.
+      0x06,  // NALU type.
+      0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77, 0x00, 0x00, 0x00,
+      0x01,  // Start code.
+      // The input NALU 2.
+      0x02,  // NALU type.
+      0xFD, 0x78, 0xA4, 0x82, 0x62, 0x29, 0x77,
+  };
+
+  const std::vector<SubsampleEntry> kExpectedOutputSubsamples{
+      SubsampleEntry(72, 7)};
+
+  EXPECT_EQ(std::vector<uint8_t>(kExpectedOutput,
+                                 kExpectedOutput + arraysize(kExpectedOutput)),
+            output);
+  EXPECT_EQ(kExpectedOutputSubsamples, subsamples);
+}
+
+// One NAL unit has more than one subsample. All subsamples have cipher
+// texts.
+TEST(NalUnitToByteStreamConverterTest,
+     MultipleSubsamplesInSingleNaluAllEncrypted) {
+  // Only the types of the NAL units are checked.
+  // This does not contain AUD, SPS, nor PPS.
+  const uint8_t kUnitStreamLikeMediaSample[] = {
+      0x00, 0x00, 0x00, 0x0A,  // Size 10 NALU.
+      0x06,                    // NAL unit type.
+      0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77,
+      0x00, 0x00, 0x00, 0x10,  // Size 16 NALU.
+      0x02,                    // NAL unit type.
+      // Slices data.
+      0xFD, 0x78, 0xA4, 0x82, 0x62,        // Encrypted.
+      0x29, 0x77, 0x27, 0xFD, 0x78, 0xA4,  // Clear.
+      0xC3, 0x82, 0x62, 0x11,              // Encrypted.
+  };
+
+  // The 2nd (partially) and 3rd subsamples belong to the 2nd input NALU.
+  std::vector<SubsampleEntry> subsamples{ SubsampleEntry(6, 0),
+                                          SubsampleEntry(13, 5),
+                                          SubsampleEntry(6, 4)};
+
+  NalUnitToByteStreamConverter converter;
+  EXPECT_TRUE(
+      converter.Initialize(kTestAVCDecoderConfigurationRecord,
+                           arraysize(kTestAVCDecoderConfigurationRecord)));
+
+  std::vector<uint8_t> output;
+  EXPECT_TRUE(converter.ConvertUnitToByteStreamWithSubsamples(
+      kUnitStreamLikeMediaSample, arraysize(kUnitStreamLikeMediaSample),
+      kIsKeyFrame, !kEscapeEncryptedNalu, &output, &subsamples));
+
+  const uint8_t kExpectedOutput[] = {
+      0x00, 0x00, 0x00, 0x01,  // Start code.
+      0x09,                    // AUD type.
+      0xF0,                    // primary pic type is anything.
+      0x00, 0x00, 0x00, 0x01,  // Start code.
+      // Some valid SPS data.
+      0x67, 0x64, 0x00, 0x1E, 0xAC, 0xD9, 0x40, 0xB4, 0x2F, 0xF9, 0x7F, 0xF0,
+      0x00, 0x80, 0x00, 0x91, 0x00, 0x00, 0x03, 0x03, 0xE9, 0x00, 0x00, 0xEA,
+      0x60, 0x0F, 0x16, 0x2D, 0x96, 0x00, 0x00, 0x00, 0x01,  // Start code.
+      0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x11, 0x12, 0x13, 0x14, 0x15,  // PPS.
+      0x00, 0x00, 0x00, 0x01,  // Start code.
+      // The input NALU 1.
+      0x06,  // NALU type.
+      0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77,
+      0x00, 0x00, 0x00, 0x01,  // Start code.
+      // The input NALU 2.
+      0x02,  // NALU type.
+      // Slices data.
+      0xFD, 0x78, 0xA4, 0x82, 0x62,        // Encrypted.
+      0x29, 0x77, 0x27, 0xFD, 0x78, 0xA4,  // Clear.
+      0xC3, 0x82, 0x62, 0x11,              // Encrypted.
+  };
+
+  const std::vector<SubsampleEntry> kExpectedOutputSubsamples{
+      SubsampleEntry(72, 5), SubsampleEntry(6, 4)};
+
+  EXPECT_EQ(std::vector<uint8_t>(kExpectedOutput,
+                                 kExpectedOutput + arraysize(kExpectedOutput)),
+            output);
+  EXPECT_EQ(kExpectedOutputSubsamples, subsamples);
+}
+
+// One NAL unit is larger than 2^16 bytes and the corresponding subsample is
+// split into small subsamples. All subsamples have cipher texts.
+TEST(NalUnitToByteStreamConverterTest,
+     LargeNaluWithMultipleSubsamples) {
+  // Only the types of the NAL units are checked.
+  // This does not contain AUD, SPS, nor PPS.
+  const uint8_t kUnitStreamLikeMediaSamplePart1[] = {
+      0x00, 0x00, 0x00, 0x0A,  // Size 10 NALU.
+      0x06,                    // NAL unit type.
+      0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77,  // Encrypted.
+      0x00, 0x01, 0x00, 0x0f,  // Size 65551 NALU.
+      0x02,                    // NAL unit type.
+  };
+
+  const std::vector<uint8_t> kUnitStreamLikeMediaSamplePart2(65535, 0x01);
+
+  const uint8_t kUnitStreamLikeMediaSamplePart3[] = {
+      // Slices data.
+      0xFD, 0x78, 0xA4, 0x82, 0x62,        // Encrypted.
+      0x29, 0x77, 0x27, 0xFD, 0x78, 0xA4,  // Clear.
+      0xC3, 0x82, 0x62, 0x11,              // Encrypted.
+  };
+
+  std::vector<uint8_t> unit_stream_like_media_sample(
+      std::begin(kUnitStreamLikeMediaSamplePart1),
+      std::end(kUnitStreamLikeMediaSamplePart1));
+
+  unit_stream_like_media_sample.insert(
+      unit_stream_like_media_sample.end(),
+      std::begin(kUnitStreamLikeMediaSamplePart2),
+      std::end(kUnitStreamLikeMediaSamplePart2));
+
+  unit_stream_like_media_sample.insert(
+      unit_stream_like_media_sample.end(),
+      std::begin(kUnitStreamLikeMediaSamplePart3),
+      std::end(kUnitStreamLikeMediaSamplePart3));
+
+  std::vector<SubsampleEntry> subsamples{ SubsampleEntry(5, 9),
+                                          SubsampleEntry(65535, 0),
+                                          SubsampleEntry(5, 5),
+                                          SubsampleEntry(6, 4)};
+
+  NalUnitToByteStreamConverter converter;
+  EXPECT_TRUE(
+      converter.Initialize(kTestAVCDecoderConfigurationRecord,
+                           arraysize(kTestAVCDecoderConfigurationRecord)));
+
+  std::vector<uint8_t> output;
+  EXPECT_TRUE(converter.ConvertUnitToByteStreamWithSubsamples(
+      unit_stream_like_media_sample.data(),
+      unit_stream_like_media_sample.size(), kIsKeyFrame, !kEscapeEncryptedNalu,
+      &output, &subsamples));
+
+  const uint8_t kExpectedOutputPart1[] = {
+      0x00, 0x00, 0x00, 0x01,  // Start code.
+      0x09,                    // AUD type.
+      0xF0,                    // primary pic type is anything.
+      0x00, 0x00, 0x00, 0x01,  // Start code.
+      // Some valid SPS data.
+      0x67, 0x64, 0x00, 0x1E, 0xAC, 0xD9, 0x40, 0xB4, 0x2F, 0xF9, 0x7F, 0xF0,
+      0x00, 0x80, 0x00, 0x91, 0x00, 0x00, 0x03, 0x03, 0xE9, 0x00, 0x00, 0xEA,
+      0x60, 0x0F, 0x16, 0x2D, 0x96, 0x00, 0x00, 0x00, 0x01,  // Start code.
+      0x68, 0xFE, 0xFD, 0xFC, 0xFB, 0x11, 0x12, 0x13, 0x14, 0x15,  // PPS.
+      0x00, 0x00, 0x00, 0x01,  // Start code.
+      // The input NALU 1.
+      0x06,  // NALU type.
+      0xFD, 0x78, 0xA4, 0xC3, 0x82, 0x62, 0x11, 0x29, 0x77,
+      0x00, 0x00, 0x00, 0x01,  // Start code.
+      // The input NALU 2.
+      0x02,  // NALU type.
+  };
+
+  const std::vector<uint8_t> kExpectedOutputPart2 =
+      kUnitStreamLikeMediaSamplePart2;
+
+  const uint8_t kExpectedOutputPart3[] = {
+      // Slices data.
+      0xFD, 0x78, 0xA4, 0x82, 0x62,        // Encrypted.
+      0x29, 0x77, 0x27, 0xFD, 0x78, 0xA4,  // Clear.
+      0xC3, 0x82, 0x62, 0x11,              // Encrypted.
+  };
+
+  std::vector<uint8_t> expected_output(std::begin(kExpectedOutputPart1),
+                                       std::end(kExpectedOutputPart1));
+
+  expected_output.insert(expected_output.end(),
+                         std::begin(kExpectedOutputPart2),
+                         std::end(kExpectedOutputPart2));
+
+  expected_output.insert(expected_output.end(),
+                         std::begin(kExpectedOutputPart3),
+                         std::end(kExpectedOutputPart3));
+
+  const std::vector<SubsampleEntry> kExpectedOutputSubsamples{
+      SubsampleEntry(58, 9), SubsampleEntry(65535, 0), SubsampleEntry(5, 5),
+      SubsampleEntry(6, 4)};
+
+  EXPECT_EQ(expected_output, output);
+  EXPECT_EQ(kExpectedOutputSubsamples, subsamples);
+}
+
 }  // namespace media
 }  // namespace shaka