From 0803e318366ebf4f463dd25146728bc0e3a7a1f8 Mon Sep 17 00:00:00 2001 From: Jacob Trimble Date: Fri, 26 Feb 2016 14:34:14 -0800 Subject: [PATCH] Read SPS/PPS Nalus from decoder configurations. Now the AVCDecoderConfiguration and HEVCDecoderConfiguration classes will read the Nalu from the blocks, meaning they will store a vector of Nalu objects for the data. Also added H.265 to the Nalu class and added argument to NaluReader to read H.264 vs H.265 Nalus. Change-Id: I8d8194a90fd72a30af66e9776e01379f8428542c --- .../filters/avc_decoder_configuration.cc | 66 ++++++++++----- .../media/filters/avc_decoder_configuration.h | 13 ++- .../avc_decoder_configuration_unittest.cc | 24 ++++-- .../media/filters/decoder_configuration.cc | 26 ++++++ .../media/filters/decoder_configuration.h | 80 +++++++++++++++++++ packager/media/filters/filters.gyp | 2 + .../h264_byte_to_unit_stream_converter.cc | 3 +- .../media/filters/h264_parser_unittest.cc | 3 +- .../filters/hevc_decoder_configuration.cc | 32 ++++++-- .../filters/hevc_decoder_configuration.h | 15 ++-- .../hevc_decoder_configuration_unittest.cc | 48 +++++++---- packager/media/filters/nalu_reader.cc | 44 ++++++++-- packager/media/filters/nalu_reader.h | 38 ++++++++- .../media/filters/nalu_reader_unittest.cc | 26 ++++-- packager/media/formats/mp2t/es_parser_h264.cc | 3 +- .../formats/mp4/encrypting_fragmenter.cc | 5 +- .../media/formats/mp4/mp4_media_parser.cc | 4 +- .../formats/mp4/video_slice_header_parser.cc | 43 +++------- .../mp4/video_slice_header_parser_unittest.cc | 28 ++++--- 19 files changed, 377 insertions(+), 126 deletions(-) create mode 100644 packager/media/filters/decoder_configuration.cc create mode 100644 packager/media/filters/decoder_configuration.h diff --git a/packager/media/filters/avc_decoder_configuration.cc b/packager/media/filters/avc_decoder_configuration.cc index d9a513a784..175a05606a 100644 --- a/packager/media/filters/avc_decoder_configuration.cc +++ 
b/packager/media/filters/avc_decoder_configuration.cc @@ -19,20 +19,25 @@ AVCDecoderConfiguration::AVCDecoderConfiguration() : version_(0), profile_indication_(0), profile_compatibility_(0), - avc_level_(0), - length_size_(0) {} + avc_level_(0) {} AVCDecoderConfiguration::~AVCDecoderConfiguration() {} -bool AVCDecoderConfiguration::Parse(const std::vector& data) { - BufferReader reader(data.data(), data.size()); +bool AVCDecoderConfiguration::ParseInternal() { + // See ISO 14496-15 sec 5.3.3.1.2 + BufferReader reader(data(), data_size()); + RCHECK(reader.Read1(&version_) && version_ == 1 && reader.Read1(&profile_indication_) && reader.Read1(&profile_compatibility_) && reader.Read1(&avc_level_)); uint8_t length_size_minus_one; RCHECK(reader.Read1(&length_size_minus_one)); - length_size_ = (length_size_minus_one & 0x3) + 1; + if ((length_size_minus_one & 0x3) == 2) { + LOG(ERROR) << "Invalid NALU length size."; + return false; + } + set_nalu_length_size((length_size_minus_one & 0x3) + 1); uint8_t num_sps; RCHECK(reader.Read1(&num_sps)); @@ -42,20 +47,45 @@ bool AVCDecoderConfiguration::Parse(const std::vector& data) { return false; } - uint16_t sps_length = 0; - RCHECK(reader.Read2(&sps_length)); + for (uint8_t i = 0; i < num_sps; i++) { + uint16_t size = 0; + RCHECK(reader.Read2(&size)); + const uint8_t* nalu_data = reader.data() + reader.pos(); + RCHECK(reader.SkipBytes(size)); - H264Parser parser; - int sps_id = 0; - Nalu nalu; - RCHECK(nalu.InitializeFromH264(reader.data() + reader.pos(), sps_length)); - RCHECK(parser.ParseSPS(nalu, &sps_id) == H264Parser::kOk); - return ExtractResolutionFromSps(*parser.GetSPS(sps_id), &coded_width_, - &coded_height_, &pixel_width_, - &pixel_height_); - // It is unlikely to have more than one SPS in practice. Also there's - // no way to change the {coded,pixel}_{width,height} dynamically from - // VideoStreamInfo. So skip the rest (if there are any). 
+ Nalu nalu; + RCHECK(nalu.InitializeFromH264(nalu_data, size)); + RCHECK(nalu.type() == Nalu::H264_SPS); + AddNalu(nalu); + + if (i == 0) { + // It is unlikely to have more than one SPS in practice. Also there's + // no way to change the {coded,pixel}_{width,height} dynamically from + // VideoStreamInfo. + int sps_id = 0; + H264Parser parser; + RCHECK(parser.ParseSPS(nalu, &sps_id) == H264Parser::kOk); + RCHECK(ExtractResolutionFromSps(*parser.GetSPS(sps_id), &coded_width_, + &coded_height_, &pixel_width_, + &pixel_height_)); + } + } + + uint8_t pps_count; + RCHECK(reader.Read1(&pps_count)); + for (uint8_t i = 0; i < pps_count; i++) { + uint16_t size = 0; + RCHECK(reader.Read2(&size)); + const uint8_t* nalu_data = reader.data() + reader.pos(); + RCHECK(reader.SkipBytes(size)); + + Nalu nalu; + RCHECK(nalu.InitializeFromH264(nalu_data, size)); + RCHECK(nalu.type() == Nalu::H264_PPS); + AddNalu(nalu); + } + + return true; } std::string AVCDecoderConfiguration::GetCodecString() const { diff --git a/packager/media/filters/avc_decoder_configuration.h b/packager/media/filters/avc_decoder_configuration.h index 116f62baac..7db36a2798 100644 --- a/packager/media/filters/avc_decoder_configuration.h +++ b/packager/media/filters/avc_decoder_configuration.h @@ -12,19 +12,16 @@ #include #include "packager/base/macros.h" +#include "packager/media/filters/decoder_configuration.h" namespace edash_packager { namespace media { /// Class for parsing AVC decoder configuration. -class AVCDecoderConfiguration { +class AVCDecoderConfiguration : public DecoderConfiguration { public: AVCDecoderConfiguration(); - ~AVCDecoderConfiguration(); - - /// Parses input to extract AVC decoder configuration data. - /// @return false if there is parsing errors. - bool Parse(const std::vector& data); + ~AVCDecoderConfiguration() override; /// @return The codec string. 
std::string GetCodecString() const; @@ -33,7 +30,6 @@ class AVCDecoderConfiguration { uint8_t profile_indication() const { return profile_indication_; } uint8_t profile_compatibility() const { return profile_compatibility_; } uint8_t avc_level() const { return avc_level_; } - uint8_t length_size() const { return length_size_; } uint32_t coded_width() const { return coded_width_; } uint32_t coded_height() const { return coded_height_; } uint32_t pixel_width() const { return pixel_width_; } @@ -46,11 +42,12 @@ class AVCDecoderConfiguration { uint8_t avc_level); private: + bool ParseInternal() override; + uint8_t version_; uint8_t profile_indication_; uint8_t profile_compatibility_; uint8_t avc_level_; - uint8_t length_size_; // Extracted from SPS. uint32_t coded_width_; diff --git a/packager/media/filters/avc_decoder_configuration_unittest.cc b/packager/media/filters/avc_decoder_configuration_unittest.cc index 038037e981..6544ed6d7b 100644 --- a/packager/media/filters/avc_decoder_configuration_unittest.cc +++ b/packager/media/filters/avc_decoder_configuration_unittest.cc @@ -19,15 +19,14 @@ TEST(AVCDecoderConfigurationTest, Success) { 0x96, 0x01, 0x00, 0x06, 0x68, 0xEB, 0xE3, 0xCB, 0x22, 0xC0}; AVCDecoderConfiguration avc_config; - ASSERT_TRUE(avc_config.Parse(std::vector( - kAvcDecoderConfigurationData, - kAvcDecoderConfigurationData + arraysize(kAvcDecoderConfigurationData)))); + ASSERT_TRUE(avc_config.Parse(kAvcDecoderConfigurationData, + arraysize(kAvcDecoderConfigurationData))); EXPECT_EQ(1u, avc_config.version()); EXPECT_EQ(0x64, avc_config.profile_indication()); EXPECT_EQ(0u, avc_config.profile_compatibility()); EXPECT_EQ(0x1E, avc_config.avc_level()); - EXPECT_EQ(4u, avc_config.length_size()); + EXPECT_EQ(4u, avc_config.nalu_length_size()); EXPECT_EQ(720u, avc_config.coded_width()); EXPECT_EQ(360u, avc_config.coded_height()); EXPECT_EQ(8u, avc_config.pixel_width()); @@ -36,13 +35,24 @@ TEST(AVCDecoderConfigurationTest, Success) { EXPECT_EQ("avc1.64001e", 
avc_config.GetCodecString()); } +TEST(AVCDecoderConfigurationTest, FailsOnInvalidNaluLengthSize) { + const uint8_t kAvcDecoderConfigurationData[] = { + 0x01, 0x64, 0x00, 0x1E, 0xFE, 0xE1, 0x00, 0x1D, 0x67, 0x64, 0x00, 0x1E, + 0xAC, 0xD9, 0x40, 0xB4, 0x2F, 0xF9, 0x7F, 0xF0, 0x00, 0x80, 0x00, 0x91, + 0x00, 0x00, 0x03, 0x03, 0xE9, 0x00, 0x00, 0xEA, 0x60, 0x0F, 0x16, 0x2D, + 0x96, 0x01, 0x00, 0x06, 0x68, 0xEB, 0xE3, 0xCB, 0x22, 0xC0}; + + AVCDecoderConfiguration avc_config; + ASSERT_FALSE(avc_config.Parse(kAvcDecoderConfigurationData, + arraysize(kAvcDecoderConfigurationData))); +} + TEST(AVCDecoderConfigurationTest, FailOnInsufficientData) { const uint8_t kAvcDecoderConfigurationData[] = {0x01, 0x64, 0x00, 0x1E}; AVCDecoderConfiguration avc_config; - ASSERT_FALSE(avc_config.Parse(std::vector( - kAvcDecoderConfigurationData, - kAvcDecoderConfigurationData + arraysize(kAvcDecoderConfigurationData)))); + ASSERT_FALSE(avc_config.Parse(kAvcDecoderConfigurationData, + arraysize(kAvcDecoderConfigurationData))); } TEST(AVCDecoderConfigurationTest, GetCodecString) { diff --git a/packager/media/filters/decoder_configuration.cc b/packager/media/filters/decoder_configuration.cc new file mode 100644 index 0000000000..70313239e8 --- /dev/null +++ b/packager/media/filters/decoder_configuration.cc @@ -0,0 +1,26 @@ +// Copyright 2016 Google Inc. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#include "packager/media/filters/decoder_configuration.h" + +namespace edash_packager { +namespace media { + +DecoderConfiguration::DecoderConfiguration() : nalu_length_size_(0) {} +DecoderConfiguration::~DecoderConfiguration() {} + +bool DecoderConfiguration::Parse(const uint8_t* data, size_t data_size) { + data_.assign(data, data + data_size); + nalu_.clear(); + return ParseInternal(); +} + +void DecoderConfiguration::AddNalu(const Nalu& nalu) { + nalu_.push_back(nalu); +} + +} // namespace media +} // namespace edash_packager diff --git a/packager/media/filters/decoder_configuration.h b/packager/media/filters/decoder_configuration.h new file mode 100644 index 0000000000..cee45f5909 --- /dev/null +++ b/packager/media/filters/decoder_configuration.h @@ -0,0 +1,80 @@ +// Copyright 2016 Google Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#ifndef MEDIA_FILTERS_DECODER_CONFIGURATION_H_ +#define MEDIA_FILTERS_DECODER_CONFIGURATION_H_ + +#include + +#include "packager/base/logging.h" +#include "packager/base/macros.h" +#include "packager/media/filters/nalu_reader.h" + +namespace edash_packager { +namespace media { + +// Defines a base class for decoder configurations. +class DecoderConfiguration { + public: + virtual ~DecoderConfiguration(); + + /// Parses input to extract decoder configuration data. This will make and + /// store a copy of the data for Nalu access. + /// @return false if there are parsing errors. + bool Parse(const std::vector& data) { + return Parse(data.data(), data.size()); + } + + /// Parses input to extract decoder configuration data. This will make and + /// store a copy of the data for Nalu access. 
+ /// @return false if there are parsing errors. + bool Parse(const uint8_t* data, size_t data_size); + + /// @return The size of the NAL unit length field. + uint8_t nalu_length_size() const { return nalu_length_size_; } + + /// @return The number of Nalu in the configuration. + size_t nalu_count() const { return nalu_.size(); } + + /// @return The nalu at the given index. The Nalu is only valid for the + /// lifetime of this object, even if copied. + const Nalu& nalu(size_t i) const { return nalu_[i]; } + + protected: + DecoderConfiguration(); + + /// Adds the given Nalu to the configuration. + void AddNalu(const Nalu& nalu); + + /// @return a pointer to the copy of the data. + const uint8_t* data() const { return data_.data(); } + + /// @return the size of the copy of the data. + size_t data_size() const { return data_.size(); } + + /// Sets the size of the NAL unit length field. + void set_nalu_length_size(uint8_t nalu_length_size) { + DCHECK(nalu_length_size <= 2 || nalu_length_size == 4); + nalu_length_size_ = nalu_length_size; + } + + private: + // Performs the actual parsing of the data. + virtual bool ParseInternal() = 0; + + // Contains a copy of the data. This manages the pointer lifetime so the + // extracted Nalu can be accessed. 
+ std::vector data_; + std::vector nalu_; + uint8_t nalu_length_size_; + + DISALLOW_COPY_AND_ASSIGN(DecoderConfiguration); +}; + +} // namespace media +} // namespace edash_packager + +#endif // MEDIA_FILTERS_DECODER_CONFIGURATION_H_ diff --git a/packager/media/filters/filters.gyp b/packager/media/filters/filters.gyp index 75ca804dad..6ab721fe15 100644 --- a/packager/media/filters/filters.gyp +++ b/packager/media/filters/filters.gyp @@ -15,6 +15,8 @@ 'sources': [ 'avc_decoder_configuration.cc', 'avc_decoder_configuration.h', + 'decoder_configuration.cc', + 'decoder_configuration.h', 'ec3_audio_util.cc', 'ec3_audio_util.h', 'hevc_decoder_configuration.cc', diff --git a/packager/media/filters/h264_byte_to_unit_stream_converter.cc b/packager/media/filters/h264_byte_to_unit_stream_converter.cc index 34127faa86..7be8bb1688 100644 --- a/packager/media/filters/h264_byte_to_unit_stream_converter.cc +++ b/packager/media/filters/h264_byte_to_unit_stream_converter.cc @@ -35,7 +35,8 @@ bool H264ByteToUnitStreamConverter::ConvertByteStreamToNalUnitStream( BufferWriter output_buffer(input_frame_size + kStreamConversionOverhead); Nalu nalu; - NaluReader reader(kIsAnnexbByteStream, input_frame, input_frame_size); + NaluReader reader(NaluReader::kH264, kIsAnnexbByteStream, input_frame, + input_frame_size); if (!reader.StartsWithStartCode()) { LOG(ERROR) << "H.264 byte stream frame did not begin with start code."; return false; diff --git a/packager/media/filters/h264_parser_unittest.cc b/packager/media/filters/h264_parser_unittest.cc index a6b864d328..d15313890a 100644 --- a/packager/media/filters/h264_parser_unittest.cc +++ b/packager/media/filters/h264_parser_unittest.cc @@ -18,7 +18,8 @@ TEST(H264ParserTest, StreamFileParsing) { int num_nalus = 759; H264Parser parser; - NaluReader reader(kIsAnnexbByteStream, buffer.data(), buffer.size()); + NaluReader reader(NaluReader::kH264, kIsAnnexbByteStream, buffer.data(), + buffer.size()); // Parse until the end of stream/unsupported 
stream/error in stream is found. int num_parsed_nalus = 0; diff --git a/packager/media/filters/hevc_decoder_configuration.cc b/packager/media/filters/hevc_decoder_configuration.cc index 33d2c613c3..3a8624e229 100644 --- a/packager/media/filters/hevc_decoder_configuration.cc +++ b/packager/media/filters/hevc_decoder_configuration.cc @@ -79,13 +79,12 @@ HEVCDecoderConfiguration::HEVCDecoderConfiguration() general_tier_flag_(false), general_profile_idc_(0), general_profile_compatibility_flags_(0), - general_level_idc_(0), - length_size_(0) {} + general_level_idc_(0) {} HEVCDecoderConfiguration::~HEVCDecoderConfiguration() {} -bool HEVCDecoderConfiguration::Parse(const std::vector& data) { - BufferReader reader(data.data(), data.size()); +bool HEVCDecoderConfiguration::ParseInternal() { + BufferReader reader(data(), data_size()); uint8_t profile_indication = 0; uint8_t length_size_minus_one = 0; @@ -104,7 +103,30 @@ bool HEVCDecoderConfiguration::Parse(const std::vector& data) { general_tier_flag_ = ((profile_indication >> 5) & 1) == 1; general_profile_idc_ = profile_indication & 0x1f; - length_size_ = (length_size_minus_one & 0x3) + 1; + if ((length_size_minus_one & 0x3) == 2) { + LOG(ERROR) << "Invalid NALU length size."; + return false; + } + set_nalu_length_size((length_size_minus_one & 0x3) + 1); + + for (int i = 0; i < num_of_arrays; i++) { + uint8_t nal_unit_type; + uint16_t num_nalus; + RCHECK(reader.Read1(&nal_unit_type)); + nal_unit_type &= 0x3f; + RCHECK(reader.Read2(&num_nalus)); + for (int j = 0; j < num_nalus; j++) { + uint16_t nalu_length; + RCHECK(reader.Read2(&nalu_length)); + uint64_t nalu_offset = reader.pos(); + RCHECK(reader.SkipBytes(nalu_length)); + + Nalu nalu; + RCHECK(nalu.InitializeFromH265(data() + nalu_offset, nalu_length)); + RCHECK(nalu.type() == nal_unit_type); + AddNalu(nalu); + } + } // TODO(kqyang): Parse SPS to get resolutions. 
return true; diff --git a/packager/media/filters/hevc_decoder_configuration.h b/packager/media/filters/hevc_decoder_configuration.h index 2783c86f2e..0b111ebf40 100644 --- a/packager/media/filters/hevc_decoder_configuration.h +++ b/packager/media/filters/hevc_decoder_configuration.h @@ -13,27 +13,23 @@ #include "packager/base/macros.h" #include "packager/media/base/video_stream_info.h" +#include "packager/media/filters/decoder_configuration.h" namespace edash_packager { namespace media { /// Class for parsing HEVC decoder configuration. -class HEVCDecoderConfiguration { +class HEVCDecoderConfiguration : public DecoderConfiguration { public: HEVCDecoderConfiguration(); - ~HEVCDecoderConfiguration(); - - /// Parses input to extract HEVC decoder configuration data. - /// @return false if there is parsing errors. - bool Parse(const std::vector& data); + ~HEVCDecoderConfiguration() override; /// @return The codec string. std::string GetCodecString(VideoCodec codec) const; - /// @return The size of the NAL unit length field. 
- uint8_t length_size() { return length_size_; } - private: + bool ParseInternal() override; + uint8_t version_; uint8_t general_profile_space_; bool general_tier_flag_; @@ -41,7 +37,6 @@ class HEVCDecoderConfiguration { uint32_t general_profile_compatibility_flags_; std::vector general_constraint_indicator_flags_; uint8_t general_level_idc_; - uint8_t length_size_; DISALLOW_COPY_AND_ASSIGN(HEVCDecoderConfiguration); }; diff --git a/packager/media/filters/hevc_decoder_configuration_unittest.cc b/packager/media/filters/hevc_decoder_configuration_unittest.cc index d0f2810644..4860c5ed8c 100644 --- a/packager/media/filters/hevc_decoder_configuration_unittest.cc +++ b/packager/media/filters/hevc_decoder_configuration_unittest.cc @@ -13,34 +13,52 @@ namespace media { TEST(HEVCDecoderConfigurationTest, Success) { const uint8_t kHevcDecoderConfigurationData[] = { - 0x01, 0x02, 0x20, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x3F, 0xF0, 0x00, 0xFC, 0xFD, 0xFA, 0xFA, 0x00, 0x00, 0x0F, 0x04, 0x20, - 0x00, 0x01, 0x00, 0x18, 0x40, 0x01, 0x0C, 0x01, 0xFF, 0xFF, 0x02, 0x20, - 0x00, 0x00, 0x03, 0x00, 0x90, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, - 0x3F, 0x99, 0x98, 0x09, 0x21, 0x00, 0x01, 0x00, 0x29, 0x42, 0x01, 0x01, - 0x02, 0x20, 0x00, 0x00, 0x03, 0x00, 0x90, 0x00, 0x00, 0x03, 0x00, 0x00, + 0x01, // Version + 0x02, // profile_indication + 0x20, 0x00, 0x00, 0x00, // general_profile_compatibility_flags + // general_constraint_indicator_flags + 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3F, // general_level_idc + 0xF0, 0x00, 0xFC, 0xFD, 0xFA, 0xFA, 0x00, 0x00, + 0x0F, // length_size_minus_one + 0x02, // num_of_arrays + // array 1 + 0x20, // nal type + 0x00, 0x01, // num nalus + // nalu 1 + 0x00, 0x18, // nal unit length + 0x40, 0x01, 0x0C, 0x01, 0xFF, 0xFF, 0x02, 0x20, + 0x00, 0x00, 0x03, 0x00, 0x90, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, + 0x00, 0x3F, 0x99, 0x98, 0x09, + // array 2 + 0x21, // nal type + 0x00, 0x01, // num nalus + // Nalu 1 + 0x00, 0x0f, // nal unit 
length + 0x42, 0x01, 0x01, 0x02, 0x20, 0x00, 0x00, 0x03, 0x00, 0x90, + 0x00, 0x00, 0x03, 0x00, 0x00, }; HEVCDecoderConfiguration hevc_config; - ASSERT_TRUE(hevc_config.Parse( - std::vector(kHevcDecoderConfigurationData, - kHevcDecoderConfigurationData + - arraysize(kHevcDecoderConfigurationData)))); + ASSERT_TRUE(hevc_config.Parse(kHevcDecoderConfigurationData, + arraysize(kHevcDecoderConfigurationData))); - EXPECT_EQ(4u, hevc_config.length_size()); + EXPECT_EQ(4u, hevc_config.nalu_length_size()); EXPECT_EQ("hev1.2.4.L63.90", hevc_config.GetCodecString(kCodecHEV1)); EXPECT_EQ("hvc1.2.4.L63.90", hevc_config.GetCodecString(kCodecHVC1)); + + EXPECT_EQ(2u, hevc_config.nalu_count()); + EXPECT_EQ(0x16u, hevc_config.nalu(0).payload_size()); + EXPECT_EQ(0x40, hevc_config.nalu(0).data()[0]); } TEST(HEVCDecoderConfigurationTest, FailOnInsufficientData) { const uint8_t kHevcDecoderConfigurationData[] = {0x01, 0x02, 0x20, 0x00}; HEVCDecoderConfiguration hevc_config; - ASSERT_FALSE(hevc_config.Parse( - std::vector(kHevcDecoderConfigurationData, - kHevcDecoderConfigurationData + - arraysize(kHevcDecoderConfigurationData)))); + ASSERT_FALSE(hevc_config.Parse(kHevcDecoderConfigurationData, + arraysize(kHevcDecoderConfigurationData))); } } // namespace media diff --git a/packager/media/filters/nalu_reader.cc b/packager/media/filters/nalu_reader.cc index 54cc8e0947..806ede9a54 100644 --- a/packager/media/filters/nalu_reader.cc +++ b/packager/media/filters/nalu_reader.cc @@ -24,19 +24,22 @@ Nalu::Nalu() header_size_(0), payload_size_(0), ref_idc_(0), + nuh_layer_id_(0), + nuh_temporal_id_(0), type_(0), is_video_slice_(false) {} bool Nalu::InitializeFromH264(const uint8_t* data, uint64_t size) { DCHECK(data); - DCHECK_GT(size, 0u); + if (size == 0) + return false; uint8_t header = data[0]; if ((header & 0x80) != 0) return false; data_ = data; header_size_ = 1; - payload_size_ = size - 1; + payload_size_ = size - header_size_; ref_idc_ = (header >> 5) & 0x3; type_ = header & 0x1F; 
is_video_slice_ = (type_ >= Nalu::H264_NonIDRSlice && @@ -44,11 +47,36 @@ bool Nalu::InitializeFromH264(const uint8_t* data, uint64_t size) { return true; } -NaluReader::NaluReader(uint8_t nal_length_size, +bool Nalu::InitializeFromH265(const uint8_t* data, uint64_t size) { + DCHECK(data); + if (size < 2) + return false; + uint16_t header = (data[0] << 8) | data[1]; + if ((header & 0x8000) != 0) + return false; + + data_ = data; + header_size_ = 2; + payload_size_ = size - header_size_; + + type_ = (header >> 9) & 0x3F; + nuh_layer_id_ = (header >> 3) & 0x3F; + nuh_temporal_id_ = (header & 0x7) - 1; + + // Don't treat reserved VCL types as video slices since we cannot parse them. + is_video_slice_ = + (type_ >= Nalu::H265_TRAIL_N && type_ <= Nalu::H265_RASL_R) || + (type_ >= Nalu::H265_BLA_W_LP && type_ <= Nalu::H265_CRA_NUT); + return true; +} + +NaluReader::NaluReader(NaluType type, + uint8_t nal_length_size, const uint8_t* stream, uint64_t stream_size) : stream_(stream), stream_size_(stream_size), + nalu_type_(type), nalu_length_size_(nal_length_size), format_(nal_length_size == 0 ? kAnnexbByteStreamFormat : kNalUnitStreamFormat) { @@ -93,8 +121,14 @@ NaluReader::Result NaluReader::Advance(Nalu* nalu) { } const uint8_t* nalu_data = stream_ + nalu_length_size_or_start_code_size; - if (!nalu->InitializeFromH264(nalu_data, nalu_length)) - return NaluReader::kInvalidStream; + if (nalu_type_ == kH264) { + if (!nalu->InitializeFromH264(nalu_data, nalu_length)) + return NaluReader::kInvalidStream; + } else { + DCHECK_EQ(kH265, nalu_type_); + if (!nalu->InitializeFromH265(nalu_data, nalu_length)) + return NaluReader::kInvalidStream; + } // Move parser state to after this NALU, so next time Advance // is called, we will effectively be skipping it. 
diff --git a/packager/media/filters/nalu_reader.h b/packager/media/filters/nalu_reader.h index 43606ebd21..58125a3416 100644 --- a/packager/media/filters/nalu_reader.h +++ b/packager/media/filters/nalu_reader.h @@ -36,17 +36,41 @@ class Nalu { H264_EOSeq = 10, H264_CodedSliceExtension = 20, }; + enum H265NaluType { + H265_TRAIL_N = 0, + H265_RASL_R = 9, + + H265_BLA_W_LP = 16, + H265_IDR_W_RADL = 19, + H265_IDR_N_LP = 20, + H265_CRA_NUT = 21, + H265_RSV_IRAP_VCL23 = 23, + + H265_VPS = 32, + H265_SPS = 33, + H265_PPS = 34, + H265_AUD = 35, + }; Nalu(); bool InitializeFromH264(const uint8_t* data, uint64_t size) WARN_UNUSED_RESULT; + bool InitializeFromH265(const uint8_t* data, + uint64_t size) WARN_UNUSED_RESULT; + const uint8_t* data() const { return data_; } uint64_t header_size() const { return header_size_; } uint64_t payload_size() const { return payload_size_; } + // H.264 Specific: int ref_idc() const { return ref_idc_; } + + // H.265 Specific: + int nuh_layer_id() const { return nuh_layer_id_; } + int nuh_temporal_id() const { return nuh_temporal_id_; } + int type() const { return type_; } bool is_video_slice() const { return is_video_slice_; } @@ -61,10 +85,13 @@ class Nalu { uint64_t payload_size_; int ref_idc_; + int nuh_layer_id_; + int nuh_temporal_id_; int type_; bool is_video_slice_; - DISALLOW_COPY_AND_ASSIGN(Nalu); + // Don't use DISALLOW_COPY_AND_ASSIGN since it is just numbers and a pointer + // it does not own. This allows Nalus to be stored in a vector. }; /// Helper class used to read NAL units based on several formats: @@ -77,11 +104,16 @@ class NaluReader { kInvalidStream, // error in stream kEOStream, // end of stream }; + enum NaluType { + kH264, + kH265, + }; /// @param nalu_length_size should be set to 0 for AnnexB byte streams; /// otherwise, it indicates the size of NAL unit length for the NAL /// unit stream. 
- NaluReader(uint8_t nal_length_size, + NaluReader(NaluType type, + uint8_t nal_length_size, const uint8_t* stream, uint64_t stream_size); ~NaluReader(); @@ -129,6 +161,8 @@ class NaluReader { const uint8_t* stream_; // The remaining size of the stream. uint64_t stream_size_; + // The type of NALU being read. + NaluType nalu_type_; // The number of bytes the prefix length is; only valid if format is // kAnnexbByteStreamFormat. uint8_t nalu_length_size_; diff --git a/packager/media/filters/nalu_reader_unittest.cc b/packager/media/filters/nalu_reader_unittest.cc index 7552dcf7d4..0e70f3f20a 100644 --- a/packager/media/filters/nalu_reader_unittest.cc +++ b/packager/media/filters/nalu_reader_unittest.cc @@ -20,7 +20,8 @@ TEST(NaluReaderTest, StartCodeSearch) { 0x00, 0x00, 0x00, 0x01, 0x67, 0xbb, 0xcc, 0xdd }; - NaluReader reader(kIsAnnexbByteStream, kNaluData, arraysize(kNaluData)); + NaluReader reader(NaluReader::kH264, kIsAnnexbByteStream, kNaluData, + arraysize(kNaluData)); Nalu nalu; ASSERT_EQ(NaluReader::kOk, reader.Advance(&nalu)); @@ -48,7 +49,7 @@ TEST(NaluReaderTest, OneByteNaluLength) { 0x06, 0x67, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e }; - NaluReader reader(1, kNaluData, arraysize(kNaluData)); + NaluReader reader(NaluReader::kH264, 1, kNaluData, arraysize(kNaluData)); Nalu nalu; ASSERT_EQ(NaluReader::kOk, reader.Advance(&nalu)); @@ -76,7 +77,7 @@ TEST(NaluReaderTest, FourByteNaluLength) { 0x00, 0x00, 0x00, 0x03, 0x67, 0x0a, 0x0b }; - NaluReader reader(4, kNaluData, arraysize(kNaluData)); + NaluReader reader(NaluReader::kH264, 4, kNaluData, arraysize(kNaluData)); Nalu nalu; ASSERT_EQ(NaluReader::kOk, reader.Advance(&nalu)); @@ -102,7 +103,7 @@ TEST(NaluReaderTest, ErrorForNotEnoughForNaluLength) { 0x00 }; - NaluReader reader(3, kNaluData, arraysize(kNaluData)); + NaluReader reader(NaluReader::kH264, 3, kNaluData, arraysize(kNaluData)); Nalu nalu; EXPECT_EQ(NaluReader::kInvalidStream, reader.Advance(&nalu)); @@ -114,7 +115,7 @@ TEST(NaluReaderTest, 
ErrorForNaluLengthExceedsRemainingData) { 0xFF, 0x08, 0x00 }; - NaluReader reader(1, kNaluData, arraysize(kNaluData)); + NaluReader reader(NaluReader::kH264, 1, kNaluData, arraysize(kNaluData)); Nalu nalu; EXPECT_EQ(NaluReader::kInvalidStream, reader.Advance(&nalu)); @@ -125,7 +126,7 @@ TEST(NaluReaderTest, ErrorForNaluLengthExceedsRemainingData) { 0x04, 0x08, 0x00, 0x00 }; - NaluReader reader2(1, kNaluData2, arraysize(kNaluData2)); + NaluReader reader2(NaluReader::kH264, 1, kNaluData2, arraysize(kNaluData2)); EXPECT_EQ(NaluReader::kInvalidStream, reader2.Advance(&nalu)); } @@ -135,11 +136,22 @@ TEST(NaluReaderTest, ErrorForForbiddenBitSet) { 0x03, 0x80, 0x00, 0x00 }; - NaluReader reader(1, kNaluData, arraysize(kNaluData)); + NaluReader reader(NaluReader::kH264, 1, kNaluData, arraysize(kNaluData)); Nalu nalu; EXPECT_EQ(NaluReader::kInvalidStream, reader.Advance(&nalu)); } +TEST(NaluReaderTest, ErrorForZeroSize) { + const uint8_t kNaluData[] = { + // First NALU + 0x03, 0x80, 0x00, 0x00 + }; + + Nalu nalu; + EXPECT_FALSE(nalu.InitializeFromH264(kNaluData, 0)); + EXPECT_FALSE(nalu.InitializeFromH265(kNaluData, 0)); +} + } // namespace media } // namespace edash_packager diff --git a/packager/media/formats/mp2t/es_parser_h264.cc b/packager/media/formats/mp2t/es_parser_h264.cc index fcb295eb36..fabe114299 100644 --- a/packager/media/formats/mp2t/es_parser_h264.cc +++ b/packager/media/formats/mp2t/es_parser_h264.cc @@ -182,7 +182,8 @@ bool EsParserH264::ParseInternal() { int access_unit_size = base::checked_cast( next_access_unit_pos_ - current_access_unit_pos_); DCHECK_LE(access_unit_size, size); - NaluReader reader(kIsAnnexbByteStream, es, access_unit_size); + NaluReader reader(NaluReader::kH264, kIsAnnexbByteStream, es, + access_unit_size); while (true) { Nalu nalu; diff --git a/packager/media/formats/mp4/encrypting_fragmenter.cc b/packager/media/formats/mp4/encrypting_fragmenter.cc index 0f42c35379..1138da290b 100644 --- 
a/packager/media/formats/mp4/encrypting_fragmenter.cc +++ b/packager/media/formats/mp4/encrypting_fragmenter.cc @@ -223,7 +223,10 @@ Status EncryptingFragmenter::EncryptSample(scoped_refptr sample) { data += frame.frame_size; } } else { - NaluReader reader(nalu_length_size_, data, sample->data_size()); + // TODO(modmaker): Support H.265. + const NaluReader::NaluType nalu_type = NaluReader::kH264; + NaluReader reader(nalu_type, nalu_length_size_, data, + sample->data_size()); // Store the current length of clear data. This is used to squash // multiple unencrypted NAL units into fewer subsample entries. diff --git a/packager/media/formats/mp4/mp4_media_parser.cc b/packager/media/formats/mp4/mp4_media_parser.cc index 285ab22f16..ffe5cc294b 100644 --- a/packager/media/formats/mp4/mp4_media_parser.cc +++ b/packager/media/formats/mp4/mp4_media_parser.cc @@ -477,7 +477,7 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { return false; } codec_string = avc_config.GetCodecString(); - nalu_length_size = avc_config.length_size(); + nalu_length_size = avc_config.nalu_length_size(); if (coded_width != avc_config.coded_width() || coded_height != avc_config.coded_height()) { @@ -514,7 +514,7 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { return false; } codec_string = hevc_config.GetCodecString(video_codec); - nalu_length_size = hevc_config.length_size(); + nalu_length_size = hevc_config.nalu_length_size(); break; } case FOURCC_VP08: diff --git a/packager/media/formats/mp4/video_slice_header_parser.cc b/packager/media/formats/mp4/video_slice_header_parser.cc index e8f9050e57..0cc614c4b5 100644 --- a/packager/media/formats/mp4/video_slice_header_parser.cc +++ b/packager/media/formats/mp4/video_slice_header_parser.cc @@ -6,8 +6,8 @@ #include "packager/media/formats/mp4/video_slice_header_parser.h" +#include "packager/media/filters/avc_decoder_configuration.h" #include "packager/media/formats/mp4/rcheck.h" -#include "packager/media/base/buffer_reader.h" namespace 
edash_packager { namespace media { @@ -18,39 +18,18 @@ H264VideoSliceHeaderParser::~H264VideoSliceHeaderParser() {} bool H264VideoSliceHeaderParser::Initialize( const std::vector& decoder_configuration) { - // See ISO 14496-15 sec 5.3.3.1.2 - BufferReader reader(decoder_configuration.data(), - decoder_configuration.size()); - RCHECK(reader.SkipBytes(5)); - - uint8_t sps_count; - RCHECK(reader.Read1(&sps_count)); - sps_count = sps_count & 0x1f; - - for (size_t i = 0; i < sps_count; i++) { - uint16_t size; - RCHECK(reader.Read2(&size)); - const uint8_t* data = reader.data() + reader.pos(); - RCHECK(reader.SkipBytes(size)); + AVCDecoderConfiguration config; + RCHECK(config.Parse(decoder_configuration)); + for (size_t i = 0; i < config.nalu_count(); i++) { int id; - Nalu nalu; - RCHECK(nalu.InitializeFromH264(data, size)); - RCHECK(parser_.ParseSPS(nalu, &id) == H264Parser::kOk); - } - - uint8_t pps_count; - RCHECK(reader.Read1(&pps_count)); - for (size_t i = 0; i < pps_count; i++) { - uint16_t size; - RCHECK(reader.Read2(&size)); - const uint8_t* data = reader.data() + reader.pos(); - RCHECK(reader.SkipBytes(size)); - - int id; - Nalu nalu; - RCHECK(nalu.InitializeFromH264(data, size)); - RCHECK(parser_.ParsePPS(nalu, &id) == H264Parser::kOk); + const Nalu& nalu = config.nalu(i); + if (nalu.type() == Nalu::H264_SPS) { + RCHECK(parser_.ParseSPS(nalu, &id) == H264Parser::kOk); + } else { + DCHECK_EQ(Nalu::H264_PPS, nalu.type()); + RCHECK(parser_.ParsePPS(nalu, &id) == H264Parser::kOk); + } } return true; diff --git a/packager/media/formats/mp4/video_slice_header_parser_unittest.cc b/packager/media/formats/mp4/video_slice_header_parser_unittest.cc index cd541b8080..1f1c07dfbd 100644 --- a/packager/media/formats/mp4/video_slice_header_parser_unittest.cc +++ b/packager/media/formats/mp4/video_slice_header_parser_unittest.cc @@ -15,7 +15,7 @@ namespace mp4 { TEST(H264VideoSliceHeaderParserTest, BasicSupport) { // Taken from bear-640x360.mp4 (video) const uint8_t 
kExtraData[] = { - // Header (ignored) + // Header 0x01, 0x64, 0x00, 0x1e, 0xff, // SPS count (ignore top three bits) 0xe1, @@ -50,8 +50,8 @@ TEST(H264VideoSliceHeaderParserTest, BasicSupport) { TEST(H264VideoSliceHeaderParserTest, SupportsMultipleEntriesInExtraData) { const uint8_t kExtraData[] = { - // Header (ignored) - 0xfe, 0xed, 0xf0, 0x0d, 0x00, + // Header + 0x01, 0xed, 0xf0, 0x0d, 0x00, // SPS count (ignore top three bits) 0xe3, // SPS @@ -93,10 +93,16 @@ TEST(H264VideoSliceHeaderParserTest, SupportsMultipleEntriesInExtraData) { TEST(H264VideoSliceHeaderParserTest, IgnoresExtraDataAtEnd) { const uint8_t kExtraData[] = { - // Header (ignored) - 0xfe, 0xed, 0xf0, 0x0d, 0x00, - // SPS count - 0x00, + // Header + 0x01, 0xed, 0xf0, 0x0d, 0x00, + // SPS count (ignore top three bits) + 0xe1, + // SPS + 0x00, 0x19, // Size + 0x67, 0x64, 0x00, 0x1e, 0xac, 0xd9, 0x40, 0xa0, + 0x2f, 0xf9, 0x70, 0x11, 0x00, 0x00, 0x03, 0x03, + 0xe9, 0x00, 0x00, 0xea, 0x60, 0x0f, 0x16, 0x2d, + 0x96, // PPS count 0x00, // Extra data @@ -111,8 +117,8 @@ TEST(H264VideoSliceHeaderParserTest, IgnoresExtraDataAtEnd) { TEST(H264VideoSliceHeaderParserTest, ErrorsForEOSAfterEntry) { const uint8_t kExtraData[] = { - // Header (ignored) - 0xfe, 0xed, 0xf0, 0x0d, 0x00, + // Header + 0x01, 0xed, 0xf0, 0x0d, 0x00, // SPS count (ignore top three bits) 0xe3, // SPS @@ -131,8 +137,8 @@ TEST(H264VideoSliceHeaderParserTest, ErrorsForEOSAfterEntry) { TEST(H264VideoSliceHeaderParserTest, ErrorsForEOSWithinEntry) { const uint8_t kExtraData[] = { - // Header (ignored) - 0xfe, 0xed, 0xf0, 0x0d, 0x00, + // Header + 0x01, 0xed, 0xf0, 0x0d, 0x00, // SPS count (ignore top three bits) 0xe3, // SPS