From d850befb7243902cac2a1d842213a20e75921676 Mon Sep 17 00:00:00 2001 From: KongQun Yang Date: Fri, 4 Jan 2019 15:49:57 -0800 Subject: [PATCH] Refactor and Cleanup Descriptor classes - Define BaseDescriptor and generic read / write operations. - Define descriptors: ESDescriptor, DecoderConfigDescriptor, DecoderSpecificInfoDescriptor, SLConfigDescriptor. DecoderSpecificInfoDescriptor and all other descriptors can now handle arbitrary length size, no longer limited to 64 bytes for DecoderSpecificInfoDescriptor, a limit which was placed to keep ESDescriptor length size to one byte. - Now DecoderConfigDescriptor is able to handle reading and writing of all fields including buffer_size_db, which was not handled earlier. Fixes #536. Change-Id: Ia8a775f8bf6e90e3343a85f0e643bc44cd017c7a --- packager/media/codecs/es_descriptor.cc | 243 ++++++++++-------- packager/media/codecs/es_descriptor.h | 167 +++++++++--- .../media/codecs/es_descriptor_unittest.cc | 188 ++++++++++++-- packager/media/formats/mp4/box_definitions.cc | 10 +- .../formats/mp4/box_definitions_comparison.h | 19 +- .../formats/mp4/box_definitions_unittest.cc | 7 +- .../media/formats/mp4/mp4_media_parser.cc | 17 +- packager/media/formats/mp4/mp4_muxer.cc | 14 +- .../media/formats/wvm/wvm_media_parser.cc | 4 +- 9 files changed, 485 insertions(+), 184 deletions(-) diff --git a/packager/media/codecs/es_descriptor.cc b/packager/media/codecs/es_descriptor.cc index 95afc2c82c..7d385a5f83 100644 --- a/packager/media/codecs/es_descriptor.cc +++ b/packager/media/codecs/es_descriptor.cc @@ -36,7 +36,7 @@ enum SLPredefinedTags { // The elementary stream size is specific by up to 4 bytes. // The MSB of a byte indicates if there are more bytes for the size. 
-bool ReadESSize(BitReader* reader, uint32_t* size) { +bool ReadDescriptorSize(BitReader* reader, size_t* size) { uint8_t msb; uint8_t byte; @@ -54,147 +54,180 @@ bool ReadESSize(BitReader* reader, uint32_t* size) { return true; } -// Descryptor Header Size: 1 byte tag and 1 byte size (we don't support -// multi-bytes size for now). -const size_t kHeaderSize = 2; -const size_t kMaxDecoderSpecificInfoSize = 64; -const uint32_t kUnknownBitrate = 0; -const size_t kBitsInByte = 8; +void WriteDescriptorSize(size_t size, BufferWriter* writer) { + std::vector size_bytes; + while (size > 0) { + uint8_t byte = (size & 0x7F); + size >>= 7; + if (!size_bytes.empty()) + byte |= 0x80; + size_bytes.push_back(byte); + } + for (auto iter = size_bytes.rbegin(); iter != size_bytes.rend(); iter++) + writer->AppendInt(*iter); +} + +size_t CountDescriptorSize(size_t size) { + size_t num_bytes = 0; + while (size > 0) { + num_bytes++; + size >>= 7; + } + return num_bytes; +} } // namespace -ESDescriptor::ESDescriptor() - : esid_(0), - object_type_(ObjectType::kForbidden), - max_bitrate_(kUnknownBitrate), - avg_bitrate_(kUnknownBitrate) {} +bool BaseDescriptor::Parse(const std::vector& data) { + BitReader reader(data.data(), data.size()); + return Read(&reader); +} -ESDescriptor::~ESDescriptor() {} - -bool ESDescriptor::Parse(const std::vector& data) { - BitReader reader(&data[0], data.size()); +bool BaseDescriptor::Read(BitReader* reader) { uint8_t tag; - uint32_t size; - uint8_t stream_dependency_flag; - uint8_t url_flag; - uint8_t ocr_stream_flag; - uint16_t dummy; + RCHECK(reader->ReadBits(8, &tag)); + if (tag != static_cast(tag_)) { + LOG(ERROR) << "Expecting tag " << static_cast(tag_) << ", but seeing " + << static_cast(tag); + return false; + } + RCHECK(ReadDescriptorSize(reader, &data_size_)); + return ReadData(reader); +} - RCHECK(reader.ReadBits(8, &tag)); - RCHECK(tag == kESDescrTag); - RCHECK(ReadESSize(&reader, &size)); +void BaseDescriptor::Write(BufferWriter* writer) { + // 
Compute and update descriptor size. + size_t size = ComputeSize(); + size_t buffer_size_before_write = writer->Size(); - RCHECK(reader.ReadBits(16, &esid_)); // ES_ID - RCHECK(reader.ReadBits(1, &stream_dependency_flag)); - RCHECK(reader.ReadBits(1, &url_flag)); - RCHECK(!url_flag); // We don't support url flag - RCHECK(reader.ReadBits(1, &ocr_stream_flag)); - RCHECK(reader.ReadBits(5, &dummy)); // streamPriority + WriteInternal(writer); - if (stream_dependency_flag) - RCHECK(reader.ReadBits(16, &dummy)); // dependsOn_ES_ID - if (ocr_stream_flag) - RCHECK(reader.ReadBits(16, &dummy)); // OCR_ES_Id + DCHECK_EQ(size, writer->Size() - buffer_size_before_write); +} - RCHECK(ParseDecoderConfigDescriptor(&reader)); +size_t BaseDescriptor::ComputeSize() { + data_size_ = ComputeDataSize(); + return 1 + CountDescriptorSize(data_size_) + data_size_; +} +void BaseDescriptor::WriteHeader(BufferWriter* writer) { + writer->AppendInt(static_cast(tag_)); + WriteDescriptorSize(data_size_, writer); +} + +bool DecoderSpecificInfoDescriptor::ReadData(BitReader* reader) { + data_.resize(data_size()); + for (uint8_t& data_entry : data_) + RCHECK(reader->ReadBits(8, &data_entry)); return true; } -bool ESDescriptor::ParseDecoderConfigDescriptor(BitReader* reader) { - uint8_t tag; - uint32_t size; - uint32_t dummy; +void DecoderSpecificInfoDescriptor::WriteInternal(BufferWriter* writer) { + WriteHeader(writer); + writer->AppendVector(data_); +} - RCHECK(reader->ReadBits(8, &tag)); - RCHECK(tag == kDecoderConfigDescrTag); - RCHECK(ReadESSize(reader, &size)); +size_t DecoderSpecificInfoDescriptor::ComputeDataSize() { + return data_.size(); +} +bool DecoderConfigDescriptor::ReadData(BitReader* reader) { const size_t start_pos = reader->bit_position(); RCHECK(reader->ReadBits(8, &object_type_)); - RCHECK(reader->ReadBits(32, &dummy)); + + int stream_type; + RCHECK(reader->ReadBits(6, &stream_type)); + if (stream_type != kAudioStreamType) { + LOG(ERROR) << "Seeing non audio stream type " << 
stream_type; + return false; + } + + RCHECK(reader->SkipBits(2)); // Skip |upStream| and |reserved|. + RCHECK(reader->ReadBits(24, &buffer_size_db_)); RCHECK(reader->ReadBits(32, &max_bitrate_)); RCHECK(reader->ReadBits(32, &avg_bitrate_)); const size_t fields_bits = reader->bit_position() - start_pos; - const bool has_child_tags = size * kBitsInByte > fields_bits; + const size_t kBitsInByte = 8; + const bool has_child_tags = data_size() * kBitsInByte > fields_bits; + decoder_specific_info_descriptor_ = DecoderSpecificInfoDescriptor(); if (has_child_tags) - RCHECK(ParseDecoderSpecificInfo(reader)); + RCHECK(decoder_specific_info_descriptor_.Read(reader)); return true; } -bool ESDescriptor::ParseDecoderSpecificInfo(BitReader* reader) { - DCHECK(reader); - uint8_t tag; - uint32_t size; - - RCHECK(reader->ReadBits(8, &tag)); - RCHECK(tag == kDecoderSpecificInfoTag); - RCHECK(ReadESSize(reader, &size)); - - decoder_specific_info_.resize(size); - for (uint32_t i = 0; i < size; ++i) - RCHECK(reader->ReadBits(8, &decoder_specific_info_[i])); - return true; -} - -void ESDescriptor::Write(BufferWriter* writer) const { - DCHECK(writer); - CHECK_LT(decoder_specific_info_.size(), kMaxDecoderSpecificInfoSize); - - const std::vector kEmptyDecodingBufferSize(3, 0); - const uint8_t kNoEsFlags = 0; - - const uint8_t decoder_specific_info_size = - static_cast(decoder_specific_info_.size()); +void DecoderConfigDescriptor::WriteInternal(BufferWriter* writer) { + WriteHeader(writer); + writer->AppendInt(static_cast(object_type_)); // 6 bit stream type. The last bit is reserved with 1. const uint8_t stream_type = (kAudioStreamType << 2) | 1; - const uint8_t decoder_config_size = - static_cast(decoder_specific_info_size + kHeaderSize + - sizeof(uint8_t) + // object_type_. - sizeof(stream_type) + - kEmptyDecodingBufferSize.size() + - sizeof(kUnknownBitrate) * 2); - - const uint8_t sl_config_size = sizeof(uint8_t); // predefined. 
- const uint8_t es_size = decoder_config_size + kHeaderSize + sl_config_size + - kHeaderSize + sizeof(esid_) + sizeof(kNoEsFlags); - - writer->AppendInt(static_cast(kESDescrTag)); - writer->AppendInt(es_size); - writer->AppendInt(esid_); - writer->AppendInt(kNoEsFlags); - - writer->AppendInt(static_cast(kDecoderConfigDescrTag)); - writer->AppendInt(decoder_config_size); - writer->AppendInt(static_cast(object_type_)); writer->AppendInt(stream_type); - writer->AppendVector(kEmptyDecodingBufferSize); + writer->AppendNBytes(buffer_size_db_, 3); writer->AppendInt(max_bitrate_); writer->AppendInt(avg_bitrate_); + decoder_specific_info_descriptor_.Write(writer); +} - writer->AppendInt(static_cast(kDecoderSpecificInfoTag)); - writer->AppendInt(decoder_specific_info_size); - writer->AppendVector(decoder_specific_info_); +size_t DecoderConfigDescriptor::ComputeDataSize() { + // object_type (1 byte), stream_type (1 byte), decoding_buffer_size (3 bytes), + // max_bitrate (4 bytes), avg_bitrate (4 bytes). + const size_t data_size_without_children = 1 + 1 + 3 + 4 + 4; + return data_size_without_children + + decoder_specific_info_descriptor_.ComputeSize(); +} - writer->AppendInt(static_cast(kSLConfigTag)); - writer->AppendInt(sl_config_size); +bool SLConfigDescriptor::ReadData(BitReader* reader) { + return true; +} + +void SLConfigDescriptor::WriteInternal(BufferWriter* writer) { + WriteHeader(writer); writer->AppendInt(static_cast(kSLPredefinedMP4)); } -size_t ESDescriptor::ComputeSize() const { - // A bit magical. Refer to ESDescriptor::Write for details. 
- const uint8_t decoder_specific_info_size = - static_cast(decoder_specific_info_.size()); - const uint8_t decoder_config_size = decoder_specific_info_size + kHeaderSize + - sizeof(uint8_t) * 5 + - sizeof(uint32_t) * 2; - const uint8_t sl_config_size = sizeof(uint8_t); - const uint8_t es_size = decoder_config_size + kHeaderSize + sl_config_size + - kHeaderSize + sizeof(esid_) + sizeof(uint8_t); - return es_size + kHeaderSize; +size_t SLConfigDescriptor::ComputeDataSize() { + return 1; +} + +bool ESDescriptor::ReadData(BitReader* reader) { + bool stream_dependency_flag; + bool url_flag; + bool ocr_stream_flag; + RCHECK(reader->ReadBits(16, &esid_)); + RCHECK(reader->ReadBits(1, &stream_dependency_flag)); + RCHECK(reader->ReadBits(1, &url_flag)); + RCHECK(!url_flag); // We don't support url flag + RCHECK(reader->ReadBits(1, &ocr_stream_flag)); + RCHECK(reader->SkipBits(5)); // streamPriority + + if (stream_dependency_flag) + RCHECK(reader->SkipBits(16)); // dependsOn_ES_ID + if (ocr_stream_flag) + RCHECK(reader->SkipBits(16)); // OCR_ES_Id + + return decoder_config_descriptor_.Read(reader); + // Skip the parsing of |sl_config_descriptor_| intentionally as we do not care + // about the data. +} + +void ESDescriptor::WriteInternal(BufferWriter* writer) { + WriteHeader(writer); + + writer->AppendInt(esid_); + const uint8_t kNoEsFlags = 0; + writer->AppendInt(kNoEsFlags); + + decoder_config_descriptor_.Write(writer); + sl_config_descriptor_.Write(writer); +} + +size_t ESDescriptor::ComputeDataSize() { + // esid (2 bytes), es_flags (1 byte). 
+ const size_t data_size_without_children = 2 + 1; + return data_size_without_children + decoder_config_descriptor_.ComputeSize() + + sl_config_descriptor_.ComputeSize(); } } // namespace media diff --git a/packager/media/codecs/es_descriptor.h b/packager/media/codecs/es_descriptor.h index 8696f28e10..7f402e7230 100644 --- a/packager/media/codecs/es_descriptor.h +++ b/packager/media/codecs/es_descriptor.h @@ -28,22 +28,88 @@ enum class ObjectType : uint8_t { kDTSL = 0xAB, // DTS-HD Master Audio }; -/// This class parses object type and decoder specific information from an -/// elementary stream descriptor, which is usually contained in an esds -/// box. Please refer to ISO 14496 Part 1 7.2.6.5 for more details. -class ESDescriptor { +enum class DescriptorTag { + kForbidden = 0, + kES = 0x03, + kDecoderConfig = 0x04, + kDecoderSpecificInfo = 0x05, + kSLConfig = 0x06, +}; + +/// Defines the base Descriptor object as defined in ISO 14496-1:2004 Systems +/// section 7.2.2.2. All descriptors inherit from BaseDescriptor. +class BaseDescriptor { public: - ESDescriptor(); - ~ESDescriptor(); + explicit BaseDescriptor(DescriptorTag tag) : tag_(tag) {} + /// Parse the descriptor from input data. + /// @param data contains the descriptor data. bool Parse(const std::vector& data); - void Write(BufferWriter* writer) const; - size_t ComputeSize() const; - uint16_t esid() const { return esid_; } - void set_esid(uint16_t esid) { esid_ = esid; } + /// Read the descriptor. + /// @param reader points to a BitReader object. + bool Read(BitReader* reader); - uint32_t max_bitrate() const {return max_bitrate_; } + /// Write the descriptor to buffer. This function calls ComputeSize internally + /// to compute and update descriptor size. + /// @param writer points to a BufferWriter object which wraps the buffer for + /// writing. + void Write(BufferWriter* writer); + + /// Compute the size of this descriptor. It will also update descriptor size. 
+ /// @return The size of result descriptor including child descriptors. + size_t ComputeSize(); + + protected: + /// Write descriptor header. + void WriteHeader(BufferWriter* writer); + + /// @return descriptor data size without header in bytes. + size_t data_size() const { return data_size_; } + + private: + // Read the descriptor data (header is already read). + virtual bool ReadData(BitReader* reader) = 0; + // Write the descriptor. The descriptor data size should already be updated. + virtual void WriteInternal(BufferWriter* writer) = 0; + // Compute the data size, with child descriptors included. + virtual size_t ComputeDataSize() = 0; + + DescriptorTag tag_ = DescriptorTag::kForbidden; + size_t data_size_ = 0; +}; + +/// Implements DecoderSpecificInfo descriptor according to ISO +/// 14496-1:2004 7.2.6.7 DecoderSpecificInfo. +class DecoderSpecificInfoDescriptor : public BaseDescriptor { + public: + DecoderSpecificInfoDescriptor() + : BaseDescriptor(DescriptorTag::kDecoderSpecificInfo) {} + + const std::vector& data() const { return data_; } + + void set_data(const std::vector& data) { data_ = data; } + + private: + bool ReadData(BitReader* reader) override; + void WriteInternal(BufferWriter* writer) override; + size_t ComputeDataSize() override; + + std::vector data_; +}; + +/// Implements DecoderConfig descriptor according to ISO 14496-1:2004 7.2.6.6 +/// DecoderConfigDescriptor. 
+class DecoderConfigDescriptor : public BaseDescriptor { + public: + DecoderConfigDescriptor() : BaseDescriptor(DescriptorTag::kDecoderConfig) {} + + uint32_t buffer_size_db() const { return buffer_size_db_; } + void set_buffer_size_db(uint32_t buffer_size_db) { + buffer_size_db_ = buffer_size_db; + } + + uint32_t max_bitrate() const { return max_bitrate_; } void set_max_bitrate(uint32_t max_bitrate) { max_bitrate_ = max_bitrate; } uint32_t avg_bitrate() const { return avg_bitrate_; } @@ -52,20 +118,13 @@ class ESDescriptor { ObjectType object_type() const { return object_type_; } void set_object_type(ObjectType object_type) { object_type_ = object_type; } - const std::vector& decoder_specific_info() const { - return decoder_specific_info_; - } - void set_decoder_specific_info( - const std::vector& decoder_specific_info) { - decoder_specific_info_ = decoder_specific_info; - } - /// @return true if the stream is AAC. bool IsAAC() const { return object_type_ == ObjectType::kISO_14496_3 || object_type_ == ObjectType::kISO_13818_7_AAC_LC; } + /// @return true if the stream is DTS. 
bool IsDTS() const { return object_type_ == ObjectType::kDTSC || object_type_ == ObjectType::kDTSE || @@ -73,22 +132,66 @@ class ESDescriptor { object_type_ == ObjectType::kDTSL; } + const DecoderSpecificInfoDescriptor& decoder_specific_info_descriptor() + const { + return decoder_specific_info_descriptor_; + } + + DecoderSpecificInfoDescriptor* mutable_decoder_specific_info_descriptor() { + return &decoder_specific_info_descriptor_; + } + private: - enum Tag { - kESDescrTag = 0x03, - kDecoderConfigDescrTag = 0x04, - kDecoderSpecificInfoTag = 0x05, - kSLConfigTag = 0x06, - }; + bool ReadData(BitReader* reader) override; + void WriteInternal(BufferWriter* writer) override; + size_t ComputeDataSize() override; - bool ParseDecoderConfigDescriptor(BitReader* reader); - bool ParseDecoderSpecificInfo(BitReader* reader); + ObjectType object_type_ = ObjectType::kForbidden; + uint32_t buffer_size_db_ = 0; + uint32_t max_bitrate_ = 0; + uint32_t avg_bitrate_ = 0; + DecoderSpecificInfoDescriptor decoder_specific_info_descriptor_; +}; - uint16_t esid_; // Elementary Stream ID. - ObjectType object_type_; - uint32_t max_bitrate_; - uint32_t avg_bitrate_; - std::vector decoder_specific_info_; +/// Implements SLConfig descriptor according to ISO 14496-1:2004 7.2.6.8 +/// SLConfigDescriptor. +class SLConfigDescriptor : public BaseDescriptor { + public: + SLConfigDescriptor() : BaseDescriptor(DescriptorTag::kSLConfig) {} + + private: + bool ReadData(BitReader* reader) override; + void WriteInternal(BufferWriter* writer) override; + size_t ComputeDataSize() override; +}; + +/// This class parses object type and decoder specific information from an +/// elementary stream descriptor, which is usually contained in an esds +/// box. Please refer to ISO 14496 Part 1 7.2.6.5 for more details. 
+class ESDescriptor : public BaseDescriptor { + public: + ESDescriptor() : BaseDescriptor(DescriptorTag::kES) {} + + uint16_t esid() const { return esid_; } + void set_esid(uint16_t esid) { esid_ = esid; } + + const DecoderConfigDescriptor& decoder_config_descriptor() const { + return decoder_config_descriptor_; + } + + DecoderConfigDescriptor* mutable_decoder_config_descriptor() { + return &decoder_config_descriptor_; + } + + private: + bool ReadData(BitReader* reader) override; + void WriteInternal(BufferWriter* writer) override; + size_t ComputeDataSize() override; + + uint16_t esid_ = 0; // Elementary Stream ID. + + DecoderConfigDescriptor decoder_config_descriptor_; + SLConfigDescriptor sl_config_descriptor_; }; } // namespace media diff --git a/packager/media/codecs/es_descriptor_unittest.cc b/packager/media/codecs/es_descriptor_unittest.cc index 9c91432173..2bd32c5a8d 100644 --- a/packager/media/codecs/es_descriptor_unittest.cc +++ b/packager/media/codecs/es_descriptor_unittest.cc @@ -7,7 +7,10 @@ #include #include +#include "packager/media/base/buffer_writer.h" + using ::testing::ElementsAre; +using ::testing::ElementsAreArray; namespace shaka { namespace media { @@ -38,11 +41,21 @@ TEST(ESDescriptorTest, SingleByteLengthTest) { std::vector data(std::begin(kBuffer), std::end(kBuffer)); ESDescriptor es_desc; - EXPECT_EQ(es_desc.object_type(), ObjectType::kForbidden); + const DecoderConfigDescriptor& decoder_config_descriptor = + es_desc.decoder_config_descriptor(); + EXPECT_EQ(decoder_config_descriptor.object_type(), ObjectType::kForbidden); EXPECT_TRUE(es_desc.Parse(data)); - EXPECT_EQ(es_desc.object_type(), ObjectType::kISO_14496_3); - EXPECT_THAT(es_desc.decoder_specific_info(), ElementsAre(0x12, 0x10)); + EXPECT_EQ(decoder_config_descriptor.object_type(), ObjectType::kISO_14496_3); + EXPECT_THAT( + decoder_config_descriptor.decoder_specific_info_descriptor().data(), + ElementsAre(0x12, 0x10)); + + BufferWriter writer; + es_desc.Write(&writer); + 
EXPECT_THAT( + std::vector(writer.Buffer(), writer.Buffer() + writer.Size()), + ElementsAreArray(kBuffer)); } TEST(ESDescriptorTest, NonAACTest) { @@ -73,9 +86,19 @@ TEST(ESDescriptorTest, NonAACTest) { ESDescriptor es_desc; EXPECT_TRUE(es_desc.Parse(data)); - EXPECT_EQ(static_cast(es_desc.object_type()), 0x66); - EXPECT_NE(es_desc.object_type(), ObjectType::kISO_14496_3); - EXPECT_THAT(es_desc.decoder_specific_info(), ElementsAre(0x12, 0x10)); + const DecoderConfigDescriptor& decoder_config_descriptor = + es_desc.decoder_config_descriptor(); + EXPECT_EQ(static_cast(decoder_config_descriptor.object_type()), 0x66); + EXPECT_NE(decoder_config_descriptor.object_type(), ObjectType::kISO_14496_3); + EXPECT_THAT( + decoder_config_descriptor.decoder_specific_info_descriptor().data(), + ElementsAre(0x12, 0x10)); + + BufferWriter writer; + es_desc.Write(&writer); + EXPECT_THAT( + std::vector(writer.Buffer(), writer.Buffer() + writer.Size()), + ElementsAreArray(kBuffer)); } TEST(ESDescriptorTest, NonAACWithoutDecoderSpecificInfoTagTest) { @@ -102,42 +125,157 @@ TEST(ESDescriptorTest, NonAACWithoutDecoderSpecificInfoTagTest) { ESDescriptor es_desc; EXPECT_TRUE(es_desc.Parse(data)); - EXPECT_EQ(static_cast(es_desc.object_type()), 0x6b); - EXPECT_EQ(es_desc.max_bitrate(), 0x28500u); - EXPECT_EQ(es_desc.avg_bitrate(), 0x27100u); - EXPECT_THAT(es_desc.decoder_specific_info(), ElementsAre()); + const DecoderConfigDescriptor& decoder_config_descriptor = + es_desc.decoder_config_descriptor(); + EXPECT_EQ(static_cast(decoder_config_descriptor.object_type()), 0x6b); + EXPECT_EQ(decoder_config_descriptor.max_bitrate(), 0x28500u); + EXPECT_EQ(decoder_config_descriptor.avg_bitrate(), 0x27100u); + EXPECT_THAT( + decoder_config_descriptor.decoder_specific_info_descriptor().data(), + ElementsAre()); } -TEST(ESDescriptorTest, MultiByteLengthTest) { +// https://github.com/google/shaka-packager/issues/536. 
+TEST(ESDescriptorTest, Issue536) { // clang-format off - const uint8_t kBuffer[] = { - // ESDescriptor tag with two bytes size. - 0x03, 0x80, 0x1b, + const uint8_t kInput[] = { + // ESDescriptor tag with size. + 0x03, 0x80, 0x80, 0x80, 0x70, // ESDescriptor fields. - 0x00, 0x01, 0x00, - // DecoderConfigDescriptor tag with three bytes size. - 0x04, 0x80, 0x80, 0x14, + 0x00, 0x00, 0x00, + // DecoderConfigDescriptor tag with size. + 0x04, 0x80, 0x80, 0x80, 0x62, // Object Type. 0x40, // Three 4-byte fields: dummy, max bitrate, avg bitrate. - 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - // DecoderSpecificInfo tag with four bytes size. - 0x05, 0x80, 0x80, 0x80, 0x02, + 0x15, 0x00, 0x30, 0x00, 0x00, 0x01, 0xF4, 0x00, + 0x00, 0x01, 0xF4, 0x00, + // DecoderSpecificInfo tag with size. + 0x05, 0x80, 0x80, 0x80, 0x50, // DecoderSpecificInfo fields. - 0x12, 0x10, - // SLConfig tag with one byte size. + 0x11, 0x90, 0x08, 0xC4, 0x00, 0x00, 0x20, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // SLConfig tag with size. + 0x06, 0x80, 0x80, 0x80, 0x01, + // SLConfig fields. + 0x02, + }; + const uint8_t kOutput[] = { + // ESDescriptor tag with size. + 0x03, 0x67, + // ESDescriptor fields. + 0x00, 0x00, 0x00, + // DecoderConfigDescriptor tag with size. + 0x04, 0x5F, + // Object Type. + 0x40, + // Three 4-byte fields: dummy, max bitrate, avg bitrate. + 0x15, 0x00, 0x30, 0x00, 0x00, 0x01, 0xF4, 0x00, + 0x00, 0x01, 0xF4, 0x00, + // DecoderSpecificInfo tag with size. + 0x05, 0x50, + // DecoderSpecificInfo fields. 
+ 0x11, 0x90, 0x08, 0xC4, 0x00, 0x00, 0x20, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // SLConfig tag with size. 0x06, 0x01, // SLConfig fields. 0x02, }; // clang-format on - std::vector data(std::begin(kBuffer), std::end(kBuffer)); + std::vector data(std::begin(kInput), std::end(kInput)); ESDescriptor es_desc; EXPECT_TRUE(es_desc.Parse(data)); - EXPECT_EQ(es_desc.object_type(), ObjectType::kISO_14496_3); - EXPECT_THAT(es_desc.decoder_specific_info(), ElementsAre(0x12, 0x10)); + BufferWriter writer; + es_desc.Write(&writer); + EXPECT_THAT( + std::vector(writer.Buffer(), writer.Buffer() + writer.Size()), + ElementsAreArray(kOutput)); +} + +class DescriptorLengthTest : public testing::Test { + public: + void TestReadWrite(const std::vector& input, + const std::vector& expected_output) { + DecoderSpecificInfoDescriptor desc; + EXPECT_TRUE(desc.Parse(input)); + + BufferWriter writer; + desc.Write(&writer); + EXPECT_THAT( + std::vector(writer.Buffer(), writer.Buffer() + writer.Size()), + ElementsAreArray(expected_output)); + } +}; + +// Use DecoderSpecificInfo descriptor for length testing. 
+ +TEST_F(DescriptorLengthTest, OneByteLengthData) { + const uint8_t kBuffer[] = {0x05, 0x02, 0x12, 0x10}; + std::vector data(std::begin(kBuffer), std::end(kBuffer)); + TestReadWrite(data, data); +} + +TEST_F(DescriptorLengthTest, TwoBytesLengthForOneByteLengthData) { + const uint8_t kInput[] = {0x05, 0x80, 0x02, 0x12, 0x10}; + const uint8_t kOutput[] = {0x05, 0x02, 0x12, 0x10}; + std::vector input(std::begin(kInput), std::end(kInput)); + std::vector output(std::begin(kOutput), std::end(kOutput)); + TestReadWrite(input, output); +} + +TEST_F(DescriptorLengthTest, ThreeBytesLengthForOneByteLengthData) { + const uint8_t kInput[] = {0x05, 0x80, 0x80, 0x02, 0x12, 0x10}; + const uint8_t kOutput[] = {0x05, 0x02, 0x12, 0x10}; + std::vector input(std::begin(kInput), std::end(kInput)); + std::vector output(std::begin(kOutput), std::end(kOutput)); + TestReadWrite(input, output); +} + +TEST_F(DescriptorLengthTest, FourBytesLengthForOneByteLengthData) { + const uint8_t kInput[] = {0x05, 0x80, 0x80, 0x80, 0x02, 0x12, 0x10}; + const uint8_t kOutput[] = {0x05, 0x02, 0x12, 0x10}; + std::vector input(std::begin(kInput), std::end(kInput)); + std::vector output(std::begin(kOutput), std::end(kOutput)); + TestReadWrite(input, output); +} + +TEST_F(DescriptorLengthTest, TwoBytesLengthData) { + const uint8_t kBuffer[] = { + 0x05, 0x81, 0x02, 0x12, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, + }; + std::vector input(std::begin(kBuffer), std::end(kBuffer)); + std::vector output(std::begin(kBuffer), std::end(kBuffer)); + TestReadWrite(input, output); } } // namespace media diff --git a/packager/media/formats/mp4/box_definitions.cc b/packager/media/formats/mp4/box_definitions.cc index 2d9603a806..7785b91ee0 100644 --- a/packager/media/formats/mp4/box_definitions.cc +++ b/packager/media/formats/mp4/box_definitions.cc @@ -1602,9 +1602,11 @@ bool ElementaryStreamDescriptor::ReadWriteInternal(BoxBuffer* buffer) { std::vector data; RCHECK(buffer->ReadWriteVector(&data, buffer->BytesLeft())); RCHECK(es_descriptor.Parse(data)); - if (es_descriptor.IsAAC()) { + if (es_descriptor.decoder_config_descriptor().IsAAC()) { RCHECK(aac_audio_specific_config.Parse( - es_descriptor.decoder_specific_info())); + es_descriptor.decoder_config_descriptor() + .decoder_specific_info_descriptor() + .data())); } } else { DCHECK(buffer->writer()); @@ -1615,8 +1617,10 @@ bool ElementaryStreamDescriptor::ReadWriteInternal(BoxBuffer* buffer) { size_t ElementaryStreamDescriptor::ComputeSizeInternal() { // This box is optional. Skip it if not initialized. 
- if (es_descriptor.object_type() == ObjectType::kForbidden) + if (es_descriptor.decoder_config_descriptor().object_type() == + ObjectType::kForbidden) { return 0; + } return HeaderSize() + es_descriptor.ComputeSize(); } diff --git a/packager/media/formats/mp4/box_definitions_comparison.h b/packager/media/formats/mp4/box_definitions_comparison.h index 9e2be24431..ca8ca83e23 100644 --- a/packager/media/formats/mp4/box_definitions_comparison.h +++ b/packager/media/formats/mp4/box_definitions_comparison.h @@ -256,11 +256,24 @@ inline bool operator==(const VideoSampleEntry& lhs, lhs.codec_configuration == rhs.codec_configuration; } -inline bool operator==(const ESDescriptor& lhs, const ESDescriptor& rhs) { - return lhs.esid() == rhs.esid() && lhs.object_type() == rhs.object_type() && +inline bool operator==(const DecoderSpecificInfoDescriptor& lhs, + const DecoderSpecificInfoDescriptor& rhs) { + return lhs.data() == rhs.data(); +} + +inline bool operator==(const DecoderConfigDescriptor& lhs, + const DecoderConfigDescriptor& rhs) { + return lhs.buffer_size_db() == rhs.buffer_size_db() && lhs.max_bitrate() == rhs.max_bitrate() && lhs.avg_bitrate() == rhs.avg_bitrate() && - lhs.decoder_specific_info() == rhs.decoder_specific_info(); + lhs.object_type() == rhs.object_type() && + lhs.decoder_specific_info_descriptor() == + rhs.decoder_specific_info_descriptor(); +} + +inline bool operator==(const ESDescriptor& lhs, const ESDescriptor& rhs) { + return lhs.esid() == rhs.esid() && + lhs.decoder_config_descriptor() == rhs.decoder_config_descriptor(); } inline bool operator==(const ElementaryStreamDescriptor& lhs, diff --git a/packager/media/formats/mp4/box_definitions_unittest.cc b/packager/media/formats/mp4/box_definitions_unittest.cc index 367af530b5..9545c98dd0 100644 --- a/packager/media/formats/mp4/box_definitions_unittest.cc +++ b/packager/media/formats/mp4/box_definitions_unittest.cc @@ -373,11 +373,14 @@ class BoxDefinitionsTestGeneral : public testing::Test { void 
Fill(ElementaryStreamDescriptor* esds) { const uint8_t kDecoderSpecificInfo[] = {18, 16}; esds->es_descriptor.set_esid(1); - esds->es_descriptor.set_object_type(ObjectType::kISO_14496_3); + esds->es_descriptor.mutable_decoder_config_descriptor()->set_object_type( + ObjectType::kISO_14496_3); std::vector decoder_specific_info( kDecoderSpecificInfo, kDecoderSpecificInfo + sizeof(kDecoderSpecificInfo)); - esds->es_descriptor.set_decoder_specific_info(decoder_specific_info); + esds->es_descriptor.mutable_decoder_config_descriptor() + ->mutable_decoder_specific_info_descriptor() + ->set_data(decoder_specific_info); } void Modify(ElementaryStreamDescriptor* esds) { diff --git a/packager/media/formats/mp4/mp4_media_parser.cc b/packager/media/formats/mp4/mp4_media_parser.cc index 8d16d0ad0f..c307fa416c 100644 --- a/packager/media/formats/mp4/mp4_media_parser.cc +++ b/packager/media/formats/mp4/mp4_media_parser.cc @@ -392,11 +392,13 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { std::vector codec_config; switch (actual_format) { - case FOURCC_mp4a: - max_bitrate = entry.esds.es_descriptor.max_bitrate(); - avg_bitrate = entry.esds.es_descriptor.avg_bitrate(); + case FOURCC_mp4a: { + const DecoderConfigDescriptor& decoder_config = + entry.esds.es_descriptor.decoder_config_descriptor(); + max_bitrate = decoder_config.max_bitrate(); + avg_bitrate = decoder_config.avg_bitrate(); - codec = ObjectTypeToCodec(entry.esds.es_descriptor.object_type()); + codec = ObjectTypeToCodec(decoder_config.object_type()); if (codec == kCodecAAC) { const AACAudioSpecificConfig& aac_audio_specific_config = entry.esds.aac_audio_specific_config; @@ -404,18 +406,19 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { sampling_frequency = aac_audio_specific_config.GetSamplesPerSecond(); audio_object_type = aac_audio_specific_config.GetAudioObjectType(); - codec_config = entry.esds.es_descriptor.decoder_specific_info(); + codec_config = + 
decoder_config.decoder_specific_info_descriptor().data(); } else if (codec == kUnknownCodec) { // Intentionally not to fail in the parser as there may be multiple // streams in the source content, which allows the supported stream // to be packaged. An error will be returned if the unsupported // stream is passed to the muxer. LOG(WARNING) << "Unsupported audio object type " - << static_cast( - entry.esds.es_descriptor.object_type()) + << static_cast(decoder_config.object_type()) << " in stsd.es_desriptor."; } break; + } case FOURCC_dtsc: FALLTHROUGH_INTENDED; case FOURCC_dtse: diff --git a/packager/media/formats/mp4/mp4_muxer.cc b/packager/media/formats/mp4/mp4_muxer.cc index f0b5e1b887..b0f45ddced 100644 --- a/packager/media/formats/mp4/mp4_muxer.cc +++ b/packager/media/formats/mp4/mp4_muxer.cc @@ -444,15 +444,17 @@ bool MP4Muxer::GenerateAudioTrak(const AudioStreamInfo* audio_info, audio.format = CodecToFourCC(audio_info->codec(), H26xStreamFormat::kUnSpecified); switch(audio_info->codec()){ - case kCodecAAC: - audio.esds.es_descriptor.set_object_type( - ObjectType::kISO_14496_3); // MPEG4 AAC. + case kCodecAAC: { audio.esds.es_descriptor.set_esid(track_id); - audio.esds.es_descriptor.set_decoder_specific_info( + DecoderConfigDescriptor* decoder_config = + audio.esds.es_descriptor.mutable_decoder_config_descriptor(); + decoder_config->set_object_type(ObjectType::kISO_14496_3); // MPEG4 AAC. 
+ decoder_config->set_max_bitrate(audio_info->max_bitrate()); + decoder_config->set_avg_bitrate(audio_info->avg_bitrate()); + decoder_config->mutable_decoder_specific_info_descriptor()->set_data( audio_info->codec_config()); - audio.esds.es_descriptor.set_max_bitrate(audio_info->max_bitrate()); - audio.esds.es_descriptor.set_avg_bitrate(audio_info->avg_bitrate()); break; + } case kCodecDTSC: case kCodecDTSH: case kCodecDTSL: diff --git a/packager/media/formats/wvm/wvm_media_parser.cc b/packager/media/formats/wvm/wvm_media_parser.cc index ac5b382b20..1d7139cd05 100644 --- a/packager/media/formats/wvm/wvm_media_parser.cc +++ b/packager/media/formats/wvm/wvm_media_parser.cc @@ -716,7 +716,9 @@ bool WvmMediaParser::ParseIndexEntry() { "Could not extract AudioSpecificConfig from ES_Descriptor"; return false; } - audio_codec_config = descriptor.decoder_specific_info(); + audio_codec_config = descriptor.decoder_config_descriptor() + .decoder_specific_info_descriptor() + .data(); break; } case Audio_EC3SpecificData: