diff --git a/packager/media/base/bit_reader.cc b/packager/media/base/bit_reader.cc index 85b43577e0..b6d0b4948b 100644 --- a/packager/media/base/bit_reader.cc +++ b/packager/media/base/bit_reader.cc @@ -51,6 +51,15 @@ bool BitReader::SkipBits(size_t num_bits) { return ReadBitsInternal(num_bits, ¬_needed); } +void BitReader::SkipToNextByte() { + // Already aligned. + if (num_remaining_bits_in_curr_byte_ == 8) + return; + + num_remaining_bits_in_curr_byte_ = 0; + UpdateCurrByte(); +} + bool BitReader::SkipBytes(size_t num_bytes) { if (num_remaining_bits_in_curr_byte_ != 8) return false; diff --git a/packager/media/base/bit_reader.h b/packager/media/base/bit_reader.h index 7741f80db5..df4d32c13f 100644 --- a/packager/media/base/bit_reader.h +++ b/packager/media/base/bit_reader.h @@ -73,6 +73,10 @@ class BitReader { return condition_read == condition ? SkipBits(num_bits) : true; } + /// Skip a number of bits so the stream is byte aligned to the initial data. + /// There could be 0 to 7 bits skipped. + void SkipToNextByte(); + /// Skip a number of bytes from stream. The current posision should be byte /// aligned, otherwise a false is returned and bytes are not skipped. /// @param num_bytes specifies the number of bytes to be skipped. diff --git a/packager/media/base/bit_reader_unittest.cc b/packager/media/base/bit_reader_unittest.cc index c8dcf13a25..8887f398fe 100644 --- a/packager/media/base/bit_reader_unittest.cc +++ b/packager/media/base/bit_reader_unittest.cc @@ -88,5 +88,30 @@ TEST(BitReaderTest, SkipBitsConditionalTest) { EXPECT_FALSE(reader.SkipBits(1)); } +TEST(BitReaderTest, SkipToNextByteAligned) { + uint8_t buffer[] = {0x8a, 0x12}; + BitReader reader(buffer, sizeof(buffer)); + + reader.SkipToNextByte(); + EXPECT_EQ(0u, reader.bit_position()); + + EXPECT_TRUE(reader.SkipBits(8)); + EXPECT_EQ(8u, reader.bit_position()); + + reader.SkipToNextByte(); + EXPECT_EQ(8u, reader.bit_position()); +} + +TEST(BitReaderTest, SkipToNextByteTestUnaligned) { + uint8_t buffer[] = {0x8a, 0x12}; + BitReader reader(buffer, sizeof(buffer)); + + EXPECT_TRUE(reader.SkipBits(4)); + EXPECT_EQ(4u, reader.bit_position()); + + reader.SkipToNextByte(); + EXPECT_EQ(8u, reader.bit_position()); +} + } // namespace media } // namespace shaka diff --git a/packager/media/codecs/aac_audio_specific_config.cc b/packager/media/codecs/aac_audio_specific_config.cc index 7783405f47..572778f84a 100644 --- a/packager/media/codecs/aac_audio_specific_config.cc +++ b/packager/media/codecs/aac_audio_specific_config.cc @@ -10,6 +10,8 @@ #include "packager/media/base/bit_reader.h" #include "packager/media/base/rcheck.h" +namespace shaka { +namespace media { namespace { // Sampling Frequency Index table, from ISO 14496-3 Table 1.16 @@ -20,10 +22,24 @@ static const uint32_t kSampleRates[] = {96000, 88200, 64000, 48000, 44100, // Channel Configuration table, from ISO 14496-3 Table 1.17 const uint8_t kChannelConfigs[] = {0, 1, 2, 3, 4, 5, 6, 8}; -} // namespace +// ISO 14496-3 Table 4.2 – Syntax of program_config_element() +// program_config_element() +// ... +// element_is_cpe[i]; 1 bslbf +// element_tag_select[i]; 4 uimsbf +bool CountChannels(uint8_t num_elements, + uint8_t* num_channels, + BitReader* bit_reader) { + for (uint8_t i = 0; i < num_elements; ++i) { + bool is_pair = false; + RCHECK(bit_reader->ReadBits(1, &is_pair)); + *num_channels += is_pair ? 2 : 1; + RCHECK(bit_reader->SkipBits(4)); + } + return true; +} -namespace shaka { -namespace media { +} // namespace AACAudioSpecificConfig::AACAudioSpecificConfig() {} @@ -55,6 +71,9 @@ bool AACAudioSpecificConfig::Parse(const std::vector& data) { RCHECK(reader.ReadBits(24, &frequency_)); RCHECK(reader.ReadBits(4, &channel_config_)); + RCHECK(channel_config_ < arraysize(kChannelConfigs)); + num_channels_ = kChannelConfigs[channel_config_]; + // Read extension configuration. if (audio_object_type_ == AOT_SBR || audio_object_type_ == AOT_PS) { sbr_present_ = audio_object_type_ == AOT_SBR; @@ -68,7 +87,7 @@ bool AACAudioSpecificConfig::Parse(const std::vector& data) { RCHECK(audio_object_type_ < 31); } - RCHECK(SkipDecoderGASpecificConfig(&reader)); + RCHECK(ParseDecoderGASpecificConfig(&reader)); RCHECK(SkipErrorSpecificConfig()); // Read extension configuration again @@ -113,9 +132,6 @@ bool AACAudioSpecificConfig::Parse(const std::vector& data) { extension_frequency_ = kSampleRates[extension_frequency_index]; } - RCHECK(channel_config_ < arraysize(kChannelConfigs)); - num_channels_ = kChannelConfigs[channel_config_]; - return frequency_ != 0 && num_channels_ != 0 && audio_object_type_ >= 1 && audio_object_type_ <= 4 && frequency_index_ != 0xf && channel_config_ <= 7; @@ -185,7 +201,8 @@ uint8_t AACAudioSpecificConfig::GetNumChannels() const { // Currently this function only support GASpecificConfig defined in // ISO 14496 Part 3 Table 4.1 - Syntax of GASpecificConfig() -bool AACAudioSpecificConfig::SkipDecoderGASpecificConfig(BitReader* bit_reader) const { +bool AACAudioSpecificConfig::ParseDecoderGASpecificConfig( + BitReader* bit_reader) { switch (audio_object_type_) { case 1: case 2: @@ -199,7 +216,7 @@ bool AACAudioSpecificConfig::SkipDecoderGASpecificConfig(BitReader* bit_reader) case 21: case 22: case 23: - return SkipGASpecificConfig(bit_reader); + return ParseGASpecificConfig(bit_reader); default: break; } @@ -229,7 +246,7 @@ bool AACAudioSpecificConfig::SkipErrorSpecificConfig() const { // The following code is written according to ISO 14496 part 3 Table 4.1 - // GASpecificConfig. -bool AACAudioSpecificConfig::SkipGASpecificConfig(BitReader* bit_reader) const { +bool AACAudioSpecificConfig::ParseGASpecificConfig(BitReader* bit_reader) { uint8_t extension_flag = 0; uint8_t depends_on_core_coder; uint16_t dummy; @@ -240,7 +257,8 @@ bool AACAudioSpecificConfig::SkipGASpecificConfig(BitReader* bit_reader) const { RCHECK(bit_reader->ReadBits(14, &dummy)); // coreCoderDelay RCHECK(bit_reader->ReadBits(1, &extension_flag)); - RCHECK(channel_config_ != 0); + if (channel_config_ == 0) + RCHECK(ParseProgramConfigElement(bit_reader)); if (audio_object_type_ == 6 || audio_object_type_ == 20) RCHECK(bit_reader->ReadBits(3, &dummy)); // layerNr @@ -262,5 +280,95 @@ bool AACAudioSpecificConfig::SkipGASpecificConfig(BitReader* bit_reader) const { return true; } +// ISO 14496-3 Table 4.2 – Syntax of program_config_element() +// program_config_element() +// { +// element_instance_tag; 4 uimsbf +// object_type; 2 uimsbf +// sampling_frequency_index; 4 uimsbf +// num_front_channel_elements; 4 uimsbf +// num_side_channel_elements; 4 uimsbf +// num_back_channel_elements; 4 uimsbf +// num_lfe_channel_elements; 2 uimsbf +// num_assoc_data_elements; 3 uimsbf +// num_valid_cc_elements; 4 uimsbf +// mono_mixdown_present; 1 uimsbf +// if (mono_mixdown_present == 1) +// mono_mixdown_element_number; 4 uimsbf +// stereo_mixdown_present; 1 uimsbf +// if (stereo_mixdown_present == 1) +// stereo_mixdown_element_number; 4 uimsbf +// matrix_mixdown_idx_present; 1 uimsbf +// if (matrix_mixdown_idx_present == 1) { +// matrix_mixdown_idx ; 2 uimsbf +// pseudo_surround_enable; 1 uimsbf +// } +// for (i = 0; i < num_front_channel_elements; i++) { +// front_element_is_cpe[i]; 1 bslbf +// front_element_tag_select[i]; 4 uimsbf +// } +// for (i = 0; i < num_side_channel_elements; i++) { +// side_element_is_cpe[i]; 1 bslbf +// side_element_tag_select[i]; 4 uimsbf +// } +// for (i = 0; i < num_back_channel_elements; i++) { +// back_element_is_cpe[i]; 1 bslbf +// back_element_tag_select[i]; 4 uimsbf +// } +// for (i = 0; i < num_lfe_channel_elements; i++) +// lfe_element_tag_select[i]; 4 uimsbf +// for ( i = 0; i < num_assoc_data_elements; i++) +// assoc_data_element_tag_select[i]; 4 uimsbf +// for (i = 0; i < num_valid_cc_elements; i++) { +// cc_element_is_ind_sw[i]; 1 uimsbf +// valid_cc_element_tag_select[i]; 4 uimsbf +// } +// byte_alignment(); Note 1 +// comment_field_bytes; 8 uimsbf +// for (i = 0; i < comment_field_bytes; i++) +// comment_field_data[i]; 8 uimsbf +// } +// Note 1: If called from within an AudioSpecificConfig(), this +// byte_alignment shall be relative to the start of the AudioSpecificConfig(). +bool AACAudioSpecificConfig::ParseProgramConfigElement(BitReader* bit_reader) { + // element_instance_tag (4), object_type (2), sampling_frequency_index (4). + RCHECK(bit_reader->SkipBits(4 + 2 + 4)); + + uint8_t num_front_channel_elements = 0; + uint8_t num_side_channel_elements = 0; + uint8_t num_back_channel_elements = 0; + uint8_t num_lfe_channel_elements = 0; + RCHECK(bit_reader->ReadBits(4, &num_front_channel_elements)); + RCHECK(bit_reader->ReadBits(4, &num_side_channel_elements)); + RCHECK(bit_reader->ReadBits(4, &num_back_channel_elements)); + RCHECK(bit_reader->ReadBits(2, &num_lfe_channel_elements)); + + uint8_t num_assoc_data_elements = 0; + RCHECK(bit_reader->ReadBits(3, &num_assoc_data_elements)); + uint8_t num_valid_cc_elements = 0; + RCHECK(bit_reader->ReadBits(4, &num_valid_cc_elements)); + + RCHECK(bit_reader->SkipBitsConditional(true, 4)); // mono_mixdown + RCHECK(bit_reader->SkipBitsConditional(true, 4)); // stereo_mixdown + RCHECK(bit_reader->SkipBitsConditional(true, 3)); // matrix_mixdown_idx + + num_channels_ = 0; + RCHECK(CountChannels(num_front_channel_elements, &num_channels_, bit_reader)); + RCHECK(CountChannels(num_side_channel_elements, &num_channels_, bit_reader)); + RCHECK(CountChannels(num_back_channel_elements, &num_channels_, bit_reader)); + num_channels_ += num_lfe_channel_elements; + + RCHECK(bit_reader->SkipBits(4 * num_lfe_channel_elements)); + RCHECK(bit_reader->SkipBits(4 * num_assoc_data_elements)); + RCHECK(bit_reader->SkipBits(5 * num_valid_cc_elements)); + + bit_reader->SkipToNextByte(); + + uint8_t comment_field_bytes = 0; + RCHECK(bit_reader->ReadBits(8, &comment_field_bytes)); + RCHECK(bit_reader->SkipBytes(comment_field_bytes)); + return true; +} + } // namespace media } // namespace shaka diff --git a/packager/media/codecs/aac_audio_specific_config.h b/packager/media/codecs/aac_audio_specific_config.h index 3360925fd7..6bd71c44c4 100644 --- a/packager/media/codecs/aac_audio_specific_config.h +++ b/packager/media/codecs/aac_audio_specific_config.h @@ -111,9 +111,13 @@ class AACAudioSpecificConfig { void set_sbr_present(bool sbr_present) { sbr_present_ = sbr_present; } private: - bool SkipDecoderGASpecificConfig(BitReader* bit_reader) const; + bool ParseDecoderGASpecificConfig(BitReader* bit_reader); bool SkipErrorSpecificConfig() const; - bool SkipGASpecificConfig(BitReader* bit_reader) const; + // Parse GASpecificConfig. Calls |ParseProgramConfigElement| if + // |channel_config_| == 0. + bool ParseGASpecificConfig(BitReader* bit_reader); + // Parse program_config_element(). |num_channels_| will be updated. + bool ParseProgramConfigElement(BitReader* bit_reader); // The following variables store the AAC specific configuration information // that are used to generate the ADTS header. diff --git a/packager/media/codecs/aac_audio_specific_config_unittest.cc b/packager/media/codecs/aac_audio_specific_config_unittest.cc index ab46631f63..dd1d9aa54a 100644 --- a/packager/media/codecs/aac_audio_specific_config_unittest.cc +++ b/packager/media/codecs/aac_audio_specific_config_unittest.cc @@ -101,6 +101,22 @@ TEST(AACAudioSpecificConfigTest, SixChannelTest) { aac_audio_specific_config.GetAudioObjectType()); } +TEST(AACAudioSpecificConfigTest, ProgramConfigElementTest) { + uint8_t buffer[] = { + 0x11, 0x80, 0x04, 0xC8, 0x44, 0x00, 0x20, 0x00, 0xC4, + 0x0D, 0x4C, 0x61, 0x76, 0x63, 0x35, 0x38, 0x2E, 0x31, + 0x38, 0x2E, 0x31, 0x30, 0x30, 0x56, 0xE5, 0x00, + }; + std::vector data(std::begin(buffer), std::end(buffer)); + + AACAudioSpecificConfig aac_audio_specific_config; + EXPECT_TRUE(aac_audio_specific_config.Parse(data)); + EXPECT_EQ(48000u, aac_audio_specific_config.GetSamplesPerSecond()); + EXPECT_EQ(6u, aac_audio_specific_config.GetNumChannels()); + EXPECT_EQ(AACAudioSpecificConfig::AOT_AAC_LC, + aac_audio_specific_config.GetAudioObjectType()); +} + TEST(AACAudioSpecificConfigTest, DataTooShortTest) { AACAudioSpecificConfig aac_audio_specific_config; std::vector data;