// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "packager/media/codecs/aac_audio_specific_config.h" #include #include "glog/logging.h" #include "packager/media/base/bit_reader.h" #include "packager/media/base/rcheck.h" namespace shaka { namespace media { namespace { // Sampling Frequency Index table, from ISO 14496-3 Table 1.16 static const uint32_t kSampleRates[] = {96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000, 7350}; // Channel Configuration table, from ISO 14496-3 Table 1.17 const uint8_t kChannelConfigs[] = {0, 1, 2, 3, 4, 5, 6, 8}; // ISO 14496-3 Table 4.2 – Syntax of program_config_element() // program_config_element() // ... // element_is_cpe[i]; 1 bslbf // element_tag_select[i]; 4 uimsbf bool CountChannels(uint8_t num_elements, uint8_t* num_channels, BitReader* bit_reader) { for (uint8_t i = 0; i < num_elements; ++i) { bool is_pair = false; RCHECK(bit_reader->ReadBits(1, &is_pair)); *num_channels += is_pair ? 2 : 1; RCHECK(bit_reader->SkipBits(4)); } return true; } } // namespace AACAudioSpecificConfig::AACAudioSpecificConfig() {} AACAudioSpecificConfig::~AACAudioSpecificConfig() {} bool AACAudioSpecificConfig::Parse(const std::vector& data) { if (data.empty()) return false; BitReader reader(&data[0], data.size()); uint8_t extension_type = AOT_NULL; uint8_t extension_frequency_index = 0xff; sbr_present_ = false; ps_present_ = false; frequency_ = 0; extension_frequency_ = 0; // The following code is written according to ISO 14496 Part 3 Table 1.13 - // Syntax of AudioSpecificConfig. // Read base configuration. // Audio Object Types specified in "ISO/IEC 14496-3:2019, Table 1.19" RCHECK(ParseAudioObjectType(&reader)); RCHECK(reader.ReadBits(4, &frequency_index_)); if (frequency_index_ == 0xf) RCHECK(reader.ReadBits(24, &frequency_)); RCHECK(reader.ReadBits(4, &channel_config_)); RCHECK(channel_config_ < std::size(kChannelConfigs)); num_channels_ = kChannelConfigs[channel_config_]; // Read extension configuration. if (audio_object_type_ == AOT_SBR || audio_object_type_ == AOT_PS) { sbr_present_ = audio_object_type_ == AOT_SBR; ps_present_ = audio_object_type_ == AOT_PS; extension_type = AOT_SBR; RCHECK(reader.ReadBits(4, &extension_frequency_index)); if (extension_frequency_index == 0xf) RCHECK(reader.ReadBits(24, &extension_frequency_)); RCHECK(ParseAudioObjectType(&reader)); } RCHECK(ParseDecoderGASpecificConfig(&reader)); RCHECK(SkipErrorSpecificConfig()); // Read extension configuration again // Note: The check for 16 available bits comes from the AAC spec. if (extension_type != AOT_SBR && reader.bits_available() >= 16) { uint16_t sync_extension_type; uint8_t sbr_present_flag; uint8_t ps_present_flag; if (reader.ReadBits(11, &sync_extension_type) && sync_extension_type == 0x2b7) { if (reader.ReadBits(5, &extension_type) && extension_type == 5) { RCHECK(reader.ReadBits(1, &sbr_present_flag)); sbr_present_ = sbr_present_flag != 0; if (sbr_present_flag) { RCHECK(reader.ReadBits(4, &extension_frequency_index)); if (extension_frequency_index == 0xf) RCHECK(reader.ReadBits(24, &extension_frequency_)); // Note: The check for 12 available bits comes from the AAC spec. if (reader.bits_available() >= 12) { RCHECK(reader.ReadBits(11, &sync_extension_type)); if (sync_extension_type == 0x548) { RCHECK(reader.ReadBits(1, &ps_present_flag)); ps_present_ = ps_present_flag != 0; } } } } } } if (frequency_ == 0) { RCHECK(frequency_index_ < std::size(kSampleRates)); frequency_ = kSampleRates[frequency_index_]; } if (extension_frequency_ == 0 && extension_frequency_index != 0xff) { RCHECK(extension_frequency_index < std::size(kSampleRates)); extension_frequency_ = kSampleRates[extension_frequency_index]; } if (audio_object_type_ == AOT_USAC) { return frequency_ != 0 && num_channels_ != 0 && channel_config_ <= 7; } else { return frequency_ != 0 && num_channels_ != 0 && audio_object_type_ >= 1 && audio_object_type_ <= 4 && frequency_index_ != 0xf && channel_config_ <= 7; } } bool AACAudioSpecificConfig::ConvertToADTS( const uint8_t* data, size_t data_size, std::vector* audio_frame) const { DCHECK(audio_object_type_ >= 1 && audio_object_type_ <= 4 && frequency_index_ != 0xf && channel_config_ <= 7); size_t size = kADTSHeaderSize + data_size; // ADTS header uses 13 bits for packet size. if (size >= (1 << 13)) return false; audio_frame->reserve(size); audio_frame->resize(kADTSHeaderSize); audio_frame->at(0) = 0xff; audio_frame->at(1) = 0xf1; audio_frame->at(2) = ((audio_object_type_ - 1) << 6) + (frequency_index_ << 2) + (channel_config_ >> 2); audio_frame->at(3) = ((channel_config_ & 0x3) << 6) + static_cast(size >> 11); audio_frame->at(4) = static_cast((size & 0x7ff) >> 3); audio_frame->at(5) = static_cast(((size & 7) << 5) + 0x1f); audio_frame->at(6) = 0xfc; audio_frame->insert(audio_frame->end(), data, data + data_size); return true; } AACAudioSpecificConfig::AudioObjectType AACAudioSpecificConfig::GetAudioObjectType() const { if (ps_present_) return AOT_PS; if (sbr_present_) return AOT_SBR; return audio_object_type_; } uint32_t AACAudioSpecificConfig::GetSamplesPerSecond() const { if (extension_frequency_ > 0) return extension_frequency_; if (!sbr_present_) return frequency_; // The following code is written according to ISO 14496 Part 3 Table 1.11 and // Table 1.22. (Table 1.11 refers to the capping to 48000, Table 1.22 refers // to SBR doubling the AAC sample rate.) DCHECK_GT(frequency_, 0u); return std::min(2 * frequency_, 48000u); } uint8_t AACAudioSpecificConfig::GetNumChannels() const { // Check for implicit signalling of HE-AAC and indicate stereo output // if the mono channel configuration is signalled. // See ISO-14496-3 Section 1.6.6.1.2 for details about this special casing. if (sbr_present_ && channel_config_ == 1) return 2; // CHANNEL_LAYOUT_STEREO // When Parametric Stereo is on, mono will be played as stereo. if (ps_present_ && channel_config_ == 1) return 2; // CHANNEL_LAYOUT_STEREO return num_channels_; } bool AACAudioSpecificConfig::ParseAudioObjectType(BitReader* bit_reader) { RCHECK(bit_reader->ReadBits(5, &audio_object_type_)); if (audio_object_type_ == AOT_ESCAPE) { uint8_t audioObjectTypeExt; RCHECK(bit_reader->ReadBits(6, &audioObjectTypeExt)); audio_object_type_ = static_cast(32 + audioObjectTypeExt); } return true; } // Currently this function only support GASpecificConfig defined in // ISO 14496 Part 3 Table 4.1 - Syntax of GASpecificConfig() bool AACAudioSpecificConfig::ParseDecoderGASpecificConfig( BitReader* bit_reader) { switch (audio_object_type_) { case 1: case 2: case 3: case 4: case 6: case 7: case 17: case 19: case 20: case 21: case 22: case 23: return ParseGASpecificConfig(bit_reader); case 42: // Skip UsacConfig() parsing until required RCHECK(bit_reader->SkipBits(bit_reader->bits_available())); return true; default: break; } return false; } bool AACAudioSpecificConfig::SkipErrorSpecificConfig() const { switch (audio_object_type_) { case 17: case 19: case 20: case 21: case 22: case 23: case 24: case 25: case 26: case 27: return false; default: break; } return true; } // The following code is written according to ISO 14496 part 3 Table 4.1 - // GASpecificConfig. bool AACAudioSpecificConfig::ParseGASpecificConfig(BitReader* bit_reader) { uint8_t extension_flag = 0; uint8_t depends_on_core_coder; uint16_t dummy; RCHECK(bit_reader->ReadBits(1, &dummy)); // frameLengthFlag RCHECK(bit_reader->ReadBits(1, &depends_on_core_coder)); if (depends_on_core_coder == 1) RCHECK(bit_reader->ReadBits(14, &dummy)); // coreCoderDelay RCHECK(bit_reader->ReadBits(1, &extension_flag)); if (channel_config_ == 0) RCHECK(ParseProgramConfigElement(bit_reader)); if (audio_object_type_ == 6 || audio_object_type_ == 20) RCHECK(bit_reader->ReadBits(3, &dummy)); // layerNr if (extension_flag) { if (audio_object_type_ == 22) { RCHECK(bit_reader->ReadBits(5, &dummy)); // numOfSubFrame RCHECK(bit_reader->ReadBits(11, &dummy)); // layer_length } if (audio_object_type_ == 17 || audio_object_type_ == 19 || audio_object_type_ == 20 || audio_object_type_ == 23) { RCHECK(bit_reader->ReadBits(3, &dummy)); // resilience flags } RCHECK(bit_reader->ReadBits(1, &dummy)); // extensionFlag3 } return true; } // ISO 14496-3 Table 4.2 – Syntax of program_config_element() // program_config_element() // { // element_instance_tag; 4 uimsbf // object_type; 2 uimsbf // sampling_frequency_index; 4 uimsbf // num_front_channel_elements; 4 uimsbf // num_side_channel_elements; 4 uimsbf // num_back_channel_elements; 4 uimsbf // num_lfe_channel_elements; 2 uimsbf // num_assoc_data_elements; 3 uimsbf // num_valid_cc_elements; 4 uimsbf // mono_mixdown_present; 1 uimsbf // if (mono_mixdown_present == 1) // mono_mixdown_element_number; 4 uimsbf // stereo_mixdown_present; 1 uimsbf // if (stereo_mixdown_present == 1) // stereo_mixdown_element_number; 4 uimsbf // matrix_mixdown_idx_present; 1 uimsbf // if (matrix_mixdown_idx_present == 1) { // matrix_mixdown_idx ; 2 uimsbf // pseudo_surround_enable; 1 uimsbf // } // for (i = 0; i < num_front_channel_elements; i++) { // front_element_is_cpe[i]; 1 bslbf // front_element_tag_select[i]; 4 uimsbf // } // for (i = 0; i < num_side_channel_elements; i++) { // side_element_is_cpe[i]; 1 bslbf // side_element_tag_select[i]; 4 uimsbf // } // for (i = 0; i < num_back_channel_elements; i++) { // back_element_is_cpe[i]; 1 bslbf // back_element_tag_select[i]; 4 uimsbf // } // for (i = 0; i < num_lfe_channel_elements; i++) // lfe_element_tag_select[i]; 4 uimsbf // for ( i = 0; i < num_assoc_data_elements; i++) // assoc_data_element_tag_select[i]; 4 uimsbf // for (i = 0; i < num_valid_cc_elements; i++) { // cc_element_is_ind_sw[i]; 1 uimsbf // valid_cc_element_tag_select[i]; 4 uimsbf // } // byte_alignment(); Note 1 // comment_field_bytes; 8 uimsbf // for (i = 0; i < comment_field_bytes; i++) // comment_field_data[i]; 8 uimsbf // } // Note 1: If called from within an AudioSpecificConfig(), this // byte_alignment shall be relative to the start of the AudioSpecificConfig(). bool AACAudioSpecificConfig::ParseProgramConfigElement(BitReader* bit_reader) { // element_instance_tag (4), object_type (2), sampling_frequency_index (4). RCHECK(bit_reader->SkipBits(4 + 2 + 4)); uint8_t num_front_channel_elements = 0; uint8_t num_side_channel_elements = 0; uint8_t num_back_channel_elements = 0; uint8_t num_lfe_channel_elements = 0; RCHECK(bit_reader->ReadBits(4, &num_front_channel_elements)); RCHECK(bit_reader->ReadBits(4, &num_side_channel_elements)); RCHECK(bit_reader->ReadBits(4, &num_back_channel_elements)); RCHECK(bit_reader->ReadBits(2, &num_lfe_channel_elements)); uint8_t num_assoc_data_elements = 0; RCHECK(bit_reader->ReadBits(3, &num_assoc_data_elements)); uint8_t num_valid_cc_elements = 0; RCHECK(bit_reader->ReadBits(4, &num_valid_cc_elements)); RCHECK(bit_reader->SkipBitsConditional(true, 4)); // mono_mixdown RCHECK(bit_reader->SkipBitsConditional(true, 4)); // stereo_mixdown RCHECK(bit_reader->SkipBitsConditional(true, 3)); // matrix_mixdown_idx num_channels_ = 0; RCHECK(CountChannels(num_front_channel_elements, &num_channels_, bit_reader)); RCHECK(CountChannels(num_side_channel_elements, &num_channels_, bit_reader)); RCHECK(CountChannels(num_back_channel_elements, &num_channels_, bit_reader)); num_channels_ += num_lfe_channel_elements; RCHECK(bit_reader->SkipBits(4 * num_lfe_channel_elements)); RCHECK(bit_reader->SkipBits(4 * num_assoc_data_elements)); RCHECK(bit_reader->SkipBits(5 * num_valid_cc_elements)); bit_reader->SkipToNextByte(); uint8_t comment_field_bytes = 0; RCHECK(bit_reader->ReadBits(8, &comment_field_bytes)); RCHECK(bit_reader->SkipBytes(comment_field_bytes)); return true; } } // namespace media } // namespace shaka