feat: Add xHE-AAC support (#1092)

Note:
* An xHE-AAC capable encoder will automatically adjust the user-specified
  SAP/RAP interval to the allowed grid on which SAPs/RAPs can occur.
  For example, `-rapInterval 5000` (5s) may result in actual SAPs/RAPs every
  4.984s.
* To ensure each SAP/RAP starts a new segment, Shaka needs to be executed with
  a `--segment_duration` that is less than or equal to that adjusted value.
* If every SAP/RAP should trigger a new segment, just set the segment
  duration to a very low value, e.g. `--segment_duration 0.1`.
This commit is contained in:
Geoff Jukes 2022-10-18 10:14:31 -07:00 committed by Joey Parrish
parent 1131bf7eea
commit 84ba4afc51
4 changed files with 49 additions and 13 deletions

View File

@ -31,6 +31,7 @@ Daniel Cantarín <canta@canta.com.ar>
David Cavar <pal3thorn@gmail.com>
Evgeny Zajcev <zevlg@yandex.ru>
Gabe Kopley <gabe@philo.com>
Geoff Jukes <geoff@jukes.org>
Haoming Chen <hmchen@google.com>
Jacob Trimble <modmaker@google.com>
Joe Foraci <jforaci@gmail.com>

View File

@ -62,10 +62,9 @@ bool AACAudioSpecificConfig::Parse(const std::vector<uint8_t>& data) {
// Syntax of AudioSpecificConfig.
// Read base configuration.
// Audio Object Types specified in ISO 14496-3, Table 1.15.
RCHECK(reader.ReadBits(5, &audio_object_type_));
// Audio objects type >=31 is not supported yet.
RCHECK(audio_object_type_ < 31);
// Audio Object Types specified in "ISO/IEC 14496-3:2019, Table 1.19"
RCHECK(ParseAudioObjectType(&reader));
RCHECK(reader.ReadBits(4, &frequency_index_));
if (frequency_index_ == 0xf)
RCHECK(reader.ReadBits(24, &frequency_));
@ -82,9 +81,7 @@ bool AACAudioSpecificConfig::Parse(const std::vector<uint8_t>& data) {
RCHECK(reader.ReadBits(4, &extension_frequency_index));
if (extension_frequency_index == 0xf)
RCHECK(reader.ReadBits(24, &extension_frequency_));
RCHECK(reader.ReadBits(5, &audio_object_type_));
// Audio objects type >=31 is not supported yet.
RCHECK(audio_object_type_ < 31);
RCHECK(ParseAudioObjectType(&reader));
}
RCHECK(ParseDecoderGASpecificConfig(&reader));
@ -131,10 +128,14 @@ bool AACAudioSpecificConfig::Parse(const std::vector<uint8_t>& data) {
RCHECK(extension_frequency_index < arraysize(kSampleRates));
extension_frequency_ = kSampleRates[extension_frequency_index];
}
return frequency_ != 0 && num_channels_ != 0 && audio_object_type_ >= 1 &&
audio_object_type_ <= 4 && frequency_index_ != 0xf &&
channel_config_ <= 7;
if (audio_object_type_ == AOT_USAC) {
return frequency_ != 0 && num_channels_ != 0 && channel_config_ <= 7;
} else {
return frequency_ != 0 && num_channels_ != 0 && audio_object_type_ >= 1 &&
audio_object_type_ <= 4 && frequency_index_ != 0xf &&
channel_config_ <= 7;
}
}
bool AACAudioSpecificConfig::ConvertToADTS(
@ -205,6 +206,18 @@ uint8_t AACAudioSpecificConfig::GetNumChannels() const {
return num_channels_;
}
// Reads the audio object type from |bit_reader| into |audio_object_type_|.
//
// The type is stored as a 5-bit value. When that value equals AOT_ESCAPE, a
// further 6-bit extension follows and the resulting type is 32 plus that
// extension. Returns false (via RCHECK) if either read fails, true otherwise.
bool AACAudioSpecificConfig::ParseAudioObjectType(BitReader* bit_reader) {
  RCHECK(bit_reader->ReadBits(5, &audio_object_type_));

  // Common case: the 5-bit field already holds the final object type.
  if (audio_object_type_ != AOT_ESCAPE)
    return true;

  // Escape case: the real type is carried in the 6-bit extension field.
  uint8_t type_extension = 0;
  RCHECK(bit_reader->ReadBits(6, &type_extension));
  audio_object_type_ = static_cast<AudioObjectType>(32 + type_extension);
  return true;
}
// Currently this function only supports GASpecificConfig defined in
// ISO 14496 Part 3 Table 4.1 - Syntax of GASpecificConfig()
bool AACAudioSpecificConfig::ParseDecoderGASpecificConfig(
@ -223,6 +236,10 @@ bool AACAudioSpecificConfig::ParseDecoderGASpecificConfig(
case 22:
case 23:
return ParseGASpecificConfig(bit_reader);
case 42:
// Skip UsacConfig() parsing until required
RCHECK(bit_reader->SkipBits(bit_reader->bits_available()));
return true;
default:
break;
}

View File

@ -66,10 +66,10 @@ class AACAudioSpecificConfig {
AOT_ER_AAC_ELD = 39, // Error Resilient Enhanced Low Delay
AOT_SMR_SIMPLE = 40, // Symbolic Music Representation Simple
AOT_SMR_MAIN = 41, // Symbolic Music Representation Main
AOT_USAC_NOSBR = 42, // Unified Speech and Audio Coding (no SBR)
AOT_USAC = 42, // Unified Speech and Audio Coding
AOT_SAOC = 43, // Spatial Audio Object Coding
AOT_LD_SURROUND = 44, // Low Delay MPEG Surround
AOT_USAC = 45, // Unified Speech and Audio Coding
SAOC_DE = 45, // Spatial Audio Object Coding Dialogue Enhancement
};
AACAudioSpecificConfig();
@ -114,6 +114,7 @@ class AACAudioSpecificConfig {
void set_sbr_present(bool sbr_present) { sbr_present_ = sbr_present; }
private:
bool ParseAudioObjectType(BitReader* bit_reader);
bool ParseDecoderGASpecificConfig(BitReader* bit_reader);
bool SkipErrorSpecificConfig() const;
// Parse GASpecificConfig. Calls |ParseProgramConfigElement| if

View File

@ -101,6 +101,23 @@ TEST(AACAudioSpecificConfigTest, SixChannelTest) {
aac_audio_specific_config.GetAudioObjectType());
}
// Checks that a config carrying the USAC audio object type parses
// successfully and reports the expected sample rate, channel count and
// audio object type.
TEST(AACAudioSpecificConfigTest, UsacTest) {
  AACAudioSpecificConfig usac_config;
  const uint8_t kUsacConfigBytes[] = {
      0xF9, 0x5E, 0x01, 0x2C, 0x00, 0x52, 0x42, 0x2C, 0xC0, 0x51,
      0x17, 0x55, 0x4F, 0x36, 0x00, 0x42, 0x80, 0x01, 0x00, 0x04,
      0xA8, 0x82, 0x34, 0xE5, 0x80,
  };
  const std::vector<uint8_t> config_data(std::begin(kUsacConfigBytes),
                                         std::end(kUsacConfigBytes));

  EXPECT_TRUE(usac_config.Parse(config_data));

  EXPECT_EQ(38400u, usac_config.GetSamplesPerSecond());
  EXPECT_EQ(2u, usac_config.GetNumChannels());
  EXPECT_EQ(AACAudioSpecificConfig::AOT_USAC,
            usac_config.GetAudioObjectType());
}
TEST(AACAudioSpecificConfigTest, ProgramConfigElementTest) {
uint8_t buffer[] = {
0x11, 0x80, 0x04, 0xC8, 0x44, 0x00, 0x20, 0x00, 0xC4,