feat: Add xHE-AAC support (#1092)
Note: * An xHE-AAC capable encoder will automatically adjust the user-specified SAP/RAP interval to the allowed grid on which SAPs/RAPs can occur, e.g. `-rapInterval 5000` (5s) may result in actual SAPs/RAPs every 4.984s. * To ensure that a SAP/RAP starts a new segment, Shaka needs to be executed with a `--segment_duration` that is less than or equal to that adjusted value. * If every SAP/RAP should trigger a new segment, just set the segment duration to a very low value, e.g. `--segment_duration 0.1`.
This commit is contained in:
parent
31129eed64
commit
5d998fca7f
|
@ -31,6 +31,7 @@ Daniel Cantarín <canta@canta.com.ar>
|
|||
David Cavar <pal3thorn@gmail.com>
|
||||
Evgeny Zajcev <zevlg@yandex.ru>
|
||||
Gabe Kopley <gabe@philo.com>
|
||||
Geoff Jukes <geoff@jukes.org>
|
||||
Haoming Chen <hmchen@google.com>
|
||||
Jacob Trimble <modmaker@google.com>
|
||||
Joe Foraci <jforaci@gmail.com>
|
||||
|
|
|
@ -62,10 +62,9 @@ bool AACAudioSpecificConfig::Parse(const std::vector<uint8_t>& data) {
|
|||
// Syntax of AudioSpecificConfig.
|
||||
|
||||
// Read base configuration.
|
||||
// Audio Object Types specified in ISO 14496-3, Table 1.15.
|
||||
RCHECK(reader.ReadBits(5, &audio_object_type_));
|
||||
// Audio objects type >=31 is not supported yet.
|
||||
RCHECK(audio_object_type_ < 31);
|
||||
// Audio Object Types specified in "ISO/IEC 14496-3:2019, Table 1.19"
|
||||
RCHECK(ParseAudioObjectType(&reader));
|
||||
|
||||
RCHECK(reader.ReadBits(4, &frequency_index_));
|
||||
if (frequency_index_ == 0xf)
|
||||
RCHECK(reader.ReadBits(24, &frequency_));
|
||||
|
@ -82,9 +81,7 @@ bool AACAudioSpecificConfig::Parse(const std::vector<uint8_t>& data) {
|
|||
RCHECK(reader.ReadBits(4, &extension_frequency_index));
|
||||
if (extension_frequency_index == 0xf)
|
||||
RCHECK(reader.ReadBits(24, &extension_frequency_));
|
||||
RCHECK(reader.ReadBits(5, &audio_object_type_));
|
||||
// Audio objects type >=31 is not supported yet.
|
||||
RCHECK(audio_object_type_ < 31);
|
||||
RCHECK(ParseAudioObjectType(&reader));
|
||||
}
|
||||
|
||||
RCHECK(ParseDecoderGASpecificConfig(&reader));
|
||||
|
@ -132,9 +129,13 @@ bool AACAudioSpecificConfig::Parse(const std::vector<uint8_t>& data) {
|
|||
extension_frequency_ = kSampleRates[extension_frequency_index];
|
||||
}
|
||||
|
||||
return frequency_ != 0 && num_channels_ != 0 && audio_object_type_ >= 1 &&
|
||||
audio_object_type_ <= 4 && frequency_index_ != 0xf &&
|
||||
channel_config_ <= 7;
|
||||
if (audio_object_type_ == AOT_USAC) {
|
||||
return frequency_ != 0 && num_channels_ != 0 && channel_config_ <= 7;
|
||||
} else {
|
||||
return frequency_ != 0 && num_channels_ != 0 && audio_object_type_ >= 1 &&
|
||||
audio_object_type_ <= 4 && frequency_index_ != 0xf &&
|
||||
channel_config_ <= 7;
|
||||
}
|
||||
}
|
||||
|
||||
bool AACAudioSpecificConfig::ConvertToADTS(
|
||||
|
@ -205,6 +206,18 @@ uint8_t AACAudioSpecificConfig::GetNumChannels() const {
|
|||
return num_channels_;
|
||||
}
|
||||
|
||||
// Reads the audio object type from |bit_reader| into |audio_object_type_|.
//
// The type is coded in 5 bits. When those bits equal AOT_ESCAPE, a further
// 6-bit extension is read and the final type becomes 32 + extension.
//
// Returns true once the type has been stored. Failed reads are handled by
// RCHECK (presumably an early `return false` -- confirm against the macro).
bool AACAudioSpecificConfig::ParseAudioObjectType(BitReader* bit_reader) {
  RCHECK(bit_reader->ReadBits(5, &audio_object_type_));

  // Common case: the 5-bit value is already the final object type.
  if (audio_object_type_ != AOT_ESCAPE)
    return true;

  // Escape case: the real type is carried by the 6-bit extension field.
  uint8_t type_extension = 0;
  RCHECK(bit_reader->ReadBits(6, &type_extension));
  audio_object_type_ = static_cast<AudioObjectType>(32 + type_extension);
  return true;
}
|
||||
|
||||
// Currently this function only supports GASpecificConfig defined in
|
||||
// ISO 14496 Part 3 Table 4.1 - Syntax of GASpecificConfig()
|
||||
bool AACAudioSpecificConfig::ParseDecoderGASpecificConfig(
|
||||
|
@ -223,6 +236,10 @@ bool AACAudioSpecificConfig::ParseDecoderGASpecificConfig(
|
|||
case 22:
|
||||
case 23:
|
||||
return ParseGASpecificConfig(bit_reader);
|
||||
case 42:
|
||||
// Skip UsacConfig() parsing until required
|
||||
RCHECK(bit_reader->SkipBits(bit_reader->bits_available()));
|
||||
return true;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -66,10 +66,10 @@ class AACAudioSpecificConfig {
|
|||
AOT_ER_AAC_ELD = 39, // Error Resilient Enhanced Low Delay
|
||||
AOT_SMR_SIMPLE = 40, // Symbolic Music Representation Simple
|
||||
AOT_SMR_MAIN = 41, // Symbolic Music Representation Main
|
||||
AOT_USAC_NOSBR = 42, // Unified Speech and Audio Coding (no SBR)
|
||||
AOT_USAC = 42, // Unified Speech and Audio Coding
|
||||
AOT_SAOC = 43, // Spatial Audio Object Coding
|
||||
AOT_LD_SURROUND = 44, // Low Delay MPEG Surround
|
||||
AOT_USAC = 45, // Unified Speech and Audio Coding
|
||||
SAOC_DE = 45, // Spatial Audio Object Coding Dialogue Enhancement
|
||||
};
|
||||
|
||||
AACAudioSpecificConfig();
|
||||
|
@ -114,6 +114,7 @@ class AACAudioSpecificConfig {
|
|||
void set_sbr_present(bool sbr_present) { sbr_present_ = sbr_present; }
|
||||
|
||||
private:
|
||||
bool ParseAudioObjectType(BitReader* bit_reader);
|
||||
bool ParseDecoderGASpecificConfig(BitReader* bit_reader);
|
||||
bool SkipErrorSpecificConfig() const;
|
||||
// Parse GASpecificConfig. Calls |ParseProgramConfigElement| if
|
||||
|
|
|
@ -101,6 +101,23 @@ TEST(AACAudioSpecificConfigTest, SixChannelTest) {
|
|||
aac_audio_specific_config.GetAudioObjectType());
|
||||
}
|
||||
|
||||
// Parses a 25-byte AudioSpecificConfig and checks that it is accepted and
// reported as AOT_USAC with a 38400 Hz sample rate and two channels.
TEST(AACAudioSpecificConfigTest, UsacTest) {
  const std::vector<uint8_t> usac_config = {
      0xF9, 0x5E, 0x01, 0x2C, 0x00, 0x52, 0x42, 0x2C, 0xC0, 0x51,
      0x17, 0x55, 0x4F, 0x36, 0x00, 0x42, 0x80, 0x01, 0x00, 0x04,
      0xA8, 0x82, 0x34, 0xE5, 0x80,
  };

  AACAudioSpecificConfig config;
  EXPECT_TRUE(config.Parse(usac_config));

  EXPECT_EQ(38400u, config.GetSamplesPerSecond());
  EXPECT_EQ(2u, config.GetNumChannels());
  EXPECT_EQ(AACAudioSpecificConfig::AOT_USAC, config.GetAudioObjectType());
}
|
||||
|
||||
TEST(AACAudioSpecificConfigTest, ProgramConfigElementTest) {
|
||||
uint8_t buffer[] = {
|
||||
0x11, 0x80, 0x04, 0xC8, 0x44, 0x00, 0x20, 0x00, 0xC4,
|
||||
|
|
Loading…
Reference in New Issue