From 5d998fca7fb1d3d9c98f5b26561f842edcb2a925 Mon Sep 17 00:00:00 2001 From: Geoff Jukes Date: Tue, 18 Oct 2022 10:14:31 -0700 Subject: [PATCH] feat: Add xHE-AAC support (#1092) Note: * An xHE-AAC capable encoder will auto adjust the user-specified SAP/RAP value to the allowed grid where SAP/RAPs can occur. e.g.: `-rapInterval 5000` (5s) may result in actual SAPs/RAPs every 4.984s. * To ensure SAP/RAP starts a new segment, Shaka needs to be executed with a "--segment_duration" that is less than or equal to that adjusted value. * If every SAP/RAP should trigger a new segment, just set the segment length to a very low value e.g.: `--segment_duration 0.1` --- CONTRIBUTORS | 1 + .../media/codecs/aac_audio_specific_config.cc | 39 +++++++++++++------ .../media/codecs/aac_audio_specific_config.h | 5 ++- .../aac_audio_specific_config_unittest.cc | 17 ++++++++ 4 files changed, 49 insertions(+), 13 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 92f89f7969..b11338f611 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -31,6 +31,7 @@ Daniel Cantarín David Cavar Evgeny Zajcev Gabe Kopley +Geoff Jukes Haoming Chen Jacob Trimble Joe Foraci diff --git a/packager/media/codecs/aac_audio_specific_config.cc b/packager/media/codecs/aac_audio_specific_config.cc index 486417868a..ede05c1a0a 100644 --- a/packager/media/codecs/aac_audio_specific_config.cc +++ b/packager/media/codecs/aac_audio_specific_config.cc @@ -62,10 +62,9 @@ bool AACAudioSpecificConfig::Parse(const std::vector& data) { // Syntax of AudioSpecificConfig. // Read base configuration. - // Audio Object Types specified in ISO 14496-3, Table 1.15. - RCHECK(reader.ReadBits(5, &audio_object_type_)); - // Audio objects type >=31 is not supported yet.
- RCHECK(audio_object_type_ < 31); + // Audio Object Types specified in "ISO/IEC 14496-3:2019, Table 1.19" + RCHECK(ParseAudioObjectType(&reader)); + RCHECK(reader.ReadBits(4, &frequency_index_)); if (frequency_index_ == 0xf) RCHECK(reader.ReadBits(24, &frequency_)); @@ -82,9 +81,7 @@ bool AACAudioSpecificConfig::Parse(const std::vector& data) { RCHECK(reader.ReadBits(4, &extension_frequency_index)); if (extension_frequency_index == 0xf) RCHECK(reader.ReadBits(24, &extension_frequency_)); - RCHECK(reader.ReadBits(5, &audio_object_type_)); - // Audio objects type >=31 is not supported yet. - RCHECK(audio_object_type_ < 31); + RCHECK(ParseAudioObjectType(&reader)); } RCHECK(ParseDecoderGASpecificConfig(&reader)); @@ -131,10 +128,14 @@ bool AACAudioSpecificConfig::Parse(const std::vector& data) { RCHECK(extension_frequency_index < arraysize(kSampleRates)); extension_frequency_ = kSampleRates[extension_frequency_index]; } - - return frequency_ != 0 && num_channels_ != 0 && audio_object_type_ >= 1 && - audio_object_type_ <= 4 && frequency_index_ != 0xf && - channel_config_ <= 7; + + if (audio_object_type_ == AOT_USAC) { + return frequency_ != 0 && num_channels_ != 0 && channel_config_ <= 7; + } else { + return frequency_ != 0 && num_channels_ != 0 && audio_object_type_ >= 1 && + audio_object_type_ <= 4 && frequency_index_ != 0xf && + channel_config_ <= 7; + } } bool AACAudioSpecificConfig::ConvertToADTS( @@ -205,6 +206,18 @@ uint8_t AACAudioSpecificConfig::GetNumChannels() const { return num_channels_; } +bool AACAudioSpecificConfig::ParseAudioObjectType(BitReader* bit_reader) { + RCHECK(bit_reader->ReadBits(5, &audio_object_type_)); + + if (audio_object_type_ == AOT_ESCAPE) { + uint8_t audioObjectTypeExt; + RCHECK(bit_reader->ReadBits(6, &audioObjectTypeExt)); + audio_object_type_ = static_cast(32 + audioObjectTypeExt); + } + + return true; +} + // Currently this function only support GASpecificConfig defined in // ISO 14496 Part 3 Table 4.1 - Syntax of 
GASpecificConfig() bool AACAudioSpecificConfig::ParseDecoderGASpecificConfig( @@ -223,6 +236,10 @@ bool AACAudioSpecificConfig::ParseDecoderGASpecificConfig( case 22: case 23: return ParseGASpecificConfig(bit_reader); + case 42: + // Skip UsacConfig() parsing until required + RCHECK(bit_reader->SkipBits(bit_reader->bits_available())); + return true; default: break; } diff --git a/packager/media/codecs/aac_audio_specific_config.h b/packager/media/codecs/aac_audio_specific_config.h index e83199c66f..ca1c93a4e6 100644 --- a/packager/media/codecs/aac_audio_specific_config.h +++ b/packager/media/codecs/aac_audio_specific_config.h @@ -66,10 +66,10 @@ class AACAudioSpecificConfig { AOT_ER_AAC_ELD = 39, // Error Resilient Enhanced Low Delay AOT_SMR_SIMPLE = 40, // Symbolic Music Representation Simple AOT_SMR_MAIN = 41, // Symbolic Music Representation Main - AOT_USAC_NOSBR = 42, // Unified Speech and Audio Coding (no SBR) + AOT_USAC = 42, // Unified Speech and Audio Coding AOT_SAOC = 43, // Spatial Audio Object Coding AOT_LD_SURROUND = 44, // Low Delay MPEG Surround - AOT_USAC = 45, // Unified Speech and Audio Coding + SAOC_DE = 45, // Spatial Audio Object Coding Dialogue Enhancement }; AACAudioSpecificConfig(); @@ -114,6 +114,7 @@ class AACAudioSpecificConfig { void set_sbr_present(bool sbr_present) { sbr_present_ = sbr_present; } private: + bool ParseAudioObjectType(BitReader* bit_reader); bool ParseDecoderGASpecificConfig(BitReader* bit_reader); bool SkipErrorSpecificConfig() const; // Parse GASpecificConfig. 
Calls |ParseProgramConfigElement| if diff --git a/packager/media/codecs/aac_audio_specific_config_unittest.cc b/packager/media/codecs/aac_audio_specific_config_unittest.cc index dd1d9aa54a..b8aa0ddf40 100644 --- a/packager/media/codecs/aac_audio_specific_config_unittest.cc +++ b/packager/media/codecs/aac_audio_specific_config_unittest.cc @@ -101,6 +101,23 @@ TEST(AACAudioSpecificConfigTest, SixChannelTest) { aac_audio_specific_config.GetAudioObjectType()); } +TEST(AACAudioSpecificConfigTest, UsacTest) { + AACAudioSpecificConfig aac_audio_specific_config; + uint8_t buffer[] = { + 0xF9, 0x5E, 0x01, 0x2C, 0x00, 0x52, 0x42, 0x2C, 0xC0, 0x51, + 0x17, 0x55, 0x4F, 0x36, 0x00, 0x42, 0x80, 0x01, 0x00, 0x04, + 0xA8, 0x82, 0x34, 0xE5, 0x80 + }; + + std::vector data(std::begin(buffer), std::end(buffer)); + + EXPECT_TRUE(aac_audio_specific_config.Parse(data)); + EXPECT_EQ(38400u, aac_audio_specific_config.GetSamplesPerSecond()); + EXPECT_EQ(2u, aac_audio_specific_config.GetNumChannels()); + EXPECT_EQ(AACAudioSpecificConfig::AOT_USAC, + aac_audio_specific_config.GetAudioObjectType()); +} + TEST(AACAudioSpecificConfigTest, ProgramConfigElementTest) { uint8_t buffer[] = { 0x11, 0x80, 0x04, 0xC8, 0x44, 0x00, 0x20, 0x00, 0xC4,