shaka-packager/packager/media/codecs/aac_audio_specific_config.h

147 lines
6.4 KiB
C
Raw Permalink Normal View History

// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef PACKAGER_MEDIA_CODECS_AAC_AUDIO_SPECIFIC_CONFIG_H_
#define PACKAGER_MEDIA_CODECS_AAC_AUDIO_SPECIFIC_CONFIG_H_
#include <cstddef>
#include <cstdint>
#include <vector>
namespace shaka {
namespace media {

class BitReader;

/// This class parses the AAC information from decoder specific information
/// embedded in the @b esds box in an ISO BMFF file.
/// Please refer to ISO 14496 Part 3 Table 1.13 - Syntax of AudioSpecificConfig
/// for more details.
///
/// Parse() and ConvertToADTS() are virtual so that they can be mocked.
class AACAudioSpecificConfig {
 public:
  /// Audio Object Types specified in ISO 14496-3 (2005), Table 1.3.
  /// Values that do not appear below (10, 11 and 18) have no enumerator here.
  enum AudioObjectType {
    AOT_NULL = 0,
    AOT_AAC_MAIN = 1,          // Main
    AOT_AAC_LC = 2,            // Low Complexity
    AOT_AAC_SSR = 3,           // Scalable Sample Rate
    AOT_AAC_LTP = 4,           // Long Term Prediction
    AOT_SBR = 5,               // Spectral Band Replication
    AOT_AAC_SCALABLE = 6,      // Scalable
    AOT_TWINVQ = 7,            // Twin Vector Quantizer
    AOT_CELP = 8,              // Code Excited Linear Prediction
    AOT_HVXC = 9,              // Harmonic Vector eXcitation Coding
    AOT_TTSI = 12,             // Text-To-Speech Interface
    AOT_MAINSYNTH = 13,        // Main Synthesis
    AOT_WAVESYNTH = 14,        // Wavetable Synthesis
    AOT_MIDI = 15,             // General MIDI
    AOT_SAFX = 16,             // Algorithmic Synthesis and Audio Effects
    AOT_ER_AAC_LC = 17,        // Error Resilient Low Complexity
    AOT_ER_AAC_LTP = 19,       // Error Resilient Long Term Prediction
    AOT_ER_AAC_SCALABLE = 20,  // Error Resilient Scalable
    AOT_ER_TWINVQ = 21,        // Error Resilient Twin Vector Quantizer
    AOT_ER_BSAC = 22,          // Error Resilient Bit-Sliced Arithmetic Coding
    AOT_ER_AAC_LD = 23,        // Error Resilient Low Delay
    AOT_ER_CELP = 24,          // Error Resilient Code Excited Linear
                               // Prediction
    AOT_ER_HVXC = 25,          // Error Resilient Harmonic Vector eXcitation
                               // Coding
    AOT_ER_HILN = 26,          // Error Resilient Harmonic and Individual Lines
                               // plus Noise
    AOT_ER_PARAM = 27,         // Error Resilient Parametric
    AOT_SSC = 28,              // SinuSoidal Coding
    AOT_PS = 29,               // Parametric Stereo
    AOT_SURROUND = 30,         // MPEG Surround
    AOT_ESCAPE = 31,           // Escape Value
    AOT_L1 = 32,               // Layer 1
    AOT_L2 = 33,               // Layer 2
    AOT_L3 = 34,               // Layer 3
    AOT_DST = 35,              // Direct Stream Transfer
    AOT_ALS = 36,              // Audio LosslesS
    AOT_SLS = 37,              // Scalable LosslesS
    AOT_SLS_NON_CORE = 38,     // Scalable LosslesS (non core)
    AOT_ER_AAC_ELD = 39,       // Error Resilient Enhanced Low Delay
    AOT_SMR_SIMPLE = 40,       // Symbolic Music Representation Simple
    AOT_SMR_MAIN = 41,         // Symbolic Music Representation Main
    AOT_USAC = 42,             // Unified Speech and Audio Coding
    AOT_SAOC = 43,             // Spatial Audio Object Coding
    AOT_LD_SURROUND = 44,      // Low Delay MPEG Surround
    AOT_SAOC_DE = 45,          // Spatial Audio Object Coding Dialogue
                               // Enhancement
    // Unprefixed spelling of AOT_SAOC_DE, kept so that existing users of the
    // old name keep compiling. Prefer AOT_SAOC_DE in new code.
    SAOC_DE = AOT_SAOC_DE,
  };

  AACAudioSpecificConfig();
  virtual ~AACAudioSpecificConfig();

  /// Parse the AAC config from decoder specific information embedded in an @b
  /// esds box. The function will parse the data and get the
  /// ElementaryStreamDescriptor, then it will parse the
  /// ElementaryStreamDescriptor to get audio stream configurations.
  /// @param data contains decoder specific information from an @b esds box.
  /// @return true if successful, false otherwise.
  virtual bool Parse(const std::vector<uint8_t>& data);

  /// Convert a raw AAC frame into an AAC frame with an ADTS header.
  /// @param data points to the raw AAC frame to be converted.
  /// @param data_size the size of a raw AAC frame.
  /// @param[out] audio_frame contains the converted frame if successful; it is
  ///             untouched on failure.
  /// @return true on success, false otherwise.
  virtual bool ConvertToADTS(const uint8_t* data,
                             size_t data_size,
                             std::vector<uint8_t>* audio_frame) const;

  /// @return The audio object type for this AAC config, with possible extension
  ///         considered.
  AudioObjectType GetAudioObjectType() const;

  /// @return Sample rate for the AAC stream, with possible extensions
  ///         considered.
  uint32_t GetSamplesPerSecond() const;

  /// @return Number of channels for the AAC stream, with possible extensions
  ///         considered.
  uint8_t GetNumChannels() const;

  /// Size in bytes of the ADTS header added by ConvertToADTS().
  static const size_t kADTSHeaderSize = 7;

  /// @return whether Spectral Band Replication (SBR) is present in the stream.
  bool sbr_present() const { return sbr_present_; }

  /// Indicate whether SBR is present in the stream.
  void set_sbr_present(bool sbr_present) { sbr_present_ = sbr_present; }

 private:
  // Internal parsing helpers; they are only declared in this header.
  bool ParseAudioObjectType(BitReader* bit_reader);
  bool ParseDecoderGASpecificConfig(BitReader* bit_reader);

  // NOTE(review): takes no reader and is const, so it presumably only inspects
  // already-parsed state for ErrorSpecificConfig handling -- confirm against
  // the implementation file.
  bool SkipErrorSpecificConfig() const;

  // Parse GASpecificConfig. Calls |ParseProgramConfigElement| if
  // |channel_config_| == 0.
  bool ParseGASpecificConfig(BitReader* bit_reader);

  // Parse program_config_element(). |num_channels_| will be updated.
  bool ParseProgramConfigElement(BitReader* bit_reader);

  // The following variables store the AAC specific configuration information
  // that are used to generate the ADTS header.
  AudioObjectType audio_object_type_ = AOT_NULL;
  uint8_t frequency_index_ = 0;
  uint8_t channel_config_ = 0;

  // Is Spectral Band Replication (SBR) available?
  bool sbr_present_ = false;
  // Is Parametric Stereo available?
  bool ps_present_ = false;

  // The following variables store audio configuration information.
  // They are based on the AAC specific configuration but can be overridden
  // by extensions in elementary stream descriptor.
  uint32_t frequency_ = 0;
  uint32_t extension_frequency_ = 0;
  uint8_t num_channels_ = 0;
};

}  // namespace media
}  // namespace shaka
#endif // PACKAGER_MEDIA_CODECS_AAC_AUDIO_SPECIFIC_CONFIG_H_