Fix non standard channel layout AAC audio with PCE encoding

The latest version of FFmpeg encodes non-standard channel layouts, e.g. 5.1(side), in AAC using PCE.

This is now supported with the below changes:
- Allow channel_configuration in ADTS header to be 0, as the actual channel layout is specified
  in PCE.
- Add GetFrameSizeWithoutParsing to determine the frame size before actually parsing the frame.
- Skip and resume later if the whole frame is not yet available.
- Also ensure that the next frame starts with a proper sync word.

Fixes #598.
This commit is contained in:
Phillip Baxter 2019-10-01 12:15:58 -05:00 committed by Kongqun Yang
parent a86a697d8d
commit 00fde07bf7
8 changed files with 67 additions and 16 deletions

View File

@ -40,6 +40,19 @@ const size_t kFrameSizeCodeTable[][3] = {
{1920, 1393, 1280}, {1920, 1394, 1280},
};
// Calculate the size of the frame from the sample rate code and the
// frame size code.
// The codes may be taken straight from unvalidated stream bytes (see
// GetFrameSizeWithoutParsing), so out-of-range values are rejected at runtime
// rather than being used as table indices.
// @return the size of the frame (header + payload), or 0 if either code is
//         outside the lookup tables.
size_t CalcFrameSize(uint8_t fscod, uint8_t frmsizecod) {
  const size_t kNumFscode = arraysize(kAc3SampleRateTable);
  if (fscod >= kNumFscode || frmsizecod >= arraysize(kFrameSizeCodeTable))
    return 0;
  // The order of frequencies are reversed in |kFrameSizeCodeTable| compared to
  // |kAc3SampleRateTable|.
  const size_t index = kNumFscode - 1 - fscod;
  return kFrameSizeCodeTable[frmsizecod][index] * 2;
}
} // namespace
bool Ac3Header::IsSyncWord(const uint8_t* buf) const {
@ -104,13 +117,15 @@ size_t Ac3Header::GetHeaderSize() const {
}
// Returns the size of the frame (header + payload) derived from |fscod_| and
// |frmsizecod_|.
size_t Ac3Header::GetFrameSize() const {
const size_t kNumFscode = arraysize(kAc3SampleRateTable);
DCHECK_LT(fscod_, kNumFscode);
DCHECK_LT(frmsizecod_, arraysize(kFrameSizeCodeTable));
// The order of frequencies are reversed in |kFrameSizeCodeTable| compared to
// |kAc3SampleRateTable|.
const int index = kNumFscode - 1 - fscod_;
return kFrameSizeCodeTable[frmsizecod_][index] * 2;
// NOTE(review): as written this second return is unreachable; it looks like
// the intended replacement for the inline table lookup above -- confirm which
// of the two is meant to be live.
return CalcFrameSize(fscod_, frmsizecod_);
}
size_t Ac3Header::GetFrameSizeWithoutParsing(const uint8_t* data,
size_t num_bytes) const {
DCHECK_GT(num_bytes, static_cast<size_t>(4));
uint8_t fscod = data[4] >> 6;
uint8_t frmsizecod = data[4] & 0x3f;
return CalcFrameSize(fscod, frmsizecod);
}
void Ac3Header::GetAudioSpecificConfig(std::vector<uint8_t>* buffer) const {

View File

@ -32,6 +32,8 @@ class Ac3Header : public AudioHeader {
bool Parse(const uint8_t* adts_frame, size_t adts_frame_size) override;
size_t GetHeaderSize() const override;
size_t GetFrameSize() const override;
// Computes the frame size from the raw bytes at |data| without calling
// Parse().
size_t GetFrameSizeWithoutParsing(const uint8_t* data,
size_t num_bytes) const override;
void GetAudioSpecificConfig(std::vector<uint8_t>* buffer) const override;
uint8_t GetObjectType() const override;
uint32_t GetSamplingFrequency() const override;

View File

@ -74,6 +74,9 @@ TEST_F(Ac3HeaderTest, Parse44100HzSuccess) {
const uint8_t kExpectedAudioSpecificConfig[] = {0x50, 0x11, 0x40};
Ac3Header ac3_header;
EXPECT_EQ(kExpectedFrameSize,
ac3_header.GetFrameSizeWithoutParsing(ac3_frame_44100_hz_.data(),
ac3_frame_44100_hz_.size()));
ASSERT_TRUE(
ac3_header.Parse(ac3_frame_44100_hz_.data(), ac3_frame_44100_hz_.size()));
EXPECT_EQ(kExpectedFrameSize, ac3_header.GetFrameSize());
@ -96,6 +99,9 @@ TEST_F(Ac3HeaderTest, Parse48kHzSuccess) {
const uint8_t kExpectedAudioSpecificConfig[] = {0x10, 0x11, 0x40};
Ac3Header ac3_header;
EXPECT_EQ(kExpectedFrameSize,
ac3_header.GetFrameSizeWithoutParsing(ac3_frame_48k_hz_.data(),
ac3_frame_48k_hz_.size()));
ASSERT_TRUE(
ac3_header.Parse(ac3_frame_48k_hz_.data(), ac3_frame_48k_hz_.size()));
EXPECT_EQ(kExpectedFrameSize, ac3_header.GetFrameSize());
@ -118,6 +124,10 @@ TEST_F(Ac3HeaderTest, ParseMultiChannelSuccess) {
const uint8_t kExpectedAudioSpecificConfig[] = {0x50, 0x3D, 0xE0};
Ac3Header ac3_header;
EXPECT_EQ(
kExpectedFrameSize,
ac3_header.GetFrameSizeWithoutParsing(ac3_frame_six_channels_.data(),
ac3_frame_six_channels_.size()));
ASSERT_TRUE(ac3_header.Parse(ac3_frame_six_channels_.data(),
ac3_frame_six_channels_.size()));
EXPECT_EQ(kExpectedFrameSize, ac3_header.GetFrameSize());

View File

@ -63,8 +63,7 @@ bool AdtsHeader::Parse(const uint8_t* adts_frame, size_t adts_frame_size) {
// Skip private stream bit.
RCHECK(frame.SkipBits(1));
RCHECK(frame.ReadBits(3, &channel_configuration_));
RCHECK((channel_configuration_ > 0) &&
(channel_configuration_ < kAdtsNumChannelsTableSize));
RCHECK(channel_configuration_ < kAdtsNumChannelsTableSize);
// Skip originality, home and copyright info.
RCHECK(frame.SkipBits(4));
RCHECK(frame.ReadBits(13, &frame_size_));
@ -89,6 +88,13 @@ size_t AdtsHeader::GetFrameSize() const {
return frame_size_;
}
// Extracts the 13-bit frame length field directly from the raw header bytes
// at |data| without running Parse(). The field spans the low 2 bits of byte 3,
// all of byte 4 and the top 3 bits of byte 5.
// |data| may be arbitrary stream bytes, so the available length is checked at
// runtime rather than only in DCHECK-enabled builds.
// @return the size of the frame (header + payload), or 0 if fewer than six
//         bytes are available.
size_t AdtsHeader::GetFrameSizeWithoutParsing(const uint8_t* data,
                                              size_t num_bytes) const {
  const size_t kBytesNeeded = 6;
  if (num_bytes < kBytesNeeded)
    return 0;
  const size_t high_bits = static_cast<size_t>(data[3] & 0x3) << 11;
  const size_t middle_bits = static_cast<size_t>(data[4]) << 3;
  const size_t low_bits = static_cast<size_t>(data[5]) >> 5;
  return high_bits | middle_bits | low_bits;
}
void AdtsHeader::GetAudioSpecificConfig(std::vector<uint8_t>* buffer) const {
DCHECK(buffer);
buffer->clear();
@ -109,7 +115,6 @@ uint32_t AdtsHeader::GetSamplingFrequency() const {
}
// Returns the entry of |kAdtsNumChannelsTable| selected by
// |channel_configuration_|.
uint8_t AdtsHeader::GetNumChannels() const {
// NOTE(review): the change description allows channel_configuration 0 (layout
// carried in the PCE); this check would fire for that value in DCHECK-enabled
// builds -- confirm whether it is meant to remain.
DCHECK_GT(channel_configuration_, 0);
DCHECK_LT(channel_configuration_, kAdtsNumChannelsTableSize);
return kAdtsNumChannelsTable[channel_configuration_];
}

View File

@ -32,6 +32,8 @@ class AdtsHeader : public AudioHeader {
bool Parse(const uint8_t* adts_frame, size_t adts_frame_size) override;
size_t GetHeaderSize() const override;
size_t GetFrameSize() const override;
// Computes the frame size from the raw bytes at |data| without calling
// Parse().
size_t GetFrameSizeWithoutParsing(const uint8_t* data,
size_t num_bytes) const override;
void GetAudioSpecificConfig(std::vector<uint8_t>* buffer) const override;
uint8_t GetObjectType() const override;
uint32_t GetSamplingFrequency() const override;

View File

@ -54,6 +54,9 @@ TEST_F(AdtsHeaderTest, ParseSuccess) {
const uint32_t kExpectedSamplingFrequency(44100);
const uint8_t kExpectedNumChannels(2);
AdtsHeader adts_header;
EXPECT_EQ(adts_frame_.size(),
adts_header.GetFrameSizeWithoutParsing(adts_frame_.data(),
adts_frame_.size()));
ASSERT_TRUE(adts_header.Parse(adts_frame_.data(), adts_frame_.size()));
EXPECT_EQ(adts_frame_.size(), adts_header.GetFrameSize());
EXPECT_EQ(kExpectedHeaderSize, adts_header.GetHeaderSize());

View File

@ -50,6 +50,12 @@ class AudioHeader {
/// @return the size of frame (header + payload).
virtual size_t GetFrameSize() const = 0;
/// Obtain the size of the frame from the header data without doing a full
/// Parse.
/// @param data points to the start of the candidate frame header.
/// @param num_bytes is the number of bytes available at @a data.
/// @return the size of the frame (header + payload).
virtual size_t GetFrameSizeWithoutParsing(const uint8_t* data,
size_t num_bytes) const = 0;
/// Synthesize an AudioSpecificConfig record from the fields within the audio
/// header.
/// Should only be called after a successful Parse.

View File

@ -59,17 +59,25 @@ static bool LookForSyncWord(const uint8_t* raw_es,
if (!audio_header->IsSyncWord(cur_buf))
continue;
if (!audio_header->Parse(cur_buf, raw_es_size - offset))
continue;
// Check whether there is another frame |size| apart from the current one.
const size_t remaining_size = static_cast<size_t>(raw_es_size - offset);
const int kSyncWordSize = 2;
if (remaining_size >= audio_header->GetFrameSize() + kSyncWordSize &&
!audio_header->IsSyncWord(&cur_buf[audio_header->GetFrameSize()])) {
const size_t frame_size =
audio_header->GetFrameSizeWithoutParsing(cur_buf, remaining_size);
if (frame_size < audio_header->GetMinFrameSize())
// Too short to be a valid frame.
continue;
if (remaining_size < frame_size)
// Not a full frame: will resume when we have more data.
return false;
// Check whether there is another frame |size| apart from the current one.
if (remaining_size >= frame_size + kSyncWordSize &&
!audio_header->IsSyncWord(&cur_buf[frame_size])) {
continue;
}
if (!audio_header->Parse(cur_buf, frame_size))
continue;
*new_pos = offset;
return true;
}