Ignore unsupported audio codec in the source content

Instead of failing immediately, ignore unsupported audio codecs when
parsing the source file, since the file may contain more than one
stream. This allows the supported streams to be packaged.

Closes #395.

Change-Id: I01005a93a19012c19065251647c9b06dd25c673a
This commit is contained in:
KongQun Yang 2018-05-16 17:53:52 -07:00
parent 317425d92f
commit 734b4161f8
8 changed files with 209 additions and 144 deletions

View File

@ -59,12 +59,13 @@ bool ReadESSize(BitReader* reader, uint32_t* size) {
const size_t kHeaderSize = 2;
const size_t kMaxDecoderSpecificInfoSize = 64;
const uint32_t kUnknownBitrate = 0;
const size_t kBitsInByte = 8;
} // namespace
// Constructs an empty descriptor: ES id 0, the forbidden object type, and
// both the max and average bitrates set to kUnknownBitrate.
ESDescriptor::ESDescriptor()
: esid_(0),
object_type_(kForbidden),
object_type_(ObjectType::kForbidden),
max_bitrate_(kUnknownBitrate),
avg_bitrate_(kUnknownBitrate) {}
@ -109,10 +110,15 @@ bool ESDescriptor::ParseDecoderConfigDescriptor(BitReader* reader) {
RCHECK(tag == kDecoderConfigDescrTag);
RCHECK(ReadESSize(reader, &size));
const size_t start_pos = reader->bit_position();
RCHECK(reader->ReadBits(8, &object_type_));
RCHECK(reader->ReadBits(32, &dummy));
RCHECK(reader->ReadBits(32, &max_bitrate_));
RCHECK(reader->ReadBits(32, &avg_bitrate_));
const size_t fields_bits = reader->bit_position() - start_pos;
const bool has_child_tags = size * kBitsInByte > fields_bits;
if (has_child_tags)
RCHECK(ParseDecoderSpecificInfo(reader));
return true;

View File

@ -18,7 +18,7 @@ class BufferWriter;
// The following values are extracted from ISO 14496 Part 1 Table 5 -
// objectTypeIndication Values. Only values currently in use are included.
enum ObjectType {
enum class ObjectType : uint8_t {
kForbidden = 0,
kISO_14496_3 = 0x40, // MPEG4 AAC
kISO_13818_7_AAC_LC = 0x67, // MPEG2 AAC-LC
@ -62,12 +62,15 @@ class ESDescriptor {
/// @return true if the stream is AAC, i.e. the object type is either
///         MPEG4 AAC (ISO 14496-3) or MPEG2 AAC-LC (ISO 13818-7).
bool IsAAC() const {
return object_type_ == kISO_14496_3 || object_type_ == kISO_13818_7_AAC_LC;
return object_type_ == ObjectType::kISO_14496_3 ||
object_type_ == ObjectType::kISO_13818_7_AAC_LC;
}
/// @return true if the stream is DTS, i.e. the object type is one of
///         kDTSC, kDTSE, kDTSH or kDTSL.
bool IsDTS() const {
return object_type_ == kDTSC || object_type_ == kDTSE ||
object_type_ == kDTSH || object_type_ == kDTSL;
return object_type_ == ObjectType::kDTSC ||
object_type_ == ObjectType::kDTSE ||
object_type_ == ObjectType::kDTSH ||
object_type_ == ObjectType::kDTSL;
}
private:

View File

@ -2,76 +2,142 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "packager/media/codecs/es_descriptor.h"
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "packager/media/codecs/es_descriptor.h"
using ::testing::ElementsAre;
namespace shaka {
namespace media {
// Parses an ES descriptor whose size fields are all encoded in a single byte
// and checks the object type and decoder specific info that come out of it.
// Before parsing, the object type is expected to be kForbidden.
TEST(ESDescriptorTest, SingleByteLengthTest) {
// clang-format off
const uint8_t kBuffer[] = {
// ESDescriptor tag with one byte size.
0x03, 0x19,
// ESDescriptor fields.
0x00, 0x01, 0x00,
// DecoderConfigDescriptor tag with one byte size.
0x04, 0x11,
// Object Type.
0x40,
// Three 4-byte fields: dummy, max bitrate, avg bitrate.
0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// DecoderSpecificInfo tag with one byte size.
0x05, 0x02,
// DecoderSpecificInfo fields.
0x12, 0x10,
// SLConfig tag with one byte size.
0x06, 0x01,
// SLConfig fields.
0x02,
};
// clang-format on
std::vector<uint8_t> data(std::begin(kBuffer), std::end(kBuffer));
ESDescriptor es_desc;
uint8_t buffer[] = {0x03, 0x19, 0x00, 0x01, 0x00, 0x04, 0x11, 0x40, 0x15,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x05, 0x02, 0x12, 0x10, 0x06, 0x01, 0x02};
std::vector<uint8_t> data;
data.assign(buffer, buffer + sizeof(buffer));
EXPECT_EQ(es_desc.object_type(), kForbidden);
EXPECT_EQ(es_desc.object_type(), ObjectType::kForbidden);
EXPECT_TRUE(es_desc.Parse(data));
EXPECT_EQ(es_desc.object_type(), kISO_14496_3);
EXPECT_EQ(es_desc.decoder_specific_info().size(), 2u);
EXPECT_EQ(es_desc.decoder_specific_info()[0], 0x12);
EXPECT_EQ(es_desc.decoder_specific_info()[1], 0x10);
EXPECT_EQ(es_desc.object_type(), ObjectType::kISO_14496_3);
EXPECT_THAT(es_desc.decoder_specific_info(), ElementsAre(0x12, 0x10));
}
// Parses an ES descriptor whose object type (0x66) is not MPEG4 AAC. Parsing
// is expected to succeed and the two-byte decoder specific info is expected
// to be extracted regardless of the object type.
TEST(ESDescriptorTest, NonAACTest) {
// clang-format off
const uint8_t kBuffer[] = {
// ESDescriptor tag with one byte size.
0x03, 0x19,
// ESDescriptor fields.
0x00, 0x01, 0x00,
// DecoderConfigDescriptor tag with one byte size.
0x04, 0x11,
// Object Type.
0x66,
// Three 4-byte fields: dummy, max bitrate, avg bitrate.
0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// DecoderSpecificInfo tag with one byte size.
0x05, 0x02,
// DecoderSpecificInfo fields.
0x12, 0x10,
// SLConfig tag with one byte size.
0x06, 0x01,
// SLConfig fields.
0x02,
};
// clang-format on
std::vector<uint8_t> data(std::begin(kBuffer), std::end(kBuffer));
ESDescriptor es_desc;
uint8_t buffer[] = {0x03, 0x19, 0x00, 0x01, 0x00, 0x04, 0x11, 0x66, 0x15,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x05, 0x02, 0x12, 0x10, 0x06, 0x01, 0x02};
std::vector<uint8_t> data;
data.assign(buffer, buffer + sizeof(buffer));
EXPECT_TRUE(es_desc.Parse(data));
EXPECT_NE(es_desc.object_type(), kISO_14496_3);
EXPECT_EQ(es_desc.decoder_specific_info().size(), 2u);
EXPECT_EQ(es_desc.decoder_specific_info()[0], 0x12);
EXPECT_EQ(es_desc.decoder_specific_info()[1], 0x10);
EXPECT_EQ(static_cast<int>(es_desc.object_type()), 0x66);
EXPECT_NE(es_desc.object_type(), ObjectType::kISO_14496_3);
EXPECT_THAT(es_desc.decoder_specific_info(), ElementsAre(0x12, 0x10));
}
// A non-AAC descriptor (object type 0x6b) whose DecoderConfigDescriptor holds
// only its fixed fields and no DecoderSpecificInfo child. Parsing is expected
// to succeed, report both bitrates, and leave the decoder specific info empty.
TEST(ESDescriptorTest, NonAACWithoutDecoderSpecificInfoTagTest) {
  // clang-format off
  const uint8_t kEsDescriptorBytes[] = {
      // ESDescriptor: tag followed by a one byte size.
      0x03, 0x15,
      // ESDescriptor fields.
      0x00, 0x00, 0x00,
      // DecoderConfigDescriptor: tag followed by a one byte size.
      0x04, 0x0d,
      // Object type.
      0x6b,
      // Dummy field (4 bytes).
      0x15, 0x00, 0x01, 0xe0,
      // Max bitrate (4 bytes).
      0x00, 0x02, 0x85, 0x00,
      // Avg bitrate (4 bytes).
      0x00, 0x02, 0x71, 0x00,
      // SLConfig: tag, one byte size, then its single field.
      0x06, 0x01, 0x02,
  };
  // clang-format on

  ESDescriptor descriptor;
  std::vector<uint8_t> es_bytes(std::begin(kEsDescriptorBytes),
                                std::end(kEsDescriptorBytes));

  EXPECT_TRUE(descriptor.Parse(es_bytes));
  EXPECT_EQ(static_cast<int>(descriptor.object_type()), 0x6b);
  EXPECT_EQ(descriptor.max_bitrate(), 0x28500u);
  EXPECT_EQ(descriptor.avg_bitrate(), 0x27100u);
  EXPECT_THAT(descriptor.decoder_specific_info(), ElementsAre());
}
// Parses an ES descriptor whose size fields use the multi-byte encoding: two
// bytes for the ESDescriptor, three for the DecoderConfigDescriptor and four
// for the DecoderSpecificInfo.
TEST(ESDescriptorTest, MultiByteLengthTest) {
// clang-format off
const uint8_t kBuffer[] = {
// ESDescriptor tag with two bytes size.
0x03, 0x80, 0x1b,
// ESDescriptor fields.
0x00, 0x01, 0x00,
// DecoderConfigDescriptor tag with three bytes size.
0x04, 0x80, 0x80, 0x14,
// Object Type.
0x40,
// Three 4-byte fields: dummy, max bitrate, avg bitrate.
0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// DecoderSpecificInfo tag with four bytes size.
0x05, 0x80, 0x80, 0x80, 0x02,
// DecoderSpecificInfo fields.
0x12, 0x10,
// SLConfig tag with one byte size.
0x06, 0x01,
// SLConfig fields.
0x02,
};
// clang-format on
std::vector<uint8_t> data(std::begin(kBuffer), std::end(kBuffer));
ESDescriptor es_desc;
uint8_t buffer[] = {0x03, 0x80, 0x19, 0x00, 0x01, 0x00, 0x04, 0x80, 0x80,
0x11, 0x40, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x80, 0x80, 0x80,
0x02, 0x12, 0x10, 0x06, 0x01, 0x02};
std::vector<uint8_t> data;
data.assign(buffer, buffer + sizeof(buffer));
EXPECT_TRUE(es_desc.Parse(data));
EXPECT_EQ(es_desc.object_type(), kISO_14496_3);
EXPECT_EQ(es_desc.decoder_specific_info().size(), 2u);
EXPECT_EQ(es_desc.decoder_specific_info()[0], 0x12);
EXPECT_EQ(es_desc.decoder_specific_info()[1], 0x10);
}
// Feeds a descriptor whose DecoderSpecificInfo size field spans five bytes
// (0x80 four times, then 0x02).
TEST(ESDescriptorTest, FiveByteLengthTest) {
ESDescriptor es_desc;
uint8_t buffer[] = {0x03, 0x80, 0x19, 0x00, 0x01, 0x00, 0x04, 0x80, 0x80,
0x11, 0x40, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x80, 0x80, 0x80,
0x80, 0x02, 0x12, 0x10, 0x06, 0x01, 0x02};
std::vector<uint8_t> data;
data.assign(buffer, buffer + sizeof(buffer));
EXPECT_TRUE(es_desc.Parse(data));
EXPECT_EQ(es_desc.object_type(), kISO_14496_3);
EXPECT_EQ(es_desc.decoder_specific_info().size(), 0u);
EXPECT_EQ(es_desc.object_type(), ObjectType::kISO_14496_3);
EXPECT_THAT(es_desc.decoder_specific_info(), ElementsAre(0x12, 0x10));
}
} // namespace media

View File

@ -1579,7 +1579,7 @@ bool ElementaryStreamDescriptor::ReadWriteInternal(BoxBuffer* buffer) {
// Returns 0 while the ES descriptor's object type is still the forbidden
// value; otherwise returns HeaderSize() plus the ES descriptor's own computed
// size.
size_t ElementaryStreamDescriptor::ComputeSizeInternal() {
// This box is optional. Skip it if not initialized.
if (es_descriptor.object_type() == kForbidden)
if (es_descriptor.object_type() == ObjectType::kForbidden)
return 0;
return HeaderSize() + es_descriptor.ComputeSize();
}

View File

@ -373,7 +373,7 @@ class BoxDefinitionsTestGeneral : public testing::Test {
void Fill(ElementaryStreamDescriptor* esds) {
const uint8_t kDecoderSpecificInfo[] = {18, 16};
esds->es_descriptor.set_esid(1);
esds->es_descriptor.set_object_type(kISO_14496_3);
esds->es_descriptor.set_object_type(ObjectType::kISO_14496_3);
std::vector<uint8_t> decoder_specific_info(
kDecoderSpecificInfo,
kDecoderSpecificInfo + sizeof(kDecoderSpecificInfo));

View File

@ -96,8 +96,24 @@ Codec FourCCToCodec(FourCC fourcc) {
}
}
// Default DTS audio number of channels for 5.1 channel layout.
const uint8_t kDtsAudioNumChannels = 6;
// Translates an ES descriptor object type into the corresponding codec.
// Both AAC object types (MPEG4 AAC and MPEG2 AAC-LC) map to kCodecAAC, each
// DTS object type maps to its own DTS codec, and every other object type maps
// to kUnknownCodec.
Codec ObjectTypeToCodec(ObjectType object_type) {
  const bool is_aac = object_type == ObjectType::kISO_14496_3 ||
                      object_type == ObjectType::kISO_13818_7_AAC_LC;
  if (is_aac)
    return kCodecAAC;

  if (object_type == ObjectType::kDTSC)
    return kCodecDTSC;
  if (object_type == ObjectType::kDTSE)
    return kCodecDTSE;
  if (object_type == ObjectType::kDTSH)
    return kCodecDTSH;
  if (object_type == ObjectType::kDTSL)
    return kCodecDTSL;

  // Anything else is not a codec known to this mapping.
  return kUnknownCodec;
}
const uint64_t kNanosecondsPerSecond = 1000000000ull;
} // namespace
@ -366,8 +382,8 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
const AudioSampleEntry& entry = samp_descr.audio_entries[desc_idx];
const FourCC actual_format = entry.GetActualFormat();
Codec codec = FourCCToCodec(actual_format);
uint8_t num_channels = 0;
uint32_t sampling_frequency = 0;
uint8_t num_channels = entry.channelcount;
uint32_t sampling_frequency = entry.samplerate;
uint64_t codec_delay_ns = 0;
uint8_t audio_object_type = 0;
uint32_t max_bitrate = 0;
@ -376,10 +392,17 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
switch (actual_format) {
case FOURCC_mp4a:
// Check if it is MPEG4 AAC defined in ISO 14496 Part 3 or
// supported MPEG2 AAC variants.
if (entry.esds.es_descriptor.IsAAC()) {
codec = kCodecAAC;
codec = ObjectTypeToCodec(entry.esds.es_descriptor.object_type());
if (codec == kUnknownCodec) {
// Intentionally do not fail in the parser: the source content may
// contain multiple streams, and continuing allows the supported
// streams to be packaged. An error will be returned if the
// unsupported stream is passed to the muxer.
LOG(WARNING) << "Unsupported audio object type "
<< static_cast<int>(
entry.esds.es_descriptor.object_type())
<< " in stsd.es_desriptor.";
} else if (codec == kCodecAAC) {
const AACAudioSpecificConfig& aac_audio_specific_config =
entry.esds.aac_audio_specific_config;
num_channels = aac_audio_specific_config.GetNumChannels();
@ -387,42 +410,9 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
aac_audio_specific_config.GetSamplesPerSecond();
audio_object_type = aac_audio_specific_config.GetAudioObjectType();
codec_config = entry.esds.es_descriptor.decoder_specific_info();
break;
} else if (entry.esds.es_descriptor.IsDTS()) {
ObjectType audio_type = entry.esds.es_descriptor.object_type();
switch (audio_type) {
case kDTSC:
codec = kCodecDTSC;
break;
case kDTSE:
codec = kCodecDTSE;
break;
case kDTSH:
codec = kCodecDTSH;
break;
case kDTSL:
codec = kCodecDTSL;
break;
default:
LOG(ERROR) << "Unsupported audio type " << audio_type
<< " in stsd box.";
return false;
}
num_channels = entry.channelcount;
// For dts audio in esds, current supported number of channels is 6
// as the only supported channel layout is 5.1.
if (num_channels != kDtsAudioNumChannels) {
LOG(ERROR) << "Unsupported channel count " << num_channels
<< " for audio type " << audio_type << ".";
return false;
}
sampling_frequency = entry.samplerate;
} else {
max_bitrate = entry.esds.es_descriptor.max_bitrate();
avg_bitrate = entry.esds.es_descriptor.avg_bitrate();
} else {
LOG(ERROR) << "Unsupported audio format 0x" << std::hex
<< actual_format << " in stsd box.";
return false;
}
break;
case FOURCC_dtsc:
@ -437,36 +427,32 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
codec_config = entry.ddts.extra_data;
max_bitrate = entry.ddts.max_bitrate;
avg_bitrate = entry.ddts.avg_bitrate;
num_channels = entry.channelcount;
sampling_frequency = entry.samplerate;
break;
case FOURCC_ac_3:
codec_config = entry.dac3.data;
num_channels = static_cast<uint8_t>(GetAc3NumChannels(codec_config));
sampling_frequency = entry.samplerate;
break;
case FOURCC_ec_3:
codec_config = entry.dec3.data;
num_channels = static_cast<uint8_t>(GetEc3NumChannels(codec_config));
sampling_frequency = entry.samplerate;
break;
case FOURCC_fLaC:
codec_config = entry.dfla.data;
num_channels = entry.channelcount;
sampling_frequency = entry.samplerate;
break;
case FOURCC_Opus:
codec_config = entry.dops.opus_identification_header;
num_channels = entry.channelcount;
sampling_frequency = entry.samplerate;
RCHECK(sampling_frequency != 0);
codec_delay_ns =
entry.dops.preskip * kNanosecondsPerSecond / sampling_frequency;
break;
default:
LOG(ERROR) << "Unsupported audio format 0x" << std::hex
<< actual_format << " in stsd box.";
return false;
// Intentionally do not fail in the parser: the source content may
// contain multiple streams, and continuing allows the supported
// streams to be packaged.
// An error will be returned if the unsupported stream is passed to
// the muxer.
LOG(WARNING) << "Unsupported audio format '"
<< FourCCToString(actual_format) << "' in stsd box.";
break;
}
// Extract possible seek preroll.
@ -592,9 +578,14 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
break;
}
default:
LOG(ERROR) << "Unsupported video format "
<< FourCCToString(actual_format) << " in stsd box.";
return false;
// Intentionally do not fail in the parser: the source content may
// contain multiple streams, and continuing allows the supported
// streams to be packaged.
// An error will be returned if the unsupported stream is passed to
// the muxer.
LOG(WARNING) << "Unsupported video format '"
<< FourCCToString(actual_format) << "' in stsd box.";
break;
}
// The stream will be decrypted if a |decryptor_source_| is available.

View File

@ -175,6 +175,7 @@ Status MP4Muxer::InitializeMuxer() {
// Initialize tracks.
for (uint32_t i = 0; i < streams().size(); ++i) {
const StreamInfo* stream = streams()[i].get();
Track& trak = moov->tracks[i];
trak.header.track_id = i + 1;
@ -182,34 +183,29 @@ Status MP4Muxer::InitializeMuxer() {
trex.track_id = trak.header.track_id;
trex.default_sample_description_index = 1;
switch (streams()[i]->stream_type()) {
bool generate_trak_result = false;
switch (stream->stream_type()) {
case kStreamVideo:
GenerateVideoTrak(
static_cast<const VideoStreamInfo*>(streams()[i].get()),
&trak,
i + 1);
generate_trak_result = GenerateVideoTrak(
static_cast<const VideoStreamInfo*>(stream), &trak, i + 1);
break;
case kStreamAudio:
GenerateAudioTrak(
static_cast<const AudioStreamInfo*>(streams()[i].get()),
&trak,
i + 1);
generate_trak_result = GenerateAudioTrak(
static_cast<const AudioStreamInfo*>(stream), &trak, i + 1);
break;
case kStreamText:
GenerateTextTrak(
static_cast<const TextStreamInfo*>(streams()[i].get()),
&trak,
i + 1);
generate_trak_result = GenerateTextTrak(
static_cast<const TextStreamInfo*>(stream), &trak, i + 1);
break;
default:
NOTIMPLEMENTED() << "Not implemented for stream type: "
<< streams()[i]->stream_type();
<< stream->stream_type();
}
if (!generate_trak_result)
return Status(error::MUXER_FAILURE, "Failed to generate trak.");
if (streams()[i]->is_encrypted() &&
options().mp4_params.include_pssh_in_stream) {
const auto& key_system_info =
streams()[i]->encryption_config().key_system_info;
if (stream->is_encrypted() && options().mp4_params.include_pssh_in_stream) {
const auto& key_system_info = stream->encryption_config().key_system_info;
moov->pssh.resize(key_system_info.size());
for (size_t j = 0; j < key_system_info.size(); j++)
moov->pssh[j].raw_box = key_system_info[j].psshs;
@ -286,7 +282,7 @@ void MP4Muxer::InitializeTrak(const StreamInfo* info, Track* trak) {
}
}
void MP4Muxer::GenerateVideoTrak(const VideoStreamInfo* video_info,
bool MP4Muxer::GenerateVideoTrak(const VideoStreamInfo* video_info,
Track* trak,
uint32_t track_id) {
InitializeTrak(video_info, trak);
@ -331,9 +327,10 @@ void MP4Muxer::GenerateVideoTrak(const VideoStreamInfo* video_info,
GenerateSinf(entry.format, video_info->encryption_config(), &entry.sinf);
entry.format = FOURCC_encv;
}
return true;
}
void MP4Muxer::GenerateAudioTrak(const AudioStreamInfo* audio_info,
bool MP4Muxer::GenerateAudioTrak(const AudioStreamInfo* audio_info,
Track* trak,
uint32_t track_id) {
InitializeTrak(audio_info, trak);
@ -345,7 +342,8 @@ void MP4Muxer::GenerateAudioTrak(const AudioStreamInfo* audio_info,
CodecToFourCC(audio_info->codec(), H26xStreamFormat::kUnSpecified);
switch(audio_info->codec()){
case kCodecAAC:
audio.esds.es_descriptor.set_object_type(kISO_14496_3); // MPEG4 AAC.
audio.esds.es_descriptor.set_object_type(
ObjectType::kISO_14496_3); // MPEG4 AAC.
audio.esds.es_descriptor.set_esid(track_id);
audio.esds.es_descriptor.set_decoder_specific_info(
audio_info->codec_config());
@ -376,8 +374,8 @@ void MP4Muxer::GenerateAudioTrak(const AudioStreamInfo* audio_info,
audio.dops.opus_identification_header = audio_info->codec_config();
break;
default:
NOTIMPLEMENTED();
break;
NOTIMPLEMENTED() << " Unsupported audio codec " << audio_info->codec();
return false;
}
if (audio_info->codec() == kCodecAC3 || audio_info->codec() == kCodecEAC3) {
@ -434,11 +432,11 @@ void MP4Muxer::GenerateAudioTrak(const AudioStreamInfo* audio_info,
SampleToGroupEntry::kTrackGroupDescriptionIndexBase + 1;
} else if (audio_info->seek_preroll_ns() != 0) {
LOG(WARNING) << "Unexpected seek preroll for codec " << audio_info->codec();
return;
}
return true;
}
void MP4Muxer::GenerateTextTrak(const TextStreamInfo* text_info,
bool MP4Muxer::GenerateTextTrak(const TextStreamInfo* text_info,
Track* trak,
uint32_t track_id) {
InitializeTrak(text_info, trak);
@ -457,10 +455,11 @@ void MP4Muxer::GenerateTextTrak(const TextStreamInfo* text_info,
trak->media.information.sample_table.description;
sample_description.type = kText;
sample_description.text_entries.push_back(webvtt);
return;
return true;
}
NOTIMPLEMENTED() << text_info->codec_string()
<< " handling not implemented yet.";
return false;
}
base::Optional<Range> MP4Muxer::GetInitRangeStartAndEnd() {

View File

@ -45,13 +45,13 @@ class MP4Muxer : public Muxer {
// Generate Audio/Video Track box.
void InitializeTrak(const StreamInfo* info, Track* trak);
void GenerateAudioTrak(const AudioStreamInfo* audio_info,
bool GenerateAudioTrak(const AudioStreamInfo* audio_info,
Track* trak,
uint32_t track_id);
void GenerateVideoTrak(const VideoStreamInfo* video_info,
bool GenerateVideoTrak(const VideoStreamInfo* video_info,
Track* trak,
uint32_t track_id);
void GenerateTextTrak(const TextStreamInfo* video_info,
bool GenerateTextTrak(const TextStreamInfo* video_info,
Track* trak,
uint32_t track_id);