Add support for DTS in ESDS. Needed to handle DTS audio generated by FFMPEG.

Issue #52

Change-Id: Ibd97054bce0b7ccb344f4bb34370399b8e051f30
This commit is contained in:
Bei Li 2016-01-04 10:57:05 -08:00 committed by Gerrit Code Review
parent a4659c40dd
commit bb073cef51
14 changed files with 183 additions and 47 deletions

View File

@ -71,6 +71,8 @@ AudioStreamInfo::AudioStreamInfo(int track_id,
uint8_t sample_bits,
uint8_t num_channels,
uint32_t sampling_frequency,
uint32_t max_bitrate,
uint32_t avg_bitrate,
const uint8_t* extra_data,
size_t extra_data_size,
bool is_encrypted)
@ -86,8 +88,9 @@ AudioStreamInfo::AudioStreamInfo(int track_id,
codec_(codec),
sample_bits_(sample_bits),
num_channels_(num_channels),
sampling_frequency_(sampling_frequency) {
}
sampling_frequency_(sampling_frequency),
max_bitrate_(max_bitrate),
avg_bitrate_(avg_bitrate) {}
// Destructor with an empty body.
AudioStreamInfo::~AudioStreamInfo() {
}

View File

@ -52,6 +52,8 @@ class AudioStreamInfo : public StreamInfo {
uint8_t sample_bits,
uint8_t num_channels,
uint32_t sampling_frequency,
uint32_t max_bitrate,
uint32_t avg_bitrate,
const uint8_t* extra_data,
size_t extra_data_size,
bool is_encrypted);
@ -70,6 +72,8 @@ class AudioStreamInfo : public StreamInfo {
// Returns num_channels_ * sample_bits_ / 8, evaluated in 32-bit unsigned
// arithmetic.
uint32_t bytes_per_frame() const {
  const uint32_t channel_count = num_channels_;
  return channel_count * sample_bits_ / 8;
}

// Returns the stored maximum bitrate.
uint32_t max_bitrate() const {
  return max_bitrate_;
}

// Returns the stored average bitrate.
uint32_t avg_bitrate() const {
  return avg_bitrate_;
}

// Replaces the stored audio codec.
void set_codec(AudioCodec codec) {
  codec_ = codec;
}
void set_sampling_frequency(const uint32_t sampling_frequency) {
@ -88,6 +92,8 @@ class AudioStreamInfo : public StreamInfo {
uint8_t sample_bits_;
uint8_t num_channels_;
uint32_t sampling_frequency_;
uint32_t max_bitrate_;
uint32_t avg_bitrate_;
// Not using DISALLOW_COPY_AND_ASSIGN here intentionally to allow the compiler
// generated copy constructor and assignment operator. Since the extra data is

View File

@ -233,6 +233,8 @@ bool EsParserAdts::UpdateAudioConfiguration(const uint8_t* adts_frame,
kAacSampleSizeBits,
adts_header.GetNumChannels(),
extended_samples_per_second,
0,
0,
audio_specific_config.data(),
audio_specific_config.size(),
false));

View File

@ -47,6 +47,22 @@ bool IsIvSizeValid(size_t iv_size) {
return iv_size == 8 || iv_size == 16;
}
// Default bytes for the trailing fields of the ddts box. Values are set
// according to FFMPEG. The fields are packed most-significant-bit first and
// add up to 56 bits, i.e. the 7 bytes below:
//   bit(2)  FrameDuration       = 3    (4096)
//   bit(5)  StreamConstruction  = 18
//   bit(1)  CoreLFEPresent      = 0    (none)
//   bit(6)  CoreLayout          = 31   (ignore core layout)
//   bit(14) CoreSize            = 0
//   bit(1)  StereoDownmix       = 0    (none)
//   bit(3)  RepresentationType  = 4
//   bit(16) ChannelLayout       = 0xf  (5.1 channel layout)
//   bit(1)  MultiAssetFlag      = 0    (single asset)
//   bit(1)  LBRDurationMod      = 0    (ignore)
//   bit(1)  ReservedBoxPresent  = 0    (none)
//   bit(5)  Reserved            = 0
const uint8_t kDdtsExtraData[] = {0xe4, 0x7c, 0x00, 0x04, 0x00, 0x0f, 0x00};
// Utility functions to check if the 64bit integers can fit in 32bit integer.
bool IsFitIn32Bits(uint64_t a) {
return a <= std::numeric_limits<uint32_t>::max();
@ -1190,27 +1206,40 @@ uint32_t ElementaryStreamDescriptor::ComputeSizeInternal() {
return HeaderSize() + es_descriptor.ComputeSize();
}
// NOTE(review): this listing appears to carry both the pre-change constructor
// (empty initializer list) and the post-change one (explicit zero
// initialization) - confirm which one the target revision keeps.
DTSSpecific::DTSSpecific() {}
// Zero-initializes every scalar field of the box. A zero sampling_frequency
// is what ComputeSizeInternal() checks to decide that the box is not
// initialized.
DTSSpecific::DTSSpecific()
: sampling_frequency(0),
max_bitrate(0),
avg_bitrate(0),
pcm_sample_depth(0) {}
// Destructor with an empty body.
DTSSpecific::~DTSSpecific() {}
// Identifies this box by the FOURCC_DDTS four-character code.
FourCC DTSSpecific::BoxType() const { return FOURCC_DDTS; }
// Reads or writes the ddts box through |buffer|; the direction is decided by
// buffer->Reading(). Each step is wrapped in RCHECK (presumably an early
// "return false" on failure - confirm against the macro definition); returns
// true once every step has succeeded.
// NOTE(review): this listing appears to interleave the pre-change lines
// (operating on |data|) with the post-change lines (operating on the explicit
// fields and |extra_data|).
bool DTSSpecific::ReadWriteInternal(BoxBuffer* buffer) {
RCHECK(ReadWriteHeaderInternal(buffer));
// Box header, then sampling frequency, max bitrate, average bitrate (32 bits
// each) and the PCM sample depth (8 bits), in that order.
RCHECK(ReadWriteHeaderInternal(buffer) &&
buffer->ReadWriteUInt32(&sampling_frequency) &&
buffer->ReadWriteUInt32(&max_bitrate) &&
buffer->ReadWriteUInt32(&avg_bitrate) &&
buffer->ReadWriteUInt8(&pcm_sample_depth));
if (buffer->Reading()) {
RCHECK(
buffer->ReadWriteVector(&data, buffer->Size() - buffer->Pos()));
// When reading, everything from the current position to the end of the
// buffer is taken as the extra data.
RCHECK(buffer->ReadWriteVector(&extra_data, buffer->Size() - buffer->Pos()));
} else {
RCHECK(buffer->ReadWriteVector(&data, data.size()));
// When writing with no extra data set, fall back to the default bytes in
// kDdtsExtraData so the trailing fields are still emitted.
if (extra_data.empty()) {
extra_data.assign(kDdtsExtraData,
kDdtsExtraData + sizeof(kDdtsExtraData));
}
RCHECK(buffer->ReadWriteVector(&extra_data, extra_data.size()));
}
return true;
}
// Returns the size in bytes of the ddts box, or 0 when the box is to be
// skipped.
// NOTE(review): this listing appears to interleave the pre-change lines
// (based on |data|) with the post-change lines (based on the explicit fields).
uint32_t DTSSpecific::ComputeSizeInternal() {
// This box is optional. Skip it if not initialized.
if (data.size() == 0)
if (sampling_frequency == 0)
return 0;
return HeaderSize() + data.size();
// NOTE(review): the trailing payload is always counted as
// sizeof(kDdtsExtraData) bytes, whereas ReadWriteInternal() writes
// extra_data.size() bytes. The two agree only when |extra_data| is empty or
// exactly sizeof(kDdtsExtraData) long; any other length makes the size
// computed here disagree with the bytes written - confirm whether other
// lengths are possible and, if so, size from |extra_data| instead.
return HeaderSize() + sizeof(sampling_frequency) + sizeof(max_bitrate) +
sizeof(avg_bitrate) + sizeof(pcm_sample_depth) +
sizeof(kDdtsExtraData);
}
AudioSampleEntry::AudioSampleEntry()
@ -1266,7 +1295,6 @@ bool AudioSampleEntry::ReadWriteInternal(BoxBuffer* buffer) {
RCHECK(buffer->TryReadWriteChild(&esds));
RCHECK(buffer->TryReadWriteChild(&ddts));
return true;
}

View File

@ -268,7 +268,11 @@ struct ElementaryStreamDescriptor : FullBox {
// The ddts box (its BoxType() is FOURCC_DDTS).
// NOTE(review): this listing appears to show both the pre-change member
// (|data|) and the post-change members that replace it.
struct DTSSpecific : Box {
DECLARE_BOX_METHODS(DTSSpecific);
std::vector<uint8_t> data;
// Serialized as a 32-bit value; 0 marks the box as not initialized, in which
// case ComputeSizeInternal() reports a size of 0.
uint32_t sampling_frequency;
// Serialized as a 32-bit value.
uint32_t max_bitrate;
// Serialized as a 32-bit value.
uint32_t avg_bitrate;
// Serialized as an 8-bit value.
uint8_t pcm_sample_depth;
// Bytes following pcm_sample_depth. When empty at write time,
// ReadWriteInternal() fills it with kDdtsExtraData.
std::vector<uint8_t> extra_data;
};
struct AudioSampleEntry : Box {

View File

@ -204,6 +204,8 @@ inline bool operator==(const VideoSampleEntry& lhs,
// Field-by-field equality for ES descriptors: ES id, object type, maximum and
// average bitrate, and decoder specific info must all match. Comparison stops
// at the first field that differs.
inline bool operator==(const ESDescriptor& lhs, const ESDescriptor& rhs) {
  if (!(lhs.esid() == rhs.esid()))
    return false;
  if (!(lhs.object_type() == rhs.object_type()))
    return false;
  if (!(lhs.max_bitrate() == rhs.max_bitrate()))
    return false;
  if (!(lhs.avg_bitrate() == rhs.avg_bitrate()))
    return false;
  return lhs.decoder_specific_info() == rhs.decoder_specific_info();
}
@ -214,7 +216,11 @@ inline bool operator==(const ElementaryStreamDescriptor& lhs,
// Equality for ddts boxes.
// NOTE(review): this listing appears to show both the pre-change comparison
// (on |data| only) and the post-change comparison, which requires the
// sampling frequency, both bitrates, the PCM sample depth and the extra data
// to match.
inline bool operator==(const DTSSpecific& lhs,
const DTSSpecific& rhs) {
return lhs.data == rhs.data;
return lhs.sampling_frequency == rhs.sampling_frequency &&
lhs.max_bitrate == rhs.max_bitrate &&
lhs.avg_bitrate == rhs.avg_bitrate &&
lhs.pcm_sample_depth == rhs.pcm_sample_depth &&
lhs.extra_data == rhs.extra_data;
}
inline bool operator==(const AudioSampleEntry& lhs,

View File

@ -348,6 +348,20 @@ class BoxDefinitionsTestGeneral : public testing::Test {
esds->es_descriptor.set_esid(2);
}
// Populates |ddts| with fixed sample values: 48000 sampling frequency, 768000
// for both bitrates, a 16-bit PCM sample depth and a 7-byte extra data
// payload.
void Fill(DTSSpecific* ddts) {
  const uint8_t kExtraDataBytes[] = {0xe4, 0x7c, 0, 4, 0, 0x0f, 0};
  const uint32_t kSampleBitrate = 768000;

  ddts->sampling_frequency = 48000;
  ddts->pcm_sample_depth = 16;
  ddts->max_bitrate = kSampleBitrate;
  ddts->avg_bitrate = kSampleBitrate;
  ddts->extra_data.assign(kExtraDataBytes,
                          kExtraDataBytes + arraysize(kExtraDataBytes));
}
// Changes one field of |ddts| (the PCM sample depth, set to 24) so that it
// differs from what Fill() produces.
void Modify(DTSSpecific* ddts) {
  const uint8_t kModifiedPcmSampleDepth = 24;
  ddts->pcm_sample_depth = kModifiedPcmSampleDepth;
}
void Fill(AudioSampleEntry* enca) {
enca->format = FOURCC_ENCA;
enca->data_reference_index = 2;
@ -861,6 +875,7 @@ class BoxDefinitionsTestGeneral : public testing::Test {
// IsOptional() overloads that report the box type as optional. The argument is
// used only for overload selection.
bool IsOptional(const CueIDBox* box) {
  return true;
}
bool IsOptional(const CueTimeBox* box) {
  return true;
}
bool IsOptional(const CueSettingsBox* box) {
  return true;
}
bool IsOptional(const DTSSpecific* box) {
  return true;
}
protected:
scoped_ptr<BufferWriter> buffer_;
@ -934,7 +949,8 @@ typedef testing::Types<
CuePayloadBox,
VTTEmptyCueBox,
VTTAdditionalTextBox,
VTTCueBox> Boxes2;
VTTCueBox,
DTSSpecific> Boxes2;
TYPED_TEST_CASE_P(BoxDefinitionsTestGeneral);
@ -1006,17 +1022,13 @@ INSTANTIATE_TYPED_TEST_CASE_P(BoxDefinitionTypedTests2,
// Fixture used by the TEST_F cases; derives from the general fixture
// instantiated with Box and adds no members of its own.
class BoxDefinitionsTest : public BoxDefinitionsTestGeneral<Box> {};
TEST_F(BoxDefinitionsTest, DTSSampleEntry) {
const uint8_t kDtseData[] = {0x00, 0x00, 0x00, 0x1c, 0x64, 0x64, 0x74,
0x73, 0x00, 0x00, 0xbb, 0x80, 0x00, 0x03,
0xe4, 0x18, 0x00, 0x03, 0xe4, 0x18, 0x18,
0xe4, 0x7c, 0x00, 0x04, 0x00, 0x0f, 0x00};
AudioSampleEntry entry;
entry.format = FOURCC_DTSE;
entry.data_reference_index = 2;
entry.channelcount = 5;
entry.samplesize = 16;
entry.samplerate = 44100;
entry.ddts.data.assign(kDtseData, kDtseData + arraysize(kDtseData));
Fill(&entry.ddts);
entry.Write(this->buffer_.get());
AudioSampleEntry entry_readback;

View File

@ -58,12 +58,17 @@ bool ReadESSize(BitReader* reader, uint32_t* size) {
// multi-bytes size for now).
const size_t kHeaderSize = 2;
// Exclusive upper bound on the decoder specific info size; ESDescriptor::Write
// CHECKs that the stored info is smaller than this.
const size_t kMaxDecoderSpecificInfoSize = 64;
// Bitrate value a default-constructed ESDescriptor reports for both its
// maximum and average bitrate.
const uint32_t kUnknownBitrate = 0;
} // namespace
namespace mp4 {
// NOTE(review): this listing appears to carry both the pre-change constructor
// (two initializers) and the post-change one (four initializers) - confirm
// which one the target revision keeps.
ESDescriptor::ESDescriptor() : esid_(0), object_type_(kForbidden) {}
// Starts with ES id 0, the kForbidden object type, and kUnknownBitrate for
// both the maximum and the average bitrate.
ESDescriptor::ESDescriptor()
: esid_(0),
object_type_(kForbidden),
max_bitrate_(kUnknownBitrate),
avg_bitrate_(kUnknownBitrate) {}
// Destructor with an empty body.
ESDescriptor::~ESDescriptor() {}
@ -100,15 +105,16 @@ bool ESDescriptor::Parse(const std::vector<uint8_t>& data) {
bool ESDescriptor::ParseDecoderConfigDescriptor(BitReader* reader) {
uint8_t tag;
uint32_t size;
uint64_t dummy;
uint32_t dummy;
RCHECK(reader->ReadBits(8, &tag));
RCHECK(tag == kDecoderConfigDescrTag);
RCHECK(ReadESSize(reader, &size));
RCHECK(reader->ReadBits(8, &object_type_));
RCHECK(reader->ReadBits(64, &dummy));
RCHECK(reader->ReadBits(32, &dummy));
RCHECK(reader->ReadBits(32, &max_bitrate_));
RCHECK(reader->ReadBits(32, &avg_bitrate_));
RCHECK(ParseDecoderSpecificInfo(reader));
return true;
@ -126,7 +132,6 @@ bool ESDescriptor::ParseDecoderSpecificInfo(BitReader* reader) {
decoder_specific_info_.resize(size);
for (uint32_t i = 0; i < size; ++i)
RCHECK(reader->ReadBits(8, &decoder_specific_info_[i]));
return true;
}
@ -135,7 +140,6 @@ void ESDescriptor::Write(BufferWriter* writer) const {
CHECK_LT(decoder_specific_info_.size(), kMaxDecoderSpecificInfoSize);
const std::vector<uint8_t> kEmptyDecodingBufferSize(3, 0);
const uint32_t kUnknownBitrate = 0;
const uint8_t kNoEsFlags = 0;
const uint8_t decoder_specific_info_size = decoder_specific_info_.size();
@ -162,8 +166,8 @@ void ESDescriptor::Write(BufferWriter* writer) const {
writer->AppendInt(static_cast<uint8_t>(object_type_));
writer->AppendInt(stream_type);
writer->AppendVector(kEmptyDecodingBufferSize);
writer->AppendInt(kUnknownBitrate); // max_bitrate.
writer->AppendInt(kUnknownBitrate); // avg_bitrate.
writer->AppendInt(max_bitrate_);
writer->AppendInt(avg_bitrate_);
writer->AppendInt(static_cast<uint8_t>(kDecoderSpecificInfoTag));
writer->AppendInt(decoder_specific_info_size);

View File

@ -24,6 +24,10 @@ enum ObjectType {
kForbidden = 0,
kISO_14496_3 = 0x40, // MPEG4 AAC
kISO_13818_7_AAC_LC = 0x67, // MPEG2 AAC-LC
kDTSC = 0xA9, // DTS Coherent Acoustics audio
kDTSE = 0xAC, // DTS Express low bit rate audio
kDTSH = 0xAA, // DTS-HD High Resolution Audio
kDTSL = 0xAB, // DTS-HD Master Audio
};
/// This class parses object type and decoder specific information from an
@ -41,6 +45,12 @@ class ESDescriptor {
// Elementary stream id.
uint16_t esid() const {
  return esid_;
}
void set_esid(uint16_t esid) {
  esid_ = esid;
}

// Maximum bitrate carried by the descriptor.
uint32_t max_bitrate() const {
  return max_bitrate_;
}
void set_max_bitrate(uint32_t max_bitrate) {
  max_bitrate_ = max_bitrate;
}

// Average bitrate carried by the descriptor.
uint32_t avg_bitrate() const {
  return avg_bitrate_;
}
void set_avg_bitrate(uint32_t avg_bitrate) {
  avg_bitrate_ = avg_bitrate;
}

// Object type carried by the descriptor.
ObjectType object_type() const {
  return object_type_;
}
void set_object_type(ObjectType object_type) {
  object_type_ = object_type;
}
@ -57,6 +67,11 @@ class ESDescriptor {
return object_type_ == kISO_14496_3 || object_type_ == kISO_13818_7_AAC_LC;
}
// True when the object type is one of the four DTS values (kDTSC, kDTSE,
// kDTSH, kDTSL); false for every other object type.
bool IsDTS() const {
  switch (object_type_) {
    case kDTSC:
    case kDTSE:
    case kDTSH:
    case kDTSL:
      return true;
    default:
      return false;
  }
}
private:
enum Tag {
kESDescrTag = 0x03,
@ -70,6 +85,8 @@ class ESDescriptor {
uint16_t esid_; // Elementary Stream ID.
ObjectType object_type_;
uint32_t max_bitrate_;
uint32_t avg_bitrate_;
std::vector<uint8_t> decoder_specific_info_;
};

View File

@ -81,6 +81,8 @@ AudioCodec FourCCToAudioCodec(FourCC fourcc) {
}
const char kWidevineKeySystemId[] = "edef8ba979d64acea3c827dcd51d21ed";
// Default DTS audio number of channels for 5.1 channel layout. ParseMoov
// rejects DTS audio signalled through esds whose channel count differs from
// this value.
const uint8_t kDtsAudioNumChannels = 6;
} // namespace
@ -338,6 +340,8 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
uint8_t num_channels = 0;
uint32_t sampling_frequency = 0;
uint8_t audio_object_type = 0;
uint32_t max_bitrate = 0;
uint32_t avg_bitrate = 0;
std::vector<uint8_t> extra_data;
switch (actual_format) {
@ -353,6 +357,37 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
audio_object_type = aac_audio_specific_config.audio_object_type();
extra_data = entry.esds.es_descriptor.decoder_specific_info();
break;
} else if (entry.esds.es_descriptor.IsDTS()) {
ObjectType audio_type = entry.esds.es_descriptor.object_type();
switch (audio_type) {
case kDTSC:
codec = kCodecDTSC;
break;
case kDTSE:
codec = kCodecDTSE;
break;
case kDTSH:
codec = kCodecDTSH;
break;
case kDTSL:
codec = kCodecDTSL;
break;
default:
LOG(ERROR) << "Unsupported audio type " << audio_type
<< " in stsd box.";
return false;
}
num_channels = entry.esds.aac_audio_specific_config.num_channels();
// For dts audio in esds, current supported number of channels is 6
// as the only supported channel layout is 5.1.
if (num_channels != kDtsAudioNumChannels) {
LOG(ERROR) << "Unsupported channel count " << num_channels
<< " for audio type " << audio_type << ".";
return false;
}
sampling_frequency = entry.samplerate;
max_bitrate = entry.esds.es_descriptor.max_bitrate();
avg_bitrate = entry.esds.es_descriptor.avg_bitrate();
} else {
LOG(ERROR) << "Unsupported audio format 0x" << std::hex
<< actual_format << " in stsd box.";
@ -363,7 +398,9 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
case FOURCC_DTSL:
case FOURCC_DTSE:
case FOURCC_DTSM:
extra_data = entry.ddts.data;
extra_data = entry.ddts.extra_data;
max_bitrate = entry.ddts.max_bitrate;
avg_bitrate = entry.ddts.avg_bitrate;
num_channels = entry.channelcount;
sampling_frequency = entry.samplerate;
break;
@ -389,7 +426,9 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
entry.samplesize,
num_channels,
sampling_frequency,
extra_data.size() ? &extra_data[0] : NULL,
max_bitrate,
avg_bitrate,
vector_as_array(&extra_data),
extra_data.size(),
is_encrypted));
}

View File

@ -40,7 +40,7 @@ void SetStartAndEndFromOffsetAndSize(size_t offset,
*end = *start + static_cast<uint32_t>(size) - 1;
}
FourCC CodecToFourCC(VideoCodec codec) {
FourCC VideoCodecToFourCC(VideoCodec codec) {
switch (codec) {
case kCodecH264:
return FOURCC_AVC1;
@ -59,6 +59,25 @@ FourCC CodecToFourCC(VideoCodec codec) {
}
}
// Maps an audio codec to the four-character code of its sample entry: AAC to
// FOURCC_MP4A, and each DTS codec to the matching FOURCC_DTS* code. Any other
// codec yields FOURCC_NULL.
FourCC AudioCodecToFourCC(AudioCodec codec) {
  if (codec == kCodecAAC)
    return FOURCC_MP4A;
  if (codec == kCodecDTSC)
    return FOURCC_DTSC;
  if (codec == kCodecDTSH)
    return FOURCC_DTSH;
  if (codec == kCodecDTSL)
    return FOURCC_DTSL;
  if (codec == kCodecDTSE)
    return FOURCC_DTSE;
  if (codec == kCodecDTSM)
    return FOURCC_DTSM;
  return FOURCC_NULL;
}
} // namespace
// Forwards |options| to the Muxer base class; no other initialization.
MP4Muxer::MP4Muxer(const MuxerOptions& options)
    : Muxer(options) {
}
@ -75,7 +94,7 @@ Status MP4Muxer::Initialize() {
ftyp->compatible_brands.push_back(FOURCC_MP41);
if (streams().size() == 1 &&
streams()[0]->info()->stream_type() == kStreamVideo) {
const FourCC codec_fourcc = CodecToFourCC(
const FourCC codec_fourcc = VideoCodecToFourCC(
static_cast<VideoStreamInfo*>(streams()[0]->info().get())->codec());
if (codec_fourcc != FOURCC_NULL)
ftyp->compatible_brands.push_back(codec_fourcc);
@ -201,7 +220,7 @@ void MP4Muxer::GenerateVideoTrak(const VideoStreamInfo* video_info,
trak->media.handler.type = kVideo;
VideoSampleEntry video;
video.format = CodecToFourCC(video_info->codec());
video.format = VideoCodecToFourCC(video_info->codec());
video.width = video_info->width();
video.height = video_info->height();
video.codec_config_record.data = video_info->extra_data();
@ -225,33 +244,26 @@ void MP4Muxer::GenerateAudioTrak(const AudioStreamInfo* audio_info,
trak->media.handler.type = kAudio;
AudioSampleEntry audio;
audio.format = AudioCodecToFourCC(audio_info->codec());
switch(audio_info->codec()){
case kCodecAAC:
audio.format = FOURCC_MP4A;
audio.esds.es_descriptor.set_object_type(kISO_14496_3); // MPEG4 AAC.
audio.esds.es_descriptor.set_esid(track_id);
audio.esds.es_descriptor.set_decoder_specific_info(
audio_info->extra_data());
audio.esds.es_descriptor.set_max_bitrate(audio_info->max_bitrate());
audio.esds.es_descriptor.set_avg_bitrate(audio_info->avg_bitrate());
break;
case kCodecDTSC:
audio.format = FOURCC_DTSC;
audio.ddts.data = audio_info->extra_data();
break;
case kCodecDTSH:
audio.format = FOURCC_DTSH;
audio.ddts.data = audio_info->extra_data();
break;
case kCodecDTSL:
audio.format = FOURCC_DTSL;
audio.ddts.data = audio_info->extra_data();
break;
case kCodecDTSE:
audio.format = FOURCC_DTSE;
audio.ddts.data = audio_info->extra_data();
break;
case kCodecDTSM:
audio.format = FOURCC_DTSM;
audio.ddts.data = audio_info->extra_data();
audio.ddts.extra_data = audio_info->extra_data();
audio.ddts.max_bitrate = audio_info->max_bitrate();
audio.ddts.avg_bitrate = audio_info->avg_bitrate();
audio.ddts.sampling_frequency = audio_info->sampling_frequency();
audio.ddts.pcm_sample_depth = audio_info->sample_bits();
break;
default:
NOTIMPLEMENTED();

View File

@ -69,7 +69,7 @@ scoped_refptr<AudioStreamInfo> WebMAudioClient::GetAudioStreamInfo(
return scoped_refptr<AudioStreamInfo>(new AudioStreamInfo(
track_num, kWebMTimeScale, 0, audio_codec,
AudioStreamInfo::GetCodecString(audio_codec, 0), language,
kSampleSizeInBits, channels_, sampling_frequency, extra_data,
kSampleSizeInBits, channels_, sampling_frequency, 0, 0, extra_data,
extra_data_size, is_encrypted));
}

View File

@ -303,6 +303,8 @@ class WebMClusterParserTest : public testing::Test {
kBitsPerSample,
kNumChannels,
kSamplingFrequency,
0,
0,
NULL,
kExtraDataSize,
!kEncrypted)),

View File

@ -750,10 +750,11 @@ bool WvmMediaParser::ParseIndexEntry() {
}
if (has_audio) {
AudioCodec audio_codec = kCodecAAC;
// TODO(beil): Pass in max and average bitrate in wvm container.
stream_infos_.push_back(new AudioStreamInfo(
stream_id_count_, time_scale, track_duration, audio_codec,
std::string(), std::string(), kAacSampleSizeBits, num_channels,
sampling_frequency, vector_as_array(&audio_codec_config),
sampling_frequency, 0, 0, vector_as_array(&audio_codec_config),
audio_codec_config.size(), true));
program_demux_stream_map_[base::UintToString(index_program_id_) + ":" +
base::UintToString(audio_pes_stream_id ?