From 58b95fd3d5fdec73d01ad203992e9923ddf66017 Mon Sep 17 00:00:00 2001 From: Bei Li Date: Fri, 8 Jan 2016 15:56:33 -0800 Subject: [PATCH] Support Dolby audio AC3 in ISO BMFF (Part 1) - Box definitions for box type DAC3. - Parser/muxer changes to support AC3 audio codecs. - EC3 audio sample entry will come in Part 2. - MPD signaling will come in Part 3. Issue #64 Change-Id: I790b46ae8179b933bb8f7da9cdd38591fe8da43d --- packager/media/base/audio_stream_info.cc | 40 ++++++------------- packager/media/base/audio_stream_info.h | 19 +++------ packager/media/formats/mp4/box_definitions.cc | 37 +++++++++++++---- packager/media/formats/mp4/box_definitions.h | 7 ++++ .../formats/mp4/box_definitions_comparison.h | 9 ++++- .../formats/mp4/box_definitions_unittest.cc | 27 +++++++++++++ packager/media/formats/mp4/fourccs.h | 3 +- .../media/formats/mp4/mp4_media_parser.cc | 7 ++-- packager/media/formats/mp4/mp4_muxer.cc | 5 +++ 9 files changed, 98 insertions(+), 56 deletions(-) diff --git a/packager/media/base/audio_stream_info.cc b/packager/media/base/audio_stream_info.cc index 45e04b7ca6..39adcad273 100644 --- a/packager/media/base/audio_stream_info.cc +++ b/packager/media/base/audio_stream_info.cc @@ -19,42 +19,24 @@ std::string AudioCodecToString(AudioCodec audio_codec) { switch (audio_codec) { case kCodecAAC: return "AAC"; - case kCodecMP3: - return "MP3"; - case kCodecPCM: - return "PCM"; - case kCodecVorbis: - return "Vorbis"; - case kCodecFLAC: - return "FLAC"; - case kCodecAMR_NB: - return "AMR_NB"; - case kCodecAMR_WB: - return "AMR_WB"; - case kCodecPCM_MULAW: - return "PCM_MULAW"; - case kCodecGSM_MS: - return "GSM_MS"; - case kCodecPCM_S16BE: - return "PCM_S16BE"; - case kCodecPCM_S24BE: - return "PCM_S24BE"; - case kCodecOpus: - return "Opus"; - case kCodecEAC3: - return "EAC3"; + case kCodecAC3: + return "AC3"; case kCodecDTSC: return "DTSC"; + case kCodecDTSE: + return "DTSE"; case kCodecDTSH: return "DTSH"; case kCodecDTSL: return "DTSL"; - case kCodecDTSE: - return "DTSE"; - case kCodecDTSP: - return "DTS+"; case kCodecDTSM: return "DTS-"; + case kCodecDTSP: + return "DTS+"; + case kCodecOpus: + return "Opus"; + case kCodecVorbis: + return "Vorbis"; default: NOTIMPLEMENTED() << "Unknown Audio Codec: " << audio_codec; return "UnknownAudioCodec"; @@ -131,6 +113,8 @@ std::string AudioStreamInfo::GetCodecString(AudioCodec codec, return "dts+"; case kCodecDTSM: return "dts-"; + case kCodecAC3: + return "ac-3"; default: NOTIMPLEMENTED() << "Codec: " << codec; return "unknown"; diff --git a/packager/media/base/audio_stream_info.h b/packager/media/base/audio_stream_info.h index 6cad7dc997..dd11c7d342 100644 --- a/packager/media/base/audio_stream_info.h +++ b/packager/media/base/audio_stream_info.h @@ -17,24 +17,15 @@ namespace media { enum AudioCodec { kUnknownAudioCodec = 0, kCodecAAC, - kCodecMP3, - kCodecPCM, - kCodecVorbis, - kCodecFLAC, - kCodecAMR_NB, - kCodecAMR_WB, - kCodecPCM_MULAW, - kCodecGSM_MS, - kCodecPCM_S16BE, - kCodecPCM_S24BE, - kCodecOpus, - kCodecEAC3, + kCodecAC3, kCodecDTSC, + kCodecDTSE, kCodecDTSH, kCodecDTSL, - kCodecDTSE, - kCodecDTSP, kCodecDTSM, + kCodecDTSP, + kCodecOpus, + kCodecVorbis, kNumAudioCodec }; diff --git a/packager/media/formats/mp4/box_definitions.cc b/packager/media/formats/mp4/box_definitions.cc index f05e790aa1..062117ffd6 100644 --- a/packager/media/formats/mp4/box_definitions.cc +++ b/packager/media/formats/mp4/box_definitions.cc @@ -134,7 +134,8 @@ bool FileType::ReadWriteInternal(BoxBuffer* buffer) { buffer->ReadWriteUInt32(&minor_version)); size_t num_brands; if (buffer->Reading()) { - num_brands = (buffer->Size() - buffer->Pos()) / sizeof(FourCC); + RCHECK(buffer->BytesLeft() % sizeof(FourCC) == 0); + num_brands = buffer->BytesLeft() / sizeof(FourCC); compatible_brands.resize(num_brands); } else { num_brands = compatible_brands.size(); @@ -318,8 +319,8 @@ bool SampleEncryption::ReadWriteInternal(BoxBuffer* buffer) { // If we don't know |iv_size|, store sample encryption data to parse later // after we know iv_size. if (buffer->Reading() && iv_size == 0) { - RCHECK(buffer->ReadWriteVector(&sample_encryption_data, - buffer->Size() - buffer->Pos())); + RCHECK( + buffer->ReadWriteVector(&sample_encryption_data, buffer->BytesLeft())); return true; } @@ -1395,7 +1396,7 @@ bool DTSSpecific::ReadWriteInternal(BoxBuffer* buffer) { buffer->ReadWriteUInt8(&pcm_sample_depth)); if (buffer->Reading()) { - RCHECK(buffer->ReadWriteVector(&extra_data, buffer->Size() - buffer->Pos())); + RCHECK(buffer->ReadWriteVector(&extra_data, buffer->BytesLeft())); } else { if (extra_data.empty()) { extra_data.assign(kDdtsExtraData, @@ -1415,6 +1416,25 @@ uint32_t DTSSpecific::ComputeSizeInternal() { sizeof(kDdtsExtraData); } +AC3Specific::AC3Specific() {} +AC3Specific::~AC3Specific() {} + +FourCC AC3Specific::BoxType() const { return FOURCC_DAC3; } + +bool AC3Specific::ReadWriteInternal(BoxBuffer* buffer) { + RCHECK(ReadWriteHeaderInternal(buffer) && + buffer->ReadWriteVector( + &data, buffer->Reading() ? buffer->BytesLeft() : data.size())); + return true; +} + +uint32_t AC3Specific::ComputeSizeInternal() { + // This box is optional. Skip it if not initialized. + if (data.empty()) + return 0; + return HeaderSize() + data.size(); +} + AudioSampleEntry::AudioSampleEntry() : format(FOURCC_NULL), data_reference_index(1), @@ -1468,15 +1488,16 @@ bool AudioSampleEntry::ReadWriteInternal(BoxBuffer* buffer) { RCHECK(buffer->TryReadWriteChild(&esds)); RCHECK(buffer->TryReadWriteChild(&ddts)); + RCHECK(buffer->TryReadWriteChild(&dac3)); return true; } uint32_t AudioSampleEntry::ComputeSizeInternal() { return HeaderSize() + sizeof(data_reference_index) + sizeof(channelcount) + sizeof(samplesize) + sizeof(samplerate) + sinf.ComputeSize() + - esds.ComputeSize() + ddts.ComputeSize() + 6 + - 8 + // 6 + 8 bytes reserved. - 4; // 4 bytes predefined. + esds.ComputeSize() + ddts.ComputeSize() + dac3.ComputeSize() + + 6 + 8 + // 6 + 8 bytes reserved. + 4; // 4 bytes predefined. } WebVTTConfigurationBox::WebVTTConfigurationBox() {} @@ -1623,7 +1644,7 @@ FourCC DataEntryUrl::BoxType() const { return FOURCC_URL; } bool DataEntryUrl::ReadWriteInternal(BoxBuffer* buffer) { RCHECK(ReadWriteHeaderInternal(buffer)); if (buffer->Reading()) { - RCHECK(buffer->ReadWriteVector(&location, buffer->Size() - buffer->Pos())); + RCHECK(buffer->ReadWriteVector(&location, buffer->BytesLeft())); } else { RCHECK(buffer->ReadWriteVector(&location, location.size())); } diff --git a/packager/media/formats/mp4/box_definitions.h b/packager/media/formats/mp4/box_definitions.h index 1cd56df23e..12ff1f5a7d 100644 --- a/packager/media/formats/mp4/box_definitions.h +++ b/packager/media/formats/mp4/box_definitions.h @@ -309,6 +309,12 @@ struct DTSSpecific : Box { std::vector extra_data; }; +struct AC3Specific : Box { + DECLARE_BOX_METHODS(AC3Specific); + + std::vector data; +}; + struct AudioSampleEntry : Box { DECLARE_BOX_METHODS(AudioSampleEntry); // Returns actual format of this sample entry. @@ -325,6 +331,7 @@ struct AudioSampleEntry : Box { ProtectionSchemeInfo sinf; ElementaryStreamDescriptor esds; DTSSpecific ddts; + AC3Specific dac3; }; struct WebVTTConfigurationBox : Box { diff --git a/packager/media/formats/mp4/box_definitions_comparison.h b/packager/media/formats/mp4/box_definitions_comparison.h index 6ba02840fb..896ad2b9e4 100644 --- a/packager/media/formats/mp4/box_definitions_comparison.h +++ b/packager/media/formats/mp4/box_definitions_comparison.h @@ -240,14 +240,19 @@ inline bool operator==(const DTSSpecific& lhs, lhs.extra_data == rhs.extra_data; } +inline bool operator==(const AC3Specific& lhs, + const AC3Specific& rhs) { + return lhs.data == rhs.data; +} + inline bool operator==(const AudioSampleEntry& lhs, const AudioSampleEntry& rhs) { return lhs.format == rhs.format && lhs.data_reference_index == rhs.data_reference_index && lhs.channelcount == rhs.channelcount && lhs.samplesize == rhs.samplesize && lhs.samplerate == rhs.samplerate && - lhs.sinf == rhs.sinf && lhs.esds == rhs.esds && - lhs.ddts == rhs.ddts; + lhs.sinf == rhs.sinf && lhs.esds == rhs.esds && lhs.ddts == rhs.ddts && + lhs.dac3 == rhs.dac3; } inline bool operator==(const WebVTTConfigurationBox& lhs, diff --git a/packager/media/formats/mp4/box_definitions_unittest.cc b/packager/media/formats/mp4/box_definitions_unittest.cc index 06e79bbe67..7d83ba4e3c 100644 --- a/packager/media/formats/mp4/box_definitions_unittest.cc +++ b/packager/media/formats/mp4/box_definitions_unittest.cc @@ -380,6 +380,16 @@ class BoxDefinitionsTestGeneral : public testing::Test { ddts->pcm_sample_depth = 24; } + void Fill(AC3Specific* dac3) { + const uint8_t kAc3Data[] = {0x50, 0x11, 0x60}; + dac3->data.assign(kAc3Data, kAc3Data + arraysize(kAc3Data)); + } + + void Modify(AC3Specific* dac3) { + const uint8_t kAc3Data[] = {0x50, 0x11, 0x40}; + dac3->data.assign(kAc3Data, kAc3Data + arraysize(kAc3Data)); + } + void Fill(AudioSampleEntry* enca) { enca->format = FOURCC_ENCA; enca->data_reference_index = 2; @@ -884,6 +894,7 @@ class BoxDefinitionsTestGeneral : public testing::Test { bool IsOptional(const CodecConfigurationRecord* box) { return true; } bool IsOptional(const PixelAspectRatio* box) { return true; } bool IsOptional(const ElementaryStreamDescriptor* box) { return true; } + bool IsOptional(const AC3Specific* box) { return true; } // Recommended, but optional. bool IsOptional(const WebVTTSourceLabelBox* box) { return true; } bool IsOptional(const CompositionTimeToSample* box) { return true; } @@ -926,6 +937,7 @@ typedef testing::Typesbuffer_.get()); + + AudioSampleEntry entry_readback; + ASSERT_TRUE(ReadBack(&entry_readback)); + ASSERT_EQ(entry, entry_readback); +} + TEST_F(BoxDefinitionsTest, ProtectionSystemSpecificHeader) { ProtectionSystemSpecificHeader pssh; Fill(&pssh); diff --git a/packager/media/formats/mp4/fourccs.h b/packager/media/formats/mp4/fourccs.h index a070d9b817..19bfaf2532 100644 --- a/packager/media/formats/mp4/fourccs.h +++ b/packager/media/formats/mp4/fourccs.h @@ -16,12 +16,14 @@ enum FourCC { FOURCC_NULL = 0, FOURCC_ID32 = 0x49443332, FOURCC_PRIV = 0x50524956, + FOURCC_AC3 = 0x61632d33, // This fourcc is "ac-3". FOURCC_AVC1 = 0x61766331, FOURCC_AVCC = 0x61766343, FOURCC_BLOC = 0x626C6F63, FOURCC_CENC = 0x63656e63, FOURCC_CO64 = 0x636f3634, FOURCC_CTTS = 0x63747473, + FOURCC_DAC3 = 0x64616333, FOURCC_DASH = 0x64617368, FOURCC_DDTS = 0x64647473, FOURCC_DINF = 0x64696e66, @@ -32,7 +34,6 @@ enum FourCC { FOURCC_DTSL = 0x6474736c, FOURCC_DTSM = 0x6474732d, FOURCC_DTSP = 0x6474732b, - FOURCC_EAC3 = 0x65632d33, FOURCC_EDTS = 0x65647473, FOURCC_ELST = 0x656c7374, FOURCC_ENCA = 0x656e6361, diff --git a/packager/media/formats/mp4/mp4_media_parser.cc b/packager/media/formats/mp4/mp4_media_parser.cc index cf1b658b45..19e508c3f7 100644 --- a/packager/media/formats/mp4/mp4_media_parser.cc +++ b/packager/media/formats/mp4/mp4_media_parser.cc @@ -74,8 +74,8 @@ AudioCodec FourCCToAudioCodec(FourCC fourcc) { return kCodecDTSP; case FOURCC_DTSM: return kCodecDTSM; - case FOURCC_EAC3: - return kCodecEAC3; + case FOURCC_AC3: + return kCodecAC3; default: return kUnknownAudioCodec; } @@ -410,7 +410,8 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { num_channels = entry.channelcount; sampling_frequency = entry.samplerate; break; - case FOURCC_EAC3: + case FOURCC_AC3: + extra_data = entry.dac3.data; num_channels = entry.channelcount; sampling_frequency = entry.samplerate; break; diff --git a/packager/media/formats/mp4/mp4_muxer.cc b/packager/media/formats/mp4/mp4_muxer.cc index 52764393d2..e07ddd03b0 100644 --- a/packager/media/formats/mp4/mp4_muxer.cc +++ b/packager/media/formats/mp4/mp4_muxer.cc @@ -63,6 +63,8 @@ FourCC AudioCodecToFourCC(AudioCodec codec) { switch (codec) { case kCodecAAC: return FOURCC_MP4A; + case kCodecAC3: + return FOURCC_AC3; case kCodecDTSC: return FOURCC_DTSC; case kCodecDTSH: @@ -261,6 +263,9 @@ void MP4Muxer::GenerateAudioTrak(const AudioStreamInfo* audio_info, audio.ddts.sampling_frequency = audio_info->sampling_frequency(); audio.ddts.pcm_sample_depth = audio_info->sample_bits(); break; + case kCodecAC3: + audio.dac3.data = audio_info->extra_data(); + break; default: NOTIMPLEMENTED(); break;