From cf4a2447c13695999ba45aa8ab5b5981eae05454 Mon Sep 17 00:00:00 2001 From: Kongqun Yang Date: Mon, 9 May 2016 11:55:14 -0700 Subject: [PATCH] Add support for Opus specific box in iso-bmff This is part of the effort to support Opus in iso-bmff #83. Change-Id: Ib3678b9cb74eac76372ed83ad48ce1f203ba0c35 --- packager/media/base/fourccs.h | 3 ++ packager/media/formats/mp4/box_definitions.cc | 54 ++++++++++++++++++- packager/media/formats/mp4/box_definitions.h | 9 ++++ .../formats/mp4/box_definitions_comparison.h | 16 +++--- .../formats/mp4/box_definitions_unittest.cc | 32 +++++++++++ .../media/formats/mp4/mp4_media_parser.cc | 12 ++++- packager/media/formats/mp4/mp4_muxer.cc | 5 ++ .../media/formats/webm/webm_audio_client.cc | 2 +- 8 files changed, 123 insertions(+), 10 deletions(-) diff --git a/packager/media/base/fourccs.h b/packager/media/base/fourccs.h index 2ffe4c557c..291ad1c3e8 100644 --- a/packager/media/base/fourccs.h +++ b/packager/media/base/fourccs.h @@ -14,6 +14,8 @@ enum FourCC : uint32_t { FOURCC_NULL = 0, FOURCC_ID32 = 0x49443332, + FOURCC_Head = 0x48656164, + FOURCC_Opus = 0x4f707573, FOURCC_PRIV = 0x50524956, FOURCC_aacd = 0x61616364, @@ -29,6 +31,7 @@ enum FourCC : uint32_t { FOURCC_co64 = 0x636f3634, FOURCC_ctim = 0x6374696d, FOURCC_ctts = 0x63747473, + FOURCC_dOps = 0x644f7073, FOURCC_dac3 = 0x64616333, FOURCC_dash = 0x64617368, FOURCC_ddts = 0x64647473, diff --git a/packager/media/formats/mp4/box_definitions.cc b/packager/media/formats/mp4/box_definitions.cc index 9d4cb61e71..5d974fb93f 100644 --- a/packager/media/formats/mp4/box_definitions.cc +++ b/packager/media/formats/mp4/box_definitions.cc @@ -1467,6 +1467,57 @@ uint32_t EC3Specific::ComputeSizeInternal() { return HeaderSize() + data.size(); } +OpusSpecific::OpusSpecific() : preskip(0) {} +OpusSpecific::~OpusSpecific() {} + +FourCC OpusSpecific::BoxType() const { return FOURCC_dOps; } + +bool OpusSpecific::ReadWriteInternal(BoxBuffer* buffer) { + RCHECK(ReadWriteHeaderInternal(buffer)); + if (buffer->Reading()) { + std::vector data; + const int kMinOpusSpecificBoxDataSize = 11; + RCHECK(buffer->BytesLeft() >= kMinOpusSpecificBoxDataSize); + RCHECK(buffer->ReadWriteVector(&data, buffer->BytesLeft())); + preskip = data[2] + (data[3] << 8); + + // https://tools.ietf.org/html/draft-ietf-codec-oggopus-06#section-5 + BufferWriter writer; + writer.AppendInt(FOURCC_Opus); + writer.AppendInt(FOURCC_Head); + // The version must always be 1. + const uint8_t kOpusIdentificationHeaderVersion = 1; + data[0] = kOpusIdentificationHeaderVersion; + writer.AppendVector(data); + writer.SwapBuffer(&opus_identification_header); + } else { + // https://tools.ietf.org/html/draft-ietf-codec-oggopus-06#section-5 + // The first 8 bytes is "magic signature". + const size_t kOpusMagicSignatureSize = 8u; + DCHECK_GT(opus_identification_header.size(), kOpusMagicSignatureSize); + // https://www.opus-codec.org/docs/opus_in_isobmff.html + // The version field shall be set to 0. + const uint8_t kOpusSpecificBoxVersion = 0; + buffer->writer()->AppendInt(kOpusSpecificBoxVersion); + buffer->writer()->AppendArray( + &opus_identification_header[kOpusMagicSignatureSize + 1], + opus_identification_header.size() - kOpusMagicSignatureSize - 1); + } + return true; +} + +uint32_t OpusSpecific::ComputeSizeInternal() { + // This box is optional. Skip it if not initialized. + if (opus_identification_header.empty()) + return 0; + // https://tools.ietf.org/html/draft-ietf-codec-oggopus-06#section-5 + // The first 8 bytes is "magic signature". + const size_t kOpusMagicSignatureSize = 8u; + DCHECK_GT(opus_identification_header.size(), kOpusMagicSignatureSize); + return HeaderSize() + opus_identification_header.size() - + kOpusMagicSignatureSize; +} + AudioSampleEntry::AudioSampleEntry() : format(FOURCC_NULL), data_reference_index(1), @@ -1512,6 +1563,7 @@ bool AudioSampleEntry::ReadWriteInternal(BoxBuffer* buffer) { RCHECK(buffer->TryReadWriteChild(&ddts)); RCHECK(buffer->TryReadWriteChild(&dac3)); RCHECK(buffer->TryReadWriteChild(&dec3)); + RCHECK(buffer->TryReadWriteChild(&dops)); return true; } @@ -1519,7 +1571,7 @@ uint32_t AudioSampleEntry::ComputeSizeInternal() { return HeaderSize() + sizeof(data_reference_index) + sizeof(channelcount) + sizeof(samplesize) + sizeof(samplerate) + sinf.ComputeSize() + esds.ComputeSize() + ddts.ComputeSize() + dac3.ComputeSize() + - dec3.ComputeSize() + + dec3.ComputeSize() + dops.ComputeSize() + 6 + 8 + // 6 + 8 bytes reserved. 4; // 4 bytes predefined. } diff --git a/packager/media/formats/mp4/box_definitions.h b/packager/media/formats/mp4/box_definitions.h index e4a5821978..d9c835cec9 100644 --- a/packager/media/formats/mp4/box_definitions.h +++ b/packager/media/formats/mp4/box_definitions.h @@ -326,6 +326,14 @@ struct EC3Specific : Box { std::vector data; }; +struct OpusSpecific : Box { + DECLARE_BOX_METHODS(OpusSpecific); + + std::vector opus_identification_header; + // The number of priming samples. Extracted from |opus_identification_header|. + uint16_t preskip; +}; + struct AudioSampleEntry : Box { DECLARE_BOX_METHODS(AudioSampleEntry); // Returns actual format of this sample entry. @@ -345,6 +353,7 @@ struct AudioSampleEntry : Box { DTSSpecific ddts; AC3Specific dac3; EC3Specific dec3; + OpusSpecific dops; }; struct WebVTTConfigurationBox : Box { diff --git a/packager/media/formats/mp4/box_definitions_comparison.h b/packager/media/formats/mp4/box_definitions_comparison.h index 21b8cc79f2..ba5c006186 100644 --- a/packager/media/formats/mp4/box_definitions_comparison.h +++ b/packager/media/formats/mp4/box_definitions_comparison.h @@ -234,8 +234,7 @@ inline bool operator==(const ElementaryStreamDescriptor& lhs, return lhs.es_descriptor == rhs.es_descriptor; } -inline bool operator==(const DTSSpecific& lhs, - const DTSSpecific& rhs) { +inline bool operator==(const DTSSpecific& lhs, const DTSSpecific& rhs) { return lhs.sampling_frequency == rhs.sampling_frequency && lhs.max_bitrate == rhs.max_bitrate && lhs.avg_bitrate == rhs.avg_bitrate && @@ -243,16 +242,19 @@ inline bool operator==(const DTSSpecific& lhs, lhs.extra_data == rhs.extra_data; } -inline bool operator==(const AC3Specific& lhs, - const AC3Specific& rhs) { +inline bool operator==(const AC3Specific& lhs, const AC3Specific& rhs) { return lhs.data == rhs.data; } -inline bool operator==(const EC3Specific& lhs, - const EC3Specific& rhs) { +inline bool operator==(const EC3Specific& lhs, const EC3Specific& rhs) { return lhs.data == rhs.data; } +inline bool operator==(const OpusSpecific& lhs, const OpusSpecific& rhs) { + return lhs.opus_identification_header == rhs.opus_identification_header && + lhs.preskip == rhs.preskip; +} + inline bool operator==(const AudioSampleEntry& lhs, const AudioSampleEntry& rhs) { return lhs.format == rhs.format && @@ -260,7 +262,7 @@ inline bool operator==(const AudioSampleEntry& lhs, lhs.channelcount == rhs.channelcount && lhs.samplesize == rhs.samplesize && lhs.samplerate == rhs.samplerate && lhs.sinf == rhs.sinf && lhs.esds == rhs.esds && lhs.ddts == rhs.ddts && - lhs.dac3 == rhs.dac3 && lhs.dec3 == rhs.dec3; + lhs.dac3 == rhs.dac3 && lhs.dec3 == rhs.dec3 && lhs.dops == rhs.dops; } inline bool operator==(const WebVTTConfigurationBox& lhs, diff --git a/packager/media/formats/mp4/box_definitions_unittest.cc b/packager/media/formats/mp4/box_definitions_unittest.cc index b609940c12..e5169aad46 100644 --- a/packager/media/formats/mp4/box_definitions_unittest.cc +++ b/packager/media/formats/mp4/box_definitions_unittest.cc @@ -417,6 +417,21 @@ class BoxDefinitionsTestGeneral : public testing::Test { dec3->data.assign(kEc3Data, kEc3Data + arraysize(kEc3Data)); } + void Fill(OpusSpecific* dops) { + const uint8_t kOpusIdentificationHeader[] = { + 0x4f, 0x70, 0x75, 0x73, 0x48, 0x65, 0x61, 0x64, 0x01, 0x02, + 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x10, 0x00}; + dops->opus_identification_header.assign( + kOpusIdentificationHeader, + kOpusIdentificationHeader + arraysize(kOpusIdentificationHeader)); + dops->preskip = 0x0403; + } + + void Modify(OpusSpecific* dops) { + dops->opus_identification_header.resize( + dops->opus_identification_header.size() - 1); + } + void Fill(AudioSampleEntry* enca) { enca->format = FOURCC_enca; enca->data_reference_index = 2; @@ -945,6 +960,7 @@ class BoxDefinitionsTestGeneral : public testing::Test { bool IsOptional(const CueTimeBox* box) { return true; } bool IsOptional(const CueSettingsBox* box) { return true; } bool IsOptional(const DTSSpecific* box) {return true; } + bool IsOptional(const OpusSpecific* box) {return true; } protected: scoped_ptr buffer_; @@ -976,6 +992,7 @@ typedef testing::Typesbuffer_.get()); + + AudioSampleEntry entry_readback; + ASSERT_TRUE(ReadBack(&entry_readback)); + ASSERT_EQ(entry, entry_readback); +} + TEST_F(BoxDefinitionsTest, CompactSampleSize_FieldSize16) { CompactSampleSize stz2; stz2.field_size = 16; diff --git a/packager/media/formats/mp4/mp4_media_parser.cc b/packager/media/formats/mp4/mp4_media_parser.cc index 761061a2d9..c30c317cec 100644 --- a/packager/media/formats/mp4/mp4_media_parser.cc +++ b/packager/media/formats/mp4/mp4_media_parser.cc @@ -84,6 +84,7 @@ AudioCodec FourCCToAudioCodec(FourCC fourcc) { // Default DTS audio number of channels for 5.1 channel layout. const uint8_t kDtsAudioNumChannels = 6; +const uint64_t kNanosecondsPerSecond = 1000000000ull; } // namespace @@ -344,6 +345,7 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { AudioCodec codec = FourCCToAudioCodec(actual_format); uint8_t num_channels = 0; uint32_t sampling_frequency = 0; + uint64_t codec_delay_ns = 0; uint8_t audio_object_type = 0; uint32_t max_bitrate = 0; uint32_t avg_bitrate = 0; @@ -424,6 +426,14 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { num_channels = entry.channelcount; sampling_frequency = entry.samplerate; break; + case FOURCC_Opus: + extra_data = entry.dops.opus_identification_header; + num_channels = entry.channelcount; + sampling_frequency = entry.samplerate; + RCHECK(sampling_frequency != 0); + codec_delay_ns = + entry.dops.preskip * kNanosecondsPerSecond / sampling_frequency; + break; default: LOG(ERROR) << "Unsupported audio format 0x" << std::hex << actual_format << " in stsd box."; @@ -444,7 +454,7 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { num_channels, sampling_frequency, 0 /* seek preroll */, - 0 /* codec delay */, + codec_delay_ns, max_bitrate, avg_bitrate, extra_data.data(), diff --git a/packager/media/formats/mp4/mp4_muxer.cc b/packager/media/formats/mp4/mp4_muxer.cc index 723884e2f7..fed1e2d568 100644 --- a/packager/media/formats/mp4/mp4_muxer.cc +++ b/packager/media/formats/mp4/mp4_muxer.cc @@ -77,6 +77,8 @@ FourCC AudioCodecToFourCC(AudioCodec codec) { return FOURCC_dtsm; case kCodecEAC3: return FOURCC_ec_3; + case kCodecOpus: + return FOURCC_Opus; default: return FOURCC_NULL; } @@ -274,6 +276,9 @@ void MP4Muxer::GenerateAudioTrak(const AudioStreamInfo* audio_info, case kCodecEAC3: audio.dec3.data = audio_info->extra_data(); break; + case kCodecOpus: + audio.dops.opus_identification_header = audio_info->extra_data(); + break; default: NOTIMPLEMENTED(); break; diff --git a/packager/media/formats/webm/webm_audio_client.cc b/packager/media/formats/webm/webm_audio_client.cc index 693de793b7..3475c6bad4 100644 --- a/packager/media/formats/webm/webm_audio_client.cc +++ b/packager/media/formats/webm/webm_audio_client.cc @@ -67,7 +67,7 @@ scoped_refptr WebMAudioClient::GetAudioStreamInfo( extra_data_size = codec_private.size(); } - const uint32_t kSampleSizeInBits = 4u; + const uint8_t kSampleSizeInBits = 16u; return scoped_refptr(new AudioStreamInfo( track_num, kWebMTimeScale, 0, audio_codec, AudioStreamInfo::GetCodecString(audio_codec, 0), language,