From f018c9a9bf6d936f3434ea32fb6a9ee42ae12b4b Mon Sep 17 00:00:00 2001 From: nvincen <64548874+nvincen@users.noreply.github.com> Date: Tue, 29 Jun 2021 23:10:53 -0700 Subject: [PATCH] Added MPEG-H support (mha1, mhm1) Implemented according to `Audio Amendment to Guidelines for Implementation: DASH-IF Interoperability Points, Version 4.3` (https://dashif.org/docs/Audio%20Amendment%20to%20DASH%20IOP%204.3.pdf). Closes #930. --- AUTHORS | 1 + CONTRIBUTORS | 1 + README.md | 1 + packager/media/base/audio_stream_info.cc | 22 ++++++++++++++++ packager/media/base/fourccs.h | 3 +++ packager/media/base/stream_info.h | 2 ++ packager/media/formats/mp4/box_definitions.cc | 26 ++++++++++++++++++- packager/media/formats/mp4/box_definitions.h | 8 ++++++ .../formats/mp4/box_definitions_unittest.cc | 15 +++++++++++ .../media/formats/mp4/mp4_media_parser.cc | 9 +++++++ packager/media/formats/mp4/mp4_muxer.cc | 8 ++++++ 11 files changed, 95 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index fa5959d1e7..6cf584a1f2 100644 --- a/AUTHORS +++ b/AUTHORS @@ -15,6 +15,7 @@ 3Q GmbH <*@3qsdn.com> Alen Vrecko +Amazon Music <*@amazon.com> Anders Hasselqvist Audible <*@audible.com> Chun-da Chen diff --git a/CONTRIBUTORS b/CONTRIBUTORS index bc24fa1476..92f89f7969 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -48,4 +48,5 @@ Sanil Raut Sergio Ammirata Thomas Inskip Tim Lansen +Vincent Nguyen Weiguo Shao diff --git a/README.md b/README.md index 4e6617442d..2ebf3e4c36 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ Shaka Packager supports: | MP3 | O | - | I / O | - | O | | Dolby AC3 | I / O | - | I / O | - | O | | Dolby EAC3 | I / O | - | O | - | O | + | MPEG-H Audio | I / O | - | - | - | - | | Dolby AC4 | I / O | - | - | - | - | | DTS | I / O | - | - | - | - | | FLAC | I / O | - | - | - | - | diff --git a/packager/media/base/audio_stream_info.cc b/packager/media/base/audio_stream_info.cc index 349ebd3f14..50bf262f6d 100644 --- a/packager/media/base/audio_stream_info.cc +++ 
b/packager/media/base/audio_stream_info.cc @@ -52,6 +52,17 @@ std::string AudioCodecToString(Codec codec) { return "UnknownCodec"; } } + +FourCC CodecToFourCC(Codec codec) { + switch (codec) { + case kCodecMha1: + return FOURCC_mha1; + case kCodecMhm1: + return FOURCC_mhm1; + default: + return FOURCC_NULL; + } +} } // namespace AudioStreamInfo::AudioStreamInfo( @@ -139,6 +150,17 @@ std::string AudioStreamInfo::GetCodecString(Codec codec, return "mp3"; case kCodecVorbis: return "vorbis"; + case kCodecMha1: + case kCodecMhm1: + // The signalling of the codecs parameters is according to RFC6381 [11] + // and ISO/IEC 23008-3 clause 21 [7]. + // The value consists of the following two parts separated by a dot: + // - the sample entry 4CC code ('mha1', 'mha2', 'mhm1', 'mhm2') + // - '0x' followed by the hex value of the profile-levelid, as defined + // in ISO/IEC 23008-3 [7] + return base::StringPrintf("%s.0x%02x", + FourCCToString(CodecToFourCC(codec)).c_str(), + audio_object_type); default: NOTIMPLEMENTED() << "Codec: " << codec; return "unknown"; diff --git a/packager/media/base/fourccs.h b/packager/media/base/fourccs.h index 5fb11fb20d..8be50c1b6b 100644 --- a/packager/media/base/fourccs.h +++ b/packager/media/base/fourccs.h @@ -89,6 +89,9 @@ enum FourCC : uint32_t { FOURCC_meta = 0x6d657461, FOURCC_mfhd = 0x6d666864, FOURCC_mfra = 0x6d667261, + FOURCC_mha1 = 0x6d686131, + FOURCC_mhaC = 0x6d686143, + FOURCC_mhm1 = 0x6d686d31, FOURCC_minf = 0x6d696e66, FOURCC_moof = 0x6d6f6f66, FOURCC_moov = 0x6d6f6f76, diff --git a/packager/media/base/stream_info.h b/packager/media/base/stream_info.h index a513ec7764..e1f8e81935 100644 --- a/packager/media/base/stream_info.h +++ b/packager/media/base/stream_info.h @@ -54,6 +54,8 @@ enum Codec { kCodecOpus, kCodecVorbis, kCodecMP3, + kCodecMha1, + kCodecMhm1, kCodecAudioMaxPlusOne, kCodecText = 300, diff --git a/packager/media/formats/mp4/box_definitions.cc b/packager/media/formats/mp4/box_definitions.cc index
cdf942a47d..166727709b 100644 --- a/packager/media/formats/mp4/box_definitions.cc +++ b/packager/media/formats/mp4/box_definitions.cc @@ -1709,6 +1709,29 @@ size_t ElementaryStreamDescriptor::ComputeSizeInternal() { return HeaderSize() + es_descriptor.ComputeSize(); } +MHAConfiguration::MHAConfiguration() = default; +MHAConfiguration::~MHAConfiguration() = default; + +FourCC MHAConfiguration::BoxType() const { + return FOURCC_mhaC; +} + +bool MHAConfiguration::ReadWriteInternal(BoxBuffer* buffer) { + RCHECK(ReadWriteHeaderInternal(buffer) && + buffer->ReadWriteVector( + &data, buffer->Reading() ? buffer->BytesLeft() : data.size())); + RCHECK(data.size() > 1); + mpeg_h_3da_profile_level_indication = data[1]; + return true; +} + +size_t MHAConfiguration::ComputeSizeInternal() { + // This box is optional. Skip it if not initialized. + if (data.empty()) + return 0; + return HeaderSize() + data.size(); +} + DTSSpecific::DTSSpecific() = default; DTSSpecific::~DTSSpecific() = default; ; @@ -1922,6 +1945,7 @@ bool AudioSampleEntry::ReadWriteInternal(BoxBuffer* buffer) { RCHECK(buffer->TryReadWriteChild(&dac4)); RCHECK(buffer->TryReadWriteChild(&dops)); RCHECK(buffer->TryReadWriteChild(&dfla)); + RCHECK(buffer->TryReadWriteChild(&mhac)); // Somehow Edge does not support having sinf box before codec_configuration, // box, so just do it in the end of AudioSampleEntry. See @@ -1947,7 +1971,7 @@ size_t AudioSampleEntry::ComputeSizeInternal() { sizeof(samplesize) + sizeof(samplerate) + sinf.ComputeSize() + esds.ComputeSize() + ddts.ComputeSize() + dac3.ComputeSize() + dec3.ComputeSize() + dops.ComputeSize() + dfla.ComputeSize() + - dac4.ComputeSize() + + dac4.ComputeSize() + mhac.ComputeSize() + // Reserved and predefined bytes. 6 + 8 + // 6 + 8 bytes reserved. 4; // 4 bytes predefined. 
diff --git a/packager/media/formats/mp4/box_definitions.h b/packager/media/formats/mp4/box_definitions.h index b4494ef178..989d49c8af 100644 --- a/packager/media/formats/mp4/box_definitions.h +++ b/packager/media/formats/mp4/box_definitions.h @@ -328,6 +328,13 @@ struct AC3Specific : Box { std::vector<uint8_t> data; }; +struct MHAConfiguration : Box { + DECLARE_BOX_METHODS(MHAConfiguration); + + std::vector<uint8_t> data; + uint8_t mpeg_h_3da_profile_level_indication = 0; +}; + struct EC3Specific : Box { DECLARE_BOX_METHODS(EC3Specific); @@ -382,6 +389,7 @@ struct AudioSampleEntry : Box { AC4Specific dac4; OpusSpecific dops; FlacSpecific dfla; + MHAConfiguration mhac; }; struct WebVTTConfigurationBox : Box { diff --git a/packager/media/formats/mp4/box_definitions_unittest.cc b/packager/media/formats/mp4/box_definitions_unittest.cc index 19526922ae..075e490779 100644 --- a/packager/media/formats/mp4/box_definitions_unittest.cc +++ b/packager/media/formats/mp4/box_definitions_unittest.cc @@ -1222,6 +1222,21 @@ TEST_F(BoxDefinitionsTest, AC3SampleEntry) { ASSERT_EQ(entry, entry_readback); } +TEST_F(BoxDefinitionsTest, MHA1SampleEntry) { + AudioSampleEntry entry; + entry.format = FOURCC_mha1; + entry.data_reference_index = 2; + entry.channelcount = 5; + entry.samplesize = 16; + entry.samplerate = 44100; + Fill(&entry.mhac); + entry.Write(this->buffer_.get()); + + AudioSampleEntry entry_readback; + ASSERT_TRUE(ReadBack(&entry_readback)); + ASSERT_EQ(entry, entry_readback); +} + TEST_F(BoxDefinitionsTest, EC3SampleEntry) { AudioSampleEntry entry; entry.format = FOURCC_ec_3; diff --git a/packager/media/formats/mp4/mp4_media_parser.cc b/packager/media/formats/mp4/mp4_media_parser.cc index 45bd369b30..e22b91a9b6 100644 --- a/packager/media/formats/mp4/mp4_media_parser.cc +++ b/packager/media/formats/mp4/mp4_media_parser.cc @@ -99,6 +99,10 @@ Codec FourCCToCodec(FourCC fourcc) { return kCodecAC4; case FOURCC_fLaC: return kCodecFlac; + case FOURCC_mha1: + return kCodecMha1; + case FOURCC_mhm1: +
return kCodecMhm1; default: return kUnknownCodec; } @@ -510,6 +514,11 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { codec_delay_ns = entry.dops.preskip * kNanosecondsPerSecond / sampling_frequency; break; + case FOURCC_mha1: + case FOURCC_mhm1: + codec_config = entry.mhac.data; + audio_object_type = entry.mhac.mpeg_h_3da_profile_level_indication; + break; default: // Intentionally not to fail in the parser as there may be multiple // streams in the source content, which allows the supported stream to diff --git a/packager/media/formats/mp4/mp4_muxer.cc b/packager/media/formats/mp4/mp4_muxer.cc index 23ca8273ca..3de400d38b 100644 --- a/packager/media/formats/mp4/mp4_muxer.cc +++ b/packager/media/formats/mp4/mp4_muxer.cc @@ -90,6 +90,10 @@ FourCC CodecToFourCC(Codec codec, H26xStreamFormat h26x_stream_format) { return FOURCC_fLaC; case kCodecOpus: return FOURCC_Opus; + case kCodecMha1: + return FOURCC_mha1; + case kCodecMhm1: + return FOURCC_mhm1; default: return FOURCC_NULL; } @@ -513,6 +517,10 @@ bool MP4Muxer::GenerateAudioTrak(const AudioStreamInfo* audio_info, case kCodecOpus: audio.dops.opus_identification_header = audio_info->codec_config(); break; + case kCodecMha1: + case kCodecMhm1: + audio.mhac.data = audio_info->codec_config(); + break; default: NOTIMPLEMENTED() << " Unsupported audio codec " << audio_info->codec(); return false;