Added MPEG-H support (mha1, mhm1)

Implemented according to `Audio Amendment to Guidelines for Implementation:
DASH-IF Interoperability Points, Version 4.3`
(https://dashif.org/docs/Audio%20Amendment%20to%20DASH%20IOP%204.3.pdf).

Closes #930.
This commit is contained in:
nvincen 2021-06-29 23:10:53 -07:00 committed by GitHub
parent b7ef11fa70
commit f018c9a9bf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 95 additions and 1 deletions

View File

@ -15,6 +15,7 @@
3Q GmbH <*@3qsdn.com> 3Q GmbH <*@3qsdn.com>
Alen Vrecko <alen.vrecko@gmail.com> Alen Vrecko <alen.vrecko@gmail.com>
Amazon Music <*@amazon.com>
Anders Hasselqvist <anders.hasselqvist@gmail.com> Anders Hasselqvist <anders.hasselqvist@gmail.com>
Audible <*@audible.com> Audible <*@audible.com>
Chun-da Chen <capitalm.c@gmail.com> Chun-da Chen <capitalm.c@gmail.com>

View File

@ -48,4 +48,5 @@ Sanil Raut <sr1990003@gmail.com>
Sergio Ammirata <sergio@ammirata.net> Sergio Ammirata <sergio@ammirata.net>
Thomas Inskip <tinskip@google.com> Thomas Inskip <tinskip@google.com>
Tim Lansen <tim.lansen@gmail.com> Tim Lansen <tim.lansen@gmail.com>
Vincent Nguyen <nvincen@amazon.com>
Weiguo Shao <weiguo.shao@dolby.com> Weiguo Shao <weiguo.shao@dolby.com>

View File

@ -43,6 +43,7 @@ Shaka Packager supports:
| MP3 | O | - | I / O | - | O | | MP3 | O | - | I / O | - | O |
| Dolby AC3 | I / O | - | I / O | - | O | | Dolby AC3 | I / O | - | I / O | - | O |
| Dolby EAC3 | I / O | - | O | - | O | | Dolby EAC3 | I / O | - | O | - | O |
| MPEG-H Audio | I / O | - | - | - | - |
| Dolby AC4 | I / O | - | - | - | - | | Dolby AC4 | I / O | - | - | - | - |
| DTS | I / O | - | - | - | - | | DTS | I / O | - | - | - | - |
| FLAC | I / O | - | - | - | - | | FLAC | I / O | - | - | - | - |

View File

@ -52,6 +52,17 @@ std::string AudioCodecToString(Codec codec) {
return "UnknownCodec"; return "UnknownCodec";
} }
} }
// Maps an MPEG-H audio codec to the FourCC of its sample entry. Every other
// codec maps to FOURCC_NULL.
FourCC CodecToFourCC(Codec codec) {
  if (codec == kCodecMha1)
    return FOURCC_mha1;
  if (codec == kCodecMhm1)
    return FOURCC_mhm1;
  return FOURCC_NULL;
}
} // namespace } // namespace
AudioStreamInfo::AudioStreamInfo( AudioStreamInfo::AudioStreamInfo(
@ -139,6 +150,17 @@ std::string AudioStreamInfo::GetCodecString(Codec codec,
return "mp3"; return "mp3";
case kCodecVorbis: case kCodecVorbis:
return "vorbis"; return "vorbis";
case kCodecMha1:
case kCodecMhm1:
// The signalling of the codecs parameters is according to RFC6381 [11]
// and ISO/IEC 23008-3 clause 21 [7].
// The value consists of the following two parts separated by a dot:
// - the sample entry 4CC code ('mha1', 'mha2', 'mhm1', 'mhm2')
// - 0x followed by the hex value of the profile-levelid, as defined
// in ISO/IEC 23008-3 [7]
return base::StringPrintf("%s.0x%02x",
FourCCToString(CodecToFourCC(codec)).c_str(),
audio_object_type);
default: default:
NOTIMPLEMENTED() << "Codec: " << codec; NOTIMPLEMENTED() << "Codec: " << codec;
return "unknown"; return "unknown";

View File

@ -89,6 +89,9 @@ enum FourCC : uint32_t {
FOURCC_meta = 0x6d657461, FOURCC_meta = 0x6d657461,
FOURCC_mfhd = 0x6d666864, FOURCC_mfhd = 0x6d666864,
FOURCC_mfra = 0x6d667261, FOURCC_mfra = 0x6d667261,
FOURCC_mha1 = 0x6d686131,
FOURCC_mhaC = 0x6d686143,
FOURCC_mhm1 = 0x6d686d31,
FOURCC_minf = 0x6d696e66, FOURCC_minf = 0x6d696e66,
FOURCC_moof = 0x6d6f6f66, FOURCC_moof = 0x6d6f6f66,
FOURCC_moov = 0x6d6f6f76, FOURCC_moov = 0x6d6f6f76,

View File

@ -54,6 +54,8 @@ enum Codec {
kCodecOpus, kCodecOpus,
kCodecVorbis, kCodecVorbis,
kCodecMP3, kCodecMP3,
kCodecMha1,
kCodecMhm1,
kCodecAudioMaxPlusOne, kCodecAudioMaxPlusOne,
kCodecText = 300, kCodecText = 300,

View File

@ -1709,6 +1709,29 @@ size_t ElementaryStreamDescriptor::ComputeSizeInternal() {
return HeaderSize() + es_descriptor.ComputeSize(); return HeaderSize() + es_descriptor.ComputeSize();
} }
// Both the constructor and the destructor use the compiler-generated
// implementations.
MHAConfiguration::MHAConfiguration() = default;
MHAConfiguration::~MHAConfiguration() = default;
// Returns the four-character code identifying this box type ('mhaC').
FourCC MHAConfiguration::BoxType() const {
return FOURCC_mhaC;
}
// Reads or writes the box header followed by the payload bytes held in `data`.
// While reading, every byte left in the buffer is consumed into `data`; while
// writing, exactly data.size() bytes are emitted.
// NOTE(review): RCHECK is defined elsewhere; presumably it makes this function
// return false when its condition does not hold -- confirm.
bool MHAConfiguration::ReadWriteInternal(BoxBuffer* buffer) {
RCHECK(ReadWriteHeaderInternal(buffer) &&
buffer->ReadWriteVector(
&data, buffer->Reading() ? buffer->BytesLeft() : data.size()));
// At least two bytes are required because index 1 is read below.
RCHECK(data.size() > 1);
// Cache the second payload byte as the profile level indication.
mpeg_h_3da_profile_level_indication = data[1];
return true;
}
// Computes the serialized size of the box: header plus payload bytes.
size_t MHAConfiguration::ComputeSizeInternal() {
  // The box is optional: with no payload it reports a size of zero so that it
  // is skipped; otherwise the size is the header plus the payload.
  return data.empty() ? 0 : HeaderSize() + data.size();
}
DTSSpecific::DTSSpecific() = default; DTSSpecific::DTSSpecific() = default;
DTSSpecific::~DTSSpecific() = default; DTSSpecific::~DTSSpecific() = default;
; ;
@ -1922,6 +1945,7 @@ bool AudioSampleEntry::ReadWriteInternal(BoxBuffer* buffer) {
RCHECK(buffer->TryReadWriteChild(&dac4)); RCHECK(buffer->TryReadWriteChild(&dac4));
RCHECK(buffer->TryReadWriteChild(&dops)); RCHECK(buffer->TryReadWriteChild(&dops));
RCHECK(buffer->TryReadWriteChild(&dfla)); RCHECK(buffer->TryReadWriteChild(&dfla));
RCHECK(buffer->TryReadWriteChild(&mhac));
// Somehow Edge does not support having sinf box before codec_configuration, // Somehow Edge does not support having sinf box before codec_configuration,
// box, so just do it in the end of AudioSampleEntry. See // box, so just do it in the end of AudioSampleEntry. See
@ -1947,7 +1971,7 @@ size_t AudioSampleEntry::ComputeSizeInternal() {
sizeof(samplesize) + sizeof(samplerate) + sinf.ComputeSize() + sizeof(samplesize) + sizeof(samplerate) + sinf.ComputeSize() +
esds.ComputeSize() + ddts.ComputeSize() + dac3.ComputeSize() + esds.ComputeSize() + ddts.ComputeSize() + dac3.ComputeSize() +
dec3.ComputeSize() + dops.ComputeSize() + dfla.ComputeSize() + dec3.ComputeSize() + dops.ComputeSize() + dfla.ComputeSize() +
dac4.ComputeSize() + dac4.ComputeSize() + mhac.ComputeSize() +
// Reserved and predefined bytes. // Reserved and predefined bytes.
6 + 8 + // 6 + 8 bytes reserved. 6 + 8 + // 6 + 8 bytes reserved.
4; // 4 bytes predefined. 4; // 4 bytes predefined.

View File

@ -328,6 +328,13 @@ struct AC3Specific : Box {
std::vector<uint8_t> data; std::vector<uint8_t> data;
}; };
// MPEG-H audio configuration box ('mhaC'), held as a member of
// AudioSampleEntry.
struct MHAConfiguration : Box {
  DECLARE_BOX_METHODS(MHAConfiguration);

  // Raw box payload; read from and written to the stream as-is.
  std::vector<uint8_t> data;
  // Copy of data[1], refreshed whenever the box is read or written.
  // Zero-initialized so that it never holds an indeterminate value when the
  // box was not read or written (for example, when it is absent from the
  // sample entry being parsed).
  uint8_t mpeg_h_3da_profile_level_indication = 0;
};
struct EC3Specific : Box { struct EC3Specific : Box {
DECLARE_BOX_METHODS(EC3Specific); DECLARE_BOX_METHODS(EC3Specific);
@ -382,6 +389,7 @@ struct AudioSampleEntry : Box {
AC4Specific dac4; AC4Specific dac4;
OpusSpecific dops; OpusSpecific dops;
FlacSpecific dfla; FlacSpecific dfla;
MHAConfiguration mhac;
}; };
struct WebVTTConfigurationBox : Box { struct WebVTTConfigurationBox : Box {

View File

@ -1222,6 +1222,21 @@ TEST_F(BoxDefinitionsTest, AC3SampleEntry) {
ASSERT_EQ(entry, entry_readback); ASSERT_EQ(entry, entry_readback);
} }
// Round-trip test: writes an 'mha1' audio sample entry whose mhac child has
// been filled in, reads it back, and expects the two entries to compare equal.
TEST_F(BoxDefinitionsTest, MHA1SampleEntry) {
AudioSampleEntry entry;
entry.format = FOURCC_mha1;
entry.data_reference_index = 2;
entry.channelcount = 5;
entry.samplesize = 16;
entry.samplerate = 44100;
// NOTE(review): Fill() is defined elsewhere; presumably it populates the box
// with test data -- confirm.
Fill(&entry.mhac);
entry.Write(this->buffer_.get());
AudioSampleEntry entry_readback;
ASSERT_TRUE(ReadBack(&entry_readback));
ASSERT_EQ(entry, entry_readback);
}
TEST_F(BoxDefinitionsTest, EC3SampleEntry) { TEST_F(BoxDefinitionsTest, EC3SampleEntry) {
AudioSampleEntry entry; AudioSampleEntry entry;
entry.format = FOURCC_ec_3; entry.format = FOURCC_ec_3;

View File

@ -99,6 +99,10 @@ Codec FourCCToCodec(FourCC fourcc) {
return kCodecAC4; return kCodecAC4;
case FOURCC_fLaC: case FOURCC_fLaC:
return kCodecFlac; return kCodecFlac;
case FOURCC_mha1:
return kCodecMha1;
case FOURCC_mhm1:
return kCodecMhm1;
default: default:
return kUnknownCodec; return kUnknownCodec;
} }
@ -510,6 +514,11 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
codec_delay_ns = codec_delay_ns =
entry.dops.preskip * kNanosecondsPerSecond / sampling_frequency; entry.dops.preskip * kNanosecondsPerSecond / sampling_frequency;
break; break;
case FOURCC_mha1:
case FOURCC_mhm1:
codec_config = entry.mhac.data;
audio_object_type = entry.mhac.mpeg_h_3da_profile_level_indication;
break;
default: default:
// Intentionally not to fail in the parser as there may be multiple // Intentionally not to fail in the parser as there may be multiple
// streams in the source content, which allows the supported stream to // streams in the source content, which allows the supported stream to

View File

@ -90,6 +90,10 @@ FourCC CodecToFourCC(Codec codec, H26xStreamFormat h26x_stream_format) {
return FOURCC_fLaC; return FOURCC_fLaC;
case kCodecOpus: case kCodecOpus:
return FOURCC_Opus; return FOURCC_Opus;
case kCodecMha1:
return FOURCC_mha1;
case kCodecMhm1:
return FOURCC_mhm1;
default: default:
return FOURCC_NULL; return FOURCC_NULL;
} }
@ -513,6 +517,10 @@ bool MP4Muxer::GenerateAudioTrak(const AudioStreamInfo* audio_info,
case kCodecOpus: case kCodecOpus:
audio.dops.opus_identification_header = audio_info->codec_config(); audio.dops.opus_identification_header = audio_info->codec_config();
break; break;
case kCodecMha1:
case kCodecMhm1:
audio.mhac.data = audio_info->codec_config();
break;
default: default:
NOTIMPLEMENTED() << " Unsupported audio codec " << audio_info->codec(); NOTIMPLEMENTED() << " Unsupported audio codec " << audio_info->codec();
return false; return false;