Add support for FLAC codec

Implemented according to spec:
  https://github.com/xiph/flac/blob/master/doc/isoflac.txt

Closes #345.

Change-Id: If2e277d8eac4baff5965faa0e13c44c334f6184d
This commit is contained in:
KongQun Yang 2018-04-25 11:27:53 -07:00
parent cb2ec22a06
commit b6f0da246f
15 changed files with 125 additions and 13 deletions

View File

@ -32,6 +32,7 @@ Shaka Packager supports:
| AAC | I / O | - | I / O | I | | AAC | I / O | - | I / O | I |
| Dolby AC3/EAC3 | I / O | - | I | - | | Dolby AC3/EAC3 | I / O | - | I | - |
| DTS | I / O | - | - | - | | DTS | I / O | - | - | - |
| FLAC | I / O | - | - | - |
| Opus | *I / O* | I / O | - | - | | Opus | *I / O* | I / O | - | - |
| Vorbis | - | I / O | - | - | | Vorbis | - | I / O | - | - |

View File

@ -1071,6 +1071,17 @@ class PackagerFunctionalTest(PackagerAppTest):
self._VerifyDecryption(self.output[0], 'bear-320x240-opus-golden.mp4') self._VerifyDecryption(self.output[0], 'bear-320x240-opus-golden.mp4')
self._VerifyDecryption(self.output[1], 'bear-320x240-vp9-golden.mp4') self._VerifyDecryption(self.output[1], 'bear-320x240-vp9-golden.mp4')
def testPackageFlacWithEncryption(self):
  """Packages bear-flac.mp4 as an encrypted MP4 audio stream.

  After packaging succeeds, the outputs are checked under the
  'flac-with-encryption' test name and the first output is verified to
  decrypt against 'bear-flac-golden.mp4'.
  """
  flac_stream = self._GetStream(
      'audio', output_format='mp4', test_file='bear-flac.mp4')
  self.assertPackageSuccess([flac_stream], self._GetFlags(encryption=True))
  self._CheckTestResults('flac-with-encryption')
  self._VerifyDecryption(self.output[0], 'bear-flac-golden.mp4')
def testPackageWvmInput(self): def testPackageWvmInput(self):
self.encryption_key = '9248d245390e0a49d483ba9b43fc69c3' self.encryption_key = '9248d245390e0a49d483ba9b43fc69c3'
self.assertPackageSuccess( self.assertPackageSuccess(

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test>-->
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 DASH-MPD.xsd" xmlns:cenc="urn:mpeg:cenc:2013" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" minBufferTime="PT2S" type="static" mediaPresentationDuration="PT2.739954710006714S">
<Period id="0">
<AdaptationSet id="0" contentType="audio" subsegmentAlignment="true">
<ContentProtection value="cenc" schemeIdUri="urn:mpeg:dash:mp4protection:2011" cenc:default_KID="31323334-3536-3738-3930-313233343536"/>
<ContentProtection schemeIdUri="urn:uuid:1077efec-c0b2-4d02-ace3-3c1e52e2fb4b">
<cenc:pssh>AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAExMjM0NTY3ODkwMTIzNDU2AAAAAA==</cenc:pssh>
</ContentProtection>
<Representation id="0" bandwidth="672924" codecs="flac" mimeType="audio/mp4" audioSamplingRate="44100">
<AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/>
<BaseURL>bear-flac-audio.mp4</BaseURL>
<SegmentBase indexRange="983-1050" timescale="44100">
<Initialization range="0-982"/>
</SegmentBase>
</Representation>
</AdaptationSet>
</Period>
</MPD>

View File

@ -37,6 +37,8 @@ std::string AudioCodecToString(Codec codec) {
return "DTS+"; return "DTS+";
case kCodecEAC3: case kCodecEAC3:
return "EAC3"; return "EAC3";
case kCodecFlac:
return "FLAC";
case kCodecOpus: case kCodecOpus:
return "Opus"; return "Opus";
case kCodecVorbis: case kCodecVorbis:
@ -99,28 +101,30 @@ std::unique_ptr<StreamInfo> AudioStreamInfo::Clone() const {
std::string AudioStreamInfo::GetCodecString(Codec codec, std::string AudioStreamInfo::GetCodecString(Codec codec,
uint8_t audio_object_type) { uint8_t audio_object_type) {
switch (codec) { switch (codec) {
case kCodecVorbis:
return "vorbis";
case kCodecOpus:
return "opus";
case kCodecAAC: case kCodecAAC:
return "mp4a.40." + base::UintToString(audio_object_type); return "mp4a.40." + base::UintToString(audio_object_type);
case kCodecAC3:
return "ac-3";
case kCodecDTSC: case kCodecDTSC:
return "dtsc"; return "dtsc";
case kCodecDTSE:
return "dtse";
case kCodecDTSH: case kCodecDTSH:
return "dtsh"; return "dtsh";
case kCodecDTSL: case kCodecDTSL:
return "dtsl"; return "dtsl";
case kCodecDTSE:
return "dtse";
case kCodecDTSP:
return "dts+";
case kCodecDTSM: case kCodecDTSM:
return "dts-"; return "dts-";
case kCodecAC3: case kCodecDTSP:
return "ac-3"; return "dts+";
case kCodecEAC3: case kCodecEAC3:
return "ec-3"; return "ec-3";
case kCodecFlac:
return "flac";
case kCodecOpus:
return "opus";
case kCodecVorbis:
return "vorbis";
default: default:
NOTIMPLEMENTED() << "Codec: " << codec; NOTIMPLEMENTED() << "Codec: " << codec;
return "unknown"; return "unknown";

View File

@ -42,6 +42,7 @@ enum FourCC : uint32_t {
FOURCC_dash = 0x64617368, FOURCC_dash = 0x64617368,
FOURCC_ddts = 0x64647473, FOURCC_ddts = 0x64647473,
FOURCC_dec3 = 0x64656333, FOURCC_dec3 = 0x64656333,
FOURCC_dfLa = 0x64664c61,
FOURCC_dinf = 0x64696e66, FOURCC_dinf = 0x64696e66,
FOURCC_dref = 0x64726566, FOURCC_dref = 0x64726566,
FOURCC_dtsc = 0x64747363, FOURCC_dtsc = 0x64747363,
@ -57,6 +58,7 @@ enum FourCC : uint32_t {
FOURCC_enca = 0x656e6361, FOURCC_enca = 0x656e6361,
FOURCC_encv = 0x656e6376, FOURCC_encv = 0x656e6376,
FOURCC_esds = 0x65736473, FOURCC_esds = 0x65736473,
FOURCC_fLaC = 0x664c6143,
FOURCC_free = 0x66726565, FOURCC_free = 0x66726565,
FOURCC_frma = 0x66726d61, FOURCC_frma = 0x66726d61,
FOURCC_ftyp = 0x66747970, FOURCC_ftyp = 0x66747970,

View File

@ -46,6 +46,7 @@ enum Codec {
kCodecDTSM, kCodecDTSM,
kCodecDTSP, kCodecDTSP,
kCodecEAC3, kCodecEAC3,
kCodecFlac,
kCodecOpus, kCodecOpus,
kCodecVorbis, kCodecVorbis,
kCodecAudioMaxPlusOne, kCodecAudioMaxPlusOne,

View File

@ -1774,6 +1774,27 @@ size_t OpusSpecific::ComputeSizeInternal() {
kOpusMagicSignatureSize; kOpusMagicSignatureSize;
} }
// Default construction leaves |data| empty; ComputeSizeInternal() reports a
// size of 0 for an empty |data|, so the box is skipped until |data| is filled.
FlacSpecific::FlacSpecific() {}
FlacSpecific::~FlacSpecific() {}
// Returns the four-character code identifying this box ('dfLa').
FourCC FlacSpecific::BoxType() const {
return FOURCC_dfLa;
}
// Reads or writes the 'dfLa' box: the box header first, then the payload as
// opaque bytes. Returns false (via RCHECK) if either step fails.
bool FlacSpecific::ReadWriteInternal(BoxBuffer* buffer) {
  RCHECK(ReadWriteHeaderInternal(buffer));

  // When writing, the payload length is simply the size of |data|. When
  // reading, everything still left in the buffer is taken as the payload.
  size_t size = data.size();
  if (buffer->Reading())
    size = buffer->BytesLeft();

  RCHECK(buffer->ReadWriteVector(&data, size));
  return true;
}
// Returns the serialized size of the box: header plus payload bytes.
// The box is optional, so an uninitialized (empty) |data| yields 0, which
// causes the box to be skipped.
size_t FlacSpecific::ComputeSizeInternal() {
  const bool initialized = !data.empty();
  return initialized ? HeaderSize() + data.size() : 0;
}
AudioSampleEntry::AudioSampleEntry() AudioSampleEntry::AudioSampleEntry()
: format(FOURCC_NULL), : format(FOURCC_NULL),
data_reference_index(1), data_reference_index(1),
@ -1818,6 +1839,7 @@ bool AudioSampleEntry::ReadWriteInternal(BoxBuffer* buffer) {
RCHECK(buffer->TryReadWriteChild(&dac3)); RCHECK(buffer->TryReadWriteChild(&dac3));
RCHECK(buffer->TryReadWriteChild(&dec3)); RCHECK(buffer->TryReadWriteChild(&dec3));
RCHECK(buffer->TryReadWriteChild(&dops)); RCHECK(buffer->TryReadWriteChild(&dops));
RCHECK(buffer->TryReadWriteChild(&dfla));
// Somehow Edge does not support having sinf box before codec_configuration, // Somehow Edge does not support having sinf box before codec_configuration,
// box, so just do it in the end of AudioSampleEntry. See // box, so just do it in the end of AudioSampleEntry. See
@ -1842,7 +1864,8 @@ size_t AudioSampleEntry::ComputeSizeInternal() {
return HeaderSize() + sizeof(data_reference_index) + sizeof(channelcount) + return HeaderSize() + sizeof(data_reference_index) + sizeof(channelcount) +
sizeof(samplesize) + sizeof(samplerate) + sinf.ComputeSize() + sizeof(samplesize) + sizeof(samplerate) + sinf.ComputeSize() +
esds.ComputeSize() + ddts.ComputeSize() + dac3.ComputeSize() + esds.ComputeSize() + ddts.ComputeSize() + dac3.ComputeSize() +
dec3.ComputeSize() + dops.ComputeSize() + dec3.ComputeSize() + dops.ComputeSize() + dfla.ComputeSize() +
// Reserved and predefined bytes.
6 + 8 + // 6 + 8 bytes reserved. 6 + 8 + // 6 + 8 bytes reserved.
4; // 4 bytes predefined. 4; // 4 bytes predefined.
} }

View File

@ -342,6 +342,15 @@ struct OpusSpecific : Box {
uint16_t preskip; uint16_t preskip;
}; };
// FLAC specific decoder configuration box ('dfLa'), see:
// https://github.com/xiph/flac/blob/master/doc/isoflac.txt
// The payload is not interpreted here; it is kept as opaque bytes and simply
// copied over between input and output.
struct FlacSpecific : FullBox {
DECLARE_BOX_METHODS(FlacSpecific);
// Uninterpreted bytes following the box header. When empty, the box is
// treated as not initialized and is skipped.
std::vector<uint8_t> data;
};
struct AudioSampleEntry : Box { struct AudioSampleEntry : Box {
DECLARE_BOX_METHODS(AudioSampleEntry); DECLARE_BOX_METHODS(AudioSampleEntry);
// Returns actual format of this sample entry. // Returns actual format of this sample entry.
@ -362,6 +371,7 @@ struct AudioSampleEntry : Box {
AC3Specific dac3; AC3Specific dac3;
EC3Specific dec3; EC3Specific dec3;
OpusSpecific dops; OpusSpecific dops;
FlacSpecific dfla;
}; };
struct WebVTTConfigurationBox : Box { struct WebVTTConfigurationBox : Box {

View File

@ -434,6 +434,16 @@ class BoxDefinitionsTestGeneral : public testing::Test {
dops->opus_identification_header.size() - 1); dops->opus_identification_header.size() - 1);
} }
// Populates |dfla| with a small fixed payload for the box tests.
void Fill(FlacSpecific* dfla) {
  dfla->data = {0x50, 0x11, 0x60};
}
// Replaces the payload of |dfla| with one that differs from Fill()'s payload
// in its last byte only.
void Modify(FlacSpecific* dfla) {
  dfla->data = {0x50, 0x11, 0x40};
}
void Fill(AudioSampleEntry* enca) { void Fill(AudioSampleEntry* enca) {
enca->format = FOURCC_enca; enca->format = FOURCC_enca;
enca->data_reference_index = 2; enca->data_reference_index = 2;
@ -1234,6 +1244,21 @@ TEST_F(BoxDefinitionsTest, OpusSampleEntry) {
ASSERT_EQ(entry, entry_readback); ASSERT_EQ(entry, entry_readback);
} }
// Writes an AudioSampleEntry of format 'fLaC' that carries a filled dfLa box,
// reads it back, and expects the parsed entry to equal the written one.
TEST_F(BoxDefinitionsTest, FlacSampleEntry) {
  AudioSampleEntry entry;
  Fill(&entry.dfla);
  entry.samplerate = 44100;
  entry.samplesize = 16;
  entry.channelcount = 5;
  entry.data_reference_index = 2;
  entry.format = FOURCC_fLaC;
  entry.Write(this->buffer_.get());

  AudioSampleEntry entry_readback;
  ASSERT_TRUE(ReadBack(&entry_readback));
  ASSERT_EQ(entry, entry_readback);
}
TEST_F(BoxDefinitionsTest, CompactSampleSize_FieldSize16) { TEST_F(BoxDefinitionsTest, CompactSampleSize_FieldSize16) {
CompactSampleSize stz2; CompactSampleSize stz2;
stz2.field_size = 16; stz2.field_size = 16;

View File

@ -89,6 +89,8 @@ Codec FourCCToCodec(FourCC fourcc) {
return kCodecAC3; return kCodecAC3;
case FOURCC_ec_3: case FOURCC_ec_3:
return kCodecEAC3; return kCodecEAC3;
case FOURCC_fLaC:
return kCodecFlac;
default: default:
return kUnknownCodec; return kUnknownCodec;
} }
@ -425,12 +427,12 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
break; break;
case FOURCC_dtsc: case FOURCC_dtsc:
FALLTHROUGH_INTENDED; FALLTHROUGH_INTENDED;
case FOURCC_dtse:
FALLTHROUGH_INTENDED;
case FOURCC_dtsh: case FOURCC_dtsh:
FALLTHROUGH_INTENDED; FALLTHROUGH_INTENDED;
case FOURCC_dtsl: case FOURCC_dtsl:
FALLTHROUGH_INTENDED; FALLTHROUGH_INTENDED;
case FOURCC_dtse:
FALLTHROUGH_INTENDED;
case FOURCC_dtsm: case FOURCC_dtsm:
codec_config = entry.ddts.extra_data; codec_config = entry.ddts.extra_data;
max_bitrate = entry.ddts.max_bitrate; max_bitrate = entry.ddts.max_bitrate;
@ -448,6 +450,11 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
num_channels = static_cast<uint8_t>(GetEc3NumChannels(codec_config)); num_channels = static_cast<uint8_t>(GetEc3NumChannels(codec_config));
sampling_frequency = entry.samplerate; sampling_frequency = entry.samplerate;
break; break;
case FOURCC_fLaC:
codec_config = entry.dfla.data;
num_channels = entry.channelcount;
sampling_frequency = entry.samplerate;
break;
case FOURCC_Opus: case FOURCC_Opus:
codec_config = entry.dops.opus_identification_header; codec_config = entry.dops.opus_identification_header;
num_channels = entry.channelcount; num_channels = entry.channelcount;

View File

@ -73,6 +73,8 @@ FourCC CodecToFourCC(Codec codec, H26xStreamFormat h26x_stream_format) {
return FOURCC_dtsm; return FOURCC_dtsm;
case kCodecEAC3: case kCodecEAC3:
return FOURCC_ec_3; return FOURCC_ec_3;
case kCodecFlac:
return FOURCC_fLaC;
case kCodecOpus: case kCodecOpus:
return FOURCC_Opus; return FOURCC_Opus;
default: default:
@ -367,6 +369,9 @@ void MP4Muxer::GenerateAudioTrak(const AudioStreamInfo* audio_info,
case kCodecEAC3: case kCodecEAC3:
audio.dec3.data = audio_info->codec_config(); audio.dec3.data = audio_info->codec_config();
break; break;
case kCodecFlac:
audio.dfla.data = audio_info->codec_config();
break;
case kCodecOpus: case kCodecOpus:
audio.dops.opus_identification_header = audio_info->codec_config(); audio.dops.opus_identification_header = audio_info->codec_config();
break; break;

View File

@ -41,6 +41,10 @@ bear-640x360-trailing-moov-additional-mdat.mp4 - Same content, but with moov bo
bear-640x360-av_frag.mp4 - Same content, but in fragmented mp4. bear-640x360-av_frag.mp4 - Same content, but in fragmented mp4.
bear-640x360-aac_lc-silent_right.mp4 - Audio only, stereo, but right channel is silent, with AAC-LC profile. bear-640x360-aac_lc-silent_right.mp4 - Audio only, stereo, but right channel is silent, with AAC-LC profile.
bear-640x360-aac_he-silent_right.mp4 - Same as above, but with AAC-HE profile. bear-640x360-aac_he-silent_right.mp4 - Same as above, but with AAC-HE profile.
bear-flac.mp4 - Unfragmented audio-only 44.1kHz FLAC in MP4 file, created using:
ffmpeg -i bear-1280x720.mp4 -map 0:0 -acodec flac -strict -2 bear-flac.mp4
Note: "-strict -2" was required because the ffmpeg build used (libavformat
57.75.100) marks FLAC-in-MP4 support as experimental.
// Non square pixels. // Non square pixels.
bear-640x360-non_square_pixel-with_pasp.mp4 - A non-square pixel version of the video track of bear-640x360.mp4 with PixelAspectRatio box. bear-640x360-non_square_pixel-with_pasp.mp4 - A non-square pixel version of the video track of bear-640x360.mp4 with PixelAspectRatio box.

Binary file not shown.