Added AC-4 codec support (#795)

Closes #754.
This commit is contained in:
Weiguo Shao 2020-07-05 05:55:28 +08:00 committed by GitHub
parent 6b036b9bb1
commit 540c0aaffb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 1160 additions and 38 deletions

View File

@ -43,6 +43,7 @@ Shaka Packager supports:
| MP3 | O | - | I / O | - | O | | MP3 | O | - | I / O | - | O |
| Dolby AC3 | I / O | - | I / O | - | O | | Dolby AC3 | I / O | - | I / O | - | O |
| Dolby EAC3 | I / O | - | O | - | O | | Dolby EAC3 | I / O | - | O | - | O |
| Dolby AC4 | I / O | - | - | - | - |
| DTS | I / O | - | - | - | - | | DTS | I / O | - | - | - | - |
| FLAC | I / O | - | - | - | - | | FLAC | I / O | - | - | - | - |
| Opus | I / O³ | I / O | - | - | - | | Opus | I / O³ | I / O | - | - | - |

View File

@ -42,7 +42,7 @@ struct Variant {
const std::string* audio_group_id = nullptr; const std::string* audio_group_id = nullptr;
const std::string* text_group_id = nullptr; const std::string* text_group_id = nullptr;
// The bitrates should be the sum of audio bitrate and text bitrate. // The bitrates should be the sum of audio bitrate and text bitrate.
// However, given the contraints and assumptions, it makes sense to exclude // However, given the constraints and assumptions, it makes sense to exclude
// text bitrate out of the calculation: // text bitrate out of the calculation:
// - Text streams usually have a very small negligible bitrate. // - Text streams usually have a very small negligible bitrate.
// - Text does not have constant bitrates. To avoid fluctuation, an arbitrary // - Text does not have constant bitrates. To avoid fluctuation, an arbitrary
@ -260,7 +260,7 @@ void BuildMediaTag(const MediaPlaylist& playlist,
bool is_autoselect, bool is_autoselect,
const std::string& base_url, const std::string& base_url,
std::string* out) { std::string* out) {
// Tag attribures should follow the order as defined in // Tag attributes should follow the order as defined in
// https://tools.ietf.org/html/draft-pantos-http-live-streaming-23#section-3.5 // https://tools.ietf.org/html/draft-pantos-http-live-streaming-23#section-3.5
Tag tag("#EXT-X-MEDIA", out); Tag tag("#EXT-X-MEDIA", out);
@ -308,20 +308,27 @@ void BuildMediaTag(const MediaPlaylist& playlist,
const MediaPlaylist::MediaPlaylistStreamType kAudio = const MediaPlaylist::MediaPlaylistStreamType kAudio =
MediaPlaylist::MediaPlaylistStreamType::kAudio; MediaPlaylist::MediaPlaylistStreamType::kAudio;
if (playlist.stream_type() == kAudio) { if (playlist.stream_type() == kAudio) {
// According to HLS spec:
// https://tools.ietf.org/html/draft-pantos-hls-rfc8216bis 4.4.6.1.
// CHANNELS is a quoted-string that specifies an ordered,
// slash-separated ("/") list of parameters. The first parameter is a count
// of audio channels, and the second parameter identifies the encoding of
// object-based audio used by the Rendition. HLS Authoring Specification
// for Apple Devices Appendices documents how to handle Dolby Digital Plus
// JOC content.
// https://developer.apple.com/documentation/http_live_streaming/hls_authoring_specification_for_apple_devices/hls_authoring_specification_for_apple_devices_appendices
if (playlist.GetEC3JocComplexity() != 0) { if (playlist.GetEC3JocComplexity() != 0) {
// HLS Authoring Specification for Apple Devices Appendices documents how
// to handle Dolby Digital Plus JOC content.
// https://developer.apple.com/documentation/http_live_streaming/hls_authoring_specification_for_apple_devices/hls_authoring_specification_for_apple_devices_appendices
std::string channel_string = std::string channel_string =
std::to_string(playlist.GetEC3JocComplexity()) + "/JOC"; std::to_string(playlist.GetEC3JocComplexity()) + "/JOC";
tag.AddQuotedString("CHANNELS", channel_string); tag.AddQuotedString("CHANNELS", channel_string);
} else if (playlist.GetAC4ImsFlag() || playlist.GetAC4CbiFlag()) {
// Dolby has qualified using IMSA to present AC4 immersive audio (IMS and
// CBI without object-based audio) for Dolby internal use only. IMSA is
// not included in any publicly-available specifications as of June, 2020.
std::string channel_string =
std::to_string(playlist.GetNumChannels()) + "/IMSA";
tag.AddQuotedString("CHANNELS", channel_string);
} else { } else {
// According to HLS spec:
// https://tools.ietf.org/html/draft-pantos-hls-rfc8216bis 4.4.6.1.
// CHANNELS is a quoted-string that specifies an ordered,
// slash-separated ("/") list of parameters. The first parameter is a
// count of audio channels, and the second parameter identifies the
// encoding of object-based audio used by the Rendition.
std::string channel_string = std::to_string(playlist.GetNumChannels()); std::string channel_string = std::to_string(playlist.GetNumChannels());
tag.AddQuotedString("CHANNELS", channel_string); tag.AddQuotedString("CHANNELS", channel_string);
} }

View File

@ -34,6 +34,8 @@ const uint32_t kWidth = 800;
const uint32_t kHeight = 600; const uint32_t kHeight = 600;
const uint32_t kEC3JocComplexityZero = 0; const uint32_t kEC3JocComplexityZero = 0;
const uint32_t kEC3JocComplexity = 16; const uint32_t kEC3JocComplexity = 16;
const bool kAC4IMSFlagEnabled = true;
const bool kAC4CBIFlagEnabled = true;
std::unique_ptr<MockMediaPlaylist> CreateVideoPlaylist( std::unique_ptr<MockMediaPlaylist> CreateVideoPlaylist(
const std::string& filename, const std::string& filename,
@ -84,13 +86,17 @@ std::unique_ptr<MockMediaPlaylist> CreateAudioPlaylist(
uint64_t channels, uint64_t channels,
uint64_t max_bitrate, uint64_t max_bitrate,
uint64_t avg_bitrate, uint64_t avg_bitrate,
uint64_t ec3_joc_complexity) { uint64_t ec3_joc_complexity,
bool ac4_ims_flag,
bool ac4_cbi_flag) {
std::unique_ptr<MockMediaPlaylist> playlist( std::unique_ptr<MockMediaPlaylist> playlist(
new MockMediaPlaylist(filename, name, group)); new MockMediaPlaylist(filename, name, group));
EXPECT_CALL(*playlist, GetNumChannels()).WillRepeatedly(Return(channels)); EXPECT_CALL(*playlist, GetNumChannels()).WillRepeatedly(Return(channels));
EXPECT_CALL(*playlist, GetEC3JocComplexity()) EXPECT_CALL(*playlist, GetEC3JocComplexity())
.WillRepeatedly(Return(ec3_joc_complexity)); .WillRepeatedly(Return(ec3_joc_complexity));
EXPECT_CALL(*playlist, GetAC4ImsFlag()).WillRepeatedly(Return(ac4_ims_flag));
EXPECT_CALL(*playlist, GetAC4CbiFlag()).WillRepeatedly(Return(ac4_cbi_flag));
playlist->SetStreamTypeForTesting( playlist->SetStreamTypeForTesting(
MediaPlaylist::MediaPlaylistStreamType::kAudio); MediaPlaylist::MediaPlaylistStreamType::kAudio);
@ -251,12 +257,14 @@ TEST_F(MasterPlaylistTest, WriteMasterPlaylistVideoAndAudio) {
// First audio, english.m3u8. // First audio, english.m3u8.
std::unique_ptr<MockMediaPlaylist> english_playlist = CreateAudioPlaylist( std::unique_ptr<MockMediaPlaylist> english_playlist = CreateAudioPlaylist(
"eng.m3u8", "english", "audiogroup", "audiocodec", "en", kAudio1Channels, "eng.m3u8", "english", "audiogroup", "audiocodec", "en", kAudio1Channels,
kAudio1MaxBitrate, kAudio1AvgBitrate, kEC3JocComplexityZero); kAudio1MaxBitrate, kAudio1AvgBitrate, kEC3JocComplexityZero,
!kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled);
// Second audio, spanish.m3u8. // Second audio, spanish.m3u8.
std::unique_ptr<MockMediaPlaylist> spanish_playlist = CreateAudioPlaylist( std::unique_ptr<MockMediaPlaylist> spanish_playlist = CreateAudioPlaylist(
"spa.m3u8", "espanol", "audiogroup", "audiocodec", "es", kAudio2Channels, "spa.m3u8", "espanol", "audiogroup", "audiocodec", "es", kAudio2Channels,
kAudio2MaxBitrate, kAudio2AvgBitrate, kEC3JocComplexityZero); kAudio2MaxBitrate, kAudio2AvgBitrate, kEC3JocComplexityZero,
!kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled);
const char kBaseUrl[] = "http://playlists.org/"; const char kBaseUrl[] = "http://playlists.org/";
EXPECT_TRUE(master_playlist_.WriteMasterPlaylist( EXPECT_TRUE(master_playlist_.WriteMasterPlaylist(
@ -311,13 +319,13 @@ TEST_F(MasterPlaylistTest, WriteMasterPlaylistMultipleAudioGroups) {
std::unique_ptr<MockMediaPlaylist> eng_lo_playlist = CreateAudioPlaylist( std::unique_ptr<MockMediaPlaylist> eng_lo_playlist = CreateAudioPlaylist(
"eng_lo.m3u8", "english_lo", "audio_lo", "audiocodec_lo", "en", "eng_lo.m3u8", "english_lo", "audio_lo", "audiocodec_lo", "en",
kAudio1Channels, kAudio1MaxBitrate, kAudio1AvgBitrate, kAudio1Channels, kAudio1MaxBitrate, kAudio1AvgBitrate,
kEC3JocComplexityZero); kEC3JocComplexityZero, !kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled);
// Second audio, eng_hi.m3u8. // Second audio, eng_hi.m3u8.
std::unique_ptr<MockMediaPlaylist> eng_hi_playlist = CreateAudioPlaylist( std::unique_ptr<MockMediaPlaylist> eng_hi_playlist = CreateAudioPlaylist(
"eng_hi.m3u8", "english_hi", "audio_hi", "audiocodec_hi", "en", "eng_hi.m3u8", "english_hi", "audio_hi", "audiocodec_hi", "en",
kAudio2Channels, kAudio2MaxBitrate, kAudio2AvgBitrate, kAudio2Channels, kAudio2MaxBitrate, kAudio2AvgBitrate,
kEC3JocComplexityZero); kEC3JocComplexityZero, !kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled);
const char kBaseUrl[] = "http://anydomain.com/"; const char kBaseUrl[] = "http://anydomain.com/";
EXPECT_TRUE(master_playlist_.WriteMasterPlaylist( EXPECT_TRUE(master_playlist_.WriteMasterPlaylist(
@ -360,11 +368,11 @@ TEST_F(MasterPlaylistTest, WriteMasterPlaylistSameAudioGroupSameLanguage) {
// First audio, eng_lo.m3u8. // First audio, eng_lo.m3u8.
std::unique_ptr<MockMediaPlaylist> eng_lo_playlist = CreateAudioPlaylist( std::unique_ptr<MockMediaPlaylist> eng_lo_playlist = CreateAudioPlaylist(
"eng_lo.m3u8", "english", "audio", "audiocodec", "en", 1, 50000, 40000, "eng_lo.m3u8", "english", "audio", "audiocodec", "en", 1, 50000, 40000,
kEC3JocComplexityZero); kEC3JocComplexityZero, !kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled);
std::unique_ptr<MockMediaPlaylist> eng_hi_playlist = CreateAudioPlaylist( std::unique_ptr<MockMediaPlaylist> eng_hi_playlist = CreateAudioPlaylist(
"eng_hi.m3u8", "english", "audio", "audiocodec", "en", 8, 100000, 80000, "eng_hi.m3u8", "english", "audio", "audiocodec", "en", 8, 100000, 80000,
kEC3JocComplexityZero); kEC3JocComplexityZero, !kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled);
const char kBaseUrl[] = "http://anydomain.com/"; const char kBaseUrl[] = "http://anydomain.com/";
EXPECT_TRUE(master_playlist_.WriteMasterPlaylist( EXPECT_TRUE(master_playlist_.WriteMasterPlaylist(
@ -531,7 +539,7 @@ TEST_F(MasterPlaylistTest, WriteMasterPlaylistVideoAndAudioAndText) {
// Audio, english.m3u8. // Audio, english.m3u8.
std::unique_ptr<MockMediaPlaylist> audio = CreateAudioPlaylist( std::unique_ptr<MockMediaPlaylist> audio = CreateAudioPlaylist(
"eng.m3u8", "english", "audiogroup", "audiocodec", "en", 2, 50000, 30000, "eng.m3u8", "english", "audiogroup", "audiocodec", "en", 2, 50000, 30000,
kEC3JocComplexityZero); kEC3JocComplexityZero, !kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled);
// Text, english.m3u8. // Text, english.m3u8.
std::unique_ptr<MockMediaPlaylist> text = std::unique_ptr<MockMediaPlaylist> text =
@ -578,10 +586,12 @@ TEST_F(MasterPlaylistTest, WriteMasterPlaylistMixedPlaylistsDifferentGroups) {
// AUDIO // AUDIO
CreateAudioPlaylist("audio-1.m3u8", "audio 1", "audio-group-1", CreateAudioPlaylist("audio-1.m3u8", "audio 1", "audio-group-1",
"audiocodec", "en", kAudioChannels, kAudioMaxBitrate, "audiocodec", "en", kAudioChannels, kAudioMaxBitrate,
kAudioAvgBitrate, kEC3JocComplexityZero), kAudioAvgBitrate, kEC3JocComplexityZero,
!kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled),
CreateAudioPlaylist("audio-2.m3u8", "audio 2", "audio-group-2", CreateAudioPlaylist("audio-2.m3u8", "audio 2", "audio-group-2",
"audiocodec", "fr", kAudioChannels, kAudioMaxBitrate, "audiocodec", "fr", kAudioChannels, kAudioMaxBitrate,
kAudioAvgBitrate, kEC3JocComplexityZero), kAudioAvgBitrate, kEC3JocComplexityZero,
!kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled),
// SUBTITLES // SUBTITLES
CreateTextPlaylist("text-1.m3u8", "text 1", "text-group-1", "textcodec", CreateTextPlaylist("text-1.m3u8", "text 1", "text-group-1", "textcodec",
@ -689,10 +699,12 @@ TEST_F(MasterPlaylistTest, WriteMasterPlaylistAudioOnly) {
// AUDIO // AUDIO
CreateAudioPlaylist("audio-1.m3u8", "audio 1", "audio-group-1", CreateAudioPlaylist("audio-1.m3u8", "audio 1", "audio-group-1",
"audiocodec", "en", kAudioChannels, kAudioMaxBitrate, "audiocodec", "en", kAudioChannels, kAudioMaxBitrate,
kAudioAvgBitrate, kEC3JocComplexityZero), kAudioAvgBitrate, kEC3JocComplexityZero,
!kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled),
CreateAudioPlaylist("audio-2.m3u8", "audio 2", "audio-group-2", CreateAudioPlaylist("audio-2.m3u8", "audio 2", "audio-group-2",
"audiocodec", "fr", kAudioChannels, kAudioMaxBitrate, "audiocodec", "fr", kAudioChannels, kAudioMaxBitrate,
kAudioAvgBitrate, kEC3JocComplexityZero), kAudioAvgBitrate, kEC3JocComplexityZero,
!kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled),
}; };
// Add all the media playlists to the master playlist. // Add all the media playlists to the master playlist.
@ -739,10 +751,12 @@ TEST_F(MasterPlaylistTest, WriteMasterPlaylistAudioOnlyJOC) {
// AUDIO // AUDIO
CreateAudioPlaylist("audio-1.m3u8", "audio 1", "audio-group-1", CreateAudioPlaylist("audio-1.m3u8", "audio 1", "audio-group-1",
"audiocodec", "en", kAudioChannels, kAudioMaxBitrate, "audiocodec", "en", kAudioChannels, kAudioMaxBitrate,
kAudioAvgBitrate, kEC3JocComplexityZero), kAudioAvgBitrate, kEC3JocComplexityZero, !kAC4IMSFlagEnabled,
!kAC4CBIFlagEnabled),
CreateAudioPlaylist("audio-2.m3u8", "audio 2", "audio-group-2", CreateAudioPlaylist("audio-2.m3u8", "audio 2", "audio-group-2",
"audiocodec", "en", kAudioChannels, kAudioMaxBitrate, "audiocodec", "en", kAudioChannels, kAudioMaxBitrate,
kAudioAvgBitrate, kEC3JocComplexity), kAudioAvgBitrate, kEC3JocComplexity, !kAC4IMSFlagEnabled,
!kAC4CBIFlagEnabled),
}; };
// Add all the media playlists to the master playlist. // Add all the media playlists to the master playlist.
@ -779,5 +793,111 @@ TEST_F(MasterPlaylistTest, WriteMasterPlaylistAudioOnlyJOC) {
ASSERT_EQ(expected, actual); ASSERT_EQ(expected, actual);
} }
// Writes a master playlist from two audio-only media playlists and checks the
// CHANNELS attribute of each rendition: the playlist created with the AC-4 IMS
// flag enabled is expected to carry the "/IMSA" suffix, while the playlist with
// neither AC-4 flag set is expected to list the plain channel count.
TEST_F(MasterPlaylistTest, WriteMasterPlaylistAudioOnlyAC4IMS) {
const uint64_t kAudioChannels = 2;
const uint64_t kAudioMaxBitrate = 50000;
const uint64_t kAudioAvgBitrate = 30000;
std::unique_ptr<MockMediaPlaylist> media_playlists[] = {
// AUDIO
// audio-1: IMS flag enabled, CBI flag disabled.
CreateAudioPlaylist("audio-1.m3u8", "audio 1", "audio-group-1",
"audio1codec", "en", kAudioChannels, kAudioMaxBitrate,
kAudioAvgBitrate, kEC3JocComplexityZero,
kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled),
// audio-2: both AC-4 flags disabled.
CreateAudioPlaylist("audio-2.m3u8", "audio 2", "audio-group-2",
"audio2codec", "en", kAudioChannels, kAudioMaxBitrate,
kAudioAvgBitrate, kEC3JocComplexityZero,
!kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled),
};
// Add all the media playlists to the master playlist.
std::list<MediaPlaylist*> media_playlist_list;
for (const auto& media_playlist : media_playlists) {
media_playlist_list.push_back(media_playlist.get());
}
const char kBaseUrl[] = "http://playlists.org/";
EXPECT_TRUE(master_playlist_.WriteMasterPlaylist(kBaseUrl, test_output_dir_,
media_playlist_list));
// Read back the written file and compare it against the golden text.
std::string actual;
ASSERT_TRUE(File::ReadFileToString(master_playlist_path_.c_str(), &actual));
const std::string expected =
"#EXTM3U\n"
"## Generated with https://github.com/google/shaka-packager version "
"test\n"
"\n"
"#EXT-X-MEDIA:TYPE=AUDIO,URI=\"http://playlists.org/audio-1.m3u8\","
"GROUP-ID=\"audio-group-1\",LANGUAGE=\"en\",NAME=\"audio 1\","
"DEFAULT=YES,AUTOSELECT=YES,CHANNELS=\"2/IMSA\"\n"
"#EXT-X-MEDIA:TYPE=AUDIO,URI=\"http://playlists.org/audio-2.m3u8\","
"GROUP-ID=\"audio-group-2\",LANGUAGE=\"en\",NAME=\"audio 2\","
"DEFAULT=YES,AUTOSELECT=YES,CHANNELS=\"2\"\n"
"\n"
"#EXT-X-STREAM-INF:BANDWIDTH=50000,AVERAGE-BANDWIDTH=30000,"
"CODECS=\"audio1codec\",AUDIO=\"audio-group-1\"\n"
"http://playlists.org/audio-1.m3u8\n"
"#EXT-X-STREAM-INF:BANDWIDTH=50000,AVERAGE-BANDWIDTH=30000,"
"CODECS=\"audio2codec\",AUDIO=\"audio-group-2\"\n"
"http://playlists.org/audio-2.m3u8\n";
ASSERT_EQ(expected, actual);
}
// Writes a master playlist from two audio-only media playlists and checks the
// CHANNELS attribute of each rendition: the 6-channel playlist with neither
// AC-4 flag set is expected to list the plain channel count, while the
// 8-channel playlist created with the AC-4 CBI flag enabled is expected to
// carry the "/IMSA" suffix.
TEST_F(MasterPlaylistTest, WriteMasterPlaylistAudioOnlyAC4CBI) {
const uint64_t kAudio1Channels = 6;
const uint64_t kAudio2Channels = 8;
const uint64_t kAudioMaxBitrate = 50000;
const uint64_t kAudioAvgBitrate = 30000;
std::unique_ptr<MockMediaPlaylist> media_playlists[] = {
// AUDIO
// audio-1: both AC-4 flags disabled.
CreateAudioPlaylist("audio-1.m3u8", "audio 1", "audio-group-1",
"audiocodec", "en", kAudio1Channels, kAudioMaxBitrate,
kAudioAvgBitrate, kEC3JocComplexityZero,
!kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled),
// audio-2: IMS flag disabled, CBI flag enabled.
CreateAudioPlaylist("audio-2.m3u8", "audio 2", "audio-group-2",
"audiocodec", "en", kAudio2Channels, kAudioMaxBitrate,
kAudioAvgBitrate, kEC3JocComplexityZero,
!kAC4IMSFlagEnabled, kAC4CBIFlagEnabled),
};
// Add all the media playlists to the master playlist.
std::list<MediaPlaylist*> media_playlist_list;
for (const auto& media_playlist : media_playlists) {
media_playlist_list.push_back(media_playlist.get());
}
const char kBaseUrl[] = "http://playlists.org/";
EXPECT_TRUE(master_playlist_.WriteMasterPlaylist(kBaseUrl, test_output_dir_,
media_playlist_list));
// Read back the written file and compare it against the golden text.
std::string actual;
ASSERT_TRUE(File::ReadFileToString(master_playlist_path_.c_str(), &actual));
const std::string expected =
"#EXTM3U\n"
"## Generated with https://github.com/google/shaka-packager version "
"test\n"
"\n"
"#EXT-X-MEDIA:TYPE=AUDIO,URI=\"http://playlists.org/audio-1.m3u8\","
"GROUP-ID=\"audio-group-1\",LANGUAGE=\"en\",NAME=\"audio 1\","
"DEFAULT=YES,AUTOSELECT=YES,CHANNELS=\"6\"\n"
"#EXT-X-MEDIA:TYPE=AUDIO,URI=\"http://playlists.org/audio-2.m3u8\","
"GROUP-ID=\"audio-group-2\",LANGUAGE=\"en\",NAME=\"audio 2\","
"DEFAULT=YES,AUTOSELECT=YES,CHANNELS=\"8/IMSA\"\n"
"\n"
"#EXT-X-STREAM-INF:BANDWIDTH=50000,AVERAGE-BANDWIDTH=30000,"
"CODECS=\"audiocodec\",AUDIO=\"audio-group-1\"\n"
"http://playlists.org/audio-1.m3u8\n"
"#EXT-X-STREAM-INF:BANDWIDTH=50000,AVERAGE-BANDWIDTH=30000,"
"CODECS=\"audiocodec\",AUDIO=\"audio-group-2\"\n"
"http://playlists.org/audio-2.m3u8\n";
ASSERT_EQ(expected, actual);
}
} // namespace hls } // namespace hls
} // namespace shaka } // namespace shaka

View File

@ -524,6 +524,14 @@ int MediaPlaylist::GetEC3JocComplexity() const {
return media_info_.audio_info().codec_specific_data().ec3_joc_complexity(); return media_info_.audio_info().codec_specific_data().ec3_joc_complexity();
} }
// Reports the ac4_ims_flag stored in the codec-specific data of this
// playlist's audio info.
bool MediaPlaylist::GetAC4ImsFlag() const {
  const auto& codec_data = media_info_.audio_info().codec_specific_data();
  return codec_data.ac4_ims_flag();
}
// Reports the ac4_cbi_flag stored in the codec-specific data of this
// playlist's audio info.
bool MediaPlaylist::GetAC4CbiFlag() const {
  const auto& codec_data = media_info_.audio_info().codec_specific_data();
  return codec_data.ac4_cbi_flag();
}
bool MediaPlaylist::GetDisplayResolution(uint32_t* width, bool MediaPlaylist::GetDisplayResolution(uint32_t* width,
uint32_t* height) const { uint32_t* height) const {
DCHECK(width); DCHECK(width);

View File

@ -152,7 +152,7 @@ class MediaPlaylist {
/// Write the playlist to |file_path|. /// Write the playlist to |file_path|.
/// This does not close the file. /// This does not close the file.
/// If target duration is not set expliticly, this will try to find the target /// If target duration is not set explicitly, this will try to find the target
/// duration. Note that target duration cannot be changed. So calling this /// duration. Note that target duration cannot be changed. So calling this
/// without explicitly setting the target duration and before adding any /// without explicitly setting the target duration and before adding any
/// segments will end up setting the target duration to 0 and will always /// segments will end up setting the target duration to 0 and will always
@ -193,6 +193,16 @@ class MediaPlaylist {
/// Standard C.3.2.3. /// Standard C.3.2.3.
virtual int GetEC3JocComplexity() const; virtual int GetEC3JocComplexity() const;
/// @return true if it's an AC-4 IMS stream, based on Dolby AC-4 in MPEG-DASH
/// for Online Delivery Specification 2.5.3.
/// https://developer.dolby.com/tools-media/online-delivery-kits/dolby-ac-4/
virtual bool GetAC4ImsFlag() const;
/// @return true if it's an AC-4 CBI stream, based on ETSI TS 103 190-2
/// Digital Audio Compression (AC-4) Standard; Part 2: Immersive and
/// personalized audio 4.3.
virtual bool GetAC4CbiFlag() const;
/// @return true if |width| and |height| have been set with a valid /// @return true if |width| and |height| have been set with a valid
/// resolution values. /// resolution values.
virtual bool GetDisplayResolution(uint32_t* width, uint32_t* height) const; virtual bool GetDisplayResolution(uint32_t* width, uint32_t* height) const;

View File

@ -504,6 +504,42 @@ TEST_F(MediaPlaylistMultiSegmentTest, GetEC3JocComplexity) {
EXPECT_EQ(6, media_playlist_->GetEC3JocComplexity()); EXPECT_EQ(6, media_playlist_->GetEC3JocComplexity());
} }
// Checks that MediaPlaylist::GetAC4ImsFlag() mirrors the ac4_ims_flag value of
// the media info most recently passed to SetMediaInfo().
TEST_F(MediaPlaylistMultiSegmentTest, GetAC4ImsFlag) {
  MediaInfo media_info;
  media_info.set_reference_time_scale(kTimeScale);

  // With no audio info supplied yet, the flag is expected to read as false.
  EXPECT_FALSE(media_playlist_->GetAC4ImsFlag());

  auto* codec_data =
      media_info.mutable_audio_info()->mutable_codec_specific_data();

  // Explicitly cleared flag.
  codec_data->set_ac4_ims_flag(false);
  ASSERT_TRUE(media_playlist_->SetMediaInfo(media_info));
  EXPECT_FALSE(media_playlist_->GetAC4ImsFlag());

  // Explicitly set flag.
  codec_data->set_ac4_ims_flag(true);
  ASSERT_TRUE(media_playlist_->SetMediaInfo(media_info));
  EXPECT_TRUE(media_playlist_->GetAC4ImsFlag());
}
// Checks that MediaPlaylist::GetAC4CbiFlag() mirrors the ac4_cbi_flag value of
// the media info most recently passed to SetMediaInfo().
TEST_F(MediaPlaylistMultiSegmentTest, GetAC4CbiFlag) {
  MediaInfo media_info;
  media_info.set_reference_time_scale(kTimeScale);

  // With no audio info supplied yet, the flag is expected to read as false.
  EXPECT_FALSE(media_playlist_->GetAC4CbiFlag());

  auto* codec_data =
      media_info.mutable_audio_info()->mutable_codec_specific_data();

  // Explicitly cleared flag.
  codec_data->set_ac4_cbi_flag(false);
  ASSERT_TRUE(media_playlist_->SetMediaInfo(media_info));
  EXPECT_FALSE(media_playlist_->GetAC4CbiFlag());

  // Explicitly set flag.
  codec_data->set_ac4_cbi_flag(true);
  ASSERT_TRUE(media_playlist_->SetMediaInfo(media_info));
  EXPECT_TRUE(media_playlist_->GetAC4CbiFlag());
}
TEST_F(MediaPlaylistMultiSegmentTest, Characteristics) { TEST_F(MediaPlaylistMultiSegmentTest, Characteristics) {
MediaInfo media_info; MediaInfo media_info;
media_info.set_reference_time_scale(kTimeScale); media_info.set_reference_time_scale(kTimeScale);

View File

@ -49,6 +49,8 @@ class MockMediaPlaylist : public MediaPlaylist {
MOCK_METHOD1(SetTargetDuration, void(uint32_t target_duration)); MOCK_METHOD1(SetTargetDuration, void(uint32_t target_duration));
MOCK_CONST_METHOD0(GetNumChannels, int()); MOCK_CONST_METHOD0(GetNumChannels, int());
MOCK_CONST_METHOD0(GetEC3JocComplexity, int()); MOCK_CONST_METHOD0(GetEC3JocComplexity, int());
MOCK_CONST_METHOD0(GetAC4ImsFlag, bool());
MOCK_CONST_METHOD0(GetAC4CbiFlag, bool());
MOCK_CONST_METHOD2(GetDisplayResolution, MOCK_CONST_METHOD2(GetDisplayResolution,
bool(uint32_t* width, uint32_t* height)); bool(uint32_t* width, uint32_t* height));
MOCK_CONST_METHOD0(GetFrameRate, double()); MOCK_CONST_METHOD0(GetFrameRate, double());

View File

@ -37,6 +37,8 @@ std::string AudioCodecToString(Codec codec) {
return "DTS+"; return "DTS+";
case kCodecEAC3: case kCodecEAC3:
return "EAC3"; return "EAC3";
case kCodecAC4:
return "AC4";
case kCodecFlac: case kCodecFlac:
return "FLAC"; return "FLAC";
case kCodecOpus: case kCodecOpus:
@ -121,6 +123,14 @@ std::string AudioStreamInfo::GetCodecString(Codec codec,
return "dts+"; return "dts+";
case kCodecEAC3: case kCodecEAC3:
return "ec-3"; return "ec-3";
case kCodecAC4:
// ETSI TS 103 190-2 Digital Audio Compression (AC-4) Standard; Part 2:
// Immersive and personalized audio E.13. audio_object_type is composed of
// bitstream_version (3bits), presentation_version (2bits) and
// mdcompat (3bits).
return base::StringPrintf(
"ac-4.%02d.%02d.%02d", (audio_object_type & 0xE0) >> 5,
(audio_object_type & 0x18) >> 3, audio_object_type & 0x7);
case kCodecFlac: case kCodecFlac:
return "flac"; return "flac";
case kCodecOpus: case kCodecOpus:

View File

@ -20,6 +20,7 @@ enum FourCC : uint32_t {
FOURCC_aacd = 0x61616364, FOURCC_aacd = 0x61616364,
FOURCC_ac_3 = 0x61632d33, // "ac-3" FOURCC_ac_3 = 0x61632d33, // "ac-3"
FOURCC_ac_4 = 0x61632d34, // "ac-4"
FOURCC_ac3d = 0x61633364, FOURCC_ac3d = 0x61633364,
FOURCC_apad = 0x61706164, FOURCC_apad = 0x61706164,
FOURCC_av01 = 0x61763031, FOURCC_av01 = 0x61763031,
@ -41,6 +42,7 @@ enum FourCC : uint32_t {
FOURCC_ctts = 0x63747473, FOURCC_ctts = 0x63747473,
FOURCC_dOps = 0x644f7073, FOURCC_dOps = 0x644f7073,
FOURCC_dac3 = 0x64616333, FOURCC_dac3 = 0x64616333,
FOURCC_dac4 = 0x64616334,
FOURCC_dash = 0x64617368, FOURCC_dash = 0x64617368,
FOURCC_ddts = 0x64647473, FOURCC_ddts = 0x64647473,
FOURCC_dec3 = 0x64656333, FOURCC_dec3 = 0x64656333,

View File

@ -40,6 +40,7 @@ enum Codec {
kCodecAudio = 200, kCodecAudio = 200,
kCodecAAC = kCodecAudio, kCodecAAC = kCodecAudio,
kCodecAC3, kCodecAC3,
kCodecAC4,
// TODO(kqyang): Use kCodecDTS and a kDtsStreamFormat for the various DTS // TODO(kqyang): Use kCodecDTS and a kDtsStreamFormat for the various DTS
// streams. // streams.
kCodecDTSC, kCodecDTSC,

View File

@ -0,0 +1,528 @@
// Copyright 2020 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#include "packager/media/codecs/ac4_audio_util.h"
#include "packager/base/macros.h"
#include "packager/base/strings/string_number_conversions.h"
#include "packager/media/base/bit_reader.h"
#include "packager/media/base/rcheck.h"
namespace shaka {
namespace media {
namespace {
// Speaker group index
// Bit, Location
// 0(LSB), Left/Right pair
// 1, Centre
// 2, Left surround/Right surround pair
// 3, Left back/Right back pair
// 4, Top front left/Top front right pair
// 5, Top back left/Top back right pair
// 6, LFE
// 7, Top left/Top right pair
// 8, Top side left/Top side right pair
// 9, Top front centre
// 10, Top back centre
// 11, Top centre
// 12, LFE2
// 13, Bottom front left/Bottom front right pair
// 14, Bottom front centre
// 15, Back centre
// 16, Left screen/Right screen pair
// 17, Left wide/Right wide pair
// 18, Vertical height left/Vertical height right pair
// One bit per speaker group; the shift amount is the bit position of that
// group, so enumerators can be OR-ed together into a channel mask.
enum kAC4AudioChannelGroupIndex {
  kLRPair = 1 << 0,
  kCentre = 1 << 1,
  kLsRsPair = 1 << 2,
  kLbRbPair = 1 << 3,
  kTflTfrPair = 1 << 4,
  kTblTbrPair = 1 << 5,
  kLFE = 1 << 6,
  kTlTrPair = 1 << 7,
  kTslTsrPair = 1 << 8,
  kTopfrontCentre = 1 << 9,
  kTopbackCentre = 1 << 10,
  kTopCentre = 1 << 11,
  kLFE2 = 1 << 12,
  kBflBfrPair = 1 << 13,
  kBottomFrontCentre = 1 << 14,
  kBackCentre = 1 << 15,
  kLscrRscrPair = 1 << 16,
  kLwRw = 1 << 17,
  kVhlVhrPair = 1 << 18,
};
// Translates an AC-4 speaker-group channel mask into the matching MPEG audio
// channel configuration value, following ETSI TS 103 190-2 V1.2.1 Digital
// Audio Compression (AC-4) Standard; Part 2: Immersive and personalized audio,
// Table G.1. A mask that matches none of the listed layouts yields 0xFFFFFFFF.
uint32_t AC4ChannelMasktoMPEGValue(uint32_t channel_mask) {
  switch (channel_mask) {
    case kCentre:
      return 1;
    case kLRPair:
      return 2;
    case kCentre | kLRPair:
      return 3;
    case kCentre | kLRPair | kBackCentre:
      return 4;
    case kCentre | kLRPair | kLsRsPair:
      return 5;
    case kCentre | kLRPair | kLsRsPair | kLFE:
      return 6;
    case kCentre | kLRPair | kLsRsPair | kLFE | kLwRw:
      return 7;
    case kBackCentre | kLRPair:
      return 9;
    case kLRPair | kLsRsPair:
      return 10;
    case kCentre | kLRPair | kLsRsPair | kLFE | kBackCentre:
      return 11;
    case kCentre | kLRPair | kLsRsPair | kLbRbPair | kLFE:
      return 12;

    // From here on each value has two accepted masks: one using the
    // Tfl/Tfr pair and one using the Vhl/Vhr pair in its place.
    case kLwRw | kBackCentre | kBottomFrontCentre | kBflBfrPair | kLFE2 |
        kTopCentre | kTopbackCentre | kTopfrontCentre | kTslTsrPair | kLFE |
        kTblTbrPair | kTflTfrPair | kLbRbPair | kLsRsPair | kCentre | kLRPair:
    case kVhlVhrPair | kLwRw | kBackCentre | kBottomFrontCentre | kBflBfrPair |
        kLFE2 | kTopCentre | kTopbackCentre | kTopfrontCentre | kTslTsrPair |
        kLFE | kTblTbrPair | kLbRbPair | kLsRsPair | kCentre | kLRPair:
      return 13;

    case kLFE | kTflTfrPair | kLsRsPair | kCentre | kLRPair:
    case kVhlVhrPair | kLFE | kCentre | kLRPair | kLsRsPair:
      return 14;

    case kLFE2 | kTopbackCentre | kLFE | kTflTfrPair | kCentre | kLRPair |
        kLsRsPair | kLbRbPair:
    case kVhlVhrPair | kLFE2 | kTopbackCentre | kLFE | kCentre | kLRPair |
        kLsRsPair | kLbRbPair:
      return 15;

    case kLFE | kTblTbrPair | kTflTfrPair | kLsRsPair | kCentre | kLRPair:
    case kVhlVhrPair | kLFE | kTblTbrPair | kLsRsPair | kCentre | kLRPair:
      return 16;

    case kTopCentre | kTopfrontCentre | kLFE | kTblTbrPair | kTflTfrPair |
        kLsRsPair | kCentre | kLRPair:
    case kVhlVhrPair | kTopCentre | kTopfrontCentre | kLFE | kTblTbrPair |
        kLsRsPair | kCentre | kLRPair:
      return 17;

    case kTopCentre | kTopfrontCentre | kLFE | kTblTbrPair | kTflTfrPair |
        kCentre | kLRPair | kLsRsPair | kLbRbPair:
    case kVhlVhrPair | kTopCentre | kTopfrontCentre | kLFE | kTblTbrPair |
        kCentre | kLRPair | kLsRsPair | kLbRbPair:
      return 18;

    case kLFE | kTblTbrPair | kTflTfrPair | kCentre | kLRPair | kLsRsPair |
        kLbRbPair:
    case kVhlVhrPair | kLFE | kTblTbrPair | kCentre | kLRPair | kLsRsPair |
        kLbRbPair:
      return 19;

    case kLscrRscrPair | kLFE | kTblTbrPair | kTflTfrPair | kCentre | kLRPair |
        kLsRsPair | kLbRbPair:
    case kVhlVhrPair | kLscrRscrPair | kLFE | kTblTbrPair | kCentre | kLRPair |
        kLsRsPair | kLbRbPair:
      return 20;

    default:
      // No matching layout.
      return 0xFFFFFFFF;
  }
}
// Steps over one AC-4 substream group DSI, based on ETSI TS 103 190-2 V1.2.1
// Digital Audio Compression (AC-4) Standard; Part 2: Immersive and
// personalized audio, E.11. No value is handed back to the caller; the reader
// is only advanced past the structure.
// NOTE(review): RCHECK comes from rcheck.h and is presumed to return false
// from this function when its argument is false - confirm against that header.
bool ParseAC4SubStreamGroupDsi(BitReader& bit_reader) {
  // Fixed header: three one-bit flags followed by the substream count.
  bool b_substream_present;
  bool b_hsf_ext;
  bool b_channel_coded;
  uint8_t n_substreams;
  RCHECK(bit_reader.ReadBits(1, &b_substream_present));
  RCHECK(bit_reader.ReadBits(1, &b_hsf_ext));
  RCHECK(bit_reader.ReadBits(1, &b_channel_coded));
  RCHECK(bit_reader.ReadBits(8, &n_substreams));

  for (uint8_t substream = 0; substream < n_substreams; ++substream) {
    RCHECK(bit_reader.SkipBits(2));

    // Optional 5-bit bitrate indicator.
    bool has_bitrate_indicator;
    RCHECK(bit_reader.ReadBits(1, &has_bitrate_indicator));
    if (has_bitrate_indicator) {
      RCHECK(bit_reader.SkipBits(5));
    }

    // Channel-coded substreams carry a 24-bit field and nothing else.
    if (b_channel_coded) {
      RCHECK(bit_reader.SkipBits(24));
      continue;
    }

    // Otherwise: optional A-JOC section, then four trailing bits.
    bool has_ajoc;
    RCHECK(bit_reader.ReadBits(1, &has_ajoc));
    if (has_ajoc) {
      bool static_dmx;
      RCHECK(bit_reader.ReadBits(1, &static_dmx));
      if (!static_dmx) {
        RCHECK(bit_reader.SkipBits(4));
      }
      RCHECK(bit_reader.SkipBits(6));
    }
    RCHECK(bit_reader.SkipBits(4));
  }

  // Optional content-type section.
  bool has_content_type;
  RCHECK(bit_reader.ReadBits(1, &has_content_type));
  if (!has_content_type) {
    return true;
  }
  RCHECK(bit_reader.SkipBits(3));

  // Optional language tag: a 6-bit byte count followed by that many bytes.
  bool has_language_indicator;
  RCHECK(bit_reader.ReadBits(1, &has_language_indicator));
  if (!has_language_indicator) {
    return true;
  }
  uint8_t language_tag_byte_count;
  RCHECK(bit_reader.ReadBits(6, &language_tag_byte_count));
  RCHECK(bit_reader.SkipBits(language_tag_byte_count * 8));
  return true;
}
// Parse AC-4 Presentation V1 based on ETSI TS 103 190-2 V1.2.1 Digital Audio
// Compression (AC-4) Standard; Part 2: Immersive and personalized audio E.10.
bool ParseAC4PresentationV1Dsi(BitReader& bit_reader,
uint32_t pres_bytes,
uint8_t* mdcompat,
uint32_t* presentation_channel_mask_v1,
bool* dolby_cbi_indicator,
uint8_t* dolby_atmos_indicator) {
bool ret = true;
// Record the initial offset.
const size_t presentation_start = bit_reader.bit_position();
uint8_t presentation_config_v1;
RCHECK(bit_reader.ReadBits(5, &presentation_config_v1));
uint8_t b_add_emdf_substreams;
// set default value (stereo content) for output parameters.
*mdcompat = 0;
*presentation_channel_mask_v1 = 2;
*dolby_cbi_indicator = false;
*dolby_atmos_indicator = 0;
if (presentation_config_v1 == 0x06) {
b_add_emdf_substreams = 1;
} else {
RCHECK(bit_reader.ReadBits(3, mdcompat));
bool b_presentation_id;
RCHECK(bit_reader.ReadBits(1, &b_presentation_id));
if (b_presentation_id) {
RCHECK(bit_reader.SkipBits(5));
}
RCHECK(bit_reader.SkipBits(19));
bool b_presentation_channel_coded;
RCHECK(bit_reader.ReadBits(1, &b_presentation_channel_coded));
*presentation_channel_mask_v1 = 0;
if (b_presentation_channel_coded) {
uint8_t dsi_presentation_ch_mode;
RCHECK(bit_reader.ReadBits(5, &dsi_presentation_ch_mode));
if (dsi_presentation_ch_mode >= 11 && dsi_presentation_ch_mode <= 14) {
RCHECK(bit_reader.SkipBits(1));
uint8_t pres_top_channel_pairs;
RCHECK(bit_reader.ReadBits(2, &pres_top_channel_pairs));
if (pres_top_channel_pairs) {
*dolby_cbi_indicator = true;
}
} else if (dsi_presentation_ch_mode == 15) {
*dolby_cbi_indicator = true;
}
RCHECK(bit_reader.ReadBits(24, presentation_channel_mask_v1));
}
bool b_presentation_core_differs;
RCHECK(bit_reader.ReadBits(1, &b_presentation_core_differs));
if (b_presentation_core_differs) {
bool b_presentation_core_channel_coded;
RCHECK(bit_reader.ReadBits(1, &b_presentation_core_channel_coded));
if (b_presentation_core_channel_coded) {
RCHECK(bit_reader.SkipBits(2));
}
}
bool b_presentation_filter;
RCHECK(bit_reader.ReadBits(1, &b_presentation_filter));
if (b_presentation_filter) {
RCHECK(bit_reader.SkipBits(1));
uint8_t n_filter_bytes;
RCHECK(bit_reader.ReadBits(8, &n_filter_bytes));
RCHECK(bit_reader.SkipBits(n_filter_bytes * 8));
}
if (presentation_config_v1 == 0x1f) {
ret &= ParseAC4SubStreamGroupDsi(bit_reader);
} else {
RCHECK(bit_reader.SkipBits(1));
if (presentation_config_v1 == 0 ||
presentation_config_v1 == 1 ||
presentation_config_v1 == 2) {
ret &= ParseAC4SubStreamGroupDsi(bit_reader);
ret &= ParseAC4SubStreamGroupDsi(bit_reader);
}
if (presentation_config_v1 == 3 || presentation_config_v1 == 4) {
ret &= ParseAC4SubStreamGroupDsi(bit_reader);
ret &= ParseAC4SubStreamGroupDsi(bit_reader);
ret &= ParseAC4SubStreamGroupDsi(bit_reader);
}
if (presentation_config_v1 == 5) {
uint8_t n_substream_groups_minus2;
RCHECK(bit_reader.ReadBits(3, &n_substream_groups_minus2));
for (uint8_t sg = 0; sg < n_substream_groups_minus2 + 2; sg++) {
ret &= ParseAC4SubStreamGroupDsi(bit_reader);
}
}
if (presentation_config_v1 > 5) {
uint8_t n_skip_bytes;
RCHECK(bit_reader.ReadBits(7, &n_skip_bytes));
RCHECK(bit_reader.SkipBits(n_skip_bytes * 8));
}
}
RCHECK(bit_reader.SkipBits(1));
RCHECK(bit_reader.ReadBits(1, &b_add_emdf_substreams));
}
if (b_add_emdf_substreams) {
uint8_t n_add_emdf_substreams;
RCHECK(bit_reader.ReadBits(7, &n_add_emdf_substreams));
RCHECK(bit_reader.SkipBits(n_add_emdf_substreams * 15));
}
bool b_presentation_bitrate_info;
RCHECK(bit_reader.ReadBits(1, &b_presentation_bitrate_info));
if (b_presentation_bitrate_info) {
// Skip bit rate information based on ETSI TS 103 190-2 v1.2.1 E.7.1
RCHECK(bit_reader.SkipBits(66));
}
bool b_alternative;
RCHECK(bit_reader.ReadBits(1, &b_alternative));
if (b_alternative) {
bit_reader.SkipToNextByte();
// Parse alternative information based on ETSI TS 103 190-2 v1.2.1 E.12
uint16_t name_len;
RCHECK(bit_reader.ReadBits(16, &name_len));
RCHECK(bit_reader.SkipBits(name_len * 8));
uint8_t n_targets;
RCHECK(bit_reader.ReadBits(5, &n_targets));
RCHECK(bit_reader.SkipBits(n_targets * 11));
}
bit_reader.SkipToNextByte();
if ((bit_reader.bit_position() - presentation_start) <=
(pres_bytes - 1) * 8) {
RCHECK(bit_reader.SkipBits(1));
RCHECK(bit_reader.ReadBits(1, dolby_atmos_indicator));
RCHECK(bit_reader.SkipBits(4));
bool b_extended_presentation_group_index;
RCHECK(bit_reader.ReadBits(1, &b_extended_presentation_group_index));
if (b_extended_presentation_group_index) {
RCHECK(bit_reader.SkipBits(9));
} else {
RCHECK(bit_reader.SkipBits(1));
}
}
return ret;
}
// Parses the payload of an AC4Specific box (|ac4_data|) up to and including
// its first presentation, and reports the fields the public helpers need.
//
// Only bitstream version 2 with presentation version 1 or 2 is accepted.
// Presentation version 2 is the IMS presentation and sets
// |dolby_ims_indicator|. Returns false, after logging a warning where the
// stream is recognised but unsupported, on any parse failure.
bool ExtractAc4Data(const std::vector<uint8_t>& ac4_data,
                    uint8_t* bitstream_version,
                    uint8_t* presentation_version,
                    uint8_t* mdcompat,
                    uint32_t* presentation_channel_mask_v1,
                    bool* dolby_ims_indicator,
                    bool* dolby_cbi_indicator) {
  BitReader bit_reader(ac4_data.data(), ac4_data.size());
  uint16_t n_presentation;
  RCHECK(bit_reader.SkipBits(3) && bit_reader.ReadBits(7, bitstream_version));
  RCHECK(bit_reader.SkipBits(5) && bit_reader.ReadBits(9, &n_presentation));

  // Reject every bitstream version other than 2 before reading any further.
  switch (*bitstream_version) {
    case 2:
      break;
    case 0:
    case 1:
      LOG(WARNING) << "Bitstream version 0 or 1 is not supported";
      return false;
    default:
      LOG(WARNING) << "Invalid Bitstream version";
      return false;
  }

  // Optional program id, optionally followed by a 16-byte UUID.
  uint8_t b_program_id = 0;
  RCHECK(bit_reader.ReadBits(1, &b_program_id));
  if (b_program_id) {
    RCHECK(bit_reader.SkipBits(16));
    uint8_t b_uuid = 0;
    RCHECK(bit_reader.ReadBits(1, &b_uuid));
    if (b_uuid) {
      RCHECK(bit_reader.SkipBits(16 * 8));
    }
  }
  RCHECK(bit_reader.SkipBits(66));
  bit_reader.SkipToNextByte();

  // AC4 stream containing the single presentation is valid for OTT only.
  // IMS has two presentations, and the 2nd is the legacy (duplicated)
  // presentation, so it can be treated as a single-presentation stream. The
  // IMS presentation must precede the legacy one. In other words, only the
  // 1st presentation in the AC4 stream needs to be parsed, which is why no
  // loop over presentations is required here.
  RCHECK(bit_reader.ReadBits(8, presentation_version));
  // *presentation_version == 2 means IMS presentation.
  const bool is_ims_presentation = (*presentation_version == 2);
  const bool is_v1_presentation = (*presentation_version == 1);
  const bool too_many_presentations =
      (is_ims_presentation && n_presentation > 2) ||
      (is_v1_presentation && n_presentation > 1);
  if (too_many_presentations) {
    LOG(WARNING) << "Seeing multiple presentations, only single presentation "
                 << "(including IMS presentation) is supported";
    return false;
  }

  // Presentation size: one byte, extended by 16 more bits when it is 255.
  uint32_t pres_bytes;
  RCHECK(bit_reader.ReadBits(8, &pres_bytes));
  if (pres_bytes == 255) {
    uint32_t add_pres_bytes;
    RCHECK(bit_reader.ReadBits(16, &add_pres_bytes));
    pres_bytes += add_pres_bytes;
  }

  *dolby_ims_indicator = false;
  if (*presentation_version == 0) {
    LOG(WARNING) << "Presentation version 0 is not supported";
    return false;
  }
  if (!is_v1_presentation && !is_ims_presentation) {
    LOG(WARNING) << "Invalid Presentation version";
    return false;
  }
  *dolby_ims_indicator = is_ims_presentation;

  const size_t presentation_start = bit_reader.bit_position();
  // dolby_atmos_indicator is extended in Dolby internal specs.
  // It indicates whether the source content before encoding is Atmos.
  // No final decision about how to use it in OTT.
  // Parse it for the future usage.
  uint8_t dolby_atmos_indicator;
  if (!ParseAC4PresentationV1Dsi(bit_reader, pres_bytes, mdcompat,
                                 presentation_channel_mask_v1,
                                 dolby_cbi_indicator,
                                 &dolby_atmos_indicator)) {
    return false;
  }
  const size_t presentation_bits =
      bit_reader.bit_position() - presentation_start;

  // Skip whatever is left of the presentation after the parsed part.
  size_t skip_bits = pres_bytes * 8 - presentation_bits;
  RCHECK(bit_reader.SkipBits(skip_bits));
  return true;
}
} // namespace
// Extracts the presentation channel mask from |ac4_data| and stores it in
// |ac4_channel_mask|. When the parsed mask is zero, 0x800000 is reported
// instead. Returns false (after logging the offending bytes) when the data
// cannot be parsed.
bool CalculateAC4ChannelMask(const std::vector<uint8_t>& ac4_data,
                             uint32_t* ac4_channel_mask) {
  // Only the channel mask is used; the other outputs are required by
  // ExtractAc4Data but ignored here.
  uint32_t parsed_mask;
  uint8_t bitstream_ver;
  uint8_t presentation_ver;
  uint8_t md_compat;
  bool ims_indicator;
  bool cbi_indicator;
  const bool parsed =
      ExtractAc4Data(ac4_data, &bitstream_ver, &presentation_ver, &md_compat,
                     &parsed_mask, &ims_indicator, &cbi_indicator);
  if (!parsed) {
    LOG(WARNING) << "Seeing invalid AC4 data: "
                 << base::HexEncode(ac4_data.data(), ac4_data.size());
    return false;
  }

  *ac4_channel_mask = (parsed_mask != 0) ? parsed_mask : 0x800000;
  return true;
}
// Extracts the presentation channel mask from |ac4_data| and converts it with
// AC4ChannelMasktoMPEGValue(), storing the result in
// |ac4_channel_mpeg_value|. Returns false (after logging the offending bytes)
// when the data cannot be parsed.
bool CalculateAC4ChannelMPEGValue(const std::vector<uint8_t>& ac4_data,
                                  uint32_t* ac4_channel_mpeg_value) {
  // Only the channel mask is used; the other outputs are required by
  // ExtractAc4Data but ignored here.
  uint32_t parsed_mask;
  uint8_t bitstream_ver;
  uint8_t presentation_ver;
  uint8_t md_compat;
  bool ims_indicator;
  bool cbi_indicator;
  const bool parsed =
      ExtractAc4Data(ac4_data, &bitstream_ver, &presentation_ver, &md_compat,
                     &parsed_mask, &ims_indicator, &cbi_indicator);
  if (!parsed) {
    LOG(WARNING) << "Seeing invalid AC4 data: "
                 << base::HexEncode(ac4_data.data(), ac4_data.size());
    return false;
  }

  *ac4_channel_mpeg_value = AC4ChannelMasktoMPEGValue(parsed_mask);
  return true;
}
// Packs bitstream_version, presentation_version and mdcompat parsed from
// |ac4_data| into the single byte |ac4_codec_info|. Returns false (after
// logging the offending bytes) when the data cannot be parsed.
bool GetAc4CodecInfo(const std::vector<uint8_t>& ac4_data,
                     uint8_t* ac4_codec_info) {
  uint8_t bitstream_ver;
  uint8_t presentation_ver;
  uint8_t md_compat;
  // Required by ExtractAc4Data but not used for the codec info byte.
  uint32_t unused_channel_mask;
  bool unused_ims_indicator;
  bool unused_cbi_indicator;
  const bool parsed = ExtractAc4Data(
      ac4_data, &bitstream_ver, &presentation_ver, &md_compat,
      &unused_channel_mask, &unused_ims_indicator, &unused_cbi_indicator);
  if (!parsed) {
    LOG(WARNING) << "Seeing invalid AC4 data: "
                 << base::HexEncode(ac4_data.data(), ac4_data.size());
    return false;
  }

  // The only valid bitstream_version (8 bits) is 2, the valid values of
  // presentation_version (8 bits) are 1 and 2, and mdcompat is 3 bits wide,
  // so a uint8_t can hold all three today. If Dolby extends the range of
  // bitstream_version or presentation_version, the packed type may have to
  // grow to uint16_t or uint32_t, and AudioStreamInfo::GetCodecString would
  // have to change accordingly.
  // Layout, from the most significant bit:
  // bitstream_version (3bits) + presentation_version (2bits) + mdcompat (3bits)
  const int bitstream_field = bitstream_ver << 5;
  const int presentation_field = (presentation_ver << 3) & 0x1F;
  const int mdcompat_field = md_compat & 0x7;
  *ac4_codec_info = bitstream_field | presentation_field | mdcompat_field;
  return true;
}
// Parses |ac4_data| and reports the IMS and CBI indicators through
// |ac4_ims_flag| and |ac4_cbi_flag|. Returns false (after logging the
// offending bytes) when the data cannot be parsed.
bool GetAc4ImmersiveInfo(const std::vector<uint8_t>& ac4_data,
                         bool* ac4_ims_flag,
                         bool* ac4_cbi_flag) {
  // Required by ExtractAc4Data but not reported by this function.
  uint8_t unused_bitstream_version;
  uint8_t unused_presentation_version;
  uint8_t unused_mdcompat;
  uint32_t unused_channel_mask;
  const bool parsed = ExtractAc4Data(
      ac4_data, &unused_bitstream_version, &unused_presentation_version,
      &unused_mdcompat, &unused_channel_mask, ac4_ims_flag, ac4_cbi_flag);
  if (parsed)
    return true;

  LOG(WARNING) << "Seeing invalid AC4 data: "
               << base::HexEncode(ac4_data.data(), ac4_data.size());
  return false;
}
} // namespace media
} // namespace shaka

View File

@ -0,0 +1,52 @@
// Copyright 2020 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
//
// AC4 audio utility functions.
#ifndef PACKAGER_MEDIA_CODECS_AC4_AUDIO_UTIL_H_
#define PACKAGER_MEDIA_CODECS_AC4_AUDIO_UTIL_H_
#include <stddef.h>
#include <stdint.h>
#include <vector>
namespace shaka {
namespace media {
/// Parse data from AC4Specific box and calculate AC4 channel mask value based
/// on ETSI TS 103 190-2 V1.2.1 Digital Audio Compression (AC-4) Standard;
/// Part 2: Immersive and personalized audio E.10.14.
/// @param ac4_data contains the data of the AC4Specific box.
/// @param[out] ac4_channel_mask receives the channel mask on success. A
///             parsed mask of zero is reported as 0x800000.
/// @return false if there are parsing errors.
bool CalculateAC4ChannelMask(const std::vector<uint8_t>& ac4_data,
uint32_t* ac4_channel_mask);
/// Parse data from AC4Specific box, calculate AC4 channel mask and then
/// obtain channel configuration descriptor value with MPEG scheme based on
/// ETSI TS 103 190-2 V1.2.1 Digital Audio Compression (AC-4) Standard;
/// Part 2: Immersive and personalized audio G.3.2.
/// @param ac4_data contains the data of the AC4Specific box.
/// @param[out] ac4_channel_mpeg_value receives the MPEG scheme value on
///             success.
/// @return false if there are parsing errors.
bool CalculateAC4ChannelMPEGValue(const std::vector<uint8_t>& ac4_data,
uint32_t* ac4_channel_mpeg_value);
/// Parse data from AC4Specific box and obtain AC4 codec information
/// (bitstream version, presentation version and mdcompat) based on ETSI TS
/// 103 190-2, V1.2.1 Digital Audio Compression (AC-4) Standard;
/// Part 2: Immersive and personalized audio E.13.
/// @param ac4_data contains the data of the AC4Specific box.
/// @param[out] ac4_codec_info receives, on success, one byte packed as
///             bitstream version (3 bits), presentation version (2 bits) and
///             mdcompat (3 bits), from the most significant bit down.
/// @return false if there are parsing errors.
bool GetAc4CodecInfo(const std::vector<uint8_t>& ac4_data,
uint8_t* ac4_codec_info);
/// Parse data from AC4Specific box and obtain AC4 Immersive stereo (IMS) flag
/// and Channel-based audio (CBI) flag.
/// @param ac4_data contains the data of the AC4Specific box.
/// @param[out] ac4_ims_flag receives the IMS flag.
/// @param[out] ac4_cbi_flag receives the CBI flag.
/// @return false if there are parsing errors.
bool GetAc4ImmersiveInfo(const std::vector<uint8_t>& ac4_data,
bool* ac4_ims_flag,
bool* ac4_cbi_flag);
} // namespace media
} // namespace shaka
#endif // PACKAGER_MEDIA_CODECS_AC4_AUDIO_UTIL_H_

View File

@ -0,0 +1,121 @@
// Copyright 2020 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#include <gtest/gtest.h>
#include "packager/media/codecs/ac4_audio_util.h"
namespace shaka {
namespace media {
// AC4 IMS stream: expects channel mask 0x1, MPEG value 2, the IMS flag set
// and the CBI flag cleared.
TEST(AC4AudioUtilTest, ChannelTest1) {
  const std::vector<uint8_t> dac4_payload = {
      0x20, 0xa4, 0x02, 0x40, 0x00, 0x00,
      0x00, 0x1f, 0xff, 0xff, 0xff, 0xe0,
      0x02, 0x12, 0xf8, 0x80, 0x00, 0x00,
      0x42, 0x00, 0x00, 0x02, 0x50, 0x10,
      0x00, 0x00, 0x03, 0x10, 0x99, 0x5b,
      0xa0, 0x40, 0x01, 0x12, 0xf8, 0x80,
      0x00, 0x00, 0x42, 0x00, 0x00, 0x02,
      0x50, 0x10, 0x00, 0x00, 0x03, 0x10,
      0x99, 0x5b, 0x80, 0x40};

  uint32_t channel_mask = 0;
  EXPECT_TRUE(CalculateAC4ChannelMask(dac4_payload, &channel_mask));
  EXPECT_EQ(uint32_t{0x1}, channel_mask);

  uint32_t mpeg_value = 0;
  EXPECT_TRUE(CalculateAC4ChannelMPEGValue(dac4_payload, &mpeg_value));
  EXPECT_EQ(uint32_t{0x2}, mpeg_value);

  // 80 == (2 << 5) | (2 << 3) | 0.
  uint8_t codec_info = 0;
  EXPECT_TRUE(GetAc4CodecInfo(dac4_payload, &codec_info));
  EXPECT_EQ(80u, codec_info);

  bool ims_flag = false;
  bool cbi_flag = false;
  EXPECT_TRUE(GetAc4ImmersiveInfo(dac4_payload, &ims_flag, &cbi_flag));
  EXPECT_TRUE(ims_flag);
  EXPECT_FALSE(cbi_flag);
}
// AC4 5.1-channel stream: expects channel mask 0x47, MPEG value 6 and both
// immersive flags cleared.
TEST(AC4AudioUtilTest, ChannelTest2) {
  const std::vector<uint8_t> dac4_payload = {
      0x20, 0xa6, 0x01, 0x60, 0x00, 0x00,
      0x00, 0x1f, 0xff, 0xff, 0xff, 0xe0,
      0x01, 0x0e, 0xf9, 0x00, 0x00, 0x09,
      0x00, 0x00, 0x11, 0xca, 0x02, 0x00,
      0x00, 0x11, 0xc0, 0x80};

  uint32_t channel_mask = 0;
  EXPECT_TRUE(CalculateAC4ChannelMask(dac4_payload, &channel_mask));
  EXPECT_EQ(uint32_t{0x47}, channel_mask);

  uint32_t mpeg_value = 0;
  EXPECT_TRUE(CalculateAC4ChannelMPEGValue(dac4_payload, &mpeg_value));
  EXPECT_EQ(uint32_t{0x6}, mpeg_value);

  // 73 == (2 << 5) | (1 << 3) | 1.
  uint8_t codec_info = 0;
  EXPECT_TRUE(GetAc4CodecInfo(dac4_payload, &codec_info));
  EXPECT_EQ(73u, codec_info);

  bool ims_flag = false;
  bool cbi_flag = false;
  EXPECT_TRUE(GetAc4ImmersiveInfo(dac4_payload, &ims_flag, &cbi_flag));
  EXPECT_FALSE(ims_flag);
  EXPECT_FALSE(cbi_flag);
}
// AC4 stereo stream: expects channel mask 0x1, MPEG value 2 and both
// immersive flags cleared.
TEST(AC4AudioUtilTest, ChannelTest3) {
  const std::vector<uint8_t> dac4_payload = {
      0x20, 0xa4, 0x01, 0x40, 0x00, 0x00,
      0x00, 0x1f, 0xff, 0xff, 0xff, 0xe0,
      0x01, 0x12, 0xf8, 0x00, 0x00, 0x08,
      0x40, 0x00, 0x00, 0x4a, 0x02, 0x00,
      0x00, 0x00, 0x62, 0x13, 0x2b, 0x70,
      0x00, 0x80};

  uint32_t channel_mask = 0;
  EXPECT_TRUE(CalculateAC4ChannelMask(dac4_payload, &channel_mask));
  EXPECT_EQ(uint32_t{0x1}, channel_mask);

  uint32_t mpeg_value = 0;
  EXPECT_TRUE(CalculateAC4ChannelMPEGValue(dac4_payload, &mpeg_value));
  EXPECT_EQ(uint32_t{0x2}, mpeg_value);

  // 72 == (2 << 5) | (1 << 3) | 0.
  uint8_t codec_info = 0;
  EXPECT_TRUE(GetAc4CodecInfo(dac4_payload, &codec_info));
  EXPECT_EQ(72u, codec_info);

  bool ims_flag = false;
  bool cbi_flag = false;
  EXPECT_TRUE(GetAc4ImmersiveInfo(dac4_payload, &ims_flag, &cbi_flag));
  EXPECT_FALSE(ims_flag);
  EXPECT_FALSE(cbi_flag);
}
// AC4 CBI 5.1.2 stream: expects channel mask 0xC7, no MPEG mapping
// (0xFFFFFFFF), the IMS flag cleared and the CBI flag set.
TEST(AC4AudioUtilTest, ChannelTest4) {
  const std::vector<uint8_t> dac4_payload = {
      0x20, 0xa0, 0x01, 0x60, 0x00, 0x00,
      0x00, 0x1f, 0xff, 0xff, 0xff, 0xe0,
      0x01, 0x15, 0x13, 0x80, 0x00, 0x00,
      0x58, 0x40, 0x00, 0x31, 0xfc, 0xa0,
      0x20, 0x00, 0x03, 0x1d, 0x40, 0x40,
      0x00, 0x00, 0x08, 0x00, 0xc0};

  uint32_t channel_mask = 0;
  EXPECT_TRUE(CalculateAC4ChannelMask(dac4_payload, &channel_mask));
  EXPECT_EQ(uint32_t{0xC7}, channel_mask);

  uint32_t mpeg_value = 0;
  EXPECT_TRUE(CalculateAC4ChannelMPEGValue(dac4_payload, &mpeg_value));
  EXPECT_EQ(uint32_t{0xFFFFFFFF}, mpeg_value);

  // 75 == (2 << 5) | (1 << 3) | 3.
  uint8_t codec_info = 0;
  EXPECT_TRUE(GetAc4CodecInfo(dac4_payload, &codec_info));
  EXPECT_EQ(75u, codec_info);

  bool ims_flag = false;
  bool cbi_flag = false;
  EXPECT_TRUE(GetAc4ImmersiveInfo(dac4_payload, &ims_flag, &cbi_flag));
  EXPECT_FALSE(ims_flag);
  EXPECT_TRUE(cbi_flag);
}
} // namespace media
} // namespace shaka

View File

@ -29,6 +29,8 @@
'dovi_decoder_configuration_record.h', 'dovi_decoder_configuration_record.h',
'ec3_audio_util.cc', 'ec3_audio_util.cc',
'ec3_audio_util.h', 'ec3_audio_util.h',
'ac4_audio_util.cc',
'ac4_audio_util.h',
'es_descriptor.cc', 'es_descriptor.cc',
'es_descriptor.h', 'es_descriptor.h',
'h264_byte_to_unit_stream_converter.cc', 'h264_byte_to_unit_stream_converter.cc',
@ -77,6 +79,7 @@
'avc_decoder_configuration_record_unittest.cc', 'avc_decoder_configuration_record_unittest.cc',
'dovi_decoder_configuration_record_unittest.cc', 'dovi_decoder_configuration_record_unittest.cc',
'ec3_audio_util_unittest.cc', 'ec3_audio_util_unittest.cc',
'ac4_audio_util_unittest.cc',
'es_descriptor_unittest.cc', 'es_descriptor_unittest.cc',
'h264_byte_to_unit_stream_converter_unittest.cc', 'h264_byte_to_unit_stream_converter_unittest.cc',
'h264_parser_unittest.cc', 'h264_parser_unittest.cc',

View File

@ -18,6 +18,7 @@
#include "packager/media/base/text_stream_info.h" #include "packager/media/base/text_stream_info.h"
#include "packager/media/base/video_stream_info.h" #include "packager/media/base/video_stream_info.h"
#include "packager/media/codecs/ec3_audio_util.h" #include "packager/media/codecs/ec3_audio_util.h"
#include "packager/media/codecs/ac4_audio_util.h"
#include "packager/mpd/base/media_info.pb.h" #include "packager/mpd/base/media_info.pb.h"
using ::google::protobuf::util::MessageDifferencer; using ::google::protobuf::util::MessageDifferencer;
@ -121,14 +122,14 @@ void AddAudioInfo(const AudioStreamInfo* audio_stream_info,
return; return;
} }
auto* codec_data = audio_info->mutable_codec_specific_data(); auto* codec_data = audio_info->mutable_codec_specific_data();
codec_data->set_ec3_channel_map(ec3_channel_map); codec_data->set_channel_mask(ec3_channel_map);
uint32_t ec3_channel_mpeg_value; uint32_t ec3_channel_mpeg_value;
if (!CalculateEC3ChannelMPEGValue(codec_config, &ec3_channel_mpeg_value)) { if (!CalculateEC3ChannelMPEGValue(codec_config, &ec3_channel_mpeg_value)) {
LOG(ERROR) << "Failed to calculate EC3 channel configuration " LOG(ERROR) << "Failed to calculate EC3 channel configuration "
<< "descriptor value with MPEG scheme."; << "descriptor value with MPEG scheme.";
return; return;
} }
codec_data->set_ec3_channel_mpeg_value(ec3_channel_mpeg_value); codec_data->set_channel_mpeg_value(ec3_channel_mpeg_value);
uint32_t ec3_joc_complexity = 0; uint32_t ec3_joc_complexity = 0;
if (!GetEc3JocComplexity(codec_config, &ec3_joc_complexity)) { if (!GetEc3JocComplexity(codec_config, &ec3_joc_complexity)) {
LOG(ERROR) << "Failed to obtain DD+JOC Information."; LOG(ERROR) << "Failed to obtain DD+JOC Information.";
@ -136,6 +137,31 @@ void AddAudioInfo(const AudioStreamInfo* audio_stream_info,
} }
codec_data->set_ec3_joc_complexity(ec3_joc_complexity); codec_data->set_ec3_joc_complexity(ec3_joc_complexity);
} }
if (audio_stream_info->codec() == kCodecAC4) {
uint32_t ac4_channel_mask;
if (!CalculateAC4ChannelMask(codec_config, &ac4_channel_mask)) {
LOG(ERROR) << "Failed to calculate AC4 channel mask.";
return;
}
auto* codec_data = audio_info->mutable_codec_specific_data();
codec_data->set_channel_mask(ac4_channel_mask);
uint32_t ac4_channel_mpeg_value;
if (!CalculateAC4ChannelMPEGValue(codec_config, &ac4_channel_mpeg_value)) {
LOG(ERROR) << "Failed to calculate AC4 channel configuration "
<< "descriptor value with MPEG scheme.";
return;
}
codec_data->set_channel_mpeg_value(ac4_channel_mpeg_value);
bool ac4_ims_flag;
bool ac4_cbi_flag;
if (!GetAc4ImmersiveInfo(codec_config, &ac4_ims_flag, &ac4_cbi_flag)) {
LOG(ERROR) << "Failed to obtain AC4 IMS flag and CBI flag.";
return;
}
codec_data->set_ac4_ims_flag(ac4_ims_flag);
codec_data->set_ac4_cbi_flag(ac4_cbi_flag);
}
} }
void AddTextInfo(const TextStreamInfo& text_stream_info, void AddTextInfo(const TextStreamInfo& text_stream_info,

View File

@ -1767,6 +1767,27 @@ size_t EC3Specific::ComputeSizeInternal() {
return HeaderSize() + data.size(); return HeaderSize() + data.size();
} }
// Construction and destruction use the compiler-generated defaults.
AC4Specific::AC4Specific() = default;
AC4Specific::~AC4Specific() = default;
// Returns FOURCC_dac4, the four-character code of this box.
FourCC AC4Specific::BoxType() const {
return FOURCC_dac4;
}
// Reads or writes the box header followed by the raw payload |data|. When
// reading, all bytes left in |buffer| are taken as the payload; when writing,
// exactly data.size() bytes are emitted.
bool AC4Specific::ReadWriteInternal(BoxBuffer* buffer) {
  RCHECK(ReadWriteHeaderInternal(buffer));
  size_t size = data.size();
  if (buffer->Reading())
    size = buffer->BytesLeft();
  RCHECK(buffer->ReadWriteVector(&data, size));
  return true;
}
// Returns the serialized size of the box: header plus payload, or 0 when the
// payload is empty. The box is optional, so an uninitialized one is skipped.
size_t AC4Specific::ComputeSizeInternal() {
  return data.empty() ? 0 : HeaderSize() + data.size();
}
OpusSpecific::OpusSpecific() = default; OpusSpecific::OpusSpecific() = default;
OpusSpecific::~OpusSpecific() = default; OpusSpecific::~OpusSpecific() = default;
@ -1878,6 +1899,7 @@ bool AudioSampleEntry::ReadWriteInternal(BoxBuffer* buffer) {
RCHECK(buffer->TryReadWriteChild(&ddts)); RCHECK(buffer->TryReadWriteChild(&ddts));
RCHECK(buffer->TryReadWriteChild(&dac3)); RCHECK(buffer->TryReadWriteChild(&dac3));
RCHECK(buffer->TryReadWriteChild(&dec3)); RCHECK(buffer->TryReadWriteChild(&dec3));
RCHECK(buffer->TryReadWriteChild(&dac4));
RCHECK(buffer->TryReadWriteChild(&dops)); RCHECK(buffer->TryReadWriteChild(&dops));
RCHECK(buffer->TryReadWriteChild(&dfla)); RCHECK(buffer->TryReadWriteChild(&dfla));
@ -1905,6 +1927,7 @@ size_t AudioSampleEntry::ComputeSizeInternal() {
sizeof(samplesize) + sizeof(samplerate) + sinf.ComputeSize() + sizeof(samplesize) + sizeof(samplerate) + sinf.ComputeSize() +
esds.ComputeSize() + ddts.ComputeSize() + dac3.ComputeSize() + esds.ComputeSize() + ddts.ComputeSize() + dac3.ComputeSize() +
dec3.ComputeSize() + dops.ComputeSize() + dfla.ComputeSize() + dec3.ComputeSize() + dops.ComputeSize() + dfla.ComputeSize() +
dac4.ComputeSize() +
// Reserved and predefined bytes. // Reserved and predefined bytes.
6 + 8 + // 6 + 8 bytes reserved. 6 + 8 + // 6 + 8 bytes reserved.
4; // 4 bytes predefined. 4; // 4 bytes predefined.

View File

@ -333,6 +333,12 @@ struct EC3Specific : Box {
std::vector<uint8_t> data; std::vector<uint8_t> data;
}; };
// AC4Specific box. Its content is not interpreted by the box itself and is
// kept as opaque bytes in |data|.
struct AC4Specific : Box {
DECLARE_BOX_METHODS(AC4Specific);
// Raw box content following the box header.
std::vector<uint8_t> data;
};
struct OpusSpecific : Box { struct OpusSpecific : Box {
DECLARE_BOX_METHODS(OpusSpecific); DECLARE_BOX_METHODS(OpusSpecific);
@ -372,6 +378,7 @@ struct AudioSampleEntry : Box {
DTSSpecific ddts; DTSSpecific ddts;
AC3Specific dac3; AC3Specific dac3;
EC3Specific dec3; EC3Specific dec3;
AC4Specific dac4;
OpusSpecific dops; OpusSpecific dops;
FlacSpecific dfla; FlacSpecific dfla;
}; };

View File

@ -1242,6 +1242,21 @@ TEST_F(BoxDefinitionsTest, EC3SampleEntry) {
ASSERT_EQ(entry, entry_readback); ASSERT_EQ(entry, entry_readback);
} }
// Round-trips an AudioSampleEntry carrying an AC4Specific child box through
// the write buffer and checks that the entry read back equals the original.
TEST_F(BoxDefinitionsTest, AC4SampleEntry) {
  AudioSampleEntry written;
  written.format = FOURCC_ac_4;
  written.data_reference_index = 2;
  written.channelcount = 6;
  written.samplesize = 16;
  written.samplerate = 48000;
  Fill(&written.dac4);
  written.Write(this->buffer_.get());

  AudioSampleEntry read_back;
  ASSERT_TRUE(ReadBack(&read_back));
  ASSERT_EQ(written, read_back);
}
TEST_F(BoxDefinitionsTest, OpusSampleEntry) { TEST_F(BoxDefinitionsTest, OpusSampleEntry) {
AudioSampleEntry entry; AudioSampleEntry entry;
entry.format = FOURCC_Opus; entry.format = FOURCC_Opus;

View File

@ -26,6 +26,7 @@
#include "packager/media/codecs/avc_decoder_configuration_record.h" #include "packager/media/codecs/avc_decoder_configuration_record.h"
#include "packager/media/codecs/dovi_decoder_configuration_record.h" #include "packager/media/codecs/dovi_decoder_configuration_record.h"
#include "packager/media/codecs/ec3_audio_util.h" #include "packager/media/codecs/ec3_audio_util.h"
#include "packager/media/codecs/ac4_audio_util.h"
#include "packager/media/codecs/es_descriptor.h" #include "packager/media/codecs/es_descriptor.h"
#include "packager/media/codecs/hevc_decoder_configuration_record.h" #include "packager/media/codecs/hevc_decoder_configuration_record.h"
#include "packager/media/codecs/vp_codec_configuration_record.h" #include "packager/media/codecs/vp_codec_configuration_record.h"
@ -94,6 +95,8 @@ Codec FourCCToCodec(FourCC fourcc) {
return kCodecAC3; return kCodecAC3;
case FOURCC_ec_3: case FOURCC_ec_3:
return kCodecEAC3; return kCodecEAC3;
case FOURCC_ac_4:
return kCodecAC4;
case FOURCC_fLaC: case FOURCC_fLaC:
return kCodecFlac; return kCodecFlac;
default: default:
@ -488,6 +491,16 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
codec_config = entry.dec3.data; codec_config = entry.dec3.data;
num_channels = static_cast<uint8_t>(GetEc3NumChannels(codec_config)); num_channels = static_cast<uint8_t>(GetEc3NumChannels(codec_config));
break; break;
case FOURCC_ac_4:
codec_config = entry.dac4.data;
// Stop processing if the AC-4 dac4 box cannot be parsed, if it uses
// bitstream version 0 (which has been deprecated), or if it contains
// multiple presentations in a single AC-4 stream (only used for broadcast).
if (!GetAc4CodecInfo(codec_config, &audio_object_type)) {
LOG(ERROR) << "Failed to parse dac4.";
return false;
}
break;
case FOURCC_fLaC: case FOURCC_fLaC:
codec_config = entry.dfla.data; codec_config = entry.dfla.data;
break; break;

View File

@ -83,6 +83,8 @@ FourCC CodecToFourCC(Codec codec, H26xStreamFormat h26x_stream_format) {
return FOURCC_dtsm; return FOURCC_dtsm;
case kCodecEAC3: case kCodecEAC3:
return FOURCC_ec_3; return FOURCC_ec_3;
case kCodecAC4:
return FOURCC_ac_4;
case kCodecFlac: case kCodecFlac:
return FOURCC_fLaC; return FOURCC_fLaC;
case kCodecOpus: case kCodecOpus:
@ -485,6 +487,9 @@ bool MP4Muxer::GenerateAudioTrak(const AudioStreamInfo* audio_info,
case kCodecEAC3: case kCodecEAC3:
audio.dec3.data = audio_info->codec_config(); audio.dec3.data = audio_info->codec_config();
break; break;
case kCodecAC4:
audio.dac4.data = audio_info->codec_config();
break;
case kCodecFlac: case kCodecFlac:
audio.dfla.data = audio_info->codec_config(); audio.dfla.data = audio_info->codec_config();
break; break;
@ -520,6 +525,12 @@ bool MP4Muxer::GenerateAudioTrak(const AudioStreamInfo* audio_info,
// sample description entry. Instead, two constants are used. // sample description entry. Instead, two constants are used.
audio.channelcount = 2; audio.channelcount = 2;
audio.samplesize = 16; audio.samplesize = 16;
} else if (audio_info->codec() == kCodecAC4) {
// ETSI TS 103 190-2, E.4.5: channelcount should be set to the total number
// of audio output channels of the default audio presentation of that track.
audio.channelcount = audio_info->num_channels();
// ETSI TS 103 190-2, E.4.6: samplesize shall be set to 16.
audio.samplesize = 16;
} else { } else {
audio.channelcount = audio_info->num_channels(); audio.channelcount = audio_info->num_channels();
audio.samplesize = audio_info->sample_bits(); audio.samplesize = audio_info->sample_bits();

View File

@ -64,17 +64,33 @@ message MediaInfo {
message AudioCodecSpecificData { message AudioCodecSpecificData {
// EC3 Channel map bit fields, encoded based on ETSI TS 102 366 V1.3.1 // EC3 Channel map bit fields, encoded based on ETSI TS 102 366 V1.3.1
// Digital Audio Compression (AC-3, Enhanced AC-3) Standard E.1.3.1.8. // Digital Audio Compression (AC-3, Enhanced AC-3) Standard E.1.3.1.8.
optional uint32 ec3_channel_map = 1; // Or AC4 Channel mask bit fields, encoded based on ETSI TS 103 190-2
// V1.2.1 Digital Audio Compression (AC-4) Standard; Part 2: Immersive and
// personalized audio E.10.14.
optional uint32 channel_mask = 1;
// EC3 Channel configuration descriptor with MPEG scheme fields, // EC3 Channel configuration descriptor with MPEG scheme fields,
// encoded based on ETSI TS 102 366 V1.4.1 Digital Audio Compression // encoded based on ETSI TS 102 366 V1.4.1 Digital Audio Compression
// (AC-3, Enhanced AC-3) Standard I.1.2.1. // (AC-3, Enhanced AC-3) Standard I.1.2.1.
optional uint32 ec3_channel_mpeg_value = 2; // Or AC4 Channel configuration descriptor with MPEG scheme fields,
// encoded based on ETSI TS 103 190-2 V1.2.1 Digital Audio Compression
// (AC-4) Standard; Part 2: Immersive and personalized audio G.3.2.
optional uint32 channel_mpeg_value = 2;
// Dolby Digital Plus JOC decoding complexity fields, ETSI TS 103 420 v1.2.1 // Dolby Digital Plus JOC decoding complexity fields, ETSI TS 103 420 v1.2.1
// Backwards-compatible object audio carriage using Enhanced AC-3 Standard // Backwards-compatible object audio carriage using Enhanced AC-3 Standard
// C.3.2.3. // C.3.2.3.
optional uint32 ec3_joc_complexity = 3; optional uint32 ec3_joc_complexity = 3;
// AC4 Immersive stereo flag field, based on Dolby AC-4 in MPEG-DASH for
// Online Delivery Specification 2.5.3.
// https://developer.dolby.com/tools-media/online-delivery-kits/dolby-ac-4/
optional bool ac4_ims_flag = 4;
// AC4 Channel-based audio (CBI) flag field, encoded based on
// ETSI TS 103 190-2 Digital Audio Compression (AC-4) Standard;
// Part 2: Immersive and personalized audio 4.3.
optional bool ac4_cbi_flag = 5;
} }
message TextInfo { message TextInfo {

View File

@ -38,6 +38,7 @@ typedef MediaInfo::VideoInfo VideoInfo;
namespace { namespace {
const char kEC3Codec[] = "ec-3"; const char kEC3Codec[] = "ec-3";
const char kAC4Codec[] = "ac-4";
std::string RangeToString(const Range& range) { std::string RangeToString(const Range& range) {
return base::Uint64ToString(range.begin()) + "-" + return base::Uint64ToString(range.begin()) + "-" +
@ -464,13 +465,13 @@ bool RepresentationXmlNode::AddAudioChannelInfo(const AudioInfo& audio_info) {
// Use MPEG scheme if the mpeg value is available and valid, fallback to // Use MPEG scheme if the mpeg value is available and valid, fallback to
// EC3 channel mapping otherwise. // EC3 channel mapping otherwise.
// See https://github.com/Dash-Industry-Forum/DASH-IF-IOP/issues/268 // See https://github.com/Dash-Industry-Forum/DASH-IF-IOP/issues/268
const uint32_t ec3_channel_mpeg_value = codec_data.ec3_channel_mpeg_value(); const uint32_t ec3_channel_mpeg_value = codec_data.channel_mpeg_value();
const uint32_t NO_MAPPING = 0xFFFFFFFF; const uint32_t NO_MAPPING = 0xFFFFFFFF;
if (ec3_channel_mpeg_value == NO_MAPPING) { if (ec3_channel_mpeg_value == NO_MAPPING) {
// Convert EC3 channel map into string of hexadecimal digits. Spec: DASH-IF // Convert EC3 channel map into string of hexadecimal digits. Spec: DASH-IF
// Interoperability Points v3.0 9.2.1.2. // Interoperability Points v3.0 9.2.1.2.
const uint16_t ec3_channel_map = const uint16_t ec3_channel_map =
base::HostToNet16(codec_data.ec3_channel_map()); base::HostToNet16(codec_data.channel_mask());
audio_channel_config_value = audio_channel_config_value =
base::HexEncode(&ec3_channel_map, sizeof(ec3_channel_map)); base::HexEncode(&ec3_channel_map, sizeof(ec3_channel_map));
audio_channel_config_scheme = audio_channel_config_scheme =
@ -500,6 +501,42 @@ bool RepresentationXmlNode::AddAudioChannelInfo(const AudioInfo& audio_info) {
ec3_joc_complexity); ec3_joc_complexity);
} }
return ret; return ret;
} else if (audio_info.codec().substr(0, 4) == kAC4Codec) {
const auto& codec_data = audio_info.codec_specific_data();
const bool ac4_ims_flag = codec_data.ac4_ims_flag();
// Use MPEG scheme if the mpeg value is available and valid, fallback to
// AC4 channel mask otherwise.
// See https://github.com/Dash-Industry-Forum/DASH-IF-IOP/issues/268
const uint32_t ac4_channel_mpeg_value = codec_data.channel_mpeg_value();
const uint32_t NO_MAPPING = 0xFFFFFFFF;
if (ac4_channel_mpeg_value == NO_MAPPING) {
// Calculate AC-4 channel mask. Spec: ETSI TS 103 190-2 V1.2.1 Digital
// Audio Compression (AC-4) Standard; Part 2: Immersive and personalized
// audio G.3.1.
const uint32_t ac4_channel_mask =
base::HostToNet32(codec_data.channel_mask() << 8);
audio_channel_config_value =
base::HexEncode(&ac4_channel_mask, sizeof(ac4_channel_mask) - 1);
// Note that the channel config schemes for EC-3 and AC-4 are different.
// See https://github.com/Dash-Industry-Forum/DASH-IF-IOP/issues/268.
audio_channel_config_scheme =
"tag:dolby.com,2015:dash:audio_channel_configuration:2015";
} else {
// Calculate AC-4 channel configuration descriptor value with MPEG scheme.
// Spec: ETSI TS 103 190-2 V1.2.1 Digital Audio Compression (AC-4) Standard;
// Part 2: Immersive and personalized audio G.3.2.
audio_channel_config_value = base::UintToString(ac4_channel_mpeg_value);
audio_channel_config_scheme = "urn:mpeg:mpegB:cicp:ChannelConfiguration";
}
bool ret = AddDescriptor("AudioChannelConfiguration",
audio_channel_config_scheme,
audio_channel_config_value);
if (ac4_ims_flag) {
ret &= AddDescriptor("SupplementalProperty",
"tag:dolby.com,2016:dash:virtualized_content:2016",
"1");
}
return ret;
} else { } else {
audio_channel_config_value = base::UintToString(audio_info.num_channels()); audio_channel_config_value = base::UintToString(audio_info.num_channels());
audio_channel_config_scheme = audio_channel_config_scheme =

View File

@ -215,8 +215,8 @@ TEST(XmlNodeTest, AddEC3AudioInfo) {
MediaInfo::AudioInfo audio_info; MediaInfo::AudioInfo audio_info;
audio_info.set_codec("ec-3"); audio_info.set_codec("ec-3");
audio_info.set_sampling_frequency(48000); audio_info.set_sampling_frequency(48000);
audio_info.mutable_codec_specific_data()->set_ec3_channel_map(0xF801); audio_info.mutable_codec_specific_data()->set_channel_mask(0xF801);
audio_info.mutable_codec_specific_data()->set_ec3_channel_mpeg_value( audio_info.mutable_codec_specific_data()->set_channel_mpeg_value(
0xFFFFFFFF); 0xFFFFFFFF);
RepresentationXmlNode representation; RepresentationXmlNode representation;
@ -236,8 +236,8 @@ TEST(XmlNodeTest, AddEC3AudioInfoMPEGScheme) {
MediaInfo::AudioInfo audio_info; MediaInfo::AudioInfo audio_info;
audio_info.set_codec("ec-3"); audio_info.set_codec("ec-3");
audio_info.set_sampling_frequency(48000); audio_info.set_sampling_frequency(48000);
audio_info.mutable_codec_specific_data()->set_ec3_channel_map(0xF801); audio_info.mutable_codec_specific_data()->set_channel_mask(0xF801);
audio_info.mutable_codec_specific_data()->set_ec3_channel_mpeg_value(6); audio_info.mutable_codec_specific_data()->set_channel_mpeg_value(6);
RepresentationXmlNode representation; RepresentationXmlNode representation;
representation.AddAudioInfo(audio_info); representation.AddAudioInfo(audio_info);
@ -256,8 +256,8 @@ TEST(XmlNodeTest, AddEC3AudioInfoMPEGSchemeJOC) {
MediaInfo::AudioInfo audio_info; MediaInfo::AudioInfo audio_info;
audio_info.set_codec("ec-3"); audio_info.set_codec("ec-3");
audio_info.set_sampling_frequency(48000); audio_info.set_sampling_frequency(48000);
audio_info.mutable_codec_specific_data()->set_ec3_channel_map(0xF801); audio_info.mutable_codec_specific_data()->set_channel_mask(0xF801);
audio_info.mutable_codec_specific_data()->set_ec3_channel_mpeg_value(6); audio_info.mutable_codec_specific_data()->set_channel_mpeg_value(6);
audio_info.mutable_codec_specific_data()->set_ec3_joc_complexity(16); audio_info.mutable_codec_specific_data()->set_ec3_joc_complexity(16);
RepresentationXmlNode representation; RepresentationXmlNode representation;
@ -281,6 +281,79 @@ TEST(XmlNodeTest, AddEC3AudioInfoMPEGSchemeJOC) {
"</Representation>\n")); "</Representation>\n"));
} }
// AC-4 audio info with channel_mpeg_value set to 0xFFFFFFFF and a channel
// mask of 0x0000C7: the Representation is expected to carry a single
// AudioChannelConfiguration descriptor using the Dolby scheme, whose value is
// the channel mask rendered as six hex digits.
TEST(XmlNodeTest, AddAC4AudioInfo) {
  MediaInfo::AudioInfo ac4_info;
  ac4_info.set_sampling_frequency(48000);
  ac4_info.set_codec("ac-4.02.01.02");

  auto& specific_data = *ac4_info.mutable_codec_specific_data();
  specific_data.set_channel_mask(0x0000C7);
  specific_data.set_channel_mpeg_value(0xFFFFFFFF);
  specific_data.set_ac4_cbi_flag(false);
  specific_data.set_ac4_ims_flag(false);

  // Expected serialization of the Representation element.
  const char* const expected_xml =
      "<Representation audioSamplingRate=\"48000\">\n"
      " <AudioChannelConfiguration\n"
      " schemeIdUri=\n"
      " \"tag:dolby.com,2015:dash:audio_channel_configuration:2015\"\n"
      " value=\"0000C7\"/>\n"
      "</Representation>\n";

  RepresentationXmlNode node;
  node.AddAudioInfo(ac4_info);
  EXPECT_THAT(node.GetRawPtr(), XmlNodeEqual(expected_xml));
}
// AC-4 audio info with channel_mpeg_value set to 2: the Representation is
// expected to carry a single AudioChannelConfiguration descriptor using the
// MPEG CICP scheme, whose value is the decimal channel_mpeg_value ("2").
TEST(XmlNodeTest, AddAC4AudioInfoMPEGScheme) {
  MediaInfo::AudioInfo ac4_info;
  ac4_info.set_sampling_frequency(48000);
  ac4_info.set_codec("ac-4.02.01.00");

  auto& specific_data = *ac4_info.mutable_codec_specific_data();
  specific_data.set_channel_mask(0x000001);
  specific_data.set_channel_mpeg_value(2);
  specific_data.set_ac4_cbi_flag(false);
  specific_data.set_ac4_ims_flag(false);

  // Expected serialization of the Representation element.
  const char* const expected_xml =
      "<Representation audioSamplingRate=\"48000\">\n"
      " <AudioChannelConfiguration\n"
      " schemeIdUri=\n"
      " \"urn:mpeg:mpegB:cicp:ChannelConfiguration\"\n"
      " value=\"2\"/>\n"
      "</Representation>\n";

  RepresentationXmlNode node;
  node.AddAudioInfo(ac4_info);
  EXPECT_THAT(node.GetRawPtr(), XmlNodeEqual(expected_xml));
}
// AC-4 audio info with channel_mpeg_value set to 2 and the IMS flag enabled:
// besides the MPEG CICP AudioChannelConfiguration descriptor, the
// Representation is expected to carry a SupplementalProperty descriptor with
// the Dolby "virtualized_content" scheme and value "1".
TEST(XmlNodeTest, AddAC4AudioInfoMPEGSchemeIMS) {
  MediaInfo::AudioInfo ac4_info;
  ac4_info.set_sampling_frequency(48000);
  ac4_info.set_codec("ac-4.02.02.00");

  auto& specific_data = *ac4_info.mutable_codec_specific_data();
  specific_data.set_channel_mask(0x000001);
  specific_data.set_channel_mpeg_value(2);
  specific_data.set_ac4_cbi_flag(false);
  // The IMS flag is the only codec-specific field set to true in this test.
  specific_data.set_ac4_ims_flag(true);

  // Expected serialization of the Representation element.
  const char* const expected_xml =
      "<Representation audioSamplingRate=\"48000\">\n"
      " <AudioChannelConfiguration\n"
      " schemeIdUri=\n"
      " \"urn:mpeg:mpegB:cicp:ChannelConfiguration\"\n"
      " value=\"2\"/>\n"
      " <SupplementalProperty\n"
      " schemeIdUri=\n"
      " \"tag:dolby.com,2016:dash:virtualized_content:2016\"\n"
      " value=\"1\"/>\n"
      "</Representation>\n";

  RepresentationXmlNode node;
  node.AddAudioInfo(ac4_info);
  EXPECT_THAT(node.GetRawPtr(), XmlNodeEqual(expected_xml));
}
class LiveSegmentTimelineTest : public ::testing::Test { class LiveSegmentTimelineTest : public ::testing::Test {
protected: protected:
void SetUp() override { void SetUp() override {