diff --git a/README.md b/README.md index b6c30e9697..a08350f731 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ Shaka Packager supports: | MP3 | O | - | I / O | - | O | | Dolby AC3 | I / O | - | I / O | - | O | | Dolby EAC3 | I / O | - | O | - | O | + | Dolby AC4 | I / O | - | - | - | - | | DTS | I / O | - | - | - | - | | FLAC | I / O | - | - | - | - | | Opus | I / O³ | I / O | - | - | - | diff --git a/packager/hls/base/master_playlist.cc b/packager/hls/base/master_playlist.cc index 9e23fc2b9a..a3b061a69a 100644 --- a/packager/hls/base/master_playlist.cc +++ b/packager/hls/base/master_playlist.cc @@ -42,7 +42,7 @@ struct Variant { const std::string* audio_group_id = nullptr; const std::string* text_group_id = nullptr; // The bitrates should be the sum of audio bitrate and text bitrate. - // However, given the contraints and assumptions, it makes sense to exclude + // However, given the constraints and assumptions, it makes sense to exclude // text bitrate out of the calculation: // - Text streams usually have a very small negligible bitrate. // - Text does not have constant bitrates. To avoid fluctuation, an arbitrary @@ -260,7 +260,7 @@ void BuildMediaTag(const MediaPlaylist& playlist, bool is_autoselect, const std::string& base_url, std::string* out) { - // Tag attribures should follow the order as defined in + // Tag attributes should follow the order as defined in // https://tools.ietf.org/html/draft-pantos-http-live-streaming-23#section-3.5 Tag tag("#EXT-X-MEDIA", out); @@ -308,20 +308,27 @@ void BuildMediaTag(const MediaPlaylist& playlist, const MediaPlaylist::MediaPlaylistStreamType kAudio = MediaPlaylist::MediaPlaylistStreamType::kAudio; if (playlist.stream_type() == kAudio) { - // According to HLS spec: - // https://tools.ietf.org/html/draft-pantos-hls-rfc8216bis 4.4.6.1. - // CHANNELS is a quoted-string that specifies an ordered, - // slash-separated ("/") list of parameters. 
The first parameter is a count - // of audio channels, and the second parameter identifies the encoding of - // object-based audio used by the Rendition. HLS Authoring Specification - // for Apple Devices Appendices documents how to handle Dolby Digital Plus - // JOC content. - // https://developer.apple.com/documentation/http_live_streaming/hls_authoring_specification_for_apple_devices/hls_authoring_specification_for_apple_devices_appendices if (playlist.GetEC3JocComplexity() != 0) { + // HLS Authoring Specification for Apple Devices Appendices documents how + // to handle Dolby Digital Plus JOC content. + // https://developer.apple.com/documentation/http_live_streaming/hls_authoring_specification_for_apple_devices/hls_authoring_specification_for_apple_devices_appendices std::string channel_string = std::to_string(playlist.GetEC3JocComplexity()) + "/JOC"; tag.AddQuotedString("CHANNELS", channel_string); + } else if (playlist.GetAC4ImsFlag() || playlist.GetAC4CbiFlag()) { + // Dolby has qualified using IMSA to present AC4 immersive audio (IMS and + // CBI without object-based audio) for Dolby internal use only. IMSA is + // not included in any publicly-available specifications as of June, 2020. + std::string channel_string = + std::to_string(playlist.GetNumChannels()) + "/IMSA"; + tag.AddQuotedString("CHANNELS", channel_string); } else { + // According to HLS spec: + // https://tools.ietf.org/html/draft-pantos-hls-rfc8216bis 4.4.6.1. + // CHANNELS is a quoted-string that specifies an ordered, + // slash-separated ("/") list of parameters. The first parameter is a + // count of audio channels, and the second parameter identifies the + // encoding of object-based audio used by the Rendition. 
std::string channel_string = std::to_string(playlist.GetNumChannels()); tag.AddQuotedString("CHANNELS", channel_string); } diff --git a/packager/hls/base/master_playlist_unittest.cc b/packager/hls/base/master_playlist_unittest.cc index bce023a60b..9f6f4ab45d 100644 --- a/packager/hls/base/master_playlist_unittest.cc +++ b/packager/hls/base/master_playlist_unittest.cc @@ -34,6 +34,8 @@ const uint32_t kWidth = 800; const uint32_t kHeight = 600; const uint32_t kEC3JocComplexityZero = 0; const uint32_t kEC3JocComplexity = 16; +const bool kAC4IMSFlagEnabled = true; +const bool kAC4CBIFlagEnabled = true; std::unique_ptr CreateVideoPlaylist( const std::string& filename, @@ -84,13 +86,17 @@ std::unique_ptr CreateAudioPlaylist( uint64_t channels, uint64_t max_bitrate, uint64_t avg_bitrate, - uint64_t ec3_joc_complexity) { + uint64_t ec3_joc_complexity, + bool ac4_ims_flag, + bool ac4_cbi_flag) { std::unique_ptr playlist( new MockMediaPlaylist(filename, name, group)); EXPECT_CALL(*playlist, GetNumChannels()).WillRepeatedly(Return(channels)); EXPECT_CALL(*playlist, GetEC3JocComplexity()) .WillRepeatedly(Return(ec3_joc_complexity)); + EXPECT_CALL(*playlist, GetAC4ImsFlag()).WillRepeatedly(Return(ac4_ims_flag)); + EXPECT_CALL(*playlist, GetAC4CbiFlag()).WillRepeatedly(Return(ac4_cbi_flag)); playlist->SetStreamTypeForTesting( MediaPlaylist::MediaPlaylistStreamType::kAudio); @@ -251,12 +257,14 @@ TEST_F(MasterPlaylistTest, WriteMasterPlaylistVideoAndAudio) { // First audio, english.m3u8. std::unique_ptr english_playlist = CreateAudioPlaylist( "eng.m3u8", "english", "audiogroup", "audiocodec", "en", kAudio1Channels, - kAudio1MaxBitrate, kAudio1AvgBitrate, kEC3JocComplexityZero); + kAudio1MaxBitrate, kAudio1AvgBitrate, kEC3JocComplexityZero, + !kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled); // Second audio, spanish.m3u8. 
std::unique_ptr spanish_playlist = CreateAudioPlaylist( "spa.m3u8", "espanol", "audiogroup", "audiocodec", "es", kAudio2Channels, - kAudio2MaxBitrate, kAudio2AvgBitrate, kEC3JocComplexityZero); + kAudio2MaxBitrate, kAudio2AvgBitrate, kEC3JocComplexityZero, + !kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled); const char kBaseUrl[] = "http://playlists.org/"; EXPECT_TRUE(master_playlist_.WriteMasterPlaylist( @@ -311,13 +319,13 @@ TEST_F(MasterPlaylistTest, WriteMasterPlaylistMultipleAudioGroups) { std::unique_ptr eng_lo_playlist = CreateAudioPlaylist( "eng_lo.m3u8", "english_lo", "audio_lo", "audiocodec_lo", "en", kAudio1Channels, kAudio1MaxBitrate, kAudio1AvgBitrate, - kEC3JocComplexityZero); + kEC3JocComplexityZero, !kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled); // Second audio, eng_hi.m3u8. std::unique_ptr eng_hi_playlist = CreateAudioPlaylist( "eng_hi.m3u8", "english_hi", "audio_hi", "audiocodec_hi", "en", kAudio2Channels, kAudio2MaxBitrate, kAudio2AvgBitrate, - kEC3JocComplexityZero); + kEC3JocComplexityZero, !kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled); const char kBaseUrl[] = "http://anydomain.com/"; EXPECT_TRUE(master_playlist_.WriteMasterPlaylist( @@ -360,11 +368,11 @@ TEST_F(MasterPlaylistTest, WriteMasterPlaylistSameAudioGroupSameLanguage) { // First audio, eng_lo.m3u8. std::unique_ptr eng_lo_playlist = CreateAudioPlaylist( "eng_lo.m3u8", "english", "audio", "audiocodec", "en", 1, 50000, 40000, - kEC3JocComplexityZero); + kEC3JocComplexityZero, !kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled); std::unique_ptr eng_hi_playlist = CreateAudioPlaylist( "eng_hi.m3u8", "english", "audio", "audiocodec", "en", 8, 100000, 80000, - kEC3JocComplexityZero); + kEC3JocComplexityZero, !kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled); const char kBaseUrl[] = "http://anydomain.com/"; EXPECT_TRUE(master_playlist_.WriteMasterPlaylist( @@ -531,7 +539,7 @@ TEST_F(MasterPlaylistTest, WriteMasterPlaylistVideoAndAudioAndText) { // Audio, english.m3u8. 
std::unique_ptr audio = CreateAudioPlaylist( "eng.m3u8", "english", "audiogroup", "audiocodec", "en", 2, 50000, 30000, - kEC3JocComplexityZero); + kEC3JocComplexityZero, !kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled); // Text, english.m3u8. std::unique_ptr text = @@ -578,10 +586,12 @@ TEST_F(MasterPlaylistTest, WriteMasterPlaylistMixedPlaylistsDifferentGroups) { // AUDIO CreateAudioPlaylist("audio-1.m3u8", "audio 1", "audio-group-1", "audiocodec", "en", kAudioChannels, kAudioMaxBitrate, - kAudioAvgBitrate, kEC3JocComplexityZero), + kAudioAvgBitrate, kEC3JocComplexityZero, + !kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled), CreateAudioPlaylist("audio-2.m3u8", "audio 2", "audio-group-2", "audiocodec", "fr", kAudioChannels, kAudioMaxBitrate, - kAudioAvgBitrate, kEC3JocComplexityZero), + kAudioAvgBitrate, kEC3JocComplexityZero, + !kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled), // SUBTITLES CreateTextPlaylist("text-1.m3u8", "text 1", "text-group-1", "textcodec", @@ -689,10 +699,12 @@ TEST_F(MasterPlaylistTest, WriteMasterPlaylistAudioOnly) { // AUDIO CreateAudioPlaylist("audio-1.m3u8", "audio 1", "audio-group-1", "audiocodec", "en", kAudioChannels, kAudioMaxBitrate, - kAudioAvgBitrate, kEC3JocComplexityZero), + kAudioAvgBitrate, kEC3JocComplexityZero, + !kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled), CreateAudioPlaylist("audio-2.m3u8", "audio 2", "audio-group-2", "audiocodec", "fr", kAudioChannels, kAudioMaxBitrate, - kAudioAvgBitrate, kEC3JocComplexityZero), + kAudioAvgBitrate, kEC3JocComplexityZero, + !kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled), }; // Add all the media playlists to the master playlist. 
@@ -739,10 +751,12 @@ TEST_F(MasterPlaylistTest, WriteMasterPlaylistAudioOnlyJOC) { // AUDIO CreateAudioPlaylist("audio-1.m3u8", "audio 1", "audio-group-1", "audiocodec", "en", kAudioChannels, kAudioMaxBitrate, - kAudioAvgBitrate, kEC3JocComplexityZero), + kAudioAvgBitrate, kEC3JocComplexityZero, !kAC4IMSFlagEnabled, + !kAC4CBIFlagEnabled), CreateAudioPlaylist("audio-2.m3u8", "audio 2", "audio-group-2", "audiocodec", "en", kAudioChannels, kAudioMaxBitrate, - kAudioAvgBitrate, kEC3JocComplexity), + kAudioAvgBitrate, kEC3JocComplexity, !kAC4IMSFlagEnabled, + !kAC4CBIFlagEnabled), }; // Add all the media playlists to the master playlist. @@ -779,5 +793,111 @@ TEST_F(MasterPlaylistTest, WriteMasterPlaylistAudioOnlyJOC) { ASSERT_EQ(expected, actual); } + +TEST_F(MasterPlaylistTest, WriteMasterPlaylistAudioOnlyAC4IMS) { + const uint64_t kAudioChannels = 2; + const uint64_t kAudioMaxBitrate = 50000; + const uint64_t kAudioAvgBitrate = 30000; + + std::unique_ptr media_playlists[] = { + // AUDIO + CreateAudioPlaylist("audio-1.m3u8", "audio 1", "audio-group-1", + "audio1codec", "en", kAudioChannels, kAudioMaxBitrate, + kAudioAvgBitrate, kEC3JocComplexityZero, + kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled), + CreateAudioPlaylist("audio-2.m3u8", "audio 2", "audio-group-2", + "audio2codec", "en", kAudioChannels, kAudioMaxBitrate, + kAudioAvgBitrate, kEC3JocComplexityZero, + !kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled), + }; + + // Add all the media playlists to the master playlist. 
+ std::list media_playlist_list; + for (const auto& media_playlist : media_playlists) { + media_playlist_list.push_back(media_playlist.get()); + } + + const char kBaseUrl[] = "http://playlists.org/"; + EXPECT_TRUE(master_playlist_.WriteMasterPlaylist(kBaseUrl, test_output_dir_, + media_playlist_list)); + + std::string actual; + ASSERT_TRUE(File::ReadFileToString(master_playlist_path_.c_str(), &actual)); + + const std::string expected = + "#EXTM3U\n" + "## Generated with https://github.com/google/shaka-packager version " + "test\n" + "\n" + "#EXT-X-MEDIA:TYPE=AUDIO,URI=\"http://playlists.org/audio-1.m3u8\"," + "GROUP-ID=\"audio-group-1\",LANGUAGE=\"en\",NAME=\"audio 1\"," + "DEFAULT=YES,AUTOSELECT=YES,CHANNELS=\"2/IMSA\"\n" + "#EXT-X-MEDIA:TYPE=AUDIO,URI=\"http://playlists.org/audio-2.m3u8\"," + "GROUP-ID=\"audio-group-2\",LANGUAGE=\"en\",NAME=\"audio 2\"," + "DEFAULT=YES,AUTOSELECT=YES,CHANNELS=\"2\"\n" + "\n" + "#EXT-X-STREAM-INF:BANDWIDTH=50000,AVERAGE-BANDWIDTH=30000," + "CODECS=\"audio1codec\",AUDIO=\"audio-group-1\"\n" + "http://playlists.org/audio-1.m3u8\n" + "#EXT-X-STREAM-INF:BANDWIDTH=50000,AVERAGE-BANDWIDTH=30000," + "CODECS=\"audio2codec\",AUDIO=\"audio-group-2\"\n" + "http://playlists.org/audio-2.m3u8\n"; + + ASSERT_EQ(expected, actual); +} + +TEST_F(MasterPlaylistTest, WriteMasterPlaylistAudioOnlyAC4CBI) { + const uint64_t kAudio1Channels = 6; + const uint64_t kAudio2Channels = 8; + const uint64_t kAudioMaxBitrate = 50000; + const uint64_t kAudioAvgBitrate = 30000; + + std::unique_ptr media_playlists[] = { + // AUDIO + CreateAudioPlaylist("audio-1.m3u8", "audio 1", "audio-group-1", + "audiocodec", "en", kAudio1Channels, kAudioMaxBitrate, + kAudioAvgBitrate, kEC3JocComplexityZero, + !kAC4IMSFlagEnabled, !kAC4CBIFlagEnabled), + CreateAudioPlaylist("audio-2.m3u8", "audio 2", "audio-group-2", + "audiocodec", "en", kAudio2Channels, kAudioMaxBitrate, + kAudioAvgBitrate, kEC3JocComplexityZero, + !kAC4IMSFlagEnabled, kAC4CBIFlagEnabled), + }; + + // Add all 
the media playlists to the master playlist. + std::list media_playlist_list; + for (const auto& media_playlist : media_playlists) { + media_playlist_list.push_back(media_playlist.get()); + } + + const char kBaseUrl[] = "http://playlists.org/"; + EXPECT_TRUE(master_playlist_.WriteMasterPlaylist(kBaseUrl, test_output_dir_, + media_playlist_list)); + + std::string actual; + ASSERT_TRUE(File::ReadFileToString(master_playlist_path_.c_str(), &actual)); + + const std::string expected = + "#EXTM3U\n" + "## Generated with https://github.com/google/shaka-packager version " + "test\n" + "\n" + "#EXT-X-MEDIA:TYPE=AUDIO,URI=\"http://playlists.org/audio-1.m3u8\"," + "GROUP-ID=\"audio-group-1\",LANGUAGE=\"en\",NAME=\"audio 1\"," + "DEFAULT=YES,AUTOSELECT=YES,CHANNELS=\"6\"\n" + "#EXT-X-MEDIA:TYPE=AUDIO,URI=\"http://playlists.org/audio-2.m3u8\"," + "GROUP-ID=\"audio-group-2\",LANGUAGE=\"en\",NAME=\"audio 2\"," + "DEFAULT=YES,AUTOSELECT=YES,CHANNELS=\"8/IMSA\"\n" + "\n" + "#EXT-X-STREAM-INF:BANDWIDTH=50000,AVERAGE-BANDWIDTH=30000," + "CODECS=\"audiocodec\",AUDIO=\"audio-group-1\"\n" + "http://playlists.org/audio-1.m3u8\n" + "#EXT-X-STREAM-INF:BANDWIDTH=50000,AVERAGE-BANDWIDTH=30000," + "CODECS=\"audiocodec\",AUDIO=\"audio-group-2\"\n" + "http://playlists.org/audio-2.m3u8\n"; + + ASSERT_EQ(expected, actual); +} + } // namespace hls } // namespace shaka diff --git a/packager/hls/base/media_playlist.cc b/packager/hls/base/media_playlist.cc index 450d1a75a6..9bc585b851 100644 --- a/packager/hls/base/media_playlist.cc +++ b/packager/hls/base/media_playlist.cc @@ -524,6 +524,14 @@ int MediaPlaylist::GetEC3JocComplexity() const { return media_info_.audio_info().codec_specific_data().ec3_joc_complexity(); } +bool MediaPlaylist::GetAC4ImsFlag() const { + return media_info_.audio_info().codec_specific_data().ac4_ims_flag(); +} + +bool MediaPlaylist::GetAC4CbiFlag() const { + return media_info_.audio_info().codec_specific_data().ac4_cbi_flag(); +} + bool 
MediaPlaylist::GetDisplayResolution(uint32_t* width, uint32_t* height) const { DCHECK(width); diff --git a/packager/hls/base/media_playlist.h b/packager/hls/base/media_playlist.h index e3a1aa96c7..2b419b99c2 100644 --- a/packager/hls/base/media_playlist.h +++ b/packager/hls/base/media_playlist.h @@ -152,7 +152,7 @@ class MediaPlaylist { /// Write the playlist to |file_path|. /// This does not close the file. - /// If target duration is not set expliticly, this will try to find the target + /// If target duration is not set explicitly, this will try to find the target /// duration. Note that target duration cannot be changed. So calling this /// without explicitly setting the target duration and before adding any /// segments will end up setting the target duration to 0 and will always @@ -193,6 +193,16 @@ class MediaPlaylist { /// Standard C.3.2.3. virtual int GetEC3JocComplexity() const; + /// @return true if it's an AC-4 IMS stream, based on Dolby AC-4 in MPEG-DASH + /// for Online Delivery Specification 2.5.3. + /// https://developer.dolby.com/tools-media/online-delivery-kits/dolby-ac-4/ + virtual bool GetAC4ImsFlag() const; + + /// @return true if it's an AC-4 CBI stream, based on ETSI TS 103 190-2 + /// Digital Audio Compression (AC-4) Standard; Part 2: Immersive and + /// personalized audio 4.3. + virtual bool GetAC4CbiFlag() const; + /// @return true if |width| and |height| have been set with a valid /// resolution values. 
virtual bool GetDisplayResolution(uint32_t* width, uint32_t* height) const; diff --git a/packager/hls/base/media_playlist_unittest.cc b/packager/hls/base/media_playlist_unittest.cc index 68806b8e83..1dce438a4f 100644 --- a/packager/hls/base/media_playlist_unittest.cc +++ b/packager/hls/base/media_playlist_unittest.cc @@ -504,6 +504,42 @@ TEST_F(MediaPlaylistMultiSegmentTest, GetEC3JocComplexity) { EXPECT_EQ(6, media_playlist_->GetEC3JocComplexity()); } +TEST_F(MediaPlaylistMultiSegmentTest, GetAC4ImsFlag) { + MediaInfo media_info; + media_info.set_reference_time_scale(kTimeScale); + + // Returns false by default if not audio. + EXPECT_EQ(false, media_playlist_->GetAC4ImsFlag()); + + media_info.mutable_audio_info()->mutable_codec_specific_data()-> + set_ac4_ims_flag(false); + ASSERT_TRUE(media_playlist_->SetMediaInfo(media_info)); + EXPECT_EQ(false, media_playlist_->GetAC4ImsFlag()); + + media_info.mutable_audio_info()->mutable_codec_specific_data()-> + set_ac4_ims_flag(true); + ASSERT_TRUE(media_playlist_->SetMediaInfo(media_info)); + EXPECT_EQ(true, media_playlist_->GetAC4ImsFlag()); +} + +TEST_F(MediaPlaylistMultiSegmentTest, GetAC4CbiFlag) { + MediaInfo media_info; + media_info.set_reference_time_scale(kTimeScale); + + // Returns false by default if not audio. 
+ EXPECT_EQ(false, media_playlist_->GetAC4CbiFlag()); + + media_info.mutable_audio_info()->mutable_codec_specific_data()-> + set_ac4_cbi_flag(false); + ASSERT_TRUE(media_playlist_->SetMediaInfo(media_info)); + EXPECT_EQ(false, media_playlist_->GetAC4CbiFlag()); + + media_info.mutable_audio_info()->mutable_codec_specific_data()-> + set_ac4_cbi_flag(true); + ASSERT_TRUE(media_playlist_->SetMediaInfo(media_info)); + EXPECT_EQ(true, media_playlist_->GetAC4CbiFlag()); +} + TEST_F(MediaPlaylistMultiSegmentTest, Characteristics) { MediaInfo media_info; media_info.set_reference_time_scale(kTimeScale); diff --git a/packager/hls/base/mock_media_playlist.h b/packager/hls/base/mock_media_playlist.h index e757271a3b..11a350842a 100644 --- a/packager/hls/base/mock_media_playlist.h +++ b/packager/hls/base/mock_media_playlist.h @@ -49,6 +49,8 @@ class MockMediaPlaylist : public MediaPlaylist { MOCK_METHOD1(SetTargetDuration, void(uint32_t target_duration)); MOCK_CONST_METHOD0(GetNumChannels, int()); MOCK_CONST_METHOD0(GetEC3JocComplexity, int()); + MOCK_CONST_METHOD0(GetAC4ImsFlag, bool()); + MOCK_CONST_METHOD0(GetAC4CbiFlag, bool()); MOCK_CONST_METHOD2(GetDisplayResolution, bool(uint32_t* width, uint32_t* height)); MOCK_CONST_METHOD0(GetFrameRate, double()); diff --git a/packager/media/base/audio_stream_info.cc b/packager/media/base/audio_stream_info.cc index 776bf92d2b..349ebd3f14 100644 --- a/packager/media/base/audio_stream_info.cc +++ b/packager/media/base/audio_stream_info.cc @@ -37,6 +37,8 @@ std::string AudioCodecToString(Codec codec) { return "DTS+"; case kCodecEAC3: return "EAC3"; + case kCodecAC4: + return "AC4"; case kCodecFlac: return "FLAC"; case kCodecOpus: @@ -121,6 +123,14 @@ std::string AudioStreamInfo::GetCodecString(Codec codec, return "dts+"; case kCodecEAC3: return "ec-3"; + case kCodecAC4: + // ETSI TS 103 190-2 Digital Audio Compression (AC-4) Standard; Part 2: + // Immersive and personalized audio E.13. 
audio_object_type is composed of + // bitstream_version (3bits), presentation_version (2bits) and + // mdcompat (3bits). + return base::StringPrintf( + "ac-4.%02d.%02d.%02d", (audio_object_type & 0xE0) >> 5, + (audio_object_type & 0x18) >> 3, audio_object_type & 0x7); case kCodecFlac: return "flac"; case kCodecOpus: diff --git a/packager/media/base/fourccs.h b/packager/media/base/fourccs.h index 1dba254afb..0b6b29b71c 100644 --- a/packager/media/base/fourccs.h +++ b/packager/media/base/fourccs.h @@ -20,6 +20,7 @@ enum FourCC : uint32_t { FOURCC_aacd = 0x61616364, FOURCC_ac_3 = 0x61632d33, // "ac-3" + FOURCC_ac_4 = 0x61632d34, // "ac-4" FOURCC_ac3d = 0x61633364, FOURCC_apad = 0x61706164, FOURCC_av01 = 0x61763031, @@ -41,6 +42,7 @@ enum FourCC : uint32_t { FOURCC_ctts = 0x63747473, FOURCC_dOps = 0x644f7073, FOURCC_dac3 = 0x64616333, + FOURCC_dac4 = 0x64616334, FOURCC_dash = 0x64617368, FOURCC_ddts = 0x64647473, FOURCC_dec3 = 0x64656333, diff --git a/packager/media/base/stream_info.h b/packager/media/base/stream_info.h index 47872a9f34..dd07b5038a 100644 --- a/packager/media/base/stream_info.h +++ b/packager/media/base/stream_info.h @@ -40,6 +40,7 @@ enum Codec { kCodecAudio = 200, kCodecAAC = kCodecAudio, kCodecAC3, + kCodecAC4, // TODO(kqyang): Use kCodecDTS and a kDtsStreamFormat for the various DTS // streams. kCodecDTSC, diff --git a/packager/media/codecs/ac4_audio_util.cc b/packager/media/codecs/ac4_audio_util.cc new file mode 100644 index 0000000000..75c428a8c0 --- /dev/null +++ b/packager/media/codecs/ac4_audio_util.cc @@ -0,0 +1,528 @@ +// Copyright 2020 Google Inc. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#include "packager/media/codecs/ac4_audio_util.h" + +#include "packager/base/macros.h" +#include "packager/base/strings/string_number_conversions.h" +#include "packager/media/base/bit_reader.h" +#include "packager/media/base/rcheck.h" + +namespace shaka { +namespace media { + +namespace { + +// Speaker group index +// Bit, Location +// 0(LSB), Left/Right pair +// 1, Centre +// 2, Left surround/Right surround pair +// 3, Left back/Right back pair +// 4, Top front left/Top front right pair +// 5, Top back left/Top back right pair +// 6, LFE +// 7, Top left/Top right pair +// 8, Top side left/Top side right pair +// 9, Top front centre +// 10, Top back centre +// 11, Top centre +// 12, LFE2 +// 13, Bottom front left/Bottom front right pair +// 14, Bottom front centre +// 15, Back centre +// 16, Left screen/Right screen pair +// 17, Left wide/Right wide pair +// 18, Vertical height left/Vertical height right pair +enum kAC4AudioChannelGroupIndex { + kLRPair = 0x1, + kCentre = 0x2, + kLsRsPair = 0x4, + kLbRbPair = 0x8, + kTflTfrPair = 0x10, + kTblTbrPair = 0x20, + kLFE = 0x40, + kTlTrPair = 0x80, + kTslTsrPair = 0x100, + kTopfrontCentre = 0x200, + kTopbackCentre = 0x400, + kTopCentre = 0x800, + kLFE2 = 0x1000, + kBflBfrPair = 0x2000, + kBottomFrontCentre = 0x4000, + kBackCentre = 0x8000, + kLscrRscrPair = 0x10000, + kLwRw = 0x20000, + kVhlVhrPair = 0x40000, +}; + +// Mapping of channel configurations to the MPEG audio value based on ETSI TS +// 103 190-2 V1.2.1 Digital Audio Compression (AC-4) Standard; +// Part 2: Immersive and personalized audio Table G.1 +uint32_t AC4ChannelMasktoMPEGValue(uint32_t channel_mask) { + uint32_t ret = 0; + + switch (channel_mask) { + case kCentre: + ret = 1; + break; + case kLRPair: + ret = 2; + break; + case kCentre | kLRPair: + ret = 3; + break; + case kCentre | 
kLRPair | kBackCentre: + ret = 4; + break; + case kCentre | kLRPair | kLsRsPair: + ret = 5; + break; + case kCentre | kLRPair | kLsRsPair | kLFE: + ret = 6; + break; + case kCentre | kLRPair | kLsRsPair | kLFE | kLwRw: + ret = 7; + break; + case kBackCentre | kLRPair: + ret = 9; + break; + case kLRPair | kLsRsPair: + ret = 10; + break; + case kCentre | kLRPair | kLsRsPair | kLFE | kBackCentre: + ret = 11; + break; + case kCentre | kLRPair | kLsRsPair | kLbRbPair | kLFE: + ret = 12; + break; + case kLwRw | kBackCentre | kBottomFrontCentre | kBflBfrPair | kLFE2 | + kTopCentre | kTopbackCentre | kTopfrontCentre | kTslTsrPair | kLFE | + kTblTbrPair | kTflTfrPair | kLbRbPair | kLsRsPair | kCentre | kLRPair: + case kVhlVhrPair | kLwRw | kBackCentre | kBottomFrontCentre | kBflBfrPair| + kLFE2 | kTopCentre | kTopbackCentre | kTopfrontCentre | kTslTsrPair | + kLFE | kTblTbrPair | kLbRbPair | kLsRsPair | kCentre | kLRPair: + ret = 13; + break; + case kLFE | kTflTfrPair | kLsRsPair | kCentre | kLRPair: + case kVhlVhrPair | kLFE | kCentre | kLRPair | kLsRsPair: + ret = 14; + break; + case kLFE2 | kTopbackCentre | kLFE | kTflTfrPair | kCentre | kLRPair | + kLsRsPair | kLbRbPair: + case kVhlVhrPair | kLFE2 | kTopbackCentre | kLFE | kCentre | kLRPair | + kLsRsPair | kLbRbPair: + ret = 15; + break; + case kLFE | kTblTbrPair | kTflTfrPair | kLsRsPair | kCentre | kLRPair: + case kVhlVhrPair | kLFE | kTblTbrPair | kLsRsPair | kCentre | kLRPair: + ret = 16; + break; + case kTopCentre | kTopfrontCentre | kLFE | kTblTbrPair | kTflTfrPair | + kLsRsPair | kCentre | kLRPair: + case kVhlVhrPair | kTopCentre | kTopfrontCentre | kLFE | kTblTbrPair | + kLsRsPair | kCentre | kLRPair: + ret = 17; + break; + case kTopCentre | kTopfrontCentre | kLFE | kTblTbrPair | kTflTfrPair | + kCentre | kLRPair | kLsRsPair | kLbRbPair: + case kVhlVhrPair | kTopCentre | kTopfrontCentre | kLFE | kTblTbrPair | + kCentre | kLRPair | kLsRsPair | kLbRbPair: + ret = 18; + break; + case kLFE | kTblTbrPair | 
kTflTfrPair | kCentre | kLRPair | kLsRsPair | + kLbRbPair: + case kVhlVhrPair | kLFE | kTblTbrPair | kCentre | kLRPair | kLsRsPair | + kLbRbPair: + ret = 19; + break; + case kLscrRscrPair | kLFE | kTblTbrPair | kTflTfrPair | kCentre | kLRPair | + kLsRsPair | kLbRbPair: + case kVhlVhrPair | kLscrRscrPair | kLFE | kTblTbrPair | kCentre | kLRPair | + kLsRsPair | kLbRbPair: + ret = 20; + break; + default: + ret = 0xFFFFFFFF; + } + return ret; +} + +// Parse AC-4 substream group based on ETSI TS 103 190-2 V1.2.1 Digital Audio +// Compression (AC-4) Standard; Part 2: Immersive and personalized audio E.11. +bool ParseAC4SubStreamGroupDsi(BitReader& bit_reader) { + bool b_substream_present; + RCHECK(bit_reader.ReadBits(1, &b_substream_present)); + bool b_hsf_ext; + RCHECK(bit_reader.ReadBits(1, &b_hsf_ext)); + bool b_channel_coded; + RCHECK(bit_reader.ReadBits(1, &b_channel_coded)); + uint8_t n_substreams; + RCHECK(bit_reader.ReadBits(8, &n_substreams)); + for (uint8_t i = 0; i < n_substreams; i++) { + RCHECK(bit_reader.SkipBits(2)); + bool b_substream_bitrate_indicator; + RCHECK(bit_reader.ReadBits(1, &b_substream_bitrate_indicator)); + if (b_substream_bitrate_indicator) { + RCHECK(bit_reader.SkipBits(5)); + } + if (b_channel_coded) { + RCHECK(bit_reader.SkipBits(24)); + } else { + bool b_ajoc; + RCHECK(bit_reader.ReadBits(1, &b_ajoc)); + if (b_ajoc) { + bool b_static_dmx; + RCHECK(bit_reader.ReadBits(1, &b_static_dmx)); + if (!b_static_dmx) { + RCHECK(bit_reader.SkipBits(4)); + } + RCHECK(bit_reader.SkipBits(6)); + } + RCHECK(bit_reader.SkipBits(4)); + } + } + bool b_content_type; + RCHECK(bit_reader.ReadBits(1, &b_content_type)); + if (b_content_type) { + RCHECK(bit_reader.SkipBits(3)); + bool b_language_indicator; + RCHECK(bit_reader.ReadBits(1, &b_language_indicator)); + if (b_language_indicator) { + uint8_t n_language_tag_bytes; + RCHECK(bit_reader.ReadBits(6, &n_language_tag_bytes)); + RCHECK(bit_reader.SkipBits(n_language_tag_bytes * 8)); + } + } + return true; +} + +// 
Parse AC-4 Presentation V1 based on ETSI TS 103 190-2 V1.2.1 Digital Audio +// Compression (AC-4) Standard; Part 2: Immersive and personalized audio E.10. +bool ParseAC4PresentationV1Dsi(BitReader& bit_reader, + uint32_t pres_bytes, + uint8_t* mdcompat, + uint32_t* presentation_channel_mask_v1, + bool* dolby_cbi_indicator, + uint8_t* dolby_atmos_indicator) { + bool ret = true; + // Record the initial offset. + const size_t presentation_start = bit_reader.bit_position(); + uint8_t presentation_config_v1; + RCHECK(bit_reader.ReadBits(5, &presentation_config_v1)); + uint8_t b_add_emdf_substreams; + // set default value (stereo content) for output parameters. + *mdcompat = 0; + *presentation_channel_mask_v1 = 2; + *dolby_cbi_indicator = false; + *dolby_atmos_indicator = 0; + if (presentation_config_v1 == 0x06) { + b_add_emdf_substreams = 1; + } else { + RCHECK(bit_reader.ReadBits(3, mdcompat)); + bool b_presentation_id; + RCHECK(bit_reader.ReadBits(1, &b_presentation_id)); + if (b_presentation_id) { + RCHECK(bit_reader.SkipBits(5)); + } + RCHECK(bit_reader.SkipBits(19)); + bool b_presentation_channel_coded; + RCHECK(bit_reader.ReadBits(1, &b_presentation_channel_coded)); + *presentation_channel_mask_v1 = 0; + if (b_presentation_channel_coded) { + uint8_t dsi_presentation_ch_mode; + RCHECK(bit_reader.ReadBits(5, &dsi_presentation_ch_mode)); + if (dsi_presentation_ch_mode >= 11 && dsi_presentation_ch_mode <= 14) { + RCHECK(bit_reader.SkipBits(1)); + uint8_t pres_top_channel_pairs; + RCHECK(bit_reader.ReadBits(2, &pres_top_channel_pairs)); + if (pres_top_channel_pairs) { + *dolby_cbi_indicator = true; + } + } else if (dsi_presentation_ch_mode == 15) { + *dolby_cbi_indicator = true; + } + RCHECK(bit_reader.ReadBits(24, presentation_channel_mask_v1)); + } + bool b_presentation_core_differs; + RCHECK(bit_reader.ReadBits(1, &b_presentation_core_differs)); + if (b_presentation_core_differs) { + bool b_presentation_core_channel_coded; + RCHECK(bit_reader.ReadBits(1, 
&b_presentation_core_channel_coded)); + if (b_presentation_core_channel_coded) { + RCHECK(bit_reader.SkipBits(2)); + } + } + bool b_presentation_filter; + RCHECK(bit_reader.ReadBits(1, &b_presentation_filter)); + if (b_presentation_filter) { + RCHECK(bit_reader.SkipBits(1)); + uint8_t n_filter_bytes; + RCHECK(bit_reader.ReadBits(8, &n_filter_bytes)); + RCHECK(bit_reader.SkipBits(n_filter_bytes * 8)); + } + if (presentation_config_v1 == 0x1f) { + ret &= ParseAC4SubStreamGroupDsi(bit_reader); + } else { + RCHECK(bit_reader.SkipBits(1)); + if (presentation_config_v1 == 0 || + presentation_config_v1 == 1 || + presentation_config_v1 == 2) { + ret &= ParseAC4SubStreamGroupDsi(bit_reader); + ret &= ParseAC4SubStreamGroupDsi(bit_reader); + } + if (presentation_config_v1 == 3 || presentation_config_v1 == 4) { + ret &= ParseAC4SubStreamGroupDsi(bit_reader); + ret &= ParseAC4SubStreamGroupDsi(bit_reader); + ret &= ParseAC4SubStreamGroupDsi(bit_reader); + } + if (presentation_config_v1 == 5) { + uint8_t n_substream_groups_minus2; + RCHECK(bit_reader.ReadBits(3, &n_substream_groups_minus2)); + for (uint8_t sg = 0; sg < n_substream_groups_minus2 + 2; sg++) { + ret &= ParseAC4SubStreamGroupDsi(bit_reader); + } + } + if (presentation_config_v1 > 5) { + uint8_t n_skip_bytes; + RCHECK(bit_reader.ReadBits(7, &n_skip_bytes)); + RCHECK(bit_reader.SkipBits(n_skip_bytes * 8)); + } + } + RCHECK(bit_reader.SkipBits(1)); + RCHECK(bit_reader.ReadBits(1, &b_add_emdf_substreams)); + } + if (b_add_emdf_substreams) { + uint8_t n_add_emdf_substreams; + RCHECK(bit_reader.ReadBits(7, &n_add_emdf_substreams)); + RCHECK(bit_reader.SkipBits(n_add_emdf_substreams * 15)); + } + bool b_presentation_bitrate_info; + RCHECK(bit_reader.ReadBits(1, &b_presentation_bitrate_info)); + if (b_presentation_bitrate_info) { + // Skip bit rate information based on ETSI TS 103 190-2 v1.2.1 E.7.1 + RCHECK(bit_reader.SkipBits(66)); + } + bool b_alternative; + RCHECK(bit_reader.ReadBits(1, &b_alternative)); + if 
(b_alternative) { + bit_reader.SkipToNextByte(); + // Parse alternative information based on ETSI TS 103 190-2 v1.2.1 E.12 + uint16_t name_len; + RCHECK(bit_reader.ReadBits(16, &name_len)); + RCHECK(bit_reader.SkipBits(name_len * 8)); + uint8_t n_targets; + RCHECK(bit_reader.ReadBits(5, &n_targets)); + RCHECK(bit_reader.SkipBits(n_targets * 11)); + } + bit_reader.SkipToNextByte(); + if ((bit_reader.bit_position() - presentation_start) <= + (pres_bytes - 1) * 8) { + RCHECK(bit_reader.SkipBits(1)); + RCHECK(bit_reader.ReadBits(1, dolby_atmos_indicator)); + RCHECK(bit_reader.SkipBits(4)); + bool b_extended_presentation_group_index; + RCHECK(bit_reader.ReadBits(1, &b_extended_presentation_group_index)); + if (b_extended_presentation_group_index) { + RCHECK(bit_reader.SkipBits(9)); + } else { + RCHECK(bit_reader.SkipBits(1)); + } + } + return ret; +} + +bool ExtractAc4Data(const std::vector& ac4_data, + uint8_t* bitstream_version, + uint8_t* presentation_version, + uint8_t* mdcompat, + uint32_t* presentation_channel_mask_v1, + bool* dolby_ims_indicator, + bool* dolby_cbi_indicator) { + BitReader bit_reader(ac4_data.data(), ac4_data.size()); + + uint16_t n_presentation; + RCHECK(bit_reader.SkipBits(3) && bit_reader.ReadBits(7, bitstream_version)); + RCHECK(bit_reader.SkipBits(5) && bit_reader.ReadBits(9, &n_presentation)); + + if (*bitstream_version == 2) { + uint8_t b_program_id = 0; + RCHECK(bit_reader.ReadBits(1, &b_program_id)); + if (b_program_id) { + RCHECK(bit_reader.SkipBits(16)); + uint8_t b_uuid = 0; + RCHECK(bit_reader.ReadBits(1, &b_uuid)); + if (b_uuid) { + RCHECK(bit_reader.SkipBits(16 * 8)); + } + } + } else if (*bitstream_version == 0 || *bitstream_version == 1) { + LOG(WARNING) << "Bitstream version 0 or 1 is not supported"; + return false; + } else { + LOG(WARNING) << "Invalid Bitstream version"; + return false; + } + + RCHECK(bit_reader.SkipBits(66)); + bit_reader.SkipToNextByte(); + + // AC4 stream containing the single presentation is valid for OTT 
only. + // IMS has two presentations, and the 2nd is legacy (duplicated) presentation. + // So it can be considered as AC4 stream with single presentation. And IMS + // presentation must be prior to legacy presentation. + // In other words, only the 1st presentation in AC4 stream needs to be parsed. + const uint8_t ott_n_presentation = 1; + for (uint8_t i = 0; i < ott_n_presentation; i++) { + RCHECK(bit_reader.ReadBits(8, presentation_version)); + // *presentation_version == 2 means IMS presentation. + if ((*presentation_version == 2 && n_presentation > 2) || + (*presentation_version == 1 && n_presentation > 1) ) { + LOG(WARNING) << "Seeing multiple presentations, only single presentation " + << "(including IMS presentation) is supported"; + return false; + } + uint32_t pres_bytes; + RCHECK(bit_reader.ReadBits(8, &pres_bytes)); + if (pres_bytes == 255) { + uint32_t add_pres_bytes; + RCHECK(bit_reader.ReadBits(16, &add_pres_bytes)); + pres_bytes += add_pres_bytes; + } + + size_t presentation_bits = 0; + *dolby_ims_indicator = false; + if (*presentation_version == 0) { + LOG(WARNING) << "Presentation version 0 is not supported"; + return false; + } else { + if (*presentation_version == 1 || *presentation_version == 2) { + if (*presentation_version == 2) { + *dolby_ims_indicator = true; + } + const size_t presentation_start = bit_reader.bit_position(); + // dolby_atmos_indicator is extended in Dolby internal specs. + // It indicates whether the source content before encoding is Atmos. + // No final decision about how to use it in OTT. + // Parse it for the future usage.
+ uint8_t dolby_atmos_indicator; + if (!ParseAC4PresentationV1Dsi(bit_reader, pres_bytes, mdcompat, + presentation_channel_mask_v1, + dolby_cbi_indicator, + &dolby_atmos_indicator)) { + return false; + } + const size_t presentation_end = bit_reader.bit_position(); + presentation_bits = presentation_end - presentation_start; + } else { + LOG(WARNING) << "Invalid Presentation version"; + return false; + } + } + size_t skip_bits = pres_bytes * 8 - presentation_bits; + RCHECK(bit_reader.SkipBits(skip_bits)); + } + return true; +} +} // namespace + +bool CalculateAC4ChannelMask(const std::vector& ac4_data, + uint32_t* ac4_channel_mask) { + uint8_t bitstream_version; + uint8_t presentation_version; + uint8_t mdcompat; + uint32_t pre_channel_mask; + bool dolby_ims_indicator; + bool dolby_cbi_indicator; + + if (!ExtractAc4Data(ac4_data, &bitstream_version, &presentation_version, + &mdcompat, &pre_channel_mask, &dolby_ims_indicator, + &dolby_cbi_indicator)) { + LOG(WARNING) << "Seeing invalid AC4 data: " + << base::HexEncode(ac4_data.data(), ac4_data.size()); + return false; + } + + if (pre_channel_mask) { + *ac4_channel_mask = pre_channel_mask; + } else { + *ac4_channel_mask = 0x800000; + } + return true; +} + +bool CalculateAC4ChannelMPEGValue(const std::vector& ac4_data, + uint32_t* ac4_channel_mpeg_value) { + uint8_t bitstream_version; + uint8_t presentation_version; + uint8_t mdcompat; + uint32_t pre_channel_mask; + bool dolby_ims_indicator; + bool dolby_cbi_indicator; + + if (!ExtractAc4Data(ac4_data, &bitstream_version, &presentation_version, + &mdcompat, &pre_channel_mask, &dolby_ims_indicator, + &dolby_cbi_indicator)) { + LOG(WARNING) << "Seeing invalid AC4 data: " + << base::HexEncode(ac4_data.data(), ac4_data.size()); + return false; + } + + *ac4_channel_mpeg_value = AC4ChannelMasktoMPEGValue(pre_channel_mask); + return true; +} + +bool GetAc4CodecInfo(const std::vector& ac4_data, + uint8_t* ac4_codec_info) { + uint8_t bitstream_version; + uint8_t 
presentation_version; + uint8_t mdcompat; + uint32_t pre_channel_mask; + bool dolby_ims_indicator; + bool dolby_cbi_indicator; + + if (!ExtractAc4Data(ac4_data, &bitstream_version, &presentation_version, + &mdcompat, &pre_channel_mask, &dolby_ims_indicator, + &dolby_cbi_indicator)) { + LOG(WARNING) << "Seeing invalid AC4 data: " + << base::HexEncode(ac4_data.data(), ac4_data.size()); + return false; + } + + // The valid value of bitstream_version (7 bits) is 2, the valid value of + // presentation_version (8 bits) is 1 or 2, and mdcompat is 3 bits. + // So uint8_t is fine now. If Dolby extends the value of bitstream_version and + // presentation_version in future, may need to change the type from uint8_t to + // uint16_t or uint32_t to accommodate the valid values. + // If so, AudioStreamInfo::GetCodecString needs to be changed accordingly. + // bitstream_version (3bits) + presentation_version (2bits) + mdcompat (3bits) + *ac4_codec_info = ((bitstream_version << 5) | + ((presentation_version << 3) & 0x1F) | + (mdcompat & 0x7)); + return true; +} + +bool GetAc4ImmersiveInfo(const std::vector& ac4_data, + bool* ac4_ims_flag, + bool* ac4_cbi_flag) { + uint8_t bitstream_version; + uint8_t presentation_version; + uint8_t mdcompat; + uint32_t pre_channel_mask; + + if (!ExtractAc4Data(ac4_data, &bitstream_version, &presentation_version, + &mdcompat, &pre_channel_mask, ac4_ims_flag, + ac4_cbi_flag)) { + LOG(WARNING) << "Seeing invalid AC4 data: " + << base::HexEncode(ac4_data.data(), ac4_data.size()); + return false; + } + + return true; +} + +} // namespace media +} // namespace shaka diff --git a/packager/media/codecs/ac4_audio_util.h b/packager/media/codecs/ac4_audio_util.h new file mode 100644 index 0000000000..8b4b57564e --- /dev/null +++ b/packager/media/codecs/ac4_audio_util.h @@ -0,0 +1,52 @@ +// Copyright 2020 Google Inc. All rights reserved.
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd +// +// AC4 audio utility functions. + +#ifndef PACKAGER_MEDIA_CODECS_AC4_AUDIO_UTIL_H_ +#define PACKAGER_MEDIA_CODECS_AC4_AUDIO_UTIL_H_ + +#include +#include +#include + +namespace shaka { +namespace media { + +/// Parse data from AC4Specific box and calculate AC4 channel mask value based +/// on ETSI TS 103 190-2 V1.2.1 Digital Audio Compression (AC-4) Standard; +/// Part 2: Immersive and personalized audio E.10.14. +/// @return false if there are parsing errors. +bool CalculateAC4ChannelMask(const std::vector& ac4_data, + uint32_t* ac4_channel_mask); + +/// Parse data from AC4Specific box, calculate AC4 channel mask and then +/// obtain channel configuration descriptor value with MPEG scheme based on +/// ETSI TS 103 190-2 V1.2.1 Digital Audio Compression (AC-4) Standard; +/// Part 2: Immersive and personalized audio G.3.2. +/// @return false if there are parsing errors. +bool CalculateAC4ChannelMPEGValue(const std::vector& ac4_data, + uint32_t* ac4_channel_mpeg_value); + +/// Parse data from AC4Specific box and obtain AC4 codec information +/// (bitstream version, presentation version and mdcompat) based on ETSI TS +/// 103 190-2, V1.2.1 Digital Audio Compression (AC-4) Standard; +/// Part 2: Immersive and personalized audio E.13. +/// @return false if there are parsing errors. +bool GetAc4CodecInfo(const std::vector& ac4_data, + uint8_t* ac4_codec_info); + +/// Parse data from AC4Specific box and obtain AC4 Immersive stereo (IMS) flag +/// and Channel-based audio (CBI) flag. +/// @return false if there are parsing errors.
+bool GetAc4ImmersiveInfo(const std::vector& ac4_data, + bool* ac4_ims_flag, + bool* ac4_cbi_flag); + +} // namespace media +} // namespace shaka + +#endif // PACKAGER_MEDIA_CODECS_AC4_AUDIO_UTIL_H_ diff --git a/packager/media/codecs/ac4_audio_util_unittest.cc b/packager/media/codecs/ac4_audio_util_unittest.cc new file mode 100644 index 0000000000..3032cd4a29 --- /dev/null +++ b/packager/media/codecs/ac4_audio_util_unittest.cc @@ -0,0 +1,121 @@ +// Copyright 2020 Google Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#include + +#include "packager/media/codecs/ac4_audio_util.h" + +namespace shaka { +namespace media { + +TEST(AC4AudioUtilTest, ChannelTest1) { + // AC4 IMS + const std::vector ac4_data = {0x20, 0xa4, 0x02, 0x40, 0x00, 0x00, + 0x00, 0x1f, 0xff, 0xff, 0xff, 0xe0, + 0x02, 0x12, 0xf8, 0x80, 0x00, 0x00, + 0x42, 0x00, 0x00, 0x02, 0x50, 0x10, + 0x00, 0x00, 0x03, 0x10, 0x99, 0x5b, + 0xa0, 0x40, 0x01, 0x12, 0xf8, 0x80, + 0x00, 0x00, 0x42, 0x00, 0x00, 0x02, + 0x50, 0x10, 0x00, 0x00, 0x03, 0x10, + 0x99, 0x5b, 0x80, 0x40}; + + uint32_t ac4_channel_mask; + uint32_t ac4_channel_mpeg_value; + uint8_t ac4_codec_info; + bool ac4_ims_flag; + bool ac4_cbi_flag; + + EXPECT_TRUE(CalculateAC4ChannelMask(ac4_data, &ac4_channel_mask)); + EXPECT_EQ((uint32_t)0x1, ac4_channel_mask); + EXPECT_TRUE(CalculateAC4ChannelMPEGValue(ac4_data, &ac4_channel_mpeg_value)); + EXPECT_EQ((uint32_t)0x2, ac4_channel_mpeg_value); + EXPECT_TRUE(GetAc4CodecInfo(ac4_data, &ac4_codec_info)); + EXPECT_EQ(80u, ac4_codec_info); + EXPECT_TRUE(GetAc4ImmersiveInfo(ac4_data, &ac4_ims_flag, &ac4_cbi_flag)); + EXPECT_TRUE(ac4_ims_flag); + EXPECT_FALSE(ac4_cbi_flag); +} + +TEST(AC4AudioUtilTest, ChannelTest2) { + // AC4 5.1-channel + const std::vector ac4_data = {0x20, 0xa6, 0x01, 0x60, 0x00, 0x00, + 0x00, 0x1f, 0xff, 0xff, 0xff, 0xe0, + 0x01, 0x0e, 0xf9, 
0x00, 0x00, 0x09, + 0x00, 0x00, 0x11, 0xca, 0x02, 0x00, + 0x00, 0x11, 0xc0, 0x80}; + + uint32_t ac4_channel_mask; + uint32_t ac4_channel_mpeg_value; + uint8_t ac4_codec_info; + bool ac4_ims_flag; + bool ac4_cbi_flag; + + EXPECT_TRUE(CalculateAC4ChannelMask(ac4_data, &ac4_channel_mask)); + EXPECT_EQ((uint32_t)0x47, ac4_channel_mask); + EXPECT_TRUE(CalculateAC4ChannelMPEGValue(ac4_data, &ac4_channel_mpeg_value)); + EXPECT_EQ((uint32_t)0x6, ac4_channel_mpeg_value); + EXPECT_TRUE(GetAc4CodecInfo(ac4_data, &ac4_codec_info)); + EXPECT_EQ(73u, ac4_codec_info); + EXPECT_TRUE(GetAc4ImmersiveInfo(ac4_data, &ac4_ims_flag, &ac4_cbi_flag)); + EXPECT_FALSE(ac4_ims_flag); + EXPECT_FALSE(ac4_cbi_flag); +} + +TEST(AC4AudioUtilTest, ChannelTest3) { + // AC4 stereo + const std::vector ac4_data = {0x20, 0xa4, 0x01, 0x40, 0x00, 0x00, + 0x00, 0x1f, 0xff, 0xff, 0xff, 0xe0, + 0x01, 0x12, 0xf8, 0x00, 0x00, 0x08, + 0x40, 0x00, 0x00, 0x4a, 0x02, 0x00, + 0x00, 0x00, 0x62, 0x13, 0x2b, 0x70, + 0x00, 0x80}; + + uint32_t ac4_channel_mask; + uint32_t ac4_channel_mpeg_value; + uint8_t ac4_codec_info; + bool ac4_ims_flag; + bool ac4_cbi_flag; + + EXPECT_TRUE(CalculateAC4ChannelMask(ac4_data, &ac4_channel_mask)); + EXPECT_EQ((uint32_t)0x1, ac4_channel_mask); + EXPECT_TRUE(CalculateAC4ChannelMPEGValue(ac4_data, &ac4_channel_mpeg_value)); + EXPECT_EQ((uint32_t)0x2, ac4_channel_mpeg_value); + EXPECT_TRUE(GetAc4CodecInfo(ac4_data, &ac4_codec_info)); + EXPECT_EQ(72u, ac4_codec_info); + EXPECT_TRUE(GetAc4ImmersiveInfo(ac4_data, &ac4_ims_flag, &ac4_cbi_flag)); + EXPECT_FALSE(ac4_ims_flag); + EXPECT_FALSE(ac4_cbi_flag); +} + +TEST(AC4AudioUtilTest, ChannelTest4) { + // AC4 CBI 5.1.2 + const std::vector ac4_data = {0x20, 0xa0, 0x01, 0x60, 0x00, 0x00, + 0x00, 0x1f, 0xff, 0xff, 0xff, 0xe0, + 0x01, 0x15, 0x13, 0x80, 0x00, 0x00, + 0x58, 0x40, 0x00, 0x31, 0xfc, 0xa0, + 0x20, 0x00, 0x03, 0x1d, 0x40, 0x40, + 0x00, 0x00, 0x08, 0x00, 0xc0}; + + uint32_t ac4_channel_mask; + uint32_t ac4_channel_mpeg_value; + uint8_t 
ac4_codec_info; + bool ac4_ims_flag; + bool ac4_cbi_flag; + + EXPECT_TRUE(CalculateAC4ChannelMask(ac4_data, &ac4_channel_mask)); + EXPECT_EQ((uint32_t)0xC7, ac4_channel_mask); + EXPECT_TRUE(CalculateAC4ChannelMPEGValue(ac4_data, &ac4_channel_mpeg_value)); + EXPECT_EQ((uint32_t)0xFFFFFFFF, ac4_channel_mpeg_value); + EXPECT_TRUE(GetAc4CodecInfo(ac4_data, &ac4_codec_info)); + EXPECT_EQ(75u, ac4_codec_info); + EXPECT_TRUE(GetAc4ImmersiveInfo(ac4_data, &ac4_ims_flag, &ac4_cbi_flag)); + EXPECT_FALSE(ac4_ims_flag); + EXPECT_TRUE(ac4_cbi_flag); +} + +} // namespace media +} // namespace shaka diff --git a/packager/media/codecs/codecs.gyp b/packager/media/codecs/codecs.gyp index b965f630b0..9993dd02f2 100644 --- a/packager/media/codecs/codecs.gyp +++ b/packager/media/codecs/codecs.gyp @@ -29,6 +29,8 @@ 'dovi_decoder_configuration_record.h', 'ec3_audio_util.cc', 'ec3_audio_util.h', + 'ac4_audio_util.cc', + 'ac4_audio_util.h', 'es_descriptor.cc', 'es_descriptor.h', 'h264_byte_to_unit_stream_converter.cc', @@ -77,6 +79,7 @@ 'avc_decoder_configuration_record_unittest.cc', 'dovi_decoder_configuration_record_unittest.cc', 'ec3_audio_util_unittest.cc', + 'ac4_audio_util_unittest.cc', 'es_descriptor_unittest.cc', 'h264_byte_to_unit_stream_converter_unittest.cc', 'h264_parser_unittest.cc', diff --git a/packager/media/event/muxer_listener_internal.cc b/packager/media/event/muxer_listener_internal.cc index fb5cfe90e7..013d8a2a58 100644 --- a/packager/media/event/muxer_listener_internal.cc +++ b/packager/media/event/muxer_listener_internal.cc @@ -18,6 +18,7 @@ #include "packager/media/base/text_stream_info.h" #include "packager/media/base/video_stream_info.h" #include "packager/media/codecs/ec3_audio_util.h" +#include "packager/media/codecs/ac4_audio_util.h" #include "packager/mpd/base/media_info.pb.h" using ::google::protobuf::util::MessageDifferencer; @@ -121,14 +122,14 @@ void AddAudioInfo(const AudioStreamInfo* audio_stream_info, return; } auto* codec_data = 
audio_info->mutable_codec_specific_data(); - codec_data->set_ec3_channel_map(ec3_channel_map); + codec_data->set_channel_mask(ec3_channel_map); uint32_t ec3_channel_mpeg_value; if (!CalculateEC3ChannelMPEGValue(codec_config, &ec3_channel_mpeg_value)) { LOG(ERROR) << "Failed to calculate EC3 channel configuration " << "descriptor value with MPEG scheme."; return; } - codec_data->set_ec3_channel_mpeg_value(ec3_channel_mpeg_value); + codec_data->set_channel_mpeg_value(ec3_channel_mpeg_value); uint32_t ec3_joc_complexity = 0; if (!GetEc3JocComplexity(codec_config, &ec3_joc_complexity)) { LOG(ERROR) << "Failed to obtain DD+JOC Information."; @@ -136,6 +137,31 @@ void AddAudioInfo(const AudioStreamInfo* audio_stream_info, } codec_data->set_ec3_joc_complexity(ec3_joc_complexity); } + + if (audio_stream_info->codec() == kCodecAC4) { + uint32_t ac4_channel_mask; + if (!CalculateAC4ChannelMask(codec_config, &ac4_channel_mask)) { + LOG(ERROR) << "Failed to calculate AC4 channel mask."; + return; + } + auto* codec_data = audio_info->mutable_codec_specific_data(); + codec_data->set_channel_mask(ac4_channel_mask); + uint32_t ac4_channel_mpeg_value; + if (!CalculateAC4ChannelMPEGValue(codec_config, &ac4_channel_mpeg_value)) { + LOG(ERROR) << "Failed to calculate AC4 channel configuration " + << "descriptor value with MPEG scheme."; + return; + } + codec_data->set_channel_mpeg_value(ac4_channel_mpeg_value); + bool ac4_ims_flag; + bool ac4_cbi_flag; + if (!GetAc4ImmersiveInfo(codec_config, &ac4_ims_flag, &ac4_cbi_flag)) { + LOG(ERROR) << "Failed to obtain AC4 IMS flag and CBI flag."; + return; + } + codec_data->set_ac4_ims_flag(ac4_ims_flag); + codec_data->set_ac4_cbi_flag(ac4_cbi_flag); + } } void AddTextInfo(const TextStreamInfo& text_stream_info, diff --git a/packager/media/formats/mp4/box_definitions.cc b/packager/media/formats/mp4/box_definitions.cc index 59f6bbd0e0..ffbbb032b1 100644 --- a/packager/media/formats/mp4/box_definitions.cc +++ 
b/packager/media/formats/mp4/box_definitions.cc @@ -1767,6 +1767,27 @@ size_t EC3Specific::ComputeSizeInternal() { return HeaderSize() + data.size(); } +AC4Specific::AC4Specific() = default; +AC4Specific::~AC4Specific() = default; + +FourCC AC4Specific::BoxType() const { + return FOURCC_dac4; +} + +bool AC4Specific::ReadWriteInternal(BoxBuffer* buffer) { + RCHECK(ReadWriteHeaderInternal(buffer)); + size_t size = buffer->Reading() ? buffer->BytesLeft() : data.size(); + RCHECK(buffer->ReadWriteVector(&data, size)); + return true; +} + +size_t AC4Specific::ComputeSizeInternal() { + // This box is optional. Skip it if not initialized. + if (data.empty()) + return 0; + return HeaderSize() + data.size(); +} + OpusSpecific::OpusSpecific() = default; OpusSpecific::~OpusSpecific() = default; @@ -1878,6 +1899,7 @@ bool AudioSampleEntry::ReadWriteInternal(BoxBuffer* buffer) { RCHECK(buffer->TryReadWriteChild(&ddts)); RCHECK(buffer->TryReadWriteChild(&dac3)); RCHECK(buffer->TryReadWriteChild(&dec3)); + RCHECK(buffer->TryReadWriteChild(&dac4)); RCHECK(buffer->TryReadWriteChild(&dops)); RCHECK(buffer->TryReadWriteChild(&dfla)); @@ -1905,6 +1927,7 @@ size_t AudioSampleEntry::ComputeSizeInternal() { sizeof(samplesize) + sizeof(samplerate) + sinf.ComputeSize() + esds.ComputeSize() + ddts.ComputeSize() + dac3.ComputeSize() + dec3.ComputeSize() + dops.ComputeSize() + dfla.ComputeSize() + + dac4.ComputeSize() + // Reserved and predefined bytes. 6 + 8 + // 6 + 8 bytes reserved. 4; // 4 bytes predefined. 
diff --git a/packager/media/formats/mp4/box_definitions.h b/packager/media/formats/mp4/box_definitions.h index 164541340b..5c577b7116 100644 --- a/packager/media/formats/mp4/box_definitions.h +++ b/packager/media/formats/mp4/box_definitions.h @@ -333,6 +333,12 @@ struct EC3Specific : Box { std::vector data; }; +struct AC4Specific : Box { + DECLARE_BOX_METHODS(AC4Specific); + + std::vector data; +}; + struct OpusSpecific : Box { DECLARE_BOX_METHODS(OpusSpecific); @@ -372,6 +378,7 @@ struct AudioSampleEntry : Box { DTSSpecific ddts; AC3Specific dac3; EC3Specific dec3; + AC4Specific dac4; OpusSpecific dops; FlacSpecific dfla; }; diff --git a/packager/media/formats/mp4/box_definitions_unittest.cc b/packager/media/formats/mp4/box_definitions_unittest.cc index 2323ac97c7..96b9413ffd 100644 --- a/packager/media/formats/mp4/box_definitions_unittest.cc +++ b/packager/media/formats/mp4/box_definitions_unittest.cc @@ -1242,6 +1242,21 @@ TEST_F(BoxDefinitionsTest, EC3SampleEntry) { ASSERT_EQ(entry, entry_readback); } +TEST_F(BoxDefinitionsTest, AC4SampleEntry) { + AudioSampleEntry entry; + entry.format = FOURCC_ac_4; + entry.data_reference_index = 2; + entry.channelcount = 6; + entry.samplesize = 16; + entry.samplerate = 48000; + Fill(&entry.dac4); + entry.Write(this->buffer_.get()); + + AudioSampleEntry entry_readback; + ASSERT_TRUE(ReadBack(&entry_readback)); + ASSERT_EQ(entry, entry_readback); +} + TEST_F(BoxDefinitionsTest, OpusSampleEntry) { AudioSampleEntry entry; entry.format = FOURCC_Opus; diff --git a/packager/media/formats/mp4/mp4_media_parser.cc b/packager/media/formats/mp4/mp4_media_parser.cc index 0ddac82c05..7cbb445b1b 100644 --- a/packager/media/formats/mp4/mp4_media_parser.cc +++ b/packager/media/formats/mp4/mp4_media_parser.cc @@ -26,6 +26,7 @@ #include "packager/media/codecs/avc_decoder_configuration_record.h" #include "packager/media/codecs/dovi_decoder_configuration_record.h" #include "packager/media/codecs/ec3_audio_util.h" +#include
"packager/media/codecs/ac4_audio_util.h" #include "packager/media/codecs/es_descriptor.h" #include "packager/media/codecs/hevc_decoder_configuration_record.h" #include "packager/media/codecs/vp_codec_configuration_record.h" @@ -94,6 +95,8 @@ Codec FourCCToCodec(FourCC fourcc) { return kCodecAC3; case FOURCC_ec_3: return kCodecEAC3; + case FOURCC_ac_4: + return kCodecAC4; case FOURCC_fLaC: return kCodecFlac; default: @@ -488,6 +491,16 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { codec_config = entry.dec3.data; num_channels = static_cast(GetEc3NumChannels(codec_config)); break; + case FOURCC_ac_4: + codec_config = entry.dac4.data; + // Stop the process if have errors when parsing AC-4 dac4 box, + // bitstream version 0 (has been deprecated) and contains multiple + // presentations in single AC-4 stream (only used for broadcast). + if (!GetAc4CodecInfo(codec_config, &audio_object_type)) { + LOG(ERROR) << "Failed to parse dac4."; + return false; + } + break; case FOURCC_fLaC: codec_config = entry.dfla.data; break; diff --git a/packager/media/formats/mp4/mp4_muxer.cc b/packager/media/formats/mp4/mp4_muxer.cc index 336f13a039..73dff8f4f9 100644 --- a/packager/media/formats/mp4/mp4_muxer.cc +++ b/packager/media/formats/mp4/mp4_muxer.cc @@ -83,6 +83,8 @@ FourCC CodecToFourCC(Codec codec, H26xStreamFormat h26x_stream_format) { return FOURCC_dtsm; case kCodecEAC3: return FOURCC_ec_3; + case kCodecAC4: + return FOURCC_ac_4; case kCodecFlac: return FOURCC_fLaC; case kCodecOpus: @@ -485,6 +487,9 @@ bool MP4Muxer::GenerateAudioTrak(const AudioStreamInfo* audio_info, case kCodecEAC3: audio.dec3.data = audio_info->codec_config(); break; + case kCodecAC4: + audio.dac4.data = audio_info->codec_config(); + break; case kCodecFlac: audio.dfla.data = audio_info->codec_config(); break; @@ -520,6 +525,12 @@ bool MP4Muxer::GenerateAudioTrak(const AudioStreamInfo* audio_info, // sample description entry. Instead, two constants are used.
audio.channelcount = 2; audio.samplesize = 16; + } else if (audio_info->codec() == kCodecAC4) { + // ETSI TS 103 190-2, E.4.5 channelcount should be set to the total number + // of audio output channels of the default audio presentation of that track + audio.channelcount = audio_info->num_channels(); + // ETSI TS 103 190-2, E.4.6 samplesize shall be set to 16. + audio.samplesize = 16; } else { audio.channelcount = audio_info->num_channels(); audio.samplesize = audio_info->sample_bits(); diff --git a/packager/mpd/base/media_info.proto b/packager/mpd/base/media_info.proto index da7f28dfe7..5e2f8619ff 100644 --- a/packager/mpd/base/media_info.proto +++ b/packager/mpd/base/media_info.proto @@ -64,17 +64,33 @@ message MediaInfo { message AudioCodecSpecificData { // EC3 Channel map bit fields, encoded based on ETSI TS 102 366 V1.3.1 // Digital Audio Compression (AC-3, Enhanced AC-3) Standard E.1.3.1.8. - optional uint32 ec3_channel_map = 1; + // Or AC4 Channel mask bit fields, encoded based on ETSI TS 103 190-2 + // V1.2.1 Digital Audio Compression (AC-4) Standard; Part 2: Immersive and + // personalized audio E.10.14. + optional uint32 channel_mask = 1; // EC3 Channel configuration descriptor with MPEG scheme fields, // encoded based on ETSI TS 102 366 V1.4.1 Digital Audio Compression // (AC-3, Enhanced AC-3) Standard I.1.2.1. - optional uint32 ec3_channel_mpeg_value = 2; + // Or AC4 Channel configuration descriptor with MPEG scheme fields, + // encoded based on ETSI TS 103 190-2 V1.2.1 Digital Audio Compression + // (AC-4) Standard; Part 2: Immersive and personalized audio G.3.2. + optional uint32 channel_mpeg_value = 2; // Dolby Digital Plus JOC decoding complexity fields, ETSI TS 103 420 v1.2.1 // Backwards-compatible object audio carriage using Enhanced AC-3 Standard // C.3.2.3. optional uint32 ec3_joc_complexity = 3; + + // AC4 Immersive stereo flag field, based on Dolby AC-4 in MPEG-DASH for + // Online Delivery Specification 2.5.3.
+ // https://developer.dolby.com/tools-media/online-delivery-kits/dolby-ac-4/ + optional bool ac4_ims_flag = 4; + + // AC4 Channel-based audio (CBI) flag field, encoded based on + // ETSI TS 103 190-2 Digital Audio Compression (AC-4) Standard; + // Part 2: Immersive and personalized audio 4.3. + optional bool ac4_cbi_flag = 5; } message TextInfo { diff --git a/packager/mpd/base/xml/xml_node.cc b/packager/mpd/base/xml/xml_node.cc index a231b3683f..3c4f781fd3 100644 --- a/packager/mpd/base/xml/xml_node.cc +++ b/packager/mpd/base/xml/xml_node.cc @@ -38,6 +38,7 @@ typedef MediaInfo::VideoInfo VideoInfo; namespace { const char kEC3Codec[] = "ec-3"; +const char kAC4Codec[] = "ac-4"; std::string RangeToString(const Range& range) { return base::Uint64ToString(range.begin()) + "-" + @@ -464,13 +465,13 @@ bool RepresentationXmlNode::AddAudioChannelInfo(const AudioInfo& audio_info) { // Use MPEG scheme if the mpeg value is available and valid, fallback to // EC3 channel mapping otherwise. // See https://github.com/Dash-Industry-Forum/DASH-IF-IOP/issues/268 - const uint32_t ec3_channel_mpeg_value = codec_data.ec3_channel_mpeg_value(); + const uint32_t ec3_channel_mpeg_value = codec_data.channel_mpeg_value(); const uint32_t NO_MAPPING = 0xFFFFFFFF; if (ec3_channel_mpeg_value == NO_MAPPING) { // Convert EC3 channel map into string of hexadecimal digits. Spec: DASH-IF // Interoperability Points v3.0 9.2.1.2. 
const uint16_t ec3_channel_map = - base::HostToNet16(codec_data.ec3_channel_map()); + base::HostToNet16(codec_data.channel_mask()); audio_channel_config_value = base::HexEncode(&ec3_channel_map, sizeof(ec3_channel_map)); audio_channel_config_scheme = @@ -500,6 +501,42 @@ bool RepresentationXmlNode::AddAudioChannelInfo(const AudioInfo& audio_info) { ec3_joc_complexity); } return ret; + } else if (audio_info.codec().substr(0, 4) == kAC4Codec) { + const auto& codec_data = audio_info.codec_specific_data(); + const bool ac4_ims_flag = codec_data.ac4_ims_flag(); + // Use MPEG scheme if the mpeg value is available and valid, fallback to + // AC4 channel mask otherwise. + // See https://github.com/Dash-Industry-Forum/DASH-IF-IOP/issues/268 + const uint32_t ac4_channel_mpeg_value = codec_data.channel_mpeg_value(); + const uint32_t NO_MAPPING = 0xFFFFFFFF; + if (ac4_channel_mpeg_value == NO_MAPPING) { + // Calculate AC-4 channel mask. Spec: ETSI TS 103 190-2 V1.2.1 Digital + // Audio Compression (AC-4) Standard; Part 2: Immersive and personalized + // audio G.3.1. + const uint32_t ac4_channel_mask = + base::HostToNet32(codec_data.channel_mask() << 8); + audio_channel_config_value = + base::HexEncode(&ac4_channel_mask, sizeof(ac4_channel_mask) - 1); + // Note that the channel config schemes for EC-3 and AC-4 are different. + // See https://github.com/Dash-Industry-Forum/DASH-IF-IOP/issues/268. + audio_channel_config_scheme = + "tag:dolby.com,2015:dash:audio_channel_configuration:2015"; + } else { + // Calculate AC-4 channel configuration descriptor value with MPEG scheme. + // Spec: ETSI TS 103 190-2 V1.2.1 Digital Audio Compression (AC-4) Standard; + // Part 2: Immersive and personalized audio G.3.2. 
+ audio_channel_config_value = base::UintToString(ac4_channel_mpeg_value); + audio_channel_config_scheme = "urn:mpeg:mpegB:cicp:ChannelConfiguration"; + } + bool ret = AddDescriptor("AudioChannelConfiguration", + audio_channel_config_scheme, + audio_channel_config_value); + if (ac4_ims_flag) { + ret &= AddDescriptor("SupplementalProperty", + "tag:dolby.com,2016:dash:virtualized_content:2016", + "1"); + } + return ret; } else { audio_channel_config_value = base::UintToString(audio_info.num_channels()); audio_channel_config_scheme = diff --git a/packager/mpd/base/xml/xml_node_unittest.cc b/packager/mpd/base/xml/xml_node_unittest.cc index 1840765c70..2376c21cc9 100644 --- a/packager/mpd/base/xml/xml_node_unittest.cc +++ b/packager/mpd/base/xml/xml_node_unittest.cc @@ -215,8 +215,8 @@ TEST(XmlNodeTest, AddEC3AudioInfo) { MediaInfo::AudioInfo audio_info; audio_info.set_codec("ec-3"); audio_info.set_sampling_frequency(48000); - audio_info.mutable_codec_specific_data()->set_ec3_channel_map(0xF801); - audio_info.mutable_codec_specific_data()->set_ec3_channel_mpeg_value( + audio_info.mutable_codec_specific_data()->set_channel_mask(0xF801); + audio_info.mutable_codec_specific_data()->set_channel_mpeg_value( 0xFFFFFFFF); RepresentationXmlNode representation; @@ -236,8 +236,8 @@ TEST(XmlNodeTest, AddEC3AudioInfoMPEGScheme) { MediaInfo::AudioInfo audio_info; audio_info.set_codec("ec-3"); audio_info.set_sampling_frequency(48000); - audio_info.mutable_codec_specific_data()->set_ec3_channel_map(0xF801); - audio_info.mutable_codec_specific_data()->set_ec3_channel_mpeg_value(6); + audio_info.mutable_codec_specific_data()->set_channel_mask(0xF801); + audio_info.mutable_codec_specific_data()->set_channel_mpeg_value(6); RepresentationXmlNode representation; representation.AddAudioInfo(audio_info); @@ -256,8 +256,8 @@ TEST(XmlNodeTest, AddEC3AudioInfoMPEGSchemeJOC) { MediaInfo::AudioInfo audio_info; audio_info.set_codec("ec-3"); audio_info.set_sampling_frequency(48000); - 
audio_info.mutable_codec_specific_data()->set_ec3_channel_map(0xF801); - audio_info.mutable_codec_specific_data()->set_ec3_channel_mpeg_value(6); + audio_info.mutable_codec_specific_data()->set_channel_mask(0xF801); + audio_info.mutable_codec_specific_data()->set_channel_mpeg_value(6); audio_info.mutable_codec_specific_data()->set_ec3_joc_complexity(16); RepresentationXmlNode representation; @@ -281,6 +281,79 @@ TEST(XmlNodeTest, AddEC3AudioInfoMPEGSchemeJOC) { "\n")); } +TEST(XmlNodeTest, AddAC4AudioInfo) { + MediaInfo::AudioInfo audio_info; + audio_info.set_codec("ac-4.02.01.02"); + audio_info.set_sampling_frequency(48000); + auto* codec_data = audio_info.mutable_codec_specific_data(); + codec_data->set_channel_mpeg_value(0xFFFFFFFF); + codec_data->set_channel_mask(0x0000C7); + codec_data->set_ac4_ims_flag(false); + codec_data->set_ac4_cbi_flag(false); + + RepresentationXmlNode representation; + representation.AddAudioInfo(audio_info); + EXPECT_THAT( + representation.GetRawPtr(), + XmlNodeEqual( + "\n" + " \n" + "\n")); +} + +TEST(XmlNodeTest, AddAC4AudioInfoMPEGScheme) { + MediaInfo::AudioInfo audio_info; + audio_info.set_codec("ac-4.02.01.00"); + audio_info.set_sampling_frequency(48000); + auto* codec_data = audio_info.mutable_codec_specific_data(); + codec_data->set_channel_mpeg_value(2); + codec_data->set_channel_mask(0x000001); + codec_data->set_ac4_ims_flag(false); + codec_data->set_ac4_cbi_flag(false); + + RepresentationXmlNode representation; + representation.AddAudioInfo(audio_info); + EXPECT_THAT( + representation.GetRawPtr(), + XmlNodeEqual( + "\n" + " \n" + "\n")); +} + +TEST(XmlNodeTest, AddAC4AudioInfoMPEGSchemeIMS) { + MediaInfo::AudioInfo audio_info; + audio_info.set_codec("ac-4.02.02.00"); + audio_info.set_sampling_frequency(48000); + auto* codec_data = audio_info.mutable_codec_specific_data(); + codec_data->set_channel_mpeg_value(2); + codec_data->set_channel_mask(0x000001); + codec_data->set_ac4_ims_flag(true); + 
codec_data->set_ac4_cbi_flag(false); + + RepresentationXmlNode representation; + representation.AddAudioInfo(audio_info); + EXPECT_THAT( + representation.GetRawPtr(), + XmlNodeEqual( + "\n" + " \n" + " \n" + "\n")); +} + class LiveSegmentTimelineTest : public ::testing::Test { protected: void SetUp() override {