diff --git a/packager/media/base/fourccs.h b/packager/media/base/fourccs.h
index 1225b669f4..0448fdc16d 100644
--- a/packager/media/base/fourccs.h
+++ b/packager/media/base/fourccs.h
@@ -20,6 +20,7 @@ enum FourCC : uint32_t {
 
   FOURCC_aacd = 0x61616364,
   FOURCC_ac_3 = 0x61632d33,  // "ac-3"
+  FOURCC_ac3d = 0x61633364,
   FOURCC_apad = 0x61706164,
   FOURCC_avc1 = 0x61766331,
   FOURCC_avc3 = 0x61766333,
@@ -50,6 +51,7 @@ enum FourCC : uint32_t {
   FOURCC_dtsm = 0x6474732d,  // "dts-"
   FOURCC_dtsp = 0x6474732b,  // "dts+"
   FOURCC_ec_3 = 0x65632d33,  // "ec-3"
+  FOURCC_ec3d = 0x65633364,
   FOURCC_edts = 0x65647473,
   FOURCC_elst = 0x656c7374,
   FOURCC_enca = 0x656e6361,
@@ -145,9 +147,11 @@ enum FourCC : uint32_t {
   FOURCC_wide = 0x77696465,
   FOURCC_wvtt = 0x77767474,
   FOURCC_zaac = 0x7A616163,
+  FOURCC_zac3 = 0x7A616333,
   FOURCC_zach = 0x7A616368,
   FOURCC_zacp = 0x7A616370,
   FOURCC_zavc = 0x7A617663,
+  FOURCC_zec3 = 0x7A656333,
 };
 
 const FourCC kAppleSampleAesProtectionScheme = FOURCC_cbca;
diff --git a/packager/media/formats/mp2t/ac3_header.cc b/packager/media/formats/mp2t/ac3_header.cc
new file mode 100644
index 0000000000..757a8da73e
--- /dev/null
+++ b/packager/media/formats/mp2t/ac3_header.cc
@@ -0,0 +1,145 @@
+// Copyright 2017 Google Inc. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file or at
+// https://developers.google.com/open-source/licenses/bsd
+
+#include "packager/media/formats/mp2t/ac3_header.h"
+
+#include "packager/media/base/bit_reader.h"
+#include "packager/media/base/bit_writer.h"
+#include "packager/media/formats/mp2t/mp2t_common.h"
+
+namespace shaka {
+namespace media {
+namespace mp2t {
+namespace {
+
+// ATSC Standard A/52:2012 Table 5.6 Sample Rate Codes, indexed by fscod.
+const uint32_t kAc3SampleRateTable[] = {48000, 44100, 32000};
+
+// ATSC Standard A/52:2012 Table 5.8 Audio Coding Mode, indexed by acmod.
+// The LFE channel is not included in these counts.
+const uint8_t kAc3NumChannelsTable[] = {2, 1, 2, 3, 3, 4, 4, 5};
+
+// ATSC Standard A/52:2012 Table 5.18 Frame Size Code Table
+// (in words = 16 bits), one row per frmsizecod.
+// Columns are in the order printed in the standard: {32 kHz, 44.1 kHz,
+// 48 kHz}. That is the reverse of the fscod order used by
+// kAc3SampleRateTable, so the column index is NOT fscod.
+const size_t kFrameSizeCodeTable[][3] = {
+    {96, 69, 64},       {96, 70, 64},       {120, 87, 80},
+    {120, 88, 80},      {144, 104, 96},     {144, 105, 96},
+    {168, 121, 112},    {168, 122, 112},    {192, 139, 128},
+    {192, 140, 128},    {240, 174, 160},    {240, 175, 160},
+    {288, 208, 192},    {288, 209, 192},    {336, 243, 224},
+    {336, 244, 224},    {384, 278, 256},    {384, 279, 256},
+    {480, 348, 320},    {480, 349, 320},    {576, 417, 384},
+    {576, 418, 384},    {672, 487, 448},    {672, 488, 448},
+    {768, 557, 512},    {768, 558, 512},    {960, 696, 640},
+    {960, 697, 640},    {1152, 835, 768},   {1152, 836, 768},
+    {1344, 975, 896},   {1344, 976, 896},   {1536, 1114, 1024},
+    {1536, 1115, 1024}, {1728, 1253, 1152}, {1728, 1254, 1152},
+    {1920, 1393, 1280}, {1920, 1394, 1280},
+};
+
+}  // namespace
+
+bool Ac3Header::IsSyncWord(const uint8_t* buf) const {
+  DCHECK(buf);
+  // Same 16-bit syncword (0x0B77) that Parse() requires.
+  return buf[0] == 0x0B && buf[1] == 0x77;
+}
+
+size_t Ac3Header::GetMinFrameSize() const {
+  // Arbitrary. The smallest entry in kFrameSizeCodeTable is 64 words.
+  const size_t kMinAc3FrameSize = 10u;
+  return kMinAc3FrameSize;
+}
+
+bool Ac3Header::Parse(const uint8_t* audio_frame, size_t audio_frame_size) {
+  BitReader frame(audio_frame, audio_frame_size);
+
+  // ATSC Standard A/52:2012 5. BIT STREAM SYNTAX.
+  // syncinfo: synchronization information section.
+  uint16_t syncword;
+  RCHECK(frame.ReadBits(16, &syncword));
+  RCHECK(syncword == 0x0B77);
+  uint16_t crc1;  // Read only to advance the reader; the CRC is not verified.
+  RCHECK(frame.ReadBits(16, &crc1));
+  RCHECK(frame.ReadBits(2, &fscod_));
+  RCHECK(fscod_ < arraysize(kAc3SampleRateTable));
+  RCHECK(frame.ReadBits(6, &frmsizecod_));
+  RCHECK(frmsizecod_ < arraysize(kFrameSizeCodeTable));
+
+  // bsi: bit stream information section.
+  RCHECK(frame.ReadBits(5, &bsid_));
+  RCHECK(frame.ReadBits(3, &bsmod_));
+
+  RCHECK(frame.ReadBits(3, &acmod_));
+  RCHECK(acmod_ < arraysize(kAc3NumChannelsTable));
+  // If 3 front channels.
+  if ((acmod_ & 0x01) && (acmod_ != 0x01))
+    RCHECK(frame.SkipBits(2));  // cmixlev.
+  // If a surround channel exists.
+  if (acmod_ & 0x04)
+    RCHECK(frame.SkipBits(2));  // surmixlev.
+  // If in 2/0 mode.
+  if (acmod_ == 0x02)
+    RCHECK(frame.SkipBits(2));  // dsurmod.
+
+  RCHECK(frame.ReadBits(1, &lfeon_));
+
+  return true;
+}
+
+size_t Ac3Header::GetHeaderSize() const {
+  // Unlike ADTS, for AC3, the whole frame is included in the media sample, so
+  // return 0 header size.
+  return 0;
+}
+
+size_t Ac3Header::GetFrameSize() const {
+  DCHECK_LT(fscod_, arraysize(kAc3SampleRateTable));
+  DCHECK_LT(frmsizecod_, arraysize(kFrameSizeCodeTable));
+  // fscod runs 48 kHz -> 32 kHz while the table columns run 32 kHz -> 48 kHz,
+  // so mirror the index to pick the column for this sample rate.
+  const size_t column = arraysize(kAc3SampleRateTable) - 1 - fscod_;
+  // The table is in 16-bit words; convert to bytes.
+  return kFrameSizeCodeTable[frmsizecod_][column] * 2;
+}
+
+void Ac3Header::GetAudioSpecificConfig(std::vector<uint8_t>* buffer) const {
+  DCHECK(buffer);
+  buffer->clear();
+  BitWriter config(buffer);
+  // According to ETSI TS 102 366 V1.3.1 (2014-08) F.4 AC3SpecificBox.
+  config.WriteBits(fscod_, 2);
+  config.WriteBits(bsid_, 5);
+  config.WriteBits(bsmod_, 3);
+  config.WriteBits(acmod_, 3);
+  config.WriteBits(lfeon_, 1);
+  // Table 5.18 has two frmsizecod rows per bit rate, so drop the low bit.
+  const uint8_t bit_rate_code = frmsizecod_ >> 1;
+  config.WriteBits(bit_rate_code, 5);
+  config.Flush();
+}
+
+uint8_t Ac3Header::GetObjectType() const {
+  // Only useful for AAC. Return a dummy value instead.
+  return 0;
+}
+
+uint32_t Ac3Header::GetSamplingFrequency() const {
+  DCHECK_LT(fscod_, arraysize(kAc3SampleRateTable));
+  return kAc3SampleRateTable[fscod_];
+}
+
+uint8_t Ac3Header::GetNumChannels() const {
+  DCHECK_LT(acmod_, arraysize(kAc3NumChannelsTable));
+  return kAc3NumChannelsTable[acmod_];
+}
+
+}  // namespace mp2t
+}  // namespace media
+}  // namespace shaka
diff --git a/packager/media/formats/mp2t/ac3_header.h b/packager/media/formats/mp2t/ac3_header.h
new file mode 100644
index 0000000000..29debb39cb
--- /dev/null
+++ b/packager/media/formats/mp2t/ac3_header.h
@@ -0,0 +1,56 @@
+// Copyright 2017 Google Inc. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file or at
+// https://developers.google.com/open-source/licenses/bsd
+
+#ifndef PACKAGER_MEDIA_FORMATS_MP2T_AC3_HEADER_H_
+#define PACKAGER_MEDIA_FORMATS_MP2T_AC3_HEADER_H_
+
+#include <stdint.h>
+
+#include <vector>
+
+#include "packager/media/formats/mp2t/audio_header.h"
+
+namespace shaka {
+namespace media {
+namespace mp2t {
+
+/// Class which parses AC3 frame (header / metadata) and synthesizes
+/// AudioSpecificConfig (AC3SpecificBox payload) from audio frame content.
+class Ac3Header : public AudioHeader {
+ public:
+  Ac3Header() = default;
+  ~Ac3Header() override = default;
+
+  /// @name AudioHeader implementation overrides.
+  /// @{
+  bool IsSyncWord(const uint8_t* buf) const override;
+  size_t GetMinFrameSize() const override;
+  bool Parse(const uint8_t* audio_frame, size_t audio_frame_size) override;
+  size_t GetHeaderSize() const override;
+  size_t GetFrameSize() const override;
+  void GetAudioSpecificConfig(std::vector<uint8_t>* buffer) const override;
+  uint8_t GetObjectType() const override;
+  uint32_t GetSamplingFrequency() const override;
+  uint8_t GetNumChannels() const override;
+  /// @}
+
+ private:
+  Ac3Header(const Ac3Header&) = delete;
+  Ac3Header& operator=(const Ac3Header&) = delete;
+
+  uint8_t fscod_ = 0;       // Sample rate code
+  uint8_t frmsizecod_ = 0;  // Frame size code
+  uint8_t bsid_ = 0;        // Bit stream identification
+  uint8_t bsmod_ = 0;       // Bit stream mode
+  uint8_t acmod_ = 0;       // Audio coding mode
+  uint8_t lfeon_ = 0;       // Low frequency effects channel on
+};
+
+}  // namespace mp2t
+}  // namespace media
+}  // namespace shaka
+
+#endif  // PACKAGER_MEDIA_FORMATS_MP2T_AC3_HEADER_H_
diff --git a/packager/media/formats/mp2t/ac3_header_unittest.cc b/packager/media/formats/mp2t/ac3_header_unittest.cc
new file mode 100644
index 0000000000..9eadde2026
--- /dev/null
+++ b/packager/media/formats/mp2t/ac3_header_unittest.cc
@@ -0,0 +1,87 @@
+// Copyright 2017 Google Inc. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file or at
+// https://developers.google.com/open-source/licenses/bsd
+
+#include
+
+#include "packager/base/logging.h"
+#include "packager/base/strings/string_number_conversions.h"
+#include "packager/media/formats/mp2t/ac3_header.h"
+
+namespace {
+
+const char kValidPartialAc3Frame[] =  // Hex text; decoded to bytes in SetUp().
+    "0B772770554043E106F575F080821010415C7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF"
+    "9F3E7CF9F3EFF9D5F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3"
+    "E7CF9F3E7CF9F3E7CF9F3E7CF9F3E3FE757CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F"
+    "3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF8CBFC4912248000000000F1B6DB"
+    "6DB6DE3C78F1DDDDDDDC00000000000000000000000000EEEEEEF1E3C6DB6DB6DB7CF9AD6B"
+    "5AD6B5AD6B5AD6B5AD6B5AD6B4000000078DB6DB6DB6F1E3C78EEEEEEEE000000000000000"
+    "0000000000077777778F1E36DB6DB6DBE7CD6B5AD6B5AD6B5AD6B5AD6B5AD6B5A600000000"
+    "0003C6DB6DB6DB78F1E3C77777777000000000000000000000000003BBBBBBC78F1B6DB6DB"
+    "6DF3E6B5AD6B5AD6B5AD6B5AD6B5AD6B5AD00000001E36DB6DB6DBC78F1E3BBBBBBB800000"
+    "000000000000000000001DDDDDDE3C78DB6DB6DB6F9F35AD6B5AD6B5AD6B5AD6B5AD6B5AD6"
+    "9800000000000F1B6DB6DB6DE3C78F1DDD";
+
+}  // anonymous namespace
+
+namespace shaka {
+namespace media {
+namespace mp2t {
+
+class Ac3HeaderTest : public testing::Test {
+ public:
+  void SetUp() override {
+    ASSERT_TRUE(base::HexStringToBytes(kValidPartialAc3Frame, &ac3_frame_));
+  }
+
+ protected:
+  std::vector ac3_frame_;
+};
+
+TEST_F(Ac3HeaderTest, ParseSuccess) {
+  const size_t kExpectedFrameSize(836);  // frmsizecod 21, 44.1 kHz: 418 words.
+  const size_t kExpectedHeaderSize(0);   // Ac3Header reports no header bytes.
+  const uint8_t kExpectedObjectType(0);  // Dummy value; only used for AAC.
+  const uint32_t kExpectedSamplingFrequency(44100);  // fscod 1.
+  const uint8_t kExpectedNumChannels(2);             // acmod 2.
+  const uint8_t kExpectedAudioSpecificConfig[] = {0x50, 0x11, 0x40};
+
+  Ac3Header ac3_header;
+  ASSERT_TRUE(ac3_header.Parse(ac3_frame_.data(), ac3_frame_.size()));
+  EXPECT_EQ(kExpectedFrameSize, ac3_header.GetFrameSize());
+  EXPECT_EQ(kExpectedHeaderSize, ac3_header.GetHeaderSize());
+  EXPECT_EQ(kExpectedObjectType, ac3_header.GetObjectType());
+  EXPECT_EQ(kExpectedSamplingFrequency, ac3_header.GetSamplingFrequency());
+  EXPECT_EQ(kExpectedNumChannels, ac3_header.GetNumChannels());
+  std::vector audio_specific_config;
+  ac3_header.GetAudioSpecificConfig(&audio_specific_config);
+  EXPECT_EQ(arraysize(kExpectedAudioSpecificConfig),
+            audio_specific_config.size());
+  EXPECT_EQ(std::vector(std::begin(kExpectedAudioSpecificConfig),
+                        std::end(kExpectedAudioSpecificConfig)),
+            audio_specific_config);
+}
+
+TEST_F(Ac3HeaderTest, ParseVariousDataSize) {
+  Ac3Header ac3_header;
+
+  // Parse succeeds as long as the full metadata is provided.
+  EXPECT_TRUE(ac3_header.Parse(ac3_frame_.data(), ac3_frame_.size() - 1));
+  const size_t frame_size = ac3_header.GetFrameSize();
+  const size_t header_size = ac3_header.GetHeaderSize();
+
+  EXPECT_TRUE(ac3_header.Parse(ac3_frame_.data(), 100));
+  EXPECT_EQ(frame_size, ac3_header.GetFrameSize());
+  EXPECT_EQ(header_size, ac3_header.GetHeaderSize());
+
+  // Parse fails if there is not enough data (no full metadata).
+  EXPECT_FALSE(ac3_header.Parse(ac3_frame_.data(), 1));
+  EXPECT_FALSE(ac3_header.Parse(ac3_frame_.data(), 5));
+}
+
+}  // namespace mp2t
+}  // namespace media
+}  // namespace shaka
diff --git a/packager/media/formats/mp2t/es_parser_audio.cc b/packager/media/formats/mp2t/es_parser_audio.cc
index 1d4116e224..2244a978ef 100644
--- a/packager/media/formats/mp2t/es_parser_audio.cc
+++ b/packager/media/formats/mp2t/es_parser_audio.cc
@@ -15,8 +15,10 @@
 #include "packager/media/base/bit_reader.h"
 #include "packager/media/base/media_sample.h"
 #include "packager/media/base/timestamp.h"
+#include "packager/media/formats/mp2t/ac3_header.h"
 #include "packager/media/formats/mp2t/adts_header.h"
 #include "packager/media/formats/mp2t/mp2t_common.h"
+#include "packager/media/formats/mp2t/ts_stream_type.h"
 #include "packager/media/formats/mpeg/adts_constants.h"
 
 namespace shaka {
@@ -78,14 +80,22 @@ static bool LookForSyncWord(const uint8_t* raw_es,
 }
 
 EsParserAudio::EsParserAudio(uint32_t pid,
+                             TsStreamType stream_type,
                              const NewStreamInfoCB& new_stream_info_cb,
                              const EmitSampleCB& emit_sample_cb,
                              bool sbr_in_mimetype)
     : EsParser(pid),
-      audio_header_(new AdtsHeader),
+      stream_type_(stream_type),
       new_stream_info_cb_(new_stream_info_cb),
       emit_sample_cb_(emit_sample_cb),
-      sbr_in_mimetype_(sbr_in_mimetype) {}
+      sbr_in_mimetype_(sbr_in_mimetype) {
+  if (stream_type == TsStreamType::kAc3) {  // Pick the matching header parser.
+    audio_header_.reset(new Ac3Header);
+  } else {
+    DCHECK_EQ(stream_type, TsStreamType::kAdtsAac);  // Only AAC is left.
+    audio_header_.reset(new AdtsHeader);
+  }
+}
 
 EsParserAudio::~EsParserAudio() {}
 
@@ -196,7 +206,8 @@ bool EsParserAudio::UpdateAudioConfiguration(const AudioHeader& audio_header) {
       sbr_in_mimetype_ ? std::min(2 * samples_per_second, 48000)
                        : samples_per_second;
 
-  const Codec codec = kCodecAAC;
+  const Codec codec =
+      stream_type_ == TsStreamType::kAc3 ? kCodecAC3 : kCodecAAC;
   last_audio_decoder_config_ = std::make_shared(
       pid(), kMpeg2Timescale, kInfiniteDuration, codec,
       AudioStreamInfo::GetCodecString(codec, audio_header.GetObjectType()),
diff --git a/packager/media/formats/mp2t/es_parser_audio.h b/packager/media/formats/mp2t/es_parser_audio.h
index c846ff4472..e92a9fefb0 100644
--- a/packager/media/formats/mp2t/es_parser_audio.h
+++ b/packager/media/formats/mp2t/es_parser_audio.h
@@ -14,6 +14,7 @@
 #include "packager/media/base/audio_stream_info.h"
 #include "packager/media/base/byte_queue.h"
 #include "packager/media/formats/mp2t/es_parser.h"
+#include "packager/media/formats/mp2t/ts_stream_type.h"
 
 namespace shaka {
 namespace media {
@@ -27,6 +28,7 @@ class AudioHeader;
 class EsParserAudio : public EsParser {
  public:
   EsParserAudio(uint32_t pid,
+                TsStreamType stream_type,
                 const NewStreamInfoCB& new_stream_info_cb,
                 const EmitSampleCB& emit_sample_cb,
                 bool sbr_in_mimetype);
@@ -52,6 +54,7 @@ class EsParserAudio : public EsParser {
   // Discard some bytes from the ES stream.
void DiscardEs(int nbytes); + const TsStreamType stream_type_; std::unique_ptr audio_header_; // Callbacks: diff --git a/packager/media/formats/mp2t/mp2t.gyp b/packager/media/formats/mp2t/mp2t.gyp index 7a99a0f7fb..39a2dd28ba 100644 --- a/packager/media/formats/mp2t/mp2t.gyp +++ b/packager/media/formats/mp2t/mp2t.gyp @@ -13,6 +13,8 @@ 'target_name': 'mp2t', 'type': '<(component)', 'sources': [ + 'ac3_header.cc', + 'ac3_header.h', 'adts_header.cc', 'adts_header.h', 'audio_header.h', @@ -51,6 +53,7 @@ 'ts_section_psi.h', 'ts_segmenter.cc', 'ts_segmenter.h', + 'ts_stream_type.h', 'ts_writer.cc', 'ts_writer.h', ], @@ -64,6 +67,7 @@ 'target_name': 'mp2t_unittest', 'type': '<(gtest_target_type)', 'sources': [ + 'ac3_header_unittest.cc', 'adts_header_unittest.cc', 'es_parser_h264_unittest.cc', 'es_parser_h26x_unittest.cc', diff --git a/packager/media/formats/mp2t/mp2t_media_parser.cc b/packager/media/formats/mp2t/mp2t_media_parser.cc index c80e495d97..093bbd1489 100644 --- a/packager/media/formats/mp2t/mp2t_media_parser.cc +++ b/packager/media/formats/mp2t/mp2t_media_parser.cc @@ -18,19 +18,12 @@ #include "packager/media/formats/mp2t/ts_section_pat.h" #include "packager/media/formats/mp2t/ts_section_pes.h" #include "packager/media/formats/mp2t/ts_section_pmt.h" +#include "packager/media/formats/mp2t/ts_stream_type.h" namespace shaka { namespace media { namespace mp2t { -enum StreamType { - // ISO-13818.1 / ITU H.222 Table 2.34 "Stream type assignments" - kStreamTypeMpeg1Audio = 0x3, - kStreamTypeAAC = 0xf, - kStreamTypeAVC = 0x1b, - kStreamTypeHEVC = 0x24, -}; - class PidState { public: enum PidType { @@ -291,33 +284,32 @@ void Mp2tMediaParser::RegisterPes(int pmt_pid, // Create a stream parser corresponding to the stream type. 
bool is_audio = false; std::unique_ptr es_parser; - if (stream_type == kStreamTypeAVC) { - es_parser.reset( - new EsParserH264( - pes_pid, - base::Bind(&Mp2tMediaParser::OnNewStreamInfo, - base::Unretained(this)), - base::Bind(&Mp2tMediaParser::OnEmitSample, - base::Unretained(this)))); - } else if (stream_type == kStreamTypeHEVC) { - es_parser.reset( - new EsParserH265( - pes_pid, - base::Bind(&Mp2tMediaParser::OnNewStreamInfo, - base::Unretained(this)), - base::Bind(&Mp2tMediaParser::OnEmitSample, - base::Unretained(this)))); - } else if (stream_type == kStreamTypeAAC) { - es_parser.reset(new EsParserAudio( - pes_pid, - base::Bind(&Mp2tMediaParser::OnNewStreamInfo, base::Unretained(this)), - base::Bind(&Mp2tMediaParser::OnEmitSample, base::Unretained(this)), - sbr_in_mimetype_)); - is_audio = true; - } else { - VLOG(1) << "Ignore unsupported stream type 0x" << std::hex << stream_type - << std::dec; - return; + switch (static_cast(stream_type)) { + case TsStreamType::kAvc: + es_parser.reset(new EsParserH264( + pes_pid, + base::Bind(&Mp2tMediaParser::OnNewStreamInfo, base::Unretained(this)), + base::Bind(&Mp2tMediaParser::OnEmitSample, base::Unretained(this)))); + break; + case TsStreamType::kHevc: + es_parser.reset(new EsParserH265( + pes_pid, + base::Bind(&Mp2tMediaParser::OnNewStreamInfo, base::Unretained(this)), + base::Bind(&Mp2tMediaParser::OnEmitSample, base::Unretained(this)))); + break; + case TsStreamType::kAdtsAac: + case TsStreamType::kAc3: + es_parser.reset(new EsParserAudio( + pes_pid, static_cast(stream_type), + base::Bind(&Mp2tMediaParser::OnNewStreamInfo, base::Unretained(this)), + base::Bind(&Mp2tMediaParser::OnEmitSample, base::Unretained(this)), + sbr_in_mimetype_)); + is_audio = true; + break; + default: + VLOG(1) << "Ignore unsupported stream type 0x" << std::hex << stream_type + << std::dec; + return; } // Create the PES state here. 
diff --git a/packager/media/formats/mp2t/pes_packet_generator.cc b/packager/media/formats/mp2t/pes_packet_generator.cc index f4fb3f4f04..643a38328f 100644 --- a/packager/media/formats/mp2t/pes_packet_generator.cc +++ b/packager/media/formats/mp2t/pes_packet_generator.cc @@ -25,7 +25,8 @@ namespace mp2t { namespace { const uint8_t kVideoStreamId = 0xE0; -const uint8_t kAudioStreamId = 0xC0; +const uint8_t kAacAudioStreamId = 0xC0; +const uint8_t kAc3AudioStreamId = 0xBD; // AC3 uses private stream 1 id. const double kTsTimescale = 90000.0; } // namespace @@ -51,14 +52,19 @@ bool PesPacketGenerator::Initialize(const StreamInfo& stream_info) { } else if (stream_type_ == kStreamAudio) { const AudioStreamInfo& audio_stream_info = static_cast(stream_info); - if (audio_stream_info.codec() != Codec::kCodecAAC) { - NOTIMPLEMENTED() << "Audio codec " << audio_stream_info.codec() - << " is not supported yet."; - return false; - } timescale_scale_ = kTsTimescale / audio_stream_info.time_scale(); - adts_converter_.reset(new AACAudioSpecificConfig()); - return adts_converter_->Parse(audio_stream_info.codec_config()); + if (audio_stream_info.codec() == Codec::kCodecAAC) { + audio_stream_id_ = kAacAudioStreamId; + adts_converter_.reset(new AACAudioSpecificConfig()); + return adts_converter_->Parse(audio_stream_info.codec_config()); + } else if (audio_stream_info.codec() == Codec::kCodecAC3) { + audio_stream_id_ = kAc3AudioStreamId; + // No converter needed for AC3. 
+ return true; + } + NOTIMPLEMENTED() << "Audio codec " << audio_stream_info.codec() + << " is not supported yet."; + return false; } NOTIMPLEMENTED() << "Stream type: " << stream_type_ << " not implemented."; @@ -91,22 +97,24 @@ bool PesPacketGenerator::PushSample(const MediaSample& sample) { return true; } DCHECK_EQ(stream_type_, kStreamAudio); - DCHECK(adts_converter_); - std::vector aac_frame(sample.data(), - sample.data() + sample.data_size()); + std::vector audio_frame(sample.data(), + sample.data() + sample.data_size()); - // TODO(rkuroiwa): ConvertToADTS() makes another copy of aac_frame internally. - // Optimize copying in this function, possibly by adding a method on - // AACAudioSpecificConfig that takes {pointer, length} pair and returns a - // vector that has the ADTS header. - if (!adts_converter_->ConvertToADTS(&aac_frame)) - return false; + // AAC is carried in ADTS. + if (adts_converter_) { + // TODO(rkuroiwa): ConvertToADTS() makes another copy of audio_frame + // internally. Optimize copying in this function, possibly by adding a + // method on AACAudioSpecificConfig that takes {pointer, length} pair and + // returns a vector that has the ADTS header. + if (!adts_converter_->ConvertToADTS(&audio_frame)) + return false; + } // TODO(rkuriowa): Put multiple samples in the PES packet to reduce # of PES // packets. 
- current_processing_pes_->mutable_data()->swap(aac_frame); - current_processing_pes_->set_stream_id(kAudioStreamId); + current_processing_pes_->mutable_data()->swap(audio_frame); + current_processing_pes_->set_stream_id(audio_stream_id_); pes_packets_.push_back(std::move(current_processing_pes_)); return true; } diff --git a/packager/media/formats/mp2t/pes_packet_generator.h b/packager/media/formats/mp2t/pes_packet_generator.h index 84b90f27e9..51a0ef2d11 100644 --- a/packager/media/formats/mp2t/pes_packet_generator.h +++ b/packager/media/formats/mp2t/pes_packet_generator.h @@ -73,6 +73,8 @@ class PesPacketGenerator { // This can be used to create a PES from multiple audio samples. std::unique_ptr current_processing_pes_; + // Audio stream id PES packet is codec dependent. + uint8_t audio_stream_id_ = 0; std::list> pes_packets_; DISALLOW_COPY_AND_ASSIGN(PesPacketGenerator); diff --git a/packager/media/formats/mp2t/program_map_table_writer.cc b/packager/media/formats/mp2t/program_map_table_writer.cc index 9c8d7af13b..c150241b4b 100644 --- a/packager/media/formats/mp2t/program_map_table_writer.cc +++ b/packager/media/formats/mp2t/program_map_table_writer.cc @@ -13,6 +13,7 @@ #include "packager/media/base/fourccs.h" #include "packager/media/codecs/aac_audio_specific_config.h" #include "packager/media/formats/mp2t/ts_packet_writer_util.h" +#include "packager/media/formats/mp2t/ts_stream_type.h" namespace shaka { namespace media { @@ -32,14 +33,6 @@ const int kNext= 0; const uint8_t kProgramNumber = 0x01; const uint8_t kProgramMapTableId = 0x02; -// Stream types. -// Clear. -const uint8_t kStreamTypeH264 = 0x1B; -const uint8_t kStreamTypeAdtsAac = 0x0F; -// Encrypted. -const uint8_t kStreamTypeEncryptedH264 = 0xDB; -const uint8_t kStreamTypeEncryptedAdtsAac = 0xCF; - // Table for CRC32/MPEG2. 
const uint32_t kCrcTable[] = { 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, @@ -137,60 +130,86 @@ void WritePrivateDataIndicatorDescriptor(FourCC fourcc, BufferWriter* output) { output->AppendInt(fourcc); } -bool WriteAacAudioSetupInformation(const uint8_t* aac_audio_specific_config, - size_t aac_audio_specific_config_size, - BufferWriter* audio_setup_information) { - AACAudioSpecificConfig config; - const bool result = config.Parse(std::vector( - aac_audio_specific_config, - aac_audio_specific_config + aac_audio_specific_config_size)); - if (!result) { - LOG(WARNING) << "Failed to parse config. Assuming AAC-LC."; - return false; - } +bool WriteAudioSetupInformation(Codec codec, + const uint8_t* audio_specific_config, + size_t audio_specific_config_size, + BufferWriter* audio_setup_information) { + uint32_t audio_type = FOURCC_NULL; + switch (codec) { + case kCodecAAC: { + AACAudioSpecificConfig config; + const bool result = config.Parse(std::vector( + audio_specific_config, + audio_specific_config + audio_specific_config_size)); - auto audio_object_type = config.GetAudioObjectType(); - switch (audio_object_type) { - case AACAudioSpecificConfig::AOT_AAC_LC: - audio_setup_information->AppendInt(FOURCC_zaac); + AACAudioSpecificConfig::AudioObjectType audio_object_type; + if (!result) { + LOG(WARNING) << "Failed to parse config. 
Assuming AAC-LC."; + audio_object_type = AACAudioSpecificConfig::AOT_AAC_LC; + } else { + audio_object_type = config.GetAudioObjectType(); + } + + switch (audio_object_type) { + case AACAudioSpecificConfig::AOT_AAC_LC: + audio_type = FOURCC_zaac; + break; + case AACAudioSpecificConfig::AOT_SBR: + audio_type = FOURCC_zach; + break; + case AACAudioSpecificConfig::AOT_PS: + audio_type = FOURCC_zacp; + break; + default: + LOG(ERROR) << "Unknown object type for aac " << audio_object_type; + return false; + } + } break; + case kCodecAC3: + audio_type = FOURCC_zac3; break; - case AACAudioSpecificConfig::AOT_SBR: - audio_setup_information->AppendInt(FOURCC_zach); - break; - case AACAudioSpecificConfig::AOT_PS: - audio_setup_information->AppendInt(FOURCC_zacp); + case kCodecEAC3: + audio_type = FOURCC_zec3; break; default: - LOG(ERROR) << "Unknown object type for aac " << audio_object_type; + LOG(ERROR) << "Codec " << codec << " is not supported in encrypted TS."; return false; } + DCHECK_NE(audio_type, FOURCC_NULL); + audio_setup_information->AppendInt(audio_type); // Priming. Since no info from encoder, set it to 0x0000. audio_setup_information->AppendInt(static_cast(0x0000)); // Version is always 0x01. 
audio_setup_information->AppendInt(static_cast(0x01)); audio_setup_information->AppendInt( - static_cast(aac_audio_specific_config_size)); - audio_setup_information->AppendArray(aac_audio_specific_config, - aac_audio_specific_config_size); + static_cast(audio_specific_config_size)); + audio_setup_information->AppendArray(audio_specific_config, + audio_specific_config_size); return true; } -bool WriteRegistrationDescriptorForEncryptedAudio(const uint8_t* setup_data, +bool WriteRegistrationDescriptorForEncryptedAudio(Codec codec, + const uint8_t* setup_data, size_t setup_data_size, BufferWriter* output) { const uint8_t kRegistrationDescriptor = 5; BufferWriter audio_setup_information; - if (!WriteAacAudioSetupInformation(setup_data, setup_data_size, - &audio_setup_information)) { + if (!WriteAudioSetupInformation(codec, setup_data, setup_data_size, + &audio_setup_information)) { + return false; + } + + const size_t registration_descriptor_size = + audio_setup_information.Size() + sizeof(FOURCC_apad); + if (registration_descriptor_size > std::numeric_limits::max()) { + LOG(ERROR) << "Audio setup data of size: " << setup_data_size + << " will not fit in the descriptor."; return false; } output->AppendInt(kRegistrationDescriptor); - // Length of the rest of this descriptor is size of audio_setup_information + - // 4 bytes (for 'apad'). 
- output->AppendInt(static_cast(audio_setup_information.Size() + - sizeof(FOURCC_apad))); + output->AppendInt(static_cast(registration_descriptor_size)); output->AppendInt(FOURCC_apad); output->AppendBuffer(audio_setup_information); return true; @@ -256,13 +275,19 @@ ProgramMapTableWriter::ProgramMapTableWriter(Codec codec) : codec_(codec) {} bool ProgramMapTableWriter::EncryptedSegmentPmt(BufferWriter* writer) { if (encrypted_pmt_.Size() == 0) { - uint8_t stream_type; + TsStreamType stream_type; switch (codec_) { case kCodecH264: - stream_type = kStreamTypeEncryptedH264; + stream_type = TsStreamType::kEncryptedAvc; break; case kCodecAAC: - stream_type = kStreamTypeEncryptedAdtsAac; + stream_type = TsStreamType::kEncryptedAdtsAac; + break; + case kCodecAC3: + stream_type = TsStreamType::kEncryptedAc3; + break; + case kCodecEAC3: + stream_type = TsStreamType::kEncryptedEac3; break; default: LOG(ERROR) << "Codec " << codec_ << " is not supported in TS yet."; @@ -274,8 +299,9 @@ bool ProgramMapTableWriter::EncryptedSegmentPmt(BufferWriter* writer) { return false; const bool has_clear_lead = clear_pmt_.Size() > 0; - WritePmtWithParameters(stream_type, has_clear_lead ? kVersion1 : kVersion0, - kCurrent, descriptors.Buffer(), descriptors.Size(), + WritePmtWithParameters(static_cast(stream_type), + has_clear_lead ? 
kVersion1 : kVersion0, kCurrent, + descriptors.Buffer(), descriptors.Size(), &encrypted_pmt_); DCHECK_NE(encrypted_pmt_.Size(), 0u); } @@ -286,21 +312,27 @@ bool ProgramMapTableWriter::EncryptedSegmentPmt(BufferWriter* writer) { bool ProgramMapTableWriter::ClearSegmentPmt(BufferWriter* writer) { if (clear_pmt_.Size() == 0) { - uint8_t stream_type; + TsStreamType stream_type; switch (codec_) { case kCodecH264: - stream_type = kStreamTypeH264; + stream_type = TsStreamType::kAvc; break; case kCodecAAC: - stream_type = kStreamTypeAdtsAac; + stream_type = TsStreamType::kAdtsAac; + break; + case kCodecAC3: + stream_type = TsStreamType::kAc3; + break; + case kCodecEAC3: + stream_type = TsStreamType::kEac3; break; default: LOG(ERROR) << "Codec " << codec_ << " is not supported in TS yet."; return false; } - WritePmtWithParameters(stream_type, kVersion0, kCurrent, nullptr, 0, - &clear_pmt_); + WritePmtWithParameters(static_cast(stream_type), kVersion0, + kCurrent, nullptr, 0, &clear_pmt_); DCHECK_NE(clear_pmt_.Size(), 0u); } WritePmtToBuffer(clear_pmt_.Buffer(), clear_pmt_.Size(), &continuity_counter_, @@ -341,23 +373,37 @@ bool AudioProgramMapTableWriter::WriteDescriptors( case kCodecAAC: fourcc = FOURCC_aacd; break; + case kCodecAC3: + fourcc = FOURCC_ac3d; + break; + case kCodecEAC3: + fourcc = FOURCC_ec3d; + break; default: LOG(ERROR) << "Codec " << codec() << " is not supported in TS yet."; return false; } WritePrivateDataIndicatorDescriptor(fourcc, descriptors); - // -12 because there are 12 bytes between 'descriptor_length' in - // registration_descriptor and 'setup_data_length' in audio_setup_information. 
- if (audio_specific_config_.size() > - std::numeric_limits::max() - 12U) { - LOG(ERROR) << "AACAudioSpecificConfig of size: " - << audio_specific_config_.size() - << " will not fit in the descriptor."; - return false; - } + // NOTE: There are two specifications of carrying AC-3 bit stream in MPEG-2 + // transport stream (ISO/IEC 13818-1): + // System A used by ATSC (TS 102 366 Digital Audio Compression Standard) + // stream_type: 0x81 + // system_id: 0xBD (private_stream_1) + // Requires Registration_descriptor, AC-3_audio_stream_descriptor. + // Optional ISO_639_language_code descriptor. + // System B used by DVB (TS 101 154 DVB specification for ... based on the + // MPEG-2 Transport Stream) + // stream_type: 0x06 (private data) + // stream_id: 0xBD (private_stream_1) + // Requires AC-3_descriptor (not the same as AC-3_audio_stream_descriptor + // in ATSC) + // Optional ISO_639_language_code descriptor. + // We follow "System A" but not strictly as we do not include Registration + // descriptor and AC-3_audio_stream_descriptor right now. + return WriteRegistrationDescriptorForEncryptedAudio( - audio_specific_config_.data(), audio_specific_config_.size(), + codec(), audio_specific_config_.data(), audio_specific_config_.size(), descriptors); } diff --git a/packager/media/formats/mp2t/ts_section_pes.cc b/packager/media/formats/mp2t/ts_section_pes.cc index d971b2cfbb..ac93dcf6a2 100644 --- a/packager/media/formats/mp2t/ts_section_pes.cc +++ b/packager/media/formats/mp2t/ts_section_pes.cc @@ -198,8 +198,12 @@ bool TsSectionPes::ParseInternal(const uint8_t* raw_pes, int raw_pes_size) { pes_packet_length = static_cast(bit_reader.bits_available()) / 8; // Ignore the PES for unknown stream IDs. + // ATSC Standard A/52:2012 3. GENERIC IDENTIFICATION OF AN AC-3 STREAM. + // AC3/E-AC3 stream uses private stream id. 
+  const int kPrivateStream1 = 0xBD;  // stream_id accepted as audio below.
   // See ITU H.222 Table 2-22 "Stream_id assignments"
-  bool is_audio_stream_id = ((stream_id & 0xe0) == 0xc0);
+  bool is_audio_stream_id =
+      ((stream_id & 0xe0) == 0xc0) || stream_id == kPrivateStream1;
   bool is_video_stream_id = ((stream_id & 0xf0) == 0xe0);
   if (!is_audio_stream_id && !is_video_stream_id)
     return true;
diff --git a/packager/media/formats/mp2t/ts_stream_type.h b/packager/media/formats/mp2t/ts_stream_type.h
new file mode 100644
index 0000000000..10ebffc1bb
--- /dev/null
+++ b/packager/media/formats/mp2t/ts_stream_type.h
@@ -0,0 +1,35 @@
+// Copyright 2017 Google Inc. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file or at
+// https://developers.google.com/open-source/licenses/bsd
+
+#ifndef PACKAGER_MEDIA_FORMATS_MP2T_TS_STREAM_TYPE_H_
+#define PACKAGER_MEDIA_FORMATS_MP2T_TS_STREAM_TYPE_H_
+
+#include
+
+namespace shaka {
+namespace media {
+namespace mp2t {
+
+enum class TsStreamType {  // Cast to and from the PMT stream_type value.
+  // ISO-13818.1 / ITU H.222 Table 2-34 "Stream type assignments"
+  kAdtsAac = 0x0F,
+  kAvc = 0x1B,
+  kHevc = 0x24,
+  // ATSC Standard A/52.
+  kAc3 = 0x81,
+  kEac3 = 0x87,
+  // Encrypted: https://goo.gl/N7Tvqi.
+  kEncryptedAc3 = 0xC1,
+  kEncryptedEac3 = 0xC2,
+  kEncryptedAdtsAac = 0xCF,
+  kEncryptedAvc = 0xDB,
+};
+
+}  // namespace mp2t
+}  // namespace media
+}  // namespace shaka
+
+#endif  // PACKAGER_MEDIA_FORMATS_MP2T_TS_STREAM_TYPE_H_