Add AC3 support in TS

EC3 and encrypted AC3 in TS are not handled yet.

Partially address #165

Change-Id: If4839ee7801eac902e64e9c677dd37709ec8e88b
This commit is contained in:
KongQun Yang 2017-10-22 22:38:55 -07:00
parent ad836a5cf1
commit d7f531fe10
13 changed files with 504 additions and 117 deletions

View File

@ -20,6 +20,7 @@ enum FourCC : uint32_t {
FOURCC_aacd = 0x61616364, FOURCC_aacd = 0x61616364,
FOURCC_ac_3 = 0x61632d33, // "ac-3" FOURCC_ac_3 = 0x61632d33, // "ac-3"
FOURCC_ac3d = 0x61633364,
FOURCC_apad = 0x61706164, FOURCC_apad = 0x61706164,
FOURCC_avc1 = 0x61766331, FOURCC_avc1 = 0x61766331,
FOURCC_avc3 = 0x61766333, FOURCC_avc3 = 0x61766333,
@ -50,6 +51,7 @@ enum FourCC : uint32_t {
FOURCC_dtsm = 0x6474732d, // "dts-" FOURCC_dtsm = 0x6474732d, // "dts-"
FOURCC_dtsp = 0x6474732b, // "dts+" FOURCC_dtsp = 0x6474732b, // "dts+"
FOURCC_ec_3 = 0x65632d33, // "ec-3" FOURCC_ec_3 = 0x65632d33, // "ec-3"
FOURCC_ec3d = 0x65633364,
FOURCC_edts = 0x65647473, FOURCC_edts = 0x65647473,
FOURCC_elst = 0x656c7374, FOURCC_elst = 0x656c7374,
FOURCC_enca = 0x656e6361, FOURCC_enca = 0x656e6361,
@ -145,9 +147,11 @@ enum FourCC : uint32_t {
FOURCC_wide = 0x77696465, FOURCC_wide = 0x77696465,
FOURCC_wvtt = 0x77767474, FOURCC_wvtt = 0x77767474,
FOURCC_zaac = 0x7A616163, FOURCC_zaac = 0x7A616163,
FOURCC_zac3 = 0x7A616333,
FOURCC_zach = 0x7A616368, FOURCC_zach = 0x7A616368,
FOURCC_zacp = 0x7A616370, FOURCC_zacp = 0x7A616370,
FOURCC_zavc = 0x7A617663, FOURCC_zavc = 0x7A617663,
FOURCC_zec3 = 0x7A656333,
}; };
const FourCC kAppleSampleAesProtectionScheme = FOURCC_cbca; const FourCC kAppleSampleAesProtectionScheme = FOURCC_cbca;

View File

@ -0,0 +1,135 @@
// Copyright 2017 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#include "packager/media/formats/mp2t/ac3_header.h"
#include "packager/media/base/bit_reader.h"
#include "packager/media/base/bit_writer.h"
#include "packager/media/formats/mp2t/mp2t_common.h"
namespace shaka {
namespace media {
namespace mp2t {
namespace {

// ATSC Standard A/52:2012 Table 5.6 Sample Rate Codes.
// Indexed by fscod. Only three codes have a sample rate assigned; callers
// must range-check fscod before indexing.
const uint32_t kAc3SampleRateTable[] = {48000, 44100, 32000};

// ATSC Standard A/52:2012 Table 5.8 Audio Coding Mode.
// Indexed by acmod. Values are the number of full bandwidth channels; the
// LFE channel is signalled separately (lfeon) and is not included here.
const uint8_t kAc3NumChannelsTable[] = {2, 1, 2, 3, 3, 4, 4, 5};

// ATSC Standard A/52:2012 Table 5.18 Frame Size Code Table
// (in words = 16 bits).
// Indexed as [frmsizecod][fscod], so the columns MUST follow the fscod order
// of kAc3SampleRateTable above (48 kHz, 44.1 kHz, 32 kHz). An AC-3 frame
// always carries 1536 samples, so for a given bit rate the frame is smallest
// at 48 kHz and largest at 32 kHz.
const size_t kFrameSizeCodeTable[][3] = {
    // {48 kHz, 44.1 kHz, 32 kHz}
    {64, 69, 96},       {64, 70, 96},       {80, 87, 120},
    {80, 88, 120},      {96, 104, 144},     {96, 105, 144},
    {112, 121, 168},    {112, 122, 168},    {128, 139, 192},
    {128, 140, 192},    {160, 174, 240},    {160, 175, 240},
    {192, 208, 288},    {192, 209, 288},    {224, 243, 336},
    {224, 244, 336},    {256, 278, 384},    {256, 279, 384},
    {320, 348, 480},    {320, 349, 480},    {384, 417, 576},
    {384, 418, 576},    {448, 487, 672},    {448, 488, 672},
    {512, 557, 768},    {512, 558, 768},    {640, 696, 960},
    {640, 697, 960},    {768, 835, 1152},   {768, 836, 1152},
    {896, 975, 1344},   {896, 976, 1344},   {1024, 1114, 1536},
    {1024, 1115, 1536}, {1152, 1253, 1728}, {1152, 1254, 1728},
    {1280, 1393, 1920}, {1280, 1394, 1920},
};

}  // namespace
// Returns true when the two bytes at |buf| form the AC-3 sync word 0x0B77.
// |buf| must be non-null; the second byte is only inspected when the first
// one matches.
bool Ac3Header::IsSyncWord(const uint8_t* buf) const {
  DCHECK(buf);
  if (buf[0] != 0x0B)
    return false;
  return buf[1] == 0x77;
}
// Returns a lower bound, in bytes, used as the minimum AC-3 frame size.
size_t Ac3Header::GetMinFrameSize() const {
  // The value is arbitrary rather than taken from the specification: every
  // entry of the frame size code table is at least 64 words, so any real
  // frame is considerably larger than this.
  constexpr size_t kMinAc3FrameSize = 10u;
  return kMinAc3FrameSize;
}
bool Ac3Header::Parse(const uint8_t* audio_frame, size_t audio_frame_size) {
BitReader frame(audio_frame, audio_frame_size);
// ASTC Standard A/52:2012 5. BIT STREAM SYNTAX.
// syncinfo: synchronization information section.
uint16_t syncword;
RCHECK(frame.ReadBits(16, &syncword));
RCHECK(syncword == 0x0B77);
uint16_t crc1;
RCHECK(frame.ReadBits(16, &crc1));
RCHECK(frame.ReadBits(2, &fscod_));
RCHECK(fscod_ < arraysize(kAc3SampleRateTable));
RCHECK(frame.ReadBits(6, &frmsizecod_));
RCHECK(frmsizecod_ < arraysize(kFrameSizeCodeTable));
// bsi: bit stream information section.
RCHECK(frame.ReadBits(5, &bsid_));
RCHECK(frame.ReadBits(3, &bsmod_));
RCHECK(frame.ReadBits(3, &acmod_));
RCHECK(acmod_ < arraysize(kAc3NumChannelsTable));
// If 3 front channels.
if ((acmod_ & 0x01) && (acmod_ != 0x01))
RCHECK(frame.SkipBits(2)); // cmixlev.
// If a surround channel exists.
if (acmod_ & 0x04)
RCHECK(frame.SkipBits(2)); // surmixlev.
// If in 2/0 mode.
if (acmod_ == 0x02)
RCHECK(frame.SkipBits(2)); // dsurmod.
RCHECK(frame.ReadBits(1, &lfeon_));
return true;
}
// Returns the number of header bytes that precede the media sample payload.
size_t Ac3Header::GetHeaderSize() const {
  // An AC-3 media sample carries the complete frame (ADTS, by contrast, has a
  // header that is not part of the sample), so there is nothing to strip.
  constexpr size_t kAc3HeaderSize = 0;
  return kAc3HeaderSize;
}
// Returns the size of the frame in bytes, looked up from the parsed frame
// size code and sample rate code.
size_t Ac3Header::GetFrameSize() const {
  DCHECK_LT(fscod_, arraysize(kAc3SampleRateTable));
  DCHECK_LT(frmsizecod_, arraysize(kFrameSizeCodeTable));
  // The table stores sizes in 16-bit words; convert to bytes.
  const size_t frame_size_in_words = kFrameSizeCodeTable[frmsizecod_][fscod_];
  return frame_size_in_words * 2;
}
void Ac3Header::GetAudioSpecificConfig(std::vector<uint8_t>* buffer) const {
DCHECK(buffer);
buffer->clear();
BitWriter config(buffer);
// Accoding to ETSI TS 102 366 V1.3.1 (2014-08) F.4 AC3SpecificBox.
config.WriteBits(fscod_, 2);
config.WriteBits(bsid_, 5);
config.WriteBits(bsmod_, 3);
config.WriteBits(acmod_, 3);
config.WriteBits(lfeon_, 1);
const uint8_t bit_rate_code = frmsizecod_ >> 1;
config.WriteBits(bit_rate_code, 5);
config.Flush();
}
// Returns the audio object type.
uint8_t Ac3Header::GetObjectType() const {
  // Object types are only meaningful for AAC; AC-3 reports a placeholder.
  constexpr uint8_t kDummyObjectType = 0;
  return kDummyObjectType;
}
// Returns the sampling frequency in Hz that corresponds to the parsed sample
// rate code.
uint32_t Ac3Header::GetSamplingFrequency() const {
  DCHECK_LT(fscod_, arraysize(kAc3SampleRateTable));
  const uint32_t sampling_frequency = kAc3SampleRateTable[fscod_];
  return sampling_frequency;
}
uint8_t Ac3Header::GetNumChannels() const {
DCHECK_LT(acmod_, arraysize(kAc3NumChannelsTable));
return kAc3NumChannelsTable[acmod_];
}
} // namespace mp2t
} // namespace media
} // namespace shaka

View File

@ -0,0 +1,56 @@
// Copyright 2017 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#ifndef PACKAGER_MEDIA_FORMATS_MP2T_AC3_HEADER_H_
#define PACKAGER_MEDIA_FORMATS_MP2T_AC3_HEADER_H_
#include <stdint.h>
#include <vector>
#include "packager/media/formats/mp2t/audio_header.h"
namespace shaka {
namespace media {
namespace mp2t {
/// Class which parses AC3 frame (header / metadata) and synthesizes
/// AudioSpecificConfig from audio frame content.
///
/// The getters report the values captured by the most recent call to
/// Parse().
class Ac3Header : public AudioHeader {
 public:
  Ac3Header() = default;
  ~Ac3Header() override = default;

  /// @name AudioHeader implementation overrides.
  /// @{
  bool IsSyncWord(const uint8_t* buf) const override;
  size_t GetMinFrameSize() const override;
  /// @param audio_frame points to the start of an AC3 frame (the sync word).
  /// @param audio_frame_size is the number of bytes available at
  ///        @a audio_frame.
  /// @return true if the frame header was parsed successfully.
  bool Parse(const uint8_t* audio_frame, size_t audio_frame_size) override;
  size_t GetHeaderSize() const override;
  size_t GetFrameSize() const override;
  void GetAudioSpecificConfig(std::vector<uint8_t>* buffer) const override;
  uint8_t GetObjectType() const override;
  uint32_t GetSamplingFrequency() const override;
  uint8_t GetNumChannels() const override;
  /// @}

 private:
  Ac3Header(const Ac3Header&) = delete;
  Ac3Header& operator=(const Ac3Header&) = delete;

  // Header fields, named after the corresponding bit stream elements.
  uint8_t fscod_ = 0;       // Sample rate code
  uint8_t frmsizecod_ = 0;  // Frame size code
  uint8_t bsid_ = 0;        // Bit stream identification
  uint8_t bsmod_ = 0;       // Bit stream mode
  uint8_t acmod_ = 0;       // Audio coding mode
  uint8_t lfeon_ = 0;       // Low frequency effects channel on
};
} // namespace mp2t
} // namespace media
} // namespace shaka
#endif // PACKAGER_MEDIA_FORMATS_MP2T_AC3_HEADER_H_

View File

@ -0,0 +1,87 @@
// Copyright 2017 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#include <gtest/gtest.h>
#include "packager/base/logging.h"
#include "packager/base/strings/string_number_conversions.h"
#include "packager/media/formats/mp2t/ac3_header.h"
namespace {
// Hex-encoded bytes from the beginning of an AC-3 frame, starting with the
// 0x0B77 sync word. As the name indicates this is only part of a frame; the
// tests below only need the header fields at its start. The string is
// decoded into raw bytes by the test fixture's SetUp().
const char kValidPartialAc3Frame[] =
"0B772770554043E106F575F080821010415C7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF"
"9F3E7CF9F3EFF9D5F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3"
"E7CF9F3E7CF9F3E7CF9F3E7CF9F3E3FE757CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F"
"3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF8CBFC4912248000000000F1B6DB"
"6DB6DE3C78F1DDDDDDDC00000000000000000000000000EEEEEEF1E3C6DB6DB6DB7CF9AD6B"
"5AD6B5AD6B5AD6B5AD6B5AD6B4000000078DB6DB6DB6F1E3C78EEEEEEEE000000000000000"
"0000000000077777778F1E36DB6DB6DBE7CD6B5AD6B5AD6B5AD6B5AD6B5AD6B5A600000000"
"0003C6DB6DB6DB78F1E3C77777777000000000000000000000000003BBBBBBC78F1B6DB6DB"
"6DF3E6B5AD6B5AD6B5AD6B5AD6B5AD6B5AD00000001E36DB6DB6DBC78F1E3BBBBBBB800000"
"000000000000000000001DDDDDDE3C78DB6DB6DB6F9F35AD6B5AD6B5AD6B5AD6B5AD6B5AD6"
"9800000000000F1B6DB6DB6DE3C78F1DDD";
} // anonymous namespace
namespace shaka {
namespace media {
namespace mp2t {
// Test fixture that provides the raw bytes of the sample AC-3 frame.
class Ac3HeaderTest : public testing::Test {
 public:
  // Decodes the hex-encoded sample frame; the test is aborted if the string
  // cannot be decoded.
  void SetUp() override {
    std::vector<uint8_t>* const decoded_frame = &ac3_frame_;
    ASSERT_TRUE(base::HexStringToBytes(kValidPartialAc3Frame, decoded_frame));
  }

 protected:
  // Raw bytes of the sample frame, filled in by SetUp().
  std::vector<uint8_t> ac3_frame_;
};
// Parses the sample frame and checks every value derived from its header.
TEST_F(Ac3HeaderTest, ParseSuccess) {
  Ac3Header ac3_header;
  ASSERT_TRUE(ac3_header.Parse(ac3_frame_.data(), ac3_frame_.size()));

  // Scalar properties.
  const size_t kExpectedFrameSize = 836;
  EXPECT_EQ(kExpectedFrameSize, ac3_header.GetFrameSize());

  const size_t kExpectedHeaderSize = 0;
  EXPECT_EQ(kExpectedHeaderSize, ac3_header.GetHeaderSize());

  const uint8_t kExpectedObjectType = 0;
  EXPECT_EQ(kExpectedObjectType, ac3_header.GetObjectType());

  const uint32_t kExpectedSamplingFrequency = 44100;
  EXPECT_EQ(kExpectedSamplingFrequency, ac3_header.GetSamplingFrequency());

  const uint8_t kExpectedNumChannels = 2;
  EXPECT_EQ(kExpectedNumChannels, ac3_header.GetNumChannels());

  // Synthesized codec configuration: compare the length first, then the
  // bytes.
  const uint8_t kExpectedAudioSpecificConfig[] = {0x50, 0x11, 0x40};
  const std::vector<uint8_t> expected_config(
      std::begin(kExpectedAudioSpecificConfig),
      std::end(kExpectedAudioSpecificConfig));

  std::vector<uint8_t> audio_specific_config;
  ac3_header.GetAudioSpecificConfig(&audio_specific_config);
  EXPECT_EQ(expected_config.size(), audio_specific_config.size());
  EXPECT_EQ(expected_config, audio_specific_config);
}
// Checks how Parse() reacts to buffers of different lengths that all start
// at the beginning of the sample frame.
TEST_F(Ac3HeaderTest, ParseVariousDataSize) {
  Ac3Header ac3_header;
  const uint8_t* const frame_data = ac3_frame_.data();

  // Parse succeeds as long as the full metadata is provided.
  EXPECT_TRUE(ac3_header.Parse(frame_data, ac3_frame_.size() - 1));
  const size_t reference_frame_size = ac3_header.GetFrameSize();
  const size_t reference_header_size = ac3_header.GetHeaderSize();

  // A shorter buffer that still holds the metadata yields the same results.
  EXPECT_TRUE(ac3_header.Parse(frame_data, 100));
  EXPECT_EQ(reference_frame_size, ac3_header.GetFrameSize());
  EXPECT_EQ(reference_header_size, ac3_header.GetHeaderSize());

  // Parse fails if there is not enough data (no full metadata).
  EXPECT_FALSE(ac3_header.Parse(frame_data, 1));
  EXPECT_FALSE(ac3_header.Parse(frame_data, 5));
}
} // Namespace mp2t
} // namespace media
} // namespace shaka

View File

@ -15,8 +15,10 @@
#include "packager/media/base/bit_reader.h" #include "packager/media/base/bit_reader.h"
#include "packager/media/base/media_sample.h" #include "packager/media/base/media_sample.h"
#include "packager/media/base/timestamp.h" #include "packager/media/base/timestamp.h"
#include "packager/media/formats/mp2t/ac3_header.h"
#include "packager/media/formats/mp2t/adts_header.h" #include "packager/media/formats/mp2t/adts_header.h"
#include "packager/media/formats/mp2t/mp2t_common.h" #include "packager/media/formats/mp2t/mp2t_common.h"
#include "packager/media/formats/mp2t/ts_stream_type.h"
#include "packager/media/formats/mpeg/adts_constants.h" #include "packager/media/formats/mpeg/adts_constants.h"
namespace shaka { namespace shaka {
@ -78,14 +80,22 @@ static bool LookForSyncWord(const uint8_t* raw_es,
} }
EsParserAudio::EsParserAudio(uint32_t pid, EsParserAudio::EsParserAudio(uint32_t pid,
TsStreamType stream_type,
const NewStreamInfoCB& new_stream_info_cb, const NewStreamInfoCB& new_stream_info_cb,
const EmitSampleCB& emit_sample_cb, const EmitSampleCB& emit_sample_cb,
bool sbr_in_mimetype) bool sbr_in_mimetype)
: EsParser(pid), : EsParser(pid),
audio_header_(new AdtsHeader), stream_type_(stream_type),
new_stream_info_cb_(new_stream_info_cb), new_stream_info_cb_(new_stream_info_cb),
emit_sample_cb_(emit_sample_cb), emit_sample_cb_(emit_sample_cb),
sbr_in_mimetype_(sbr_in_mimetype) {} sbr_in_mimetype_(sbr_in_mimetype) {
if (stream_type == TsStreamType::kAc3) {
audio_header_.reset(new Ac3Header);
} else {
DCHECK_EQ(stream_type, TsStreamType::kAdtsAac);
audio_header_.reset(new AdtsHeader);
}
}
EsParserAudio::~EsParserAudio() {} EsParserAudio::~EsParserAudio() {}
@ -196,7 +206,8 @@ bool EsParserAudio::UpdateAudioConfiguration(const AudioHeader& audio_header) {
sbr_in_mimetype_ ? std::min(2 * samples_per_second, 48000) sbr_in_mimetype_ ? std::min(2 * samples_per_second, 48000)
: samples_per_second; : samples_per_second;
const Codec codec = kCodecAAC; const Codec codec =
stream_type_ == TsStreamType::kAc3 ? kCodecAC3 : kCodecAAC;
last_audio_decoder_config_ = std::make_shared<AudioStreamInfo>( last_audio_decoder_config_ = std::make_shared<AudioStreamInfo>(
pid(), kMpeg2Timescale, kInfiniteDuration, codec, pid(), kMpeg2Timescale, kInfiniteDuration, codec,
AudioStreamInfo::GetCodecString(codec, audio_header.GetObjectType()), AudioStreamInfo::GetCodecString(codec, audio_header.GetObjectType()),

View File

@ -14,6 +14,7 @@
#include "packager/media/base/audio_stream_info.h" #include "packager/media/base/audio_stream_info.h"
#include "packager/media/base/byte_queue.h" #include "packager/media/base/byte_queue.h"
#include "packager/media/formats/mp2t/es_parser.h" #include "packager/media/formats/mp2t/es_parser.h"
#include "packager/media/formats/mp2t/ts_stream_type.h"
namespace shaka { namespace shaka {
namespace media { namespace media {
@ -27,6 +28,7 @@ class AudioHeader;
class EsParserAudio : public EsParser { class EsParserAudio : public EsParser {
public: public:
EsParserAudio(uint32_t pid, EsParserAudio(uint32_t pid,
TsStreamType stream_type,
const NewStreamInfoCB& new_stream_info_cb, const NewStreamInfoCB& new_stream_info_cb,
const EmitSampleCB& emit_sample_cb, const EmitSampleCB& emit_sample_cb,
bool sbr_in_mimetype); bool sbr_in_mimetype);
@ -52,6 +54,7 @@ class EsParserAudio : public EsParser {
// Discard some bytes from the ES stream. // Discard some bytes from the ES stream.
void DiscardEs(int nbytes); void DiscardEs(int nbytes);
const TsStreamType stream_type_;
std::unique_ptr<AudioHeader> audio_header_; std::unique_ptr<AudioHeader> audio_header_;
// Callbacks: // Callbacks:

View File

@ -13,6 +13,8 @@
'target_name': 'mp2t', 'target_name': 'mp2t',
'type': '<(component)', 'type': '<(component)',
'sources': [ 'sources': [
'ac3_header.cc',
'ac3_header.h',
'adts_header.cc', 'adts_header.cc',
'adts_header.h', 'adts_header.h',
'audio_header.h', 'audio_header.h',
@ -51,6 +53,7 @@
'ts_section_psi.h', 'ts_section_psi.h',
'ts_segmenter.cc', 'ts_segmenter.cc',
'ts_segmenter.h', 'ts_segmenter.h',
'ts_stream_type.h',
'ts_writer.cc', 'ts_writer.cc',
'ts_writer.h', 'ts_writer.h',
], ],
@ -64,6 +67,7 @@
'target_name': 'mp2t_unittest', 'target_name': 'mp2t_unittest',
'type': '<(gtest_target_type)', 'type': '<(gtest_target_type)',
'sources': [ 'sources': [
'ac3_header_unittest.cc',
'adts_header_unittest.cc', 'adts_header_unittest.cc',
'es_parser_h264_unittest.cc', 'es_parser_h264_unittest.cc',
'es_parser_h26x_unittest.cc', 'es_parser_h26x_unittest.cc',

View File

@ -18,19 +18,12 @@
#include "packager/media/formats/mp2t/ts_section_pat.h" #include "packager/media/formats/mp2t/ts_section_pat.h"
#include "packager/media/formats/mp2t/ts_section_pes.h" #include "packager/media/formats/mp2t/ts_section_pes.h"
#include "packager/media/formats/mp2t/ts_section_pmt.h" #include "packager/media/formats/mp2t/ts_section_pmt.h"
#include "packager/media/formats/mp2t/ts_stream_type.h"
namespace shaka { namespace shaka {
namespace media { namespace media {
namespace mp2t { namespace mp2t {
enum StreamType {
// ISO-13818.1 / ITU H.222 Table 2.34 "Stream type assignments"
kStreamTypeMpeg1Audio = 0x3,
kStreamTypeAAC = 0xf,
kStreamTypeAVC = 0x1b,
kStreamTypeHEVC = 0x24,
};
class PidState { class PidState {
public: public:
enum PidType { enum PidType {
@ -291,30 +284,29 @@ void Mp2tMediaParser::RegisterPes(int pmt_pid,
// Create a stream parser corresponding to the stream type. // Create a stream parser corresponding to the stream type.
bool is_audio = false; bool is_audio = false;
std::unique_ptr<EsParser> es_parser; std::unique_ptr<EsParser> es_parser;
if (stream_type == kStreamTypeAVC) { switch (static_cast<TsStreamType>(stream_type)) {
es_parser.reset( case TsStreamType::kAvc:
new EsParserH264( es_parser.reset(new EsParserH264(
pes_pid, pes_pid,
base::Bind(&Mp2tMediaParser::OnNewStreamInfo, base::Bind(&Mp2tMediaParser::OnNewStreamInfo, base::Unretained(this)),
base::Unretained(this)), base::Bind(&Mp2tMediaParser::OnEmitSample, base::Unretained(this))));
base::Bind(&Mp2tMediaParser::OnEmitSample, break;
base::Unretained(this)))); case TsStreamType::kHevc:
} else if (stream_type == kStreamTypeHEVC) { es_parser.reset(new EsParserH265(
es_parser.reset(
new EsParserH265(
pes_pid, pes_pid,
base::Bind(&Mp2tMediaParser::OnNewStreamInfo, base::Bind(&Mp2tMediaParser::OnNewStreamInfo, base::Unretained(this)),
base::Unretained(this)), base::Bind(&Mp2tMediaParser::OnEmitSample, base::Unretained(this))));
base::Bind(&Mp2tMediaParser::OnEmitSample, break;
base::Unretained(this)))); case TsStreamType::kAdtsAac:
} else if (stream_type == kStreamTypeAAC) { case TsStreamType::kAc3:
es_parser.reset(new EsParserAudio( es_parser.reset(new EsParserAudio(
pes_pid, pes_pid, static_cast<TsStreamType>(stream_type),
base::Bind(&Mp2tMediaParser::OnNewStreamInfo, base::Unretained(this)), base::Bind(&Mp2tMediaParser::OnNewStreamInfo, base::Unretained(this)),
base::Bind(&Mp2tMediaParser::OnEmitSample, base::Unretained(this)), base::Bind(&Mp2tMediaParser::OnEmitSample, base::Unretained(this)),
sbr_in_mimetype_)); sbr_in_mimetype_));
is_audio = true; is_audio = true;
} else { break;
default:
VLOG(1) << "Ignore unsupported stream type 0x" << std::hex << stream_type VLOG(1) << "Ignore unsupported stream type 0x" << std::hex << stream_type
<< std::dec; << std::dec;
return; return;

View File

@ -25,7 +25,8 @@ namespace mp2t {
namespace { namespace {
const uint8_t kVideoStreamId = 0xE0; const uint8_t kVideoStreamId = 0xE0;
const uint8_t kAudioStreamId = 0xC0; const uint8_t kAacAudioStreamId = 0xC0;
const uint8_t kAc3AudioStreamId = 0xBD; // AC3 uses private stream 1 id.
const double kTsTimescale = 90000.0; const double kTsTimescale = 90000.0;
} // namespace } // namespace
@ -51,15 +52,20 @@ bool PesPacketGenerator::Initialize(const StreamInfo& stream_info) {
} else if (stream_type_ == kStreamAudio) { } else if (stream_type_ == kStreamAudio) {
const AudioStreamInfo& audio_stream_info = const AudioStreamInfo& audio_stream_info =
static_cast<const AudioStreamInfo&>(stream_info); static_cast<const AudioStreamInfo&>(stream_info);
if (audio_stream_info.codec() != Codec::kCodecAAC) { timescale_scale_ = kTsTimescale / audio_stream_info.time_scale();
if (audio_stream_info.codec() == Codec::kCodecAAC) {
audio_stream_id_ = kAacAudioStreamId;
adts_converter_.reset(new AACAudioSpecificConfig());
return adts_converter_->Parse(audio_stream_info.codec_config());
} else if (audio_stream_info.codec() == Codec::kCodecAC3) {
audio_stream_id_ = kAc3AudioStreamId;
// No converter needed for AC3.
return true;
}
NOTIMPLEMENTED() << "Audio codec " << audio_stream_info.codec() NOTIMPLEMENTED() << "Audio codec " << audio_stream_info.codec()
<< " is not supported yet."; << " is not supported yet.";
return false; return false;
} }
timescale_scale_ = kTsTimescale / audio_stream_info.time_scale();
adts_converter_.reset(new AACAudioSpecificConfig());
return adts_converter_->Parse(audio_stream_info.codec_config());
}
NOTIMPLEMENTED() << "Stream type: " << stream_type_ << " not implemented."; NOTIMPLEMENTED() << "Stream type: " << stream_type_ << " not implemented.";
return false; return false;
@ -91,22 +97,24 @@ bool PesPacketGenerator::PushSample(const MediaSample& sample) {
return true; return true;
} }
DCHECK_EQ(stream_type_, kStreamAudio); DCHECK_EQ(stream_type_, kStreamAudio);
DCHECK(adts_converter_);
std::vector<uint8_t> aac_frame(sample.data(), std::vector<uint8_t> audio_frame(sample.data(),
sample.data() + sample.data_size()); sample.data() + sample.data_size());
// TODO(rkuroiwa): ConvertToADTS() makes another copy of aac_frame internally. // AAC is carried in ADTS.
// Optimize copying in this function, possibly by adding a method on if (adts_converter_) {
// AACAudioSpecificConfig that takes {pointer, length} pair and returns a // TODO(rkuroiwa): ConvertToADTS() makes another copy of audio_frame
// vector that has the ADTS header. // internally. Optimize copying in this function, possibly by adding a
if (!adts_converter_->ConvertToADTS(&aac_frame)) // method on AACAudioSpecificConfig that takes {pointer, length} pair and
// returns a vector that has the ADTS header.
if (!adts_converter_->ConvertToADTS(&audio_frame))
return false; return false;
}
// TODO(rkuriowa): Put multiple samples in the PES packet to reduce # of PES // TODO(rkuriowa): Put multiple samples in the PES packet to reduce # of PES
// packets. // packets.
current_processing_pes_->mutable_data()->swap(aac_frame); current_processing_pes_->mutable_data()->swap(audio_frame);
current_processing_pes_->set_stream_id(kAudioStreamId); current_processing_pes_->set_stream_id(audio_stream_id_);
pes_packets_.push_back(std::move(current_processing_pes_)); pes_packets_.push_back(std::move(current_processing_pes_));
return true; return true;
} }

View File

@ -73,6 +73,8 @@ class PesPacketGenerator {
// This can be used to create a PES from multiple audio samples. // This can be used to create a PES from multiple audio samples.
std::unique_ptr<PesPacket> current_processing_pes_; std::unique_ptr<PesPacket> current_processing_pes_;
// Audio stream id PES packet is codec dependent.
uint8_t audio_stream_id_ = 0;
std::list<std::unique_ptr<PesPacket>> pes_packets_; std::list<std::unique_ptr<PesPacket>> pes_packets_;
DISALLOW_COPY_AND_ASSIGN(PesPacketGenerator); DISALLOW_COPY_AND_ASSIGN(PesPacketGenerator);

View File

@ -13,6 +13,7 @@
#include "packager/media/base/fourccs.h" #include "packager/media/base/fourccs.h"
#include "packager/media/codecs/aac_audio_specific_config.h" #include "packager/media/codecs/aac_audio_specific_config.h"
#include "packager/media/formats/mp2t/ts_packet_writer_util.h" #include "packager/media/formats/mp2t/ts_packet_writer_util.h"
#include "packager/media/formats/mp2t/ts_stream_type.h"
namespace shaka { namespace shaka {
namespace media { namespace media {
@ -32,14 +33,6 @@ const int kNext= 0;
const uint8_t kProgramNumber = 0x01; const uint8_t kProgramNumber = 0x01;
const uint8_t kProgramMapTableId = 0x02; const uint8_t kProgramMapTableId = 0x02;
// Stream types.
// Clear.
const uint8_t kStreamTypeH264 = 0x1B;
const uint8_t kStreamTypeAdtsAac = 0x0F;
// Encrypted.
const uint8_t kStreamTypeEncryptedH264 = 0xDB;
const uint8_t kStreamTypeEncryptedAdtsAac = 0xCF;
// Table for CRC32/MPEG2. // Table for CRC32/MPEG2.
const uint32_t kCrcTable[] = { const uint32_t kCrcTable[] = {
0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9,
@ -137,60 +130,86 @@ void WritePrivateDataIndicatorDescriptor(FourCC fourcc, BufferWriter* output) {
output->AppendInt(fourcc); output->AppendInt(fourcc);
} }
bool WriteAacAudioSetupInformation(const uint8_t* aac_audio_specific_config, bool WriteAudioSetupInformation(Codec codec,
size_t aac_audio_specific_config_size, const uint8_t* audio_specific_config,
size_t audio_specific_config_size,
BufferWriter* audio_setup_information) { BufferWriter* audio_setup_information) {
uint32_t audio_type = FOURCC_NULL;
switch (codec) {
case kCodecAAC: {
AACAudioSpecificConfig config; AACAudioSpecificConfig config;
const bool result = config.Parse(std::vector<uint8_t>( const bool result = config.Parse(std::vector<uint8_t>(
aac_audio_specific_config, audio_specific_config,
aac_audio_specific_config + aac_audio_specific_config_size)); audio_specific_config + audio_specific_config_size));
AACAudioSpecificConfig::AudioObjectType audio_object_type;
if (!result) { if (!result) {
LOG(WARNING) << "Failed to parse config. Assuming AAC-LC."; LOG(WARNING) << "Failed to parse config. Assuming AAC-LC.";
return false; audio_object_type = AACAudioSpecificConfig::AOT_AAC_LC;
} else {
audio_object_type = config.GetAudioObjectType();
} }
auto audio_object_type = config.GetAudioObjectType();
switch (audio_object_type) { switch (audio_object_type) {
case AACAudioSpecificConfig::AOT_AAC_LC: case AACAudioSpecificConfig::AOT_AAC_LC:
audio_setup_information->AppendInt(FOURCC_zaac); audio_type = FOURCC_zaac;
break; break;
case AACAudioSpecificConfig::AOT_SBR: case AACAudioSpecificConfig::AOT_SBR:
audio_setup_information->AppendInt(FOURCC_zach); audio_type = FOURCC_zach;
break; break;
case AACAudioSpecificConfig::AOT_PS: case AACAudioSpecificConfig::AOT_PS:
audio_setup_information->AppendInt(FOURCC_zacp); audio_type = FOURCC_zacp;
break; break;
default: default:
LOG(ERROR) << "Unknown object type for aac " << audio_object_type; LOG(ERROR) << "Unknown object type for aac " << audio_object_type;
return false; return false;
} }
} break;
case kCodecAC3:
audio_type = FOURCC_zac3;
break;
case kCodecEAC3:
audio_type = FOURCC_zec3;
break;
default:
LOG(ERROR) << "Codec " << codec << " is not supported in encrypted TS.";
return false;
}
DCHECK_NE(audio_type, FOURCC_NULL);
audio_setup_information->AppendInt(audio_type);
// Priming. Since no info from encoder, set it to 0x0000. // Priming. Since no info from encoder, set it to 0x0000.
audio_setup_information->AppendInt(static_cast<uint16_t>(0x0000)); audio_setup_information->AppendInt(static_cast<uint16_t>(0x0000));
// Version is always 0x01. // Version is always 0x01.
audio_setup_information->AppendInt(static_cast<uint8_t>(0x01)); audio_setup_information->AppendInt(static_cast<uint8_t>(0x01));
audio_setup_information->AppendInt( audio_setup_information->AppendInt(
static_cast<uint8_t>(aac_audio_specific_config_size)); static_cast<uint8_t>(audio_specific_config_size));
audio_setup_information->AppendArray(aac_audio_specific_config, audio_setup_information->AppendArray(audio_specific_config,
aac_audio_specific_config_size); audio_specific_config_size);
return true; return true;
} }
bool WriteRegistrationDescriptorForEncryptedAudio(const uint8_t* setup_data, bool WriteRegistrationDescriptorForEncryptedAudio(Codec codec,
const uint8_t* setup_data,
size_t setup_data_size, size_t setup_data_size,
BufferWriter* output) { BufferWriter* output) {
const uint8_t kRegistrationDescriptor = 5; const uint8_t kRegistrationDescriptor = 5;
BufferWriter audio_setup_information; BufferWriter audio_setup_information;
if (!WriteAacAudioSetupInformation(setup_data, setup_data_size, if (!WriteAudioSetupInformation(codec, setup_data, setup_data_size,
&audio_setup_information)) { &audio_setup_information)) {
return false; return false;
} }
const size_t registration_descriptor_size =
audio_setup_information.Size() + sizeof(FOURCC_apad);
if (registration_descriptor_size > std::numeric_limits<uint8_t>::max()) {
LOG(ERROR) << "Audio setup data of size: " << setup_data_size
<< " will not fit in the descriptor.";
return false;
}
output->AppendInt(kRegistrationDescriptor); output->AppendInt(kRegistrationDescriptor);
// Length of the rest of this descriptor is size of audio_setup_information + output->AppendInt(static_cast<uint8_t>(registration_descriptor_size));
// 4 bytes (for 'apad').
output->AppendInt(static_cast<uint8_t>(audio_setup_information.Size() +
sizeof(FOURCC_apad)));
output->AppendInt(FOURCC_apad); output->AppendInt(FOURCC_apad);
output->AppendBuffer(audio_setup_information); output->AppendBuffer(audio_setup_information);
return true; return true;
@ -256,13 +275,19 @@ ProgramMapTableWriter::ProgramMapTableWriter(Codec codec) : codec_(codec) {}
bool ProgramMapTableWriter::EncryptedSegmentPmt(BufferWriter* writer) { bool ProgramMapTableWriter::EncryptedSegmentPmt(BufferWriter* writer) {
if (encrypted_pmt_.Size() == 0) { if (encrypted_pmt_.Size() == 0) {
uint8_t stream_type; TsStreamType stream_type;
switch (codec_) { switch (codec_) {
case kCodecH264: case kCodecH264:
stream_type = kStreamTypeEncryptedH264; stream_type = TsStreamType::kEncryptedAvc;
break; break;
case kCodecAAC: case kCodecAAC:
stream_type = kStreamTypeEncryptedAdtsAac; stream_type = TsStreamType::kEncryptedAdtsAac;
break;
case kCodecAC3:
stream_type = TsStreamType::kEncryptedAc3;
break;
case kCodecEAC3:
stream_type = TsStreamType::kEncryptedEac3;
break; break;
default: default:
LOG(ERROR) << "Codec " << codec_ << " is not supported in TS yet."; LOG(ERROR) << "Codec " << codec_ << " is not supported in TS yet.";
@ -274,8 +299,9 @@ bool ProgramMapTableWriter::EncryptedSegmentPmt(BufferWriter* writer) {
return false; return false;
const bool has_clear_lead = clear_pmt_.Size() > 0; const bool has_clear_lead = clear_pmt_.Size() > 0;
WritePmtWithParameters(stream_type, has_clear_lead ? kVersion1 : kVersion0, WritePmtWithParameters(static_cast<uint8_t>(stream_type),
kCurrent, descriptors.Buffer(), descriptors.Size(), has_clear_lead ? kVersion1 : kVersion0, kCurrent,
descriptors.Buffer(), descriptors.Size(),
&encrypted_pmt_); &encrypted_pmt_);
DCHECK_NE(encrypted_pmt_.Size(), 0u); DCHECK_NE(encrypted_pmt_.Size(), 0u);
} }
@ -286,21 +312,27 @@ bool ProgramMapTableWriter::EncryptedSegmentPmt(BufferWriter* writer) {
bool ProgramMapTableWriter::ClearSegmentPmt(BufferWriter* writer) { bool ProgramMapTableWriter::ClearSegmentPmt(BufferWriter* writer) {
if (clear_pmt_.Size() == 0) { if (clear_pmt_.Size() == 0) {
uint8_t stream_type; TsStreamType stream_type;
switch (codec_) { switch (codec_) {
case kCodecH264: case kCodecH264:
stream_type = kStreamTypeH264; stream_type = TsStreamType::kAvc;
break; break;
case kCodecAAC: case kCodecAAC:
stream_type = kStreamTypeAdtsAac; stream_type = TsStreamType::kAdtsAac;
break;
case kCodecAC3:
stream_type = TsStreamType::kAc3;
break;
case kCodecEAC3:
stream_type = TsStreamType::kEac3;
break; break;
default: default:
LOG(ERROR) << "Codec " << codec_ << " is not supported in TS yet."; LOG(ERROR) << "Codec " << codec_ << " is not supported in TS yet.";
return false; return false;
} }
WritePmtWithParameters(stream_type, kVersion0, kCurrent, nullptr, 0, WritePmtWithParameters(static_cast<uint8_t>(stream_type), kVersion0,
&clear_pmt_); kCurrent, nullptr, 0, &clear_pmt_);
DCHECK_NE(clear_pmt_.Size(), 0u); DCHECK_NE(clear_pmt_.Size(), 0u);
} }
WritePmtToBuffer(clear_pmt_.Buffer(), clear_pmt_.Size(), &continuity_counter_, WritePmtToBuffer(clear_pmt_.Buffer(), clear_pmt_.Size(), &continuity_counter_,
@ -341,23 +373,37 @@ bool AudioProgramMapTableWriter::WriteDescriptors(
case kCodecAAC: case kCodecAAC:
fourcc = FOURCC_aacd; fourcc = FOURCC_aacd;
break; break;
case kCodecAC3:
fourcc = FOURCC_ac3d;
break;
case kCodecEAC3:
fourcc = FOURCC_ec3d;
break;
default: default:
LOG(ERROR) << "Codec " << codec() << " is not supported in TS yet."; LOG(ERROR) << "Codec " << codec() << " is not supported in TS yet.";
return false; return false;
} }
WritePrivateDataIndicatorDescriptor(fourcc, descriptors); WritePrivateDataIndicatorDescriptor(fourcc, descriptors);
// -12 because there are 12 bytes between 'descriptor_length' in // NOTE: There are two specifications of carrying AC-3 bit stream in MPEG-2
// registration_descriptor and 'setup_data_length' in audio_setup_information. // transport stream (ISO/IEC 13818-1):
if (audio_specific_config_.size() > // System A used by ATSC (TS 102 366 Digital Audio Compression Standard)
std::numeric_limits<uint8_t>::max() - 12U) { // stream_type: 0x81
LOG(ERROR) << "AACAudioSpecificConfig of size: " // system_id: 0xBD (private_stream_1)
<< audio_specific_config_.size() // Requires Registration_descriptor, AC-3_audio_stream_descriptor.
<< " will not fit in the descriptor."; // Optional ISO_639_language_code descriptor.
return false; // System B used by DVB (TS 101 154 DVB specification for ... based on the
} // MPEG-2 Transport Stream)
// stream_type: 0x06 (private data)
// stream_id: 0xBD (private_stream_1)
// Requires AC-3_descriptor (not the same as AC-3_audio_stream_descriptor
// in ATSC)
// Optional ISO_639_language_code descriptor.
// We follow "System A" but not strictly as we do not include Registration
// descriptor and AC-3_audio_stream_descriptor right now.
return WriteRegistrationDescriptorForEncryptedAudio( return WriteRegistrationDescriptorForEncryptedAudio(
audio_specific_config_.data(), audio_specific_config_.size(), codec(), audio_specific_config_.data(), audio_specific_config_.size(),
descriptors); descriptors);
} }

View File

@ -198,8 +198,12 @@ bool TsSectionPes::ParseInternal(const uint8_t* raw_pes, int raw_pes_size) {
pes_packet_length = static_cast<int>(bit_reader.bits_available()) / 8; pes_packet_length = static_cast<int>(bit_reader.bits_available()) / 8;
// Ignore the PES for unknown stream IDs. // Ignore the PES for unknown stream IDs.
// ATSC Standard A/52:2012 3. GENERIC IDENTIFICATION OF AN AC-3 STREAM.
// AC3/E-AC3 stream uses private stream id.
const int kPrivateStream1 = 0xBD;
// See ITU H.222 Table 2-22 "Stream_id assignments" // See ITU H.222 Table 2-22 "Stream_id assignments"
bool is_audio_stream_id = ((stream_id & 0xe0) == 0xc0); bool is_audio_stream_id =
((stream_id & 0xe0) == 0xc0) || stream_id == kPrivateStream1;
bool is_video_stream_id = ((stream_id & 0xf0) == 0xe0); bool is_video_stream_id = ((stream_id & 0xf0) == 0xe0);
if (!is_audio_stream_id && !is_video_stream_id) if (!is_audio_stream_id && !is_video_stream_id)
return true; return true;

View File

@ -0,0 +1,35 @@
// Copyright 2017 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#ifndef PACKAGER_MEDIA_FORMATS_MP2T_TS_STREAM_TYPE_H_
#define PACKAGER_MEDIA_FORMATS_MP2T_TS_STREAM_TYPE_H_
#include <stdint.h>
namespace shaka {
namespace media {
namespace mp2t {
// Values of the stream_type field used for elementary streams in MPEG-2
// transport streams.
enum class TsStreamType {
  // Video, from ISO-13818.1 / ITU H.222 Table 2-34 "Stream type assignments".
  kAvc = 0x1b,
  kHevc = 0x24,

  // Audio, from ISO-13818.1 / ITU H.222 Table 2-34 "Stream type assignments".
  kAdtsAac = 0x0f,

  // Audio, from ATSC Standard A/52.
  kAc3 = 0x81,
  kEac3 = 0x87,

  // Encrypted counterparts of the types above: https://goo.gl/N7Tvqi.
  kEncryptedAvc = 0xdb,
  kEncryptedAdtsAac = 0xcf,
  kEncryptedAc3 = 0xc1,
  kEncryptedEac3 = 0xc2,
};
} // namespace mp2t
} // namespace media
} // namespace shaka
#endif // PACKAGER_MEDIA_FORMATS_MP2T_TS_STREAM_TYPE_H_