Add AC3 support in TS

EC3 and encrypted AC3 in TS are not handled yet. Partially address #165 Change-Id: If4839ee7801eac902e64e9c677dd37709ec8e88b
2017-10-22 22:38:55 -07:00 · 2017-10-22 22:38:55 -07:00 · d7f531fe10
parent ad836a5cf1
commit d7f531fe10
13 changed files with 504 additions and 117 deletions
--- a/packager/media/base/fourccs.h
+++ b/packager/media/base/fourccs.h
@ -20,6 +20,7 @@ enum FourCC : uint32_t {
  FOURCC_aacd = 0x61616364,
  FOURCC_ac_3 = 0x61632d33,  // "ac-3"
  FOURCC_ac3d = 0x61633364,
  FOURCC_apad = 0x61706164,
  FOURCC_avc1 = 0x61766331,
  FOURCC_avc3 = 0x61766333,
@ -50,6 +51,7 @@ enum FourCC : uint32_t {
  FOURCC_dtsm = 0x6474732d,  // "dts-"
  FOURCC_dtsp = 0x6474732b,  // "dts+"
  FOURCC_ec_3 = 0x65632d33,  // "ec-3"
  FOURCC_ec3d = 0x65633364,
  FOURCC_edts = 0x65647473,
  FOURCC_elst = 0x656c7374,
  FOURCC_enca = 0x656e6361,
@ -145,9 +147,11 @@ enum FourCC : uint32_t {
  FOURCC_wide = 0x77696465,
  FOURCC_wvtt = 0x77767474,
  FOURCC_zaac = 0x7A616163,
  FOURCC_zac3 = 0x7A616333,
  FOURCC_zach = 0x7A616368,
  FOURCC_zacp = 0x7A616370,
  FOURCC_zavc = 0x7A617663,
  FOURCC_zec3 = 0x7A656333,
 };
 const FourCC kAppleSampleAesProtectionScheme = FOURCC_cbca;
--- a/packager/media/formats/mp2t/ac3_header.cc
+++ b/packager/media/formats/mp2t/ac3_header.cc
@ -0,0 +1,135 @@
 // Copyright 2017 Google Inc. All rights reserved.
 //
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file or at
 // https://developers.google.com/open-source/licenses/bsd
 #include "packager/media/formats/mp2t/ac3_header.h"
 #include "packager/media/base/bit_reader.h"
 #include "packager/media/base/bit_writer.h"
 #include "packager/media/formats/mp2t/mp2t_common.h"
 namespace shaka {
 namespace media {
 namespace mp2t {
 namespace {
 // ATSC Standard A/52:2012 Table 5.6 Sample Rate Codes, indexed by fscod.
 const uint32_t kAc3SampleRateTable[] = {
     48000,  // fscod 0
     44100,  // fscod 1
     32000,  // fscod 2
 };
 // ATSC Standard A/52:2012 Table 5.8 Audio Coding Mode, indexed by acmod.
 // Values are the number of full bandwidth channels (LFE is not included).
 const uint8_t kAc3NumChannelsTable[] = {
     2,  // acmod 0: 1+1
     1,  // acmod 1: 1/0
     2,  // acmod 2: 2/0
     3,  // acmod 3: 3/0
     3,  // acmod 4: 2/1
     4,  // acmod 5: 3/1
     4,  // acmod 6: 2/2
     5,  // acmod 7: 3/2
 };
 // ATSC Standard A/52:2012 Table 5.18 Frame Size Code Table, indexed by
 // frmsizecod. Sizes are in words (1 word = 16 bits). Each row holds the sizes
 // for {32 kHz, 44.1 kHz, 48 kHz}; two consecutive rows share one nominal bit
 // rate.
 const size_t kFrameSizeCodeTable[][3] = {
     {96, 69, 64},       {96, 70, 64},        // 32 kbit/s
     {120, 87, 80},      {120, 88, 80},       // 40 kbit/s
     {144, 104, 96},     {144, 105, 96},      // 48 kbit/s
     {168, 121, 112},    {168, 122, 112},     // 56 kbit/s
     {192, 139, 128},    {192, 140, 128},     // 64 kbit/s
     {240, 174, 160},    {240, 175, 160},     // 80 kbit/s
     {288, 208, 192},    {288, 209, 192},     // 96 kbit/s
     {336, 243, 224},    {336, 244, 224},     // 112 kbit/s
     {384, 278, 256},    {384, 279, 256},     // 128 kbit/s
     {480, 348, 320},    {480, 349, 320},     // 160 kbit/s
     {576, 417, 384},    {576, 418, 384},     // 192 kbit/s
     {672, 487, 448},    {672, 488, 448},     // 224 kbit/s
     {768, 557, 512},    {768, 558, 512},     // 256 kbit/s
     {960, 696, 640},    {960, 697, 640},     // 320 kbit/s
     {1152, 835, 768},   {1152, 836, 768},    // 384 kbit/s
     {1344, 975, 896},   {1344, 976, 896},    // 448 kbit/s
     {1536, 1114, 1024}, {1536, 1115, 1024},  // 512 kbit/s
     {1728, 1253, 1152}, {1728, 1254, 1152},  // 576 kbit/s
     {1920, 1393, 1280}, {1920, 1394, 1280},  // 640 kbit/s
 };
 }  // namespace
 /// Returns true when the two bytes at |buf| form the AC-3 sync word 0x0B77.
 /// |buf| must be non-null; up to two bytes are read from it.
 bool Ac3Header::IsSyncWord(const uint8_t* buf) const {
  DCHECK(buf);
  // The second byte is only inspected once the first one matches.
  if (buf[0] != 0x0B)
    return false;
  return buf[1] == 0x77;
 }
 /// Returns a fixed lower bound for the size of an AC-3 frame.
 size_t Ac3Header::GetMinFrameSize() const {
  // The value is arbitrary: every entry of the frame size code table is far
  // larger than this.
  return static_cast<size_t>(10u);
 }
 bool Ac3Header::Parse(const uint8_t* audio_frame, size_t audio_frame_size) {
  BitReader frame(audio_frame, audio_frame_size);
  // ASTC Standard A/52:2012 5. BIT STREAM SYNTAX.
  // syncinfo: synchronization information section.
  uint16_t syncword;
  RCHECK(frame.ReadBits(16, &syncword));
  RCHECK(syncword == 0x0B77);
  uint16_t crc1;
  RCHECK(frame.ReadBits(16, &crc1));
  RCHECK(frame.ReadBits(2, &fscod_));
  RCHECK(fscod_ < arraysize(kAc3SampleRateTable));
  RCHECK(frame.ReadBits(6, &frmsizecod_));
  RCHECK(frmsizecod_ < arraysize(kFrameSizeCodeTable));
  // bsi: bit stream information section.
  RCHECK(frame.ReadBits(5, &bsid_));
  RCHECK(frame.ReadBits(3, &bsmod_));
  RCHECK(frame.ReadBits(3, &acmod_));
  RCHECK(acmod_ < arraysize(kAc3NumChannelsTable));
  // If 3 front channels.
  if ((acmod_ & 0x01) && (acmod_ != 0x01))
    RCHECK(frame.SkipBits(2));  // cmixlev.
  // If a surround channel exists.
  if (acmod_ & 0x04)
    RCHECK(frame.SkipBits(2));  // surmixlev.
  // If in 2/0 mode.
  if (acmod_ == 0x02)
    RCHECK(frame.SkipBits(2));  // dsurmod.
  RCHECK(frame.ReadBits(1, &lfeon_));
  return true;
 }
 /// Returns the number of header bytes to strip from a frame; always 0 for AC3.
 size_t Ac3Header::GetHeaderSize() const {
  // The whole AC3 frame goes into the media sample (this differs from ADTS),
  // so there is no separate header to account for.
  const size_t kAc3HeaderSize = 0;
  return kAc3HeaderSize;
 }
 /// Returns the size in bytes of the frame described by the parsed sample rate
 /// code and frame size code. Requires the cached codes to be within the
 /// lookup tables, which a successful Parse() verifies.
 size_t Ac3Header::GetFrameSize() const {
  DCHECK_LT(fscod_, arraysize(kAc3SampleRateTable));
  DCHECK_LT(frmsizecod_, arraysize(kFrameSizeCodeTable));
  // The columns of |kFrameSizeCodeTable| are {32 kHz, 44.1 kHz, 48 kHz}, which
  // is the reverse of the fscod order used by |kAc3SampleRateTable|
  // ({48 kHz, 44.1 kHz, 32 kHz}), so mirror the index.
  const size_t sample_rate_column = arraysize(kAc3SampleRateTable) - 1 - fscod_;
  // Table entries are in 16-bit words; convert to bytes.
  return kFrameSizeCodeTable[frmsizecod_][sample_rate_column] * 2;
 }
 /// Replaces the content of |buffer| with the codec configuration synthesized
 /// from the parsed header fields. |buffer| must be non-null.
 void Ac3Header::GetAudioSpecificConfig(std::vector<uint8_t>* buffer) const {
  DCHECK(buffer);
  buffer->clear();
  BitWriter config(buffer);

  // Layout according to ETSI TS 102 366 V1.3.1 (2014-08) F.4 AC3SpecificBox.
  // bit_rate_code is the frame size code without its least significant bit.
  const struct {
    uint8_t value;
    int num_bits;
  } kFields[] = {
      {fscod_, 2},
      {bsid_, 5},
      {bsmod_, 3},
      {acmod_, 3},
      {lfeon_, 1},
      {static_cast<uint8_t>(frmsizecod_ >> 1), 5},
  };
  for (const auto& field : kFields)
    config.WriteBits(field.value, field.num_bits);
  config.Flush();
 }
 /// Returns a placeholder object type; the concept only applies to AAC.
 uint8_t Ac3Header::GetObjectType() const {
  const uint8_t kUnusedObjectType = 0;
  return kUnusedObjectType;
 }
 /// Returns the sampling frequency in Hz selected by the parsed sample rate
 /// code.
 uint32_t Ac3Header::GetSamplingFrequency() const {
  DCHECK_LT(fscod_, arraysize(kAc3SampleRateTable));
  const uint32_t sampling_frequency = kAc3SampleRateTable[fscod_];
  return sampling_frequency;
 }
 uint8_t Ac3Header::GetNumChannels() const {
  DCHECK_LT(acmod_, arraysize(kAc3NumChannelsTable));
  return kAc3NumChannelsTable[acmod_];
 }
 }  // namespace mp2t
 }  // namespace media
 }  // namespace shaka
--- a/packager/media/formats/mp2t/ac3_header.h
+++ b/packager/media/formats/mp2t/ac3_header.h
@ -0,0 +1,56 @@
 // Copyright 2017 Google Inc. All rights reserved.
 //
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file or at
 // https://developers.google.com/open-source/licenses/bsd
 #ifndef PACKAGER_MEDIA_FORMATS_MP2T_AC3_HEADER_H_
 #define PACKAGER_MEDIA_FORMATS_MP2T_AC3_HEADER_H_
 #include <stdint.h>
 #include <vector>
 #include "packager/media/formats/mp2t/audio_header.h"
 namespace shaka {
 namespace media {
 namespace mp2t {
 /// Class which parses AC3 frame (header / metadata) and synthesizes
 /// AudioSpecificConfig from audio frame content.
 ///
 /// The getters report values cached by the most recent Parse() call; before
 /// any Parse() call all cached fields are zero.
 class Ac3Header : public AudioHeader {
 public:
  Ac3Header() = default;
  ~Ac3Header() override = default;
  /// @name AudioHeader implementation overrides.
  /// @{
  /// @return true if the two bytes at @a buf are the AC3 sync word (0x0B77).
  bool IsSyncWord(const uint8_t* buf) const override;
  /// @return a fixed lower bound for the size of an AC3 frame.
  size_t GetMinFrameSize() const override;
  /// Parse the frame metadata found at the start of an AC3 frame.
  /// @param audio_frame points to the start of the frame (the sync word).
  /// @param audio_frame_size is the number of bytes available.
  /// @return true if all needed fields were read and are valid.
  bool Parse(const uint8_t* audio_frame, size_t audio_frame_size) override;
  /// @return 0, as the whole AC3 frame is carried in the media sample.
  size_t GetHeaderSize() const override;
  /// @return the frame size in bytes derived from the parsed codes.
  size_t GetFrameSize() const override;
  /// Synthesize the codec configuration from the parsed fields.
  /// @param[out] buffer is cleared and filled with the configuration bytes.
  void GetAudioSpecificConfig(std::vector<uint8_t>* buffer) const override;
  /// @return a dummy value (0); object type is only meaningful for AAC.
  uint8_t GetObjectType() const override;
  /// @return the sampling frequency in Hz for the parsed sample rate code.
  uint32_t GetSamplingFrequency() const override;
  /// @return the number of audio channels for the parsed stream.
  uint8_t GetNumChannels() const override;
  /// @}
 private:
  Ac3Header(const Ac3Header&) = delete;
  Ac3Header& operator=(const Ac3Header&) = delete;
  // Raw field values as read from the bit stream (2, 6, 5, 3, 3 and 1 bits
  // wide respectively).
  uint8_t fscod_ = 0;       // Sample rate code
  uint8_t frmsizecod_ = 0;  // Frame size code
  uint8_t bsid_ = 0;        // Bit stream identification
  uint8_t bsmod_ = 0;       // Bit stream mode
  uint8_t acmod_ = 0;       // Audio coding mode
  uint8_t lfeon_ = 0;       // Low frequency effects channel on
 };
 }  // namespace mp2t
 }  // namespace media
 }  // namespace shaka
 #endif  // PACKAGER_MEDIA_FORMATS_MP2T_AC3_HEADER_H_
--- a/packager/media/formats/mp2t/ac3_header_unittest.cc
+++ b/packager/media/formats/mp2t/ac3_header_unittest.cc
@ -0,0 +1,87 @@
 // Copyright 2017 Google Inc. All rights reserved.
 //
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file or at
 // https://developers.google.com/open-source/licenses/bsd
 #include <gtest/gtest.h>
 #include "packager/base/logging.h"
 #include "packager/base/strings/string_number_conversions.h"
 #include "packager/media/formats/mp2t/ac3_header.h"
 namespace {
 // Hex encoded leading part of an AC3 frame. It begins with the sync word
 // (0B77), followed by crc1 (2770) and the byte 0x55, which carries
 // fscod = 1 and frmsizecod = 21. The tests decode it into raw bytes with
 // base::HexStringToBytes.
 const char kValidPartialAc3Frame[] =
    "0B772770554043E106F575F080821010415C7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF"
    "9F3E7CF9F3EFF9D5F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3"
    "E7CF9F3E7CF9F3E7CF9F3E7CF9F3E3FE757CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F"
    "3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF9F3E7CF8CBFC4912248000000000F1B6DB"
    "6DB6DE3C78F1DDDDDDDC00000000000000000000000000EEEEEEF1E3C6DB6DB6DB7CF9AD6B"
    "5AD6B5AD6B5AD6B5AD6B5AD6B4000000078DB6DB6DB6F1E3C78EEEEEEEE000000000000000"
    "0000000000077777778F1E36DB6DB6DBE7CD6B5AD6B5AD6B5AD6B5AD6B5AD6B5A600000000"
    "0003C6DB6DB6DB78F1E3C77777777000000000000000000000000003BBBBBBC78F1B6DB6DB"
    "6DF3E6B5AD6B5AD6B5AD6B5AD6B5AD6B5AD00000001E36DB6DB6DBC78F1E3BBBBBBB800000"
    "000000000000000000001DDDDDDE3C78DB6DB6DB6F9F35AD6B5AD6B5AD6B5AD6B5AD6B5AD6"
    "9800000000000F1B6DB6DB6DE3C78F1DDD";
 }  // anonymous namespace
 namespace shaka {
 namespace media {
 namespace mp2t {
 /// Test fixture which provides the decoded bytes of kValidPartialAc3Frame.
 class Ac3HeaderTest : public testing::Test {
 public:
  // Decodes the hex fixture into |ac3_frame_|; the test is aborted if the
  // decoding fails.
  void SetUp() override {
    ASSERT_TRUE(base::HexStringToBytes(kValidPartialAc3Frame, &ac3_frame_));
  }
 protected:
  // Raw bytes of the (partial) AC3 frame used as parser input.
  std::vector<uint8_t> ac3_frame_;
 };
 // Parses the fixture frame and verifies every value exposed by Ac3Header.
 TEST_F(Ac3HeaderTest, ParseSuccess) {
  // fscod = 1 (44.1 kHz) and frmsizecod = 21 select 418 words = 836 bytes.
  const size_t kExpectedFrameSize(836);
  const size_t kExpectedHeaderSize(0);
  const uint8_t kExpectedObjectType(0);
  const uint32_t kExpectedSamplingFrequency(44100);
  const uint8_t kExpectedNumChannels(2);
  // Bit packed fscod, bsid, bsmod, acmod, lfeon and bit_rate_code.
  const uint8_t kExpectedAudioSpecificConfig[] = {0x50, 0x11, 0x40};
  Ac3Header ac3_header;
  ASSERT_TRUE(ac3_header.Parse(ac3_frame_.data(), ac3_frame_.size()));
  EXPECT_EQ(kExpectedFrameSize, ac3_header.GetFrameSize());
  EXPECT_EQ(kExpectedHeaderSize, ac3_header.GetHeaderSize());
  EXPECT_EQ(kExpectedObjectType, ac3_header.GetObjectType());
  EXPECT_EQ(kExpectedSamplingFrequency, ac3_header.GetSamplingFrequency());
  EXPECT_EQ(kExpectedNumChannels, ac3_header.GetNumChannels());
  std::vector<uint8_t> audio_specific_config;
  ac3_header.GetAudioSpecificConfig(&audio_specific_config);
  // Check the size separately so that a length mismatch is reported clearly.
  EXPECT_EQ(arraysize(kExpectedAudioSpecificConfig),
            audio_specific_config.size());
  EXPECT_EQ(std::vector<uint8_t>(std::begin(kExpectedAudioSpecificConfig),
                                 std::end(kExpectedAudioSpecificConfig)),
            audio_specific_config);
 }
 // Verifies that Parse only depends on the metadata at the start of the frame:
 // truncated input succeeds as long as the metadata is complete, and fails
 // otherwise.
 TEST_F(Ac3HeaderTest, ParseVariousDataSize) {
  Ac3Header ac3_header;
  // Parse succeeds as long as the full metadata is provided.
  EXPECT_TRUE(ac3_header.Parse(ac3_frame_.data(), ac3_frame_.size() - 1));
  const size_t frame_size = ac3_header.GetFrameSize();
  const size_t header_size = ac3_header.GetHeaderSize();
  // A shorter prefix must yield the same frame and header sizes.
  EXPECT_TRUE(ac3_header.Parse(ac3_frame_.data(), 100));
  EXPECT_EQ(frame_size, ac3_header.GetFrameSize());
  EXPECT_EQ(header_size, ac3_header.GetHeaderSize());
  // Parse fails if there is not enough data (no full metadata).
  // 1 byte does not even hold the sync word; 5 bytes end right after
  // frmsizecod, before the bsi fields.
  EXPECT_FALSE(ac3_header.Parse(ac3_frame_.data(), 1));
  EXPECT_FALSE(ac3_header.Parse(ac3_frame_.data(), 5));
 }
 }  // Namespace mp2t
 }  // namespace media
 }  // namespace shaka
--- a/packager/media/formats/mp2t/es_parser_audio.cc
+++ b/packager/media/formats/mp2t/es_parser_audio.cc
@ -15,8 +15,10 @@
 #include "packager/media/base/bit_reader.h"
 #include "packager/media/base/media_sample.h"
 #include "packager/media/base/timestamp.h"
 #include "packager/media/formats/mp2t/ac3_header.h"
 #include "packager/media/formats/mp2t/adts_header.h"
 #include "packager/media/formats/mp2t/mp2t_common.h"
 #include "packager/media/formats/mp2t/ts_stream_type.h"
 #include "packager/media/formats/mpeg/adts_constants.h"
 namespace shaka {
@ -78,14 +80,22 @@ static bool LookForSyncWord(const uint8_t* raw_es,
 }
 EsParserAudio::EsParserAudio(uint32_t pid,
                             TsStreamType stream_type,
                             const NewStreamInfoCB& new_stream_info_cb,
                             const EmitSampleCB& emit_sample_cb,
                             bool sbr_in_mimetype)
    : EsParser(pid),
-      audio_header_(new AdtsHeader),
+      stream_type_(stream_type),
      new_stream_info_cb_(new_stream_info_cb),
      emit_sample_cb_(emit_sample_cb),
-      sbr_in_mimetype_(sbr_in_mimetype) {}
+      sbr_in_mimetype_(sbr_in_mimetype) {
  if (stream_type == TsStreamType::kAc3) {
    audio_header_.reset(new Ac3Header);
  } else {
    DCHECK_EQ(stream_type, TsStreamType::kAdtsAac);
    audio_header_.reset(new AdtsHeader);
  }
 }
 EsParserAudio::~EsParserAudio() {}
@ -196,7 +206,8 @@ bool EsParserAudio::UpdateAudioConfiguration(const AudioHeader& audio_header) {
      sbr_in_mimetype_ ? std::min(2 * samples_per_second, 48000)
                       : samples_per_second;
-  const Codec codec = kCodecAAC;
+  const Codec codec =
      stream_type_ == TsStreamType::kAc3 ? kCodecAC3 : kCodecAAC;
  last_audio_decoder_config_ = std::make_shared<AudioStreamInfo>(
      pid(), kMpeg2Timescale, kInfiniteDuration, codec,
      AudioStreamInfo::GetCodecString(codec, audio_header.GetObjectType()),
--- a/packager/media/formats/mp2t/es_parser_audio.h
+++ b/packager/media/formats/mp2t/es_parser_audio.h
@ -14,6 +14,7 @@
 #include "packager/media/base/audio_stream_info.h"
 #include "packager/media/base/byte_queue.h"
 #include "packager/media/formats/mp2t/es_parser.h"
 #include "packager/media/formats/mp2t/ts_stream_type.h"
 namespace shaka {
 namespace media {
@ -27,6 +28,7 @@ class AudioHeader;
 class EsParserAudio : public EsParser {
 public:
  EsParserAudio(uint32_t pid,
                TsStreamType stream_type,
                const NewStreamInfoCB& new_stream_info_cb,
                const EmitSampleCB& emit_sample_cb,
                bool sbr_in_mimetype);
@ -52,6 +54,7 @@ class EsParserAudio : public EsParser {
  // Discard some bytes from the ES stream.
  void DiscardEs(int nbytes);
  const TsStreamType stream_type_;
  std::unique_ptr<AudioHeader> audio_header_;
  // Callbacks:
--- a/packager/media/formats/mp2t/mp2t.gyp
+++ b/packager/media/formats/mp2t/mp2t.gyp
@ -13,6 +13,8 @@
      'target_name': 'mp2t',
      'type': '<(component)',
      'sources': [
        'ac3_header.cc',
        'ac3_header.h',
        'adts_header.cc',
        'adts_header.h',
        'audio_header.h',
@ -51,6 +53,7 @@
        'ts_section_psi.h',
        'ts_segmenter.cc',
        'ts_segmenter.h',
        'ts_stream_type.h',
        'ts_writer.cc',
        'ts_writer.h',
      ],
@ -64,6 +67,7 @@
      'target_name': 'mp2t_unittest',
      'type': '<(gtest_target_type)',
      'sources': [
        'ac3_header_unittest.cc',
        'adts_header_unittest.cc',
        'es_parser_h264_unittest.cc',
        'es_parser_h26x_unittest.cc',
--- a/packager/media/formats/mp2t/mp2t_media_parser.cc
+++ b/packager/media/formats/mp2t/mp2t_media_parser.cc
@ -18,19 +18,12 @@
 #include "packager/media/formats/mp2t/ts_section_pat.h"
 #include "packager/media/formats/mp2t/ts_section_pes.h"
 #include "packager/media/formats/mp2t/ts_section_pmt.h"
 #include "packager/media/formats/mp2t/ts_stream_type.h"
 namespace shaka {
 namespace media {
 namespace mp2t {
 enum StreamType {
  // ISO-13818.1 / ITU H.222 Table 2.34 "Stream type assignments"
  kStreamTypeMpeg1Audio = 0x3,
  kStreamTypeAAC = 0xf,
  kStreamTypeAVC = 0x1b,
  kStreamTypeHEVC = 0x24,
 };
 class PidState {
 public:
  enum PidType {
@ -291,30 +284,29 @@ void Mp2tMediaParser::RegisterPes(int pmt_pid,
  // Create a stream parser corresponding to the stream type.
  bool is_audio = false;
  std::unique_ptr<EsParser> es_parser;
-  if (stream_type == kStreamTypeAVC) {
+  switch (static_cast<TsStreamType>(stream_type)) {
-    es_parser.reset(
+    case TsStreamType::kAvc:
-        new EsParserH264(
+      es_parser.reset(new EsParserH264(
          pes_pid,
-            base::Bind(&Mp2tMediaParser::OnNewStreamInfo,
+          base::Bind(&Mp2tMediaParser::OnNewStreamInfo, base::Unretained(this)),
-                       base::Unretained(this)),
+          base::Bind(&Mp2tMediaParser::OnEmitSample, base::Unretained(this))));
-            base::Bind(&Mp2tMediaParser::OnEmitSample,
+      break;
-                       base::Unretained(this))));
+    case TsStreamType::kHevc:
-  } else if (stream_type == kStreamTypeHEVC) {
+      es_parser.reset(new EsParserH265(
    es_parser.reset(
        new EsParserH265(
          pes_pid,
-            base::Bind(&Mp2tMediaParser::OnNewStreamInfo,
+          base::Bind(&Mp2tMediaParser::OnNewStreamInfo, base::Unretained(this)),
-                       base::Unretained(this)),
+          base::Bind(&Mp2tMediaParser::OnEmitSample, base::Unretained(this))));
-            base::Bind(&Mp2tMediaParser::OnEmitSample,
+      break;
-                       base::Unretained(this))));
+    case TsStreamType::kAdtsAac:
-  } else if (stream_type == kStreamTypeAAC) {
+    case TsStreamType::kAc3:
      es_parser.reset(new EsParserAudio(
-        pes_pid,
+          pes_pid, static_cast<TsStreamType>(stream_type),
          base::Bind(&Mp2tMediaParser::OnNewStreamInfo, base::Unretained(this)),
          base::Bind(&Mp2tMediaParser::OnEmitSample, base::Unretained(this)),
          sbr_in_mimetype_));
      is_audio = true;
-  } else {
+      break;
    default:
      VLOG(1) << "Ignore unsupported stream type 0x" << std::hex << stream_type
              << std::dec;
      return;
--- a/packager/media/formats/mp2t/pes_packet_generator.cc
+++ b/packager/media/formats/mp2t/pes_packet_generator.cc
@ -25,7 +25,8 @@ namespace mp2t {
 namespace {
 const uint8_t kVideoStreamId = 0xE0;
-const uint8_t kAudioStreamId = 0xC0;
+const uint8_t kAacAudioStreamId = 0xC0;
 const uint8_t kAc3AudioStreamId = 0xBD;  // AC3 uses private stream 1 id.
 const double kTsTimescale = 90000.0;
 }  // namespace
@ -51,15 +52,20 @@ bool PesPacketGenerator::Initialize(const StreamInfo& stream_info) {
  } else if (stream_type_ == kStreamAudio) {
    const AudioStreamInfo& audio_stream_info =
        static_cast<const AudioStreamInfo&>(stream_info);
-    if (audio_stream_info.codec() != Codec::kCodecAAC) {
+    timescale_scale_ = kTsTimescale / audio_stream_info.time_scale();
    if (audio_stream_info.codec() == Codec::kCodecAAC) {
      audio_stream_id_ = kAacAudioStreamId;
      adts_converter_.reset(new AACAudioSpecificConfig());
      return adts_converter_->Parse(audio_stream_info.codec_config());
    } else if (audio_stream_info.codec() == Codec::kCodecAC3) {
      audio_stream_id_ = kAc3AudioStreamId;
      // No converter needed for AC3.
      return true;
    }
    NOTIMPLEMENTED() << "Audio codec " << audio_stream_info.codec()
                     << " is not supported yet.";
    return false;
  }
    timescale_scale_ = kTsTimescale / audio_stream_info.time_scale();
    adts_converter_.reset(new AACAudioSpecificConfig());
    return adts_converter_->Parse(audio_stream_info.codec_config());
  }
  NOTIMPLEMENTED() << "Stream type: " << stream_type_ << " not implemented.";
  return false;
@ -91,22 +97,24 @@ bool PesPacketGenerator::PushSample(const MediaSample& sample) {
    return true;
  }
  DCHECK_EQ(stream_type_, kStreamAudio);
  DCHECK(adts_converter_);
-  std::vector<uint8_t> aac_frame(sample.data(),
+  std::vector<uint8_t> audio_frame(sample.data(),
                                   sample.data() + sample.data_size());
-  // TODO(rkuroiwa): ConvertToADTS() makes another copy of aac_frame internally.
+  // AAC is carried in ADTS.
-  // Optimize copying in this function, possibly by adding a method on
+  if (adts_converter_) {
-  // AACAudioSpecificConfig that takes {pointer, length} pair and returns a
+    // TODO(rkuroiwa): ConvertToADTS() makes another copy of audio_frame
-  // vector that has the ADTS header.
+    // internally. Optimize copying in this function, possibly by adding a
-  if (!adts_converter_->ConvertToADTS(&aac_frame))
+    // method on AACAudioSpecificConfig that takes {pointer, length} pair and
    // returns a vector that has the ADTS header.
    if (!adts_converter_->ConvertToADTS(&audio_frame))
      return false;
  }
  // TODO(rkuriowa): Put multiple samples in the PES packet to reduce # of PES
  // packets.
-  current_processing_pes_->mutable_data()->swap(aac_frame);
+  current_processing_pes_->mutable_data()->swap(audio_frame);
-  current_processing_pes_->set_stream_id(kAudioStreamId);
+  current_processing_pes_->set_stream_id(audio_stream_id_);
  pes_packets_.push_back(std::move(current_processing_pes_));
  return true;
 }
--- a/packager/media/formats/mp2t/pes_packet_generator.h
+++ b/packager/media/formats/mp2t/pes_packet_generator.h
@ -73,6 +73,8 @@ class PesPacketGenerator {
  // This can be used to create a PES from multiple audio samples.
  std::unique_ptr<PesPacket> current_processing_pes_;
  // Audio stream id PES packet is codec dependent.
  uint8_t audio_stream_id_ = 0;
  std::list<std::unique_ptr<PesPacket>> pes_packets_;
  DISALLOW_COPY_AND_ASSIGN(PesPacketGenerator);
--- a/packager/media/formats/mp2t/program_map_table_writer.cc
+++ b/packager/media/formats/mp2t/program_map_table_writer.cc
@ -13,6 +13,7 @@
 #include "packager/media/base/fourccs.h"
 #include "packager/media/codecs/aac_audio_specific_config.h"
 #include "packager/media/formats/mp2t/ts_packet_writer_util.h"
 #include "packager/media/formats/mp2t/ts_stream_type.h"
 namespace shaka {
 namespace media {
@ -32,14 +33,6 @@ const int kNext= 0;
 const uint8_t kProgramNumber = 0x01;
 const uint8_t kProgramMapTableId = 0x02;
 // Stream types.
 // Clear.
 const uint8_t kStreamTypeH264 = 0x1B;
 const uint8_t kStreamTypeAdtsAac = 0x0F;
 // Encrypted.
 const uint8_t kStreamTypeEncryptedH264 = 0xDB;
 const uint8_t kStreamTypeEncryptedAdtsAac = 0xCF;
 // Table for CRC32/MPEG2.
 const uint32_t kCrcTable[] = {
    0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9,
@ -137,60 +130,86 @@ void WritePrivateDataIndicatorDescriptor(FourCC fourcc, BufferWriter* output) {
  output->AppendInt(fourcc);
 }
-bool WriteAacAudioSetupInformation(const uint8_t* aac_audio_specific_config,
+bool WriteAudioSetupInformation(Codec codec,
-                                   size_t aac_audio_specific_config_size,
+                                const uint8_t* audio_specific_config,
                                size_t audio_specific_config_size,
                                BufferWriter* audio_setup_information) {
  uint32_t audio_type = FOURCC_NULL;
  switch (codec) {
    case kCodecAAC: {
      AACAudioSpecificConfig config;
      const bool result = config.Parse(std::vector<uint8_t>(
-      aac_audio_specific_config,
+          audio_specific_config,
-      aac_audio_specific_config + aac_audio_specific_config_size));
+          audio_specific_config + audio_specific_config_size));
      AACAudioSpecificConfig::AudioObjectType audio_object_type;
      if (!result) {
        LOG(WARNING) << "Failed to parse config. Assuming AAC-LC.";
-    return false;
+        audio_object_type = AACAudioSpecificConfig::AOT_AAC_LC;
      } else {
        audio_object_type = config.GetAudioObjectType();
      }
  auto audio_object_type = config.GetAudioObjectType();
      switch (audio_object_type) {
        case AACAudioSpecificConfig::AOT_AAC_LC:
-      audio_setup_information->AppendInt(FOURCC_zaac);
+          audio_type = FOURCC_zaac;
          break;
        case AACAudioSpecificConfig::AOT_SBR:
-      audio_setup_information->AppendInt(FOURCC_zach);
+          audio_type = FOURCC_zach;
          break;
        case AACAudioSpecificConfig::AOT_PS:
-      audio_setup_information->AppendInt(FOURCC_zacp);
+          audio_type = FOURCC_zacp;
          break;
        default:
          LOG(ERROR) << "Unknown object type for aac " << audio_object_type;
          return false;
      }
    } break;
    case kCodecAC3:
      audio_type = FOURCC_zac3;
      break;
    case kCodecEAC3:
      audio_type = FOURCC_zec3;
      break;
    default:
      LOG(ERROR) << "Codec " << codec << " is not supported in encrypted TS.";
      return false;
  }
  DCHECK_NE(audio_type, FOURCC_NULL);
  audio_setup_information->AppendInt(audio_type);
  // Priming. Since no info from encoder, set it to 0x0000.
  audio_setup_information->AppendInt(static_cast<uint16_t>(0x0000));
  // Version is always 0x01.
  audio_setup_information->AppendInt(static_cast<uint8_t>(0x01));
  audio_setup_information->AppendInt(
-      static_cast<uint8_t>(aac_audio_specific_config_size));
+      static_cast<uint8_t>(audio_specific_config_size));
-  audio_setup_information->AppendArray(aac_audio_specific_config,
+  audio_setup_information->AppendArray(audio_specific_config,
-                                       aac_audio_specific_config_size);
+                                       audio_specific_config_size);
  return true;
 }
-bool WriteRegistrationDescriptorForEncryptedAudio(const uint8_t* setup_data,
+bool WriteRegistrationDescriptorForEncryptedAudio(Codec codec,
                                                  const uint8_t* setup_data,
                                                  size_t setup_data_size,
                                                  BufferWriter* output) {
  const uint8_t kRegistrationDescriptor = 5;
  BufferWriter audio_setup_information;
-  if (!WriteAacAudioSetupInformation(setup_data, setup_data_size,
+  if (!WriteAudioSetupInformation(codec, setup_data, setup_data_size,
                                  &audio_setup_information)) {
    return false;
  }
  const size_t registration_descriptor_size =
      audio_setup_information.Size() + sizeof(FOURCC_apad);
  if (registration_descriptor_size > std::numeric_limits<uint8_t>::max()) {
    LOG(ERROR) << "Audio setup data of size: " << setup_data_size
               << " will not fit in the descriptor.";
    return false;
  }
  output->AppendInt(kRegistrationDescriptor);
-  // Length of the rest of this descriptor is size of audio_setup_information +
+  output->AppendInt(static_cast<uint8_t>(registration_descriptor_size));
  // 4 bytes (for 'apad').
  output->AppendInt(static_cast<uint8_t>(audio_setup_information.Size() +
                                         sizeof(FOURCC_apad)));
  output->AppendInt(FOURCC_apad);
  output->AppendBuffer(audio_setup_information);
  return true;
@ -256,13 +275,19 @@ ProgramMapTableWriter::ProgramMapTableWriter(Codec codec) : codec_(codec) {}
 bool ProgramMapTableWriter::EncryptedSegmentPmt(BufferWriter* writer) {
  if (encrypted_pmt_.Size() == 0) {
-    uint8_t stream_type;
+    TsStreamType stream_type;
    switch (codec_) {
      case kCodecH264:
-        stream_type = kStreamTypeEncryptedH264;
+        stream_type = TsStreamType::kEncryptedAvc;
        break;
      case kCodecAAC:
-        stream_type = kStreamTypeEncryptedAdtsAac;
+        stream_type = TsStreamType::kEncryptedAdtsAac;
        break;
      case kCodecAC3:
        stream_type = TsStreamType::kEncryptedAc3;
        break;
      case kCodecEAC3:
        stream_type = TsStreamType::kEncryptedEac3;
        break;
      default:
        LOG(ERROR) << "Codec " << codec_ << " is not supported in TS yet.";
@ -274,8 +299,9 @@ bool ProgramMapTableWriter::EncryptedSegmentPmt(BufferWriter* writer) {
      return false;
    const bool has_clear_lead = clear_pmt_.Size() > 0;
-    WritePmtWithParameters(stream_type, has_clear_lead ? kVersion1 : kVersion0,
+    WritePmtWithParameters(static_cast<uint8_t>(stream_type),
-                           kCurrent, descriptors.Buffer(), descriptors.Size(),
+                           has_clear_lead ? kVersion1 : kVersion0, kCurrent,
                           descriptors.Buffer(), descriptors.Size(),
                           &encrypted_pmt_);
    DCHECK_NE(encrypted_pmt_.Size(), 0u);
  }
@ -286,21 +312,27 @@ bool ProgramMapTableWriter::EncryptedSegmentPmt(BufferWriter* writer) {
 bool ProgramMapTableWriter::ClearSegmentPmt(BufferWriter* writer) {
  if (clear_pmt_.Size() == 0) {
-    uint8_t stream_type;
+    TsStreamType stream_type;
    switch (codec_) {
      case kCodecH264:
-        stream_type = kStreamTypeH264;
+        stream_type = TsStreamType::kAvc;
        break;
      case kCodecAAC:
-        stream_type = kStreamTypeAdtsAac;
+        stream_type = TsStreamType::kAdtsAac;
        break;
      case kCodecAC3:
        stream_type = TsStreamType::kAc3;
        break;
      case kCodecEAC3:
        stream_type = TsStreamType::kEac3;
        break;
      default:
        LOG(ERROR) << "Codec " << codec_ << " is not supported in TS yet.";
        return false;
    }
-    WritePmtWithParameters(stream_type, kVersion0, kCurrent, nullptr, 0,
+    WritePmtWithParameters(static_cast<uint8_t>(stream_type), kVersion0,
-                           &clear_pmt_);
+                           kCurrent, nullptr, 0, &clear_pmt_);
    DCHECK_NE(clear_pmt_.Size(), 0u);
  }
  WritePmtToBuffer(clear_pmt_.Buffer(), clear_pmt_.Size(), &continuity_counter_,
@ -341,23 +373,37 @@ bool AudioProgramMapTableWriter::WriteDescriptors(
    case kCodecAAC:
      fourcc = FOURCC_aacd;
      break;
    case kCodecAC3:
      fourcc = FOURCC_ac3d;
      break;
    case kCodecEAC3:
      fourcc = FOURCC_ec3d;
      break;
    default:
      LOG(ERROR) << "Codec " << codec() << " is not supported in TS yet.";
      return false;
  }
  WritePrivateDataIndicatorDescriptor(fourcc, descriptors);
-  // -12 because there are 12 bytes between 'descriptor_length' in
+  // NOTE: There are two specifications of carrying AC-3 bit stream in MPEG-2
-  // registration_descriptor and 'setup_data_length' in audio_setup_information.
+  // transport stream (ISO/IEC 13818-1):
-  if (audio_specific_config_.size() >
+  //   System A used by ATSC (TS 102 366 Digital Audio Compression Standard)
-      std::numeric_limits<uint8_t>::max() - 12U) {
+  //     stream_type: 0x81
-    LOG(ERROR) << "AACAudioSpecificConfig of size: "
+  //     system_id:   0xBD (private_stream_1)
-               << audio_specific_config_.size()
+  //     Requires Registration_descriptor, AC-3_audio_stream_descriptor.
-               << " will not fit in the descriptor.";
+  //     Optional ISO_639_language_code descriptor.
-    return false;
+  //   System B used by DVB (TS 101 154 DVB specification for ... based on the
-  }
+  //                         MPEG-2 Transport Stream)
  //     stream_type: 0x06 (private data)
  //     stream_id:   0xBD (private_stream_1)
  //     Requires AC-3_descriptor (not the same as AC-3_audio_stream_descriptor
  //     in ATSC)
  //     Optional ISO_639_language_code descriptor.
  // We follow "System A" but not strictly as we do not include Registration
  // descriptor and AC-3_audio_stream_descriptor right now.
  return WriteRegistrationDescriptorForEncryptedAudio(
-      audio_specific_config_.data(), audio_specific_config_.size(),
+      codec(), audio_specific_config_.data(), audio_specific_config_.size(),
      descriptors);
 }
--- a/packager/media/formats/mp2t/ts_section_pes.cc
+++ b/packager/media/formats/mp2t/ts_section_pes.cc
@ -198,8 +198,12 @@ bool TsSectionPes::ParseInternal(const uint8_t* raw_pes, int raw_pes_size) {
    pes_packet_length = static_cast<int>(bit_reader.bits_available()) / 8;
  // Ignore the PES for unknown stream IDs.
  // ATSC Standard A/52:2012 3. GENERIC IDENTIFICATION OF AN AC-3 STREAM.
  // AC3/E-AC3 stream uses private stream id.
  const int kPrivateStream1 = 0xBD;
  // See ITU H.222 Table 2-22 "Stream_id assignments"
-  bool is_audio_stream_id = ((stream_id & 0xe0) == 0xc0);
+  bool is_audio_stream_id =
      ((stream_id & 0xe0) == 0xc0) || stream_id == kPrivateStream1;
  bool is_video_stream_id = ((stream_id & 0xf0) == 0xe0);
  if (!is_audio_stream_id && !is_video_stream_id)
    return true;
--- a/packager/media/formats/mp2t/ts_stream_type.h
+++ b/packager/media/formats/mp2t/ts_stream_type.h
@ -0,0 +1,35 @@
 // Copyright 2017 Google Inc. All rights reserved.
 //
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file or at
 // https://developers.google.com/open-source/licenses/bsd
 #ifndef PACKAGER_MEDIA_FORMATS_MP2T_TS_STREAM_TYPE_H_
 #define PACKAGER_MEDIA_FORMATS_MP2T_TS_STREAM_TYPE_H_
 #include <stdint.h>
 namespace shaka {
 namespace media {
 namespace mp2t {
 /// Stream types of the elementary streams handled by the MPEG-2 TS code. The
 /// enumerator values are the raw stream type numbers: the parser casts the
 /// numeric stream type to this enum, and the PMT writer casts it back to
 /// uint8_t.
 enum class TsStreamType {
  // ISO-13818.1 / ITU H.222 Table 2-34 "Stream type assignments"
  kAdtsAac = 0x0F,  // AAC audio carried in ADTS.
  kAvc = 0x1B,      // H.264 video.
  kHevc = 0x24,     // H.265 video.
  // ATSC Standard A/52.
  kAc3 = 0x81,
  kEac3 = 0x87,
  // Encrypted: https://goo.gl/N7Tvqi.
  // Used in the PMT of encrypted segments in place of the clear types above.
  kEncryptedAc3 = 0xC1,
  kEncryptedEac3 = 0xC2,
  kEncryptedAdtsAac = 0xCF,
  kEncryptedAvc = 0xDB,
 };
 }  // namespace mp2t
 }  // namespace media
 }  // namespace shaka
 #endif  // PACKAGER_MEDIA_FORMATS_MP2T_TS_STREAM_TYPE_H_