Add cc_index to stream descriptor.

This also allows setting the language of different text streams from the same input. Multiple output streams can use the same input stream with different cc_index values, and each can use a different language. If no language is specified, the packager will also try to pull it from the input. Change-Id: I7078710b509b7d77dad8cb4299a82f954af7e9e7
2020-12-11 12:58:26 -08:00 · 2020-12-11 12:58:26 -08:00 · a0f3f2cd3a
parent 78be14c092
commit a0f3f2cd3a
18 changed files with 255 additions and 21 deletions
--- a/docs/source/options/stream_descriptors.rst
+++ b/docs/source/options/stream_descriptors.rst
@ -63,6 +63,13 @@ These are the available fields:
    sampling rate among key frames. If specified, the output is a trick play
    stream.

+:cc_index:
+
+    Optional value which specifies the index/ID of the subtitle stream to use
+    for formats where multiple exist within the same stream. For example,
+    CEA allows specifying up to 4 streams within a single video stream. If not
+    specified, all subtitles will be merged together.
+
 .. include:: /options/drm_stream_descriptors.rst
 .. include:: /options/dash_stream_descriptors.rst
 .. include:: /options/hls_stream_descriptors.rst
--- a/packager/app/stream_descriptor.cc
+++ b/packager/app/stream_descriptor.cc
@ -22,6 +22,7 @@ enum FieldType {
  kSegmentTemplateField,
  kBandwidthField,
  kLanguageField,
+  kCcIndexField,
  kOutputFormatField,
  kHlsNameField,
  kHlsGroupIdField,
@ -57,6 +58,7 @@ const FieldNameToTypeMapping kFieldNameTypeMappings[] = {
    {"bitrate", kBandwidthField},
    {"language", kLanguageField},
    {"lang", kLanguageField},
+    {"cc_index", kCcIndexField},
    {"output_format", kOutputFormatField},
    {"format", kOutputFormatField},
    {"hls_name", kHlsNameField},
@ -133,6 +135,15 @@ base::Optional<StreamDescriptor> ParseStreamDescriptor(
        descriptor.language = iter->second;
        break;
      }
+      case kCcIndexField: {
+        // cc_index selects a single sub-stream of a text input.
+        unsigned index = 0;
+        if (!base::StringToUint(iter->second, &index)) {
+          LOG(ERROR) << "Non-numeric cc_index specified.";
+          return base::nullopt;
+        }
+        // Sub-stream indices are 16-bit further down the pipeline
+        // (CcStreamFilter takes a uint16_t).  Reject larger values instead of
+        // letting them wrap to a different index or to a negative value,
+        // which would be treated as "no cc_index given".
+        if (index > 0xFFFFu) {
+          LOG(ERROR) << "cc_index " << index
+                     << " is out of range (maximum is 65535).";
+          return base::nullopt;
+        }
+        descriptor.cc_index = static_cast<int32_t>(index);
+        break;
+      }
      case kOutputFormatField: {
        descriptor.output_format = iter->second;
        break;
--- a/packager/media/base/bit_reader.h
+++ b/packager/media/base/bit_reader.h
@ -93,6 +93,9 @@ class BitReader {
  /// @return The current bit position.
  size_t bit_position() const { return 8 * initial_size_ - bits_available(); }

+  /// @return A pointer to the current byte, i.e. the byte just before |data_|.
+  const uint8_t* current_byte_ptr() const { return data_ - 1; }  // NOTE(review): presumably |data_| points at the next unread byte — confirm.
+
 private:
  // Help function used by ReadBits to avoid inlining the bit reading logic.
  bool ReadBitsInternal(size_t num_bits, uint64_t* out);
--- a/packager/media/base/cc_stream_filter.cc
+++ b/packager/media/base/cc_stream_filter.cc
@ -0,0 +1,54 @@
+// Copyright 2020 Google LLC. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file or at
+// https://developers.google.com/open-source/licenses/bsd
+
+#include "packager/media/base/cc_stream_filter.h"
+
+#include "packager/media/base/stream_info.h"
+#include "packager/media/base/text_stream_info.h"
+
+namespace shaka {
+namespace media {
+
+CcStreamFilter::CcStreamFilter(const std::string& language, uint16_t cc_index)
+    : language_(language), cc_index_(cc_index) {}  // Both values are stored unchanged.
+
+Status CcStreamFilter::InitializeInternal() {
+  return Status::OK;  // Nothing to set up; always reports success.
+}
+
+Status CcStreamFilter::Process(std::unique_ptr<StreamData> stream_data) {  // Drops text samples of other sub-streams, rewrites text stream language, dispatches the rest.
+  if (stream_data->stream_data_type == StreamDataType::kTextSample) {
+    if (stream_data->text_sample->sub_stream_index() != -1 &&  // Samples whose index is -1 always pass.
+        stream_data->text_sample->sub_stream_index() != cc_index_) {
+      return Status::OK;  // Swallow the sample without dispatching it.
+    }
+  } else if (stream_data->stream_data_type == StreamDataType::kStreamInfo) {
+    if (stream_data->stream_info->stream_type() == kStreamText) {
+      // Overwrite the per-input-stream language with our per-output-stream
+      // language; this requires cloning the stream info as it is used by other
+      // output streams.
+      auto clone = stream_data->stream_info->Clone();
+      if (!language_.empty()) {
+        clone->set_language(language_);  // An explicitly given language takes precedence.
+      } else {
+        // Otherwise fall back to the language recorded for sub-stream cc_index_.
+        auto* text_info = static_cast<TextStreamInfo*>(clone.get());  // NOTE(review): assumes every kStreamText info is a TextStreamInfo — confirm.
+        auto it = text_info->sub_streams().find(cc_index_);
+        if (it != text_info->sub_streams().end()) {
+          clone->set_language(it->second.language);
+        }  // No entry for cc_index_: the clone keeps the language it was copied with.
+      }
+
+      stream_data = StreamData::FromStreamInfo(stream_data->stream_index,
+                                               std::move(clone));  // Replace the shared info with the private clone.
+    }
+  }
+
+  return Dispatch(std::move(stream_data));  // Everything not dropped above is passed to Dispatch().
+}
+
+}  // namespace media
+}  // namespace shaka
--- a/packager/media/base/cc_stream_filter.h
+++ b/packager/media/base/cc_stream_filter.h
@ -0,0 +1,39 @@
+// Copyright 2020 Google LLC. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file or at
+// https://developers.google.com/open-source/licenses/bsd
+
+#ifndef PACKAGER_MEDIA_BASE_CC_STREAM_FILTER_H_
+#define PACKAGER_MEDIA_BASE_CC_STREAM_FILTER_H_
+
+#include <string>
+
+#include "packager/media/base/media_handler.h"
+#include "packager/media/base/text_sample.h"
+#include "packager/status.h"
+
+namespace shaka {
+namespace media {
+
+/// A media handler that filters text samples based on the cc_index field.
+/// Some text formats allow multiple "channels" per stream; this handler keeps
+/// the samples of one channel (and samples without an index) and drops the rest.
+class CcStreamFilter : public MediaHandler {
+ public:
+  CcStreamFilter(const std::string& language, uint16_t cc_index);  // |language| may be empty; see Process().
+  ~CcStreamFilter() override = default;
+
+ protected:
+  Status InitializeInternal() override;
+  Status Process(std::unique_ptr<StreamData> stream_data) override;
+
+ private:
+  const std::string language_;  // Language written to text stream info when non-empty.
+  const uint16_t cc_index_;  // Sub-stream index whose samples are kept.
+};
+
+}  // namespace media
+}  // namespace shaka
+
+#endif  // PACKAGER_MEDIA_BASE_CC_STREAM_FILTER_H_
--- a/packager/media/base/media_base.gyp
+++ b/packager/media/base/media_base.gyp
@ -35,6 +35,8 @@
        'buffer_writer.h',
        'byte_queue.cc',
        'byte_queue.h',
+        'cc_stream_filter.cc',
+        'cc_stream_filter.h',
        'closure_thread.cc',
        'closure_thread.h',
        'common_pssh_generator.cc',
--- a/packager/media/base/stream_info.cc
+++ b/packager/media/base/stream_info.cc
@ -10,6 +10,7 @@

 #include "packager/base/logging.h"
 #include "packager/base/strings/stringprintf.h"
+#include "packager/media/base/timestamp.h"

 namespace shaka {
 namespace media {
@ -56,12 +57,19 @@ StreamInfo::StreamInfo(StreamType stream_type,
 StreamInfo::~StreamInfo() {}

 std::string StreamInfo::ToString() const {
+  std::string duration;
+  if (duration_ == kInfiniteDuration) {
+    duration = "Infinite";
+  } else {
+    duration = base::StringPrintf("%" PRIu64 " (%.1f seconds)", duration_,
+                                  static_cast<double>(duration_) / time_scale_);
+  }
+
  return base::StringPrintf(
      "type: %s\n codec_string: %s\n time_scale: %d\n duration: "
-      "%" PRIu64 " (%.1f seconds)\n is_encrypted: %s\n",
-      (stream_type_ == kStreamAudio ? "Audio" : "Video"), codec_string_.c_str(),
-      time_scale_, duration_, static_cast<double>(duration_) / time_scale_,
-      is_encrypted_ ? "true" : "false");
+      "%s\n is_encrypted: %s\n",
+      StreamTypeToString(stream_type_).c_str(), codec_string_.c_str(),
+      time_scale_, duration.c_str(), is_encrypted_ ? "true" : "false");
 }

 }  // namespace media
--- a/packager/media/base/text_sample.h
+++ b/packager/media/base/text_sample.h
@ -127,6 +127,9 @@ class TextSample {
  const TextFragment& body() const { return body_; }
  int64_t EndTime() const;

+  int32_t sub_stream_index() const { return sub_stream_index_; }  // -1 by default, i.e. until a setter call changes it.
+  void set_sub_stream_index(int32_t idx) { sub_stream_index_ = idx; }  // Stores |idx| as-is; no range check.
+
 private:
  // Allow the compiler generated copy constructor and assignment operator
  // intentionally. Since the text data is typically small, the performance
@ -137,6 +140,7 @@ class TextSample {
  const int64_t duration_ = 0;
  const TextSettings settings_;
  const TextFragment body_;
+  int32_t sub_stream_index_ = -1;
 };

 }  // namespace media
--- a/packager/media/base/text_stream_info.cc
+++ b/packager/media/base/text_stream_info.cc
@ -6,6 +6,8 @@

 #include "packager/media/base/text_stream_info.h"

+#include "packager/base/strings/stringprintf.h"
+
 namespace shaka {
 namespace media {

@ -28,6 +30,18 @@ bool TextStreamInfo::IsValidConfig() const {
  return true;
 }

+std::string TextStreamInfo::ToString() const {  // Base-class description followed by one line per sub-stream.
+  std::string ret = StreamInfo::ToString();
+  if (!sub_streams_.empty()) {
+    ret += " Sub Streams:";
+    for (auto& pair : sub_streams_) {  // pair.first is the sub-stream index, pair.second its info.
+      ret += base::StringPrintf("\n  ID: %u, Lang: %s", pair.first,
+                                pair.second.language.c_str());
+    }
+  }
+  return ret + "\n";  // The trailing newline is appended even when there are no sub-streams.
+}
+
 std::unique_ptr<StreamInfo> TextStreamInfo::Clone() const {
  return std::unique_ptr<StreamInfo>(new TextStreamInfo(*this));
 }
--- a/packager/media/base/text_stream_info.h
+++ b/packager/media/base/text_stream_info.h
@ -40,6 +40,12 @@ struct TextRegion {
  bool scroll = false;
 };

+/// Contains info about a sub-stream within a text stream.  Depending on the
+/// format, some info may not be available.  CcStreamFilter reads |language|.
+struct TextSubStreamInfo {
+  std::string language;  // Language of the sub-stream, e.g. taken from a DVB subtitling descriptor.
+};
+
 class TextStreamInfo : public StreamInfo {
 public:
  /// No encryption supported.
@ -64,6 +70,7 @@ class TextStreamInfo : public StreamInfo {

  bool IsValidConfig() const override;

+  std::string ToString() const override;
  std::unique_ptr<StreamInfo> Clone() const override;

  uint16_t width() const { return width_; }
@ -75,8 +82,16 @@ class TextStreamInfo : public StreamInfo {
  const std::string& css_styles() const { return css_styles_; }
  void set_css_styles(const std::string& styles) { css_styles_ = styles; }

+  void AddSubStream(uint16_t index, TextSubStreamInfo info) {  // Registers |info| under |index|.
+    sub_streams_.emplace(index, std::move(info));  // emplace() leaves an existing entry for |index| untouched.
+  }
+  const std::map<uint16_t, TextSubStreamInfo>& sub_streams() const {  // Read-only view, keyed by sub-stream index.
+    return sub_streams_;
+  }
+
 private:
  std::map<std::string, TextRegion> regions_;
+  std::map<uint16_t, TextSubStreamInfo> sub_streams_;
  std::string css_styles_;
  uint16_t width_;
  uint16_t height_;
--- a/packager/media/formats/mp2t/es_parser_dvb.cc
+++ b/packager/media/formats/mp2t/es_parser_dvb.cc
@ -15,12 +15,50 @@ namespace shaka {
 namespace media {
 namespace mp2t {

+namespace {
+
+// Collects the page_id -> language mapping from the subtitling_descriptor(s)
+// found in |descriptor|.
+//
+// |descriptor| points at |size| bytes holding the ES_info descriptor loop of
+// the stream.  That loop may contain other descriptors before or after the
+// subtitling_descriptor, so every descriptor header is inspected and only the
+// ones tagged 0x59 are decoded; the rest are skipped.  Decoded entries are
+// added to |langs|.
+//
+// Returns false if the data is malformed or if no subtitling_descriptor is
+// present.  Entries decoded before an error was detected remain in |langs|.
+bool ParseSubtitlingDescriptor(
+    const uint8_t* descriptor,
+    size_t size,
+    std::unordered_map<uint16_t, std::string>* langs) {
+  // See ETSI EN 300 468 Section 6.2.41.
+  const int kSubtitlingDescriptorTag = 0x59;
+  // Each entry is 24 (language) + 8 (type) + 16 (page) + 16 (ancillary) bits.
+  const size_t kEntrySize = 8;
+
+  BitReader reader(descriptor, size);
+  bool found = false;
+  while (reader.bits_available() > 0) {
+    int tag;
+    size_t data_size;
+    RCHECK(reader.ReadBits(8, &tag));
+    RCHECK(reader.ReadBits(8, &data_size));
+    // The payload must fit in what is left of the buffer.
+    RCHECK(8 * data_size <= reader.bits_available());
+
+    if (tag != kSubtitlingDescriptorTag) {
+      // Not a subtitling_descriptor; step over its payload.
+      RCHECK(reader.SkipBits(8 * data_size));
+      continue;
+    }
+
+    // A well-formed payload is a whole number of entries; anything else would
+    // make the loop below read into the following descriptor.
+    RCHECK(data_size % kEntrySize == 0);
+    found = true;
+    for (size_t i = 0; i < data_size; i += kEntrySize) {
+      uint32_t lang_code;
+      uint16_t page;
+      RCHECK(reader.ReadBits(24, &lang_code));
+      RCHECK(reader.SkipBits(8));  // subtitling_type
+      RCHECK(reader.ReadBits(16, &page));
+      RCHECK(reader.SkipBits(16));  // ancillary_page_id
+
+      // The lang code is a ISO 639-2 code coded in Latin-1.
+      std::string lang(3, '\0');
+      lang[0] = (lang_code >> 16) & 0xff;
+      lang[1] = (lang_code >> 8) & 0xff;
+      lang[2] = (lang_code >> 0) & 0xff;
+      // emplace() keeps the first language seen for a given page.
+      langs->emplace(page, std::move(lang));
+    }
+  }
+  return found;
+}
+
+}  // namespace
+
 EsParserDvb::EsParserDvb(uint32_t pid,
                         const NewStreamInfoCB& new_stream_info_cb,
-                         const EmitTextSampleCB& emit_sample_cb)
+                         const EmitTextSampleCB& emit_sample_cb,
+                         const uint8_t* descriptor,
+                         size_t descriptor_length)
    : EsParser(pid),
      new_stream_info_cb_(new_stream_info_cb),
-      emit_sample_cb_(emit_sample_cb) {}
+      emit_sample_cb_(emit_sample_cb) {
+  if (!ParseSubtitlingDescriptor(descriptor, descriptor_length, &languages_)) {
+    LOG(WARNING) << "Error parsing subtitling descriptor";
+  }
+}

 EsParserDvb::~EsParserDvb() {}

@ -30,10 +68,14 @@ bool EsParserDvb::Parse(const uint8_t* buf,
                        int64_t dts) {
  if (!sent_info_) {
    sent_info_ = true;
-    std::shared_ptr<StreamInfo> info = std::make_shared<TextStreamInfo>(
+    std::shared_ptr<TextStreamInfo> info = std::make_shared<TextStreamInfo>(
        pid(), kMpeg2Timescale, kInfiniteDuration, kCodecText,
        /* codec_string= */ "", /* codec_config= */ "", /* width= */ 0,
        /* height= */ 0, /* language= */ "");
+    for (const auto& pair : languages_) {
+      info->AddSubStream(pair.first, {pair.second});
+    }
+
    new_stream_info_cb_.Run(info);
  }

@ -47,8 +89,10 @@ bool EsParserDvb::Flush() {
    std::vector<std::shared_ptr<TextSample>> samples;
    RCHECK(pair.second.Flush(&samples));

-    for (auto sample : samples)
+    for (auto sample : samples) {
+      sample->set_sub_stream_index(pair.first);
      emit_sample_cb_.Run(sample);
+    }
  }
  return true;
 }
@ -81,8 +125,10 @@ bool EsParserDvb::ParseInternal(const uint8_t* data, size_t size, int64_t pts) {
    std::vector<std::shared_ptr<TextSample>> samples;
    RCHECK(parsers_[page_id].Parse(segment_type, pts, payload, segment_length,
                                   &samples));
-    for (auto sample : samples)
+    for (auto sample : samples) {
+      sample->set_sub_stream_index(page_id);
      emit_sample_cb_.Run(sample);
+    }

    RCHECK(reader.SkipBytes(segment_length));
  }
--- a/packager/media/formats/mp2t/es_parser_dvb.h
+++ b/packager/media/formats/mp2t/es_parser_dvb.h
@ -22,7 +22,9 @@ class EsParserDvb : public EsParser {
 public:
  EsParserDvb(uint32_t pid,
              const NewStreamInfoCB& new_stream_info_cb,
-              const EmitTextSampleCB& emit_sample_cb);
+              const EmitTextSampleCB& emit_sample_cb,
+              const uint8_t* descriptor,
+              size_t descriptor_length);
  ~EsParserDvb() override;

  // EsParser implementation.
@ -44,6 +46,8 @@ class EsParserDvb : public EsParser {

  // A map of page_id to parser.
  std::unordered_map<uint16_t, DvbSubParser> parsers_;
+  // A map of page_id to language.
+  std::unordered_map<uint16_t, std::string> languages_;
  bool sent_info_ = false;
 };

--- a/packager/media/formats/mp2t/mp2t_media_parser.cc
+++ b/packager/media/formats/mp2t/mp2t_media_parser.cc
@ -275,7 +275,9 @@ void Mp2tMediaParser::RegisterPmt(int program_number, int pmt_pid) {

 void Mp2tMediaParser::RegisterPes(int pmt_pid,
                                  int pes_pid,
-                                  TsStreamType stream_type) {
+                                  TsStreamType stream_type,
+                                  const uint8_t* descriptor,
+                                  size_t descriptor_length) {
  if (pids_.count(pes_pid) != 0)
    return;
  DVLOG(1) << "RegisterPes:"
@ -307,7 +309,8 @@ void Mp2tMediaParser::RegisterPes(int pmt_pid,
      pid_type = PidState::kPidAudioPes;
      break;
    case TsStreamType::kDvbSubtitles:
-      es_parser.reset(new EsParserDvb(pes_pid, on_new_stream, on_emit_text));
+      es_parser.reset(new EsParserDvb(pes_pid, on_new_stream, on_emit_text,
+                                      descriptor, descriptor_length));
      pid_type = PidState::kPidTextPes;
      break;
    default: {
--- a/packager/media/formats/mp2t/mp2t_media_parser.h
+++ b/packager/media/formats/mp2t/mp2t_media_parser.h
@ -50,7 +50,11 @@ class Mp2tMediaParser : public MediaParser {
  // Possible values for |media_type| are defined in:
  // ISO-13818.1 / ITU H.222 Table 2.34 "Media type assignments".
  // |pes_pid| is part of the Program Map Table refered by |pmt_pid|.
-  void RegisterPes(int pmt_pid, int pes_pid, TsStreamType media_type);
+  void RegisterPes(int pmt_pid,
+                   int pes_pid,
+                   TsStreamType media_type,
+                   const uint8_t* descriptor,
+                   size_t descriptor_length);

  // Callback invoked each time the audio/video decoder configuration is
  // changed.
--- a/packager/media/formats/mp2t/ts_section_pmt.cc
+++ b/packager/media/formats/mp2t/ts_section_pmt.cc
@ -4,7 +4,7 @@

 #include "packager/media/formats/mp2t/ts_section_pmt.h"

-#include <map>
+#include <vector>

 #include "packager/base/logging.h"
 #include "packager/media/base/bit_reader.h"
@ -76,22 +76,29 @@ bool TsSectionPmt::ParsePsiSection(BitReader* bit_reader) {
  // The end of the PID map if 4 bytes away from the end of the section
  // (4 bytes = size of the CRC).
  int pid_map_end_marker = section_start_marker - section_length + 4;
-  std::map<int, TsStreamType> pid_map;
+  struct Info {
+    int pid_es;
+    TsStreamType stream_type;
+    const uint8_t* descriptor;
+    size_t descriptor_length;
+  };
+  std::vector<Info> pid_info;
  while (static_cast<int>(bit_reader->bits_available()) >
         8 * pid_map_end_marker) {
    TsStreamType stream_type;
    int pid_es;
-    int es_info_length;
+    size_t es_info_length;
    RCHECK(bit_reader->ReadBits(8, &stream_type));
    RCHECK(bit_reader->SkipBits(3));  // reserved
    RCHECK(bit_reader->ReadBits(13, &pid_es));
    RCHECK(bit_reader->ReadBits(4, &reserved));
    RCHECK(bit_reader->ReadBits(12, &es_info_length));
+    const uint8_t* descriptor = bit_reader->current_byte_ptr();

    // Do not register the PID right away.
    // Wait for the end of the section to be fully parsed
    // to make sure there is no error.
-    pid_map.emplace(pid_es, stream_type);
+    pid_info.push_back({pid_es, stream_type, descriptor, es_info_length});

    // Read the ES info descriptors.
    // Defined in section 2.6 of ISO-13818.
@ -103,7 +110,7 @@ bool TsSectionPmt::ParsePsiSection(BitReader* bit_reader) {
      // See ETSI EN 300 468 Section 6.1
      if (stream_type == TsStreamType::kPesPrivateData &&
          descriptor_tag == 0x59) {  // subtitling_descriptor
-        pid_map[pid_es] = TsStreamType::kDvbSubtitles;
+        pid_info.back().stream_type = TsStreamType::kDvbSubtitles;
      }
    }
    RCHECK(bit_reader->SkipBits(8 * es_info_length));
@ -114,8 +121,10 @@ bool TsSectionPmt::ParsePsiSection(BitReader* bit_reader) {
  RCHECK(bit_reader->ReadBits(32, &crc32));

  // Once the PMT has been proved to be correct, register the PIDs.
-  for (auto& pair : pid_map)
-    register_pes_cb_.Run(pair.first, pair.second);
+  for (auto& info : pid_info) {
+    register_pes_cb_.Run(info.pid_es, info.stream_type, info.descriptor,
+                         info.descriptor_length);
+  }

  return true;
 }
--- a/packager/media/formats/mp2t/ts_section_pmt.h
+++ b/packager/media/formats/mp2t/ts_section_pmt.h
@ -19,7 +19,8 @@ class TsSectionPmt : public TsSectionPsi {
  // RegisterPesCb::Run(int pes_pid, int stream_type);
  // Stream type is defined in
  // "Table 2-34 – Stream type assignments" in H.222
-  typedef base::Callback<void(int, TsStreamType)> RegisterPesCb;
+  typedef base::Callback<void(int, TsStreamType, const uint8_t*, size_t)>
+      RegisterPesCb;  // Args: PES pid, stream type, pointer to the ES_info descriptor bytes, their length.

  explicit TsSectionPmt(const RegisterPesCb& register_pes_cb);
  ~TsSectionPmt() override;
--- a/packager/packager.cc
+++ b/packager/packager.cc
@ -26,6 +26,7 @@
 #include "packager/file/file.h"
 #include "packager/hls/base/hls_notifier.h"
 #include "packager/hls/base/simple_hls_notifier.h"
+#include "packager/media/base/cc_stream_filter.h"
 #include "packager/media/base/container_names.h"
 #include "packager/media/base/fourccs.h"
 #include "packager/media/base/key_source.h"
@ -679,6 +680,11 @@ Status CreateAudioVideoJobs(
          std::make_shared<TrickPlayHandler>(stream.trick_play_factor));
    }

+    if (stream.cc_index >= 0) {
+      handlers.emplace_back(
+          std::make_shared<CcStreamFilter>(stream.language, stream.cc_index));
+    }
+
    if (is_text &&
        (!stream.segment_template.empty() || output_format == CONTAINER_MOV)) {
      handlers.emplace_back(
--- a/packager/packager.h
+++ b/packager/packager.h
@ -109,6 +109,10 @@ struct StreamDescriptor {
  /// Optional value which contains a user-specified language tag. If specified,
  /// this value overrides any language metadata in the input stream.
  std::string language;
+  /// Optional value for the index of the sub-stream to use. For some text
+  /// formats, there are multiple "channels" in a single stream. This allows
+  /// selecting only one channel. A negative value (the default) means no filtering.
+  int32_t cc_index = -1;

  /// Required for audio when outputting HLS. It defines the name of the output
  /// stream, which is not necessarily the same as output. This is used as the