Add cc_index to stream descriptor.

This also allows setting the language of different text streams from
the same input.  Multiple streams can use the same input stream
using different cc_index values and can each use a different language.

This will also try to pull the language from the input if one is not
specified.

Change-Id: I7078710b509b7d77dad8cb4299a82f954af7e9e7
This commit is contained in:
Jacob Trimble 2020-12-11 12:58:26 -08:00
parent 78be14c092
commit a0f3f2cd3a
18 changed files with 255 additions and 21 deletions

View File

@ -63,6 +63,13 @@ These are the available fields:
sampling rate among key frames. If specified, the output is a trick play
stream.
:cc_index:
Optional value which specifies the index/ID of the subtitle stream to use
for formats where multiple exist within the same stream. For example,
CEA allows specifying up to 4 streams within a single video stream. If not
specified, all subtitles will be merged together.
.. include:: /options/drm_stream_descriptors.rst
.. include:: /options/dash_stream_descriptors.rst
.. include:: /options/hls_stream_descriptors.rst

View File

@ -22,6 +22,7 @@ enum FieldType {
kSegmentTemplateField,
kBandwidthField,
kLanguageField,
kCcIndexField,
kOutputFormatField,
kHlsNameField,
kHlsGroupIdField,
@ -57,6 +58,7 @@ const FieldNameToTypeMapping kFieldNameTypeMappings[] = {
{"bitrate", kBandwidthField},
{"language", kLanguageField},
{"lang", kLanguageField},
{"cc_index", kCcIndexField},
{"output_format", kOutputFormatField},
{"format", kOutputFormatField},
{"hls_name", kHlsNameField},
@ -133,6 +135,15 @@ base::Optional<StreamDescriptor> ParseStreamDescriptor(
descriptor.language = iter->second;
break;
}
case kCcIndexField: {
unsigned index;
if (!base::StringToUint(iter->second, &index)) {
LOG(ERROR) << "Non-numeric cc_index specified.";
return base::nullopt;
}
descriptor.cc_index = index;
break;
}
case kOutputFormatField: {
descriptor.output_format = iter->second;
break;

View File

@ -93,6 +93,9 @@ class BitReader {
/// @return The current bit position.
// Computed as 8 * |initial_size_| (presumably the byte size the reader was
// created with -- confirm) minus the number of bits still available.
size_t bit_position() const { return 8 * initial_size_ - bits_available(); }
/// @return A pointer to the current byte.
// NOTE(review): this returns the byte just before |data_|, which presumably
// means |data_| has already advanced past the byte being read -- confirm
// against the reader's byte-loading logic, in particular once the input is
// exhausted.
const uint8_t* current_byte_ptr() const { return data_ - 1; }
private:
// Help function used by ReadBits to avoid inlining the bit reading logic.
bool ReadBitsInternal(size_t num_bits, uint64_t* out);

View File

@ -0,0 +1,54 @@
// Copyright 2020 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#include "packager/media/base/cc_stream_filter.h"
#include "packager/media/base/stream_info.h"
#include "packager/media/base/text_stream_info.h"
namespace shaka {
namespace media {
// Stores the language override and the sub-stream index to keep. Both are
// copied into const members, so they stay fixed for the filter's lifetime.
CcStreamFilter::CcStreamFilter(const std::string& language, uint16_t cc_index)
: language_(language), cc_index_(cc_index) {}
// The filter needs no setup work; this always reports success.
Status CcStreamFilter::InitializeInternal() {
return Status::OK;
}
// Forwards |stream_data| downstream unless it is a text sample tagged with a
// sub-stream index other than |cc_index_|. Text stream infos are forwarded as
// a modified copy that carries the language chosen for this output.
Status CcStreamFilter::Process(std::unique_ptr<StreamData> stream_data) {
  switch (stream_data->stream_data_type) {
    case StreamDataType::kTextSample: {
      // Samples with an index of -1 pass through regardless of |cc_index_|.
      const int32_t sample_index =
          stream_data->text_sample->sub_stream_index();
      if (sample_index != -1 && sample_index != cc_index_) {
        // Belongs to another sub-stream: swallow it without dispatching.
        return Status::OK;
      }
      break;
    }
    case StreamDataType::kStreamInfo: {
      if (stream_data->stream_info->stream_type() != kStreamText)
        break;

      // The stream info is shared with other output streams, so the
      // per-output language has to be written to a private copy.
      auto info_copy = stream_data->stream_info->Clone();
      if (language_.empty()) {
        // No explicit language: fall back to the one recorded for our
        // sub-stream, if the input provided any.
        const auto& known_sub_streams =
            static_cast<TextStreamInfo*>(info_copy.get())->sub_streams();
        const auto match = known_sub_streams.find(cc_index_);
        if (match != known_sub_streams.end())
          info_copy->set_language(match->second.language);
      } else {
        info_copy->set_language(language_);
      }
      stream_data = StreamData::FromStreamInfo(stream_data->stream_index,
                                               std::move(info_copy));
      break;
    }
    default:
      break;
  }
  return Dispatch(std::move(stream_data));
}
} // namespace media
} // namespace shaka

View File

@ -0,0 +1,39 @@
// Copyright 2020 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#ifndef PACKAGER_MEDIA_BASE_CC_STREAM_FILTER_H_
#define PACKAGER_MEDIA_BASE_CC_STREAM_FILTER_H_
#include <string>
#include "packager/media/base/media_handler.h"
#include "packager/media/base/text_sample.h"
#include "packager/status.h"
namespace shaka {
namespace media {
/// A media handler that filters text samples based on the cc_index field.
/// Some text formats allow multiple "channels" per stream; this handler lets
/// through only the samples of one of them (plus samples that carry no
/// sub-stream index) and drops the rest.
class CcStreamFilter : public MediaHandler {
public:
/// @param language is the language to set on forwarded text stream infos;
///        when empty, the language recorded for the selected sub-stream is
///        used if one is known.
/// @param cc_index is the index of the sub-stream whose samples are kept.
CcStreamFilter(const std::string& language, uint16_t cc_index);
~CcStreamFilter() override = default;
protected:
// MediaHandler implementation overrides.
Status InitializeInternal() override;
Status Process(std::unique_ptr<StreamData> stream_data) override;
private:
// Language override for the output stream; may be empty.
const std::string language_;
// Sub-stream index compared against each text sample's sub_stream_index().
const uint16_t cc_index_;
};
} // namespace media
} // namespace shaka
#endif // PACKAGER_MEDIA_BASE_CC_STREAM_FILTER_H_

View File

@ -35,6 +35,8 @@
'buffer_writer.h',
'byte_queue.cc',
'byte_queue.h',
'cc_stream_filter.cc',
'cc_stream_filter.h',
'closure_thread.cc',
'closure_thread.h',
'common_pssh_generator.cc',

View File

@ -10,6 +10,7 @@
#include "packager/base/logging.h"
#include "packager/base/strings/stringprintf.h"
#include "packager/media/base/timestamp.h"
namespace shaka {
namespace media {
@ -56,12 +57,19 @@ StreamInfo::StreamInfo(StreamType stream_type,
StreamInfo::~StreamInfo() {}
std::string StreamInfo::ToString() const {
std::string duration;
if (duration_ == kInfiniteDuration) {
duration = "Infinite";
} else {
duration = base::StringPrintf("%" PRIu64 " (%.1f seconds)", duration_,
static_cast<double>(duration_) / time_scale_);
}
return base::StringPrintf(
"type: %s\n codec_string: %s\n time_scale: %d\n duration: "
"%" PRIu64 " (%.1f seconds)\n is_encrypted: %s\n",
(stream_type_ == kStreamAudio ? "Audio" : "Video"), codec_string_.c_str(),
time_scale_, duration_, static_cast<double>(duration_) / time_scale_,
is_encrypted_ ? "true" : "false");
"%s\n is_encrypted: %s\n",
StreamTypeToString(stream_type_).c_str(), codec_string_.c_str(),
time_scale_, duration.c_str(), is_encrypted_ ? "true" : "false");
}
} // namespace media

View File

@ -127,6 +127,9 @@ class TextSample {
const TextFragment& body() const { return body_; }
int64_t EndTime() const;
// Index of the sub-stream this sample belongs to. The backing member is
// initialized to -1, so -1 means no index has been assigned.
int32_t sub_stream_index() const { return sub_stream_index_; }
// Tags the sample with the index of the sub-stream it was parsed from.
void set_sub_stream_index(int32_t idx) { sub_stream_index_ = idx; }
private:
// Allow the compiler generated copy constructor and assignment operator
// intentionally. Since the text data is typically small, the performance
@ -137,6 +140,7 @@ class TextSample {
const int64_t duration_ = 0;
const TextSettings settings_;
const TextFragment body_;
int32_t sub_stream_index_ = -1;
};
} // namespace media

View File

@ -6,6 +6,8 @@
#include "packager/media/base/text_stream_info.h"
#include "packager/base/strings/stringprintf.h"
namespace shaka {
namespace media {
@ -28,6 +30,18 @@ bool TextStreamInfo::IsValidConfig() const {
return true;
}
// Returns the generic stream description followed by one line per known
// sub-stream (ID and language), terminated by a newline.
std::string TextStreamInfo::ToString() const {
  std::string description = StreamInfo::ToString();
  if (sub_streams_.empty())
    return description + "\n";

  description += " Sub Streams:";
  for (auto it = sub_streams_.begin(); it != sub_streams_.end(); ++it) {
    description += base::StringPrintf("\n ID: %u, Lang: %s", it->first,
                                      it->second.language.c_str());
  }
  description += "\n";
  return description;
}
// Produces an independent copy of this info via the copy constructor.
std::unique_ptr<StreamInfo> TextStreamInfo::Clone() const {
  std::unique_ptr<StreamInfo> duplicate(new TextStreamInfo(*this));
  return duplicate;
}

View File

@ -40,6 +40,12 @@ struct TextRegion {
bool scroll = false;
};
/// Contains info about a sub-stream within a text stream. Depending on the
/// format, some info may not be available. This info doesn't affect output.
struct TextSubStreamInfo {
// Language of the sub-stream as reported by the input. CcStreamFilter copies
// it onto the output stream info when no explicit language was requested.
std::string language;
};
class TextStreamInfo : public StreamInfo {
public:
/// No encryption supported.
@ -64,6 +70,7 @@ class TextStreamInfo : public StreamInfo {
bool IsValidConfig() const override;
std::string ToString() const override;
std::unique_ptr<StreamInfo> Clone() const override;
uint16_t width() const { return width_; }
@ -75,8 +82,16 @@ class TextStreamInfo : public StreamInfo {
const std::string& css_styles() const { return css_styles_; }
void set_css_styles(const std::string& styles) { css_styles_ = styles; }
// Records |info| for the sub-stream |index|. An entry that already exists for
// |index| is left untouched.
void AddSubStream(uint16_t index, TextSubStreamInfo info) {
  sub_streams_.insert(std::make_pair(index, std::move(info)));
}
// Read-only view of the sub-stream index -> info map filled by AddSubStream().
const std::map<uint16_t, TextSubStreamInfo>& sub_streams() const {
return sub_streams_;
}
private:
std::map<std::string, TextRegion> regions_;
std::map<uint16_t, TextSubStreamInfo> sub_streams_;
std::string css_styles_;
uint16_t width_;
uint16_t height_;

View File

@ -15,12 +15,50 @@ namespace shaka {
namespace media {
namespace mp2t {
namespace {
// Extracts the page_id -> language mapping from the subtitling descriptor(s)
// found in an elementary stream's descriptor loop.
//
// |descriptor| points at the first descriptor of the loop and |size| is the
// byte length of the whole loop, so descriptors other than the subtitling
// descriptor (tag 0x59) may be present and are skipped. Every entry of a
// subtitling descriptor is added to |langs|, keyed by its composition page id;
// an id that is already present keeps its existing language.
//
// Returns true if at least one subtitling descriptor was found and the whole
// loop was well formed. Returns false for an empty or null input, a truncated
// descriptor, or a subtitling descriptor whose payload is not a whole number
// of 8-byte entries. |langs| may already hold entries parsed before a later
// error was detected.
bool ParseSubtitlingDescriptor(
    const uint8_t* descriptor,
    size_t size,
    std::unordered_map<uint16_t, std::string>* langs) {
  // See ETSI EN 300 468 Section 6.2.41.
  const uint8_t kSubtitlingDescriptorTag = 0x59;
  // descriptor_tag (1 byte) + descriptor_length (1 byte).
  const size_t kHeaderSize = 2;
  // ISO_639_language_code (3) + subtitling_type (1) +
  // composition_page_id (2) + ancillary_page_id (2).
  const size_t kEntrySize = 8;

  if (!descriptor)
    return false;

  bool found = false;
  size_t offset = 0;
  while (offset < size) {
    if (size - offset < kHeaderSize)
      return false;  // Truncated descriptor header.
    const uint8_t tag = descriptor[offset];
    const size_t data_size = descriptor[offset + 1];
    offset += kHeaderSize;
    if (data_size > size - offset)
      return false;  // Payload runs past the end of the loop.

    if (tag == kSubtitlingDescriptorTag) {
      // A partial entry would make us read bytes of the next descriptor.
      if (data_size % kEntrySize != 0)
        return false;

      for (size_t i = 0; i < data_size; i += kEntrySize) {
        const uint8_t* entry = descriptor + offset + i;
        // The lang code is a ISO 639-2 code coded in Latin-1.
        std::string lang(entry, entry + 3);
        // entry[3] is subtitling_type; entry[6..7] is ancillary_page_id.
        const uint16_t page =
            static_cast<uint16_t>((entry[4] << 8) | entry[5]);
        langs->emplace(page, std::move(lang));
      }
      found = true;
    }
    offset += data_size;
  }
  return found;
}
} // namespace
EsParserDvb::EsParserDvb(uint32_t pid,
const NewStreamInfoCB& new_stream_info_cb,
const EmitTextSampleCB& emit_sample_cb)
const EmitTextSampleCB& emit_sample_cb,
const uint8_t* descriptor,
size_t descriptor_length)
: EsParser(pid),
new_stream_info_cb_(new_stream_info_cb),
emit_sample_cb_(emit_sample_cb) {}
emit_sample_cb_(emit_sample_cb) {
if (!ParseSubtitlingDescriptor(descriptor, descriptor_length, &languages_)) {
LOG(WARNING) << "Error parsing subtitling descriptor";
}
}
EsParserDvb::~EsParserDvb() {}
@ -30,10 +68,14 @@ bool EsParserDvb::Parse(const uint8_t* buf,
int64_t dts) {
if (!sent_info_) {
sent_info_ = true;
std::shared_ptr<StreamInfo> info = std::make_shared<TextStreamInfo>(
std::shared_ptr<TextStreamInfo> info = std::make_shared<TextStreamInfo>(
pid(), kMpeg2Timescale, kInfiniteDuration, kCodecText,
/* codec_string= */ "", /* codec_config= */ "", /* width= */ 0,
/* height= */ 0, /* language= */ "");
for (const auto& pair : languages_) {
info->AddSubStream(pair.first, {pair.second});
}
new_stream_info_cb_.Run(info);
}
@ -47,9 +89,11 @@ bool EsParserDvb::Flush() {
std::vector<std::shared_ptr<TextSample>> samples;
RCHECK(pair.second.Flush(&samples));
for (auto sample : samples)
for (auto sample : samples) {
sample->set_sub_stream_index(pair.first);
emit_sample_cb_.Run(sample);
}
}
return true;
}
@ -81,8 +125,10 @@ bool EsParserDvb::ParseInternal(const uint8_t* data, size_t size, int64_t pts) {
std::vector<std::shared_ptr<TextSample>> samples;
RCHECK(parsers_[page_id].Parse(segment_type, pts, payload, segment_length,
&samples));
for (auto sample : samples)
for (auto sample : samples) {
sample->set_sub_stream_index(page_id);
emit_sample_cb_.Run(sample);
}
RCHECK(reader.SkipBytes(segment_length));
}

View File

@ -22,7 +22,9 @@ class EsParserDvb : public EsParser {
public:
EsParserDvb(uint32_t pid,
const NewStreamInfoCB& new_stream_info_cb,
const EmitTextSampleCB& emit_sample_cb);
const EmitTextSampleCB& emit_sample_cb,
const uint8_t* descriptor,
size_t descriptor_length);
~EsParserDvb() override;
// EsParser implementation.
@ -44,6 +46,8 @@ class EsParserDvb : public EsParser {
// A map of page_id to parser.
std::unordered_map<uint16_t, DvbSubParser> parsers_;
// A map of page_id to language.
std::unordered_map<uint16_t, std::string> languages_;
bool sent_info_ = false;
};

View File

@ -275,7 +275,9 @@ void Mp2tMediaParser::RegisterPmt(int program_number, int pmt_pid) {
void Mp2tMediaParser::RegisterPes(int pmt_pid,
int pes_pid,
TsStreamType stream_type) {
TsStreamType stream_type,
const uint8_t* descriptor,
size_t descriptor_length) {
if (pids_.count(pes_pid) != 0)
return;
DVLOG(1) << "RegisterPes:"
@ -307,7 +309,8 @@ void Mp2tMediaParser::RegisterPes(int pmt_pid,
pid_type = PidState::kPidAudioPes;
break;
case TsStreamType::kDvbSubtitles:
es_parser.reset(new EsParserDvb(pes_pid, on_new_stream, on_emit_text));
es_parser.reset(new EsParserDvb(pes_pid, on_new_stream, on_emit_text,
descriptor, descriptor_length));
pid_type = PidState::kPidTextPes;
break;
default: {

View File

@ -50,7 +50,11 @@ class Mp2tMediaParser : public MediaParser {
// Possible values for |media_type| are defined in:
// ISO-13818.1 / ITU H.222 Table 2.34 "Media type assignments".
// |pes_pid| is part of the Program Map Table referred to by |pmt_pid|.
void RegisterPes(int pmt_pid, int pes_pid, TsStreamType media_type);
void RegisterPes(int pmt_pid,
int pes_pid,
TsStreamType media_type,
const uint8_t* descriptor,
size_t descriptor_length);
// Callback invoked each time the audio/video decoder configuration is
// changed.

View File

@ -4,7 +4,7 @@
#include "packager/media/formats/mp2t/ts_section_pmt.h"
#include <map>
#include <vector>
#include "packager/base/logging.h"
#include "packager/media/base/bit_reader.h"
@ -76,22 +76,29 @@ bool TsSectionPmt::ParsePsiSection(BitReader* bit_reader) {
// The end of the PID map is 4 bytes away from the end of the section
// (4 bytes = size of the CRC).
int pid_map_end_marker = section_start_marker - section_length + 4;
std::map<int, TsStreamType> pid_map;
struct Info {
int pid_es;
TsStreamType stream_type;
const uint8_t* descriptor;
size_t descriptor_length;
};
std::vector<Info> pid_info;
while (static_cast<int>(bit_reader->bits_available()) >
8 * pid_map_end_marker) {
TsStreamType stream_type;
int pid_es;
int es_info_length;
size_t es_info_length;
RCHECK(bit_reader->ReadBits(8, &stream_type));
RCHECK(bit_reader->SkipBits(3)); // reserved
RCHECK(bit_reader->ReadBits(13, &pid_es));
RCHECK(bit_reader->ReadBits(4, &reserved));
RCHECK(bit_reader->ReadBits(12, &es_info_length));
const uint8_t* descriptor = bit_reader->current_byte_ptr();
// Do not register the PID right away.
// Wait for the end of the section to be fully parsed
// to make sure there is no error.
pid_map.emplace(pid_es, stream_type);
pid_info.push_back({pid_es, stream_type, descriptor, es_info_length});
// Read the ES info descriptors.
// Defined in section 2.6 of ISO-13818.
@ -103,7 +110,7 @@ bool TsSectionPmt::ParsePsiSection(BitReader* bit_reader) {
// See ETSI EN 300 468 Section 6.1
if (stream_type == TsStreamType::kPesPrivateData &&
descriptor_tag == 0x59) { // subtitling_descriptor
pid_map[pid_es] = TsStreamType::kDvbSubtitles;
pid_info.back().stream_type = TsStreamType::kDvbSubtitles;
}
}
RCHECK(bit_reader->SkipBits(8 * es_info_length));
@ -114,8 +121,10 @@ bool TsSectionPmt::ParsePsiSection(BitReader* bit_reader) {
RCHECK(bit_reader->ReadBits(32, &crc32));
// Once the PMT has been proved to be correct, register the PIDs.
for (auto& pair : pid_map)
register_pes_cb_.Run(pair.first, pair.second);
for (auto& info : pid_info) {
register_pes_cb_.Run(info.pid_es, info.stream_type, info.descriptor,
info.descriptor_length);
}
return true;
}

View File

@ -19,7 +19,8 @@ class TsSectionPmt : public TsSectionPsi {
// RegisterPesCb::Run(int pes_pid, TsStreamType stream_type,
//                    const uint8_t* descriptor, size_t descriptor_length);
// Stream type is defined in
// "Table 2-34 Stream type assignments" in H.222
typedef base::Callback<void(int, TsStreamType)> RegisterPesCb;
typedef base::Callback<void(int, TsStreamType, const uint8_t*, size_t)>
RegisterPesCb;
explicit TsSectionPmt(const RegisterPesCb& register_pes_cb);
~TsSectionPmt() override;

View File

@ -26,6 +26,7 @@
#include "packager/file/file.h"
#include "packager/hls/base/hls_notifier.h"
#include "packager/hls/base/simple_hls_notifier.h"
#include "packager/media/base/cc_stream_filter.h"
#include "packager/media/base/container_names.h"
#include "packager/media/base/fourccs.h"
#include "packager/media/base/key_source.h"
@ -679,6 +680,11 @@ Status CreateAudioVideoJobs(
std::make_shared<TrickPlayHandler>(stream.trick_play_factor));
}
if (stream.cc_index >= 0) {
handlers.emplace_back(
std::make_shared<CcStreamFilter>(stream.language, stream.cc_index));
}
if (is_text &&
(!stream.segment_template.empty() || output_format == CONTAINER_MOV)) {
handlers.emplace_back(

View File

@ -109,6 +109,10 @@ struct StreamDescriptor {
/// Optional value which contains a user-specified language tag. If specified,
/// this value overrides any language metadata in the input stream.
std::string language;
/// Optional value for the index of the sub-stream to use. For some text
/// formats, there are multiple "channels" in a single stream. This allows
/// selecting only one channel.
int32_t cc_index = -1;
/// Required for audio when outputting HLS. It defines the name of the output
/// stream, which is not necessarily the same as output. This is used as the