From a0f3f2cd3a781230af789a3d53bccfbf2945866d Mon Sep 17 00:00:00 2001 From: Jacob Trimble Date: Fri, 11 Dec 2020 12:58:26 -0800 Subject: [PATCH] Add cc_index to stream descriptor. This also allows setting the language of different text streams from the same input. Multiple streams can use the same input stream using different cc_index values and can each use a different language. This also will try to pull the language from the input if not specified. Change-Id: I7078710b509b7d77dad8cb4299a82f954af7e9e7 --- docs/source/options/stream_descriptors.rst | 7 +++ packager/app/stream_descriptor.cc | 11 ++++ packager/media/base/bit_reader.h | 3 + packager/media/base/cc_stream_filter.cc | 54 ++++++++++++++++++ packager/media/base/cc_stream_filter.h | 39 +++++++++++++ packager/media/base/media_base.gyp | 2 + packager/media/base/stream_info.cc | 16 ++++-- packager/media/base/text_sample.h | 4 ++ packager/media/base/text_stream_info.cc | 14 +++++ packager/media/base/text_stream_info.h | 15 +++++ packager/media/formats/mp2t/es_parser_dvb.cc | 56 +++++++++++++++++-- packager/media/formats/mp2t/es_parser_dvb.h | 6 +- .../media/formats/mp2t/mp2t_media_parser.cc | 7 ++- .../media/formats/mp2t/mp2t_media_parser.h | 6 +- packager/media/formats/mp2t/ts_section_pmt.cc | 23 +++++--- packager/media/formats/mp2t/ts_section_pmt.h | 3 +- packager/packager.cc | 6 ++ packager/packager.h | 4 ++ 18 files changed, 255 insertions(+), 21 deletions(-) create mode 100644 packager/media/base/cc_stream_filter.cc create mode 100644 packager/media/base/cc_stream_filter.h diff --git a/docs/source/options/stream_descriptors.rst b/docs/source/options/stream_descriptors.rst index 992bffda77..a942708337 100644 --- a/docs/source/options/stream_descriptors.rst +++ b/docs/source/options/stream_descriptors.rst @@ -63,6 +63,13 @@ These are the available fields: sampling rate among key frames. If specified, the output is a trick play stream. 
+:cc_index: + + Optional value which specifies the index/ID of the subtitle stream to use + for formats where multiple exist within the same stream. For example, + CEA allows specifying up to 4 streams within a single video stream. If not + specified, all subtitles will be merged together. + .. include:: /options/drm_stream_descriptors.rst .. include:: /options/dash_stream_descriptors.rst .. include:: /options/hls_stream_descriptors.rst diff --git a/packager/app/stream_descriptor.cc b/packager/app/stream_descriptor.cc index aa2208f9a5..715e55d30d 100644 --- a/packager/app/stream_descriptor.cc +++ b/packager/app/stream_descriptor.cc @@ -22,6 +22,7 @@ enum FieldType { kSegmentTemplateField, kBandwidthField, kLanguageField, + kCcIndexField, kOutputFormatField, kHlsNameField, kHlsGroupIdField, @@ -57,6 +58,7 @@ const FieldNameToTypeMapping kFieldNameTypeMappings[] = { {"bitrate", kBandwidthField}, {"language", kLanguageField}, {"lang", kLanguageField}, + {"cc_index", kCcIndexField}, {"output_format", kOutputFormatField}, {"format", kOutputFormatField}, {"hls_name", kHlsNameField}, @@ -133,6 +135,15 @@ base::Optional ParseStreamDescriptor( descriptor.language = iter->second; break; } + case kCcIndexField: { + unsigned index; + if (!base::StringToUint(iter->second, &index)) { + LOG(ERROR) << "Non-numeric cc_index specified."; + return base::nullopt; + } + descriptor.cc_index = index; + break; + } case kOutputFormatField: { descriptor.output_format = iter->second; break; diff --git a/packager/media/base/bit_reader.h b/packager/media/base/bit_reader.h index df4d32c13f..2c0b5a4e57 100644 --- a/packager/media/base/bit_reader.h +++ b/packager/media/base/bit_reader.h @@ -93,6 +93,9 @@ class BitReader { /// @return The current bit position. size_t bit_position() const { return 8 * initial_size_ - bits_available(); } + /// @return A pointer to the current byte. 
+ const uint8_t* current_byte_ptr() const { return data_ - 1; } + private: // Help function used by ReadBits to avoid inlining the bit reading logic. bool ReadBitsInternal(size_t num_bits, uint64_t* out); diff --git a/packager/media/base/cc_stream_filter.cc b/packager/media/base/cc_stream_filter.cc new file mode 100644 index 0000000000..3a6a9627f2 --- /dev/null +++ b/packager/media/base/cc_stream_filter.cc @@ -0,0 +1,54 @@ +// Copyright 2020 Google LLC. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#include "packager/media/base/cc_stream_filter.h" + +#include "packager/media/base/stream_info.h" +#include "packager/media/base/text_stream_info.h" + +namespace shaka { +namespace media { + +CcStreamFilter::CcStreamFilter(const std::string& language, uint16_t cc_index) + : language_(language), cc_index_(cc_index) {} + +Status CcStreamFilter::InitializeInternal() { + return Status::OK; +} + +Status CcStreamFilter::Process(std::unique_ptr stream_data) { + if (stream_data->stream_data_type == StreamDataType::kTextSample) { + if (stream_data->text_sample->sub_stream_index() != -1 && + stream_data->text_sample->sub_stream_index() != cc_index_) { + return Status::OK; + } + } else if (stream_data->stream_data_type == StreamDataType::kStreamInfo) { + if (stream_data->stream_info->stream_type() == kStreamText) { + // Overwrite the per-input-stream language with our per-output-stream + // language; this requires cloning the stream info as it is used by other + // output streams. + auto clone = stream_data->stream_info->Clone(); + if (!language_.empty()) { + clone->set_language(language_); + } else { + // Try to find the language in the sub-stream info. 
+ auto* text_info = static_cast(clone.get()); + auto it = text_info->sub_streams().find(cc_index_); + if (it != text_info->sub_streams().end()) { + clone->set_language(it->second.language); + } + } + + stream_data = StreamData::FromStreamInfo(stream_data->stream_index, + std::move(clone)); + } + } + + return Dispatch(std::move(stream_data)); +} + +} // namespace media +} // namespace shaka diff --git a/packager/media/base/cc_stream_filter.h b/packager/media/base/cc_stream_filter.h new file mode 100644 index 0000000000..5d19da9458 --- /dev/null +++ b/packager/media/base/cc_stream_filter.h @@ -0,0 +1,39 @@ +// Copyright 2020 Google LLC. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#ifndef PACKAGER_MEDIA_BASE_CC_STREAM_FILTER_H_ +#define PACKAGER_MEDIA_BASE_CC_STREAM_FILTER_H_ + +#include + +#include "packager/media/base/media_handler.h" +#include "packager/media/base/text_sample.h" +#include "packager/status.h" + +namespace shaka { +namespace media { + +/// A media handler that filters text samples based on the cc_index +/// field. Some text formats allow multiple "channels" per stream, so this +/// passes through only the selected one.
+class CcStreamFilter : public MediaHandler { + public: + CcStreamFilter(const std::string& language, uint16_t cc_index); + ~CcStreamFilter() override = default; + + protected: + Status InitializeInternal() override; + Status Process(std::unique_ptr stream_data) override; + + private: + const std::string language_; + const uint16_t cc_index_; +}; + +} // namespace media +} // namespace shaka + +#endif // PACKAGER_MEDIA_BASE_CC_STREAM_FILTER_H_ diff --git a/packager/media/base/media_base.gyp b/packager/media/base/media_base.gyp index 7e2f3c76fa..77dcb072f2 100644 --- a/packager/media/base/media_base.gyp +++ b/packager/media/base/media_base.gyp @@ -35,6 +35,8 @@ 'buffer_writer.h', 'byte_queue.cc', 'byte_queue.h', + 'cc_stream_filter.cc', + 'cc_stream_filter.h', 'closure_thread.cc', 'closure_thread.h', 'common_pssh_generator.cc', diff --git a/packager/media/base/stream_info.cc b/packager/media/base/stream_info.cc index c9bce919bb..c62192f815 100644 --- a/packager/media/base/stream_info.cc +++ b/packager/media/base/stream_info.cc @@ -10,6 +10,7 @@ #include "packager/base/logging.h" #include "packager/base/strings/stringprintf.h" +#include "packager/media/base/timestamp.h" namespace shaka { namespace media { @@ -56,12 +57,19 @@ StreamInfo::StreamInfo(StreamType stream_type, StreamInfo::~StreamInfo() {} std::string StreamInfo::ToString() const { + std::string duration; + if (duration_ == kInfiniteDuration) { + duration = "Infinite"; + } else { + duration = base::StringPrintf("%" PRIu64 " (%.1f seconds)", duration_, + static_cast(duration_) / time_scale_); + } + return base::StringPrintf( "type: %s\n codec_string: %s\n time_scale: %d\n duration: " - "%" PRIu64 " (%.1f seconds)\n is_encrypted: %s\n", - (stream_type_ == kStreamAudio ? "Audio" : "Video"), codec_string_.c_str(), - time_scale_, duration_, static_cast(duration_) / time_scale_, - is_encrypted_ ? 
"true" : "false"); + "%s\n is_encrypted: %s\n", + StreamTypeToString(stream_type_).c_str(), codec_string_.c_str(), + time_scale_, duration.c_str(), is_encrypted_ ? "true" : "false"); } } // namespace media diff --git a/packager/media/base/text_sample.h b/packager/media/base/text_sample.h index 26297641ab..f0de1cde7f 100644 --- a/packager/media/base/text_sample.h +++ b/packager/media/base/text_sample.h @@ -127,6 +127,9 @@ class TextSample { const TextFragment& body() const { return body_; } int64_t EndTime() const; + int32_t sub_stream_index() const { return sub_stream_index_; } + void set_sub_stream_index(int32_t idx) { sub_stream_index_ = idx; } + private: // Allow the compiler generated copy constructor and assignment operator // intentionally. Since the text data is typically small, the performance @@ -137,6 +140,7 @@ class TextSample { const int64_t duration_ = 0; const TextSettings settings_; const TextFragment body_; + int32_t sub_stream_index_ = -1; }; } // namespace media diff --git a/packager/media/base/text_stream_info.cc b/packager/media/base/text_stream_info.cc index 9107397f38..30d5103493 100644 --- a/packager/media/base/text_stream_info.cc +++ b/packager/media/base/text_stream_info.cc @@ -6,6 +6,8 @@ #include "packager/media/base/text_stream_info.h" +#include "packager/base/strings/stringprintf.h" + namespace shaka { namespace media { @@ -28,6 +30,18 @@ bool TextStreamInfo::IsValidConfig() const { return true; } +std::string TextStreamInfo::ToString() const { + std::string ret = StreamInfo::ToString(); + if (!sub_streams_.empty()) { + ret += " Sub Streams:"; + for (auto& pair : sub_streams_) { + ret += base::StringPrintf("\n ID: %u, Lang: %s", pair.first, + pair.second.language.c_str()); + } + } + return ret + "\n"; +} + std::unique_ptr TextStreamInfo::Clone() const { return std::unique_ptr(new TextStreamInfo(*this)); } diff --git a/packager/media/base/text_stream_info.h b/packager/media/base/text_stream_info.h index 3aefa29b3a..7aa21d14d2 100644 --- 
a/packager/media/base/text_stream_info.h +++ b/packager/media/base/text_stream_info.h @@ -40,6 +40,12 @@ struct TextRegion { bool scroll = false; }; +/// Contains info about a sub-stream within a text stream. Depending on the +/// format, some info may not be available. This info doesn't affect output. +struct TextSubStreamInfo { + std::string language; +}; + class TextStreamInfo : public StreamInfo { public: /// No encryption supported. @@ -64,6 +70,7 @@ class TextStreamInfo : public StreamInfo { bool IsValidConfig() const override; + std::string ToString() const override; std::unique_ptr Clone() const override; uint16_t width() const { return width_; } @@ -75,8 +82,16 @@ class TextStreamInfo : public StreamInfo { const std::string& css_styles() const { return css_styles_; } void set_css_styles(const std::string& styles) { css_styles_ = styles; } + void AddSubStream(uint16_t index, TextSubStreamInfo info) { + sub_streams_.emplace(index, std::move(info)); + } + const std::map& sub_streams() const { + return sub_streams_; + } + private: std::map regions_; + std::map sub_streams_; std::string css_styles_; uint16_t width_; uint16_t height_; diff --git a/packager/media/formats/mp2t/es_parser_dvb.cc b/packager/media/formats/mp2t/es_parser_dvb.cc index a392b033c7..027ac21238 100644 --- a/packager/media/formats/mp2t/es_parser_dvb.cc +++ b/packager/media/formats/mp2t/es_parser_dvb.cc @@ -15,12 +15,50 @@ namespace shaka { namespace media { namespace mp2t { +namespace { + +bool ParseSubtitlingDescriptor( + const uint8_t* descriptor, + size_t size, + std::unordered_map* langs) { + // See ETSI EN 300 468 Section 6.2.41. 
+ BitReader reader(descriptor, size); + size_t data_size; + RCHECK(reader.SkipBits(8)); // descriptor_tag + RCHECK(reader.ReadBits(8, &data_size)); + RCHECK(data_size + 2 <= size); + for (size_t i = 0; i < data_size; i += 8) { + uint32_t lang_code; + uint16_t page; + RCHECK(reader.ReadBits(24, &lang_code)); + RCHECK(reader.SkipBits(8)); // subtitling_type + RCHECK(reader.ReadBits(16, &page)); + RCHECK(reader.SkipBits(16)); // ancillary_page_id + + // The lang code is an ISO 639-2 code coded in Latin-1. + std::string lang(3, '\0'); + lang[0] = (lang_code >> 16) & 0xff; + lang[1] = (lang_code >> 8) & 0xff; + lang[2] = (lang_code >> 0) & 0xff; + langs->emplace(page, std::move(lang)); + } + return true; +} + +} // namespace + EsParserDvb::EsParserDvb(uint32_t pid, const NewStreamInfoCB& new_stream_info_cb, - const EmitTextSampleCB& emit_sample_cb) + const EmitTextSampleCB& emit_sample_cb, + const uint8_t* descriptor, + size_t descriptor_length) : EsParser(pid), new_stream_info_cb_(new_stream_info_cb), - emit_sample_cb_(emit_sample_cb) {} + emit_sample_cb_(emit_sample_cb) { + if (!ParseSubtitlingDescriptor(descriptor, descriptor_length, &languages_)) { + LOG(WARNING) << "Error parsing subtitling descriptor"; + } +} EsParserDvb::~EsParserDvb() {} @@ -30,10 +68,14 @@ bool EsParserDvb::Parse(const uint8_t* buf, int64_t dts) { if (!sent_info_) { sent_info_ = true; - std::shared_ptr info = std::make_shared( + std::shared_ptr info = std::make_shared( pid(), kMpeg2Timescale, kInfiniteDuration, kCodecText, /* codec_string= */ "", /* codec_config= */ "", /* width= */ 0, /* height= */ 0, /* language= */ ""); + for (const auto& pair : languages_) { + info->AddSubStream(pair.first, {pair.second}); + } + new_stream_info_cb_.Run(info); } @@ -47,8 +89,10 @@ bool EsParserDvb::Flush() { std::vector> samples; RCHECK(pair.second.Flush(&samples)); - for (auto sample : samples) + for (auto sample : samples) { + sample->set_sub_stream_index(pair.first); emit_sample_cb_.Run(sample); + } }
return true; } @@ -81,8 +125,10 @@ bool EsParserDvb::ParseInternal(const uint8_t* data, size_t size, int64_t pts) { std::vector> samples; RCHECK(parsers_[page_id].Parse(segment_type, pts, payload, segment_length, &samples)); - for (auto sample : samples) + for (auto sample : samples) { + sample->set_sub_stream_index(page_id); emit_sample_cb_.Run(sample); + } RCHECK(reader.SkipBytes(segment_length)); } diff --git a/packager/media/formats/mp2t/es_parser_dvb.h b/packager/media/formats/mp2t/es_parser_dvb.h index ce56c0c17e..6f6972cd30 100644 --- a/packager/media/formats/mp2t/es_parser_dvb.h +++ b/packager/media/formats/mp2t/es_parser_dvb.h @@ -22,7 +22,9 @@ class EsParserDvb : public EsParser { public: EsParserDvb(uint32_t pid, const NewStreamInfoCB& new_stream_info_cb, - const EmitTextSampleCB& emit_sample_cb); + const EmitTextSampleCB& emit_sample_cb, + const uint8_t* descriptor, + size_t descriptor_length); ~EsParserDvb() override; // EsParser implementation. @@ -44,6 +46,8 @@ class EsParserDvb : public EsParser { // A map of page_id to parser. std::unordered_map parsers_; + // A map of page_id to language. 
+ std::unordered_map languages_; bool sent_info_ = false; }; diff --git a/packager/media/formats/mp2t/mp2t_media_parser.cc b/packager/media/formats/mp2t/mp2t_media_parser.cc index 7ce6505b2a..f9f908fcd4 100644 --- a/packager/media/formats/mp2t/mp2t_media_parser.cc +++ b/packager/media/formats/mp2t/mp2t_media_parser.cc @@ -275,7 +275,9 @@ void Mp2tMediaParser::RegisterPmt(int program_number, int pmt_pid) { void Mp2tMediaParser::RegisterPes(int pmt_pid, int pes_pid, - TsStreamType stream_type) { + TsStreamType stream_type, + const uint8_t* descriptor, + size_t descriptor_length) { if (pids_.count(pes_pid) != 0) return; DVLOG(1) << "RegisterPes:" @@ -307,7 +309,8 @@ void Mp2tMediaParser::RegisterPes(int pmt_pid, pid_type = PidState::kPidAudioPes; break; case TsStreamType::kDvbSubtitles: - es_parser.reset(new EsParserDvb(pes_pid, on_new_stream, on_emit_text)); + es_parser.reset(new EsParserDvb(pes_pid, on_new_stream, on_emit_text, + descriptor, descriptor_length)); pid_type = PidState::kPidTextPes; break; default: { diff --git a/packager/media/formats/mp2t/mp2t_media_parser.h b/packager/media/formats/mp2t/mp2t_media_parser.h index c33081ccce..f592584987 100644 --- a/packager/media/formats/mp2t/mp2t_media_parser.h +++ b/packager/media/formats/mp2t/mp2t_media_parser.h @@ -50,7 +50,11 @@ class Mp2tMediaParser : public MediaParser { // Possible values for |media_type| are defined in: // ISO-13818.1 / ITU H.222 Table 2.34 "Media type assignments". // |pes_pid| is part of the Program Map Table refered by |pmt_pid|. - void RegisterPes(int pmt_pid, int pes_pid, TsStreamType media_type); + void RegisterPes(int pmt_pid, + int pes_pid, + TsStreamType media_type, + const uint8_t* descriptor, + size_t descriptor_length); // Callback invoked each time the audio/video decoder configuration is // changed. 
diff --git a/packager/media/formats/mp2t/ts_section_pmt.cc b/packager/media/formats/mp2t/ts_section_pmt.cc index 5008156693..f8053a668d 100644 --- a/packager/media/formats/mp2t/ts_section_pmt.cc +++ b/packager/media/formats/mp2t/ts_section_pmt.cc @@ -4,7 +4,7 @@ #include "packager/media/formats/mp2t/ts_section_pmt.h" -#include +#include #include "packager/base/logging.h" #include "packager/media/base/bit_reader.h" @@ -76,22 +76,29 @@ bool TsSectionPmt::ParsePsiSection(BitReader* bit_reader) { // The end of the PID map if 4 bytes away from the end of the section // (4 bytes = size of the CRC). int pid_map_end_marker = section_start_marker - section_length + 4; - std::map pid_map; + struct Info { + int pid_es; + TsStreamType stream_type; + const uint8_t* descriptor; + size_t descriptor_length; + }; + std::vector pid_info; while (static_cast(bit_reader->bits_available()) > 8 * pid_map_end_marker) { TsStreamType stream_type; int pid_es; - int es_info_length; + size_t es_info_length; RCHECK(bit_reader->ReadBits(8, &stream_type)); RCHECK(bit_reader->SkipBits(3)); // reserved RCHECK(bit_reader->ReadBits(13, &pid_es)); RCHECK(bit_reader->ReadBits(4, &reserved)); RCHECK(bit_reader->ReadBits(12, &es_info_length)); + const uint8_t* descriptor = bit_reader->current_byte_ptr(); // Do not register the PID right away. // Wait for the end of the section to be fully parsed // to make sure there is no error. - pid_map.emplace(pid_es, stream_type); + pid_info.push_back({pid_es, stream_type, descriptor, es_info_length}); // Read the ES info descriptors. // Defined in section 2.6 of ISO-13818. 
@@ -103,7 +110,7 @@ bool TsSectionPmt::ParsePsiSection(BitReader* bit_reader) { // See ETSI EN 300 468 Section 6.1 if (stream_type == TsStreamType::kPesPrivateData && descriptor_tag == 0x59) { // subtitling_descriptor - pid_map[pid_es] = TsStreamType::kDvbSubtitles; + pid_info.back().stream_type = TsStreamType::kDvbSubtitles; } } RCHECK(bit_reader->SkipBits(8 * es_info_length)); @@ -114,8 +121,10 @@ bool TsSectionPmt::ParsePsiSection(BitReader* bit_reader) { RCHECK(bit_reader->ReadBits(32, &crc32)); // Once the PMT has been proved to be correct, register the PIDs. - for (auto& pair : pid_map) - register_pes_cb_.Run(pair.first, pair.second); + for (auto& info : pid_info) { + register_pes_cb_.Run(info.pid_es, info.stream_type, info.descriptor, + info.descriptor_length); + } return true; } diff --git a/packager/media/formats/mp2t/ts_section_pmt.h b/packager/media/formats/mp2t/ts_section_pmt.h index 4422ee679a..ebc9e407e5 100644 --- a/packager/media/formats/mp2t/ts_section_pmt.h +++ b/packager/media/formats/mp2t/ts_section_pmt.h @@ -19,7 +19,8 @@ class TsSectionPmt : public TsSectionPsi { // RegisterPesCb::Run(int pes_pid, int stream_type); // Stream type is defined in // "Table 2-34 – Stream type assignments" in H.222 - typedef base::Callback RegisterPesCb; + typedef base::Callback + RegisterPesCb; explicit TsSectionPmt(const RegisterPesCb& register_pes_cb); ~TsSectionPmt() override; diff --git a/packager/packager.cc b/packager/packager.cc index bc22e22518..67a08d452c 100644 --- a/packager/packager.cc +++ b/packager/packager.cc @@ -26,6 +26,7 @@ #include "packager/file/file.h" #include "packager/hls/base/hls_notifier.h" #include "packager/hls/base/simple_hls_notifier.h" +#include "packager/media/base/cc_stream_filter.h" #include "packager/media/base/container_names.h" #include "packager/media/base/fourccs.h" #include "packager/media/base/key_source.h" @@ -679,6 +680,11 @@ Status CreateAudioVideoJobs( std::make_shared(stream.trick_play_factor)); } + if (stream.cc_index 
>= 0) { + handlers.emplace_back( + std::make_shared(stream.language, stream.cc_index)); + } + if (is_text && (!stream.segment_template.empty() || output_format == CONTAINER_MOV)) { handlers.emplace_back( diff --git a/packager/packager.h b/packager/packager.h index f4a9a6f502..d5ad9cc402 100644 --- a/packager/packager.h +++ b/packager/packager.h @@ -109,6 +109,10 @@ struct StreamDescriptor { /// Optional value which contains a user-specified language tag. If specified, /// this value overrides any language metadata in the input stream. std::string language; + /// Optional value for the index of the sub-stream to use. For some text + /// formats, there are multiple "channels" in a single stream. This allows + /// selecting only one channel. A negative value (the default) keeps all. + int32_t cc_index = -1; /// Required for audio when outputting HLS. It defines the name of the output /// stream, which is not necessarily the same as output. This is used as the