From 4766654b4d019f4882e490e051bb84b1900bad7a Mon Sep 17 00:00:00 2001 From: Jacob Trimble Date: Thu, 8 Oct 2020 14:46:37 -0700 Subject: [PATCH] Add TTML text output. This only supports TTML output; meaning the user can convert WebVTT into TTML, but not the other way around. This will be useful for DVB-sub subtitles that would be better supported within TTML. This only adds text-based output; a follow-up will add MP4 support. Change-Id: I0944b7df95d7765e55f203fc5e9a644f5c455dd8 --- packager/app/muxer_factory.cc | 4 + packager/app/test/packager_test.py | 10 +- .../bear-english-text-1.ttml | 9 + .../bear-english-text-2.ttml | 9 + .../bear-english-text-3.ttml | 9 + .../bear-english-text-4.ttml | 9 + .../bear-english-text-5.ttml | 9 + .../testdata/segmented-ttml-text/output.m3u8 | 6 + .../testdata/segmented-ttml-text/output.mpd | 16 + .../segmented-ttml-text/stream_0.m3u8 | 16 + packager/media/base/media_base.gyp | 2 + packager/media/base/stream_info.h | 1 + packager/media/base/text_muxer.cc | 93 +++++ packager/media/base/text_muxer.h | 47 +++ packager/media/formats/ttml/ttml.gyp | 46 +++ packager/media/formats/ttml/ttml_generator.cc | 194 +++++++++++ packager/media/formats/ttml/ttml_generator.h | 50 +++ .../formats/ttml/ttml_generator_unittest.cc | 327 ++++++++++++++++++ packager/media/formats/ttml/ttml_muxer.cc | 46 +++ packager/media/formats/ttml/ttml_muxer.h | 34 ++ packager/media/formats/webvtt/webvtt_muxer.cc | 77 +---- packager/media/formats/webvtt/webvtt_muxer.h | 24 +- packager/mpd/base/xml/scoped_xml_ptr.h | 3 + packager/mpd/base/xml/xml_node.cc | 5 + packager/mpd/base/xml/xml_node.h | 3 + packager/packager.cc | 7 +- packager/packager.gyp | 2 + 27 files changed, 966 insertions(+), 92 deletions(-) create mode 100644 packager/app/test/testdata/segmented-ttml-text/bear-english-text-1.ttml create mode 100644 packager/app/test/testdata/segmented-ttml-text/bear-english-text-2.ttml create mode 100644 
packager/app/test/testdata/segmented-ttml-text/bear-english-text-3.ttml create mode 100644 packager/app/test/testdata/segmented-ttml-text/bear-english-text-4.ttml create mode 100644 packager/app/test/testdata/segmented-ttml-text/bear-english-text-5.ttml create mode 100644 packager/app/test/testdata/segmented-ttml-text/output.m3u8 create mode 100644 packager/app/test/testdata/segmented-ttml-text/output.mpd create mode 100644 packager/app/test/testdata/segmented-ttml-text/stream_0.m3u8 create mode 100644 packager/media/base/text_muxer.cc create mode 100644 packager/media/base/text_muxer.h create mode 100644 packager/media/formats/ttml/ttml.gyp create mode 100644 packager/media/formats/ttml/ttml_generator.cc create mode 100644 packager/media/formats/ttml/ttml_generator.h create mode 100644 packager/media/formats/ttml/ttml_generator_unittest.cc create mode 100644 packager/media/formats/ttml/ttml_muxer.cc create mode 100644 packager/media/formats/ttml/ttml_muxer.h diff --git a/packager/app/muxer_factory.cc b/packager/app/muxer_factory.cc index 825fc6cd58..c2649a046b 100644 --- a/packager/app/muxer_factory.cc +++ b/packager/app/muxer_factory.cc @@ -12,6 +12,7 @@ #include "packager/media/formats/mp2t/ts_muxer.h" #include "packager/media/formats/mp4/mp4_muxer.h" #include "packager/media/formats/packed_audio/packed_audio_writer.h" +#include "packager/media/formats/ttml/ttml_muxer.h" #include "packager/media/formats/webm/webm_muxer.h" #include "packager/media/formats/webvtt/webvtt_muxer.h" #include "packager/packager.h" @@ -49,6 +50,9 @@ std::shared_ptr MuxerFactory::CreateMuxer( case CONTAINER_WEBM: muxer = std::make_shared(options); break; + case CONTAINER_TTML: + muxer = std::make_shared(options); + break; case CONTAINER_WEBVTT: muxer = std::make_shared(options); break; diff --git a/packager/app/test/packager_test.py b/packager/app/test/packager_test.py index 7ceb670fcf..779f5eceb7 100755 --- a/packager/app/test/packager_test.py +++ b/packager/app/test/packager_test.py @@ 
-404,7 +404,7 @@ class PackagerAppTest(unittest.TestCase): stream.Append('dash_only', 1) requires_init_segment = segmented and base_ext not in [ - 'aac', 'ac3', 'ec3', 'ts', 'vtt' + 'aac', 'ac3', 'ec3', 'ts', 'vtt', 'ttml', ] output_file_path = os.path.join(self.tmp_dir, output_file_name_base) @@ -849,6 +849,14 @@ class PackagerFunctionalTest(PackagerAppTest): self.assertPackageSuccess(streams, flags) self._CheckTestResults('segmented-webvtt-mp4') + def testSegmentedTtmlText(self): + streams = self._GetStreams(['text'], test_files=['bear-english.vtt'], + output_format='ttml', segmented=True) + flags = self._GetFlags(output_hls=True, output_dash=True) + + self.assertPackageSuccess(streams, flags) + self._CheckTestResults('segmented-ttml-text') + def testMp4TrailingMoov(self): self.assertPackageSuccess( self._GetStreams(['audio', 'video'], diff --git a/packager/app/test/testdata/segmented-ttml-text/bear-english-text-1.ttml b/packager/app/test/testdata/segmented-ttml-text/bear-english-text-1.ttml new file mode 100644 index 0000000000..70c1bdb8a8 --- /dev/null +++ b/packager/app/test/testdata/segmented-ttml-text/bear-english-text-1.ttml @@ -0,0 +1,9 @@ + + + + +
+

Yup, that's a bear, eh.

+
+ +
diff --git a/packager/app/test/testdata/segmented-ttml-text/bear-english-text-2.ttml b/packager/app/test/testdata/segmented-ttml-text/bear-english-text-2.ttml new file mode 100644 index 0000000000..07dfd93d87 --- /dev/null +++ b/packager/app/test/testdata/segmented-ttml-text/bear-english-text-2.ttml @@ -0,0 +1,9 @@ + + + + +
+

He 's... um... doing bear-like stuff.

+
+ +
diff --git a/packager/app/test/testdata/segmented-ttml-text/bear-english-text-3.ttml b/packager/app/test/testdata/segmented-ttml-text/bear-english-text-3.ttml new file mode 100644 index 0000000000..07dfd93d87 --- /dev/null +++ b/packager/app/test/testdata/segmented-ttml-text/bear-english-text-3.ttml @@ -0,0 +1,9 @@ + + + + +
+

He 's... um... doing bear-like stuff.

+
+ +
diff --git a/packager/app/test/testdata/segmented-ttml-text/bear-english-text-4.ttml b/packager/app/test/testdata/segmented-ttml-text/bear-english-text-4.ttml new file mode 100644 index 0000000000..07dfd93d87 --- /dev/null +++ b/packager/app/test/testdata/segmented-ttml-text/bear-english-text-4.ttml @@ -0,0 +1,9 @@ + + + + +
+

He 's... um... doing bear-like stuff.

+
+ +
diff --git a/packager/app/test/testdata/segmented-ttml-text/bear-english-text-5.ttml b/packager/app/test/testdata/segmented-ttml-text/bear-english-text-5.ttml new file mode 100644 index 0000000000..07dfd93d87 --- /dev/null +++ b/packager/app/test/testdata/segmented-ttml-text/bear-english-text-5.ttml @@ -0,0 +1,9 @@ + + + + +
+

He 's... um... doing bear-like stuff.

+
+ +
diff --git a/packager/app/test/testdata/segmented-ttml-text/output.m3u8 b/packager/app/test/testdata/segmented-ttml-text/output.m3u8 new file mode 100644 index 0000000000..ad00c97c90 --- /dev/null +++ b/packager/app/test/testdata/segmented-ttml-text/output.m3u8 @@ -0,0 +1,6 @@ +#EXTM3U +## Generated with https://github.com/google/shaka-packager version -- + +#EXT-X-INDEPENDENT-SEGMENTS + +#EXT-X-MEDIA:TYPE=SUBTITLES,URI="stream_0.m3u8",GROUP-ID="default-text-group",NAME="stream_0",AUTOSELECT=YES diff --git a/packager/app/test/testdata/segmented-ttml-text/output.mpd b/packager/app/test/testdata/segmented-ttml-text/output.mpd new file mode 100644 index 0000000000..372f266d5e --- /dev/null +++ b/packager/app/test/testdata/segmented-ttml-text/output.mpd @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff --git a/packager/app/test/testdata/segmented-ttml-text/stream_0.m3u8 b/packager/app/test/testdata/segmented-ttml-text/stream_0.m3u8 new file mode 100644 index 0000000000..9da28a3887 --- /dev/null +++ b/packager/app/test/testdata/segmented-ttml-text/stream_0.m3u8 @@ -0,0 +1,16 @@ +#EXTM3U +#EXT-X-VERSION:6 +## Generated with https://github.com/google/shaka-packager version -- +#EXT-X-TARGETDURATION:1 +#EXT-X-PLAYLIST-TYPE:VOD +#EXTINF:1.000, +bear-english-text-1.ttml +#EXTINF:1.000, +bear-english-text-2.ttml +#EXTINF:1.000, +bear-english-text-3.ttml +#EXTINF:1.000, +bear-english-text-4.ttml +#EXTINF:1.000, +bear-english-text-5.ttml +#EXT-X-ENDLIST diff --git a/packager/media/base/media_base.gyp b/packager/media/base/media_base.gyp index 71171a1a6f..7e2f3c76fa 100644 --- a/packager/media/base/media_base.gyp +++ b/packager/media/base/media_base.gyp @@ -98,6 +98,8 @@ 'rsa_key.h', 'stream_info.cc', 'stream_info.h', + 'text_muxer.cc', + 'text_muxer.h', 'text_sample.cc', 'text_sample.h', 'text_stream_info.cc', diff --git a/packager/media/base/stream_info.h b/packager/media/base/stream_info.h index dd07b5038a..a513ec7764 100644 --- a/packager/media/base/stream_info.h +++ 
b/packager/media/base/stream_info.h
@@ -58,6 +58,7 @@ enum Codec {
 
   kCodecText = 300,
   kCodecWebVtt = kCodecText,
+  kCodecTtml,
 };
 
 /// Abstract class holds stream information.
diff --git a/packager/media/base/text_muxer.cc b/packager/media/base/text_muxer.cc
new file mode 100644
index 0000000000..4ba5794175
--- /dev/null
+++ b/packager/media/base/text_muxer.cc
@@ -0,0 +1,97 @@
+// Copyright 2020 Google LLC. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file or at
+// https://developers.google.com/open-source/licenses/bsd
+
+#include "packager/media/base/text_muxer.h"
+
+#include "packager/media/base/muxer_util.h"
+#include "packager/status_macros.h"
+
+namespace shaka {
+namespace media {
+
+TextMuxer::TextMuxer(const MuxerOptions& options) : Muxer(options) {}
+TextMuxer::~TextMuxer() {}
+
+Status TextMuxer::InitializeMuxer() {
+  if (streams().size() != 1 || streams()[0]->stream_type() != kStreamText) {
+    return Status(error::MUXER_FAILURE,
+                  "Incorrect streams given to text muxer");
+  }
+
+  // Let the subclass stamp its codec onto a private copy; the listener is
+  // told about that copy rather than the original stream info.
+  auto copy = streams()[0]->Clone();
+  RETURN_IF_ERROR(InitializeStream(static_cast<TextStreamInfo*>(copy.get())));
+
+  muxer_listener()->OnMediaStart(options(), *copy, copy->time_scale(),
+                                 MuxerListener::kContainerText);
+
+  last_cue_ms_ = 0;
+  return Status::OK;
+}
+
+Status TextMuxer::Finalize() {
+  // NOTE(review): both *_ms_ counters are divided by 1000 below, which assumes
+  // a millisecond time scale -- confirm against the callers.
+  const float duration_ms = static_cast<float>(total_duration_ms_);
+  float duration_seconds = duration_ms / 1000;
+
+  // If we haven't seen any segments, this is a single-file. In this case,
+  // flush the single segment.
+  MuxerListener::MediaRanges ranges;
+  if (duration_seconds == 0 && last_cue_ms_ != 0) {
+    DCHECK(options().segment_template.empty());
+    duration_seconds = static_cast<float>(last_cue_ms_) / 1000;
+
+    uint64_t size;
+    RETURN_IF_ERROR(WriteToFile(options().output_file_name, &size));
+    // Insert a dummy value so the HLS generator will generate a segment list.
+    ranges.subsegment_ranges.emplace_back();
+
+    muxer_listener()->OnNewSegment(
+        options().output_file_name, 0,
+        duration_seconds * streams()[0]->time_scale(), size);
+  }
+
+  muxer_listener()->OnMediaEnd(ranges, duration_seconds);
+
+  return Status::OK;
+}
+
+Status TextMuxer::AddTextSample(size_t stream_id, const TextSample& sample) {
+  // Ignore sync samples.
+  if (sample.body().is_empty()) {
+    return Status::OK;
+  }
+
+  RETURN_IF_ERROR(AddTextSampleInternal(sample));
+
+  last_cue_ms_ = sample.EndTime();
+  return Status::OK;
+}
+
+Status TextMuxer::FinalizeSegment(size_t stream_id,
+                                  const SegmentInfo& segment_info) {
+  total_duration_ms_ += segment_info.duration;
+
+  const std::string& segment_template = options().segment_template;
+  DCHECK(!segment_template.empty());
+  const uint32_t index = segment_index_++;
+  const uint64_t start = segment_info.start_timestamp;
+  const uint64_t duration = segment_info.duration;
+  const uint32_t bandwidth = options().bandwidth;
+
+  const std::string filename =
+      GetSegmentName(segment_template, start, index, bandwidth);
+  uint64_t size;
+  RETURN_IF_ERROR(WriteToFile(filename, &size));
+
+  muxer_listener()->OnNewSegment(filename, start, duration, size);
+  return Status::OK;
+}
+
+}  // namespace media
+}  // namespace shaka
diff --git a/packager/media/base/text_muxer.h b/packager/media/base/text_muxer.h
new file mode 100644
index 0000000000..6ef571e13b
--- /dev/null
+++ b/packager/media/base/text_muxer.h
@@ -0,0 +1,54 @@
+// Copyright 2020 Google LLC. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file or at
+// https://developers.google.com/open-source/licenses/bsd
+
+#ifndef PACKAGER_MEDIA_BASE_TEXT_MUXER_H_
+#define PACKAGER_MEDIA_BASE_TEXT_MUXER_H_
+
+#include "packager/media/base/muxer.h"
+#include "packager/media/base/text_sample.h"
+#include "packager/media/base/text_stream_info.h"
+
+namespace shaka {
+namespace media {
+
+/// Defines a base class for text format (i.e. not MP4) muxers. This handles
+/// separating the single-segment and multi-segment modes. Derived classes are
+/// expected to buffer cues (or text) and write them out in WriteToFile.
+class TextMuxer : public Muxer {
+ public:
+  explicit TextMuxer(const MuxerOptions& options);
+  ~TextMuxer() override;
+
+ private:
+  // Muxer implementation overrides.
+  Status InitializeMuxer() override;
+  Status Finalize() override;
+  Status AddTextSample(size_t stream_id, const TextSample& sample) override;
+  Status FinalizeSegment(size_t stream_id,
+                         const SegmentInfo& segment_info) override;
+
+  /// Called once by InitializeMuxer() with a private copy of the stream info.
+  /// Implementations set the codec on it and prepare format-specific state.
+  virtual Status InitializeStream(TextStreamInfo* stream) = 0;
+  /// Buffers one cue. Empty (sync) samples are dropped by AddTextSample()
+  /// before this is called.
+  virtual Status AddTextSampleInternal(const TextSample& sample) = 0;
+  /// Writes the buffered samples to the file with the given name. This should
+  /// also clear any buffered samples.
+  virtual Status WriteToFile(const std::string& filename, uint64_t* size) = 0;
+
+  // Sum of SegmentInfo::duration over all finalized segments.
+  uint64_t total_duration_ms_ = 0;
+  // EndTime() of the most recently added non-empty cue.
+  uint64_t last_cue_ms_ = 0;
+  // Index handed to GetSegmentName() for the next segment.
+  uint32_t segment_index_ = 0;
+};
+
+}  // namespace media
+}  // namespace shaka
+
+#endif  // PACKAGER_MEDIA_BASE_TEXT_MUXER_H_
diff --git a/packager/media/formats/ttml/ttml.gyp b/packager/media/formats/ttml/ttml.gyp
new file mode 100644
index 0000000000..7a9b02bcb1
--- /dev/null
+++ b/packager/media/formats/ttml/ttml.gyp
@@ -0,0 +1,46 @@
+# Copyright 2020 Google LLC. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file or at
+# https://developers.google.com/open-source/licenses/bsd
+
+{
+  'variables': {
+    'shaka_code': 1,
+  },
+  'targets': [
+    {
+      'target_name': 'ttml',
+      'type': '<(component)',
+      'sources': [
+        'ttml_generator.cc',
+        'ttml_generator.h',
+        'ttml_muxer.cc',
+        'ttml_muxer.h',
+      ],
+      'dependencies': [
+        '../../base/media_base.gyp:media_base',
+        '../../../mpd/mpd.gyp:mpd_builder',
+      ],
+      'export_dependent_settings': [
+        '../../../mpd/mpd.gyp:mpd_builder',
+      ],
+    },
+    {
+      'target_name': 'ttml_unittest',
+      'type': '<(gtest_target_type)',
+      'sources': [
+        'ttml_generator_unittest.cc',
+      ],
+      'dependencies': [
+        '../../../testing/gtest.gyp:gtest',
+        '../../../testing/gmock.gyp:gmock',
+        '../../../third_party/libxml/libxml.gyp:libxml',
+        '../../base/media_base.gyp:media_handler_test_base',
+        '../../event/media_event.gyp:mock_muxer_listener',
+        '../../test/media_test.gyp:media_test_support',
+        'ttml',
+      ]
+    },
+  ],
+}
diff --git a/packager/media/formats/ttml/ttml_generator.cc b/packager/media/formats/ttml/ttml_generator.cc
new file mode 100644
index 0000000000..61d8b92085
--- /dev/null
+++ b/packager/media/formats/ttml/ttml_generator.cc
@@ -0,0 +1,202 @@
+// Copyright 2020 Google LLC. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file or at
+// https://developers.google.com/open-source/licenses/bsd
+
+#include "packager/media/formats/ttml/ttml_generator.h"
+
+#include "packager/base/strings/stringprintf.h"
+#include "packager/media/base/rcheck.h"
+
+namespace shaka {
+namespace media {
+namespace ttml {
+
+namespace {
+
+// Formats |time|, given in units of 1/|timescale| seconds, as the TTML
+// clock-time "HH:MM:SS.mmm".  The part after the dot is a decimal fraction of
+// a second, so the milliseconds are zero-padded to three digits; with only
+// two, 50 ms would be written ".50" and read back as half a second.
+std::string ToTtmlTime(int64_t time, uint32_t timescale) {
+  int64_t remaining = time * 1000 / timescale;
+
+  const int ms = remaining % 1000;
+  remaining /= 1000;
+  const int sec = remaining % 60;
+  remaining /= 60;
+  const int min = remaining % 60;
+  remaining /= 60;
+  const int hr = remaining;
+
+  return base::StringPrintf("%02d:%02d:%02d.%03d", hr, min, sec, ms);
+}
+
+// Formats an (x, y) pair as a TTML length pair such as "20px 40px", taking
+// each unit suffix from the number's TextUnitType.
+std::string ToTtmlSize(const TextNumber& x, const TextNumber& y) {
+  // NOTE(review): indexed by the TextUnitType value; confirm the enum order.
+  const char* kSuffixMap[] = {"px", "em", "%"};
+  return base::StringPrintf("%.0f%s %.0f%s", x.value,
+                            kSuffixMap[static_cast<int>(x.type)], y.value,
+                            kSuffixMap[static_cast<int>(y.type)]);
+}
+
+}  // namespace
+
+TtmlGenerator::TtmlGenerator() {}
+
+TtmlGenerator::~TtmlGenerator() {}
+
+void TtmlGenerator::Initialize(const std::map<std::string, TextRegion>& regions,
+                               const std::string& language,
+                               uint32_t time_scale) {
+  regions_ = regions;
+  language_ = language;
+  time_scale_ = time_scale;
+}
+
+void TtmlGenerator::AddSample(const TextSample& sample) {
+  samples_.emplace_back(sample);
+}
+
+void TtmlGenerator::Reset() {
+  samples_.clear();
+}
+
+bool TtmlGenerator::Dump(std::string* result) const {
+  xml::XmlNode root("tt");
+  RCHECK(root.SetStringAttribute("xmlns", "http://www.w3.org/ns/ttml"));
+  RCHECK(root.SetStringAttribute("xmlns:tts",
+                                 "http://www.w3.org/ns/ttml#styling"));
+
+  bool did_log = false;
+  xml::XmlNode head("head");
+  RCHECK(root.SetStringAttribute("xml:lang", language_));
+  for (const auto& pair : regions_) {
+    // Warn once if either anchor coordinate is set; it is not written out.
+    if (!did_log && (pair.second.region_anchor_x.value != 0 ||
+                     pair.second.region_anchor_y.value != 0)) {
+      LOG(WARNING) << "TTML doesn't support non-0 region anchor";
+      did_log = true;
+    }
+
+    xml::XmlNode region("region");
+    const auto origin =
+        ToTtmlSize(pair.second.window_anchor_x, pair.second.window_anchor_y);
+    const auto extent = ToTtmlSize(pair.second.width, pair.second.height);
+    RCHECK(region.SetStringAttribute("xml:id", pair.first));
+    RCHECK(region.SetStringAttribute("tts:origin", origin));
+    RCHECK(region.SetStringAttribute("tts:extent", extent));
+    RCHECK(head.AddChild(std::move(region)));
+  }
+  RCHECK(root.AddChild(std::move(head)));
+
+  xml::XmlNode body("body");
+  xml::XmlNode div("div");
+  for (const auto& sample : samples_) {
+    RCHECK(AddSampleToXml(sample, &div));
+  }
+  RCHECK(body.AddChild(std::move(div)));
+  RCHECK(root.AddChild(std::move(body)));
+
+  *result = root.ToString(/* comment= */ "");
+  return true;
+}
+
+bool TtmlGenerator::AddSampleToXml(const TextSample& sample,
+                                   xml::XmlNode* body) const {
+  xml::XmlNode p("p");
+  RCHECK(p.SetStringAttribute("xml:space", "preserve"));
+  RCHECK(p.SetStringAttribute("begin",
+                              ToTtmlTime(sample.start_time(), time_scale_)));
+  RCHECK(
+      p.SetStringAttribute("end", ToTtmlTime(sample.EndTime(), time_scale_)));
+  RCHECK(ConvertFragmentToXml(sample.body(), &p));
+  if (!sample.id().empty())
+    RCHECK(p.SetStringAttribute("xml:id", sample.id()));
+
+  const auto& settings = sample.settings();
+  if (!settings.region.empty())
+    RCHECK(p.SetStringAttribute("region", settings.region));
+  if (settings.line || settings.position) {
+    const auto origin = ToTtmlSize(
+        settings.position.value_or(TextNumber(0, TextUnitType::kPixels)),
+        settings.line.value_or(TextNumber(0, TextUnitType::kPixels)));
+
+    RCHECK(p.SetStringAttribute("tts:origin", origin));
+  }
+  if (settings.writing_direction != WritingDirection::kHorizontal) {
+    const char* dir =
+        settings.writing_direction == WritingDirection::kVerticalGrowingLeft
+            ? "tbrl"
+            : "tblr";
+    RCHECK(p.SetStringAttribute("tts:writingMode", dir));
+  }
+  if (settings.text_alignment != TextAlignment::kStart) {
+    switch (settings.text_alignment) {
+      case TextAlignment::kStart:  // To avoid compiler warning.
+      case TextAlignment::kCenter:
+        RCHECK(p.SetStringAttribute("tts:textAlign", "center"));
+        break;
+      case TextAlignment::kEnd:
+        RCHECK(p.SetStringAttribute("tts:textAlign", "end"));
+        break;
+      case TextAlignment::kLeft:
+        RCHECK(p.SetStringAttribute("tts:textAlign", "left"));
+        break;
+      case TextAlignment::kRight:
+        RCHECK(p.SetStringAttribute("tts:textAlign", "right"));
+        break;
+    }
+  }
+
+  RCHECK(body->AddChild(std::move(p)));
+  return true;
+}
+
+bool TtmlGenerator::ConvertFragmentToXml(const TextFragment& body,
+                                         xml::XmlNode* parent) const {
+  if (body.newline) {
+    xml::XmlNode br("br");
+    return parent->AddChild(std::move(br));
+  }
+
+  // If we have new styles, add a new <span>.
+  xml::XmlNode span("span");
+  xml::XmlNode* node = parent;
+  if (body.style.bold || body.style.italic || body.style.underline) {
+    node = &span;
+    if (body.style.bold) {
+      RCHECK(span.SetStringAttribute("tts:fontWeight",
+                                     *body.style.bold ? "bold" : "normal"));
+    }
+    if (body.style.italic) {
+      RCHECK(span.SetStringAttribute("tts:fontStyle",
+                                     *body.style.italic ? "italic" : "normal"));
+    }
+    if (body.style.underline) {
+      RCHECK(span.SetStringAttribute(
+          "tts:textDecoration",
+          *body.style.underline ? "underline" : "noUnderline"));
+    }
+  }
+
+  if (!body.body.empty()) {
+    node->AddContent(body.body);
+  } else {
+    for (const auto& frag : body.sub_fragments) {
+      if (!ConvertFragmentToXml(frag, node))
+        return false;
+    }
+  }
+
+  if (body.style.bold || body.style.italic || body.style.underline)
+    RCHECK(parent->AddChild(std::move(span)));
+  return true;
+}
+
+}  // namespace ttml
+}  // namespace media
+}  // namespace shaka
diff --git a/packager/media/formats/ttml/ttml_generator.h b/packager/media/formats/ttml/ttml_generator.h
new file mode 100644
index 0000000000..3ac003a084
--- /dev/null
+++ b/packager/media/formats/ttml/ttml_generator.h
@@ -0,0 +1,57 @@
+// Copyright 2020 Google LLC. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file or at
+// https://developers.google.com/open-source/licenses/bsd
+
+#ifndef PACKAGER_MEDIA_FORMATS_TTML_TTML_GENERATOR_H_
+#define PACKAGER_MEDIA_FORMATS_TTML_TTML_GENERATOR_H_
+
+#include <list>
+#include <map>
+#include <string>
+
+#include "packager/media/base/text_sample.h"
+#include "packager/media/base/text_stream_info.h"
+#include "packager/mpd/base/xml/xml_node.h"
+
+namespace shaka {
+namespace media {
+namespace ttml {
+
+/// Buffers text samples and serializes them as a single TTML document.
+class TtmlGenerator {
+ public:
+  explicit TtmlGenerator();
+  ~TtmlGenerator();
+
+  /// Sets the regions, language and time scale used by later Dump() calls.
+  void Initialize(const std::map<std::string, TextRegion>& regions,
+                  const std::string& language,
+                  uint32_t time_scale);
+  /// Buffers a copy of |sample| for the next Dump().
+  void AddSample(const TextSample& sample);
+  /// Drops all buffered samples; regions, language and time scale are kept.
+  void Reset();
+
+  /// Writes the buffered samples as a TTML document to |result|.
+  /// @return false if building the XML tree failed.
+  bool Dump(std::string* result) const;
+
+ private:
+  bool AddSampleToXml(const TextSample& sample, xml::XmlNode* body) const;
+  bool ConvertFragmentToXml(const TextFragment& fragment,
+                            xml::XmlNode* parent) const;
+
+  std::list<TextSample> samples_;
+  std::map<std::string, TextRegion> regions_;
+  std::string language_;
+  // Set by Initialize(); Dump() divides by it, so Initialize() must run first.
+  uint32_t time_scale_;
+};
+
+}  // namespace ttml
+}  // namespace media
+}  // namespace shaka
+
+#endif  // PACKAGER_MEDIA_FORMATS_TTML_TTML_GENERATOR_H_
diff --git a/packager/media/formats/ttml/ttml_generator_unittest.cc
b/packager/media/formats/ttml/ttml_generator_unittest.cc new file mode 100644 index 0000000000..3737cafa48 --- /dev/null +++ b/packager/media/formats/ttml/ttml_generator_unittest.cc @@ -0,0 +1,327 @@ +// Copyright 2020 Google LLC. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#include "packager/media/formats/ttml/ttml_generator.h" + +#include +#include + +namespace shaka { +namespace media { +namespace ttml { + +namespace { + +const uint64_t kMsTimeScale = 1000u; + +const TextFragmentStyle kNoStyles{}; +const bool kNewline = true; +const std::string kNoId = ""; + +TextSettings DefaultSettings() { + TextSettings settings; + // Override default value so TTML doesn't print this setting by default. + settings.text_alignment = TextAlignment::kStart; + return settings; +} + +struct TestProperties { + std::string id; + int64_t start = 5000; + int64_t end = 6000; + TextSettings settings = DefaultSettings(); + TextFragment body; + + std::map regions; + std::string language = ""; + uint32_t time_scale = kMsTimeScale; +}; + +} // namespace + +class TtmlMuxerTest : public testing::Test { + protected: + void ParseSingleCue(const std::string& expected_body, + const TestProperties& properties) { + TtmlGenerator generator; + generator.Initialize(properties.regions, properties.language, + properties.time_scale); + generator.AddSample(TextSample(properties.id, properties.start, + properties.end, properties.settings, + properties.body)); + + std::string results; + ASSERT_TRUE(generator.Dump(&results)); + ASSERT_EQ(results, expected_body); + } +}; + +TEST_F(TtmlMuxerTest, WithOneSegmentAndWithOneSample) { + const char* kExpectedOutput = + "\n" + "\n" + " \n" + " \n" + "
\n" + "

payload

\n" + "
\n" + " \n" + "
\n"; + + TestProperties properties; + properties.body.body = "payload"; + ParseSingleCue(kExpectedOutput, properties); +} + +TEST_F(TtmlMuxerTest, MultipleFragmentsWithNewlines) { + const char* kExpectedOutput = + "\n" + "\n" + " \n" + " \n" + "
\n" + "

foo bar
baz

\n" + "
\n" + " \n" + "
\n"; + + TestProperties properties; + properties.body.sub_fragments.emplace_back(kNoStyles, "foo "); + properties.body.sub_fragments.emplace_back(kNoStyles, "bar"); + properties.body.sub_fragments.emplace_back(kNoStyles, kNewline); + properties.body.sub_fragments.emplace_back(kNoStyles, "baz"); + + ParseSingleCue(kExpectedOutput, properties); +} + +TEST_F(TtmlMuxerTest, HandlesStyles) { + const char* kExpectedOutput = + "\n" + "\n" + " \n" + " \n" + "
\n" + "

\n" + " foo\n" + " bar\n" + " baz\n" + "

\n" + "
\n" + " \n" + "
\n"; + + TestProperties properties; + properties.body.sub_fragments.emplace_back(kNoStyles, "foo"); + properties.body.sub_fragments.back().style.bold = true; + properties.body.sub_fragments.emplace_back(kNoStyles, "bar"); + properties.body.sub_fragments.back().style.italic = true; + properties.body.sub_fragments.emplace_back(kNoStyles, "baz"); + properties.body.sub_fragments.back().style.underline = true; + + ParseSingleCue(kExpectedOutput, properties); +} + +TEST_F(TtmlMuxerTest, HandlesRegions) { + const char* kExpectedOutput = + "\n" + "\n" + " \n" + " \n" + " \n" + " \n" + "
\n" + "

bar

\n" + "
\n" + " \n" + "
\n"; + + TestProperties properties; + properties.settings.region = "foo"; + properties.body.body = "bar"; + + TextRegion region; + region.width = TextNumber(22, TextUnitType::kPercent); + region.height = TextNumber(33, TextUnitType::kPercent); + region.window_anchor_x = TextNumber(20, TextUnitType::kPixels); + region.window_anchor_y = TextNumber(40, TextUnitType::kPixels); + properties.regions.emplace("foo", region); + + ParseSingleCue(kExpectedOutput, properties); +} + +TEST_F(TtmlMuxerTest, HandlesLanguage) { + const char* kExpectedOutput = + "\n" + "\n" + " \n" + " \n" + "
\n" + "

bar

\n" + "
\n" + " \n" + "
\n"; + + TestProperties properties; + properties.body.body = "bar"; + properties.language = "foo"; + + ParseSingleCue(kExpectedOutput, properties); +} + +TEST_F(TtmlMuxerTest, HandlesPosition) { + const char* kExpectedOutput = + "\n" + "\n" + " \n" + " \n" + "
\n" + "

bar

\n" + "
\n" + " \n" + "
\n"; + + TestProperties properties; + properties.settings.position.emplace(30, TextUnitType::kPercent); + properties.settings.line.emplace(4, TextUnitType::kLines); + properties.body.body = "bar"; + + ParseSingleCue(kExpectedOutput, properties); +} + +TEST_F(TtmlMuxerTest, HandlesOtherSettings) { + const char* kExpectedOutput = + "\n" + "\n" + " \n" + " \n" + "
\n" + "

bar

\n" + "
\n" + " \n" + "
\n"; + + TestProperties properties; + properties.settings.writing_direction = + WritingDirection::kVerticalGrowingRight; + properties.settings.text_alignment = TextAlignment::kEnd; + properties.body.body = "bar"; + + ParseSingleCue(kExpectedOutput, properties); +} + +TEST_F(TtmlMuxerTest, HandlesCueId) { + const char* kExpectedOutput = + "\n" + "\n" + " \n" + " \n" + "
\n" + "

bar

\n" + "
\n" + " \n" + "
\n"; + + TestProperties properties; + properties.id = "foo"; + properties.body.body = "bar"; + + ParseSingleCue(kExpectedOutput, properties); +} + +TEST_F(TtmlMuxerTest, EscapesSpecialChars) { + const char* kExpectedOutput = + "\n" + "\n" + " \n" + " \n" + " \n" + " \n" + "
\n" + "

<tag>\"foo&bar\"

\n" + "
\n" + " \n" + "
\n"; + + TestProperties properties; + properties.id = "foo\n" + "\n" + " \n" + " \n" + "
\n" + "

foo

\n" + "
\n" + " \n" + "
\n"; + const char* kExpectedOutput2 = + "\n" + "\n" + " \n" + " \n" + "
\n" + "

bar

\n" + "
\n" + " \n" + "
\n"; + + TtmlGenerator generator; + generator.Initialize({}, "foobar", kMsTimeScale); + generator.AddSample(TextSample(kNoId, 5000, 6000, DefaultSettings(), + TextFragment(kNoStyles, "foo"))); + + std::string results; + ASSERT_TRUE(generator.Dump(&results)); + ASSERT_EQ(results, kExpectedOutput1); + + results.clear(); + generator.Reset(); + generator.AddSample(TextSample(kNoId, 8000, 9000, DefaultSettings(), + TextFragment(kNoStyles, "bar"))); + + ASSERT_TRUE(generator.Dump(&results)); + ASSERT_EQ(results, kExpectedOutput2); +} + +} // namespace ttml +} // namespace media +} // namespace shaka diff --git a/packager/media/formats/ttml/ttml_muxer.cc b/packager/media/formats/ttml/ttml_muxer.cc new file mode 100644 index 0000000000..c2bad38b8f --- /dev/null +++ b/packager/media/formats/ttml/ttml_muxer.cc @@ -0,0 +1,46 @@ +// Copyright 2020 Google LLC. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#include "packager/media/formats/ttml/ttml_muxer.h" + +#include "packager/file/file.h" +#include "packager/status_macros.h" + +namespace shaka { +namespace media { +namespace ttml { + +TtmlMuxer::TtmlMuxer(const MuxerOptions& options) : TextMuxer(options) {} +TtmlMuxer::~TtmlMuxer() {} + +Status TtmlMuxer::InitializeStream(TextStreamInfo* stream) { + stream->set_codec(kCodecTtml); + stream->set_codec_string("ttml"); + generator_.Initialize(stream->regions(), stream->language(), + stream->time_scale()); + return Status::OK; +} + +Status TtmlMuxer::AddTextSampleInternal(const TextSample& sample) { + generator_.AddSample(sample); + return Status::OK; +} + +Status TtmlMuxer::WriteToFile(const std::string& filename, uint64_t* size) { + std::string data; + if (!generator_.Dump(&data)) + return Status(error::INTERNAL_ERROR, "Error generating XML"); + generator_.Reset(); + *size = data.size(); + + if 
(!File::WriteStringToFile(filename.c_str(), data)) + return Status(error::FILE_FAILURE, "Failed to write " + filename); + return Status::OK; +} + +} // namespace ttml +} // namespace media +} // namespace shaka diff --git a/packager/media/formats/ttml/ttml_muxer.h b/packager/media/formats/ttml/ttml_muxer.h new file mode 100644 index 0000000000..0ed61bf7ab --- /dev/null +++ b/packager/media/formats/ttml/ttml_muxer.h @@ -0,0 +1,34 @@ +// Copyright 2020 Google LLC. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#ifndef PACKAGER_MEDIA_FORMATS_TTML_TTML_MUXER_H_ +#define PACKAGER_MEDIA_FORMATS_TTML_TTML_MUXER_H_ + +#include "packager/media/base/text_muxer.h" +#include "packager/media/formats/ttml/ttml_generator.h" + +namespace shaka { +namespace media { +namespace ttml { + +class TtmlMuxer : public TextMuxer { + public: + explicit TtmlMuxer(const MuxerOptions& options); + ~TtmlMuxer() override; + + private: + Status InitializeStream(TextStreamInfo* stream) override; + Status AddTextSampleInternal(const TextSample& sample) override; + Status WriteToFile(const std::string& filename, uint64_t* size) override; + + TtmlGenerator generator_; +}; + +} // namespace ttml +} // namespace media +} // namespace shaka + +#endif // PACKAGER_MEDIA_FORMATS_TTML_TTML_MUXER_H_ diff --git a/packager/media/formats/webvtt/webvtt_muxer.cc b/packager/media/formats/webvtt/webvtt_muxer.cc index 442b653fe3..b3cb7865b6 100644 --- a/packager/media/formats/webvtt/webvtt_muxer.cc +++ b/packager/media/formats/webvtt/webvtt_muxer.cc @@ -6,13 +6,8 @@ #include "packager/media/formats/webvtt/webvtt_muxer.h" -#include -#include - #include "packager/file/file.h" #include "packager/file/file_closer.h" -#include "packager/media/base/muxer_util.h" -#include "packager/media/base/text_stream_info.h" #include "packager/media/formats/webvtt/webvtt_utils.h" #include
"packager/status_macros.h" @@ -20,90 +15,28 @@ namespace shaka { namespace media { namespace webvtt { -WebVttMuxer::WebVttMuxer(const MuxerOptions& options) : Muxer(options) {} +WebVttMuxer::WebVttMuxer(const MuxerOptions& options) : TextMuxer(options) {} WebVttMuxer::~WebVttMuxer() {} -Status WebVttMuxer::InitializeMuxer() { - if (streams().size() != 1 || streams()[0]->stream_type() != kStreamText) { - return Status(error::MUXER_FAILURE, - "Incorrect streams given to WebVTT muxer"); - } +Status WebVttMuxer::InitializeStream(TextStreamInfo* stream) { + stream->set_codec(kCodecWebVtt); + stream->set_codec_string("wvtt"); - // Only initialize the stream once we see a cue to avoid empty files. - muxer_listener()->OnMediaStart(options(), *streams()[0], - streams()[0]->time_scale(), - MuxerListener::kContainerText); - - auto* stream = static_cast<TextStreamInfo*>(streams()[0].get()); const std::string preamble = WebVttGetPreamble(*stream); buffer_.reset(new WebVttFileBuffer( options().transport_stream_timestamp_offset_ms, preamble)); - last_cue_ms_ = 0; - return Status::OK; } -Status WebVttMuxer::Finalize() { - const float duration_ms = static_cast<float>(total_duration_ms_); - float duration_seconds = duration_ms / 1000; - - // If we haven't seen any segments, this is a single-file. In this case, - // flush the single segment. - MuxerListener::MediaRanges ranges; - if (duration_seconds == 0 && last_cue_ms_ != 0) { - DCHECK(options().segment_template.empty()); - duration_seconds = static_cast<float>(last_cue_ms_) / 1000; - - uint64_t size; - RETURN_IF_ERROR(WriteToFile(options().output_file_name, &size)); - // Insert a dummy value so the HLS generator will generate a segment list.
- ranges.subsegment_ranges.emplace_back(); - - muxer_listener()->OnNewSegment( - options().output_file_name, 0, - duration_seconds * streams()[0]->time_scale(), size); - } - - muxer_listener()->OnMediaEnd(ranges, duration_seconds); - - return Status::OK; -} - -Status WebVttMuxer::AddTextSample(size_t stream_id, const TextSample& sample) { - // Ignore sync samples. - if (sample.body().is_empty()) { - return Status::OK; - } - +Status WebVttMuxer::AddTextSampleInternal(const TextSample& sample) { if (sample.id().find('\n') != std::string::npos) { return Status(error::MUXER_FAILURE, "Text id cannot contain newlines"); } - last_cue_ms_ = sample.EndTime(); buffer_->Append(sample); return Status::OK; } -Status WebVttMuxer::FinalizeSegment(size_t stream_id, - const SegmentInfo& segment_info) { - total_duration_ms_ += segment_info.duration; - - const std::string& segment_template = options().segment_template; - DCHECK(!segment_template.empty()); - const uint32_t index = segment_index_++; - const uint64_t start = segment_info.start_timestamp; - const uint64_t duration = segment_info.duration; - const uint32_t bandwidth = options().bandwidth; - - const std::string filename = - GetSegmentName(segment_template, start, index, bandwidth); - uint64_t size; - RETURN_IF_ERROR(WriteToFile(filename, &size)); - - muxer_listener()->OnNewSegment(filename, start, duration, size); - return Status::OK; -} - Status WebVttMuxer::WriteToFile(const std::string& filename, uint64_t* size) { // Write everything to the file before telling the manifest so that the // file will exist on disk. 
diff --git a/packager/media/formats/webvtt/webvtt_muxer.h b/packager/media/formats/webvtt/webvtt_muxer.h index 089fede39a..9963b83115 100644 --- a/packager/media/formats/webvtt/webvtt_muxer.h +++ b/packager/media/formats/webvtt/webvtt_muxer.h @@ -7,8 +7,9 @@ #ifndef PACKAGER_MEDIA_FORMATS_WEBVTT_WEBVTT_MUXER_H_ #define PACKAGER_MEDIA_FORMATS_WEBVTT_WEBVTT_MUXER_H_ -#include "packager/media/base/buffer_writer.h" -#include "packager/media/base/muxer.h" +#include <memory> + +#include "packager/media/base/text_muxer.h" #include "packager/media/formats/webvtt/webvtt_file_buffer.h" namespace shaka { @@ -16,28 +17,19 @@ namespace media { namespace webvtt { /// Implements WebVtt Muxer. -class WebVttMuxer : public Muxer { +class WebVttMuxer : public TextMuxer { public: /// Create a WebMMuxer object from MuxerOptions. explicit WebVttMuxer(const MuxerOptions& options); ~WebVttMuxer() override; private: - // Muxer implementation overrides. - Status InitializeMuxer() override; - Status Finalize() override; - Status AddTextSample(size_t stream_id, const TextSample& sample) override; - Status FinalizeSegment(size_t stream_id, - const SegmentInfo& segment_info) override; - - Status WriteToFile(const std::string& filename, uint64_t* size); - - DISALLOW_COPY_AND_ASSIGN(WebVttMuxer); + // TextMuxer implementation overrides.
+ Status InitializeStream(TextStreamInfo* stream) override; + Status AddTextSampleInternal(const TextSample& sample) override; + Status WriteToFile(const std::string& filename, uint64_t* size) override; std::unique_ptr<WebVttFileBuffer> buffer_; - uint64_t total_duration_ms_ = 0; - uint64_t last_cue_ms_ = 0; - uint32_t segment_index_ = 0; }; } // namespace webvtt diff --git a/packager/mpd/base/xml/scoped_xml_ptr.h b/packager/mpd/base/xml/scoped_xml_ptr.h index a3cd875d65..71b62ed08b 100644 --- a/packager/mpd/base/xml/scoped_xml_ptr.h +++ b/packager/mpd/base/xml/scoped_xml_ptr.h @@ -28,6 +28,9 @@ struct XmlDeleter { inline void operator()(xmlSchemaValidCtxtPtr ptr) const { xmlSchemaFreeValidCtxt(ptr); } + inline void operator()(xmlOutputBufferPtr ptr) const { + xmlOutputBufferClose(ptr); + } inline void operator()(xmlSchemaPtr ptr) const { xmlSchemaFree(ptr); } inline void operator()(xmlNodePtr ptr) const { xmlFreeNode(ptr); } inline void operator()(xmlDocPtr ptr) const { xmlFreeDoc(ptr); } diff --git a/packager/mpd/base/xml/xml_node.cc b/packager/mpd/base/xml/xml_node.cc index 720d0b2786..ded7a8c292 100644 --- a/packager/mpd/base/xml/xml_node.cc +++ b/packager/mpd/base/xml/xml_node.cc @@ -205,6 +205,11 @@ bool XmlNode::SetId(uint32_t id) { return SetIntegerAttribute("id", id); } +void XmlNode::AddContent(const std::string& content) { + DCHECK(impl_->node); + xmlNodeAddContent(impl_->node.get(), BAD_CAST content.c_str()); +} + void XmlNode::SetContent(const std::string& content) { DCHECK(impl_->node); xmlNodeSetContent(impl_->node.get(), BAD_CAST content.c_str()); diff --git a/packager/mpd/base/xml/xml_node.h b/packager/mpd/base/xml/xml_node.h index 4aec56a065..a306fdfed7 100644 --- a/packager/mpd/base/xml/xml_node.h +++ b/packager/mpd/base/xml/xml_node.h @@ -81,6 +81,9 @@ class XmlNode { /// @param id is the ID for this element. bool SetId(uint32_t id) WARN_UNUSED_RESULT; + /// Similar to SetContent, but appends to the end of existing content.
+ void AddContent(const std::string& content); + /// Set the contents of an XML element using a string. /// This cannot set child elements because <> will become &lt; and &rt; /// This should be used to set the text for the element, e.g. setting diff --git a/packager/packager.cc b/packager/packager.cc index 43e0a0de71..edb7ef18d5 100644 --- a/packager/packager.cc +++ b/packager/packager.cc @@ -214,6 +214,7 @@ Status ValidateStreamDescriptor(bool dump_stream_info, "descriptors 'output' or 'init_segment' are not allowed."); } } else if (output_format == CONTAINER_WEBVTT || + output_format == CONTAINER_TTML || output_format == CONTAINER_AAC || output_format == CONTAINER_MP3 || output_format == CONTAINER_AC3 || output_format == CONTAINER_EAC3) { @@ -222,9 +223,9 @@ Status ValidateStreamDescriptor(bool dump_stream_info, if (stream.segment_template.length() && stream.output.length()) { return Status( error::INVALID_ARGUMENT, - "Segmented WebVTT or PackedAudio output cannot have an init segment. " - "Do not specify stream descriptors 'output' or 'init_segment' when " - "using 'segment_template'."); + "Segmented subtitles or PackedAudio output cannot have an init " + "segment.
Do not specify stream descriptors 'output' or " + "'init_segment' when using 'segment_template'."); } } else { // For any other format, if there is a segment template, there must be an diff --git a/packager/packager.gyp b/packager/packager.gyp index f3b20bf8fa..bce4c3a653 100644 --- a/packager/packager.gyp +++ b/packager/packager.gyp @@ -38,6 +38,7 @@ 'media/formats/mp2t/mp2t.gyp:mp2t', 'media/formats/mp4/mp4.gyp:mp4', 'media/formats/packed_audio/packed_audio.gyp:packed_audio', + 'media/formats/ttml/ttml.gyp:ttml', 'media/formats/webm/webm.gyp:webm', 'media/formats/webvtt/webvtt.gyp:webvtt', 'media/formats/wvm/wvm.gyp:wvm', @@ -215,6 +216,7 @@ 'media/formats/mp2t/mp2t.gyp:mp2t_unittest', 'media/formats/mp4/mp4.gyp:mp4_unittest', 'media/formats/packed_audio/packed_audio.gyp:packed_audio_unittest', + 'media/formats/ttml/ttml.gyp:ttml_unittest', 'media/formats/webm/webm.gyp:webm_unittest', 'media/formats/webvtt/webvtt.gyp:webvtt_unittest', 'media/formats/wvm/wvm.gyp:wvm_unittest',