From 1f21cc78cd1089f37fb0f0953bfc3d7feec75622 Mon Sep 17 00:00:00 2001 From: Jacob Trimble Date: Wed, 26 Aug 2020 13:47:14 -0700 Subject: [PATCH] Add style support for cue fragments. Now text cues are composed of nested fragments that can be individually styled. This allows portions of the cue to be bold, etc. The WebVTT parser doesn't parse the inputs, but the original tags are preserved in WebVTT output. The WebVTT output will add tags if the style elements are present in the cue object. Change-Id: I6abba4175e376e4f753193f7d8cac63e958d3c89 --- .../media/base/media_handler_test_base.cc | 2 +- packager/media/base/text_sample.cc | 7 +- packager/media/base/text_sample.h | 24 ++++- .../media/formats/webvtt/webvtt_parser.cc | 5 +- .../formats/webvtt/webvtt_parser_unittest.cc | 43 ++++++-- packager/media/formats/webvtt/webvtt_utils.cc | 102 +++++++++++++++++- .../formats/webvtt/webvtt_utils_unittest.cc | 74 +++++++++++++ 7 files changed, 240 insertions(+), 17 deletions(-) diff --git a/packager/media/base/media_handler_test_base.cc b/packager/media/base/media_handler_test_base.cc index 2932f9f319..02d3fff2b2 100644 --- a/packager/media/base/media_handler_test_base.cc +++ b/packager/media/base/media_handler_test_base.cc @@ -267,7 +267,7 @@ std::unique_ptr MediaHandlerTestBase::GetTextSample( int64_t end, const std::string& payload) const { return std::unique_ptr{ - new TextSample(id, start, end, {}, TextFragment{payload})}; + new TextSample(id, start, end, {}, TextFragment{{}, payload})}; } std::unique_ptr MediaHandlerTestBase::GetCueEvent( diff --git a/packager/media/base/text_sample.cc b/packager/media/base/text_sample.cc index 14585331b6..fec427123e 100644 --- a/packager/media/base/text_sample.cc +++ b/packager/media/base/text_sample.cc @@ -6,13 +6,18 @@ #include "packager/media/base/text_sample.h" +#include +#include + #include "packager/base/logging.h" namespace shaka { namespace media { bool TextFragment::is_empty() const { - return body.empty(); + return std::all_of(sub_fragments.begin(), sub_fragments.end(), + std::mem_fn(&TextFragment::is_empty)) && + body.empty(); } TextSample::TextSample(const std::string& id, diff --git a/packager/media/base/text_sample.h b/packager/media/base/text_sample.h index 15dd4a3fb4..ca0a4f9daa 100644 --- a/packager/media/base/text_sample.h +++ b/packager/media/base/text_sample.h @@ -73,9 +73,31 @@ struct TextSettings { TextAlignment text_alignment = TextAlignment::kCenter; }; +struct TextFragmentStyle { + base::Optional underline; + base::Optional bold; + base::Optional italic; +}; + +/// Represents a recursive structure of styled blocks of text. Only one of +/// sub_fragments, body, or newline will be set. struct TextFragment { - // TODO(modmaker): Fill with settings and sub-fragments. + TextFragment() {} + TextFragment(const TextFragmentStyle& style, + const std::vector& sub_fragments) + : style(style), sub_fragments(sub_fragments) {} + TextFragment(const TextFragmentStyle& style, const char* body) + : style(style), body(body) {} + TextFragment(const TextFragmentStyle& style, const std::string& body) + : style(style), body(body) {} + TextFragment(const TextFragmentStyle& style, bool newline) + : style(style), newline(newline) {} + + TextFragmentStyle style; + + std::vector sub_fragments; std::string body; + bool newline = false; bool is_empty() const; }; diff --git a/packager/media/formats/webvtt/webvtt_parser.cc b/packager/media/formats/webvtt/webvtt_parser.cc index 60ea10d674..86fd76c6ec 100644 --- a/packager/media/formats/webvtt/webvtt_parser.cc +++ b/packager/media/formats/webvtt/webvtt_parser.cc @@ -349,11 +349,12 @@ bool WebVttParser::ParseCue(const std::string& id, // The rest of the block is the payload. // TODO: Parse tags to support , , etc. TextFragment body; + TextFragmentStyle no_styles; for (size_t i = 1; i < block_size; i++) { if (i > 1) { - body.body += "\n"; + body.sub_fragments.emplace_back(no_styles, /* newline= */ true); } - body.body += block[i]; + body.sub_fragments.emplace_back(no_styles, block[i]); } const auto sample = diff --git a/packager/media/formats/webvtt/webvtt_parser_unittest.cc b/packager/media/formats/webvtt/webvtt_parser_unittest.cc index 3cadb492e9..9da884eefe 100644 --- a/packager/media/formats/webvtt/webvtt_parser_unittest.cc +++ b/packager/media/formats/webvtt/webvtt_parser_unittest.cc @@ -24,6 +24,27 @@ std::string ToString(const std::vector& v) { return std::string(v.begin(), v.end()); } +void ExpectNoStyle(const TextFragmentStyle& style) { + EXPECT_FALSE(style.underline); + EXPECT_FALSE(style.bold); + EXPECT_FALSE(style.italic); +} + +void ExpectPlainCueWithBody(const TextFragment& fragment, + const std::string& expected) { + ExpectNoStyle(fragment.style); + ASSERT_TRUE(fragment.body.empty()); + ASSERT_FALSE(fragment.newline); + + if (expected.empty()) { + EXPECT_TRUE(fragment.sub_fragments.empty()); + } else { + ASSERT_EQ(fragment.sub_fragments.size(), 1u); + ExpectNoStyle(fragment.sub_fragments[0].style); + EXPECT_EQ(fragment.sub_fragments[0].body, expected); + } +} + } // namespace class WebVttParserTest : public testing::Test { @@ -177,7 +198,7 @@ TEST_F(WebVttParserTest, ParseOneCue) { EXPECT_EQ(samples_[0]->id(), kNoId); EXPECT_EQ(samples_[0]->start_time(), 60000u); EXPECT_EQ(samples_[0]->duration(), 3540000u); - EXPECT_EQ(samples_[0]->body().body, "subtitle"); + ExpectPlainCueWithBody(samples_[0]->body(), "subtitle"); // No settings const auto& settings = samples_[0]->settings(); @@ -221,7 +242,7 @@ TEST_F(WebVttParserTest, ParseOneCueWithStyleAndRegion) { EXPECT_EQ(samples_[0]->id(), kNoId); EXPECT_EQ(samples_[0]->start_time(), 60000u); EXPECT_EQ(samples_[0]->duration(), 3540000u); - EXPECT_EQ(samples_[0]->body().body, "subtitle"); + ExpectPlainCueWithBody(samples_[0]->body(), "subtitle"); } TEST_F(WebVttParserTest, ParseOneEmptyCue) { @@ -238,7 +259,7 @@ TEST_F(WebVttParserTest, ParseOneEmptyCue) { ASSERT_EQ(streams_.size(), 1u); ASSERT_EQ(samples_.size(), 1u); - EXPECT_EQ(samples_[0]->body().body, ""); + ExpectPlainCueWithBody(samples_[0]->body(), ""); } TEST_F(WebVttParserTest, FailToParseCueWithArrowInId) { @@ -271,7 +292,7 @@ TEST_F(WebVttParserTest, ParseOneCueWithId) { ASSERT_EQ(streams_.size(), 1u); ASSERT_EQ(samples_.size(), 1u); EXPECT_EQ(samples_[0]->id(), "id"); - EXPECT_EQ(samples_[0]->body().body, "subtitle"); + ExpectPlainCueWithBody(samples_[0]->body(), "subtitle"); } TEST_F(WebVttParserTest, ParseOneEmptyCueWithId) { @@ -290,7 +311,7 @@ TEST_F(WebVttParserTest, ParseOneEmptyCueWithId) { ASSERT_EQ(streams_.size(), 1u); ASSERT_EQ(samples_.size(), 1u); EXPECT_EQ(samples_[0]->id(), "id"); - EXPECT_EQ(samples_[0]->body().body, ""); + ExpectPlainCueWithBody(samples_[0]->body(), ""); } TEST_F(WebVttParserTest, ParseOneCueWithSettings) { @@ -363,13 +384,13 @@ TEST_F(WebVttParserTest, ParseMultipleCues) { EXPECT_EQ(samples_[0]->start_time(), 1000u); EXPECT_EQ(samples_[0]->duration(), 4200u); - EXPECT_EQ(samples_[0]->body().body, "subtitle A"); + ExpectPlainCueWithBody(samples_[0]->body(), "subtitle A"); EXPECT_EQ(samples_[1]->start_time(), 2321u); EXPECT_EQ(samples_[1]->duration(), 4679u); - EXPECT_EQ(samples_[1]->body().body, "subtitle B"); + ExpectPlainCueWithBody(samples_[1]->body(), "subtitle B"); EXPECT_EQ(samples_[2]->start_time(), 5800u); EXPECT_EQ(samples_[2]->duration(), 2200u); - EXPECT_EQ(samples_[2]->body().body, "subtitle C"); + ExpectPlainCueWithBody(samples_[2]->body(), "subtitle C"); } // Verify that a typical case with mulitple cues work even when comments are @@ -405,9 +426,9 @@ TEST_F(WebVttParserTest, ParseWithComments) { ASSERT_EQ(streams_.size(), 1u); ASSERT_EQ(samples_.size(), 3u); - EXPECT_EQ(samples_[0]->body().body, "subtitle A"); - EXPECT_EQ(samples_[1]->body().body, "subtitle B"); - EXPECT_EQ(samples_[2]->body().body, "subtitle C"); + ExpectPlainCueWithBody(samples_[0]->body(), "subtitle A"); + ExpectPlainCueWithBody(samples_[1]->body(), "subtitle B"); + ExpectPlainCueWithBody(samples_[2]->body(), "subtitle C"); } } // namespace media diff --git a/packager/media/formats/webvtt/webvtt_utils.cc b/packager/media/formats/webvtt/webvtt_utils.cc index 685d85b3f8..1be0dc4202 100644 --- a/packager/media/formats/webvtt/webvtt_utils.cc +++ b/packager/media/formats/webvtt/webvtt_utils.cc @@ -9,6 +9,8 @@ #include #include +#include + #include "packager/base/logging.h" #include "packager/base/strings/string_number_conversions.h" #include "packager/base/strings/stringprintf.h" @@ -33,6 +35,103 @@ bool GetTotalMilliseconds(uint64_t hours, *out = 60 * 60 * 1000 * hours + 60 * 1000 * minutes + 1000 * seconds + ms; return true; } + +enum class StyleTagKind { + kUnderline, + kBold, + kItalic, +}; + +std::string GetOpenTag(StyleTagKind tag) { + switch (tag) { + case StyleTagKind::kUnderline: + return ""; + case StyleTagKind::kBold: + return ""; + case StyleTagKind::kItalic: + return ""; + } + return ""; // Not reached, but Windows doesn't like NOTREACHED. +} + +std::string GetCloseTag(StyleTagKind tag) { + switch (tag) { + case StyleTagKind::kUnderline: + return ""; + case StyleTagKind::kBold: + return ""; + case StyleTagKind::kItalic: + return ""; + } + return ""; // Not reached, but Windows doesn't like NOTREACHED. +} + +std::string WriteFragment(const TextFragment& fragment, + std::list* tags) { + std::string ret; + size_t local_tag_count = 0; + auto has = [tags](StyleTagKind tag) { + return std::find(tags->begin(), tags->end(), tag) != tags->end(); + }; + auto push_tag = [tags, &local_tag_count, &has](StyleTagKind tag) { + if (has(tag)) { + return std::string(); + } + tags->push_back(tag); + local_tag_count++; + return GetOpenTag(tag); + }; + + if ((fragment.style.underline == false && has(StyleTagKind::kUnderline)) || + (fragment.style.bold == false && has(StyleTagKind::kBold)) || + (fragment.style.italic == false && has(StyleTagKind::kItalic))) { + LOG(WARNING) << "WebVTT output doesn't support disabling " + "underline/bold/italic within a cue"; + } + + if (fragment.newline) { + // Newlines represent separate WebVTT cues. So close the existing tags to + // be nice and re-open them on the new line. + for (auto it = tags->rbegin(); it != tags->rend(); it++) { + ret += GetCloseTag(*it); + } + ret += "\n"; + for (const auto tag : *tags) { + ret += GetOpenTag(tag); + } + } else { + if (fragment.style.underline == true) { + ret += push_tag(StyleTagKind::kUnderline); + } + if (fragment.style.bold == true) { + ret += push_tag(StyleTagKind::kBold); + } + if (fragment.style.italic == true) { + ret += push_tag(StyleTagKind::kItalic); + } + + if (!fragment.body.empty()) { + // Replace newlines and consecutive whitespace with a single space. If + // the user wanted an explicit newline, they should use the "newline" + // field. + std::regex whitespace("\\s+", std::regex_constants::ECMAScript); + ret += std::regex_replace(fragment.body, whitespace, std::string(" ")); + } else { + for (const auto& frag : fragment.sub_fragments) { + ret += WriteFragment(frag, tags); + } + } + + // Pop all the local tags we pushed. + while (local_tag_count > 0) { + ret += GetCloseTag(tags->back()); + tags->pop_back(); + local_tag_count--; + } + } + return ret; +} + } // namespace bool WebVttTimestampToMs(const base::StringPiece& source, uint64_t* out) { @@ -157,7 +256,8 @@ std::string WebVttSettingsToString(const TextSettings& settings) { } std::string WebVttFragmentToString(const TextFragment& fragment) { - return fragment.body; + std::list tags; + return WriteFragment(fragment, &tags); } } // namespace media diff --git a/packager/media/formats/webvtt/webvtt_utils_unittest.cc b/packager/media/formats/webvtt/webvtt_utils_unittest.cc index 111e2ad033..4273a59a8a 100644 --- a/packager/media/formats/webvtt/webvtt_utils_unittest.cc +++ b/packager/media/formats/webvtt/webvtt_utils_unittest.cc @@ -11,6 +11,24 @@ namespace shaka { namespace media { +namespace { + +const TextFragmentStyle kNoStyle{}; + +TextFragmentStyle GetItalicStyle() { + TextFragmentStyle style; + style.italic = true; + return style; +} + +TextFragmentStyle GetBoldStyle() { + TextFragmentStyle style; + style.bold = true; + return style; +} + +} // namespace + TEST(WebVttTimestampTest, TooShort) { uint64_t ms; EXPECT_FALSE(WebVttTimestampToMs("00.000", &ms)); @@ -150,5 +168,61 @@ TEST(WebVttUtilsTest, SettingsToString_IgnoresDefaults) { EXPECT_EQ(actual, "region:foo"); } +TEST(WebVttUtilsTest, FragmentToString) { + TextFragment frag(GetBoldStyle(), "Foobar"); + EXPECT_EQ(WebVttFragmentToString(frag), "Foobar"); +} + +TEST(WebVttUtilsTest, FragmentToString_PreservesTags) { + TextFragment frag(kNoStyle, "Foobar"); + EXPECT_EQ(WebVttFragmentToString(frag), "Foobar"); +} + +TEST(WebVttUtilsTest, FragmentToString_HandlesNestedFragments) { + TextFragment frag; + frag.sub_fragments.emplace_back(kNoStyle, "Hello "); + frag.sub_fragments.emplace_back(kNoStyle, "World"); + EXPECT_EQ(WebVttFragmentToString(frag), "Hello World"); +} + +TEST(WebVttUtilsTest, FragmentToString_HandlesNestedFragmentsWithStyle) { + TextFragment frag; + frag.style.bold = true; + frag.sub_fragments.emplace_back(GetItalicStyle(), "Hello"); + frag.sub_fragments.emplace_back(kNoStyle, " World"); + EXPECT_EQ(WebVttFragmentToString(frag), "Hello World"); +} + +TEST(WebVttUtilsTest, FragmentToString_HandlesNewlines) { + TextFragment frag; + frag.sub_fragments.emplace_back(kNoStyle, "Hello"); + frag.sub_fragments.emplace_back(kNoStyle, true); + frag.sub_fragments.emplace_back(kNoStyle, "World"); + EXPECT_EQ(WebVttFragmentToString(frag), "Hello\nWorld"); +} + +TEST(WebVttUtilsTest, FragmentToString_HandlesNewlinesWithStyle) { + TextFragment frag; + frag.style.bold = true; + frag.sub_fragments.emplace_back(kNoStyle, "Hello"); + frag.sub_fragments.emplace_back(kNoStyle, true); + frag.sub_fragments.emplace_back(kNoStyle, "World"); + EXPECT_EQ(WebVttFragmentToString(frag), "Hello\nWorld"); +} + +TEST(WebVttUtilsTest, FragmentToString_HandlesNestedNewlinesWithStyle) { + TextFragment nested; + nested.sub_fragments.emplace_back(kNoStyle, "Hello"); + nested.sub_fragments.emplace_back(kNoStyle, true); + nested.sub_fragments.emplace_back(kNoStyle, "World"); + + TextFragment frag; + frag.style.bold = true; + frag.sub_fragments.emplace_back(nested); + frag.sub_fragments.emplace_back(kNoStyle, " Now"); + + EXPECT_EQ(WebVttFragmentToString(frag), "Hello\nWorld Now"); +} + } // namespace media } // namespace shaka