diff --git a/packager/media/base/media_handler_test_base.cc b/packager/media/base/media_handler_test_base.cc index 2932f9f319..02d3fff2b2 100644 --- a/packager/media/base/media_handler_test_base.cc +++ b/packager/media/base/media_handler_test_base.cc @@ -267,7 +267,7 @@ std::unique_ptr MediaHandlerTestBase::GetTextSample( int64_t end, const std::string& payload) const { return std::unique_ptr{ - new TextSample(id, start, end, {}, TextFragment{payload})}; + new TextSample(id, start, end, {}, TextFragment{{}, payload})}; } std::unique_ptr MediaHandlerTestBase::GetCueEvent( diff --git a/packager/media/base/text_sample.cc b/packager/media/base/text_sample.cc index 14585331b6..fec427123e 100644 --- a/packager/media/base/text_sample.cc +++ b/packager/media/base/text_sample.cc @@ -6,13 +6,18 @@ #include "packager/media/base/text_sample.h" +#include +#include + #include "packager/base/logging.h" namespace shaka { namespace media { bool TextFragment::is_empty() const { - return body.empty(); + return std::all_of(sub_fragments.begin(), sub_fragments.end(), + std::mem_fn(&TextFragment::is_empty)) && + body.empty(); } TextSample::TextSample(const std::string& id, diff --git a/packager/media/base/text_sample.h b/packager/media/base/text_sample.h index 15dd4a3fb4..ca0a4f9daa 100644 --- a/packager/media/base/text_sample.h +++ b/packager/media/base/text_sample.h @@ -73,9 +73,31 @@ struct TextSettings { TextAlignment text_alignment = TextAlignment::kCenter; }; +struct TextFragmentStyle { + base::Optional underline; + base::Optional bold; + base::Optional italic; +}; + +/// Represents a recursive structure of styled blocks of text. Only one of +/// sub_fragments, body, or newline will be set. struct TextFragment { - // TODO(modmaker): Fill with settings and sub-fragments. + TextFragment() {} + TextFragment(const TextFragmentStyle& style, + const std::vector& sub_fragments) + : style(style), sub_fragments(sub_fragments) {} + TextFragment(const TextFragmentStyle& style, const char* body) + : style(style), body(body) {} + TextFragment(const TextFragmentStyle& style, const std::string& body) + : style(style), body(body) {} + TextFragment(const TextFragmentStyle& style, bool newline) + : style(style), newline(newline) {} + + TextFragmentStyle style; + + std::vector sub_fragments; std::string body; + bool newline = false; bool is_empty() const; }; diff --git a/packager/media/formats/webvtt/webvtt_parser.cc b/packager/media/formats/webvtt/webvtt_parser.cc index 60ea10d674..86fd76c6ec 100644 --- a/packager/media/formats/webvtt/webvtt_parser.cc +++ b/packager/media/formats/webvtt/webvtt_parser.cc @@ -349,11 +349,12 @@ bool WebVttParser::ParseCue(const std::string& id, // The rest of the block is the payload. // TODO: Parse tags to support , , etc. TextFragment body; + TextFragmentStyle no_styles; for (size_t i = 1; i < block_size; i++) { if (i > 1) { - body.body += "\n"; + body.sub_fragments.emplace_back(no_styles, /* newline= */ true); } - body.body += block[i]; + body.sub_fragments.emplace_back(no_styles, block[i]); } const auto sample = diff --git a/packager/media/formats/webvtt/webvtt_parser_unittest.cc b/packager/media/formats/webvtt/webvtt_parser_unittest.cc index 3cadb492e9..9da884eefe 100644 --- a/packager/media/formats/webvtt/webvtt_parser_unittest.cc +++ b/packager/media/formats/webvtt/webvtt_parser_unittest.cc @@ -24,6 +24,27 @@ std::string ToString(const std::vector& v) { return std::string(v.begin(), v.end()); } +void ExpectNoStyle(const TextFragmentStyle& style) { + EXPECT_FALSE(style.underline); + EXPECT_FALSE(style.bold); + EXPECT_FALSE(style.italic); +} + +void ExpectPlainCueWithBody(const TextFragment& fragment, + const std::string& expected) { + ExpectNoStyle(fragment.style); + ASSERT_TRUE(fragment.body.empty()); + ASSERT_FALSE(fragment.newline); + + if (expected.empty()) { + EXPECT_TRUE(fragment.sub_fragments.empty()); + } else { + ASSERT_EQ(fragment.sub_fragments.size(), 1u); + ExpectNoStyle(fragment.sub_fragments[0].style); + EXPECT_EQ(fragment.sub_fragments[0].body, expected); + } +} + } // namespace class WebVttParserTest : public testing::Test { @@ -177,7 +198,7 @@ TEST_F(WebVttParserTest, ParseOneCue) { EXPECT_EQ(samples_[0]->id(), kNoId); EXPECT_EQ(samples_[0]->start_time(), 60000u); EXPECT_EQ(samples_[0]->duration(), 3540000u); - EXPECT_EQ(samples_[0]->body().body, "subtitle"); + ExpectPlainCueWithBody(samples_[0]->body(), "subtitle"); // No settings const auto& settings = samples_[0]->settings(); @@ -221,7 +242,7 @@ TEST_F(WebVttParserTest, ParseOneCueWithStyleAndRegion) { EXPECT_EQ(samples_[0]->id(), kNoId); EXPECT_EQ(samples_[0]->start_time(), 60000u); EXPECT_EQ(samples_[0]->duration(), 3540000u); - EXPECT_EQ(samples_[0]->body().body, "subtitle"); + ExpectPlainCueWithBody(samples_[0]->body(), "subtitle"); } TEST_F(WebVttParserTest, ParseOneEmptyCue) { @@ -238,7 +259,7 @@ TEST_F(WebVttParserTest, ParseOneEmptyCue) { ASSERT_EQ(streams_.size(), 1u); ASSERT_EQ(samples_.size(), 1u); - EXPECT_EQ(samples_[0]->body().body, ""); + ExpectPlainCueWithBody(samples_[0]->body(), ""); } TEST_F(WebVttParserTest, FailToParseCueWithArrowInId) { @@ -271,7 +292,7 @@ TEST_F(WebVttParserTest, ParseOneCueWithId) { ASSERT_EQ(streams_.size(), 1u); ASSERT_EQ(samples_.size(), 1u); EXPECT_EQ(samples_[0]->id(), "id"); - EXPECT_EQ(samples_[0]->body().body, "subtitle"); + ExpectPlainCueWithBody(samples_[0]->body(), "subtitle"); } TEST_F(WebVttParserTest, ParseOneEmptyCueWithId) { @@ -290,7 +311,7 @@ TEST_F(WebVttParserTest, ParseOneEmptyCueWithId) { ASSERT_EQ(streams_.size(), 1u); ASSERT_EQ(samples_.size(), 1u); EXPECT_EQ(samples_[0]->id(), "id"); - EXPECT_EQ(samples_[0]->body().body, ""); + ExpectPlainCueWithBody(samples_[0]->body(), ""); } TEST_F(WebVttParserTest, ParseOneCueWithSettings) { @@ -363,13 +384,13 @@ TEST_F(WebVttParserTest, ParseMultipleCues) { EXPECT_EQ(samples_[0]->start_time(), 1000u); EXPECT_EQ(samples_[0]->duration(), 4200u); - EXPECT_EQ(samples_[0]->body().body, "subtitle A"); + ExpectPlainCueWithBody(samples_[0]->body(), "subtitle A"); EXPECT_EQ(samples_[1]->start_time(), 2321u); EXPECT_EQ(samples_[1]->duration(), 4679u); - EXPECT_EQ(samples_[1]->body().body, "subtitle B"); + ExpectPlainCueWithBody(samples_[1]->body(), "subtitle B"); EXPECT_EQ(samples_[2]->start_time(), 5800u); EXPECT_EQ(samples_[2]->duration(), 2200u); - EXPECT_EQ(samples_[2]->body().body, "subtitle C"); + ExpectPlainCueWithBody(samples_[2]->body(), "subtitle C"); } // Verify that a typical case with mulitple cues work even when comments are @@ -405,9 +426,9 @@ TEST_F(WebVttParserTest, ParseWithComments) { ASSERT_EQ(streams_.size(), 1u); ASSERT_EQ(samples_.size(), 3u); - EXPECT_EQ(samples_[0]->body().body, "subtitle A"); - EXPECT_EQ(samples_[1]->body().body, "subtitle B"); - EXPECT_EQ(samples_[2]->body().body, "subtitle C"); + ExpectPlainCueWithBody(samples_[0]->body(), "subtitle A"); + ExpectPlainCueWithBody(samples_[1]->body(), "subtitle B"); + ExpectPlainCueWithBody(samples_[2]->body(), "subtitle C"); } } // namespace media diff --git a/packager/media/formats/webvtt/webvtt_utils.cc b/packager/media/formats/webvtt/webvtt_utils.cc index 685d85b3f8..1be0dc4202 100644 --- a/packager/media/formats/webvtt/webvtt_utils.cc +++ b/packager/media/formats/webvtt/webvtt_utils.cc @@ -9,6 +9,8 @@ #include #include +#include + #include "packager/base/logging.h" #include "packager/base/strings/string_number_conversions.h" #include "packager/base/strings/stringprintf.h" @@ -33,6 +35,103 @@ bool GetTotalMilliseconds(uint64_t hours, *out = 60 * 60 * 1000 * hours + 60 * 1000 * minutes + 1000 * seconds + ms; return true; } + +enum class StyleTagKind { + kUnderline, + kBold, + kItalic, +}; + +std::string GetOpenTag(StyleTagKind tag) { + switch (tag) { + case StyleTagKind::kUnderline: + return ""; + case StyleTagKind::kBold: + return ""; + case StyleTagKind::kItalic: + return ""; + } + return ""; // Not reached, but Windows doesn't like NOTREACHED. +} + +std::string GetCloseTag(StyleTagKind tag) { + switch (tag) { + case StyleTagKind::kUnderline: + return ""; + case StyleTagKind::kBold: + return ""; + case StyleTagKind::kItalic: + return ""; + } + return ""; // Not reached, but Windows doesn't like NOTREACHED. +} + +std::string WriteFragment(const TextFragment& fragment, + std::list* tags) { + std::string ret; + size_t local_tag_count = 0; + auto has = [tags](StyleTagKind tag) { + return std::find(tags->begin(), tags->end(), tag) != tags->end(); + }; + auto push_tag = [tags, &local_tag_count, &has](StyleTagKind tag) { + if (has(tag)) { + return std::string(); + } + tags->push_back(tag); + local_tag_count++; + return GetOpenTag(tag); + }; + + if ((fragment.style.underline == false && has(StyleTagKind::kUnderline)) || + (fragment.style.bold == false && has(StyleTagKind::kBold)) || + (fragment.style.italic == false && has(StyleTagKind::kItalic))) { + LOG(WARNING) << "WebVTT output doesn't support disabling " + "underline/bold/italic within a cue"; + } + + if (fragment.newline) { + // Newlines represent separate WebVTT cues. So close the existing tags to + // be nice and re-open them on the new line. + for (auto it = tags->rbegin(); it != tags->rend(); it++) { + ret += GetCloseTag(*it); + } + ret += "\n"; + for (const auto tag : *tags) { + ret += GetOpenTag(tag); + } + } else { + if (fragment.style.underline == true) { + ret += push_tag(StyleTagKind::kUnderline); + } + if (fragment.style.bold == true) { + ret += push_tag(StyleTagKind::kBold); + } + if (fragment.style.italic == true) { + ret += push_tag(StyleTagKind::kItalic); + } + + if (!fragment.body.empty()) { + // Replace newlines and consecutive whitespace with a single space. If + // the user wanted an explicit newline, they should use the "newline" + // field. + std::regex whitespace("\\s+", std::regex_constants::ECMAScript); + ret += std::regex_replace(fragment.body, whitespace, std::string(" ")); + } else { + for (const auto& frag : fragment.sub_fragments) { + ret += WriteFragment(frag, tags); + } + } + + // Pop all the local tags we pushed. + while (local_tag_count > 0) { + ret += GetCloseTag(tags->back()); + tags->pop_back(); + local_tag_count--; + } + } + return ret; +} + } // namespace bool WebVttTimestampToMs(const base::StringPiece& source, uint64_t* out) { @@ -157,7 +256,8 @@ std::string WebVttSettingsToString(const TextSettings& settings) { } std::string WebVttFragmentToString(const TextFragment& fragment) { - return fragment.body; + std::list tags; + return WriteFragment(fragment, &tags); } } // namespace media diff --git a/packager/media/formats/webvtt/webvtt_utils_unittest.cc b/packager/media/formats/webvtt/webvtt_utils_unittest.cc index 111e2ad033..4273a59a8a 100644 --- a/packager/media/formats/webvtt/webvtt_utils_unittest.cc +++ b/packager/media/formats/webvtt/webvtt_utils_unittest.cc @@ -11,6 +11,24 @@ namespace shaka { namespace media { +namespace { + +const TextFragmentStyle kNoStyle{}; + +TextFragmentStyle GetItalicStyle() { + TextFragmentStyle style; + style.italic = true; + return style; +} + +TextFragmentStyle GetBoldStyle() { + TextFragmentStyle style; + style.bold = true; + return style; +} + +} // namespace + TEST(WebVttTimestampTest, TooShort) { uint64_t ms; EXPECT_FALSE(WebVttTimestampToMs("00.000", &ms)); @@ -150,5 +168,61 @@ TEST(WebVttUtilsTest, SettingsToString_IgnoresDefaults) { EXPECT_EQ(actual, "region:foo"); } +TEST(WebVttUtilsTest, FragmentToString) { + TextFragment frag(GetBoldStyle(), "Foobar"); + EXPECT_EQ(WebVttFragmentToString(frag), "Foobar"); +} + +TEST(WebVttUtilsTest, FragmentToString_PreservesTags) { + TextFragment frag(kNoStyle, "Foobar"); + EXPECT_EQ(WebVttFragmentToString(frag), "Foobar"); +} + +TEST(WebVttUtilsTest, FragmentToString_HandlesNestedFragments) { + TextFragment frag; + frag.sub_fragments.emplace_back(kNoStyle, "Hello "); + frag.sub_fragments.emplace_back(kNoStyle, "World"); + EXPECT_EQ(WebVttFragmentToString(frag), "Hello World"); +} + +TEST(WebVttUtilsTest, FragmentToString_HandlesNestedFragmentsWithStyle) { + TextFragment frag; + frag.style.bold = true; + frag.sub_fragments.emplace_back(GetItalicStyle(), "Hello"); + frag.sub_fragments.emplace_back(kNoStyle, " World"); + EXPECT_EQ(WebVttFragmentToString(frag), "Hello World"); +} + +TEST(WebVttUtilsTest, FragmentToString_HandlesNewlines) { + TextFragment frag; + frag.sub_fragments.emplace_back(kNoStyle, "Hello"); + frag.sub_fragments.emplace_back(kNoStyle, true); + frag.sub_fragments.emplace_back(kNoStyle, "World"); + EXPECT_EQ(WebVttFragmentToString(frag), "Hello\nWorld"); +} + +TEST(WebVttUtilsTest, FragmentToString_HandlesNewlinesWithStyle) { + TextFragment frag; + frag.style.bold = true; + frag.sub_fragments.emplace_back(kNoStyle, "Hello"); + frag.sub_fragments.emplace_back(kNoStyle, true); + frag.sub_fragments.emplace_back(kNoStyle, "World"); + EXPECT_EQ(WebVttFragmentToString(frag), "Hello\nWorld"); +} + +TEST(WebVttUtilsTest, FragmentToString_HandlesNestedNewlinesWithStyle) { + TextFragment nested; + nested.sub_fragments.emplace_back(kNoStyle, "Hello"); + nested.sub_fragments.emplace_back(kNoStyle, true); + nested.sub_fragments.emplace_back(kNoStyle, "World"); + + TextFragment frag; + frag.style.bold = true; + frag.sub_fragments.emplace_back(nested); + frag.sub_fragments.emplace_back(kNoStyle, " Now"); + + EXPECT_EQ(WebVttFragmentToString(frag), "Hello\nWorld Now"); +} + } // namespace media } // namespace shaka