Add style support for cue fragments.

Text cues are now composed of nested fragments that can be individually
styled.  This allows portions of a cue to be bold, italic, or underlined.
The WebVTT parser doesn't parse style tags in its input yet, so the
original tags are passed through and preserved in the WebVTT output.  The
WebVTT output will also add tags when style elements are set in the cue
object.

Change-Id: I6abba4175e376e4f753193f7d8cac63e958d3c89
This commit is contained in:
Jacob Trimble 2020-08-26 13:47:14 -07:00
parent f4c07b9ce0
commit 1f21cc78cd
7 changed files with 240 additions and 17 deletions

View File

@ -267,7 +267,7 @@ std::unique_ptr<TextSample> MediaHandlerTestBase::GetTextSample(
int64_t end,
const std::string& payload) const {
return std::unique_ptr<TextSample>{
new TextSample(id, start, end, {}, TextFragment{payload})};
new TextSample(id, start, end, {}, TextFragment{{}, payload})};
}
std::unique_ptr<CueEvent> MediaHandlerTestBase::GetCueEvent(

View File

@ -6,13 +6,18 @@
#include "packager/media/base/text_sample.h"
#include <algorithm>
#include <functional>
#include "packager/base/logging.h"
namespace shaka {
namespace media {
// Reports whether this fragment carries no text at all.
//
// A fragment is empty only when its own |body| is empty and every entry in
// |sub_fragments| is itself (recursively) empty.  The |newline| flag is not
// consulted, so a fragment that only holds a newline also counts as empty.
bool TextFragment::is_empty() const {
  // Check the cheap local condition first; the recursive walk over the
  // children only runs when this fragment has no text of its own.
  return body.empty() &&
         std::all_of(sub_fragments.begin(), sub_fragments.end(),
                     std::mem_fn(&TextFragment::is_empty));
}
TextSample::TextSample(const std::string& id,

View File

@ -73,9 +73,31 @@ struct TextSettings {
TextAlignment text_alignment = TextAlignment::kCenter;
};
/// Style attributes that can be applied to a TextFragment.  Each attribute is
/// a base::Optional<bool>, so it can be left unset in addition to being
/// explicitly set to true or false.
struct TextFragmentStyle {
/// Whether the text is underlined, if set.
base::Optional<bool> underline;
/// Whether the text is bold, if set.
base::Optional<bool> bold;
/// Whether the text is italic, if set.
base::Optional<bool> italic;
};
/// Represents a recursive structure of styled blocks of text. Only one of
/// sub_fragments, body, or newline will be set.
struct TextFragment {
/// Creates a fragment with a default-constructed style, no sub-fragments, an
/// empty body, and newline == false.
TextFragment() {}
/// Creates a container fragment that holds a copy of |sub_fragments|.
TextFragment(const TextFragmentStyle& style,
const std::vector<TextFragment>& sub_fragments)
: style(style), sub_fragments(sub_fragments) {}
/// Creates a text fragment from a C string.  This overload exists so that a
/// string literal selects it; otherwise the pointer-to-bool standard
/// conversion would make a literal bind to the |newline| overload instead of
/// the std::string one.
TextFragment(const TextFragmentStyle& style, const char* body)
: style(style), body(body) {}
/// Creates a text fragment that holds a copy of |body|.
TextFragment(const TextFragmentStyle& style, const std::string& body)
: style(style), body(body) {}
/// Creates a fragment whose |newline| flag is set to the given value.
TextFragment(const TextFragmentStyle& style, bool newline)
: style(style), newline(newline) {}
/// Style attributes of this fragment.
TextFragmentStyle style;
/// Nested fragments, in order.
std::vector<TextFragment> sub_fragments;
/// Text held directly by this fragment.
std::string body;
/// True when this fragment stands for a line break.
bool newline = false;
/// Defined out of line; see the implementation for the exact rule.
bool is_empty() const;
};

View File

@ -349,11 +349,12 @@ bool WebVttParser::ParseCue(const std::string& id,
// The rest of the block is the payload.
// TODO: Parse tags to support <b>, <i>, etc.
TextFragment body;
TextFragmentStyle no_styles;
for (size_t i = 1; i < block_size; i++) {
if (i > 1) {
body.body += "\n";
body.sub_fragments.emplace_back(no_styles, /* newline= */ true);
}
body.body += block[i];
body.sub_fragments.emplace_back(no_styles, block[i]);
}
const auto sample =

View File

@ -24,6 +24,27 @@ std::string ToString(const std::vector<uint8_t>& v) {
return std::string(v.begin(), v.end());
}
// Expects that none of the style attributes in |style| evaluates to true in a
// boolean context.  Failures are non-fatal (EXPECT_*), so all three
// attributes are always checked.
void ExpectNoStyle(const TextFragmentStyle& style) {
EXPECT_FALSE(style.underline);
EXPECT_FALSE(style.bold);
EXPECT_FALSE(style.italic);
}
// Checks that |fragment| is an unstyled container with no text of its own,
// and that its content matches |expected|: no sub-fragments when |expected|
// is empty, otherwise exactly one unstyled sub-fragment whose body equals
// |expected|.
void ExpectPlainCueWithBody(const TextFragment& fragment,
                            const std::string& expected) {
  // The top-level fragment must be a bare wrapper.
  ExpectNoStyle(fragment.style);
  ASSERT_TRUE(fragment.body.empty());
  ASSERT_FALSE(fragment.newline);

  // An empty cue has no children at all.
  if (expected.empty()) {
    EXPECT_TRUE(fragment.sub_fragments.empty());
    return;
  }

  // Otherwise the whole text lives in a single unstyled child.
  ASSERT_EQ(fragment.sub_fragments.size(), 1u);
  ExpectNoStyle(fragment.sub_fragments[0].style);
  EXPECT_EQ(fragment.sub_fragments[0].body, expected);
}
} // namespace
class WebVttParserTest : public testing::Test {
@ -177,7 +198,7 @@ TEST_F(WebVttParserTest, ParseOneCue) {
EXPECT_EQ(samples_[0]->id(), kNoId);
EXPECT_EQ(samples_[0]->start_time(), 60000u);
EXPECT_EQ(samples_[0]->duration(), 3540000u);
EXPECT_EQ(samples_[0]->body().body, "subtitle");
ExpectPlainCueWithBody(samples_[0]->body(), "subtitle");
// No settings
const auto& settings = samples_[0]->settings();
@ -221,7 +242,7 @@ TEST_F(WebVttParserTest, ParseOneCueWithStyleAndRegion) {
EXPECT_EQ(samples_[0]->id(), kNoId);
EXPECT_EQ(samples_[0]->start_time(), 60000u);
EXPECT_EQ(samples_[0]->duration(), 3540000u);
EXPECT_EQ(samples_[0]->body().body, "subtitle");
ExpectPlainCueWithBody(samples_[0]->body(), "subtitle");
}
TEST_F(WebVttParserTest, ParseOneEmptyCue) {
@ -238,7 +259,7 @@ TEST_F(WebVttParserTest, ParseOneEmptyCue) {
ASSERT_EQ(streams_.size(), 1u);
ASSERT_EQ(samples_.size(), 1u);
EXPECT_EQ(samples_[0]->body().body, "");
ExpectPlainCueWithBody(samples_[0]->body(), "");
}
TEST_F(WebVttParserTest, FailToParseCueWithArrowInId) {
@ -271,7 +292,7 @@ TEST_F(WebVttParserTest, ParseOneCueWithId) {
ASSERT_EQ(streams_.size(), 1u);
ASSERT_EQ(samples_.size(), 1u);
EXPECT_EQ(samples_[0]->id(), "id");
EXPECT_EQ(samples_[0]->body().body, "subtitle");
ExpectPlainCueWithBody(samples_[0]->body(), "subtitle");
}
TEST_F(WebVttParserTest, ParseOneEmptyCueWithId) {
@ -290,7 +311,7 @@ TEST_F(WebVttParserTest, ParseOneEmptyCueWithId) {
ASSERT_EQ(streams_.size(), 1u);
ASSERT_EQ(samples_.size(), 1u);
EXPECT_EQ(samples_[0]->id(), "id");
EXPECT_EQ(samples_[0]->body().body, "");
ExpectPlainCueWithBody(samples_[0]->body(), "");
}
TEST_F(WebVttParserTest, ParseOneCueWithSettings) {
@ -363,13 +384,13 @@ TEST_F(WebVttParserTest, ParseMultipleCues) {
EXPECT_EQ(samples_[0]->start_time(), 1000u);
EXPECT_EQ(samples_[0]->duration(), 4200u);
EXPECT_EQ(samples_[0]->body().body, "subtitle A");
ExpectPlainCueWithBody(samples_[0]->body(), "subtitle A");
EXPECT_EQ(samples_[1]->start_time(), 2321u);
EXPECT_EQ(samples_[1]->duration(), 4679u);
EXPECT_EQ(samples_[1]->body().body, "subtitle B");
ExpectPlainCueWithBody(samples_[1]->body(), "subtitle B");
EXPECT_EQ(samples_[2]->start_time(), 5800u);
EXPECT_EQ(samples_[2]->duration(), 2200u);
EXPECT_EQ(samples_[2]->body().body, "subtitle C");
ExpectPlainCueWithBody(samples_[2]->body(), "subtitle C");
}
// Verify that a typical case with multiple cues works even when comments are
@ -405,9 +426,9 @@ TEST_F(WebVttParserTest, ParseWithComments) {
ASSERT_EQ(streams_.size(), 1u);
ASSERT_EQ(samples_.size(), 3u);
EXPECT_EQ(samples_[0]->body().body, "subtitle A");
EXPECT_EQ(samples_[1]->body().body, "subtitle B");
EXPECT_EQ(samples_[2]->body().body, "subtitle C");
ExpectPlainCueWithBody(samples_[0]->body(), "subtitle A");
ExpectPlainCueWithBody(samples_[1]->body(), "subtitle B");
ExpectPlainCueWithBody(samples_[2]->body(), "subtitle C");
}
} // namespace media

View File

@ -9,6 +9,8 @@
#include <ctype.h>
#include <inttypes.h>
#include <regex>
#include "packager/base/logging.h"
#include "packager/base/strings/string_number_conversions.h"
#include "packager/base/strings/stringprintf.h"
@ -33,6 +35,103 @@ bool GetTotalMilliseconds(uint64_t hours,
*out = 60 * 60 * 1000 * hours + 60 * 1000 * minutes + 1000 * seconds + ms;
return true;
}
// The kinds of style tag that can be written to the output: <u>, <b> and <i>
// respectively.
enum class StyleTagKind {
kUnderline,
kBold,
kItalic,
};
// Returns the opening tag ("<u>", "<b>" or "<i>") for |tag|.
std::string GetOpenTag(StyleTagKind tag) {
  // Stays empty only if |tag| is not one of the enumerators, which should not
  // happen.
  const char* markup = "";
  switch (tag) {
    case StyleTagKind::kUnderline:
      markup = "<u>";
      break;
    case StyleTagKind::kBold:
      markup = "<b>";
      break;
    case StyleTagKind::kItalic:
      markup = "<i>";
      break;
  }
  return markup;
}
// Returns the closing tag ("</u>", "</b>" or "</i>") for |tag|.
std::string GetCloseTag(StyleTagKind tag) {
  // Stays empty only if |tag| is not one of the enumerators, which should not
  // happen.
  const char* markup = "";
  switch (tag) {
    case StyleTagKind::kUnderline:
      markup = "</u>";
      break;
    case StyleTagKind::kBold:
      markup = "</b>";
      break;
    case StyleTagKind::kItalic:
      markup = "</i>";
      break;
  }
  return markup;
}
std::string WriteFragment(const TextFragment& fragment,
std::list<StyleTagKind>* tags) {
std::string ret;
size_t local_tag_count = 0;
auto has = [tags](StyleTagKind tag) {
return std::find(tags->begin(), tags->end(), tag) != tags->end();
};
auto push_tag = [tags, &local_tag_count, &has](StyleTagKind tag) {
if (has(tag)) {
return std::string();
}
tags->push_back(tag);
local_tag_count++;
return GetOpenTag(tag);
};
if ((fragment.style.underline == false && has(StyleTagKind::kUnderline)) ||
(fragment.style.bold == false && has(StyleTagKind::kBold)) ||
(fragment.style.italic == false && has(StyleTagKind::kItalic))) {
LOG(WARNING) << "WebVTT output doesn't support disabling "
"underline/bold/italic within a cue";
}
if (fragment.newline) {
// Newlines represent separate WebVTT cues. So close the existing tags to
// be nice and re-open them on the new line.
for (auto it = tags->rbegin(); it != tags->rend(); it++) {
ret += GetCloseTag(*it);
}
ret += "\n";
for (const auto tag : *tags) {
ret += GetOpenTag(tag);
}
} else {
if (fragment.style.underline == true) {
ret += push_tag(StyleTagKind::kUnderline);
}
if (fragment.style.bold == true) {
ret += push_tag(StyleTagKind::kBold);
}
if (fragment.style.italic == true) {
ret += push_tag(StyleTagKind::kItalic);
}
if (!fragment.body.empty()) {
// Replace newlines and consecutive whitespace with a single space. If
// the user wanted an explicit newline, they should use the "newline"
// field.
std::regex whitespace("\\s+", std::regex_constants::ECMAScript);
ret += std::regex_replace(fragment.body, whitespace, std::string(" "));
} else {
for (const auto& frag : fragment.sub_fragments) {
ret += WriteFragment(frag, tags);
}
}
// Pop all the local tags we pushed.
while (local_tag_count > 0) {
ret += GetCloseTag(tags->back());
tags->pop_back();
local_tag_count--;
}
}
return ret;
}
} // namespace
bool WebVttTimestampToMs(const base::StringPiece& source, uint64_t* out) {
@ -157,7 +256,8 @@ std::string WebVttSettingsToString(const TextSettings& settings) {
}
// Converts |fragment|, including any nested sub-fragments, to WebVTT cue
// text.  Style attributes set on the fragments are written as <u>/<b>/<i>
// tags around the text they apply to.
std::string WebVttFragmentToString(const TextFragment& fragment) {
  // Start with no open tags; WriteFragment tracks what it opens and closes.
  std::list<StyleTagKind> tags;
  return WriteFragment(fragment, &tags);
}
} // namespace media

View File

@ -11,6 +11,24 @@
namespace shaka {
namespace media {
namespace {
// A value-initialized style, used by the tests for fragments that carry no
// styling of their own.
const TextFragmentStyle kNoStyle{};
// Builds a style in which only |italic| is set (to true).
TextFragmentStyle GetItalicStyle() {
  TextFragmentStyle italic_only;
  italic_only.italic = true;
  return italic_only;
}
// Builds a style in which only |bold| is set (to true).
TextFragmentStyle GetBoldStyle() {
  TextFragmentStyle bold_only;
  bold_only.bold = true;
  return bold_only;
}
} // namespace
TEST(WebVttTimestampTest, TooShort) {
uint64_t ms;
EXPECT_FALSE(WebVttTimestampToMs("00.000", &ms));
@ -150,5 +168,61 @@ TEST(WebVttUtilsTest, SettingsToString_IgnoresDefaults) {
EXPECT_EQ(actual, "region:foo");
}
// A styled text fragment is wrapped in the matching tag.
TEST(WebVttUtilsTest, FragmentToString) {
  const TextFragment bold_text(GetBoldStyle(), "Foobar");
  const auto actual = WebVttFragmentToString(bold_text);
  EXPECT_EQ(actual, "<b>Foobar</b>");
}
// Tags that are already part of the body text are written out unchanged.
TEST(WebVttUtilsTest, FragmentToString_PreservesTags) {
  const TextFragment tagged_text(kNoStyle, "<i>Foobar</i>");
  const auto actual = WebVttFragmentToString(tagged_text);
  EXPECT_EQ(actual, "<i>Foobar</i>");
}
// Unstyled sub-fragments are concatenated in order.
TEST(WebVttUtilsTest, FragmentToString_HandlesNestedFragments) {
  TextFragment parent;
  parent.sub_fragments.emplace_back(kNoStyle, "Hello ");
  parent.sub_fragments.emplace_back(kNoStyle, "World");

  const auto actual = WebVttFragmentToString(parent);
  EXPECT_EQ(actual, "Hello World");
}
// A style on the parent wraps all children; a child's own style nests inside.
TEST(WebVttUtilsTest, FragmentToString_HandlesNestedFragmentsWithStyle) {
  TextFragment parent;
  parent.style = GetBoldStyle();
  parent.sub_fragments.emplace_back(GetItalicStyle(), "Hello");
  parent.sub_fragments.emplace_back(kNoStyle, " World");

  const auto actual = WebVttFragmentToString(parent);
  EXPECT_EQ(actual, "<b><i>Hello</i> World</b>");
}
// A newline fragment between two text fragments becomes a line break.
TEST(WebVttUtilsTest, FragmentToString_HandlesNewlines) {
  TextFragment parent;
  parent.sub_fragments.emplace_back(kNoStyle, "Hello");
  parent.sub_fragments.emplace_back(kNoStyle, true);
  parent.sub_fragments.emplace_back(kNoStyle, "World");

  const auto actual = WebVttFragmentToString(parent);
  EXPECT_EQ(actual, "Hello\nWorld");
}
// Open tags are closed before a line break and re-opened after it.
TEST(WebVttUtilsTest, FragmentToString_HandlesNewlinesWithStyle) {
  TextFragment parent;
  parent.style = GetBoldStyle();
  parent.sub_fragments.emplace_back(kNoStyle, "Hello");
  parent.sub_fragments.emplace_back(kNoStyle, true);
  parent.sub_fragments.emplace_back(kNoStyle, "World");

  const auto actual = WebVttFragmentToString(parent);
  EXPECT_EQ(actual, "<b>Hello</b>\n<b>World</b>");
}
// A line break inside a nested fragment still closes and re-opens the tag
// opened by the outer fragment, and later siblings stay inside that tag.
TEST(WebVttUtilsTest, FragmentToString_HandlesNestedNewlinesWithStyle) {
  TextFragment inner;
  inner.sub_fragments.emplace_back(kNoStyle, "Hello");
  inner.sub_fragments.emplace_back(kNoStyle, true);
  inner.sub_fragments.emplace_back(kNoStyle, "World");

  TextFragment outer;
  outer.style = GetBoldStyle();
  outer.sub_fragments.push_back(inner);
  outer.sub_fragments.emplace_back(kNoStyle, " Now");

  const auto actual = WebVttFragmentToString(outer);
  EXPECT_EQ(actual, "<b>Hello</b>\n<b>World Now</b>");
}
} // namespace media
} // namespace shaka