From 414f4589c87283b9ad3f06feb3bbecd2bde54761 Mon Sep 17 00:00:00 2001 From: Jacob Trimble Date: Wed, 26 Aug 2020 12:31:58 -0700 Subject: [PATCH] Parse TextSettings from WebVTT. Now the Cue settings are a generic object that is parsed in WebVTT. This will allow setting the settings in different parsers without having to use WebVTT-specifics. Change-Id: I36689bec725bd2e515af962b7174fc5977f96fa2 --- packager/media/base/text_sample.h | 58 ++++++++- .../media/formats/webvtt/webvtt_parser.cc | 115 +++++++++++++++++- .../formats/webvtt/webvtt_parser_unittest.cc | 42 ++++++- packager/media/formats/webvtt/webvtt_utils.cc | 70 ++++++++++- .../formats/webvtt/webvtt_utils_unittest.cc | 23 ++++ 5 files changed, 296 insertions(+), 12 deletions(-) diff --git a/packager/media/base/text_sample.h b/packager/media/base/text_sample.h index e643f3073e..15dd4a3fb4 100644 --- a/packager/media/base/text_sample.h +++ b/packager/media/base/text_sample.h @@ -10,13 +10,67 @@ #include #include +#include + +#include "packager/base/optional.h" namespace shaka { namespace media { +enum class TextUnitType { + /// The units are absolute units in pixels. + kPixels, + /// The units are absolute units in number of lines. + kLines, + /// The units are relative to some size, in percent (i.e. 0-100). + kPercent, +}; + +enum class WritingDirection { + kHorizontal, + kVerticalGrowingLeft, + kVerticalGrowingRight, +}; + +enum class TextAlignment { + /// Align the text at the start, based on the Unicode text direction. + kStart, + /// Align the text in the center of the box. + kCenter, + /// Align the text at the end, based on the Unicode text direction. + kEnd, + /// Align the text at the left side (or top for non-horizontal). + kLeft, + /// Align the text at the right side (or bottom for non-horizontal). 
+ kRight, +}; + +struct TextNumber { + TextNumber(float value, TextUnitType type) : value(value), type(type) {} + + float value; + TextUnitType type; +}; + struct TextSettings { - // TODO(modmaker): Convert to generic structure. - std::string settings; + /// The line offset of the cue. For horizontal cues, this is the vertical + /// offset. Percent units are relative to the window. + base::Optional line; + /// The position offset of the cue. For horizontal cues, this is the + /// horizontal offset. Percent units are relative to the window. + base::Optional position; + /// The size of the space used to draw text. For horizontal cues, this is the + /// width. Percent units are relative to the window. + base::Optional size; + + /// The region to draw the cue in. + std::string region; + + /// The direction to draw text. This is also used to determine how cues are + /// positioned within the region. + WritingDirection writing_direction = WritingDirection::kHorizontal; + /// How to align the text within the cue box. 
+ TextAlignment text_alignment = TextAlignment::kCenter; }; struct TextFragment { diff --git a/packager/media/formats/webvtt/webvtt_parser.cc b/packager/media/formats/webvtt/webvtt_parser.cc index 51c8ba13bc..60ea10d674 100644 --- a/packager/media/formats/webvtt/webvtt_parser.cc +++ b/packager/media/formats/webvtt/webvtt_parser.cc @@ -6,10 +6,12 @@ #include "packager/media/formats/webvtt/webvtt_parser.h" +#include + #include "packager/base/logging.h" +#include "packager/base/strings/string_number_conversions.h" #include "packager/base/strings/string_split.h" #include "packager/base/strings/string_util.h" -#include "packager/media/base/text_sample.h" #include "packager/media/base/text_stream_info.h" #include "packager/media/formats/webvtt/webvtt_utils.h" @@ -75,6 +77,103 @@ bool IsLikelyRegion(const std::string& line) { return base::TrimWhitespaceASCII(line, base::TRIM_TRAILING) == "REGION"; } +bool ParsePercent(const std::string& str, float* value) { + // https://www.w3.org/TR/webvtt1/#webvtt-percentage + // E.g. 
"4%" or "1.5%" + std::regex re(R"((\d+(?:\.\d+)?)%)"); + std::smatch match; + if (!std::regex_match(str, match, re)) { + return false; + } + + double temp; + base::StringToDouble(match[1], &temp); + if (temp > 100) { + return false; + } + *value = temp; + return true; +} + +void ParseSettings(const std::string& id, + const std::string& value, + TextSettings* settings) { + // https://www.w3.org/TR/webvtt1/#ref-for-parse-the-webvtt-cue-settings-1 + if (id == "region") { + settings->region = value; + } else if (id == "vertical") { + if (value == "rl") { + settings->writing_direction = WritingDirection::kVerticalGrowingLeft; + } else if (value == "lr") { + settings->writing_direction = WritingDirection::kVerticalGrowingRight; + } else { + LOG(WARNING) << "Invalid WebVTT vertical setting: " << value; + } + } else if (id == "line") { + const auto pos = value.find(','); + const std::string line = value.substr(0, pos); + const std::string align = + pos != std::string::npos ? value.substr(pos + 1) : ""; + if (pos != std::string::npos) { + LOG(WARNING) << "WebVTT line alignment isn't supported"; + } + + if (!line.empty() && line[line.size() - 1] == '%') { + float temp; + if (!ParsePercent(line, &temp)) { + LOG(WARNING) << "Invalid WebVTT line: " << value; + return; + } + settings->line.emplace(temp, TextUnitType::kPercent); + } else { + double temp; + if (!base::StringToDouble(line, &temp)) { + LOG(WARNING) << "Invalid WebVTT line: " << value; + return; + } + settings->line.emplace(temp, TextUnitType::kLines); + } + } else if (id == "position") { + const auto pos = value.find(','); + const std::string position = value.substr(0, pos); + const std::string align = + pos != std::string::npos ? 
value.substr(pos + 1) : ""; + if (pos != std::string::npos) { + LOG(WARNING) << "WebVTT position alignment isn't supported"; + } + + float temp; + if (ParsePercent(position, &temp)) { + settings->position.emplace(temp, TextUnitType::kPercent); + } else { + LOG(WARNING) << "Invalid WebVTT position: " << value; + } + } else if (id == "size") { + float temp; + if (ParsePercent(value, &temp)) { + settings->size.emplace(temp, TextUnitType::kPercent); + } else { + LOG(WARNING) << "Invalid WebVTT size: " << value; + } + } else if (id == "align") { + if (value == "start") { + settings->text_alignment = TextAlignment::kStart; + } else if (value == "center") { + settings->text_alignment = TextAlignment::kCenter; + } else if (value == "end") { + settings->text_alignment = TextAlignment::kEnd; + } else if (value == "left") { + settings->text_alignment = TextAlignment::kLeft; + } else if (value == "right") { + settings->text_alignment = TextAlignment::kRight; + } else { + LOG(WARNING) << "Invalid WebVTT align: " << value; + } + } else { + LOG(WARNING) << "Unknown WebVTT setting: " << id; + } +} + void UpdateConfig(const std::vector& block, std::string* config) { if (!config->empty()) *config += "\n\n"; @@ -235,16 +334,20 @@ bool WebVttParser::ParseCue(const std::string& id, return true; } - // The rest of time_and_style are the style tokens. TextSettings settings; for (size_t i = 3; i < time_and_style.size(); i++) { - if (!settings.settings.empty()) { - settings.settings += " "; + const auto pos = time_and_style[i].find(':'); + if (pos == std::string::npos) { + continue; } - settings.settings += time_and_style[i]; + + const std::string key = time_and_style[i].substr(0, pos); + const std::string value = time_and_style[i].substr(pos + 1); + ParseSettings(key, value, &settings); } // The rest of the block is the payload. + // TODO: Parse tags to support , , etc. 
TextFragment body; for (size_t i = 1; i < block_size; i++) { if (i > 1) { @@ -253,7 +356,7 @@ bool WebVttParser::ParseCue(const std::string& id, body.body += block[i]; } - auto sample = + const auto sample = std::make_shared(id, start_time, end_time, settings, body); return new_text_sample_cb_.Run(kStreamIndex, sample); } diff --git a/packager/media/formats/webvtt/webvtt_parser_unittest.cc b/packager/media/formats/webvtt/webvtt_parser_unittest.cc index 6f985b3747..3cadb492e9 100644 --- a/packager/media/formats/webvtt/webvtt_parser_unittest.cc +++ b/packager/media/formats/webvtt/webvtt_parser_unittest.cc @@ -19,7 +19,6 @@ const uint32_t kStreamId = 0; const uint32_t kTimeScale = 1000; const char* kNoId = ""; -const char* kNoSettings = ""; std::string ToString(const std::vector& v) { return std::string(v.begin(), v.end()); @@ -178,8 +177,16 @@ TEST_F(WebVttParserTest, ParseOneCue) { EXPECT_EQ(samples_[0]->id(), kNoId); EXPECT_EQ(samples_[0]->start_time(), 60000u); EXPECT_EQ(samples_[0]->duration(), 3540000u); - EXPECT_EQ(samples_[0]->settings().settings, kNoSettings); EXPECT_EQ(samples_[0]->body().body, "subtitle"); + + // No settings + const auto& settings = samples_[0]->settings(); + EXPECT_FALSE(settings.line); + EXPECT_FALSE(settings.position); + EXPECT_FALSE(settings.size); + EXPECT_EQ(settings.region, ""); + EXPECT_EQ(settings.writing_direction, WritingDirection::kHorizontal); + EXPECT_EQ(settings.text_alignment, TextAlignment::kCenter); } TEST_F(WebVttParserTest, ParseOneCueWithStyleAndRegion) { @@ -300,7 +307,36 @@ TEST_F(WebVttParserTest, ParseOneCueWithSettings) { ASSERT_EQ(streams_.size(), 1u); ASSERT_EQ(samples_.size(), 1u); - EXPECT_EQ(samples_[0]->settings().settings, "size:50%"); + ASSERT_TRUE(samples_[0]->settings().size); + EXPECT_EQ(samples_[0]->settings().size->type, TextUnitType::kPercent); + EXPECT_EQ(samples_[0]->settings().size->value, 50.0f); +} + +TEST_F(WebVttParserTest, ParseOneCueWithManySettings) { + const uint8_t text[] = + "WEBVTT\n" + 
"\n" + "00:01:00.000 --> 01:00:00.000 line:5 vertical:lr region:foo" + " align:right position:20%\n" + "subtitle\n"; + + ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize()); + + ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1)); + ASSERT_TRUE(parser_->Flush()); + + ASSERT_EQ(streams_.size(), 1u); + ASSERT_EQ(samples_.size(), 1u); + EXPECT_EQ(samples_[0]->settings().writing_direction, + WritingDirection::kVerticalGrowingRight); + EXPECT_EQ(samples_[0]->settings().text_alignment, TextAlignment::kRight); + EXPECT_FALSE(samples_[0]->settings().size); + ASSERT_TRUE(samples_[0]->settings().position); + EXPECT_EQ(samples_[0]->settings().position->type, TextUnitType::kPercent); + EXPECT_EQ(samples_[0]->settings().position->value, 20.0f); + ASSERT_TRUE(samples_[0]->settings().line); + EXPECT_EQ(samples_[0]->settings().line->type, TextUnitType::kLines); + EXPECT_EQ(samples_[0]->settings().line->value, 5.0f); } // Verify that a typical case with mulitple cues work. diff --git a/packager/media/formats/webvtt/webvtt_utils.cc b/packager/media/formats/webvtt/webvtt_utils.cc index bbf18ba4bb..685d85b3f8 100644 --- a/packager/media/formats/webvtt/webvtt_utils.cc +++ b/packager/media/formats/webvtt/webvtt_utils.cc @@ -85,7 +85,75 @@ std::string MsToWebVttTimestamp(uint64_t ms) { } std::string WebVttSettingsToString(const TextSettings& settings) { - return settings.settings; + std::string ret; + if (!settings.region.empty()) { + ret += " region:"; + ret += settings.region; + } + if (settings.line) { + switch (settings.line->type) { + case TextUnitType::kPercent: + ret += " line:"; + ret += base::DoubleToString(settings.line->value); + ret += "%"; + break; + case TextUnitType::kLines: + ret += " line:"; + ret += base::DoubleToString(settings.line->value); + break; + case TextUnitType::kPixels: + LOG(WARNING) << "WebVTT doesn't support pixel line settings"; + break; + } + } + if (settings.position) { + if (settings.position->type == TextUnitType::kPercent) { + ret += " position:"; + ret += 
base::DoubleToString(settings.position->value); + ret += "%"; + } else { + LOG(WARNING) << "WebVTT only supports percent position settings"; + } + } + if (settings.size) { + if (settings.size->type == TextUnitType::kPercent) { + ret += " size:"; + ret += base::DoubleToString(settings.size->value); + ret += "%"; + } else { + LOG(WARNING) << "WebVTT only supports percent size settings"; + } + } + if (settings.writing_direction != WritingDirection::kHorizontal) { + ret += " vertical:"; + if (settings.writing_direction == WritingDirection::kVerticalGrowingLeft) { + ret += "rl"; + } else { + ret += "lr"; + } + } + switch (settings.text_alignment) { + case TextAlignment::kStart: + ret += " align:start"; + break; + case TextAlignment::kEnd: + ret += " align:end"; + break; + case TextAlignment::kLeft: + ret += " align:left"; + break; + case TextAlignment::kRight: + ret += " align:right"; + break; + case TextAlignment::kCenter: + break; + } + + if (!ret.empty()) { + DCHECK_EQ(ret[0], ' '); + ret.erase(0, 1); + } + return ret; } std::string WebVttFragmentToString(const TextFragment& fragment) { diff --git a/packager/media/formats/webvtt/webvtt_utils_unittest.cc b/packager/media/formats/webvtt/webvtt_utils_unittest.cc index 583dd87144..111e2ad033 100644 --- a/packager/media/formats/webvtt/webvtt_utils_unittest.cc +++ b/packager/media/formats/webvtt/webvtt_utils_unittest.cc @@ -127,5 +127,28 @@ TEST(WebVttTimestampTest, CreateHoursShort) { TEST(WebVttTimestampTest, CreateHoursLong) { EXPECT_EQ("123:00:00.000", MsToWebVttTimestamp(442800000)); } + +TEST(WebVttUtilsTest, SettingsToString) { + TextSettings settings; + settings.region = "foo"; + settings.line = TextNumber(27, TextUnitType::kPercent); + settings.position = TextNumber(42, TextUnitType::kPercent); + settings.size = TextNumber(54, TextUnitType::kPercent); + settings.writing_direction = WritingDirection::kVerticalGrowingLeft; + settings.text_alignment = TextAlignment::kEnd; + + const auto actual = 
WebVttSettingsToString(settings); + EXPECT_EQ(actual, + "region:foo line:27% position:42% size:54% vertical:rl align:end"); +} + +TEST(WebVttUtilsTest, SettingsToString_IgnoresDefaults) { + TextSettings settings; + settings.region = "foo"; + + const auto actual = WebVttSettingsToString(settings); + EXPECT_EQ(actual, "region:foo"); +} + } // namespace media } // namespace shaka