Parse TextSettings from WebVTT.

Now the Cue settings are a generic object that is parsed in WebVTT.
This will allow setting the settings in different parsers without having
to use WebVTT-specifics.

Change-Id: I36689bec725bd2e515af962b7174fc5977f96fa2
This commit is contained in:
Jacob Trimble 2020-08-26 12:31:58 -07:00
parent c169c83613
commit 414f4589c8
5 changed files with 296 additions and 12 deletions

View File

@ -10,13 +10,67 @@
#include <stdint.h> #include <stdint.h>
#include <string> #include <string>
#include <vector>
#include "packager/base/optional.h"
namespace shaka { namespace shaka {
namespace media { namespace media {
enum class TextUnitType {
/// The units are absolute units in pixels.
kPixels,
/// The units are absolute units in number of lines.
kLines,
/// The units are relative to some size, in percent (i.e. 0-100).
kPercent,
};
enum class WritingDirection {
kHorizontal,
kVerticalGrowingLeft,
kVerticalGrowingRight,
};
enum class TextAlignment {
/// Align the text at the start, based on the Unicode text direction.
kStart,
/// Align the text in the center of the box.
kCenter,
/// Align the text at the end, based on the Unicode text direction.
kEnd,
/// Align the text at the left side (or top for non-horizontal).
kLeft,
/// Align the text at the right side (or bottom for non-horizontal).
kRight,
};
struct TextNumber {
TextNumber(float value, TextUnitType type) : value(value), type(type) {}
float value;
TextUnitType type;
};
struct TextSettings { struct TextSettings {
// TODO(modmaker): Convert to generic structure. /// The line offset of the cue. For horizontal cues, this is the vertical
std::string settings; /// offset. Percent units are relative to the window.
base::Optional<TextNumber> line;
/// The position offset of the cue. For horizontal cues, this is the
/// horizontal offset. Percent units are relative to the window.
base::Optional<TextNumber> position;
/// The size of the space used to draw text. For horizontal cues, this is the
/// width. Percent units are relative to the window.
base::Optional<TextNumber> size;
/// The region to draw the cue in.
std::string region;
/// The direction to draw text. This is also used to determine how cues are
/// positioned within the region.
WritingDirection writing_direction = WritingDirection::kHorizontal;
/// How to align the text within the cue box.
TextAlignment text_alignment = TextAlignment::kCenter;
}; };
struct TextFragment { struct TextFragment {

View File

@ -6,10 +6,12 @@
#include "packager/media/formats/webvtt/webvtt_parser.h" #include "packager/media/formats/webvtt/webvtt_parser.h"
#include <regex>
#include "packager/base/logging.h" #include "packager/base/logging.h"
#include "packager/base/strings/string_number_conversions.h"
#include "packager/base/strings/string_split.h" #include "packager/base/strings/string_split.h"
#include "packager/base/strings/string_util.h" #include "packager/base/strings/string_util.h"
#include "packager/media/base/text_sample.h"
#include "packager/media/base/text_stream_info.h" #include "packager/media/base/text_stream_info.h"
#include "packager/media/formats/webvtt/webvtt_utils.h" #include "packager/media/formats/webvtt/webvtt_utils.h"
@ -75,6 +77,103 @@ bool IsLikelyRegion(const std::string& line) {
return base::TrimWhitespaceASCII(line, base::TRIM_TRAILING) == "REGION"; return base::TrimWhitespaceASCII(line, base::TRIM_TRAILING) == "REGION";
} }
bool ParsePercent(const std::string& str, float* value) {
// https://www.w3.org/TR/webvtt1/#webvtt-percentage
// E.g. "4%" or "1.5%"
std::regex re(R"((\d+(?:\.\d+)?)%)");
std::smatch match;
if (!std::regex_match(str, match, re)) {
return false;
}
double temp;
base::StringToDouble(match[1], &temp);
if (temp >= 100) {
return false;
}
*value = temp;
return true;
}
void ParseSettings(const std::string& id,
const std::string& value,
TextSettings* settings) {
// https://www.w3.org/TR/webvtt1/#ref-for-parse-the-webvtt-cue-settings-1
if (id == "region") {
settings->region = value;
} else if (id == "vertical") {
if (value == "rl") {
settings->writing_direction = WritingDirection::kVerticalGrowingLeft;
} else if (value == "lr") {
settings->writing_direction = WritingDirection::kVerticalGrowingRight;
} else {
LOG(WARNING) << "Invalid WebVTT vertical setting: " << value;
}
} else if (id == "line") {
const auto pos = value.find(',');
const std::string line = value.substr(0, pos);
const std::string align =
pos != std::string::npos ? value.substr(pos + 1) : "";
if (pos != std::string::npos) {
LOG(WARNING) << "WebVTT line alignment isn't supported";
}
if (!line.empty() && line[line.size() - 1] == '%') {
float temp;
if (!ParsePercent(line, &temp)) {
LOG(WARNING) << "Invalid WebVTT line: " << value;
return;
}
settings->line.emplace(temp, TextUnitType::kPercent);
} else {
double temp;
if (!base::StringToDouble(line, &temp)) {
LOG(WARNING) << "Invalid WebVTT line: " << value;
return;
}
settings->line.emplace(temp, TextUnitType::kLines);
}
} else if (id == "position") {
const auto pos = value.find(',');
const std::string position = value.substr(0, pos);
const std::string align =
pos != std::string::npos ? value.substr(pos + 1) : "";
if (pos != std::string::npos) {
LOG(WARNING) << "WebVTT position alignment isn't supported";
}
float temp;
if (ParsePercent(position, &temp)) {
settings->position.emplace(temp, TextUnitType::kPercent);
} else {
LOG(WARNING) << "Invalid WebVTT position: " << value;
}
} else if (id == "size") {
float temp;
if (ParsePercent(value, &temp)) {
settings->size.emplace(temp, TextUnitType::kPercent);
} else {
LOG(WARNING) << "Invalid WebVTT size: " << value;
}
} else if (id == "align") {
if (value == "start") {
settings->text_alignment = TextAlignment::kStart;
} else if (value == "center") {
settings->text_alignment = TextAlignment::kCenter;
} else if (value == "end") {
settings->text_alignment = TextAlignment::kEnd;
} else if (value == "left") {
settings->text_alignment = TextAlignment::kLeft;
} else if (value == "right") {
settings->text_alignment = TextAlignment::kRight;
} else {
LOG(WARNING) << "Invalid WebVTT align: " << value;
}
} else {
LOG(WARNING) << "Unknown WebVTT setting: " << id;
}
}
void UpdateConfig(const std::vector<std::string>& block, std::string* config) { void UpdateConfig(const std::vector<std::string>& block, std::string* config) {
if (!config->empty()) if (!config->empty())
*config += "\n\n"; *config += "\n\n";
@ -235,16 +334,20 @@ bool WebVttParser::ParseCue(const std::string& id,
return true; return true;
} }
// The rest of time_and_style are the style tokens.
TextSettings settings; TextSettings settings;
for (size_t i = 3; i < time_and_style.size(); i++) { for (size_t i = 3; i < time_and_style.size(); i++) {
if (!settings.settings.empty()) { const auto pos = time_and_style[i].find(':');
settings.settings += " "; if (pos == std::string::npos) {
continue;
} }
settings.settings += time_and_style[i];
const std::string key = time_and_style[i].substr(0, pos);
const std::string value = time_and_style[i].substr(pos + 1);
ParseSettings(key, value, &settings);
} }
// The rest of the block is the payload. // The rest of the block is the payload.
// TODO: Parse tags to support <b>, <i>, etc.
TextFragment body; TextFragment body;
for (size_t i = 1; i < block_size; i++) { for (size_t i = 1; i < block_size; i++) {
if (i > 1) { if (i > 1) {
@ -253,7 +356,7 @@ bool WebVttParser::ParseCue(const std::string& id,
body.body += block[i]; body.body += block[i];
} }
auto sample = const auto sample =
std::make_shared<TextSample>(id, start_time, end_time, settings, body); std::make_shared<TextSample>(id, start_time, end_time, settings, body);
return new_text_sample_cb_.Run(kStreamIndex, sample); return new_text_sample_cb_.Run(kStreamIndex, sample);
} }

View File

@ -19,7 +19,6 @@ const uint32_t kStreamId = 0;
const uint32_t kTimeScale = 1000; const uint32_t kTimeScale = 1000;
const char* kNoId = ""; const char* kNoId = "";
const char* kNoSettings = "";
std::string ToString(const std::vector<uint8_t>& v) { std::string ToString(const std::vector<uint8_t>& v) {
return std::string(v.begin(), v.end()); return std::string(v.begin(), v.end());
@ -178,8 +177,16 @@ TEST_F(WebVttParserTest, ParseOneCue) {
EXPECT_EQ(samples_[0]->id(), kNoId); EXPECT_EQ(samples_[0]->id(), kNoId);
EXPECT_EQ(samples_[0]->start_time(), 60000u); EXPECT_EQ(samples_[0]->start_time(), 60000u);
EXPECT_EQ(samples_[0]->duration(), 3540000u); EXPECT_EQ(samples_[0]->duration(), 3540000u);
EXPECT_EQ(samples_[0]->settings().settings, kNoSettings);
EXPECT_EQ(samples_[0]->body().body, "subtitle"); EXPECT_EQ(samples_[0]->body().body, "subtitle");
// No settings
const auto& settings = samples_[0]->settings();
EXPECT_FALSE(settings.line);
EXPECT_FALSE(settings.position);
EXPECT_FALSE(settings.size);
EXPECT_EQ(settings.region, "");
EXPECT_EQ(settings.writing_direction, WritingDirection::kHorizontal);
EXPECT_EQ(settings.text_alignment, TextAlignment::kCenter);
} }
TEST_F(WebVttParserTest, ParseOneCueWithStyleAndRegion) { TEST_F(WebVttParserTest, ParseOneCueWithStyleAndRegion) {
@ -300,7 +307,36 @@ TEST_F(WebVttParserTest, ParseOneCueWithSettings) {
ASSERT_EQ(streams_.size(), 1u); ASSERT_EQ(streams_.size(), 1u);
ASSERT_EQ(samples_.size(), 1u); ASSERT_EQ(samples_.size(), 1u);
EXPECT_EQ(samples_[0]->settings().settings, "size:50%"); ASSERT_TRUE(samples_[0]->settings().size);
EXPECT_EQ(samples_[0]->settings().size->type, TextUnitType::kPercent);
EXPECT_EQ(samples_[0]->settings().size->value, 50.0f);
}
TEST_F(WebVttParserTest, ParseOneCueWithManySettings) {
const uint8_t text[] =
"WEBVTT\n"
"\n"
"00:01:00.000 --> 01:00:00.000 line:5 vertical:lr region:foo"
" align:right position:20%\n"
"subtitle\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
ASSERT_TRUE(parser_->Flush());
ASSERT_EQ(streams_.size(), 1u);
ASSERT_EQ(samples_.size(), 1u);
EXPECT_EQ(samples_[0]->settings().writing_direction,
WritingDirection::kVerticalGrowingRight);
EXPECT_EQ(samples_[0]->settings().text_alignment, TextAlignment::kRight);
EXPECT_FALSE(samples_[0]->settings().size);
ASSERT_TRUE(samples_[0]->settings().position);
EXPECT_EQ(samples_[0]->settings().position->type, TextUnitType::kPercent);
EXPECT_EQ(samples_[0]->settings().position->value, 20.0f);
ASSERT_TRUE(samples_[0]->settings().line);
EXPECT_EQ(samples_[0]->settings().line->type, TextUnitType::kLines);
EXPECT_EQ(samples_[0]->settings().line->value, 5.0f);
} }
// Verify that a typical case with mulitple cues work. // Verify that a typical case with mulitple cues work.

View File

@ -85,7 +85,75 @@ std::string MsToWebVttTimestamp(uint64_t ms) {
} }
std::string WebVttSettingsToString(const TextSettings& settings) { std::string WebVttSettingsToString(const TextSettings& settings) {
return settings.settings; std::string ret;
if (!settings.region.empty()) {
ret += " region:";
ret += settings.region;
}
if (settings.line) {
switch (settings.line->type) {
case TextUnitType::kPercent:
ret += " line:";
ret += base::DoubleToString(settings.line->value);
ret += "%";
break;
case TextUnitType::kLines:
ret += " line:";
ret += base::DoubleToString(settings.line->value);
break;
case TextUnitType::kPixels:
LOG(WARNING) << "WebVTT doesn't support pixel line settings";
break;
}
}
if (settings.position) {
if (settings.position->type == TextUnitType::kPercent) {
ret += " position:";
ret += base::DoubleToString(settings.position->value);
ret += "%";
} else {
LOG(WARNING) << "WebVTT only supports percent position settings";
}
}
if (settings.size) {
if (settings.size->type == TextUnitType::kPercent) {
ret += " size:";
ret += base::DoubleToString(settings.size->value);
ret += "%";
} else {
LOG(WARNING) << "WebVTT only supports percent size settings";
}
}
if (settings.writing_direction != WritingDirection::kHorizontal) {
ret += " direction:";
if (settings.writing_direction == WritingDirection::kVerticalGrowingLeft) {
ret += "rl";
} else {
ret += "lr";
}
}
switch (settings.text_alignment) {
case TextAlignment::kStart:
ret += " align:start";
break;
case TextAlignment::kEnd:
ret += " align:end";
break;
case TextAlignment::kLeft:
ret += " align:left";
break;
case TextAlignment::kRight:
ret += " align:right";
break;
case TextAlignment::kCenter:
break;
}
if (!ret.empty()) {
DCHECK_EQ(ret[0], ' ');
ret.erase(0, 1);
}
return ret;
} }
std::string WebVttFragmentToString(const TextFragment& fragment) { std::string WebVttFragmentToString(const TextFragment& fragment) {

View File

@ -127,5 +127,28 @@ TEST(WebVttTimestampTest, CreateHoursShort) {
TEST(WebVttTimestampTest, CreateHoursLong) { TEST(WebVttTimestampTest, CreateHoursLong) {
EXPECT_EQ("123:00:00.000", MsToWebVttTimestamp(442800000)); EXPECT_EQ("123:00:00.000", MsToWebVttTimestamp(442800000));
} }
TEST(WebVttUtilsTest, SettingsToString) {
TextSettings settings;
settings.region = "foo";
settings.line = TextNumber(27, TextUnitType::kPercent);
settings.position = TextNumber(42, TextUnitType::kPercent);
settings.size = TextNumber(54, TextUnitType::kPercent);
settings.writing_direction = WritingDirection::kVerticalGrowingLeft;
settings.text_alignment = TextAlignment::kEnd;
const auto actual = WebVttSettingsToString(settings);
EXPECT_EQ(actual,
"region:foo line:27% position:42% size:54% direction:rl align:end");
}
TEST(WebVttUtilsTest, SettingsToString_IgnoresDefaults) {
TextSettings settings;
settings.region = "foo";
const auto actual = WebVttSettingsToString(settings);
EXPECT_EQ(actual, "region:foo");
}
} // namespace media } // namespace media
} // namespace shaka } // namespace shaka