Parse TextSettings from WebVTT.
Now the Cue settings are a generic object that is parsed in WebVTT. This will allow setting the settings in different parsers without having to use WebVTT-specifics. Change-Id: I36689bec725bd2e515af962b7174fc5977f96fa2
This commit is contained in:
parent
c169c83613
commit
414f4589c8
|
@ -10,13 +10,67 @@
|
|||
#include <stdint.h>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "packager/base/optional.h"
|
||||
|
||||
namespace shaka {
|
||||
namespace media {
|
||||
|
||||
/// The unit in which a TextNumber's value is expressed.
enum class TextUnitType {
  /// Absolute measurement, counted in pixels.
  kPixels,
  /// Absolute measurement, counted in lines of text.
  kLines,
  /// Relative measurement, as a percentage (i.e. 0-100) of some size.
  kPercent,
};
|
||||
|
||||
/// The direction in which cue text is written.
enum class WritingDirection {
  /// Horizontal text; this is the default used by TextSettings.
  kHorizontal,
  /// Vertical text; the WebVTT parser maps "vertical:rl" to this value.
  kVerticalGrowingLeft,
  /// Vertical text; the WebVTT parser maps "vertical:lr" to this value.
  kVerticalGrowingRight,
};
|
||||
|
||||
/// How text is aligned inside the cue box.
enum class TextAlignment {
  /// Aligned to the start, as determined by the Unicode text direction.
  kStart,
  /// Centered within the box.
  kCenter,
  /// Aligned to the end, as determined by the Unicode text direction.
  kEnd,
  /// Aligned to the left side (or the top for non-horizontal text).
  kLeft,
  /// Aligned to the right side (or the bottom for non-horizontal text).
  kRight,
};
|
||||
|
||||
struct TextNumber {
|
||||
TextNumber(float value, TextUnitType type) : value(value), type(type) {}
|
||||
|
||||
float value;
|
||||
TextUnitType type;
|
||||
};
|
||||
|
||||
struct TextSettings {
|
||||
// TODO(modmaker): Convert to generic structure.
|
||||
std::string settings;
|
||||
/// The line offset of the cue. For horizontal cues, this is the vertical
|
||||
/// offset. Percent units are relative to the window.
|
||||
base::Optional<TextNumber> line;
|
||||
/// The position offset of the cue. For horizontal cues, this is the
|
||||
/// horizontal offset. Percent units are relative to the window.
|
||||
base::Optional<TextNumber> position;
|
||||
/// The size of the space used to draw text. For horizontal cues, this is the
|
||||
/// width. Percent units are relative to the window.
|
||||
base::Optional<TextNumber> size;
|
||||
|
||||
/// The region to draw the cue in.
|
||||
std::string region;
|
||||
|
||||
/// The direction to draw text. This is also used to determine how cues are
|
||||
/// positioned within the region.
|
||||
WritingDirection writing_direction = WritingDirection::kHorizontal;
|
||||
/// How to align the text within the cue box.
|
||||
TextAlignment text_alignment = TextAlignment::kCenter;
|
||||
};
|
||||
|
||||
struct TextFragment {
|
||||
|
|
|
@ -6,10 +6,12 @@
|
|||
|
||||
#include "packager/media/formats/webvtt/webvtt_parser.h"
|
||||
|
||||
#include <regex>
|
||||
|
||||
#include "packager/base/logging.h"
|
||||
#include "packager/base/strings/string_number_conversions.h"
|
||||
#include "packager/base/strings/string_split.h"
|
||||
#include "packager/base/strings/string_util.h"
|
||||
#include "packager/media/base/text_sample.h"
|
||||
#include "packager/media/base/text_stream_info.h"
|
||||
#include "packager/media/formats/webvtt/webvtt_utils.h"
|
||||
|
||||
|
@ -75,6 +77,103 @@ bool IsLikelyRegion(const std::string& line) {
|
|||
return base::TrimWhitespaceASCII(line, base::TRIM_TRAILING) == "REGION";
|
||||
}
|
||||
|
||||
/// Parses a WebVTT percentage such as "4%" or "1.5%".
///
/// @param str the whole token to parse.  It must be digits, an optional
///        fractional part, and a trailing '%', with nothing before or after.
/// @param value receives the number (without the '%') on success; it is not
///        written on failure.
/// @return true if |str| is a well-formed percentage no greater than 100.
bool ParsePercent(const std::string& str, float* value) {
  // https://www.w3.org/TR/webvtt1/#webvtt-percentage
  // E.g. "4%" or "1.5%"
  const std::regex re(R"((\d+(?:\.\d+)?)%)");
  std::smatch match;
  if (!std::regex_match(str, match, re)) {
    return false;
  }

  double temp = 0;
  if (!base::StringToDouble(match[1].str(), &temp)) {
    return false;
  }
  // The valid range is inclusive of both ends, so "100%" must be accepted.
  // Negative values cannot occur because the pattern has no sign.
  if (temp > 100) {
    return false;
  }
  *value = static_cast<float>(temp);
  return true;
}
|
||||
|
||||
void ParseSettings(const std::string& id,
|
||||
const std::string& value,
|
||||
TextSettings* settings) {
|
||||
// https://www.w3.org/TR/webvtt1/#ref-for-parse-the-webvtt-cue-settings-1
|
||||
if (id == "region") {
|
||||
settings->region = value;
|
||||
} else if (id == "vertical") {
|
||||
if (value == "rl") {
|
||||
settings->writing_direction = WritingDirection::kVerticalGrowingLeft;
|
||||
} else if (value == "lr") {
|
||||
settings->writing_direction = WritingDirection::kVerticalGrowingRight;
|
||||
} else {
|
||||
LOG(WARNING) << "Invalid WebVTT vertical setting: " << value;
|
||||
}
|
||||
} else if (id == "line") {
|
||||
const auto pos = value.find(',');
|
||||
const std::string line = value.substr(0, pos);
|
||||
const std::string align =
|
||||
pos != std::string::npos ? value.substr(pos + 1) : "";
|
||||
if (pos != std::string::npos) {
|
||||
LOG(WARNING) << "WebVTT line alignment isn't supported";
|
||||
}
|
||||
|
||||
if (!line.empty() && line[line.size() - 1] == '%') {
|
||||
float temp;
|
||||
if (!ParsePercent(line, &temp)) {
|
||||
LOG(WARNING) << "Invalid WebVTT line: " << value;
|
||||
return;
|
||||
}
|
||||
settings->line.emplace(temp, TextUnitType::kPercent);
|
||||
} else {
|
||||
double temp;
|
||||
if (!base::StringToDouble(line, &temp)) {
|
||||
LOG(WARNING) << "Invalid WebVTT line: " << value;
|
||||
return;
|
||||
}
|
||||
settings->line.emplace(temp, TextUnitType::kLines);
|
||||
}
|
||||
} else if (id == "position") {
|
||||
const auto pos = value.find(',');
|
||||
const std::string position = value.substr(0, pos);
|
||||
const std::string align =
|
||||
pos != std::string::npos ? value.substr(pos + 1) : "";
|
||||
if (pos != std::string::npos) {
|
||||
LOG(WARNING) << "WebVTT position alignment isn't supported";
|
||||
}
|
||||
|
||||
float temp;
|
||||
if (ParsePercent(position, &temp)) {
|
||||
settings->position.emplace(temp, TextUnitType::kPercent);
|
||||
} else {
|
||||
LOG(WARNING) << "Invalid WebVTT position: " << value;
|
||||
}
|
||||
} else if (id == "size") {
|
||||
float temp;
|
||||
if (ParsePercent(value, &temp)) {
|
||||
settings->size.emplace(temp, TextUnitType::kPercent);
|
||||
} else {
|
||||
LOG(WARNING) << "Invalid WebVTT size: " << value;
|
||||
}
|
||||
} else if (id == "align") {
|
||||
if (value == "start") {
|
||||
settings->text_alignment = TextAlignment::kStart;
|
||||
} else if (value == "center") {
|
||||
settings->text_alignment = TextAlignment::kCenter;
|
||||
} else if (value == "end") {
|
||||
settings->text_alignment = TextAlignment::kEnd;
|
||||
} else if (value == "left") {
|
||||
settings->text_alignment = TextAlignment::kLeft;
|
||||
} else if (value == "right") {
|
||||
settings->text_alignment = TextAlignment::kRight;
|
||||
} else {
|
||||
LOG(WARNING) << "Invalid WebVTT align: " << value;
|
||||
}
|
||||
} else {
|
||||
LOG(WARNING) << "Unknown WebVTT setting: " << id;
|
||||
}
|
||||
}
|
||||
|
||||
void UpdateConfig(const std::vector<std::string>& block, std::string* config) {
|
||||
if (!config->empty())
|
||||
*config += "\n\n";
|
||||
|
@ -235,16 +334,20 @@ bool WebVttParser::ParseCue(const std::string& id,
|
|||
return true;
|
||||
}
|
||||
|
||||
// The rest of time_and_style are the style tokens.
|
||||
TextSettings settings;
|
||||
for (size_t i = 3; i < time_and_style.size(); i++) {
|
||||
if (!settings.settings.empty()) {
|
||||
settings.settings += " ";
|
||||
const auto pos = time_and_style[i].find(':');
|
||||
if (pos == std::string::npos) {
|
||||
continue;
|
||||
}
|
||||
settings.settings += time_and_style[i];
|
||||
|
||||
const std::string key = time_and_style[i].substr(0, pos);
|
||||
const std::string value = time_and_style[i].substr(pos + 1);
|
||||
ParseSettings(key, value, &settings);
|
||||
}
|
||||
|
||||
// The rest of the block is the payload.
|
||||
// TODO: Parse tags to support <b>, <i>, etc.
|
||||
TextFragment body;
|
||||
for (size_t i = 1; i < block_size; i++) {
|
||||
if (i > 1) {
|
||||
|
@ -253,7 +356,7 @@ bool WebVttParser::ParseCue(const std::string& id,
|
|||
body.body += block[i];
|
||||
}
|
||||
|
||||
auto sample =
|
||||
const auto sample =
|
||||
std::make_shared<TextSample>(id, start_time, end_time, settings, body);
|
||||
return new_text_sample_cb_.Run(kStreamIndex, sample);
|
||||
}
|
||||
|
|
|
@ -19,7 +19,6 @@ const uint32_t kStreamId = 0;
|
|||
const uint32_t kTimeScale = 1000;

// Expected values for cues that carry no id / no settings string.  The
// pointers themselves are const so the constants cannot be re-pointed.
const char* const kNoId = "";
const char* const kNoSettings = "";
|
||||
|
||||
std::string ToString(const std::vector<uint8_t>& v) {
|
||||
return std::string(v.begin(), v.end());
|
||||
|
@ -178,8 +177,16 @@ TEST_F(WebVttParserTest, ParseOneCue) {
|
|||
EXPECT_EQ(samples_[0]->id(), kNoId);
|
||||
EXPECT_EQ(samples_[0]->start_time(), 60000u);
|
||||
EXPECT_EQ(samples_[0]->duration(), 3540000u);
|
||||
EXPECT_EQ(samples_[0]->settings().settings, kNoSettings);
|
||||
EXPECT_EQ(samples_[0]->body().body, "subtitle");
|
||||
|
||||
// No settings
|
||||
const auto& settings = samples_[0]->settings();
|
||||
EXPECT_FALSE(settings.line);
|
||||
EXPECT_FALSE(settings.position);
|
||||
EXPECT_FALSE(settings.size);
|
||||
EXPECT_EQ(settings.region, "");
|
||||
EXPECT_EQ(settings.writing_direction, WritingDirection::kHorizontal);
|
||||
EXPECT_EQ(settings.text_alignment, TextAlignment::kCenter);
|
||||
}
|
||||
|
||||
TEST_F(WebVttParserTest, ParseOneCueWithStyleAndRegion) {
|
||||
|
@ -300,7 +307,36 @@ TEST_F(WebVttParserTest, ParseOneCueWithSettings) {
|
|||
|
||||
ASSERT_EQ(streams_.size(), 1u);
|
||||
ASSERT_EQ(samples_.size(), 1u);
|
||||
EXPECT_EQ(samples_[0]->settings().settings, "size:50%");
|
||||
ASSERT_TRUE(samples_[0]->settings().size);
|
||||
EXPECT_EQ(samples_[0]->settings().size->type, TextUnitType::kPercent);
|
||||
EXPECT_EQ(samples_[0]->settings().size->value, 50.0f);
|
||||
}
|
||||
|
||||
// Verify that several settings on one cue timing line are all parsed into the
// structured settings of the resulting sample.
TEST_F(WebVttParserTest, ParseOneCueWithManySettings) {
  const uint8_t text[] =
      "WEBVTT\n"
      "\n"
      "00:01:00.000 --> 01:00:00.000 line:5 vertical:lr region:foo"
      " align:right position:20%\n"
      "subtitle\n";

  ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());

  ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
  ASSERT_TRUE(parser_->Flush());

  ASSERT_EQ(streams_.size(), 1u);
  ASSERT_EQ(samples_.size(), 1u);

  const auto& parsed = samples_[0]->settings();
  EXPECT_EQ(parsed.writing_direction, WritingDirection::kVerticalGrowingRight);
  EXPECT_EQ(parsed.text_alignment, TextAlignment::kRight);

  // "size" was not given, so it must stay unset.
  EXPECT_FALSE(parsed.size);

  ASSERT_TRUE(parsed.position);
  EXPECT_EQ(parsed.position->type, TextUnitType::kPercent);
  EXPECT_EQ(parsed.position->value, 20.0f);

  // A line value without '%' is reported in line units.
  ASSERT_TRUE(parsed.line);
  EXPECT_EQ(parsed.line->type, TextUnitType::kLines);
  EXPECT_EQ(parsed.line->value, 5.0f);
}
|
||||
|
||||
// Verify that a typical case with multiple cues works.
|
||||
|
|
|
@ -85,7 +85,75 @@ std::string MsToWebVttTimestamp(uint64_t ms) {
|
|||
}
|
||||
|
||||
std::string WebVttSettingsToString(const TextSettings& settings) {
|
||||
return settings.settings;
|
||||
std::string ret;
|
||||
if (!settings.region.empty()) {
|
||||
ret += " region:";
|
||||
ret += settings.region;
|
||||
}
|
||||
if (settings.line) {
|
||||
switch (settings.line->type) {
|
||||
case TextUnitType::kPercent:
|
||||
ret += " line:";
|
||||
ret += base::DoubleToString(settings.line->value);
|
||||
ret += "%";
|
||||
break;
|
||||
case TextUnitType::kLines:
|
||||
ret += " line:";
|
||||
ret += base::DoubleToString(settings.line->value);
|
||||
break;
|
||||
case TextUnitType::kPixels:
|
||||
LOG(WARNING) << "WebVTT doesn't support pixel line settings";
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (settings.position) {
|
||||
if (settings.position->type == TextUnitType::kPercent) {
|
||||
ret += " position:";
|
||||
ret += base::DoubleToString(settings.position->value);
|
||||
ret += "%";
|
||||
} else {
|
||||
LOG(WARNING) << "WebVTT only supports percent position settings";
|
||||
}
|
||||
}
|
||||
if (settings.size) {
|
||||
if (settings.size->type == TextUnitType::kPercent) {
|
||||
ret += " size:";
|
||||
ret += base::DoubleToString(settings.size->value);
|
||||
ret += "%";
|
||||
} else {
|
||||
LOG(WARNING) << "WebVTT only supports percent size settings";
|
||||
}
|
||||
}
|
||||
if (settings.writing_direction != WritingDirection::kHorizontal) {
|
||||
ret += " direction:";
|
||||
if (settings.writing_direction == WritingDirection::kVerticalGrowingLeft) {
|
||||
ret += "rl";
|
||||
} else {
|
||||
ret += "lr";
|
||||
}
|
||||
}
|
||||
switch (settings.text_alignment) {
|
||||
case TextAlignment::kStart:
|
||||
ret += " align:start";
|
||||
break;
|
||||
case TextAlignment::kEnd:
|
||||
ret += " align:end";
|
||||
break;
|
||||
case TextAlignment::kLeft:
|
||||
ret += " align:left";
|
||||
break;
|
||||
case TextAlignment::kRight:
|
||||
ret += " align:right";
|
||||
break;
|
||||
case TextAlignment::kCenter:
|
||||
break;
|
||||
}
|
||||
|
||||
if (!ret.empty()) {
|
||||
DCHECK_EQ(ret[0], ' ');
|
||||
ret.erase(0, 1);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::string WebVttFragmentToString(const TextFragment& fragment) {
|
||||
|
|
|
@ -127,5 +127,28 @@ TEST(WebVttTimestampTest, CreateHoursShort) {
|
|||
// An hour count wider than two digits is written out in full.
TEST(WebVttTimestampTest, CreateHoursLong) {
  const uint64_t input_ms = 442800000;  // 123 hours.
  EXPECT_EQ("123:00:00.000", MsToWebVttTimestamp(input_ms));
}
|
||||
|
||||
// Every non-default field shows up in the serialized string, in a fixed order.
TEST(WebVttUtilsTest, SettingsToString) {
  TextSettings input;
  input.region = "foo";
  input.line = TextNumber(27, TextUnitType::kPercent);
  input.position = TextNumber(42, TextUnitType::kPercent);
  input.size = TextNumber(54, TextUnitType::kPercent);
  input.writing_direction = WritingDirection::kVerticalGrowingLeft;
  input.text_alignment = TextAlignment::kEnd;

  const std::string serialized = WebVttSettingsToString(input);
  EXPECT_EQ(serialized,
            "region:foo line:27% position:42% size:54% direction:rl align:end");
}
|
||||
|
||||
// Fields left at their defaults are not written to the serialized string.
TEST(WebVttUtilsTest, SettingsToString_IgnoresDefaults) {
  TextSettings input;
  input.region = "foo";

  const std::string serialized = WebVttSettingsToString(input);
  EXPECT_EQ(serialized, "region:foo");
}
|
||||
|
||||
} // namespace media
|
||||
} // namespace shaka
|
||||
|
|
Loading…
Reference in New Issue