Parse TextSettings from WebVTT.
Cue settings are now stored in a generic TextSettings object that the WebVTT parser fills in. This lets other parsers populate cue settings without depending on WebVTT-specific syntax. Change-Id: I36689bec725bd2e515af962b7174fc5977f96fa2
This commit is contained in:
parent
c169c83613
commit
414f4589c8
|
@ -10,13 +10,67 @@
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "packager/base/optional.h"
|
||||||
|
|
||||||
namespace shaka {
|
namespace shaka {
|
||||||
namespace media {
|
namespace media {
|
||||||
|
|
||||||
|
/// The unit in which a TextNumber value is expressed.
enum class TextUnitType {
  /// An absolute measurement, counted in pixels.
  kPixels,
  /// An absolute measurement, counted in lines of text.
  kLines,
  /// A measurement relative to some reference size, given as a percentage
  /// (i.e. 0-100).
  kPercent,
};
|
||||||
|
|
||||||
|
/// The direction in which cue text is laid out.
enum class WritingDirection {
  /// Text is written horizontally.  This is the default for TextSettings.
  kHorizontal,
  /// Text is written vertically, growing toward the left.  The WebVTT parser
  /// produces this for the cue setting "vertical:rl".
  kVerticalGrowingLeft,
  /// Text is written vertically, growing toward the right.  The WebVTT parser
  /// produces this for the cue setting "vertical:lr".
  kVerticalGrowingRight,
};
|
||||||
|
|
||||||
|
/// How text is aligned inside the cue box.
enum class TextAlignment {
  /// Aligned to the start edge, as determined by the Unicode text direction.
  kStart,
  /// Centered within the box.
  kCenter,
  /// Aligned to the end edge, as determined by the Unicode text direction.
  kEnd,
  /// Aligned to the left side (or the top for non-horizontal text).
  kLeft,
  /// Aligned to the right side (or the bottom for non-horizontal text).
  kRight,
};
|
||||||
|
|
||||||
|
struct TextNumber {
|
||||||
|
TextNumber(float value, TextUnitType type) : value(value), type(type) {}
|
||||||
|
|
||||||
|
float value;
|
||||||
|
TextUnitType type;
|
||||||
|
};
|
||||||
|
|
||||||
struct TextSettings {
|
struct TextSettings {
|
||||||
// TODO(modmaker): Convert to generic structure.
|
/// The line offset of the cue. For horizontal cues, this is the vertical
|
||||||
std::string settings;
|
/// offset. Percent units are relative to the window.
|
||||||
|
base::Optional<TextNumber> line;
|
||||||
|
/// The position offset of the cue. For horizontal cues, this is the
|
||||||
|
/// horizontal offset. Percent units are relative to the window.
|
||||||
|
base::Optional<TextNumber> position;
|
||||||
|
/// The size of the space used to draw text. For horizontal cues, this is the
|
||||||
|
/// width. Percent units are relative to the window.
|
||||||
|
base::Optional<TextNumber> size;
|
||||||
|
|
||||||
|
/// The region to draw the cue in.
|
||||||
|
std::string region;
|
||||||
|
|
||||||
|
/// The direction to draw text. This is also used to determine how cues are
|
||||||
|
/// positioned within the region.
|
||||||
|
WritingDirection writing_direction = WritingDirection::kHorizontal;
|
||||||
|
/// How to align the text within the cue box.
|
||||||
|
TextAlignment text_alignment = TextAlignment::kCenter;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct TextFragment {
|
struct TextFragment {
|
||||||
|
|
|
@ -6,10 +6,12 @@
|
||||||
|
|
||||||
#include "packager/media/formats/webvtt/webvtt_parser.h"
|
#include "packager/media/formats/webvtt/webvtt_parser.h"
|
||||||
|
|
||||||
|
#include <regex>
|
||||||
|
|
||||||
#include "packager/base/logging.h"
|
#include "packager/base/logging.h"
|
||||||
|
#include "packager/base/strings/string_number_conversions.h"
|
||||||
#include "packager/base/strings/string_split.h"
|
#include "packager/base/strings/string_split.h"
|
||||||
#include "packager/base/strings/string_util.h"
|
#include "packager/base/strings/string_util.h"
|
||||||
#include "packager/media/base/text_sample.h"
|
|
||||||
#include "packager/media/base/text_stream_info.h"
|
#include "packager/media/base/text_stream_info.h"
|
||||||
#include "packager/media/formats/webvtt/webvtt_utils.h"
|
#include "packager/media/formats/webvtt/webvtt_utils.h"
|
||||||
|
|
||||||
|
@ -75,6 +77,103 @@ bool IsLikelyRegion(const std::string& line) {
|
||||||
return base::TrimWhitespaceASCII(line, base::TRIM_TRAILING) == "REGION";
|
return base::TrimWhitespaceASCII(line, base::TRIM_TRAILING) == "REGION";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parses a WebVTT percentage such as "4%" or "1.5%".
//
// |str| must consist solely of ASCII digits, an optional fractional part and
// a trailing '%'.  On success the numeric part is stored in |value| and true
// is returned.  On failure |value| is left untouched and false is returned.
// Values above 100 are rejected; 100 itself is valid because the WebVTT
// percentage range is 0..100 inclusive.
bool ParsePercent(const std::string& str, float* value) {
  // https://www.w3.org/TR/webvtt1/#webvtt-percentage
  // E.g. "4%" or "1.5%"
  std::regex re(R"((\d+(?:\.\d+)?)%)");
  std::smatch match;
  if (!std::regex_match(str, match, re)) {
    return false;
  }

  // The regex guarantees a plain decimal number, so a conversion failure here
  // means the value does not fit in a double; treat it as invalid.
  double percent = 0;
  if (!base::StringToDouble(match[1].str(), &percent)) {
    return false;
  }
  if (percent > 100) {
    return false;
  }
  *value = static_cast<float>(percent);
  return true;
}
|
||||||
|
|
||||||
|
void ParseSettings(const std::string& id,
|
||||||
|
const std::string& value,
|
||||||
|
TextSettings* settings) {
|
||||||
|
// https://www.w3.org/TR/webvtt1/#ref-for-parse-the-webvtt-cue-settings-1
|
||||||
|
if (id == "region") {
|
||||||
|
settings->region = value;
|
||||||
|
} else if (id == "vertical") {
|
||||||
|
if (value == "rl") {
|
||||||
|
settings->writing_direction = WritingDirection::kVerticalGrowingLeft;
|
||||||
|
} else if (value == "lr") {
|
||||||
|
settings->writing_direction = WritingDirection::kVerticalGrowingRight;
|
||||||
|
} else {
|
||||||
|
LOG(WARNING) << "Invalid WebVTT vertical setting: " << value;
|
||||||
|
}
|
||||||
|
} else if (id == "line") {
|
||||||
|
const auto pos = value.find(',');
|
||||||
|
const std::string line = value.substr(0, pos);
|
||||||
|
const std::string align =
|
||||||
|
pos != std::string::npos ? value.substr(pos + 1) : "";
|
||||||
|
if (pos != std::string::npos) {
|
||||||
|
LOG(WARNING) << "WebVTT line alignment isn't supported";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!line.empty() && line[line.size() - 1] == '%') {
|
||||||
|
float temp;
|
||||||
|
if (!ParsePercent(line, &temp)) {
|
||||||
|
LOG(WARNING) << "Invalid WebVTT line: " << value;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
settings->line.emplace(temp, TextUnitType::kPercent);
|
||||||
|
} else {
|
||||||
|
double temp;
|
||||||
|
if (!base::StringToDouble(line, &temp)) {
|
||||||
|
LOG(WARNING) << "Invalid WebVTT line: " << value;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
settings->line.emplace(temp, TextUnitType::kLines);
|
||||||
|
}
|
||||||
|
} else if (id == "position") {
|
||||||
|
const auto pos = value.find(',');
|
||||||
|
const std::string position = value.substr(0, pos);
|
||||||
|
const std::string align =
|
||||||
|
pos != std::string::npos ? value.substr(pos + 1) : "";
|
||||||
|
if (pos != std::string::npos) {
|
||||||
|
LOG(WARNING) << "WebVTT position alignment isn't supported";
|
||||||
|
}
|
||||||
|
|
||||||
|
float temp;
|
||||||
|
if (ParsePercent(position, &temp)) {
|
||||||
|
settings->position.emplace(temp, TextUnitType::kPercent);
|
||||||
|
} else {
|
||||||
|
LOG(WARNING) << "Invalid WebVTT position: " << value;
|
||||||
|
}
|
||||||
|
} else if (id == "size") {
|
||||||
|
float temp;
|
||||||
|
if (ParsePercent(value, &temp)) {
|
||||||
|
settings->size.emplace(temp, TextUnitType::kPercent);
|
||||||
|
} else {
|
||||||
|
LOG(WARNING) << "Invalid WebVTT size: " << value;
|
||||||
|
}
|
||||||
|
} else if (id == "align") {
|
||||||
|
if (value == "start") {
|
||||||
|
settings->text_alignment = TextAlignment::kStart;
|
||||||
|
} else if (value == "center") {
|
||||||
|
settings->text_alignment = TextAlignment::kCenter;
|
||||||
|
} else if (value == "end") {
|
||||||
|
settings->text_alignment = TextAlignment::kEnd;
|
||||||
|
} else if (value == "left") {
|
||||||
|
settings->text_alignment = TextAlignment::kLeft;
|
||||||
|
} else if (value == "right") {
|
||||||
|
settings->text_alignment = TextAlignment::kRight;
|
||||||
|
} else {
|
||||||
|
LOG(WARNING) << "Invalid WebVTT align: " << value;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
LOG(WARNING) << "Unknown WebVTT setting: " << id;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void UpdateConfig(const std::vector<std::string>& block, std::string* config) {
|
void UpdateConfig(const std::vector<std::string>& block, std::string* config) {
|
||||||
if (!config->empty())
|
if (!config->empty())
|
||||||
*config += "\n\n";
|
*config += "\n\n";
|
||||||
|
@ -235,16 +334,20 @@ bool WebVttParser::ParseCue(const std::string& id,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// The rest of time_and_style are the style tokens.
|
|
||||||
TextSettings settings;
|
TextSettings settings;
|
||||||
for (size_t i = 3; i < time_and_style.size(); i++) {
|
for (size_t i = 3; i < time_and_style.size(); i++) {
|
||||||
if (!settings.settings.empty()) {
|
const auto pos = time_and_style[i].find(':');
|
||||||
settings.settings += " ";
|
if (pos == std::string::npos) {
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
settings.settings += time_and_style[i];
|
|
||||||
|
const std::string key = time_and_style[i].substr(0, pos);
|
||||||
|
const std::string value = time_and_style[i].substr(pos + 1);
|
||||||
|
ParseSettings(key, value, &settings);
|
||||||
}
|
}
|
||||||
|
|
||||||
// The rest of the block is the payload.
|
// The rest of the block is the payload.
|
||||||
|
// TODO: Parse tags to support <b>, <i>, etc.
|
||||||
TextFragment body;
|
TextFragment body;
|
||||||
for (size_t i = 1; i < block_size; i++) {
|
for (size_t i = 1; i < block_size; i++) {
|
||||||
if (i > 1) {
|
if (i > 1) {
|
||||||
|
@ -253,7 +356,7 @@ bool WebVttParser::ParseCue(const std::string& id,
|
||||||
body.body += block[i];
|
body.body += block[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
auto sample =
|
const auto sample =
|
||||||
std::make_shared<TextSample>(id, start_time, end_time, settings, body);
|
std::make_shared<TextSample>(id, start_time, end_time, settings, body);
|
||||||
return new_text_sample_cb_.Run(kStreamIndex, sample);
|
return new_text_sample_cb_.Run(kStreamIndex, sample);
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,6 @@ const uint32_t kStreamId = 0;
|
||||||
const uint32_t kTimeScale = 1000;
|
const uint32_t kTimeScale = 1000;
|
||||||
|
|
||||||
const char* kNoId = "";
|
const char* kNoId = "";
|
||||||
const char* kNoSettings = "";
|
|
||||||
|
|
||||||
std::string ToString(const std::vector<uint8_t>& v) {
|
std::string ToString(const std::vector<uint8_t>& v) {
|
||||||
return std::string(v.begin(), v.end());
|
return std::string(v.begin(), v.end());
|
||||||
|
@ -178,8 +177,16 @@ TEST_F(WebVttParserTest, ParseOneCue) {
|
||||||
EXPECT_EQ(samples_[0]->id(), kNoId);
|
EXPECT_EQ(samples_[0]->id(), kNoId);
|
||||||
EXPECT_EQ(samples_[0]->start_time(), 60000u);
|
EXPECT_EQ(samples_[0]->start_time(), 60000u);
|
||||||
EXPECT_EQ(samples_[0]->duration(), 3540000u);
|
EXPECT_EQ(samples_[0]->duration(), 3540000u);
|
||||||
EXPECT_EQ(samples_[0]->settings().settings, kNoSettings);
|
|
||||||
EXPECT_EQ(samples_[0]->body().body, "subtitle");
|
EXPECT_EQ(samples_[0]->body().body, "subtitle");
|
||||||
|
|
||||||
|
// No settings
|
||||||
|
const auto& settings = samples_[0]->settings();
|
||||||
|
EXPECT_FALSE(settings.line);
|
||||||
|
EXPECT_FALSE(settings.position);
|
||||||
|
EXPECT_FALSE(settings.size);
|
||||||
|
EXPECT_EQ(settings.region, "");
|
||||||
|
EXPECT_EQ(settings.writing_direction, WritingDirection::kHorizontal);
|
||||||
|
EXPECT_EQ(settings.text_alignment, TextAlignment::kCenter);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(WebVttParserTest, ParseOneCueWithStyleAndRegion) {
|
TEST_F(WebVttParserTest, ParseOneCueWithStyleAndRegion) {
|
||||||
|
@ -300,7 +307,36 @@ TEST_F(WebVttParserTest, ParseOneCueWithSettings) {
|
||||||
|
|
||||||
ASSERT_EQ(streams_.size(), 1u);
|
ASSERT_EQ(streams_.size(), 1u);
|
||||||
ASSERT_EQ(samples_.size(), 1u);
|
ASSERT_EQ(samples_.size(), 1u);
|
||||||
EXPECT_EQ(samples_[0]->settings().settings, "size:50%");
|
ASSERT_TRUE(samples_[0]->settings().size);
|
||||||
|
EXPECT_EQ(samples_[0]->settings().size->type, TextUnitType::kPercent);
|
||||||
|
EXPECT_EQ(samples_[0]->settings().size->value, 50.0f);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify that several settings on one cue are each parsed into the matching
// TextSettings field, and that a setting that is absent ("size") stays unset.
TEST_F(WebVttParserTest, ParseOneCueWithManySettings) {
  const uint8_t text[] =
      "WEBVTT\n"
      "\n"
      "00:01:00.000 --> 01:00:00.000 line:5 vertical:lr region:foo"
      " align:right position:20%\n"
      "subtitle\n";

  ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());

  ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
  ASSERT_TRUE(parser_->Flush());

  ASSERT_EQ(streams_.size(), 1u);
  ASSERT_EQ(samples_.size(), 1u);

  const auto& settings = samples_[0]->settings();
  EXPECT_EQ(settings.region, "foo");
  EXPECT_EQ(settings.writing_direction,
            WritingDirection::kVerticalGrowingRight);
  EXPECT_EQ(settings.text_alignment, TextAlignment::kRight);
  EXPECT_FALSE(settings.size);

  ASSERT_TRUE(settings.position);
  EXPECT_EQ(settings.position->type, TextUnitType::kPercent);
  EXPECT_EQ(settings.position->value, 20.0f);

  ASSERT_TRUE(settings.line);
  EXPECT_EQ(settings.line->type, TextUnitType::kLines);
  EXPECT_EQ(settings.line->value, 5.0f);
}
|
||||||
|
|
||||||
// Verify that a typical case with multiple cues works.
|
// Verify that a typical case with multiple cues works.
|
||||||
|
|
|
@ -85,7 +85,75 @@ std::string MsToWebVttTimestamp(uint64_t ms) {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string WebVttSettingsToString(const TextSettings& settings) {
|
std::string WebVttSettingsToString(const TextSettings& settings) {
|
||||||
return settings.settings;
|
std::string ret;
|
||||||
|
if (!settings.region.empty()) {
|
||||||
|
ret += " region:";
|
||||||
|
ret += settings.region;
|
||||||
|
}
|
||||||
|
if (settings.line) {
|
||||||
|
switch (settings.line->type) {
|
||||||
|
case TextUnitType::kPercent:
|
||||||
|
ret += " line:";
|
||||||
|
ret += base::DoubleToString(settings.line->value);
|
||||||
|
ret += "%";
|
||||||
|
break;
|
||||||
|
case TextUnitType::kLines:
|
||||||
|
ret += " line:";
|
||||||
|
ret += base::DoubleToString(settings.line->value);
|
||||||
|
break;
|
||||||
|
case TextUnitType::kPixels:
|
||||||
|
LOG(WARNING) << "WebVTT doesn't support pixel line settings";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (settings.position) {
|
||||||
|
if (settings.position->type == TextUnitType::kPercent) {
|
||||||
|
ret += " position:";
|
||||||
|
ret += base::DoubleToString(settings.position->value);
|
||||||
|
ret += "%";
|
||||||
|
} else {
|
||||||
|
LOG(WARNING) << "WebVTT only supports percent position settings";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (settings.size) {
|
||||||
|
if (settings.size->type == TextUnitType::kPercent) {
|
||||||
|
ret += " size:";
|
||||||
|
ret += base::DoubleToString(settings.size->value);
|
||||||
|
ret += "%";
|
||||||
|
} else {
|
||||||
|
LOG(WARNING) << "WebVTT only supports percent size settings";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (settings.writing_direction != WritingDirection::kHorizontal) {
|
||||||
|
ret += " direction:";
|
||||||
|
if (settings.writing_direction == WritingDirection::kVerticalGrowingLeft) {
|
||||||
|
ret += "rl";
|
||||||
|
} else {
|
||||||
|
ret += "lr";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
switch (settings.text_alignment) {
|
||||||
|
case TextAlignment::kStart:
|
||||||
|
ret += " align:start";
|
||||||
|
break;
|
||||||
|
case TextAlignment::kEnd:
|
||||||
|
ret += " align:end";
|
||||||
|
break;
|
||||||
|
case TextAlignment::kLeft:
|
||||||
|
ret += " align:left";
|
||||||
|
break;
|
||||||
|
case TextAlignment::kRight:
|
||||||
|
ret += " align:right";
|
||||||
|
break;
|
||||||
|
case TextAlignment::kCenter:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!ret.empty()) {
|
||||||
|
DCHECK_EQ(ret[0], ' ');
|
||||||
|
ret.erase(0, 1);
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string WebVttFragmentToString(const TextFragment& fragment) {
|
std::string WebVttFragmentToString(const TextFragment& fragment) {
|
||||||
|
|
|
@ -127,5 +127,28 @@ TEST(WebVttTimestampTest, CreateHoursShort) {
|
||||||
// Verify that an hour count wider than two digits is written in full.
TEST(WebVttTimestampTest, CreateHoursLong) {
  // 123 hours expressed in milliseconds.
  const uint64_t ms = 442800000;
  EXPECT_EQ("123:00:00.000", MsToWebVttTimestamp(ms));
}
|
||||||
|
|
||||||
|
// Verify that every non-default setting is serialized, in the order region,
// line, position, size, writing direction, alignment.
TEST(WebVttUtilsTest, SettingsToString) {
  TextSettings settings;
  settings.region = "foo";
  settings.line.emplace(27.0f, TextUnitType::kPercent);
  settings.position.emplace(42.0f, TextUnitType::kPercent);
  settings.size.emplace(54.0f, TextUnitType::kPercent);
  settings.writing_direction = WritingDirection::kVerticalGrowingLeft;
  settings.text_alignment = TextAlignment::kEnd;

  // NOTE(review): this expectation pins the "direction:rl" token; the W3C
  // WebVTT spec names the writing-direction cue setting "vertical".  Confirm
  // the intended output format.
  const std::string serialized = WebVttSettingsToString(settings);
  EXPECT_EQ(serialized,
            "region:foo line:27% position:42% size:54% direction:rl align:end");
}
|
||||||
|
|
||||||
|
// Verify that fields left at their defaults are omitted from the output.
TEST(WebVttUtilsTest, SettingsToString_IgnoresDefaults) {
  TextSettings only_region;
  only_region.region = "foo";

  const std::string serialized = WebVttSettingsToString(only_region);
  EXPECT_EQ(serialized, "region:foo");
}
|
||||||
|
|
||||||
} // namespace media
|
} // namespace media
|
||||||
} // namespace shaka
|
} // namespace shaka
|
||||||
|
|
Loading…
Reference in New Issue