Parse WebVTT regions and styles.

This adds more generic settings for regions and CSS styles.  These are
global settings, so they go on the StreamInfo object.

Change-Id: Ibb76c060206152ccf8e9a067c09877226f67c927
This commit is contained in:
Jacob Trimble 2020-08-26 14:21:09 -07:00
parent 4a777b6207
commit 10e71680a1
9 changed files with 361 additions and 43 deletions

View File

@ -8,12 +8,38 @@
#define PACKAGER_MEDIA_BASE_TEXT_STREAM_INFO_H_
#include "packager/media/base/stream_info.h"
#include "packager/media/base/text_sample.h"
#include <map>
#include <string>
namespace shaka {
namespace media {
/// Settings for a named text region that cues can be placed in.
/// See https://www.w3.org/TR/webvtt1/#regions.
struct TextRegion {
  /// Region width. When expressed in percent, it is relative to the window.
  TextNumber width{100, TextUnitType::kPercent};
  /// Region height. When expressed in percent, it is relative to the window.
  TextNumber height{100, TextUnitType::kPercent};

  /// Anchor point inside the window (x and y). Percent units are relative to
  /// the window. WebVTT names this the "viewport region anchor".
  TextNumber window_anchor_x{0, TextUnitType::kPercent};
  TextNumber window_anchor_y{0, TextUnitType::kPercent};

  /// Anchor point inside the region (x and y). Percent units are relative to
  /// the region size. Example: a value of (100, 100) means the bottom right
  /// corner of the region is placed at the window anchor point.
  TextNumber region_anchor_x{0, TextUnitType::kPercent};
  TextNumber region_anchor_y{0, TextUnitType::kPercent};

  /// When true, existing cues scroll up as new cues are added. When false,
  /// new cues are added above existing cues or replace existing ones.
  bool scroll{false};
};
class TextStreamInfo : public StreamInfo {
public:
/// No encryption supported.
@ -42,8 +68,16 @@ class TextStreamInfo : public StreamInfo {
uint16_t width() const { return width_; }
uint16_t height() const { return height_; }
/// @return the regions stored on this stream, keyed by region id.
const std::map<std::string, TextRegion>& regions() const { return regions_; }
/// Stores a copy of |region| under |id|. If a region with the same id was
/// already stored, it is overwritten.
void AddRegion(const std::string& id, const TextRegion& region) {
regions_[id] = region;
}
/// @return the CSS styles text stored on this stream (empty if none was set).
const std::string& css_styles() const { return css_styles_; }
/// Replaces the stored CSS styles text with a copy of |styles|.
void set_css_styles(const std::string& styles) { css_styles_ = styles; }
private:
std::map<std::string, TextRegion> regions_;
std::string css_styles_;
uint16_t width_;
uint16_t height_;

View File

@ -579,7 +579,7 @@ bool MP4Muxer::GenerateTextTrak(const TextStreamInfo* text_info,
webvtt.config.config = "WEBVTT";
// The spec does not define a way to carry STYLE and REGION information in
// the mp4 container.
if (!text_info->codec_config().empty()) {
if (!text_info->regions().empty() || !text_info->css_styles().empty()) {
LOG(INFO) << "Skipping possible style / region configuration as the spec "
"does not define a way to carry them inside ISO-BMFF files.";
}

View File

@ -7,10 +7,12 @@
#include "packager/media/formats/webvtt/webvtt_muxer.h"
#include <memory>
#include <regex>
#include "packager/file/file.h"
#include "packager/file/file_closer.h"
#include "packager/media/base/muxer_util.h"
#include "packager/media/base/text_stream_info.h"
#include "packager/media/formats/webvtt/webvtt_utils.h"
#include "packager/status_macros.h"
@ -18,20 +20,13 @@ namespace shaka {
namespace media {
namespace webvtt {
namespace {
std::string ToString(const std::vector<uint8_t>& v) {
return std::string(v.begin(), v.end());
}
} // namespace
WebVttMuxer::WebVttMuxer(const MuxerOptions& options) : Muxer(options) {}
WebVttMuxer::~WebVttMuxer() {}
Status WebVttMuxer::InitializeMuxer() {
if (streams().size() != 1) {
return Status(error::MUXER_FAILURE, "Incorrect number of streams");
if (streams().size() != 1 || streams()[0]->stream_type() != kStreamText) {
return Status(error::MUXER_FAILURE,
"Incorrect streams given to WebVTT muxer");
}
// Only initialize the stream once we see a cue to avoid empty files.
@ -39,9 +34,10 @@ Status WebVttMuxer::InitializeMuxer() {
streams()[0]->time_scale(),
MuxerListener::kContainerText);
buffer_.reset(
new WebVttFileBuffer(options().transport_stream_timestamp_offset_ms,
ToString(streams()[0]->codec_config())));
auto* stream = static_cast<const TextStreamInfo*>(streams()[0].get());
const std::string preamble = WebVttGetPreamble(*stream);
buffer_.reset(new WebVttFileBuffer(
options().transport_stream_timestamp_offset_ms, preamble));
last_cue_ms_ = 0;
return Status::OK;

View File

@ -95,6 +95,24 @@ bool ParsePercent(const std::string& str, float* value) {
return true;
}
// Parses a pair of percentages written as "A%,B%" (for example "25%,75.5%").
//
// |str| must match the pattern exactly: two non-negative decimal numbers, each
// immediately followed by '%', separated by a single ','.  On success the two
// numbers are written to |*a| and |*b| and true is returned.  On failure false
// is returned and the outputs are left untouched.
//
// Both values must be within [0, 100].  100% is a valid WebVTT percentage and
// is also what the REGION defaults use (e.g. "regionanchor:0%,100%"), so it
// must be accepted here for serialized output to parse again.
bool ParseDoublePercent(const std::string& str, float* a, float* b) {
  const std::regex re(R"((\d+(?:\.\d+)?)%,(\d+(?:\.\d+)?)%)");
  std::smatch match;
  if (!std::regex_match(str, match, re)) {
    return false;
  }

  // The regex has already validated the syntax of both numbers, so the
  // conversion results are not checked; the temporaries are zero-initialized
  // so they never hold indeterminate values.
  double tempA = 0;
  double tempB = 0;
  base::StringToDouble(match[1], &tempA);
  base::StringToDouble(match[2], &tempB);
  // The regex cannot match a sign, so only the upper bound needs checking.
  if (tempA > 100 || tempB > 100) {
    return false;
  }

  *a = static_cast<float>(tempA);
  *b = static_cast<float>(tempB);
  return true;
}
void ParseSettings(const std::string& id,
const std::string& value,
TextSettings* settings) {
@ -174,12 +192,6 @@ void ParseSettings(const std::string& id,
}
}
void UpdateConfig(const std::vector<std::string>& block, std::string* config) {
if (!config->empty())
*config += "\n\n";
*config += base::JoinString(block, "\n");
}
} // namespace
WebVttParser::WebVttParser() {}
@ -250,7 +262,11 @@ bool WebVttParser::ParseBlock(const std::vector<std::string>& block) {
LOG(WARNING)
<< "Found style block after seeing cue. Ignoring style block";
} else {
UpdateConfig(block, &style_region_config_);
for (size_t i = 1; i < block.size(); i++) {
if (!css_styles_.empty())
css_styles_ += "\n";
css_styles_ += block[i];
}
}
return true;
}
@ -260,10 +276,10 @@ bool WebVttParser::ParseBlock(const std::vector<std::string>& block) {
if (saw_cue_) {
LOG(WARNING)
<< "Found region block after seeing cue. Ignoring region block";
} else {
UpdateConfig(block, &style_region_config_);
}
return true;
} else {
return ParseRegion(block);
}
}
// CUE with ID
@ -284,6 +300,93 @@ bool WebVttParser::ParseBlock(const std::vector<std::string>& block) {
return false;
}
bool WebVttParser::ParseRegion(const std::vector<std::string>& block) {
TextRegion region;
std::string region_id;
// Fill in defaults. Some may already be this, but set them anyway.
// See https://www.w3.org/TR/webvtt1/#regions
region.width.value = 100;
region.width.type = TextUnitType::kPercent;
region.height.value = 3;
region.height.type = TextUnitType::kLines;
region.window_anchor_x.value = 0;
region.window_anchor_x.type = TextUnitType::kPercent;
region.window_anchor_y.value = 100;
region.window_anchor_y.type = TextUnitType::kPercent;
region.region_anchor_x.value = 0;
region.region_anchor_x.type = TextUnitType::kPercent;
region.region_anchor_y.value = 100;
region.region_anchor_y.type = TextUnitType::kPercent;
bool first = true;
for (const auto& line : block) {
// First line is "REGION", skip.
if (first) {
first = false;
continue;
}
base::StringPairs pairs;
if (!base::SplitStringIntoKeyValuePairs(line, ':', ' ', &pairs)) {
LOG(ERROR) << "Invalid WebVTT settings: " << line;
return false;
}
for (const auto& pair : pairs) {
const std::string& value = pair.second;
if (pair.first == "id") {
if (value.find("-->") != std::string::npos) {
LOG(ERROR) << "Invalid WebVTT REGION ID: " << value;
return false;
}
if (regions_.find(value) != regions_.end()) {
LOG(ERROR) << "Duplicate WebVTT REGION: " << value;
return false;
}
region_id = value;
} else if (pair.first == "width") {
if (!ParsePercent(value, &region.width.value)) {
LOG(ERROR) << "Invalid WebVTT REGION width: " << value;
return false;
}
} else if (pair.first == "lines") {
unsigned int temp;
if (!base::StringToUint(value, &temp)) {
LOG(ERROR) << "Invalid WebVTT REGION lines: " << value;
return false;
}
region.height.value = temp;
} else if (pair.first == "regionanchor") {
if (!ParseDoublePercent(value, &region.region_anchor_x.value,
&region.region_anchor_y.value)) {
LOG(ERROR) << "Invalid WebVTT REGION regionanchor: " << value;
return false;
}
} else if (pair.first == "viewportanchor") {
if (!ParseDoublePercent(value, &region.window_anchor_x.value,
&region.window_anchor_y.value)) {
LOG(ERROR) << "Invalid WebVTT REGION windowanchor: " << value;
return false;
}
} else if (pair.first == "scroll") {
if (value != "up") {
LOG(ERROR) << "Invalid WebVTT REGION scroll: " << value;
return false;
}
region.scroll = true;
} else {
LOG(ERROR) << "Unknown WebVTT REGION setting: " << pair.first;
return false;
}
}
}
if (region_id.empty()) {
LOG(ERROR) << "WebVTT REGION id is required";
return false;
}
regions_.insert(std::make_pair(region_id, std::move(region)));
return true;
}
// Parses |block| as a cue that has no identifier line: the whole block is
// handed to ParseCue() together with an empty id.
bool WebVttParser::ParseCueWithNoId(const std::vector<std::string>& block) {
  const std::string no_id;
  const std::string* const lines = block.data();
  const size_t line_count = block.size();
  return ParseCue(no_id, lines, line_count);
}
@ -378,10 +481,14 @@ void WebVttParser::DispatchTextStreamInfo() {
// The language of the stream will be overwritten by the Demuxer later.
const char kNoLanguage[] = "";
std::vector<std::shared_ptr<StreamInfo>> streams;
streams.emplace_back(std::make_shared<TextStreamInfo>(
kTrackId, kTimescale, kDuration, kCodecWebVtt, kWebVttCodecString,
style_region_config_, kNoWidth, kNoHeight, kNoLanguage));
const auto stream = std::make_shared<TextStreamInfo>(
kTrackId, kTimescale, kDuration, kCodecWebVtt, kWebVttCodecString, "",
kNoWidth, kNoHeight, kNoLanguage);
stream->set_css_styles(css_styles_);
for (const auto& pair : regions_)
stream->AddRegion(pair.first, pair.second);
std::vector<std::shared_ptr<StreamInfo>> streams{stream};
init_cb_.Run(streams);
}

View File

@ -7,10 +7,13 @@
#ifndef PACKAGER_MEDIA_FORMATS_WEBVTT_WEBVTT_PARSER_H_
#define PACKAGER_MEDIA_FORMATS_WEBVTT_WEBVTT_PARSER_H_
#include <map>
#include <string>
#include <vector>
#include "packager/media/base/media_parser.h"
#include "packager/media/base/text_sample.h"
#include "packager/media/base/text_stream_info.h"
#include "packager/media/formats/webvtt/text_readers.h"
namespace shaka {
@ -31,6 +34,7 @@ class WebVttParser : public MediaParser {
private:
bool Parse();
bool ParseBlock(const std::vector<std::string>& block);
bool ParseRegion(const std::vector<std::string>& block);
bool ParseCueWithNoId(const std::vector<std::string>& block);
bool ParseCueWithId(const std::vector<std::string>& block);
bool ParseCue(const std::string& id,
@ -43,7 +47,8 @@ class WebVttParser : public MediaParser {
NewTextSampleCB new_text_sample_cb_;
BlockReader reader_;
std::string style_region_config_;
std::map<std::string, TextRegion> regions_;
std::string css_styles_;
bool saw_cue_ = false;
bool stream_info_dispatched_ = false;
bool initialized_ = false;

View File

@ -4,12 +4,13 @@
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#include "packager/media/formats/webvtt/webvtt_parser.h"
#include <gtest/gtest.h>
#include "packager/base/bind.h"
#include "packager/media/base/stream_info.h"
#include "packager/media/base/text_sample.h"
#include "packager/media/formats/webvtt/webvtt_parser.h"
namespace shaka {
namespace media {
@ -20,10 +21,6 @@ const uint32_t kTimeScale = 1000;
const char* kNoId = "";
std::string ToString(const std::vector<uint8_t>& v) {
return std::string(v.begin(), v.end());
}
void ExpectNoStyle(const TextFragmentStyle& style) {
EXPECT_FALSE(style.underline);
EXPECT_FALSE(style.bold);
@ -210,7 +207,7 @@ TEST_F(WebVttParserTest, ParseOneCue) {
EXPECT_EQ(settings.text_alignment, TextAlignment::kCenter);
}
TEST_F(WebVttParserTest, ParseOneCueWithStyleAndRegion) {
TEST_F(WebVttParserTest, ParseOneCueWithStyle) {
const uint8_t text[] =
"WEBVTT\n"
"\n"
@ -219,7 +216,7 @@ TEST_F(WebVttParserTest, ParseOneCueWithStyleAndRegion) {
"\n"
"REGION\n"
"id:scroll\n"
"scrol:up\n"
"scroll:up\n"
"\n"
"00:01:00.000 --> 01:00:00.000\n"
"subtitle\n";
@ -231,14 +228,9 @@ TEST_F(WebVttParserTest, ParseOneCueWithStyleAndRegion) {
ASSERT_EQ(streams_.size(), 1u);
ASSERT_EQ(samples_.size(), 1u);
auto* stream = static_cast<const TextStreamInfo*>(streams_[0].get());
EXPECT_EQ(ToString(streams_[0]->codec_config()),
"STYLE\n"
"::cue { color:lime }\n"
"\n"
"REGION\n"
"id:scroll\n"
"scrol:up");
EXPECT_EQ(stream->css_styles(), "::cue { color:lime }");
EXPECT_EQ(samples_[0]->id(), kNoId);
EXPECT_EQ(samples_[0]->start_time(), 60000u);
EXPECT_EQ(samples_[0]->duration(), 3540000u);
@ -314,7 +306,7 @@ TEST_F(WebVttParserTest, ParseOneEmptyCueWithId) {
ExpectPlainCueWithBody(samples_[0]->body(), "");
}
TEST_F(WebVttParserTest, ParseOneCueWithSettings) {
TEST_F(WebVttParserTest, ParseSettingSize) {
const uint8_t text[] =
"WEBVTT\n"
"\n"
@ -360,6 +352,46 @@ TEST_F(WebVttParserTest, ParseOneCueWithManySettings) {
EXPECT_EQ(samples_[0]->settings().line->value, 5.0f);
}
// Verifies that a REGION block is parsed into the stream's region map, that
// the cue references it, and that a setting left out of the block
// (regionanchor) keeps its default value.
TEST_F(WebVttParserTest, ParseRegions) {
  const uint8_t text[] =
      "WEBVTT\n"
      "\n"
      "REGION\n"
      "id:foo\n"
      "width:20%\n"
      "lines:6\n"
      "viewportanchor:25%,75%\n"
      "scroll:up\n"
      "\n"
      "00:01:00.000 --> 01:00:00.000 region:foo\n"
      "subtitle\n";

  ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());

  ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
  ASSERT_TRUE(parser_->Flush());

  ASSERT_EQ(streams_.size(), 1u);
  ASSERT_EQ(samples_.size(), 1u);

  auto* stream = static_cast<const TextStreamInfo*>(streams_[0].get());
  const auto& regions = stream->regions();
  ASSERT_EQ(regions.size(), 1u);
  ASSERT_EQ(regions.count("foo"), 1u);

  EXPECT_EQ(samples_[0]->settings().region, "foo");
  const auto& region = regions.at("foo");
  EXPECT_EQ(region.width.value, 20.0f);
  EXPECT_EQ(region.width.type, TextUnitType::kPercent);
  EXPECT_EQ(region.height.value, 6.0f);
  EXPECT_EQ(region.height.type, TextUnitType::kLines);
  EXPECT_EQ(region.window_anchor_x.value, 25.0f);
  EXPECT_EQ(region.window_anchor_x.type, TextUnitType::kPercent);
  EXPECT_EQ(region.window_anchor_y.value, 75.0f);
  EXPECT_EQ(region.window_anchor_y.type, TextUnitType::kPercent);
  // "regionanchor" is not present in the input, so the parser defaults
  // (0%, 100%) must be reported.
  EXPECT_EQ(region.region_anchor_x.value, 0.0f);
  EXPECT_EQ(region.region_anchor_x.type, TextUnitType::kPercent);
  EXPECT_EQ(region.region_anchor_y.value, 100.0f);
  EXPECT_EQ(region.region_anchor_y.type, TextUnitType::kPercent);
  EXPECT_TRUE(region.scroll);
}
// Verify that a typical case with multiple cues works.
TEST_F(WebVttParserTest, ParseMultipleCues) {
const uint8_t text[] =

View File

@ -10,6 +10,7 @@
#include <inttypes.h>
#include <regex>
#include <unordered_set>
#include "packager/base/logging.h"
#include "packager/base/strings/string_number_conversions.h"
@ -260,5 +261,49 @@ std::string WebVttFragmentToString(const TextFragment& fragment) {
return WriteFragment(fragment, &tags);
}
std::string WebVttGetPreamble(const TextStreamInfo& stream_info) {
std::string ret;
for (const auto& pair : stream_info.regions()) {
if (!ret.empty()) {
ret += "\n\n";
}
if (pair.second.width.type != TextUnitType::kPercent ||
pair.second.height.type != TextUnitType::kLines ||
pair.second.window_anchor_x.type != TextUnitType::kPercent ||
pair.second.window_anchor_y.type != TextUnitType::kPercent ||
pair.second.region_anchor_x.type != TextUnitType::kPercent ||
pair.second.region_anchor_y.type != TextUnitType::kPercent) {
LOG(WARNING) << "Unsupported unit type in WebVTT region";
continue;
}
base::StringAppendF(
&ret,
"REGION\n"
"id:%s\n"
"width:%f%%\n"
"lines:%d\n"
"viewportanchor:%f%%,%f%%\n"
"regionanchor:%f%%,%f%%",
pair.first.c_str(), pair.second.width.value,
static_cast<int>(pair.second.height.value),
pair.second.window_anchor_x.value, pair.second.window_anchor_y.value,
pair.second.region_anchor_x.value, pair.second.region_anchor_y.value);
if (pair.second.scroll) {
ret += "\nscroll:up";
}
}
if (!stream_info.css_styles().empty()) {
if (!ret.empty()) {
ret += "\n\n";
}
ret += "STYLE\n" + stream_info.css_styles();
}
return ret;
}
} // namespace media
} // namespace shaka

View File

@ -15,6 +15,7 @@
#include "packager/base/strings/string_piece.h"
#include "packager/media/base/text_sample.h"
#include "packager/media/base/text_stream_info.h"
namespace shaka {
namespace media {
@ -33,6 +34,10 @@ std::string WebVttSettingsToString(const TextSettings& settings);
/// Converts the given TextFragment to a WebVTT cue body string.
std::string WebVttFragmentToString(const TextFragment& fragment);
/// Converts the common fields in the stream into WebVTT text. This pulls out
/// the REGION and STYLE blocks.
std::string WebVttGetPreamble(const TextStreamInfo& stream_info);
} // namespace media
} // namespace shaka

View File

@ -224,5 +224,99 @@ TEST(WebVttUtilsTest, FragmentToString_HandlesNestedNewlinesWithStyle) {
EXPECT_EQ(WebVttFragmentToString(frag), "<b>Hello</b>\n<b>World Now</b>");
}
// Checks that a stream with one region and CSS styles produces a REGION block
// followed, after a blank line, by a STYLE block.
TEST(WebVttUtilsTest, GetPreamble_BasicFlow) {
TextStreamInfo info(0, 0, 0, kCodecWebVtt, "", "", 0, 0, "");
info.set_css_styles("::cue { color: red; }");
TextRegion region;
region.width.value = 34;
// The default height unit is percent; the region is given a height in lines.
region.height = TextNumber(56, TextUnitType::kLines);
region.window_anchor_x.value = 99;
region.window_anchor_y.value = 12;
region.region_anchor_x.value = 41;
region.region_anchor_y.value = 29;
info.AddRegion("foo", region);
EXPECT_EQ(WebVttGetPreamble(info),
"REGION\n"
"id:foo\n"
"width:34.000000%\n"
"lines:56\n"
"viewportanchor:99.000000%,12.000000%\n"
"regionanchor:41.000000%,29.000000%\n"
"\n"
"STYLE\n"
"::cue { color: red; }");
}
// Checks that two regions produce two REGION blocks separated by a blank
// line, with no STYLE block when no CSS styles are set.
TEST(WebVttUtilsTest, GetPreamble_MultipleRegions) {
TextStreamInfo info(0, 0, 0, kCodecWebVtt, "", "", 0, 0, "");
TextRegion region1;
region1.width.value = 34;
region1.height = TextNumber(56, TextUnitType::kLines);
region1.window_anchor_x.value = 99;
region1.window_anchor_y.value = 12;
region1.region_anchor_x.value = 41;
region1.region_anchor_y.value = 29;
info.AddRegion("r1", region1);
TextRegion region2;
region2.width.value = 82;
region2.height = TextNumber(61, TextUnitType::kLines);
region2.window_anchor_x.value = 51;
region2.window_anchor_y.value = 62;
region2.region_anchor_x.value = 92;
region2.region_anchor_y.value = 78;
info.AddRegion("r2", region2);
// The expected text lists "r1" before "r2".
EXPECT_EQ(WebVttGetPreamble(info),
"REGION\n"
"id:r1\n"
"width:34.000000%\n"
"lines:56\n"
"viewportanchor:99.000000%,12.000000%\n"
"regionanchor:41.000000%,29.000000%\n"
"\n"
"REGION\n"
"id:r2\n"
"width:82.000000%\n"
"lines:61\n"
"viewportanchor:51.000000%,62.000000%\n"
"regionanchor:92.000000%,78.000000%");
}
// Checks that a region with scroll enabled gets a trailing "scroll:up" line.
TEST(WebVttUtilsTest, GetPreamble_Scroll) {
TextStreamInfo info(0, 0, 0, kCodecWebVtt, "", "", 0, 0, "");
TextRegion region;
region.width.value = 37;
region.height = TextNumber(82, TextUnitType::kLines);
region.window_anchor_x.value = 32;
region.window_anchor_y.value = 66;
region.region_anchor_x.value = 95;
region.region_anchor_y.value = 72;
region.scroll = true;
info.AddRegion("foo", region);
EXPECT_EQ(WebVttGetPreamble(info),
"REGION\n"
"id:foo\n"
"width:37.000000%\n"
"lines:82\n"
"viewportanchor:32.000000%,66.000000%\n"
"regionanchor:95.000000%,72.000000%\n"
"scroll:up");
}
// Checks that a stream with CSS styles but no regions produces only a STYLE
// block, with no leading blank line.
TEST(WebVttUtilsTest, GetPreamble_OnlyStyles) {
TextStreamInfo info(0, 0, 0, kCodecWebVtt, "", "", 0, 0, "");
info.set_css_styles("::cue { color: red; }");
EXPECT_EQ(WebVttGetPreamble(info),
"STYLE\n"
"::cue { color: red; }");
}
} // namespace media
} // namespace shaka