7 #include "packager/media/formats/webvtt/webvtt_parser.h"
9 #include "packager/base/logging.h"
10 #include "packager/base/strings/string_number_conversions.h"
11 #include "packager/base/strings/string_split.h"
12 #include "packager/base/strings/string_util.h"
13 #include "packager/media/base/text_stream_info.h"
14 #include "packager/media/formats/webvtt/webvtt_utils.h"
// Stream index passed as the first argument of |new_text_sample_cb_| whenever
// a parsed cue is delivered (see WebVttParser::ParseCue).
constexpr uint64_t kStreamIndex{0};
22 std::string BlockToString(
const std::string* block,
size_t size) {
23 std::string out =
" --- BLOCK START ---\n";
25 for (
size_t i = 0; i < size; i++) {
31 out.append(
" --- BLOCK END ---");
// Returns true when |line| opens a NOTE block: the line is exactly "NOTE", or
// it begins with "NOTE" immediately followed by a space or a tab. The match is
// case-sensitive.
bool IsLikelyNote(const std::string& line) {
  static const char kKeyword[] = "NOTE";
  const std::string::size_type kKeywordLength = sizeof(kKeyword) - 1;

  // The keyword itself has to be the very first thing on the line.
  if (line.compare(0, kKeywordLength, kKeyword) != 0) {
    return false;
  }

  // Bare keyword, nothing after it.
  if (line.size() == kKeywordLength) {
    return true;
  }

  // Otherwise the keyword must be separated from the rest by ' ' or '\t'.
  const char separator = line[kKeywordLength];
  return separator == ' ' || separator == '\t';
}
// Returns true when |line| contains the arrow token "-->" anywhere in it.
// ParseBlock() uses this to decide whether a line should be treated as the
// timing line of a cue.
bool IsLikelyCueTiming(const std::string& line) {
  const std::string::size_type arrow_offset = line.find("-->");
  return arrow_offset != std::string::npos;
}
// Returns true when |line| could serve as a cue identifier, i.e. when the
// arrow token "-->" does not appear anywhere in it. An empty line qualifies.
bool MaybeCueId(const std::string& line) {
  const bool contains_arrow = line.find("-->") != std::string::npos;
  return !contains_arrow;
}
66 bool IsLikelyStyle(
const std::string& line) {
67 return base::TrimWhitespaceASCII(line, base::TRIM_TRAILING) ==
"STYLE";
74 bool IsLikelyRegion(
const std::string& line) {
75 return base::TrimWhitespaceASCII(line, base::TRIM_TRAILING) ==
"REGION";
78 bool ParsePercent(
const std::string& str,
float* value) {
81 if (str[str.size() - 1] !=
'%') {
86 if (!base::StringToDouble(str.substr(0, str.size() - 1), &temp) ||
94 bool ParseDoublePercent(
const std::string& str,
float* a,
float* b) {
95 auto percents = base::SplitString(str,
",", base::TRIM_WHITESPACE,
96 base::SPLIT_WANT_NONEMPTY);
97 if (percents.size() != 2) {
100 float temp_a, temp_b;
101 if (!ParsePercent(percents[0], &temp_a) ||
102 !ParsePercent(percents[1], &temp_b)) {
110 void ParseSettings(
const std::string&
id,
111 const std::string& value,
112 TextSettings* settings) {
114 if (
id ==
"region") {
115 settings->region = value;
116 }
else if (
id ==
"vertical") {
118 settings->writing_direction = WritingDirection::kVerticalGrowingLeft;
119 }
else if (value ==
"lr") {
120 settings->writing_direction = WritingDirection::kVerticalGrowingRight;
122 LOG(WARNING) <<
"Invalid WebVTT vertical setting: " << value;
124 }
else if (
id ==
"line") {
125 const auto pos = value.find(
',');
126 const std::string line = value.substr(0, pos);
127 const std::string align =
128 pos != std::string::npos ? value.substr(pos + 1) :
"";
129 if (pos != std::string::npos) {
130 LOG(WARNING) <<
"WebVTT line alignment isn't supported";
133 if (!line.empty() && line[line.size() - 1] ==
'%') {
135 if (!ParsePercent(line, &temp)) {
136 LOG(WARNING) <<
"Invalid WebVTT line: " << value;
139 settings->line.emplace(temp, TextUnitType::kPercent);
142 if (!base::StringToDouble(line, &temp)) {
143 LOG(WARNING) <<
"Invalid WebVTT line: " << value;
146 settings->line.emplace(temp, TextUnitType::kLines);
148 }
else if (
id ==
"position") {
149 const auto pos = value.find(
',');
150 const std::string position = value.substr(0, pos);
151 const std::string align =
152 pos != std::string::npos ? value.substr(pos + 1) :
"";
153 if (pos != std::string::npos) {
154 LOG(WARNING) <<
"WebVTT position alignment isn't supported";
158 if (ParsePercent(position, &temp)) {
159 settings->position.emplace(temp, TextUnitType::kPercent);
161 LOG(WARNING) <<
"Invalid WebVTT position: " << value;
163 }
else if (
id ==
"size") {
165 if (ParsePercent(value, &temp)) {
166 settings->width.emplace(temp, TextUnitType::kPercent);
168 LOG(WARNING) <<
"Invalid WebVTT size: " << value;
170 }
else if (
id ==
"align") {
171 if (value ==
"start") {
172 settings->text_alignment = TextAlignment::kStart;
173 }
else if (value ==
"center" || value ==
"middle") {
174 settings->text_alignment = TextAlignment::kCenter;
175 }
else if (value ==
"end") {
176 settings->text_alignment = TextAlignment::kEnd;
177 }
else if (value ==
"left") {
178 settings->text_alignment = TextAlignment::kLeft;
179 }
else if (value ==
"right") {
180 settings->text_alignment = TextAlignment::kRight;
182 LOG(WARNING) <<
"Invalid WebVTT align: " << value;
185 LOG(WARNING) <<
"Unknown WebVTT setting: " << id;
191 WebVttParser::WebVttParser() {}
197 DCHECK(init_cb_.is_null());
198 DCHECK(!init_cb.is_null());
199 DCHECK(!new_text_sample_cb.is_null());
200 DCHECK(!decryption_key_source) <<
"Encrypted WebVTT not supported";
203 new_text_sample_cb_ = new_text_sample_cb;
211 bool WebVttParser::Parse(
const uint8_t* buf,
int size) {
216 bool WebVttParser::Parse() {
218 std::vector<std::string> block;
219 if (!reader_.
Next(&block)) {
225 if (block.size() != 1) {
226 LOG(ERROR) <<
"Failed to read WEBVTT header - "
227 <<
"block size should be 1 but was " << block.size() <<
".";
230 if (block[0] !=
"WEBVTT" && block[0] !=
"\xEF\xBB\xBFWEBVTT") {
231 LOG(ERROR) <<
"Failed to read WEBVTT header - should be WEBVTT but was "
238 std::vector<std::string> block;
239 while (reader_.
Next(&block)) {
240 if (!ParseBlock(block))
246 bool WebVttParser::ParseBlock(
const std::vector<std::string>& block) {
248 if (IsLikelyNote(block[0])) {
254 if (IsLikelyStyle(block[0])) {
257 <<
"Found style block after seeing cue. Ignoring style block";
259 for (
size_t i = 1; i < block.size(); i++) {
260 if (!css_styles_.empty())
262 css_styles_ += block[i];
269 if (IsLikelyRegion(block[0])) {
272 <<
"Found region block after seeing cue. Ignoring region block";
275 return ParseRegion(block);
280 if (block.size() >= 2 && MaybeCueId(block[0]) &&
281 IsLikelyCueTiming(block[1]) && ParseCueWithId(block)) {
287 if (IsLikelyCueTiming(block[0]) && ParseCueWithNoId(block)) {
292 LOG(ERROR) <<
"Failed to determine block classification:\n"
293 << BlockToString(block.data(), block.size());
297 bool WebVttParser::ParseRegion(
const std::vector<std::string>& block) {
299 std::string region_id;
302 region.width.value = 100;
303 region.width.type = TextUnitType::kPercent;
304 region.height.value = 3;
305 region.height.type = TextUnitType::kLines;
306 region.window_anchor_x.value = 0;
307 region.window_anchor_x.type = TextUnitType::kPercent;
308 region.window_anchor_y.value = 100;
309 region.window_anchor_y.type = TextUnitType::kPercent;
310 region.region_anchor_x.value = 0;
311 region.region_anchor_x.type = TextUnitType::kPercent;
312 region.region_anchor_y.value = 100;
313 region.region_anchor_y.type = TextUnitType::kPercent;
316 for (
const auto& line : block) {
323 base::StringPairs pairs;
324 if (!base::SplitStringIntoKeyValuePairs(line,
':',
' ', &pairs)) {
325 LOG(ERROR) <<
"Invalid WebVTT settings: " << line;
328 for (
const auto& pair : pairs) {
329 const std::string& value = pair.second;
330 if (pair.first ==
"id") {
331 if (value.find(
"-->") != std::string::npos) {
332 LOG(ERROR) <<
"Invalid WebVTT REGION ID: " << value;
335 if (regions_.find(value) != regions_.end()) {
336 LOG(ERROR) <<
"Duplicate WebVTT REGION: " << value;
340 }
else if (pair.first ==
"width") {
341 if (!ParsePercent(value, ®ion.width.value)) {
342 LOG(ERROR) <<
"Invalid WebVTT REGION width: " << value;
345 }
else if (pair.first ==
"lines") {
347 if (!base::StringToUint(value, &temp)) {
348 LOG(ERROR) <<
"Invalid WebVTT REGION lines: " << value;
351 region.height.value = temp;
352 }
else if (pair.first ==
"regionanchor") {
353 if (!ParseDoublePercent(value, ®ion.region_anchor_x.value,
354 ®ion.region_anchor_y.value)) {
355 LOG(ERROR) <<
"Invalid WebVTT REGION regionanchor: " << value;
358 }
else if (pair.first ==
"viewportanchor") {
359 if (!ParseDoublePercent(value, ®ion.window_anchor_x.value,
360 ®ion.window_anchor_y.value)) {
361 LOG(ERROR) <<
"Invalid WebVTT REGION windowanchor: " << value;
364 }
else if (pair.first ==
"scroll") {
366 LOG(ERROR) <<
"Invalid WebVTT REGION scroll: " << value;
369 region.scroll =
true;
371 LOG(ERROR) <<
"Unknown WebVTT REGION setting: " << pair.first;
376 if (region_id.empty()) {
377 LOG(ERROR) <<
"WebVTT REGION id is required";
380 regions_.insert(std::make_pair(region_id, std::move(region)));
384 bool WebVttParser::ParseCueWithNoId(
const std::vector<std::string>& block) {
385 return ParseCue(
"", block.data(), block.size());
388 bool WebVttParser::ParseCueWithId(
const std::vector<std::string>& block) {
389 return ParseCue(block[0], block.data() + 1, block.size() - 1);
392 bool WebVttParser::ParseCue(
const std::string&
id,
393 const std::string* block,
395 const std::vector<std::string> time_and_style = base::SplitString(
396 block[0],
" ", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
398 uint64_t start_time = 0;
399 uint64_t end_time = 0;
401 const bool parsed_time =
402 time_and_style.size() >= 3 && time_and_style[1] ==
"-->" &&
403 WebVttTimestampToMs(time_and_style[0], &start_time) &&
404 WebVttTimestampToMs(time_and_style[2], &end_time);
407 LOG(ERROR) <<
"Could not parse start time, -->, and end time from "
412 if (!stream_info_dispatched_)
413 DispatchTextStreamInfo();
426 if (end_time <= start_time) {
427 LOG(WARNING) <<
"WebVTT input is not spec compliant. Start time ("
428 << start_time <<
") should be less than end time (" << end_time
429 <<
"). Skipping webvtt cue:"
430 << BlockToString(block, block_size);
434 TextSettings settings;
435 for (
size_t i = 3; i < time_and_style.size(); i++) {
436 const auto pos = time_and_style[i].find(
':');
437 if (pos == std::string::npos) {
441 const std::string key = time_and_style[i].substr(0, pos);
442 const std::string value = time_and_style[i].substr(pos + 1);
443 ParseSettings(key, value, &settings);
449 TextFragmentStyle no_styles;
450 for (
size_t i = 1; i < block_size; i++) {
452 body.sub_fragments.emplace_back(no_styles,
true);
454 body.sub_fragments.emplace_back(no_styles, block[i]);
458 std::make_shared<TextSample>(
id, start_time, end_time, settings, body);
459 return new_text_sample_cb_.Run(kStreamIndex, sample);
462 void WebVttParser::DispatchTextStreamInfo() {
463 stream_info_dispatched_ =
true;
465 const int kTrackId = 0;
467 const int kTimescale = 1000;
471 const int kDuration = 0;
472 const char kWebVttCodecString[] =
"wvtt";
473 const int64_t kNoWidth = 0;
474 const int64_t kNoHeight = 0;
476 const char kNoLanguage[] =
"";
478 const auto stream = std::make_shared<TextStreamInfo>(
479 kTrackId, kTimescale, kDuration, kCodecWebVtt, kWebVttCodecString,
"",
480 kNoWidth, kNoHeight, kNoLanguage);
481 stream->set_css_styles(css_styles_);
482 for (
const auto& pair : regions_)
483 stream->AddRegion(pair.first, pair.second);
485 std::vector<std::shared_ptr<StreamInfo>> streams{stream};
486 init_cb_.Run(streams);