7 #include "packager/media/formats/webvtt/webvtt_parser.h"
11 #include "packager/base/logging.h"
12 #include "packager/base/strings/string_number_conversions.h"
13 #include "packager/base/strings/string_split.h"
14 #include "packager/base/strings/string_util.h"
15 #include "packager/media/base/text_stream_info.h"
16 #include "packager/media/formats/webvtt/webvtt_utils.h"
// Stream index handed to the new-text-sample callback with every parsed cue.
// This parser announces exactly one text stream, so the index is always 0.
const uint64_t kStreamIndex = 0;
// Builds a printable dump of a block for log messages. The result starts with
// a " --- BLOCK START ---" line and ends with " --- BLOCK END ---".
//   block: pointer to the first line of the block.
//   size:  number of lines reachable through `block`.
// NOTE(review): the loop body and the return statement are not visible in
// this extract; presumably each of the `size` lines is appended to `out` and
// `out` is returned - confirm against the full file.
24 std::string BlockToString(
const std::string* block,
size_t size) {
25 std::string out =
" --- BLOCK START ---\n";
27 for (
size_t i = 0; i < size; i++) {
33 out.append(
" --- BLOCK END ---");
42 bool IsLikelyNote(
const std::string& line) {
43 return line ==
"NOTE" ||
44 base::StartsWith(line,
"NOTE ", base::CompareCase::SENSITIVE) ||
45 base::StartsWith(line,
"NOTE\t", base::CompareCase::SENSITIVE);
// Returns true when `line` contains the "-->" arrow anywhere in it. The
// block classifier uses this to recognise a cue timing line.
bool IsLikelyCueTiming(const std::string& line) {
  return line.find("-->") != std::string::npos;
}
// Returns true when `line` could be a cue identifier, i.e. it does not
// contain the "-->" arrow. An empty line also qualifies.
bool MaybeCueId(const std::string& line) {
  return line.find("-->") == std::string::npos;
}
68 bool IsLikelyStyle(
const std::string& line) {
69 return base::TrimWhitespaceASCII(line, base::TRIM_TRAILING) ==
"STYLE";
76 bool IsLikelyRegion(
const std::string& line) {
77 return base::TrimWhitespaceASCII(line, base::TRIM_TRAILING) ==
"REGION";
// Parses a percentage such as "4%" or "1.5%".
//   str:   text to parse. std::regex_match requires the whole string to
//          match: digits, an optional ".digits" fraction, then '%'.
//   value: output parameter for the parsed number.
// The first capture group (the number without '%') is converted with
// base::StringToDouble.
// NOTE(review): the declarations of `match` and `temp`, the failure return
// and the store into `*value` are not visible in this extract.
// NOTE(review): the regex is constructed on every call; std::regex
// construction is costly, so a function-local static may be worth it.
80 bool ParsePercent(
const std::string& str,
float* value) {
83 std::regex re(R
"((\d+(?:\.\d+)?)%)");
85 if (!std::regex_match(str, match, re)) {
90 base::StringToDouble(match[1], &temp);
// Parses two comma-separated percentages such as "10%,90.5%".
//   str:  text to parse; the whole string must match the pattern (no spaces
//         are allowed around the comma).
//   a, b: output parameters for the first and second number.
// Both capture groups are converted with base::StringToDouble and then
// compared against 100.
// NOTE(review): what happens when a value is >= 100 (presumably the input is
// rejected), the declarations of `match`, `tempA`, `tempB`, and the stores
// into `*a` / `*b` are not visible in this extract.
98 bool ParseDoublePercent(
const std::string& str,
float* a,
float* b) {
99 std::regex re(R
"((\d+(?:\.\d+)?)%,(\d+(?:\.\d+)?)%)");
101 if (!std::regex_match(str, match, re)) {
106 base::StringToDouble(match[1], &tempA);
107 base::StringToDouble(match[2], &tempB);
108 if (tempA >= 100 || tempB >= 100) {
// Applies a single cue setting to `settings`.
//   id:       setting name (the text before ':' on the cue timing line).
//   value:    setting value (the text after ':').
//   settings: output; only the field that belongs to `id` is written.
// The visible branches handle "region", "vertical", "line", "position",
// "size" and "align". Problems are reported with LOG(WARNING); the function
// returns void, so the caller is never told about a bad setting.
// NOTE(review): several lines (the declarations of `temp`, some `if`/`else`
// heads and every closing brace) are not visible in this extract.
116 void ParseSettings(
const std::string&
id,
117 const std::string& value,
118 TextSettings* settings) {
120 if (
id ==
"region") {
121 settings->region = value;
122 }
else if (
id ==
"vertical") {
124 settings->writing_direction = WritingDirection::kVerticalGrowingLeft;
125 }
else if (value ==
"lr") {
126 settings->writing_direction = WritingDirection::kVerticalGrowingRight;
128 LOG(WARNING) <<
"Invalid WebVTT vertical setting: " << value;
130 }
else if (
id ==
"line") {
// The value may be "<line>" or "<line>,<alignment>"; split at the first ','.
131 const auto pos = value.find(
',');
132 const std::string line = value.substr(0, pos);
// NOTE(review): `align` is computed, but no visible line reads it; the
// alignment part only triggers the warning below.
133 const std::string align =
134 pos != std::string::npos ? value.substr(pos + 1) :
"";
135 if (pos != std::string::npos) {
136 LOG(WARNING) <<
"WebVTT line alignment isn't supported";
// A trailing '%' selects the percentage form; otherwise the value is parsed
// as a plain number and stored as a line count.
139 if (!line.empty() && line[line.size() - 1] ==
'%') {
141 if (!ParsePercent(line, &temp)) {
142 LOG(WARNING) <<
"Invalid WebVTT line: " << value;
145 settings->line.emplace(temp, TextUnitType::kPercent);
148 if (!base::StringToDouble(line, &temp)) {
149 LOG(WARNING) <<
"Invalid WebVTT line: " << value;
152 settings->line.emplace(temp, TextUnitType::kLines);
154 }
else if (
id ==
"position") {
// Same "<value>[,<alignment>]" layout as "line"; only a percentage is
// accepted here.
155 const auto pos = value.find(
',');
156 const std::string position = value.substr(0, pos);
157 const std::string align =
158 pos != std::string::npos ? value.substr(pos + 1) :
"";
159 if (pos != std::string::npos) {
160 LOG(WARNING) <<
"WebVTT position alignment isn't supported";
164 if (ParsePercent(position, &temp)) {
165 settings->position.emplace(temp, TextUnitType::kPercent);
167 LOG(WARNING) <<
"Invalid WebVTT position: " << value;
169 }
else if (
id ==
"size") {
171 if (ParsePercent(value, &temp)) {
172 settings->width.emplace(temp, TextUnitType::kPercent);
174 LOG(WARNING) <<
"Invalid WebVTT size: " << value;
176 }
else if (
id ==
"align") {
// "center" and "middle" are both mapped to TextAlignment::kCenter.
177 if (value ==
"start") {
178 settings->text_alignment = TextAlignment::kStart;
179 }
else if (value ==
"center" || value ==
"middle") {
180 settings->text_alignment = TextAlignment::kCenter;
181 }
else if (value ==
"end") {
182 settings->text_alignment = TextAlignment::kEnd;
183 }
else if (value ==
"left") {
184 settings->text_alignment = TextAlignment::kLeft;
185 }
else if (value ==
"right") {
186 settings->text_alignment = TextAlignment::kRight;
188 LOG(WARNING) <<
"Invalid WebVTT align: " << value;
191 LOG(WARNING) <<
"Unknown WebVTT setting: " << id;
197 WebVttParser::WebVttParser() {}
// NOTE(review): the header of the enclosing function is not visible in this
// extract; presumably this is the body of WebVttParser::Init - confirm.
// Debug-only checks: no init callback has been stored yet, both incoming
// callbacks are non-null, and no decryption key source is supplied.
// The new-text-sample callback is then stored on the parser.
// NOTE(review): no visible line stores `init_cb` into `init_cb_`; the
// assignment is presumably on a line missing from this extract.
203 DCHECK(init_cb_.is_null());
204 DCHECK(!init_cb.is_null());
205 DCHECK(!new_text_sample_cb.is_null());
206 DCHECK(!decryption_key_source) <<
"Encrypted WebVTT not supported";
209 new_text_sample_cb_ = new_text_sample_cb;
// Overload of Parse() that receives a raw byte buffer.
//   buf:  pointer to the input bytes.
//   size: number of bytes at `buf`.
// NOTE(review): the body is not visible in this extract; presumably the
// bytes are handed to `reader_` before the no-argument Parse() runs.
217 bool WebVttParser::Parse(
const uint8_t* buf,
int size) {
// Drains the blocks currently available from `reader_`.
// Header check: the first block must consist of exactly one line, and that
// line must be "WEBVTT", optionally preceded by the three bytes EF BB BF
// (a UTF-8 byte order mark). Violations are logged with LOG(ERROR).
// After the header, every block returned by reader_.Next() is passed to
// ParseBlock().
// NOTE(review): `block` is declared twice, so the header check must sit in
// its own scope; its guard, the return statements and what happens when
// ParseBlock() fails are not visible in this extract.
222 bool WebVttParser::Parse() {
224 std::vector<std::string> block;
225 if (!reader_.
Next(&block)) {
231 if (block.size() != 1) {
232 LOG(ERROR) <<
"Failed to read WEBVTT header - "
233 <<
"block size should be 1 but was " << block.size() <<
".";
236 if (block[0] !=
"WEBVTT" && block[0] !=
"\xEF\xBB\xBFWEBVTT") {
237 LOG(ERROR) <<
"Failed to read WEBVTT header - should be WEBVTT but was "
244 std::vector<std::string> block;
245 while (reader_.
Next(&block)) {
246 if (!ParseBlock(block))
// Classifies one block by its first line(s) and dispatches it. Checks run in
// this order:
//   1. NOTE block   - IsLikelyNote(block[0]).
//   2. STYLE block  - lines after the first are appended to `css_styles_`.
//   3. REGION block - forwarded to ParseRegion().
//   4. Cue with id  - at least two lines, block[0] has no "-->", block[1]
//                     has one, and ParseCueWithId() succeeds.
//   5. Cue, no id   - block[0] has "-->" and ParseCueWithNoId() succeeds.
// If nothing matches, the block is dumped with LOG(ERROR).
// `block` must not be empty: block[0] is read unconditionally.
// NOTE(review): the conditions behind the "after seeing cue" messages, the
// statement guarded by `!css_styles_.empty()` and all return statements
// except the ParseRegion one are not visible in this extract.
252 bool WebVttParser::ParseBlock(
const std::vector<std::string>& block) {
254 if (IsLikelyNote(block[0])) {
260 if (IsLikelyStyle(block[0])) {
263 <<
"Found style block after seeing cue. Ignoring style block";
// Start at 1 to skip the "STYLE" line itself.
265 for (
size_t i = 1; i < block.size(); i++) {
266 if (!css_styles_.empty())
268 css_styles_ += block[i];
275 if (IsLikelyRegion(block[0])) {
278 <<
"Found region block after seeing cue. Ignoring region block";
281 return ParseRegion(block);
286 if (block.size() >= 2 && MaybeCueId(block[0]) &&
287 IsLikelyCueTiming(block[1]) && ParseCueWithId(block)) {
293 if (IsLikelyCueTiming(block[0]) && ParseCueWithNoId(block)) {
298 LOG(ERROR) <<
"Failed to determine block classification:\n"
299 << BlockToString(block.data(), block.size());
// Parses a REGION block and records the region in `regions_` under its id.
// Defaults applied before the block is read: width 100%, height 3 lines,
// window anchor (0%, 100%), region anchor (0%, 100%).
// Each line is split into space-separated "key:value" pairs. Recognised keys
// are "id", "width", "lines", "regionanchor", "viewportanchor" and "scroll".
// Problems are reported with LOG(ERROR): an id containing "-->", an id that
// is already in `regions_`, an unparsable value, and an empty id at the end.
// NOTE(review): `®ion` below is almost certainly a mis-encoded `&region`
// (the text "&reg" was turned into the registered-trademark sign); it must be
// restored before this compiles.
// NOTE(review): a failure of the "viewportanchor" key logs "windowanchor",
// which does not match the key the author of the file would have typed.
// NOTE(review): the declarations of `region` and `temp`, the handling of the
// first ("REGION") line, the assignment of `region_id`, the test on the
// "scroll" value and all return statements are not visible in this extract.
303 bool WebVttParser::ParseRegion(
const std::vector<std::string>& block) {
305 std::string region_id;
308 region.width.value = 100;
309 region.width.type = TextUnitType::kPercent;
310 region.height.value = 3;
311 region.height.type = TextUnitType::kLines;
312 region.window_anchor_x.value = 0;
313 region.window_anchor_x.type = TextUnitType::kPercent;
314 region.window_anchor_y.value = 100;
315 region.window_anchor_y.type = TextUnitType::kPercent;
316 region.region_anchor_x.value = 0;
317 region.region_anchor_x.type = TextUnitType::kPercent;
318 region.region_anchor_y.value = 100;
319 region.region_anchor_y.type = TextUnitType::kPercent;
322 for (
const auto& line : block) {
// ':' separates key from value, ' ' separates one pair from the next.
329 base::StringPairs pairs;
330 if (!base::SplitStringIntoKeyValuePairs(line,
':',
' ', &pairs)) {
331 LOG(ERROR) <<
"Invalid WebVTT settings: " << line;
334 for (
const auto& pair : pairs) {
335 const std::string& value = pair.second;
336 if (pair.first ==
"id") {
337 if (value.find(
"-->") != std::string::npos) {
338 LOG(ERROR) <<
"Invalid WebVTT REGION ID: " << value;
341 if (regions_.find(value) != regions_.end()) {
342 LOG(ERROR) <<
"Duplicate WebVTT REGION: " << value;
346 }
else if (pair.first ==
"width") {
347 if (!ParsePercent(value, ®ion.width.value)) {
348 LOG(ERROR) <<
"Invalid WebVTT REGION width: " << value;
351 }
else if (pair.first ==
"lines") {
353 if (!base::StringToUint(value, &temp)) {
354 LOG(ERROR) <<
"Invalid WebVTT REGION lines: " << value;
357 region.height.value = temp;
358 }
else if (pair.first ==
"regionanchor") {
359 if (!ParseDoublePercent(value, ®ion.region_anchor_x.value,
360 ®ion.region_anchor_y.value)) {
361 LOG(ERROR) <<
"Invalid WebVTT REGION regionanchor: " << value;
364 }
else if (pair.first ==
"viewportanchor") {
365 if (!ParseDoublePercent(value, ®ion.window_anchor_x.value,
366 ®ion.window_anchor_y.value)) {
367 LOG(ERROR) <<
"Invalid WebVTT REGION windowanchor: " << value;
370 }
else if (pair.first ==
"scroll") {
372 LOG(ERROR) <<
"Invalid WebVTT REGION scroll: " << value;
375 region.scroll =
true;
377 LOG(ERROR) <<
"Unknown WebVTT REGION setting: " << pair.first;
382 if (region_id.empty()) {
383 LOG(ERROR) <<
"WebVTT REGION id is required";
386 regions_.insert(std::make_pair(region_id, std::move(region)));
390 bool WebVttParser::ParseCueWithNoId(
const std::vector<std::string>& block) {
391 return ParseCue(
"", block.data(), block.size());
394 bool WebVttParser::ParseCueWithId(
const std::vector<std::string>& block) {
395 return ParseCue(block[0], block.data() + 1, block.size() - 1);
// Parses one cue and delivers it through `new_text_sample_cb_`.
//   id:    cue identifier (may be empty).
//   block: pointer to the cue's lines; block[0] is the timing line and the
//          following lines are the cue text.
// Steps visible here:
//   - block[0] is split on spaces into non-empty, trimmed tokens. Tokens 0
//     and 2 must parse as timestamps (WebVttTimestampToMs) and token 1 must
//     be "-->"; otherwise an error is logged.
//   - Stream info is dispatched first if that has not happened yet.
//   - A cue whose end time is not after its start time is logged as a
//     warning ("Skipping webvtt cue").
//   - Tokens from index 3 on are cue settings; each is split at its first
//     ':' into key and value and passed to ParseSettings().
//   - Lines 1..block_size-1 are appended to `body` as sub-fragments.
//   - The resulting TextSample is passed to the callback, whose result is
//     returned.
// NOTE(review): the `block_size` parameter, the declarations of `body` and
// `sample`, the branch that adds a `true` sub-fragment (presumably a line
// break between text lines), the handling of a setting without ':' and the
// early returns are not visible in this extract.
398 bool WebVttParser::ParseCue(
const std::string&
id,
399 const std::string* block,
401 const std::vector<std::string> time_and_style = base::SplitString(
402 block[0],
" ", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
404 uint64_t start_time = 0;
405 uint64_t end_time = 0;
407 const bool parsed_time =
408 time_and_style.size() >= 3 && time_and_style[1] ==
"-->" &&
409 WebVttTimestampToMs(time_and_style[0], &start_time) &&
410 WebVttTimestampToMs(time_and_style[2], &end_time);
413 LOG(ERROR) <<
"Could not parse start time, -->, and end time from "
418 if (!stream_info_dispatched_)
419 DispatchTextStreamInfo();
432 if (end_time <= start_time) {
433 LOG(WARNING) <<
"WebVTT input is not spec compliant. Start time ("
434 << start_time <<
") should be less than end time (" << end_time
435 <<
"). Skipping webvtt cue:"
436 << BlockToString(block, block_size);
440 TextSettings settings;
// Index 3 is the first token after "<start> --> <end>".
441 for (
size_t i = 3; i < time_and_style.size(); i++) {
442 const auto pos = time_and_style[i].find(
':');
443 if (pos == std::string::npos) {
447 const std::string key = time_and_style[i].substr(0, pos);
448 const std::string value = time_and_style[i].substr(pos + 1);
449 ParseSettings(key, value, &settings);
455 TextFragmentStyle no_styles;
// Start at 1 to skip the timing line.
456 for (
size_t i = 1; i < block_size; i++) {
458 body.sub_fragments.emplace_back(no_styles,
true);
460 body.sub_fragments.emplace_back(no_styles, block[i]);
464 std::make_shared<TextSample>(
id, start_time, end_time, settings, body);
465 return new_text_sample_cb_.Run(kStreamIndex, sample);
// Builds the description of the text stream and hands it to `init_cb_`.
// Sets `stream_info_dispatched_` first, so callers that test the flag (see
// ParseCue) invoke this only once.
// The TextStreamInfo is created with track id 0, timescale 1000, duration 0,
// codec string "wvtt", an empty sixth argument, width and height 0 and an
// empty language. The collected CSS styles and every entry of `regions_` are
// attached before the stream is passed on as a one-element vector.
// NOTE(review): a timescale of 1000 presumably means cue times are in
// milliseconds, matching WebVttTimestampToMs - confirm.
// NOTE(review): some lines of this function are missing from this extract
// (the embedded line numbers have gaps); they may be comments only.
468 void WebVttParser::DispatchTextStreamInfo() {
469 stream_info_dispatched_ =
true;
471 const int kTrackId = 0;
473 const int kTimescale = 1000;
477 const int kDuration = 0;
478 const char kWebVttCodecString[] =
"wvtt";
479 const int64_t kNoWidth = 0;
480 const int64_t kNoHeight = 0;
482 const char kNoLanguage[] =
"";
484 const auto stream = std::make_shared<TextStreamInfo>(
485 kTrackId, kTimescale, kDuration, kCodecWebVtt, kWebVttCodecString,
"",
486 kNoWidth, kNoHeight, kNoLanguage);
487 stream->set_css_styles(css_styles_);
488 for (
const auto& pair : regions_)
489 stream->AddRegion(pair.first, pair.second);
491 std::vector<std::shared_ptr<StreamInfo>> streams{stream};
492 init_cb_.Run(streams);
// All the methods that are virtual are virtual only so that they can be mocked in tests.