Shaka Packager SDK
webvtt_parser.cc
1 // Copyright 2017 Google Inc. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file or at
5 // https://developers.google.com/open-source/licenses/bsd
6 
7 #include "packager/media/formats/webvtt/webvtt_parser.h"
8 
9 #include <string>
10 #include <vector>
11 
12 #include "packager/base/logging.h"
13 #include "packager/base/strings/string_split.h"
14 #include "packager/base/strings/string_util.h"
15 #include "packager/media/base/text_stream_info.h"
16 #include "packager/media/formats/webvtt/webvtt_timestamp.h"
17 
18 namespace shaka {
19 namespace media {
20 namespace {
// The parser produces exactly one output stream; this is its index.
constexpr uint64_t kStreamIndex = 0;
22 
// Renders |size| lines starting at |block| as a single multi-line string
// suitable for log output. The lines are wrapped between a "BLOCK START" and
// a "BLOCK END" marker, and every line is prefixed with a space and followed
// by a newline.
std::string BlockToString(const std::string* block, size_t size) {
  std::string text(" --- BLOCK START ---\n");

  const std::string* const last = block + size;
  for (const std::string* line = block; line != last; ++line) {
    text += " ";
    text += *line;
    text += "\n";
  }

  text += " --- BLOCK END ---";
  return text;
}
36 
37 // Comments are just blocks that are preceded by a blank line, start with the
38 // word "NOTE" (followed by a space or newline), and end at the first blank
39 // line.
40 // SOURCE: https://www.w3.org/TR/webvtt1
41 bool IsLikelyNote(const std::string& line) {
42  return line == "NOTE" ||
43  base::StartsWith(line, "NOTE ", base::CompareCase::SENSITIVE) ||
44  base::StartsWith(line, "NOTE\t", base::CompareCase::SENSITIVE);
45 }
46 
// The cue timing line is the only place in a WEBVTT file where the arrow
// "-->" may appear, so any line containing it is assumed to be a cue timing
// line.
bool IsLikelyCueTiming(const std::string& line) {
  const std::string::size_type arrow_pos = line.find("-->");
  return arrow_pos != std::string::npos;
}
53 
// A WebVTT cue identifier is any sequence of one or more characters that
// does not contain the substring "-->" (U+002D HYPHEN-MINUS, U+002D
// HYPHEN-MINUS, U+003E GREATER-THAN SIGN) and contains no U+000A LINE FEED
// (LF) or U+000D CARRIAGE RETURN (CR) characters. Only the "-->" rule is
// checked here.
// SOURCE: https://www.w3.org/TR/webvtt1/#webvtt-cue-identifier
bool MaybeCueId(const std::string& line) {
  const bool contains_arrow = line.find("-->") != std::string::npos;
  return !contains_arrow;
}
62 
63 // Check to see if the block is likely a style block. Style blocks are
64 // identified as any block that starts with a line that only contains
65 // "STYLE".
66 // SOURCE: https://w3c.github.io/webvtt/#styling
67 bool IsLikelyStyle(const std::string& line) {
68  return base::TrimWhitespaceASCII(line, base::TRIM_TRAILING) == "STYLE";
69 }
70 
71 // Check to see if the block is likely a region block. Region blocks are
72 // identified as any block that starts with a line that only contains
73 // "REGION".
74 // SOURCE: https://w3c.github.io/webvtt/#webvtt-region
75 bool IsLikelyRegion(const std::string& line) {
76  return base::TrimWhitespaceASCII(line, base::TRIM_TRAILING) == "REGION";
77 }
78 } // namespace
79 
80 WebVttParser::WebVttParser(std::unique_ptr<FileReader> source,
81  const std::string& language)
82  : reader_(std::move(source)), language_(language) {}
83 
84 Status WebVttParser::InitializeInternal() {
85  return Status::OK;
86 }
87 
88 bool WebVttParser::ValidateOutputStreamIndex(size_t stream_index) const {
89  // Only support one output
90  return stream_index == kStreamIndex;
91 }
92 
93 Status WebVttParser::Run() {
94  return Parse()
95  ? FlushDownstream(kStreamIndex)
96  : Status(error::INTERNAL_ERROR,
97  "Failed to parse WebVTT source. See log for details.");
98 }
99 
100 void WebVttParser::Cancel() {
101  keep_reading_ = false;
102 }
103 
104 bool WebVttParser::Parse() {
105  std::vector<std::string> block;
106  if (!reader_.Next(&block)) {
107  LOG(ERROR) << "Failed to read WEBVTT HEADER - No blocks in source.";
108  return false;
109  }
110 
111  // Check the header. It is possible for a 0xFEFF BOM to come before the
112  // header text.
113  if (block.size() != 1) {
114  LOG(ERROR) << "Failed to read WEBVTT header - "
115  << "block size should be 1 but was " << block.size() << ".";
116  return false;
117  }
118  if (block[0] != "WEBVTT" && block[0] != "\xEF\xBB\xBFWEBVTT") {
119  LOG(ERROR) << "Failed to read WEBVTT header - should be WEBVTT but was "
120  << block[0];
121  return false;
122  }
123 
124  const Status send_stream_info_result = DispatchTextStreamInfo();
125 
126  if (send_stream_info_result != Status::OK) {
127  LOG(ERROR) << "Failed to send stream info down stream:"
128  << send_stream_info_result.error_message();
129  return false;
130  }
131 
132  bool saw_cue = false;
133 
134  while (reader_.Next(&block) && keep_reading_) {
135  // NOTE
136  if (IsLikelyNote(block[0])) {
137  // We can safely ignore the whole block.
138  continue;
139  }
140 
141  // STYLE
142  if (IsLikelyStyle(block[0])) {
143  if (saw_cue) {
144  LOG(ERROR)
145  << "Found style block after seeing cue. Ignoring style block";
146  } else {
147  LOG(WARNING) << "Missing support for style blocks. Skipping block:\n"
148  << BlockToString(block.data(), block.size());
149  }
150  continue;
151  }
152 
153  // REGION
154  if (IsLikelyRegion(block[0])) {
155  if (saw_cue) {
156  LOG(ERROR)
157  << "Found region block after seeing cue. Ignoring region block";
158  } else {
159  LOG(WARNING) << "Missing support for region blocks. Skipping block:\n"
160  << BlockToString(block.data(), block.size());
161  }
162  continue;
163  }
164 
165  // CUE with ID
166  if (block.size() >= 2 && MaybeCueId(block[0]) &&
167  IsLikelyCueTiming(block[1]) && ParseCueWithId(block)) {
168  saw_cue = true;
169  continue;
170  }
171 
172  // CUE with no ID
173  if (IsLikelyCueTiming(block[0]) && ParseCueWithNoId(block)) {
174  saw_cue = true;
175  continue;
176  }
177 
178  LOG(ERROR) << "Failed to determine block classification:\n"
179  << BlockToString(block.data(), block.size());
180  return false;
181  }
182 
183  return keep_reading_;
184 }
185 
186 bool WebVttParser::ParseCueWithNoId(const std::vector<std::string>& block) {
187  const Status status = ParseCue("", block.data(), block.size());
188 
189  if (!status.ok()) {
190  LOG(ERROR) << "Failed to parse cue: " << status.error_message();
191  }
192 
193  return status.ok();
194 }
195 
196 bool WebVttParser::ParseCueWithId(const std::vector<std::string>& block) {
197  const Status status = ParseCue(block[0], block.data() + 1, block.size() - 1);
198 
199  if (!status.ok()) {
200  LOG(ERROR) << "Failed to parse cue: " << status.error_message();
201  }
202 
203  return status.ok();
204 }
205 
206 Status WebVttParser::ParseCue(const std::string& id,
207  const std::string* block,
208  size_t block_size) {
209  const std::vector<std::string> time_and_style = base::SplitString(
210  block[0], " ", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
211 
212  uint64_t start_time = 0;
213  uint64_t end_time = 0;
214 
215  const bool parsed_time =
216  time_and_style.size() >= 3 && time_and_style[1] == "-->" &&
217  WebVttTimestampToMs(time_and_style[0], &start_time) &&
218  WebVttTimestampToMs(time_and_style[2], &end_time);
219 
220  if (!parsed_time) {
221  return Status(
222  error::INTERNAL_ERROR,
223  "Could not parse start time, -->, and end time from " + block[0]);
224  }
225 
226  // According to the WebVTT spec end time must be greater than the start time
227  // of the cue. Since we are seeing content with invalid times in the field, we
228  // are going to drop the cue instead of failing to package.
229  //
230  // For more context see:
231  // - https://www.w3.org/TR/webvtt1/#webvtt-cue-timings
232  // - https://github.com/google/shaka-packager/issues/335
233  // - https://github.com/google/shaka-packager/issues/425
234  //
235  // Print a warning so that those packaging content can know that their
236  // content is not spec compliant.
237  if (end_time <= start_time) {
238  LOG(WARNING) << "WebVTT input is not spec compliant. Start time ("
239  << start_time << ") should be less than end time (" << end_time
240  << "). Skipping webvtt cue:"
241  << BlockToString(block, block_size);
242 
243  return Status::OK;
244  }
245 
246  std::shared_ptr<TextSample> sample = std::make_shared<TextSample>();
247  sample->set_id(id);
248  sample->SetTime(start_time, end_time);
249 
250  // The rest of time_and_style are the style tokens.
251  for (size_t i = 3; i < time_and_style.size(); i++) {
252  sample->AppendStyle(time_and_style[i]);
253  }
254 
255  // The rest of the block is the payload.
256  for (size_t i = 1; i < block_size; i++) {
257  sample->AppendPayload(block[i]);
258  }
259 
260  return DispatchTextSample(kStreamIndex, sample);
261 }
262 
263 Status WebVttParser::DispatchTextStreamInfo() {
264  const int kTrackId = 0;
265  // The resolution of timings are in milliseconds.
266  const int kTimescale = 1000;
267  // The duration passed here is not very important. Also the whole file
268  // must be read before determining the real duration which doesn't
269  // work nicely with the current demuxer.
270  const int kDuration = 0;
271  const char kWebVttCodecString[] = "wvtt";
272  const char kCodecConfig[] = "";
273  const int64_t kNoWidth = 0;
274  const int64_t kNoHeight = 0;
275 
276  std::shared_ptr<StreamInfo> info = std::make_shared<TextStreamInfo>(
277  kTrackId, kTimescale, kDuration, kCodecWebVtt, kWebVttCodecString,
278  kCodecConfig, kNoWidth, kNoHeight, language_);
279 
280  return DispatchStreamInfo(kStreamIndex, std::move(info));
281 }
282 } // namespace media
283 } // namespace shaka
STL namespace.
All the methods that are virtual are virtual for mocking.