Shaka Packager SDK
webvtt_parser.cc
1 // Copyright 2017 Google Inc. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file or at
5 // https://developers.google.com/open-source/licenses/bsd
6 
7 #include "packager/media/formats/webvtt/webvtt_parser.h"
8 
9 #include <string>
10 #include <vector>
11 
12 #include "packager/base/logging.h"
13 #include "packager/base/strings/string_split.h"
14 #include "packager/base/strings/string_util.h"
15 #include "packager/media/base/text_stream_info.h"
16 #include "packager/media/formats/webvtt/webvtt_timestamp.h"
17 
18 namespace shaka {
19 namespace media {
20 namespace {
// Index of the one output stream that this parser writes to.
constexpr uint64_t kStreamIndex = 0;
22 
// Renders the |size| lines starting at |block| as a single multi-line string
// for use in log messages. Each line is emitted on its own row behind a
// one-space prefix, and the rows are bracketed by a start and an end marker.
std::string BlockToString(const std::string* block, size_t size) {
  std::string text(" --- BLOCK START ---\n");

  const std::string* const end = block + size;
  for (const std::string* line = block; line != end; ++line) {
    text += " ";
    text += *line;
    text += "\n";
  }

  text += " --- BLOCK END ---";
  return text;
}
36 
// Returns true when |line| looks like the first line of a comment block.
// Comment blocks start with the word "NOTE", which must either be the whole
// line or be directly followed by a space or a tab. The keyword is matched
// case-sensitively.
// SOURCE: https://www.w3.org/TR/webvtt1
bool IsLikelyNote(const std::string& line) {
  const std::string::size_type kKeywordLength = 4;  // Length of "NOTE".

  // A line shorter than the keyword compares unequal here, so it is rejected
  // before the character after the keyword is inspected.
  if (line.compare(0, kKeywordLength, "NOTE") != 0) {
    return false;
  }

  if (line.size() == kKeywordLength) {
    return true;
  }

  const char after_keyword = line[kKeywordLength];
  return after_keyword == ' ' || after_keyword == '\t';
}
46 
// Returns true when |line| contains the arrow token "-->". Cue timing lines
// are the only part of a WEBVTT file that is allowed to contain "-->", so its
// presence is taken as a sign that |line| is a cue timing line.
bool IsLikelyCueTiming(const std::string& line) {
  const std::string::size_type arrow_at = line.find("-->");
  return arrow_at != std::string::npos;
}
53 
// Returns true when |line| could be a cue identifier. An identifier is a
// sequence of characters that contains neither the substring "-->" nor any
// line feed or carriage return characters. Only the "-->" condition is checked
// here.
// SOURCE: https://www.w3.org/TR/webvtt1/#webvtt-cue-identifier
bool MaybeCueId(const std::string& line) {
  const bool has_arrow = line.find("-->") != std::string::npos;
  return !has_arrow;
}
62 
63 // Check to see if the block is likely a style block. Style blocks are
64 // identified as any block that starts with a line that only contains
65 // "STYLE".
66 // SOURCE: https://w3c.github.io/webvtt/#styling
67 bool IsLikelyStyle(const std::string& line) {
68  return base::TrimWhitespaceASCII(line, base::TRIM_TRAILING) == "STYLE";
69 }
70 
71 // Check to see if the block is likely a region block. Region blocks are
72 // identified as any block that starts with a line that only contains
73 // "REGION".
74 // SOURCE: https://w3c.github.io/webvtt/#webvtt-region
75 bool IsLikelyRegion(const std::string& line) {
76  return base::TrimWhitespaceASCII(line, base::TRIM_TRAILING) == "REGION";
77 }
78 } // namespace
79 
80 WebVttParser::WebVttParser(std::unique_ptr<FileReader> source,
81  const std::string& language)
82  : reader_(std::move(source)), language_(language) {}
83 
84 Status WebVttParser::InitializeInternal() {
85  return Status::OK;
86 }
87 
88 bool WebVttParser::ValidateOutputStreamIndex(size_t stream_index) const {
89  // Only support one output
90  return stream_index == kStreamIndex;
91 }
92 
93 Status WebVttParser::Run() {
94  return Parse()
95  ? FlushDownstream(kStreamIndex)
96  : Status(error::INTERNAL_ERROR,
97  "Failed to parse WebVTT source. See log for details.");
98 }
99 
100 void WebVttParser::Cancel() {
101  keep_reading_ = false;
102 }
103 
104 bool WebVttParser::Parse() {
105  std::vector<std::string> block;
106  if (!reader_.Next(&block)) {
107  LOG(ERROR) << "Failed to read WEBVTT HEADER - No blocks in source.";
108  return false;
109  }
110 
111  // Check the header. It is possible for a 0xFEFF BOM to come before the
112  // header text.
113  if (block.size() != 1) {
114  LOG(ERROR) << "Failed to read WEBVTT header - "
115  << "block size should be 1 but was " << block.size() << ".";
116  return false;
117  }
118  if (block[0] != "WEBVTT" && block[0] != "\xEF\xBB\xBFWEBVTT") {
119  LOG(ERROR) << "Failed to read WEBVTT header - should be WEBVTT but was "
120  << block[0];
121  return false;
122  }
123 
124  const Status send_stream_info_result = DispatchTextStreamInfo();
125 
126  if (send_stream_info_result != Status::OK) {
127  LOG(ERROR) << "Failed to send stream info down stream:"
128  << send_stream_info_result.error_message();
129  return false;
130  }
131 
132  bool saw_cue = false;
133 
134  while (reader_.Next(&block) && keep_reading_) {
135  // NOTE
136  if (IsLikelyNote(block[0])) {
137  // We can safely ignore the whole block.
138  continue;
139  }
140 
141  // STYLE
142  if (IsLikelyStyle(block[0])) {
143  if (saw_cue) {
144  LOG(ERROR)
145  << "Found style block after seeing cue. Ignoring style block";
146  } else {
147  LOG(WARNING) << "Missing support for style blocks. Skipping block:\n"
148  << BlockToString(block.data(), block.size());
149  }
150  continue;
151  }
152 
153  // REGION
154  if (IsLikelyRegion(block[0])) {
155  if (saw_cue) {
156  LOG(ERROR)
157  << "Found region block after seeing cue. Ignoring region block";
158  } else {
159  LOG(WARNING) << "Missing support for region blocks. Skipping block:\n"
160  << BlockToString(block.data(), block.size());
161  }
162  continue;
163  }
164 
165  // CUE with ID
166  if (block.size() > 2 && MaybeCueId(block[0]) &&
167  IsLikelyCueTiming(block[1]) && ParseCueWithId(block)) {
168  saw_cue = true;
169  continue;
170  }
171 
172  // CUE with no ID
173  if (block.size() > 1 && IsLikelyCueTiming(block[0]) &&
174  ParseCueWithNoId(block)) {
175  saw_cue = true;
176  continue;
177  }
178 
179  LOG(ERROR) << "Failed to determine block classification:\n"
180  << BlockToString(block.data(), block.size());
181  return false;
182  }
183 
184  return keep_reading_;
185 }
186 
187 bool WebVttParser::ParseCueWithNoId(const std::vector<std::string>& block) {
188  const Status status = ParseCue("", block.data(), block.size());
189 
190  if (!status.ok()) {
191  LOG(ERROR) << "Failed to parse cue: " << status.error_message();
192  }
193 
194  return status.ok();
195 }
196 
197 bool WebVttParser::ParseCueWithId(const std::vector<std::string>& block) {
198  const Status status = ParseCue(block[0], block.data() + 1, block.size() - 1);
199 
200  if (!status.ok()) {
201  LOG(ERROR) << "Failed to parse cue: " << status.error_message();
202  }
203 
204  return status.ok();
205 }
206 
207 Status WebVttParser::ParseCue(const std::string& id,
208  const std::string* block,
209  size_t block_size) {
210  const std::vector<std::string> time_and_style = base::SplitString(
211  block[0], " ", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
212 
213  uint64_t start_time = 0;
214  uint64_t end_time = 0;
215 
216  const bool parsed_time =
217  time_and_style.size() >= 3 && time_and_style[1] == "-->" &&
218  WebVttTimestampToMs(time_and_style[0], &start_time) &&
219  WebVttTimestampToMs(time_and_style[2], &end_time);
220 
221  if (!parsed_time) {
222  return Status(
223  error::INTERNAL_ERROR,
224  "Could not parse start time, -->, and end time from " + block[0]);
225  }
226 
227  // According to the WebVTT spec
228  // (https://www.w3.org/TR/webvtt1/#webvtt-cue-timings) end time must be
229  // greater than the start time of the cue. Since we are seeing content with
230  // zero-duration cues in the field, we are going to drop the cue instead of
231  // failing to package.
232  //
233  // Print a warning so that those packaging content can know that their
234  // content is not spec compliant.
235  if (start_time == end_time) {
236  LOG(WARNING) << "WebVTT input is not spec compliant."
237  " Skipping zero-duration cue\n"
238  << BlockToString(block, block_size);
239 
240  return Status::OK;
241  }
242 
243  std::shared_ptr<TextSample> sample = std::make_shared<TextSample>();
244  sample->set_id(id);
245  sample->SetTime(start_time, end_time);
246 
247  // The rest of time_and_style are the style tokens.
248  for (size_t i = 3; i < time_and_style.size(); i++) {
249  sample->AppendStyle(time_and_style[i]);
250  }
251 
252  // The rest of the block is the payload.
253  for (size_t i = 1; i < block_size; i++) {
254  sample->AppendPayload(block[i]);
255  }
256 
257  return DispatchTextSample(kStreamIndex, sample);
258 }
259 
260 Status WebVttParser::DispatchTextStreamInfo() {
261  const int kTrackId = 0;
262  // The resolution of timings are in milliseconds.
263  const int kTimescale = 1000;
264  // The duration passed here is not very important. Also the whole file
265  // must be read before determining the real duration which doesn't
266  // work nicely with the current demuxer.
267  const int kDuration = 0;
268  const char kWebVttCodecString[] = "wvtt";
269  const char kCodecConfig[] = "";
270  const int64_t kNoWidth = 0;
271  const int64_t kNoHeight = 0;
272 
273  std::shared_ptr<StreamInfo> info = std::make_shared<TextStreamInfo>(
274  kTrackId, kTimescale, kDuration, kCodecWebVtt, kWebVttCodecString,
275  kCodecConfig, kNoWidth, kNoHeight, language_);
276 
277  return DispatchStreamInfo(kStreamIndex, std::move(info));
278 }
279 } // namespace media
280 } // namespace shaka
STL namespace.
All the methods that are virtual are virtual for mocking.