Shaka Packager SDK
webvtt_parser.cc
1 // Copyright 2017 Google Inc. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file or at
5 // https://developers.google.com/open-source/licenses/bsd
6 
7 #include "packager/media/formats/webvtt/webvtt_parser.h"
8 
9 #include <string>
10 #include <vector>
11 
12 #include "packager/base/logging.h"
13 #include "packager/base/strings/string_split.h"
14 #include "packager/base/strings/string_util.h"
15 #include "packager/media/base/text_stream_info.h"
16 #include "packager/media/formats/webvtt/webvtt_timestamp.h"
17 #include "packager/status_macros.h"
18 
19 namespace shaka {
20 namespace media {
21 namespace {
// Index of the output stream used by this parser. It is the only index
// accepted by ValidateOutputStreamIndex() and the index every dispatch and
// flush in this file is issued on.
constexpr uint64_t kStreamIndex = 0;
23 
// Renders the |size| lines starting at |block| as one printable string for
// log messages. The lines are framed by BLOCK START / BLOCK END markers; each
// line is prefixed with a space and terminated with a newline. There is no
// newline after the end marker.
std::string BlockToString(const std::string* block, size_t size) {
  std::string text(" --- BLOCK START ---\n");

  const std::string* const last = block + size;
  for (const std::string* line = block; line != last; ++line) {
    text += " ";
    text += *line;
    text += "\n";
  }

  text += " --- BLOCK END ---";
  return text;
}
37 
38 // Comments are just blocks that are preceded by a blank line, start with the
39 // word "NOTE" (followed by a space or newline), and end at the first blank
40 // line.
41 // SOURCE: https://www.w3.org/TR/webvtt1
42 bool IsLikelyNote(const std::string& line) {
43  return line == "NOTE" ||
44  base::StartsWith(line, "NOTE ", base::CompareCase::SENSITIVE) ||
45  base::StartsWith(line, "NOTE\t", base::CompareCase::SENSITIVE);
46 }
47 
// The cue timing line is the only place in a WEBVTT file where "-->" is
// allowed to appear, so any line containing that arrow is assumed to be a cue
// timing line.
//
// Returns true when |line| contains "-->" anywhere.
bool IsLikelyCueTiming(const std::string& line) {
  const std::string::size_type arrow_pos = line.find("-->");
  return arrow_pos != std::string::npos;
}
54 
// A WebVTT cue identifier is any sequence of one or more characters that does
// not contain the substring "-->" and does not contain any LINE FEED (U+000A)
// or CARRIAGE RETURN (U+000D) characters.
// SOURCE: https://www.w3.org/TR/webvtt1/#webvtt-cue-identifier
//
// Only the "-->" rule is checked here: returns true when |line| does not
// contain the arrow. Emptiness and line terminators are not inspected.
bool MaybeCueId(const std::string& line) {
  const bool has_arrow = line.find("-->") != std::string::npos;
  return !has_arrow;
}
63 
64 // Check to see if the block is likely a style block. Style blocks are
65 // identified as any block that starts with a line that only contains
66 // "STYLE".
67 // SOURCE: https://w3c.github.io/webvtt/#styling
68 bool IsLikelyStyle(const std::string& line) {
69  return base::TrimWhitespaceASCII(line, base::TRIM_TRAILING) == "STYLE";
70 }
71 
72 // Check to see if the block is likely a region block. Region blocks are
73 // identified as any block that starts with a line that only contains
74 // "REGION".
75 // SOURCE: https://w3c.github.io/webvtt/#webvtt-region
76 bool IsLikelyRegion(const std::string& line) {
77  return base::TrimWhitespaceASCII(line, base::TRIM_TRAILING) == "REGION";
78 }
79 
80 void UpdateConfig(const std::vector<std::string>& block, std::string* config) {
81  if (!config->empty())
82  *config += "\n\n";
83  *config += base::JoinString(block, "\n");
84 }
85 
86 } // namespace
87 
88 WebVttParser::WebVttParser(std::unique_ptr<FileReader> source,
89  const std::string& language)
90  : reader_(std::move(source)), language_(language) {}
91 
92 Status WebVttParser::InitializeInternal() {
93  return Status::OK;
94 }
95 
96 bool WebVttParser::ValidateOutputStreamIndex(size_t stream_index) const {
97  // Only support one output
98  return stream_index == kStreamIndex;
99 }
100 
101 Status WebVttParser::Run() {
102  return Parse()
103  ? FlushDownstream(kStreamIndex)
104  : Status(error::INTERNAL_ERROR,
105  "Failed to parse WebVTT source. See log for details.");
106 }
107 
108 void WebVttParser::Cancel() {
109  keep_reading_ = false;
110 }
111 
112 bool WebVttParser::Parse() {
113  std::vector<std::string> block;
114  if (!reader_.Next(&block)) {
115  LOG(ERROR) << "Failed to read WEBVTT HEADER - No blocks in source.";
116  return false;
117  }
118 
119  // Check the header. It is possible for a 0xFEFF BOM to come before the
120  // header text.
121  if (block.size() != 1) {
122  LOG(ERROR) << "Failed to read WEBVTT header - "
123  << "block size should be 1 but was " << block.size() << ".";
124  return false;
125  }
126  if (block[0] != "WEBVTT" && block[0] != "\xEF\xBB\xBFWEBVTT") {
127  LOG(ERROR) << "Failed to read WEBVTT header - should be WEBVTT but was "
128  << block[0];
129  return false;
130  }
131 
132  bool saw_cue = false;
133 
134  while (reader_.Next(&block) && keep_reading_) {
135  // NOTE
136  if (IsLikelyNote(block[0])) {
137  // We can safely ignore the whole block.
138  continue;
139  }
140 
141  // STYLE
142  if (IsLikelyStyle(block[0])) {
143  if (saw_cue) {
144  LOG(WARNING)
145  << "Found style block after seeing cue. Ignoring style block";
146  } else {
147  UpdateConfig(block, &style_region_config_);
148  }
149  continue;
150  }
151 
152  // REGION
153  if (IsLikelyRegion(block[0])) {
154  if (saw_cue) {
155  LOG(WARNING)
156  << "Found region block after seeing cue. Ignoring region block";
157  } else {
158  UpdateConfig(block, &style_region_config_);
159  }
160  continue;
161  }
162 
163  // CUE with ID
164  if (block.size() >= 2 && MaybeCueId(block[0]) &&
165  IsLikelyCueTiming(block[1]) && ParseCueWithId(block)) {
166  saw_cue = true;
167  continue;
168  }
169 
170  // CUE with no ID
171  if (IsLikelyCueTiming(block[0]) && ParseCueWithNoId(block)) {
172  saw_cue = true;
173  continue;
174  }
175 
176  LOG(ERROR) << "Failed to determine block classification:\n"
177  << BlockToString(block.data(), block.size());
178  return false;
179  }
180 
181  return keep_reading_;
182 }
183 
184 bool WebVttParser::ParseCueWithNoId(const std::vector<std::string>& block) {
185  const Status status = ParseCue("", block.data(), block.size());
186 
187  if (!status.ok()) {
188  LOG(ERROR) << "Failed to parse cue: " << status.error_message();
189  }
190 
191  return status.ok();
192 }
193 
194 bool WebVttParser::ParseCueWithId(const std::vector<std::string>& block) {
195  const Status status = ParseCue(block[0], block.data() + 1, block.size() - 1);
196 
197  if (!status.ok()) {
198  LOG(ERROR) << "Failed to parse cue: " << status.error_message();
199  }
200 
201  return status.ok();
202 }
203 
204 Status WebVttParser::ParseCue(const std::string& id,
205  const std::string* block,
206  size_t block_size) {
207  const std::vector<std::string> time_and_style = base::SplitString(
208  block[0], " ", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
209 
210  uint64_t start_time = 0;
211  uint64_t end_time = 0;
212 
213  const bool parsed_time =
214  time_and_style.size() >= 3 && time_and_style[1] == "-->" &&
215  WebVttTimestampToMs(time_and_style[0], &start_time) &&
216  WebVttTimestampToMs(time_and_style[2], &end_time);
217 
218  if (!parsed_time) {
219  return Status(
220  error::INTERNAL_ERROR,
221  "Could not parse start time, -->, and end time from " + block[0]);
222  }
223 
224  if (!stream_info_dispatched_)
225  RETURN_IF_ERROR(DispatchTextStreamInfo());
226 
227  // According to the WebVTT spec end time must be greater than the start time
228  // of the cue. Since we are seeing content with invalid times in the field, we
229  // are going to drop the cue instead of failing to package.
230  //
231  // For more context see:
232  // - https://www.w3.org/TR/webvtt1/#webvtt-cue-timings
233  // - https://github.com/google/shaka-packager/issues/335
234  // - https://github.com/google/shaka-packager/issues/425
235  //
236  // Print a warning so that those packaging content can know that their
237  // content is not spec compliant.
238  if (end_time <= start_time) {
239  LOG(WARNING) << "WebVTT input is not spec compliant. Start time ("
240  << start_time << ") should be less than end time (" << end_time
241  << "). Skipping webvtt cue:"
242  << BlockToString(block, block_size);
243 
244  return Status::OK;
245  }
246 
247  std::shared_ptr<TextSample> sample = std::make_shared<TextSample>();
248  sample->set_id(id);
249  sample->SetTime(start_time, end_time);
250 
251  // The rest of time_and_style are the style tokens.
252  for (size_t i = 3; i < time_and_style.size(); i++) {
253  sample->AppendStyle(time_and_style[i]);
254  }
255 
256  // The rest of the block is the payload.
257  for (size_t i = 1; i < block_size; i++) {
258  sample->AppendPayload(block[i]);
259  }
260 
261  return DispatchTextSample(kStreamIndex, sample);
262 }
263 
264 Status WebVttParser::DispatchTextStreamInfo() {
265  stream_info_dispatched_ = true;
266 
267  const int kTrackId = 0;
268  // The resolution of timings are in milliseconds.
269  const int kTimescale = 1000;
270  // The duration passed here is not very important. Also the whole file
271  // must be read before determining the real duration which doesn't
272  // work nicely with the current demuxer.
273  const int kDuration = 0;
274  const char kWebVttCodecString[] = "wvtt";
275  const int64_t kNoWidth = 0;
276  const int64_t kNoHeight = 0;
277 
278  std::shared_ptr<StreamInfo> info = std::make_shared<TextStreamInfo>(
279  kTrackId, kTimescale, kDuration, kCodecWebVtt, kWebVttCodecString,
280  style_region_config_, kNoWidth, kNoHeight, language_);
281 
282  return DispatchStreamInfo(kStreamIndex, std::move(info));
283 }
284 } // namespace media
285 } // namespace shaka
STL namespace.
All the methods that are virtual are virtual for mocking.