Shaka Packager SDK
webvtt_parser.cc
1 // Copyright 2017 Google Inc. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file or at
5 // https://developers.google.com/open-source/licenses/bsd
6 
7 #include "packager/media/formats/webvtt/webvtt_parser.h"
8 
9 #include <string>
10 #include <vector>
11 
12 #include "packager/base/logging.h"
13 #include "packager/base/strings/string_split.h"
14 #include "packager/base/strings/string_util.h"
15 #include "packager/media/base/text_stream_info.h"
16 #include "packager/media/formats/webvtt/webvtt_timestamp.h"
17 
18 namespace shaka {
19 namespace media {
20 namespace {
21 
// A comment block opens with the keyword "NOTE", either alone on its line or
// followed by a space or a tab, and runs until the next blank line. This
// checks whether |line| could be the opening line of such a block.
// SOURCE: https://www.w3.org/TR/webvtt1
bool IsLikelyNote(const std::string& line) {
  const char kKeyword[] = "NOTE";
  const size_t kKeywordLength = sizeof(kKeyword) - 1;

  // The keyword is case sensitive and must be at the very start of the line.
  if (line.compare(0, kKeywordLength, kKeyword) != 0) {
    return false;
  }

  // Nothing after the keyword: a bare "NOTE" line.
  if (line.size() == kKeywordLength) {
    return true;
  }

  // Otherwise the keyword must be set apart by a space or a tab.
  const char separator = line[kKeywordLength];
  return separator == ' ' || separator == '\t';
}
31 
// The cue timing line is the only place in a WEBVTT file where the arrow
// "-->" may appear, so any line containing the arrow is treated as a probable
// cue timing line.
bool IsLikelyCueTiming(const std::string& line) {
  const std::string::size_type arrow_position = line.find("-->");
  const bool has_arrow = arrow_position != std::string::npos;
  return has_arrow;
}
38 
// A WebVTT cue identifier is any sequence of one or more characters that
// contains neither the substring "-->" (U+002D HYPHEN-MINUS, U+002D
// HYPHEN-MINUS, U+003E GREATER-THAN SIGN) nor any U+000A LINE FEED (LF) or
// U+000D CARRIAGE RETURN (CR) characters.
// SOURCE: https://www.w3.org/TR/webvtt1/#webvtt-cue-identifier
//
// Only the "-->" rule is checked here, so a true result means |line| may be
// a cue identifier, not that it definitely is one.
bool MaybeCueId(const std::string& line) {
  const bool contains_arrow = line.find("-->") != std::string::npos;
  return !contains_arrow;
}
47 } // namespace
48 
49 WebVttParser::WebVttParser(std::unique_ptr<FileReader> source,
50  const std::string& language)
51  : reader_(std::move(source)), language_(language) {}
52 
53 Status WebVttParser::InitializeInternal() {
54  return Status::OK;
55 }
56 
57 bool WebVttParser::ValidateOutputStreamIndex(size_t stream_index) const {
58  // Only support one output
59  return stream_index == 0;
60 }
61 
62 Status WebVttParser::Run() {
63  return Parse()
64  ? FlushDownstream(0)
65  : Status(error::INTERNAL_ERROR,
66  "Failed to parse WebVTT source. See log for details.");
67 }
68 
69 void WebVttParser::Cancel() {
70  keep_reading_ = false;
71 }
72 
73 bool WebVttParser::Parse() {
74  std::vector<std::string> block;
75  if (!reader_.Next(&block)) {
76  LOG(ERROR) << "Failed to read WEBVTT HEADER - No blocks in source.";
77  return false;
78  }
79 
80  // Check the header. It is possible for a 0xFEFF BOM to come before the
81  // header text.
82  if (block.size() != 1) {
83  LOG(ERROR) << "Failed to read WEBVTT header - "
84  << "block size should be 1 but was " << block.size() << ".";
85  return false;
86  }
87  if (block[0] != "WEBVTT" && block[0] != "\xFE\xFFWEBVTT") {
88  LOG(ERROR) << "Failed to read WEBVTT header - should be WEBVTT but was "
89  << block[0];
90  return false;
91  }
92 
93  const Status send_stream_info_result = DispatchTextStreamInfo();
94 
95  if (send_stream_info_result != Status::OK) {
96  LOG(ERROR) << "Failed to send stream info down stream:"
97  << send_stream_info_result.error_message();
98  return false;
99  }
100 
101  while (reader_.Next(&block) && keep_reading_) {
102  // NOTE
103  if (IsLikelyNote(block[0])) {
104  // We can safely ignore the whole block.
105  continue;
106  }
107 
108  // CUE with ID
109  if (block.size() > 2 && MaybeCueId(block[0]) &&
110  IsLikelyCueTiming(block[1]) && ParseCueWithId(block)) {
111  continue;
112  }
113 
114  // CUE with no ID
115  if (block.size() > 1 && IsLikelyCueTiming(block[0]) &&
116  ParseCueWithNoId(block)) {
117  continue;
118  }
119 
120  LOG(ERROR) << "Failed to determine block classification:";
121  LOG(ERROR) << " --- BLOCK START ---";
122  for (const std::string& line : block) {
123  LOG(ERROR) << " " << line;
124  }
125  LOG(ERROR) << " --- BLOCK END ---";
126  return false;
127  }
128 
129  return keep_reading_;
130 }
131 
132 bool WebVttParser::ParseCueWithNoId(const std::vector<std::string>& block) {
133  return ParseCue("", block.data(), block.size());
134 }
135 
136 bool WebVttParser::ParseCueWithId(const std::vector<std::string>& block) {
137  return ParseCue(block[0], block.data() + 1, block.size() - 1);
138 }
139 
140 bool WebVttParser::ParseCue(const std::string& id,
141  const std::string* block,
142  size_t block_size) {
143  std::shared_ptr<TextSample> sample(new TextSample);
144  sample->set_id(id);
145 
146  const std::vector<std::string> time_and_style = base::SplitString(
147  block[0], " ", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
148 
149  uint64_t start_time;
150  uint64_t end_time;
151  if (time_and_style.size() >= 3 && time_and_style[1] == "-->" &&
152  WebVttTimestampToMs(time_and_style[0], &start_time) &&
153  WebVttTimestampToMs(time_and_style[2], &end_time)) {
154  sample->SetTime(start_time, end_time);
155  } else {
156  LOG(ERROR) << "Could not parse start time, -->, and end time from "
157  << block[0];
158  return false;
159  }
160 
161  // The rest of time_and_style are the style tokens.
162  for (size_t i = 3; i < time_and_style.size(); i++) {
163  sample->AppendStyle(time_and_style[i]);
164  }
165 
166  // The rest of the block is the payload.
167  for (size_t i = 1; i < block_size; i++) {
168  sample->AppendPayload(block[i]);
169  }
170 
171  const Status send_result = DispatchTextSample(0, sample);
172 
173  if (send_result != Status::OK) {
174  LOG(ERROR) << "Failed to send text sample down stream:"
175  << send_result.error_message();
176  }
177 
178  return send_result == Status::OK;
179 }
180 
181 Status WebVttParser::DispatchTextStreamInfo() {
182  // The resolution of timings are in milliseconds.
183  const int kTimescale = 1000;
184 
185  // The duration passed here is not very important. Also the whole file
186  // must be read before determining the real duration which doesn't
187  // work nicely with the current demuxer.
188  const int kDuration = 0;
189 
190  const char kWebVttCodecString[] = "wvtt";
191 
192  StreamInfo* info = new TextStreamInfo(0, kTimescale, kDuration, kCodecWebVtt,
193  kWebVttCodecString, "",
194  0, // width
195  0, // height
196  language_);
197 
198  return DispatchStreamInfo(0, std::shared_ptr<StreamInfo>(info));
199 }
200 } // namespace media
201 } // namespace shaka
STL namespace.
All the methods that are virtual are virtual for mocking.
Status DispatchTextSample(size_t stream_index, std::shared_ptr< const TextSample > text_sample)
Dispatch the text sample to downstream handlers.
Status FlushDownstream(size_t output_stream_index)
Flush the downstream connected at the specified output stream index.
Status DispatchStreamInfo(size_t stream_index, std::shared_ptr< const StreamInfo > stream_info)
Dispatch the stream info to downstream handlers.