Shaka Packager SDK
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
webvtt_media_parser.cc
1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file or at
5 // https://developers.google.com/open-source/licenses/bsd
6 
7 #include "packager/media/formats/webvtt/webvtt_media_parser.h"
8 
9 #include <string>
10 #include <vector>
11 
12 #include "packager/base/logging.h"
13 #include "packager/base/strings/string_number_conversions.h"
14 #include "packager/base/strings/string_split.h"
15 #include "packager/base/strings/string_util.h"
16 #include "packager/media/base/macros.h"
17 #include "packager/media/base/media_sample.h"
18 #include "packager/media/base/text_stream_info.h"
19 #include "packager/media/formats/webvtt/webvtt_timestamp.h"
20 
21 namespace shaka {
22 namespace media {
23 
24 namespace {
25 
// Named value for the |flush| argument of ProcessCurrentCue(); callers pass
// kFlush or !kFlush.
constexpr bool kFlush = true;

// A WebVTT file carries exactly one track, so a fixed id is used for it.
constexpr int kTrackId = 0;
30 
// Line break characters: carriage return and line feed.
constexpr char kCR = 0x0D;
constexpr char kLF = 0x0A;

// Pops the first line off the front of |data|.
//
// A line is terminated by LF, CR, or CRLF. When a terminator is found, |line|
// receives the text before it (terminator excluded), the line and its
// terminator are removed from |data|, and true is returned.
//
// When |data| is empty or contains no terminator, false is returned and
// neither |data| nor |line| is modified.
//
// Note: a CR that is the very last character of |data| is treated as a
// complete one-character line break.
bool ReadLine(std::string* data, std::string* line) {
  const char kLineBreakChars[] = {kLF, kCR};
  const std::string::size_type break_pos =
      data->find_first_of(kLineBreakChars, 0, sizeof(kLineBreakChars));
  if (break_pos == std::string::npos)
    return false;

  // The terminator is two characters long only for a CR immediately followed
  // by an LF; a lone CR or a lone LF is one character long.
  const bool is_crlf = (*data)[break_pos] == kCR &&
                       break_pos + 1 < data->size() &&
                       (*data)[break_pos + 1] == kLF;
  const std::string::size_type break_length = is_crlf ? 2 : 1;

  line->assign(*data, 0, break_pos);
  data->erase(0, break_pos + break_length);
  return true;
}
71 
72 // Clears |settings| and 0s |start_time| and |duration| regardless of the
73 // parsing result.
74 bool ParseTimingAndSettingsLine(const std::string& line,
75  uint64_t* start_time,
76  uint64_t* duration,
77  std::string* settings) {
78  *start_time = 0;
79  *duration = 0;
80  settings->clear();
81  std::vector<std::string> entries = base::SplitString(
82  line, " ", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
83  if (entries.size() < 3) {
84  // The timing is time1 --> time3 so if there aren't 3 entries, this is parse
85  // error.
86  LOG(ERROR) << "Not enough tokens to be a timing " << line;
87  return false;
88  }
89 
90  if (entries[1] != "-->") {
91  LOG(ERROR) << "Cannot find an arrow at the right place " << line;
92  return false;
93  }
94 
95  const std::string& start_time_str = entries[0];
96  if (!WebVttTimestampToMs(start_time_str, start_time)) {
97  LOG(ERROR) << "Failed to parse " << start_time_str << " in " << line;
98  return false;
99  }
100 
101  const std::string& end_time_str = entries[2];
102  uint64_t end_time = 0;
103  if (!WebVttTimestampToMs(end_time_str, &end_time)) {
104  LOG(ERROR) << "Failed to parse " << end_time_str << " in " << line;
105  return false;
106  }
107  *duration = end_time - *start_time;
108 
109  entries.erase(entries.begin(), entries.begin() + 3);
110  *settings = base::JoinString(entries, " ");
111  return true;
112 }
113 
114 } // namespace
115 
116 WebVttMediaParser::WebVttMediaParser()
117  : state_(kHeader), sample_converter_(new WebVttSampleConverter()) {}
118 WebVttMediaParser::~WebVttMediaParser() {}
119 
120 void WebVttMediaParser::Init(const InitCB& init_cb,
121  const NewSampleCB& new_sample_cb,
122  KeySource* decryption_key_source) {
123  init_cb_ = init_cb;
124  new_sample_cb_ = new_sample_cb;
125 }
126 
128  // If not in one of these states just be ready for more data.
129  if (state_ != kCuePayload && state_ != kComment)
130  return true;
131 
132  if (!data_.empty()) {
133  // If it was in the middle of the payload and the stream finished, then this
134  // is an end of the payload. The rest of the data is part of the payload.
135  if (state_ == kCuePayload) {
136  current_cue_.payload += data_ + "\n";
137  } else {
138  current_cue_.comment += data_ + "\n";
139  }
140  data_.clear();
141  }
142 
143  if (!ProcessCurrentCue(kFlush)) {
144  state_ = kParseError;
145  return false;
146  }
147 
148  state_ = kCueIdentifierOrTimingOrComment;
149  return true;
150 }
151 
152 bool WebVttMediaParser::Parse(const uint8_t* buf, int size) {
153  if (state_ == kParseError) {
154  LOG(WARNING) << "The parser is in an error state, ignoring input.";
155  return false;
156  }
157 
158  data_.insert(data_.end(), buf, buf + size);
159 
160  std::string line;
161  while (ReadLine(&data_, &line)) {
162  // Only kCueIdentifierOrTimingOrComment and kCueTiming states accept -->.
163  // Error otherwise.
164  const bool has_arrow = line.find("-->") != std::string::npos;
165  if (state_ == kCueTiming) {
166  if (!has_arrow) {
167  LOG(ERROR) << "Expected --> in: " << line;
168  state_ = kParseError;
169  return false;
170  }
171  } else if (state_ != kCueIdentifierOrTimingOrComment) {
172  if (has_arrow) {
173  LOG(ERROR) << "Unexpected --> in " << line;
174  state_ = kParseError;
175  return false;
176  }
177  }
178 
179  switch (state_) {
180  case kHeader:
181  // No check. This should be WEBVTT when this object was created.
182  header_.push_back(line);
183  state_ = kMetadata;
184  break;
185  case kMetadata: {
186  if (line.empty()) {
187  std::vector<std::shared_ptr<StreamInfo>> streams;
188  // The resolution of timings are in milliseconds.
189  const int kTimescale = 1000;
190 
191  // The duration passed here is not very important. Also the whole file
192  // must be read before determining the real duration which doesn't
193  // work nicely with the current demuxer.
194  const int kDuration = 0;
195 
196  // There is no one metadata to determine what the language is. Parts
197  // of the text may be annotated as some specific language.
198  const char kLanguage[] = "";
199 
200  const char kWebVttCodecString[] = "wvtt";
201  streams.emplace_back(
202  new TextStreamInfo(kTrackId, kTimescale, kDuration,
203  kCodecWebVtt, kWebVttCodecString,
204  base::JoinString(header_, "\n"),
205  0, // Not necessary.
206  0,
207  kLanguage)); // Not necessary.
208 
209  init_cb_.Run(streams);
210  state_ = kCueIdentifierOrTimingOrComment;
211  break;
212  }
213 
214  header_.push_back(line);
215  break;
216  }
217  case kCueIdentifierOrTimingOrComment: {
218  // Note that there can be one or more line breaks before a cue starts;
219  // skip this line.
220  // Or the file could end without a new cue.
221  if (line.empty())
222  break;
223 
224  if (!has_arrow) {
225  if (base::StartsWith(line, "NOTE",
226  base::CompareCase::INSENSITIVE_ASCII)) {
227  state_ = kComment;
228  current_cue_.comment += line + "\n";
229  } else {
230  // A cue can start from a cue identifier.
231  // https://w3c.github.io/webvtt/#webvtt-cue-identifier
232  current_cue_.identifier = line;
233  // The next line must be a timing.
234  state_ = kCueTiming;
235  }
236  break;
237  }
238 
239  // No break statement if the line has an arrow; it should be a WebVTT
240  // timing, so fall thru. Setting state_ to kCueTiming so that the state
241  // always matches the case.
242  state_ = kCueTiming;
243  FALLTHROUGH_INTENDED;
244  }
245  case kCueTiming: {
246  DCHECK(has_arrow);
247  if (!ParseTimingAndSettingsLine(line, &current_cue_.start_time,
248  &current_cue_.duration,
249  &current_cue_.settings)) {
250  state_ = kParseError;
251  return false;
252  }
253  state_ = kCuePayload;
254  break;
255  }
256  case kCuePayload: {
257  if (line.empty()) {
258  state_ = kCueIdentifierOrTimingOrComment;
259  if (!ProcessCurrentCue(!kFlush)) {
260  state_ = kParseError;
261  return false;
262  }
263  break;
264  }
265 
266  current_cue_.payload += line + "\n";
267  break;
268  }
269  case kComment: {
270  if (line.empty()) {
271  state_ = kCueIdentifierOrTimingOrComment;
272  if (!ProcessCurrentCue(!kFlush)) {
273  state_ = kParseError;
274  return false;
275  }
276  break;
277  }
278 
279  current_cue_.comment += line + "\n";
280  break;
281  }
282  case kParseError:
283  NOTREACHED();
284  return false;
285  }
286  }
287 
288  return true;
289 }
290 
291 void WebVttMediaParser::InjectWebVttSampleConvertForTesting(
292  std::unique_ptr<WebVttSampleConverter> converter) {
293  sample_converter_ = std::move(converter);
294 }
295 
296 bool WebVttMediaParser::ProcessCurrentCue(bool flush) {
297  sample_converter_->PushCue(current_cue_);
298  current_cue_ = Cue();
299  if (flush)
300  sample_converter_->Flush();
301 
302  while (sample_converter_->ReadySamplesSize() > 0) {
303  if (!new_sample_cb_.Run(kTrackId, sample_converter_->PopSample())) {
304  LOG(ERROR) << "New sample callback failed.";
305  return false;
306  }
307  }
308  return true;
309 }
310 
311 } // namespace media
312 } // namespace shaka
void Init(const InitCB &init_cb, const NewSampleCB &new_sample_cb, KeySource *decryption_key_source) override
base::Callback< bool(uint32_t track_id, const std::shared_ptr< MediaSample > &media_sample)> NewSampleCB
Definition: media_parser.h:43
bool Parse(const uint8_t *buf, int size) override WARN_UNUSED_RESULT
bool Flush() override WARN_UNUSED_RESULT
KeySource is responsible for encryption key acquisition.
Definition: key_source.h:45