DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator
webvtt_media_parser.cc
1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file or at
5 // https://developers.google.com/open-source/licenses/bsd
6 
7 #include "packager/media/formats/webvtt/webvtt_media_parser.h"
8 
9 #include <string>
10 #include <vector>
11 
12 #include "packager/base/logging.h"
13 #include "packager/base/strings/string_number_conversions.h"
14 #include "packager/base/strings/string_split.h"
15 #include "packager/base/strings/string_util.h"
16 #include "packager/media/base/macros.h"
17 #include "packager/media/base/media_sample.h"
18 #include "packager/media/base/text_stream_info.h"
19 
20 namespace shaka {
21 namespace media {
22 
23 namespace {
24 
// Named argument value for ProcessCurrentCue(): pass |kFlush| to flush the
// sample converter, |!kFlush| to leave it unflushed.
constexpr bool kFlush = true;

// A WebVTT file carries exactly one track, so a fixed id is used.
constexpr int kTrackId = 0;
29 
// Line terminator characters: carriage return (0x0D) and line feed (0x0A).
const char kCR = '\r';
const char kLF = '\n';

// Extracts the first line of |data|.
//
// A line ends at the first LF, CR or CRLF. On success the line's content
// (without the terminator) is stored in |line|, the line and its terminator
// are removed from the front of |data|, and true is returned.
//
// Returns false, leaving both |data| and |line| untouched, when |data| is empty
// or does not contain any line terminator yet.
bool ReadLine(std::string* data, std::string* line) {
  const char kLineBreakChars[] = {kCR, kLF, '\0'};
  const std::string::size_type break_pos =
      data->find_first_of(kLineBreakChars);
  if (break_pos == std::string::npos)
    return false;

  // The terminator is two characters long only for CR immediately followed by
  // LF. A lone LF, a lone CR, or a CR at the very end of |data| counts as one.
  const bool is_crlf = (*data)[break_pos] == kCR &&
                       break_pos + 1 < data->size() &&
                       (*data)[break_pos + 1] == kLF;
  const std::string::size_type terminator_length = is_crlf ? 2 : 1;

  line->assign(*data, 0, break_pos);
  data->erase(0, break_pos + terminator_length);
  return true;
}
70 
// Parses exactly |length| characters of |str| starting at |offset| as an
// unsigned base-10 number made only of ASCII digits (no sign, no whitespace).
// Returns false if the range is empty or out of bounds, contains a non-digit,
// or the value does not fit in a uint64_t. |value| is written only on success.
bool ParseAsciiDigits(const std::string& str,
                      size_t offset,
                      size_t length,
                      uint64_t* value) {
  if (length == 0 || offset > str.size() || length > str.size() - offset)
    return false;

  uint64_t result = 0;
  for (size_t i = offset; i < offset + length; ++i) {
    const char c = str[i];
    if (c < '0' || c > '9')
      return false;
    const uint64_t digit = static_cast<uint64_t>(c - '0');
    // Reject instead of wrapping around.
    if (result > (UINT64_MAX - digit) / 10)
      return false;
    result = result * 10 + digit;
  }
  *value = result;
  return true;
}

// Converts a WebVTT timestamp, "mm:ss.ttt" or "hh:mm:ss.ttt" (two or more hour
// digits), to milliseconds.
//
// Every field must consist solely of ASCII digits; minutes and seconds must be
// in [0, 59]. All arithmetic is done in 64 bits so that large hour values do
// not overflow.
//
// Returns true and stores the result in |time_ms| on success. Returns false
// and leaves |time_ms| untouched if |original_str| is malformed.
bool TimestampToMilliseconds(const std::string& original_str,
                             uint64_t* time_ms) {
  const size_t kMinimalHoursLength = 2;
  const size_t kMinutesLength = 2;
  const size_t kSecondsLength = 2;
  const size_t kMillisecondsLength = 3;

  // +2 for a colon and a dot for splitting minutes and seconds AND seconds and
  // milliseconds, respectively.
  const size_t kMinimalLength =
      kMinutesLength + kSecondsLength + kMillisecondsLength + 2;

  const uint64_t kMsPerSecond = 1000;
  const uint64_t kMsPerMinute = 60 * kMsPerSecond;
  const uint64_t kMsPerHour = 60 * kMsPerMinute;

  if (original_str.size() < kMinimalLength)
    return false;

  uint64_t hours = 0;
  uint64_t minutes = 0;
  uint64_t seconds = 0;
  uint64_t milliseconds = 0;

  size_t str_index = 0;
  if (original_str.size() > kMinimalLength) {
    // Anything in front of "mm:ss.ttt" must be the hours followed by a colon.
    // -1 for excluding the colon splitting hours and minutes.
    const size_t hours_length = original_str.size() - kMinimalLength - 1;
    if (hours_length < kMinimalHoursLength)
      return false;
    if (!ParseAsciiDigits(original_str, 0, hours_length, &hours))
      return false;
    // Make sure hours * kMsPerHour plus the remaining (< 1 hour) part fits.
    if (hours > UINT64_MAX / kMsPerHour - 1)
      return false;
    str_index += hours_length;

    if (original_str[str_index] != ':')
      return false;
    ++str_index;
  }

  // From here on exactly kMinimalLength characters remain: "mm:ss.ttt".
  if (!ParseAsciiDigits(original_str, str_index, kMinutesLength, &minutes))
    return false;
  if (minutes > 59)
    return false;
  str_index += kMinutesLength;

  if (original_str[str_index] != ':')
    return false;
  ++str_index;

  if (!ParseAsciiDigits(original_str, str_index, kSecondsLength, &seconds))
    return false;
  if (seconds > 59)
    return false;
  str_index += kSecondsLength;

  if (original_str[str_index] != '.')
    return false;
  ++str_index;

  // Three digits can never exceed 999, so no extra range check is needed.
  if (!ParseAsciiDigits(original_str, str_index, kMillisecondsLength,
                        &milliseconds)) {
    return false;
  }

  *time_ms = milliseconds + seconds * kMsPerSecond + minutes * kMsPerMinute +
             hours * kMsPerHour;
  return true;
}
146 
147 // Clears |settings| and 0s |start_time| and |duration| regardless of the
148 // parsing result.
149 bool ParseTimingAndSettingsLine(const std::string& line,
150  uint64_t* start_time,
151  uint64_t* duration,
152  std::string* settings) {
153  *start_time = 0;
154  *duration = 0;
155  settings->clear();
156  std::vector<std::string> entries = base::SplitString(
157  line, " ", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
158  if (entries.size() < 3) {
159  // The timing is time1 --> time3 so if there aren't 3 entries, this is parse
160  // error.
161  LOG(ERROR) << "Not enough tokens to be a timing " << line;
162  return false;
163  }
164 
165  if (entries[1] != "-->") {
166  LOG(ERROR) << "Cannot find an arrow at the right place " << line;
167  return false;
168  }
169 
170  const std::string& start_time_str = entries[0];
171  if (!TimestampToMilliseconds(start_time_str, start_time)) {
172  LOG(ERROR) << "Failed to parse " << start_time_str << " in " << line;
173  return false;
174  }
175 
176  const std::string& end_time_str = entries[2];
177  uint64_t end_time = 0;
178  if (!TimestampToMilliseconds(end_time_str, &end_time)) {
179  LOG(ERROR) << "Failed to parse " << end_time_str << " in " << line;
180  return false;
181  }
182  *duration = end_time - *start_time;
183 
184  entries.erase(entries.begin(), entries.begin() + 3);
185  *settings = base::JoinString(entries, " ");
186  return true;
187 }
188 
189 } // namespace
190 
191 WebVttMediaParser::WebVttMediaParser()
192  : state_(kHeader), sample_converter_(new WebVttSampleConverter()) {}
193 WebVttMediaParser::~WebVttMediaParser() {}
194 
195 void WebVttMediaParser::Init(const InitCB& init_cb,
196  const NewSampleCB& new_sample_cb,
197  KeySource* decryption_key_source) {
198  init_cb_ = init_cb;
199  new_sample_cb_ = new_sample_cb;
200 }
201 
203  // If not in one of these states just be ready for more data.
204  if (state_ != kCuePayload && state_ != kComment)
205  return true;
206 
207  if (!data_.empty()) {
208  // If it was in the middle of the payload and the stream finished, then this
209  // is an end of the payload. The rest of the data is part of the payload.
210  if (state_ == kCuePayload) {
211  current_cue_.payload += data_ + "\n";
212  } else {
213  current_cue_.comment += data_ + "\n";
214  }
215  data_.clear();
216  }
217 
218  if (!ProcessCurrentCue(kFlush)) {
219  state_ = kParseError;
220  return false;
221  }
222 
223  state_ = kCueIdentifierOrTimingOrComment;
224  return true;
225 }
226 
227 bool WebVttMediaParser::Parse(const uint8_t* buf, int size) {
228  if (state_ == kParseError) {
229  LOG(WARNING) << "The parser is in an error state, ignoring input.";
230  return false;
231  }
232 
233  data_.insert(data_.end(), buf, buf + size);
234 
235  std::string line;
236  while (ReadLine(&data_, &line)) {
237  // Only kCueIdentifierOrTimingOrComment and kCueTiming states accept -->.
238  // Error otherwise.
239  const bool has_arrow = line.find("-->") != std::string::npos;
240  if (state_ == kCueTiming) {
241  if (!has_arrow) {
242  LOG(ERROR) << "Expected --> in: " << line;
243  state_ = kParseError;
244  return false;
245  }
246  } else if (state_ != kCueIdentifierOrTimingOrComment) {
247  if (has_arrow) {
248  LOG(ERROR) << "Unexpected --> in " << line;
249  state_ = kParseError;
250  return false;
251  }
252  }
253 
254  switch (state_) {
255  case kHeader:
256  // No check. This should be WEBVTT when this object was created.
257  header_.push_back(line);
258  state_ = kMetadata;
259  break;
260  case kMetadata: {
261  if (line.empty()) {
262  std::vector<std::shared_ptr<StreamInfo>> streams;
263  // The resolution of timings are in milliseconds.
264  const int kTimescale = 1000;
265 
266  // The duration passed here is not very important. Also the whole file
267  // must be read before determining the real duration which doesn't
268  // work nicely with the current demuxer.
269  const int kDuration = 0;
270 
271  // There is no one metadata to determine what the language is. Parts
272  // of the text may be annotated as some specific language.
273  const char kLanguage[] = "";
274 
275  const char kWebVttCodecString[] = "wvtt";
276  streams.emplace_back(
277  new TextStreamInfo(kTrackId, kTimescale, kDuration,
278  kCodecWebVtt, kWebVttCodecString,
279  base::JoinString(header_, "\n"),
280  0, // Not necessary.
281  0,
282  kLanguage)); // Not necessary.
283 
284  init_cb_.Run(streams);
285  state_ = kCueIdentifierOrTimingOrComment;
286  break;
287  }
288 
289  header_.push_back(line);
290  break;
291  }
292  case kCueIdentifierOrTimingOrComment: {
293  // Note that there can be one or more line breaks before a cue starts;
294  // skip this line.
295  // Or the file could end without a new cue.
296  if (line.empty())
297  break;
298 
299  if (!has_arrow) {
300  if (base::StartsWith(line, "NOTE",
301  base::CompareCase::INSENSITIVE_ASCII)) {
302  state_ = kComment;
303  current_cue_.comment += line + "\n";
304  } else {
305  // A cue can start from a cue identifier.
306  // https://w3c.github.io/webvtt/#webvtt-cue-identifier
307  current_cue_.identifier = line;
308  // The next line must be a timing.
309  state_ = kCueTiming;
310  }
311  break;
312  }
313 
314  // No break statement if the line has an arrow; it should be a WebVTT
315  // timing, so fall thru. Setting state_ to kCueTiming so that the state
316  // always matches the case.
317  state_ = kCueTiming;
318  FALLTHROUGH_INTENDED;
319  }
320  case kCueTiming: {
321  DCHECK(has_arrow);
322  if (!ParseTimingAndSettingsLine(line, &current_cue_.start_time,
323  &current_cue_.duration,
324  &current_cue_.settings)) {
325  state_ = kParseError;
326  return false;
327  }
328  state_ = kCuePayload;
329  break;
330  }
331  case kCuePayload: {
332  if (line.empty()) {
333  state_ = kCueIdentifierOrTimingOrComment;
334  if (!ProcessCurrentCue(!kFlush)) {
335  state_ = kParseError;
336  return false;
337  }
338  break;
339  }
340 
341  current_cue_.payload += line + "\n";
342  break;
343  }
344  case kComment: {
345  if (line.empty()) {
346  state_ = kCueIdentifierOrTimingOrComment;
347  if (!ProcessCurrentCue(!kFlush)) {
348  state_ = kParseError;
349  return false;
350  }
351  break;
352  }
353 
354  current_cue_.comment += line + "\n";
355  break;
356  }
357  case kParseError:
358  NOTREACHED();
359  return false;
360  }
361  }
362 
363  return true;
364 }
365 
366 void WebVttMediaParser::InjectWebVttSampleConvertForTesting(
367  std::unique_ptr<WebVttSampleConverter> converter) {
368  sample_converter_ = std::move(converter);
369 }
370 
371 bool WebVttMediaParser::ProcessCurrentCue(bool flush) {
372  sample_converter_->PushCue(current_cue_);
373  current_cue_ = Cue();
374  if (flush)
375  sample_converter_->Flush();
376 
377  while (sample_converter_->ReadySamplesSize() > 0) {
378  if (!new_sample_cb_.Run(kTrackId, sample_converter_->PopSample())) {
379  LOG(ERROR) << "New sample callback failed.";
380  return false;
381  }
382  }
383  return true;
384 }
385 
386 } // namespace media
387 } // namespace shaka
void Init(const InitCB &init_cb, const NewSampleCB &new_sample_cb, KeySource *decryption_key_source) override
base::Callback< bool(uint32_t track_id, const std::shared_ptr< MediaSample > &media_sample)> NewSampleCB
Definition: media_parser.h:43
bool Parse(const uint8_t *buf, int size) override WARN_UNUSED_RESULT
bool Flush() override WARN_UNUSED_RESULT
KeySource is responsible for encryption key acquisition.
Definition: key_source.h:45