DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs Enumerator
webvtt_media_parser.cc
1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file or at
5 // https://developers.google.com/open-source/licenses/bsd
6 
7 #include "packager/media/formats/webvtt/webvtt_media_parser.h"
8 
9 #include <string>
10 #include <vector>
11 
12 #include "packager/base/logging.h"
13 #include "packager/base/strings/string_number_conversions.h"
14 #include "packager/base/strings/string_split.h"
15 #include "packager/base/strings/string_util.h"
16 #include "packager/media/base/media_sample.h"
17 #include "packager/media/base/text_stream_info.h"
18 
19 namespace edash_packager {
20 namespace media {
21 
22 namespace {
23 
// A WebVTT file carries exactly one track, so a single fixed id is used.
const int kTrackId = 0;
26 
// Line terminator characters: a line ends with LF, CR, or the pair CRLF.
const char kCR = '\r';
const char kLF = '\n';

// Pops the first line off the front of |data|.
//
// On success, |line| receives the line's content (terminator excluded), the
// line and its terminator are erased from |data|, and true is returned.
// Returns false, leaving both |data| and |line| untouched, when |data| holds
// no line terminator (which includes the empty-string case).
bool ReadLine(std::string* data, std::string* line) {
  const char kLineBreakChars[] = {kCR, kLF};
  const size_t break_pos =
      data->find_first_of(kLineBreakChars, 0, sizeof(kLineBreakChars));
  if (break_pos == std::string::npos)
    return false;

  // A CR immediately followed by LF is a single two-character terminator; a
  // CR at the very end of the buffer counts as a terminator by itself.
  const bool is_crlf = (*data)[break_pos] == kCR &&
                       break_pos + 1 < data->size() &&
                       (*data)[break_pos + 1] == kLF;
  const size_t terminator_length = is_crlf ? 2 : 1;

  *line = data->substr(0, break_pos);
  data->erase(0, break_pos + terminator_length);
  return true;
}
67 
// Parses exactly |length| ASCII digits of |str| starting at |pos| into
// |value|. Returns false (leaving |value| untouched) if |length| is zero, the
// range is out of bounds, any character is not a digit (signs and whitespace
// are rejected), or the number does not fit in 64 bits.
bool ParseTimestampDigits(const std::string& str,
                          size_t pos,
                          size_t length,
                          uint64_t* value) {
  if (length == 0 || pos > str.size() || length > str.size() - pos)
    return false;

  const uint64_t kMaxValue = ~static_cast<uint64_t>(0);
  uint64_t result = 0;
  for (size_t i = pos; i < pos + length; ++i) {
    const char c = str[i];
    if (c < '0' || c > '9')
      return false;
    const uint64_t digit = static_cast<uint64_t>(c - '0');
    if (result > (kMaxValue - digit) / 10)
      return false;
    result = result * 10 + digit;
  }
  *value = result;
  return true;
}

// Converts a WebVTT timestamp of the form "[h...:]mm:ss.ttt" to milliseconds.
//
// |original_str| must be exactly one timestamp with no surrounding
// characters. The hours component is optional and may have any number of
// digits; minutes and seconds are two digits in the range 00-59 and
// milliseconds are three digits. On success the result is stored in |time_ms|
// and true is returned. On any format or range error false is returned and
// |time_ms| is left untouched.
bool TimestampToMilliseconds(const std::string& original_str,
                             uint64_t* time_ms) {
  const size_t kMinutesLength = 2;
  const size_t kSecondsLength = 2;
  const size_t kMillisecondsLength = 3;

  // +2 for a colon and a dot for splitting minutes and seconds AND seconds and
  // milliseconds, respectively.
  const size_t kMinimalLength =
      kMinutesLength + kSecondsLength + kMillisecondsLength + 2;

  const uint64_t kMsPerSecond = 1000;
  const uint64_t kMsPerMinute = 60 * kMsPerSecond;
  const uint64_t kMsPerHour = 60 * kMsPerMinute;

  const std::string& str = original_str;
  if (str.size() < kMinimalLength)
    return false;

  uint64_t hours = 0;
  uint64_t minutes = 0;
  uint64_t seconds = 0;
  uint64_t milliseconds = 0;

  size_t str_index = 0;
  if (str.size() > kMinimalLength) {
    // Everything in front of the trailing "mm:ss.ttt" is the hours component
    // followed by one colon. -1 excludes that colon. A zero-length hours
    // component (string starting with ':') is rejected by the digit parser.
    const size_t hours_length = str.size() - kMinimalLength - 1;
    if (!ParseTimestampDigits(str, 0, hours_length, &hours))
      return false;
    str_index = hours_length;

    if (str[str_index] != ':')
      return false;
    ++str_index;
  }

  // From here on exactly kMinimalLength characters remain, so every index
  // used below is in range.
  if (!ParseTimestampDigits(str, str_index, kMinutesLength, &minutes))
    return false;
  if (minutes > 59)
    return false;

  str_index += kMinutesLength;
  if (str[str_index] != ':')
    return false;
  ++str_index;

  if (!ParseTimestampDigits(str, str_index, kSecondsLength, &seconds))
    return false;
  if (seconds > 59)
    return false;

  str_index += kSecondsLength;
  if (str[str_index] != '.')
    return false;
  ++str_index;

  if (!ParseTimestampDigits(str, str_index, kMillisecondsLength,
                            &milliseconds)) {
    return false;
  }

  // Reject hour counts whose millisecond value would not fit in 64 bits.
  const uint64_t kMaxHours =
      (~static_cast<uint64_t>(0) - 59 * kMsPerMinute - 59 * kMsPerSecond -
       999) /
      kMsPerHour;
  if (hours > kMaxHours)
    return false;

  // All arithmetic is done in 64 bits so that large hour values do not
  // overflow.
  *time_ms = milliseconds +
             seconds * kMsPerSecond +
             minutes * kMsPerMinute +
             hours * kMsPerHour;
  return true;
}
140 
141 // Clears |settings| and 0s |start_time| and |duration| regardless of the
142 // parsing result.
143 bool ParseTimingAndSettingsLine(const std::string& line,
144  uint64_t* start_time,
145  uint64_t* duration,
146  std::string* settings) {
147  *start_time = 0;
148  *duration = 0;
149  settings->clear();
150  std::vector<std::string> entries = base::SplitString(
151  line, " ", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
152  if (entries.size() < 3) {
153  // The timing is time1 --> time3 so if there aren't 3 entries, this is parse
154  // error.
155  LOG(ERROR) << "Not enough tokens to be a timing " << line;
156  return false;
157  }
158 
159  if (entries[1] != "-->") {
160  LOG(ERROR) << "Cannot find an arrow at the right place " << line;
161  return false;
162  }
163 
164  const std::string& start_time_str = entries[0];
165  if (!TimestampToMilliseconds(start_time_str, start_time)) {
166  LOG(ERROR) << "Failed to parse " << start_time_str << " in " << line;
167  return false;
168  }
169 
170  const std::string& end_time_str = entries[2];
171  uint64_t end_time = 0;
172  if (!TimestampToMilliseconds(end_time_str, &end_time)) {
173  LOG(ERROR) << "Failed to parse " << end_time_str << " in " << line;
174  return false;
175  }
176  *duration = end_time - *start_time;
177 
178  entries.erase(entries.begin(), entries.begin() + 3);
179  *settings = base::JoinString(entries, " ");
180  return true;
181 }
182 
183 // Mapping:
184 // comment --> side data (and side data only sample)
185 // settings --> side data
186 // start_time --> pts
187 scoped_refptr<MediaSample> CueToMediaSample(const Cue& cue) {
188  const bool kKeyFrame = true;
189  if (!cue.comment.empty()) {
190  const std::string comment = base::JoinString(cue.comment, "\n");
192  reinterpret_cast<const uint8_t*>(comment.data()), comment.size());
193  }
194 
195  const std::string payload = base::JoinString(cue.payload, "\n");
196  scoped_refptr<MediaSample> media_sample = MediaSample::CopyFrom(
197  reinterpret_cast<const uint8_t*>(payload.data()),
198  payload.size(),
199  reinterpret_cast<const uint8_t*>(cue.settings.data()),
200  cue.settings.size(),
201  !kKeyFrame);
202 
203  media_sample->set_config_id(cue.identifier);
204  media_sample->set_pts(cue.start_time);
205  media_sample->set_duration(cue.duration);
206  return media_sample;
207 }
208 
209 } // namespace
210 
211 Cue::Cue() : start_time(0), duration(0) {}
212 Cue::~Cue() {}
213 
214 WebVttMediaParser::WebVttMediaParser() : state_(kHeader) {}
215 WebVttMediaParser::~WebVttMediaParser() {}
216 
217 void WebVttMediaParser::Init(const InitCB& init_cb,
218  const NewSampleCB& new_sample_cb,
219  KeySource* decryption_key_source) {
220  init_cb_ = init_cb;
221  new_sample_cb_ = new_sample_cb;
222 }
223 
225  // If not in one of these states just be ready for more data.
226  if (state_ != kCuePayload && state_ != kComment)
227  return;
228 
229  if (!data_.empty()) {
230  // If it was in the middle of the payload and the stream finished, then this
231  // is an end of the payload. The rest of the data is part of the payload.
232  if (state_ == kCuePayload) {
233  current_cue_.payload.push_back(data_);
234  } else {
235  current_cue_.comment.push_back(data_);
236  }
237  data_.clear();
238  }
239 
240  new_sample_cb_.Run(kTrackId, CueToMediaSample(current_cue_));
241  current_cue_ = Cue();
242  state_ = kCueIdentifierOrTimingOrComment;
243 }
244 
245 bool WebVttMediaParser::Parse(const uint8_t* buf, int size) {
246  if (state_ == kParseError) {
247  LOG(WARNING) << "The parser is in an error state, ignoring input.";
248  return false;
249  }
250 
251  data_.insert(data_.end(), buf, buf + size);
252 
253  std::string line;
254  while (ReadLine(&data_, &line)) {
255  // Only kCueIdentifierOrTimingOrComment and kCueTiming states accept -->.
256  // Error otherwise.
257  const bool has_arrow = line.find("-->") != std::string::npos;
258  if (state_ == kCueTiming) {
259  if (!has_arrow) {
260  LOG(ERROR) << "Expected --> in: " << line;
261  state_ = kParseError;
262  return false;
263  }
264  } else if (state_ != kCueIdentifierOrTimingOrComment) {
265  if (has_arrow) {
266  LOG(ERROR) << "Unexpected --> in " << line;
267  state_ = kParseError;
268  return false;
269  }
270  }
271 
272  switch (state_) {
273  case kHeader:
274  // No check. This should be WEBVTT when this object was created.
275  header_.push_back(line);
276  state_ = kMetadata;
277  break;
278  case kMetadata: {
279  if (line.empty()) {
280  std::vector<scoped_refptr<StreamInfo> > streams;
281  // The resolution of timings are in milliseconds.
282  const int kTimescale = 1000;
283 
284  // The duration passed here is not very important. Also the whole file
285  // must be read before determining the real duration which doesn't
286  // work nicely with the current demuxer.
287  const int kDuration = 0;
288 
289  // There is no one metadata to determine what the language is. Parts
290  // of the text may be annotated as some specific language.
291  const char kLanguage[] = "";
292  streams.push_back(new TextStreamInfo(
293  kTrackId,
294  kTimescale,
295  kDuration,
296  "wvtt",
297  kLanguage,
298  base::JoinString(header_, "\n"),
299  0, // Not necessary.
300  0)); // Not necessary.
301 
302  init_cb_.Run(streams);
303  state_ = kCueIdentifierOrTimingOrComment;
304  break;
305  }
306 
307  header_.push_back(line);
308  break;
309  }
310  case kCueIdentifierOrTimingOrComment: {
311  // Note that there can be one or more line breaks before a cue starts;
312  // skip this line.
313  // Or the file could end without a new cue.
314  if (line.empty())
315  break;
316 
317  if (!has_arrow) {
318  if (base::StartsWith(line, "NOTE",
319  base::CompareCase::INSENSITIVE_ASCII)) {
320  state_ = kComment;
321  current_cue_.comment.push_back(line);
322  } else {
323  // A cue can start from a cue identifier.
324  // https://w3c.github.io/webvtt/#webvtt-cue-identifier
325  current_cue_.identifier = line;
326  // The next line must be a timing.
327  state_ = kCueTiming;
328  }
329  break;
330  }
331 
332  // No break statement if the line has an arrow; it should be a WebVTT
333  // timing, so fall thru. Setting state_ to kCueTiming so that the state
334  // always matches the case.
335  state_ = kCueTiming;
336  }
337  case kCueTiming: {
338  DCHECK(has_arrow);
339  if (!ParseTimingAndSettingsLine(line, &current_cue_.start_time,
340  &current_cue_.duration,
341  &current_cue_.settings)) {
342  state_ = kParseError;
343  return false;
344  }
345  state_ = kCuePayload;
346  break;
347  }
348  case kCuePayload: {
349  if (line.empty()) {
350  state_ = kCueIdentifierOrTimingOrComment;
351  new_sample_cb_.Run(kTrackId, CueToMediaSample(current_cue_));
352  current_cue_ = Cue();
353  break;
354  }
355 
356  current_cue_.payload.push_back(line);
357  break;
358  }
359  case kComment: {
360  if (line.empty()) {
361  state_ = kCueIdentifierOrTimingOrComment;
362  new_sample_cb_.Run(kTrackId, CueToMediaSample(current_cue_));
363  current_cue_ = Cue();
364  break;
365  }
366 
367  current_cue_.comment.push_back(line);
368  break;
369  }
370  case kParseError:
371  NOTREACHED();
372  return false;
373  }
374  }
375 
376  return true;
377 }
378 
379 } // namespace media
380 } // namespace edash_packager
static scoped_refptr< MediaSample > CopyFrom(const uint8_t *data, size_t size, bool is_key_frame)
Definition: media_sample.cc:45
bool Parse(const uint8_t *buf, int size) override
base::Callback< bool(uint32_t track_id, const scoped_refptr< MediaSample > &media_sample)> NewSampleCB
Definition: media_parser.h:43
KeySource is responsible for encryption key acquisition.
Definition: key_source.h:29
static scoped_refptr< MediaSample > FromMetadata(const uint8_t *metadata, size_t metadata_size)
Definition: media_sample.cc:67
void Init(const InitCB &init_cb, const NewSampleCB &new_sample_cb, KeySource *decryption_key_source) override