DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator
webvtt_sample_converter.cc
1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file or at
5 // https://developers.google.com/open-source/licenses/bsd
6 
7 #include "packager/media/formats/webvtt/webvtt_sample_converter.h"
8 
9 #include <algorithm>
10 #include <string>
11 
12 #include "packager/base/strings/string_util.h"
13 #include "packager/base/strings/stringprintf.h"
14 #include "packager/media/base/buffer_writer.h"
15 #include "packager/media/base/media_sample.h"
16 #include "packager/media/formats/mp4/box_buffer.h"
17 #include "packager/media/formats/mp4/box_definitions.h"
18 
19 namespace shaka {
20 namespace media {
21 
22 namespace {
23 
24 std::shared_ptr<MediaSample> CreateEmptyCueSample(uint64_t start_time,
25  uint64_t end_time) {
26  DCHECK_GT(end_time, start_time);
27  mp4::VTTEmptyCueBox empty_cue_box;
28 
29  std::vector<uint8_t> serialized;
30  AppendBoxToVector(&empty_cue_box, &serialized);
31 
32  std::shared_ptr<MediaSample> empty_cue_sample = MediaSample::CopyFrom(
33  serialized.data(), serialized.size(), false);
34  empty_cue_sample->set_pts(start_time);
35  empty_cue_sample->set_duration(end_time - start_time);
36  return empty_cue_sample;
37 }
38 
// Copies |input| into |output| with every trailing '\n' removed.
// Newlines that are followed by any other character are preserved.
// An input that is empty or consists only of newlines yields an empty
// |output|.
void StripTrailingNewlines(const std::string& input, std::string* output) {
  const size_t last_kept = input.find_last_not_of('\n');
  if (last_kept == std::string::npos) {
    // No character other than '\n' exists, so everything is "trailing".
    output->clear();
    return;
  }
  // substr() builds a temporary, so this is safe even if |output| aliases
  // |input|.
  *output = input.substr(0, last_kept + 1);
}
47 
48 mp4::VTTCueBox CueBoxFromCue(const Cue& cue) {
49  mp4::VTTCueBox cue_box;
50  if (!cue.identifier.empty()) {
51  cue_box.cue_id.cue_id = cue.identifier;
52  }
53 
54  if (!cue.settings.empty()) {
55  cue_box.cue_settings.settings = cue.settings;
56  }
57 
58  StripTrailingNewlines(cue.payload, &cue_box.cue_payload.cue_text);
59  return cue_box;
60 }
61 
62 std::string TimeToWebVttTimeStamp(uint64_t time_in_ms) {
63  const int milliseconds = time_in_ms % 1000;
64  const uint64_t seconds_left = time_in_ms / 1000;
65  const int seconds = seconds_left % 60;
66  const uint64_t minutes_left = seconds_left / 60;
67  const int minutes = minutes_left % 60;
68  const int hours = minutes_left / 60;
69 
70  return base::StringPrintf("%02d:%02d:%02d.%03d", hours, minutes, seconds,
71  milliseconds);
72 }
73 
74 std::shared_ptr<MediaSample> CreateVTTCueBoxesSample(
75  const std::list<const Cue*>& cues,
76  uint64_t start_time,
77  uint64_t end_time) {
78  // TODO(rkuroiwa): Source IDs must be assigned to the cues and the same cue
79  // should have the same ID in different samples. Probably requires a mapping
80  // from cues to IDs.
81  CHECK(!cues.empty());
82 
83  std::vector<uint8_t> data;
84  std::string cue_current_time = TimeToWebVttTimeStamp(start_time);
85 
86  BufferWriter writer;
87  for (const Cue* cue : cues) {
88  mp4::VTTCueBox cue_box = CueBoxFromCue(*cue);
89  // If there is internal timing, i.e. WebVTT cue timestamp, then
90  // cue_current_time should be populated
91  // "which gives the VTT timestamp associated with the start time of sample."
92  // TODO(rkuroiwa): Reuse TimestampToMilliseconds() to check if there is an
93  // internal timestamp in the payload to set CueTimeBox.cue_current_time.
94  cue_box.Write(&writer);
95  }
96 
97  std::shared_ptr<MediaSample> sample =
98  MediaSample::CopyFrom(writer.Buffer(), writer.Size(), false);
99  sample->set_pts(start_time);
100  sample->set_duration(end_time - start_time);
101  return sample;
102 }
103 
104 // This function returns the minimum of cue_start_time, cue_end_time,
105 // current_minimum should be bigger than sweep_line.
106 uint64_t GetMinimumPastSweepLine(uint64_t cue_start_time,
107  uint64_t cue_end_time,
108  uint64_t sweep_line,
109  uint64_t current_minimum) {
110  DCHECK_GE(current_minimum, sweep_line);
111  if (cue_end_time <= sweep_line)
112  return current_minimum;
113 
114  // Anything below is cue_end_time > sweep_line.
115  if (cue_start_time > sweep_line) {
116  // The start time of this cue is past the sweepline, return the min.
117  return std::min(cue_start_time, current_minimum);
118  } else {
119  // The sweep line is at the start or in the middle of a cue.
120  return std::min(cue_end_time, current_minimum);
121  }
122 }
123 
124 } // namespace
125 
126 void AppendBoxToVector(mp4::Box* box, std::vector<uint8_t>* output_vector) {
127  BufferWriter writer;
128  box->Write(&writer);
129  output_vector->insert(output_vector->end(),
130  writer.Buffer(),
131  writer.Buffer() + writer.Size());
132 }
133 
134 WebVttSampleConverter::WebVttSampleConverter() : next_cue_start_time_(0u) {}
135 WebVttSampleConverter::~WebVttSampleConverter() {}
136 
137 // Note that this |sample| is either a cue or a comment. It does not have any
138 // info on whether the next cue is overlapping or not.
140  if (!cue.comment.empty()) {
141  // A comment. Put it in the buffer and skip.
143  StripTrailingNewlines(cue.comment, &comment.cue_additional_text);
144  additional_texts_.push_back(comment);
145 // TODO(rkuroiwa): Handle comments as samples.
146 
147  return;
148  }
149 
150  cues_.push_back(cue);
151  if (cues_.size() == 1) {
152  // Cannot make a decision with just one sample. Cache it and wait for
153  // another one.
154  next_cue_start_time_ = cues_.front().start_time;
155  return;
156  }
157 
158  CHECK_GE(cues_.size(), 2u);
159  // TODO(rkuroiwa): This isn't wrong but all the cues where
160  // endtime < latest cue start time
161  // can be processed. Change the logic so that if there are cues that meet the
162  // condition above, create samples immediately and remove them.
163  // Note: This doesn't mean that all the cues can be removed, just the ones
164  // that meet the condition.
165  bool processed_cues = HandleAllCuesButLatest();
166  if (!processed_cues)
167  return;
168 
169  // Remove all the cues except the latest one.
170  auto erase_last_iterator = --cues_.end();
171  cues_.erase(cues_.begin(), erase_last_iterator);
172 }
173 
175  if (cues_.empty())
176  return;
177  if (cues_.size() == 1) {
178  std::list<const Cue*> temp_list;
179  temp_list.push_back(&cues_.front());
180  CHECK_EQ(next_cue_start_time_, cues_.front().start_time);
181  ready_samples_.push_back(CreateVTTCueBoxesSample(
182  temp_list,
183  next_cue_start_time_,
184  cues_.front().start_time + cues_.front().duration));
185  cues_.clear();
186  return;
187  }
188 
189  bool processed_cue = HandleAllCues();
190  CHECK(processed_cue)
191  << "No cues were processed but the cues should have been flushed.";
192  cues_.clear();
193 }
194 
196  return ready_samples_.size();
197 }
198 
199 std::shared_ptr<MediaSample> WebVttSampleConverter::PopSample() {
200  CHECK(!ready_samples_.empty());
201  std::shared_ptr<MediaSample> ret = ready_samples_.front();
202  ready_samples_.pop_front();
203  return ret;
204 }
205 
206 // TODO(rkuroiwa): Some samples may be ready. Example:
207 // Cues:
208 // |--------- 1 ---------|
209 // |-- 2 --|
210 // |-- 3 --|
211 //
212 // Samples:
213 // |A| B | C |
214 // Samples A, B, and C can be created when Cue 3 is pushed.
215 // Change algorithm to create A,B,C samples right away.
216 // Note that this requires change to the caller on which cues
217 // to remove.
218 bool WebVttSampleConverter::HandleAllCuesButLatest() {
219  DCHECK_GE(cues_.size(), 2u);
220  const Cue& latest_cue = cues_.back();
221 
222  // Don't process the cues until the latest cue doesn't overlap with all the
223  // previous cues.
224  uint64_t max_cue_end_time = 0; // Not including the latest.
225  auto latest_cue_it = --cues_.end();
226  for (auto cue_it = cues_.begin(); cue_it != latest_cue_it; ++cue_it) {
227  const Cue& cue = *cue_it;
228  const uint64_t cue_end_time = cue.start_time + cue.duration;
229  if (cue_end_time > latest_cue.start_time)
230  return false;
231 
232  if (max_cue_end_time < cue_end_time)
233  max_cue_end_time = cue_end_time;
234  }
235  // Reaching here means that the latest cue does not overlap with all
236  // the previous cues.
237 
238  // Because sweep_stop_time is assigned to next_cue_start_time_ it is not
239  // set to latest_cue.start_time here; there may be a gap between
240  // latest_cue.start_time and previous_cue_end_time.
241  // The correctness of SweepCues() doesn't change whether the sweep stops
242  // right before the latest cue or right before the gap.
243  const uint64_t sweep_stop_time = max_cue_end_time;
244  const uint64_t sweep_line_start = cues_.front().start_time;
245  bool processed_cues =
246  SweepCues(sweep_line_start, sweep_stop_time);
247  next_cue_start_time_ = sweep_stop_time;
248  if (next_cue_start_time_ < latest_cue.start_time) {
249  ready_samples_.push_back(CreateEmptyCueSample(next_cue_start_time_,
250  latest_cue.start_time));
251  next_cue_start_time_ = latest_cue.start_time;
252  }
253  return processed_cues;
254 }
255 
256 bool WebVttSampleConverter::HandleAllCues() {
257  uint64_t latest_time = 0u;
258  for (const Cue& cue : cues_) {
259  if (cue.start_time + cue.duration > latest_time)
260  latest_time = cue.start_time + cue.duration;
261  }
262  const uint64_t sweep_line_start = cues_.front().start_time;
263  const uint64_t sweep_stop_time = latest_time;
264  bool processed = SweepCues(sweep_line_start, sweep_stop_time);
265  next_cue_start_time_ = sweep_stop_time;
266  return processed;
267 }
268 
269 bool WebVttSampleConverter::SweepCues(uint64_t sweep_line,
270  uint64_t sweep_stop_time) {
271  bool processed_cues = false;
272  // This is a sweep line algorithm. For every iteration, it determines active
273  // cues and makes a sample.
274  // At the end of an interation |next_start_time| should be set to the minimum
275  // of all the start and end times of the cues that is after |sweep_line|.
276  // |sweep_line| is set to |next_start_time| before the next iteration.
277  while (sweep_line < sweep_stop_time) {
278  std::list<const Cue*> cues_for_a_sample;
279  uint64_t next_start_time = sweep_stop_time;
280 
281  // Put all the cues that should be displayed at sweep_line, in
282  // cues_for_a_sample.
283  // next_start_time is also updated in this loop by checking all the cues.
284  for (const Cue& cue : cues_) {
285  if (cue.start_time >= sweep_stop_time)
286  break;
287  if (cue.start_time >= next_start_time)
288  break;
289 
290  const uint64_t cue_end_time = cue.start_time + cue.duration;
291  if (cue_end_time <= sweep_line)
292  continue;
293  next_start_time = GetMinimumPastSweepLine(
294  cue.start_time, cue_end_time, sweep_line, next_start_time);
295 
296  if (cue.start_time <= sweep_line) {
297  DCHECK_GT(cue_end_time, sweep_line);
298  cues_for_a_sample.push_back(&cue);
299  }
300  }
301 
302  DCHECK(!cues_for_a_sample.empty()) << "For now the only use case of this "
303  "function is to sweep non-empty "
304  "cues.";
305  if (!cues_for_a_sample.empty()) {
306  ready_samples_.push_back(CreateVTTCueBoxesSample(
307  cues_for_a_sample, sweep_line, next_start_time));
308  processed_cues = true;
309  }
310 
311  sweep_line = next_start_time;
312  }
313 
314  DCHECK_EQ(sweep_line, sweep_stop_time);
315  return processed_cues;
316 }
317 
318 } // namespace media
319 } // namespace shaka
static std::shared_ptr< MediaSample > CopyFrom(const uint8_t *data, size_t size, bool is_key_frame)
Definition: media_sample.cc:45
virtual std::shared_ptr< MediaSample > PopSample()