DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator
webvtt_sample_converter.cc
1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file or at
5 // https://developers.google.com/open-source/licenses/bsd
6 
7 #include "packager/media/formats/webvtt/webvtt_sample_converter.h"
8 
9 #include <algorithm>
10 #include <string>
11 
12 #include "packager/base/strings/string_util.h"
13 #include "packager/base/strings/stringprintf.h"
14 #include "packager/media/base/buffer_writer.h"
15 #include "packager/media/base/media_sample.h"
16 #include "packager/media/formats/mp4/box_buffer.h"
17 #include "packager/media/formats/mp4/box_definitions.h"
18 
19 namespace shaka {
20 namespace media {
21 
22 namespace {
23 
24 std::shared_ptr<MediaSample> CreateEmptyCueSample(uint64_t start_time,
25  uint64_t end_time) {
26  DCHECK_GT(end_time, start_time);
27  mp4::VTTEmptyCueBox empty_cue_box;
28 
29  std::vector<uint8_t> serialized;
30  AppendBoxToVector(&empty_cue_box, &serialized);
31 
32  std::shared_ptr<MediaSample> empty_cue_sample = MediaSample::CopyFrom(
33  serialized.data(), serialized.size(), false);
34  empty_cue_sample->set_pts(start_time);
35  empty_cue_sample->set_duration(end_time - start_time);
36  return empty_cue_sample;
37 }
38 
39 mp4::VTTCueBox CueBoxFromCue(const Cue& cue) {
40  mp4::VTTCueBox cue_box;
41  if (!cue.identifier.empty()) {
42  cue_box.cue_id.cue_id = cue.identifier;
43  }
44 
45  if (!cue.settings.empty()) {
46  cue_box.cue_settings.settings = cue.settings;
47  }
48 
49  cue_box.cue_payload.cue_text = cue.payload.front();
50  return cue_box;
51 }
52 
53 std::string TimeToWebVttTimeStamp(uint64_t time_in_ms) {
54  const int milliseconds = time_in_ms % 1000;
55  const uint64_t seconds_left = time_in_ms / 1000;
56  const int seconds = seconds_left % 60;
57  const uint64_t minutes_left = seconds_left / 60;
58  const int minutes = minutes_left % 60;
59  const int hours = minutes_left / 60;
60 
61  return base::StringPrintf("%02d:%02d:%02d.%03d", hours, minutes, seconds,
62  milliseconds);
63 }
64 
65 std::shared_ptr<MediaSample> CreateVTTCueBoxesSample(
66  const std::list<const Cue*>& cues,
67  uint64_t start_time,
68  uint64_t end_time) {
69  // TODO(rkuroiwa): Source IDs must be assigned to the cues and the same cue
70  // should have the same ID in different samples. Probably requires a mapping
71  // from cues to IDs.
72  CHECK(!cues.empty());
73 
74  std::vector<uint8_t> data;
75  std::string cue_current_time = TimeToWebVttTimeStamp(start_time);
76 
77  BufferWriter writer;
78  for (const Cue* cue : cues) {
79  mp4::VTTCueBox cue_box = CueBoxFromCue(*cue);
80  // If there is internal timing, i.e. WebVTT cue timestamp, then
81  // cue_current_time should be populated
82  // "which gives the VTT timestamp associated with the start time of sample."
83  // TODO(rkuroiwa): Reuse TimestampToMilliseconds() to check if there is an
84  // internal timestamp in the payload to set CueTimeBox.cue_current_time.
85  cue_box.Write(&writer);
86  }
87 
88  std::shared_ptr<MediaSample> sample =
89  MediaSample::CopyFrom(writer.Buffer(), writer.Size(), false);
90  sample->set_pts(start_time);
91  sample->set_duration(end_time - start_time);
92  return sample;
93 }
94 
95 // This function returns the minimum of cue_start_time, cue_end_time,
96 // current_minimum should be bigger than sweep_line.
97 uint64_t GetMinimumPastSweepLine(uint64_t cue_start_time,
98  uint64_t cue_end_time,
99  uint64_t sweep_line,
100  uint64_t current_minimum) {
101  DCHECK_GE(current_minimum, sweep_line);
102  if (cue_end_time <= sweep_line)
103  return current_minimum;
104 
105  // Anything below is cue_end_time > sweep_line.
106  if (cue_start_time > sweep_line) {
107  // The start time of this cue is past the sweepline, return the min.
108  return std::min(cue_start_time, current_minimum);
109  } else {
110  // The sweep line is at the start or in the middle of a cue.
111  return std::min(cue_end_time, current_minimum);
112  }
113 }
114 
115 } // namespace
116 
117 void AppendBoxToVector(mp4::Box* box, std::vector<uint8_t>* output_vector) {
118  BufferWriter writer;
119  box->Write(&writer);
120  output_vector->insert(output_vector->end(),
121  writer.Buffer(),
122  writer.Buffer() + writer.Size());
123 }
124 
125 WebVttSampleConverter::WebVttSampleConverter() : next_cue_start_time_(0u) {}
126 WebVttSampleConverter::~WebVttSampleConverter() {}
127 
128 // Note that this |sample| is either a cue or a comment. It does not have any
129 // info on whether the next cue is overlapping or not.
130 void WebVttSampleConverter::PushSample(std::shared_ptr<MediaSample> sample) {
131  if (sample->data_size() == 0u) {
132  // A comment. Put it in the buffer and skip.
134  comment.cue_additional_text.assign(
135  sample->side_data(), sample->side_data() + sample->side_data_size());
136  additional_texts_.push_back(comment);
137  // TODO(rkuriowa): Handle comments as samples.
138 
139  return;
140  }
141 
142  cues_.push_back(MediaSampleToCue(*sample));
143  if (cues_.size() == 1) {
144  // Cannot make a decision with just one sample. Cache it and wait for
145  // another one.
146  next_cue_start_time_ = cues_.front().start_time;
147  return;
148  }
149 
150  CHECK_GE(cues_.size(), 2u);
151  // TODO(rkuroiwa): This isn't wrong but all the cues where
152  // endtime < latest cue start time
153  // can be processed. Change the logic so that if there are cues that meet the
154  // condition above, create samples immediately and remove them.
155  // Note: This doesn't mean that all the cues can be removed, just the ones
156  // that meet the condition.
157  bool processed_cues = HandleAllCuesButLatest();
158  if (!processed_cues)
159  return;
160 
161  // Remove all the cues except the latest one.
162  auto erase_last_iterator = --cues_.end();
163  cues_.erase(cues_.begin(), erase_last_iterator);
164 }
165 
167  if (cues_.empty())
168  return;
169  if (cues_.size() == 1) {
170  std::list<const Cue*> temp_list;
171  temp_list.push_back(&cues_.front());
172  CHECK_EQ(next_cue_start_time_, cues_.front().start_time);
173  ready_samples_.push_back(CreateVTTCueBoxesSample(
174  temp_list,
175  next_cue_start_time_,
176  cues_.front().start_time + cues_.front().duration));
177  cues_.clear();
178  return;
179  }
180 
181  bool processed_cue = HandleAllCues();
182  CHECK(processed_cue)
183  << "No cues were processed but the cues should have been flushed.";
184  cues_.clear();
185 }
186 
188  return ready_samples_.size();
189 }
190 
191 std::shared_ptr<MediaSample> WebVttSampleConverter::PopSample() {
192  CHECK(!ready_samples_.empty());
193  std::shared_ptr<MediaSample> ret = ready_samples_.front();
194  ready_samples_.pop_front();
195  return ret;
196 }
197 
198 // TODO(rkuroiwa): Some samples may be ready. Example:
199 // Cues:
200 // |--------- 1 ---------|
201 // |-- 2 --|
202 // |-- 3 --|
203 //
204 // Samples:
205 // |A| B | C |
206 // Samples A, B, and C can be created when Cue 3 is pushed.
207 // Change algorithm to create A,B,C samples right away.
208 // Note that this requires change to the caller on which cues
209 // to remove.
210 bool WebVttSampleConverter::HandleAllCuesButLatest() {
211  DCHECK_GE(cues_.size(), 2u);
212  const Cue& latest_cue = cues_.back();
213 
214  // Don't process the cues until the latest cue doesn't overlap with all the
215  // previous cues.
216  uint64_t max_cue_end_time = 0; // Not including the latest.
217  auto latest_cue_it = --cues_.end();
218  for (auto cue_it = cues_.begin(); cue_it != latest_cue_it; ++cue_it) {
219  const Cue& cue = *cue_it;
220  const uint64_t cue_end_time = cue.start_time + cue.duration;
221  if (cue_end_time > latest_cue.start_time)
222  return false;
223 
224  if (max_cue_end_time < cue_end_time)
225  max_cue_end_time = cue_end_time;
226  }
227  // Reaching here means that the latest cue does not overlap with all
228  // the previous cues.
229 
230  // Because sweep_stop_time is assigned to next_cue_start_time_ it is not
231  // set to latest_cue.start_time here; there may be a gap between
232  // latest_cue.start_time and previous_cue_end_time.
233  // The correctness of SweepCues() doesn't change whether the sweep stops
234  // right before the latest cue or right before the gap.
235  const uint64_t sweep_stop_time = max_cue_end_time;
236  const uint64_t sweep_line_start = cues_.front().start_time;
237  bool processed_cues =
238  SweepCues(sweep_line_start, sweep_stop_time);
239  next_cue_start_time_ = sweep_stop_time;
240  if (next_cue_start_time_ < latest_cue.start_time) {
241  ready_samples_.push_back(CreateEmptyCueSample(next_cue_start_time_,
242  latest_cue.start_time));
243  next_cue_start_time_ = latest_cue.start_time;
244  }
245  return processed_cues;
246 }
247 
248 bool WebVttSampleConverter::HandleAllCues() {
249  uint64_t latest_time = 0u;
250  for (const Cue& cue : cues_) {
251  if (cue.start_time + cue.duration > latest_time)
252  latest_time = cue.start_time + cue.duration;
253  }
254  const uint64_t sweep_line_start = cues_.front().start_time;
255  const uint64_t sweep_stop_time = latest_time;
256  bool processed = SweepCues(sweep_line_start, sweep_stop_time);
257  next_cue_start_time_ = sweep_stop_time;
258  return processed;
259 }
260 
261 bool WebVttSampleConverter::SweepCues(uint64_t sweep_line,
262  uint64_t sweep_stop_time) {
263  bool processed_cues = false;
264  // This is a sweep line algorithm. For every iteration, it determines active
265  // cues and makes a sample.
266  // At the end of an interation |next_start_time| should be set to the minimum
267  // of all the start and end times of the cues that is after |sweep_line|.
268  // |sweep_line| is set to |next_start_time| before the next iteration.
269  while (sweep_line < sweep_stop_time) {
270  std::list<const Cue*> cues_for_a_sample;
271  uint64_t next_start_time = sweep_stop_time;
272 
273  // Put all the cues that should be displayed at sweep_line, in
274  // cues_for_a_sample.
275  // next_start_time is also updated in this loop by checking all the cues.
276  for (const Cue& cue : cues_) {
277  if (cue.start_time >= sweep_stop_time)
278  break;
279  if (cue.start_time >= next_start_time)
280  break;
281 
282  const uint64_t cue_end_time = cue.start_time + cue.duration;
283  if (cue_end_time <= sweep_line)
284  continue;
285  next_start_time = GetMinimumPastSweepLine(
286  cue.start_time, cue_end_time, sweep_line, next_start_time);
287 
288  if (cue.start_time <= sweep_line) {
289  DCHECK_GT(cue_end_time, sweep_line);
290  cues_for_a_sample.push_back(&cue);
291  }
292  }
293 
294  DCHECK(!cues_for_a_sample.empty()) << "For now the only use case of this "
295  "function is to sweep non-empty "
296  "cues.";
297  if (!cues_for_a_sample.empty()) {
298  ready_samples_.push_back(CreateVTTCueBoxesSample(
299  cues_for_a_sample, sweep_line, next_start_time));
300  processed_cues = true;
301  }
302 
303  sweep_line = next_start_time;
304  }
305 
306  DCHECK_EQ(sweep_line, sweep_stop_time);
307  return processed_cues;
308 }
309 
310 } // namespace media
311 } // namespace shaka
void PushSample(std::shared_ptr< MediaSample > sample)
static std::shared_ptr< MediaSample > CopyFrom(const uint8_t *data, size_t size, bool is_key_frame)
Definition: media_sample.cc:45
std::shared_ptr< MediaSample > PopSample()