Shaka Packager SDK
webvtt_utils.cc
1 // Copyright 2017 Google Inc. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file or at
5 // https://developers.google.com/open-source/licenses/bsd
6 
7 #include "packager/media/formats/webvtt/webvtt_utils.h"
8 
9 #include <ctype.h>
10 #include <inttypes.h>
11 
12 #include <regex>
13 #include <unordered_set>
14 
15 #include "packager/base/logging.h"
16 #include "packager/base/strings/string_number_conversions.h"
17 #include "packager/base/strings/stringprintf.h"
18 
19 namespace shaka {
20 namespace media {
21 
22 namespace {
23 
24 bool GetTotalMilliseconds(uint64_t hours,
25  uint64_t minutes,
26  uint64_t seconds,
27  uint64_t ms,
28  uint64_t* out) {
29  DCHECK(out);
30  if (minutes > 59 || seconds > 59 || ms > 999) {
31  VLOG(1) << "Hours:" << hours << " Minutes:" << minutes
32  << " Seconds:" << seconds << " MS:" << ms
33  << " shoud have never made it to GetTotalMilliseconds";
34  return false;
35  }
36  *out = 60 * 60 * 1000 * hours + 60 * 1000 * minutes + 1000 * seconds + ms;
37  return true;
38 }
39 
// The inline text styles that can be written out as WebVTT tags.
enum class StyleTagKind {
  kUnderline,
  kBold,
  kItalic,
};

// Returns the opening tag for |tag|. A value outside the enum yields an empty
// string; that path is not expected to be reached, but it is kept as a plain
// return because Windows doesn't like NOTREACHED.
std::string GetOpenTag(StyleTagKind tag) {
  const char* markup = "";
  switch (tag) {
    case StyleTagKind::kUnderline:
      markup = "<u>";
      break;
    case StyleTagKind::kBold:
      markup = "<b>";
      break;
    case StyleTagKind::kItalic:
      markup = "<i>";
      break;
  }
  return markup;
}

// Returns the closing tag for |tag|. A value outside the enum yields an empty
// string; that path is not expected to be reached, but it is kept as a plain
// return because Windows doesn't like NOTREACHED.
std::string GetCloseTag(StyleTagKind tag) {
  const char* markup = "";
  switch (tag) {
    case StyleTagKind::kUnderline:
      markup = "</u>";
      break;
    case StyleTagKind::kBold:
      markup = "</b>";
      break;
    case StyleTagKind::kItalic:
      markup = "</i>";
      break;
  }
  return markup;
}
69 
70 std::string WriteFragment(const TextFragment& fragment,
71  std::list<StyleTagKind>* tags) {
72  std::string ret;
73  size_t local_tag_count = 0;
74  auto has = [tags](StyleTagKind tag) {
75  return std::find(tags->begin(), tags->end(), tag) != tags->end();
76  };
77  auto push_tag = [tags, &local_tag_count, &has](StyleTagKind tag) {
78  if (has(tag)) {
79  return std::string();
80  }
81  tags->push_back(tag);
82  local_tag_count++;
83  return GetOpenTag(tag);
84  };
85 
86  if ((fragment.style.underline == false && has(StyleTagKind::kUnderline)) ||
87  (fragment.style.bold == false && has(StyleTagKind::kBold)) ||
88  (fragment.style.italic == false && has(StyleTagKind::kItalic))) {
89  LOG(WARNING) << "WebVTT output doesn't support disabling "
90  "underline/bold/italic within a cue";
91  }
92 
93  if (fragment.newline) {
94  // Newlines represent separate WebVTT cues. So close the existing tags to
95  // be nice and re-open them on the new line.
96  for (auto it = tags->rbegin(); it != tags->rend(); it++) {
97  ret += GetCloseTag(*it);
98  }
99  ret += "\n";
100  for (const auto tag : *tags) {
101  ret += GetOpenTag(tag);
102  }
103  } else {
104  if (fragment.style.underline == true) {
105  ret += push_tag(StyleTagKind::kUnderline);
106  }
107  if (fragment.style.bold == true) {
108  ret += push_tag(StyleTagKind::kBold);
109  }
110  if (fragment.style.italic == true) {
111  ret += push_tag(StyleTagKind::kItalic);
112  }
113 
114  if (!fragment.body.empty()) {
115  // Replace newlines and consecutive whitespace with a single space. If
116  // the user wanted an explicit newline, they should use the "newline"
117  // field.
118  std::regex whitespace("\\s+", std::regex_constants::ECMAScript);
119  ret += std::regex_replace(fragment.body, whitespace, std::string(" "));
120  } else {
121  for (const auto& frag : fragment.sub_fragments) {
122  ret += WriteFragment(frag, tags);
123  }
124  }
125 
126  // Pop all the local tags we pushed.
127  while (local_tag_count > 0) {
128  ret += GetCloseTag(tags->back());
129  tags->pop_back();
130  local_tag_count--;
131  }
132  }
133  return ret;
134 }
135 
136 } // namespace
137 
138 bool WebVttTimestampToMs(const base::StringPiece& source, uint64_t* out) {
139  DCHECK(out);
140 
141  if (source.length() < 9) {
142  LOG(WARNING) << "Timestamp '" << source << "' is mal-formed";
143  return false;
144  }
145 
146  const size_t minutes_begin = source.length() - 9;
147  const size_t seconds_begin = source.length() - 6;
148  const size_t milliseconds_begin = source.length() - 3;
149 
150  uint64_t hours = 0;
151  uint64_t minutes = 0;
152  uint64_t seconds = 0;
153  uint64_t ms = 0;
154 
155  const bool has_hours =
156  minutes_begin >= 3 && source[minutes_begin - 1] == ':' &&
157  base::StringToUint64(source.substr(0, minutes_begin - 1), &hours);
158 
159  if ((minutes_begin == 0 || has_hours) && source[seconds_begin - 1] == ':' &&
160  source[milliseconds_begin - 1] == '.' &&
161  base::StringToUint64(source.substr(minutes_begin, 2), &minutes) &&
162  base::StringToUint64(source.substr(seconds_begin, 2), &seconds) &&
163  base::StringToUint64(source.substr(milliseconds_begin, 3), &ms)) {
164  return GetTotalMilliseconds(hours, minutes, seconds, ms, out);
165  }
166 
167  LOG(WARNING) << "Timestamp '" << source << "' is mal-formed";
168  return false;
169 }
170 
171 std::string MsToWebVttTimestamp(uint64_t ms) {
172  uint64_t remaining = ms;
173 
174  uint64_t only_ms = remaining % 1000;
175  remaining /= 1000;
176  uint64_t only_seconds = remaining % 60;
177  remaining /= 60;
178  uint64_t only_minutes = remaining % 60;
179  remaining /= 60;
180  uint64_t only_hours = remaining;
181 
182  return base::StringPrintf("%02" PRIu64 ":%02" PRIu64 ":%02" PRIu64
183  ".%03" PRIu64,
184  only_hours, only_minutes, only_seconds, only_ms);
185 }
186 
187 std::string WebVttSettingsToString(const TextSettings& settings) {
188  std::string ret;
189  if (!settings.region.empty()) {
190  ret += " region:";
191  ret += settings.region;
192  }
193  if (settings.line) {
194  switch (settings.line->type) {
195  case TextUnitType::kPercent:
196  ret += " line:";
197  ret += base::DoubleToString(settings.line->value);
198  ret += "%";
199  break;
200  case TextUnitType::kLines:
201  ret += " line:";
202  ret += base::DoubleToString(settings.line->value);
203  break;
204  case TextUnitType::kPixels:
205  LOG(WARNING) << "WebVTT doesn't support pixel line settings";
206  break;
207  }
208  }
209  if (settings.position) {
210  if (settings.position->type == TextUnitType::kPercent) {
211  ret += " position:";
212  ret += base::DoubleToString(settings.position->value);
213  ret += "%";
214  } else {
215  LOG(WARNING) << "WebVTT only supports percent position settings";
216  }
217  }
218  if (settings.width) {
219  if (settings.width->type == TextUnitType::kPercent) {
220  ret += " size:";
221  ret += base::DoubleToString(settings.width->value);
222  ret += "%";
223  } else {
224  LOG(WARNING) << "WebVTT only supports percent width settings";
225  }
226  }
227  if (settings.height) {
228  LOG(WARNING) << "WebVTT doesn't support cue heights";
229  }
230  if (settings.writing_direction != WritingDirection::kHorizontal) {
231  ret += " direction:";
232  if (settings.writing_direction == WritingDirection::kVerticalGrowingLeft) {
233  ret += "rl";
234  } else {
235  ret += "lr";
236  }
237  }
238  switch (settings.text_alignment) {
239  case TextAlignment::kStart:
240  ret += " align:start";
241  break;
242  case TextAlignment::kEnd:
243  ret += " align:end";
244  break;
245  case TextAlignment::kLeft:
246  ret += " align:left";
247  break;
248  case TextAlignment::kRight:
249  ret += " align:right";
250  break;
251  case TextAlignment::kCenter:
252  break;
253  }
254 
255  if (!ret.empty()) {
256  DCHECK_EQ(ret[0], ' ');
257  ret.erase(0, 1);
258  }
259  return ret;
260 }
261 
262 std::string WebVttFragmentToString(const TextFragment& fragment) {
263  std::list<StyleTagKind> tags;
264  return WriteFragment(fragment, &tags);
265 }
266 
267 std::string WebVttGetPreamble(const TextStreamInfo& stream_info) {
268  std::string ret;
269  for (const auto& pair : stream_info.regions()) {
270  if (!ret.empty()) {
271  ret += "\n\n";
272  }
273 
274  if (pair.second.width.type != TextUnitType::kPercent ||
275  pair.second.height.type != TextUnitType::kLines ||
276  pair.second.window_anchor_x.type != TextUnitType::kPercent ||
277  pair.second.window_anchor_y.type != TextUnitType::kPercent ||
278  pair.second.region_anchor_x.type != TextUnitType::kPercent ||
279  pair.second.region_anchor_y.type != TextUnitType::kPercent) {
280  LOG(WARNING) << "Unsupported unit type in WebVTT region";
281  continue;
282  }
283 
284  base::StringAppendF(
285  &ret,
286  "REGION\n"
287  "id:%s\n"
288  "width:%f%%\n"
289  "lines:%d\n"
290  "viewportanchor:%f%%,%f%%\n"
291  "regionanchor:%f%%,%f%%",
292  pair.first.c_str(), pair.second.width.value,
293  static_cast<int>(pair.second.height.value),
294  pair.second.window_anchor_x.value, pair.second.window_anchor_y.value,
295  pair.second.region_anchor_x.value, pair.second.region_anchor_y.value);
296  if (pair.second.scroll) {
297  ret += "\nscroll:up";
298  }
299  }
300 
301  if (!stream_info.css_styles().empty()) {
302  if (!ret.empty()) {
303  ret += "\n\n";
304  }
305  ret += "STYLE\n" + stream_info.css_styles();
306  }
307 
308  return ret;
309 }
310 
311 } // namespace media
312 } // namespace shaka
All the methods that are virtual are virtual for mocking.