7 #include "packager/media/formats/webvtt/webvtt_parser.h" 12 #include "packager/base/logging.h" 13 #include "packager/base/strings/string_split.h" 14 #include "packager/base/strings/string_util.h" 15 #include "packager/media/base/text_stream_info.h" 16 #include "packager/media/formats/webvtt/webvtt_timestamp.h" 17 #include "packager/status_macros.h" 22 const uint64_t kStreamIndex = 0;
// Builds a printable dump of a text block for log output. The result opens
// with a " --- BLOCK START ---" banner line and closes with a
// " --- BLOCK END ---" banner. The loop walks indices [0, size) of |block|.
// NOTE(review): the loop body and the return statement are not visible in
// this listing - confirm how each line is appended before relying on the
// exact output format.
24 std::string BlockToString(
const std::string* block,
size_t size) {
25 std::string out =
" --- BLOCK START ---\n";
27 for (
size_t i = 0; i < size; i++) {
33 out.append(
" --- BLOCK END ---");
42 bool IsLikelyNote(
const std::string& line) {
43 return line ==
"NOTE" ||
44 base::StartsWith(line,
"NOTE ", base::CompareCase::SENSITIVE) ||
45 base::StartsWith(line,
"NOTE\t", base::CompareCase::SENSITIVE);
// Returns true when |line| contains the arrow "-->" anywhere. This is only a
// cheap classification test; it does not validate the timestamps on either
// side of the arrow.
bool IsLikelyCueTiming(const std::string& line) {
  return line.find("-->") != std::string::npos;
}
// Returns true when |line| could serve as a cue identifier, i.e. it does not
// contain the arrow "-->" anywhere. Every other string, including the empty
// string, is accepted.
bool MaybeCueId(const std::string& line) {
  return line.find("-->") == std::string::npos;
}
68 bool IsLikelyStyle(
const std::string& line) {
69 return base::TrimWhitespaceASCII(line, base::TRIM_TRAILING) ==
"STYLE";
76 bool IsLikelyRegion(
const std::string& line) {
77 return base::TrimWhitespaceASCII(line, base::TRIM_TRAILING) ==
"REGION";
// Appends the lines of |block|, joined with "\n", to the string that |config|
// points at. |config| is dereferenced unconditionally in the visible code.
// NOTE(review): the statements between the signature and the append are not
// visible in this listing - confirm whether a separator is inserted between
// successive blocks.
80 void UpdateConfig(
const std::vector<std::string>& block, std::string* config) {
83 *config += base::JoinString(block,
"\n");
88 WebVttParser::WebVttParser(std::unique_ptr<FileReader> source,
89 const std::string& language)
90 : reader_(
std::move(source)), language_(language) {}
// Initialization hook returning a Status.
// NOTE(review): the body is not visible in this listing - confirm what, if
// anything, is set up here.
92 Status WebVttParser::InitializeInternal() {
// Reports whether |stream_index| is an acceptable output index. The visible
// return accepts only kStreamIndex, so the parser has a single output.
96 bool WebVttParser::ValidateOutputStreamIndex(
size_t stream_index)
const {
98 return stream_index == kStreamIndex;
// Entry point that yields one of two results: the Status returned by
// FlushDownstream(kStreamIndex), or an INTERNAL_ERROR Status whose message
// points the caller at the log.
// NOTE(review): the condition that selects between the two branches is not
// visible in this listing - presumably the result of Parse(); confirm.
101 Status WebVttParser::Run() {
103 ? FlushDownstream(kStreamIndex)
104 : Status(error::INTERNAL_ERROR,
105 "Failed to parse WebVTT source. See log for details.");
108 void WebVttParser::Cancel() {
109 keep_reading_ =
false;
// Reads the source one block at a time. The first block is expected to be a
// single line equal to "WEBVTT", optionally preceded by a UTF-8 byte order
// mark. Each following block is tested, in order, as a note, a style block, a
// region block, a cue with an id, and a cue without an id. The visible final
// statement returns keep_reading_.
// NOTE(review): many lines of this function are not visible in this listing,
// including whatever follows each LOG(ERROR) and each successful
// classification - confirm the early-exit and continue behavior.
112 bool WebVttParser::Parse() {
113 std::vector<std::string> block;
// The header block must exist at all.
114 if (!reader_.Next(&block)) {
115 LOG(ERROR) <<
"Failed to read WEBVTT HEADER - No blocks in source.";
// The header block must be exactly one line long.
121 if (block.size() != 1) {
122 LOG(ERROR) <<
"Failed to read WEBVTT header - " 123 <<
"block size should be 1 but was " << block.size() <<
".";
// Accept the magic string with or without a leading UTF-8 BOM (EF BB BF).
126 if (block[0] !=
"WEBVTT" && block[0] !=
"\xEF\xBB\xBFWEBVTT") {
127 LOG(ERROR) <<
"Failed to read WEBVTT header - should be WEBVTT but was " 132 bool saw_cue =
false;
// Keep consuming blocks until the reader has none left or keep_reading_ has
// been cleared (see Cancel()).
134 while (reader_.Next(&block) && keep_reading_) {
136 if (IsLikelyNote(block[0])) {
// Style blocks are folded into style_region_config_; the log text below
// indicates they are ignored once a cue has been seen.
142 if (IsLikelyStyle(block[0])) {
145 <<
"Found style block after seeing cue. Ignoring style block";
147 UpdateConfig(block, &style_region_config_);
// Region blocks are handled the same way as style blocks.
153 if (IsLikelyRegion(block[0])) {
156 <<
"Found region block after seeing cue. Ignoring region block";
158 UpdateConfig(block, &style_region_config_);
// Cue with an id: the first line has no "-->" and the second line does.
164 if (block.size() >= 2 && MaybeCueId(block[0]) &&
165 IsLikelyCueTiming(block[1]) && ParseCueWithId(block)) {
// Cue without an id: the first line is itself the timing line.
171 if (IsLikelyCueTiming(block[0]) && ParseCueWithNoId(block)) {
// Nothing matched (or cue parsing failed): dump the block to the log.
176 LOG(ERROR) <<
"Failed to determine block classification:\n" 177 << BlockToString(block.data(), block.size());
181 return keep_reading_;
// Parses |block| as a cue that has no identifier: ParseCue() receives an
// empty id together with every line of the block. The error message of the
// returned Status is written to the log.
// NOTE(review): the status check and the return statements are not visible in
// this listing - presumably returns true on an OK status; confirm.
184 bool WebVttParser::ParseCueWithNoId(
const std::vector<std::string>& block) {
185 const Status status = ParseCue(
"", block.data(), block.size());
188 LOG(ERROR) <<
"Failed to parse cue: " << status.error_message();
// Parses |block| as a cue whose first line is its identifier: block[0] is
// passed to ParseCue() as the id and the remaining size() - 1 lines as the
// cue itself. |block| must therefore be non-empty. The error message of the
// returned Status is written to the log.
// NOTE(review): the status check and the return statements are not visible in
// this listing - presumably returns true on an OK status; confirm.
194 bool WebVttParser::ParseCueWithId(
const std::vector<std::string>& block) {
195 const Status status = ParseCue(block[0], block.data() + 1, block.size() - 1);
198 LOG(ERROR) <<
"Failed to parse cue: " << status.error_message();
// Turns one cue into a TextSample and dispatches it on kStreamIndex.
// block[0] is the timing line ("<start> --> <end>" followed by optional extra
// tokens); the lines after it are the payload. Returns the Status from
// DispatchTextSample() on the visible success path, or an INTERNAL_ERROR
// Status when the timing line cannot be parsed.
// NOTE(review): the end of the parameter list (a |block_size| parameter is
// used below) and several statements are not visible in this listing,
// including what is returned when a cue is skipped and where |id| is used.
204 Status WebVttParser::ParseCue(
const std::string&
id,
205 const std::string* block,
// Break the timing line into space-separated tokens, dropping empty ones.
207 const std::vector<std::string> time_and_style = base::SplitString(
208 block[0],
" ", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
210 uint64_t start_time = 0;
211 uint64_t end_time = 0;
// At least three tokens are required: start time, the arrow, end time.
213 const bool parsed_time =
214 time_and_style.size() >= 3 && time_and_style[1] ==
"-->" &&
215 WebVttTimestampToMs(time_and_style[0], &start_time) &&
216 WebVttTimestampToMs(time_and_style[2], &end_time);
220 error::INTERNAL_ERROR,
221 "Could not parse start time, -->, and end time from " + block[0]);
// Stream info is sent only while stream_info_dispatched_ is still false.
224 if (!stream_info_dispatched_)
225 RETURN_IF_ERROR(DispatchTextStreamInfo());
// A cue whose end is not strictly after its start is logged as
// non-compliant; the log text says it is skipped.
238 if (end_time <= start_time) {
239 LOG(WARNING) <<
"WebVTT input is not spec compliant. Start time (" 240 << start_time <<
") should be less than end time (" << end_time
241 <<
"). Skipping webvtt cue:" 242 << BlockToString(block, block_size);
247 std::shared_ptr<TextSample> sample = std::make_shared<TextSample>();
249 sample->SetTime(start_time, end_time);
// Tokens after the end time on the timing line are appended as style.
252 for (
size_t i = 3; i < time_and_style.size(); i++) {
253 sample->AppendStyle(time_and_style[i]);
// Every line after the timing line is appended as payload.
257 for (
size_t i = 1; i < block_size; i++) {
258 sample->AppendPayload(block[i]);
261 return DispatchTextSample(kStreamIndex, sample);
// Marks the stream info as dispatched, builds a TextStreamInfo from fixed
// constants plus style_region_config_ and language_, and sends it downstream
// on kStreamIndex. Returns the Status from DispatchStreamInfo().
264 Status WebVttParser::DispatchTextStreamInfo() {
// Set before dispatching, so the flag is true even if the dispatch fails.
265 stream_info_dispatched_ =
true;
267 const int kTrackId = 0;
// NOTE(review): 1000 presumably means millisecond units, matching the
// WebVttTimestampToMs() values used for sample times - confirm.
269 const int kTimescale = 1000;
273 const int kDuration = 0;
274 const char kWebVttCodecString[] =
"wvtt";
275 const int64_t kNoWidth = 0;
276 const int64_t kNoHeight = 0;
278 std::shared_ptr<StreamInfo> info = std::make_shared<TextStreamInfo>(
279 kTrackId, kTimescale, kDuration, kCodecWebVtt, kWebVttCodecString,
280 style_region_config_, kNoWidth, kNoHeight, language_);
282 return DispatchStreamInfo(kStreamIndex, std::move(info));
All the methods that are virtual are virtual for mocking.