shaka-packager/packager/media/formats/webvtt/webvtt_media_parser.cc

313 lines
9.2 KiB
C++
Raw Permalink Normal View History

// Copyright 2015 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#include "packager/media/formats/webvtt/webvtt_media_parser.h"
#include <string>
#include <vector>
#include "packager/base/logging.h"
#include "packager/base/strings/string_number_conversions.h"
#include "packager/base/strings/string_split.h"
#include "packager/base/strings/string_util.h"
#include "packager/media/base/macros.h"
#include "packager/media/base/media_sample.h"
#include "packager/media/base/text_stream_info.h"
#include "packager/media/formats/webvtt/webvtt_timestamp.h"
namespace shaka {
namespace media {
namespace {
const bool kFlush = true;
// There's only one track in a WebVTT file.
const int kTrackId = 0;
const char kCR = 0x0D;
const char kLF = 0x0A;
// Reads the first line from |data| and removes the line. Returns false if there
// isn't a line break. Sets |line| with the content of the first line without
// the line break.
bool ReadLine(std::string* data, std::string* line) {
if (data->size() == 0) {
return false;
}
size_t string_position = 0;
// Length of the line break mark. 1 for LF and CR, 2 for CRLF.
int line_break_length = 1;
bool found_line_break = false;
while (string_position < data->size()) {
if (data->at(string_position) == kLF) {
found_line_break = true;
break;
}
if (data->at(string_position) == kCR) {
found_line_break = true;
if (string_position + 1 >= data->size())
break;
if (data->at(string_position + 1) == kLF)
line_break_length = 2;
break;
}
++string_position;
}
if (!found_line_break)
return false;
*line = data->substr(0, string_position);
data->erase(0, string_position + line_break_length);
return true;
}
// Clears |settings| and 0s |start_time| and |duration| regardless of the
// parsing result.
bool ParseTimingAndSettingsLine(const std::string& line,
uint64_t* start_time,
uint64_t* duration,
std::string* settings) {
*start_time = 0;
*duration = 0;
settings->clear();
std::vector<std::string> entries = base::SplitString(
line, " ", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
if (entries.size() < 3) {
// The timing is time1 --> time3 so if there aren't 3 entries, this is parse
// error.
LOG(ERROR) << "Not enough tokens to be a timing " << line;
return false;
}
if (entries[1] != "-->") {
LOG(ERROR) << "Cannot find an arrow at the right place " << line;
return false;
}
const std::string& start_time_str = entries[0];
if (!WebVttTimestampToMs(start_time_str, start_time)) {
LOG(ERROR) << "Failed to parse " << start_time_str << " in " << line;
return false;
}
const std::string& end_time_str = entries[2];
uint64_t end_time = 0;
if (!WebVttTimestampToMs(end_time_str, &end_time)) {
LOG(ERROR) << "Failed to parse " << end_time_str << " in " << line;
return false;
}
*duration = end_time - *start_time;
entries.erase(entries.begin(), entries.begin() + 3);
*settings = base::JoinString(entries, " ");
return true;
}
} // namespace
WebVttMediaParser::WebVttMediaParser()
: state_(kHeader), sample_converter_(new WebVttSampleConverter()) {}
WebVttMediaParser::~WebVttMediaParser() {}
void WebVttMediaParser::Init(const InitCB& init_cb,
const NewSampleCB& new_sample_cb,
KeySource* decryption_key_source) {
init_cb_ = init_cb;
new_sample_cb_ = new_sample_cb;
}
bool WebVttMediaParser::Flush() {
// If not in one of these states just be ready for more data.
if (state_ != kCuePayload && state_ != kComment)
return true;
if (!data_.empty()) {
// If it was in the middle of the payload and the stream finished, then this
// is an end of the payload. The rest of the data is part of the payload.
if (state_ == kCuePayload) {
current_cue_.payload += data_ + "\n";
} else {
current_cue_.comment += data_ + "\n";
}
data_.clear();
}
if (!ProcessCurrentCue(kFlush)) {
state_ = kParseError;
return false;
}
state_ = kCueIdentifierOrTimingOrComment;
return true;
}
bool WebVttMediaParser::Parse(const uint8_t* buf, int size) {
if (state_ == kParseError) {
LOG(WARNING) << "The parser is in an error state, ignoring input.";
return false;
}
data_.insert(data_.end(), buf, buf + size);
std::string line;
while (ReadLine(&data_, &line)) {
// Only kCueIdentifierOrTimingOrComment and kCueTiming states accept -->.
// Error otherwise.
const bool has_arrow = line.find("-->") != std::string::npos;
if (state_ == kCueTiming) {
if (!has_arrow) {
LOG(ERROR) << "Expected --> in: " << line;
state_ = kParseError;
return false;
}
} else if (state_ != kCueIdentifierOrTimingOrComment) {
if (has_arrow) {
LOG(ERROR) << "Unexpected --> in " << line;
state_ = kParseError;
return false;
}
}
switch (state_) {
case kHeader:
// No check. This should be WEBVTT when this object was created.
header_.push_back(line);
state_ = kMetadata;
break;
case kMetadata: {
if (line.empty()) {
std::vector<std::shared_ptr<StreamInfo>> streams;
// The resolution of timings are in milliseconds.
const int kTimescale = 1000;
// The duration passed here is not very important. Also the whole file
// must be read before determining the real duration which doesn't
// work nicely with the current demuxer.
const int kDuration = 0;
// There is no one metadata to determine what the language is. Parts
// of the text may be annotated as some specific language.
const char kLanguage[] = "";
const char kWebVttCodecString[] = "wvtt";
streams.emplace_back(
new TextStreamInfo(kTrackId, kTimescale, kDuration,
kCodecWebVtt, kWebVttCodecString,
base::JoinString(header_, "\n"),
0, // Not necessary.
0,
kLanguage)); // Not necessary.
init_cb_.Run(streams);
state_ = kCueIdentifierOrTimingOrComment;
break;
}
header_.push_back(line);
break;
}
case kCueIdentifierOrTimingOrComment: {
// Note that there can be one or more line breaks before a cue starts;
// skip this line.
// Or the file could end without a new cue.
if (line.empty())
break;
if (!has_arrow) {
if (base::StartsWith(line, "NOTE",
base::CompareCase::INSENSITIVE_ASCII)) {
state_ = kComment;
current_cue_.comment += line + "\n";
} else {
// A cue can start from a cue identifier.
// https://w3c.github.io/webvtt/#webvtt-cue-identifier
current_cue_.identifier = line;
// The next line must be a timing.
state_ = kCueTiming;
}
break;
}
// No break statement if the line has an arrow; it should be a WebVTT
// timing, so fall thru. Setting state_ to kCueTiming so that the state
// always matches the case.
state_ = kCueTiming;
FALLTHROUGH_INTENDED;
}
case kCueTiming: {
DCHECK(has_arrow);
if (!ParseTimingAndSettingsLine(line, &current_cue_.start_time,
&current_cue_.duration,
&current_cue_.settings)) {
state_ = kParseError;
return false;
}
state_ = kCuePayload;
break;
}
case kCuePayload: {
if (line.empty()) {
state_ = kCueIdentifierOrTimingOrComment;
if (!ProcessCurrentCue(!kFlush)) {
state_ = kParseError;
return false;
}
break;
}
current_cue_.payload += line + "\n";
break;
}
case kComment: {
if (line.empty()) {
state_ = kCueIdentifierOrTimingOrComment;
if (!ProcessCurrentCue(!kFlush)) {
state_ = kParseError;
return false;
}
break;
}
current_cue_.comment += line + "\n";
break;
}
case kParseError:
NOTREACHED();
return false;
}
}
return true;
}
void WebVttMediaParser::InjectWebVttSampleConvertForTesting(
std::unique_ptr<WebVttSampleConverter> converter) {
sample_converter_ = std::move(converter);
}
bool WebVttMediaParser::ProcessCurrentCue(bool flush) {
sample_converter_->PushCue(current_cue_);
current_cue_ = Cue();
if (flush)
sample_converter_->Flush();
while (sample_converter_->ReadySamplesSize() > 0) {
if (!new_sample_cb_.Run(kTrackId, sample_converter_->PopSample())) {
LOG(ERROR) << "New sample callback failed.";
return false;
}
}
return true;
}
} // namespace media
} // namespace shaka