// Copyright 2015 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
|
|
#include "packager/media/formats/webvtt/webvtt_media_parser.h"
|
|
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
#include "packager/base/logging.h"
|
|
#include "packager/base/strings/string_number_conversions.h"
|
|
#include "packager/base/strings/string_split.h"
|
|
#include "packager/base/strings/string_util.h"
|
|
#include "packager/media/base/macros.h"
|
|
#include "packager/media/base/media_sample.h"
|
|
#include "packager/media/base/text_stream_info.h"
|
|
|
|
namespace shaka {
|
|
namespace media {
|
|
|
|
namespace {
|
|
|
|
// There's only one track in a WebVTT file.
|
|
const int kTrackId = 0;
|
|
|
|
const char kCR = 0x0D;
|
|
const char kLF = 0x0A;
|
|
|
|
// Reads the first line from |data| and removes the line. Returns false if there
|
|
// isn't a line break. Sets |line| with the content of the first line without
|
|
// the line break.
|
|
bool ReadLine(std::string* data, std::string* line) {
|
|
if (data->size() == 0) {
|
|
return false;
|
|
}
|
|
size_t string_position = 0;
|
|
// Length of the line break mark. 1 for LF and CR, 2 for CRLF.
|
|
int line_break_length = 1;
|
|
bool found_line_break = false;
|
|
while (string_position < data->size()) {
|
|
if (data->at(string_position) == kLF) {
|
|
found_line_break = true;
|
|
break;
|
|
}
|
|
|
|
if (data->at(string_position) == kCR) {
|
|
found_line_break = true;
|
|
if (string_position + 1 >= data->size())
|
|
break;
|
|
|
|
if (data->at(string_position + 1) == kLF)
|
|
line_break_length = 2;
|
|
break;
|
|
}
|
|
|
|
++string_position;
|
|
}
|
|
|
|
if (!found_line_break)
|
|
return false;
|
|
|
|
*line = data->substr(0, string_position);
|
|
data->erase(0, string_position + line_break_length);
|
|
return true;
|
|
}
|
|
|
|
bool TimestampToMilliseconds(const std::string& original_str,
|
|
uint64_t* time_ms) {
|
|
const size_t kMinimalHoursLength = 2;
|
|
const size_t kMinutesLength = 2;
|
|
const size_t kSecondsLength = 2;
|
|
const size_t kMillisecondsLength = 3;
|
|
|
|
// +2 for a colon and a dot for splitting minutes and seconds AND seconds and
|
|
// milliseconds, respectively.
|
|
const size_t kMinimalLength =
|
|
kMinutesLength + kSecondsLength + kMillisecondsLength + 2;
|
|
|
|
base::StringPiece str(original_str);
|
|
if (str.size() < kMinimalLength)
|
|
return false;
|
|
|
|
int hours = 0;
|
|
int minutes = 0;
|
|
int seconds = 0;
|
|
int milliseconds = 0;
|
|
|
|
size_t str_index = 0;
|
|
if (str.size() > kMinimalLength) {
|
|
// Check if hours is in the right format, if so get the number.
|
|
// -1 for excluding colon for splitting hours and minutes.
|
|
const size_t hours_length = str.size() - kMinimalLength - 1;
|
|
if (hours_length < kMinimalHoursLength)
|
|
return false;
|
|
if (!base::StringToInt(str.substr(0, hours_length), &hours))
|
|
return false;
|
|
str_index += hours_length;
|
|
|
|
if (str[str_index] != ':')
|
|
return false;
|
|
++str_index;
|
|
}
|
|
|
|
DCHECK_EQ(str.size() - str_index, kMinimalLength);
|
|
|
|
if (!base::StringToInt(str.substr(str_index, kMinutesLength), &minutes))
|
|
return false;
|
|
if (minutes < 0 || minutes > 60)
|
|
return false;
|
|
|
|
str_index += kMinutesLength;
|
|
if (str[str_index] != ':')
|
|
return false;
|
|
++str_index;
|
|
|
|
if (!base::StringToInt(str.substr(str_index, kSecondsLength), &seconds))
|
|
return false;
|
|
if (seconds < 0 || seconds > 60)
|
|
return false;
|
|
|
|
str_index += kSecondsLength;
|
|
if (str[str_index] != '.')
|
|
return false;
|
|
++str_index;
|
|
|
|
if (!base::StringToInt(str.substr(str_index, kMillisecondsLength),
|
|
&milliseconds)) {
|
|
return false;
|
|
}
|
|
str_index += kMillisecondsLength;
|
|
|
|
if (milliseconds < 0 || milliseconds > 999)
|
|
return false;
|
|
|
|
DCHECK_EQ(str.size(), str_index);
|
|
*time_ms = milliseconds +
|
|
seconds * 1000 +
|
|
minutes * 60 * 1000 +
|
|
hours * 60 * 60 * 1000;
|
|
return true;
|
|
}
|
|
|
|
// Clears |settings| and 0s |start_time| and |duration| regardless of the
|
|
// parsing result.
|
|
bool ParseTimingAndSettingsLine(const std::string& line,
|
|
uint64_t* start_time,
|
|
uint64_t* duration,
|
|
std::string* settings) {
|
|
*start_time = 0;
|
|
*duration = 0;
|
|
settings->clear();
|
|
std::vector<std::string> entries = base::SplitString(
|
|
line, " ", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
|
|
if (entries.size() < 3) {
|
|
// The timing is time1 --> time3 so if there aren't 3 entries, this is parse
|
|
// error.
|
|
LOG(ERROR) << "Not enough tokens to be a timing " << line;
|
|
return false;
|
|
}
|
|
|
|
if (entries[1] != "-->") {
|
|
LOG(ERROR) << "Cannot find an arrow at the right place " << line;
|
|
return false;
|
|
}
|
|
|
|
const std::string& start_time_str = entries[0];
|
|
if (!TimestampToMilliseconds(start_time_str, start_time)) {
|
|
LOG(ERROR) << "Failed to parse " << start_time_str << " in " << line;
|
|
return false;
|
|
}
|
|
|
|
const std::string& end_time_str = entries[2];
|
|
uint64_t end_time = 0;
|
|
if (!TimestampToMilliseconds(end_time_str, &end_time)) {
|
|
LOG(ERROR) << "Failed to parse " << end_time_str << " in " << line;
|
|
return false;
|
|
}
|
|
*duration = end_time - *start_time;
|
|
|
|
entries.erase(entries.begin(), entries.begin() + 3);
|
|
*settings = base::JoinString(entries, " ");
|
|
return true;
|
|
}
|
|
|
|
// Mapping:
|
|
// comment --> side data (and side data only sample)
|
|
// settings --> side data
|
|
// start_time --> pts
|
|
std::shared_ptr<MediaSample> CueToMediaSample(const Cue& cue) {
|
|
const bool kKeyFrame = true;
|
|
if (!cue.comment.empty()) {
|
|
const std::string comment = base::JoinString(cue.comment, "\n");
|
|
return MediaSample::FromMetadata(
|
|
reinterpret_cast<const uint8_t*>(comment.data()), comment.size());
|
|
}
|
|
|
|
const std::string payload = base::JoinString(cue.payload, "\n");
|
|
std::shared_ptr<MediaSample> media_sample = MediaSample::CopyFrom(
|
|
reinterpret_cast<const uint8_t*>(payload.data()), payload.size(),
|
|
reinterpret_cast<const uint8_t*>(cue.settings.data()),
|
|
cue.settings.size(), !kKeyFrame);
|
|
|
|
media_sample->set_config_id(cue.identifier);
|
|
media_sample->set_pts(cue.start_time);
|
|
media_sample->set_duration(cue.duration);
|
|
return media_sample;
|
|
}
|
|
|
|
} // namespace
|
|
|
|
Cue::Cue() : start_time(0), duration(0) {}
|
|
Cue::~Cue() {}
|
|
|
|
WebVttMediaParser::WebVttMediaParser() : state_(kHeader) {}
|
|
WebVttMediaParser::~WebVttMediaParser() {}
|
|
|
|
void WebVttMediaParser::Init(const InitCB& init_cb,
|
|
const NewSampleCB& new_sample_cb,
|
|
KeySource* decryption_key_source) {
|
|
init_cb_ = init_cb;
|
|
new_sample_cb_ = new_sample_cb;
|
|
}
|
|
|
|
bool WebVttMediaParser::Flush() {
|
|
// If not in one of these states just be ready for more data.
|
|
if (state_ != kCuePayload && state_ != kComment)
|
|
return true;
|
|
|
|
if (!data_.empty()) {
|
|
// If it was in the middle of the payload and the stream finished, then this
|
|
// is an end of the payload. The rest of the data is part of the payload.
|
|
if (state_ == kCuePayload) {
|
|
current_cue_.payload.push_back(data_);
|
|
} else {
|
|
current_cue_.comment.push_back(data_);
|
|
}
|
|
data_.clear();
|
|
}
|
|
|
|
bool result = new_sample_cb_.Run(kTrackId, CueToMediaSample(current_cue_));
|
|
current_cue_ = Cue();
|
|
state_ = kCueIdentifierOrTimingOrComment;
|
|
return result;
|
|
}
|
|
|
|
bool WebVttMediaParser::Parse(const uint8_t* buf, int size) {
|
|
if (state_ == kParseError) {
|
|
LOG(WARNING) << "The parser is in an error state, ignoring input.";
|
|
return false;
|
|
}
|
|
|
|
data_.insert(data_.end(), buf, buf + size);
|
|
|
|
std::string line;
|
|
while (ReadLine(&data_, &line)) {
|
|
// Only kCueIdentifierOrTimingOrComment and kCueTiming states accept -->.
|
|
// Error otherwise.
|
|
const bool has_arrow = line.find("-->") != std::string::npos;
|
|
if (state_ == kCueTiming) {
|
|
if (!has_arrow) {
|
|
LOG(ERROR) << "Expected --> in: " << line;
|
|
state_ = kParseError;
|
|
return false;
|
|
}
|
|
} else if (state_ != kCueIdentifierOrTimingOrComment) {
|
|
if (has_arrow) {
|
|
LOG(ERROR) << "Unexpected --> in " << line;
|
|
state_ = kParseError;
|
|
return false;
|
|
}
|
|
}
|
|
|
|
switch (state_) {
|
|
case kHeader:
|
|
// No check. This should be WEBVTT when this object was created.
|
|
header_.push_back(line);
|
|
state_ = kMetadata;
|
|
break;
|
|
case kMetadata: {
|
|
if (line.empty()) {
|
|
std::vector<std::shared_ptr<StreamInfo>> streams;
|
|
// The resolution of timings are in milliseconds.
|
|
const int kTimescale = 1000;
|
|
|
|
// The duration passed here is not very important. Also the whole file
|
|
// must be read before determining the real duration which doesn't
|
|
// work nicely with the current demuxer.
|
|
const int kDuration = 0;
|
|
|
|
// There is no one metadata to determine what the language is. Parts
|
|
// of the text may be annotated as some specific language.
|
|
const char kLanguage[] = "";
|
|
streams.emplace_back(
|
|
new TextStreamInfo(kTrackId, kTimescale, kDuration, "wvtt",
|
|
base::JoinString(header_, "\n"),
|
|
0, // Not necessary.
|
|
0,
|
|
kLanguage)); // Not necessary.
|
|
|
|
init_cb_.Run(streams);
|
|
state_ = kCueIdentifierOrTimingOrComment;
|
|
break;
|
|
}
|
|
|
|
header_.push_back(line);
|
|
break;
|
|
}
|
|
case kCueIdentifierOrTimingOrComment: {
|
|
// Note that there can be one or more line breaks before a cue starts;
|
|
// skip this line.
|
|
// Or the file could end without a new cue.
|
|
if (line.empty())
|
|
break;
|
|
|
|
if (!has_arrow) {
|
|
if (base::StartsWith(line, "NOTE",
|
|
base::CompareCase::INSENSITIVE_ASCII)) {
|
|
state_ = kComment;
|
|
current_cue_.comment.push_back(line);
|
|
} else {
|
|
// A cue can start from a cue identifier.
|
|
// https://w3c.github.io/webvtt/#webvtt-cue-identifier
|
|
current_cue_.identifier = line;
|
|
// The next line must be a timing.
|
|
state_ = kCueTiming;
|
|
}
|
|
break;
|
|
}
|
|
|
|
// No break statement if the line has an arrow; it should be a WebVTT
|
|
// timing, so fall thru. Setting state_ to kCueTiming so that the state
|
|
// always matches the case.
|
|
state_ = kCueTiming;
|
|
FALLTHROUGH_INTENDED;
|
|
}
|
|
case kCueTiming: {
|
|
DCHECK(has_arrow);
|
|
if (!ParseTimingAndSettingsLine(line, ¤t_cue_.start_time,
|
|
¤t_cue_.duration,
|
|
¤t_cue_.settings)) {
|
|
state_ = kParseError;
|
|
return false;
|
|
}
|
|
state_ = kCuePayload;
|
|
break;
|
|
}
|
|
case kCuePayload: {
|
|
if (line.empty()) {
|
|
state_ = kCueIdentifierOrTimingOrComment;
|
|
if (!new_sample_cb_.Run(kTrackId, CueToMediaSample(current_cue_))) {
|
|
state_ = kParseError;
|
|
return false;
|
|
}
|
|
current_cue_ = Cue();
|
|
break;
|
|
}
|
|
|
|
current_cue_.payload.push_back(line);
|
|
break;
|
|
}
|
|
case kComment: {
|
|
if (line.empty()) {
|
|
state_ = kCueIdentifierOrTimingOrComment;
|
|
if (!new_sample_cb_.Run(kTrackId, CueToMediaSample(current_cue_))) {
|
|
state_ = kParseError;
|
|
return false;
|
|
}
|
|
current_cue_ = Cue();
|
|
break;
|
|
}
|
|
|
|
current_cue_.comment.push_back(line);
|
|
break;
|
|
}
|
|
case kParseError:
|
|
NOTREACHED();
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
} // namespace media
|
|
} // namespace shaka
|