WebVtt Ignore Zero Duration Cues

We have an assert that ensures that the end time is greater than
the start time for any cue. However we never checked that cues
had a non-zero duration when parsing them.

We will throw away cues with a duration of zero (and print a
warning message) as they are not spec compliant.

Closes: #335
Change-Id: I404e8f3a5a8d43eff75a2554db3e38e8d340f421
This commit is contained in:
Aaron Vaage 2018-03-01 13:46:08 -08:00 committed by KongQun Yang
parent 9476e826d5
commit 650f59f8a0
3 changed files with 87 additions and 34 deletions

View File

@ -19,6 +19,20 @@ namespace shaka {
namespace media { namespace media {
namespace { namespace {
std::string BlockToString(const std::string* block, size_t size) {
std::string out = " --- BLOCK START ---\n";
for (size_t i = 0; i < size; i++) {
out.append(" ");
out.append(block[i]);
out.append("\n");
}
out.append(" --- BLOCK END ---");
return out;
}
// Comments are just blocks that are preceded by a blank line, start with the // Comments are just blocks that are preceded by a blank line, start with the
// word "NOTE" (followed by a space or newline), and end at the first blank // word "NOTE" (followed by a space or newline), and end at the first blank
// line. // line.
@ -117,12 +131,8 @@ bool WebVttParser::Parse() {
continue; continue;
} }
LOG(ERROR) << "Failed to determine block classification:"; LOG(ERROR) << "Failed to determine block classification:\n"
LOG(ERROR) << " --- BLOCK START ---"; << BlockToString(block.data(), block.size());
for (const std::string& line : block) {
LOG(ERROR) << " " << line;
}
LOG(ERROR) << " --- BLOCK END ---";
return false; return false;
} }
@ -130,34 +140,65 @@ bool WebVttParser::Parse() {
} }
bool WebVttParser::ParseCueWithNoId(const std::vector<std::string>& block) { bool WebVttParser::ParseCueWithNoId(const std::vector<std::string>& block) {
return ParseCue("", block.data(), block.size()); const Status status = ParseCue("", block.data(), block.size());
if (!status.ok()) {
LOG(ERROR) << "Failed to parse cue: " << status.error_message();
}
return status.ok();
} }
bool WebVttParser::ParseCueWithId(const std::vector<std::string>& block) { bool WebVttParser::ParseCueWithId(const std::vector<std::string>& block) {
return ParseCue(block[0], block.data() + 1, block.size() - 1); const Status status = ParseCue(block[0], block.data() + 1, block.size() - 1);
if (!status.ok()) {
LOG(ERROR) << "Failed to parse cue: " << status.error_message();
} }
bool WebVttParser::ParseCue(const std::string& id, return status.ok();
}
Status WebVttParser::ParseCue(const std::string& id,
const std::string* block, const std::string* block,
size_t block_size) { size_t block_size) {
std::shared_ptr<TextSample> sample(new TextSample);
sample->set_id(id);
const std::vector<std::string> time_and_style = base::SplitString( const std::vector<std::string> time_and_style = base::SplitString(
block[0], " ", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY); block[0], " ", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
uint64_t start_time; uint64_t start_time = 0;
uint64_t end_time; uint64_t end_time = 0;
if (time_and_style.size() >= 3 && time_and_style[1] == "-->" &&
const bool parsed_time =
time_and_style.size() >= 3 && time_and_style[1] == "-->" &&
WebVttTimestampToMs(time_and_style[0], &start_time) && WebVttTimestampToMs(time_and_style[0], &start_time) &&
WebVttTimestampToMs(time_and_style[2], &end_time)) { WebVttTimestampToMs(time_and_style[2], &end_time);
sample->SetTime(start_time, end_time);
} else { if (!parsed_time) {
LOG(ERROR) << "Could not parse start time, -->, and end time from " return Status(
<< block[0]; error::INTERNAL_ERROR,
return false; "Could not parse start time, -->, and end time from " + block[0]);
} }
// According to the WebVTT spec
// (https://www.w3.org/TR/webvtt1/#webvtt-cue-timings) end time must be
// greater than the start time of the cue. Since we are seeing content with
// zero-duration cues in the field, we are going to drop the cue instead of
// failing to package.
//
// Print a warning so that those packaging content can know that their
// content is not spec compliant.
if (start_time == end_time) {
LOG(WARNING) << "WebVTT input is not spec compliant."
" Skipping zero-duration cue\n"
<< BlockToString(block, block_size);
return Status::OK;
}
std::shared_ptr<TextSample> sample = std::make_shared<TextSample>();
sample->set_id(id);
sample->SetTime(start_time, end_time);
// The rest of time_and_style are the style tokens. // The rest of time_and_style are the style tokens.
for (size_t i = 3; i < time_and_style.size(); i++) { for (size_t i = 3; i < time_and_style.size(); i++) {
sample->AppendStyle(time_and_style[i]); sample->AppendStyle(time_and_style[i]);
@ -168,14 +209,7 @@ bool WebVttParser::ParseCue(const std::string& id,
sample->AppendPayload(block[i]); sample->AppendPayload(block[i]);
} }
const Status send_result = DispatchTextSample(0, sample); return DispatchTextSample(0, sample);
if (send_result != Status::OK) {
LOG(ERROR) << "Failed to send text sample down stream:"
<< send_result.error_message();
}
return send_result == Status::OK;
} }
Status WebVttParser::DispatchTextStreamInfo() { Status WebVttParser::DispatchTextStreamInfo() {

View File

@ -35,7 +35,7 @@ class WebVttParser : public OriginHandler {
bool Parse(); bool Parse();
bool ParseCueWithNoId(const std::vector<std::string>& block); bool ParseCueWithNoId(const std::vector<std::string>& block);
bool ParseCueWithId(const std::vector<std::string>& block); bool ParseCueWithId(const std::vector<std::string>& block);
bool ParseCue(const std::string& id, Status ParseCue(const std::string& id,
const std::string* block, const std::string* block,
size_t block_size); size_t block_size);

View File

@ -147,6 +147,25 @@ TEST_F(WebVttParserTest, DISABLED_ParseStyleBlocks) {
ASSERT_OK(parser_->Run()); ASSERT_OK(parser_->Run());
} }
TEST_F(WebVttParserTest, IngoresZeroDurationCues) {
const char* text =
"WEBVTT\n"
"\n"
"00:01:00.000 --> 00:01:00.000\n"
"This subtitle would never show\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text));
{
testing::InSequence s;
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsStreamInfo(kStreamIndex, kTimeScale, !kEncrypted)));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex));
}
ASSERT_OK(parser_->Run());
}
TEST_F(WebVttParserTest, ParseOneCue) { TEST_F(WebVttParserTest, ParseOneCue) {
const char* text = const char* text =
"WEBVTT\n" "WEBVTT\n"