Make WebVttParser a MediaParser.

This changes it from an OriginHandler to a MediaParser and moves the
handling of it to the Demuxer.  This will allow more generic handling
of text by giving it the same abstractions as video/audio handling.

Change-Id: Ibbde3c84d228ec8e83af1ed266ea97dbc9589c24
This commit is contained in:
Jacob Trimble 2020-07-07 14:29:43 -07:00
parent ba33a63693
commit e3bc85f12d
20 changed files with 413 additions and 345 deletions

View File

@ -820,6 +820,19 @@ class PackagerFunctionalTest(PackagerAppTest):
self._GetFlags(output_dash=True, output_hls=True))
self._CheckTestResults('audio-video-with-language-override-with-subtag')
def testSegmentedWebVttWithLanguageOverride(self):
streams = self._GetStreams(
['text'], language='por', dash_only=True, output_format='mp4',
test_files=['bear-english.vtt'], segmented=True)
streams += self._GetStreams(
['text'], language='por', hls_only=True,
test_files=['bear-english.vtt'], segmented=True)
flags = self._GetFlags(output_hls=True, output_dash=True)
self.assertPackageSuccess(streams, flags)
self._CheckTestResults('segmented-webvtt-with-language-override')
def testMp4TrailingMoov(self):
self.assertPackageSuccess(
self._GetStreams(['audio', 'video'],

View File

@ -0,0 +1,9 @@
WEBVTT
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
STYLE
::cue { color:lime }
00:00:00.000 --> 00:00:00.800
Yup, that's a bear, eh.

View File

@ -0,0 +1,9 @@
WEBVTT
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
STYLE
::cue { color:lime }
00:00:01.000 --> 00:00:04.700
He 's... um... doing bear-like stuff.

View File

@ -0,0 +1,9 @@
WEBVTT
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
STYLE
::cue { color:lime }
00:00:01.000 --> 00:00:04.700
He 's... um... doing bear-like stuff.

View File

@ -0,0 +1,9 @@
WEBVTT
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
STYLE
::cue { color:lime }
00:00:01.000 --> 00:00:04.700
He 's... um... doing bear-like stuff.

View File

@ -0,0 +1,9 @@
WEBVTT
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
STYLE
::cue { color:lime }
00:00:01.000 --> 00:00:04.700
He 's... um... doing bear-like stuff.

View File

@ -0,0 +1,6 @@
#EXTM3U
## Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test>
#EXT-X-INDEPENDENT-SEGMENTS
#EXT-X-MEDIA:TYPE=SUBTITLES,URI="stream_1.m3u8",GROUP-ID="default-text-group",LANGUAGE="pt",NAME="stream_1",AUTOSELECT=YES

View File

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test>-->
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 DASH-MPD.xsd" profiles="urn:mpeg:dash:profile:isoff-live:2011" minBufferTime="PT2S" type="dynamic" publishTime="some_time" availabilityStartTime="some_time" minimumUpdatePeriod="PT5S" timeShiftBufferDepth="PT1800S">
<Period id="0" start="PT0S">
<AdaptationSet id="0" contentType="text" lang="pt" segmentAlignment="true">
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="subtitle"/>
<Representation id="0" bandwidth="2024" codecs="wvtt" mimeType="application/mp4">
<SegmentTemplate timescale="1000" initialization="bear-english-text-init.mp4" media="bear-english-text-$Number$.m4s" startNumber="1">
<SegmentTimeline>
<S t="0" d="1000" r="4"/>
</SegmentTimeline>
</SegmentTemplate>
</Representation>
</AdaptationSet>
</Period>
</MPD>

View File

@ -0,0 +1,16 @@
#EXTM3U
#EXT-X-VERSION:6
## Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test>
#EXT-X-TARGETDURATION:1
#EXT-X-PLAYLIST-TYPE:VOD
#EXTINF:1.000,
bear-english-text-1.vtt
#EXTINF:1.000,
bear-english-text-2.vtt
#EXTINF:1.000,
bear-english-text-3.vtt
#EXTINF:1.000,
bear-english-text-4.vtt
#EXTINF:1.000,
bear-english-text-5.vtt
#EXT-X-ENDLIST

View File

@ -20,6 +20,7 @@
#include "packager/media/formats/mp2t/mp2t_media_parser.h"
#include "packager/media/formats/mp4/mp4_media_parser.h"
#include "packager/media/formats/webm/webm_media_parser.h"
#include "packager/media/formats/webvtt/webvtt_parser.h"
#include "packager/media/formats/wvm/wvm_media_parser.h"
namespace {
@ -193,6 +194,9 @@ Status Demuxer::InitializeParser() {
case CONTAINER_WEBM:
parser_.reset(new WebMMediaParser());
break;
case CONTAINER_WEBVTT:
parser_.reset(new WebVttParser());
break;
case CONTAINER_UNKNOWN: {
const int64_t kDumpSizeLimit = 512;
LOG(ERROR) << "Failed to detect the container type from the buffer: "

View File

@ -6,24 +6,18 @@
#include "packager/media/formats/webvtt/webvtt_parser.h"
#include <string>
#include <vector>
#include "packager/base/logging.h"
#include "packager/base/strings/string_split.h"
#include "packager/base/strings/string_util.h"
#include "packager/file/file.h"
#include "packager/file/file_closer.h"
#include "packager/media/base/text_sample.h"
#include "packager/media/base/text_stream_info.h"
#include "packager/media/formats/webvtt/webvtt_timestamp.h"
#include "packager/status_macros.h"
namespace shaka {
namespace media {
namespace {
const uint64_t kStreamIndex = 0;
const uint64_t kBufferSize = 64 * 1024 * 1024;
std::string BlockToString(const std::string* block, size_t size) {
std::string out = " --- BLOCK START ---\n";
@ -89,141 +83,119 @@ void UpdateConfig(const std::vector<std::string>& block, std::string* config) {
} // namespace
WebVttParser::WebVttParser(const std::string& input_path,
const std::string& language)
: input_path_(input_path), language_(language) {}
WebVttParser::WebVttParser() {}
Status WebVttParser::InitializeInternal() {
return Status::OK;
// Registers the callbacks this parser reports through.
//
// |init_cb| is stored and later used to announce the text stream.
// |new_text_sample_cb| is stored and later used to deliver parsed cues.
// |new_media_sample_cb| is accepted to satisfy the interface but is not
// stored or used here.
// |decryption_key_source| must be null; a non-null value trips the DCHECK
// below because encrypted WebVTT is not supported.
void WebVttParser::Init(const InitCB& init_cb,
const NewMediaSampleCB& new_media_sample_cb,
const NewTextSampleCB& new_text_sample_cb,
KeySource* decryption_key_source) {
// Init is expected to be called only once: the stored callback must still be
// unset.
DCHECK(init_cb_.is_null());
DCHECK(!init_cb.is_null());
DCHECK(!new_text_sample_cb.is_null());
DCHECK(!decryption_key_source) << "Encrypted WebVTT not supported";
init_cb_ = init_cb;
new_text_sample_cb_ = new_text_sample_cb;
}
bool WebVttParser::ValidateOutputStreamIndex(size_t stream_index) const {
// Only support one output
return stream_index == kStreamIndex;
// Flushes the block reader and then parses every block it can still produce.
// Returns the result of Parse(), i.e. false if a block failed to parse.
bool WebVttParser::Flush() {
// NOTE(review): presumably this lets the reader hand out a trailing block that
// was not yet terminated by a blank line - confirm against BlockReader.
reader_.Flush();
return Parse();
}
Status WebVttParser::Run() {
BlockReader block_reader;
std::unique_ptr<File, FileCloser> file(File::Open(input_path_.c_str(), "r"));
if (!file)
return Status(error::FILE_FAILURE, "Error reading from file");
while (true) {
std::vector<uint8_t> buffer(kBufferSize);
const auto size = file->Read(buffer.data(), buffer.size());
if (size < 0)
return Status(error::FILE_FAILURE, "Error reading from file");
if (size == 0)
break;
// Feeds |size| bytes starting at |buf| into the block reader, then parses the
// blocks that are available so far. Returns the result of Parse().
bool WebVttParser::Parse(const uint8_t* buf, int size) {
reader_.PushData(buf, size);
return Parse();
}
block_reader.PushData(buffer.data(), size);
bool WebVttParser::Parse() {
if (!initialized_) {
std::vector<std::string> block;
if (!reader_.Next(&block)) {
return true;
}
// Check the header. It is possible for a 0xFEFF BOM to come before the
// header text.
if (block.size() != 1) {
LOG(ERROR) << "Failed to read WEBVTT header - "
<< "block size should be 1 but was " << block.size() << ".";
return false;
}
if (block[0] != "WEBVTT" && block[0] != "\xEF\xBB\xBFWEBVTT") {
LOG(ERROR) << "Failed to read WEBVTT header - should be WEBVTT but was "
<< block[0];
return false;
}
initialized_ = true;
}
block_reader.Flush();
return Parse(&block_reader)
? FlushDownstream(kStreamIndex)
: Status(error::INTERNAL_ERROR,
"Failed to parse WebVTT source. See log for details.");
}
void WebVttParser::Cancel() {
keep_reading_ = false;
}
bool WebVttParser::Parse(BlockReader* block_reader) {
std::vector<std::string> block;
if (!block_reader->Next(&block)) {
LOG(ERROR) << "Failed to read WEBVTT HEADER - No blocks in source.";
return false;
while (reader_.Next(&block)) {
if (!ParseBlock(block))
return false;
}
return true;
}
bool WebVttParser::ParseBlock(const std::vector<std::string>& block) {
// NOTE
if (IsLikelyNote(block[0])) {
// We can safely ignore the whole block.
return true;
}
// Check the header. It is possible for a 0xFEFF BOM to come before the
// header text.
if (block.size() != 1) {
LOG(ERROR) << "Failed to read WEBVTT header - "
<< "block size should be 1 but was " << block.size() << ".";
return false;
}
if (block[0] != "WEBVTT" && block[0] != "\xEF\xBB\xBFWEBVTT") {
LOG(ERROR) << "Failed to read WEBVTT header - should be WEBVTT but was "
<< block[0];
return false;
// STYLE
if (IsLikelyStyle(block[0])) {
if (saw_cue_) {
LOG(WARNING)
<< "Found style block after seeing cue. Ignoring style block";
} else {
UpdateConfig(block, &style_region_config_);
}
return true;
}
bool saw_cue = false;
while (block_reader->Next(&block) && keep_reading_) {
// NOTE
if (IsLikelyNote(block[0])) {
// We can safely ignore the whole block.
continue;
// REGION
if (IsLikelyRegion(block[0])) {
if (saw_cue_) {
LOG(WARNING)
<< "Found region block after seeing cue. Ignoring region block";
} else {
UpdateConfig(block, &style_region_config_);
}
// STYLE
if (IsLikelyStyle(block[0])) {
if (saw_cue) {
LOG(WARNING)
<< "Found style block after seeing cue. Ignoring style block";
} else {
UpdateConfig(block, &style_region_config_);
}
continue;
}
// REGION
if (IsLikelyRegion(block[0])) {
if (saw_cue) {
LOG(WARNING)
<< "Found region block after seeing cue. Ignoring region block";
} else {
UpdateConfig(block, &style_region_config_);
}
continue;
}
// CUE with ID
if (block.size() >= 2 && MaybeCueId(block[0]) &&
IsLikelyCueTiming(block[1]) && ParseCueWithId(block)) {
saw_cue = true;
continue;
}
// CUE with no ID
if (IsLikelyCueTiming(block[0]) && ParseCueWithNoId(block)) {
saw_cue = true;
continue;
}
LOG(ERROR) << "Failed to determine block classification:\n"
<< BlockToString(block.data(), block.size());
return false;
return true;
}
return keep_reading_;
// CUE with ID
if (block.size() >= 2 && MaybeCueId(block[0]) &&
IsLikelyCueTiming(block[1]) && ParseCueWithId(block)) {
saw_cue_ = true;
return true;
}
// CUE with no ID
if (IsLikelyCueTiming(block[0]) && ParseCueWithNoId(block)) {
saw_cue_ = true;
return true;
}
LOG(ERROR) << "Failed to determine block classification:\n"
<< BlockToString(block.data(), block.size());
return false;
}
bool WebVttParser::ParseCueWithNoId(const std::vector<std::string>& block) {
const Status status = ParseCue("", block.data(), block.size());
if (!status.ok()) {
LOG(ERROR) << "Failed to parse cue: " << status.error_message();
}
return status.ok();
return ParseCue("", block.data(), block.size());
}
bool WebVttParser::ParseCueWithId(const std::vector<std::string>& block) {
const Status status = ParseCue(block[0], block.data() + 1, block.size() - 1);
if (!status.ok()) {
LOG(ERROR) << "Failed to parse cue: " << status.error_message();
}
return status.ok();
return ParseCue(block[0], block.data() + 1, block.size() - 1);
}
Status WebVttParser::ParseCue(const std::string& id,
const std::string* block,
size_t block_size) {
bool WebVttParser::ParseCue(const std::string& id,
const std::string* block,
size_t block_size) {
const std::vector<std::string> time_and_style = base::SplitString(
block[0], " ", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
@ -236,13 +208,13 @@ Status WebVttParser::ParseCue(const std::string& id,
WebVttTimestampToMs(time_and_style[2], &end_time);
if (!parsed_time) {
return Status(
error::INTERNAL_ERROR,
"Could not parse start time, -->, and end time from " + block[0]);
LOG(ERROR) << "Could not parse start time, -->, and end time from "
<< block[0];
return false;
}
if (!stream_info_dispatched_)
RETURN_IF_ERROR(DispatchTextStreamInfo());
DispatchTextStreamInfo();
// According to the WebVTT spec end time must be greater than the start time
// of the cue. Since we are seeing content with invalid times in the field, we
@ -260,8 +232,7 @@ Status WebVttParser::ParseCue(const std::string& id,
<< start_time << ") should be less than end time (" << end_time
<< "). Skipping webvtt cue:"
<< BlockToString(block, block_size);
return Status::OK;
return true;
}
std::shared_ptr<TextSample> sample = std::make_shared<TextSample>();
@ -278,10 +249,10 @@ Status WebVttParser::ParseCue(const std::string& id,
sample->AppendPayload(block[i]);
}
return DispatchTextSample(kStreamIndex, sample);
return new_text_sample_cb_.Run(kStreamIndex, sample);
}
Status WebVttParser::DispatchTextStreamInfo() {
void WebVttParser::DispatchTextStreamInfo() {
stream_info_dispatched_ = true;
const int kTrackId = 0;
@ -294,12 +265,14 @@ Status WebVttParser::DispatchTextStreamInfo() {
const char kWebVttCodecString[] = "wvtt";
const int64_t kNoWidth = 0;
const int64_t kNoHeight = 0;
// The language of the stream will be overwritten by the Demuxer later.
const char kNoLanguage[] = "";
std::shared_ptr<StreamInfo> info = std::make_shared<TextStreamInfo>(
std::vector<std::shared_ptr<StreamInfo>> streams;
streams.emplace_back(std::make_shared<TextStreamInfo>(
kTrackId, kTimescale, kDuration, kCodecWebVtt, kWebVttCodecString,
style_region_config_, kNoWidth, kNoHeight, language_);
return DispatchStreamInfo(kStreamIndex, std::move(info));
style_region_config_, kNoWidth, kNoHeight, kNoLanguage));
init_cb_.Run(streams);
}
} // namespace media
} // namespace shaka

View File

@ -7,46 +7,46 @@
#ifndef PACKAGER_MEDIA_FORMATS_WEBVTT_WEBVTT_PARSER_H_
#define PACKAGER_MEDIA_FORMATS_WEBVTT_WEBVTT_PARSER_H_
#include <stdint.h>
#include <string>
#include <vector>
#include "packager/media/base/media_parser.h"
#include "packager/media/formats/webvtt/text_readers.h"
#include "packager/media/origin/origin_handler.h"
namespace shaka {
namespace media {
// Used to parse a WebVTT source into Cues that will be sent downstream.
class WebVttParser : public OriginHandler {
class WebVttParser : public MediaParser {
public:
WebVttParser(const std::string& input_path, const std::string& language);
WebVttParser();
Status Run() override;
void Cancel() override;
void Init(const InitCB& init_cb,
const NewMediaSampleCB& new_media_sample_cb,
const NewTextSampleCB& new_text_sample_cb,
KeySource* decryption_key_source) override;
bool Flush() override;
bool Parse(const uint8_t* buf, int size) override;
private:
WebVttParser(const WebVttParser&) = delete;
WebVttParser& operator=(const WebVttParser&) = delete;
Status InitializeInternal() override;
bool ValidateOutputStreamIndex(size_t stream_index) const override;
bool Parse(BlockReader* block_reader);
bool Parse();
bool ParseBlock(const std::vector<std::string>& block);
bool ParseCueWithNoId(const std::vector<std::string>& block);
bool ParseCueWithId(const std::vector<std::string>& block);
Status ParseCue(const std::string& id,
const std::string* block,
size_t block_size);
bool ParseCue(const std::string& id,
const std::string* block,
size_t block_size);
Status DispatchTextStreamInfo();
void DispatchTextStreamInfo();
std::string input_path_;
std::string language_;
InitCB init_cb_;
NewTextSampleCB new_text_sample_cb_;
BlockReader reader_;
std::string style_region_config_;
bool saw_cue_ = false;
bool stream_info_dispatched_ = false;
bool keep_reading_ = true;
bool initialized_ = false;
};
} // namespace media

View File

@ -4,28 +4,19 @@
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "packager/file/file.h"
#include "packager/media/base/media_handler_test_base.h"
#include "packager/media/formats/webvtt/text_readers.h"
#include "packager/base/bind.h"
#include "packager/media/base/stream_info.h"
#include "packager/media/base/text_sample.h"
#include "packager/media/formats/webvtt/webvtt_parser.h"
#include "packager/status_test_util.h"
using ::testing::_;
using ::testing::SaveArgPointee;
namespace shaka {
namespace media {
namespace {
const char kLanguage[] = "en";
const size_t kInputCount = 0;
const size_t kOutputCount = 1;
const size_t kOutputIndex = 0;
const uint32_t kStreamId = 0;
const uint32_t kTimeScale = 1000;
const bool kEncrypted = true;
const char* kNoId = "";
const char* kNoSettings = "";
@ -33,139 +24,166 @@ const char* kNoSettings = "";
std::string ToString(const std::vector<uint8_t>& v) {
return std::string(v.begin(), v.end());
}
} // namespace
class WebVttParserTest : public MediaHandlerTestBase {
class WebVttParserTest : public testing::Test {
protected:
void SetUpAndInitializeGraph(const char* text) {
const char* kFilename = "memory://test-file";
// Create the input file from the text passed to the test.
ASSERT_TRUE(File::WriteStringToFile(kFilename, text));
// Read from the file we just wrote.
parser_ = std::make_shared<WebVttParser>(kFilename, kLanguage);
ASSERT_OK(MediaHandlerTestBase::SetUpAndInitializeGraph(
parser_, kInputCount, kOutputCount));
// Creates a fresh parser and wires its callbacks to this fixture so that
// stream info and text samples are recorded in |streams_| and |samples_|.
void SetUpAndInitialize() {
parser_ = std::make_shared<WebVttParser>();
parser_->Init(
base::Bind(&WebVttParserTest::InitCB, base::Unretained(this)),
base::Bind(&WebVttParserTest::NewMediaSampleCB, base::Unretained(this)),
base::Bind(&WebVttParserTest::NewTextSampleCB, base::Unretained(this)),
// No decryption key source.
nullptr);
}
std::shared_ptr<OriginHandler> parser_;
// Init callback handed to the parser: records the reported streams so tests
// can inspect them.
void InitCB(const std::vector<std::shared_ptr<StreamInfo>>& streams) {
streams_ = streams;
}
// Media sample callback handed to the parser. These tests never expect a media
// sample, so any call records a test failure and returns false.
bool NewMediaSampleCB(uint32_t stream_id,
std::shared_ptr<MediaSample> sample) {
ADD_FAILURE() << "Should not get media samples";
return false;
}
// Text sample callback handed to the parser: checks the sample arrived on
// kStreamId, appends it to |samples_|, and returns true.
bool NewTextSampleCB(uint32_t stream_id, std::shared_ptr<TextSample> sample) {
EXPECT_EQ(stream_id, kStreamId);
samples_.emplace_back(std::move(sample));
return true;
}
std::shared_ptr<WebVttParser> parser_;
std::vector<std::shared_ptr<StreamInfo>> streams_;
std::vector<std::shared_ptr<TextSample>> samples_;
};
TEST_F(WebVttParserTest, FailToParseEmptyFile) {
const char* text = "";
const uint8_t text[] = "";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text));
ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
EXPECT_CALL(*Output(kOutputIndex), OnProcess(testing::_)).Times(0);
EXPECT_CALL(*Output(kOutputIndex), OnFlush(testing::_)).Times(0);
ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
ASSERT_TRUE(parser_->Flush());
ASSERT_NE(Status::OK, parser_->Run());
ASSERT_TRUE(streams_.empty());
ASSERT_TRUE(samples_.empty());
}
TEST_F(WebVttParserTest, ParseOnlyHeader) {
const char* text =
const uint8_t text[] =
"WEBVTT\n"
"\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text));
ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
{
testing::InSequence s;
EXPECT_CALL(*Output(kOutputIndex), OnProcess(_)).Times(0);
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
}
ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
ASSERT_TRUE(parser_->Flush());
ASSERT_OK(parser_->Run());
ASSERT_TRUE(streams_.empty());
ASSERT_TRUE(samples_.empty());
}
TEST_F(WebVttParserTest, ParseHeaderWithBOM) {
const char* text =
const uint8_t text[] =
"\xEF\xBB\xBFWEBVTT\n"
"\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text));
ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
{
testing::InSequence s;
EXPECT_CALL(*Output(kOutputIndex), OnProcess(_)).Times(0);
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
}
ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
ASSERT_TRUE(parser_->Flush());
ASSERT_OK(parser_->Run());
ASSERT_TRUE(streams_.empty());
ASSERT_TRUE(samples_.empty());
}
TEST_F(WebVttParserTest, FailToParseHeaderWrongWord) {
const char* text =
const uint8_t text[] =
"NOT WEBVTT\n"
"\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text));
ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
EXPECT_CALL(*Output(kOutputIndex), OnProcess(testing::_)).Times(0);
EXPECT_CALL(*Output(kOutputIndex), OnFlush(testing::_)).Times(0);
ASSERT_FALSE(parser_->Parse(text, sizeof(text) - 1));
ASSERT_NE(Status::OK, parser_->Run());
ASSERT_TRUE(streams_.empty());
ASSERT_TRUE(samples_.empty());
}
TEST_F(WebVttParserTest, FailToParseHeaderNotOneLine) {
const char* text =
const uint8_t text[] =
"WEBVTT\n"
"WEBVTT\n"
"\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text));
ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
EXPECT_CALL(*Output(kOutputIndex), OnProcess(testing::_)).Times(0);
EXPECT_CALL(*Output(kOutputIndex), OnFlush(testing::_)).Times(0);
ASSERT_FALSE(parser_->Parse(text, sizeof(text) - 1));
ASSERT_NE(Status::OK, parser_->Run());
ASSERT_TRUE(streams_.empty());
ASSERT_TRUE(samples_.empty());
}
// Verify that parsing a header followed by a single cue reports exactly one
// stream, and that the stream is unencrypted WebVTT ("wvtt") with the expected
// time scale.
TEST_F(WebVttParserTest, SendsStreamInfo) {
const uint8_t text[] =
"WEBVTT\n"
"\n"
"00:00:00.000 --> 00:01:00.000\n"
"Testing\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
// sizeof(text) - 1 drops the string literal's trailing NUL.
ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
ASSERT_TRUE(parser_->Flush());
ASSERT_EQ(streams_.size(), 1u);
EXPECT_EQ(streams_[0]->time_scale(), kTimeScale);
EXPECT_EQ(streams_[0]->is_encrypted(), false);
EXPECT_EQ(streams_[0]->codec(), kCodecWebVtt);
EXPECT_EQ(streams_[0]->codec_string(), "wvtt");
}
TEST_F(WebVttParserTest, IgnoresZeroDurationCues) {
const char* text =
const uint8_t text[] =
"WEBVTT\n"
"\n"
"00:01:00.000 --> 00:01:00.000\n"
"This subtitle would never show\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text));
ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
{
testing::InSequence s;
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
}
ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
ASSERT_TRUE(parser_->Flush());
ASSERT_OK(parser_->Run());
ASSERT_EQ(streams_.size(), 1u);
ASSERT_TRUE(samples_.empty());
}
TEST_F(WebVttParserTest, ParseOneCue) {
const char* text =
const uint8_t text[] =
"WEBVTT\n"
"\n"
"00:01:00.000 --> 01:00:00.000\n"
"subtitle\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text));
ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
{
testing::InSequence s;
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsTextSample(_, kNoId, 60000u, 3600000u, kNoSettings,
"subtitle")));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
}
ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
ASSERT_TRUE(parser_->Flush());
ASSERT_OK(parser_->Run());
ASSERT_EQ(streams_.size(), 1u);
ASSERT_EQ(samples_.size(), 1u);
EXPECT_EQ(samples_[0]->id(), kNoId);
EXPECT_EQ(samples_[0]->start_time(), 60000u);
EXPECT_EQ(samples_[0]->duration(), 3540000u);
EXPECT_EQ(samples_[0]->settings(), kNoSettings);
EXPECT_EQ(samples_[0]->payload(), "subtitle");
}
TEST_F(WebVttParserTest, ParseOneCueWithStyleAndRegion) {
const char* text =
const uint8_t text[] =
"WEBVTT\n"
"\n"
"STYLE\n"
@ -178,136 +196,117 @@ TEST_F(WebVttParserTest, ParseOneCueWithStyleAndRegion) {
"00:01:00.000 --> 01:00:00.000\n"
"subtitle\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text));
ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
StreamData stream_data;
{
testing::InSequence s;
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)))
.WillOnce(SaveArgPointee<0>(&stream_data));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsTextSample(_, kNoId, 60000u, 3600000u, kNoSettings,
"subtitle")));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
}
ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
ASSERT_TRUE(parser_->Flush());
ASSERT_OK(parser_->Run());
EXPECT_EQ(ToString(stream_data.stream_info->codec_config()),
ASSERT_EQ(streams_.size(), 1u);
ASSERT_EQ(samples_.size(), 1u);
EXPECT_EQ(ToString(streams_[0]->codec_config()),
"STYLE\n"
"::cue { color:lime }\n"
"\n"
"REGION\n"
"id:scroll\n"
"scrol:up");
EXPECT_EQ(samples_[0]->id(), kNoId);
EXPECT_EQ(samples_[0]->start_time(), 60000u);
EXPECT_EQ(samples_[0]->duration(), 3540000u);
EXPECT_EQ(samples_[0]->settings(), kNoSettings);
EXPECT_EQ(samples_[0]->payload(), "subtitle");
}
TEST_F(WebVttParserTest, ParseOneEmptyCue) {
const char* text =
const uint8_t text[] =
"WEBVTT\n"
"\n"
"00:01:00.000 --> 01:00:00.000\n"
"\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text));
ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
{
testing::InSequence s;
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)));
EXPECT_CALL(
*Output(kOutputIndex),
OnProcess(IsTextSample(_, kNoId, 60000u, 3600000u, kNoSettings, "")));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
}
ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
ASSERT_TRUE(parser_->Flush());
ASSERT_OK(parser_->Run());
ASSERT_EQ(streams_.size(), 1u);
ASSERT_EQ(samples_.size(), 1u);
EXPECT_EQ(samples_[0]->payload(), "");
}
TEST_F(WebVttParserTest, FailToParseCueWithArrowInId) {
const char* text =
const uint8_t text[] =
"WEBVTT\n"
"\n"
"-->\n"
"00:01:00.000 --> 01:00:00.000\n"
"subtitle\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text));
ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
ASSERT_NE(Status::OK, parser_->Run());
ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
ASSERT_FALSE(parser_->Flush());
}
TEST_F(WebVttParserTest, ParseOneCueWithId) {
const char* text =
const uint8_t text[] =
"WEBVTT\n"
"\n"
"id\n"
"00:01:00.000 --> 01:00:00.000\n"
"subtitle\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text));
ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
{
testing::InSequence s;
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsTextSample(_, "id", 60000u, 3600000u, kNoSettings,
"subtitle")));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
}
ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
ASSERT_TRUE(parser_->Flush());
ASSERT_OK(parser_->Run());
ASSERT_EQ(streams_.size(), 1u);
ASSERT_EQ(samples_.size(), 1u);
EXPECT_EQ(samples_[0]->id(), "id");
EXPECT_EQ(samples_[0]->payload(), "subtitle");
}
TEST_F(WebVttParserTest, ParseOneEmptyCueWithId) {
const char* text =
const uint8_t text[] =
"WEBVTT\n"
"\n"
"id\n"
"00:01:00.000 --> 01:00:00.000\n"
"\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text));
ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
{
testing::InSequence s;
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)));
EXPECT_CALL(
*Output(kOutputIndex),
OnProcess(IsTextSample(_, "id", 60000u, 3600000u, kNoSettings, "")));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
}
ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
ASSERT_TRUE(parser_->Flush());
ASSERT_OK(parser_->Run());
ASSERT_EQ(streams_.size(), 1u);
ASSERT_EQ(samples_.size(), 1u);
EXPECT_EQ(samples_[0]->id(), "id");
EXPECT_EQ(samples_[0]->payload(), "");
}
TEST_F(WebVttParserTest, ParseOneCueWithSettings) {
const char* text =
const uint8_t text[] =
"WEBVTT\n"
"\n"
"00:01:00.000 --> 01:00:00.000 size:50%\n"
"subtitle\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text));
ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
{
testing::InSequence s;
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsTextSample(_, kNoId, 60000u, 3600000u, "size:50%",
"subtitle")));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
}
ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
ASSERT_TRUE(parser_->Flush());
ASSERT_OK(parser_->Run());
ASSERT_EQ(streams_.size(), 1u);
ASSERT_EQ(samples_.size(), 1u);
EXPECT_EQ(samples_[0]->settings(), "size:50%");
}
// Verify that a typical case with multiple cues works.
TEST_F(WebVttParserTest, ParseMultipleCues) {
const char* text =
const uint8_t text[] =
"WEBVTT\n"
"\n"
"00:00:01.000 --> 00:00:05.200\n"
@ -319,31 +318,29 @@ TEST_F(WebVttParserTest, ParseMultipleCues) {
"00:00:05.800 --> 00:00:08.000\n"
"subtitle C\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text));
ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
{
testing::InSequence s;
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsTextSample(_, kNoId, 1000u, 5200u, kNoSettings,
"subtitle A")));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsTextSample(_, kNoId, 2321u, 7000u, kNoSettings,
"subtitle B")));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsTextSample(_, kNoId, 5800u, 8000u, kNoSettings,
"subtitle C")));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
}
ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
ASSERT_TRUE(parser_->Flush());
ASSERT_OK(parser_->Run());
ASSERT_EQ(streams_.size(), 1u);
ASSERT_EQ(samples_.size(), 3u);
EXPECT_EQ(samples_[0]->start_time(), 1000u);
EXPECT_EQ(samples_[0]->duration(), 4200u);
EXPECT_EQ(samples_[0]->payload(), "subtitle A");
EXPECT_EQ(samples_[1]->start_time(), 2321u);
EXPECT_EQ(samples_[1]->duration(), 4679u);
EXPECT_EQ(samples_[1]->payload(), "subtitle B");
EXPECT_EQ(samples_[2]->start_time(), 5800u);
EXPECT_EQ(samples_[2]->duration(), 2200u);
EXPECT_EQ(samples_[2]->payload(), "subtitle C");
}
// Verify that a typical case with multiple cues works even when comments are
// present.
TEST_F(WebVttParserTest, ParseWithComments) {
const char* text =
const uint8_t text[] =
"WEBVTT\n"
"\n"
"NOTE This is a one line comment\n"
@ -365,25 +362,17 @@ TEST_F(WebVttParserTest, ParseWithComments) {
"00:00:05.800 --> 00:00:08.000\n"
"subtitle C\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text));
ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
{
testing::InSequence s;
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsTextSample(_, kNoId, 1000u, 5200u, kNoSettings,
"subtitle A")));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsTextSample(_, kNoId, 2321u, 7000u, kNoSettings,
"subtitle B")));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsTextSample(_, kNoId, 5800u, 8000u, kNoSettings,
"subtitle C")));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
}
ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
ASSERT_TRUE(parser_->Flush());
ASSERT_OK(parser_->Run());
ASSERT_EQ(streams_.size(), 1u);
ASSERT_EQ(samples_.size(), 3u);
EXPECT_EQ(samples_[0]->payload(), "subtitle A");
EXPECT_EQ(samples_[1]->payload(), "subtitle B");
EXPECT_EQ(samples_[2]->payload(), "subtitle C");
}
} // namespace media
} // namespace shaka

View File

@ -40,8 +40,6 @@
#include "packager/media/event/muxer_listener_factory.h"
#include "packager/media/event/vod_media_info_dump_muxer_listener.h"
#include "packager/media/formats/webvtt/text_padder.h"
#include "packager/media/formats/webvtt/text_readers.h"
#include "packager/media/formats/webvtt/webvtt_parser.h"
#include "packager/media/formats/webvtt/webvtt_text_output_handler.h"
#include "packager/media/formats/webvtt/webvtt_to_mp4_handler.h"
#include "packager/media/replicator/replicator.h"
@ -511,18 +509,22 @@ Status CreateHlsTextJob(const StreamDescriptor& stream,
auto output = std::make_shared<WebVttTextOutputHandler>(
muxer_options, std::move(muxer_listener));
auto parser = std::make_shared<WebVttParser>(stream.input, stream.language);
std::shared_ptr<Demuxer> demuxer;
RETURN_IF_ERROR(CreateDemuxer(stream, packaging_params, &demuxer));
if (!stream.language.empty())
demuxer->SetLanguageOverride(stream.stream_selector, stream.language);
auto padder = std::make_shared<TextPadder>(kDefaultTextZeroBiasMs);
RETURN_IF_ERROR(demuxer->SetHandler(stream.stream_selector, padder));
auto cue_aligner = sync_points
? std::make_shared<CueAlignmentHandler>(sync_points)
: nullptr;
auto chunker = CreateTextChunker(packaging_params.chunking_params);
job_manager->Add("Segmented Text Job", parser);
job_manager->Add("Segmented Text Job", demuxer);
return MediaHandler::Chain({std::move(parser), std::move(padder),
std::move(cue_aligner), std::move(chunker),
std::move(output)});
return MediaHandler::Chain({std::move(padder), std::move(cue_aligner),
std::move(chunker), std::move(output)});
}
Status CreateWebVttToMp4TextJob(const StreamDescriptor& stream,
@ -531,8 +533,12 @@ Status CreateWebVttToMp4TextJob(const StreamDescriptor& stream,
SyncPointQueue* sync_points,
MuxerFactory* muxer_factory,
std::shared_ptr<OriginHandler>* root) {
auto parser = std::make_shared<WebVttParser>(stream.input, stream.language);
std::shared_ptr<Demuxer> demuxer;
RETURN_IF_ERROR(CreateDemuxer(stream, packaging_params, &demuxer));
if (!stream.language.empty())
demuxer->SetLanguageOverride(stream.stream_selector, stream.language);
auto padder = std::make_shared<TextPadder>(kDefaultTextZeroBiasMs);
RETURN_IF_ERROR(demuxer->SetHandler(stream.stream_selector, padder));
auto text_to_mp4 = std::make_shared<WebVttToMp4Handler>();
auto muxer = muxer_factory->CreateMuxer(GetOutputFormat(stream), stream);
@ -547,11 +553,11 @@ Status CreateWebVttToMp4TextJob(const StreamDescriptor& stream,
std::shared_ptr<MediaHandler> chunker =
CreateTextChunker(packaging_params.chunking_params);
*root = parser;
*root = demuxer;
return MediaHandler::Chain({std::move(parser), std::move(padder),
std::move(cue_aligner), std::move(chunker),
std::move(text_to_mp4), std::move(muxer)});
return MediaHandler::Chain({std::move(padder), std::move(cue_aligner),
std::move(chunker), std::move(text_to_mp4),
std::move(muxer)});
}
Status CreateTextJobs(