Make WebVttParser a MediaParser.

This changes it from an OriginHandler to a MediaParser and moves the
handling of it to the Demuxer.  This will allow more generic handling
of text by giving it the same abstractions as video/audio handling.

Change-Id: Ibbde3c84d228ec8e83af1ed266ea97dbc9589c24
This commit is contained in:
Jacob Trimble 2020-07-07 14:29:43 -07:00
parent ba33a63693
commit e3bc85f12d
20 changed files with 413 additions and 345 deletions

View File

@ -820,6 +820,19 @@ class PackagerFunctionalTest(PackagerAppTest):
self._GetFlags(output_dash=True, output_hls=True)) self._GetFlags(output_dash=True, output_hls=True))
self._CheckTestResults('audio-video-with-language-override-with-subtag') self._CheckTestResults('audio-video-with-language-override-with-subtag')
def testSegmentedWebVttWithLanguageOverride(self):
streams = self._GetStreams(
['text'], language='por', dash_only=True, output_format='mp4',
test_files=['bear-english.vtt'], segmented=True)
streams += self._GetStreams(
['text'], language='por', hls_only=True,
test_files=['bear-english.vtt'], segmented=True)
flags = self._GetFlags(output_hls=True, output_dash=True)
self.assertPackageSuccess(streams, flags)
self._CheckTestResults('segmented-webvtt-with-language-override')
def testMp4TrailingMoov(self): def testMp4TrailingMoov(self):
self.assertPackageSuccess( self.assertPackageSuccess(
self._GetStreams(['audio', 'video'], self._GetStreams(['audio', 'video'],

View File

@ -0,0 +1,9 @@
WEBVTT
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
STYLE
::cue { color:lime }
00:00:00.000 --> 00:00:00.800
Yup, that's a bear, eh.

View File

@ -0,0 +1,9 @@
WEBVTT
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
STYLE
::cue { color:lime }
00:00:01.000 --> 00:00:04.700
He 's... um... doing bear-like stuff.

View File

@ -0,0 +1,9 @@
WEBVTT
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
STYLE
::cue { color:lime }
00:00:01.000 --> 00:00:04.700
He 's... um... doing bear-like stuff.

View File

@ -0,0 +1,9 @@
WEBVTT
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
STYLE
::cue { color:lime }
00:00:01.000 --> 00:00:04.700
He 's... um... doing bear-like stuff.

View File

@ -0,0 +1,9 @@
WEBVTT
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
STYLE
::cue { color:lime }
00:00:01.000 --> 00:00:04.700
He 's... um... doing bear-like stuff.

View File

@ -0,0 +1,6 @@
#EXTM3U
## Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test>
#EXT-X-INDEPENDENT-SEGMENTS
#EXT-X-MEDIA:TYPE=SUBTITLES,URI="stream_1.m3u8",GROUP-ID="default-text-group",LANGUAGE="pt",NAME="stream_1",AUTOSELECT=YES

View File

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test>-->
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 DASH-MPD.xsd" profiles="urn:mpeg:dash:profile:isoff-live:2011" minBufferTime="PT2S" type="dynamic" publishTime="some_time" availabilityStartTime="some_time" minimumUpdatePeriod="PT5S" timeShiftBufferDepth="PT1800S">
<Period id="0" start="PT0S">
<AdaptationSet id="0" contentType="text" lang="pt" segmentAlignment="true">
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="subtitle"/>
<Representation id="0" bandwidth="2024" codecs="wvtt" mimeType="application/mp4">
<SegmentTemplate timescale="1000" initialization="bear-english-text-init.mp4" media="bear-english-text-$Number$.m4s" startNumber="1">
<SegmentTimeline>
<S t="0" d="1000" r="4"/>
</SegmentTimeline>
</SegmentTemplate>
</Representation>
</AdaptationSet>
</Period>
</MPD>

View File

@ -0,0 +1,16 @@
#EXTM3U
#EXT-X-VERSION:6
## Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test>
#EXT-X-TARGETDURATION:1
#EXT-X-PLAYLIST-TYPE:VOD
#EXTINF:1.000,
bear-english-text-1.vtt
#EXTINF:1.000,
bear-english-text-2.vtt
#EXTINF:1.000,
bear-english-text-3.vtt
#EXTINF:1.000,
bear-english-text-4.vtt
#EXTINF:1.000,
bear-english-text-5.vtt
#EXT-X-ENDLIST

View File

@ -20,6 +20,7 @@
#include "packager/media/formats/mp2t/mp2t_media_parser.h" #include "packager/media/formats/mp2t/mp2t_media_parser.h"
#include "packager/media/formats/mp4/mp4_media_parser.h" #include "packager/media/formats/mp4/mp4_media_parser.h"
#include "packager/media/formats/webm/webm_media_parser.h" #include "packager/media/formats/webm/webm_media_parser.h"
#include "packager/media/formats/webvtt/webvtt_parser.h"
#include "packager/media/formats/wvm/wvm_media_parser.h" #include "packager/media/formats/wvm/wvm_media_parser.h"
namespace { namespace {
@ -193,6 +194,9 @@ Status Demuxer::InitializeParser() {
case CONTAINER_WEBM: case CONTAINER_WEBM:
parser_.reset(new WebMMediaParser()); parser_.reset(new WebMMediaParser());
break; break;
case CONTAINER_WEBVTT:
parser_.reset(new WebVttParser());
break;
case CONTAINER_UNKNOWN: { case CONTAINER_UNKNOWN: {
const int64_t kDumpSizeLimit = 512; const int64_t kDumpSizeLimit = 512;
LOG(ERROR) << "Failed to detect the container type from the buffer: " LOG(ERROR) << "Failed to detect the container type from the buffer: "

View File

@ -6,24 +6,18 @@
#include "packager/media/formats/webvtt/webvtt_parser.h" #include "packager/media/formats/webvtt/webvtt_parser.h"
#include <string>
#include <vector>
#include "packager/base/logging.h" #include "packager/base/logging.h"
#include "packager/base/strings/string_split.h" #include "packager/base/strings/string_split.h"
#include "packager/base/strings/string_util.h" #include "packager/base/strings/string_util.h"
#include "packager/file/file.h" #include "packager/media/base/text_sample.h"
#include "packager/file/file_closer.h"
#include "packager/media/base/text_stream_info.h" #include "packager/media/base/text_stream_info.h"
#include "packager/media/formats/webvtt/webvtt_timestamp.h" #include "packager/media/formats/webvtt/webvtt_timestamp.h"
#include "packager/status_macros.h"
namespace shaka { namespace shaka {
namespace media { namespace media {
namespace { namespace {
const uint64_t kStreamIndex = 0; const uint64_t kStreamIndex = 0;
const uint64_t kBufferSize = 64 * 1024 * 1024;
std::string BlockToString(const std::string* block, size_t size) { std::string BlockToString(const std::string* block, size_t size) {
std::string out = " --- BLOCK START ---\n"; std::string out = " --- BLOCK START ---\n";
@ -89,141 +83,119 @@ void UpdateConfig(const std::vector<std::string>& block, std::string* config) {
} // namespace } // namespace
WebVttParser::WebVttParser(const std::string& input_path, WebVttParser::WebVttParser() {}
const std::string& language)
: input_path_(input_path), language_(language) {}
Status WebVttParser::InitializeInternal() { void WebVttParser::Init(const InitCB& init_cb,
return Status::OK; const NewMediaSampleCB& new_media_sample_cb,
const NewTextSampleCB& new_text_sample_cb,
KeySource* decryption_key_source) {
DCHECK(init_cb_.is_null());
DCHECK(!init_cb.is_null());
DCHECK(!new_text_sample_cb.is_null());
DCHECK(!decryption_key_source) << "Encrypted WebVTT not supported";
init_cb_ = init_cb;
new_text_sample_cb_ = new_text_sample_cb;
} }
bool WebVttParser::ValidateOutputStreamIndex(size_t stream_index) const { bool WebVttParser::Flush() {
// Only support one output reader_.Flush();
return stream_index == kStreamIndex; return Parse();
} }
Status WebVttParser::Run() { bool WebVttParser::Parse(const uint8_t* buf, int size) {
BlockReader block_reader; reader_.PushData(buf, size);
std::unique_ptr<File, FileCloser> file(File::Open(input_path_.c_str(), "r")); return Parse();
if (!file) }
return Status(error::FILE_FAILURE, "Error reading from file");
while (true) {
std::vector<uint8_t> buffer(kBufferSize);
const auto size = file->Read(buffer.data(), buffer.size());
if (size < 0)
return Status(error::FILE_FAILURE, "Error reading from file");
if (size == 0)
break;
block_reader.PushData(buffer.data(), size); bool WebVttParser::Parse() {
if (!initialized_) {
std::vector<std::string> block;
if (!reader_.Next(&block)) {
return true;
}
// Check the header. It is possible for a 0xFEFF BOM to come before the
// header text.
if (block.size() != 1) {
LOG(ERROR) << "Failed to read WEBVTT header - "
<< "block size should be 1 but was " << block.size() << ".";
return false;
}
if (block[0] != "WEBVTT" && block[0] != "\xEF\xBB\xBFWEBVTT") {
LOG(ERROR) << "Failed to read WEBVTT header - should be WEBVTT but was "
<< block[0];
return false;
}
initialized_ = true;
} }
block_reader.Flush();
return Parse(&block_reader)
? FlushDownstream(kStreamIndex)
: Status(error::INTERNAL_ERROR,
"Failed to parse WebVTT source. See log for details.");
}
void WebVttParser::Cancel() {
keep_reading_ = false;
}
bool WebVttParser::Parse(BlockReader* block_reader) {
std::vector<std::string> block; std::vector<std::string> block;
if (!block_reader->Next(&block)) { while (reader_.Next(&block)) {
LOG(ERROR) << "Failed to read WEBVTT HEADER - No blocks in source."; if (!ParseBlock(block))
return false; return false;
}
return true;
}
bool WebVttParser::ParseBlock(const std::vector<std::string>& block) {
// NOTE
if (IsLikelyNote(block[0])) {
// We can safely ignore the whole block.
return true;
} }
// Check the header. It is possible for a 0xFEFF BOM to come before the // STYLE
// header text. if (IsLikelyStyle(block[0])) {
if (block.size() != 1) { if (saw_cue_) {
LOG(ERROR) << "Failed to read WEBVTT header - " LOG(WARNING)
<< "block size should be 1 but was " << block.size() << "."; << "Found style block after seeing cue. Ignoring style block";
return false; } else {
} UpdateConfig(block, &style_region_config_);
if (block[0] != "WEBVTT" && block[0] != "\xEF\xBB\xBFWEBVTT") { }
LOG(ERROR) << "Failed to read WEBVTT header - should be WEBVTT but was " return true;
<< block[0];
return false;
} }
bool saw_cue = false; // REGION
if (IsLikelyRegion(block[0])) {
while (block_reader->Next(&block) && keep_reading_) { if (saw_cue_) {
// NOTE LOG(WARNING)
if (IsLikelyNote(block[0])) { << "Found region block after seeing cue. Ignoring region block";
// We can safely ignore the whole block. } else {
continue; UpdateConfig(block, &style_region_config_);
} }
return true;
// STYLE
if (IsLikelyStyle(block[0])) {
if (saw_cue) {
LOG(WARNING)
<< "Found style block after seeing cue. Ignoring style block";
} else {
UpdateConfig(block, &style_region_config_);
}
continue;
}
// REGION
if (IsLikelyRegion(block[0])) {
if (saw_cue) {
LOG(WARNING)
<< "Found region block after seeing cue. Ignoring region block";
} else {
UpdateConfig(block, &style_region_config_);
}
continue;
}
// CUE with ID
if (block.size() >= 2 && MaybeCueId(block[0]) &&
IsLikelyCueTiming(block[1]) && ParseCueWithId(block)) {
saw_cue = true;
continue;
}
// CUE with no ID
if (IsLikelyCueTiming(block[0]) && ParseCueWithNoId(block)) {
saw_cue = true;
continue;
}
LOG(ERROR) << "Failed to determine block classification:\n"
<< BlockToString(block.data(), block.size());
return false;
} }
return keep_reading_; // CUE with ID
if (block.size() >= 2 && MaybeCueId(block[0]) &&
IsLikelyCueTiming(block[1]) && ParseCueWithId(block)) {
saw_cue_ = true;
return true;
}
// CUE with no ID
if (IsLikelyCueTiming(block[0]) && ParseCueWithNoId(block)) {
saw_cue_ = true;
return true;
}
LOG(ERROR) << "Failed to determine block classification:\n"
<< BlockToString(block.data(), block.size());
return false;
} }
bool WebVttParser::ParseCueWithNoId(const std::vector<std::string>& block) { bool WebVttParser::ParseCueWithNoId(const std::vector<std::string>& block) {
const Status status = ParseCue("", block.data(), block.size()); return ParseCue("", block.data(), block.size());
if (!status.ok()) {
LOG(ERROR) << "Failed to parse cue: " << status.error_message();
}
return status.ok();
} }
bool WebVttParser::ParseCueWithId(const std::vector<std::string>& block) { bool WebVttParser::ParseCueWithId(const std::vector<std::string>& block) {
const Status status = ParseCue(block[0], block.data() + 1, block.size() - 1); return ParseCue(block[0], block.data() + 1, block.size() - 1);
if (!status.ok()) {
LOG(ERROR) << "Failed to parse cue: " << status.error_message();
}
return status.ok();
} }
Status WebVttParser::ParseCue(const std::string& id, bool WebVttParser::ParseCue(const std::string& id,
const std::string* block, const std::string* block,
size_t block_size) { size_t block_size) {
const std::vector<std::string> time_and_style = base::SplitString( const std::vector<std::string> time_and_style = base::SplitString(
block[0], " ", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY); block[0], " ", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
@ -236,13 +208,13 @@ Status WebVttParser::ParseCue(const std::string& id,
WebVttTimestampToMs(time_and_style[2], &end_time); WebVttTimestampToMs(time_and_style[2], &end_time);
if (!parsed_time) { if (!parsed_time) {
return Status( LOG(ERROR) << "Could not parse start time, -->, and end time from "
error::INTERNAL_ERROR, << block[0];
"Could not parse start time, -->, and end time from " + block[0]); return false;
} }
if (!stream_info_dispatched_) if (!stream_info_dispatched_)
RETURN_IF_ERROR(DispatchTextStreamInfo()); DispatchTextStreamInfo();
// According to the WebVTT spec end time must be greater than the start time // According to the WebVTT spec end time must be greater than the start time
// of the cue. Since we are seeing content with invalid times in the field, we // of the cue. Since we are seeing content with invalid times in the field, we
@ -260,8 +232,7 @@ Status WebVttParser::ParseCue(const std::string& id,
<< start_time << ") should be less than end time (" << end_time << start_time << ") should be less than end time (" << end_time
<< "). Skipping webvtt cue:" << "). Skipping webvtt cue:"
<< BlockToString(block, block_size); << BlockToString(block, block_size);
return true;
return Status::OK;
} }
std::shared_ptr<TextSample> sample = std::make_shared<TextSample>(); std::shared_ptr<TextSample> sample = std::make_shared<TextSample>();
@ -278,10 +249,10 @@ Status WebVttParser::ParseCue(const std::string& id,
sample->AppendPayload(block[i]); sample->AppendPayload(block[i]);
} }
return DispatchTextSample(kStreamIndex, sample); return new_text_sample_cb_.Run(kStreamIndex, sample);
} }
Status WebVttParser::DispatchTextStreamInfo() { void WebVttParser::DispatchTextStreamInfo() {
stream_info_dispatched_ = true; stream_info_dispatched_ = true;
const int kTrackId = 0; const int kTrackId = 0;
@ -294,12 +265,14 @@ Status WebVttParser::DispatchTextStreamInfo() {
const char kWebVttCodecString[] = "wvtt"; const char kWebVttCodecString[] = "wvtt";
const int64_t kNoWidth = 0; const int64_t kNoWidth = 0;
const int64_t kNoHeight = 0; const int64_t kNoHeight = 0;
// The language of the stream will be overwritten by the Demuxer later.
const char kNoLanguage[] = "";
std::shared_ptr<StreamInfo> info = std::make_shared<TextStreamInfo>( std::vector<std::shared_ptr<StreamInfo>> streams;
streams.emplace_back(std::make_shared<TextStreamInfo>(
kTrackId, kTimescale, kDuration, kCodecWebVtt, kWebVttCodecString, kTrackId, kTimescale, kDuration, kCodecWebVtt, kWebVttCodecString,
style_region_config_, kNoWidth, kNoHeight, language_); style_region_config_, kNoWidth, kNoHeight, kNoLanguage));
init_cb_.Run(streams);
return DispatchStreamInfo(kStreamIndex, std::move(info));
} }
} // namespace media } // namespace media
} // namespace shaka } // namespace shaka

View File

@ -7,46 +7,46 @@
#ifndef PACKAGER_MEDIA_FORMATS_WEBVTT_WEBVTT_PARSER_H_ #ifndef PACKAGER_MEDIA_FORMATS_WEBVTT_WEBVTT_PARSER_H_
#define PACKAGER_MEDIA_FORMATS_WEBVTT_WEBVTT_PARSER_H_ #define PACKAGER_MEDIA_FORMATS_WEBVTT_WEBVTT_PARSER_H_
#include <stdint.h> #include <string>
#include <vector> #include <vector>
#include "packager/media/base/media_parser.h"
#include "packager/media/formats/webvtt/text_readers.h" #include "packager/media/formats/webvtt/text_readers.h"
#include "packager/media/origin/origin_handler.h"
namespace shaka { namespace shaka {
namespace media { namespace media {
// Used to parse a WebVTT source into Cues that will be sent downstream. // Used to parse a WebVTT source into Cues that will be sent downstream.
class WebVttParser : public OriginHandler { class WebVttParser : public MediaParser {
public: public:
WebVttParser(const std::string& input_path, const std::string& language); WebVttParser();
Status Run() override; void Init(const InitCB& init_cb,
void Cancel() override; const NewMediaSampleCB& new_media_sample_cb,
const NewTextSampleCB& new_text_sample_cb,
KeySource* decryption_key_source) override;
bool Flush() override;
bool Parse(const uint8_t* buf, int size) override;
private: private:
WebVttParser(const WebVttParser&) = delete; bool Parse();
WebVttParser& operator=(const WebVttParser&) = delete; bool ParseBlock(const std::vector<std::string>& block);
Status InitializeInternal() override;
bool ValidateOutputStreamIndex(size_t stream_index) const override;
bool Parse(BlockReader* block_reader);
bool ParseCueWithNoId(const std::vector<std::string>& block); bool ParseCueWithNoId(const std::vector<std::string>& block);
bool ParseCueWithId(const std::vector<std::string>& block); bool ParseCueWithId(const std::vector<std::string>& block);
Status ParseCue(const std::string& id, bool ParseCue(const std::string& id,
const std::string* block, const std::string* block,
size_t block_size); size_t block_size);
Status DispatchTextStreamInfo(); void DispatchTextStreamInfo();
std::string input_path_; InitCB init_cb_;
std::string language_; NewTextSampleCB new_text_sample_cb_;
BlockReader reader_;
std::string style_region_config_; std::string style_region_config_;
bool saw_cue_ = false;
bool stream_info_dispatched_ = false; bool stream_info_dispatched_ = false;
bool keep_reading_ = true; bool initialized_ = false;
}; };
} // namespace media } // namespace media

View File

@ -4,28 +4,19 @@
// license that can be found in the LICENSE file or at // license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd // https://developers.google.com/open-source/licenses/bsd
#include <gmock/gmock.h>
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "packager/file/file.h" #include "packager/base/bind.h"
#include "packager/media/base/media_handler_test_base.h" #include "packager/media/base/stream_info.h"
#include "packager/media/formats/webvtt/text_readers.h" #include "packager/media/base/text_sample.h"
#include "packager/media/formats/webvtt/webvtt_parser.h" #include "packager/media/formats/webvtt/webvtt_parser.h"
#include "packager/status_test_util.h"
using ::testing::_;
using ::testing::SaveArgPointee;
namespace shaka { namespace shaka {
namespace media { namespace media {
namespace { namespace {
const char kLanguage[] = "en";
const size_t kInputCount = 0;
const size_t kOutputCount = 1;
const size_t kOutputIndex = 0;
const uint32_t kStreamId = 0;
const uint32_t kTimeScale = 1000; const uint32_t kTimeScale = 1000;
const bool kEncrypted = true;
const char* kNoId = ""; const char* kNoId = "";
const char* kNoSettings = ""; const char* kNoSettings = "";
@ -33,139 +24,166 @@ const char* kNoSettings = "";
std::string ToString(const std::vector<uint8_t>& v) { std::string ToString(const std::vector<uint8_t>& v) {
return std::string(v.begin(), v.end()); return std::string(v.begin(), v.end());
} }
} // namespace } // namespace
class WebVttParserTest : public MediaHandlerTestBase { class WebVttParserTest : public testing::Test {
protected: protected:
void SetUpAndInitializeGraph(const char* text) { void SetUpAndInitialize() {
const char* kFilename = "memory://test-file"; parser_ = std::make_shared<WebVttParser>();
parser_->Init(
// Create the input file from the text passed to the test. base::Bind(&WebVttParserTest::InitCB, base::Unretained(this)),
ASSERT_TRUE(File::WriteStringToFile(kFilename, text)); base::Bind(&WebVttParserTest::NewMediaSampleCB, base::Unretained(this)),
base::Bind(&WebVttParserTest::NewTextSampleCB, base::Unretained(this)),
// Read from the file we just wrote. nullptr);
parser_ = std::make_shared<WebVttParser>(kFilename, kLanguage);
ASSERT_OK(MediaHandlerTestBase::SetUpAndInitializeGraph(
parser_, kInputCount, kOutputCount));
} }
std::shared_ptr<OriginHandler> parser_; void InitCB(const std::vector<std::shared_ptr<StreamInfo>>& streams) {
streams_ = streams;
}
bool NewMediaSampleCB(uint32_t stream_id,
std::shared_ptr<MediaSample> sample) {
ADD_FAILURE() << "Should not get media samples";
return false;
}
bool NewTextSampleCB(uint32_t stream_id, std::shared_ptr<TextSample> sample) {
EXPECT_EQ(stream_id, kStreamId);
samples_.emplace_back(std::move(sample));
return true;
}
std::shared_ptr<WebVttParser> parser_;
std::vector<std::shared_ptr<StreamInfo>> streams_;
std::vector<std::shared_ptr<TextSample>> samples_;
}; };
TEST_F(WebVttParserTest, FailToParseEmptyFile) { TEST_F(WebVttParserTest, FailToParseEmptyFile) {
const char* text = ""; const uint8_t text[] = "";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text)); ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
EXPECT_CALL(*Output(kOutputIndex), OnProcess(testing::_)).Times(0); ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(testing::_)).Times(0); ASSERT_TRUE(parser_->Flush());
ASSERT_NE(Status::OK, parser_->Run()); ASSERT_TRUE(streams_.empty());
ASSERT_TRUE(samples_.empty());
} }
TEST_F(WebVttParserTest, ParseOnlyHeader) { TEST_F(WebVttParserTest, ParseOnlyHeader) {
const char* text = const uint8_t text[] =
"WEBVTT\n" "WEBVTT\n"
"\n"; "\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text)); ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
{ ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
testing::InSequence s; ASSERT_TRUE(parser_->Flush());
EXPECT_CALL(*Output(kOutputIndex), OnProcess(_)).Times(0);
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
}
ASSERT_OK(parser_->Run()); ASSERT_TRUE(streams_.empty());
ASSERT_TRUE(samples_.empty());
} }
TEST_F(WebVttParserTest, ParseHeaderWithBOM) { TEST_F(WebVttParserTest, ParseHeaderWithBOM) {
const char* text = const uint8_t text[] =
"\xEF\xBB\xBFWEBVTT\n" "\xEF\xBB\xBFWEBVTT\n"
"\n"; "\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text)); ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
{ ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
testing::InSequence s; ASSERT_TRUE(parser_->Flush());
EXPECT_CALL(*Output(kOutputIndex), OnProcess(_)).Times(0);
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
}
ASSERT_OK(parser_->Run()); ASSERT_TRUE(streams_.empty());
ASSERT_TRUE(samples_.empty());
} }
TEST_F(WebVttParserTest, FailToParseHeaderWrongWord) { TEST_F(WebVttParserTest, FailToParseHeaderWrongWord) {
const char* text = const uint8_t text[] =
"NOT WEBVTT\n" "NOT WEBVTT\n"
"\n"; "\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text)); ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
EXPECT_CALL(*Output(kOutputIndex), OnProcess(testing::_)).Times(0); ASSERT_FALSE(parser_->Parse(text, sizeof(text) - 1));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(testing::_)).Times(0);
ASSERT_NE(Status::OK, parser_->Run()); ASSERT_TRUE(streams_.empty());
ASSERT_TRUE(samples_.empty());
} }
TEST_F(WebVttParserTest, FailToParseHeaderNotOneLine) { TEST_F(WebVttParserTest, FailToParseHeaderNotOneLine) {
const char* text = const uint8_t text[] =
"WEBVTT\n" "WEBVTT\n"
"WEBVTT\n" "WEBVTT\n"
"\n"; "\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text)); ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
EXPECT_CALL(*Output(kOutputIndex), OnProcess(testing::_)).Times(0); ASSERT_FALSE(parser_->Parse(text, sizeof(text) - 1));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(testing::_)).Times(0);
ASSERT_NE(Status::OK, parser_->Run()); ASSERT_TRUE(streams_.empty());
ASSERT_TRUE(samples_.empty());
}
TEST_F(WebVttParserTest, SendsStreamInfo) {
const uint8_t text[] =
"WEBVTT\n"
"\n"
"00:00:00.000 --> 00:01:00.000\n"
"Testing\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
ASSERT_TRUE(parser_->Flush());
ASSERT_EQ(streams_.size(), 1u);
EXPECT_EQ(streams_[0]->time_scale(), kTimeScale);
EXPECT_EQ(streams_[0]->is_encrypted(), false);
EXPECT_EQ(streams_[0]->codec(), kCodecWebVtt);
EXPECT_EQ(streams_[0]->codec_string(), "wvtt");
} }
TEST_F(WebVttParserTest, IgnoresZeroDurationCues) { TEST_F(WebVttParserTest, IgnoresZeroDurationCues) {
const char* text = const uint8_t text[] =
"WEBVTT\n" "WEBVTT\n"
"\n" "\n"
"00:01:00.000 --> 00:01:00.000\n" "00:01:00.000 --> 00:01:00.000\n"
"This subtitle would never show\n"; "This subtitle would never show\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text)); ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
{ ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
testing::InSequence s; ASSERT_TRUE(parser_->Flush());
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
}
ASSERT_OK(parser_->Run()); ASSERT_EQ(streams_.size(), 1u);
ASSERT_TRUE(samples_.empty());
} }
TEST_F(WebVttParserTest, ParseOneCue) { TEST_F(WebVttParserTest, ParseOneCue) {
const char* text = const uint8_t text[] =
"WEBVTT\n" "WEBVTT\n"
"\n" "\n"
"00:01:00.000 --> 01:00:00.000\n" "00:01:00.000 --> 01:00:00.000\n"
"subtitle\n"; "subtitle\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text)); ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
{ ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
testing::InSequence s; ASSERT_TRUE(parser_->Flush());
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsTextSample(_, kNoId, 60000u, 3600000u, kNoSettings,
"subtitle")));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
}
ASSERT_OK(parser_->Run()); ASSERT_EQ(streams_.size(), 1u);
ASSERT_EQ(samples_.size(), 1u);
EXPECT_EQ(samples_[0]->id(), kNoId);
EXPECT_EQ(samples_[0]->start_time(), 60000u);
EXPECT_EQ(samples_[0]->duration(), 3540000u);
EXPECT_EQ(samples_[0]->settings(), kNoSettings);
EXPECT_EQ(samples_[0]->payload(), "subtitle");
} }
TEST_F(WebVttParserTest, ParseOneCueWithStyleAndRegion) { TEST_F(WebVttParserTest, ParseOneCueWithStyleAndRegion) {
const char* text = const uint8_t text[] =
"WEBVTT\n" "WEBVTT\n"
"\n" "\n"
"STYLE\n" "STYLE\n"
@ -178,136 +196,117 @@ TEST_F(WebVttParserTest, ParseOneCueWithStyleAndRegion) {
"00:01:00.000 --> 01:00:00.000\n" "00:01:00.000 --> 01:00:00.000\n"
"subtitle\n"; "subtitle\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text)); ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
StreamData stream_data; ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
{ ASSERT_TRUE(parser_->Flush());
testing::InSequence s;
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)))
.WillOnce(SaveArgPointee<0>(&stream_data));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsTextSample(_, kNoId, 60000u, 3600000u, kNoSettings,
"subtitle")));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
}
ASSERT_OK(parser_->Run()); ASSERT_EQ(streams_.size(), 1u);
EXPECT_EQ(ToString(stream_data.stream_info->codec_config()), ASSERT_EQ(samples_.size(), 1u);
EXPECT_EQ(ToString(streams_[0]->codec_config()),
"STYLE\n" "STYLE\n"
"::cue { color:lime }\n" "::cue { color:lime }\n"
"\n" "\n"
"REGION\n" "REGION\n"
"id:scroll\n" "id:scroll\n"
"scrol:up"); "scrol:up");
EXPECT_EQ(samples_[0]->id(), kNoId);
EXPECT_EQ(samples_[0]->start_time(), 60000u);
EXPECT_EQ(samples_[0]->duration(), 3540000u);
EXPECT_EQ(samples_[0]->settings(), kNoSettings);
EXPECT_EQ(samples_[0]->payload(), "subtitle");
} }
TEST_F(WebVttParserTest, ParseOneEmptyCue) { TEST_F(WebVttParserTest, ParseOneEmptyCue) {
const char* text = const uint8_t text[] =
"WEBVTT\n" "WEBVTT\n"
"\n" "\n"
"00:01:00.000 --> 01:00:00.000\n" "00:01:00.000 --> 01:00:00.000\n"
"\n"; "\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text)); ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
{ ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
testing::InSequence s; ASSERT_TRUE(parser_->Flush());
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)));
EXPECT_CALL(
*Output(kOutputIndex),
OnProcess(IsTextSample(_, kNoId, 60000u, 3600000u, kNoSettings, "")));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
}
ASSERT_OK(parser_->Run()); ASSERT_EQ(streams_.size(), 1u);
ASSERT_EQ(samples_.size(), 1u);
EXPECT_EQ(samples_[0]->payload(), "");
} }
TEST_F(WebVttParserTest, FailToParseCueWithArrowInId) { TEST_F(WebVttParserTest, FailToParseCueWithArrowInId) {
const char* text = const uint8_t text[] =
"WEBVTT\n" "WEBVTT\n"
"\n" "\n"
"-->\n" "-->\n"
"00:01:00.000 --> 01:00:00.000\n" "00:01:00.000 --> 01:00:00.000\n"
"subtitle\n"; "subtitle\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text)); ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
ASSERT_NE(Status::OK, parser_->Run()); ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
ASSERT_FALSE(parser_->Flush());
} }
TEST_F(WebVttParserTest, ParseOneCueWithId) { TEST_F(WebVttParserTest, ParseOneCueWithId) {
const char* text = const uint8_t text[] =
"WEBVTT\n" "WEBVTT\n"
"\n" "\n"
"id\n" "id\n"
"00:01:00.000 --> 01:00:00.000\n" "00:01:00.000 --> 01:00:00.000\n"
"subtitle\n"; "subtitle\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text)); ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
{ ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
testing::InSequence s; ASSERT_TRUE(parser_->Flush());
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsTextSample(_, "id", 60000u, 3600000u, kNoSettings,
"subtitle")));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
}
ASSERT_OK(parser_->Run()); ASSERT_EQ(streams_.size(), 1u);
ASSERT_EQ(samples_.size(), 1u);
EXPECT_EQ(samples_[0]->id(), "id");
EXPECT_EQ(samples_[0]->payload(), "subtitle");
} }
TEST_F(WebVttParserTest, ParseOneEmptyCueWithId) { TEST_F(WebVttParserTest, ParseOneEmptyCueWithId) {
const char* text = const uint8_t text[] =
"WEBVTT\n" "WEBVTT\n"
"\n" "\n"
"id\n" "id\n"
"00:01:00.000 --> 01:00:00.000\n" "00:01:00.000 --> 01:00:00.000\n"
"\n"; "\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text)); ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
{ ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
testing::InSequence s; ASSERT_TRUE(parser_->Flush());
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)));
EXPECT_CALL(
*Output(kOutputIndex),
OnProcess(IsTextSample(_, "id", 60000u, 3600000u, kNoSettings, "")));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
}
ASSERT_OK(parser_->Run()); ASSERT_EQ(streams_.size(), 1u);
ASSERT_EQ(samples_.size(), 1u);
EXPECT_EQ(samples_[0]->id(), "id");
EXPECT_EQ(samples_[0]->payload(), "");
} }
TEST_F(WebVttParserTest, ParseOneCueWithSettings) { TEST_F(WebVttParserTest, ParseOneCueWithSettings) {
const char* text = const uint8_t text[] =
"WEBVTT\n" "WEBVTT\n"
"\n" "\n"
"00:01:00.000 --> 01:00:00.000 size:50%\n" "00:01:00.000 --> 01:00:00.000 size:50%\n"
"subtitle\n"; "subtitle\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text)); ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
{ ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
testing::InSequence s; ASSERT_TRUE(parser_->Flush());
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsTextSample(_, kNoId, 60000u, 3600000u, "size:50%",
"subtitle")));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
}
ASSERT_OK(parser_->Run()); ASSERT_EQ(streams_.size(), 1u);
ASSERT_EQ(samples_.size(), 1u);
EXPECT_EQ(samples_[0]->settings(), "size:50%");
} }
// Verify that a typical case with multiple cues works. // Verify that a typical case with multiple cues works.
TEST_F(WebVttParserTest, ParseMultipleCues) { TEST_F(WebVttParserTest, ParseMultipleCues) {
const char* text = const uint8_t text[] =
"WEBVTT\n" "WEBVTT\n"
"\n" "\n"
"00:00:01.000 --> 00:00:05.200\n" "00:00:01.000 --> 00:00:05.200\n"
@ -319,31 +318,29 @@ TEST_F(WebVttParserTest, ParseMultipleCues) {
"00:00:05.800 --> 00:00:08.000\n" "00:00:05.800 --> 00:00:08.000\n"
"subtitle C\n"; "subtitle C\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text)); ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
{ ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
testing::InSequence s; ASSERT_TRUE(parser_->Flush());
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsTextSample(_, kNoId, 1000u, 5200u, kNoSettings,
"subtitle A")));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsTextSample(_, kNoId, 2321u, 7000u, kNoSettings,
"subtitle B")));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsTextSample(_, kNoId, 5800u, 8000u, kNoSettings,
"subtitle C")));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
}
ASSERT_OK(parser_->Run()); ASSERT_EQ(streams_.size(), 1u);
ASSERT_EQ(samples_.size(), 3u);
EXPECT_EQ(samples_[0]->start_time(), 1000u);
EXPECT_EQ(samples_[0]->duration(), 4200u);
EXPECT_EQ(samples_[0]->payload(), "subtitle A");
EXPECT_EQ(samples_[1]->start_time(), 2321u);
EXPECT_EQ(samples_[1]->duration(), 4679u);
EXPECT_EQ(samples_[1]->payload(), "subtitle B");
EXPECT_EQ(samples_[2]->start_time(), 5800u);
EXPECT_EQ(samples_[2]->duration(), 2200u);
EXPECT_EQ(samples_[2]->payload(), "subtitle C");
} }
// Verify that a typical case with multiple cues works even when comments are // Verify that a typical case with multiple cues works even when comments are
// present. // present.
TEST_F(WebVttParserTest, ParseWithComments) { TEST_F(WebVttParserTest, ParseWithComments) {
const char* text = const uint8_t text[] =
"WEBVTT\n" "WEBVTT\n"
"\n" "\n"
"NOTE This is a one line comment\n" "NOTE This is a one line comment\n"
@ -365,25 +362,17 @@ TEST_F(WebVttParserTest, ParseWithComments) {
"00:00:05.800 --> 00:00:08.000\n" "00:00:05.800 --> 00:00:08.000\n"
"subtitle C\n"; "subtitle C\n";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text)); ASSERT_NO_FATAL_FAILURE(SetUpAndInitialize());
{ ASSERT_TRUE(parser_->Parse(text, sizeof(text) - 1));
testing::InSequence s; ASSERT_TRUE(parser_->Flush());
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsTextSample(_, kNoId, 1000u, 5200u, kNoSettings,
"subtitle A")));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsTextSample(_, kNoId, 2321u, 7000u, kNoSettings,
"subtitle B")));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsTextSample(_, kNoId, 5800u, 8000u, kNoSettings,
"subtitle C")));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
}
ASSERT_OK(parser_->Run()); ASSERT_EQ(streams_.size(), 1u);
ASSERT_EQ(samples_.size(), 3u);
EXPECT_EQ(samples_[0]->payload(), "subtitle A");
EXPECT_EQ(samples_[1]->payload(), "subtitle B");
EXPECT_EQ(samples_[2]->payload(), "subtitle C");
} }
} // namespace media } // namespace media
} // namespace shaka } // namespace shaka

View File

@ -40,8 +40,6 @@
#include "packager/media/event/muxer_listener_factory.h" #include "packager/media/event/muxer_listener_factory.h"
#include "packager/media/event/vod_media_info_dump_muxer_listener.h" #include "packager/media/event/vod_media_info_dump_muxer_listener.h"
#include "packager/media/formats/webvtt/text_padder.h" #include "packager/media/formats/webvtt/text_padder.h"
#include "packager/media/formats/webvtt/text_readers.h"
#include "packager/media/formats/webvtt/webvtt_parser.h"
#include "packager/media/formats/webvtt/webvtt_text_output_handler.h" #include "packager/media/formats/webvtt/webvtt_text_output_handler.h"
#include "packager/media/formats/webvtt/webvtt_to_mp4_handler.h" #include "packager/media/formats/webvtt/webvtt_to_mp4_handler.h"
#include "packager/media/replicator/replicator.h" #include "packager/media/replicator/replicator.h"
@ -511,18 +509,22 @@ Status CreateHlsTextJob(const StreamDescriptor& stream,
auto output = std::make_shared<WebVttTextOutputHandler>( auto output = std::make_shared<WebVttTextOutputHandler>(
muxer_options, std::move(muxer_listener)); muxer_options, std::move(muxer_listener));
auto parser = std::make_shared<WebVttParser>(stream.input, stream.language); std::shared_ptr<Demuxer> demuxer;
RETURN_IF_ERROR(CreateDemuxer(stream, packaging_params, &demuxer));
if (!stream.language.empty())
demuxer->SetLanguageOverride(stream.stream_selector, stream.language);
auto padder = std::make_shared<TextPadder>(kDefaultTextZeroBiasMs); auto padder = std::make_shared<TextPadder>(kDefaultTextZeroBiasMs);
RETURN_IF_ERROR(demuxer->SetHandler(stream.stream_selector, padder));
auto cue_aligner = sync_points auto cue_aligner = sync_points
? std::make_shared<CueAlignmentHandler>(sync_points) ? std::make_shared<CueAlignmentHandler>(sync_points)
: nullptr; : nullptr;
auto chunker = CreateTextChunker(packaging_params.chunking_params); auto chunker = CreateTextChunker(packaging_params.chunking_params);
job_manager->Add("Segmented Text Job", parser); job_manager->Add("Segmented Text Job", demuxer);
return MediaHandler::Chain({std::move(parser), std::move(padder), return MediaHandler::Chain({std::move(padder), std::move(cue_aligner),
std::move(cue_aligner), std::move(chunker), std::move(chunker), std::move(output)});
std::move(output)});
} }
Status CreateWebVttToMp4TextJob(const StreamDescriptor& stream, Status CreateWebVttToMp4TextJob(const StreamDescriptor& stream,
@ -531,8 +533,12 @@ Status CreateWebVttToMp4TextJob(const StreamDescriptor& stream,
SyncPointQueue* sync_points, SyncPointQueue* sync_points,
MuxerFactory* muxer_factory, MuxerFactory* muxer_factory,
std::shared_ptr<OriginHandler>* root) { std::shared_ptr<OriginHandler>* root) {
auto parser = std::make_shared<WebVttParser>(stream.input, stream.language); std::shared_ptr<Demuxer> demuxer;
RETURN_IF_ERROR(CreateDemuxer(stream, packaging_params, &demuxer));
if (!stream.language.empty())
demuxer->SetLanguageOverride(stream.stream_selector, stream.language);
auto padder = std::make_shared<TextPadder>(kDefaultTextZeroBiasMs); auto padder = std::make_shared<TextPadder>(kDefaultTextZeroBiasMs);
RETURN_IF_ERROR(demuxer->SetHandler(stream.stream_selector, padder));
auto text_to_mp4 = std::make_shared<WebVttToMp4Handler>(); auto text_to_mp4 = std::make_shared<WebVttToMp4Handler>();
auto muxer = muxer_factory->CreateMuxer(GetOutputFormat(stream), stream); auto muxer = muxer_factory->CreateMuxer(GetOutputFormat(stream), stream);
@ -547,11 +553,11 @@ Status CreateWebVttToMp4TextJob(const StreamDescriptor& stream,
std::shared_ptr<MediaHandler> chunker = std::shared_ptr<MediaHandler> chunker =
CreateTextChunker(packaging_params.chunking_params); CreateTextChunker(packaging_params.chunking_params);
*root = parser; *root = demuxer;
return MediaHandler::Chain({std::move(parser), std::move(padder), return MediaHandler::Chain({std::move(padder), std::move(cue_aligner),
std::move(cue_aligner), std::move(chunker), std::move(chunker), std::move(text_to_mp4),
std::move(text_to_mp4), std::move(muxer)}); std::move(muxer)});
} }
Status CreateTextJobs( Status CreateTextJobs(