Support STYLE and REGION in WebVTT

Note that STYLE and REGION are not supported in the mp4 container due to
a spec limitation: ISO/IEC 14496-30:2014 does not specify a way to signal
styles/regions inside mp4.

Closes #344.

Change-Id: I05c14df916f7b2c7ca4364ee9407e0eda4dc7a3f
This commit is contained in:
KongQun Yang 2018-08-17 13:27:59 -07:00
parent 715ed939f1
commit f49b89280c
22 changed files with 147 additions and 85 deletions

View File

@ -1,5 +1,8 @@
WEBVTT WEBVTT
STYLE
::cue { color:lime }
00:00:00.000 --> 00:00:00.800 00:00:00.000 --> 00:00:00.800
Yup, that's a bear, eh. Yup, that's a bear, eh.

View File

@ -1,5 +1,8 @@
WEBVTT WEBVTT
STYLE
::cue { color:lime }
00:00:01.000 --> 00:00:04.700 00:00:01.000 --> 00:00:04.700
He 's... um... doing bear-like stuff. He 's... um... doing bear-like stuff.

View File

@ -1,5 +1,8 @@
WEBVTT WEBVTT
STYLE
::cue { color:lime }
00:00:01.000 --> 00:00:04.700 00:00:01.000 --> 00:00:04.700
He 's... um... doing bear-like stuff. He 's... um... doing bear-like stuff.

View File

@ -1,5 +1,8 @@
WEBVTT WEBVTT
STYLE
::cue { color:lime }
00:00:01.000 --> 00:00:04.700 00:00:01.000 --> 00:00:04.700
He 's... um... doing bear-like stuff. He 's... um... doing bear-like stuff.

View File

@ -1,5 +1,8 @@
WEBVTT WEBVTT
STYLE
::cue { color:lime }
00:00:01.000 --> 00:00:04.700 00:00:01.000 --> 00:00:04.700
He 's... um... doing bear-like stuff. He 's... um... doing bear-like stuff.

View File

@ -1,5 +1,8 @@
WEBVTT WEBVTT
STYLE
::cue { color:lime }
00:00:01.000 --> 00:00:04.700 00:00:01.000 --> 00:00:04.700
He 's... um... doing bear-like stuff. He 's... um... doing bear-like stuff.

View File

@ -1,6 +1,9 @@
WEBVTT WEBVTT
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000 X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
STYLE
::cue { color:lime }
00:00:00.000 --> 00:00:00.800 00:00:00.000 --> 00:00:00.800
Yup, that's a bear, eh. Yup, that's a bear, eh.

View File

@ -1,6 +1,9 @@
WEBVTT WEBVTT
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000 X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
STYLE
::cue { color:lime }
00:00:01.000 --> 00:00:04.700 00:00:01.000 --> 00:00:04.700
He 's... um... doing bear-like stuff. He 's... um... doing bear-like stuff.

View File

@ -1,6 +1,9 @@
WEBVTT WEBVTT
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000 X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
STYLE
::cue { color:lime }
00:00:01.000 --> 00:00:04.700 00:00:01.000 --> 00:00:04.700
He 's... um... doing bear-like stuff. He 's... um... doing bear-like stuff.

View File

@ -1,6 +1,9 @@
WEBVTT WEBVTT
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000 X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
STYLE
::cue { color:lime }
00:00:01.000 --> 00:00:04.700 00:00:01.000 --> 00:00:04.700
He 's... um... doing bear-like stuff. He 's... um... doing bear-like stuff.

View File

@ -1,6 +1,9 @@
WEBVTT WEBVTT
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000 X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
STYLE
::cue { color:lime }
00:00:01.000 --> 00:00:04.700 00:00:01.000 --> 00:00:04.700
He 's... um... doing bear-like stuff. He 's... um... doing bear-like stuff.

View File

@ -538,8 +538,19 @@ bool MP4Muxer::GenerateTextTrak(const TextStreamInfo* text_info,
// Handle WebVTT. // Handle WebVTT.
TextSampleEntry webvtt; TextSampleEntry webvtt;
webvtt.format = FOURCC_wvtt; webvtt.format = FOURCC_wvtt;
webvtt.config.config.assign(text_info->codec_config().begin(),
text_info->codec_config().end()); // 14496-30:2014 7.5 Web Video Text Tracks Sample entry format.
// In the sample entry, a WebVTT configuration box must occur, carrying
// exactly the lines of the WebVTT file header, i.e. all text lines up to
// but excluding the 'two or more line terminators' that end the header.
webvtt.config.config = "WEBVTT";
// The spec does not define a way to carry STYLE and REGION information in
// the mp4 container.
if (!text_info->codec_config().empty()) {
LOG(INFO) << "Skipping possible style / region configuration as the spec "
"does not define a way to carry them inside ISO-BMFF files.";
}
// TODO(rkuroiwa): This should be the source file URI(s). Putting bogus // TODO(rkuroiwa): This should be the source file URI(s). Putting bogus
// string for now so that the box will be there for samples with overlapping // string for now so that the box will be there for samples with overlapping
// cues. // cues.

View File

@ -18,9 +18,11 @@ const int kTsTimescale = 90000;
} }
WebVttFileBuffer::WebVttFileBuffer( WebVttFileBuffer::WebVttFileBuffer(
uint32_t transport_stream_timestamp_offset_ms) uint32_t transport_stream_timestamp_offset_ms,
const std::string& style_region_config)
: transport_stream_timestamp_offset_(transport_stream_timestamp_offset_ms * : transport_stream_timestamp_offset_(transport_stream_timestamp_offset_ms *
kTsTimescale / 1000) { kTsTimescale / 1000),
style_region_config_(style_region_config) {
// Make sure we start with the same state that we would end up with if // Make sure we start with the same state that we would end up with if
// the caller reset our state. // the caller reset our state.
Reset(); Reset();
@ -38,6 +40,10 @@ void WebVttFileBuffer::Reset() {
transport_stream_timestamp_offset_); transport_stream_timestamp_offset_);
} }
buffer_.append("\n"); // end of header. buffer_.append("\n"); // end of header.
if (!style_region_config_.empty()) {
buffer_.append(style_region_config_);
buffer_.append("\n\n");
}
} }
void WebVttFileBuffer::Append(const TextSample& sample) { void WebVttFileBuffer::Append(const TextSample& sample) {

View File

@ -20,7 +20,8 @@ class TextSample;
// all the formatting requirements for a webvtt file. // all the formatting requirements for a webvtt file.
class WebVttFileBuffer { class WebVttFileBuffer {
public: public:
explicit WebVttFileBuffer(uint32_t transport_stream_timestamp_offset_ms); WebVttFileBuffer(uint32_t transport_stream_timestamp_offset_ms,
const std::string& style_region_config);
virtual ~WebVttFileBuffer() = default; virtual ~WebVttFileBuffer() = default;
void Reset(); void Reset();
@ -36,6 +37,7 @@ class WebVttFileBuffer {
WebVttFileBuffer& operator=(const WebVttFileBuffer&) = delete; WebVttFileBuffer& operator=(const WebVttFileBuffer&) = delete;
const uint32_t transport_stream_timestamp_offset_ = 0; const uint32_t transport_stream_timestamp_offset_ = 0;
const std::string style_region_config_;
std::string buffer_; std::string buffer_;
size_t sample_count_ = 0; size_t sample_count_ = 0;
}; };

View File

@ -14,6 +14,7 @@
#include "packager/base/strings/string_util.h" #include "packager/base/strings/string_util.h"
#include "packager/media/base/text_stream_info.h" #include "packager/media/base/text_stream_info.h"
#include "packager/media/formats/webvtt/webvtt_timestamp.h" #include "packager/media/formats/webvtt/webvtt_timestamp.h"
#include "packager/status_macros.h"
namespace shaka { namespace shaka {
namespace media { namespace media {
@ -75,6 +76,13 @@ bool IsLikelyStyle(const std::string& line) {
bool IsLikelyRegion(const std::string& line) { bool IsLikelyRegion(const std::string& line) {
return base::TrimWhitespaceASCII(line, base::TRIM_TRAILING) == "REGION"; return base::TrimWhitespaceASCII(line, base::TRIM_TRAILING) == "REGION";
} }
// Appends |block| to |config|, with the lines of |block| joined by "\n".
// When |config| already holds earlier content, "\n\n" is inserted first so
// that consecutive blocks stay separated by a blank line.
void UpdateConfig(const std::vector<std::string>& block, std::string* config) {
  const std::string joined_block = base::JoinString(block, "\n");
  if (config->empty()) {
    // First block: nothing to separate it from.
    *config = joined_block;
    return;
  }
  config->append("\n\n");
  config->append(joined_block);
}
} // namespace } // namespace
WebVttParser::WebVttParser(std::unique_ptr<FileReader> source, WebVttParser::WebVttParser(std::unique_ptr<FileReader> source,
@ -121,14 +129,6 @@ bool WebVttParser::Parse() {
return false; return false;
} }
const Status send_stream_info_result = DispatchTextStreamInfo();
if (send_stream_info_result != Status::OK) {
LOG(ERROR) << "Failed to send stream info down stream:"
<< send_stream_info_result.error_message();
return false;
}
bool saw_cue = false; bool saw_cue = false;
while (reader_.Next(&block) && keep_reading_) { while (reader_.Next(&block) && keep_reading_) {
@ -141,11 +141,10 @@ bool WebVttParser::Parse() {
// STYLE // STYLE
if (IsLikelyStyle(block[0])) { if (IsLikelyStyle(block[0])) {
if (saw_cue) { if (saw_cue) {
LOG(ERROR) LOG(WARNING)
<< "Found style block after seeing cue. Ignoring style block"; << "Found style block after seeing cue. Ignoring style block";
} else { } else {
LOG(WARNING) << "Missing support for style blocks. Skipping block:\n" UpdateConfig(block, &style_region_config_);
<< BlockToString(block.data(), block.size());
} }
continue; continue;
} }
@ -153,11 +152,10 @@ bool WebVttParser::Parse() {
// REGION // REGION
if (IsLikelyRegion(block[0])) { if (IsLikelyRegion(block[0])) {
if (saw_cue) { if (saw_cue) {
LOG(ERROR) LOG(WARNING)
<< "Found region block after seeing cue. Ignoring region block"; << "Found region block after seeing cue. Ignoring region block";
} else { } else {
LOG(WARNING) << "Missing support for region blocks. Skipping block:\n" UpdateConfig(block, &style_region_config_);
<< BlockToString(block.data(), block.size());
} }
continue; continue;
} }
@ -223,6 +221,9 @@ Status WebVttParser::ParseCue(const std::string& id,
"Could not parse start time, -->, and end time from " + block[0]); "Could not parse start time, -->, and end time from " + block[0]);
} }
if (!stream_info_dispatched_)
RETURN_IF_ERROR(DispatchTextStreamInfo());
// According to the WebVTT spec end time must be greater than the start time // According to the WebVTT spec end time must be greater than the start time
// of the cue. Since we are seeing content with invalid times in the field, we // of the cue. Since we are seeing content with invalid times in the field, we
// are going to drop the cue instead of failing to package. // are going to drop the cue instead of failing to package.
@ -261,6 +262,8 @@ Status WebVttParser::ParseCue(const std::string& id,
} }
Status WebVttParser::DispatchTextStreamInfo() { Status WebVttParser::DispatchTextStreamInfo() {
stream_info_dispatched_ = true;
const int kTrackId = 0; const int kTrackId = 0;
// The resolution of timings are in milliseconds. // The resolution of timings are in milliseconds.
const int kTimescale = 1000; const int kTimescale = 1000;
@ -269,13 +272,12 @@ Status WebVttParser::DispatchTextStreamInfo() {
// work nicely with the current demuxer. // work nicely with the current demuxer.
const int kDuration = 0; const int kDuration = 0;
const char kWebVttCodecString[] = "wvtt"; const char kWebVttCodecString[] = "wvtt";
const char kCodecConfig[] = "";
const int64_t kNoWidth = 0; const int64_t kNoWidth = 0;
const int64_t kNoHeight = 0; const int64_t kNoHeight = 0;
std::shared_ptr<StreamInfo> info = std::make_shared<TextStreamInfo>( std::shared_ptr<StreamInfo> info = std::make_shared<TextStreamInfo>(
kTrackId, kTimescale, kDuration, kCodecWebVtt, kWebVttCodecString, kTrackId, kTimescale, kDuration, kCodecWebVtt, kWebVttCodecString,
kCodecConfig, kNoWidth, kNoHeight, language_); style_region_config_, kNoWidth, kNoHeight, language_);
return DispatchStreamInfo(kStreamIndex, std::move(info)); return DispatchStreamInfo(kStreamIndex, std::move(info));
} }

View File

@ -43,6 +43,8 @@ class WebVttParser : public OriginHandler {
BlockReader reader_; BlockReader reader_;
std::string language_; std::string language_;
std::string style_region_config_;
bool stream_info_dispatched_ = false;
bool keep_reading_ = true; bool keep_reading_ = true;
}; };

View File

@ -14,6 +14,7 @@
#include "packager/status_test_util.h" #include "packager/status_test_util.h"
using ::testing::_; using ::testing::_;
using ::testing::SaveArgPointee;
namespace shaka { namespace shaka {
namespace media { namespace media {
@ -28,6 +29,10 @@ const bool kEncrypted = true;
const char* kNoId = ""; const char* kNoId = "";
const char* kNoSettings = ""; const char* kNoSettings = "";
// Returns a std::string holding the same bytes as |v|, in the same order.
std::string ToString(const std::vector<uint8_t>& v) {
  std::string text;
  text.assign(v.begin(), v.end());
  return text;
}
} // namespace } // namespace
class WebVttParserTest : public MediaHandlerTestBase { class WebVttParserTest : public MediaHandlerTestBase {
@ -71,8 +76,7 @@ TEST_F(WebVttParserTest, ParseOnlyHeader) {
{ {
testing::InSequence s; testing::InSequence s;
EXPECT_CALL(*Output(kOutputIndex), EXPECT_CALL(*Output(kOutputIndex), OnProcess(_)).Times(0);
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, kLanguage)));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_)); EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
} }
@ -88,8 +92,7 @@ TEST_F(WebVttParserTest, ParseHeaderWithBOM) {
{ {
testing::InSequence s; testing::InSequence s;
EXPECT_CALL(*Output(kOutputIndex), EXPECT_CALL(*Output(kOutputIndex), OnProcess(_)).Times(0);
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_)); EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
} }
@ -123,57 +126,7 @@ TEST_F(WebVttParserTest, FailToParseHeaderNotOneLine) {
ASSERT_NE(Status::OK, parser_->Run()); ASSERT_NE(Status::OK, parser_->Run());
} }
// Right now we don't support region blocks, but for now make sure that we don't TEST_F(WebVttParserTest, IgnoresZeroDurationCues) {
// die if we see a region block.
TEST_F(WebVttParserTest, ParserDoesNotDieOnRegionBlock) {
const char* text =
"WEBVTT\n"
"\n"
"REGION\n"
"id:fred\n"
"width:40%\n"
"lines:3\n"
"regionanchor:0%,100%\n"
"viewportanchor:10%,90%\n"
"scroll:up";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text));
{
testing::InSequence s;
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
}
ASSERT_OK(parser_->Run());
}
// Right now we don't support style blocks, but for now make sure that we don't
// die if we see a style block.
TEST_F(WebVttParserTest, ParserDoesNotDieOnStyleBlock) {
const char* text =
"WEBVTT\n"
"\n"
"STYLE\n"
"::cue {\n"
" background-image: linear-gradient(to bottom, dimgray, lightgray);\n"
" color: papayawhip;\n"
"}";
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text));
{
testing::InSequence s;
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
}
ASSERT_OK(parser_->Run());
}
TEST_F(WebVttParserTest, IngoresZeroDurationCues) {
const char* text = const char* text =
"WEBVTT\n" "WEBVTT\n"
"\n" "\n"
@ -214,6 +167,44 @@ TEST_F(WebVttParserTest, ParseOneCue) {
ASSERT_OK(parser_->Run()); ASSERT_OK(parser_->Run());
} }
// Verifies that STYLE and REGION blocks placed before the first cue end up in
// the codec config of the dispatched stream info (in file order, separated by
// a blank line), and that the cue itself is still emitted as a text sample.
TEST_F(WebVttParserTest, ParseOneCueWithStyleAndRegion) {
  const char* text =
      "WEBVTT\n"
      "\n"
      "STYLE\n"
      "::cue { color:lime }\n"
      "\n"
      "REGION\n"
      "id:scroll\n"
      "scroll:up\n"
      "\n"
      "00:01:00.000 --> 01:00:00.000\n"
      "subtitle\n";

  ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text));

  // Captures the stream info passed downstream so its codec config can be
  // inspected once the parser has finished running.
  StreamData stream_data;
  {
    testing::InSequence s;
    EXPECT_CALL(*Output(kOutputIndex),
                OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)))
        .WillOnce(SaveArgPointee<0>(&stream_data));
    EXPECT_CALL(*Output(kOutputIndex),
                OnProcess(IsTextSample(_, kNoId, 60000u, 3600000u, kNoSettings,
                                       "subtitle")));
    EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
  }

  ASSERT_OK(parser_->Run());

  // The expected config is the two blocks verbatim, without a trailing
  // newline after the last block.
  EXPECT_EQ(ToString(stream_data.stream_info->codec_config()),
            "STYLE\n"
            "::cue { color:lime }\n"
            "\n"
            "REGION\n"
            "id:scroll\n"
            "scroll:up");
}
TEST_F(WebVttParserTest, ParseOneEmptyCue) { TEST_F(WebVttParserTest, ParseOneEmptyCue) {
const char* text = const char* text =
"WEBVTT\n" "WEBVTT\n"

View File

@ -18,14 +18,17 @@ namespace shaka {
namespace media { namespace media {
namespace { namespace {
double kMillisecondsToSeconds = 1000.0; double kMillisecondsToSeconds = 1000.0;
// Copies the bytes of |v| into a std::string, preserving order and length.
std::string ToString(const std::vector<uint8_t>& v) {
  std::string result;
  result.assign(v.begin(), v.end());
  return result;
}
} // namespace } // namespace
WebVttTextOutputHandler::WebVttTextOutputHandler( WebVttTextOutputHandler::WebVttTextOutputHandler(
const MuxerOptions& muxer_options, const MuxerOptions& muxer_options,
std::unique_ptr<MuxerListener> muxer_listener) std::unique_ptr<MuxerListener> muxer_listener)
: muxer_options_(muxer_options), : muxer_options_(muxer_options),
muxer_listener_(std::move(muxer_listener)), muxer_listener_(std::move(muxer_listener)) {}
buffer_(muxer_options.transport_stream_timestamp_offset_ms) {}
Status WebVttTextOutputHandler::InitializeInternal() { Status WebVttTextOutputHandler::InitializeInternal() {
return Status::OK; return Status::OK;
@ -50,7 +53,13 @@ Status WebVttTextOutputHandler::Process(
} }
Status WebVttTextOutputHandler::OnFlushRequest(size_t input_stream_index) { Status WebVttTextOutputHandler::OnFlushRequest(size_t input_stream_index) {
DCHECK_EQ(buffer_.sample_count(), 0u) if (!buffer_) {
LOG(INFO) << "Skip stream '" << muxer_options_.segment_template
<< "' which does not contain any sample.";
return Status::OK;
}
DCHECK_EQ(buffer_->sample_count(), 0u)
<< "There should have been a segment info before flushing that would " << "There should have been a segment info before flushing that would "
"have cleared out all the samples."; "have cleared out all the samples.";
@ -64,6 +73,9 @@ Status WebVttTextOutputHandler::OnFlushRequest(size_t input_stream_index) {
} }
Status WebVttTextOutputHandler::OnStreamInfo(const StreamInfo& info) { Status WebVttTextOutputHandler::OnStreamInfo(const StreamInfo& info) {
buffer_.reset(
new WebVttFileBuffer(muxer_options_.transport_stream_timestamp_offset_ms,
ToString(info.codec_config())));
muxer_listener_->OnMediaStart(muxer_options_, info, info.time_scale(), muxer_listener_->OnMediaStart(muxer_options_, info, info.time_scale(),
MuxerListener::kContainerText); MuxerListener::kContainerText);
return Status::OK; return Status::OK;
@ -89,8 +101,8 @@ Status WebVttTextOutputHandler::OnSegmentInfo(const SegmentInfo& info) {
return Status(error::FILE_FAILURE, "Failed to open " + filename); return Status(error::FILE_FAILURE, "Failed to open " + filename);
} }
buffer_.WriteTo(file.get()); buffer_->WriteTo(file.get());
buffer_.Reset(); buffer_->Reset();
if (!file.release()->Close()) { if (!file.release()->Close()) {
return Status(error::FILE_FAILURE, "Failed to close " + filename); return Status(error::FILE_FAILURE, "Failed to close " + filename);
@ -115,7 +127,7 @@ void WebVttTextOutputHandler::OnTextSample(const TextSample& sample) {
// Skip empty samples. It is normal to see empty samples as earlier in the // Skip empty samples. It is normal to see empty samples as earlier in the
// pipeline we pad the stream to remove gaps. // pipeline we pad the stream to remove gaps.
if (sample.payload().size()) { if (sample.payload().size()) {
buffer_.Append(sample); buffer_->Append(sample);
} }
} }
} // namespace media } // namespace media

View File

@ -49,7 +49,7 @@ class WebVttTextOutputHandler : public MediaHandler {
uint64_t total_duration_ms_ = 0; uint64_t total_duration_ms_ = 0;
uint32_t segment_index_ = 0; uint32_t segment_index_ = 0;
WebVttFileBuffer buffer_; std::unique_ptr<WebVttFileBuffer> buffer_;
}; };
} // namespace media } // namespace media

View File

@ -1,5 +1,8 @@
WEBVTT WEBVTT
STYLE
::cue { color:lime }
00:00:00.000 --> 00:00:00.800 00:00:00.000 --> 00:00:00.800
Yup, that's a bear, eh. Yup, that's a bear, eh.