Support STYLE and REGION in WebVTT
Note that STYLE and REGION are not supported in the mp4 container because of a spec limitation: ISO/IEC 14496-30:2014 does not specify a way to signal styles/regions inside mp4. Closes #344. Change-Id: I05c14df916f7b2c7ca4364ee9407e0eda4dc7a3f
This commit is contained in:
parent
715ed939f1
commit
f49b89280c
|
@ -237,8 +237,8 @@ $ ninja -C out/Release
|
|||
Use `apk` command to install dependencies:
|
||||
|
||||
```shell
|
||||
$apk add --no-cache bash build-base curl findutils git ninja python \
|
||||
bsd-compat-headers linux-headers libexecinfo-dev
|
||||
$ apk add --no-cache bash build-base curl findutils git ninja python \
|
||||
bsd-compat-headers linux-headers libexecinfo-dev
|
||||
```
|
||||
|
||||
Alpine uses musl which does not have mallinfo defined in malloc.h. It is
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
WEBVTT
|
||||
|
||||
STYLE
|
||||
::cue { color:lime }
|
||||
|
||||
00:00:00.000 --> 00:00:00.800
|
||||
Yup, that's a bear, eh.
|
||||
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
WEBVTT
|
||||
|
||||
STYLE
|
||||
::cue { color:lime }
|
||||
|
||||
00:00:01.000 --> 00:00:04.700
|
||||
He 's... um... doing bear-like stuff.
|
||||
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
WEBVTT
|
||||
|
||||
STYLE
|
||||
::cue { color:lime }
|
||||
|
||||
00:00:01.000 --> 00:00:04.700
|
||||
He 's... um... doing bear-like stuff.
|
||||
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
WEBVTT
|
||||
|
||||
STYLE
|
||||
::cue { color:lime }
|
||||
|
||||
00:00:01.000 --> 00:00:04.700
|
||||
He 's... um... doing bear-like stuff.
|
||||
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
WEBVTT
|
||||
|
||||
STYLE
|
||||
::cue { color:lime }
|
||||
|
||||
00:00:01.000 --> 00:00:04.700
|
||||
He 's... um... doing bear-like stuff.
|
||||
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
WEBVTT
|
||||
|
||||
STYLE
|
||||
::cue { color:lime }
|
||||
|
||||
00:00:01.000 --> 00:00:04.700
|
||||
He 's... um... doing bear-like stuff.
|
||||
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
WEBVTT
|
||||
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
|
||||
|
||||
STYLE
|
||||
::cue { color:lime }
|
||||
|
||||
00:00:00.000 --> 00:00:00.800
|
||||
Yup, that's a bear, eh.
|
||||
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
WEBVTT
|
||||
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
|
||||
|
||||
STYLE
|
||||
::cue { color:lime }
|
||||
|
||||
00:00:01.000 --> 00:00:04.700
|
||||
He 's... um... doing bear-like stuff.
|
||||
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
WEBVTT
|
||||
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
|
||||
|
||||
STYLE
|
||||
::cue { color:lime }
|
||||
|
||||
00:00:01.000 --> 00:00:04.700
|
||||
He 's... um... doing bear-like stuff.
|
||||
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
WEBVTT
|
||||
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
|
||||
|
||||
STYLE
|
||||
::cue { color:lime }
|
||||
|
||||
00:00:01.000 --> 00:00:04.700
|
||||
He 's... um... doing bear-like stuff.
|
||||
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
WEBVTT
|
||||
X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:9000
|
||||
|
||||
STYLE
|
||||
::cue { color:lime }
|
||||
|
||||
00:00:01.000 --> 00:00:04.700
|
||||
He 's... um... doing bear-like stuff.
|
||||
|
||||
|
|
Binary file not shown.
|
@ -538,8 +538,19 @@ bool MP4Muxer::GenerateTextTrak(const TextStreamInfo* text_info,
|
|||
// Handle WebVTT.
|
||||
TextSampleEntry webvtt;
|
||||
webvtt.format = FOURCC_wvtt;
|
||||
webvtt.config.config.assign(text_info->codec_config().begin(),
|
||||
text_info->codec_config().end());
|
||||
|
||||
// 14496-30:2014 7.5 Web Video Text Tracks Sample entry format.
|
||||
// In the sample entry, a WebVTT configuration box must occur, carrying
|
||||
// exactly the lines of the WebVTT file header, i.e. all text lines up to
|
||||
// but excluding the 'two or more line terminators' that end the header.
|
||||
webvtt.config.config = "WEBVTT";
|
||||
// The spec does not define a way to carry STYLE and REGION information in
|
||||
// the mp4 container.
|
||||
if (!text_info->codec_config().empty()) {
|
||||
LOG(INFO) << "Skipping possible style / region configuration as the spec "
|
||||
"does not define a way to carry them inside ISO-BMFF files.";
|
||||
}
|
||||
|
||||
// TODO(rkuroiwa): This should be the source file URI(s). Putting bogus
|
||||
// string for now so that the box will be there for samples with overlapping
|
||||
// cues.
|
||||
|
|
|
@ -18,9 +18,11 @@ const int kTsTimescale = 90000;
|
|||
}
|
||||
|
||||
WebVttFileBuffer::WebVttFileBuffer(
|
||||
uint32_t transport_stream_timestamp_offset_ms)
|
||||
uint32_t transport_stream_timestamp_offset_ms,
|
||||
const std::string& style_region_config)
|
||||
: transport_stream_timestamp_offset_(transport_stream_timestamp_offset_ms *
|
||||
kTsTimescale / 1000) {
|
||||
kTsTimescale / 1000),
|
||||
style_region_config_(style_region_config) {
|
||||
// Make sure we start with the same state that we would end up with if
|
||||
// the caller reset our state.
|
||||
Reset();
|
||||
|
@ -38,6 +40,10 @@ void WebVttFileBuffer::Reset() {
|
|||
transport_stream_timestamp_offset_);
|
||||
}
|
||||
buffer_.append("\n"); // end of header.
|
||||
if (!style_region_config_.empty()) {
|
||||
buffer_.append(style_region_config_);
|
||||
buffer_.append("\n\n");
|
||||
}
|
||||
}
|
||||
|
||||
void WebVttFileBuffer::Append(const TextSample& sample) {
|
||||
|
|
|
@ -20,7 +20,8 @@ class TextSample;
|
|||
// all the formatting requirements for a webvtt file.
|
||||
class WebVttFileBuffer {
|
||||
public:
|
||||
explicit WebVttFileBuffer(uint32_t transport_stream_timestamp_offset_ms);
|
||||
WebVttFileBuffer(uint32_t transport_stream_timestamp_offset_ms,
|
||||
const std::string& style_region_config);
|
||||
virtual ~WebVttFileBuffer() = default;
|
||||
|
||||
void Reset();
|
||||
|
@ -36,6 +37,7 @@ class WebVttFileBuffer {
|
|||
WebVttFileBuffer& operator=(const WebVttFileBuffer&) = delete;
|
||||
|
||||
const uint32_t transport_stream_timestamp_offset_ = 0;
|
||||
const std::string style_region_config_;
|
||||
std::string buffer_;
|
||||
size_t sample_count_ = 0;
|
||||
};
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include "packager/base/strings/string_util.h"
|
||||
#include "packager/media/base/text_stream_info.h"
|
||||
#include "packager/media/formats/webvtt/webvtt_timestamp.h"
|
||||
#include "packager/status_macros.h"
|
||||
|
||||
namespace shaka {
|
||||
namespace media {
|
||||
|
@ -75,6 +76,13 @@ bool IsLikelyStyle(const std::string& line) {
|
|||
bool IsLikelyRegion(const std::string& line) {
|
||||
return base::TrimWhitespaceASCII(line, base::TRIM_TRAILING) == "REGION";
|
||||
}
|
||||
|
||||
// Appends |block| to |config|. The lines of |block| are joined with "\n";
// if |config| already has content, "\n\n" is inserted before the new block.
void UpdateConfig(const std::vector<std::string>& block, std::string* config) {
  const std::string joined_block = base::JoinString(block, "\n");
  if (!config->empty()) {
    config->append("\n\n");
  }
  config->append(joined_block);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
WebVttParser::WebVttParser(std::unique_ptr<FileReader> source,
|
||||
|
@ -121,14 +129,6 @@ bool WebVttParser::Parse() {
|
|||
return false;
|
||||
}
|
||||
|
||||
const Status send_stream_info_result = DispatchTextStreamInfo();
|
||||
|
||||
if (send_stream_info_result != Status::OK) {
|
||||
LOG(ERROR) << "Failed to send stream info down stream:"
|
||||
<< send_stream_info_result.error_message();
|
||||
return false;
|
||||
}
|
||||
|
||||
bool saw_cue = false;
|
||||
|
||||
while (reader_.Next(&block) && keep_reading_) {
|
||||
|
@ -141,11 +141,10 @@ bool WebVttParser::Parse() {
|
|||
// STYLE
|
||||
if (IsLikelyStyle(block[0])) {
|
||||
if (saw_cue) {
|
||||
LOG(ERROR)
|
||||
LOG(WARNING)
|
||||
<< "Found style block after seeing cue. Ignoring style block";
|
||||
} else {
|
||||
LOG(WARNING) << "Missing support for style blocks. Skipping block:\n"
|
||||
<< BlockToString(block.data(), block.size());
|
||||
UpdateConfig(block, &style_region_config_);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
@ -153,11 +152,10 @@ bool WebVttParser::Parse() {
|
|||
// REGION
|
||||
if (IsLikelyRegion(block[0])) {
|
||||
if (saw_cue) {
|
||||
LOG(ERROR)
|
||||
LOG(WARNING)
|
||||
<< "Found region block after seeing cue. Ignoring region block";
|
||||
} else {
|
||||
LOG(WARNING) << "Missing support for region blocks. Skipping block:\n"
|
||||
<< BlockToString(block.data(), block.size());
|
||||
UpdateConfig(block, &style_region_config_);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
@ -223,6 +221,9 @@ Status WebVttParser::ParseCue(const std::string& id,
|
|||
"Could not parse start time, -->, and end time from " + block[0]);
|
||||
}
|
||||
|
||||
if (!stream_info_dispatched_)
|
||||
RETURN_IF_ERROR(DispatchTextStreamInfo());
|
||||
|
||||
// According to the WebVTT spec end time must be greater than the start time
|
||||
// of the cue. Since we are seeing content with invalid times in the field, we
|
||||
// are going to drop the cue instead of failing to package.
|
||||
|
@ -261,6 +262,8 @@ Status WebVttParser::ParseCue(const std::string& id,
|
|||
}
|
||||
|
||||
Status WebVttParser::DispatchTextStreamInfo() {
|
||||
stream_info_dispatched_ = true;
|
||||
|
||||
const int kTrackId = 0;
|
||||
// The resolution of timings are in milliseconds.
|
||||
const int kTimescale = 1000;
|
||||
|
@ -269,13 +272,12 @@ Status WebVttParser::DispatchTextStreamInfo() {
|
|||
// work nicely with the current demuxer.
|
||||
const int kDuration = 0;
|
||||
const char kWebVttCodecString[] = "wvtt";
|
||||
const char kCodecConfig[] = "";
|
||||
const int64_t kNoWidth = 0;
|
||||
const int64_t kNoHeight = 0;
|
||||
|
||||
std::shared_ptr<StreamInfo> info = std::make_shared<TextStreamInfo>(
|
||||
kTrackId, kTimescale, kDuration, kCodecWebVtt, kWebVttCodecString,
|
||||
kCodecConfig, kNoWidth, kNoHeight, language_);
|
||||
style_region_config_, kNoWidth, kNoHeight, language_);
|
||||
|
||||
return DispatchStreamInfo(kStreamIndex, std::move(info));
|
||||
}
|
||||
|
|
|
@ -43,6 +43,8 @@ class WebVttParser : public OriginHandler {
|
|||
|
||||
BlockReader reader_;
|
||||
std::string language_;
|
||||
std::string style_region_config_;
|
||||
bool stream_info_dispatched_ = false;
|
||||
bool keep_reading_ = true;
|
||||
};
|
||||
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include "packager/status_test_util.h"
|
||||
|
||||
using ::testing::_;
|
||||
using ::testing::SaveArgPointee;
|
||||
|
||||
namespace shaka {
|
||||
namespace media {
|
||||
|
@ -28,6 +29,10 @@ const bool kEncrypted = true;
|
|||
|
||||
const char* kNoId = "";
|
||||
const char* kNoSettings = "";
|
||||
|
||||
// Returns a std::string holding the same bytes as |v|, in the same order.
std::string ToString(const std::vector<uint8_t>& v) {
  std::string text;
  text.assign(v.begin(), v.end());
  return text;
}
|
||||
} // namespace
|
||||
|
||||
class WebVttParserTest : public MediaHandlerTestBase {
|
||||
|
@ -71,8 +76,7 @@ TEST_F(WebVttParserTest, ParseOnlyHeader) {
|
|||
|
||||
{
|
||||
testing::InSequence s;
|
||||
EXPECT_CALL(*Output(kOutputIndex),
|
||||
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, kLanguage)));
|
||||
EXPECT_CALL(*Output(kOutputIndex), OnProcess(_)).Times(0);
|
||||
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
|
||||
}
|
||||
|
||||
|
@ -88,8 +92,7 @@ TEST_F(WebVttParserTest, ParseHeaderWithBOM) {
|
|||
|
||||
{
|
||||
testing::InSequence s;
|
||||
EXPECT_CALL(*Output(kOutputIndex),
|
||||
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)));
|
||||
EXPECT_CALL(*Output(kOutputIndex), OnProcess(_)).Times(0);
|
||||
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
|
||||
}
|
||||
|
||||
|
@ -123,57 +126,7 @@ TEST_F(WebVttParserTest, FailToParseHeaderNotOneLine) {
|
|||
ASSERT_NE(Status::OK, parser_->Run());
|
||||
}
|
||||
|
||||
// Right now we don't support region blocks, but for now make sure that we don't
|
||||
// die if we see a region block.
|
||||
TEST_F(WebVttParserTest, ParserDoesNotDieOnRegionBlock) {
|
||||
const char* text =
|
||||
"WEBVTT\n"
|
||||
"\n"
|
||||
"REGION\n"
|
||||
"id:fred\n"
|
||||
"width:40%\n"
|
||||
"lines:3\n"
|
||||
"regionanchor:0%,100%\n"
|
||||
"viewportanchor:10%,90%\n"
|
||||
"scroll:up";
|
||||
|
||||
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text));
|
||||
|
||||
{
|
||||
testing::InSequence s;
|
||||
EXPECT_CALL(*Output(kOutputIndex),
|
||||
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)));
|
||||
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
|
||||
}
|
||||
|
||||
ASSERT_OK(parser_->Run());
|
||||
}
|
||||
|
||||
// Right now we don't support style blocks, but for now make sure that we don't
|
||||
// die if we see a style block.
|
||||
TEST_F(WebVttParserTest, ParserDoesNotDieOnStyleBlock) {
|
||||
const char* text =
|
||||
"WEBVTT\n"
|
||||
"\n"
|
||||
"STYLE\n"
|
||||
"::cue {\n"
|
||||
" background-image: linear-gradient(to bottom, dimgray, lightgray);\n"
|
||||
" color: papayawhip;\n"
|
||||
"}";
|
||||
|
||||
ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text));
|
||||
|
||||
{
|
||||
testing::InSequence s;
|
||||
EXPECT_CALL(*Output(kOutputIndex),
|
||||
OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)));
|
||||
EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
|
||||
}
|
||||
|
||||
ASSERT_OK(parser_->Run());
|
||||
}
|
||||
|
||||
TEST_F(WebVttParserTest, IngoresZeroDurationCues) {
|
||||
TEST_F(WebVttParserTest, IgnoresZeroDurationCues) {
|
||||
const char* text =
|
||||
"WEBVTT\n"
|
||||
"\n"
|
||||
|
@ -214,6 +167,44 @@ TEST_F(WebVttParserTest, ParseOneCue) {
|
|||
ASSERT_OK(parser_->Run());
|
||||
}
|
||||
|
||||
// Checks that STYLE and REGION blocks placed before the first cue end up in
// the codec config of the dispatched stream info, and that the cue following
// them is still delivered as a text sample.
TEST_F(WebVttParserTest, ParseOneCueWithStyleAndRegion) {
  const char* text =
      "WEBVTT\n"
      "\n"
      "STYLE\n"
      "::cue { color:lime }\n"
      "\n"
      "REGION\n"
      "id:scroll\n"
      "scrol:up\n"
      "\n"
      "00:01:00.000 --> 01:00:00.000\n"
      "subtitle\n";

  // The expectation mirrors the header blocks of the input above, separated
  // by one blank line and without a trailing newline.
  // NOTE(review): "scrol:up" looks like a typo for "scroll:up"; it is kept
  // because the expectation must match the input exactly -- confirm.
  const std::string expected_config =
      "STYLE\n"
      "::cue { color:lime }\n"
      "\n"
      "REGION\n"
      "id:scroll\n"
      "scrol:up";

  ASSERT_NO_FATAL_FAILURE(SetUpAndInitializeGraph(text));

  StreamData captured_stream_data;
  {
    testing::InSequence in_order;

    // Stream info comes first; keep a copy so its codec config can be
    // inspected once the parser has run.
    EXPECT_CALL(*Output(kOutputIndex),
                OnProcess(IsStreamInfo(_, kTimeScale, !kEncrypted, _)))
        .WillOnce(SaveArgPointee<0>(&captured_stream_data));
    EXPECT_CALL(*Output(kOutputIndex),
                OnProcess(IsTextSample(_, kNoId, 60000u, 3600000u, kNoSettings,
                                       "subtitle")));
    EXPECT_CALL(*Output(kOutputIndex), OnFlush(_));
  }

  ASSERT_OK(parser_->Run());

  const std::string actual_config =
      ToString(captured_stream_data.stream_info->codec_config());
  EXPECT_EQ(actual_config, expected_config);
}
|
||||
|
||||
TEST_F(WebVttParserTest, ParseOneEmptyCue) {
|
||||
const char* text =
|
||||
"WEBVTT\n"
|
||||
|
|
|
@ -18,14 +18,17 @@ namespace shaka {
|
|||
namespace media {
|
||||
namespace {
|
||||
double kMillisecondsToSeconds = 1000.0;
|
||||
|
||||
// Copies the bytes of |v| into a std::string, preserving order and length.
std::string ToString(const std::vector<uint8_t>& v) {
  std::string out;
  out.reserve(v.size());
  for (const uint8_t byte : v) {
    out.push_back(static_cast<char>(byte));
  }
  return out;
}
|
||||
} // namespace
|
||||
|
||||
WebVttTextOutputHandler::WebVttTextOutputHandler(
|
||||
const MuxerOptions& muxer_options,
|
||||
std::unique_ptr<MuxerListener> muxer_listener)
|
||||
: muxer_options_(muxer_options),
|
||||
muxer_listener_(std::move(muxer_listener)),
|
||||
buffer_(muxer_options.transport_stream_timestamp_offset_ms) {}
|
||||
muxer_listener_(std::move(muxer_listener)) {}
|
||||
|
||||
Status WebVttTextOutputHandler::InitializeInternal() {
|
||||
return Status::OK;
|
||||
|
@ -50,7 +53,13 @@ Status WebVttTextOutputHandler::Process(
|
|||
}
|
||||
|
||||
Status WebVttTextOutputHandler::OnFlushRequest(size_t input_stream_index) {
|
||||
DCHECK_EQ(buffer_.sample_count(), 0u)
|
||||
if (!buffer_) {
|
||||
LOG(INFO) << "Skip stream '" << muxer_options_.segment_template
|
||||
<< "' which does not contain any sample.";
|
||||
return Status::OK;
|
||||
}
|
||||
|
||||
DCHECK_EQ(buffer_->sample_count(), 0u)
|
||||
<< "There should have been a segment info before flushing that would "
|
||||
"have cleared out all the samples.";
|
||||
|
||||
|
@ -64,6 +73,9 @@ Status WebVttTextOutputHandler::OnFlushRequest(size_t input_stream_index) {
|
|||
}
|
||||
|
||||
Status WebVttTextOutputHandler::OnStreamInfo(const StreamInfo& info) {
|
||||
buffer_.reset(
|
||||
new WebVttFileBuffer(muxer_options_.transport_stream_timestamp_offset_ms,
|
||||
ToString(info.codec_config())));
|
||||
muxer_listener_->OnMediaStart(muxer_options_, info, info.time_scale(),
|
||||
MuxerListener::kContainerText);
|
||||
return Status::OK;
|
||||
|
@ -89,8 +101,8 @@ Status WebVttTextOutputHandler::OnSegmentInfo(const SegmentInfo& info) {
|
|||
return Status(error::FILE_FAILURE, "Failed to open " + filename);
|
||||
}
|
||||
|
||||
buffer_.WriteTo(file.get());
|
||||
buffer_.Reset();
|
||||
buffer_->WriteTo(file.get());
|
||||
buffer_->Reset();
|
||||
|
||||
if (!file.release()->Close()) {
|
||||
return Status(error::FILE_FAILURE, "Failed to close " + filename);
|
||||
|
@ -115,7 +127,7 @@ void WebVttTextOutputHandler::OnTextSample(const TextSample& sample) {
|
|||
// Skip empty samples. It is normal to see empty samples as earlier in the
|
||||
// pipeline we pad the stream to remove gaps.
|
||||
if (sample.payload().size()) {
|
||||
buffer_.Append(sample);
|
||||
buffer_->Append(sample);
|
||||
}
|
||||
}
|
||||
} // namespace media
|
||||
|
|
|
@ -49,7 +49,7 @@ class WebVttTextOutputHandler : public MediaHandler {
|
|||
uint64_t total_duration_ms_ = 0;
|
||||
uint32_t segment_index_ = 0;
|
||||
|
||||
WebVttFileBuffer buffer_;
|
||||
std::unique_ptr<WebVttFileBuffer> buffer_;
|
||||
};
|
||||
|
||||
} // namespace media
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
WEBVTT
|
||||
|
||||
STYLE
|
||||
::cue { color:lime }
|
||||
|
||||
00:00:00.000 --> 00:00:00.800
|
||||
Yup, that's a bear, eh.
|
||||
|
||||
|
|
Loading…
Reference in New Issue