WebVTT to MP4 Handler

Implemented a MediaHandler that takes text samples and creates
media samples. The data in each media sample is the MP4 box data for
the cues active during that sample's (non-overlapping) time range.

Per WebVTT in MP4, samples must be non-overlapping, so this handler
takes care of grouping cues into samples and dividing cues that span
sample boundaries.
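
For example (mirroring the Overlap unit test added here), cue A over
[0, 1000) and cue B over [500, 1500)

    [-------A-------]
            [-------B------]

are emitted as three media samples: [0, 500) containing only A,
[500, 1000) containing both A and B, and [1000, 1500) containing only B.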

Bug: 36138902

Change-Id: I0c1d27964180c14a22cb200591f70e46e04a651f
Aaron Vaage 2017-05-30 17:24:40 -07:00
parent fcc334d652
commit ab19082c20
4 changed files with 591 additions and 0 deletions


@@ -27,6 +27,8 @@
'webvtt_segmenter.h',
'webvtt_timestamp.cc',
'webvtt_timestamp.h',
'webvtt_to_mp4_handler.cc',
'webvtt_to_mp4_handler.h',
],
'dependencies': [
'../../../base/base.gyp:base',
@@ -45,6 +47,8 @@
'webvtt_sample_converter_unittest.cc',
'webvtt_segmenter_unittest.cc',
'webvtt_timestamp_unittest.cc',
'webvtt_to_mp4_handler_unittest.cc',
],
'dependencies': [
'../../../testing/gmock.gyp:gmock',


@@ -0,0 +1,178 @@
// Copyright 2017 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#include "packager/media/formats/webvtt/webvtt_to_mp4_handler.h"
#include <algorithm>
#include "packager/media/base/buffer_writer.h"
#include "packager/media/formats/mp4/box_buffer.h"
#include "packager/media/formats/mp4/box_definitions.h"
namespace shaka {
namespace media {
class DisplayAction {
public:
DisplayAction(uint64_t id, uint64_t time) : id_(id), time_(time) {}
uint64_t id() const { return id_; }
uint64_t time() const { return time_; }
virtual ~DisplayAction() = default;
virtual void ActOn(std::list<const TextSample*>* display) const = 0;
private:
uint64_t id_;
uint64_t time_;
};
namespace {
const uint64_t kTrackId = 0;
class AddToDisplayAction : public DisplayAction {
public:
explicit AddToDisplayAction(uint64_t id,
std::shared_ptr<const TextSample>& sample)
: DisplayAction(id, sample->start_time()), sample_(sample) {}
void ActOn(std::list<const TextSample*>* display) const override {
display->push_back(sample_.get());
}
private:
std::shared_ptr<const TextSample> sample_;
};
class RemoveFromDisplayAction : public DisplayAction {
public:
explicit RemoveFromDisplayAction(uint64_t id,
std::shared_ptr<const TextSample>& sample)
: DisplayAction(id, sample->EndTime()), sample_(sample) {}
void ActOn(std::list<const TextSample*>* display) const override {
display->remove(sample_.get());
}
private:
std::shared_ptr<const TextSample> sample_;
};
} // namespace
bool DisplayActionCompare::operator()(
const std::shared_ptr<DisplayAction>& left,
const std::shared_ptr<DisplayAction>& right) const {
return left->time() == right->time() ? left->id() > right->id()
: left->time() > right->time();
}
Status WebVttToMp4Handler::InitializeInternal() {
return Status::OK;
}
Status WebVttToMp4Handler::Process(std::unique_ptr<StreamData> stream_data) {
if (StreamDataType::kStreamInfo == stream_data->stream_data_type) {
return DispatchStreamInfo(kTrackId, std::move(stream_data->stream_info));
}
if (stream_data->stream_data_type == StreamDataType::kTextSample) {
std::shared_ptr<const TextSample> sample = stream_data->text_sample;
std::shared_ptr<DisplayAction> add(
new AddToDisplayAction(NextActionId(), sample));
std::shared_ptr<DisplayAction> remove(
new RemoveFromDisplayAction(NextActionId(), sample));
actions_.push(add);
actions_.push(remove);
ProcessUpToTime(add->time());
return Status::OK;
}
return Status(error::INTERNAL_ERROR,
"Invalid stream data type for this handler");
}
Status WebVttToMp4Handler::OnFlushRequest(size_t input_stream_index) {
const uint64_t kEndOfTime = std::numeric_limits<uint64_t>::max();
ProcessUpToTime(kEndOfTime);
return FlushDownstream(0);
}
void WebVttToMp4Handler::WriteCue(const std::string& id,
const std::string& settings,
const std::string& payload,
BufferWriter* out) {
mp4::VTTCueBox box;
if (id.length()) {
box.cue_id.cue_id = id;
}
if (settings.length()) {
box.cue_settings.settings = settings;
}
if (payload.length()) {
box.cue_payload.cue_text = payload;
}
// If there is internal timing (i.e. a WebVTT cue timestamp), then
// cue_current_time should be populated,
// "which gives the VTT timestamp associated with the start time of sample."
// TODO(rkuroiwa): Reuse TimestampToMilliseconds() to check if there is an
// internal timestamp in the payload to set CueTimeBox.cue_current_time.
box.Write(out);
}
void WebVttToMp4Handler::ProcessUpToTime(uint64_t cutoff_time) {
// We can only process as far as the most recent add action, as no new
// events that come before that time will be added.
while (actions_.size() && actions_.top()->time() < cutoff_time) {
// STAGE 1: Write out the current state
// Get the time range for which the current active state is valid.
const uint64_t previous_change = next_change_;
next_change_ = actions_.top()->time();
// The only time that |previous_change| and |next_change_| should ever break
// the rule |next_change_ > previous_change| is at the start where
// |previous_change| and |next_change_| are both zero.
DCHECK((previous_change == 0 && next_change_ == 0) ||
next_change_ > previous_change);
// Send out the active group. If there is nothing in the active group, then
// this segment is ignored.
if (active_.size()) {
MergeAndSendSamples(active_, previous_change, next_change_);
}
// STAGE 2: Move to the next state.
while (actions_.size() && actions_.top()->time() == next_change_) {
actions_.top()->ActOn(&active_);
actions_.pop();
}
}
}
Status WebVttToMp4Handler::MergeAndSendSamples(
const std::list<const TextSample*>& samples,
uint64_t start_time,
uint64_t end_time) {
DCHECK_GT(end_time, start_time);
box_writer_.Clear();
for (const TextSample* sample : samples) {
DCHECK_LE(sample->start_time(), start_time);
DCHECK_GE(sample->EndTime(), end_time);
WriteCue(sample->id(), sample->settings(), sample->payload(), &box_writer_);
}
std::shared_ptr<MediaSample> sample =
MediaSample::CopyFrom(box_writer_.Buffer(), box_writer_.Size(), true);
sample->set_pts(start_time);
sample->set_dts(start_time);
sample->set_duration(end_time - start_time);
return DispatchMediaSample(kTrackId, std::move(sample));
}
uint64_t WebVttToMp4Handler::NextActionId() {
return next_id_++;
}
} // namespace media
} // namespace shaka


@@ -0,0 +1,99 @@
// Copyright 2017 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#ifndef PACKAGER_MEDIA_FORMATS_WEBVTT_WEBVTT_MP4_CUE_HANDLER_H_
#define PACKAGER_MEDIA_FORMATS_WEBVTT_WEBVTT_MP4_CUE_HANDLER_H_
#include <stdint.h>
#include <list>
#include <queue>
#include "packager/media/base/buffer_writer.h"
#include "packager/media/base/media_handler.h"
namespace shaka {
namespace media {
class DisplayAction;
class DisplayActionCompare {
public:
bool operator()(const std::shared_ptr<DisplayAction>& left,
const std::shared_ptr<DisplayAction>& right) const;
};
// Take text samples, convert them to MP4 boxes, and send them downstream.
// Virtual methods should be overridden only for testing.
class WebVttToMp4Handler : public MediaHandler {
public:
WebVttToMp4Handler() = default;
protected:
// |Process| and |OnFlushRequest| need to be protected so that they can be
// called for testing.
Status Process(std::unique_ptr<StreamData> stream_data) override;
Status OnFlushRequest(size_t input_stream_index) override;
// This is made protected-virtual so that we can override it for testing.
virtual void WriteCue(const std::string& id,
const std::string& settings,
const std::string& payload,
BufferWriter* out);
private:
WebVttToMp4Handler(const WebVttToMp4Handler&) = delete;
WebVttToMp4Handler& operator=(const WebVttToMp4Handler&) = delete;
Status InitializeInternal() override;
// Merge and send all samples in the queue downstream while the head of the
// queue's time is less than |cutoff_time|. |cutoff_time| is needed as we can
// only merge and send samples when we are sure no new samples will appear
// before the next action.
void ProcessUpToTime(uint64_t cutoff_time);
// Merge together all TextSamples in |samples| into a single MP4 box and
// pass the box downstream.
Status MergeAndSendSamples(const std::list<const TextSample*>& samples,
uint64_t start_time,
uint64_t end_time);
// Take an MP4 box as a byte buffer and send it downstream.
Status WriteSample(uint64_t start,
uint64_t end,
const uint8_t* sample,
size_t sample_length);
// Get a new id for the next action.
uint64_t NextActionId();
uint64_t next_change_ = 0;
// This is the current state of the box we are writing.
BufferWriter box_writer_;
// |actions_| is a time-sorted queue of actions that affect the timeline (e.g.
// adding or removing a cue). |active_| is the list of all cues that are
// currently on screen.
// When a cue is to appear on screen, it is added to |active_|. When it is time
// for the cue to come off screen, it is removed from |active_|.
// Because |actions_| holds a shared pointer to each cue, |active_| can use raw
// pointers; the pointers stay valid and make the |remove| call easier.
std::priority_queue<std::shared_ptr<DisplayAction>,
std::vector<std::shared_ptr<DisplayAction>>,
DisplayActionCompare>
actions_;
std::list<const TextSample*> active_;
uint64_t next_id_ = 0;
};
} // namespace media
} // namespace shaka
#endif // PACKAGER_MEDIA_FORMATS_WEBVTT_WEBVTT_MP4_CUE_HANDLER_H_


@@ -0,0 +1,310 @@
// Copyright 2017 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "packager/media/base/media_handler_test_base.h"
#include "packager/media/formats/webvtt/webvtt_to_mp4_handler.h"
#include "packager/status_test_util.h"
namespace shaka {
namespace media {
namespace {
const size_t kStreamIndex = 0;
const bool kEncrypted = true;
const size_t kInputCount = 1;
const size_t kOutputCount = 1;
const size_t kInputIndex = 0;
const size_t kOutputIndex = 0;
const char* kId[] = {"cue 1 id", "cue 2 id", "cue 3 id"};
const char* kPayload[] = {"cue 1 payload", "cue 2 payload", "cue 3 payload"};
const char* kNoSettings = "";
// These indices refer to the samples. They have purposely short names so they
// are easier to use in their correct context.
const size_t kA = 0;
const size_t kB = 1;
const size_t kC = 2;
} // namespace
class TestableWebVttToMp4Handler : public WebVttToMp4Handler {
public:
MOCK_METHOD3(OnWriteCue,
void(const std::string& id,
const std::string& settings,
const std::string& payload));
protected:
void WriteCue(const std::string& id,
const std::string& settings,
const std::string& payload,
BufferWriter* out) override {
OnWriteCue(id, settings, payload);
// We need to write something out or else the media sample will be treated as
// the end of the stream.
out->AppendInt(0);
}
};
class WebVttToMp4HandlerTest : public MediaHandlerTestBase {
protected:
void SetUp() override {
mp4_handler_ = std::make_shared<TestableWebVttToMp4Handler>();
ASSERT_OK(SetUpAndInitializeGraph(mp4_handler_, kInputCount, kOutputCount));
}
std::shared_ptr<TestableWebVttToMp4Handler> mp4_handler_;
};
// Verify the cues are grouped correctly when the cues do not overlap at all.
//
// [----A---] [---B---]
TEST_F(WebVttToMp4HandlerTest, NoOverlap) {
const int64_t kStart[] = {0, 1100};
const int64_t kEnd[] = {1000, 2100};
{
testing::InSequence s;
for (size_t i = kA; i <= kB; i++) {
EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i]));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsMediaSample(kStreamIndex, kStart[i],
kEnd[i] - kStart[i], !kEncrypted)));
}
EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex));
}
for (size_t i = kA; i <= kB; i++) {
ASSERT_OK(Input(kInputIndex)
->Dispatch(StreamData::FromTextSample(
kStreamIndex,
GetTextSample(kId[i], kStart[i], kEnd[i], kPayload[i]))));
}
ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams());
}
// Verify the cues are grouped correctly when one cue overlaps another cue at
// one end.
//
// [-------A-------]
// [-------B------]
TEST_F(WebVttToMp4HandlerTest, Overlap) {
const int64_t kStart[] = {0, 500};
const int64_t kEnd[] = {1000, 1500};
{
testing::InSequence s;
// Sample A
EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[kA], kNoSettings, kPayload[kA]));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsMediaSample(kStreamIndex, kStart[kA],
kStart[kB] - kStart[kA], !kEncrypted)));
// Sample A and B
for (size_t i = kA; i <= kB; i++) {
EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i]));
}
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsMediaSample(kStreamIndex, kStart[kB],
kEnd[kA] - kStart[kB], !kEncrypted)));
// Sample B
EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[kB], kNoSettings, kPayload[kB]));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsMediaSample(kStreamIndex, kEnd[kA],
kEnd[kB] - kEnd[kA], !kEncrypted)));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex));
}
for (size_t i = kA; i <= kB; i++) {
ASSERT_OK(Input(kInputIndex)
->Dispatch(StreamData::FromTextSample(
kStreamIndex,
GetTextSample(kId[i], kStart[i], kEnd[i], kPayload[i]))));
}
ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams());
}
// Verify the cues are grouped correctly when one cue starts before and ends
// after another cue.
//
// [-------------A-------------]
// [----------B----------]
TEST_F(WebVttToMp4HandlerTest, Contains) {
const int64_t kStart[] = {0, 100};
const int64_t kEnd[] = {1000, 900};
{
testing::InSequence s;
// Sample A
EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[kA], kNoSettings, kPayload[kA]));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsMediaSample(kStreamIndex, kStart[kA],
kStart[kB] - kStart[kA], !kEncrypted)));
// Sample A and B
for (size_t i = kA; i <= kB; i++) {
EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i]));
}
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsMediaSample(kStreamIndex, kStart[kB],
kEnd[kB] - kStart[kB], !kEncrypted)));
// Sample A
EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[kA], kNoSettings, kPayload[kA]));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsMediaSample(kStreamIndex, kEnd[kB],
kEnd[kA] - kEnd[kB], !kEncrypted)));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex));
}
for (size_t i = kA; i <= kB; i++) {
ASSERT_OK(Input(kInputIndex)
->Dispatch(StreamData::FromTextSample(
kStreamIndex,
GetTextSample(kId[i], kStart[i], kEnd[i], kPayload[i]))));
}
ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams());
}
// Verify that when two cues are completely on top of each other, no extra
// boxes are sent out.
//
// [----------A----------]
// [----------B----------]
TEST_F(WebVttToMp4HandlerTest, ExactOverlap) {
const int64_t kStart = 0;
const int64_t kDuration = 1000;
const int64_t kEnd = kStart + kDuration;
{
testing::InSequence s;
// Sample A and B
for (size_t i = kA; i <= kB; i++) {
EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i]));
}
EXPECT_CALL(
*Output(kOutputIndex),
OnProcess(IsMediaSample(kStreamIndex, kStart, kDuration, !kEncrypted)));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex));
}
for (size_t i = kA; i <= kB; i++) {
ASSERT_OK(Input(kInputIndex)
->Dispatch(StreamData::FromTextSample(
kStreamIndex,
GetTextSample(kId[i], kStart, kEnd, kPayload[i]))));
}
ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams());
}
// Verify the cues are grouped correctly when the cues start together but end
// at staggered times.
//
// [----A----]
// [--------B--------]
// [------------C------------]
TEST_F(WebVttToMp4HandlerTest, OverlapStartWithStaggerEnd) {
const int64_t kStart = 0;
const int64_t kEnd[] = {1000, 2000, 3000};
{
testing::InSequence s;
// Sample A, B, and C
for (size_t i = kA; i <= kC; i++) {
EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i]));
}
EXPECT_CALL(
*Output(kOutputIndex),
OnProcess(IsMediaSample(kStreamIndex, kStart, kEnd[kA], !kEncrypted)));
// Sample B and C
for (size_t i = kB; i <= kC; i++) {
EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i]));
}
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsMediaSample(kStreamIndex, kEnd[kA],
kEnd[kB] - kEnd[kA], !kEncrypted)));
// Sample C
EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[kC], kNoSettings, kPayload[kC]));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsMediaSample(kStreamIndex, kEnd[kB],
kEnd[kC] - kEnd[kB], !kEncrypted)));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex));
}
for (size_t i = kA; i <= kC; i++) {
ASSERT_OK(Input(kInputIndex)
->Dispatch(StreamData::FromTextSample(
kStreamIndex,
GetTextSample(kId[i], kStart, kEnd[i], kPayload[i]))));
}
ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams());
}
// Verify the cues are grouped correctly when the cues start at staggered
// times but end together.
//
// [------------A------------]
// [--------B--------]
// [----C----]
TEST_F(WebVttToMp4HandlerTest, StaggerStartWithOverlapEnd) {
const int64_t kStart[] = {0, 100, 200};
const int64_t kEnd = 1000;
{
testing::InSequence s;
// Sample A
EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[kA], kNoSettings, kPayload[kA]));
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsMediaSample(kStreamIndex, kStart[kA],
kStart[kB] - kStart[kA], !kEncrypted)));
// Sample A and B
for (size_t i = kA; i <= kB; i++) {
EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i]));
}
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsMediaSample(kStreamIndex, kStart[kB],
kStart[kC] - kStart[kB], !kEncrypted)));
// Sample A, B, and C
for (size_t i = kA; i <= kC; i++) {
EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i]));
}
EXPECT_CALL(*Output(kOutputIndex),
OnProcess(IsMediaSample(kStreamIndex, kStart[kC],
kEnd - kStart[kC], !kEncrypted)));
EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex));
}
for (size_t i = kA; i <= kC; i++) {
ASSERT_OK(Input(kInputIndex)
->Dispatch(StreamData::FromTextSample(
kStreamIndex,
GetTextSample(kId[i], kStart[i], kEnd, kPayload[i]))));
}
ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams());
}
} // namespace media
} // namespace shaka