From ab19082c2049fef49c76ab005c2ab18bfe29acc3 Mon Sep 17 00:00:00 2001
From: Aaron Vaage
Date: Tue, 30 May 2017 17:24:40 -0700
Subject: [PATCH] WebVtt To MP4 Handler

Implemented a MediaHandler that takes text samples and creates media
samples. The data in each media sample is the MP4 box for
non-overlapping cues.

As per WebVtt in Mp4, all cues must be non-overlapping. This handler
takes care of grouping and dividing cues.

Bug: 36138902
Change-Id: I0c1d27964180c14a22cb200591f70e46e04a651f
---
 packager/media/formats/webvtt/webvtt.gyp      |   3 +
 .../formats/webvtt/webvtt_to_mp4_handler.cc   | 178 ++++++++++
 .../formats/webvtt/webvtt_to_mp4_handler.h    |  99 ++++++
 .../webvtt/webvtt_to_mp4_handler_unittest.cc  | 310 ++++++++++++++++++
 4 files changed, 590 insertions(+)
 create mode 100644 packager/media/formats/webvtt/webvtt_to_mp4_handler.cc
 create mode 100644 packager/media/formats/webvtt/webvtt_to_mp4_handler.h
 create mode 100644 packager/media/formats/webvtt/webvtt_to_mp4_handler_unittest.cc

diff --git a/packager/media/formats/webvtt/webvtt.gyp b/packager/media/formats/webvtt/webvtt.gyp
index 4d53cc06c4..9d479ed915 100644
--- a/packager/media/formats/webvtt/webvtt.gyp
+++ b/packager/media/formats/webvtt/webvtt.gyp
@@ -27,6 +27,8 @@
         'webvtt_segmenter.h',
         'webvtt_timestamp.cc',
         'webvtt_timestamp.h',
+        'webvtt_to_mp4_handler.cc',
+        'webvtt_to_mp4_handler.h',
       ],
       'dependencies': [
         '../../../base/base.gyp:base',
@@ -45,6 +47,7 @@
         'webvtt_sample_converter_unittest.cc',
         'webvtt_segmenter_unittest.cc',
         'webvtt_timestamp_unittest.cc',
+        'webvtt_to_mp4_handler_unittest.cc',
       ],
       'dependencies': [
         '../../../testing/gmock.gyp:gmock',
diff --git a/packager/media/formats/webvtt/webvtt_to_mp4_handler.cc b/packager/media/formats/webvtt/webvtt_to_mp4_handler.cc
new file mode 100644
index 0000000000..5366e7bd10
--- /dev/null
+++ b/packager/media/formats/webvtt/webvtt_to_mp4_handler.cc
@@ -0,0 +1,178 @@
+// Copyright 2017 Google Inc. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file or at
+// https://developers.google.com/open-source/licenses/bsd
+
+#include "packager/media/formats/webvtt/webvtt_to_mp4_handler.h"
+
+#include <limits>
+
+#include "packager/media/base/buffer_writer.h"
+#include "packager/media/formats/mp4/box_buffer.h"
+#include "packager/media/formats/mp4/box_definitions.h"
+
+namespace shaka {
+namespace media {
+
+class DisplayAction {
+ public:
+  DisplayAction(uint64_t id, uint64_t time) : id_(id), time_(time) {}
+  uint64_t id() const { return id_; }
+  uint64_t time() const { return time_; }
+  virtual void ActOn(std::list<const TextSample*>* display) const = 0;
+
+ private:
+  uint64_t id_;
+  uint64_t time_;
+};
+
+namespace {
+const uint64_t kTrackId = 0;
+
+class AddToDisplayAction : public DisplayAction {
+ public:
+  explicit AddToDisplayAction(uint64_t id,
+                              std::shared_ptr<const TextSample>& sample)
+      : DisplayAction(id, sample->start_time()), sample_(sample) {}
+  void ActOn(std::list<const TextSample*>* display) const override {
+    display->push_back(sample_.get());
+  }
+
+ private:
+  std::shared_ptr<const TextSample> sample_;
+};
+
+class RemoveFromDisplayAction : public DisplayAction {
+ public:
+  explicit RemoveFromDisplayAction(uint64_t id,
+                                   std::shared_ptr<const TextSample>& sample)
+      : DisplayAction(id, sample->EndTime()), sample_(sample) {}
+  void ActOn(std::list<const TextSample*>* display) const override {
+    display->remove(sample_.get());
+  }
+
+ private:
+  std::shared_ptr<const TextSample> sample_;
+};
+}  // namespace
+
+bool DisplayActionCompare::operator()(
+    const std::shared_ptr<DisplayAction>& left,
+    const std::shared_ptr<DisplayAction>& right) const {
+  return left->time() == right->time() ? left->id() > right->id()
+                                       : left->time() > right->time();
+}
+
+Status WebVttToMp4Handler::InitializeInternal() {
+  return Status::OK;
+}
+
+Status WebVttToMp4Handler::Process(std::unique_ptr<StreamData> stream_data) {
+  if (StreamDataType::kStreamInfo == stream_data->stream_data_type) {
+    return DispatchStreamInfo(kTrackId, std::move(stream_data->stream_info));
+  }
+  if (stream_data->stream_data_type == StreamDataType::kTextSample) {
+    std::shared_ptr<const TextSample> sample = stream_data->text_sample;
+
+    std::shared_ptr<DisplayAction> add(
+        new AddToDisplayAction(NextActionId(), sample));
+    std::shared_ptr<DisplayAction> remove(
+        new RemoveFromDisplayAction(NextActionId(), sample));
+
+    actions_.push(add);
+    actions_.push(remove);
+
+    ProcessUpToTime(add->time());
+
+    return Status::OK;
+  }
+  return Status(error::INTERNAL_ERROR,
+                "Invalid stream data type for this handler");
+}
+
+Status WebVttToMp4Handler::OnFlushRequest(size_t input_stream_index) {
+  const uint64_t kEndOfTime = std::numeric_limits<uint64_t>::max();
+  ProcessUpToTime(kEndOfTime);
+
+  return FlushDownstream(0);
+}
+
+void WebVttToMp4Handler::WriteCue(const std::string& id,
+                                  const std::string& settings,
+                                  const std::string& payload,
+                                  BufferWriter* out) {
+  mp4::VTTCueBox box;
+
+  if (id.length()) {
+    box.cue_id.cue_id = id;
+  }
+  if (settings.length()) {
+    box.cue_settings.settings = settings;
+  }
+  if (payload.length()) {
+    box.cue_payload.cue_text = payload;
+  }
+  // If there is internal timing, i.e. WebVTT cue timestamp, then
+  // cue_current_time should be populated
+  // "which gives the VTT timestamp associated with the start time of sample."
+  // TODO(rkuroiwa): Reuse TimestampToMilliseconds() to check if there is an
+  // internal timestamp in the payload to set CueTimeBox.cue_current_time.
+  box.Write(out);
+}
+
+void WebVttToMp4Handler::ProcessUpToTime(uint64_t cutoff_time) {
+  // We can only process as far as the last add as no new events will be
+  // added that come before that time.
+  while (actions_.size() && actions_.top()->time() < cutoff_time) {
+    // STAGE 1: Write out the current state
+    // Get the time range for which the current active state is valid.
+    const uint64_t previous_change = next_change_;
+    next_change_ = actions_.top()->time();
+    // The only time that |previous_change| and |next_change_| should ever break
+    // the rule |next_change_ > previous_change| is at the start where
+    // |previous_change| and |next_change_| are both zero.
+    DCHECK((previous_change == 0 && next_change_ == 0) ||
+           next_change_ > previous_change);
+
+    // Send out the active group. If there is nothing in the active group, then
+    // this segment is ignored.
+    if (active_.size()) {
+      MergeAndSendSamples(active_, previous_change, next_change_);
+    }
+
+    // STAGE 2: Move to the next state.
+    while (actions_.size() && actions_.top()->time() == next_change_) {
+      actions_.top()->ActOn(&active_);
+      actions_.pop();
+    }
+  }
+}
+
+Status WebVttToMp4Handler::MergeAndSendSamples(
+    const std::list<const TextSample*>& samples,
+    uint64_t start_time,
+    uint64_t end_time) {
+  DCHECK_GT(end_time, start_time);
+
+  box_writer_.Clear();
+
+  for (const TextSample* sample : samples) {
+    DCHECK_LE(sample->start_time(), start_time);
+    DCHECK_GE(sample->EndTime(), end_time);
+    WriteCue(sample->id(), sample->settings(), sample->payload(), &box_writer_);
+  }
+
+  std::shared_ptr<MediaSample> sample =
+      MediaSample::CopyFrom(box_writer_.Buffer(), box_writer_.Size(), true);
+  sample->set_pts(start_time);
+  sample->set_dts(start_time);
+  sample->set_duration(end_time - start_time);
+  return DispatchMediaSample(kTrackId, std::move(sample));
+}
+
+uint64_t WebVttToMp4Handler::NextActionId() {
+  return next_id_++;
+}
+}  // namespace media
+}  // namespace shaka
diff --git a/packager/media/formats/webvtt/webvtt_to_mp4_handler.h b/packager/media/formats/webvtt/webvtt_to_mp4_handler.h
new file mode 100644
index 0000000000..cc335eb588
--- /dev/null
+++ b/packager/media/formats/webvtt/webvtt_to_mp4_handler.h
@@ -0,0 +1,99 @@
+// Copyright 2017 Google Inc. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file or at
+// https://developers.google.com/open-source/licenses/bsd
+
+#ifndef PACKAGER_MEDIA_FORMATS_WEBVTT_WEBVTT_TO_MP4_HANDLER_H_
+#define PACKAGER_MEDIA_FORMATS_WEBVTT_WEBVTT_TO_MP4_HANDLER_H_
+
+#include <stdint.h>
+
+#include <list>
+#include <queue>
+
+#include "packager/media/base/buffer_writer.h"
+#include "packager/media/base/media_handler.h"
+
+namespace shaka {
+namespace media {
+
+class DisplayAction;
+
+class DisplayActionCompare {
+ public:
+  bool operator()(const std::shared_ptr<DisplayAction>& left,
+                  const std::shared_ptr<DisplayAction>& right) const;
+};
+
+// Take text samples, convert them to Mp4 boxes, and send them downstream.
+// Virtual methods should only be overridden for testing.
+class WebVttToMp4Handler : public MediaHandler {
+ public:
+  WebVttToMp4Handler() = default;
+
+ protected:
+  // |Process| and |OnFlushRequest| need to be protected so that they can be
+  // called for testing.
+  Status Process(std::unique_ptr<StreamData> stream_data) override;
+  Status OnFlushRequest(size_t input_stream_index) override;
+
+  // This is made protected-virtual so that we can override it for testing.
+  virtual void WriteCue(const std::string& id,
+                        const std::string& settings,
+                        const std::string& payload,
+                        BufferWriter* out);
+
+ private:
+  WebVttToMp4Handler(const WebVttToMp4Handler&) = delete;
+  WebVttToMp4Handler& operator=(const WebVttToMp4Handler&) = delete;
+
+  Status InitializeInternal() override;
+
+  // Merge and send all samples in the queue downstream while the head of the
+  // queue's time is less than |cutoff_time|. This is needed as we can only
+  // merge and send samples when we are sure no new samples will appear before
+  // the next action.
+  void ProcessUpToTime(uint64_t cutoff_time);
+
+  // Merge together all TextSamples in |samples| into a single MP4 box and
+  // pass the box downstream.
+  Status MergeAndSendSamples(const std::list<const TextSample*>& samples,
+                             uint64_t start_time,
+                             uint64_t end_time);
+
+  // Take a Mp4 box as a byte buffer and send it downstream.
+  Status WriteSample(uint64_t start,
+                     uint64_t end,
+                     const uint8_t* sample,
+                     size_t sample_length);
+
+  // Get a new id for the next action.
+  uint64_t NextActionId();
+
+  uint64_t next_change_ = 0;
+
+  // This is the current state of the box we are writing.
+  BufferWriter box_writer_;
+
+  // |actions_| is a time sorted list of actions that affect the timeline (e.g.
+  // adding or removing a cue). |active_| is the list of all cues that are
+  // currently on screen.
+  // When the cue is to be on screen, it is added to |active_|. When it is time
+  // for the cue to come off screen, it is removed from |active_|.
+  // As |actions_| has a shared pointer to the cue, |active_| can use normal
+  // pointers as the pointer will be valid and it makes the |remove| call
+  // easier.
+
+  std::priority_queue<std::shared_ptr<DisplayAction>,
+                      std::vector<std::shared_ptr<DisplayAction>>,
+                      DisplayActionCompare>
+      actions_;
+  std::list<const TextSample*> active_;
+
+  uint64_t next_id_ = 0;
+};
+
+}  // namespace media
+}  // namespace shaka
+#endif  // PACKAGER_MEDIA_FORMATS_WEBVTT_WEBVTT_TO_MP4_HANDLER_H_
diff --git a/packager/media/formats/webvtt/webvtt_to_mp4_handler_unittest.cc b/packager/media/formats/webvtt/webvtt_to_mp4_handler_unittest.cc
new file mode 100644
index 0000000000..9b89fde285
--- /dev/null
+++ b/packager/media/formats/webvtt/webvtt_to_mp4_handler_unittest.cc
@@ -0,0 +1,310 @@
+// Copyright 2017 Google Inc. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file or at
+// https://developers.google.com/open-source/licenses/bsd
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include "packager/media/base/media_handler_test_base.h"
+#include "packager/media/formats/webvtt/webvtt_to_mp4_handler.h"
+#include "packager/status_test_util.h"
+
+namespace shaka {
+namespace media {
+namespace {
+const size_t kStreamIndex = 0;
+const bool kEncrypted = true;
+
+const size_t kInputCount = 1;
+const size_t kOutputCount = 1;
+const size_t kInputIndex = 0;
+const size_t kOutputIndex = 0;
+
+const char* kId[] = {"cue 1 id", "cue 2 id", "cue 3 id"};
+const char* kPayload[] = {"cue 1 payload", "cue 2 payload", "cue 3 payload"};
+const char* kNoSettings = "";
+
+// These all refer to the samples. To make them easier to use in their
+// correct context, they have purposely short names.
+const size_t kA = 0;
+const size_t kB = 1;
+const size_t kC = 2;
+
+}  // namespace
+
+class TestableWebVttToMp4Handler : public WebVttToMp4Handler {
+ public:
+  MOCK_METHOD3(OnWriteCue,
+               void(const std::string& id,
+                    const std::string& settings,
+                    const std::string& payload));
+
+ protected:
+  void WriteCue(const std::string& id,
+                const std::string& settings,
+                const std::string& payload,
+                BufferWriter* out) override {
+    OnWriteCue(id, settings, payload);
+    // We need to write something out or else media sample will think it is the
+    // end of the stream.
+    out->AppendInt(0);
+  }
+};
+
+class WebVttToMp4HandlerTest : public MediaHandlerTestBase {
+ protected:
+  void SetUp() override {
+    mp4_handler_ = std::make_shared<TestableWebVttToMp4Handler>();
+    ASSERT_OK(SetUpAndInitializeGraph(mp4_handler_, kInputCount, kOutputCount));
+  }
+
+  std::shared_ptr<TestableWebVttToMp4Handler> mp4_handler_;
+};
+
+// Verify the cues are grouped correctly when the cues do not overlap at all.
+//
+// [----A---] [---B---]
+TEST_F(WebVttToMp4HandlerTest, NoOverlap) {
+  const int64_t kStart[] = {0, 1100};
+  const int64_t kEnd[] = {1000, 2100};
+
+  {
+    testing::InSequence s;
+
+    for (size_t i = kA; i <= kB; i++) {
+      EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i]));
+      EXPECT_CALL(*Output(kOutputIndex),
+                  OnProcess(IsMediaSample(kStreamIndex, kStart[i],
+                                          kEnd[i] - kStart[i], !kEncrypted)));
+    }
+
+    EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex));
+  }
+
+  for (size_t i = kA; i <= kB; i++) {
+    ASSERT_OK(Input(kInputIndex)
+                  ->Dispatch(StreamData::FromTextSample(
+                      kStreamIndex,
+                      GetTextSample(kId[i], kStart[i], kEnd[i], kPayload[i]))));
+  }
+  ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams());
+}
+
+// Verify the cues are grouped correctly when one cue overlaps another cue at
+// one end.
+//
+// [-------A-------]
+//         [-------B------]
+TEST_F(WebVttToMp4HandlerTest, Overlap) {
+  const int64_t kStart[] = {0, 500};
+  const int64_t kEnd[] = {1000, 1500};
+
+  {
+    testing::InSequence s;
+
+    // Sample A alone, from the start of A to the start of B.
+    EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[kA], kNoSettings, kPayload[kA]));
+    EXPECT_CALL(*Output(kOutputIndex),
+                OnProcess(IsMediaSample(kStreamIndex, kStart[kA],
+                                        kStart[kB] - kStart[kA], !kEncrypted)));
+
+    // Sample A and B together, from the start of B to the end of A.
+    for (size_t i = kA; i <= kB; i++) {
+      EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i]));
+    }
+    EXPECT_CALL(*Output(kOutputIndex),
+                OnProcess(IsMediaSample(kStreamIndex, kStart[kB],
+                                        kEnd[kA] - kStart[kB], !kEncrypted)));
+
+    // Sample B alone, from the end of A to the end of B.
+    EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[kB], kNoSettings, kPayload[kB]));
+    EXPECT_CALL(*Output(kOutputIndex),
+                OnProcess(IsMediaSample(kStreamIndex, kEnd[kA],
+                                        kEnd[kB] - kEnd[kA], !kEncrypted)));
+
+    EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex));
+  }
+
+  for (size_t i = kA; i <= kB; i++) {
+    ASSERT_OK(Input(kInputIndex)
+                  ->Dispatch(StreamData::FromTextSample(
+                      kStreamIndex,
+                      GetTextSample(kId[i], kStart[i], kEnd[i], kPayload[i]))));
+  }
+  ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams());
+}
+
+// Verify the cues are grouped correctly when one cue starts before and ends
+// after another cue.
+//
+// [-------------A-------------]
+//    [----------B----------]
+TEST_F(WebVttToMp4HandlerTest, Contains) {
+  const int64_t kStart[] = {0, 100};
+  const int64_t kEnd[] = {1000, 900};
+
+  {
+    testing::InSequence s;
+
+    // Sample A alone, from the start of A to the start of B.
+    EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[kA], kNoSettings, kPayload[kA]));
+    EXPECT_CALL(*Output(kOutputIndex),
+                OnProcess(IsMediaSample(kStreamIndex, kStart[kA],
+                                        kStart[kB] - kStart[kA], !kEncrypted)));
+
+    // Sample A and B together, for the whole duration of B.
+    for (size_t i = kA; i <= kB; i++) {
+      EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i]));
+    }
+    EXPECT_CALL(*Output(kOutputIndex),
+                OnProcess(IsMediaSample(kStreamIndex, kStart[kB],
+                                        kEnd[kB] - kStart[kB], !kEncrypted)));
+
+    // Sample A alone again, from the end of B to the end of A.
+    EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[kA], kNoSettings, kPayload[kA]));
+    EXPECT_CALL(*Output(kOutputIndex),
+                OnProcess(IsMediaSample(kStreamIndex, kEnd[kB],
+                                        kEnd[kA] - kEnd[kB], !kEncrypted)));
+
+    EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex));
+  }
+
+  for (size_t i = kA; i <= kB; i++) {
+    ASSERT_OK(Input(kInputIndex)
+                  ->Dispatch(StreamData::FromTextSample(
+                      kStreamIndex,
+                      GetTextSample(kId[i], kStart[i], kEnd[i], kPayload[i]))));
+  }
+  ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams());
+}
+
+// Verify that when two cues are completely on top of each other, there are
+// no extra boxes sent out.
+//
+// [----------A----------]
+// [----------B----------]
+TEST_F(WebVttToMp4HandlerTest, ExactOverlap) {
+  const int64_t kStart = 0;
+  const int64_t kDuration = 1000;
+  const int64_t kEnd = kStart + kDuration;
+
+  {
+    testing::InSequence s;
+
+    // Sample A and B together, as a single media sample.
+    for (size_t i = kA; i <= kB; i++) {
+      EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i]));
+    }
+    EXPECT_CALL(
+        *Output(kOutputIndex),
+        OnProcess(IsMediaSample(kStreamIndex, kStart, kDuration, !kEncrypted)));
+
+    EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex));
+  }
+
+  for (size_t i = kA; i <= kB; i++) {
+    ASSERT_OK(Input(kInputIndex)
+                  ->Dispatch(StreamData::FromTextSample(
+                      kStreamIndex,
+                      GetTextSample(kId[i], kStart, kEnd, kPayload[i]))));
+  }
+  ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams());
+}
+
+// Verify the cues are grouped correctly when all cues start at the same time
+// but end at different times.
+//
+// [----A----]
+// [--------B--------]
+// [------------C------------]
+TEST_F(WebVttToMp4HandlerTest, OverlapStartWithStaggerEnd) {
+  const int64_t kStart = 0;
+  const int64_t kEnd[] = {1000, 2000, 3000};
+
+  {
+    testing::InSequence s;
+
+    // Sample A, B, and C together, from the shared start to the end of A.
+    for (size_t i = kA; i <= kC; i++) {
+      EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i]));
+    }
+    EXPECT_CALL(
+        *Output(kOutputIndex),
+        OnProcess(IsMediaSample(kStreamIndex, kStart, kEnd[kA], !kEncrypted)));
+
+    // Sample B and C together, from the end of A to the end of B.
+    for (size_t i = kB; i <= kC; i++) {
+      EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i]));
+    }
+    EXPECT_CALL(*Output(kOutputIndex),
+                OnProcess(IsMediaSample(kStreamIndex, kEnd[kA],
+                                        kEnd[kB] - kEnd[kA], !kEncrypted)));
+
+    // Sample C alone, from the end of B to the end of C.
+    EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[kC], kNoSettings, kPayload[kC]));
+    EXPECT_CALL(*Output(kOutputIndex),
+                OnProcess(IsMediaSample(kStreamIndex, kEnd[kB],
+                                        kEnd[kC] - kEnd[kB], !kEncrypted)));
+
+    EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex));
+  }
+
+  for (size_t i = kA; i <= kC; i++) {
+    ASSERT_OK(Input(kInputIndex)
+                  ->Dispatch(StreamData::FromTextSample(
+                      kStreamIndex,
+                      GetTextSample(kId[i], kStart, kEnd[i], kPayload[i]))));
+  }
+  ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams());
+}
+
+// Verify the cues are grouped correctly when the cues start at different
+// times but all end at the same time.
+//
+// [------------A------------]
+//         [--------B--------]
+//                 [----C----]
+TEST_F(WebVttToMp4HandlerTest, StaggerStartWithOverlapEnd) {
+  const int64_t kStart[] = {0, 100, 200};
+  const int64_t kEnd = 1000;
+
+  {
+    testing::InSequence s;
+
+    // Sample A alone, from the start of A to the start of B.
+    EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[kA], kNoSettings, kPayload[kA]));
+    EXPECT_CALL(*Output(kOutputIndex),
+                OnProcess(IsMediaSample(kStreamIndex, kStart[kA],
+                                        kStart[kB] - kStart[kA], !kEncrypted)));
+
+    // Sample A and B together, from the start of B to the start of C.
+    for (size_t i = kA; i <= kB; i++) {
+      EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i]));
+    }
+    EXPECT_CALL(*Output(kOutputIndex),
+                OnProcess(IsMediaSample(kStreamIndex, kStart[kB],
+                                        kStart[kC] - kStart[kB], !kEncrypted)));
+
+    // Sample A, B, and C together, from the start of C to the shared end.
+    for (size_t i = kA; i <= kC; i++) {
+      EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i]));
+    }
+    EXPECT_CALL(*Output(kOutputIndex),
+                OnProcess(IsMediaSample(kStreamIndex, kStart[kC],
+                                        kEnd - kStart[kC], !kEncrypted)));
+
+    EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex));
+  }
+
+  for (size_t i = kA; i <= kC; i++) {
+    ASSERT_OK(Input(kInputIndex)
+                  ->Dispatch(StreamData::FromTextSample(
+                      kStreamIndex,
+                      GetTextSample(kId[i], kStart[i], kEnd, kPayload[i]))));
+  }
+  ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams());
+}
+}  // namespace media
+}  // namespace shaka