diff --git a/packager/media/formats/mp4/box_definitions.cc b/packager/media/formats/mp4/box_definitions.cc index 99d08c0caa..edd5438a83 100644 --- a/packager/media/formats/mp4/box_definitions.cc +++ b/packager/media/formats/mp4/box_definitions.cc @@ -2733,10 +2733,10 @@ FourCC VTTCueBox::BoxType() const { bool VTTCueBox::ReadWriteInternal(BoxBuffer* buffer) { RCHECK(ReadWriteHeaderInternal(buffer) && buffer->PrepareChildren() && - buffer->ReadWriteChild(&cue_source_id) && - buffer->ReadWriteChild(&cue_id) && - buffer->ReadWriteChild(&cue_time) && - buffer->ReadWriteChild(&cue_settings) && + buffer->TryReadWriteChild(&cue_source_id) && + buffer->TryReadWriteChild(&cue_id) && + buffer->TryReadWriteChild(&cue_time) && + buffer->TryReadWriteChild(&cue_settings) && buffer->ReadWriteChild(&cue_payload)); return true; } diff --git a/packager/media/formats/mp4/mp4.gyp b/packager/media/formats/mp4/mp4.gyp index 2ebc5f2636..9a37f6cffd 100644 --- a/packager/media/formats/mp4/mp4.gyp +++ b/packager/media/formats/mp4/mp4.gyp @@ -46,12 +46,15 @@ 'sync_sample_iterator.h', 'track_run_iterator.cc', 'track_run_iterator.h', + 'webvtt_fragmenter.cc', + 'webvtt_fragmenter.h', ], 'dependencies': [ '../../../third_party/boringssl/boringssl.gyp:boringssl', '../../base/media_base.gyp:media_base', '../../codecs/codecs.gyp:codecs', '../../event/media_event.gyp:media_event', + '../../formats/webvtt/webvtt.gyp:webvtt', ], }, { @@ -66,6 +69,7 @@ 'mp4_media_parser_unittest.cc', 'sync_sample_iterator_unittest.cc', 'track_run_iterator_unittest.cc', + 'webvtt_fragmenter_unittest.cc', ], 'dependencies': [ '../../../testing/gtest.gyp:gtest', diff --git a/packager/media/formats/mp4/webvtt_fragmenter.cc b/packager/media/formats/mp4/webvtt_fragmenter.cc new file mode 100644 index 0000000000..e085c0cd23 --- /dev/null +++ b/packager/media/formats/mp4/webvtt_fragmenter.cc @@ -0,0 +1,312 @@ +// Copyright 2015 Google Inc. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#include "packager/media/formats/mp4/webvtt_fragmenter.h" + +#include + +#include "packager/base/strings/string_util.h" +#include "packager/base/strings/stringprintf.h" +#include "packager/media/base/buffer_writer.h" +#include "packager/media/base/media_sample.h" +#include "packager/media/formats/mp4/box_buffer.h" +#include "packager/media/formats/mp4/box_definitions.h" + +namespace shaka { +namespace media { +namespace mp4 { + +namespace { + +std::shared_ptr CreateEmptyCueSample(uint64_t start_time, + uint64_t end_time) { + DCHECK_GT(end_time, start_time); + VTTEmptyCueBox empty_cue_box; + + std::vector serialized; + AppendBoxToVector(&empty_cue_box, &serialized); + + std::shared_ptr empty_cue_sample = MediaSample::CopyFrom( + serialized.data(), serialized.size(), false); + empty_cue_sample->set_pts(start_time); + empty_cue_sample->set_duration(end_time - start_time); + return empty_cue_sample; +} + +VTTCueBox CueBoxFromCue(const Cue& cue) { + VTTCueBox cue_box; + if (!cue.identifier.empty()) { + cue_box.cue_id.cue_id = cue.identifier; + } + + if (!cue.settings.empty()) { + cue_box.cue_settings.settings = cue.settings; + } + + cue_box.cue_payload.cue_text = cue.payload.front(); + return cue_box; +} + +std::string TimeToWebVttTimeStamp(uint64_t time_in_ms) { + const int milliseconds = time_in_ms % 1000; + const uint64_t seconds_left = time_in_ms / 1000; + const int seconds = seconds_left % 60; + const uint64_t minutes_left = seconds_left / 60; + const int minutes = minutes_left % 60; + const int hours = minutes_left / 60; + + return base::StringPrintf("%02d:%02d:%02d.%03d", hours, minutes, seconds, + milliseconds); +} + +std::shared_ptr CreateVTTCueBoxesSample( + const std::list& cues, + uint64_t start_time, + uint64_t end_time) { + // TODO(rkuroiwa): Source IDs must be assigned to the cues 
and the same cue + // should have the same ID in different samples. Probably requires a mapping + // from cues to IDs. + CHECK(!cues.empty()); + + std::vector data; + std::string cue_current_time = TimeToWebVttTimeStamp(start_time); + + BufferWriter writer; + for (const Cue* cue : cues) { + VTTCueBox cue_box = CueBoxFromCue(*cue); + // If there is internal timing, i.e. WebVTT cue timestamp, then + // cue_current_time should be populated + // "which gives the VTT timestamp associated with the start time of sample." + // TODO(rkuroiwa): Reuse TimestampToMilliseconds() to check if there is an + // internal timestamp in the payload to set CueTimeBox.cue_current_time. + cue_box.Write(&writer); + } + + std::shared_ptr sample = + MediaSample::CopyFrom(writer.Buffer(), writer.Size(), false); + sample->set_pts(start_time); + sample->set_duration(end_time - start_time); + return sample; +} + +// This function returns the minimum of cue_start_time, cue_end_time, +// current_minimum should be bigger than sweep_line. +uint64_t GetMinimumPastSweepLine(uint64_t cue_start_time, + uint64_t cue_end_time, + uint64_t sweep_line, + uint64_t current_minimum) { + DCHECK_GE(current_minimum, sweep_line); + if (cue_end_time <= sweep_line) + return current_minimum; + + // Anything below is cue_end_time > sweep_line. + if (cue_start_time > sweep_line) { + // The start time of this cue is past the sweepline, return the min. + return std::min(cue_start_time, current_minimum); + } else { + // The sweep line is at the start or in the middle of a cue. 
+ return std::min(cue_end_time, current_minimum); + } +} + +} // namespace + +void AppendBoxToVector(Box* box, std::vector<uint8_t>* output_vector) { + BufferWriter writer; + box->Write(&writer); + output_vector->insert(output_vector->end(), + writer.Buffer(), + writer.Buffer() + writer.Size()); +} + +WebVttFragmenter::WebVttFragmenter() : next_cue_start_time_(0u) {} +WebVttFragmenter::~WebVttFragmenter() {} + +// Note that this |sample| is either a cue or a comment. It does not have any +// info on whether the next cue is overlapping or not. +void WebVttFragmenter::PushSample(std::shared_ptr<MediaSample> sample) { + if (sample->data_size() == 0u) { + // A comment. Put it in the buffer and skip. + VTTAdditionalTextBox comment; + comment.cue_additional_text.assign( + sample->side_data(), sample->side_data() + sample->side_data_size()); + additional_texts_.push_back(comment); + // TODO(rkuroiwa): Handle comments as samples. + + return; + } + + cues_.push_back(MediaSampleToCue(*sample)); + if (cues_.size() == 1) { + // Cannot make a decision with just one sample. Cache it and wait for + // another one. + next_cue_start_time_ = cues_.front().start_time; + return; + } + + CHECK_GE(cues_.size(), 2u); + // TODO(rkuroiwa): This isn't wrong but all the cues where + // endtime < latest cue start time + // can be processed. Change the logic so that if there are cues that meet the + // condition above, create samples immediately and remove them. + // Note: This doesn't mean that all the cues can be removed, just the ones + // that meet the condition. + bool processed_cues = HandleAllCuesButLatest(); + if (!processed_cues) + return; + + // Remove all the cues except the latest one. 
+ auto erase_last_iterator = --cues_.end(); + cues_.erase(cues_.begin(), erase_last_iterator); +} + +void WebVttFragmenter::Flush() { + if (cues_.empty()) + return; + if (cues_.size() == 1) { + std::list temp_list; + temp_list.push_back(&cues_.front()); + CHECK_EQ(next_cue_start_time_, cues_.front().start_time); + ready_samples_.push_back(CreateVTTCueBoxesSample( + temp_list, + next_cue_start_time_, + cues_.front().start_time + cues_.front().duration)); + cues_.clear(); + return; + } + + bool processed_cue = HandleAllCues(); + CHECK(processed_cue) + << "No cues were processed but the cues should have been flushed."; + cues_.clear(); +} + +size_t WebVttFragmenter::ReadySamplesSize() { + return ready_samples_.size(); +} + +std::shared_ptr WebVttFragmenter::PopSample() { + CHECK(!ready_samples_.empty()); + std::shared_ptr ret = ready_samples_.front(); + ready_samples_.pop_front(); + return ret; +} + +// TODO(rkuroiwa): Some samples may be ready. Example: +// Cues: +// |--------- 1 ---------| +// |-- 2 --| +// |-- 3 --| +// +// Samples: +// |A| B | C | +// Samples A, B, and C can be created when Cue 3 is pushed. +// Change algorithm to create A,B,C samples right away. +// Note that this requires change to the caller on which cues +// to remove. +bool WebVttFragmenter::HandleAllCuesButLatest() { + DCHECK_GE(cues_.size(), 2u); + const Cue& latest_cue = cues_.back(); + + // Don't process the cues until the latest cue doesn't overlap with all the + // previous cues. + uint64_t max_cue_end_time = 0; // Not including the latest. + auto latest_cue_it = --cues_.end(); + for (auto cue_it = cues_.begin(); cue_it != latest_cue_it; ++cue_it) { + const Cue& cue = *cue_it; + const uint64_t cue_end_time = cue.start_time + cue.duration; + if (cue_end_time > latest_cue.start_time) + return false; + + if (max_cue_end_time < cue_end_time) + max_cue_end_time = cue_end_time; + } + // Reaching here means that the latest cue does not overlap with all + // the previous cues. 
+ + // Because sweep_stop_time is assigned to next_cue_start_time_ it is not + // set to latest_cue.start_time here; there may be a gap between + // latest_cue.start_time and previous_cue_end_time. + // The correctness of SweepCues() doesn't change whether the sweep stops + // right before the latest cue or right before the gap. + const uint64_t sweep_stop_time = max_cue_end_time; + const uint64_t sweep_line_start = cues_.front().start_time; + bool processed_cues = + SweepCues(sweep_line_start, sweep_stop_time); + next_cue_start_time_ = sweep_stop_time; + if (next_cue_start_time_ < latest_cue.start_time) { + ready_samples_.push_back(CreateEmptyCueSample(next_cue_start_time_, + latest_cue.start_time)); + next_cue_start_time_ = latest_cue.start_time; + } + return processed_cues; +} + +bool WebVttFragmenter::HandleAllCues() { + uint64_t latest_time = 0u; + for (const Cue& cue : cues_) { + if (cue.start_time + cue.duration > latest_time) + latest_time = cue.start_time + cue.duration; + } + const uint64_t sweep_line_start = cues_.front().start_time; + const uint64_t sweep_stop_time = latest_time; + bool processed = SweepCues(sweep_line_start, sweep_stop_time); + next_cue_start_time_ = sweep_stop_time; + return processed; +} + +bool WebVttFragmenter::SweepCues(uint64_t sweep_line, + uint64_t sweep_stop_time) { + bool processed_cues = false; + // This is a sweep line algorithm. For every iteration, it determines active + // cues and makes a sample. + // At the end of an interation |next_start_time| should be set to the minimum + // of all the start and end times of the cues that is after |sweep_line|. + // |sweep_line| is set to |next_start_time| before the next iteration. + while (sweep_line < sweep_stop_time) { + std::list cues_for_a_sample; + uint64_t next_start_time = sweep_stop_time; + + // Put all the cues that should be displayed at sweep_line, in + // cues_for_a_sample. + // next_start_time is also updated in this loop by checking all the cues. 
+ for (const Cue& cue : cues_) { + if (cue.start_time >= sweep_stop_time) + break; + if (cue.start_time >= next_start_time) + break; + + const uint64_t cue_end_time = cue.start_time + cue.duration; + if (cue_end_time <= sweep_line) + continue; + next_start_time = GetMinimumPastSweepLine( + cue.start_time, cue_end_time, sweep_line, next_start_time); + + if (cue.start_time <= sweep_line) { + DCHECK_GT(cue_end_time, sweep_line); + cues_for_a_sample.push_back(&cue); + } + } + + DCHECK(!cues_for_a_sample.empty()) << "For now the only use case of this " + "function is to sweep non-empty " + "cues."; + if (!cues_for_a_sample.empty()) { + ready_samples_.push_back(CreateVTTCueBoxesSample( + cues_for_a_sample, sweep_line, next_start_time)); + processed_cues = true; + } + + sweep_line = next_start_time; + } + + DCHECK_EQ(sweep_line, sweep_stop_time); + return processed_cues; +} + +} // namespace mp4 +} // namespace media +} // namespace shaka diff --git a/packager/media/formats/mp4/webvtt_fragmenter.h b/packager/media/formats/mp4/webvtt_fragmenter.h new file mode 100644 index 0000000000..2088e5921c --- /dev/null +++ b/packager/media/formats/mp4/webvtt_fragmenter.h @@ -0,0 +1,124 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#ifndef PACKAGER_MEDIA_FORMATS_MP4_FRAGMENTER_H_ +#define PACKAGER_MEDIA_FORMATS_MP4_FRAGMENTER_H_ + +#include +#include + +#include "packager/media/base/status.h" +#include "packager/media/formats/mp4/box_definitions.h" +#include "packager/media/formats/webvtt/webvtt_media_parser.h" + +namespace shaka { +namespace media { +namespace mp4 { + +/// Appends box to vector. +/// @param box is the box to be serialized. +/// @param output_vector is where the data is appended. 
+void AppendBoxToVector(Box* box, std::vector* output_vector); + +/// According to the spec, when cues overlap, samples must be created.\n +/// The example below has 2 WebVTT cues:\n +/// 00:01:00.000 --> 00:02:00.000\n +/// hello\n +///\n +/// 00:01:15.000 --> 00:02:15.000\n +/// how are you?\n +///\n +/// These are added (AddSample()) as 2 samples but must be split into 3 samples +/// and 4 cues ('vttc' boxes).\n +/// First sample:\n +/// start_time: 00:01:00.000\n +/// duration: 15 seconds\n +/// cue payload: hello\n +///\n +/// Second sample:\n +/// start_time: 00:01:15.000\n +/// duration: 45 seconds\n +/// cue payload: hello\n +/// cue payload: how are you?\n +///\n +/// Third sample:\n +/// start_time: 00:02:00.000\n +/// duration: 15 seconds\n +/// cue payload: how are you?\n +///\n +/// This class buffers the samples that are passed to AddSample() and creates +/// more samples as necessary. +// TODO(rkuroiwa): Rename this to WebVttSampleConverter, and put this in +// webvtt parser. +// For now, the output (from PopSample()) should still be in ISO-BMFF box form; +// and also to signal that, should have different types for TextStreamInfo. e.g. +// TextStreamInfo::type() returns kIsoBmffStreamText. +class WebVttFragmenter { + public: + WebVttFragmenter(); + ~WebVttFragmenter(); + + /// Add a sample. + /// @param sample is the sample to be added. It should contain one VTT cue. + void PushSample(std::shared_ptr sample); + + /// Process all the buffered samples. + /// This finalizes the object and further calls to PushSample() may result in + /// an undefined behavior. + void Flush(); + + /// @return The number of samples that are processed and ready to be popped. + size_t ReadySamplesSize(); + + /// Returns a MediaSample that is non-overlapping with the previous samples + /// that it has output. The data in the sample is one or more ISO-BMFF boxes + /// for the duration of the sample. + /// @return The first sample that is ready to be processed. 
+ std::shared_ptr<MediaSample> PopSample(); + + private: + // Handle |cues_| except the last item, and create samples from them. + // All cues that overlap with the latest cue are not processed. + // Usually the last cue (and cues that overlap with it) should not be + // processed right away because the following cues may overlap with the latest + // cue or the existing cues. + // If a cue has been processed, then this returns true. + bool HandleAllCuesButLatest(); + + // Same as HandleAllCuesButLatest() but it also includes the latest cue. + // If a cue has been processed, then this returns true. + bool HandleAllCues(); + + // Sweep line algorithm that handles the cues in |cues_|. + // This does not erase |cues_|. + // If a cue has been processed, this returns true. + // |sweep_line| is the start time and |sweep_stop_time| is when the sweep + // should stop. + bool SweepCues(uint64_t sweep_line, uint64_t sweep_stop_time); + + // This is going to be in 'mdat' box. Keep this around until a sample is + // ready. + std::list<Cue> cues_; + + // For comment samples. + std::list<VTTAdditionalTextBox> additional_texts_; + + // Samples that are ready to be processed. + std::list<std::shared_ptr<MediaSample>> ready_samples_; + + // This keeps track of the max end time of the processed cues which is the + // start time of the next cue. Used to check if cue_current_time has to be set + // or an empty cue (gap) has to be added. 
+ uint64_t next_cue_start_time_; + + DISALLOW_COPY_AND_ASSIGN(WebVttFragmenter); +}; + +} // namespace mp4 +} // namespace media +} // namespace shaka + +#endif // PACKAGER_MEDIA_FORMATS_MP4_FRAGMENTER_H_ diff --git a/packager/media/formats/mp4/webvtt_fragmenter_unittest.cc b/packager/media/formats/mp4/webvtt_fragmenter_unittest.cc new file mode 100644 index 0000000000..4eb647c37c --- /dev/null +++ b/packager/media/formats/mp4/webvtt_fragmenter_unittest.cc @@ -0,0 +1,466 @@ +#include "packager/media/formats/mp4/webvtt_fragmenter.h" + +#include <gmock/gmock.h> +#include <gtest/gtest.h> + +#include "packager/base/strings/string_number_conversions.h" +#include "packager/media/base/media_sample.h" +#include "packager/media/base/test/status_test_util.h" + +namespace shaka { +namespace media { +namespace mp4 { + +namespace { +// The actual messages don't matter. +const char kCueMessage1[] = "hi"; +const char kCueMessage2[] = "hello"; +const char kCueMessage3[] = "some multi word message"; +const char kCueMessage4[] = "message!!"; + +// Data is a vector and must not be empty. +MATCHER_P3(MatchesStartTimeEndTimeAndData, start_time, end_time, data, "") { + *result_listener << "which is (" << arg->pts() << ", " + << (arg->pts() + arg->duration()) << ", " + << base::HexEncode(arg->data(), arg->data_size()) << ")"; + return arg->pts() == start_time && + (arg->pts() + arg->duration() == end_time) && + arg->data_size() == data.size() && + (memcmp(&data[0], arg->data(), arg->data_size()) == 0); +} +} // namespace + +class WebVttFragmenterTest : public ::testing::Test { + protected: + WebVttFragmenter webvtt_fragmenter_; +}; + +// Verify that AppendBoxToVector works. +TEST_F(WebVttFragmenterTest, AppendBoxToVector) { + const uint8_t kExpected[] = { + 0x0, 0x0, 0x0, 0x1c, // Size. + 0x76, 0x74, 0x74, 0x63, // 'vttc'. + 0x0, 0x0, 0x0, 0x14, // Size of payload Box. + 0x70, 0x61, 0x79, 0x6c, // 'payl'. + // "some message" as hex without null terminator. 
+ 0x73, 0x6f, 0x6d, 0x65, 0x20, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, + }; + VTTCueBox cue_box; + cue_box.cue_payload.cue_text = "some message"; + std::vector serialized; + AppendBoxToVector(&cue_box, &serialized); + std::vector expected_in_vector_form( + kExpected, kExpected + arraysize(kExpected)); + EXPECT_EQ(expected_in_vector_form, serialized); +} + +// There are 6 ways the cues can be arranged. +// 1. No overlap, contiguous. Test: NoOverlapContiguous +// |-- cue1 --| +// |-- cue2 --| +// +// 2. No overlap, gap. Test: Gap +// |-- cue1 --| +// |-- cue2 --| +// +// 3. Overlap sequential (like a staircase). Test: OverlappingCuesSequential +// |-- cue1 --| +// |-- cue2 --| +// |-- cue3 --| +// +// 4. Longer cues overlapping with shorter cues. Test: OverlappingLongCue +// |---------- cue1 ----------| +// |--- cue2 ---| +// |- cue3 -| +// |- cue4 -| +// +// 5. The first cue doesn't start at 00:00.000. Test: GapAtBeginning +// |--- cue1 ---| +// +// 6. 2 or more cues start at the same time. Test: Same start time. 
+// |--- cue1 ---| +// |-- cue2 --| + +TEST_F(WebVttFragmenterTest, NoOverlapContiguous) { + std::shared_ptr sample1 = + MediaSample::CopyFrom(reinterpret_cast(kCueMessage1), + arraysize(kCueMessage1) - 1, true); + sample1->set_pts(0); + sample1->set_dts(0); + sample1->set_duration(2000); + + webvtt_fragmenter_.PushSample(sample1); + + std::shared_ptr sample2 = + MediaSample::CopyFrom(reinterpret_cast(kCueMessage2), + arraysize(kCueMessage2) - 1, true); + sample2->set_pts(2000); + sample2->set_dts(2000); + sample2->set_duration(1000); + + webvtt_fragmenter_.PushSample(sample2); + webvtt_fragmenter_.Flush(); + EXPECT_EQ(2u, webvtt_fragmenter_.ReadySamplesSize()); + + VTTCueBox first_cue_data; + first_cue_data.cue_payload.cue_text = kCueMessage1; + std::vector expected; + AppendBoxToVector(&first_cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(0, 2000, expected)); + + VTTCueBox second_cue_data; + second_cue_data.cue_payload.cue_text = kCueMessage2; + expected.clear(); + AppendBoxToVector(&second_cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(2000, 3000, expected)); +} + +// Verify that if is a gap, then a sample is created for the gap. 
+TEST_F(WebVttFragmenterTest, Gap) { + std::shared_ptr sample1 = + MediaSample::CopyFrom(reinterpret_cast(kCueMessage1), + arraysize(kCueMessage1) - 1, true); + sample1->set_pts(0); + sample1->set_dts(0); + sample1->set_duration(1000); + + webvtt_fragmenter_.PushSample(sample1); + + std::shared_ptr sample2 = + MediaSample::CopyFrom(reinterpret_cast(kCueMessage2), + arraysize(kCueMessage2) - 1, true); + sample2->set_pts(2000); + sample2->set_dts(2000); + sample2->set_duration(1000); + + webvtt_fragmenter_.PushSample(sample2); + EXPECT_EQ(2u, webvtt_fragmenter_.ReadySamplesSize()); + + webvtt_fragmenter_.Flush(); + EXPECT_EQ(3u, webvtt_fragmenter_.ReadySamplesSize()); + + VTTCueBox first_cue_data; + first_cue_data.cue_payload.cue_text = kCueMessage1; + std::vector expected; + AppendBoxToVector(&first_cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(0, 1000, expected)); + + VTTEmptyCueBox empty_cue; + expected.clear(); + AppendBoxToVector(&empty_cue, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(1000, 2000, expected)); + + VTTCueBox second_cue_data; + second_cue_data.cue_payload.cue_text = kCueMessage2; + expected.clear(); + AppendBoxToVector(&second_cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(2000, 3000, expected)); +} + +// The previous cue always ends before the current cue ends. +// Cues are overlapping, no samples should be created in PushSample(). 
+TEST_F(WebVttFragmenterTest, OverlappingCuesSequential) { + std::shared_ptr sample1 = + MediaSample::CopyFrom(reinterpret_cast(kCueMessage1), + arraysize(kCueMessage1) - 1, true); + sample1->set_pts(0); + sample1->set_dts(0); + sample1->set_duration(2000); + + webvtt_fragmenter_.PushSample(sample1); + + std::shared_ptr sample2 = + MediaSample::CopyFrom(reinterpret_cast(kCueMessage2), + arraysize(kCueMessage2) - 1, true); + sample2->set_pts(1000); + sample2->set_dts(1000); + sample2->set_duration(2000); + webvtt_fragmenter_.PushSample(sample2); + + std::shared_ptr sample3 = + MediaSample::CopyFrom(reinterpret_cast(kCueMessage3), + arraysize(kCueMessage3) - 1, true); + sample3->set_pts(1500); + sample3->set_dts(1500); + sample3->set_duration(4000); + webvtt_fragmenter_.PushSample(sample3); + + webvtt_fragmenter_.Flush(); + // There should be 5 samples for [0,1000], [1000,1500], [1500,2000], + // [2000,3000], and [3000, 5500]. + EXPECT_EQ(5u, webvtt_fragmenter_.ReadySamplesSize()); + + VTTCueBox first_cue_data; + first_cue_data.cue_payload.cue_text = kCueMessage1; + std::vector expected; + AppendBoxToVector(&first_cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(0, 1000, expected)); + + VTTCueBox second_cue_data; + second_cue_data.cue_payload.cue_text = kCueMessage2; + expected.clear(); + AppendBoxToVector(&first_cue_data, &expected); + AppendBoxToVector(&second_cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(1000, 1500, expected)); + + VTTCueBox third_cue_data; + third_cue_data.cue_payload.cue_text = kCueMessage3; + expected.clear(); + AppendBoxToVector(&first_cue_data, &expected); + AppendBoxToVector(&second_cue_data, &expected); + AppendBoxToVector(&third_cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(1500, 2000, expected)); + + expected.clear(); + AppendBoxToVector(&second_cue_data, &expected); + 
AppendBoxToVector(&third_cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(2000, 3000, expected)); + + expected.clear(); + AppendBoxToVector(&third_cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(3000, 5500, expected)); +} + +TEST_F(WebVttFragmenterTest, OverlappingLongCue) { + std::shared_ptr sample1 = + MediaSample::CopyFrom(reinterpret_cast(kCueMessage1), + arraysize(kCueMessage1) - 1, true); + sample1->set_pts(0); + sample1->set_dts(0); + sample1->set_duration(10000); + + webvtt_fragmenter_.PushSample(sample1); + + std::shared_ptr sample2 = + MediaSample::CopyFrom(reinterpret_cast(kCueMessage2), + arraysize(kCueMessage2) - 1, true); + sample2->set_pts(1000); + sample2->set_dts(1000); + sample2->set_duration(5000); + webvtt_fragmenter_.PushSample(sample2); + + std::shared_ptr sample3 = + MediaSample::CopyFrom(reinterpret_cast(kCueMessage3), + arraysize(kCueMessage3) - 1, true); + sample3->set_pts(2000); + sample3->set_dts(2000); + sample3->set_duration(1000); + webvtt_fragmenter_.PushSample(sample3); + + std::shared_ptr sample4 = + MediaSample::CopyFrom(reinterpret_cast(kCueMessage4), + arraysize(kCueMessage4) - 1, true); + sample4->set_pts(8000); + sample4->set_dts(8000); + sample4->set_duration(1000); + webvtt_fragmenter_.PushSample(sample4); + webvtt_fragmenter_.Flush(); + + // There should be 7 samples for [0,1000], [1000,2000], [2000,3000], + // [3000,6000], [6000, 8000], [8000, 9000], [9000, 10000]. 
+ EXPECT_EQ(7u, webvtt_fragmenter_.ReadySamplesSize()); + + VTTCueBox first_long_cue_data; + first_long_cue_data.cue_payload.cue_text = kCueMessage1; + std::vector expected; + AppendBoxToVector(&first_long_cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(0, 1000, expected)); + + VTTCueBox second_cue_data; + second_cue_data.cue_payload.cue_text = kCueMessage2; + expected.clear(); + AppendBoxToVector(&first_long_cue_data, &expected); + AppendBoxToVector(&second_cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(1000, 2000, expected)); + + VTTCueBox third_cue_data; + third_cue_data.cue_payload.cue_text = kCueMessage3; + expected.clear(); + AppendBoxToVector(&first_long_cue_data, &expected); + AppendBoxToVector(&second_cue_data, &expected); + AppendBoxToVector(&third_cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(2000, 3000, expected)); + + expected.clear(); + AppendBoxToVector(&first_long_cue_data, &expected); + AppendBoxToVector(&second_cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(3000, 6000, expected)); + + expected.clear(); + AppendBoxToVector(&first_long_cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(6000, 8000, expected)); + + VTTCueBox fourth_cue_data; + fourth_cue_data.cue_payload.cue_text = kCueMessage4; + expected.clear(); + AppendBoxToVector(&first_long_cue_data, &expected); + AppendBoxToVector(&fourth_cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(8000, 9000, expected)); + + expected.clear(); + AppendBoxToVector(&first_long_cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(9000, 10000, expected)); +} + +TEST_F(WebVttFragmenterTest, GapAtBeginning) { + std::shared_ptr sample1 = + 
MediaSample::CopyFrom(reinterpret_cast(kCueMessage1), + arraysize(kCueMessage1) - 1, true); + sample1->set_pts(1200); + sample1->set_dts(1200); + sample1->set_duration(2000); + webvtt_fragmenter_.PushSample(sample1); + + webvtt_fragmenter_.Flush(); + EXPECT_EQ(1u, webvtt_fragmenter_.ReadySamplesSize()); + + VTTCueBox cue_data; + cue_data.cue_payload.cue_text = kCueMessage1; + std::vector expected; + AppendBoxToVector(&cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(1200, 3200, expected)); +} + +TEST_F(WebVttFragmenterTest, SameStartTime) { + // TODO(rkuroiwa): This should be std::shared_ptr if this is applied on HEAD. + std::shared_ptr sample1 = + MediaSample::CopyFrom(reinterpret_cast(kCueMessage1), + arraysize(kCueMessage1) - 1, true); + sample1->set_pts(0); + sample1->set_dts(0); + sample1->set_duration(2000); + + webvtt_fragmenter_.PushSample(sample1); + + std::shared_ptr sample2 = + MediaSample::CopyFrom(reinterpret_cast(kCueMessage2), + arraysize(kCueMessage2) - 1, true); + sample2->set_pts(0); + sample2->set_dts(0); + sample2->set_duration(1500); + + webvtt_fragmenter_.PushSample(sample2); + webvtt_fragmenter_.Flush(); + EXPECT_EQ(2u, webvtt_fragmenter_.ReadySamplesSize()); + + VTTCueBox first_cue_data; + first_cue_data.cue_payload.cue_text = kCueMessage1; + VTTCueBox second_cue_data; + second_cue_data.cue_payload.cue_text = kCueMessage2; + + std::vector expected; + AppendBoxToVector(&first_cue_data, &expected); + AppendBoxToVector(&second_cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(0, 1500, expected)); + + expected.clear(); + AppendBoxToVector(&first_cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(1500, 2000, expected)); +} + +// This test is a combination of the test cases above. 
+TEST_F(WebVttFragmenterTest, MoreCases) { + std::shared_ptr sample1 = + MediaSample::CopyFrom(reinterpret_cast(kCueMessage1), + arraysize(kCueMessage1) - 1, true); + sample1->set_pts(0); + sample1->set_dts(0); + sample1->set_duration(2000); + + webvtt_fragmenter_.PushSample(sample1); + + std::shared_ptr sample2 = + MediaSample::CopyFrom(reinterpret_cast(kCueMessage2), + arraysize(kCueMessage2) - 1, true); + sample2->set_pts(100); + sample2->set_dts(100); + sample2->set_duration(100); + + webvtt_fragmenter_.PushSample(sample2); + + std::shared_ptr sample3 = + MediaSample::CopyFrom(reinterpret_cast(kCueMessage3), + arraysize(kCueMessage3) - 1, true); + sample3->set_pts(1500); + sample3->set_dts(1500); + sample3->set_duration(1000); + webvtt_fragmenter_.PushSample(sample3); + + std::shared_ptr sample4 = + MediaSample::CopyFrom(reinterpret_cast(kCueMessage4), + arraysize(kCueMessage4) - 1, true); + sample4->set_pts(1500); + sample4->set_dts(1500); + sample4->set_duration(800); + webvtt_fragmenter_.PushSample(sample4); + + webvtt_fragmenter_.Flush(); + EXPECT_EQ(6u, webvtt_fragmenter_.ReadySamplesSize()); + + VTTCueBox first_cue_data; + first_cue_data.cue_payload.cue_text = kCueMessage1; + VTTCueBox second_cue_data; + second_cue_data.cue_payload.cue_text = kCueMessage2; + VTTCueBox third_cue_data; + third_cue_data.cue_payload.cue_text = kCueMessage3; + VTTCueBox fourth_cue_data; + fourth_cue_data.cue_payload.cue_text = kCueMessage4; + + std::vector expected; + AppendBoxToVector(&first_cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(0, 100, expected)); + + expected.clear(); + AppendBoxToVector(&first_cue_data, &expected); + AppendBoxToVector(&second_cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(100, 200, expected)); + + expected.clear(); + AppendBoxToVector(&first_cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + 
MatchesStartTimeEndTimeAndData(200, 1500, expected)); + + expected.clear(); + AppendBoxToVector(&first_cue_data, &expected); + AppendBoxToVector(&third_cue_data, &expected); + AppendBoxToVector(&fourth_cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(1500, 2000, expected)); + + expected.clear(); + AppendBoxToVector(&third_cue_data, &expected); + AppendBoxToVector(&fourth_cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(2000, 2300, expected)); + + expected.clear(); + AppendBoxToVector(&third_cue_data, &expected); + EXPECT_THAT(webvtt_fragmenter_.PopSample(), + MatchesStartTimeEndTimeAndData(2300, 2500, expected)); +} + +} // namespace mp4 +} // namespace media +} // namespace shaka diff --git a/packager/media/formats/webvtt/webvtt_media_parser.cc b/packager/media/formats/webvtt/webvtt_media_parser.cc index ed5c36fd40..22126ad0fc 100644 --- a/packager/media/formats/webvtt/webvtt_media_parser.cc +++ b/packager/media/formats/webvtt/webvtt_media_parser.cc @@ -184,6 +184,11 @@ bool ParseTimingAndSettingsLine(const std::string& line, return true; } +} // namespace + +Cue::Cue() : start_time(0), duration(0) {} +Cue::~Cue() {} + // Mapping: // comment --> side data (and side data only sample) // settings --> side data @@ -208,10 +213,29 @@ std::shared_ptr<MediaSample> CueToMediaSample(const Cue& cue) { return media_sample; } -} // namespace +// TODO(rkuroiwa): Cue gets converted to MediaSample in WebVttMediaParser and +// then back to Cue in the muxer. Consider making MediaSample a protobuf or make +// Cue a protobuf and (ab)use MediaSample::data() to store serialized Cue. 
+Cue MediaSampleToCue(const MediaSample& sample) { + Cue cue; + if (sample.data_size() == 0) { + std::string comment(sample.side_data(), + sample.side_data() + sample.side_data_size()); + cue.comment.push_back(comment); + return cue; + } -Cue::Cue() : start_time(0), duration(0) {} -Cue::~Cue() {} + std::string payload(sample.data(), sample.data() + sample.data_size()); + cue.payload.push_back(payload); + cue.identifier.assign(sample.config_id()); + cue.start_time = sample.pts(); + cue.duration = sample.duration(); + if (sample.side_data_size() != 0) { + cue.settings.assign(sample.side_data(), + sample.side_data() + sample.side_data_size()); + } + return cue; +} WebVttMediaParser::WebVttMediaParser() : state_(kHeader) {} WebVttMediaParser::~WebVttMediaParser() {} diff --git a/packager/media/formats/webvtt/webvtt_media_parser.h b/packager/media/formats/webvtt/webvtt_media_parser.h index b55aadd7fc..a6aa8a6f25 100644 --- a/packager/media/formats/webvtt/webvtt_media_parser.h +++ b/packager/media/formats/webvtt/webvtt_media_parser.h @@ -32,6 +32,16 @@ struct Cue { std::vector comment; }; +/// Convert Cue to MediaSample. +/// @param cue data. +/// @return @a cue converted to a MediaSample. +std::shared_ptr CueToMediaSample(const Cue& cue); + +/// Convert MediaSample to Cue. +/// @param sample to be converted. +/// @return @a sample converted to Cue. +Cue MediaSampleToCue(const MediaSample& sample); + // WebVTT parser. // The input may not be encrypted so decryption_key_source is ignored. class WebVttMediaParser : public MediaParser {