diff --git a/packager/app/test/packager_test.py b/packager/app/test/packager_test.py index d62eb498dd..38109f0169 100755 --- a/packager/app/test/packager_test.py +++ b/packager/app/test/packager_test.py @@ -1047,6 +1047,24 @@ class PackagerFunctionalTest(PackagerAppTest): self.assertPackageSuccess(streams, flags) self._CheckTestResults('hls-audio-video-text-with-ad-cues') + def testVttTextToMp4WithAdCues(self): + streams = [ + self._GetStream('audio', + hls=True, + segmented=True), + self._GetStream('video', + hls=True, + segmented=True), + self._GetStream('text', + hls=True, + segmented=True, + test_file='bear-subtitle-english.vtt', + output_format='mp4') + ] + flags = self._GetFlags(output_hls=True, ad_cues='1.5') + self.assertPackageSuccess(streams, flags) + self._CheckTestResults('vtt-text-to-mp4-with-ad-cues') + def testWebmSubsampleEncryption(self): streams = [ self._GetStream('video', diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-1.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-1.m4s new file mode 100644 index 0000000000..c4b4ac03d9 Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-1.m4s differ diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-2.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-2.m4s new file mode 100644 index 0000000000..8504387c99 Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-2.m4s differ diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-3.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-3.m4s new file mode 100644 index 0000000000..692408ca70 Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-3.m4s differ diff --git 
a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-4.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-4.m4s new file mode 100644 index 0000000000..cd5f00f50c Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-4.m4s differ diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-init.mp4 b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-init.mp4 new file mode 100644 index 0000000000..8f7a647e00 Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-init.mp4 differ diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio.m3u8 b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio.m3u8 new file mode 100644 index 0000000000..5e114bac78 --- /dev/null +++ b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio.m3u8 @@ -0,0 +1,16 @@ +#EXTM3U +#EXT-X-VERSION:6 +## Generated with https://github.com/google/shaka-packager version -- +#EXT-X-TARGETDURATION:2 +#EXT-X-PLAYLIST-TYPE:VOD +#EXT-X-MAP:URI="bear-640x360-audio-init.mp4" +#EXTINF:1.022, +bear-640x360-audio-1.m4s +#EXTINF:0.998, +bear-640x360-audio-2.m4s +#EXTINF:0.046, +bear-640x360-audio-3.m4s +#EXT-X-PLACEMENT-OPPORTUNITY +#EXTINF:0.697, +bear-640x360-audio-4.m4s +#EXT-X-ENDLIST diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-1.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-1.m4s new file mode 100644 index 0000000000..82605ec2c3 Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-1.m4s differ diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-2.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-2.m4s new file mode 100644 index 
0000000000..311f93260e Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-2.m4s differ diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-3.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-3.m4s new file mode 100644 index 0000000000..71e371e17a Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-3.m4s differ diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-iframe.m3u8 b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-iframe.m3u8 new file mode 100644 index 0000000000..756a032739 --- /dev/null +++ b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-iframe.m3u8 @@ -0,0 +1,18 @@ +#EXTM3U +#EXT-X-VERSION:6 +## Generated with https://github.com/google/shaka-packager version -- +#EXT-X-TARGETDURATION:2 +#EXT-X-PLAYLIST-TYPE:VOD +#EXT-X-I-FRAMES-ONLY +#EXT-X-MAP:URI="bear-640x360-video-init.mp4" +#EXTINF:1.001, +#EXT-X-BYTERANGE:15581@80 +bear-640x360-video-1.m4s +#EXTINF:1.001, +#EXT-X-BYTERANGE:18221@80 +bear-640x360-video-2.m4s +#EXT-X-PLACEMENT-OPPORTUNITY +#EXTINF:0.734, +#EXT-X-BYTERANGE:19663@80 +bear-640x360-video-3.m4s +#EXT-X-ENDLIST diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-init.mp4 b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-init.mp4 new file mode 100644 index 0000000000..5ed12ec053 Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-init.mp4 differ diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video.m3u8 b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video.m3u8 new file mode 100644 index 0000000000..ed69bc21bf --- /dev/null +++ b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video.m3u8 @@ 
-0,0 +1,14 @@ +#EXTM3U +#EXT-X-VERSION:6 +## Generated with https://github.com/google/shaka-packager version -- +#EXT-X-TARGETDURATION:2 +#EXT-X-PLAYLIST-TYPE:VOD +#EXT-X-MAP:URI="bear-640x360-video-init.mp4" +#EXTINF:1.068, +bear-640x360-video-1.m4s +#EXTINF:1.001, +bear-640x360-video-2.m4s +#EXT-X-PLACEMENT-OPPORTUNITY +#EXTINF:0.734, +bear-640x360-video-3.m4s +#EXT-X-ENDLIST diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-1.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-1.m4s new file mode 100644 index 0000000000..577826f3fa Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-1.m4s differ diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-2.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-2.m4s new file mode 100644 index 0000000000..806b3320da Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-2.m4s differ diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-3.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-3.m4s new file mode 100644 index 0000000000..8c94e9149d Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-3.m4s differ diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-4.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-4.m4s new file mode 100644 index 0000000000..635b90fd6b Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-4.m4s differ diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-5.m4s 
b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-5.m4s new file mode 100644 index 0000000000..706143b94e Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-5.m4s differ diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-6.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-6.m4s new file mode 100644 index 0000000000..1f56626de2 Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-6.m4s differ diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-init.mp4 b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-init.mp4 new file mode 100644 index 0000000000..8c9238168e Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-init.mp4 differ diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text.m3u8 b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text.m3u8 new file mode 100644 index 0000000000..ed76d2c784 --- /dev/null +++ b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text.m3u8 @@ -0,0 +1,20 @@ +#EXTM3U +#EXT-X-VERSION:6 +## Generated with https://github.com/google/shaka-packager version -- +#EXT-X-TARGETDURATION:2 +#EXT-X-PLAYLIST-TYPE:VOD +#EXT-X-MAP:URI="bear-subtitle-english-text-init.mp4" +#EXTINF:1.000, +bear-subtitle-english-text-1.m4s +#EXTINF:1.000, +bear-subtitle-english-text-2.m4s +#EXTINF:0.068, +bear-subtitle-english-text-3.m4s +#EXT-X-PLACEMENT-OPPORTUNITY +#EXTINF:1.000, +bear-subtitle-english-text-4.m4s +#EXTINF:1.000, +bear-subtitle-english-text-5.m4s +#EXTINF:1.000, +bear-subtitle-english-text-6.m4s +#EXT-X-ENDLIST diff --git 
a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/output.m3u8 b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/output.m3u8 new file mode 100644 index 0000000000..05803d55d5 --- /dev/null +++ b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/output.m3u8 @@ -0,0 +1,11 @@ +#EXTM3U +## Generated with https://github.com/google/shaka-packager version -- + +#EXT-X-MEDIA:TYPE=AUDIO,URI="bear-640x360-audio.m3u8",GROUP-ID="default-audio-group",NAME="stream_1",AUTOSELECT=YES,CHANNELS="2" + +#EXT-X-MEDIA:TYPE=SUBTITLES,URI="bear-subtitle-english-text.m3u8",GROUP-ID="default-text-group",NAME="stream_0",AUTOSELECT=YES + +#EXT-X-STREAM-INF:BANDWIDTH=1150004,CODECS="avc1.64001e,mp4a.40.2,wvtt",RESOLUTION=640x360,AUDIO="default-audio-group",SUBTITLES="default-text-group" +bear-640x360-video.m3u8 + +#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=214291,CODECS="avc1.64001e",RESOLUTION=640x360,URI="bear-640x360-video-iframe.m3u8" diff --git a/packager/media/base/media_handler.cc b/packager/media/base/media_handler.cc index e26bce3745..425b01368f 100644 --- a/packager/media/base/media_handler.cc +++ b/packager/media/base/media_handler.cc @@ -9,6 +9,26 @@ namespace shaka { namespace media { +std::string StreamDataTypeToString(StreamDataType type) { + switch (type) { + case StreamDataType::kStreamInfo: + return "stream info"; + case StreamDataType::kMediaSample: + return "media sample"; + case StreamDataType::kTextSample: + return "text sample"; + case StreamDataType::kSegmentInfo: + return "segment info"; + case StreamDataType::kScte35Event: + return "scte35 event"; + case StreamDataType::kCueEvent: + return "cue event"; + case StreamDataType::kUnknown: + return "unknown"; + } + return "unknown"; +} + Status MediaHandler::SetHandler(size_t output_stream_index, std::shared_ptr handler) { if (output_handlers_.find(output_stream_index) != output_handlers_.end()) { diff --git a/packager/media/base/media_handler.h b/packager/media/base/media_handler.h index 
b0cef596f1..32b623e8bb 100644 --- a/packager/media/base/media_handler.h +++ b/packager/media/base/media_handler.h @@ -29,6 +29,8 @@ enum class StreamDataType { kCueEvent, }; +std::string StreamDataTypeToString(StreamDataType type); + // Scte35Event represents cuepoint markers in input streams. It will be used // to represent out of band cuepoint markers too. struct Scte35Event { diff --git a/packager/media/base/media_handler_test_base.cc b/packager/media/base/media_handler_test_base.cc index ae983e8f96..d6ec24127a 100644 --- a/packager/media/base/media_handler_test_base.cc +++ b/packager/media/base/media_handler_test_base.cc @@ -62,26 +62,6 @@ const uint8_t kData[]{ namespace shaka { namespace media { -std::string StreamDataTypeToString(StreamDataType stream_data_type) { - switch (stream_data_type) { - case StreamDataType::kStreamInfo: - return "stream info"; - case StreamDataType::kMediaSample: - return "media sample"; - case StreamDataType::kTextSample: - return "text sample"; - case StreamDataType::kSegmentInfo: - return "segment info"; - case StreamDataType::kScte35Event: - return "scte35 event"; - case StreamDataType::kCueEvent: - return "cue event"; - case StreamDataType::kUnknown: - return "unknown"; - } - return "unknown"; -} - std::string BoolToString(bool value) { return value ? 
"true" : "false"; } diff --git a/packager/media/base/media_handler_test_base.h b/packager/media/base/media_handler_test_base.h index d6f7def9e2..aea5898a27 100644 --- a/packager/media/base/media_handler_test_base.h +++ b/packager/media/base/media_handler_test_base.h @@ -16,7 +16,6 @@ namespace shaka { namespace media { -std::string StreamDataTypeToString(StreamDataType stream_data_type); std::string BoolToString(bool value); MATCHER_P(IsStreamInfo, stream_index, "") { diff --git a/packager/media/formats/webvtt/webvtt_to_mp4_handler.cc b/packager/media/formats/webvtt/webvtt_to_mp4_handler.cc index af37b7ec3d..8515ccae1f 100644 --- a/packager/media/formats/webvtt/webvtt_to_mp4_handler.cc +++ b/packager/media/formats/webvtt/webvtt_to_mp4_handler.cc @@ -7,112 +7,64 @@ #include "packager/media/formats/webvtt/webvtt_to_mp4_handler.h" #include +#include #include "packager/media/base/buffer_writer.h" #include "packager/media/formats/mp4/box_buffer.h" #include "packager/media/formats/mp4/box_definitions.h" +#include "packager/status_macros.h" namespace shaka { namespace media { - -class DisplayAction { - public: - DisplayAction(uint64_t id, int64_t time) : id_(id), time_(time) {} - virtual ~DisplayAction() = default; - - uint64_t id() const { return id_; } - int64_t time() const { return time_; } - virtual void ActOn(std::list* display) const = 0; - - private: - uint64_t id_; - int64_t time_; -}; - namespace { -const uint64_t kTrackId = 0; +size_t kTrackId = 0; -class AddToDisplayAction : public DisplayAction { - public: - explicit AddToDisplayAction(uint64_t id, - std::shared_ptr& sample) - : DisplayAction(id, sample->start_time()), sample_(sample) {} - void ActOn(std::list* display) const override { - display->push_back(sample_.get()); - } +enum class DisplayActionType { ADD, REMOVE }; - private: - std::shared_ptr sample_; +struct DisplayAction { + DisplayActionType type; + const TextSample* sample; }; -class RemoveFromDisplayAction : public DisplayAction { - public: - 
explicit RemoveFromDisplayAction(uint64_t id, - std::shared_ptr& sample) - : DisplayAction(id, sample->EndTime()), sample_(sample) {} - void ActOn(std::list* display) const override { - display->remove(sample_.get()); +std::multimap CreateActionList( + int64_t segment_start, + int64_t segment_end, + const std::list>& samples) { + std::multimap actions; + + for (const auto& sample : samples) { + DCHECK(sample); + + // The add action should occur either in this segment or in a previous + // segment. + DCHECK_LT(sample->start_time(), segment_end); + actions.insert( + {sample->start_time(), {DisplayActionType::ADD, sample.get()}}); + + // If the remove happens in a later segment, then we don't want to include + // that action. + if (sample->EndTime() < segment_end) { + actions.insert( + {sample->EndTime(), {DisplayActionType::REMOVE, sample.get()}}); + } } - private: - std::shared_ptr sample_; -}; -} // namespace - -bool DisplayActionCompare::operator()( - const std::shared_ptr& left, - const std::shared_ptr& right) const { - return left->time() == right->time() ? 
left->id() > right->id() - : left->time() > right->time(); + return actions; } -Status WebVttToMp4Handler::InitializeInternal() { - return Status::OK; -} - -Status WebVttToMp4Handler::Process(std::unique_ptr stream_data) { - if (StreamDataType::kStreamInfo == stream_data->stream_data_type) { - return DispatchStreamInfo(kTrackId, std::move(stream_data->stream_info)); - } - if (stream_data->stream_data_type == StreamDataType::kTextSample) { - std::shared_ptr sample = stream_data->text_sample; - - std::shared_ptr add( - new AddToDisplayAction(NextActionId(), sample)); - std::shared_ptr remove( - new RemoveFromDisplayAction(NextActionId(), sample)); - - actions_.push(add); - actions_.push(remove); - - return ProcessUpToTime(add->time()); - } - return Status(error::INTERNAL_ERROR, - "Invalid stream data type for this handler"); -} - -Status WebVttToMp4Handler::OnFlushRequest(size_t input_stream_index) { - const int64_t kEndOfTime = std::numeric_limits::max(); - ProcessUpToTime(kEndOfTime); - - return FlushDownstream(0); -} - -void WebVttToMp4Handler::WriteCue(const std::string& id, - const std::string& settings, - const std::string& payload, - BufferWriter* out) { +void WriteSample(const TextSample& sample, BufferWriter* out) { mp4::VTTCueBox box; - if (id.length()) { - box.cue_id.cue_id = id; + if (sample.id().length()) { + box.cue_id.cue_id = sample.id(); } - if (settings.length()) { - box.cue_settings.settings = settings; + if (sample.settings().length()) { + box.cue_settings.settings = sample.settings(); } - if (payload.length()) { - box.cue_payload.cue_text = payload; + if (sample.payload().length()) { + box.cue_payload.cue_text = sample.payload(); } + // If there is internal timing, i.e. WebVTT cue timestamp, then // cue_current_time should be populated // "which gives the VTT timestamp associated with the start time of sample." 
@@ -121,85 +73,195 @@ void WebVttToMp4Handler::WriteCue(const std::string& id, box.Write(out); } -Status WebVttToMp4Handler::ProcessUpToTime(int64_t cutoff_time) { - // We can only process as far as the last add as no new events will be - // added that come before that time. - while (actions_.size() && actions_.top()->time() < cutoff_time) { - // STAGE 1: Write out the current state - // Get the time range for which the current active state is valid. - const int64_t previous_change = next_change_; - next_change_ = actions_.top()->time(); +void WriteSamples(const std::list& samples, + BufferWriter* writer) { + DCHECK_GE(samples.size(), 0u); - if (next_change_ > previous_change) { - // Send out the active group. If there is nothing in the active group, - // then an empty cue is sent. - Status status = - active_.size() - ? MergeAndSendSamples(active_, previous_change, next_change_) - : SendEmptySample(previous_change, next_change_); - - if (!status.ok()) { - return status; - } - } else { - // The only time that |previous_change| and |next_change_| should ever - // break the rule |next_change_ > previous_change| is at the start where - // |previous_change| and |next_change_| are both zero. - DCHECK_EQ(previous_change, 0u); - DCHECK_EQ(next_change_, 0u); - } - - // STAGE 2: Move to the next state. 
- while (actions_.size() && actions_.top()->time() == next_change_) { - actions_.top()->ActOn(&active_); - actions_.pop(); - } + for (const auto& sample : samples) { + WriteSample(*sample, writer); } +} + +void WriteEmptySample(BufferWriter* writer) { + mp4::VTTEmptyCueBox box; + box.Write(writer); +} + +std::shared_ptr CreateMediaSample(const BufferWriter& buffer, + int64_t start_time, + int64_t end_time) { + DCHECK_GE(start_time, 0); + DCHECK_GT(end_time, start_time); + + const bool kIsKeyFrame = true; + + std::shared_ptr sample = + MediaSample::CopyFrom(buffer.Buffer(), buffer.Size(), kIsKeyFrame); + sample->set_pts(start_time); + sample->set_dts(start_time); + sample->set_duration(end_time - start_time); + + return sample; +} +} // namespace + +Status WebVttToMp4Handler::InitializeInternal() { + return Status::OK; +} + +Status WebVttToMp4Handler::Process(std::unique_ptr stream_data) { + switch (stream_data->stream_data_type) { + case StreamDataType::kStreamInfo: + return OnStreamInfo(std::move(stream_data)); + case StreamDataType::kCueEvent: + return OnCueEvent(std::move(stream_data)); + case StreamDataType::kSegmentInfo: + return OnSegmentInfo(std::move(stream_data)); + case StreamDataType::kTextSample: + return OnTextSample(std::move(stream_data)); + default: + return Status(error::INTERNAL_ERROR, + "Invalid stream data type (" + + StreamDataTypeToString(stream_data->stream_data_type) + + ") for this WebVttToMp4 handler"); + } +} + +Status WebVttToMp4Handler::OnStreamInfo( + std::unique_ptr stream_data) { + DCHECK(stream_data); + DCHECK(stream_data->stream_info); + + return Dispatch(std::move(stream_data)); +} + +Status WebVttToMp4Handler::OnCueEvent(std::unique_ptr stream_data) { + DCHECK(stream_data); + DCHECK(stream_data->cue_event); + + if (current_segment_.size()) { + return Status(error::INTERNAL_ERROR, + "Cue Events should come right after segment info."); + } + + return Dispatch(std::move(stream_data)); +} + +Status WebVttToMp4Handler::OnSegmentInfo( 
+ std::unique_ptr stream_data) { + DCHECK(stream_data); + DCHECK(stream_data->segment_info); + + const auto& segment = stream_data->segment_info; + + int64_t segment_start = segment->start_timestamp; + int64_t segment_duration = segment->duration; + int64_t segment_end = segment_start + segment_duration; + + RETURN_IF_ERROR(DispatchCurrentSegment(segment_start, segment_end)); + current_segment_.clear(); + + return Dispatch(std::move(stream_data)); +} + +Status WebVttToMp4Handler::OnTextSample( + std::unique_ptr stream_data) { + DCHECK(stream_data); + DCHECK(stream_data->text_sample); + + auto& sample = stream_data->text_sample; + + // Ignore empty samples. This will create gaps, but we will handle that + // later. + if (sample->payload().empty()) { + return Status::OK; + } + + // Add the new text sample to the cache of samples that belong in the + // current segment. + current_segment_.push_back(std::move(stream_data->text_sample)); + return Status::OK; +} + +Status WebVttToMp4Handler::DispatchCurrentSegment(int64_t segment_start, + int64_t segment_end) { + // Active will hold all the samples that are "on screen" for the current + // section of time. + std::list active; + + // Move through the segment, jumping between each change to the current state. + // A change is defined as a group of one or more DisplayActions. + int64_t section_start = segment_start; + + // |actions| is a map of [time] -> [action]. + auto actions = CreateActionList(segment_start, segment_end, current_segment_); + auto front = actions.begin(); + + // As it is possible to have a segment with no samples, we can't base this + // loop on the number of actions. So we need to keep iterating until we + // have written enough sections to get to the end of the segment. + while (section_start < segment_end) { + // Apply all actions that occur at the start of this part of the segment. 
+ // Normally we would only want "== section_start" but as it is possible for + // samples to span multiple segments, their start time will be before the + // segment's start time. So we want to apply them too if they come before + // the segment. Thus why we use "<=". + while (front != actions.end() && front->first <= section_start) { + auto& action = front->second; + + switch (action.type) { + case DisplayActionType::ADD: { + active.push_back(action.sample); + break; + } + case DisplayActionType::REMOVE: { + auto found = std::find(active.begin(), active.end(), action.sample); + DCHECK(found != active.end()); + active.erase(found); + break; + } + default: { + NOTREACHED() << "Unsupported DisplayActionType " + << static_cast(action.type); + break; + } + } + + // We have "consumed" the action at the front. We can move on. + front++; + } + + // The end of the section will either be the start of the next section or + // the end of the segment. + int64_t section_end = front == actions.end() ? 
segment_end : front->first; + DCHECK_GT(section_end, section_start); + DCHECK_LE(section_end, segment_end); + RETURN_IF_ERROR(MergeDispatchSamples(section_start, section_end, active)); + + section_start = section_end; + } + + DCHECK(front == actions.end()) << "We should have processed all actions."; return Status::OK; } -Status WebVttToMp4Handler::MergeAndSendSamples( - const std::list& samples, +Status WebVttToMp4Handler::MergeDispatchSamples( int64_t start_time, - int64_t end_time) { + int64_t end_time, + const std::list& state) { DCHECK_GT(end_time, start_time); box_writer_.Clear(); - for (const TextSample* sample : samples) { - DCHECK_LE(sample->start_time(), start_time); - DCHECK_GE(sample->EndTime(), end_time); - WriteCue(sample->id(), sample->settings(), sample->payload(), &box_writer_); + if (state.size()) { + WriteSamples(state, &box_writer_); + } else { + WriteEmptySample(&box_writer_); } - std::shared_ptr sample = - MediaSample::CopyFrom(box_writer_.Buffer(), box_writer_.Size(), true); - sample->set_pts(start_time); - sample->set_dts(start_time); - sample->set_duration(end_time - start_time); - return DispatchMediaSample(kTrackId, std::move(sample)); -} - -Status WebVttToMp4Handler::SendEmptySample(int64_t start_time, - int64_t end_time) { - DCHECK_GT(end_time, start_time); - - box_writer_.Clear(); - - mp4::VTTEmptyCueBox box; - box.Write(&box_writer_); - - std::shared_ptr sample = - MediaSample::CopyFrom(box_writer_.Buffer(), box_writer_.Size(), true); - sample->set_pts(start_time); - sample->set_dts(start_time); - sample->set_duration(end_time - start_time); - return DispatchMediaSample(kTrackId, std::move(sample)); -} - -uint64_t WebVttToMp4Handler::NextActionId() { - return next_id_++; + return DispatchMediaSample( + kTrackId, CreateMediaSample(box_writer_, start_time, end_time)); } } // namespace media } // namespace shaka diff --git a/packager/media/formats/webvtt/webvtt_to_mp4_handler.h b/packager/media/formats/webvtt/webvtt_to_mp4_handler.h index 
25dd838f19..c337c39ccb 100644 --- a/packager/media/formats/webvtt/webvtt_to_mp4_handler.h +++ b/packager/media/formats/webvtt/webvtt_to_mp4_handler.h @@ -18,76 +18,35 @@ namespace shaka { namespace media { -class DisplayAction; - -class DisplayActionCompare { - public: - bool operator()(const std::shared_ptr& left, - const std::shared_ptr& right) const; -}; - -// Take text samples, convert them to Mp4 boxes, and send them down stream. -// Virtual methods should only be overridden for testing only. +// A media handler that should come after the cue aligner and segmenter and +// should come before the muxer. This handler is to convert text samples +// to media samples so that they can be sent to a mp4 muxer. class WebVttToMp4Handler : public MediaHandler { public: WebVttToMp4Handler() = default; - - protected: - // |Process| and |OnFlushRequest| need to be protected so that it can be - // called for testing. - Status Process(std::unique_ptr stream_data) override; - Status OnFlushRequest(size_t input_stream_index) override; - - // This is made protected-virtual so that we can override it for testing. - virtual void WriteCue(const std::string& id, - const std::string& settings, - const std::string& payload, - BufferWriter* out); + virtual ~WebVttToMp4Handler() override = default; private: WebVttToMp4Handler(const WebVttToMp4Handler&) = delete; WebVttToMp4Handler& operator=(const WebVttToMp4Handler&) = delete; Status InitializeInternal() override; + Status Process(std::unique_ptr stream_data) override; - // Merge and send all samples in the queue downstream while the head of the - // queue's time is less than |cutoff|. |cutoff| is needed as we can only - // merge and send samples when we are sure no new samples will appear before - // the next action. 
- Status ProcessUpToTime(int64_t cutoff_time); + Status OnStreamInfo(std::unique_ptr stream_data); + Status OnCueEvent(std::unique_ptr stream_data); + Status OnSegmentInfo(std::unique_ptr stream_data); + Status OnTextSample(std::unique_ptr stream_data); - // Merge together all TextSamples in |samples| into a single MP4 box and - // pass the box downstream. - Status MergeAndSendSamples(const std::list& samples, - int64_t start_time, - int64_t end_time); + Status DispatchCurrentSegment(int64_t segment_start, int64_t segment_end); + Status MergeDispatchSamples(int64_t start_time, + int64_t end_time, + const std::list& state); - Status SendEmptySample(int64_t start_time, int64_t end_time); - - // Get a new id for the next action. - uint64_t NextActionId(); - - int64_t next_change_ = 0; + std::list> current_segment_; // This is the current state of the box we are writing. BufferWriter box_writer_; - - // |actions_| is a time sorted list of actions that affect the timeline (e.g. - // adding or removing a cue). |active_| is the list of all cues that are - // currently on screen. - // When the cue is to be on screen, it is added to |active_|. When it is time - // for the cue to come off screen, it is removed from |active_|. - // As |actions_| has a shared pointer to the cue, |active_| can use normal - // pointers as the pointer will be valid and it makes the |remove| call - // easier. 
- - std::priority_queue, - std::vector>, - DisplayActionCompare> - actions_; - std::list active_; - - uint64_t next_id_ = 0; }; } // namespace media diff --git a/packager/media/formats/webvtt/webvtt_to_mp4_handler_unittest.cc b/packager/media/formats/webvtt/webvtt_to_mp4_handler_unittest.cc index 3ad8b2b8c8..28c23b5de8 100644 --- a/packager/media/formats/webvtt/webvtt_to_mp4_handler_unittest.cc +++ b/packager/media/formats/webvtt/webvtt_to_mp4_handler_unittest.cc @@ -15,368 +15,535 @@ namespace shaka { namespace media { namespace { const size_t kStreamIndex = 0; +const bool kSubSegment = true; const bool kEncrypted = true; - -const size_t kInputCount = 1; -const size_t kOutputCount = 1; -const size_t kInputIndex = 0; -const size_t kOutputIndex = 0; - -const char* kId[] = {"cue 1 id", "cue 2 id", "cue 3 id"}; -const char* kPayload[] = {"cue 1 payload", "cue 2 payload", "cue 3 payload"}; -const char* kNoSettings = ""; - -// These all refer to the samples. To make them easier to use in their -// correct context, they have purposely short names. -const size_t kA = 0; -const size_t kB = 1; -const size_t kC = 2; - } // namespace -class TestableWebVttToMp4Handler : public WebVttToMp4Handler { - public: - MOCK_METHOD3(OnWriteCue, - void(const std::string& id, - const std::string& settings, - const std::string& payload)); - - protected: - void WriteCue(const std::string& id, - const std::string& settings, - const std::string& payload, - BufferWriter* out) { - OnWriteCue(id, settings, payload); - // We need to write something out or else media sample will think it is the - // end of the stream. 
- out->AppendInt(0); - } -}; - class WebVttToMp4HandlerTest : public MediaHandlerTestBase { protected: - void SetUp() { - mp4_handler_ = std::make_shared(); - ASSERT_OK(SetUpAndInitializeGraph(mp4_handler_, kInputCount, kOutputCount)); + Status SetUpTestGraph() { + const size_t kOneInput = 1; + const size_t kOneOutput = 1; + + auto handler = std::make_shared(); + return SetUpAndInitializeGraph(handler, kOneInput, kOneOutput); } - std::shared_ptr mp4_handler_; + FakeInputMediaHandler* In() { + const size_t kInputIndex = 0; + return Input(kInputIndex); + } + + MockOutputMediaHandler* Out() { + const size_t kOutputIndex = 0; + return Output(kOutputIndex); + } + + Status DispatchStream() { + auto info = GetTextStreamInfo(); + return In()->Dispatch( + StreamData::FromStreamInfo(kStreamIndex, std::move(info))); + } + + Status DispatchText(int64_t start_time, int64_t end_time) { + const std::string kId = "id"; + const std::string kPayload = "payload"; + + auto sample = GetTextSample(kId, start_time, end_time, kPayload); + return In()->Dispatch( + StreamData::FromTextSample(kStreamIndex, std::move(sample))); + } + + Status DispatchSegment(int64_t start_time, int64_t end_time) { + DCHECK_GT(end_time, start_time); + + const bool kIsSubSegment = true; + int64_t duration = end_time - start_time; + + auto segment = GetSegmentInfo(start_time, duration, !kIsSubSegment); + return In()->Dispatch( + StreamData::FromSegmentInfo(kStreamIndex, std::move(segment))); + } + + Status Flush() { return In()->FlushAllDownstreams(); } }; // Verify that when the stream starts at a non-zero value, the gap at the // start will be filled. 
-// | [----A----] +// +// |[-- SEGMENT ------------]| +// | [--- SAMPLE ---]| +// |[- GAP -] | +// TEST_F(WebVttToMp4HandlerTest, NonZeroStartTime) { - const int64_t kGapStart = 0; - const int64_t kGapEnd = 100; + const int64_t kSegmentStart = 0; + const int64_t kSegmentEnd = 10000; + const int64_t kSegmentDuration = kSegmentEnd - kSegmentStart; + + const int64_t kGapStart = kSegmentStart; + const int64_t kGapEnd = kGapStart + 200; const int64_t kGapDuration = kGapEnd - kGapStart; - const char* kSampleId = kId[0]; - const char* kSamplePayload = kPayload[0]; const int64_t kSampleStart = kGapEnd; - const int64_t kSampleDuration = 500; - const int64_t kSampleEnd = kSampleStart + kSampleDuration; + const int64_t kSampleEnd = kSegmentEnd; + const int64_t kSampleDuration = kSampleEnd - kSampleStart; + + ASSERT_OK(SetUpTestGraph()); { testing::InSequence s; - // Empty Cue to fill gap - EXPECT_CALL(*Output(kOutputIndex), - OnProcess(IsMediaSample(kStreamIndex, kGapStart, kGapDuration, - !kEncrypted))); + EXPECT_CALL(*Out(), OnProcess(IsStreamInfo(kStreamIndex))); + // Gap + EXPECT_CALL(*Out(), OnProcess(IsMediaSample(kStreamIndex, kGapStart, + kGapDuration, !kEncrypted))); // Sample - EXPECT_CALL(*mp4_handler_, - OnWriteCue(kSampleId, kNoSettings, kSamplePayload)); - EXPECT_CALL(*Output(kOutputIndex), - OnProcess(IsMediaSample(kStreamIndex, kSampleStart, - kSampleDuration, !kEncrypted))); + EXPECT_CALL(*Out(), OnProcess(IsMediaSample(kStreamIndex, kSampleStart, + kSampleDuration, !kEncrypted))); + // Segment + EXPECT_CALL(*Out(), OnProcess(IsSegmentInfo(kStreamIndex, kSegmentStart, + kSegmentDuration, !kSubSegment, + !kEncrypted))); - EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex)); + EXPECT_CALL(*Out(), OnFlush(kStreamIndex)); } - ASSERT_OK(Input(kInputIndex) - ->Dispatch(StreamData::FromTextSample( - kStreamIndex, GetTextSample(kSampleId, kSampleStart, - kSampleEnd, kSamplePayload)))); - - ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams()); + 
ASSERT_OK(DispatchStream()); + ASSERT_OK(DispatchText(kSampleStart, kSampleEnd)); + ASSERT_OK(DispatchSegment(kSegmentStart, kSegmentEnd)); + ASSERT_OK(Flush()); } // Verify the cues are grouped correctly when the cues do not overlap at all. // An empty cue should be inserted between the two as there is a gap. // -// [----A---] [---B---] +// |[-- SEGMENT --------------------------]| +// |[-- SAMPLE --] [-- SAMPLE --]| +// | [-- GAP --] | +// TEST_F(WebVttToMp4HandlerTest, NoOverlap) { - const int64_t kDuration = 1000; + const int64_t kSegmentStart = 0; + const int64_t kSegmentEnd = 10000; + const int64_t kSegmentDuration = kSegmentEnd - kSegmentStart; - const char* kSample1Id = kId[0]; - const char* kSample1Payload = kPayload[0]; - const int64_t kSample1Start = 0; - const int64_t kSample1End = kSample1Start + kDuration; + const int64_t kSample1Start = kSegmentStart; + const int64_t kSample1End = kSample1Start + 1000; + const int64_t kSample1Duration = kSample1End - kSample1Start; - // Make sample 2 be just a little after sample 1. 
- const char* kSample2Id = kId[1]; - const char* kSample2Payload = kPayload[1]; - const int64_t kSample2Start = kSample1End + 100; - const int64_t kSample2End = kSample2Start + kDuration; + const int64_t kSample2Start = kSegmentEnd - 1000; + const int64_t kSample2End = kSegmentEnd; + const int64_t kSample2Duration = kSample2End - kSample2Start; const int64_t kGapStart = kSample1End; - const int64_t kGapDuration = kSample2Start - kSample1End; + const int64_t kGapEnd = kSample2Start; + const int64_t kGapDuration = kGapEnd - kGapStart; + + ASSERT_OK(SetUpTestGraph()); { testing::InSequence s; + EXPECT_CALL(*Out(), OnProcess(IsStreamInfo(kStreamIndex))); + // Sample 1 - EXPECT_CALL(*mp4_handler_, - OnWriteCue(kSample1Id, kNoSettings, kSample1Payload)); - EXPECT_CALL(*Output(kOutputIndex), - OnProcess(IsMediaSample(kStreamIndex, kSample1Start, kDuration, - !kEncrypted))); - - // Empty Cue to fill gap - EXPECT_CALL(*Output(kOutputIndex), - OnProcess(IsMediaSample(kStreamIndex, kGapStart, kGapDuration, - !kEncrypted))); - + EXPECT_CALL(*Out(), + OnProcess(IsMediaSample(kStreamIndex, kSample1Start, + kSample1Duration, !kEncrypted))); + // Gap + EXPECT_CALL(*Out(), OnProcess(IsMediaSample(kStreamIndex, kGapStart, + kGapDuration, !kEncrypted))); // Sample 2 - EXPECT_CALL(*mp4_handler_, - OnWriteCue(kSample2Id, kNoSettings, kSample2Payload)); - EXPECT_CALL(*Output(kOutputIndex), - OnProcess(IsMediaSample(kStreamIndex, kSample2Start, kDuration, - !kEncrypted))); + EXPECT_CALL(*Out(), + OnProcess(IsMediaSample(kStreamIndex, kSample2Start, + kSample2Duration, !kEncrypted))); + // Segment + EXPECT_CALL(*Out(), OnProcess(IsSegmentInfo(kStreamIndex, kSegmentStart, + kSegmentDuration, !kSubSegment, + !kEncrypted))); - EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex)); + EXPECT_CALL(*Out(), OnFlush(kStreamIndex)); } - ASSERT_OK( - Input(kInputIndex) - ->Dispatch(StreamData::FromTextSample( - kStreamIndex, GetTextSample(kSample1Id, kSample1Start, - kSample1End, 
kSample1Payload)))); - - ASSERT_OK( - Input(kInputIndex) - ->Dispatch(StreamData::FromTextSample( - kStreamIndex, GetTextSample(kSample2Id, kSample2Start, - kSample2End, kSample2Payload)))); - - ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams()); + ASSERT_OK(DispatchStream()); + ASSERT_OK(DispatchText(kSample1Start, kSample1End)); + ASSERT_OK(DispatchText(kSample2Start, kSample2End)); + ASSERT_OK(DispatchSegment(kSegmentStart, kSegmentEnd)); + ASSERT_OK(Flush()); } // Verify the cues are grouped correctly when one cue overlaps another cue at // one end. // -// [-------A-------] -// [-------B------] +// |[-- SEGMENT -----------------]| +// |[-- SAMPLE --------] | +// | [------- SAMPLE --]| TEST_F(WebVttToMp4HandlerTest, Overlap) { - const int64_t kStart[] = {0, 500}; - const int64_t kEnd[] = {1000, 1500}; + const int64_t kSegmentStart = 0; + const int64_t kSegmentEnd = 10000; + const int64_t kSegmentDuration = kSegmentEnd - kSegmentStart; + + const int64_t kSample1Start = kSegmentStart; + const int64_t kSample1End = kSegmentEnd - 3000; + + const int64_t kSample2Start = kSegmentStart + 3000; + const int64_t kSample2End = kSegmentEnd; + + const int64_t kOnlySample1Start = kSample1Start; + const int64_t kOnlySample1End = kSample2Start; + const int64_t kOnlySample1Duration = kOnlySample1End - kOnlySample1Start; + + const int64_t kSample1AndSample2Start = kSample2Start; + const int64_t kSample1AndSample2End = kSample1End; + const int64_t kSample1AndSample2Duration = + kSample1AndSample2End - kSample1AndSample2Start; + + const int64_t kOnlySample2Start = kSample1End; + const int64_t kOnlySample2End = kSample2End; + const int64_t kOnlySample2Duration = kOnlySample2End - kOnlySample2Start; + + ASSERT_OK(SetUpTestGraph()); { testing::InSequence s; - // Sample A - EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[kA], kNoSettings, kPayload[kA])); - EXPECT_CALL(*Output(kOutputIndex), - OnProcess(IsMediaSample(kStreamIndex, kStart[kA], - kStart[kB] - kStart[kA], !kEncrypted))); + 
EXPECT_CALL(*Out(), OnProcess(IsStreamInfo(kStreamIndex))); - // Sample A and B - for (size_t i = kA; i <= kB; i++) { - EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i])); - } - EXPECT_CALL(*Output(kOutputIndex), - OnProcess(IsMediaSample(kStreamIndex, kStart[kB], - kEnd[kA] - kStart[kB], !kEncrypted))); + // Sample 1 + EXPECT_CALL(*Out(), + OnProcess(IsMediaSample(kStreamIndex, kOnlySample1Start, + kOnlySample1Duration, !kEncrypted))); + // Sample 1 and Sample 2 + EXPECT_CALL(*Out(), OnProcess(IsMediaSample( + kStreamIndex, kSample1AndSample2Start, + kSample1AndSample2Duration, !kEncrypted))); + // Sample 2 + EXPECT_CALL(*Out(), + OnProcess(IsMediaSample(kStreamIndex, kOnlySample2Start, + kOnlySample2Duration, !kEncrypted))); + // Segment + EXPECT_CALL(*Out(), OnProcess(IsSegmentInfo(kStreamIndex, kSegmentStart, + kSegmentDuration, !kSubSegment, + !kEncrypted))); - // Sample B - EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[kB], kNoSettings, kPayload[kB])); - EXPECT_CALL(*Output(kOutputIndex), - OnProcess(IsMediaSample(kStreamIndex, kEnd[kA], - kEnd[kB] - kEnd[kA], !kEncrypted))); - - EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex)); + EXPECT_CALL(*Out(), OnFlush(kStreamIndex)); } - for (size_t i = kA; i <= kB; i++) { - ASSERT_OK(Input(kInputIndex) - ->Dispatch(StreamData::FromTextSample( - kStreamIndex, - GetTextSample(kId[i], kStart[i], kEnd[i], kPayload[i])))); - } - ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams()); + ASSERT_OK(DispatchStream()); + ASSERT_OK(DispatchText(kSample1Start, kSample1End)); + ASSERT_OK(DispatchText(kSample2Start, kSample2End)); + ASSERT_OK(DispatchSegment(kSegmentStart, kSegmentEnd)); + ASSERT_OK(Flush()); } // Verify the cues are grouped correctly when one cue starts before and ends // after another cue. 
// -// [-------------A-------------] -// [----------B----------] +// |[-- SEGMENT -----------------]| +// |[-- SAMPLE ------------------]| +// | [------- SAMPLE --] | +// TEST_F(WebVttToMp4HandlerTest, Contains) { - const int64_t kStart[] = {0, 100}; - const int64_t kEnd[] = {1000, 900}; + const int64_t kSegmentStart = 0; + const int64_t kSegmentEnd = 10000; + const int64_t kSegmentDuration = kSegmentEnd - kSegmentStart; + + const int64_t kSample1Start = kSegmentStart; + const int64_t kSample1End = kSegmentEnd; + + const int64_t kSample2Start = kSegmentStart + 1000; + const int64_t kSample2End = kSegmentEnd - 1000; + + const int64_t kBeforeSample2Start = kSample1Start; + const int64_t kBeforeSample2End = kSample2Start; + const int64_t kBeforeSample2Duration = + kBeforeSample2End - kBeforeSample2Start; + + const int64_t kDuringSample2Start = kSample2Start; + const int64_t kDuringSample2End = kSample2End; + const int64_t kDuringSample2Duration = + kDuringSample2End - kDuringSample2Start; + + const int64_t kAfterSample2Start = kSample2End; + const int64_t kAfterSample2End = kSample1End; + const int64_t kAfterSample2Duration = kAfterSample2End - kAfterSample2Start; + + ASSERT_OK(SetUpTestGraph()); { testing::InSequence s; - // Sample A - EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[kA], kNoSettings, kPayload[kA])); - EXPECT_CALL(*Output(kOutputIndex), - OnProcess(IsMediaSample(kStreamIndex, kStart[kA], - kStart[kB] - kStart[kA], !kEncrypted))); + EXPECT_CALL(*Out(), OnProcess(IsStreamInfo(kStreamIndex))); - // Sample A and B - for (size_t i = kA; i <= kB; i++) { - EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i])); - } - EXPECT_CALL(*Output(kOutputIndex), - OnProcess(IsMediaSample(kStreamIndex, kStart[kB], - kEnd[kB] - kStart[kB], !kEncrypted))); + // Sample 1 + EXPECT_CALL(*Out(), + OnProcess(IsMediaSample(kStreamIndex, kBeforeSample2Start, + kBeforeSample2Duration, !kEncrypted))); + // Sample 1 and Sample 2 + EXPECT_CALL(*Out(), + 
OnProcess(IsMediaSample(kStreamIndex, kDuringSample2Start, + kDuringSample2Duration, !kEncrypted))); + // Sample 1 Again + EXPECT_CALL(*Out(), + OnProcess(IsMediaSample(kStreamIndex, kAfterSample2Start, + kAfterSample2Duration, !kEncrypted))); + // Segment + EXPECT_CALL(*Out(), OnProcess(IsSegmentInfo(kStreamIndex, kSegmentStart, + kSegmentDuration, !kSubSegment, + !kEncrypted))); - // Sample A - EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[kA], kNoSettings, kPayload[kA])); - EXPECT_CALL(*Output(kOutputIndex), - OnProcess(IsMediaSample(kStreamIndex, kEnd[kB], - kEnd[kA] - kEnd[kB], !kEncrypted))); - - EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex)); + EXPECT_CALL(*Out(), OnFlush(kStreamIndex)); } - for (size_t i = kA; i <= kB; i++) { - ASSERT_OK(Input(kInputIndex) - ->Dispatch(StreamData::FromTextSample( - kStreamIndex, - GetTextSample(kId[i], kStart[i], kEnd[i], kPayload[i])))); - } - ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams()); + ASSERT_OK(DispatchStream()); + ASSERT_OK(DispatchText(kSample1Start, kSample1End)); + ASSERT_OK(DispatchText(kSample2Start, kSample2End)); + ASSERT_OK(DispatchSegment(kSegmentStart, kSegmentEnd)); + ASSERT_OK(Flush()); } // Verify that when two cues are completely on top of each other, that there // is no extra boxes sent out. 
// -// [----------A----------] -// [----------B----------] +// |[-- SEGMENT -----------------]| +// |[-- SAMPLE ------------------]| +// |[-- SAMPLE ------------------]| +// TEST_F(WebVttToMp4HandlerTest, ExactOverlap) { - const int64_t kStart = 0; - const int64_t kDuration = 1000; - const int64_t kEnd = kStart + kDuration; + const int64_t kSegmentStart = 0; + const int64_t kSegmentEnd = 10000; + const int64_t kSegmentDuration = kSegmentEnd - kSegmentStart; + + const int64_t kSampleStart = kSegmentStart; + const int64_t kSampleEnd = kSegmentEnd; + const int64_t kSampleDuration = kSampleEnd - kSampleStart; + + ASSERT_OK(SetUpTestGraph()); { testing::InSequence s; - // Sample A and B - for (size_t i = kA; i <= kB; i++) { - EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i])); - } - EXPECT_CALL( - *Output(kOutputIndex), - OnProcess(IsMediaSample(kStreamIndex, kStart, kDuration, !kEncrypted))); + EXPECT_CALL(*Out(), OnProcess(IsStreamInfo(kStreamIndex))); - EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex)); + // Both Samples + EXPECT_CALL(*Out(), OnProcess(IsMediaSample(kStreamIndex, kSampleStart, + kSampleDuration, !kEncrypted))); + // Segment + EXPECT_CALL(*Out(), OnProcess(IsSegmentInfo(kStreamIndex, kSegmentStart, + kSegmentDuration, !kSubSegment, + !kEncrypted))); + + EXPECT_CALL(*Out(), OnFlush(kStreamIndex)); } - for (size_t i = kA; i <= kB; i++) { - ASSERT_OK(Input(kInputIndex) - ->Dispatch(StreamData::FromTextSample( - kStreamIndex, - GetTextSample(kId[i], kStart, kEnd, kPayload[i])))); - } - ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams()); + ASSERT_OK(DispatchStream()); + ASSERT_OK(DispatchText(kSampleStart, kSampleEnd)); + ASSERT_OK(DispatchText(kSampleStart, kSampleEnd)); + ASSERT_OK(DispatchSegment(kSegmentStart, kSegmentEnd)); + ASSERT_OK(Flush()); } // Verify that when two cues are completely on top of each other, that there // is no extra boxes sent out. 
// -// [----A----] -// [--------B--------] -// [------------C------------] +// |[-- SEGMENT -----------------]| +// |[-- SAMPLE ------------------]| +// |[-- SAMPLE ------------] | +// |[-- SAMPLE ------] | TEST_F(WebVttToMp4HandlerTest, OverlapStartWithStaggerEnd) { - const int64_t kStart = 0; - const int64_t kEnd[] = {1000, 2000, 3000}; + const int64_t kSegmentStart = 0; + const int64_t kSegmentEnd = 10000; + const int64_t kSegmentDuration = kSegmentEnd - kSegmentStart; + + const int64_t kSample1Start = kSegmentStart; + const int64_t kSample1End = kSegmentEnd; + + const int64_t kSample2Start = kSegmentStart; + const int64_t kSample2End = kSegmentEnd - 1000; + + const int64_t kSample3Start = kSegmentStart; + const int64_t kSample3End = kSegmentEnd - 2000; + + const int64_t kThreeSamplesStart = kSegmentStart; + const int64_t kThreeSamplesEnd = kSample3End; + const int64_t kThreeSamplesDuration = kThreeSamplesEnd - kThreeSamplesStart; + + const int64_t kTwoSamplesStart = kSample3End; + const int64_t kTwoSamplesEnd = kSample2End; + const int64_t kTwoSamplesDuration = kTwoSamplesEnd - kTwoSamplesStart; + + const int64_t kOneSampleStart = kSample2End; + const int64_t kOneSampleEnd = kSample1End; + const int64_t kOneSampleDuration = kOneSampleEnd - kOneSampleStart; + + ASSERT_OK(SetUpTestGraph()); { testing::InSequence s; - // Sample A, B, and C - for (size_t i = kA; i <= kC; i++) { - EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i])); - } - EXPECT_CALL( - *Output(kOutputIndex), - OnProcess(IsMediaSample(kStreamIndex, kStart, kEnd[kA], !kEncrypted))); + EXPECT_CALL(*Out(), OnProcess(IsStreamInfo(kStreamIndex))); - // Sample B and C - for (size_t i = kB; i <= kC; i++) { - EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i])); - } - EXPECT_CALL(*Output(kOutputIndex), - OnProcess(IsMediaSample(kStreamIndex, kEnd[kA], - kEnd[kB] - kEnd[kA], !kEncrypted))); + // Three Samples + EXPECT_CALL(*Out(), + 
OnProcess(IsMediaSample(kStreamIndex, kThreeSamplesStart, + kThreeSamplesDuration, !kEncrypted))); + // Two Samples + EXPECT_CALL(*Out(), + OnProcess(IsMediaSample(kStreamIndex, kTwoSamplesStart, + kTwoSamplesDuration, !kEncrypted))); + // One Sample + EXPECT_CALL(*Out(), + OnProcess(IsMediaSample(kStreamIndex, kOneSampleStart, + kOneSampleDuration, !kEncrypted))); + // Segment + EXPECT_CALL(*Out(), OnProcess(IsSegmentInfo(kStreamIndex, kSegmentStart, + kSegmentDuration, !kSubSegment, + !kEncrypted))); - // Sample C - EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[kC], kNoSettings, kPayload[kC])); - EXPECT_CALL(*Output(kOutputIndex), - OnProcess(IsMediaSample(kStreamIndex, kEnd[kB], - kEnd[kC] - kEnd[kB], !kEncrypted))); - - EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex)); + EXPECT_CALL(*Out(), OnFlush(kStreamIndex)); } - for (size_t i = kA; i <= kC; i++) { - ASSERT_OK(Input(kInputIndex) - ->Dispatch(StreamData::FromTextSample( - kStreamIndex, - GetTextSample(kId[i], kStart, kEnd[i], kPayload[i])))); - } - ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams()); + ASSERT_OK(DispatchStream()); + ASSERT_OK(DispatchText(kSample1Start, kSample1End)); + ASSERT_OK(DispatchText(kSample2Start, kSample2End)); + ASSERT_OK(DispatchText(kSample3Start, kSample3End)); + ASSERT_OK(DispatchSegment(kSegmentStart, kSegmentEnd)); + ASSERT_OK(Flush()); } // Verify that when two cues are completely on top of each other, that there // is no extra boxes sent out. 
// -// [------------A------------] -// [--------B--------] -// [----C----] +// |[-- SEGMENT -----------------]| +// |[-- SAMPLE ------------------]| +// | [-- SAMPLE ------------]| +// | [-- SAMPLE ------]| TEST_F(WebVttToMp4HandlerTest, StaggerStartWithOverlapEnd) { - const int64_t kStart[] = {0, 100, 200}; - const int64_t kEnd = 1000; + const int64_t kSegmentStart = 0; + const int64_t kSegmentEnd = 10000; + const int64_t kSegmentDuration = kSegmentEnd - kSegmentStart; + + const int64_t kSample1Start = kSegmentStart; + const int64_t kSample1End = kSegmentEnd; + + const int64_t kSample2Start = kSegmentStart + 1000; + const int64_t kSample2End = kSegmentEnd; + + const int64_t kSample3Start = kSegmentStart + 2000; + const int64_t kSample3End = kSegmentEnd; + + const int64_t kOneSampleStart = kSample1Start; + const int64_t kOneSampleEnd = kSample2Start; + const int64_t kOneSampleDuration = kOneSampleEnd - kOneSampleStart; + + const int64_t kTwoSamplesStart = kSample2Start; + const int64_t kTwoSamplesEnd = kSample3Start; + const int64_t kTwoSamplesDuration = kTwoSamplesEnd - kTwoSamplesStart; + + const int64_t kThreeSamplesStart = kSample3Start; + const int64_t kThreeSamplesEnd = kSample3End; + const int64_t kThreeSamplesDuration = kThreeSamplesEnd - kThreeSamplesStart; + + ASSERT_OK(SetUpTestGraph()); { testing::InSequence s; - // Sample A - EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[kA], kNoSettings, kPayload[kA])); - EXPECT_CALL(*Output(kOutputIndex), - OnProcess(IsMediaSample(kStreamIndex, kStart[kA], - kStart[kB] - kStart[kA], !kEncrypted))); + EXPECT_CALL(*Out(), OnProcess(IsStreamInfo(kStreamIndex))); - // Sample A and B - for (size_t i = kA; i <= kB; i++) { - EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i])); - } - EXPECT_CALL(*Output(kOutputIndex), - OnProcess(IsMediaSample(kStreamIndex, kStart[kB], - kStart[kC] - kStart[kB], !kEncrypted))); + // One Sample + EXPECT_CALL(*Out(), + OnProcess(IsMediaSample(kStreamIndex, kOneSampleStart, + 
kOneSampleDuration, !kEncrypted))); + // Two Samples + EXPECT_CALL(*Out(), + OnProcess(IsMediaSample(kStreamIndex, kTwoSamplesStart, + kTwoSamplesDuration, !kEncrypted))); + // Three Samples + EXPECT_CALL(*Out(), + OnProcess(IsMediaSample(kStreamIndex, kThreeSamplesStart, + kThreeSamplesDuration, !kEncrypted))); + // Segment + EXPECT_CALL(*Out(), OnProcess(IsSegmentInfo(kStreamIndex, kSegmentStart, + kSegmentDuration, !kSubSegment, + !kEncrypted))); - // Sample A, B, and C - for (size_t i = kA; i <= kC; i++) { - EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i])); - } - EXPECT_CALL(*Output(kOutputIndex), - OnProcess(IsMediaSample(kStreamIndex, kStart[kC], - kEnd - kStart[kC], !kEncrypted))); - - EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex)); + EXPECT_CALL(*Out(), OnFlush(kStreamIndex)); } - for (size_t i = kA; i <= kC; i++) { - ASSERT_OK(Input(kInputIndex) - ->Dispatch(StreamData::FromTextSample( - kStreamIndex, - GetTextSample(kId[i], kStart[i], kEnd, kPayload[i])))); + ASSERT_OK(DispatchStream()); + ASSERT_OK(DispatchText(kSample1Start, kSample1End)); + ASSERT_OK(DispatchText(kSample2Start, kSample2End)); + ASSERT_OK(DispatchText(kSample3Start, kSample3End)); + ASSERT_OK(DispatchSegment(kSegmentStart, kSegmentEnd)); + ASSERT_OK(Flush()); +} + +// The text chunking handler will repeat text samples that cross over a segment +// boundary. We need to know that this handler will be okay with those repeated +// samples. 
+// +// |[------ SEGMENT ------]|[------ SEGMENT ------]| +// | [--- SAMPLE ---|--------] | +// |- GAP -] | [- GAP ------]| +TEST_F(WebVttToMp4HandlerTest, CrossSegmentSamples) { + const int64_t kSegmentDuration = 10000; + const int64_t kGapDuration = 1000; + + const int64_t kSegment1Start = 0; + const int64_t kSegment1End = 10000; + + const int64_t kSegment2Start = 10000; + const int64_t kSegment2End = 20000; + + const int64_t kGap1Start = 0; + const int64_t kGap2Start = 19000; + + const int64_t kSampleStart = 1000; + const int64_t kSampleEnd = 19000; + + const int64_t kSamplePart1Start = 1000; + const int64_t kSamplePart1Duration = 9000; + + const int64_t kSamplePart2Start = 10000; + const int64_t kSamplePart2Duration = 9000; + + ASSERT_OK(SetUpTestGraph()); + + { + testing::InSequence s; + + EXPECT_CALL(*Out(), OnProcess(IsStreamInfo(kStreamIndex))); + + // Gap, Sample, Segment + EXPECT_CALL(*Out(), OnProcess(IsMediaSample(kStreamIndex, kGap1Start, + kGapDuration, !kEncrypted))); + EXPECT_CALL(*Out(), + OnProcess(IsMediaSample(kStreamIndex, kSamplePart1Start, + kSamplePart1Duration, !kEncrypted))); + EXPECT_CALL(*Out(), OnProcess(IsSegmentInfo(kStreamIndex, kSegment1Start, + kSegmentDuration, !kSubSegment, + !kEncrypted))); + + // Sample, Gap, Segment + EXPECT_CALL(*Out(), + OnProcess(IsMediaSample(kStreamIndex, kSamplePart2Start, + kSamplePart2Duration, !kEncrypted))); + EXPECT_CALL(*Out(), OnProcess(IsMediaSample(kStreamIndex, kGap2Start, + kGapDuration, !kEncrypted))); + EXPECT_CALL(*Out(), OnProcess(IsSegmentInfo(kStreamIndex, kSegment2Start, + kSegmentDuration, !kSubSegment, + !kEncrypted))); + + EXPECT_CALL(*Out(), OnFlush(kStreamIndex)); } - ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams()); + + ASSERT_OK(DispatchStream()); + ASSERT_OK(DispatchText(kSampleStart, kSampleEnd)); + ASSERT_OK(DispatchSegment(kSegment1Start, kSegment1End)); + ASSERT_OK(DispatchText(kSampleStart, kSampleEnd)); + ASSERT_OK(DispatchSegment(kSegment2Start, kSegment2End)); + 
ASSERT_OK(Flush()); } } // namespace media } // namespace shaka diff --git a/packager/packager.cc b/packager/packager.cc index 27d751b825..6c24cb7ff6 100644 --- a/packager/packager.cc +++ b/packager/packager.cc @@ -475,6 +475,16 @@ Status CreateMp4ToMp4TextJob(const StreamDescriptor& stream, return Status::OK; } +std::unique_ptr CreateTextChunker( + const ChunkingParams& chunking_params) { + const float segment_length_in_seconds = + chunking_params.segment_duration_in_seconds; + const uint64_t segment_length_in_ms = + static_cast(segment_length_in_seconds * 1000); + + return std::unique_ptr(new TextChunker(segment_length_in_ms)); +} + Status CreateHlsTextJob(const StreamDescriptor& stream, const PackagingParams& packaging_params, std::unique_ptr muxer_listener, @@ -489,11 +499,6 @@ Status CreateHlsTextJob(const StreamDescriptor& stream, ") to HLS with no segment template"); } - const float segment_length_in_seconds = - packaging_params.chunking_params.segment_duration_in_seconds; - const uint64_t segment_length_in_ms = - static_cast(segment_length_in_seconds * 1000); - // Text files are usually small and since the input is one file; // there's no way for the player to do ranged requests. So set this // value to something reasonable if it is missing. @@ -513,15 +518,13 @@ Status CreateHlsTextJob(const StreamDescriptor& stream, auto cue_aligner = sync_points ? 
std::make_shared(sync_points) : nullptr; - auto chunker = std::make_shared(segment_length_in_ms); + auto chunker = CreateTextChunker(packaging_params.chunking_params); - RETURN_IF_ERROR( - ChainHandlers({parser, std::move(padder), std::move(cue_aligner), - std::move(chunker), std::move(output)})); + job_manager->Add("Segmented Text Job", parser); - job_manager->Add("Segmented Text Job", std::move(parser)); - - return Status::OK; + return ChainHandlers({std::move(parser), std::move(padder), + std::move(cue_aligner), std::move(chunker), + std::move(output)}); } Status CreateWebVttToMp4TextJob(const StreamDescriptor& stream, @@ -530,8 +533,6 @@ Status CreateWebVttToMp4TextJob(const StreamDescriptor& stream, SyncPointQueue* sync_points, MuxerFactory* muxer_factory, std::shared_ptr* root) { - // TODO(kqyang): Support Cue Alignment if |sync_points| is not null. - std::unique_ptr reader; RETURN_IF_ERROR(FileReader::Open(stream.input, &reader)); @@ -539,18 +540,25 @@ Status CreateWebVttToMp4TextJob(const StreamDescriptor& stream, auto parser = std::make_shared(std::move(reader), stream.language); auto padder = std::make_shared(kNoDuration); + auto text_to_mp4 = std::make_shared(); - auto chunker = - std::make_shared(packaging_params.chunking_params); auto muxer = muxer_factory->CreateMuxer(GetOutputFormat(stream), stream); muxer->SetMuxerListener(std::move(muxer_listener)); - RETURN_IF_ERROR( - ChainHandlers({parser, std::move(padder), std::move(text_to_mp4), - std::move(chunker), std::move(muxer)})); - *root = std::move(parser); + // Optional Cue Alignment Handler + std::shared_ptr cue_aligner; + if (sync_points) { + cue_aligner = std::make_shared(sync_points); + } - return Status::OK; + std::shared_ptr chunker = + CreateTextChunker(packaging_params.chunking_params); + + *root = parser; + + return ChainHandlers({std::move(parser), std::move(padder), + std::move(cue_aligner), std::move(chunker), + std::move(text_to_mp4), std::move(muxer)}); } Status CreateTextJobs(