diff --git a/packager/app/test/packager_test.py b/packager/app/test/packager_test.py
index d62eb498dd..38109f0169 100755
--- a/packager/app/test/packager_test.py
+++ b/packager/app/test/packager_test.py
@@ -1047,6 +1047,24 @@ class PackagerFunctionalTest(PackagerAppTest):
     self.assertPackageSuccess(streams, flags)
     self._CheckTestResults('hls-audio-video-text-with-ad-cues')
 
+  def testVttTextToMp4WithAdCues(self):
+    # HLS packaging of segmented audio and video alongside a WebVTT input that
+    # is written out as mp4 text segments, with the ad_cues flag set to '1.5'.
+    audio = self._GetStream('audio', hls=True, segmented=True)
+    video = self._GetStream('video', hls=True, segmented=True)
+    text = self._GetStream(
+        'text',
+        hls=True,
+        segmented=True,
+        test_file='bear-subtitle-english.vtt',
+        output_format='mp4')
+    streams = [audio, video, text]
+
+    flags = self._GetFlags(output_hls=True, ad_cues='1.5')
+    self.assertPackageSuccess(streams, flags)
+
+    self._CheckTestResults('vtt-text-to-mp4-with-ad-cues')
+
   def testWebmSubsampleEncryption(self):
     streams = [
         self._GetStream('video',
diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-1.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-1.m4s
new file mode 100644
index 0000000000..c4b4ac03d9
Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-1.m4s differ
diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-2.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-2.m4s
new file mode 100644
index 0000000000..8504387c99
Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-2.m4s differ
diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-3.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-3.m4s
new file mode 100644
index 0000000000..692408ca70
Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-3.m4s differ
diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-4.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-4.m4s
new file mode 100644
index 0000000000..cd5f00f50c
Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-4.m4s differ
diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-init.mp4 b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-init.mp4
new file mode 100644
index 0000000000..8f7a647e00
Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio-init.mp4 differ
diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio.m3u8 b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio.m3u8
new file mode 100644
index 0000000000..5e114bac78
--- /dev/null
+++ b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-audio.m3u8
@@ -0,0 +1,16 @@
+#EXTM3U
+#EXT-X-VERSION:6
+## Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test>
+#EXT-X-TARGETDURATION:2
+#EXT-X-PLAYLIST-TYPE:VOD
+#EXT-X-MAP:URI="bear-640x360-audio-init.mp4"
+#EXTINF:1.022,
+bear-640x360-audio-1.m4s
+#EXTINF:0.998,
+bear-640x360-audio-2.m4s
+#EXTINF:0.046,
+bear-640x360-audio-3.m4s
+#EXT-X-PLACEMENT-OPPORTUNITY
+#EXTINF:0.697,
+bear-640x360-audio-4.m4s
+#EXT-X-ENDLIST
diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-1.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-1.m4s
new file mode 100644
index 0000000000..82605ec2c3
Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-1.m4s differ
diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-2.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-2.m4s
new file mode 100644
index 0000000000..311f93260e
Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-2.m4s differ
diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-3.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-3.m4s
new file mode 100644
index 0000000000..71e371e17a
Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-3.m4s differ
diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-iframe.m3u8 b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-iframe.m3u8
new file mode 100644
index 0000000000..756a032739
--- /dev/null
+++ b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-iframe.m3u8
@@ -0,0 +1,18 @@
+#EXTM3U
+#EXT-X-VERSION:6
+## Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test>
+#EXT-X-TARGETDURATION:2
+#EXT-X-PLAYLIST-TYPE:VOD
+#EXT-X-I-FRAMES-ONLY
+#EXT-X-MAP:URI="bear-640x360-video-init.mp4"
+#EXTINF:1.001,
+#EXT-X-BYTERANGE:15581@80
+bear-640x360-video-1.m4s
+#EXTINF:1.001,
+#EXT-X-BYTERANGE:18221@80
+bear-640x360-video-2.m4s
+#EXT-X-PLACEMENT-OPPORTUNITY
+#EXTINF:0.734,
+#EXT-X-BYTERANGE:19663@80
+bear-640x360-video-3.m4s
+#EXT-X-ENDLIST
diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-init.mp4 b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-init.mp4
new file mode 100644
index 0000000000..5ed12ec053
Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video-init.mp4 differ
diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video.m3u8 b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video.m3u8
new file mode 100644
index 0000000000..ed69bc21bf
--- /dev/null
+++ b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-640x360-video.m3u8
@@ -0,0 +1,14 @@
+#EXTM3U
+#EXT-X-VERSION:6
+## Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test>
+#EXT-X-TARGETDURATION:2
+#EXT-X-PLAYLIST-TYPE:VOD
+#EXT-X-MAP:URI="bear-640x360-video-init.mp4"
+#EXTINF:1.068,
+bear-640x360-video-1.m4s
+#EXTINF:1.001,
+bear-640x360-video-2.m4s
+#EXT-X-PLACEMENT-OPPORTUNITY
+#EXTINF:0.734,
+bear-640x360-video-3.m4s
+#EXT-X-ENDLIST
diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-1.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-1.m4s
new file mode 100644
index 0000000000..577826f3fa
Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-1.m4s differ
diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-2.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-2.m4s
new file mode 100644
index 0000000000..806b3320da
Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-2.m4s differ
diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-3.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-3.m4s
new file mode 100644
index 0000000000..8c94e9149d
Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-3.m4s differ
diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-4.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-4.m4s
new file mode 100644
index 0000000000..635b90fd6b
Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-4.m4s differ
diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-5.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-5.m4s
new file mode 100644
index 0000000000..706143b94e
Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-5.m4s differ
diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-6.m4s b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-6.m4s
new file mode 100644
index 0000000000..1f56626de2
Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-6.m4s differ
diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-init.mp4 b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-init.mp4
new file mode 100644
index 0000000000..8c9238168e
Binary files /dev/null and b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text-init.mp4 differ
diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text.m3u8 b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text.m3u8
new file mode 100644
index 0000000000..ed76d2c784
--- /dev/null
+++ b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/bear-subtitle-english-text.m3u8
@@ -0,0 +1,20 @@
+#EXTM3U
+#EXT-X-VERSION:6
+## Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test>
+#EXT-X-TARGETDURATION:2
+#EXT-X-PLAYLIST-TYPE:VOD
+#EXT-X-MAP:URI="bear-subtitle-english-text-init.mp4"
+#EXTINF:1.000,
+bear-subtitle-english-text-1.m4s
+#EXTINF:1.000,
+bear-subtitle-english-text-2.m4s
+#EXTINF:0.068,
+bear-subtitle-english-text-3.m4s
+#EXT-X-PLACEMENT-OPPORTUNITY
+#EXTINF:1.000,
+bear-subtitle-english-text-4.m4s
+#EXTINF:1.000,
+bear-subtitle-english-text-5.m4s
+#EXTINF:1.000,
+bear-subtitle-english-text-6.m4s
+#EXT-X-ENDLIST
diff --git a/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/output.m3u8 b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/output.m3u8
new file mode 100644
index 0000000000..05803d55d5
--- /dev/null
+++ b/packager/app/test/testdata/vtt-text-to-mp4-with-ad-cues/output.m3u8
@@ -0,0 +1,11 @@
+#EXTM3U
+## Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test>
+
+#EXT-X-MEDIA:TYPE=AUDIO,URI="bear-640x360-audio.m3u8",GROUP-ID="default-audio-group",NAME="stream_1",AUTOSELECT=YES,CHANNELS="2"
+
+#EXT-X-MEDIA:TYPE=SUBTITLES,URI="bear-subtitle-english-text.m3u8",GROUP-ID="default-text-group",NAME="stream_0",AUTOSELECT=YES
+
+#EXT-X-STREAM-INF:BANDWIDTH=1150004,CODECS="avc1.64001e,mp4a.40.2,wvtt",RESOLUTION=640x360,AUDIO="default-audio-group",SUBTITLES="default-text-group"
+bear-640x360-video.m3u8
+
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=214291,CODECS="avc1.64001e",RESOLUTION=640x360,URI="bear-640x360-video-iframe.m3u8"
diff --git a/packager/media/base/media_handler.cc b/packager/media/base/media_handler.cc
index e26bce3745..425b01368f 100644
--- a/packager/media/base/media_handler.cc
+++ b/packager/media/base/media_handler.cc
@@ -9,6 +9,26 @@
 namespace shaka {
 namespace media {
 
+std::string StreamDataTypeToString(StreamDataType type) {
+  switch (type) {
+    case StreamDataType::kStreamInfo:
+      return "stream info";
+    case StreamDataType::kMediaSample:
+      return "media sample";
+    case StreamDataType::kTextSample:
+      return "text sample";
+    case StreamDataType::kSegmentInfo:
+      return "segment info";
+    case StreamDataType::kScte35Event:
+      return "scte35 event";
+    case StreamDataType::kCueEvent:
+      return "cue event";
+    case StreamDataType::kUnknown:
+      return "unknown";
+  }
+  return "unknown";
+}
+
 Status MediaHandler::SetHandler(size_t output_stream_index,
                                 std::shared_ptr<MediaHandler> handler) {
   if (output_handlers_.find(output_stream_index) != output_handlers_.end()) {
diff --git a/packager/media/base/media_handler.h b/packager/media/base/media_handler.h
index b0cef596f1..32b623e8bb 100644
--- a/packager/media/base/media_handler.h
+++ b/packager/media/base/media_handler.h
@@ -29,6 +29,8 @@ enum class StreamDataType {
   kCueEvent,
 };
 
+std::string StreamDataTypeToString(StreamDataType type);
+
 // Scte35Event represents cuepoint markers in input streams. It will be used
 // to represent out of band cuepoint markers too.
 struct Scte35Event {
diff --git a/packager/media/base/media_handler_test_base.cc b/packager/media/base/media_handler_test_base.cc
index ae983e8f96..d6ec24127a 100644
--- a/packager/media/base/media_handler_test_base.cc
+++ b/packager/media/base/media_handler_test_base.cc
@@ -62,26 +62,6 @@ const uint8_t kData[]{
 namespace shaka {
 namespace media {
 
-std::string StreamDataTypeToString(StreamDataType stream_data_type) {
-  switch (stream_data_type) {
-    case StreamDataType::kStreamInfo:
-      return "stream info";
-    case StreamDataType::kMediaSample:
-      return "media sample";
-    case StreamDataType::kTextSample:
-      return "text sample";
-    case StreamDataType::kSegmentInfo:
-      return "segment info";
-    case StreamDataType::kScte35Event:
-      return "scte35 event";
-    case StreamDataType::kCueEvent:
-      return "cue event";
-    case StreamDataType::kUnknown:
-      return "unknown";
-  }
-  return "unknown";
-}
-
 std::string BoolToString(bool value) {
   return value ? "true" : "false";
 }
diff --git a/packager/media/base/media_handler_test_base.h b/packager/media/base/media_handler_test_base.h
index d6f7def9e2..aea5898a27 100644
--- a/packager/media/base/media_handler_test_base.h
+++ b/packager/media/base/media_handler_test_base.h
@@ -16,7 +16,6 @@
 namespace shaka {
 namespace media {
 
-std::string StreamDataTypeToString(StreamDataType stream_data_type);
 std::string BoolToString(bool value);
 
 MATCHER_P(IsStreamInfo, stream_index, "") {
diff --git a/packager/media/formats/webvtt/webvtt_to_mp4_handler.cc b/packager/media/formats/webvtt/webvtt_to_mp4_handler.cc
index af37b7ec3d..8515ccae1f 100644
--- a/packager/media/formats/webvtt/webvtt_to_mp4_handler.cc
+++ b/packager/media/formats/webvtt/webvtt_to_mp4_handler.cc
@@ -7,112 +7,64 @@
 #include "packager/media/formats/webvtt/webvtt_to_mp4_handler.h"
 
 #include <algorithm>
+#include <map>
 
 #include "packager/media/base/buffer_writer.h"
 #include "packager/media/formats/mp4/box_buffer.h"
 #include "packager/media/formats/mp4/box_definitions.h"
+#include "packager/status_macros.h"
 
 namespace shaka {
 namespace media {
-
-class DisplayAction {
- public:
-  DisplayAction(uint64_t id, int64_t time) : id_(id), time_(time) {}
-  virtual ~DisplayAction() = default;
-
-  uint64_t id() const { return id_; }
-  int64_t time() const { return time_; }
-  virtual void ActOn(std::list<const TextSample*>* display) const = 0;
-
- private:
-  uint64_t id_;
-  int64_t time_;
-};
-
 namespace {
-const uint64_t kTrackId = 0;
+const size_t kTrackId = 0;
 
-class AddToDisplayAction : public DisplayAction {
- public:
-  explicit AddToDisplayAction(uint64_t id,
-                              std::shared_ptr<const TextSample>& sample)
-      : DisplayAction(id, sample->start_time()), sample_(sample) {}
-  void ActOn(std::list<const TextSample*>* display) const override {
-    display->push_back(sample_.get());
-  }
+enum class DisplayActionType { ADD, REMOVE };
 
- private:
-  std::shared_ptr<const TextSample> sample_;
+struct DisplayAction {
+  DisplayActionType type;
+  const TextSample* sample;
 };
 
-class RemoveFromDisplayAction : public DisplayAction {
- public:
-  explicit RemoveFromDisplayAction(uint64_t id,
-                                   std::shared_ptr<const TextSample>& sample)
-      : DisplayAction(id, sample->EndTime()), sample_(sample) {}
-  void ActOn(std::list<const TextSample*>* display) const override {
-    display->remove(sample_.get());
+std::multimap<int64_t, DisplayAction> CreateActionList(
+    int64_t segment_start,
+    int64_t segment_end,
+    const std::list<std::shared_ptr<const TextSample>>& samples) {
+  std::multimap<int64_t, DisplayAction> actions;
+
+  for (const auto& sample : samples) {
+    DCHECK(sample);
+
+    // The add action should occur either in this segment or in a previous
+    // segment.
+    DCHECK_LT(sample->start_time(), segment_end);
+    actions.insert(
+        {sample->start_time(), {DisplayActionType::ADD, sample.get()}});
+
+    // If the remove happens in a later segment, then we don't want to include
+    // that action.
+    if (sample->EndTime() < segment_end) {
+      actions.insert(
+          {sample->EndTime(), {DisplayActionType::REMOVE, sample.get()}});
+    }
   }
 
- private:
-  std::shared_ptr<const TextSample> sample_;
-};
-}  // namespace
-
-bool DisplayActionCompare::operator()(
-    const std::shared_ptr<DisplayAction>& left,
-    const std::shared_ptr<DisplayAction>& right) const {
-  return left->time() == right->time() ? left->id() > right->id()
-                                       : left->time() > right->time();
+  return actions;
 }
 
-Status WebVttToMp4Handler::InitializeInternal() {
-  return Status::OK;
-}
-
-Status WebVttToMp4Handler::Process(std::unique_ptr<StreamData> stream_data) {
-  if (StreamDataType::kStreamInfo == stream_data->stream_data_type) {
-    return DispatchStreamInfo(kTrackId, std::move(stream_data->stream_info));
-  }
-  if (stream_data->stream_data_type == StreamDataType::kTextSample) {
-    std::shared_ptr<const TextSample> sample = stream_data->text_sample;
-
-    std::shared_ptr<DisplayAction> add(
-        new AddToDisplayAction(NextActionId(), sample));
-    std::shared_ptr<DisplayAction> remove(
-        new RemoveFromDisplayAction(NextActionId(), sample));
-
-    actions_.push(add);
-    actions_.push(remove);
-
-    return ProcessUpToTime(add->time());
-  }
-  return Status(error::INTERNAL_ERROR,
-                "Invalid stream data type for this handler");
-}
-
-Status WebVttToMp4Handler::OnFlushRequest(size_t input_stream_index) {
-  const int64_t kEndOfTime = std::numeric_limits<int64_t>::max();
-  ProcessUpToTime(kEndOfTime);
-
-  return FlushDownstream(0);
-}
-
-void WebVttToMp4Handler::WriteCue(const std::string& id,
-                                  const std::string& settings,
-                                  const std::string& payload,
-                                  BufferWriter* out) {
+void WriteSample(const TextSample& sample, BufferWriter* out) {
   mp4::VTTCueBox box;
 
-  if (id.length()) {
-    box.cue_id.cue_id = id;
+  if (sample.id().length()) {
+    box.cue_id.cue_id = sample.id();
   }
-  if (settings.length()) {
-    box.cue_settings.settings = settings;
+  if (sample.settings().length()) {
+    box.cue_settings.settings = sample.settings();
   }
-  if (payload.length()) {
-    box.cue_payload.cue_text = payload;
+  if (sample.payload().length()) {
+    box.cue_payload.cue_text = sample.payload();
   }
+
   // If there is internal timing, i.e. WebVTT cue timestamp, then
   // cue_current_time should be populated
   // "which gives the VTT timestamp associated with the start time of sample."
@@ -121,85 +73,195 @@ void WebVttToMp4Handler::WriteCue(const std::string& id,
   box.Write(out);
 }
 
-Status WebVttToMp4Handler::ProcessUpToTime(int64_t cutoff_time) {
-  // We can only process as far as the last add as no new events will be
-  // added that come before that time.
-  while (actions_.size() && actions_.top()->time() < cutoff_time) {
-    // STAGE 1: Write out the current state
-    // Get the time range for which the current active state is valid.
-    const int64_t previous_change = next_change_;
-    next_change_ = actions_.top()->time();
+void WriteSamples(const std::list<const TextSample*>& samples,
+                  BufferWriter* writer) {
+  DCHECK_GT(samples.size(), 0u);
 
-    if (next_change_ > previous_change) {
-      // Send out the active group. If there is nothing in the active group,
-      // then an empty cue is sent.
-      Status status =
-          active_.size()
-              ? MergeAndSendSamples(active_, previous_change, next_change_)
-              : SendEmptySample(previous_change, next_change_);
-
-      if (!status.ok()) {
-        return status;
-      }
-    } else {
-      // The only time that |previous_change| and |next_change_| should ever
-      // break the rule |next_change_ > previous_change| is at the start where
-      // |previous_change| and |next_change_| are both zero.
-      DCHECK_EQ(previous_change, 0u);
-      DCHECK_EQ(next_change_, 0u);
-    }
-
-    // STAGE 2: Move to the next state.
-    while (actions_.size() && actions_.top()->time() == next_change_) {
-      actions_.top()->ActOn(&active_);
-      actions_.pop();
-    }
+  for (const auto& sample : samples) {
+    WriteSample(*sample, writer);
   }
+}
+
+void WriteEmptySample(BufferWriter* writer) {
+  mp4::VTTEmptyCueBox box;
+  box.Write(writer);
+}
+
+std::shared_ptr<MediaSample> CreateMediaSample(const BufferWriter& buffer,
+                                               int64_t start_time,
+                                               int64_t end_time) {
+  DCHECK_GE(start_time, 0);
+  DCHECK_GT(end_time, start_time);
+
+  const bool kIsKeyFrame = true;
+
+  std::shared_ptr<MediaSample> sample =
+      MediaSample::CopyFrom(buffer.Buffer(), buffer.Size(), kIsKeyFrame);
+  sample->set_pts(start_time);
+  sample->set_dts(start_time);
+  sample->set_duration(end_time - start_time);
+
+  return sample;
+}
+}  // namespace
+
+Status WebVttToMp4Handler::InitializeInternal() {
+  return Status::OK;
+}
+
+Status WebVttToMp4Handler::Process(std::unique_ptr<StreamData> stream_data) {
+  switch (stream_data->stream_data_type) {
+    case StreamDataType::kStreamInfo:
+      return OnStreamInfo(std::move(stream_data));
+    case StreamDataType::kCueEvent:
+      return OnCueEvent(std::move(stream_data));
+    case StreamDataType::kSegmentInfo:
+      return OnSegmentInfo(std::move(stream_data));
+    case StreamDataType::kTextSample:
+      return OnTextSample(std::move(stream_data));
+    default:
+      return Status(error::INTERNAL_ERROR,
+                    "Invalid stream data type (" +
+                        StreamDataTypeToString(stream_data->stream_data_type) +
+                        ") for this WebVttToMp4 handler");
+  }
+}
+
+Status WebVttToMp4Handler::OnStreamInfo(
+    std::unique_ptr<StreamData> stream_data) {
+  DCHECK(stream_data);
+  DCHECK(stream_data->stream_info);
+
+  return Dispatch(std::move(stream_data));
+}
+
+Status WebVttToMp4Handler::OnCueEvent(std::unique_ptr<StreamData> stream_data) {
+  DCHECK(stream_data);
+  DCHECK(stream_data->cue_event);
+
+  if (current_segment_.size()) {
+    return Status(error::INTERNAL_ERROR,
+                  "Cue Events should come right after segment info.");
+  }
+
+  return Dispatch(std::move(stream_data));
+}
+
+Status WebVttToMp4Handler::OnSegmentInfo(
+    std::unique_ptr<StreamData> stream_data) {
+  DCHECK(stream_data);
+  DCHECK(stream_data->segment_info);
+
+  const auto& segment = stream_data->segment_info;
+
+  int64_t segment_start = segment->start_timestamp;
+  int64_t segment_duration = segment->duration;
+  int64_t segment_end = segment_start + segment_duration;
+
+  RETURN_IF_ERROR(DispatchCurrentSegment(segment_start, segment_end));
+  current_segment_.clear();
+
+  return Dispatch(std::move(stream_data));
+}
+
+Status WebVttToMp4Handler::OnTextSample(
+    std::unique_ptr<StreamData> stream_data) {
+  DCHECK(stream_data);
+  DCHECK(stream_data->text_sample);
+
+  auto& sample = stream_data->text_sample;
+
+  // Ignore empty samples. This will create gaps, but we will handle that
+  // later.
+  if (sample->payload().empty()) {
+    return Status::OK;
+  }
+
+  // Add the new text sample to the cache of samples that belong in the
+  // current segment.
+  current_segment_.push_back(std::move(stream_data->text_sample));
+  return Status::OK;
+}
+
+Status WebVttToMp4Handler::DispatchCurrentSegment(int64_t segment_start,
+                                                  int64_t segment_end) {
+  // Active will hold all the samples that are "on screen" for the current
+  // section of time.
+  std::list<const TextSample*> active;
+
+  // Move through the segment, jumping between each change to the current state.
+  // A change is defined as a group of one or more DisplayActions.
+  int64_t section_start = segment_start;
+
+  // |actions| is a map of [time] -> [action].
+  auto actions = CreateActionList(segment_start, segment_end, current_segment_);
+  auto front = actions.begin();
+
+  // As it is possible to have a segment with no samples, we can't base this
+  // loop on the number of actions. So we need to keep iterating until we
+  // have written enough sections to get to the end of the segment.
+  while (section_start < segment_end) {
+    // Apply all actions that occur at the start of this part of the segment.
+    // Normally we would only want "== section_start" but as it is possible for
+    // samples to span multiple segments, their start time will be before the
+    // segment's start time. So we want to apply them too if they come before
+    // the segment. Thus why we use "<=".
+    while (front != actions.end() && front->first <= section_start) {
+      auto& action = front->second;
+
+      switch (action.type) {
+        case DisplayActionType::ADD: {
+          active.push_back(action.sample);
+          break;
+        }
+        case DisplayActionType::REMOVE: {
+          auto found = std::find(active.begin(), active.end(), action.sample);
+          DCHECK(found != active.end());
+          active.erase(found);
+          break;
+        }
+        default: {
+          NOTREACHED() << "Unsupported DisplayActionType "
+                       << static_cast<int>(action.type);
+          break;
+        }
+      }
+
+      // We have "consumed" the action at the front. We can move on.
+      front++;
+    }
+
+    // The end of the section will either be the start of the next section or
+    // the end of the segment.
+    int64_t section_end = front == actions.end() ? segment_end : front->first;
+    DCHECK_GT(section_end, section_start);
+    DCHECK_LE(section_end, segment_end);
+    RETURN_IF_ERROR(MergeDispatchSamples(section_start, section_end, active));
+
+    section_start = section_end;
+  }
+
+  DCHECK(front == actions.end()) << "We should have processed all actions.";
 
   return Status::OK;
 }
 
-Status WebVttToMp4Handler::MergeAndSendSamples(
-    const std::list<const TextSample*>& samples,
+Status WebVttToMp4Handler::MergeDispatchSamples(
     int64_t start_time,
-    int64_t end_time) {
+    int64_t end_time,
+    const std::list<const TextSample*>& state) {
   DCHECK_GT(end_time, start_time);
 
   box_writer_.Clear();
 
-  for (const TextSample* sample : samples) {
-    DCHECK_LE(sample->start_time(), start_time);
-    DCHECK_GE(sample->EndTime(), end_time);
-    WriteCue(sample->id(), sample->settings(), sample->payload(), &box_writer_);
+  if (state.size()) {
+    WriteSamples(state, &box_writer_);
+  } else {
+    WriteEmptySample(&box_writer_);
   }
 
-  std::shared_ptr<MediaSample> sample =
-      MediaSample::CopyFrom(box_writer_.Buffer(), box_writer_.Size(), true);
-  sample->set_pts(start_time);
-  sample->set_dts(start_time);
-  sample->set_duration(end_time - start_time);
-  return DispatchMediaSample(kTrackId, std::move(sample));
-}
-
-Status WebVttToMp4Handler::SendEmptySample(int64_t start_time,
-                                           int64_t end_time) {
-  DCHECK_GT(end_time, start_time);
-
-  box_writer_.Clear();
-
-  mp4::VTTEmptyCueBox box;
-  box.Write(&box_writer_);
-
-  std::shared_ptr<MediaSample> sample =
-      MediaSample::CopyFrom(box_writer_.Buffer(), box_writer_.Size(), true);
-  sample->set_pts(start_time);
-  sample->set_dts(start_time);
-  sample->set_duration(end_time - start_time);
-  return DispatchMediaSample(kTrackId, std::move(sample));
-}
-
-uint64_t WebVttToMp4Handler::NextActionId() {
-  return next_id_++;
+  return DispatchMediaSample(
+      kTrackId, CreateMediaSample(box_writer_, start_time, end_time));
 }
 }  // namespace media
 }  // namespace shaka
diff --git a/packager/media/formats/webvtt/webvtt_to_mp4_handler.h b/packager/media/formats/webvtt/webvtt_to_mp4_handler.h
index 25dd838f19..c337c39ccb 100644
--- a/packager/media/formats/webvtt/webvtt_to_mp4_handler.h
+++ b/packager/media/formats/webvtt/webvtt_to_mp4_handler.h
@@ -18,76 +18,35 @@
 namespace shaka {
 namespace media {
 
-class DisplayAction;
-
-class DisplayActionCompare {
- public:
-  bool operator()(const std::shared_ptr<DisplayAction>& left,
-                  const std::shared_ptr<DisplayAction>& right) const;
-};
-
-// Take text samples, convert them to Mp4 boxes, and send them down stream.
-// Virtual methods should only be overridden for testing only.
+// A media handler that should come after the cue aligner and segmenter and
+// should come before the muxer. This handler is to convert text samples
+// to media samples so that they can be sent to a mp4 muxer.
 class WebVttToMp4Handler : public MediaHandler {
  public:
   WebVttToMp4Handler() = default;
-
- protected:
-  // |Process| and |OnFlushRequest| need to be protected so that it can be
-  // called for testing.
-  Status Process(std::unique_ptr<StreamData> stream_data) override;
-  Status OnFlushRequest(size_t input_stream_index) override;
-
-  // This is made protected-virtual so that we can override it for testing.
-  virtual void WriteCue(const std::string& id,
-                        const std::string& settings,
-                        const std::string& payload,
-                        BufferWriter* out);
+  ~WebVttToMp4Handler() override = default;
 
  private:
   WebVttToMp4Handler(const WebVttToMp4Handler&) = delete;
   WebVttToMp4Handler& operator=(const WebVttToMp4Handler&) = delete;
 
   Status InitializeInternal() override;
+  Status Process(std::unique_ptr<StreamData> stream_data) override;
 
-  // Merge and send all samples in the queue downstream while the head of the
-  // queue's time is less than |cutoff|. |cutoff| is needed as we can only
-  // merge and send samples when we are sure no new samples will appear before
-  // the next action.
-  Status ProcessUpToTime(int64_t cutoff_time);
+  Status OnStreamInfo(std::unique_ptr<StreamData> stream_data);
+  Status OnCueEvent(std::unique_ptr<StreamData> stream_data);
+  Status OnSegmentInfo(std::unique_ptr<StreamData> stream_data);
+  Status OnTextSample(std::unique_ptr<StreamData> stream_data);
 
-  // Merge together all TextSamples in |samples| into a single MP4 box and
-  // pass the box downstream.
-  Status MergeAndSendSamples(const std::list<const TextSample*>& samples,
-                             int64_t start_time,
-                             int64_t end_time);
+  Status DispatchCurrentSegment(int64_t segment_start, int64_t segment_end);
+  Status MergeDispatchSamples(int64_t start_time,
+                              int64_t end_time,
+                              const std::list<const TextSample*>& state);
 
-  Status SendEmptySample(int64_t start_time, int64_t end_time);
-
-  // Get a new id for the next action.
-  uint64_t NextActionId();
-
-  int64_t next_change_ = 0;
+  std::list<std::shared_ptr<const TextSample>> current_segment_;
 
   // This is the current state of the box we are writing.
   BufferWriter box_writer_;
-
-  // |actions_| is a time sorted list of actions that affect the timeline (e.g.
-  //  adding or removing a cue). |active_| is the list of all cues that are
-  // currently on screen.
-  // When the cue is to be on screen, it is added to |active_|. When it is time
-  // for the cue to come off screen, it is removed from |active_|.
-  // As |actions_| has a shared pointer to the cue, |active_| can use normal
-  // pointers as the pointer will be valid and it makes the |remove| call
-  // easier.
-
-  std::priority_queue<std::shared_ptr<DisplayAction>,
-                      std::vector<std::shared_ptr<DisplayAction>>,
-                      DisplayActionCompare>
-      actions_;
-  std::list<const TextSample*> active_;
-
-  uint64_t next_id_ = 0;
 };
 
 }  // namespace media
diff --git a/packager/media/formats/webvtt/webvtt_to_mp4_handler_unittest.cc b/packager/media/formats/webvtt/webvtt_to_mp4_handler_unittest.cc
index 3ad8b2b8c8..28c23b5de8 100644
--- a/packager/media/formats/webvtt/webvtt_to_mp4_handler_unittest.cc
+++ b/packager/media/formats/webvtt/webvtt_to_mp4_handler_unittest.cc
@@ -15,368 +15,535 @@ namespace shaka {
 namespace media {
 namespace {
 const size_t kStreamIndex = 0;
+const bool kSubSegment = true;
 const bool kEncrypted = true;
-
-const size_t kInputCount = 1;
-const size_t kOutputCount = 1;
-const size_t kInputIndex = 0;
-const size_t kOutputIndex = 0;
-
-const char* kId[] = {"cue 1 id", "cue 2 id", "cue 3 id"};
-const char* kPayload[] = {"cue 1 payload", "cue 2 payload", "cue 3 payload"};
-const char* kNoSettings = "";
-
-// These all refer to the samples. To make them easier to use in their
-// correct context, they have purposely short names.
-const size_t kA = 0;
-const size_t kB = 1;
-const size_t kC = 2;
-
 }  // namespace
 
-class TestableWebVttToMp4Handler : public WebVttToMp4Handler {
- public:
-  MOCK_METHOD3(OnWriteCue,
-               void(const std::string& id,
-                    const std::string& settings,
-                    const std::string& payload));
-
- protected:
-  void WriteCue(const std::string& id,
-                const std::string& settings,
-                const std::string& payload,
-                BufferWriter* out) {
-    OnWriteCue(id, settings, payload);
-    // We need to write something out or else media sample will think it is the
-    // end of the stream.
-    out->AppendInt(0);
-  }
-};
-
 class WebVttToMp4HandlerTest : public MediaHandlerTestBase {
  protected:
-  void SetUp() {
-    mp4_handler_ = std::make_shared<TestableWebVttToMp4Handler>();
-    ASSERT_OK(SetUpAndInitializeGraph(mp4_handler_, kInputCount, kOutputCount));
+  Status SetUpTestGraph() {
+    const size_t kOneInput = 1;
+    const size_t kOneOutput = 1;
+
+    auto handler = std::make_shared<WebVttToMp4Handler>();
+    return SetUpAndInitializeGraph(handler, kOneInput, kOneOutput);
   }
 
-  std::shared_ptr<TestableWebVttToMp4Handler> mp4_handler_;
+  FakeInputMediaHandler* In() {
+    const size_t kInputIndex = 0;
+    return Input(kInputIndex);
+  }
+
+  MockOutputMediaHandler* Out() {
+    const size_t kOutputIndex = 0;
+    return Output(kOutputIndex);
+  }
+
+  Status DispatchStream() {
+    auto info = GetTextStreamInfo();
+    return In()->Dispatch(
+        StreamData::FromStreamInfo(kStreamIndex, std::move(info)));
+  }
+
+  Status DispatchText(int64_t start_time, int64_t end_time) {
+    const std::string kId = "id";
+    const std::string kPayload = "payload";
+
+    auto sample = GetTextSample(kId, start_time, end_time, kPayload);
+    return In()->Dispatch(
+        StreamData::FromTextSample(kStreamIndex, std::move(sample)));
+  }
+
+  Status DispatchSegment(int64_t start_time, int64_t end_time) {
+    DCHECK_GT(end_time, start_time);
+
+    const bool kIsSubSegment = true;
+    int64_t duration = end_time - start_time;
+
+    auto segment = GetSegmentInfo(start_time, duration, !kIsSubSegment);
+    return In()->Dispatch(
+        StreamData::FromSegmentInfo(kStreamIndex, std::move(segment)));
+  }
+
+  Status Flush() { return In()->FlushAllDownstreams(); }
 };
 
 // Verify that when the stream starts at a non-zero value, the gap at the
 // start will be filled.
-// |    [----A----]
+//
+// |[-- SEGMENT ------------]|
+// |         [--- SAMPLE ---]|
+// |[- GAP -]                |
+//
 TEST_F(WebVttToMp4HandlerTest, NonZeroStartTime) {
-  const int64_t kGapStart = 0;
-  const int64_t kGapEnd = 100;
+  const int64_t kSegmentStart = 0;
+  const int64_t kSegmentEnd = 10000;
+  const int64_t kSegmentDuration = kSegmentEnd - kSegmentStart;
+
+  const int64_t kGapStart = kSegmentStart;
+  const int64_t kGapEnd = kGapStart + 200;
   const int64_t kGapDuration = kGapEnd - kGapStart;
 
-  const char* kSampleId = kId[0];
-  const char* kSamplePayload = kPayload[0];
   const int64_t kSampleStart = kGapEnd;
-  const int64_t kSampleDuration = 500;
-  const int64_t kSampleEnd = kSampleStart + kSampleDuration;
+  const int64_t kSampleEnd = kSegmentEnd;
+  const int64_t kSampleDuration = kSampleEnd - kSampleStart;
+
+  ASSERT_OK(SetUpTestGraph());
 
   {
     testing::InSequence s;
 
-    // Empty Cue to fill gap
-    EXPECT_CALL(*Output(kOutputIndex),
-                OnProcess(IsMediaSample(kStreamIndex, kGapStart, kGapDuration,
-                                        !kEncrypted)));
+    EXPECT_CALL(*Out(), OnProcess(IsStreamInfo(kStreamIndex)));
 
+    // Gap
+    EXPECT_CALL(*Out(), OnProcess(IsMediaSample(kStreamIndex, kGapStart,
+                                                kGapDuration, !kEncrypted)));
     // Sample
-    EXPECT_CALL(*mp4_handler_,
-                OnWriteCue(kSampleId, kNoSettings, kSamplePayload));
-    EXPECT_CALL(*Output(kOutputIndex),
-                OnProcess(IsMediaSample(kStreamIndex, kSampleStart,
-                                        kSampleDuration, !kEncrypted)));
+    EXPECT_CALL(*Out(), OnProcess(IsMediaSample(kStreamIndex, kSampleStart,
+                                                kSampleDuration, !kEncrypted)));
+    // Segment
+    EXPECT_CALL(*Out(), OnProcess(IsSegmentInfo(kStreamIndex, kSegmentStart,
+                                                kSegmentDuration, !kSubSegment,
+                                                !kEncrypted)));
 
-    EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex));
+    EXPECT_CALL(*Out(), OnFlush(kStreamIndex));
   }
 
-  ASSERT_OK(Input(kInputIndex)
-                ->Dispatch(StreamData::FromTextSample(
-                    kStreamIndex, GetTextSample(kSampleId, kSampleStart,
-                                                kSampleEnd, kSamplePayload))));
-
-  ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams());
+  ASSERT_OK(DispatchStream());
+  ASSERT_OK(DispatchText(kSampleStart, kSampleEnd));
+  ASSERT_OK(DispatchSegment(kSegmentStart, kSegmentEnd));
+  ASSERT_OK(Flush());
 }
 
 // Verify the cues are grouped correctly when the cues do not overlap at all.
 // An empty cue should be inserted between the two as there is a gap.
 //
-// [----A---]  [---B---]
+// |[-- SEGMENT --------------------------]|
+// |[-- SAMPLE --]           [-- SAMPLE --]|
+// |              [-- GAP --]              |
+//
 TEST_F(WebVttToMp4HandlerTest, NoOverlap) {
-  const int64_t kDuration = 1000;
+  const int64_t kSegmentStart = 0;
+  const int64_t kSegmentEnd = 10000;
+  const int64_t kSegmentDuration = kSegmentEnd - kSegmentStart;
 
-  const char* kSample1Id = kId[0];
-  const char* kSample1Payload = kPayload[0];
-  const int64_t kSample1Start = 0;
-  const int64_t kSample1End = kSample1Start + kDuration;
+  const int64_t kSample1Start = kSegmentStart;
+  const int64_t kSample1End = kSample1Start + 1000;
+  const int64_t kSample1Duration = kSample1End - kSample1Start;
 
-  // Make sample 2 be just a little after sample 1.
-  const char* kSample2Id = kId[1];
-  const char* kSample2Payload = kPayload[1];
-  const int64_t kSample2Start = kSample1End + 100;
-  const int64_t kSample2End = kSample2Start + kDuration;
+  const int64_t kSample2Start = kSegmentEnd - 1000;
+  const int64_t kSample2End = kSegmentEnd;
+  const int64_t kSample2Duration = kSample2End - kSample2Start;
 
   const int64_t kGapStart = kSample1End;
-  const int64_t kGapDuration = kSample2Start - kSample1End;
+  const int64_t kGapEnd = kSample2Start;
+  const int64_t kGapDuration = kGapEnd - kGapStart;
+
+  ASSERT_OK(SetUpTestGraph());
 
   {
     testing::InSequence s;
 
+    EXPECT_CALL(*Out(), OnProcess(IsStreamInfo(kStreamIndex)));
+
     // Sample 1
-    EXPECT_CALL(*mp4_handler_,
-                OnWriteCue(kSample1Id, kNoSettings, kSample1Payload));
-    EXPECT_CALL(*Output(kOutputIndex),
-                OnProcess(IsMediaSample(kStreamIndex, kSample1Start, kDuration,
-                                        !kEncrypted)));
-
-    // Empty Cue to fill gap
-    EXPECT_CALL(*Output(kOutputIndex),
-                OnProcess(IsMediaSample(kStreamIndex, kGapStart, kGapDuration,
-                                        !kEncrypted)));
-
+    EXPECT_CALL(*Out(),
+                OnProcess(IsMediaSample(kStreamIndex, kSample1Start,
+                                        kSample1Duration, !kEncrypted)));
+    // Gap
+    EXPECT_CALL(*Out(), OnProcess(IsMediaSample(kStreamIndex, kGapStart,
+                                                kGapDuration, !kEncrypted)));
     // Sample 2
-    EXPECT_CALL(*mp4_handler_,
-                OnWriteCue(kSample2Id, kNoSettings, kSample2Payload));
-    EXPECT_CALL(*Output(kOutputIndex),
-                OnProcess(IsMediaSample(kStreamIndex, kSample2Start, kDuration,
-                                        !kEncrypted)));
+    EXPECT_CALL(*Out(),
+                OnProcess(IsMediaSample(kStreamIndex, kSample2Start,
+                                        kSample2Duration, !kEncrypted)));
+    // Segment
+    EXPECT_CALL(*Out(), OnProcess(IsSegmentInfo(kStreamIndex, kSegmentStart,
+                                                kSegmentDuration, !kSubSegment,
+                                                !kEncrypted)));
 
-    EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex));
+    EXPECT_CALL(*Out(), OnFlush(kStreamIndex));
   }
 
-  ASSERT_OK(
-      Input(kInputIndex)
-          ->Dispatch(StreamData::FromTextSample(
-              kStreamIndex, GetTextSample(kSample1Id, kSample1Start,
-                                          kSample1End, kSample1Payload))));
-
-  ASSERT_OK(
-      Input(kInputIndex)
-          ->Dispatch(StreamData::FromTextSample(
-              kStreamIndex, GetTextSample(kSample2Id, kSample2Start,
-                                          kSample2End, kSample2Payload))));
-
-  ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams());
+  ASSERT_OK(DispatchStream());
+  ASSERT_OK(DispatchText(kSample1Start, kSample1End));
+  ASSERT_OK(DispatchText(kSample2Start, kSample2End));
+  ASSERT_OK(DispatchSegment(kSegmentStart, kSegmentEnd));
+  ASSERT_OK(Flush());
 }
 
 // Verify the cues are grouped correctly when one cue overlaps another cue at
 // one end.
 //
-// [-------A-------]
-//         [-------B------]
+// |[-- SEGMENT -----------------]|
+// |[-- SAMPLE --------]          |
+// |           [------- SAMPLE --]|
 TEST_F(WebVttToMp4HandlerTest, Overlap) {
-  const int64_t kStart[] = {0, 500};
-  const int64_t kEnd[] = {1000, 1500};
+  const int64_t kSegmentStart = 0;
+  const int64_t kSegmentEnd = 10000;
+  const int64_t kSegmentDuration = kSegmentEnd - kSegmentStart;
+
+  const int64_t kSample1Start = kSegmentStart;
+  const int64_t kSample1End = kSegmentEnd - 3000;
+
+  const int64_t kSample2Start = kSegmentStart + 3000;
+  const int64_t kSample2End = kSegmentEnd;
+
+  const int64_t kOnlySample1Start = kSample1Start;
+  const int64_t kOnlySample1End = kSample2Start;
+  const int64_t kOnlySample1Duration = kOnlySample1End - kOnlySample1Start;
+
+  const int64_t kSample1AndSample2Start = kSample2Start;
+  const int64_t kSample1AndSample2End = kSample1End;
+  const int64_t kSample1AndSample2Duration =
+      kSample1AndSample2End - kSample1AndSample2Start;
+
+  const int64_t kOnlySample2Start = kSample1End;
+  const int64_t kOnlySample2End = kSample2End;
+  const int64_t kOnlySample2Duration = kOnlySample2End - kOnlySample2Start;
+
+  ASSERT_OK(SetUpTestGraph());
 
   {
     testing::InSequence s;
 
-    // Sample A
-    EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[kA], kNoSettings, kPayload[kA]));
-    EXPECT_CALL(*Output(kOutputIndex),
-                OnProcess(IsMediaSample(kStreamIndex, kStart[kA],
-                                        kStart[kB] - kStart[kA], !kEncrypted)));
+    EXPECT_CALL(*Out(), OnProcess(IsStreamInfo(kStreamIndex)));
 
-    // Sample A and B
-    for (size_t i = kA; i <= kB; i++) {
-      EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i]));
-    }
-    EXPECT_CALL(*Output(kOutputIndex),
-                OnProcess(IsMediaSample(kStreamIndex, kStart[kB],
-                                        kEnd[kA] - kStart[kB], !kEncrypted)));
+    // Sample 1
+    EXPECT_CALL(*Out(),
+                OnProcess(IsMediaSample(kStreamIndex, kOnlySample1Start,
+                                        kOnlySample1Duration, !kEncrypted)));
+    // Sample 1 and Sample 2
+    EXPECT_CALL(*Out(), OnProcess(IsMediaSample(
+                            kStreamIndex, kSample1AndSample2Start,
+                            kSample1AndSample2Duration, !kEncrypted)));
+    // Sample 2
+    EXPECT_CALL(*Out(),
+                OnProcess(IsMediaSample(kStreamIndex, kOnlySample2Start,
+                                        kOnlySample2Duration, !kEncrypted)));
+    // Segment
+    EXPECT_CALL(*Out(), OnProcess(IsSegmentInfo(kStreamIndex, kSegmentStart,
+                                                kSegmentDuration, !kSubSegment,
+                                                !kEncrypted)));
 
-    // Sample B
-    EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[kB], kNoSettings, kPayload[kB]));
-    EXPECT_CALL(*Output(kOutputIndex),
-                OnProcess(IsMediaSample(kStreamIndex, kEnd[kA],
-                                        kEnd[kB] - kEnd[kA], !kEncrypted)));
-
-    EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex));
+    EXPECT_CALL(*Out(), OnFlush(kStreamIndex));
   }
 
-  for (size_t i = kA; i <= kB; i++) {
-    ASSERT_OK(Input(kInputIndex)
-                  ->Dispatch(StreamData::FromTextSample(
-                      kStreamIndex,
-                      GetTextSample(kId[i], kStart[i], kEnd[i], kPayload[i]))));
-  }
-  ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams());
+  ASSERT_OK(DispatchStream());
+  ASSERT_OK(DispatchText(kSample1Start, kSample1End));
+  ASSERT_OK(DispatchText(kSample2Start, kSample2End));
+  ASSERT_OK(DispatchSegment(kSegmentStart, kSegmentEnd));
+  ASSERT_OK(Flush());
 }
 
 // Verify the cues are grouped correctly when one cue starts before and ends
 // after another cue.
 //
-// [-------------A-------------]
-//    [----------B----------]
+// |[-- SEGMENT -----------------]|
+// |[-- SAMPLE ------------------]|
+// |      [------- SAMPLE --]     |
+//
 TEST_F(WebVttToMp4HandlerTest, Contains) {
-  const int64_t kStart[] = {0, 100};
-  const int64_t kEnd[] = {1000, 900};
+  const int64_t kSegmentStart = 0;
+  const int64_t kSegmentEnd = 10000;
+  const int64_t kSegmentDuration = kSegmentEnd - kSegmentStart;
+
+  const int64_t kSample1Start = kSegmentStart;
+  const int64_t kSample1End = kSegmentEnd;
+
+  const int64_t kSample2Start = kSegmentStart + 1000;
+  const int64_t kSample2End = kSegmentEnd - 1000;
+
+  const int64_t kBeforeSample2Start = kSample1Start;
+  const int64_t kBeforeSample2End = kSample2Start;
+  const int64_t kBeforeSample2Duration =
+      kBeforeSample2End - kBeforeSample2Start;
+
+  const int64_t kDuringSample2Start = kSample2Start;
+  const int64_t kDuringSample2End = kSample2End;
+  const int64_t kDuringSample2Duration =
+      kDuringSample2End - kDuringSample2Start;
+
+  const int64_t kAfterSample2Start = kSample2End;
+  const int64_t kAfterSample2End = kSample1End;
+  const int64_t kAfterSample2Duration = kAfterSample2End - kAfterSample2Start;
+
+  ASSERT_OK(SetUpTestGraph());
 
   {
     testing::InSequence s;
 
-    // Sample A
-    EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[kA], kNoSettings, kPayload[kA]));
-    EXPECT_CALL(*Output(kOutputIndex),
-                OnProcess(IsMediaSample(kStreamIndex, kStart[kA],
-                                        kStart[kB] - kStart[kA], !kEncrypted)));
+    EXPECT_CALL(*Out(), OnProcess(IsStreamInfo(kStreamIndex)));
 
-    // Sample A and B
-    for (size_t i = kA; i <= kB; i++) {
-      EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i]));
-    }
-    EXPECT_CALL(*Output(kOutputIndex),
-                OnProcess(IsMediaSample(kStreamIndex, kStart[kB],
-                                        kEnd[kB] - kStart[kB], !kEncrypted)));
+    // Sample 1
+    EXPECT_CALL(*Out(),
+                OnProcess(IsMediaSample(kStreamIndex, kBeforeSample2Start,
+                                        kBeforeSample2Duration, !kEncrypted)));
+    // Sample 1 and Sample 2
+    EXPECT_CALL(*Out(),
+                OnProcess(IsMediaSample(kStreamIndex, kDuringSample2Start,
+                                        kDuringSample2Duration, !kEncrypted)));
+    // Sample 1 Again
+    EXPECT_CALL(*Out(),
+                OnProcess(IsMediaSample(kStreamIndex, kAfterSample2Start,
+                                        kAfterSample2Duration, !kEncrypted)));
+    // Segment
+    EXPECT_CALL(*Out(), OnProcess(IsSegmentInfo(kStreamIndex, kSegmentStart,
+                                                kSegmentDuration, !kSubSegment,
+                                                !kEncrypted)));
 
-    // Sample A
-    EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[kA], kNoSettings, kPayload[kA]));
-    EXPECT_CALL(*Output(kOutputIndex),
-                OnProcess(IsMediaSample(kStreamIndex, kEnd[kB],
-                                        kEnd[kA] - kEnd[kB], !kEncrypted)));
-
-    EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex));
+    EXPECT_CALL(*Out(), OnFlush(kStreamIndex));
   }
 
-  for (size_t i = kA; i <= kB; i++) {
-    ASSERT_OK(Input(kInputIndex)
-                  ->Dispatch(StreamData::FromTextSample(
-                      kStreamIndex,
-                      GetTextSample(kId[i], kStart[i], kEnd[i], kPayload[i]))));
-  }
-  ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams());
+  ASSERT_OK(DispatchStream());
+  ASSERT_OK(DispatchText(kSample1Start, kSample1End));
+  ASSERT_OK(DispatchText(kSample2Start, kSample2End));
+  ASSERT_OK(DispatchSegment(kSegmentStart, kSegmentEnd));
+  ASSERT_OK(Flush());
 }
 
 // Verify that when two cues are completely on top of each other, that there
 // is no extra boxes sent out.
 //
-// [----------A----------]
-// [----------B----------]
+// |[-- SEGMENT -----------------]|
+// |[-- SAMPLE ------------------]|
+// |[-- SAMPLE ------------------]|
+//
 TEST_F(WebVttToMp4HandlerTest, ExactOverlap) {
-  const int64_t kStart = 0;
-  const int64_t kDuration = 1000;
-  const int64_t kEnd = kStart + kDuration;
+  const int64_t kSegmentStart = 0;
+  const int64_t kSegmentEnd = 10000;
+  const int64_t kSegmentDuration = kSegmentEnd - kSegmentStart;
+
+  const int64_t kSampleStart = kSegmentStart;
+  const int64_t kSampleEnd = kSegmentEnd;
+  const int64_t kSampleDuration = kSampleEnd - kSampleStart;
+
+  ASSERT_OK(SetUpTestGraph());
 
   {
     testing::InSequence s;
 
-    // Sample A and B
-    for (size_t i = kA; i <= kB; i++) {
-      EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i]));
-    }
-    EXPECT_CALL(
-        *Output(kOutputIndex),
-        OnProcess(IsMediaSample(kStreamIndex, kStart, kDuration, !kEncrypted)));
+    EXPECT_CALL(*Out(), OnProcess(IsStreamInfo(kStreamIndex)));
 
-    EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex));
+    // Both Samples
+    EXPECT_CALL(*Out(), OnProcess(IsMediaSample(kStreamIndex, kSampleStart,
+                                                kSampleDuration, !kEncrypted)));
+    // Segment
+    EXPECT_CALL(*Out(), OnProcess(IsSegmentInfo(kStreamIndex, kSegmentStart,
+                                                kSegmentDuration, !kSubSegment,
+                                                !kEncrypted)));
+
+    EXPECT_CALL(*Out(), OnFlush(kStreamIndex));
   }
 
-  for (size_t i = kA; i <= kB; i++) {
-    ASSERT_OK(Input(kInputIndex)
-                  ->Dispatch(StreamData::FromTextSample(
-                      kStreamIndex,
-                      GetTextSample(kId[i], kStart, kEnd, kPayload[i]))));
-  }
-  ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams());
+  ASSERT_OK(DispatchStream());
+  ASSERT_OK(DispatchText(kSampleStart, kSampleEnd));
+  ASSERT_OK(DispatchText(kSampleStart, kSampleEnd));
+  ASSERT_OK(DispatchSegment(kSegmentStart, kSegmentEnd));
+  ASSERT_OK(Flush());
 }
 
 // Verify that when two cues are completely on top of each other, that there
 // is no extra boxes sent out.
 //
-// [----A----]
-// [--------B--------]
-// [------------C------------]
+// |[-- SEGMENT -----------------]|
+// |[-- SAMPLE ------------------]|
+// |[-- SAMPLE ------------]      |
+// |[-- SAMPLE ------]            |
 TEST_F(WebVttToMp4HandlerTest, OverlapStartWithStaggerEnd) {
-  const int64_t kStart = 0;
-  const int64_t kEnd[] = {1000, 2000, 3000};
+  const int64_t kSegmentStart = 0;
+  const int64_t kSegmentEnd = 10000;
+  const int64_t kSegmentDuration = kSegmentEnd - kSegmentStart;
+
+  const int64_t kSample1Start = kSegmentStart;
+  const int64_t kSample1End = kSegmentEnd;
+
+  const int64_t kSample2Start = kSegmentStart;
+  const int64_t kSample2End = kSegmentEnd - 1000;
+
+  const int64_t kSample3Start = kSegmentStart;
+  const int64_t kSample3End = kSegmentEnd - 2000;
+
+  const int64_t kThreeSamplesStart = kSegmentStart;
+  const int64_t kThreeSamplesEnd = kSample3End;
+  const int64_t kThreeSamplesDuration = kThreeSamplesEnd - kThreeSamplesStart;
+
+  const int64_t kTwoSamplesStart = kSample3End;
+  const int64_t kTwoSamplesEnd = kSample2End;
+  const int64_t kTwoSamplesDuration = kTwoSamplesEnd - kTwoSamplesStart;
+
+  const int64_t kOneSampleStart = kSample2End;
+  const int64_t kOneSampleEnd = kSample1End;
+  const int64_t kOneSampleDuration = kOneSampleEnd - kOneSampleStart;
+
+  ASSERT_OK(SetUpTestGraph());
 
   {
     testing::InSequence s;
 
-    // Sample A, B, and C
-    for (size_t i = kA; i <= kC; i++) {
-      EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i]));
-    }
-    EXPECT_CALL(
-        *Output(kOutputIndex),
-        OnProcess(IsMediaSample(kStreamIndex, kStart, kEnd[kA], !kEncrypted)));
+    EXPECT_CALL(*Out(), OnProcess(IsStreamInfo(kStreamIndex)));
 
-    // Sample B and C
-    for (size_t i = kB; i <= kC; i++) {
-      EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i]));
-    }
-    EXPECT_CALL(*Output(kOutputIndex),
-                OnProcess(IsMediaSample(kStreamIndex, kEnd[kA],
-                                        kEnd[kB] - kEnd[kA], !kEncrypted)));
+    // Three Samples
+    EXPECT_CALL(*Out(),
+                OnProcess(IsMediaSample(kStreamIndex, kThreeSamplesStart,
+                                        kThreeSamplesDuration, !kEncrypted)));
+    // Two Samples
+    EXPECT_CALL(*Out(),
+                OnProcess(IsMediaSample(kStreamIndex, kTwoSamplesStart,
+                                        kTwoSamplesDuration, !kEncrypted)));
+    // One Sample
+    EXPECT_CALL(*Out(),
+                OnProcess(IsMediaSample(kStreamIndex, kOneSampleStart,
+                                        kOneSampleDuration, !kEncrypted)));
+    // Segment
+    EXPECT_CALL(*Out(), OnProcess(IsSegmentInfo(kStreamIndex, kSegmentStart,
+                                                kSegmentDuration, !kSubSegment,
+                                                !kEncrypted)));
 
-    // Sample C
-    EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[kC], kNoSettings, kPayload[kC]));
-    EXPECT_CALL(*Output(kOutputIndex),
-                OnProcess(IsMediaSample(kStreamIndex, kEnd[kB],
-                                        kEnd[kC] - kEnd[kB], !kEncrypted)));
-
-    EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex));
+    EXPECT_CALL(*Out(), OnFlush(kStreamIndex));
   }
 
-  for (size_t i = kA; i <= kC; i++) {
-    ASSERT_OK(Input(kInputIndex)
-                  ->Dispatch(StreamData::FromTextSample(
-                      kStreamIndex,
-                      GetTextSample(kId[i], kStart, kEnd[i], kPayload[i]))));
-  }
-  ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams());
+  ASSERT_OK(DispatchStream());
+  ASSERT_OK(DispatchText(kSample1Start, kSample1End));
+  ASSERT_OK(DispatchText(kSample2Start, kSample2End));
+  ASSERT_OK(DispatchText(kSample3Start, kSample3End));
+  ASSERT_OK(DispatchSegment(kSegmentStart, kSegmentEnd));
+  ASSERT_OK(Flush());
 }
 
 // Verify that when two cues are completely on top of each other, that there
 // is no extra boxes sent out.
 //
-// [------------A------------]
-//         [--------B--------]
-//                 [----C----]
+// |[-- SEGMENT -----------------]|
+// |[-- SAMPLE ------------------]|
+// |      [-- SAMPLE ------------]|
+// |            [-- SAMPLE ------]|
 TEST_F(WebVttToMp4HandlerTest, StaggerStartWithOverlapEnd) {
-  const int64_t kStart[] = {0, 100, 200};
-  const int64_t kEnd = 1000;
+  const int64_t kSegmentStart = 0;
+  const int64_t kSegmentEnd = 10000;
+  const int64_t kSegmentDuration = kSegmentEnd - kSegmentStart;
+
+  const int64_t kSample1Start = kSegmentStart;
+  const int64_t kSample1End = kSegmentEnd;
+
+  const int64_t kSample2Start = kSegmentStart + 1000;
+  const int64_t kSample2End = kSegmentEnd;
+
+  const int64_t kSample3Start = kSegmentStart + 2000;
+  const int64_t kSample3End = kSegmentEnd;
+
+  const int64_t kOneSampleStart = kSample1Start;
+  const int64_t kOneSampleEnd = kSample2Start;
+  const int64_t kOneSampleDuration = kOneSampleEnd - kOneSampleStart;
+
+  const int64_t kTwoSamplesStart = kSample2Start;
+  const int64_t kTwoSamplesEnd = kSample3Start;
+  const int64_t kTwoSamplesDuration = kTwoSamplesEnd - kTwoSamplesStart;
+
+  const int64_t kThreeSamplesStart = kSample3Start;
+  const int64_t kThreeSamplesEnd = kSample3End;
+  const int64_t kThreeSamplesDuration = kThreeSamplesEnd - kThreeSamplesStart;
+
+  ASSERT_OK(SetUpTestGraph());
 
   {
     testing::InSequence s;
 
-    // Sample A
-    EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[kA], kNoSettings, kPayload[kA]));
-    EXPECT_CALL(*Output(kOutputIndex),
-                OnProcess(IsMediaSample(kStreamIndex, kStart[kA],
-                                        kStart[kB] - kStart[kA], !kEncrypted)));
+    EXPECT_CALL(*Out(), OnProcess(IsStreamInfo(kStreamIndex)));
 
-    // Sample A and B
-    for (size_t i = kA; i <= kB; i++) {
-      EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i]));
-    }
-    EXPECT_CALL(*Output(kOutputIndex),
-                OnProcess(IsMediaSample(kStreamIndex, kStart[kB],
-                                        kStart[kC] - kStart[kB], !kEncrypted)));
+    // One Sample
+    EXPECT_CALL(*Out(),
+                OnProcess(IsMediaSample(kStreamIndex, kOneSampleStart,
+                                        kOneSampleDuration, !kEncrypted)));
+    // Two Samples
+    EXPECT_CALL(*Out(),
+                OnProcess(IsMediaSample(kStreamIndex, kTwoSamplesStart,
+                                        kTwoSamplesDuration, !kEncrypted)));
+    // Three Samples
+    EXPECT_CALL(*Out(),
+                OnProcess(IsMediaSample(kStreamIndex, kThreeSamplesStart,
+                                        kThreeSamplesDuration, !kEncrypted)));
+    // Segment
+    EXPECT_CALL(*Out(), OnProcess(IsSegmentInfo(kStreamIndex, kSegmentStart,
+                                                kSegmentDuration, !kSubSegment,
+                                                !kEncrypted)));
 
-    // Sample A, B, and C
-    for (size_t i = kA; i <= kC; i++) {
-      EXPECT_CALL(*mp4_handler_, OnWriteCue(kId[i], kNoSettings, kPayload[i]));
-    }
-    EXPECT_CALL(*Output(kOutputIndex),
-                OnProcess(IsMediaSample(kStreamIndex, kStart[kC],
-                                        kEnd - kStart[kC], !kEncrypted)));
-
-    EXPECT_CALL(*Output(kOutputIndex), OnFlush(kStreamIndex));
+    EXPECT_CALL(*Out(), OnFlush(kStreamIndex));
   }
 
-  for (size_t i = kA; i <= kC; i++) {
-    ASSERT_OK(Input(kInputIndex)
-                  ->Dispatch(StreamData::FromTextSample(
-                      kStreamIndex,
-                      GetTextSample(kId[i], kStart[i], kEnd, kPayload[i]))));
+  ASSERT_OK(DispatchStream());
+  ASSERT_OK(DispatchText(kSample1Start, kSample1End));
+  ASSERT_OK(DispatchText(kSample2Start, kSample2End));
+  ASSERT_OK(DispatchText(kSample3Start, kSample3End));
+  ASSERT_OK(DispatchSegment(kSegmentStart, kSegmentEnd));
+  ASSERT_OK(Flush());
+}
+
+// The text chunking handler repeats text samples that cross a segment
+// boundary. Verify that this handler accepts the repeated sample and clips
+// it to each segment, filling the rest of each segment with a gap.
+//
+// |[------ SEGMENT ------]|[------ SEGMENT ------]|
+// |        [--- SAMPLE ---|--------]              |
+// |[-GAP--]               |         [- GAP ------]|
+TEST_F(WebVttToMp4HandlerTest, CrossSegmentSamples) {
+  const int64_t kSegmentDuration = 10000;
+  const int64_t kGapDuration = 1000;
+
+  const int64_t kSegment1Start = 0;
+  const int64_t kSegment1End = 10000;
+
+  const int64_t kSegment2Start = 10000;
+  const int64_t kSegment2End = 20000;
+
+  const int64_t kGap1Start = 0;
+  const int64_t kGap2Start = 19000;
+
+  const int64_t kSampleStart = 1000;
+  const int64_t kSampleEnd = 19000;
+
+  const int64_t kSamplePart1Start = 1000;
+  const int64_t kSamplePart1Duration = 9000;
+
+  const int64_t kSamplePart2Start = 10000;
+  const int64_t kSamplePart2Duration = 9000;
+
+  ASSERT_OK(SetUpTestGraph());
+
+  {
+    testing::InSequence s;
+
+    EXPECT_CALL(*Out(), OnProcess(IsStreamInfo(kStreamIndex)));
+
+    // First segment: gap, sample (part 1), segment info
+    EXPECT_CALL(*Out(), OnProcess(IsMediaSample(kStreamIndex, kGap1Start,
+                                                kGapDuration, !kEncrypted)));
+    EXPECT_CALL(*Out(),
+                OnProcess(IsMediaSample(kStreamIndex, kSamplePart1Start,
+                                        kSamplePart1Duration, !kEncrypted)));
+    EXPECT_CALL(*Out(), OnProcess(IsSegmentInfo(kStreamIndex, kSegment1Start,
+                                                kSegmentDuration, !kSubSegment,
+                                                !kEncrypted)));
+
+    // Second segment: sample (part 2), gap, segment info
+    EXPECT_CALL(*Out(),
+                OnProcess(IsMediaSample(kStreamIndex, kSamplePart2Start,
+                                        kSamplePart2Duration, !kEncrypted)));
+    EXPECT_CALL(*Out(), OnProcess(IsMediaSample(kStreamIndex, kGap2Start,
+                                                kGapDuration, !kEncrypted)));
+    EXPECT_CALL(*Out(), OnProcess(IsSegmentInfo(kStreamIndex, kSegment2Start,
+                                                kSegmentDuration, !kSubSegment,
+                                                !kEncrypted)));
+
+    EXPECT_CALL(*Out(), OnFlush(kStreamIndex));
   }
-  ASSERT_OK(Input(kInputIndex)->FlushAllDownstreams());
+
+  ASSERT_OK(DispatchStream());
+  ASSERT_OK(DispatchText(kSampleStart, kSampleEnd));
+  ASSERT_OK(DispatchSegment(kSegment1Start, kSegment1End));
+  ASSERT_OK(DispatchText(kSampleStart, kSampleEnd));
+  ASSERT_OK(DispatchSegment(kSegment2Start, kSegment2End));
+  ASSERT_OK(Flush());
 }
 }  // namespace media
 }  // namespace shaka
diff --git a/packager/packager.cc b/packager/packager.cc
index 27d751b825..6c24cb7ff6 100644
--- a/packager/packager.cc
+++ b/packager/packager.cc
@@ -475,6 +475,16 @@ Status CreateMp4ToMp4TextJob(const StreamDescriptor& stream,
   return Status::OK;
 }
 
+std::unique_ptr<TextChunker> CreateTextChunker(
+    const ChunkingParams& chunking_params) {
+  const float segment_length_in_seconds =
+      chunking_params.segment_duration_in_seconds;
+  const uint64_t segment_length_in_ms =
+      static_cast<uint64_t>(segment_length_in_seconds * 1000);
+
+  return std::unique_ptr<TextChunker>(new TextChunker(segment_length_in_ms));
+}
+
 Status CreateHlsTextJob(const StreamDescriptor& stream,
                         const PackagingParams& packaging_params,
                         std::unique_ptr<MuxerListener> muxer_listener,
@@ -489,11 +499,6 @@ Status CreateHlsTextJob(const StreamDescriptor& stream,
                       ") to HLS with no segment template");
   }
 
-  const float segment_length_in_seconds =
-      packaging_params.chunking_params.segment_duration_in_seconds;
-  const uint64_t segment_length_in_ms =
-      static_cast<uint64_t>(segment_length_in_seconds * 1000);
-
   // Text files are usually small and since the input is one file;
   // there's no way for the player to do ranged requests. So set this
   // value to something reasonable if it is missing.
@@ -513,15 +518,13 @@ Status CreateHlsTextJob(const StreamDescriptor& stream,
   auto cue_aligner = sync_points
                          ? std::make_shared<CueAlignmentHandler>(sync_points)
                          : nullptr;
-  auto chunker = std::make_shared<TextChunker>(segment_length_in_ms);
+  auto chunker = CreateTextChunker(packaging_params.chunking_params);
 
-  RETURN_IF_ERROR(
-      ChainHandlers({parser, std::move(padder), std::move(cue_aligner),
-                     std::move(chunker), std::move(output)}));
+  job_manager->Add("Segmented Text Job", parser);
 
-  job_manager->Add("Segmented Text Job", std::move(parser));
-
-  return Status::OK;
+  return ChainHandlers({std::move(parser), std::move(padder),
+                        std::move(cue_aligner), std::move(chunker),
+                        std::move(output)});
 }
 
 Status CreateWebVttToMp4TextJob(const StreamDescriptor& stream,
@@ -530,8 +533,6 @@ Status CreateWebVttToMp4TextJob(const StreamDescriptor& stream,
                                 SyncPointQueue* sync_points,
                                 MuxerFactory* muxer_factory,
                                 std::shared_ptr<OriginHandler>* root) {
-  // TODO(kqyang): Support Cue Alignment if |sync_points| is not null.
-
   std::unique_ptr<FileReader> reader;
   RETURN_IF_ERROR(FileReader::Open(stream.input, &reader));
 
@@ -539,18 +540,25 @@ Status CreateWebVttToMp4TextJob(const StreamDescriptor& stream,
   auto parser =
       std::make_shared<WebVttParser>(std::move(reader), stream.language);
   auto padder = std::make_shared<TextPadder>(kNoDuration);
+
   auto text_to_mp4 = std::make_shared<WebVttToMp4Handler>();
-  auto chunker =
-      std::make_shared<ChunkingHandler>(packaging_params.chunking_params);
   auto muxer = muxer_factory->CreateMuxer(GetOutputFormat(stream), stream);
   muxer->SetMuxerListener(std::move(muxer_listener));
 
-  RETURN_IF_ERROR(
-      ChainHandlers({parser, std::move(padder), std::move(text_to_mp4),
-                     std::move(chunker), std::move(muxer)}));
-  *root = std::move(parser);
+  // Optional Cue Alignment Handler
+  std::shared_ptr<MediaHandler> cue_aligner;
+  if (sync_points) {
+    cue_aligner = std::make_shared<CueAlignmentHandler>(sync_points);
+  }
 
-  return Status::OK;
+  std::shared_ptr<MediaHandler> chunker =
+      CreateTextChunker(packaging_params.chunking_params);
+
+  *root = parser;
+
+  return ChainHandlers({std::move(parser), std::move(padder),
+                        std::move(cue_aligner), std::move(chunker),
+                        std::move(text_to_mp4), std::move(muxer)});
 }
 
 Status CreateTextJobs(