diff --git a/packager/media/chunking/cue_alignment_handler.cc b/packager/media/chunking/cue_alignment_handler.cc
index 6588852e12..1d6e8eda89 100644
--- a/packager/media/chunking/cue_alignment_handler.cc
+++ b/packager/media/chunking/cue_alignment_handler.cc
@@ -6,6 +6,8 @@
 
 #include "packager/media/chunking/cue_alignment_handler.h"
 
+#include <algorithm>
+
 #include "packager/status_macros.h"
 
 namespace shaka {
@@ -49,6 +51,15 @@ double TimeInSeconds(const StreamInfo& info, const StreamData& data) {
   return static_cast<double>(scaled_time) / time_scale;
 }
 
+double TextEndTimeInSeconds(const StreamInfo& info, const StreamData& data) {
+  DCHECK(data.text_sample);
+
+  const int64_t scaled_time = data.text_sample->EndTime();
+  const uint32_t time_scale = info.time_scale();
+
+  return static_cast<double>(scaled_time) / time_scale;
+}
+
 Status GetNextCue(double hint,
                   SyncPointQueue* sync_points,
                   std::shared_ptr<const CueEvent>* out_cue) {
@@ -132,12 +143,23 @@ Status CueAlignmentHandler::OnFlushRequest(size_t stream_index) {
     RETURN_IF_ERROR(RunThroughSamples(&stream));
     DCHECK_EQ(stream.samples.size(), 0u);
 
-    // It is possible for there to be cues that come after all the samples. Make
-    // sure to send them out too.
-    while (stream.cues.size()) {
-      RETURN_IF_ERROR(Dispatch(std::move(stream.cues.front())));
-      stream.cues.pop_front();
+    // Ignore extra cues at the end, except for text, as they will result in
+    // empty DASH Representations, which is not spec compliant.
+    // For text, if the cue is before the max end time, it will still be
+    // dispatched as the text samples intercepted by the cue can be split into
+    // two at the cue point.
+    for (auto& cue : stream.cues) {
+      // |max_text_sample_end_time_seconds| is always 0 for non-text samples.
+      if (cue->cue_event->time_in_seconds <
+          stream.max_text_sample_end_time_seconds) {
+        RETURN_IF_ERROR(Dispatch(std::move(cue)));
+      } else {
+        VLOG(1) << "Ignore extra cue in stream " << cue->stream_index
+                << " with time " << cue->cue_event->time_in_seconds
+                << "s in the end.";
+      }
     }
+    stream.cues.clear();
   }
 
   return FlushAllDownstreams();
@@ -218,9 +240,16 @@ Status CueAlignmentHandler::OnSample(std::unique_ptr<StreamData> sample) {
   // us until there is a sync point.
 
   const size_t stream_index = sample->stream_index;
+
+  if (sample->text_sample) {
+    StreamState& stream = stream_states_[stream_index];
+    stream.max_text_sample_end_time_seconds =
+        std::max(stream.max_text_sample_end_time_seconds,
+                 TextEndTimeInSeconds(*stream.info, *sample));
+  }
+
   const StreamType stream_type =
       stream_states_[stream_index].info->stream_type();
-
   const bool is_video = stream_type == kStreamVideo;
 
   return is_video ? OnVideoSample(std::move(sample))
diff --git a/packager/media/chunking/cue_alignment_handler.h b/packager/media/chunking/cue_alignment_handler.h
index a0329c3f70..9ae340aff0 100644
--- a/packager/media/chunking/cue_alignment_handler.h
+++ b/packager/media/chunking/cue_alignment_handler.h
@@ -39,6 +39,8 @@ class CueAlignmentHandler : public MediaHandler {
     std::list<std::unique_ptr<StreamData>> samples;
     // If set, the stream is pending to be flushed.
    bool to_be_flushed = false;
+    // Only set for text stream.
+    double max_text_sample_end_time_seconds = 0;
 
     // A list of cues that the stream should inject between media samples. When
     // there are no cues, the stream should run up to the hint.
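Note: below is a minimal standalone sketch of the flush-time rule the handler change above introduces, using illustrative stand-in names (PendingCue, FilterCuesAtFlush are not the handler's real API). It shows the same decision the new loop in CueAlignmentHandler::OnFlushRequest() makes: a trailing cue is dispatched only if it falls strictly before the stream's maximum text-sample end time, and since that value stays 0 for non-text streams, all of their trailing cues are dropped.

#include <iostream>
#include <vector>

// Illustrative stand-in for a buffered cue; the real handler stores
// std::unique_ptr<StreamData> entries carrying a CueEvent.
struct PendingCue {
  double time_in_seconds;
};

// Mirrors the loop added to CueAlignmentHandler::OnFlushRequest(): keep a
// trailing cue only if it falls before the latest text-sample end time.
std::vector<PendingCue> FilterCuesAtFlush(
    const std::vector<PendingCue>& cues,
    double max_text_sample_end_time_seconds) {
  std::vector<PendingCue> dispatched;
  for (const PendingCue& cue : cues) {
    // max_text_sample_end_time_seconds is 0 for non-text streams, so every
    // trailing cue on an audio or video stream is dropped here.
    if (cue.time_in_seconds < max_text_sample_end_time_seconds) {
      dispatched.push_back(cue);
    }
  }
  return dispatched;
}

int main() {
  // Text stream whose last sample ends at 2.0 s, with cues at 1.5 s and 2.0 s.
  const std::vector<PendingCue> cues = {{1.5}, {2.0}};
  for (const PendingCue& cue : FilterCuesAtFlush(cues, 2.0)) {
    std::cout << "dispatch cue at " << cue.time_in_seconds << "s\n";
  }
  // Prints only "dispatch cue at 1.5s"; the cue at 2.0 s is ignored.
  return 0;
}

Because the comparison is a strict less-than, a cue landing exactly at the last text sample's end time is also treated as extra and ignored, which is what the updated unit test below exercises.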
diff --git a/packager/media/chunking/cue_alignment_handler_unittest.cc b/packager/media/chunking/cue_alignment_handler_unittest.cc
index 5daac8280d..47365d8393 100644
--- a/packager/media/chunking/cue_alignment_handler_unittest.cc
+++ b/packager/media/chunking/cue_alignment_handler_unittest.cc
@@ -474,11 +474,15 @@ TEST_F(CueAlignmentHandlerTest, TextInputWithCueAfterLastStart) {
   const int64_t kSample2Start = kSample1End;
   const int64_t kSample2End = kSample2Start + kSampleDuration;
 
-  // Put the cue between the start and end of the last sample.
-  const double kCueTimeInSeconds =
-      static_cast<double>(kSample2Start + kSample2End) / kMsTimeScale;
+  // Put the cue 1 between the start and end of the last sample.
+  const double kCue1TimeInSeconds =
+      static_cast<double>(kSample2Start + kSample2End) / 2 / kMsTimeScale;
 
-  auto sync_points = CreateSyncPoints({kCueTimeInSeconds});
+  // Put the cue 2 after the end of the last sample.
+  const double kCue2TimeInSeconds =
+      static_cast<double>(kSample2End) / kMsTimeScale;
+
+  auto sync_points = CreateSyncPoints({kCue1TimeInSeconds, kCue2TimeInSeconds});
   auto handler = std::make_shared<CueAlignmentHandler>(sync_points.get());
   ASSERT_OK(SetUpAndInitializeGraph(handler, kOneInput, kOneOutput));
 
@@ -496,8 +500,10 @@
     EXPECT_CALL(
         *Output(kTextStream),
         OnProcess(IsTextSample(_, _, kSample2Start, kSample2End, _, _)));
+    // Cue before the sample end is processed.
     EXPECT_CALL(*Output(kTextStream),
-                OnProcess(IsCueEvent(_, kCueTimeInSeconds)));
+                OnProcess(IsCueEvent(_, kCue1TimeInSeconds)));
+    // Cue after the samples is ignored.
     EXPECT_CALL(*Output(kTextStream), OnFlush(_));
   }
 
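For reference, a worked example of the cue placement in the updated test, under assumed values (kMsTimeScale = 1000 and two 1000 ms samples starting at 0; the fixture's actual constants are not shown in this hunk): cue 1 lands at 1.5 s, inside the last sample, so it is still dispatched, while cue 2 lands at 2.0 s, exactly the sample's end, so the handler ignores it at flush.

#include <cstdint>
#include <iostream>

int main() {
  // Assumed values for illustration only; the test fixture defines its own
  // kMsTimeScale, sample start times, and kSampleDuration.
  const int64_t kMsTimeScale = 1000;
  const int64_t kSampleDuration = 1000;
  const int64_t kSample1Start = 0;
  const int64_t kSample1End = kSample1Start + kSampleDuration;
  const int64_t kSample2Start = kSample1End;
  const int64_t kSample2End = kSample2Start + kSampleDuration;

  // Cue 1 sits at the midpoint of the last sample, so the handler keeps it
  // (the intersected text sample can be split at the cue point).
  const double kCue1TimeInSeconds =
      static_cast<double>(kSample2Start + kSample2End) / 2 / kMsTimeScale;
  // Cue 2 sits exactly at the last sample's end, which is not strictly less
  // than the maximum text-sample end time, so it is dropped at flush.
  const double kCue2TimeInSeconds =
      static_cast<double>(kSample2End) / kMsTimeScale;

  std::cout << "cue 1: " << kCue1TimeInSeconds << "s (dispatched)\n"  // 1.5 s
            << "cue 2: " << kCue2TimeInSeconds << "s (ignored)\n";    // 2 s
  return 0;
}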