Remove Text Time Scale Assumption From Cue Alignment

In the cue aligner, we assumed that all text would be in milliseconds. This was the last place with that assumption. This change removes that assumption and uses the stream info's time scale instead. Issue #399. Change-Id: Ie21bf27148e020bd85111dcace0bbdff3419c1ac
2018-06-07 13:25:47 -07:00 · 2018-06-07 13:25:47 -07:00 · 440adb3086
parent dbce84f3ea
commit 440adb3086
1 changed files with 28 additions and 30 deletions
--- a/packager/media/chunking/cue_alignment_handler.cc
+++ b/packager/media/chunking/cue_alignment_handler.cc
@@ -16,38 +16,36 @@ namespace {
 // configured. This is about 20 seconds of buffer for audio with 48kHz.
 const size_t kMaxBufferSize = 1000;
-double TimeInSeconds(const StreamInfo& info, const StreamData& data) {
+int64_t GetScaledTime(const StreamInfo& info, const StreamData& data) {
-  int64_t time_scale;
+  DCHECK(data.text_sample || data.media_sample);
-  int64_t scaled_time;
+
-  switch (data.stream_data_type) {
+  if (data.text_sample) {
-    case StreamDataType::kMediaSample:
+    return data.text_sample->start_time();
-      time_scale = info.time_scale();
+  }
  if (info.stream_type() == kStreamText) {
    // This class does not support splitting MediaSample at cue points, which is
    // required for text stream. This class expects MediaSample to be converted
    // to TextSample before passing to this class.
    NOTREACHED()
        << "A text streams should use text samples, not media samples.";
  }
  if (info.stream_type() == kStreamAudio) {
-        // Return the start time for video and mid-point for audio, so that for
+    // Return the mid-point for audio because if the portion of the sample
-        // an audio sample, if the portion of the sample after the cue point is
+    // after the cue point is bigger than the portion of the sample before
-        // bigger than the portion of the sample before the cue point, the
+    // the cue point, the sample is placed after the cue.
-        // sample is placed after the cue.
+    return data.media_sample->pts() + data.media_sample->duration() / 2;
        // It does not matter for text samples as text samples will be cut at
        // cue point.
        scaled_time =
            data.media_sample->pts() + data.media_sample->duration() / 2;
      } else {
        scaled_time = data.media_sample->pts();
      }
      break;
    case StreamDataType::kTextSample:
      // Text is always in MS but the stream info time scale is 0.
      time_scale = 1000;
      scaled_time = data.text_sample->start_time();
      break;
    default:
      time_scale = 0;
      scaled_time = 0;
      NOTREACHED() << "TimeInSeconds should only be called on media samples "
                      "and text samples.";
      break;
  }
  DCHECK_EQ(info.stream_type(), kStreamVideo);
  return data.media_sample->pts();
 }
 double TimeInSeconds(const StreamInfo& info, const StreamData& data) {
  const int64_t scaled_time = GetScaledTime(info, data);
  const uint32_t time_scale = info.time_scale();
  return static_cast<double>(scaled_time) / time_scale;
 }