Remove Text Time Scale Assumption From Cue Alignment

In the cue aligner, we assumed that all text sample timestamps were in
milliseconds. This was the last place with that assumption. This change
removes it and uses the stream info's time scale instead.

Issue #399

Change-Id: Ie21bf27148e020bd85111dcace0bbdff3419c1ac
This commit is contained in:
Aaron Vaage 2018-06-07 13:25:47 -07:00
parent dbce84f3ea
commit 440adb3086
1 changed file with 28 additions and 30 deletions

View File

@ -16,38 +16,36 @@ namespace {
// configured. This is about 20 seconds of buffer for audio with 48kHz. // configured. This is about 20 seconds of buffer for audio with 48kHz.
const size_t kMaxBufferSize = 1000; const size_t kMaxBufferSize = 1000;
double TimeInSeconds(const StreamInfo& info, const StreamData& data) { int64_t GetScaledTime(const StreamInfo& info, const StreamData& data) {
int64_t time_scale; DCHECK(data.text_sample || data.media_sample);
int64_t scaled_time;
switch (data.stream_data_type) { if (data.text_sample) {
case StreamDataType::kMediaSample: return data.text_sample->start_time();
time_scale = info.time_scale(); }
if (info.stream_type() == kStreamText) {
// This class does not support splitting MediaSample at cue points, which is
// required for text stream. This class expects MediaSample to be converted
// to TextSample before passing to this class.
NOTREACHED()
<< "A text streams should use text samples, not media samples.";
}
if (info.stream_type() == kStreamAudio) { if (info.stream_type() == kStreamAudio) {
// Return the start time for video and mid-point for audio, so that for // Return the mid-point for audio because if the portion of the sample
// an audio sample, if the portion of the sample after the cue point is // after the cue point is bigger than the portion of the sample before
// bigger than the portion of the sample before the cue point, the // the cue point, the sample is placed after the cue.
// sample is placed after the cue. return data.media_sample->pts() + data.media_sample->duration() / 2;
// It does not matter for text samples as text samples will be cut at
// cue point.
scaled_time =
data.media_sample->pts() + data.media_sample->duration() / 2;
} else {
scaled_time = data.media_sample->pts();
} }
break;
case StreamDataType::kTextSample: DCHECK_EQ(info.stream_type(), kStreamVideo);
// Text is always in MS but the stream info time scale is 0. return data.media_sample->pts();
time_scale = 1000;
scaled_time = data.text_sample->start_time();
break;
default:
time_scale = 0;
scaled_time = 0;
NOTREACHED() << "TimeInSeconds should only be called on media samples "
"and text samples.";
break;
} }
double TimeInSeconds(const StreamInfo& info, const StreamData& data) {
const int64_t scaled_time = GetScaledTime(info, data);
const uint32_t time_scale = info.time_scale();
return static_cast<double>(scaled_time) / time_scale; return static_cast<double>(scaled_time) / time_scale;
} }