shaka-packager/media/mp4/track_run_iterator_unittest.cc

438 lines
17 KiB
C++

// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/basictypes.h"
#include "base/logging.h"
#include "base/memory/scoped_ptr.h"
#include "media/mp4/box_definitions.h"
#include "media/mp4/rcheck.h"
#include "media/mp4/track_run_iterator.h"
#include "testing/gtest/include/gtest/gtest.h"
// The sum of the elements in a vector initialized with SumAscending,
// less the value of the last element.
static const int kSumAscending1 = 45;
static const int kAudioScale = 48000;
static const int kVideoScale = 25;
static const uint32 kSampleIsDifferenceSampleFlagMask = 0x10000;
static const uint8 kAuxInfo[] = {
0x41, 0x54, 0x65, 0x73, 0x74, 0x49, 0x76, 0x31,
0x41, 0x54, 0x65, 0x73, 0x74, 0x49, 0x76, 0x32,
0x00, 0x02,
0x00, 0x01, 0x00, 0x00, 0x00, 0x02,
0x00, 0x03, 0x00, 0x00, 0x00, 0x04
};
static const char kIv1[] = {
0x41, 0x54, 0x65, 0x73, 0x74, 0x49, 0x76, 0x31,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
static const uint8 kKeyId[] = {
0x41, 0x47, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x54,
0x65, 0x73, 0x74, 0x4b, 0x65, 0x79, 0x49, 0x44
};
namespace media {
namespace mp4 {
class TrackRunIteratorTest : public testing::Test {
public:
TrackRunIteratorTest() {
CreateMovie();
}
protected:
Movie moov_;
LogCB log_cb_;
scoped_ptr<TrackRunIterator> iter_;
void CreateMovie() {
moov_.header.timescale = 1000;
moov_.tracks.resize(3);
moov_.extends.tracks.resize(2);
moov_.tracks[0].header.track_id = 1;
moov_.tracks[0].media.header.timescale = kAudioScale;
SampleDescription& desc1 =
moov_.tracks[0].media.information.sample_table.description;
AudioSampleEntry aud_desc;
aud_desc.format = FOURCC_MP4A;
aud_desc.sinf.info.track_encryption.is_encrypted = false;
desc1.type = kAudio;
desc1.audio_entries.push_back(aud_desc);
moov_.extends.tracks[0].track_id = 1;
moov_.extends.tracks[0].default_sample_description_index = 1;
moov_.tracks[1].header.track_id = 2;
moov_.tracks[1].media.header.timescale = kVideoScale;
SampleDescription& desc2 =
moov_.tracks[1].media.information.sample_table.description;
VideoSampleEntry vid_desc;
vid_desc.format = FOURCC_AVC1;
vid_desc.sinf.info.track_encryption.is_encrypted = false;
desc2.type = kVideo;
desc2.video_entries.push_back(vid_desc);
moov_.extends.tracks[1].track_id = 2;
moov_.extends.tracks[1].default_sample_description_index = 1;
moov_.tracks[2].header.track_id = 3;
moov_.tracks[2].media.information.sample_table.description.type = kHint;
}
MovieFragment CreateFragment() {
MovieFragment moof;
moof.tracks.resize(2);
moof.tracks[0].decode_time.decode_time = 0;
moof.tracks[0].header.track_id = 1;
moof.tracks[0].header.has_default_sample_flags = true;
moof.tracks[0].header.default_sample_duration = 1024;
moof.tracks[0].header.default_sample_size = 4;
moof.tracks[0].runs.resize(2);
moof.tracks[0].runs[0].sample_count = 10;
moof.tracks[0].runs[0].data_offset = 100;
SetAscending(&moof.tracks[0].runs[0].sample_sizes);
moof.tracks[0].runs[1].sample_count = 10;
moof.tracks[0].runs[1].data_offset = 10000;
moof.tracks[1].header.track_id = 2;
moof.tracks[1].header.has_default_sample_flags = false;
moof.tracks[1].decode_time.decode_time = 10;
moof.tracks[1].runs.resize(1);
moof.tracks[1].runs[0].sample_count = 10;
moof.tracks[1].runs[0].data_offset = 200;
SetAscending(&moof.tracks[1].runs[0].sample_sizes);
SetAscending(&moof.tracks[1].runs[0].sample_durations);
moof.tracks[1].runs[0].sample_flags.resize(10);
for (size_t i = 1; i < moof.tracks[1].runs[0].sample_flags.size(); i++) {
moof.tracks[1].runs[0].sample_flags[i] =
kSampleIsDifferenceSampleFlagMask;
}
return moof;
}
// Update the first sample description of a Track to indicate encryption
void AddEncryption(Track* track) {
SampleDescription* stsd =
&track->media.information.sample_table.description;
ProtectionSchemeInfo* sinf;
if (!stsd->video_entries.empty()) {
sinf = &stsd->video_entries[0].sinf;
} else {
sinf = &stsd->audio_entries[0].sinf;
}
sinf->type.type = FOURCC_CENC;
sinf->info.track_encryption.is_encrypted = true;
sinf->info.track_encryption.default_iv_size = 8;
sinf->info.track_encryption.default_kid.insert(
sinf->info.track_encryption.default_kid.begin(),
kKeyId, kKeyId + arraysize(kKeyId));
}
// Add aux info covering the first track run to a TrackFragment, and update
// the run to ensure it matches length and subsample information.
void AddAuxInfoHeaders(int offset, TrackFragment* frag) {
frag->auxiliary_offset.offsets.push_back(offset);
frag->auxiliary_size.sample_count = 2;
frag->auxiliary_size.sample_info_sizes.push_back(8);
frag->auxiliary_size.sample_info_sizes.push_back(22);
frag->runs[0].sample_count = 2;
frag->runs[0].sample_sizes[1] = 10;
}
void SetAscending(std::vector<uint32>* vec) {
vec->resize(10);
for (size_t i = 0; i < vec->size(); i++)
(*vec)[i] = i+1;
}
};
TEST_F(TrackRunIteratorTest, NoRunsTest) {
iter_.reset(new TrackRunIterator(&moov_, log_cb_));
ASSERT_TRUE(iter_->Init(MovieFragment()));
EXPECT_FALSE(iter_->IsRunValid());
EXPECT_FALSE(iter_->IsSampleValid());
}
TEST_F(TrackRunIteratorTest, BasicOperationTest) {
iter_.reset(new TrackRunIterator(&moov_, log_cb_));
MovieFragment moof = CreateFragment();
// Test that runs are sorted correctly, and that properties of the initial
// sample of the first run are correct
ASSERT_TRUE(iter_->Init(moof));
EXPECT_TRUE(iter_->IsRunValid());
EXPECT_FALSE(iter_->is_encrypted());
EXPECT_EQ(iter_->track_id(), 1u);
EXPECT_EQ(iter_->sample_offset(), 100);
EXPECT_EQ(iter_->sample_size(), 1);
EXPECT_EQ(iter_->dts(), TimeDeltaFromRational(0, kAudioScale));
EXPECT_EQ(iter_->cts(), TimeDeltaFromRational(0, kAudioScale));
EXPECT_EQ(iter_->duration(), TimeDeltaFromRational(1024, kAudioScale));
EXPECT_TRUE(iter_->is_keyframe());
// Advance to the last sample in the current run, and test its properties
for (int i = 0; i < 9; i++) iter_->AdvanceSample();
EXPECT_EQ(iter_->track_id(), 1u);
EXPECT_EQ(iter_->sample_offset(), 100 + kSumAscending1);
EXPECT_EQ(iter_->sample_size(), 10);
EXPECT_EQ(iter_->dts(), TimeDeltaFromRational(1024 * 9, kAudioScale));
EXPECT_EQ(iter_->duration(), TimeDeltaFromRational(1024, kAudioScale));
EXPECT_TRUE(iter_->is_keyframe());
// Test end-of-run
iter_->AdvanceSample();
EXPECT_FALSE(iter_->IsSampleValid());
// Test last sample of next run
iter_->AdvanceRun();
EXPECT_TRUE(iter_->is_keyframe());
for (int i = 0; i < 9; i++) iter_->AdvanceSample();
EXPECT_EQ(iter_->track_id(), 2u);
EXPECT_EQ(iter_->sample_offset(), 200 + kSumAscending1);
EXPECT_EQ(iter_->sample_size(), 10);
int64 base_dts = kSumAscending1 + moof.tracks[1].decode_time.decode_time;
EXPECT_EQ(iter_->dts(), TimeDeltaFromRational(base_dts, kVideoScale));
EXPECT_EQ(iter_->duration(), TimeDeltaFromRational(10, kVideoScale));
EXPECT_FALSE(iter_->is_keyframe());
// Test final run
iter_->AdvanceRun();
EXPECT_EQ(iter_->track_id(), 1u);
EXPECT_EQ(iter_->dts(), TimeDeltaFromRational(1024 * 10, kAudioScale));
iter_->AdvanceSample();
EXPECT_EQ(moof.tracks[0].runs[1].data_offset +
moof.tracks[0].header.default_sample_size,
iter_->sample_offset());
iter_->AdvanceRun();
EXPECT_FALSE(iter_->IsRunValid());
}
TEST_F(TrackRunIteratorTest, TrackExtendsDefaultsTest) {
moov_.extends.tracks[0].default_sample_duration = 50;
moov_.extends.tracks[0].default_sample_size = 3;
moov_.extends.tracks[0].default_sample_flags =
kSampleIsDifferenceSampleFlagMask;
iter_.reset(new TrackRunIterator(&moov_, log_cb_));
MovieFragment moof = CreateFragment();
moof.tracks[0].header.has_default_sample_flags = false;
moof.tracks[0].header.default_sample_size = 0;
moof.tracks[0].header.default_sample_duration = 0;
moof.tracks[0].runs[0].sample_sizes.clear();
ASSERT_TRUE(iter_->Init(moof));
iter_->AdvanceSample();
EXPECT_FALSE(iter_->is_keyframe());
EXPECT_EQ(iter_->sample_size(), 3);
EXPECT_EQ(iter_->sample_offset(), moof.tracks[0].runs[0].data_offset + 3);
EXPECT_EQ(iter_->duration(), TimeDeltaFromRational(50, kAudioScale));
EXPECT_EQ(iter_->dts(), TimeDeltaFromRational(50, kAudioScale));
}
TEST_F(TrackRunIteratorTest, FirstSampleFlagTest) {
// Ensure that keyframes are flagged correctly in the face of BMFF boxes which
// explicitly specify the flags for the first sample in a run and rely on
// defaults for all subsequent samples
iter_.reset(new TrackRunIterator(&moov_, log_cb_));
MovieFragment moof = CreateFragment();
moof.tracks[1].header.has_default_sample_flags = true;
moof.tracks[1].header.default_sample_flags =
kSampleIsDifferenceSampleFlagMask;
moof.tracks[1].runs[0].sample_flags.resize(1);
ASSERT_TRUE(iter_->Init(moof));
iter_->AdvanceRun();
EXPECT_TRUE(iter_->is_keyframe());
iter_->AdvanceSample();
EXPECT_FALSE(iter_->is_keyframe());
}
TEST_F(TrackRunIteratorTest, ReorderingTest) {
// Test frame reordering and edit list support. The frames have the following
// decode timestamps:
//
// 0ms 40ms 120ms 240ms
// | 0 | 1 - | 2 - - |
//
// ...and these composition timestamps, after edit list adjustment:
//
// 0ms 40ms 160ms 240ms
// | 0 | 2 - - | 1 - |
// Create an edit list with one entry, with an initial start time of 80ms
// (that is, 2 / kVideoTimescale) and a duration of zero (which is treated as
// infinite according to 14496-12:2012). This will cause the first 80ms of the
// media timeline - which will be empty, due to CTS biasing - to be discarded.
iter_.reset(new TrackRunIterator(&moov_, log_cb_));
EditListEntry entry;
entry.segment_duration = 0;
entry.media_time = 2;
entry.media_rate_integer = 1;
entry.media_rate_fraction = 0;
moov_.tracks[1].edit.list.edits.push_back(entry);
// Add CTS offsets. Without bias, the CTS offsets for the first three frames
// would simply be [0, 3, -2]. Since CTS offsets should be non-negative for
// maximum compatibility, these values are biased up to [2, 5, 0], and the
// extra 80ms is removed via the edit list.
MovieFragment moof = CreateFragment();
std::vector<int32>& cts_offsets =
moof.tracks[1].runs[0].sample_composition_time_offsets;
cts_offsets.resize(10);
cts_offsets[0] = 2;
cts_offsets[1] = 5;
cts_offsets[2] = 0;
moof.tracks[1].decode_time.decode_time = 0;
ASSERT_TRUE(iter_->Init(moof));
iter_->AdvanceRun();
EXPECT_EQ(iter_->dts(), TimeDeltaFromRational(0, kVideoScale));
EXPECT_EQ(iter_->cts(), TimeDeltaFromRational(0, kVideoScale));
EXPECT_EQ(iter_->duration(), TimeDeltaFromRational(1, kVideoScale));
iter_->AdvanceSample();
EXPECT_EQ(iter_->dts(), TimeDeltaFromRational(1, kVideoScale));
EXPECT_EQ(iter_->cts(), TimeDeltaFromRational(4, kVideoScale));
EXPECT_EQ(iter_->duration(), TimeDeltaFromRational(2, kVideoScale));
iter_->AdvanceSample();
EXPECT_EQ(iter_->dts(), TimeDeltaFromRational(3, kVideoScale));
EXPECT_EQ(iter_->cts(), TimeDeltaFromRational(1, kVideoScale));
EXPECT_EQ(iter_->duration(), TimeDeltaFromRational(3, kVideoScale));
}
TEST_F(TrackRunIteratorTest, IgnoreUnknownAuxInfoTest) {
iter_.reset(new TrackRunIterator(&moov_, log_cb_));
MovieFragment moof = CreateFragment();
moof.tracks[1].auxiliary_offset.offsets.push_back(50);
moof.tracks[1].auxiliary_size.default_sample_info_size = 2;
moof.tracks[1].auxiliary_size.sample_count = 2;
moof.tracks[1].runs[0].sample_count = 2;
ASSERT_TRUE(iter_->Init(moof));
iter_->AdvanceRun();
EXPECT_FALSE(iter_->AuxInfoNeedsToBeCached());
}
TEST_F(TrackRunIteratorTest, DecryptConfigTest) {
AddEncryption(&moov_.tracks[1]);
iter_.reset(new TrackRunIterator(&moov_, log_cb_));
MovieFragment moof = CreateFragment();
AddAuxInfoHeaders(50, &moof.tracks[1]);
ASSERT_TRUE(iter_->Init(moof));
// The run for track 2 will be first, since its aux info offset is the first
// element in the file.
EXPECT_EQ(iter_->track_id(), 2u);
EXPECT_TRUE(iter_->is_encrypted());
EXPECT_TRUE(iter_->AuxInfoNeedsToBeCached());
EXPECT_EQ(static_cast<uint32>(iter_->aux_info_size()), arraysize(kAuxInfo));
EXPECT_EQ(iter_->aux_info_offset(), 50);
EXPECT_EQ(iter_->GetMaxClearOffset(), 50);
EXPECT_FALSE(iter_->CacheAuxInfo(NULL, 0));
EXPECT_FALSE(iter_->CacheAuxInfo(kAuxInfo, 3));
EXPECT_TRUE(iter_->AuxInfoNeedsToBeCached());
EXPECT_TRUE(iter_->CacheAuxInfo(kAuxInfo, arraysize(kAuxInfo)));
EXPECT_FALSE(iter_->AuxInfoNeedsToBeCached());
EXPECT_EQ(iter_->sample_offset(), 200);
EXPECT_EQ(iter_->GetMaxClearOffset(), moof.tracks[0].runs[0].data_offset);
scoped_ptr<DecryptConfig> config = iter_->GetDecryptConfig();
ASSERT_EQ(arraysize(kKeyId), config->key_id().size());
EXPECT_TRUE(!memcmp(kKeyId, config->key_id().data(),
config->key_id().size()));
ASSERT_EQ(arraysize(kIv1), config->iv().size());
EXPECT_TRUE(!memcmp(kIv1, config->iv().data(), config->iv().size()));
EXPECT_TRUE(config->subsamples().empty());
iter_->AdvanceSample();
config = iter_->GetDecryptConfig();
EXPECT_EQ(config->subsamples().size(), 2u);
EXPECT_EQ(config->subsamples()[0].clear_bytes, 1u);
EXPECT_EQ(config->subsamples()[1].cypher_bytes, 4u);
}
// It is legal for aux info blocks to be shared among multiple formats.
TEST_F(TrackRunIteratorTest, SharedAuxInfoTest) {
AddEncryption(&moov_.tracks[0]);
AddEncryption(&moov_.tracks[1]);
iter_.reset(new TrackRunIterator(&moov_, log_cb_));
MovieFragment moof = CreateFragment();
moof.tracks[0].runs.resize(1);
AddAuxInfoHeaders(50, &moof.tracks[0]);
AddAuxInfoHeaders(50, &moof.tracks[1]);
moof.tracks[0].auxiliary_size.default_sample_info_size = 8;
ASSERT_TRUE(iter_->Init(moof));
EXPECT_EQ(iter_->track_id(), 1u);
EXPECT_EQ(iter_->aux_info_offset(), 50);
EXPECT_TRUE(iter_->CacheAuxInfo(kAuxInfo, arraysize(kAuxInfo)));
scoped_ptr<DecryptConfig> config = iter_->GetDecryptConfig();
ASSERT_EQ(arraysize(kIv1), config->iv().size());
EXPECT_TRUE(!memcmp(kIv1, config->iv().data(), config->iv().size()));
iter_->AdvanceSample();
EXPECT_EQ(iter_->GetMaxClearOffset(), 50);
iter_->AdvanceRun();
EXPECT_EQ(iter_->GetMaxClearOffset(), 50);
EXPECT_EQ(iter_->aux_info_offset(), 50);
EXPECT_TRUE(iter_->CacheAuxInfo(kAuxInfo, arraysize(kAuxInfo)));
EXPECT_EQ(iter_->GetMaxClearOffset(), 200);
ASSERT_EQ(arraysize(kIv1), config->iv().size());
EXPECT_TRUE(!memcmp(kIv1, config->iv().data(), config->iv().size()));
iter_->AdvanceSample();
EXPECT_EQ(iter_->GetMaxClearOffset(), 201);
}
// Sensible files are expected to place auxiliary information for a run
// immediately before the main data for that run. Alternative schemes are
// possible, however, including the somewhat reasonable behavior of placing all
// aux info at the head of the 'mdat' box together, and the completely
// unreasonable behavior demonstrated here:
// byte 50: track 2, run 1 aux info
// byte 100: track 1, run 1 data
// byte 200: track 2, run 1 data
// byte 201: track 1, run 2 aux info (*inside* track 2, run 1 data)
// byte 10000: track 1, run 2 data
// byte 20000: track 1, run 1 aux info
TEST_F(TrackRunIteratorTest, UnexpectedOrderingTest) {
AddEncryption(&moov_.tracks[0]);
AddEncryption(&moov_.tracks[1]);
iter_.reset(new TrackRunIterator(&moov_, log_cb_));
MovieFragment moof = CreateFragment();
AddAuxInfoHeaders(20000, &moof.tracks[0]);
moof.tracks[0].auxiliary_offset.offsets.push_back(201);
moof.tracks[0].auxiliary_size.sample_count += 2;
moof.tracks[0].auxiliary_size.default_sample_info_size = 8;
moof.tracks[0].runs[1].sample_count = 2;
AddAuxInfoHeaders(50, &moof.tracks[1]);
moof.tracks[1].runs[0].sample_sizes[0] = 5;
ASSERT_TRUE(iter_->Init(moof));
EXPECT_EQ(iter_->track_id(), 2u);
EXPECT_EQ(iter_->aux_info_offset(), 50);
EXPECT_EQ(iter_->sample_offset(), 200);
EXPECT_TRUE(iter_->CacheAuxInfo(kAuxInfo, arraysize(kAuxInfo)));
EXPECT_EQ(iter_->GetMaxClearOffset(), 100);
iter_->AdvanceRun();
EXPECT_EQ(iter_->track_id(), 1u);
EXPECT_EQ(iter_->aux_info_offset(), 20000);
EXPECT_EQ(iter_->sample_offset(), 100);
EXPECT_TRUE(iter_->CacheAuxInfo(kAuxInfo, arraysize(kAuxInfo)));
EXPECT_EQ(iter_->GetMaxClearOffset(), 100);
iter_->AdvanceSample();
EXPECT_EQ(iter_->GetMaxClearOffset(), 101);
iter_->AdvanceRun();
EXPECT_EQ(iter_->track_id(), 1u);
EXPECT_EQ(iter_->aux_info_offset(), 201);
EXPECT_EQ(iter_->sample_offset(), 10000);
EXPECT_EQ(iter_->GetMaxClearOffset(), 201);
EXPECT_TRUE(iter_->CacheAuxInfo(kAuxInfo, arraysize(kAuxInfo)));
EXPECT_EQ(iter_->GetMaxClearOffset(), 10000);
}
} // namespace mp4
} // namespace media