Adjust timestamps in mp4 if there is an initial composition offset

In some ISO-BMFF files, there is an initial non-zero composition offset,
but there is no EditList present.

This is against the ISO-BMFF spec recommendation [1], and we believe that in
most cases the EditList is simply missing.

[1] 14496-12:2015 8.6.6.1
It is recommended that such an edit be used to establish a presentation
time of 0 for the first presented sample, when composition offsets are
used.

Issue: #112.
Fixes: b/110782437.

Change-Id: I23d33810ce536b09a1e22a2644828d824c1314f5
This commit is contained in:
KongQun Yang 2018-07-12 17:31:06 -07:00
parent 40ea1286b9
commit 9a55d4033f
9 changed files with 198 additions and 10 deletions

View File

@ -721,6 +721,11 @@ class PackagerFunctionalTest(PackagerAppTest):
self._GetFlags(output_dash=True)) self._GetFlags(output_dash=True))
self._CheckTestResults('video-audio-text') self._CheckTestResults('video-audio-text')
# Packages the 'video' stream of bear-640x360-no_edit_list.mp4 with DASH
# output enabled, then checks the results under the name 'video-no-edit-list'.
# NOTE(review): _CheckTestResults presumably compares against golden files;
# the helper is not visible here - confirm.
def testVideoNoEditList(self):
stream = self._GetStream('video', test_file='bear-640x360-no_edit_list.mp4')
self.assertPackageSuccess([stream], self._GetFlags(output_dash=True))
self._CheckTestResults('video-no-edit-list')
def testAvcAacTs(self): def testAvcAacTs(self):
# Currently we only support live packaging for ts. # Currently we only support live packaging for ts.
self.assertPackageSuccess( self.assertPackageSuccess(

View File

@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Generated with https://github.com/google/shaka-packager version <tag>-<hash>-<test>-->
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 DASH-MPD.xsd" xmlns:cenc="urn:mpeg:cenc:2013" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" minBufferTime="PT2S" type="static" mediaPresentationDuration="PT2.702699899673462S">
<Period id="0">
<AdaptationSet id="0" contentType="video" width="640" height="360" frameRate="30000/1001" subsegmentAlignment="true" par="16:9">
<Representation id="0" bandwidth="973483" codecs="avc1.64001e" mimeType="video/mp4" sar="1:1">
<BaseURL>bear-640x360-no_edit_list-video.mp4</BaseURL>
<SegmentBase indexRange="859-926" timescale="30000">
<Initialization range="0-858"/>
</SegmentBase>
</Representation>
</AdaptationSet>
</Period>
</MPD>

View File

@ -46,6 +46,7 @@
], ],
'dependencies': [ 'dependencies': [
'../../../third_party/boringssl/boringssl.gyp:boringssl', '../../../third_party/boringssl/boringssl.gyp:boringssl',
'../../../third_party/gflags/gflags.gyp:gflags',
'../../base/media_base.gyp:media_base', '../../base/media_base.gyp:media_base',
'../../codecs/codecs.gyp:codecs', '../../codecs/codecs.gyp:codecs',
'../../event/media_event.gyp:media_event', '../../event/media_event.gyp:media_event',
@ -68,6 +69,7 @@
'../../../file/file.gyp:file', '../../../file/file.gyp:file',
'../../../testing/gtest.gyp:gtest', '../../../testing/gtest.gyp:gtest',
'../../../testing/gmock.gyp:gmock', '../../../testing/gmock.gyp:gmock',
'../../../third_party/gflags/gflags.gyp:gflags',
'../../test/media_test.gyp:media_test_support', '../../test/media_test.gyp:media_test_support',
'mp4', 'mp4',
] ]

View File

@ -4,6 +4,13 @@
#include "packager/media/formats/mp4/track_run_iterator.h" #include "packager/media/formats/mp4/track_run_iterator.h"
#include <gflags/gflags.h>
DEFINE_bool(mp4_reset_initial_composition_offset_to_zero,
true,
"MP4 only. If it is true, reset the initial composition offset to "
"zero, i.e. by assuming that there is a missing EditList.");
#include <algorithm> #include <algorithm>
#include <limits> #include <limits>
@ -180,7 +187,7 @@ bool TrackRunIterator::Init() {
// dts is directly adjusted, which then propagates to pts as pts is encoded // dts is directly adjusted, which then propagates to pts as pts is encoded
// as difference (composition offset) to dts in mp4. // as difference (composition offset) to dts in mp4.
int64_t run_start_dts = GetTimestampAdjustment(*moov_, *trak); int64_t run_start_dts = GetTimestampAdjustment(*moov_, *trak, nullptr);
uint32_t num_samples = sample_size.sample_count; uint32_t num_samples = sample_size.sample_count;
uint32_t num_chunks = static_cast<uint32_t>(chunk_offset_vector.size()); uint32_t num_chunks = static_cast<uint32_t>(chunk_offset_vector.size());
@ -349,7 +356,7 @@ bool TrackRunIterator::Init(const MovieFragment& moof) {
// dts is directly adjusted, which then propagates to pts as pts is encoded // dts is directly adjusted, which then propagates to pts as pts is encoded
// as difference (composition offset) to dts in mp4. // as difference (composition offset) to dts in mp4.
run_start_dts += GetTimestampAdjustment(*moov_, *trak); run_start_dts += GetTimestampAdjustment(*moov_, *trak, &traf);
int sample_count_sum = 0; int sample_count_sum = 0;
@ -634,8 +641,14 @@ std::unique_ptr<DecryptConfig> TrackRunIterator::GetDecryptConfig() {
} }
int64_t TrackRunIterator::GetTimestampAdjustment(const Movie& movie, int64_t TrackRunIterator::GetTimestampAdjustment(const Movie& movie,
const Track& track) { const Track& track,
int64_t edit_list_offset = 0; const TrackFragment* traf) {
const uint32_t track_id = track.header.track_id;
const auto iter = timestamp_adjustment_map_.find(track_id);
if (iter != timestamp_adjustment_map_.end())
return iter->second;
int64_t timestamp_adjustment = 0;
const std::vector<EditListEntry>& edits = track.edit.list.edits; const std::vector<EditListEntry>& edits = track.edit.list.edits;
if (!edits.empty()) { if (!edits.empty()) {
// ISO/IEC 14496-12:2015 8.6.6 Edit List Box. // ISO/IEC 14496-12:2015 8.6.6 Edit List Box.
@ -651,13 +664,48 @@ int64_t TrackRunIterator::GetTimestampAdjustment(const Movie& movie,
const int64_t scaled_time = const int64_t scaled_time =
Rescale(edit.segment_duration, movie.header.timescale, Rescale(edit.segment_duration, movie.header.timescale,
track.media.header.timescale); track.media.header.timescale);
edit_list_offset += scaled_time; timestamp_adjustment += scaled_time;
} else { } else {
edit_list_offset -= edit.media_time; timestamp_adjustment -= edit.media_time;
} }
} }
} }
return edit_list_offset;
if (timestamp_adjustment == 0) {
int64_t composition_offset = 0;
if (traf && !traf->runs.empty()) {
const auto& cts_offsets =
traf->runs.front().sample_composition_time_offsets;
if (!cts_offsets.empty())
composition_offset = cts_offsets.front();
} else {
CompositionOffsetIterator composition_offset_iter(
track.media.information.sample_table.composition_time_to_sample);
if (composition_offset_iter.IsValid())
composition_offset = composition_offset_iter.sample_offset();
}
int64_t decode_time = 0;
if (traf)
decode_time = traf->decode_time.decode_time;
if (composition_offset != 0 && decode_time == 0) {
LOG(WARNING) << "Seeing non-zero composition offset "
<< composition_offset
<< ". An EditList is probably missing.";
if (FLAGS_mp4_reset_initial_composition_offset_to_zero) {
LOG(WARNING)
<< "Adjusting timestamps by " << -composition_offset
<< ". Please file a bug to "
"https://github.com/google/shaka-packager/issues if you "
"do not think it is right or if you are seeing any problems.";
timestamp_adjustment = -composition_offset;
}
}
}
timestamp_adjustment_map_.insert(
std::make_pair(track_id, timestamp_adjustment));
return timestamp_adjustment;
} }
} // namespace mp4 } // namespace mp4

View File

@ -5,6 +5,7 @@
#ifndef PACKAGER_MEDIA_FORMATS_MP4_TRACK_RUN_ITERATOR_H_ #ifndef PACKAGER_MEDIA_FORMATS_MP4_TRACK_RUN_ITERATOR_H_
#define PACKAGER_MEDIA_FORMATS_MP4_TRACK_RUN_ITERATOR_H_ #define PACKAGER_MEDIA_FORMATS_MP4_TRACK_RUN_ITERATOR_H_
#include <map>
#include <memory> #include <memory>
#include <vector> #include <vector>
@ -103,7 +104,9 @@ class TrackRunIterator {
private: private:
void ResetRun(); void ResetRun();
const TrackEncryption& track_encryption() const; const TrackEncryption& track_encryption() const;
int64_t GetTimestampAdjustment(const Movie& movie, const Track& track); int64_t GetTimestampAdjustment(const Movie& movie,
const Track& track,
const TrackFragment* traf);
const Movie* moov_; const Movie* moov_;
@ -118,6 +121,9 @@ class TrackRunIterator {
int64_t sample_dts_; int64_t sample_dts_;
int64_t sample_offset_; int64_t sample_offset_;
// TrackId => adjustment map.
std::map<uint32_t, int64_t> timestamp_adjustment_map_;
DISALLOW_COPY_AND_ASSIGN(TrackRunIterator); DISALLOW_COPY_AND_ASSIGN(TrackRunIterator);
}; };

View File

@ -2,12 +2,16 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
#include "packager/media/formats/mp4/track_run_iterator.h"
#include <gflags/gflags.h>
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <stdint.h> #include <stdint.h>
#include <memory> #include <memory>
#include "packager/base/logging.h" #include "packager/base/logging.h"
#include "packager/media/formats/mp4/box_definitions.h" #include "packager/media/formats/mp4/box_definitions.h"
#include "packager/media/formats/mp4/track_run_iterator.h"
DECLARE_bool(mp4_reset_initial_composition_offset_to_zero);
namespace { namespace {
@ -15,6 +19,7 @@ namespace {
// less the value of the last element. // less the value of the last element.
const int kSumAscending1 = 45; const int kSumAscending1 = 45;
const int kMovieScale = 1000;
const int kAudioScale = 48000; const int kAudioScale = 48000;
const int kVideoScale = 25; const int kVideoScale = 25;
@ -108,7 +113,7 @@ class TrackRunIteratorTest : public testing::Test {
std::unique_ptr<TrackRunIterator> iter_; std::unique_ptr<TrackRunIterator> iter_;
void CreateMovie() { void CreateMovie() {
moov_.header.timescale = 1000; moov_.header.timescale = kMovieScale;
moov_.tracks.resize(3); moov_.tracks.resize(3);
moov_.extends.tracks.resize(2); moov_.extends.tracks.resize(2);
moov_.tracks[0].header.track_id = 1; moov_.tracks[0].header.track_id = 1;
@ -384,7 +389,49 @@ TEST_F(TrackRunIteratorTest, FirstSampleFlagTest) {
EXPECT_FALSE(iter_->is_keyframe()); EXPECT_FALSE(iter_->is_keyframe());
} }
// Checks that an edit list entry with media_time == -1 moves the first
// sample's dts and cts forward by the entry's segment_duration.
TEST_F(TrackRunIteratorTest, EmptyEditTest) {
iter_.reset(new TrackRunIterator(&moov_));

// segment_duration is given as 2 * kMovieScale, i.e. in movie timescale units.
EditListEntry entry;
entry.segment_duration = 2 * kMovieScale;
entry.media_time = -1;
entry.media_rate_integer = 1;
entry.media_rate_fraction = 0;
moov_.tracks[1].edit.list.edits.push_back(entry);

// Start the fragment at decode time 0 so the expected values below are
// exactly the edit list adjustment.
MovieFragment moof = CreateFragment();
moof.tracks[1].decode_time.decode_time = 0;

ASSERT_TRUE(iter_->Init(moof));
// NOTE(review): AdvanceRun() presumably moves to the run of tracks[1];
// the fixture's run layout is not visible here - confirm.
iter_->AdvanceRun();
// The same duration expressed with kVideoScale.
// NOTE(review): assumes tracks[1] uses kVideoScale as its media timescale;
// that setup is outside this view - confirm.
EXPECT_EQ(iter_->dts(), 2 * kVideoScale);
EXPECT_EQ(iter_->cts(), 2 * kVideoScale);
}
// Checks that an edit list entry with a non-negative media_time moves the
// first sample's dts and cts back by that media_time.
TEST_F(TrackRunIteratorTest, NormalEditTest) {
iter_.reset(new TrackRunIterator(&moov_));

const int kMediaTime = 5;

EditListEntry entry;
entry.segment_duration = 0;
entry.media_time = kMediaTime;
entry.media_rate_integer = 1;
entry.media_rate_fraction = 0;
moov_.tracks[1].edit.list.edits.push_back(entry);

// Start the fragment at decode time 0 so the expected values below are
// exactly the edit list adjustment.
MovieFragment moof = CreateFragment();
moof.tracks[1].decode_time.decode_time = 0;

ASSERT_TRUE(iter_->Init(moof));
// NOTE(review): AdvanceRun() presumably moves to the run of tracks[1];
// the fixture's run layout is not visible here - confirm.
iter_->AdvanceRun();
// Both timestamps are expected to be the negated media_time.
EXPECT_EQ(iter_->dts(), -kMediaTime);
EXPECT_EQ(iter_->cts(), -kMediaTime);
}
TEST_F(TrackRunIteratorTest, ReorderingTest) { TEST_F(TrackRunIteratorTest, ReorderingTest) {
FLAGS_mp4_reset_initial_composition_offset_to_zero = false;
// Test frame reordering. The frames have the following // Test frame reordering. The frames have the following
// decode timestamps: // decode timestamps:
// //
@ -424,6 +471,71 @@ TEST_F(TrackRunIteratorTest, ReorderingTest) {
EXPECT_EQ(iter_->duration(), 3); EXPECT_EQ(iter_->duration(), 3);
} }
// Frame reordering combined with an explicit edit list: an edit with
// media_time == 2 is expected to shift every dts/cts by -2, which puts the
// first sample's cts at 0.
TEST_F(TrackRunIteratorTest, ReorderingTest_WithEditList) {
// Turn off the initial-composition-offset reset so that the adjustment under
// test comes from the edit list added below.
FLAGS_mp4_reset_initial_composition_offset_to_zero = false;

// See the test above for background.
iter_.reset(new TrackRunIterator(&moov_));
MovieFragment moof = CreateFragment();
std::vector<int64_t>& cts_offsets =
moof.tracks[1].runs[0].sample_composition_time_offsets;
// Only the first three composition offsets are set explicitly and checked.
cts_offsets.resize(10);
cts_offsets[0] = 2;
cts_offsets[1] = 5;
cts_offsets[2] = 0;
moof.tracks[1].decode_time.decode_time = 0;

// media_time equals the first sample's composition offset (2).
EditListEntry entry;
entry.segment_duration = 0;
entry.media_time = 2;
entry.media_rate_integer = 1;
entry.media_rate_fraction = 0;
moov_.tracks[1].edit.list.edits.push_back(entry);

ASSERT_TRUE(iter_->Init(moof));
iter_->AdvanceRun();
// In each check below cts == dts + the sample's composition offset.
// NOTE(review): the durations 1, 2, 3 presumably come from CreateFragment(),
// which is not visible here - confirm.
EXPECT_EQ(iter_->dts(), -2);
EXPECT_EQ(iter_->cts(), 0);
EXPECT_EQ(iter_->duration(), 1);
iter_->AdvanceSample();
EXPECT_EQ(iter_->dts(), -1);
EXPECT_EQ(iter_->cts(), 4);
EXPECT_EQ(iter_->duration(), 2);
iter_->AdvanceSample();
EXPECT_EQ(iter_->dts(), 1);
EXPECT_EQ(iter_->cts(), 1);
EXPECT_EQ(iter_->duration(), 3);
}
// Frame reordering with no edit list added by this test: with the reset flag
// enabled, the first sample's composition offset (2) is expected to be
// subtracted from all timestamps, which puts the first sample's cts at 0.
TEST_F(TrackRunIteratorTest, ReorderingTest_ResetInitialCompositionOffset) {
// Set explicitly because other tests in this file change this global flag.
FLAGS_mp4_reset_initial_composition_offset_to_zero = true;

// See the test above for background.
iter_.reset(new TrackRunIterator(&moov_));
MovieFragment moof = CreateFragment();
std::vector<int64_t>& cts_offsets =
moof.tracks[1].runs[0].sample_composition_time_offsets;
// Only the first three composition offsets are set explicitly and checked.
cts_offsets.resize(10);
cts_offsets[0] = 2;
cts_offsets[1] = 5;
cts_offsets[2] = 0;
// The fragment starts at decode time 0 with a non-zero first composition
// offset.
moof.tracks[1].decode_time.decode_time = 0;

ASSERT_TRUE(iter_->Init(moof));
iter_->AdvanceRun();
// Same expectations as the explicit edit list case with media_time == 2;
// in each check cts == dts + the sample's composition offset.
EXPECT_EQ(iter_->dts(), -2);
EXPECT_EQ(iter_->cts(), 0);
EXPECT_EQ(iter_->duration(), 1);
iter_->AdvanceSample();
EXPECT_EQ(iter_->dts(), -1);
EXPECT_EQ(iter_->cts(), 4);
EXPECT_EQ(iter_->duration(), 2);
iter_->AdvanceSample();
EXPECT_EQ(iter_->dts(), 1);
EXPECT_EQ(iter_->cts(), 1);
EXPECT_EQ(iter_->duration(), 3);
}
TEST_F(TrackRunIteratorTest, IgnoreUnknownAuxInfoTest) { TEST_F(TrackRunIteratorTest, IgnoreUnknownAuxInfoTest) {
iter_.reset(new TrackRunIterator(&moov_)); iter_.reset(new TrackRunIterator(&moov_));
MovieFragment moof = CreateFragment(); MovieFragment moof = CreateFragment();

View File

@ -45,6 +45,7 @@ bear-flac.mp4 - Unfragmented audio-only 44.1kHz FLAC in MP4 file, created using:
ffmpeg -i bear-1280x720.mp4 -map 0:0 -acodec flac -strict -2 bear-flac.mp4 ffmpeg -i bear-1280x720.mp4 -map 0:0 -acodec flac -strict -2 bear-flac.mp4
Note, "-strict -2" was required because current ffmpeg libavformat version Note, "-strict -2" was required because current ffmpeg libavformat version
57.75.100 indicates that flac in MP4 support is experimental. 57.75.100 indicates that flac in MP4 support is experimental.
bear-640x360-no_edit_list.mp4 - Same content as bear-640x360.mp4, but with EditLists removed.
// Non square pixels. // Non square pixels.
bear-640x360-non_square_pixel-with_pasp.mp4 - A non-square pixel version of the video track of bear-640x360.mp4 with PixelAspectRatio box. bear-640x360-non_square_pixel-with_pasp.mp4 - A non-square pixel version of the video track of bear-640x360.mp4 with PixelAspectRatio box.

Binary file not shown.