Sets the duration of the last frame in each WebM Cluster.

This also changes the way frames are written in the WebM muxer.  Now,
frames are stored and written on the next call to AddSample.  So each
call to AddSample will write the previous frame.  This is needed to
determine whether the given frame is the last one in the cluster.

Closes #70

Change-Id: Ic69ebad3c4729cdaa2017c9c7f497048501ac907
This commit is contained in:
Jacob Trimble 2016-01-21 15:58:13 -08:00
parent 810f5b3ab5
commit bb3918e62b
13 changed files with 146 additions and 69 deletions

View File

@ -3,10 +3,10 @@
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 DASH-MPD.xsd" xmlns:cenc="urn:mpeg:cenc:2013" minBufferTime="PT2S" type="static" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" mediaPresentationDuration="PT2.7679998874664307S">
<Period id="0">
<AdaptationSet id="0" contentType="audio">
<Representation id="0" bandwidth="69313" codecs="vorbis" mimeType="audio/webm" audioSamplingRate="44100">
<Representation id="0" bandwidth="69362" codecs="vorbis" mimeType="audio/webm" audioSamplingRate="44100">
<AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/>
<BaseURL>output_audio.webm</BaseURL>
<SegmentBase indexRange="23933-23982" timescale="1000000">
<SegmentBase indexRange="23950-23999" timescale="1000000">
<Initialization range="0-4158"/>
</SegmentBase>
</Representation>

View File

@ -3,9 +3,9 @@
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 DASH-MPD.xsd" xmlns:cenc="urn:mpeg:cenc:2013" minBufferTime="PT2S" type="static" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" mediaPresentationDuration="PT2.7360000610351562S">
<Period id="0">
<AdaptationSet id="0" contentType="video" width="320" height="240" frameRate="1000000/34000" par="16:9">
<Representation id="0" bandwidth="203226" codecs="vp9" mimeType="video/webm" sar="427:320">
<Representation id="0" bandwidth="203313" codecs="vp9" mimeType="video/webm" sar="427:320">
<BaseURL>output_video.webm</BaseURL>
<SegmentBase indexRange="69455-69503" timescale="1000000">
<SegmentBase indexRange="69485-69533" timescale="1000000">
<Initialization range="0-286"/>
</SegmentBase>
</Representation>

View File

@ -3,12 +3,12 @@
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 DASH-MPD.xsd" xmlns:cenc="urn:mpeg:cenc:2013" minBufferTime="PT2S" type="static" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" mediaPresentationDuration="PT2.7360000610351562S">
<Period id="0">
<AdaptationSet id="0" contentType="video" width="640" height="360" frameRate="1000000/33000" par="16:9">
<Representation id="0" bandwidth="336974" codecs="vp8" mimeType="video/webm" sar="1:1">
<Representation id="0" bandwidth="337062" codecs="vp8" mimeType="video/webm" sar="1:1">
<ContentProtection schemeIdUri="urn:uuid:edef8ba9-79d6-4ace-a3c8-27dcd51d21ed" cenc:default_KID="31323334-3536-3738-3930-313233343536">
<cenc:pssh>AAAAMHBzc2gAAAAA7e+LqXnWSs6jyCfc1R0h7QAAABAxMjM0NTY3ODkwMTIzNDU2</cenc:pssh>
</ContentProtection>
<BaseURL>output_video.webm</BaseURL>
<SegmentBase indexRange="115195-115245" timescale="1000000">
<SegmentBase indexRange="115225-115275" timescale="1000000">
<Initialization range="0-339"/>
</SegmentBase>
</Representation>

View File

@ -3,9 +3,9 @@
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 DASH-MPD.xsd" xmlns:cenc="urn:mpeg:cenc:2013" minBufferTime="PT2S" type="static" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" mediaPresentationDuration="PT2.7360000610351562S">
<Period id="0">
<AdaptationSet id="0" contentType="video" width="640" height="360" frameRate="1000000/33000" par="16:9">
<Representation id="0" bandwidth="335366" codecs="vp8" mimeType="video/webm" sar="1:1">
<Representation id="0" bandwidth="335454" codecs="vp8" mimeType="video/webm" sar="1:1">
<BaseURL>output_video.webm</BaseURL>
<SegmentBase indexRange="114646-114695" timescale="1000000">
<SegmentBase indexRange="114676-114725" timescale="1000000">
<Initialization range="0-288"/>
</SegmentBase>
</Representation>

View File

@ -37,8 +37,8 @@ const uint8_t kBasicSupportData[] = {
0x42, 0x87, 0x81, 0x02,
// DocTypeReadVersion: 2
0x42, 0x85, 0x81, 0x02,
// ID: Segment, Payload Size: 400
0x18, 0x53, 0x80, 0x67, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x90,
// ID: Segment, Payload Size: 406
0x18, 0x53, 0x80, 0x67, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x96,
// ID: SeekHead, Payload Size: 30
0x11, 0x4d, 0x9b, 0x74, 0x9e,
// ID: Seek, Payload Size: 12
@ -52,7 +52,7 @@ const uint8_t kBasicSupportData[] = {
// SeekID: binary(4) (Cues)
0x53, 0xab, 0x84, 0x1c, 0x53, 0xbb, 0x6b,
// SeekPosition: 435
0x53, 0xac, 0x82, 0x01, 0xad,
0x53, 0xac, 0x82, 0x01, 0xb3,
// ID: Void, Payload Size: 52
0xec, 0xb4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@ -121,8 +121,8 @@ const uint8_t kBasicSupportData[] = {
0x54, 0xb0, 0x81, 0x64,
// DisplayHeight: 100
0x54, 0xba, 0x81, 0x64,
// ID: Cluster, Payload Size: 95
0x1f, 0x43, 0xb6, 0x75, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5f,
// ID: Cluster, Payload Size: 101
0x1f, 0x43, 0xb6, 0x75, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x65,
// Timecode: 0
0xe7, 0x81, 0x00,
// ID: SimpleBlock, Payload Size: 10
@ -155,14 +155,18 @@ const uint8_t kBasicSupportData[] = {
0x01, 0x23, 0x45, 0x67, 0x89, 0x01, 0x23, 0x47,
// Frame Data:
0x0d, 0x8e, 0xae, 0xbe, 0xd0,
// ID: SimpleBlock, Payload Size: 18
0xa3, 0x92, 0x81, 0x0f, 0xa0, 0x80,
// ID: BlockGroup, Payload Size: 24
0xa0, 0x98,
// ID: Block, Payload Size: 18
0xa1, 0x92, 0x81, 0x0f, 0xa0, 0x00,
// Signal Byte: Encrypted
0x01,
// IV:
0x01, 0x23, 0x45, 0x67, 0x89, 0x01, 0x23, 0x48,
// Frame Data:
0xa5, 0x97, 0xf8, 0x9e, 0x87,
// BlockDuration: 1000
0x9b, 0x82, 0x03, 0xe8,
// ID: Cues, Payload Size: 14
0x1c, 0x53, 0xbb, 0x6b, 0x8e,
// ID: CuePoint, Payload Size: 12

View File

@ -85,8 +85,8 @@ const uint8_t kBasicSupportDataInit[] = {
0x54, 0xba, 0x81, 0x64
};
const uint8_t kBasicSupportDataSegment[] = {
// ID: Cluster, Payload Size: 58
0x1f, 0x43, 0xb6, 0x75, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3a,
// ID: Cluster, Payload Size: 64
0x1f, 0x43, 0xb6, 0x75, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40,
// Timecode: 0
0xe7, 0x81, 0x00,
// ID: SimpleBlock, Payload Size: 9
@ -97,8 +97,12 @@ const uint8_t kBasicSupportDataSegment[] = {
0xa3, 0x89, 0x81, 0x07, 0xd0, 0x80, 0xde, 0xad, 0xbe, 0xef, 0x00,
// ID: SimpleBlock, Payload Size: 9
0xa3, 0x89, 0x81, 0x0b, 0xb8, 0x80, 0xde, 0xad, 0xbe, 0xef, 0x00,
// ID: SimpleBlock, Payload Size: 9
0xa3, 0x89, 0x81, 0x0f, 0xa0, 0x80, 0xde, 0xad, 0xbe, 0xef, 0x00
// ID: BlockGroup, Payload Size: 15
0xa0, 0x8f,
// ID: Block, Payload Size: 9
0xa1, 0x89, 0x81, 0x0f, 0xa0, 0x00, 0xde, 0xad, 0xbe, 0xef, 0x00,
// BlockDuration: 1000
0x9b, 0x82, 0x03, 0xe8
};
} // namespace

View File

@ -27,13 +27,14 @@ int64_t kSecondsToNs = 1000000000L;
} // namespace
Segmenter::Segmenter(const MuxerOptions& options)
: options_(options),
: reference_frame_timestamp_(0),
options_(options),
info_(NULL),
muxer_listener_(NULL),
progress_listener_(NULL),
progress_target_(0),
accumulated_progress_(0),
total_duration_(0),
first_timestamp_(0),
sample_duration_(0),
segment_payload_pos_(0),
cluster_length_sec_(0),
@ -66,7 +67,8 @@ Status Segmenter::Initialize(scoped_ptr<MkvWriter> writer,
segment_info_.set_writing_app(version_string.c_str());
if (options().single_segment) {
// Set an initial duration so the duration element is written; will be
// overwritten at the end.
// overwritten at the end. This works because this is a float and floats
// are always the same size.
segment_info_.set_duration(1);
}
@ -97,12 +99,19 @@ Status Segmenter::Initialize(scoped_ptr<MkvWriter> writer,
}
Status Segmenter::Finalize() {
segment_info_.set_duration(FromBMFFTimescale(total_duration_));
Status status = WriteFrame(true /* write_duration */);
if (!status.ok())
return status;
uint64_t duration =
prev_sample_->pts() - first_timestamp_ + prev_sample_->duration();
segment_info_.set_duration(FromBMFFTimescale(duration));
return DoFinalize();
}
Status Segmenter::AddSample(scoped_refptr<MediaSample> sample) {
if (sample_duration_ == 0) {
first_timestamp_ = sample->pts();
sample_duration_ = sample->duration();
if (muxer_listener_)
muxer_listener_->OnSampleDurationReady(sample_duration_);
@ -110,29 +119,45 @@ Status Segmenter::AddSample(scoped_refptr<MediaSample> sample) {
UpdateProgress(sample->duration());
// Create a new cluster if needed.
// Frames are written with a one-call delay: the previous frame is written
// on this call to AddSample, and the current frame is stored until the next
// call. This is done to determine which frame is the last in a Cluster.
// This first block determines if this is a new Cluster and, if so, writes
// the previous frame before creating the new Cluster.
Status status;
bool wrote_frame = false;
if (!cluster_) {
status = NewSegment(sample->pts());
// First frame, so no previous frame to write.
wrote_frame = true;
} else if (segment_length_sec_ >= options_.segment_duration) {
if (sample->is_key_frame() || !options_.segment_sap_aligned) {
status = NewSegment(sample->pts());
status = WriteFrame(true /* write_duration */);
status.Update(NewSegment(sample->pts()));
segment_length_sec_ = 0;
cluster_length_sec_ = 0;
wrote_frame = true;
}
} else if (cluster_length_sec_ >= options_.fragment_duration) {
if (sample->is_key_frame() || !options_.fragment_sap_aligned) {
status = NewSubsegment(sample->pts());
status = WriteFrame(true /* write_duration */);
status.Update(NewSubsegment(sample->pts()));
cluster_length_sec_ = 0;
wrote_frame = true;
}
}
if (!wrote_frame) {
status = WriteFrame(false /* write_duration */);
}
if (!status.ok())
return status;
// Encrypt the frame.
if (encryptor_) {
const bool encrypt_frame =
static_cast<double>(total_duration_) / info_->time_scale() >=
static_cast<double>(sample->pts() - first_timestamp_) /
info_->time_scale() >=
clear_lead_;
status = encryptor_->EncryptFrame(sample, encrypt_frame);
if (!status.ok()) {
@ -141,36 +166,16 @@ Status Segmenter::AddSample(scoped_refptr<MediaSample> sample) {
}
}
const int64_t time_ns =
sample->pts() * kSecondsToNs / info_->time_scale();
bool addframe_result;
if (sample->side_data_size() > 0) {
uint64_t block_add_id;
// First 8 bytes of side_data is the BlockAddID element's value, which is
// done to mimic ffmpeg behavior. See webm_cluster_parser.cc for details.
CHECK_GT(sample->side_data_size(), sizeof(block_add_id));
memcpy(&block_add_id, sample->side_data(), sizeof(block_add_id));
addframe_result = cluster_->AddFrameWithAdditional(
sample->data(), sample->data_size(),
sample->side_data() + sizeof(block_add_id),
sample->side_data_size() - sizeof(block_add_id), block_add_id,
track_id_, time_ns, sample->is_key_frame());
} else {
addframe_result =
cluster_->AddFrame(sample->data(), sample->data_size(), track_id_,
time_ns, sample->is_key_frame());
}
if (!addframe_result) {
LOG(ERROR) << "Error adding sample to segment.";
return Status(error::FILE_FAILURE, "Error adding sample to segment.");
}
// Add the sample to the durations even though we have not written the frame
// yet. This is needed to make sure we split Clusters at the correct point.
// These are only used in this method.
const double duration_sec =
static_cast<double>(sample->duration()) / info_->time_scale();
cluster_length_sec_ += duration_sec;
segment_length_sec_ += duration_sec;
total_duration_ += sample->duration();
prev_sample_ = sample;
return Status::OK;
}
@ -345,6 +350,60 @@ Status Segmenter::InitializeEncryptor(KeySource* key_source,
}
}
// Writes the stored previous sample (|prev_sample_|) to the current Cluster.
//
// |write_duration|: when true, the sample's duration (converted to
//   nanoseconds) is set on the frame before it is added. As noted below, a
//   frame without a duration can still be written as a SimpleBlock.
//
// Returns Status::OK on success. Returns a MUXER_FAILURE Status if there is
// no stored sample to write or if any of the mkvmuxer calls fails.
Status Segmenter::WriteFrame(bool write_duration) {
  // Nothing has been stored yet (e.g. Finalize called before any AddSample).
  // Report an error instead of dereferencing an empty reference.
  if (!prev_sample_.get()) {
    return Status(error::MUXER_FAILURE,
                  "Error adding sample to segment: no previous sample");
  }

  // Create a frame manually so we can create non-SimpleBlock frames.  This
  // is required to allow the frame duration to be added.  If the duration
  // is not set, then a SimpleBlock will still be written.
  mkvmuxer::Frame frame;
  if (!frame.Init(prev_sample_->data(), prev_sample_->data_size())) {
    return Status(error::MUXER_FAILURE,
                  "Error adding sample to segment: Frame::Init failed");
  }

  // NOTE(review): the timescale -> nanosecond conversions below multiply by
  // kSecondsToNs before dividing; presumably inputs are small enough not to
  // overflow 64 bits -- confirm for very long streams.
  if (write_duration) {
    const uint64_t duration_ns =
        prev_sample_->duration() * kSecondsToNs / info_->time_scale();
    frame.set_duration(duration_ns);
  }
  frame.set_is_key(prev_sample_->is_key_frame());
  frame.set_timestamp(prev_sample_->pts() * kSecondsToNs / info_->time_scale());
  frame.set_track_number(track_id_);

  if (prev_sample_->side_data_size() > 0) {
    uint64_t block_add_id;
    // First 8 bytes of side_data is the BlockAddID element's value, which is
    // done to mimic ffmpeg behavior. See webm_cluster_parser.cc for details.
    CHECK_GT(prev_sample_->side_data_size(), sizeof(block_add_id));
    memcpy(&block_add_id, prev_sample_->side_data(), sizeof(block_add_id));
    // The remainder of side_data (after the 8-byte id) is the additional data.
    if (!frame.AddAdditionalData(
            prev_sample_->side_data() + sizeof(block_add_id),
            prev_sample_->side_data_size() - sizeof(block_add_id),
            block_add_id)) {
      return Status(
          error::MUXER_FAILURE,
          "Error adding sample to segment: Frame::AddAdditionalData Failed");
    }
  }

  // A non-keyframe that cannot be a SimpleBlock needs a reference block
  // timestamp; use the timestamp of the previously written frame.
  if (!prev_sample_->is_key_frame() && !frame.CanBeSimpleBlock()) {
    const int64_t timestamp_ns =
        reference_frame_timestamp_ * kSecondsToNs / info_->time_scale();
    frame.set_reference_block_timestamp(timestamp_ns);
  }

  if (!cluster_->AddFrame(&frame)) {
    return Status(error::MUXER_FAILURE,
                  "Error adding sample to segment: Cluster::AddFrame failed");
  }

  // A reference frame is needed for non-keyframes.  Having a reference to the
  // previous block is good enough.
  // See libwebm Segment::AddGenericFrame
  reference_frame_timestamp_ = prev_sample_->pts();
  return Status::OK;
}
} // namespace webm
} // namespace media
} // namespace edash_packager

View File

@ -114,6 +114,9 @@ class Segmenter {
Status CreateAudioTrack(AudioStreamInfo* info);
Status InitializeEncryptor(KeySource* key_source, uint32_t max_sd_pixels);
// Writes the previous frame (the sample stored in |prev_sample_|) to the
// file. When |write_duration| is true, the sample's duration is also set on
// the frame before it is added to the Cluster; AddSample passes true when it
// is about to start a new segment or subsegment, and Finalize always passes
// true.
Status WriteFrame(bool write_duration);
// This is called when there needs to be a new subsegment. This does nothing
// in single-segment mode. In multi-segment mode this creates a new Cluster
// element.
@ -122,9 +125,12 @@ class Segmenter {
// mode, this creates a new Cluster element. In multi-segment mode this
// creates a new output file.
virtual Status NewSegment(uint64_t start_timescale) = 0;
// This is called when a segment ends. This is called right before a call
// to NewSegment and at the start of Finalize.
Status FinalizeSegment(uint64_t end_timescale);
// Store the previous sample so we know which one is the last frame.
scoped_refptr<MediaSample> prev_sample_;
// The reference frame timestamp; used to populate the ReferenceBlock element
// when writing non-keyframe BlockGroups.
uint64_t reference_frame_timestamp_;
const MuxerOptions& options_;
scoped_ptr<Encryptor> encryptor_;
@ -141,8 +147,8 @@ class Segmenter {
ProgressListener* progress_listener_;
uint64_t progress_target_;
uint64_t accumulated_progress_;
uint64_t total_duration_;
uint64_t sample_duration_;
uint64_t first_timestamp_;
int64_t sample_duration_;
// The position (in bytes) of the start of the Segment payload in the init
// file. This is also the size of the header before the SeekHead.
uint64_t segment_payload_pos_;

View File

@ -33,8 +33,8 @@ const uint8_t kBasicSupportData[] = {
0x42, 0x87, 0x81, 0x02,
// DocTypeReadVersion: 2
0x42, 0x85, 0x81, 0x02,
// ID: Segment, Payload Size: 337
0x18, 0x53, 0x80, 0x67, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x51,
// ID: Segment, Payload Size: 343
0x18, 0x53, 0x80, 0x67, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x57,
// ID: SeekHead, Payload Size: 30
0x11, 0x4d, 0x9b, 0x74, 0x9e,
// ID: Seek, Payload Size: 12
@ -48,7 +48,7 @@ const uint8_t kBasicSupportData[] = {
// SeekID: binary(4) (Cues)
0x53, 0xab, 0x84, 0x1c, 0x53, 0xbb, 0x6b,
// SeekPosition: 373
0x53, 0xac, 0x82, 0x01, 0x6f,
0x53, 0xac, 0x82, 0x01, 0x75,
// ID: Void, Payload Size: 52
0xec, 0xb4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@ -95,8 +95,8 @@ const uint8_t kBasicSupportData[] = {
0x54, 0xb0, 0x81, 0x64,
// DisplayHeight: 100
0x54, 0xba, 0x81, 0x64,
// ID: Cluster, Payload Size: 79
0x1f, 0x43, 0xb6, 0x75, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4f,
// ID: Cluster, Payload Size: 85
0x1f, 0x43, 0xb6, 0x75, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55,
// Timecode: 0
0xe7, 0x81, 0x00,
// ID: SimpleBlock, Payload Size: 9
@ -117,8 +117,12 @@ const uint8_t kBasicSupportData[] = {
0xee, 0x85, 0x9a, 0x78, 0x56, 0x34, 0x12,
// ID: BlockAdditional, Payload Size: 5
0xa5, 0x85, 0x73, 0x69, 0x64, 0x65, 0x00,
// ID: SimpleBlock, Payload Size: 9
0xa3, 0x89, 0x81, 0x0f, 0xa0, 0x80, 0xde, 0xad, 0xbe, 0xef, 0x00,
// ID: BlockGroup, Payload Size: 15
0xa0, 0x8f,
// ID: Block, Payload Size: 9
0xa1, 0x89, 0x81, 0x0f, 0xa0, 0x00, 0xde, 0xad, 0xbe, 0xef, 0x00,
// BlockDuration: 1000
0x9b, 0x82, 0x03, 0xe8,
// ID: Cues, Payload Size: 13
0x1c, 0x53, 0xbb, 0x6b, 0x8d,
// ID: CuePoint, Payload Size: 11