// Copyright 2014 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "packager/media/formats/webm/webm_cluster_parser.h" #include #include "packager/base/logging.h" #include "packager/base/sys_byteorder.h" #include "packager/media/base/decrypt_config.h" #include "packager/media/base/timestamp_constants.h" #include "packager/media/filters/webvtt_util.h" #include "packager/media/formats/webm/webm_constants.h" #include "packager/media/formats/webm/webm_crypto_helpers.h" #include "packager/media/formats/webm/webm_webvtt_parser.h" // Logs only while |count| < |max|, increments |count| for each log, and warns // in the log if |count| has just reached |max|. #define LIMITED_LOG(level, count, max) \ LOG_IF(level, (count) < (max)) \ << (((count) + 1 == (max)) \ ? "(Log limit reached. Further similar entries " \ "may be suppressed): " \ : "") #define LIMITED_DLOG(level, count, max) \ DLOG_IF(level, (count) < (max)) \ << (((count) + 1 == (max)) \ ? "(Log limit reached. Further similar entries " \ "may be suppressed): " \ : "") namespace edash_packager { namespace media { const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = { 10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000}; enum { // Limits the number of LOG() calls in the path of reading encoded // duration to avoid spamming for corrupted data. kMaxDurationErrorLogs = 10, // Limits the number of LOG() calls warning the user that buffer // durations have been estimated. kMaxDurationEstimateLogs = 10, }; WebMClusterParser::WebMClusterParser( int64_t timecode_scale, int audio_track_num, base::TimeDelta audio_default_duration, int video_track_num, base::TimeDelta video_default_duration, const WebMTracksParser::TextTracks& text_tracks, const std::set& ignored_tracks, const std::string& audio_encryption_key_id, const std::string& video_encryption_key_id, const AudioCodec audio_codec) : timecode_multiplier_(timecode_scale / 1000.0), ignored_tracks_(ignored_tracks), audio_encryption_key_id_(audio_encryption_key_id), video_encryption_key_id_(video_encryption_key_id), audio_codec_(audio_codec), parser_(kWebMIdCluster, this), cluster_start_time_(kNoTimestamp()), audio_(audio_track_num, false, audio_default_duration), video_(video_track_num, true, video_default_duration), ready_buffer_upper_bound_(kNoDecodeTimestamp()) { for (WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin(); it != text_tracks.end(); ++it) { text_track_map_.insert( std::make_pair(it->first, Track(it->first, false, kNoTimestamp()))); } } WebMClusterParser::~WebMClusterParser() {} void WebMClusterParser::Reset() { last_block_timecode_ = -1; cluster_timecode_ = -1; cluster_start_time_ = kNoTimestamp(); cluster_ended_ = false; parser_.Reset(); audio_.Reset(); video_.Reset(); ResetTextTracks(); ready_buffer_upper_bound_ = kNoDecodeTimestamp(); } int WebMClusterParser::Parse(const uint8_t* buf, int size) { audio_.ClearReadyBuffers(); video_.ClearReadyBuffers(); ClearTextTrackReadyBuffers(); ready_buffer_upper_bound_ = kNoDecodeTimestamp(); int result = parser_.Parse(buf, size); if (result < 0) { cluster_ended_ = false; return result; } cluster_ended_ = parser_.IsParsingComplete(); if (cluster_ended_) { // If there were no buffers in this cluster, set the cluster start time to // be the |cluster_timecode_|. if (cluster_start_time_ == kNoTimestamp()) { // If the cluster did not even have a |cluster_timecode_|, signal parse // error. if (cluster_timecode_ < 0) return -1; cluster_start_time_ = base::TimeDelta::FromMicroseconds( cluster_timecode_ * timecode_multiplier_); } // Reset the parser if we're done parsing so that // it is ready to accept another cluster on the next // call. parser_.Reset(); last_block_timecode_ = -1; cluster_timecode_ = -1; } return result; } const WebMClusterParser::BufferQueue& WebMClusterParser::GetAudioBuffers() { if (ready_buffer_upper_bound_ == kNoDecodeTimestamp()) UpdateReadyBuffers(); return audio_.ready_buffers(); } const WebMClusterParser::BufferQueue& WebMClusterParser::GetVideoBuffers() { if (ready_buffer_upper_bound_ == kNoDecodeTimestamp()) UpdateReadyBuffers(); return video_.ready_buffers(); } const WebMClusterParser::TextBufferQueueMap& WebMClusterParser::GetTextBuffers() { if (ready_buffer_upper_bound_ == kNoDecodeTimestamp()) UpdateReadyBuffers(); // Translate our |text_track_map_| into |text_buffers_map_|, inserting rows in // the output only for non-empty ready_buffer() queues in |text_track_map_|. text_buffers_map_.clear(); for (TextTrackMap::const_iterator itr = text_track_map_.begin(); itr != text_track_map_.end(); ++itr) { const BufferQueue& text_buffers = itr->second.ready_buffers(); if (!text_buffers.empty()) text_buffers_map_.insert(std::make_pair(itr->first, text_buffers)); } return text_buffers_map_; } base::TimeDelta WebMClusterParser::TryGetEncodedAudioDuration( const uint8_t* data, int size) { // Duration is currently read assuming the *entire* stream is unencrypted. // The special "Signal Byte" prepended to Blocks in encrypted streams is // assumed to not be present. // TODO(chcunningham): Consider parsing "Signal Byte" for encrypted streams // to return duration for any unencrypted blocks. if (audio_codec_ == kCodecOpus) { return ReadOpusDuration(data, size); } // TODO(wolenetz/chcunningham): Implement duration reading for Vorbis. See // motivations in http://crbug.com/396634. return kNoTimestamp(); } base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data, int size) { // Masks and constants for Opus packets. See // https://tools.ietf.org/html/rfc6716#page-14 static const uint8_t kTocConfigMask = 0xf8; static const uint8_t kTocFrameCountCodeMask = 0x03; static const uint8_t kFrameCountMask = 0x3f; static const base::TimeDelta kPacketDurationMax = base::TimeDelta::FromMilliseconds(120); if (size < 1) { LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs) << "Invalid zero-byte Opus packet; demuxed block duration may be " "imprecise."; return kNoTimestamp(); } // Frame count type described by last 2 bits of Opus TOC byte. int frame_count_type = data[0] & kTocFrameCountCodeMask; int frame_count = 0; switch (frame_count_type) { case 0: frame_count = 1; break; case 1: case 2: frame_count = 2; break; case 3: // Type 3 indicates an arbitrary frame count described in the next byte. if (size < 2) { LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs) << "Second byte missing from 'Code 3' Opus packet; demuxed block " "duration may be imprecise."; return kNoTimestamp(); } frame_count = data[1] & kFrameCountMask; if (frame_count == 0) { LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs) << "Illegal 'Code 3' Opus packet with frame count zero; demuxed " "block duration may be imprecise."; return kNoTimestamp(); } break; default: LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs) << "Unexpected Opus frame count type: " << frame_count_type << "; " << "demuxed block duration may be imprecise."; return kNoTimestamp(); } int opusConfig = (data[0] & kTocConfigMask) >> 3; CHECK_GE(opusConfig, 0); CHECK_LT(opusConfig, static_cast(arraysize(kOpusFrameDurationsMu))); DCHECK_GT(frame_count, 0); base::TimeDelta duration = base::TimeDelta::FromMicroseconds( kOpusFrameDurationsMu[opusConfig] * frame_count); if (duration > kPacketDurationMax) { // Intentionally allowing packet to pass through for now. Decoder should // either handle or fail gracefully. LOG as breadcrumbs in case // things go sideways. LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs) << "Warning, demuxed Opus packet with encoded duration: " << duration.InMilliseconds() << "ms. Should be no greater than " << kPacketDurationMax.InMilliseconds() << "ms."; } return duration; } WebMParserClient* WebMClusterParser::OnListStart(int id) { if (id == kWebMIdCluster) { cluster_timecode_ = -1; cluster_start_time_ = kNoTimestamp(); } else if (id == kWebMIdBlockGroup) { block_data_.reset(); block_data_size_ = -1; block_duration_ = -1; discard_padding_ = -1; discard_padding_set_ = false; } else if (id == kWebMIdBlockAdditions) { block_add_id_ = -1; block_additional_data_.reset(); block_additional_data_size_ = 0; } return this; } bool WebMClusterParser::OnListEnd(int id) { if (id != kWebMIdBlockGroup) return true; // Make sure the BlockGroup actually had a Block. if (block_data_size_ == -1) { LOG(ERROR) << "Block missing from BlockGroup."; return false; } bool result = ParseBlock(false, block_data_.get(), block_data_size_, block_additional_data_.get(), block_additional_data_size_, block_duration_, discard_padding_set_ ? discard_padding_ : 0); block_data_.reset(); block_data_size_ = -1; block_duration_ = -1; block_add_id_ = -1; block_additional_data_.reset(); block_additional_data_size_ = 0; discard_padding_ = -1; discard_padding_set_ = false; return result; } bool WebMClusterParser::OnUInt(int id, int64_t val) { int64_t* dst; switch (id) { case kWebMIdTimecode: dst = &cluster_timecode_; break; case kWebMIdBlockDuration: dst = &block_duration_; break; case kWebMIdBlockAddID: dst = &block_add_id_; break; default: return true; } if (*dst != -1) return false; *dst = val; return true; } bool WebMClusterParser::ParseBlock(bool is_simple_block, const uint8_t* buf, int size, const uint8_t* additional, int additional_size, int duration, int64_t discard_padding) { if (size < 4) return false; // Return an error if the trackNum > 127. We just aren't // going to support large track numbers right now. if (!(buf[0] & 0x80)) { LOG(ERROR) << "TrackNumber over 127 not supported"; return false; } int track_num = buf[0] & 0x7f; int timecode = buf[1] << 8 | buf[2]; int flags = buf[3] & 0xff; int lacing = (flags >> 1) & 0x3; if (lacing) { LOG(ERROR) << "Lacing " << lacing << " is not supported yet."; return false; } // Sign extend negative timecode offsets. if (timecode & 0x8000) timecode |= ~0xffff; const uint8_t* frame_data = buf + 4; int frame_size = size - (frame_data - buf); return OnBlock(is_simple_block, track_num, timecode, duration, flags, frame_data, frame_size, additional, additional_size, discard_padding); } bool WebMClusterParser::OnBinary(int id, const uint8_t* data, int size) { switch (id) { case kWebMIdSimpleBlock: return ParseBlock(true, data, size, NULL, 0, -1, 0); case kWebMIdBlock: if (block_data_) { LOG(ERROR) << "More than 1 Block in a BlockGroup is not " "supported."; return false; } block_data_.reset(new uint8_t[size]); memcpy(block_data_.get(), data, size); block_data_size_ = size; return true; case kWebMIdBlockAdditional: { uint64_t block_add_id = base::HostToNet64(block_add_id_); if (block_additional_data_) { // TODO(vigneshv): Technically, more than 1 BlockAdditional is allowed // as per matroska spec. But for now we don't have a use case to // support parsing of such files. Take a look at this again when such a // case arises. LOG(ERROR) << "More than 1 BlockAdditional in a " "BlockGroup is not supported."; return false; } // First 8 bytes of side_data in DecoderBuffer is the BlockAddID // element's value in Big Endian format. This is done to mimic ffmpeg // demuxer's behavior. block_additional_data_size_ = size + sizeof(block_add_id); block_additional_data_.reset(new uint8_t[block_additional_data_size_]); memcpy(block_additional_data_.get(), &block_add_id, sizeof(block_add_id)); memcpy(block_additional_data_.get() + 8, data, size); return true; } case kWebMIdDiscardPadding: { if (discard_padding_set_ || size <= 0 || size > 8) return false; discard_padding_set_ = true; // Read in the big-endian integer. discard_padding_ = static_cast(data[0]); for (int i = 1; i < size; ++i) discard_padding_ = (discard_padding_ << 8) | data[i]; return true; } default: return true; } } bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num, int timecode, int block_duration, int flags, const uint8_t* data, int size, const uint8_t* additional, int additional_size, int64_t discard_padding) { DCHECK_GE(size, 0); if (cluster_timecode_ == -1) { LOG(ERROR) << "Got a block before cluster timecode."; return false; } // TODO(acolwell): Should relative negative timecode offsets be rejected? Or // only when the absolute timecode is negative? See http://crbug.com/271794 if (timecode < 0) { LOG(ERROR) << "Got a block with negative timecode offset " << timecode; return false; } if (last_block_timecode_ != -1 && timecode < last_block_timecode_) { LOG(ERROR) << "Got a block with a timecode before the previous block."; return false; } Track* track = NULL; StreamParserBuffer::Type buffer_type = DemuxerStream::AUDIO; std::string encryption_key_id; base::TimeDelta encoded_duration = kNoTimestamp(); if (track_num == audio_.track_num()) { track = &audio_; encryption_key_id = audio_encryption_key_id_; if (encryption_key_id.empty()) { encoded_duration = TryGetEncodedAudioDuration(data, size); } } else if (track_num == video_.track_num()) { track = &video_; encryption_key_id = video_encryption_key_id_; buffer_type = DemuxerStream::VIDEO; } else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) { return true; } else if (Track* const text_track = FindTextTrack(track_num)) { if (is_simple_block) // BlockGroup is required for WebVTT cues return false; if (block_duration < 0) // not specified return false; track = text_track; buffer_type = DemuxerStream::TEXT; } else { LOG(ERROR) << "Unexpected track number " << track_num; return false; } last_block_timecode_ = timecode; base::TimeDelta timestamp = base::TimeDelta::FromMicroseconds( (cluster_timecode_ + timecode) * timecode_multiplier_); scoped_refptr buffer; if (buffer_type != DemuxerStream::TEXT) { // The first bit of the flags is set when a SimpleBlock contains only // keyframes. If this is a Block, then inspection of the payload is // necessary to determine whether it contains a keyframe or not. // http://www.matroska.org/technical/specs/index.html bool is_keyframe = is_simple_block ? (flags & 0x80) != 0 : track->IsKeyframe(data, size); // Every encrypted Block has a signal byte and IV prepended to it. Current // encrypted WebM request for comments specification is here // http://wiki.webmproject.org/encryption/webm-encryption-rfc scoped_ptr decrypt_config; int data_offset = 0; if (!encryption_key_id.empty() && !WebMCreateDecryptConfig( data, size, reinterpret_cast(encryption_key_id.data()), encryption_key_id.size(), &decrypt_config, &data_offset)) { return false; } // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId // type with remapped bytestream track numbers and allow multiple tracks as // applicable. See https://crbug.com/341581. buffer = StreamParserBuffer::CopyFrom( data + data_offset, size - data_offset, additional, additional_size, is_keyframe, buffer_type, track_num); if (decrypt_config) buffer->set_decrypt_config(decrypt_config.Pass()); } else { std::string id, settings, content; WebMWebVTTParser::Parse(data, size, &id, &settings, &content); std::vector side_data; MakeSideData(id.begin(), id.end(), settings.begin(), settings.end(), &side_data); // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId // type with remapped bytestream track numbers and allow multiple tracks as // applicable. See https://crbug.com/341581. buffer = StreamParserBuffer::CopyFrom( reinterpret_cast(content.data()), content.length(), &side_data[0], side_data.size(), true, buffer_type, track_num); } buffer->set_timestamp(timestamp); if (cluster_start_time_ == kNoTimestamp()) cluster_start_time_ = timestamp; base::TimeDelta block_duration_time_delta = kNoTimestamp(); if (block_duration >= 0) { block_duration_time_delta = base::TimeDelta::FromMicroseconds( block_duration * timecode_multiplier_); } // Prefer encoded duration over BlockGroup->BlockDuration or // TrackEntry->DefaultDuration when available. This layering violation is a // workaround for http://crbug.com/396634, decreasing the likelihood of // fall-back to rough estimation techniques for Blocks that lack a // BlockDuration at the end of a cluster. Cross cluster durations are not // feasible given flexibility of cluster ordering and MSE APIs. Duration // estimation may still apply in cases of encryption and codecs for which // we do not extract encoded duration. Within a cluster, estimates are applied // as Block Timecode deltas, or once the whole cluster is parsed in the case // of the last Block in the cluster. See Track::AddBuffer and // ApplyDurationEstimateIfNeeded(). if (encoded_duration != kNoTimestamp()) { DCHECK(encoded_duration != kInfiniteDuration()); DCHECK(encoded_duration > base::TimeDelta()); buffer->set_duration(encoded_duration); DVLOG(3) << __FUNCTION__ << " : " << "Using encoded duration " << encoded_duration.InSecondsF(); if (block_duration_time_delta != kNoTimestamp()) { base::TimeDelta duration_difference = block_duration_time_delta - encoded_duration; const auto kWarnDurationDiff = base::TimeDelta::FromMicroseconds(timecode_multiplier_ * 2); if (duration_difference.magnitude() > kWarnDurationDiff) { LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs) << "BlockDuration (" << block_duration_time_delta.InMilliseconds() << "ms) differs significantly from encoded duration (" << encoded_duration.InMilliseconds() << "ms)."; } } } else if (block_duration_time_delta != kNoTimestamp()) { buffer->set_duration(block_duration_time_delta); } else { DCHECK_NE(buffer_type, DemuxerStream::TEXT); buffer->set_duration(track->default_duration()); } if (discard_padding != 0) { buffer->set_discard_padding(std::make_pair( base::TimeDelta(), base::TimeDelta::FromMicroseconds(discard_padding / 1000))); } return track->AddBuffer(buffer); } WebMClusterParser::Track::Track(int track_num, bool is_video, base::TimeDelta default_duration) : track_num_(track_num), is_video_(is_video), default_duration_(default_duration), estimated_next_frame_duration_(kNoTimestamp()) { DCHECK(default_duration_ == kNoTimestamp() || default_duration_ > base::TimeDelta()); } WebMClusterParser::Track::~Track() {} DecodeTimestamp WebMClusterParser::Track::GetReadyUpperBound() { DCHECK(ready_buffers_.empty()); if (last_added_buffer_missing_duration_.get()) return last_added_buffer_missing_duration_->GetDecodeTimestamp(); return DecodeTimestamp::FromPresentationTime(base::TimeDelta::Max()); } void WebMClusterParser::Track::ExtractReadyBuffers( const DecodeTimestamp before_timestamp) { DCHECK(ready_buffers_.empty()); DCHECK(DecodeTimestamp() <= before_timestamp); DCHECK(kNoDecodeTimestamp() != before_timestamp); if (buffers_.empty()) return; if (buffers_.back()->GetDecodeTimestamp() < before_timestamp) { // All of |buffers_| are ready. ready_buffers_.swap(buffers_); DVLOG(3) << __FUNCTION__ << " : " << track_num_ << " All " << ready_buffers_.size() << " are ready: before upper bound ts " << before_timestamp.InSecondsF(); return; } // Not all of |buffers_| are ready yet. Move any that are ready to // |ready_buffers_|. while (true) { const scoped_refptr& buffer = buffers_.front(); if (buffer->GetDecodeTimestamp() >= before_timestamp) break; ready_buffers_.push_back(buffer); buffers_.pop_front(); DCHECK(!buffers_.empty()); } DVLOG(3) << __FUNCTION__ << " : " << track_num_ << " Only " << ready_buffers_.size() << " ready, " << buffers_.size() << " at or after upper bound ts " << before_timestamp.InSecondsF(); } bool WebMClusterParser::Track::AddBuffer( const scoped_refptr& buffer) { DVLOG(2) << "AddBuffer() : " << track_num_ << " ts " << buffer->timestamp().InSecondsF() << " dur " << buffer->duration().InSecondsF() << " kf " << buffer->is_key_frame() << " size " << buffer->data_size(); if (last_added_buffer_missing_duration_.get()) { base::TimeDelta derived_duration = buffer->timestamp() - last_added_buffer_missing_duration_->timestamp(); last_added_buffer_missing_duration_->set_duration(derived_duration); DVLOG(2) << "AddBuffer() : applied derived duration to held-back buffer : " << " ts " << last_added_buffer_missing_duration_->timestamp().InSecondsF() << " dur " << last_added_buffer_missing_duration_->duration().InSecondsF() << " kf " << last_added_buffer_missing_duration_->is_key_frame() << " size " << last_added_buffer_missing_duration_->data_size(); scoped_refptr updated_buffer = last_added_buffer_missing_duration_; last_added_buffer_missing_duration_ = NULL; if (!QueueBuffer(updated_buffer)) return false; } if (buffer->duration() == kNoTimestamp()) { last_added_buffer_missing_duration_ = buffer; DVLOG(2) << "AddBuffer() : holding back buffer that is missing duration"; return true; } return QueueBuffer(buffer); } void WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() { if (!last_added_buffer_missing_duration_.get()) return; base::TimeDelta estimated_duration = GetDurationEstimate(); last_added_buffer_missing_duration_->set_duration(estimated_duration); if (is_video_) { // Exposing estimation so splicing/overlap frame processing can make // informed decisions downstream. // TODO(chcunningham): Set this for audio as well in later change where // audio is switched to max estimation and splicing is disabled. last_added_buffer_missing_duration_->set_is_duration_estimated(true); } LIMITED_LOG(INFO, num_duration_estimates_, kMaxDurationEstimateLogs) << "Estimating WebM block duration to be " << estimated_duration.InMilliseconds() << "ms for the last (Simple)Block in the Cluster for this Track. Use " "BlockGroups with BlockDurations at the end of each Track in a " "Cluster to avoid estimation."; DVLOG(2) << __FUNCTION__ << " new dur : ts " << last_added_buffer_missing_duration_->timestamp().InSecondsF() << " dur " << last_added_buffer_missing_duration_->duration().InSecondsF() << " kf " << last_added_buffer_missing_duration_->is_key_frame() << " size " << last_added_buffer_missing_duration_->data_size(); // Don't use the applied duration as a future estimation (don't use // QueueBuffer() here.) buffers_.push_back(last_added_buffer_missing_duration_); last_added_buffer_missing_duration_ = NULL; } void WebMClusterParser::Track::ClearReadyBuffers() { // Note that |buffers_| are kept and |estimated_next_frame_duration_| is not // reset here. ready_buffers_.clear(); } void WebMClusterParser::Track::Reset() { ClearReadyBuffers(); buffers_.clear(); last_added_buffer_missing_duration_ = NULL; } bool WebMClusterParser::Track::IsKeyframe(const uint8_t* data, int size) const { // For now, assume that all blocks are keyframes for datatypes other than // video. This is a valid assumption for Vorbis, WebVTT, & Opus. if (!is_video_) return true; // Make sure the block is big enough for the minimal keyframe header size. if (size < 7) return false; // The LSb of the first byte must be a 0 for a keyframe. // http://tools.ietf.org/html/rfc6386 Section 19.1 if ((data[0] & 0x01) != 0) return false; // Verify VP8 keyframe startcode. // http://tools.ietf.org/html/rfc6386 Section 19.1 if (data[3] != 0x9d || data[4] != 0x01 || data[5] != 0x2a) return false; return true; } bool WebMClusterParser::Track::QueueBuffer( const scoped_refptr& buffer) { DCHECK(!last_added_buffer_missing_duration_.get()); // WebMClusterParser::OnBlock() gives LOG and parse error on decreasing // block timecode detection within a cluster. Therefore, we should not see // those here. DecodeTimestamp previous_buffers_timestamp = buffers_.empty() ? DecodeTimestamp() : buffers_.back()->GetDecodeTimestamp(); CHECK(previous_buffers_timestamp <= buffer->GetDecodeTimestamp()); base::TimeDelta duration = buffer->duration(); if (duration < base::TimeDelta() || duration == kNoTimestamp()) { LOG(ERROR) << "Invalid buffer duration: " << duration.InSecondsF(); return false; } // The estimated frame duration is the minimum (for audio) or the maximum // (for video) non-zero duration since the last initialization segment. The // minimum is used for audio to ensure frame durations aren't overestimated, // triggering unnecessary frame splicing. For video, splicing does not apply, // so maximum is used and overlap is simply resolved by showing the // later of the overlapping frames at its given PTS, effectively trimming down // the over-estimated duration of the previous frame. // TODO(chcunningham): Use max for audio and disable splicing whenever // estimated buffers are encountered. if (duration > base::TimeDelta()) { base::TimeDelta orig_duration_estimate = estimated_next_frame_duration_; if (estimated_next_frame_duration_ == kNoTimestamp()) { estimated_next_frame_duration_ = duration; } else if (is_video_) { estimated_next_frame_duration_ = std::max(duration, estimated_next_frame_duration_); } else { estimated_next_frame_duration_ = std::min(duration, estimated_next_frame_duration_); } if (orig_duration_estimate != estimated_next_frame_duration_) { DVLOG(3) << "Updated duration estimate:" << orig_duration_estimate << " -> " << estimated_next_frame_duration_ << " at timestamp: " << buffer->GetDecodeTimestamp().InSecondsF(); } } buffers_.push_back(buffer); return true; } base::TimeDelta WebMClusterParser::Track::GetDurationEstimate() { base::TimeDelta duration = estimated_next_frame_duration_; if (duration != kNoTimestamp()) { DVLOG(3) << __FUNCTION__ << " : using estimated duration"; } else { DVLOG(3) << __FUNCTION__ << " : using hardcoded default duration"; if (is_video_) { duration = base::TimeDelta::FromMilliseconds( kDefaultVideoBufferDurationInMs); } else { duration = base::TimeDelta::FromMilliseconds( kDefaultAudioBufferDurationInMs); } } DCHECK(duration > base::TimeDelta()); DCHECK(duration != kNoTimestamp()); return duration; } void WebMClusterParser::ClearTextTrackReadyBuffers() { text_buffers_map_.clear(); for (TextTrackMap::iterator it = text_track_map_.begin(); it != text_track_map_.end(); ++it) { it->second.ClearReadyBuffers(); } } void WebMClusterParser::ResetTextTracks() { ClearTextTrackReadyBuffers(); for (TextTrackMap::iterator it = text_track_map_.begin(); it != text_track_map_.end(); ++it) { it->second.Reset(); } } void WebMClusterParser::UpdateReadyBuffers() { DCHECK(ready_buffer_upper_bound_ == kNoDecodeTimestamp()); DCHECK(text_buffers_map_.empty()); if (cluster_ended_) { audio_.ApplyDurationEstimateIfNeeded(); video_.ApplyDurationEstimateIfNeeded(); // Per OnBlock(), all text buffers should already have valid durations, so // there is no need to call ApplyDurationEstimateIfNeeded() on text tracks // here. ready_buffer_upper_bound_ = DecodeTimestamp::FromPresentationTime(base::TimeDelta::Max()); DCHECK(ready_buffer_upper_bound_ == audio_.GetReadyUpperBound()); DCHECK(ready_buffer_upper_bound_ == video_.GetReadyUpperBound()); } else { ready_buffer_upper_bound_ = std::min(audio_.GetReadyUpperBound(), video_.GetReadyUpperBound()); DCHECK(DecodeTimestamp() <= ready_buffer_upper_bound_); DCHECK(kNoDecodeTimestamp() != ready_buffer_upper_bound_); } // Prepare each track's ready buffers for retrieval. audio_.ExtractReadyBuffers(ready_buffer_upper_bound_); video_.ExtractReadyBuffers(ready_buffer_upper_bound_); for (TextTrackMap::iterator itr = text_track_map_.begin(); itr != text_track_map_.end(); ++itr) { itr->second.ExtractReadyBuffers(ready_buffer_upper_bound_); } } WebMClusterParser::Track* WebMClusterParser::FindTextTrack(int track_num) { const TextTrackMap::iterator it = text_track_map_.find(track_num); if (it == text_track_map_.end()) return NULL; return &it->second; } } // namespace media } // namespace edash_packager