From 5a4234f4dabcedcd83745b7dfbfcc5f66b01ae2c Mon Sep 17 00:00:00 2001 From: KongQun Yang Date: Wed, 14 Oct 2015 16:10:12 -0700 Subject: [PATCH] Update webm_cluster_parser to emit samples Change-Id: I02436cfcb53b96210d6f683227cdabb994f4c01f --- .../media/formats/webm/webm_cluster_parser.cc | 355 +++++------------- .../media/formats/webm/webm_cluster_parser.h | 129 ++----- .../webm/webm_cluster_parser_unittest.cc | 314 ++++++++-------- 3 files changed, 283 insertions(+), 515 deletions(-) diff --git a/packager/media/formats/webm/webm_cluster_parser.cc b/packager/media/formats/webm/webm_cluster_parser.cc index afdfc4c943..7d5f7306a6 100644 --- a/packager/media/formats/webm/webm_cluster_parser.cc +++ b/packager/media/formats/webm/webm_cluster_parser.cc @@ -9,7 +9,7 @@ #include "packager/base/logging.h" #include "packager/base/sys_byteorder.h" #include "packager/media/base/decrypt_config.h" -#include "packager/media/base/timestamp_constants.h" +#include "packager/media/base/timestamp.h" #include "packager/media/filters/webvtt_util.h" #include "packager/media/formats/webm/webm_constants.h" #include "packager/media/formats/webm/webm_crypto_helpers.h" @@ -30,6 +30,10 @@ "may be suppressed): " \ : "") +namespace { +const int64_t kMicrosecondsPerMillisecond = 1000; +} // namespace + namespace edash_packager { namespace media { @@ -50,29 +54,29 @@ enum { WebMClusterParser::WebMClusterParser( int64_t timecode_scale, int audio_track_num, - base::TimeDelta audio_default_duration, + int64_t audio_default_duration, int video_track_num, - base::TimeDelta video_default_duration, + int64_t video_default_duration, const WebMTracksParser::TextTracks& text_tracks, const std::set& ignored_tracks, const std::string& audio_encryption_key_id, const std::string& video_encryption_key_id, - const AudioCodec audio_codec) + const AudioCodec audio_codec, + const MediaParser::NewSampleCB& new_sample_cb) : timecode_multiplier_(timecode_scale / 1000.0), ignored_tracks_(ignored_tracks), audio_encryption_key_id_(audio_encryption_key_id), video_encryption_key_id_(video_encryption_key_id), audio_codec_(audio_codec), parser_(kWebMIdCluster, this), - cluster_start_time_(kNoTimestamp()), - audio_(audio_track_num, false, audio_default_duration), - video_(video_track_num, true, video_default_duration), - ready_buffer_upper_bound_(kNoDecodeTimestamp()) { + cluster_start_time_(kNoTimestamp), + audio_(audio_track_num, false, audio_default_duration, new_sample_cb), + video_(video_track_num, true, video_default_duration, new_sample_cb) { for (WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin(); it != text_tracks.end(); ++it) { - text_track_map_.insert( - std::make_pair(it->first, Track(it->first, false, kNoTimestamp()))); + text_track_map_.insert(std::make_pair( + it->first, Track(it->first, false, kNoTimestamp, new_sample_cb))); } } @@ -81,21 +85,15 @@ WebMClusterParser::~WebMClusterParser() {} void WebMClusterParser::Reset() { last_block_timecode_ = -1; cluster_timecode_ = -1; - cluster_start_time_ = kNoTimestamp(); + cluster_start_time_ = kNoTimestamp; cluster_ended_ = false; parser_.Reset(); audio_.Reset(); video_.Reset(); ResetTextTracks(); - ready_buffer_upper_bound_ = kNoDecodeTimestamp(); } int WebMClusterParser::Parse(const uint8_t* buf, int size) { - audio_.ClearReadyBuffers(); - video_.ClearReadyBuffers(); - ClearTextTrackReadyBuffers(); - ready_buffer_upper_bound_ = kNoDecodeTimestamp(); - int result = parser_.Parse(buf, size); if (result < 0) { @@ -105,16 +103,18 @@ int WebMClusterParser::Parse(const uint8_t* buf, int size) { cluster_ended_ = parser_.IsParsingComplete(); if (cluster_ended_) { + audio_.ApplyDurationEstimateIfNeeded(); + video_.ApplyDurationEstimateIfNeeded(); + // If there were no buffers in this cluster, set the cluster start time to // be the |cluster_timecode_|. - if (cluster_start_time_ == kNoTimestamp()) { + if (cluster_start_time_ == kNoTimestamp) { // If the cluster did not even have a |cluster_timecode_|, signal parse // error. if (cluster_timecode_ < 0) return -1; - cluster_start_time_ = base::TimeDelta::FromMicroseconds( - cluster_timecode_ * timecode_multiplier_); + cluster_start_time_ = cluster_timecode_ * timecode_multiplier_; } // Reset the parser if we're done parsing so that @@ -129,40 +129,7 @@ int WebMClusterParser::Parse(const uint8_t* buf, int size) { return result; } -const WebMClusterParser::BufferQueue& WebMClusterParser::GetAudioBuffers() { - if (ready_buffer_upper_bound_ == kNoDecodeTimestamp()) - UpdateReadyBuffers(); - - return audio_.ready_buffers(); -} - -const WebMClusterParser::BufferQueue& WebMClusterParser::GetVideoBuffers() { - if (ready_buffer_upper_bound_ == kNoDecodeTimestamp()) - UpdateReadyBuffers(); - - return video_.ready_buffers(); -} - -const WebMClusterParser::TextBufferQueueMap& -WebMClusterParser::GetTextBuffers() { - if (ready_buffer_upper_bound_ == kNoDecodeTimestamp()) - UpdateReadyBuffers(); - - // Translate our |text_track_map_| into |text_buffers_map_|, inserting rows in - // the output only for non-empty ready_buffer() queues in |text_track_map_|. - text_buffers_map_.clear(); - for (TextTrackMap::const_iterator itr = text_track_map_.begin(); - itr != text_track_map_.end(); - ++itr) { - const BufferQueue& text_buffers = itr->second.ready_buffers(); - if (!text_buffers.empty()) - text_buffers_map_.insert(std::make_pair(itr->first, text_buffers)); - } - - return text_buffers_map_; -} - -base::TimeDelta WebMClusterParser::TryGetEncodedAudioDuration( +int64_t WebMClusterParser::TryGetEncodedAudioDuration( const uint8_t* data, int size) { @@ -179,24 +146,22 @@ base::TimeDelta WebMClusterParser::TryGetEncodedAudioDuration( // TODO(wolenetz/chcunningham): Implement duration reading for Vorbis. See // motivations in http://crbug.com/396634. - return kNoTimestamp(); + return kNoTimestamp; } -base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data, - int size) { +int64_t WebMClusterParser::ReadOpusDuration(const uint8_t* data, int size) { // Masks and constants for Opus packets. See // https://tools.ietf.org/html/rfc6716#page-14 static const uint8_t kTocConfigMask = 0xf8; static const uint8_t kTocFrameCountCodeMask = 0x03; static const uint8_t kFrameCountMask = 0x3f; - static const base::TimeDelta kPacketDurationMax = - base::TimeDelta::FromMilliseconds(120); + static const int64_t kPacketDurationMax = 120; if (size < 1) { LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs) << "Invalid zero-byte Opus packet; demuxed block duration may be " "imprecise."; - return kNoTimestamp(); + return kNoTimestamp; } // Frame count type described by last 2 bits of Opus TOC byte. @@ -217,7 +182,7 @@ base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data, LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs) << "Second byte missing from 'Code 3' Opus packet; demuxed block " "duration may be imprecise."; - return kNoTimestamp(); + return kNoTimestamp; } frame_count = data[1] & kFrameCountMask; @@ -226,7 +191,7 @@ base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data, LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs) << "Illegal 'Code 3' Opus packet with frame count zero; demuxed " "block duration may be imprecise."; - return kNoTimestamp(); + return kNoTimestamp; } break; @@ -234,7 +199,7 @@ base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data, LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs) << "Unexpected Opus frame count type: " << frame_count_type << "; " << "demuxed block duration may be imprecise."; - return kNoTimestamp(); + return kNoTimestamp; } int opusConfig = (data[0] & kTocConfigMask) >> 3; @@ -242,8 +207,7 @@ base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data, CHECK_LT(opusConfig, static_cast(arraysize(kOpusFrameDurationsMu))); DCHECK_GT(frame_count, 0); - base::TimeDelta duration = base::TimeDelta::FromMicroseconds( - kOpusFrameDurationsMu[opusConfig] * frame_count); + int64_t duration = kOpusFrameDurationsMu[opusConfig] * frame_count; if (duration > kPacketDurationMax) { // Intentionally allowing packet to pass through for now. Decoder should @@ -251,8 +215,8 @@ base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data, // things go sideways. LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs) << "Warning, demuxed Opus packet with encoded duration: " - << duration.InMilliseconds() << "ms. Should be no greater than " - << kPacketDurationMax.InMilliseconds() << "ms."; + << duration << "ms. Should be no greater than " + << kPacketDurationMax << "ms."; } return duration; @@ -261,7 +225,7 @@ base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data, WebMParserClient* WebMClusterParser::OnListStart(int id) { if (id == kWebMIdCluster) { cluster_timecode_ = -1; - cluster_start_time_ = kNoTimestamp(); + cluster_start_time_ = kNoTimestamp; } else if (id == kWebMIdBlockGroup) { block_data_.reset(); block_data_size_ = -1; @@ -444,9 +408,9 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, } Track* track = NULL; - StreamParserBuffer::Type buffer_type = DemuxerStream::AUDIO; + StreamType stream_type = kStreamAudio; std::string encryption_key_id; - base::TimeDelta encoded_duration = kNoTimestamp(); + int64_t encoded_duration = kNoTimestamp; if (track_num == audio_.track_num()) { track = &audio_; encryption_key_id = audio_encryption_key_id_; @@ -456,7 +420,7 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, } else if (track_num == video_.track_num()) { track = &video_; encryption_key_id = video_encryption_key_id_; - buffer_type = DemuxerStream::VIDEO; + stream_type = kStreamVideo; } else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) { return true; } else if (Track* const text_track = FindTextTrack(track_num)) { @@ -465,7 +429,7 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, if (block_duration < 0) // not specified return false; track = text_track; - buffer_type = DemuxerStream::TEXT; + stream_type = kStreamText; } else { LOG(ERROR) << "Unexpected track number " << track_num; return false; @@ -473,11 +437,10 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, last_block_timecode_ = timecode; - base::TimeDelta timestamp = base::TimeDelta::FromMicroseconds( - (cluster_timecode_ + timecode) * timecode_multiplier_); + int64_t timestamp = (cluster_timecode_ + timecode) * timecode_multiplier_; - scoped_refptr buffer; - if (buffer_type != DemuxerStream::TEXT) { + scoped_refptr buffer; + if (stream_type != kStreamText) { // The first bit of the flags is set when a SimpleBlock contains only // keyframes. If this is a Block, then inspection of the payload is // necessary to determine whether it contains a keyframe or not. @@ -499,16 +462,13 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, return false; } - // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId - // type with remapped bytestream track numbers and allow multiple tracks as - // applicable. See https://crbug.com/341581. - buffer = StreamParserBuffer::CopyFrom( - data + data_offset, size - data_offset, - additional, additional_size, - is_keyframe, buffer_type, track_num); + buffer = MediaSample::CopyFrom(data + data_offset, size - data_offset, + additional, additional_size, is_keyframe); - if (decrypt_config) - buffer->set_decrypt_config(decrypt_config.Pass()); + if (decrypt_config) { + // TODO(kqyang): Decrypt it if it is encrypted. + buffer->set_is_encrypted(true); + } } else { std::string id, settings, content; WebMWebVTTParser::Parse(data, size, &id, &settings, &content); @@ -518,25 +478,18 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, settings.begin(), settings.end(), &side_data); - // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId - // type with remapped bytestream track numbers and allow multiple tracks as - // applicable. See https://crbug.com/341581. - buffer = StreamParserBuffer::CopyFrom( - reinterpret_cast(content.data()), - content.length(), - &side_data[0], - side_data.size(), - true, buffer_type, track_num); + buffer = MediaSample::CopyFrom( + reinterpret_cast(content.data()), content.length(), + &side_data[0], side_data.size(), true); } - buffer->set_timestamp(timestamp); - if (cluster_start_time_ == kNoTimestamp()) + buffer->set_pts(timestamp); + if (cluster_start_time_ == kNoTimestamp) cluster_start_time_ = timestamp; - base::TimeDelta block_duration_time_delta = kNoTimestamp(); + int64_t block_duration_time_delta = kNoTimestamp; if (block_duration >= 0) { - block_duration_time_delta = base::TimeDelta::FromMicroseconds( - block_duration * timecode_multiplier_); + block_duration_time_delta = block_duration * timecode_multiplier_; } // Prefer encoded duration over BlockGroup->BlockDuration or @@ -550,126 +503,77 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, // as Block Timecode deltas, or once the whole cluster is parsed in the case // of the last Block in the cluster. See Track::AddBuffer and // ApplyDurationEstimateIfNeeded(). - if (encoded_duration != kNoTimestamp()) { - DCHECK(encoded_duration != kInfiniteDuration()); - DCHECK(encoded_duration > base::TimeDelta()); + if (encoded_duration != kNoTimestamp) { + DCHECK(encoded_duration != kInfiniteDuration); + DCHECK(encoded_duration > 0); buffer->set_duration(encoded_duration); DVLOG(3) << __FUNCTION__ << " : " - << "Using encoded duration " << encoded_duration.InSecondsF(); + << "Using encoded duration " << encoded_duration; - if (block_duration_time_delta != kNoTimestamp()) { - base::TimeDelta duration_difference = + if (block_duration_time_delta != kNoTimestamp) { + int64_t duration_difference = block_duration_time_delta - encoded_duration; - const auto kWarnDurationDiff = - base::TimeDelta::FromMicroseconds(timecode_multiplier_ * 2); - if (duration_difference.magnitude() > kWarnDurationDiff) { + const auto kWarnDurationDiff = timecode_multiplier_ * 2; + if (duration_difference > kWarnDurationDiff) { LIMITED_DLOG(INFO, num_duration_errors_, kMaxDurationErrorLogs) - << "BlockDuration (" << block_duration_time_delta.InMilliseconds() + << "BlockDuration (" << block_duration_time_delta << "ms) differs significantly from encoded duration (" - << encoded_duration.InMilliseconds() << "ms)."; + << encoded_duration << "ms)."; } } - } else if (block_duration_time_delta != kNoTimestamp()) { + } else if (block_duration_time_delta != kNoTimestamp) { buffer->set_duration(block_duration_time_delta); } else { - DCHECK_NE(buffer_type, DemuxerStream::TEXT); buffer->set_duration(track->default_duration()); } - if (discard_padding != 0) { - buffer->set_discard_padding(std::make_pair( - base::TimeDelta(), - base::TimeDelta::FromMicroseconds(discard_padding / 1000))); - } - return track->AddBuffer(buffer); } WebMClusterParser::Track::Track(int track_num, bool is_video, - base::TimeDelta default_duration) + int64_t default_duration, + const MediaParser::NewSampleCB& new_sample_cb) : track_num_(track_num), is_video_(is_video), default_duration_(default_duration), - estimated_next_frame_duration_(kNoTimestamp()) { - DCHECK(default_duration_ == kNoTimestamp() || - default_duration_ > base::TimeDelta()); + estimated_next_frame_duration_(kNoTimestamp), + new_sample_cb_(new_sample_cb) { + DCHECK(default_duration_ == kNoTimestamp || default_duration_ > 0); } WebMClusterParser::Track::~Track() {} -DecodeTimestamp WebMClusterParser::Track::GetReadyUpperBound() { - DCHECK(ready_buffers_.empty()); - if (last_added_buffer_missing_duration_.get()) - return last_added_buffer_missing_duration_->GetDecodeTimestamp(); - - return DecodeTimestamp::FromPresentationTime(base::TimeDelta::Max()); -} - -void WebMClusterParser::Track::ExtractReadyBuffers( - const DecodeTimestamp before_timestamp) { - DCHECK(ready_buffers_.empty()); - DCHECK(DecodeTimestamp() <= before_timestamp); - DCHECK(kNoDecodeTimestamp() != before_timestamp); - - if (buffers_.empty()) - return; - - if (buffers_.back()->GetDecodeTimestamp() < before_timestamp) { - // All of |buffers_| are ready. - ready_buffers_.swap(buffers_); - DVLOG(3) << __FUNCTION__ << " : " << track_num_ << " All " - << ready_buffers_.size() << " are ready: before upper bound ts " - << before_timestamp.InSecondsF(); - return; - } - - // Not all of |buffers_| are ready yet. Move any that are ready to - // |ready_buffers_|. - while (true) { - const scoped_refptr& buffer = buffers_.front(); - if (buffer->GetDecodeTimestamp() >= before_timestamp) - break; - ready_buffers_.push_back(buffer); - buffers_.pop_front(); - DCHECK(!buffers_.empty()); - } - - DVLOG(3) << __FUNCTION__ << " : " << track_num_ << " Only " - << ready_buffers_.size() << " ready, " << buffers_.size() - << " at or after upper bound ts " << before_timestamp.InSecondsF(); -} - bool WebMClusterParser::Track::AddBuffer( - const scoped_refptr& buffer) { + const scoped_refptr& buffer) { DVLOG(2) << "AddBuffer() : " << track_num_ - << " ts " << buffer->timestamp().InSecondsF() - << " dur " << buffer->duration().InSecondsF() + << " ts " << buffer->pts() + << " dur " << buffer->duration() << " kf " << buffer->is_key_frame() << " size " << buffer->data_size(); if (last_added_buffer_missing_duration_.get()) { - base::TimeDelta derived_duration = - buffer->timestamp() - last_added_buffer_missing_duration_->timestamp(); + int64_t derived_duration = + buffer->pts() - last_added_buffer_missing_duration_->pts(); last_added_buffer_missing_duration_->set_duration(derived_duration); DVLOG(2) << "AddBuffer() : applied derived duration to held-back buffer : " << " ts " - << last_added_buffer_missing_duration_->timestamp().InSecondsF() + << last_added_buffer_missing_duration_->pts() << " dur " - << last_added_buffer_missing_duration_->duration().InSecondsF() + << last_added_buffer_missing_duration_->duration() << " kf " << last_added_buffer_missing_duration_->is_key_frame() << " size " << last_added_buffer_missing_duration_->data_size(); - scoped_refptr updated_buffer = + scoped_refptr updated_buffer = last_added_buffer_missing_duration_; last_added_buffer_missing_duration_ = NULL; if (!QueueBuffer(updated_buffer)) return false; } - if (buffer->duration() == kNoTimestamp()) { + if (buffer->duration() == kNoTimestamp) { last_added_buffer_missing_duration_ = buffer; DVLOG(2) << "AddBuffer() : holding back buffer that is missing duration"; return true; @@ -682,46 +586,37 @@ void WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() { if (!last_added_buffer_missing_duration_.get()) return; - base::TimeDelta estimated_duration = GetDurationEstimate(); + int64_t estimated_duration = GetDurationEstimate(); last_added_buffer_missing_duration_->set_duration(estimated_duration); if (is_video_) { // Exposing estimation so splicing/overlap frame processing can make // informed decisions downstream. - // TODO(chcunningham): Set this for audio as well in later change where - // audio is switched to max estimation and splicing is disabled. - last_added_buffer_missing_duration_->set_is_duration_estimated(true); + // TODO(kqyang): Should we wait for the next cluster to set the duration? + // last_added_buffer_missing_duration_->set_is_duration_estimated(true); } LIMITED_LOG(INFO, num_duration_estimates_, kMaxDurationEstimateLogs) << "Estimating WebM block duration to be " - << estimated_duration.InMilliseconds() + << estimated_duration << "ms for the last (Simple)Block in the Cluster for this Track. Use " "BlockGroups with BlockDurations at the end of each Track in a " "Cluster to avoid estimation."; DVLOG(2) << __FUNCTION__ << " new dur : ts " - << last_added_buffer_missing_duration_->timestamp().InSecondsF() + << last_added_buffer_missing_duration_->pts() << " dur " - << last_added_buffer_missing_duration_->duration().InSecondsF() + << last_added_buffer_missing_duration_->duration() << " kf " << last_added_buffer_missing_duration_->is_key_frame() << " size " << last_added_buffer_missing_duration_->data_size(); // Don't use the applied duration as a future estimation (don't use // QueueBuffer() here.) - buffers_.push_back(last_added_buffer_missing_duration_); + new_sample_cb_.Run(track_num_, last_added_buffer_missing_duration_); last_added_buffer_missing_duration_ = NULL; } -void WebMClusterParser::Track::ClearReadyBuffers() { - // Note that |buffers_| are kept and |estimated_next_frame_duration_| is not - // reset here. - ready_buffers_.clear(); -} - void WebMClusterParser::Track::Reset() { - ClearReadyBuffers(); - buffers_.clear(); last_added_buffer_missing_duration_ = NULL; } @@ -749,19 +644,12 @@ bool WebMClusterParser::Track::IsKeyframe(const uint8_t* data, int size) const { } bool WebMClusterParser::Track::QueueBuffer( - const scoped_refptr& buffer) { + const scoped_refptr& buffer) { DCHECK(!last_added_buffer_missing_duration_.get()); - // WebMClusterParser::OnBlock() gives LOG and parse error on decreasing - // block timecode detection within a cluster. Therefore, we should not see - // those here. - DecodeTimestamp previous_buffers_timestamp = buffers_.empty() ? - DecodeTimestamp() : buffers_.back()->GetDecodeTimestamp(); - CHECK(previous_buffers_timestamp <= buffer->GetDecodeTimestamp()); - - base::TimeDelta duration = buffer->duration(); - if (duration < base::TimeDelta() || duration == kNoTimestamp()) { - LOG(ERROR) << "Invalid buffer duration: " << duration.InSecondsF(); + int64_t duration = buffer->duration(); + if (duration < 0 || duration == kNoTimestamp) { + LOG(ERROR) << "Invalid buffer duration: " << duration; return false; } @@ -774,9 +662,9 @@ bool WebMClusterParser::Track::QueueBuffer( // the over-estimated duration of the previous frame. // TODO(chcunningham): Use max for audio and disable splicing whenever // estimated buffers are encountered. - if (duration > base::TimeDelta()) { - base::TimeDelta orig_duration_estimate = estimated_next_frame_duration_; - if (estimated_next_frame_duration_ == kNoTimestamp()) { + if (duration > 0) { + int64_t orig_duration_estimate = estimated_next_frame_duration_; + if (estimated_next_frame_duration_ == kNoTimestamp) { estimated_next_frame_duration_ = duration; } else if (is_video_) { estimated_next_frame_duration_ = @@ -792,45 +680,33 @@ bool WebMClusterParser::Track::QueueBuffer( << " -> " << estimated_next_frame_duration_ << " at timestamp: " - << buffer->GetDecodeTimestamp().InSecondsF(); + << buffer->dts(); } } - buffers_.push_back(buffer); + new_sample_cb_.Run(track_num_, buffer); return true; } -base::TimeDelta WebMClusterParser::Track::GetDurationEstimate() { - base::TimeDelta duration = estimated_next_frame_duration_; - if (duration != kNoTimestamp()) { +int64_t WebMClusterParser::Track::GetDurationEstimate() { + int64_t duration = estimated_next_frame_duration_; + if (duration != kNoTimestamp) { DVLOG(3) << __FUNCTION__ << " : using estimated duration"; } else { DVLOG(3) << __FUNCTION__ << " : using hardcoded default duration"; if (is_video_) { - duration = base::TimeDelta::FromMilliseconds( - kDefaultVideoBufferDurationInMs); + duration = kDefaultVideoBufferDurationInMs * kMicrosecondsPerMillisecond; } else { - duration = base::TimeDelta::FromMilliseconds( - kDefaultAudioBufferDurationInMs); + duration = kDefaultAudioBufferDurationInMs * kMicrosecondsPerMillisecond; } } - DCHECK(duration > base::TimeDelta()); - DCHECK(duration != kNoTimestamp()); + DCHECK(duration > 0); + DCHECK(duration != kNoTimestamp); return duration; } -void WebMClusterParser::ClearTextTrackReadyBuffers() { - text_buffers_map_.clear(); - for (TextTrackMap::iterator it = text_track_map_.begin(); - it != text_track_map_.end(); - ++it) { - it->second.ClearReadyBuffers(); - } -} - void WebMClusterParser::ResetTextTracks() { - ClearTextTrackReadyBuffers(); for (TextTrackMap::iterator it = text_track_map_.begin(); it != text_track_map_.end(); ++it) { @@ -838,37 +714,6 @@ void WebMClusterParser::ResetTextTracks() { } } -void WebMClusterParser::UpdateReadyBuffers() { - DCHECK(ready_buffer_upper_bound_ == kNoDecodeTimestamp()); - DCHECK(text_buffers_map_.empty()); - - if (cluster_ended_) { - audio_.ApplyDurationEstimateIfNeeded(); - video_.ApplyDurationEstimateIfNeeded(); - // Per OnBlock(), all text buffers should already have valid durations, so - // there is no need to call ApplyDurationEstimateIfNeeded() on text tracks - // here. - ready_buffer_upper_bound_ = - DecodeTimestamp::FromPresentationTime(base::TimeDelta::Max()); - DCHECK(ready_buffer_upper_bound_ == audio_.GetReadyUpperBound()); - DCHECK(ready_buffer_upper_bound_ == video_.GetReadyUpperBound()); - } else { - ready_buffer_upper_bound_ = std::min(audio_.GetReadyUpperBound(), - video_.GetReadyUpperBound()); - DCHECK(DecodeTimestamp() <= ready_buffer_upper_bound_); - DCHECK(kNoDecodeTimestamp() != ready_buffer_upper_bound_); - } - - // Prepare each track's ready buffers for retrieval. - audio_.ExtractReadyBuffers(ready_buffer_upper_bound_); - video_.ExtractReadyBuffers(ready_buffer_upper_bound_); - for (TextTrackMap::iterator itr = text_track_map_.begin(); - itr != text_track_map_.end(); - ++itr) { - itr->second.ExtractReadyBuffers(ready_buffer_upper_bound_); - } -} - WebMClusterParser::Track* WebMClusterParser::FindTextTrack(int track_num) { const TextTrackMap::iterator it = text_track_map_.find(track_num); diff --git a/packager/media/formats/webm/webm_cluster_parser.h b/packager/media/formats/webm/webm_cluster_parser.h index dec1ea966c..5ee6ecdda8 100644 --- a/packager/media/formats/webm/webm_cluster_parser.h +++ b/packager/media/formats/webm/webm_cluster_parser.h @@ -11,9 +11,8 @@ #include #include "packager/base/memory/scoped_ptr.h" -#include "packager/media/base/audio_decoder_config.h" -#include "packager/media/base/stream_parser.h" -#include "packager/media/base/stream_parser_buffer.h" +#include "packager/media/base/media_parser.h" +#include "packager/media/base/media_sample.h" #include "packager/media/formats/webm/webm_parser.h" #include "packager/media/formats/webm/webm_tracks_parser.h" @@ -22,10 +21,6 @@ namespace media { class WebMClusterParser : public WebMParserClient { public: - typedef StreamParser::TrackId TrackId; - typedef std::deque > BufferQueue; - typedef std::map TextBufferQueueMap; - // Numbers chosen to estimate the duration of a buffer if none is set and // there is not enough information to get a better estimate. enum { @@ -49,29 +44,18 @@ class WebMClusterParser : public WebMParserClient { public: Track(int track_num, bool is_video, - base::TimeDelta default_duration); + int64_t default_duration, + const MediaParser::NewSampleCB& new_sample_cb); ~Track(); int track_num() const { return track_num_; } - // If a buffer is currently held aside pending duration calculation, returns - // its decode timestamp. Otherwise, returns kInfiniteDuration(). - DecodeTimestamp GetReadyUpperBound(); - - // Prepares |ready_buffers_| for retrieval. Prior to calling, - // |ready_buffers_| must be empty. Moves all |buffers_| with decode - // timestamp before |before_timestamp| to |ready_buffers_|, preserving their - // order. - void ExtractReadyBuffers(const DecodeTimestamp before_timestamp); - - const BufferQueue& ready_buffers() const { return ready_buffers_; } - // If |last_added_buffer_missing_duration_| is set, updates its duration // relative to |buffer|'s timestamp, and adds it to |buffers_| and unsets // |last_added_buffer_missing_duration_|. Then, if |buffer| is missing // duration, saves |buffer| into |last_added_buffer_missing_duration_|, or // otherwise adds |buffer| to |buffers_|. - bool AddBuffer(const scoped_refptr& buffer); + bool AddBuffer(const scoped_refptr& buffer); // If |last_added_buffer_missing_duration_| is set, updates its duration to // be non-kNoTimestamp() value of |estimated_next_frame_duration_| or a @@ -80,14 +64,8 @@ class WebMClusterParser : public WebMParserClient { // emit all buffers in a media segment before signaling end of segment.) void ApplyDurationEstimateIfNeeded(); - // Clears |ready_buffers_| (use ExtractReadyBuffers() to fill it again). - // Leaves as-is |buffers_| and any possibly held-aside buffer that is - // missing duration. - void ClearReadyBuffers(); - // Clears all buffer state, including any possibly held-aside buffer that - // was missing duration, and all contents of |buffers_| and - // |ready_buffers_|. + // was missing duration, and all contents of |buffers_|. void Reset(); // Helper function used to inspect block data to determine if the @@ -96,18 +74,18 @@ class WebMClusterParser : public WebMParserClient { // |size| indicates the number of bytes in |data|. bool IsKeyframe(const uint8_t* data, int size) const; - base::TimeDelta default_duration() const { return default_duration_; } + int64_t default_duration() const { return default_duration_; } private: // Helper that sanity-checks |buffer| duration, updates // |estimated_next_frame_duration_|, and adds |buffer| to |buffers_|. // Returns false if |buffer| failed sanity check and therefore was not added // to |buffers_|. Returns true otherwise. - bool QueueBuffer(const scoped_refptr& buffer); + bool QueueBuffer(const scoped_refptr& buffer); // Helper that calculates the buffer duration to use in // ApplyDurationEstimateIfNeeded(). - base::TimeDelta GetDurationEstimate(); + int64_t GetDurationEstimate(); // Counts the number of estimated durations used in this track. Used to // prevent log spam for LOG()s about estimated duration. @@ -120,26 +98,19 @@ class WebMClusterParser : public WebMParserClient { // that have not yet been extracted into |ready_buffers_|. Note that up to // one additional buffer missing duration may be tracked by // |last_added_buffer_missing_duration_|. - BufferQueue buffers_; - scoped_refptr last_added_buffer_missing_duration_; - - // Buffers in (decode) timestamp order that were previously parsed into and - // extracted from |buffers_|. Buffers are moved from |buffers_| to - // |ready_buffers_| by ExtractReadyBuffers() if they are below a specified - // upper bound timestamp. Track users can therefore extract only those - // parsed buffers which are "ready" for emission (all before some maximum - // timestamp). - BufferQueue ready_buffers_; + scoped_refptr last_added_buffer_missing_duration_; // If kNoTimestamp(), then |estimated_next_frame_duration_| will be used. - base::TimeDelta default_duration_; + int64_t default_duration_; // If kNoTimestamp(), then a default value will be used. This estimate is // the maximum (for video), or minimum (for audio) duration seen so far for // this track, and is used only if |default_duration_| is kNoTimestamp(). // TODO(chcunningham): Use maximum for audio too, adding checks to disable // splicing when these estimates are observed in SourceBufferStream. - base::TimeDelta estimated_next_frame_duration_; + int64_t estimated_next_frame_duration_; + + MediaParser::NewSampleCB new_sample_cb_; }; typedef std::map TextTrackMap; @@ -147,14 +118,15 @@ class WebMClusterParser : public WebMParserClient { public: WebMClusterParser(int64_t timecode_scale, int audio_track_num, - base::TimeDelta audio_default_duration, + int64_t audio_default_duration, int video_track_num, - base::TimeDelta video_default_duration, + int64_t video_default_duration, const WebMTracksParser::TextTracks& text_tracks, const std::set& ignored_tracks, const std::string& audio_encryption_key_id, const std::string& video_encryption_key_id, - const AudioCodec audio_codec); + const AudioCodec audio_codec, + const MediaParser::NewSampleCB& new_sample_cb); ~WebMClusterParser() override; // Resets the parser state so it can accept a new cluster. @@ -167,35 +139,7 @@ class WebMClusterParser : public WebMParserClient { // Returns the number of bytes parsed on success. int Parse(const uint8_t* buf, int size); - base::TimeDelta cluster_start_time() const { return cluster_start_time_; } - - // Get the current ready buffers resulting from Parse(). - // If the parse reached the end of cluster and the last buffer was held aside - // due to missing duration, the buffer is given an estimated duration and - // included in the result. - // Otherwise, if there are is a buffer held aside due to missing duration for - // any of the tracks, no buffers with same or greater (decode) timestamp will - // be included in the buffers. - // The returned deques are cleared by Parse() or Reset() and updated by the - // next calls to Get{Audio,Video}Buffers(). - // If no Parse() or Reset() has occurred since the last call to Get{Audio, - // Video,Text}Buffers(), then the previous BufferQueue& is returned again - // without any recalculation. - const BufferQueue& GetAudioBuffers(); - const BufferQueue& GetVideoBuffers(); - - // Constructs and returns a subset of |text_track_map_| containing only - // tracks with non-empty buffer queues produced by the last Parse() and - // filtered to exclude any buffers that have (decode) timestamp same or - // greater than the lowest (decode) timestamp across all tracks of any buffer - // held aside due to missing duration (unless the end of cluster has been - // reached). - // The returned map is cleared by Parse() or Reset() and updated by the next - // call to GetTextBuffers(). - // If no Parse() or Reset() has occurred since the last call to - // GetTextBuffers(), then the previous TextBufferQueueMap& is returned again - // without any recalculation. - const TextBufferQueueMap& GetTextBuffers(); + int64_t cluster_start_time() const { return cluster_start_time_; } // Returns true if the last Parse() call stopped at the end of a cluster. bool cluster_ended() const { return cluster_ended_; } @@ -228,22 +172,6 @@ class WebMClusterParser : public WebMParserClient { // Resets the Track objects associated with each text track. void ResetTextTracks(); - // Clears the the ready buffers associated with each text track. - void ClearTextTrackReadyBuffers(); - - // Helper method for Get{Audio,Video,Text}Buffers() that recomputes - // |ready_buffer_upper_bound_| and calls ExtractReadyBuffers() on each track. - // If |cluster_ended_| is true, first applies duration estimate if needed for - // |audio_| and |video_| and sets |ready_buffer_upper_bound_| to - // kInfiniteDuration(). Otherwise, sets |ready_buffer_upper_bound_| to the - // minimum upper bound across |audio_| and |video_|. (Text tracks can have no - // buffers missing duration, so they are not involved in calculating the upper - // bound.) - // Parse() or Reset() must be called between calls to UpdateReadyBuffers() to - // clear each track's ready buffers and to reset |ready_buffer_upper_bound_| - // to kNoDecodeTimestamp(). - void UpdateReadyBuffers(); - // Search for the indicated track_num among the text tracks. Returns NULL // if that track num is not a text track. Track* FindTextTrack(int track_num); @@ -256,11 +184,11 @@ class WebMClusterParser : public WebMParserClient { // Cluster we parse, so we can't simply use the delta of the first Block in // the next Cluster). Avoid calling if encrypted; may produce unexpected // output. See implementation for supported codecs. - base::TimeDelta TryGetEncodedAudioDuration(const uint8_t* data, int size); + int64_t TryGetEncodedAudioDuration(const uint8_t* data, int size); // Reads Opus packet header to determine packet duration. Duration returned // as TimeDelta or kNoTimestamp() upon failure to read duration from packet. - base::TimeDelta ReadOpusDuration(const uint8_t* data, int size); + int64_t ReadOpusDuration(const uint8_t* data, int size); // Tracks the number of LOGs made in process of reading encoded // duration. Useful to prevent log spam. @@ -290,26 +218,13 @@ class WebMClusterParser : public WebMParserClient { bool discard_padding_set_ = false; int64_t cluster_timecode_ = -1; - base::TimeDelta cluster_start_time_; + int64_t cluster_start_time_; bool cluster_ended_ = false; Track audio_; Track video_; TextTrackMap text_track_map_; - // Subset of |text_track_map_| maintained by GetTextBuffers(), and cleared by - // ClearTextTrackReadyBuffers(). Callers of GetTextBuffers() get a const-ref - // to this member. - TextBufferQueueMap text_buffers_map_; - - // Limits the range of buffers returned by Get{Audio,Video,Text}Buffers() to - // this exclusive upper bound. Set to kNoDecodeTimestamp(), meaning not yet - // calculated, by Reset() and Parse(). If kNoDecodeTimestamp(), then - // Get{Audio,Video,Text}Buffers() will calculate it to be the minimum (decode) - // timestamp across all tracks' |last_buffer_missing_duration_|, or - // kInfiniteDuration() if no buffers are currently missing duration. - DecodeTimestamp ready_buffer_upper_bound_; - DISALLOW_IMPLICIT_CONSTRUCTORS(WebMClusterParser); }; diff --git a/packager/media/formats/webm/webm_cluster_parser_unittest.cc b/packager/media/formats/webm/webm_cluster_parser_unittest.cc index 2b7c008f6d..7a3c821890 100644 --- a/packager/media/formats/webm/webm_cluster_parser_unittest.cc +++ b/packager/media/formats/webm/webm_cluster_parser_unittest.cc @@ -15,9 +15,8 @@ #include "packager/base/bind.h" #include "packager/base/logging.h" #include "packager/base/strings/string_number_conversions.h" -#include "packager/media/base/audio_decoder_config.h" #include "packager/media/base/decrypt_config.h" -#include "packager/media/base/timestamp_constants.h" +#include "packager/media/base/timestamp.h" #include "packager/media/formats/webm/cluster_builder.h" #include "packager/media/formats/webm/opus_packet_builder.h" #include "packager/media/formats/webm/webm_constants.h" @@ -29,10 +28,15 @@ using ::testing::StrictMock; using ::testing::Mock; using ::testing::_; +namespace { +const int64_t kMicrosecondsPerMillisecond = 1000; +} // namespace + namespace edash_packager { namespace media { typedef WebMTracksParser::TextTracks TextTracks; +typedef std::map TextBufferQueueMap; // Matchers for verifying common media log entry strings. MATCHER_P(OpusPacketDurationTooHigh, actual_duration_ms, "") { @@ -115,9 +119,11 @@ const BlockInfo kDefaultBlockInfo[] = { const uint8_t kEncryptedFrame[] = { // Block is encrypted 0x01, - // IV - 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}; + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + // Some dummy encrypted data + 0x01, +}; scoped_ptr CreateCluster(int timecode, const BlockInfo* block_info, @@ -125,7 +131,8 @@ scoped_ptr CreateCluster(int timecode, ClusterBuilder cb; cb.SetClusterTimecode(0); - uint8_t kDefaultBlockData[] = { 0x00 }; + // Default block data for audio, video and text. + uint8_t kDefaultBlockData[] = {0x00, 0x0A, 0x01, 0x0D, 0x02}; for (int i = 0; i < block_count; i++) { const uint8_t* data; @@ -171,16 +178,16 @@ scoped_ptr CreateEncryptedCluster(int bytes_to_write) { return cb.Finish(); } -bool VerifyBuffers(const WebMClusterParser::BufferQueue& audio_buffers, - const WebMClusterParser::BufferQueue& video_buffers, - const WebMClusterParser::BufferQueue& text_buffers, - const BlockInfo* block_info, - int block_count) { +bool VerifyBuffersHelper(const BufferQueue& audio_buffers, + const BufferQueue& video_buffers, + const BufferQueue& text_buffers, + const BlockInfo* block_info, + int block_count) { int buffer_count = audio_buffers.size() + video_buffers.size() + text_buffers.size(); if (block_count != buffer_count) { - DVLOG(1) << __FUNCTION__ << " : block_count (" << block_count - << ") mismatches buffer_count (" << buffer_count << ")"; + LOG(ERROR) << __FUNCTION__ << " : block_count (" << block_count + << ") mismatches buffer_count (" << buffer_count << ")"; return false; } @@ -188,73 +195,48 @@ bool VerifyBuffers(const WebMClusterParser::BufferQueue& audio_buffers, size_t video_offset = 0; size_t text_offset = 0; for (int i = 0; i < block_count; i++) { - const WebMClusterParser::BufferQueue* buffers = NULL; + const BufferQueue* buffers = NULL; size_t* offset; - StreamParserBuffer::Type expected_type = DemuxerStream::UNKNOWN; if (block_info[i].track_num == kAudioTrackNum) { buffers = &audio_buffers; offset = &audio_offset; - expected_type = DemuxerStream::AUDIO; } else if (block_info[i].track_num == kVideoTrackNum) { buffers = &video_buffers; offset = &video_offset; - expected_type = DemuxerStream::VIDEO; } else if (block_info[i].track_num == kTextTrackNum) { buffers = &text_buffers; offset = &text_offset; - expected_type = DemuxerStream::TEXT; } else { LOG(ERROR) << "Unexpected track number " << block_info[i].track_num; return false; } if (*offset >= buffers->size()) { - DVLOG(1) << __FUNCTION__ << " : Too few buffers (" << buffers->size() - << ") for track_num (" << block_info[i].track_num - << "), expected at least " << *offset + 1 << " buffers"; + LOG(ERROR) << __FUNCTION__ << " : Too few buffers (" << buffers->size() + << ") for track_num (" << block_info[i].track_num + << "), expected at least " << *offset + 1 << " buffers"; return false; } - scoped_refptr buffer = (*buffers)[(*offset)++]; + scoped_refptr buffer = (*buffers)[(*offset)++]; - EXPECT_EQ(block_info[i].timestamp, buffer->timestamp().InMilliseconds()); - EXPECT_EQ(std::abs(block_info[i].duration), - buffer->duration().InMillisecondsF()); - EXPECT_EQ(expected_type, buffer->type()); - EXPECT_EQ(block_info[i].track_num, buffer->track_id()); + EXPECT_EQ(block_info[i].timestamp * kMicrosecondsPerMillisecond, + buffer->pts()); + EXPECT_EQ(std::abs(block_info[i].duration) * kMicrosecondsPerMillisecond, + buffer->duration()); } return true; } -bool VerifyBuffers(const scoped_ptr& parser, - const BlockInfo* block_info, - int block_count) { - const WebMClusterParser::TextBufferQueueMap& text_map = - parser->GetTextBuffers(); - const WebMClusterParser::BufferQueue* text_buffers; - const WebMClusterParser::BufferQueue no_text_buffers; - if (!text_map.empty()) - text_buffers = &(text_map.rbegin()->second); - else - text_buffers = &no_text_buffers; - - return VerifyBuffers(parser->GetAudioBuffers(), - parser->GetVideoBuffers(), - *text_buffers, - block_info, - block_count); -} - -bool VerifyTextBuffers(const scoped_ptr& parser, - const BlockInfo* block_info_ptr, +bool VerifyTextBuffers(const BlockInfo* block_info_ptr, int block_count, int text_track_num, - const WebMClusterParser::BufferQueue& text_buffers) { + const BufferQueue& text_buffers) { const BlockInfo* const block_info_end = block_info_ptr + block_count; - typedef WebMClusterParser::BufferQueue::const_iterator TextBufferIter; + typedef BufferQueue::const_iterator TextBufferIter; TextBufferIter buffer_iter = text_buffers.begin(); const TextBufferIter buffer_end = text_buffers.end(); @@ -267,30 +249,19 @@ bool VerifyTextBuffers(const scoped_ptr& parser, EXPECT_FALSE(block_info.use_simple_block); EXPECT_FALSE(buffer_iter == buffer_end); - const scoped_refptr buffer = *buffer_iter++; - EXPECT_EQ(block_info.timestamp, buffer->timestamp().InMilliseconds()); - EXPECT_EQ(std::abs(block_info.duration), - buffer->duration().InMillisecondsF()); - EXPECT_EQ(DemuxerStream::TEXT, buffer->type()); - EXPECT_EQ(text_track_num, buffer->track_id()); + const scoped_refptr buffer = *buffer_iter++; + EXPECT_EQ(block_info.timestamp * kMicrosecondsPerMillisecond, + buffer->pts()); + EXPECT_EQ(std::abs(block_info.duration) * kMicrosecondsPerMillisecond, + buffer->duration()); } EXPECT_TRUE(buffer_iter == buffer_end); return true; } -void VerifyEncryptedBuffer(scoped_refptr buffer) { - EXPECT_TRUE(buffer->decrypt_config()); - EXPECT_EQ(static_cast(DecryptConfig::kDecryptionKeySize), - buffer->decrypt_config()->iv().length()); -} - -void AppendToEnd(const WebMClusterParser::BufferQueue& src, - WebMClusterParser::BufferQueue* dest) { - for (WebMClusterParser::BufferQueue::const_iterator itr = src.begin(); - itr != src.end(); ++itr) { - dest->push_back(*itr); - } +void VerifyEncryptedBuffer(scoped_refptr buffer) { + EXPECT_TRUE(buffer->is_encrypted()); } } // namespace @@ -301,23 +272,43 @@ class WebMClusterParserTest : public testing::Test { protected: void ResetParserToHaveDefaultDurations() { - base::TimeDelta default_audio_duration = base::TimeDelta::FromMilliseconds( - kTestAudioFrameDefaultDurationInMs); - base::TimeDelta default_video_duration = base::TimeDelta::FromMilliseconds( - kTestVideoFrameDefaultDurationInMs); - ASSERT_GE(default_audio_duration, base::TimeDelta()); - ASSERT_GE(default_video_duration, base::TimeDelta()); - ASSERT_NE(kNoTimestamp(), default_audio_duration); - ASSERT_NE(kNoTimestamp(), default_video_duration); + int64_t default_audio_duration = + kTestAudioFrameDefaultDurationInMs * kMicrosecondsPerMillisecond; + int64_t default_video_duration = + kTestVideoFrameDefaultDurationInMs * kMicrosecondsPerMillisecond; + ASSERT_GE(default_audio_duration, 0); + ASSERT_GE(default_video_duration, 0); + ASSERT_NE(kNoTimestamp, default_audio_duration); + ASSERT_NE(kNoTimestamp, default_video_duration); parser_.reset(CreateParserWithDefaultDurationsAndOptionalTextTracks( default_audio_duration, default_video_duration)); } + bool NewSampleEvent(uint32_t track_id, + const scoped_refptr& sample) { + switch (track_id) { + case kAudioTrackNum: + audio_buffers_.push_back(sample); + break; + case kVideoTrackNum: + video_buffers_.push_back(sample); + break; + case kTextTrackNum: + case kTextTrackNum + 1: + text_buffers_map_[track_id].push_back(sample); + break; + default: + LOG(ERROR) << "Unexpected track number " << track_id; + return false; + } + return true; + } + // Helper that hard-codes some non-varying constructor parameters. WebMClusterParser* CreateParserHelper( - base::TimeDelta audio_default_duration, - base::TimeDelta video_default_duration, + int64_t audio_default_duration, + int64_t video_default_duration, const WebMTracksParser::TextTracks& text_tracks, const std::set& ignored_tracks, const std::string& audio_encryption_key_id, @@ -326,12 +317,14 @@ class WebMClusterParserTest : public testing::Test { return new WebMClusterParser( kTimecodeScale, kAudioTrackNum, audio_default_duration, kVideoTrackNum, video_default_duration, text_tracks, ignored_tracks, - audio_encryption_key_id, video_encryption_key_id, audio_codec); + audio_encryption_key_id, video_encryption_key_id, audio_codec, + base::Bind(&WebMClusterParserTest::NewSampleEvent, + base::Unretained(this))); } // Create a default version of the parser for test. WebMClusterParser* CreateDefaultParser() { - return CreateParserHelper(kNoTimestamp(), kNoTimestamp(), TextTracks(), + return CreateParserHelper(kNoTimestamp, kNoTimestamp, TextTracks(), std::set(), std::string(), std::string(), kUnknownAudioCodec); } @@ -339,8 +332,8 @@ class WebMClusterParserTest : public testing::Test { // Create a parser for test with custom audio and video default durations, and // optionally custom text tracks. WebMClusterParser* CreateParserWithDefaultDurationsAndOptionalTextTracks( - base::TimeDelta audio_default_duration, - base::TimeDelta video_default_duration, + int64_t audio_default_duration, + int64_t video_default_duration, const WebMTracksParser::TextTracks& text_tracks = TextTracks()) { return CreateParserHelper(audio_default_duration, video_default_duration, text_tracks, std::set(), std::string(), @@ -350,7 +343,7 @@ class WebMClusterParserTest : public testing::Test { // Create a parser for test with custom ignored tracks. WebMClusterParser* CreateParserWithIgnoredTracks( std::set& ignored_tracks) { - return CreateParserHelper(kNoTimestamp(), kNoTimestamp(), TextTracks(), + return CreateParserHelper(kNoTimestamp, kNoTimestamp, TextTracks(), ignored_tracks, std::string(), std::string(), kUnknownAudioCodec); } @@ -360,22 +353,31 @@ class WebMClusterParserTest : public testing::Test { const std::string& audio_encryption_key_id, const std::string& video_encryption_key_id, const AudioCodec audio_codec) { - return CreateParserHelper(kNoTimestamp(), kNoTimestamp(), TextTracks(), + return CreateParserHelper(kNoTimestamp, kNoTimestamp, TextTracks(), std::set(), audio_encryption_key_id, video_encryption_key_id, audio_codec); } + bool VerifyBuffers(const BlockInfo* block_info, int block_count) { + bool result = VerifyBuffersHelper(audio_buffers_, video_buffers_, + text_buffers_map_[kTextTrackNum], + block_info, block_count); + audio_buffers_.clear(); + video_buffers_.clear(); + text_buffers_map_.clear(); + return result; + } + scoped_ptr parser_; + BufferQueue audio_buffers_; + BufferQueue video_buffers_; + TextBufferQueueMap text_buffers_map_; private: DISALLOW_COPY_AND_ASSIGN(WebMClusterParserTest); }; -TEST_F(WebMClusterParserTest, HeldBackBufferHoldsBackAllTracks) { - // If a buffer is missing duration and is being held back, then all other - // tracks' buffers that have same or higher (decode) timestamp should be held - // back too to keep the timestamps emitted for a cluster monotonically - // non-decreasing and in same order as parsed. +TEST_F(WebMClusterParserTest, TracksWithSampleMissingDuration) { InSequence s; // Reset the parser to have 3 tracks: text, video (no default frame duration), @@ -384,12 +386,12 @@ TEST_F(WebMClusterParserTest, HeldBackBufferHoldsBackAllTracks) { text_tracks.insert(std::make_pair(TextTracks::key_type(kTextTrackNum), TextTrackConfig(kTextSubtitles, "", "", ""))); - base::TimeDelta default_audio_duration = - base::TimeDelta::FromMilliseconds(kTestAudioFrameDefaultDurationInMs); - ASSERT_GE(default_audio_duration, base::TimeDelta()); - ASSERT_NE(kNoTimestamp(), default_audio_duration); + int64_t default_audio_duration = kTestAudioFrameDefaultDurationInMs; + ASSERT_GE(default_audio_duration, 0); + ASSERT_NE(kNoTimestamp, default_audio_duration); parser_.reset(CreateParserWithDefaultDurationsAndOptionalTextTracks( - default_audio_duration, kNoTimestamp(), text_tracks)); + default_audio_duration * kMicrosecondsPerMillisecond, kNoTimestamp, + text_tracks)); const int kExpectedVideoEstimationInMs = 33; @@ -405,15 +407,28 @@ TEST_F(WebMClusterParserTest, HeldBackBufferHoldsBackAllTracks) { {kAudioTrackNum, 83, kTestAudioFrameDefaultDurationInMs, true, NULL, 0}, }; + // Samples are not emitted in the same order as |kBlockInfo| due to missing of + // duration in some samples. + const BlockInfo kExpectedBlockInfo[] = { + {kAudioTrackNum, 0, 23, false, NULL, 0}, + {kTextTrackNum, 10, 42, false, NULL, 0}, + {kAudioTrackNum, 23, kTestAudioFrameDefaultDurationInMs, true, NULL, 0}, + {kVideoTrackNum, 0, 33, true, NULL, 0}, + {kAudioTrackNum, 36, kTestAudioFrameDefaultDurationInMs, true, NULL, 0}, + {kVideoTrackNum, 33, 33, true, NULL, 0}, + {kAudioTrackNum, 70, kTestAudioFrameDefaultDurationInMs, true, NULL, 0}, + {kVideoTrackNum, 66, kExpectedVideoEstimationInMs, true, NULL, 0}, + {kAudioTrackNum, 83, kTestAudioFrameDefaultDurationInMs, true, NULL, 0}, + }; const int kExpectedBuffersOnPartialCluster[] = { 0, // Video simple block without DefaultDuration should be held back - 0, // Audio buffer ready, but not emitted because its TS >= held back video - 0, // Text buffer ready, but not emitted because its TS >= held back video - 0, // 2nd audio buffer ready, also not emitted for same reason as first - 4, // All previous buffers emitted, 2nd video held back with no duration - 4, // 2nd video still has no duration, 3rd audio ready but not emitted - 6, // All previous buffers emitted, 3rd video held back with no duration - 6, // 3rd video still has no duration, 4th audio ready but not emitted + 1, // Audio buffer ready + 2, // Text buffer ready + 3, // 2nd audio buffer ready + 4, // 1st video emitted, 2nd video held back with no duration + 5, // 3rd audio ready + 6, // 2nd video emitted, 3rd video held back with no duration + 7, // 4th audio ready 9, // Cluster end emits all buffers and 3rd video's duration is estimated }; @@ -451,8 +466,8 @@ TEST_F(WebMClusterParserTest, HeldBackBufferHoldsBackAllTracks) { EXPECT_LT(0, result); } - EXPECT_TRUE(VerifyBuffers(parser_, kBlockInfo, - kExpectedBuffersOnPartialCluster[i])); + EXPECT_TRUE( + VerifyBuffers(kExpectedBlockInfo, kExpectedBuffersOnPartialCluster[i])); } } @@ -468,13 +483,13 @@ TEST_F(WebMClusterParserTest, Reset) { EXPECT_GT(result, 0); EXPECT_LT(result, cluster->size()); - ASSERT_TRUE(VerifyBuffers(parser_, kDefaultBlockInfo, block_count - 1)); + ASSERT_TRUE(VerifyBuffers(kDefaultBlockInfo, block_count - 1)); parser_->Reset(); // Now parse a whole cluster to verify that all the blocks will get parsed. result = parser_->Parse(cluster->data(), cluster->size()); EXPECT_EQ(cluster->size(), result); - ASSERT_TRUE(VerifyBuffers(parser_, kDefaultBlockInfo, block_count)); + ASSERT_TRUE(VerifyBuffers(kDefaultBlockInfo, block_count)); } TEST_F(WebMClusterParserTest, ParseClusterWithSingleCall) { @@ -483,16 +498,16 @@ TEST_F(WebMClusterParserTest, ParseClusterWithSingleCall) { int result = parser_->Parse(cluster->data(), cluster->size()); EXPECT_EQ(cluster->size(), result); - ASSERT_TRUE(VerifyBuffers(parser_, kDefaultBlockInfo, block_count)); + ASSERT_TRUE(VerifyBuffers(kDefaultBlockInfo, block_count)); } TEST_F(WebMClusterParserTest, ParseClusterWithMultipleCalls) { int block_count = arraysize(kDefaultBlockInfo); scoped_ptr cluster(CreateCluster(0, kDefaultBlockInfo, block_count)); - WebMClusterParser::BufferQueue audio_buffers; - WebMClusterParser::BufferQueue video_buffers; - const WebMClusterParser::BufferQueue no_text_buffers; + BufferQueue audio_buffers; + BufferQueue video_buffers; + const BufferQueue no_text_buffers; const uint8_t* data = cluster->data(); int size = cluster->size(); @@ -511,17 +526,12 @@ TEST_F(WebMClusterParserTest, ParseClusterWithMultipleCalls) { continue; } - AppendToEnd(parser_->GetAudioBuffers(), &audio_buffers); - AppendToEnd(parser_->GetVideoBuffers(), &video_buffers); - parse_size = default_parse_size; data += result; size -= result; } - ASSERT_TRUE(VerifyBuffers(audio_buffers, video_buffers, - no_text_buffers, kDefaultBlockInfo, - block_count)); + ASSERT_TRUE(VerifyBuffers(kDefaultBlockInfo, block_count)); } // Verify that both BlockGroups with the BlockDuration before the Block @@ -552,7 +562,7 @@ TEST_F(WebMClusterParserTest, ParseBlockGroup) { int result = parser_->Parse(kClusterData, kClusterSize); EXPECT_EQ(kClusterSize, result); - ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo, block_count)); + ASSERT_TRUE(VerifyBuffers(kBlockInfo, block_count)); } TEST_F(WebMClusterParserTest, ParseSimpleBlockAndBlockGroupMixture) { @@ -568,7 +578,7 @@ TEST_F(WebMClusterParserTest, ParseSimpleBlockAndBlockGroupMixture) { int result = parser_->Parse(cluster->data(), cluster->size()); EXPECT_EQ(cluster->size(), result); - ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo, block_count)); + ASSERT_TRUE(VerifyBuffers(kBlockInfo, block_count)); } TEST_F(WebMClusterParserTest, IgnoredTracks) { @@ -601,7 +611,7 @@ TEST_F(WebMClusterParserTest, IgnoredTracks) { int result = parser_->Parse(cluster->data(), cluster->size()); EXPECT_EQ(cluster->size(), result); - ASSERT_TRUE(VerifyBuffers(parser_, kOutputBlockInfo, output_block_count)); + ASSERT_TRUE(VerifyBuffers(kOutputBlockInfo, output_block_count)); } TEST_F(WebMClusterParserTest, ParseTextTracks) { @@ -612,7 +622,7 @@ TEST_F(WebMClusterParserTest, ParseTextTracks) { ""))); parser_.reset(CreateParserWithDefaultDurationsAndOptionalTextTracks( - kNoTimestamp(), kNoTimestamp(), text_tracks)); + kNoTimestamp, kNoTimestamp, text_tracks)); const BlockInfo kInputBlockInfo[] = { {kAudioTrackNum, 0, 23, true, NULL, 0}, @@ -630,7 +640,7 @@ TEST_F(WebMClusterParserTest, ParseTextTracks) { int result = parser_->Parse(cluster->data(), cluster->size()); EXPECT_EQ(cluster->size(), result); - ASSERT_TRUE(VerifyBuffers(parser_, kInputBlockInfo, input_block_count)); + ASSERT_TRUE(VerifyBuffers(kInputBlockInfo, input_block_count)); } TEST_F(WebMClusterParserTest, TextTracksSimpleBlock) { @@ -641,7 +651,7 @@ TEST_F(WebMClusterParserTest, TextTracksSimpleBlock) { ""))); parser_.reset(CreateParserWithDefaultDurationsAndOptionalTextTracks( - kNoTimestamp(), kNoTimestamp(), text_tracks)); + kNoTimestamp, kNoTimestamp, text_tracks)); const BlockInfo kInputBlockInfo[] = { { kTextTrackNum, 33, 42, true }, @@ -670,7 +680,7 @@ TEST_F(WebMClusterParserTest, ParseMultipleTextTracks) { ""))); parser_.reset(CreateParserWithDefaultDurationsAndOptionalTextTracks( - kNoTimestamp(), kNoTimestamp(), text_tracks)); + kNoTimestamp, kNoTimestamp, text_tracks)); const BlockInfo kInputBlockInfo[] = { {kAudioTrackNum, 0, 23, true, NULL, 0}, @@ -690,16 +700,12 @@ TEST_F(WebMClusterParserTest, ParseMultipleTextTracks) { int result = parser_->Parse(cluster->data(), cluster->size()); EXPECT_EQ(cluster->size(), result); - const WebMClusterParser::TextBufferQueueMap& text_map = - parser_->GetTextBuffers(); - for (WebMClusterParser::TextBufferQueueMap::const_iterator itr = - text_map.begin(); - itr != text_map.end(); - ++itr) { + for (TextBufferQueueMap::const_iterator itr = text_buffers_map_.begin(); + itr != text_buffers_map_.end(); ++itr) { const TextTracks::const_iterator find_result = text_tracks.find(itr->first); ASSERT_TRUE(find_result != text_tracks.end()); - ASSERT_TRUE(VerifyTextBuffers(parser_, kInputBlockInfo, input_block_count, + ASSERT_TRUE(VerifyTextBuffers(kInputBlockInfo, input_block_count, itr->first, itr->second)); } } @@ -712,14 +718,14 @@ TEST_F(WebMClusterParserTest, ParseEncryptedBlock) { int result = parser_->Parse(cluster->data(), cluster->size()); EXPECT_EQ(cluster->size(), result); - ASSERT_EQ(1UL, parser_->GetVideoBuffers().size()); - scoped_refptr buffer = parser_->GetVideoBuffers()[0]; + ASSERT_EQ(1UL, video_buffers_.size()); + scoped_refptr buffer = video_buffers_[0]; VerifyEncryptedBuffer(buffer); } TEST_F(WebMClusterParserTest, ParseBadEncryptedBlock) { scoped_ptr cluster( - CreateEncryptedCluster(sizeof(kEncryptedFrame) - 1)); + CreateEncryptedCluster(sizeof(kEncryptedFrame) - 2)); parser_.reset(CreateParserWithKeyIdsAndAudioCodec( std::string(), "video_key_id", kUnknownAudioCodec)); @@ -753,7 +759,7 @@ TEST_F(WebMClusterParserTest, ParseInvalidTextBlockGroupWithoutDuration) { ""))); parser_.reset(CreateParserWithDefaultDurationsAndOptionalTextTracks( - kNoTimestamp(), kNoTimestamp(), text_tracks)); + kNoTimestamp, kNoTimestamp, text_tracks)); const BlockInfo kBlockInfo[] = { { kTextTrackNum, 33, -42, false }, @@ -791,14 +797,14 @@ TEST_F(WebMClusterParserTest, ParseWithDefaultDurationsSimpleBlocks) { int result = parser_->Parse(cluster->data(), cluster->size() - 1); EXPECT_GT(result, 0); EXPECT_LT(result, cluster->size()); - ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo, block_count - 1)); + ASSERT_TRUE(VerifyBuffers(kBlockInfo, block_count - 1)); parser_->Reset(); // Now parse a whole cluster to verify that all the blocks will get parsed. result = parser_->Parse(cluster->data(), cluster->size()); EXPECT_EQ(cluster->size(), result); - ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo, block_count)); + ASSERT_TRUE(VerifyBuffers(kBlockInfo, block_count)); } TEST_F(WebMClusterParserTest, ParseWithoutAnyDurationsSimpleBlocks) { @@ -809,7 +815,7 @@ TEST_F(WebMClusterParserTest, ParseWithoutAnyDurationsSimpleBlocks) { // last block in a cluster is estimated independently for each track in the // cluster. For video tracks we use the maximum seen so far. For audio we use // the the minimum. - // TODO(chcunningham): Move audio over to use the maximum. + // TODO: Move audio over to use the maximum. const int kExpectedAudioEstimationInMs = 22; const int kExpectedVideoEstimationInMs = 34; @@ -834,16 +840,16 @@ TEST_F(WebMClusterParserTest, ParseWithoutAnyDurationsSimpleBlocks) { int result = parser_->Parse(cluster1->data(), cluster1->size() - 1); EXPECT_GT(result, 0); EXPECT_LT(result, cluster1->size()); - ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo1, block_count1 - 3)); - EXPECT_EQ(3UL, parser_->GetAudioBuffers().size()); - EXPECT_EQ(1UL, parser_->GetVideoBuffers().size()); + EXPECT_EQ(3UL, audio_buffers_.size()); + EXPECT_EQ(1UL, video_buffers_.size()); + ASSERT_TRUE(VerifyBuffers(kBlockInfo1, block_count1 - 3)); parser_->Reset(); // Now parse the full first cluster and verify all the blocks are parsed. result = parser_->Parse(cluster1->data(), cluster1->size()); EXPECT_EQ(cluster1->size(), result); - ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo1, block_count1)); + ASSERT_TRUE(VerifyBuffers(kBlockInfo1, block_count1)); // Verify that the estimated frame duration is tracked across clusters for // each track. @@ -858,7 +864,7 @@ TEST_F(WebMClusterParserTest, ParseWithoutAnyDurationsSimpleBlocks) { scoped_ptr cluster2(CreateCluster(0, kBlockInfo2, block_count2)); result = parser_->Parse(cluster2->data(), cluster2->size()); EXPECT_EQ(cluster2->size(), result); - ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo2, block_count2)); + ASSERT_TRUE(VerifyBuffers(kBlockInfo2, block_count2)); } TEST_F(WebMClusterParserTest, ParseWithoutAnyDurationsBlockGroups) { @@ -894,16 +900,16 @@ TEST_F(WebMClusterParserTest, ParseWithoutAnyDurationsBlockGroups) { int result = parser_->Parse(cluster1->data(), cluster1->size() - 1); EXPECT_GT(result, 0); EXPECT_LT(result, cluster1->size()); - ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo1, block_count1 - 3)); - EXPECT_EQ(3UL, parser_->GetAudioBuffers().size()); - EXPECT_EQ(1UL, parser_->GetVideoBuffers().size()); + EXPECT_EQ(3UL, audio_buffers_.size()); + EXPECT_EQ(1UL, video_buffers_.size()); + ASSERT_TRUE(VerifyBuffers(kBlockInfo1, block_count1 - 3)); parser_->Reset(); // Now parse the full first cluster and verify all the blocks are parsed. result = parser_->Parse(cluster1->data(), cluster1->size()); EXPECT_EQ(cluster1->size(), result); - ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo1, block_count1)); + ASSERT_TRUE(VerifyBuffers(kBlockInfo1, block_count1)); // Verify that the estimated frame duration is tracked across clusters for // each track. @@ -916,7 +922,7 @@ TEST_F(WebMClusterParserTest, ParseWithoutAnyDurationsBlockGroups) { scoped_ptr cluster2(CreateCluster(0, kBlockInfo2, block_count2)); result = parser_->Parse(cluster2->data(), cluster2->size()); EXPECT_EQ(cluster2->size(), result); - ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo2, block_count2)); + ASSERT_TRUE(VerifyBuffers(kBlockInfo2, block_count2)); } // TODO(wolenetz): Is parser behavior correct? See http://crbug.com/363433. @@ -952,14 +958,14 @@ TEST_F(WebMClusterParserTest, int result = parser_->Parse(cluster->data(), cluster->size() - 1); EXPECT_GT(result, 0); EXPECT_LT(result, cluster->size()); - ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo, block_count - 1)); + ASSERT_TRUE(VerifyBuffers(kBlockInfo, block_count - 1)); parser_->Reset(); // Now parse a whole cluster to verify that all the blocks will get parsed. result = parser_->Parse(cluster->data(), cluster->size()); EXPECT_EQ(cluster->size(), result); - ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo, block_count)); + ASSERT_TRUE(VerifyBuffers(kBlockInfo, block_count)); } TEST_F(WebMClusterParserTest, @@ -982,7 +988,7 @@ TEST_F(WebMClusterParserTest, scoped_ptr cluster(CreateCluster(0, kBlockInfo, block_count)); int result = parser_->Parse(cluster->data(), cluster->size()); EXPECT_EQ(cluster->size(), result); - ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo, block_count)); + ASSERT_TRUE(VerifyBuffers(kBlockInfo, block_count)); } TEST_F(WebMClusterParserTest, @@ -998,7 +1004,7 @@ TEST_F(WebMClusterParserTest, scoped_ptr cluster(CreateCluster(0, kBlockInfo, block_count)); int result = parser_->Parse(cluster->data(), cluster->size()); EXPECT_EQ(cluster->size(), result); - ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo, block_count)); + ASSERT_TRUE(VerifyBuffers(kBlockInfo, block_count)); } TEST_F(WebMClusterParserTest, ReadOpusDurationsSimpleBlockAtEndOfCluster) { @@ -1022,7 +1028,7 @@ TEST_F(WebMClusterParserTest, ReadOpusDurationsSimpleBlockAtEndOfCluster) { int result = parser_->Parse(cluster->data(), cluster->size()); EXPECT_EQ(cluster->size(), result); - ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo, block_count)); + ASSERT_TRUE(VerifyBuffers(kBlockInfo, block_count)); loop_count++; } @@ -1040,9 +1046,11 @@ TEST_F(WebMClusterParserTest, PreferOpusDurationsOverBlockDurations) { parser_.reset(CreateParserWithKeyIdsAndAudioCodec( std::string(), std::string(), kCodecOpus)); + // Setting BlockDuration != Opus duration to see which one the parser uses. + int block_duration_ms = packet_ptr->duration_ms() + 10; BlockInfo block_infos[] = {{kAudioTrackNum, 0, - block_duration_ms, + static_cast(block_duration_ms), false, // Not a SimpleBlock. packet_ptr->data(), packet_ptr->size()}}; @@ -1056,7 +1064,7 @@ TEST_F(WebMClusterParserTest, PreferOpusDurationsOverBlockDurations) { // duration to be that of the Opus packet to verify it was preferred. block_infos[0].duration = packet_ptr->duration_ms(); - ASSERT_TRUE(VerifyBuffers(parser_, block_infos, block_count)); + ASSERT_TRUE(VerifyBuffers(block_infos, block_count)); loop_count++; } @@ -1090,7 +1098,7 @@ TEST_F(WebMClusterParserTest, DontReadEncodedDurationWhenEncrypted) { EXPECT_EQ(cluster->size(), result); // Will verify that duration of buffer matches that of BlockDuration. - ASSERT_TRUE(VerifyBuffers(parser_, kBlockInfo, block_count)); + ASSERT_TRUE(VerifyBuffers(kBlockInfo, block_count)); } } // namespace media