421 lines
12 KiB
C++
421 lines
12 KiB
C++
|
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
||
|
// Use of this source code is governed by a BSD-style license that can be
|
||
|
// found in the LICENSE file.
|
||
|
|
||
|
#include "media/webm/webm_cluster_parser.h"
|
||
|
|
||
|
#include <vector>
|
||
|
|
||
|
#include "base/logging.h"
|
||
|
#include "base/sys_byteorder.h"
|
||
|
#include "media/base/buffers.h"
|
||
|
#include "media/base/decrypt_config.h"
|
||
|
#include "media/webm/webm_constants.h"
|
||
|
#include "media/webm/webm_crypto_helpers.h"
|
||
|
|
||
|
namespace media {
|
||
|
|
||
|
// Creates an iterator positioned at the first entry of |text_track_map|.
// Only the begin/end positions are captured; no entries are copied.
WebMClusterParser::TextTrackIterator::TextTrackIterator(
    const TextTrackMap& text_track_map)
    : iterator_(text_track_map.begin()),
      iterator_end_(text_track_map.end()) {}
|
||
|
|
||
|
// Copy constructor: the new iterator starts at the same position as |rhs| and
// stops at the same end position.
WebMClusterParser::TextTrackIterator::TextTrackIterator(
    const TextTrackIterator& rhs)
    : iterator_(rhs.iterator_),
      iterator_end_(rhs.iterator_end_) {}
|
||
|
|
||
|
// Nothing to release explicitly; the destructor body is empty.
WebMClusterParser::TextTrackIterator::~TextTrackIterator() {}
|
||
|
|
||
|
bool WebMClusterParser::TextTrackIterator::operator()(
|
||
|
int* track_num,
|
||
|
const BufferQueue** buffers) {
|
||
|
if (iterator_ == iterator_end_) {
|
||
|
*track_num = 0;
|
||
|
*buffers = NULL;
|
||
|
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
*track_num = iterator_->first;
|
||
|
*buffers = &iterator_->second.buffers();
|
||
|
|
||
|
++iterator_;
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
// Constructs a parser for WebM Cluster elements.
//
// |timecode_scale| is divided by 1000 to obtain the factor that turns raw
// timecodes into microseconds (presumably the scale is expressed in
// nanoseconds per tick -- confirm against the header).
// |audio_track_num| / |video_track_num| identify the audio and video tracks.
// |text_tracks| lists the text tracks; one Track is registered per entry.
// Blocks belonging to |ignored_tracks| are silently dropped by OnBlock().
// A non-empty |audio_encryption_key_id| / |video_encryption_key_id| causes a
// DecryptConfig to be attached to every buffer of the corresponding track.
// |log_cb| receives parse diagnostics.
WebMClusterParser::WebMClusterParser(
    int64 timecode_scale, int audio_track_num, int video_track_num,
    const WebMTracksParser::TextTracks& text_tracks,
    const std::set<int64>& ignored_tracks,
    const std::string& audio_encryption_key_id,
    const std::string& video_encryption_key_id,
    const LogCB& log_cb)
    : timecode_multiplier_(timecode_scale / 1000.0),
      ignored_tracks_(ignored_tracks),
      audio_encryption_key_id_(audio_encryption_key_id),
      video_encryption_key_id_(video_encryption_key_id),
      parser_(kWebMIdCluster, this),
      // -1 marks each of the following values as "not seen yet".
      last_block_timecode_(-1),
      block_data_size_(-1),
      block_duration_(-1),
      block_add_id_(-1),
      block_additional_data_size_(-1),
      cluster_timecode_(-1),
      cluster_start_time_(kNoTimestamp()),
      cluster_ended_(false),
      audio_(audio_track_num, false),
      video_(video_track_num, true),
      log_cb_(log_cb) {
  // Register a non-video Track for every text track so that OnBlock() can
  // look it up by track number.
  WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin();
  while (it != text_tracks.end()) {
    text_track_map_.insert(std::make_pair(it->first, Track(it->first, false)));
    ++it;
  }
}
|
||
|
|
||
|
// Members clean up after themselves; the destructor body is empty.
WebMClusterParser::~WebMClusterParser() {
}
|
||
|
|
||
|
// Discards all cluster-level parsing progress: the remembered timecodes, the
// cluster start time, the "cluster ended" flag, the state of the underlying
// list parser and every buffer queued on the audio, video and text tracks.
void WebMClusterParser::Reset() {
  // Cluster-level bookkeeping goes back to "nothing seen yet".
  cluster_ended_ = false;
  cluster_start_time_ = kNoTimestamp();
  cluster_timecode_ = -1;
  last_block_timecode_ = -1;

  parser_.Reset();

  // Drop any buffers that were produced but not yet consumed.
  audio_.Reset();
  video_.Reset();
  ResetTextTracks();
}
|
||
|
|
||
|
int WebMClusterParser::Parse(const uint8* buf, int size) {
|
||
|
audio_.Reset();
|
||
|
video_.Reset();
|
||
|
ResetTextTracks();
|
||
|
|
||
|
int result = parser_.Parse(buf, size);
|
||
|
|
||
|
if (result < 0) {
|
||
|
cluster_ended_ = false;
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
cluster_ended_ = parser_.IsParsingComplete();
|
||
|
if (cluster_ended_) {
|
||
|
// If there were no buffers in this cluster, set the cluster start time to
|
||
|
// be the |cluster_timecode_|.
|
||
|
if (cluster_start_time_ == kNoTimestamp()) {
|
||
|
DCHECK_GT(cluster_timecode_, -1);
|
||
|
cluster_start_time_ = base::TimeDelta::FromMicroseconds(
|
||
|
cluster_timecode_ * timecode_multiplier_);
|
||
|
}
|
||
|
|
||
|
// Reset the parser if we're done parsing so that
|
||
|
// it is ready to accept another cluster on the next
|
||
|
// call.
|
||
|
parser_.Reset();
|
||
|
|
||
|
last_block_timecode_ = -1;
|
||
|
cluster_timecode_ = -1;
|
||
|
}
|
||
|
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
// Returns an iterator over all registered text tracks, positioned at the
// first one.
WebMClusterParser::TextTrackIterator
WebMClusterParser::CreateTextTrackIterator() const {
  TextTrackIterator text_tracks(text_track_map_);
  return text_tracks;
}
|
||
|
|
||
|
// WebMParserClient callback invoked when a list element with the given |id|
// begins. Clears the state that is accumulated per Cluster, per BlockGroup or
// per BlockAdditions element; other ids are ignored. Always returns |this|,
// so this object keeps receiving the callbacks for the nested elements.
WebMParserClient* WebMClusterParser::OnListStart(int id) {
  switch (id) {
    case kWebMIdCluster:
      cluster_timecode_ = -1;
      cluster_start_time_ = kNoTimestamp();
      break;

    case kWebMIdBlockGroup:
      block_data_.reset();
      block_data_size_ = -1;
      block_duration_ = -1;
      break;

    case kWebMIdBlockAdditions:
      block_add_id_ = -1;
      block_additional_data_.reset();
      block_additional_data_size_ = -1;
      break;

    default:
      break;
  }

  return this;
}
|
||
|
|
||
|
bool WebMClusterParser::OnListEnd(int id) {
|
||
|
if (id != kWebMIdBlockGroup)
|
||
|
return true;
|
||
|
|
||
|
// Make sure the BlockGroup actually had a Block.
|
||
|
if (block_data_size_ == -1) {
|
||
|
MEDIA_LOG(log_cb_) << "Block missing from BlockGroup.";
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
bool result = ParseBlock(false, block_data_.get(), block_data_size_,
|
||
|
block_additional_data_.get(),
|
||
|
block_additional_data_size_, block_duration_);
|
||
|
block_data_.reset();
|
||
|
block_data_size_ = -1;
|
||
|
block_duration_ = -1;
|
||
|
block_add_id_ = -1;
|
||
|
block_additional_data_.reset();
|
||
|
block_additional_data_size_ = -1;
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
bool WebMClusterParser::OnUInt(int id, int64 val) {
|
||
|
int64* dst;
|
||
|
switch (id) {
|
||
|
case kWebMIdTimecode:
|
||
|
dst = &cluster_timecode_;
|
||
|
break;
|
||
|
case kWebMIdBlockDuration:
|
||
|
dst = &block_duration_;
|
||
|
break;
|
||
|
case kWebMIdBlockAddID:
|
||
|
dst = &block_add_id_;
|
||
|
break;
|
||
|
default:
|
||
|
return true;
|
||
|
}
|
||
|
if (*dst != -1)
|
||
|
return false;
|
||
|
*dst = val;
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
bool WebMClusterParser::ParseBlock(bool is_simple_block, const uint8* buf,
|
||
|
int size, const uint8* additional,
|
||
|
int additional_size, int duration) {
|
||
|
if (size < 4)
|
||
|
return false;
|
||
|
|
||
|
// Return an error if the trackNum > 127. We just aren't
|
||
|
// going to support large track numbers right now.
|
||
|
if (!(buf[0] & 0x80)) {
|
||
|
MEDIA_LOG(log_cb_) << "TrackNumber over 127 not supported";
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
int track_num = buf[0] & 0x7f;
|
||
|
int timecode = buf[1] << 8 | buf[2];
|
||
|
int flags = buf[3] & 0xff;
|
||
|
int lacing = (flags >> 1) & 0x3;
|
||
|
|
||
|
if (lacing) {
|
||
|
MEDIA_LOG(log_cb_) << "Lacing " << lacing << " is not supported yet.";
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
// Sign extend negative timecode offsets.
|
||
|
if (timecode & 0x8000)
|
||
|
timecode |= ~0xffff;
|
||
|
|
||
|
const uint8* frame_data = buf + 4;
|
||
|
int frame_size = size - (frame_data - buf);
|
||
|
return OnBlock(is_simple_block, track_num, timecode, duration, flags,
|
||
|
frame_data, frame_size, additional, additional_size);
|
||
|
}
|
||
|
|
||
|
bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) {
|
||
|
switch (id) {
|
||
|
case kWebMIdSimpleBlock:
|
||
|
return ParseBlock(true, data, size, NULL, -1, -1);
|
||
|
|
||
|
case kWebMIdBlock:
|
||
|
if (block_data_) {
|
||
|
MEDIA_LOG(log_cb_) << "More than 1 Block in a BlockGroup is not "
|
||
|
"supported.";
|
||
|
return false;
|
||
|
}
|
||
|
block_data_.reset(new uint8[size]);
|
||
|
memcpy(block_data_.get(), data, size);
|
||
|
block_data_size_ = size;
|
||
|
return true;
|
||
|
|
||
|
case kWebMIdBlockAdditional: {
|
||
|
uint64 block_add_id = base::HostToNet64(block_add_id_);
|
||
|
if (block_additional_data_) {
|
||
|
// TODO(vigneshv): Technically, more than 1 BlockAdditional is allowed
|
||
|
// as per matroska spec. But for now we don't have a use case to
|
||
|
// support parsing of such files. Take a look at this again when such a
|
||
|
// case arises.
|
||
|
MEDIA_LOG(log_cb_) << "More than 1 BlockAdditional in a BlockGroup is "
|
||
|
"not supported.";
|
||
|
return false;
|
||
|
}
|
||
|
// First 8 bytes of side_data in DecoderBuffer is the BlockAddID
|
||
|
// element's value in Big Endian format. This is done to mimic ffmpeg
|
||
|
// demuxer's behavior.
|
||
|
block_additional_data_size_ = size + sizeof(block_add_id);
|
||
|
block_additional_data_.reset(new uint8[block_additional_data_size_]);
|
||
|
memcpy(block_additional_data_.get(), &block_add_id,
|
||
|
sizeof(block_add_id));
|
||
|
memcpy(block_additional_data_.get() + 8, data, size);
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
default:
|
||
|
return true;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num,
|
||
|
int timecode,
|
||
|
int block_duration,
|
||
|
int flags,
|
||
|
const uint8* data, int size,
|
||
|
const uint8* additional, int additional_size) {
|
||
|
DCHECK_GE(size, 0);
|
||
|
if (cluster_timecode_ == -1) {
|
||
|
MEDIA_LOG(log_cb_) << "Got a block before cluster timecode.";
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
// TODO(acolwell): Should relative negative timecode offsets be rejected? Or
|
||
|
// only when the absolute timecode is negative? See http://crbug.com/271794
|
||
|
if (timecode < 0) {
|
||
|
MEDIA_LOG(log_cb_) << "Got a block with negative timecode offset "
|
||
|
<< timecode;
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
if (last_block_timecode_ != -1 && timecode < last_block_timecode_) {
|
||
|
MEDIA_LOG(log_cb_)
|
||
|
<< "Got a block with a timecode before the previous block.";
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
Track* track = NULL;
|
||
|
std::string encryption_key_id;
|
||
|
if (track_num == audio_.track_num()) {
|
||
|
track = &audio_;
|
||
|
encryption_key_id = audio_encryption_key_id_;
|
||
|
} else if (track_num == video_.track_num()) {
|
||
|
track = &video_;
|
||
|
encryption_key_id = video_encryption_key_id_;
|
||
|
} else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) {
|
||
|
return true;
|
||
|
} else if (Track* const text_track = FindTextTrack(track_num)) {
|
||
|
if (is_simple_block) // BlockGroup is required for WebVTT cues
|
||
|
return false;
|
||
|
if (block_duration < 0) // not specified
|
||
|
return false;
|
||
|
track = text_track;
|
||
|
} else {
|
||
|
MEDIA_LOG(log_cb_) << "Unexpected track number " << track_num;
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
last_block_timecode_ = timecode;
|
||
|
|
||
|
base::TimeDelta timestamp = base::TimeDelta::FromMicroseconds(
|
||
|
(cluster_timecode_ + timecode) * timecode_multiplier_);
|
||
|
|
||
|
// The first bit of the flags is set when a SimpleBlock contains only
|
||
|
// keyframes. If this is a Block, then inspection of the payload is
|
||
|
// necessary to determine whether it contains a keyframe or not.
|
||
|
// http://www.matroska.org/technical/specs/index.html
|
||
|
bool is_keyframe =
|
||
|
is_simple_block ? (flags & 0x80) != 0 : track->IsKeyframe(data, size);
|
||
|
|
||
|
scoped_refptr<StreamParserBuffer> buffer =
|
||
|
StreamParserBuffer::CopyFrom(data, size, additional, additional_size,
|
||
|
is_keyframe);
|
||
|
|
||
|
// Every encrypted Block has a signal byte and IV prepended to it. Current
|
||
|
// encrypted WebM request for comments specification is here
|
||
|
// http://wiki.webmproject.org/encryption/webm-encryption-rfc
|
||
|
if (!encryption_key_id.empty()) {
|
||
|
scoped_ptr<DecryptConfig> config(WebMCreateDecryptConfig(
|
||
|
data, size,
|
||
|
reinterpret_cast<const uint8*>(encryption_key_id.data()),
|
||
|
encryption_key_id.size()));
|
||
|
if (!config)
|
||
|
return false;
|
||
|
buffer->set_decrypt_config(config.Pass());
|
||
|
}
|
||
|
|
||
|
buffer->set_timestamp(timestamp);
|
||
|
if (cluster_start_time_ == kNoTimestamp())
|
||
|
cluster_start_time_ = timestamp;
|
||
|
|
||
|
if (block_duration >= 0) {
|
||
|
buffer->set_duration(base::TimeDelta::FromMicroseconds(
|
||
|
block_duration * timecode_multiplier_));
|
||
|
}
|
||
|
|
||
|
return track->AddBuffer(buffer);
|
||
|
}
|
||
|
|
||
|
// Creates an empty buffer queue for track |track_num|. |is_video| selects
// whether IsKeyframe() inspects the payload (video) or treats every block as
// a keyframe (everything else).
WebMClusterParser::Track::Track(int track_num, bool is_video)
    : track_num_(track_num), is_video_(is_video) {}
|
||
|
|
||
|
// Nothing to release explicitly; the destructor body is empty.
WebMClusterParser::Track::~Track() {
}
|
||
|
|
||
|
bool WebMClusterParser::Track::AddBuffer(
|
||
|
const scoped_refptr<StreamParserBuffer>& buffer) {
|
||
|
DVLOG(2) << "AddBuffer() : " << track_num_
|
||
|
<< " ts " << buffer->timestamp().InSecondsF()
|
||
|
<< " dur " << buffer->duration().InSecondsF()
|
||
|
<< " kf " << buffer->IsKeyframe()
|
||
|
<< " size " << buffer->data_size();
|
||
|
|
||
|
buffers_.push_back(buffer);
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
// Drops every buffer currently queued on this track.
void WebMClusterParser::Track::Reset() {
  buffers_.clear();
}
|
||
|
|
||
|
bool WebMClusterParser::Track::IsKeyframe(const uint8* data, int size) const {
|
||
|
// For now, assume that all blocks are keyframes for datatypes other than
|
||
|
// video. This is a valid assumption for Vorbis, WebVTT, & Opus.
|
||
|
if (!is_video_)
|
||
|
return true;
|
||
|
|
||
|
// Make sure the block is big enough for the minimal keyframe header size.
|
||
|
if (size < 7)
|
||
|
return false;
|
||
|
|
||
|
// The LSb of the first byte must be a 0 for a keyframe.
|
||
|
// http://tools.ietf.org/html/rfc6386 Section 19.1
|
||
|
if ((data[0] & 0x01) != 0)
|
||
|
return false;
|
||
|
|
||
|
// Verify VP8 keyframe startcode.
|
||
|
// http://tools.ietf.org/html/rfc6386 Section 19.1
|
||
|
if (data[3] != 0x9d || data[4] != 0x01 || data[5] != 0x2a)
|
||
|
return false;
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
void WebMClusterParser::ResetTextTracks() {
|
||
|
for (TextTrackMap::iterator it = text_track_map_.begin();
|
||
|
it != text_track_map_.end();
|
||
|
++it) {
|
||
|
it->second.Reset();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Looks up the text track registered under |track_num|. Returns a pointer to
// the track stored in the map, or NULL if there is no such text track.
WebMClusterParser::Track*
WebMClusterParser::FindTextTrack(int track_num) {
  const TextTrackMap::iterator it = text_track_map_.find(track_num);
  return (it != text_track_map_.end()) ? &it->second : NULL;
}
|
||
|
|
||
|
} // namespace media
|