VP9 codec string from bitstream and subsample encryption support

- Parse vp9 bitstream to get vpx codec configuration
- Add subsample encryption for vp9
- Also fix a bug in the VP9 parser when segmentation update_map is enabled

Change-Id: I69dc97088aa38c94c6d37fdbcf3d9cfc942a3df6
This commit is contained in:
KongQun Yang 2015-11-18 11:51:15 -08:00 committed by Gerrit Code Review
parent 9c95309c12
commit 94401d750a
18 changed files with 423 additions and 163 deletions

View File

@ -71,11 +71,11 @@ class AudioStreamInfo : public StreamInfo {
return static_cast<uint32_t>(num_channels_) * sample_bits_ / 8;
}
void set_codec(AudioCodec codec) { codec_ = codec; }
void set_sampling_frequency(const uint32_t sampling_frequency) {
sampling_frequency_ = sampling_frequency;
}
/// @param audio_object_type is only used by AAC Codec, ignored otherwise.
/// @return The codec string.
static std::string GetCodecString(AudioCodec codec,

View File

@ -10,7 +10,10 @@ namespace edash_packager {
namespace media {
BitReader::BitReader(const uint8_t* data, off_t size)
: data_(data), bytes_left_(size), num_remaining_bits_in_curr_byte_(0) {
: data_(data),
initial_size_(size),
bytes_left_(size),
num_remaining_bits_in_curr_byte_(0) {
DCHECK(data_ != NULL && bytes_left_ > 0);
UpdateCurrByte();
@ -50,10 +53,6 @@ bool BitReader::SkipBits(int num_bits) {
return ReadBitsInternal(num_bits, &not_needed);
}
int BitReader::bits_available() const {
  // Unread bits of the byte currently being consumed, plus eight bits for
  // every byte that has not been loaded yet.
  return num_remaining_bits_in_curr_byte_ + 8 * bytes_left_;
}
bool BitReader::ReadBitsInternal(int num_bits, uint64_t* out) {
DCHECK_LE(num_bits, 64);

View File

@ -49,7 +49,12 @@ class BitReader {
bool SkipBits(int num_bits);
/// @return The number of bits available for reading.
int bits_available() const;
int bits_available() const {
  // Bits still pending in the current byte, plus eight bits for each byte
  // left in the stream (which excludes the current byte).
  return num_remaining_bits_in_curr_byte_ + 8 * bytes_left_;
}
/// @return The current bit position, i.e. the number of bits already consumed
///         counted from the start of the input data (total bits in the
///         initial buffer minus the bits still available).
int bit_position() const { return 8 * initial_size_ - bits_available(); }
private:
// Help function used by ReadBits to avoid inlining the bit reading logic.
@ -63,6 +68,10 @@ class BitReader {
// Pointer to the next unread (not in curr_byte_) byte in the stream.
const uint8_t* data_;
// Initial size of the input data.
// TODO(kqyang): Use size_t instead of off_t.
off_t initial_size_;
// Bytes left in the stream (without the curr_byte_).
off_t bytes_left_;

View File

@ -67,6 +67,7 @@ class VideoStreamInfo : public StreamInfo {
uint8_t nalu_length_size() const { return nalu_length_size_; }
int16_t trick_play_rate() const { return trick_play_rate_; }
void set_codec(VideoCodec codec) { codec_ = codec; }
void set_width(uint32_t width) { width_ = width; }
void set_height(uint32_t height) { height_ = height; }
void set_pixel_width(uint32_t pixel_width) { pixel_width_ = pixel_width; }

View File

@ -184,6 +184,10 @@ VPCodecConfiguration::ColorSpace GetColorSpace(uint8_t color_space) {
return VPCodecConfiguration::COLOR_SPACE_BT_601;
case VPX_COLOR_SPACE_BT_709:
return VPCodecConfiguration::COLOR_SPACE_BT_709;
case VPX_COLOR_SPACE_SMPTE_170:
return VPCodecConfiguration::COLOR_SPACE_SMPTE_170;
case VPX_COLOR_SPACE_SMPTE_240:
return VPCodecConfiguration::COLOR_SPACE_SMPTE_240;
case VPX_COLOR_SPACE_BT_2020:
// VP9 does not specify if it is in the form of “constant luminance” or
// “non-constant luminance”. As such, application should rely on the
@ -368,7 +372,7 @@ bool ReadSegmentation(VP9BitReader* reader) {
bool update_map;
RCHECK(reader->ReadBits(1, &update_map));
if (update_map) {
for (uint32_t i = 0; i < SEG_TREE_PROBS; ++i) {
for (uint32_t i = 0; i < SEG_TREE_PROBS; ++i)
RCHECK(reader->SkipBitsConditional(8));
bool temporal_update;
@ -378,7 +382,6 @@ bool ReadSegmentation(VP9BitReader* reader) {
RCHECK(reader->SkipBitsConditional(8));
}
}
}
bool update_data;
RCHECK(reader->ReadBits(1, &update_data));
@ -449,23 +452,23 @@ bool VP9Parser::Parse(const uint8_t* data,
// End of current frame data. There should be no more bytes available.
RCHECK(reader.bits_available() < 8);
vpx_frame.is_key_frame = false;
vpx_frame.is_keyframe = false;
vpx_frame.uncompressed_header_size = vpx_frame.frame_size;
vpx_frame.width = width_;
vpx_frame.height = height_;
continue;
}
bool is_inter_frame;
RCHECK(reader.ReadBits(1, &is_inter_frame));
vpx_frame.is_key_frame = !is_inter_frame;
bool is_interframe;
RCHECK(reader.ReadBits(1, &is_interframe));
vpx_frame.is_keyframe = !is_interframe;
bool show_frame;
RCHECK(reader.ReadBits(1, &show_frame));
bool error_resilient_mode;
RCHECK(reader.ReadBits(1, &error_resilient_mode));
if (vpx_frame.is_key_frame) {
if (vpx_frame.is_keyframe) {
RCHECK(ReadSyncCode(&reader));
RCHECK(ReadBitDepthAndColorSpace(&reader, &codec_config_));
RCHECK(ReadFrameSizes(&reader, &width_, &height_));
@ -516,8 +519,7 @@ bool VP9Parser::Parse(const uint8_t* data,
}
RCHECK(reader.SkipBits(FRAME_CONTEXTS_LOG2)); // frame_context_idx
VLOG(4) << "bit offset: "
<< vpx_frame.frame_size * 8 - reader.bits_available();
VLOG(4) << "Bits read before ReadLoopFilter: " << reader.bit_position();
RCHECK(ReadLoopFilter(&reader));
RCHECK(ReadQuantization(&reader));
RCHECK(ReadSegmentation(&reader));
@ -532,8 +534,7 @@ bool VP9Parser::Parse(const uint8_t* data,
VLOG(3) << "\n frame_size: " << vpx_frame.frame_size
<< "\n header_size: " << vpx_frame.uncompressed_header_size
<< "\n bits_read: "
<< vpx_frame.frame_size * 8 - reader.bits_available()
<< "\n Bits read: " << reader.bit_position()
<< "\n first_partition_size: " << first_partition_size;
RCHECK(first_partition_size > 0);
@ -544,5 +545,30 @@ bool VP9Parser::Parse(const uint8_t* data,
return true;
}
// Lightweight keyframe probe. Reads only the leading fields of the frame
// header: frame marker, profile, show_existing_frame, the interframe bit,
// show_frame / error_resilient_mode and the sync code.
// |data| points to the sample and |data_size| is its size in bytes.
// Returns true only when the frame is a keyframe with a valid sync code;
// returns false for any other frame, for empty input, or on a parsing error.
bool VP9Parser::IsKeyframe(const uint8_t* data, size_t data_size) {
  // An empty sample cannot be a keyframe. Bail out before constructing the
  // reader.
  // NOTE(review): BitReader's constructor DCHECKs on NULL / zero-sized input;
  // presumably VP9BitReader shares that constructor - confirm.
  if (!data || data_size == 0)
    return false;

  VP9BitReader reader(data, data_size);

  uint8_t frame_marker;
  RCHECK(reader.ReadBits(2, &frame_marker));
  RCHECK(frame_marker == VP9_FRAME_MARKER);

  // The profile is parsed only to advance the reader past it; the resulting
  // configuration is discarded.
  VPCodecConfiguration codec_config;
  RCHECK(ReadProfile(&reader, &codec_config));

  // A frame that merely re-displays an existing frame is never a keyframe.
  bool show_existing_frame;
  RCHECK(reader.ReadBits(1, &show_existing_frame));
  if (show_existing_frame)
    return false;

  bool is_interframe;
  RCHECK(reader.ReadBits(1, &is_interframe));
  if (is_interframe)
    return false;

  RCHECK(reader.SkipBits(2));  // show_frame, error_resilient_mode.

  // A keyframe must carry a valid sync code right after those flags.
  RCHECK(ReadSyncCode(&reader));
  return true;
}
} // namespace media
} // namespace edash_packager

View File

@ -11,7 +11,6 @@
#include <stdlib.h>
#include "packager/base/macros.h"
#include "packager/base/memory/scoped_ptr.h"
#include "packager/media/filters/vp_codec_configuration.h"
namespace edash_packager {
@ -20,7 +19,7 @@ namespace media {
struct VPxFrameInfo {
size_t frame_size;
size_t uncompressed_header_size;
bool is_key_frame;
bool is_keyframe;
uint32_t width;
uint32_t height;
};
@ -42,9 +41,16 @@ class VP9Parser {
std::vector<VPxFrameInfo>* vpx_frames);
/// @return VPx codec configuration extracted. Note that it is only valid
/// after parsing a key frame or intra frame successfully.
/// after parsing a keyframe or intra frame successfully.
const VPCodecConfiguration& codec_config() { return codec_config_; }
/// A convenient utility function to check whether the frame is a keyframe.
/// Note that this function does not do a full parse of the frame header, so
/// should be more efficient than Parse().
/// @param data_size Size of the sample in bytes.
/// @return true if it is, false if it is not or if there is parsing error.
static bool IsKeyframe(const uint8_t* data, size_t data_size);
private:
// Keep track of the current width and height. Note that they may change from
// frame to frame.

View File

@ -17,16 +17,16 @@ namespace {
MATCHER_P5(EqualVPxFrame,
frame_size,
uncompressed_header_size,
is_key_frame,
is_keyframe,
width,
height,
"") {
*result_listener << "which is (" << arg.frame_size << ", "
<< arg.uncompressed_header_size << ", " << arg.is_key_frame
<< arg.uncompressed_header_size << ", " << arg.is_keyframe
<< ", " << arg.width << ", " << arg.height << ").";
return arg.frame_size == frame_size &&
arg.uncompressed_header_size == uncompressed_header_size &&
arg.is_key_frame == is_key_frame && arg.width == width &&
arg.is_keyframe == is_keyframe && arg.width == width &&
arg.height == height;
}
} // namespace
@ -47,6 +47,8 @@ TEST(VP9ParserTest, Superframe) {
0xc9, 0x3c, 0x00, 0x48, 0x00, 0xc9,
};
EXPECT_FALSE(VP9Parser::IsKeyframe(data, arraysize(data)));
VP9Parser parser;
std::vector<VPxFrameInfo> frames;
ASSERT_TRUE(parser.Parse(data, arraysize(data), &frames));
@ -69,6 +71,8 @@ TEST(VP9ParserTest, KeyframeChroma420) {
0x35, 0x7a, 0x88, 0x69, 0xf7, 0x1f, 0x26, 0x8b,
};
EXPECT_TRUE(VP9Parser::IsKeyframe(kData, arraysize(kData)));
VP9Parser parser;
std::vector<VPxFrameInfo> frames;
ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames));
@ -89,6 +93,8 @@ TEST(VP9ParserTest, KeyframeProfile1Chroma422) {
0xa0, 0x96, 0xa7, 0xb8, 0xf4, 0xb4, 0x65, 0xff,
};
EXPECT_TRUE(VP9Parser::IsKeyframe(kData, arraysize(kData)));
VP9Parser parser;
std::vector<VPxFrameInfo> frames;
ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames));
@ -109,6 +115,8 @@ TEST(VP9ParserTest, KeyframeProfile2Chroma420) {
0xa4, 0xdf, 0x05, 0xaf, 0x6f, 0xff, 0xd1, 0x74,
};
EXPECT_TRUE(VP9Parser::IsKeyframe(kData, arraysize(kData)));
VP9Parser parser;
std::vector<VPxFrameInfo> frames;
ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames));
@ -119,7 +127,7 @@ TEST(VP9ParserTest, KeyframeProfile2Chroma420) {
}
TEST(VP9ParserTest, KeyframeProfile3Chroma444) {
uint8_t kData[] = {
const uint8_t kData[] = {
0xb1, 0x24, 0xc1, 0xa1, 0x40, 0x00, 0x4f, 0x80, 0x2c, 0xa0, 0x41, 0xc1,
0x20, 0xe0, 0xc3, 0xf0, 0x00, 0x09, 0x00, 0x7c, 0x57, 0x77, 0x3f, 0x67,
0x99, 0x3e, 0x1f, 0xfb, 0xdf, 0x0f, 0x02, 0x0a, 0x37, 0x81, 0x53, 0x80,
@ -129,6 +137,8 @@ TEST(VP9ParserTest, KeyframeProfile3Chroma444) {
0xe1, 0xe6, 0xef, 0xff, 0xfd, 0xf7, 0x4f, 0x0f,
};
EXPECT_TRUE(VP9Parser::IsKeyframe(kData, arraysize(kData)));
VP9Parser parser;
std::vector<VPxFrameInfo> frames;
ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames));
@ -146,8 +156,11 @@ TEST(VP9ParserTest, Intra) {
0xe2, 0xbd, 0x53, 0xd9, 0x00, 0x3a, 0x70, 0xe0, 0x00, 0x78, 0xea, 0xa5,
0x61, 0x08, 0xb7, 0x9f, 0x33, 0xe5, 0xf8, 0xa5, 0x82, 0x32, 0xbb, 0xa3,
0x75, 0xb4, 0x60, 0xf3, 0x39, 0x75, 0x1f, 0x2b,
};
EXPECT_FALSE(VP9Parser::IsKeyframe(kData, arraysize(kData)));
VP9Parser parser;
std::vector<VPxFrameInfo> frames;
ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames));
@ -159,6 +172,7 @@ TEST(VP9ParserTest, Intra) {
TEST(VP9ParserTest, ShowExisting) {
const uint8_t kData[] = {0x88};
EXPECT_FALSE(VP9Parser::IsKeyframe(kData, arraysize(kData)));
VP9Parser parser;
std::vector<VPxFrameInfo> frames;
ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames));
@ -177,6 +191,8 @@ TEST(VP9ParserTest, Interframe) {
0x90, 0xeb, 0x8c, 0xad, 0x5f, 0x69, 0xb7, 0x9b,
};
EXPECT_FALSE(VP9Parser::IsKeyframe(kData, arraysize(kData)));
VP9Parser parser;
std::vector<VPxFrameInfo> frames;
ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames));
@ -186,6 +202,7 @@ TEST(VP9ParserTest, Interframe) {
TEST(VP9ParserTest, CorruptedFrameMarker) {
const uint8_t kData[] = {0xc8};
EXPECT_FALSE(VP9Parser::IsKeyframe(kData, arraysize(kData)));
VP9Parser parser;
std::vector<VPxFrameInfo> frames;
ASSERT_FALSE(parser.Parse(kData, arraysize(kData), &frames));
@ -202,6 +219,8 @@ TEST(VP9ParserTest, CorruptedSynccode) {
0x35, 0x7a, 0x88, 0x69, 0xf7, 0x1f, 0x26, 0x8b,
};
EXPECT_FALSE(VP9Parser::IsKeyframe(kData, arraysize(kData)));
VP9Parser parser;
std::vector<VPxFrameInfo> frames;
ASSERT_FALSE(parser.Parse(kData, arraysize(kData), &frames));
@ -218,6 +237,10 @@ TEST(VP9ParserTest, NotEnoughBytesForFirstPartitionSize) {
0x07, 0xf4, 0x7f, 0xc7, 0xff, 0x6d, 0xff, 0xeb,
};
// IsKeyframe only parses the bytes that are necessary to determine whether it
// is a keyframe.
EXPECT_TRUE(VP9Parser::IsKeyframe(kData, arraysize(kData)));
VP9Parser parser;
std::vector<VPxFrameInfo> frames;
EXPECT_FALSE(parser.Parse(kData, arraysize(kData), &frames));

View File

@ -24,9 +24,11 @@ class VPCodecConfiguration {
COLOR_SPACE_UNSPECIFIED = 0,
COLOR_SPACE_BT_601 = 1,
COLOR_SPACE_BT_709 = 2,
COLOR_SPACE_BT_2020_NON_CONSTANT_LUMINANCE = 3,
COLOR_SPACE_BT_2020_CONSTANT_LUMINANCE = 4,
COLOR_SPACE_SRGB = 5,
COLOR_SPACE_SMPTE_170 = 3,
COLOR_SPACE_SMPTE_240 = 4,
COLOR_SPACE_BT_2020_NON_CONSTANT_LUMINANCE = 5,
COLOR_SPACE_BT_2020_CONSTANT_LUMINANCE = 6,
COLOR_SPACE_SRGB = 7,
};
enum ChromaSubsampling {
@ -91,7 +93,9 @@ class VPCodecConfiguration {
bool video_full_range_flag_;
std::vector<uint8_t> codec_initialization_data_;
DISALLOW_COPY_AND_ASSIGN(VPCodecConfiguration);
// Not using DISALLOW_COPY_AND_ASSIGN here intentionally to allow the compiler
// generated copy constructor and assignment operator. Since the internal data
// is small, the performance impact is minimal.
};
} // namespace media

View File

@ -10,6 +10,7 @@
#include "packager/media/base/buffer_reader.h"
#include "packager/media/base/key_source.h"
#include "packager/media/base/media_sample.h"
#include "packager/media/filters/vp9_parser.h"
#include "packager/media/formats/mp4/box_definitions.h"
#include "packager/media/formats/mp4/cenc.h"
@ -26,15 +27,19 @@ EncryptingFragmenter::EncryptingFragmenter(
TrackFragment* traf,
scoped_ptr<EncryptionKey> encryption_key,
int64_t clear_time,
VideoCodec video_codec,
uint8_t nalu_length_size)
: Fragmenter(traf),
encryption_key_(encryption_key.Pass()),
video_codec_(video_codec),
nalu_length_size_(nalu_length_size),
clear_time_(clear_time) {
DCHECK(encryption_key_);
if (video_codec == kCodecVP9)
vp9_parser_.reset(new VP9Parser);
}
EncryptingFragmenter::~EncryptingFragmenter() {}
EncryptingFragmenter::~EncryptingFragmenter() {}
Status EncryptingFragmenter::AddSample(scoped_refptr<MediaSample> sample) {
DCHECK(sample);
@ -134,8 +139,23 @@ Status EncryptingFragmenter::EncryptSample(scoped_refptr<MediaSample> sample) {
FrameCENCInfo cenc_info(encryptor_->iv());
uint8_t* data = sample->writable_data();
if (!IsSubsampleEncryptionRequired()) {
EncryptBytes(data, sample->data_size());
if (IsSubsampleEncryptionRequired()) {
if (video_codec_ == kCodecVP9) {
std::vector<VPxFrameInfo> vpx_frames;
if (!vp9_parser_->Parse(sample->data(), sample->data_size(),
&vpx_frames)) {
return Status(error::MUXER_FAILURE, "Failed to parse vp9 frame.");
}
for (const VPxFrameInfo& frame : vpx_frames) {
SubsampleEntry subsample;
subsample.clear_bytes = frame.uncompressed_header_size;
subsample.cipher_bytes =
frame.frame_size - frame.uncompressed_header_size;
cenc_info.AddSubsample(subsample);
if (subsample.cipher_bytes > 0)
EncryptBytes(data + subsample.clear_bytes, subsample.cipher_bytes);
data += frame.frame_size;
}
} else {
BufferReader reader(data, sample->data_size());
while (reader.HasBytes(1)) {
@ -155,9 +175,12 @@ Status EncryptingFragmenter::EncryptSample(scoped_refptr<MediaSample> sample) {
cenc_info.AddSubsample(subsample);
data += nalu_length_size_ + nalu_length;
}
}
// The length of per-sample auxiliary datum, defined in CENC ch. 7.
traf()->auxiliary_size.sample_info_sizes.push_back(cenc_info.ComputeSize());
} else {
EncryptBytes(data, sample->data_size());
}
cenc_info.Write(aux_data());

View File

@ -7,6 +7,8 @@
#ifndef MEDIA_FORMATS_MP4_ENCRYPTING_FRAGMENTER_H_
#define MEDIA_FORMATS_MP4_ENCRYPTING_FRAGMENTER_H_
#include "packager/base/memory/scoped_ptr.h"
#include "packager/media/filters/vp9_parser.h"
#include "packager/media/formats/mp4/fragmenter.h"
namespace edash_packager {
@ -24,11 +26,15 @@ class EncryptingFragmenter : public Fragmenter {
/// @param encryption_key contains the encryption parameters.
/// @param clear_time specifies clear lead duration in units of the current
/// track's timescale.
/// @param video_codec specifies the codec if input is a video stream; it
/// should be set to kUnknownVideoCodec for audio stream. This
/// parameter is used for proper subsample encryption.
/// @param nalu_length_size specifies the size of NAL unit length, in bytes,
/// for subsample encryption.
EncryptingFragmenter(TrackFragment* traf,
scoped_ptr<EncryptionKey> encryption_key,
int64_t clear_time,
VideoCodec video_codec,
uint8_t nalu_length_size);
~EncryptingFragmenter() override;
@ -64,16 +70,23 @@ class EncryptingFragmenter : public Fragmenter {
Status EncryptSample(scoped_refptr<MediaSample> sample);
// Should we enable subsample encryption?
bool IsSubsampleEncryptionRequired() { return nalu_length_size_ != 0; }
bool IsSubsampleEncryptionRequired() {
  // Subsample encryption applies when a NAL unit length size was supplied,
  // or when the stream is VP9.
  if (nalu_length_size_ != 0)
    return true;
  return video_codec_ == kCodecVP9;
}
scoped_ptr<EncryptionKey> encryption_key_;
scoped_ptr<AesCtrEncryptor> encryptor_;
// For VP8/VP9, uncompressed_header should not be encrypted; for AVC/HEVC,
// the size and type NAL units should not be encrypted.
VideoCodec video_codec_;
// If this stream contains AVC, subsample encryption specifies that the size
// and type of NAL units remain unencrypted. This field specifies the size of
// the size field. Can be 1, 2 or 4 bytes.
const uint8_t nalu_length_size_;
int64_t clear_time_;
scoped_ptr<VP9Parser> vp9_parser_;
DISALLOW_COPY_AND_ASSIGN(EncryptingFragmenter);
};

View File

@ -23,11 +23,13 @@ KeyRotationFragmenter::KeyRotationFragmenter(MovieFragment* moof,
KeySource::TrackType track_type,
int64_t crypto_period_duration,
int64_t clear_time,
VideoCodec video_codec,
uint8_t nalu_length_size,
MuxerListener* muxer_listener)
: EncryptingFragmenter(traf,
scoped_ptr<EncryptionKey>(new EncryptionKey()),
clear_time,
video_codec,
nalu_length_size),
moof_(moof),
encryption_key_source_(encryption_key_source),

View File

@ -31,6 +31,9 @@ class KeyRotationFragmenter : public EncryptingFragmenter {
/// of the current track's timescale.
/// @param clear_time specifies clear lead duration in units of the current
/// track's timescale.
/// @param video_codec specifies the codec if input is a video stream; it
/// should be set to kUnknownVideoCodec for audio stream. This
/// parameter is used for proper subsample encryption.
/// @param nalu_length_size NAL unit length size, in bytes, for subsample
/// encryption.
/// @param muxer_listener is a pointer to MuxerListener for notifying
@ -41,6 +44,7 @@ class KeyRotationFragmenter : public EncryptingFragmenter {
KeySource::TrackType track_type,
int64_t crypto_period_duration,
int64_t clear_time,
VideoCodec video_codec,
uint8_t nalu_length_size,
MuxerListener* muxer_listener);
~KeyRotationFragmenter() override;

View File

@ -89,6 +89,14 @@ void GenerateEncryptedSampleEntry(const EncryptionKey& encryption_key,
}
}
// Returns the codec of |stream_info| when it describes a video stream, and
// kUnknownVideoCodec for any other stream type.
VideoCodec GetVideoCodec(const StreamInfo& stream_info) {
  if (stream_info.stream_type() == kStreamVideo) {
    // The stream type check above is what makes this downcast valid.
    return static_cast<const VideoStreamInfo&>(stream_info).codec();
  }
  return kUnknownVideoCodec;
}
uint8_t GetNaluLengthSize(const StreamInfo& stream_info) {
if (stream_info.stream_type() != kStreamVideo)
return 0;
@ -160,6 +168,7 @@ Status Segmenter::Initialize(const std::vector<MediaStream*>& streams,
continue;
}
VideoCodec video_codec = GetVideoCodec(*streams[i]->info());
uint8_t nalu_length_size = GetNaluLengthSize(*streams[i]->info());
KeySource::TrackType track_type =
GetTrackTypeForEncryption(*streams[i]->info(), max_sd_pixels);
@ -182,14 +191,10 @@ Status Segmenter::Initialize(const std::vector<MediaStream*>& streams,
}
fragmenters_[i] = new KeyRotationFragmenter(
moof_.get(),
&moof_->tracks[i],
encryption_key_source,
track_type,
moof_.get(), &moof_->tracks[i], encryption_key_source, track_type,
crypto_period_duration_in_seconds * streams[i]->info()->time_scale(),
clear_lead_in_seconds * streams[i]->info()->time_scale(),
nalu_length_size,
muxer_listener_);
clear_lead_in_seconds * streams[i]->info()->time_scale(), video_codec,
nalu_length_size, muxer_listener_);
continue;
}
@ -217,9 +222,8 @@ Status Segmenter::Initialize(const std::vector<MediaStream*>& streams,
}
fragmenters_[i] = new EncryptingFragmenter(
&moof_->tracks[i],
encryption_key.Pass(),
clear_lead_in_seconds * streams[i]->info()->time_scale(),
&moof_->tracks[i], encryption_key.Pass(),
clear_lead_in_seconds * streams[i]->info()->time_scale(), video_codec,
nalu_length_size);
}

View File

@ -10,6 +10,7 @@
#include "packager/base/sys_byteorder.h"
#include "packager/media/base/decrypt_config.h"
#include "packager/media/base/timestamp.h"
#include "packager/media/filters/vp9_parser.h"
#include "packager/media/filters/webvtt_util.h"
#include "packager/media/formats/webm/webm_constants.h"
#include "packager/media/formats/webm/webm_crypto_helpers.h"
@ -30,17 +31,11 @@
"may be suppressed): " \
: "")
namespace {
const int64_t kMicrosecondsPerMillisecond = 1000;
} // namespace
namespace edash_packager {
namespace media {
namespace {
const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = {
10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000,
60000, 10000, 20000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000,
10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000};
const int64_t kMicrosecondsPerMillisecond = 1000;
enum {
// Limits the number of LOG() calls in the path of reading encoded
@ -51,27 +46,78 @@ enum {
kMaxDurationEstimateLogs = 10,
};
// Inspects block data to decide whether the block is a keyframe.
// |is_video| tells whether the block belongs to a video track.
// |codec| is the video codec; only consulted for video blocks.
// |data| contains the bytes in the block.
// |size| indicates the number of bytes in |data|.
bool IsKeyframe(bool is_video,
                VideoCodec codec,
                const uint8_t* data,
                int size) {
  // For now, every block of a non-video datatype is assumed to be a keyframe.
  // This is a valid assumption for Vorbis, WebVTT, & Opus.
  if (!is_video)
    return true;

  // VP9 has its own header probe.
  if (codec == kCodecVP9)
    return VP9Parser::IsKeyframe(data, size);

  // Anything else reaching this point must be VP8.
  CHECK_EQ(kCodecVP8, codec);

  // The block must be big enough for the minimal keyframe header.
  const int kMinKeyframeHeaderSize = 7;
  if (size < kMinKeyframeHeaderSize)
    return false;

  // http://tools.ietf.org/html/rfc6386 Section 19.1:
  // a keyframe has the LSb of its first byte set to 0 ...
  const bool key_bit_clear = (data[0] & 0x01) == 0;
  // ... and carries the VP8 keyframe startcode in bytes 3..5.
  const bool start_code_matches =
      data[3] == 0x9d && data[4] == 0x01 && data[5] == 0x2a;

  return key_bit_clear && start_code_matches;
}
} // namespace
const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = {
10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000,
60000, 10000, 20000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000,
10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000};
WebMClusterParser::WebMClusterParser(
int64_t timecode_scale,
int audio_track_num,
scoped_refptr<AudioStreamInfo> audio_stream_info,
scoped_refptr<VideoStreamInfo> video_stream_info,
int64_t audio_default_duration,
int video_track_num,
int64_t video_default_duration,
const WebMTracksParser::TextTracks& text_tracks,
const std::set<int64_t>& ignored_tracks,
const std::string& audio_encryption_key_id,
const std::string& video_encryption_key_id,
const AudioCodec audio_codec,
const MediaParser::NewSampleCB& new_sample_cb)
const MediaParser::NewSampleCB& new_sample_cb,
const MediaParser::InitCB& init_cb)
: timecode_multiplier_(timecode_scale / 1000.0),
audio_stream_info_(audio_stream_info),
video_stream_info_(video_stream_info),
ignored_tracks_(ignored_tracks),
audio_encryption_key_id_(audio_encryption_key_id),
video_encryption_key_id_(video_encryption_key_id),
audio_codec_(audio_codec),
parser_(kWebMIdCluster, this),
initialized_(false),
init_cb_(init_cb),
cluster_start_time_(kNoTimestamp),
audio_(audio_track_num, false, audio_default_duration, new_sample_cb),
video_(video_track_num, true, video_default_duration, new_sample_cb) {
audio_(audio_stream_info ? audio_stream_info->track_id() : -1,
false,
audio_default_duration,
new_sample_cb),
video_(video_stream_info ? video_stream_info->track_id() : -1,
true,
video_default_duration,
new_sample_cb) {
for (WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin();
it != text_tracks.end();
++it) {
@ -143,7 +189,8 @@ int64_t WebMClusterParser::TryGetEncodedAudioDuration(
// TODO: Consider parsing "Signal Byte" for encrypted streams to return
// duration for any unencrypted blocks.
if (audio_codec_ == kCodecOpus) {
DCHECK(audio_stream_info_);
if (audio_stream_info_->codec() == kCodecOpus) {
return ReadOpusDuration(data, size);
}
@ -450,7 +497,12 @@ bool WebMClusterParser::OnBlock(bool is_simple_block,
// necessary to determine whether it contains a keyframe or not.
// http://www.matroska.org/technical/specs/index.html
bool is_keyframe =
is_simple_block ? (flags & 0x80) != 0 : track->IsKeyframe(data, size);
is_simple_block
? (flags & 0x80) != 0
: IsKeyframe(stream_type == kStreamVideo,
video_stream_info_ ? video_stream_info_->codec()
: kUnknownVideoCodec,
data, size);
// Every encrypted Block has a signal byte and IV prepended to it. Current
// encrypted WebM request for comments specification is here
@ -531,6 +583,44 @@ bool WebMClusterParser::OnBlock(bool is_simple_block,
buffer->set_duration(track->default_duration());
}
if (!initialized_) {
std::vector<scoped_refptr<StreamInfo>> streams;
if (audio_stream_info_)
streams.push_back(audio_stream_info_);
if (video_stream_info_) {
if (stream_type == kStreamVideo) {
VPCodecConfiguration codec_config;
if (video_stream_info_->codec() == kCodecVP9) {
VP9Parser vp9_parser;
std::vector<VPxFrameInfo> vpx_frames;
if (!vp9_parser.Parse(buffer->data(), buffer->data_size(),
&vpx_frames)) {
LOG(ERROR) << "Failed to parse vp9 frame.";
return false;
}
if (vpx_frames.size() != 1u || !vpx_frames[0].is_keyframe) {
LOG(ERROR) << "The first frame should be a key frame.";
return false;
}
codec_config = vp9_parser.codec_config();
}
// TODO(kqyang): Support VP8.
video_stream_info_->set_codec_string(
codec_config.GetCodecString(video_stream_info_->codec()));
std::vector<uint8_t> extra_data;
codec_config.Write(&extra_data);
video_stream_info_->set_extra_data(extra_data);
streams.push_back(video_stream_info_);
init_cb_.Run(streams);
initialized_ = true;
}
} else {
init_cb_.Run(streams);
initialized_ = true;
}
}
return track->EmitBuffer(buffer);
}
@ -614,28 +704,6 @@ void WebMClusterParser::Track::Reset() {
last_added_buffer_missing_duration_ = NULL;
}
// Reports whether the block in |data| (|size| bytes) is a keyframe. Blocks of
// video tracks are checked against the VP8 keyframe header layout.
bool WebMClusterParser::Track::IsKeyframe(const uint8_t* data, int size) const {
  // Blocks of datatypes other than video are assumed to always be keyframes
  // for now. This is a valid assumption for Vorbis, WebVTT, & Opus.
  if (!is_video_)
    return true;

  // Too small to hold the minimal keyframe header.
  const int kMinKeyframeHeaderSize = 7;
  if (size < kMinKeyframeHeaderSize)
    return false;

  // http://tools.ietf.org/html/rfc6386 Section 19.1:
  // the LSb of the first byte must be 0 for a keyframe, and the VP8 keyframe
  // startcode must follow in bytes 3..5.
  const bool first_byte_lsb_is_zero = (data[0] & 0x01) == 0;
  const bool has_keyframe_startcode =
      data[3] == 0x9d && data[4] == 0x01 && data[5] == 0x2a;

  return first_byte_lsb_is_zero && has_keyframe_startcode;
}
bool WebMClusterParser::Track::EmitBufferHelp(
const scoped_refptr<MediaSample>& buffer) {

View File

@ -67,12 +67,6 @@ class WebMClusterParser : public WebMParserClient {
// was missing duration.
void Reset();
// Helper function used to inspect block data to determine if the
// block is a keyframe.
// |data| contains the bytes in the block.
// |size| indicates the number of bytes in |data|.
bool IsKeyframe(const uint8_t* data, int size) const;
int64_t default_duration() const { return default_duration_; }
private:
@ -113,16 +107,16 @@ class WebMClusterParser : public WebMParserClient {
public:
WebMClusterParser(int64_t timecode_scale,
int audio_track_num,
scoped_refptr<AudioStreamInfo> audio_stream_info,
scoped_refptr<VideoStreamInfo> video_stream_info,
int64_t audio_default_duration,
int video_track_num,
int64_t video_default_duration,
const WebMTracksParser::TextTracks& text_tracks,
const std::set<int64_t>& ignored_tracks,
const std::string& audio_encryption_key_id,
const std::string& video_encryption_key_id,
const AudioCodec audio_codec,
const MediaParser::NewSampleCB& new_sample_cb);
const MediaParser::NewSampleCB& new_sample_cb,
const MediaParser::InitCB& init_cb);
~WebMClusterParser() override;
/// Resets the parser state so it can accept a new cluster.
@ -191,13 +185,20 @@ class WebMClusterParser : public WebMParserClient {
double timecode_multiplier_; // Multiplier used to convert timecodes into
// microseconds.
scoped_refptr<AudioStreamInfo> audio_stream_info_;
scoped_refptr<VideoStreamInfo> video_stream_info_;
std::set<int64_t> ignored_tracks_;
std::string audio_encryption_key_id_;
std::string video_encryption_key_id_;
const AudioCodec audio_codec_;
WebMListParser parser_;
// Indicates whether init_cb has been executed. |init_cb| is executed when we
// have codec configuration of video stream, which is extracted from the first
// video sample.
bool initialized_;
MediaParser::InitCB init_cb_;
int64_t last_block_timecode_ = -1;
scoped_ptr<uint8_t[]> block_data_;
int block_data_size_ = -1;

View File

@ -28,10 +28,6 @@ using ::testing::StrictMock;
using ::testing::Mock;
using ::testing::_;
namespace {
const int64_t kMicrosecondsPerMillisecond = 1000;
} // namespace
namespace edash_packager {
namespace media {
@ -67,6 +63,7 @@ MATCHER_P2(WebMBlockDurationMismatchesOpusDuration,
namespace {
const int64_t kMicrosecondsPerMillisecond = 1000;
// Timecode scale for millisecond timestamps.
const int kTimecodeScale = 1000000;
@ -76,6 +73,23 @@ const int kTextTrackNum = 3;
const int kTestAudioFrameDefaultDurationInMs = 13;
const int kTestVideoFrameDefaultDurationInMs = 17;
// Constants for AudioStreamInfo and VideoStreamInfo. Most are not used.
const uint32_t kTimeScale = 1000000u;
const uint64_t kDuration = 10000000u;
const char kCodecString[] = "codec_string";
const char kLanguage[] = "eng";
const uint8_t kBitsPerSample = 8u;
const uint8_t kNumChannels = 2u;
const uint32_t kSamplingFrequency = 48000u;
const size_t kExtraDataSize = 0u;
const bool kEncrypted = true;
const uint16_t kWidth = 320u;
const uint16_t kHeight = 180u;
const uint32_t kPixelWidth = 1u;
const uint32_t kPixelHeight = 1u;
const int16_t kTrickPlayRate = 0u;
const uint8_t kNaluLengthSize = 0u;
// Test duration defaults must differ from parser estimation defaults to know
// which durations parser used when emitting buffers.
static_assert(
@ -125,6 +139,16 @@ const uint8_t kEncryptedFrame[] = {
0x01,
};
const uint8_t kVP9Frame[] = {
0xb1, 0x24, 0xc1, 0xa1, 0x40, 0x00, 0x4f, 0x80, 0x2c, 0xa0, 0x41, 0xc1,
0x20, 0xe0, 0xc3, 0xf0, 0x00, 0x09, 0x00, 0x7c, 0x57, 0x77, 0x3f, 0x67,
0x99, 0x3e, 0x1f, 0xfb, 0xdf, 0x0f, 0x02, 0x0a, 0x37, 0x81, 0x53, 0x80,
0x00, 0x7e, 0x6f, 0xfe, 0x74, 0x31, 0xc6, 0x4f, 0x23, 0x9d, 0x6e, 0x5f,
0xfc, 0xa8, 0xef, 0x67, 0xdc, 0xac, 0xf7, 0x3e, 0x31, 0x07, 0xab, 0xc7,
0x0c, 0x74, 0x48, 0x8b, 0x95, 0x30, 0xc9, 0xf0, 0x37, 0x3b, 0xe6, 0x11,
0xe1, 0xe6, 0xef, 0xff, 0xfd, 0xf7, 0x4f, 0x0f,
};
scoped_ptr<Cluster> CreateCluster(int timecode,
const BlockInfo* block_info,
int block_count) {
@ -178,6 +202,14 @@ scoped_ptr<Cluster> CreateEncryptedCluster(int bytes_to_write) {
return cb.Finish();
}
// Builds a Cluster at timecode 0 that holds a single SimpleBlock on the video
// track, carrying the vp9 frame (keyframe) stored in kVP9Frame.
scoped_ptr<Cluster> CreateVP9Cluster() {
  ClusterBuilder builder;
  builder.SetClusterTimecode(0);
  builder.AddSimpleBlock(kVideoTrackNum, 0, 0, kVP9Frame,
                         arraysize(kVP9Frame));
  return builder.Finish();
}
bool VerifyBuffersHelper(const BufferQueue& audio_buffers,
const BufferQueue& video_buffers,
const BufferQueue& text_buffers,
@ -268,7 +300,35 @@ void VerifyEncryptedBuffer(scoped_refptr<MediaSample> buffer) {
class WebMClusterParserTest : public testing::Test {
public:
WebMClusterParserTest() : parser_(CreateDefaultParser()) {}
WebMClusterParserTest()
: audio_stream_info_(new AudioStreamInfo(kAudioTrackNum,
kTimeScale,
kDuration,
kUnknownAudioCodec,
kCodecString,
kLanguage,
kBitsPerSample,
kNumChannels,
kSamplingFrequency,
NULL,
kExtraDataSize,
!kEncrypted)),
video_stream_info_(new VideoStreamInfo(kVideoTrackNum,
kTimeScale,
kDuration,
kCodecVP8,
kCodecString,
kLanguage,
kWidth,
kHeight,
kPixelWidth,
kPixelHeight,
kTrickPlayRate,
kNaluLengthSize,
NULL,
kExtraDataSize,
!kEncrypted)),
parser_(CreateDefaultParser()) {}
protected:
void ResetParserToHaveDefaultDurations() {
@ -285,6 +345,10 @@ class WebMClusterParserTest : public testing::Test {
default_audio_duration, default_video_duration));
}
// Init callback bound into the parser by CreateParserHelper(). Stores a copy
// of the reported streams in streams_from_init_event_ so tests can inspect
// them afterwards.
void InitEvent(const std::vector<scoped_refptr<StreamInfo>>& stream_info) {
  // Copy first, then swap the copy into place.
  std::vector<scoped_refptr<StreamInfo>> reported_streams(stream_info);
  streams_from_init_event_.swap(reported_streams);
}
bool NewSampleEvent(uint32_t track_id,
const scoped_refptr<MediaSample>& sample) {
switch (track_id) {
@ -313,20 +377,24 @@ class WebMClusterParserTest : public testing::Test {
const std::set<int64_t>& ignored_tracks,
const std::string& audio_encryption_key_id,
const std::string& video_encryption_key_id,
const AudioCodec audio_codec) {
const AudioCodec audio_codec,
const VideoCodec video_codec) {
audio_stream_info_->set_codec(audio_codec);
video_stream_info_->set_codec(video_codec);
return new WebMClusterParser(
kTimecodeScale, kAudioTrackNum, audio_default_duration, kVideoTrackNum,
video_default_duration, text_tracks, ignored_tracks,
audio_encryption_key_id, video_encryption_key_id, audio_codec,
kTimecodeScale, audio_stream_info_, video_stream_info_,
audio_default_duration, video_default_duration, text_tracks,
ignored_tracks, audio_encryption_key_id, video_encryption_key_id,
base::Bind(&WebMClusterParserTest::NewSampleEvent,
base::Unretained(this)));
base::Unretained(this)),
base::Bind(&WebMClusterParserTest::InitEvent, base::Unretained(this)));
}
// Create a default version of the parser for test: kNoTimestamp for both the
// audio and video default durations, no text tracks, no ignored tracks, empty
// audio/video encryption key ids, kUnknownAudioCodec for audio and kCodecVP8
// for video.
WebMClusterParser* CreateDefaultParser() {
return CreateParserHelper(kNoTimestamp, kNoTimestamp, TextTracks(),
std::set<int64_t>(), std::string(), std::string(),
kUnknownAudioCodec);
kUnknownAudioCodec, kCodecVP8);
}
// Create a parser for test with custom audio and video default durations, and
@ -337,7 +405,7 @@ class WebMClusterParserTest : public testing::Test {
const WebMTracksParser::TextTracks& text_tracks = TextTracks()) {
return CreateParserHelper(audio_default_duration, video_default_duration,
text_tracks, std::set<int64_t>(), std::string(),
std::string(), kUnknownAudioCodec);
std::string(), kUnknownAudioCodec, kCodecVP8);
}
// Create a parser for test with custom ignored tracks.
@ -345,7 +413,7 @@ class WebMClusterParserTest : public testing::Test {
std::set<int64_t>& ignored_tracks) {
return CreateParserHelper(kNoTimestamp, kNoTimestamp, TextTracks(),
ignored_tracks, std::string(), std::string(),
kUnknownAudioCodec);
kUnknownAudioCodec, kCodecVP8);
}
// Create a parser for test with custom encryption key ids and audio codec.
@ -355,7 +423,14 @@ class WebMClusterParserTest : public testing::Test {
const AudioCodec audio_codec) {
return CreateParserHelper(kNoTimestamp, kNoTimestamp, TextTracks(),
std::set<int64_t>(), audio_encryption_key_id,
video_encryption_key_id, audio_codec);
video_encryption_key_id, audio_codec, kCodecVP8);
}
// Create a parser for test whose video stream uses |video_codec|. Everything
// else is left at the neutral values: kNoTimestamp for both default durations,
// no text tracks, no ignored tracks, empty encryption key ids and
// kUnknownAudioCodec for audio.
WebMClusterParser* CreateParserWithVideoCodec(const VideoCodec video_codec) {
  const std::set<int64_t> no_ignored_tracks;
  const std::string no_key_id;
  return CreateParserHelper(kNoTimestamp, kNoTimestamp, TextTracks(),
                            no_ignored_tracks, no_key_id, no_key_id,
                            kUnknownAudioCodec, video_codec);
}
bool VerifyBuffers(const BlockInfo* block_info, int block_count) {
@ -368,7 +443,10 @@ class WebMClusterParserTest : public testing::Test {
return result;
}
scoped_refptr<AudioStreamInfo> audio_stream_info_;
scoped_refptr<VideoStreamInfo> video_stream_info_;
scoped_ptr<WebMClusterParser> parser_;
std::vector<scoped_refptr<StreamInfo>> streams_from_init_event_;
BufferQueue audio_buffers_;
BufferQueue video_buffers_;
TextBufferQueueMap text_buffers_map_;
@ -485,6 +563,10 @@ TEST_F(WebMClusterParserTest, ParseClusterWithSingleCall) {
int result = parser_->Parse(cluster->data(), cluster->size());
EXPECT_EQ(cluster->size(), result);
ASSERT_TRUE(VerifyBuffers(kDefaultBlockInfo, block_count));
// Verify init event called.
ASSERT_EQ(2u, streams_from_init_event_.size());
EXPECT_EQ(kStreamAudio, streams_from_init_event_[0]->stream_type());
EXPECT_EQ(kStreamVideo, streams_from_init_event_[1]->stream_type());
}
TEST_F(WebMClusterParserTest, ParseClusterWithMultipleCalls) {
@ -698,6 +780,19 @@ TEST_F(WebMClusterParserTest, ParseMultipleTextTracks) {
}
}
// Feeds a cluster holding a single VP9 keyframe to a parser configured for
// kCodecVP9, then checks that the whole cluster was consumed and that the init
// event reported an audio stream followed by a video stream carrying the
// expected VP9 codec string.
TEST_F(WebMClusterParserTest, ParseVP9) {
  scoped_ptr<Cluster> vp9_cluster(CreateVP9Cluster());
  parser_.reset(CreateParserWithVideoCodec(kCodecVP9));

  const int bytes_consumed =
      parser_->Parse(vp9_cluster->data(), vp9_cluster->size());
  EXPECT_EQ(vp9_cluster->size(), bytes_consumed);

  // Both streams must have been reported before they are indexed below.
  ASSERT_EQ(2u, streams_from_init_event_.size());
  const scoped_refptr<StreamInfo>& audio_stream = streams_from_init_event_[0];
  const scoped_refptr<StreamInfo>& video_stream = streams_from_init_event_[1];
  EXPECT_EQ(kStreamAudio, audio_stream->stream_type());
  EXPECT_EQ(kStreamVideo, video_stream->stream_type());
  EXPECT_EQ("vp09.03.00.12.00.03.00.00", video_stream->codec_string());
}
TEST_F(WebMClusterParserTest, ParseEncryptedBlock) {
scoped_ptr<Cluster> cluster(CreateEncryptedCluster(sizeof(kEncryptedFrame)));
@ -728,6 +823,8 @@ TEST_F(WebMClusterParserTest, ParseInvalidZeroSizedCluster) {
};
EXPECT_EQ(-1, parser_->Parse(kBuffer, sizeof(kBuffer)));
// Verify init event not called.
ASSERT_EQ(0u, streams_from_init_event_.size());
}
TEST_F(WebMClusterParserTest, ParseInvalidUnknownButActuallyZeroSizedCluster) {

View File

@ -181,37 +181,33 @@ int WebMMediaParser::ParseInfoAndTracks(const uint8_t* data, int size) {
double timecode_scale_in_us = info_parser.timecode_scale() / 1000.0;
int64_t duration_in_us = info_parser.duration() * timecode_scale_in_us;
std::vector<scoped_refptr<StreamInfo>> streams;
AudioCodec audio_codec = kCodecOpus;
if (tracks_parser.audio_stream_info()) {
streams.push_back(tracks_parser.audio_stream_info());
streams.back()->set_duration(duration_in_us);
if (streams.back()->is_encrypted())
scoped_refptr<AudioStreamInfo> audio_stream_info =
tracks_parser.audio_stream_info();
if (audio_stream_info) {
audio_stream_info->set_duration(duration_in_us);
if (audio_stream_info->is_encrypted())
OnEncryptedMediaInitData(tracks_parser.audio_encryption_key_id());
audio_codec = tracks_parser.audio_stream_info()->codec();
} else {
VLOG(1) << "No audio track info found.";
}
if (tracks_parser.video_stream_info()) {
streams.push_back(tracks_parser.video_stream_info());
streams.back()->set_duration(duration_in_us);
if (streams.back()->is_encrypted())
scoped_refptr<VideoStreamInfo> video_stream_info =
tracks_parser.video_stream_info();
if (video_stream_info) {
video_stream_info->set_duration(duration_in_us);
if (video_stream_info->is_encrypted())
OnEncryptedMediaInitData(tracks_parser.video_encryption_key_id());
} else {
VLOG(1) << "No video track info found.";
}
init_cb_.Run(streams);
cluster_parser_.reset(new WebMClusterParser(
info_parser.timecode_scale(), tracks_parser.audio_track_num(),
info_parser.timecode_scale(), audio_stream_info, video_stream_info,
tracks_parser.GetAudioDefaultDuration(timecode_scale_in_us),
tracks_parser.video_track_num(),
tracks_parser.GetVideoDefaultDuration(timecode_scale_in_us),
tracks_parser.text_tracks(), tracks_parser.ignored_tracks(),
tracks_parser.audio_encryption_key_id(),
tracks_parser.video_encryption_key_id(), audio_codec, new_sample_cb_));
tracks_parser.video_encryption_key_id(), new_sample_cb_, init_cb_));
return bytes_parsed;
}

View File

@ -6,7 +6,6 @@
#include "packager/base/logging.h"
#include "packager/base/stl_util.h"
#include "packager/media/filters/vp_codec_configuration.h"
#include "packager/media/formats/webm/webm_constants.h"
namespace {
@ -106,25 +105,10 @@ scoped_refptr<VideoStreamInfo> WebMVideoClient::GetVideoStreamInfo(
sar_x /= gcd;
sar_y /= gcd;
// TODO(kqyang): Fill in the values for vp codec configuration.
const uint8_t profile = 0;
const uint8_t level = 0;
const uint8_t bit_depth = 8;
const uint8_t color_space = 0;
const uint8_t chroma_subsampling = 0;
const uint8_t transfer_function = 0;
const bool video_full_range_flag = false;
VPCodecConfiguration vp_config(profile, level, bit_depth, color_space,
chroma_subsampling, transfer_function,
video_full_range_flag, codec_private);
std::vector<uint8_t> extra_data;
vp_config.Write(&extra_data);
return scoped_refptr<VideoStreamInfo>(new VideoStreamInfo(
track_num, kWebMTimeScale, 0, video_codec,
vp_config.GetCodecString(video_codec), std::string(), width_after_crop,
height_after_crop, sar_x, sar_y, 0, 0, vector_as_array(&extra_data),
extra_data.size(), is_encrypted));
track_num, kWebMTimeScale, 0, video_codec, std::string(), std::string(),
width_after_crop, height_after_crop, sar_x, sar_y, 0, 0, NULL, 0,
is_encrypted));
}
bool WebMVideoClient::OnUInt(int id, int64_t val) {