Add codec private data to VP9 in WebM.

b/29009350

Change-Id: Iaafc87340043eff77c3ef7e1c1135d8c4c4287ae
This commit is contained in:
Jacob Trimble 2016-05-25 11:34:43 -07:00
parent 47a3fb977d
commit 6550868574
9 changed files with 276 additions and 41 deletions

View File

@ -12,10 +12,10 @@
</Representation>
</AdaptationSet>
<AdaptationSet id="1" contentType="video" width="320" height="240" frameRate="1000000/34000" par="16:9">
<Representation id="1" bandwidth="203313" codecs="vp9" mimeType="video/webm" sar="427:320">
<Representation id="1" bandwidth="203348" codecs="vp9" mimeType="video/webm" sar="427:320">
<BaseURL>output_video.webm</BaseURL>
<SegmentBase indexRange="69485-69532" timescale="1000000">
<Initialization range="0-286"/>
<SegmentBase indexRange="69497-69544" timescale="1000000">
<Initialization range="0-298"/>
</SegmentBase>
</Representation>
</AdaptationSet>

View File

@ -9,6 +9,7 @@
#include "packager/base/strings/string_number_conversions.h"
#include "packager/base/strings/string_util.h"
#include "packager/media/base/bit_reader.h"
#include "packager/media/base/buffer_reader.h"
#include "packager/media/base/buffer_writer.h"
#include "packager/media/base/rcheck.h"
#include "packager/base/strings/stringprintf.h"
@ -16,6 +17,12 @@
namespace shaka {
namespace media {
namespace {
// Feature IDs for the VP9 codec feature list handled by ParseWebM() and
// WriteWebM(). Each entry in that list is an (ID, length, value) byte
// sequence; these enumerators are the values of the one-byte ID field.
enum VP9CodecFeatures {
kFeatureProfile = 1,
kFeatureLevel = 2,
kFeatureBitDepth = 3,
kFeatureChromaSubsampling = 4,
};
std::string VPCodecAsString(VideoCodec codec) {
switch (codec) {
@ -33,14 +40,7 @@ std::string VPCodecAsString(VideoCodec codec) {
} // namespace
VPCodecConfigurationRecord::VPCodecConfigurationRecord()
: profile_(0),
level_(0),
bit_depth_(0),
color_space_(0),
chroma_subsampling_(0),
transfer_function_(0),
video_full_range_flag_(false) {}
VPCodecConfigurationRecord::VPCodecConfigurationRecord() {}
VPCodecConfigurationRecord::VPCodecConfigurationRecord(
uint8_t profile,
@ -58,12 +58,26 @@ VPCodecConfigurationRecord::VPCodecConfigurationRecord(
chroma_subsampling_(chroma_subsampling),
transfer_function_(transfer_function),
video_full_range_flag_(video_full_range_flag),
profile_is_set_(true),
level_is_set_(true),
bit_depth_is_set_(true),
color_space_is_set_(true),
chroma_subsampling_is_set_(true),
transfer_function_is_set_(true),
video_full_range_flag_is_set_(true),
codec_initialization_data_(codec_initialization_data) {}
// Nothing to release explicitly; members clean up after themselves.
VPCodecConfigurationRecord::~VPCodecConfigurationRecord() {}
bool VPCodecConfigurationRecord::Parse(const std::vector<uint8_t>& data) {
bool VPCodecConfigurationRecord::ParseMP4(const std::vector<uint8_t>& data) {
BitReader reader(data.data(), data.size());
profile_is_set_ = true;
level_is_set_ = true;
bit_depth_is_set_ = true;
color_space_is_set_ = true;
chroma_subsampling_is_set_ = true;
transfer_function_is_set_ = true;
video_full_range_flag_is_set_ = true;
RCHECK(reader.ReadBits(8, &profile_));
RCHECK(reader.ReadBits(8, &level_));
RCHECK(reader.ReadBits(4, &bit_depth_));
@ -81,7 +95,47 @@ bool VPCodecConfigurationRecord::Parse(const std::vector<uint8_t>& data) {
return true;
}
void VPCodecConfigurationRecord::Write(std::vector<uint8_t>* data) const {
// Parses a VP9 codec feature list in WebM format. |data| is a sequence of
// entries, each laid out as: one byte feature ID, one byte payload size, then
// |size| payload bytes. Known features (profile, level, bit depth, chroma
// subsampling) must have a one-byte payload; the value is stored and the
// matching *_is_set_ flag is raised. Unknown features are skipped with a
// warning. Returns false as soon as an entry is truncated or a known feature
// has an unexpected size; fields parsed before the failure keep their new
// values.
bool VPCodecConfigurationRecord::ParseWebM(const std::vector<uint8_t>& data) {
  BufferReader reader(data.data(), data.size());

  while (reader.HasBytes(1)) {
    uint8_t id;
    uint8_t size;
    RCHECK(reader.Read1(&id));
    RCHECK(reader.Read1(&size));

    switch (id) {
      case kFeatureProfile:
        RCHECK(size == 1);
        RCHECK(reader.Read1(&profile_));
        profile_is_set_ = true;
        break;
      case kFeatureLevel:
        RCHECK(size == 1);
        RCHECK(reader.Read1(&level_));
        level_is_set_ = true;
        break;
      case kFeatureBitDepth:
        RCHECK(size == 1);
        RCHECK(reader.Read1(&bit_depth_));
        bit_depth_is_set_ = true;
        break;
      case kFeatureChromaSubsampling:
        RCHECK(size == 1);
        RCHECK(reader.Read1(&chroma_subsampling_));
        chroma_subsampling_is_set_ = true;
        break;
      default:
        // |id| is a uint8_t; widen it so the log shows the numeric feature ID
        // rather than a raw (possibly unprintable) character.
        LOG(WARNING) << "Skipping unknown VP9 codec feature "
                     << static_cast<int>(id);
        RCHECK(reader.SkipBytes(size));
        break;
    }
  }

  return true;
}
void VPCodecConfigurationRecord::WriteMP4(std::vector<uint8_t>* data) const {
BufferWriter writer;
writer.AppendInt(profile_);
writer.AppendInt(level_);
@ -96,6 +150,36 @@ void VPCodecConfigurationRecord::Write(std::vector<uint8_t>* data) const {
writer.SwapBuffer(data);
}
void VPCodecConfigurationRecord::WriteWebM(std::vector<uint8_t>* data) const {
BufferWriter writer;
writer.AppendInt(static_cast<uint8_t>(kFeatureProfile)); // ID = 1
writer.AppendInt(static_cast<uint8_t>(1)); // Length = 1
writer.AppendInt(static_cast<uint8_t>(profile_));
if (level_ != 0) {
writer.AppendInt(static_cast<uint8_t>(kFeatureLevel)); // ID = 2
writer.AppendInt(static_cast<uint8_t>(1)); // Length = 1
writer.AppendInt(static_cast<uint8_t>(level_));
}
writer.AppendInt(static_cast<uint8_t>(kFeatureBitDepth)); // ID = 3
writer.AppendInt(static_cast<uint8_t>(1)); // Length = 1
writer.AppendInt(static_cast<uint8_t>(bit_depth_));
// WebM doesn't differentiate whether it is vertical or collocated with luma
// for 4:2:0.
const uint8_t subsampling =
chroma_subsampling_ == CHROMA_420_COLLOCATED_WITH_LUMA
? CHROMA_420_VERTICAL
: chroma_subsampling_;
writer.AppendInt(static_cast<uint8_t>(kFeatureChromaSubsampling)); // ID = 4
writer.AppendInt(static_cast<uint8_t>(1)); // Length = 1
writer.AppendInt(subsampling);
writer.SwapBuffer(data);
}
std::string VPCodecConfigurationRecord::GetCodecString(VideoCodec codec) const {
const std::string fields[] = {
base::IntToString(profile_),
@ -117,5 +201,75 @@ std::string VPCodecConfigurationRecord::GetCodecString(VideoCodec codec) const {
return codec_string;
}
void VPCodecConfigurationRecord::MergeFrom(
const VPCodecConfigurationRecord& other) {
if (!profile_is_set_ || other.profile_is_set_) {
profile_ = other.profile();
profile_is_set_ = true;
}
if (!level_is_set_ || other.level_is_set_) {
if (level_is_set_ && other.level() != level_) {
LOG(WARNING) << "VPx level is inconsistent, " << level_ << " vs "
<< other.level();
}
level_ = other.level();
level_is_set_ = true;
}
if (!bit_depth_is_set_ || other.bit_depth_is_set_) {
if (bit_depth_is_set_ && bit_depth_ != other.bit_depth()) {
LOG(WARNING) << "VPx bit depth is inconsistent, " << bit_depth_ << " vs "
<< other.bit_depth();
}
bit_depth_ = other.bit_depth();
bit_depth_is_set_ = true;
}
if (!color_space_is_set_ || other.color_space_is_set_) {
if (color_space_is_set_ && color_space_ != other.color_space()) {
LOG(WARNING) << "VPx color space is inconsistent, " << color_space_
<< " vs " << other.color_space();
}
color_space_ = other.color_space();
color_space_is_set_ = true;
}
if (!chroma_subsampling_is_set_ || other.chroma_subsampling_is_set_) {
if (chroma_subsampling_is_set_ &&
chroma_subsampling_ != other.chroma_subsampling_) {
LOG(WARNING) << "VPx chroma subsampling is inconsistent, "
<< chroma_subsampling_ << " vs "
<< other.chroma_subsampling();
}
chroma_subsampling_ = other.chroma_subsampling();
chroma_subsampling_is_set_ = true;
}
if (!transfer_function_is_set_ || other.transfer_function_is_set_) {
if (transfer_function_is_set_ &&
transfer_function_ != other.transfer_function_) {
LOG(WARNING) << "VPx transfer function is inconsistent, "
<< transfer_function_ << " vs "
<< other.transfer_function();
}
transfer_function_ = other.transfer_function();
transfer_function_is_set_ = true;
}
if (!video_full_range_flag_is_set_ || other.video_full_range_flag_is_set_) {
if (video_full_range_flag_is_set_ &&
video_full_range_flag_ != other.video_full_range_flag_) {
LOG(WARNING) << "VPx video full-range flag is inconsistent, "
<< video_full_range_flag_<< " vs "
<< other.video_full_range_flag();
}
video_full_range_flag_ = other.video_full_range_flag();
video_full_range_flag_is_set_ = true;
}
if (codec_initialization_data_.empty() ||
!other.codec_initialization_data_.empty()) {
if (!codec_initialization_data_.empty() &&
codec_initialization_data_ != other.codec_initialization_data_) {
LOG(WARNING) << "VPx codec initialization data is inconsistent";
}
codec_initialization_data_ = other.codec_initialization_data_;
}
}
} // namespace media
} // namespace shaka

View File

@ -51,26 +51,52 @@ class VPCodecConfigurationRecord {
const std::vector<uint8_t>& codec_initialization_data);
~VPCodecConfigurationRecord();
/// Parses input to extract VP codec configuration record.
/// Parses input (in MP4 format) to extract VP codec configuration record.
/// @return false if there are parsing errors.
bool Parse(const std::vector<uint8_t>& data);
bool ParseMP4(const std::vector<uint8_t>& data);
/// Parses input (in WebM format) to extract VP codec configuration record.
/// @return false if there are parsing errors.
bool ParseWebM(const std::vector<uint8_t>& data);
/// @param data should not be null.
/// Writes VP codec configuration record to buffer.
void Write(std::vector<uint8_t>* data) const;
/// Writes VP codec configuration record to buffer using MP4 format.
void WriteMP4(std::vector<uint8_t>* data) const;
/// @param data should not be null.
/// Writes VP codec configuration record to buffer using WebM format.
void WriteWebM(std::vector<uint8_t>* data) const;
/// @return The codec string.
std::string GetCodecString(VideoCodec codec) const;
void set_profile(uint8_t profile) { profile_ = profile; }
void set_level(uint8_t level) { level_ = level; }
void set_bit_depth(uint8_t bit_depth) { bit_depth_ = bit_depth; }
void set_color_space(uint8_t color_space) { color_space_ = color_space; }
// Merges the values from the given configuration. If there are values in
// both |*this| and |other|, the values in |other| take precedence.
void MergeFrom(const VPCodecConfigurationRecord& other);
/// Stores |profile| and flags the profile as set, which MergeFrom() consults
/// when deciding which side's value to keep.
void set_profile(uint8_t profile) {
profile_ = profile;
profile_is_set_ = true;
}
/// Stores |level| and flags the level as set, which MergeFrom() consults
/// when deciding which side's value to keep.
void set_level(uint8_t level) {
level_ = level;
level_is_set_ = true;
}
/// Stores |bit_depth| and flags the bit depth as set, which MergeFrom()
/// consults when deciding which side's value to keep.
void set_bit_depth(uint8_t bit_depth) {
bit_depth_ = bit_depth;
bit_depth_is_set_ = true;
}
/// Stores |color_space| and flags the color space as set, which MergeFrom()
/// consults when deciding which side's value to keep.
void set_color_space(uint8_t color_space) {
color_space_ = color_space;
color_space_is_set_ = true;
}
/// Stores |chroma_subsampling| and flags the chroma subsampling as set, which
/// MergeFrom() consults when deciding which side's value to keep.
void set_chroma_subsampling(uint8_t chroma_subsampling) {
chroma_subsampling_ = chroma_subsampling;
chroma_subsampling_is_set_ = true;
}
/// Stores |transfer_function| and flags the transfer function as set, which
/// MergeFrom() consults when deciding which side's value to keep.
void set_transfer_function(uint8_t transfer_function) {
transfer_function_ = transfer_function;
transfer_function_is_set_ = true;
}
void set_video_full_range_flag(bool video_full_range_flag) {
video_full_range_flag_ = video_full_range_flag;
@ -85,13 +111,20 @@ class VPCodecConfigurationRecord {
bool video_full_range_flag() const { return video_full_range_flag_; }
private:
uint8_t profile_;
uint8_t level_;
uint8_t bit_depth_;
uint8_t color_space_;
uint8_t chroma_subsampling_;
uint8_t transfer_function_;
bool video_full_range_flag_;
uint8_t profile_ = 0;
uint8_t level_ = 0;
uint8_t bit_depth_ = 0;
uint8_t color_space_ = 0;
uint8_t chroma_subsampling_ = 0;
uint8_t transfer_function_ = 0;
bool video_full_range_flag_ = false;
bool profile_is_set_ = false;
bool level_is_set_ = false;
bool bit_depth_is_set_ = false;
bool color_space_is_set_ = false;
bool chroma_subsampling_is_set_ = false;
bool transfer_function_is_set_ = false;
bool video_full_range_flag_is_set_ = false;
std::vector<uint8_t> codec_initialization_data_;
// Not using DISALLOW_COPY_AND_ASSIGN here intentionally to allow the compiler

View File

@ -17,7 +17,7 @@ TEST(VPCodecConfigurationRecordTest, Parse) {
};
VPCodecConfigurationRecord vp_config;
ASSERT_TRUE(vp_config.Parse(std::vector<uint8_t>(
ASSERT_TRUE(vp_config.ParseMP4(std::vector<uint8_t>(
kVpCodecConfigurationData,
kVpCodecConfigurationData + arraysize(kVpCodecConfigurationData))));
@ -38,19 +38,38 @@ TEST(VPCodecConfigurationRecordTest, ParseWithInsufficientData) {
};
VPCodecConfigurationRecord vp_config;
ASSERT_FALSE(vp_config.Parse(std::vector<uint8_t>(
ASSERT_FALSE(vp_config.ParseMP4(std::vector<uint8_t>(
kVpCodecConfigurationData,
kVpCodecConfigurationData + arraysize(kVpCodecConfigurationData))));
}
TEST(VPCodecConfigurationRecordTest, Write) {
TEST(VPCodecConfigurationRecordTest, WriteMP4) {
const uint8_t kExpectedVpCodecConfigurationData[] = {
0x02, 0x01, 0x80, 0x21, 0x00, 0x00,
};
VPCodecConfigurationRecord vp_config(0x02, 0x01, 0x08, 0x00, 0x02, 0x00, true,
std::vector<uint8_t>());
std::vector<uint8_t> data;
vp_config.Write(&data);
vp_config.WriteMP4(&data);
EXPECT_EQ(
std::vector<uint8_t>(kExpectedVpCodecConfigurationData,
kExpectedVpCodecConfigurationData +
arraysize(kExpectedVpCodecConfigurationData)),
data);
}
TEST(VPCodecConfigurationRecordTest, WriteWebM) {
const uint8_t kExpectedVpCodecConfigurationData[] = {
0x01, 0x01, 0x02,
0x02, 0x01, 0x01,
0x03, 0x01, 0x08,
0x04, 0x01, 0x03
};
VPCodecConfigurationRecord vp_config(0x02, 0x01, 0x08, 0x00, 0x03, 0x00, true,
std::vector<uint8_t>());
std::vector<uint8_t> data;
vp_config.WriteWebM(&data);
EXPECT_EQ(
std::vector<uint8_t>(kExpectedVpCodecConfigurationData,

View File

@ -562,7 +562,7 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
case FOURCC_vp09:
case FOURCC_vp10: {
VPCodecConfigurationRecord vp_config;
if (!vp_config.Parse(entry.codec_configuration.data)) {
if (!vp_config.ParseMP4(entry.codec_configuration.data)) {
LOG(ERROR) << "Failed to parse vpcc.";
return false;
}

View File

@ -14,6 +14,7 @@
#include "packager/media/base/muxer_util.h"
#include "packager/media/base/stream_info.h"
#include "packager/media/base/video_stream_info.h"
#include "packager/media/codecs/vp_codec_configuration_record.h"
#include "packager/media/event/muxer_listener.h"
#include "packager/media/event/progress_listener.h"
#include "packager/third_party/libwebm/src/mkvmuxerutil.hpp"
@ -271,8 +272,23 @@ Status Segmenter::CreateVideoTrack(VideoStreamInfo* info) {
track->set_codec_id(mkvmuxer::Tracks::kVp8CodecId);
} else if (info->codec() == kCodecVP9) {
track->set_codec_id(mkvmuxer::Tracks::kVp9CodecId);
// The |StreamInfo::extra_data| field is stored using the MP4 format; we
// need to convert it to the WebM format.
VPCodecConfigurationRecord vp_config;
if (!vp_config.ParseMP4(info->extra_data())) {
return Status(error::INTERNAL_ERROR,
"Unable to parse VP9 codec configuration");
}
std::vector<uint8_t> extra_data;
vp_config.WriteWebM(&extra_data);
if (!track->SetCodecPrivate(extra_data.data(), extra_data.size())) {
return Status(error::INTERNAL_ERROR,
"Private codec data required for VP9 streams");
}
} else {
LOG(ERROR) << "Only VP8 and VP9 video codec is supported.";
LOG(ERROR) << "Only VP8 and VP9 video codecs are supported.";
return Status(error::UNIMPLEMENTED,
"Only VP8 and VP9 video codecs are supported.");
}

View File

@ -456,12 +456,15 @@ bool WebMClusterParser::OnBlock(bool is_simple_block,
return false;
}
const VPCodecConfigurationRecord* codec_config =
&vpx_parser->codec_config();
VPCodecConfigurationRecord codec_config;
if (!video_stream_info_->extra_data().empty())
codec_config.ParseMP4(video_stream_info_->extra_data());
codec_config.MergeFrom(vpx_parser->codec_config());
video_stream_info_->set_codec_string(
codec_config->GetCodecString(video_stream_info_->codec()));
codec_config.GetCodecString(video_stream_info_->codec()));
std::vector<uint8_t> extra_data;
codec_config->Write(&extra_data);
codec_config.WriteMP4(&extra_data);
video_stream_info_->set_extra_data(extra_data);
streams.push_back(video_stream_info_);
init_cb_.Run(streams);

View File

@ -6,6 +6,7 @@
#include "packager/base/logging.h"
#include "packager/base/stl_util.h"
#include "packager/media/codecs/vp_codec_configuration_record.h"
#include "packager/media/formats/webm/webm_constants.h"
namespace {
@ -50,13 +51,22 @@ void WebMVideoClient::Reset() {
scoped_refptr<VideoStreamInfo> WebMVideoClient::GetVideoStreamInfo(
int64_t track_num,
const std::string& codec_id,
const std::vector<uint8_t>& codec_private,
const std::vector<uint8_t>& codec_private_in,
bool is_encrypted) {
std::vector<uint8_t> codec_private = codec_private_in;
VideoCodec video_codec = kUnknownVideoCodec;
if (codec_id == "V_VP8") {
video_codec = kCodecVP8;
} else if (codec_id == "V_VP9") {
video_codec = kCodecVP9;
// Need to parse and convert the codec private data to MP4 format.
VPCodecConfigurationRecord vp_config;
if (!vp_config.ParseWebM(codec_private)) {
LOG(ERROR) << "Unable to parse VP9 codec configuration";
return scoped_refptr<VideoStreamInfo>();
}
vp_config.WriteMP4(&codec_private);
} else if (codec_id == "V_VP10") {
video_codec = kCodecVP10;
} else {
@ -107,8 +117,8 @@ scoped_refptr<VideoStreamInfo> WebMVideoClient::GetVideoStreamInfo(
return scoped_refptr<VideoStreamInfo>(new VideoStreamInfo(
track_num, kWebMTimeScale, 0, video_codec, std::string(), std::string(),
width_after_crop, height_after_crop, sar_x, sar_y, 0, 0, NULL, 0,
is_encrypted));
width_after_crop, height_after_crop, sar_x, sar_y, 0, 0,
codec_private.data(), codec_private.size(), is_encrypted));
}
bool WebMVideoClient::OnUInt(int id, int64_t val) {