From c393b71b6f4f30e428c40d1266f9eb2dd589a84d Mon Sep 17 00:00:00 2001 From: KongQun Yang Date: Wed, 11 Nov 2015 13:54:45 -0800 Subject: [PATCH] Implement vp9 bitstream parser Bug: 25586821 Change-Id: I42d77a85c7214262842f49fec83689b6b58cad3d --- packager/media/filters/filters.gyp | 4 + packager/media/filters/h264_parser.h | 2 +- packager/media/filters/vp9_parser.cc | 548 ++++++++++++++++++ packager/media/filters/vp9_parser.h | 62 ++ packager/media/filters/vp9_parser_unittest.cc | 227 ++++++++ .../media/filters/vp_codec_configuration.cc | 6 +- .../media/filters/vp_codec_configuration.h | 31 + 7 files changed, 876 insertions(+), 4 deletions(-) create mode 100644 packager/media/filters/vp9_parser.cc create mode 100644 packager/media/filters/vp9_parser.h create mode 100644 packager/media/filters/vp9_parser_unittest.cc diff --git a/packager/media/filters/filters.gyp b/packager/media/filters/filters.gyp index dc36a9a225..ff624a8d92 100644 --- a/packager/media/filters/filters.gyp +++ b/packager/media/filters/filters.gyp @@ -25,6 +25,8 @@ 'h264_parser.h', 'vp_codec_configuration.cc', 'vp_codec_configuration.h', + 'vp9_parser.cc', + 'vp9_parser.h', ], 'dependencies': [ '../../base/base.gyp:base', @@ -40,9 +42,11 @@ 'h264_parser_unittest.cc', 'hevc_decoder_configuration_unittest.cc', 'vp_codec_configuration_unittest.cc', + 'vp9_parser_unittest.cc', ], 'dependencies': [ '../../media/base/media_base.gyp:base', + '../../testing/gmock.gyp:gmock', '../../testing/gtest.gyp:gtest', '../test/media_test.gyp:media_test_support', 'filters', diff --git a/packager/media/filters/h264_parser.h b/packager/media/filters/h264_parser.h index 37dbf33dff..dff5f9f62c 100644 --- a/packager/media/filters/h264_parser.h +++ b/packager/media/filters/h264_parser.h @@ -8,7 +8,7 @@ #define MEDIA_FILTERS_H264_PARSER_H_ #include -#include +#include #include diff --git a/packager/media/filters/vp9_parser.cc b/packager/media/filters/vp9_parser.cc new file mode 100644 index 0000000000..5db7079bf7 --- 
/dev/null
+++ b/packager/media/filters/vp9_parser.cc
@@ -0,0 +1,589 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file or at
+// https://developers.google.com/open-source/licenses/bsd
+
+#include "packager/media/filters/vp9_parser.h"
+
+#include "packager/base/logging.h"
+#include "packager/media/base/bit_reader.h"
+#include "packager/media/formats/mp4/rcheck.h"
+
+namespace edash_packager {
+namespace media {
+namespace {
+
+const uint32_t VP9_FRAME_MARKER = 2;
+const uint32_t VP9_SYNC_CODE = 0x498342;
+const uint32_t REFS_PER_FRAME = 3;
+const uint32_t REF_FRAMES_LOG2 = 3;
+const uint32_t REF_FRAMES = (1 << REF_FRAMES_LOG2);
+const uint32_t FRAME_CONTEXTS_LOG2 = 2;
+const uint32_t MAX_REF_LF_DELTAS = 4;
+const uint32_t MAX_MODE_LF_DELTAS = 2;
+const uint32_t QINDEX_BITS = 8;
+const uint32_t MAX_SEGMENTS = 8;
+const uint32_t SEG_TREE_PROBS = (MAX_SEGMENTS - 1);
+const uint32_t PREDICTION_PROBS = 3;
+const uint32_t SEG_LVL_MAX = 4;
+const uint32_t MI_SIZE_LOG2 = 3;
+const uint32_t MI_BLOCK_SIZE_LOG2 = (6 - MI_SIZE_LOG2);  // 64 = 2^6
+const uint32_t MIN_TILE_WIDTH_B64 = 4;
+const uint32_t MAX_TILE_WIDTH_B64 = 64;
+
+// Per-feature sign flag and bit width of segmentation data, by feature id.
+const bool SEG_FEATURE_DATA_SIGNED[SEG_LVL_MAX] = {true, true, false, false};
+const uint32_t SEG_FEATURE_DATA_MAX_BITS[SEG_LVL_MAX] = {8, 6, 2, 0};
+
+// Color space values as they are compared against the 3-bit field read below.
+enum VpxColorSpace {
+  VPX_COLOR_SPACE_UNKNOWN = 0,
+  VPX_COLOR_SPACE_BT_601 = 1,
+  VPX_COLOR_SPACE_BT_709 = 2,
+  VPX_COLOR_SPACE_SMPTE_170 = 3,
+  VPX_COLOR_SPACE_SMPTE_240 = 4,
+  VPX_COLOR_SPACE_BT_2020 = 5,
+  VPX_COLOR_SPACE_RESERVED = 6,
+  VPX_COLOR_SPACE_SRGB = 7,
+};
+
+// BitReader with a helper for fields that are guarded by a one-bit flag.
+class VP9BitReader : public BitReader {
+ public:
+  VP9BitReader(const uint8_t* data, off_t size) : BitReader(data, size) {}
+  ~VP9BitReader() {}
+
+  // Reads a one-bit flag and, if it is set, skips the |num_bits| that follow.
+  // Returns false if either the read or the skip fails.
+  bool SkipBitsConditional(uint32_t num_bits) {
+    bool condition;
+    if (!ReadBits(1, &condition))
+      return false;
+    return condition ? SkipBits(num_bits) : true;
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(VP9BitReader);
+};
+
+// Returns |value| divided by 2^n, rounded up.
+uint32_t RoundupShift(uint32_t value, uint32_t n) {
+  return (value + (1 << n) - 1) >> n;
+}
+
+// Number of MI-units (8*8).
+uint32_t GetNumMiUnits(uint32_t pixels) {
+  return RoundupShift(pixels, MI_SIZE_LOG2);
+}
+
+// Number of sb64 (64x64) blocks per mi_units.
+uint32_t GetNumBlocks(uint32_t mi_units) {
+  return RoundupShift(mi_units, MI_BLOCK_SIZE_LOG2);
+}
+
+// Smallest log2 tile-column count that keeps tiles within MAX_TILE_WIDTH_B64.
+uint32_t GetMinLog2TileCols(uint32_t sb64_cols) {
+  uint32_t min_log2 = 0;
+  while ((MAX_TILE_WIDTH_B64 << min_log2) < sb64_cols)
+    ++min_log2;
+  return min_log2;
+}
+
+// Largest log2 tile-column count with tiles of at least MIN_TILE_WIDTH_B64.
+uint32_t GetMaxLog2TileCols(uint32_t sb64_cols) {
+  uint32_t max_log2 = 1;
+  while ((sb64_cols >> max_log2) >= MIN_TILE_WIDTH_B64)
+    ++max_log2;
+  return max_log2 - 1;
+}
+
+// Computes the allowed range of log2 tile columns for a frame |mi_cols| wide.
+void GetTileNBits(uint32_t mi_cols,
+                  uint32_t* min_log2_tile_cols,
+                  uint32_t* max_log2_tile_cols) {
+  const uint32_t sb64_cols = GetNumBlocks(mi_cols);
+  *min_log2_tile_cols = GetMinLog2TileCols(sb64_cols);
+  *max_log2_tile_cols = GetMaxLog2TileCols(sb64_cols);
+  CHECK_LE(*min_log2_tile_cols, *max_log2_tile_cols);
+}
+
+// Parse superframe index if it is a superframe. Fill |vpx_frames| with the
+// frames information, which contains the sizes of the frames indicated in
+// superframe index if it is a superframe; otherwise it should contain one
+// single frame with |data_size| as frame size.
+// Returns false if |data| is empty or the superframe index is malformed.
+bool ParseIfSuperframeIndex(const uint8_t* data,
+                            size_t data_size,
+                            std::vector<VPxFrameInfo>* vpx_frames) {
+  vpx_frames->clear();
+  if (data_size == 0) {
+    // There is no last byte to inspect; reading it would be out of bounds.
+    LOG(ERROR) << "Cannot parse an empty chunk.";
+    return false;
+  }
+  const uint8_t superframe_marker = data[data_size - 1];
+  VPxFrameInfo vpx_frame = {};
+  if ((superframe_marker & 0xe0) != 0xc0) {
+    // This is not a super frame. There should be only one frame.
+    vpx_frame.frame_size = data_size;
+    vpx_frames->push_back(vpx_frame);
+    return true;
+  }
+
+  const size_t num_frames = (superframe_marker & 0x07) + 1;
+  const size_t frame_size_length = ((superframe_marker >> 3) & 0x03) + 1;
+  // Two marker bytes + frame sizes.
+  const size_t index_size = 2 + num_frames * frame_size_length;
+
+  if (data_size < index_size) {
+    LOG(ERROR) << "This chunk is marked as having a superframe index but "
+                  "doesn't have enough data for it.";
+    return false;
+  }
+  const uint8_t superframe_marker2 = data[data_size - index_size];
+  if (superframe_marker2 != superframe_marker) {
+    LOG(ERROR) << "This chunk is marked as having a superframe index but "
+                  "doesn't have the matching marker byte at the front of the "
+                  "index.";
+    return false;
+  }
+  VLOG(3) << "Superframe num_frames=" << num_frames
+          << " frame_size_length=" << frame_size_length;
+
+  data += data_size - index_size + 1;
+  size_t total_frame_sizes = 0;
+  for (size_t i = 0; i < num_frames; ++i) {
+    // Each size is assembled least significant byte first.
+    vpx_frame.frame_size = 0;
+    for (size_t j = 0; j < frame_size_length; ++j) {
+      // Widen before shifting so that a set top bit never reaches the sign bit
+      // of a promoted int.
+      vpx_frame.frame_size |= static_cast<size_t>(*data) << (j * 8);
+      ++data;
+    }
+    total_frame_sizes += vpx_frame.frame_size;
+    vpx_frames->push_back(vpx_frame);
+  }
+  if (total_frame_sizes + index_size != data_size) {
+    LOG(ERROR) << "Data size (" << data_size
+               << ") does not match with sum of frame sizes ("
+               << total_frame_sizes << ") + index_size (" << index_size << ")";
+    return false;
+  }
+  return true;
+}
+
+// Reads the 2-bit profile (plus the reserved zero bit that follows profile 3)
+// and stores it in |codec_config|.
+bool ReadProfile(VP9BitReader* reader, VPCodecConfiguration* codec_config) {
+  uint8_t bit[2];
+  RCHECK(reader->ReadBits(1, &bit[0]));
+  RCHECK(reader->ReadBits(1, &bit[1]));
+  uint8_t profile = bit[0] | (bit[1] << 1);
+  if (profile == 3) {
+    bool reserved;
+    RCHECK(reader->ReadBits(1, &reserved));
+    RCHECK(!reserved);
+  }
+  codec_config->set_profile(profile);
+  return true;
+}
+
+// Reads the 24-bit frame sync code and checks it against VP9_SYNC_CODE.
+bool ReadSyncCode(VP9BitReader* reader) {
+  uint32_t sync_code;
+  RCHECK(reader->ReadBits(24, &sync_code));
+  return sync_code == VP9_SYNC_CODE;
+}
+
+// Maps a VP9 bitstream color space value to its VPCodecConfiguration
+// equivalent; values without one are reported as unspecified.
+VPCodecConfiguration::ColorSpace GetColorSpace(uint8_t color_space) {
+  switch (color_space) {
+    case VPX_COLOR_SPACE_UNKNOWN:
+      return VPCodecConfiguration::COLOR_SPACE_UNSPECIFIED;
+    case VPX_COLOR_SPACE_BT_601:
+      return VPCodecConfiguration::COLOR_SPACE_BT_601;
+    case VPX_COLOR_SPACE_BT_709:
+      return VPCodecConfiguration::COLOR_SPACE_BT_709;
+    case VPX_COLOR_SPACE_BT_2020:
+      // VP9 does not specify if it is in the form of “constant luminance” or
+      // “non-constant luminance”. As such, application should rely on the
+      // signaling outside of VP9 bitstream. If there is no such signaling,
+      // application may assume non-constant luminance for BT.2020.
+      return VPCodecConfiguration::COLOR_SPACE_BT_2020_NON_CONSTANT_LUMINANCE;
+    case VPX_COLOR_SPACE_SRGB:
+      return VPCodecConfiguration::COLOR_SPACE_SRGB;
+    default:
+      LOG(WARNING) << "Unknown color space: " << static_cast<int>(color_space);
+      return VPCodecConfiguration::COLOR_SPACE_UNSPECIFIED;
+  }
+}
+
+// Maps the 2-bit subsampling field of the bitstream to a chroma subsampling
+// format.
+VPCodecConfiguration::ChromaSubsampling GetChromaSubsampling(
+    uint8_t subsampling) {
+  switch (subsampling) {
+    case 0:
+      return VPCodecConfiguration::CHROMA_444;
+    case 1:
+      return VPCodecConfiguration::CHROMA_440;
+    case 2:
+      return VPCodecConfiguration::CHROMA_422;
+    case 3:
+      // VP9 assumes that chroma samples are collocated with luma samples if
+      // there is no explicit signaling outside of VP9 bitstream.
+      return VPCodecConfiguration::CHROMA_420_COLLOCATED_WITH_LUMA;
+    default:
+      LOG(WARNING) << "Unexpected chroma subsampling value: "
+                   << static_cast<int>(subsampling);
+      return VPCodecConfiguration::CHROMA_420_COLLOCATED_WITH_LUMA;
+  }
+}
+
+// Reads bit depth, color space, range flag and chroma subsampling into
+// |codec_config|. The profile must already be set on |codec_config|.
+bool ReadBitDepthAndColorSpace(VP9BitReader* reader,
+                               VPCodecConfiguration* codec_config) {
+  uint8_t bit_depth = 8;
+  if (codec_config->profile() >= 2) {
+    bool use_vpx_bits_12;
+    RCHECK(reader->ReadBits(1, &use_vpx_bits_12));
+    bit_depth = use_vpx_bits_12 ? 12 : 10;
+  }
+  codec_config->set_bit_depth(bit_depth);
+
+  uint8_t color_space;
+  RCHECK(reader->ReadBits(3, &color_space));
+  codec_config->set_color_space(GetColorSpace(color_space));
+
+  bool yuv_full_range = false;
+  auto chroma_subsampling = VPCodecConfiguration::CHROMA_444;
+  if (color_space != VPX_COLOR_SPACE_SRGB) {
+    RCHECK(reader->ReadBits(1, &yuv_full_range));
+
+    if (codec_config->profile() & 1) {
+      uint8_t subsampling;
+      RCHECK(reader->ReadBits(2, &subsampling));
+      chroma_subsampling = GetChromaSubsampling(subsampling);
+      if (chroma_subsampling ==
+          VPCodecConfiguration::CHROMA_420_COLLOCATED_WITH_LUMA) {
+        LOG(ERROR) << "4:2:0 color not supported in profile "
+                   << static_cast<int>(codec_config->profile());
+        return false;
+      }
+
+      bool reserved;
+      RCHECK(reader->ReadBits(1, &reserved));
+      RCHECK(!reserved);
+    } else {
+      chroma_subsampling =
+          VPCodecConfiguration::CHROMA_420_COLLOCATED_WITH_LUMA;
+    }
+  } else {
+    // Assume 4:4:4 for colorspace SRGB.
+    chroma_subsampling = VPCodecConfiguration::CHROMA_444;
+    if (codec_config->profile() & 1) {
+      bool reserved;
+      RCHECK(reader->ReadBits(1, &reserved));
+      RCHECK(!reserved);
+    } else {
+      LOG(ERROR) << "4:4:4 color not supported in profile 0 or 2.";
+      return false;
+    }
+  }
+  codec_config->set_video_full_range_flag(yuv_full_range);
+  codec_config->set_chroma_subsampling(chroma_subsampling);
+
+  VLOG(3) << "\n profile " << static_cast<int>(codec_config->profile())
+          << "\n bit depth " << static_cast<int>(codec_config->bit_depth())
+          << "\n color space " << static_cast<int>(codec_config->color_space())
+          << "\n full_range "
+          << static_cast<int>(codec_config->video_full_range_flag())
+          << "\n chroma subsampling "
+          << static_cast<int>(codec_config->chroma_subsampling());
+  return true;
+}
+
+// Reads a frame size; width and height are both coded as 16-bit (value - 1).
+bool ReadFrameSize(VP9BitReader* reader, uint32_t* width, uint32_t* height) {
+  RCHECK(reader->ReadBits(16, width));
+  *width += 1;  // Off by 1.
+  RCHECK(reader->ReadBits(16, height));
+  *height += 1;  // Off by 1.
+  return true;
+}
+
+// Reads the optional display size. |display_width| and |display_height| are
+// left untouched when the stream does not carry one.
+bool ReadDisplayFrameSize(VP9BitReader* reader,
+                          uint32_t* display_width,
+                          uint32_t* display_height) {
+  bool has_display_size;
+  RCHECK(reader->ReadBits(1, &has_display_size));
+  if (has_display_size)
+    RCHECK(ReadFrameSize(reader, display_width, display_height));
+  return true;
+}
+
+// Reads the coded frame size into |width| and |height|, then consumes the
+// optional display size that follows it.
+bool ReadFrameSizes(VP9BitReader* reader, uint32_t* width, uint32_t* height) {
+  uint32_t new_width;
+  uint32_t new_height;
+  RCHECK(ReadFrameSize(reader, &new_width, &new_height));
+  if (new_width != *width) {
+    VLOG(1) << "Width updates from " << *width << " to " << new_width;
+    *width = new_width;
+  }
+  if (new_height != *height) {
+    VLOG(1) << "Height updates from " << *height << " to " << new_height;
+    *height = new_height;
+  }
+
+  uint32_t display_width = *width;
+  uint32_t display_height = *height;
+  RCHECK(ReadDisplayFrameSize(reader, &display_width, &display_height));
+  return true;
+}
+
+// Reads the frame size of an inter frame. The size is either inherited from a
+// reference frame, in which case |width| and |height| are left unchanged, or
+// coded explicitly.
+bool ReadFrameSizesWithRefs(VP9BitReader* reader,
+                            uint32_t* width,
+                            uint32_t* height) {
+  bool found = false;
+  for (uint32_t i = 0; i < REFS_PER_FRAME; ++i) {
+    RCHECK(reader->ReadBits(1, &found));
+    if (found)
+      break;
+  }
+  if (!found) {
+    RCHECK(ReadFrameSizes(reader, width, height));
+  } else {
+    uint32_t display_width;
+    uint32_t display_height;
+    RCHECK(ReadDisplayFrameSize(reader, &display_width, &display_height));
+  }
+  return true;
+}
+
+// Skips over the loop filter parameters.
+bool ReadLoopFilter(VP9BitReader* reader) {
+  RCHECK(reader->SkipBits(9));  // filter_level, sharpness_level
+  bool mode_ref_delta_enabled;
+  RCHECK(reader->ReadBits(1, &mode_ref_delta_enabled));
+  if (!mode_ref_delta_enabled)
+    return true;
+  bool mode_ref_delta_update;
+  RCHECK(reader->ReadBits(1, &mode_ref_delta_update));
+  if (!mode_ref_delta_update) return true;
+
+  for (uint32_t i = 0; i < MAX_REF_LF_DELTAS + MAX_MODE_LF_DELTAS; ++i)
+    RCHECK(reader->SkipBitsConditional(6 + 1));
+  return true;
+}
+
+// Skips over the quantization parameters.
+bool ReadQuantization(VP9BitReader* reader) {
+  RCHECK(reader->SkipBits(QINDEX_BITS));
+  // Skip delta_q bits.
+  for (uint32_t i = 0; i < 3; ++i)
+    RCHECK(reader->SkipBitsConditional(4 + 1));
+  return true;
+}
+
+// Skips over the segmentation parameters.
+bool ReadSegmentation(VP9BitReader* reader) {
+  bool enabled;
+  RCHECK(reader->ReadBits(1, &enabled));
+  if (!enabled)
+    return true;
+
+  bool update_map;
+  RCHECK(reader->ReadBits(1, &update_map));
+  if (update_map) {
+    for (uint32_t i = 0; i < SEG_TREE_PROBS; ++i)
+      RCHECK(reader->SkipBitsConditional(8));
+
+    // temporal_update is coded once, after all of the tree probabilities.
+    bool temporal_update;
+    RCHECK(reader->ReadBits(1, &temporal_update));
+    if (temporal_update) {
+      for (uint32_t j = 0; j < PREDICTION_PROBS; ++j)
+        RCHECK(reader->SkipBitsConditional(8));
+    }
+  }
+
+  bool update_data;
+  RCHECK(reader->ReadBits(1, &update_data));
+  if (update_data) {
+    RCHECK(reader->SkipBits(1));  // abs_delta
+    for (uint32_t i = 0; i < MAX_SEGMENTS; ++i) {
+      for (uint32_t j = 0; j < SEG_LVL_MAX; ++j) {
+        bool feature_enabled;
+        RCHECK(reader->ReadBits(1, &feature_enabled));
+        if (feature_enabled) {
+          RCHECK(reader->SkipBits(SEG_FEATURE_DATA_MAX_BITS[j]));
+          if (SEG_FEATURE_DATA_SIGNED[j])
+            RCHECK(reader->SkipBits(1));  // sign
+        }
+      }
+    }
+  }
+  return true;
+}
+
+// Skips over the tile layout. |width| is the frame width in pixels, which
+// determines how many bits the tile column count may occupy.
+bool ReadTileInfo(uint32_t width, VP9BitReader* reader) {
+  uint32_t mi_cols = GetNumMiUnits(width);
+
+  uint32_t min_log2_tile_cols;
+  uint32_t max_log2_tile_cols;
+  GetTileNBits(mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
+  uint32_t max_ones = max_log2_tile_cols - min_log2_tile_cols;
+
+  uint32_t log2_tile_cols = min_log2_tile_cols;
+  while (max_ones--) {
+    bool has_more;
+    RCHECK(reader->ReadBits(1, &has_more));
+    if (!has_more)
+      break;
+    ++log2_tile_cols;
+  }
+  RCHECK(log2_tile_cols <= 6);
+
+  RCHECK(reader->SkipBitsConditional(1));  // log2_tile_rows
+  return true;
+}
+
+}  // namespace
+
+VP9Parser::VP9Parser() : width_(0), height_(0) {}
+VP9Parser::~VP9Parser() {}
+
+bool VP9Parser::Parse(const uint8_t* data,
+                      size_t data_size,
+                      std::vector<VPxFrameInfo>* vpx_frames) {
+  DCHECK(data);
+  DCHECK(vpx_frames);
+  RCHECK(ParseIfSuperframeIndex(data, data_size, vpx_frames));
+
+  for (auto& vpx_frame : *vpx_frames) {
+    VLOG(4) << "process frame with size " << vpx_frame.frame_size;
+    VP9BitReader reader(data, vpx_frame.frame_size);
+    // Advance to the next frame now: |reader| already holds the current one,
+    // and the show_existing_frame path below leaves the loop body early.
+    data += vpx_frame.frame_size;
+    uint8_t frame_marker;
+    RCHECK(reader.ReadBits(2, &frame_marker));
+    RCHECK(frame_marker == VP9_FRAME_MARKER);
+
+    RCHECK(ReadProfile(&reader, &codec_config_));
+
+    bool show_existing_frame;
+    RCHECK(reader.ReadBits(1, &show_existing_frame));
+    if (show_existing_frame) {
+      RCHECK(reader.SkipBits(3));  // ref_frame_index
+      // End of current frame data. There should be no more bytes available.
+      RCHECK(reader.bits_available() < 8);
+
+      vpx_frame.is_key_frame = false;
+      vpx_frame.uncompressed_header_size = vpx_frame.frame_size;
+      vpx_frame.width = width_;
+      vpx_frame.height = height_;
+      continue;
+    }
+
+    bool is_inter_frame;
+    RCHECK(reader.ReadBits(1, &is_inter_frame));
+    vpx_frame.is_key_frame = !is_inter_frame;
+
+    bool show_frame;
+    RCHECK(reader.ReadBits(1, &show_frame));
+    bool error_resilient_mode;
+    RCHECK(reader.ReadBits(1, &error_resilient_mode));
+
+    if (vpx_frame.is_key_frame) {
+      RCHECK(ReadSyncCode(&reader));
+      RCHECK(ReadBitDepthAndColorSpace(&reader, &codec_config_));
+      RCHECK(ReadFrameSizes(&reader, &width_, &height_));
+    } else {
+      bool intra_only = false;
+      if (!show_frame)
+        RCHECK(reader.ReadBits(1, &intra_only));
+      if (!error_resilient_mode)
+        RCHECK(reader.SkipBits(2));  // reset_frame_context
+
+      if (intra_only) {
+        RCHECK(ReadSyncCode(&reader));
+        if (codec_config_.profile() > 0) {
+          RCHECK(ReadBitDepthAndColorSpace(&reader, &codec_config_));
+        } else {
+          // NOTE: The intra-only frame header does not include the
+          // specification of either the color format or color sub-sampling in
+          // profile 0. VP9 specifies that the default color format should be
+          // YUV 4:2:0 in this case (normative).
+          codec_config_.set_chroma_subsampling(
+              VPCodecConfiguration::CHROMA_420_COLLOCATED_WITH_LUMA);
+          codec_config_.set_bit_depth(8);
+        }
+
+        RCHECK(reader.SkipBits(REF_FRAMES));  // refresh_frame_flags
+        RCHECK(ReadFrameSizes(&reader, &width_, &height_));
+      } else {
+        RCHECK(reader.SkipBits(REF_FRAMES));  // refresh_frame_flags
+        RCHECK(reader.SkipBits(REFS_PER_FRAME * (REF_FRAMES_LOG2 + 1)));
+
+        // TODO(kqyang): We may need to actually build the refs to extract the
+        // correct width and height for the current frame. The width will be
+        // used later in ReadTileInfo.
+        RCHECK(ReadFrameSizesWithRefs(&reader, &width_, &height_));
+
+        RCHECK(reader.SkipBits(1));  // allow_high_precision_mv
+
+        bool interp_filter;
+        RCHECK(reader.ReadBits(1, &interp_filter));
+        if (!interp_filter)
+          RCHECK(reader.SkipBits(2));  // more interp_filter
+      }
+    }
+
+    if (!error_resilient_mode) {
+      RCHECK(reader.SkipBits(1));  // refresh_frame_context
+      RCHECK(reader.SkipBits(1));  // frame_parallel_decoding_mode
+    }
+    RCHECK(reader.SkipBits(FRAME_CONTEXTS_LOG2));  // frame_context_idx
+
+    VLOG(4) << "bit offset: "
+            << vpx_frame.frame_size * 8 - reader.bits_available();
+    RCHECK(ReadLoopFilter(&reader));
+    RCHECK(ReadQuantization(&reader));
+    RCHECK(ReadSegmentation(&reader));
+    RCHECK(ReadTileInfo(width_, &reader));
+
+    uint16_t first_partition_size;
+    RCHECK(reader.ReadBits(16, &first_partition_size));
+    vpx_frame.uncompressed_header_size =
+        vpx_frame.frame_size - reader.bits_available() / 8;
+    vpx_frame.width = width_;
+    vpx_frame.height = height_;
+
+    VLOG(3) << "\n frame_size: " << vpx_frame.frame_size
+            << "\n header_size: " << vpx_frame.uncompressed_header_size
+            << "\n bits_read: "
+            << vpx_frame.frame_size * 8 - reader.bits_available()
+            << "\n first_partition_size: " << first_partition_size;
+
+    RCHECK(first_partition_size > 0);
+    RCHECK(first_partition_size * 8 <= reader.bits_available());
+  }
+  return true;
+}
+
+}  // namespace media
+}  // namespace 
edash_packager
diff --git a/packager/media/filters/vp9_parser.h b/packager/media/filters/vp9_parser.h
new file mode 100644
index 0000000000..cd9e532d2b
--- /dev/null
+++ b/packager/media/filters/vp9_parser.h
@@ -0,0 +1,65 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file or at
+// https://developers.google.com/open-source/licenses/bsd
+
+#ifndef MEDIA_FILTERS_VP9_PARSER_H_
+#define MEDIA_FILTERS_VP9_PARSER_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <vector>
+
+#include "packager/base/macros.h"
+#include "packager/base/memory/scoped_ptr.h"
+#include "packager/media/filters/vp_codec_configuration.h"
+
+namespace edash_packager {
+namespace media {
+
+/// Information about one VPx frame found in a sample.
+struct VPxFrameInfo {
+  size_t frame_size;  // In bytes.
+  size_t uncompressed_header_size;  // In bytes.
+  bool is_key_frame;
+  uint32_t width;
+  uint32_t height;
+};
+
+/// Class to parse a vp9 bit stream.
+class VP9Parser {
+ public:
+  VP9Parser();
+  ~VP9Parser();
+
+  /// Parse @a data with size @a data_size.
+  /// @param data_size Size of the sample in bytes. Note that it should be a
+  ///        full sample.
+  /// @param[out] vpx_frames points to the list of VPx frames for the current
+  ///             sample on success. Cannot be NULL.
+  /// @return true on success, false otherwise.
+  bool Parse(const uint8_t* data,
+             size_t data_size,
+             std::vector<VPxFrameInfo>* vpx_frames);
+
+  /// @return VPx codec configuration extracted. Note that it is only valid
+  ///         after parsing a key frame or intra frame successfully.
+  const VPCodecConfiguration& codec_config() const { return codec_config_; }
+
+ private:
+  // Keep track of the current width and height. Note that they may change from
+  // frame to frame.
+  uint32_t width_;
+  uint32_t height_;
+
+  VPCodecConfiguration codec_config_;
+
+  DISALLOW_COPY_AND_ASSIGN(VP9Parser);
+};
+
+}  // namespace media
+}  // namespace edash_packager
+
+#endif  // MEDIA_FILTERS_VP9_PARSER_H_
diff --git a/packager/media/filters/vp9_parser_unittest.cc b/packager/media/filters/vp9_parser_unittest.cc
new file mode 100644
index 0000000000..4c5eef64c5
--- /dev/null
+++ b/packager/media/filters/vp9_parser_unittest.cc
@@ -0,0 +1,229 @@
+// Copyright 2015 Google Inc. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file or at
+// https://developers.google.com/open-source/licenses/bsd
+
+#include "packager/media/filters/vp9_parser.h"
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+using ::testing::ElementsAre;
+
+namespace edash_packager {
+namespace media {
+namespace {
+// Matches a VPxFrameInfo whose five fields all equal the given values.
+MATCHER_P5(EqualVPxFrame,
+           frame_size,
+           uncompressed_header_size,
+           is_key_frame,
+           width,
+           height,
+           "") {
+  *result_listener << "which is (" << arg.frame_size << ", "
+                   << arg.uncompressed_header_size << ", " << arg.is_key_frame
+                   << ", " << arg.width << ", " << arg.height << ").";
+  return arg.frame_size == frame_size &&
+         arg.uncompressed_header_size == uncompressed_header_size &&
+         arg.is_key_frame == is_key_frame && arg.width == width &&
+         arg.height == height;
+}
+}  // namespace
+
+TEST(VP9ParserTest, Superframe) {
+  uint8_t data[] = {
+      0x85, 0x00, 0x81, 0x25, 0x86, 0x0e, 0x09, 0x07, 0x06, 0x47, 0x00, 0x00,
+      0xb4, 0x69, 0x29, 0x1f, 0x69, 0x46, 0x6d, 0xaf, 0x4c, 0x1f, 0xac, 0x8c,
+      0x40, 0x7e, 0xb9, 0x52, 0xe3, 0x6f, 0xe9, 0x82, 0x23, 0x62, 0x9a, 0x40,
+      0xda, 0x87, 0x21, 0x7f, 0x1f, 0xc8, 0xfe, 0x3f, 0xd1, 0xfc, 0x7f, 0xc1,
+      0xbb, 0x3e, 0x77, 0xa4, 0xfc, 0x94, 0xa2, 0xfa, 0xa2, 0x00, 0x7a, 0xc3,
+      0x87, 0x01, 0x02, 0x4b, 0x0a, 0x1c, 0x12, 0x0e, 0x0c, 0x75, 0x00, 0x01,
+      0xa0, 0x69, 0x23, 0x0f, 0xd2, 0xf6, 0xfb, 0xb0, 0x6b, 0xf2, 0xab, 0x57,
+      0xc3, 0x3a, 0xa5, 0x74, 0x4d, 0xb1, 0x48, 0xf4, 0x59, 0x0f, 0xf1, 0x7e,
+      0x2f, 0x89, 0xf9, 0x00, 0xab, 0x7b, 0x01, 0x11, 0xd3, 0x8a, 0xe6, 0x8f,
+      0xab, 0xeb, 0x5f, 0x57, 0xdd, 0x7f, 0x45, 0x31, 0xbb, 0x66, 0xee, 0xf5,
+      0xbc, 0x85, 0xf1, 0xd0, 0x00, 0x7b, 0x80, 0xa7, 0x96, 0xbf, 0x8c, 0x21,
+      0xc9, 0x3c, 0x00, 0x48, 0x00, 0xc9,
+  };
+
+  VP9Parser parser;
+  std::vector<VPxFrameInfo> frames;
+  ASSERT_TRUE(parser.Parse(data, arraysize(data), &frames));
+  EXPECT_THAT(frames, ElementsAre(EqualVPxFrame(60u, 13u, false, 0u, 0u),
+                                  EqualVPxFrame(72u, 13u, false, 0u, 0u)));
+
+  // Corrupt super frame marker.
+  data[arraysize(data) - 6] = 0xc0;
+  ASSERT_FALSE(parser.Parse(data, arraysize(data), &frames));
+}
+
+TEST(VP9ParserTest, KeyframeChroma420) {
+  const uint8_t kData[] = {
+      0x82, 0x49, 0x83, 0x42, 0x00, 0x01, 0xf0, 0x00, 0x74, 0x04, 0x38, 0x24,
+      0x1c, 0x18, 0x34, 0x00, 0x00, 0x90, 0x3e, 0x9e, 0xe3, 0xe1, 0xdf, 0x9c,
+      0x6c, 0x00, 0x00, 0x41, 0x4d, 0xe4, 0x39, 0x94, 0xcd, 0x7b, 0x78, 0x30,
+      0x4e, 0xb5, 0xb1, 0x78, 0x40, 0x6f, 0xe5, 0x75, 0xa4, 0x28, 0x93, 0xf7,
+      0x97, 0x9f, 0x4f, 0xdf, 0xbf, 0xfc, 0xe2, 0x73, 0xfa, 0xef, 0xab, 0xcd,
+      0x2a, 0x93, 0xed, 0xfc, 0x17, 0x32, 0x8f, 0x40, 0x15, 0xfa, 0xd5, 0x3e,
+      0x35, 0x7a, 0x88, 0x69, 0xf7, 0x1f, 0x26, 0x8b,
+  };
+
+  VP9Parser parser;
+  std::vector<VPxFrameInfo> frames;
+  ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames));
+  EXPECT_EQ("vp09.00.00.08.00.01.00.00",
+            parser.codec_config().GetCodecString(kCodecVP9));
+  EXPECT_THAT(frames,
+              ElementsAre(EqualVPxFrame(arraysize(kData), 18u, true, 32u, 8u)));
+}
+
+TEST(VP9ParserTest, KeyframeProfile1Chroma422) {
+  const uint8_t kData[] = {
+      0xa2, 0x49, 0x83, 0x42, 0x08, 0x01, 0x3e, 0x00, 0xb2, 0x80, 0xc7, 0x04,
+      0x83, 0x83, 0x0e, 0x40, 0x00, 0x2e, 0x7c, 0x66, 0x79, 0xb9, 0xfd, 0x4f,
+      0xc7, 0x86, 0xf7, 0xc3, 0xc0, 0x82, 0xb2, 0x3c, 0xd6, 0xc0, 0xd0, 0x8d,
+      0xee, 0x00, 0x47, 0xe0, 0x00, 0x7e, 0x6f, 0xfe, 0x74, 0x31, 0xc6, 0x4f,
+      0x23, 0x9d, 0x6e, 0x5f, 0xfc, 0xa8, 0xef, 0x67, 0xdc, 0xac, 0xf7, 0x3e,
+      0x31, 0x07, 0xab, 0xc7, 0x11, 0x67, 0x95, 0x30, 0x37, 0x6d, 0xc5, 0xcf,
+      0xa0, 0x96, 0xa7, 0xb8, 0xf4, 0xb4, 0x65, 0xff,
+  };
+
+  VP9Parser parser;
+  std::vector<VPxFrameInfo> frames;
+  ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames));
+  EXPECT_EQ("vp09.01.00.08.00.02.00.00",
+            parser.codec_config().GetCodecString(kCodecVP9));
+  EXPECT_THAT(frames, ElementsAre(EqualVPxFrame(arraysize(kData), 18u, true,
+                                                160u, 90u)));
+}
+
+TEST(VP9ParserTest, KeyframeProfile2Chroma420) {
+  const uint8_t kData[] = {
+      0x92, 0x49, 0x83, 0x42, 0x00, 0x04, 0xf8, 0x02, 0xca, 0x04, 0x1c, 0x12,
+      0x0e, 0x0c, 0x3d, 0x00, 0x00, 0xa8, 0x7c, 0x66, 0x85, 0xb9, 0xfb, 0x3c,
+      0xc9, 0xf0, 0xff, 0xde, 0xf8, 0x78, 0x10, 0x59, 0x5f, 0xaa, 0x6e, 0xf0,
+      0x2a, 0x70, 0x00, 0x7e, 0x6f, 0xfe, 0x74, 0x31, 0xc6, 0x4f, 0x23, 0x9d,
+      0x6e, 0x5f, 0xfc, 0xa8, 0xef, 0x67, 0xdc, 0xac, 0xf7, 0x3e, 0x31, 0x07,
+      0xab, 0xc7, 0x11, 0x67, 0x95, 0x30, 0x37, 0xde, 0x13, 0x16, 0x83, 0x0b,
+      0xa4, 0xdf, 0x05, 0xaf, 0x6f, 0xff, 0xd1, 0x74,
+  };
+
+  VP9Parser parser;
+  std::vector<VPxFrameInfo> frames;
+  ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames));
+  EXPECT_EQ("vp09.02.00.10.00.01.00.00",
+            parser.codec_config().GetCodecString(kCodecVP9));
+  EXPECT_THAT(frames, ElementsAre(EqualVPxFrame(arraysize(kData), 18u, true,
+                                                160u, 90u)));
+}
+
+TEST(VP9ParserTest, KeyframeProfile3Chroma444) {
+  const uint8_t kData[] = {
+      0xb1, 0x24, 0xc1, 0xa1, 0x40, 0x00, 0x4f, 0x80, 0x2c, 0xa0, 0x41, 0xc1,
+      0x20, 0xe0, 0xc3, 0xf0, 0x00, 0x09, 0x00, 0x7c, 0x57, 0x77, 0x3f, 0x67,
+      0x99, 0x3e, 0x1f, 0xfb, 0xdf, 0x0f, 0x02, 0x0a, 0x37, 0x81, 0x53, 0x80,
+      0x00, 0x7e, 0x6f, 0xfe, 0x74, 0x31, 0xc6, 0x4f, 0x23, 0x9d, 0x6e, 0x5f,
+      0xfc, 0xa8, 0xef, 0x67, 0xdc, 0xac, 0xf7, 0x3e, 0x31, 0x07, 0xab, 0xc7,
+      0x0c, 0x74, 0x48, 0x8b, 0x95, 0x30, 0xc9, 0xf0, 0x37, 0x3b, 0xe6, 0x11,
+      0xe1, 0xe6, 0xef, 0xff, 0xfd, 0xf7, 0x4f, 0x0f,
+  };
+
+  VP9Parser parser;
+  std::vector<VPxFrameInfo> frames;
+  ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames));
+  EXPECT_EQ("vp09.03.00.12.00.03.00.00",
+            parser.codec_config().GetCodecString(kCodecVP9));
+  EXPECT_THAT(frames, ElementsAre(EqualVPxFrame(arraysize(kData), 19u, true,
+                                                160u, 90u)));
+}
+
+TEST(VP9ParserTest, Intra) {
+  const uint8_t kData[] = {
+      0x84, 0xc9, 0x30, 0x68, 0x40, 0x20, 0x2b, 0xe0, 0x23, 0xe8, 0x18, 0x70,
+      0x48, 0x38, 0x30, 0xd4, 0x00, 0x04, 0xc0, 0x64, 0x17, 0xe3, 0xd1, 0x7a,
+      0x6f, 0x87, 0xfa, 0x3e, 0x1f, 0xe4, 0xd0, 0xc1, 0x56, 0xaf, 0x9d, 0xad,
+      0xcb, 0x37, 0x00, 0xf7, 0x5d, 0x83, 0x80, 0x40, 0x0f, 0x9f, 0xd6, 0xbf,
+      0xe2, 0xbd, 0x53, 0xd9, 0x00, 0x3a, 0x70, 0xe0, 0x00, 0x78, 0xea, 0xa5,
+      0x61, 0x08, 0xb7, 0x9f, 0x33, 0xe5, 0xf8, 0xa5, 0x82, 0x32, 0xbb, 0xa3,
+      0x75, 0xb4, 0x60, 0xf3, 0x39, 0x75, 0x1f, 0x2b,
+  };
+
+  VP9Parser parser;
+  std::vector<VPxFrameInfo> frames;
+  ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames));
+  EXPECT_EQ("vp09.00.00.08.00.01.00.00",
+            parser.codec_config().GetCodecString(kCodecVP9));
+  EXPECT_THAT(frames, ElementsAre(EqualVPxFrame(arraysize(kData), 19u, false,
+                                                352u, 288u)));
+}
+
+TEST(VP9ParserTest, ShowExisting) {
+  const uint8_t kData[] = {0x88};
+  VP9Parser parser;
+  std::vector<VPxFrameInfo> frames;
+  ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames));
+  EXPECT_THAT(frames,
+              ElementsAre(EqualVPxFrame(arraysize(kData), 1u, false, 0u, 0u)));
+}
+
+TEST(VP9ParserTest, Interframe) {
+  const uint8_t kData[] = {
+      0x86, 0x00, 0x40, 0x92, 0x88, 0x2c, 0x49, 0xe0, 0x00, 0x03, 0x00, 0x00,
+      0x00, 0x78, 0xc9, 0x78, 0x71, 0x24, 0x4a, 0x59, 0x44, 0x61, 0xa6, 0x25,
+      0xd4, 0x3e, 0xce, 0x00, 0x3a, 0x05, 0xfb, 0x9c, 0xf2, 0x4e, 0xd6, 0x1a,
+      0x38, 0x94, 0x86, 0x17, 0x2a, 0x7b, 0x29, 0xbc, 0x22, 0x7e, 0xf8, 0xce,
+      0x26, 0x00, 0xb9, 0xb4, 0xfd, 0x74, 0x39, 0x15, 0xaa, 0xe6, 0xe3, 0xb1,
+      0xa0, 0xa6, 0x00, 0xf5, 0x6f, 0x57, 0x71, 0x4b, 0x69, 0xd2, 0xcc, 0x21,
+      0x90, 0xeb, 0x8c, 0xad, 0x5f, 0x69, 0xb7, 0x9b,
+  };
+
+  VP9Parser parser;
+  std::vector<VPxFrameInfo> frames;
+  ASSERT_TRUE(parser.Parse(kData, arraysize(kData), &frames));
+  EXPECT_THAT(frames,
+              ElementsAre(EqualVPxFrame(arraysize(kData), 10u, false, 0u, 0u)));
+}
+
+TEST(VP9ParserTest, CorruptedFrameMarker) {
+  const uint8_t kData[] = {0xc8};
+  VP9Parser parser;
+  std::vector<VPxFrameInfo> frames;
+  ASSERT_FALSE(parser.Parse(kData, arraysize(kData), &frames));
+}
+
+TEST(VP9ParserTest, CorruptedSynccode) {
+  const uint8_t kData[] = {
+      0x82, 0x49, 0x84, 0x42, 0x00, 0x01, 0xf0, 0x00, 0x74, 0x04, 0x38, 0x24,
+      0x1c, 0x18, 0x34, 0x00, 0x00, 0x90, 0x3e, 0x9e, 0xe3, 0xe1, 0xdf, 0x9c,
+      0x6c, 0x00, 0x00, 0x41, 0x4d, 0xe4, 0x39, 0x94, 0xcd, 0x7b, 0x78, 0x30,
+      0x4e, 0xb5, 0xb1, 0x78, 0x40, 0x6f, 0xe5, 0x75, 0xa4, 0x28, 0x93, 0xf7,
+      0x97, 0x9f, 0x4f, 0xdf, 0xbf, 0xfc, 0xe2, 0x73, 0xfa, 0xef, 0xab, 0xcd,
+      0x2a, 0x93, 0xed, 0xfc, 0x17, 0x32, 0x8f, 0x40, 0x15, 0xfa, 0xd5, 0x3e,
+      0x35, 0x7a, 0x88, 0x69, 0xf7, 0x1f, 0x26, 0x8b,
+  };
+
+  VP9Parser parser;
+  std::vector<VPxFrameInfo> frames;
+  ASSERT_FALSE(parser.Parse(kData, arraysize(kData), &frames));
+}
+
+TEST(VP9ParserTest, NotEnoughBytesForFirstPartitionSize) {
+  const uint8_t kData[] = {
+      0x82, 0x49, 0x83, 0x42, 0x04, 0xaf, 0xf0, 0x06, 0xbb, 0xdd, 0xf8, 0x03,
+      0xfc, 0x00, 0x38, 0x24, 0x1c, 0x18, 0x00, 0x00, 0x03, 0x38, 0x7f, 0x8f,
+      0xe8, 0xff, 0xf1, 0x3f, 0xf4, 0x1f, 0xc5, 0xfd, 0xff, 0xf2, 0x7f, 0xf8,
+      0x4f, 0xc9, 0xff, 0x5d, 0xff, 0xca, 0xff, 0x91, 0xff, 0xb4, 0xff, 0xe1,
+      0xff, 0xa1, 0xff, 0x2b, 0xff, 0xb8, 0xdb, 0x98, 0xff, 0x4b, 0xff, 0x19,
+      0xff, 0x0d, 0xf9, 0xbf, 0xf0, 0xbf, 0xe4, 0x7f, 0xbb, 0xff, 0x54, 0x19,
+      0x07, 0xf4, 0x7f, 0xc7, 0xff, 0x6d, 0xff, 0xeb,
+  };
+
+  VP9Parser parser;
+  std::vector<VPxFrameInfo> frames;
+  EXPECT_FALSE(parser.Parse(kData, arraysize(kData), &frames));
+}
+
+}  // namespace media
+}  // namespace edash_packager
diff --git a/packager/media/filters/vp_codec_configuration.cc b/packager/media/filters/vp_codec_configuration.cc
index 49595d31f4..6318d49eba 100644
--- a/packager/media/filters/vp_codec_configuration.cc
+++ b/packager/media/filters/vp_codec_configuration.cc
@@ -74,10 +74,10 @@ bool 
VPCodecConfiguration::Parse(const std::vector& data) { uint16_t codec_initialization_data_size = 0; RCHECK(reader.ReadBits(16, &codec_initialization_data_size)); RCHECK(reader.bits_available() >= codec_initialization_data_size * 8); - const size_t kHeaderSize = 6u; // Size of bytes read so far. + const size_t header_size = data.size() - reader.bits_available() / 8; codec_initialization_data_.assign( - data.begin() + kHeaderSize, - data.begin() + kHeaderSize + codec_initialization_data_size); + data.begin() + header_size, + data.begin() + header_size + codec_initialization_data_size); return true; } diff --git a/packager/media/filters/vp_codec_configuration.h b/packager/media/filters/vp_codec_configuration.h index 14481cdc24..398c1bdd84 100644 --- a/packager/media/filters/vp_codec_configuration.h +++ b/packager/media/filters/vp_codec_configuration.h @@ -20,6 +20,23 @@ namespace media { /// Class for parsing or writing VP codec configuration data. class VPCodecConfiguration { public: + enum ColorSpace { + COLOR_SPACE_UNSPECIFIED = 0, + COLOR_SPACE_BT_601 = 1, + COLOR_SPACE_BT_709 = 2, + COLOR_SPACE_BT_2020_NON_CONSTANT_LUMINANCE = 3, + COLOR_SPACE_BT_2020_CONSTANT_LUMINANCE = 4, + COLOR_SPACE_SRGB = 5, + }; + + enum ChromaSubsampling { + CHROMA_420_VERTICAL = 0, + CHROMA_420_COLLOCATED_WITH_LUMA = 1, + CHROMA_422 = 2, + CHROMA_444 = 3, + CHROMA_440 = 4, + }; + VPCodecConfiguration(); VPCodecConfiguration(uint8_t profile, uint8_t level, @@ -42,6 +59,20 @@ class VPCodecConfiguration { /// @return The codec string. 
std::string GetCodecString(VideoCodec codec) const; + void set_profile(uint8_t profile) { profile_ = profile; } + void set_level(uint8_t level) { level_ = level; } + void set_bit_depth(uint8_t bit_depth) { bit_depth_ = bit_depth; } + void set_color_space(uint8_t color_space) { color_space_ = color_space; } + void set_chroma_subsampling(uint8_t chroma_subsampling) { + chroma_subsampling_ = chroma_subsampling; + } + void set_transfer_function(uint8_t transfer_function) { + transfer_function_ = transfer_function; + } + void set_video_full_range_flag(bool video_full_range_flag) { + video_full_range_flag_ = video_full_range_flag; + } + uint8_t profile() const { return profile_; } uint8_t level() const { return level_; } uint8_t bit_depth() const { return bit_depth_; }