Parse vui_parameters in H.265.

Parsing vui_parameters in the SPS is required to be able to extract
the resolution of the video.  This also adds a method to extract
the resolution to the H.265 parser.

Issue #46

Change-Id: Idf498d2afdb36a689490151a4ae6baef0b6b73f6
This commit is contained in:
Jacob Trimble 2016-04-13 14:46:13 -07:00
parent 2756902934
commit bbf9c6849b
4 changed files with 359 additions and 3 deletions

View File

@ -49,9 +49,13 @@ bool H265ByteToUnitStreamConverter::GetDecoderConfigurationRecord(
// Skip Nalu header (2) and the first byte of the SPS to get the // Skip Nalu header (2) and the first byte of the SPS to get the
// profile_tier_level. // profile_tier_level.
buffer.AppendArray(&last_sps_[2+1], 12); buffer.AppendArray(&last_sps_[2+1], 12);
// min_spacial_segmentation_idc = 0 (Unknown)
// TODO(modmaker): Parse vui_parameters and update this. // The default value for this field is 0, which is Unknown.
buffer.AppendInt(static_cast<uint16_t>(0xf000)); int min_spatial_segmentation_idc =
sps->vui_parameters.min_spatial_segmentation_idc;
buffer.AppendInt(
static_cast<uint16_t>(0xf000 | min_spatial_segmentation_idc));
buffer.AppendInt(static_cast<uint8_t>(0xfc) /* parallelismType = 0 */); buffer.AppendInt(static_cast<uint8_t>(0xfc) /* parallelismType = 0 */);
buffer.AppendInt(static_cast<uint8_t>(0xfc | sps->chroma_format_idc)); buffer.AppendInt(static_cast<uint8_t>(0xfc | sps->chroma_format_idc));
buffer.AppendInt(static_cast<uint8_t>(0xf8 | sps->bit_depth_luma_minus8)); buffer.AppendInt(static_cast<uint8_t>(0xf8 | sps->bit_depth_luma_minus8));

View File

@ -11,6 +11,7 @@
#include "packager/base/logging.h" #include "packager/base/logging.h"
#include "packager/base/stl_util.h" #include "packager/base/stl_util.h"
#include "packager/media/base/macros.h"
#include "packager/media/filters/nalu_reader.h" #include "packager/media/filters/nalu_reader.h"
#define TRUE_OR_RETURN(a) \ #define TRUE_OR_RETURN(a) \
@ -51,8 +52,99 @@ int GetNumPicTotalCurr(const H265SliceHeader& slice_header,
return num_pic_total_curr + slice_header.used_by_curr_pic_lt; return num_pic_total_curr + slice_header.used_by_curr_pic_lt;
} }
// Determines the pixel (sample) aspect ratio signalled by the VUI parameters
// of |sps| and writes it to |pixel_width|:|pixel_height|.
//
// Both outputs are always assigned a non-zero value:
//   - aspect_ratio_idc 1..16 map to the fixed ratios of Table E.1;
//   - aspect_ratio_idc 255 (extended SAR) uses sar_width:sar_height, unless
//     either of them is 0, in which case the ratio is unspecified
//     (Section E.3.1) and 1:1 is assumed;
//   - 0 and any other value are treated as unspecified and 1:1 is assumed.
void GetAspectRatioInfo(const H265Sps& sps,
                        uint32_t* pixel_width,
                        uint32_t* pixel_height) {
  // The default value is 0; so if this is not in the SPS, it will correctly
  // assume unspecified.
  const int aspect_ratio_idc = sps.vui_parameters.aspect_ratio_idc;

  // Table E.1
  switch (aspect_ratio_idc) {
    case 1:  *pixel_width = 1;   *pixel_height = 1;  break;
    case 2:  *pixel_width = 12;  *pixel_height = 11; break;
    case 3:  *pixel_width = 10;  *pixel_height = 11; break;
    case 4:  *pixel_width = 16;  *pixel_height = 11; break;
    case 5:  *pixel_width = 40;  *pixel_height = 33; break;
    case 6:  *pixel_width = 24;  *pixel_height = 11; break;
    case 7:  *pixel_width = 20;  *pixel_height = 11; break;
    case 8:  *pixel_width = 32;  *pixel_height = 11; break;
    case 9:  *pixel_width = 80;  *pixel_height = 33; break;
    case 10: *pixel_width = 18;  *pixel_height = 11; break;
    case 11: *pixel_width = 15;  *pixel_height = 11; break;
    case 12: *pixel_width = 64;  *pixel_height = 33; break;
    case 13: *pixel_width = 160; *pixel_height = 99; break;
    case 14: *pixel_width = 4;   *pixel_height = 3;  break;
    case 15: *pixel_width = 3;   *pixel_height = 2;  break;
    case 16: *pixel_width = 2;   *pixel_height = 1;  break;

    case 255: {
      // Extended SAR: the ratio is carried explicitly in the VUI.
      const int sar_width = sps.vui_parameters.sar_width;
      const int sar_height = sps.vui_parameters.sar_height;
      if (sar_width > 0 && sar_height > 0) {
        *pixel_width = sar_width;
        *pixel_height = sar_height;
      } else {
        // Section E.3.1: a zero sar_width or sar_height means the sample
        // aspect ratio is unspecified. Never hand a zero component to the
        // caller; use the same 1:1 fallback as the unspecified case.
        LOG(WARNING) << "Unspecified extended SAR " << sar_width << ":"
                     << sar_height << ", assuming 1:1";
        *pixel_width = 1;
        *pixel_height = 1;
      }
      break;
    }

    default:
      // Section E.3.1 specifies that other values should be interpreted as 0.
      LOG(WARNING) << "Unknown aspect_ratio_idc " << aspect_ratio_idc;
      FALLTHROUGH_INTENDED;
    case 0:
      // Unlike the spec, assume 1:1 if not specified.
      *pixel_width = 1;
      *pixel_height = 1;
      break;
  }
}
} // namespace } // namespace
bool ExtractResolutionFromSps(const H265Sps& sps,
uint32_t* coded_width,
uint32_t* coded_height,
uint32_t* pixel_width,
uint32_t* pixel_height) {
int crop_x = 0;
int crop_y = 0;
if (sps.conformance_window_flag) {
int sub_width_c = 0;
int sub_height_c = 0;
// Table 6-1
switch (sps.chroma_format_idc) {
case 0: // Monochrome
sub_width_c = 1;
sub_height_c = 1;
break;
case 1: // 4:2:0
sub_width_c = 2;
sub_height_c = 2;
break;
case 2: // 4:2:2
sub_width_c = 2;
sub_height_c = 1;
break;
case 3: // 4:4:4
sub_width_c = 1;
sub_height_c = 1;
break;
default:
LOG(ERROR) << "Unexpected chroma_format_idc " << sps.chroma_format_idc;
return false;
}
// Formula D-28, D-29
crop_x =
sub_width_c * (sps.conf_win_right_offset + sps.conf_win_left_offset);
crop_y =
sub_height_c * (sps.conf_win_bottom_offset + sps.conf_win_top_offset);
}
// Formula D-28, D-29
*coded_width = sps.pic_width_in_luma_samples - crop_x;
*coded_height = sps.pic_height_in_luma_samples - crop_y;
GetAspectRatioInfo(sps, pixel_width, pixel_height);
return true;
}
H265Pps::H265Pps() {} H265Pps::H265Pps() {}
H265Pps::~H265Pps() {} H265Pps::~H265Pps() {}
@ -519,6 +611,12 @@ H265Parser::Result H265Parser::ParseSps(const Nalu& nalu, int* sps_id) {
TRUE_OR_RETURN(br->ReadBool(&sps->temporal_mvp_enabled_flag)); TRUE_OR_RETURN(br->ReadBool(&sps->temporal_mvp_enabled_flag));
TRUE_OR_RETURN(br->ReadBool(&sps->strong_intra_smoothing_enabled_flag)); TRUE_OR_RETURN(br->ReadBool(&sps->strong_intra_smoothing_enabled_flag));
TRUE_OR_RETURN(br->ReadBool(&sps->vui_parameters_present));
if (sps->vui_parameters_present) {
OK_OR_RETURN(ParseVuiParameters(sps->max_sub_layers_minus1, br,
&sps->vui_parameters));
}
// Ignore remaining extension data. // Ignore remaining extension data.
// This will replace any existing SPS instance. // This will replace any existing SPS instance.
@ -537,6 +635,98 @@ const H265Sps* H265Parser::GetSps(int sps_id) {
return active_spses_[sps_id]; return active_spses_[sps_id];
} }
// Parses a vui_parameters() syntax element from |br|.
//
// Only aspect_ratio_info_present_flag, aspect_ratio_idc, sar_width,
// sar_height, bitstream_restriction_flag and min_spatial_segmentation_idc are
// stored in |vui|; every other field is read or skipped only to keep the
// reader positioned correctly.  |max_num_sub_layers_minus1| is forwarded to
// SkipHrdParameters() when HRD parameters are present.
//
// Returns kOk once the whole element has been consumed.  Failed reads are
// handled by TRUE_OR_RETURN / OK_OR_RETURN (presumably an early error return;
// the macro bodies are not visible here).
H265Parser::Result H265Parser::ParseVuiParameters(int max_num_sub_layers_minus1,
H26xBitReader* br,
H265VuiParameters* vui) {
// Reads whole element but ignores most of it.
int ignored;
TRUE_OR_RETURN(br->ReadBool(&vui->aspect_ratio_info_present_flag));
if (vui->aspect_ratio_info_present_flag) {
TRUE_OR_RETURN(br->ReadBits(8, &vui->aspect_ratio_idc));
// sar_width and sar_height are only coded for the extended SAR indicator.
if (vui->aspect_ratio_idc == H265VuiParameters::kExtendedSar) {
TRUE_OR_RETURN(br->ReadBits(16, &vui->sar_width));
TRUE_OR_RETURN(br->ReadBits(16, &vui->sar_height));
}
}
bool overscan_info_present_flag;
TRUE_OR_RETURN(br->ReadBool(&overscan_info_present_flag));
if (overscan_info_present_flag) {
TRUE_OR_RETURN(br->SkipBits(1)); // overscan_appropriate_flag
}
bool video_signal_type_present_flag;
TRUE_OR_RETURN(br->ReadBool(&video_signal_type_present_flag));
if (video_signal_type_present_flag) {
TRUE_OR_RETURN(br->SkipBits(3)); // video_format
TRUE_OR_RETURN(br->SkipBits(1)); // video_full_range_flag
bool colour_description_present_flag;
TRUE_OR_RETURN(br->ReadBool(&colour_description_present_flag));
if (colour_description_present_flag) {
// colour_primaries, transfer_characteristics, matrix_coeffs
TRUE_OR_RETURN(br->SkipBits(8 + 8 + 8));
}
}
bool chroma_loc_info_present_flag;
TRUE_OR_RETURN(br->ReadBool(&chroma_loc_info_present_flag));
if (chroma_loc_info_present_flag) {
// chroma_sample_loc_type_top_field, chroma_sample_loc_type_bottom_field
TRUE_OR_RETURN(br->ReadUE(&ignored));
TRUE_OR_RETURN(br->ReadUE(&ignored));
}
// neutral_chroma_indication_flag, field_seq_flag,
// frame_field_info_present_flag.
TRUE_OR_RETURN(br->SkipBits(3));
bool default_display_window_flag;
TRUE_OR_RETURN(br->ReadBool(&default_display_window_flag));
if (default_display_window_flag) {
TRUE_OR_RETURN(br->ReadUE(&ignored)); // def_disp_win_left_offset
TRUE_OR_RETURN(br->ReadUE(&ignored)); // def_disp_win_right_offset
TRUE_OR_RETURN(br->ReadUE(&ignored)); // def_disp_win_top_offset
TRUE_OR_RETURN(br->ReadUE(&ignored)); // def_disp_win_bottom_offset
}
bool vui_timing_info_present_flag;
TRUE_OR_RETURN(br->ReadBool(&vui_timing_info_present_flag));
if (vui_timing_info_present_flag) {
// vui_num_units_in_tick, vui_time_scale
TRUE_OR_RETURN(br->SkipBits(32 + 32));
bool vui_poc_proportional_to_timing_flag;
TRUE_OR_RETURN(br->ReadBool(&vui_poc_proportional_to_timing_flag));
if (vui_poc_proportional_to_timing_flag) {
// vui_num_ticks_poc_diff_one_minus1
TRUE_OR_RETURN(br->ReadUE(&ignored));
}
// NOTE: "hdr" in this local name refers to the HRD parameters that are
// skipped by SkipHrdParameters() below.
bool vui_hdr_parameters_present_flag;
TRUE_OR_RETURN(br->ReadBool(&vui_hdr_parameters_present_flag));
if (vui_hdr_parameters_present_flag) {
OK_OR_RETURN(SkipHrdParameters(max_num_sub_layers_minus1, br));
}
}
TRUE_OR_RETURN(br->ReadBool(&vui->bitstream_restriction_flag));
if (vui->bitstream_restriction_flag) {
// tiles_fixed_structure_flag, motion_vectors_over_pic_boundaries_flag,
// restricted_ref_pic_lists_flag.
TRUE_OR_RETURN(br->SkipBits(3));
// The only bitstream restriction field that is kept.
TRUE_OR_RETURN(br->ReadUE(&vui->min_spatial_segmentation_idc));
TRUE_OR_RETURN(br->ReadUE(&ignored)); // max_bytes_per_pic_denom
TRUE_OR_RETURN(br->ReadUE(&ignored)); // max_bits_per_min_cu_denom
TRUE_OR_RETURN(br->ReadUE(&ignored)); // log2_max_mv_length_horizontal
TRUE_OR_RETURN(br->ReadUE(&ignored)); // log2_max_mv_length_vertical
}
return kOk;
}
H265Parser::Result H265Parser::ParseReferencePictureSet( H265Parser::Result H265Parser::ParseReferencePictureSet(
int num_short_term_ref_pic_sets, int num_short_term_ref_pic_sets,
int st_rps_idx, int st_rps_idx,
@ -843,5 +1033,84 @@ H265Parser::Result H265Parser::SkipScalingListData(H26xBitReader* br) {
return kOk; return kOk;
} }
// Consumes an hrd_parameters() syntax element from |br| without storing any
// of its fields.
//
// The common information is read first, followed by one set of per-sub-layer
// fields for each i in [0, max_num_sub_layers_minus1].  Returns kOk once the
// element has been consumed.  Failed reads are handled by TRUE_OR_RETURN /
// OK_OR_RETURN (presumably an early error return; the macro bodies are not
// visible here).
//
// NOTE: "hdr" in the local names below refers to the HRD flags of the syntax
// element.
H265Parser::Result H265Parser::SkipHrdParameters(int max_num_sub_layers_minus1,
H26xBitReader* br) {
// common_inf_present_flag is always 1 when parsing vui_parameters.
const bool common_inf_present_flag = true;
int ignored;
// Both flags are always assigned below because common_inf_present_flag is
// the constant true.
bool nal_hdr_parameters_present_flag;
bool vcl_hdr_parameters_present_flag;
bool sub_pic_hdr_params_present_flag = false;
if (common_inf_present_flag) {
TRUE_OR_RETURN(br->ReadBool(&nal_hdr_parameters_present_flag));
TRUE_OR_RETURN(br->ReadBool(&vcl_hdr_parameters_present_flag));
if (nal_hdr_parameters_present_flag || vcl_hdr_parameters_present_flag) {
TRUE_OR_RETURN(br->ReadBool(&sub_pic_hdr_params_present_flag));
if (sub_pic_hdr_params_present_flag) {
// tick_divisor_minus2, du_cpb_removal_delay_increment_length_minus1,
// sub_pic_cpb_params_in_pic_timing_sei_flag
// dpb_output_delay_du_length_minus1
TRUE_OR_RETURN(br->SkipBits(8 + 5 + 1 + 5));
}
// bit_rate_scale, cpb_size_scale
TRUE_OR_RETURN(br->SkipBits(4 + 4));
if (sub_pic_hdr_params_present_flag)
TRUE_OR_RETURN(br->SkipBits(4)); // cpb_size_du_scale
// initial_cpb_removal_delay_length_minus1,
// au_cpb_removal_delay_length_minus1, dpb_output_delay_length_minus1
TRUE_OR_RETURN(br->SkipBits(5 + 5 + 5));
}
}
// Per-sub-layer fields.
for (int i = 0; i <= max_num_sub_layers_minus1; i++) {
bool fixed_pic_rate_general_flag;
// The initial values below are what is used when the corresponding field is
// not read from the stream.
bool fixed_pic_rate_within_cvs_flag = true;
bool low_delay_hdr_flag = false;
int cpb_cnt_minus1 = 0;
TRUE_OR_RETURN(br->ReadBool(&fixed_pic_rate_general_flag));
if (!fixed_pic_rate_general_flag)
TRUE_OR_RETURN(br->ReadBool(&fixed_pic_rate_within_cvs_flag));
if (fixed_pic_rate_within_cvs_flag)
TRUE_OR_RETURN(br->ReadUE(&ignored)); // elemental_duration_in_tc_minus1
else
TRUE_OR_RETURN(br->ReadBool(&low_delay_hdr_flag));
if (!low_delay_hdr_flag)
TRUE_OR_RETURN(br->ReadUE(&cpb_cnt_minus1));
// The sub-layer parameters are skipped once for NAL and once for VCL,
// each only when its presence flag is set.
if (nal_hdr_parameters_present_flag) {
OK_OR_RETURN(SkipSubLayerHrdParameters(
cpb_cnt_minus1, sub_pic_hdr_params_present_flag, br));
}
if (vcl_hdr_parameters_present_flag) {
OK_OR_RETURN(SkipSubLayerHrdParameters(
cpb_cnt_minus1, sub_pic_hdr_params_present_flag, br));
}
}
return kOk;
}
// Consumes a sub_layer_hrd_parameters() syntax element from |br| without
// storing any of its fields.
//
// One entry is skipped for each i in [0, cpb_cnt_minus1].  The two *_du_*
// fields are only read when |sub_pic_hdr_params_present_flag| is set.
// Returns kOk once all entries have been consumed.  Failed reads are handled
// by TRUE_OR_RETURN (presumably an early error return; the macro body is not
// visible here).
H265Parser::Result H265Parser::SkipSubLayerHrdParameters(
int cpb_cnt_minus1,
bool sub_pic_hdr_params_present_flag,
H26xBitReader* br) {
int ignored;
for (int i = 0; i <= cpb_cnt_minus1; i++) {
TRUE_OR_RETURN(br->ReadUE(&ignored)); // bit_rate_value_minus1
TRUE_OR_RETURN(br->ReadUE(&ignored)); // cpb_size_value_minus1
if (sub_pic_hdr_params_present_flag) {
TRUE_OR_RETURN(br->ReadUE(&ignored)); // cpb_size_du_value_minus1
TRUE_OR_RETURN(br->ReadUE(&ignored)); // bit_rate_du_value_minus1
}
TRUE_OR_RETURN(br->SkipBits(1)); // cbr_flag
}
return kOk;
}
} // namespace media } // namespace media
} // namespace edash_packager } // namespace edash_packager

View File

@ -22,6 +22,16 @@ enum H265SliceType { kBSlice = 0, kPSlice = 1, kISlice = 2 };
const int kMaxRefPicSetCount = 16; const int kMaxRefPicSetCount = 16;
// On success, |coded_width| and |coded_height| contain the coded resolution
// after cropping; |pixel_width:pixel_height| contains the pixel aspect ratio,
// which is set to 1:1 if it is not present in the SPS.
struct H265Sps;
bool ExtractResolutionFromSps(const H265Sps& sps,
uint32_t* coded_width,
uint32_t* coded_height,
uint32_t* pixel_width,
uint32_t* pixel_height);
struct H265ReferencePictureSet { struct H265ReferencePictureSet {
int delta_poc_s0[kMaxRefPicSetCount]; int delta_poc_s0[kMaxRefPicSetCount];
int delta_poc_s1[kMaxRefPicSetCount]; int delta_poc_s1[kMaxRefPicSetCount];
@ -33,6 +43,20 @@ struct H265ReferencePictureSet {
int num_delta_pocs; int num_delta_pocs;
}; };
// Holds the subset of the vui_parameters() fields that the parser keeps.
// Every field starts out as false/0 until it is read from a stream.
struct H265VuiParameters {
  // Value of |aspect_ratio_idc| for which |sar_width| and |sar_height| are
  // read from the stream.
  enum { kExtendedSar = 255 };

  // Aspect ratio information.
  bool aspect_ratio_info_present_flag{false};
  int aspect_ratio_idc{0};
  int sar_width{0};
  int sar_height{0};

  // Bitstream restriction information.
  bool bitstream_restriction_flag{false};
  int min_spatial_segmentation_idc{0};

  // Incomplete...
};
struct H265Pps { struct H265Pps {
H265Pps(); H265Pps();
~H265Pps(); ~H265Pps();
@ -162,6 +186,9 @@ struct H265Sps {
bool temporal_mvp_enabled_flag = false; bool temporal_mvp_enabled_flag = false;
bool strong_intra_smoothing_enabled_flag = false; bool strong_intra_smoothing_enabled_flag = false;
bool vui_parameters_present = false;
H265VuiParameters vui_parameters;
// Ignored: extensions... // Ignored: extensions...
}; };
@ -277,6 +304,10 @@ class H265Parser {
const H265Sps* GetSps(int sps_id); const H265Sps* GetSps(int sps_id);
private: private:
Result ParseVuiParameters(int max_num_sub_layers_minus1,
H26xBitReader* br,
H265VuiParameters* vui);
Result ParseReferencePictureSet( Result ParseReferencePictureSet(
int num_short_term_ref_pic_sets, int num_short_term_ref_pic_sets,
int st_rpx_idx, int st_rpx_idx,
@ -305,6 +336,12 @@ class H265Parser {
Result SkipScalingListData(H26xBitReader* br); Result SkipScalingListData(H26xBitReader* br);
Result SkipHrdParameters(int max_num_sub_layers_minus1, H26xBitReader* br);
Result SkipSubLayerHrdParameters(int cpb_cnt_minus1,
bool sub_pic_hdr_params_present_flag,
H26xBitReader* br);
typedef std::map<int, H265Sps*> SpsById; typedef std::map<int, H265Sps*> SpsById;
typedef std::map<int, H265Pps*> PpsById; typedef std::map<int, H265Pps*> PpsById;

View File

@ -125,6 +125,52 @@ TEST(H265ParserTest, ParsePps) {
EXPECT_EQ(0, pps->log2_parallel_merge_level_minus2); EXPECT_EQ(0, pps->log2_parallel_merge_level_minus2);
} }
// Parses |kSpsData| and checks the resolution and pixel aspect ratio reported
// by ExtractResolutionFromSps() for it.
TEST(H265ParserTest, ExtractResolutionFromSpsData) {
  Nalu sps_nalu;
  ASSERT_TRUE(sps_nalu.InitializeFromH265(kSpsData, arraysize(kSpsData)));

  H265Parser h265_parser;
  int parsed_sps_id = 0;
  ASSERT_EQ(H265Parser::kOk, h265_parser.ParseSps(sps_nalu, &parsed_sps_id));

  // Start from zero so that the expectations below prove the outputs were
  // actually written.
  uint32_t width = 0;
  uint32_t height = 0;
  uint32_t sar_numerator = 0;
  uint32_t sar_denominator = 0;
  ASSERT_TRUE(ExtractResolutionFromSps(*h265_parser.GetSps(parsed_sps_id),
                                       &width, &height, &sar_numerator,
                                       &sar_denominator));

  EXPECT_EQ(640u, width);
  EXPECT_EQ(360u, height);
  EXPECT_EQ(1u, sar_numerator);
  EXPECT_EQ(1u, sar_denominator);
}
// Parses an SPS defined locally in this test and checks the resolution and
// pixel aspect ratio reported by ExtractResolutionFromSps() for it.
TEST(H265ParserTest, ExtractResolutionFromSpsDataWithCrop) {
  const uint8_t kSpsCropData[] = {
      0x42, 0x01, 0x01, 0x01, 0x60, 0x00, 0x00, 0x03, 0x00, 0x90, 0x00,
      0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x3c, 0xa0, 0x0f, 0x08, 0x0f,
      0x16, 0x59, 0x99, 0xa4, 0x93, 0x2b, 0xff, 0xc0, 0xd5, 0xc0, 0xd6,
      0x40, 0x40, 0x00, 0x00, 0x03, 0x00, 0x40, 0x00, 0x00, 0x06, 0x02,
  };

  Nalu sps_nalu;
  ASSERT_TRUE(
      sps_nalu.InitializeFromH265(kSpsCropData, arraysize(kSpsCropData)));

  H265Parser h265_parser;
  int parsed_sps_id = 0;
  ASSERT_EQ(H265Parser::kOk, h265_parser.ParseSps(sps_nalu, &parsed_sps_id));

  // Start from zero so that the expectations below prove the outputs were
  // actually written.
  uint32_t width = 0;
  uint32_t height = 0;
  uint32_t sar_numerator = 0;
  uint32_t sar_denominator = 0;
  ASSERT_TRUE(ExtractResolutionFromSps(*h265_parser.GetSps(parsed_sps_id),
                                       &width, &height, &sar_numerator,
                                       &sar_denominator));

  EXPECT_EQ(480u, width);
  EXPECT_EQ(240u, height);
  EXPECT_EQ(855u, sar_numerator);
  EXPECT_EQ(857u, sar_denominator);
}
} // namespace H265 } // namespace H265
} // namespace media } // namespace media
} // namespace edash_packager } // namespace edash_packager