Fix incorrect width/height in VisualSampleEntry for mp2t stream with cropping

Cropping is necessary if width/height are not exactly disivible by 16 due
to 16x16 macroblock size.

The width and height fields in VisualSampleEntry must correctly document
the cropped frame dimentions (visual presentation size) of the AVC stream
that is described by that entry.

Also fixes width/height in input mp4 and wvm streams if the values in
VisualSampleEntry / metadata do not match with the values in
AVCDecoderConfigurationRecord (SPS).

Issue: 39

Change-Id: Id55c5acf245bee3f4e66302b2042eb7d9c488c19
This commit is contained in:
Kongqun Yang 2015-09-16 15:22:22 -07:00 committed by Gerrit Code Review
parent c75d319985
commit 3a5fdd2d9c
7 changed files with 229 additions and 68 deletions

View File

@ -67,6 +67,8 @@ class VideoStreamInfo : public StreamInfo {
uint8_t nalu_length_size() const { return nalu_length_size_; } uint8_t nalu_length_size() const { return nalu_length_size_; }
int16_t trick_play_rate() const { return trick_play_rate_; } int16_t trick_play_rate() const { return trick_play_rate_; }
void set_width(uint32_t width) { width_ = width; }
void set_height(uint32_t height) { height_ = height; }
void set_pixel_width(uint32_t pixel_width) { pixel_width_ = pixel_width; } void set_pixel_width(uint32_t pixel_width) { pixel_width_ = pixel_width; }
void set_pixel_height(uint32_t pixel_height) { pixel_height_ = pixel_height; } void set_pixel_height(uint32_t pixel_height) { pixel_height_ = pixel_height; }

View File

@ -16,14 +16,16 @@ namespace media {
do { \ do { \
if (!(x)) { \ if (!(x)) { \
LOG(ERROR) << "Failure while parsing AVCDecoderConfig: " << #x; \ LOG(ERROR) << "Failure while parsing AVCDecoderConfig: " << #x; \
return; \ return false; \
} \ } \
} while (0) } while (0)
void ExtractSarFromDecoderConfig(const uint8_t* avc_decoder_config_data, bool ExtractResolutionFromDecoderConfig(const uint8_t* avc_decoder_config_data,
size_t avc_decoder_config_data_size, size_t avc_decoder_config_data_size,
uint32_t* sar_width, uint32_t* coded_width,
uint32_t* sar_height) { uint32_t* coded_height,
uint32_t* pixel_width,
uint32_t* pixel_height) {
BufferReader reader(avc_decoder_config_data, avc_decoder_config_data_size); BufferReader reader(avc_decoder_config_data, avc_decoder_config_data_size);
uint8_t value = 0; uint8_t value = 0;
// version check, must be 1. // version check, must be 1.
@ -52,33 +54,96 @@ void ExtractSarFromDecoderConfig(const uint8_t* avc_decoder_config_data,
const uint8_t num_sps = value & 0x1F; const uint8_t num_sps = value & 0x1F;
if (num_sps < 1) { if (num_sps < 1) {
LOG(ERROR) << "No SPS found."; LOG(ERROR) << "No SPS found.";
return; return false;
} }
uint16_t sps_length = 0; uint16_t sps_length = 0;
RCHECK(reader.Read2(&sps_length)); RCHECK(reader.Read2(&sps_length));
ExtractSarFromSps(reader.data() + reader.pos(), sps_length, sar_width, return ExtractResolutionFromSpsData(reader.data() + reader.pos(), sps_length,
sar_height); coded_width, coded_height, pixel_width,
pixel_height);
// It is unlikely to have more than one SPS in practice. Also there's // It is unlikely to have more than one SPS in practice. Also there's
// no way to change the sar_{width,height} dynamically from VideoStreamInfo. // no way to change the {coded,pixel}_{width,height} dynamically from
// So skip the rest (if there are any). // VideoStreamInfo. So skip the rest (if there are any).
} }
void ExtractSarFromSps(const uint8_t* sps_data, bool ExtractResolutionFromSpsData(const uint8_t* sps_data,
size_t sps_data_size, size_t sps_data_size,
uint32_t* sar_width, uint32_t* coded_width,
uint32_t* sar_height) { uint32_t* coded_height,
uint32_t* pixel_width,
uint32_t* pixel_height) {
H264Parser parser; H264Parser parser;
int sps_id; int sps_id;
RCHECK(parser.ParseSPSFromArray(sps_data, sps_data_size, &sps_id) == RCHECK(parser.ParseSPSFromArray(sps_data, sps_data_size, &sps_id) ==
H264Parser::kOk); H264Parser::kOk);
const H264SPS& sps = *parser.GetSPS(sps_id); return ExtractResolutionFromSps(*parser.GetSPS(sps_id), coded_width,
coded_height, pixel_width, pixel_height);
}
// Implemented according to ISO/IEC 14496-10:2005 7.4.2.1 Sequence parameter set
// RBSP semantics.
bool ExtractResolutionFromSps(const H264SPS& sps,
uint32_t* coded_width,
uint32_t* coded_height,
uint32_t* pixel_width,
uint32_t* pixel_height) {
int crop_x = 0;
int crop_y = 0;
if (sps.frame_cropping_flag) {
int sub_width_c = 0;
int sub_height_c = 0;
// Table 6-1.
switch (sps.chroma_format_idc) {
case 0: // monochrome
// SubWidthC and SubHeightC are not defined for monochrome. For ease of
// computation afterwards, assign both to 1.
sub_width_c = 1;
sub_height_c = 1;
break;
case 1: // 4:2:0
sub_width_c = 2;
sub_height_c = 2;
break;
case 2: // 4:2:2
sub_width_c = 2;
sub_height_c = 1;
break;
case 3: // 4:4:4
sub_width_c = 1;
sub_height_c = 1;
break;
default:
LOG(ERROR) << "Unexpected chroma_format_idc " << sps.chroma_format_idc;
return false;
}
// Formula 7-16, 7-17, 7-18, 7-19.
int crop_unit_x = sub_width_c;
int crop_unit_y = sub_height_c * (2 - (sps.frame_mbs_only_flag ? 1 : 0));
crop_x = crop_unit_x *
(sps.frame_crop_left_offset + sps.frame_crop_right_offset);
crop_y = crop_unit_y *
(sps.frame_crop_top_offset + sps.frame_crop_bottom_offset);
}
// Formula 7-10, 7-11.
int pic_width_in_mbs = sps.pic_width_in_mbs_minus1 + 1;
*coded_width = pic_width_in_mbs * 16 - crop_x;
// Formula 7-13, 7-15.
int pic_height_in_mbs = (2 - (sps.frame_mbs_only_flag ? 1 : 0)) *
(sps.pic_height_in_map_units_minus1 + 1);
*coded_height = pic_height_in_mbs * 16 - crop_y;
// 0 means it wasn't in the SPS and therefore assume 1. // 0 means it wasn't in the SPS and therefore assume 1.
*sar_width = sps.sar_width == 0 ? 1 : sps.sar_width; *pixel_width = sps.sar_width == 0 ? 1 : sps.sar_width;
*sar_height = sps.sar_height == 0 ? 1 : sps.sar_height; *pixel_height = sps.sar_height == 0 ? 1 : sps.sar_height;
DVLOG(2) << "Found sar_width: " << *sar_width DVLOG(2) << "Found coded_width: " << *coded_width
<< " sar_height: " << *sar_height; << " coded_height: " << *coded_height
<< " pixel_width: " << *pixel_width
<< " pixel_height: " << *pixel_height;
return true;
} }
#undef RCHECK #undef RCHECK

View File

@ -19,22 +19,32 @@ namespace media {
// |avc_decoder_config_data| must be AVCDecoderConfigurationRecord specified // |avc_decoder_config_data| must be AVCDecoderConfigurationRecord specified
// in ISO/IEC 14496-15. // in ISO/IEC 14496-15.
// On success, |sar_width| and |sar_height| are set to the data specified in // On success, |coded_width| and |coded_height| contains coded resolution after
// |avc_decoder_config_data|. // cropping; |pixel_width:pixel_height| contains pixel aspect ratio, 1:1 is
// If the value is 0 or ommited in |avc_decoder_config_data|, then 1 is // assigned if it is not present.
// assigned. bool ExtractResolutionFromDecoderConfig(const uint8_t* avc_decoder_config_data,
void ExtractSarFromDecoderConfig(const uint8_t* avc_decoder_config_data, size_t avc_decoder_config_data_size,
size_t avc_decoder_config_data_size, uint32_t* coded_width,
uint32_t* sar_width, uint32_t* coded_height,
uint32_t* sar_height); uint32_t* pixel_width,
uint32_t* pixel_height);
// |sps_data| must be a valid SPS specified in ISO/IEC 14496-10. // |sps_data| must be a valid SPS specified in ISO/IEC 14496-10.
// On success, |sar_width| and |sar_height| contain pixel width and height // On success, |coded_width| and |coded_height| contains coded resolution after
// respectively. // cropping; |pixel_width:pixel_height| contains pixel aspect ratio, 1:1 is
void ExtractSarFromSps(const uint8_t* sps_data, // assigned if it is not present in SPS.
size_t sps_data_size, bool ExtractResolutionFromSpsData(const uint8_t* sps_data,
uint32_t* sar_width, size_t sps_data_size,
uint32_t* sar_height); uint32_t* coded_width,
uint32_t* coded_height,
uint32_t* pixel_width,
uint32_t* pixel_height);
struct H264SPS;
bool ExtractResolutionFromSps(const H264SPS& sps,
uint32_t* coded_width,
uint32_t* coded_height,
uint32_t* pixel_width,
uint32_t* pixel_height);
// For explanations of each struct and its members, see H.264 specification // For explanations of each struct and its members, see H.264 specification
// at http://www.itu.int/rec/T-REC-H.264. // at http://www.itu.int/rec/T-REC-H.264.

View File

@ -65,33 +65,63 @@ TEST(H264ParserTest, StreamFileParsing) {
} }
} }
TEST(H264ParserTest, ExtractSarFromDecoderConfig) { TEST(H264ParserTest, ExtractResolutionFromDecoderConfig) {
const uint8_t kDecoderConfig[] = { const uint8_t kDecoderConfig[] = {
0x01, 0x64, 0x00, 0x1E, 0xFF, 0xE1, 0x00, 0x1D, 0x67, 0x64, 0x00, 0x1E, 0x01, 0x64, 0x00, 0x1E, 0xFF, 0xE1, 0x00, 0x1D, 0x67, 0x64, 0x00, 0x1E,
0xAC, 0xD9, 0x40, 0xB4, 0x2F, 0xF9, 0x7F, 0xF0, 0x00, 0x80, 0x00, 0x91, 0xAC, 0xD9, 0x40, 0xB4, 0x2F, 0xF9, 0x7F, 0xF0, 0x00, 0x80, 0x00, 0x91,
0x00, 0x00, 0x03, 0x03, 0xE9, 0x00, 0x00, 0xEA, 0x60, 0x0F, 0x16, 0x2D, 0x00, 0x00, 0x03, 0x03, 0xE9, 0x00, 0x00, 0xEA, 0x60, 0x0F, 0x16, 0x2D,
0x96, 0x01, 0x00, 0x06, 0x68, 0xEB, 0xE3, 0xCB, 0x22, 0xC0}; 0x96, 0x01, 0x00, 0x06, 0x68, 0xEB, 0xE3, 0xCB, 0x22, 0xC0};
uint32_t coded_width = 0;
uint32_t coded_height = 0;
uint32_t pixel_width = 0; uint32_t pixel_width = 0;
uint32_t pixel_height = 0; uint32_t pixel_height = 0;
ExtractSarFromDecoderConfig(kDecoderConfig, arraysize(kDecoderConfig), ASSERT_TRUE(ExtractResolutionFromDecoderConfig(
&pixel_width, &pixel_height); kDecoderConfig, arraysize(kDecoderConfig), &coded_width, &coded_height,
&pixel_width, &pixel_height));
EXPECT_EQ(720u, coded_width);
EXPECT_EQ(360u, coded_height);
EXPECT_EQ(8u, pixel_width); EXPECT_EQ(8u, pixel_width);
EXPECT_EQ(9u, pixel_height); EXPECT_EQ(9u, pixel_height);
} }
TEST(H264ParserTest, ExtractSarFromSps) { TEST(H264ParserTest, ExtractResolutionFromSpsData) {
const uint8_t kSps[] = {0x67, 0x64, 0x00, 0x1E, 0xAC, 0xD9, 0x40, 0xB4, const uint8_t kSps[] = {0x67, 0x64, 0x00, 0x1E, 0xAC, 0xD9, 0x40, 0xB4,
0x2F, 0xF9, 0x7F, 0xF0, 0x00, 0x80, 0x00, 0x91, 0x2F, 0xF9, 0x7F, 0xF0, 0x00, 0x80, 0x00, 0x91,
0x00, 0x00, 0x03, 0x03, 0xE9, 0x00, 0x00, 0xEA, 0x00, 0x00, 0x03, 0x03, 0xE9, 0x00, 0x00, 0xEA,
0x60, 0x0F, 0x16, 0x2D, 0x96}; 0x60, 0x0F, 0x16, 0x2D, 0x96};
uint32_t coded_width = 0;
uint32_t coded_height = 0;
uint32_t pixel_width = 0; uint32_t pixel_width = 0;
uint32_t pixel_height = 0; uint32_t pixel_height = 0;
ExtractSarFromSps(kSps, arraysize(kSps), &pixel_width, &pixel_height); ASSERT_TRUE(ExtractResolutionFromSpsData(kSps, arraysize(kSps), &coded_width,
&coded_height, &pixel_width,
&pixel_height));
EXPECT_EQ(720u, coded_width);
EXPECT_EQ(360u, coded_height);
EXPECT_EQ(8u, pixel_width); EXPECT_EQ(8u, pixel_width);
EXPECT_EQ(9u, pixel_height); EXPECT_EQ(9u, pixel_height);
} }
TEST(H264ParserTest, ExtractResolutionFromSpsDataWithCropping) {
// 320x192 with frame_crop_bottom_offset of 6.
const uint8_t kSps[] = {0x67, 0x64, 0x00, 0x0C, 0xAC, 0xD9, 0x41, 0x41, 0x9F,
0x9F, 0x01, 0x10, 0x00, 0x00, 0x03, 0x00, 0x10, 0x00,
0x00, 0x03, 0x03, 0x00, 0xF1, 0x42, 0x99, 0x60};
uint32_t coded_width = 0;
uint32_t coded_height = 0;
uint32_t pixel_width = 0;
uint32_t pixel_height = 0;
ASSERT_TRUE(ExtractResolutionFromSpsData(kSps, arraysize(kSps), &coded_width,
&coded_height, &pixel_width,
&pixel_height));
EXPECT_EQ(320u, coded_width);
EXPECT_EQ(180u, coded_height);
EXPECT_EQ(1u, pixel_width);
EXPECT_EQ(1u, pixel_height);
}
} // namespace media } // namespace media
} // namespace edash_packager } // namespace edash_packager

View File

@ -345,10 +345,15 @@ bool EsParserH264::UpdateVideoDecoderConfig(const H264SPS* sps) {
return true; return true;
} }
// TODO: a MAP unit can be either 16 or 32 pixels. uint32_t coded_width = 0;
// although it's 16 pixels for progressive non MBAFF frames. uint32_t coded_height = 0;
uint16_t width = (sps->pic_width_in_mbs_minus1 + 1) * 16; uint32_t pixel_width = 0;
uint16_t height = (sps->pic_height_in_map_units_minus1 + 1) * 16; uint32_t pixel_height = 0;
if (!ExtractResolutionFromSps(*sps, &coded_width, &coded_height, &pixel_width,
&pixel_height)) {
LOG(ERROR) << "Failed to parse SPS.";
return false;
}
last_video_decoder_config_ = scoped_refptr<StreamInfo>( last_video_decoder_config_ = scoped_refptr<StreamInfo>(
new VideoStreamInfo( new VideoStreamInfo(
@ -361,10 +366,10 @@ bool EsParserH264::UpdateVideoDecoderConfig(const H264SPS* sps) {
decoder_config_record[2], decoder_config_record[2],
decoder_config_record[3]), decoder_config_record[3]),
std::string(), std::string(),
width, coded_width,
height, coded_height,
sps->sar_width == 0 ? 1 : sps->sar_width, pixel_width,
sps->sar_height == 0 ? 1 : sps->sar_height, pixel_height,
0, 0,
H264ByteToUnitStreamConverter::kUnitStreamNaluLengthSize, H264ByteToUnitStreamConverter::kUnitStreamNaluLengthSize,
decoder_config_record.data(), decoder_config_record.data(),
@ -372,12 +377,7 @@ bool EsParserH264::UpdateVideoDecoderConfig(const H264SPS* sps) {
false)); false));
DVLOG(1) << "Profile IDC: " << sps->profile_idc; DVLOG(1) << "Profile IDC: " << sps->profile_idc;
DVLOG(1) << "Level IDC: " << sps->level_idc; DVLOG(1) << "Level IDC: " << sps->level_idc;
DVLOG(1) << "Pic width: " << width; DVLOG(1) << "log2_max_frame_num_minus4: " << sps->log2_max_frame_num_minus4;
DVLOG(1) << "Pic height: " << height;
DVLOG(1) << "log2_max_frame_num_minus4: "
<< sps->log2_max_frame_num_minus4;
DVLOG(1) << "SAR: width=" << sps->sar_width
<< " height=" << sps->sar_height;
// Video config notification. // Video config notification.
new_stream_info_cb_.Run(last_video_decoder_config_); new_stream_info_cb_.Run(last_video_decoder_config_);

View File

@ -369,13 +369,40 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
entry.avcc.profile_compatibility, entry.avcc.profile_compatibility,
entry.avcc.avc_level); entry.avcc.avc_level);
uint32_t pixel_width = entry.pixel_aspect.h_spacing; uint32_t coded_width = 0;
uint32_t pixel_height = entry.pixel_aspect.v_spacing; uint32_t coded_height = 0;
if (pixel_width == 0 || pixel_height == 0) { uint32_t pixel_width = 0;
if (!entry.avcc.sps_list.empty()) { uint32_t pixel_height = 0;
const std::vector<uint8_t>& sps = entry.avcc.sps_list[0];
ExtractSarFromSps(&sps[0], sps.size(), &pixel_width, &pixel_height); if (entry.avcc.sps_list.empty()) {
} LOG(ERROR) << "Cannot find sps in avc decoder configuration record.";
return false;
}
const std::vector<uint8_t>& sps = entry.avcc.sps_list[0];
if (!ExtractResolutionFromSpsData(&sps[0], sps.size(), &coded_width,
&coded_height, &pixel_width,
&pixel_height)) {
LOG(ERROR) << "Failed to parse SPS.";
return false;
}
LOG_IF(WARNING,
entry.width != coded_width || entry.height != coded_height)
<< "Resolution in VisualSampleEntry (" << entry.width << ","
<< entry.height << ") does not match with resolution in "
"AVCDecoderConfigurationRecord ("
<< coded_width << "," << coded_height
<< "). Use AVCDecoderConfigurationRecord.";
if (entry.pixel_aspect.h_spacing != 0 || entry.pixel_aspect.v_spacing != 0) {
LOG_IF(WARNING, entry.pixel_aspect.h_spacing != pixel_width ||
entry.pixel_aspect.v_spacing != pixel_height)
<< "Pixel aspect ratio in PASP box ("
<< entry.pixel_aspect.h_spacing << ","
<< entry.pixel_aspect.v_spacing
<< ") does not match with SAR in AVCDecoderConfigurationRecord ("
<< pixel_width << "," << pixel_height
<< "). Use AVCDecoderConfigurationRecord.";
} }
bool is_encrypted = entry.sinf.info.track_encryption.is_encrypted; bool is_encrypted = entry.sinf.info.track_encryption.is_encrypted;
@ -386,8 +413,8 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
kCodecH264, kCodecH264,
codec_string, codec_string,
track->media.header.language, track->media.header.language,
entry.width, coded_width,
entry.height, coded_height,
pixel_width, pixel_width,
pixel_height, pixel_height,
0, // trick_play_rate 0, // trick_play_rate

View File

@ -809,15 +809,42 @@ bool WvmMediaParser::Output(bool output_encrypted_sample) {
VideoStreamInfo* video_stream_info = VideoStreamInfo* video_stream_info =
reinterpret_cast<VideoStreamInfo*>(stream_infos_[i].get()); reinterpret_cast<VideoStreamInfo*>(stream_infos_[i].get());
uint32_t pixel_width = video_stream_info->pixel_width(); uint32_t coded_width = 0;
uint32_t pixel_height = video_stream_info->pixel_height(); uint32_t coded_height = 0;
if (pixel_width == 0 || pixel_height == 0) { uint32_t pixel_width = 0;
ExtractSarFromDecoderConfig(&decoder_config_record[0], uint32_t pixel_height = 0;
decoder_config_record.size(), if (!ExtractResolutionFromDecoderConfig(
&pixel_width, &pixel_height); &decoder_config_record[0], decoder_config_record.size(),
&coded_width, &coded_height, &pixel_width, &pixel_height)) {
LOG(ERROR) << "Failed to parse AVCDecoderConfigurationRecord.";
return false;
}
if (pixel_width != video_stream_info->pixel_width() ||
pixel_height != video_stream_info->pixel_height()) {
LOG_IF(WARNING, video_stream_info->pixel_width() != 0 ||
video_stream_info->pixel_height() != 0)
<< "Pixel aspect ratio in WVM metadata ("
<< video_stream_info->pixel_width() << ","
<< video_stream_info->pixel_height()
<< ") does not match with SAR in "
"AVCDecoderConfigurationRecord ("
<< pixel_width << "," << pixel_height
<< "). Use AVCDecoderConfigurationRecord.";
video_stream_info->set_pixel_width(pixel_width); video_stream_info->set_pixel_width(pixel_width);
video_stream_info->set_pixel_height(pixel_height); video_stream_info->set_pixel_height(pixel_height);
} }
if (coded_width != video_stream_info->width() ||
coded_height != video_stream_info->height()) {
LOG(WARNING) << "Resolution in WVM metadata ("
<< video_stream_info->width() << ","
<< video_stream_info->height()
<< ") does not match with resolution in "
"AVCDecoderConfigurationRecord ("
<< coded_width << "," << coded_height
<< "). Use AVCDecoderConfigurationRecord.";
video_stream_info->set_width(coded_width);
video_stream_info->set_height(coded_height);
}
} }
} }
} }