Fix incorrect width/height in VisualSampleEntry for mp2t stream with cropping
Cropping is necessary if width/height are not exactly disivible by 16 due to 16x16 macroblock size. The width and height fields in VisualSampleEntry must correctly document the cropped frame dimentions (visual presentation size) of the AVC stream that is described by that entry. Also fixes width/height in input mp4 and wvm streams if the values in VisualSampleEntry / metadata do not match with the values in AVCDecoderConfigurationRecord (SPS). Issue: 39 Change-Id: Id55c5acf245bee3f4e66302b2042eb7d9c488c19
This commit is contained in:
parent
c75d319985
commit
3a5fdd2d9c
|
@ -67,6 +67,8 @@ class VideoStreamInfo : public StreamInfo {
|
|||
uint8_t nalu_length_size() const { return nalu_length_size_; }
|
||||
int16_t trick_play_rate() const { return trick_play_rate_; }
|
||||
|
||||
void set_width(uint32_t width) { width_ = width; }
|
||||
void set_height(uint32_t height) { height_ = height; }
|
||||
void set_pixel_width(uint32_t pixel_width) { pixel_width_ = pixel_width; }
|
||||
void set_pixel_height(uint32_t pixel_height) { pixel_height_ = pixel_height; }
|
||||
|
||||
|
|
|
@ -16,14 +16,16 @@ namespace media {
|
|||
do { \
|
||||
if (!(x)) { \
|
||||
LOG(ERROR) << "Failure while parsing AVCDecoderConfig: " << #x; \
|
||||
return; \
|
||||
return false; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
void ExtractSarFromDecoderConfig(const uint8_t* avc_decoder_config_data,
|
||||
size_t avc_decoder_config_data_size,
|
||||
uint32_t* sar_width,
|
||||
uint32_t* sar_height) {
|
||||
bool ExtractResolutionFromDecoderConfig(const uint8_t* avc_decoder_config_data,
|
||||
size_t avc_decoder_config_data_size,
|
||||
uint32_t* coded_width,
|
||||
uint32_t* coded_height,
|
||||
uint32_t* pixel_width,
|
||||
uint32_t* pixel_height) {
|
||||
BufferReader reader(avc_decoder_config_data, avc_decoder_config_data_size);
|
||||
uint8_t value = 0;
|
||||
// version check, must be 1.
|
||||
|
@ -52,33 +54,96 @@ void ExtractSarFromDecoderConfig(const uint8_t* avc_decoder_config_data,
|
|||
const uint8_t num_sps = value & 0x1F;
|
||||
if (num_sps < 1) {
|
||||
LOG(ERROR) << "No SPS found.";
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
uint16_t sps_length = 0;
|
||||
RCHECK(reader.Read2(&sps_length));
|
||||
|
||||
ExtractSarFromSps(reader.data() + reader.pos(), sps_length, sar_width,
|
||||
sar_height);
|
||||
|
||||
return ExtractResolutionFromSpsData(reader.data() + reader.pos(), sps_length,
|
||||
coded_width, coded_height, pixel_width,
|
||||
pixel_height);
|
||||
// It is unlikely to have more than one SPS in practice. Also there's
|
||||
// no way to change the sar_{width,height} dynamically from VideoStreamInfo.
|
||||
// So skip the rest (if there are any).
|
||||
// no way to change the {coded,pixel}_{width,height} dynamically from
|
||||
// VideoStreamInfo. So skip the rest (if there are any).
|
||||
}
|
||||
|
||||
void ExtractSarFromSps(const uint8_t* sps_data,
|
||||
size_t sps_data_size,
|
||||
uint32_t* sar_width,
|
||||
uint32_t* sar_height) {
|
||||
bool ExtractResolutionFromSpsData(const uint8_t* sps_data,
|
||||
size_t sps_data_size,
|
||||
uint32_t* coded_width,
|
||||
uint32_t* coded_height,
|
||||
uint32_t* pixel_width,
|
||||
uint32_t* pixel_height) {
|
||||
H264Parser parser;
|
||||
int sps_id;
|
||||
RCHECK(parser.ParseSPSFromArray(sps_data, sps_data_size, &sps_id) ==
|
||||
H264Parser::kOk);
|
||||
const H264SPS& sps = *parser.GetSPS(sps_id);
|
||||
return ExtractResolutionFromSps(*parser.GetSPS(sps_id), coded_width,
|
||||
coded_height, pixel_width, pixel_height);
|
||||
}
|
||||
|
||||
// Implemented according to ISO/IEC 14496-10:2005 7.4.2.1 Sequence parameter set
|
||||
// RBSP semantics.
|
||||
bool ExtractResolutionFromSps(const H264SPS& sps,
|
||||
uint32_t* coded_width,
|
||||
uint32_t* coded_height,
|
||||
uint32_t* pixel_width,
|
||||
uint32_t* pixel_height) {
|
||||
int crop_x = 0;
|
||||
int crop_y = 0;
|
||||
if (sps.frame_cropping_flag) {
|
||||
int sub_width_c = 0;
|
||||
int sub_height_c = 0;
|
||||
// Table 6-1.
|
||||
switch (sps.chroma_format_idc) {
|
||||
case 0: // monochrome
|
||||
// SubWidthC and SubHeightC are not defined for monochrome. For ease of
|
||||
// computation afterwards, assign both to 1.
|
||||
sub_width_c = 1;
|
||||
sub_height_c = 1;
|
||||
break;
|
||||
case 1: // 4:2:0
|
||||
sub_width_c = 2;
|
||||
sub_height_c = 2;
|
||||
break;
|
||||
case 2: // 4:2:2
|
||||
sub_width_c = 2;
|
||||
sub_height_c = 1;
|
||||
break;
|
||||
case 3: // 4:4:4
|
||||
sub_width_c = 1;
|
||||
sub_height_c = 1;
|
||||
break;
|
||||
default:
|
||||
LOG(ERROR) << "Unexpected chroma_format_idc " << sps.chroma_format_idc;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Formula 7-16, 7-17, 7-18, 7-19.
|
||||
int crop_unit_x = sub_width_c;
|
||||
int crop_unit_y = sub_height_c * (2 - (sps.frame_mbs_only_flag ? 1 : 0));
|
||||
crop_x = crop_unit_x *
|
||||
(sps.frame_crop_left_offset + sps.frame_crop_right_offset);
|
||||
crop_y = crop_unit_y *
|
||||
(sps.frame_crop_top_offset + sps.frame_crop_bottom_offset);
|
||||
}
|
||||
|
||||
// Formula 7-10, 7-11.
|
||||
int pic_width_in_mbs = sps.pic_width_in_mbs_minus1 + 1;
|
||||
*coded_width = pic_width_in_mbs * 16 - crop_x;
|
||||
|
||||
// Formula 7-13, 7-15.
|
||||
int pic_height_in_mbs = (2 - (sps.frame_mbs_only_flag ? 1 : 0)) *
|
||||
(sps.pic_height_in_map_units_minus1 + 1);
|
||||
*coded_height = pic_height_in_mbs * 16 - crop_y;
|
||||
|
||||
// 0 means it wasn't in the SPS and therefore assume 1.
|
||||
*sar_width = sps.sar_width == 0 ? 1 : sps.sar_width;
|
||||
*sar_height = sps.sar_height == 0 ? 1 : sps.sar_height;
|
||||
DVLOG(2) << "Found sar_width: " << *sar_width
|
||||
<< " sar_height: " << *sar_height;
|
||||
*pixel_width = sps.sar_width == 0 ? 1 : sps.sar_width;
|
||||
*pixel_height = sps.sar_height == 0 ? 1 : sps.sar_height;
|
||||
DVLOG(2) << "Found coded_width: " << *coded_width
|
||||
<< " coded_height: " << *coded_height
|
||||
<< " pixel_width: " << *pixel_width
|
||||
<< " pixel_height: " << *pixel_height;
|
||||
return true;
|
||||
}
|
||||
|
||||
#undef RCHECK
|
||||
|
|
|
@ -19,22 +19,32 @@ namespace media {
|
|||
|
||||
// |avc_decoder_config_data| must be AVCDecoderConfigurationRecord specified
|
||||
// in ISO/IEC 14496-15.
|
||||
// On success, |sar_width| and |sar_height| are set to the data specified in
|
||||
// |avc_decoder_config_data|.
|
||||
// If the value is 0 or ommited in |avc_decoder_config_data|, then 1 is
|
||||
// assigned.
|
||||
void ExtractSarFromDecoderConfig(const uint8_t* avc_decoder_config_data,
|
||||
size_t avc_decoder_config_data_size,
|
||||
uint32_t* sar_width,
|
||||
uint32_t* sar_height);
|
||||
// On success, |coded_width| and |coded_height| contains coded resolution after
|
||||
// cropping; |pixel_width:pixel_height| contains pixel aspect ratio, 1:1 is
|
||||
// assigned if it is not present.
|
||||
bool ExtractResolutionFromDecoderConfig(const uint8_t* avc_decoder_config_data,
|
||||
size_t avc_decoder_config_data_size,
|
||||
uint32_t* coded_width,
|
||||
uint32_t* coded_height,
|
||||
uint32_t* pixel_width,
|
||||
uint32_t* pixel_height);
|
||||
|
||||
// |sps_data| must be a valid SPS specified in ISO/IEC 14496-10.
|
||||
// On success, |sar_width| and |sar_height| contain pixel width and height
|
||||
// respectively.
|
||||
void ExtractSarFromSps(const uint8_t* sps_data,
|
||||
size_t sps_data_size,
|
||||
uint32_t* sar_width,
|
||||
uint32_t* sar_height);
|
||||
// On success, |coded_width| and |coded_height| contains coded resolution after
|
||||
// cropping; |pixel_width:pixel_height| contains pixel aspect ratio, 1:1 is
|
||||
// assigned if it is not present in SPS.
|
||||
bool ExtractResolutionFromSpsData(const uint8_t* sps_data,
|
||||
size_t sps_data_size,
|
||||
uint32_t* coded_width,
|
||||
uint32_t* coded_height,
|
||||
uint32_t* pixel_width,
|
||||
uint32_t* pixel_height);
|
||||
struct H264SPS;
|
||||
bool ExtractResolutionFromSps(const H264SPS& sps,
|
||||
uint32_t* coded_width,
|
||||
uint32_t* coded_height,
|
||||
uint32_t* pixel_width,
|
||||
uint32_t* pixel_height);
|
||||
|
||||
// For explanations of each struct and its members, see H.264 specification
|
||||
// at http://www.itu.int/rec/T-REC-H.264.
|
||||
|
|
|
@ -65,33 +65,63 @@ TEST(H264ParserTest, StreamFileParsing) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST(H264ParserTest, ExtractSarFromDecoderConfig) {
|
||||
TEST(H264ParserTest, ExtractResolutionFromDecoderConfig) {
|
||||
const uint8_t kDecoderConfig[] = {
|
||||
0x01, 0x64, 0x00, 0x1E, 0xFF, 0xE1, 0x00, 0x1D, 0x67, 0x64, 0x00, 0x1E,
|
||||
0xAC, 0xD9, 0x40, 0xB4, 0x2F, 0xF9, 0x7F, 0xF0, 0x00, 0x80, 0x00, 0x91,
|
||||
0x00, 0x00, 0x03, 0x03, 0xE9, 0x00, 0x00, 0xEA, 0x60, 0x0F, 0x16, 0x2D,
|
||||
0x96, 0x01, 0x00, 0x06, 0x68, 0xEB, 0xE3, 0xCB, 0x22, 0xC0};
|
||||
|
||||
uint32_t coded_width = 0;
|
||||
uint32_t coded_height = 0;
|
||||
uint32_t pixel_width = 0;
|
||||
uint32_t pixel_height = 0;
|
||||
ExtractSarFromDecoderConfig(kDecoderConfig, arraysize(kDecoderConfig),
|
||||
&pixel_width, &pixel_height);
|
||||
ASSERT_TRUE(ExtractResolutionFromDecoderConfig(
|
||||
kDecoderConfig, arraysize(kDecoderConfig), &coded_width, &coded_height,
|
||||
&pixel_width, &pixel_height));
|
||||
EXPECT_EQ(720u, coded_width);
|
||||
EXPECT_EQ(360u, coded_height);
|
||||
EXPECT_EQ(8u, pixel_width);
|
||||
EXPECT_EQ(9u, pixel_height);
|
||||
}
|
||||
|
||||
TEST(H264ParserTest, ExtractSarFromSps) {
|
||||
TEST(H264ParserTest, ExtractResolutionFromSpsData) {
|
||||
const uint8_t kSps[] = {0x67, 0x64, 0x00, 0x1E, 0xAC, 0xD9, 0x40, 0xB4,
|
||||
0x2F, 0xF9, 0x7F, 0xF0, 0x00, 0x80, 0x00, 0x91,
|
||||
0x00, 0x00, 0x03, 0x03, 0xE9, 0x00, 0x00, 0xEA,
|
||||
0x60, 0x0F, 0x16, 0x2D, 0x96};
|
||||
|
||||
uint32_t coded_width = 0;
|
||||
uint32_t coded_height = 0;
|
||||
uint32_t pixel_width = 0;
|
||||
uint32_t pixel_height = 0;
|
||||
ExtractSarFromSps(kSps, arraysize(kSps), &pixel_width, &pixel_height);
|
||||
ASSERT_TRUE(ExtractResolutionFromSpsData(kSps, arraysize(kSps), &coded_width,
|
||||
&coded_height, &pixel_width,
|
||||
&pixel_height));
|
||||
EXPECT_EQ(720u, coded_width);
|
||||
EXPECT_EQ(360u, coded_height);
|
||||
EXPECT_EQ(8u, pixel_width);
|
||||
EXPECT_EQ(9u, pixel_height);
|
||||
}
|
||||
|
||||
TEST(H264ParserTest, ExtractResolutionFromSpsDataWithCropping) {
|
||||
// 320x192 with frame_crop_bottom_offset of 6.
|
||||
const uint8_t kSps[] = {0x67, 0x64, 0x00, 0x0C, 0xAC, 0xD9, 0x41, 0x41, 0x9F,
|
||||
0x9F, 0x01, 0x10, 0x00, 0x00, 0x03, 0x00, 0x10, 0x00,
|
||||
0x00, 0x03, 0x03, 0x00, 0xF1, 0x42, 0x99, 0x60};
|
||||
|
||||
uint32_t coded_width = 0;
|
||||
uint32_t coded_height = 0;
|
||||
uint32_t pixel_width = 0;
|
||||
uint32_t pixel_height = 0;
|
||||
ASSERT_TRUE(ExtractResolutionFromSpsData(kSps, arraysize(kSps), &coded_width,
|
||||
&coded_height, &pixel_width,
|
||||
&pixel_height));
|
||||
EXPECT_EQ(320u, coded_width);
|
||||
EXPECT_EQ(180u, coded_height);
|
||||
EXPECT_EQ(1u, pixel_width);
|
||||
EXPECT_EQ(1u, pixel_height);
|
||||
}
|
||||
|
||||
} // namespace media
|
||||
} // namespace edash_packager
|
||||
|
|
|
@ -345,10 +345,15 @@ bool EsParserH264::UpdateVideoDecoderConfig(const H264SPS* sps) {
|
|||
return true;
|
||||
}
|
||||
|
||||
// TODO: a MAP unit can be either 16 or 32 pixels.
|
||||
// although it's 16 pixels for progressive non MBAFF frames.
|
||||
uint16_t width = (sps->pic_width_in_mbs_minus1 + 1) * 16;
|
||||
uint16_t height = (sps->pic_height_in_map_units_minus1 + 1) * 16;
|
||||
uint32_t coded_width = 0;
|
||||
uint32_t coded_height = 0;
|
||||
uint32_t pixel_width = 0;
|
||||
uint32_t pixel_height = 0;
|
||||
if (!ExtractResolutionFromSps(*sps, &coded_width, &coded_height, &pixel_width,
|
||||
&pixel_height)) {
|
||||
LOG(ERROR) << "Failed to parse SPS.";
|
||||
return false;
|
||||
}
|
||||
|
||||
last_video_decoder_config_ = scoped_refptr<StreamInfo>(
|
||||
new VideoStreamInfo(
|
||||
|
@ -361,10 +366,10 @@ bool EsParserH264::UpdateVideoDecoderConfig(const H264SPS* sps) {
|
|||
decoder_config_record[2],
|
||||
decoder_config_record[3]),
|
||||
std::string(),
|
||||
width,
|
||||
height,
|
||||
sps->sar_width == 0 ? 1 : sps->sar_width,
|
||||
sps->sar_height == 0 ? 1 : sps->sar_height,
|
||||
coded_width,
|
||||
coded_height,
|
||||
pixel_width,
|
||||
pixel_height,
|
||||
0,
|
||||
H264ByteToUnitStreamConverter::kUnitStreamNaluLengthSize,
|
||||
decoder_config_record.data(),
|
||||
|
@ -372,12 +377,7 @@ bool EsParserH264::UpdateVideoDecoderConfig(const H264SPS* sps) {
|
|||
false));
|
||||
DVLOG(1) << "Profile IDC: " << sps->profile_idc;
|
||||
DVLOG(1) << "Level IDC: " << sps->level_idc;
|
||||
DVLOG(1) << "Pic width: " << width;
|
||||
DVLOG(1) << "Pic height: " << height;
|
||||
DVLOG(1) << "log2_max_frame_num_minus4: "
|
||||
<< sps->log2_max_frame_num_minus4;
|
||||
DVLOG(1) << "SAR: width=" << sps->sar_width
|
||||
<< " height=" << sps->sar_height;
|
||||
DVLOG(1) << "log2_max_frame_num_minus4: " << sps->log2_max_frame_num_minus4;
|
||||
|
||||
// Video config notification.
|
||||
new_stream_info_cb_.Run(last_video_decoder_config_);
|
||||
|
|
|
@ -369,13 +369,40 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
|
|||
entry.avcc.profile_compatibility,
|
||||
entry.avcc.avc_level);
|
||||
|
||||
uint32_t pixel_width = entry.pixel_aspect.h_spacing;
|
||||
uint32_t pixel_height = entry.pixel_aspect.v_spacing;
|
||||
if (pixel_width == 0 || pixel_height == 0) {
|
||||
if (!entry.avcc.sps_list.empty()) {
|
||||
const std::vector<uint8_t>& sps = entry.avcc.sps_list[0];
|
||||
ExtractSarFromSps(&sps[0], sps.size(), &pixel_width, &pixel_height);
|
||||
}
|
||||
uint32_t coded_width = 0;
|
||||
uint32_t coded_height = 0;
|
||||
uint32_t pixel_width = 0;
|
||||
uint32_t pixel_height = 0;
|
||||
|
||||
if (entry.avcc.sps_list.empty()) {
|
||||
LOG(ERROR) << "Cannot find sps in avc decoder configuration record.";
|
||||
return false;
|
||||
}
|
||||
const std::vector<uint8_t>& sps = entry.avcc.sps_list[0];
|
||||
if (!ExtractResolutionFromSpsData(&sps[0], sps.size(), &coded_width,
|
||||
&coded_height, &pixel_width,
|
||||
&pixel_height)) {
|
||||
LOG(ERROR) << "Failed to parse SPS.";
|
||||
return false;
|
||||
}
|
||||
|
||||
LOG_IF(WARNING,
|
||||
entry.width != coded_width || entry.height != coded_height)
|
||||
<< "Resolution in VisualSampleEntry (" << entry.width << ","
|
||||
<< entry.height << ") does not match with resolution in "
|
||||
"AVCDecoderConfigurationRecord ("
|
||||
<< coded_width << "," << coded_height
|
||||
<< "). Use AVCDecoderConfigurationRecord.";
|
||||
|
||||
if (entry.pixel_aspect.h_spacing != 0 || entry.pixel_aspect.v_spacing != 0) {
|
||||
LOG_IF(WARNING, entry.pixel_aspect.h_spacing != pixel_width ||
|
||||
entry.pixel_aspect.v_spacing != pixel_height)
|
||||
<< "Pixel aspect ratio in PASP box ("
|
||||
<< entry.pixel_aspect.h_spacing << ","
|
||||
<< entry.pixel_aspect.v_spacing
|
||||
<< ") does not match with SAR in AVCDecoderConfigurationRecord ("
|
||||
<< pixel_width << "," << pixel_height
|
||||
<< "). Use AVCDecoderConfigurationRecord.";
|
||||
}
|
||||
|
||||
bool is_encrypted = entry.sinf.info.track_encryption.is_encrypted;
|
||||
|
@ -386,8 +413,8 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
|
|||
kCodecH264,
|
||||
codec_string,
|
||||
track->media.header.language,
|
||||
entry.width,
|
||||
entry.height,
|
||||
coded_width,
|
||||
coded_height,
|
||||
pixel_width,
|
||||
pixel_height,
|
||||
0, // trick_play_rate
|
||||
|
|
|
@ -809,15 +809,42 @@ bool WvmMediaParser::Output(bool output_encrypted_sample) {
|
|||
|
||||
VideoStreamInfo* video_stream_info =
|
||||
reinterpret_cast<VideoStreamInfo*>(stream_infos_[i].get());
|
||||
uint32_t pixel_width = video_stream_info->pixel_width();
|
||||
uint32_t pixel_height = video_stream_info->pixel_height();
|
||||
if (pixel_width == 0 || pixel_height == 0) {
|
||||
ExtractSarFromDecoderConfig(&decoder_config_record[0],
|
||||
decoder_config_record.size(),
|
||||
&pixel_width, &pixel_height);
|
||||
uint32_t coded_width = 0;
|
||||
uint32_t coded_height = 0;
|
||||
uint32_t pixel_width = 0;
|
||||
uint32_t pixel_height = 0;
|
||||
if (!ExtractResolutionFromDecoderConfig(
|
||||
&decoder_config_record[0], decoder_config_record.size(),
|
||||
&coded_width, &coded_height, &pixel_width, &pixel_height)) {
|
||||
LOG(ERROR) << "Failed to parse AVCDecoderConfigurationRecord.";
|
||||
return false;
|
||||
}
|
||||
if (pixel_width != video_stream_info->pixel_width() ||
|
||||
pixel_height != video_stream_info->pixel_height()) {
|
||||
LOG_IF(WARNING, video_stream_info->pixel_width() != 0 ||
|
||||
video_stream_info->pixel_height() != 0)
|
||||
<< "Pixel aspect ratio in WVM metadata ("
|
||||
<< video_stream_info->pixel_width() << ","
|
||||
<< video_stream_info->pixel_height()
|
||||
<< ") does not match with SAR in "
|
||||
"AVCDecoderConfigurationRecord ("
|
||||
<< pixel_width << "," << pixel_height
|
||||
<< "). Use AVCDecoderConfigurationRecord.";
|
||||
video_stream_info->set_pixel_width(pixel_width);
|
||||
video_stream_info->set_pixel_height(pixel_height);
|
||||
}
|
||||
if (coded_width != video_stream_info->width() ||
|
||||
coded_height != video_stream_info->height()) {
|
||||
LOG(WARNING) << "Resolution in WVM metadata ("
|
||||
<< video_stream_info->width() << ","
|
||||
<< video_stream_info->height()
|
||||
<< ") does not match with resolution in "
|
||||
"AVCDecoderConfigurationRecord ("
|
||||
<< coded_width << "," << coded_height
|
||||
<< "). Use AVCDecoderConfigurationRecord.";
|
||||
video_stream_info->set_width(coded_width);
|
||||
video_stream_info->set_height(coded_height);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue