From 3a5fdd2d9c2277af7d1235174f7e4e89afb54005 Mon Sep 17 00:00:00 2001 From: Kongqun Yang Date: Wed, 16 Sep 2015 15:22:22 -0700 Subject: [PATCH] Fix incorrect width/height in VisualSampleEntry for mp2t stream with cropping Cropping is necessary if width/height are not exactly disivible by 16 due to 16x16 macroblock size. The width and height fields in VisualSampleEntry must correctly document the cropped frame dimentions (visual presentation size) of the AVC stream that is described by that entry. Also fixes width/height in input mp4 and wvm streams if the values in VisualSampleEntry / metadata do not match with the values in AVCDecoderConfigurationRecord (SPS). Issue: 39 Change-Id: Id55c5acf245bee3f4e66302b2042eb7d9c488c19 --- packager/media/base/video_stream_info.h | 2 + packager/media/filters/h264_parser.cc | 105 ++++++++++++++---- packager/media/filters/h264_parser.h | 38 ++++--- .../media/filters/h264_parser_unittest.cc | 40 ++++++- packager/media/formats/mp2t/es_parser_h264.cc | 28 ++--- .../media/formats/mp4/mp4_media_parser.cc | 45 ++++++-- .../media/formats/wvm/wvm_media_parser.cc | 39 ++++++- 7 files changed, 229 insertions(+), 68 deletions(-) diff --git a/packager/media/base/video_stream_info.h b/packager/media/base/video_stream_info.h index be4867da04..a122a3e709 100644 --- a/packager/media/base/video_stream_info.h +++ b/packager/media/base/video_stream_info.h @@ -67,6 +67,8 @@ class VideoStreamInfo : public StreamInfo { uint8_t nalu_length_size() const { return nalu_length_size_; } int16_t trick_play_rate() const { return trick_play_rate_; } + void set_width(uint32_t width) { width_ = width; } + void set_height(uint32_t height) { height_ = height; } void set_pixel_width(uint32_t pixel_width) { pixel_width_ = pixel_width; } void set_pixel_height(uint32_t pixel_height) { pixel_height_ = pixel_height; } diff --git a/packager/media/filters/h264_parser.cc b/packager/media/filters/h264_parser.cc index f79f075391..ef51ea4129 100644 --- a/packager/media/filters/h264_parser.cc +++ b/packager/media/filters/h264_parser.cc @@ -16,14 +16,16 @@ namespace media { do { \ if (!(x)) { \ LOG(ERROR) << "Failure while parsing AVCDecoderConfig: " << #x; \ - return; \ + return false; \ } \ } while (0) -void ExtractSarFromDecoderConfig(const uint8_t* avc_decoder_config_data, - size_t avc_decoder_config_data_size, - uint32_t* sar_width, - uint32_t* sar_height) { +bool ExtractResolutionFromDecoderConfig(const uint8_t* avc_decoder_config_data, + size_t avc_decoder_config_data_size, + uint32_t* coded_width, + uint32_t* coded_height, + uint32_t* pixel_width, + uint32_t* pixel_height) { BufferReader reader(avc_decoder_config_data, avc_decoder_config_data_size); uint8_t value = 0; // version check, must be 1. @@ -52,33 +54,96 @@ void ExtractSarFromDecoderConfig(const uint8_t* avc_decoder_config_data, const uint8_t num_sps = value & 0x1F; if (num_sps < 1) { LOG(ERROR) << "No SPS found."; - return; + return false; } uint16_t sps_length = 0; RCHECK(reader.Read2(&sps_length)); - ExtractSarFromSps(reader.data() + reader.pos(), sps_length, sar_width, - sar_height); - + return ExtractResolutionFromSpsData(reader.data() + reader.pos(), sps_length, + coded_width, coded_height, pixel_width, + pixel_height); // It is unlikely to have more than one SPS in practice. Also there's - // no way to change the sar_{width,height} dynamically from VideoStreamInfo. - // So skip the rest (if there are any). + // no way to change the {coded,pixel}_{width,height} dynamically from + // VideoStreamInfo. So skip the rest (if there are any). } -void ExtractSarFromSps(const uint8_t* sps_data, - size_t sps_data_size, - uint32_t* sar_width, - uint32_t* sar_height) { +bool ExtractResolutionFromSpsData(const uint8_t* sps_data, + size_t sps_data_size, + uint32_t* coded_width, + uint32_t* coded_height, + uint32_t* pixel_width, + uint32_t* pixel_height) { H264Parser parser; int sps_id; RCHECK(parser.ParseSPSFromArray(sps_data, sps_data_size, &sps_id) == H264Parser::kOk); - const H264SPS& sps = *parser.GetSPS(sps_id); + return ExtractResolutionFromSps(*parser.GetSPS(sps_id), coded_width, + coded_height, pixel_width, pixel_height); +} + +// Implemented according to ISO/IEC 14496-10:2005 7.4.2.1 Sequence parameter set +// RBSP semantics. +bool ExtractResolutionFromSps(const H264SPS& sps, + uint32_t* coded_width, + uint32_t* coded_height, + uint32_t* pixel_width, + uint32_t* pixel_height) { + int crop_x = 0; + int crop_y = 0; + if (sps.frame_cropping_flag) { + int sub_width_c = 0; + int sub_height_c = 0; + // Table 6-1. + switch (sps.chroma_format_idc) { + case 0: // monochrome + // SubWidthC and SubHeightC are not defined for monochrome. For ease of + // computation afterwards, assign both to 1. + sub_width_c = 1; + sub_height_c = 1; + break; + case 1: // 4:2:0 + sub_width_c = 2; + sub_height_c = 2; + break; + case 2: // 4:2:2 + sub_width_c = 2; + sub_height_c = 1; + break; + case 3: // 4:4:4 + sub_width_c = 1; + sub_height_c = 1; + break; + default: + LOG(ERROR) << "Unexpected chroma_format_idc " << sps.chroma_format_idc; + return false; + } + + // Formula 7-16, 7-17, 7-18, 7-19. + int crop_unit_x = sub_width_c; + int crop_unit_y = sub_height_c * (2 - (sps.frame_mbs_only_flag ? 1 : 0)); + crop_x = crop_unit_x * + (sps.frame_crop_left_offset + sps.frame_crop_right_offset); + crop_y = crop_unit_y * + (sps.frame_crop_top_offset + sps.frame_crop_bottom_offset); + } + + // Formula 7-10, 7-11. + int pic_width_in_mbs = sps.pic_width_in_mbs_minus1 + 1; + *coded_width = pic_width_in_mbs * 16 - crop_x; + + // Formula 7-13, 7-15. + int pic_height_in_mbs = (2 - (sps.frame_mbs_only_flag ? 1 : 0)) * + (sps.pic_height_in_map_units_minus1 + 1); + *coded_height = pic_height_in_mbs * 16 - crop_y; + // 0 means it wasn't in the SPS and therefore assume 1. - *sar_width = sps.sar_width == 0 ? 1 : sps.sar_width; - *sar_height = sps.sar_height == 0 ? 1 : sps.sar_height; - DVLOG(2) << "Found sar_width: " << *sar_width - << " sar_height: " << *sar_height; + *pixel_width = sps.sar_width == 0 ? 1 : sps.sar_width; + *pixel_height = sps.sar_height == 0 ? 1 : sps.sar_height; + DVLOG(2) << "Found coded_width: " << *coded_width + << " coded_height: " << *coded_height + << " pixel_width: " << *pixel_width + << " pixel_height: " << *pixel_height; + return true; } #undef RCHECK diff --git a/packager/media/filters/h264_parser.h b/packager/media/filters/h264_parser.h index 77fc00171d..c1a5f874b2 100644 --- a/packager/media/filters/h264_parser.h +++ b/packager/media/filters/h264_parser.h @@ -19,22 +19,32 @@ namespace media { // |avc_decoder_config_data| must be AVCDecoderConfigurationRecord specified // in ISO/IEC 14496-15. -// On success, |sar_width| and |sar_height| are set to the data specified in -// |avc_decoder_config_data|. -// If the value is 0 or ommited in |avc_decoder_config_data|, then 1 is -// assigned. -void ExtractSarFromDecoderConfig(const uint8_t* avc_decoder_config_data, - size_t avc_decoder_config_data_size, - uint32_t* sar_width, - uint32_t* sar_height); +// On success, |coded_width| and |coded_height| contains coded resolution after +// cropping; |pixel_width:pixel_height| contains pixel aspect ratio, 1:1 is +// assigned if it is not present. +bool ExtractResolutionFromDecoderConfig(const uint8_t* avc_decoder_config_data, + size_t avc_decoder_config_data_size, + uint32_t* coded_width, + uint32_t* coded_height, + uint32_t* pixel_width, + uint32_t* pixel_height); // |sps_data| must be a valid SPS specified in ISO/IEC 14496-10. -// On success, |sar_width| and |sar_height| contain pixel width and height -// respectively. -void ExtractSarFromSps(const uint8_t* sps_data, - size_t sps_data_size, - uint32_t* sar_width, - uint32_t* sar_height); +// On success, |coded_width| and |coded_height| contains coded resolution after +// cropping; |pixel_width:pixel_height| contains pixel aspect ratio, 1:1 is +// assigned if it is not present in SPS. +bool ExtractResolutionFromSpsData(const uint8_t* sps_data, + size_t sps_data_size, + uint32_t* coded_width, + uint32_t* coded_height, + uint32_t* pixel_width, + uint32_t* pixel_height); +struct H264SPS; +bool ExtractResolutionFromSps(const H264SPS& sps, + uint32_t* coded_width, + uint32_t* coded_height, + uint32_t* pixel_width, + uint32_t* pixel_height); // For explanations of each struct and its members, see H.264 specification // at http://www.itu.int/rec/T-REC-H.264. diff --git a/packager/media/filters/h264_parser_unittest.cc b/packager/media/filters/h264_parser_unittest.cc index 71a3ebec6a..236c98949e 100644 --- a/packager/media/filters/h264_parser_unittest.cc +++ b/packager/media/filters/h264_parser_unittest.cc @@ -65,33 +65,63 @@ TEST(H264ParserTest, StreamFileParsing) { } } -TEST(H264ParserTest, ExtractSarFromDecoderConfig) { +TEST(H264ParserTest, ExtractResolutionFromDecoderConfig) { const uint8_t kDecoderConfig[] = { 0x01, 0x64, 0x00, 0x1E, 0xFF, 0xE1, 0x00, 0x1D, 0x67, 0x64, 0x00, 0x1E, 0xAC, 0xD9, 0x40, 0xB4, 0x2F, 0xF9, 0x7F, 0xF0, 0x00, 0x80, 0x00, 0x91, 0x00, 0x00, 0x03, 0x03, 0xE9, 0x00, 0x00, 0xEA, 0x60, 0x0F, 0x16, 0x2D, 0x96, 0x01, 0x00, 0x06, 0x68, 0xEB, 0xE3, 0xCB, 0x22, 0xC0}; + uint32_t coded_width = 0; + uint32_t coded_height = 0; uint32_t pixel_width = 0; uint32_t pixel_height = 0; - ExtractSarFromDecoderConfig(kDecoderConfig, arraysize(kDecoderConfig), - &pixel_width, &pixel_height); + ASSERT_TRUE(ExtractResolutionFromDecoderConfig( + kDecoderConfig, arraysize(kDecoderConfig), &coded_width, &coded_height, + &pixel_width, &pixel_height)); + EXPECT_EQ(720u, coded_width); + EXPECT_EQ(360u, coded_height); EXPECT_EQ(8u, pixel_width); EXPECT_EQ(9u, pixel_height); } -TEST(H264ParserTest, ExtractSarFromSps) { +TEST(H264ParserTest, ExtractResolutionFromSpsData) { const uint8_t kSps[] = {0x67, 0x64, 0x00, 0x1E, 0xAC, 0xD9, 0x40, 0xB4, 0x2F, 0xF9, 0x7F, 0xF0, 0x00, 0x80, 0x00, 0x91, 0x00, 0x00, 0x03, 0x03, 0xE9, 0x00, 0x00, 0xEA, 0x60, 0x0F, 0x16, 0x2D, 0x96}; + uint32_t coded_width = 0; + uint32_t coded_height = 0; uint32_t pixel_width = 0; uint32_t pixel_height = 0; - ExtractSarFromSps(kSps, arraysize(kSps), &pixel_width, &pixel_height); + ASSERT_TRUE(ExtractResolutionFromSpsData(kSps, arraysize(kSps), &coded_width, + &coded_height, &pixel_width, + &pixel_height)); + EXPECT_EQ(720u, coded_width); + EXPECT_EQ(360u, coded_height); EXPECT_EQ(8u, pixel_width); EXPECT_EQ(9u, pixel_height); } +TEST(H264ParserTest, ExtractResolutionFromSpsDataWithCropping) { + // 320x192 with frame_crop_bottom_offset of 6. + const uint8_t kSps[] = {0x67, 0x64, 0x00, 0x0C, 0xAC, 0xD9, 0x41, 0x41, 0x9F, + 0x9F, 0x01, 0x10, 0x00, 0x00, 0x03, 0x00, 0x10, 0x00, + 0x00, 0x03, 0x03, 0x00, 0xF1, 0x42, 0x99, 0x60}; + + uint32_t coded_width = 0; + uint32_t coded_height = 0; + uint32_t pixel_width = 0; + uint32_t pixel_height = 0; + ASSERT_TRUE(ExtractResolutionFromSpsData(kSps, arraysize(kSps), &coded_width, + &coded_height, &pixel_width, + &pixel_height)); + EXPECT_EQ(320u, coded_width); + EXPECT_EQ(180u, coded_height); + EXPECT_EQ(1u, pixel_width); + EXPECT_EQ(1u, pixel_height); +} + } // namespace media } // namespace edash_packager diff --git a/packager/media/formats/mp2t/es_parser_h264.cc b/packager/media/formats/mp2t/es_parser_h264.cc index 06c8998e46..eaec7856e4 100644 --- a/packager/media/formats/mp2t/es_parser_h264.cc +++ b/packager/media/formats/mp2t/es_parser_h264.cc @@ -345,10 +345,15 @@ bool EsParserH264::UpdateVideoDecoderConfig(const H264SPS* sps) { return true; } - // TODO: a MAP unit can be either 16 or 32 pixels. - // although it's 16 pixels for progressive non MBAFF frames. - uint16_t width = (sps->pic_width_in_mbs_minus1 + 1) * 16; - uint16_t height = (sps->pic_height_in_map_units_minus1 + 1) * 16; + uint32_t coded_width = 0; + uint32_t coded_height = 0; + uint32_t pixel_width = 0; + uint32_t pixel_height = 0; + if (!ExtractResolutionFromSps(*sps, &coded_width, &coded_height, &pixel_width, + &pixel_height)) { + LOG(ERROR) << "Failed to parse SPS."; + return false; + } last_video_decoder_config_ = scoped_refptr( new VideoStreamInfo( @@ -361,10 +366,10 @@ bool EsParserH264::UpdateVideoDecoderConfig(const H264SPS* sps) { decoder_config_record[2], decoder_config_record[3]), std::string(), - width, - height, - sps->sar_width == 0 ? 1 : sps->sar_width, - sps->sar_height == 0 ? 1 : sps->sar_height, + coded_width, + coded_height, + pixel_width, + pixel_height, 0, H264ByteToUnitStreamConverter::kUnitStreamNaluLengthSize, decoder_config_record.data(), @@ -372,12 +377,7 @@ bool EsParserH264::UpdateVideoDecoderConfig(const H264SPS* sps) { false)); DVLOG(1) << "Profile IDC: " << sps->profile_idc; DVLOG(1) << "Level IDC: " << sps->level_idc; - DVLOG(1) << "Pic width: " << width; - DVLOG(1) << "Pic height: " << height; - DVLOG(1) << "log2_max_frame_num_minus4: " - << sps->log2_max_frame_num_minus4; - DVLOG(1) << "SAR: width=" << sps->sar_width - << " height=" << sps->sar_height; + DVLOG(1) << "log2_max_frame_num_minus4: " << sps->log2_max_frame_num_minus4; // Video config notification. new_stream_info_cb_.Run(last_video_decoder_config_); diff --git a/packager/media/formats/mp4/mp4_media_parser.cc b/packager/media/formats/mp4/mp4_media_parser.cc index 0c06e5577d..1e60aeb1be 100644 --- a/packager/media/formats/mp4/mp4_media_parser.cc +++ b/packager/media/formats/mp4/mp4_media_parser.cc @@ -369,13 +369,40 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { entry.avcc.profile_compatibility, entry.avcc.avc_level); - uint32_t pixel_width = entry.pixel_aspect.h_spacing; - uint32_t pixel_height = entry.pixel_aspect.v_spacing; - if (pixel_width == 0 || pixel_height == 0) { - if (!entry.avcc.sps_list.empty()) { - const std::vector& sps = entry.avcc.sps_list[0]; - ExtractSarFromSps(&sps[0], sps.size(), &pixel_width, &pixel_height); - } + uint32_t coded_width = 0; + uint32_t coded_height = 0; + uint32_t pixel_width = 0; + uint32_t pixel_height = 0; + + if (entry.avcc.sps_list.empty()) { + LOG(ERROR) << "Cannot find sps in avc decoder configuration record."; + return false; + } + const std::vector& sps = entry.avcc.sps_list[0]; + if (!ExtractResolutionFromSpsData(&sps[0], sps.size(), &coded_width, + &coded_height, &pixel_width, + &pixel_height)) { + LOG(ERROR) << "Failed to parse SPS."; + return false; + } + + LOG_IF(WARNING, + entry.width != coded_width || entry.height != coded_height) + << "Resolution in VisualSampleEntry (" << entry.width << "," + << entry.height << ") does not match with resolution in " + "AVCDecoderConfigurationRecord (" + << coded_width << "," << coded_height + << "). Use AVCDecoderConfigurationRecord."; + + if (entry.pixel_aspect.h_spacing != 0 || entry.pixel_aspect.v_spacing != 0) { + LOG_IF(WARNING, entry.pixel_aspect.h_spacing != pixel_width || + entry.pixel_aspect.v_spacing != pixel_height) + << "Pixel aspect ratio in PASP box (" + << entry.pixel_aspect.h_spacing << "," + << entry.pixel_aspect.v_spacing + << ") does not match with SAR in AVCDecoderConfigurationRecord (" + << pixel_width << "," << pixel_height + << "). Use AVCDecoderConfigurationRecord."; } bool is_encrypted = entry.sinf.info.track_encryption.is_encrypted; @@ -386,8 +413,8 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { kCodecH264, codec_string, track->media.header.language, - entry.width, - entry.height, + coded_width, + coded_height, pixel_width, pixel_height, 0, // trick_play_rate diff --git a/packager/media/formats/wvm/wvm_media_parser.cc b/packager/media/formats/wvm/wvm_media_parser.cc index 4535e27439..631097cfed 100644 --- a/packager/media/formats/wvm/wvm_media_parser.cc +++ b/packager/media/formats/wvm/wvm_media_parser.cc @@ -809,15 +809,42 @@ bool WvmMediaParser::Output(bool output_encrypted_sample) { VideoStreamInfo* video_stream_info = reinterpret_cast(stream_infos_[i].get()); - uint32_t pixel_width = video_stream_info->pixel_width(); - uint32_t pixel_height = video_stream_info->pixel_height(); - if (pixel_width == 0 || pixel_height == 0) { - ExtractSarFromDecoderConfig(&decoder_config_record[0], - decoder_config_record.size(), - &pixel_width, &pixel_height); + uint32_t coded_width = 0; + uint32_t coded_height = 0; + uint32_t pixel_width = 0; + uint32_t pixel_height = 0; + if (!ExtractResolutionFromDecoderConfig( + &decoder_config_record[0], decoder_config_record.size(), + &coded_width, &coded_height, &pixel_width, &pixel_height)) { + LOG(ERROR) << "Failed to parse AVCDecoderConfigurationRecord."; + return false; + } + if (pixel_width != video_stream_info->pixel_width() || + pixel_height != video_stream_info->pixel_height()) { + LOG_IF(WARNING, video_stream_info->pixel_width() != 0 || + video_stream_info->pixel_height() != 0) + << "Pixel aspect ratio in WVM metadata (" + << video_stream_info->pixel_width() << "," + << video_stream_info->pixel_height() + << ") does not match with SAR in " + "AVCDecoderConfigurationRecord (" + << pixel_width << "," << pixel_height + << "). Use AVCDecoderConfigurationRecord."; video_stream_info->set_pixel_width(pixel_width); video_stream_info->set_pixel_height(pixel_height); } + if (coded_width != video_stream_info->width() || + coded_height != video_stream_info->height()) { + LOG(WARNING) << "Resolution in WVM metadata (" + << video_stream_info->width() << "," + << video_stream_info->height() + << ") does not match with resolution in " + "AVCDecoderConfigurationRecord (" + << coded_width << "," << coded_height + << "). Use AVCDecoderConfigurationRecord."; + video_stream_info->set_width(coded_width); + video_stream_info->set_height(coded_height); + } } } }