diff --git a/packager/media/base/video_stream_info.h b/packager/media/base/video_stream_info.h index be4867da04..a122a3e709 100644 --- a/packager/media/base/video_stream_info.h +++ b/packager/media/base/video_stream_info.h @@ -67,6 +67,8 @@ class VideoStreamInfo : public StreamInfo { uint8_t nalu_length_size() const { return nalu_length_size_; } int16_t trick_play_rate() const { return trick_play_rate_; } + void set_width(uint32_t width) { width_ = width; } + void set_height(uint32_t height) { height_ = height; } void set_pixel_width(uint32_t pixel_width) { pixel_width_ = pixel_width; } void set_pixel_height(uint32_t pixel_height) { pixel_height_ = pixel_height; } diff --git a/packager/media/filters/h264_parser.cc b/packager/media/filters/h264_parser.cc index f79f075391..ef51ea4129 100644 --- a/packager/media/filters/h264_parser.cc +++ b/packager/media/filters/h264_parser.cc @@ -16,14 +16,16 @@ namespace media { do { \ if (!(x)) { \ LOG(ERROR) << "Failure while parsing AVCDecoderConfig: " << #x; \ - return; \ + return false; \ } \ } while (0) -void ExtractSarFromDecoderConfig(const uint8_t* avc_decoder_config_data, - size_t avc_decoder_config_data_size, - uint32_t* sar_width, - uint32_t* sar_height) { +bool ExtractResolutionFromDecoderConfig(const uint8_t* avc_decoder_config_data, + size_t avc_decoder_config_data_size, + uint32_t* coded_width, + uint32_t* coded_height, + uint32_t* pixel_width, + uint32_t* pixel_height) { BufferReader reader(avc_decoder_config_data, avc_decoder_config_data_size); uint8_t value = 0; // version check, must be 1. @@ -52,33 +54,96 @@ void ExtractSarFromDecoderConfig(const uint8_t* avc_decoder_config_data, const uint8_t num_sps = value & 0x1F; if (num_sps < 1) { LOG(ERROR) << "No SPS found."; - return; + return false; } uint16_t sps_length = 0; RCHECK(reader.Read2(&sps_length)); - ExtractSarFromSps(reader.data() + reader.pos(), sps_length, sar_width, - sar_height); - + return ExtractResolutionFromSpsData(reader.data() + reader.pos(), sps_length, + coded_width, coded_height, pixel_width, + pixel_height); // It is unlikely to have more than one SPS in practice. Also there's - // no way to change the sar_{width,height} dynamically from VideoStreamInfo. - // So skip the rest (if there are any). + // no way to change the {coded,pixel}_{width,height} dynamically from + // VideoStreamInfo. So skip the rest (if there are any). } -void ExtractSarFromSps(const uint8_t* sps_data, - size_t sps_data_size, - uint32_t* sar_width, - uint32_t* sar_height) { +bool ExtractResolutionFromSpsData(const uint8_t* sps_data, + size_t sps_data_size, + uint32_t* coded_width, + uint32_t* coded_height, + uint32_t* pixel_width, + uint32_t* pixel_height) { H264Parser parser; int sps_id; RCHECK(parser.ParseSPSFromArray(sps_data, sps_data_size, &sps_id) == H264Parser::kOk); - const H264SPS& sps = *parser.GetSPS(sps_id); + return ExtractResolutionFromSps(*parser.GetSPS(sps_id), coded_width, + coded_height, pixel_width, pixel_height); +} + +// Implemented according to ISO/IEC 14496-10:2005 7.4.2.1 Sequence parameter set +// RBSP semantics. +bool ExtractResolutionFromSps(const H264SPS& sps, + uint32_t* coded_width, + uint32_t* coded_height, + uint32_t* pixel_width, + uint32_t* pixel_height) { + int crop_x = 0; + int crop_y = 0; + if (sps.frame_cropping_flag) { + int sub_width_c = 0; + int sub_height_c = 0; + // Table 6-1. + switch (sps.chroma_format_idc) { + case 0: // monochrome + // SubWidthC and SubHeightC are not defined for monochrome. For ease of + // computation afterwards, assign both to 1. + sub_width_c = 1; + sub_height_c = 1; + break; + case 1: // 4:2:0 + sub_width_c = 2; + sub_height_c = 2; + break; + case 2: // 4:2:2 + sub_width_c = 2; + sub_height_c = 1; + break; + case 3: // 4:4:4 + sub_width_c = 1; + sub_height_c = 1; + break; + default: + LOG(ERROR) << "Unexpected chroma_format_idc " << sps.chroma_format_idc; + return false; + } + + // Formula 7-16, 7-17, 7-18, 7-19. + int crop_unit_x = sub_width_c; + int crop_unit_y = sub_height_c * (2 - (sps.frame_mbs_only_flag ? 1 : 0)); + crop_x = crop_unit_x * + (sps.frame_crop_left_offset + sps.frame_crop_right_offset); + crop_y = crop_unit_y * + (sps.frame_crop_top_offset + sps.frame_crop_bottom_offset); + } + + // Formula 7-10, 7-11. + int pic_width_in_mbs = sps.pic_width_in_mbs_minus1 + 1; + *coded_width = pic_width_in_mbs * 16 - crop_x; + + // Formula 7-13, 7-15. + int pic_height_in_mbs = (2 - (sps.frame_mbs_only_flag ? 1 : 0)) * + (sps.pic_height_in_map_units_minus1 + 1); + *coded_height = pic_height_in_mbs * 16 - crop_y; + // 0 means it wasn't in the SPS and therefore assume 1. - *sar_width = sps.sar_width == 0 ? 1 : sps.sar_width; - *sar_height = sps.sar_height == 0 ? 1 : sps.sar_height; - DVLOG(2) << "Found sar_width: " << *sar_width - << " sar_height: " << *sar_height; + *pixel_width = sps.sar_width == 0 ? 1 : sps.sar_width; + *pixel_height = sps.sar_height == 0 ? 1 : sps.sar_height; + DVLOG(2) << "Found coded_width: " << *coded_width + << " coded_height: " << *coded_height + << " pixel_width: " << *pixel_width + << " pixel_height: " << *pixel_height; + return true; } #undef RCHECK diff --git a/packager/media/filters/h264_parser.h b/packager/media/filters/h264_parser.h index 77fc00171d..c1a5f874b2 100644 --- a/packager/media/filters/h264_parser.h +++ b/packager/media/filters/h264_parser.h @@ -19,22 +19,32 @@ namespace media { // |avc_decoder_config_data| must be AVCDecoderConfigurationRecord specified // in ISO/IEC 14496-15. -// On success, |sar_width| and |sar_height| are set to the data specified in -// |avc_decoder_config_data|. -// If the value is 0 or ommited in |avc_decoder_config_data|, then 1 is -// assigned. -void ExtractSarFromDecoderConfig(const uint8_t* avc_decoder_config_data, - size_t avc_decoder_config_data_size, - uint32_t* sar_width, - uint32_t* sar_height); +// On success, |coded_width| and |coded_height| contains coded resolution after +// cropping; |pixel_width:pixel_height| contains pixel aspect ratio, 1:1 is +// assigned if it is not present. +bool ExtractResolutionFromDecoderConfig(const uint8_t* avc_decoder_config_data, + size_t avc_decoder_config_data_size, + uint32_t* coded_width, + uint32_t* coded_height, + uint32_t* pixel_width, + uint32_t* pixel_height); // |sps_data| must be a valid SPS specified in ISO/IEC 14496-10. -// On success, |sar_width| and |sar_height| contain pixel width and height -// respectively. -void ExtractSarFromSps(const uint8_t* sps_data, - size_t sps_data_size, - uint32_t* sar_width, - uint32_t* sar_height); +// On success, |coded_width| and |coded_height| contains coded resolution after +// cropping; |pixel_width:pixel_height| contains pixel aspect ratio, 1:1 is +// assigned if it is not present in SPS. +bool ExtractResolutionFromSpsData(const uint8_t* sps_data, + size_t sps_data_size, + uint32_t* coded_width, + uint32_t* coded_height, + uint32_t* pixel_width, + uint32_t* pixel_height); +struct H264SPS; +bool ExtractResolutionFromSps(const H264SPS& sps, + uint32_t* coded_width, + uint32_t* coded_height, + uint32_t* pixel_width, + uint32_t* pixel_height); // For explanations of each struct and its members, see H.264 specification // at http://www.itu.int/rec/T-REC-H.264. diff --git a/packager/media/filters/h264_parser_unittest.cc b/packager/media/filters/h264_parser_unittest.cc index 71a3ebec6a..236c98949e 100644 --- a/packager/media/filters/h264_parser_unittest.cc +++ b/packager/media/filters/h264_parser_unittest.cc @@ -65,33 +65,63 @@ TEST(H264ParserTest, StreamFileParsing) { } } -TEST(H264ParserTest, ExtractSarFromDecoderConfig) { +TEST(H264ParserTest, ExtractResolutionFromDecoderConfig) { const uint8_t kDecoderConfig[] = { 0x01, 0x64, 0x00, 0x1E, 0xFF, 0xE1, 0x00, 0x1D, 0x67, 0x64, 0x00, 0x1E, 0xAC, 0xD9, 0x40, 0xB4, 0x2F, 0xF9, 0x7F, 0xF0, 0x00, 0x80, 0x00, 0x91, 0x00, 0x00, 0x03, 0x03, 0xE9, 0x00, 0x00, 0xEA, 0x60, 0x0F, 0x16, 0x2D, 0x96, 0x01, 0x00, 0x06, 0x68, 0xEB, 0xE3, 0xCB, 0x22, 0xC0}; + uint32_t coded_width = 0; + uint32_t coded_height = 0; uint32_t pixel_width = 0; uint32_t pixel_height = 0; - ExtractSarFromDecoderConfig(kDecoderConfig, arraysize(kDecoderConfig), - &pixel_width, &pixel_height); + ASSERT_TRUE(ExtractResolutionFromDecoderConfig( + kDecoderConfig, arraysize(kDecoderConfig), &coded_width, &coded_height, + &pixel_width, &pixel_height)); + EXPECT_EQ(720u, coded_width); + EXPECT_EQ(360u, coded_height); EXPECT_EQ(8u, pixel_width); EXPECT_EQ(9u, pixel_height); } -TEST(H264ParserTest, ExtractSarFromSps) { +TEST(H264ParserTest, ExtractResolutionFromSpsData) { const uint8_t kSps[] = {0x67, 0x64, 0x00, 0x1E, 0xAC, 0xD9, 0x40, 0xB4, 0x2F, 0xF9, 0x7F, 0xF0, 0x00, 0x80, 0x00, 0x91, 0x00, 0x00, 0x03, 0x03, 0xE9, 0x00, 0x00, 0xEA, 0x60, 0x0F, 0x16, 0x2D, 0x96}; + uint32_t coded_width = 0; + uint32_t coded_height = 0; uint32_t pixel_width = 0; uint32_t pixel_height = 0; - ExtractSarFromSps(kSps, arraysize(kSps), &pixel_width, &pixel_height); + ASSERT_TRUE(ExtractResolutionFromSpsData(kSps, arraysize(kSps), &coded_width, + &coded_height, &pixel_width, + &pixel_height)); + EXPECT_EQ(720u, coded_width); + EXPECT_EQ(360u, coded_height); EXPECT_EQ(8u, pixel_width); EXPECT_EQ(9u, pixel_height); } +TEST(H264ParserTest, ExtractResolutionFromSpsDataWithCropping) { + // 320x192 with frame_crop_bottom_offset of 6. + const uint8_t kSps[] = {0x67, 0x64, 0x00, 0x0C, 0xAC, 0xD9, 0x41, 0x41, 0x9F, + 0x9F, 0x01, 0x10, 0x00, 0x00, 0x03, 0x00, 0x10, 0x00, + 0x00, 0x03, 0x03, 0x00, 0xF1, 0x42, 0x99, 0x60}; + + uint32_t coded_width = 0; + uint32_t coded_height = 0; + uint32_t pixel_width = 0; + uint32_t pixel_height = 0; + ASSERT_TRUE(ExtractResolutionFromSpsData(kSps, arraysize(kSps), &coded_width, + &coded_height, &pixel_width, + &pixel_height)); + EXPECT_EQ(320u, coded_width); + EXPECT_EQ(180u, coded_height); + EXPECT_EQ(1u, pixel_width); + EXPECT_EQ(1u, pixel_height); +} + } // namespace media } // namespace edash_packager diff --git a/packager/media/formats/mp2t/es_parser_h264.cc b/packager/media/formats/mp2t/es_parser_h264.cc index 06c8998e46..eaec7856e4 100644 --- a/packager/media/formats/mp2t/es_parser_h264.cc +++ b/packager/media/formats/mp2t/es_parser_h264.cc @@ -345,10 +345,15 @@ bool EsParserH264::UpdateVideoDecoderConfig(const H264SPS* sps) { return true; } - // TODO: a MAP unit can be either 16 or 32 pixels. - // although it's 16 pixels for progressive non MBAFF frames. - uint16_t width = (sps->pic_width_in_mbs_minus1 + 1) * 16; - uint16_t height = (sps->pic_height_in_map_units_minus1 + 1) * 16; + uint32_t coded_width = 0; + uint32_t coded_height = 0; + uint32_t pixel_width = 0; + uint32_t pixel_height = 0; + if (!ExtractResolutionFromSps(*sps, &coded_width, &coded_height, &pixel_width, + &pixel_height)) { + LOG(ERROR) << "Failed to parse SPS."; + return false; + } last_video_decoder_config_ = scoped_refptr( new VideoStreamInfo( @@ -361,10 +366,10 @@ bool EsParserH264::UpdateVideoDecoderConfig(const H264SPS* sps) { decoder_config_record[2], decoder_config_record[3]), std::string(), - width, - height, - sps->sar_width == 0 ? 1 : sps->sar_width, - sps->sar_height == 0 ? 1 : sps->sar_height, + coded_width, + coded_height, + pixel_width, + pixel_height, 0, H264ByteToUnitStreamConverter::kUnitStreamNaluLengthSize, decoder_config_record.data(), @@ -372,12 +377,7 @@ bool EsParserH264::UpdateVideoDecoderConfig(const H264SPS* sps) { false)); DVLOG(1) << "Profile IDC: " << sps->profile_idc; DVLOG(1) << "Level IDC: " << sps->level_idc; - DVLOG(1) << "Pic width: " << width; - DVLOG(1) << "Pic height: " << height; - DVLOG(1) << "log2_max_frame_num_minus4: " - << sps->log2_max_frame_num_minus4; - DVLOG(1) << "SAR: width=" << sps->sar_width - << " height=" << sps->sar_height; + DVLOG(1) << "log2_max_frame_num_minus4: " << sps->log2_max_frame_num_minus4; // Video config notification. new_stream_info_cb_.Run(last_video_decoder_config_); diff --git a/packager/media/formats/mp4/mp4_media_parser.cc b/packager/media/formats/mp4/mp4_media_parser.cc index 0c06e5577d..1e60aeb1be 100644 --- a/packager/media/formats/mp4/mp4_media_parser.cc +++ b/packager/media/formats/mp4/mp4_media_parser.cc @@ -369,13 +369,40 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { entry.avcc.profile_compatibility, entry.avcc.avc_level); - uint32_t pixel_width = entry.pixel_aspect.h_spacing; - uint32_t pixel_height = entry.pixel_aspect.v_spacing; - if (pixel_width == 0 || pixel_height == 0) { - if (!entry.avcc.sps_list.empty()) { - const std::vector& sps = entry.avcc.sps_list[0]; - ExtractSarFromSps(&sps[0], sps.size(), &pixel_width, &pixel_height); - } + uint32_t coded_width = 0; + uint32_t coded_height = 0; + uint32_t pixel_width = 0; + uint32_t pixel_height = 0; + + if (entry.avcc.sps_list.empty()) { + LOG(ERROR) << "Cannot find sps in avc decoder configuration record."; + return false; + } + const std::vector& sps = entry.avcc.sps_list[0]; + if (!ExtractResolutionFromSpsData(&sps[0], sps.size(), &coded_width, + &coded_height, &pixel_width, + &pixel_height)) { + LOG(ERROR) << "Failed to parse SPS."; + return false; + } + + LOG_IF(WARNING, + entry.width != coded_width || entry.height != coded_height) + << "Resolution in VisualSampleEntry (" << entry.width << "," + << entry.height << ") does not match with resolution in " + "AVCDecoderConfigurationRecord (" + << coded_width << "," << coded_height + << "). Use AVCDecoderConfigurationRecord."; + + if (entry.pixel_aspect.h_spacing != 0 || entry.pixel_aspect.v_spacing != 0) { + LOG_IF(WARNING, entry.pixel_aspect.h_spacing != pixel_width || + entry.pixel_aspect.v_spacing != pixel_height) + << "Pixel aspect ratio in PASP box (" + << entry.pixel_aspect.h_spacing << "," + << entry.pixel_aspect.v_spacing + << ") does not match with SAR in AVCDecoderConfigurationRecord (" + << pixel_width << "," << pixel_height + << "). Use AVCDecoderConfigurationRecord."; } bool is_encrypted = entry.sinf.info.track_encryption.is_encrypted; @@ -386,8 +413,8 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) { kCodecH264, codec_string, track->media.header.language, - entry.width, - entry.height, + coded_width, + coded_height, pixel_width, pixel_height, 0, // trick_play_rate diff --git a/packager/media/formats/wvm/wvm_media_parser.cc b/packager/media/formats/wvm/wvm_media_parser.cc index 4535e27439..631097cfed 100644 --- a/packager/media/formats/wvm/wvm_media_parser.cc +++ b/packager/media/formats/wvm/wvm_media_parser.cc @@ -809,15 +809,42 @@ bool WvmMediaParser::Output(bool output_encrypted_sample) { VideoStreamInfo* video_stream_info = reinterpret_cast(stream_infos_[i].get()); - uint32_t pixel_width = video_stream_info->pixel_width(); - uint32_t pixel_height = video_stream_info->pixel_height(); - if (pixel_width == 0 || pixel_height == 0) { - ExtractSarFromDecoderConfig(&decoder_config_record[0], - decoder_config_record.size(), - &pixel_width, &pixel_height); + uint32_t coded_width = 0; + uint32_t coded_height = 0; + uint32_t pixel_width = 0; + uint32_t pixel_height = 0; + if (!ExtractResolutionFromDecoderConfig( + &decoder_config_record[0], decoder_config_record.size(), + &coded_width, &coded_height, &pixel_width, &pixel_height)) { + LOG(ERROR) << "Failed to parse AVCDecoderConfigurationRecord."; + return false; + } + if (pixel_width != video_stream_info->pixel_width() || + pixel_height != video_stream_info->pixel_height()) { + LOG_IF(WARNING, video_stream_info->pixel_width() != 0 || + video_stream_info->pixel_height() != 0) + << "Pixel aspect ratio in WVM metadata (" + << video_stream_info->pixel_width() << "," + << video_stream_info->pixel_height() + << ") does not match with SAR in " + "AVCDecoderConfigurationRecord (" + << pixel_width << "," << pixel_height + << "). Use AVCDecoderConfigurationRecord."; video_stream_info->set_pixel_width(pixel_width); video_stream_info->set_pixel_height(pixel_height); } + if (coded_width != video_stream_info->width() || + coded_height != video_stream_info->height()) { + LOG(WARNING) << "Resolution in WVM metadata (" + << video_stream_info->width() << "," + << video_stream_info->height() + << ") does not match with resolution in " + "AVCDecoderConfigurationRecord (" + << coded_width << "," << coded_height + << "). Use AVCDecoderConfigurationRecord."; + video_stream_info->set_width(coded_width); + video_stream_info->set_height(coded_height); + } } } }