From b221aa9caf4f8357a696f3265d1e2a5bf504dbd9 Mon Sep 17 00:00:00 2001
From: Bartek Zdanowski
Date: Fri, 28 Oct 2022 05:22:17 +0200
Subject: [PATCH] fix: Parse one frame mpeg-ts video (#1015)

Closes #1013

Co-authored-by: Joey Parrish
---
Review notes (everything between the "---" line above and the first
"diff --git" line is ignored by git-am and git-apply):

 * CalculateSampleDuration() for H.264 and H.265 now only trusts the SPS
   timing info when num_units_in_tick and time_scale are both positive and the
   computed duration is positive. A malformed stream can therefore no longer
   cause a division by zero or a zero/negative sample duration; the existing
   1ms fallback is used instead.
 * The H.265 variant no longer doubles the tick. H.264 clock ticks are
   field-based (two per frame), whereas an H.265 clock tick corresponds to one
   picture (ITU-T H.265, E.3.1).
 * Line structure, indentation and template arguments were reconstructed from
   a whitespace-damaged copy of this patch, and the "index" blob ids of the
   amended files (es_parser_h264.cc, es_parser_h265.cc, es_parser_h265.h) are
   stale. Apply with --ignore-whitespace and without --3way if necessary.

 packager/media/codecs/h264_parser.cc          | 25 ++++++++----
 packager/media/codecs/h264_parser.h           |  5 +++
 packager/media/codecs/h265_parser.cc          | 25 +++++++++---
 packager/media/codecs/h265_parser.h           |  4 ++
 packager/media/formats/mp2t/es_parser_h264.cc | 27 +++++++++++++
 packager/media/formats/mp2t/es_parser_h264.h  |  3 +-
 packager/media/formats/mp2t/es_parser_h265.cc | 31 +++++++++++++++
 packager/media/formats/mp2t/es_parser_h265.h  |  2 +
 packager/media/formats/mp2t/es_parser_h26x.cc |  8 +++-
 packager/media/formats/mp2t/es_parser_h26x.h  |  3 ++
 .../formats/mp2t/es_parser_h26x_unittest.cc   | 38 +++++++++++++++++++
 .../media/formats/mp2t/mp2t_media_parser.cc   | 11 +++---
 packager/media/formats/mp2t/ts_section_pes.cc | 12 +++---
 13 files changed, 166 insertions(+), 28 deletions(-)

diff --git a/packager/media/codecs/h264_parser.cc b/packager/media/codecs/h264_parser.cc
index bd2189cd61..28681fe1c8 100644
--- a/packager/media/codecs/h264_parser.cc
+++ b/packager/media/codecs/h264_parser.cc
@@ -112,6 +112,17 @@ bool H264SliceHeader::IsSISlice() const {
     }                                                                   \
   } while (0)
 
+#define READ_LONG_OR_RETURN(out)        \
+  do {                                  \
+    long _out;                          \
+    int _tmp_out;                       \
+    READ_BITS_OR_RETURN(16, &_tmp_out); \
+    _out = (long)(_tmp_out) << 16;      \
+    READ_BITS_OR_RETURN(16, &_tmp_out); \
+    _out |= _tmp_out;                   \
+    *(out) = _out;                      \
+  } while(0)
+
 #define READ_BOOL_OR_RETURN(out)                                          \
   do {                                                                    \
     int _out;                                                             \
@@ -524,14 +535,12 @@ H264Parser::Result H264Parser::ParseVUIParameters(H26xBitReader* br,
     READ_UE_OR_RETURN(&data);  // chroma_sample_loc_type_bottom_field
   }
 
-  // Read and ignore timing info.
-  READ_BOOL_OR_RETURN(&data);  // timing_info_present_flag
-  if (data) {
-    READ_BITS_OR_RETURN(16, &data);  // num_units_in_tick
-    READ_BITS_OR_RETURN(16, &data);  // num_units_in_tick
-    READ_BITS_OR_RETURN(16, &data);  // time_scale
-    READ_BITS_OR_RETURN(16, &data);  // time_scale
-    READ_BOOL_OR_RETURN(&data);  // fixed_frame_rate_flag
+  // Read timing info.
+  READ_BOOL_OR_RETURN(&sps->timing_info_present_flag);
+  if (sps->timing_info_present_flag) {
+    READ_LONG_OR_RETURN(&sps->num_units_in_tick);
+    READ_LONG_OR_RETURN(&sps->time_scale);
+    READ_BOOL_OR_RETURN(&sps->fixed_frame_rate_flag);
   }
 
   // Read and ignore NAL HRD parameters, if present.
diff --git a/packager/media/codecs/h264_parser.h b/packager/media/codecs/h264_parser.h
index 82772d45ee..0e33e54070 100644
--- a/packager/media/codecs/h264_parser.h
+++ b/packager/media/codecs/h264_parser.h
@@ -82,6 +82,11 @@ struct H264Sps {
   int sar_height;  // Set to 0 when not specified.
   int transfer_characteristics;
 
+  bool timing_info_present_flag;
+  long num_units_in_tick;
+  long time_scale;
+  bool fixed_frame_rate_flag;
+
   bool bitstream_restriction_flag;
   int max_num_reorder_frames;
   int max_dec_frame_buffering;
diff --git a/packager/media/codecs/h265_parser.cc b/packager/media/codecs/h265_parser.cc
index a70ea03c43..0d368b178c 100644
--- a/packager/media/codecs/h265_parser.cc
+++ b/packager/media/codecs/h265_parser.cc
@@ -28,6 +28,22 @@
       return status;                  \
   } while (false)
 
+#define READ_LONG_OR_RETURN(out)                                           \
+  do {                                                                     \
+    int _top_half, _bottom_half;                                           \
+    if (!br->ReadBits(16, &_top_half)) {                                   \
+      DVLOG(1)                                                             \
+          << "Error in stream: unexpected EOS while trying to read " #out; \
+      return kInvalidStream;                                               \
+    }                                                                      \
+    if (!br->ReadBits(16, &_bottom_half)) {                                \
+      DVLOG(1)                                                             \
+          << "Error in stream: unexpected EOS while trying to read " #out; \
+      return kInvalidStream;                                               \
+    }                                                                      \
+    *(out) = ((long)_top_half) << 16 | _bottom_half;                       \
+  } while (false)
+
 namespace shaka {
 namespace media {
 
@@ -688,11 +704,10 @@ H265Parser::Result H265Parser::ParseVuiParameters(int max_num_sub_layers_minus1,
     TRUE_OR_RETURN(br->ReadUE(&ignored));  // def_disp_win_bottom_offset
   }
 
-  bool vui_timing_info_present_flag;
-  TRUE_OR_RETURN(br->ReadBool(&vui_timing_info_present_flag));
-  if (vui_timing_info_present_flag) {
-    // vui_num_units_in_tick, vui_time_scale
-    TRUE_OR_RETURN(br->SkipBits(32 + 32));
+  TRUE_OR_RETURN(br->ReadBool(&vui->vui_timing_info_present_flag));
+  if (vui->vui_timing_info_present_flag) {
+    READ_LONG_OR_RETURN(&vui->vui_num_units_in_tick);
+    READ_LONG_OR_RETURN(&vui->vui_time_scale);
 
     bool vui_poc_proportional_to_timing_flag;
     TRUE_OR_RETURN(br->ReadBool(&vui_poc_proportional_to_timing_flag));
diff --git a/packager/media/codecs/h265_parser.h b/packager/media/codecs/h265_parser.h
index 779fc80a28..5674860b78 100644
--- a/packager/media/codecs/h265_parser.h
+++ b/packager/media/codecs/h265_parser.h
@@ -52,6 +52,10 @@ struct H265VuiParameters {
   int sar_height = 0;
   int transfer_characteristics = 0;
 
+  bool vui_timing_info_present_flag = false;
+  long vui_num_units_in_tick = 0;
+  long vui_time_scale = 0;
+
   bool bitstream_restriction_flag = false;
   int min_spatial_segmentation_idc = 0;
 
diff --git a/packager/media/formats/mp2t/es_parser_h264.cc b/packager/media/formats/mp2t/es_parser_h264.cc
index 10bde0c309..100a7b0dbe 100644
--- a/packager/media/formats/mp2t/es_parser_h264.cc
+++ b/packager/media/formats/mp2t/es_parser_h264.cc
@@ -184,6 +184,33 @@ bool EsParserH264::UpdateVideoDecoderConfig(int pps_id) {
   return true;
 }
 
+// Derives the duration of a single frame, in kMpeg2Timescale units, from the
+// VUI timing info of the SPS referenced by |pps_id|. Falls back to an
+// arbitrary 1ms duration when the timing info is absent or unusable.
+int64_t EsParserH264::CalculateSampleDuration(int pps_id) {
+  auto pps = h264_parser_->GetPps(pps_id);
+  if (pps) {
+    auto sps_id = pps->seq_parameter_set_id;
+    auto sps = h264_parser_->GetSps(sps_id);
+    // Both values come straight from the bitstream; require them to be
+    // positive so a malformed SPS cannot cause a division by zero or a
+    // non-positive duration.
+    if (sps && sps->timing_info_present_flag && sps->fixed_frame_rate_flag &&
+        sps->num_units_in_tick > 0 && sps->time_scale > 0) {
+      // H.264 clock ticks are field-based, hence two ticks per frame.
+      const int64_t duration = static_cast<int64_t>(kMpeg2Timescale) *
+                               sps->num_units_in_tick * 2 / sps->time_scale;
+      // Integer division may truncate to zero for very high frame rates.
+      if (duration > 0)
+        return duration;
+    }
+  }
+  LOG(WARNING) << "[MPEG-2 TS] PID " << pid()
+               << " Cannot calculate frame rate from SPS.";
+  // Returns arbitrary safe duration
+  return 0.001 * kMpeg2Timescale;  // 1ms.
+}
+
 }  // namespace mp2t
 }  // namespace media
 }  // namespace shaka
diff --git a/packager/media/formats/mp2t/es_parser_h264.h b/packager/media/formats/mp2t/es_parser_h264.h
index 5db7fa4d8e..757f61d5ad 100644
--- a/packager/media/formats/mp2t/es_parser_h264.h
+++ b/packager/media/formats/mp2t/es_parser_h264.h
@@ -39,7 +39,8 @@ class EsParserH264 : public EsParserH26x {
   // Update the video decoder config based on an H264 SPS.
   // Return true if successful.
   bool UpdateVideoDecoderConfig(int sps_id) override;
-
+  // Calculate video sample duration based on SPS data
+  int64_t CalculateSampleDuration(int pps_id) override;
   // Callback to pass the stream configuration.
   NewStreamInfoCB new_stream_info_cb_;
 
diff --git a/packager/media/formats/mp2t/es_parser_h265.cc b/packager/media/formats/mp2t/es_parser_h265.cc
index 3d270dffd8..bcf2302791 100644
--- a/packager/media/formats/mp2t/es_parser_h265.cc
+++ b/packager/media/formats/mp2t/es_parser_h265.cc
@@ -184,6 +184,37 @@ bool EsParserH265::UpdateVideoDecoderConfig(int pps_id) {
   return true;
 }
 
+// Derives the duration of a single frame, in kMpeg2Timescale units, from the
+// VUI timing info of the SPS referenced by |pps_id|. Falls back to an
+// arbitrary 1ms duration when the timing info is absent or unusable.
+int64_t EsParserH265::CalculateSampleDuration(int pps_id) {
+  auto pps = h265_parser_->GetPps(pps_id);
+  if (pps) {
+    auto sps_id = pps->seq_parameter_set_id;
+    auto sps = h265_parser_->GetSps(sps_id);
+    // Both values come straight from the bitstream; require them to be
+    // positive so a malformed VUI cannot cause a division by zero or a
+    // non-positive duration.
+    if (sps && sps->vui_parameters_present &&
+        sps->vui_parameters.vui_timing_info_present_flag &&
+        sps->vui_parameters.vui_num_units_in_tick > 0 &&
+        sps->vui_parameters.vui_time_scale > 0) {
+      // Unlike H.264, an H.265 clock tick corresponds to one picture (ITU-T
+      // H.265 E.3.1), so no factor of two is applied here.
+      const int64_t duration = static_cast<int64_t>(kMpeg2Timescale) *
+                               sps->vui_parameters.vui_num_units_in_tick /
+                               sps->vui_parameters.vui_time_scale;
+      // Integer division may truncate to zero for very high frame rates.
+      if (duration > 0)
+        return duration;
+    }
+  }
+  LOG(WARNING) << "[MPEG-2 TS] PID " << pid()
+               << " Cannot calculate frame rate from SPS.";
+  // Returns arbitrary safe duration
+  return 0.001 * kMpeg2Timescale;  // 1ms.
+}
+
 }  // namespace mp2t
 }  // namespace media
 }  // namespace shaka
diff --git a/packager/media/formats/mp2t/es_parser_h265.h b/packager/media/formats/mp2t/es_parser_h265.h
index 02944d661d..b8dcafc929 100644
--- a/packager/media/formats/mp2t/es_parser_h265.h
+++ b/packager/media/formats/mp2t/es_parser_h265.h
@@ -42,6 +42,8 @@ class EsParserH265 : public EsParserH26x {
   // Return true if successful.
   bool UpdateVideoDecoderConfig(int sps_id) override;
 
+  // Calculate video sample duration based on SPS data
+  int64_t CalculateSampleDuration(int pps_id) override;
   // Callback to pass the stream configuration.
   NewStreamInfoCB new_stream_info_cb_;
 
diff --git a/packager/media/formats/mp2t/es_parser_h26x.cc b/packager/media/formats/mp2t/es_parser_h26x.cc
index c0b4672dc7..b04974a071 100644
--- a/packager/media/formats/mp2t/es_parser_h26x.cc
+++ b/packager/media/formats/mp2t/es_parser_h26x.cc
@@ -99,7 +99,9 @@ bool EsParserH26x::Flush() {
 
   if (pending_sample_) {
     // Flush pending sample.
-    DCHECK(pending_sample_duration_);
+    if (!pending_sample_duration_) {
+      pending_sample_duration_ = CalculateSampleDuration(pending_sample_pps_id_);
+    }
     pending_sample_->set_duration(pending_sample_duration_);
     emit_sample_cb_.Run(std::move(pending_sample_));
   }
@@ -330,7 +332,8 @@ bool EsParserH26x::EmitFrame(int64_t access_unit_pos,
       pending_sample_->set_duration(sample_duration);
 
       const int kArbitraryGapScale = 10;
-      if (sample_duration > kArbitraryGapScale * pending_sample_duration_) {
+      if (pending_sample_duration_ &&
+          sample_duration > kArbitraryGapScale * pending_sample_duration_) {
         LOG(WARNING) << "[MPEG-2 TS] PID " << pid() << " Possible GAP at dts "
                      << pending_sample_->dts() << " with next sample at dts "
                      << media_sample->dts() << " (difference "
@@ -342,6 +345,7 @@ bool EsParserH26x::EmitFrame(int64_t access_unit_pos,
     emit_sample_cb_.Run(std::move(pending_sample_));
   }
   pending_sample_ = media_sample;
+  pending_sample_pps_id_ = pps_id;
 
   return true;
 }
diff --git a/packager/media/formats/mp2t/es_parser_h26x.h b/packager/media/formats/mp2t/es_parser_h26x.h
index 921b326d43..29246574aa 100644
--- a/packager/media/formats/mp2t/es_parser_h26x.h
+++ b/packager/media/formats/mp2t/es_parser_h26x.h
@@ -96,6 +96,8 @@ class EsParserH26x : public EsParser {
                  int access_unit_size,
                  bool is_key_frame,
                  int pps_id);
+  // Calculates frame duration based on SPS frame data
+  virtual int64_t CalculateSampleDuration(int pps_id) = 0;
 
   // Callback to pass the frames.
   EmitSampleCB emit_sample_cb_;
@@ -127,6 +129,7 @@ class EsParserH26x : public EsParser {
 
   // Frame for which we do not yet have a duration.
   std::shared_ptr<MediaSample> pending_sample_;
+  int pending_sample_pps_id_ = -1;
   int64_t pending_sample_duration_ = 0;
 
   // Indicates whether waiting for first key frame.
diff --git a/packager/media/formats/mp2t/es_parser_h26x_unittest.cc b/packager/media/formats/mp2t/es_parser_h26x_unittest.cc
index a2b86d6abf..12602b4afa 100644
--- a/packager/media/formats/mp2t/es_parser_h26x_unittest.cc
+++ b/packager/media/formats/mp2t/es_parser_h26x_unittest.cc
@@ -120,6 +120,11 @@ class TestableEsParser : public EsParserH26x {
   }
 
  private:
+  int64_t CalculateSampleDuration(int pps_id) override {
+    // Typical 40ms - frame duration with 25 FPS
+    return 0.04 * 90000;
+  }
+
   const int kTestPpsId = 123;
 
   Nalu::CodecType codec_type_;
@@ -179,6 +184,7 @@ class EsParserH26xTest : public testing::Test {
     const std::vector<uint8_t> sample_data(
         sample->data(), sample->data() + sample->data_size());
     EXPECT_EQ(samples_[sample_id], sample_data);
+    media_samples_.push_back(sample);
   }
 
   void NewVideoConfig(std::shared_ptr<StreamInfo> config) {
@@ -187,6 +193,7 @@ class EsParserH26xTest : public testing::Test {
 
  protected:
   std::vector<std::vector<uint8_t>> samples_;
+  std::vector<std::shared_ptr<MediaSample>> media_samples_;
   size_t sample_count_;
   bool has_stream_info_;
 };
@@ -377,6 +384,10 @@ TEST_F(EsParserH26xTest, H264BasicSupport) {
   RunTest(Nalu::kH264, kData, arraysize(kData));
   EXPECT_EQ(3u, sample_count_);
   EXPECT_TRUE(has_stream_info_);
+  EXPECT_EQ(3u, media_samples_.size());
+  for (size_t i = 0; i < media_samples_.size(); i++) {
+    EXPECT_GT(media_samples_[i]->duration(), 0u);
+  }
 }
 
 // This is not compliant to H264 spec, but VLC generates streams like this. See
@@ -453,6 +464,33 @@ TEST_F(EsParserH26xTest, H264DoesNotStartOnRsv) {
   EXPECT_TRUE(has_stream_info_);
 }
 
+TEST_F(EsParserH26xTest, H264ContainsOnlyOneFrame) {
+  const H26xNaluType kData[] = {
+      kSeparator,
+      kH264Aud,
+      kH264Sps,
+      kH264VclKeyFrame,
+  };
+
+  RunTest(Nalu::kH264, kData, arraysize(kData));
+  EXPECT_TRUE(has_stream_info_);
+  EXPECT_EQ(1u, sample_count_);
+  EXPECT_EQ(1u, media_samples_.size());
+  EXPECT_GT(media_samples_[0]->duration(), 0u);
+}
+
+TEST_F(EsParserH26xTest, H265ContainsOnlyOneFrame) {
+  const H26xNaluType kData[] = {
+      kSeparator, kH265Aud, kH265Sps, kH265VclKeyFrame,
+  };
+
+  RunTest(Nalu::kH265, kData, arraysize(kData));
+  EXPECT_TRUE(has_stream_info_);
+  EXPECT_EQ(1u, sample_count_);
+  EXPECT_EQ(1u, media_samples_.size());
+  EXPECT_GT(media_samples_[0]->duration(), 0u);
+}
+
 }  // namespace mp2t
 }  // namespace media
 }  // namespace shaka
diff --git a/packager/media/formats/mp2t/mp2t_media_parser.cc b/packager/media/formats/mp2t/mp2t_media_parser.cc
index f9f908fcd4..01fc78c624 100644
--- a/packager/media/formats/mp2t/mp2t_media_parser.cc
+++ b/packager/media/formats/mp2t/mp2t_media_parser.cc
@@ -97,7 +97,7 @@ bool PidState::PushTsPacket(const TsPacket& ts_packet) {
   // just discard the incoming TS packet.
   if (!enable_)
     return true;
-
+  // TODO(bzd): continuity_counter_ is never set
   int expected_continuity_counter = (continuity_counter_ + 1) % 16;
   if (continuity_counter_ >= 0 &&
       ts_packet.continuity_counter() != expected_continuity_counter) {
@@ -218,10 +218,11 @@ bool Mp2tMediaParser::Parse(const uint8_t* buf, int size) {
       ts_byte_queue_.Pop(1);
       continue;
     }
-    DVLOG(LOG_LEVEL_TS)
-        << "Processing PID=" << ts_packet->pid()
-        << " start_unit=" << ts_packet->payload_unit_start_indicator();
-
+    DVLOG(LOG_LEVEL_TS) << "Processing PID=" << ts_packet->pid()
+                        << " start_unit="
+                        << ts_packet->payload_unit_start_indicator()
+                        << " continuity_counter="
+                        << ts_packet->continuity_counter();
     // Parse the section.
     auto it = pids_.find(ts_packet->pid());
     if (it == pids_.end() &&
diff --git a/packager/media/formats/mp2t/ts_section_pes.cc b/packager/media/formats/mp2t/ts_section_pes.cc
index 9b6e5c8b3c..ec42ade21c 100644
--- a/packager/media/formats/mp2t/ts_section_pes.cc
+++ b/packager/media/formats/mp2t/ts_section_pes.cc
@@ -192,7 +192,7 @@ bool TsSectionPes::ParseInternal(const uint8_t* raw_pes, int raw_pes_size) {
   RCHECK(bit_reader.ReadBits(16, &pes_packet_length));
 
   RCHECK(packet_start_code_prefix == kPesStartCode);
-  DVLOG(LOG_LEVEL_PES) << "stream_id=" << std::hex << stream_id << std::dec;
+  DVLOG(LOG_LEVEL_PES) << "stream_id=" << stream_id;
   if (pes_packet_length == 0)
     pes_packet_length = static_cast<int>(bit_reader.bits_available()) / 8;
 
@@ -303,12 +303,10 @@ bool TsSectionPes::ParseInternal(const uint8_t* raw_pes, int raw_pes_size) {
   RCHECK(pes_header_remaining_size >= 0);
 
   // Read the PES packet.
-  DVLOG(LOG_LEVEL_PES)
-      << "Emit a reassembled PES:"
-      << " size=" << es_size
-      << " pts=" << media_pts
-      << " dts=" << media_dts
-      << " data_alignment_indicator=" << data_alignment_indicator;
+  DVLOG(LOG_LEVEL_PES) << "Emit a reassembled PES:"
+                       << " size=" << es_size << " pts=" << media_pts
+                       << " dts=" << media_dts << " data_alignment_indicator="
+                       << data_alignment_indicator;
   return es_parser_->Parse(&raw_pes[es_offset], es_size, media_pts, media_dts);
 }
 