fix: Parse one frame mpeg-ts video (#1015)

Closes #1013

Co-authored-by: Joey Parrish <joeyparrish@users.noreply.github.com>
This commit is contained in:
Bartek Zdanowski 2022-10-28 05:22:17 +02:00 committed by GitHub
parent ab8ab12d09
commit b221aa9caf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 141 additions and 28 deletions

View File

@ -112,6 +112,17 @@ bool H264SliceHeader::IsSISlice() const {
} \
} while (0)
// Reads a 32-bit syntax element as two consecutive 16-bit reads (most
// significant half first) and stores the combined value in |*out|.
// Each half goes through READ_BITS_OR_RETURN, so a failed read is handled by
// that macro and |*out| is left untouched in that case.
// The halves are merged in unsigned arithmetic: shifting a signed long left
// by 16 overflows when long is 32 bits wide and the top bit of the value is
// set, whereas the unsigned shift is always well defined.
#define READ_LONG_OR_RETURN(out)                                           \
  do {                                                                     \
    int _hi16;                                                             \
    int _lo16;                                                             \
    READ_BITS_OR_RETURN(16, &_hi16);                                       \
    READ_BITS_OR_RETURN(16, &_lo16);                                       \
    *(out) = static_cast<long>((static_cast<unsigned long>(_hi16) << 16) | \
                               static_cast<unsigned long>(_lo16));         \
  } while (0)
#define READ_BOOL_OR_RETURN(out) \
do { \
int _out; \
@ -524,14 +535,12 @@ H264Parser::Result H264Parser::ParseVUIParameters(H26xBitReader* br,
READ_UE_OR_RETURN(&data); // chroma_sample_loc_type_bottom_field
}
// Read and ignore timing info.
READ_BOOL_OR_RETURN(&data); // timing_info_present_flag
if (data) {
READ_BITS_OR_RETURN(16, &data); // num_units_in_tick
READ_BITS_OR_RETURN(16, &data); // num_units_in_tick
READ_BITS_OR_RETURN(16, &data); // time_scale
READ_BITS_OR_RETURN(16, &data); // time_scale
READ_BOOL_OR_RETURN(&data); // fixed_frame_rate_flag
// Read timing info.
READ_BOOL_OR_RETURN(&sps->timing_info_present_flag);
if (sps->timing_info_present_flag) {
READ_LONG_OR_RETURN(&sps->num_units_in_tick);
READ_LONG_OR_RETURN(&sps->time_scale);
READ_BOOL_OR_RETURN(&sps->fixed_frame_rate_flag);
}
// Read and ignore NAL HRD parameters, if present.

View File

@ -82,6 +82,11 @@ struct H264Sps {
int sar_height; // Set to 0 when not specified.
int transfer_characteristics;
bool timing_info_present_flag;
long num_units_in_tick;
long time_scale;
bool fixed_frame_rate_flag;
bool bitstream_restriction_flag;
int max_num_reorder_frames;
int max_dec_frame_buffering;

View File

@ -28,6 +28,22 @@
return status; \
} while (false)
// Reads a 32-bit syntax element from |br| as two consecutive 16-bit reads
// (most significant half first) and stores the combined value in |*out|.
// If either read fails, logs the error and returns kInvalidStream from the
// enclosing function without touching |*out|.
// The halves are merged in unsigned arithmetic: shifting a signed long left
// by 16 overflows when long is 32 bits wide and the top bit of the value is
// set, whereas the unsigned shift is always well defined.
#define READ_LONG_OR_RETURN(out)                                           \
  do {                                                                     \
    int _hi16 = 0;                                                         \
    int _lo16 = 0;                                                         \
    if (!br->ReadBits(16, &_hi16) || !br->ReadBits(16, &_lo16)) {          \
      DVLOG(1)                                                             \
          << "Error in stream: unexpected EOS while trying to read " #out; \
      return kInvalidStream;                                               \
    }                                                                      \
    *(out) = static_cast<long>((static_cast<unsigned long>(_hi16) << 16) | \
                               static_cast<unsigned long>(_lo16));         \
  } while (false)
namespace shaka {
namespace media {
@ -688,11 +704,10 @@ H265Parser::Result H265Parser::ParseVuiParameters(int max_num_sub_layers_minus1,
TRUE_OR_RETURN(br->ReadUE(&ignored)); // def_disp_win_bottom_offset
}
bool vui_timing_info_present_flag;
TRUE_OR_RETURN(br->ReadBool(&vui_timing_info_present_flag));
if (vui_timing_info_present_flag) {
// vui_num_units_in_tick, vui_time_scale
TRUE_OR_RETURN(br->SkipBits(32 + 32));
TRUE_OR_RETURN(br->ReadBool(&vui->vui_timing_info_present_flag));
if (vui->vui_timing_info_present_flag) {
READ_LONG_OR_RETURN(&vui->vui_num_units_in_tick);
READ_LONG_OR_RETURN(&vui->vui_time_scale);
bool vui_poc_proportional_to_timing_flag;
TRUE_OR_RETURN(br->ReadBool(&vui_poc_proportional_to_timing_flag));

View File

@ -52,6 +52,10 @@ struct H265VuiParameters {
int sar_height = 0;
int transfer_characteristics = 0;
bool vui_timing_info_present_flag = false;
long vui_num_units_in_tick = 0;
long vui_time_scale = 0;
bool bitstream_restriction_flag = false;
int min_spatial_segmentation_idc = 0;

View File

@ -184,6 +184,22 @@ bool EsParserH264::UpdateVideoDecoderConfig(int pps_id) {
return true;
}
// Derives the duration of one frame, in kMpeg2Timescale units, from the
// timing info of the SPS referenced by the PPS |pps_id|.
//
// The SPS values are used only when timing info is present, the stream
// declares a fixed frame rate, and the resulting duration is positive.
// Otherwise a warning is logged and an arbitrary small non-zero duration is
// returned, so the caller always gets a usable value.
int64_t EsParserH264::CalculateSampleDuration(int pps_id) {
  const auto pps = h264_parser_->GetPps(pps_id);
  if (pps) {
    const auto sps = h264_parser_->GetSps(pps->seq_parameter_set_id);
    // num_units_in_tick and time_scale are read straight from the bitstream,
    // so a malformed stream may carry zeros: a zero time_scale would divide
    // by zero and a zero tick would yield a zero duration.
    if (sps && sps->timing_info_present_flag && sps->fixed_frame_rate_flag &&
        sps->num_units_in_tick > 0 && sps->time_scale > 0) {
      // The factor 2 follows H.264 Annex E, where time_scale /
      // num_units_in_tick is a field rate, i.e. twice the frame rate.
      const int64_t duration = static_cast<int64_t>(kMpeg2Timescale) *
                               sps->num_units_in_tick * 2 / sps->time_scale;
      // Integer division can still round a tiny tick down to zero.
      if (duration > 0)
        return duration;
    }
  }
  LOG(WARNING) << "[MPEG-2 TS] PID " << pid()
               << " Cannot calculate frame rate from SPS.";
  // Returns arbitrary safe duration
  return static_cast<int64_t>(0.001 * kMpeg2Timescale);  // 1ms.
}
} // namespace mp2t
} // namespace media
} // namespace shaka

View File

@ -39,7 +39,8 @@ class EsParserH264 : public EsParserH26x {
// Update the video decoder config based on an H264 SPS.
// Return true if successful.
bool UpdateVideoDecoderConfig(int sps_id) override;
// Calculate video sample duration based on SPS data
int64_t CalculateSampleDuration(int pps_id) override;
// Callback to pass the stream configuration.
NewStreamInfoCB new_stream_info_cb_;

View File

@ -184,6 +184,24 @@ bool EsParserH265::UpdateVideoDecoderConfig(int pps_id) {
return true;
}
// Derives the duration of one picture, in kMpeg2Timescale units, from the
// VUI timing info of the SPS referenced by the PPS |pps_id|.
//
// The VUI values are used only when VUI parameters and timing info are
// present and the resulting duration is positive. Otherwise a warning is
// logged and an arbitrary small non-zero duration is returned, so the caller
// always gets a usable value.
int64_t EsParserH265::CalculateSampleDuration(int pps_id) {
  const auto pps = h265_parser_->GetPps(pps_id);
  if (pps) {
    const auto sps = h265_parser_->GetSps(pps->seq_parameter_set_id);
    if (sps && sps->vui_parameters_present) {
      const auto& vui = sps->vui_parameters;
      // vui_num_units_in_tick and vui_time_scale are read straight from the
      // bitstream, so a malformed stream may carry zeros: a zero time scale
      // would divide by zero and a zero tick would yield a zero duration.
      if (vui.vui_timing_info_present_flag && vui.vui_num_units_in_tick > 0 &&
          vui.vui_time_scale > 0) {
        // Unlike H.264, an HEVC clock tick is one picture period (H.265
        // Annex E gives 27000000 / 1080000 for a 25 Hz picture rate), so the
        // H.264 field-rate factor of 2 must not be applied here.
        const int64_t duration = static_cast<int64_t>(kMpeg2Timescale) *
                                 vui.vui_num_units_in_tick /
                                 vui.vui_time_scale;
        // Integer division can still round a tiny tick down to zero.
        if (duration > 0)
          return duration;
      }
    }
  }
  LOG(WARNING) << "[MPEG-2 TS] PID " << pid()
               << " Cannot calculate frame rate from SPS.";
  // Returns arbitrary safe duration
  return static_cast<int64_t>(0.001 * kMpeg2Timescale);  // 1ms.
}
} // namespace mp2t
} // namespace media
} // namespace shaka

View File

@ -42,6 +42,7 @@ class EsParserH265 : public EsParserH26x {
// Return true if successful.
bool UpdateVideoDecoderConfig(int sps_id) override;
int64_t CalculateSampleDuration(int pps_id) override;
// Callback to pass the stream configuration.
NewStreamInfoCB new_stream_info_cb_;

View File

@ -99,7 +99,9 @@ bool EsParserH26x::Flush() {
if (pending_sample_) {
// Flush pending sample.
DCHECK(pending_sample_duration_);
if (!pending_sample_duration_) {
pending_sample_duration_ = CalculateSampleDuration(pending_sample_pps_id_);
}
pending_sample_->set_duration(pending_sample_duration_);
emit_sample_cb_.Run(std::move(pending_sample_));
}
@ -330,7 +332,8 @@ bool EsParserH26x::EmitFrame(int64_t access_unit_pos,
pending_sample_->set_duration(sample_duration);
const int kArbitraryGapScale = 10;
if (sample_duration > kArbitraryGapScale * pending_sample_duration_) {
if (pending_sample_duration_ &&
sample_duration > kArbitraryGapScale * pending_sample_duration_) {
LOG(WARNING) << "[MPEG-2 TS] PID " << pid() << " Possible GAP at dts "
<< pending_sample_->dts() << " with next sample at dts "
<< media_sample->dts() << " (difference "
@ -342,6 +345,7 @@ bool EsParserH26x::EmitFrame(int64_t access_unit_pos,
emit_sample_cb_.Run(std::move(pending_sample_));
}
pending_sample_ = media_sample;
pending_sample_pps_id_ = pps_id;
return true;
}

View File

@ -96,6 +96,8 @@ class EsParserH26x : public EsParser {
int access_unit_size,
bool is_key_frame,
int pps_id);
// Calculates frame duration based on SPS frame data
virtual int64_t CalculateSampleDuration(int pps_id) = 0;
// Callback to pass the frames.
EmitSampleCB emit_sample_cb_;
@ -127,6 +129,7 @@ class EsParserH26x : public EsParser {
// Frame for which we do not yet have a duration.
std::shared_ptr<MediaSample> pending_sample_;
int pending_sample_pps_id_ = -1;
int64_t pending_sample_duration_ = 0;
// Indicates whether waiting for first key frame.

View File

@ -120,6 +120,11 @@ class TestableEsParser : public EsParserH26x {
}
private:
// Test stub: ignores |pps_id| and always reports the same frame duration,
// 0.04 s (one frame at 25 FPS) scaled by 90000 ticks per second.
int64_t CalculateSampleDuration(int pps_id) override {
  const double frame_seconds = 0.04;
  const int ticks_per_second = 90000;
  return frame_seconds * ticks_per_second;
}
const int kTestPpsId = 123;
Nalu::CodecType codec_type_;
@ -179,6 +184,7 @@ class EsParserH26xTest : public testing::Test {
const std::vector<uint8_t> sample_data(
sample->data(), sample->data() + sample->data_size());
EXPECT_EQ(samples_[sample_id], sample_data);
media_samples_.push_back(sample);
}
void NewVideoConfig(std::shared_ptr<StreamInfo> config) {
@ -187,6 +193,7 @@ class EsParserH26xTest : public testing::Test {
protected:
std::vector<std::vector<uint8_t>> samples_;
std::vector<std::shared_ptr<MediaSample>> media_samples_;
size_t sample_count_;
bool has_stream_info_;
};
@ -377,6 +384,10 @@ TEST_F(EsParserH26xTest, H264BasicSupport) {
RunTest(Nalu::kH264, kData, arraysize(kData));
EXPECT_EQ(3u, sample_count_);
EXPECT_TRUE(has_stream_info_);
EXPECT_EQ(3u, media_samples_.size());
for (size_t i = 0; i < media_samples_.size(); i++) {
EXPECT_GT(media_samples_[i]->duration(), 0u);
}
}
// This is not compliant to H264 spec, but VLC generates streams like this. See
@ -453,6 +464,33 @@ TEST_F(EsParserH26xTest, H264DoesNotStartOnRsv) {
EXPECT_TRUE(has_stream_info_);
}
// A stream holding a single H.264 access unit must still produce exactly one
// sample, and that sample must carry a non-zero duration.
TEST_F(EsParserH26xTest, H264ContainsOnlyOneFrame) {
  const H26xNaluType kData[] = {
      kSeparator,
      kH264Aud,
      kH264Sps,
      kH264VclKeyFrame,
  };
  RunTest(Nalu::kH264, kData, arraysize(kData));
  EXPECT_TRUE(has_stream_info_);
  EXPECT_EQ(1u, sample_count_);
  // Fatal assertion: if no sample was emitted, indexing media_samples_[0]
  // below would read past the end of the vector instead of failing cleanly.
  ASSERT_EQ(1u, media_samples_.size());
  EXPECT_GT(media_samples_[0]->duration(), 0u);
}
// A stream holding a single H.265 access unit must still produce exactly one
// sample, and that sample must carry a non-zero duration.
TEST_F(EsParserH26xTest, H265ContainsOnlyOneFrame) {
  const H26xNaluType kData[] = {
      kSeparator,
      kH265Aud,
      kH265Sps,
      kH265VclKeyFrame,
  };
  RunTest(Nalu::kH265, kData, arraysize(kData));
  EXPECT_TRUE(has_stream_info_);
  EXPECT_EQ(1u, sample_count_);
  // Fatal assertion: if no sample was emitted, indexing media_samples_[0]
  // below would read past the end of the vector instead of failing cleanly.
  ASSERT_EQ(1u, media_samples_.size());
  EXPECT_GT(media_samples_[0]->duration(), 0u);
}
} // namespace mp2t
} // namespace media
} // namespace shaka

View File

@ -97,7 +97,7 @@ bool PidState::PushTsPacket(const TsPacket& ts_packet) {
// just discard the incoming TS packet.
if (!enable_)
return true;
// TODO(bzd): continuity_counter_ is never set
int expected_continuity_counter = (continuity_counter_ + 1) % 16;
if (continuity_counter_ >= 0 &&
ts_packet.continuity_counter() != expected_continuity_counter) {
@ -218,10 +218,11 @@ bool Mp2tMediaParser::Parse(const uint8_t* buf, int size) {
ts_byte_queue_.Pop(1);
continue;
}
DVLOG(LOG_LEVEL_TS)
<< "Processing PID=" << ts_packet->pid()
<< " start_unit=" << ts_packet->payload_unit_start_indicator();
DVLOG(LOG_LEVEL_TS) << "Processing PID=" << ts_packet->pid()
<< " start_unit="
<< ts_packet->payload_unit_start_indicator()
<< " continuity_counter="
<< ts_packet->continuity_counter();
// Parse the section.
auto it = pids_.find(ts_packet->pid());
if (it == pids_.end() &&

View File

@ -192,7 +192,7 @@ bool TsSectionPes::ParseInternal(const uint8_t* raw_pes, int raw_pes_size) {
RCHECK(bit_reader.ReadBits(16, &pes_packet_length));
RCHECK(packet_start_code_prefix == kPesStartCode);
DVLOG(LOG_LEVEL_PES) << "stream_id=" << std::hex << stream_id << std::dec;
DVLOG(LOG_LEVEL_PES) << "stream_id=" << stream_id;
if (pes_packet_length == 0)
pes_packet_length = static_cast<int>(bit_reader.bits_available()) / 8;
@ -303,12 +303,10 @@ bool TsSectionPes::ParseInternal(const uint8_t* raw_pes, int raw_pes_size) {
RCHECK(pes_header_remaining_size >= 0);
// Read the PES packet.
DVLOG(LOG_LEVEL_PES)
<< "Emit a reassembled PES:"
<< " size=" << es_size
<< " pts=" << media_pts
<< " dts=" << media_dts
<< " data_alignment_indicator=" << data_alignment_indicator;
DVLOG(LOG_LEVEL_PES) << "Emit a reassembled PES:"
<< " size=" << es_size << " pts=" << media_pts
<< " dts=" << media_dts << " data_alignment_indicator="
<< data_alignment_indicator;
return es_parser_->Parse(&raw_pes[es_offset], es_size, media_pts, media_dts);
}