diff --git a/.github/workflows/sync-labels.yaml b/.github/workflows/sync-labels.yaml
index ba7ddba6e2..47e73a9a44 100644
--- a/.github/workflows/sync-labels.yaml
+++ b/.github/workflows/sync-labels.yaml
@@ -18,7 +18,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
         with:
           repository: shaka-project/shaka-github-tools
 
diff --git a/.github/workflows/update-issues.yaml b/.github/workflows/update-issues.yaml
index 66e3f5431c..8edcdc9de7 100644
--- a/.github/workflows/update-issues.yaml
+++ b/.github/workflows/update-issues.yaml
@@ -16,7 +16,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
         with:
           repository: shaka-project/shaka-github-tools
 
diff --git a/packager/media/codecs/h264_parser.cc b/packager/media/codecs/h264_parser.cc
index bd2189cd61..28681fe1c8 100644
--- a/packager/media/codecs/h264_parser.cc
+++ b/packager/media/codecs/h264_parser.cc
@@ -112,6 +112,17 @@ bool H264SliceHeader::IsSISlice() const {
     }                                           \
   } while (0)
 
+// Reads a 32-bit unsigned value as two 16-bit halves. The halves are combined
+// in 64-bit arithmetic so the shift cannot overflow where long is 32 bits.
+#define READ_LONG_OR_RETURN(out)                          \
+  do {                                                    \
+    int _hi16;                                            \
+    int _lo16;                                            \
+    READ_BITS_OR_RETURN(16, &_hi16);                      \
+    READ_BITS_OR_RETURN(16, &_lo16);                      \
+    *(out) = (static_cast<int64_t>(_hi16) << 16) | _lo16; \
+  } while (0)
+
 #define READ_BOOL_OR_RETURN(out)                                   \
   do {                                                             \
     int _out;                                                      \
@@ -524,14 +535,12 @@ H264Parser::Result H264Parser::ParseVUIParameters(H26xBitReader* br,
     READ_UE_OR_RETURN(&data);  // chroma_sample_loc_type_bottom_field
   }
 
-  // Read and ignore timing info.
-  READ_BOOL_OR_RETURN(&data);  // timing_info_present_flag
-  if (data) {
-    READ_BITS_OR_RETURN(16, &data);  // num_units_in_tick
-    READ_BITS_OR_RETURN(16, &data);  // num_units_in_tick
-    READ_BITS_OR_RETURN(16, &data);  // time_scale
-    READ_BITS_OR_RETURN(16, &data);  // time_scale
-    READ_BOOL_OR_RETURN(&data);      // fixed_frame_rate_flag
+  // Read timing info.
+  READ_BOOL_OR_RETURN(&sps->timing_info_present_flag);
+  if (sps->timing_info_present_flag) {
+    READ_LONG_OR_RETURN(&sps->num_units_in_tick);
+    READ_LONG_OR_RETURN(&sps->time_scale);
+    READ_BOOL_OR_RETURN(&sps->fixed_frame_rate_flag);
   }
 
   // Read and ignore NAL HRD parameters, if present.
diff --git a/packager/media/codecs/h264_parser.h b/packager/media/codecs/h264_parser.h
index 82772d45ee..0e33e54070 100644
--- a/packager/media/codecs/h264_parser.h
+++ b/packager/media/codecs/h264_parser.h
@@ -82,6 +82,11 @@ struct H264Sps {
   int sar_height;  // Set to 0 when not specified.
   int transfer_characteristics;
 
+  bool timing_info_present_flag;
+  int64_t num_units_in_tick;  // u(32) in the bitstream.
+  int64_t time_scale;         // u(32) in the bitstream.
+  bool fixed_frame_rate_flag;
+
   bool bitstream_restriction_flag;
   int max_num_reorder_frames;
   int max_dec_frame_buffering;
diff --git a/packager/media/codecs/h265_parser.cc b/packager/media/codecs/h265_parser.cc
index c2eb1ece36..d8302fd6d2 100644
--- a/packager/media/codecs/h265_parser.cc
+++ b/packager/media/codecs/h265_parser.cc
@@ -28,6 +28,22 @@
     return status;          \
   } while (false)
 
+#define READ_LONG_OR_RETURN(out)                                           \
+  do {                                                                     \
+    int _top_half, _bottom_half;                                           \
+    if (!br->ReadBits(16, &_top_half)) {                                   \
+      DVLOG(1)                                                             \
+          << "Error in stream: unexpected EOS while trying to read " #out; \
+      return kInvalidStream;                                               \
+    }                                                                      \
+    if (!br->ReadBits(16, &_bottom_half)) {                                \
+      DVLOG(1)                                                             \
+          << "Error in stream: unexpected EOS while trying to read " #out; \
+      return kInvalidStream;                                               \
+    }                                                                      \
+    *(out) = (static_cast<int64_t>(_top_half) << 16) | _bottom_half;       \
+  } while (false)
+
 namespace shaka {
 namespace media {
 
@@ -688,11 +704,10 @@ H265Parser::Result H265Parser::ParseVuiParameters(int max_num_sub_layers_minus1,
     TRUE_OR_RETURN(br->ReadUE(&ignored));  // def_disp_win_bottom_offset
   }
 
-  bool vui_timing_info_present_flag;
-  TRUE_OR_RETURN(br->ReadBool(&vui_timing_info_present_flag));
-  if (vui_timing_info_present_flag) {
-    // vui_num_units_in_tick, vui_time_scale
-    TRUE_OR_RETURN(br->SkipBits(32 + 32));
+  TRUE_OR_RETURN(br->ReadBool(&vui->vui_timing_info_present_flag));
+  if (vui->vui_timing_info_present_flag) {
+    READ_LONG_OR_RETURN(&vui->vui_num_units_in_tick);
+    READ_LONG_OR_RETURN(&vui->vui_time_scale);
 
     bool vui_poc_proportional_to_timing_flag;
     TRUE_OR_RETURN(br->ReadBool(&vui_poc_proportional_to_timing_flag));
diff --git a/packager/media/codecs/h265_parser.h b/packager/media/codecs/h265_parser.h
index 07538a9148..fb78a3434b 100644
--- a/packager/media/codecs/h265_parser.h
+++ b/packager/media/codecs/h265_parser.h
@@ -52,6 +52,10 @@ struct H265VuiParameters {
   int sar_height = 0;
   int transfer_characteristics = 0;
 
+  bool vui_timing_info_present_flag = false;
+  int64_t vui_num_units_in_tick = 0;  // u(32) in the bitstream.
+  int64_t vui_time_scale = 0;         // u(32) in the bitstream.
+
   bool bitstream_restriction_flag = false;
   int min_spatial_segmentation_idc = 0;
 
diff --git a/packager/media/formats/mp2t/es_parser_h264.cc b/packager/media/formats/mp2t/es_parser_h264.cc
index 10bde0c309..100a7b0dbe 100644
--- a/packager/media/formats/mp2t/es_parser_h264.cc
+++ b/packager/media/formats/mp2t/es_parser_h264.cc
@@ -184,6 +184,30 @@ bool EsParserH264::UpdateVideoDecoderConfig(int pps_id) {
   return true;
 }
 
+// Computes the duration of one frame, in MPEG-2 TS timescale units, from the
+// VUI timing info of the SPS referenced by |pps_id|. Falls back to 1ms when
+// the timing info is absent or unusable.
+int64_t EsParserH264::CalculateSampleDuration(int pps_id) {
+  auto pps = h264_parser_->GetPps(pps_id);
+  if (pps) {
+    auto sps = h264_parser_->GetSps(pps->seq_parameter_set_id);
+    // A zero num_units_in_tick or time_scale is invalid; checking it here
+    // avoids a division by zero on malformed streams.
+    if (sps && sps->timing_info_present_flag && sps->fixed_frame_rate_flag &&
+        sps->num_units_in_tick > 0 && sps->time_scale > 0) {
+      // An H.264 clock tick is a field period, so a frame spans two ticks.
+      const int64_t duration = static_cast<int64_t>(kMpeg2Timescale) *
+                               sps->num_units_in_tick * 2 / sps->time_scale;
+      if (duration > 0)
+        return duration;
+    }
+  }
+  LOG(WARNING) << "[MPEG-2 TS] PID " << pid()
+               << " Cannot calculate frame rate from SPS.";
+  // Returns arbitrary safe duration
+  return kMpeg2Timescale / 1000;  // 1ms.
+}
+
 }  // namespace mp2t
 }  // namespace media
 }  // namespace shaka
diff --git a/packager/media/formats/mp2t/es_parser_h264.h b/packager/media/formats/mp2t/es_parser_h264.h
index 5db7fa4d8e..757f61d5ad 100644
--- a/packager/media/formats/mp2t/es_parser_h264.h
+++ b/packager/media/formats/mp2t/es_parser_h264.h
@@ -39,7 +39,8 @@ class EsParserH264 : public EsParserH26x {
   // Update the video decoder config based on an H264 SPS.
   // Return true if successful.
   bool UpdateVideoDecoderConfig(int sps_id) override;
-
+  // Calculate video sample duration based on SPS data
+  int64_t CalculateSampleDuration(int pps_id) override;
   // Callback to pass the stream configuration.
   NewStreamInfoCB new_stream_info_cb_;
 
diff --git a/packager/media/formats/mp2t/es_parser_h265.cc b/packager/media/formats/mp2t/es_parser_h265.cc
index 4eb595f9fb..3dc07c0cc8 100644
--- a/packager/media/formats/mp2t/es_parser_h265.cc
+++ b/packager/media/formats/mp2t/es_parser_h265.cc
@@ -184,6 +184,34 @@ bool EsParserH265::UpdateVideoDecoderConfig(int pps_id) {
   return true;
 }
 
+// Computes the duration of one frame, in MPEG-2 TS timescale units, from the
+// VUI timing info of the SPS referenced by |pps_id|. Falls back to 1ms when
+// the timing info is absent or unusable.
+int64_t EsParserH265::CalculateSampleDuration(int pps_id) {
+  auto pps = h265_parser_->GetPps(pps_id);
+  if (pps) {
+    auto sps = h265_parser_->GetSps(pps->seq_parameter_set_id);
+    if (sps && sps->vui_parameters_present) {
+      const auto& vui = sps->vui_parameters;
+      // A zero vui_num_units_in_tick or vui_time_scale is invalid; checking it
+      // here avoids a division by zero on malformed streams.
+      if (vui.vui_timing_info_present_flag && vui.vui_num_units_in_tick > 0 &&
+          vui.vui_time_scale > 0) {
+        // Unlike H.264, an H.265 clock tick corresponds to one picture period
+        // (see the 25 Hz example in H.265 Annex E), so there is no factor 2.
+        const int64_t duration = static_cast<int64_t>(kMpeg2Timescale) *
+                                 vui.vui_num_units_in_tick / vui.vui_time_scale;
+        if (duration > 0)
+          return duration;
+      }
+    }
+  }
+  LOG(WARNING) << "[MPEG-2 TS] PID " << pid()
+               << " Cannot calculate frame rate from SPS.";
+  // Returns arbitrary safe duration
+  return kMpeg2Timescale / 1000;  // 1ms.
+}
+
 }  // namespace mp2t
 }  // namespace media
 }  // namespace shaka
diff --git a/packager/media/formats/mp2t/es_parser_h265.h b/packager/media/formats/mp2t/es_parser_h265.h
index 8f2c56c5e7..d58fadce8b 100644
--- a/packager/media/formats/mp2t/es_parser_h265.h
+++ b/packager/media/formats/mp2t/es_parser_h265.h
@@ -42,6 +42,7 @@ class EsParserH265 : public EsParserH26x {
   // Return true if successful.
   bool UpdateVideoDecoderConfig(int sps_id) override;
 
+  int64_t CalculateSampleDuration(int pps_id) override;
   // Callback to pass the stream configuration.
   NewStreamInfoCB new_stream_info_cb_;
 
diff --git a/packager/media/formats/mp2t/es_parser_h26x.cc b/packager/media/formats/mp2t/es_parser_h26x.cc
index c0b4672dc7..b04974a071 100644
--- a/packager/media/formats/mp2t/es_parser_h26x.cc
+++ b/packager/media/formats/mp2t/es_parser_h26x.cc
@@ -99,7 +99,9 @@ bool EsParserH26x::Flush() {
 
   if (pending_sample_) {
     // Flush pending sample.
-    DCHECK(pending_sample_duration_);
+    if (!pending_sample_duration_) {
+      pending_sample_duration_ = CalculateSampleDuration(pending_sample_pps_id_);
+    }
     pending_sample_->set_duration(pending_sample_duration_);
     emit_sample_cb_.Run(std::move(pending_sample_));
   }
@@ -330,7 +332,8 @@ bool EsParserH26x::EmitFrame(int64_t access_unit_pos,
     pending_sample_->set_duration(sample_duration);
 
     const int kArbitraryGapScale = 10;
-    if (sample_duration > kArbitraryGapScale * pending_sample_duration_) {
+    if (pending_sample_duration_ &&
+        sample_duration > kArbitraryGapScale * pending_sample_duration_) {
       LOG(WARNING) << "[MPEG-2 TS] PID " << pid() << " Possible GAP at dts "
                    << pending_sample_->dts() << " with next sample at dts "
                    << media_sample->dts() << " (difference "
@@ -342,6 +345,7 @@ bool EsParserH26x::EmitFrame(int64_t access_unit_pos,
     emit_sample_cb_.Run(std::move(pending_sample_));
   }
   pending_sample_ = media_sample;
+  pending_sample_pps_id_ = pps_id;
 
   return true;
 }
diff --git a/packager/media/formats/mp2t/es_parser_h26x.h b/packager/media/formats/mp2t/es_parser_h26x.h
index 921b326d43..29246574aa 100644
--- a/packager/media/formats/mp2t/es_parser_h26x.h
+++ b/packager/media/formats/mp2t/es_parser_h26x.h
@@ -96,6 +96,8 @@ class EsParserH26x : public EsParser {
                  int access_unit_size,
                  bool is_key_frame,
                  int pps_id);
+  // Calculates frame duration based on SPS frame data
+  virtual int64_t CalculateSampleDuration(int pps_id) = 0;
 
   // Callback to pass the frames.
   EmitSampleCB emit_sample_cb_;
@@ -127,6 +129,7 @@ class EsParserH26x : public EsParser {
 
   // Frame for which we do not yet have a duration.
   std::shared_ptr<MediaSample> pending_sample_;
+  int pending_sample_pps_id_ = -1;
   int64_t pending_sample_duration_ = 0;
 
   // Indicates whether waiting for first key frame.
diff --git a/packager/media/formats/mp2t/es_parser_h26x_unittest.cc b/packager/media/formats/mp2t/es_parser_h26x_unittest.cc
index 48916ffb60..0df0f1794d 100644
--- a/packager/media/formats/mp2t/es_parser_h26x_unittest.cc
+++ b/packager/media/formats/mp2t/es_parser_h26x_unittest.cc
@@ -120,6 +120,11 @@ class TestableEsParser : public EsParserH26x {
   }
 
  private:
+  int64_t CalculateSampleDuration(int pps_id) override {
+    // Typical 40ms - frame duration with 25 FPS
+    return 0.04 * 90000;
+  }
+
   const int kTestPpsId = 123;
 
   Nalu::CodecType codec_type_;
@@ -179,6 +184,7 @@ class EsParserH26xTest : public testing::Test {
     const std::vector<uint8_t> sample_data(
         sample->data(), sample->data() + sample->data_size());
     EXPECT_EQ(samples_[sample_id], sample_data);
+    media_samples_.push_back(sample);
   }
 
   void NewVideoConfig(std::shared_ptr<StreamInfo> config) {
@@ -187,6 +193,7 @@ class EsParserH26xTest : public testing::Test {
 
  protected:
   std::vector<std::vector<uint8_t>> samples_;
+  std::vector<std::shared_ptr<MediaSample>> media_samples_;
   size_t sample_count_;
   bool has_stream_info_;
 };
@@ -377,6 +384,10 @@ TEST_F(EsParserH26xTest, H264BasicSupport) {
   RunTest(Nalu::kH264, kData, arraysize(kData));
   EXPECT_EQ(3u, sample_count_);
   EXPECT_TRUE(has_stream_info_);
+  EXPECT_EQ(3u, media_samples_.size());
+  for (size_t i = 0; i < media_samples_.size(); i++) {
+    EXPECT_GT(media_samples_[i]->duration(), 0u);
+  }
 }
 
 // This is not compliant to H264 spec, but VLC generates streams like this. See
@@ -453,6 +464,33 @@ TEST_F(EsParserH26xTest, H264DoesNotStartOnRsv) {
   EXPECT_TRUE(has_stream_info_);
 }
 
+TEST_F(EsParserH26xTest, H264ContainsOnlyOneFrame) {
+  const H26xNaluType kData[] = {
+      kSeparator,
+      kH264Aud,
+      kH264Sps,
+      kH264VclKeyFrame,
+  };
+
+  RunTest(Nalu::kH264, kData, arraysize(kData));
+  EXPECT_TRUE(has_stream_info_);
+  EXPECT_EQ(1u, sample_count_);
+  EXPECT_EQ(1u, media_samples_.size());
+  EXPECT_GT(media_samples_[0]->duration(), 0u);
+}
+
+TEST_F(EsParserH26xTest, H265ContainsOnlyOneFrame) {
+  const H26xNaluType kData[] = {
+      kSeparator, kH265Aud, kH265Sps, kH265VclKeyFrame,
+  };
+
+  RunTest(Nalu::kH265, kData, arraysize(kData));
+  EXPECT_TRUE(has_stream_info_);
+  EXPECT_EQ(1u, sample_count_);
+  EXPECT_EQ(1u, media_samples_.size());
+  EXPECT_GT(media_samples_[0]->duration(), 0u);
+}
+
 }  // namespace mp2t
 }  // namespace media
 }  // namespace shaka
diff --git a/packager/media/formats/mp2t/mp2t_media_parser.cc b/packager/media/formats/mp2t/mp2t_media_parser.cc
index f9f908fcd4..01fc78c624 100644
--- a/packager/media/formats/mp2t/mp2t_media_parser.cc
+++ b/packager/media/formats/mp2t/mp2t_media_parser.cc
@@ -97,7 +97,7 @@ bool PidState::PushTsPacket(const TsPacket& ts_packet) {
   // just discard the incoming TS packet.
   if (!enable_)
     return true;
-
+  // TODO(bzd): continuity_counter_ is never set
   int expected_continuity_counter = (continuity_counter_ + 1) % 16;
   if (continuity_counter_ >= 0 &&
       ts_packet.continuity_counter() != expected_continuity_counter) {
@@ -218,10 +218,11 @@ bool Mp2tMediaParser::Parse(const uint8_t* buf, int size) {
       ts_byte_queue_.Pop(1);
       continue;
     }
-    DVLOG(LOG_LEVEL_TS)
-        << "Processing PID=" << ts_packet->pid()
-        << " start_unit=" << ts_packet->payload_unit_start_indicator();
-
+    DVLOG(LOG_LEVEL_TS) << "Processing PID=" << ts_packet->pid()
+                        << " start_unit="
+                        << ts_packet->payload_unit_start_indicator()
+                        << " continuity_counter="
+                        << ts_packet->continuity_counter();
     // Parse the section.
     auto it = pids_.find(ts_packet->pid());
     if (it == pids_.end() &&
diff --git a/packager/media/formats/mp2t/mp2t_media_parser_unittest.cc b/packager/media/formats/mp2t/mp2t_media_parser_unittest.cc
index 639039d5c8..011be293c6 100644
--- a/packager/media/formats/mp2t/mp2t_media_parser_unittest.cc
+++ b/packager/media/formats/mp2t/mp2t_media_parser_unittest.cc
@@ -28,7 +28,9 @@ class Mp2tMediaParserTest : public testing::Test {
       : audio_frame_count_(0),
         video_frame_count_(0),
         video_min_dts_(kNoTimestamp),
-        video_max_dts_(kNoTimestamp) {
+        video_max_dts_(kNoTimestamp),
+        video_min_pts_(kNoTimestamp),
+        video_max_pts_(kNoTimestamp) {
     parser_.reset(new Mp2tMediaParser());
   }
 
@@ -41,6 +43,8 @@ class Mp2tMediaParserTest : public testing::Test {
   int video_frame_count_;
   int64_t video_min_dts_;
   int64_t video_max_dts_;
+  int64_t video_min_pts_;
+  int64_t video_max_pts_;
 
   bool AppendData(const uint8_t* data, size_t length) {
     return parser_->Parse(data, static_cast<int>(length));
@@ -79,6 +83,8 @@ class Mp2tMediaParserTest : public testing::Test {
       ++video_frame_count_;
       if (video_min_dts_ == kNoTimestamp)
         video_min_dts_ = sample->dts();
+      if (video_min_pts_ == kNoTimestamp || video_min_pts_ > sample->pts())
+        video_min_pts_ = sample->pts();
       // Verify timestamps are increasing.
       if (video_max_dts_ == kNoTimestamp)
         video_max_dts_ = sample->dts();
@@ -86,6 +92,9 @@ class Mp2tMediaParserTest : public testing::Test {
         LOG(ERROR) << "Video DTS not strictly increasing.";
         return false;
       }
+      if (video_max_pts_ < sample->pts()) {
+        video_max_pts_ = sample->pts();
+      }
       video_max_dts_ = sample->dts();
     } else {
       LOG(ERROR) << "Missing StreamInfo for track ID " << track_id;
@@ -153,9 +162,9 @@ TEST_F(Mp2tMediaParserTest, UnalignedAppend512_H265) {
 }
 
 TEST_F(Mp2tMediaParserTest, TimestampWrapAround) {
-  // "bear-640x360.ts" has been transcoded from bear-640x360.mp4 by applying a
-  // time offset of 95442s (close to 2^33 / 90000) which results in timestamps
-  // wrap around in the Mpeg2 TS stream.
+  // "bear-640x360_ptswraparound.ts" has been transcoded from bear-640x360.mp4
+  // by applying a time offset of 95442s (close to 2^33 / 90000) which results
+  // in timestamp wrap around in the Mpeg2 TS stream.
   ParseMpeg2TsFile("bear-640x360_ptswraparound.ts", 512);
   EXPECT_TRUE(parser_->Flush());
   EXPECT_EQ(82, video_frame_count_);
@@ -163,6 +172,22 @@ TEST_F(Mp2tMediaParserTest, TimestampWrapAround) {
   EXPECT_GT(video_max_dts_, static_cast<int64_t>(1) << 33);
 }
 
+TEST_F(Mp2tMediaParserTest, PtsZeroDtsWrapAround) {
+  // "bear-640x360_ptszero_dtswraparound.ts" has been transcoded from
+  // bear-640x360.mp4 by applying a dts (close to 2^33 / 90000) and pts 1433
+  // which results in dts wrap around in the Mpeg2 TS stream but pts does not.
+  ParseMpeg2TsFile("bear-640x360_ptszero_dtswraparound.ts", 512);
+  EXPECT_TRUE(parser_->Flush());
+  EXPECT_EQ(64, video_frame_count_);
+  // DTS was subjected to unroll
+  EXPECT_LT(video_min_dts_, static_cast<int64_t>(1) << 33);
+  EXPECT_GT(video_max_dts_, static_cast<int64_t>(1) << 33);
+  // PTS was not subjected to unroll but was artificially unrolled to be close
+  // to DTS
+  EXPECT_GT(video_min_pts_, static_cast<int64_t>(1) << 33);
+  EXPECT_GT(video_max_pts_, static_cast<int64_t>(1) << 33);
+}
+
 }  // namespace mp2t
 }  // namespace media
 }  // namespace shaka
diff --git a/packager/media/formats/mp2t/ts_section_pes.cc b/packager/media/formats/mp2t/ts_section_pes.cc
index 31f799cb11..ec42ade21c 100644
--- a/packager/media/formats/mp2t/ts_section_pes.cc
+++ b/packager/media/formats/mp2t/ts_section_pes.cc
@@ -5,7 +5,6 @@
 #include "packager/media/formats/mp2t/ts_section_pes.h"
 
 #include "packager/base/logging.h"
-#include "packager/base/strings/string_number_conversions.h"
 #include "packager/media/base/bit_reader.h"
 #include "packager/media/base/timestamp.h"
 #include "packager/media/formats/mp2t/es_parser.h"
@@ -193,7 +192,7 @@ bool TsSectionPes::ParseInternal(const uint8_t* raw_pes, int raw_pes_size) {
   RCHECK(bit_reader.ReadBits(16, &pes_packet_length));
 
   RCHECK(packet_start_code_prefix == kPesStartCode);
-  DVLOG(LOG_LEVEL_PES) << "stream_id=" << std::hex << stream_id << std::dec;
+  DVLOG(LOG_LEVEL_PES) << "stream_id=" << stream_id;
   if (pes_packet_length == 0)
     pes_packet_length = static_cast<int>(bit_reader.bits_available()) / 8;
 
@@ -273,14 +272,6 @@ bool TsSectionPes::ParseInternal(const uint8_t* raw_pes, int raw_pes_size) {
   // Convert and unroll the timestamps.
   int64_t media_pts(kNoTimestamp);
   int64_t media_dts(kNoTimestamp);
-  if (is_pts_valid) {
-    int64_t pts = ConvertTimestampSectionToTimestamp(pts_section);
-    if (previous_pts_valid_)
-      pts = UnrollTimestamp(previous_pts_, pts);
-    previous_pts_ = pts;
-    previous_pts_valid_ = true;
-    media_pts = pts;
-  }
   if (is_dts_valid) {
     int64_t dts = ConvertTimestampSectionToTimestamp(dts_section);
     if (previous_dts_valid_)
@@ -289,6 +280,19 @@ bool TsSectionPes::ParseInternal(const uint8_t* raw_pes, int raw_pes_size) {
     previous_dts_valid_ = true;
     media_dts = dts;
   }
+  if (is_pts_valid) {
+    int64_t pts = ConvertTimestampSectionToTimestamp(pts_section);
+    if (previous_pts_valid_) {
+      pts = UnrollTimestamp(previous_pts_, pts);
+    } else if (media_dts != kNoTimestamp) {
+      // No earlier PTS to unroll against: unroll relative to this packet's
+      // DTS so that PTS stays close to DTS when only DTS has wrapped around.
+      pts = UnrollTimestamp(media_dts, pts);
+    }
+    previous_pts_ = pts;
+    previous_pts_valid_ = true;
+    media_pts = pts;
+  }
 
   // Discard the rest of the PES packet header.
   DCHECK_EQ(bit_reader.bits_available() % 8, 0u);
@@ -299,12 +303,10 @@ bool TsSectionPes::ParseInternal(const uint8_t* raw_pes, int raw_pes_size) {
   RCHECK(pes_header_remaining_size >= 0);
 
   // Read the PES packet.
-  DVLOG(LOG_LEVEL_PES)
-      << "Emit a reassembled PES:"
-      << " size=" << es_size
-      << " pts=" << media_pts
-      << " dts=" << media_dts
-      << " data_alignment_indicator=" << data_alignment_indicator;
+  DVLOG(LOG_LEVEL_PES) << "Emit a reassembled PES:"
+                       << " size=" << es_size << " pts=" << media_pts
+                       << " dts=" << media_dts << " data_alignment_indicator="
+                       << data_alignment_indicator;
   return es_parser_->Parse(&raw_pes[es_offset], es_size, media_pts, media_dts);
 }
 
diff --git a/packager/media/test/data/bear-640x360_ptszero_dtswraparound.ts b/packager/media/test/data/bear-640x360_ptszero_dtswraparound.ts
new file mode 100644
index 0000000000..8d633b5700
Binary files /dev/null and b/packager/media/test/data/bear-640x360_ptszero_dtswraparound.ts differ