diff --git a/packager/media/codecs/nalu_reader.cc b/packager/media/codecs/nalu_reader.cc index a8aa7dc5cf..b93f2314a1 100644 --- a/packager/media/codecs/nalu_reader.cc +++ b/packager/media/codecs/nalu_reader.cc @@ -21,15 +21,7 @@ inline bool IsStartCode(const uint8_t* data) { } } // namespace -Nalu::Nalu() - : data_(nullptr), - header_size_(0), - payload_size_(0), - ref_idc_(0), - nuh_layer_id_(0), - nuh_temporal_id_(0), - type_(0), - is_video_slice_(false) {} +Nalu::Nalu() = default; bool Nalu::Initialize(CodecType type, const uint8_t* data, @@ -68,8 +60,8 @@ bool Nalu::InitializeFromH264(const uint8_t* data, uint64_t size) { << ")."; return false; } else if (type_ == Nalu::H264_IDRSlice || type_ == Nalu::H264_SPS || - type_ == Nalu::H264_SPSExtension || type_ == Nalu::H264_SubsetSPS || - type_ == Nalu::H264_PPS) { + type_ == Nalu::H264_SPSExtension || + type_ == Nalu::H264_SubsetSPS || type_ == Nalu::H264_PPS) { if (ref_idc_ == 0) { LOG(WARNING) << "nal_ref_idc shall not be equal to 0 for nalu type " << type_ << " (header 0x" << std::hex @@ -86,6 +78,7 @@ bool Nalu::InitializeFromH264(const uint8_t* data, uint64_t size) { } } + is_aud_ = type_ == H264_AUD; is_video_slice_ = (type_ >= Nalu::H264_NonIDRSlice && type_ <= Nalu::H264_IDRSlice); can_start_access_unit_ = @@ -153,6 +146,7 @@ bool Nalu::InitializeFromH265(const uint8_t* data, uint64_t size) { } } + is_aud_ = type_ == H265_AUD; is_video_slice_ = type_ >= Nalu::H265_TRAIL_N && type_ <= Nalu::H265_CRA_NUT; can_start_access_unit_ = nuh_layer_id_ == 0 && diff --git a/packager/media/codecs/nalu_reader.h b/packager/media/codecs/nalu_reader.h index 78f9fb3789..552977d0db 100644 --- a/packager/media/codecs/nalu_reader.h +++ b/packager/media/codecs/nalu_reader.h @@ -110,6 +110,7 @@ class Nalu { /// H264NaluType and H265NaluType enums may be used to compare against the /// return value. int type() const { return type_; } + bool is_aud() const { return is_aud_; } bool is_video_slice() const { return is_video_slice_; } bool can_start_access_unit() const { return can_start_access_unit_; } @@ -119,19 +120,20 @@ class Nalu { // A pointer to the NALU (i.e. points to the header). This pointer is not // owned by this instance. - const uint8_t* data_; + const uint8_t* data_ = nullptr; // NALU header size (e.g. 1 byte for H.264). Note that it does not include // header extension data in some NAL units. - uint64_t header_size_; + uint64_t header_size_ = 0; // Size of data after the header. - uint64_t payload_size_; + uint64_t payload_size_ = 0; - int ref_idc_; - int nuh_layer_id_; - int nuh_temporal_id_; - int type_; - bool is_video_slice_; - bool can_start_access_unit_; + int ref_idc_ = 0; + int nuh_layer_id_ = 0; + int nuh_temporal_id_ = 0; + int type_ = 0; + bool is_aud_ = false; + bool is_video_slice_ = false; + bool can_start_access_unit_ = false; // Don't use DISALLOW_COPY_AND_ASSIGN since it is just numbers and a pointer // it does not own. This allows Nalus to be stored in a vector. diff --git a/packager/media/formats/mp2t/es_parser_h264.cc b/packager/media/formats/mp2t/es_parser_h264.cc index c6123fd6b5..ccc8da4493 100644 --- a/packager/media/formats/mp2t/es_parser_h264.cc +++ b/packager/media/formats/mp2t/es_parser_h264.cc @@ -42,8 +42,8 @@ void EsParserH264::Reset() { } bool EsParserH264::ProcessNalu(const Nalu& nalu, - bool* is_key_frame, - int* pps_id_for_access_unit) { + VideoSliceInfo* video_slice_info) { + video_slice_info->valid = false; switch (nalu.type()) { case Nalu::H264_AUD: { DVLOG(LOG_LEVEL_ES) << "Nalu: AUD"; @@ -71,7 +71,7 @@ bool EsParserH264::ProcessNalu(const Nalu& nalu, } case Nalu::H264_IDRSlice: case Nalu::H264_NonIDRSlice: { - *is_key_frame = (nalu.type() == Nalu::H264_IDRSlice); + const bool is_key_frame = (nalu.type() == Nalu::H264_IDRSlice); DVLOG(LOG_LEVEL_ES) << "Nalu: slice IDR=" << is_key_frame; H264SliceHeader shdr; if (h264_parser_->ParseSliceHeader(nalu, &shdr) != H264Parser::kOk) { @@ -80,7 +80,10 @@ bool EsParserH264::ProcessNalu(const Nalu& nalu, if (last_video_decoder_config_) return false; } else { - *pps_id_for_access_unit = shdr.pic_parameter_set_id; + video_slice_info->valid = true; + video_slice_info->is_key_frame = is_key_frame; + video_slice_info->frame_num = shdr.frame_num; + video_slice_info->pps_id = shdr.pic_parameter_set_id; } break; } diff --git a/packager/media/formats/mp2t/es_parser_h264.h b/packager/media/formats/mp2t/es_parser_h264.h index b8478f4b16..03fff73486 100644 --- a/packager/media/formats/mp2t/es_parser_h264.h +++ b/packager/media/formats/mp2t/es_parser_h264.h @@ -33,11 +33,8 @@ class EsParserH264 : public EsParserH26x { void Reset() override; private: - // Processes a NAL unit found in ParseInternal. The @a pps_id_for_access_unit - // value will be passed to UpdateVideoDecoderConfig. - bool ProcessNalu(const Nalu& nalu, - bool* is_key_frame, - int* pps_id_for_access_unit) override; + // Processes a NAL unit found in ParseInternal. + bool ProcessNalu(const Nalu& nalu, VideoSliceInfo* video_slice_info) override; // Update the video decoder config based on an H264 SPS. // Return true if successful. diff --git a/packager/media/formats/mp2t/es_parser_h265.cc b/packager/media/formats/mp2t/es_parser_h265.cc index 7383a34441..6318700fc2 100644 --- a/packager/media/formats/mp2t/es_parser_h265.cc +++ b/packager/media/formats/mp2t/es_parser_h265.cc @@ -45,8 +45,8 @@ void EsParserH265::Reset() { } bool EsParserH265::ProcessNalu(const Nalu& nalu, - bool* is_key_frame, - int* pps_id_for_access_unit) { + VideoSliceInfo* video_slice_info) { + video_slice_info->valid = false; switch (nalu.type()) { case Nalu::H265_AUD: { DVLOG(LOG_LEVEL_ES) << "Nalu: AUD"; @@ -73,9 +73,9 @@ bool EsParserH265::ProcessNalu(const Nalu& nalu, break; } default: { - if (nalu.is_video_slice()) { - *is_key_frame = nalu.type() == Nalu::H265_IDR_W_RADL || - nalu.type() == Nalu::H265_IDR_N_LP; + if (nalu.is_video_slice() && nalu.nuh_layer_id() == 0) { + const bool is_key_frame = nalu.type() == Nalu::H265_IDR_W_RADL || + nalu.type() == Nalu::H265_IDR_N_LP; DVLOG(LOG_LEVEL_ES) << "Nalu: slice KeyFrame=" << is_key_frame; H265SliceHeader shdr; if (h265_parser_->ParseSliceHeader(nalu, &shdr) != H265Parser::kOk) { @@ -84,7 +84,10 @@ bool EsParserH265::ProcessNalu(const Nalu& nalu, if (last_video_decoder_config_) return false; } else { - *pps_id_for_access_unit = shdr.pic_parameter_set_id; + video_slice_info->valid = true; + video_slice_info->is_key_frame = is_key_frame; + video_slice_info->frame_num = 0; // frame_num is only for H264. + video_slice_info->pps_id = shdr.pic_parameter_set_id; } } else { DVLOG(LOG_LEVEL_ES) << "Nalu: " << nalu.type(); diff --git a/packager/media/formats/mp2t/es_parser_h265.h b/packager/media/formats/mp2t/es_parser_h265.h index a639d32e18..18b972e333 100644 --- a/packager/media/formats/mp2t/es_parser_h265.h +++ b/packager/media/formats/mp2t/es_parser_h265.h @@ -35,11 +35,8 @@ class EsParserH265 : public EsParserH26x { void Reset() override; private: - // Processes a NAL unit found in ParseInternal. The @a pps_id_for_access_unit - // value will be passed to UpdateVideoDecoderConfig. - bool ProcessNalu(const Nalu& nalu, - bool* is_key_frame, - int* pps_id_for_access_unit) override; + // Processes a NAL unit found in ParseInternal. + bool ProcessNalu(const Nalu& nalu, VideoSliceInfo* video_slice_info) override; // Update the video decoder config based on an H264 SPS. // Return true if successful. diff --git a/packager/media/formats/mp2t/es_parser_h26x.cc b/packager/media/formats/mp2t/es_parser_h26x.cc index f607d847f0..eab393fb81 100644 --- a/packager/media/formats/mp2t/es_parser_h26x.cc +++ b/packager/media/formats/mp2t/es_parser_h26x.cc @@ -36,10 +36,7 @@ EsParserH26x::EsParserH26x( emit_sample_cb_(emit_sample_cb), type_(type), es_queue_(new media::OffsetByteQueue()), - current_search_position_(0), - stream_converter_(std::move(stream_converter)), - pending_sample_duration_(0), - waiting_for_key_frame_(true) {} + stream_converter_(std::move(stream_converter)) {} EsParserH26x::~EsParserH26x() {} @@ -69,43 +66,29 @@ bool EsParserH26x::Parse(const uint8_t* buf, // Add the incoming bytes to the ES queue. es_queue_->Push(buf, size); - - // We should always have entries in the vector and it should always start - // with |can_start_access_unit == true|. If not, we are just starting and - // should skip to the first access unit. - if (access_unit_nalus_.empty()) { - if (!SkipToFirstAccessUnit()) - return true; - } - DCHECK(!access_unit_nalus_.empty()); - DCHECK(access_unit_nalus_.front().nalu.can_start_access_unit()); - return ParseInternal(); } void EsParserH26x::Flush() { DVLOG(1) << "EsParserH26x::Flush"; - // Simulate an additional AUD to force emitting the last access unit + // Simulate two additional AUDs to force emitting the last access unit // which is assumed to be complete at this point. + // Two AUDs are needed because the exact size of a NAL unit can only be + // determined after seeing the next NAL unit, so we need a second AUD to + // finish the parsing of the first AUD. if (type_ == Nalu::kH264) { - const uint8_t aud[] = {0x00, 0x00, 0x01, 0x09}; + const uint8_t aud[] = {0x00, 0x00, 0x01, 0x09, 0x00, 0x00, 0x01, 0x09}; es_queue_->Push(aud, sizeof(aud)); } else { DCHECK_EQ(Nalu::kH265, type_); - const uint8_t aud[] = {0x00, 0x00, 0x01, 0x46, 0x01}; + const uint8_t aud[] = {0x00, 0x00, 0x01, 0x46, 0x01, + 0x00, 0x00, 0x01, 0x46, 0x01}; es_queue_->Push(aud, sizeof(aud)); } CHECK(ParseInternal()); - // Note that the end argument is exclusive. We do not want to include the - // fake AUD we just added, so the argument should point to the AUD. - if (access_unit_nalus_.size() > 1 && - !ProcessAccessUnit(access_unit_nalus_.end() - 1)) { - LOG(WARNING) << "Error processing last access unit."; - } - if (pending_sample_) { // Flush pending sample. DCHECK(pending_sample_duration_); @@ -118,28 +101,18 @@ void EsParserH26x::Flush() { void EsParserH26x::Reset() { es_queue_.reset(new media::OffsetByteQueue()); current_search_position_ = 0; - access_unit_nalus_.clear(); + current_access_unit_position_ = 0; + current_video_slice_info_.valid = false; + next_access_unit_position_set_ = false; + next_access_unit_position_ = 0; + current_nalu_info_.reset(); timing_desc_list_.clear(); pending_sample_ = scoped_refptr(); pending_sample_duration_ = 0; waiting_for_key_frame_ = true; } -bool EsParserH26x::SkipToFirstAccessUnit() { - DCHECK(access_unit_nalus_.empty()); - while (access_unit_nalus_.empty()) { - if (!SearchForNextNalu()) - return false; - - // If we can't start an access unit, remove it and continue. - DCHECK_EQ(1u, access_unit_nalus_.size()); - if (!access_unit_nalus_.back().nalu.can_start_access_unit()) - access_unit_nalus_.clear(); - } - return true; -} - -bool EsParserH26x::SearchForNextNalu() { +bool EsParserH26x::SearchForNalu(uint64_t* position, Nalu* nalu) { const uint8_t* es; int es_size; es_queue_->PeekAt(current_search_position_, &es, &es_size); @@ -158,7 +131,7 @@ bool EsParserH26x::SearchForNextNalu() { } // Ensure the next NAL unit is a real NAL unit. - const uint8_t* nalu_ptr = es + start_code_offset + start_code_size; + const uint8_t* next_nalu_ptr = es + start_code_offset + start_code_size; // This size is likely inaccurate, this is just to get the header info. const int64_t next_nalu_size = es_size - start_code_offset - start_code_size; if (next_nalu_size < @@ -167,106 +140,129 @@ bool EsParserH26x::SearchForNextNalu() { return false; } - Nalu next_nalu; - if (!next_nalu.Initialize(type_, nalu_ptr, next_nalu_size)) { - // The next NAL unit is invalid, skip it and search again. - current_search_position_ += start_code_offset + start_code_size; - return SearchForNextNalu(); - } - + // Update search position for next nalu. current_search_position_ += start_code_offset + start_code_size; - NaluInfo info; - info.position = current_search_position_ - start_code_size; - info.start_code_size = start_code_size; - info.nalu = next_nalu; - access_unit_nalus_.push_back(info); - - return true; -} - -bool EsParserH26x::ProcessAccessUnit(std::deque::iterator end) { - DCHECK(end < access_unit_nalus_.end()); - auto begin = access_unit_nalus_.begin(); - const uint8_t* es; - int es_size; - es_queue_->PeekAt(begin->position, &es, &es_size); - DCHECK_GE(static_cast(es_size), (end->position - begin->position)); - - // Process the NAL units in the access unit. - bool is_key_frame = false; - int pps_id = -1; - for (auto it = begin; it != end; ++it) { - if (it->nalu.nuh_layer_id() == 0) { - // Update the NALU because the data pointer may have been invalidated. - CHECK(it->nalu.Initialize( - type_, es + (it->position - begin->position) + it->start_code_size, - ((it+1)->position - it->position) - it->start_code_size)); - if (!ProcessNalu(it->nalu, &is_key_frame, &pps_id)) - return false; - } + // |next_nalu_info_| is made global intentionally to avoid repetitive memory + // allocation which could create memory fragments. + if (!next_nalu_info_) + next_nalu_info_.reset(new NaluInfo); + if (!next_nalu_info_->nalu.Initialize(type_, next_nalu_ptr, next_nalu_size)) { + // This NAL unit is invalid, skip it and search again. + return SearchForNalu(position, nalu); } + next_nalu_info_->position = current_search_position_ - start_code_size; + next_nalu_info_->start_code_size = start_code_size; - if (is_key_frame) - waiting_for_key_frame_ = false; - if (!waiting_for_key_frame_) { - const uint64_t access_unit_size = end->position - begin->position; - RCHECK(EmitFrame(begin->position, access_unit_size, is_key_frame, pps_id)); + const bool current_nalu_set = current_nalu_info_ ? true : false; + if (current_nalu_info_) { + // Starting position for the nalu including start code. + *position = current_nalu_info_->position; + // Update the NALU because the data pointer may have been invalidated. + const uint8_t* current_nalu_ptr = + next_nalu_ptr + + (current_nalu_info_->position + current_nalu_info_->start_code_size) - + current_search_position_; + const uint64_t current_nalu_size = next_nalu_info_->position - + current_nalu_info_->position - + current_nalu_info_->start_code_size; + CHECK(nalu->Initialize(type_, current_nalu_ptr, current_nalu_size)); } - - return true; + current_nalu_info_.swap(next_nalu_info_); + return current_nalu_set ? true : SearchForNalu(position, nalu); } bool EsParserH26x::ParseInternal() { - while (true) { - if (!SearchForNextNalu()) - return true; - + uint64_t position; + Nalu nalu; + VideoSliceInfo video_slice_info; + while (SearchForNalu(&position, &nalu)) { // ITU H.264 sec. 7.4.1.2.3 // H264: The first of the NAL units with |can_start_access_unit() == true| // after the last VCL NAL unit of a primary coded picture specifies the - // start of a new access unit. |nuh_layer_id()| is for H265 only; it is - // included below for ease of computation (the value is always 0). + // start of a new access unit. // ITU H.265 sec. 7.4.2.4.4 // H265: The first of the NAL units with |can_start_access_unit() == true| // after the last VCL NAL unit preceding firstBlPicNalUnit (the first // VCL NAL unit of a coded picture with nuh_layer_id equal to 0), if // any, specifies the start of a new access unit. - DCHECK(!access_unit_nalus_.empty()); - if (!access_unit_nalus_.back().nalu.is_video_slice() || - access_unit_nalus_.back().nalu.nuh_layer_id() != 0) { + if (nalu.can_start_access_unit()) { + if (!next_access_unit_position_set_) { + next_access_unit_position_set_ = true; + next_access_unit_position_ = position; + } + RCHECK(ProcessNalu(nalu, &video_slice_info)); + if (nalu.is_video_slice() && !video_slice_info.valid) { + // This could happen only if decoder config is not available yet. Drop + // this frame. + DCHECK(!current_video_slice_info_.valid); + next_access_unit_position_set_ = false; + continue; + } + } else if (nalu.is_video_slice()) { + // This isn't the first VCL NAL unit. Next access unit should start after + // this NAL unit. + next_access_unit_position_set_ = false; continue; } - // First, find the end of the access unit. Search backward to find the - // first VCL NALU before the current one. - auto access_unit_end_rit = access_unit_nalus_.rbegin(); - bool found_vcl = false; - for (auto rit = access_unit_nalus_.rbegin() + 1; - rit != access_unit_nalus_.rend(); ++rit) { - if (rit->nalu.is_video_slice()) { - found_vcl = true; - break; - } else if (rit->nalu.can_start_access_unit()) { - // The start of the next access unit is the first unit with - // |can_start_access_unit| after the previous VCL unit. - access_unit_end_rit = rit; + // AUD shall be the first NAL unit if present. There shall be at most one + // AUD in any access unit. We can emit the current access unit which shall + // not contain the AUD. + if (nalu.is_aud()) + return EmitCurrentAccessUnit(); + + // We can only determine if the current access unit ends after seeing + // another VCL NAL unit. + if (!video_slice_info.valid) + continue; + + // Check if it is the first VCL NAL unit of a primary coded picture. It is + // always true for H265 as nuh_layer_id shall be == 0 at this point. + bool is_first_vcl_nalu = true; + if (type_ == Nalu::kH264) { + if (current_video_slice_info_.valid) { + // ITU H.264 sec. 7.4.1.2.4 Detection of the first VCL NAL unit of a + // primary coded picture. Only pps_id and frame_num are checked here. + is_first_vcl_nalu = + video_slice_info.frame_num != current_video_slice_info_.frame_num || + video_slice_info.pps_id != current_video_slice_info_.pps_id; } } - if (!found_vcl) - return true; + if (!is_first_vcl_nalu) { + // This isn't the first VCL NAL unit. Next access unit should start after + // this NAL unit. + next_access_unit_position_set_ = false; + continue; + } - // Get a forward iterator that corresponds to the same element pointed by - // |access_unit_end_rit|. Note: |end| refers to the exclusive end and - // will point to a valid object. - auto end = (access_unit_end_rit + 1).base(); - if (!ProcessAccessUnit(end)) - return false; + DCHECK(next_access_unit_position_set_); + RCHECK(EmitCurrentAccessUnit()); // Delete the data we have already processed. - es_queue_->Trim(end->position); - access_unit_nalus_.erase(access_unit_nalus_.begin(), end); + es_queue_->Trim(next_access_unit_position_); + + current_access_unit_position_ = next_access_unit_position_; + current_video_slice_info_ = video_slice_info; + next_access_unit_position_set_ = false; } + return true; +} + +bool EsParserH26x::EmitCurrentAccessUnit() { + if (current_video_slice_info_.valid) { + if (current_video_slice_info_.is_key_frame) + waiting_for_key_frame_ = false; + if (!waiting_for_key_frame_) { + RCHECK( + EmitFrame(current_access_unit_position_, + next_access_unit_position_ - current_access_unit_position_, + current_video_slice_info_.is_key_frame, + current_video_slice_info_.pps_id)); + } + current_video_slice_info_.valid = false; + } + return true; } bool EsParserH26x::EmitFrame(int64_t access_unit_pos, diff --git a/packager/media/formats/mp2t/es_parser_h26x.h b/packager/media/formats/mp2t/es_parser_h26x.h index 312746830b..e5bcb82c07 100644 --- a/packager/media/formats/mp2t/es_parser_h26x.h +++ b/packager/media/formats/mp2t/es_parser_h26x.h @@ -39,6 +39,15 @@ class EsParserH26x : public EsParser { void Reset() override; protected: + struct VideoSliceInfo { + bool valid = false; + bool is_key_frame = false; + // Both pps_id and frame_num are extracted from slice header (frame_num is + // only for H.264). + int pps_id = 0; + int frame_num = 0; + }; + const H26xByteToUnitStreamConverter* stream_converter() const { return stream_converter_.get(); } @@ -54,37 +63,33 @@ class EsParserH26x : public EsParser { Nalu nalu; // The offset of the NALU from the beginning of the stream, usable as an // argument to OffsetByteQueue. This points to the start code. - uint64_t position; - uint8_t start_code_size; + uint64_t position = 0; + uint8_t start_code_size = 0; }; - // Processes a NAL unit found in ParseInternal. The @a pps_id_for_access_unit - // value will be passed to UpdateVideoDecoderConfig. + // Processes a NAL unit found in ParseInternal. |video_slice_info| should not + // be null, it will contain the video slice info if it is a video slice nalu + // and it is processed successfully; otherwise the |valid| member will be set + // to false with other members untouched. virtual bool ProcessNalu(const Nalu& nalu, - bool* is_key_frame, - int* pps_id_for_access_unit) = 0; + VideoSliceInfo* video_slice_info) = 0; // Update the video decoder config. // Return true if successful. virtual bool UpdateVideoDecoderConfig(int pps_id) = 0; - // Skips to the first access unit available. Returns whether an access unit - // is found. - bool SkipToFirstAccessUnit(); - - // Finds the next NAL unit by finding the next start code. This will modify - // the search position. - // Returns true when it has found the next NALU. - bool SearchForNextNalu(); - - // Process an access unit that spans the given NAL units (end is exclusive - // and should point to a valid object). - bool ProcessAccessUnit(std::deque::iterator end); + // Finds the NAL unit by finding the next start code. This will modify the + // search position. + // Returns true when it has found the NALU. + bool SearchForNalu(uint64_t* position, Nalu* nalu); // Resumes the H26x ES parsing. // Return true if successful. bool ParseInternal(); + // Emit the current access unit if exists. + bool EmitCurrentAccessUnit(); + // Emit a frame whose position in the ES queue starts at |access_unit_pos|. // Returns true if successful, false if no PTS is available for the frame. bool EmitFrame(int64_t access_unit_pos, @@ -104,21 +109,28 @@ class EsParserH26x : public EsParser { // Parser state. // The position of the search head. - uint64_t current_search_position_; - // The NALU that make up the current access unit. This may include elements - // from the next access unit. The last item is the NAL unit currently - // being processed. - std::deque access_unit_nalus_; + uint64_t current_search_position_ = 0; + // Current access unit starting position. + uint64_t current_access_unit_position_ = 0; + // The VideoSliceInfo in the current access unit, useful for first vcl nalu + // detection (for H.264). + VideoSliceInfo current_video_slice_info_; + bool next_access_unit_position_set_ = false; + uint64_t next_access_unit_position_ = 0; + // Current nalu information. + std::unique_ptr current_nalu_info_; + // This is really a temporary storage for the next nalu information. + std::unique_ptr next_nalu_info_; // Filter to convert H.264/H.265 Annex B byte stream to unit stream. std::unique_ptr stream_converter_; // Frame for which we do not yet have a duration. scoped_refptr pending_sample_; - uint64_t pending_sample_duration_; + uint64_t pending_sample_duration_ = 0; // Indicates whether waiting for first key frame. - bool waiting_for_key_frame_; + bool waiting_for_key_frame_ = true; }; } // namespace mp2t diff --git a/packager/media/formats/mp2t/es_parser_h26x_unittest.cc b/packager/media/formats/mp2t/es_parser_h26x_unittest.cc index 5fe80680da..761cfd1e81 100644 --- a/packager/media/formats/mp2t/es_parser_h26x_unittest.cc +++ b/packager/media/formats/mp2t/es_parser_h26x_unittest.cc @@ -22,26 +22,43 @@ namespace mp2t { namespace { +const int kH264RefIdc = 1 << 5; + // NAL unit types used for testing. -enum H265NaluType { - kAud = Nalu::H265_AUD, - kSps = Nalu::H265_SPS, - kSei = Nalu::H265_PREFIX_SEI, +enum H26xNaluType { + kH264Aud = Nalu::H264_AUD, + kH264Sps = Nalu::H264_SPS | kH264RefIdc, + kH264Sei = Nalu::H264_SEIMessage, // Something with |can_start_access_unit() == false|. - kRsv = Nalu::H265_FD, + kH264Rsv = Nalu::H264_FillerData, // Non-key-frame video slice. - kVcl = Nalu::H265_TRAIL_N, - kVclKeyFrame = Nalu::H265_IDR_W_RADL, - // Needs to be different than |kVCL| so we can tell the difference. - kVclWithNuhLayer = Nalu::H265_TRAIL_R, + kH264Vcl = Nalu::H264_NonIDRSlice, + // For testing purpose, the first 2 bits contains the frame num. + kH264VclFrame0 = Nalu::H264_NonIDRSlice | (0 << 6), + kH264VclFrame1 = Nalu::H264_NonIDRSlice | (1 << 6), + kH264VclFrame2 = Nalu::H264_NonIDRSlice | (2 << 6), + kH264VclFrame3 = Nalu::H264_NonIDRSlice | (3 << 6), + kH264VclKeyFrame = Nalu::H264_IDRSlice | kH264RefIdc, + + kH265Aud = Nalu::H265_AUD, + kH265Sps = Nalu::H265_SPS, + kH265Sei = Nalu::H265_PREFIX_SEI, + // Something with |can_start_access_unit() == false|. + kH265Rsv = Nalu::H265_FD, + // Non-key-frame video slice. + kH265Vcl = Nalu::H265_TRAIL_N, + kH265VclKeyFrame = Nalu::H265_IDR_W_RADL, + // Needs to be different than |kH265VCL| so we can tell the difference. + kH265VclWithNuhLayer = Nalu::H265_TRAIL_R, + // Used to separate expected access units. kSeparator = 0xff, }; class FakeByteToUnitStreamConverter : public H26xByteToUnitStreamConverter { public: - FakeByteToUnitStreamConverter() - : H26xByteToUnitStreamConverter(Nalu::kH265) {} + explicit FakeByteToUnitStreamConverter(Nalu::CodecType codec_type) + : H26xByteToUnitStreamConverter(codec_type) {} bool GetDecoderConfigurationRecord( std::vector* decoder_config) const override { @@ -55,30 +72,40 @@ class FakeByteToUnitStreamConverter : public H26xByteToUnitStreamConverter { }; // This is the code-under-test. This implements the required abstract methods -// to ignore the contents of the NAL units. This behaves the same as the -// H.264 and H.265 types. +// to ignore the contents of the NAL units. class TestableEsParser : public EsParserH26x { public: - TestableEsParser(const NewStreamInfoCB& new_stream_info_cb, + TestableEsParser(Nalu::CodecType codec_type, + const NewStreamInfoCB& new_stream_info_cb, const EmitSampleCB& emit_sample_cb) - : EsParserH26x(Nalu::kH265, + : EsParserH26x(codec_type, std::unique_ptr( - new FakeByteToUnitStreamConverter()), + new FakeByteToUnitStreamConverter(codec_type)), 0, emit_sample_cb), + codec_type_(codec_type), new_stream_info_cb_(new_stream_info_cb), decoder_config_check_pending_(false) {} bool ProcessNalu(const Nalu& nalu, - bool* is_key_frame, - int* pps_id_for_access_unit) override { - if (nalu.type() == Nalu::H265_SPS) { + VideoSliceInfo* video_slice_info) override { + if (codec_type_ == Nalu::kH264 ? (nalu.type() == Nalu::H264_SPS) + : (nalu.type() == Nalu::H265_SPS)) { + video_slice_info->valid = false; decoder_config_check_pending_ = true; } else if (nalu.is_video_slice()) { - // This should be the same as EsParserH265::ProcessNalu. - *is_key_frame = nalu.type() == Nalu::H265_IDR_W_RADL || - nalu.type() == Nalu::H265_IDR_N_LP; - *pps_id_for_access_unit = kTestPpsId; + video_slice_info->valid = true; + // This should be the same as EsParserH26x::ProcessNalu. + if (codec_type_ == Nalu::kH264) { + video_slice_info->is_key_frame = nalu.type() == Nalu::H264_IDRSlice; + } else { + video_slice_info->is_key_frame = nalu.type() == Nalu::H265_IDR_W_RADL || + nalu.type() == Nalu::H265_IDR_N_LP; + } + video_slice_info->pps_id = kTestPpsId; + // for testing purpose, the frame_num is coded in the first byte of + // payload. + video_slice_info->frame_num = nalu.data()[nalu.header_size()]; } return true; } @@ -95,19 +122,31 @@ class TestableEsParser : public EsParserH26x { private: const int kTestPpsId = 123; + Nalu::CodecType codec_type_; NewStreamInfoCB new_stream_info_cb_; bool decoder_config_check_pending_; }; -std::vector CreateNalu(H265NaluType type, int i) { +std::vector CreateNalu(Nalu::CodecType codec_type, + H26xNaluType type, + int i) { std::vector ret; - ret.resize(4); - ret[0] = (type << 1); - // nuh_layer_id == 1, nuh_temporal_id_plus1 == 1 - ret[1] = (type == kVclWithNuhLayer ? 9 : 1); - // Add some extra data to tell consecutive frames apart. - ret[2] = 0xff; - ret[3] = i + 1; + if (codec_type == Nalu::kH264) { + ret.resize(3); + // For testing purpose, the first 2 bits contains the frame num and encoded + // in the first byte of the payload. + ret[0] = (type & 0x3f); + ret[1] = (type >> 6); + ret[2] = i + 1; + } else { + ret.resize(4); + ret[0] = (type << 1); + // nuh_layer_id == 1, nuh_temporal_id_plus1 == 1 + ret[1] = (type == kH265VclWithNuhLayer ? 9 : 1); + // Add some extra data to tell consecutive frames apart. + ret[2] = 0xff; + ret[3] = i + 1; + } return ret; } @@ -120,7 +159,9 @@ class EsParserH26xTest : public testing::Test { // Runs a test by constructing NAL units of the given types and passing them // to the parser. Access units should be separated by |kSeparator|, there // should be one at the start and not at the end. - void RunTest(const H265NaluType* types, size_t types_count); + void RunTest(Nalu::CodecType codec_type, + const H26xNaluType* types, + size_t types_count); void EmitSample(uint32_t pid, const scoped_refptr& sample) { size_t sample_id = sample_count_; @@ -144,13 +185,15 @@ class EsParserH26xTest : public testing::Test { bool has_stream_info_; }; -void EsParserH26xTest::RunTest(const H265NaluType* types, +void EsParserH26xTest::RunTest(Nalu::CodecType codec_type, + const H26xNaluType* types, size_t types_count) { // Duration of one 25fps video frame in 90KHz clock units. const uint32_t kMpegTicksPerFrame = 3600; const uint8_t kStartCode[] = {0x00, 0x00, 0x01}; TestableEsParser es_parser( + codec_type, base::Bind(&EsParserH26xTest::NewVideoConfig, base::Unretained(this)), base::Bind(&EsParserH26xTest::EmitSample, base::Unretained(this))); @@ -164,10 +207,15 @@ void EsParserH26xTest::RunTest(const H265NaluType* types, samples_.push_back(cur_sample_data); cur_sample_data.clear(); } else { - if (types[k] == kVclKeyFrame) - seen_key_frame = true; + if (codec_type == Nalu::kH264) { + if (types[k] == kH264VclKeyFrame) + seen_key_frame = true; + } else { + if (types[k] == kH265VclKeyFrame) + seen_key_frame = true; + } - std::vector es_data = CreateNalu(types[k], k); + std::vector es_data = CreateNalu(codec_type, types[k], k); cur_sample_data.push_back(0); cur_sample_data.push_back(0); cur_sample_data.push_back(0); @@ -198,107 +246,130 @@ void EsParserH26xTest::RunTest(const H265NaluType* types, es_parser.Flush(); } -TEST_F(EsParserH26xTest, BasicSupport) { - const H265NaluType kData[] = { - kSeparator, kAud, kSps, kVclKeyFrame, - kSeparator, kAud, kVcl, - kSeparator, kAud, kVcl, +TEST_F(EsParserH26xTest, H265BasicSupport) { + const H26xNaluType kData[] = { + kSeparator, kH265Aud, kH265Sps, kH265VclKeyFrame, + kSeparator, kH265Aud, kH265Vcl, + kSeparator, kH265Aud, kH265Vcl, }; - RunTest(kData, arraysize(kData)); + RunTest(Nalu::kH265, kData, arraysize(kData)); EXPECT_EQ(3u, sample_count_); EXPECT_TRUE(has_stream_info_); } -TEST_F(EsParserH26xTest, DeterminesAccessUnitsWithoutAUD) { - const H265NaluType kData[] = { - kSeparator, kSps, kVclKeyFrame, - kSeparator, kVcl, - kSeparator, kVcl, - kSeparator, kSei, kVcl, +TEST_F(EsParserH26xTest, H265DeterminesAccessUnitsWithoutAUD) { + const H26xNaluType kData[] = { + kSeparator, kH265Sps, kH265VclKeyFrame, + kSeparator, kH265Vcl, + kSeparator, kH265Vcl, + kSeparator, kH265Sei, kH265Vcl, }; - RunTest(kData, arraysize(kData)); + RunTest(Nalu::kH265, kData, arraysize(kData)); EXPECT_EQ(4u, sample_count_); EXPECT_TRUE(has_stream_info_); } -TEST_F(EsParserH26xTest, DoesNotStartOnRsv) { - const H265NaluType kData[] = { - kSeparator, kSps, kVclKeyFrame, kRsv, - kSeparator, kAud, kVcl, - kSeparator, kSei, kVcl, +TEST_F(EsParserH26xTest, H265DoesNotStartOnRsv) { + const H26xNaluType kData[] = { + kSeparator, kH265Sps, kH265VclKeyFrame, kH265Rsv, + kSeparator, kH265Aud, kH265Vcl, + kSeparator, kH265Sei, kH265Vcl, }; - RunTest(kData, arraysize(kData)); + RunTest(Nalu::kH265, kData, arraysize(kData)); EXPECT_EQ(3u, sample_count_); EXPECT_TRUE(has_stream_info_); } -TEST_F(EsParserH26xTest, SupportsNonZeroNuhLayerId) { - const H265NaluType kData[] = { - kSeparator, kSps, kVclKeyFrame, - kSeparator, kAud, kVcl, kSei, kSei, kVclWithNuhLayer, kRsv, - kSeparator, kSei, kVcl, - kSeparator, kAud, kVcl, kSps, kRsv, kVclWithNuhLayer, - kSeparator, kVcl, +TEST_F(EsParserH26xTest, H265SupportsNonZeroNuhLayerId) { + const H26xNaluType kData[] = { + kSeparator, kH265Sps, kH265VclKeyFrame, + kSeparator, kH265Aud, kH265Vcl, kH265Sei, kH265VclWithNuhLayer, kH265Rsv, + kSeparator, kH265Sei, kH265Vcl, + kSeparator, kH265Aud, kH265Vcl, kH265Sps, kH265Rsv, kH265VclWithNuhLayer, + kSeparator, kH265Vcl, }; - RunTest(kData, arraysize(kData)); + RunTest(Nalu::kH265, kData, arraysize(kData)); EXPECT_EQ(5u, sample_count_); EXPECT_TRUE(has_stream_info_); } -TEST_F(EsParserH26xTest, WaitsForKeyFrame) { - const H265NaluType kData[] = { - kSeparator, kVcl, - kSeparator, kVcl, - kSeparator, kSps, kVclKeyFrame, - kSeparator, kVcl, - kSeparator, kVcl, +TEST_F(EsParserH26xTest, H265WaitsForKeyFrame) { + const H26xNaluType kData[] = { + kSeparator, kH265Vcl, + kSeparator, kH265Vcl, + kSeparator, kH265Sps, kH265VclKeyFrame, + kSeparator, kH265Vcl, + kSeparator, kH265Vcl, }; - RunTest(kData, arraysize(kData)); + RunTest(Nalu::kH265, kData, arraysize(kData)); EXPECT_EQ(3u, sample_count_); EXPECT_TRUE(has_stream_info_); } -TEST_F(EsParserH26xTest, EmitsFramesWithNoStreamInfo) { - const H265NaluType kData[] = { - kSeparator, kVclKeyFrame, - kSeparator, kVcl, - kSeparator, kVcl, +TEST_F(EsParserH26xTest, H265EmitsFramesWithNoStreamInfo) { + const H26xNaluType kData[] = { + kSeparator, kH265VclKeyFrame, + kSeparator, kH265Vcl, kH265Rsv, + kSeparator, kH265Sei, kH265Vcl, }; - RunTest(kData, arraysize(kData)); + RunTest(Nalu::kH265, kData, arraysize(kData)); EXPECT_EQ(3u, sample_count_); EXPECT_FALSE(has_stream_info_); } -TEST_F(EsParserH26xTest, EmitsLastFrameWhenDoesntEndOnVCL) { - // This tests that it will emit the last frame and last frame will include - // the correct data and nothing extra. - const H265NaluType kData[] = { - kSeparator, kVclKeyFrame, - kSeparator, kVcl, - kSeparator, kVcl, kSei, +TEST_F(EsParserH26xTest, H265EmitsLastFrameWithNuhLayerId) { + const H26xNaluType kData[] = { + kSeparator, kH265VclKeyFrame, + kSeparator, kH265Vcl, + kSeparator, kH265Vcl, kH265Sei, kH265VclWithNuhLayer, kH265Rsv, }; - RunTest(kData, arraysize(kData)); + RunTest(Nalu::kH265, kData, arraysize(kData)); EXPECT_EQ(3u, sample_count_); EXPECT_FALSE(has_stream_info_); } -TEST_F(EsParserH26xTest, EmitsLastFrameWithNuhLayerId) { - const H265NaluType kData[] = { - kSeparator, kVclKeyFrame, - kSeparator, kVcl, - kSeparator, kVcl, kVclWithNuhLayer, kSei, +TEST_F(EsParserH26xTest, H264BasicSupport) { + const H26xNaluType kData[] = { + kSeparator, kH264Aud, kH264Sps, kH264VclKeyFrame, + kSeparator, kH264Aud, kH264Vcl, + kSeparator, kH264Aud, kH264Vcl, }; - RunTest(kData, arraysize(kData)); + RunTest(Nalu::kH264, kData, arraysize(kData)); EXPECT_EQ(3u, sample_count_); - EXPECT_FALSE(has_stream_info_); + EXPECT_TRUE(has_stream_info_); +} + +TEST_F(EsParserH26xTest, H264DeterminesAccessUnitsWithoutAUD) { + const H26xNaluType kData[] = { + kSeparator, kH264Sps, kH264VclKeyFrame, + kSeparator, kH264VclFrame1, kH264VclFrame1, + kSeparator, kH264VclFrame2, kH264VclFrame2, kH264VclFrame2, + kSeparator, kH264Sei, kH264VclFrame3, + }; + + RunTest(Nalu::kH264, kData, arraysize(kData)); + EXPECT_EQ(4u, sample_count_); + EXPECT_TRUE(has_stream_info_); +} + +TEST_F(EsParserH26xTest, H264DoesNotStartOnRsv) { + const H26xNaluType kData[] = { + kSeparator, kH264Sps, kH264VclKeyFrame, kH264Rsv, + kSeparator, kH264Aud, kH264VclFrame1, + kSeparator, kH264Sei, kH264VclFrame2, + }; + + RunTest(Nalu::kH264, kData, arraysize(kData)); + EXPECT_EQ(3u, sample_count_); + EXPECT_TRUE(has_stream_info_); } } // namespace mp2t diff --git a/packager/media/formats/mp2t/mp2t_media_parser_unittest.cc b/packager/media/formats/mp2t/mp2t_media_parser_unittest.cc index f37041a172..60a028d6fc 100644 --- a/packager/media/formats/mp2t/mp2t_media_parser_unittest.cc +++ b/packager/media/formats/mp2t/mp2t_media_parser_unittest.cc @@ -138,7 +138,7 @@ TEST_F(Mp2tMediaParserTest, UnalignedAppend512_H264) { TEST_F(Mp2tMediaParserTest, UnalignedAppend17_H265) { // Test small, non-segment-aligned appends. ParseMpeg2TsFile("bear-640x360-hevc.ts", 17); - EXPECT_EQ(79, video_frame_count_); + EXPECT_EQ(78, video_frame_count_); EXPECT_TRUE(parser_->Flush()); EXPECT_EQ(82, video_frame_count_); } @@ -146,7 +146,7 @@ TEST_F(Mp2tMediaParserTest, UnalignedAppend17_H265) { TEST_F(Mp2tMediaParserTest, UnalignedAppend512_H265) { // Test small, non-segment-aligned appends. ParseMpeg2TsFile("bear-640x360-hevc.ts", 512); - EXPECT_EQ(79, video_frame_count_); + EXPECT_EQ(78, video_frame_count_); EXPECT_TRUE(parser_->Flush()); EXPECT_EQ(82, video_frame_count_); }