Fix access unit detection problem for H264

In H264, there may be multiple consecutive video slice NAL units
in the same frame. The original code assigns a new access unit
for every video slice NAL unit, which is incorrect.

Fixes #134.

Change-Id: I4d44271df48cb08867ddd02f7494fb3573af3356
This commit is contained in:
Kongqun Yang 2016-08-25 15:44:25 -07:00
parent 52cbcb321d
commit 09891dcd9e
10 changed files with 351 additions and 276 deletions

View File

@ -21,15 +21,7 @@ inline bool IsStartCode(const uint8_t* data) {
} }
} // namespace } // namespace
Nalu::Nalu() Nalu::Nalu() = default;
: data_(nullptr),
header_size_(0),
payload_size_(0),
ref_idc_(0),
nuh_layer_id_(0),
nuh_temporal_id_(0),
type_(0),
is_video_slice_(false) {}
bool Nalu::Initialize(CodecType type, bool Nalu::Initialize(CodecType type,
const uint8_t* data, const uint8_t* data,
@ -68,8 +60,8 @@ bool Nalu::InitializeFromH264(const uint8_t* data, uint64_t size) {
<< ")."; << ").";
return false; return false;
} else if (type_ == Nalu::H264_IDRSlice || type_ == Nalu::H264_SPS || } else if (type_ == Nalu::H264_IDRSlice || type_ == Nalu::H264_SPS ||
type_ == Nalu::H264_SPSExtension || type_ == Nalu::H264_SubsetSPS || type_ == Nalu::H264_SPSExtension ||
type_ == Nalu::H264_PPS) { type_ == Nalu::H264_SubsetSPS || type_ == Nalu::H264_PPS) {
if (ref_idc_ == 0) { if (ref_idc_ == 0) {
LOG(WARNING) << "nal_ref_idc shall not be equal to 0 for nalu type " LOG(WARNING) << "nal_ref_idc shall not be equal to 0 for nalu type "
<< type_ << " (header 0x" << std::hex << type_ << " (header 0x" << std::hex
@ -86,6 +78,7 @@ bool Nalu::InitializeFromH264(const uint8_t* data, uint64_t size) {
} }
} }
is_aud_ = type_ == H264_AUD;
is_video_slice_ = (type_ >= Nalu::H264_NonIDRSlice && is_video_slice_ = (type_ >= Nalu::H264_NonIDRSlice &&
type_ <= Nalu::H264_IDRSlice); type_ <= Nalu::H264_IDRSlice);
can_start_access_unit_ = can_start_access_unit_ =
@ -153,6 +146,7 @@ bool Nalu::InitializeFromH265(const uint8_t* data, uint64_t size) {
} }
} }
is_aud_ = type_ == H265_AUD;
is_video_slice_ = type_ >= Nalu::H265_TRAIL_N && type_ <= Nalu::H265_CRA_NUT; is_video_slice_ = type_ >= Nalu::H265_TRAIL_N && type_ <= Nalu::H265_CRA_NUT;
can_start_access_unit_ = can_start_access_unit_ =
nuh_layer_id_ == 0 && nuh_layer_id_ == 0 &&

View File

@ -110,6 +110,7 @@ class Nalu {
/// H264NaluType and H265NaluType enums may be used to compare against the /// H264NaluType and H265NaluType enums may be used to compare against the
/// return value. /// return value.
int type() const { return type_; } int type() const { return type_; }
bool is_aud() const { return is_aud_; }
bool is_video_slice() const { return is_video_slice_; } bool is_video_slice() const { return is_video_slice_; }
bool can_start_access_unit() const { return can_start_access_unit_; } bool can_start_access_unit() const { return can_start_access_unit_; }
@ -119,19 +120,20 @@ class Nalu {
// A pointer to the NALU (i.e. points to the header). This pointer is not // A pointer to the NALU (i.e. points to the header). This pointer is not
// owned by this instance. // owned by this instance.
const uint8_t* data_; const uint8_t* data_ = nullptr;
// NALU header size (e.g. 1 byte for H.264). Note that it does not include // NALU header size (e.g. 1 byte for H.264). Note that it does not include
// header extension data in some NAL units. // header extension data in some NAL units.
uint64_t header_size_; uint64_t header_size_ = 0;
// Size of data after the header. // Size of data after the header.
uint64_t payload_size_; uint64_t payload_size_ = 0;
int ref_idc_; int ref_idc_ = 0;
int nuh_layer_id_; int nuh_layer_id_ = 0;
int nuh_temporal_id_; int nuh_temporal_id_ = 0;
int type_; int type_ = 0;
bool is_video_slice_; bool is_aud_ = false;
bool can_start_access_unit_; bool is_video_slice_ = false;
bool can_start_access_unit_ = false;
// Don't use DISALLOW_COPY_AND_ASSIGN since it is just numbers and a pointer // Don't use DISALLOW_COPY_AND_ASSIGN since it is just numbers and a pointer
// it does not own. This allows Nalus to be stored in a vector. // it does not own. This allows Nalus to be stored in a vector.

View File

@ -42,8 +42,8 @@ void EsParserH264::Reset() {
} }
bool EsParserH264::ProcessNalu(const Nalu& nalu, bool EsParserH264::ProcessNalu(const Nalu& nalu,
bool* is_key_frame, VideoSliceInfo* video_slice_info) {
int* pps_id_for_access_unit) { video_slice_info->valid = false;
switch (nalu.type()) { switch (nalu.type()) {
case Nalu::H264_AUD: { case Nalu::H264_AUD: {
DVLOG(LOG_LEVEL_ES) << "Nalu: AUD"; DVLOG(LOG_LEVEL_ES) << "Nalu: AUD";
@ -71,7 +71,7 @@ bool EsParserH264::ProcessNalu(const Nalu& nalu,
} }
case Nalu::H264_IDRSlice: case Nalu::H264_IDRSlice:
case Nalu::H264_NonIDRSlice: { case Nalu::H264_NonIDRSlice: {
*is_key_frame = (nalu.type() == Nalu::H264_IDRSlice); const bool is_key_frame = (nalu.type() == Nalu::H264_IDRSlice);
DVLOG(LOG_LEVEL_ES) << "Nalu: slice IDR=" << is_key_frame; DVLOG(LOG_LEVEL_ES) << "Nalu: slice IDR=" << is_key_frame;
H264SliceHeader shdr; H264SliceHeader shdr;
if (h264_parser_->ParseSliceHeader(nalu, &shdr) != H264Parser::kOk) { if (h264_parser_->ParseSliceHeader(nalu, &shdr) != H264Parser::kOk) {
@ -80,7 +80,10 @@ bool EsParserH264::ProcessNalu(const Nalu& nalu,
if (last_video_decoder_config_) if (last_video_decoder_config_)
return false; return false;
} else { } else {
*pps_id_for_access_unit = shdr.pic_parameter_set_id; video_slice_info->valid = true;
video_slice_info->is_key_frame = is_key_frame;
video_slice_info->frame_num = shdr.frame_num;
video_slice_info->pps_id = shdr.pic_parameter_set_id;
} }
break; break;
} }

View File

@ -33,11 +33,8 @@ class EsParserH264 : public EsParserH26x {
void Reset() override; void Reset() override;
private: private:
// Processes a NAL unit found in ParseInternal. The @a pps_id_for_access_unit // Processes a NAL unit found in ParseInternal.
// value will be passed to UpdateVideoDecoderConfig. bool ProcessNalu(const Nalu& nalu, VideoSliceInfo* video_slice_info) override;
bool ProcessNalu(const Nalu& nalu,
bool* is_key_frame,
int* pps_id_for_access_unit) override;
// Update the video decoder config based on an H264 SPS. // Update the video decoder config based on an H264 SPS.
// Return true if successful. // Return true if successful.

View File

@ -45,8 +45,8 @@ void EsParserH265::Reset() {
} }
bool EsParserH265::ProcessNalu(const Nalu& nalu, bool EsParserH265::ProcessNalu(const Nalu& nalu,
bool* is_key_frame, VideoSliceInfo* video_slice_info) {
int* pps_id_for_access_unit) { video_slice_info->valid = false;
switch (nalu.type()) { switch (nalu.type()) {
case Nalu::H265_AUD: { case Nalu::H265_AUD: {
DVLOG(LOG_LEVEL_ES) << "Nalu: AUD"; DVLOG(LOG_LEVEL_ES) << "Nalu: AUD";
@ -73,9 +73,9 @@ bool EsParserH265::ProcessNalu(const Nalu& nalu,
break; break;
} }
default: { default: {
if (nalu.is_video_slice()) { if (nalu.is_video_slice() && nalu.nuh_layer_id() == 0) {
*is_key_frame = nalu.type() == Nalu::H265_IDR_W_RADL || const bool is_key_frame = nalu.type() == Nalu::H265_IDR_W_RADL ||
nalu.type() == Nalu::H265_IDR_N_LP; nalu.type() == Nalu::H265_IDR_N_LP;
DVLOG(LOG_LEVEL_ES) << "Nalu: slice KeyFrame=" << is_key_frame; DVLOG(LOG_LEVEL_ES) << "Nalu: slice KeyFrame=" << is_key_frame;
H265SliceHeader shdr; H265SliceHeader shdr;
if (h265_parser_->ParseSliceHeader(nalu, &shdr) != H265Parser::kOk) { if (h265_parser_->ParseSliceHeader(nalu, &shdr) != H265Parser::kOk) {
@ -84,7 +84,10 @@ bool EsParserH265::ProcessNalu(const Nalu& nalu,
if (last_video_decoder_config_) if (last_video_decoder_config_)
return false; return false;
} else { } else {
*pps_id_for_access_unit = shdr.pic_parameter_set_id; video_slice_info->valid = true;
video_slice_info->is_key_frame = is_key_frame;
video_slice_info->frame_num = 0; // frame_num is only for H264.
video_slice_info->pps_id = shdr.pic_parameter_set_id;
} }
} else { } else {
DVLOG(LOG_LEVEL_ES) << "Nalu: " << nalu.type(); DVLOG(LOG_LEVEL_ES) << "Nalu: " << nalu.type();

View File

@ -35,11 +35,8 @@ class EsParserH265 : public EsParserH26x {
void Reset() override; void Reset() override;
private: private:
// Processes a NAL unit found in ParseInternal. The @a pps_id_for_access_unit // Processes a NAL unit found in ParseInternal.
// value will be passed to UpdateVideoDecoderConfig. bool ProcessNalu(const Nalu& nalu, VideoSliceInfo* video_slice_info) override;
bool ProcessNalu(const Nalu& nalu,
bool* is_key_frame,
int* pps_id_for_access_unit) override;
// Update the video decoder config based on an H264 SPS. // Update the video decoder config based on an H264 SPS.
// Return true if successful. // Return true if successful.

View File

@ -36,10 +36,7 @@ EsParserH26x::EsParserH26x(
emit_sample_cb_(emit_sample_cb), emit_sample_cb_(emit_sample_cb),
type_(type), type_(type),
es_queue_(new media::OffsetByteQueue()), es_queue_(new media::OffsetByteQueue()),
current_search_position_(0), stream_converter_(std::move(stream_converter)) {}
stream_converter_(std::move(stream_converter)),
pending_sample_duration_(0),
waiting_for_key_frame_(true) {}
EsParserH26x::~EsParserH26x() {} EsParserH26x::~EsParserH26x() {}
@ -69,43 +66,29 @@ bool EsParserH26x::Parse(const uint8_t* buf,
// Add the incoming bytes to the ES queue. // Add the incoming bytes to the ES queue.
es_queue_->Push(buf, size); es_queue_->Push(buf, size);
// We should always have entries in the vector and it should always start
// with |can_start_access_unit == true|. If not, we are just starting and
// should skip to the first access unit.
if (access_unit_nalus_.empty()) {
if (!SkipToFirstAccessUnit())
return true;
}
DCHECK(!access_unit_nalus_.empty());
DCHECK(access_unit_nalus_.front().nalu.can_start_access_unit());
return ParseInternal(); return ParseInternal();
} }
void EsParserH26x::Flush() { void EsParserH26x::Flush() {
DVLOG(1) << "EsParserH26x::Flush"; DVLOG(1) << "EsParserH26x::Flush";
// Simulate an additional AUD to force emitting the last access unit // Simulate two additional AUDs to force emitting the last access unit
// which is assumed to be complete at this point. // which is assumed to be complete at this point.
// Two AUDs are needed because the exact size of a NAL unit can only be
// determined after seeing the next NAL unit, so we need a second AUD to
// finish the parsing of the first AUD.
if (type_ == Nalu::kH264) { if (type_ == Nalu::kH264) {
const uint8_t aud[] = {0x00, 0x00, 0x01, 0x09}; const uint8_t aud[] = {0x00, 0x00, 0x01, 0x09, 0x00, 0x00, 0x01, 0x09};
es_queue_->Push(aud, sizeof(aud)); es_queue_->Push(aud, sizeof(aud));
} else { } else {
DCHECK_EQ(Nalu::kH265, type_); DCHECK_EQ(Nalu::kH265, type_);
const uint8_t aud[] = {0x00, 0x00, 0x01, 0x46, 0x01}; const uint8_t aud[] = {0x00, 0x00, 0x01, 0x46, 0x01,
0x00, 0x00, 0x01, 0x46, 0x01};
es_queue_->Push(aud, sizeof(aud)); es_queue_->Push(aud, sizeof(aud));
} }
CHECK(ParseInternal()); CHECK(ParseInternal());
// Note that the end argument is exclusive. We do not want to include the
// fake AUD we just added, so the argument should point to the AUD.
if (access_unit_nalus_.size() > 1 &&
!ProcessAccessUnit(access_unit_nalus_.end() - 1)) {
LOG(WARNING) << "Error processing last access unit.";
}
if (pending_sample_) { if (pending_sample_) {
// Flush pending sample. // Flush pending sample.
DCHECK(pending_sample_duration_); DCHECK(pending_sample_duration_);
@ -118,28 +101,18 @@ void EsParserH26x::Flush() {
void EsParserH26x::Reset() { void EsParserH26x::Reset() {
es_queue_.reset(new media::OffsetByteQueue()); es_queue_.reset(new media::OffsetByteQueue());
current_search_position_ = 0; current_search_position_ = 0;
access_unit_nalus_.clear(); current_access_unit_position_ = 0;
current_video_slice_info_.valid = false;
next_access_unit_position_set_ = false;
next_access_unit_position_ = 0;
current_nalu_info_.reset();
timing_desc_list_.clear(); timing_desc_list_.clear();
pending_sample_ = scoped_refptr<MediaSample>(); pending_sample_ = scoped_refptr<MediaSample>();
pending_sample_duration_ = 0; pending_sample_duration_ = 0;
waiting_for_key_frame_ = true; waiting_for_key_frame_ = true;
} }
bool EsParserH26x::SkipToFirstAccessUnit() { bool EsParserH26x::SearchForNalu(uint64_t* position, Nalu* nalu) {
DCHECK(access_unit_nalus_.empty());
while (access_unit_nalus_.empty()) {
if (!SearchForNextNalu())
return false;
// If we can't start an access unit, remove it and continue.
DCHECK_EQ(1u, access_unit_nalus_.size());
if (!access_unit_nalus_.back().nalu.can_start_access_unit())
access_unit_nalus_.clear();
}
return true;
}
bool EsParserH26x::SearchForNextNalu() {
const uint8_t* es; const uint8_t* es;
int es_size; int es_size;
es_queue_->PeekAt(current_search_position_, &es, &es_size); es_queue_->PeekAt(current_search_position_, &es, &es_size);
@ -158,7 +131,7 @@ bool EsParserH26x::SearchForNextNalu() {
} }
// Ensure the next NAL unit is a real NAL unit. // Ensure the next NAL unit is a real NAL unit.
const uint8_t* nalu_ptr = es + start_code_offset + start_code_size; const uint8_t* next_nalu_ptr = es + start_code_offset + start_code_size;
// This size is likely inaccurate, this is just to get the header info. // This size is likely inaccurate, this is just to get the header info.
const int64_t next_nalu_size = es_size - start_code_offset - start_code_size; const int64_t next_nalu_size = es_size - start_code_offset - start_code_size;
if (next_nalu_size < if (next_nalu_size <
@ -167,106 +140,129 @@ bool EsParserH26x::SearchForNextNalu() {
return false; return false;
} }
Nalu next_nalu; // Update search position for next nalu.
if (!next_nalu.Initialize(type_, nalu_ptr, next_nalu_size)) {
// The next NAL unit is invalid, skip it and search again.
current_search_position_ += start_code_offset + start_code_size;
return SearchForNextNalu();
}
current_search_position_ += start_code_offset + start_code_size; current_search_position_ += start_code_offset + start_code_size;
NaluInfo info; // |next_nalu_info_| is made global intentionally to avoid repetitive memory
info.position = current_search_position_ - start_code_size; // allocation which could create memory fragments.
info.start_code_size = start_code_size; if (!next_nalu_info_)
info.nalu = next_nalu; next_nalu_info_.reset(new NaluInfo);
access_unit_nalus_.push_back(info); if (!next_nalu_info_->nalu.Initialize(type_, next_nalu_ptr, next_nalu_size)) {
// This NAL unit is invalid, skip it and search again.
return true; return SearchForNalu(position, nalu);
}
bool EsParserH26x::ProcessAccessUnit(std::deque<NaluInfo>::iterator end) {
DCHECK(end < access_unit_nalus_.end());
auto begin = access_unit_nalus_.begin();
const uint8_t* es;
int es_size;
es_queue_->PeekAt(begin->position, &es, &es_size);
DCHECK_GE(static_cast<uint64_t>(es_size), (end->position - begin->position));
// Process the NAL units in the access unit.
bool is_key_frame = false;
int pps_id = -1;
for (auto it = begin; it != end; ++it) {
if (it->nalu.nuh_layer_id() == 0) {
// Update the NALU because the data pointer may have been invalidated.
CHECK(it->nalu.Initialize(
type_, es + (it->position - begin->position) + it->start_code_size,
((it+1)->position - it->position) - it->start_code_size));
if (!ProcessNalu(it->nalu, &is_key_frame, &pps_id))
return false;
}
} }
next_nalu_info_->position = current_search_position_ - start_code_size;
next_nalu_info_->start_code_size = start_code_size;
if (is_key_frame) const bool current_nalu_set = current_nalu_info_ ? true : false;
waiting_for_key_frame_ = false; if (current_nalu_info_) {
if (!waiting_for_key_frame_) { // Starting position for the nalu including start code.
const uint64_t access_unit_size = end->position - begin->position; *position = current_nalu_info_->position;
RCHECK(EmitFrame(begin->position, access_unit_size, is_key_frame, pps_id)); // Update the NALU because the data pointer may have been invalidated.
const uint8_t* current_nalu_ptr =
next_nalu_ptr +
(current_nalu_info_->position + current_nalu_info_->start_code_size) -
current_search_position_;
const uint64_t current_nalu_size = next_nalu_info_->position -
current_nalu_info_->position -
current_nalu_info_->start_code_size;
CHECK(nalu->Initialize(type_, current_nalu_ptr, current_nalu_size));
} }
current_nalu_info_.swap(next_nalu_info_);
return true; return current_nalu_set ? true : SearchForNalu(position, nalu);
} }
bool EsParserH26x::ParseInternal() { bool EsParserH26x::ParseInternal() {
while (true) { uint64_t position;
if (!SearchForNextNalu()) Nalu nalu;
return true; VideoSliceInfo video_slice_info;
while (SearchForNalu(&position, &nalu)) {
// ITU H.264 sec. 7.4.1.2.3 // ITU H.264 sec. 7.4.1.2.3
// H264: The first of the NAL units with |can_start_access_unit() == true| // H264: The first of the NAL units with |can_start_access_unit() == true|
// after the last VCL NAL unit of a primary coded picture specifies the // after the last VCL NAL unit of a primary coded picture specifies the
// start of a new access unit. |nuh_layer_id()| is for H265 only; it is // start of a new access unit.
// included below for ease of computation (the value is always 0).
// ITU H.265 sec. 7.4.2.4.4 // ITU H.265 sec. 7.4.2.4.4
// H265: The first of the NAL units with |can_start_access_unit() == true| // H265: The first of the NAL units with |can_start_access_unit() == true|
// after the last VCL NAL unit preceding firstBlPicNalUnit (the first // after the last VCL NAL unit preceding firstBlPicNalUnit (the first
// VCL NAL unit of a coded picture with nuh_layer_id equal to 0), if // VCL NAL unit of a coded picture with nuh_layer_id equal to 0), if
// any, specifies the start of a new access unit. // any, specifies the start of a new access unit.
DCHECK(!access_unit_nalus_.empty()); if (nalu.can_start_access_unit()) {
if (!access_unit_nalus_.back().nalu.is_video_slice() || if (!next_access_unit_position_set_) {
access_unit_nalus_.back().nalu.nuh_layer_id() != 0) { next_access_unit_position_set_ = true;
next_access_unit_position_ = position;
}
RCHECK(ProcessNalu(nalu, &video_slice_info));
if (nalu.is_video_slice() && !video_slice_info.valid) {
// This could happen only if decoder config is not available yet. Drop
// this frame.
DCHECK(!current_video_slice_info_.valid);
next_access_unit_position_set_ = false;
continue;
}
} else if (nalu.is_video_slice()) {
// This isn't the first VCL NAL unit. Next access unit should start after
// this NAL unit.
next_access_unit_position_set_ = false;
continue; continue;
} }
// First, find the end of the access unit. Search backward to find the // AUD shall be the first NAL unit if present. There shall be at most one
// first VCL NALU before the current one. // AUD in any access unit. We can emit the current access unit which shall
auto access_unit_end_rit = access_unit_nalus_.rbegin(); // not contain the AUD.
bool found_vcl = false; if (nalu.is_aud())
for (auto rit = access_unit_nalus_.rbegin() + 1; return EmitCurrentAccessUnit();
rit != access_unit_nalus_.rend(); ++rit) {
if (rit->nalu.is_video_slice()) { // We can only determine if the current access unit ends after seeing
found_vcl = true; // another VCL NAL unit.
break; if (!video_slice_info.valid)
} else if (rit->nalu.can_start_access_unit()) { continue;
// The start of the next access unit is the first unit with
// |can_start_access_unit| after the previous VCL unit. // Check if it is the first VCL NAL unit of a primary coded picture. It is
access_unit_end_rit = rit; // always true for H265 as nuh_layer_id shall be == 0 at this point.
bool is_first_vcl_nalu = true;
if (type_ == Nalu::kH264) {
if (current_video_slice_info_.valid) {
// ITU H.264 sec. 7.4.1.2.4 Detection of the first VCL NAL unit of a
// primary coded picture. Only pps_id and frame_num are checked here.
is_first_vcl_nalu =
video_slice_info.frame_num != current_video_slice_info_.frame_num ||
video_slice_info.pps_id != current_video_slice_info_.pps_id;
} }
} }
if (!found_vcl) if (!is_first_vcl_nalu) {
return true; // This isn't the first VCL NAL unit. Next access unit should start after
// this NAL unit.
next_access_unit_position_set_ = false;
continue;
}
// Get a forward iterator that corresponds to the same element pointed by DCHECK(next_access_unit_position_set_);
// |access_unit_end_rit|. Note: |end| refers to the exclusive end and RCHECK(EmitCurrentAccessUnit());
// will point to a valid object.
auto end = (access_unit_end_rit + 1).base();
if (!ProcessAccessUnit(end))
return false;
// Delete the data we have already processed. // Delete the data we have already processed.
es_queue_->Trim(end->position); es_queue_->Trim(next_access_unit_position_);
access_unit_nalus_.erase(access_unit_nalus_.begin(), end);
current_access_unit_position_ = next_access_unit_position_;
current_video_slice_info_ = video_slice_info;
next_access_unit_position_set_ = false;
} }
return true;
}
bool EsParserH26x::EmitCurrentAccessUnit() {
if (current_video_slice_info_.valid) {
if (current_video_slice_info_.is_key_frame)
waiting_for_key_frame_ = false;
if (!waiting_for_key_frame_) {
RCHECK(
EmitFrame(current_access_unit_position_,
next_access_unit_position_ - current_access_unit_position_,
current_video_slice_info_.is_key_frame,
current_video_slice_info_.pps_id));
}
current_video_slice_info_.valid = false;
}
return true;
} }
bool EsParserH26x::EmitFrame(int64_t access_unit_pos, bool EsParserH26x::EmitFrame(int64_t access_unit_pos,

View File

@ -39,6 +39,15 @@ class EsParserH26x : public EsParser {
void Reset() override; void Reset() override;
protected: protected:
// Summary of a video slice NAL unit, filled in by ProcessNalu implementations.
struct VideoSliceInfo {
// True only when the other members were filled in from a video slice NAL
// unit that was processed successfully.
bool valid = false;
// True if the slice belongs to a key frame.
bool is_key_frame = false;
// Both pps_id and frame_num are extracted from the slice header (frame_num is
// only for H.264).
int pps_id = 0;
int frame_num = 0;
};
const H26xByteToUnitStreamConverter* stream_converter() const { const H26xByteToUnitStreamConverter* stream_converter() const {
return stream_converter_.get(); return stream_converter_.get();
} }
@ -54,37 +63,33 @@ class EsParserH26x : public EsParser {
Nalu nalu; Nalu nalu;
// The offset of the NALU from the beginning of the stream, usable as an // The offset of the NALU from the beginning of the stream, usable as an
// argument to OffsetByteQueue. This points to the start code. // argument to OffsetByteQueue. This points to the start code.
uint64_t position; uint64_t position = 0;
uint8_t start_code_size; uint8_t start_code_size = 0;
}; };
// Processes a NAL unit found in ParseInternal. The @a pps_id_for_access_unit // Processes a NAL unit found in ParseInternal. |video_slice_info| should not
// value will be passed to UpdateVideoDecoderConfig. // be null, it will contain the video slice info if it is a video slice nalu
// and it is processed successfully; otherwise the |valid| member will be set
// to false with other members untouched.
virtual bool ProcessNalu(const Nalu& nalu, virtual bool ProcessNalu(const Nalu& nalu,
bool* is_key_frame, VideoSliceInfo* video_slice_info) = 0;
int* pps_id_for_access_unit) = 0;
// Update the video decoder config. // Update the video decoder config.
// Return true if successful. // Return true if successful.
virtual bool UpdateVideoDecoderConfig(int pps_id) = 0; virtual bool UpdateVideoDecoderConfig(int pps_id) = 0;
// Skips to the first access unit available. Returns whether an access unit // Finds the NAL unit by finding the next start code. This will modify the
// is found. // search position.
bool SkipToFirstAccessUnit(); // Returns true when it has found the NALU.
bool SearchForNalu(uint64_t* position, Nalu* nalu);
// Finds the next NAL unit by finding the next start code. This will modify
// the search position.
// Returns true when it has found the next NALU.
bool SearchForNextNalu();
// Process an access unit that spans the given NAL units (end is exclusive
// and should point to a valid object).
bool ProcessAccessUnit(std::deque<NaluInfo>::iterator end);
// Resumes the H26x ES parsing. // Resumes the H26x ES parsing.
// Return true if successful. // Return true if successful.
bool ParseInternal(); bool ParseInternal();
// Emits the current access unit, if one exists.
bool EmitCurrentAccessUnit();
// Emit a frame whose position in the ES queue starts at |access_unit_pos|. // Emit a frame whose position in the ES queue starts at |access_unit_pos|.
// Returns true if successful, false if no PTS is available for the frame. // Returns true if successful, false if no PTS is available for the frame.
bool EmitFrame(int64_t access_unit_pos, bool EmitFrame(int64_t access_unit_pos,
@ -104,21 +109,28 @@ class EsParserH26x : public EsParser {
// Parser state. // Parser state.
// The position of the search head. // The position of the search head.
uint64_t current_search_position_; uint64_t current_search_position_ = 0;
// The NALU that make up the current access unit. This may include elements // Current access unit starting position.
// from the next access unit. The last item is the NAL unit currently uint64_t current_access_unit_position_ = 0;
// being processed. // The VideoSliceInfo in the current access unit, useful for first vcl nalu
std::deque<NaluInfo> access_unit_nalus_; // detection (for H.264).
VideoSliceInfo current_video_slice_info_;
bool next_access_unit_position_set_ = false;
uint64_t next_access_unit_position_ = 0;
// Current nalu information.
std::unique_ptr<NaluInfo> current_nalu_info_;
// Temporary storage for the next nalu information.
std::unique_ptr<NaluInfo> next_nalu_info_;
// Filter to convert H.264/H.265 Annex B byte stream to unit stream. // Filter to convert H.264/H.265 Annex B byte stream to unit stream.
std::unique_ptr<H26xByteToUnitStreamConverter> stream_converter_; std::unique_ptr<H26xByteToUnitStreamConverter> stream_converter_;
// Frame for which we do not yet have a duration. // Frame for which we do not yet have a duration.
scoped_refptr<MediaSample> pending_sample_; scoped_refptr<MediaSample> pending_sample_;
uint64_t pending_sample_duration_; uint64_t pending_sample_duration_ = 0;
// Indicates whether waiting for first key frame. // Indicates whether waiting for first key frame.
bool waiting_for_key_frame_; bool waiting_for_key_frame_ = true;
}; };
} // namespace mp2t } // namespace mp2t

View File

@ -22,26 +22,43 @@ namespace mp2t {
namespace { namespace {
const int kH264RefIdc = 1 << 5;
// NAL unit types used for testing. // NAL unit types used for testing.
enum H265NaluType { enum H26xNaluType {
kAud = Nalu::H265_AUD, kH264Aud = Nalu::H264_AUD,
kSps = Nalu::H265_SPS, kH264Sps = Nalu::H264_SPS | kH264RefIdc,
kSei = Nalu::H265_PREFIX_SEI, kH264Sei = Nalu::H264_SEIMessage,
// Something with |can_start_access_unit() == false|. // Something with |can_start_access_unit() == false|.
kRsv = Nalu::H265_FD, kH264Rsv = Nalu::H264_FillerData,
// Non-key-frame video slice. // Non-key-frame video slice.
kVcl = Nalu::H265_TRAIL_N, kH264Vcl = Nalu::H264_NonIDRSlice,
kVclKeyFrame = Nalu::H265_IDR_W_RADL, // For testing purpose, the first 2 bits contains the frame num.
// Needs to be different than |kVCL| so we can tell the difference. kH264VclFrame0 = Nalu::H264_NonIDRSlice | (0 << 6),
kVclWithNuhLayer = Nalu::H265_TRAIL_R, kH264VclFrame1 = Nalu::H264_NonIDRSlice | (1 << 6),
kH264VclFrame2 = Nalu::H264_NonIDRSlice | (2 << 6),
kH264VclFrame3 = Nalu::H264_NonIDRSlice | (3 << 6),
kH264VclKeyFrame = Nalu::H264_IDRSlice | kH264RefIdc,
kH265Aud = Nalu::H265_AUD,
kH265Sps = Nalu::H265_SPS,
kH265Sei = Nalu::H265_PREFIX_SEI,
// Something with |can_start_access_unit() == false|.
kH265Rsv = Nalu::H265_FD,
// Non-key-frame video slice.
kH265Vcl = Nalu::H265_TRAIL_N,
kH265VclKeyFrame = Nalu::H265_IDR_W_RADL,
// Needs to be different from |kH265Vcl| so we can tell the difference.
kH265VclWithNuhLayer = Nalu::H265_TRAIL_R,
// Used to separate expected access units. // Used to separate expected access units.
kSeparator = 0xff, kSeparator = 0xff,
}; };
class FakeByteToUnitStreamConverter : public H26xByteToUnitStreamConverter { class FakeByteToUnitStreamConverter : public H26xByteToUnitStreamConverter {
public: public:
FakeByteToUnitStreamConverter() explicit FakeByteToUnitStreamConverter(Nalu::CodecType codec_type)
: H26xByteToUnitStreamConverter(Nalu::kH265) {} : H26xByteToUnitStreamConverter(codec_type) {}
bool GetDecoderConfigurationRecord( bool GetDecoderConfigurationRecord(
std::vector<uint8_t>* decoder_config) const override { std::vector<uint8_t>* decoder_config) const override {
@ -55,30 +72,40 @@ class FakeByteToUnitStreamConverter : public H26xByteToUnitStreamConverter {
}; };
// This is the code-under-test. This implements the required abstract methods // This is the code-under-test. This implements the required abstract methods
// to ignore the contents of the NAL units. This behaves the same as the // to ignore the contents of the NAL units.
// H.264 and H.265 types.
class TestableEsParser : public EsParserH26x { class TestableEsParser : public EsParserH26x {
public: public:
TestableEsParser(const NewStreamInfoCB& new_stream_info_cb, TestableEsParser(Nalu::CodecType codec_type,
const NewStreamInfoCB& new_stream_info_cb,
const EmitSampleCB& emit_sample_cb) const EmitSampleCB& emit_sample_cb)
: EsParserH26x(Nalu::kH265, : EsParserH26x(codec_type,
std::unique_ptr<H26xByteToUnitStreamConverter>( std::unique_ptr<H26xByteToUnitStreamConverter>(
new FakeByteToUnitStreamConverter()), new FakeByteToUnitStreamConverter(codec_type)),
0, 0,
emit_sample_cb), emit_sample_cb),
codec_type_(codec_type),
new_stream_info_cb_(new_stream_info_cb), new_stream_info_cb_(new_stream_info_cb),
decoder_config_check_pending_(false) {} decoder_config_check_pending_(false) {}
bool ProcessNalu(const Nalu& nalu, bool ProcessNalu(const Nalu& nalu,
bool* is_key_frame, VideoSliceInfo* video_slice_info) override {
int* pps_id_for_access_unit) override { if (codec_type_ == Nalu::kH264 ? (nalu.type() == Nalu::H264_SPS)
if (nalu.type() == Nalu::H265_SPS) { : (nalu.type() == Nalu::H265_SPS)) {
video_slice_info->valid = false;
decoder_config_check_pending_ = true; decoder_config_check_pending_ = true;
} else if (nalu.is_video_slice()) { } else if (nalu.is_video_slice()) {
// This should be the same as EsParserH265::ProcessNalu. video_slice_info->valid = true;
*is_key_frame = nalu.type() == Nalu::H265_IDR_W_RADL || // This should be the same as EsParserH26x::ProcessNalu.
nalu.type() == Nalu::H265_IDR_N_LP; if (codec_type_ == Nalu::kH264) {
*pps_id_for_access_unit = kTestPpsId; video_slice_info->is_key_frame = nalu.type() == Nalu::H264_IDRSlice;
} else {
video_slice_info->is_key_frame = nalu.type() == Nalu::H265_IDR_W_RADL ||
nalu.type() == Nalu::H265_IDR_N_LP;
}
video_slice_info->pps_id = kTestPpsId;
// For testing purposes, the frame_num is encoded in the first byte of the
// payload.
video_slice_info->frame_num = nalu.data()[nalu.header_size()];
} }
return true; return true;
} }
@ -95,19 +122,31 @@ class TestableEsParser : public EsParserH26x {
private: private:
const int kTestPpsId = 123; const int kTestPpsId = 123;
Nalu::CodecType codec_type_;
NewStreamInfoCB new_stream_info_cb_; NewStreamInfoCB new_stream_info_cb_;
bool decoder_config_check_pending_; bool decoder_config_check_pending_;
}; };
std::vector<uint8_t> CreateNalu(H265NaluType type, int i) { std::vector<uint8_t> CreateNalu(Nalu::CodecType codec_type,
H26xNaluType type,
int i) {
std::vector<uint8_t> ret; std::vector<uint8_t> ret;
ret.resize(4); if (codec_type == Nalu::kH264) {
ret[0] = (type << 1); ret.resize(3);
// nuh_layer_id == 1, nuh_temporal_id_plus1 == 1 // For testing purpose, the first 2 bits contains the frame num and encoded
ret[1] = (type == kVclWithNuhLayer ? 9 : 1); // in the first byte of the payload.
// Add some extra data to tell consecutive frames apart. ret[0] = (type & 0x3f);
ret[2] = 0xff; ret[1] = (type >> 6);
ret[3] = i + 1; ret[2] = i + 1;
} else {
ret.resize(4);
ret[0] = (type << 1);
// nuh_layer_id == 1, nuh_temporal_id_plus1 == 1
ret[1] = (type == kH265VclWithNuhLayer ? 9 : 1);
// Add some extra data to tell consecutive frames apart.
ret[2] = 0xff;
ret[3] = i + 1;
}
return ret; return ret;
} }
@ -120,7 +159,9 @@ class EsParserH26xTest : public testing::Test {
// Runs a test by constructing NAL units of the given types and passing them // Runs a test by constructing NAL units of the given types and passing them
// to the parser. Access units should be separated by |kSeparator|, there // to the parser. Access units should be separated by |kSeparator|, there
// should be one at the start and not at the end. // should be one at the start and not at the end.
void RunTest(const H265NaluType* types, size_t types_count); void RunTest(Nalu::CodecType codec_type,
const H26xNaluType* types,
size_t types_count);
void EmitSample(uint32_t pid, const scoped_refptr<MediaSample>& sample) { void EmitSample(uint32_t pid, const scoped_refptr<MediaSample>& sample) {
size_t sample_id = sample_count_; size_t sample_id = sample_count_;
@ -144,13 +185,15 @@ class EsParserH26xTest : public testing::Test {
bool has_stream_info_; bool has_stream_info_;
}; };
void EsParserH26xTest::RunTest(const H265NaluType* types, void EsParserH26xTest::RunTest(Nalu::CodecType codec_type,
const H26xNaluType* types,
size_t types_count) { size_t types_count) {
// Duration of one 25fps video frame in 90KHz clock units. // Duration of one 25fps video frame in 90KHz clock units.
const uint32_t kMpegTicksPerFrame = 3600; const uint32_t kMpegTicksPerFrame = 3600;
const uint8_t kStartCode[] = {0x00, 0x00, 0x01}; const uint8_t kStartCode[] = {0x00, 0x00, 0x01};
TestableEsParser es_parser( TestableEsParser es_parser(
codec_type,
base::Bind(&EsParserH26xTest::NewVideoConfig, base::Unretained(this)), base::Bind(&EsParserH26xTest::NewVideoConfig, base::Unretained(this)),
base::Bind(&EsParserH26xTest::EmitSample, base::Unretained(this))); base::Bind(&EsParserH26xTest::EmitSample, base::Unretained(this)));
@ -164,10 +207,15 @@ void EsParserH26xTest::RunTest(const H265NaluType* types,
samples_.push_back(cur_sample_data); samples_.push_back(cur_sample_data);
cur_sample_data.clear(); cur_sample_data.clear();
} else { } else {
if (types[k] == kVclKeyFrame) if (codec_type == Nalu::kH264) {
seen_key_frame = true; if (types[k] == kH264VclKeyFrame)
seen_key_frame = true;
} else {
if (types[k] == kH265VclKeyFrame)
seen_key_frame = true;
}
std::vector<uint8_t> es_data = CreateNalu(types[k], k); std::vector<uint8_t> es_data = CreateNalu(codec_type, types[k], k);
cur_sample_data.push_back(0); cur_sample_data.push_back(0);
cur_sample_data.push_back(0); cur_sample_data.push_back(0);
cur_sample_data.push_back(0); cur_sample_data.push_back(0);
@ -198,107 +246,130 @@ void EsParserH26xTest::RunTest(const H265NaluType* types,
es_parser.Flush(); es_parser.Flush();
} }
TEST_F(EsParserH26xTest, BasicSupport) { TEST_F(EsParserH26xTest, H265BasicSupport) {
const H265NaluType kData[] = { const H26xNaluType kData[] = {
kSeparator, kAud, kSps, kVclKeyFrame, kSeparator, kH265Aud, kH265Sps, kH265VclKeyFrame,
kSeparator, kAud, kVcl, kSeparator, kH265Aud, kH265Vcl,
kSeparator, kAud, kVcl, kSeparator, kH265Aud, kH265Vcl,
}; };
RunTest(kData, arraysize(kData)); RunTest(Nalu::kH265, kData, arraysize(kData));
EXPECT_EQ(3u, sample_count_); EXPECT_EQ(3u, sample_count_);
EXPECT_TRUE(has_stream_info_); EXPECT_TRUE(has_stream_info_);
} }
TEST_F(EsParserH26xTest, DeterminesAccessUnitsWithoutAUD) { TEST_F(EsParserH26xTest, H265DeterminesAccessUnitsWithoutAUD) {
const H265NaluType kData[] = { const H26xNaluType kData[] = {
kSeparator, kSps, kVclKeyFrame, kSeparator, kH265Sps, kH265VclKeyFrame,
kSeparator, kVcl, kSeparator, kH265Vcl,
kSeparator, kVcl, kSeparator, kH265Vcl,
kSeparator, kSei, kVcl, kSeparator, kH265Sei, kH265Vcl,
}; };
RunTest(kData, arraysize(kData)); RunTest(Nalu::kH265, kData, arraysize(kData));
EXPECT_EQ(4u, sample_count_); EXPECT_EQ(4u, sample_count_);
EXPECT_TRUE(has_stream_info_); EXPECT_TRUE(has_stream_info_);
} }
TEST_F(EsParserH26xTest, DoesNotStartOnRsv) { TEST_F(EsParserH26xTest, H265DoesNotStartOnRsv) {
const H265NaluType kData[] = { const H26xNaluType kData[] = {
kSeparator, kSps, kVclKeyFrame, kRsv, kSeparator, kH265Sps, kH265VclKeyFrame, kH265Rsv,
kSeparator, kAud, kVcl, kSeparator, kH265Aud, kH265Vcl,
kSeparator, kSei, kVcl, kSeparator, kH265Sei, kH265Vcl,
}; };
RunTest(kData, arraysize(kData)); RunTest(Nalu::kH265, kData, arraysize(kData));
EXPECT_EQ(3u, sample_count_); EXPECT_EQ(3u, sample_count_);
EXPECT_TRUE(has_stream_info_); EXPECT_TRUE(has_stream_info_);
} }
TEST_F(EsParserH26xTest, SupportsNonZeroNuhLayerId) { TEST_F(EsParserH26xTest, H265SupportsNonZeroNuhLayerId) {
const H265NaluType kData[] = { const H26xNaluType kData[] = {
kSeparator, kSps, kVclKeyFrame, kSeparator, kH265Sps, kH265VclKeyFrame,
kSeparator, kAud, kVcl, kSei, kSei, kVclWithNuhLayer, kRsv, kSeparator, kH265Aud, kH265Vcl, kH265Sei, kH265VclWithNuhLayer, kH265Rsv,
kSeparator, kSei, kVcl, kSeparator, kH265Sei, kH265Vcl,
kSeparator, kAud, kVcl, kSps, kRsv, kVclWithNuhLayer, kSeparator, kH265Aud, kH265Vcl, kH265Sps, kH265Rsv, kH265VclWithNuhLayer,
kSeparator, kVcl, kSeparator, kH265Vcl,
}; };
RunTest(kData, arraysize(kData)); RunTest(Nalu::kH265, kData, arraysize(kData));
EXPECT_EQ(5u, sample_count_); EXPECT_EQ(5u, sample_count_);
EXPECT_TRUE(has_stream_info_); EXPECT_TRUE(has_stream_info_);
} }
TEST_F(EsParserH26xTest, WaitsForKeyFrame) { TEST_F(EsParserH26xTest, H265WaitsForKeyFrame) {
const H265NaluType kData[] = { const H26xNaluType kData[] = {
kSeparator, kVcl, kSeparator, kH265Vcl,
kSeparator, kVcl, kSeparator, kH265Vcl,
kSeparator, kSps, kVclKeyFrame, kSeparator, kH265Sps, kH265VclKeyFrame,
kSeparator, kVcl, kSeparator, kH265Vcl,
kSeparator, kVcl, kSeparator, kH265Vcl,
}; };
RunTest(kData, arraysize(kData)); RunTest(Nalu::kH265, kData, arraysize(kData));
EXPECT_EQ(3u, sample_count_); EXPECT_EQ(3u, sample_count_);
EXPECT_TRUE(has_stream_info_); EXPECT_TRUE(has_stream_info_);
} }
TEST_F(EsParserH26xTest, EmitsFramesWithNoStreamInfo) { TEST_F(EsParserH26xTest, H265EmitsFramesWithNoStreamInfo) {
const H265NaluType kData[] = { const H26xNaluType kData[] = {
kSeparator, kVclKeyFrame, kSeparator, kH265VclKeyFrame,
kSeparator, kVcl, kSeparator, kH265Vcl, kH265Rsv,
kSeparator, kVcl, kSeparator, kH265Sei, kH265Vcl,
}; };
RunTest(kData, arraysize(kData)); RunTest(Nalu::kH265, kData, arraysize(kData));
EXPECT_EQ(3u, sample_count_); EXPECT_EQ(3u, sample_count_);
EXPECT_FALSE(has_stream_info_); EXPECT_FALSE(has_stream_info_);
} }
TEST_F(EsParserH26xTest, EmitsLastFrameWhenDoesntEndOnVCL) { TEST_F(EsParserH26xTest, H265EmitsLastFrameWithNuhLayerId) {
// This tests that it will emit the last frame and last frame will include const H26xNaluType kData[] = {
// the correct data and nothing extra. kSeparator, kH265VclKeyFrame,
const H265NaluType kData[] = { kSeparator, kH265Vcl,
kSeparator, kVclKeyFrame, kSeparator, kH265Vcl, kH265Sei, kH265VclWithNuhLayer, kH265Rsv,
kSeparator, kVcl,
kSeparator, kVcl, kSei,
}; };
RunTest(kData, arraysize(kData)); RunTest(Nalu::kH265, kData, arraysize(kData));
EXPECT_EQ(3u, sample_count_); EXPECT_EQ(3u, sample_count_);
EXPECT_FALSE(has_stream_info_); EXPECT_FALSE(has_stream_info_);
} }
TEST_F(EsParserH26xTest, EmitsLastFrameWithNuhLayerId) { TEST_F(EsParserH26xTest, H264BasicSupport) {
const H265NaluType kData[] = { const H26xNaluType kData[] = {
kSeparator, kVclKeyFrame, kSeparator, kH264Aud, kH264Sps, kH264VclKeyFrame,
kSeparator, kVcl, kSeparator, kH264Aud, kH264Vcl,
kSeparator, kVcl, kVclWithNuhLayer, kSei, kSeparator, kH264Aud, kH264Vcl,
}; };
RunTest(kData, arraysize(kData)); RunTest(Nalu::kH264, kData, arraysize(kData));
EXPECT_EQ(3u, sample_count_); EXPECT_EQ(3u, sample_count_);
EXPECT_FALSE(has_stream_info_); EXPECT_TRUE(has_stream_info_);
}
TEST_F(EsParserH26xTest, H264DeterminesAccessUnitsWithoutAUD) {
const H26xNaluType kData[] = {
kSeparator, kH264Sps, kH264VclKeyFrame,
kSeparator, kH264VclFrame1, kH264VclFrame1,
kSeparator, kH264VclFrame2, kH264VclFrame2, kH264VclFrame2,
kSeparator, kH264Sei, kH264VclFrame3,
};
RunTest(Nalu::kH264, kData, arraysize(kData));
EXPECT_EQ(4u, sample_count_);
EXPECT_TRUE(has_stream_info_);
}
TEST_F(EsParserH26xTest, H264DoesNotStartOnRsv) {
const H26xNaluType kData[] = {
kSeparator, kH264Sps, kH264VclKeyFrame, kH264Rsv,
kSeparator, kH264Aud, kH264VclFrame1,
kSeparator, kH264Sei, kH264VclFrame2,
};
RunTest(Nalu::kH264, kData, arraysize(kData));
EXPECT_EQ(3u, sample_count_);
EXPECT_TRUE(has_stream_info_);
} }
} // namespace mp2t } // namespace mp2t

View File

@ -138,7 +138,7 @@ TEST_F(Mp2tMediaParserTest, UnalignedAppend512_H264) {
TEST_F(Mp2tMediaParserTest, UnalignedAppend17_H265) { TEST_F(Mp2tMediaParserTest, UnalignedAppend17_H265) {
// Test small, non-segment-aligned appends. // Test small, non-segment-aligned appends.
ParseMpeg2TsFile("bear-640x360-hevc.ts", 17); ParseMpeg2TsFile("bear-640x360-hevc.ts", 17);
EXPECT_EQ(79, video_frame_count_); EXPECT_EQ(78, video_frame_count_);
EXPECT_TRUE(parser_->Flush()); EXPECT_TRUE(parser_->Flush());
EXPECT_EQ(82, video_frame_count_); EXPECT_EQ(82, video_frame_count_);
} }
@ -146,7 +146,7 @@ TEST_F(Mp2tMediaParserTest, UnalignedAppend17_H265) {
TEST_F(Mp2tMediaParserTest, UnalignedAppend512_H265) { TEST_F(Mp2tMediaParserTest, UnalignedAppend512_H265) {
// Test small, non-segment-aligned appends. // Test small, non-segment-aligned appends.
ParseMpeg2TsFile("bear-640x360-hevc.ts", 512); ParseMpeg2TsFile("bear-640x360-hevc.ts", 512);
EXPECT_EQ(79, video_frame_count_); EXPECT_EQ(78, video_frame_count_);
EXPECT_TRUE(parser_->Flush()); EXPECT_TRUE(parser_->Flush());
EXPECT_EQ(82, video_frame_count_); EXPECT_EQ(82, video_frame_count_);
} }