Optimize the MPEG2-TS demuxer.

Change-Id: I5836fc8503fe22a3d73241dda00c36d0ffb26e65
This commit is contained in:
Jacob Trimble 2016-05-03 15:05:45 -07:00
parent d08f6ae0cc
commit cb3b277575
4 changed files with 201 additions and 136 deletions

View File

@ -12,14 +12,22 @@
#include "packager/media/base/offset_byte_queue.h" #include "packager/media/base/offset_byte_queue.h"
#include "packager/media/base/timestamp.h" #include "packager/media/base/timestamp.h"
#include "packager/media/base/video_stream_info.h" #include "packager/media/base/video_stream_info.h"
#include "packager/media/codecs/h264_byte_to_unit_stream_converter.h" #include "packager/media/codecs/h26x_byte_to_unit_stream_converter.h"
#include "packager/media/codecs/h265_byte_to_unit_stream_converter.h"
#include "packager/media/formats/mp2t/mp2t_common.h" #include "packager/media/formats/mp2t/mp2t_common.h"
namespace shaka { namespace shaka {
namespace media { namespace media {
namespace mp2t { namespace mp2t {
namespace {
const int kStartCodeSize = 3;
const int kH264NaluHeaderSize = 1;
const int kH265NaluHeaderSize = 2;
} // namespace
EsParserH26x::EsParserH26x( EsParserH26x::EsParserH26x(
Nalu::CodecType type, Nalu::CodecType type,
scoped_ptr<H26xByteToUnitStreamConverter> stream_converter, scoped_ptr<H26xByteToUnitStreamConverter> stream_converter,
@ -29,8 +37,7 @@ EsParserH26x::EsParserH26x(
emit_sample_cb_(emit_sample_cb), emit_sample_cb_(emit_sample_cb),
type_(type), type_(type),
es_queue_(new media::OffsetByteQueue()), es_queue_(new media::OffsetByteQueue()),
current_access_unit_pos_(0), current_search_position_(0),
found_access_unit_(false),
stream_converter_(stream_converter.Pass()), stream_converter_(stream_converter.Pass()),
pending_sample_duration_(0), pending_sample_duration_(0),
waiting_for_key_frame_(true) {} waiting_for_key_frame_(true) {}
@ -64,15 +71,15 @@ bool EsParserH26x::Parse(const uint8_t* buf,
// Add the incoming bytes to the ES queue. // Add the incoming bytes to the ES queue.
es_queue_->Push(buf, size); es_queue_->Push(buf, size);
// Skip to the first access unit. // We should always have entries in |access_unit_nalus_| and it should always start
if (!found_access_unit_) { // with |can_start_access_unit == true|. If not, we are just starting and
if (!FindNextAccessUnit(current_access_unit_pos_, // should skip to the first access unit.
&current_access_unit_pos_)) { if (access_unit_nalus_.empty()) {
if (!SkipToFirstAccessUnit())
return true; return true;
} }
es_queue_->Trim(current_access_unit_pos_); DCHECK(!access_unit_nalus_.empty());
found_access_unit_ = true; DCHECK(access_unit_nalus_.front().nalu.can_start_access_unit());
}
return ParseInternal(); return ParseInternal();
} }
@ -83,14 +90,22 @@ void EsParserH26x::Flush() {
// Simulate an additional AUD to force emitting the last access unit // Simulate an additional AUD to force emitting the last access unit
// which is assumed to be complete at this point. // which is assumed to be complete at this point.
if (type_ == Nalu::kH264) { if (type_ == Nalu::kH264) {
uint8_t aud[] = {0x00, 0x00, 0x01, 0x09}; const uint8_t aud[] = {0x00, 0x00, 0x01, 0x09};
es_queue_->Push(aud, sizeof(aud)); es_queue_->Push(aud, sizeof(aud));
} else { } else {
DCHECK_EQ(Nalu::kH265, type_); DCHECK_EQ(Nalu::kH265, type_);
uint8_t aud[] = {0x00, 0x00, 0x01, 0x46, 0x01}; const uint8_t aud[] = {0x00, 0x00, 0x01, 0x46, 0x01};
es_queue_->Push(aud, sizeof(aud)); es_queue_->Push(aud, sizeof(aud));
} }
ParseInternal();
CHECK(ParseInternal());
// Note that the end argument is exclusive. We do not want to include the
// fake AUD we just added, so the argument should point to the AUD.
if (access_unit_nalus_.size() > 1 &&
!ProcessAccessUnit(access_unit_nalus_.end() - 1)) {
LOG(WARNING) << "Error processing last access unit.";
}
if (pending_sample_) { if (pending_sample_) {
// Flush pending sample. // Flush pending sample.
@ -103,43 +118,111 @@ void EsParserH26x::Flush() {
void EsParserH26x::Reset() { void EsParserH26x::Reset() {
es_queue_.reset(new media::OffsetByteQueue()); es_queue_.reset(new media::OffsetByteQueue());
current_access_unit_pos_ = 0; current_search_position_ = 0;
found_access_unit_ = false; access_unit_nalus_.clear();
timing_desc_list_.clear(); timing_desc_list_.clear();
pending_sample_ = scoped_refptr<MediaSample>(); pending_sample_ = scoped_refptr<MediaSample>();
pending_sample_duration_ = 0; pending_sample_duration_ = 0;
waiting_for_key_frame_ = true; waiting_for_key_frame_ = true;
} }
bool EsParserH26x::FindNextAccessUnit(int64_t stream_pos, bool EsParserH26x::SkipToFirstAccessUnit() {
int64_t* next_unit_pos) { DCHECK(access_unit_nalus_.empty());
// TODO(modmaker): Avoid re-parsing by saving old position. while (access_unit_nalus_.empty()) {
// Every access unit must have a VCL entry and defines the end of the access if (!SearchForNextNalu())
// unit. Track it to return on the element after it so we get the whole return false;
// access unit.
bool seen_vcl_nalu = false; // If we can't start an access unit, remove it and continue.
while (true) { DCHECK_EQ(1u, access_unit_nalus_.size());
if (!access_unit_nalus_.back().nalu.can_start_access_unit())
access_unit_nalus_.clear();
}
return true;
}
bool EsParserH26x::SearchForNextNalu() {
const uint8_t* es; const uint8_t* es;
int size; int es_size;
es_queue_->PeekAt(stream_pos, &es, &size); es_queue_->PeekAt(current_search_position_, &es, &es_size);
// Find a start code. // Find a start code.
uint64_t start_code_offset; uint64_t start_code_offset;
uint8_t start_code_size; uint8_t start_code_size;
bool start_code_found = NaluReader::FindStartCode( const bool start_code_found = NaluReader::FindStartCode(
es, size, &start_code_offset, &start_code_size); es, es_size, &start_code_offset, &start_code_size);
stream_pos += start_code_offset;
// No start code found or NALU type not available yet. if (!start_code_found) {
if (!start_code_found || // We didn't find a start code, so we don't have to search this data again.
start_code_offset + start_code_size >= static_cast<uint64_t>(size)) { if (es_size > kStartCodeSize)
current_search_position_ += es_size - kStartCodeSize;
return false; return false;
} }
Nalu nalu; // Ensure the next NAL unit is a real NAL unit.
const uint8_t* nalu_ptr = es + start_code_offset + start_code_size; const uint8_t* nalu_ptr = es + start_code_offset + start_code_size;
size_t nalu_size = size - (start_code_offset + start_code_size); // This size is likely inaccurate; it is only used to get the header info.
if (nalu.Initialize(type_, nalu_ptr, nalu_size)) { const int64_t next_nalu_size = es_size - start_code_offset - start_code_size;
if (next_nalu_size <
(type_ == Nalu::kH264 ? kH264NaluHeaderSize : kH265NaluHeaderSize)) {
// There was not enough data, wait for more.
return false;
}
Nalu next_nalu;
if (!next_nalu.Initialize(type_, nalu_ptr, next_nalu_size)) {
// The next NAL unit is invalid, skip it and search again.
current_search_position_ += start_code_offset + start_code_size;
return SearchForNextNalu();
}
current_search_position_ += start_code_offset + start_code_size;
NaluInfo info;
info.position = current_search_position_ - start_code_size;
info.start_code_size = start_code_size;
info.nalu = next_nalu;
access_unit_nalus_.push_back(info);
return true;
}
bool EsParserH26x::ProcessAccessUnit(std::deque<NaluInfo>::iterator end) {
DCHECK(end < access_unit_nalus_.end());
auto begin = access_unit_nalus_.begin();
const uint8_t* es;
int es_size;
es_queue_->PeekAt(begin->position, &es, &es_size);
DCHECK_GE(static_cast<uint64_t>(es_size), (end->position - begin->position));
// Process the NAL units in the access unit.
bool is_key_frame = false;
int pps_id = -1;
for (auto it = begin; it != end; ++it) {
if (it->nalu.nuh_layer_id() == 0) {
// Update the NALU because the data pointer may have been invalidated.
CHECK(it->nalu.Initialize(
type_, es + (it->position - begin->position) + it->start_code_size,
((it+1)->position - it->position) - it->start_code_size));
if (!ProcessNalu(it->nalu, &is_key_frame, &pps_id))
return false;
}
}
if (is_key_frame)
waiting_for_key_frame_ = false;
if (!waiting_for_key_frame_) {
const uint64_t access_unit_size = end->position - begin->position;
RCHECK(EmitFrame(begin->position, access_unit_size, is_key_frame, pps_id));
}
return true;
}
bool EsParserH26x::ParseInternal() {
while (true) {
if (!SearchForNextNalu())
return true;
// ITU H.264 sec. 7.4.1.2.3 // ITU H.264 sec. 7.4.1.2.3
// H264: The first of the NAL units with |can_start_access_unit() == true| // H264: The first of the NAL units with |can_start_access_unit() == true|
// after the last VCL NAL unit of a primary coded picture specifies the // after the last VCL NAL unit of a primary coded picture specifies the
@ -150,83 +233,41 @@ bool EsParserH26x::FindNextAccessUnit(int64_t stream_pos,
// after the last VCL NAL unit preceding firstBlPicNalUnit (the first // after the last VCL NAL unit preceding firstBlPicNalUnit (the first
// VCL NAL unit of a coded picture with nuh_layer_id equal to 0), if // VCL NAL unit of a coded picture with nuh_layer_id equal to 0), if
// any, specifies the start of a new access unit. // any, specifies the start of a new access unit.
// TODO(modmaker): This does not handle nuh_layer_id != 0 correctly. DCHECK(!access_unit_nalus_.empty());
// AUD VCL SEI VCL* VPS VCL if (!access_unit_nalus_.back().nalu.is_video_slice() ||
// | Current method splits here. access_unit_nalus_.back().nalu.nuh_layer_id() != 0) {
// | Should split here. continue;
// If we are searching for the first access unit, then stop at the first }
// NAL unit that can start an access unit.
if ((seen_vcl_nalu || !found_access_unit_) && // First, find the end of the access unit. Search backward to find the
nalu.can_start_access_unit()) { // first VCL NALU before the current one.
auto access_unit_end_rit = access_unit_nalus_.rbegin();
bool found_vcl = false;
for (auto rit = access_unit_nalus_.rbegin() + 1;
rit != access_unit_nalus_.rend(); ++rit) {
if (rit->nalu.is_video_slice()) {
found_vcl = true;
break; break;
} else if (rit->nalu.can_start_access_unit()) {
// The start of the next access unit is the first unit with
// |can_start_access_unit| after the previous VCL unit.
access_unit_end_rit = rit;
} }
bool is_vcl_nalu = nalu.is_video_slice() && nalu.nuh_layer_id() == 0;
seen_vcl_nalu |= is_vcl_nalu;
} }
if (!found_vcl)
// The current NALU is not an AUD, skip the start code
// and continue parsing the stream.
stream_pos += start_code_size;
}
*next_unit_pos = stream_pos;
return true;
}
bool EsParserH26x::ParseInternal() {
DCHECK_LE(es_queue_->head(), current_access_unit_pos_);
DCHECK_LE(current_access_unit_pos_, es_queue_->tail());
// Resume parsing later if no AUD was found.
int64_t access_unit_end;
if (!FindNextAccessUnit(current_access_unit_pos_, &access_unit_end))
return true; return true;
// At this point, we know we have a full access unit. // Get a forward iterator that corresponds to the same element pointed to by
bool is_key_frame = false; // |access_unit_end_rit|. Note: |end| refers to the exclusive end and
int pps_id_for_access_unit = -1; // will point to a valid object.
auto end = (access_unit_end_rit + 1).base();
const uint8_t* es; if (!ProcessAccessUnit(end))
int size;
es_queue_->PeekAt(current_access_unit_pos_, &es, &size);
int access_unit_size = base::checked_cast<int, int64_t>(
access_unit_end - current_access_unit_pos_);
DCHECK_LE(access_unit_size, size);
NaluReader reader(type_, kIsAnnexbByteStream, es, access_unit_size);
// TODO(modmaker): Consider combining with FindNextAccessUnit to avoid
// scanning the data twice.
while (true) {
Nalu nalu;
bool is_eos = false;
switch (reader.Advance(&nalu)) {
case NaluReader::kOk:
break;
case NaluReader::kEOStream:
is_eos = true;
break;
default:
return false; return false;
}
if (is_eos)
break;
if (!ProcessNalu(nalu, &is_key_frame, &pps_id_for_access_unit)) // Delete the data we have already processed.
return false; es_queue_->Trim(end->position);
access_unit_nalus_.erase(access_unit_nalus_.begin(), end);
} }
if (waiting_for_key_frame_) {
waiting_for_key_frame_ = !is_key_frame;
}
if (!waiting_for_key_frame_) {
// Emit a frame and move the stream to the next AUD position.
RCHECK(EmitFrame(current_access_unit_pos_, access_unit_size,
is_key_frame, pps_id_for_access_unit));
}
current_access_unit_pos_ = access_unit_end;
es_queue_->Trim(current_access_unit_pos_);
return true;
} }
bool EsParserH26x::EmitFrame(int64_t access_unit_pos, bool EsParserH26x::EmitFrame(int64_t access_unit_pos,
@ -244,12 +285,11 @@ bool EsParserH26x::EmitFrame(int64_t access_unit_pos,
return false; return false;
// Emit a frame. // Emit a frame.
DVLOG(LOG_LEVEL_ES) << "Emit frame: stream_pos=" << current_access_unit_pos_ DVLOG(LOG_LEVEL_ES) << "Emit frame: stream_pos=" << access_unit_pos
<< " size=" << access_unit_size; << " size=" << access_unit_size;
int es_size; int es_size;
const uint8_t* es; const uint8_t* es;
es_queue_->PeekAt(current_access_unit_pos_, &es, &es_size); es_queue_->PeekAt(access_unit_pos, &es, &es_size);
CHECK_GE(es_size, access_unit_size);
// Convert frame to unit stream format. // Convert frame to unit stream format.
std::vector<uint8_t> converted_frame; std::vector<uint8_t> converted_frame;

View File

@ -7,6 +7,7 @@
#include <stdint.h> #include <stdint.h>
#include <deque>
#include <list> #include <list>
#include "packager/base/callback.h" #include "packager/base/callback.h"
@ -47,6 +48,15 @@ class EsParserH26x : public EsParser {
int64_t dts; int64_t dts;
int64_t pts; int64_t pts;
}; };
struct NaluInfo {
// NOTE: Nalu does not own the memory pointed to by its data pointers. The
// caller owns and maintains the memory.
Nalu nalu;
// The offset of the NALU from the beginning of the stream, usable as an
// argument to OffsetByteQueue. This points to the start code.
uint64_t position;
uint8_t start_code_size;
};
// Processes a NAL unit found in ParseInternal. The @a pps_id_for_access_unit // Processes a NAL unit found in ParseInternal. The @a pps_id_for_access_unit
// value will be passed to UpdateVideoDecoderConfig. // value will be passed to UpdateVideoDecoderConfig.
@ -58,13 +68,20 @@ class EsParserH26x : public EsParser {
// Return true if successful. // Return true if successful.
virtual bool UpdateVideoDecoderConfig(int pps_id) = 0; virtual bool UpdateVideoDecoderConfig(int pps_id) = 0;
// Find the start of the next access unit staring at |stream_pos|. // Skips to the first access unit available. Returns whether an access unit
// Return true if the end is found. // is found.
// If found, |*next_unit_start| contains the start of the next access unit. bool SkipToFirstAccessUnit();
// Otherwise, |*next_unit_start| is unchanged.
bool FindNextAccessUnit(int64_t stream_pos, int64_t* next_unit_start);
// Resumes the H264 ES parsing. // Finds the next NAL unit by finding the next start code. This will modify
// the search position.
// Returns true when it has found the next NALU.
bool SearchForNextNalu();
// Process an access unit that spans the given NAL units (end is exclusive
// and should point to a valid object).
bool ProcessAccessUnit(std::deque<NaluInfo>::iterator end);
// Resumes the H26x ES parsing.
// Return true if successful. // Return true if successful.
bool ParseInternal(); bool ParseInternal();
@ -86,10 +103,12 @@ class EsParserH26x : public EsParser {
std::list<std::pair<int64_t, TimingDesc>> timing_desc_list_; std::list<std::pair<int64_t, TimingDesc>> timing_desc_list_;
// Parser state. // Parser state.
// - |current_access_unit_pos_| is pointing to an annexB syncword // The position of the search head.
// representing the first NALU of an access unit. uint64_t current_search_position_;
int64_t current_access_unit_pos_; // The NALUs that make up the current access unit. This may include elements
bool found_access_unit_; // from the next access unit. The last item is the NAL unit currently
// being processed.
std::deque<NaluInfo> access_unit_nalus_;
// Filter to convert H.264/H.265 Annex B byte stream to unit stream. // Filter to convert H.264/H.265 Annex B byte stream to unit stream.
scoped_ptr<H26xByteToUnitStreamConverter> stream_converter_; scoped_ptr<H26xByteToUnitStreamConverter> stream_converter_;

View File

@ -182,7 +182,14 @@ void EsParserH26xTest::RunTest(const H265NaluType* types,
// This may process the previous sample; but since we don't know whether // This may process the previous sample; but since we don't know whether
// we are at the end yet, this will not process the current sample until // we are at the end yet, this will not process the current sample until
// later. // later.
ASSERT_TRUE(es_parser.Parse(es_data.data(), es_data.size(), pts, dts)); size_t offset = 0;
size_t size = 1;
while (offset < es_data.size()) {
// Insert the data in parts to test partial data searches.
size = std::min(size + 1, es_data.size() - offset);
ASSERT_TRUE(es_parser.Parse(&es_data[offset], size, pts, dts));
offset += size;
}
} }
} }
if (seen_key_frame) if (seen_key_frame)
@ -228,7 +235,7 @@ TEST_F(EsParserH26xTest, DoesNotStartOnRsv) {
EXPECT_TRUE(has_stream_info_); EXPECT_TRUE(has_stream_info_);
} }
TEST_F(EsParserH26xTest, DISABLED_SupportsNonZeroNuhLayerId) { TEST_F(EsParserH26xTest, SupportsNonZeroNuhLayerId) {
const H265NaluType kData[] = { const H265NaluType kData[] = {
kSeparator, kSps, kVclKeyFrame, kSeparator, kSps, kVclKeyFrame,
kSeparator, kAud, kVcl, kSei, kSei, kVclWithNuhLayer, kRsv, kSeparator, kAud, kVcl, kSei, kSei, kVclWithNuhLayer, kRsv,
@ -268,9 +275,7 @@ TEST_F(EsParserH26xTest, EmitsFramesWithNoStreamInfo) {
EXPECT_FALSE(has_stream_info_); EXPECT_FALSE(has_stream_info_);
} }
// TODO(modmaker): Currently, the SEI here will not be included. This needs to TEST_F(EsParserH26xTest, EmitsLastFrameWhenDoesntEndOnVCL) {
// be fixed.
TEST_F(EsParserH26xTest, DISABLED_EmitsLastFrameWhenDoesntEndOnVCL) {
// This tests that it will emit the last frame and last frame will include // This tests that it will emit the last frame and last frame will include
// the correct data and nothing extra. // the correct data and nothing extra.
const H265NaluType kData[] = { const H265NaluType kData[] = {
@ -284,7 +289,7 @@ TEST_F(EsParserH26xTest, DISABLED_EmitsLastFrameWhenDoesntEndOnVCL) {
EXPECT_FALSE(has_stream_info_); EXPECT_FALSE(has_stream_info_);
} }
TEST_F(EsParserH26xTest, DISABLED_EmitsLastFrameWithNuhLayerId) { TEST_F(EsParserH26xTest, EmitsLastFrameWithNuhLayerId) {
const H265NaluType kData[] = { const H265NaluType kData[] = {
kSeparator, kVclKeyFrame, kSeparator, kVclKeyFrame,
kSeparator, kVcl, kSeparator, kVcl,

View File

@ -74,6 +74,7 @@ class Mp2tMediaParserTest : public testing::Test {
bool OnNewSample(uint32_t track_id, bool OnNewSample(uint32_t track_id,
const scoped_refptr<MediaSample>& sample) { const scoped_refptr<MediaSample>& sample) {
StreamMap::const_iterator stream = stream_map_.find(track_id); StreamMap::const_iterator stream = stream_map_.find(track_id);
EXPECT_NE(stream_map_.end(), stream);
if (stream != stream_map_.end()) { if (stream != stream_map_.end()) {
if (stream->second->stream_type() == kStreamAudio) { if (stream->second->stream_type() == kStreamAudio) {
++audio_frame_count_; ++audio_frame_count_;