Implemented H.264 byte stream to unit stream conversion and other components

needed for MPEG-2 TS H.264 video demux and transmux.

Change-Id: I878cdd141140cfd6833d75c7133301b1d65f1da0
This commit is contained in:
Thomas Inskip 2014-04-17 18:57:31 -07:00
parent 4a39a0fc98
commit 67bdd89ba2
11 changed files with 356 additions and 44 deletions

View File

@ -50,6 +50,8 @@ class BufferWriter {
void AppendBuffer(const BufferWriter& buffer);
void Swap(BufferWriter* buffer) { buf_.swap(buffer->buf_); }
void SwapBuffer(std::vector<uint8>* buffer) { buf_.swap(*buffer); }
void Clear() { buf_.clear(); }
size_t Size() const { return buf_.size(); }
/// @return Underlying buffer. Behavior is undefined if the buffer size is 0.

View File

@ -21,6 +21,8 @@
'sources': [
'h264_bit_reader.cc',
'h264_bit_reader.h',
'h264_byte_to_unit_stream_converter.cc',
'h264_byte_to_unit_stream_converter.h',
'h264_parser.cc',
'h264_parser.h',
],
@ -33,6 +35,7 @@
'type': '<(gtest_target_type)',
'sources': [
'h264_bit_reader_unittest.cc',
'h264_byte_to_unit_stream_converter_unittest.cc',
'h264_parser_unittest.cc',
],
'dependencies': [

View File

@ -0,0 +1,131 @@
// Copyright 2014 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#include "media/filters/h264_byte_to_unit_stream_converter.h"
#include "base/logging.h"
#include "media/base/buffer_writer.h"
#include "media/filters/h264_parser.h"
namespace media {
namespace {
// Additional space to reserve for output frame. This value ought to be enough
// to accommodate frames consisting of 100 NAL units with 3-byte start codes.
const size_t kStreamConversionOverhead = 100;
}
// Nothing is initialized explicitly; the stored SPS/PPS vectors are
// default-constructed and therefore start out empty.
H264ByteToUnitStreamConverter::H264ByteToUnitStreamConverter() {}
// No explicit cleanup is performed.
H264ByteToUnitStreamConverter::~H264ByteToUnitStreamConverter() {}
bool H264ByteToUnitStreamConverter::ConvertByteStreamToNalUnitStream(
const uint8* input_frame,
size_t input_frame_size,
std::vector<uint8>* output_frame) {
DCHECK(input_frame);
DCHECK(output_frame);
BufferWriter output_buffer(input_frame_size + kStreamConversionOverhead);
const uint8* input_ptr(input_frame);
const uint8* input_end(input_ptr + input_frame_size);
off_t next_start_code_offset;
off_t next_start_code_size;
bool first_nalu(true);
while (H264Parser::FindStartCode(input_ptr,
input_end - input_ptr,
&next_start_code_offset,
&next_start_code_size)) {
if (first_nalu) {
if (next_start_code_offset != 0) {
LOG(ERROR) << "H.264 byte stream frame did not begin with start code.";
return false;
}
first_nalu = false;
} else {
ProcessNalu(input_ptr, next_start_code_offset, &output_buffer);
}
input_ptr += next_start_code_offset + next_start_code_size;
}
if (first_nalu) {
LOG(ERROR) << "H.264 byte stream frame did not contain start codes.";
return false;
} else {
ProcessNalu(input_ptr, input_end - input_ptr, &output_buffer);
}
output_buffer.SwapBuffer(output_frame);
return true;
}
// Handles a single NAL unit whose start code has already been stripped.
//
// SPS and PPS NAL units are remembered (replacing any previously stored one)
// and are not copied to the output; AUD NAL units are dropped. Every other NAL
// unit is appended to |output_buffer| preceded by its size as a 32-bit
// integer. Empty NAL units are ignored.
//
// |nalu_ptr| points at the NAL unit header byte, |nalu_size| is the size of
// the NAL unit in bytes, and |output_buffer| receives the converted data.
void H264ByteToUnitStreamConverter::ProcessNalu(
    const uint8* nalu_ptr,
    size_t nalu_size,
    BufferWriter* output_buffer) {
  DCHECK(nalu_ptr);
  DCHECK(output_buffer);

  if (!nalu_size)
    return;  // Edge case.

  // The NAL unit header byte is laid out as forbidden_zero_bit (1 bit),
  // nal_ref_idc (2 bits) and nal_unit_type (5 bits). All five low bits must be
  // kept: a 4-bit mask would fold types 16..31 onto 0..15, so that some of
  // them would be mistaken for SPS/PPS/AUD and silently removed from the
  // output.
  const uint8 nalu_type = *nalu_ptr & 0x1f;
  switch (nalu_type) {
    case H264NALU::kSPS:
      // Grab SPS NALU.
      last_sps_.assign(nalu_ptr, nalu_ptr + nalu_size);
      return;
    case H264NALU::kPPS:
      // Grab PPS NALU.
      last_pps_.assign(nalu_ptr, nalu_ptr + nalu_size);
      return;
    case H264NALU::kAUD:
      // Ignore AUD NALU.
      return;
    default:
      // Copy all other NALUs.
      break;
  }

  // Append 4-byte length and NAL unit data to the buffer.
  output_buffer->AppendInt(static_cast<uint32>(nalu_size));
  output_buffer->AppendArray(nalu_ptr, nalu_size);
}
bool H264ByteToUnitStreamConverter::GetAVCDecoderConfigurationRecord(
std::vector<uint8>* decoder_config) {
DCHECK(decoder_config);
if ((last_sps_.size() < 4) || last_pps_.empty()) {
// No data available to construct AVCDecoderConfigurationRecord.
return false;
}
// Construct an AVCDecoderConfigurationRecord containing a single SPS and a
// single PPS NALU. Please refer to ISO/IEC 14496-15 for format specifics.
BufferWriter buffer(last_sps_.size() + last_pps_.size() + 11);
uint8 version(1);
buffer.AppendInt(version);
buffer.AppendInt(last_sps_[1]);
buffer.AppendInt(last_sps_[2]);
buffer.AppendInt(last_sps_[3]);
uint8 reserved_and_length_size_minus_one(0xff);
buffer.AppendInt(reserved_and_length_size_minus_one);
uint8 reserved_and_num_sps(0xe1);
buffer.AppendInt(reserved_and_num_sps);
buffer.AppendInt(static_cast<uint16>(last_sps_.size()));
buffer.AppendVector(last_sps_);
uint8 num_pps(1);
buffer.AppendInt(num_pps);
buffer.AppendInt(static_cast<uint16>(last_pps_.size()));
buffer.AppendVector(last_pps_);
buffer.SwapBuffer(decoder_config);
return true;
}
} // namespace media

View File

@ -0,0 +1,57 @@
// Copyright 2014 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#ifndef MEDIA_FILTERS_H264_BYTE_TO_UNIT_STREAM_CONVERTER_H_
#define MEDIA_FILTERS_H264_BYTE_TO_UNIT_STREAM_CONVERTER_H_
#include "base/basictypes.h"
#include <vector>
namespace media {
class BufferWriter;
/// Class which converts H.264 byte streams (as specified in ISO/IEC 14496-10
/// Annex B) into H.264 NAL unit streams (as specified in ISO/IEC 14496-15).
class H264ByteToUnitStreamConverter {
public:
/// Size, in bytes, of the length field written in front of every NAL unit in
/// the converted unit stream.
static const size_t kUnitStreamNaluLengthSize = 4;
H264ByteToUnitStreamConverter();
~H264ByteToUnitStreamConverter();
/// Converts a whole AVC byte stream encoded video frame to NAL unit stream
/// format.
/// @param input_frame is a buffer containing a whole H.264 frame in byte
/// stream format.
/// @param input_frame_size is the size of the H.264 frame, in bytes.
/// @param output_frame is a pointer to a vector which will receive the
/// converted frame.
/// @return true if successful, false otherwise.
bool ConvertByteStreamToNalUnitStream(const uint8* input_frame,
size_t input_frame_size,
std::vector<uint8>* output_frame);
/// Synthesizes an AVCDecoderConfigurationRecord from the SPS and PPS NAL
/// units extracted from the AVC byte stream.
/// @param decoder_config is a pointer to a vector, which on successful
/// return will contain the computed AVCDecoderConfigurationRecord.
/// @return true if successful, or false otherwise.
bool GetAVCDecoderConfigurationRecord(std::vector<uint8>* decoder_config);
private:
// Handles one NAL unit (without its start code): SPS and PPS NAL units are
// stored in last_sps_ / last_pps_, AUD NAL units are discarded, and any other
// NAL unit is appended to |output_buffer| prefixed with its length.
void ProcessNalu(const uint8* nalu_ptr,
size_t nalu_size,
BufferWriter* output_buffer);
// Most recently seen SPS NAL unit; empty until one has been processed.
std::vector<uint8> last_sps_;
// Most recently seen PPS NAL unit; empty until one has been processed.
std::vector<uint8> last_pps_;
};
} // namespace media
#endif // MEDIA_FILTERS_H264_BYTE_TO_UNIT_STREAM_CONVERTER_H_

View File

@ -0,0 +1,61 @@
// Copyright 2014 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#include "base/strings/string_number_conversions.h"
#include "media/filters/h264_byte_to_unit_stream_converter.h"
#include "media/test/test_data_util.h"
#include "testing/gtest/include/gtest/gtest.h"
#include <stdio.h>
namespace {
// Hex-encoded AVCDecoderConfigurationRecord that the converter is expected to
// produce after converting the avc-byte-stream-frame.h264 test frame.
const char kExpectedConfigRecord[] =
"014d400dffe10013274d400da918283e600d418041adb0ad7bdf01010004"
"28de0988";
}
namespace media {
// Converts a reference byte stream frame and verifies both the converted unit
// stream frame and the synthesized decoder configuration record.
TEST(H264ByteToUnitStreamConverter, ConversionSuccess) {
  // Load the byte stream input and the reference unit stream output.
  std::vector<uint8> byte_stream_frame(
      ReadTestDataFile("avc-byte-stream-frame.h264"));
  ASSERT_FALSE(byte_stream_frame.empty());

  std::vector<uint8> reference_unit_stream_frame(
      ReadTestDataFile("avc-unit-stream-frame.h264"));
  ASSERT_FALSE(reference_unit_stream_frame.empty());

  // The conversion must succeed and reproduce the reference frame exactly.
  H264ByteToUnitStreamConverter converter;
  std::vector<uint8> converted_frame;
  ASSERT_TRUE(converter.ConvertByteStreamToNalUnitStream(
      byte_stream_frame.data(), byte_stream_frame.size(), &converted_frame));
  EXPECT_EQ(reference_unit_stream_frame, converted_frame);

  // The record built from the frame's SPS/PPS must match the expected bytes.
  std::vector<uint8> reference_config_record;
  ASSERT_TRUE(base::HexStringToBytes(kExpectedConfigRecord,
                                     &reference_config_record));
  std::vector<uint8> actual_config_record;
  ASSERT_TRUE(
      converter.GetAVCDecoderConfigurationRecord(&actual_config_record));
  EXPECT_EQ(reference_config_record, actual_config_record);
}
// Feeds the converter input without any start code and verifies that both the
// conversion and the decoder configuration record synthesis report failure.
TEST(H264ByteToUnitStreamConverter, ConversionFailure) {
  // 100 zero bytes.
  std::vector<uint8> zero_filled_frame(100, 0);

  H264ByteToUnitStreamConverter converter;
  std::vector<uint8> converted_frame;

  // An empty frame is rejected.
  EXPECT_FALSE(converter.ConvertByteStreamToNalUnitStream(
      zero_filled_frame.data(), 0, &converted_frame));

  // So is the full zero-filled frame.
  EXPECT_FALSE(converter.ConvertByteStreamToNalUnitStream(
      zero_filled_frame.data(), zero_filled_frame.size(), &converted_frame));

  // No SPS/PPS was ever captured, so no record can be produced.
  std::vector<uint8> config_record;
  EXPECT_FALSE(converter.GetAVCDecoderConfigurationRecord(&config_record));
}
} // namespace media

View File

@ -11,6 +11,7 @@
#include "media/base/offset_byte_queue.h"
#include "media/base/timestamp.h"
#include "media/base/video_stream_info.h"
#include "media/filters/h264_byte_to_unit_stream_converter.h"
#include "media/filters/h264_parser.h"
#include "media/formats/mp2t/mp2t_common.h"
@ -38,7 +39,10 @@ EsParserH264::EsParserH264(
es_queue_(new media::OffsetByteQueue()),
h264_parser_(new H264Parser()),
current_access_unit_pos_(0),
next_access_unit_pos_(0) {
next_access_unit_pos_(0),
stream_converter_(new H264ByteToUnitStreamConverter),
decoder_config_check_pending_(false),
pending_sample_duration_(0) {
}
EsParserH264::~EsParserH264() {
@ -72,14 +76,22 @@ bool EsParserH264::Parse(const uint8* buf, int size, int64 pts, int64 dts) {
void EsParserH264::Flush() {
DVLOG(1) << "EsParserH264::Flush";
if (!FindAUD(&current_access_unit_pos_))
return;
// Simulate an additional AUD to force emitting the last access unit
// which is assumed to be complete at this point.
uint8 aud[] = { 0x00, 0x00, 0x01, 0x09 };
es_queue_->Push(aud, sizeof(aud));
ParseInternal();
if (FindAUD(&current_access_unit_pos_)) {
// Simulate an additional AUD to force emitting the last access unit
// which is assumed to be complete at this point.
uint8 aud[] = { 0x00, 0x00, 0x01, 0x09 };
es_queue_->Push(aud, sizeof(aud));
ParseInternal();
}
if (pending_sample_) {
// Flush pending sample.
DCHECK(pending_sample_duration_);
pending_sample_->set_duration(pending_sample_duration_);
emit_sample_cb_.Run(pid(), pending_sample_);
pending_sample_ = scoped_refptr<MediaSample>();
}
}
void EsParserH264::Reset() {
@ -90,6 +102,9 @@ void EsParserH264::Reset() {
next_access_unit_pos_ = 0;
timing_desc_list_.clear();
last_video_decoder_config_ = scoped_refptr<StreamInfo>();
decoder_config_check_pending_ = false;
pending_sample_ = scoped_refptr<MediaSample>();
pending_sample_duration_ = 0;
}
bool EsParserH264::FindAUD(int64* stream_pos) {
@ -189,6 +204,7 @@ bool EsParserH264::ParseInternal() {
int sps_id;
if (h264_parser_->ParseSPS(&sps_id) != H264Parser::kOk)
return false;
decoder_config_check_pending_ = true;
break;
}
case H264NALU::kPPS: {
@ -196,6 +212,7 @@ bool EsParserH264::ParseInternal() {
int pps_id;
if (h264_parser_->ParsePPS(&pps_id) != H264Parser::kOk)
return false;
decoder_config_check_pending_ = true;
break;
}
case H264NALU::kIDRSlice:
@ -242,23 +259,6 @@ bool EsParserH264::EmitFrame(int64 access_unit_pos, int access_unit_size,
if (current_timing_desc.pts == kNoTimestamp)
return false;
// Update the video decoder configuration if needed.
const H264PPS* pps = h264_parser_->GetPPS(pps_id);
if (!pps) {
// Only accept an invalid PPS at the beginning when the stream
// does not necessarily start with an SPS/PPS/IDR.
// In this case, the initial frames are conveyed to the upper layer with
// an invalid VideoDecoderConfig and it's up to the upper layer
// to process this kind of frame accordingly.
if (last_video_decoder_config_)
return false;
} else {
const H264SPS* sps = h264_parser_->GetSPS(pps->seq_parameter_set_id);
if (!sps)
return false;
RCHECK(UpdateVideoDecoderConfig(sps));
}
// Emit a frame.
DVLOG(LOG_LEVEL_ES) << "Emit frame: stream_pos=" << current_access_unit_pos_
<< " size=" << access_unit_size;
@ -267,25 +267,67 @@ bool EsParserH264::EmitFrame(int64 access_unit_pos, int access_unit_size,
es_queue_->PeekAt(current_access_unit_pos_, &es, &es_size);
CHECK_GE(es_size, access_unit_size);
// TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId
// type and allow multiple video tracks. See https://crbug.com/341581.
scoped_refptr<MediaSample> media_sample =
MediaSample::CopyFrom(
es,
access_unit_size,
is_key_frame);
// Convert frame to unit stream format.
std::vector<uint8> converted_frame;
if (!stream_converter_->ConvertByteStreamToNalUnitStream(
es, access_unit_size, &converted_frame)) {
DLOG(ERROR) << "Failure to convert video frame to unit stream format.";
return false;
}
if (decoder_config_check_pending_) {
// Update the video decoder configuration if needed.
const H264PPS* pps = h264_parser_->GetPPS(pps_id);
if (!pps) {
// Only accept an invalid PPS at the beginning when the stream
// does not necessarily start with an SPS/PPS/IDR.
// In this case, the initial frames are conveyed to the upper layer with
// an invalid VideoDecoderConfig and it's up to the upper layer
// to process this kind of frame accordingly.
if (last_video_decoder_config_)
return false;
} else {
const H264SPS* sps = h264_parser_->GetSPS(pps->seq_parameter_set_id);
if (!sps)
return false;
RCHECK(UpdateVideoDecoderConfig(sps));
decoder_config_check_pending_ = false;
}
}
// Create the media sample, emitting always the previous sample after
// calculating its duration.
scoped_refptr<MediaSample> media_sample = MediaSample::CopyFrom(
converted_frame.data(), converted_frame.size(), is_key_frame);
media_sample->set_dts(current_timing_desc.dts);
media_sample->set_pts(current_timing_desc.pts);
emit_sample_cb_.Run(pid(), media_sample);
if (pending_sample_) {
DCHECK_GT(media_sample->dts(), pending_sample_->dts());
pending_sample_duration_ = media_sample->dts() - pending_sample_->dts();
pending_sample_->set_duration(pending_sample_duration_);
emit_sample_cb_.Run(pid(), pending_sample_);
}
pending_sample_ = media_sample;
return true;
}
bool EsParserH264::UpdateVideoDecoderConfig(const H264SPS* sps) {
// TODO(tinskip): Generate an error if video configuration change is detected.
std::vector<uint8> decoder_config_record;
if (!stream_converter_->GetAVCDecoderConfigurationRecord(
&decoder_config_record)) {
DLOG(ERROR) << "Failure to construct an AVCDecoderConfigurationRecord";
return false;
}
if (last_video_decoder_config_) {
// Varying video configurations currently not supported. Just assume that
// the video configuration has not changed.
return true;
// Verify that the video decoder config has not changed.
if (last_video_decoder_config_->extra_data() == decoder_config_record) {
// Video configuration has not changed.
return true;
}
NOTIMPLEMENTED() << "Varying video configurations are not supported.";
return false;
}
// TODO(damienv): a MAP unit can be either 16 or 32 pixels.
@ -299,13 +341,16 @@ bool EsParserH264::UpdateVideoDecoderConfig(const H264SPS* sps) {
kMpeg2Timescale,
kInfiniteDuration,
kCodecH264,
std::string(), // TODO(tinskip): calculate codec string.
VideoStreamInfo::GetCodecString(kCodecH264,
decoder_config_record[1],
decoder_config_record[2],
decoder_config_record[3]),
std::string(),
width,
height,
kCommonNaluLengthSize,
NULL, // TODO(tinskip): calculate AVCDecoderConfigurationRecord.
0,
H264ByteToUnitStreamConverter::kUnitStreamNaluLengthSize,
decoder_config_record.data(),
decoder_config_record.size(),
false));
DVLOG(1) << "Profile IDC: " << sps->profile_idc;
DVLOG(1) << "Level IDC: " << sps->level_idc;

View File

@ -16,6 +16,7 @@
namespace media {
class H264ByteToUnitStreamConverter;
class H264Parser;
class OffsetByteQueue;
struct H264SPS;
@ -83,8 +84,16 @@ class EsParserH264 : public EsParser {
int64 current_access_unit_pos_;
int64 next_access_unit_pos_;
// Filter to convert H.264 Annex B byte stream to unit stream.
scoped_ptr<H264ByteToUnitStreamConverter> stream_converter_;
// Last video decoder config.
scoped_refptr<StreamInfo> last_video_decoder_config_;
bool decoder_config_check_pending_;
// Frame for which we do not yet have a duration.
scoped_refptr<MediaSample> pending_sample_;
uint64 pending_sample_duration_;
};
} // namespace mp2t

View File

@ -126,7 +126,7 @@ class Mp2tMediaParserTest : public testing::Test {
TEST_F(Mp2tMediaParserTest, UnalignedAppend17) {
// Test small, non-segment-aligned appends.
ParseMpeg2TsFile("bear-1280x720.ts", 17);
EXPECT_EQ(video_frame_count_, 81);
EXPECT_EQ(video_frame_count_, 80);
parser_->Flush();
EXPECT_EQ(video_frame_count_, 82);
}
@ -134,7 +134,7 @@ TEST_F(Mp2tMediaParserTest, UnalignedAppend17) {
TEST_F(Mp2tMediaParserTest, UnalignedAppend512) {
// Test small, non-segment-aligned appends.
ParseMpeg2TsFile("bear-1280x720.ts", 512);
EXPECT_EQ(video_frame_count_, 81);
EXPECT_EQ(video_frame_count_, 80);
parser_->Flush();
EXPECT_EQ(video_frame_count_, 82);
}
@ -145,7 +145,8 @@ TEST_F(Mp2tMediaParserTest, TimestampWrapAround) {
// (close to 2^33 / 90000) which results in timestamps wrap around
// in the Mpeg2 TS stream.
ParseMpeg2TsFile("bear-1280x720_ptswraparound.ts", 512);
EXPECT_EQ(video_frame_count_, 81);
parser_->Flush();
EXPECT_EQ(video_frame_count_, 82);
EXPECT_GE(video_min_dts_, (95443 - 1) * kMpeg2Timescale);
EXPECT_LE(video_max_dts_,
static_cast<int64>((95443 + 4)) * kMpeg2Timescale);

View File

@ -74,3 +74,6 @@ bear.h264:
bear.mp4 (https://chromiumcodereview.appspot.com/10805089):
ffmpeg -i bear.mp4 -vcodec copy -vbsf h264_mp4toannexb \
-an bear.h264
avc-byte-stream-frame.h264 - Single IDR frame extracted from test-25fps.h264 in Annex B byte stream format.
avc-unit-stream-frame.h264 - Single IDR frame from avc-byte-stream-frame.h264 converted to unit stream format.

Binary file not shown.

Binary file not shown.