diff --git a/media/base/buffer_writer.h b/media/base/buffer_writer.h index bc8141db63..39aa75d291 100644 --- a/media/base/buffer_writer.h +++ b/media/base/buffer_writer.h @@ -50,6 +50,8 @@ class BufferWriter { void AppendBuffer(const BufferWriter& buffer); void Swap(BufferWriter* buffer) { buf_.swap(buffer->buf_); } + void SwapBuffer(std::vector* buffer) { buf_.swap(*buffer); } + void Clear() { buf_.clear(); } size_t Size() const { return buf_.size(); } /// @return Underlying buffer. Behavior is undefined if the buffer size is 0. diff --git a/media/filters/filters.gyp b/media/filters/filters.gyp index d4d1ace796..81e490bf1c 100644 --- a/media/filters/filters.gyp +++ b/media/filters/filters.gyp @@ -21,6 +21,8 @@ 'sources': [ 'h264_bit_reader.cc', 'h264_bit_reader.h', + 'h264_byte_to_unit_stream_converter.cc', + 'h264_byte_to_unit_stream_converter.h', 'h264_parser.cc', 'h264_parser.h', ], @@ -33,6 +35,7 @@ 'type': '<(gtest_target_type)', 'sources': [ 'h264_bit_reader_unittest.cc', + 'h264_byte_to_unit_stream_converter_unittest.cc', 'h264_parser_unittest.cc', ], 'dependencies': [ diff --git a/media/filters/h264_byte_to_unit_stream_converter.cc b/media/filters/h264_byte_to_unit_stream_converter.cc new file mode 100644 index 0000000000..cf79118bce --- /dev/null +++ b/media/filters/h264_byte_to_unit_stream_converter.cc @@ -0,0 +1,131 @@ +// Copyright 2014 Google Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#include "media/filters/h264_byte_to_unit_stream_converter.h" + +#include "base/logging.h" +#include "media/base/buffer_writer.h" +#include "media/filters/h264_parser.h" + +namespace media { + +namespace { +// Additional space to reserve for output frame. This value ought to be enough +// to accommodate frames consisting of 100 NAL units with 3-byte start codes. 
+const size_t kStreamConversionOverhead = 100; +} + +H264ByteToUnitStreamConverter::H264ByteToUnitStreamConverter() {} + +H264ByteToUnitStreamConverter::~H264ByteToUnitStreamConverter() {} + +bool H264ByteToUnitStreamConverter::ConvertByteStreamToNalUnitStream( + const uint8* input_frame, + size_t input_frame_size, + std::vector* output_frame) { + DCHECK(input_frame); + DCHECK(output_frame); + + BufferWriter output_buffer(input_frame_size + kStreamConversionOverhead); + + const uint8* input_ptr(input_frame); + const uint8* input_end(input_ptr + input_frame_size); + off_t next_start_code_offset; + off_t next_start_code_size; + bool first_nalu(true); + while (H264Parser::FindStartCode(input_ptr, + input_end - input_ptr, + &next_start_code_offset, + &next_start_code_size)) { + if (first_nalu) { + if (next_start_code_offset != 0) { + LOG(ERROR) << "H.264 byte stream frame did not begin with start code."; + return false; + } + first_nalu = false; + } else { + ProcessNalu(input_ptr, next_start_code_offset, &output_buffer); + } + input_ptr += next_start_code_offset + next_start_code_size; + } + + if (first_nalu) { + LOG(ERROR) << "H.264 byte stream frame did not contain start codes."; + return false; + } else { + ProcessNalu(input_ptr, input_end - input_ptr, &output_buffer); + } + + output_buffer.SwapBuffer(output_frame); + return true; +} + +void H264ByteToUnitStreamConverter::ProcessNalu( + const uint8* nalu_ptr, + size_t nalu_size, + BufferWriter* output_buffer) { + DCHECK(nalu_ptr); + DCHECK(output_buffer); + + if (!nalu_size) + return; // Edge case. + + uint8 nalu_type = *nalu_ptr & 0x0f; + switch (nalu_type) { + case H264NALU::kSPS: + // Grab SPS NALU. + last_sps_.assign(nalu_ptr, nalu_ptr + nalu_size); + return; + case H264NALU::kPPS: + // Grab PPS NALU. + last_pps_.assign(nalu_ptr, nalu_ptr + nalu_size); + return; + case H264NALU::kAUD: + // Ignore AUD NALU. + return; + default: + // Copy all other NALUs. 
+ break; + } + + // Append 4-byte length and NAL unit data to the buffer. + output_buffer->AppendInt(static_cast(nalu_size)); + output_buffer->AppendArray(nalu_ptr, nalu_size); +} + +bool H264ByteToUnitStreamConverter::GetAVCDecoderConfigurationRecord( + std::vector* decoder_config) { + DCHECK(decoder_config); + + if ((last_sps_.size() < 4) || last_pps_.empty()) { + // No data available to construct AVCDecoderConfigurationRecord. + return false; + } + + // Construct an AVCDecoderConfigurationRecord containing a single SPS and a + // single PPS NALU. Please refer to ISO/IEC 14496-15 for format specifics. + BufferWriter buffer(last_sps_.size() + last_pps_.size() + 11); + uint8 version(1); + buffer.AppendInt(version); + buffer.AppendInt(last_sps_[1]); + buffer.AppendInt(last_sps_[2]); + buffer.AppendInt(last_sps_[3]); + uint8 reserved_and_length_size_minus_one(0xff); + buffer.AppendInt(reserved_and_length_size_minus_one); + uint8 reserved_and_num_sps(0xe1); + buffer.AppendInt(reserved_and_num_sps); + buffer.AppendInt(static_cast(last_sps_.size())); + buffer.AppendVector(last_sps_); + uint8 num_pps(1); + buffer.AppendInt(num_pps); + buffer.AppendInt(static_cast(last_pps_.size())); + buffer.AppendVector(last_pps_); + buffer.SwapBuffer(decoder_config); + + return true; +} + +} // namespace media diff --git a/media/filters/h264_byte_to_unit_stream_converter.h b/media/filters/h264_byte_to_unit_stream_converter.h new file mode 100644 index 0000000000..87c28fe450 --- /dev/null +++ b/media/filters/h264_byte_to_unit_stream_converter.h @@ -0,0 +1,57 @@ +// Copyright 2014 Google Inc. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#ifndef MEDIA_FILTERS_H264_BYTE_TO_UNIT_STREAM_CONVERTER_H_ +#define MEDIA_FILTERS_H264_BYTE_TO_UNIT_STREAM_CONVERTER_H_ + +#include "base/basictypes.h" + +#include + +namespace media { + +class BufferWriter; + +/// Class which converts H.264 byte streams (as specified in ISO/IEC 14496-10 +/// Annex B) into H.264 NAL unit streams (as specified in ISO/IEC 14496-15). +class H264ByteToUnitStreamConverter { + public: + static const size_t kUnitStreamNaluLengthSize = 4; + + H264ByteToUnitStreamConverter(); + ~H264ByteToUnitStreamConverter(); + + /// Converts a whole AVC byte stream encoded video frame to NAL unit stream + /// format. + /// @param input_frame is a buffer containing a whole H.264 frame in byte + /// stream format. + /// @param input_frame_size is the size of the H.264 frame, in bytes. + /// @param output_frame is a pointer to a vector which will receive the + /// converted frame. + /// @return true if successful, false otherwise. + bool ConvertByteStreamToNalUnitStream(const uint8* input_frame, + size_t input_frame_size, + std::vector* output_frame); + + /// Synthesizes an AVCDecoderConfigurationRecord from the SPS and PPS NAL + /// units extracted from the AVC byte stream. + /// @param decoder_config is a pointer to a vector, which on successful + /// return will contain the computed AVCDecoderConfigurationRecord. + /// @return true if successful, or false otherwise. 
+ bool GetAVCDecoderConfigurationRecord(std::vector* decoder_config); + + private: + void ProcessNalu(const uint8* nalu_ptr, + size_t nalu_size, + BufferWriter* output_buffer); + + std::vector last_sps_; + std::vector last_pps_; +}; + +} // namespace media + +#endif // MEDIA_FILTERS_H264_BYTE_TO_UNIT_STREAM_CONVERTER_H_ diff --git a/media/filters/h264_byte_to_unit_stream_converter_unittest.cc b/media/filters/h264_byte_to_unit_stream_converter_unittest.cc new file mode 100644 index 0000000000..e62d976db4 --- /dev/null +++ b/media/filters/h264_byte_to_unit_stream_converter_unittest.cc @@ -0,0 +1,61 @@ +// Copyright 2014 Google Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#include "base/strings/string_number_conversions.h" +#include "media/filters/h264_byte_to_unit_stream_converter.h" +#include "media/test/test_data_util.h" +#include "testing/gtest/include/gtest/gtest.h" + +#include + +namespace { +const char kExpectedConfigRecord[] = + "014d400dffe10013274d400da918283e600d418041adb0ad7bdf01010004" + "28de0988"; +} + +namespace media { + +TEST(H264ByteToUnitStreamConverter, ConversionSuccess) { + std::vector input_frame = + ReadTestDataFile("avc-byte-stream-frame.h264"); + ASSERT_FALSE(input_frame.empty()); + + std::vector expected_output_frame = + ReadTestDataFile("avc-unit-stream-frame.h264"); + ASSERT_FALSE(expected_output_frame.empty()); + + H264ByteToUnitStreamConverter converter; + std::vector output_frame; + ASSERT_TRUE(converter.ConvertByteStreamToNalUnitStream(input_frame.data(), + input_frame.size(), + &output_frame)); + EXPECT_EQ(expected_output_frame, output_frame); + + std::vector expected_decoder_config; + ASSERT_TRUE(base::HexStringToBytes(kExpectedConfigRecord, + &expected_decoder_config)); + std::vector decoder_config; + ASSERT_TRUE(converter.GetAVCDecoderConfigurationRecord(&decoder_config)); + 
EXPECT_EQ(expected_decoder_config, decoder_config); +} + +TEST(H264ByteToUnitStreamConverter, ConversionFailure) { + std::vector input_frame(100, 0); + + H264ByteToUnitStreamConverter converter; + std::vector output_frame; + EXPECT_FALSE(converter.ConvertByteStreamToNalUnitStream(input_frame.data(), + 0, + &output_frame)); + EXPECT_FALSE(converter.ConvertByteStreamToNalUnitStream(input_frame.data(), + input_frame.size(), + &output_frame)); + std::vector decoder_config; + EXPECT_FALSE(converter.GetAVCDecoderConfigurationRecord(&decoder_config)); +} + +} // namespace media diff --git a/media/formats/mp2t/es_parser_h264.cc b/media/formats/mp2t/es_parser_h264.cc index c0aca48c92..b4380b573a 100644 --- a/media/formats/mp2t/es_parser_h264.cc +++ b/media/formats/mp2t/es_parser_h264.cc @@ -11,6 +11,7 @@ #include "media/base/offset_byte_queue.h" #include "media/base/timestamp.h" #include "media/base/video_stream_info.h" +#include "media/filters/h264_byte_to_unit_stream_converter.h" #include "media/filters/h264_parser.h" #include "media/formats/mp2t/mp2t_common.h" @@ -38,7 +39,10 @@ EsParserH264::EsParserH264( es_queue_(new media::OffsetByteQueue()), h264_parser_(new H264Parser()), current_access_unit_pos_(0), - next_access_unit_pos_(0) { + next_access_unit_pos_(0), + stream_converter_(new H264ByteToUnitStreamConverter), + decoder_config_check_pending_(false), + pending_sample_duration_(0) { } EsParserH264::~EsParserH264() { @@ -72,14 +76,22 @@ bool EsParserH264::Parse(const uint8* buf, int size, int64 pts, int64 dts) { void EsParserH264::Flush() { DVLOG(1) << "EsParserH264::Flush"; - if (!FindAUD(&current_access_unit_pos_)) - return; - // Simulate an additional AUD to force emitting the last access unit - // which is assumed to be complete at this point. 
- uint8 aud[] = { 0x00, 0x00, 0x01, 0x09 }; - es_queue_->Push(aud, sizeof(aud)); - ParseInternal(); + if (FindAUD(&current_access_unit_pos_)) { + // Simulate an additional AUD to force emitting the last access unit + // which is assumed to be complete at this point. + uint8 aud[] = { 0x00, 0x00, 0x01, 0x09 }; + es_queue_->Push(aud, sizeof(aud)); + ParseInternal(); + } + + if (pending_sample_) { + // Flush pending sample. + DCHECK(pending_sample_duration_); + pending_sample_->set_duration(pending_sample_duration_); + emit_sample_cb_.Run(pid(), pending_sample_); + pending_sample_ = scoped_refptr(); + } } void EsParserH264::Reset() { @@ -90,6 +102,9 @@ void EsParserH264::Reset() { next_access_unit_pos_ = 0; timing_desc_list_.clear(); last_video_decoder_config_ = scoped_refptr(); + decoder_config_check_pending_ = false; + pending_sample_ = scoped_refptr(); + pending_sample_duration_ = 0; } bool EsParserH264::FindAUD(int64* stream_pos) { @@ -189,6 +204,7 @@ bool EsParserH264::ParseInternal() { int sps_id; if (h264_parser_->ParseSPS(&sps_id) != H264Parser::kOk) return false; + decoder_config_check_pending_ = true; break; } case H264NALU::kPPS: { @@ -196,6 +212,7 @@ bool EsParserH264::ParseInternal() { int pps_id; if (h264_parser_->ParsePPS(&pps_id) != H264Parser::kOk) return false; + decoder_config_check_pending_ = true; break; } case H264NALU::kIDRSlice: @@ -242,23 +259,6 @@ bool EsParserH264::EmitFrame(int64 access_unit_pos, int access_unit_size, if (current_timing_desc.pts == kNoTimestamp) return false; - // Update the video decoder configuration if needed. - const H264PPS* pps = h264_parser_->GetPPS(pps_id); - if (!pps) { - // Only accept an invalid PPS at the beginning when the stream - // does not necessarily start with an SPS/PPS/IDR. - // In this case, the initial frames are conveyed to the upper layer with - // an invalid VideoDecoderConfig and it's up to the upper layer - // to process this kind of frame accordingly. 
- if (last_video_decoder_config_) - return false; - } else { - const H264SPS* sps = h264_parser_->GetSPS(pps->seq_parameter_set_id); - if (!sps) - return false; - RCHECK(UpdateVideoDecoderConfig(sps)); - } - // Emit a frame. DVLOG(LOG_LEVEL_ES) << "Emit frame: stream_pos=" << current_access_unit_pos_ << " size=" << access_unit_size; @@ -267,25 +267,67 @@ bool EsParserH264::EmitFrame(int64 access_unit_pos, int access_unit_size, es_queue_->PeekAt(current_access_unit_pos_, &es, &es_size); CHECK_GE(es_size, access_unit_size); - // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId - // type and allow multiple video tracks. See https://crbug.com/341581. - scoped_refptr media_sample = - MediaSample::CopyFrom( - es, - access_unit_size, - is_key_frame); + // Convert frame to unit stream format. + std::vector converted_frame; + if (!stream_converter_->ConvertByteStreamToNalUnitStream( + es, access_unit_size, &converted_frame)) { + DLOG(ERROR) << "Failure to convert video frame to unit stream format."; + return false; + } + + if (decoder_config_check_pending_) { + // Update the video decoder configuration if needed. + const H264PPS* pps = h264_parser_->GetPPS(pps_id); + if (!pps) { + // Only accept an invalid PPS at the beginning when the stream + // does not necessarily start with an SPS/PPS/IDR. + // In this case, the initial frames are conveyed to the upper layer with + // an invalid VideoDecoderConfig and it's up to the upper layer + // to process this kind of frame accordingly. + if (last_video_decoder_config_) + return false; + } else { + const H264SPS* sps = h264_parser_->GetSPS(pps->seq_parameter_set_id); + if (!sps) + return false; + RCHECK(UpdateVideoDecoderConfig(sps)); + decoder_config_check_pending_ = false; + } + } + + // Create the media sample, emitting always the previous sample after + // calculating its duration. 
+ scoped_refptr media_sample = MediaSample::CopyFrom( + converted_frame.data(), converted_frame.size(), is_key_frame); media_sample->set_dts(current_timing_desc.dts); media_sample->set_pts(current_timing_desc.pts); - emit_sample_cb_.Run(pid(), media_sample); + if (pending_sample_) { + DCHECK_GT(media_sample->dts(), pending_sample_->dts()); + pending_sample_duration_ = media_sample->dts() - pending_sample_->dts(); + pending_sample_->set_duration(pending_sample_duration_); + emit_sample_cb_.Run(pid(), pending_sample_); + } + pending_sample_ = media_sample; + return true; } bool EsParserH264::UpdateVideoDecoderConfig(const H264SPS* sps) { - // TODO(tinskip): Generate an error if video configuration change is detected. + std::vector decoder_config_record; + if (!stream_converter_->GetAVCDecoderConfigurationRecord( + &decoder_config_record)) { + DLOG(ERROR) << "Failure to construct an AVCDecoderConfigurationRecord"; + return false; + } + if (last_video_decoder_config_) { - // Varying video configurations currently not supported. Just assume that - // the video configuration has not changed. - return true; + // Verify that the video decoder config has not changed. + if (last_video_decoder_config_->extra_data() == decoder_config_record) { + // Video configuration has not changed. + return true; + } + NOTIMPLEMENTED() << "Varying video configurations are not supported."; + return false; } // TODO(damienv): a MAP unit can be either 16 or 32 pixels. @@ -299,13 +341,16 @@ bool EsParserH264::UpdateVideoDecoderConfig(const H264SPS* sps) { kMpeg2Timescale, kInfiniteDuration, kCodecH264, - std::string(), // TODO(tinskip): calculate codec string. + VideoStreamInfo::GetCodecString(kCodecH264, + decoder_config_record[1], + decoder_config_record[2], + decoder_config_record[3]), std::string(), width, height, - kCommonNaluLengthSize, - NULL, // TODO(tinskip): calculate AVCDecoderConfigurationRecord. 
- 0, + H264ByteToUnitStreamConverter::kUnitStreamNaluLengthSize, + decoder_config_record.data(), + decoder_config_record.size(), false)); DVLOG(1) << "Profile IDC: " << sps->profile_idc; DVLOG(1) << "Level IDC: " << sps->level_idc; diff --git a/media/formats/mp2t/es_parser_h264.h b/media/formats/mp2t/es_parser_h264.h index 11bb8c4393..8f5ab7f85c 100644 --- a/media/formats/mp2t/es_parser_h264.h +++ b/media/formats/mp2t/es_parser_h264.h @@ -16,6 +16,7 @@ namespace media { +class H264ByteToUnitStreamConverter; class H264Parser; class OffsetByteQueue; struct H264SPS; @@ -83,8 +84,16 @@ class EsParserH264 : public EsParser { int64 current_access_unit_pos_; int64 next_access_unit_pos_; + // Filter to convert H.264 Annex B byte stream to unit stream. + scoped_ptr stream_converter_; + // Last video decoder config. scoped_refptr last_video_decoder_config_; + bool decoder_config_check_pending_; + + // Frame for which we do not yet have a duration. + scoped_refptr pending_sample_; + uint64 pending_sample_duration_; }; } // namespace mp2t diff --git a/media/formats/mp2t/mp2t_media_parser_unittest.cc b/media/formats/mp2t/mp2t_media_parser_unittest.cc index 6d4e64d620..34726cc869 100644 --- a/media/formats/mp2t/mp2t_media_parser_unittest.cc +++ b/media/formats/mp2t/mp2t_media_parser_unittest.cc @@ -126,7 +126,7 @@ class Mp2tMediaParserTest : public testing::Test { TEST_F(Mp2tMediaParserTest, UnalignedAppend17) { // Test small, non-segment-aligned appends. ParseMpeg2TsFile("bear-1280x720.ts", 17); - EXPECT_EQ(video_frame_count_, 81); + EXPECT_EQ(video_frame_count_, 80); parser_->Flush(); EXPECT_EQ(video_frame_count_, 82); } @@ -134,7 +134,7 @@ TEST_F(Mp2tMediaParserTest, UnalignedAppend17) { TEST_F(Mp2tMediaParserTest, UnalignedAppend512) { // Test small, non-segment-aligned appends. 
ParseMpeg2TsFile("bear-1280x720.ts", 512); - EXPECT_EQ(video_frame_count_, 81); + EXPECT_EQ(video_frame_count_, 80); parser_->Flush(); EXPECT_EQ(video_frame_count_, 82); } @@ -145,7 +145,8 @@ TEST_F(Mp2tMediaParserTest, TimestampWrapAround) { // (close to 2^33 / 90000) which results in timestamps wrap around // in the Mpeg2 TS stream. ParseMpeg2TsFile("bear-1280x720_ptswraparound.ts", 512); - EXPECT_EQ(video_frame_count_, 81); + parser_->Flush(); + EXPECT_EQ(video_frame_count_, 82); EXPECT_GE(video_min_dts_, (95443 - 1) * kMpeg2Timescale); EXPECT_LE(video_max_dts_, static_cast((95443 + 4)) * kMpeg2Timescale); diff --git a/media/test/data/README b/media/test/data/README index 105c8dbac4..e3e8699710 100644 --- a/media/test/data/README +++ b/media/test/data/README @@ -74,3 +74,6 @@ bear.h264: bear.mp4 (https://chromiumcodereview.appspot.com/10805089): ffmpeg -i bear.mp4 -vcodec copy -vbsf h264_mp4toannexb \ -an bear.h264 + +avc-byte-stream-frame.h264 - Single IDR frame extracted from test-25fps.h264 in Annex B byte stream format. +avc-unit-stream-frame.h264 - Single IDR frame from avc-byte-stream-frame.h264 converted to unit stream format. diff --git a/media/test/data/avc-byte-stream-frame.h264 b/media/test/data/avc-byte-stream-frame.h264 new file mode 100644 index 0000000000..0a91e6fa43 Binary files /dev/null and b/media/test/data/avc-byte-stream-frame.h264 differ diff --git a/media/test/data/avc-unit-stream-frame.h264 b/media/test/data/avc-unit-stream-frame.h264 new file mode 100644 index 0000000000..ab7a09492a Binary files /dev/null and b/media/test/data/avc-unit-stream-frame.h264 differ