Implemented H.264 byte stream to unit stream conversion and other components

needed for MPEG-2 TS h.264 video demux and transmux. Change-Id: I878cdd141140cfd6833d75c7133301b1d65f1da0
2014-04-17 18:57:31 -07:00 · 2014-04-17 18:57:31 -07:00 · 67bdd89ba2
parent 4a39a0fc98
commit 67bdd89ba2
11 changed files with 356 additions and 44 deletions
--- a/media/base/buffer_writer.h
+++ b/media/base/buffer_writer.h
@ -50,6 +50,8 @@ class BufferWriter {
  void AppendBuffer(const BufferWriter& buffer);
  void Swap(BufferWriter* buffer) { buf_.swap(buffer->buf_); }
  void SwapBuffer(std::vector<uint8>* buffer) { buf_.swap(*buffer); }
  void Clear() { buf_.clear(); }
  size_t Size() const { return buf_.size(); }
  /// @return Underlying buffer. Behavior is undefined if the buffer size is 0.
--- a/media/filters/filters.gyp
+++ b/media/filters/filters.gyp
@ -21,6 +21,8 @@
      'sources': [
        'h264_bit_reader.cc',
        'h264_bit_reader.h',
        'h264_byte_to_unit_stream_converter.cc',
        'h264_byte_to_unit_stream_converter.h',
        'h264_parser.cc',
        'h264_parser.h',
      ],
@ -33,6 +35,7 @@
      'type': '<(gtest_target_type)',
      'sources': [
        'h264_bit_reader_unittest.cc',
        'h264_byte_to_unit_stream_converter_unittest.cc',
        'h264_parser_unittest.cc',
      ],
      'dependencies': [
--- a/media/filters/h264_byte_to_unit_stream_converter.cc
+++ b/media/filters/h264_byte_to_unit_stream_converter.cc
@ -0,0 +1,131 @@
 // Copyright 2014 Google Inc. All rights reserved.
 //
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file or at
 // https://developers.google.com/open-source/licenses/bsd
 #include "media/filters/h264_byte_to_unit_stream_converter.h"
 #include "base/logging.h"
 #include "media/base/buffer_writer.h"
 #include "media/filters/h264_parser.h"
 namespace media {
 namespace {
 // Additional space to reserve for output frame. This value ought to be enough
 // to accommodate frames consisting of 100 NAL units with 3-byte start codes:
 // each such start code is replaced by a 4-byte length field in the output,
 // i.e. one extra byte per NAL unit.
 // NOTE(review): the value is only passed to the BufferWriter constructor as
 // a size hint; presumably larger frames still work because the buffer can
 // grow - confirm against BufferWriter.
 const size_t kStreamConversionOverhead = 100;
 }  // namespace
 H264ByteToUnitStreamConverter::H264ByteToUnitStreamConverter() {}
 H264ByteToUnitStreamConverter::~H264ByteToUnitStreamConverter() {}
 bool H264ByteToUnitStreamConverter::ConvertByteStreamToNalUnitStream(
    const uint8* input_frame,
    size_t input_frame_size,
    std::vector<uint8>* output_frame) {
  DCHECK(input_frame);
  DCHECK(output_frame);
  BufferWriter output_buffer(input_frame_size + kStreamConversionOverhead);
  const uint8* input_ptr(input_frame);
  const uint8* input_end(input_ptr + input_frame_size);
  off_t next_start_code_offset;
  off_t next_start_code_size;
  bool first_nalu(true);
  while (H264Parser::FindStartCode(input_ptr,
                                   input_end - input_ptr,
                                   &next_start_code_offset,
                                   &next_start_code_size)) {
    if (first_nalu) {
      if (next_start_code_offset != 0) {
        LOG(ERROR) << "H.264 byte stream frame did not begin with start code.";
        return false;
      }
      first_nalu = false;
    } else {
      ProcessNalu(input_ptr, next_start_code_offset, &output_buffer);
    }
    input_ptr += next_start_code_offset + next_start_code_size;
  }
  if (first_nalu) {
    LOG(ERROR) << "H.264 byte stream frame did not contain start codes.";
    return false;
  } else {
    ProcessNalu(input_ptr, input_end - input_ptr, &output_buffer);
  }
  output_buffer.SwapBuffer(output_frame);
  return true;
 }
 // Handles one NAL unit taken from the byte stream.
 //
 // |nalu_ptr| points at the first byte following the start code (the NAL unit
 // header byte) and |nalu_size| is the number of bytes up to the next start
 // code or the end of the frame.
 //
 //  - SPS and PPS units are remembered in |last_sps_| / |last_pps_| (for
 //    GetAVCDecoderConfigurationRecord()) and are NOT copied to the output.
 //  - AUD units are dropped.
 //  - Every other unit is appended to |output_buffer| preceded by a 4-byte
 //    length field.
 void H264ByteToUnitStreamConverter::ProcessNalu(
    const uint8* nalu_ptr,
    size_t nalu_size,
    BufferWriter* output_buffer) {
  DCHECK(nalu_ptr);
  DCHECK(output_buffer);

  if (!nalu_size)
    return;  // Edge case: nothing between two start codes.

  // nal_unit_type is the low FIVE bits of the NAL unit header byte
  // (forbidden_zero_bit:1, nal_ref_idc:2, nal_unit_type:5). Masking with only
  // four bits would alias types 16..31 onto 0..15, e.g. type 23 would be
  // mistaken for an SPS (7) and overwrite |last_sps_|.
  const uint8 nalu_type = *nalu_ptr & 0x1f;
  switch (nalu_type) {
    case H264NALU::kSPS:
      // Grab SPS NALU.
      last_sps_.assign(nalu_ptr, nalu_ptr + nalu_size);
      return;
    case H264NALU::kPPS:
      // Grab PPS NALU.
      last_pps_.assign(nalu_ptr, nalu_ptr + nalu_size);
      return;
    case H264NALU::kAUD:
      // Ignore AUD NALU.
      return;
    default:
      // Copy all other NALUs.
      break;
  }

  // Append 4-byte length and NAL unit data to the buffer.
  output_buffer->AppendInt(static_cast<uint32>(nalu_size));
  output_buffer->AppendArray(nalu_ptr, nalu_size);
 }
 bool H264ByteToUnitStreamConverter::GetAVCDecoderConfigurationRecord(
    std::vector<uint8>* decoder_config) {
  DCHECK(decoder_config);
  if ((last_sps_.size() < 4) || last_pps_.empty()) {
    // No data available to construct AVCDecoderConfigurationRecord.
    return false;
  }
  // Construct an AVCDecoderConfigurationRecord containing a single SPS and a
  // single PPS NALU. Please refer to ISO/IEC 14496-15 for format specifics.
  BufferWriter buffer(last_sps_.size() + last_pps_.size() + 11);
  uint8 version(1);
  buffer.AppendInt(version);
  buffer.AppendInt(last_sps_[1]);
  buffer.AppendInt(last_sps_[2]);
  buffer.AppendInt(last_sps_[3]);
  uint8 reserved_and_length_size_minus_one(0xff);
  buffer.AppendInt(reserved_and_length_size_minus_one);
  uint8 reserved_and_num_sps(0xe1);
  buffer.AppendInt(reserved_and_num_sps);
  buffer.AppendInt(static_cast<uint16>(last_sps_.size()));
  buffer.AppendVector(last_sps_);
  uint8 num_pps(1);
  buffer.AppendInt(num_pps);
  buffer.AppendInt(static_cast<uint16>(last_pps_.size()));
  buffer.AppendVector(last_pps_);
  buffer.SwapBuffer(decoder_config);
  return true;
 }
 }  // namespace media
--- a/media/filters/h264_byte_to_unit_stream_converter.h
+++ b/media/filters/h264_byte_to_unit_stream_converter.h
@ -0,0 +1,57 @@
 // Copyright 2014 Google Inc. All rights reserved.
 //
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file or at
 // https://developers.google.com/open-source/licenses/bsd
 #ifndef MEDIA_FILTERS_H264_BYTE_TO_UNIT_STREAM_CONVERTER_H_
 #define MEDIA_FILTERS_H264_BYTE_TO_UNIT_STREAM_CONVERTER_H_
 #include "base/basictypes.h"
 #include <vector>
 namespace media {
 class BufferWriter;
 /// Class which converts H.264 byte streams (as specified in ISO/IEC 14496-10
 /// Annex B) into H.264 NAL unit streams (as specified in ISO/IEC 14496-15).
 class H264ByteToUnitStreamConverter {
 public:
  /// Size, in bytes, of the length field written in front of every NAL unit
  /// in the converted (unit stream) output.
  static const size_t kUnitStreamNaluLengthSize = 4;
  H264ByteToUnitStreamConverter();
  ~H264ByteToUnitStreamConverter();
  /// Converts a whole AVC byte stream encoded video frame to NAL unit stream
  /// format. SPS and PPS NAL units encountered in the frame are retained
  /// internally (see GetAVCDecoderConfigurationRecord()) and, like AUD NAL
  /// units, are not copied to the output.
  /// @param input_frame is a buffer containing a whole H.264 frame in byte
  ///        stream format. The frame must begin with a start code.
  /// @param input_frame_size is the size of the H.264 frame, in bytes.
  /// @param output_frame is a pointer to a vector which will receive the
  ///        converted frame.
  /// @return true if successful, false otherwise (the frame contains no
  ///         start code or does not begin with one).
  bool ConvertByteStreamToNalUnitStream(const uint8* input_frame,
                                        size_t input_frame_size,
                                        std::vector<uint8>* output_frame);
  /// Synthesizes an AVCDecoderConfigurationRecord from the SPS and PPS NAL
  /// units extracted from the AVC byte stream.
  /// @param decoder_config is a pointer to a vector, which on successful
  ///        return will contain the computed AVCDecoderConfigurationRecord.
  /// @return true if successful, or false otherwise (no usable SPS and PPS
  ///         have been seen in previously converted frames).
  bool GetAVCDecoderConfigurationRecord(std::vector<uint8>* decoder_config);
 private:
  // Processes a single NAL unit (start code already stripped): stores SPS/PPS
  // units, drops AUD units and appends every other unit, length-prefixed, to
  // |output_buffer|.
  void ProcessNalu(const uint8* nalu_ptr,
                   size_t nalu_size,
                   BufferWriter* output_buffer);
  // Most recently seen SPS and PPS NAL units; empty until one is encountered.
  std::vector<uint8> last_sps_;
  std::vector<uint8> last_pps_;
 };
 }  // namespace media
 #endif  // MEDIA_FILTERS_H264_BYTE_TO_UNIT_STREAM_CONVERTER_H_
--- a/media/filters/h264_byte_to_unit_stream_converter_unittest.cc
+++ b/media/filters/h264_byte_to_unit_stream_converter_unittest.cc
@ -0,0 +1,61 @@
 // Copyright 2014 Google Inc. All rights reserved.
 //
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file or at
 // https://developers.google.com/open-source/licenses/bsd
 #include "base/strings/string_number_conversions.h"
 #include "media/filters/h264_byte_to_unit_stream_converter.h"
 #include "media/test/test_data_util.h"
 #include "testing/gtest/include/gtest/gtest.h"
 #include <stdio.h>
 namespace {
 // Hex dump of the AVCDecoderConfigurationRecord expected for the test frame:
 //   01            record version
 //   4d 40 0d      bytes 1..3 of the SPS
 //   ff            reserved bits + length size minus one
 //   e1            reserved bits + number of SPS (1)
 //   00 13         SPS length (19 bytes)
 //   27 4d .. 01   SPS NAL unit
 //   01            number of PPS
 //   00 04         PPS length (4 bytes)
 //   28 de 09 88   PPS NAL unit
 const char kExpectedConfigRecord[] =
    "014d400dffe10013274d400da918283e600d418041adb0ad7bdf01010004"
    "28de0988";
 }  // namespace
 namespace media {
 // Converts a known-good Annex B frame and checks both the converted frame
 // and the decoder configuration record derived from its SPS/PPS.
 TEST(H264ByteToUnitStreamConverter, ConversionSuccess) {
  // Test vectors: the byte stream input and its expected unit stream form.
  const std::vector<uint8> byte_stream_frame =
      ReadTestDataFile("avc-byte-stream-frame.h264");
  ASSERT_FALSE(byte_stream_frame.empty());

  const std::vector<uint8> expected_unit_stream_frame =
      ReadTestDataFile("avc-unit-stream-frame.h264");
  ASSERT_FALSE(expected_unit_stream_frame.empty());

  // The conversion itself must succeed and match the reference output.
  H264ByteToUnitStreamConverter converter;
  std::vector<uint8> unit_stream_frame;
  ASSERT_TRUE(converter.ConvertByteStreamToNalUnitStream(
      byte_stream_frame.data(),
      byte_stream_frame.size(),
      &unit_stream_frame));
  EXPECT_EQ(expected_unit_stream_frame, unit_stream_frame);

  // The SPS/PPS picked up during conversion must yield the expected record.
  std::vector<uint8> expected_config_record;
  ASSERT_TRUE(base::HexStringToBytes(kExpectedConfigRecord,
                                     &expected_config_record));

  std::vector<uint8> config_record;
  ASSERT_TRUE(converter.GetAVCDecoderConfigurationRecord(&config_record));
  EXPECT_EQ(expected_config_record, config_record);
 }
 // Feeds input without any start code and checks that conversion fails and
 // that no decoder configuration record becomes available.
 TEST(H264ByteToUnitStreamConverter, ConversionFailure) {
  // One hundred zero bytes: never forms a start code.
  std::vector<uint8> zero_frame(100, 0);

  H264ByteToUnitStreamConverter converter;
  std::vector<uint8> converted_frame;

  // An empty frame is rejected.
  EXPECT_FALSE(converter.ConvertByteStreamToNalUnitStream(
      zero_frame.data(), 0, &converted_frame));

  // So is a frame made of zeros only.
  EXPECT_FALSE(converter.ConvertByteStreamToNalUnitStream(
      zero_frame.data(), zero_frame.size(), &converted_frame));

  // No SPS/PPS was ever seen, so no configuration record can be built.
  std::vector<uint8> config_record;
  EXPECT_FALSE(converter.GetAVCDecoderConfigurationRecord(&config_record));
 }
 }  // namespace media
--- a/media/formats/mp2t/es_parser_h264.cc
+++ b/media/formats/mp2t/es_parser_h264.cc
@ -11,6 +11,7 @@
 #include "media/base/offset_byte_queue.h"
 #include "media/base/timestamp.h"
 #include "media/base/video_stream_info.h"
 #include "media/filters/h264_byte_to_unit_stream_converter.h"
 #include "media/filters/h264_parser.h"
 #include "media/formats/mp2t/mp2t_common.h"
@ -38,7 +39,10 @@ EsParserH264::EsParserH264(
      es_queue_(new media::OffsetByteQueue()),
      h264_parser_(new H264Parser()),
      current_access_unit_pos_(0),
-      next_access_unit_pos_(0) {
+      next_access_unit_pos_(0),
      stream_converter_(new H264ByteToUnitStreamConverter),
      decoder_config_check_pending_(false),
      pending_sample_duration_(0) {
 }
 EsParserH264::~EsParserH264() {
@ -72,14 +76,22 @@ bool EsParserH264::Parse(const uint8* buf, int size, int64 pts, int64 dts) {
 void EsParserH264::Flush() {
  DVLOG(1) << "EsParserH264::Flush";
  if (!FindAUD(&current_access_unit_pos_))
    return;
-  // Simulate an additional AUD to force emitting the last access unit
+  if (FindAUD(&current_access_unit_pos_)) {
-  // which is assumed to be complete at this point.
+    // Simulate an additional AUD to force emitting the last access unit
-  uint8 aud[] = { 0x00, 0x00, 0x01, 0x09 };
+    // which is assumed to be complete at this point.
-  es_queue_->Push(aud, sizeof(aud));
+    uint8 aud[] = { 0x00, 0x00, 0x01, 0x09 };
-  ParseInternal();
+    es_queue_->Push(aud, sizeof(aud));
    ParseInternal();
  }
  if (pending_sample_) {
    // Flush pending sample.
    DCHECK(pending_sample_duration_);
    pending_sample_->set_duration(pending_sample_duration_);
    emit_sample_cb_.Run(pid(), pending_sample_);
    pending_sample_ = scoped_refptr<MediaSample>();
  }
 }
 void EsParserH264::Reset() {
@ -90,6 +102,9 @@ void EsParserH264::Reset() {
  next_access_unit_pos_ = 0;
  timing_desc_list_.clear();
  last_video_decoder_config_ = scoped_refptr<StreamInfo>();
  decoder_config_check_pending_ = false;
  pending_sample_ = scoped_refptr<MediaSample>();
  pending_sample_duration_ = 0;
 }
 bool EsParserH264::FindAUD(int64* stream_pos) {
@ -189,6 +204,7 @@ bool EsParserH264::ParseInternal() {
        int sps_id;
        if (h264_parser_->ParseSPS(&sps_id) != H264Parser::kOk)
          return false;
        decoder_config_check_pending_ = true;
        break;
      }
      case H264NALU::kPPS: {
@ -196,6 +212,7 @@ bool EsParserH264::ParseInternal() {
        int pps_id;
        if (h264_parser_->ParsePPS(&pps_id) != H264Parser::kOk)
          return false;
        decoder_config_check_pending_ = true;
        break;
      }
      case H264NALU::kIDRSlice:
@ -242,23 +259,6 @@ bool EsParserH264::EmitFrame(int64 access_unit_pos, int access_unit_size,
  if (current_timing_desc.pts == kNoTimestamp)
    return false;
  // Update the video decoder configuration if needed.
  const H264PPS* pps = h264_parser_->GetPPS(pps_id);
  if (!pps) {
    // Only accept an invalid PPS at the beginning when the stream
    // does not necessarily start with an SPS/PPS/IDR.
    // In this case, the initial frames are conveyed to the upper layer with
    // an invalid VideoDecoderConfig and it's up to the upper layer
    // to process this kind of frame accordingly.
    if (last_video_decoder_config_)
      return false;
  } else {
    const H264SPS* sps = h264_parser_->GetSPS(pps->seq_parameter_set_id);
    if (!sps)
      return false;
    RCHECK(UpdateVideoDecoderConfig(sps));
  }
  // Emit a frame.
  DVLOG(LOG_LEVEL_ES) << "Emit frame: stream_pos=" << current_access_unit_pos_
                      << " size=" << access_unit_size;
@ -267,25 +267,67 @@ bool EsParserH264::EmitFrame(int64 access_unit_pos, int access_unit_size,
  es_queue_->PeekAt(current_access_unit_pos_, &es, &es_size);
  CHECK_GE(es_size, access_unit_size);
-  // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId
+  // Convert frame to unit stream format.
-  // type and allow multiple video tracks. See https://crbug.com/341581.
+  std::vector<uint8> converted_frame;
-  scoped_refptr<MediaSample> media_sample =
+  if (!stream_converter_->ConvertByteStreamToNalUnitStream(
-      MediaSample::CopyFrom(
+          es, access_unit_size, &converted_frame)) {
-          es,
+    DLOG(ERROR) << "Failure to convert video frame to unit stream format.";
-          access_unit_size,
+    return false;
-          is_key_frame);
+  }
  if (decoder_config_check_pending_) {
    // Update the video decoder configuration if needed.
    const H264PPS* pps = h264_parser_->GetPPS(pps_id);
    if (!pps) {
      // Only accept an invalid PPS at the beginning when the stream
      // does not necessarily start with an SPS/PPS/IDR.
      // In this case, the initial frames are conveyed to the upper layer with
      // an invalid VideoDecoderConfig and it's up to the upper layer
      // to process this kind of frame accordingly.
      if (last_video_decoder_config_)
        return false;
    } else {
      const H264SPS* sps = h264_parser_->GetSPS(pps->seq_parameter_set_id);
      if (!sps)
        return false;
      RCHECK(UpdateVideoDecoderConfig(sps));
      decoder_config_check_pending_ = false;
    }
  }
  // Create the media sample, emitting always the previous sample after
  // calculating its duration.
  scoped_refptr<MediaSample> media_sample = MediaSample::CopyFrom(
      converted_frame.data(), converted_frame.size(), is_key_frame);
  media_sample->set_dts(current_timing_desc.dts);
  media_sample->set_pts(current_timing_desc.pts);
-  emit_sample_cb_.Run(pid(), media_sample);
+  if (pending_sample_) {
    DCHECK_GT(media_sample->dts(), pending_sample_->dts());
    pending_sample_duration_ = media_sample->dts() - pending_sample_->dts();
    pending_sample_->set_duration(pending_sample_duration_);
    emit_sample_cb_.Run(pid(), pending_sample_);
  }
  pending_sample_ = media_sample;
  return true;
 }
 bool EsParserH264::UpdateVideoDecoderConfig(const H264SPS* sps) {
-  // TODO(tinskip): Generate an error if video configuration change is detected.
+  std::vector<uint8> decoder_config_record;
  if (!stream_converter_->GetAVCDecoderConfigurationRecord(
          &decoder_config_record)) {
    DLOG(ERROR) << "Failure to construct an AVCDecoderConfigurationRecord";
    return false;
  }
  if (last_video_decoder_config_) {
-    // Varying video configurations currently not supported. Just assume that
+    // Verify that the video decoder config has not changed.
-    // the video configuration has not changed.
+    if (last_video_decoder_config_->extra_data() == decoder_config_record) {
-    return true;
+      // Video configuration has not changed.
      return true;
    }
    NOTIMPLEMENTED() << "Varying video configurations are not supported.";
    return false;
  }
  // TODO(damienv): a MAP unit can be either 16 or 32 pixels.
@ -299,13 +341,16 @@ bool EsParserH264::UpdateVideoDecoderConfig(const H264SPS* sps) {
          kMpeg2Timescale,
          kInfiniteDuration,
          kCodecH264,
-          std::string(),  // TODO(tinskip): calculate codec string.
+          VideoStreamInfo::GetCodecString(kCodecH264,
                                          decoder_config_record[1],
                                          decoder_config_record[2],
                                          decoder_config_record[3]),
          std::string(),
          width,
          height,
-          kCommonNaluLengthSize,
+          H264ByteToUnitStreamConverter::kUnitStreamNaluLengthSize,
-          NULL,  // TODO(tinskip): calculate AVCDecoderConfigurationRecord.
+          decoder_config_record.data(),
-          0,
+          decoder_config_record.size(),
          false));
  DVLOG(1) << "Profile IDC: " << sps->profile_idc;
  DVLOG(1) << "Level IDC: " << sps->level_idc;
--- a/media/formats/mp2t/es_parser_h264.h
+++ b/media/formats/mp2t/es_parser_h264.h
@ -16,6 +16,7 @@
 namespace media {
 class H264ByteToUnitStreamConverter;
 class H264Parser;
 class OffsetByteQueue;
 struct H264SPS;
@ -83,8 +84,16 @@ class EsParserH264 : public EsParser {
  int64 current_access_unit_pos_;
  int64 next_access_unit_pos_;
  // Filter to convert H.264 Annex B byte stream to unit stream.
  scoped_ptr<H264ByteToUnitStreamConverter> stream_converter_;
  // Last video decoder config.
  scoped_refptr<StreamInfo> last_video_decoder_config_;
  bool decoder_config_check_pending_;
  // Frame for which we do not yet have a duration.
  scoped_refptr<MediaSample> pending_sample_;
  uint64 pending_sample_duration_;
 };
 }  // namespace mp2t
--- a/media/formats/mp2t/mp2t_media_parser_unittest.cc
+++ b/media/formats/mp2t/mp2t_media_parser_unittest.cc
@ -126,7 +126,7 @@ class Mp2tMediaParserTest : public testing::Test {
 TEST_F(Mp2tMediaParserTest, UnalignedAppend17) {
  // Test small, non-segment-aligned appends.
  ParseMpeg2TsFile("bear-1280x720.ts", 17);
-  EXPECT_EQ(video_frame_count_, 81);
+  EXPECT_EQ(video_frame_count_, 80);
  parser_->Flush();
  EXPECT_EQ(video_frame_count_, 82);
 }
@ -134,7 +134,7 @@ TEST_F(Mp2tMediaParserTest, UnalignedAppend17) {
 TEST_F(Mp2tMediaParserTest, UnalignedAppend512) {
  // Test small, non-segment-aligned appends.
  ParseMpeg2TsFile("bear-1280x720.ts", 512);
-  EXPECT_EQ(video_frame_count_, 81);
+  EXPECT_EQ(video_frame_count_, 80);
  parser_->Flush();
  EXPECT_EQ(video_frame_count_, 82);
 }
@ -145,7 +145,8 @@ TEST_F(Mp2tMediaParserTest, TimestampWrapAround) {
  // (close to 2^33 / 90000) which results in timestamps wrap around
  // in the Mpeg2 TS stream.
  ParseMpeg2TsFile("bear-1280x720_ptswraparound.ts", 512);
-  EXPECT_EQ(video_frame_count_, 81);
+  parser_->Flush();
  EXPECT_EQ(video_frame_count_, 82);
  EXPECT_GE(video_min_dts_, (95443 - 1) * kMpeg2Timescale);
  EXPECT_LE(video_max_dts_,
            static_cast<int64>((95443 + 4)) * kMpeg2Timescale);
--- a/media/test/data/README
+++ b/media/test/data/README
@ -74,3 +74,6 @@ bear.h264:
  bear.mp4 (https://chromiumcodereview.appspot.com/10805089):
  ffmpeg -i bear.mp4 -vcodec copy -vbsf h264_mp4toannexb \
      -an bear.h264
 avc-byte-stream-frame.h264 - Single IDR frame extracted from test-25fps.h264 in Annex B byte stream format.
 avc-unit-stream-frame.h264 - Single IDR frame from avc-byte-stream-frame.h264 converted to unit stream format.
--- a/media/test/data/avc-byte-stream-frame.h264
+++ b/media/test/data/avc-byte-stream-frame.h264
--- a/media/test/data/avc-unit-stream-frame.h264
+++ b/media/test/data/avc-unit-stream-frame.h264