From 0ad332896e9d396b33b5a68f384f409c1ed50407 Mon Sep 17 00:00:00 2001 From: Thomas Inskip Date: Mon, 31 Mar 2014 18:34:59 -0700 Subject: [PATCH] Check-in of unmodified MPEG-2 and H.264 parsers. Source of media/formats/mp2t: http://src.chromium.org/chrome/trunk/src/media/formats/mp2t@260741 Source of media/filters: http://src.chromium.org/chrome/trunk/src/media/filters@260719 Change-Id: Ib4c72553f0213cb6dd25fa3dcc0367d96cdd094a --- media/filters/h264_bit_reader.cc | 113 ++ media/filters/h264_bit_reader.h | 79 ++ media/filters/h264_bit_reader_unittest.cc | 73 + media/filters/h264_parser.cc | 1264 +++++++++++++++++ media/filters/h264_parser.h | 399 ++++++ media/filters/h264_parser_unittest.cc | 72 + media/formats/mp2t/es_parser.h | 42 + media/formats/mp2t/es_parser_adts.cc | 276 ++++ media/formats/mp2t/es_parser_adts.h | 86 ++ media/formats/mp2t/es_parser_h264.cc | 332 +++++ media/formats/mp2t/es_parser_h264.h | 98 ++ media/formats/mp2t/es_parser_h264_unittest.cc | 261 ++++ media/formats/mp2t/mp2t_common.h | 21 + media/formats/mp2t/mp2t_stream_parser.cc | 622 ++++++++ media/formats/mp2t/mp2t_stream_parser.h | 136 ++ .../mp2t/mp2t_stream_parser_unittest.cc | 187 +++ media/formats/mp2t/ts_packet.cc | 215 +++ media/formats/mp2t/ts_packet.h | 73 + media/formats/mp2t/ts_section.h | 40 + media/formats/mp2t/ts_section_pat.cc | 122 ++ media/formats/mp2t/ts_section_pat.h | 40 + media/formats/mp2t/ts_section_pes.cc | 312 ++++ media/formats/mp2t/ts_section_pes.h | 64 + media/formats/mp2t/ts_section_pmt.cc | 122 ++ media/formats/mp2t/ts_section_pmt.h | 40 + media/formats/mp2t/ts_section_psi.cc | 132 ++ media/formats/mp2t/ts_section_psi.h | 54 + 27 files changed, 5275 insertions(+) create mode 100644 media/filters/h264_bit_reader.cc create mode 100644 media/filters/h264_bit_reader.h create mode 100644 media/filters/h264_bit_reader_unittest.cc create mode 100644 media/filters/h264_parser.cc create mode 100644 media/filters/h264_parser.h create mode 100644 
media/filters/h264_parser_unittest.cc create mode 100644 media/formats/mp2t/es_parser.h create mode 100644 media/formats/mp2t/es_parser_adts.cc create mode 100644 media/formats/mp2t/es_parser_adts.h create mode 100644 media/formats/mp2t/es_parser_h264.cc create mode 100644 media/formats/mp2t/es_parser_h264.h create mode 100644 media/formats/mp2t/es_parser_h264_unittest.cc create mode 100644 media/formats/mp2t/mp2t_common.h create mode 100644 media/formats/mp2t/mp2t_stream_parser.cc create mode 100644 media/formats/mp2t/mp2t_stream_parser.h create mode 100644 media/formats/mp2t/mp2t_stream_parser_unittest.cc create mode 100644 media/formats/mp2t/ts_packet.cc create mode 100644 media/formats/mp2t/ts_packet.h create mode 100644 media/formats/mp2t/ts_section.h create mode 100644 media/formats/mp2t/ts_section_pat.cc create mode 100644 media/formats/mp2t/ts_section_pat.h create mode 100644 media/formats/mp2t/ts_section_pes.cc create mode 100644 media/formats/mp2t/ts_section_pes.h create mode 100644 media/formats/mp2t/ts_section_pmt.cc create mode 100644 media/formats/mp2t/ts_section_pmt.h create mode 100644 media/formats/mp2t/ts_section_psi.cc create mode 100644 media/formats/mp2t/ts_section_psi.h diff --git a/media/filters/h264_bit_reader.cc b/media/filters/h264_bit_reader.cc new file mode 100644 index 0000000000..9894d97897 --- /dev/null +++ b/media/filters/h264_bit_reader.cc @@ -0,0 +1,113 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "base/logging.h" +#include "media/filters/h264_bit_reader.h" + +namespace media { + +H264BitReader::H264BitReader() + : data_(NULL), + bytes_left_(0), + curr_byte_(0), + num_remaining_bits_in_curr_byte_(0), + prev_two_bytes_(0), + emulation_prevention_bytes_(0) {} + +H264BitReader::~H264BitReader() {} + +bool H264BitReader::Initialize(const uint8* data, off_t size) { + DCHECK(data); + + if (size < 1) + return false; + + data_ = data; + bytes_left_ = size; + num_remaining_bits_in_curr_byte_ = 0; + // Initially set to 0xffff to accept all initial two-byte sequences. + prev_two_bytes_ = 0xffff; + emulation_prevention_bytes_ = 0; + + return true; +} + +bool H264BitReader::UpdateCurrByte() { + if (bytes_left_ < 1) + return false; + + // Emulation prevention three-byte detection. + // If a sequence of 0x000003 is found, skip (ignore) the last byte (0x03). + if (*data_ == 0x03 && (prev_two_bytes_ & 0xffff) == 0) { + // Detected 0x000003, skip last byte. + ++data_; + --bytes_left_; + ++emulation_prevention_bytes_; + // Need another full three bytes before we can detect the sequence again. + prev_two_bytes_ = 0xffff; + + if (bytes_left_ < 1) + return false; + } + + // Load a new byte and advance pointers. + curr_byte_ = *data_++ & 0xff; + --bytes_left_; + num_remaining_bits_in_curr_byte_ = 8; + + prev_two_bytes_ = (prev_two_bytes_ << 8) | curr_byte_; + + return true; +} + +// Read |num_bits| (1 to 31 inclusive) from the stream and return them +// in |out|, with first bit in the stream as MSB in |out| at position +// (|num_bits| - 1). +bool H264BitReader::ReadBits(int num_bits, int* out) { + int bits_left = num_bits; + *out = 0; + DCHECK(num_bits <= 31); + + while (num_remaining_bits_in_curr_byte_ < bits_left) { + // Take all that's left in current byte, shift to make space for the rest. 
+    // Shift in unsigned arithmetic: |curr_byte_| may still carry bits that
+    // were already consumed, and pushing them into the sign bit of an int is
+    // undefined behavior. The final mask below discards them anyway.
+    *out |= static_cast<int>(
+        static_cast<unsigned int>(curr_byte_)
+        << (bits_left - num_remaining_bits_in_curr_byte_));
+    bits_left -= num_remaining_bits_in_curr_byte_;
+
+    if (!UpdateCurrByte())
+      return false;
+  }
+
+  *out |= (curr_byte_ >> (num_remaining_bits_in_curr_byte_ - bits_left));
+  // Build the mask as unsigned: for |num_bits| == 31, (1 << 31) - 1 would
+  // overflow a signed int.
+  *out &= ((1u << num_bits) - 1u);
+  num_remaining_bits_in_curr_byte_ -= bits_left;
+
+  return true;
+}
+
+off_t H264BitReader::NumBitsLeft() {
+  return (num_remaining_bits_in_curr_byte_ + bytes_left_ * 8);
+}
+
+bool H264BitReader::HasMoreRBSPData() {
+  // Make sure we have more bits, if we are at 0 bits in current byte
+  // and updating current byte fails, we don't have more data anyway.
+  if (num_remaining_bits_in_curr_byte_ == 0 && !UpdateCurrByte())
+    return false;
+
+  // On last byte?
+  if (bytes_left_)
+    return true;
+
+  // Last byte, look for stop bit;
+  // We have more RBSP data if the last non-zero bit we find is not the
+  // first available bit.
+  return (curr_byte_ &
+          ((1 << (num_remaining_bits_in_curr_byte_ - 1)) - 1)) != 0;
+}
+
+size_t H264BitReader::NumEmulationPreventionBytesRead() {
+  return emulation_prevention_bytes_;
+}
+
+}  // namespace media
diff --git a/media/filters/h264_bit_reader.h b/media/filters/h264_bit_reader.h
new file mode 100644
index 0000000000..01cfd74109
--- /dev/null
+++ b/media/filters/h264_bit_reader.h
@@ -0,0 +1,79 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// This file contains an implementation of an H264 Annex-B video stream parser.
+
+#ifndef MEDIA_FILTERS_H264_BIT_READER_H_
+#define MEDIA_FILTERS_H264_BIT_READER_H_
+
+#include <sys/types.h>
+
+#include "base/basictypes.h"
+#include "media/base/media_export.h"
+
+namespace media {
+
+// A class to provide bit-granularity reading of H.264 streams.
+// This is not a generic bit reader class, as it takes into account
+// H.264 stream-specific constraints, such as skipping emulation-prevention
+// bytes and stop bits.
See spec for more details. +class MEDIA_EXPORT H264BitReader { + public: + H264BitReader(); + ~H264BitReader(); + + // Initialize the reader to start reading at |data|, |size| being size + // of |data| in bytes. + // Return false on insufficient size of stream. + // TODO(posciak,fischman): consider replacing Initialize() with + // heap-allocating and creating bit readers on demand instead. + bool Initialize(const uint8* data, off_t size); + + // Read |num_bits| next bits from stream and return in |*out|, first bit + // from the stream ending up at position |num_bits| - 1 in |*out|. + // |num_bits| may be 1-31, inclusive. + // Return false if the given number of bits cannot be read (not enough + // bits in the stream), true otherwise. + bool ReadBits(int num_bits, int* out); + + // Return the number of bits left in the stream. + off_t NumBitsLeft(); + + // See the definition of more_rbsp_data() in spec. + bool HasMoreRBSPData(); + + // Return the number of emulation prevention bytes already read. + size_t NumEmulationPreventionBytesRead(); + + private: + // Advance to the next byte, loading it into curr_byte_. + // Return false on end of stream. + bool UpdateCurrByte(); + + // Pointer to the next unread (not in curr_byte_) byte in the stream. + const uint8* data_; + + // Bytes left in the stream (without the curr_byte_). + off_t bytes_left_; + + // Contents of the current byte; first unread bit starting at position + // 8 - num_remaining_bits_in_curr_byte_ from MSB. + int curr_byte_; + + // Number of bits remaining in curr_byte_. + int num_remaining_bits_in_curr_byte_; + + // Used in emulation prevention three byte detection (see spec). + // Initially set to 0xffff to accept all initial two-byte sequences. + int prev_two_bytes_; + + // Number of emulation prevention bytes (0x000003) we met. 
+ size_t emulation_prevention_bytes_; + + DISALLOW_COPY_AND_ASSIGN(H264BitReader); +}; + +} // namespace media + +#endif // MEDIA_FILTERS_H264_BIT_READER_H_ diff --git a/media/filters/h264_bit_reader_unittest.cc b/media/filters/h264_bit_reader_unittest.cc new file mode 100644 index 0000000000..e12e75ebcd --- /dev/null +++ b/media/filters/h264_bit_reader_unittest.cc @@ -0,0 +1,73 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "media/filters/h264_bit_reader.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace media { + +TEST(H264BitReaderTest, ReadStreamWithoutEscapeAndTrailingZeroBytes) { + H264BitReader reader; + const unsigned char rbsp[] = {0x01, 0x23, 0x45, 0x67, 0x89, 0xa0}; + int dummy = 0; + + EXPECT_TRUE(reader.Initialize(rbsp, sizeof(rbsp))); + + EXPECT_TRUE(reader.ReadBits(1, &dummy)); + EXPECT_EQ(dummy, 0x00); + EXPECT_EQ(reader.NumBitsLeft(), 47); + EXPECT_TRUE(reader.HasMoreRBSPData()); + + EXPECT_TRUE(reader.ReadBits(8, &dummy)); + EXPECT_EQ(dummy, 0x02); + EXPECT_EQ(reader.NumBitsLeft(), 39); + EXPECT_TRUE(reader.HasMoreRBSPData()); + + EXPECT_TRUE(reader.ReadBits(31, &dummy)); + EXPECT_EQ(dummy, 0x23456789); + EXPECT_EQ(reader.NumBitsLeft(), 8); + EXPECT_TRUE(reader.HasMoreRBSPData()); + + EXPECT_TRUE(reader.ReadBits(1, &dummy)); + EXPECT_EQ(dummy, 1); + EXPECT_EQ(reader.NumBitsLeft(), 7); + EXPECT_TRUE(reader.HasMoreRBSPData()); + + EXPECT_TRUE(reader.ReadBits(1, &dummy)); + EXPECT_EQ(dummy, 0); + EXPECT_EQ(reader.NumBitsLeft(), 6); + EXPECT_FALSE(reader.HasMoreRBSPData()); +} + +TEST(H264BitReaderTest, SingleByteStream) { + H264BitReader reader; + const unsigned char rbsp[] = {0x18}; + int dummy = 0; + + EXPECT_TRUE(reader.Initialize(rbsp, sizeof(rbsp))); + EXPECT_EQ(reader.NumBitsLeft(), 8); + EXPECT_TRUE(reader.HasMoreRBSPData()); + + EXPECT_TRUE(reader.ReadBits(4, &dummy)); + EXPECT_EQ(dummy, 0x01); + 
EXPECT_EQ(reader.NumBitsLeft(), 4); + EXPECT_FALSE(reader.HasMoreRBSPData()); +} + +TEST(H264BitReaderTest, StopBitOccupyFullByte) { + H264BitReader reader; + const unsigned char rbsp[] = {0xab, 0x80}; + int dummy = 0; + + EXPECT_TRUE(reader.Initialize(rbsp, sizeof(rbsp))); + EXPECT_EQ(reader.NumBitsLeft(), 16); + EXPECT_TRUE(reader.HasMoreRBSPData()); + + EXPECT_TRUE(reader.ReadBits(8, &dummy)); + EXPECT_EQ(dummy, 0xab); + EXPECT_EQ(reader.NumBitsLeft(), 8); + EXPECT_FALSE(reader.HasMoreRBSPData()); +} + +} // namespace media diff --git a/media/filters/h264_parser.cc b/media/filters/h264_parser.cc new file mode 100644 index 0000000000..4cdc695933 --- /dev/null +++ b/media/filters/h264_parser.cc @@ -0,0 +1,1264 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/logging.h" +#include "base/memory/scoped_ptr.h" +#include "base/stl_util.h" + +#include "media/filters/h264_parser.h" + +namespace media { + +bool H264SliceHeader::IsPSlice() const { + return (slice_type % 5 == kPSlice); +} + +bool H264SliceHeader::IsBSlice() const { + return (slice_type % 5 == kBSlice); +} + +bool H264SliceHeader::IsISlice() const { + return (slice_type % 5 == kISlice); +} + +bool H264SliceHeader::IsSPSlice() const { + return (slice_type % 5 == kSPSlice); +} + +bool H264SliceHeader::IsSISlice() const { + return (slice_type % 5 == kSISlice); +} + +H264NALU::H264NALU() { + memset(this, 0, sizeof(*this)); +} + +H264SPS::H264SPS() { + memset(this, 0, sizeof(*this)); +} + +H264PPS::H264PPS() { + memset(this, 0, sizeof(*this)); +} + +H264SliceHeader::H264SliceHeader() { + memset(this, 0, sizeof(*this)); +} + +H264SEIMessage::H264SEIMessage() { + memset(this, 0, sizeof(*this)); +} + +#define READ_BITS_OR_RETURN(num_bits, out) \ + do { \ + int _out; \ + if (!br_.ReadBits(num_bits, &_out)) { \ + DVLOG(1) \ + << "Error in stream: unexpected EOS while trying to read 
" #out; \ + return kInvalidStream; \ + } \ + *out = _out; \ + } while (0) + +#define READ_BOOL_OR_RETURN(out) \ + do { \ + int _out; \ + if (!br_.ReadBits(1, &_out)) { \ + DVLOG(1) \ + << "Error in stream: unexpected EOS while trying to read " #out; \ + return kInvalidStream; \ + } \ + *out = _out != 0; \ + } while (0) + +#define READ_UE_OR_RETURN(out) \ + do { \ + if (ReadUE(out) != kOk) { \ + DVLOG(1) << "Error in stream: invalid value while trying to read " #out; \ + return kInvalidStream; \ + } \ + } while (0) + +#define READ_SE_OR_RETURN(out) \ + do { \ + if (ReadSE(out) != kOk) { \ + DVLOG(1) << "Error in stream: invalid value while trying to read " #out; \ + return kInvalidStream; \ + } \ + } while (0) + +#define IN_RANGE_OR_RETURN(val, min, max) \ + do { \ + if ((val) < (min) || (val) > (max)) { \ + DVLOG(1) << "Error in stream: invalid value, expected " #val " to be" \ + << " in range [" << (min) << ":" << (max) << "]" \ + << " found " << (val) << " instead"; \ + return kInvalidStream; \ + } \ + } while (0) + +#define TRUE_OR_RETURN(a) \ + do { \ + if (!(a)) { \ + DVLOG(1) << "Error in stream: invalid value, expected " << #a; \ + return kInvalidStream; \ + } \ + } while (0) + +enum AspectRatioIdc { + kExtendedSar = 255, +}; + +// ISO 14496 part 10 +// VUI parameters: Table E-1 "Meaning of sample aspect ratio indicator" +static const int kTableSarWidth[] = { + 0, 1, 12, 10, 16, 40, 24, 20, 32, 80, 18, 15, 64, 160, 4, 3, 2 +}; +static const int kTableSarHeight[] = { + 0, 1, 11, 11, 11, 33, 11, 11, 11, 33, 11, 11, 33, 99, 3, 2, 1 +}; +COMPILE_ASSERT(arraysize(kTableSarWidth) == arraysize(kTableSarHeight), + sar_tables_must_have_same_size); + +H264Parser::H264Parser() { + Reset(); +} + +H264Parser::~H264Parser() { + STLDeleteValues(&active_SPSes_); + STLDeleteValues(&active_PPSes_); +} + +void H264Parser::Reset() { + stream_ = NULL; + bytes_left_ = 0; +} + +void H264Parser::SetStream(const uint8* stream, off_t stream_size) { + DCHECK(stream); + 
DCHECK_GT(stream_size, 0); + + stream_ = stream; + bytes_left_ = stream_size; +} + +const H264PPS* H264Parser::GetPPS(int pps_id) { + return active_PPSes_[pps_id]; +} + +const H264SPS* H264Parser::GetSPS(int sps_id) { + return active_SPSes_[sps_id]; +} + +static inline bool IsStartCode(const uint8* data) { + return data[0] == 0x00 && data[1] == 0x00 && data[2] == 0x01; +} + +// static +bool H264Parser::FindStartCode(const uint8* data, off_t data_size, + off_t* offset, off_t* start_code_size) { + DCHECK_GE(data_size, 0); + off_t bytes_left = data_size; + + while (bytes_left >= 3) { + if (IsStartCode(data)) { + // Found three-byte start code, set pointer at its beginning. + *offset = data_size - bytes_left; + *start_code_size = 3; + + // If there is a zero byte before this start code, + // then it's actually a four-byte start code, so backtrack one byte. + if (*offset > 0 && *(data - 1) == 0x00) { + --(*offset); + ++(*start_code_size); + } + + return true; + } + + ++data; + --bytes_left; + } + + // End of data: offset is pointing to the first byte that was not considered + // as a possible start of a start code. + // Note: there is no security issue when receiving a negative |data_size| + // since in this case, |bytes_left| is equal to |data_size| and thus + // |*offset| is equal to 0 (valid offset). + *offset = data_size - bytes_left; + *start_code_size = 0; + return false; +} + +bool H264Parser::LocateNALU(off_t* nalu_size, off_t* start_code_size) { + // Find the start code of next NALU. + off_t nalu_start_off = 0; + off_t annexb_start_code_size = 0; + if (!FindStartCode(stream_, bytes_left_, + &nalu_start_off, &annexb_start_code_size)) { + DVLOG(4) << "Could not find start code, end of stream?"; + return false; + } + + // Move the stream to the beginning of the NALU (pointing at the start code). 
+  stream_ += nalu_start_off;
+  bytes_left_ -= nalu_start_off;
+
+  const uint8* nalu_data = stream_ + annexb_start_code_size;
+  off_t max_nalu_data_size = bytes_left_ - annexb_start_code_size;
+  if (max_nalu_data_size <= 0) {
+    DVLOG(3) << "End of stream";
+    return false;
+  }
+
+  // Find the start code of next NALU;
+  // if successful, |nalu_size_without_start_code| is the number of bytes from
+  // after previous start code to before this one;
+  // if next start code is not found, it is still a valid NALU since there
+  // are some bytes left after the first start code: all the remaining bytes
+  // belong to the current NALU.
+  off_t next_start_code_size = 0;
+  off_t nalu_size_without_start_code = 0;
+  if (!FindStartCode(nalu_data, max_nalu_data_size,
+                     &nalu_size_without_start_code, &next_start_code_size)) {
+    nalu_size_without_start_code = max_nalu_data_size;
+  }
+  *nalu_size = nalu_size_without_start_code + annexb_start_code_size;
+  *start_code_size = annexb_start_code_size;
+  return true;
+}
+
+// Read an unsigned Exp-Golomb coded value, ue(v), into |*val|.
+// Returns kInvalidStream if the stream ends early or if the decoded value
+// does not fit in a non-negative int.
+H264Parser::Result H264Parser::ReadUE(int* val) {
+  int num_bits = -1;
+  int bit;
+  int rest;
+
+  // Count the number of contiguous zero bits.
+  do {
+    READ_BITS_OR_RETURN(1, &bit);
+    num_bits++;
+  } while (bit == 0);
+
+  if (num_bits > 31)
+    return kInvalidStream;
+
+  // Calculate exp-Golomb code value of size num_bits.
+  // Computed as unsigned: (1 << 31) - 1 would overflow a signed int.
+  *val = (1u << num_bits) - 1u;
+
+  // Special case for |num_bits| == 31: the prefix alone already yields
+  // 2^31 - 1, the largest value an int can hold, so the remaining bits must
+  // all be 0 or else the number is too large.
+  if (num_bits == 31) {
+    READ_BITS_OR_RETURN(num_bits, &rest);
+    return (rest == 0) ? kOk : kInvalidStream;
+  }
+
+  if (num_bits > 0) {
+    READ_BITS_OR_RETURN(num_bits, &rest);
+    *val += rest;
+  }
+
+  return kOk;
+}
+
+H264Parser::Result H264Parser::ReadSE(int* val) {
+  int ue;
+  Result res;
+
+  // See Chapter 9 in the spec.
+  res = ReadUE(&ue);
+  if (res != kOk)
+    return res;
+
+  if (ue % 2 == 0)
+    *val = -(ue / 2);
+  else
+    *val = ue / 2 + 1;
+
+  return kOk;
+}
+
+// Locate the next NALU in the stream, fill in |nalu| (data pointer, size,
+// nal_ref_idc, nal_unit_type) and position the bit reader just past the
+// one-byte NALU header. Returns kEOStream when no further NALU is found.
+H264Parser::Result H264Parser::AdvanceToNextNALU(H264NALU* nalu) {
+  off_t start_code_size;
+  off_t nalu_size_with_start_code;
+  if (!LocateNALU(&nalu_size_with_start_code, &start_code_size)) {
+    DVLOG(4) << "Could not find next NALU, bytes left in stream: "
+             << bytes_left_;
+    return kEOStream;
+  }
+
+  nalu->data = stream_ + start_code_size;
+  nalu->size = nalu_size_with_start_code - start_code_size;
+  DVLOG(4) << "NALU found: size=" << nalu_size_with_start_code;
+
+  // Initialize bit reader at the start of found NALU.
+  if (!br_.Initialize(nalu->data, nalu->size))
+    return kEOStream;
+
+  // Move parser state to after this NALU, so next time AdvanceToNextNALU
+  // is called, we will effectively be skipping it;
+  // other parsing functions will use the position saved
+  // in bit reader for parsing, so we don't have to remember it here.
+  stream_ += nalu_size_with_start_code;
+  bytes_left_ -= nalu_size_with_start_code;
+
+  // Read NALU header, skip the forbidden_zero_bit, but check for it.
+  int data;
+  READ_BITS_OR_RETURN(1, &data);
+  TRUE_OR_RETURN(data == 0);
+
+  READ_BITS_OR_RETURN(2, &nalu->nal_ref_idc);
+  READ_BITS_OR_RETURN(5, &nalu->nal_unit_type);
+
+  // The explicit cast targets keep the header fields printing as numbers and
+  // the data pointer printing as an address rather than as a C string.
+  DVLOG(4) << "NALU type: " << static_cast<int>(nalu->nal_unit_type)
+           << " at: " << reinterpret_cast<const void*>(nalu->data)
+           << " size: " << nalu->size
+           << " ref: " << static_cast<int>(nalu->nal_ref_idc);
+
+  return kOk;
+}
+
+// Default scaling lists (per spec).
+static const int kDefault4x4Intra[kH264ScalingList4x4Length] = { + 6, 13, 13, 20, 20, 20, 28, 28, 28, 28, 32, 32, 32, 37, 37, 42, }; + +static const int kDefault4x4Inter[kH264ScalingList4x4Length] = { + 10, 14, 14, 20, 20, 20, 24, 24, 24, 24, 27, 27, 27, 30, 30, 34, }; + +static const int kDefault8x8Intra[kH264ScalingList8x8Length] = { + 6, 10, 10, 13, 11, 13, 16, 16, 16, 16, 18, 18, 18, 18, 18, 23, + 23, 23, 23, 23, 23, 25, 25, 25, 25, 25, 25, 25, 27, 27, 27, 27, + 27, 27, 27, 27, 29, 29, 29, 29, 29, 29, 29, 31, 31, 31, 31, 31, + 31, 33, 33, 33, 33, 33, 36, 36, 36, 36, 38, 38, 38, 40, 40, 42, }; + +static const int kDefault8x8Inter[kH264ScalingList8x8Length] = { + 9, 13, 13, 15, 13, 15, 17, 17, 17, 17, 19, 19, 19, 19, 19, 21, + 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 24, 24, 24, 24, + 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 27, 27, 27, 27, 27, + 27, 28, 28, 28, 28, 28, 30, 30, 30, 30, 32, 32, 32, 33, 33, 35, }; + +static inline void DefaultScalingList4x4( + int i, + int scaling_list4x4[][kH264ScalingList4x4Length]) { + DCHECK_LT(i, 6); + + if (i < 3) + memcpy(scaling_list4x4[i], kDefault4x4Intra, sizeof(kDefault4x4Intra)); + else if (i < 6) + memcpy(scaling_list4x4[i], kDefault4x4Inter, sizeof(kDefault4x4Inter)); +} + +static inline void DefaultScalingList8x8( + int i, + int scaling_list8x8[][kH264ScalingList8x8Length]) { + DCHECK_LT(i, 6); + + if (i % 2 == 0) + memcpy(scaling_list8x8[i], kDefault8x8Intra, sizeof(kDefault8x8Intra)); + else + memcpy(scaling_list8x8[i], kDefault8x8Inter, sizeof(kDefault8x8Inter)); +} + +static void FallbackScalingList4x4( + int i, + const int default_scaling_list_intra[], + const int default_scaling_list_inter[], + int scaling_list4x4[][kH264ScalingList4x4Length]) { + static const int kScalingList4x4ByteSize = + sizeof(scaling_list4x4[0][0]) * kH264ScalingList4x4Length; + + switch (i) { + case 0: + memcpy(scaling_list4x4[i], default_scaling_list_intra, + kScalingList4x4ByteSize); + break; + + case 1: + 
memcpy(scaling_list4x4[i], scaling_list4x4[0], kScalingList4x4ByteSize); + break; + + case 2: + memcpy(scaling_list4x4[i], scaling_list4x4[1], kScalingList4x4ByteSize); + break; + + case 3: + memcpy(scaling_list4x4[i], default_scaling_list_inter, + kScalingList4x4ByteSize); + break; + + case 4: + memcpy(scaling_list4x4[i], scaling_list4x4[3], kScalingList4x4ByteSize); + break; + + case 5: + memcpy(scaling_list4x4[i], scaling_list4x4[4], kScalingList4x4ByteSize); + break; + + default: + NOTREACHED(); + break; + } +} + +static void FallbackScalingList8x8( + int i, + const int default_scaling_list_intra[], + const int default_scaling_list_inter[], + int scaling_list8x8[][kH264ScalingList8x8Length]) { + static const int kScalingList8x8ByteSize = + sizeof(scaling_list8x8[0][0]) * kH264ScalingList8x8Length; + + switch (i) { + case 0: + memcpy(scaling_list8x8[i], default_scaling_list_intra, + kScalingList8x8ByteSize); + break; + + case 1: + memcpy(scaling_list8x8[i], default_scaling_list_inter, + kScalingList8x8ByteSize); + break; + + case 2: + memcpy(scaling_list8x8[i], scaling_list8x8[0], kScalingList8x8ByteSize); + break; + + case 3: + memcpy(scaling_list8x8[i], scaling_list8x8[1], kScalingList8x8ByteSize); + break; + + case 4: + memcpy(scaling_list8x8[i], scaling_list8x8[2], kScalingList8x8ByteSize); + break; + + case 5: + memcpy(scaling_list8x8[i], scaling_list8x8[3], kScalingList8x8ByteSize); + break; + + default: + NOTREACHED(); + break; + } +} + +H264Parser::Result H264Parser::ParseScalingList(int size, + int* scaling_list, + bool* use_default) { + // See chapter 7.3.2.1.1.1. 
+ int last_scale = 8; + int next_scale = 8; + int delta_scale; + + *use_default = false; + + for (int j = 0; j < size; ++j) { + if (next_scale != 0) { + READ_SE_OR_RETURN(&delta_scale); + IN_RANGE_OR_RETURN(delta_scale, -128, 127); + next_scale = (last_scale + delta_scale + 256) & 0xff; + + if (j == 0 && next_scale == 0) { + *use_default = true; + return kOk; + } + } + + scaling_list[j] = (next_scale == 0) ? last_scale : next_scale; + last_scale = scaling_list[j]; + } + + return kOk; +} + +H264Parser::Result H264Parser::ParseSPSScalingLists(H264SPS* sps) { + // See 7.4.2.1.1. + bool seq_scaling_list_present_flag; + bool use_default; + Result res; + + // Parse scaling_list4x4. + for (int i = 0; i < 6; ++i) { + READ_BOOL_OR_RETURN(&seq_scaling_list_present_flag); + + if (seq_scaling_list_present_flag) { + res = ParseScalingList(arraysize(sps->scaling_list4x4[i]), + sps->scaling_list4x4[i], + &use_default); + if (res != kOk) + return res; + + if (use_default) + DefaultScalingList4x4(i, sps->scaling_list4x4); + + } else { + FallbackScalingList4x4( + i, kDefault4x4Intra, kDefault4x4Inter, sps->scaling_list4x4); + } + } + + // Parse scaling_list8x8. + for (int i = 0; i < ((sps->chroma_format_idc != 3) ? 2 : 6); ++i) { + READ_BOOL_OR_RETURN(&seq_scaling_list_present_flag); + + if (seq_scaling_list_present_flag) { + res = ParseScalingList(arraysize(sps->scaling_list8x8[i]), + sps->scaling_list8x8[i], + &use_default); + if (res != kOk) + return res; + + if (use_default) + DefaultScalingList8x8(i, sps->scaling_list8x8); + + } else { + FallbackScalingList8x8( + i, kDefault8x8Intra, kDefault8x8Inter, sps->scaling_list8x8); + } + } + + return kOk; +} + +H264Parser::Result H264Parser::ParsePPSScalingLists(const H264SPS& sps, + H264PPS* pps) { + // See 7.4.2.2. 
+  bool pic_scaling_list_present_flag;
+  bool use_default;
+  Result res;
+
+  for (int i = 0; i < 6; ++i) {
+    READ_BOOL_OR_RETURN(&pic_scaling_list_present_flag);
+
+    if (pic_scaling_list_present_flag) {
+      res = ParseScalingList(arraysize(pps->scaling_list4x4[i]),
+                             pps->scaling_list4x4[i],
+                             &use_default);
+      if (res != kOk)
+        return res;
+
+      if (use_default)
+        DefaultScalingList4x4(i, pps->scaling_list4x4);
+
+    } else {
+      // Per 7.4.2.2, fall-back rule set A (default lists) applies when the
+      // SPS carries no scaling matrix; rule set B (the sequence-level lists)
+      // applies when it does.
+      if (!sps.seq_scaling_matrix_present_flag) {
+        // Table 7-2 fallback rule A in spec.
+        FallbackScalingList4x4(
+            i, kDefault4x4Intra, kDefault4x4Inter, pps->scaling_list4x4);
+      } else {
+        // Table 7-2 fallback rule B in spec.
+        FallbackScalingList4x4(i,
+                               sps.scaling_list4x4[0],
+                               sps.scaling_list4x4[3],
+                               pps->scaling_list4x4);
+      }
+    }
+  }
+
+  if (pps->transform_8x8_mode_flag) {
+    for (int i = 0; i < ((sps.chroma_format_idc != 3) ? 2 : 6); ++i) {
+      READ_BOOL_OR_RETURN(&pic_scaling_list_present_flag);
+
+      if (pic_scaling_list_present_flag) {
+        res = ParseScalingList(arraysize(pps->scaling_list8x8[i]),
+                               pps->scaling_list8x8[i],
+                               &use_default);
+        if (res != kOk)
+          return res;
+
+        if (use_default)
+          DefaultScalingList8x8(i, pps->scaling_list8x8);
+
+      } else {
+        // Same rule selection as for the 4x4 lists: rule set A without an
+        // SPS scaling matrix, rule set B with one.
+        if (!sps.seq_scaling_matrix_present_flag) {
+          // Table 7-2 fallback rule A in spec.
+          FallbackScalingList8x8(
+              i, kDefault8x8Intra, kDefault8x8Inter, pps->scaling_list8x8);
+        } else {
+          // Table 7-2 fallback rule B in spec.
+ FallbackScalingList8x8(i, + sps.scaling_list8x8[0], + sps.scaling_list8x8[1], + pps->scaling_list8x8); + } + } + } + } + return kOk; +} + +H264Parser::Result H264Parser::ParseAndIgnoreHRDParameters( + bool* hrd_parameters_present) { + int data; + READ_BOOL_OR_RETURN(&data); // {nal,vcl}_hrd_parameters_present_flag + if (!data) + return kOk; + + *hrd_parameters_present = true; + + int cpb_cnt_minus1; + READ_UE_OR_RETURN(&cpb_cnt_minus1); + IN_RANGE_OR_RETURN(cpb_cnt_minus1, 0, 31); + READ_BITS_OR_RETURN(8, &data); // bit_rate_scale, cpb_size_scale + for (int i = 0; i <= cpb_cnt_minus1; ++i) { + READ_UE_OR_RETURN(&data); // bit_rate_value_minus1[i] + READ_UE_OR_RETURN(&data); // cpb_size_value_minus1[i] + READ_BOOL_OR_RETURN(&data); // cbr_flag + } + READ_BITS_OR_RETURN(20, &data); // cpb/dpb delays, etc. + + return kOk; +} + +H264Parser::Result H264Parser::ParseVUIParameters(H264SPS* sps) { + bool aspect_ratio_info_present_flag; + READ_BOOL_OR_RETURN(&aspect_ratio_info_present_flag); + if (aspect_ratio_info_present_flag) { + int aspect_ratio_idc; + READ_BITS_OR_RETURN(8, &aspect_ratio_idc); + if (aspect_ratio_idc == kExtendedSar) { + READ_BITS_OR_RETURN(16, &sps->sar_width); + READ_BITS_OR_RETURN(16, &sps->sar_height); + } else { + const int max_aspect_ratio_idc = arraysize(kTableSarWidth) - 1; + IN_RANGE_OR_RETURN(aspect_ratio_idc, 0, max_aspect_ratio_idc); + sps->sar_width = kTableSarWidth[aspect_ratio_idc]; + sps->sar_height = kTableSarHeight[aspect_ratio_idc]; + } + } + + int data; + // Read and ignore overscan and video signal type info. 
+ READ_BOOL_OR_RETURN(&data); // overscan_info_present_flag + if (data) + READ_BOOL_OR_RETURN(&data); // overscan_appropriate_flag + + READ_BOOL_OR_RETURN(&data); // video_signal_type_present_flag + if (data) { + READ_BITS_OR_RETURN(3, &data); // video_format + READ_BOOL_OR_RETURN(&data); // video_full_range_flag + READ_BOOL_OR_RETURN(&data); // colour_description_present_flag + if (data) + READ_BITS_OR_RETURN(24, &data); // color description syntax elements + } + + READ_BOOL_OR_RETURN(&data); // chroma_loc_info_present_flag + if (data) { + READ_UE_OR_RETURN(&data); // chroma_sample_loc_type_top_field + READ_UE_OR_RETURN(&data); // chroma_sample_loc_type_bottom_field + } + + // Read and ignore timing info. + READ_BOOL_OR_RETURN(&data); // timing_info_present_flag + if (data) { + READ_BITS_OR_RETURN(16, &data); // num_units_in_tick + READ_BITS_OR_RETURN(16, &data); // num_units_in_tick + READ_BITS_OR_RETURN(16, &data); // time_scale + READ_BITS_OR_RETURN(16, &data); // time_scale + READ_BOOL_OR_RETURN(&data); // fixed_frame_rate_flag + } + + // Read and ignore NAL HRD parameters, if present. + bool hrd_parameters_present = false; + Result res = ParseAndIgnoreHRDParameters(&hrd_parameters_present); + if (res != kOk) + return res; + + // Read and ignore VCL HRD parameters, if present. + res = ParseAndIgnoreHRDParameters(&hrd_parameters_present); + if (res != kOk) + return res; + + if (hrd_parameters_present) // One of NAL or VCL params present is enough. 
+ READ_BOOL_OR_RETURN(&data); // low_delay_hrd_flag + + READ_BOOL_OR_RETURN(&data); // pic_struct_present_flag + READ_BOOL_OR_RETURN(&sps->bitstream_restriction_flag); + if (sps->bitstream_restriction_flag) { + READ_BOOL_OR_RETURN(&data); // motion_vectors_over_pic_boundaries_flag + READ_UE_OR_RETURN(&data); // max_bytes_per_pic_denom + READ_UE_OR_RETURN(&data); // max_bits_per_mb_denom + READ_UE_OR_RETURN(&data); // log2_max_mv_length_horizontal + READ_UE_OR_RETURN(&data); // log2_max_mv_length_vertical + READ_UE_OR_RETURN(&sps->max_num_reorder_frames); + READ_UE_OR_RETURN(&sps->max_dec_frame_buffering); + TRUE_OR_RETURN(sps->max_dec_frame_buffering >= sps->max_num_ref_frames); + IN_RANGE_OR_RETURN( + sps->max_num_reorder_frames, 0, sps->max_dec_frame_buffering); + } + + return kOk; +} + +static void FillDefaultSeqScalingLists(H264SPS* sps) { + for (int i = 0; i < 6; ++i) + for (int j = 0; j < kH264ScalingList4x4Length; ++j) + sps->scaling_list4x4[i][j] = 16; + + for (int i = 0; i < 6; ++i) + for (int j = 0; j < kH264ScalingList8x8Length; ++j) + sps->scaling_list8x8[i][j] = 16; +} + +H264Parser::Result H264Parser::ParseSPS(int* sps_id) { + // See 7.4.2.1. 
+ int data; + Result res; + + *sps_id = -1; + + scoped_ptr sps(new H264SPS()); + + READ_BITS_OR_RETURN(8, &sps->profile_idc); + READ_BOOL_OR_RETURN(&sps->constraint_set0_flag); + READ_BOOL_OR_RETURN(&sps->constraint_set1_flag); + READ_BOOL_OR_RETURN(&sps->constraint_set2_flag); + READ_BOOL_OR_RETURN(&sps->constraint_set3_flag); + READ_BOOL_OR_RETURN(&sps->constraint_set4_flag); + READ_BOOL_OR_RETURN(&sps->constraint_set5_flag); + READ_BITS_OR_RETURN(2, &data); // reserved_zero_2bits + READ_BITS_OR_RETURN(8, &sps->level_idc); + READ_UE_OR_RETURN(&sps->seq_parameter_set_id); + TRUE_OR_RETURN(sps->seq_parameter_set_id < 32); + + if (sps->profile_idc == 100 || sps->profile_idc == 110 || + sps->profile_idc == 122 || sps->profile_idc == 244 || + sps->profile_idc == 44 || sps->profile_idc == 83 || + sps->profile_idc == 86 || sps->profile_idc == 118 || + sps->profile_idc == 128) { + READ_UE_OR_RETURN(&sps->chroma_format_idc); + TRUE_OR_RETURN(sps->chroma_format_idc < 4); + + if (sps->chroma_format_idc == 3) + READ_BOOL_OR_RETURN(&sps->separate_colour_plane_flag); + + READ_UE_OR_RETURN(&sps->bit_depth_luma_minus8); + TRUE_OR_RETURN(sps->bit_depth_luma_minus8 < 7); + + READ_UE_OR_RETURN(&sps->bit_depth_chroma_minus8); + TRUE_OR_RETURN(sps->bit_depth_chroma_minus8 < 7); + + READ_BOOL_OR_RETURN(&sps->qpprime_y_zero_transform_bypass_flag); + READ_BOOL_OR_RETURN(&sps->seq_scaling_matrix_present_flag); + + if (sps->seq_scaling_matrix_present_flag) { + DVLOG(4) << "Scaling matrix present"; + res = ParseSPSScalingLists(sps.get()); + if (res != kOk) + return res; + } else { + FillDefaultSeqScalingLists(sps.get()); + } + } else { + sps->chroma_format_idc = 1; + FillDefaultSeqScalingLists(sps.get()); + } + + if (sps->separate_colour_plane_flag) + sps->chroma_array_type = 0; + else + sps->chroma_array_type = sps->chroma_format_idc; + + READ_UE_OR_RETURN(&sps->log2_max_frame_num_minus4); + TRUE_OR_RETURN(sps->log2_max_frame_num_minus4 < 13); + + 
READ_UE_OR_RETURN(&sps->pic_order_cnt_type); + TRUE_OR_RETURN(sps->pic_order_cnt_type < 3); + + sps->expected_delta_per_pic_order_cnt_cycle = 0; + if (sps->pic_order_cnt_type == 0) { + READ_UE_OR_RETURN(&sps->log2_max_pic_order_cnt_lsb_minus4); + TRUE_OR_RETURN(sps->log2_max_pic_order_cnt_lsb_minus4 < 13); + } else if (sps->pic_order_cnt_type == 1) { + READ_BOOL_OR_RETURN(&sps->delta_pic_order_always_zero_flag); + READ_SE_OR_RETURN(&sps->offset_for_non_ref_pic); + READ_SE_OR_RETURN(&sps->offset_for_top_to_bottom_field); + READ_UE_OR_RETURN(&sps->num_ref_frames_in_pic_order_cnt_cycle); + TRUE_OR_RETURN(sps->num_ref_frames_in_pic_order_cnt_cycle < 255); + + for (int i = 0; i < sps->num_ref_frames_in_pic_order_cnt_cycle; ++i) { + READ_SE_OR_RETURN(&sps->offset_for_ref_frame[i]); + sps->expected_delta_per_pic_order_cnt_cycle += + sps->offset_for_ref_frame[i]; + } + } + + READ_UE_OR_RETURN(&sps->max_num_ref_frames); + READ_BOOL_OR_RETURN(&sps->gaps_in_frame_num_value_allowed_flag); + + if (sps->gaps_in_frame_num_value_allowed_flag) + return kUnsupportedStream; + + READ_UE_OR_RETURN(&sps->pic_width_in_mbs_minus1); + READ_UE_OR_RETURN(&sps->pic_height_in_map_units_minus1); + + READ_BOOL_OR_RETURN(&sps->frame_mbs_only_flag); + if (!sps->frame_mbs_only_flag) + READ_BOOL_OR_RETURN(&sps->mb_adaptive_frame_field_flag); + + READ_BOOL_OR_RETURN(&sps->direct_8x8_inference_flag); + + READ_BOOL_OR_RETURN(&sps->frame_cropping_flag); + if (sps->frame_cropping_flag) { + READ_UE_OR_RETURN(&sps->frame_crop_left_offset); + READ_UE_OR_RETURN(&sps->frame_crop_right_offset); + READ_UE_OR_RETURN(&sps->frame_crop_top_offset); + READ_UE_OR_RETURN(&sps->frame_crop_bottom_offset); + } + + READ_BOOL_OR_RETURN(&sps->vui_parameters_present_flag); + if (sps->vui_parameters_present_flag) { + DVLOG(4) << "VUI parameters present"; + res = ParseVUIParameters(sps.get()); + if (res != kOk) + return res; + } + + // If an SPS with the same id already exists, replace it. 
+ *sps_id = sps->seq_parameter_set_id; + delete active_SPSes_[*sps_id]; + active_SPSes_[*sps_id] = sps.release(); + + return kOk; +} + +H264Parser::Result H264Parser::ParsePPS(int* pps_id) { + // See 7.4.2.2. + const H264SPS* sps; + Result res; + + *pps_id = -1; + + scoped_ptr pps(new H264PPS()); + + READ_UE_OR_RETURN(&pps->pic_parameter_set_id); + READ_UE_OR_RETURN(&pps->seq_parameter_set_id); + TRUE_OR_RETURN(pps->seq_parameter_set_id < 32); + + sps = GetSPS(pps->seq_parameter_set_id); + TRUE_OR_RETURN(sps); + + READ_BOOL_OR_RETURN(&pps->entropy_coding_mode_flag); + READ_BOOL_OR_RETURN(&pps->bottom_field_pic_order_in_frame_present_flag); + + READ_UE_OR_RETURN(&pps->num_slice_groups_minus1); + if (pps->num_slice_groups_minus1 > 1) { + DVLOG(1) << "Slice groups not supported"; + return kUnsupportedStream; + } + + READ_UE_OR_RETURN(&pps->num_ref_idx_l0_default_active_minus1); + TRUE_OR_RETURN(pps->num_ref_idx_l0_default_active_minus1 < 32); + + READ_UE_OR_RETURN(&pps->num_ref_idx_l1_default_active_minus1); + TRUE_OR_RETURN(pps->num_ref_idx_l1_default_active_minus1 < 32); + + READ_BOOL_OR_RETURN(&pps->weighted_pred_flag); + READ_BITS_OR_RETURN(2, &pps->weighted_bipred_idc); + TRUE_OR_RETURN(pps->weighted_bipred_idc < 3); + + READ_SE_OR_RETURN(&pps->pic_init_qp_minus26); + IN_RANGE_OR_RETURN(pps->pic_init_qp_minus26, -26, 25); + + READ_SE_OR_RETURN(&pps->pic_init_qs_minus26); + IN_RANGE_OR_RETURN(pps->pic_init_qs_minus26, -26, 25); + + READ_SE_OR_RETURN(&pps->chroma_qp_index_offset); + IN_RANGE_OR_RETURN(pps->chroma_qp_index_offset, -12, 12); + pps->second_chroma_qp_index_offset = pps->chroma_qp_index_offset; + + READ_BOOL_OR_RETURN(&pps->deblocking_filter_control_present_flag); + READ_BOOL_OR_RETURN(&pps->constrained_intra_pred_flag); + READ_BOOL_OR_RETURN(&pps->redundant_pic_cnt_present_flag); + + if (br_.HasMoreRBSPData()) { + READ_BOOL_OR_RETURN(&pps->transform_8x8_mode_flag); + READ_BOOL_OR_RETURN(&pps->pic_scaling_matrix_present_flag); + + if 
(pps->pic_scaling_matrix_present_flag) { + DVLOG(4) << "Picture scaling matrix present"; + res = ParsePPSScalingLists(*sps, pps.get()); + if (res != kOk) + return res; + } + + READ_SE_OR_RETURN(&pps->second_chroma_qp_index_offset); + } + + // If a PPS with the same id already exists, replace it. + *pps_id = pps->pic_parameter_set_id; + delete active_PPSes_[*pps_id]; + active_PPSes_[*pps_id] = pps.release(); + + return kOk; +} + +H264Parser::Result H264Parser::ParseRefPicListModification( + int num_ref_idx_active_minus1, + H264ModificationOfPicNum* ref_list_mods) { + H264ModificationOfPicNum* pic_num_mod; + + if (num_ref_idx_active_minus1 >= 32) + return kInvalidStream; + + for (int i = 0; i < 32; ++i) { + pic_num_mod = &ref_list_mods[i]; + READ_UE_OR_RETURN(&pic_num_mod->modification_of_pic_nums_idc); + TRUE_OR_RETURN(pic_num_mod->modification_of_pic_nums_idc < 4); + + switch (pic_num_mod->modification_of_pic_nums_idc) { + case 0: + case 1: + READ_UE_OR_RETURN(&pic_num_mod->abs_diff_pic_num_minus1); + break; + + case 2: + READ_UE_OR_RETURN(&pic_num_mod->long_term_pic_num); + break; + + case 3: + // Per spec, list cannot be empty. + if (i == 0) + return kInvalidStream; + return kOk; + + default: + return kInvalidStream; + } + } + + // If we got here, we didn't get loop end marker prematurely, + // so make sure it is there for our client. 
+ int modification_of_pic_nums_idc; + READ_UE_OR_RETURN(&modification_of_pic_nums_idc); + TRUE_OR_RETURN(modification_of_pic_nums_idc == 3); + + return kOk; +} + +H264Parser::Result H264Parser::ParseRefPicListModifications( + H264SliceHeader* shdr) { + Result res; + + if (!shdr->IsISlice() && !shdr->IsSISlice()) { + READ_BOOL_OR_RETURN(&shdr->ref_pic_list_modification_flag_l0); + if (shdr->ref_pic_list_modification_flag_l0) { + res = ParseRefPicListModification(shdr->num_ref_idx_l0_active_minus1, + shdr->ref_list_l0_modifications); + if (res != kOk) + return res; + } + } + + if (shdr->IsBSlice()) { + READ_BOOL_OR_RETURN(&shdr->ref_pic_list_modification_flag_l1); + if (shdr->ref_pic_list_modification_flag_l1) { + res = ParseRefPicListModification(shdr->num_ref_idx_l1_active_minus1, + shdr->ref_list_l1_modifications); + if (res != kOk) + return res; + } + } + + return kOk; +} + +H264Parser::Result H264Parser::ParseWeightingFactors( + int num_ref_idx_active_minus1, + int chroma_array_type, + int luma_log2_weight_denom, + int chroma_log2_weight_denom, + H264WeightingFactors* w_facts) { + + int def_luma_weight = 1 << luma_log2_weight_denom; + int def_chroma_weight = 1 << chroma_log2_weight_denom; + + for (int i = 0; i < num_ref_idx_active_minus1 + 1; ++i) { + READ_BOOL_OR_RETURN(&w_facts->luma_weight_flag); + if (w_facts->luma_weight_flag) { + READ_SE_OR_RETURN(&w_facts->luma_weight[i]); + IN_RANGE_OR_RETURN(w_facts->luma_weight[i], -128, 127); + + READ_SE_OR_RETURN(&w_facts->luma_offset[i]); + IN_RANGE_OR_RETURN(w_facts->luma_offset[i], -128, 127); + } else { + w_facts->luma_weight[i] = def_luma_weight; + w_facts->luma_offset[i] = 0; + } + + if (chroma_array_type != 0) { + READ_BOOL_OR_RETURN(&w_facts->chroma_weight_flag); + if (w_facts->chroma_weight_flag) { + for (int j = 0; j < 2; ++j) { + READ_SE_OR_RETURN(&w_facts->chroma_weight[i][j]); + IN_RANGE_OR_RETURN(w_facts->chroma_weight[i][j], -128, 127); + + READ_SE_OR_RETURN(&w_facts->chroma_offset[i][j]); + 
IN_RANGE_OR_RETURN(w_facts->chroma_offset[i][j], -128, 127); + } + } else { + for (int j = 0; j < 2; ++j) { + w_facts->chroma_weight[i][j] = def_chroma_weight; + w_facts->chroma_offset[i][j] = 0; + } + } + } + } + + return kOk; +} + +H264Parser::Result H264Parser::ParsePredWeightTable(const H264SPS& sps, + H264SliceHeader* shdr) { + READ_UE_OR_RETURN(&shdr->luma_log2_weight_denom); + TRUE_OR_RETURN(shdr->luma_log2_weight_denom < 8); + + if (sps.chroma_array_type != 0) + READ_UE_OR_RETURN(&shdr->chroma_log2_weight_denom); + TRUE_OR_RETURN(shdr->chroma_log2_weight_denom < 8); + + Result res = ParseWeightingFactors(shdr->num_ref_idx_l0_active_minus1, + sps.chroma_array_type, + shdr->luma_log2_weight_denom, + shdr->chroma_log2_weight_denom, + &shdr->pred_weight_table_l0); + if (res != kOk) + return res; + + if (shdr->IsBSlice()) { + res = ParseWeightingFactors(shdr->num_ref_idx_l1_active_minus1, + sps.chroma_array_type, + shdr->luma_log2_weight_denom, + shdr->chroma_log2_weight_denom, + &shdr->pred_weight_table_l1); + if (res != kOk) + return res; + } + + return kOk; +} + +H264Parser::Result H264Parser::ParseDecRefPicMarking(H264SliceHeader* shdr) { + if (shdr->idr_pic_flag) { + READ_BOOL_OR_RETURN(&shdr->no_output_of_prior_pics_flag); + READ_BOOL_OR_RETURN(&shdr->long_term_reference_flag); + } else { + READ_BOOL_OR_RETURN(&shdr->adaptive_ref_pic_marking_mode_flag); + + H264DecRefPicMarking* marking; + if (shdr->adaptive_ref_pic_marking_mode_flag) { + size_t i; + for (i = 0; i < arraysize(shdr->ref_pic_marking); ++i) { + marking = &shdr->ref_pic_marking[i]; + + READ_UE_OR_RETURN(&marking->memory_mgmnt_control_operation); + if (marking->memory_mgmnt_control_operation == 0) + break; + + if (marking->memory_mgmnt_control_operation == 1 || + marking->memory_mgmnt_control_operation == 3) + READ_UE_OR_RETURN(&marking->difference_of_pic_nums_minus1); + + if (marking->memory_mgmnt_control_operation == 2) + READ_UE_OR_RETURN(&marking->long_term_pic_num); + + if 
(marking->memory_mgmnt_control_operation == 3 || + marking->memory_mgmnt_control_operation == 6) + READ_UE_OR_RETURN(&marking->long_term_frame_idx); + + if (marking->memory_mgmnt_control_operation == 4) + READ_UE_OR_RETURN(&marking->max_long_term_frame_idx_plus1); + + if (marking->memory_mgmnt_control_operation > 6) + return kInvalidStream; + } + + if (i == arraysize(shdr->ref_pic_marking)) { + DVLOG(1) << "Ran out of dec ref pic marking fields"; + return kUnsupportedStream; + } + } + } + + return kOk; +} + +H264Parser::Result H264Parser::ParseSliceHeader(const H264NALU& nalu, + H264SliceHeader* shdr) { + // See 7.4.3. + const H264SPS* sps; + const H264PPS* pps; + Result res; + + memset(shdr, 0, sizeof(*shdr)); + + shdr->idr_pic_flag = (nalu.nal_unit_type == 5); + shdr->nal_ref_idc = nalu.nal_ref_idc; + shdr->nalu_data = nalu.data; + shdr->nalu_size = nalu.size; + + READ_UE_OR_RETURN(&shdr->first_mb_in_slice); + READ_UE_OR_RETURN(&shdr->slice_type); + TRUE_OR_RETURN(shdr->slice_type < 10); + + READ_UE_OR_RETURN(&shdr->pic_parameter_set_id); + + pps = GetPPS(shdr->pic_parameter_set_id); + TRUE_OR_RETURN(pps); + + sps = GetSPS(pps->seq_parameter_set_id); + TRUE_OR_RETURN(sps); + + if (sps->separate_colour_plane_flag) { + DVLOG(1) << "Interlaced streams not supported"; + return kUnsupportedStream; + } + + READ_BITS_OR_RETURN(sps->log2_max_frame_num_minus4 + 4, &shdr->frame_num); + if (!sps->frame_mbs_only_flag) { + READ_BOOL_OR_RETURN(&shdr->field_pic_flag); + if (shdr->field_pic_flag) { + DVLOG(1) << "Interlaced streams not supported"; + return kUnsupportedStream; + } + } + + if (shdr->idr_pic_flag) + READ_UE_OR_RETURN(&shdr->idr_pic_id); + + if (sps->pic_order_cnt_type == 0) { + READ_BITS_OR_RETURN(sps->log2_max_pic_order_cnt_lsb_minus4 + 4, + &shdr->pic_order_cnt_lsb); + if (pps->bottom_field_pic_order_in_frame_present_flag && + !shdr->field_pic_flag) + READ_SE_OR_RETURN(&shdr->delta_pic_order_cnt_bottom); + } + + if (sps->pic_order_cnt_type == 1 && 
!sps->delta_pic_order_always_zero_flag) { + READ_SE_OR_RETURN(&shdr->delta_pic_order_cnt[0]); + if (pps->bottom_field_pic_order_in_frame_present_flag && + !shdr->field_pic_flag) + READ_SE_OR_RETURN(&shdr->delta_pic_order_cnt[1]); + } + + if (pps->redundant_pic_cnt_present_flag) { + READ_UE_OR_RETURN(&shdr->redundant_pic_cnt); + TRUE_OR_RETURN(shdr->redundant_pic_cnt < 128); + } + + if (shdr->IsBSlice()) + READ_BOOL_OR_RETURN(&shdr->direct_spatial_mv_pred_flag); + + if (shdr->IsPSlice() || shdr->IsSPSlice() || shdr->IsBSlice()) { + READ_BOOL_OR_RETURN(&shdr->num_ref_idx_active_override_flag); + if (shdr->num_ref_idx_active_override_flag) { + READ_UE_OR_RETURN(&shdr->num_ref_idx_l0_active_minus1); + if (shdr->IsBSlice()) + READ_UE_OR_RETURN(&shdr->num_ref_idx_l1_active_minus1); + } else { + shdr->num_ref_idx_l0_active_minus1 = + pps->num_ref_idx_l0_default_active_minus1; + if (shdr->IsBSlice()) { + shdr->num_ref_idx_l1_active_minus1 = + pps->num_ref_idx_l1_default_active_minus1; + } + } + } + if (shdr->field_pic_flag) { + TRUE_OR_RETURN(shdr->num_ref_idx_l0_active_minus1 < 32); + TRUE_OR_RETURN(shdr->num_ref_idx_l1_active_minus1 < 32); + } else { + TRUE_OR_RETURN(shdr->num_ref_idx_l0_active_minus1 < 16); + TRUE_OR_RETURN(shdr->num_ref_idx_l1_active_minus1 < 16); + } + + if (nalu.nal_unit_type == H264NALU::kCodedSliceExtension) { + return kUnsupportedStream; + } else { + res = ParseRefPicListModifications(shdr); + if (res != kOk) + return res; + } + + if ((pps->weighted_pred_flag && (shdr->IsPSlice() || shdr->IsSPSlice())) || + (pps->weighted_bipred_idc == 1 && shdr->IsBSlice())) { + res = ParsePredWeightTable(*sps, shdr); + if (res != kOk) + return res; + } + + if (nalu.nal_ref_idc != 0) { + res = ParseDecRefPicMarking(shdr); + if (res != kOk) + return res; + } + + if (pps->entropy_coding_mode_flag && !shdr->IsISlice() && + !shdr->IsSISlice()) { + READ_UE_OR_RETURN(&shdr->cabac_init_idc); + TRUE_OR_RETURN(shdr->cabac_init_idc < 3); + } + + 
READ_SE_OR_RETURN(&shdr->slice_qp_delta); + + if (shdr->IsSPSlice() || shdr->IsSISlice()) { + if (shdr->IsSPSlice()) + READ_BOOL_OR_RETURN(&shdr->sp_for_switch_flag); + READ_SE_OR_RETURN(&shdr->slice_qs_delta); + } + + if (pps->deblocking_filter_control_present_flag) { + READ_UE_OR_RETURN(&shdr->disable_deblocking_filter_idc); + TRUE_OR_RETURN(shdr->disable_deblocking_filter_idc < 3); + + if (shdr->disable_deblocking_filter_idc != 1) { + READ_SE_OR_RETURN(&shdr->slice_alpha_c0_offset_div2); + IN_RANGE_OR_RETURN(shdr->slice_alpha_c0_offset_div2, -6, 6); + + READ_SE_OR_RETURN(&shdr->slice_beta_offset_div2); + IN_RANGE_OR_RETURN(shdr->slice_beta_offset_div2, -6, 6); + } + } + + if (pps->num_slice_groups_minus1 > 0) { + DVLOG(1) << "Slice groups not supported"; + return kUnsupportedStream; + } + + size_t epb = br_.NumEmulationPreventionBytesRead(); + shdr->header_bit_size = (shdr->nalu_size - epb) * 8 - br_.NumBitsLeft(); + + return kOk; +} + +H264Parser::Result H264Parser::ParseSEI(H264SEIMessage* sei_msg) { + int byte; + + memset(sei_msg, 0, sizeof(*sei_msg)); + + READ_BITS_OR_RETURN(8, &byte); + while (byte == 0xff) { + sei_msg->type += 255; + READ_BITS_OR_RETURN(8, &byte); + } + sei_msg->type += byte; + + READ_BITS_OR_RETURN(8, &byte); + while (byte == 0xff) { + sei_msg->payload_size += 255; + READ_BITS_OR_RETURN(8, &byte); + } + sei_msg->payload_size += byte; + + DVLOG(4) << "Found SEI message type: " << sei_msg->type + << " payload size: " << sei_msg->payload_size; + + switch (sei_msg->type) { + case H264SEIMessage::kSEIRecoveryPoint: + READ_UE_OR_RETURN(&sei_msg->recovery_point.recovery_frame_cnt); + READ_BOOL_OR_RETURN(&sei_msg->recovery_point.exact_match_flag); + READ_BOOL_OR_RETURN(&sei_msg->recovery_point.broken_link_flag); + READ_BITS_OR_RETURN(2, &sei_msg->recovery_point.changing_slice_group_idc); + break; + + default: + DVLOG(4) << "Unsupported SEI message"; + break; + } + + return kOk; +} + +} // namespace media diff --git a/media/filters/h264_parser.h 
// For explanations of each struct and its members, see H.264 specification
// at http://www.itu.int/rec/T-REC-H.264.
//
// Describes one NAL unit located in the stream: where its payload is, how
// long it is, and the two header fields the parser dispatches on.
struct MEDIA_EXPORT H264NALU {
  H264NALU();

  // Values of |nal_unit_type| that this code names explicitly.
  enum Type {
    kUnspecified = 0,
    kNonIDRSlice = 1,
    kIDRSlice = 5,
    kSEIMessage = 6,
    kSPS = 7,
    kPPS = 8,
    kAUD = 9,
    kEOSeq = 10,
    kEOStream = 11,
    kCodedSliceExtension = 20,
  };

  // After (without) start code; we don't own the underlying memory
  // and a shallow copy should be made when copying this struct.
  const uint8* data;
  off_t size;  // From after start code to start code of next NALU (or EOS).

  int nal_ref_idc;
  int nal_unit_type;  // Compared against the Type values above.
};

// Number of entries in one 4x4 / 8x8 scaling list.
enum {
  kH264ScalingList4x4Length = 16,
  kH264ScalingList8x8Length = 64,
};
// Sequence parameter set. Fields are named after the syntax elements in the
// H.264 specification; the few values derived by the parser are marked.
struct MEDIA_EXPORT H264SPS {
  H264SPS();

  int profile_idc;
  bool constraint_set0_flag;
  bool constraint_set1_flag;
  bool constraint_set2_flag;
  bool constraint_set3_flag;
  bool constraint_set4_flag;
  bool constraint_set5_flag;
  int level_idc;
  int seq_parameter_set_id;

  int chroma_format_idc;
  bool separate_colour_plane_flag;
  int bit_depth_luma_minus8;
  int bit_depth_chroma_minus8;
  bool qpprime_y_zero_transform_bypass_flag;

  bool seq_scaling_matrix_present_flag;
  int scaling_list4x4[6][kH264ScalingList4x4Length];
  int scaling_list8x8[6][kH264ScalingList8x8Length];

  int log2_max_frame_num_minus4;
  int pic_order_cnt_type;
  int log2_max_pic_order_cnt_lsb_minus4;
  bool delta_pic_order_always_zero_flag;
  int offset_for_non_ref_pic;
  int offset_for_top_to_bottom_field;
  int num_ref_frames_in_pic_order_cnt_cycle;
  int expected_delta_per_pic_order_cnt_cycle;  // calculated
  int offset_for_ref_frame[255];
  int max_num_ref_frames;
  bool gaps_in_frame_num_value_allowed_flag;
  int pic_width_in_mbs_minus1;
  int pic_height_in_map_units_minus1;
  bool frame_mbs_only_flag;
  bool mb_adaptive_frame_field_flag;
  bool direct_8x8_inference_flag;
  bool frame_cropping_flag;
  int frame_crop_left_offset;
  int frame_crop_right_offset;
  int frame_crop_top_offset;
  int frame_crop_bottom_offset;

  bool vui_parameters_present_flag;
  int sar_width;    // Set to 0 when not specified.
  int sar_height;   // Set to 0 when not specified.
  bool bitstream_restriction_flag;
  int max_num_reorder_frames;
  int max_dec_frame_buffering;

  // Derived by ParseSPS(): 0 when separate_colour_plane_flag is set,
  // otherwise equal to chroma_format_idc.
  int chroma_array_type;
};

// Picture parameter set. Fields are named after the syntax elements in the
// H.264 specification.
struct MEDIA_EXPORT H264PPS {
  H264PPS();

  int pic_parameter_set_id;
  int seq_parameter_set_id;
  bool entropy_coding_mode_flag;
  bool bottom_field_pic_order_in_frame_present_flag;
  int num_slice_groups_minus1;
  // TODO(posciak): Slice groups not implemented, could be added at some point.
  int num_ref_idx_l0_default_active_minus1;
  int num_ref_idx_l1_default_active_minus1;
  bool weighted_pred_flag;
  int weighted_bipred_idc;
  int pic_init_qp_minus26;
  int pic_init_qs_minus26;
  int chroma_qp_index_offset;
  bool deblocking_filter_control_present_flag;
  bool constrained_intra_pred_flag;
  bool redundant_pic_cnt_present_flag;
  bool transform_8x8_mode_flag;

  bool pic_scaling_matrix_present_flag;
  int scaling_list4x4[6][kH264ScalingList4x4Length];
  int scaling_list8x8[6][kH264ScalingList8x8Length];

  // ParsePPS() defaults this to chroma_qp_index_offset when it is not coded.
  int second_chroma_qp_index_offset;
};

// One entry of a reference picture list modification list. Which union
// member is meaningful depends on |modification_of_pic_nums_idc|.
struct MEDIA_EXPORT H264ModificationOfPicNum {
  int modification_of_pic_nums_idc;
  union {
    int abs_diff_pic_num_minus1;
    int long_term_pic_num;
  };
};

// Prediction weights and offsets for one reference list, indexed by
// reference index (and by chroma component for the chroma arrays).
struct MEDIA_EXPORT H264WeightingFactors {
  // The two flags are overwritten for every reference index while parsing,
  // so they hold the values read for the last index.
  bool luma_weight_flag;
  bool chroma_weight_flag;
  int luma_weight[32];
  int luma_offset[32];
  int chroma_weight[32][2];
  int chroma_offset[32][2];
};

// One memory management control operation together with its operands; only
// the operands belonging to the given operation are filled in by the parser.
struct MEDIA_EXPORT H264DecRefPicMarking {
  int memory_mgmnt_control_operation;
  int difference_of_pic_nums_minus1;
  int long_term_pic_num;
  int long_term_frame_idx;
  int max_long_term_frame_idx_plus1;
};
// from NAL header + const uint8* nalu_data; // from NAL header + off_t nalu_size; // from NAL header + off_t header_bit_size; // calculated + + int first_mb_in_slice; + int slice_type; + int pic_parameter_set_id; + int colour_plane_id; // TODO(posciak): use this! http://crbug.com/139878 + int frame_num; + bool field_pic_flag; + bool bottom_field_flag; + int idr_pic_id; + int pic_order_cnt_lsb; + int delta_pic_order_cnt_bottom; + int delta_pic_order_cnt[2]; + int redundant_pic_cnt; + bool direct_spatial_mv_pred_flag; + + bool num_ref_idx_active_override_flag; + int num_ref_idx_l0_active_minus1; + int num_ref_idx_l1_active_minus1; + bool ref_pic_list_modification_flag_l0; + bool ref_pic_list_modification_flag_l1; + H264ModificationOfPicNum ref_list_l0_modifications[kRefListModSize]; + H264ModificationOfPicNum ref_list_l1_modifications[kRefListModSize]; + + int luma_log2_weight_denom; + int chroma_log2_weight_denom; + + bool luma_weight_l0_flag; + bool chroma_weight_l0_flag; + H264WeightingFactors pred_weight_table_l0; + + bool luma_weight_l1_flag; + bool chroma_weight_l1_flag; + H264WeightingFactors pred_weight_table_l1; + + bool no_output_of_prior_pics_flag; + bool long_term_reference_flag; + + bool adaptive_ref_pic_marking_mode_flag; + H264DecRefPicMarking ref_pic_marking[kRefListSize]; + + int cabac_init_idc; + int slice_qp_delta; + bool sp_for_switch_flag; + int slice_qs_delta; + int disable_deblocking_filter_idc; + int slice_alpha_c0_offset_div2; + int slice_beta_offset_div2; +}; + +struct H264SEIRecoveryPoint { + int recovery_frame_cnt; + bool exact_match_flag; + bool broken_link_flag; + int changing_slice_group_idc; +}; + +struct MEDIA_EXPORT H264SEIMessage { + H264SEIMessage(); + + enum Type { + kSEIRecoveryPoint = 6, + }; + + int type; + int payload_size; + union { + // Placeholder; in future more supported types will contribute to more + // union members here. 
// One parsed SEI message: its type, its payload size as coded in the stream,
// and the decoded payload for the types the parser understands.
struct MEDIA_EXPORT H264SEIMessage {
  H264SEIMessage();

  // Payload types decoded by H264Parser::ParseSEI().
  enum Type {
    kSEIRecoveryPoint = 6,
  };

  int type;
  int payload_size;
  union {
    // Placeholder; in future more supported types will contribute to more
    // union members here.
    H264SEIRecoveryPoint recovery_point;
  };
};
+ // + // Parse an SPS/PPS NALU and save their data in the parser, returning id + // of the parsed structure in |*pps_id|/|*sps_id|. + // To get a pointer to a given SPS/PPS structure, use GetSPS()/GetPPS(), + // passing the returned |*sps_id|/|*pps_id| as parameter. + // TODO(posciak,fischman): consider replacing returning Result from Parse*() + // methods with a scoped_ptr and adding an AtEOS() function to check for EOS + // if Parse*() return NULL. + Result ParseSPS(int* sps_id); + Result ParsePPS(int* pps_id); + + // Return a pointer to SPS/PPS with given |sps_id|/|pps_id| or NULL if not + // present. + const H264SPS* GetSPS(int sps_id); + const H264PPS* GetPPS(int pps_id); + + // Slice headers and SEI messages are not used across NALUs by the parser + // and can be discarded after current NALU, so the parser does not store + // them, nor does it manage their memory. + // The caller has to provide and manage it instead. + + // Parse a slice header, returning it in |*shdr|. |*nalu| must be set to + // the NALU returned from AdvanceToNextNALU() and corresponding to |*shdr|. + Result ParseSliceHeader(const H264NALU& nalu, H264SliceHeader* shdr); + + // Parse a SEI message, returning it in |*sei_msg|, provided and managed + // by the caller. + Result ParseSEI(H264SEIMessage* sei_msg); + + private: + // Move the stream pointer to the beginning of the next NALU, + // i.e. pointing at the next start code. + // Return true if a NALU has been found. + // If a NALU is found: + // - its size in bytes is returned in |*nalu_size| and includes + // the start code as well as the trailing zero bits. + // - the size in bytes of the start code is returned in |*start_code_size|. + bool LocateNALU(off_t* nalu_size, off_t* start_code_size); + + // Exp-Golomb code parsing as specified in chapter 9.1 of the spec. + // Read one unsigned exp-Golomb code from the stream and return in |*val|. 
+ Result ReadUE(int* val); + + // Read one signed exp-Golomb code from the stream and return in |*val|. + Result ReadSE(int* val); + + // Parse scaling lists (see spec). + Result ParseScalingList(int size, int* scaling_list, bool* use_default); + Result ParseSPSScalingLists(H264SPS* sps); + Result ParsePPSScalingLists(const H264SPS& sps, H264PPS* pps); + + // Parse optional VUI parameters in SPS (see spec). + Result ParseVUIParameters(H264SPS* sps); + // Set |hrd_parameters_present| to true only if they are present. + Result ParseAndIgnoreHRDParameters(bool* hrd_parameters_present); + + // Parse reference picture lists' modifications (see spec). + Result ParseRefPicListModifications(H264SliceHeader* shdr); + Result ParseRefPicListModification(int num_ref_idx_active_minus1, + H264ModificationOfPicNum* ref_list_mods); + + // Parse prediction weight table (see spec). + Result ParsePredWeightTable(const H264SPS& sps, H264SliceHeader* shdr); + + // Parse weighting factors (see spec). + Result ParseWeightingFactors(int num_ref_idx_active_minus1, + int chroma_array_type, + int luma_log2_weight_denom, + int chroma_log2_weight_denom, + H264WeightingFactors* w_facts); + + // Parse decoded reference picture marking information (see spec). + Result ParseDecRefPicMarking(H264SliceHeader* shdr); + + // Pointer to the current NALU in the stream. + const uint8* stream_; + + // Bytes left in the stream after the current NALU. + off_t bytes_left_; + + H264BitReader br_; + + // PPSes and SPSes stored for future reference. 
+ typedef std::map SPSById; + typedef std::map PPSById; + SPSById active_SPSes_; + PPSById active_PPSes_; + + DISALLOW_COPY_AND_ASSIGN(H264Parser); +}; + +} // namespace media + +#endif // MEDIA_FILTERS_H264_PARSER_H_ diff --git a/media/filters/h264_parser_unittest.cc b/media/filters/h264_parser_unittest.cc new file mode 100644 index 0000000000..a08cf26a15 --- /dev/null +++ b/media/filters/h264_parser_unittest.cc @@ -0,0 +1,72 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/command_line.h" +#include "base/files/memory_mapped_file.h" +#include "base/logging.h" +#include "base/path_service.h" +#include "base/strings/string_number_conversions.h" +#include "media/base/test_data_util.h" +#include "media/filters/h264_parser.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace media { + +TEST(H264ParserTest, StreamFileParsing) { + base::FilePath file_path = GetTestDataFilePath("test-25fps.h264"); + // Number of NALUs in the test stream to be parsed. + int num_nalus = 759; + + base::MemoryMappedFile stream; + ASSERT_TRUE(stream.Initialize(file_path)) + << "Couldn't open stream file: " << file_path.MaybeAsASCII(); + + H264Parser parser; + parser.SetStream(stream.data(), stream.length()); + + // Parse until the end of stream/unsupported stream/error in stream is found. 
+ int num_parsed_nalus = 0; + while (true) { + media::H264SliceHeader shdr; + media::H264SEIMessage sei_msg; + H264NALU nalu; + H264Parser::Result res = parser.AdvanceToNextNALU(&nalu); + if (res == H264Parser::kEOStream) { + DVLOG(1) << "Number of successfully parsed NALUs before EOS: " + << num_parsed_nalus; + ASSERT_EQ(num_nalus, num_parsed_nalus); + return; + } + ASSERT_EQ(res, H264Parser::kOk); + + ++num_parsed_nalus; + + int id; + switch (nalu.nal_unit_type) { + case H264NALU::kIDRSlice: + case H264NALU::kNonIDRSlice: + ASSERT_EQ(parser.ParseSliceHeader(nalu, &shdr), H264Parser::kOk); + break; + + case H264NALU::kSPS: + ASSERT_EQ(parser.ParseSPS(&id), H264Parser::kOk); + break; + + case H264NALU::kPPS: + ASSERT_EQ(parser.ParsePPS(&id), H264Parser::kOk); + break; + + case H264NALU::kSEIMessage: + ASSERT_EQ(parser.ParseSEI(&sei_msg), H264Parser::kOk); + break; + + default: + // Skip unsupported NALU. + DVLOG(4) << "Skipping unsupported NALU"; + break; + } + } +} + +} // namespace media diff --git a/media/formats/mp2t/es_parser.h b/media/formats/mp2t/es_parser.h new file mode 100644 index 0000000000..5297d32133 --- /dev/null +++ b/media/formats/mp2t/es_parser.h @@ -0,0 +1,42 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_MP2T_ES_PARSER_H_ +#define MEDIA_FORMATS_MP2T_ES_PARSER_H_ + +#include "base/basictypes.h" +#include "base/callback.h" +#include "base/memory/ref_counted.h" +#include "base/time/time.h" + +namespace media { + +class StreamParserBuffer; + +namespace mp2t { + +class EsParser { + public: + typedef base::Callback)> EmitBufferCB; + + EsParser() {} + virtual ~EsParser() {} + + // ES parsing. + // Should use kNoTimestamp when a timestamp is not valid. + virtual bool Parse(const uint8* buf, int size, + base::TimeDelta pts, + base::TimeDelta dts) = 0; + + // Flush any pending buffer. 
+ virtual void Flush() = 0; + + // Reset the state of the ES parser. + virtual void Reset() = 0; +}; + +} // namespace mp2t +} // namespace media + +#endif diff --git a/media/formats/mp2t/es_parser_adts.cc b/media/formats/mp2t/es_parser_adts.cc new file mode 100644 index 0000000000..2154c9e24a --- /dev/null +++ b/media/formats/mp2t/es_parser_adts.cc @@ -0,0 +1,276 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "media/formats/mp2t/es_parser_adts.h" + +#include + +#include "base/basictypes.h" +#include "base/logging.h" +#include "base/strings/string_number_conversions.h" +#include "media/base/audio_timestamp_helper.h" +#include "media/base/bit_reader.h" +#include "media/base/buffers.h" +#include "media/base/channel_layout.h" +#include "media/base/stream_parser_buffer.h" +#include "media/formats/mp2t/mp2t_common.h" +#include "media/formats/mpeg/adts_constants.h" + +namespace media { + +static int ExtractAdtsFrameSize(const uint8* adts_header) { + return ((static_cast(adts_header[5]) >> 5) | + (static_cast(adts_header[4]) << 3) | + ((static_cast(adts_header[3]) & 0x3) << 11)); +} + +static size_t ExtractAdtsFrequencyIndex(const uint8* adts_header) { + return ((adts_header[2] >> 2) & 0xf); +} + +static size_t ExtractAdtsChannelConfig(const uint8* adts_header) { + return (((adts_header[3] >> 6) & 0x3) | + ((adts_header[2] & 0x1) << 2)); +} + +// Return true if buf corresponds to an ADTS syncword. +// |buf| size must be at least 2. +static bool isAdtsSyncWord(const uint8* buf) { + return (buf[0] == 0xff) && ((buf[1] & 0xf6) == 0xf0); +} + +// Look for an ADTS syncword. +// |new_pos| returns +// - either the byte position of the ADTS frame (if found) +// - or the byte position of 1st byte that was not processed (if not found). 
+// In every case, the returned value in |new_pos| is such that new_pos >= pos +// |frame_sz| returns the size of the ADTS frame (if found). +// Return whether a syncword was found. +static bool LookForSyncWord(const uint8* raw_es, int raw_es_size, + int pos, + int* new_pos, int* frame_sz) { + DCHECK_GE(pos, 0); + DCHECK_LE(pos, raw_es_size); + + int max_offset = raw_es_size - kADTSHeaderMinSize; + if (pos >= max_offset) { + // Do not change the position if: + // - max_offset < 0: not enough bytes to get a full header + // Since pos >= 0, this is a subcase of the next condition. + // - pos >= max_offset: might be the case after reading one full frame, + // |pos| is then incremented by the frame size and might then point + // to the end of the buffer. + *new_pos = pos; + return false; + } + + for (int offset = pos; offset < max_offset; offset++) { + const uint8* cur_buf = &raw_es[offset]; + + if (!isAdtsSyncWord(cur_buf)) + // The first 12 bits must be 1. + // The layer field (2 bits) must be set to 0. + continue; + + int frame_size = ExtractAdtsFrameSize(cur_buf); + if (frame_size < kADTSHeaderMinSize) { + // Too short to be an ADTS frame. + continue; + } + + // Check whether there is another frame + // |size| apart from the current one. 
+ int remaining_size = raw_es_size - offset; + if (remaining_size >= frame_size + 2 && + !isAdtsSyncWord(&cur_buf[frame_size])) { + continue; + } + + *new_pos = offset; + *frame_sz = frame_size; + return true; + } + + *new_pos = max_offset; + return false; +} + +namespace mp2t { + +EsParserAdts::EsParserAdts( + const NewAudioConfigCB& new_audio_config_cb, + const EmitBufferCB& emit_buffer_cb, + bool sbr_in_mimetype) + : new_audio_config_cb_(new_audio_config_cb), + emit_buffer_cb_(emit_buffer_cb), + sbr_in_mimetype_(sbr_in_mimetype) { +} + +EsParserAdts::~EsParserAdts() { +} + +bool EsParserAdts::Parse(const uint8* buf, int size, + base::TimeDelta pts, + base::TimeDelta dts) { + int raw_es_size; + const uint8* raw_es; + + // The incoming PTS applies to the access unit that comes just after + // the beginning of |buf|. + if (pts != kNoTimestamp()) { + es_byte_queue_.Peek(&raw_es, &raw_es_size); + pts_list_.push_back(EsPts(raw_es_size, pts)); + } + + // Copy the input data to the ES buffer. + es_byte_queue_.Push(buf, size); + es_byte_queue_.Peek(&raw_es, &raw_es_size); + + // Look for every ADTS frame in the ES buffer starting at offset = 0 + int es_position = 0; + int frame_size; + while (LookForSyncWord(raw_es, raw_es_size, es_position, + &es_position, &frame_size)) { + DVLOG(LOG_LEVEL_ES) + << "ADTS syncword @ pos=" << es_position + << " frame_size=" << frame_size; + DVLOG(LOG_LEVEL_ES) + << "ADTS header: " + << base::HexEncode(&raw_es[es_position], kADTSHeaderMinSize); + + // Do not process the frame if this one is a partial frame. + int remaining_size = raw_es_size - es_position; + if (frame_size > remaining_size) + break; + + // Update the audio configuration if needed. + DCHECK_GE(frame_size, kADTSHeaderMinSize); + if (!UpdateAudioConfiguration(&raw_es[es_position])) + return false; + + // Get the PTS & the duration of this access unit. 
+ while (!pts_list_.empty() && + pts_list_.front().first <= es_position) { + audio_timestamp_helper_->SetBaseTimestamp(pts_list_.front().second); + pts_list_.pop_front(); + } + + base::TimeDelta current_pts = audio_timestamp_helper_->GetTimestamp(); + base::TimeDelta frame_duration = + audio_timestamp_helper_->GetFrameDuration(kSamplesPerAACFrame); + + // Emit an audio frame. + bool is_key_frame = true; + + // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId + // type and allow multiple audio tracks. See https://crbug.com/341581. + scoped_refptr stream_parser_buffer = + StreamParserBuffer::CopyFrom( + &raw_es[es_position], + frame_size, + is_key_frame, + DemuxerStream::AUDIO, 0); + stream_parser_buffer->SetDecodeTimestamp(current_pts); + stream_parser_buffer->set_timestamp(current_pts); + stream_parser_buffer->set_duration(frame_duration); + emit_buffer_cb_.Run(stream_parser_buffer); + + // Update the PTS of the next frame. + audio_timestamp_helper_->AddFrames(kSamplesPerAACFrame); + + // Skip the current frame. + es_position += frame_size; + } + + // Discard all the bytes that have been processed. + DiscardEs(es_position); + + return true; +} + +void EsParserAdts::Flush() { +} + +void EsParserAdts::Reset() { + es_byte_queue_.Reset(); + pts_list_.clear(); + last_audio_decoder_config_ = AudioDecoderConfig(); +} + +bool EsParserAdts::UpdateAudioConfiguration(const uint8* adts_header) { + size_t frequency_index = ExtractAdtsFrequencyIndex(adts_header); + if (frequency_index >= kADTSFrequencyTableSize) { + // Frequency index 13 & 14 are reserved + // while 15 means that the frequency is explicitly written + // (not supported). + return false; + } + + size_t channel_configuration = ExtractAdtsChannelConfig(adts_header); + if (channel_configuration == 0 || + channel_configuration >= kADTSChannelLayoutTableSize) { + // TODO(damienv): Add support for inband channel configuration. 
+ return false; + } + + // TODO(damienv): support HE-AAC frequency doubling (SBR) + // based on the incoming ADTS profile. + int samples_per_second = kADTSFrequencyTable[frequency_index]; + int adts_profile = (adts_header[2] >> 6) & 0x3; + + // The following code is written according to ISO 14496 Part 3 Table 1.11 and + // Table 1.22. (Table 1.11 refers to the capping to 48000, Table 1.22 refers + // to SBR doubling the AAC sample rate.) + // TODO(damienv) : Extend sample rate cap to 96kHz for Level 5 content. + int extended_samples_per_second = sbr_in_mimetype_ + ? std::min(2 * samples_per_second, 48000) + : samples_per_second; + + AudioDecoderConfig audio_decoder_config( + kCodecAAC, + kSampleFormatS16, + kADTSChannelLayoutTable[channel_configuration], + extended_samples_per_second, + NULL, 0, + false); + + if (!audio_decoder_config.Matches(last_audio_decoder_config_)) { + DVLOG(1) << "Sampling frequency: " << samples_per_second; + DVLOG(1) << "Extended sampling frequency: " << extended_samples_per_second; + DVLOG(1) << "Channel config: " << channel_configuration; + DVLOG(1) << "Adts profile: " << adts_profile; + // Reset the timestamp helper to use a new time scale. + if (audio_timestamp_helper_) { + base::TimeDelta base_timestamp = audio_timestamp_helper_->GetTimestamp(); + audio_timestamp_helper_.reset( + new AudioTimestampHelper(samples_per_second)); + audio_timestamp_helper_->SetBaseTimestamp(base_timestamp); + } else { + audio_timestamp_helper_.reset( + new AudioTimestampHelper(samples_per_second)); + } + // Audio config notification. + last_audio_decoder_config_ = audio_decoder_config; + new_audio_config_cb_.Run(audio_decoder_config); + } + + return true; +} + +void EsParserAdts::DiscardEs(int nbytes) { + DCHECK_GE(nbytes, 0); + if (nbytes <= 0) + return; + + // Adjust the ES position of each PTS. + for (EsPtsList::iterator it = pts_list_.begin(); it != pts_list_.end(); ++it) + it->first -= nbytes; + + // Discard |nbytes| of ES. 
+ es_byte_queue_.Pop(nbytes); +} + +} // namespace mp2t +} // namespace media + diff --git a/media/formats/mp2t/es_parser_adts.h b/media/formats/mp2t/es_parser_adts.h new file mode 100644 index 0000000000..e55eaf70e1 --- /dev/null +++ b/media/formats/mp2t/es_parser_adts.h @@ -0,0 +1,86 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_MP2T_ES_PARSER_ADTS_H_ +#define MEDIA_FORMATS_MP2T_ES_PARSER_ADTS_H_ + +#include +#include + +#include "base/callback.h" +#include "base/compiler_specific.h" +#include "base/memory/scoped_ptr.h" +#include "base/time/time.h" +#include "media/base/audio_decoder_config.h" +#include "media/base/byte_queue.h" +#include "media/formats/mp2t/es_parser.h" + +namespace media { +class AudioTimestampHelper; +class BitReader; +class StreamParserBuffer; +} + +namespace media { +namespace mp2t { + +class EsParserAdts : public EsParser { + public: + typedef base::Callback NewAudioConfigCB; + + EsParserAdts(const NewAudioConfigCB& new_audio_config_cb, + const EmitBufferCB& emit_buffer_cb, + bool sbr_in_mimetype); + virtual ~EsParserAdts(); + + // EsParser implementation. + virtual bool Parse(const uint8* buf, int size, + base::TimeDelta pts, + base::TimeDelta dts) OVERRIDE; + virtual void Flush() OVERRIDE; + virtual void Reset() OVERRIDE; + + private: + // Used to link a PTS with a byte position in the ES stream. + typedef std::pair EsPts; + typedef std::list EsPtsList; + + // Signal any audio configuration change (if any). + // Return false if the current audio config is not + // a supported ADTS audio config. + bool UpdateAudioConfiguration(const uint8* adts_header); + + // Discard some bytes from the ES stream. + void DiscardEs(int nbytes); + + // Callbacks: + // - to signal a new audio configuration, + // - to send ES buffers. 
+ NewAudioConfigCB new_audio_config_cb_; + EmitBufferCB emit_buffer_cb_; + + // True when AAC SBR extension is signalled in the mimetype + // (mp4a.40.5 in the codecs parameter). + bool sbr_in_mimetype_; + + // Bytes of the ES stream that have not been emitted yet. + ByteQueue es_byte_queue_; + + // List of PTS associated with a position in the ES stream. + EsPtsList pts_list_; + + // Interpolated PTS for frames that don't have one. + scoped_ptr audio_timestamp_helper_; + + // Last audio config. + AudioDecoderConfig last_audio_decoder_config_; + + DISALLOW_COPY_AND_ASSIGN(EsParserAdts); +}; + +} // namespace mp2t +} // namespace media + +#endif + diff --git a/media/formats/mp2t/es_parser_h264.cc b/media/formats/mp2t/es_parser_h264.cc new file mode 100644 index 0000000000..691678ce81 --- /dev/null +++ b/media/formats/mp2t/es_parser_h264.cc @@ -0,0 +1,332 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "media/formats/mp2t/es_parser_h264.h" + +#include "base/basictypes.h" +#include "base/logging.h" +#include "base/numerics/safe_conversions.h" +#include "media/base/buffers.h" +#include "media/base/stream_parser_buffer.h" +#include "media/base/video_frame.h" +#include "media/filters/h264_parser.h" +#include "media/formats/common/offset_byte_queue.h" +#include "media/formats/mp2t/mp2t_common.h" +#include "ui/gfx/rect.h" +#include "ui/gfx/size.h" + +namespace media { +namespace mp2t { + +// An AUD NALU is at least 4 bytes: +// 3 bytes for the start code + 1 byte for the NALU type. 
+const int kMinAUDSize = 4; + +EsParserH264::EsParserH264( + const NewVideoConfigCB& new_video_config_cb, + const EmitBufferCB& emit_buffer_cb) + : new_video_config_cb_(new_video_config_cb), + emit_buffer_cb_(emit_buffer_cb), + es_queue_(new media::OffsetByteQueue()), + h264_parser_(new H264Parser()), + current_access_unit_pos_(0), + next_access_unit_pos_(0) { +} + +EsParserH264::~EsParserH264() { +} + +bool EsParserH264::Parse(const uint8* buf, int size, + base::TimeDelta pts, + base::TimeDelta dts) { + // Note: Parse is invoked each time a PES packet has been reassembled. + // Unfortunately, a PES packet does not necessarily map + // to an h264 access unit, although the HLS recommendation is to use one PES + // for each access unit (but this is just a recommendation and some streams + // do not comply with this recommendation). + + // HLS recommendation: "In AVC video, you should have both a DTS and a + // PTS in each PES header". + // However, some streams do not comply with this recommendation. + DVLOG_IF(1, pts == kNoTimestamp()) << "Each video PES should have a PTS"; + if (pts != kNoTimestamp()) { + TimingDesc timing_desc; + timing_desc.pts = pts; + timing_desc.dts = (dts != kNoTimestamp()) ? dts : pts; + + // Link the end of the byte queue with the incoming timing descriptor. + timing_desc_list_.push_back( + std::pair(es_queue_->tail(), timing_desc)); + } + + // Add the incoming bytes to the ES queue. + es_queue_->Push(buf, size); + return ParseInternal(); +} + +void EsParserH264::Flush() { + DVLOG(1) << "EsParserH264::Flush"; + if (!FindAUD(¤t_access_unit_pos_)) + return; + + // Simulate an additional AUD to force emitting the last access unit + // which is assumed to be complete at this point. 
+ uint8 aud[] = { 0x00, 0x00, 0x01, 0x09 }; + es_queue_->Push(aud, sizeof(aud)); + ParseInternal(); +} + +void EsParserH264::Reset() { + DVLOG(1) << "EsParserH264::Reset"; + es_queue_.reset(new media::OffsetByteQueue()); + h264_parser_.reset(new H264Parser()); + current_access_unit_pos_ = 0; + next_access_unit_pos_ = 0; + timing_desc_list_.clear(); + last_video_decoder_config_ = VideoDecoderConfig(); +} + +bool EsParserH264::FindAUD(int64* stream_pos) { + while (true) { + const uint8* es; + int size; + es_queue_->PeekAt(*stream_pos, &es, &size); + + // Find a start code and move the stream to the start code parser position. + off_t start_code_offset; + off_t start_code_size; + bool start_code_found = H264Parser::FindStartCode( + es, size, &start_code_offset, &start_code_size); + *stream_pos += start_code_offset; + + // No H264 start code found or NALU type not available yet. + if (!start_code_found || start_code_offset + start_code_size >= size) + return false; + + // Exit the parser loop when an AUD is found. + // Note: NALU header for an AUD: + // - nal_ref_idc must be 0 + // - nal_unit_type must be H264NALU::kAUD + if (es[start_code_offset + start_code_size] == H264NALU::kAUD) + break; + + // The current NALU is not an AUD, skip the start code + // and continue parsing the stream. + *stream_pos += start_code_size; + } + + return true; +} + +bool EsParserH264::ParseInternal() { + DCHECK_LE(es_queue_->head(), current_access_unit_pos_); + DCHECK_LE(current_access_unit_pos_, next_access_unit_pos_); + DCHECK_LE(next_access_unit_pos_, es_queue_->tail()); + + // Find the next AUD located at or after |current_access_unit_pos_|. This is + // needed since initially |current_access_unit_pos_| might not point to + // an AUD. + // Discard all the data before the updated |current_access_unit_pos_| + // since it won't be used again. 
+ bool aud_found = FindAUD(¤t_access_unit_pos_); + es_queue_->Trim(current_access_unit_pos_); + if (next_access_unit_pos_ < current_access_unit_pos_) + next_access_unit_pos_ = current_access_unit_pos_; + + // Resume parsing later if no AUD was found. + if (!aud_found) + return true; + + // Find the next AUD to make sure we have a complete access unit. + if (next_access_unit_pos_ < current_access_unit_pos_ + kMinAUDSize) { + next_access_unit_pos_ = current_access_unit_pos_ + kMinAUDSize; + DCHECK_LE(next_access_unit_pos_, es_queue_->tail()); + } + if (!FindAUD(&next_access_unit_pos_)) + return true; + + // At this point, we know we have a full access unit. + bool is_key_frame = false; + int pps_id_for_access_unit = -1; + + const uint8* es; + int size; + es_queue_->PeekAt(current_access_unit_pos_, &es, &size); + int access_unit_size = base::checked_cast( + next_access_unit_pos_ - current_access_unit_pos_); + DCHECK_LE(access_unit_size, size); + h264_parser_->SetStream(es, access_unit_size); + + while (true) { + bool is_eos = false; + H264NALU nalu; + switch (h264_parser_->AdvanceToNextNALU(&nalu)) { + case H264Parser::kOk: + break; + case H264Parser::kInvalidStream: + case H264Parser::kUnsupportedStream: + return false; + case H264Parser::kEOStream: + is_eos = true; + break; + } + if (is_eos) + break; + + switch (nalu.nal_unit_type) { + case H264NALU::kAUD: { + DVLOG(LOG_LEVEL_ES) << "NALU: AUD"; + break; + } + case H264NALU::kSPS: { + DVLOG(LOG_LEVEL_ES) << "NALU: SPS"; + int sps_id; + if (h264_parser_->ParseSPS(&sps_id) != H264Parser::kOk) + return false; + break; + } + case H264NALU::kPPS: { + DVLOG(LOG_LEVEL_ES) << "NALU: PPS"; + int pps_id; + if (h264_parser_->ParsePPS(&pps_id) != H264Parser::kOk) + return false; + break; + } + case H264NALU::kIDRSlice: + case H264NALU::kNonIDRSlice: { + is_key_frame = (nalu.nal_unit_type == H264NALU::kIDRSlice); + DVLOG(LOG_LEVEL_ES) << "NALU: slice IDR=" << is_key_frame; + H264SliceHeader shdr; + if 
(h264_parser_->ParseSliceHeader(nalu, &shdr) != H264Parser::kOk) { + // Only accept an invalid SPS/PPS at the beginning when the stream + // does not necessarily start with an SPS/PPS/IDR. + // TODO(damienv): Should be able to differentiate a missing SPS/PPS + // from a slice header parsing error. + if (last_video_decoder_config_.IsValidConfig()) + return false; + } else { + pps_id_for_access_unit = shdr.pic_parameter_set_id; + } + break; + } + default: { + DVLOG(LOG_LEVEL_ES) << "NALU: " << nalu.nal_unit_type; + } + } + } + + // Emit a frame and move the stream to the next AUD position. + RCHECK(EmitFrame(current_access_unit_pos_, access_unit_size, + is_key_frame, pps_id_for_access_unit)); + current_access_unit_pos_ = next_access_unit_pos_; + es_queue_->Trim(current_access_unit_pos_); + + return true; +} + +bool EsParserH264::EmitFrame(int64 access_unit_pos, int access_unit_size, + bool is_key_frame, int pps_id) { + // Get the access unit timing info. + TimingDesc current_timing_desc = {kNoTimestamp(), kNoTimestamp()}; + while (!timing_desc_list_.empty() && + timing_desc_list_.front().first <= access_unit_pos) { + current_timing_desc = timing_desc_list_.front().second; + timing_desc_list_.pop_front(); + } + if (current_timing_desc.pts == kNoTimestamp()) + return false; + + // Update the video decoder configuration if needed. + const H264PPS* pps = h264_parser_->GetPPS(pps_id); + if (!pps) { + // Only accept an invalid PPS at the beginning when the stream + // does not necessarily start with an SPS/PPS/IDR. + // In this case, the initial frames are conveyed to the upper layer with + // an invalid VideoDecoderConfig and it's up to the upper layer + // to process this kind of frame accordingly. + if (last_video_decoder_config_.IsValidConfig()) + return false; + } else { + const H264SPS* sps = h264_parser_->GetSPS(pps->seq_parameter_set_id); + if (!sps) + return false; + RCHECK(UpdateVideoDecoderConfig(sps)); + } + + // Emit a frame. 
+ DVLOG(LOG_LEVEL_ES) << "Emit frame: stream_pos=" << current_access_unit_pos_ + << " size=" << access_unit_size; + int es_size; + const uint8* es; + es_queue_->PeekAt(current_access_unit_pos_, &es, &es_size); + CHECK_GE(es_size, access_unit_size); + + // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId + // type and allow multiple video tracks. See https://crbug.com/341581. + scoped_refptr stream_parser_buffer = + StreamParserBuffer::CopyFrom( + es, + access_unit_size, + is_key_frame, + DemuxerStream::VIDEO, + 0); + stream_parser_buffer->SetDecodeTimestamp(current_timing_desc.dts); + stream_parser_buffer->set_timestamp(current_timing_desc.pts); + emit_buffer_cb_.Run(stream_parser_buffer); + return true; +} + +bool EsParserH264::UpdateVideoDecoderConfig(const H264SPS* sps) { + // Set the SAR to 1 when not specified in the H264 stream. + int sar_width = (sps->sar_width == 0) ? 1 : sps->sar_width; + int sar_height = (sps->sar_height == 0) ? 1 : sps->sar_height; + + // TODO(damienv): a MAP unit can be either 16 or 32 pixels. + // although it's 16 pixels for progressive non MBAFF frames. 
+ gfx::Size coded_size((sps->pic_width_in_mbs_minus1 + 1) * 16, + (sps->pic_height_in_map_units_minus1 + 1) * 16); + gfx::Rect visible_rect( + sps->frame_crop_left_offset, + sps->frame_crop_top_offset, + (coded_size.width() - sps->frame_crop_right_offset) - + sps->frame_crop_left_offset, + (coded_size.height() - sps->frame_crop_bottom_offset) - + sps->frame_crop_top_offset); + if (visible_rect.width() <= 0 || visible_rect.height() <= 0) + return false; + gfx::Size natural_size( + (visible_rect.width() * sar_width) / sar_height, + visible_rect.height()); + if (natural_size.width() == 0) + return false; + + VideoDecoderConfig video_decoder_config( + kCodecH264, + VIDEO_CODEC_PROFILE_UNKNOWN, + VideoFrame::YV12, + coded_size, + visible_rect, + natural_size, + NULL, 0, + false); + + if (!video_decoder_config.Matches(last_video_decoder_config_)) { + DVLOG(1) << "Profile IDC: " << sps->profile_idc; + DVLOG(1) << "Level IDC: " << sps->level_idc; + DVLOG(1) << "Pic width: " << coded_size.width(); + DVLOG(1) << "Pic height: " << coded_size.height(); + DVLOG(1) << "log2_max_frame_num_minus4: " + << sps->log2_max_frame_num_minus4; + DVLOG(1) << "SAR: width=" << sps->sar_width + << " height=" << sps->sar_height; + last_video_decoder_config_ = video_decoder_config; + new_video_config_cb_.Run(video_decoder_config); + } + + return true; +} + +} // namespace mp2t +} // namespace media + diff --git a/media/formats/mp2t/es_parser_h264.h b/media/formats/mp2t/es_parser_h264.h new file mode 100644 index 0000000000..bf4f4cc1d9 --- /dev/null +++ b/media/formats/mp2t/es_parser_h264.h @@ -0,0 +1,98 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef MEDIA_FORMATS_MP2T_ES_PARSER_H264_H_ +#define MEDIA_FORMATS_MP2T_ES_PARSER_H264_H_ + +#include +#include + +#include "base/basictypes.h" +#include "base/callback.h" +#include "base/compiler_specific.h" +#include "base/memory/scoped_ptr.h" +#include "base/time/time.h" +#include "media/base/media_export.h" +#include "media/base/video_decoder_config.h" +#include "media/formats/mp2t/es_parser.h" + +namespace media { +class H264Parser; +struct H264SPS; +class OffsetByteQueue; +} + +namespace media { +namespace mp2t { + +// Remark: +// In this h264 parser, frame splitting is based on AUD nals. +// Mpeg2 TS spec: "2.14 Carriage of Rec. ITU-T H.264 | ISO/IEC 14496-10 video" +// "Each AVC access unit shall contain an access unit delimiter NAL Unit;" +// +class MEDIA_EXPORT EsParserH264 : NON_EXPORTED_BASE(public EsParser) { + public: + typedef base::Callback NewVideoConfigCB; + + EsParserH264(const NewVideoConfigCB& new_video_config_cb, + const EmitBufferCB& emit_buffer_cb); + virtual ~EsParserH264(); + + // EsParser implementation. + virtual bool Parse(const uint8* buf, int size, + base::TimeDelta pts, + base::TimeDelta dts) OVERRIDE; + virtual void Flush() OVERRIDE; + virtual void Reset() OVERRIDE; + + private: + struct TimingDesc { + base::TimeDelta dts; + base::TimeDelta pts; + }; + + // Find the AUD located at or after |*stream_pos|. + // Return true if an AUD is found. + // If found, |*stream_pos| corresponds to the position of the AUD start code + // in the stream. Otherwise, |*stream_pos| corresponds to the last position + // of the start code parser. + bool FindAUD(int64* stream_pos); + + // Resumes the H264 ES parsing. + // Return true if successful. + bool ParseInternal(); + + // Emit a frame whose position in the ES queue starts at |access_unit_pos|. + // Returns true if successful, false if no PTS is available for the frame. 
+ bool EmitFrame(int64 access_unit_pos, int access_unit_size, + bool is_key_frame, int pps_id); + + // Update the video decoder config based on an H264 SPS. + // Return true if successful. + bool UpdateVideoDecoderConfig(const H264SPS* sps); + + // Callbacks to pass the stream configuration and the frames. + NewVideoConfigCB new_video_config_cb_; + EmitBufferCB emit_buffer_cb_; + + // Bytes of the ES stream that have not been emitted yet. + scoped_ptr es_queue_; + std::list > timing_desc_list_; + + // H264 parser state. + // - |current_access_unit_pos_| is pointing to an annexB syncword + // representing the first NALU of an H264 access unit. + scoped_ptr h264_parser_; + int64 current_access_unit_pos_; + int64 next_access_unit_pos_; + + // Last video decoder config. + VideoDecoderConfig last_video_decoder_config_; +}; + +} // namespace mp2t +} // namespace media + +#endif + diff --git a/media/formats/mp2t/es_parser_h264_unittest.cc b/media/formats/mp2t/es_parser_h264_unittest.cc new file mode 100644 index 0000000000..6e141ba7bf --- /dev/null +++ b/media/formats/mp2t/es_parser_h264_unittest.cc @@ -0,0 +1,261 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include + +#include "base/bind.h" +#include "base/command_line.h" +#include "base/files/memory_mapped_file.h" +#include "base/logging.h" +#include "base/path_service.h" +#include "media/base/stream_parser_buffer.h" +#include "media/base/test_data_util.h" +#include "media/filters/h264_parser.h" +#include "media/formats/mp2t/es_parser_h264.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace media { +class VideoDecoderConfig; + +namespace mp2t { + +namespace { + +struct Packet { + // Offset in the stream. + size_t offset; + + // Size of the packet. 
+ size_t size; +}; + +// Compute the size of each packet assuming packets are given in stream order +// and the last packet covers the end of the stream. +void ComputePacketSize(std::vector& packets, size_t stream_size) { + for (size_t k = 0; k < packets.size() - 1; k++) { + DCHECK_GE(packets[k + 1].offset, packets[k].offset); + packets[k].size = packets[k + 1].offset - packets[k].offset; + } + packets[packets.size() - 1].size = + stream_size - packets[packets.size() - 1].offset; +} + +// Get the offset of the start of each access unit. +// This function assumes there is only one slice per access unit. +// This is a very simplified access unit segmenter that is good +// enough for unit tests. +std::vector GetAccessUnits(const uint8* stream, size_t stream_size) { + std::vector access_units; + bool start_access_unit = true; + + // In a first pass, retrieve the offsets of all access units. + size_t offset = 0; + while (true) { + // Find the next start code. + off_t relative_offset = 0; + off_t start_code_size = 0; + bool success = H264Parser::FindStartCode( + &stream[offset], stream_size - offset, + &relative_offset, &start_code_size); + if (!success) + break; + offset += relative_offset; + + if (start_access_unit) { + Packet cur_access_unit; + cur_access_unit.offset = offset; + access_units.push_back(cur_access_unit); + start_access_unit = false; + } + + // Get the NALU type. + offset += start_code_size; + if (offset >= stream_size) + break; + int nal_unit_type = stream[offset] & 0x1f; + + // We assume there is only one slice per access unit. + if (nal_unit_type == H264NALU::kIDRSlice || + nal_unit_type == H264NALU::kNonIDRSlice) { + start_access_unit = true; + } + } + + ComputePacketSize(access_units, stream_size); + return access_units; +} + +// Append an AUD NALU at the beginning of each access unit +// needed for streams which do not already have AUD NALUs. 
+void AppendAUD( + const uint8* stream, size_t stream_size, + const std::vector& access_units, + std::vector& stream_with_aud, + std::vector& access_units_with_aud) { + uint8 aud[] = { 0x00, 0x00, 0x01, 0x09 }; + stream_with_aud.resize(stream_size + access_units.size() * sizeof(aud)); + access_units_with_aud.resize(access_units.size()); + + size_t offset = 0; + for (size_t k = 0; k < access_units.size(); k++) { + access_units_with_aud[k].offset = offset; + access_units_with_aud[k].size = access_units[k].size + sizeof(aud); + + memcpy(&stream_with_aud[offset], aud, sizeof(aud)); + offset += sizeof(aud); + + memcpy(&stream_with_aud[offset], + &stream[access_units[k].offset], access_units[k].size); + offset += access_units[k].size; + } +} + +} // namespace + +class EsParserH264Test : public testing::Test { + public: + EsParserH264Test() : buffer_count_(0) { + } + + void LoadStream(const char* filename); + void ProcessPesPackets(const std::vector& pes_packets); + + void EmitBuffer(scoped_refptr buffer) { + buffer_count_++; + } + + void NewVideoConfig(const VideoDecoderConfig& config) { + } + + size_t buffer_count() const { return buffer_count_; } + + // Stream with AUD NALUs. + std::vector stream_; + + // Access units of the stream with AUD NALUs. + std::vector access_units_; + + protected: + size_t buffer_count_; +}; + +void EsParserH264Test::LoadStream(const char* filename) { + base::FilePath file_path = GetTestDataFilePath(filename); + + base::MemoryMappedFile stream_without_aud; + ASSERT_TRUE(stream_without_aud.Initialize(file_path)) + << "Couldn't open stream file: " << file_path.MaybeAsASCII(); + + // The input file does not have AUDs. 
+ std::vector access_units_without_aud = GetAccessUnits( + stream_without_aud.data(), stream_without_aud.length()); + ASSERT_GT(access_units_without_aud.size(), 0u); + AppendAUD(stream_without_aud.data(), stream_without_aud.length(), + access_units_without_aud, + stream_, access_units_); +} + +void EsParserH264Test::ProcessPesPackets( + const std::vector& pes_packets) { + EsParserH264 es_parser( + base::Bind(&EsParserH264Test::NewVideoConfig, base::Unretained(this)), + base::Bind(&EsParserH264Test::EmitBuffer, base::Unretained(this))); + + size_t au_idx = 0; + for (size_t k = 0; k < pes_packets.size(); k++) { + size_t cur_pes_offset = pes_packets[k].offset; + size_t cur_pes_size = pes_packets[k].size; + + // Update the access unit the PES belongs to from a timing point of view. + while (au_idx < access_units_.size() - 1 && + cur_pes_offset <= access_units_[au_idx + 1].offset && + cur_pes_offset + cur_pes_size > access_units_[au_idx + 1].offset) { + au_idx++; + } + + // Check whether the PES packet includes the start of an access unit. + // The timings are relevant only in this case. + base::TimeDelta pts = kNoTimestamp(); + base::TimeDelta dts = kNoTimestamp(); + if (cur_pes_offset <= access_units_[au_idx].offset && + cur_pes_offset + cur_pes_size > access_units_[au_idx].offset) { + pts = base::TimeDelta::FromMilliseconds(au_idx * 40u); + } + + ASSERT_TRUE( + es_parser.Parse(&stream_[cur_pes_offset], cur_pes_size, pts, dts)); + } + es_parser.Flush(); +} + + +TEST_F(EsParserH264Test, OneAccessUnitPerPes) { + LoadStream("bear.h264"); + + // One to one equivalence between PES packets and access units. + std::vector pes_packets(access_units_); + + // Process each PES packet. + ProcessPesPackets(pes_packets); + ASSERT_EQ(buffer_count(), access_units_.size()); +} + +TEST_F(EsParserH264Test, NonAlignedPesPacket) { + LoadStream("bear.h264"); + + // Generate the PES packets. 
+ std::vector pes_packets; + Packet cur_pes_packet; + cur_pes_packet.offset = 0; + for (size_t k = 0; k < access_units_.size(); k++) { + pes_packets.push_back(cur_pes_packet); + + // The current PES packet includes the remaining bytes of the previous + // access unit and some bytes of the current access unit + // (487 bytes in this unit test but no more than the current access unit + // size). + cur_pes_packet.offset = access_units_[k].offset + + std::min(487u, access_units_[k].size); + } + ComputePacketSize(pes_packets, stream_.size()); + + // Process each PES packet. + ProcessPesPackets(pes_packets); + ASSERT_EQ(buffer_count(), access_units_.size()); +} + +TEST_F(EsParserH264Test, SeveralPesPerAccessUnit) { + LoadStream("bear.h264"); + + // Get the minimum size of an access unit. + size_t min_access_unit_size = stream_.size(); + for (size_t k = 0; k < access_units_.size(); k++) { + if (min_access_unit_size >= access_units_[k].size) + min_access_unit_size = access_units_[k].size; + } + + // Use a small PES packet size or the minimum access unit size + // if it is even smaller. + size_t pes_size = 512; + if (min_access_unit_size < pes_size) + pes_size = min_access_unit_size; + + std::vector pes_packets; + Packet cur_pes_packet; + cur_pes_packet.offset = 0; + while (cur_pes_packet.offset < stream_.size()) { + pes_packets.push_back(cur_pes_packet); + cur_pes_packet.offset += pes_size; + } + ComputePacketSize(pes_packets, stream_.size()); + + // Process each PES packet. + ProcessPesPackets(pes_packets); + ASSERT_EQ(buffer_count(), access_units_.size()); +} + +} // namespace mp2t +} // namespace media + diff --git a/media/formats/mp2t/mp2t_common.h b/media/formats/mp2t/mp2t_common.h new file mode 100644 index 0000000000..64446fb454 --- /dev/null +++ b/media/formats/mp2t/mp2t_common.h @@ -0,0 +1,21 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef MEDIA_FORMATS_MP2T_MP2T_COMMON_H_ +#define MEDIA_FORMATS_MP2T_MP2T_COMMON_H_ + +#define LOG_LEVEL_TS 5 +#define LOG_LEVEL_PES 4 +#define LOG_LEVEL_ES 3 + +#define RCHECK(x) \ + do { \ + if (!(x)) { \ + DLOG(WARNING) << "Failure while parsing Mpeg2TS: " << #x; \ + return false; \ + } \ + } while (0) + +#endif + diff --git a/media/formats/mp2t/mp2t_stream_parser.cc b/media/formats/mp2t/mp2t_stream_parser.cc new file mode 100644 index 0000000000..a22af1a149 --- /dev/null +++ b/media/formats/mp2t/mp2t_stream_parser.cc @@ -0,0 +1,622 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "media/formats/mp2t/mp2t_stream_parser.h" + +#include "base/bind.h" +#include "base/memory/scoped_ptr.h" +#include "base/stl_util.h" +#include "media/base/audio_decoder_config.h" +#include "media/base/buffers.h" +#include "media/base/stream_parser_buffer.h" +#include "media/base/text_track_config.h" +#include "media/base/video_decoder_config.h" +#include "media/formats/mp2t/es_parser.h" +#include "media/formats/mp2t/es_parser_adts.h" +#include "media/formats/mp2t/es_parser_h264.h" +#include "media/formats/mp2t/mp2t_common.h" +#include "media/formats/mp2t/ts_packet.h" +#include "media/formats/mp2t/ts_section.h" +#include "media/formats/mp2t/ts_section_pat.h" +#include "media/formats/mp2t/ts_section_pes.h" +#include "media/formats/mp2t/ts_section_pmt.h" + +namespace media { +namespace mp2t { + +enum StreamType { + // ISO-13818.1 / ITU H.222 Table 2.34 "Stream type assignments" + kStreamTypeMpeg1Audio = 0x3, + kStreamTypeAAC = 0xf, + kStreamTypeAVC = 0x1b, +}; + +class PidState { + public: + enum PidType { + kPidPat, + kPidPmt, + kPidAudioPes, + kPidVideoPes, + }; + + PidState(int pid, PidType pid_type, + scoped_ptr section_parser); + + // Extract the content of the TS packet and parse it. + // Return true if successful. 
+ bool PushTsPacket(const TsPacket& ts_packet); + + // Flush the PID state (possibly emitting some pending frames) + // and reset its state. + void Flush(); + + // Enable/disable the PID. + // Disabling a PID will reset its state and ignore any further incoming TS + // packets. + void Enable(); + void Disable(); + bool IsEnabled() const; + + PidType pid_type() const { return pid_type_; } + + private: + void ResetState(); + + int pid_; + PidType pid_type_; + scoped_ptr section_parser_; + + bool enable_; + + int continuity_counter_; +}; + +PidState::PidState(int pid, PidType pid_type, + scoped_ptr section_parser) + : pid_(pid), + pid_type_(pid_type), + section_parser_(section_parser.Pass()), + enable_(false), + continuity_counter_(-1) { + DCHECK(section_parser_); +} + +bool PidState::PushTsPacket(const TsPacket& ts_packet) { + DCHECK_EQ(ts_packet.pid(), pid_); + + // The current PID is not part of the PID filter, + // just discard the incoming TS packet. + if (!enable_) + return true; + + int expected_continuity_counter = (continuity_counter_ + 1) % 16; + if (continuity_counter_ >= 0 && + ts_packet.continuity_counter() != expected_continuity_counter) { + DVLOG(1) << "TS discontinuity detected for pid: " << pid_; + return false; + } + + bool status = section_parser_->Parse( + ts_packet.payload_unit_start_indicator(), + ts_packet.payload(), + ts_packet.payload_size()); + + // At the minimum, when parsing failed, auto reset the section parser. + // Components that use the StreamParser can take further action if needed. 
+ if (!status) { + DVLOG(1) << "Parsing failed for pid = " << pid_; + ResetState(); + } + + return status; +} + +void PidState::Flush() { + section_parser_->Flush(); + ResetState(); +} + +void PidState::Enable() { + enable_ = true; +} + +void PidState::Disable() { + if (!enable_) + return; + + ResetState(); + enable_ = false; +} + +bool PidState::IsEnabled() const { + return enable_; +} + +void PidState::ResetState() { + section_parser_->Reset(); + continuity_counter_ = -1; +} + +Mp2tStreamParser::BufferQueueWithConfig::BufferQueueWithConfig( + bool is_cfg_sent, + const AudioDecoderConfig& audio_cfg, + const VideoDecoderConfig& video_cfg) + : is_config_sent(is_cfg_sent), + audio_config(audio_cfg), + video_config(video_cfg) { +} + +Mp2tStreamParser::BufferQueueWithConfig::~BufferQueueWithConfig() { +} + +Mp2tStreamParser::Mp2tStreamParser(bool sbr_in_mimetype) + : sbr_in_mimetype_(sbr_in_mimetype), + selected_audio_pid_(-1), + selected_video_pid_(-1), + is_initialized_(false), + segment_started_(false), + first_video_frame_in_segment_(true) { +} + +Mp2tStreamParser::~Mp2tStreamParser() { + STLDeleteValues(&pids_); +} + +void Mp2tStreamParser::Init( + const InitCB& init_cb, + const NewConfigCB& config_cb, + const NewBuffersCB& new_buffers_cb, + bool /* ignore_text_tracks */ , + const NeedKeyCB& need_key_cb, + const NewMediaSegmentCB& new_segment_cb, + const base::Closure& end_of_segment_cb, + const LogCB& log_cb) { + DCHECK(!is_initialized_); + DCHECK(init_cb_.is_null()); + DCHECK(!init_cb.is_null()); + DCHECK(!config_cb.is_null()); + DCHECK(!new_buffers_cb.is_null()); + DCHECK(!need_key_cb.is_null()); + DCHECK(!end_of_segment_cb.is_null()); + + init_cb_ = init_cb; + config_cb_ = config_cb; + new_buffers_cb_ = new_buffers_cb; + need_key_cb_ = need_key_cb; + new_segment_cb_ = new_segment_cb; + end_of_segment_cb_ = end_of_segment_cb; + log_cb_ = log_cb; +} + +void Mp2tStreamParser::Flush() { + DVLOG(1) << "Mp2tStreamParser::Flush"; + + // Flush the buffers and reset 
the pids. + for (std::map::iterator it = pids_.begin(); + it != pids_.end(); ++it) { + DVLOG(1) << "Flushing PID: " << it->first; + PidState* pid_state = it->second; + pid_state->Flush(); + delete pid_state; + } + pids_.clear(); + EmitRemainingBuffers(); + buffer_queue_chain_.clear(); + + // End of the segment. + // Note: does not need to invoke |end_of_segment_cb_| since flushing the + // stream parser already involves the end of the current segment. + segment_started_ = false; + first_video_frame_in_segment_ = true; + + // Remove any bytes left in the TS buffer. + // (i.e. any partial TS packet => less than 188 bytes). + ts_byte_queue_.Reset(); + + // Reset the selected PIDs. + selected_audio_pid_ = -1; + selected_video_pid_ = -1; +} + +bool Mp2tStreamParser::Parse(const uint8* buf, int size) { + DVLOG(1) << "Mp2tStreamParser::Parse size=" << size; + + // Add the data to the parser state. + ts_byte_queue_.Push(buf, size); + + while (true) { + const uint8* ts_buffer; + int ts_buffer_size; + ts_byte_queue_.Peek(&ts_buffer, &ts_buffer_size); + if (ts_buffer_size < TsPacket::kPacketSize) + break; + + // Synchronization. + int skipped_bytes = TsPacket::Sync(ts_buffer, ts_buffer_size); + if (skipped_bytes > 0) { + DVLOG(1) << "Packet not aligned on a TS syncword:" + << " skipped_bytes=" << skipped_bytes; + ts_byte_queue_.Pop(skipped_bytes); + continue; + } + + // Parse the TS header, skipping 1 byte if the header is invalid. + scoped_ptr ts_packet(TsPacket::Parse(ts_buffer, ts_buffer_size)); + if (!ts_packet) { + DVLOG(1) << "Error: invalid TS packet"; + ts_byte_queue_.Pop(1); + continue; + } + DVLOG(LOG_LEVEL_TS) + << "Processing PID=" << ts_packet->pid() + << " start_unit=" << ts_packet->payload_unit_start_indicator(); + + // Parse the section. + std::map::iterator it = pids_.find(ts_packet->pid()); + if (it == pids_.end() && + ts_packet->pid() == TsSection::kPidPat) { + // Create the PAT state here if needed. 
+ scoped_ptr pat_section_parser( + new TsSectionPat( + base::Bind(&Mp2tStreamParser::RegisterPmt, + base::Unretained(this)))); + scoped_ptr pat_pid_state( + new PidState(ts_packet->pid(), PidState::kPidPat, + pat_section_parser.Pass())); + pat_pid_state->Enable(); + it = pids_.insert( + std::pair(ts_packet->pid(), + pat_pid_state.release())).first; + } + + if (it != pids_.end()) { + if (!it->second->PushTsPacket(*ts_packet)) + return false; + } else { + DVLOG(LOG_LEVEL_TS) << "Ignoring TS packet for pid: " << ts_packet->pid(); + } + + // Go to the next packet. + ts_byte_queue_.Pop(TsPacket::kPacketSize); + } + + RCHECK(FinishInitializationIfNeeded()); + + // Emit the A/V buffers that kept accumulating during TS parsing. + return EmitRemainingBuffers(); +} + +void Mp2tStreamParser::RegisterPmt(int program_number, int pmt_pid) { + DVLOG(1) << "RegisterPmt:" + << " program_number=" << program_number + << " pmt_pid=" << pmt_pid; + + // Only one TS program is allowed. Ignore the incoming program map table, + // if there is already one registered. + for (std::map::iterator it = pids_.begin(); + it != pids_.end(); ++it) { + PidState* pid_state = it->second; + if (pid_state->pid_type() == PidState::kPidPmt) { + DVLOG_IF(1, pmt_pid != it->first) << "More than one program is defined"; + return; + } + } + + // Create the PMT state here if needed. + DVLOG(1) << "Create a new PMT parser"; + scoped_ptr pmt_section_parser( + new TsSectionPmt( + base::Bind(&Mp2tStreamParser::RegisterPes, + base::Unretained(this), pmt_pid))); + scoped_ptr pmt_pid_state( + new PidState(pmt_pid, PidState::kPidPmt, pmt_section_parser.Pass())); + pmt_pid_state->Enable(); + pids_.insert(std::pair(pmt_pid, pmt_pid_state.release())); +} + +void Mp2tStreamParser::RegisterPes(int pmt_pid, + int pes_pid, + int stream_type) { + // TODO(damienv): check there is no mismatch if the entry already exists. 
+ DVLOG(1) << "RegisterPes:" + << " pes_pid=" << pes_pid + << " stream_type=" << std::hex << stream_type << std::dec; + std::map::iterator it = pids_.find(pes_pid); + if (it != pids_.end()) + return; + + // Create a stream parser corresponding to the stream type. + bool is_audio = false; + scoped_ptr es_parser; + if (stream_type == kStreamTypeAVC) { + es_parser.reset( + new EsParserH264( + base::Bind(&Mp2tStreamParser::OnVideoConfigChanged, + base::Unretained(this), + pes_pid), + base::Bind(&Mp2tStreamParser::OnEmitVideoBuffer, + base::Unretained(this), + pes_pid))); + } else if (stream_type == kStreamTypeAAC) { + es_parser.reset( + new EsParserAdts( + base::Bind(&Mp2tStreamParser::OnAudioConfigChanged, + base::Unretained(this), + pes_pid), + base::Bind(&Mp2tStreamParser::OnEmitAudioBuffer, + base::Unretained(this), + pes_pid), + sbr_in_mimetype_)); + is_audio = true; + } else { + return; + } + + // Create the PES state here. + DVLOG(1) << "Create a new PES state"; + scoped_ptr pes_section_parser( + new TsSectionPes(es_parser.Pass())); + PidState::PidType pid_type = + is_audio ? PidState::kPidAudioPes : PidState::kPidVideoPes; + scoped_ptr pes_pid_state( + new PidState(pes_pid, pid_type, pes_section_parser.Pass())); + pids_.insert(std::pair(pes_pid, pes_pid_state.release())); + + // A new PES pid has been added, the PID filter might change. + UpdatePidFilter(); +} + +void Mp2tStreamParser::UpdatePidFilter() { + // Applies the HLS rule to select the default audio/video PIDs: + // select the audio/video streams with the lowest PID. + // TODO(damienv): this can be changed when the StreamParser interface + // supports multiple audio/video streams. 
+ PidMap::iterator lowest_audio_pid = pids_.end(); + PidMap::iterator lowest_video_pid = pids_.end(); + for (PidMap::iterator it = pids_.begin(); it != pids_.end(); ++it) { + int pid = it->first; + PidState* pid_state = it->second; + if (pid_state->pid_type() == PidState::kPidAudioPes && + (lowest_audio_pid == pids_.end() || pid < lowest_audio_pid->first)) + lowest_audio_pid = it; + if (pid_state->pid_type() == PidState::kPidVideoPes && + (lowest_video_pid == pids_.end() || pid < lowest_video_pid->first)) + lowest_video_pid = it; + } + + // Enable both the lowest audio and video PIDs. + if (lowest_audio_pid != pids_.end()) { + DVLOG(1) << "Enable audio pid: " << lowest_audio_pid->first; + lowest_audio_pid->second->Enable(); + selected_audio_pid_ = lowest_audio_pid->first; + } + if (lowest_video_pid != pids_.end()) { + DVLOG(1) << "Enable video pid: " << lowest_video_pid->first; + lowest_video_pid->second->Enable(); + selected_video_pid_ = lowest_video_pid->first; + } + + // Disable all the other audio and video PIDs. + for (PidMap::iterator it = pids_.begin(); it != pids_.end(); ++it) { + PidState* pid_state = it->second; + if (it != lowest_audio_pid && it != lowest_video_pid && + (pid_state->pid_type() == PidState::kPidAudioPes || + pid_state->pid_type() == PidState::kPidVideoPes)) + pid_state->Disable(); + } +} + +void Mp2tStreamParser::OnVideoConfigChanged( + int pes_pid, + const VideoDecoderConfig& video_decoder_config) { + DVLOG(1) << "OnVideoConfigChanged for pid=" << pes_pid; + DCHECK_EQ(pes_pid, selected_video_pid_); + DCHECK(video_decoder_config.IsValidConfig()); + + // Create a new entry in |buffer_queue_chain_| with the updated configs. + BufferQueueWithConfig buffer_queue_with_config( + false, + buffer_queue_chain_.empty() + ? AudioDecoderConfig() : buffer_queue_chain_.back().audio_config, + video_decoder_config); + buffer_queue_chain_.push_back(buffer_queue_with_config); + + // Replace any non valid config with the 1st valid entry. 
+ // This might happen if there was no available config before. + for (std::list::iterator it = + buffer_queue_chain_.begin(); it != buffer_queue_chain_.end(); ++it) { + if (it->video_config.IsValidConfig()) + break; + it->video_config = video_decoder_config; + } +} + +void Mp2tStreamParser::OnAudioConfigChanged( + int pes_pid, + const AudioDecoderConfig& audio_decoder_config) { + DVLOG(1) << "OnAudioConfigChanged for pid=" << pes_pid; + DCHECK_EQ(pes_pid, selected_audio_pid_); + DCHECK(audio_decoder_config.IsValidConfig()); + + // Create a new entry in |buffer_queue_chain_| with the updated configs. + BufferQueueWithConfig buffer_queue_with_config( + false, + audio_decoder_config, + buffer_queue_chain_.empty() + ? VideoDecoderConfig() : buffer_queue_chain_.back().video_config); + buffer_queue_chain_.push_back(buffer_queue_with_config); + + // Replace any non valid config with the 1st valid entry. + // This might happen if there was no available config before. + for (std::list::iterator it = + buffer_queue_chain_.begin(); it != buffer_queue_chain_.end(); ++it) { + if (it->audio_config.IsValidConfig()) + break; + it->audio_config = audio_decoder_config; + } +} + +bool Mp2tStreamParser::FinishInitializationIfNeeded() { + // Nothing to be done if already initialized. + if (is_initialized_) + return true; + + // Wait for more data to come to finish initialization. + if (buffer_queue_chain_.empty()) + return true; + + // Wait for more data to come if one of the config is not available. + BufferQueueWithConfig& queue_with_config = buffer_queue_chain_.front(); + if (selected_audio_pid_ > 0 && + !queue_with_config.audio_config.IsValidConfig()) + return true; + if (selected_video_pid_ > 0 && + !queue_with_config.video_config.IsValidConfig()) + return true; + + // Pass the config before invoking the initialization callback. 
+ RCHECK(config_cb_.Run(queue_with_config.audio_config, + queue_with_config.video_config, + TextTrackConfigMap())); + queue_with_config.is_config_sent = true; + + // For Mpeg2 TS, the duration is not known. + DVLOG(1) << "Mpeg2TS stream parser initialization done"; + init_cb_.Run(true, kInfiniteDuration(), false); + is_initialized_ = true; + + return true; +} + +void Mp2tStreamParser::OnEmitAudioBuffer( + int pes_pid, + scoped_refptr stream_parser_buffer) { + DCHECK_EQ(pes_pid, selected_audio_pid_); + + DVLOG(LOG_LEVEL_ES) + << "OnEmitAudioBuffer: " + << " size=" + << stream_parser_buffer->data_size() + << " dts=" + << stream_parser_buffer->GetDecodeTimestamp().InMilliseconds() + << " pts=" + << stream_parser_buffer->timestamp().InMilliseconds(); + stream_parser_buffer->set_timestamp( + stream_parser_buffer->timestamp() - time_offset_); + stream_parser_buffer->SetDecodeTimestamp( + stream_parser_buffer->GetDecodeTimestamp() - time_offset_); + + // Ignore the incoming buffer if it is not associated with any config. + if (buffer_queue_chain_.empty()) { + DVLOG(1) << "Ignoring audio buffer with no corresponding audio config"; + return; + } + + buffer_queue_chain_.back().audio_queue.push_back(stream_parser_buffer); +} + +void Mp2tStreamParser::OnEmitVideoBuffer( + int pes_pid, + scoped_refptr stream_parser_buffer) { + DCHECK_EQ(pes_pid, selected_video_pid_); + + DVLOG(LOG_LEVEL_ES) + << "OnEmitVideoBuffer" + << " size=" + << stream_parser_buffer->data_size() + << " dts=" + << stream_parser_buffer->GetDecodeTimestamp().InMilliseconds() + << " pts=" + << stream_parser_buffer->timestamp().InMilliseconds() + << " IsKeyframe=" + << stream_parser_buffer->IsKeyframe(); + stream_parser_buffer->set_timestamp( + stream_parser_buffer->timestamp() - time_offset_); + stream_parser_buffer->SetDecodeTimestamp( + stream_parser_buffer->GetDecodeTimestamp() - time_offset_); + + // Ignore the incoming buffer if it is not associated with any config. 
+ if (buffer_queue_chain_.empty()) { + DVLOG(1) << "Ignoring video buffer with no corresponding video config:" + << " keyframe=" << stream_parser_buffer->IsKeyframe() + << " dts=" + << stream_parser_buffer->GetDecodeTimestamp().InMilliseconds(); + return; + } + + // A segment cannot start with a non key frame. + // Ignore the frame if that's the case. + if (first_video_frame_in_segment_ && !stream_parser_buffer->IsKeyframe()) { + DVLOG(1) << "Ignoring non-key frame:" + << " dts=" + << stream_parser_buffer->GetDecodeTimestamp().InMilliseconds(); + return; + } + + first_video_frame_in_segment_ = false; + buffer_queue_chain_.back().video_queue.push_back(stream_parser_buffer); +} + +bool Mp2tStreamParser::EmitRemainingBuffers() { + DVLOG(LOG_LEVEL_ES) << "Mp2tStreamParser::EmitRemainingBuffers"; + + // No buffer should be sent until fully initialized. + if (!is_initialized_) + return true; + + if (buffer_queue_chain_.empty()) + return true; + + // Keep track of the last audio and video config sent. + AudioDecoderConfig last_audio_config = + buffer_queue_chain_.back().audio_config; + VideoDecoderConfig last_video_config = + buffer_queue_chain_.back().video_config; + + // Buffer emission. + while (!buffer_queue_chain_.empty()) { + // Start a segment if needed. + if (!segment_started_) { + DVLOG(1) << "Starting a new segment"; + segment_started_ = true; + new_segment_cb_.Run(); + } + + // Update the audio and video config if needed. + BufferQueueWithConfig& queue_with_config = buffer_queue_chain_.front(); + if (!queue_with_config.is_config_sent) { + if (!config_cb_.Run(queue_with_config.audio_config, + queue_with_config.video_config, + TextTrackConfigMap())) + return false; + queue_with_config.is_config_sent = true; + } + + // Add buffers. 
+ TextBufferQueueMap empty_text_map; + if (!queue_with_config.audio_queue.empty() || + !queue_with_config.video_queue.empty()) { + if (!new_buffers_cb_.Run(queue_with_config.audio_queue, + queue_with_config.video_queue, + empty_text_map)) { + return false; + } + } + + buffer_queue_chain_.pop_front(); + } + + // Push an empty queue with the last audio/video config + // so that buffers with the same config can be added later on. + BufferQueueWithConfig queue_with_config( + true, last_audio_config, last_video_config); + buffer_queue_chain_.push_back(queue_with_config); + + return true; +} + +} // namespace mp2t +} // namespace media + diff --git a/media/formats/mp2t/mp2t_stream_parser.h b/media/formats/mp2t/mp2t_stream_parser.h new file mode 100644 index 0000000000..85629dceae --- /dev/null +++ b/media/formats/mp2t/mp2t_stream_parser.h @@ -0,0 +1,136 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_MP2T_MP2T_STREAM_PARSER_H_ +#define MEDIA_FORMATS_MP2T_MP2T_STREAM_PARSER_H_ + +#include +#include + +#include "base/memory/ref_counted.h" +#include "base/memory/scoped_ptr.h" +#include "media/base/audio_decoder_config.h" +#include "media/base/byte_queue.h" +#include "media/base/media_export.h" +#include "media/base/stream_parser.h" +#include "media/base/video_decoder_config.h" + +namespace media { + +class StreamParserBuffer; + +namespace mp2t { + +class PidState; + +class MEDIA_EXPORT Mp2tStreamParser : public StreamParser { + public: + explicit Mp2tStreamParser(bool sbr_in_mimetype); + virtual ~Mp2tStreamParser(); + + // StreamParser implementation. 
+ virtual void Init(const InitCB& init_cb, + const NewConfigCB& config_cb, + const NewBuffersCB& new_buffers_cb, + bool ignore_text_tracks, + const NeedKeyCB& need_key_cb, + const NewMediaSegmentCB& new_segment_cb, + const base::Closure& end_of_segment_cb, + const LogCB& log_cb) OVERRIDE; + virtual void Flush() OVERRIDE; + virtual bool Parse(const uint8* buf, int size) OVERRIDE; + + private: + typedef std::map PidMap; + + struct BufferQueueWithConfig { + BufferQueueWithConfig(bool is_cfg_sent, + const AudioDecoderConfig& audio_cfg, + const VideoDecoderConfig& video_cfg); + ~BufferQueueWithConfig(); + + bool is_config_sent; + AudioDecoderConfig audio_config; + StreamParser::BufferQueue audio_queue; + VideoDecoderConfig video_config; + StreamParser::BufferQueue video_queue; + }; + + // Callback invoked to register a Program Map Table. + // Note: Does nothing if the PID is already registered. + void RegisterPmt(int program_number, int pmt_pid); + + // Callback invoked to register a PES pid. + // Possible values for |stream_type| are defined in: + // ISO-13818.1 / ITU H.222 Table 2.34 "Stream type assignments". + // |pes_pid| is part of the Program Map Table referred to by |pmt_pid|. + void RegisterPes(int pmt_pid, int pes_pid, int stream_type); + + // Since the StreamParser interface allows only one audio and one video stream, + // an automatic PID filtering should be applied to select the audio & video + // streams. + void UpdatePidFilter(); + + // Callback invoked each time the audio/video decoder configuration is + // changed. + void OnVideoConfigChanged(int pes_pid, + const VideoDecoderConfig& video_decoder_config); + void OnAudioConfigChanged(int pes_pid, + const AudioDecoderConfig& audio_decoder_config); + + // Invoke the initialization callback if needed. + bool FinishInitializationIfNeeded(); + + // Callback invoked by the ES stream parser + // to emit a new audio/video access unit. 
+ void OnEmitAudioBuffer( + int pes_pid, + scoped_refptr stream_parser_buffer); + void OnEmitVideoBuffer( + int pes_pid, + scoped_refptr stream_parser_buffer); + bool EmitRemainingBuffers(); + + // List of callbacks. + InitCB init_cb_; + NewConfigCB config_cb_; + NewBuffersCB new_buffers_cb_; + NeedKeyCB need_key_cb_; + NewMediaSegmentCB new_segment_cb_; + base::Closure end_of_segment_cb_; + LogCB log_cb_; + + // True when AAC SBR extension is signalled in the mimetype + // (mp4a.40.5 in the codecs parameter). + bool sbr_in_mimetype_; + + // Bytes of the TS stream. + ByteQueue ts_byte_queue_; + + // List of PIDs and their state. + PidMap pids_; + + // Selected audio and video PIDs. + int selected_audio_pid_; + int selected_video_pid_; + + // Pending audio & video buffers. + std::list buffer_queue_chain_; + + // Whether |init_cb_| has been invoked. + bool is_initialized_; + + // Indicate whether a segment was started. + bool segment_started_; + bool first_video_frame_in_segment_; + base::TimeDelta time_offset_; + + DISALLOW_COPY_AND_ASSIGN(Mp2tStreamParser); +}; + +} // namespace mp2t +} // namespace media + +#endif + diff --git a/media/formats/mp2t/mp2t_stream_parser_unittest.cc b/media/formats/mp2t/mp2t_stream_parser_unittest.cc new file mode 100644 index 0000000000..ea796745da --- /dev/null +++ b/media/formats/mp2t/mp2t_stream_parser_unittest.cc @@ -0,0 +1,187 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include +#include + +#include "base/bind.h" +#include "base/bind_helpers.h" +#include "base/logging.h" +#include "base/memory/ref_counted.h" +#include "base/time/time.h" +#include "media/base/audio_decoder_config.h" +#include "media/base/decoder_buffer.h" +#include "media/base/stream_parser_buffer.h" +#include "media/base/test_data_util.h" +#include "media/base/text_track_config.h" +#include "media/base/video_decoder_config.h" +#include "media/formats/mp2t/mp2t_stream_parser.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace media { +namespace mp2t { + +class Mp2tStreamParserTest : public testing::Test { + public: + Mp2tStreamParserTest() + : audio_frame_count_(0), + video_frame_count_(0), + video_min_dts_(kNoTimestamp()), + video_max_dts_(kNoTimestamp()) { + bool has_sbr = false; + parser_.reset(new Mp2tStreamParser(has_sbr)); + } + + protected: + scoped_ptr parser_; + int audio_frame_count_; + int video_frame_count_; + base::TimeDelta video_min_dts_; + base::TimeDelta video_max_dts_; + + bool AppendData(const uint8* data, size_t length) { + return parser_->Parse(data, length); + } + + bool AppendDataInPieces(const uint8* data, size_t length, size_t piece_size) { + const uint8* start = data; + const uint8* end = data + length; + while (start < end) { + size_t append_size = std::min(piece_size, + static_cast(end - start)); + if (!AppendData(start, append_size)) + return false; + start += append_size; + } + return true; + } + + void OnInit(bool init_ok, + base::TimeDelta duration, + bool auto_update_timestamp_offset) { + DVLOG(1) << "OnInit: ok=" << init_ok + << ", dur=" << duration.InMilliseconds() + << ", autoTimestampOffset=" << auto_update_timestamp_offset; + } + + bool OnNewConfig(const AudioDecoderConfig& ac, + const VideoDecoderConfig& vc, + const StreamParser::TextTrackConfigMap& tc) { + DVLOG(1) << "OnNewConfig: audio=" << ac.IsValidConfig() + << ", video=" << vc.IsValidConfig(); + return true; + } + + + void DumpBuffers(const std::string& 
label, + const StreamParser::BufferQueue& buffers) { + DVLOG(2) << "DumpBuffers: " << label << " size " << buffers.size(); + for (StreamParser::BufferQueue::const_iterator buf = buffers.begin(); + buf != buffers.end(); buf++) { + DVLOG(3) << " n=" << buf - buffers.begin() + << ", size=" << (*buf)->data_size() + << ", dur=" << (*buf)->duration().InMilliseconds(); + } + } + + bool OnNewBuffers(const StreamParser::BufferQueue& audio_buffers, + const StreamParser::BufferQueue& video_buffers, + const StreamParser::TextBufferQueueMap& text_map) { + DumpBuffers("audio_buffers", audio_buffers); + DumpBuffers("video_buffers", video_buffers); + audio_frame_count_ += audio_buffers.size(); + video_frame_count_ += video_buffers.size(); + + // TODO(wolenetz/acolwell): Add text track support to more MSE parsers. See + // http://crbug.com/336926. + if (!text_map.empty()) + return false; + + if (video_min_dts_ == kNoTimestamp() && !video_buffers.empty()) + video_min_dts_ = video_buffers.front()->GetDecodeTimestamp(); + if (!video_buffers.empty()) { + video_max_dts_ = video_buffers.back()->GetDecodeTimestamp(); + // Verify monotonicity. 
+ StreamParser::BufferQueue::const_iterator it1 = video_buffers.begin(); + // Keep |it2| exactly one element ahead of |it1| so that each adjacent + // pair of buffers is compared. + StreamParser::BufferQueue::const_iterator it2 = it1; + ++it2; + for ( ; it2 != video_buffers.end(); ++it1, ++it2) { + if ((*it2)->GetDecodeTimestamp() < (*it1)->GetDecodeTimestamp()) + return false; + } + } + + return true; + } + + void OnKeyNeeded(const std::string& type, + const std::vector& init_data) { + DVLOG(1) << "OnKeyNeeded: " << init_data.size(); + } + + void OnNewSegment() { + DVLOG(1) << "OnNewSegment"; + } + + void OnEndOfSegment() { + DVLOG(1) << "OnEndOfSegment()"; + } + + void InitializeParser() { + parser_->Init( + base::Bind(&Mp2tStreamParserTest::OnInit, + base::Unretained(this)), + base::Bind(&Mp2tStreamParserTest::OnNewConfig, + base::Unretained(this)), + base::Bind(&Mp2tStreamParserTest::OnNewBuffers, + base::Unretained(this)), + true, + base::Bind(&Mp2tStreamParserTest::OnKeyNeeded, + base::Unretained(this)), + base::Bind(&Mp2tStreamParserTest::OnNewSegment, + base::Unretained(this)), + base::Bind(&Mp2tStreamParserTest::OnEndOfSegment, + base::Unretained(this)), + LogCB()); + } + + bool ParseMpeg2TsFile(const std::string& filename, int append_bytes) { + InitializeParser(); + + scoped_refptr buffer = ReadTestDataFile(filename); + EXPECT_TRUE(AppendDataInPieces(buffer->data(), + buffer->data_size(), + append_bytes)); + return true; + } +}; + +TEST_F(Mp2tStreamParserTest, UnalignedAppend17) { + // Test small, non-segment-aligned appends. + ParseMpeg2TsFile("bear-1280x720.ts", 17); + EXPECT_EQ(video_frame_count_, 81); + parser_->Flush(); + EXPECT_EQ(video_frame_count_, 82); +} + +TEST_F(Mp2tStreamParserTest, UnalignedAppend512) { + // Test small, non-segment-aligned appends. 
+ ParseMpeg2TsFile("bear-1280x720.ts", 512); + EXPECT_EQ(video_frame_count_, 81); + parser_->Flush(); + EXPECT_EQ(video_frame_count_, 82); +} + +TEST_F(Mp2tStreamParserTest, TimestampWrapAround) { + // "bear-1280x720_ptswraparound.ts" has been transcoded + // from bear-1280x720.mp4 by applying a time offset of 95442s + // (close to 2^33 / 90000) which results in timestamps wrap around + // in the Mpeg2 TS stream. + ParseMpeg2TsFile("bear-1280x720_ptswraparound.ts", 512); + EXPECT_EQ(video_frame_count_, 81); + EXPECT_GE(video_min_dts_, base::TimeDelta::FromSeconds(95443 - 10)); + EXPECT_LE(video_max_dts_, base::TimeDelta::FromSeconds(95443 + 10)); +} + +} // namespace mp2t +} // namespace media diff --git a/media/formats/mp2t/ts_packet.cc b/media/formats/mp2t/ts_packet.cc new file mode 100644 index 0000000000..8463c11e33 --- /dev/null +++ b/media/formats/mp2t/ts_packet.cc @@ -0,0 +1,215 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "media/formats/mp2t/ts_packet.h" + +#include "base/memory/scoped_ptr.h" +#include "media/base/bit_reader.h" +#include "media/formats/mp2t/mp2t_common.h" + +namespace media { +namespace mp2t { + +static const uint8 kTsHeaderSyncword = 0x47; + +// static +int TsPacket::Sync(const uint8* buf, int size) { + int k = 0; + for (; k < size; k++) { + // Verify that we have 4 syncwords in a row when possible, + // this should improve synchronization robustness. + // TODO(damienv): Consider the case where there is garbage + // between TS packets. 
+ bool is_header = true; + for (int i = 0; i < 4; i++) { + int idx = k + i * kPacketSize; + if (idx >= size) + break; + if (buf[idx] != kTsHeaderSyncword) { + DVLOG(LOG_LEVEL_TS) + << "ByteSync" << idx << ": " + << std::hex << static_cast(buf[idx]) << std::dec; + is_header = false; + break; + } + } + if (is_header) + break; + } + + DVLOG_IF(1, k != 0) << "SYNC: nbytes_skipped=" << k; + return k; +} + +// static +TsPacket* TsPacket::Parse(const uint8* buf, int size) { + if (size < kPacketSize) { + DVLOG(1) << "Buffer does not hold one full TS packet:" + << " buffer_size=" << size; + return NULL; + } + + DCHECK_EQ(buf[0], kTsHeaderSyncword); + if (buf[0] != kTsHeaderSyncword) { + DVLOG(1) << "Not on a TS syncword:" + << " buf[0]=" + << std::hex << static_cast(buf[0]) << std::dec; + return NULL; + } + + scoped_ptr ts_packet(new TsPacket()); + bool status = ts_packet->ParseHeader(buf); + if (!status) { + DVLOG(1) << "Parsing header failed"; + return NULL; + } + return ts_packet.release(); +} + +TsPacket::TsPacket() { +} + +TsPacket::~TsPacket() { +} + +bool TsPacket::ParseHeader(const uint8* buf) { + BitReader bit_reader(buf, kPacketSize); + payload_ = buf; + payload_size_ = kPacketSize; + + // Read the TS header: 4 bytes. 
+ int syncword; + int transport_error_indicator; + int payload_unit_start_indicator; + int transport_priority; + int transport_scrambling_control; + int adaptation_field_control; + RCHECK(bit_reader.ReadBits(8, &syncword)); + RCHECK(bit_reader.ReadBits(1, &transport_error_indicator)); + RCHECK(bit_reader.ReadBits(1, &payload_unit_start_indicator)); + RCHECK(bit_reader.ReadBits(1, &transport_priority)); + RCHECK(bit_reader.ReadBits(13, &pid_)); + RCHECK(bit_reader.ReadBits(2, &transport_scrambling_control)); + RCHECK(bit_reader.ReadBits(2, &adaptation_field_control)); + RCHECK(bit_reader.ReadBits(4, &continuity_counter_)); + payload_unit_start_indicator_ = (payload_unit_start_indicator != 0); + payload_ += 4; + payload_size_ -= 4; + + // Default values when no adaptation field. + discontinuity_indicator_ = false; + random_access_indicator_ = false; + + // Done since no adaptation field: + // bit 0x2 of |adaptation_field_control| is not set. + if ((adaptation_field_control & 0x2) == 0) + return true; + + // Read the adaptation field if needed. + int adaptation_field_length; + RCHECK(bit_reader.ReadBits(8, &adaptation_field_length)); + DVLOG(LOG_LEVEL_TS) << "adaptation_field_length=" << adaptation_field_length; + // Skip the adaptation_field_length byte itself. + payload_ += 1; + payload_size_ -= 1; + // When bit 0x1 of |adaptation_field_control| is not set, the only accepted + // length is 183, i.e. 188 - 4 (header) - 1 (length byte): + // the adaptation field then fills the whole packet. + if ((adaptation_field_control & 0x1) == 0 && + adaptation_field_length != 183) { + DVLOG(1) << "adaptation_field_length=" << adaptation_field_length; + return false; + } + // When bit 0x1 is set, a length above 182 is rejected. + if ((adaptation_field_control & 0x1) == 1 && + adaptation_field_length > 182) { + DVLOG(1) << "adaptation_field_length=" << adaptation_field_length; + // This is not allowed by the spec. + // However, some badly encoded streams are using + // adaptation_field_length = 183 + return false; + } + + // adaptation_field_length = '0' is used to insert a single stuffing byte + // in the adaptation field of a transport stream packet.
+ if (adaptation_field_length == 0) + return true; + + bool status = ParseAdaptationField(&bit_reader, adaptation_field_length); + payload_ += adaptation_field_length; + payload_size_ -= adaptation_field_length; + return status; +} + +bool TsPacket::ParseAdaptationField(BitReader* bit_reader, + int adaptation_field_length) { + DCHECK_GT(adaptation_field_length, 0); + int adaptation_field_start_marker = bit_reader->bits_available() / 8; + + int discontinuity_indicator; + int random_access_indicator; + int elementary_stream_priority_indicator; + int pcr_flag; + int opcr_flag; + int splicing_point_flag; + int transport_private_data_flag; + int adaptation_field_extension_flag; + RCHECK(bit_reader->ReadBits(1, &discontinuity_indicator)); + RCHECK(bit_reader->ReadBits(1, &random_access_indicator)); + RCHECK(bit_reader->ReadBits(1, &elementary_stream_priority_indicator)); + RCHECK(bit_reader->ReadBits(1, &pcr_flag)); + RCHECK(bit_reader->ReadBits(1, &opcr_flag)); + RCHECK(bit_reader->ReadBits(1, &splicing_point_flag)); + RCHECK(bit_reader->ReadBits(1, &transport_private_data_flag)); + RCHECK(bit_reader->ReadBits(1, &adaptation_field_extension_flag)); + discontinuity_indicator_ = (discontinuity_indicator != 0); + random_access_indicator_ = (random_access_indicator != 0); + + if (pcr_flag) { + int64 program_clock_reference_base; + int reserved; + int program_clock_reference_extension; + RCHECK(bit_reader->ReadBits(33, &program_clock_reference_base)); + RCHECK(bit_reader->ReadBits(6, &reserved)); + RCHECK(bit_reader->ReadBits(9, &program_clock_reference_extension)); + } + + if (opcr_flag) { + int64 original_program_clock_reference_base; + int reserved; + int original_program_clock_reference_extension; + RCHECK(bit_reader->ReadBits(33, &original_program_clock_reference_base)); + RCHECK(bit_reader->ReadBits(6, &reserved)); + RCHECK( + bit_reader->ReadBits(9, &original_program_clock_reference_extension)); + } + + if (splicing_point_flag) { + int splice_countdown; +
RCHECK(bit_reader->ReadBits(8, &splice_countdown)); + } + + if (transport_private_data_flag) { + int transport_private_data_length; + RCHECK(bit_reader->ReadBits(8, &transport_private_data_length)); + RCHECK(bit_reader->SkipBits(8 * transport_private_data_length)); + } + + if (adaptation_field_extension_flag) { + int adaptation_field_extension_length; + RCHECK(bit_reader->ReadBits(8, &adaptation_field_extension_length)); + RCHECK(bit_reader->SkipBits(8 * adaptation_field_extension_length)); + } + + // The rest of the adaptation field should be stuffing bytes. + // The number of bytes read so far is the difference between the byte count + // available when entering the function and the byte count available now. + // A negative remaining size means that the optional fields went past + // |adaptation_field_length|: parsing fails in that case. + int adaptation_field_remaining_size = adaptation_field_length - + (adaptation_field_start_marker - bit_reader->bits_available() / 8); + RCHECK(adaptation_field_remaining_size >= 0); + for (int k = 0; k < adaptation_field_remaining_size; k++) { + int stuffing_byte; + RCHECK(bit_reader->ReadBits(8, &stuffing_byte)); + // Any stuffing byte different from 0xff makes the parsing fail. + RCHECK(stuffing_byte == 0xff); + } + + DVLOG(LOG_LEVEL_TS) << "random_access_indicator=" << random_access_indicator_; + return true; +} + +} // namespace mp2t +} // namespace media + diff --git a/media/formats/mp2t/ts_packet.h b/media/formats/mp2t/ts_packet.h new file mode 100644 index 0000000000..a232705fbd --- /dev/null +++ b/media/formats/mp2t/ts_packet.h @@ -0,0 +1,73 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_MP2T_TS_PACKET_H_ +#define MEDIA_FORMATS_MP2T_TS_PACKET_H_ + +#include "base/basictypes.h" + +namespace media { + +class BitReader; + +namespace mp2t { + +// Header fields of one TS packet of |kPacketSize| bytes, +// together with the location and the size of its payload. +class TsPacket { + public: + static const int kPacketSize = 188; + + // Return the number of bytes to discard + // to be synchronized on a TS syncword. + static int Sync(const uint8* buf, int size); + + // Parse a TS packet. + // Return a TsPacket only when parsing was successful. + // Return NULL otherwise. + // The returned object is released to the caller. + // |payload()| points into |buf|: no copy of the packet is made.
+ static TsPacket* Parse(const uint8* buf, int size); + + ~TsPacket(); + + // TS header accessors. + bool payload_unit_start_indicator() const { + return payload_unit_start_indicator_; + } + int pid() const { return pid_; } + int continuity_counter() const { return continuity_counter_; } + bool discontinuity_indicator() const { return discontinuity_indicator_; } + bool random_access_indicator() const { return random_access_indicator_; } + + // Return the start address and the size (in bytes) of the payload. + const uint8* payload() const { return payload_; } + int payload_size() const { return payload_size_; } + + private: + // Instances are only created by Parse(). + TsPacket(); + + // Parse an Mpeg2 TS header. + // The buffer size should be at least |kPacketSize|. + bool ParseHeader(const uint8* buf); + bool ParseAdaptationField(BitReader* bit_reader, + int adaptation_field_length); + + // Start address and size of the payload. + const uint8* payload_; + int payload_size_; + + // TS header. + bool payload_unit_start_indicator_; + int pid_; + int continuity_counter_; + + // Params from the adaptation field. + // Both are false when the packet has no adaptation field. + bool discontinuity_indicator_; + bool random_access_indicator_; + + DISALLOW_COPY_AND_ASSIGN(TsPacket); +}; + +} // namespace mp2t +} // namespace media + +#endif // MEDIA_FORMATS_MP2T_TS_PACKET_H_ + diff --git a/media/formats/mp2t/ts_section.h b/media/formats/mp2t/ts_section.h new file mode 100644 index 0000000000..9273733d35 --- /dev/null +++ b/media/formats/mp2t/ts_section.h @@ -0,0 +1,40 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_MP2T_TS_SECTION_H_ +#define MEDIA_FORMATS_MP2T_TS_SECTION_H_ + +namespace media { +namespace mp2t { + +// Interface of the parsers which are fed with the data bytes of TS packets. +class TsSection { + public: + // From ISO/IEC 13818-1 or ITU H.222 spec: Table 2-3 - PID table. + enum SpecialPid { + kPidPat = 0x0, + kPidCat = 0x1, + kPidTsdt = 0x2, + kPidNullPacket = 0x1fff, + kPidMax = 0x1fff, + }; + + virtual ~TsSection() {} + + // Parse the data bytes of the TS packet.
+ // Return true if parsing is successful. + virtual bool Parse(bool payload_unit_start_indicator, + const uint8* buf, int size) = 0; + + // Process bytes that have not been processed yet (pending buffers in the + // pipe). Flush might thus results in frame emission, as an example. + virtual void Flush() = 0; + + // Reset the state of the parser to its initial state. + virtual void Reset() = 0; +}; + +} // namespace mp2t +} // namespace media + +#endif diff --git a/media/formats/mp2t/ts_section_pat.cc b/media/formats/mp2t/ts_section_pat.cc new file mode 100644 index 0000000000..2fcc24bb71 --- /dev/null +++ b/media/formats/mp2t/ts_section_pat.cc @@ -0,0 +1,122 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "media/formats/mp2t/ts_section_pat.h" + +#include + +#include "base/logging.h" +#include "media/base/bit_reader.h" +#include "media/formats/mp2t/mp2t_common.h" + +namespace media { +namespace mp2t { + +TsSectionPat::TsSectionPat(const RegisterPmtCb& register_pmt_cb) + : register_pmt_cb_(register_pmt_cb), + version_number_(-1) { +} + +TsSectionPat::~TsSectionPat() { +} + +bool TsSectionPat::ParsePsiSection(BitReader* bit_reader) { + // Read the fixed section length. 
+ int table_id; + int section_syntax_indicator; + int dummy_zero; + int reserved; + int section_length; + int transport_stream_id; + int version_number; + int current_next_indicator; + int section_number; + int last_section_number; + RCHECK(bit_reader->ReadBits(8, &table_id)); + RCHECK(bit_reader->ReadBits(1, §ion_syntax_indicator)); + RCHECK(bit_reader->ReadBits(1, &dummy_zero)); + RCHECK(bit_reader->ReadBits(2, &reserved)); + RCHECK(bit_reader->ReadBits(12, §ion_length)); + RCHECK(section_length >= 5); + RCHECK(section_length <= 1021); + RCHECK(bit_reader->ReadBits(16, &transport_stream_id)); + RCHECK(bit_reader->ReadBits(2, &reserved)); + RCHECK(bit_reader->ReadBits(5, &version_number)); + RCHECK(bit_reader->ReadBits(1, ¤t_next_indicator)); + RCHECK(bit_reader->ReadBits(8, §ion_number)); + RCHECK(bit_reader->ReadBits(8, &last_section_number)); + section_length -= 5; + + // Perform a few verifications: + // - Table ID should be 0 for a PAT. + // - section_syntax_indicator should be one. + // - section length should not exceed 1021 + RCHECK(table_id == 0x0); + RCHECK(section_syntax_indicator); + RCHECK(!dummy_zero); + + // Both the program table and the CRC have a size multiple of 4. + // Note for pmt_pid_count: minus 4 to account for the CRC. + RCHECK((section_length % 4) == 0); + int pmt_pid_count = (section_length - 4) / 4; + + // Read the variable length section: program table & crc. + std::vector program_number_array(pmt_pid_count); + std::vector pmt_pid_array(pmt_pid_count); + for (int k = 0; k < pmt_pid_count; k++) { + int reserved; + RCHECK(bit_reader->ReadBits(16, &program_number_array[k])); + RCHECK(bit_reader->ReadBits(3, &reserved)); + RCHECK(bit_reader->ReadBits(13, &pmt_pid_array[k])); + } + int crc32; + RCHECK(bit_reader->ReadBits(32, &crc32)); + + // Just ignore the PAT if not applicable yet. 
+ if (!current_next_indicator) { + DVLOG(1) << "Not supported: received a PAT not applicable yet"; + return true; + } + + // Ignore the program table if it hasn't changed. + if (version_number == version_number_) + return true; + + // Both the MSE and the HLS spec specifies that TS streams should convey + // exactly one program. + if (pmt_pid_count > 1) { + DVLOG(1) << "Multiple programs detected in the Mpeg2 TS stream"; + return false; + } + + // Can now register the PMT. +#if !defined(NDEBUG) + int expected_version_number = version_number; + if (version_number_ >= 0) + expected_version_number = (version_number_ + 1) % 32; + DVLOG_IF(1, version_number != expected_version_number) + << "Unexpected version number: " + << version_number << " vs " << version_number_; +#endif + for (int k = 0; k < pmt_pid_count; k++) { + if (program_number_array[k] != 0) { + // Program numbers different from 0 correspond to PMT. + register_pmt_cb_.Run(program_number_array[k], pmt_pid_array[k]); + // Even if there are multiple programs, only one can be supported now. + // HLS: "Transport Stream segments MUST contain a single MPEG-2 Program." + break; + } + } + version_number_ = version_number; + + return true; +} + +void TsSectionPat::ResetPsiSection() { + version_number_ = -1; +} + +} // namespace mp2t +} // namespace media + diff --git a/media/formats/mp2t/ts_section_pat.h b/media/formats/mp2t/ts_section_pat.h new file mode 100644 index 0000000000..f8079adc33 --- /dev/null +++ b/media/formats/mp2t/ts_section_pat.h @@ -0,0 +1,40 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef MEDIA_FORMATS_MP2T_TS_SECTION_PAT_H_ +#define MEDIA_FORMATS_MP2T_TS_SECTION_PAT_H_ + +#include "base/callback.h" +#include "base/compiler_specific.h" +#include "media/formats/mp2t/ts_section_psi.h" + +namespace media { +namespace mp2t { + +// Parser of the PAT sections. +class TsSectionPat : public TsSectionPsi { + public: + // RegisterPmtCb::Run(int program_number, int pmt_pid); + // Run for the program of the PAT whose PMT has to be parsed. + typedef base::Callback<void(int, int)> RegisterPmtCb; + + explicit TsSectionPat(const RegisterPmtCb& register_pmt_cb); + virtual ~TsSectionPat(); + + // TsSectionPsi implementation. + virtual bool ParsePsiSection(BitReader* bit_reader) OVERRIDE; + virtual void ResetPsiSection() OVERRIDE; + + private: + RegisterPmtCb register_pmt_cb_; + + // Parameters from the PAT. + // -1 as long as no PAT has been registered. + int version_number_; + + DISALLOW_COPY_AND_ASSIGN(TsSectionPat); +}; + +} // namespace mp2t +} // namespace media + +#endif + diff --git a/media/formats/mp2t/ts_section_pes.cc b/media/formats/mp2t/ts_section_pes.cc new file mode 100644 index 0000000000..de69a32e63 --- /dev/null +++ b/media/formats/mp2t/ts_section_pes.cc @@ -0,0 +1,312 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "media/formats/mp2t/ts_section_pes.h" + +#include "base/logging.h" +#include "base/strings/string_number_conversions.h" +#include "media/base/bit_reader.h" +#include "media/base/buffers.h" +#include "media/formats/mp2t/es_parser.h" +#include "media/formats/mp2t/mp2t_common.h" + +static const int kPesStartCode = 0x000001; + +// Given that |time| is coded using 33 bits, +// UnrollTimestamp returns the corresponding unrolled timestamp. +// The unrolled timestamp is defined by: +// |time| + k * (2 ^ 33) +// where k is estimated so that the unrolled timestamp +// is as close as possible to |previous_unrolled_time|. +static int64 UnrollTimestamp(int64 previous_unrolled_time, int64 time) { + // Mpeg2 TS timestamps have an accuracy of 33 bits.
+ const int nbits = 33; + + // |timestamp| has a precision of |nbits| + // so make sure the highest bits are set to 0. + DCHECK_EQ((time >> nbits), 0); + + // Consider 3 possibilities to estimate the missing high bits of |time|. + int64 previous_unrolled_time_high = + (previous_unrolled_time >> nbits); + int64 time0 = ((previous_unrolled_time_high - 1) << nbits) | time; + int64 time1 = ((previous_unrolled_time_high + 0) << nbits) | time; + int64 time2 = ((previous_unrolled_time_high + 1) << nbits) | time; + + // Select the min absolute difference with the current time + // so as to ensure time continuity. + int64 diff0 = time0 - previous_unrolled_time; + int64 diff1 = time1 - previous_unrolled_time; + int64 diff2 = time2 - previous_unrolled_time; + if (diff0 < 0) + diff0 = -diff0; + if (diff1 < 0) + diff1 = -diff1; + if (diff2 < 0) + diff2 = -diff2; + + int64 unrolled_time; + int64 min_diff; + if (diff1 < diff0) { + unrolled_time = time1; + min_diff = diff1; + } else { + unrolled_time = time0; + min_diff = diff0; + } + if (diff2 < min_diff) + unrolled_time = time2; + + return unrolled_time; +} + +static bool IsTimestampSectionValid(int64 timestamp_section) { + // |pts_section| has 40 bits: + // - starting with either '0010' or '0011' or '0001' + // - and ending with a marker bit. + // See ITU H.222 standard - PES section. + + // Verify that all the marker bits are set to one. 
+ return ((timestamp_section & 0x1) != 0) && + ((timestamp_section & 0x10000) != 0) && + ((timestamp_section & 0x100000000) != 0); +} + +static int64 ConvertTimestampSectionToTimestamp(int64 timestamp_section) { + return (((timestamp_section >> 33) & 0x7) << 30) | + (((timestamp_section >> 17) & 0x7fff) << 15) | + (((timestamp_section >> 1) & 0x7fff) << 0); +} + +namespace media { +namespace mp2t { + +TsSectionPes::TsSectionPes(scoped_ptr es_parser) + : es_parser_(es_parser.release()), + wait_for_pusi_(true), + previous_pts_valid_(false), + previous_pts_(0), + previous_dts_valid_(false), + previous_dts_(0) { + DCHECK(es_parser_); +} + +TsSectionPes::~TsSectionPes() { +} + +bool TsSectionPes::Parse(bool payload_unit_start_indicator, + const uint8* buf, int size) { + // Ignore partial PES. + if (wait_for_pusi_ && !payload_unit_start_indicator) + return true; + + bool parse_result = true; + if (payload_unit_start_indicator) { + // Try emitting a packet since we might have a pending PES packet + // with an undefined size. + // In this case, a unit is emitted when the next unit is coming. + int raw_pes_size; + const uint8* raw_pes; + pes_byte_queue_.Peek(&raw_pes, &raw_pes_size); + if (raw_pes_size > 0) + parse_result = Emit(true); + + // Reset the state. + ResetPesState(); + + // Update the state. + wait_for_pusi_ = false; + } + + // Add the data to the parser state. + if (size > 0) + pes_byte_queue_.Push(buf, size); + + // Try emitting the current PES packet. + return (parse_result && Emit(false)); +} + +void TsSectionPes::Flush() { + // Try emitting a packet since we might have a pending PES packet + // with an undefined size. + Emit(true); + + // Flush the underlying ES parser. 
+ es_parser_->Flush(); +} + +void TsSectionPes::Reset() { + ResetPesState(); + + previous_pts_valid_ = false; + previous_pts_ = 0; + previous_dts_valid_ = false; + previous_dts_ = 0; + + es_parser_->Reset(); +} + +// Parse the pending PES packet if it is complete, or if its size is unknown +// and |emit_for_unknown_size| is set. +// Return true when more data is needed or when parsing was successful. +bool TsSectionPes::Emit(bool emit_for_unknown_size) { + int raw_pes_size; + const uint8* raw_pes; + pes_byte_queue_.Peek(&raw_pes, &raw_pes_size); + + // A PES should be at least 6 bytes. + // Wait for more data to come if not enough bytes. + if (raw_pes_size < 6) + return true; + + // Check whether we have enough data to start parsing. + // The PES packet length is a 16 bits big endian value at offset 4. + int pes_packet_length = + (static_cast<int>(raw_pes[4]) << 8) | + (static_cast<int>(raw_pes[5])); + if ((pes_packet_length == 0 && !emit_for_unknown_size) || + (pes_packet_length != 0 && raw_pes_size < pes_packet_length + 6)) { + // Wait for more data to come either because: + // - there are not enough bytes, + // - or the PES size is unknown and the "force emit" flag is not set. + // (PES size might be unknown for video PES packet). + return true; + } + DVLOG(LOG_LEVEL_PES) << "pes_packet_length=" << pes_packet_length; + + // Parse the packet. + bool parse_result = ParseInternal(raw_pes, raw_pes_size); + + // Reset the state. + ResetPesState(); + + return parse_result; +} + +bool TsSectionPes::ParseInternal(const uint8* raw_pes, int raw_pes_size) { + BitReader bit_reader(raw_pes, raw_pes_size); + + // Read up to the pes_packet_length (6 bytes). + int packet_start_code_prefix; + int stream_id; + int pes_packet_length; + RCHECK(bit_reader.ReadBits(24, &packet_start_code_prefix)); + RCHECK(bit_reader.ReadBits(8, &stream_id)); + RCHECK(bit_reader.ReadBits(16, &pes_packet_length)); + + RCHECK(packet_start_code_prefix == kPesStartCode); + DVLOG(LOG_LEVEL_PES) << "stream_id=" << std::hex << stream_id << std::dec; + if (pes_packet_length == 0) + pes_packet_length = bit_reader.bits_available() / 8; + + // Ignore the PES for unknown stream IDs.
+ // See ITU H.222 Table 2-22 "Stream_id assignments" + bool is_audio_stream_id = ((stream_id & 0xe0) == 0xc0); + bool is_video_stream_id = ((stream_id & 0xf0) == 0xe0); + if (!is_audio_stream_id && !is_video_stream_id) + return true; + + // Read up to "pes_header_data_length". + // Most of these fields are only read to move forward in the header. + int dummy_2; + int PES_scrambling_control; + int PES_priority; + int data_alignment_indicator; + int copyright; + int original_or_copy; + int pts_dts_flags; + int escr_flag; + int es_rate_flag; + int dsm_trick_mode_flag; + int additional_copy_info_flag; + int pes_crc_flag; + int pes_extension_flag; + int pes_header_data_length; + RCHECK(bit_reader.ReadBits(2, &dummy_2)); + RCHECK(dummy_2 == 0x2); + RCHECK(bit_reader.ReadBits(2, &PES_scrambling_control)); + RCHECK(bit_reader.ReadBits(1, &PES_priority)); + RCHECK(bit_reader.ReadBits(1, &data_alignment_indicator)); + RCHECK(bit_reader.ReadBits(1, &copyright)); + RCHECK(bit_reader.ReadBits(1, &original_or_copy)); + RCHECK(bit_reader.ReadBits(2, &pts_dts_flags)); + RCHECK(bit_reader.ReadBits(1, &escr_flag)); + RCHECK(bit_reader.ReadBits(1, &es_rate_flag)); + RCHECK(bit_reader.ReadBits(1, &dsm_trick_mode_flag)); + RCHECK(bit_reader.ReadBits(1, &additional_copy_info_flag)); + RCHECK(bit_reader.ReadBits(1, &pes_crc_flag)); + RCHECK(bit_reader.ReadBits(1, &pes_extension_flag)); + RCHECK(bit_reader.ReadBits(8, &pes_header_data_length)); + int pes_header_start_size = bit_reader.bits_available() / 8; + + // Compute the size and the offset of the ES payload. + // "6" for the 6 bytes read before and including |pes_packet_length|. + // "3" for the 3 bytes read before and including |pes_header_data_length|. + int es_size = pes_packet_length - 3 - pes_header_data_length; + int es_offset = 6 + 3 + pes_header_data_length; + RCHECK(es_size >= 0); + RCHECK(es_offset + es_size <= raw_pes_size); + + // Read the timing information section.
+ bool is_pts_valid = false; + bool is_dts_valid = false; + int64 pts_section = 0; + int64 dts_section = 0; + if (pts_dts_flags == 0x2) { + RCHECK(bit_reader.ReadBits(40, &pts_section)); + RCHECK((((pts_section >> 36) & 0xf) == 0x2) && + IsTimestampSectionValid(pts_section)); + is_pts_valid = true; + } + if (pts_dts_flags == 0x3) { + RCHECK(bit_reader.ReadBits(40, &pts_section)); + RCHECK(bit_reader.ReadBits(40, &dts_section)); + RCHECK((((pts_section >> 36) & 0xf) == 0x3) && + IsTimestampSectionValid(pts_section)); + RCHECK((((dts_section >> 36) & 0xf) == 0x1) && + IsTimestampSectionValid(dts_section)); + is_pts_valid = true; + is_dts_valid = true; + } + + // Convert and unroll the timestamps. + // A timestamp which is not in the PES header is left to kNoTimestamp(). + // The previous unrolled value is the reference used to unroll the next one. + base::TimeDelta media_pts(kNoTimestamp()); + base::TimeDelta media_dts(kNoTimestamp()); + if (is_pts_valid) { + int64 pts = ConvertTimestampSectionToTimestamp(pts_section); + if (previous_pts_valid_) + pts = UnrollTimestamp(previous_pts_, pts); + previous_pts_ = pts; + previous_pts_valid_ = true; + // |pts| * 1000 / 90 is used as a number of microseconds: + // |pts| is thus presumably a number of 90 kHz ticks. + media_pts = base::TimeDelta::FromMicroseconds((1000 * pts) / 90); + } + if (is_dts_valid) { + int64 dts = ConvertTimestampSectionToTimestamp(dts_section); + if (previous_dts_valid_) + dts = UnrollTimestamp(previous_dts_, dts); + previous_dts_ = dts; + previous_dts_valid_ = true; + media_dts = base::TimeDelta::FromMicroseconds((1000 * dts) / 90); + } + + // Discard the rest of the PES packet header. + // Nothing is actually skipped here: |es_offset| already accounts for the + // whole header. Only the consistency of the header size is checked. + // TODO(damienv): check if some info of the PES packet header are useful. + DCHECK_EQ(bit_reader.bits_available() % 8, 0); + int pes_header_remaining_size = pes_header_data_length - + (pes_header_start_size - bit_reader.bits_available() / 8); + RCHECK(pes_header_remaining_size >= 0); + + // Read the PES packet.
+ DVLOG(LOG_LEVEL_PES) + << "Emit a reassembled PES:" + << " size=" << es_size + << " pts=" << media_pts.InMilliseconds() + << " dts=" << media_dts.InMilliseconds() + << " data_alignment_indicator=" << data_alignment_indicator; + return es_parser_->Parse(&raw_pes[es_offset], es_size, media_pts, media_dts); +} + +void TsSectionPes::ResetPesState() { + pes_byte_queue_.Reset(); + wait_for_pusi_ = true; +} + +} // namespace mp2t +} // namespace media + diff --git a/media/formats/mp2t/ts_section_pes.h b/media/formats/mp2t/ts_section_pes.h new file mode 100644 index 0000000000..b442ae491f --- /dev/null +++ b/media/formats/mp2t/ts_section_pes.h @@ -0,0 +1,64 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_MP2T_TS_SECTION_PES_H_ +#define MEDIA_FORMATS_MP2T_TS_SECTION_PES_H_ + +#include "base/basictypes.h" +#include "base/compiler_specific.h" +#include "base/memory/scoped_ptr.h" +#include "media/base/byte_queue.h" +#include "media/formats/mp2t/ts_section.h" + +namespace media { +namespace mp2t { + +class EsParser; + +// Reassemble the PES packets from the payload of the TS packets +// and give the ES payload of each PES packet to an ES parser. +class TsSectionPes : public TsSection { + public: + // Take the ownership of |es_parser| which should not be NULL. + explicit TsSectionPes(scoped_ptr<EsParser> es_parser); + virtual ~TsSectionPes(); + + // TsSection implementation. + virtual bool Parse(bool payload_unit_start_indicator, + const uint8* buf, int size) OVERRIDE; + virtual void Flush() OVERRIDE; + virtual void Reset() OVERRIDE; + + private: + // Emit a reassembled PES packet. + // Return true if successful. + // |emit_for_unknown_size| is used to force emission for PES packets + // whose size is unknown. + bool Emit(bool emit_for_unknown_size); + + // Parse a PES packet, return true if successful. + bool ParseInternal(const uint8* raw_pes, int raw_pes_size); + + void ResetPesState(); + + // Bytes of the current PES. + ByteQueue pes_byte_queue_; + + // ES parser.
+ scoped_ptr es_parser_; + + // Do not start parsing before getting a unit start indicator. + bool wait_for_pusi_; + + // Used to unroll PTS and DTS. + bool previous_pts_valid_; + int64 previous_pts_; + bool previous_dts_valid_; + int64 previous_dts_; + + DISALLOW_COPY_AND_ASSIGN(TsSectionPes); +}; + +} // namespace mp2t +} // namespace media + +#endif + diff --git a/media/formats/mp2t/ts_section_pmt.cc b/media/formats/mp2t/ts_section_pmt.cc new file mode 100644 index 0000000000..72b492aaa4 --- /dev/null +++ b/media/formats/mp2t/ts_section_pmt.cc @@ -0,0 +1,122 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "media/formats/mp2t/ts_section_pmt.h" + +#include + +#include "base/logging.h" +#include "media/base/bit_reader.h" +#include "media/formats/mp2t/mp2t_common.h" + +namespace media { +namespace mp2t { + +TsSectionPmt::TsSectionPmt(const RegisterPesCb& register_pes_cb) + : register_pes_cb_(register_pes_cb) { +} + +TsSectionPmt::~TsSectionPmt() { +} + +bool TsSectionPmt::ParsePsiSection(BitReader* bit_reader) { + // Read up to |last_section_number|. 
+ int table_id; + int section_syntax_indicator; + int dummy_zero; + int reserved; + int section_length; + int program_number; + int version_number; + int current_next_indicator; + int section_number; + int last_section_number; + RCHECK(bit_reader->ReadBits(8, &table_id)); + RCHECK(bit_reader->ReadBits(1, §ion_syntax_indicator)); + RCHECK(bit_reader->ReadBits(1, &dummy_zero)); + RCHECK(bit_reader->ReadBits(2, &reserved)); + RCHECK(bit_reader->ReadBits(12, §ion_length)); + int section_start_marker = bit_reader->bits_available() / 8; + + RCHECK(bit_reader->ReadBits(16, &program_number)); + RCHECK(bit_reader->ReadBits(2, &reserved)); + RCHECK(bit_reader->ReadBits(5, &version_number)); + RCHECK(bit_reader->ReadBits(1, ¤t_next_indicator)); + RCHECK(bit_reader->ReadBits(8, §ion_number)); + RCHECK(bit_reader->ReadBits(8, &last_section_number)); + + // Perform a few verifications: + // - table ID should be 2 for a PMT. + // - section_syntax_indicator should be one. + // - section length should not exceed 1021. + RCHECK(table_id == 0x2); + RCHECK(section_syntax_indicator); + RCHECK(!dummy_zero); + RCHECK(section_length <= 1021); + RCHECK(section_number == 0); + RCHECK(last_section_number == 0); + + // TODO(damienv): + // Verify that there is no mismatch between the program number + // and the program number that was provided in a PAT for the current PMT. + + // Read the end of the fixed length section. + int pcr_pid; + int program_info_length; + RCHECK(bit_reader->ReadBits(3, &reserved)); + RCHECK(bit_reader->ReadBits(13, &pcr_pid)); + RCHECK(bit_reader->ReadBits(4, &reserved)); + RCHECK(bit_reader->ReadBits(12, &program_info_length)); + RCHECK(program_info_length < 1024); + + // Read the program info descriptor. + // TODO(damienv): check wether any of the descriptors could be useful. + // Defined in section 2.6 of ISO-13818. + RCHECK(bit_reader->SkipBits(8 * program_info_length)); + + // Read the ES description table. 
+ // The end of the PID map if 4 bytes away from the end of the section + // (4 bytes = size of the CRC). + int pid_map_end_marker = section_start_marker - section_length + 4; + std::map pid_map; + while (bit_reader->bits_available() > 8 * pid_map_end_marker) { + int stream_type; + int reserved; + int pid_es; + int es_info_length; + RCHECK(bit_reader->ReadBits(8, &stream_type)); + RCHECK(bit_reader->ReadBits(3, &reserved)); + RCHECK(bit_reader->ReadBits(13, &pid_es)); + RCHECK(bit_reader->ReadBits(4, &reserved)); + RCHECK(bit_reader->ReadBits(12, &es_info_length)); + + // Do not register the PID right away. + // Wait for the end of the section to be fully parsed + // to make sure there is no error. + pid_map.insert(std::pair(pid_es, stream_type)); + + // Read the ES info descriptors. + // TODO(damienv): check wether any of the descriptors could be useful. + // Defined in section 2.6 of ISO-13818. + RCHECK(bit_reader->SkipBits(8 * es_info_length)); + } + + // Read the CRC. + int crc32; + RCHECK(bit_reader->ReadBits(32, &crc32)); + + // Once the PMT has been proved to be correct, register the PIDs. + for (std::map::iterator it = pid_map.begin(); + it != pid_map.end(); ++it) + register_pes_cb_.Run(it->first, it->second); + + return true; +} + +void TsSectionPmt::ResetPsiSection() { +} + +} // namespace mp2t +} // namespace media + diff --git a/media/formats/mp2t/ts_section_pmt.h b/media/formats/mp2t/ts_section_pmt.h new file mode 100644 index 0000000000..c1b3d467cc --- /dev/null +++ b/media/formats/mp2t/ts_section_pmt.h @@ -0,0 +1,40 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef MEDIA_FORMATS_MP2T_TS_SECTION_PMT_H_ +#define MEDIA_FORMATS_MP2T_TS_SECTION_PMT_H_ + +#include "base/callback.h" +#include "base/compiler_specific.h" +#include "media/formats/mp2t/ts_section_psi.h" + +namespace media { +namespace mp2t { + +// Parser of the PMT sections. +class TsSectionPmt : public TsSectionPsi { + public: + // RegisterPesCb::Run(int pes_pid, int stream_type); + // Stream type is defined in + // "Table 2-34 – Stream type assignments" in H.222 + // TODO(damienv): add the program number. + typedef base::Callback<void(int, int)> RegisterPesCb; + + explicit TsSectionPmt(const RegisterPesCb& register_pes_cb); + virtual ~TsSectionPmt(); + + // TsSectionPsi implementation. + virtual bool ParsePsiSection(BitReader* bit_reader) OVERRIDE; + virtual void ResetPsiSection() OVERRIDE; + + private: + RegisterPesCb register_pes_cb_; + + DISALLOW_COPY_AND_ASSIGN(TsSectionPmt); +}; + +} // namespace mp2t +} // namespace media + +#endif + diff --git a/media/formats/mp2t/ts_section_psi.cc b/media/formats/mp2t/ts_section_psi.cc new file mode 100644 index 0000000000..f9db880537 --- /dev/null +++ b/media/formats/mp2t/ts_section_psi.cc @@ -0,0 +1,132 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file.
+ +#include "media/formats/mp2t/ts_section_psi.h" + +#include <algorithm> + +#include "base/basictypes.h" +#include "base/logging.h" +#include "media/base/bit_reader.h" +#include "media/formats/mp2t/mp2t_common.h" + +// Run a CRC over the |size| bytes of |buf|, starting from 0xffffffff, +// with the polynomial 0x4c11db7 and the most significant bit first. +// Return true when the resulting CRC is 0. +static bool IsCrcValid(const uint8* buf, int size) { + uint32 crc = 0xffffffffu; + const uint32 kCrcPoly = 0x4c11db7; + + for (int k = 0; k < size; k++) { + int nbits = 8; + uint32 data_msb_aligned = buf[k]; + data_msb_aligned <<= (32 - nbits); + + while (nbits > 0) { + if ((data_msb_aligned ^ crc) & 0x80000000) { + crc <<= 1; + crc ^= kCrcPoly; + } else { + crc <<= 1; + } + + data_msb_aligned <<= 1; + nbits--; + } + } + + return (crc == 0); +} + +namespace media { +namespace mp2t { + +TsSectionPsi::TsSectionPsi() + : wait_for_pusi_(true), + leading_bytes_to_discard_(0) { +} + +TsSectionPsi::~TsSectionPsi() { +} + +// Accumulate the bytes of a PSI section and, once the section is complete, +// check its CRC and give it to ParsePsiSection(). +// Return false when the section is invalid, true otherwise +// (including when more data is needed). +bool TsSectionPsi::Parse(bool payload_unit_start_indicator, + const uint8* buf, int size) { + // Ignore partial PSI. + if (wait_for_pusi_ && !payload_unit_start_indicator) + return true; + + if (payload_unit_start_indicator) { + // Reset the state of the PSI section. + ResetPsiState(); + + // The payload of a unit start has to hold at least the pointer field. + // This is checked in all builds: with an empty payload, reading |buf[0]| + // would go past the end of the payload and |size| would become negative. + RCHECK(size >= 1); + + // Update the state. + wait_for_pusi_ = false; + int pointer_field = buf[0]; + leading_bytes_to_discard_ = pointer_field; + buf++; + size--; + } + + // Discard some leading bytes if needed. + if (leading_bytes_to_discard_ > 0) { + int nbytes_to_discard = std::min(leading_bytes_to_discard_, size); + buf += nbytes_to_discard; + size -= nbytes_to_discard; + leading_bytes_to_discard_ -= nbytes_to_discard; + } + if (size == 0) + return true; + + // Add the data to the parser state. + psi_byte_queue_.Push(buf, size); + int raw_psi_size; + const uint8* raw_psi; + psi_byte_queue_.Peek(&raw_psi, &raw_psi_size); + + // Check whether we have enough data to start parsing.
+ if (raw_psi_size < 3) + return true; + // The section length is made of the 12 low bits of the bytes 1 and 2. + int section_length = + ((static_cast<int>(raw_psi[1]) << 8) | + (static_cast<int>(raw_psi[2]))) & 0xfff; + // A section length of 1021 is accepted, consistently with the checks + // done by the PAT and the PMT parsers. + if (section_length > 1021) + return false; + int psi_length = section_length + 3; + if (raw_psi_size < psi_length) { + // Don't throw an error when there is not enough data, + // just wait for more data to come. + return true; + } + + // There should not be any trailing bytes after a PMT. + // Instead, the pointer field should be used to stuff bytes. + DVLOG_IF(1, raw_psi_size > psi_length) + << "Trailing bytes after a PSI section: " + << psi_length << " vs " << raw_psi_size; + + // Verify the CRC. + RCHECK(IsCrcValid(raw_psi, psi_length)); + + // Parse the PSI section. + BitReader bit_reader(raw_psi, raw_psi_size); + bool status = ParsePsiSection(&bit_reader); + if (status) + ResetPsiState(); + + return status; +} + +void TsSectionPsi::Flush() { +} + +void TsSectionPsi::Reset() { + ResetPsiSection(); + ResetPsiState(); +} + +void TsSectionPsi::ResetPsiState() { + wait_for_pusi_ = true; + psi_byte_queue_.Reset(); + leading_bytes_to_discard_ = 0; +} + +} // namespace mp2t +} // namespace media + diff --git a/media/formats/mp2t/ts_section_psi.h b/media/formats/mp2t/ts_section_psi.h new file mode 100644 index 0000000000..1b81884854 --- /dev/null +++ b/media/formats/mp2t/ts_section_psi.h @@ -0,0 +1,54 @@ +// Copyright 2014 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef MEDIA_FORMATS_MP2T_TS_SECTION_PSI_H_ +#define MEDIA_FORMATS_MP2T_TS_SECTION_PSI_H_ + +#include "base/compiler_specific.h" +#include "media/base/byte_queue.h" +#include "media/formats/mp2t/ts_section.h" + +namespace media { + +class BitReader; + +namespace mp2t { + +class TsSectionPsi : public TsSection { + public: + TsSectionPsi(); + virtual ~TsSectionPsi(); + + // TsSection implementation.
+ virtual bool Parse(bool payload_unit_start_indicator, + const uint8* buf, int size) OVERRIDE; + virtual void Flush() OVERRIDE; + virtual void Reset() OVERRIDE; + + // Parse the content of the PSI section. + // Called by Parse() once the whole section has been received + // and its CRC has been verified. + // Return true if parsing is successful. + virtual bool ParsePsiSection(BitReader* bit_reader) = 0; + + // Reset the state of the PSI section. + // Called by Reset(). + virtual void ResetPsiSection() = 0; + + private: + // Reset the state used to reassemble a section. + void ResetPsiState(); + + // Bytes of the current PSI. + ByteQueue psi_byte_queue_; + + // Do not start parsing before getting a unit start indicator. + bool wait_for_pusi_; + + // Number of leading bytes to discard (pointer field). + int leading_bytes_to_discard_; + + DISALLOW_COPY_AND_ASSIGN(TsSectionPsi); +}; + +} // namespace mp2t +} // namespace media + +#endif // MEDIA_FORMATS_MP2T_TS_SECTION_PSI_H_ +