From 890c601dced20e8fb1815716d977af7efd766c77 Mon Sep 17 00:00:00 2001 From: Jacob Trimble Date: Fri, 5 Feb 2016 11:08:07 -0800 Subject: [PATCH] Split NALU enumeration into its own class. Removed the code for NALU splitting in the H.264 parser and moved it to its own class. Also added support for length-prefixed NALU splitting for use with the encrypting fragmenter. Change-Id: I1e91266681f1b117fb2382cf80590651efc06619 --- .../filters/avc_decoder_configuration.cc | 5 +- packager/media/filters/filters.gyp | 3 + .../h264_byte_to_unit_stream_converter.cc | 42 ++- .../h264_byte_to_unit_stream_converter.h | 4 +- packager/media/filters/h264_parser.cc | 279 +++++------------- packager/media/filters/h264_parser.h | 124 ++------ .../media/filters/h264_parser_unittest.cc | 45 +-- packager/media/filters/nalu_reader.cc | 188 ++++++++++++ packager/media/filters/nalu_reader.h | 139 +++++++++ .../media/filters/nalu_reader_unittest.cc | 145 +++++++++ packager/media/formats/mp2t/es_parser_h264.cc | 59 ++-- .../formats/mp2t/es_parser_h264_unittest.cc | 10 +- .../formats/mp4/encrypting_fragmenter.cc | 25 +- 13 files changed, 670 insertions(+), 398 deletions(-) create mode 100644 packager/media/filters/nalu_reader.cc create mode 100644 packager/media/filters/nalu_reader.h create mode 100644 packager/media/filters/nalu_reader_unittest.cc diff --git a/packager/media/filters/avc_decoder_configuration.cc b/packager/media/filters/avc_decoder_configuration.cc index 835c53a304..5412515626 100644 --- a/packager/media/filters/avc_decoder_configuration.cc +++ b/packager/media/filters/avc_decoder_configuration.cc @@ -48,8 +48,9 @@ bool AVCDecoderConfiguration::Parse(const std::vector& data) { H264Parser parser; int sps_id = 0; - RCHECK(parser.ParseSPSFromArray(reader.data() + reader.pos(), sps_length, - &sps_id) == H264Parser::kOk); + Nalu nalu; + RCHECK(nalu.InitializeFromH264(reader.data() + reader.pos(), sps_length, 0)); + RCHECK(parser.ParseSPS(nalu, &sps_id) == H264Parser::kOk); return 
ExtractResolutionFromSps(*parser.GetSPS(sps_id), &coded_width_, &coded_height_, &pixel_width_, &pixel_height_); diff --git a/packager/media/filters/filters.gyp b/packager/media/filters/filters.gyp index 32b28095e4..75ca804dad 100644 --- a/packager/media/filters/filters.gyp +++ b/packager/media/filters/filters.gyp @@ -25,6 +25,8 @@ 'h264_byte_to_unit_stream_converter.h', 'h264_parser.cc', 'h264_parser.h', + 'nalu_reader.cc', + 'nalu_reader.h', 'vp_codec_configuration.cc', 'vp_codec_configuration.h', 'vp8_parser.cc', @@ -47,6 +49,7 @@ 'h264_byte_to_unit_stream_converter_unittest.cc', 'h264_parser_unittest.cc', 'hevc_decoder_configuration_unittest.cc', + 'nalu_reader_unittest.cc', 'vp_codec_configuration_unittest.cc', 'vp8_parser_unittest.cc', 'vp9_parser_unittest.cc', diff --git a/packager/media/filters/h264_byte_to_unit_stream_converter.cc b/packager/media/filters/h264_byte_to_unit_stream_converter.cc index ceb24210a7..0d3be5d7e9 100644 --- a/packager/media/filters/h264_byte_to_unit_stream_converter.cc +++ b/packager/media/filters/h264_byte_to_unit_stream_converter.cc @@ -6,6 +6,8 @@ #include "packager/media/filters/h264_byte_to_unit_stream_converter.h" +#include + #include "packager/base/logging.h" #include "packager/media/base/buffer_writer.h" #include "packager/media/filters/h264_parser.h" @@ -32,58 +34,50 @@ bool H264ByteToUnitStreamConverter::ConvertByteStreamToNalUnitStream( BufferWriter output_buffer(input_frame_size + kStreamConversionOverhead); - const uint8_t* input_ptr(input_frame); - const uint8_t* input_end(input_ptr + input_frame_size); - off_t next_start_code_offset; - off_t next_start_code_size; bool first_nalu(true); - while (H264Parser::FindStartCode(input_ptr, - input_end - input_ptr, - &next_start_code_offset, - &next_start_code_size)) { + Nalu nalu; + NaluReader reader(kIsAnnexbByteStream, input_frame, input_frame_size); + while (reader.Advance(&nalu) == NaluReader::kOk) { if (first_nalu) { - if (next_start_code_offset != 0) { + if (nalu.data() 
!= input_frame) { LOG(ERROR) << "H.264 byte stream frame did not begin with start code."; return false; } first_nalu = false; - } else { - ProcessNalu(input_ptr, next_start_code_offset, &output_buffer); } - input_ptr += next_start_code_offset + next_start_code_size; + + ProcessNalu(nalu, &output_buffer); } if (first_nalu) { LOG(ERROR) << "H.264 byte stream frame did not contain start codes."; return false; - } else { - ProcessNalu(input_ptr, input_end - input_ptr, &output_buffer); } output_buffer.SwapBuffer(output_frame); return true; } -void H264ByteToUnitStreamConverter::ProcessNalu(const uint8_t* nalu_ptr, - size_t nalu_size, +void H264ByteToUnitStreamConverter::ProcessNalu(const Nalu& nalu, BufferWriter* output_buffer) { - DCHECK(nalu_ptr); + DCHECK(nalu.data()); DCHECK(output_buffer); - if (!nalu_size) - return; // Edge case. + // Skip the start code, but keep the 1-byte NALU type. + const uint8_t* nalu_ptr = nalu.data() + nalu.header_size() - 1; + const uint64_t nalu_size = nalu.data_size() + 1; + DCHECK_LE(nalu_size, std::numeric_limits::max()); - uint8_t nalu_type = *nalu_ptr & 0x0f; - switch (nalu_type) { - case H264NALU::kSPS: + switch (nalu.type()) { + case Nalu::H264_SPS: // Grab SPS NALU. last_sps_.assign(nalu_ptr, nalu_ptr + nalu_size); return; - case H264NALU::kPPS: + case Nalu::H264_PPS: // Grab PPS NALU. last_pps_.assign(nalu_ptr, nalu_ptr + nalu_size); return; - case H264NALU::kAUD: + case Nalu::H264_AUD: // Ignore AUD NALU. 
return; default: diff --git a/packager/media/filters/h264_byte_to_unit_stream_converter.h b/packager/media/filters/h264_byte_to_unit_stream_converter.h index 056f9c9c09..1cd5816a79 100644 --- a/packager/media/filters/h264_byte_to_unit_stream_converter.h +++ b/packager/media/filters/h264_byte_to_unit_stream_converter.h @@ -16,6 +16,7 @@ namespace edash_packager { namespace media { class BufferWriter; +class Nalu; /// Class which converts H.264 byte streams (as specified in ISO/IEC 14496-10 /// Annex B) into H.264 NAL unit streams (as specified in ISO/IEC 14496-15). @@ -46,8 +47,7 @@ class H264ByteToUnitStreamConverter { bool GetAVCDecoderConfigurationRecord(std::vector* decoder_config); private: - void ProcessNalu(const uint8_t* nalu_ptr, - size_t nalu_size, + void ProcessNalu(const Nalu& nalu, BufferWriter* output_buffer); std::vector last_sps_; diff --git a/packager/media/filters/h264_parser.cc b/packager/media/filters/h264_parser.cc index 74afe61707..ccd59298fa 100644 --- a/packager/media/filters/h264_parser.cc +++ b/packager/media/filters/h264_parser.cc @@ -97,10 +97,6 @@ bool H264SliceHeader::IsSISlice() const { return (slice_type % 5 == kSISlice); } -H264NALU::H264NALU() { - memset(this, 0, sizeof(*this)); -} - H264SPS::H264SPS() { memset(this, 0, sizeof(*this)); } @@ -120,7 +116,7 @@ H264SEIMessage::H264SEIMessage() { #define READ_BITS_OR_RETURN(num_bits, out) \ do { \ int _out; \ - if (!br_.ReadBits(num_bits, &_out)) { \ + if (!br->ReadBits(num_bits, &_out)) { \ DVLOG(1) \ << "Error in stream: unexpected EOS while trying to read " #out; \ return kInvalidStream; \ @@ -131,7 +127,7 @@ H264SEIMessage::H264SEIMessage() { #define READ_BOOL_OR_RETURN(out) \ do { \ int _out; \ - if (!br_.ReadBits(1, &_out)) { \ + if (!br->ReadBits(1, &_out)) { \ DVLOG(1) \ << "Error in stream: unexpected EOS while trying to read " #out; \ return kInvalidStream; \ @@ -141,7 +137,7 @@ H264SEIMessage::H264SEIMessage() { #define READ_UE_OR_RETURN(out) \ do { \ - if (ReadUE(out) != kOk) 
{ \ + if (ReadUE(br, out) != kOk) { \ DVLOG(1) << "Error in stream: invalid value while trying to read " #out; \ return kInvalidStream; \ } \ @@ -149,7 +145,7 @@ H264SEIMessage::H264SEIMessage() { #define READ_SE_OR_RETURN(out) \ do { \ - if (ReadSE(out) != kOk) { \ + if (ReadSE(br, out) != kOk) { \ DVLOG(1) << "Error in stream: invalid value while trying to read " #out; \ return kInvalidStream; \ } \ @@ -188,28 +184,13 @@ static const int kTableSarHeight[] = { COMPILE_ASSERT(arraysize(kTableSarWidth) == arraysize(kTableSarHeight), sar_tables_must_have_same_size); -H264Parser::H264Parser() { - Reset(); -} +H264Parser::H264Parser() {} H264Parser::~H264Parser() { STLDeleteValues(&active_SPSes_); STLDeleteValues(&active_PPSes_); } -void H264Parser::Reset() { - stream_ = NULL; - bytes_left_ = 0; -} - -void H264Parser::SetStream(const uint8_t* stream, off_t stream_size) { - DCHECK(stream); - DCHECK_GT(stream_size, 0); - - stream_ = stream; - bytes_left_ = stream_size; -} - const H264PPS* H264Parser::GetPPS(int pps_id) { return active_PPSes_[pps_id]; } @@ -218,87 +199,7 @@ const H264SPS* H264Parser::GetSPS(int sps_id) { return active_SPSes_[sps_id]; } -static inline bool IsStartCode(const uint8_t* data) { - return data[0] == 0x00 && data[1] == 0x00 && data[2] == 0x01; -} - -// static -bool H264Parser::FindStartCode(const uint8_t* data, - off_t data_size, - off_t* offset, - off_t* start_code_size) { - DCHECK_GE(data_size, 0); - off_t bytes_left = data_size; - - while (bytes_left >= 3) { - if (IsStartCode(data)) { - // Found three-byte start code, set pointer at its beginning. - *offset = data_size - bytes_left; - *start_code_size = 3; - - // If there is a zero byte before this start code, - // then it's actually a four-byte start code, so backtrack one byte. 
- if (*offset > 0 && *(data - 1) == 0x00) { - --(*offset); - ++(*start_code_size); - } - - return true; - } - - ++data; - --bytes_left; - } - - // End of data: offset is pointing to the first byte that was not considered - // as a possible start of a start code. - // Note: there is no security issue when receiving a negative |data_size| - // since in this case, |bytes_left| is equal to |data_size| and thus - // |*offset| is equal to 0 (valid offset). - *offset = data_size - bytes_left; - *start_code_size = 0; - return false; -} - -bool H264Parser::LocateNALU(off_t* nalu_size, off_t* start_code_size) { - // Find the start code of next NALU. - off_t nalu_start_off = 0; - off_t annexb_start_code_size = 0; - if (!FindStartCode(stream_, bytes_left_, - &nalu_start_off, &annexb_start_code_size)) { - DVLOG(4) << "Could not find start code, end of stream?"; - return false; - } - - // Move the stream to the beginning of the NALU (pointing at the start code). - stream_ += nalu_start_off; - bytes_left_ -= nalu_start_off; - - const uint8_t* nalu_data = stream_ + annexb_start_code_size; - off_t max_nalu_data_size = bytes_left_ - annexb_start_code_size; - if (max_nalu_data_size <= 0) { - DVLOG(3) << "End of stream"; - return false; - } - - // Find the start code of next NALU; - // if successful, |nalu_size_without_start_code| is the number of bytes from - // after previous start code to before this one; - // if next start code is not found, it is still a valid NALU since there - // are some bytes left after the first start code: all the remaining bytes - // belong to the current NALU. 
- off_t next_start_code_size = 0; - off_t nalu_size_without_start_code = 0; - if (!FindStartCode(nalu_data, max_nalu_data_size, - &nalu_size_without_start_code, &next_start_code_size)) { - nalu_size_without_start_code = max_nalu_data_size; - } - *nalu_size = nalu_size_without_start_code + annexb_start_code_size; - *start_code_size = annexb_start_code_size; - return true; -} - -H264Parser::Result H264Parser::ReadUE(int* val) { +H264Parser::Result H264Parser::ReadUE(H264BitReader* br, int* val) { int num_bits = -1; int bit; int rest; @@ -323,12 +224,12 @@ H264Parser::Result H264Parser::ReadUE(int* val) { return kOk; } -H264Parser::Result H264Parser::ReadSE(int* val) { +H264Parser::Result H264Parser::ReadSE(H264BitReader* br, int* val) { int ue; Result res; // See Chapter 9 in the spec. - res = ReadUE(&ue); + res = ReadUE(br, &ue); if (res != kOk) return res; @@ -340,46 +241,6 @@ H264Parser::Result H264Parser::ReadSE(int* val) { return kOk; } -H264Parser::Result H264Parser::AdvanceToNextNALU(H264NALU* nalu) { - off_t start_code_size; - off_t nalu_size_with_start_code; - if (!LocateNALU(&nalu_size_with_start_code, &start_code_size)) { - DVLOG(4) << "Could not find next NALU, bytes left in stream: " - << bytes_left_; - return kEOStream; - } - - nalu->data = stream_ + start_code_size; - nalu->size = nalu_size_with_start_code - start_code_size; - DVLOG(4) << "NALU found: size=" << nalu_size_with_start_code; - - // Initialize bit reader at the start of found NALU. - if (!br_.Initialize(nalu->data, nalu->size)) - return kEOStream; - - // Move parser state to after this NALU, so next time AdvanceToNextNALU - // is called, we will effectively be skipping it; - // other parsing functions will use the position saved - // in bit reader for parsing, so we don't have to remember it here. - stream_ += nalu_size_with_start_code; - bytes_left_ -= nalu_size_with_start_code; - - // Read NALU header, skip the forbidden_zero_bit, but check for it. 
- int data; - READ_BITS_OR_RETURN(1, &data); - TRUE_OR_RETURN(data == 0); - - READ_BITS_OR_RETURN(2, &nalu->nal_ref_idc); - READ_BITS_OR_RETURN(5, &nalu->nal_unit_type); - - DVLOG(4) << "NALU type: " << static_cast(nalu->nal_unit_type) - << " at: " << reinterpret_cast(nalu->data) - << " size: " << nalu->size - << " ref: " << static_cast(nalu->nal_ref_idc); - - return kOk; -} - // Default scaling lists (per spec). static const int kDefault4x4Intra[kH264ScalingList4x4Length] = { 6, 13, 13, 20, 20, 20, 28, 28, 28, 28, 32, 32, 32, 37, 37, 42, }; @@ -503,7 +364,8 @@ static void FallbackScalingList8x8( } } -H264Parser::Result H264Parser::ParseScalingList(int size, +H264Parser::Result H264Parser::ParseScalingList(H264BitReader* br, + int size, int* scaling_list, bool* use_default) { // See chapter 7.3.2.1.1.1. @@ -532,7 +394,8 @@ H264Parser::Result H264Parser::ParseScalingList(int size, return kOk; } -H264Parser::Result H264Parser::ParseSPSScalingLists(H264SPS* sps) { +H264Parser::Result H264Parser::ParseSPSScalingLists(H264BitReader* br, + H264SPS* sps) { // See 7.4.2.1.1. 
bool seq_scaling_list_present_flag; bool use_default; @@ -543,7 +406,8 @@ H264Parser::Result H264Parser::ParseSPSScalingLists(H264SPS* sps) { READ_BOOL_OR_RETURN(&seq_scaling_list_present_flag); if (seq_scaling_list_present_flag) { - res = ParseScalingList(arraysize(sps->scaling_list4x4[i]), + res = ParseScalingList(br, + arraysize(sps->scaling_list4x4[i]), sps->scaling_list4x4[i], &use_default); if (res != kOk) @@ -563,7 +427,8 @@ H264Parser::Result H264Parser::ParseSPSScalingLists(H264SPS* sps) { READ_BOOL_OR_RETURN(&seq_scaling_list_present_flag); if (seq_scaling_list_present_flag) { - res = ParseScalingList(arraysize(sps->scaling_list8x8[i]), + res = ParseScalingList(br, + arraysize(sps->scaling_list8x8[i]), sps->scaling_list8x8[i], &use_default); if (res != kOk) @@ -581,7 +446,8 @@ H264Parser::Result H264Parser::ParseSPSScalingLists(H264SPS* sps) { return kOk; } -H264Parser::Result H264Parser::ParsePPSScalingLists(const H264SPS& sps, +H264Parser::Result H264Parser::ParsePPSScalingLists(H264BitReader* br, + const H264SPS& sps, H264PPS* pps) { // See 7.4.2.2. 
bool pic_scaling_list_present_flag; @@ -592,7 +458,8 @@ H264Parser::Result H264Parser::ParsePPSScalingLists(const H264SPS& sps, READ_BOOL_OR_RETURN(&pic_scaling_list_present_flag); if (pic_scaling_list_present_flag) { - res = ParseScalingList(arraysize(pps->scaling_list4x4[i]), + res = ParseScalingList(br, + arraysize(pps->scaling_list4x4[i]), pps->scaling_list4x4[i], &use_default); if (res != kOk) @@ -621,7 +488,8 @@ H264Parser::Result H264Parser::ParsePPSScalingLists(const H264SPS& sps, READ_BOOL_OR_RETURN(&pic_scaling_list_present_flag); if (pic_scaling_list_present_flag) { - res = ParseScalingList(arraysize(pps->scaling_list8x8[i]), + res = ParseScalingList(br, + arraysize(pps->scaling_list8x8[i]), pps->scaling_list8x8[i], &use_default); if (res != kOk) @@ -649,7 +517,7 @@ H264Parser::Result H264Parser::ParsePPSScalingLists(const H264SPS& sps, } H264Parser::Result H264Parser::ParseAndIgnoreHRDParameters( - bool* hrd_parameters_present) { + H264BitReader* br, bool* hrd_parameters_present) { int data; READ_BOOL_OR_RETURN(&data); // {nal,vcl}_hrd_parameters_present_flag if (!data) @@ -671,7 +539,8 @@ H264Parser::Result H264Parser::ParseAndIgnoreHRDParameters( return kOk; } -H264Parser::Result H264Parser::ParseVUIParameters(H264SPS* sps) { +H264Parser::Result H264Parser::ParseVUIParameters(H264BitReader* br, + H264SPS* sps) { bool aspect_ratio_info_present_flag; READ_BOOL_OR_RETURN(&aspect_ratio_info_present_flag); if (aspect_ratio_info_present_flag) { @@ -721,12 +590,12 @@ H264Parser::Result H264Parser::ParseVUIParameters(H264SPS* sps) { // Read and ignore NAL HRD parameters, if present. bool hrd_parameters_present = false; - Result res = ParseAndIgnoreHRDParameters(&hrd_parameters_present); + Result res = ParseAndIgnoreHRDParameters(br, &hrd_parameters_present); if (res != kOk) return res; // Read and ignore VCL HRD parameters, if present. 
- res = ParseAndIgnoreHRDParameters(&hrd_parameters_present); + res = ParseAndIgnoreHRDParameters(br, &hrd_parameters_present); if (res != kOk) return res; @@ -761,10 +630,13 @@ static void FillDefaultSeqScalingLists(H264SPS* sps) { sps->scaling_list8x8[i][j] = 16; } -H264Parser::Result H264Parser::ParseSPS(int* sps_id) { +H264Parser::Result H264Parser::ParseSPS(const Nalu& nalu, int* sps_id) { // See 7.4.2.1. int data; Result res; + H264BitReader reader; + reader.Initialize(nalu.data() + nalu.header_size(), nalu.data_size()); + H264BitReader* br = &reader; *sps_id = -1; @@ -804,7 +676,7 @@ H264Parser::Result H264Parser::ParseSPS(int* sps_id) { if (sps->seq_scaling_matrix_present_flag) { DVLOG(4) << "Scaling matrix present"; - res = ParseSPSScalingLists(sps.get()); + res = ParseSPSScalingLists(br, sps.get()); if (res != kOk) return res; } else { @@ -870,7 +742,7 @@ H264Parser::Result H264Parser::ParseSPS(int* sps_id) { READ_BOOL_OR_RETURN(&sps->vui_parameters_present_flag); if (sps->vui_parameters_present_flag) { DVLOG(4) << "VUI parameters present"; - res = ParseVUIParameters(sps.get()); + res = ParseVUIParameters(br, sps.get()); if (res != kOk) return res; } @@ -883,10 +755,13 @@ H264Parser::Result H264Parser::ParseSPS(int* sps_id) { return kOk; } -H264Parser::Result H264Parser::ParsePPS(int* pps_id) { +H264Parser::Result H264Parser::ParsePPS(const Nalu& nalu, int* pps_id) { // See 7.4.2.2. 
const H264SPS* sps; Result res; + H264BitReader reader; + reader.Initialize(nalu.data() + nalu.header_size(), nalu.data_size()); + H264BitReader* br = &reader; *pps_id = -1; @@ -932,13 +807,13 @@ H264Parser::Result H264Parser::ParsePPS(int* pps_id) { READ_BOOL_OR_RETURN(&pps->constrained_intra_pred_flag); READ_BOOL_OR_RETURN(&pps->redundant_pic_cnt_present_flag); - if (br_.HasMoreRBSPData()) { + if (br->HasMoreRBSPData()) { READ_BOOL_OR_RETURN(&pps->transform_8x8_mode_flag); READ_BOOL_OR_RETURN(&pps->pic_scaling_matrix_present_flag); if (pps->pic_scaling_matrix_present_flag) { DVLOG(4) << "Picture scaling matrix present"; - res = ParsePPSScalingLists(*sps, pps.get()); + res = ParsePPSScalingLists(br, *sps, pps.get()); if (res != kOk) return res; } @@ -954,29 +829,8 @@ H264Parser::Result H264Parser::ParsePPS(int* pps_id) { return kOk; } -H264Parser::Result H264Parser::ParseSPSFromArray( - const uint8_t* sps_data, - size_t sps_data_length, - int* sps_id) { - br_.Initialize(sps_data, sps_data_length); - - int data; - READ_BITS_OR_RETURN(1, &data); - // First bit must be 0. - TRUE_OR_RETURN(data == 0); - int nal_ref_idc; - READ_BITS_OR_RETURN(2, &nal_ref_idc); - // From the spec "nal_ref_idc shall not be equal to 0 for sequence parameter - // set". 
- TRUE_OR_RETURN(nal_ref_idc != 0); - int nal_unit_type; - READ_BITS_OR_RETURN(5, &nal_unit_type); - TRUE_OR_RETURN(nal_unit_type == H264NALU::kSPS); - - return ParseSPS(sps_id); -} - H264Parser::Result H264Parser::ParseRefPicListModification( + H264BitReader* br, int num_ref_idx_active_minus1, H264ModificationOfPicNum* ref_list_mods) { H264ModificationOfPicNum* pic_num_mod; @@ -1020,13 +874,13 @@ H264Parser::Result H264Parser::ParseRefPicListModification( } H264Parser::Result H264Parser::ParseRefPicListModifications( - H264SliceHeader* shdr) { + H264BitReader* br, H264SliceHeader* shdr) { Result res; if (!shdr->IsISlice() && !shdr->IsSISlice()) { READ_BOOL_OR_RETURN(&shdr->ref_pic_list_modification_flag_l0); if (shdr->ref_pic_list_modification_flag_l0) { - res = ParseRefPicListModification(shdr->num_ref_idx_l0_active_minus1, + res = ParseRefPicListModification(br, shdr->num_ref_idx_l0_active_minus1, shdr->ref_list_l0_modifications); if (res != kOk) return res; @@ -1036,7 +890,7 @@ H264Parser::Result H264Parser::ParseRefPicListModifications( if (shdr->IsBSlice()) { READ_BOOL_OR_RETURN(&shdr->ref_pic_list_modification_flag_l1); if (shdr->ref_pic_list_modification_flag_l1) { - res = ParseRefPicListModification(shdr->num_ref_idx_l1_active_minus1, + res = ParseRefPicListModification(br, shdr->num_ref_idx_l1_active_minus1, shdr->ref_list_l1_modifications); if (res != kOk) return res; @@ -1047,12 +901,12 @@ H264Parser::Result H264Parser::ParseRefPicListModifications( } H264Parser::Result H264Parser::ParseWeightingFactors( + H264BitReader* br, int num_ref_idx_active_minus1, int chroma_array_type, int luma_log2_weight_denom, int chroma_log2_weight_denom, H264WeightingFactors* w_facts) { - int def_luma_weight = 1 << luma_log2_weight_denom; int def_chroma_weight = 1 << chroma_log2_weight_denom; @@ -1091,7 +945,8 @@ H264Parser::Result H264Parser::ParseWeightingFactors( return kOk; } -H264Parser::Result H264Parser::ParsePredWeightTable(const H264SPS& sps, +H264Parser::Result 
H264Parser::ParsePredWeightTable(H264BitReader* br, + const H264SPS& sps, H264SliceHeader* shdr) { READ_UE_OR_RETURN(&shdr->luma_log2_weight_denom); TRUE_OR_RETURN(shdr->luma_log2_weight_denom < 8); @@ -1100,7 +955,8 @@ H264Parser::Result H264Parser::ParsePredWeightTable(const H264SPS& sps, READ_UE_OR_RETURN(&shdr->chroma_log2_weight_denom); TRUE_OR_RETURN(shdr->chroma_log2_weight_denom < 8); - Result res = ParseWeightingFactors(shdr->num_ref_idx_l0_active_minus1, + Result res = ParseWeightingFactors(br, + shdr->num_ref_idx_l0_active_minus1, sps.chroma_array_type, shdr->luma_log2_weight_denom, shdr->chroma_log2_weight_denom, @@ -1109,7 +965,8 @@ H264Parser::Result H264Parser::ParsePredWeightTable(const H264SPS& sps, return res; if (shdr->IsBSlice()) { - res = ParseWeightingFactors(shdr->num_ref_idx_l1_active_minus1, + res = ParseWeightingFactors(br, + shdr->num_ref_idx_l1_active_minus1, sps.chroma_array_type, shdr->luma_log2_weight_denom, shdr->chroma_log2_weight_denom, @@ -1121,7 +978,8 @@ H264Parser::Result H264Parser::ParsePredWeightTable(const H264SPS& sps, return kOk; } -H264Parser::Result H264Parser::ParseDecRefPicMarking(H264SliceHeader* shdr) { +H264Parser::Result H264Parser::ParseDecRefPicMarking(H264BitReader* br, + H264SliceHeader* shdr) { if (shdr->idr_pic_flag) { READ_BOOL_OR_RETURN(&shdr->no_output_of_prior_pics_flag); READ_BOOL_OR_RETURN(&shdr->long_term_reference_flag); @@ -1166,19 +1024,22 @@ H264Parser::Result H264Parser::ParseDecRefPicMarking(H264SliceHeader* shdr) { return kOk; } -H264Parser::Result H264Parser::ParseSliceHeader(const H264NALU& nalu, +H264Parser::Result H264Parser::ParseSliceHeader(const Nalu& nalu, H264SliceHeader* shdr) { // See 7.4.3. 
const H264SPS* sps; const H264PPS* pps; Result res; + H264BitReader reader; + reader.Initialize(nalu.data() + nalu.header_size(), nalu.data_size()); + H264BitReader* br = &reader; memset(shdr, 0, sizeof(*shdr)); - shdr->idr_pic_flag = (nalu.nal_unit_type == 5); - shdr->nal_ref_idc = nalu.nal_ref_idc; - shdr->nalu_data = nalu.data; - shdr->nalu_size = nalu.size; + shdr->idr_pic_flag = (nalu.type() == 5); + shdr->nal_ref_idc = nalu.ref_idc(); + shdr->nalu_data = nalu.data() + nalu.header_size(); + shdr->nalu_size = nalu.data_size(); READ_UE_OR_RETURN(&shdr->first_mb_in_slice); READ_UE_OR_RETURN(&shdr->slice_type); @@ -1255,23 +1116,23 @@ H264Parser::Result H264Parser::ParseSliceHeader(const H264NALU& nalu, TRUE_OR_RETURN(shdr->num_ref_idx_l1_active_minus1 < 16); } - if (nalu.nal_unit_type == H264NALU::kCodedSliceExtension) { + if (nalu.type() == Nalu::H264_CodedSliceExtension) { return kUnsupportedStream; } else { - res = ParseRefPicListModifications(shdr); + res = ParseRefPicListModifications(br, shdr); if (res != kOk) return res; } if ((pps->weighted_pred_flag && (shdr->IsPSlice() || shdr->IsSPSlice())) || (pps->weighted_bipred_idc == 1 && shdr->IsBSlice())) { - res = ParsePredWeightTable(*sps, shdr); + res = ParsePredWeightTable(br, *sps, shdr); if (res != kOk) return res; } - if (nalu.nal_ref_idc != 0) { - res = ParseDecRefPicMarking(shdr); + if (nalu.ref_idc() != 0) { + res = ParseDecRefPicMarking(br, shdr); if (res != kOk) return res; } @@ -1308,14 +1169,18 @@ H264Parser::Result H264Parser::ParseSliceHeader(const H264NALU& nalu, return kUnsupportedStream; } - size_t epb = br_.NumEmulationPreventionBytesRead(); - shdr->header_bit_size = (shdr->nalu_size - epb) * 8 - br_.NumBitsLeft(); + size_t epb = br->NumEmulationPreventionBytesRead(); + shdr->header_bit_size = (shdr->nalu_size - epb) * 8 - br->NumBitsLeft(); return kOk; } -H264Parser::Result H264Parser::ParseSEI(H264SEIMessage* sei_msg) { +H264Parser::Result H264Parser::ParseSEI(const Nalu& nalu, + 
H264SEIMessage* sei_msg) { int byte; + H264BitReader reader; + reader.Initialize(nalu.data() + nalu.header_size(), nalu.data_size()); + H264BitReader* br = &reader; memset(sei_msg, 0, sizeof(*sei_msg)); diff --git a/packager/media/filters/h264_parser.h b/packager/media/filters/h264_parser.h index dff5f9f62c..3f954181d9 100644 --- a/packager/media/filters/h264_parser.h +++ b/packager/media/filters/h264_parser.h @@ -13,6 +13,7 @@ #include #include "packager/media/filters/h264_bit_reader.h" +#include "packager/media/filters/nalu_reader.h" namespace edash_packager { namespace media { @@ -27,33 +28,6 @@ bool ExtractResolutionFromSps(const H264SPS& sps, uint32_t* pixel_width, uint32_t* pixel_height); -// For explanations of each struct and its members, see H.264 specification -// at http://www.itu.int/rec/T-REC-H.264. -struct H264NALU { - H264NALU(); - - enum Type { - kUnspecified = 0, - kNonIDRSlice = 1, - kIDRSlice = 5, - kSEIMessage = 6, - kSPS = 7, - kPPS = 8, - kAUD = 9, - kEOSeq = 10, - kEOStream = 11, - kCodedSliceExtension = 20, - }; - - // After (without) start code; we don't own the underlying memory - // and a shallow copy should be made when copying this struct. - const uint8_t* data; - off_t size; // From after start code to start code of next NALU (or EOS). - - int nal_ref_idc; - int nal_unit_type; -}; - enum { kH264ScalingList4x4Length = 16, kH264ScalingList8x8Length = 64, @@ -276,40 +250,10 @@ class H264Parser { kEOStream, // end of stream }; - // Find offset from start of data to next NALU start code - // and size of found start code (3 or 4 bytes). - // If no start code is found, offset is pointing to the first unprocessed byte - // (i.e. the first byte that was not considered as a possible start of a start - // code) and |*start_code_size| is set to 0. - // Preconditions: - // - |data_size| >= 0 - // Postconditions: - // - |*offset| is between 0 and |data_size| included. - // It is strictly less than |data_size| if |data_size| > 0. 
- // - |*start_code_size| is either 0, 3 or 4. - static bool FindStartCode(const uint8_t* data, - off_t data_size, - off_t* offset, - off_t* start_code_size); - H264Parser(); ~H264Parser(); - void Reset(); - // Set current stream pointer to |stream| of |stream_size| in bytes, - // |stream| owned by caller. - void SetStream(const uint8_t* stream, off_t stream_size); - - // Read the stream to find the next NALU, identify it and return - // that information in |*nalu|. This advances the stream to the beginning - // of this NALU, but not past it, so subsequent calls to NALU-specific - // parsing functions (ParseSPS, etc.) will parse this NALU. - // If the caller wishes to skip the current NALU, it can call this function - // again, instead of any NALU-type specific parse functions below. - Result AdvanceToNextNALU(H264NALU* nalu); - // NALU-specific parsing functions. - // These should be called after AdvanceToNextNALU(). // SPSes and PPSes are owned by the parser class and the memory for their // structures is managed here, not by the caller, as they are reused @@ -319,15 +263,8 @@ class H264Parser { // of the parsed structure in |*pps_id|/|*sps_id|. // To get a pointer to a given SPS/PPS structure, use GetSPS()/GetPPS(), // passing the returned |*sps_id|/|*pps_id| as parameter. - // methods with a scoped_ptr and adding an AtEOS() function to check for EOS - // if Parse*() return NULL. - Result ParseSPS(int* sps_id); - Result ParsePPS(int* pps_id); - - // Samme as ParseSPS but instead uses |sps_data|. - Result ParseSPSFromArray(const uint8_t* sps_data, - size_t sps_data_size, - int* sps_id); + Result ParseSPS(const Nalu& nalu, int* sps_id); + Result ParsePPS(const Nalu& nalu, int* pps_id); // Return a pointer to SPS/PPS with given |sps_id|/|pps_id| or NULL if not // present. @@ -341,64 +278,57 @@ class H264Parser { // Parse a slice header, returning it in |*shdr|. |*nalu| must be set to // the NALU returned from AdvanceToNextNALU() and corresponding to |*shdr|. 
- Result ParseSliceHeader(const H264NALU& nalu, H264SliceHeader* shdr); + Result ParseSliceHeader(const Nalu& nalu, H264SliceHeader* shdr); // Parse a SEI message, returning it in |*sei_msg|, provided and managed // by the caller. - Result ParseSEI(H264SEIMessage* sei_msg); + Result ParseSEI(const Nalu& nalu, H264SEIMessage* sei_msg); private: - // Move the stream pointer to the beginning of the next NALU, - // i.e. pointing at the next start code. - // Return true if a NALU has been found. - // If a NALU is found: - // - its size in bytes is returned in |*nalu_size| and includes - // the start code as well as the trailing zero bits. - // - the size in bytes of the start code is returned in |*start_code_size|. - bool LocateNALU(off_t* nalu_size, off_t* start_code_size); - // Exp-Golomb code parsing as specified in chapter 9.1 of the spec. // Read one unsigned exp-Golomb code from the stream and return in |*val|. - Result ReadUE(int* val); + Result ReadUE(H264BitReader* br, int* val); // Read one signed exp-Golomb code from the stream and return in |*val|. - Result ReadSE(int* val); + Result ReadSE(H264BitReader* br, int* val); // Parse scaling lists (see spec). - Result ParseScalingList(int size, int* scaling_list, bool* use_default); - Result ParseSPSScalingLists(H264SPS* sps); - Result ParsePPSScalingLists(const H264SPS& sps, H264PPS* pps); + Result ParseScalingList(H264BitReader* br, + int size, + int* scaling_list, + bool* use_default); + Result ParseSPSScalingLists(H264BitReader* br, H264SPS* sps); + Result ParsePPSScalingLists(H264BitReader* br, + const H264SPS& sps, + H264PPS* pps); // Parse optional VUI parameters in SPS (see spec). - Result ParseVUIParameters(H264SPS* sps); + Result ParseVUIParameters(H264BitReader* br, H264SPS* sps); // Set |hrd_parameters_present| to true only if they are present. 
- Result ParseAndIgnoreHRDParameters(bool* hrd_parameters_present); + Result ParseAndIgnoreHRDParameters(H264BitReader* br, + bool* hrd_parameters_present); // Parse reference picture lists' modifications (see spec). - Result ParseRefPicListModifications(H264SliceHeader* shdr); - Result ParseRefPicListModification(int num_ref_idx_active_minus1, + Result ParseRefPicListModifications(H264BitReader* br, H264SliceHeader* shdr); + Result ParseRefPicListModification(H264BitReader* br, + int num_ref_idx_active_minus1, H264ModificationOfPicNum* ref_list_mods); // Parse prediction weight table (see spec). - Result ParsePredWeightTable(const H264SPS& sps, H264SliceHeader* shdr); + Result ParsePredWeightTable(H264BitReader* br, + const H264SPS& sps, + H264SliceHeader* shdr); // Parse weighting factors (see spec). - Result ParseWeightingFactors(int num_ref_idx_active_minus1, + Result ParseWeightingFactors(H264BitReader* br, + int num_ref_idx_active_minus1, int chroma_array_type, int luma_log2_weight_denom, int chroma_log2_weight_denom, H264WeightingFactors* w_facts); // Parse decoded reference picture marking information (see spec). - Result ParseDecRefPicMarking(H264SliceHeader* shdr); - - // Pointer to the current NALU in the stream. - const uint8_t* stream_; - - // Bytes left in the stream after the current NALU. - off_t bytes_left_; - - H264BitReader br_; + Result ParseDecRefPicMarking(H264BitReader* br, H264SliceHeader* shdr); // PPSes and SPSes stored for future reference. typedef std::map SPSById; diff --git a/packager/media/filters/h264_parser_unittest.cc b/packager/media/filters/h264_parser_unittest.cc index c5fb7c55ae..51abd221b4 100644 --- a/packager/media/filters/h264_parser_unittest.cc +++ b/packager/media/filters/h264_parser_unittest.cc @@ -12,6 +12,12 @@ namespace edash_packager { namespace media { +namespace { +// The test data does not include a start code, the start of the data is the +// NALU header. 
+const uint8_t kStartCodeSize = 0; +} + TEST(H264ParserTest, StreamFileParsing) { std::vector buffer = ReadTestDataFile("test-25fps.h264"); @@ -19,42 +25,43 @@ TEST(H264ParserTest, StreamFileParsing) { int num_nalus = 759; H264Parser parser; - parser.SetStream(vector_as_array(&buffer), buffer.size()); + NaluReader reader(kIsAnnexbByteStream, vector_as_array(&buffer), + buffer.size()); // Parse until the end of stream/unsupported stream/error in stream is found. int num_parsed_nalus = 0; while (true) { H264SliceHeader shdr; H264SEIMessage sei_msg; - H264NALU nalu; - H264Parser::Result res = parser.AdvanceToNextNALU(&nalu); - if (res == H264Parser::kEOStream) { + Nalu nalu; + NaluReader::Result res = reader.Advance(&nalu); + if (res == NaluReader::kEOStream) { DVLOG(1) << "Number of successfully parsed NALUs before EOS: " << num_parsed_nalus; ASSERT_EQ(num_nalus, num_parsed_nalus); return; } - ASSERT_EQ(res, H264Parser::kOk); + ASSERT_EQ(res, NaluReader::kOk); ++num_parsed_nalus; int id; - switch (nalu.nal_unit_type) { - case H264NALU::kIDRSlice: - case H264NALU::kNonIDRSlice: + switch (nalu.type()) { + case Nalu::H264_IDRSlice: + case Nalu::H264_NonIDRSlice: ASSERT_EQ(parser.ParseSliceHeader(nalu, &shdr), H264Parser::kOk); break; - case H264NALU::kSPS: - ASSERT_EQ(parser.ParseSPS(&id), H264Parser::kOk); + case Nalu::H264_SPS: + ASSERT_EQ(parser.ParseSPS(nalu, &id), H264Parser::kOk); break; - case H264NALU::kPPS: - ASSERT_EQ(parser.ParsePPS(&id), H264Parser::kOk); + case Nalu::H264_PPS: + ASSERT_EQ(parser.ParsePPS(nalu, &id), H264Parser::kOk); break; - case H264NALU::kSEIMessage: - ASSERT_EQ(parser.ParseSEI(&sei_msg), H264Parser::kOk); + case Nalu::H264_SEIMessage: + ASSERT_EQ(parser.ParseSEI(nalu, &sei_msg), H264Parser::kOk); break; default: @@ -73,8 +80,9 @@ TEST(H264ParserTest, ExtractResolutionFromSpsData) { H264Parser parser; int sps_id = 0; - ASSERT_EQ(H264Parser::kOk, - parser.ParseSPSFromArray(kSps, arraysize(kSps), &sps_id)); + Nalu nalu; + 
ASSERT_TRUE(nalu.InitializeFromH264(kSps, arraysize(kSps), kStartCodeSize)); + ASSERT_EQ(H264Parser::kOk, parser.ParseSPS(nalu, &sps_id)); uint32_t coded_width = 0; uint32_t coded_height = 0; @@ -97,8 +105,9 @@ TEST(H264ParserTest, ExtractResolutionFromSpsDataWithCropping) { H264Parser parser; int sps_id = 0; - ASSERT_EQ(H264Parser::kOk, - parser.ParseSPSFromArray(kSps, arraysize(kSps), &sps_id)); + Nalu nalu; + ASSERT_TRUE(nalu.InitializeFromH264(kSps, arraysize(kSps), kStartCodeSize)); + ASSERT_EQ(H264Parser::kOk, parser.ParseSPS(nalu, &sps_id)); uint32_t coded_width = 0; uint32_t coded_height = 0; diff --git a/packager/media/filters/nalu_reader.cc b/packager/media/filters/nalu_reader.cc new file mode 100644 index 0000000000..3d254a19bd --- /dev/null +++ b/packager/media/filters/nalu_reader.cc @@ -0,0 +1,188 @@ +// Copyright 2016 Google Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#include "packager/media/filters/nalu_reader.h" + +#include "packager/base/logging.h" +#include "packager/media/base/buffer_reader.h" +#include "packager/media/filters/h264_parser.h" + +namespace edash_packager { +namespace media { + +namespace { +inline bool IsStartCode(const uint8_t* data) { + return data[0] == 0x00 && data[1] == 0x00 && data[2] == 0x01; +} +} // namespace + +Nalu::Nalu() + : data_(nullptr), + data_size_(0), + header_size_(0), + ref_idc_(0), + type_(0), + is_video_slice_(false) {} + +bool Nalu::InitializeFromH264(const uint8_t* data, + uint64_t size, + uint8_t start_code_size) { + DCHECK(data); + DCHECK_GT(size, start_code_size); + uint8_t header = data[start_code_size]; + if ((header & 0x80) != 0) + return false; + + data_ = data; + header_size_ = start_code_size + 1; + data_size_ = size - start_code_size - 1; + ref_idc_ = (header >> 5) & 0x3; + type_ = header & 0x1F; + is_video_slice_ = (type_ >= 
Nalu::H264_NonIDRSlice && + type_ <= Nalu::H264_IDRSlice); + return true; +} + +NaluReader::NaluReader(uint8_t nal_length_size, + const uint8_t* stream, + uint64_t stream_size) + : stream_(stream), + stream_size_(stream_size), + nalu_length_size_(nal_length_size), + format_(nal_length_size == 0 ? kAnnexbByteStreamFormat + : kNalUnitStreamFormat) { + DCHECK(stream); +} +NaluReader::~NaluReader() {} + +NaluReader::Result NaluReader::Advance(Nalu* nalu) { + if (stream_size_ <= 0) + return NaluReader::kEOStream; + + uint8_t nalu_length_size_or_start_code_size; + uint64_t nalu_length_with_header; + if (format_ == kAnnexbByteStreamFormat) { + // This will move |stream_| to the start code. + if (!LocateNaluByStartCode(&nalu_length_with_header, + &nalu_length_size_or_start_code_size)) { + LOG(ERROR) << "Could not find next NALU, bytes left in stream: " + << stream_size_; + // This is actually an error. Since we always move to past the end of + // each NALU, if there is no next start code, then this is the first call + // and there are no start codes in the stream. + return NaluReader::kInvalidStream; + } + } else { + uint64_t nalu_length; + BufferReader reader(stream_, stream_size_); + if (!reader.ReadNBytesInto8(&nalu_length, nalu_length_size_)) + return NaluReader::kInvalidStream; + nalu_length_size_or_start_code_size = nalu_length_size_; + + if (nalu_length + nalu_length_size_ > stream_size_) { + LOG(ERROR) << "NALU length exceeds stream size: " + << stream_size_ << " < " << nalu_length; + return NaluReader::kInvalidStream; + } + if (nalu_length == 0) { + LOG(ERROR) << "NALU size 0"; + return NaluReader::kInvalidStream; + } + nalu_length_with_header = nalu_length + nalu_length_size_; + } + + if (!nalu->InitializeFromH264(stream_, nalu_length_with_header, + nalu_length_size_or_start_code_size)) + return NaluReader::kInvalidStream; + + // Move parser state to after this NALU, so next time Advance + // is called, we will effectively be skipping it. 
+ stream_ += nalu_length_with_header; + stream_size_ -= nalu_length_with_header; + + DVLOG(4) << "NALU type: " << static_cast(nalu->type()) + << " at: " << reinterpret_cast(nalu->data()) + << " data size: " << nalu->data_size() + << " ref: " << static_cast(nalu->ref_idc()); + + return NaluReader::kOk; +} + +// static +bool NaluReader::FindStartCode(const uint8_t* data, + uint64_t data_size, + uint64_t* offset, + uint8_t* start_code_size) { + uint64_t bytes_left = data_size; + + while (bytes_left >= 3) { + if (IsStartCode(data)) { + // Found three-byte start code, set pointer at its beginning. + *offset = data_size - bytes_left; + *start_code_size = 3; + + // If there is a zero byte before this start code, + // then it's actually a four-byte start code, so backtrack one byte. + if (*offset > 0 && *(data - 1) == 0x00) { + --(*offset); + ++(*start_code_size); + } + + return true; + } + + ++data; + --bytes_left; + } + + // End of data: offset is pointing to the first byte that was not considered + // as a possible start of a start code. + *offset = data_size - bytes_left; + *start_code_size = 0; + return false; +} + +bool NaluReader::LocateNaluByStartCode(uint64_t* nalu_size, + uint8_t* start_code_size) { + // Find the start code of next NALU. + uint64_t nalu_start_off = 0; + uint8_t annexb_start_code_size = 0; + if (!FindStartCode(stream_, stream_size_, + &nalu_start_off, &annexb_start_code_size)) { + DVLOG(4) << "Could not find start code, end of stream?"; + return false; + } + + // Move the stream to the beginning of the NALU (pointing at the start code). 
+ stream_ += nalu_start_off; + stream_size_ -= nalu_start_off; + + const uint8_t* nalu_data = stream_ + annexb_start_code_size; + uint64_t max_nalu_data_size = stream_size_ - annexb_start_code_size; + if (max_nalu_data_size <= 0) { + DVLOG(3) << "End of stream"; + return false; + } + + // Find the start code of next NALU; + // if successful, |nalu_size_without_start_code| is the number of bytes from + // after previous start code to before this one; + // if next start code is not found, it is still a valid NALU since there + // are some bytes left after the first start code: all the remaining bytes + // belong to the current NALU. + uint64_t nalu_size_without_start_code = 0; + uint8_t next_start_code_size = 0; + if (!FindStartCode(nalu_data, max_nalu_data_size, + &nalu_size_without_start_code, &next_start_code_size)) { + nalu_size_without_start_code = max_nalu_data_size; + } + *nalu_size = nalu_size_without_start_code + annexb_start_code_size; + *start_code_size = annexb_start_code_size; + return true; +} + +} // namespace media +} // namespace edash_packager diff --git a/packager/media/filters/nalu_reader.h b/packager/media/filters/nalu_reader.h new file mode 100644 index 0000000000..88b4147216 --- /dev/null +++ b/packager/media/filters/nalu_reader.h @@ -0,0 +1,139 @@ +// Copyright 2016 Google Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#ifndef MEDIA_FILTERS_NALU_READER_H_ +#define MEDIA_FILTERS_NALU_READER_H_ + +#include +#include + +#include "packager/base/compiler_specific.h" +#include "packager/base/macros.h" + +namespace edash_packager { +namespace media { + +// Used as the |nalu_length_size| argument to NaluReader to indicate to use +// AnnexB byte streams. An AnnexB byte stream starts with 3 or 4 byte start +// codes instead of a fixed size NAL unit length. 
+const uint8_t kIsAnnexbByteStream = 0; + +/// For explanations of each struct and its members, see H.264 specification +/// at http://www.itu.int/rec/T-REC-H.264. +class Nalu { + public: + enum H264NaluType { + H264_Unspecified = 0, + H264_NonIDRSlice = 1, + H264_IDRSlice = 5, + H264_SEIMessage = 6, + H264_SPS = 7, + H264_PPS = 8, + H264_AUD = 9, + H264_EOSeq = 10, + H264_CodedSliceExtension = 20, + }; + + Nalu(); + + bool InitializeFromH264(const uint8_t* data, + uint64_t size, + uint8_t start_code_size) WARN_UNUSED_RESULT; + + const uint8_t* data() const { return data_; } + uint64_t data_size() const { return data_size_; } + uint64_t header_size() const { return header_size_; } + + int ref_idc() const { return ref_idc_; } + int type() const { return type_; } + bool is_video_slice() const { return is_video_slice_; } + + private: + // A pointer to the NALU (i.e. points to the header). This pointer is not + // owned by this instance. + const uint8_t* data_; + uint64_t data_size_; + uint64_t header_size_; + + int ref_idc_; + int type_; + bool is_video_slice_; + + DISALLOW_COPY_AND_ASSIGN(Nalu); +}; + +/// Helper class used to read NAL units based on several formats: +/// * Annex B H.264/h.265 +/// * NAL Unit Stream +class NaluReader { + public: + enum Result { + kOk, + kInvalidStream, // error in stream + kEOStream, // end of stream + }; + + /// @param nal_length_size should be set to 0 for AnnexB byte streams; + /// otherwise, it indicates the size of NAL unit length for the NAL + /// unit stream. + NaluReader(uint8_t nal_length_size, + const uint8_t* stream, + uint64_t stream_size); + ~NaluReader(); + + // Find offset from start of data to next NALU start code + // and size of found start code (3 or 4 bytes). + // If no start code is found, offset is pointing to the first unprocessed byte + // (i.e. the first byte that was not considered as a possible start of a start + // code) and |*start_code_size| is set to 0. 
+ // Postconditions: + // - |*offset| is between 0 and |data_size| included. + // It is strictly less than |data_size| if |data_size| > 0. + // - |*start_code_size| is either 0, 3 or 4. + static bool FindStartCode(const uint8_t* data, + uint64_t data_size, + uint64_t* offset, + uint8_t* start_code_size); + + /// Reads a NALU from the stream into |*nalu|, if one exists, and then + /// advances to the next NALU. + /// @param nalu contains the NALU read if it exists. + /// @return kOk if a NALU is read; kEOStream if the stream is at the + /// end-of-stream; kInvalidStream on error. + Result Advance(Nalu* nalu); + + private: + enum Format { + kAnnexbByteStreamFormat, + kNalUnitStreamFormat + }; + + // Move the stream pointer to the beginning of the next NALU, + // i.e. pointing at the next start code. + // Return true if a NALU has been found. + // If a NALU is found: + // - its size in bytes is returned in |*nalu_size| and includes + // the start code as well as the trailing zero bits. + // - the size in bytes of the start code is returned in |*start_code_size|. + bool LocateNaluByStartCode(uint64_t* nalu_size, uint8_t* start_code_size); + + // Pointer to the current NALU in the stream. + const uint8_t* stream_; + // The remaining size of the stream. + uint64_t stream_size_; + // The size in bytes of the NALU length prefix; only valid if format is + // kNalUnitStreamFormat. + uint8_t nalu_length_size_; + // The format of the stream. + Format format_; + + DISALLOW_COPY_AND_ASSIGN(NaluReader); +}; + +} // namespace media +} // namespace edash_packager + +#endif // MEDIA_FILTERS_NALU_READER_H_ diff --git a/packager/media/filters/nalu_reader_unittest.cc b/packager/media/filters/nalu_reader_unittest.cc new file mode 100644 index 0000000000..3430a48d87 --- /dev/null +++ b/packager/media/filters/nalu_reader_unittest.cc @@ -0,0 +1,145 @@ +// Copyright 2016 Google Inc. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#include + +#include "packager/media/filters/nalu_reader.h" + +namespace edash_packager { +namespace media { + +TEST(NaluReaderTest, StartCodeSearch) { + const uint8_t kNaluData[] = { + 0x01, 0x00, 0x00, 0x04, 0x23, 0x56, + // First NALU + 0x00, 0x00, 0x01, 0x12, 0x34, 0x56, 0x78, + // Second NALU + 0x00, 0x00, 0x00, 0x01, 0x67, 0xbb, 0xcc, 0xdd + }; + + NaluReader reader(kIsAnnexbByteStream, kNaluData, arraysize(kNaluData)); + + Nalu nalu; + ASSERT_EQ(NaluReader::kOk, reader.Advance(&nalu)); + EXPECT_EQ(kNaluData + 6, nalu.data()); + EXPECT_EQ(3u, nalu.data_size()); + EXPECT_EQ(4u, nalu.header_size()); + EXPECT_EQ(0, nalu.ref_idc()); + EXPECT_EQ(0x12, nalu.type()); + + ASSERT_EQ(NaluReader::kOk, reader.Advance(&nalu)); + EXPECT_EQ(kNaluData + 13, nalu.data()); + EXPECT_EQ(3u, nalu.data_size()); + EXPECT_EQ(5u, nalu.header_size()); + EXPECT_EQ(3, nalu.ref_idc()); + EXPECT_EQ(7, nalu.type()); + + EXPECT_EQ(NaluReader::kEOStream, reader.Advance(&nalu)); +} + +TEST(NaluReaderTest, OneByteNaluLength) { + const uint8_t kNaluData[] = { + // First NALU + 0x05, 0x08, 0x01, 0x02, 0x03, 0x04, + // Second NALU + 0x06, 0x67, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e + }; + + NaluReader reader(1, kNaluData, arraysize(kNaluData)); + + Nalu nalu; + ASSERT_EQ(NaluReader::kOk, reader.Advance(&nalu)); + EXPECT_EQ(kNaluData, nalu.data()); + EXPECT_EQ(4u, nalu.data_size()); + EXPECT_EQ(2u, nalu.header_size()); + EXPECT_EQ(0, nalu.ref_idc()); + EXPECT_EQ(8, nalu.type()); + + ASSERT_EQ(NaluReader::kOk, reader.Advance(&nalu)); + EXPECT_EQ(kNaluData + 6, nalu.data()); + EXPECT_EQ(5u, nalu.data_size()); + EXPECT_EQ(2u, nalu.header_size()); + EXPECT_EQ(3, nalu.ref_idc()); + EXPECT_EQ(7, nalu.type()); + + EXPECT_EQ(NaluReader::kEOStream, reader.Advance(&nalu)); +} + +TEST(NaluReaderTest, ThreeByteNaluLength) { + const uint8_t 
kNaluData[] = { + // First NALU + 0x00, 0x00, 0x07, 0x08, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, + // Second NALU + 0x00, 0x00, 0x03, 0x67, 0x0a, 0x0b + }; + + NaluReader reader(3, kNaluData, arraysize(kNaluData)); + + Nalu nalu; + ASSERT_EQ(NaluReader::kOk, reader.Advance(&nalu)); + EXPECT_EQ(kNaluData, nalu.data()); + EXPECT_EQ(6u, nalu.data_size()); + EXPECT_EQ(4u, nalu.header_size()); + EXPECT_EQ(0, nalu.ref_idc()); + EXPECT_EQ(8, nalu.type()); + + ASSERT_EQ(NaluReader::kOk, reader.Advance(&nalu)); + EXPECT_EQ(kNaluData + 10, nalu.data()); + EXPECT_EQ(2u, nalu.data_size()); + EXPECT_EQ(4u, nalu.header_size()); + EXPECT_EQ(3, nalu.ref_idc()); + EXPECT_EQ(7, nalu.type()); + + EXPECT_EQ(NaluReader::kEOStream, reader.Advance(&nalu)); +} + +TEST(NaluReaderTest, ErrorForNotEnoughForNaluLength) { + const uint8_t kNaluData[] = { + // First NALU + 0x00 + }; + + NaluReader reader(3, kNaluData, arraysize(kNaluData)); + + Nalu nalu; + EXPECT_EQ(NaluReader::kInvalidStream, reader.Advance(&nalu)); +} + +TEST(NaluReaderTest, ErrorForNaluLengthExceedsRemainingData) { + const uint8_t kNaluData[] = { + // First NALU + 0xFF, 0x08, 0x00 + }; + + NaluReader reader(1, kNaluData, arraysize(kNaluData)); + + Nalu nalu; + EXPECT_EQ(NaluReader::kInvalidStream, reader.Advance(&nalu)); + + // Another test for off by one. 
+ const uint8_t kNaluData2[] = { + // First NALU + 0x04, 0x08, 0x00, 0x00 + }; + + NaluReader reader2(1, kNaluData2, arraysize(kNaluData2)); + EXPECT_EQ(NaluReader::kInvalidStream, reader2.Advance(&nalu)); +} + +TEST(NaluReaderTest, ErrorForForbiddenBitSet) { + const uint8_t kNaluData[] = { + // First NALU + 0x03, 0x80, 0x00, 0x00 + }; + + NaluReader reader(1, kNaluData, arraysize(kNaluData)); + + Nalu nalu; + EXPECT_EQ(NaluReader::kInvalidStream, reader.Advance(&nalu)); +} + +} // namespace media +} // namespace edash_packager diff --git a/packager/media/formats/mp2t/es_parser_h264.cc b/packager/media/formats/mp2t/es_parser_h264.cc index 7a5d651799..fcb295eb36 100644 --- a/packager/media/formats/mp2t/es_parser_h264.cc +++ b/packager/media/formats/mp2t/es_parser_h264.cc @@ -118,21 +118,23 @@ bool EsParserH264::FindAUD(int64_t* stream_pos) { es_queue_->PeekAt(*stream_pos, &es, &size); // Find a start code and move the stream to the start code parser position. - off_t start_code_offset; - off_t start_code_size; - bool start_code_found = H264Parser::FindStartCode( + uint64_t start_code_offset; + uint8_t start_code_size; + bool start_code_found = NaluReader::FindStartCode( es, size, &start_code_offset, &start_code_size); *stream_pos += start_code_offset; // No H264 start code found or NALU type not available yet. - if (!start_code_found || start_code_offset + start_code_size >= size) + if (!start_code_found || + start_code_offset + start_code_size >= static_cast(size)) { return false; + } // Exit the parser loop when an AUD is found. 
// Note: NALU header for an AUD: - // - nal_ref_idc must be 0 - // - nal_unit_type must be H264NALU::kAUD - if (es[start_code_offset + start_code_size] == H264NALU::kAUD) + // - ref_idc must be 0 + // - type must be Nalu::H264_AUD + if (es[start_code_offset + start_code_size] == Nalu::H264_AUD) break; // The current NALU is not an AUD, skip the start code @@ -180,41 +182,40 @@ bool EsParserH264::ParseInternal() { int access_unit_size = base::checked_cast( next_access_unit_pos_ - current_access_unit_pos_); DCHECK_LE(access_unit_size, size); - h264_parser_->SetStream(es, access_unit_size); + NaluReader reader(kIsAnnexbByteStream, es, access_unit_size); while (true) { + Nalu nalu; bool is_eos = false; - H264NALU nalu; - switch (h264_parser_->AdvanceToNextNALU(&nalu)) { - case H264Parser::kOk: + switch (reader.Advance(&nalu)) { + case NaluReader::kOk: break; - case H264Parser::kInvalidStream: - case H264Parser::kUnsupportedStream: - return false; - case H264Parser::kEOStream: + case NaluReader::kEOStream: is_eos = true; break; + default: + return false; } if (is_eos) break; - switch (nalu.nal_unit_type) { - case H264NALU::kAUD: { - DVLOG(LOG_LEVEL_ES) << "NALU: AUD"; + switch (nalu.type()) { + case Nalu::H264_AUD: { + DVLOG(LOG_LEVEL_ES) << "Nalu: AUD"; break; } - case H264NALU::kSPS: { - DVLOG(LOG_LEVEL_ES) << "NALU: SPS"; + case Nalu::H264_SPS: { + DVLOG(LOG_LEVEL_ES) << "Nalu: SPS"; int sps_id; - if (h264_parser_->ParseSPS(&sps_id) != H264Parser::kOk) + if (h264_parser_->ParseSPS(nalu, &sps_id) != H264Parser::kOk) return false; decoder_config_check_pending_ = true; break; } - case H264NALU::kPPS: { - DVLOG(LOG_LEVEL_ES) << "NALU: PPS"; + case Nalu::H264_PPS: { + DVLOG(LOG_LEVEL_ES) << "Nalu: PPS"; int pps_id; - if (h264_parser_->ParsePPS(&pps_id) != H264Parser::kOk) { + if (h264_parser_->ParsePPS(nalu, &pps_id) != H264Parser::kOk) { // Allow PPS parsing to fail if waiting for SPS. 
if (last_video_decoder_config_) return false; @@ -223,10 +224,10 @@ bool EsParserH264::ParseInternal() { } break; } - case H264NALU::kIDRSlice: - case H264NALU::kNonIDRSlice: { - is_key_frame = (nalu.nal_unit_type == H264NALU::kIDRSlice); - DVLOG(LOG_LEVEL_ES) << "NALU: slice IDR=" << is_key_frame; + case Nalu::H264_IDRSlice: + case Nalu::H264_NonIDRSlice: { + is_key_frame = (nalu.type() == Nalu::H264_IDRSlice); + DVLOG(LOG_LEVEL_ES) << "Nalu: slice IDR=" << is_key_frame; H264SliceHeader shdr; if (h264_parser_->ParseSliceHeader(nalu, &shdr) != H264Parser::kOk) { // Only accept an invalid SPS/PPS at the beginning when the stream @@ -239,7 +240,7 @@ bool EsParserH264::ParseInternal() { break; } default: { - DVLOG(LOG_LEVEL_ES) << "NALU: " << nalu.nal_unit_type; + DVLOG(LOG_LEVEL_ES) << "Nalu: " << nalu.type(); } } } diff --git a/packager/media/formats/mp2t/es_parser_h264_unittest.cc b/packager/media/formats/mp2t/es_parser_h264_unittest.cc index ab99296581..ec80897b36 100644 --- a/packager/media/formats/mp2t/es_parser_h264_unittest.cc +++ b/packager/media/formats/mp2t/es_parser_h264_unittest.cc @@ -56,9 +56,9 @@ std::vector GetAccessUnits(const uint8_t* stream, size_t stream_size) { size_t offset = 0; while (true) { // Find the next start code. - off_t relative_offset = 0; - off_t start_code_size = 0; - bool success = H264Parser::FindStartCode( + uint64_t relative_offset = 0; + uint8_t start_code_size = 0; + bool success = NaluReader::FindStartCode( &stream[offset], stream_size - offset, &relative_offset, &start_code_size); if (!success) @@ -79,8 +79,8 @@ std::vector GetAccessUnits(const uint8_t* stream, size_t stream_size) { int nal_unit_type = stream[offset] & 0x1f; // We assume there is only one slice per access unit. 
- if (nal_unit_type == H264NALU::kIDRSlice || - nal_unit_type == H264NALU::kNonIDRSlice) { + if (nal_unit_type == Nalu::H264_IDRSlice || + nal_unit_type == Nalu::H264_NonIDRSlice) { start_access_unit = true; } } diff --git a/packager/media/formats/mp4/encrypting_fragmenter.cc b/packager/media/formats/mp4/encrypting_fragmenter.cc index 065b264a84..f38b3f0351 100644 --- a/packager/media/formats/mp4/encrypting_fragmenter.cc +++ b/packager/media/formats/mp4/encrypting_fragmenter.cc @@ -10,6 +10,7 @@ #include "packager/media/base/buffer_reader.h" #include "packager/media/base/key_source.h" #include "packager/media/base/media_sample.h" +#include "packager/media/filters/nalu_reader.h" #include "packager/media/filters/vp8_parser.h" #include "packager/media/filters/vp9_parser.h" #include "packager/media/formats/mp4/box_definitions.h" @@ -181,25 +182,21 @@ Status EncryptingFragmenter::EncryptSample(scoped_refptr sample) { data += frame.frame_size; } } else { - BufferReader reader(data, sample->data_size()); - while (reader.HasBytes(1)) { - uint64_t nalu_length; - if (!reader.ReadNBytesInto8(&nalu_length, nalu_length_size_)) - return Status(error::MUXER_FAILURE, "Fail to read nalu_length."); - - if (!reader.SkipBytes(nalu_length)) { - return Status(error::MUXER_FAILURE, - "Sample size does not match nalu_length."); - } + NaluReader reader(nalu_length_size_, data, sample->data_size()); + Nalu nalu; + NaluReader::Result result; + while ((result = reader.Advance(&nalu)) == NaluReader::kOk) { SubsampleEntry subsample; - subsample.clear_bytes = nalu_length_size_ + 1; - subsample.cipher_bytes = nalu_length - 1; + subsample.clear_bytes = nalu.header_size(); + subsample.cipher_bytes = nalu.data_size(); sample_encryption_entry.subsamples.push_back(subsample); - EncryptBytes(data + subsample.clear_bytes, subsample.cipher_bytes); - data += nalu_length_size_ + nalu_length; + EncryptBytes(const_cast(nalu.data() + nalu.header_size()), + subsample.cipher_bytes); } + if (result != 
NaluReader::kEOStream) + return Status(error::MUXER_FAILURE, "Failed to parse NAL units."); } // The length of per-sample auxiliary datum, defined in CENC ch. 7.