// Copyright 2014 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "packager/media/formats/mp2t/es_parser_h26x.h" #include #include "packager/base/logging.h" #include "packager/base/numerics/safe_conversions.h" #include "packager/media/base/media_sample.h" #include "packager/media/base/offset_byte_queue.h" #include "packager/media/base/timestamp.h" #include "packager/media/base/video_stream_info.h" #include "packager/media/codecs/h26x_byte_to_unit_stream_converter.h" #include "packager/media/formats/mp2t/mp2t_common.h" namespace shaka { namespace media { namespace mp2t { namespace { const int kStartCodeSize = 3; const int kH264NaluHeaderSize = 1; const int kH265NaluHeaderSize = 2; } // namespace EsParserH26x::EsParserH26x( Nalu::CodecType type, std::unique_ptr stream_converter, uint32_t pid, const EmitSampleCB& emit_sample_cb) : EsParser(pid), emit_sample_cb_(emit_sample_cb), type_(type), es_queue_(new media::OffsetByteQueue()), current_search_position_(0), stream_converter_(std::move(stream_converter)), pending_sample_duration_(0), waiting_for_key_frame_(true) {} EsParserH26x::~EsParserH26x() {} bool EsParserH26x::Parse(const uint8_t* buf, int size, int64_t pts, int64_t dts) { // Note: Parse is invoked each time a PES packet has been reassembled. // Unfortunately, a PES packet does not necessarily map // to an h264/h265 access unit, although the HLS recommendation is to use one // PES for each access unit (but this is just a recommendation and some // streams do not comply with this recommendation). // HLS recommendation: "In AVC video, you should have both a DTS and a // PTS in each PES header". // However, some streams do not comply with this recommendation. DVLOG_IF(1, pts == kNoTimestamp) << "Each video PES should have a PTS"; if (pts != kNoTimestamp) { TimingDesc timing_desc; timing_desc.pts = pts; timing_desc.dts = (dts != kNoTimestamp) ? dts : pts; // Link the end of the byte queue with the incoming timing descriptor. timing_desc_list_.push_back( std::pair(es_queue_->tail(), timing_desc)); } // Add the incoming bytes to the ES queue. es_queue_->Push(buf, size); // We should always have entries in the vector and it should always start // with |can_start_access_unit == true|. If not, we are just starting and // should skip to the first access unit. if (access_unit_nalus_.empty()) { if (!SkipToFirstAccessUnit()) return true; } DCHECK(!access_unit_nalus_.empty()); DCHECK(access_unit_nalus_.front().nalu.can_start_access_unit()); return ParseInternal(); } void EsParserH26x::Flush() { DVLOG(1) << "EsParserH26x::Flush"; // Simulate an additional AUD to force emitting the last access unit // which is assumed to be complete at this point. if (type_ == Nalu::kH264) { const uint8_t aud[] = {0x00, 0x00, 0x01, 0x09}; es_queue_->Push(aud, sizeof(aud)); } else { DCHECK_EQ(Nalu::kH265, type_); const uint8_t aud[] = {0x00, 0x00, 0x01, 0x46, 0x01}; es_queue_->Push(aud, sizeof(aud)); } CHECK(ParseInternal()); // Note that the end argument is exclusive. We do not want to include the // fake AUD we just added, so the argument should point to the AUD. if (access_unit_nalus_.size() > 1 && !ProcessAccessUnit(access_unit_nalus_.end() - 1)) { LOG(WARNING) << "Error processing last access unit."; } if (pending_sample_) { // Flush pending sample. DCHECK(pending_sample_duration_); pending_sample_->set_duration(pending_sample_duration_); emit_sample_cb_.Run(pid(), pending_sample_); pending_sample_ = scoped_refptr(); } } void EsParserH26x::Reset() { es_queue_.reset(new media::OffsetByteQueue()); current_search_position_ = 0; access_unit_nalus_.clear(); timing_desc_list_.clear(); pending_sample_ = scoped_refptr(); pending_sample_duration_ = 0; waiting_for_key_frame_ = true; } bool EsParserH26x::SkipToFirstAccessUnit() { DCHECK(access_unit_nalus_.empty()); while (access_unit_nalus_.empty()) { if (!SearchForNextNalu()) return false; // If we can't start an access unit, remove it and continue. DCHECK_EQ(1u, access_unit_nalus_.size()); if (!access_unit_nalus_.back().nalu.can_start_access_unit()) access_unit_nalus_.clear(); } return true; } bool EsParserH26x::SearchForNextNalu() { const uint8_t* es; int es_size; es_queue_->PeekAt(current_search_position_, &es, &es_size); // Find a start code. uint64_t start_code_offset; uint8_t start_code_size; const bool start_code_found = NaluReader::FindStartCode( es, es_size, &start_code_offset, &start_code_size); if (!start_code_found) { // We didn't find a start code, so we don't have to search this data again. if (es_size > kStartCodeSize) current_search_position_ += es_size - kStartCodeSize; return false; } // Ensure the next NAL unit is a real NAL unit. const uint8_t* nalu_ptr = es + start_code_offset + start_code_size; // This size is likely inaccurate, this is just to get the header info. const int64_t next_nalu_size = es_size - start_code_offset - start_code_size; if (next_nalu_size < (type_ == Nalu::kH264 ? kH264NaluHeaderSize : kH265NaluHeaderSize)) { // There was not enough data, wait for more. return false; } Nalu next_nalu; if (!next_nalu.Initialize(type_, nalu_ptr, next_nalu_size)) { // The next NAL unit is invalid, skip it and search again. current_search_position_ += start_code_offset + start_code_size; return SearchForNextNalu(); } current_search_position_ += start_code_offset + start_code_size; NaluInfo info; info.position = current_search_position_ - start_code_size; info.start_code_size = start_code_size; info.nalu = next_nalu; access_unit_nalus_.push_back(info); return true; } bool EsParserH26x::ProcessAccessUnit(std::deque::iterator end) { DCHECK(end < access_unit_nalus_.end()); auto begin = access_unit_nalus_.begin(); const uint8_t* es; int es_size; es_queue_->PeekAt(begin->position, &es, &es_size); DCHECK_GE(static_cast(es_size), (end->position - begin->position)); // Process the NAL units in the access unit. bool is_key_frame = false; int pps_id = -1; for (auto it = begin; it != end; ++it) { if (it->nalu.nuh_layer_id() == 0) { // Update the NALU because the data pointer may have been invalidated. CHECK(it->nalu.Initialize( type_, es + (it->position - begin->position) + it->start_code_size, ((it+1)->position - it->position) - it->start_code_size)); if (!ProcessNalu(it->nalu, &is_key_frame, &pps_id)) return false; } } if (is_key_frame) waiting_for_key_frame_ = false; if (!waiting_for_key_frame_) { const uint64_t access_unit_size = end->position - begin->position; RCHECK(EmitFrame(begin->position, access_unit_size, is_key_frame, pps_id)); } return true; } bool EsParserH26x::ParseInternal() { while (true) { if (!SearchForNextNalu()) return true; // ITU H.264 sec. 7.4.1.2.3 // H264: The first of the NAL units with |can_start_access_unit() == true| // after the last VCL NAL unit of a primary coded picture specifies the // start of a new access unit. |nuh_layer_id()| is for H265 only; it is // included below for ease of computation (the value is always 0). // ITU H.265 sec. 7.4.2.4.4 // H265: The first of the NAL units with |can_start_access_unit() == true| // after the last VCL NAL unit preceding firstBlPicNalUnit (the first // VCL NAL unit of a coded picture with nuh_layer_id equal to 0), if // any, specifies the start of a new access unit. DCHECK(!access_unit_nalus_.empty()); if (!access_unit_nalus_.back().nalu.is_video_slice() || access_unit_nalus_.back().nalu.nuh_layer_id() != 0) { continue; } // First, find the end of the access unit. Search backward to find the // first VCL NALU before the current one. auto access_unit_end_rit = access_unit_nalus_.rbegin(); bool found_vcl = false; for (auto rit = access_unit_nalus_.rbegin() + 1; rit != access_unit_nalus_.rend(); ++rit) { if (rit->nalu.is_video_slice()) { found_vcl = true; break; } else if (rit->nalu.can_start_access_unit()) { // The start of the next access unit is the first unit with // |can_start_access_unit| after the previous VCL unit. access_unit_end_rit = rit; } } if (!found_vcl) return true; // Get a forward iterator that corresponds to the same element pointed by // |access_unit_end_rit|. Note: |end| refers to the exclusive end and // will point to a valid object. auto end = (access_unit_end_rit + 1).base(); if (!ProcessAccessUnit(end)) return false; // Delete the data we have already processed. es_queue_->Trim(end->position); access_unit_nalus_.erase(access_unit_nalus_.begin(), end); } } bool EsParserH26x::EmitFrame(int64_t access_unit_pos, int access_unit_size, bool is_key_frame, int pps_id) { // Get the access unit timing info. TimingDesc current_timing_desc = {kNoTimestamp, kNoTimestamp}; while (!timing_desc_list_.empty() && timing_desc_list_.front().first <= access_unit_pos) { current_timing_desc = timing_desc_list_.front().second; timing_desc_list_.pop_front(); } if (current_timing_desc.pts == kNoTimestamp) return false; // Emit a frame. DVLOG(LOG_LEVEL_ES) << "Emit frame: stream_pos=" << access_unit_pos << " size=" << access_unit_size; int es_size; const uint8_t* es; es_queue_->PeekAt(access_unit_pos, &es, &es_size); // Convert frame to unit stream format. std::vector converted_frame; if (!stream_converter_->ConvertByteStreamToNalUnitStream( es, access_unit_size, &converted_frame)) { DLOG(ERROR) << "Failure to convert video frame to unit stream format."; return false; } // Update the video decoder configuration if needed. RCHECK(UpdateVideoDecoderConfig(pps_id)); // Create the media sample, emitting always the previous sample after // calculating its duration. scoped_refptr media_sample = MediaSample::CopyFrom( converted_frame.data(), converted_frame.size(), is_key_frame); media_sample->set_dts(current_timing_desc.dts); media_sample->set_pts(current_timing_desc.pts); if (pending_sample_) { DCHECK_GT(media_sample->dts(), pending_sample_->dts()); pending_sample_duration_ = media_sample->dts() - pending_sample_->dts(); pending_sample_->set_duration(pending_sample_duration_); emit_sample_cb_.Run(pid(), pending_sample_); } pending_sample_ = media_sample; return true; } } // namespace mp2t } // namespace media } // namespace shaka