Shaka Packager SDK
es_parser_h26x.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "packager/media/formats/mp2t/es_parser_h26x.h"
6 
7 #include <stdint.h>
8 
9 #include "packager/base/logging.h"
10 #include "packager/base/numerics/safe_conversions.h"
11 #include "packager/media/base/media_sample.h"
12 #include "packager/media/base/offset_byte_queue.h"
13 #include "packager/media/base/timestamp.h"
14 #include "packager/media/base/video_stream_info.h"
15 #include "packager/media/codecs/h26x_byte_to_unit_stream_converter.h"
16 #include "packager/media/formats/mp2t/mp2t_common.h"
17 
18 namespace shaka {
19 namespace media {
20 namespace mp2t {
21 
namespace {

// Number of bytes kept unsearched at the end of the queue when no start code
// was found, so that a start code split across two pushes is still detected.
constexpr int kStartCodeSize = 3;
// Minimum number of bytes required after a start code before an H.264 NAL
// unit header is parsed.
constexpr int kH264NaluHeaderSize = 1;
// Minimum number of bytes required after a start code before an H.265 NAL
// unit header is parsed.
constexpr int kH265NaluHeaderSize = 2;

}  // namespace
29 
30 EsParserH26x::EsParserH26x(
31  Nalu::CodecType type,
32  std::unique_ptr<H26xByteToUnitStreamConverter> stream_converter,
33  uint32_t pid,
34  const EmitSampleCB& emit_sample_cb)
35  : EsParser(pid),
36  emit_sample_cb_(emit_sample_cb),
37  type_(type),
38  es_queue_(new media::OffsetByteQueue()),
39  stream_converter_(std::move(stream_converter)) {}
40 
41 EsParserH26x::~EsParserH26x() {}
42 
43 bool EsParserH26x::Parse(const uint8_t* buf,
44  int size,
45  int64_t pts,
46  int64_t dts) {
47  // Note: Parse is invoked each time a PES packet has been reassembled.
48  // Unfortunately, a PES packet does not necessarily map
49  // to an h264/h265 access unit, although the HLS recommendation is to use one
50  // PES for each access unit (but this is just a recommendation and some
51  // streams do not comply with this recommendation).
52 
53  // HLS recommendation: "In AVC video, you should have both a DTS and a
54  // PTS in each PES header".
55  // However, some streams do not comply with this recommendation.
56  DVLOG_IF(1, pts == kNoTimestamp) << "Each video PES should have a PTS";
57  if (pts != kNoTimestamp) {
58  TimingDesc timing_desc;
59  timing_desc.pts = pts;
60  timing_desc.dts = (dts != kNoTimestamp) ? dts : pts;
61 
62  // Link the end of the byte queue with the incoming timing descriptor.
63  timing_desc_list_.push_back(
64  std::pair<int64_t, TimingDesc>(es_queue_->tail(), timing_desc));
65  }
66 
67  // Add the incoming bytes to the ES queue.
68  es_queue_->Push(buf, size);
69  return ParseInternal();
70 }
71 
72 void EsParserH26x::Flush() {
73  DVLOG(1) << "EsParserH26x::Flush";
74 
75  // Simulate two additional AUDs to force emitting the last access unit
76  // which is assumed to be complete at this point.
77  // Two AUDs are needed because the exact size of a NAL unit can only be
78  // determined after seeing the next NAL unit, so we need a second AUD to
79  // finish the parsing of the first AUD.
80  if (type_ == Nalu::kH264) {
81  const uint8_t aud[] = {0x00, 0x00, 0x01, 0x09, 0x00, 0x00, 0x01, 0x09};
82  es_queue_->Push(aud, sizeof(aud));
83  } else {
84  DCHECK_EQ(Nalu::kH265, type_);
85  const uint8_t aud[] = {0x00, 0x00, 0x01, 0x46, 0x01,
86  0x00, 0x00, 0x01, 0x46, 0x01};
87  es_queue_->Push(aud, sizeof(aud));
88  }
89 
90  CHECK(ParseInternal());
91 
92  if (pending_sample_) {
93  // Flush pending sample.
94  DCHECK(pending_sample_duration_);
95  pending_sample_->set_duration(pending_sample_duration_);
96  emit_sample_cb_.Run(pid(), pending_sample_);
97  pending_sample_ = std::shared_ptr<MediaSample>();
98  }
99 }
100 
101 void EsParserH26x::Reset() {
102  es_queue_.reset(new media::OffsetByteQueue());
103  current_search_position_ = 0;
104  current_access_unit_position_ = 0;
105  current_video_slice_info_.valid = false;
106  next_access_unit_position_set_ = false;
107  next_access_unit_position_ = 0;
108  current_nalu_info_.reset();
109  timing_desc_list_.clear();
110  pending_sample_ = std::shared_ptr<MediaSample>();
111  pending_sample_duration_ = 0;
112  waiting_for_key_frame_ = true;
113 }
114 
115 bool EsParserH26x::SearchForNalu(uint64_t* position, Nalu* nalu) {
116  const uint8_t* es;
117  int es_size;
118  es_queue_->PeekAt(current_search_position_, &es, &es_size);
119 
120  // Find a start code.
121  uint64_t start_code_offset;
122  uint8_t start_code_size;
123  const bool start_code_found = NaluReader::FindStartCode(
124  es, es_size, &start_code_offset, &start_code_size);
125 
126  if (!start_code_found) {
127  // We didn't find a start code, so we don't have to search this data again.
128  if (es_size > kStartCodeSize)
129  current_search_position_ += es_size - kStartCodeSize;
130  return false;
131  }
132 
133  // Ensure the next NAL unit is a real NAL unit.
134  const uint8_t* next_nalu_ptr = es + start_code_offset + start_code_size;
135  // This size is likely inaccurate, this is just to get the header info.
136  const int64_t next_nalu_size = es_size - start_code_offset - start_code_size;
137  if (next_nalu_size <
138  (type_ == Nalu::kH264 ? kH264NaluHeaderSize : kH265NaluHeaderSize)) {
139  // There was not enough data, wait for more.
140  return false;
141  }
142 
143  // Update search position for next nalu.
144  current_search_position_ += start_code_offset + start_code_size;
145 
146  // |next_nalu_info_| is made global intentionally to avoid repetitive memory
147  // allocation which could create memory fragments.
148  if (!next_nalu_info_)
149  next_nalu_info_.reset(new NaluInfo);
150  if (!next_nalu_info_->nalu.Initialize(type_, next_nalu_ptr, next_nalu_size)) {
151  // This NAL unit is invalid, skip it and search again.
152  return SearchForNalu(position, nalu);
153  }
154  next_nalu_info_->position = current_search_position_ - start_code_size;
155  next_nalu_info_->start_code_size = start_code_size;
156 
157  const bool current_nalu_set = current_nalu_info_ ? true : false;
158  if (current_nalu_info_) {
159  // Starting position for the nalu including start code.
160  *position = current_nalu_info_->position;
161  // Update the NALU because the data pointer may have been invalidated.
162  const uint8_t* current_nalu_ptr =
163  next_nalu_ptr +
164  (current_nalu_info_->position + current_nalu_info_->start_code_size) -
165  current_search_position_;
166  const uint64_t current_nalu_size = next_nalu_info_->position -
167  current_nalu_info_->position -
168  current_nalu_info_->start_code_size;
169  CHECK(nalu->Initialize(type_, current_nalu_ptr, current_nalu_size));
170  }
171  current_nalu_info_.swap(next_nalu_info_);
172  return current_nalu_set ? true : SearchForNalu(position, nalu);
173 }
174 
175 bool EsParserH26x::ParseInternal() {
176  uint64_t position;
177  Nalu nalu;
178  VideoSliceInfo video_slice_info;
179  while (SearchForNalu(&position, &nalu)) {
180  // ITU H.264 sec. 7.4.1.2.3
181  // H264: The first of the NAL units with |can_start_access_unit() == true|
182  // after the last VCL NAL unit of a primary coded picture specifies the
183  // start of a new access unit.
184  // ITU H.265 sec. 7.4.2.4.4
185  // H265: The first of the NAL units with |can_start_access_unit() == true|
186  // after the last VCL NAL unit preceding firstBlPicNalUnit (the first
187  // VCL NAL unit of a coded picture with nuh_layer_id equal to 0), if
188  // any, specifies the start of a new access unit.
189  if (nalu.can_start_access_unit()) {
190  if (!next_access_unit_position_set_) {
191  next_access_unit_position_set_ = true;
192  next_access_unit_position_ = position;
193  }
194  RCHECK(ProcessNalu(nalu, &video_slice_info));
195  if (nalu.is_vcl() && !video_slice_info.valid) {
196  // This could happen only if decoder config is not available yet. Drop
197  // this frame.
198  DCHECK(!current_video_slice_info_.valid);
199  next_access_unit_position_set_ = false;
200  continue;
201  }
202  } else if (nalu.is_vcl()) {
203  // This isn't the first VCL NAL unit. Next access unit should start after
204  // this NAL unit.
205  next_access_unit_position_set_ = false;
206  continue;
207  }
208 
209  // AUD shall be the first NAL unit if present. There shall be at most one
210  // AUD in any access unit. We can emit the current access unit which shall
211  // not contain the AUD.
212  if (nalu.is_aud())
213  return EmitCurrentAccessUnit();
214 
215  // We can only determine if the current access unit ends after seeing
216  // another VCL NAL unit.
217  if (!video_slice_info.valid)
218  continue;
219 
220  // Check if it is the first VCL NAL unit of a primary coded picture. It is
221  // always true for H265 as nuh_layer_id shall be == 0 at this point.
222  bool is_first_vcl_nalu = true;
223  if (type_ == Nalu::kH264) {
224  if (current_video_slice_info_.valid) {
225  // ITU H.264 sec. 7.4.1.2.4 Detection of the first VCL NAL unit of a
226  // primary coded picture. Only pps_id and frame_num are checked here.
227  is_first_vcl_nalu =
228  video_slice_info.frame_num != current_video_slice_info_.frame_num ||
229  video_slice_info.pps_id != current_video_slice_info_.pps_id;
230  }
231  }
232  if (!is_first_vcl_nalu) {
233  // This isn't the first VCL NAL unit. Next access unit should start after
234  // this NAL unit.
235  next_access_unit_position_set_ = false;
236  continue;
237  }
238 
239  DCHECK(next_access_unit_position_set_);
240  RCHECK(EmitCurrentAccessUnit());
241 
242  // Delete the data we have already processed.
243  es_queue_->Trim(next_access_unit_position_);
244 
245  current_access_unit_position_ = next_access_unit_position_;
246  current_video_slice_info_ = video_slice_info;
247  next_access_unit_position_set_ = false;
248  }
249  return true;
250 }
251 
252 bool EsParserH26x::EmitCurrentAccessUnit() {
253  if (current_video_slice_info_.valid) {
254  if (current_video_slice_info_.is_key_frame)
255  waiting_for_key_frame_ = false;
256  if (!waiting_for_key_frame_) {
257  RCHECK(
258  EmitFrame(current_access_unit_position_,
259  next_access_unit_position_ - current_access_unit_position_,
260  current_video_slice_info_.is_key_frame,
261  current_video_slice_info_.pps_id));
262  }
263  current_video_slice_info_.valid = false;
264  }
265  return true;
266 }
267 
268 bool EsParserH26x::EmitFrame(int64_t access_unit_pos,
269  int access_unit_size,
270  bool is_key_frame,
271  int pps_id) {
272  // Get the access unit timing info.
273  TimingDesc current_timing_desc = {kNoTimestamp, kNoTimestamp};
274  while (!timing_desc_list_.empty() &&
275  timing_desc_list_.front().first <= access_unit_pos) {
276  current_timing_desc = timing_desc_list_.front().second;
277  timing_desc_list_.pop_front();
278  }
279  if (current_timing_desc.pts == kNoTimestamp)
280  return false;
281 
282  // Emit a frame.
283  DVLOG(LOG_LEVEL_ES) << "Emit frame: stream_pos=" << access_unit_pos
284  << " size=" << access_unit_size;
285  int es_size;
286  const uint8_t* es;
287  es_queue_->PeekAt(access_unit_pos, &es, &es_size);
288 
289  // Convert frame to unit stream format.
290  std::vector<uint8_t> converted_frame;
291  if (!stream_converter_->ConvertByteStreamToNalUnitStream(
292  es, access_unit_size, &converted_frame)) {
293  DLOG(ERROR) << "Failure to convert video frame to unit stream format.";
294  return false;
295  }
296 
297  // Update the video decoder configuration if needed.
298  RCHECK(UpdateVideoDecoderConfig(pps_id));
299 
300  // Create the media sample, emitting always the previous sample after
301  // calculating its duration.
302  std::shared_ptr<MediaSample> media_sample = MediaSample::CopyFrom(
303  converted_frame.data(), converted_frame.size(), is_key_frame);
304  media_sample->set_dts(current_timing_desc.dts);
305  media_sample->set_pts(current_timing_desc.pts);
306  if (pending_sample_) {
307  DCHECK_GT(media_sample->dts(), pending_sample_->dts());
308  pending_sample_duration_ = media_sample->dts() - pending_sample_->dts();
309  pending_sample_->set_duration(pending_sample_duration_);
310  emit_sample_cb_.Run(pid(), pending_sample_);
311  }
312  pending_sample_ = media_sample;
313 
314  return true;
315 }
316 
317 } // namespace mp2t
318 } // namespace media
319 } // namespace shaka
STL namespace.
All the methods that are virtual are virtual for mocking.
static std::shared_ptr< MediaSample > CopyFrom(const uint8_t *data, size_t size, bool is_key_frame)
Definition: media_sample.cc:42