Shaka Packager SDK
es_parser_h26x.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "packager/media/formats/mp2t/es_parser_h26x.h"
6 
7 #include <stdint.h>
8 
9 #include "packager/base/logging.h"
10 #include "packager/base/numerics/safe_conversions.h"
11 #include "packager/media/base/media_sample.h"
12 #include "packager/media/base/offset_byte_queue.h"
13 #include "packager/media/base/timestamp.h"
14 #include "packager/media/base/video_stream_info.h"
15 #include "packager/media/codecs/h26x_byte_to_unit_stream_converter.h"
16 #include "packager/media/formats/mp2t/mp2t_common.h"
17 
18 namespace shaka {
19 namespace media {
20 namespace mp2t {
21 
namespace {

// Number of trailing bytes kept unsearched when no start code was found, so
// that a start code split across two pushes can still be detected.
constexpr int kStartCodeSize = 3;
// Minimum number of bytes required after a start code to read an H.264 NAL
// unit header.
constexpr int kH264NaluHeaderSize = 1;
// Minimum number of bytes required after a start code to read an H.265 NAL
// unit header.
constexpr int kH265NaluHeaderSize = 2;

}  // namespace
29 
30 EsParserH26x::EsParserH26x(
31  Nalu::CodecType type,
32  std::unique_ptr<H26xByteToUnitStreamConverter> stream_converter,
33  uint32_t pid,
34  const EmitSampleCB& emit_sample_cb)
35  : EsParser(pid),
36  emit_sample_cb_(emit_sample_cb),
37  type_(type),
38  es_queue_(new media::OffsetByteQueue()),
39  stream_converter_(std::move(stream_converter)) {}
40 
41 EsParserH26x::~EsParserH26x() {}
42 
43 bool EsParserH26x::Parse(const uint8_t* buf,
44  int size,
45  int64_t pts,
46  int64_t dts) {
47  // Note: Parse is invoked each time a PES packet has been reassembled.
48  // Unfortunately, a PES packet does not necessarily map
49  // to an h264/h265 access unit, although the HLS recommendation is to use one
50  // PES for each access unit (but this is just a recommendation and some
51  // streams do not comply with this recommendation).
52 
53  // HLS recommendation: "In AVC video, you should have both a DTS and a
54  // PTS in each PES header".
55  // However, some streams do not comply with this recommendation.
56  DVLOG_IF(1, pts == kNoTimestamp) << "Each video PES should have a PTS";
57  if (pts != kNoTimestamp) {
58  TimingDesc timing_desc;
59  timing_desc.pts = pts;
60  timing_desc.dts = (dts != kNoTimestamp) ? dts : pts;
61 
62  // Link the end of the byte queue with the incoming timing descriptor.
63  timing_desc_list_.push_back(
64  std::pair<int64_t, TimingDesc>(es_queue_->tail(), timing_desc));
65 
66  // Warns if there are a large number of cached timestamps, which should be 1
67  // or 2 if everythings works as expected.
68  const size_t kWarningSize =
69  24; // An arbitrary number (it is 1 second for a fps of 24).
70  LOG_IF(WARNING, timing_desc_list_.size() >= kWarningSize)
71  << "Unusually large number of cached timestamps ("
72  << timing_desc_list_.size() << ").";
73  }
74 
75  // Add the incoming bytes to the ES queue.
76  es_queue_->Push(buf, size);
77  return ParseInternal();
78 }
79 
80 bool EsParserH26x::Flush() {
81  DVLOG(1) << "EsParserH26x::Flush";
82 
83  // Simulate two additional AUDs to force emitting the last access unit
84  // which is assumed to be complete at this point.
85  // Two AUDs are needed because the exact size of a NAL unit can only be
86  // determined after seeing the next NAL unit, so we need a second AUD to
87  // finish the parsing of the first AUD.
88  if (type_ == Nalu::kH264) {
89  const uint8_t aud[] = {0x00, 0x00, 0x01, 0x09, 0x00, 0x00, 0x01, 0x09};
90  es_queue_->Push(aud, sizeof(aud));
91  } else {
92  DCHECK_EQ(Nalu::kH265, type_);
93  const uint8_t aud[] = {0x00, 0x00, 0x01, 0x46, 0x01,
94  0x00, 0x00, 0x01, 0x46, 0x01};
95  es_queue_->Push(aud, sizeof(aud));
96  }
97 
98  RCHECK(ParseInternal());
99 
100  if (pending_sample_) {
101  // Flush pending sample.
102  DCHECK(pending_sample_duration_);
103  pending_sample_->set_duration(pending_sample_duration_);
104  emit_sample_cb_.Run(std::move(pending_sample_));
105  }
106  return true;
107 }
108 
109 void EsParserH26x::Reset() {
110  es_queue_.reset(new media::OffsetByteQueue());
111  current_search_position_ = 0;
112  current_access_unit_position_ = 0;
113  current_video_slice_info_.valid = false;
114  next_access_unit_position_set_ = false;
115  next_access_unit_position_ = 0;
116  current_nalu_info_.reset();
117  timing_desc_list_.clear();
118  pending_sample_ = std::shared_ptr<MediaSample>();
119  pending_sample_duration_ = 0;
120  waiting_for_key_frame_ = true;
121 }
122 
123 bool EsParserH26x::SearchForNalu(uint64_t* position, Nalu* nalu) {
124  const uint8_t* es;
125  int es_size;
126  es_queue_->PeekAt(current_search_position_, &es, &es_size);
127 
128  // Find a start code.
129  uint64_t start_code_offset;
130  uint8_t start_code_size;
131  const bool start_code_found = NaluReader::FindStartCode(
132  es, es_size, &start_code_offset, &start_code_size);
133 
134  if (!start_code_found) {
135  // We didn't find a start code, so we don't have to search this data again.
136  if (es_size > kStartCodeSize)
137  current_search_position_ += es_size - kStartCodeSize;
138  return false;
139  }
140 
141  // Ensure the next NAL unit is a real NAL unit.
142  const uint8_t* next_nalu_ptr = es + start_code_offset + start_code_size;
143  // This size is likely inaccurate, this is just to get the header info.
144  const int64_t next_nalu_size = es_size - start_code_offset - start_code_size;
145  if (next_nalu_size <
146  (type_ == Nalu::kH264 ? kH264NaluHeaderSize : kH265NaluHeaderSize)) {
147  // There was not enough data, wait for more.
148  return false;
149  }
150 
151  // Update search position for next nalu.
152  current_search_position_ += start_code_offset + start_code_size;
153 
154  // |next_nalu_info_| is made global intentionally to avoid repetitive memory
155  // allocation which could create memory fragments.
156  if (!next_nalu_info_)
157  next_nalu_info_.reset(new NaluInfo);
158  if (!next_nalu_info_->nalu.Initialize(type_, next_nalu_ptr, next_nalu_size)) {
159  // This NAL unit is invalid, skip it and search again.
160  return SearchForNalu(position, nalu);
161  }
162  next_nalu_info_->position = current_search_position_ - start_code_size;
163  next_nalu_info_->start_code_size = start_code_size;
164 
165  const bool current_nalu_set = current_nalu_info_ ? true : false;
166  if (current_nalu_info_) {
167  // Starting position for the nalu including start code.
168  *position = current_nalu_info_->position;
169  // Update the NALU because the data pointer may have been invalidated.
170  const uint8_t* current_nalu_ptr =
171  next_nalu_ptr +
172  (current_nalu_info_->position + current_nalu_info_->start_code_size) -
173  current_search_position_;
174  const uint64_t current_nalu_size = next_nalu_info_->position -
175  current_nalu_info_->position -
176  current_nalu_info_->start_code_size;
177  CHECK(nalu->Initialize(type_, current_nalu_ptr, current_nalu_size));
178  }
179  current_nalu_info_.swap(next_nalu_info_);
180  return current_nalu_set ? true : SearchForNalu(position, nalu);
181 }
182 
183 bool EsParserH26x::ParseInternal() {
184  uint64_t position;
185  Nalu nalu;
186  VideoSliceInfo video_slice_info;
187  while (SearchForNalu(&position, &nalu)) {
188  // ITU H.264 sec. 7.4.1.2.3
189  // H264: The first of the NAL units with |can_start_access_unit() == true|
190  // after the last VCL NAL unit of a primary coded picture specifies the
191  // start of a new access unit.
192  // ITU H.265 sec. 7.4.2.4.4
193  // H265: The first of the NAL units with |can_start_access_unit() == true|
194  // after the last VCL NAL unit preceding firstBlPicNalUnit (the first
195  // VCL NAL unit of a coded picture with nuh_layer_id equal to 0), if
196  // any, specifies the start of a new access unit.
197  if (nalu.can_start_access_unit()) {
198  if (!next_access_unit_position_set_) {
199  next_access_unit_position_set_ = true;
200  next_access_unit_position_ = position;
201  }
202  RCHECK(ProcessNalu(nalu, &video_slice_info));
203  if (nalu.is_vcl() && !video_slice_info.valid) {
204  // This could happen only if decoder config is not available yet. Drop
205  // this frame.
206  DCHECK(!current_video_slice_info_.valid);
207  next_access_unit_position_set_ = false;
208  continue;
209  }
210  } else if (nalu.is_vcl()) {
211  // This isn't the first VCL NAL unit. Next access unit should start after
212  // this NAL unit.
213  next_access_unit_position_set_ = false;
214  continue;
215  }
216 
217  // AUD shall be the first NAL unit if present. There shall be at most one
218  // AUD in any access unit. We can emit the current access unit which shall
219  // not contain the AUD.
220  if (nalu.is_aud()) {
221  RCHECK(EmitCurrentAccessUnit());
222  continue;
223  }
224 
225  // We can only determine if the current access unit ends after seeing
226  // another VCL NAL unit.
227  if (!video_slice_info.valid)
228  continue;
229 
230  // Check if it is the first VCL NAL unit of a primary coded picture. It is
231  // always true for H265 as nuh_layer_id shall be == 0 at this point.
232  bool is_first_vcl_nalu = true;
233  if (type_ == Nalu::kH264) {
234  if (current_video_slice_info_.valid) {
235  // ITU H.264 sec. 7.4.1.2.4 Detection of the first VCL NAL unit of a
236  // primary coded picture. Only pps_id and frame_num are checked here.
237  is_first_vcl_nalu =
238  video_slice_info.frame_num != current_video_slice_info_.frame_num ||
239  video_slice_info.pps_id != current_video_slice_info_.pps_id;
240  }
241  }
242  if (!is_first_vcl_nalu) {
243  // This isn't the first VCL NAL unit. Next access unit should start after
244  // this NAL unit.
245  next_access_unit_position_set_ = false;
246  continue;
247  }
248 
249  DCHECK(next_access_unit_position_set_);
250  RCHECK(EmitCurrentAccessUnit());
251 
252  // Delete the data we have already processed.
253  es_queue_->Trim(next_access_unit_position_);
254 
255  current_access_unit_position_ = next_access_unit_position_;
256  current_video_slice_info_ = video_slice_info;
257  next_access_unit_position_set_ = false;
258  }
259  return true;
260 }
261 
262 bool EsParserH26x::EmitCurrentAccessUnit() {
263  if (current_video_slice_info_.valid) {
264  if (current_video_slice_info_.is_key_frame)
265  waiting_for_key_frame_ = false;
266  if (!waiting_for_key_frame_) {
267  RCHECK(
268  EmitFrame(current_access_unit_position_,
269  next_access_unit_position_ - current_access_unit_position_,
270  current_video_slice_info_.is_key_frame,
271  current_video_slice_info_.pps_id));
272  }
273  current_video_slice_info_.valid = false;
274  }
275  return true;
276 }
277 
278 bool EsParserH26x::EmitFrame(int64_t access_unit_pos,
279  int access_unit_size,
280  bool is_key_frame,
281  int pps_id) {
282  // Get the access unit timing info.
283  TimingDesc current_timing_desc = {kNoTimestamp, kNoTimestamp};
284  while (!timing_desc_list_.empty() &&
285  timing_desc_list_.front().first <= access_unit_pos) {
286  current_timing_desc = timing_desc_list_.front().second;
287  timing_desc_list_.pop_front();
288  }
289  if (current_timing_desc.pts == kNoTimestamp)
290  return false;
291 
292  // Emit a frame.
293  DVLOG(LOG_LEVEL_ES) << "Emit frame: stream_pos=" << access_unit_pos
294  << " size=" << access_unit_size << " pts "
295  << current_timing_desc.pts << " timing_desc_list size "
296  << timing_desc_list_.size();
297  int es_size;
298  const uint8_t* es;
299  es_queue_->PeekAt(access_unit_pos, &es, &es_size);
300 
301  // Convert frame to unit stream format.
302  std::vector<uint8_t> converted_frame;
303  if (!stream_converter_->ConvertByteStreamToNalUnitStream(
304  es, access_unit_size, &converted_frame)) {
305  DLOG(ERROR) << "Failure to convert video frame to unit stream format.";
306  return false;
307  }
308 
309  // Update the video decoder configuration if needed.
310  RCHECK(UpdateVideoDecoderConfig(pps_id));
311 
312  // Create the media sample, emitting always the previous sample after
313  // calculating its duration.
314  std::shared_ptr<MediaSample> media_sample = MediaSample::CopyFrom(
315  converted_frame.data(), converted_frame.size(), is_key_frame);
316  media_sample->set_dts(current_timing_desc.dts);
317  media_sample->set_pts(current_timing_desc.pts);
318  if (pending_sample_) {
319  if (media_sample->dts() <= pending_sample_->dts()) {
320  LOG(WARNING) << "[MPEG-2 TS] PID " << pid() << " dts "
321  << media_sample->dts()
322  << " less than or equal to previous dts "
323  << pending_sample_->dts();
324  // Keep the sample but adjust the sample duration to a very small value,
325  // in case that the sample is still needed for the decoding afterwards.
326  const int64_t kArbitrarySmallDuration = 0.001 * kMpeg2Timescale; // 1ms.
327  pending_sample_->set_duration(kArbitrarySmallDuration);
328  } else {
329  uint64_t sample_duration = media_sample->dts() - pending_sample_->dts();
330  pending_sample_->set_duration(sample_duration);
331 
332  const int kArbitraryGapScale = 10;
333  if (sample_duration > kArbitraryGapScale * pending_sample_duration_) {
334  LOG(WARNING) << "[MPEG-2 TS] PID " << pid() << " Possible GAP at dts "
335  << pending_sample_->dts() << " with next sample at dts "
336  << media_sample->dts() << " (difference "
337  << sample_duration << ")";
338  }
339 
340  pending_sample_duration_ = sample_duration;
341  }
342  emit_sample_cb_.Run(std::move(pending_sample_));
343  }
344  pending_sample_ = media_sample;
345 
346  return true;
347 }
348 
349 } // namespace mp2t
350 } // namespace media
351 } // namespace shaka
All the methods that are virtual are virtual for mocking.