DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator
es_parser_h26x.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "packager/media/formats/mp2t/es_parser_h26x.h"
6 
7 #include <stdint.h>
8 
9 #include "packager/base/logging.h"
10 #include "packager/base/numerics/safe_conversions.h"
11 #include "packager/media/base/media_sample.h"
12 #include "packager/media/base/offset_byte_queue.h"
13 #include "packager/media/base/timestamp.h"
14 #include "packager/media/base/video_stream_info.h"
15 #include "packager/media/codecs/h264_byte_to_unit_stream_converter.h"
16 #include "packager/media/codecs/h265_byte_to_unit_stream_converter.h"
17 #include "packager/media/formats/mp2t/mp2t_common.h"
18 
19 namespace shaka {
20 namespace media {
21 namespace mp2t {
22 
23 EsParserH26x::EsParserH26x(
24  Nalu::CodecType type,
25  scoped_ptr<H26xByteToUnitStreamConverter> stream_converter,
26  uint32_t pid,
27  const EmitSampleCB& emit_sample_cb)
28  : EsParser(pid),
29  emit_sample_cb_(emit_sample_cb),
30  type_(type),
31  es_queue_(new media::OffsetByteQueue()),
32  current_access_unit_pos_(0),
33  found_access_unit_(false),
34  stream_converter_(stream_converter.Pass()),
35  pending_sample_duration_(0),
36  waiting_for_key_frame_(true) {}
37 
38 EsParserH26x::~EsParserH26x() {}
39 
40 bool EsParserH26x::Parse(const uint8_t* buf,
41  int size,
42  int64_t pts,
43  int64_t dts) {
44  // Note: Parse is invoked each time a PES packet has been reassembled.
45  // Unfortunately, a PES packet does not necessarily map
46  // to an h264/h265 access unit, although the HLS recommendation is to use one
47  // PES for each access unit (but this is just a recommendation and some
48  // streams do not comply with this recommendation).
49 
50  // HLS recommendation: "In AVC video, you should have both a DTS and a
51  // PTS in each PES header".
52  // However, some streams do not comply with this recommendation.
53  DVLOG_IF(1, pts == kNoTimestamp) << "Each video PES should have a PTS";
54  if (pts != kNoTimestamp) {
55  TimingDesc timing_desc;
56  timing_desc.pts = pts;
57  timing_desc.dts = (dts != kNoTimestamp) ? dts : pts;
58 
59  // Link the end of the byte queue with the incoming timing descriptor.
60  timing_desc_list_.push_back(
61  std::pair<int64_t, TimingDesc>(es_queue_->tail(), timing_desc));
62  }
63 
64  // Add the incoming bytes to the ES queue.
65  es_queue_->Push(buf, size);
66 
67  // Skip to the first access unit.
68  if (!found_access_unit_) {
69  if (!FindNextAccessUnit(current_access_unit_pos_,
70  &current_access_unit_pos_)) {
71  return true;
72  }
73  es_queue_->Trim(current_access_unit_pos_);
74  found_access_unit_ = true;
75  }
76 
77  return ParseInternal();
78 }
79 
80 void EsParserH26x::Flush() {
81  DVLOG(1) << "EsParserH26x::Flush";
82 
83  // Simulate an additional AUD to force emitting the last access unit
84  // which is assumed to be complete at this point.
85  if (type_ == Nalu::kH264) {
86  uint8_t aud[] = {0x00, 0x00, 0x01, 0x09};
87  es_queue_->Push(aud, sizeof(aud));
88  } else {
89  DCHECK_EQ(Nalu::kH265, type_);
90  uint8_t aud[] = {0x00, 0x00, 0x01, 0x46, 0x01};
91  es_queue_->Push(aud, sizeof(aud));
92  }
93  ParseInternal();
94 
95  if (pending_sample_) {
96  // Flush pending sample.
97  DCHECK(pending_sample_duration_);
98  pending_sample_->set_duration(pending_sample_duration_);
99  emit_sample_cb_.Run(pid(), pending_sample_);
100  pending_sample_ = scoped_refptr<MediaSample>();
101  }
102 }
103 
104 void EsParserH26x::Reset() {
105  es_queue_.reset(new media::OffsetByteQueue());
106  current_access_unit_pos_ = 0;
107  found_access_unit_ = false;
108  timing_desc_list_.clear();
109  pending_sample_ = scoped_refptr<MediaSample>();
110  pending_sample_duration_ = 0;
111  waiting_for_key_frame_ = true;
112 }
113 
114 bool EsParserH26x::FindNextAccessUnit(int64_t stream_pos,
115  int64_t* next_unit_pos) {
116  // TODO(modmaker): Avoid re-parsing by saving old position.
117  // Every access unit must have a VCL entry and defines the end of the access
118  // unit. Track it to return on the element after it so we get the whole
119  // access unit.
120  bool seen_vcl_nalu = false;
121  while (true) {
122  const uint8_t* es;
123  int size;
124  es_queue_->PeekAt(stream_pos, &es, &size);
125 
126  // Find a start code.
127  uint64_t start_code_offset;
128  uint8_t start_code_size;
129  bool start_code_found = NaluReader::FindStartCode(
130  es, size, &start_code_offset, &start_code_size);
131  stream_pos += start_code_offset;
132 
133  // No start code found or NALU type not available yet.
134  if (!start_code_found ||
135  start_code_offset + start_code_size >= static_cast<uint64_t>(size)) {
136  return false;
137  }
138 
139  Nalu nalu;
140  const uint8_t* nalu_ptr = es + start_code_offset + start_code_size;
141  size_t nalu_size = size - (start_code_offset + start_code_size);
142  if (nalu.Initialize(type_, nalu_ptr, nalu_size)) {
143  // ITU H.264 sec. 7.4.1.2.3
144  // H264: The first of the NAL units with |can_start_access_unit() == true|
145  // after the last VCL NAL unit of a primary coded picture specifies the
146  // start of a new access unit. |nuh_layer_id()| is for H265 only; it is
147  // included below for ease of computation (the value is always 0).
148  // ITU H.265 sec. 7.4.2.4.4
149  // H265: The first of the NAL units with |can_start_access_unit() == true|
150  // after the last VCL NAL unit preceding firstBlPicNalUnit (the first
151  // VCL NAL unit of a coded picture with nuh_layer_id equal to 0), if
152  // any, specifies the start of a new access unit.
153  // TODO(modmaker): This does not handle nuh_layer_id != 0 correctly.
154  // AUD VCL SEI VCL* VPS VCL
155  // | Current method splits here.
156  // | Should split here.
157  // If we are searching for the first access unit, then stop at the first
158  // NAL unit that can start an access unit.
159  if ((seen_vcl_nalu || !found_access_unit_) &&
160  nalu.can_start_access_unit()) {
161  break;
162  }
163  bool is_vcl_nalu = nalu.is_video_slice() && nalu.nuh_layer_id() == 0;
164  seen_vcl_nalu |= is_vcl_nalu;
165  }
166 
167  // The current NALU is not an AUD, skip the start code
168  // and continue parsing the stream.
169  stream_pos += start_code_size;
170  }
171 
172  *next_unit_pos = stream_pos;
173  return true;
174 }
175 
176 bool EsParserH26x::ParseInternal() {
177  DCHECK_LE(es_queue_->head(), current_access_unit_pos_);
178  DCHECK_LE(current_access_unit_pos_, es_queue_->tail());
179 
180  // Resume parsing later if no AUD was found.
181  int64_t access_unit_end;
182  if (!FindNextAccessUnit(current_access_unit_pos_, &access_unit_end))
183  return true;
184 
185  // At this point, we know we have a full access unit.
186  bool is_key_frame = false;
187  int pps_id_for_access_unit = -1;
188 
189  const uint8_t* es;
190  int size;
191  es_queue_->PeekAt(current_access_unit_pos_, &es, &size);
192  int access_unit_size = base::checked_cast<int, int64_t>(
193  access_unit_end - current_access_unit_pos_);
194  DCHECK_LE(access_unit_size, size);
195  NaluReader reader(type_, kIsAnnexbByteStream, es, access_unit_size);
196 
197  // TODO(modmaker): Consider combining with FindNextAccessUnit to avoid
198  // scanning the data twice.
199  while (true) {
200  Nalu nalu;
201  bool is_eos = false;
202  switch (reader.Advance(&nalu)) {
203  case NaluReader::kOk:
204  break;
205  case NaluReader::kEOStream:
206  is_eos = true;
207  break;
208  default:
209  return false;
210  }
211  if (is_eos)
212  break;
213 
214  if (!ProcessNalu(nalu, &is_key_frame, &pps_id_for_access_unit))
215  return false;
216  }
217 
218  if (waiting_for_key_frame_) {
219  waiting_for_key_frame_ = !is_key_frame;
220  }
221  if (!waiting_for_key_frame_) {
222  // Emit a frame and move the stream to the next AUD position.
223  RCHECK(EmitFrame(current_access_unit_pos_, access_unit_size,
224  is_key_frame, pps_id_for_access_unit));
225  }
226  current_access_unit_pos_ = access_unit_end;
227  es_queue_->Trim(current_access_unit_pos_);
228 
229  return true;
230 }
231 
232 bool EsParserH26x::EmitFrame(int64_t access_unit_pos,
233  int access_unit_size,
234  bool is_key_frame,
235  int pps_id) {
236  // Get the access unit timing info.
237  TimingDesc current_timing_desc = {kNoTimestamp, kNoTimestamp};
238  while (!timing_desc_list_.empty() &&
239  timing_desc_list_.front().first <= access_unit_pos) {
240  current_timing_desc = timing_desc_list_.front().second;
241  timing_desc_list_.pop_front();
242  }
243  if (current_timing_desc.pts == kNoTimestamp)
244  return false;
245 
246  // Emit a frame.
247  DVLOG(LOG_LEVEL_ES) << "Emit frame: stream_pos=" << current_access_unit_pos_
248  << " size=" << access_unit_size;
249  int es_size;
250  const uint8_t* es;
251  es_queue_->PeekAt(current_access_unit_pos_, &es, &es_size);
252  CHECK_GE(es_size, access_unit_size);
253 
254  // Convert frame to unit stream format.
255  std::vector<uint8_t> converted_frame;
256  if (!stream_converter_->ConvertByteStreamToNalUnitStream(
257  es, access_unit_size, &converted_frame)) {
258  DLOG(ERROR) << "Failure to convert video frame to unit stream format.";
259  return false;
260  }
261 
262  // Update the video decoder configuration if needed.
263  RCHECK(UpdateVideoDecoderConfig(pps_id));
264 
265  // Create the media sample, emitting always the previous sample after
266  // calculating its duration.
267  scoped_refptr<MediaSample> media_sample = MediaSample::CopyFrom(
268  converted_frame.data(), converted_frame.size(), is_key_frame);
269  media_sample->set_dts(current_timing_desc.dts);
270  media_sample->set_pts(current_timing_desc.pts);
271  if (pending_sample_) {
272  DCHECK_GT(media_sample->dts(), pending_sample_->dts());
273  pending_sample_duration_ = media_sample->dts() - pending_sample_->dts();
274  pending_sample_->set_duration(pending_sample_duration_);
275  emit_sample_cb_.Run(pid(), pending_sample_);
276  }
277  pending_sample_ = media_sample;
278 
279  return true;
280 }
281 
282 } // namespace mp2t
283 } // namespace media
284 } // namespace shaka
static scoped_refptr< MediaSample > CopyFrom(const uint8_t *data, size_t size, bool is_key_frame)
Definition: media_sample.cc:45