DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs Enumerator
es_parser_h264.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "packager/media/formats/mp2t/es_parser_h264.h"
6 
7 #include <stdint.h>
8 
9 #include "packager/base/logging.h"
10 #include "packager/base/numerics/safe_conversions.h"
11 #include "packager/media/base/media_sample.h"
12 #include "packager/media/base/offset_byte_queue.h"
13 #include "packager/media/base/timestamp.h"
14 #include "packager/media/base/video_stream_info.h"
15 #include "packager/media/filters/avc_decoder_configuration.h"
16 #include "packager/media/filters/h264_byte_to_unit_stream_converter.h"
17 #include "packager/media/filters/h264_parser.h"
18 #include "packager/media/formats/mp2t/mp2t_common.h"
19 
20 namespace edash_packager {
21 namespace media {
22 namespace mp2t {
23 
24 namespace {
25 
// Smallest possible footprint of an AUD NALU in the byte stream:
// a 3-byte start code followed by the 1-byte NALU header.
const int kMinAUDSize = 3 + 1;
29 
30 } // anonymous namespace
31 
32 EsParserH264::EsParserH264(uint32_t pid,
33  const NewStreamInfoCB& new_stream_info_cb,
34  const EmitSampleCB& emit_sample_cb)
35  : EsParser(pid),
36  new_stream_info_cb_(new_stream_info_cb),
37  emit_sample_cb_(emit_sample_cb),
38  es_queue_(new media::OffsetByteQueue()),
39  h264_parser_(new H264Parser()),
40  current_access_unit_pos_(0),
41  next_access_unit_pos_(0),
42  stream_converter_(new H264ByteToUnitStreamConverter),
43  decoder_config_check_pending_(false),
44  pending_sample_duration_(0),
45  waiting_for_key_frame_(true) {
46 }
47 
48 EsParserH264::~EsParserH264() {
49 }
50 
51 bool EsParserH264::Parse(const uint8_t* buf,
52  int size,
53  int64_t pts,
54  int64_t dts) {
55  // Note: Parse is invoked each time a PES packet has been reassembled.
56  // Unfortunately, a PES packet does not necessarily map
57  // to an h264 access unit, although the HLS recommendation is to use one PES
58  // for each access unit (but this is just a recommendation and some streams
59  // do not comply with this recommendation).
60 
61  // HLS recommendation: "In AVC video, you should have both a DTS and a
62  // PTS in each PES header".
63  // However, some streams do not comply with this recommendation.
64  DVLOG_IF(1, pts == kNoTimestamp) << "Each video PES should have a PTS";
65  if (pts != kNoTimestamp) {
66  TimingDesc timing_desc;
67  timing_desc.pts = pts;
68  timing_desc.dts = (dts != kNoTimestamp) ? dts : pts;
69 
70  // Link the end of the byte queue with the incoming timing descriptor.
71  timing_desc_list_.push_back(
72  std::pair<int64_t, TimingDesc>(es_queue_->tail(), timing_desc));
73  }
74 
75  // Add the incoming bytes to the ES queue.
76  es_queue_->Push(buf, size);
77  return ParseInternal();
78 }
79 
80 void EsParserH264::Flush() {
81  DVLOG(1) << "EsParserH264::Flush";
82 
83  if (FindAUD(&current_access_unit_pos_)) {
84  // Simulate an additional AUD to force emitting the last access unit
85  // which is assumed to be complete at this point.
86  uint8_t aud[] = {0x00, 0x00, 0x01, 0x09};
87  es_queue_->Push(aud, sizeof(aud));
88  ParseInternal();
89  }
90 
91  if (pending_sample_) {
92  // Flush pending sample.
93  DCHECK(pending_sample_duration_);
94  pending_sample_->set_duration(pending_sample_duration_);
95  emit_sample_cb_.Run(pid(), pending_sample_);
96  pending_sample_ = scoped_refptr<MediaSample>();
97  }
98 }
99 
100 void EsParserH264::Reset() {
101  DVLOG(1) << "EsParserH264::Reset";
102  es_queue_.reset(new media::OffsetByteQueue());
103  h264_parser_.reset(new H264Parser());
104  current_access_unit_pos_ = 0;
105  next_access_unit_pos_ = 0;
106  timing_desc_list_.clear();
107  last_video_decoder_config_ = scoped_refptr<StreamInfo>();
108  decoder_config_check_pending_ = false;
109  pending_sample_ = scoped_refptr<MediaSample>();
110  pending_sample_duration_ = 0;
111  waiting_for_key_frame_ = true;
112 }
113 
114 bool EsParserH264::FindAUD(int64_t* stream_pos) {
115  while (true) {
116  const uint8_t* es;
117  int size;
118  es_queue_->PeekAt(*stream_pos, &es, &size);
119 
120  // Find a start code and move the stream to the start code parser position.
121  uint64_t start_code_offset;
122  uint8_t start_code_size;
123  bool start_code_found = NaluReader::FindStartCode(
124  es, size, &start_code_offset, &start_code_size);
125  *stream_pos += start_code_offset;
126 
127  // No H264 start code found or NALU type not available yet.
128  if (!start_code_found ||
129  start_code_offset + start_code_size >= static_cast<uint64_t>(size)) {
130  return false;
131  }
132 
133  // Exit the parser loop when an AUD is found.
134  // Note: NALU header for an AUD:
135  // - ref_idc must be 0
136  // - type must be Nalu::H264_AUD
137  if (es[start_code_offset + start_code_size] == Nalu::H264_AUD)
138  break;
139 
140  // The current NALU is not an AUD, skip the start code
141  // and continue parsing the stream.
142  *stream_pos += start_code_size;
143  }
144 
145  return true;
146 }
147 
148 bool EsParserH264::ParseInternal() {
149  DCHECK_LE(es_queue_->head(), current_access_unit_pos_);
150  DCHECK_LE(current_access_unit_pos_, next_access_unit_pos_);
151  DCHECK_LE(next_access_unit_pos_, es_queue_->tail());
152 
153  // Find the next AUD located at or after |current_access_unit_pos_|. This is
154  // needed since initially |current_access_unit_pos_| might not point to
155  // an AUD.
156  // Discard all the data before the updated |current_access_unit_pos_|
157  // since it won't be used again.
158  bool aud_found = FindAUD(&current_access_unit_pos_);
159  es_queue_->Trim(current_access_unit_pos_);
160  if (next_access_unit_pos_ < current_access_unit_pos_)
161  next_access_unit_pos_ = current_access_unit_pos_;
162 
163  // Resume parsing later if no AUD was found.
164  if (!aud_found)
165  return true;
166 
167  // Find the next AUD to make sure we have a complete access unit.
168  if (next_access_unit_pos_ < current_access_unit_pos_ + kMinAUDSize) {
169  next_access_unit_pos_ = current_access_unit_pos_ + kMinAUDSize;
170  DCHECK_LE(next_access_unit_pos_, es_queue_->tail());
171  }
172  if (!FindAUD(&next_access_unit_pos_))
173  return true;
174 
175  // At this point, we know we have a full access unit.
176  bool is_key_frame = false;
177  int pps_id_for_access_unit = -1;
178 
179  const uint8_t* es;
180  int size;
181  es_queue_->PeekAt(current_access_unit_pos_, &es, &size);
182  int access_unit_size = base::checked_cast<int, int64_t>(
183  next_access_unit_pos_ - current_access_unit_pos_);
184  DCHECK_LE(access_unit_size, size);
185  NaluReader reader(NaluReader::kH264, kIsAnnexbByteStream, es,
186  access_unit_size);
187 
188  while (true) {
189  Nalu nalu;
190  bool is_eos = false;
191  switch (reader.Advance(&nalu)) {
192  case NaluReader::kOk:
193  break;
194  case NaluReader::kEOStream:
195  is_eos = true;
196  break;
197  default:
198  return false;
199  }
200  if (is_eos)
201  break;
202 
203  switch (nalu.type()) {
204  case Nalu::H264_AUD: {
205  DVLOG(LOG_LEVEL_ES) << "Nalu: AUD";
206  break;
207  }
208  case Nalu::H264_SPS: {
209  DVLOG(LOG_LEVEL_ES) << "Nalu: SPS";
210  int sps_id;
211  if (h264_parser_->ParseSps(nalu, &sps_id) != H264Parser::kOk)
212  return false;
213  decoder_config_check_pending_ = true;
214  break;
215  }
216  case Nalu::H264_PPS: {
217  DVLOG(LOG_LEVEL_ES) << "Nalu: PPS";
218  int pps_id;
219  if (h264_parser_->ParsePps(nalu, &pps_id) != H264Parser::kOk) {
220  // Allow PPS parsing to fail if waiting for SPS.
221  if (last_video_decoder_config_)
222  return false;
223  } else {
224  decoder_config_check_pending_ = true;
225  }
226  break;
227  }
228  case Nalu::H264_IDRSlice:
229  case Nalu::H264_NonIDRSlice: {
230  is_key_frame = (nalu.type() == Nalu::H264_IDRSlice);
231  DVLOG(LOG_LEVEL_ES) << "Nalu: slice IDR=" << is_key_frame;
232  H264SliceHeader shdr;
233  if (h264_parser_->ParseSliceHeader(nalu, &shdr) != H264Parser::kOk) {
234  // Only accept an invalid SPS/PPS at the beginning when the stream
235  // does not necessarily start with an SPS/PPS/IDR.
236  if (last_video_decoder_config_)
237  return false;
238  } else {
239  pps_id_for_access_unit = shdr.pic_parameter_set_id;
240  }
241  break;
242  }
243  default: {
244  DVLOG(LOG_LEVEL_ES) << "Nalu: " << nalu.type();
245  }
246  }
247  }
248 
249  if (waiting_for_key_frame_) {
250  waiting_for_key_frame_ = !is_key_frame;
251  }
252  if (!waiting_for_key_frame_) {
253  // Emit a frame and move the stream to the next AUD position.
254  RCHECK(EmitFrame(current_access_unit_pos_, access_unit_size,
255  is_key_frame, pps_id_for_access_unit));
256  }
257  current_access_unit_pos_ = next_access_unit_pos_;
258  es_queue_->Trim(current_access_unit_pos_);
259 
260  return true;
261 }
262 
263 bool EsParserH264::EmitFrame(int64_t access_unit_pos,
264  int access_unit_size,
265  bool is_key_frame,
266  int pps_id) {
267  // Get the access unit timing info.
268  TimingDesc current_timing_desc = {kNoTimestamp, kNoTimestamp};
269  while (!timing_desc_list_.empty() &&
270  timing_desc_list_.front().first <= access_unit_pos) {
271  current_timing_desc = timing_desc_list_.front().second;
272  timing_desc_list_.pop_front();
273  }
274  if (current_timing_desc.pts == kNoTimestamp)
275  return false;
276 
277  // Emit a frame.
278  DVLOG(LOG_LEVEL_ES) << "Emit frame: stream_pos=" << current_access_unit_pos_
279  << " size=" << access_unit_size;
280  int es_size;
281  const uint8_t* es;
282  es_queue_->PeekAt(current_access_unit_pos_, &es, &es_size);
283  CHECK_GE(es_size, access_unit_size);
284 
285  // Convert frame to unit stream format.
286  std::vector<uint8_t> converted_frame;
287  if (!stream_converter_->ConvertByteStreamToNalUnitStream(
288  es, access_unit_size, &converted_frame)) {
289  DLOG(ERROR) << "Failure to convert video frame to unit stream format.";
290  return false;
291  }
292 
293  if (decoder_config_check_pending_) {
294  // Update the video decoder configuration if needed.
295  const H264Pps* pps = h264_parser_->GetPps(pps_id);
296  if (!pps) {
297  // Only accept an invalid PPS at the beginning when the stream
298  // does not necessarily start with an SPS/PPS/IDR.
299  // In this case, the initial frames are conveyed to the upper layer with
300  // an invalid VideoDecoderConfig and it's up to the upper layer
301  // to process this kind of frame accordingly.
302  if (last_video_decoder_config_)
303  return false;
304  } else {
305  const H264Sps* sps = h264_parser_->GetSps(pps->seq_parameter_set_id);
306  if (!sps)
307  return false;
308  RCHECK(UpdateVideoDecoderConfig(sps));
309  decoder_config_check_pending_ = false;
310  }
311  }
312 
313  // Create the media sample, emitting always the previous sample after
314  // calculating its duration.
315  scoped_refptr<MediaSample> media_sample = MediaSample::CopyFrom(
316  converted_frame.data(), converted_frame.size(), is_key_frame);
317  media_sample->set_dts(current_timing_desc.dts);
318  media_sample->set_pts(current_timing_desc.pts);
319  if (pending_sample_) {
320  DCHECK_GT(media_sample->dts(), pending_sample_->dts());
321  pending_sample_duration_ = media_sample->dts() - pending_sample_->dts();
322  pending_sample_->set_duration(pending_sample_duration_);
323  emit_sample_cb_.Run(pid(), pending_sample_);
324  }
325  pending_sample_ = media_sample;
326 
327  return true;
328 }
329 
330 bool EsParserH264::UpdateVideoDecoderConfig(const H264Sps* sps) {
331  std::vector<uint8_t> decoder_config_record;
332  if (!stream_converter_->GetAVCDecoderConfigurationRecord(
333  &decoder_config_record)) {
334  DLOG(ERROR) << "Failure to construct an AVCDecoderConfigurationRecord";
335  return false;
336  }
337 
338  if (last_video_decoder_config_) {
339  if (last_video_decoder_config_->extra_data() != decoder_config_record) {
340  // Video configuration has changed. Issue warning.
341  // TODO(tinskip): Check the nature of the configuration change. Only
342  // minor configuration changes (such as frame ordering) can be handled
343  // gracefully by decoders without notification. Major changes (such as
344  // video resolution changes) should be treated as errors.
345  LOG(WARNING) << "H.264 decoder configuration has changed.";
346  last_video_decoder_config_->set_extra_data(decoder_config_record);
347  }
348  return true;
349  }
350 
351  uint32_t coded_width = 0;
352  uint32_t coded_height = 0;
353  uint32_t pixel_width = 0;
354  uint32_t pixel_height = 0;
355  if (!ExtractResolutionFromSps(*sps, &coded_width, &coded_height, &pixel_width,
356  &pixel_height)) {
357  LOG(ERROR) << "Failed to parse SPS.";
358  return false;
359  }
360 
361  last_video_decoder_config_ = scoped_refptr<StreamInfo>(
362  new VideoStreamInfo(
363  pid(),
364  kMpeg2Timescale,
365  kInfiniteDuration,
366  kCodecH264,
367  AVCDecoderConfiguration::GetCodecString(decoder_config_record[1],
368  decoder_config_record[2],
369  decoder_config_record[3]),
370  std::string(),
371  coded_width,
372  coded_height,
373  pixel_width,
374  pixel_height,
375  0,
376  H264ByteToUnitStreamConverter::kUnitStreamNaluLengthSize,
377  decoder_config_record.data(),
378  decoder_config_record.size(),
379  false));
380  DVLOG(1) << "Profile IDC: " << sps->profile_idc;
381  DVLOG(1) << "Level IDC: " << sps->level_idc;
382  DVLOG(1) << "log2_max_frame_num_minus4: " << sps->log2_max_frame_num_minus4;
383 
384  // Video config notification.
385  new_stream_info_cb_.Run(last_video_decoder_config_);
386 
387  return true;
388 }
389 
390 } // namespace mp2t
391 } // namespace media
392 } // namespace edash_packager
static scoped_refptr< MediaSample > CopyFrom(const uint8_t *data, size_t size, bool is_key_frame)
Definition: media_sample.cc:45