DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs
es_parser_h264.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "packager/media/formats/mp2t/es_parser_h264.h"
6 
7 #include <stdint.h>
8 
9 #include "packager/base/logging.h"
10 #include "packager/base/numerics/safe_conversions.h"
11 #include "packager/media/base/media_sample.h"
12 #include "packager/media/base/offset_byte_queue.h"
13 #include "packager/media/base/timestamp.h"
14 #include "packager/media/base/video_stream_info.h"
15 #include "packager/media/filters/h264_byte_to_unit_stream_converter.h"
16 #include "packager/media/filters/h264_parser.h"
17 #include "packager/media/formats/mp2t/mp2t_common.h"
18 
19 namespace edash_packager {
20 namespace media {
21 namespace mp2t {
22 
23 namespace {
24 
25 // Minimum size, in bytes, of an AUD NALU as it appears in the byte stream:
26 // 3 bytes for the start code + 1 byte for the NALU header (NALU type).
27 const int kMinAUDSize = 4;
28 
29 } // anonymous namespace
30 
31 EsParserH264::EsParserH264(uint32_t pid,
32  const NewStreamInfoCB& new_stream_info_cb,
33  const EmitSampleCB& emit_sample_cb)
34  : EsParser(pid),
35  new_stream_info_cb_(new_stream_info_cb),
36  emit_sample_cb_(emit_sample_cb),
37  es_queue_(new media::OffsetByteQueue()),
38  h264_parser_(new H264Parser()),
39  current_access_unit_pos_(0),
40  next_access_unit_pos_(0),
41  stream_converter_(new H264ByteToUnitStreamConverter),
42  decoder_config_check_pending_(false),
43  pending_sample_duration_(0),
44  waiting_for_key_frame_(true) {
45 }
46 
47 EsParserH264::~EsParserH264() {
48 }
49 
50 bool EsParserH264::Parse(const uint8_t* buf,
51  int size,
52  int64_t pts,
53  int64_t dts) {
54  // Note: Parse is invoked each time a PES packet has been reassembled.
55  // Unfortunately, a PES packet does not necessarily map
56  // to an h264 access unit, although the HLS recommendation is to use one PES
57  // for each access unit (but this is just a recommendation and some streams
58  // do not comply with this recommendation).
59 
60  // HLS recommendation: "In AVC video, you should have both a DTS and a
61  // PTS in each PES header".
62  // However, some streams do not comply with this recommendation.
63  DVLOG_IF(1, pts == kNoTimestamp) << "Each video PES should have a PTS";
64  if (pts != kNoTimestamp) {
65  TimingDesc timing_desc;
66  timing_desc.pts = pts;
67  timing_desc.dts = (dts != kNoTimestamp) ? dts : pts;
68 
69  // Link the end of the byte queue with the incoming timing descriptor.
70  timing_desc_list_.push_back(
71  std::pair<int64_t, TimingDesc>(es_queue_->tail(), timing_desc));
72  }
73 
74  // Add the incoming bytes to the ES queue.
75  es_queue_->Push(buf, size);
76  return ParseInternal();
77 }
78 
79 void EsParserH264::Flush() {
80  DVLOG(1) << "EsParserH264::Flush";
81 
82  if (FindAUD(&current_access_unit_pos_)) {
83  // Simulate an additional AUD to force emitting the last access unit
84  // which is assumed to be complete at this point.
85  uint8_t aud[] = {0x00, 0x00, 0x01, 0x09};
86  es_queue_->Push(aud, sizeof(aud));
87  ParseInternal();
88  }
89 
90  if (pending_sample_) {
91  // Flush pending sample.
92  DCHECK(pending_sample_duration_);
93  pending_sample_->set_duration(pending_sample_duration_);
94  emit_sample_cb_.Run(pid(), pending_sample_);
95  pending_sample_ = scoped_refptr<MediaSample>();
96  }
97 }
98 
99 void EsParserH264::Reset() {
100  DVLOG(1) << "EsParserH264::Reset";
101  es_queue_.reset(new media::OffsetByteQueue());
102  h264_parser_.reset(new H264Parser());
103  current_access_unit_pos_ = 0;
104  next_access_unit_pos_ = 0;
105  timing_desc_list_.clear();
106  last_video_decoder_config_ = scoped_refptr<StreamInfo>();
107  decoder_config_check_pending_ = false;
108  pending_sample_ = scoped_refptr<MediaSample>();
109  pending_sample_duration_ = 0;
110  waiting_for_key_frame_ = true;
111 }
112 
113 bool EsParserH264::FindAUD(int64_t* stream_pos) {
114  while (true) {
115  const uint8_t* es;
116  int size;
117  es_queue_->PeekAt(*stream_pos, &es, &size);
118 
119  // Find a start code and move the stream to the start code parser position.
120  off_t start_code_offset;
121  off_t start_code_size;
122  bool start_code_found = H264Parser::FindStartCode(
123  es, size, &start_code_offset, &start_code_size);
124  *stream_pos += start_code_offset;
125 
126  // No H264 start code found or NALU type not available yet.
127  if (!start_code_found || start_code_offset + start_code_size >= size)
128  return false;
129 
130  // Exit the parser loop when an AUD is found.
131  // Note: NALU header for an AUD:
132  // - nal_ref_idc must be 0
133  // - nal_unit_type must be H264NALU::kAUD
134  if (es[start_code_offset + start_code_size] == H264NALU::kAUD)
135  break;
136 
137  // The current NALU is not an AUD, skip the start code
138  // and continue parsing the stream.
139  *stream_pos += start_code_size;
140  }
141 
142  return true;
143 }
144 
145 bool EsParserH264::ParseInternal() {
146  DCHECK_LE(es_queue_->head(), current_access_unit_pos_);
147  DCHECK_LE(current_access_unit_pos_, next_access_unit_pos_);
148  DCHECK_LE(next_access_unit_pos_, es_queue_->tail());
149 
150  // Find the next AUD located at or after |current_access_unit_pos_|. This is
151  // needed since initially |current_access_unit_pos_| might not point to
152  // an AUD.
153  // Discard all the data before the updated |current_access_unit_pos_|
154  // since it won't be used again.
155  bool aud_found = FindAUD(&current_access_unit_pos_);
156  es_queue_->Trim(current_access_unit_pos_);
157  if (next_access_unit_pos_ < current_access_unit_pos_)
158  next_access_unit_pos_ = current_access_unit_pos_;
159 
160  // Resume parsing later if no AUD was found.
161  if (!aud_found)
162  return true;
163 
164  // Find the next AUD to make sure we have a complete access unit.
165  if (next_access_unit_pos_ < current_access_unit_pos_ + kMinAUDSize) {
166  next_access_unit_pos_ = current_access_unit_pos_ + kMinAUDSize;
167  DCHECK_LE(next_access_unit_pos_, es_queue_->tail());
168  }
169  if (!FindAUD(&next_access_unit_pos_))
170  return true;
171 
172  // At this point, we know we have a full access unit.
173  bool is_key_frame = false;
174  int pps_id_for_access_unit = -1;
175 
176  const uint8_t* es;
177  int size;
178  es_queue_->PeekAt(current_access_unit_pos_, &es, &size);
179  int access_unit_size = base::checked_cast<int, int64_t>(
180  next_access_unit_pos_ - current_access_unit_pos_);
181  DCHECK_LE(access_unit_size, size);
182  h264_parser_->SetStream(es, access_unit_size);
183 
184  while (true) {
185  bool is_eos = false;
186  H264NALU nalu;
187  switch (h264_parser_->AdvanceToNextNALU(&nalu)) {
188  case H264Parser::kOk:
189  break;
190  case H264Parser::kInvalidStream:
191  case H264Parser::kUnsupportedStream:
192  return false;
193  case H264Parser::kEOStream:
194  is_eos = true;
195  break;
196  }
197  if (is_eos)
198  break;
199 
200  switch (nalu.nal_unit_type) {
201  case H264NALU::kAUD: {
202  DVLOG(LOG_LEVEL_ES) << "NALU: AUD";
203  break;
204  }
205  case H264NALU::kSPS: {
206  DVLOG(LOG_LEVEL_ES) << "NALU: SPS";
207  int sps_id;
208  if (h264_parser_->ParseSPS(&sps_id) != H264Parser::kOk)
209  return false;
210  decoder_config_check_pending_ = true;
211  break;
212  }
213  case H264NALU::kPPS: {
214  DVLOG(LOG_LEVEL_ES) << "NALU: PPS";
215  int pps_id;
216  if (h264_parser_->ParsePPS(&pps_id) != H264Parser::kOk) {
217  // Allow PPS parsing to fail if waiting for SPS.
218  if (last_video_decoder_config_)
219  return false;
220  } else {
221  decoder_config_check_pending_ = true;
222  }
223  break;
224  }
225  case H264NALU::kIDRSlice:
226  case H264NALU::kNonIDRSlice: {
227  is_key_frame = (nalu.nal_unit_type == H264NALU::kIDRSlice);
228  DVLOG(LOG_LEVEL_ES) << "NALU: slice IDR=" << is_key_frame;
229  H264SliceHeader shdr;
230  if (h264_parser_->ParseSliceHeader(nalu, &shdr) != H264Parser::kOk) {
231  // Only accept an invalid SPS/PPS at the beginning when the stream
232  // does not necessarily start with an SPS/PPS/IDR.
233  if (last_video_decoder_config_)
234  return false;
235  } else {
236  pps_id_for_access_unit = shdr.pic_parameter_set_id;
237  }
238  break;
239  }
240  default: {
241  DVLOG(LOG_LEVEL_ES) << "NALU: " << nalu.nal_unit_type;
242  }
243  }
244  }
245 
246  if (waiting_for_key_frame_) {
247  waiting_for_key_frame_ = !is_key_frame;
248  }
249  if (!waiting_for_key_frame_) {
250  // Emit a frame and move the stream to the next AUD position.
251  RCHECK(EmitFrame(current_access_unit_pos_, access_unit_size,
252  is_key_frame, pps_id_for_access_unit));
253  }
254  current_access_unit_pos_ = next_access_unit_pos_;
255  es_queue_->Trim(current_access_unit_pos_);
256 
257  return true;
258 }
259 
260 bool EsParserH264::EmitFrame(int64_t access_unit_pos,
261  int access_unit_size,
262  bool is_key_frame,
263  int pps_id) {
264  // Get the access unit timing info.
265  TimingDesc current_timing_desc = {kNoTimestamp, kNoTimestamp};
266  while (!timing_desc_list_.empty() &&
267  timing_desc_list_.front().first <= access_unit_pos) {
268  current_timing_desc = timing_desc_list_.front().second;
269  timing_desc_list_.pop_front();
270  }
271  if (current_timing_desc.pts == kNoTimestamp)
272  return false;
273 
274  // Emit a frame.
275  DVLOG(LOG_LEVEL_ES) << "Emit frame: stream_pos=" << current_access_unit_pos_
276  << " size=" << access_unit_size;
277  int es_size;
278  const uint8_t* es;
279  es_queue_->PeekAt(current_access_unit_pos_, &es, &es_size);
280  CHECK_GE(es_size, access_unit_size);
281 
282  // Convert frame to unit stream format.
283  std::vector<uint8_t> converted_frame;
284  if (!stream_converter_->ConvertByteStreamToNalUnitStream(
285  es, access_unit_size, &converted_frame)) {
286  DLOG(ERROR) << "Failure to convert video frame to unit stream format.";
287  return false;
288  }
289 
290  if (decoder_config_check_pending_) {
291  // Update the video decoder configuration if needed.
292  const H264PPS* pps = h264_parser_->GetPPS(pps_id);
293  if (!pps) {
294  // Only accept an invalid PPS at the beginning when the stream
295  // does not necessarily start with an SPS/PPS/IDR.
296  // In this case, the initial frames are conveyed to the upper layer with
297  // an invalid VideoDecoderConfig and it's up to the upper layer
298  // to process this kind of frame accordingly.
299  if (last_video_decoder_config_)
300  return false;
301  } else {
302  const H264SPS* sps = h264_parser_->GetSPS(pps->seq_parameter_set_id);
303  if (!sps)
304  return false;
305  RCHECK(UpdateVideoDecoderConfig(sps));
306  decoder_config_check_pending_ = false;
307  }
308  }
309 
310  // Create the media sample, emitting always the previous sample after
311  // calculating its duration.
312  scoped_refptr<MediaSample> media_sample = MediaSample::CopyFrom(
313  converted_frame.data(), converted_frame.size(), is_key_frame);
314  media_sample->set_dts(current_timing_desc.dts);
315  media_sample->set_pts(current_timing_desc.pts);
316  if (pending_sample_) {
317  DCHECK_GT(media_sample->dts(), pending_sample_->dts());
318  pending_sample_duration_ = media_sample->dts() - pending_sample_->dts();
319  pending_sample_->set_duration(pending_sample_duration_);
320  emit_sample_cb_.Run(pid(), pending_sample_);
321  }
322  pending_sample_ = media_sample;
323 
324  return true;
325 }
326 
327 bool EsParserH264::UpdateVideoDecoderConfig(const H264SPS* sps) {
328  std::vector<uint8_t> decoder_config_record;
329  if (!stream_converter_->GetAVCDecoderConfigurationRecord(
330  &decoder_config_record)) {
331  DLOG(ERROR) << "Failure to construct an AVCDecoderConfigurationRecord";
332  return false;
333  }
334 
335  if (last_video_decoder_config_) {
336  if (last_video_decoder_config_->extra_data() != decoder_config_record) {
337  // Video configuration has changed. Issue warning.
338  // TODO(tinskip): Check the nature of the configuration change. Only
339  // minor configuration changes (such as frame ordering) can be handled
340  // gracefully by decoders without notification. Major changes (such as
341  // video resolution changes) should be treated as errors.
342  LOG(WARNING) << "H.264 decoder configuration has changed.";
343  last_video_decoder_config_->set_extra_data(decoder_config_record);
344  }
345  return true;
346  }
347 
348  uint32_t coded_width = 0;
349  uint32_t coded_height = 0;
350  uint32_t pixel_width = 0;
351  uint32_t pixel_height = 0;
352  if (!ExtractResolutionFromSps(*sps, &coded_width, &coded_height, &pixel_width,
353  &pixel_height)) {
354  LOG(ERROR) << "Failed to parse SPS.";
355  return false;
356  }
357 
358  last_video_decoder_config_ = scoped_refptr<StreamInfo>(
359  new VideoStreamInfo(
360  pid(),
361  kMpeg2Timescale,
362  kInfiniteDuration,
363  kCodecH264,
365  decoder_config_record[1],
366  decoder_config_record[2],
367  decoder_config_record[3]),
368  std::string(),
369  coded_width,
370  coded_height,
371  pixel_width,
372  pixel_height,
373  0,
374  H264ByteToUnitStreamConverter::kUnitStreamNaluLengthSize,
375  decoder_config_record.data(),
376  decoder_config_record.size(),
377  false));
378  DVLOG(1) << "Profile IDC: " << sps->profile_idc;
379  DVLOG(1) << "Level IDC: " << sps->level_idc;
380  DVLOG(1) << "log2_max_frame_num_minus4: " << sps->log2_max_frame_num_minus4;
381 
382  // Video config notification.
383  new_stream_info_cb_.Run(last_video_decoder_config_);
384 
385  return true;
386 }
387 
388 } // namespace mp2t
389 } // namespace media
390 } // namespace edash_packager
static scoped_refptr< MediaSample > CopyFrom(const uint8_t *data, size_t size, bool is_key_frame)
Definition: media_sample.cc:47
static std::string GetCodecString(VideoCodec codec, uint8_t profile, uint8_t compatible_profiles, uint8_t level)