DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs Enumerator
vp9_parser.cc
1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file or at
5 // https://developers.google.com/open-source/licenses/bsd
6 
7 #include "packager/media/filters/vp9_parser.h"
8 
9 #include "packager/base/logging.h"
10 #include "packager/media/base/bit_reader.h"
11 #include "packager/media/formats/mp4/rcheck.h"
12 
13 namespace edash_packager {
14 namespace media {
15 namespace {
16 
// Values the parser expects to find in a frame header.
const uint32_t VP9_FRAME_MARKER = 2;      // 2-bit marker at the start of a frame.
const uint32_t VP9_SYNC_CODE = 0x498342;  // 24-bit sync code.

// Reference frame bookkeeping.
const uint32_t REFS_PER_FRAME = 3;
const uint32_t REF_FRAMES_LOG2 = 3;
const uint32_t REF_FRAMES = (1 << REF_FRAMES_LOG2);
const uint32_t FRAME_CONTEXTS_LOG2 = 2;

// Loop filter and quantization field counts / widths.
const uint32_t MAX_REF_LF_DELTAS = 4;
const uint32_t MAX_MODE_LF_DELTAS = 2;
const uint32_t QINDEX_BITS = 8;

// Segmentation field counts.
const uint32_t MAX_SEGMENTS = 8;
const uint32_t SEG_TREE_PROBS = (MAX_SEGMENTS - 1);
const uint32_t PREDICTION_PROBS = 3;
const uint32_t SEG_LVL_MAX = 4;

// Block geometry used to derive the tile column limits.
const uint32_t MI_SIZE_LOG2 = 3;
const uint32_t MI_BLOCK_SIZE_LOG2 = (6 - MI_SIZE_LOG2);  // 64 = 2^6
const uint32_t MIN_TILE_WIDTH_B64 = 4;
const uint32_t MAX_TILE_WIDTH_B64 = 64;

// Per segmentation feature: whether the value is followed by a sign bit, and
// how many bits the value itself occupies.
const bool SEG_FEATURE_DATA_SIGNED[SEG_LVL_MAX] = {true, true, false, false};
const uint32_t SEG_FEATURE_DATA_MAX_BITS[SEG_LVL_MAX] = {8, 6, 2, 0};
37 
// Values of the 3-bit color space field read from the frame header.
enum VpxColorSpace {
  VPX_COLOR_SPACE_UNKNOWN = 0,
  VPX_COLOR_SPACE_BT_601 = 1,
  VPX_COLOR_SPACE_BT_709 = 2,
  VPX_COLOR_SPACE_SMPTE_170 = 3,
  VPX_COLOR_SPACE_SMPTE_240 = 4,
  VPX_COLOR_SPACE_BT_2020 = 5,
  VPX_COLOR_SPACE_RESERVED = 6,
  VPX_COLOR_SPACE_SRGB = 7
};
48 
49 class VP9BitReader : public BitReader {
50  public:
51  VP9BitReader(const uint8_t* data, off_t size) : BitReader(data, size) {}
52  ~VP9BitReader() {}
53 
54  bool SkipBitsConditional(uint32_t num_bits) {
55  bool condition;
56  if (!ReadBits(1, &condition))
57  return false;
58  return condition ? SkipBits(num_bits) : true;
59  }
60 
61  private:
62  DISALLOW_COPY_AND_ASSIGN(VP9BitReader);
63 };
64 
// Returns |value| divided by 2^|n|, rounded up.
uint32_t RoundupShift(uint32_t value, uint32_t n) {
  const uint32_t round_bias = (1 << n) - 1;
  return (value + round_bias) >> n;
}
68 
69 // Number of MI-units (8*8).
70 uint32_t GetNumMiUnits(uint32_t pixels) {
71  return RoundupShift(pixels, MI_SIZE_LOG2);
72 }
73 
74 // Number of sb64 (64x64) blocks per mi_units.
75 uint32_t GetNumBlocks(uint32_t mi_units) {
76  return RoundupShift(mi_units, MI_BLOCK_SIZE_LOG2);
77 }
78 
79 uint32_t GetMinLog2TileCols(uint32_t sb64_cols) {
80  uint32_t min_log2 = 0;
81  while ((MAX_TILE_WIDTH_B64 << min_log2) < sb64_cols)
82  ++min_log2;
83  return min_log2;
84 }
85 
86 uint32_t GetMaxLog2TileCols(uint32_t sb64_cols) {
87  uint32_t max_log2 = 1;
88  while ((sb64_cols >> max_log2) >= MIN_TILE_WIDTH_B64)
89  ++max_log2;
90  return max_log2 - 1;
91 }
92 
93 void GetTileNBits(uint32_t mi_cols,
94  uint32_t* min_log2_tile_cols,
95  uint32_t* max_log2_tile_cols) {
96  const uint32_t sb64_cols = GetNumBlocks(mi_cols);
97  *min_log2_tile_cols = GetMinLog2TileCols(sb64_cols);
98  *max_log2_tile_cols = GetMaxLog2TileCols(sb64_cols);
99  CHECK_LE(*min_log2_tile_cols, *max_log2_tile_cols);
100 }
101 
102 // Parse superframe index if it is a superframe. Fill |vpx_frames| with the
103 // frames information, which contains the sizes of the frames indicated in
104 // superframe index if it is a superframe; otherwise it should contain one
105 // single frame with |data_size| as frame size.
106 bool ParseIfSuperframeIndex(const uint8_t* data,
107  size_t data_size,
108  std::vector<VPxFrameInfo>* vpx_frames) {
109  vpx_frames->clear();
110  uint8_t superframe_marker = data[data_size - 1];
111  VPxFrameInfo vpx_frame;
112  if ((superframe_marker & 0xe0) != 0xc0) {
113  // This is not a super frame. There should be only one frame.
114  vpx_frame.frame_size = data_size;
115  vpx_frames->push_back(vpx_frame);
116  return true;
117  }
118 
119  const size_t num_frames = (superframe_marker & 0x07) + 1;
120  const size_t frame_size_length = ((superframe_marker >> 3) & 0x03) + 1;
121  // Two maker bytes + frame sizes.
122  const size_t index_size = 2 + num_frames * frame_size_length;
123 
124  if (data_size < index_size) {
125  LOG(ERROR) << "This chunk is marked as having a superframe index but "
126  "doesn't have enough data for it.";
127  return false;
128  }
129  const uint8_t superframe_marker2 = data[data_size - index_size];
130  if (superframe_marker2 != superframe_marker) {
131  LOG(ERROR) << "This chunk is marked as having a superframe index but "
132  "doesn't have the matching marker byte at the front of the "
133  "index.";
134  return false;
135  }
136  VLOG(3) << "Superframe num_frames=" << num_frames
137  << " frame_size_length=" << frame_size_length;
138 
139  data += data_size - index_size + 1;
140  size_t total_frame_sizes = 0;
141  for (size_t i = 0; i < num_frames; ++i) {
142  vpx_frame.frame_size = 0;
143  for (size_t i = 0; i < frame_size_length; ++i) {
144  vpx_frame.frame_size |= *data << (i * 8);
145  ++data;
146  }
147  total_frame_sizes += vpx_frame.frame_size;
148  vpx_frames->push_back(vpx_frame);
149  }
150  if (total_frame_sizes + index_size != data_size) {
151  LOG(ERROR) << "Data size (" << data_size
152  << ") does not match with sum of frame sizes ("
153  << total_frame_sizes << ") + index_size (" << index_size << ")";
154  return false;
155  }
156  return true;
157 }
158 
159 bool ReadProfile(VP9BitReader* reader, VPCodecConfiguration* codec_config) {
160  uint8_t bit[2];
161  RCHECK(reader->ReadBits(1, &bit[0]));
162  RCHECK(reader->ReadBits(1, &bit[1]));
163  uint8_t profile = bit[0] | (bit[1] << 1);
164  if (profile == 3) {
165  bool reserved;
166  RCHECK(reader->ReadBits(1, &reserved));
167  RCHECK(!reserved);
168  }
169  codec_config->set_profile(profile);
170  return true;
171 }
172 
173 bool ReadSyncCode(VP9BitReader* reader) {
174  uint32_t sync_code;
175  RCHECK(reader->ReadBits(24, &sync_code));
176  return sync_code == VP9_SYNC_CODE;
177 }
178 
179 VPCodecConfiguration::ColorSpace GetColorSpace(uint8_t color_space) {
180  switch (color_space) {
181  case VPX_COLOR_SPACE_UNKNOWN:
182  return VPCodecConfiguration::COLOR_SPACE_UNSPECIFIED;
183  case VPX_COLOR_SPACE_BT_601:
184  return VPCodecConfiguration::COLOR_SPACE_BT_601;
185  case VPX_COLOR_SPACE_BT_709:
186  return VPCodecConfiguration::COLOR_SPACE_BT_709;
187  case VPX_COLOR_SPACE_BT_2020:
188  // VP9 does not specify if it is in the form of “constant luminance” or
189  // “non-constant luminance”. As such, application should rely on the
190  // signaling outside of VP9 bitstream. If there is no such signaling,
191  // application may assume non-constant luminance for BT.2020.
192  return VPCodecConfiguration::COLOR_SPACE_BT_2020_NON_CONSTANT_LUMINANCE;
193  case VPX_COLOR_SPACE_SRGB:
194  return VPCodecConfiguration::COLOR_SPACE_SRGB;
195  default:
196  LOG(WARNING) << "Unknown color space: " << static_cast<int>(color_space);
197  return VPCodecConfiguration::COLOR_SPACE_UNSPECIFIED;
198  }
199 }
200 
201 VPCodecConfiguration::ChromaSubsampling GetChromaSubsampling(
202  uint8_t subsampling) {
203  switch (subsampling) {
204  case 0:
205  return VPCodecConfiguration::CHROMA_444;
206  case 1:
207  return VPCodecConfiguration::CHROMA_440;
208  case 2:
209  return VPCodecConfiguration::CHROMA_422;
210  case 3:
211  // VP9 assumes that chrome samples are collocated with luma samples if
212  // there is no explicit signaling outside of VP9 bitstream.
213  return VPCodecConfiguration::CHROMA_420_COLLOCATED_WITH_LUMA;
214  default:
215  LOG(WARNING) << "Unexpected chroma subsampling value: "
216  << static_cast<int>(subsampling);
217  return VPCodecConfiguration::CHROMA_420_COLLOCATED_WITH_LUMA;
218  }
219 }
220 
221 bool ReadBitDepthAndColorSpace(VP9BitReader* reader,
222  VPCodecConfiguration* codec_config) {
223  uint8_t bit_depth = 8;
224  if (codec_config->profile() >= 2) {
225  bool use_vpx_bits_12;
226  RCHECK(reader->ReadBits(1, &use_vpx_bits_12));
227  bit_depth = use_vpx_bits_12 ? 12 : 10;
228  }
229  codec_config->set_bit_depth(bit_depth);
230 
231  uint8_t color_space;
232  RCHECK(reader->ReadBits(3, &color_space));
233  codec_config->set_color_space(GetColorSpace(color_space));
234 
235  bool yuv_full_range = false;
236  auto chroma_subsampling = VPCodecConfiguration::CHROMA_444;
237  if (color_space != VPX_COLOR_SPACE_SRGB) {
238  RCHECK(reader->ReadBits(1, &yuv_full_range));
239 
240  if (codec_config->profile() & 1) {
241  uint8_t subsampling;
242  RCHECK(reader->ReadBits(2, &subsampling));
243  chroma_subsampling = GetChromaSubsampling(subsampling);
244  if (chroma_subsampling ==
245  VPCodecConfiguration::CHROMA_420_COLLOCATED_WITH_LUMA) {
246  LOG(ERROR) << "4:2:0 color not supported in profile "
247  << codec_config->profile();
248  return false;
249  }
250 
251  bool reserved;
252  RCHECK(reader->ReadBits(1, &reserved));
253  RCHECK(!reserved);
254  } else {
255  chroma_subsampling =
256  VPCodecConfiguration::CHROMA_420_COLLOCATED_WITH_LUMA;
257  }
258  } else {
259  // Assume 4:4:4 for colorspace SRGB.
260  chroma_subsampling = VPCodecConfiguration::CHROMA_444;
261  if (codec_config->profile() & 1) {
262  bool reserved;
263  RCHECK(reader->ReadBits(1, &reserved));
264  RCHECK(!reserved);
265  } else {
266  LOG(ERROR) << "4:4:4 color not supported in profile 0 or 2.";
267  return false;
268  }
269  }
270  codec_config->set_video_full_range_flag(yuv_full_range);
271  codec_config->set_chroma_subsampling(chroma_subsampling);
272 
273  VLOG(3) << "\n profile " << static_cast<int>(codec_config->profile())
274  << "\n bit depth " << static_cast<int>(codec_config->bit_depth())
275  << "\n color space " << static_cast<int>(codec_config->color_space())
276  << "\n full_range "
277  << static_cast<int>(codec_config->video_full_range_flag())
278  << "\n chroma subsampling "
279  << static_cast<int>(codec_config->chroma_subsampling());
280  return true;
281 }
282 
283 bool ReadFrameSize(VP9BitReader* reader, uint32_t* width, uint32_t* height) {
284  RCHECK(reader->ReadBits(16, width));
285  *width += 1; // Off by 1.
286  RCHECK(reader->ReadBits(16, height));
287  *height += 1; // Off by 1.
288  return true;
289 }
290 
291 bool ReadDisplayFrameSize(VP9BitReader* reader,
292  uint32_t* display_width,
293  uint32_t* display_height) {
294  bool has_display_size;
295  RCHECK(reader->ReadBits(1, &has_display_size));
296  if (has_display_size)
297  RCHECK(ReadFrameSize(reader, display_width, display_height));
298  return true;
299 }
300 
301 bool ReadFrameSizes(VP9BitReader* reader, uint32_t* width, uint32_t* height) {
302  uint32_t new_width;
303  uint32_t new_height;
304  RCHECK(ReadFrameSize(reader, &new_width, &new_height));
305  if (new_width != *width) {
306  VLOG(1) << "Width updates from " << *width << " to " << new_width;
307  *width = new_width;
308  }
309  if (new_height != *height) {
310  VLOG(1) << "Height updates from " << *height << " to " << new_height;
311  *height = new_height;
312  }
313 
314  uint32_t display_width = *width;
315  uint32_t display_height = *height;
316  RCHECK(ReadDisplayFrameSize(reader, &display_width, &display_height));
317  return true;
318 }
319 
320 bool ReadFrameSizesWithRefs(VP9BitReader* reader,
321  uint32_t* width,
322  uint32_t* height) {
323  bool found = false;
324  for (uint32_t i = 0; i < REFS_PER_FRAME; ++i) {
325  RCHECK(reader->ReadBits(1, &found));
326  if (found)
327  break;
328  }
329  if (!found) {
330  RCHECK(ReadFrameSizes(reader, width, height));
331  } else {
332  uint32_t display_width;
333  uint32_t display_height;
334  RCHECK(ReadDisplayFrameSize(reader, &display_width, &display_height));
335  }
336  return true;
337 }
338 
339 bool ReadLoopFilter(VP9BitReader* reader) {
340  RCHECK(reader->SkipBits(9)); // filter_evel, sharness_level
341  bool mode_ref_delta_enabled;
342  RCHECK(reader->ReadBits(1, &mode_ref_delta_enabled));
343  if (!mode_ref_delta_enabled)
344  return true;
345  bool mode_ref_delta_update;
346  RCHECK(reader->ReadBits(1, &mode_ref_delta_update));
347  if (!mode_ref_delta_update) return true;
348 
349  for (uint32_t i = 0; i < MAX_REF_LF_DELTAS + MAX_MODE_LF_DELTAS; ++i)
350  RCHECK(reader->SkipBitsConditional(6 + 1));
351  return true;
352 }
353 
354 bool ReadQuantization(VP9BitReader* reader) {
355  RCHECK(reader->SkipBits(QINDEX_BITS));
356  // Skip delta_q bits.
357  for (uint32_t i = 0; i < 3; ++i)
358  RCHECK(reader->SkipBitsConditional(4 + 1));
359  return true;
360 }
361 
362 bool ReadSegmentation(VP9BitReader* reader) {
363  bool enabled;
364  RCHECK(reader->ReadBits(1, &enabled));
365  if (!enabled)
366  return true;
367 
368  bool update_map;
369  RCHECK(reader->ReadBits(1, &update_map));
370  if (update_map) {
371  for (uint32_t i = 0; i < SEG_TREE_PROBS; ++i) {
372  RCHECK(reader->SkipBitsConditional(8));
373 
374  bool temporal_update;
375  RCHECK(reader->ReadBits(1, &temporal_update));
376  if (temporal_update) {
377  for (uint32_t j = 0; j < PREDICTION_PROBS; ++j)
378  RCHECK(reader->SkipBitsConditional(8));
379  }
380  }
381  }
382 
383  bool update_data;
384  RCHECK(reader->ReadBits(1, &update_data));
385  if (update_data) {
386  RCHECK(reader->SkipBits(1)); // abs_delta
387  for (uint32_t i = 0; i < MAX_SEGMENTS; ++i) {
388  for (uint32_t j = 0; j < SEG_LVL_MAX; ++j) {
389  bool feature_enabled;
390  RCHECK(reader->ReadBits(1, &feature_enabled));
391  if (feature_enabled) {
392  RCHECK(reader->SkipBits(SEG_FEATURE_DATA_MAX_BITS[j]));
393  if (SEG_FEATURE_DATA_SIGNED[j])
394  RCHECK(reader->SkipBits(1)); // signness
395  }
396  }
397  }
398  }
399  return true;
400 }
401 
402 bool ReadTileInfo(uint32_t width, VP9BitReader* reader) {
403  uint32_t mi_cols = GetNumMiUnits(width);
404 
405  uint32_t min_log2_tile_cols;
406  uint32_t max_log2_tile_cols;
407  GetTileNBits(mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
408  uint32_t max_ones = max_log2_tile_cols - min_log2_tile_cols;
409 
410  uint32_t log2_tile_cols = min_log2_tile_cols;
411  while (max_ones--) {
412  bool has_more;
413  RCHECK(reader->ReadBits(1, &has_more));
414  if (!has_more)
415  break;
416  ++log2_tile_cols;
417  }
418  RCHECK(log2_tile_cols <= 6);
419 
420  RCHECK(reader->SkipBitsConditional(1)); // log2_tile_rows
421  return true;
422 }
423 
424 } // namespace
425 
426 VP9Parser::VP9Parser() : width_(0), height_(0) {}
427 VP9Parser::~VP9Parser() {}
428 
429 bool VP9Parser::Parse(const uint8_t* data,
430  size_t data_size,
431  std::vector<VPxFrameInfo>* vpx_frames) {
432  DCHECK(data);
433  DCHECK(vpx_frames);
434  RCHECK(ParseIfSuperframeIndex(data, data_size, vpx_frames));
435 
436  for (auto& vpx_frame : *vpx_frames) {
437  VLOG(4) << "process frame with size " << vpx_frame.frame_size;
438  VP9BitReader reader(data, vpx_frame.frame_size);
439  uint8_t frame_marker;
440  RCHECK(reader.ReadBits(2, &frame_marker));
441  RCHECK(frame_marker == VP9_FRAME_MARKER);
442 
443  RCHECK(ReadProfile(&reader, &codec_config_));
444 
445  bool show_existing_frame;
446  RCHECK(reader.ReadBits(1, &show_existing_frame));
447  if (show_existing_frame) {
448  RCHECK(reader.SkipBits(3)); // ref_frame_index
449  // End of current frame data. There should be no more bytes available.
450  RCHECK(reader.bits_available() < 8);
451 
452  vpx_frame.is_key_frame = false;
453  vpx_frame.uncompressed_header_size = vpx_frame.frame_size;
454  vpx_frame.width = width_;
455  vpx_frame.height = height_;
456  continue;
457  }
458 
459  bool is_inter_frame;
460  RCHECK(reader.ReadBits(1, &is_inter_frame));
461  vpx_frame.is_key_frame = !is_inter_frame;
462 
463  bool show_frame;
464  RCHECK(reader.ReadBits(1, &show_frame));
465  bool error_resilient_mode;
466  RCHECK(reader.ReadBits(1, &error_resilient_mode));
467 
468  if (vpx_frame.is_key_frame) {
469  RCHECK(ReadSyncCode(&reader));
470  RCHECK(ReadBitDepthAndColorSpace(&reader, &codec_config_));
471  RCHECK(ReadFrameSizes(&reader, &width_, &height_));
472  } else {
473  bool intra_only = false;
474  if (!show_frame)
475  RCHECK(reader.ReadBits(1, &intra_only));
476  if (!error_resilient_mode)
477  RCHECK(reader.SkipBits(2)); // reset_frame_context
478 
479  if (intra_only) {
480  RCHECK(ReadSyncCode(&reader));
481  if (codec_config_.profile() > 0) {
482  RCHECK(ReadBitDepthAndColorSpace(&reader, &codec_config_));
483  } else {
484  // NOTE: The intra-only frame header does not include the
485  // specification of either the color format or color sub-sampling in
486  // profile 0. VP9 specifies that the default color format should be
487  // YUV 4:2:0 in this case (normative).
488  codec_config_.set_chroma_subsampling(
489  VPCodecConfiguration::CHROMA_420_COLLOCATED_WITH_LUMA);
490  codec_config_.set_bit_depth(8);
491  }
492 
493  RCHECK(reader.SkipBits(REF_FRAMES)); // refresh_frame_flags
494  RCHECK(ReadFrameSizes(&reader, &width_, &height_));
495  } else {
496  RCHECK(reader.SkipBits(REF_FRAMES)); // refresh_frame_flags
497  RCHECK(reader.SkipBits(REFS_PER_FRAME * (REF_FRAMES_LOG2 + 1)));
498 
499  // TODO(kqyang): We may need to actually build the refs to extract the
500  // correct width and height for the current frame. The width will be
501  // used later in ReadTileInfo.
502  RCHECK(ReadFrameSizesWithRefs(&reader, &width_, &height_));
503 
504  RCHECK(reader.SkipBits(1)); // allow_high_precision_mv
505 
506  bool interp_filter;
507  RCHECK(reader.ReadBits(1, &interp_filter));
508  if (!interp_filter)
509  RCHECK(reader.SkipBits(2)); // more interp_filter
510  }
511  }
512 
513  if (!error_resilient_mode) {
514  RCHECK(reader.SkipBits(1)); // refresh_frame_context
515  RCHECK(reader.SkipBits(1)); // frame_parallel_decoding_mode
516  }
517  RCHECK(reader.SkipBits(FRAME_CONTEXTS_LOG2)); // frame_context_idx
518 
519  VLOG(4) << "bit offset: "
520  << vpx_frame.frame_size * 8 - reader.bits_available();
521  RCHECK(ReadLoopFilter(&reader));
522  RCHECK(ReadQuantization(&reader));
523  RCHECK(ReadSegmentation(&reader));
524  RCHECK(ReadTileInfo(width_, &reader));
525 
526  uint16_t first_partition_size;
527  RCHECK(reader.ReadBits(16, &first_partition_size));
528  vpx_frame.uncompressed_header_size =
529  vpx_frame.frame_size - reader.bits_available() / 8;
530  vpx_frame.width = width_;
531  vpx_frame.height = height_;
532 
533  VLOG(3) << "\n frame_size: " << vpx_frame.frame_size
534  << "\n header_size: " << vpx_frame.uncompressed_header_size
535  << "\n bits_read: "
536  << vpx_frame.frame_size * 8 - reader.bits_available()
537  << "\n first_partition_size: " << first_partition_size;
538 
539  RCHECK(first_partition_size > 0);
540  RCHECK(first_partition_size * 8 <= reader.bits_available());
541 
542  data += vpx_frame.frame_size;
543  }
544  return true;
545 }
546 
547 } // namespace media
548 } // namespace edash_packager
bool Parse(const uint8_t *data, size_t data_size, std::vector< VPxFrameInfo > *vpx_frames)
Definition: vp9_parser.cc:429