Shaka Packager SDK
ac4_audio_util.cc
1 // Copyright 2020 Google Inc. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file or at
5 // https://developers.google.com/open-source/licenses/bsd
6 
7 #include "packager/media/codecs/ac4_audio_util.h"
8 
9 #include "packager/base/macros.h"
10 #include "packager/base/strings/string_number_conversions.h"
11 #include "packager/media/base/bit_reader.h"
12 #include "packager/media/base/rcheck.h"
13 
14 namespace shaka {
15 namespace media {
16 
17 namespace {
18 
// Speaker groups of an AC-4 channel mask. Every enumerator is a single-bit
// flag; the shift amount is the speaker group index (bit 0 is the LSB).
//
//   Bit  Location
//    0   Left/Right pair
//    1   Centre
//    2   Left surround/Right surround pair
//    3   Left back/Right back pair
//    4   Top front left/Top front right pair
//    5   Top back left/Top back right pair
//    6   LFE
//    7   Top left/Top right pair
//    8   Top side left/Top side right pair
//    9   Top front centre
//   10   Top back centre
//   11   Top centre
//   12   LFE2
//   13   Bottom front left/Bottom front right pair
//   14   Bottom front centre
//   15   Back centre
//   16   Left screen/Right screen pair
//   17   Left wide/Right wide pair
//   18   Vertical height left/Vertical height right pair
enum kAC4AudioChannelGroupIndex {
  kLRPair = 1 << 0,
  kCentre = 1 << 1,
  kLsRsPair = 1 << 2,
  kLbRbPair = 1 << 3,
  kTflTfrPair = 1 << 4,
  kTblTbrPair = 1 << 5,
  kLFE = 1 << 6,
  kTlTrPair = 1 << 7,
  kTslTsrPair = 1 << 8,
  kTopfrontCentre = 1 << 9,
  kTopbackCentre = 1 << 10,
  kTopCentre = 1 << 11,
  kLFE2 = 1 << 12,
  kBflBfrPair = 1 << 13,
  kBottomFrontCentre = 1 << 14,
  kBackCentre = 1 << 15,
  kLscrRscrPair = 1 << 16,
  kLwRw = 1 << 17,
  kVhlVhrPair = 1 << 18,
};
61 
62 // Mapping of channel configurations to the MPEG audio value based on ETSI TS
63 // 103 192-2 V1.2.1 Digital Audio Compression (AC-4) Standard;
64 // Part 2: Immersive and personalized Table G.1
65 uint32_t AC4ChannelMasktoMPEGValue(uint32_t channel_mask) {
66  uint32_t ret = 0;
67 
68  switch (channel_mask) {
69  case kCentre:
70  ret = 1;
71  break;
72  case kLRPair:
73  ret = 2;
74  break;
75  case kCentre | kLRPair:
76  ret = 3;
77  break;
78  case kCentre | kLRPair | kBackCentre:
79  ret = 4;
80  break;
81  case kCentre | kLRPair | kLsRsPair:
82  ret = 5;
83  break;
84  case kCentre | kLRPair | kLsRsPair | kLFE:
85  ret = 6;
86  break;
87  case kCentre | kLRPair | kLsRsPair | kLFE | kLwRw:
88  ret = 7;
89  break;
90  case kBackCentre | kLRPair:
91  ret = 9;
92  break;
93  case kLRPair | kLsRsPair:
94  ret = 10;
95  break;
96  case kCentre | kLRPair | kLsRsPair | kLFE | kBackCentre:
97  ret = 11;
98  break;
99  case kCentre | kLRPair | kLsRsPair | kLbRbPair | kLFE:
100  ret = 12;
101  break;
102  case kLwRw | kBackCentre | kBottomFrontCentre | kBflBfrPair | kLFE2 |
103  kTopCentre | kTopbackCentre | kTopfrontCentre | kTslTsrPair | kLFE |
104  kTblTbrPair | kTflTfrPair | kLbRbPair | kLsRsPair | kCentre | kLRPair:
105  case kVhlVhrPair | kLwRw | kBackCentre | kBottomFrontCentre | kBflBfrPair|
106  kLFE2 | kTopCentre | kTopbackCentre | kTopfrontCentre | kTslTsrPair |
107  kLFE | kTblTbrPair | kLbRbPair | kLsRsPair | kCentre | kLRPair:
108  ret = 13;
109  break;
110  case kLFE | kTflTfrPair | kLsRsPair | kCentre | kLRPair:
111  case kVhlVhrPair | kLFE | kCentre | kLRPair | kLsRsPair:
112  ret = 14;
113  break;
114  case kLFE2 | kTopbackCentre | kLFE | kTflTfrPair | kCentre | kLRPair |
115  kLsRsPair | kLbRbPair:
116  case kVhlVhrPair | kLFE2 | kTopbackCentre | kLFE | kCentre | kLRPair |
117  kLsRsPair | kLbRbPair:
118  ret = 15;
119  break;
120  case kLFE | kTblTbrPair | kTflTfrPair | kLsRsPair | kCentre | kLRPair:
121  case kVhlVhrPair | kLFE | kTblTbrPair | kLsRsPair | kCentre | kLRPair:
122  ret = 16;
123  break;
124  case kTopCentre | kTopfrontCentre | kLFE | kTblTbrPair | kTflTfrPair |
125  kLsRsPair | kCentre | kLRPair:
126  case kVhlVhrPair | kTopCentre | kTopfrontCentre | kLFE | kTblTbrPair |
127  kLsRsPair | kCentre | kLRPair:
128  ret = 17;
129  break;
130  case kTopCentre | kTopfrontCentre | kLFE | kTblTbrPair | kTflTfrPair |
131  kCentre | kLRPair | kLsRsPair | kLbRbPair:
132  case kVhlVhrPair | kTopCentre | kTopfrontCentre | kLFE | kTblTbrPair |
133  kCentre | kLRPair | kLsRsPair | kLbRbPair:
134  ret = 18;
135  break;
136  case kLFE | kTblTbrPair | kTflTfrPair | kCentre | kLRPair | kLsRsPair |
137  kLbRbPair:
138  case kVhlVhrPair | kLFE | kTblTbrPair | kCentre | kLRPair | kLsRsPair |
139  kLbRbPair:
140  ret = 19;
141  break;
142  case kLscrRscrPair | kLFE | kTblTbrPair | kTflTfrPair | kCentre | kLRPair |
143  kLsRsPair | kLbRbPair:
144  case kVhlVhrPair | kLscrRscrPair | kLFE | kTblTbrPair | kCentre | kLRPair |
145  kLsRsPair | kLbRbPair:
146  ret = 20;
147  break;
148  default:
149  ret = 0xFFFFFFFF;
150  }
151  return ret;
152 }
153 
154 // Parse AC-4 substream group based on ETSI TS 103 192-2 V1.2.1 Digital Audio
155 // Compression (AC-4) Standard; Part 2: Immersive and personalized E.11.
156 bool ParseAC4SubStreamGroupDsi(BitReader& bit_reader) {
157  bool b_substream_present;
158  RCHECK(bit_reader.ReadBits(1, &b_substream_present));
159  bool b_hsf_ext;
160  RCHECK(bit_reader.ReadBits(1, &b_hsf_ext));
161  bool b_channel_coded;
162  RCHECK(bit_reader.ReadBits(1, &b_channel_coded));
163  uint8_t n_substreams;
164  RCHECK(bit_reader.ReadBits(8, &n_substreams));
165  for (uint8_t i = 0; i < n_substreams; i++) {
166  RCHECK(bit_reader.SkipBits(2));
167  bool b_substream_bitrate_indicator;
168  RCHECK(bit_reader.ReadBits(1, &b_substream_bitrate_indicator));
169  if (b_substream_bitrate_indicator) {
170  RCHECK(bit_reader.SkipBits(5));
171  }
172  if (b_channel_coded) {
173  RCHECK(bit_reader.SkipBits(24));
174  } else {
175  bool b_ajoc;
176  RCHECK(bit_reader.ReadBits(1, &b_ajoc));
177  if (b_ajoc) {
178  bool b_static_dmx;
179  RCHECK(bit_reader.ReadBits(1, &b_static_dmx));
180  if (!b_static_dmx) {
181  RCHECK(bit_reader.SkipBits(4));
182  }
183  RCHECK(bit_reader.SkipBits(6));
184  }
185  RCHECK(bit_reader.SkipBits(4));
186  }
187  }
188  bool b_content_type;
189  RCHECK(bit_reader.ReadBits(1, &b_content_type));
190  if (b_content_type) {
191  RCHECK(bit_reader.SkipBits(3));
192  bool b_language_indicator;
193  RCHECK(bit_reader.ReadBits(1, &b_language_indicator));
194  if (b_language_indicator) {
195  uint8_t n_language_tag_bytes;
196  RCHECK(bit_reader.ReadBits(6, &n_language_tag_bytes));
197  RCHECK(bit_reader.SkipBits(n_language_tag_bytes * 8));
198  }
199  }
200  return true;
201 }
202 
203 // Parse AC-4 Presentation V1 based on ETSI TS 103 192-2 V1.2.1 Digital Audio
204 // Compression (AC-4) Standard;Part 2: Immersive and personalized E.10.
205 bool ParseAC4PresentationV1Dsi(BitReader& bit_reader,
206  uint32_t pres_bytes,
207  uint8_t* mdcompat,
208  uint32_t* presentation_channel_mask_v1,
209  bool* dolby_cbi_indicator,
210  uint8_t* dolby_atmos_indicator) {
211  bool ret = true;
212  // Record the initial offset.
213  const size_t presentation_start = bit_reader.bit_position();
214  uint8_t presentation_config_v1;
215  RCHECK(bit_reader.ReadBits(5, &presentation_config_v1));
216  uint8_t b_add_emdf_substreams;
217  // set default value (stereo content) for output parameters.
218  *mdcompat = 0;
219  *presentation_channel_mask_v1 = 2;
220  *dolby_cbi_indicator = false;
221  *dolby_atmos_indicator = 0;
222  if (presentation_config_v1 == 0x06) {
223  b_add_emdf_substreams = 1;
224  } else {
225  RCHECK(bit_reader.ReadBits(3, mdcompat));
226  bool b_presentation_id;
227  RCHECK(bit_reader.ReadBits(1, &b_presentation_id));
228  if (b_presentation_id) {
229  RCHECK(bit_reader.SkipBits(5));
230  }
231  RCHECK(bit_reader.SkipBits(19));
232  bool b_presentation_channel_coded;
233  RCHECK(bit_reader.ReadBits(1, &b_presentation_channel_coded));
234  *presentation_channel_mask_v1 = 0;
235  if (b_presentation_channel_coded) {
236  uint8_t dsi_presentation_ch_mode;
237  RCHECK(bit_reader.ReadBits(5, &dsi_presentation_ch_mode));
238  if (dsi_presentation_ch_mode >= 11 && dsi_presentation_ch_mode <= 14) {
239  RCHECK(bit_reader.SkipBits(1));
240  uint8_t pres_top_channel_pairs;
241  RCHECK(bit_reader.ReadBits(2, &pres_top_channel_pairs));
242  if (pres_top_channel_pairs) {
243  *dolby_cbi_indicator = true;
244  }
245  } else if (dsi_presentation_ch_mode == 15) {
246  *dolby_cbi_indicator = true;
247  }
248  RCHECK(bit_reader.ReadBits(24, presentation_channel_mask_v1));
249  }
250  bool b_presentation_core_differs;
251  RCHECK(bit_reader.ReadBits(1, &b_presentation_core_differs));
252  if (b_presentation_core_differs) {
253  bool b_presentation_core_channel_coded;
254  RCHECK(bit_reader.ReadBits(1, &b_presentation_core_channel_coded));
255  if (b_presentation_core_channel_coded) {
256  RCHECK(bit_reader.SkipBits(2));
257  }
258  }
259  bool b_presentation_filter;
260  RCHECK(bit_reader.ReadBits(1, &b_presentation_filter));
261  if (b_presentation_filter) {
262  RCHECK(bit_reader.SkipBits(1));
263  uint8_t n_filter_bytes;
264  RCHECK(bit_reader.ReadBits(8, &n_filter_bytes));
265  RCHECK(bit_reader.SkipBits(n_filter_bytes * 8));
266  }
267  if (presentation_config_v1 == 0x1f) {
268  ret &= ParseAC4SubStreamGroupDsi(bit_reader);
269  } else {
270  RCHECK(bit_reader.SkipBits(1));
271  if (presentation_config_v1 == 0 ||
272  presentation_config_v1 == 1 ||
273  presentation_config_v1 == 2) {
274  ret &= ParseAC4SubStreamGroupDsi(bit_reader);
275  ret &= ParseAC4SubStreamGroupDsi(bit_reader);
276  }
277  if (presentation_config_v1 == 3 || presentation_config_v1 == 4) {
278  ret &= ParseAC4SubStreamGroupDsi(bit_reader);
279  ret &= ParseAC4SubStreamGroupDsi(bit_reader);
280  ret &= ParseAC4SubStreamGroupDsi(bit_reader);
281  }
282  if (presentation_config_v1 == 5) {
283  uint8_t n_substream_groups_minus2;
284  RCHECK(bit_reader.ReadBits(3, &n_substream_groups_minus2));
285  for (uint8_t sg = 0; sg < n_substream_groups_minus2 + 2; sg++) {
286  ret &= ParseAC4SubStreamGroupDsi(bit_reader);
287  }
288  }
289  if (presentation_config_v1 > 5) {
290  uint8_t n_skip_bytes;
291  RCHECK(bit_reader.ReadBits(7, &n_skip_bytes));
292  RCHECK(bit_reader.SkipBits(n_skip_bytes * 8));
293  }
294  }
295  RCHECK(bit_reader.SkipBits(1));
296  RCHECK(bit_reader.ReadBits(1, &b_add_emdf_substreams));
297  }
298  if (b_add_emdf_substreams) {
299  uint8_t n_add_emdf_substreams;
300  RCHECK(bit_reader.ReadBits(7, &n_add_emdf_substreams));
301  RCHECK(bit_reader.SkipBits(n_add_emdf_substreams * 15));
302  }
303  bool b_presentation_bitrate_info;
304  RCHECK(bit_reader.ReadBits(1, &b_presentation_bitrate_info));
305  if (b_presentation_bitrate_info) {
306  // Skip bit rate information based on ETSI TS 103 190-2 v1.2.1 E.7.1
307  RCHECK(bit_reader.SkipBits(66));
308  }
309  bool b_alternative;
310  RCHECK(bit_reader.ReadBits(1, &b_alternative));
311  if (b_alternative) {
312  bit_reader.SkipToNextByte();
313  // Parse alternative information based on ETSI TS 103 190-2 v1.2.1 E.12
314  uint16_t name_len;
315  RCHECK(bit_reader.ReadBits(16, &name_len));
316  RCHECK(bit_reader.SkipBits(name_len * 8));
317  uint8_t n_targets;
318  RCHECK(bit_reader.ReadBits(5, &n_targets));
319  RCHECK(bit_reader.SkipBits(n_targets * 11));
320  }
321  bit_reader.SkipToNextByte();
322  if ((bit_reader.bit_position() - presentation_start) <=
323  (pres_bytes - 1) * 8) {
324  RCHECK(bit_reader.SkipBits(1));
325  RCHECK(bit_reader.ReadBits(1, dolby_atmos_indicator));
326  RCHECK(bit_reader.SkipBits(4));
327  bool b_extended_presentation_group_index;
328  RCHECK(bit_reader.ReadBits(1, &b_extended_presentation_group_index));
329  if (b_extended_presentation_group_index) {
330  RCHECK(bit_reader.SkipBits(9));
331  } else {
332  RCHECK(bit_reader.SkipBits(1));
333  }
334  }
335  return ret;
336 }
337 
338 bool ExtractAc4Data(const std::vector<uint8_t>& ac4_data,
339  uint8_t* bitstream_version,
340  uint8_t* presentation_version,
341  uint8_t* mdcompat,
342  uint32_t* presentation_channel_mask_v1,
343  bool* dolby_ims_indicator,
344  bool* dolby_cbi_indicator) {
345  BitReader bit_reader(ac4_data.data(), ac4_data.size());
346 
347  uint16_t n_presentation;
348  RCHECK(bit_reader.SkipBits(3) && bit_reader.ReadBits(7, bitstream_version));
349  RCHECK(bit_reader.SkipBits(5) && bit_reader.ReadBits(9, &n_presentation));
350 
351  if (*bitstream_version == 2) {
352  uint8_t b_program_id = 0;
353  RCHECK(bit_reader.ReadBits(1, &b_program_id));
354  if (b_program_id) {
355  RCHECK(bit_reader.SkipBits(16));
356  uint8_t b_uuid = 0;
357  RCHECK(bit_reader.ReadBits(1, &b_uuid));
358  if (b_uuid) {
359  RCHECK(bit_reader.SkipBits(16 * 8));
360  }
361  }
362  } else if (*bitstream_version == 0 || *bitstream_version == 1) {
363  LOG(WARNING) << "Bitstream version 0 or 1 is not supported";
364  return false;
365  } else {
366  LOG(WARNING) << "Invalid Bitstream version";
367  return false;
368  }
369 
370  RCHECK(bit_reader.SkipBits(66));
371  bit_reader.SkipToNextByte();
372 
373  // AC4 stream containing the single presentation is valid for OTT only.
374  // IMS has two presentations, and the 2nd is legacy (duplicated) presentation.
375  // So it can be considered as AC4 stream with single presentation. And IMS
376  // presentation must be prior to legacy presentation.
377  // In other word, only the 1st presentation in AC4 stream need to be parsed.
378  const uint8_t ott_n_presentation = 1;
379  for (uint8_t i = 0; i < ott_n_presentation; i++) {
380  RCHECK(bit_reader.ReadBits(8, presentation_version));
381  // *presentation_version == 2 means IMS presentation.
382  if ((*presentation_version == 2 && n_presentation > 2) ||
383  (*presentation_version == 1 && n_presentation > 1) ) {
384  LOG(WARNING) << "Seeing multiple presentations, only single presentation "
385  << "(including IMS presentation) is supported";
386  return false;
387  }
388  uint32_t pres_bytes;
389  RCHECK(bit_reader.ReadBits(8, &pres_bytes));
390  if (pres_bytes == 255) {
391  uint32_t add_pres_bytes;
392  RCHECK(bit_reader.ReadBits(16, &add_pres_bytes));
393  pres_bytes += add_pres_bytes;
394  }
395 
396  size_t presentation_bits = 0;
397  *dolby_ims_indicator = false;
398  if (*presentation_version == 0) {
399  LOG(WARNING) << "Presentation version 0 is not supported";
400  return false;
401  } else {
402  if (*presentation_version == 1 || *presentation_version == 2) {
403  if (*presentation_version == 2) {
404  *dolby_ims_indicator = true;
405  }
406  const size_t presentation_start = bit_reader.bit_position();
407  // dolby_atmos_indicator is extended in Dolby internal specs.
408  // It indicates whether the source content before encoding is Atmos.
409  // No final decision about how to use it in OTT.
410  // Parse it for the future usage.
411  uint8_t dolby_atmos_indicator;
412  if (!ParseAC4PresentationV1Dsi(bit_reader, pres_bytes, mdcompat,
413  presentation_channel_mask_v1,
414  dolby_cbi_indicator,
415  &dolby_atmos_indicator)) {
416  return false;
417  }
418  const size_t presentation_end = bit_reader.bit_position();
419  presentation_bits = presentation_end - presentation_start;
420  } else {
421  LOG(WARNING) << "Invalid Presentation version";
422  return false;
423  }
424  }
425  size_t skip_bits = pres_bytes * 8 - presentation_bits;
426  RCHECK(bit_reader.SkipBits(skip_bits));
427  }
428  return true;
429 }
430 } // namespace
431 
432 bool CalculateAC4ChannelMask(const std::vector<uint8_t>& ac4_data,
433  uint32_t* ac4_channel_mask) {
434  uint8_t bitstream_version;
435  uint8_t presentation_version;
436  uint8_t mdcompat;
437  uint32_t pre_channel_mask;
438  bool dolby_ims_indicator;
439  bool dolby_cbi_indicator;
440 
441  if (!ExtractAc4Data(ac4_data, &bitstream_version, &presentation_version,
442  &mdcompat, &pre_channel_mask, &dolby_ims_indicator,
443  &dolby_cbi_indicator)) {
444  LOG(WARNING) << "Seeing invalid AC4 data: "
445  << base::HexEncode(ac4_data.data(), ac4_data.size());
446  return false;
447  }
448 
449  if (pre_channel_mask) {
450  *ac4_channel_mask = pre_channel_mask;
451  } else {
452  *ac4_channel_mask = 0x800000;
453  }
454  return true;
455 }
456 
457 bool CalculateAC4ChannelMPEGValue(const std::vector<uint8_t>& ac4_data,
458  uint32_t* ac4_channel_mpeg_value) {
459  uint8_t bitstream_version;
460  uint8_t presentation_version;
461  uint8_t mdcompat;
462  uint32_t pre_channel_mask;
463  bool dolby_ims_indicator;
464  bool dolby_cbi_indicator;
465 
466  if (!ExtractAc4Data(ac4_data, &bitstream_version, &presentation_version,
467  &mdcompat, &pre_channel_mask, &dolby_ims_indicator,
468  &dolby_cbi_indicator)) {
469  LOG(WARNING) << "Seeing invalid AC4 data: "
470  << base::HexEncode(ac4_data.data(), ac4_data.size());
471  return false;
472  }
473 
474  *ac4_channel_mpeg_value = AC4ChannelMasktoMPEGValue(pre_channel_mask);
475  return true;
476 }
477 
478 bool GetAc4CodecInfo(const std::vector<uint8_t>& ac4_data,
479  uint8_t* ac4_codec_info) {
480  uint8_t bitstream_version;
481  uint8_t presentation_version;
482  uint8_t mdcompat;
483  uint32_t pre_channel_mask;
484  bool dolby_ims_indicator;
485  bool dolby_cbi_indicator;
486 
487  if (!ExtractAc4Data(ac4_data, &bitstream_version, &presentation_version,
488  &mdcompat, &pre_channel_mask, &dolby_ims_indicator,
489  &dolby_cbi_indicator)) {
490  LOG(WARNING) << "Seeing invalid AC4 data: "
491  << base::HexEncode(ac4_data.data(), ac4_data.size());
492  return false;
493  }
494 
495  // The valid value of bitstream_version (8 bits) is 2, the valid value of
496  // presentation_version (8 bits) is 1 or 2, and mdcompat is 3 bits.
497  // So uint8_t is fine now. If Dolby extends the value of bitstream_version and
498  // presentation_version in future, maybe need change the type from uint8_t to
499  // uint16_t or uint32_t to accommodate the valid values.
500  // If that, AudioStreamInfo::GetCodecString need to be changed accordingly.
501  // bitstream_version (3bits) + presentation_version (2bits) + mdcompat (3bits)
502  *ac4_codec_info = ((bitstream_version << 5) |
503  ((presentation_version << 3) & 0x1F) |
504  (mdcompat & 0x7));
505  return true;
506 }
507 
508 bool GetAc4ImmersiveInfo(const std::vector<uint8_t>& ac4_data,
509  bool* ac4_ims_flag,
510  bool* ac4_cbi_flag) {
511  uint8_t bitstream_version;
512  uint8_t presentation_version;
513  uint8_t mdcompat;
514  uint32_t pre_channel_mask;
515 
516  if (!ExtractAc4Data(ac4_data, &bitstream_version, &presentation_version,
517  &mdcompat, &pre_channel_mask, ac4_ims_flag,
518  ac4_cbi_flag)) {
519  LOG(WARNING) << "Seeing invalid AC4 data: "
520  << base::HexEncode(ac4_data.data(), ac4_data.size());
521  return false;
522  }
523 
524  return true;
525 }
526 
527 } // namespace media
528 } // namespace shaka
shaka
All the methods that are virtual are virtual for mocking.
Definition: gflags_hex_bytes.cc:11