Shaka Packager SDK
box_definitions.h
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef PACKAGER_MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
6 #define PACKAGER_MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
7 
8 #include <vector>
9 
10 #include "packager/media/base/decrypt_config.h"
11 #include "packager/media/base/fourccs.h"
12 #include "packager/media/codecs/aac_audio_specific_config.h"
13 #include "packager/media/codecs/es_descriptor.h"
14 #include "packager/media/formats/mp4/box.h"
15 
16 namespace shaka {
17 namespace media {
18 
19 class BufferReader;
20 
21 namespace mp4 {
22 
// Kind of media carried by a track. Every enumerator value is spelled out;
// kInvalid is the zero value.
enum TrackType {
  kInvalid = 0,
  kVideo = 1,
  kAudio = 2,
  kHint = 3,
  kText = 4,
};
30 
31 class BoxBuffer;
32 
// Declares the members every box type |T| in this header shares:
//   public : default constructor, overriding destructor, BoxType() override;
//   private: ReadWriteInternal() and ComputeSizeInternal() overrides.
// The expansion ends with `public:` so that members declared after the macro
// invocation are public again.
#define DECLARE_BOX_METHODS(T)                        \
 public:                                              \
  T();                                                \
  ~T() override;                                      \
  FourCC BoxType() const override;                    \
                                                      \
 private:                                             \
  bool ReadWriteInternal(BoxBuffer* buffer) override; \
  size_t ComputeSizeInternal() override;              \
                                                      \
 public:
44 
45 struct FileType : Box {
46  DECLARE_BOX_METHODS(FileType);
47 
48  FourCC major_brand;
49  uint32_t minor_version;
50  std::vector<FourCC> compatible_brands;
51 };
52 
54  FourCC BoxType() const override;
55 };
56 
58  DECLARE_BOX_METHODS(ProtectionSystemSpecificHeader);
59 
60  std::vector<uint8_t> raw_box;
61 };
62 
64  DECLARE_BOX_METHODS(SampleAuxiliaryInformationOffset);
65 
66  std::vector<uint64_t> offsets;
67 };
68 
70  DECLARE_BOX_METHODS(SampleAuxiliaryInformationSize);
71 
72  uint8_t default_sample_info_size;
73  uint32_t sample_count;
74  std::vector<uint8_t> sample_info_sizes;
75 };
76 
86  bool ReadWrite(uint8_t iv_size,
87  bool has_subsamples,
88  BoxBuffer* buffer);
95  bool ParseFromBuffer(uint8_t iv_size,
96  bool has_subsamples,
97  BufferReader* reader);
99  uint32_t ComputeSize() const;
102  uint32_t GetTotalSizeOfSubsamples() const;
103 
104  std::vector<uint8_t> initialization_vector;
105  std::vector<SubsampleEntry> subsamples;
106 };
107 
109  enum SampleEncryptionFlags {
110  kUseSubsampleEncryption = 2,
111  };
112 
113  DECLARE_BOX_METHODS(SampleEncryption);
119  bool ParseFromSampleEncryptionData(
120  uint8_t iv_size,
121  std::vector<SampleEncryptionEntry>* sample_encryption_entries) const;
122 
125  std::vector<uint8_t> sample_encryption_data;
126 
127  uint8_t iv_size;
128  std::vector<SampleEncryptionEntry> sample_encryption_entries;
129 };
130 
131 struct OriginalFormat : Box {
132  DECLARE_BOX_METHODS(OriginalFormat);
133 
134  FourCC format;
135 };
136 
137 struct SchemeType : FullBox {
138  DECLARE_BOX_METHODS(SchemeType);
139 
140  FourCC type;
141  uint32_t version;
142 };
143 
145  DECLARE_BOX_METHODS(TrackEncryption);
146 
147  uint8_t default_is_protected;
148  uint8_t default_per_sample_iv_size;
149  std::vector<uint8_t> default_kid;
150 
151  // For pattern-based encryption.
152  uint8_t default_crypt_byte_block;
153  uint8_t default_skip_byte_block;
154 
155  // Present only if
156  // |default_is_protected == 1 && default_per_sample_iv_size == 0|.
157  std::vector<uint8_t> default_constant_iv;
158 };
159 
160 struct SchemeInfo : Box {
161  DECLARE_BOX_METHODS(SchemeInfo);
162 
163  TrackEncryption track_encryption;
164 };
165 
167  DECLARE_BOX_METHODS(ProtectionSchemeInfo);
168 
169  OriginalFormat format;
170  SchemeType type;
171  SchemeInfo info;
172 };
173 
175  DECLARE_BOX_METHODS(MovieHeader);
176 
177  uint64_t creation_time;
178  uint64_t modification_time;
179  uint32_t timescale;
180  uint64_t duration;
181  int32_t rate;
182  int16_t volume;
183  uint32_t next_track_id;
184 };
185 
187  enum TrackHeaderFlags {
188  kTrackEnabled = 0x000001,
189  kTrackInMovie = 0x000002,
190  kTrackInPreview = 0x000004,
191  };
192 
193  DECLARE_BOX_METHODS(TrackHeader);
194 
195  uint64_t creation_time;
196  uint64_t modification_time;
197  uint32_t track_id;
198  uint64_t duration;
199  int16_t layer;
200  int16_t alternate_group;
201  int16_t volume;
202  // width and height specify the track's visual presentation size as
203  // fixed-point 16.16 values.
204  uint32_t width;
205  uint32_t height;
206 };
207 
209  uint64_t segment_duration;
210  int64_t media_time;
211  int16_t media_rate_integer;
212  int16_t media_rate_fraction;
213 };
214 
215 struct EditList : FullBox {
216  DECLARE_BOX_METHODS(EditList);
217 
218  std::vector<EditListEntry> edits;
219 };
220 
221 struct Edit : Box {
222  DECLARE_BOX_METHODS(Edit);
223 
224  EditList list;
225 };
226 
228  DECLARE_BOX_METHODS(HandlerReference);
229 
230  FourCC handler_type;
231 };
232 
233 struct Language {
234  bool ReadWrite(BoxBuffer* buffer);
235  uint32_t ComputeSize() const;
236 
237  std::string code;
238 };
239 
241 struct ID3v2 : FullBox {
242  DECLARE_BOX_METHODS(ID3v2);
243 
244  Language language;
245  std::vector<uint8_t> id3v2_data;
246 };
247 
248 struct Metadata : FullBox {
249  DECLARE_BOX_METHODS(Metadata);
250 
251  HandlerReference handler;
252  ID3v2 id3v2;
253 };
254 
255 // This defines a common structure for various CodecConfiguration boxes:
256 // AVCConfiguration, HEVCConfiguration and VPCodecConfiguration.
257 // Note that unlike the other two CodecConfiguration boxes, VPCodecConfiguration
258 // box inherits from FullBox instead of Box, according to VP Codec ISO Media
259 // File Format Binding specification. It will be handled properly in the
260 // implementation.
262  DECLARE_BOX_METHODS(CodecConfiguration);
263 
264  FourCC box_type;
265  // Contains full codec configuration record, including possible extension
266  // boxes.
267  std::vector<uint8_t> data;
268 };
269 
271  DECLARE_BOX_METHODS(PixelAspectRatio);
272 
273  uint32_t h_spacing;
274  uint32_t v_spacing;
275 };
276 
278  DECLARE_BOX_METHODS(VideoSampleEntry);
279  // Returns actual format of this sample entry.
280  FourCC GetActualFormat() const {
281  return format == FOURCC_encv ? sinf.format.format : format;
282  }
283  // Returns the box type of codec configuration box from video format.
284  FourCC GetCodecConfigurationBoxType(FourCC format) const;
285 
286  FourCC format;
287  uint16_t data_reference_index;
288  uint16_t width;
289  uint16_t height;
290 
291  PixelAspectRatio pixel_aspect;
293  CodecConfiguration codec_configuration;
294 };
295 
297  DECLARE_BOX_METHODS(ElementaryStreamDescriptor);
298 
299  AACAudioSpecificConfig aac_audio_specific_config;
300  ESDescriptor es_descriptor;
301 };
302 
303 struct DTSSpecific : Box {
304  DECLARE_BOX_METHODS(DTSSpecific);
305 
306  uint32_t sampling_frequency;
307  uint32_t max_bitrate;
308  uint32_t avg_bitrate;
309  uint8_t pcm_sample_depth;
310  std::vector<uint8_t> extra_data;
311 };
312 
313 struct AC3Specific : Box {
314  DECLARE_BOX_METHODS(AC3Specific);
315 
316  std::vector<uint8_t> data;
317 };
318 
319 struct EC3Specific : Box {
320  DECLARE_BOX_METHODS(EC3Specific);
321 
322  std::vector<uint8_t> data;
323 };
324 
325 struct OpusSpecific : Box {
326  DECLARE_BOX_METHODS(OpusSpecific);
327 
328  std::vector<uint8_t> opus_identification_header;
329  // The number of priming samples. Extracted from |opus_identification_header|.
330  uint16_t preskip;
331 };
332 
333 // FLAC specific decoder configuration box:
334 // https://github.com/xiph/flac/blob/master/doc/isoflac.txt
335 // We do not care about the actual data inside, which is simply copied over.
337  DECLARE_BOX_METHODS(FlacSpecific);
338 
339  std::vector<uint8_t> data;
340 };
341 
343  DECLARE_BOX_METHODS(AudioSampleEntry);
344  // Returns actual format of this sample entry.
345  FourCC GetActualFormat() const {
346  return format == FOURCC_enca ? sinf.format.format : format;
347  }
348 
349  FourCC format;
350  uint16_t data_reference_index;
351  uint16_t channelcount;
352  uint16_t samplesize;
353  uint32_t samplerate;
354 
356 
358  DTSSpecific ddts;
359  AC3Specific dac3;
360  EC3Specific dec3;
361  OpusSpecific dops;
362  FlacSpecific dfla;
363 };
364 
366  DECLARE_BOX_METHODS(WebVTTConfigurationBox);
367  std::string config;
368 };
369 
371  DECLARE_BOX_METHODS(WebVTTSourceLabelBox);
372  std::string source_label;
373 };
374 
376  DECLARE_BOX_METHODS(TextSampleEntry);
377 
378  // Specifies fourcc of this sample entry. It needs to be set on write, e.g.
379  // set to 'wvtt' to write WVTTSampleEntry; On read, it is recovered from box
380  // header.
381  FourCC format;
382  uint16_t data_reference_index;
383 
384  // Sub boxes for wvtt text sample entry.
385  WebVTTConfigurationBox config;
386  WebVTTSourceLabelBox label;
387  // Optional MPEG4BitRateBox.
388 };
389 
391  DECLARE_BOX_METHODS(SampleDescription);
392 
393  TrackType type;
394  // TODO(kqyang): Clean up the code to have one single member, e.g. by creating
395  // SampleEntry struct, std::vector<SampleEntry> sample_entries.
396  std::vector<VideoSampleEntry> video_entries;
397  std::vector<AudioSampleEntry> audio_entries;
398  std::vector<TextSampleEntry> text_entries;
399 };
400 
// One run-length record: |sample_count| samples paired with |sample_delta|.
struct DecodingTime {
  // Number of samples covered by this record.
  uint32_t sample_count;
  // Delta value shared by those samples.
  uint32_t sample_delta;
};
405 
406 // stts.
408  DECLARE_BOX_METHODS(DecodingTimeToSample);
409 
410  std::vector<DecodingTime> decoding_time;
411 };
412 
414  uint32_t sample_count;
415  // If version == 0, sample_offset is uint32_t;
416  // If version == 1, sample_offset is int32_t.
417  // Use int64_t so both can be supported properly.
418  int64_t sample_offset;
419 };
420 
421 // ctts. Optional.
423  DECLARE_BOX_METHODS(CompositionTimeToSample);
424 
425  std::vector<CompositionOffset> composition_offset;
426 };
427 
// One chunk-mapping record made of three 32-bit fields.
struct ChunkInfo {
  // Index of the first chunk this record applies to.
  uint32_t first_chunk;
  // Number of samples held by each such chunk.
  uint32_t samples_per_chunk;
  // Index of the sample description used by those samples.
  uint32_t sample_description_index;
};
433 
434 // stsc.
436  DECLARE_BOX_METHODS(SampleToChunk);
437 
438  std::vector<ChunkInfo> chunk_info;
439 };
440 
441 // stsz.
442 struct SampleSize : FullBox {
443  DECLARE_BOX_METHODS(SampleSize);
444 
445  uint32_t sample_size;
446  uint32_t sample_count;
447  std::vector<uint32_t> sizes;
448 };
449 
450 // stz2.
452  DECLARE_BOX_METHODS(CompactSampleSize);
453 
454  uint8_t field_size;
455  std::vector<uint32_t> sizes;
456 };
457 
458 // co64.
460  DECLARE_BOX_METHODS(ChunkLargeOffset);
461 
462  std::vector<uint64_t> offsets;
463 };
464 
465 // stco.
467  DECLARE_BOX_METHODS(ChunkOffset);
468 };
469 
470 // stss. Optional.
471 struct SyncSample : FullBox {
472  DECLARE_BOX_METHODS(SyncSample);
473 
474  std::vector<uint32_t> sample_number;
475 };
476 
480 
481  bool ReadWrite(BoxBuffer* buffer);
482  uint32_t ComputeSize() const;
483 
484  uint8_t is_protected;
485  uint8_t per_sample_iv_size;
486  std::vector<uint8_t> key_id;
487 
488  // For pattern-based encryption.
489  uint8_t crypt_byte_block;
490  uint8_t skip_byte_block;
491 
492  // Present only if |is_protected == 1 && per_sample_iv_size == 0|.
493  std::vector<uint8_t> constant_iv;
494 };
495 
499 
500  bool ReadWrite(BoxBuffer* buffer);
501  uint32_t ComputeSize() const;
502 
503  int16_t roll_distance;
504 };
505 
507  DECLARE_BOX_METHODS(SampleGroupDescription);
508 
509  template <typename T>
510  bool ReadWriteEntries(BoxBuffer* buffer, std::vector<T>* entries);
511 
512  uint32_t grouping_type;
513  // Only present if grouping_type == 'seig'.
514  std::vector<CencSampleEncryptionInfoEntry>
515  cenc_sample_encryption_info_entries;
516  // Only present if grouping_type == 'roll'.
517  std::vector<AudioRollRecoveryEntry> audio_roll_recovery_entries;
518 };
519 
521  enum GroupDescriptionIndexBase {
522  kTrackGroupDescriptionIndexBase = 0,
523  kTrackFragmentGroupDescriptionIndexBase = 0x10000,
524  };
525 
526  uint32_t sample_count;
527  uint32_t group_description_index;
528 };
529 
531  DECLARE_BOX_METHODS(SampleToGroup);
532 
533  uint32_t grouping_type;
534  uint32_t grouping_type_parameter; // Version 1 only.
535  std::vector<SampleToGroupEntry> entries;
536 };
537 
538 struct SampleTable : Box {
539  DECLARE_BOX_METHODS(SampleTable);
540 
541  SampleDescription description;
542  DecodingTimeToSample decoding_time_to_sample;
543  CompositionTimeToSample composition_time_to_sample;
544  SampleToChunk sample_to_chunk;
545  // Either SampleSize or CompactSampleSize must present. Store in SampleSize.
546  SampleSize sample_size;
547  // Either ChunkOffset or ChunkLargeOffset must present. Store in
548  // ChunkLargeOffset.
549  ChunkLargeOffset chunk_large_offset;
550  SyncSample sync_sample;
551  std::vector<SampleGroupDescription> sample_group_descriptions;
552  std::vector<SampleToGroup> sample_to_groups;
553 };
554 
556  DECLARE_BOX_METHODS(MediaHeader);
557 
558  uint64_t creation_time;
559  uint64_t modification_time;
560  uint32_t timescale;
561  uint64_t duration;
562  Language language;
563 };
564 
566  DECLARE_BOX_METHODS(VideoMediaHeader);
567 
568  uint16_t graphicsmode;
569  uint16_t opcolor_red;
570  uint16_t opcolor_green;
571  uint16_t opcolor_blue;
572 };
573 
575  DECLARE_BOX_METHODS(SoundMediaHeader);
576 
577  uint16_t balance;
578 };
579 
581  DECLARE_BOX_METHODS(SubtitleMediaHeader);
582 };
583 
585  DECLARE_BOX_METHODS(DataEntryUrl);
586 
587  std::vector<uint8_t> location;
588 };
589 
591  DECLARE_BOX_METHODS(DataReference);
592 
593  // data entry can be either url or urn box. Fix to url box for now.
594  std::vector<DataEntryUrl> data_entry;
595 };
596 
598  DECLARE_BOX_METHODS(DataInformation);
599 
600  DataReference dref;
601 };
602 
604  DECLARE_BOX_METHODS(MediaInformation);
605 
606  DataInformation dinf;
607  SampleTable sample_table;
608 // Exactly one specific media header shall be present, vmhd, smhd, hmhd, nmhd.
609  VideoMediaHeader vmhd;
610  SoundMediaHeader smhd;
611  SubtitleMediaHeader sthd;
612 };
613 
614 struct Media : Box {
615  DECLARE_BOX_METHODS(Media);
616 
617  MediaHeader header;
618  HandlerReference handler;
619  MediaInformation information;
620 };
621 
622 struct Track : Box {
623  DECLARE_BOX_METHODS(Track);
624 
625  TrackHeader header;
626  Media media;
627  Edit edit;
628  SampleEncryption sample_encryption;
629 };
630 
632  DECLARE_BOX_METHODS(MovieExtendsHeader);
633 
634  uint64_t fragment_duration;
635 };
636 
638  DECLARE_BOX_METHODS(TrackExtends);
639 
640  uint32_t track_id;
641  uint32_t default_sample_description_index;
642  uint32_t default_sample_duration;
643  uint32_t default_sample_size;
644  uint32_t default_sample_flags;
645 };
646 
647 struct MovieExtends : Box {
648  DECLARE_BOX_METHODS(MovieExtends);
649 
650  MovieExtendsHeader header;
651  std::vector<TrackExtends> tracks;
652 };
653 
654 struct Movie : Box {
655  DECLARE_BOX_METHODS(Movie);
656 
657  MovieHeader header;
658  Metadata metadata; // Used to hold version information.
659  MovieExtends extends;
660  std::vector<Track> tracks;
661  std::vector<ProtectionSystemSpecificHeader> pssh;
662 };
663 
665  DECLARE_BOX_METHODS(TrackFragmentDecodeTime);
666 
667  uint64_t decode_time;
668 };
669 
671  DECLARE_BOX_METHODS(MovieFragmentHeader);
672 
673  uint32_t sequence_number;
674 };
675 
677  enum TrackFragmentFlagsMasks {
678  kBaseDataOffsetPresentMask = 0x000001,
679  kSampleDescriptionIndexPresentMask = 0x000002,
680  kDefaultSampleDurationPresentMask = 0x000008,
681  kDefaultSampleSizePresentMask = 0x000010,
682  kDefaultSampleFlagsPresentMask = 0x000020,
683  kDurationIsEmptyMask = 0x010000,
684  kDefaultBaseIsMoofMask = 0x020000,
685  };
686 
687  enum SampleFlagsMasks {
688  kReservedMask = 0xFC000000,
689  kSampleDependsOnMask = 0x03000000,
690  kSampleIsDependedOnMask = 0x00C00000,
691  kSampleHasRedundancyMask = 0x00300000,
692  kSamplePaddingValueMask = 0x000E0000,
693  kNonKeySampleMask = 0x00010000,
694  kSampleDegradationPriorityMask = 0x0000FFFF,
695  };
696 
697  DECLARE_BOX_METHODS(TrackFragmentHeader);
698 
699  uint32_t track_id;
700  uint32_t sample_description_index;
701  uint32_t default_sample_duration;
702  uint32_t default_sample_size;
703  uint32_t default_sample_flags;
704 };
705 
707  enum TrackFragmentFlagsMasks {
708  kDataOffsetPresentMask = 0x000001,
709  kFirstSampleFlagsPresentMask = 0x000004,
710  kSampleDurationPresentMask = 0x000100,
711  kSampleSizePresentMask = 0x000200,
712  kSampleFlagsPresentMask = 0x000400,
713  kSampleCompTimeOffsetsPresentMask = 0x000800,
714  };
715 
716  DECLARE_BOX_METHODS(TrackFragmentRun);
717 
718  uint32_t sample_count;
719  uint32_t data_offset;
720  std::vector<uint32_t> sample_flags;
721  std::vector<uint32_t> sample_sizes;
722  std::vector<uint32_t> sample_durations;
723  std::vector<int64_t> sample_composition_time_offsets;
724 };
725 
726 struct TrackFragment : Box {
727  DECLARE_BOX_METHODS(TrackFragment);
728 
729  TrackFragmentHeader header;
730  std::vector<TrackFragmentRun> runs;
731  bool decode_time_absent;
732  TrackFragmentDecodeTime decode_time;
733  std::vector<SampleGroupDescription> sample_group_descriptions;
734  std::vector<SampleToGroup> sample_to_groups;
735  SampleAuxiliaryInformationSize auxiliary_size;
736  SampleAuxiliaryInformationOffset auxiliary_offset;
737  SampleEncryption sample_encryption;
738 };
739 
740 struct MovieFragment : Box {
741  DECLARE_BOX_METHODS(MovieFragment);
742 
743  MovieFragmentHeader header;
744  std::vector<TrackFragment> tracks;
745  std::vector<ProtectionSystemSpecificHeader> pssh;
746 };
747 
749  enum SAPType {
750  TypeUnknown = 0,
751  Type1 = 1, // T(ept) = T(dec) = T(sap) = T(ptf)
752  Type2 = 2, // T(ept) = T(dec) = T(sap) < T(ptf)
753  Type3 = 3, // T(ept) < T(dec) = T(sap) <= T(ptf)
754  Type4 = 4, // T(ept) <= T(ptf) < T(dec) = T(sap)
755  Type5 = 5, // T(ept) = T(dec) < T(sap)
756  Type6 = 6, // T(ept) < T(dec) < T(sap)
757  };
758 
759  bool reference_type;
760  uint32_t referenced_size;
761  uint32_t subsegment_duration;
762  bool starts_with_sap;
763  SAPType sap_type;
764  uint32_t sap_delta_time;
765  // We add this field to keep track of earliest_presentation_time in this
766  // subsegment. It is not part of SegmentReference.
767  uint64_t earliest_presentation_time;
768 };
769 
771  DECLARE_BOX_METHODS(SegmentIndex);
772 
773  uint32_t reference_id;
774  uint32_t timescale;
775  uint64_t earliest_presentation_time;
776  uint64_t first_offset;
777  std::vector<SegmentReference> references;
778 };
779 
780 // The actual data is parsed and written separately.
781 struct MediaData : Box {
782  DECLARE_BOX_METHODS(MediaData);
783 
784  uint32_t data_size;
785 };
786 
787 struct CueSourceIDBox : Box {
788  DECLARE_BOX_METHODS(CueSourceIDBox);
789  int32_t source_id;
790 };
791 
792 struct CueTimeBox : Box {
793  DECLARE_BOX_METHODS(CueTimeBox);
794  std::string cue_current_time;
795 };
796 
797 struct CueIDBox : Box {
798  DECLARE_BOX_METHODS(CueIDBox);
799  std::string cue_id;
800 };
801 
802 struct CueSettingsBox : Box {
803  DECLARE_BOX_METHODS(CueSettingsBox);
804  std::string settings;
805 };
806 
807 struct CuePayloadBox : Box {
808  DECLARE_BOX_METHODS(CuePayloadBox);
809  std::string cue_text;
810 };
811 
812 struct VTTEmptyCueBox : Box {
813  DECLARE_BOX_METHODS(VTTEmptyCueBox);
814 };
815 
817  DECLARE_BOX_METHODS(VTTAdditionalTextBox);
818  std::string cue_additional_text;
819 };
820 
821 struct VTTCueBox : Box {
822  DECLARE_BOX_METHODS(VTTCueBox);
823 
824  CueSourceIDBox cue_source_id;
825  CueIDBox cue_id;
826  CueTimeBox cue_time;
827  CueSettingsBox cue_settings;
828  CuePayloadBox cue_payload;
829 };
830 
// Undefine the helper macro so it does not leak into files that include this
// header. The macro this header defines is DECLARE_BOX_METHODS; the previous
// `#undef DECLARE_BOX` named a macro that is never defined here, so it had no
// effect.
#undef DECLARE_BOX_METHODS
832 
833 } // namespace mp4
834 } // namespace media
835 } // namespace shaka
836 
837 #endif // PACKAGER_MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
All the methods that are virtual are virtual for mocking.
uint32_t ComputeSize()
Definition: box.cc:50
std::vector< uint8_t > sample_encryption_data
Implemented per http://mp4ra.org/#/references.
FourCC BoxType() const override