DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator
box_definitions.h
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
6 #define MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
7 
8 #include <vector>
9 
10 #include "packager/media/base/decrypt_config.h"
11 #include "packager/media/base/fourccs.h"
12 #include "packager/media/codecs/aac_audio_specific_config.h"
13 #include "packager/media/codecs/es_descriptor.h"
14 #include "packager/media/formats/mp4/box.h"
15 
16 namespace shaka {
17 namespace media {
18 
19 class BufferReader;
20 
21 namespace mp4 {
22 
// Track classification used by this header (e.g. as the type of
// SampleDescription::type). kInvalid is explicitly 0; the remaining
// enumerators take the consecutive values that follow it.
23 enum TrackType {
24  kInvalid = 0,
25  kVideo,
26  kAudio,
27  kHint,
28  kText,
29 };
30 
31 class BoxBuffer;
32 
// Declares the boilerplate shared by every concrete box type T:
//  - public: a default constructor, an overriding destructor and an
//    overriding BoxType() const returning the box's FourCC;
//  - private: overrides of ReadWriteInternal(BoxBuffer*) and
//    ComputeSizeInternal().
// The expansion ends with "public:" so that members written after the macro
// invocation are public again. Only declarations are produced; the matching
// definitions must be provided elsewhere.
33 #define DECLARE_BOX_METHODS(T) \
34  public: \
35  T(); \
36  ~T() override; \
37  FourCC BoxType() const override; \
38  \
39  private: \
40  bool ReadWriteInternal(BoxBuffer* buffer) override; \
41  uint32_t ComputeSizeInternal() override; \
42  \
43  public:
44 
// Box carrying a major brand, a minor version and a list of compatible
// brands. NOTE(review): presumably the ISO BMFF 'ftyp' box -- confirm against
// the BoxType() definition.
45 struct FileType : Box {
46  DECLARE_BOX_METHODS(FileType);
47 
48  FourCC major_brand;
49  uint32_t minor_version;
50  std::vector<FourCC> compatible_brands;
51 };
52 
54  FourCC BoxType() const override;
55 };
56 
58  DECLARE_BOX_METHODS(ProtectionSystemSpecificHeader);
59 
60  std::vector<uint8_t> raw_box;
61 };
62 
64  DECLARE_BOX_METHODS(SampleAuxiliaryInformationOffset);
65 
66  std::vector<uint64_t> offsets;
67 };
68 
70  DECLARE_BOX_METHODS(SampleAuxiliaryInformationSize);
71 
72  uint8_t default_sample_info_size;
73  uint32_t sample_count;
74  std::vector<uint8_t> sample_info_sizes;
75 };
76 
86  bool ReadWrite(uint8_t iv_size,
87  bool has_subsamples,
88  BoxBuffer* buffer);
95  bool ParseFromBuffer(uint8_t iv_size,
96  bool has_subsamples,
97  BufferReader* reader);
99  uint32_t ComputeSize() const;
102  uint32_t GetTotalSizeOfSubsamples() const;
103 
104  std::vector<uint8_t> initialization_vector;
105  std::vector<SubsampleEntry> subsamples;
106 };
107 
109  enum SampleEncryptionFlags {
110  kUseSubsampleEncryption = 2,
111  };
112 
113  DECLARE_BOX_METHODS(SampleEncryption);
120  uint8_t iv_size,
121  std::vector<SampleEncryptionEntry>* sample_encryption_entries) const;
122 
125  std::vector<uint8_t> sample_encryption_data;
126 
127  uint8_t iv_size;
128  std::vector<SampleEncryptionEntry> sample_encryption_entries;
129 };
130 
// Box holding a single FourCC. ProtectionSchemeInfo embeds one as its
// |format| member. NOTE(review): the sample entries' GetActualFormat() reads
// sinf.format.format for encv/enca entries, which presumably resolves to the
// |format| field below -- confirm.
131 struct OriginalFormat : Box {
132  DECLARE_BOX_METHODS(OriginalFormat);
133 
134  FourCC format;
135 };
136 
// FullBox holding a FourCC |type| and a 32-bit |version|. ProtectionSchemeInfo
// embeds one as its |type| member.
// NOTE(review): |version| here is declared by this struct itself; check that
// it is not confused with any version member inherited from FullBox.
137 struct SchemeType : FullBox {
138  DECLARE_BOX_METHODS(SchemeType);
139 
140  FourCC type;
141  uint32_t version;
142 };
143 
145  DECLARE_BOX_METHODS(TrackEncryption);
146 
147  uint8_t default_is_protected;
148  uint8_t default_per_sample_iv_size;
149  std::vector<uint8_t> default_kid;
150 
151  // For pattern-based encryption.
152  uint8_t default_crypt_byte_block;
153  uint8_t default_skip_byte_block;
154 
155  // Present only if
156  // |default_is_protected == 1 && default_per_sample_iv_size == 0|.
157  std::vector<uint8_t> default_constant_iv;
158 };
159 
// Container box whose only member is a TrackEncryption box.
// ProtectionSchemeInfo embeds one as its |info| member.
160 struct SchemeInfo : Box {
161  DECLARE_BOX_METHODS(SchemeInfo);
162 
163  TrackEncryption track_encryption;
164 };
165 
167  DECLARE_BOX_METHODS(ProtectionSchemeInfo);
168 
169  OriginalFormat format;
170  SchemeType type;
171  SchemeInfo info;
172 };
173 
175  DECLARE_BOX_METHODS(MovieHeader);
176 
177  uint64_t creation_time;
178  uint64_t modification_time;
179  uint32_t timescale;
180  uint64_t duration;
181  int32_t rate;
182  int16_t volume;
183  uint32_t next_track_id;
184 };
185 
187  enum TrackHeaderFlags {
188  kTrackEnabled = 0x000001,
189  kTrackInMovie = 0x000002,
190  kTrackInPreview = 0x000004,
191  };
192 
193  DECLARE_BOX_METHODS(TrackHeader);
194 
195  uint64_t creation_time;
196  uint64_t modification_time;
197  uint32_t track_id;
198  uint64_t duration;
199  int16_t layer;
200  int16_t alternate_group;
201  int16_t volume;
202  // width and height specify the track's visual presentation size as
203  // fixed-point 16.16 values.
204  uint32_t width;
205  uint32_t height;
206 };
207 
209  uint64_t segment_duration;
210  int64_t media_time;
211  int16_t media_rate_integer;
212  int16_t media_rate_fraction;
213 };
214 
// FullBox holding the list of edit entries (EditListEntry: segment duration,
// media time and media rate) for a track.
215 struct EditList : FullBox {
216  DECLARE_BOX_METHODS(EditList);
217 
218  std::vector<EditListEntry> edits;
219 };
220 
// Container box wrapping a single EditList. Track embeds one as |edit|.
221 struct Edit : Box {
222  DECLARE_BOX_METHODS(Edit);
223 
224  EditList list;
225 };
226 
228  DECLARE_BOX_METHODS(HandlerReference);
229 
230  FourCC handler_type;
231 };
232 
// Helper (not itself a Box) holding a language code string. It is embedded by
// MediaHeader and ID3v2, and provides its own serialization hooks instead of
// the DECLARE_BOX_METHODS set.
233 struct Language {
234  // Reads or writes |code| through |buffer|.
235  // NOTE(review): the bool presumably signals success -- confirm.
234  bool ReadWrite(BoxBuffer* buffer);
236  // Size this field contributes to the enclosing box.
237  // NOTE(review): presumably in bytes -- confirm.
235  uint32_t ComputeSize() const;
236 
237  std::string code;
238 };
239 
// Helper (not itself a Box) holding an |owner| string and a |value| string,
// with its own ReadWrite()/ComputeSize() hooks.
// NOTE(review): presumably models an ID3v2 private (PRIV) frame -- confirm.
241 struct PrivFrame {
242  bool ReadWrite(BoxBuffer* buffer);
243  uint32_t ComputeSize() const;
244 
245  std::string owner;
246  std::string value;
247 };
248 
251 struct ID3v2 : FullBox {
252  DECLARE_BOX_METHODS(ID3v2);
253 
254  Language language;
255 
258 };
259 
// FullBox grouping a HandlerReference with an ID3v2 box. Movie embeds one as
// |metadata|, where it is described as holding version information.
260 struct Metadata : FullBox {
261  DECLARE_BOX_METHODS(Metadata);
262 
263  HandlerReference handler;
264  ID3v2 id3v2;
265 };
266 
267 // This defines a common structure for various CodecConfiguration boxes:
268 // AVCConfiguration, HEVCConfiguration and VPCodecConfiguration.
269 // Note that unlike the other two CodecConfiguration boxes, VPCodecConfiguration
270 // box inherits from FullBox instead of Box, according to VP Codec ISO Media
271 // File Format Binding specification. It will be handled properly in the
272 // implementation.
274  DECLARE_BOX_METHODS(CodecConfiguration);
275 
276  FourCC box_type;
277  // Contains full codec configuration record, including possible extension
278  // boxes.
279  std::vector<uint8_t> data;
280 };
281 
283  DECLARE_BOX_METHODS(PixelAspectRatio);
284 
285  uint32_t h_spacing;
286  uint32_t v_spacing;
287 };
288 
290  DECLARE_BOX_METHODS(VideoSampleEntry);
291  // Returns actual format of this sample entry.
292  FourCC GetActualFormat() const {
293  return format == FOURCC_encv ? sinf.format.format : format;
294  }
295  // Returns the box type of codec configuration box from video format.
296  FourCC GetCodecConfigurationBoxType(FourCC format) const;
297 
298  FourCC format;
299  uint16_t data_reference_index;
300  uint16_t width;
301  uint16_t height;
302 
303  PixelAspectRatio pixel_aspect;
305  CodecConfiguration codec_configuration;
306 };
307 
309  DECLARE_BOX_METHODS(ElementaryStreamDescriptor);
310 
311  AACAudioSpecificConfig aac_audio_specific_config;
312  ESDescriptor es_descriptor;
313 };
314 
// DTS-specific configuration box: sampling frequency, maximum and average
// bitrate, PCM sample depth, plus any remaining bytes kept raw in
// |extra_data|. AudioSampleEntry embeds one as |ddts|.
// NOTE(review): units of the frequency/bitrate fields are not stated here --
// confirm against the DTS specification.
315 struct DTSSpecific : Box {
316  DECLARE_BOX_METHODS(DTSSpecific);
317 
318  uint32_t sampling_frequency;
319  uint32_t max_bitrate;
320  uint32_t avg_bitrate;
321  uint8_t pcm_sample_depth;
322  std::vector<uint8_t> extra_data;
323 };
324 
// AC-3 specific box whose payload is kept as an uninterpreted byte vector.
// AudioSampleEntry embeds one as |dac3|.
325 struct AC3Specific : Box {
326  DECLARE_BOX_METHODS(AC3Specific);
327 
328  std::vector<uint8_t> data;
329 };
330 
// E-AC-3 specific box whose payload is kept as an uninterpreted byte vector.
// AudioSampleEntry embeds one as |dec3|.
331 struct EC3Specific : Box {
332  DECLARE_BOX_METHODS(EC3Specific);
333 
334  std::vector<uint8_t> data;
335 };
336 
// Opus-specific box. Stores the Opus identification header bytes together
// with the pre-skip value derived from them. AudioSampleEntry embeds one as
// |dops|.
337 struct OpusSpecific : Box {
338  DECLARE_BOX_METHODS(OpusSpecific);
339 
340  std::vector<uint8_t> opus_identification_header;
341  // The number of priming samples. Extracted from |opus_identification_header|.
342  uint16_t preskip;
343 };
344 
346  DECLARE_BOX_METHODS(AudioSampleEntry);
347  // Returns actual format of this sample entry.
348  FourCC GetActualFormat() const {
349  return format == FOURCC_enca ? sinf.format.format : format;
350  }
351 
352  FourCC format;
353  uint16_t data_reference_index;
354  uint16_t channelcount;
355  uint16_t samplesize;
356  uint32_t samplerate;
357 
359 
361  DTSSpecific ddts;
362  AC3Specific dac3;
363  EC3Specific dec3;
364  OpusSpecific dops;
365 };
366 
368  DECLARE_BOX_METHODS(WebVTTConfigurationBox);
369  std::string config;
370 };
371 
373  DECLARE_BOX_METHODS(WebVTTSourceLabelBox);
374  std::string source_label;
375 };
376 
378  DECLARE_BOX_METHODS(TextSampleEntry);
379 
380  // Specifies fourcc of this sample entry. It needs to be set on write, e.g.
381  // set to 'wvtt' to write WVTTSampleEntry; On read, it is recovered from box
382  // header.
383  FourCC format;
384  uint16_t data_reference_index;
385 
386  // Sub boxes for wvtt text sample entry.
387  WebVTTConfigurationBox config;
388  WebVTTSourceLabelBox label;
389  // Optional MPEG4BitRateBox.
390 };
391 
393  DECLARE_BOX_METHODS(SampleDescription);
394 
395  TrackType type;
396  // TODO(kqyang): Clean up the code to have one single member, e.g. by creating
397  // SampleEntry struct, std::vector<SampleEntry> sample_entries.
398  std::vector<VideoSampleEntry> video_entries;
399  std::vector<AudioSampleEntry> audio_entries;
400  std::vector<TextSampleEntry> text_entries;
401 };
402 
// One entry of DecodingTimeToSample::decoding_time (the 'stts' box): a sample
// count paired with a sample delta.
// NOTE(review): presumably |sample_count| consecutive samples each last
// |sample_delta| timescale units -- confirm against the stts definition.
403 struct DecodingTime {
404  uint32_t sample_count;
405  uint32_t sample_delta;
406 };
407 
408 // stts.
410  DECLARE_BOX_METHODS(DecodingTimeToSample);
411 
412  std::vector<DecodingTime> decoding_time;
413 };
414 
416  uint32_t sample_count;
417  // If version == 0, sample_offset is uint32_t;
418  // If version == 1, sample_offset is int32_t.
419  // Use int64_t so both can be supported properly.
420  int64_t sample_offset;
421 };
422 
423 // ctts. Optional.
425  DECLARE_BOX_METHODS(CompositionTimeToSample);
426 
427  std::vector<CompositionOffset> composition_offset;
428 };
429 
// One entry of SampleToChunk::chunk_info (the 'stsc' box): the first chunk
// the entry applies to, the number of samples per chunk and the index of the
// sample description in use.
// NOTE(review): whether the indices are 0- or 1-based is not visible here --
// confirm against the stsc definition.
430 struct ChunkInfo {
431  uint32_t first_chunk;
432  uint32_t samples_per_chunk;
433  uint32_t sample_description_index;
434 };
435 
436 // stsc.
438  DECLARE_BOX_METHODS(SampleToChunk);
439 
440  std::vector<ChunkInfo> chunk_info;
441 };
442 
443 // stsz.
// Sample size box. SampleTable stores sample sizes here regardless of whether
// the file used SampleSize or CompactSampleSize (see the comment on
// SampleTable::sample_size).
// NOTE(review): presumably |sizes| is only meaningful when |sample_size| is
// 0, i.e. samples do not share one constant size -- confirm.
444 struct SampleSize : FullBox {
445  DECLARE_BOX_METHODS(SampleSize);
446 
447  uint32_t sample_size;
448  uint32_t sample_count;
449  std::vector<uint32_t> sizes;
450 };
451 
452 // stz2.
454  DECLARE_BOX_METHODS(CompactSampleSize);
455 
456  uint8_t field_size;
457  std::vector<uint32_t> sizes;
458 };
459 
460 // co64.
462  DECLARE_BOX_METHODS(ChunkLargeOffset);
463 
464  std::vector<uint64_t> offsets;
465 };
466 
467 // stco.
469  DECLARE_BOX_METHODS(ChunkOffset);
470 };
471 
472 // stss. Optional.
// Sync sample box: a list of sample numbers. SampleTable embeds one as
// |sync_sample|.
// NOTE(review): presumably the listed samples are the sync (key) samples and
// numbering is 1-based per the stss definition -- confirm.
473 struct SyncSample : FullBox {
474  DECLARE_BOX_METHODS(SyncSample);
475 
476  std::vector<uint32_t> sample_number;
477 };
478 
482 
483  bool ReadWrite(BoxBuffer* buffer);
484  uint32_t ComputeSize() const;
485 
486  uint8_t is_protected;
487  uint8_t per_sample_iv_size;
488  std::vector<uint8_t> key_id;
489 
490  // For pattern-based encryption.
491  uint8_t crypt_byte_block;
492  uint8_t skip_byte_block;
493 
494  // Present only if |is_protected == 1 && per_sample_iv_size == 0|.
495  std::vector<uint8_t> constant_iv;
496 };
497 
501 
502  bool ReadWrite(BoxBuffer* buffer);
503  uint32_t ComputeSize() const;
504 
505  int16_t roll_distance;
506 };
507 
509  DECLARE_BOX_METHODS(SampleGroupDescription);
510 
511  template <typename T>
512  bool ReadWriteEntries(BoxBuffer* buffer, std::vector<T>* entries);
513 
514  uint32_t grouping_type;
515  // Only present if grouping_type == 'seig'.
516  std::vector<CencSampleEncryptionInfoEntry>
517  cenc_sample_encryption_info_entries;
518  // Only present if grouping_type == 'roll'.
519  std::vector<AudioRollRecoveryEntry> audio_roll_recovery_entries;
520 };
521 
523  enum GroupDescriptionIndexBase {
524  kTrackGroupDescriptionIndexBase = 0,
525  kTrackFragmentGroupDescriptionIndexBase = 0x10000,
526  };
527 
528  uint32_t sample_count;
529  uint32_t group_description_index;
530 };
531 
533  DECLARE_BOX_METHODS(SampleToGroup);
534 
535  uint32_t grouping_type;
536  uint32_t grouping_type_parameter; // Version 1 only.
537  std::vector<SampleToGroupEntry> entries;
538 };
539 
// Container box aggregating the per-track sample tables: sample descriptions,
// decoding/composition timing, sample-to-chunk mapping, sample sizes, chunk
// offsets, sync samples and sample grouping information. MediaInformation
// embeds one as |sample_table|.
540 struct SampleTable : Box {
541  DECLARE_BOX_METHODS(SampleTable);
542 
543  SampleDescription description;
544  DecodingTimeToSample decoding_time_to_sample;
545  CompositionTimeToSample composition_time_to_sample;
546  SampleToChunk sample_to_chunk;
547  // Either SampleSize or CompactSampleSize must be present. Stored in
547  // SampleSize.
548  SampleSize sample_size;
549  // Either ChunkOffset or ChunkLargeOffset must be present. Stored in
550  // ChunkLargeOffset.
551  ChunkLargeOffset chunk_large_offset;
552  SyncSample sync_sample;
553  std::vector<SampleGroupDescription> sample_group_descriptions;
554  std::vector<SampleToGroup> sample_to_groups;
555 };
556 
558  DECLARE_BOX_METHODS(MediaHeader);
559 
560  uint64_t creation_time;
561  uint64_t modification_time;
562  uint32_t timescale;
563  uint64_t duration;
564  Language language;
565 };
566 
568  DECLARE_BOX_METHODS(VideoMediaHeader);
569 
570  uint16_t graphicsmode;
571  uint16_t opcolor_red;
572  uint16_t opcolor_green;
573  uint16_t opcolor_blue;
574 };
575 
577  DECLARE_BOX_METHODS(SoundMediaHeader);
578 
579  uint16_t balance;
580 };
581 
583  DECLARE_BOX_METHODS(SubtitleMediaHeader);
584 };
585 
587  DECLARE_BOX_METHODS(DataEntryUrl);
588 
589  std::vector<uint8_t> location;
590 };
591 
593  DECLARE_BOX_METHODS(DataReference);
594 
595  // data entry can be either url or urn box. Fix to url box for now.
596  std::vector<DataEntryUrl> data_entry;
597 };
598 
600  DECLARE_BOX_METHODS(DataInformation);
601 
602  DataReference dref;
603 };
604 
606  DECLARE_BOX_METHODS(MediaInformation);
607 
608  DataInformation dinf;
609  SampleTable sample_table;
610  // Exactly one specific media header shall be present, vmhd, smhd, hmhd, nmhd.
611  VideoMediaHeader vmhd;
612  SoundMediaHeader smhd;
613  SubtitleMediaHeader sthd;
614 };
615 
// Container box grouping a track's media header, handler reference and media
// information. Track embeds one as |media|.
616 struct Media : Box {
617  DECLARE_BOX_METHODS(Media);
618 
619  MediaHeader header;
620  HandlerReference handler;
621  MediaInformation information;
622 };
623 
// Container box describing one track: its header, media, edit box and sample
// encryption box. Movie holds a vector of these in |tracks|.
624 struct Track : Box {
625  DECLARE_BOX_METHODS(Track);
626 
627  TrackHeader header;
628  Media media;
629  Edit edit;
630  SampleEncryption sample_encryption;
631 };
632 
634  DECLARE_BOX_METHODS(MovieExtendsHeader);
635 
636  uint64_t fragment_duration;
637 };
638 
640  DECLARE_BOX_METHODS(TrackExtends);
641 
642  uint32_t track_id;
643  uint32_t default_sample_description_index;
644  uint32_t default_sample_duration;
645  uint32_t default_sample_size;
646  uint32_t default_sample_flags;
647 };
648 
// Container box holding a MovieExtendsHeader and one TrackExtends per entry
// in |tracks|. Movie embeds one as |extends|.
649 struct MovieExtends : Box {
650  DECLARE_BOX_METHODS(MovieExtends);
651 
652  MovieExtendsHeader header;
653  std::vector<TrackExtends> tracks;
654 };
655 
// Top-level movie container box: movie header, metadata, movie-extends
// information, the list of tracks and any protection system specific headers.
656 struct Movie : Box {
657  DECLARE_BOX_METHODS(Movie);
658 
659  MovieHeader header;
660  Metadata metadata; // Used to hold version information.
661  MovieExtends extends;
662  std::vector<Track> tracks;
663  std::vector<ProtectionSystemSpecificHeader> pssh;
664 };
665 
667  DECLARE_BOX_METHODS(TrackFragmentDecodeTime);
668 
669  uint64_t decode_time;
670 };
671 
673  DECLARE_BOX_METHODS(MovieFragmentHeader);
674 
675  uint32_t sequence_number;
676 };
677 
679  enum TrackFragmentFlagsMasks {
680  kBaseDataOffsetPresentMask = 0x000001,
681  kSampleDescriptionIndexPresentMask = 0x000002,
682  kDefaultSampleDurationPresentMask = 0x000008,
683  kDefaultSampleSizePresentMask = 0x000010,
684  kDefaultSampleFlagsPresentMask = 0x000020,
685  kDurationIsEmptyMask = 0x010000,
686  kDefaultBaseIsMoofMask = 0x020000,
687  };
688 
689  enum SampleFlagsMasks {
690  kReservedMask = 0xFC000000,
691  kSampleDependsOnMask = 0x03000000,
692  kSampleIsDependedOnMask = 0x00C00000,
693  kSampleHasRedundancyMask = 0x00300000,
694  kSamplePaddingValueMask = 0x000E0000,
695  kNonKeySampleMask = 0x00010000,
696  kSampleDegradationPriorityMask = 0x0000FFFF,
697  };
698 
699  DECLARE_BOX_METHODS(TrackFragmentHeader);
700 
701  uint32_t track_id;
702  uint32_t sample_description_index;
703  uint32_t default_sample_duration;
704  uint32_t default_sample_size;
705  uint32_t default_sample_flags;
706 };
707 
709  enum TrackFragmentFlagsMasks {
710  kDataOffsetPresentMask = 0x000001,
711  kFirstSampleFlagsPresentMask = 0x000004,
712  kSampleDurationPresentMask = 0x000100,
713  kSampleSizePresentMask = 0x000200,
714  kSampleFlagsPresentMask = 0x000400,
715  kSampleCompTimeOffsetsPresentMask = 0x000800,
716  };
717 
718  DECLARE_BOX_METHODS(TrackFragmentRun);
719 
720  uint32_t sample_count;
721  uint32_t data_offset;
722  std::vector<uint32_t> sample_flags;
723  std::vector<uint32_t> sample_sizes;
724  std::vector<uint32_t> sample_durations;
725  std::vector<int64_t> sample_composition_time_offsets;
726 };
727 
// Container box for one track's portion of a movie fragment: the fragment
// header, its sample runs, the decode time, sample grouping information and
// the sample auxiliary / encryption boxes. MovieFragment holds a vector of
// these in |tracks|.
728 struct TrackFragment : Box {
729  DECLARE_BOX_METHODS(TrackFragment);
730 
731  TrackFragmentHeader header;
732  std::vector<TrackFragmentRun> runs;
733  // NOTE(review): presumably true when no TrackFragmentDecodeTime box was
733  // found, in which case |decode_time| is not meaningful -- confirm.
733  bool decode_time_absent;
734  TrackFragmentDecodeTime decode_time;
735  std::vector<SampleGroupDescription> sample_group_descriptions;
736  std::vector<SampleToGroup> sample_to_groups;
737  SampleAuxiliaryInformationSize auxiliary_size;
738  SampleAuxiliaryInformationOffset auxiliary_offset;
739  SampleEncryption sample_encryption;
740 };
741 
// Container box for a movie fragment: the fragment header, one TrackFragment
// per entry in |tracks|, and any protection system specific headers.
742 struct MovieFragment : Box {
743  DECLARE_BOX_METHODS(MovieFragment);
744 
745  MovieFragmentHeader header;
746  std::vector<TrackFragment> tracks;
747  std::vector<ProtectionSystemSpecificHeader> pssh;
748 };
749 
751  enum SAPType {
752  TypeUnknown = 0,
753  Type1 = 1, // T(ept) = T(dec) = T(sap) = T(ptf)
754  Type2 = 2, // T(ept) = T(dec) = T(sap) < T(ptf)
755  Type3 = 3, // T(ept) < T(dec) = T(sap) <= T(ptf)
756  Type4 = 4, // T(ept) <= T(ptf) < T(dec) = T(sap)
757  Type5 = 5, // T(ept) = T(dec) < T(sap)
758  Type6 = 6, // T(ept) < T(dec) < T(sap)
759  };
760 
761  bool reference_type;
762  uint32_t referenced_size;
763  uint32_t subsegment_duration;
764  bool starts_with_sap;
765  SAPType sap_type;
766  uint32_t sap_delta_time;
767  // We add this field to keep track of earliest_presentation_time in this
768  // subsegment. It is not part of SegmentReference.
769  uint64_t earliest_presentation_time;
770 };
771 
773  DECLARE_BOX_METHODS(SegmentIndex);
774 
775  uint32_t reference_id;
776  uint32_t timescale;
777  uint64_t earliest_presentation_time;
778  uint64_t first_offset;
779  std::vector<SegmentReference> references;
780 };
781 
782 // The actual data is parsed and written separately.
// Media data box. Only the payload size is tracked here; as noted above, the
// payload itself is parsed and written separately.
783 struct MediaData : Box {
784  DECLARE_BOX_METHODS(MediaData);
785 
786  uint32_t data_size;
787 };
788 
// Box holding a signed 32-bit cue source id. VTTCueBox embeds one as
// |cue_source_id|.
789 struct CueSourceIDBox : Box {
790  DECLARE_BOX_METHODS(CueSourceIDBox);
791  int32_t source_id;
792 };
793 
// Box holding the cue's current time as a string. VTTCueBox embeds one as
// |cue_time|.
794 struct CueTimeBox : Box {
795  DECLARE_BOX_METHODS(CueTimeBox);
796  std::string cue_current_time;
797 };
798 
// Box holding a cue identifier string. VTTCueBox embeds one as |cue_id|.
799 struct CueIDBox : Box {
800  DECLARE_BOX_METHODS(CueIDBox);
801  std::string cue_id;
802 };
803 
// Box holding the cue settings as a string. VTTCueBox embeds one as
// |cue_settings|.
804 struct CueSettingsBox : Box {
805  DECLARE_BOX_METHODS(CueSettingsBox);
806  std::string settings;
807 };
808 
// Box holding the cue text. VTTCueBox embeds one as |cue_payload|.
809 struct CuePayloadBox : Box {
810  DECLARE_BOX_METHODS(CuePayloadBox);
811  std::string cue_text;
812 };
813 
// Box with no data members of its own; only the standard box methods are
// declared.
// NOTE(review): presumably marks a sample interval with no active cue --
// confirm against the WebVTT-in-ISOBMFF specification.
814 struct VTTEmptyCueBox : Box {
815  DECLARE_BOX_METHODS(VTTEmptyCueBox);
816 };
817 
819  DECLARE_BOX_METHODS(VTTAdditionalTextBox);
820  std::string cue_additional_text;
821 };
822 
// Container box for a single WebVTT cue, composed of the source id, cue id,
// cue time, cue settings and cue payload boxes declared above.
823 struct VTTCueBox : Box {
824  DECLARE_BOX_METHODS(VTTCueBox);
825 
826  CueSourceIDBox cue_source_id;
827  CueIDBox cue_id;
828  CueTimeBox cue_time;
829  CueSettingsBox cue_settings;
830  CuePayloadBox cue_payload;
831 };
832 
// Undefine the helper macro defined near the top of this header so that it
// does not leak into files that include it. The previous directive named
// DECLARE_BOX, which is never defined, so it had no effect and left
// DECLARE_BOX_METHODS visible to every includer.
833 #undef DECLARE_BOX_METHODS
834 
835 } // namespace mp4
836 } // namespace media
837 } // namespace shaka
838 
839 #endif // MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
bool ParseFromBuffer(uint8_t iv_size, bool has_subsamples, BufferReader *reader)
Implemented per http://id3.org/id3v2.4.0-frames.
FourCC BoxType() const override
bool ReadWrite(uint8_t iv_size, bool has_subsamples, BoxBuffer *buffer)
std::vector< uint8_t > sample_encryption_data
bool ParseFromSampleEncryptionData(uint8_t iv_size, std::vector< SampleEncryptionEntry > *sample_encryption_entries) const
PrivFrame private_frame
We only support PrivateFrame in ID3. Other frames are ignored.