DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs Enumerator
box_definitions.h
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
6 #define MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
7 
8 #include <vector>
9 
10 #include "packager/media/formats/mp4/aac_audio_specific_config.h"
11 #include "packager/media/formats/mp4/box.h"
12 #include "packager/media/formats/mp4/es_descriptor.h"
13 #include "packager/media/formats/mp4/fourccs.h"
14 
15 namespace edash_packager {
16 namespace media {
17 
18 class BufferReader;
19 
20 namespace mp4 {
21 
// Kind of media carried by a track. kInvalid is explicitly 0 and the other
// enumerators take the following consecutive values. In this header it is the
// type of HandlerReference::type and SampleDescription::type.
22 enum TrackType {
23  kInvalid = 0,
24  kVideo,
25  kAudio,
26  kHint,
27  kText,
28 };
29 
30 class BoxBuffer;
31 
// Declares the members that every concrete box type T in this header repeats:
// a public default constructor, destructor and BoxType() override, followed by
// private ReadWriteInternal() / ComputeSizeInternal() overrides. The expansion
// ends with "public:" so that whatever the struct declares after the macro is
// public again. Only declarations are produced; the definitions are not part
// of this header.
32 #define DECLARE_BOX_METHODS(T) \
33  public: \
34  T(); \
35  ~T() override; \
36  FourCC BoxType() const override; \
37  \
38  private: \
39  bool ReadWriteInternal(BoxBuffer* buffer) override; \
40  uint32_t ComputeSizeInternal() override; \
41  \
42  public:
43 
// Box carrying a major brand, a minor version and a list of compatible brands.
// NOTE(review): presumably the ISO BMFF 'ftyp' box -- confirm against the
// BoxType() definition, which is not in this header.
44 struct FileType : Box {
45  DECLARE_BOX_METHODS(FileType);
46 
47  FourCC major_brand;
48  uint32_t minor_version;
49  std::vector<FourCC> compatible_brands;
50 };
51 
53  FourCC BoxType() const override;
54 };
55 
57  DECLARE_BOX_METHODS(ProtectionSystemSpecificHeader);
58 
59  std::vector<uint8_t> system_id;
60  std::vector<uint8_t> data;
61  std::vector<uint8_t> raw_box;
62 };
63 
65  DECLARE_BOX_METHODS(SampleAuxiliaryInformationOffset);
66 
67  std::vector<uint64_t> offsets;
68 };
69 
71  DECLARE_BOX_METHODS(SampleAuxiliaryInformationSize);
72 
73  uint8_t default_sample_info_size;
74  uint32_t sample_count;
75  std::vector<uint8_t> sample_info_sizes;
76 };
77 
// Box holding a single FourCC. It is the `format` member of
// ProtectionSchemeInfo, and VideoSampleEntry/AudioSampleEntry::GetActualFormat()
// return this value (via sinf.format.format) when the entry's own format is
// FOURCC_ENCV / FOURCC_ENCA.
78 struct OriginalFormat : Box {
79  DECLARE_BOX_METHODS(OriginalFormat);
80 
81  FourCC format;
82 };
83 
// Full box holding a scheme FourCC and a 32-bit version. It is the `type`
// member of ProtectionSchemeInfo.
84 struct SchemeType : FullBox {
85  DECLARE_BOX_METHODS(SchemeType);
86 
87  FourCC type;
88  uint32_t version;
89 };
90 
92  DECLARE_BOX_METHODS(TrackEncryption);
93 
94  // Note: this definition is specific to the CENC protection type.
95  bool is_encrypted;
96  uint8_t default_iv_size;
97  std::vector<uint8_t> default_kid;
98 };
99 
// Container box whose only data member is a TrackEncryption box. It is the
// `info` member of ProtectionSchemeInfo.
100 struct SchemeInfo : Box {
101  DECLARE_BOX_METHODS(SchemeInfo);
102 
103  TrackEncryption track_encryption;
104 };
105 
107  DECLARE_BOX_METHODS(ProtectionSchemeInfo);
108 
109  OriginalFormat format;
110  SchemeType type;
111  SchemeInfo info;
112 };
113 
115  DECLARE_BOX_METHODS(MovieHeader);
116 
117  uint64_t creation_time;
118  uint64_t modification_time;
119  uint32_t timescale;
120  uint64_t duration;
121  int32_t rate;
122  int16_t volume;
123  uint32_t next_track_id;
124 };
125 
127  enum TrackHeaderFlags {
128  kTrackEnabled = 0x000001,
129  kTrackInMovie = 0x000002,
130  kTrackInPreview = 0x000004,
131  };
132 
133  DECLARE_BOX_METHODS(TrackHeader);
134 
135  uint64_t creation_time;
136  uint64_t modification_time;
137  uint32_t track_id;
138  uint64_t duration;
139  int16_t layer;
140  int16_t alternate_group;
141  int16_t volume;
142  // width and height specify the track's visual presentation size as
143  // fixed-point 16.16 values.
144  uint32_t width;
145  uint32_t height;
146 };
147 
149  uint64_t segment_duration;
150  int64_t media_time;
151  int16_t media_rate_integer;
152  int16_t media_rate_fraction;
153 };
154 
// Full box holding a list of EditListEntry records (segment duration, media
// time and integer/fraction media rate per entry).
155 struct EditList : FullBox {
156  DECLARE_BOX_METHODS(EditList);
157 
158  std::vector<EditListEntry> edits;
159 };
160 
// Container box wrapping a single EditList. Track embeds it as `edit`.
161 struct Edit : Box {
162  DECLARE_BOX_METHODS(Edit);
163 
164  EditList list;
165 };
166 
168  DECLARE_BOX_METHODS(HandlerReference);
169 
170  TrackType type;
171 };
172 
174  DECLARE_BOX_METHODS(CodecConfigurationRecord);
175 
176  FourCC box_type;
177  // Contains full codec configuration record, including possible extension
178  // boxes.
179  std::vector<uint8_t> data;
180 };
181 
183  DECLARE_BOX_METHODS(PixelAspectRatio);
184 
185  uint32_t h_spacing;
186  uint32_t v_spacing;
187 };
188 
190  DECLARE_BOX_METHODS(VideoSampleEntry);
191  // Returns actual format of this sample entry.
192  FourCC GetActualFormat() const {
193  return format == FOURCC_ENCV ? sinf.format.format : format;
194  }
195 
196  FourCC format;
197  uint16_t data_reference_index;
198  uint16_t width;
199  uint16_t height;
200 
201  PixelAspectRatio pixel_aspect;
203  CodecConfigurationRecord codec_config_record;
204 };
205 
207  DECLARE_BOX_METHODS(ElementaryStreamDescriptor);
208 
209  AACAudioSpecificConfig aac_audio_specific_config;
210  ESDescriptor es_descriptor;
211 };
212 
// Box whose payload is kept as a plain byte vector. AudioSampleEntry embeds it
// as `ddts`.
213 struct DTSSpecific : Box {
214  DECLARE_BOX_METHODS(DTSSpecific);
215 
216  std::vector<uint8_t> data;
217 };
218 
220  DECLARE_BOX_METHODS(AudioSampleEntry);
221  // Returns actual format of this sample entry.
222  FourCC GetActualFormat() const {
223  return format == FOURCC_ENCA ? sinf.format.format : format;
224  }
225 
226  FourCC format;
227  uint16_t data_reference_index;
228  uint16_t channelcount;
229  uint16_t samplesize;
230  uint32_t samplerate;
231 
234  DTSSpecific ddts;
235 };
236 
238  DECLARE_BOX_METHODS(WebVTTConfigurationBox);
239  std::string config;
240 };
241 
243  DECLARE_BOX_METHODS(WebVTTSourceLabelBox);
244  std::string source_label;
245 };
246 
248  DECLARE_BOX_METHODS(WVTTSampleEntry);
249 
250  uint16_t data_reference_index;
251 
252  WebVTTConfigurationBox config;
253  WebVTTSourceLabelBox label;
254  // Optional MPEG4BitRateBox.
255 };
256 
258  DECLARE_BOX_METHODS(SampleDescription);
259 
260  TrackType type;
261  std::vector<VideoSampleEntry> video_entries;
262  std::vector<AudioSampleEntry> audio_entries;
263  std::vector<WVTTSampleEntry> wvtt_entries;
264 };
265 
// One entry of DecodingTimeToSample::decoding_time: a sample count paired with
// a sample delta.
// NOTE(review): plain struct without a constructor or member initializers, so
// a default-initialized instance has indeterminate members; callers must set
// both fields.
266 struct DecodingTime {
267  uint32_t sample_count;
268  uint32_t sample_delta;
269 };
270 
271 // stts.
273  DECLARE_BOX_METHODS(DecodingTimeToSample);
274 
275  std::vector<DecodingTime> decoding_time;
276 };
277 
279  uint32_t sample_count;
280  // If version == 0, sample_offset is uint32_t;
281  // If version == 1, sample_offset is int32_t.
282  // Use int64_t so both can be supported properly.
283  int64_t sample_offset;
284 };
285 
286 // ctts. Optional.
288  DECLARE_BOX_METHODS(CompositionTimeToSample);
289 
290  std::vector<CompositionOffset> composition_offset;
291 };
292 
// One entry of SampleToChunk::chunk_info: first chunk, samples per chunk and
// sample description index.
// NOTE(review): plain struct without a constructor or member initializers, so
// a default-initialized instance has indeterminate members; callers must set
// all three fields.
293 struct ChunkInfo {
294  uint32_t first_chunk;
295  uint32_t samples_per_chunk;
296  uint32_t sample_description_index;
297 };
298 
299 // stsc.
301  DECLARE_BOX_METHODS(SampleToChunk);
302 
303  std::vector<ChunkInfo> chunk_info;
304 };
305 
306 // stsz.
// Full box with a sample_size value, a sample_count and a `sizes` vector.
// SampleTable stores its sample sizes in this type whether the file carried
// SampleSize or CompactSampleSize (see the comment on SampleTable::sample_size).
// NOTE(review): presumably `sizes` is only populated when sample_size == 0 --
// confirm in ReadWriteInternal(), which is not in this header.
307 struct SampleSize : FullBox {
308  DECLARE_BOX_METHODS(SampleSize);
309 
310  uint32_t sample_size;
311  uint32_t sample_count;
312  std::vector<uint32_t> sizes;
313 };
314 
315 // stz2.
317  DECLARE_BOX_METHODS(CompactSampleSize);
318 
319  uint8_t field_size;
320  std::vector<uint32_t> sizes;
321 };
322 
323 // co64.
325  DECLARE_BOX_METHODS(ChunkLargeOffset);
326 
327  std::vector<uint64_t> offsets;
328 };
329 
330 // stco.
332  DECLARE_BOX_METHODS(ChunkOffset);
333 };
334 
335 // stss. Optional.
// Full box holding a vector of 32-bit sample numbers. SampleTable embeds it as
// `sync_sample`.
336 struct SyncSample : FullBox {
337  DECLARE_BOX_METHODS(SyncSample);
338 
339  std::vector<uint32_t> sample_number;
340 };
341 
// Container box aggregating the per-sample tables of a track: sample
// description, decoding/composition time tables, sample-to-chunk mapping,
// sample sizes, chunk offsets and sync samples. MediaInformation embeds it as
// `sample_table`.
342 struct SampleTable : Box {
343  DECLARE_BOX_METHODS(SampleTable);
344 
345  SampleDescription description;
346  DecodingTimeToSample decoding_time_to_sample;
347  CompositionTimeToSample composition_time_to_sample;
348  SampleToChunk sample_to_chunk;
349  // Either SampleSize or CompactSampleSize must be present. Store in SampleSize.
350  SampleSize sample_size;
351  // Either ChunkOffset or ChunkLargeOffset must be present. Store in
352  // ChunkLargeOffset.
353  ChunkLargeOffset chunk_large_offset;
354  SyncSample sync_sample;
355 };
356 
358  DECLARE_BOX_METHODS(MediaHeader);
359 
360  uint64_t creation_time;
361  uint64_t modification_time;
362  uint32_t timescale;
363  uint64_t duration;
364  // 3-char language code + 1 null terminating char.
365  char language[4];
366 };
367 
369  DECLARE_BOX_METHODS(VideoMediaHeader);
370 
371  uint16_t graphicsmode;
372  uint16_t opcolor_red;
373  uint16_t opcolor_green;
374  uint16_t opcolor_blue;
375 };
376 
378  DECLARE_BOX_METHODS(SoundMediaHeader);
379 
380  uint16_t balance;
381 };
382 
384  DECLARE_BOX_METHODS(SubtitleMediaHeader);
385 };
386 
388  DECLARE_BOX_METHODS(DataEntryUrl);
389 
390  std::vector<uint8_t> location;
391 };
392 
394  DECLARE_BOX_METHODS(DataReference);
395 
396  // data entry can be either url or urn box. Fix to url box for now.
397  std::vector<DataEntryUrl> data_entry;
398 };
399 
401  DECLARE_BOX_METHODS(DataInformation);
402 
403  DataReference dref;
404 };
405 
407  DECLARE_BOX_METHODS(MediaInformation);
408 
409  DataInformation dinf;
410  SampleTable sample_table;
411  // Exactly one specific media header shall be present: vmhd, smhd, hmhd, nmhd.
412  VideoMediaHeader vmhd;
413  SoundMediaHeader smhd;
414  SubtitleMediaHeader sthd;
415 };
416 
// Container box grouping a track's media header, handler reference and media
// information. Track embeds it as `media`.
417 struct Media : Box {
418  DECLARE_BOX_METHODS(Media);
419 
420  MediaHeader header;
421  HandlerReference handler;
422  MediaInformation information;
423 };
424 
// Container box for one track: its header, its media and its edit box. Movie
// holds a vector of these in `tracks`.
425 struct Track : Box {
426  DECLARE_BOX_METHODS(Track);
427 
428  TrackHeader header;
429  Media media;
430  Edit edit;
431 };
432 
434  DECLARE_BOX_METHODS(MovieExtendsHeader);
435 
436  uint64_t fragment_duration;
437 };
438 
440  DECLARE_BOX_METHODS(TrackExtends);
441 
442  uint32_t track_id;
443  uint32_t default_sample_description_index;
444  uint32_t default_sample_duration;
445  uint32_t default_sample_size;
446  uint32_t default_sample_flags;
447 };
448 
// Container box holding one MovieExtendsHeader and a list of TrackExtends
// entries. Movie embeds it as `extends`.
449 struct MovieExtends : Box {
450  DECLARE_BOX_METHODS(MovieExtends);
451 
452  MovieExtendsHeader header;
453  std::vector<TrackExtends> tracks;
454 };
455 
// Top-level container box for the movie metadata: the movie header, the movie
// extends box, all tracks and any protection system specific headers.
456 struct Movie : Box {
457  DECLARE_BOX_METHODS(Movie);
458 
459  MovieHeader header;
460  MovieExtends extends;
461  std::vector<Track> tracks;
462  std::vector<ProtectionSystemSpecificHeader> pssh;
463 };
464 
466  DECLARE_BOX_METHODS(TrackFragmentDecodeTime);
467 
468  uint64_t decode_time;
469 };
470 
472  DECLARE_BOX_METHODS(MovieFragmentHeader);
473 
474  uint32_t sequence_number;
475 };
476 
478  enum TrackFragmentFlagsMasks {
479  kBaseDataOffsetPresentMask = 0x000001,
480  kSampleDescriptionIndexPresentMask = 0x000002,
481  kDefaultSampleDurationPresentMask = 0x000008,
482  kDefaultSampleSizePresentMask = 0x000010,
483  kDefaultSampleFlagsPresentMask = 0x000020,
484  kDurationIsEmptyMask = 0x010000,
485  kDefaultBaseIsMoofMask = 0x020000,
486  };
487 
488  enum SampleFlagsMasks {
489  kReservedMask = 0xFC000000,
490  kSampleDependsOnMask = 0x03000000,
491  kSampleIsDependedOnMask = 0x00C00000,
492  kSampleHasRedundancyMask = 0x00300000,
493  kSamplePaddingValueMask = 0x000E0000,
494  kNonKeySampleMask = 0x00010000,
495  kSampleDegradationPriorityMask = 0x0000FFFF,
496  };
497 
498  DECLARE_BOX_METHODS(TrackFragmentHeader);
499 
500  uint32_t track_id;
501  uint32_t sample_description_index;
502  uint32_t default_sample_duration;
503  uint32_t default_sample_size;
504  uint32_t default_sample_flags;
505 };
506 
508  enum TrackFragmentFlagsMasks {
509  kDataOffsetPresentMask = 0x000001,
510  kFirstSampleFlagsPresentMask = 0x000004,
511  kSampleDurationPresentMask = 0x000100,
512  kSampleSizePresentMask = 0x000200,
513  kSampleFlagsPresentMask = 0x000400,
514  kSampleCompTimeOffsetsPresentMask = 0x000800,
515  };
516 
517  DECLARE_BOX_METHODS(TrackFragmentRun);
518 
519  uint32_t sample_count;
520  uint32_t data_offset;
521  std::vector<uint32_t> sample_flags;
522  std::vector<uint32_t> sample_sizes;
523  std::vector<uint32_t> sample_durations;
524  std::vector<int64_t> sample_composition_time_offsets;
525 };
526 
528  enum GroupDescriptionIndexBase {
529  kTrackGroupDescriptionIndexBase = 0,
530  kTrackFragmentGroupDescriptionIndexBase = 0x10000,
531  };
532 
533  uint32_t sample_count;
534  uint32_t group_description_index;
535 };
536 
538  DECLARE_BOX_METHODS(SampleToGroup);
539 
540  uint32_t grouping_type;
541  uint32_t grouping_type_parameter; // Version 1 only.
542  std::vector<SampleToGroupEntry> entries;
543 };
544 
548 
549  bool is_encrypted;
550  uint8_t iv_size;
551  std::vector<uint8_t> key_id;
552 };
553 
555  DECLARE_BOX_METHODS(SampleGroupDescription);
556 
557  uint32_t grouping_type;
558  std::vector<CencSampleEncryptionInfoEntry> entries;
559 };
560 
// Container box for one track's part of a movie fragment: the fragment header,
// its runs, the decode time box, sample grouping boxes and the sample
// auxiliary information size/offset boxes. MovieFragment holds a vector of
// these in `tracks`.
561 struct TrackFragment : Box {
562  DECLARE_BOX_METHODS(TrackFragment);
563 
564  TrackFragmentHeader header;
565  std::vector<TrackFragmentRun> runs;
// NOTE(review): presumably set when no TrackFragmentDecodeTime box was found
// while parsing, in which case `decode_time` carries no data -- confirm in
// the implementation file.
566  bool decode_time_absent;
567  TrackFragmentDecodeTime decode_time;
568  SampleToGroup sample_to_group;
569  SampleGroupDescription sample_group_description;
570  SampleAuxiliaryInformationSize auxiliary_size;
571  SampleAuxiliaryInformationOffset auxiliary_offset;
572 };
573 
// Container box for a movie fragment: the fragment header, one TrackFragment
// per entry in `tracks`, and any protection system specific headers.
574 struct MovieFragment : Box {
575  DECLARE_BOX_METHODS(MovieFragment);
576 
577  MovieFragmentHeader header;
578  std::vector<TrackFragment> tracks;
579  std::vector<ProtectionSystemSpecificHeader> pssh;
580 };
581 
583  enum SAPType {
584  TypeUnknown = 0,
585  Type1 = 1, // T(ept) = T(dec) = T(sap) = T(ptf)
586  Type2 = 2, // T(ept) = T(dec) = T(sap) < T(ptf)
587  Type3 = 3, // T(ept) < T(dec) = T(sap) <= T(ptf)
588  Type4 = 4, // T(ept) <= T(ptf) < T(dec) = T(sap)
589  Type5 = 5, // T(ept) = T(dec) < T(sap)
590  Type6 = 6, // T(ept) < T(dec) < T(sap)
591  };
592 
593  bool reference_type;
594  uint32_t referenced_size;
595  uint32_t subsegment_duration;
596  bool starts_with_sap;
597  SAPType sap_type;
598  uint32_t sap_delta_time;
599  // We add this field to keep track of earliest_presentation_time in this
600  // subsegment. It is not part of SegmentReference.
601  uint64_t earliest_presentation_time;
602 };
603 
605  DECLARE_BOX_METHODS(SegmentIndex);
606 
607  uint32_t reference_id;
608  uint32_t timescale;
609  uint64_t earliest_presentation_time;
610  uint64_t first_offset;
611  std::vector<SegmentReference> references;
612 };
613 
614 // The actual data is parsed and written separately.
// Box that records only the size of the media payload; the payload bytes
// themselves are not stored in this struct.
// NOTE(review): data_size is 32-bit, so a payload of 4 GiB or more cannot be
// represented here -- confirm whether larger payloads are handled elsewhere.
615 struct MediaData : Box {
616  DECLARE_BOX_METHODS(MediaData);
617 
618  uint32_t data_size;
619 };
620 
// Box holding a signed 32-bit source id. VTTCueBox embeds it as
// `cue_source_id`.
621 struct CueSourceIDBox : Box {
622  DECLARE_BOX_METHODS(CueSourceIDBox);
623  int32_t source_id;
624 };
625 
// Box holding the cue's current time as a string. VTTCueBox embeds it as
// `cue_time`.
626 struct CueTimeBox : Box {
627  DECLARE_BOX_METHODS(CueTimeBox);
628  std::string cue_current_time;
629 };
630 
// Box holding a cue identifier string. VTTCueBox embeds it as `cue_id`.
631 struct CueIDBox : Box {
632  DECLARE_BOX_METHODS(CueIDBox);
633  std::string cue_id;
634 };
635 
// Box holding the cue settings as a string. VTTCueBox embeds it as
// `cue_settings`.
636 struct CueSettingsBox : Box {
637  DECLARE_BOX_METHODS(CueSettingsBox);
638  std::string settings;
639 };
640 
// Box holding the cue text as a string. VTTCueBox embeds it as `cue_payload`.
641 struct CuePayloadBox : Box {
642  DECLARE_BOX_METHODS(CuePayloadBox);
643  std::string cue_text;
644 };
645 
// Box with no data members of its own; it only provides the standard box
// methods declared by DECLARE_BOX_METHODS.
646 struct VTTEmptyCueBox : Box {
647  DECLARE_BOX_METHODS(VTTEmptyCueBox);
648 };
649 
651  DECLARE_BOX_METHODS(VTTAdditionalTextBox);
652  std::string cue_additional_text;
653 };
654 
// Container box aggregating the five cue sub-boxes declared earlier in this
// header: source id, cue id, cue time, cue settings and cue payload.
655 struct VTTCueBox : Box {
656  DECLARE_BOX_METHODS(VTTCueBox);
657 
658  CueSourceIDBox cue_source_id;
659  CueIDBox cue_id;
660  CueTimeBox cue_time;
661  CueSettingsBox cue_settings;
662  CuePayloadBox cue_payload;
663 };
664 
// Undefine the helper macro defined near the top of this header so that it
// does not leak into translation units that include this file. The name must
// match the #define exactly (DECLARE_BOX_METHODS); undefining a different name
// is silently accepted by the preprocessor and leaves the macro defined.
665 #undef DECLARE_BOX_METHODS
666 
667 } // namespace mp4
668 } // namespace media
669 } // namespace edash_packager
670 
671 #endif // MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_