DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs Enumerator
box_definitions.h
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
6 #define MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
7 
8 #include <vector>
9 
10 #include "packager/media/formats/mp4/aac_audio_specific_config.h"
11 #include "packager/media/formats/mp4/box.h"
12 #include "packager/media/formats/mp4/es_descriptor.h"
13 #include "packager/media/formats/mp4/fourccs.h"
14 
15 namespace edash_packager {
16 namespace media {
17 
18 class BufferReader;
19 
20 namespace mp4 {
21 
// Track kinds used by the `type` members of boxes declared in this header.
// kInvalid is pinned to 0; the remaining enumerators take the next
// consecutive values.
22 enum TrackType {
23  kInvalid = 0,
24  kVideo,
25  kAudio,
26  kHint
27 };
28 
29 class BoxBuffer;
30 
// Declares the members repeated by every box type in this header:
//  - public: default constructor, destructor override and BoxType() override;
//  - private: ReadWriteInternal() and ComputeSizeInternal() overrides.
// The expansion ends with "public:", so members written after the macro
// invocation are public.
31 #define DECLARE_BOX_METHODS(T) \
32  public: \
33  T(); \
34  ~T() override; \
35  FourCC BoxType() const override; \
36  \
37  private: \
38  bool ReadWriteInternal(BoxBuffer* buffer) override; \
39  uint32_t ComputeSizeInternal() override; \
40  \
41  public:
42 
// Box holding a major brand, a minor version and a list of compatible brands,
// each brand being a FourCC.
43 struct FileType : Box {
44  DECLARE_BOX_METHODS(FileType);
45 
46  FourCC major_brand;
47  uint32_t minor_version;
48  std::vector<FourCC> compatible_brands;
49 };
50 
52  FourCC BoxType() const override;
53 };
54 
56  DECLARE_BOX_METHODS(ProtectionSystemSpecificHeader);
57 
58  std::vector<uint8_t> system_id;
59  std::vector<uint8_t> data;
60  std::vector<uint8_t> raw_box;
61 };
62 
64  DECLARE_BOX_METHODS(SampleAuxiliaryInformationOffset);
65 
66  std::vector<uint64_t> offsets;
67 };
68 
70  DECLARE_BOX_METHODS(SampleAuxiliaryInformationSize);
71 
72  uint8_t default_sample_info_size;
73  uint32_t sample_count;
74  std::vector<uint8_t> sample_info_sizes;
75 };
76 
// Box holding a single FourCC, `format`.
77 struct OriginalFormat : Box {
78  DECLARE_BOX_METHODS(OriginalFormat);
79 
80  FourCC format;
81 };
82 
// FullBox holding a scheme FourCC (`type`) and a 32-bit `version`.
// NOTE(review): if FullBox itself declares a member named `version`, the
// field below hides it inside this struct -- confirm against box.h.
83 struct SchemeType : FullBox {
84  DECLARE_BOX_METHODS(SchemeType);
85 
86  FourCC type;
87  uint32_t version;
88 };
89 
91  DECLARE_BOX_METHODS(TrackEncryption);
92 
93  // Note: this definition is specific to the CENC protection type.
94  bool is_encrypted;
95  uint8_t default_iv_size;
96  std::vector<uint8_t> default_kid;
97 };
98 
// Box whose only data member is a TrackEncryption.
99 struct SchemeInfo : Box {
100  DECLARE_BOX_METHODS(SchemeInfo);
101 
102  TrackEncryption track_encryption;
103 };
104 
106  DECLARE_BOX_METHODS(ProtectionSchemeInfo);
107 
108  OriginalFormat format;
109  SchemeType type;
110  SchemeInfo info;
111 };
112 
114  DECLARE_BOX_METHODS(MovieHeader);
115 
116  uint64_t creation_time;
117  uint64_t modification_time;
118  uint32_t timescale;
119  uint64_t duration;
120  int32_t rate;
121  int16_t volume;
122  uint32_t next_track_id;
123 };
124 
126  enum TrackHeaderFlags {
127  kTrackEnabled = 0x000001,
128  kTrackInMovie = 0x000002,
129  kTrackInPreview = 0x000004,
130  };
131 
132  DECLARE_BOX_METHODS(TrackHeader);
133 
134  uint64_t creation_time;
135  uint64_t modification_time;
136  uint32_t track_id;
137  uint64_t duration;
138  int16_t layer;
139  int16_t alternate_group;
140  int16_t volume;
141  // width and height specify the track's visual presentation size as
142  // fixed-point 16.16 values.
143  uint32_t width;
144  uint32_t height;
145 };
146 
148  uint64_t segment_duration;
149  int64_t media_time;
150  int16_t media_rate_integer;
151  int16_t media_rate_fraction;
152 };
153 
// FullBox holding a sequence of EditListEntry records.
154 struct EditList : FullBox {
155  DECLARE_BOX_METHODS(EditList);
156 
157  std::vector<EditListEntry> edits;
158 };
159 
// Box that wraps a single EditList.
160 struct Edit : Box {
161  DECLARE_BOX_METHODS(Edit);
162 
163  EditList list;
164 };
165 
167  DECLARE_BOX_METHODS(HandlerReference);
168 
169  TrackType type;
170 };
171 
173  DECLARE_BOX_METHODS(CodecConfigurationRecord);
174 
175  FourCC box_type;
176  // Contains full codec configuration record, including possible extension
177  // boxes.
178  std::vector<uint8_t> data;
179 };
180 
182  DECLARE_BOX_METHODS(PixelAspectRatio);
183 
184  uint32_t h_spacing;
185  uint32_t v_spacing;
186 };
187 
189  DECLARE_BOX_METHODS(VideoSampleEntry);
190  // Returns actual format of this sample entry.
191  FourCC GetActualFormat() const {
192  return format == FOURCC_ENCV ? sinf.format.format : format;
193  }
194 
195  FourCC format;
196  uint16_t data_reference_index;
197  uint16_t width;
198  uint16_t height;
199 
200  PixelAspectRatio pixel_aspect;
202  CodecConfigurationRecord codec_config_record;
203 };
204 
206  DECLARE_BOX_METHODS(ElementaryStreamDescriptor);
207 
208  AACAudioSpecificConfig aac_audio_specific_config;
209  ESDescriptor es_descriptor;
210 };
211 
// Box whose payload is kept as an uninterpreted byte vector.
212 struct DTSSpecific : Box {
213  DECLARE_BOX_METHODS(DTSSpecific);
214 
215  std::vector<uint8_t> data;
216 };
217 
219  DECLARE_BOX_METHODS(AudioSampleEntry);
220  // Returns actual format of this sample entry.
221  FourCC GetActualFormat() const {
222  return format == FOURCC_ENCA ? sinf.format.format : format;
223  }
224 
225  FourCC format;
226  uint16_t data_reference_index;
227  uint16_t channelcount;
228  uint16_t samplesize;
229  uint32_t samplerate;
230 
233  DTSSpecific ddts;
234 };
235 
237  DECLARE_BOX_METHODS(SampleDescription);
238 
239  TrackType type;
240  std::vector<VideoSampleEntry> video_entries;
241  std::vector<AudioSampleEntry> audio_entries;
242 };
243 
// Plain record (not a Box) pairing a sample count with a sample delta; it is
// the element type of DecodingTimeToSample::decoding_time.
244 struct DecodingTime {
245  uint32_t sample_count;
246  uint32_t sample_delta;
247 };
248 
249 // stts.
251  DECLARE_BOX_METHODS(DecodingTimeToSample);
252 
253  std::vector<DecodingTime> decoding_time;
254 };
255 
257  uint32_t sample_count;
258  // If version == 0, sample_offset is uint32_t;
259  // If version == 1, sample_offset is int32_t.
260  // Use int64_t so both can be supported properly.
261  int64_t sample_offset;
262 };
263 
264 // ctts. Optional.
266  DECLARE_BOX_METHODS(CompositionTimeToSample);
267 
268  std::vector<CompositionOffset> composition_offset;
269 };
270 
// Plain record (not a Box) with a first-chunk number, a samples-per-chunk
// count and a sample description index; it is the element type of
// SampleToChunk::chunk_info.
271 struct ChunkInfo {
272  uint32_t first_chunk;
273  uint32_t samples_per_chunk;
274  uint32_t sample_description_index;
275 };
276 
277 // stsc.
279  DECLARE_BOX_METHODS(SampleToChunk);
280 
281  std::vector<ChunkInfo> chunk_info;
282 };
283 
284 // stsz.
// FullBox holding a `sample_size`, a `sample_count` and a vector of
// per-entry `sizes`.
// NOTE(review): presumably `sizes` is only meaningful when `sample_size` is
// 0 -- confirm against the ReadWriteInternal() implementation.
285 struct SampleSize : FullBox {
286  DECLARE_BOX_METHODS(SampleSize);
287 
288  uint32_t sample_size;
289  uint32_t sample_count;
290  std::vector<uint32_t> sizes;
291 };
292 
293 // stz2.
295  DECLARE_BOX_METHODS(CompactSampleSize);
296 
297  uint8_t field_size;
298  std::vector<uint32_t> sizes;
299 };
300 
301 // co64.
303  DECLARE_BOX_METHODS(ChunkLargeOffset);
304 
305  std::vector<uint64_t> offsets;
306 };
307 
308 // stco.
310  DECLARE_BOX_METHODS(ChunkOffset);
311 };
312 
313 // stss. Optional.
// FullBox holding a list of 32-bit sample numbers.
314 struct SyncSample : FullBox {
315  DECLARE_BOX_METHODS(SyncSample);
316 
317  std::vector<uint32_t> sample_number;
318 };
319 
// Box aggregating the per-sample tables declared above: sample description,
// decoding and composition time tables, sample-to-chunk mapping, sample
// sizes, chunk offsets and sync samples.
320 struct SampleTable : Box {
321  DECLARE_BOX_METHODS(SampleTable);
322 
323  SampleDescription description;
324  DecodingTimeToSample decoding_time_to_sample;
325  CompositionTimeToSample composition_time_to_sample;
326  SampleToChunk sample_to_chunk;
327  // Either SampleSize or CompactSampleSize must be present. Stored in SampleSize.
328  SampleSize sample_size;
329  // Either ChunkOffset or ChunkLargeOffset must be present. Stored in
330  // ChunkLargeOffset.
331  ChunkLargeOffset chunk_large_offset;
332  SyncSample sync_sample;
333 };
334 
336  DECLARE_BOX_METHODS(MediaHeader);
337 
338  uint64_t creation_time;
339  uint64_t modification_time;
340  uint32_t timescale;
341  uint64_t duration;
342  // 3-char language code + 1 null terminating char.
343  char language[4];
344 };
345 
347  DECLARE_BOX_METHODS(VideoMediaHeader);
348 
349  uint16_t graphicsmode;
350  uint16_t opcolor_red;
351  uint16_t opcolor_green;
352  uint16_t opcolor_blue;
353 };
354 
356  DECLARE_BOX_METHODS(SoundMediaHeader);
357 
358  uint16_t balance;
359 };
360 
362  DECLARE_BOX_METHODS(DataEntryUrl);
363 
364  std::vector<uint8_t> location;
365 };
366 
368  DECLARE_BOX_METHODS(DataReference);
369 
370  // data entry can be either url or urn box. Fix to url box for now.
371  std::vector<DataEntryUrl> data_entry;
372 };
373 
375  DECLARE_BOX_METHODS(DataInformation);
376 
377  DataReference dref;
378 };
379 
381  DECLARE_BOX_METHODS(MediaInformation);
382 
383  DataInformation dinf;
384  SampleTable sample_table;
385  // Exactly one specific media header shall be present: vmhd, smhd, hmhd, or nmhd.
386  VideoMediaHeader vmhd;
387  SoundMediaHeader smhd;
388 };
389 
// Box grouping a MediaHeader, a HandlerReference and a MediaInformation.
390 struct Media : Box {
391  DECLARE_BOX_METHODS(Media);
392 
393  MediaHeader header;
394  HandlerReference handler;
395  MediaInformation information;
396 };
397 
// Box grouping a TrackHeader, a Media and an Edit.
398 struct Track : Box {
399  DECLARE_BOX_METHODS(Track);
400 
401  TrackHeader header;
402  Media media;
403  Edit edit;
404 };
405 
407  DECLARE_BOX_METHODS(MovieExtendsHeader);
408 
409  uint64_t fragment_duration;
410 };
411 
413  DECLARE_BOX_METHODS(TrackExtends);
414 
415  uint32_t track_id;
416  uint32_t default_sample_description_index;
417  uint32_t default_sample_duration;
418  uint32_t default_sample_size;
419  uint32_t default_sample_flags;
420 };
421 
// Box holding one MovieExtendsHeader and a list of TrackExtends entries.
422 struct MovieExtends : Box {
423  DECLARE_BOX_METHODS(MovieExtends);
424 
425  MovieExtendsHeader header;
426  std::vector<TrackExtends> tracks;
427 };
428 
// Box holding a MovieHeader, a MovieExtends, the list of Track boxes and a
// list of ProtectionSystemSpecificHeader boxes.
429 struct Movie : Box {
430  DECLARE_BOX_METHODS(Movie);
431 
432  MovieHeader header;
433  MovieExtends extends;
434  std::vector<Track> tracks;
435  std::vector<ProtectionSystemSpecificHeader> pssh;
436 };
437 
439  DECLARE_BOX_METHODS(TrackFragmentDecodeTime);
440 
441  uint64_t decode_time;
442 };
443 
445  DECLARE_BOX_METHODS(MovieFragmentHeader);
446 
447  uint32_t sequence_number;
448 };
449 
451  enum TrackFragmentFlagsMasks {
452  kBaseDataOffsetPresentMask = 0x000001,
453  kSampleDescriptionIndexPresentMask = 0x000002,
454  kDefaultSampleDurationPresentMask = 0x000008,
455  kDefaultSampleSizePresentMask = 0x000010,
456  kDefaultSampleFlagsPresentMask = 0x000020,
457  kDurationIsEmptyMask = 0x010000,
458  kDefaultBaseIsMoofMask = 0x020000,
459  };
460 
461  enum SampleFlagsMasks {
462  kReservedMask = 0xFC000000,
463  kSampleDependsOnMask = 0x03000000,
464  kSampleIsDependedOnMask = 0x00C00000,
465  kSampleHasRedundancyMask = 0x00300000,
466  kSamplePaddingValueMask = 0x000E0000,
467  kNonKeySampleMask = 0x00010000,
468  kSampleDegradationPriorityMask = 0x0000FFFF,
469  };
470 
471  DECLARE_BOX_METHODS(TrackFragmentHeader);
472 
473  uint32_t track_id;
474  uint32_t sample_description_index;
475  uint32_t default_sample_duration;
476  uint32_t default_sample_size;
477  uint32_t default_sample_flags;
478 };
479 
481  enum TrackFragmentFlagsMasks {
482  kDataOffsetPresentMask = 0x000001,
483  kFirstSampleFlagsPresentMask = 0x000004,
484  kSampleDurationPresentMask = 0x000100,
485  kSampleSizePresentMask = 0x000200,
486  kSampleFlagsPresentMask = 0x000400,
487  kSampleCompTimeOffsetsPresentMask = 0x000800,
488  };
489 
490  DECLARE_BOX_METHODS(TrackFragmentRun);
491 
492  uint32_t sample_count;
493  uint32_t data_offset;
494  std::vector<uint32_t> sample_flags;
495  std::vector<uint32_t> sample_sizes;
496  std::vector<uint32_t> sample_durations;
497  std::vector<int64_t> sample_composition_time_offsets;
498 };
499 
501  enum GroupDescriptionIndexBase {
502  kTrackGroupDescriptionIndexBase = 0,
503  kTrackFragmentGroupDescriptionIndexBase = 0x10000,
504  };
505 
506  uint32_t sample_count;
507  uint32_t group_description_index;
508 };
509 
511  DECLARE_BOX_METHODS(SampleToGroup);
512 
513  uint32_t grouping_type;
514  uint32_t grouping_type_parameter; // Version 1 only.
515  std::vector<SampleToGroupEntry> entries;
516 };
517 
521 
522  bool is_encrypted;
523  uint8_t iv_size;
524  std::vector<uint8_t> key_id;
525 };
526 
528  DECLARE_BOX_METHODS(SampleGroupDescription);
529 
530  uint32_t grouping_type;
531  std::vector<CencSampleEncryptionInfoEntry> entries;
532 };
533 
// Box grouping one track's fragment data: its header, the list of runs, the
// decode time, sample-group information and the sample auxiliary information
// size/offset boxes.
534 struct TrackFragment : Box {
535  DECLARE_BOX_METHODS(TrackFragment);
536 
537  TrackFragmentHeader header;
538  std::vector<TrackFragmentRun> runs;
// NOTE(review): presumably records that no decode-time box was found for
// this fragment -- confirm against the ReadWriteInternal() implementation.
539  bool decode_time_absent;
540  TrackFragmentDecodeTime decode_time;
541  SampleToGroup sample_to_group;
542  SampleGroupDescription sample_group_description;
543  SampleAuxiliaryInformationSize auxiliary_size;
544  SampleAuxiliaryInformationOffset auxiliary_offset;
545 };
546 
// Box holding a MovieFragmentHeader, the list of TrackFragment boxes and a
// list of ProtectionSystemSpecificHeader boxes.
547 struct MovieFragment : Box {
548  DECLARE_BOX_METHODS(MovieFragment);
549 
550  MovieFragmentHeader header;
551  std::vector<TrackFragment> tracks;
552  std::vector<ProtectionSystemSpecificHeader> pssh;
553 };
554 
556  enum SAPType {
557  TypeUnknown = 0,
558  Type1 = 1, // T(ept) = T(dec) = T(sap) = T(ptf)
559  Type2 = 2, // T(ept) = T(dec) = T(sap) < T(ptf)
560  Type3 = 3, // T(ept) < T(dec) = T(sap) <= T(ptf)
561  Type4 = 4, // T(ept) <= T(ptf) < T(dec) = T(sap)
562  Type5 = 5, // T(ept) = T(dec) < T(sap)
563  Type6 = 6, // T(ept) < T(dec) < T(sap)
564  };
565 
566  bool reference_type;
567  uint32_t referenced_size;
568  uint32_t subsegment_duration;
569  bool starts_with_sap;
570  SAPType sap_type;
571  uint32_t sap_delta_time;
572  // We add this field to keep track of earliest_presentation_time in this
573  // subsegment. It is not part of SegmentReference.
574  uint64_t earliest_presentation_time;
575 };
576 
578  DECLARE_BOX_METHODS(SegmentIndex);
579 
580  uint32_t reference_id;
581  uint32_t timescale;
582  uint64_t earliest_presentation_time;
583  uint64_t first_offset;
584  std::vector<SegmentReference> references;
585 };
586 
587 // The actual data is parsed and written separately.
// This struct therefore stores only the payload size, not the payload bytes.
588 struct MediaData : Box {
589  DECLARE_BOX_METHODS(MediaData);
590 
591  uint32_t data_size;
592 };
593 
// Drop the helper macro defined near the top of this header so it does not
// leak into files that include it.
594 #undef DECLARE_BOX_METHODS
595 
596 } // namespace mp4
597 } // namespace media
598 } // namespace edash_packager
599 
600 #endif // MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_