DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs Enumerator
box_definitions.h
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
6 #define MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
7 
8 #include <vector>
9 
10 #include "packager/media/formats/mp4/aac_audio_specific_config.h"
11 #include "packager/media/formats/mp4/box.h"
12 #include "packager/media/formats/mp4/es_descriptor.h"
13 #include "packager/media/formats/mp4/fourccs.h"
14 
15 namespace edash_packager {
16 namespace media {
17 
18 class BufferReader;
19 
20 namespace mp4 {
21 
// Kind of media track. Used as the `type` member of HandlerReference and
// SampleDescription in this file. kInvalid is explicitly 0; the remaining
// enumerators take the following consecutive values (kVideo = 1, kAudio = 2,
// kHint = 3).
22 enum TrackType {
23  kInvalid = 0,
24  kVideo,
25  kAudio,
26  kHint
27 };
28 
29 class BoxBuffer;
30 
// Declares, for box type T, the member functions shared by the box structs in
// this file: a default constructor, an overriding destructor, and overrides of
// ReadWrite(BoxBuffer*), BoxType() const and ComputeSize(). The macro only
// produces declarations; no function bodies are generated by it.
31 #define DECLARE_BOX_METHODS(T) \
32  T(); \
33  ~T() override; \
34  bool ReadWrite(BoxBuffer* buffer) override; \
35  FourCC BoxType() const override; \
36  uint32_t ComputeSize() override;
37 
// Box carrying a major brand, a minor version and a list of compatible brands.
// Presumably the ISO BMFF 'ftyp' box -- confirm against BoxType() in the
// implementation file.
38 struct FileType : Box {
39  DECLARE_BOX_METHODS(FileType);
40 
41  FourCC major_brand;
42  uint32_t minor_version;
43  std::vector<FourCC> compatible_brands;
44 };
45 
47  DECLARE_BOX_METHODS(SegmentType);
48 };
49 
51  DECLARE_BOX_METHODS(ProtectionSystemSpecificHeader);
52 
53  std::vector<uint8_t> system_id;
54  std::vector<uint8_t> data;
55  std::vector<uint8_t> raw_box;
56 };
57 
59  DECLARE_BOX_METHODS(SampleAuxiliaryInformationOffset);
60 
61  std::vector<uint64_t> offsets;
62 };
63 
65  DECLARE_BOX_METHODS(SampleAuxiliaryInformationSize);
66 
67  uint8_t default_sample_info_size;
68  uint32_t sample_count;
69  std::vector<uint8_t> sample_info_sizes;
70 };
71 
// Box holding a single FourCC `format`. ProtectionSchemeInfo embeds it as its
// `format` member, and the sample entries' GetActualFormat() return it (as
// sinf.format.format) when the entry's own format is FOURCC_ENCV / FOURCC_ENCA.
72 struct OriginalFormat : Box {
73  DECLARE_BOX_METHODS(OriginalFormat);
74 
75  FourCC format;
76 };
77 
// Full box holding a scheme FourCC `type` and a 32-bit `version`.
// ProtectionSchemeInfo embeds it as its `type` member.
// NOTE(review): `version` is declared by SchemeType itself; if FullBox also
// declares a member named `version`, this one hides it -- confirm that is
// intended.
78 struct SchemeType : FullBox {
79  DECLARE_BOX_METHODS(SchemeType);
80 
81  FourCC type;
82  uint32_t version;
83 };
84 
86  DECLARE_BOX_METHODS(TrackEncryption);
87 
88  // Note: this definition is specific to the CENC protection type.
89  bool is_encrypted;
90  uint8_t default_iv_size;
91  std::vector<uint8_t> default_kid;
92 };
93 
// Box whose only data member is a TrackEncryption box. ProtectionSchemeInfo
// embeds it as its `info` member.
94 struct SchemeInfo : Box {
95  DECLARE_BOX_METHODS(SchemeInfo);
96 
97  TrackEncryption track_encryption;
98 };
99 
101  DECLARE_BOX_METHODS(ProtectionSchemeInfo);
102 
103  OriginalFormat format;
104  SchemeType type;
105  SchemeInfo info;
106 };
107 
109  DECLARE_BOX_METHODS(MovieHeader);
110 
111  uint64_t creation_time;
112  uint64_t modification_time;
113  uint32_t timescale;
114  uint64_t duration;
115  int32_t rate;
116  int16_t volume;
117  uint32_t next_track_id;
118 };
119 
121  enum TrackHeaderFlags {
122  kTrackEnabled = 0x000001,
123  kTrackInMovie = 0x000002,
124  kTrackInPreview = 0x000004,
125  };
126 
127  DECLARE_BOX_METHODS(TrackHeader);
128 
129  uint64_t creation_time;
130  uint64_t modification_time;
131  uint32_t track_id;
132  uint64_t duration;
133  int16_t layer;
134  int16_t alternate_group;
135  int16_t volume;
136  // width and height specify the track's visual presentation size as
137  // fixed-point 16.16 values.
138  uint32_t width;
139  uint32_t height;
140 };
141 
143  uint64_t segment_duration;
144  int64_t media_time;
145  int16_t media_rate_integer;
146  int16_t media_rate_fraction;
147 };
148 
// Full box holding a list of EditListEntry records in `edits`. Edit embeds it
// as its `list` member.
149 struct EditList : FullBox {
150  DECLARE_BOX_METHODS(EditList);
151 
152  std::vector<EditListEntry> edits;
153 };
154 
// Box wrapping a single EditList. Track embeds it as its `edit` member.
155 struct Edit : Box {
156  DECLARE_BOX_METHODS(Edit);
157 
158  EditList list;
159 };
160 
162  DECLARE_BOX_METHODS(HandlerReference);
163 
164  TrackType type;
165 };
166 
168  DECLARE_BOX_METHODS(CodecConfigurationRecord);
169 
170  FourCC box_type;
171  // Contains full codec configuration record, including possible extension boxes.
172  std::vector<uint8_t> data;
173 };
174 
176  DECLARE_BOX_METHODS(PixelAspectRatioBox);
177 
178  uint32_t h_spacing;
179  uint32_t v_spacing;
180 };
181 
183  DECLARE_BOX_METHODS(VideoSampleEntry);
184  // Returns actual format of this sample entry.
185  FourCC GetActualFormat() const {
186  return format == FOURCC_ENCV ? sinf.format.format : format;
187  }
188 
189  FourCC format;
190  uint16_t data_reference_index;
191  uint16_t width;
192  uint16_t height;
193 
194  PixelAspectRatioBox pixel_aspect;
196  CodecConfigurationRecord codec_config_record;
197 };
198 
200  DECLARE_BOX_METHODS(ElementaryStreamDescriptor);
201 
202  AACAudioSpecificConfig aac_audio_specific_config;
203  ESDescriptor es_descriptor;
204 };
205 
// Box carrying an uninterpreted byte vector `data`. The audio sample entry in
// this file holds one as its `ddts` member.
// Presumably DTS decoder-specific configuration -- confirm against the
// ReadWrite() implementation.
206 struct DTSSpecificBox : Box {
207  DECLARE_BOX_METHODS(DTSSpecificBox);
208 
209  std::vector<uint8_t> data;
210 };
211 
213  DECLARE_BOX_METHODS(AudioSampleEntry);
214  // Returns actual format of this sample entry.
215  FourCC GetActualFormat() const {
216  return format == FOURCC_ENCA ? sinf.format.format : format;
217  }
218 
219  FourCC format;
220  uint16_t data_reference_index;
221  uint16_t channelcount;
222  uint16_t samplesize;
223  uint32_t samplerate;
224 
227  DTSSpecificBox ddts;
228 };
229 
231  DECLARE_BOX_METHODS(SampleDescription);
232 
233  TrackType type;
234  std::vector<VideoSampleEntry> video_entries;
235  std::vector<AudioSampleEntry> audio_entries;
236 };
237 
// One entry of DecodingTimeToSample::decoding_time (stts). Plain data; not a
// Box. Presumably `sample_count` consecutive samples share the same
// `sample_delta` -- confirm against the stts reader.
238 struct DecodingTime {
239  uint32_t sample_count;
240  uint32_t sample_delta;
241 };
242 
243 // stts.
245  DECLARE_BOX_METHODS(DecodingTimeToSample);
246 
247  std::vector<DecodingTime> decoding_time;
248 };
249 
251  uint32_t sample_count;
252  // If version == 0, sample_offset is uint32_t;
253  // If version == 1, sample_offset is int32_t.
254  // Use int64_t so both can be supported properly.
255  int64_t sample_offset;
256 };
257 
258 // ctts. Optional.
260  DECLARE_BOX_METHODS(CompositionTimeToSample);
261 
262  std::vector<CompositionOffset> composition_offset;
263 };
264 
// One entry of SampleToChunk::chunk_info (stsc). Plain data; not a Box.
// Holds the first chunk number, the samples-per-chunk count and the sample
// description index for the entry.
265 struct ChunkInfo {
266  uint32_t first_chunk;
267  uint32_t samples_per_chunk;
268  uint32_t sample_description_index;
269 };
270 
271 // stsc.
273  DECLARE_BOX_METHODS(SampleToChunk);
274 
275  std::vector<ChunkInfo> chunk_info;
276 };
277 
278 // stsz.
// Full box with a `sample_size`, a `sample_count` and a per-sample `sizes`
// list. SampleTable stores the sample sizes in this type whether the input
// used SampleSize or CompactSampleSize (see the comment on
// SampleTable::sample_size).
// Presumably `sizes` is only populated when `sample_size` is 0 -- confirm
// against the ReadWrite() implementation.
279 struct SampleSize : FullBox {
280  DECLARE_BOX_METHODS(SampleSize);
281 
282  uint32_t sample_size;
283  uint32_t sample_count;
284  std::vector<uint32_t> sizes;
285 };
286 
287 // stz2.
289  DECLARE_BOX_METHODS(CompactSampleSize);
290 
291  uint8_t field_size;
292  std::vector<uint32_t> sizes;
293 };
294 
295 // co64.
297  DECLARE_BOX_METHODS(ChunkLargeOffset);
298 
299  std::vector<uint64_t> offsets;
300 };
301 
302 // stco.
304  DECLARE_BOX_METHODS(ChunkOffset);
305 };
306 
307 // stss. Optional.
// Full box holding a list of sample numbers in `sample_number`. SampleTable
// embeds it as `sync_sample`.
// Presumably these are the 1-based numbers of the sync (random access)
// samples -- confirm against the stss reader.
308 struct SyncSample : FullBox {
309  DECLARE_BOX_METHODS(SyncSample);
310 
311  std::vector<uint32_t> sample_number;
312 };
313 
// Box aggregating the sample tables declared above: the sample description,
// decoding and composition time-to-sample tables, the sample-to-chunk table,
// sample sizes, chunk offsets and the sync sample list. MediaInformation embeds
// it as `sample_table`.
314 struct SampleTable : Box {
315  DECLARE_BOX_METHODS(SampleTable);
316 
317  SampleDescription description;
318  DecodingTimeToSample decoding_time_to_sample;
319  CompositionTimeToSample composition_time_to_sample;
320  SampleToChunk sample_to_chunk;
321  // Either SampleSize or CompactSampleSize must be present. Both are stored
322  SampleSize sample_size;
323  // Either ChunkOffset or ChunkLargeOffset must be present. Both are stored in
324  // ChunkLargeOffset. (Likewise, sample sizes above are stored in SampleSize.)
325  ChunkLargeOffset chunk_large_offset;
326  SyncSample sync_sample;
327 };
328 
330  DECLARE_BOX_METHODS(MediaHeader);
331 
332  uint64_t creation_time;
333  uint64_t modification_time;
334  uint32_t timescale;
335  uint64_t duration;
336  // 3-char language code + 1 null terminating char.
337  char language[4];
338 };
339 
341  DECLARE_BOX_METHODS(VideoMediaHeader);
342 
343  uint16_t graphicsmode;
344  uint16_t opcolor_red;
345  uint16_t opcolor_green;
346  uint16_t opcolor_blue;
347 };
348 
350  DECLARE_BOX_METHODS(SoundMediaHeader);
351 
352  uint16_t balance;
353 };
354 
356  DECLARE_BOX_METHODS(DataEntryUrl);
357 
358  std::vector<uint8_t> location;
359 };
360 
362  DECLARE_BOX_METHODS(DataReference);
363 
364  // A data entry can be either a url or a urn box. Only url boxes are handled for now.
365  std::vector<DataEntryUrl> data_entry;
366 };
367 
369  DECLARE_BOX_METHODS(DataInformation);
370 
371  DataReference dref;
372 };
373 
375  DECLARE_BOX_METHODS(MediaInformation);
376 
377  DataInformation dinf;
378  SampleTable sample_table;
379  // Exactly one specific media header shall be present: vmhd, smhd, hmhd or nmhd.
380  VideoMediaHeader vmhd;
381  SoundMediaHeader smhd;
382 };
383 
// Box grouping a MediaHeader, a HandlerReference and a MediaInformation box.
// Track embeds it as its `media` member.
384 struct Media : Box {
385  DECLARE_BOX_METHODS(Media);
386 
387  MediaHeader header;
388  HandlerReference handler;
389  MediaInformation information;
390 };
391 
// Box grouping a TrackHeader, a Media box and an Edit box. Movie holds a list
// of these in `tracks`.
392 struct Track : Box {
393  DECLARE_BOX_METHODS(Track);
394 
395  TrackHeader header;
396  Media media;
397  Edit edit;
398 };
399 
401  DECLARE_BOX_METHODS(MovieExtendsHeader);
402 
403  uint64_t fragment_duration;
404 };
405 
407  DECLARE_BOX_METHODS(TrackExtends);
408 
409  uint32_t track_id;
410  uint32_t default_sample_description_index;
411  uint32_t default_sample_duration;
412  uint32_t default_sample_size;
413  uint32_t default_sample_flags;
414 };
415 
// Box holding a MovieExtendsHeader and a list of TrackExtends boxes. Movie
// embeds it as its `extends` member.
416 struct MovieExtends : Box {
417  DECLARE_BOX_METHODS(MovieExtends);
418 
419  MovieExtendsHeader header;
420  std::vector<TrackExtends> tracks;
421 };
422 
// Box holding the MovieHeader, the MovieExtends box, the list of Track boxes
// and a list of ProtectionSystemSpecificHeader boxes (`pssh`).
423 struct Movie : Box {
424  DECLARE_BOX_METHODS(Movie);
425 
426  MovieHeader header;
427  MovieExtends extends;
428  std::vector<Track> tracks;
429  std::vector<ProtectionSystemSpecificHeader> pssh;
430 };
431 
433  DECLARE_BOX_METHODS(TrackFragmentDecodeTime);
434 
435  uint64_t decode_time;
436 };
437 
439  DECLARE_BOX_METHODS(MovieFragmentHeader);
440 
441  uint32_t sequence_number;
442 };
443 
445  enum TrackFragmentFlagsMasks {
446  kBaseDataOffsetPresentMask = 0x000001,
447  kSampleDescriptionIndexPresentMask = 0x000002,
448  kDefaultSampleDurationPresentMask = 0x000008,
449  kDefaultSampleSizePresentMask = 0x000010,
450  kDefaultSampleFlagsPresentMask = 0x000020,
451  kDurationIsEmptyMask = 0x010000,
452  kDefaultBaseIsMoofMask = 0x020000,
453  };
454 
455  enum SampleFlagsMasks {
456  kReservedMask = 0xFC000000,
457  kSampleDependsOnMask = 0x03000000,
458  kSampleIsDependedOnMask = 0x00C00000,
459  kSampleHasRedundancyMask = 0x00300000,
460  kSamplePaddingValueMask = 0x000E0000,
461  kNonKeySampleMask = 0x00010000,
462  kSampleDegradationPriorityMask = 0x0000FFFF,
463  };
464 
465  DECLARE_BOX_METHODS(TrackFragmentHeader);
466 
467  uint32_t track_id;
468  uint32_t sample_description_index;
469  uint32_t default_sample_duration;
470  uint32_t default_sample_size;
471  uint32_t default_sample_flags;
472 };
473 
475  enum TrackFragmentFlagsMasks {
476  kDataOffsetPresentMask = 0x000001,
477  kFirstSampleFlagsPresentMask = 0x000004,
478  kSampleDurationPresentMask = 0x000100,
479  kSampleSizePresentMask = 0x000200,
480  kSampleFlagsPresentMask = 0x000400,
481  kSampleCompTimeOffsetsPresentMask = 0x000800,
482  };
483 
484  DECLARE_BOX_METHODS(TrackFragmentRun);
485 
486  uint32_t sample_count;
487  uint32_t data_offset;
488  std::vector<uint32_t> sample_flags;
489  std::vector<uint32_t> sample_sizes;
490  std::vector<uint32_t> sample_durations;
491  std::vector<int64_t> sample_composition_time_offsets;
492 };
493 
495  enum GroupDescriptionIndexBase {
496  kTrackGroupDescriptionIndexBase = 0,
497  kTrackFragmentGroupDescriptionIndexBase = 0x10000,
498  };
499 
500  uint32_t sample_count;
501  uint32_t group_description_index;
502 };
503 
505  DECLARE_BOX_METHODS(SampleToGroup);
506 
507  uint32_t grouping_type;
508  uint32_t grouping_type_parameter; // Version 1 only.
509  std::vector<SampleToGroupEntry> entries;
510 };
511 
515 
516  bool is_encrypted;
517  uint8_t iv_size;
518  std::vector<uint8_t> key_id;
519 };
520 
522  DECLARE_BOX_METHODS(SampleGroupDescription);
523 
524  uint32_t grouping_type;
525  std::vector<CencSampleEncryptionInfoEntry> entries;
526 };
527 
// Box grouping the per-track pieces of a movie fragment: the fragment header,
// a list of track runs, the decode time box, the sample-to-group and sample
// group description boxes, and the sample auxiliary information size/offset
// boxes. MovieFragment holds a list of these in `tracks`.
// `decode_time_absent` presumably records that no TrackFragmentDecodeTime box
// was found while parsing -- confirm against the ReadWrite() implementation.
528 struct TrackFragment : Box {
529  DECLARE_BOX_METHODS(TrackFragment);
530 
531  TrackFragmentHeader header;
532  std::vector<TrackFragmentRun> runs;
533  bool decode_time_absent;
534  TrackFragmentDecodeTime decode_time;
535  SampleToGroup sample_to_group;
536  SampleGroupDescription sample_group_description;
537  SampleAuxiliaryInformationSize auxiliary_size;
538  SampleAuxiliaryInformationOffset auxiliary_offset;
539 };
540 
// Box holding a MovieFragmentHeader, a list of TrackFragment boxes and a list
// of ProtectionSystemSpecificHeader boxes (`pssh`).
541 struct MovieFragment : Box {
542  DECLARE_BOX_METHODS(MovieFragment);
543 
544  MovieFragmentHeader header;
545  std::vector<TrackFragment> tracks;
546  std::vector<ProtectionSystemSpecificHeader> pssh;
547 };
548 
550  enum SAPType {
551  TypeUnknown = 0,
552  Type1 = 1, // T(ept) = T(dec) = T(sap) = T(ptf)
553  Type2 = 2, // T(ept) = T(dec) = T(sap) < T(ptf)
554  Type3 = 3, // T(ept) < T(dec) = T(sap) <= T(ptf)
555  Type4 = 4, // T(ept) <= T(ptf) < T(dec) = T(sap)
556  Type5 = 5, // T(ept) = T(dec) < T(sap)
557  Type6 = 6, // T(ept) < T(dec) < T(sap)
558  };
559 
560  bool reference_type;
561  uint32_t referenced_size;
562  uint32_t subsegment_duration;
563  bool starts_with_sap;
564  SAPType sap_type;
565  uint32_t sap_delta_time;
566  // We add this field to keep track of earliest_presentation_time in this
567  // subsegment. It is not part of SegmentReference.
568  uint64_t earliest_presentation_time;
569 };
570 
572  DECLARE_BOX_METHODS(SegmentIndex);
573 
574  uint32_t reference_id;
575  uint32_t timescale;
576  uint64_t earliest_presentation_time;
577  uint64_t first_offset;
578  std::vector<SegmentReference> references;
579 };
580 
581 // The actual data is parsed and written separately, so we do not inherit it
582 // from Box.
// Stand-alone type: it declares its own constructor, destructor, Write(),
// ComputeSize() and BoxType() instead of using DECLARE_BOX_METHODS, and none of
// them is marked virtual or override. Write() takes a BufferWriter rather than
// the BoxBuffer used by the boxes' ReadWrite().
// `data_size` is presumably the size in bytes of the media payload -- confirm
// against the ComputeSize() implementation.
// NOTE(review): BufferWriter is not declared in this header; it must be
// provided by one of the included headers -- confirm.
583 struct MediaData {
584  MediaData();
585  ~MediaData();
586  void Write(BufferWriter* buffer_writer);
587  uint32_t ComputeSize();
588  FourCC BoxType() const;
589 
590  uint32_t data_size;
591 };
592 
// Undefine the helper macro defined near the top of this header so that it
// does not leak into files that include it. The name must match the #define
// (DECLARE_BOX_METHODS); undefining DECLARE_BOX would be a no-op.
593 #undef DECLARE_BOX_METHODS
594 
595 } // namespace mp4
596 } // namespace media
597 } // namespace edash_packager
598 
599 #endif // MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_