DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs Enumerator
box_definitions.h
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
6 #define MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
7 
8 #include <string>
9 #include <vector>
10 
11 #include "packager/media/formats/mp4/aac_audio_specific_config.h"
12 #include "packager/media/formats/mp4/box.h"
13 #include "packager/media/formats/mp4/es_descriptor.h"
14 #include "packager/media/formats/mp4/fourccs.h"
15 
16 namespace edash_packager {
17 namespace media {
18 
19 class BufferReader;
20 
21 namespace mp4 {
22 
// Kind of track a box refers to. In this header it is the type of the `type`
// member declared for HandlerReference and SampleDescription.
// kInvalid is pinned to 0; the remaining enumerators take the following
// consecutive values (kVideo = 1, kAudio = 2, kHint = 3).
23 enum TrackType {
24  kInvalid = 0,
25  kVideo,
26  kAudio,
27  kHint
28 };
29 
30 class BoxBuffer;
31 
// Declares the member functions that each box type T in this header provides:
// a default constructor, an overriding destructor, and overrides of
// ReadWrite(BoxBuffer*), BoxType() and ComputeSize(). Only declarations are
// emitted; no definitions appear in this header.
// NOTE(review): the virtuals being overridden are presumably declared on Box
// in box.h -- confirm there.
32 #define DECLARE_BOX_METHODS(T) \
33  T(); \
34  ~T() override; \
35  bool ReadWrite(BoxBuffer* buffer) override; \
36  FourCC BoxType() const override; \
37  uint32_t ComputeSize() override;
38 
// File type box: carries a major brand, a minor version and a list of
// compatible brands.
// NOTE(review): presumably the ISO BMFF 'ftyp' box; the FourCC is whatever
// BoxType() returns, and that is defined outside this header -- confirm.
39 struct FileType : Box {
40  DECLARE_BOX_METHODS(FileType);
41 
42  FourCC major_brand;
43  uint32_t minor_version;
44  std::vector<FourCC> compatible_brands;
45 };
46 
48  DECLARE_BOX_METHODS(SegmentType);
49 };
50 
52  DECLARE_BOX_METHODS(ProtectionSystemSpecificHeader);
53 
54  std::vector<uint8_t> system_id;
55  std::vector<uint8_t> data;
56  std::vector<uint8_t> raw_box;
57 };
58 
60  DECLARE_BOX_METHODS(SampleAuxiliaryInformationOffset);
61 
62  std::vector<uint64_t> offsets;
63 };
64 
66  DECLARE_BOX_METHODS(SampleAuxiliaryInformationSize);
67 
68  uint8_t default_sample_info_size;
69  uint32_t sample_count;
70  std::vector<uint8_t> sample_info_sizes;
71 };
72 
// Box holding a single FourCC. It is the type of the `format` member declared
// for ProtectionSchemeInfo in this header.
// NOTE(review): presumably records the sample-entry format that was in use
// before protection was applied ('frma') -- confirm against the .cc file.
73 struct OriginalFormat : Box {
74  DECLARE_BOX_METHODS(OriginalFormat);
75 
76  FourCC format;
77 };
78 
// Full box holding a scheme FourCC and a 32-bit scheme version. It is the type
// of the `type` member declared for ProtectionSchemeInfo in this header.
// NOTE(review): if FullBox (box.h) also declares a member named `version`,
// the `version` declared here hides it inside this struct -- confirm which one
// ReadWrite() is meant to touch.
79 struct SchemeType : FullBox {
80  DECLARE_BOX_METHODS(SchemeType);
81 
82  FourCC type;
83  uint32_t version;
84 };
85 
87  DECLARE_BOX_METHODS(TrackEncryption);
88 
89  // Note: this definition is specific to the CENC protection type.
90  bool is_encrypted;
91  uint8_t default_iv_size;
92  std::vector<uint8_t> default_kid;
93 };
94 
// Container box whose only member is a TrackEncryption. It is the type of the
// `info` member declared for ProtectionSchemeInfo in this header.
95 struct SchemeInfo : Box {
96  DECLARE_BOX_METHODS(SchemeInfo);
97 
98  TrackEncryption track_encryption;
99 };
100 
102  DECLARE_BOX_METHODS(ProtectionSchemeInfo);
103 
104  OriginalFormat format;
105  SchemeType type;
106  SchemeInfo info;
107 };
108 
110  DECLARE_BOX_METHODS(MovieHeader);
111 
112  uint64_t creation_time;
113  uint64_t modification_time;
114  uint32_t timescale;
115  uint64_t duration;
116  int32_t rate;
117  int16_t volume;
118  uint32_t next_track_id;
119 };
120 
122  enum TrackHeaderFlags {
123  kTrackEnabled = 0x000001,
124  kTrackInMovie = 0x000002,
125  kTrackInPreview = 0x000004,
126  };
127 
128  DECLARE_BOX_METHODS(TrackHeader);
129 
130  uint64_t creation_time;
131  uint64_t modification_time;
132  uint32_t track_id;
133  uint64_t duration;
134  int16_t layer;
135  int16_t alternate_group;
136  int16_t volume;
137  // width and height specify the track's visual presentation size as
138  // fixed-point 16.16 values.
139  uint32_t width;
140  uint32_t height;
141 };
142 
144  uint64_t segment_duration;
145  int64_t media_time;
146  int16_t media_rate_integer;
147  int16_t media_rate_fraction;
148 };
149 
// Full box holding an ordered list of EditListEntry records. Each entry
// carries a segment duration, a media time and an integer/fraction media rate
// (see the fields declared just above this struct).
150 struct EditList : FullBox {
151  DECLARE_BOX_METHODS(EditList);
152 
153  std::vector<EditListEntry> edits;
154 };
155 
// Container box that wraps a single EditList. Track holds one of these as its
// `edit` member.
156 struct Edit : Box {
157  DECLARE_BOX_METHODS(Edit);
158 
159  EditList list;
160 };
161 
163  DECLARE_BOX_METHODS(HandlerReference);
164 
165  TrackType type;
166 };
167 
169  DECLARE_BOX_METHODS(AVCDecoderConfigurationRecord);
170  bool ParseData(BufferReader* reader);
171 
172  // Contains full avc decoder configuration record as defined in iso14496-15
173  // 5.2.4.1, including possible extension bytes described in paragraph 3.
174  // Known fields defined in the spec are also parsed and included in this
175  // structure.
176  std::vector<uint8_t> data;
177 
178  uint8_t version;
179  uint8_t profile_indication;
180  uint8_t profile_compatibility;
181  uint8_t avc_level;
182  uint8_t length_size;
183 
184  typedef std::vector<uint8_t> SPS;
185  typedef std::vector<uint8_t> PPS;
186 
187  std::vector<SPS> sps_list;
188  std::vector<PPS> pps_list;
189 };
190 
192  DECLARE_BOX_METHODS(PixelAspectRatioBox);
193 
194  uint32_t h_spacing;
195  uint32_t v_spacing;
196 };
197 
199  DECLARE_BOX_METHODS(VideoSampleEntry);
200 
201  FourCC format;
202  uint16_t data_reference_index;
203  uint16_t width;
204  uint16_t height;
205 
206  PixelAspectRatioBox pixel_aspect;
208 
209  // Currently expected to be present regardless of format.
211 };
212 
214  DECLARE_BOX_METHODS(ElementaryStreamDescriptor);
215 
216  AACAudioSpecificConfig aac_audio_specific_config;
217  ESDescriptor es_descriptor;
218 };
219 
221  DECLARE_BOX_METHODS(AudioSampleEntry);
222 
223  FourCC format;
224  uint16_t data_reference_index;
225  uint16_t channelcount;
226  uint16_t samplesize;
227  uint32_t samplerate;
228 
231 };
232 
234  DECLARE_BOX_METHODS(SampleDescription);
235 
236  TrackType type;
237  std::vector<VideoSampleEntry> video_entries;
238  std::vector<AudioSampleEntry> audio_entries;
239 };
240 
// Plain record (not a Box) used as the element type of
// DecodingTimeToSample::decoding_time.
// NOTE(review): presumably one 'stts' run -- `sample_count` consecutive
// samples that each last `sample_delta` timescale units -- confirm.
241 struct DecodingTime {
242  uint32_t sample_count;
243  uint32_t sample_delta;
244 };
245 
246 // stts.
248  DECLARE_BOX_METHODS(DecodingTimeToSample);
249 
250  std::vector<DecodingTime> decoding_time;
251 };
252 
254  uint32_t sample_count;
255  // If version == 0, sample_offset is uint32_t;
256  // If version == 1, sample_offset is int32_t.
257  // Use int64_t so both can be supported properly.
258  int64_t sample_offset;
259 };
260 
261 // ctts. Optional.
263  DECLARE_BOX_METHODS(CompositionTimeToSample);
264 
265  std::vector<CompositionOffset> composition_offset;
266 };
267 
// Plain record (not a Box) used as the element type of
// SampleToChunk::chunk_info.
// NOTE(review): field names match the 'stsc' entry layout (first chunk of a
// run, samples in each chunk of the run, sample description used); whether
// the indices are 1-based is not visible here -- confirm in the parser.
268 struct ChunkInfo {
269  uint32_t first_chunk;
270  uint32_t samples_per_chunk;
271  uint32_t sample_description_index;
272 };
273 
274 // stsc.
276  DECLARE_BOX_METHODS(SampleToChunk);
277 
278  std::vector<ChunkInfo> chunk_info;
279 };
280 
281 // stsz.
// Sample size table. Per the comment on SampleTable::sample_size, this struct
// is also where sizes are stored when the file used CompactSampleSize instead.
// NOTE(review): presumably `sample_size` is the common size when all samples
// are equal and `sizes` holds per-sample values otherwise -- confirm against
// ReadWrite().
282 struct SampleSize : FullBox {
283  DECLARE_BOX_METHODS(SampleSize);
284 
285  uint32_t sample_size;
286  uint32_t sample_count;
287  std::vector<uint32_t> sizes;
288 };
289 
290 // stz2.
292  DECLARE_BOX_METHODS(CompactSampleSize);
293 
294  uint8_t field_size;
295  std::vector<uint32_t> sizes;
296 };
297 
298 // co64.
300  DECLARE_BOX_METHODS(ChunkLargeOffset);
301 
302  std::vector<uint64_t> offsets;
303 };
304 
305 // stco.
307  DECLARE_BOX_METHODS(ChunkOffset);
308 };
309 
310 // stss. Optional.
// Full box holding a list of 32-bit sample numbers.
// NOTE(review): presumably the numbers identify the sync (random access)
// samples of the track -- confirm against the users of this box.
311 struct SyncSample : FullBox {
312  DECLARE_BOX_METHODS(SyncSample);
313 
314  std::vector<uint32_t> sample_number;
315 };
316 
// Container box aggregating the per-track sample tables declared above: the
// sample description, decoding and composition timing, sample-to-chunk
// mapping, sample sizes, chunk offsets and sync samples.
// MediaInformation holds one of these as its `sample_table` member.
317 struct SampleTable : Box {
318  DECLARE_BOX_METHODS(SampleTable);
319 
320  SampleDescription description;
321  DecodingTimeToSample decoding_time_to_sample;
322  CompositionTimeToSample composition_time_to_sample;
323  SampleToChunk sample_to_chunk;
324  // Either SampleSize or CompactSampleSize must be present. Stored in SampleSize.
325  SampleSize sample_size;
326  // Either ChunkOffset or ChunkLargeOffset must be present. Stored in
327  // ChunkLargeOffset.
328  ChunkLargeOffset chunk_large_offset;
329  SyncSample sync_sample;
330 };
331 
333  DECLARE_BOX_METHODS(MediaHeader);
334 
335  uint64_t creation_time;
336  uint64_t modification_time;
337  uint32_t timescale;
338  uint64_t duration;
339  // 3-char language code + 1 null terminating char.
340  char language[4];
341 };
342 
344  DECLARE_BOX_METHODS(VideoMediaHeader);
345 
346  uint16_t graphicsmode;
347  uint16_t opcolor_red;
348  uint16_t opcolor_green;
349  uint16_t opcolor_blue;
350 };
351 
353  DECLARE_BOX_METHODS(SoundMediaHeader);
354 
355  uint16_t balance;
356 };
357 
359  DECLARE_BOX_METHODS(DataEntryUrl);
360 
361  std::vector<uint8_t> location;
362 };
363 
365  DECLARE_BOX_METHODS(DataReference);
366 
367  // data entry can be either url or urn box. Fix to url box for now.
368  std::vector<DataEntryUrl> data_entry;
369 };
370 
372  DECLARE_BOX_METHODS(DataInformation);
373 
374  DataReference dref;
375 };
376 
378  DECLARE_BOX_METHODS(MediaInformation);
379 
380  DataInformation dinf;
381  SampleTable sample_table;
382  // Exactly one specific media header shall be present: vmhd, smhd, hmhd, or nmhd.
383  VideoMediaHeader vmhd;
384  SoundMediaHeader smhd;
385 };
386 
// Container box grouping one track's media-level boxes: its MediaHeader, its
// HandlerReference and its MediaInformation. Track holds one of these as its
// `media` member.
387 struct Media : Box {
388  DECLARE_BOX_METHODS(Media);
389 
390  MediaHeader header;
391  HandlerReference handler;
392  MediaInformation information;
393 };
394 
// Container box for a single track: its TrackHeader, its Media box and its
// Edit box. Movie stores these in its `tracks` vector.
// NOTE(review): whether `edit` is optional when parsing is decided in
// ReadWrite(), which is not visible here -- confirm.
395 struct Track : Box {
396  DECLARE_BOX_METHODS(Track);
397 
398  TrackHeader header;
399  Media media;
400  Edit edit;
401 };
402 
404  DECLARE_BOX_METHODS(MovieExtendsHeader);
405 
406  uint64_t fragment_duration;
407 };
408 
410  DECLARE_BOX_METHODS(TrackExtends);
411 
412  uint32_t track_id;
413  uint32_t default_sample_description_index;
414  uint32_t default_sample_duration;
415  uint32_t default_sample_size;
416  uint32_t default_sample_flags;
417 };
418 
// Container box holding one MovieExtendsHeader and a list of TrackExtends
// records. Movie holds one of these as its `extends` member.
419 struct MovieExtends : Box {
420  DECLARE_BOX_METHODS(MovieExtends);
421 
422  MovieExtendsHeader header;
423  std::vector<TrackExtends> tracks;
424 };
425 
// Top-level movie container: the MovieHeader, the MovieExtends box, every
// Track, and any ProtectionSystemSpecificHeader boxes.
// NOTE(review): presumably the ISO BMFF 'moov' box -- the FourCC is returned
// by BoxType(), defined elsewhere; confirm.
426 struct Movie : Box {
427  DECLARE_BOX_METHODS(Movie);
428 
429  MovieHeader header;
430  MovieExtends extends;
431  std::vector<Track> tracks;
432  std::vector<ProtectionSystemSpecificHeader> pssh;
433 };
434 
436  DECLARE_BOX_METHODS(TrackFragmentDecodeTime);
437 
438  uint64_t decode_time;
439 };
440 
442  DECLARE_BOX_METHODS(MovieFragmentHeader);
443 
444  uint32_t sequence_number;
445 };
446 
448  enum TrackFragmentFlagsMasks {
449  kBaseDataOffsetPresentMask = 0x000001,
450  kSampleDescriptionIndexPresentMask = 0x000002,
451  kDefaultSampleDurationPresentMask = 0x000008,
452  kDefaultSampleSizePresentMask = 0x000010,
453  kDefaultSampleFlagsPresentMask = 0x000020,
454  kDurationIsEmptyMask = 0x010000,
455  kDefaultBaseIsMoofMask = 0x020000,
456  };
457 
458  enum SampleFlagsMasks {
459  kReservedMask = 0xFC000000,
460  kSampleDependsOnMask = 0x03000000,
461  kSampleIsDependedOnMask = 0x00C00000,
462  kSampleHasRedundancyMask = 0x00300000,
463  kSamplePaddingValueMask = 0x000E0000,
464  kNonKeySampleMask = 0x00010000,
465  kSampleDegradationPriorityMask = 0x0000FFFF,
466  };
467 
468  DECLARE_BOX_METHODS(TrackFragmentHeader);
469 
470  uint32_t track_id;
471  uint32_t sample_description_index;
472  uint32_t default_sample_duration;
473  uint32_t default_sample_size;
474  uint32_t default_sample_flags;
475 };
476 
478  enum TrackFragmentFlagsMasks {
479  kDataOffsetPresentMask = 0x000001,
480  kFirstSampleFlagsPresentMask = 0x000004,
481  kSampleDurationPresentMask = 0x000100,
482  kSampleSizePresentMask = 0x000200,
483  kSampleFlagsPresentMask = 0x000400,
484  kSampleCompTimeOffsetsPresentMask = 0x000800,
485  };
486 
487  DECLARE_BOX_METHODS(TrackFragmentRun);
488 
489  uint32_t sample_count;
490  uint32_t data_offset;
491  std::vector<uint32_t> sample_flags;
492  std::vector<uint32_t> sample_sizes;
493  std::vector<uint32_t> sample_durations;
494  std::vector<int64_t> sample_composition_time_offsets;
495 };
496 
498  enum GroupDescriptionIndexBase {
499  kTrackGroupDescriptionIndexBase = 0,
500  kTrackFragmentGroupDescriptionIndexBase = 0x10000,
501  };
502 
503  uint32_t sample_count;
504  uint32_t group_description_index;
505 };
506 
508  DECLARE_BOX_METHODS(SampleToGroup);
509 
510  uint32_t grouping_type;
511  uint32_t grouping_type_parameter; // Version 1 only.
512  std::vector<SampleToGroupEntry> entries;
513 };
514 
518 
519  bool is_encrypted;
520  uint8_t iv_size;
521  std::vector<uint8_t> key_id;
522 };
523 
525  DECLARE_BOX_METHODS(SampleGroupDescription);
526 
527  uint32_t grouping_type;
528  std::vector<CencSampleEncryptionInfoEntry> entries;
529 };
530 
// Container box for one track's part of a movie fragment: the fragment
// header, its sample runs, the decode time, sample-group boxes and the
// sample auxiliary information size/offset boxes.
// MovieFragment stores these in its `tracks` vector.
// NOTE(review): `decode_time_absent` presumably records that no
// TrackFragmentDecodeTime box was found while parsing, in which case
// `decode_time` would not be meaningful -- confirm in ReadWrite().
531 struct TrackFragment : Box {
532  DECLARE_BOX_METHODS(TrackFragment);
533 
534  TrackFragmentHeader header;
535  std::vector<TrackFragmentRun> runs;
536  bool decode_time_absent;
537  TrackFragmentDecodeTime decode_time;
538  SampleToGroup sample_to_group;
539  SampleGroupDescription sample_group_description;
540  SampleAuxiliaryInformationSize auxiliary_size;
541  SampleAuxiliaryInformationOffset auxiliary_offset;
542 };
543 
// Top-level container for one movie fragment: the MovieFragmentHeader, one
// TrackFragment per track in the fragment, and any
// ProtectionSystemSpecificHeader boxes.
// NOTE(review): presumably the ISO BMFF 'moof' box -- confirm via BoxType().
544 struct MovieFragment : Box {
545  DECLARE_BOX_METHODS(MovieFragment);
546 
547  MovieFragmentHeader header;
548  std::vector<TrackFragment> tracks;
549  std::vector<ProtectionSystemSpecificHeader> pssh;
550 };
551 
553  enum SAPType {
554  TypeUnknown = 0,
555  Type1 = 1, // T(ept) = T(dec) = T(sap) = T(ptf)
556  Type2 = 2, // T(ept) = T(dec) = T(sap) < T(ptf)
557  Type3 = 3, // T(ept) < T(dec) = T(sap) <= T(ptf)
558  Type4 = 4, // T(ept) <= T(ptf) < T(dec) = T(sap)
559  Type5 = 5, // T(ept) = T(dec) < T(sap)
560  Type6 = 6, // T(ept) < T(dec) < T(sap)
561  };
562 
563  bool reference_type;
564  uint32_t referenced_size;
565  uint32_t subsegment_duration;
566  bool starts_with_sap;
567  SAPType sap_type;
568  uint32_t sap_delta_time;
569  // We add this field to keep track of earliest_presentation_time in this
570  // subsegment. It is not part of SegmentReference.
571  uint64_t earliest_presentation_time;
572 };
573 
575  DECLARE_BOX_METHODS(SegmentIndex);
576 
577  uint32_t reference_id;
578  uint32_t timescale;
579  uint64_t earliest_presentation_time;
580  uint64_t first_offset;
581  std::vector<SegmentReference> references;
582 };
583 
584 // The actual data is parsed and written separately, so we do not inherit it
585 // from Box.
// Standalone type that is deliberately not derived from Box. Its member
// functions are therefore plain (non-override) declarations, and it exposes a
// one-way Write() rather than the ReadWrite() the Box types declare.
// Only `data_size` is stored here; no payload buffer is a member.
// NOTE(review): BufferWriter is not declared in the visible part of this
// header (only BufferReader is forward-declared) -- presumably it comes in
// through one of the includes; confirm.
586 struct MediaData {
587  MediaData();
588  ~MediaData();
589  void Write(BufferWriter* buffer_writer);
590  uint32_t ComputeSize();
591  FourCC BoxType() const;
592 
593  uint32_t data_size;
594 };
595 
// Retire the helper macro so it does not leak into files that include this
// header. The macro defined above is DECLARE_BOX_METHODS; the previous
// `#undef DECLARE_BOX` named a macro that is never defined and was a no-op.
596 #undef DECLARE_BOX_METHODS
597 
598 } // namespace mp4
599 } // namespace media
600 } // namespace edash_packager
601 
602 #endif // MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_