DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs Enumerator
box_definitions.h
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
6 #define MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
7 
8 #include <vector>
9 
10 #include "packager/media/base/decrypt_config.h"
11 #include "packager/media/formats/mp4/aac_audio_specific_config.h"
12 #include "packager/media/formats/mp4/box.h"
13 #include "packager/media/formats/mp4/es_descriptor.h"
14 #include "packager/media/formats/mp4/fourccs.h"
15 
16 namespace edash_packager {
17 namespace media {
18 
19 class BufferReader;
20 
21 namespace mp4 {
22 
// Track categories used by the `TrackType type` members declared later in
// this header. kInvalid is pinned to 0; the remaining enumerators take the
// consecutive values that follow (kVideo = 1 through kText = 4).
23 enum TrackType {
24  kInvalid = 0,
25  kVideo,
26  kAudio,
27  kHint,
28  kText,
29 };
30 
31 class BoxBuffer;
32 
// Declares the members common to every box type T in this header:
//  - public: a default constructor, an overriding destructor and an
//    overriding BoxType() accessor returning a FourCC;
//  - private: overrides of ReadWriteInternal() and ComputeSizeInternal().
// Every declaration uses `override`, so T must derive from a base that
// declares these functions as virtual. The expansion ends with `public:` so
// that whatever follows the macro invocation is public again.
33 #define DECLARE_BOX_METHODS(T) \
34  public: \
35  T(); \
36  ~T() override; \
37  FourCC BoxType() const override; \
38  \
39  private: \
40  bool ReadWriteInternal(BoxBuffer* buffer) override; \
41  uint32_t ComputeSizeInternal() override; \
42  \
43  public:
44 
// Box carrying a major brand, a 32-bit minor version and a list of
// compatible brands; every brand is stored as a FourCC.
45 struct FileType : Box {
46  DECLARE_BOX_METHODS(FileType);
47 
48  FourCC major_brand;
49  uint32_t minor_version;
50  std::vector<FourCC> compatible_brands;
51 };
52 
54  FourCC BoxType() const override;
55 };
56 
58  DECLARE_BOX_METHODS(ProtectionSystemSpecificHeader);
59 
60  std::vector<uint8_t> system_id;
61  std::vector<uint8_t> data;
62  std::vector<uint8_t> raw_box;
63 };
64 
66  DECLARE_BOX_METHODS(SampleAuxiliaryInformationOffset);
67 
68  std::vector<uint64_t> offsets;
69 };
70 
72  DECLARE_BOX_METHODS(SampleAuxiliaryInformationSize);
73 
74  uint8_t default_sample_info_size;
75  uint32_t sample_count;
76  std::vector<uint8_t> sample_info_sizes;
77 };
78 
88  bool ReadWrite(uint8_t iv_size,
89  bool has_subsamples,
90  BoxBuffer* buffer);
97  bool ParseFromBuffer(uint8_t iv_size,
98  bool has_subsamples,
99  BufferReader* reader);
101  uint32_t ComputeSize() const;
104  uint32_t GetTotalSizeOfSubsamples() const;
105 
106  std::vector<uint8_t> initialization_vector;
107  std::vector<SubsampleEntry> subsamples;
108 };
109 
111  enum SampleEncryptionFlags {
112  kUseSubsampleEncryption = 2,
113  };
114 
115  DECLARE_BOX_METHODS(SampleEncryption);
122  size_t iv_size,
123  std::vector<SampleEncryptionEntry>* sample_encryption_entries) const;
124 
127  std::vector<uint8_t> sample_encryption_data;
128 
129  size_t iv_size;
130  std::vector<SampleEncryptionEntry> sample_encryption_entries;
131 };
132 
// Box holding a single FourCC `format`. It is embedded as the `format`
// member of ProtectionSchemeInfo.
133 struct OriginalFormat : Box {
134  DECLARE_BOX_METHODS(OriginalFormat);
135 
136  FourCC format;
137 };
138 
// FullBox carrying a scheme `type` (FourCC) and a 32-bit `version`.
// NOTE(review): FullBox is not defined in this file; confirm that this
// `version` member does not unintentionally hide a FullBox member of the
// same name.
139 struct SchemeType : FullBox {
140  DECLARE_BOX_METHODS(SchemeType);
141 
142  FourCC type;
143  uint32_t version;
144 };
145 
147  DECLARE_BOX_METHODS(TrackEncryption);
148 
149  // Note: this definition is specific to the CENC protection type.
150  bool is_encrypted;
151  uint8_t default_iv_size;
152  std::vector<uint8_t> default_kid;
153 };
154 
// Box that wraps a single TrackEncryption child (`track_encryption`).
155 struct SchemeInfo : Box {
156  DECLARE_BOX_METHODS(SchemeInfo);
157 
158  TrackEncryption track_encryption;
159 };
160 
162  DECLARE_BOX_METHODS(ProtectionSchemeInfo);
163 
164  OriginalFormat format;
165  SchemeType type;
166  SchemeInfo info;
167 };
168 
170  DECLARE_BOX_METHODS(MovieHeader);
171 
172  uint64_t creation_time;
173  uint64_t modification_time;
174  uint32_t timescale;
175  uint64_t duration;
176  int32_t rate;
177  int16_t volume;
178  uint32_t next_track_id;
179 };
180 
182  enum TrackHeaderFlags {
183  kTrackEnabled = 0x000001,
184  kTrackInMovie = 0x000002,
185  kTrackInPreview = 0x000004,
186  };
187 
188  DECLARE_BOX_METHODS(TrackHeader);
189 
190  uint64_t creation_time;
191  uint64_t modification_time;
192  uint32_t track_id;
193  uint64_t duration;
194  int16_t layer;
195  int16_t alternate_group;
196  int16_t volume;
197  // width and height specify the track's visual presentation size as
198  // fixed-point 16.16 values.
199  uint32_t width;
200  uint32_t height;
201 };
202 
204  uint64_t segment_duration;
205  int64_t media_time;
206  int16_t media_rate_integer;
207  int16_t media_rate_fraction;
208 };
209 
// FullBox holding the sequence of EditListEntry records in `edits`.
210 struct EditList : FullBox {
211  DECLARE_BOX_METHODS(EditList);
212 
213  std::vector<EditListEntry> edits;
214 };
215 
// Box that wraps a single EditList (`list`); used as Track::edit.
216 struct Edit : Box {
217  DECLARE_BOX_METHODS(Edit);
218 
219  EditList list;
220 };
221 
223  DECLARE_BOX_METHODS(HandlerReference);
224 
225  TrackType type;
226 };
227 
229  DECLARE_BOX_METHODS(CodecConfigurationRecord);
230 
231  FourCC box_type;
232  // Contains full codec configuration record, including possible extension
233  // boxes.
234  std::vector<uint8_t> data;
235 };
236 
238  DECLARE_BOX_METHODS(PixelAspectRatio);
239 
240  uint32_t h_spacing;
241  uint32_t v_spacing;
242 };
243 
245  DECLARE_BOX_METHODS(VideoSampleEntry);
246  // Returns actual format of this sample entry.
247  FourCC GetActualFormat() const {
248  return format == FOURCC_ENCV ? sinf.format.format : format;
249  }
250 
251  FourCC format;
252  uint16_t data_reference_index;
253  uint16_t width;
254  uint16_t height;
255 
256  PixelAspectRatio pixel_aspect;
258  CodecConfigurationRecord codec_config_record;
259 };
260 
262  DECLARE_BOX_METHODS(ElementaryStreamDescriptor);
263 
264  AACAudioSpecificConfig aac_audio_specific_config;
265  ESDescriptor es_descriptor;
266 };
267 
// Box whose content is kept as a byte vector in `data`.
268 struct DTSSpecific : Box {
269  DECLARE_BOX_METHODS(DTSSpecific);
270 
271  std::vector<uint8_t> data;
272 };
273 
275  DECLARE_BOX_METHODS(AudioSampleEntry);
276  // Returns actual format of this sample entry.
277  FourCC GetActualFormat() const {
278  return format == FOURCC_ENCA ? sinf.format.format : format;
279  }
280 
281  FourCC format;
282  uint16_t data_reference_index;
283  uint16_t channelcount;
284  uint16_t samplesize;
285  uint32_t samplerate;
286 
289  DTSSpecific ddts;
290 };
291 
293  DECLARE_BOX_METHODS(WebVTTConfigurationBox);
294  std::string config;
295 };
296 
298  DECLARE_BOX_METHODS(WebVTTSourceLabelBox);
299  std::string source_label;
300 };
301 
303  DECLARE_BOX_METHODS(WVTTSampleEntry);
304 
305  uint16_t data_reference_index;
306 
307  WebVTTConfigurationBox config;
308  WebVTTSourceLabelBox label;
309  // Optional MPEG4BitRateBox.
310 };
311 
313  DECLARE_BOX_METHODS(SampleDescription);
314 
315  TrackType type;
316  // TODO(kqyang): Clean up the code to have one single member, e.g. by creating
317  // SampleEntry struct, std::vector<SampleEntry> sample_entries.
318  std::vector<VideoSampleEntry> video_entries;
319  std::vector<AudioSampleEntry> audio_entries;
320  std::vector<WVTTSampleEntry> wvtt_entries;
321 };
322 
// Plain record pairing a sample count with a sample delta. It is the element
// type of DecodingTimeToSample::decoding_time.
323 struct DecodingTime {
324  uint32_t sample_count;
325  uint32_t sample_delta;
326 };
327 
328 // stts.
330  DECLARE_BOX_METHODS(DecodingTimeToSample);
331 
332  std::vector<DecodingTime> decoding_time;
333 };
334 
336  uint32_t sample_count;
337  // If version == 0, sample_offset is uint32_t;
338  // If version == 1, sample_offset is int32_t.
339  // Use int64_t so both can be supported properly.
340  int64_t sample_offset;
341 };
342 
343 // ctts. Optional.
345  DECLARE_BOX_METHODS(CompositionTimeToSample);
346 
347  std::vector<CompositionOffset> composition_offset;
348 };
349 
// Plain record with a first-chunk number, a samples-per-chunk count and a
// sample description index. It is the element type of
// SampleToChunk::chunk_info.
350 struct ChunkInfo {
351  uint32_t first_chunk;
352  uint32_t samples_per_chunk;
353  uint32_t sample_description_index;
354 };
355 
356 // stsc.
358  DECLARE_BOX_METHODS(SampleToChunk);
359 
360  std::vector<ChunkInfo> chunk_info;
361 };
362 
363 // stsz.
// FullBox with a `sample_size` field, a `sample_count` field and a vector of
// 32-bit `sizes`. SampleTable stores its sample-size information in this
// type.
// NOTE(review): presumably `sizes` is only meaningful when `sample_size` is
// 0 -- confirm against the implementation, which is not visible here.
364 struct SampleSize : FullBox {
365  DECLARE_BOX_METHODS(SampleSize);
366 
367  uint32_t sample_size;
368  uint32_t sample_count;
369  std::vector<uint32_t> sizes;
370 };
371 
372 // stz2.
374  DECLARE_BOX_METHODS(CompactSampleSize);
375 
376  uint8_t field_size;
377  std::vector<uint32_t> sizes;
378 };
379 
380 // co64.
382  DECLARE_BOX_METHODS(ChunkLargeOffset);
383 
384  std::vector<uint64_t> offsets;
385 };
386 
387 // stco.
389  DECLARE_BOX_METHODS(ChunkOffset);
390 };
391 
392 // stss. Optional.
// FullBox holding a list of 32-bit sample numbers in `sample_number`.
393 struct SyncSample : FullBox {
394  DECLARE_BOX_METHODS(SyncSample);
395 
396  std::vector<uint32_t> sample_number;
397 };
398 
// Box aggregating the per-track sample tables: the sample description, the
// decoding and composition time tables, the sample-to-chunk table, sample
// sizes, chunk offsets and the sync sample table.
399 struct SampleTable : Box {
400  DECLARE_BOX_METHODS(SampleTable);
401 
402  SampleDescription description;
403  DecodingTimeToSample decoding_time_to_sample;
404  CompositionTimeToSample composition_time_to_sample;
405  SampleToChunk sample_to_chunk;
406  // Either SampleSize or CompactSampleSize must be present. Stored in SampleSize.
407  SampleSize sample_size;
408  // Either ChunkOffset or ChunkLargeOffset must be present. Stored in
409  // ChunkLargeOffset.
410  ChunkLargeOffset chunk_large_offset;
411  SyncSample sync_sample;
412 };
413 
415  DECLARE_BOX_METHODS(MediaHeader);
416 
417  uint64_t creation_time;
418  uint64_t modification_time;
419  uint32_t timescale;
420  uint64_t duration;
421  // 3-char language code + 1 null terminating char.
422  char language[4];
423 };
424 
426  DECLARE_BOX_METHODS(VideoMediaHeader);
427 
428  uint16_t graphicsmode;
429  uint16_t opcolor_red;
430  uint16_t opcolor_green;
431  uint16_t opcolor_blue;
432 };
433 
435  DECLARE_BOX_METHODS(SoundMediaHeader);
436 
437  uint16_t balance;
438 };
439 
441  DECLARE_BOX_METHODS(SubtitleMediaHeader);
442 };
443 
445  DECLARE_BOX_METHODS(DataEntryUrl);
446 
447  std::vector<uint8_t> location;
448 };
449 
451  DECLARE_BOX_METHODS(DataReference);
452 
453  // data entry can be either url or urn box. Fix to url box for now.
454  std::vector<DataEntryUrl> data_entry;
455 };
456 
458  DECLARE_BOX_METHODS(DataInformation);
459 
460  DataReference dref;
461 };
462 
464  DECLARE_BOX_METHODS(MediaInformation);
465 
466  DataInformation dinf;
467  SampleTable sample_table;
468  // Exactly one specific media header shall be present: vmhd, smhd, hmhd, nmhd.
469  VideoMediaHeader vmhd;
470  SoundMediaHeader smhd;
471  SubtitleMediaHeader sthd;
472 };
473 
// Box grouping a track's MediaHeader, HandlerReference and MediaInformation;
// used as Track::media.
474 struct Media : Box {
475  DECLARE_BOX_METHODS(Media);
476 
477  MediaHeader header;
478  HandlerReference handler;
479  MediaInformation information;
480 };
481 
// Box describing one track: its TrackHeader, Media, Edit and
// SampleEncryption members. Movie holds a vector of these in `tracks`.
482 struct Track : Box {
483  DECLARE_BOX_METHODS(Track);
484 
485  TrackHeader header;
486  Media media;
487  Edit edit;
488  SampleEncryption sample_encryption;
489 };
490 
492  DECLARE_BOX_METHODS(MovieExtendsHeader);
493 
494  uint64_t fragment_duration;
495 };
496 
498  DECLARE_BOX_METHODS(TrackExtends);
499 
500  uint32_t track_id;
501  uint32_t default_sample_description_index;
502  uint32_t default_sample_duration;
503  uint32_t default_sample_size;
504  uint32_t default_sample_flags;
505 };
506 
// Box holding a MovieExtendsHeader plus one TrackExtends entry per element of
// `tracks`; used as Movie::extends.
507 struct MovieExtends : Box {
508  DECLARE_BOX_METHODS(MovieExtends);
509 
510  MovieExtendsHeader header;
511  std::vector<TrackExtends> tracks;
512 };
513 
// Box aggregating the MovieHeader, the MovieExtends box, the list of Track
// boxes and the list of ProtectionSystemSpecificHeader boxes (`pssh`).
514 struct Movie : Box {
515  DECLARE_BOX_METHODS(Movie);
516 
517  MovieHeader header;
518  MovieExtends extends;
519  std::vector<Track> tracks;
520  std::vector<ProtectionSystemSpecificHeader> pssh;
521 };
522 
524  DECLARE_BOX_METHODS(TrackFragmentDecodeTime);
525 
526  uint64_t decode_time;
527 };
528 
530  DECLARE_BOX_METHODS(MovieFragmentHeader);
531 
532  uint32_t sequence_number;
533 };
534 
536  enum TrackFragmentFlagsMasks {
537  kBaseDataOffsetPresentMask = 0x000001,
538  kSampleDescriptionIndexPresentMask = 0x000002,
539  kDefaultSampleDurationPresentMask = 0x000008,
540  kDefaultSampleSizePresentMask = 0x000010,
541  kDefaultSampleFlagsPresentMask = 0x000020,
542  kDurationIsEmptyMask = 0x010000,
543  kDefaultBaseIsMoofMask = 0x020000,
544  };
545 
546  enum SampleFlagsMasks {
547  kReservedMask = 0xFC000000,
548  kSampleDependsOnMask = 0x03000000,
549  kSampleIsDependedOnMask = 0x00C00000,
550  kSampleHasRedundancyMask = 0x00300000,
551  kSamplePaddingValueMask = 0x000E0000,
552  kNonKeySampleMask = 0x00010000,
553  kSampleDegradationPriorityMask = 0x0000FFFF,
554  };
555 
556  DECLARE_BOX_METHODS(TrackFragmentHeader);
557 
558  uint32_t track_id;
559  uint32_t sample_description_index;
560  uint32_t default_sample_duration;
561  uint32_t default_sample_size;
562  uint32_t default_sample_flags;
563 };
564 
566  enum TrackFragmentFlagsMasks {
567  kDataOffsetPresentMask = 0x000001,
568  kFirstSampleFlagsPresentMask = 0x000004,
569  kSampleDurationPresentMask = 0x000100,
570  kSampleSizePresentMask = 0x000200,
571  kSampleFlagsPresentMask = 0x000400,
572  kSampleCompTimeOffsetsPresentMask = 0x000800,
573  };
574 
575  DECLARE_BOX_METHODS(TrackFragmentRun);
576 
577  uint32_t sample_count;
578  uint32_t data_offset;
579  std::vector<uint32_t> sample_flags;
580  std::vector<uint32_t> sample_sizes;
581  std::vector<uint32_t> sample_durations;
582  std::vector<int64_t> sample_composition_time_offsets;
583 };
584 
586  enum GroupDescriptionIndexBase {
587  kTrackGroupDescriptionIndexBase = 0,
588  kTrackFragmentGroupDescriptionIndexBase = 0x10000,
589  };
590 
591  uint32_t sample_count;
592  uint32_t group_description_index;
593 };
594 
596  DECLARE_BOX_METHODS(SampleToGroup);
597 
598  uint32_t grouping_type;
599  uint32_t grouping_type_parameter; // Version 1 only.
600  std::vector<SampleToGroupEntry> entries;
601 };
602 
606 
607  bool is_encrypted;
608  uint8_t iv_size;
609  std::vector<uint8_t> key_id;
610 };
611 
613  DECLARE_BOX_METHODS(SampleGroupDescription);
614 
615  uint32_t grouping_type;
616  std::vector<CencSampleEncryptionInfoEntry> entries;
617 };
618 
// Box describing one track's portion of a movie fragment: its header, its
// runs, decode time, sample grouping information, auxiliary information
// size/offset boxes and sample encryption data. MovieFragment holds a vector
// of these in `tracks`.
// NOTE(review): `decode_time_absent` presumably records whether
// `decode_time` was missing from the parsed input -- confirm in the
// implementation, which is not visible here.
619 struct TrackFragment : Box {
620  DECLARE_BOX_METHODS(TrackFragment);
621 
622  TrackFragmentHeader header;
623  std::vector<TrackFragmentRun> runs;
624  bool decode_time_absent;
625  TrackFragmentDecodeTime decode_time;
626  SampleToGroup sample_to_group;
627  SampleGroupDescription sample_group_description;
628  SampleAuxiliaryInformationSize auxiliary_size;
629  SampleAuxiliaryInformationOffset auxiliary_offset;
630  SampleEncryption sample_encryption;
631 };
632 
// Box aggregating a MovieFragmentHeader, the list of TrackFragment boxes and
// the list of ProtectionSystemSpecificHeader boxes (`pssh`).
633 struct MovieFragment : Box {
634  DECLARE_BOX_METHODS(MovieFragment);
635 
636  MovieFragmentHeader header;
637  std::vector<TrackFragment> tracks;
638  std::vector<ProtectionSystemSpecificHeader> pssh;
639 };
640 
642  enum SAPType {
643  TypeUnknown = 0,
644  Type1 = 1, // T(ept) = T(dec) = T(sap) = T(ptf)
645  Type2 = 2, // T(ept) = T(dec) = T(sap) < T(ptf)
646  Type3 = 3, // T(ept) < T(dec) = T(sap) <= T(ptf)
647  Type4 = 4, // T(ept) <= T(ptf) < T(dec) = T(sap)
648  Type5 = 5, // T(ept) = T(dec) < T(sap)
649  Type6 = 6, // T(ept) < T(dec) < T(sap)
650  };
651 
652  bool reference_type;
653  uint32_t referenced_size;
654  uint32_t subsegment_duration;
655  bool starts_with_sap;
656  SAPType sap_type;
657  uint32_t sap_delta_time;
658  // We add this field to keep track of earliest_presentation_time in this
659  // subsegment. It is not part of SegmentReference.
660  uint64_t earliest_presentation_time;
661 };
662 
664  DECLARE_BOX_METHODS(SegmentIndex);
665 
666  uint32_t reference_id;
667  uint32_t timescale;
668  uint64_t earliest_presentation_time;
669  uint64_t first_offset;
670  std::vector<SegmentReference> references;
671 };
672 
673 // The actual data is parsed and written separately.
// This struct itself holds only a 32-bit `data_size` field.
674 struct MediaData : Box {
675  DECLARE_BOX_METHODS(MediaData);
676 
677  uint32_t data_size;
678 };
679 
// Box holding a signed 32-bit `source_id`; used as VTTCueBox::cue_source_id.
680 struct CueSourceIDBox : Box {
681  DECLARE_BOX_METHODS(CueSourceIDBox);
682  int32_t source_id;
683 };
684 
// Box holding the string `cue_current_time`; used as VTTCueBox::cue_time.
685 struct CueTimeBox : Box {
686  DECLARE_BOX_METHODS(CueTimeBox);
687  std::string cue_current_time;
688 };
689 
// Box holding the string `cue_id`; used as VTTCueBox::cue_id.
690 struct CueIDBox : Box {
691  DECLARE_BOX_METHODS(CueIDBox);
692  std::string cue_id;
693 };
694 
// Box holding the string `settings`; used as VTTCueBox::cue_settings.
695 struct CueSettingsBox : Box {
696  DECLARE_BOX_METHODS(CueSettingsBox);
697  std::string settings;
698 };
699 
// Box holding the string `cue_text`; used as VTTCueBox::cue_payload.
700 struct CuePayloadBox : Box {
701  DECLARE_BOX_METHODS(CuePayloadBox);
702  std::string cue_text;
703 };
704 
// Box with no data members of its own beyond what DECLARE_BOX_METHODS
// declares.
705 struct VTTEmptyCueBox : Box {
706  DECLARE_BOX_METHODS(VTTEmptyCueBox);
707 };
708 
710  DECLARE_BOX_METHODS(VTTAdditionalTextBox);
711  std::string cue_additional_text;
712 };
713 
// Box aggregating the five cue child boxes declared above it: source id,
// cue id, cue time, cue settings and cue payload.
714 struct VTTCueBox : Box {
715  DECLARE_BOX_METHODS(VTTCueBox);
716 
717  CueSourceIDBox cue_source_id;
718  CueIDBox cue_id;
719  CueTimeBox cue_time;
720  CueSettingsBox cue_settings;
721  CuePayloadBox cue_payload;
722 };
723 
// Undefine the helper macro under the name it was actually defined with
// (DECLARE_BOX_METHODS), so it does not leak into every translation unit
// that includes this header.
724 #undef DECLARE_BOX_METHODS
725 
726 } // namespace mp4
727 } // namespace media
728 } // namespace edash_packager
729 
730 #endif // MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
bool ParseFromSampleEncryptionData(size_t iv_size, std::vector< SampleEncryptionEntry > *sample_encryption_entries) const
bool ParseFromBuffer(uint8_t iv_size, bool has_subsamples, BufferReader *reader)
bool ReadWrite(uint8_t iv_size, bool has_subsamples, BoxBuffer *buffer)