DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs Enumerator
box_definitions.h
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
6 #define MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
7 
8 #include <vector>
9 
10 #include "packager/media/base/decrypt_config.h"
11 #include "packager/media/formats/mp4/aac_audio_specific_config.h"
12 #include "packager/media/formats/mp4/box.h"
13 #include "packager/media/formats/mp4/es_descriptor.h"
14 #include "packager/media/formats/mp4/fourccs.h"
15 
16 namespace edash_packager {
17 namespace media {
18 
19 class BufferReader;
20 
21 namespace mp4 {
22 
// Kind of media a track carries. Used in this header as the type of the
// `type` member declared next to DECLARE_BOX_METHODS(HandlerReference) and
// DECLARE_BOX_METHODS(SampleDescription).
23 enum TrackType {
24  kInvalid = 0,  // Explicit zero value.
25  kVideo,  // Implicitly 1.
26  kAudio,  // Implicitly 2.
27  kHint,  // Implicitly 3.
28  kText,  // Implicitly 4.
29 };
30 
31 class BoxBuffer;
32 
// Declares the boilerplate shared by every concrete box type T: a public
// default constructor, an overriding destructor and BoxType(), plus private
// overrides of ReadWriteInternal() and ComputeSizeInternal(). The trailing
// "public:" switches access back so that members written after the macro
// invocation are public again.
// Comments must stay outside the macro body: a "//" comment on a line ending
// in a backslash would swallow the following line.
33 #define DECLARE_BOX_METHODS(T) \
34  public: \
35  T(); \
36  ~T() override; \
37  FourCC BoxType() const override; \
38  \
39  private: \
40  bool ReadWriteInternal(BoxBuffer* buffer) override; \
41  uint32_t ComputeSizeInternal() override; \
42  \
43  public:
44 
// Box carrying brand identification: one major brand, a minor version number
// and a list of compatible brands.
// NOTE(review): presumably the ISO BMFF 'ftyp' box -- confirm against the
// BoxType() definition in the implementation file.
45 struct FileType : Box {
46  DECLARE_BOX_METHODS(FileType);
47 
48  FourCC major_brand;
49  uint32_t minor_version;
50  std::vector<FourCC> compatible_brands;
51 };
52 
54  FourCC BoxType() const override;
55 };
56 
58  DECLARE_BOX_METHODS(ProtectionSystemSpecificHeader);
59 
60  std::vector<uint8_t> system_id;
61  std::vector<uint8_t> data;
62  std::vector<uint8_t> raw_box;
63 };
64 
66  DECLARE_BOX_METHODS(SampleAuxiliaryInformationOffset);
67 
68  std::vector<uint64_t> offsets;
69 };
70 
72  DECLARE_BOX_METHODS(SampleAuxiliaryInformationSize);
73 
74  uint8_t default_sample_info_size;
75  uint32_t sample_count;
76  std::vector<uint8_t> sample_info_sizes;
77 };
78 
88  bool ReadWrite(uint8_t iv_size,
89  bool has_subsamples,
90  BoxBuffer* buffer);
97  bool ParseFromBuffer(uint8_t iv_size,
98  bool has_subsamples,
99  BufferReader* reader);
101  uint32_t ComputeSize() const;
104  uint32_t GetTotalSizeOfSubsamples() const;
105 
106  std::vector<uint8_t> initialization_vector;
107  std::vector<SubsampleEntry> subsamples;
108 };
109 
111  enum SampleEncryptionFlags {
112  kUseSubsampleEncryption = 2,
113  };
114 
115  DECLARE_BOX_METHODS(SampleEncryption);
122  size_t iv_size,
123  std::vector<SampleEncryptionEntry>* sample_encryption_entries) const;
124 
127  std::vector<uint8_t> sample_encryption_data;
128 
129  size_t iv_size;
130  std::vector<SampleEncryptionEntry> sample_encryption_entries;
131 };
132 
// Box holding a single FourCC format code.
// The GetActualFormat() helpers in this header return `sinf.format.format`
// when a sample entry's own format is FOURCC_ENCV / FOURCC_ENCA.
// NOTE(review): `sinf.format` appears to be an instance of this struct; the
// `sinf` member declaration is not visible here -- confirm.
133 struct OriginalFormat : Box {
134  DECLARE_BOX_METHODS(OriginalFormat);
135 
136  FourCC format;
137 };
138 
// FullBox holding a scheme identifier (FourCC) and a 32-bit version value.
// NOTE(review): if FullBox already declares a member named `version`, the
// field below hides it inside this struct -- confirm against box.h.
139 struct SchemeType : FullBox {
140  DECLARE_BOX_METHODS(SchemeType);
141 
142  FourCC type;
143  uint32_t version;
144 };
145 
147  DECLARE_BOX_METHODS(TrackEncryption);
148 
149  // Note: this definition is specific to the CENC protection type.
150  bool is_encrypted;
151  uint8_t default_iv_size;
152  std::vector<uint8_t> default_kid;
153 };
154 
// Container box whose only member is a TrackEncryption box.
155 struct SchemeInfo : Box {
156  DECLARE_BOX_METHODS(SchemeInfo);
157 
158  TrackEncryption track_encryption;
159 };
160 
162  DECLARE_BOX_METHODS(ProtectionSchemeInfo);
163 
164  OriginalFormat format;
165  SchemeType type;
166  SchemeInfo info;
167 };
168 
170  DECLARE_BOX_METHODS(MovieHeader);
171 
172  uint64_t creation_time;
173  uint64_t modification_time;
174  uint32_t timescale;
175  uint64_t duration;
176  int32_t rate;
177  int16_t volume;
178  uint32_t next_track_id;
179 };
180 
182  enum TrackHeaderFlags {
183  kTrackEnabled = 0x000001,
184  kTrackInMovie = 0x000002,
185  kTrackInPreview = 0x000004,
186  };
187 
188  DECLARE_BOX_METHODS(TrackHeader);
189 
190  uint64_t creation_time;
191  uint64_t modification_time;
192  uint32_t track_id;
193  uint64_t duration;
194  int16_t layer;
195  int16_t alternate_group;
196  int16_t volume;
197  // width and height specify the track's visual presentation size as
198  // fixed-point 16.16 values.
199  uint32_t width;
200  uint32_t height;
201 };
202 
204  uint64_t segment_duration;
205  int64_t media_time;
206  int16_t media_rate_integer;
207  int16_t media_rate_fraction;
208 };
209 
// FullBox holding a list of EditListEntry records.
210 struct EditList : FullBox {
211  DECLARE_BOX_METHODS(EditList);
212 
213  std::vector<EditListEntry> edits;
214 };
215 
// Container box wrapping a single EditList. Embedded in Track as `edit`.
216 struct Edit : Box {
217  DECLARE_BOX_METHODS(Edit);
218 
219  EditList list;
220 };
221 
223  DECLARE_BOX_METHODS(HandlerReference);
224 
225  TrackType type;
226 };
227 
229  DECLARE_BOX_METHODS(CodecConfigurationRecord);
230 
231  FourCC box_type;
232  // Contains full codec configuration record, including possible extension
233  // boxes.
234  std::vector<uint8_t> data;
235 };
236 
238  DECLARE_BOX_METHODS(PixelAspectRatio);
239 
240  uint32_t h_spacing;
241  uint32_t v_spacing;
242 };
243 
245  DECLARE_BOX_METHODS(VideoSampleEntry);
246  // Returns actual format of this sample entry.
247  FourCC GetActualFormat() const {
248  return format == FOURCC_ENCV ? sinf.format.format : format;
249  }
250 
251  FourCC format;
252  uint16_t data_reference_index;
253  uint16_t width;
254  uint16_t height;
255 
256  PixelAspectRatio pixel_aspect;
258  CodecConfigurationRecord codec_config_record;
259 };
260 
262  DECLARE_BOX_METHODS(ElementaryStreamDescriptor);
263 
264  AACAudioSpecificConfig aac_audio_specific_config;
265  ESDescriptor es_descriptor;
266 };
267 
// Box with DTS-specific audio parameters. An instance named `ddts` is
// declared alongside DECLARE_BOX_METHODS(AudioSampleEntry) in this header.
268 struct DTSSpecific : Box {
269  DECLARE_BOX_METHODS(DTSSpecific);
270 
271  uint32_t sampling_frequency;
272  uint32_t max_bitrate;
273  uint32_t avg_bitrate;
274  uint8_t pcm_sample_depth;
275  // Opaque trailing bytes. NOTE(review): presumably the unparsed remainder
276  // of the box payload -- confirm in ReadWriteInternal().
275  std::vector<uint8_t> extra_data;
276 };
277 
279  DECLARE_BOX_METHODS(AudioSampleEntry);
280  // Returns actual format of this sample entry.
281  FourCC GetActualFormat() const {
282  return format == FOURCC_ENCA ? sinf.format.format : format;
283  }
284 
285  FourCC format;
286  uint16_t data_reference_index;
287  uint16_t channelcount;
288  uint16_t samplesize;
289  uint32_t samplerate;
290 
293  DTSSpecific ddts;
294 };
295 
297  DECLARE_BOX_METHODS(WebVTTConfigurationBox);
298  std::string config;
299 };
300 
302  DECLARE_BOX_METHODS(WebVTTSourceLabelBox);
303  std::string source_label;
304 };
305 
307  DECLARE_BOX_METHODS(WVTTSampleEntry);
308 
309  uint16_t data_reference_index;
310 
311  WebVTTConfigurationBox config;
312  WebVTTSourceLabelBox label;
313  // Optional MPEG4BitRateBox.
314 };
315 
317  DECLARE_BOX_METHODS(SampleDescription);
318 
319  TrackType type;
320  // TODO(kqyang): Clean up the code to have one single member, e.g. by creating
321  // SampleEntry struct, std::vector<SampleEntry> sample_entries.
322  std::vector<VideoSampleEntry> video_entries;
323  std::vector<AudioSampleEntry> audio_entries;
324  std::vector<WVTTSampleEntry> wvtt_entries;
325 };
326 
// Plain record (not a Box) used as the element type of
// DecodingTimeToSample::decoding_time.
// NOTE(review): presumably a run of `sample_count` consecutive samples that
// share the same `sample_delta` -- confirm against the stts reader.
327 struct DecodingTime {
328  uint32_t sample_count;
329  uint32_t sample_delta;
330 };
331 
332 // stts.
334  DECLARE_BOX_METHODS(DecodingTimeToSample);
335 
336  std::vector<DecodingTime> decoding_time;
337 };
338 
340  uint32_t sample_count;
341  // If version == 0, sample_offset is uint32_t;
342  // If version == 1, sample_offset is int32_t.
343  // Use int64_t so both can be supported properly.
344  int64_t sample_offset;
345 };
346 
347 // ctts. Optional.
349  DECLARE_BOX_METHODS(CompositionTimeToSample);
350 
351  std::vector<CompositionOffset> composition_offset;
352 };
353 
// Plain record (not a Box) used as the element type of
// SampleToChunk::chunk_info.
354 struct ChunkInfo {
355  uint32_t first_chunk;
356  uint32_t samples_per_chunk;
357  uint32_t sample_description_index;
358 };
359 
360 // stsc.
362  DECLARE_BOX_METHODS(SampleToChunk);
363 
364  std::vector<ChunkInfo> chunk_info;
365 };
366 
367 // stsz.
// FullBox describing sample sizes. SampleTable stores its size information
// in this type even when the file used CompactSampleSize (see the comment on
// SampleTable::sample_size).
// NOTE(review): presumably `sizes` is only populated when `sample_size` is 0
// (per-sample sizes instead of one constant size) -- confirm in the reader.
368 struct SampleSize : FullBox {
369  DECLARE_BOX_METHODS(SampleSize);
370 
371  uint32_t sample_size;
372  uint32_t sample_count;
373  std::vector<uint32_t> sizes;
374 };
375 
376 // stz2.
378  DECLARE_BOX_METHODS(CompactSampleSize);
379 
380  uint8_t field_size;
381  std::vector<uint32_t> sizes;
382 };
383 
384 // co64.
386  DECLARE_BOX_METHODS(ChunkLargeOffset);
387 
388  std::vector<uint64_t> offsets;
389 };
390 
391 // stco.
393  DECLARE_BOX_METHODS(ChunkOffset);
394 };
395 
396 // stss. Optional.
// FullBox holding a list of sample numbers. Embedded in SampleTable as
// `sync_sample`.
397 struct SyncSample : FullBox {
398  DECLARE_BOX_METHODS(SyncSample);
399 
400  std::vector<uint32_t> sample_number;
401 };
402 
// Container box aggregating the per-sample tables of a track: sample
// description, decoding/composition timing, chunk mapping, sample sizes,
// chunk offsets and sync samples. Embedded in MediaInformation as
// `sample_table`.
403 struct SampleTable : Box {
404  DECLARE_BOX_METHODS(SampleTable);
405 
406  SampleDescription description;
407  DecodingTimeToSample decoding_time_to_sample;
408  CompositionTimeToSample composition_time_to_sample;
409  SampleToChunk sample_to_chunk;
410  // Either SampleSize or CompactSampleSize must be present; stored here.
411  SampleSize sample_size;
412  // Either ChunkOffset or ChunkLargeOffset must be present. Stored in
413  // ChunkLargeOffset.
414  ChunkLargeOffset chunk_large_offset;
415  SyncSample sync_sample;
416 };
417 
419  DECLARE_BOX_METHODS(MediaHeader);
420 
421  uint64_t creation_time;
422  uint64_t modification_time;
423  uint32_t timescale;
424  uint64_t duration;
425  // 3-char language code + 1 null terminating char.
426  char language[4];
427 };
428 
430  DECLARE_BOX_METHODS(VideoMediaHeader);
431 
432  uint16_t graphicsmode;
433  uint16_t opcolor_red;
434  uint16_t opcolor_green;
435  uint16_t opcolor_blue;
436 };
437 
439  DECLARE_BOX_METHODS(SoundMediaHeader);
440 
441  uint16_t balance;
442 };
443 
445  DECLARE_BOX_METHODS(SubtitleMediaHeader);
446 };
447 
449  DECLARE_BOX_METHODS(DataEntryUrl);
450 
451  std::vector<uint8_t> location;
452 };
453 
455  DECLARE_BOX_METHODS(DataReference);
456 
457  // data entry can be either url or urn box. Fix to url box for now.
458  std::vector<DataEntryUrl> data_entry;
459 };
460 
462  DECLARE_BOX_METHODS(DataInformation);
463 
464  DataReference dref;
465 };
466 
468  DECLARE_BOX_METHODS(MediaInformation);
469 
470  DataInformation dinf;
471  SampleTable sample_table;
472  // Exactly one specific media header shall be present, vmhd, smhd, hmhd, nmhd.
473  VideoMediaHeader vmhd;
474  SoundMediaHeader smhd;
475  SubtitleMediaHeader sthd;
476 };
477 
// Container box grouping a track's media header, handler reference and media
// information. Embedded in Track as `media`.
478 struct Media : Box {
479  DECLARE_BOX_METHODS(Media);
480 
481  MediaHeader header;
482  HandlerReference handler;
483  MediaInformation information;
484 };
485 
// Container box for one track: its header, media description, edit box and
// sample encryption box. Movie keeps these in `tracks`.
486 struct Track : Box {
487  DECLARE_BOX_METHODS(Track);
488 
489  TrackHeader header;
490  Media media;
491  Edit edit;
492  SampleEncryption sample_encryption;
493 };
494 
496  DECLARE_BOX_METHODS(MovieExtendsHeader);
497 
498  uint64_t fragment_duration;
499 };
500 
502  DECLARE_BOX_METHODS(TrackExtends);
503 
504  uint32_t track_id;
505  uint32_t default_sample_description_index;
506  uint32_t default_sample_duration;
507  uint32_t default_sample_size;
508  uint32_t default_sample_flags;
509 };
510 
// Container box holding a MovieExtendsHeader and one TrackExtends per entry
// in `tracks`. Embedded in Movie as `extends`.
511 struct MovieExtends : Box {
512  DECLARE_BOX_METHODS(MovieExtends);
513 
514  MovieExtendsHeader header;
515  std::vector<TrackExtends> tracks;
516 };
517 
// Top-level container box for the movie: header, movie-extends box, the list
// of tracks and any protection system specific headers.
518 struct Movie : Box {
519  DECLARE_BOX_METHODS(Movie);
520 
521  MovieHeader header;
522  MovieExtends extends;
523  std::vector<Track> tracks;
524  std::vector<ProtectionSystemSpecificHeader> pssh;
525 };
526 
528  DECLARE_BOX_METHODS(TrackFragmentDecodeTime);
529 
530  uint64_t decode_time;
531 };
532 
534  DECLARE_BOX_METHODS(MovieFragmentHeader);
535 
536  uint32_t sequence_number;
537 };
538 
540  enum TrackFragmentFlagsMasks {
541  kBaseDataOffsetPresentMask = 0x000001,
542  kSampleDescriptionIndexPresentMask = 0x000002,
543  kDefaultSampleDurationPresentMask = 0x000008,
544  kDefaultSampleSizePresentMask = 0x000010,
545  kDefaultSampleFlagsPresentMask = 0x000020,
546  kDurationIsEmptyMask = 0x010000,
547  kDefaultBaseIsMoofMask = 0x020000,
548  };
549 
550  enum SampleFlagsMasks {
551  kReservedMask = 0xFC000000,
552  kSampleDependsOnMask = 0x03000000,
553  kSampleIsDependedOnMask = 0x00C00000,
554  kSampleHasRedundancyMask = 0x00300000,
555  kSamplePaddingValueMask = 0x000E0000,
556  kNonKeySampleMask = 0x00010000,
557  kSampleDegradationPriorityMask = 0x0000FFFF,
558  };
559 
560  DECLARE_BOX_METHODS(TrackFragmentHeader);
561 
562  uint32_t track_id;
563  uint32_t sample_description_index;
564  uint32_t default_sample_duration;
565  uint32_t default_sample_size;
566  uint32_t default_sample_flags;
567 };
568 
570  enum TrackFragmentFlagsMasks {
571  kDataOffsetPresentMask = 0x000001,
572  kFirstSampleFlagsPresentMask = 0x000004,
573  kSampleDurationPresentMask = 0x000100,
574  kSampleSizePresentMask = 0x000200,
575  kSampleFlagsPresentMask = 0x000400,
576  kSampleCompTimeOffsetsPresentMask = 0x000800,
577  };
578 
579  DECLARE_BOX_METHODS(TrackFragmentRun);
580 
581  uint32_t sample_count;
582  uint32_t data_offset;
583  std::vector<uint32_t> sample_flags;
584  std::vector<uint32_t> sample_sizes;
585  std::vector<uint32_t> sample_durations;
586  std::vector<int64_t> sample_composition_time_offsets;
587 };
588 
590  enum GroupDescriptionIndexBase {
591  kTrackGroupDescriptionIndexBase = 0,
592  kTrackFragmentGroupDescriptionIndexBase = 0x10000,
593  };
594 
595  uint32_t sample_count;
596  uint32_t group_description_index;
597 };
598 
600  DECLARE_BOX_METHODS(SampleToGroup);
601 
602  uint32_t grouping_type;
603  uint32_t grouping_type_parameter; // Version 1 only.
604  std::vector<SampleToGroupEntry> entries;
605 };
606 
610 
611  bool is_encrypted;
612  uint8_t iv_size;
613  std::vector<uint8_t> key_id;
614 };
615 
617  DECLARE_BOX_METHODS(SampleGroupDescription);
618 
619  uint32_t grouping_type;
620  std::vector<CencSampleEncryptionInfoEntry> entries;
621 };
622 
// Container box for one track's portion of a movie fragment: fragment header,
// sample runs, decode time, sample grouping, auxiliary information and sample
// encryption boxes. MovieFragment keeps these in `tracks`.
623 struct TrackFragment : Box {
624  DECLARE_BOX_METHODS(TrackFragment);
625 
626  TrackFragmentHeader header;
627  std::vector<TrackFragmentRun> runs;
628  // NOTE(review): presumably true when no decode-time box was found for this
629  // fragment, in which case `decode_time` is not meaningful -- confirm in the
630  // implementation file.
628  bool decode_time_absent;
629  TrackFragmentDecodeTime decode_time;
630  SampleToGroup sample_to_group;
631  SampleGroupDescription sample_group_description;
632  SampleAuxiliaryInformationSize auxiliary_size;
633  SampleAuxiliaryInformationOffset auxiliary_offset;
634  SampleEncryption sample_encryption;
635 };
636 
// Container box for a movie fragment: fragment header, per-track fragments
// and any protection system specific headers.
637 struct MovieFragment : Box {
638  DECLARE_BOX_METHODS(MovieFragment);
639 
640  MovieFragmentHeader header;
641  std::vector<TrackFragment> tracks;
642  std::vector<ProtectionSystemSpecificHeader> pssh;
643 };
644 
646  enum SAPType {
647  TypeUnknown = 0,
648  Type1 = 1, // T(ept) = T(dec) = T(sap) = T(ptf)
649  Type2 = 2, // T(ept) = T(dec) = T(sap) < T(ptf)
650  Type3 = 3, // T(ept) < T(dec) = T(sap) <= T(ptf)
651  Type4 = 4, // T(ept) <= T(ptf) < T(dec) = T(sap)
652  Type5 = 5, // T(ept) = T(dec) < T(sap)
653  Type6 = 6, // T(ept) < T(dec) < T(sap)
654  };
655 
656  bool reference_type;
657  uint32_t referenced_size;
658  uint32_t subsegment_duration;
659  bool starts_with_sap;
660  SAPType sap_type;
661  uint32_t sap_delta_time;
662  // We add this field to keep track of earliest_presentation_time in this
663  // subsegment. It is not part of SegmentReference.
664  uint64_t earliest_presentation_time;
665 };
666 
668  DECLARE_BOX_METHODS(SegmentIndex);
669 
670  uint32_t reference_id;
671  uint32_t timescale;
672  uint64_t earliest_presentation_time;
673  uint64_t first_offset;
674  std::vector<SegmentReference> references;
675 };
676 
677 // The actual data is parsed and written separately.
// Box that records only the size of the media payload; per the comment above
// this struct, the payload bytes themselves are handled elsewhere.
678 struct MediaData : Box {
679  DECLARE_BOX_METHODS(MediaData);
680 
681  uint32_t data_size;
682 };
683 
// Box holding a signed 32-bit cue source id. Embedded in VTTCueBox as
// `cue_source_id`.
684 struct CueSourceIDBox : Box {
685  DECLARE_BOX_METHODS(CueSourceIDBox);
686  int32_t source_id;
687 };
688 
// Box holding the cue's current time as a string. Embedded in VTTCueBox as
// `cue_time`.
689 struct CueTimeBox : Box {
690  DECLARE_BOX_METHODS(CueTimeBox);
691  std::string cue_current_time;
692 };
693 
// Box holding a cue identifier string. Embedded in VTTCueBox as `cue_id`.
694 struct CueIDBox : Box {
695  DECLARE_BOX_METHODS(CueIDBox);
696  std::string cue_id;
697 };
698 
// Box holding a cue settings string. Embedded in VTTCueBox as
// `cue_settings`.
699 struct CueSettingsBox : Box {
700  DECLARE_BOX_METHODS(CueSettingsBox);
701  std::string settings;
702 };
703 
// Box holding the cue text. Embedded in VTTCueBox as `cue_payload`.
704 struct CuePayloadBox : Box {
705  DECLARE_BOX_METHODS(CuePayloadBox);
706  std::string cue_text;
707 };
708 
// Box with no data members of its own; it only declares the standard box
// methods.
709 struct VTTEmptyCueBox : Box {
710  DECLARE_BOX_METHODS(VTTEmptyCueBox);
711 };
712 
714  DECLARE_BOX_METHODS(VTTAdditionalTextBox);
715  std::string cue_additional_text;
716 };
717 
// Container box for a single cue, composed of the source id, cue id, time,
// settings and payload boxes declared earlier in this header.
718 struct VTTCueBox : Box {
719  DECLARE_BOX_METHODS(VTTCueBox);
720 
721  CueSourceIDBox cue_source_id;
722  CueIDBox cue_id;
723  CueTimeBox cue_time;
724  CueSettingsBox cue_settings;
725  CuePayloadBox cue_payload;
726 };
727 
// Undefine the helper macro defined near the top of this header so it does
// not leak into every translation unit that includes this file. The name must
// match the #define exactly (DECLARE_BOX_METHODS); undefining any other name
// is a silent no-op.
728 #undef DECLARE_BOX_METHODS
729 
730 } // namespace mp4
731 } // namespace media
732 } // namespace edash_packager
733 
734 #endif // MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
bool ParseFromSampleEncryptionData(size_t iv_size, std::vector< SampleEncryptionEntry > *sample_encryption_entries) const
bool ParseFromBuffer(uint8_t iv_size, bool has_subsamples, BufferReader *reader)
bool ReadWrite(uint8_t iv_size, bool has_subsamples, BoxBuffer *buffer)