DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs Enumerator
box_definitions.h
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
6 #define MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
7 
8 #include <vector>
9 
10 #include "packager/media/base/decrypt_config.h"
11 #include "packager/media/formats/mp4/aac_audio_specific_config.h"
12 #include "packager/media/formats/mp4/box.h"
13 #include "packager/media/formats/mp4/es_descriptor.h"
14 #include "packager/media/formats/mp4/fourccs.h"
15 
16 namespace edash_packager {
17 namespace media {
18 
19 class BufferReader;
20 
21 namespace mp4 {
22 
// Kinds of track distinguished by this header. kInvalid is explicitly 0; the
// remaining enumerators take the consecutive values that follow it.
23 enum TrackType {
24  kInvalid = 0,
25  kVideo,
26  kAudio,
27  kHint,
28  kText,
29 };
30 
31 class BoxBuffer;
32 
// Declares the members shared by every concrete box type T:
//  - public: default constructor, overriding destructor, overriding BoxType();
//  - private: overrides of ReadWriteInternal(BoxBuffer*) and
//    ComputeSizeInternal().
// The expansion ends with `public:` so that members written after the macro
// invocation inside the struct are public again.
33 #define DECLARE_BOX_METHODS(T) \
34  public: \
35  T(); \
36  ~T() override; \
37  FourCC BoxType() const override; \
38  \
39  private: \
40  bool ReadWriteInternal(BoxBuffer* buffer) override; \
41  uint32_t ComputeSizeInternal() override; \
42  \
43  public:
44 
// Box carrying a major brand, a minor version and a list of compatible brands.
45 struct FileType : Box {
46  DECLARE_BOX_METHODS(FileType);
47 
48  FourCC major_brand;
49  uint32_t minor_version;
50  std::vector<FourCC> compatible_brands;
51 };
52 
54  FourCC BoxType() const override;
55 };
56 
58  DECLARE_BOX_METHODS(ProtectionSystemSpecificHeader);
59 
60  std::vector<uint8_t> system_id;
61  std::vector<uint8_t> data;
62  std::vector<uint8_t> raw_box;
63 };
64 
66  DECLARE_BOX_METHODS(SampleAuxiliaryInformationOffset);
67 
68  std::vector<uint64_t> offsets;
69 };
70 
72  DECLARE_BOX_METHODS(SampleAuxiliaryInformationSize);
73 
74  uint8_t default_sample_info_size;
75  uint32_t sample_count;
76  std::vector<uint8_t> sample_info_sizes;
77 };
78 
88  bool ReadWrite(uint8_t iv_size,
89  bool has_subsamples,
90  BoxBuffer* buffer);
97  bool ParseFromBuffer(uint8_t iv_size,
98  bool has_subsamples,
99  BufferReader* reader);
101  uint32_t ComputeSize() const;
104  uint32_t GetTotalSizeOfSubsamples() const;
105 
106  std::vector<uint8_t> initialization_vector;
107  std::vector<SubsampleEntry> subsamples;
108 };
109 
111  enum SampleEncryptionFlags {
112  kUseSubsampleEncryption = 2,
113  };
114 
115  DECLARE_BOX_METHODS(SampleEncryption);
122  size_t iv_size,
123  std::vector<SampleEncryptionEntry>* sample_encryption_entries) const;
124 
127  std::vector<uint8_t> sample_encryption_data;
128 
129  size_t iv_size;
130  std::vector<SampleEncryptionEntry> sample_encryption_entries;
131 };
132 
// Box holding a single FourCC format value.
133 struct OriginalFormat : Box {
134  DECLARE_BOX_METHODS(OriginalFormat);
135 
136  FourCC format;
137 };
138 
// FullBox holding a scheme type FourCC and a 32-bit scheme version.
139 struct SchemeType : FullBox {
140  DECLARE_BOX_METHODS(SchemeType);
141 
142  FourCC type;
143  uint32_t version;
144 };
145 
147  DECLARE_BOX_METHODS(TrackEncryption);
148 
149  // Note: this definition is specific to the CENC protection type.
150  bool is_encrypted;
151  uint8_t default_iv_size;
152  std::vector<uint8_t> default_kid;
153 };
154 
// Box wrapping a single TrackEncryption member.
155 struct SchemeInfo : Box {
156  DECLARE_BOX_METHODS(SchemeInfo);
157 
158  TrackEncryption track_encryption;
159 };
160 
162  DECLARE_BOX_METHODS(ProtectionSchemeInfo);
163 
164  OriginalFormat format;
165  SchemeType type;
166  SchemeInfo info;
167 };
168 
170  DECLARE_BOX_METHODS(MovieHeader);
171 
172  uint64_t creation_time;
173  uint64_t modification_time;
174  uint32_t timescale;
175  uint64_t duration;
176  int32_t rate;
177  int16_t volume;
178  uint32_t next_track_id;
179 };
180 
182  enum TrackHeaderFlags {
183  kTrackEnabled = 0x000001,
184  kTrackInMovie = 0x000002,
185  kTrackInPreview = 0x000004,
186  };
187 
188  DECLARE_BOX_METHODS(TrackHeader);
189 
190  uint64_t creation_time;
191  uint64_t modification_time;
192  uint32_t track_id;
193  uint64_t duration;
194  int16_t layer;
195  int16_t alternate_group;
196  int16_t volume;
197  // width and height specify the track's visual presentation size as
198  // fixed-point 16.16 values.
199  uint32_t width;
200  uint32_t height;
201 };
202 
204  uint64_t segment_duration;
205  int64_t media_time;
206  int16_t media_rate_integer;
207  int16_t media_rate_fraction;
208 };
209 
// FullBox holding a list of EditListEntry records.
210 struct EditList : FullBox {
211  DECLARE_BOX_METHODS(EditList);
212 
213  std::vector<EditListEntry> edits;
214 };
215 
// Box wrapping a single EditList.
216 struct Edit : Box {
217  DECLARE_BOX_METHODS(Edit);
218 
219  EditList list;
220 };
221 
223  DECLARE_BOX_METHODS(HandlerReference);
224 
225  FourCC handler_type;
226 };
227 
// Helper type (it has no Box base) that stores a language code string and
// declares its own ReadWrite()/ComputeSize() taking part in box serialization.
228 struct Language {
229  bool ReadWrite(BoxBuffer* buffer);
230  uint32_t ComputeSize() const;
231 
232  std::string code;
233 };
234 
// Helper type (it has no Box base) holding an owner string and a value string,
// with its own ReadWrite()/ComputeSize() declarations.
236 struct PrivFrame {
237  bool ReadWrite(BoxBuffer* buffer);
238  uint32_t ComputeSize() const;
239 
240  std::string owner;
241  std::string value;
242 };
243 
246 struct ID3v2 : FullBox {
247  DECLARE_BOX_METHODS(ID3v2);
248 
249  Language language;
250 
253 };
254 
// FullBox holding a HandlerReference and an ID3v2 box.
255 struct Metadata : FullBox {
256  DECLARE_BOX_METHODS(Metadata);
257 
258  HandlerReference handler;
259  ID3v2 id3v2;
260 };
261 
263  DECLARE_BOX_METHODS(CodecConfigurationRecord);
264 
265  FourCC box_type;
266  // Contains full codec configuration record, including possible extension
267  // boxes.
268  std::vector<uint8_t> data;
269 };
270 
272  DECLARE_BOX_METHODS(PixelAspectRatio);
273 
274  uint32_t h_spacing;
275  uint32_t v_spacing;
276 };
277 
279  DECLARE_BOX_METHODS(VideoSampleEntry);
280  // Returns actual format of this sample entry.
281  FourCC GetActualFormat() const {
282  return format == FOURCC_ENCV ? sinf.format.format : format;
283  }
284 
285  FourCC format;
286  uint16_t data_reference_index;
287  uint16_t width;
288  uint16_t height;
289 
290  PixelAspectRatio pixel_aspect;
292  CodecConfigurationRecord codec_config_record;
293 };
294 
296  DECLARE_BOX_METHODS(ElementaryStreamDescriptor);
297 
298  AACAudioSpecificConfig aac_audio_specific_config;
299  ESDescriptor es_descriptor;
300 };
301 
// Box holding a sampling frequency, maximum and average bitrates, a PCM
// sample depth and a byte vector of extra data.
302 struct DTSSpecific : Box {
303  DECLARE_BOX_METHODS(DTSSpecific);
304 
305  uint32_t sampling_frequency;
306  uint32_t max_bitrate;
307  uint32_t avg_bitrate;
308  uint8_t pcm_sample_depth;
309  std::vector<uint8_t> extra_data;
310 };
311 
// Box whose only data member is a byte vector `data`.
312 struct AC3Specific : Box {
313  DECLARE_BOX_METHODS(AC3Specific);
314 
315  std::vector<uint8_t> data;
316 };
317 
318 // Independent substream in EC3Specific box.
320  uint8_t sample_rate_code; // fscod: 2 bits
321  uint8_t bit_stream_identification; // bsid: 5 bits
322  // reserved_1: 1 bit
323  uint8_t audio_service; // asvc: 1 bit
324  uint8_t bit_stream_mode; // bsmod: 3 bits
325  uint8_t audio_coding_mode; // acmod: 3 bits
326  uint8_t lfe_channel_on; // lfeon: 1 bit
327  // reserved_2: 3 bits
328  uint8_t number_dependent_substreams; // num_dep_sub: 4 bits.
329  // If num_dep_sub > 0, chan_loc is present and the size is 9 bits.
330  // Otherwise, reserved_3 is present and the size is 1 bit.
331  uint16_t channel_location; // chan_loc: 9 bits.
332  // reserved_3: 1 bit
333 };
334 
// Box holding the unparsed bytes of the box plus the independent substreams
// described by them.
335 struct EC3Specific : Box {
336  DECLARE_BOX_METHODS(EC3Specific);
337 
338  // Before we know the number of independent substreams, data in EC3Specific
339  // box is stored for parsing later.
340  std::vector<uint8_t> data;
341 
342  size_t number_independent_substreams; // num_id_sub: 3 bits.
343  std::vector<IndependentSubstream> independent_substreams;
344 };
345 
347  DECLARE_BOX_METHODS(AudioSampleEntry);
348  // Returns actual format of this sample entry.
349  FourCC GetActualFormat() const {
350  return format == FOURCC_ENCA ? sinf.format.format : format;
351  }
352 
353  FourCC format;
354  uint16_t data_reference_index;
355  uint16_t channelcount;
356  uint16_t samplesize;
357  uint32_t samplerate;
358 
360 
362  DTSSpecific ddts;
363  AC3Specific dac3;
364  EC3Specific dec3;
365 };
366 
368  DECLARE_BOX_METHODS(WebVTTConfigurationBox);
369  std::string config;
370 };
371 
373  DECLARE_BOX_METHODS(WebVTTSourceLabelBox);
374  std::string source_label;
375 };
376 
378  DECLARE_BOX_METHODS(TextSampleEntry);
379 
380  // Specifies fourcc of this sample entry. It needs to be set on write, e.g.
381  // set to 'wvtt' to write WVTTSampleEntry; On read, it is recovered from box
382  // header.
383  FourCC format;
384  uint16_t data_reference_index;
385 
386  // Sub boxes for wvtt text sample entry.
387  WebVTTConfigurationBox config;
388  WebVTTSourceLabelBox label;
389  // Optional MPEG4BitRateBox.
390 };
391 
393  DECLARE_BOX_METHODS(SampleDescription);
394 
395  TrackType type;
396  // TODO(kqyang): Clean up the code to have one single member, e.g. by creating
397  // SampleEntry struct, std::vector<SampleEntry> sample_entries.
398  std::vector<VideoSampleEntry> video_entries;
399  std::vector<AudioSampleEntry> audio_entries;
400  std::vector<TextSampleEntry> text_entries;
401 };
402 
// Element type of DecodingTimeToSample::decoding_time: a sample count paired
// with a sample delta.
403 struct DecodingTime {
404  uint32_t sample_count;
405  uint32_t sample_delta;
406 };
407 
408 // stts.
410  DECLARE_BOX_METHODS(DecodingTimeToSample);
411 
412  std::vector<DecodingTime> decoding_time;
413 };
414 
416  uint32_t sample_count;
417  // If version == 0, sample_offset is uint32_t;
418  // If version == 1, sample_offset is int32_t.
419  // Use int64_t so both can be supported properly.
420  int64_t sample_offset;
421 };
422 
423 // ctts. Optional.
425  DECLARE_BOX_METHODS(CompositionTimeToSample);
426 
427  std::vector<CompositionOffset> composition_offset;
428 };
429 
// Element type of SampleToChunk::chunk_info: the first chunk number, the
// number of samples per chunk and a sample description index.
430 struct ChunkInfo {
431  uint32_t first_chunk;
432  uint32_t samples_per_chunk;
433  uint32_t sample_description_index;
434 };
435 
436 // stsc.
438  DECLARE_BOX_METHODS(SampleToChunk);
439 
440  std::vector<ChunkInfo> chunk_info;
441 };
442 
443 // stsz.
// FullBox holding a sample_size value, a sample count and a vector of sizes.
// NOTE(review): presumably `sizes` is only meaningful when sample_size is 0,
// as in ISO BMFF 'stsz' -- confirm against the implementation.
444 struct SampleSize : FullBox {
445  DECLARE_BOX_METHODS(SampleSize);
446 
447  uint32_t sample_size;
448  uint32_t sample_count;
449  std::vector<uint32_t> sizes;
450 };
451 
452 // stz2.
454  DECLARE_BOX_METHODS(CompactSampleSize);
455 
456  uint8_t field_size;
457  std::vector<uint32_t> sizes;
458 };
459 
460 // co64.
462  DECLARE_BOX_METHODS(ChunkLargeOffset);
463 
464  std::vector<uint64_t> offsets;
465 };
466 
467 // stco.
469  DECLARE_BOX_METHODS(ChunkOffset);
470 };
471 
472 // stss. Optional.
// FullBox holding a list of sample numbers.
473 struct SyncSample : FullBox {
474  DECLARE_BOX_METHODS(SyncSample);
475 
476  std::vector<uint32_t> sample_number;
477 };
478 
// Box aggregating the sample description together with the timing, chunk,
// size, offset and sync-sample tables.
479 struct SampleTable : Box {
480  DECLARE_BOX_METHODS(SampleTable);
481 
482  SampleDescription description;
483  DecodingTimeToSample decoding_time_to_sample;
484  CompositionTimeToSample composition_time_to_sample;
485  SampleToChunk sample_to_chunk;
486  // Either SampleSize or CompactSampleSize must be present. Stored in
486  // SampleSize.
487  SampleSize sample_size;
488  // Either ChunkOffset or ChunkLargeOffset must be present. Stored in
489  // ChunkLargeOffset.
490  ChunkLargeOffset chunk_large_offset;
491  SyncSample sync_sample;
492 };
493 
495  DECLARE_BOX_METHODS(MediaHeader);
496 
497  uint64_t creation_time;
498  uint64_t modification_time;
499  uint32_t timescale;
500  uint64_t duration;
501  Language language;
502 };
503 
505  DECLARE_BOX_METHODS(VideoMediaHeader);
506 
507  uint16_t graphicsmode;
508  uint16_t opcolor_red;
509  uint16_t opcolor_green;
510  uint16_t opcolor_blue;
511 };
512 
514  DECLARE_BOX_METHODS(SoundMediaHeader);
515 
516  uint16_t balance;
517 };
518 
520  DECLARE_BOX_METHODS(SubtitleMediaHeader);
521 };
522 
524  DECLARE_BOX_METHODS(DataEntryUrl);
525 
526  std::vector<uint8_t> location;
527 };
528 
530  DECLARE_BOX_METHODS(DataReference);
531 
532  // data entry can be either url or urn box. Fix to url box for now.
533  std::vector<DataEntryUrl> data_entry;
534 };
535 
537  DECLARE_BOX_METHODS(DataInformation);
538 
539  DataReference dref;
540 };
541 
543  DECLARE_BOX_METHODS(MediaInformation);
544 
545  DataInformation dinf;
546  SampleTable sample_table;
547  // Exactly one specific media header shall be present: vmhd, smhd, hmhd or nmhd.
548  VideoMediaHeader vmhd;
549  SoundMediaHeader smhd;
550  SubtitleMediaHeader sthd;
551 };
552 
// Box aggregating a MediaHeader, a HandlerReference and a MediaInformation.
553 struct Media : Box {
554  DECLARE_BOX_METHODS(Media);
555 
556  MediaHeader header;
557  HandlerReference handler;
558  MediaInformation information;
559 };
560 
// Box aggregating a track's header, media, edit and sample-encryption boxes.
561 struct Track : Box {
562  DECLARE_BOX_METHODS(Track);
563 
564  TrackHeader header;
565  Media media;
566  Edit edit;
567  SampleEncryption sample_encryption;
568 };
569 
571  DECLARE_BOX_METHODS(MovieExtendsHeader);
572 
573  uint64_t fragment_duration;
574 };
575 
577  DECLARE_BOX_METHODS(TrackExtends);
578 
579  uint32_t track_id;
580  uint32_t default_sample_description_index;
581  uint32_t default_sample_duration;
582  uint32_t default_sample_size;
583  uint32_t default_sample_flags;
584 };
585 
// Box holding a MovieExtendsHeader and a list of TrackExtends boxes.
586 struct MovieExtends : Box {
587  DECLARE_BOX_METHODS(MovieExtends);
588 
589  MovieExtendsHeader header;
590  std::vector<TrackExtends> tracks;
591 };
592 
// Box aggregating the movie header, metadata, movie-extends box, the list of
// tracks and a list of ProtectionSystemSpecificHeader boxes.
593 struct Movie : Box {
594  DECLARE_BOX_METHODS(Movie);
595 
596  MovieHeader header;
597  Metadata metadata; // Used to hold version information.
598  MovieExtends extends;
599  std::vector<Track> tracks;
600  std::vector<ProtectionSystemSpecificHeader> pssh;
601 };
602 
604  DECLARE_BOX_METHODS(TrackFragmentDecodeTime);
605 
606  uint64_t decode_time;
607 };
608 
610  DECLARE_BOX_METHODS(MovieFragmentHeader);
611 
612  uint32_t sequence_number;
613 };
614 
616  enum TrackFragmentFlagsMasks {
617  kBaseDataOffsetPresentMask = 0x000001,
618  kSampleDescriptionIndexPresentMask = 0x000002,
619  kDefaultSampleDurationPresentMask = 0x000008,
620  kDefaultSampleSizePresentMask = 0x000010,
621  kDefaultSampleFlagsPresentMask = 0x000020,
622  kDurationIsEmptyMask = 0x010000,
623  kDefaultBaseIsMoofMask = 0x020000,
624  };
625 
626  enum SampleFlagsMasks {
627  kReservedMask = 0xFC000000,
628  kSampleDependsOnMask = 0x03000000,
629  kSampleIsDependedOnMask = 0x00C00000,
630  kSampleHasRedundancyMask = 0x00300000,
631  kSamplePaddingValueMask = 0x000E0000,
632  kNonKeySampleMask = 0x00010000,
633  kSampleDegradationPriorityMask = 0x0000FFFF,
634  };
635 
636  DECLARE_BOX_METHODS(TrackFragmentHeader);
637 
638  uint32_t track_id;
639  uint32_t sample_description_index;
640  uint32_t default_sample_duration;
641  uint32_t default_sample_size;
642  uint32_t default_sample_flags;
643 };
644 
646  enum TrackFragmentFlagsMasks {
647  kDataOffsetPresentMask = 0x000001,
648  kFirstSampleFlagsPresentMask = 0x000004,
649  kSampleDurationPresentMask = 0x000100,
650  kSampleSizePresentMask = 0x000200,
651  kSampleFlagsPresentMask = 0x000400,
652  kSampleCompTimeOffsetsPresentMask = 0x000800,
653  };
654 
655  DECLARE_BOX_METHODS(TrackFragmentRun);
656 
657  uint32_t sample_count;
658  uint32_t data_offset;
659  std::vector<uint32_t> sample_flags;
660  std::vector<uint32_t> sample_sizes;
661  std::vector<uint32_t> sample_durations;
662  std::vector<int64_t> sample_composition_time_offsets;
663 };
664 
666  enum GroupDescriptionIndexBase {
667  kTrackGroupDescriptionIndexBase = 0,
668  kTrackFragmentGroupDescriptionIndexBase = 0x10000,
669  };
670 
671  uint32_t sample_count;
672  uint32_t group_description_index;
673 };
674 
676  DECLARE_BOX_METHODS(SampleToGroup);
677 
678  uint32_t grouping_type;
679  uint32_t grouping_type_parameter; // Version 1 only.
680  std::vector<SampleToGroupEntry> entries;
681 };
682 
686 
687  bool is_encrypted;
688  uint8_t iv_size;
689  std::vector<uint8_t> key_id;
690 };
691 
693  DECLARE_BOX_METHODS(SampleGroupDescription);
694 
695  uint32_t grouping_type;
696  std::vector<CencSampleEncryptionInfoEntry> entries;
697 };
698 
// Box aggregating a fragment header, its runs, the decode time, sample-group
// boxes, auxiliary-information boxes and sample encryption data.
699 struct TrackFragment : Box {
700  DECLARE_BOX_METHODS(TrackFragment);
701 
702  TrackFragmentHeader header;
703  std::vector<TrackFragmentRun> runs;
  // NOTE(review): presumably true when no TrackFragmentDecodeTime box was
  // found, in which case `decode_time` holds no parsed value -- confirm
  // against the implementation.
704  bool decode_time_absent;
705  TrackFragmentDecodeTime decode_time;
706  SampleToGroup sample_to_group;
707  SampleGroupDescription sample_group_description;
708  SampleAuxiliaryInformationSize auxiliary_size;
709  SampleAuxiliaryInformationOffset auxiliary_offset;
710  SampleEncryption sample_encryption;
711 };
712 
// Box aggregating a MovieFragmentHeader, a list of TrackFragment boxes and a
// list of ProtectionSystemSpecificHeader boxes.
713 struct MovieFragment : Box {
714  DECLARE_BOX_METHODS(MovieFragment);
715 
716  MovieFragmentHeader header;
717  std::vector<TrackFragment> tracks;
718  std::vector<ProtectionSystemSpecificHeader> pssh;
719 };
720 
722  enum SAPType {
723  TypeUnknown = 0,
724  Type1 = 1, // T(ept) = T(dec) = T(sap) = T(ptf)
725  Type2 = 2, // T(ept) = T(dec) = T(sap) < T(ptf)
726  Type3 = 3, // T(ept) < T(dec) = T(sap) <= T(ptf)
727  Type4 = 4, // T(ept) <= T(ptf) < T(dec) = T(sap)
728  Type5 = 5, // T(ept) = T(dec) < T(sap)
729  Type6 = 6, // T(ept) < T(dec) < T(sap)
730  };
731 
732  bool reference_type;
733  uint32_t referenced_size;
734  uint32_t subsegment_duration;
735  bool starts_with_sap;
736  SAPType sap_type;
737  uint32_t sap_delta_time;
738  // We add this field to keep track of earliest_presentation_time in this
739  // subsegment. It is not part of SegmentReference.
740  uint64_t earliest_presentation_time;
741 };
742 
744  DECLARE_BOX_METHODS(SegmentIndex);
745 
746  uint32_t reference_id;
747  uint32_t timescale;
748  uint64_t earliest_presentation_time;
749  uint64_t first_offset;
750  std::vector<SegmentReference> references;
751 };
752 
753 // The actual data is parsed and written separately.
// Only the size of the data is recorded in this struct.
754 struct MediaData : Box {
755  DECLARE_BOX_METHODS(MediaData);
756 
757  uint32_t data_size;
758 };
759 
// Box holding a signed 32-bit source id.
760 struct CueSourceIDBox : Box {
761  DECLARE_BOX_METHODS(CueSourceIDBox);
762  int32_t source_id;
763 };
764 
// Box holding the cue's current time as a string.
765 struct CueTimeBox : Box {
766  DECLARE_BOX_METHODS(CueTimeBox);
767  std::string cue_current_time;
768 };
769 
// Box holding a cue id string.
770 struct CueIDBox : Box {
771  DECLARE_BOX_METHODS(CueIDBox);
772  std::string cue_id;
773 };
774 
// Box holding a cue settings string.
775 struct CueSettingsBox : Box {
776  DECLARE_BOX_METHODS(CueSettingsBox);
777  std::string settings;
778 };
779 
// Box holding the cue text string.
780 struct CuePayloadBox : Box {
781  DECLARE_BOX_METHODS(CuePayloadBox);
782  std::string cue_text;
783 };
784 
// Box with no data members of its own; only the common box methods are
// declared.
785 struct VTTEmptyCueBox : Box {
786  DECLARE_BOX_METHODS(VTTEmptyCueBox);
787 };
788 
790  DECLARE_BOX_METHODS(VTTAdditionalTextBox);
791  std::string cue_additional_text;
792 };
793 
// Box aggregating the cue source id, cue id, cue time, cue settings and cue
// payload boxes.
794 struct VTTCueBox : Box {
795  DECLARE_BOX_METHODS(VTTCueBox);
796 
797  CueSourceIDBox cue_source_id;
798  CueIDBox cue_id;
799  CueTimeBox cue_time;
800  CueSettingsBox cue_settings;
801  CuePayloadBox cue_payload;
802 };
803 
// Undefine the helper macro so it does not leak into files that include this
// header. The macro defined in this file is DECLARE_BOX_METHODS; undefining
// the non-existent name DECLARE_BOX had no effect.
804 #undef DECLARE_BOX_METHODS
805 
806 } // namespace mp4
807 } // namespace media
808 } // namespace edash_packager
809 
810 #endif // MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
Implemented per http://id3.org/id3v2.4.0-frames.
bool ParseFromSampleEncryptionData(size_t iv_size, std::vector< SampleEncryptionEntry > *sample_encryption_entries) const
PrivFrame private_frame
We only support PrivateFrame in ID3. Other frames are ignored.
bool ParseFromBuffer(uint8_t iv_size, bool has_subsamples, BufferReader *reader)
bool ReadWrite(uint8_t iv_size, bool has_subsamples, BoxBuffer *buffer)