DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs Enumerator
box_definitions.h
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
6 #define MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
7 
8 #include <vector>
9 
10 #include "packager/media/base/decrypt_config.h"
11 #include "packager/media/formats/mp4/aac_audio_specific_config.h"
12 #include "packager/media/formats/mp4/box.h"
13 #include "packager/media/formats/mp4/es_descriptor.h"
14 #include "packager/media/formats/mp4/fourccs.h"
15 
16 namespace edash_packager {
17 namespace media {
18 
19 class BufferReader;
20 
21 namespace mp4 {
22 
// Kind of track. kInvalid is explicitly 0; the remaining enumerators take the
// consecutive values that follow it.
23 enum TrackType {
24  kInvalid = 0,
25  kVideo,
26  kAudio,
27  kHint,
28  kText,
29 };
30 
31 class BoxBuffer;
32 
// Declares the members shared by the box structs in this header for type T:
// a public default constructor, an overriding destructor and an overriding
// BoxType(), followed by private overrides of ReadWriteInternal() and
// ComputeSizeInternal(). The expansion ends with `public:` so that members
// declared after the macro invocation are public again.
33 #define DECLARE_BOX_METHODS(T) \
34  public: \
35  T(); \
36  ~T() override; \
37  FourCC BoxType() const override; \
38  \
39  private: \
40  bool ReadWriteInternal(BoxBuffer* buffer) override; \
41  uint32_t ComputeSizeInternal() override; \
42  \
43  public:
44 
// Box holding a major brand, a minor version and a list of compatible brands.
45 struct FileType : Box {
46  DECLARE_BOX_METHODS(FileType);
47 
48  FourCC major_brand;
49  uint32_t minor_version;
50  std::vector<FourCC> compatible_brands;
51 };
52 
54  FourCC BoxType() const override;
55 };
56 
58  DECLARE_BOX_METHODS(ProtectionSystemSpecificHeader);
59 
60  std::vector<uint8_t> system_id;
61  std::vector<uint8_t> data;
62  std::vector<uint8_t> raw_box;
63 };
64 
66  DECLARE_BOX_METHODS(SampleAuxiliaryInformationOffset);
67 
68  std::vector<uint64_t> offsets;
69 };
70 
72  DECLARE_BOX_METHODS(SampleAuxiliaryInformationSize);
73 
74  uint8_t default_sample_info_size;
75  uint32_t sample_count;
76  std::vector<uint8_t> sample_info_sizes;
77 };
78 
88  bool ReadWrite(uint8_t iv_size,
89  bool has_subsamples,
90  BoxBuffer* buffer);
97  bool ParseFromBuffer(uint8_t iv_size,
98  bool has_subsamples,
99  BufferReader* reader);
101  uint32_t ComputeSize() const;
104  uint32_t GetTotalSizeOfSubsamples() const;
105 
106  std::vector<uint8_t> initialization_vector;
107  std::vector<SubsampleEntry> subsamples;
108 };
109 
111  enum SampleEncryptionFlags {
112  kUseSubsampleEncryption = 2,
113  };
114 
115  DECLARE_BOX_METHODS(SampleEncryption);
122  size_t iv_size,
123  std::vector<SampleEncryptionEntry>* sample_encryption_entries) const;
124 
127  std::vector<uint8_t> sample_encryption_data;
128 
129  size_t iv_size;
130  std::vector<SampleEncryptionEntry> sample_encryption_entries;
131 };
132 
// Box holding a single FourCC format code.
133 struct OriginalFormat : Box {
134  DECLARE_BOX_METHODS(OriginalFormat);
135 
136  FourCC format;
137 };
138 
// FullBox holding a FourCC scheme type and a 32-bit version value.
139 struct SchemeType : FullBox {
140  DECLARE_BOX_METHODS(SchemeType);
141 
142  FourCC type;
143  // NOTE(review): if FullBox declares its own `version` member, this one
143  // hides it -- confirm which of the two the implementation reads/writes.
143  uint32_t version;
144 };
145 
147  DECLARE_BOX_METHODS(TrackEncryption);
148 
149  // Note: this definition is specific to the CENC protection type.
150  bool is_encrypted;
151  uint8_t default_iv_size;
152  std::vector<uint8_t> default_kid;
153 };
154 
// Box that contains a single TrackEncryption member.
155 struct SchemeInfo : Box {
156  DECLARE_BOX_METHODS(SchemeInfo);
157 
158  TrackEncryption track_encryption;
159 };
160 
162  DECLARE_BOX_METHODS(ProtectionSchemeInfo);
163 
164  OriginalFormat format;
165  SchemeType type;
166  SchemeInfo info;
167 };
168 
170  DECLARE_BOX_METHODS(MovieHeader);
171 
172  uint64_t creation_time;
173  uint64_t modification_time;
174  uint32_t timescale;
175  uint64_t duration;
176  int32_t rate;
177  int16_t volume;
178  uint32_t next_track_id;
179 };
180 
182  enum TrackHeaderFlags {
183  kTrackEnabled = 0x000001,
184  kTrackInMovie = 0x000002,
185  kTrackInPreview = 0x000004,
186  };
187 
188  DECLARE_BOX_METHODS(TrackHeader);
189 
190  uint64_t creation_time;
191  uint64_t modification_time;
192  uint32_t track_id;
193  uint64_t duration;
194  int16_t layer;
195  int16_t alternate_group;
196  int16_t volume;
197  // width and height specify the track's visual presentation size as
198  // fixed-point 16.16 values.
199  uint32_t width;
200  uint32_t height;
201 };
202 
204  uint64_t segment_duration;
205  int64_t media_time;
206  int16_t media_rate_integer;
207  int16_t media_rate_fraction;
208 };
209 
// FullBox holding a list of EditListEntry records.
210 struct EditList : FullBox {
211  DECLARE_BOX_METHODS(EditList);
212 
213  std::vector<EditListEntry> edits;
214 };
215 
// Box that contains a single EditList member.
216 struct Edit : Box {
217  DECLARE_BOX_METHODS(Edit);
218 
219  EditList list;
220 };
221 
223  DECLARE_BOX_METHODS(HandlerReference);
224 
225  FourCC handler_type;
226 };
227 
// Helper struct (it has no base class, so it is not itself a Box) that stores
// a language code string and exposes ReadWrite()/ComputeSize() for use with a
// BoxBuffer.
228 struct Language {
229  bool ReadWrite(BoxBuffer* buffer);
230  uint32_t ComputeSize() const;
231 
232  std::string code;
233 };
234 
// Helper struct (no base class) that stores an owner string and a value
// string and exposes ReadWrite()/ComputeSize() for use with a BoxBuffer.
236 struct PrivFrame {
237  bool ReadWrite(BoxBuffer* buffer);
238  uint32_t ComputeSize() const;
239 
240  std::string owner;
241  std::string value;
242 };
243 
246 struct ID3v2 : FullBox {
247  DECLARE_BOX_METHODS(ID3v2);
248 
249  Language language;
250 
253 };
254 
// FullBox that contains a HandlerReference and an ID3v2 member.
255 struct Metadata : FullBox {
256  DECLARE_BOX_METHODS(Metadata);
257 
258  HandlerReference handler;
259  ID3v2 id3v2;
260 };
261 
263  DECLARE_BOX_METHODS(CodecConfigurationRecord);
264 
265  FourCC box_type;
266  // Contains full codec configuration record, including possible extension
267  // boxes.
268  std::vector<uint8_t> data;
269 };
270 
272  DECLARE_BOX_METHODS(PixelAspectRatio);
273 
274  uint32_t h_spacing;
275  uint32_t v_spacing;
276 };
277 
279  DECLARE_BOX_METHODS(VideoSampleEntry);
280  // Returns actual format of this sample entry.
281  FourCC GetActualFormat() const {
282  return format == FOURCC_ENCV ? sinf.format.format : format;
283  }
284 
285  FourCC format;
286  uint16_t data_reference_index;
287  uint16_t width;
288  uint16_t height;
289 
290  PixelAspectRatio pixel_aspect;
292  CodecConfigurationRecord codec_config_record;
293 };
294 
296  DECLARE_BOX_METHODS(ElementaryStreamDescriptor);
297 
298  AACAudioSpecificConfig aac_audio_specific_config;
299  ESDescriptor es_descriptor;
300 };
301 
// Box holding DTS parameters: sampling frequency, maximum and average
// bitrate, PCM sample depth, plus a byte vector of extra data.
302 struct DTSSpecific : Box {
303  DECLARE_BOX_METHODS(DTSSpecific);
304 
305  uint32_t sampling_frequency;
306  uint32_t max_bitrate;
307  uint32_t avg_bitrate;
308  uint8_t pcm_sample_depth;
309  std::vector<uint8_t> extra_data;
310 };
311 
// Box whose only data member is a byte vector.
312 struct AC3Specific : Box {
313  DECLARE_BOX_METHODS(AC3Specific);
314 
315  std::vector<uint8_t> data;
316 };
317 
318 // Independent substream in EC3Specific box.
320  uint8_t sample_rate_code; // fscod: 2 bits
321  uint8_t bit_stream_identification; // bsid: 5 bits
322  // reserved_1: 1 bit
323  uint8_t audio_service; // asvc: 1 bit
324  uint8_t bit_stream_mode; // bsmod: 3 bits
325  uint8_t audio_coding_mode; // acmod: 3 bits
326  uint8_t lfe_channel_on; // lfeon: 1 bit
327  // reserved_2: 3 bits
328  uint8_t number_dependent_substreams; // num_dep_sub: 4 bits.
329  // If num_dep_sub > 0, chan_loc is present and the size is 9 bits.
330  // Otherwise, reserved_3 is present and the size is 1 bit.
331  uint16_t channel_location; // chan_loc: 9 bits.
332  // reserved_3: 1 bit
333 };
334 
// Box holding a byte vector together with a count and a list of
// IndependentSubstream records.
335 struct EC3Specific : Box {
336  DECLARE_BOX_METHODS(EC3Specific);
337 
338  // Before we know the number of independent substreams, data in EC3Specific
339  // box is stored for parsing later.
340  std::vector<uint8_t> data;
341 
342  size_t number_independent_substreams; // num_id_sub: 3 bits.
343  std::vector<IndependentSubstream> independent_substreams;
344 };
345 
347  DECLARE_BOX_METHODS(AudioSampleEntry);
348  // Returns actual format of this sample entry.
349  FourCC GetActualFormat() const {
350  return format == FOURCC_ENCA ? sinf.format.format : format;
351  }
352 
353  FourCC format;
354  uint16_t data_reference_index;
355  uint16_t channelcount;
356  uint16_t samplesize;
357  uint32_t samplerate;
358 
360 
362  DTSSpecific ddts;
363  AC3Specific dac3;
364  EC3Specific dec3;
365 };
366 
368  DECLARE_BOX_METHODS(WebVTTConfigurationBox);
369  std::string config;
370 };
371 
373  DECLARE_BOX_METHODS(WebVTTSourceLabelBox);
374  std::string source_label;
375 };
376 
378  DECLARE_BOX_METHODS(WVTTSampleEntry);
379 
380  uint16_t data_reference_index;
381 
382  WebVTTConfigurationBox config;
383  WebVTTSourceLabelBox label;
384  // Optional MPEG4BitRateBox.
385 };
386 
388  DECLARE_BOX_METHODS(SampleDescription);
389 
390  TrackType type;
391  // TODO(kqyang): Clean up the code to have one single member, e.g. by creating
392  // SampleEntry struct, std::vector<SampleEntry> sample_entries.
393  std::vector<VideoSampleEntry> video_entries;
394  std::vector<AudioSampleEntry> audio_entries;
395  std::vector<WVTTSampleEntry> wvtt_entries;
396 };
397 
// Plain record pairing a sample count with a sample delta; it is the element
// type of DecodingTimeToSample::decoding_time.
398 struct DecodingTime {
399  uint32_t sample_count;
400  uint32_t sample_delta;
401 };
402 
403 // stts.
405  DECLARE_BOX_METHODS(DecodingTimeToSample);
406 
407  std::vector<DecodingTime> decoding_time;
408 };
409 
411  uint32_t sample_count;
412  // If version == 0, sample_offset is uint32_t;
413  // If version == 1, sample_offset is int32_t.
414  // Use int64_t so both can be supported properly.
415  int64_t sample_offset;
416 };
417 
418 // ctts. Optional.
420  DECLARE_BOX_METHODS(CompositionTimeToSample);
421 
422  std::vector<CompositionOffset> composition_offset;
423 };
424 
// Plain record with a first-chunk number, a samples-per-chunk count and a
// sample description index; it is the element type of
// SampleToChunk::chunk_info.
425 struct ChunkInfo {
426  uint32_t first_chunk;
427  uint32_t samples_per_chunk;
428  uint32_t sample_description_index;
429 };
430 
431 // stsc.
433  DECLARE_BOX_METHODS(SampleToChunk);
434 
435  std::vector<ChunkInfo> chunk_info;
436 };
437 
438 // stsz.
// FullBox holding a sample size value, a sample count and a vector of
// per-entry 32-bit sizes.
439 struct SampleSize : FullBox {
440  DECLARE_BOX_METHODS(SampleSize);
441 
442  uint32_t sample_size;
443  uint32_t sample_count;
444  std::vector<uint32_t> sizes;
445 };
446 
447 // stz2.
449  DECLARE_BOX_METHODS(CompactSampleSize);
450 
451  uint8_t field_size;
452  std::vector<uint32_t> sizes;
453 };
454 
455 // co64.
457  DECLARE_BOX_METHODS(ChunkLargeOffset);
458 
459  std::vector<uint64_t> offsets;
460 };
461 
462 // stco.
464  DECLARE_BOX_METHODS(ChunkOffset);
465 };
466 
467 // stss. Optional.
// FullBox holding a vector of 32-bit sample numbers.
468 struct SyncSample : FullBox {
469  DECLARE_BOX_METHODS(SyncSample);
470 
471  std::vector<uint32_t> sample_number;
472 };
473 
// Box aggregating the per-sample tables: the sample description, the
// decoding-time and composition-time tables, the sample-to-chunk table, the
// sample sizes, the chunk offsets and the sync samples.
474 struct SampleTable : Box {
475  DECLARE_BOX_METHODS(SampleTable);
476 
477  SampleDescription description;
478  DecodingTimeToSample decoding_time_to_sample;
479  CompositionTimeToSample composition_time_to_sample;
480  SampleToChunk sample_to_chunk;
481  // Either SampleSize or CompactSampleSize must be present. Store in
481  // SampleSize.
482  SampleSize sample_size;
483  // Either ChunkOffset or ChunkLargeOffset must be present. Store in
484  // ChunkLargeOffset.
485  ChunkLargeOffset chunk_large_offset;
486  SyncSample sync_sample;
487 };
488 
490  DECLARE_BOX_METHODS(MediaHeader);
491 
492  uint64_t creation_time;
493  uint64_t modification_time;
494  uint32_t timescale;
495  uint64_t duration;
496  Language language;
497 };
498 
500  DECLARE_BOX_METHODS(VideoMediaHeader);
501 
502  uint16_t graphicsmode;
503  uint16_t opcolor_red;
504  uint16_t opcolor_green;
505  uint16_t opcolor_blue;
506 };
507 
509  DECLARE_BOX_METHODS(SoundMediaHeader);
510 
511  uint16_t balance;
512 };
513 
515  DECLARE_BOX_METHODS(SubtitleMediaHeader);
516 };
517 
519  DECLARE_BOX_METHODS(DataEntryUrl);
520 
521  std::vector<uint8_t> location;
522 };
523 
525  DECLARE_BOX_METHODS(DataReference);
526 
527  // data entry can be either url or urn box. Fix to url box for now.
528  std::vector<DataEntryUrl> data_entry;
529 };
530 
532  DECLARE_BOX_METHODS(DataInformation);
533 
534  DataReference dref;
535 };
536 
538  DECLARE_BOX_METHODS(MediaInformation);
539 
540  DataInformation dinf;
541  SampleTable sample_table;
542  // Exactly one specific media header shall be present, vmhd, smhd, hmhd, nmhd.
543  VideoMediaHeader vmhd;
544  SoundMediaHeader smhd;
545  SubtitleMediaHeader sthd;
546 };
547 
// Box that contains a MediaHeader, a HandlerReference and a MediaInformation
// member.
548 struct Media : Box {
549  DECLARE_BOX_METHODS(Media);
550 
551  MediaHeader header;
552  HandlerReference handler;
553  MediaInformation information;
554 };
555 
// Box that contains a TrackHeader, a Media, an Edit and a SampleEncryption
// member.
556 struct Track : Box {
557  DECLARE_BOX_METHODS(Track);
558 
559  TrackHeader header;
560  Media media;
561  Edit edit;
562  SampleEncryption sample_encryption;
563 };
564 
566  DECLARE_BOX_METHODS(MovieExtendsHeader);
567 
568  uint64_t fragment_duration;
569 };
570 
572  DECLARE_BOX_METHODS(TrackExtends);
573 
574  uint32_t track_id;
575  uint32_t default_sample_description_index;
576  uint32_t default_sample_duration;
577  uint32_t default_sample_size;
578  uint32_t default_sample_flags;
579 };
580 
// Box that contains a MovieExtendsHeader and a list of TrackExtends entries.
581 struct MovieExtends : Box {
582  DECLARE_BOX_METHODS(MovieExtends);
583 
584  MovieExtendsHeader header;
585  std::vector<TrackExtends> tracks;
586 };
587 
// Box that contains a MovieHeader, a Metadata and a MovieExtends member,
// together with lists of Track and ProtectionSystemSpecificHeader entries.
588 struct Movie : Box {
589  DECLARE_BOX_METHODS(Movie);
590 
591  MovieHeader header;
592  Metadata metadata; // Used to hold version information.
593  MovieExtends extends;
594  std::vector<Track> tracks;
595  std::vector<ProtectionSystemSpecificHeader> pssh;
596 };
597 
599  DECLARE_BOX_METHODS(TrackFragmentDecodeTime);
600 
601  uint64_t decode_time;
602 };
603 
605  DECLARE_BOX_METHODS(MovieFragmentHeader);
606 
607  uint32_t sequence_number;
608 };
609 
611  enum TrackFragmentFlagsMasks {
612  kBaseDataOffsetPresentMask = 0x000001,
613  kSampleDescriptionIndexPresentMask = 0x000002,
614  kDefaultSampleDurationPresentMask = 0x000008,
615  kDefaultSampleSizePresentMask = 0x000010,
616  kDefaultSampleFlagsPresentMask = 0x000020,
617  kDurationIsEmptyMask = 0x010000,
618  kDefaultBaseIsMoofMask = 0x020000,
619  };
620 
621  enum SampleFlagsMasks {
622  kReservedMask = 0xFC000000,
623  kSampleDependsOnMask = 0x03000000,
624  kSampleIsDependedOnMask = 0x00C00000,
625  kSampleHasRedundancyMask = 0x00300000,
626  kSamplePaddingValueMask = 0x000E0000,
627  kNonKeySampleMask = 0x00010000,
628  kSampleDegradationPriorityMask = 0x0000FFFF,
629  };
630 
631  DECLARE_BOX_METHODS(TrackFragmentHeader);
632 
633  uint32_t track_id;
634  uint32_t sample_description_index;
635  uint32_t default_sample_duration;
636  uint32_t default_sample_size;
637  uint32_t default_sample_flags;
638 };
639 
641  enum TrackFragmentFlagsMasks {
642  kDataOffsetPresentMask = 0x000001,
643  kFirstSampleFlagsPresentMask = 0x000004,
644  kSampleDurationPresentMask = 0x000100,
645  kSampleSizePresentMask = 0x000200,
646  kSampleFlagsPresentMask = 0x000400,
647  kSampleCompTimeOffsetsPresentMask = 0x000800,
648  };
649 
650  DECLARE_BOX_METHODS(TrackFragmentRun);
651 
652  uint32_t sample_count;
653  uint32_t data_offset;
654  std::vector<uint32_t> sample_flags;
655  std::vector<uint32_t> sample_sizes;
656  std::vector<uint32_t> sample_durations;
657  std::vector<int64_t> sample_composition_time_offsets;
658 };
659 
661  enum GroupDescriptionIndexBase {
662  kTrackGroupDescriptionIndexBase = 0,
663  kTrackFragmentGroupDescriptionIndexBase = 0x10000,
664  };
665 
666  uint32_t sample_count;
667  uint32_t group_description_index;
668 };
669 
671  DECLARE_BOX_METHODS(SampleToGroup);
672 
673  uint32_t grouping_type;
674  uint32_t grouping_type_parameter; // Version 1 only.
675  std::vector<SampleToGroupEntry> entries;
676 };
677 
681 
682  bool is_encrypted;
683  uint8_t iv_size;
684  std::vector<uint8_t> key_id;
685 };
686 
688  DECLARE_BOX_METHODS(SampleGroupDescription);
689 
690  uint32_t grouping_type;
691  std::vector<CencSampleEncryptionInfoEntry> entries;
692 };
693 
// Box aggregating one track fragment: its header, a list of runs, the decode
// time (with a separate `decode_time_absent` flag), the sample-to-group and
// sample-group-description members, the auxiliary information size/offset
// members and the sample encryption member.
694 struct TrackFragment : Box {
695  DECLARE_BOX_METHODS(TrackFragment);
696 
697  TrackFragmentHeader header;
698  std::vector<TrackFragmentRun> runs;
699  bool decode_time_absent;
700  TrackFragmentDecodeTime decode_time;
701  SampleToGroup sample_to_group;
702  SampleGroupDescription sample_group_description;
703  SampleAuxiliaryInformationSize auxiliary_size;
704  SampleAuxiliaryInformationOffset auxiliary_offset;
705  SampleEncryption sample_encryption;
706 };
707 
// Box that contains a MovieFragmentHeader together with lists of
// TrackFragment and ProtectionSystemSpecificHeader entries.
708 struct MovieFragment : Box {
709  DECLARE_BOX_METHODS(MovieFragment);
710 
711  MovieFragmentHeader header;
712  std::vector<TrackFragment> tracks;
713  std::vector<ProtectionSystemSpecificHeader> pssh;
714 };
715 
717  enum SAPType {
718  TypeUnknown = 0,
719  Type1 = 1, // T(ept) = T(dec) = T(sap) = T(ptf)
720  Type2 = 2, // T(ept) = T(dec) = T(sap) < T(ptf)
721  Type3 = 3, // T(ept) < T(dec) = T(sap) <= T(ptf)
722  Type4 = 4, // T(ept) <= T(ptf) < T(dec) = T(sap)
723  Type5 = 5, // T(ept) = T(dec) < T(sap)
724  Type6 = 6, // T(ept) < T(dec) < T(sap)
725  };
726 
727  bool reference_type;
728  uint32_t referenced_size;
729  uint32_t subsegment_duration;
730  bool starts_with_sap;
731  SAPType sap_type;
732  uint32_t sap_delta_time;
733  // We add this field to keep track of earliest_presentation_time in this
734  // subsegment. It is not part of SegmentReference.
735  uint64_t earliest_presentation_time;
736 };
737 
739  DECLARE_BOX_METHODS(SegmentIndex);
740 
741  uint32_t reference_id;
742  uint32_t timescale;
743  uint64_t earliest_presentation_time;
744  uint64_t first_offset;
745  std::vector<SegmentReference> references;
746 };
747 
748 // The actual data is parsed and written separately.
// Box whose only data member is a 32-bit data size.
749 struct MediaData : Box {
750  DECLARE_BOX_METHODS(MediaData);
751 
752  uint32_t data_size;
753 };
754 
// Box holding a signed 32-bit source id.
755 struct CueSourceIDBox : Box {
756  DECLARE_BOX_METHODS(CueSourceIDBox);
757  int32_t source_id;
758 };
759 
// Box holding the cue's current time as a string.
760 struct CueTimeBox : Box {
761  DECLARE_BOX_METHODS(CueTimeBox);
762  std::string cue_current_time;
763 };
764 
// Box holding a cue id string.
765 struct CueIDBox : Box {
766  DECLARE_BOX_METHODS(CueIDBox);
767  std::string cue_id;
768 };
769 
// Box holding a cue settings string.
770 struct CueSettingsBox : Box {
771  DECLARE_BOX_METHODS(CueSettingsBox);
772  std::string settings;
773 };
774 
// Box holding the cue text string.
775 struct CuePayloadBox : Box {
776  DECLARE_BOX_METHODS(CuePayloadBox);
777  std::string cue_text;
778 };
779 
// Box with no data members of its own; it only declares the common box
// methods.
780 struct VTTEmptyCueBox : Box {
781  DECLARE_BOX_METHODS(VTTEmptyCueBox);
782 };
783 
785  DECLARE_BOX_METHODS(VTTAdditionalTextBox);
786  std::string cue_additional_text;
787 };
788 
// Box aggregating one cue: its source id, cue id, time, settings and payload
// boxes.
789 struct VTTCueBox : Box {
790  DECLARE_BOX_METHODS(VTTCueBox);
791 
792  CueSourceIDBox cue_source_id;
793  CueIDBox cue_id;
794  CueTimeBox cue_time;
795  CueSettingsBox cue_settings;
796  CuePayloadBox cue_payload;
797 };
798 
799 #undef DECLARE_BOX_METHODS  // Helper macro is local to this header; do not leak it to includers.
800 
801 } // namespace mp4
802 } // namespace media
803 } // namespace edash_packager
804 
805 #endif // MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
Implemented per http://id3.org/id3v2.4.0-frames.
bool ParseFromSampleEncryptionData(size_t iv_size, std::vector< SampleEncryptionEntry > *sample_encryption_entries) const
PrivFrame private_frame
We only support PrivateFrame in ID3. Other frames are ignored.
bool ParseFromBuffer(uint8_t iv_size, bool has_subsamples, BufferReader *reader)
bool ReadWrite(uint8_t iv_size, bool has_subsamples, BoxBuffer *buffer)