DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs Enumerator
box_definitions.h
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
6 #define MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
7 
8 #include <vector>
9 
10 #include "packager/media/base/decrypt_config.h"
11 #include "packager/media/formats/mp4/aac_audio_specific_config.h"
12 #include "packager/media/formats/mp4/box.h"
13 #include "packager/media/formats/mp4/es_descriptor.h"
14 #include "packager/media/formats/mp4/fourccs.h"
15 
16 namespace edash_packager {
17 namespace media {
18 
19 class BufferReader;
20 
21 namespace mp4 {
22 
// Category of a track. kInvalid is pinned to 0, so a zero-initialized
// TrackType reads as invalid; the remaining enumerators take the next
// consecutive values (kVideo = 1 through kText = 4).
23 enum TrackType {
24  kInvalid = 0,
25  kVideo,
26  kAudio,
27  kHint,
28  kText,
29 };
30 
31 class BoxBuffer;
32 
// Declares the members that every concrete box type T repeats:
//   - public:  default constructor, destructor override, BoxType() override;
//   - private: ReadWriteInternal(BoxBuffer*) and ComputeSizeInternal()
//              overrides.
// The expansion ends with `public:` so that members written after the macro
// invocation are public again. Only declarations are produced here; the
// definitions have to be supplied elsewhere.
33 #define DECLARE_BOX_METHODS(T) \
34  public: \
35  T(); \
36  ~T() override; \
37  FourCC BoxType() const override; \
38  \
39  private: \
40  bool ReadWriteInternal(BoxBuffer* buffer) override; \
41  uint32_t ComputeSizeInternal() override; \
42  \
43  public:
44 
// Box carrying brand information: one major brand FourCC, a 32-bit minor
// version and any number of compatible brand FourCCs.
// NOTE(review): presumably the ISO BMFF 'ftyp' box - confirm against the
// BoxType() definition, which is not part of this header.
45 struct FileType : Box {
46  DECLARE_BOX_METHODS(FileType);
47 
48  FourCC major_brand;
49  uint32_t minor_version;
50  std::vector<FourCC> compatible_brands;
51 };
52 
54  FourCC BoxType() const override;
55 };
56 
58  DECLARE_BOX_METHODS(ProtectionSystemSpecificHeader);
59 
60  std::vector<uint8_t> system_id;
61  std::vector<uint8_t> data;
62  std::vector<uint8_t> raw_box;
63 };
64 
66  DECLARE_BOX_METHODS(SampleAuxiliaryInformationOffset);
67 
68  std::vector<uint64_t> offsets;
69 };
70 
72  DECLARE_BOX_METHODS(SampleAuxiliaryInformationSize);
73 
74  uint8_t default_sample_info_size;
75  uint32_t sample_count;
76  std::vector<uint8_t> sample_info_sizes;
77 };
78 
88  bool ReadWrite(uint8_t iv_size,
89  bool has_subsamples,
90  BoxBuffer* buffer);
97  bool ParseFromBuffer(uint8_t iv_size,
98  bool has_subsamples,
99  BufferReader* reader);
101  uint32_t ComputeSize() const;
104  uint32_t GetTotalSizeOfSubsamples() const;
105 
106  std::vector<uint8_t> initialization_vector;
107  std::vector<SubsampleEntry> subsamples;
108 };
109 
111  enum SampleEncryptionFlags {
112  kUseSubsampleEncryption = 2,
113  };
114 
115  DECLARE_BOX_METHODS(SampleEncryption);
122  size_t iv_size,
123  std::vector<SampleEncryptionEntry>* sample_encryption_entries) const;
124 
127  std::vector<uint8_t> sample_encryption_data;
128 
129  size_t iv_size;
130  std::vector<SampleEncryptionEntry> sample_encryption_entries;
131 };
132 
// Box holding a single FourCC. ProtectionSchemeInfo embeds one as `format`.
// The sample entries' GetActualFormat() returns sinf.format.format when their
// own format is FOURCC_ENCV / FOURCC_ENCA, which appears to be this field
// (the declaration of `sinf` is not shown in this listing - confirm).
133 struct OriginalFormat : Box {
134  DECLARE_BOX_METHODS(OriginalFormat);
135 
136  FourCC format;
137 };
138 
// Full box holding a scheme FourCC and a 32-bit scheme version.
// ProtectionSchemeInfo embeds one as `type`.
// NOTE(review): if FullBox itself declares a member named `version`, the
// `version` below hides it inside SchemeType - confirm against FullBox.
139 struct SchemeType : FullBox {
140  DECLARE_BOX_METHODS(SchemeType);
141 
142  FourCC type;
143  uint32_t version;
144 };
145 
147  DECLARE_BOX_METHODS(TrackEncryption);
148 
149  // Note: this definition is specific to the CENC protection type.
150  bool is_encrypted;
151  uint8_t default_iv_size;
152  std::vector<uint8_t> default_kid;
153 };
154 
// Container box whose only payload is a nested TrackEncryption box.
// ProtectionSchemeInfo embeds one as `info`.
155 struct SchemeInfo : Box {
156  DECLARE_BOX_METHODS(SchemeInfo);
157 
158  TrackEncryption track_encryption;
159 };
160 
162  DECLARE_BOX_METHODS(ProtectionSchemeInfo);
163 
164  OriginalFormat format;
165  SchemeType type;
166  SchemeInfo info;
167 };
168 
170  DECLARE_BOX_METHODS(MovieHeader);
171 
172  uint64_t creation_time;
173  uint64_t modification_time;
174  uint32_t timescale;
175  uint64_t duration;
176  int32_t rate;
177  int16_t volume;
178  uint32_t next_track_id;
179 };
180 
182  enum TrackHeaderFlags {
183  kTrackEnabled = 0x000001,
184  kTrackInMovie = 0x000002,
185  kTrackInPreview = 0x000004,
186  };
187 
188  DECLARE_BOX_METHODS(TrackHeader);
189 
190  uint64_t creation_time;
191  uint64_t modification_time;
192  uint32_t track_id;
193  uint64_t duration;
194  int16_t layer;
195  int16_t alternate_group;
196  int16_t volume;
197  // width and height specify the track's visual presentation size as
198  // fixed-point 16.16 values.
199  uint32_t width;
200  uint32_t height;
201 };
202 
204  uint64_t segment_duration;
205  int64_t media_time;
206  int16_t media_rate_integer;
207  int16_t media_rate_fraction;
208 };
209 
// Full box holding a variable-length list of EditListEntry records. It is the
// sole member of the Edit box.
210 struct EditList : FullBox {
211  DECLARE_BOX_METHODS(EditList);
212 
213  std::vector<EditListEntry> edits;
214 };
215 
// Container box wrapping exactly one EditList. Track embeds one as `edit`.
216 struct Edit : Box {
217  DECLARE_BOX_METHODS(Edit);
218 
219  EditList list;
220 };
221 
223  DECLARE_BOX_METHODS(HandlerReference);
224 
225  FourCC handler_type;
226 };
227 
// Helper (not a Box) that stores a language code as a string and exposes its
// own ReadWrite()/ComputeSize() pair. Embedded by value in ID3v2 and
// MediaHeader.
// NOTE(review): std::string is used here but this header only includes
// <vector> directly, so it relies on a transitive include of <string>.
228 struct Language {
229  bool ReadWrite(BoxBuffer* buffer);
230  uint32_t ComputeSize() const;
231 
232  std::string code;
233 };
234 
// Helper (not a Box) pairing an owner string with a value string, with its own
// ReadWrite()/ComputeSize() pair.
// NOTE(review): presumably models the ID3v2 private (PRIV) frame - confirm
// against the implementation.
236 struct PrivFrame {
237  bool ReadWrite(BoxBuffer* buffer);
238  uint32_t ComputeSize() const;
239 
240  std::string owner;
241  std::string value;
242 };
243 
246 struct ID3v2 : FullBox {
247  DECLARE_BOX_METHODS(ID3v2);
248 
249  Language language;
250 
253 };
254 
// Full box composed of a HandlerReference and an ID3v2 box. Movie embeds one
// as `metadata`, where it is used to hold version information.
255 struct Metadata : FullBox {
256  DECLARE_BOX_METHODS(Metadata);
257 
258  HandlerReference handler;
259  ID3v2 id3v2;
260 };
261 
263  DECLARE_BOX_METHODS(CodecConfigurationRecord);
264 
265  FourCC box_type;
266  // Contains full codec configuration record, including possible extension
267  // boxes.
268  std::vector<uint8_t> data;
269 };
270 
272  DECLARE_BOX_METHODS(PixelAspectRatio);
273 
274  uint32_t h_spacing;
275  uint32_t v_spacing;
276 };
277 
279  DECLARE_BOX_METHODS(VideoSampleEntry);
280  // Returns actual format of this sample entry.
281  FourCC GetActualFormat() const {
282  return format == FOURCC_ENCV ? sinf.format.format : format;
283  }
284 
285  FourCC format;
286  uint16_t data_reference_index;
287  uint16_t width;
288  uint16_t height;
289 
290  PixelAspectRatio pixel_aspect;
292  CodecConfigurationRecord codec_config_record;
293 };
294 
296  DECLARE_BOX_METHODS(ElementaryStreamDescriptor);
297 
298  AACAudioSpecificConfig aac_audio_specific_config;
299  ESDescriptor es_descriptor;
300 };
301 
// Box with DTS-specific audio parameters: three 32-bit values (sampling
// frequency, maximum and average bitrate), an 8-bit PCM sample depth and a
// trailing byte blob. AudioSampleEntry embeds one as `ddts`.
// NOTE(review): the units of the frequency/bitrate fields are not stated in
// this header - confirm before relying on them.
302 struct DTSSpecific : Box {
303  DECLARE_BOX_METHODS(DTSSpecific);
304 
305  uint32_t sampling_frequency;
306  uint32_t max_bitrate;
307  uint32_t avg_bitrate;
308  uint8_t pcm_sample_depth;
309  std::vector<uint8_t> extra_data;
310 };
311 
// Box whose payload is kept as an uninterpreted byte vector.
// AudioSampleEntry embeds one as `dac3`.
312 struct AC3Specific : Box {
313  DECLARE_BOX_METHODS(AC3Specific);
314 
315  std::vector<uint8_t> data;
316 };
317 
// Box whose payload is kept as an uninterpreted byte vector.
// AudioSampleEntry embeds one as `dec3`.
318 struct EC3Specific : Box {
319  DECLARE_BOX_METHODS(EC3Specific);
320 
321  std::vector<uint8_t> data;
322 };
323 
325  DECLARE_BOX_METHODS(AudioSampleEntry);
326  // Returns actual format of this sample entry.
327  FourCC GetActualFormat() const {
328  return format == FOURCC_ENCA ? sinf.format.format : format;
329  }
330 
331  FourCC format;
332  uint16_t data_reference_index;
333  uint16_t channelcount;
334  uint16_t samplesize;
335  uint32_t samplerate;
336 
338 
340  DTSSpecific ddts;
341  AC3Specific dac3;
342  EC3Specific dec3;
343 };
344 
346  DECLARE_BOX_METHODS(WebVTTConfigurationBox);
347  std::string config;
348 };
349 
351  DECLARE_BOX_METHODS(WebVTTSourceLabelBox);
352  std::string source_label;
353 };
354 
356  DECLARE_BOX_METHODS(TextSampleEntry);
357 
358  // Specifies fourcc of this sample entry. It needs to be set on write, e.g.
359  // set to 'wvtt' to write WVTTSampleEntry; On read, it is recovered from box
360  // header.
361  FourCC format;
362  uint16_t data_reference_index;
363 
364  // Sub boxes for wvtt text sample entry.
365  WebVTTConfigurationBox config;
366  WebVTTSourceLabelBox label;
367  // Optional MPEG4BitRateBox.
368 };
369 
371  DECLARE_BOX_METHODS(SampleDescription);
372 
373  TrackType type;
374  // TODO(kqyang): Clean up the code to have one single member, e.g. by creating
375  // SampleEntry struct, std::vector<SampleEntry> sample_entries.
376  std::vector<VideoSampleEntry> video_entries;
377  std::vector<AudioSampleEntry> audio_entries;
378  std::vector<TextSampleEntry> text_entries;
379 };
380 
// One entry of the DecodingTimeToSample (stts) table: a sample count paired
// with a sample delta, both 32-bit unsigned.
// NOTE(review): presumably "sample_count consecutive samples each lasting
// sample_delta" - confirm against the code that consumes the table.
381 struct DecodingTime {
382  uint32_t sample_count;
383  uint32_t sample_delta;
384 };
385 
386 // stts.
388  DECLARE_BOX_METHODS(DecodingTimeToSample);
389 
390  std::vector<DecodingTime> decoding_time;
391 };
392 
394  uint32_t sample_count;
395  // If version == 0, sample_offset is uint32_t;
396  // If version == 1, sample_offset is int32_t.
397  // Use int64_t so both can be supported properly.
398  int64_t sample_offset;
399 };
400 
401 // ctts. Optional.
403  DECLARE_BOX_METHODS(CompositionTimeToSample);
404 
405  std::vector<CompositionOffset> composition_offset;
406 };
407 
// One entry of the SampleToChunk (stsc) table; plain record of three 32-bit
// unsigned values.
// NOTE(review): whether first_chunk / sample_description_index are 0- or
// 1-based is not visible in this header - confirm before indexing with them.
408 struct ChunkInfo {
409  uint32_t first_chunk;
410  uint32_t samples_per_chunk;
411  uint32_t sample_description_index;
412 };
413 
414 // stsc.
416  DECLARE_BOX_METHODS(SampleToChunk);
417 
418  std::vector<ChunkInfo> chunk_info;
419 };
420 
421 // stsz.
// Full box with a single sample size value, a sample count and a per-sample
// size vector. SampleTable stores sample sizes in this type regardless of
// whether the file carried a SampleSize or a CompactSampleSize box.
// NOTE(review): presumably `sizes` is only meaningful when `sample_size` is 0
// - confirm against the ReadWriteInternal() definition.
422 struct SampleSize : FullBox {
423  DECLARE_BOX_METHODS(SampleSize);
424 
425  uint32_t sample_size;
426  uint32_t sample_count;
427  std::vector<uint32_t> sizes;
428 };
429 
430 // stz2.
432  DECLARE_BOX_METHODS(CompactSampleSize);
433 
434  uint8_t field_size;
435  std::vector<uint32_t> sizes;
436 };
437 
438 // co64.
440  DECLARE_BOX_METHODS(ChunkLargeOffset);
441 
442  std::vector<uint64_t> offsets;
443 };
444 
445 // stco.
447  DECLARE_BOX_METHODS(ChunkOffset);
448 };
449 
450 // stss. Optional.
// Full box holding a list of 32-bit sample numbers. SampleTable embeds one as
// `sync_sample`.
// NOTE(review): whether the sample numbers are 0- or 1-based is not visible
// here - confirm against the consumer.
451 struct SyncSample : FullBox {
452  DECLARE_BOX_METHODS(SyncSample);
453 
454  std::vector<uint32_t> sample_number;
455 };
456 
// Container box aggregating the per-sample tables of a track: sample
// description, decoding and composition timing, sample-to-chunk mapping,
// sample sizes, chunk offsets and sync samples. MediaInformation embeds one
// as `sample_table`.
457 struct SampleTable : Box {
458  DECLARE_BOX_METHODS(SampleTable);
459 
460  SampleDescription description;
461  DecodingTimeToSample decoding_time_to_sample;
462  CompositionTimeToSample composition_time_to_sample;
463  SampleToChunk sample_to_chunk;
464  // Either SampleSize or CompactSampleSize must be present. Stored as SampleSize.
465  SampleSize sample_size;
466  // Either ChunkOffset or ChunkLargeOffset must be present. Stored as
467  // ChunkLargeOffset.
468  ChunkLargeOffset chunk_large_offset;
469  SyncSample sync_sample;
470 };
471 
473  DECLARE_BOX_METHODS(MediaHeader);
474 
475  uint64_t creation_time;
476  uint64_t modification_time;
477  uint32_t timescale;
478  uint64_t duration;
479  Language language;
480 };
481 
483  DECLARE_BOX_METHODS(VideoMediaHeader);
484 
485  uint16_t graphicsmode;
486  uint16_t opcolor_red;
487  uint16_t opcolor_green;
488  uint16_t opcolor_blue;
489 };
490 
492  DECLARE_BOX_METHODS(SoundMediaHeader);
493 
494  uint16_t balance;
495 };
496 
498  DECLARE_BOX_METHODS(SubtitleMediaHeader);
499 };
500 
502  DECLARE_BOX_METHODS(DataEntryUrl);
503 
504  std::vector<uint8_t> location;
505 };
506 
508  DECLARE_BOX_METHODS(DataReference);
509 
510  // A data entry can be either a url or a urn box; only url is supported for now.
511  std::vector<DataEntryUrl> data_entry;
512 };
513 
515  DECLARE_BOX_METHODS(DataInformation);
516 
517  DataReference dref;
518 };
519 
521  DECLARE_BOX_METHODS(MediaInformation);
522 
523  DataInformation dinf;
524  SampleTable sample_table;
525  // Exactly one specific media header shall be present: vmhd, smhd, hmhd, nmhd.
526  VideoMediaHeader vmhd;
527  SoundMediaHeader smhd;
528  SubtitleMediaHeader sthd;
529 };
530 
// Container box grouping a track's MediaHeader, HandlerReference and
// MediaInformation. Track embeds one as `media`.
531 struct Media : Box {
532  DECLARE_BOX_METHODS(Media);
533 
534  MediaHeader header;
535  HandlerReference handler;
536  MediaInformation information;
537 };
538 
// Container box for one track: its TrackHeader, Media, Edit and a
// SampleEncryption box. Movie holds a vector of these in `tracks`.
539 struct Track : Box {
540  DECLARE_BOX_METHODS(Track);
541 
542  TrackHeader header;
543  Media media;
544  Edit edit;
545  SampleEncryption sample_encryption;
546 };
547 
549  DECLARE_BOX_METHODS(MovieExtendsHeader);
550 
551  uint64_t fragment_duration;
552 };
553 
555  DECLARE_BOX_METHODS(TrackExtends);
556 
557  uint32_t track_id;
558  uint32_t default_sample_description_index;
559  uint32_t default_sample_duration;
560  uint32_t default_sample_size;
561  uint32_t default_sample_flags;
562 };
563 
// Container box holding one MovieExtendsHeader and any number of TrackExtends
// boxes. Movie embeds one as `extends`.
564 struct MovieExtends : Box {
565  DECLARE_BOX_METHODS(MovieExtends);
566 
567  MovieExtendsHeader header;
568  std::vector<TrackExtends> tracks;
569 };
570 
// Top-level container box for a presentation: the MovieHeader, a Metadata
// box, the MovieExtends box, every Track, and any number of
// ProtectionSystemSpecificHeader boxes.
571 struct Movie : Box {
572  DECLARE_BOX_METHODS(Movie);
573 
574  MovieHeader header;
575  Metadata metadata; // Used to hold version information.
576  MovieExtends extends;
577  std::vector<Track> tracks;
578  std::vector<ProtectionSystemSpecificHeader> pssh;
579 };
580 
582  DECLARE_BOX_METHODS(TrackFragmentDecodeTime);
583 
584  uint64_t decode_time;
585 };
586 
588  DECLARE_BOX_METHODS(MovieFragmentHeader);
589 
590  uint32_t sequence_number;
591 };
592 
594  enum TrackFragmentFlagsMasks {
595  kBaseDataOffsetPresentMask = 0x000001,
596  kSampleDescriptionIndexPresentMask = 0x000002,
597  kDefaultSampleDurationPresentMask = 0x000008,
598  kDefaultSampleSizePresentMask = 0x000010,
599  kDefaultSampleFlagsPresentMask = 0x000020,
600  kDurationIsEmptyMask = 0x010000,
601  kDefaultBaseIsMoofMask = 0x020000,
602  };
603 
604  enum SampleFlagsMasks {
605  kReservedMask = 0xFC000000,
606  kSampleDependsOnMask = 0x03000000,
607  kSampleIsDependedOnMask = 0x00C00000,
608  kSampleHasRedundancyMask = 0x00300000,
609  kSamplePaddingValueMask = 0x000E0000,
610  kNonKeySampleMask = 0x00010000,
611  kSampleDegradationPriorityMask = 0x0000FFFF,
612  };
613 
614  DECLARE_BOX_METHODS(TrackFragmentHeader);
615 
616  uint32_t track_id;
617  uint32_t sample_description_index;
618  uint32_t default_sample_duration;
619  uint32_t default_sample_size;
620  uint32_t default_sample_flags;
621 };
622 
624  enum TrackFragmentFlagsMasks {
625  kDataOffsetPresentMask = 0x000001,
626  kFirstSampleFlagsPresentMask = 0x000004,
627  kSampleDurationPresentMask = 0x000100,
628  kSampleSizePresentMask = 0x000200,
629  kSampleFlagsPresentMask = 0x000400,
630  kSampleCompTimeOffsetsPresentMask = 0x000800,
631  };
632 
633  DECLARE_BOX_METHODS(TrackFragmentRun);
634 
635  uint32_t sample_count;
636  uint32_t data_offset;
637  std::vector<uint32_t> sample_flags;
638  std::vector<uint32_t> sample_sizes;
639  std::vector<uint32_t> sample_durations;
640  std::vector<int64_t> sample_composition_time_offsets;
641 };
642 
644  enum GroupDescriptionIndexBase {
645  kTrackGroupDescriptionIndexBase = 0,
646  kTrackFragmentGroupDescriptionIndexBase = 0x10000,
647  };
648 
649  uint32_t sample_count;
650  uint32_t group_description_index;
651 };
652 
654  DECLARE_BOX_METHODS(SampleToGroup);
655 
656  uint32_t grouping_type;
657  uint32_t grouping_type_parameter; // Version 1 only.
658  std::vector<SampleToGroupEntry> entries;
659 };
660 
664 
665  bool is_encrypted;
666  uint8_t iv_size;
667  std::vector<uint8_t> key_id;
668 };
669 
671  DECLARE_BOX_METHODS(SampleGroupDescription);
672 
673  uint32_t grouping_type;
674  std::vector<CencSampleEncryptionInfoEntry> entries;
675 };
676 
// Container box for one track's portion of a movie fragment: its header, any
// number of TrackFragmentRun boxes, a decode time, sample grouping
// (SampleToGroup / SampleGroupDescription), auxiliary information size and
// offset tables, and a SampleEncryption box. MovieFragment holds a vector of
// these in `tracks`.
// NOTE(review): `decode_time_absent` presumably records that `decode_time`
// was not found on read - confirm against the ReadWriteInternal() definition.
677 struct TrackFragment : Box {
678  DECLARE_BOX_METHODS(TrackFragment);
679 
680  TrackFragmentHeader header;
681  std::vector<TrackFragmentRun> runs;
682  bool decode_time_absent;
683  TrackFragmentDecodeTime decode_time;
684  SampleToGroup sample_to_group;
685  SampleGroupDescription sample_group_description;
686  SampleAuxiliaryInformationSize auxiliary_size;
687  SampleAuxiliaryInformationOffset auxiliary_offset;
688  SampleEncryption sample_encryption;
689 };
690 
// Container box for a movie fragment: one MovieFragmentHeader, any number of
// TrackFragment boxes and any number of ProtectionSystemSpecificHeader boxes.
691 struct MovieFragment : Box {
692  DECLARE_BOX_METHODS(MovieFragment);
693 
694  MovieFragmentHeader header;
695  std::vector<TrackFragment> tracks;
696  std::vector<ProtectionSystemSpecificHeader> pssh;
697 };
698 
700  enum SAPType {
701  TypeUnknown = 0,
702  Type1 = 1, // T(ept) = T(dec) = T(sap) = T(ptf)
703  Type2 = 2, // T(ept) = T(dec) = T(sap) < T(ptf)
704  Type3 = 3, // T(ept) < T(dec) = T(sap) <= T(ptf)
705  Type4 = 4, // T(ept) <= T(ptf) < T(dec) = T(sap)
706  Type5 = 5, // T(ept) = T(dec) < T(sap)
707  Type6 = 6, // T(ept) < T(dec) < T(sap)
708  };
709 
710  bool reference_type;
711  uint32_t referenced_size;
712  uint32_t subsegment_duration;
713  bool starts_with_sap;
714  SAPType sap_type;
715  uint32_t sap_delta_time;
716  // We add this field to keep track of earliest_presentation_time in this
717  // subsegment. It is not part of SegmentReference.
718  uint64_t earliest_presentation_time;
719 };
720 
722  DECLARE_BOX_METHODS(SegmentIndex);
723 
724  uint32_t reference_id;
725  uint32_t timescale;
726  uint64_t earliest_presentation_time;
727  uint64_t first_offset;
728  std::vector<SegmentReference> references;
729 };
730 
731 // The actual data is parsed and written separately.
// Box that records only the size of the media payload as a 32-bit value; the
// payload bytes themselves are not stored in this struct.
732 struct MediaData : Box {
733  DECLARE_BOX_METHODS(MediaData);
734 
735  uint32_t data_size;
736 };
737 
// Box holding a signed 32-bit source id. VTTCueBox embeds one as
// `cue_source_id`.
738 struct CueSourceIDBox : Box {
739  DECLARE_BOX_METHODS(CueSourceIDBox);
740  int32_t source_id;
741 };
742 
// Box holding the cue's current time as a string. VTTCueBox embeds one as
// `cue_time`.
743 struct CueTimeBox : Box {
744  DECLARE_BOX_METHODS(CueTimeBox);
745  std::string cue_current_time;
746 };
747 
// Box holding a cue identifier string. VTTCueBox embeds one as `cue_id`.
748 struct CueIDBox : Box {
749  DECLARE_BOX_METHODS(CueIDBox);
750  std::string cue_id;
751 };
752 
// Box holding a cue settings string. VTTCueBox embeds one as `cue_settings`.
753 struct CueSettingsBox : Box {
754  DECLARE_BOX_METHODS(CueSettingsBox);
755  std::string settings;
756 };
757 
// Box holding the cue text as a string. VTTCueBox embeds one as
// `cue_payload`.
758 struct CuePayloadBox : Box {
759  DECLARE_BOX_METHODS(CuePayloadBox);
760  std::string cue_text;
761 };
762 
// Box with no data members of its own; it declares only the standard box
// methods.
763 struct VTTEmptyCueBox : Box {
764  DECLARE_BOX_METHODS(VTTEmptyCueBox);
765 };
766 
768  DECLARE_BOX_METHODS(VTTAdditionalTextBox);
769  std::string cue_additional_text;
770 };
771 
// Container box for a single cue, composed of five child boxes held by value:
// source id, cue id, cue time, cue settings and cue payload.
772 struct VTTCueBox : Box {
773  DECLARE_BOX_METHODS(VTTCueBox);
774 
775  CueSourceIDBox cue_source_id;
776  CueIDBox cue_id;
777  CueTimeBox cue_time;
778  CueSettingsBox cue_settings;
779  CuePayloadBox cue_payload;
780 };
781 
// Retire the declaration helper so it does not leak into every file that
// includes this header. The name must match the #define exactly; #undef of a
// name that was never defined is silently accepted and removes nothing.
782 #undef DECLARE_BOX_METHODS
783 
784 } // namespace mp4
785 } // namespace media
786 } // namespace edash_packager
787 
788 #endif // MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
Implemented per http://id3.org/id3v2.4.0-frames.
bool ParseFromSampleEncryptionData(size_t iv_size, std::vector< SampleEncryptionEntry > *sample_encryption_entries) const
PrivFrame private_frame
We only support PrivateFrame in ID3. Other frames are ignored.
bool ParseFromBuffer(uint8_t iv_size, bool has_subsamples, BufferReader *reader)
bool ReadWrite(uint8_t iv_size, bool has_subsamples, BoxBuffer *buffer)