DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator
box_definitions.h
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
6 #define MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
7 
8 #include <vector>
9 
10 #include "packager/media/base/decrypt_config.h"
11 #include "packager/media/base/fourccs.h"
12 #include "packager/media/formats/mp4/aac_audio_specific_config.h"
13 #include "packager/media/formats/mp4/box.h"
14 #include "packager/media/formats/mp4/es_descriptor.h"
15 
16 namespace edash_packager {
17 namespace media {
18 
19 class BufferReader;
20 
21 namespace mp4 {
22 
23 enum TrackType {  // Kind of track; stored in SampleDescription::type.
24  kInvalid = 0,  // Only enumerator with an explicit value; the rest follow sequentially.
25  kVideo,
26  kAudio,
27  kHint,
28  kText,
29 };
30 
31 class BoxBuffer;
32 
33 #define DECLARE_BOX_METHODS(T) /* Declares the boilerplate members every box struct T needs. */ \
34  public: /* Default ctor, destructor override and BoxType() override are public. */ \
35  T(); \
36  ~T() override; \
37  FourCC BoxType() const override; \
38  \
39  private: /* The two *Internal overrides are declared private. */ \
40  bool ReadWriteInternal(BoxBuffer* buffer) override; \
41  uint32_t ComputeSizeInternal() override; \
42  \
43  public:  // Restores public access for the data members declared after the macro.
44 
45 struct FileType : Box {  // Box carrying a major brand, a minor version and compatible brands.
46  DECLARE_BOX_METHODS(FileType);
47 
48  FourCC major_brand;
49  uint32_t minor_version;
50  std::vector<FourCC> compatible_brands;  // Zero or more brands, each a FourCC.
51 };
52 
54  FourCC BoxType() const override;
55 };
56 
58  DECLARE_BOX_METHODS(ProtectionSystemSpecificHeader);
59 
60  std::vector<uint8_t> raw_box;
61 };
62 
64  DECLARE_BOX_METHODS(SampleAuxiliaryInformationOffset);
65 
66  std::vector<uint64_t> offsets;
67 };
68 
70  DECLARE_BOX_METHODS(SampleAuxiliaryInformationSize);
71 
72  uint8_t default_sample_info_size;
73  uint32_t sample_count;
74  std::vector<uint8_t> sample_info_sizes;
75 };
76 
86  bool ReadWrite(uint8_t iv_size,
87  bool has_subsamples,
88  BoxBuffer* buffer);
95  bool ParseFromBuffer(uint8_t iv_size,
96  bool has_subsamples,
97  BufferReader* reader);
99  uint32_t ComputeSize() const;
102  uint32_t GetTotalSizeOfSubsamples() const;
103 
104  std::vector<uint8_t> initialization_vector;
105  std::vector<SubsampleEntry> subsamples;
106 };
107 
109  enum SampleEncryptionFlags {
110  kUseSubsampleEncryption = 2,
111  };
112 
113  DECLARE_BOX_METHODS(SampleEncryption);
120  size_t iv_size,
121  std::vector<SampleEncryptionEntry>* sample_encryption_entries) const;
122 
125  std::vector<uint8_t> sample_encryption_data;
126 
127  size_t iv_size;
128  std::vector<SampleEncryptionEntry> sample_encryption_entries;
129 };
130 
131 struct OriginalFormat : Box {  // Box holding one FourCC; held as ProtectionSchemeInfo::format.
132  DECLARE_BOX_METHODS(OriginalFormat);
133 
134  FourCC format;  // Returned by the sample entries' GetActualFormat() for encv/enca entries.
135 };
136 
137 struct SchemeType : FullBox {  // FullBox holding a scheme FourCC and a version; held as ProtectionSchemeInfo::type.
138  DECLARE_BOX_METHODS(SchemeType);
139 
140  FourCC type;
141  uint32_t version;  // NOTE(review): if FullBox also declares a `version` member, this field shadows it - confirm.
142 };
143 
145  DECLARE_BOX_METHODS(TrackEncryption);
146 
147  uint8_t default_is_protected;
148  uint8_t default_per_sample_iv_size;
149  std::vector<uint8_t> default_kid;
150 
151  // For pattern-based encryption.
152  uint8_t default_crypt_byte_block;
153  uint8_t default_skip_byte_block;
154 
155  // Present only if
156  // |default_is_protected == 1 && default_per_sample_iv_size == 0|.
157  std::vector<uint8_t> default_constant_iv;
158 };
159 
160 struct SchemeInfo : Box {  // Box wrapping a single TrackEncryption; held as ProtectionSchemeInfo::info.
161  DECLARE_BOX_METHODS(SchemeInfo);
162 
163  TrackEncryption track_encryption;
164 };
165 
167  DECLARE_BOX_METHODS(ProtectionSchemeInfo);
168 
169  OriginalFormat format;
170  SchemeType type;
171  SchemeInfo info;
172 };
173 
175  DECLARE_BOX_METHODS(MovieHeader);
176 
177  uint64_t creation_time;
178  uint64_t modification_time;
179  uint32_t timescale;
180  uint64_t duration;
181  int32_t rate;
182  int16_t volume;
183  uint32_t next_track_id;
184 };
185 
187  enum TrackHeaderFlags {
188  kTrackEnabled = 0x000001,
189  kTrackInMovie = 0x000002,
190  kTrackInPreview = 0x000004,
191  };
192 
193  DECLARE_BOX_METHODS(TrackHeader);
194 
195  uint64_t creation_time;
196  uint64_t modification_time;
197  uint32_t track_id;
198  uint64_t duration;
199  int16_t layer;
200  int16_t alternate_group;
201  int16_t volume;
202  // width and height specify the track's visual presentation size as
203  // fixed-point 16.16 values.
204  uint32_t width;
205  uint32_t height;
206 };
207 
209  uint64_t segment_duration;
210  int64_t media_time;
211  int16_t media_rate_integer;
212  int16_t media_rate_fraction;
213 };
214 
215 struct EditList : FullBox {  // FullBox holding a list of EditListEntry records; held as Edit::list.
216  DECLARE_BOX_METHODS(EditList);
217 
218  std::vector<EditListEntry> edits;
219 };
220 
221 struct Edit : Box {  // Box wrapping a single EditList; held as Track::edit.
222  DECLARE_BOX_METHODS(Edit);
223 
224  EditList list;
225 };
226 
228  DECLARE_BOX_METHODS(HandlerReference);
229 
230  FourCC handler_type;
231 };
232 
233 struct Language {  // Helper, not a Box subclass; used as a member by MediaHeader and ID3v2.
234  bool ReadWrite(BoxBuffer* buffer);  // Reads or writes this value through |buffer|.
235  uint32_t ComputeSize() const;
236 
237  std::string code;  // Presumably a language code string - format not visible here; confirm in the .cc.
238 };
239 
241 struct PrivFrame {  // Helper, not a Box subclass, holding an owner/value string pair.
242  bool ReadWrite(BoxBuffer* buffer);  // Reads or writes this frame through |buffer|.
243  uint32_t ComputeSize() const;
244 
245  std::string owner;
246  std::string value;
247 };
248 
251 struct ID3v2 : FullBox {
252  DECLARE_BOX_METHODS(ID3v2);
253 
254  Language language;
255 
258 };
259 
260 struct Metadata : FullBox {  // FullBox pairing a HandlerReference with an ID3v2 box; held as Movie::metadata.
261  DECLARE_BOX_METHODS(Metadata);
262 
263  HandlerReference handler;
264  ID3v2 id3v2;
265 };
266 
268  DECLARE_BOX_METHODS(CodecConfigurationRecord);
269 
270  FourCC box_type;
271  // Contains full codec configuration record, including possible extension
272  // boxes.
273  std::vector<uint8_t> data;
274 };
275 
277  DECLARE_BOX_METHODS(PixelAspectRatio);
278 
279  uint32_t h_spacing;
280  uint32_t v_spacing;
281 };
282 
284  DECLARE_BOX_METHODS(VideoSampleEntry);
285  // Returns actual format of this sample entry.
286  FourCC GetActualFormat() const {
287  return format == FOURCC_encv ? sinf.format.format : format;
288  }
289 
290  FourCC format;
291  uint16_t data_reference_index;
292  uint16_t width;
293  uint16_t height;
294 
295  PixelAspectRatio pixel_aspect;
297  CodecConfigurationRecord codec_config_record;
298 };
299 
301  DECLARE_BOX_METHODS(ElementaryStreamDescriptor);
302 
303  AACAudioSpecificConfig aac_audio_specific_config;
304  ESDescriptor es_descriptor;
305 };
306 
307 struct DTSSpecific : Box {  // DTS-specific configuration box; held as AudioSampleEntry::ddts.
308  DECLARE_BOX_METHODS(DTSSpecific);
309 
310  uint32_t sampling_frequency;
311  uint32_t max_bitrate;
312  uint32_t avg_bitrate;
313  uint8_t pcm_sample_depth;
314  std::vector<uint8_t> extra_data;  // Raw bytes; presumably whatever follows the fixed fields - confirm in the .cc.
315 };
316 
317 struct AC3Specific : Box {  // AC3-specific box kept as raw bytes; held as AudioSampleEntry::dac3.
318  DECLARE_BOX_METHODS(AC3Specific);
319 
320  std::vector<uint8_t> data;
321 };
322 
323 struct EC3Specific : Box {  // EC3-specific box kept as raw bytes; held as AudioSampleEntry::dec3.
324  DECLARE_BOX_METHODS(EC3Specific);
325 
326  std::vector<uint8_t> data;
327 };
328 
330  DECLARE_BOX_METHODS(AudioSampleEntry);
331  // Returns actual format of this sample entry.
332  FourCC GetActualFormat() const {
333  return format == FOURCC_enca ? sinf.format.format : format;
334  }
335 
336  FourCC format;
337  uint16_t data_reference_index;
338  uint16_t channelcount;
339  uint16_t samplesize;
340  uint32_t samplerate;
341 
343 
345  DTSSpecific ddts;
346  AC3Specific dac3;
347  EC3Specific dec3;
348 };
349 
351  DECLARE_BOX_METHODS(WebVTTConfigurationBox);
352  std::string config;
353 };
354 
356  DECLARE_BOX_METHODS(WebVTTSourceLabelBox);
357  std::string source_label;
358 };
359 
361  DECLARE_BOX_METHODS(TextSampleEntry);
362 
363  // Specifies fourcc of this sample entry. It needs to be set on write, e.g.
364  // set to 'wvtt' to write WVTTSampleEntry; On read, it is recovered from box
365  // header.
366  FourCC format;
367  uint16_t data_reference_index;
368 
369  // Sub boxes for wvtt text sample entry.
370  WebVTTConfigurationBox config;
371  WebVTTSourceLabelBox label;
372  // Optional MPEG4BitRateBox.
373 };
374 
376  DECLARE_BOX_METHODS(SampleDescription);
377 
378  TrackType type;
379  // TODO(kqyang): Clean up the code to have one single member, e.g. by creating
380  // SampleEntry struct, std::vector<SampleEntry> sample_entries.
381  std::vector<VideoSampleEntry> video_entries;
382  std::vector<AudioSampleEntry> audio_entries;
383  std::vector<TextSampleEntry> text_entries;
384 };
385 
386 struct DecodingTime {  // One entry of DecodingTimeToSample::decoding_time.
387  uint32_t sample_count;
388  uint32_t sample_delta;
389 };
390 
391 // stts.
393  DECLARE_BOX_METHODS(DecodingTimeToSample);
394 
395  std::vector<DecodingTime> decoding_time;
396 };
397 
399  uint32_t sample_count;
400  // If version == 0, sample_offset is uint32_t;
401  // If version == 1, sample_offset is int32_t.
402  // Use int64_t so both can be supported properly.
403  int64_t sample_offset;
404 };
405 
406 // ctts. Optional.
408  DECLARE_BOX_METHODS(CompositionTimeToSample);
409 
410  std::vector<CompositionOffset> composition_offset;
411 };
412 
413 struct ChunkInfo {  // One entry of SampleToChunk::chunk_info.
414  uint32_t first_chunk;
415  uint32_t samples_per_chunk;
416  uint32_t sample_description_index;
417 };
418 
419 // stsc.
421  DECLARE_BOX_METHODS(SampleToChunk);
422 
423  std::vector<ChunkInfo> chunk_info;
424 };
425 
426 // stsz.
427 struct SampleSize : FullBox {  // Held as SampleTable::sample_size, which also stores CompactSampleSize data.
428  DECLARE_BOX_METHODS(SampleSize);
429 
430  uint32_t sample_size;
431  uint32_t sample_count;
432  std::vector<uint32_t> sizes;  // Presumably per-sample sizes used when sample_size is 0 - confirm in the .cc.
433 };
434 
435 // stz2.
437  DECLARE_BOX_METHODS(CompactSampleSize);
438 
439  uint8_t field_size;
440  std::vector<uint32_t> sizes;
441 };
442 
443 // co64.
445  DECLARE_BOX_METHODS(ChunkLargeOffset);
446 
447  std::vector<uint64_t> offsets;
448 };
449 
450 // stco.
452  DECLARE_BOX_METHODS(ChunkOffset);
453 };
454 
455 // stss. Optional.
456 struct SyncSample : FullBox {  // Held as SampleTable::sync_sample.
457  DECLARE_BOX_METHODS(SyncSample);
458 
459  std::vector<uint32_t> sample_number;  // One entry per listed sample number.
460 };
461 
462 struct SampleTable : Box {  // Box aggregating the per-sample tables; held as MediaInformation::sample_table.
463  DECLARE_BOX_METHODS(SampleTable);
464 
465  SampleDescription description;
466  DecodingTimeToSample decoding_time_to_sample;
467  CompositionTimeToSample composition_time_to_sample;
468  SampleToChunk sample_to_chunk;
469  // Either SampleSize or CompactSampleSize must be present. Stored in SampleSize.
470  SampleSize sample_size;
471  // Either ChunkOffset or ChunkLargeOffset must be present. Stored in
472  // ChunkLargeOffset.
473  ChunkLargeOffset chunk_large_offset;
474  SyncSample sync_sample;
475 };
476 
478  DECLARE_BOX_METHODS(MediaHeader);
479 
480  uint64_t creation_time;
481  uint64_t modification_time;
482  uint32_t timescale;
483  uint64_t duration;
484  Language language;
485 };
486 
488  DECLARE_BOX_METHODS(VideoMediaHeader);
489 
490  uint16_t graphicsmode;
491  uint16_t opcolor_red;
492  uint16_t opcolor_green;
493  uint16_t opcolor_blue;
494 };
495 
497  DECLARE_BOX_METHODS(SoundMediaHeader);
498 
499  uint16_t balance;
500 };
501 
503  DECLARE_BOX_METHODS(SubtitleMediaHeader);
504 };
505 
507  DECLARE_BOX_METHODS(DataEntryUrl);
508 
509  std::vector<uint8_t> location;
510 };
511 
513  DECLARE_BOX_METHODS(DataReference);
514 
515  // A data entry can be either a url or a urn box. Fixed to url box for now.
516  std::vector<DataEntryUrl> data_entry;
517 };
518 
520  DECLARE_BOX_METHODS(DataInformation);
521 
522  DataReference dref;
523 };
524 
526  DECLARE_BOX_METHODS(MediaInformation);
527 
528  DataInformation dinf;
529  SampleTable sample_table;
530  // Exactly one specific media header shall be present, vmhd, smhd, hmhd, nmhd.
531  VideoMediaHeader vmhd;
532  SoundMediaHeader smhd;
533  SubtitleMediaHeader sthd;
534 };
535 
536 struct Media : Box {  // Box grouping media header, handler reference and media information; held as Track::media.
537  DECLARE_BOX_METHODS(Media);
538 
539  MediaHeader header;
540  HandlerReference handler;
541  MediaInformation information;
542 };
543 
544 struct Track : Box {  // Box describing one track; stored in Movie::tracks.
545  DECLARE_BOX_METHODS(Track);
546 
547  TrackHeader header;
548  Media media;
549  Edit edit;
550  SampleEncryption sample_encryption;
551 };
552 
554  DECLARE_BOX_METHODS(MovieExtendsHeader);
555 
556  uint64_t fragment_duration;
557 };
558 
560  DECLARE_BOX_METHODS(TrackExtends);
561 
562  uint32_t track_id;
563  uint32_t default_sample_description_index;
564  uint32_t default_sample_duration;
565  uint32_t default_sample_size;
566  uint32_t default_sample_flags;
567 };
568 
569 struct MovieExtends : Box {  // Box holding a MovieExtendsHeader and TrackExtends entries; held as Movie::extends.
570  DECLARE_BOX_METHODS(MovieExtends);
571 
572  MovieExtendsHeader header;
573  std::vector<TrackExtends> tracks;  // Each TrackExtends carries its own track_id.
574 };
575 
576 struct Movie : Box {  // Box aggregating the header, metadata, extends info, tracks and pssh boxes.
577  DECLARE_BOX_METHODS(Movie);
578 
579  MovieHeader header;
580  Metadata metadata; // Used to hold version information.
581  MovieExtends extends;
582  std::vector<Track> tracks;
583  std::vector<ProtectionSystemSpecificHeader> pssh;  // Zero or more protection system headers.
584 };
585 
587  DECLARE_BOX_METHODS(TrackFragmentDecodeTime);
588 
589  uint64_t decode_time;
590 };
591 
593  DECLARE_BOX_METHODS(MovieFragmentHeader);
594 
595  uint32_t sequence_number;
596 };
597 
599  enum TrackFragmentFlagsMasks {
600  kBaseDataOffsetPresentMask = 0x000001,
601  kSampleDescriptionIndexPresentMask = 0x000002,
602  kDefaultSampleDurationPresentMask = 0x000008,
603  kDefaultSampleSizePresentMask = 0x000010,
604  kDefaultSampleFlagsPresentMask = 0x000020,
605  kDurationIsEmptyMask = 0x010000,
606  kDefaultBaseIsMoofMask = 0x020000,
607  };
608 
609  enum SampleFlagsMasks {
610  kReservedMask = 0xFC000000,
611  kSampleDependsOnMask = 0x03000000,
612  kSampleIsDependedOnMask = 0x00C00000,
613  kSampleHasRedundancyMask = 0x00300000,
614  kSamplePaddingValueMask = 0x000E0000,
615  kNonKeySampleMask = 0x00010000,
616  kSampleDegradationPriorityMask = 0x0000FFFF,
617  };
618 
619  DECLARE_BOX_METHODS(TrackFragmentHeader);
620 
621  uint32_t track_id;
622  uint32_t sample_description_index;
623  uint32_t default_sample_duration;
624  uint32_t default_sample_size;
625  uint32_t default_sample_flags;
626 };
627 
629  enum TrackFragmentFlagsMasks {
630  kDataOffsetPresentMask = 0x000001,
631  kFirstSampleFlagsPresentMask = 0x000004,
632  kSampleDurationPresentMask = 0x000100,
633  kSampleSizePresentMask = 0x000200,
634  kSampleFlagsPresentMask = 0x000400,
635  kSampleCompTimeOffsetsPresentMask = 0x000800,
636  };
637 
638  DECLARE_BOX_METHODS(TrackFragmentRun);
639 
640  uint32_t sample_count;
641  uint32_t data_offset;
642  std::vector<uint32_t> sample_flags;
643  std::vector<uint32_t> sample_sizes;
644  std::vector<uint32_t> sample_durations;
645  std::vector<int64_t> sample_composition_time_offsets;
646 };
647 
649  enum GroupDescriptionIndexBase {
650  kTrackGroupDescriptionIndexBase = 0,
651  kTrackFragmentGroupDescriptionIndexBase = 0x10000,
652  };
653 
654  uint32_t sample_count;
655  uint32_t group_description_index;
656 };
657 
659  DECLARE_BOX_METHODS(SampleToGroup);
660 
661  uint32_t grouping_type;
662  uint32_t grouping_type_parameter; // Version 1 only.
663  std::vector<SampleToGroupEntry> entries;
664 };
665 
669 
670  uint8_t is_protected;
671  uint8_t per_sample_iv_size;
672  std::vector<uint8_t> key_id;
673 
674  // For pattern-based encryption.
675  uint8_t crypt_byte_block;
676  uint8_t skip_byte_block;
677 
678  // Present only if |is_protected == 1 && per_sample_iv_size == 0|.
679  std::vector<uint8_t> constant_iv;
680 };
681 
683  DECLARE_BOX_METHODS(SampleGroupDescription);
684 
685  uint32_t grouping_type;
686  std::vector<CencSampleEncryptionInfoEntry> entries;
687 };
688 
689 struct TrackFragment : Box {  // Box describing one track's part of a fragment; stored in MovieFragment::tracks.
690  DECLARE_BOX_METHODS(TrackFragment);
691 
692  TrackFragmentHeader header;
693  std::vector<TrackFragmentRun> runs;
694  bool decode_time_absent;  // Presumably set when no TrackFragmentDecodeTime box was read - confirm in the .cc.
695  TrackFragmentDecodeTime decode_time;
696  SampleToGroup sample_to_group;
697  SampleGroupDescription sample_group_description;
698  SampleAuxiliaryInformationSize auxiliary_size;
699  SampleAuxiliaryInformationOffset auxiliary_offset;
700  SampleEncryption sample_encryption;
701 };
702 
703 struct MovieFragment : Box {  // Box aggregating a fragment header, per-track fragments and pssh boxes.
704  DECLARE_BOX_METHODS(MovieFragment);
705 
706  MovieFragmentHeader header;
707  std::vector<TrackFragment> tracks;
708  std::vector<ProtectionSystemSpecificHeader> pssh;  // Zero or more protection system headers.
709 };
710 
712  enum SAPType {
713  TypeUnknown = 0,
714  Type1 = 1, // T(ept) = T(dec) = T(sap) = T(ptf)
715  Type2 = 2, // T(ept) = T(dec) = T(sap) < T(ptf)
716  Type3 = 3, // T(ept) < T(dec) = T(sap) <= T(ptf)
717  Type4 = 4, // T(ept) <= T(ptf) < T(dec) = T(sap)
718  Type5 = 5, // T(ept) = T(dec) < T(sap)
719  Type6 = 6, // T(ept) < T(dec) < T(sap)
720  };
721 
722  bool reference_type;
723  uint32_t referenced_size;
724  uint32_t subsegment_duration;
725  bool starts_with_sap;
726  SAPType sap_type;
727  uint32_t sap_delta_time;
728  // We add this field to keep track of earliest_presentation_time in this
729  // subsegment. It is not part of SegmentReference.
730  uint64_t earliest_presentation_time;
731 };
732 
734  DECLARE_BOX_METHODS(SegmentIndex);
735 
736  uint32_t reference_id;
737  uint32_t timescale;
738  uint64_t earliest_presentation_time;
739  uint64_t first_offset;
740  std::vector<SegmentReference> references;
741 };
742 
743 // The actual data is parsed and written separately.
744 struct MediaData : Box {  // Holds only a size field; no payload buffer is stored in this struct.
745  DECLARE_BOX_METHODS(MediaData);
746 
747  uint32_t data_size;  // Presumably the payload size in bytes - confirm in the .cc.
748 };
749 
750 struct CueSourceIDBox : Box {  // Box holding one signed 32-bit id; held as VTTCueBox::cue_source_id.
751  DECLARE_BOX_METHODS(CueSourceIDBox);
752  int32_t source_id;
753 };
754 
755 struct CueTimeBox : Box {  // Box holding one string; held as VTTCueBox::cue_time.
756  DECLARE_BOX_METHODS(CueTimeBox);
757  std::string cue_current_time;  // Kept as text; the expected format is not visible here.
758 };
759 
760 struct CueIDBox : Box {  // Box holding one string; held as VTTCueBox::cue_id.
761  DECLARE_BOX_METHODS(CueIDBox);
762  std::string cue_id;
763 };
764 
765 struct CueSettingsBox : Box {  // Box holding one string; held as VTTCueBox::cue_settings.
766  DECLARE_BOX_METHODS(CueSettingsBox);
767  std::string settings;
768 };
769 
770 struct CuePayloadBox : Box {  // Box holding one string; held as VTTCueBox::cue_payload.
771  DECLARE_BOX_METHODS(CuePayloadBox);
772  std::string cue_text;
773 };
774 
775 struct VTTEmptyCueBox : Box {  // Box with no data members of its own.
776  DECLARE_BOX_METHODS(VTTEmptyCueBox);
777 };
778 
780  DECLARE_BOX_METHODS(VTTAdditionalTextBox);
781  std::string cue_additional_text;
782 };
783 
784 struct VTTCueBox : Box {  // Box grouping the five cue sub-boxes declared as members below.
785  DECLARE_BOX_METHODS(VTTCueBox);
786 
787  CueSourceIDBox cue_source_id;
788  CueIDBox cue_id;
789  CueTimeBox cue_time;
790  CueSettingsBox cue_settings;
791  CuePayloadBox cue_payload;
792 };
793 
794 #undef DECLARE_BOX_METHODS  // Matches the #define near the top so the helper macro does not leak into includers.
795 
796 } // namespace mp4
797 } // namespace media
798 } // namespace edash_packager
799 
800 #endif // MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
Implemented per http://id3.org/id3v2.4.0-frames.
bool ParseFromSampleEncryptionData(size_t iv_size, std::vector< SampleEncryptionEntry > *sample_encryption_entries) const
PrivFrame private_frame
We only support PrivateFrame in ID3. Other frames are ignored.
bool ParseFromBuffer(uint8_t iv_size, bool has_subsamples, BufferReader *reader)
bool ReadWrite(uint8_t iv_size, bool has_subsamples, BoxBuffer *buffer)