DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator
box_definitions.h
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
6 #define MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
7 
8 #include <vector>
9 
10 #include "packager/media/base/decrypt_config.h"
11 #include "packager/media/base/fourccs.h"
12 #include "packager/media/formats/mp4/aac_audio_specific_config.h"
13 #include "packager/media/formats/mp4/box.h"
14 #include "packager/media/formats/mp4/es_descriptor.h"
15 
16 namespace shaka {
17 namespace media {
18 
19 class BufferReader;
20 
21 namespace mp4 {
22 
// Track categories used by this header. kInvalid is pinned to 0; the other
// enumerators follow consecutively (kVideo = 1 ... kText = 4).
23 enum TrackType {
24  kInvalid = 0,
25  kVideo,
26  kAudio,
27  kHint,
28  kText,
29 };
30 
31 class BoxBuffer;
32 
// Declares the members shared by every box type that invokes this macro:
// a public default constructor, an overriding destructor and an overriding
// BoxType(), followed by private overrides of ReadWriteInternal() and
// ComputeSizeInternal(). The expansion ends with "public:" so that members
// written after the macro invocation are public again.
33 #define DECLARE_BOX_METHODS(T) \
34  public: \
35  T(); \
36  ~T() override; \
37  FourCC BoxType() const override; \
38  \
39  private: \
40  bool ReadWriteInternal(BoxBuffer* buffer) override; \
41  uint32_t ComputeSizeInternal() override; \
42  \
43  public:
44 
// Box carrying a major brand, a minor version and a list of compatible
// brands. Constructor, BoxType() and the read/write/size overrides come from
// DECLARE_BOX_METHODS and are defined outside this header.
45 struct FileType : Box {
46  DECLARE_BOX_METHODS(FileType);
47 
48  FourCC major_brand;
49  uint32_t minor_version;
50  std::vector<FourCC> compatible_brands;
51 };
52 
54  FourCC BoxType() const override;
55 };
56 
58  DECLARE_BOX_METHODS(ProtectionSystemSpecificHeader);
59 
60  std::vector<uint8_t> raw_box;
61 };
62 
64  DECLARE_BOX_METHODS(SampleAuxiliaryInformationOffset);
65 
66  std::vector<uint64_t> offsets;
67 };
68 
70  DECLARE_BOX_METHODS(SampleAuxiliaryInformationSize);
71 
72  uint8_t default_sample_info_size;
73  uint32_t sample_count;
74  std::vector<uint8_t> sample_info_sizes;
75 };
76 
86  bool ReadWrite(uint8_t iv_size,
87  bool has_subsamples,
88  BoxBuffer* buffer);
95  bool ParseFromBuffer(uint8_t iv_size,
96  bool has_subsamples,
97  BufferReader* reader);
99  uint32_t ComputeSize() const;
102  uint32_t GetTotalSizeOfSubsamples() const;
103 
104  std::vector<uint8_t> initialization_vector;
105  std::vector<SubsampleEntry> subsamples;
106 };
107 
109  enum SampleEncryptionFlags {
110  kUseSubsampleEncryption = 2,
111  };
112 
113  DECLARE_BOX_METHODS(SampleEncryption);
120  size_t iv_size,
121  std::vector<SampleEncryptionEntry>* sample_encryption_entries) const;
122 
125  std::vector<uint8_t> sample_encryption_data;
126 
127  size_t iv_size;
128  std::vector<SampleEncryptionEntry> sample_encryption_entries;
129 };
130 
// Box holding a single FourCC. The sample entries' GetActualFormat() returns
// sinf.format.format (this field) when the entry format is FOURCC_encv or
// FOURCC_enca, i.e. it records the format of the entry before protection.
131 struct OriginalFormat : Box {
132  DECLARE_BOX_METHODS(OriginalFormat);
133 
134  FourCC format;
135 };
136 
// FullBox holding a scheme FourCC and a 32-bit scheme version.
// NOTE(review): FullBox is declared in box.h (not visible here); confirm
// whether this |version| member hides a same-named FullBox member.
137 struct SchemeType : FullBox {
138  DECLARE_BOX_METHODS(SchemeType);
139 
140  FourCC type;
141  uint32_t version;
142 };
143 
145  DECLARE_BOX_METHODS(TrackEncryption);
146 
147  uint8_t default_is_protected;
148  uint8_t default_per_sample_iv_size;
149  std::vector<uint8_t> default_kid;
150 
151  // For pattern-based encryption.
152  uint8_t default_crypt_byte_block;
153  uint8_t default_skip_byte_block;
154 
155  // Present only if
156  // |default_is_protected == 1 && default_per_sample_iv_size == 0|.
157  std::vector<uint8_t> default_constant_iv;
158 };
159 
// Container box whose only member is a TrackEncryption box.
160 struct SchemeInfo : Box {
161  DECLARE_BOX_METHODS(SchemeInfo);
162 
163  TrackEncryption track_encryption;
164 };
165 
167  DECLARE_BOX_METHODS(ProtectionSchemeInfo);
168 
169  OriginalFormat format;
170  SchemeType type;
171  SchemeInfo info;
172 };
173 
175  DECLARE_BOX_METHODS(MovieHeader);
176 
177  uint64_t creation_time;
178  uint64_t modification_time;
179  uint32_t timescale;
180  uint64_t duration;
181  int32_t rate;
182  int16_t volume;
183  uint32_t next_track_id;
184 };
185 
187  enum TrackHeaderFlags {
188  kTrackEnabled = 0x000001,
189  kTrackInMovie = 0x000002,
190  kTrackInPreview = 0x000004,
191  };
192 
193  DECLARE_BOX_METHODS(TrackHeader);
194 
195  uint64_t creation_time;
196  uint64_t modification_time;
197  uint32_t track_id;
198  uint64_t duration;
199  int16_t layer;
200  int16_t alternate_group;
201  int16_t volume;
202  // width and height specify the track's visual presentation size as
203  // fixed-point 16.16 values.
204  uint32_t width;
205  uint32_t height;
206 };
207 
209  uint64_t segment_duration;
210  int64_t media_time;
211  int16_t media_rate_integer;
212  int16_t media_rate_fraction;
213 };
214 
// FullBox holding the list of edit entries (EditListEntry values).
215 struct EditList : FullBox {
216  DECLARE_BOX_METHODS(EditList);
217 
218  std::vector<EditListEntry> edits;
219 };
220 
// Container box wrapping a single EditList.
221 struct Edit : Box {
222  DECLARE_BOX_METHODS(Edit);
223 
224  EditList list;
225 };
226 
228  DECLARE_BOX_METHODS(HandlerReference);
229 
230  FourCC handler_type;
231 };
232 
// Helper value type (not derived from Box) that stores a language code string
// and provides its own ReadWrite()/ComputeSize(). It is embedded as a member
// in ID3v2 and in the media header struct of this file.
233 struct Language {
  // Reads or writes |code| through |buffer|; returns a success flag.
  // NOTE(review): the on-disk encoding is defined in the .cc, not here.
234  bool ReadWrite(BoxBuffer* buffer);
  // Returns the size this value occupies; does not modify the object.
235  uint32_t ComputeSize() const;
236 
237  std::string code;
238 };
239 
// Helper value type (not derived from Box) made of an owner string and a
// value string, with its own ReadWrite()/ComputeSize().
// NOTE(review): presumably the ID3 private frame used by ID3v2 — confirm.
241 struct PrivFrame {
  // Reads or writes |owner| and |value| through |buffer|; returns success.
242  bool ReadWrite(BoxBuffer* buffer);
  // Returns the size this frame occupies; does not modify the object.
243  uint32_t ComputeSize() const;
244 
245  std::string owner;
246  std::string value;
247 };
248 
251 struct ID3v2 : FullBox {
252  DECLARE_BOX_METHODS(ID3v2);
253 
254  Language language;
255 
258 };
259 
// FullBox grouping a HandlerReference with an ID3v2 box. Movie embeds one
// instance of it ("Used to hold version information").
260 struct Metadata : FullBox {
261  DECLARE_BOX_METHODS(Metadata);
262 
263  HandlerReference handler;
264  ID3v2 id3v2;
265 };
266 
268  DECLARE_BOX_METHODS(CodecConfigurationRecord);
269 
270  FourCC box_type;
271  // Contains full codec configuration record, including possible extension
272  // boxes.
273  std::vector<uint8_t> data;
274 };
275 
277  DECLARE_BOX_METHODS(PixelAspectRatio);
278 
279  uint32_t h_spacing;
280  uint32_t v_spacing;
281 };
282 
284  DECLARE_BOX_METHODS(VideoSampleEntry);
285  // Returns actual format of this sample entry.
286  FourCC GetActualFormat() const {
287  return format == FOURCC_encv ? sinf.format.format : format;
288  }
289 
290  FourCC format;
291  uint16_t data_reference_index;
292  uint16_t width;
293  uint16_t height;
294 
295  PixelAspectRatio pixel_aspect;
297  CodecConfigurationRecord codec_config_record;
298 };
299 
301  DECLARE_BOX_METHODS(ElementaryStreamDescriptor);
302 
303  AACAudioSpecificConfig aac_audio_specific_config;
304  ESDescriptor es_descriptor;
305 };
306 
// Box with DTS stream parameters: sampling frequency, maximum and average
// bitrate, PCM sample depth, and any remaining bytes kept raw in |extra_data|.
// NOTE(review): units of the numeric fields are not stated in this header.
307 struct DTSSpecific : Box {
308  DECLARE_BOX_METHODS(DTSSpecific);
309 
310  uint32_t sampling_frequency;
311  uint32_t max_bitrate;
312  uint32_t avg_bitrate;
313  uint8_t pcm_sample_depth;
314  std::vector<uint8_t> extra_data;
315 };
316 
// Box whose payload is kept as an uninterpreted byte vector.
317 struct AC3Specific : Box {
318  DECLARE_BOX_METHODS(AC3Specific);
319 
320  std::vector<uint8_t> data;
321 };
322 
// Box whose payload is kept as an uninterpreted byte vector (same shape as
// AC3Specific).
323 struct EC3Specific : Box {
324  DECLARE_BOX_METHODS(EC3Specific);
325 
326  std::vector<uint8_t> data;
327 };
328 
// Box storing the raw Opus identification header together with the pre-skip
// value derived from it.
329 struct OpusSpecific : Box {
330  DECLARE_BOX_METHODS(OpusSpecific);
331 
332  std::vector<uint8_t> opus_identification_header;
333  // The number of priming samples. Extracted from |opus_identification_header|.
334  uint16_t preskip;
335 };
336 
338  DECLARE_BOX_METHODS(AudioSampleEntry);
339  // Returns actual format of this sample entry.
340  FourCC GetActualFormat() const {
341  return format == FOURCC_enca ? sinf.format.format : format;
342  }
343 
344  FourCC format;
345  uint16_t data_reference_index;
346  uint16_t channelcount;
347  uint16_t samplesize;
348  uint32_t samplerate;
349 
351 
353  DTSSpecific ddts;
354  AC3Specific dac3;
355  EC3Specific dec3;
356  OpusSpecific dops;
357 };
358 
360  DECLARE_BOX_METHODS(WebVTTConfigurationBox);
361  std::string config;
362 };
363 
365  DECLARE_BOX_METHODS(WebVTTSourceLabelBox);
366  std::string source_label;
367 };
368 
370  DECLARE_BOX_METHODS(TextSampleEntry);
371 
372  // Specifies fourcc of this sample entry. It needs to be set on write, e.g.
373  // set to 'wvtt' to write WVTTSampleEntry; On read, it is recovered from box
374  // header.
375  FourCC format;
376  uint16_t data_reference_index;
377 
378  // Sub boxes for wvtt text sample entry.
379  WebVTTConfigurationBox config;
380  WebVTTSourceLabelBox label;
381  // Optional MPEG4BitRateBox.
382 };
383 
385  DECLARE_BOX_METHODS(SampleDescription);
386 
387  TrackType type;
388  // TODO(kqyang): Clean up the code to have one single member, e.g. by creating
389  // SampleEntry struct, std::vector<SampleEntry> sample_entries.
390  std::vector<VideoSampleEntry> video_entries;
391  std::vector<AudioSampleEntry> audio_entries;
392  std::vector<TextSampleEntry> text_entries;
393 };
394 
// Element type of DecodingTimeToSample::decoding_time (the stts box).
// NOTE(review): presumably a run of |sample_count| samples that each last
// |sample_delta| — confirm against the reader/writer implementation.
395 struct DecodingTime {
396  uint32_t sample_count;
397  uint32_t sample_delta;
398 };
399 
400 // stts.
402  DECLARE_BOX_METHODS(DecodingTimeToSample);
403 
404  std::vector<DecodingTime> decoding_time;
405 };
406 
408  uint32_t sample_count;
409  // If version == 0, sample_offset is uint32_t;
410  // If version == 1, sample_offset is int32_t.
411  // Use int64_t so both can be supported properly.
412  int64_t sample_offset;
413 };
414 
415 // ctts. Optional.
417  DECLARE_BOX_METHODS(CompositionTimeToSample);
418 
419  std::vector<CompositionOffset> composition_offset;
420 };
421 
// Element type of SampleToChunk::chunk_info (the stsc box): a first-chunk
// number, a samples-per-chunk count and a sample description index.
422 struct ChunkInfo {
423  uint32_t first_chunk;
424  uint32_t samples_per_chunk;
425  uint32_t sample_description_index;
426 };
427 
428 // stsc.
430  DECLARE_BOX_METHODS(SampleToChunk);
431 
432  std::vector<ChunkInfo> chunk_info;
433 };
434 
435 // stsz.
// Holds a |sample_size| value, a sample count and a per-sample size list.
// SampleTable also stores CompactSampleSize contents in this struct.
// NOTE(review): presumably |sizes| is used only when |sample_size| is 0, as
// in the stsz definition — confirm in the .cc.
436 struct SampleSize : FullBox {
437  DECLARE_BOX_METHODS(SampleSize);
438 
439  uint32_t sample_size;
440  uint32_t sample_count;
441  std::vector<uint32_t> sizes;
442 };
443 
444 // stz2.
446  DECLARE_BOX_METHODS(CompactSampleSize);
447 
448  uint8_t field_size;
449  std::vector<uint32_t> sizes;
450 };
451 
452 // co64.
454  DECLARE_BOX_METHODS(ChunkLargeOffset);
455 
456  std::vector<uint64_t> offsets;
457 };
458 
459 // stco.
461  DECLARE_BOX_METHODS(ChunkOffset);
462 };
463 
464 // stss. Optional.
// Holds a list of sample numbers.
// NOTE(review): presumably the numbers of the sync (key) samples — confirm.
465 struct SyncSample : FullBox {
466  DECLARE_BOX_METHODS(SyncSample);
467 
468  std::vector<uint32_t> sample_number;
469 };
470 
474 
475  bool ReadWrite(BoxBuffer* buffer);
476  uint32_t ComputeSize() const;
477 
478  uint8_t is_protected;
479  uint8_t per_sample_iv_size;
480  std::vector<uint8_t> key_id;
481 
482  // For pattern-based encryption.
483  uint8_t crypt_byte_block;
484  uint8_t skip_byte_block;
485 
486  // Present only if |is_protected == 1 && per_sample_iv_size == 0|.
487  std::vector<uint8_t> constant_iv;
488 };
489 
493 
494  bool ReadWrite(BoxBuffer* buffer);
495  uint32_t ComputeSize() const;
496 
497  int16_t roll_distance;
498 };
499 
501  DECLARE_BOX_METHODS(SampleGroupDescription);
502 
503  template <typename T>
504  bool ReadWriteEntries(BoxBuffer* buffer, std::vector<T>* entries);
505 
506  uint32_t grouping_type;
507  // Only present if grouping_type == 'seig'.
508  std::vector<CencSampleEncryptionInfoEntry>
509  cenc_sample_encryption_info_entries;
510  // Only present if grouping_type == 'roll'.
511  std::vector<AudioRollRecoveryEntry> audio_roll_recovery_entries;
512 };
513 
515  enum GroupDescriptionIndexBase {
516  kTrackGroupDescriptionIndexBase = 0,
517  kTrackFragmentGroupDescriptionIndexBase = 0x10000,
518  };
519 
520  uint32_t sample_count;
521  uint32_t group_description_index;
522 };
523 
525  DECLARE_BOX_METHODS(SampleToGroup);
526 
527  uint32_t grouping_type;
528  uint32_t grouping_type_parameter; // Version 1 only.
529  std::vector<SampleToGroupEntry> entries;
530 };
531 
// Container box aggregating the per-track sample tables declared in this
// header: sample description, decoding/composition timing, chunk mapping,
// sample sizes, chunk offsets, sync samples and sample group information.
532 struct SampleTable : Box {
533  DECLARE_BOX_METHODS(SampleTable);
534 
535  SampleDescription description;
536  DecodingTimeToSample decoding_time_to_sample;
537  CompositionTimeToSample composition_time_to_sample;
538  SampleToChunk sample_to_chunk;
539  // Either SampleSize or CompactSampleSize must be present. Stored in
  // SampleSize.
540  SampleSize sample_size;
541  // Either ChunkOffset or ChunkLargeOffset must be present. Stored in
542  // ChunkLargeOffset.
543  ChunkLargeOffset chunk_large_offset;
544  SyncSample sync_sample;
545  std::vector<SampleGroupDescription> sample_group_descriptions;
546  std::vector<SampleToGroup> sample_to_groups;
547 };
548 
550  DECLARE_BOX_METHODS(MediaHeader);
551 
552  uint64_t creation_time;
553  uint64_t modification_time;
554  uint32_t timescale;
555  uint64_t duration;
556  Language language;
557 };
558 
560  DECLARE_BOX_METHODS(VideoMediaHeader);
561 
562  uint16_t graphicsmode;
563  uint16_t opcolor_red;
564  uint16_t opcolor_green;
565  uint16_t opcolor_blue;
566 };
567 
569  DECLARE_BOX_METHODS(SoundMediaHeader);
570 
571  uint16_t balance;
572 };
573 
575  DECLARE_BOX_METHODS(SubtitleMediaHeader);
576 };
577 
579  DECLARE_BOX_METHODS(DataEntryUrl);
580 
581  std::vector<uint8_t> location;
582 };
583 
585  DECLARE_BOX_METHODS(DataReference);
586 
587  // data entry can be either url or urn box. Fix to url box for now.
588  std::vector<DataEntryUrl> data_entry;
589 };
590 
592  DECLARE_BOX_METHODS(DataInformation);
593 
594  DataReference dref;
595 };
596 
598  DECLARE_BOX_METHODS(MediaInformation);
599 
600  DataInformation dinf;
601  SampleTable sample_table;
602  // Exactly one specific media header shall be present, vmhd, smhd, hmhd, nmhd.
603  VideoMediaHeader vmhd;
604  SoundMediaHeader smhd;
605  SubtitleMediaHeader sthd;
606 };
607 
// Container box grouping a track's MediaHeader, HandlerReference and
// MediaInformation.
608 struct Media : Box {
609  DECLARE_BOX_METHODS(Media);
610 
611  MediaHeader header;
612  HandlerReference handler;
613  MediaInformation information;
614 };
615 
// Container box for one track: its TrackHeader, Media, Edit and
// SampleEncryption members.
616 struct Track : Box {
617  DECLARE_BOX_METHODS(Track);
618 
619  TrackHeader header;
620  Media media;
621  Edit edit;
622  SampleEncryption sample_encryption;
623 };
624 
626  DECLARE_BOX_METHODS(MovieExtendsHeader);
627 
628  uint64_t fragment_duration;
629 };
630 
632  DECLARE_BOX_METHODS(TrackExtends);
633 
634  uint32_t track_id;
635  uint32_t default_sample_description_index;
636  uint32_t default_sample_duration;
637  uint32_t default_sample_size;
638  uint32_t default_sample_flags;
639 };
640 
// Container box holding a MovieExtendsHeader and a list of TrackExtends
// entries.
641 struct MovieExtends : Box {
642  DECLARE_BOX_METHODS(MovieExtends);
643 
644  MovieExtendsHeader header;
645  std::vector<TrackExtends> tracks;
646 };
647 
// Top-level movie container: movie header, metadata, movie-extends
// information, the list of tracks and any protection system specific headers.
648 struct Movie : Box {
649  DECLARE_BOX_METHODS(Movie);
650 
651  MovieHeader header;
652  Metadata metadata; // Used to hold version information.
653  MovieExtends extends;
654  std::vector<Track> tracks;
655  std::vector<ProtectionSystemSpecificHeader> pssh;
656 };
657 
659  DECLARE_BOX_METHODS(TrackFragmentDecodeTime);
660 
661  uint64_t decode_time;
662 };
663 
665  DECLARE_BOX_METHODS(MovieFragmentHeader);
666 
667  uint32_t sequence_number;
668 };
669 
671  enum TrackFragmentFlagsMasks {
672  kBaseDataOffsetPresentMask = 0x000001,
673  kSampleDescriptionIndexPresentMask = 0x000002,
674  kDefaultSampleDurationPresentMask = 0x000008,
675  kDefaultSampleSizePresentMask = 0x000010,
676  kDefaultSampleFlagsPresentMask = 0x000020,
677  kDurationIsEmptyMask = 0x010000,
678  kDefaultBaseIsMoofMask = 0x020000,
679  };
680 
681  enum SampleFlagsMasks {
682  kReservedMask = 0xFC000000,
683  kSampleDependsOnMask = 0x03000000,
684  kSampleIsDependedOnMask = 0x00C00000,
685  kSampleHasRedundancyMask = 0x00300000,
686  kSamplePaddingValueMask = 0x000E0000,
687  kNonKeySampleMask = 0x00010000,
688  kSampleDegradationPriorityMask = 0x0000FFFF,
689  };
690 
691  DECLARE_BOX_METHODS(TrackFragmentHeader);
692 
693  uint32_t track_id;
694  uint32_t sample_description_index;
695  uint32_t default_sample_duration;
696  uint32_t default_sample_size;
697  uint32_t default_sample_flags;
698 };
699 
701  enum TrackFragmentFlagsMasks {
702  kDataOffsetPresentMask = 0x000001,
703  kFirstSampleFlagsPresentMask = 0x000004,
704  kSampleDurationPresentMask = 0x000100,
705  kSampleSizePresentMask = 0x000200,
706  kSampleFlagsPresentMask = 0x000400,
707  kSampleCompTimeOffsetsPresentMask = 0x000800,
708  };
709 
710  DECLARE_BOX_METHODS(TrackFragmentRun);
711 
712  uint32_t sample_count;
713  uint32_t data_offset;
714  std::vector<uint32_t> sample_flags;
715  std::vector<uint32_t> sample_sizes;
716  std::vector<uint32_t> sample_durations;
717  std::vector<int64_t> sample_composition_time_offsets;
718 };
719 
// Container box for one track's part of a movie fragment: fragment header,
// sample runs, decode time, sample group data, auxiliary information
// size/offset boxes and sample encryption data.
720 struct TrackFragment : Box {
721  DECLARE_BOX_METHODS(TrackFragment);
722 
723  TrackFragmentHeader header;
724  std::vector<TrackFragmentRun> runs;
  // NOTE(review): presumably true when no TrackFragmentDecodeTime box was
  // found, making |decode_time| meaningless — confirm in the .cc.
725  bool decode_time_absent;
726  TrackFragmentDecodeTime decode_time;
727  std::vector<SampleGroupDescription> sample_group_descriptions;
728  std::vector<SampleToGroup> sample_to_groups;
729  SampleAuxiliaryInformationSize auxiliary_size;
730  SampleAuxiliaryInformationOffset auxiliary_offset;
731  SampleEncryption sample_encryption;
732 };
733 
// Container box for a movie fragment: its header, the per-track fragments and
// any protection system specific headers.
734 struct MovieFragment : Box {
735  DECLARE_BOX_METHODS(MovieFragment);
736 
737  MovieFragmentHeader header;
738  std::vector<TrackFragment> tracks;
739  std::vector<ProtectionSystemSpecificHeader> pssh;
740 };
741 
743  enum SAPType {
744  TypeUnknown = 0,
745  Type1 = 1, // T(ept) = T(dec) = T(sap) = T(ptf)
746  Type2 = 2, // T(ept) = T(dec) = T(sap) < T(ptf)
747  Type3 = 3, // T(ept) < T(dec) = T(sap) <= T(ptf)
748  Type4 = 4, // T(ept) <= T(ptf) < T(dec) = T(sap)
749  Type5 = 5, // T(ept) = T(dec) < T(sap)
750  Type6 = 6, // T(ept) < T(dec) < T(sap)
751  };
752 
753  bool reference_type;
754  uint32_t referenced_size;
755  uint32_t subsegment_duration;
756  bool starts_with_sap;
757  SAPType sap_type;
758  uint32_t sap_delta_time;
759  // We add this field to keep track of earliest_presentation_time in this
760  // subsegment. It is not part of SegmentReference.
761  uint64_t earliest_presentation_time;
762 };
763 
765  DECLARE_BOX_METHODS(SegmentIndex);
766 
767  uint32_t reference_id;
768  uint32_t timescale;
769  uint64_t earliest_presentation_time;
770  uint64_t first_offset;
771  std::vector<SegmentReference> references;
772 };
773 
774 // The actual data is parsed and written separately.
// This struct therefore records only a size for the payload, not the payload
// bytes themselves.
775 struct MediaData : Box {
776  DECLARE_BOX_METHODS(MediaData);
777 
778  uint32_t data_size;
779 };
780 
// Box holding a signed 32-bit source id; embedded in VTTCueBox.
781 struct CueSourceIDBox : Box {
782  DECLARE_BOX_METHODS(CueSourceIDBox);
783  int32_t source_id;
784 };
785 
// Box holding the cue's current time as a string; embedded in VTTCueBox.
786 struct CueTimeBox : Box {
787  DECLARE_BOX_METHODS(CueTimeBox);
788  std::string cue_current_time;
789 };
790 
// Box holding a cue identifier string; embedded in VTTCueBox.
791 struct CueIDBox : Box {
792  DECLARE_BOX_METHODS(CueIDBox);
793  std::string cue_id;
794 };
795 
// Box holding a cue settings string; embedded in VTTCueBox.
796 struct CueSettingsBox : Box {
797  DECLARE_BOX_METHODS(CueSettingsBox);
798  std::string settings;
799 };
800 
// Box holding the cue text; embedded in VTTCueBox.
801 struct CuePayloadBox : Box {
802  DECLARE_BOX_METHODS(CuePayloadBox);
803  std::string cue_text;
804 };
805 
// Box with no data members of its own; only the methods declared by
// DECLARE_BOX_METHODS.
806 struct VTTEmptyCueBox : Box {
807  DECLARE_BOX_METHODS(VTTEmptyCueBox);
808 };
809 
811  DECLARE_BOX_METHODS(VTTAdditionalTextBox);
812  std::string cue_additional_text;
813 };
814 
// Container box for a single cue, composed of the source id, cue id, time,
// settings and payload boxes declared above.
815 struct VTTCueBox : Box {
816  DECLARE_BOX_METHODS(VTTCueBox);
817 
818  CueSourceIDBox cue_source_id;
819  CueIDBox cue_id;
820  CueTimeBox cue_time;
821  CueSettingsBox cue_settings;
822  CuePayloadBox cue_payload;
823 };
824 
// Remove the helper macro once all box structs are declared so it does not
// leak into translation units that include this header. The name must match
// the #define above (DECLARE_BOX_METHODS); undefining any other name is a
// silent no-op.
825 #undef DECLARE_BOX_METHODS
826 
827 } // namespace mp4
828 } // namespace media
829 } // namespace shaka
830 
831 #endif // MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
bool ParseFromBuffer(uint8_t iv_size, bool has_subsamples, BufferReader *reader)
Implemented per http://id3.org/id3v2.4.0-frames.
FourCC BoxType() const override
bool ParseFromSampleEncryptionData(size_t iv_size, std::vector< SampleEncryptionEntry > *sample_encryption_entries) const
bool ReadWrite(uint8_t iv_size, bool has_subsamples, BoxBuffer *buffer)
std::vector< uint8_t > sample_encryption_data
PrivFrame private_frame
We only support PrivateFrame in ID3. Other frames are ignored.