DASH Media Packaging SDK
 All Classes Namespaces Functions Variables Typedefs Enumerator
box_definitions.h
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
6 #define MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
7 
8 #include <vector>
9 
10 #include "packager/media/base/decrypt_config.h"
11 #include "packager/media/formats/mp4/aac_audio_specific_config.h"
12 #include "packager/media/formats/mp4/box.h"
13 #include "packager/media/formats/mp4/es_descriptor.h"
14 #include "packager/media/formats/mp4/fourccs.h"
15 
16 namespace edash_packager {
17 namespace media {
18 
19 class BufferReader;
20 
21 namespace mp4 {
22 
// Kind of track. kInvalid is explicitly 0 and the remaining enumerators take
// the following consecutive values. SampleDescription::type in this header
// holds a value of this enum.
23 enum TrackType {
24  kInvalid = 0,
25  kVideo,
26  kAudio,
27  kHint,
28  kText,
29 };
30 
31 class BoxBuffer;
32 
// Declares the members shared by every box struct in this header that uses
// it: a default constructor, an overriding destructor, an overriding
// BoxType(), and private overrides of ReadWriteInternal() and
// ComputeSizeInternal(). The expansion ends with `public:` so that members
// declared after the macro invocation are public again.
33 #define DECLARE_BOX_METHODS(T) \
34  public: \
35  T(); \
36  ~T() override; \
37  FourCC BoxType() const override; \
38  \
39  private: \
40  bool ReadWriteInternal(BoxBuffer* buffer) override; \
41  uint32_t ComputeSizeInternal() override; \
42  \
43  public:
44 
// Box holding a major brand, a minor version and a list of compatible brands.
// NOTE(review): presumably the ISO BMFF 'ftyp' box; the fourcc is returned by
// BoxType() in the implementation file -- confirm there.
45 struct FileType : Box {
46  DECLARE_BOX_METHODS(FileType);
47 
48  FourCC major_brand;
49  uint32_t minor_version;
50  std::vector<FourCC> compatible_brands;
51 };
52 
54  FourCC BoxType() const override;
55 };
56 
58  DECLARE_BOX_METHODS(ProtectionSystemSpecificHeader);
59 
60  std::vector<uint8_t> raw_box;
61 };
62 
64  DECLARE_BOX_METHODS(SampleAuxiliaryInformationOffset);
65 
66  std::vector<uint64_t> offsets;
67 };
68 
70  DECLARE_BOX_METHODS(SampleAuxiliaryInformationSize);
71 
72  uint8_t default_sample_info_size;
73  uint32_t sample_count;
74  std::vector<uint8_t> sample_info_sizes;
75 };
76 
86  bool ReadWrite(uint8_t iv_size,
87  bool has_subsamples,
88  BoxBuffer* buffer);
95  bool ParseFromBuffer(uint8_t iv_size,
96  bool has_subsamples,
97  BufferReader* reader);
99  uint32_t ComputeSize() const;
102  uint32_t GetTotalSizeOfSubsamples() const;
103 
104  std::vector<uint8_t> initialization_vector;
105  std::vector<SubsampleEntry> subsamples;
106 };
107 
109  enum SampleEncryptionFlags {
110  kUseSubsampleEncryption = 2,
111  };
112 
113  DECLARE_BOX_METHODS(SampleEncryption);
120  size_t iv_size,
121  std::vector<SampleEncryptionEntry>* sample_encryption_entries) const;
122 
125  std::vector<uint8_t> sample_encryption_data;
126 
127  size_t iv_size;
128  std::vector<SampleEncryptionEntry> sample_encryption_entries;
129 };
130 
// Box carrying a single fourcc. ProtectionSchemeInfo holds one as its
// `format` member. The GetActualFormat() helpers of the video/audio sample
// entries in this header read `sinf.format.format`, which presumably resolves
// to this field (the `sinf` member declaration is not visible in this
// listing -- confirm).
131 struct OriginalFormat : Box {
132  DECLARE_BOX_METHODS(OriginalFormat);
133 
134  FourCC format;
135 };
136 
// FullBox with a fourcc `type` and a 32-bit `version`. ProtectionSchemeInfo
// holds one as its `type` member.
// NOTE(review): presumably the ISO BMFF 'schm' box -- confirm via BoxType().
137 struct SchemeType : FullBox {
138  DECLARE_BOX_METHODS(SchemeType);
139 
140  FourCC type;
141  uint32_t version;
142 };
143 
145  DECLARE_BOX_METHODS(TrackEncryption);
146 
147  // Note: this definition is specific to the CENC protection type.
148  bool is_encrypted;
149  uint8_t default_iv_size;
150  std::vector<uint8_t> default_kid;
151 };
152 
// Container box whose only data member is a TrackEncryption box.
// ProtectionSchemeInfo holds one as its `info` member.
153 struct SchemeInfo : Box {
154  DECLARE_BOX_METHODS(SchemeInfo);
155 
156  TrackEncryption track_encryption;
157 };
158 
160  DECLARE_BOX_METHODS(ProtectionSchemeInfo);
161 
162  OriginalFormat format;
163  SchemeType type;
164  SchemeInfo info;
165 };
166 
168  DECLARE_BOX_METHODS(MovieHeader);
169 
170  uint64_t creation_time;
171  uint64_t modification_time;
172  uint32_t timescale;
173  uint64_t duration;
174  int32_t rate;
175  int16_t volume;
176  uint32_t next_track_id;
177 };
178 
180  enum TrackHeaderFlags {
181  kTrackEnabled = 0x000001,
182  kTrackInMovie = 0x000002,
183  kTrackInPreview = 0x000004,
184  };
185 
186  DECLARE_BOX_METHODS(TrackHeader);
187 
188  uint64_t creation_time;
189  uint64_t modification_time;
190  uint32_t track_id;
191  uint64_t duration;
192  int16_t layer;
193  int16_t alternate_group;
194  int16_t volume;
195  // width and height specify the track's visual presentation size as
196  // fixed-point 16.16 values.
197  uint32_t width;
198  uint32_t height;
199 };
200 
202  uint64_t segment_duration;
203  int64_t media_time;
204  int16_t media_rate_integer;
205  int16_t media_rate_fraction;
206 };
207 
// FullBox holding a list of EditListEntry records (segment duration, media
// time and media rate, per the EditListEntry fields). Wrapped by Edit.
208 struct EditList : FullBox {
209  DECLARE_BOX_METHODS(EditList);
210 
211  std::vector<EditListEntry> edits;
212 };
213 
// Container box whose only data member is an EditList. Track holds one as
// its `edit` member.
214 struct Edit : Box {
215  DECLARE_BOX_METHODS(Edit);
216 
217  EditList list;
218 };
219 
221  DECLARE_BOX_METHODS(HandlerReference);
222 
223  FourCC handler_type;
224 };
225 
// Helper holding a language code string. It is not a box: it has no base
// class and does not use DECLARE_BOX_METHODS; instead it exposes its own
// ReadWrite() and ComputeSize(). Used as a member by ID3v2 and MediaHeader
// in this header.
226 struct Language {
227  bool ReadWrite(BoxBuffer* buffer);
228  uint32_t ComputeSize() const;
229 
230  std::string code;
231 };
232 
// Helper holding an `owner` and a `value` string. Like Language, it is not a
// box and exposes its own ReadWrite() and ComputeSize().
// NOTE(review): the residue at the end of this listing contains "Implemented
// per http://id3.org/id3v2.4.0-frames.", which presumably is the original doc
// comment of this struct (listing line 233 is missing) -- confirm.
234 struct PrivFrame {
235  bool ReadWrite(BoxBuffer* buffer);
236  uint32_t ComputeSize() const;
237 
238  std::string owner;
239  std::string value;
240 };
241 
// FullBox holding a Language. Metadata holds one as its `id3v2` member.
// NOTE(review): listing lines 249-250 are missing from this body. The residue
// at the end of this listing ("PrivFrame private_frame", "We only support
// PrivateFrame in ID3. Other frames are ignored.") suggests a
// `PrivFrame private_frame` member was declared there -- confirm against the
// real header.
244 struct ID3v2 : FullBox {
245  DECLARE_BOX_METHODS(ID3v2);
246 
247  Language language;
248 
251 };
252 
// FullBox combining a HandlerReference and an ID3v2 box. Movie holds one as
// its `metadata` member, where it is commented as holding version
// information.
253 struct Metadata : FullBox {
254  DECLARE_BOX_METHODS(Metadata);
255 
256  HandlerReference handler;
257  ID3v2 id3v2;
258 };
259 
261  DECLARE_BOX_METHODS(CodecConfigurationRecord);
262 
263  FourCC box_type;
264  // Contains full codec configuration record, including possible extension
265  // boxes.
266  std::vector<uint8_t> data;
267 };
268 
270  DECLARE_BOX_METHODS(PixelAspectRatio);
271 
272  uint32_t h_spacing;
273  uint32_t v_spacing;
274 };
275 
277  DECLARE_BOX_METHODS(VideoSampleEntry);
278  // Returns actual format of this sample entry.
279  FourCC GetActualFormat() const {
280  return format == FOURCC_ENCV ? sinf.format.format : format;
281  }
282 
283  FourCC format;
284  uint16_t data_reference_index;
285  uint16_t width;
286  uint16_t height;
287 
288  PixelAspectRatio pixel_aspect;
290  CodecConfigurationRecord codec_config_record;
291 };
292 
294  DECLARE_BOX_METHODS(ElementaryStreamDescriptor);
295 
296  AACAudioSpecificConfig aac_audio_specific_config;
297  ESDescriptor es_descriptor;
298 };
299 
// Box with DTS audio parameters: sampling frequency, maximum and average
// bitrate, PCM sample depth, plus an opaque byte vector `extra_data`.
// The audio sample entry in this header holds one as its `ddts` member.
// NOTE(review): units of the frequency/bitrate fields are not stated here.
300 struct DTSSpecific : Box {
301  DECLARE_BOX_METHODS(DTSSpecific);
302 
303  uint32_t sampling_frequency;
304  uint32_t max_bitrate;
305  uint32_t avg_bitrate;
306  uint8_t pcm_sample_depth;
307  std::vector<uint8_t> extra_data;
308 };
309 
// Box whose payload is kept as an uninterpreted byte vector. The audio sample
// entry in this header holds one as its `dac3` member.
310 struct AC3Specific : Box {
311  DECLARE_BOX_METHODS(AC3Specific);
312 
313  std::vector<uint8_t> data;
314 };
315 
// Box whose payload is kept as an uninterpreted byte vector. The audio sample
// entry in this header holds one as its `dec3` member.
316 struct EC3Specific : Box {
317  DECLARE_BOX_METHODS(EC3Specific);
318 
319  std::vector<uint8_t> data;
320 };
321 
323  DECLARE_BOX_METHODS(AudioSampleEntry);
324  // Returns actual format of this sample entry.
325  FourCC GetActualFormat() const {
326  return format == FOURCC_ENCA ? sinf.format.format : format;
327  }
328 
329  FourCC format;
330  uint16_t data_reference_index;
331  uint16_t channelcount;
332  uint16_t samplesize;
333  uint32_t samplerate;
334 
336 
338  DTSSpecific ddts;
339  AC3Specific dac3;
340  EC3Specific dec3;
341 };
342 
344  DECLARE_BOX_METHODS(WebVTTConfigurationBox);
345  std::string config;
346 };
347 
349  DECLARE_BOX_METHODS(WebVTTSourceLabelBox);
350  std::string source_label;
351 };
352 
354  DECLARE_BOX_METHODS(TextSampleEntry);
355 
356  // Specifies fourcc of this sample entry. It needs to be set on write, e.g.
357  // set to 'wvtt' to write WVTTSampleEntry; On read, it is recovered from box
358  // header.
359  FourCC format;
360  uint16_t data_reference_index;
361 
362  // Sub boxes for wvtt text sample entry.
363  WebVTTConfigurationBox config;
364  WebVTTSourceLabelBox label;
365  // Optional MPEG4BitRateBox.
366 };
367 
369  DECLARE_BOX_METHODS(SampleDescription);
370 
371  TrackType type;
372  // TODO(kqyang): Clean up the code to have one single member, e.g. by creating
373  // SampleEntry struct, std::vector<SampleEntry> sample_entries.
374  std::vector<VideoSampleEntry> video_entries;
375  std::vector<AudioSampleEntry> audio_entries;
376  std::vector<TextSampleEntry> text_entries;
377 };
378 
// One entry of DecodingTimeToSample::decoding_time (the "stts" box per the
// comment that follows this struct): a sample count paired with a sample
// delta.
379 struct DecodingTime {
380  uint32_t sample_count;
381  uint32_t sample_delta;
382 };
383 
384 // stts.
386  DECLARE_BOX_METHODS(DecodingTimeToSample);
387 
388  std::vector<DecodingTime> decoding_time;
389 };
390 
392  uint32_t sample_count;
393  // If version == 0, sample_offset is uint32_t;
394  // If version == 1, sample_offset is int32_t.
395  // Use int64_t so both can be supported properly.
396  int64_t sample_offset;
397 };
398 
399 // ctts. Optional.
401  DECLARE_BOX_METHODS(CompositionTimeToSample);
402 
403  std::vector<CompositionOffset> composition_offset;
404 };
405 
// One entry of SampleToChunk::chunk_info (the "stsc" box per the comment that
// follows this struct): first chunk, samples per chunk and sample description
// index.
406 struct ChunkInfo {
407  uint32_t first_chunk;
408  uint32_t samples_per_chunk;
409  uint32_t sample_description_index;
410 };
411 
412 // stsc.
414  DECLARE_BOX_METHODS(SampleToChunk);
415 
416  std::vector<ChunkInfo> chunk_info;
417 };
418 
419 // stsz.
// FullBox with a `sample_size`, a `sample_count` and a per-sample `sizes`
// vector. Per the comment in SampleTable, sizes coming from a
// CompactSampleSize box are stored in this struct as well.
// NOTE(review): presumably `sizes` is only meaningful when `sample_size` is
// 0, as in the ISO BMFF 'stsz' definition -- confirm in the implementation.
420 struct SampleSize : FullBox {
421  DECLARE_BOX_METHODS(SampleSize);
422 
423  uint32_t sample_size;
424  uint32_t sample_count;
425  std::vector<uint32_t> sizes;
426 };
427 
428 // stz2.
430  DECLARE_BOX_METHODS(CompactSampleSize);
431 
432  uint8_t field_size;
433  std::vector<uint32_t> sizes;
434 };
435 
436 // co64.
438  DECLARE_BOX_METHODS(ChunkLargeOffset);
439 
440  std::vector<uint64_t> offsets;
441 };
442 
443 // stco.
445  DECLARE_BOX_METHODS(ChunkOffset);
446 };
447 
448 // stss. Optional.
// FullBox holding a list of sample numbers. SampleTable holds one as its
// `sync_sample` member.
449 struct SyncSample : FullBox {
450  DECLARE_BOX_METHODS(SyncSample);
451 
452  std::vector<uint32_t> sample_number;
453 };
454 
// Container box aggregating the per-sample tables of a track: sample
// description, decoding/composition time tables, sample-to-chunk mapping,
// sample sizes, chunk offsets and sync samples. MediaInformation holds one as
// its `sample_table` member.
455 struct SampleTable : Box {
456  DECLARE_BOX_METHODS(SampleTable);
457 
458  SampleDescription description;
459  DecodingTimeToSample decoding_time_to_sample;
460  CompositionTimeToSample composition_time_to_sample;
461  SampleToChunk sample_to_chunk;
462  // Either SampleSize or CompactSampleSize must be present. Stored in SampleSize.
463  SampleSize sample_size;
464  // Either ChunkOffset or ChunkLargeOffset must be present. Stored in
465  // ChunkLargeOffset.
466  ChunkLargeOffset chunk_large_offset;
467  SyncSample sync_sample;
468 };
469 
471  DECLARE_BOX_METHODS(MediaHeader);
472 
473  uint64_t creation_time;
474  uint64_t modification_time;
475  uint32_t timescale;
476  uint64_t duration;
477  Language language;
478 };
479 
481  DECLARE_BOX_METHODS(VideoMediaHeader);
482 
483  uint16_t graphicsmode;
484  uint16_t opcolor_red;
485  uint16_t opcolor_green;
486  uint16_t opcolor_blue;
487 };
488 
490  DECLARE_BOX_METHODS(SoundMediaHeader);
491 
492  uint16_t balance;
493 };
494 
496  DECLARE_BOX_METHODS(SubtitleMediaHeader);
497 };
498 
500  DECLARE_BOX_METHODS(DataEntryUrl);
501 
502  std::vector<uint8_t> location;
503 };
504 
506  DECLARE_BOX_METHODS(DataReference);
507 
508  // data entry can be either url or urn box. Fix to url box for now.
509  std::vector<DataEntryUrl> data_entry;
510 };
511 
513  DECLARE_BOX_METHODS(DataInformation);
514 
515  DataReference dref;
516 };
517 
519  DECLARE_BOX_METHODS(MediaInformation);
520 
521  DataInformation dinf;
522  SampleTable sample_table;
523  // Exactly one specific media header shall be present, vmhd, smhd, hmhd, nmhd.
524  VideoMediaHeader vmhd;
525  SoundMediaHeader smhd;
526  SubtitleMediaHeader sthd;
527 };
528 
// Container box grouping a track's MediaHeader, HandlerReference and
// MediaInformation. Track holds one as its `media` member.
529 struct Media : Box {
530  DECLARE_BOX_METHODS(Media);
531 
532  MediaHeader header;
533  HandlerReference handler;
534  MediaInformation information;
535 };
536 
// Container box for one track: its TrackHeader, Media, Edit and
// SampleEncryption boxes. Movie holds a vector of these as `tracks`.
537 struct Track : Box {
538  DECLARE_BOX_METHODS(Track);
539 
540  TrackHeader header;
541  Media media;
542  Edit edit;
543  SampleEncryption sample_encryption;
544 };
545 
547  DECLARE_BOX_METHODS(MovieExtendsHeader);
548 
549  uint64_t fragment_duration;
550 };
551 
553  DECLARE_BOX_METHODS(TrackExtends);
554 
555  uint32_t track_id;
556  uint32_t default_sample_description_index;
557  uint32_t default_sample_duration;
558  uint32_t default_sample_size;
559  uint32_t default_sample_flags;
560 };
561 
// Container box holding a MovieExtendsHeader and one TrackExtends per entry
// of `tracks`. Movie holds one as its `extends` member.
562 struct MovieExtends : Box {
563  DECLARE_BOX_METHODS(MovieExtends);
564 
565  MovieExtendsHeader header;
566  std::vector<TrackExtends> tracks;
567 };
568 
// Top-level container box for movie-wide data: the movie header, a metadata
// box, the movie-extends box, all tracks and any protection system specific
// header boxes.
569 struct Movie : Box {
570  DECLARE_BOX_METHODS(Movie);
571 
572  MovieHeader header;
573  Metadata metadata; // Used to hold version information.
574  MovieExtends extends;
575  std::vector<Track> tracks;
576  std::vector<ProtectionSystemSpecificHeader> pssh;
577 };
578 
580  DECLARE_BOX_METHODS(TrackFragmentDecodeTime);
581 
582  uint64_t decode_time;
583 };
584 
586  DECLARE_BOX_METHODS(MovieFragmentHeader);
587 
588  uint32_t sequence_number;
589 };
590 
592  enum TrackFragmentFlagsMasks {
593  kBaseDataOffsetPresentMask = 0x000001,
594  kSampleDescriptionIndexPresentMask = 0x000002,
595  kDefaultSampleDurationPresentMask = 0x000008,
596  kDefaultSampleSizePresentMask = 0x000010,
597  kDefaultSampleFlagsPresentMask = 0x000020,
598  kDurationIsEmptyMask = 0x010000,
599  kDefaultBaseIsMoofMask = 0x020000,
600  };
601 
602  enum SampleFlagsMasks {
603  kReservedMask = 0xFC000000,
604  kSampleDependsOnMask = 0x03000000,
605  kSampleIsDependedOnMask = 0x00C00000,
606  kSampleHasRedundancyMask = 0x00300000,
607  kSamplePaddingValueMask = 0x000E0000,
608  kNonKeySampleMask = 0x00010000,
609  kSampleDegradationPriorityMask = 0x0000FFFF,
610  };
611 
612  DECLARE_BOX_METHODS(TrackFragmentHeader);
613 
614  uint32_t track_id;
615  uint32_t sample_description_index;
616  uint32_t default_sample_duration;
617  uint32_t default_sample_size;
618  uint32_t default_sample_flags;
619 };
620 
622  enum TrackFragmentFlagsMasks {
623  kDataOffsetPresentMask = 0x000001,
624  kFirstSampleFlagsPresentMask = 0x000004,
625  kSampleDurationPresentMask = 0x000100,
626  kSampleSizePresentMask = 0x000200,
627  kSampleFlagsPresentMask = 0x000400,
628  kSampleCompTimeOffsetsPresentMask = 0x000800,
629  };
630 
631  DECLARE_BOX_METHODS(TrackFragmentRun);
632 
633  uint32_t sample_count;
634  uint32_t data_offset;
635  std::vector<uint32_t> sample_flags;
636  std::vector<uint32_t> sample_sizes;
637  std::vector<uint32_t> sample_durations;
638  std::vector<int64_t> sample_composition_time_offsets;
639 };
640 
642  enum GroupDescriptionIndexBase {
643  kTrackGroupDescriptionIndexBase = 0,
644  kTrackFragmentGroupDescriptionIndexBase = 0x10000,
645  };
646 
647  uint32_t sample_count;
648  uint32_t group_description_index;
649 };
650 
652  DECLARE_BOX_METHODS(SampleToGroup);
653 
654  uint32_t grouping_type;
655  uint32_t grouping_type_parameter; // Version 1 only.
656  std::vector<SampleToGroupEntry> entries;
657 };
658 
662 
663  bool is_encrypted;
664  uint8_t iv_size;
665  std::vector<uint8_t> key_id;
666 };
667 
669  DECLARE_BOX_METHODS(SampleGroupDescription);
670 
671  uint32_t grouping_type;
672  std::vector<CencSampleEncryptionInfoEntry> entries;
673 };
674 
// Container box for one track's part of a movie fragment: the fragment
// header, its runs, decode time, sample grouping boxes, auxiliary information
// size/offset boxes and the sample encryption box. MovieFragment holds a
// vector of these as `tracks`.
// NOTE(review): `decode_time_absent` presumably records that no
// TrackFragmentDecodeTime box was found when parsing -- confirm in the
// implementation.
675 struct TrackFragment : Box {
676  DECLARE_BOX_METHODS(TrackFragment);
677 
678  TrackFragmentHeader header;
679  std::vector<TrackFragmentRun> runs;
680  bool decode_time_absent;
681  TrackFragmentDecodeTime decode_time;
682  SampleToGroup sample_to_group;
683  SampleGroupDescription sample_group_description;
684  SampleAuxiliaryInformationSize auxiliary_size;
685  SampleAuxiliaryInformationOffset auxiliary_offset;
686  SampleEncryption sample_encryption;
687 };
688 
// Container box for one movie fragment: the fragment header, its track
// fragments and any protection system specific header boxes.
689 struct MovieFragment : Box {
690  DECLARE_BOX_METHODS(MovieFragment);
691 
692  MovieFragmentHeader header;
693  std::vector<TrackFragment> tracks;
694  std::vector<ProtectionSystemSpecificHeader> pssh;
695 };
696 
698  enum SAPType {
699  TypeUnknown = 0,
700  Type1 = 1, // T(ept) = T(dec) = T(sap) = T(ptf)
701  Type2 = 2, // T(ept) = T(dec) = T(sap) < T(ptf)
702  Type3 = 3, // T(ept) < T(dec) = T(sap) <= T(ptf)
703  Type4 = 4, // T(ept) <= T(ptf) < T(dec) = T(sap)
704  Type5 = 5, // T(ept) = T(dec) < T(sap)
705  Type6 = 6, // T(ept) < T(dec) < T(sap)
706  };
707 
708  bool reference_type;
709  uint32_t referenced_size;
710  uint32_t subsegment_duration;
711  bool starts_with_sap;
712  SAPType sap_type;
713  uint32_t sap_delta_time;
714  // We add this field to keep track of earliest_presentation_time in this
715  // subsegment. It is not part of SegmentReference.
716  uint64_t earliest_presentation_time;
717 };
718 
720  DECLARE_BOX_METHODS(SegmentIndex);
721 
722  uint32_t reference_id;
723  uint32_t timescale;
724  uint64_t earliest_presentation_time;
725  uint64_t first_offset;
726  std::vector<SegmentReference> references;
727 };
728 
729 // The actual data is parsed and written separately.
// Only the payload size is tracked in this struct; the payload bytes are not
// members of it.
// NOTE(review): whether `data_size` includes the box header is not visible
// here -- confirm in the implementation.
730 struct MediaData : Box {
731  DECLARE_BOX_METHODS(MediaData);
732 
733  uint32_t data_size;
734 };
735 
// Box carrying a signed 32-bit source id. VTTCueBox holds one as its
// `cue_source_id` member.
736 struct CueSourceIDBox : Box {
737  DECLARE_BOX_METHODS(CueSourceIDBox);
738  int32_t source_id;
739 };
740 
// Box carrying the cue's current time as a string. VTTCueBox holds one as
// its `cue_time` member.
741 struct CueTimeBox : Box {
742  DECLARE_BOX_METHODS(CueTimeBox);
743  std::string cue_current_time;
744 };
745 
// Box carrying a cue identifier string. VTTCueBox holds one as its `cue_id`
// member.
746 struct CueIDBox : Box {
747  DECLARE_BOX_METHODS(CueIDBox);
748  std::string cue_id;
749 };
750 
// Box carrying cue settings as a string. VTTCueBox holds one as its
// `cue_settings` member.
751 struct CueSettingsBox : Box {
752  DECLARE_BOX_METHODS(CueSettingsBox);
753  std::string settings;
754 };
755 
// Box carrying the cue text. VTTCueBox holds one as its `cue_payload`
// member.
756 struct CuePayloadBox : Box {
757  DECLARE_BOX_METHODS(CuePayloadBox);
758  std::string cue_text;
759 };
760 
// Box with no data members of its own; it only has the methods declared by
// DECLARE_BOX_METHODS.
761 struct VTTEmptyCueBox : Box {
762  DECLARE_BOX_METHODS(VTTEmptyCueBox);
763 };
764 
766  DECLARE_BOX_METHODS(VTTAdditionalTextBox);
767  std::string cue_additional_text;
768 };
769 
// Container box for a single cue, aggregating its source id, id, time,
// settings and payload boxes.
770 struct VTTCueBox : Box {
771  DECLARE_BOX_METHODS(VTTCueBox);
772 
773  CueSourceIDBox cue_source_id;
774  CueIDBox cue_id;
775  CueTimeBox cue_time;
776  CueSettingsBox cue_settings;
777  CuePayloadBox cue_payload;
778 };
779 
// Undefine the declaration helper so it does not leak into files that include
// this header. The macro defined in this file is DECLARE_BOX_METHODS; the
// previous `#undef DECLARE_BOX` named a macro that is never defined here and
// therefore had no effect.
780 #undef DECLARE_BOX_METHODS
781 
782 } // namespace mp4
783 } // namespace media
784 } // namespace edash_packager
785 
786 #endif // MEDIA_FORMATS_MP4_BOX_DEFINITIONS_H_
Implemented per http://id3.org/id3v2.4.0-frames.
bool ParseFromSampleEncryptionData(size_t iv_size, std::vector< SampleEncryptionEntry > *sample_encryption_entries) const
PrivFrame private_frame
We only support PrivateFrame in ID3. Other frames are ignored.
bool ParseFromBuffer(uint8_t iv_size, bool has_subsamples, BufferReader *reader)
bool ReadWrite(uint8_t iv_size, bool has_subsamples, BoxBuffer *buffer)