Shaka Packager SDK
webm_parser.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "packager/media/formats/webm/webm_parser.h"
6 
7 // This file contains code to parse WebM file elements. It was created
8 // from information in the Matroska spec.
9 // http://www.matroska.org/technical/specs/index.html
10 // This file contains code for encrypted WebM. Current WebM
11 // encrypted request for comments specification is here
12 // http://wiki.webmproject.org/encryption/webm-encryption-rfc
13 
#include <cstring>
#include <limits>

#include "packager/base/logging.h"
#include "packager/base/numerics/safe_conversions.h"
#include "packager/media/formats/webm/webm_constants.h"
19 
20 namespace shaka {
21 namespace media {
22 
// How the payload of an element is interpreted by the parser.
enum ElementType {
  UNKNOWN = 0,  // ID is not listed for the enclosing list.
  LIST = 1,     // Referred to as Master Element in the Matroska spec.
  UINT = 2,     // Big-endian unsigned integer.
  FLOAT = 3,    // 4 or 8 byte big-endian floating point number.
  BINARY = 4,   // Opaque bytes handed to the client unchanged.
  STRING = 5,   // Text, cut at the first NUL byte if there is one.
  SKIP = 6,     // Valid element whose payload is ignored.
};
32 
33 struct ElementIdInfo {
34  ElementType type_;
35  int id_;
36 };
37 
38 struct ListElementInfo {
39  int id_;
40  int level_;
41  const ElementIdInfo* id_info_;
42  int id_info_count_;
43 };
44 
45 // The following are tables indicating what IDs are valid sub-elements
46 // of particular elements. If an element is encountered that doesn't
47 // appear in the list, a parsing error is signalled. Some elements are
48 // marked as SKIP because they are valid, but we don't care about them
49 // right now.
50 static const ElementIdInfo kEBMLHeaderIds[] = {
51  {UINT, kWebMIdEBMLVersion},
52  {UINT, kWebMIdEBMLReadVersion},
53  {UINT, kWebMIdEBMLMaxIDLength},
54  {UINT, kWebMIdEBMLMaxSizeLength},
55  {STRING, kWebMIdDocType},
56  {UINT, kWebMIdDocTypeVersion},
57  {UINT, kWebMIdDocTypeReadVersion},
58 };
59 
60 static const ElementIdInfo kSegmentIds[] = {
61  {LIST, kWebMIdSeekHead},
62  {LIST, kWebMIdInfo},
63  {LIST, kWebMIdCluster},
64  {LIST, kWebMIdTracks},
65  {LIST, kWebMIdCues},
66  {LIST, kWebMIdAttachments},
67  {LIST, kWebMIdChapters},
68  {LIST, kWebMIdTags},
69 };
70 
71 static const ElementIdInfo kSeekHeadIds[] = {
72  {LIST, kWebMIdSeek},
73 };
74 
75 static const ElementIdInfo kSeekIds[] = {
76  {BINARY, kWebMIdSeekID},
77  {UINT, kWebMIdSeekPosition},
78 };
79 
80 static const ElementIdInfo kInfoIds[] = {
81  {BINARY, kWebMIdSegmentUID},
82  {STRING, kWebMIdSegmentFilename},
83  {BINARY, kWebMIdPrevUID},
84  {STRING, kWebMIdPrevFilename},
85  {BINARY, kWebMIdNextUID},
86  {STRING, kWebMIdNextFilename},
87  {BINARY, kWebMIdSegmentFamily},
88  {LIST, kWebMIdChapterTranslate},
89  {UINT, kWebMIdTimecodeScale},
90  {FLOAT, kWebMIdDuration},
91  {BINARY, kWebMIdDateUTC},
92  {STRING, kWebMIdTitle},
93  {STRING, kWebMIdMuxingApp},
94  {STRING, kWebMIdWritingApp},
95 };
96 
97 static const ElementIdInfo kChapterTranslateIds[] = {
98  {UINT, kWebMIdChapterTranslateEditionUID},
99  {UINT, kWebMIdChapterTranslateCodec},
100  {BINARY, kWebMIdChapterTranslateID},
101 };
102 
103 static const ElementIdInfo kClusterIds[] = {
104  {BINARY, kWebMIdSimpleBlock},
105  {UINT, kWebMIdTimecode},
106  {LIST, kWebMIdSilentTracks},
107  {UINT, kWebMIdPosition},
108  {UINT, kWebMIdPrevSize},
109  {LIST, kWebMIdBlockGroup},
110 };
111 
112 static const ElementIdInfo kSilentTracksIds[] = {
113  {UINT, kWebMIdSilentTrackNumber},
114 };
115 
116 static const ElementIdInfo kBlockGroupIds[] = {
117  {BINARY, kWebMIdBlock},
118  {LIST, kWebMIdBlockAdditions},
119  {UINT, kWebMIdBlockDuration},
120  {UINT, kWebMIdReferencePriority},
121  {BINARY, kWebMIdReferenceBlock},
122  {BINARY, kWebMIdCodecState},
123  {BINARY, kWebMIdDiscardPadding},
124  {LIST, kWebMIdSlices},
125 };
126 
127 static const ElementIdInfo kBlockAdditionsIds[] = {
128  {LIST, kWebMIdBlockMore},
129 };
130 
131 static const ElementIdInfo kBlockMoreIds[] = {
132  {UINT, kWebMIdBlockAddID},
133  {BINARY, kWebMIdBlockAdditional},
134 };
135 
136 static const ElementIdInfo kSlicesIds[] = {
137  {LIST, kWebMIdTimeSlice},
138 };
139 
140 static const ElementIdInfo kTimeSliceIds[] = {
141  {UINT, kWebMIdLaceNumber},
142 };
143 
144 static const ElementIdInfo kTracksIds[] = {
145  {LIST, kWebMIdTrackEntry},
146 };
147 
148 static const ElementIdInfo kTrackEntryIds[] = {
149  {UINT, kWebMIdTrackNumber},
150  {BINARY, kWebMIdTrackUID},
151  {UINT, kWebMIdTrackType},
152  {UINT, kWebMIdFlagEnabled},
153  {UINT, kWebMIdFlagDefault},
154  {UINT, kWebMIdFlagForced},
155  {UINT, kWebMIdFlagLacing},
156  {UINT, kWebMIdMinCache},
157  {UINT, kWebMIdMaxCache},
158  {UINT, kWebMIdDefaultDuration},
159  {FLOAT, kWebMIdTrackTimecodeScale},
160  {UINT, kWebMIdMaxBlockAdditionId},
161  {STRING, kWebMIdName},
162  {STRING, kWebMIdLanguage},
163  {STRING, kWebMIdCodecID},
164  {BINARY, kWebMIdCodecPrivate},
165  {STRING, kWebMIdCodecName},
166  {UINT, kWebMIdAttachmentLink},
167  {UINT, kWebMIdCodecDecodeAll},
168  {UINT, kWebMIdTrackOverlay},
169  {UINT, kWebMIdCodecDelay},
170  {UINT, kWebMIdSeekPreRoll},
171  {LIST, kWebMIdTrackTranslate},
172  {LIST, kWebMIdVideo},
173  {LIST, kWebMIdAudio},
174  {LIST, kWebMIdTrackOperation},
175  {LIST, kWebMIdContentEncodings},
176 };
177 
178 static const ElementIdInfo kTrackTranslateIds[] = {
179  {UINT, kWebMIdTrackTranslateEditionUID},
180  {UINT, kWebMIdTrackTranslateCodec},
181  {BINARY, kWebMIdTrackTranslateTrackID},
182 };
183 
184 static const ElementIdInfo kVideoIds[] = {
185  {UINT, kWebMIdFlagInterlaced},
186  {UINT, kWebMIdStereoMode},
187  {UINT, kWebMIdAlphaMode},
188  {UINT, kWebMIdPixelWidth},
189  {UINT, kWebMIdPixelHeight},
190  {UINT, kWebMIdPixelCropBottom},
191  {UINT, kWebMIdPixelCropTop},
192  {UINT, kWebMIdPixelCropLeft},
193  {UINT, kWebMIdPixelCropRight},
194  {UINT, kWebMIdDisplayWidth},
195  {UINT, kWebMIdDisplayHeight},
196  {UINT, kWebMIdDisplayUnit},
197  {UINT, kWebMIdAspectRatioType},
198  {BINARY, kWebMIdColorSpace},
199  {FLOAT, kWebMIdFrameRate},
200  {LIST, kWebMIdColor},
201  {LIST, kWebMIdProjection},
202 };
203 
204 static const ElementIdInfo kColorIds[] = {
205  {UINT, kWebMIdColorMatrixCoefficients},
206  {UINT, kWebMIdColorBitsPerChannel},
207  {UINT, kWebMIdColorChromaSubsamplingHorz},
208  {UINT, kWebMIdColorChromaSubsamplingVert},
209  {UINT, kWebMIdColorCbSamplingHorz},
210  {UINT, kWebMIdColorCbSamplingVert},
211  {UINT, kWebMIdColorChromaSitingHorz},
212  {UINT, kWebMIdColorChromaSitingVert},
213  {UINT, kWebMIdColorRange},
214  {UINT, kWebMIdColorTransferCharacteristics},
215  {UINT, kWebMIdColorPrimaries},
216  {UINT, kWebMIdColorMaxCLL},
217  {UINT, kWebMIdColorMaxFALL},
218  {LIST, kWebMIdColorMasteringMetadata},
219 };
220 
221 static const ElementIdInfo kProjectionIds[] = {
222  {UINT, kWebMIdProjectionType},
223 };
224 
225 static const ElementIdInfo kAudioIds[] = {
226  {FLOAT, kWebMIdSamplingFrequency},
227  {FLOAT, kWebMIdOutputSamplingFrequency},
228  {UINT, kWebMIdChannels},
229  {UINT, kWebMIdBitDepth},
230 };
231 
232 static const ElementIdInfo kTrackOperationIds[] = {
233  {LIST, kWebMIdTrackCombinePlanes},
234  {LIST, kWebMIdJoinBlocks},
235 };
236 
237 static const ElementIdInfo kTrackCombinePlanesIds[] = {
238  {LIST, kWebMIdTrackPlane},
239 };
240 
241 static const ElementIdInfo kTrackPlaneIds[] = {
242  {UINT, kWebMIdTrackPlaneUID},
243  {UINT, kWebMIdTrackPlaneType},
244 };
245 
246 static const ElementIdInfo kJoinBlocksIds[] = {
247  {UINT, kWebMIdTrackJoinUID},
248 };
249 
250 static const ElementIdInfo kContentEncodingsIds[] = {
251  {LIST, kWebMIdContentEncoding},
252 };
253 
254 static const ElementIdInfo kContentEncodingIds[] = {
255  {UINT, kWebMIdContentEncodingOrder},
256  {UINT, kWebMIdContentEncodingScope},
257  {UINT, kWebMIdContentEncodingType},
258  {LIST, kWebMIdContentCompression},
259  {LIST, kWebMIdContentEncryption},
260 };
261 
262 static const ElementIdInfo kContentCompressionIds[] = {
263  {UINT, kWebMIdContentCompAlgo},
264  {BINARY, kWebMIdContentCompSettings},
265 };
266 
267 static const ElementIdInfo kContentEncryptionIds[] = {
268  {LIST, kWebMIdContentEncAESSettings},
269  {UINT, kWebMIdContentEncAlgo},
270  {BINARY, kWebMIdContentEncKeyID},
271  {BINARY, kWebMIdContentSignature},
272  {BINARY, kWebMIdContentSigKeyID},
273  {UINT, kWebMIdContentSigAlgo},
274  {UINT, kWebMIdContentSigHashAlgo},
275 };
276 
277 static const ElementIdInfo kContentEncAESSettingsIds[] = {
278  {UINT, kWebMIdAESSettingsCipherMode},
279 };
280 
281 static const ElementIdInfo kCuesIds[] = {
282  {LIST, kWebMIdCuePoint},
283 };
284 
285 static const ElementIdInfo kCuePointIds[] = {
286  {UINT, kWebMIdCueTime},
287  {LIST, kWebMIdCueTrackPositions},
288 };
289 
290 static const ElementIdInfo kCueTrackPositionsIds[] = {
291  {UINT, kWebMIdCueTrack},
292  {UINT, kWebMIdCueClusterPosition},
293  {UINT, kWebMIdCueBlockNumber},
294  {UINT, kWebMIdCueCodecState},
295  {LIST, kWebMIdCueReference},
296 };
297 
298 static const ElementIdInfo kCueReferenceIds[] = {
299  {UINT, kWebMIdCueRefTime},
300 };
301 
302 static const ElementIdInfo kAttachmentsIds[] = {
303  {LIST, kWebMIdAttachedFile},
304 };
305 
306 static const ElementIdInfo kAttachedFileIds[] = {
307  {STRING, kWebMIdFileDescription},
308  {STRING, kWebMIdFileName},
309  {STRING, kWebMIdFileMimeType},
310  {BINARY, kWebMIdFileData},
311  {UINT, kWebMIdFileUID},
312 };
313 
314 static const ElementIdInfo kChaptersIds[] = {
315  {LIST, kWebMIdEditionEntry},
316 };
317 
318 static const ElementIdInfo kEditionEntryIds[] = {
319  {UINT, kWebMIdEditionUID},
320  {UINT, kWebMIdEditionFlagHidden},
321  {UINT, kWebMIdEditionFlagDefault},
322  {UINT, kWebMIdEditionFlagOrdered},
323  {LIST, kWebMIdChapterAtom},
324 };
325 
326 static const ElementIdInfo kChapterAtomIds[] = {
327  {UINT, kWebMIdChapterUID},
328  {UINT, kWebMIdChapterTimeStart},
329  {UINT, kWebMIdChapterTimeEnd},
330  {UINT, kWebMIdChapterFlagHidden},
331  {UINT, kWebMIdChapterFlagEnabled},
332  {BINARY, kWebMIdChapterSegmentUID},
333  {UINT, kWebMIdChapterSegmentEditionUID},
334  {UINT, kWebMIdChapterPhysicalEquiv},
335  {LIST, kWebMIdChapterTrack},
336  {LIST, kWebMIdChapterDisplay},
337  {LIST, kWebMIdChapProcess},
338 };
339 
340 static const ElementIdInfo kChapterTrackIds[] = {
341  {UINT, kWebMIdChapterTrackNumber},
342 };
343 
344 static const ElementIdInfo kChapterDisplayIds[] = {
345  {STRING, kWebMIdChapString},
346  {STRING, kWebMIdChapLanguage},
347  {STRING, kWebMIdChapCountry},
348 };
349 
350 static const ElementIdInfo kChapProcessIds[] = {
351  {UINT, kWebMIdChapProcessCodecID},
352  {BINARY, kWebMIdChapProcessPrivate},
353  {LIST, kWebMIdChapProcessCommand},
354 };
355 
356 static const ElementIdInfo kChapProcessCommandIds[] = {
357  {UINT, kWebMIdChapProcessTime},
358  {BINARY, kWebMIdChapProcessData},
359 };
360 
361 static const ElementIdInfo kTagsIds[] = {
362  {LIST, kWebMIdTag},
363 };
364 
365 static const ElementIdInfo kTagIds[] = {
366  {LIST, kWebMIdTargets},
367  {LIST, kWebMIdSimpleTag},
368 };
369 
370 static const ElementIdInfo kTargetsIds[] = {
371  {UINT, kWebMIdTargetTypeValue},
372  {STRING, kWebMIdTargetType},
373  {UINT, kWebMIdTagTrackUID},
374  {UINT, kWebMIdTagEditionUID},
375  {UINT, kWebMIdTagChapterUID},
376  {UINT, kWebMIdTagAttachmentUID},
377 };
378 
379 static const ElementIdInfo kSimpleTagIds[] = {
380  {STRING, kWebMIdTagName},
381  {STRING, kWebMIdTagLanguage},
382  {UINT, kWebMIdTagDefault},
383  {STRING, kWebMIdTagString},
384  {BINARY, kWebMIdTagBinary},
385 };
386 
387 #define LIST_ELEMENT_INFO(id, level, id_info) \
388  { (id), (level), (id_info), arraysize(id_info) }
389 
390 static const ListElementInfo kListElementInfo[] = {
391  LIST_ELEMENT_INFO(kWebMIdCluster, 1, kClusterIds),
392  LIST_ELEMENT_INFO(kWebMIdEBMLHeader, 0, kEBMLHeaderIds),
393  LIST_ELEMENT_INFO(kWebMIdSegment, 0, kSegmentIds),
394  LIST_ELEMENT_INFO(kWebMIdSeekHead, 1, kSeekHeadIds),
395  LIST_ELEMENT_INFO(kWebMIdSeek, 2, kSeekIds),
396  LIST_ELEMENT_INFO(kWebMIdInfo, 1, kInfoIds),
397  LIST_ELEMENT_INFO(kWebMIdChapterTranslate, 2, kChapterTranslateIds),
398  LIST_ELEMENT_INFO(kWebMIdSilentTracks, 2, kSilentTracksIds),
399  LIST_ELEMENT_INFO(kWebMIdBlockGroup, 2, kBlockGroupIds),
400  LIST_ELEMENT_INFO(kWebMIdBlockAdditions, 3, kBlockAdditionsIds),
401  LIST_ELEMENT_INFO(kWebMIdBlockMore, 4, kBlockMoreIds),
402  LIST_ELEMENT_INFO(kWebMIdSlices, 3, kSlicesIds),
403  LIST_ELEMENT_INFO(kWebMIdTimeSlice, 4, kTimeSliceIds),
404  LIST_ELEMENT_INFO(kWebMIdTracks, 1, kTracksIds),
405  LIST_ELEMENT_INFO(kWebMIdTrackEntry, 2, kTrackEntryIds),
406  LIST_ELEMENT_INFO(kWebMIdTrackTranslate, 3, kTrackTranslateIds),
407  LIST_ELEMENT_INFO(kWebMIdVideo, 3, kVideoIds),
408  LIST_ELEMENT_INFO(kWebMIdColor, 4, kColorIds),
409  LIST_ELEMENT_INFO(kWebMIdProjection, 4, kProjectionIds),
410  LIST_ELEMENT_INFO(kWebMIdAudio, 3, kAudioIds),
411  LIST_ELEMENT_INFO(kWebMIdTrackOperation, 3, kTrackOperationIds),
412  LIST_ELEMENT_INFO(kWebMIdTrackCombinePlanes, 4, kTrackCombinePlanesIds),
413  LIST_ELEMENT_INFO(kWebMIdTrackPlane, 5, kTrackPlaneIds),
414  LIST_ELEMENT_INFO(kWebMIdJoinBlocks, 4, kJoinBlocksIds),
415  LIST_ELEMENT_INFO(kWebMIdContentEncodings, 3, kContentEncodingsIds),
416  LIST_ELEMENT_INFO(kWebMIdContentEncoding, 4, kContentEncodingIds),
417  LIST_ELEMENT_INFO(kWebMIdContentCompression, 5, kContentCompressionIds),
418  LIST_ELEMENT_INFO(kWebMIdContentEncryption, 5, kContentEncryptionIds),
419  LIST_ELEMENT_INFO(kWebMIdContentEncAESSettings, 6, kContentEncAESSettingsIds),
420  LIST_ELEMENT_INFO(kWebMIdCues, 1, kCuesIds),
421  LIST_ELEMENT_INFO(kWebMIdCuePoint, 2, kCuePointIds),
422  LIST_ELEMENT_INFO(kWebMIdCueTrackPositions, 3, kCueTrackPositionsIds),
423  LIST_ELEMENT_INFO(kWebMIdCueReference, 4, kCueReferenceIds),
424  LIST_ELEMENT_INFO(kWebMIdAttachments, 1, kAttachmentsIds),
425  LIST_ELEMENT_INFO(kWebMIdAttachedFile, 2, kAttachedFileIds),
426  LIST_ELEMENT_INFO(kWebMIdChapters, 1, kChaptersIds),
427  LIST_ELEMENT_INFO(kWebMIdEditionEntry, 2, kEditionEntryIds),
428  LIST_ELEMENT_INFO(kWebMIdChapterAtom, 3, kChapterAtomIds),
429  LIST_ELEMENT_INFO(kWebMIdChapterTrack, 4, kChapterTrackIds),
430  LIST_ELEMENT_INFO(kWebMIdChapterDisplay, 4, kChapterDisplayIds),
431  LIST_ELEMENT_INFO(kWebMIdChapProcess, 4, kChapProcessIds),
432  LIST_ELEMENT_INFO(kWebMIdChapProcessCommand, 5, kChapProcessCommandIds),
433  LIST_ELEMENT_INFO(kWebMIdTags, 1, kTagsIds),
434  LIST_ELEMENT_INFO(kWebMIdTag, 2, kTagIds),
435  LIST_ELEMENT_INFO(kWebMIdTargets, 3, kTargetsIds),
436  LIST_ELEMENT_INFO(kWebMIdSimpleTag, 3, kSimpleTagIds),
437 };
438 
439 // Parses an element header id or size field. These fields are variable length
440 // encoded. The first byte indicates how many bytes the field occupies.
441 // |buf| - The buffer to parse.
442 // |size| - The number of bytes in |buf|
443 // |max_bytes| - The maximum number of bytes the field can be. ID fields
444 // set this to 4 & element size fields set this to 8. If the
445 // first byte indicates a larger field size than this it is a
446 // parser error.
447 // |mask_first_byte| - For element size fields the field length encoding bits
448 // need to be masked off. This parameter is true for
449 // element size fields and is false for ID field values.
450 //
451 // Returns: The number of bytes parsed on success. -1 on error.
452 static int ParseWebMElementHeaderField(const uint8_t* buf,
453  int size,
454  int max_bytes,
455  bool mask_first_byte,
456  int64_t* num) {
457  DCHECK(buf);
458  DCHECK(num);
459 
460  if (size < 0)
461  return -1;
462 
463  if (size == 0)
464  return 0;
465 
466  int mask = 0x80;
467  uint8_t ch = buf[0];
468  int extra_bytes = -1;
469  bool all_ones = false;
470  for (int i = 0; i < max_bytes; ++i) {
471  if ((ch & mask) != 0) {
472  mask = ~mask & 0xff;
473  *num = mask_first_byte ? ch & mask : ch;
474  all_ones = (ch & mask) == mask;
475  extra_bytes = i;
476  break;
477  }
478  mask = 0x80 | mask >> 1;
479  }
480 
481  if (extra_bytes == -1)
482  return -1;
483 
484  // Return 0 if we need more data.
485  if ((1 + extra_bytes) > size)
486  return 0;
487 
488  int bytes_used = 1;
489 
490  for (int i = 0; i < extra_bytes; ++i) {
491  ch = buf[bytes_used++];
492  all_ones &= (ch == 0xff);
493  *num = (*num << 8) | ch;
494  }
495 
496  if (all_ones)
497  *num = std::numeric_limits<int64_t>::max();
498 
499  return bytes_used;
500 }
501 
502 int WebMParseElementHeader(const uint8_t* buf,
503  int size,
504  int* id,
505  int64_t* element_size) {
506  DCHECK(buf);
507  DCHECK_GE(size, 0);
508  DCHECK(id);
509  DCHECK(element_size);
510 
511  if (size == 0)
512  return 0;
513 
514  int64_t tmp = 0;
515  int num_id_bytes = ParseWebMElementHeaderField(buf, size, 4, false, &tmp);
516 
517  if (num_id_bytes <= 0)
518  return num_id_bytes;
519 
520  if (tmp == std::numeric_limits<int64_t>::max())
521  tmp = kWebMReservedId;
522 
523  *id = static_cast<int>(tmp);
524 
525  int num_size_bytes = ParseWebMElementHeaderField(buf + num_id_bytes,
526  size - num_id_bytes,
527  8, true, &tmp);
528 
529  if (num_size_bytes <= 0)
530  return num_size_bytes;
531 
532  if (tmp == std::numeric_limits<int64_t>::max())
533  tmp = kWebMUnknownSize;
534 
535  *element_size = tmp;
536  DVLOG(3) << "WebMParseElementHeader() : id " << std::hex << *id << std::dec
537  << " size " << *element_size;
538  return num_id_bytes + num_size_bytes;
539 }
540 
541 // Finds ElementType for a specific ID.
542 static ElementType FindIdType(int id,
543  const ElementIdInfo* id_info,
544  int id_info_count) {
545 
546  // Check for global element IDs that can be anywhere.
547  if (id == kWebMIdVoid || id == kWebMIdCRC32)
548  return SKIP;
549 
550  for (int i = 0; i < id_info_count; ++i) {
551  if (id == id_info[i].id_)
552  return id_info[i].type_;
553  }
554 
555  return UNKNOWN;
556 }
557 
558 // Finds ListElementInfo for a specific ID.
559 static const ListElementInfo* FindListInfo(int id) {
560  for (size_t i = 0; i < arraysize(kListElementInfo); ++i) {
561  if (id == kListElementInfo[i].id_)
562  return &kListElementInfo[i];
563  }
564 
565  return NULL;
566 }
567 
568 static int FindListLevel(int id) {
569  const ListElementInfo* list_info = FindListInfo(id);
570  if (list_info)
571  return list_info->level_;
572 
573  return -1;
574 }
575 
576 static int ParseUInt(const uint8_t* buf,
577  int size,
578  int id,
579  WebMParserClient* client) {
580  if ((size <= 0) || (size > 8))
581  return -1;
582 
583  // Read in the big-endian integer.
584  uint64_t value = 0;
585  for (int i = 0; i < size; ++i)
586  value = (value << 8) | buf[i];
587 
588  // We use int64_t in place of uint64_t everywhere for convenience. See this
589  // bug
590  // for more details: http://crbug.com/366750#c3
591  if (!base::IsValueInRangeForNumericType<int64_t>(value))
592  return -1;
593 
594  if (!client->OnUInt(id, value))
595  return -1;
596 
597  return size;
598 }
599 
600 static int ParseFloat(const uint8_t* buf,
601  int size,
602  int id,
603  WebMParserClient* client) {
604  if ((size != 4) && (size != 8))
605  return -1;
606 
607  double value = -1;
608 
609  // Read the bytes from big-endian form into a native endian integer.
610  int64_t tmp = 0;
611  for (int i = 0; i < size; ++i)
612  tmp = (tmp << 8) | buf[i];
613 
614  // Use a union to convert the integer bit pattern into a floating point
615  // number.
616  if (size == 4) {
617  union {
618  int32_t src;
619  float dst;
620  } tmp2;
621  tmp2.src = static_cast<int32_t>(tmp);
622  value = tmp2.dst;
623  } else if (size == 8) {
624  union {
625  int64_t src;
626  double dst;
627  } tmp2;
628  tmp2.src = tmp;
629  value = tmp2.dst;
630  } else {
631  return -1;
632  }
633 
634  if (!client->OnFloat(id, value))
635  return -1;
636 
637  return size;
638 }
639 
640 static int ParseBinary(const uint8_t* buf,
641  int size,
642  int id,
643  WebMParserClient* client) {
644  return client->OnBinary(id, buf, size) ? size : -1;
645 }
646 
647 static int ParseString(const uint8_t* buf,
648  int size,
649  int id,
650  WebMParserClient* client) {
651  const uint8_t* end = static_cast<const uint8_t*>(memchr(buf, '\0', size));
652  int length = (end != NULL) ? static_cast<int>(end - buf) : size;
653  std::string str(reinterpret_cast<const char*>(buf), length);
654  return client->OnString(id, str) ? size : -1;
655 }
656 
657 static int ParseNonListElement(ElementType type,
658  int id,
659  int64_t element_size,
660  const uint8_t* buf,
661  int size,
662  WebMParserClient* client) {
663  DCHECK_GE(size, element_size);
664 
665  int result = -1;
666  switch(type) {
667  case LIST:
668  NOTIMPLEMENTED();
669  result = -1;
670  break;
671  case UINT:
672  result = ParseUInt(buf, element_size, id, client);
673  break;
674  case FLOAT:
675  result = ParseFloat(buf, element_size, id, client);
676  break;
677  case BINARY:
678  result = ParseBinary(buf, element_size, id, client);
679  break;
680  case STRING:
681  result = ParseString(buf, element_size, id, client);
682  break;
683  case SKIP:
684  result = element_size;
685  break;
686  default:
687  DVLOG(1) << "Unhandled ID type " << type;
688  return -1;
689  };
690 
691  DCHECK_LE(result, size);
692  return result;
693 }
694 
695 WebMParserClient::WebMParserClient() {}
696 WebMParserClient::~WebMParserClient() {}
697 
698 WebMParserClient* WebMParserClient::OnListStart(int id) {
699  DVLOG(1) << "Unexpected list element start with ID " << std::hex << id;
700  return NULL;
701 }
702 
703 bool WebMParserClient::OnListEnd(int id) {
704  DVLOG(1) << "Unexpected list element end with ID " << std::hex << id;
705  return false;
706 }
707 
708 bool WebMParserClient::OnUInt(int id, int64_t val) {
709  DVLOG(1) << "Unexpected unsigned integer element with ID " << std::hex << id;
710  return false;
711 }
712 
713 bool WebMParserClient::OnFloat(int id, double val) {
714  DVLOG(1) << "Unexpected float element with ID " << std::hex << id;
715  return false;
716 }
717 
718 bool WebMParserClient::OnBinary(int id, const uint8_t* data, int size) {
719  DVLOG(1) << "Unexpected binary element with ID " << std::hex << id;
720  return false;
721 }
722 
723 bool WebMParserClient::OnString(int id, const std::string& str) {
724  DVLOG(1) << "Unexpected string element with ID " << std::hex << id;
725  return false;
726 }
727 
729  : state_(NEED_LIST_HEADER),
730  root_id_(id),
731  root_level_(FindListLevel(id)),
732  root_client_(client) {
733  DCHECK_GE(root_level_, 0);
734  DCHECK(client);
735 }
736 
737 WebMListParser::~WebMListParser() {}
738 
740  ChangeState(NEED_LIST_HEADER);
741  list_state_stack_.clear();
742 }
743 
744 int WebMListParser::Parse(const uint8_t* buf, int size) {
745  DCHECK(buf);
746 
747  if (size < 0 || state_ == PARSE_ERROR || state_ == DONE_PARSING_LIST)
748  return -1;
749 
750  if (size == 0)
751  return 0;
752 
753  const uint8_t* cur = buf;
754  int cur_size = size;
755  int bytes_parsed = 0;
756 
757  while (cur_size > 0 && state_ != PARSE_ERROR && state_ != DONE_PARSING_LIST) {
758  int element_id = 0;
759  int64_t element_size = 0;
760  int result = WebMParseElementHeader(cur, cur_size, &element_id,
761  &element_size);
762 
763  if (result < 0)
764  return result;
765 
766  if (result == 0)
767  return bytes_parsed;
768 
769  switch(state_) {
770  case NEED_LIST_HEADER: {
771  if (element_id != root_id_) {
772  ChangeState(PARSE_ERROR);
773  return -1;
774  }
775 
776  // Only allow Segment & Cluster to have an unknown size.
777  if (element_size == kWebMUnknownSize &&
778  (element_id != kWebMIdSegment) &&
779  (element_id != kWebMIdCluster)) {
780  ChangeState(PARSE_ERROR);
781  return -1;
782  }
783 
784  ChangeState(INSIDE_LIST);
785  if (!OnListStart(root_id_, element_size))
786  return -1;
787 
788  break;
789  }
790 
791  case INSIDE_LIST: {
792  int header_size = result;
793  const uint8_t* element_data = cur + header_size;
794  int element_data_size = cur_size - header_size;
795 
796  if (element_size < element_data_size)
797  element_data_size = element_size;
798 
799  result = ParseListElement(header_size, element_id, element_size,
800  element_data, element_data_size);
801 
802  DCHECK_LE(result, header_size + element_data_size);
803  if (result < 0) {
804  ChangeState(PARSE_ERROR);
805  return -1;
806  }
807 
808  if (result == 0)
809  return bytes_parsed;
810 
811  break;
812  }
813  case DONE_PARSING_LIST:
814  case PARSE_ERROR:
815  // Shouldn't be able to get here.
816  NOTIMPLEMENTED();
817  break;
818  }
819 
820  cur += result;
821  cur_size -= result;
822  bytes_parsed += result;
823  }
824 
825  return (state_ == PARSE_ERROR) ? -1 : bytes_parsed;
826 }
827 
829  return state_ == DONE_PARSING_LIST;
830 }
831 
832 void WebMListParser::ChangeState(State new_state) {
833  state_ = new_state;
834 }
835 
836 int WebMListParser::ParseListElement(int header_size,
837  int id,
838  int64_t element_size,
839  const uint8_t* data,
840  int size) {
841  DCHECK_GT(list_state_stack_.size(), 0u);
842 
843  ListState& list_state = list_state_stack_.back();
844  DCHECK(list_state.element_info_);
845 
846  const ListElementInfo* element_info = list_state.element_info_;
847  ElementType id_type =
848  FindIdType(id, element_info->id_info_, element_info->id_info_count_);
849 
850  // Unexpected ID.
851  if (id_type == UNKNOWN) {
852  if (list_state.size_ != kWebMUnknownSize ||
853  !IsSiblingOrAncestor(list_state.id_, id)) {
854  DVLOG(1) << "No ElementType info for ID 0x" << std::hex << id;
855  return -1;
856  }
857 
858  // We've reached the end of a list of unknown size. Update the size now that
859  // we know it and dispatch the end of list calls.
860  list_state.size_ = list_state.bytes_parsed_;
861 
862  if (!OnListEnd())
863  return -1;
864 
865  // Check to see if all open lists have ended.
866  if (list_state_stack_.size() == 0)
867  return 0;
868 
869  list_state = list_state_stack_.back();
870  }
871 
872  // Make sure the whole element can fit inside the current list.
873  int64_t total_element_size = header_size + element_size;
874  if (list_state.size_ != kWebMUnknownSize &&
875  list_state.size_ < list_state.bytes_parsed_ + total_element_size) {
876  return -1;
877  }
878 
879  if (id_type == LIST) {
880  list_state.bytes_parsed_ += header_size;
881 
882  if (!OnListStart(id, element_size))
883  return -1;
884  return header_size;
885  }
886 
887  // Make sure we have the entire element before trying to parse a non-list
888  // element.
889  if (size < element_size)
890  return 0;
891 
892  int bytes_parsed = ParseNonListElement(id_type, id, element_size,
893  data, size, list_state.client_);
894  DCHECK_LE(bytes_parsed, size);
895 
896  // Return if an error occurred or we need more data.
897  // Note: bytes_parsed is 0 for a successful parse of a size 0 element. We
898  // need to check the element_size to disambiguate the "need more data" case
899  // from a successful parse.
900  if (bytes_parsed < 0 || (bytes_parsed == 0 && element_size != 0))
901  return bytes_parsed;
902 
903  int result = header_size + bytes_parsed;
904  list_state.bytes_parsed_ += result;
905 
906  // See if we have reached the end of the current list.
907  if (list_state.bytes_parsed_ == list_state.size_) {
908  if (!OnListEnd())
909  return -1;
910  }
911 
912  return result;
913 }
914 
915 bool WebMListParser::OnListStart(int id, int64_t size) {
916  const ListElementInfo* element_info = FindListInfo(id);
917  if (!element_info)
918  return false;
919 
920  int current_level =
921  root_level_ + static_cast<int>(list_state_stack_.size()) - 1;
922  if (current_level + 1 != element_info->level_)
923  return false;
924 
925  WebMParserClient* current_list_client = NULL;
926  if (!list_state_stack_.empty()) {
927  // Make sure the new list doesn't go past the end of the current list.
928  ListState current_list_state = list_state_stack_.back();
929  if (current_list_state.size_ != kWebMUnknownSize &&
930  current_list_state.size_ < current_list_state.bytes_parsed_ + size)
931  return false;
932  current_list_client = current_list_state.client_;
933  } else {
934  current_list_client = root_client_;
935  }
936 
937  WebMParserClient* new_list_client = current_list_client->OnListStart(id);
938  if (!new_list_client)
939  return false;
940 
941  ListState new_list_state = { id, size, 0, element_info, new_list_client };
942  list_state_stack_.push_back(new_list_state);
943 
944  if (size == 0)
945  return OnListEnd();
946 
947  return true;
948 }
949 
950 bool WebMListParser::OnListEnd() {
951  int lists_ended = 0;
952  for (; !list_state_stack_.empty(); ++lists_ended) {
953  const ListState& list_state = list_state_stack_.back();
954  int64_t bytes_parsed = list_state.bytes_parsed_;
955  int id = list_state.id_;
956 
957  if (bytes_parsed != list_state.size_)
958  break;
959 
960  list_state_stack_.pop_back();
961 
962  WebMParserClient* client = NULL;
963  if (!list_state_stack_.empty()) {
964  // Update the bytes_parsed_ for the parent element.
965  list_state_stack_.back().bytes_parsed_ += bytes_parsed;
966  client = list_state_stack_.back().client_;
967  } else {
968  client = root_client_;
969  }
970 
971  if (!client->OnListEnd(id))
972  return false;
973  }
974 
975  DCHECK_GE(lists_ended, 1);
976 
977  if (list_state_stack_.empty())
978  ChangeState(DONE_PARSING_LIST);
979 
980  return true;
981 }
982 
983 bool WebMListParser::IsSiblingOrAncestor(int id_a, int id_b) const {
984  DCHECK((id_a == kWebMIdSegment) || (id_a == kWebMIdCluster));
985 
986  if (id_a == kWebMIdCluster) {
987  // kWebMIdCluster siblings.
988  for (size_t i = 0; i < arraysize(kSegmentIds); i++) {
989  if (kSegmentIds[i].id_ == id_b)
990  return true;
991  }
992  }
993 
994  // kWebMIdSegment siblings.
995  return ((id_b == kWebMIdSegment) || (id_b == kWebMIdEBMLHeader));
996 }
997 
998 } // namespace media
999 } // namespace shaka
shaka::media::WebMListParser::IsParsingComplete
bool IsParsingComplete() const
Definition: webm_parser.cc:828
shaka
All the methods that are virtual are virtual for mocking.
Definition: gflags_hex_bytes.cc:11
shaka::media::WebMParserClient
Definition: webm_parser.h:30
shaka::media::WebMListParser::Reset
void Reset()
Resets the state of the parser so it can start parsing a new list.
Definition: webm_parser.cc:739
shaka::media::WebMListParser::WebMListParser
WebMListParser(int id, WebMParserClient *client)
Definition: webm_parser.cc:728
shaka::media::WebMListParser::Parse
int Parse(const uint8_t *buf, int size)
Definition: webm_parser.cc:744