Shaka Packager SDK
webm_parser.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "packager/media/formats/webm/webm_parser.h"
6 
// This file contains code to parse WebM file elements. It was created
// from information in the Matroska spec:
// http://www.matroska.org/technical/specs/index.html
// It also contains code for encrypted WebM. The current WebM encryption
// request-for-comments specification is here:
// http://wiki.webmproject.org/encryption/webm-encryption-rfc
13 
#include <cstring>
#include <limits>

#include "packager/base/logging.h"
#include "packager/base/numerics/safe_conversions.h"
#include "packager/media/formats/webm/webm_constants.h"
19 
20 namespace shaka {
21 namespace media {
22 
// Payload category of an element. The tables below pair every known element
// ID with one of these values, and the parser picks its decoding routine
// from it.
enum ElementType {
  UNKNOWN,
  LIST,  // Referred to as Master Element in the Matroska spec.
  UINT,
  FLOAT,
  BINARY,
  STRING,
  SKIP,
};
32 
33 struct ElementIdInfo {
34  ElementType type_;
35  int id_;
36 };
37 
38 struct ListElementInfo {
39  int id_;
40  int level_;
41  const ElementIdInfo* id_info_;
42  int id_info_count_;
43 };
44 
// The following are tables indicating what IDs are valid sub-elements
// of particular elements. If an element is encountered that doesn't
// appear in the list, a parsing error is signalled. Some elements are
// marked as SKIP because they are valid, but we don't care about them
// right now.
50 static const ElementIdInfo kEBMLHeaderIds[] = {
51  {UINT, kWebMIdEBMLVersion},
52  {UINT, kWebMIdEBMLReadVersion},
53  {UINT, kWebMIdEBMLMaxIDLength},
54  {UINT, kWebMIdEBMLMaxSizeLength},
55  {STRING, kWebMIdDocType},
56  {UINT, kWebMIdDocTypeVersion},
57  {UINT, kWebMIdDocTypeReadVersion},
58 };
59 
60 static const ElementIdInfo kSegmentIds[] = {
61  {LIST, kWebMIdSeekHead},
62  {LIST, kWebMIdInfo},
63  {LIST, kWebMIdCluster},
64  {LIST, kWebMIdTracks},
65  {LIST, kWebMIdCues},
66  {LIST, kWebMIdAttachments},
67  {LIST, kWebMIdChapters},
68  {LIST, kWebMIdTags},
69 };
70 
71 static const ElementIdInfo kSeekHeadIds[] = {
72  {LIST, kWebMIdSeek},
73 };
74 
75 static const ElementIdInfo kSeekIds[] = {
76  {BINARY, kWebMIdSeekID},
77  {UINT, kWebMIdSeekPosition},
78 };
79 
80 static const ElementIdInfo kInfoIds[] = {
81  {BINARY, kWebMIdSegmentUID},
82  {STRING, kWebMIdSegmentFilename},
83  {BINARY, kWebMIdPrevUID},
84  {STRING, kWebMIdPrevFilename},
85  {BINARY, kWebMIdNextUID},
86  {STRING, kWebMIdNextFilename},
87  {BINARY, kWebMIdSegmentFamily},
88  {LIST, kWebMIdChapterTranslate},
89  {UINT, kWebMIdTimecodeScale},
90  {FLOAT, kWebMIdDuration},
91  {BINARY, kWebMIdDateUTC},
92  {STRING, kWebMIdTitle},
93  {STRING, kWebMIdMuxingApp},
94  {STRING, kWebMIdWritingApp},
95 };
96 
97 static const ElementIdInfo kChapterTranslateIds[] = {
98  {UINT, kWebMIdChapterTranslateEditionUID},
99  {UINT, kWebMIdChapterTranslateCodec},
100  {BINARY, kWebMIdChapterTranslateID},
101 };
102 
103 static const ElementIdInfo kClusterIds[] = {
104  {BINARY, kWebMIdSimpleBlock},
105  {UINT, kWebMIdTimecode},
106  {LIST, kWebMIdSilentTracks},
107  {UINT, kWebMIdPosition},
108  {UINT, kWebMIdPrevSize},
109  {LIST, kWebMIdBlockGroup},
110 };
111 
112 static const ElementIdInfo kSilentTracksIds[] = {
113  {UINT, kWebMIdSilentTrackNumber},
114 };
115 
116 static const ElementIdInfo kBlockGroupIds[] = {
117  {BINARY, kWebMIdBlock},
118  {LIST, kWebMIdBlockAdditions},
119  {UINT, kWebMIdBlockDuration},
120  {UINT, kWebMIdReferencePriority},
121  {BINARY, kWebMIdReferenceBlock},
122  {BINARY, kWebMIdCodecState},
123  {BINARY, kWebMIdDiscardPadding},
124  {LIST, kWebMIdSlices},
125 };
126 
127 static const ElementIdInfo kBlockAdditionsIds[] = {
128  {LIST, kWebMIdBlockMore},
129 };
130 
131 static const ElementIdInfo kBlockMoreIds[] = {
132  {UINT, kWebMIdBlockAddID},
133  {BINARY, kWebMIdBlockAdditional},
134 };
135 
136 static const ElementIdInfo kSlicesIds[] = {
137  {LIST, kWebMIdTimeSlice},
138 };
139 
140 static const ElementIdInfo kTimeSliceIds[] = {
141  {UINT, kWebMIdLaceNumber},
142 };
143 
144 static const ElementIdInfo kTracksIds[] = {
145  {LIST, kWebMIdTrackEntry},
146 };
147 
148 static const ElementIdInfo kTrackEntryIds[] = {
149  {UINT, kWebMIdTrackNumber},
150  {BINARY, kWebMIdTrackUID},
151  {UINT, kWebMIdTrackType},
152  {UINT, kWebMIdFlagEnabled},
153  {UINT, kWebMIdFlagDefault},
154  {UINT, kWebMIdFlagForced},
155  {UINT, kWebMIdFlagLacing},
156  {UINT, kWebMIdMinCache},
157  {UINT, kWebMIdMaxCache},
158  {UINT, kWebMIdDefaultDuration},
159  {FLOAT, kWebMIdTrackTimecodeScale},
160  {UINT, kWebMIdMaxBlockAdditionId},
161  {STRING, kWebMIdName},
162  {STRING, kWebMIdLanguage},
163  {STRING, kWebMIdCodecID},
164  {BINARY, kWebMIdCodecPrivate},
165  {STRING, kWebMIdCodecName},
166  {UINT, kWebMIdAttachmentLink},
167  {UINT, kWebMIdCodecDecodeAll},
168  {UINT, kWebMIdTrackOverlay},
169  {UINT, kWebMIdCodecDelay},
170  {UINT, kWebMIdSeekPreRoll},
171  {LIST, kWebMIdTrackTranslate},
172  {LIST, kWebMIdVideo},
173  {LIST, kWebMIdAudio},
174  {LIST, kWebMIdTrackOperation},
175  {LIST, kWebMIdContentEncodings},
176 };
177 
178 static const ElementIdInfo kTrackTranslateIds[] = {
179  {UINT, kWebMIdTrackTranslateEditionUID},
180  {UINT, kWebMIdTrackTranslateCodec},
181  {BINARY, kWebMIdTrackTranslateTrackID},
182 };
183 
184 static const ElementIdInfo kVideoIds[] = {
185  {UINT, kWebMIdFlagInterlaced},
186  {UINT, kWebMIdStereoMode},
187  {UINT, kWebMIdAlphaMode},
188  {UINT, kWebMIdPixelWidth},
189  {UINT, kWebMIdPixelHeight},
190  {UINT, kWebMIdPixelCropBottom},
191  {UINT, kWebMIdPixelCropTop},
192  {UINT, kWebMIdPixelCropLeft},
193  {UINT, kWebMIdPixelCropRight},
194  {UINT, kWebMIdDisplayWidth},
195  {UINT, kWebMIdDisplayHeight},
196  {UINT, kWebMIdDisplayUnit},
197  {UINT, kWebMIdAspectRatioType},
198  {BINARY, kWebMIdColorSpace},
199  {FLOAT, kWebMIdFrameRate},
200  {LIST, kWebMIdColor},
201 };
202 
203 static const ElementIdInfo kColorIds[] = {
204  {UINT, kWebMIdColorMatrixCoefficients},
205  {UINT, kWebMIdColorBitsPerChannel},
206  {UINT, kWebMIdColorChromaSubsamplingHorz},
207  {UINT, kWebMIdColorChromaSubsamplingVert},
208  {UINT, kWebMIdColorCbSamplingHorz},
209  {UINT, kWebMIdColorCbSamplingVert},
210  {UINT, kWebMIdColorChromaSitingHorz},
211  {UINT, kWebMIdColorChromaSitingVert},
212  {UINT, kWebMIdColorRange},
213  {UINT, kWebMIdColorTransferCharacteristics},
214  {UINT, kWebMIdColorPrimaries},
215  {UINT, kWebMIdColorMaxCLL},
216  {UINT, kWebMIdColorMaxFALL},
217  {LIST, kWebMIdColorMasteringMetadata},
218 };
219 
220 static const ElementIdInfo kAudioIds[] = {
221  {FLOAT, kWebMIdSamplingFrequency},
222  {FLOAT, kWebMIdOutputSamplingFrequency},
223  {UINT, kWebMIdChannels},
224  {UINT, kWebMIdBitDepth},
225 };
226 
227 static const ElementIdInfo kTrackOperationIds[] = {
228  {LIST, kWebMIdTrackCombinePlanes},
229  {LIST, kWebMIdJoinBlocks},
230 };
231 
232 static const ElementIdInfo kTrackCombinePlanesIds[] = {
233  {LIST, kWebMIdTrackPlane},
234 };
235 
236 static const ElementIdInfo kTrackPlaneIds[] = {
237  {UINT, kWebMIdTrackPlaneUID},
238  {UINT, kWebMIdTrackPlaneType},
239 };
240 
241 static const ElementIdInfo kJoinBlocksIds[] = {
242  {UINT, kWebMIdTrackJoinUID},
243 };
244 
245 static const ElementIdInfo kContentEncodingsIds[] = {
246  {LIST, kWebMIdContentEncoding},
247 };
248 
249 static const ElementIdInfo kContentEncodingIds[] = {
250  {UINT, kWebMIdContentEncodingOrder},
251  {UINT, kWebMIdContentEncodingScope},
252  {UINT, kWebMIdContentEncodingType},
253  {LIST, kWebMIdContentCompression},
254  {LIST, kWebMIdContentEncryption},
255 };
256 
257 static const ElementIdInfo kContentCompressionIds[] = {
258  {UINT, kWebMIdContentCompAlgo},
259  {BINARY, kWebMIdContentCompSettings},
260 };
261 
262 static const ElementIdInfo kContentEncryptionIds[] = {
263  {LIST, kWebMIdContentEncAESSettings},
264  {UINT, kWebMIdContentEncAlgo},
265  {BINARY, kWebMIdContentEncKeyID},
266  {BINARY, kWebMIdContentSignature},
267  {BINARY, kWebMIdContentSigKeyID},
268  {UINT, kWebMIdContentSigAlgo},
269  {UINT, kWebMIdContentSigHashAlgo},
270 };
271 
272 static const ElementIdInfo kContentEncAESSettingsIds[] = {
273  {UINT, kWebMIdAESSettingsCipherMode},
274 };
275 
276 static const ElementIdInfo kCuesIds[] = {
277  {LIST, kWebMIdCuePoint},
278 };
279 
280 static const ElementIdInfo kCuePointIds[] = {
281  {UINT, kWebMIdCueTime},
282  {LIST, kWebMIdCueTrackPositions},
283 };
284 
285 static const ElementIdInfo kCueTrackPositionsIds[] = {
286  {UINT, kWebMIdCueTrack},
287  {UINT, kWebMIdCueClusterPosition},
288  {UINT, kWebMIdCueBlockNumber},
289  {UINT, kWebMIdCueCodecState},
290  {LIST, kWebMIdCueReference},
291 };
292 
293 static const ElementIdInfo kCueReferenceIds[] = {
294  {UINT, kWebMIdCueRefTime},
295 };
296 
297 static const ElementIdInfo kAttachmentsIds[] = {
298  {LIST, kWebMIdAttachedFile},
299 };
300 
301 static const ElementIdInfo kAttachedFileIds[] = {
302  {STRING, kWebMIdFileDescription},
303  {STRING, kWebMIdFileName},
304  {STRING, kWebMIdFileMimeType},
305  {BINARY, kWebMIdFileData},
306  {UINT, kWebMIdFileUID},
307 };
308 
309 static const ElementIdInfo kChaptersIds[] = {
310  {LIST, kWebMIdEditionEntry},
311 };
312 
313 static const ElementIdInfo kEditionEntryIds[] = {
314  {UINT, kWebMIdEditionUID},
315  {UINT, kWebMIdEditionFlagHidden},
316  {UINT, kWebMIdEditionFlagDefault},
317  {UINT, kWebMIdEditionFlagOrdered},
318  {LIST, kWebMIdChapterAtom},
319 };
320 
321 static const ElementIdInfo kChapterAtomIds[] = {
322  {UINT, kWebMIdChapterUID},
323  {UINT, kWebMIdChapterTimeStart},
324  {UINT, kWebMIdChapterTimeEnd},
325  {UINT, kWebMIdChapterFlagHidden},
326  {UINT, kWebMIdChapterFlagEnabled},
327  {BINARY, kWebMIdChapterSegmentUID},
328  {UINT, kWebMIdChapterSegmentEditionUID},
329  {UINT, kWebMIdChapterPhysicalEquiv},
330  {LIST, kWebMIdChapterTrack},
331  {LIST, kWebMIdChapterDisplay},
332  {LIST, kWebMIdChapProcess},
333 };
334 
335 static const ElementIdInfo kChapterTrackIds[] = {
336  {UINT, kWebMIdChapterTrackNumber},
337 };
338 
339 static const ElementIdInfo kChapterDisplayIds[] = {
340  {STRING, kWebMIdChapString},
341  {STRING, kWebMIdChapLanguage},
342  {STRING, kWebMIdChapCountry},
343 };
344 
345 static const ElementIdInfo kChapProcessIds[] = {
346  {UINT, kWebMIdChapProcessCodecID},
347  {BINARY, kWebMIdChapProcessPrivate},
348  {LIST, kWebMIdChapProcessCommand},
349 };
350 
351 static const ElementIdInfo kChapProcessCommandIds[] = {
352  {UINT, kWebMIdChapProcessTime},
353  {BINARY, kWebMIdChapProcessData},
354 };
355 
356 static const ElementIdInfo kTagsIds[] = {
357  {LIST, kWebMIdTag},
358 };
359 
360 static const ElementIdInfo kTagIds[] = {
361  {LIST, kWebMIdTargets},
362  {LIST, kWebMIdSimpleTag},
363 };
364 
365 static const ElementIdInfo kTargetsIds[] = {
366  {UINT, kWebMIdTargetTypeValue},
367  {STRING, kWebMIdTargetType},
368  {UINT, kWebMIdTagTrackUID},
369  {UINT, kWebMIdTagEditionUID},
370  {UINT, kWebMIdTagChapterUID},
371  {UINT, kWebMIdTagAttachmentUID},
372 };
373 
374 static const ElementIdInfo kSimpleTagIds[] = {
375  {STRING, kWebMIdTagName},
376  {STRING, kWebMIdTagLanguage},
377  {UINT, kWebMIdTagDefault},
378  {STRING, kWebMIdTagString},
379  {BINARY, kWebMIdTagBinary},
380 };
381 
// Builds one ListElementInfo initializer. |id_info| must name an array (not a
// pointer) because its element count is taken with arraysize().
#define LIST_ELEMENT_INFO(id, level, id_info) \
  { (id), (level), (id_info), arraysize(id_info) }
384 
385 static const ListElementInfo kListElementInfo[] = {
386  LIST_ELEMENT_INFO(kWebMIdCluster, 1, kClusterIds),
387  LIST_ELEMENT_INFO(kWebMIdEBMLHeader, 0, kEBMLHeaderIds),
388  LIST_ELEMENT_INFO(kWebMIdSegment, 0, kSegmentIds),
389  LIST_ELEMENT_INFO(kWebMIdSeekHead, 1, kSeekHeadIds),
390  LIST_ELEMENT_INFO(kWebMIdSeek, 2, kSeekIds),
391  LIST_ELEMENT_INFO(kWebMIdInfo, 1, kInfoIds),
392  LIST_ELEMENT_INFO(kWebMIdChapterTranslate, 2, kChapterTranslateIds),
393  LIST_ELEMENT_INFO(kWebMIdSilentTracks, 2, kSilentTracksIds),
394  LIST_ELEMENT_INFO(kWebMIdBlockGroup, 2, kBlockGroupIds),
395  LIST_ELEMENT_INFO(kWebMIdBlockAdditions, 3, kBlockAdditionsIds),
396  LIST_ELEMENT_INFO(kWebMIdBlockMore, 4, kBlockMoreIds),
397  LIST_ELEMENT_INFO(kWebMIdSlices, 3, kSlicesIds),
398  LIST_ELEMENT_INFO(kWebMIdTimeSlice, 4, kTimeSliceIds),
399  LIST_ELEMENT_INFO(kWebMIdTracks, 1, kTracksIds),
400  LIST_ELEMENT_INFO(kWebMIdTrackEntry, 2, kTrackEntryIds),
401  LIST_ELEMENT_INFO(kWebMIdTrackTranslate, 3, kTrackTranslateIds),
402  LIST_ELEMENT_INFO(kWebMIdVideo, 3, kVideoIds),
403  LIST_ELEMENT_INFO(kWebMIdColor, 4, kColorIds),
404  LIST_ELEMENT_INFO(kWebMIdAudio, 3, kAudioIds),
405  LIST_ELEMENT_INFO(kWebMIdTrackOperation, 3, kTrackOperationIds),
406  LIST_ELEMENT_INFO(kWebMIdTrackCombinePlanes, 4, kTrackCombinePlanesIds),
407  LIST_ELEMENT_INFO(kWebMIdTrackPlane, 5, kTrackPlaneIds),
408  LIST_ELEMENT_INFO(kWebMIdJoinBlocks, 4, kJoinBlocksIds),
409  LIST_ELEMENT_INFO(kWebMIdContentEncodings, 3, kContentEncodingsIds),
410  LIST_ELEMENT_INFO(kWebMIdContentEncoding, 4, kContentEncodingIds),
411  LIST_ELEMENT_INFO(kWebMIdContentCompression, 5, kContentCompressionIds),
412  LIST_ELEMENT_INFO(kWebMIdContentEncryption, 5, kContentEncryptionIds),
413  LIST_ELEMENT_INFO(kWebMIdContentEncAESSettings, 6, kContentEncAESSettingsIds),
414  LIST_ELEMENT_INFO(kWebMIdCues, 1, kCuesIds),
415  LIST_ELEMENT_INFO(kWebMIdCuePoint, 2, kCuePointIds),
416  LIST_ELEMENT_INFO(kWebMIdCueTrackPositions, 3, kCueTrackPositionsIds),
417  LIST_ELEMENT_INFO(kWebMIdCueReference, 4, kCueReferenceIds),
418  LIST_ELEMENT_INFO(kWebMIdAttachments, 1, kAttachmentsIds),
419  LIST_ELEMENT_INFO(kWebMIdAttachedFile, 2, kAttachedFileIds),
420  LIST_ELEMENT_INFO(kWebMIdChapters, 1, kChaptersIds),
421  LIST_ELEMENT_INFO(kWebMIdEditionEntry, 2, kEditionEntryIds),
422  LIST_ELEMENT_INFO(kWebMIdChapterAtom, 3, kChapterAtomIds),
423  LIST_ELEMENT_INFO(kWebMIdChapterTrack, 4, kChapterTrackIds),
424  LIST_ELEMENT_INFO(kWebMIdChapterDisplay, 4, kChapterDisplayIds),
425  LIST_ELEMENT_INFO(kWebMIdChapProcess, 4, kChapProcessIds),
426  LIST_ELEMENT_INFO(kWebMIdChapProcessCommand, 5, kChapProcessCommandIds),
427  LIST_ELEMENT_INFO(kWebMIdTags, 1, kTagsIds),
428  LIST_ELEMENT_INFO(kWebMIdTag, 2, kTagIds),
429  LIST_ELEMENT_INFO(kWebMIdTargets, 3, kTargetsIds),
430  LIST_ELEMENT_INFO(kWebMIdSimpleTag, 3, kSimpleTagIds),
431 };
432 
433 // Parses an element header id or size field. These fields are variable length
434 // encoded. The first byte indicates how many bytes the field occupies.
435 // |buf| - The buffer to parse.
436 // |size| - The number of bytes in |buf|
437 // |max_bytes| - The maximum number of bytes the field can be. ID fields
438 // set this to 4 & element size fields set this to 8. If the
439 // first byte indicates a larger field size than this it is a
440 // parser error.
441 // |mask_first_byte| - For element size fields the field length encoding bits
442 // need to be masked off. This parameter is true for
443 // element size fields and is false for ID field values.
444 //
445 // Returns: The number of bytes parsed on success. -1 on error.
446 static int ParseWebMElementHeaderField(const uint8_t* buf,
447  int size,
448  int max_bytes,
449  bool mask_first_byte,
450  int64_t* num) {
451  DCHECK(buf);
452  DCHECK(num);
453 
454  if (size < 0)
455  return -1;
456 
457  if (size == 0)
458  return 0;
459 
460  int mask = 0x80;
461  uint8_t ch = buf[0];
462  int extra_bytes = -1;
463  bool all_ones = false;
464  for (int i = 0; i < max_bytes; ++i) {
465  if ((ch & mask) != 0) {
466  mask = ~mask & 0xff;
467  *num = mask_first_byte ? ch & mask : ch;
468  all_ones = (ch & mask) == mask;
469  extra_bytes = i;
470  break;
471  }
472  mask = 0x80 | mask >> 1;
473  }
474 
475  if (extra_bytes == -1)
476  return -1;
477 
478  // Return 0 if we need more data.
479  if ((1 + extra_bytes) > size)
480  return 0;
481 
482  int bytes_used = 1;
483 
484  for (int i = 0; i < extra_bytes; ++i) {
485  ch = buf[bytes_used++];
486  all_ones &= (ch == 0xff);
487  *num = (*num << 8) | ch;
488  }
489 
490  if (all_ones)
491  *num = std::numeric_limits<int64_t>::max();
492 
493  return bytes_used;
494 }
495 
496 int WebMParseElementHeader(const uint8_t* buf,
497  int size,
498  int* id,
499  int64_t* element_size) {
500  DCHECK(buf);
501  DCHECK_GE(size, 0);
502  DCHECK(id);
503  DCHECK(element_size);
504 
505  if (size == 0)
506  return 0;
507 
508  int64_t tmp = 0;
509  int num_id_bytes = ParseWebMElementHeaderField(buf, size, 4, false, &tmp);
510 
511  if (num_id_bytes <= 0)
512  return num_id_bytes;
513 
514  if (tmp == std::numeric_limits<int64_t>::max())
515  tmp = kWebMReservedId;
516 
517  *id = static_cast<int>(tmp);
518 
519  int num_size_bytes = ParseWebMElementHeaderField(buf + num_id_bytes,
520  size - num_id_bytes,
521  8, true, &tmp);
522 
523  if (num_size_bytes <= 0)
524  return num_size_bytes;
525 
526  if (tmp == std::numeric_limits<int64_t>::max())
527  tmp = kWebMUnknownSize;
528 
529  *element_size = tmp;
530  DVLOG(3) << "WebMParseElementHeader() : id " << std::hex << *id << std::dec
531  << " size " << *element_size;
532  return num_id_bytes + num_size_bytes;
533 }
534 
535 // Finds ElementType for a specific ID.
536 static ElementType FindIdType(int id,
537  const ElementIdInfo* id_info,
538  int id_info_count) {
539 
540  // Check for global element IDs that can be anywhere.
541  if (id == kWebMIdVoid || id == kWebMIdCRC32)
542  return SKIP;
543 
544  for (int i = 0; i < id_info_count; ++i) {
545  if (id == id_info[i].id_)
546  return id_info[i].type_;
547  }
548 
549  return UNKNOWN;
550 }
551 
552 // Finds ListElementInfo for a specific ID.
553 static const ListElementInfo* FindListInfo(int id) {
554  for (size_t i = 0; i < arraysize(kListElementInfo); ++i) {
555  if (id == kListElementInfo[i].id_)
556  return &kListElementInfo[i];
557  }
558 
559  return NULL;
560 }
561 
562 static int FindListLevel(int id) {
563  const ListElementInfo* list_info = FindListInfo(id);
564  if (list_info)
565  return list_info->level_;
566 
567  return -1;
568 }
569 
570 static int ParseUInt(const uint8_t* buf,
571  int size,
572  int id,
573  WebMParserClient* client) {
574  if ((size <= 0) || (size > 8))
575  return -1;
576 
577  // Read in the big-endian integer.
578  uint64_t value = 0;
579  for (int i = 0; i < size; ++i)
580  value = (value << 8) | buf[i];
581 
582  // We use int64_t in place of uint64_t everywhere for convenience. See this
583  // bug
584  // for more details: http://crbug.com/366750#c3
585  if (!base::IsValueInRangeForNumericType<int64_t>(value))
586  return -1;
587 
588  if (!client->OnUInt(id, value))
589  return -1;
590 
591  return size;
592 }
593 
594 static int ParseFloat(const uint8_t* buf,
595  int size,
596  int id,
597  WebMParserClient* client) {
598  if ((size != 4) && (size != 8))
599  return -1;
600 
601  double value = -1;
602 
603  // Read the bytes from big-endian form into a native endian integer.
604  int64_t tmp = 0;
605  for (int i = 0; i < size; ++i)
606  tmp = (tmp << 8) | buf[i];
607 
608  // Use a union to convert the integer bit pattern into a floating point
609  // number.
610  if (size == 4) {
611  union {
612  int32_t src;
613  float dst;
614  } tmp2;
615  tmp2.src = static_cast<int32_t>(tmp);
616  value = tmp2.dst;
617  } else if (size == 8) {
618  union {
619  int64_t src;
620  double dst;
621  } tmp2;
622  tmp2.src = tmp;
623  value = tmp2.dst;
624  } else {
625  return -1;
626  }
627 
628  if (!client->OnFloat(id, value))
629  return -1;
630 
631  return size;
632 }
633 
634 static int ParseBinary(const uint8_t* buf,
635  int size,
636  int id,
637  WebMParserClient* client) {
638  return client->OnBinary(id, buf, size) ? size : -1;
639 }
640 
641 static int ParseString(const uint8_t* buf,
642  int size,
643  int id,
644  WebMParserClient* client) {
645  const uint8_t* end = static_cast<const uint8_t*>(memchr(buf, '\0', size));
646  int length = (end != NULL) ? static_cast<int>(end - buf) : size;
647  std::string str(reinterpret_cast<const char*>(buf), length);
648  return client->OnString(id, str) ? size : -1;
649 }
650 
651 static int ParseNonListElement(ElementType type,
652  int id,
653  int64_t element_size,
654  const uint8_t* buf,
655  int size,
656  WebMParserClient* client) {
657  DCHECK_GE(size, element_size);
658 
659  int result = -1;
660  switch(type) {
661  case LIST:
662  NOTIMPLEMENTED();
663  result = -1;
664  break;
665  case UINT:
666  result = ParseUInt(buf, element_size, id, client);
667  break;
668  case FLOAT:
669  result = ParseFloat(buf, element_size, id, client);
670  break;
671  case BINARY:
672  result = ParseBinary(buf, element_size, id, client);
673  break;
674  case STRING:
675  result = ParseString(buf, element_size, id, client);
676  break;
677  case SKIP:
678  result = element_size;
679  break;
680  default:
681  DVLOG(1) << "Unhandled ID type " << type;
682  return -1;
683  };
684 
685  DCHECK_LE(result, size);
686  return result;
687 }
688 
689 WebMParserClient::WebMParserClient() {}
690 WebMParserClient::~WebMParserClient() {}
691 
692 WebMParserClient* WebMParserClient::OnListStart(int id) {
693  DVLOG(1) << "Unexpected list element start with ID " << std::hex << id;
694  return NULL;
695 }
696 
697 bool WebMParserClient::OnListEnd(int id) {
698  DVLOG(1) << "Unexpected list element end with ID " << std::hex << id;
699  return false;
700 }
701 
702 bool WebMParserClient::OnUInt(int id, int64_t val) {
703  DVLOG(1) << "Unexpected unsigned integer element with ID " << std::hex << id;
704  return false;
705 }
706 
707 bool WebMParserClient::OnFloat(int id, double val) {
708  DVLOG(1) << "Unexpected float element with ID " << std::hex << id;
709  return false;
710 }
711 
712 bool WebMParserClient::OnBinary(int id, const uint8_t* data, int size) {
713  DVLOG(1) << "Unexpected binary element with ID " << std::hex << id;
714  return false;
715 }
716 
717 bool WebMParserClient::OnString(int id, const std::string& str) {
718  DVLOG(1) << "Unexpected string element with ID " << std::hex << id;
719  return false;
720 }
721 
723  : state_(NEED_LIST_HEADER),
724  root_id_(id),
725  root_level_(FindListLevel(id)),
726  root_client_(client) {
727  DCHECK_GE(root_level_, 0);
728  DCHECK(client);
729 }
730 
731 WebMListParser::~WebMListParser() {}
732 
734  ChangeState(NEED_LIST_HEADER);
735  list_state_stack_.clear();
736 }
737 
738 int WebMListParser::Parse(const uint8_t* buf, int size) {
739  DCHECK(buf);
740 
741  if (size < 0 || state_ == PARSE_ERROR || state_ == DONE_PARSING_LIST)
742  return -1;
743 
744  if (size == 0)
745  return 0;
746 
747  const uint8_t* cur = buf;
748  int cur_size = size;
749  int bytes_parsed = 0;
750 
751  while (cur_size > 0 && state_ != PARSE_ERROR && state_ != DONE_PARSING_LIST) {
752  int element_id = 0;
753  int64_t element_size = 0;
754  int result = WebMParseElementHeader(cur, cur_size, &element_id,
755  &element_size);
756 
757  if (result < 0)
758  return result;
759 
760  if (result == 0)
761  return bytes_parsed;
762 
763  switch(state_) {
764  case NEED_LIST_HEADER: {
765  if (element_id != root_id_) {
766  ChangeState(PARSE_ERROR);
767  return -1;
768  }
769 
770  // Only allow Segment & Cluster to have an unknown size.
771  if (element_size == kWebMUnknownSize &&
772  (element_id != kWebMIdSegment) &&
773  (element_id != kWebMIdCluster)) {
774  ChangeState(PARSE_ERROR);
775  return -1;
776  }
777 
778  ChangeState(INSIDE_LIST);
779  if (!OnListStart(root_id_, element_size))
780  return -1;
781 
782  break;
783  }
784 
785  case INSIDE_LIST: {
786  int header_size = result;
787  const uint8_t* element_data = cur + header_size;
788  int element_data_size = cur_size - header_size;
789 
790  if (element_size < element_data_size)
791  element_data_size = element_size;
792 
793  result = ParseListElement(header_size, element_id, element_size,
794  element_data, element_data_size);
795 
796  DCHECK_LE(result, header_size + element_data_size);
797  if (result < 0) {
798  ChangeState(PARSE_ERROR);
799  return -1;
800  }
801 
802  if (result == 0)
803  return bytes_parsed;
804 
805  break;
806  }
807  case DONE_PARSING_LIST:
808  case PARSE_ERROR:
809  // Shouldn't be able to get here.
810  NOTIMPLEMENTED();
811  break;
812  }
813 
814  cur += result;
815  cur_size -= result;
816  bytes_parsed += result;
817  }
818 
819  return (state_ == PARSE_ERROR) ? -1 : bytes_parsed;
820 }
821 
823  return state_ == DONE_PARSING_LIST;
824 }
825 
826 void WebMListParser::ChangeState(State new_state) {
827  state_ = new_state;
828 }
829 
830 int WebMListParser::ParseListElement(int header_size,
831  int id,
832  int64_t element_size,
833  const uint8_t* data,
834  int size) {
835  DCHECK_GT(list_state_stack_.size(), 0u);
836 
837  ListState& list_state = list_state_stack_.back();
838  DCHECK(list_state.element_info_);
839 
840  const ListElementInfo* element_info = list_state.element_info_;
841  ElementType id_type =
842  FindIdType(id, element_info->id_info_, element_info->id_info_count_);
843 
844  // Unexpected ID.
845  if (id_type == UNKNOWN) {
846  if (list_state.size_ != kWebMUnknownSize ||
847  !IsSiblingOrAncestor(list_state.id_, id)) {
848  DVLOG(1) << "No ElementType info for ID 0x" << std::hex << id;
849  return -1;
850  }
851 
852  // We've reached the end of a list of unknown size. Update the size now that
853  // we know it and dispatch the end of list calls.
854  list_state.size_ = list_state.bytes_parsed_;
855 
856  if (!OnListEnd())
857  return -1;
858 
859  // Check to see if all open lists have ended.
860  if (list_state_stack_.size() == 0)
861  return 0;
862 
863  list_state = list_state_stack_.back();
864  }
865 
866  // Make sure the whole element can fit inside the current list.
867  int64_t total_element_size = header_size + element_size;
868  if (list_state.size_ != kWebMUnknownSize &&
869  list_state.size_ < list_state.bytes_parsed_ + total_element_size) {
870  return -1;
871  }
872 
873  if (id_type == LIST) {
874  list_state.bytes_parsed_ += header_size;
875 
876  if (!OnListStart(id, element_size))
877  return -1;
878  return header_size;
879  }
880 
881  // Make sure we have the entire element before trying to parse a non-list
882  // element.
883  if (size < element_size)
884  return 0;
885 
886  int bytes_parsed = ParseNonListElement(id_type, id, element_size,
887  data, size, list_state.client_);
888  DCHECK_LE(bytes_parsed, size);
889 
890  // Return if an error occurred or we need more data.
891  // Note: bytes_parsed is 0 for a successful parse of a size 0 element. We
892  // need to check the element_size to disambiguate the "need more data" case
893  // from a successful parse.
894  if (bytes_parsed < 0 || (bytes_parsed == 0 && element_size != 0))
895  return bytes_parsed;
896 
897  int result = header_size + bytes_parsed;
898  list_state.bytes_parsed_ += result;
899 
900  // See if we have reached the end of the current list.
901  if (list_state.bytes_parsed_ == list_state.size_) {
902  if (!OnListEnd())
903  return -1;
904  }
905 
906  return result;
907 }
908 
909 bool WebMListParser::OnListStart(int id, int64_t size) {
910  const ListElementInfo* element_info = FindListInfo(id);
911  if (!element_info)
912  return false;
913 
914  int current_level =
915  root_level_ + static_cast<int>(list_state_stack_.size()) - 1;
916  if (current_level + 1 != element_info->level_)
917  return false;
918 
919  WebMParserClient* current_list_client = NULL;
920  if (!list_state_stack_.empty()) {
921  // Make sure the new list doesn't go past the end of the current list.
922  ListState current_list_state = list_state_stack_.back();
923  if (current_list_state.size_ != kWebMUnknownSize &&
924  current_list_state.size_ < current_list_state.bytes_parsed_ + size)
925  return false;
926  current_list_client = current_list_state.client_;
927  } else {
928  current_list_client = root_client_;
929  }
930 
931  WebMParserClient* new_list_client = current_list_client->OnListStart(id);
932  if (!new_list_client)
933  return false;
934 
935  ListState new_list_state = { id, size, 0, element_info, new_list_client };
936  list_state_stack_.push_back(new_list_state);
937 
938  if (size == 0)
939  return OnListEnd();
940 
941  return true;
942 }
943 
944 bool WebMListParser::OnListEnd() {
945  int lists_ended = 0;
946  for (; !list_state_stack_.empty(); ++lists_ended) {
947  const ListState& list_state = list_state_stack_.back();
948  int64_t bytes_parsed = list_state.bytes_parsed_;
949  int id = list_state.id_;
950 
951  if (bytes_parsed != list_state.size_)
952  break;
953 
954  list_state_stack_.pop_back();
955 
956  WebMParserClient* client = NULL;
957  if (!list_state_stack_.empty()) {
958  // Update the bytes_parsed_ for the parent element.
959  list_state_stack_.back().bytes_parsed_ += bytes_parsed;
960  client = list_state_stack_.back().client_;
961  } else {
962  client = root_client_;
963  }
964 
965  if (!client->OnListEnd(id))
966  return false;
967  }
968 
969  DCHECK_GE(lists_ended, 1);
970 
971  if (list_state_stack_.empty())
972  ChangeState(DONE_PARSING_LIST);
973 
974  return true;
975 }
976 
977 bool WebMListParser::IsSiblingOrAncestor(int id_a, int id_b) const {
978  DCHECK((id_a == kWebMIdSegment) || (id_a == kWebMIdCluster));
979 
980  if (id_a == kWebMIdCluster) {
981  // kWebMIdCluster siblings.
982  for (size_t i = 0; i < arraysize(kSegmentIds); i++) {
983  if (kSegmentIds[i].id_ == id_b)
984  return true;
985  }
986  }
987 
988  // kWebMIdSegment siblings.
989  return ((id_b == kWebMIdSegment) || (id_b == kWebMIdEBMLHeader));
990 }
991 
992 } // namespace media
993 } // namespace shaka
All the methods that are virtual are virtual for mocking.
WebMListParser(int id, WebMParserClient *client)
Definition: webm_parser.cc:722
void Reset()
Resets the state of the parser so it can start parsing a new list.
Definition: webm_parser.cc:733
int Parse(const uint8_t *buf, int size)
Definition: webm_parser.cc:738