shaka-packager/tools/code_coverage/croc_scan.py

# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Crocodile source scanners."""


import re


class Scanner(object):
  """Generic source scanner.

  Finds the lines of a source file which may contain executable code, by
  removing comments and keeping track of strings which span several lines.

  Subclasses adapt the scanner to a language by overriding these attributes:
    re_token: Compiled regular expression which matches every token of
        interest (comment delimiters and string delimiters).  The token text
        is read from capture group 1, or from the whole match if the pattern
        has no capture groups.
    comment_to_eol: List of tokens which start a comment that runs to the
        end of the line.
    comment_start: Token which opens a multi-line comment, or None.
    comment_end: Token which closes a multi-line comment, or None.
  """

  def __init__(self):
    """Constructor."""

    # The token is wrapped in a capture group, following the same convention
    # as the patterns of the language-specific subclasses.
    self.re_token = re.compile('(#)')
    self.comment_to_eol = ['#']
    self.comment_start = None
    self.comment_end = None

  def ScanLines(self, lines):
    """Scans the lines for executable statements.

    A line is reported if anything other than whitespace remains once its
    comments are removed, or if the line begins inside a string which was
    opened on an earlier line.

    Args:
      lines: Iterator returning source lines.

    Returns:
      An array of line numbers which are executable.  Line numbers start at 1.
    """
    exe_lines = []

    in_string = None
    in_comment = None
    comment_index = None

    # Read the token from capture group 1 when the pattern defines one;
    # otherwise fall back to the whole match, so that a pattern without groups
    # does not raise IndexError.
    token_group = 1 if self.re_token.groups else 0

    for lineno, line in enumerate(lines, 1):
      in_string_at_start = in_string

      # Note: finditer() keeps scanning the string it was originally given.
      # Rewrites of 'line' below preserve its length up to the current match,
      # so the match offsets stay valid.
      for t in self.re_token.finditer(line):
        tokenstr = t.group(token_group)
        if tokenstr is not None:
          # A pattern such as '^\s*#' captures the whitespace in front of the
          # token.  Strip it, or the token would not be recognized below and
          # would be mistaken for the start of a string.
          tokenstr = tokenstr.strip()

        if in_comment:
          # Inside a multi-line comment, so look for end token
          if tokenstr == in_comment:
            in_comment = None
            # Replace comment with spaces
            line = (line[:comment_index]
                    + ' ' * (t.end(0) - comment_index)
                    + line[t.end(0):])

        elif in_string:
          # Inside a string, so look for end token
          if tokenstr == in_string:
            in_string = None

        elif tokenstr in self.comment_to_eol:
          # Single-line comment, so truncate line at start of token
          line = line[:t.start(0)]
          break

        elif tokenstr == self.comment_start:
          # Multi-line comment start - end token is comment_end
          in_comment = self.comment_end
          comment_index = t.start(0)

        elif tokenstr == self.comment_end:
          # End-of-comment token with no comment open.  It is not a string
          # delimiter, so ignore it instead of treating it as a string start.
          pass

        else:
          # Starting a string - end token is same as start
          in_string = tokenstr

      # If still in comment at end of line, remove comment
      if in_comment:
        line = line[:comment_index]
        # Next line, delete from the beginning
        comment_index = 0

      # If line-sans-comments is not empty, claim it may be executable
      if line.strip() or in_string_at_start:
        exe_lines.append(lineno)

    # Return executable lines
    return exe_lines

  def Scan(self, filename):
    """Reads the file and scans its lines.

    Errors raised while opening or reading the file propagate to the caller.

    Args:
      filename: Path to file to scan.

    Returns:
      An array of line numbers which are executable.
    """

    # TODO: All manner of error checking
    # The with-statement closes the file even if scanning raises.
    with open(filename, 'rt') as f:
      return self.ScanLines(f)


class PythonScanner(Scanner):
  """Python source scanner.

  Recognizes '#' comments and strings delimited by single quotes, double
  quotes or triple quotes.  Python has no multi-line comment tokens.
  """

  def __init__(self):
    """Constructor."""
    Scanner.__init__(self)

    # Capture group 1 is the token itself: '#', a triple quote, or a single
    # quote character.
    #
    # The non-capturing prefix decides whether a quote is escaped.  A quote is
    # a real delimiter only when it is preceded by an even number (possibly
    # zero) of backslashes; with an odd number, the last backslash escapes the
    # quote.  The prefix therefore matches either:
    #   - a run of backslash pairs which is not itself preceded by a
    #     backslash and which ends right before a quote; or
    #   - nothing at all, which is allowed only when the next character is
    #     not a quote (so '#' is still matched wherever it appears).
    self.re_token = re.compile(
        r'(?:(?<!\\)(?:\\\\)*(?=["\'])|(?!["\']))'
        r'(#|\'\'\'|"""|["\'])')
    self.comment_to_eol = ['#']
    self.comment_start = None
    self.comment_end = None


class CppScanner(Scanner):
  """C / C++ / ObjC / ObjC++ source scanner.

  Recognizes '//' comments, '/* ... */' comments, quoted strings and
  character literals.  Lines whose first non-blank character is '#' are
  treated as comments.
  """

  def __init__(self):
    """Constructor."""
    Scanner.__init__(self)

    # Capture group 1 is the bare token: '#', '//', '/*', '*/' or a quote
    # character.  It must be exactly the token text, because it is compared
    # with the entries of comment_to_eol, comment_start and comment_end.
    #
    # The non-capturing prefix matches one of:
    #   - a run of backslash pairs which is not itself preceded by a
    #     backslash and which ends right before a quote.  A quote is a real
    #     delimiter only after an even number (possibly zero) of backslashes;
    #     with an odd number, the last backslash escapes the quote.
    #   - the whitespace between the start of the line and a '#'.  This keeps
    #     the indentation of a preprocessor line out of the captured token
    #     while the match still starts at column 0, so the whole line is
    #     removed.
    #   - nothing at all, which is allowed only when the next character is
    #     neither '#' nor a quote.  Backslashes thus have no effect on the
    #     comment tokens.
    self.re_token = re.compile(
        r'(?:(?<!\\)(?:\\\\)*(?=["\'])|^\s*(?=#)|(?![#"\']))'
        r'(#|//|/\*|\*/|["\'])')

    # TODO: Treat '\' at EOL as a token, and handle it as continuing the
    # previous line.  That is, if in a comment-to-eol, this line is a comment
    # too.

    # Note that we treat # at beginning of line as a comment, so that we ignore
    # preprocessor definitions
    self.comment_to_eol = ['//', '#']

    self.comment_start = '/*'
    self.comment_end = '*/'


def ScanFile(filename, language):
  """Finds the executable lines of a source file.

  Picks the scanner which matches the language and runs it on the file.  The
  language name is compared exactly, including case.

  Args:
    filename: Path to file to scan.
    language: Language for file ('C', 'C++', 'python', 'ObjC', 'ObjC++')

  Returns:
    A list of executable lines, or an empty list if the file was not a handled
        language.
  """

  if language == 'python':
    scanner = PythonScanner()
  elif language in ('C', 'C++', 'ObjC', 'ObjC++'):
    scanner = CppScanner()
  else:
    # Something we don't handle; the file is never opened
    return []

  return scanner.Scan(filename)
Start with media/mp4, media/webm and base codes from Chromium. 2013-09-24 01:35:40 +00:00			`# Copyright (c) 2011 The Chromium Authors. All rights reserved.`
			`# Use of this source code is governed by a BSD-style license that can be`
			`# found in the LICENSE file.`

			`"""Crocodile source scanners."""`


			`import re`


			`class Scanner(object):`
			`"""Generic source scanner."""`

			`def __init__(self):`
			`"""Constructor."""`

			`self.re_token = re.compile('#')`
			`self.comment_to_eol = ['#']`
			`self.comment_start = None`
			`self.comment_end = None`

			`def ScanLines(self, lines):`
			`"""Scans the lines for executable statements.`

			`Args:`
			`lines: Iterator returning source lines.`

			`Returns:`
			`An array of line numbers which are executable.`
			`"""`
			`exe_lines = []`
			`lineno = 0`

			`in_string = None`
			`in_comment = None`
			`comment_index = None`

			`for line in lines:`
			`lineno += 1`
			`in_string_at_start = in_string`

			`for t in self.re_token.finditer(line):`
			`tokenstr = t.groups()[0]`

			`if in_comment:`
			`# Inside a multi-line comment, so look for end token`
			`if tokenstr == in_comment:`
			`in_comment = None`
			`# Replace comment with spaces`
			`line = (line[:comment_index]`
			`+ ' ' * (t.end(0) - comment_index)`
			`+ line[t.end(0):])`

			`elif in_string:`
			`# Inside a string, so look for end token`
			`if tokenstr == in_string:`
			`in_string = None`

			`elif tokenstr in self.comment_to_eol:`
			`# Single-line comment, so truncate line at start of token`
			`line = line[:t.start(0)]`
			`break`

			`elif tokenstr == self.comment_start:`
			`# Multi-line comment start - end token is comment_end`
			`in_comment = self.comment_end`
			`comment_index = t.start(0)`

			`else:`
			`# Starting a string - end token is same as start`
			`in_string = tokenstr`

			`# If still in comment at end of line, remove comment`
			`if in_comment:`
			`line = line[:comment_index]`
			`# Next line, delete from the beginning`
			`comment_index = 0`

			`# If line-sans-comments is not empty, claim it may be executable`
			`if line.strip() or in_string_at_start:`
			`exe_lines.append(lineno)`

			`# Return executable lines`
			`return exe_lines`

			`def Scan(self, filename):`
			`"""Reads the file and scans its lines.`

			`Args:`
			`filename: Path to file to scan.`

			`Returns:`
			`An array of line numbers which are executable.`
			`"""`

			`# TODO: All manner of error checking`
			`f = None`
			`try:`
			`f = open(filename, 'rt')`
			`return self.ScanLines(f)`
			`finally:`
			`if f:`
			`f.close()`


			`class PythonScanner(Scanner):`
			`"""Python source scanner."""`

			`def __init__(self):`
			`"""Constructor."""`
			`Scanner.__init__(self)`

			`# TODO: This breaks for strings ending in more than 2 backslashes. Need`
			`# a pattern which counts only an odd number of backslashes, so the last`
			`# one thus escapes the quote.`
			`self.re_token = re.compile(r'(#|\'\'\'|"""|(?<!(?<!\\)\\)["\'])')`
			`self.comment_to_eol = ['#']`
			`self.comment_start = None`
			`self.comment_end = None`


			`class CppScanner(Scanner):`
			`"""C / C++ / ObjC / ObjC++ source scanner."""`

			`def __init__(self):`
			`"""Constructor."""`
			`Scanner.__init__(self)`

			`# TODO: This breaks for strings ending in more than 2 backslashes. Need`
			`# a pattern which counts only an odd number of backslashes, so the last`
			`# one thus escapes the quote.`
			`self.re_token = re.compile(r'(^\s*#|//|/\*|\*/|(?<!(?<!\\)\\)["\'])')`

			`# TODO: Treat '\' at EOL as a token, and handle it as continuing the`
			`# previous line. That is, if in a comment-to-eol, this line is a comment`
			`# too.`

			`# Note that we treat # at beginning of line as a comment, so that we ignore`
			`# preprocessor definitions`
			`self.comment_to_eol = ['//', '#']`

			`self.comment_start = '/*'`
			`self.comment_end = '*/'`


			`def ScanFile(filename, language):`
			`"""Scans a file for executable lines.`

			`Args:`
			`filename: Path to file to scan.`
			`language: Language for file ('C', 'C++', 'python', 'ObjC', 'ObjC++')`

			`Returns:`
			`A list of executable lines, or an empty list if the file was not a handled`
			`language.`
			`"""`

			`if language == 'python':`
			`return PythonScanner().Scan(filename)`
			`elif language in ['C', 'C++', 'ObjC', 'ObjC++']:`
			`return CppScanner().Scan(filename)`

			`# Something we don't handle`
			`return []`