shaka-packager/tools/code_coverage/process_coverage.py

#!/usr/bin/env python
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


"""Script to clean the lcov files and convert it to HTML

TODO(niranjan): Add usage information here
"""


import optparse
import os
import shutil
import subprocess
import sys
import tempfile
import time
import urllib2


# Sources that only exist as build-time generated files. Any reference to
# them is stripped from the lcov data; otherwise genhtml throws an error.
win32_srcs_exclude = [
    'parse.y',
    'xpathgrammar.cpp',
    'cssgrammar.cpp',
    'csspropertynames.gperf',
]

# How many lines of a new coverage data set go to the dashboard in a single
# POST request.
POST_CHUNK_SIZE = 50

# How many POST request failures are tolerated before exiting.
MAX_FAILURES = 5

def CleanPathNames(dir):
  """Clean the pathnames of the HTML generated by genhtml.

  This method is required only for code coverage on Win32. Due to a known issue
  with reading from CIFS shares mounted on Linux, genhtml appends a ^M to every
  file name it reads from the Windows share, causing corrupt filenames in
  genhtml's output folder.

  Only file names are renamed; directory names are left untouched.

  Args:
    dir: Output folder of the genhtml output. It is walked recursively.

  Returns:
    None
  """
  # Strip off the ^M characters that get appended to the file name.
  for dirpath, _, filenames in os.walk(dir):
    for name in filenames:
      clean_name = name.replace('\r', '')
      if clean_name != name:
        # os.walk yields bare file names, so both ends of the rename must be
        # anchored at dirpath; otherwise they resolve against the current
        # working directory and the rename fails (or hits the wrong file).
        os.rename(os.path.join(dirpath, name),
                  os.path.join(dirpath, clean_name))


def GenerateHtml(lcov_path, dash_root):
  """Runs genhtml to convert lcov data to human readable HTML.

  This script expects the LCOV file name to be in the format:
  chrome_<platform>_<revision#>.lcov.
  This method parses the file name and then sets up the correct folder
  hierarchy (<dash_root>/<platform>/<revision>) for the coverage data and then
  runs genhtml to get the actual HTML formatted coverage data.

  Args:
    lcov_path: Path of the lcov data file.
    dash_root: Root location of the dashboard.

  Returns:
    A placeholder string ('dummy') on success; the real coverage percentage
    is not parsed yet (see the TODO below).
    None on failure: the file name is not in the expected format, genhtml
    could not be started, or genhtml exited with a non-zero status.
  """
  # Parse the LCOV file name.
  filename = os.path.basename(lcov_path).split('.')[0]
  name_parts = filename.split('_')
  # Remove trailing '/', but keep a bare '/' root from collapsing to ''.
  dash_root = dash_root.rstrip('/') or dash_root

  # TODO(niranjan): Check the formatting using a regexp
  if len(name_parts) < 3:
    # TODO(niranjan): Add failure logging here.
    return None # File not formatted correctly

  # Set up correct folder hierarchy in the dashboard root. The last two
  # underscore-separated fields are the platform and the revision.
  platform, revision = name_parts[-2:]
  output_dir = os.path.join(dash_root, platform, revision)
  if not os.path.isdir(output_dir):
    # Tolerate re-runs for a revision whose folder already exists.
    os.makedirs(output_dir)

  # Run genhtml. An argument list (no shell) keeps paths that contain spaces
  # or shell metacharacters intact.
  try:
    exit_status = subprocess.call(
        ['/usr/bin/genhtml', '-o', output_dir, lcov_path])
  except OSError:
    # genhtml is missing or not executable.
    return None
  if exit_status != 0:
    return None

  # TODO(niranjan): Parse the stdout and return coverage percentage.
  CleanPathNames(output_dir)
  return 'dummy' # TODO(niranjan): Return actual percentage.


def _CountLinesOfCode(source_path):
  """Returns the line count cloc.pl reports for source_path.

  We want an accurate count of the lines of code in a given file so that we
  can estimate the code coverage percentage accurately. We use a third party
  script cloc.pl which gives that count and then just parse its command line
  output to filter out the other unnecessary data.

  Args:
    source_path: Path of the source file handed to cloc.pl.

  Returns:
    The count as a string, or None if the cloc output contains 'error:'.
  """
  # TODO(niranjan): Find out a better way of doing this.
  # TODO(niranjan): Add a check to see if cloc is present on the machine.
  command = ['perl', 'cloc.pl', source_path]
  output = subprocess.Popen(command,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT).communicate()[0]
  # str.find returns -1 (which is truthy) when nothing matches, so the result
  # has to be compared explicitly.
  if output.find('error:') != -1:
    return None

  # Take the second-to-last '='-separated chunk of the report, cut it at the
  # first 'x', and use the second-to-last space-separated token as the count.
  # NOTE(review): this is tied to the report layout of the cloc.pl version in
  # use -- confirm before upgrading cloc.
  chunks = output.split('=')
  tokens = chunks[len(chunks) - 2].split('x')[0].split(' ')
  return tokens[len(tokens) - 2]


def CleanWin32Lcov(lcov_path, src_root):
  r"""Cleanup the lcov data generated on Windows.

  This method fixes up the paths inside the lcov file from the Win32 specific
  paths to the actual paths of the mounted CIFS share. The lcov files generated
  on Windows have the following format:

  SF:c:\chrome_src\src\skia\sgl\skscan_antihair.cpp
  DA:97,0
  DA:106,0
  DA:107,0
  DA:109,0
  ...
  end_of_record

  This method changes the source-file (SF) lines to a format compatible with
  genhtml on Linux by fixing paths. This method also removes references to
  certain dynamically generated files to be excluded from the code coverage.

  For every record that is kept, '<source path>,<line count>' is written to
  lcov_path + '.csv'. On success the lcov file is replaced by the cleaned
  copy. If cloc reports an error the lcov file is left untouched.

  Args:
    lcov_path: Path of the Win32 lcov file to be cleaned.
    src_root: Location of the source and symbols dir.
  Returns:
    None
  """
  src_root = src_root.rstrip('/')       # Remove trailing '/'
  (tmpfile_id, tmpfile_name) = tempfile.mkstemp()
  try:
    # os.fdopen wraps the descriptor returned by mkstemp so that it gets
    # closed together with the file object instead of leaking.
    with os.fdopen(tmpfile_id, 'w') as tmpfile:
      with open(lcov_path, 'r') as lcov:
        with open(lcov_path + '.csv', 'w') as loc_csv_file:
          strip_flag = False
          for line in lcov:
            if line.startswith('SF:'):
              # We want to exclude certain auto-generated files otherwise
              # genhtml will fail to convert lcov to HTML.
              strip_flag = any(exp in line for exp in win32_srcs_exclude)

              # Now we normalize the paths
              # e.g. Change SF:c:\foo\src\... to SF:<src_root>/foo/src/...
              win_path = line[len('SF:'):].rstrip('\r\n')
              if len(win_path) > 1 and win_path[1] == ':':
                win_path = win_path[2:]  # Drop the drive letter.
              file_for_loc = (src_root + win_path).replace('\\', '/')
              file_for_loc = file_for_loc.replace('\r', '')
              line = 'SF:%s\n' % file_for_loc

              # Only records that are kept get a csv row, so that the csv
              # rows stay lined up with the records of the cleaned lcov file.
              if not strip_flag:
                loc = _CountLinesOfCode(file_for_loc)
                if loc is None:
                  return None
                loc_csv_file.write('%s,%s\r\n' % (file_for_loc, loc))

            # Write to the temp file if the section to write is valid
            if not strip_flag:
              tmpfile.write(line)

            # Reset the strip flag. The line still carries its line ending,
            # so it has to be stripped before the comparison.
            if line.strip() == 'end_of_record':
              strip_flag = False

    # Replace the lcov file by the 'clean' tmpfile.
    shutil.move(tmpfile_name, lcov_path)
  finally:
    # Only still present when cleaning was aborted before the move.
    if os.path.exists(tmpfile_name):
      os.remove(tmpfile_name)


def ParseCoverageDataForDashboard(lcov_path):
  """Parse code coverage data into coverage results per source node.

  Use lcov and linecount data to create a map of source nodes to
  corresponding total and tested line counts. The linecount data is read from
  lcov_path + '.csv'; its rows are matched to the lcov records by position.

  Args:
    lcov_path: File path to lcov coverage data.

  Returns:
    List of strings with comma separated source node and coverage
    percentage ('<node>,<percent with two decimals>'), sorted by node.
    Empty if the lcov file holds no complete record.

  Raises:
    IOError: If the linecount csv does not exist.
    ValueError: If a DA: line or a linecount csv row is malformed.
    IndexError: If the csv has fewer rows than the lcov file has records.
  """
  results = {}
  linecount_path = lcov_path + '.csv'
  # Raise explicitly: an assert on a (condition, message) tuple always
  # passes, and asserts vanish under -O anyway.
  if not os.path.exists(linecount_path):
    raise IOError('linecount csv does not exist at: %s' % linecount_path)
  with open(linecount_path, 'r') as csv_file:
    linecounts = csv_file.readlines()

  srcfile_index = 0
  srcfile_name = ''
  instrumented_set = set()
  executed_set = set()
  with open(lcov_path, 'r') as lcov_file:
    for line in lcov_file:
      line = line.strip()

      if line.startswith('SF:'):
        # Set the current srcfile name for a new src file declaration.
        instrumented_set = set()
        executed_set = set()
        srcfile_name = line[len('SF:'):]

      elif line.startswith('DA:'):
        # Mark coverage data points for the current src file.
        line_info = line[len('DA:'):].split(',')
        if len(line_info) != 2:
          raise ValueError('DA: line format unexpected - %s' % line)
        (line_num, line_was_executed) = line_info
        instrumented_set.add(line_num)
        # Any non-zero value marks the line as executed.
        if int(line_was_executed):
          executed_set.add(line_num)

      elif line == 'end_of_record':
        # Update results for the current src file at record end.
        instrumented = len(instrumented_set)
        executed = len(executed_set)
        parent_directory = srcfile_name[:srcfile_name.rfind('/') + 1]
        if srcfile_index >= len(linecounts):
          raise IndexError('no linecount entry for - %s' % srcfile_name)
        # Split on the last comma so a comma inside the path is harmless.
        linecount_point = linecounts[srcfile_index].strip().rsplit(',', 1)
        if len(linecount_point) != 2:
          raise ValueError('linecount format unexpected - %s' %
                           linecounts[srcfile_index])
        (counted_path, counted_lines) = linecount_point
        srcfile_index += 1

        # Sanity check that path names in the lcov and linecount are lined up.
        if counted_path[-10:] != srcfile_name[-10:]:
          print('NAME MISMATCH: %s :: %s' % (srcfile_name, counted_path))
        # A file cannot have fewer lines than it has instrumented lines.
        total_lines = max(int(counted_lines), instrumented)

        # Keep counts the same way that it is done in the genhtml utility.
        # Count the coverage of a file towards the file,
        # the parent directory, and the source root.
        AddResults(results, srcfile_name, total_lines, executed)
        AddResults(results, parent_directory, total_lines, executed)
        AddResults(results, '/', instrumented, executed)

  keys = sorted(results)
  if len(keys) < 2:
    # No complete record (or a single degenerate node): nothing to trim.
    offset = 0
  else:
    # The first key (sorted) will be the base directory '/'
    # but its full path may be '/mnt/chrome_src/src/'
    # using this offset will ignore the part '/mnt/chrome_src/src'.
    # Offset is the last '/' that isn't the last character for the
    # first directory name in results (position 1 in keys).
    offset = len(keys[1][:keys[1][:-1].rfind('/')])

  lines = []
  for key in keys:
    if len(key) > offset:
      node_path = key[offset:]
    else:
      node_path = key
    (total, covered) = results[key]
    # A node without any counted line is reported as 0% instead of dividing
    # by zero.
    if total:
      percent = float(covered) * 100 / total
    else:
      percent = 0.0
    lines.append('%s,%.2f' % (node_path, percent))
  return lines


def AddResults(results, location, lines_total, lines_executed):
  """Add resulting line tallies to a location's total.

  Args:
    results: Map of node location to a (total lines, executed lines) tuple.
        It is updated in place; a missing location starts at (0, 0).
    location: Source node string.
    lines_total: Number of lines to add to the total count for this node.
    lines_executed: Number of lines to add to the executed count for this node.

  Returns:
    None
  """
  # dict.get with a default replaces the deprecated dict.has_key() lookup.
  (total, executed) = results.get(location, (0, 0))
  results[location] = (total + lines_total, executed + lines_executed)


def PostResultsToDashboard(lcov_path, results, post_url):
  """Post coverage results to coverage dashboard.

  The results are sent in chunks of POST_CHUNK_SIZE lines. Every request
  carries a 'last' query parameter that is 'True' only for the final chunk.
  An empty result list still produces one (empty, final) request.

  Args:
    lcov_path: File path for lcov data in the expected format:
        <project>_<platform>_<cl#>.coverage.lcov
        Only the file name is parsed; leading directories are ignored.
    results: string list in the appropriate posting format.
    post_url: Base URL of the coverage dashboard.

  Raises:
    ValueError: If the file name of lcov_path is not in the expected format.
    URLError: Propagated from SendPost once its retries are exhausted.
  """
  # Parse the file name only, so that '_' or '.' in a directory name cannot
  # leak into the project name. Splitting from the right keeps platform and
  # cl as the last two fields even if the project name contains '_'.
  name = os.path.basename(lcov_path).split('.')[0]
  project_platform_cl = name.rsplit('_', 2)
  # Raise explicitly: an assert on a (condition, message) tuple always passes.
  if len(project_platform_cl) != 3:
    raise ValueError('lcov_path not in expected format: %s' % lcov_path)
  (project, platform, cl_string) = project_platform_cl
  project_name = '%s-%s' % (project, platform)
  url = '%s/newdata.do?project=%s&cl=%s' % (post_url, project_name, cl_string)

  # Send POSTs of POST_CHUNK_SIZE lines of the result set until
  # there is no more data and last_loop is set to True.
  last_loop = False
  cur_line = 0
  while not last_loop:
    body = '\n'.join(results[cur_line:cur_line + POST_CHUNK_SIZE])
    cur_line += POST_CHUNK_SIZE
    last_loop = (cur_line >= len(results))
    req = urllib2.Request('%s&last=%s' % (url, str(last_loop)), body)
    req.add_header('Content-Type', 'text/plain')
    SendPost(req)


# Module-wide tally of consecutive failed POST requests.
num_fails = 0

def SendPost(req):
  """Issue a POST request, retrying until MAX_FAILURES failures are reached.

  A successful request resets the module-wide failure tally. After a failed
  request the function waits five seconds before trying again.

  Args:
    req: A urllib2 request object.

  Raises:
    URLError: If urlopen throws after too many retries.
    HTTPError: If urlopen throws after too many retries.
  """
  global num_fails
  while True:
    try:
      urllib2.urlopen(req)
    except (urllib2.URLError, urllib2.HTTPError):
      num_fails += 1
      if num_fails >= MAX_FAILURES:
        print('POST request exceeded allowed retries.')
        raise
      print('fail, retrying (%d)' % num_fails)
      time.sleep(5)
    else:
      # The request went through; start counting from scratch.
      num_fails = 0
      return


def main():
  """Clean a Win32 lcov file, render it to HTML and post it to the dashboard.

  All of --platform, --source, --dash_root, --lcov and --post_url are
  required; a missing one ends the script through parser.error().

  Returns:
    0 on success, and also when run on a non-Linux host (nothing is done).
    1 if the platform is not 'win32' or the HTML generation failed.
  """
  if not sys.platform.startswith('linux'):
    print('This script is supported only on Linux')
    return 0

  # Command line parsing
  parser = optparse.OptionParser()
  parser.add_option('-p',
                    '--platform',
                    dest='platform',
                    default=None,
                    # The adjacent literals are concatenated, so the first
                    # one needs its trailing space.
                    help=('Platform that the lcov file was generated on. Must '
                          'be one of {win32, linux2, linux3, macosx}'))
  parser.add_option('-s',
                    '--source',
                    dest='src_dir',
                    default=None,
                    help='Path to the source code and symbols')
  parser.add_option('-d',
                    '--dash_root',
                    dest='dash_root',
                    default=None,
                    help='Root directory for the dashboard')
  parser.add_option('-l',
                    '--lcov',
                    dest='lcov_path',
                    default=None,
                    help='Location of the LCOV file to process')
  parser.add_option('-u',
                    '--post_url',
                    dest='post_url',
                    default=None,
                    help='Base URL of the coverage dashboard')
  (options, args) = parser.parse_args()

  # Every option is mandatory; report the first missing one.
  required_options = (('platform', 'Platform not specified'),
                      ('lcov_path', 'lcov file path not specified'),
                      ('src_dir', 'Source directory not specified'),
                      ('dash_root', 'Dashboard root not specified'),
                      ('post_url', 'Post URL not specified'))
  for dest, message in required_options:
    if getattr(options, dest) is None:
      parser.error(message)

  # Only Win32 coverage data is handled so far.
  if options.platform != 'win32':
    print('Unsupported platform')
    return 1

  CleanWin32Lcov(options.lcov_path, options.src_dir)
  percent = GenerateHtml(options.lcov_path, options.dash_root)
  if percent is None:
    # TODO(niranjan): Add logging.
    print('Failed to generate code coverage')
    return 1
  # TODO(niranjan): Do something with the code coverage numbers

  # Prep coverage results for dashboard and post new set.
  parsed_data = ParseCoverageDataForDashboard(options.lcov_path)
  PostResultsToDashboard(options.lcov_path, parsed_data, options.post_url)
  return 0


if __name__ == '__main__':
  sys.exit(main())