shaka-packager/tools/valgrind/scan-build.py

#!/usr/bin/env python
# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import argparse
import errno
import os
import re
import sys
import urllib
import urllib2

# Where all the data lives: URL of the waterfall's "builders" page. Waterfall
# downloads this page, and Builder appends "/<name>/builds/<n>" to it.
ROOT_URL = "http://build.chromium.org/p/chromium.memory.fyi/builders"

# TODO(groby) - support multi-line search from the command line. Useful when
# scanning for classes of failures, see below.
# NOTE: SEARCH_STRING is a sample multi-line search term; no code visible in
# this file references it.
SEARCH_STRING = """<p class=\"failure result\">
Failed memory test: content
</p>"""

# Name of the log cache directory; main() resolves it relative to the
# directory containing this script.
CACHE_DIR = "buildlogs.tmp"

# If we don't find anything after searching |CUTOFF| logs, we're probably done.
# Builder.ScanLogs stops once this many scanned logs produced no match.
CUTOFF = 100

def EnsurePath(path):
  """Creates directory |path|, including missing parents, if necessary.

  An "already exists" error reported by the OS is ignored; any other OSError
  propagates to the caller.
  """
  try:
    os.makedirs(path)
  except OSError as err:
    if err.errno == errno.EEXIST:
      # Somebody (possibly us, earlier) already created it - nothing to do.
      return
    raise


class Cache(object):
  """Disk cache for remote documents, rooted at a single directory.

  Documents are addressed by a local name (relative to the cache root, or an
  absolute path below it) and are returned as a list of lines without line
  terminators, whether they come from disk or from the network.
  """

  def __init__(self, root_dir):
    """Remembers the absolute form of |root_dir| as the cache root."""
    self._root_dir = os.path.abspath(root_dir)

  def _LocalName(self, name):
    """Maps |name| to a path inside the cache.

    If name is a relative path, treat it as relative to cache root.
    If it is absolute and under cache root, pass it through.
    Otherwise, raise AssertionError.
    """
    if not os.path.isabs(name):
      return os.path.join(self._root_dir, name)

    # os.path.commonprefix() compares character by character, so it would
    # accept a sibling such as "<root>-other/x". Compare whole path components
    # instead, after collapsing any ".." segments.
    root = self._root_dir
    normalized = os.path.normpath(name)
    under_root = normalized.startswith(root.rstrip(os.sep) + os.sep)
    if normalized != root and not under_root:
      # Raised explicitly (rather than via `assert`) so the check survives -O.
      raise AssertionError("%s is not under cache root %s" % (name, root))
    return name

  def _FetchLocal(self, local_name):
    """Returns the cached lines for |local_name|, or None if not cached.

    The directory that would hold the file is created as a side effect.
    """
    local_name = self._LocalName(local_name)
    EnsurePath(os.path.dirname(local_name))
    if not os.path.exists(local_name):
      return None
    with open(local_name, 'r') as f:
      # splitlines() keeps the result shape identical to what Update() returns.
      return f.read().splitlines()

  def _FetchRemote(self, remote_name):
    """Downloads |remote_name| and returns the response body.

    Prints a diagnostic and re-raises if the URL cannot be opened.
    """
    try:
      response = urllib2.urlopen(remote_name)
    except Exception:
      print("Could not fetch %s" % remote_name)
      raise
    try:
      return response.read()
    finally:
      response.close()

  def Update(self, local_name, remote_name):
    """Downloads |remote_name|, stores it as |local_name|, returns its lines."""
    local_name = self._LocalName(local_name)
    EnsurePath(os.path.dirname(local_name))
    blob = self._FetchRemote(remote_name)
    # The `with` block guarantees the data is flushed and the handle released
    # before anyone tries to read the cache file back.
    with open(local_name, "w") as f:
      f.write(blob)
    return blob.splitlines()

  def FetchData(self, local_name, remote_name):
    """Returns the lines of |local_name|, downloading them if necessary.

    A missing or empty cache file triggers a download from |remote_name|.
    """
    result = self._FetchLocal(local_name)
    if result:
      return result
    # If we get here, the local cache does not exist yet. Fetch, and store.
    return self.Update(local_name, remote_name)


class Builder(object):
  """A single builder on a waterfall, identified by its name."""

  def __init__(self, waterfall, name):
    """Binds the builder called |name| to its owning |waterfall|."""
    self._waterfall = waterfall
    self._name = name

  def Name(self):
    """Returns the name this builder was created with."""
    return self._name

  def LatestBuild(self):
    """Returns the latest build number the waterfall reports for this builder."""
    return self._waterfall.GetLatestBuild(self._name)

  def GetBuildPath(self, build_num):
    """Returns the URL of build |build_num|; the builder name is URL-quoted."""
    root_url = self._waterfall._root_url
    quoted_name = urllib.quote(self._name)
    return "%s/%s/builds/%d" % (root_url, quoted_name, build_num)

  def _FetchBuildLog(self, build_num):
    """Returns the lines of build |build_num|'s page via the waterfall cache."""
    cache_dir = "builds/%s" % self._name
    cache_file = os.path.join(cache_dir, "%d.log" % build_num)
    return self._waterfall._cache.FetchData(
        cache_file, self.GetBuildPath(build_num))

  def _CheckLog(self, build_num, tester):
    """Returns True as soon as |tester| accepts a line of the build's log."""
    for line in self._FetchBuildLog(build_num):
      if tester(line):
        return True
    return False

  def ScanLogs(self, tester):
    """Walks builds from the latest one downwards and collects matches.

    Scanning stops at build 0 (which is not scanned) or once CUTOFF scanned
    logs did not match. Returns the matching build numbers, newest first.
    """
    hits = []
    misses = 0
    current = self.LatestBuild()
    while current != 0 and misses < CUTOFF:
      matched = self._CheckLog(current, tester)
      if matched:
        hits.append(current)
      else:
        misses += 1
      current -= 1
    return hits


class Waterfall(object):
  """The set of builders published on one waterfall's "builders" page.

  The page is obtained through a Cache and parsed lazily into a map of
  builder name -> Builder and a map of builder name -> latest build number.
  """

  def __init__(self, root_url, cache_dir):
    """Sets up an empty waterfall for |root_url|, caching under |cache_dir|."""
    self._root_url = root_url
    self._builders = {}
    self._top_revision = {}
    self._cache = Cache(cache_dir)

  def Builders(self):
    """Returns the Builder objects parsed so far."""
    return self._builders.values()

  def Update(self):
    """Re-downloads the builders page and re-parses it."""
    self._cache.Update("builders", self._root_url)
    # FetchInfo() returns early once |_top_revision| is populated, so the
    # previously parsed state must be dropped or the fresh page is never read.
    self._builders.clear()
    self._top_revision.clear()
    self.FetchInfo()

  def FetchInfo(self):
    """Parses the builders page, unless build numbers are already known.

    Lines that mention a builder link but fit neither expected pattern are
    skipped.
    """
    if self._top_revision:
      return

    html = self._cache.FetchData("builders", self._root_url)

    # Search for both builders and latest build number in HTML:
    # <td class="box"><a href="builders/<builder-name>"> identifies a builder,
    # <a href="builders/<builder-name>/builds/<build-num>"> is the latest build.
    box_matcher = re.compile(r'.*a href[^>]*>([^<]*)<')
    build_matcher = re.compile(r'.*a href="builders/(.*)/builds/([0-9]+)".*')
    last_builder = ""
    for line in html:
      if 'a href="builders/' not in line:
        continue
      if 'td class="box"' in line:
        box = box_matcher.match(line)
        if box is None:
          continue
        last_builder = box.group(1)
        self._builders[last_builder] = Builder(self, last_builder)
      else:
        result = build_matcher.match(line)
        if result is None:
          # A builder link that is not a ".../builds/<n>" link.
          continue
        builder = result.group(1)
        # The build link is expected to belong to the builder seen last.
        assert builder == urllib.quote(last_builder)
        self._top_revision[last_builder] = int(result.group(2))

  def GetLatestBuild(self, name):
    """Returns the latest build number of builder |name|.

    Raises KeyError if no build number was found for |name|.
    """
    self.FetchInfo()
    assert self._top_revision
    return self._top_revision[name]


class MultiLineChange(object):
  """Callable that detects a multi-line search term in a stream of lines.

  Feed it one line per call. It reports a match when the most recent lines,
  taken in order, each contain the corresponding line of the search term.
  State is kept between calls, so one instance follows one line stream.
  """

  def __init__(self, lines):
    """|lines| is the search term, one entry per line to be matched."""
    self._tracked_lines = lines
    # The most recent input lines, at most len(lines) of them.
    self._window = []

  def __call__(self, line):
    """ Test a single line against multi-line change.

    Returns True if |line| completes an occurrence of the tracked lines,
    False otherwise. An empty search term never matches.
    """
    wanted = self._tracked_lines
    if not wanted:
      return False

    # Keep a sliding window instead of a single cursor: a cursor that resets
    # on mismatch forgets lines that could start a later occurrence (e.g.
    # term [A, B] on input A, A, B).
    window = self._window
    window.append(line)
    if len(window) > len(wanted):
      del window[0]
    if len(window) < len(wanted):
      return False

    if all(term in seen for term, seen in zip(wanted, window)):
      # Start from scratch after a hit so occurrences never overlap.
      del window[:]
      return True
    return False


def main(argv):
  """Command-line entry point.

  |argv| is the full argument vector, program name first. Exactly one of
  --update (refresh the builders page and download build logs into the cache)
  or --find <term> (report which builds contain the term) is required.
  """
  # Create argument parser.
  parser = argparse.ArgumentParser()
  commands = parser.add_mutually_exclusive_group(required=True)
  commands.add_argument("--update", action='store_true')
  commands.add_argument("--find", metavar='search term')
  # Parse the vector we were handed rather than implicitly reading sys.argv.
  args = parser.parse_args(argv[1:])

  # The cache lives next to the script.
  script_dir = os.path.abspath(os.path.dirname(argv[0]))
  cache_path = os.path.join(script_dir, CACHE_DIR)

  fyi = Waterfall(ROOT_URL, cache_path)

  if args.update:
    fyi.Update()
    for builder in fyi.Builders():
      print("Updating %s" % builder.Name())
      # A tester that never matches makes ScanLogs fetch (and thereby cache)
      # logs until its cutoff is reached.
      builder.ScanLogs(lambda x: False)

  if args.find:
    tester = MultiLineChange(args.find.splitlines())
    fyi.FetchInfo()

    print("SCANNING FOR  %s" % args.find)
    for builder in fyi.Builders():
      print("Scanning %s" % builder.Name())
      occurrences = builder.ScanLogs(tester)
      if occurrences:
        min_build = min(occurrences)
        build_url = builder.GetBuildPath(min_build)
        print("Earliest occurrence in build %d" % min_build)
        print("Latest occurrence in build %d" % max(occurrences))
        print("Latest build: %d" % builder.LatestBuild())
        print(build_url)
        print("%d total" % len(occurrences))


# Run main() only when executed as a script; its return value becomes the
# process exit status.
if __name__ == "__main__":
  sys.exit(main(sys.argv))