shaka-packager/tools/valgrind/tsan_analyze.py

#!/usr/bin/env python
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# tsan_analyze.py

''' Given a ThreadSanitizer output file, parses errors and uniques them.'''

from collections import defaultdict
import hashlib
import logging
import optparse
import os
import re
import subprocess
import sys
import time

import common
import gdb_helper

# Global symbol table (ugh)
TheAddressTable = None


class _StackTraceLine(object):
  def __init__(self, line, address, binary):
    self.raw_line_ = line
    self.address = address
    self.binary = binary
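
  # __str__ substitutes "<file>:<line>" for the binary name once gdb has
  # resolved the address; e.g. a raw frame like "#0 0x4004a2: libfoo.so"
  # would render as "#0 0x4004a2: foo.cc:42" (sample values illustrative).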
  def __str__(self):
    global TheAddressTable
    file_name, line_no = TheAddressTable.GetFileLine(self.binary, self.address)
    if (file_name is None) or (line_no is None):
      return self.raw_line_
    else:
      return self.raw_line_.replace(self.binary,
                                    '%s:%s' % (file_name, line_no))


class TsanAnalyzer(object):
  ''' Given a set of ThreadSanitizer output files, parse all the errors out of
  them, unique them and output the results.'''

  LOAD_LIB_RE = re.compile(r'--[0-9]+-- ([^(:]*) \((0x[0-9a-f]+)\)')
  TSAN_LINE_RE = re.compile(r'==[0-9]+==\s*[#0-9]+\s*'
                            r'([0-9A-Fa-fx]+):'
                            r'(?:[^ ]* )*'
                            r'([^ :\n]+)')
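
  # Illustrative matches (sample values, not taken from a real log):
  # LOAD_LIB_RE matches loader lines such as
  #   "--4528-- /lib/libfoo.so (0x00400000)"
  # and TSAN_LINE_RE matches stack-frame lines such as
  #   "==4528==  #0  0x4004a2: Foo libfoo.so"
  # capturing the address and the trailing binary name.
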
  THREAD_CREATION_STR = ("INFO: T.* "
      "(has been created by T.* at this point|is program's main thread)")

  SANITY_TEST_SUPPRESSION = ("ThreadSanitizer sanity test "
      "(ToolsSanityTest.DataRace)")
  TSAN_RACE_DESCRIPTION = "Possible data race"
  TSAN_WARNING_DESCRIPTION = ("Unlocking a non-locked lock"
      "|accessing an invalid lock"
      "|which did not acquire this lock")
  RACE_VERIFIER_LINE = "Confirmed a race|unexpected race"
  TSAN_ASSERTION = "Assertion failed: "

  def __init__(self, source_dir, use_gdb=False):
    '''Constructs a TsanAnalyzer.

    Args:
      source_dir: Path to top of source tree for this build.
      use_gdb: if True, use gdb to resolve addresses to file:line pairs.
    '''
    self._use_gdb = use_gdb
    self._cur_testcase = None

  def ReadLine(self):
    '''Reads the next line into self.line_; when gdb is in use, also records
    loaded binaries and stack-trace addresses for later symbolization.'''
    self.line_ = self.cur_fd_.readline()
    self.stack_trace_line_ = None

    if not self._use_gdb:
      return

    global TheAddressTable
    match = TsanAnalyzer.LOAD_LIB_RE.match(self.line_)
    if match:
      binary, ip = match.groups()
      TheAddressTable.AddBinaryAt(binary, ip)
      return

    match = TsanAnalyzer.TSAN_LINE_RE.match(self.line_)
    if match:
      address, binary_name = match.groups()
      stack_trace_line = _StackTraceLine(self.line_, address, binary_name)
      TheAddressTable.Add(stack_trace_line.binary, stack_trace_line.address)
      self.stack_trace_line_ = stack_trace_line

  def ReadSection(self):
    """ Example of a section:
    ==4528== WARNING: Possible data race: {{{
    ==4528==   T20 (L{}):
    ==4528==    #0  MyTest::Foo1
    ==4528==    #1  MyThread::ThreadBody
    ==4528==   Concurrent write happened at this point:
    ==4528==   T19 (L{}):
    ==4528==    #0  MyTest::Foo2
    ==4528==    #1  MyThread::ThreadBody
    ==4528== }}}
    ------- suppression -------
    {
      <Put your suppression name here>
      ThreadSanitizer:Race
      fun:MyTest::Foo1
      fun:MyThread::ThreadBody
    }
    ------- end suppression -------
    """
    result = [self.line_]
    if re.search("{{{", self.line_):
      while not re.search('}}}', self.line_):
        self.ReadLine()
        if self.stack_trace_line_ is None:
          result.append(self.line_)
        else:
          result.append(self.stack_trace_line_)
      self.ReadLine()

      if re.match('-+ suppression -+', self.line_):
        # We need to calculate the suppression hash and prepend a line like
        # "Suppression (error hash=#0123456789ABCDEF#):" so the buildbot can
        # extract the suppression snippet.
        supp = ""
        while not re.match('-+ end suppression -+', self.line_):
          self.ReadLine()
          supp += self.line_
        self.ReadLine()

        if self._cur_testcase:
          result.append("The report came from the `%s` test.\n" %
                        self._cur_testcase)
        result.append("Suppression (error hash=#%016X#):\n" %
                      int(hashlib.md5(supp).hexdigest()[:16], 16))
        result.append(" For more info on using suppressions see "
            "http://dev.chromium.org/developers/how-tos/using-valgrind/threadsanitizer#TOC-Suppressing-data-races\n")
        result.append(supp)
    else:
      self.ReadLine()
    return result

  def ReadTillTheEnd(self):
    result = [self.line_]
    while self.line_:
      self.ReadLine()
      result.append(self.line_)
    return result

  def ParseReportFile(self, filename):
    '''Parses a report file and returns a list of ThreadSanitizer reports.

    Args:
      filename: report filename.
    Returns:
      A list of reports; each report is a list of lines. Lines are plain
      strings unless self._use_gdb is set, in which case resolved stack-trace
      lines are _StackTraceLine objects.
    '''
    ret = []
    self.cur_fd_ = open(filename, 'r')

    while True:
      # Read ThreadSanitizer reports.
      self.ReadLine()
      if not self.line_:
        break

      while True:
        tmp = []
        while re.search(TsanAnalyzer.RACE_VERIFIER_LINE, self.line_):
          tmp.append(self.line_)
          self.ReadLine()
        while re.search(TsanAnalyzer.THREAD_CREATION_STR, self.line_):
          tmp.extend(self.ReadSection())

        if re.search(TsanAnalyzer.TSAN_RACE_DESCRIPTION, self.line_):
          tmp.extend(self.ReadSection())
          ret.append(tmp)  # includes RaceVerifier and thread creation stacks
        elif (re.search(TsanAnalyzer.TSAN_WARNING_DESCRIPTION, self.line_) and
              not common.IsWindows()):  # workaround for http://crbug.com/53198
          tmp.extend(self.ReadSection())
          ret.append(tmp)
        else:
          break

      tmp = []
      if re.search(TsanAnalyzer.TSAN_ASSERTION, self.line_):
        tmp.extend(self.ReadTillTheEnd())
        ret.append(tmp)
        break
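
      # ThreadSanitizer summarizes matched suppressions with lines like
      # "used_suppression:  17 <suppression name>" (the count shown is
      # illustrative); accumulate the per-suppression totals.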
      match = re.search(r"used_suppression:\s+([0-9]+)\s(.*)", self.line_)
      if match:
        count, supp_name = match.groups()
        count = int(count)
        self.used_suppressions[supp_name] += count
    self.cur_fd_.close()
    return ret

  def GetReports(self, files):
    '''Extracts reports from a set of files.

    Reads a set of files and returns a list of all discovered
    ThreadSanitizer race reports. As a side effect, populates
    self.used_suppressions with appropriate info.
    '''
    global TheAddressTable
    if self._use_gdb:
      TheAddressTable = gdb_helper.AddressTable()
    else:
      TheAddressTable = None
    reports = []
    self.used_suppressions = defaultdict(int)
    for f in files:
      reports.extend(self.ParseReportFile(f))
    if self._use_gdb:
      TheAddressTable.ResolveAll()
      # Make each line of each report a string.
      reports = map(lambda x: map(str, x), reports)
    return [''.join(report_lines) for report_lines in reports]

  def Report(self, files, testcase, check_sanity=False):
    '''Reads in a set of files and prints a ThreadSanitizer report.

    Args:
      files: A list of filenames.
      testcase: Name of the test the reports came from, or None.
      check_sanity: if true, search for SANITY_TEST_SUPPRESSION.
    '''
    # We set up the _cur_testcase class-wide variable to avoid passing it
    # through about 5 functions.
    self._cur_testcase = testcase
    reports = self.GetReports(files)
    self._cur_testcase = None  # just in case, shouldn't be used anymore

    common.PrintUsedSuppressionsList(self.used_suppressions)

    retcode = 0
    if reports:
      sys.stdout.flush()
      sys.stderr.flush()
      logging.info("FAIL! Found %i report(s)" % len(reports))
      for report in reports:
        logging.info('\n' + report)
      sys.stdout.flush()
      retcode = -1

    # Report the tool's insanity even if there were errors.
    if (check_sanity and
        TsanAnalyzer.SANITY_TEST_SUPPRESSION not in self.used_suppressions):
      logging.error("FAIL! Sanity check failed!")
      retcode = -3

    if retcode != 0:
      return retcode

    logging.info("PASS: No reports found")
    return 0
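

# Typical programmatic use (a minimal sketch; 'tsan.log' is an illustrative
# filename, not something this tool ships with):
#
#   analyzer = TsanAnalyzer(source_dir=None, use_gdb=False)
#   retcode = analyzer.Report(['tsan.log'], testcase=None)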


def main():
  '''For testing only. The TsanAnalyzer class should be imported instead.'''
  parser = optparse.OptionParser("usage: %prog [options] <files to analyze>")
  parser.add_option("", "--source_dir",
                    help="path to top of source tree for this build "
                         "(used to normalize source paths in baseline)")

  (options, args) = parser.parse_args()
  if not args:
    parser.error("no filename specified")
  filenames = args

  logging.getLogger().setLevel(logging.INFO)
  analyzer = TsanAnalyzer(options.source_dir, use_gdb=True)
  return analyzer.Report(filenames, None)


if __name__ == '__main__':
  sys.exit(main())