#!/usr/bin/env python
# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
|
||
|
|
||
|
"""Wrapper script to help run clang tools across Chromium code.

How to use this tool:
If you want to run the tool across all Chromium code:
  run_tool.py <tool> <path/to/compiledb>

If you want to run the tool across only chrome/browser and content/browser:
  run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser

Please see https://code.google.com/p/chromium/wiki/ClangToolRefactoring for more
information, which documents the entire automated refactoring flow in Chromium.

Why use this tool:
The clang tool implementation doesn't take advantage of multiple cores, and if
it fails mysteriously in the middle, all the generated replacements will be
lost.

Unfortunately, if the work is simply sharded across multiple cores by running
multiple RefactoringTools, problems arise when they attempt to rewrite a file at
the same time. To work around that, clang tools that are run using this tool
should output edits to stdout in the following format:

==== BEGIN EDITS ====
r:<file path>:<offset>:<length>:<replacement text>
r:<file path>:<offset>:<length>:<replacement text>
...etc...
==== END EDITS ====

Any generated edits are applied once the clang tool has finished running
across Chromium, regardless of whether some instances failed or not.
"""
|
||
|
|
||
|
import collections
|
||
|
import functools
|
||
|
import multiprocessing
|
||
|
import os.path
|
||
|
import subprocess
|
||
|
import sys
|
||
|
|
||
|
|
||
|
# One edit parsed from a clang tool's output line
# "<edit_type>:<file path>:<offset>:<length>:<replacement text>". The file
# path is not stored here; edits are grouped per file by their users.
Edit = collections.namedtuple('Edit', 'edit_type offset length replacement')
|
||
|
|
||
|
|
||
|
def _GetFilesFromGit(paths=None):
  """Lists the files known to git by running `git ls-files`.

  Args:
    paths: Optional sequence of path arguments appended to the
        `git ls-files` command line to restrict the listing. When empty or
        None, no extra arguments are passed.

  Returns:
    The lines of the command's stdout, as a list.
  """
  git_command = ['git', 'ls-files'] + list(paths or ())
  git_process = subprocess.Popen(git_command, stdout=subprocess.PIPE)
  # Only stdout is piped, so the second element of the result is unused.
  listing = git_process.communicate()[0]
  return listing.splitlines()
|
||
|
|
||
|
|
||
|
def _ExtractEditsFromStdout(build_directory, stdout):
  """Extracts generated list of edits from the tool's stdout.

  The expected format is documented at the top of this file.

  Args:
    build_directory: Directory that contains the compile database. Used to
        normalize the filenames.
    stdout: The stdout from running the clang tool.

  Returns:
    A dictionary mapping filenames to the associated edits. The dictionary is
    empty when stdout has no complete "BEGIN EDITS"/"END EDITS" section.
    Lines inside the section that cannot be parsed are reported on stdout and
    skipped.
  """
  edits = collections.defaultdict(list)
  lines = stdout.splitlines()
  try:
    start_index = lines.index('==== BEGIN EDITS ====')
    # Only accept an end marker that follows the begin marker.
    end_index = lines.index('==== END EDITS ====', start_index + 1)
  except ValueError:
    # A tool that prints no (or an incomplete) edits section simply has no
    # edits for this file. Raising here would abort the whole run and discard
    # the edits already collected from other files.
    return edits

  for line in lines[start_index + 1:end_index]:
    try:
      # At most 4 splits, so the replacement text may itself contain ':'.
      edit_type, path, offset, length, replacement = line.split(':', 4)
      # Normalize the file path emitted by the clang tool to be relative to the
      # current working directory.
      path = os.path.relpath(os.path.join(build_directory, path))
      edits[path].append(Edit(edit_type, int(offset), int(length), replacement))
    except ValueError:
      print('Unable to parse edit: %s' % line)
  return edits
|
||
|
|
||
|
|
||
|
def _ExecuteTool(toolname, build_directory, filename):
  """Runs the clang tool over a single file and packages the outcome.

  This lives at module level, rather than on the dispatcher class, so that it
  can be pickled for the multiprocessing module.

  Args:
    toolname: Path to the tool to execute.
    build_directory: Directory that contains the compile database; passed to
        the tool via -p.
    filename: The file to run the tool over.

  Returns:
    A dictionary that always has a boolean "status" entry.

    When the tool exits with code 0, "status" is True and "edits" holds the
    edits parsed from the tool's stdout.

    Otherwise "status" is False, and "filename" and "stderr" hold the file
    that was being processed and the tool's captured stderr.
  """
  tool_process = subprocess.Popen(
      [toolname, '-p', build_directory, filename],
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE)
  tool_stdout, tool_stderr = tool_process.communicate()

  if tool_process.returncode == 0:
    parsed_edits = _ExtractEditsFromStdout(build_directory, tool_stdout)
    return {'status': True, 'edits': parsed_edits}
  return {'status': False, 'filename': filename, 'stderr': tool_stderr}
|
||
|
|
||
|
|
||
|
class _CompilerDispatcher(object):
  """Multiprocessing controller for running clang tools in parallel."""

  def __init__(self, toolname, build_directory, filenames):
    """Initializer method.

    Args:
      toolname: Path to the tool to execute.
      build_directory: Directory that contains the compile database.
      filenames: The files to run the tool over. Must support len(), which is
          used for progress reporting.
    """
    self.__toolname = toolname
    self.__build_directory = build_directory
    self.__filenames = filenames
    self.__success_count = 0
    self.__failed_count = 0
    self.__edits = collections.defaultdict(list)

  @property
  def edits(self):
    """Dictionary mapping filenames to the edits collected so far."""
    return self.__edits

  @property
  def failed_count(self):
    """Number of files for which the tool reported failure."""
    return self.__failed_count

  def Run(self):
    """Runs the tool over every file in a process pool and gathers results.

    Results are consumed in completion order. The pool is always shut down,
    even if processing a result raises, so no worker processes are left
    behind.
    """
    pool = multiprocessing.Pool()
    try:
      result_iterator = pool.imap_unordered(
          functools.partial(_ExecuteTool, self.__toolname,
                            self.__build_directory),
          self.__filenames)
      for result in result_iterator:
        self.__ProcessResult(result)
    finally:
      # Once every result has been consumed there is no outstanding work, so
      # terminate() just stops the idle workers. If iteration was interrupted
      # it also cancels whatever is still queued.
      pool.terminate()
      pool.join()
    # The progress line ends in '\r'; move past it once everything is done.
    sys.stdout.write('\n')
    sys.stdout.flush()

  def __ProcessResult(self, result):
    """Handles result processing.

    Merges the edits of a successful run into self.edits, or reports a failed
    run on stdout, and then rewrites the progress line.

    Args:
      result: The result dictionary returned by _ExecuteTool.
    """
    if result['status']:
      self.__success_count += 1
      for k, v in result['edits'].items():
        self.__edits[k].extend(v)
    else:
      self.__failed_count += 1
      sys.stdout.write('\nFailed to process %s\n' % result['filename'])
      sys.stdout.write(result['stderr'])
      sys.stdout.write('\n')
    percentage = (
        float(self.__success_count + self.__failed_count) /
        len(self.__filenames)) * 100
    # The trailing '\r' makes the next progress update overwrite this line.
    sys.stdout.write('Succeeded: %d, Failed: %d [%.2f%%]\r' % (
        self.__success_count, self.__failed_count, percentage))
    sys.stdout.flush()
|
||
|
|
||
|
|
||
|
def _RunClangFormatDiff(filename, clang_format_diff_path):
  """Pipes `git diff -U0 -- <filename>` into clang-format-diff.py.

  The two processes are connected directly instead of through a shell command
  string, so filenames containing spaces or shell metacharacters are passed
  through verbatim.

  Args:
    filename: The file whose uncommitted changes should be reformatted.
    clang_format_diff_path: Path to the clang-format-diff.py helper.

  Returns:
    The exit code of the clang-format-diff.py process, or 1 if either process
    could not be started.
  """
  try:
    git_diff = subprocess.Popen(['git', 'diff', '-U0', '--', filename],
                                stdout=subprocess.PIPE)
  except OSError:
    return 1
  try:
    formatter = subprocess.Popen(
        ['python', clang_format_diff_path, '-style=Chromium'],
        stdin=git_diff.stdout)
    return formatter.wait()
  except OSError:
    return 1
  finally:
    # Drop our copy of the read end so git is not left blocked on a pipe
    # nobody reads, then reap it.
    git_diff.stdout.close()
    git_diff.wait()


def _ApplyEdits(edits, clang_format_diff_path):
  """Apply the generated edits.

  Each file is rewritten in place. Afterwards a summary line is printed with
  the number of edits applied and the number of files touched.

  Args:
    edits: A dict mapping filenames to Edit instances that apply to that file.
        The per-file lists are sorted in place.
    clang_format_diff_path: Path to the clang-format-diff.py helper to help
        automatically reformat diffs to avoid style violations. Pass None if
        the clang-format step should be skipped.
  """
  edit_count = 0
  for k, v in edits.items():
    # Sort the edits and iterate through them in reverse order. Sorting allows
    # duplicate edits to be quickly skipped, while reversing means that
    # subsequent edits don't need to have their offsets updated with each edit
    # applied.
    v.sort()
    last_edit = None
    with open(k, 'rb+') as f:
      contents = bytearray(f.read())
      for edit in reversed(v):
        if edit == last_edit:
          continue
        last_edit = edit
        contents[edit.offset:edit.offset + edit.length] = edit.replacement
        if not edit.replacement:
          # A pure deletion may leave a dangling list separator behind.
          _ExtendDeletionIfElementIsInList(contents, edit.offset)
        edit_count += 1
      # Rewrite the file from the start; truncating first ensures no stale
      # tail is left when the new contents are shorter.
      f.seek(0)
      f.truncate()
      f.write(contents)
    if clang_format_diff_path:
      if _RunClangFormatDiff(k, clang_format_diff_path) != 0:
        print('clang-format failed for %s' % k)
  print('Applied %d edits to %d files' % (edit_count, len(edits)))
|
||
|
|
||
|
|
||
|
# Byte values (tab, newline, carriage return, space) that are skipped when
# looking for the delimiters surrounding a deletion.
_WHITESPACE_BYTES = frozenset(bytearray(b'\t\n\r '))


def _CountToFirstNonWhitespace(byte_values):
  """Scans byte values up to and including the first non-whitespace one.

  Args:
    byte_values: An iterable of integer byte values.

  Returns:
    A (consumed, byte) pair. |consumed| is the number of values read,
    including the non-whitespace one. |byte| is that non-whitespace value, or
    None if the iterable held only whitespace (or nothing).
  """
  consumed = 0
  for value in byte_values:
    consumed += 1
    if value not in _WHITESPACE_BYTES:
      return consumed, value
  return consumed, None


def _ExtendDeletionIfElementIsInList(contents, offset):
  """Widens a deletion to swallow the separator of a removed list element.

  Refactoring tools can delete just the list element itself and leave the
  comma handling to this helper. That matters because a single matcher
  callback cannot know whether neighbouring elements, or all elements, are
  being removed by other callbacks.

  The first non-whitespace byte on each side of the deletion point decides
  what happens:
    * If the byte before is one of ',', ':', '(' or '{' and the byte after is
      ',', everything from the deletion point through that following comma is
      removed.
    * Otherwise, if the byte before is ',' or ':', everything from that byte
      up to the deletion point is removed.
    * In all other cases |contents| is left untouched.

  Args:
    contents: A bytearray with the deletion already applied. Modified in
        place.
    offset: The offset in the bytearray where the deleted range used to be.
  """
  left_trim_count, before = _CountToFirstNonWhitespace(
      reversed(contents[:offset]))
  right_trim_count, after = _CountToFirstNonWhitespace(contents[offset:])

  if before not in (ord(','), ord(':'), ord('('), ord('{')):
    # The deleted text was not preceded by a list-like delimiter.
    return

  if after == ord(','):
    del contents[offset:offset + right_trim_count]
  elif before in (ord(','), ord(':')):
    del contents[offset - left_trim_count:offset]
|
||
|
|
||
|
|
||
|
def main(argv):
  """Runs a clang tool over the git-tracked sources and applies its edits.

  Args:
    argv: Command-line arguments without the program name: the clang tool,
        the directory containing the compile database, and optionally paths
        used to filter which files are processed.

  Returns:
    1 if too few arguments were given (after printing usage), 2 if the tool
    failed on at least one file, and 0 otherwise.
  """
  if len(argv) < 2:
    for usage_line in (
        'Usage: run_tool.py <clang tool> <compile DB> <path 1> <path 2> ...',
        ' <clang tool> is the clang tool that should be run.',
        ' <compile db> is the directory that contains the compile database',
        ' <path 1> <path2> ... can be used to filter what files are edited'):
      print(usage_line)
    return 1

  toolname, build_directory = argv[0], argv[1]
  path_filters = argv[2:]

  script_directory = os.path.dirname(os.path.realpath(__file__))
  clang_format_diff_path = os.path.join(
      script_directory,
      '../../../third_party/llvm/tools/clang/tools/clang-format',
      'clang-format-diff.py')
  # TODO(dcheng): Allow this to be controlled with a flag as well.
  if not os.path.isfile(clang_format_diff_path):
    # Without the helper script the reformatting step is skipped.
    clang_format_diff_path = None

  filenames = frozenset(_GetFilesFromGit(path_filters))
  # Filter out files that aren't C/C++/Obj-C/Obj-C++.
  source_extensions = frozenset(('.c', '.cc', '.m', '.mm'))
  source_files = [candidate for candidate in filenames
                  if os.path.splitext(candidate)[1] in source_extensions]

  dispatcher = _CompilerDispatcher(toolname, build_directory, source_files)
  dispatcher.Run()

  # Filter out edits to files that aren't in the git repository, since it's not
  # useful to modify files that aren't under source control--typically, these
  # are generated files or files in a git submodule that's not part of Chromium.
  tracked_edits = dict(
      (path, file_edits) for path, file_edits in dispatcher.edits.items()
      if path in filenames)
  _ApplyEdits(tracked_edits, clang_format_diff_path)

  return 2 if dispatcher.failed_count != 0 else 0
|
||
|
|
||
|
|
||
|
# Script entry point: pass the arguments after the program name to main() and
# use its return value as the process exit status.
if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))
|