shaka-packager/tools/win/split_link/split_link.py

421 lines
14 KiB
Python
Raw Normal View History

# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Takes the same arguments as Windows link.exe, and a definition of libraries
to split into subcomponents. Does multiple passes of link.exe invocation to
determine exports between parts and generates .def and import libraries to
cause symbols to be available to other parts."""
import _winreg
import ctypes
import os
import re
import shutil
import subprocess
import sys
import tempfile
# Directory containing this script. main() resolves the partition description
# file relative to it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# This can be set to ignore data exports. The resulting DLLs will probably not
# run, but at least they can be generated. The log of data exports will still
# be output.
IGNORE_DATA = 0
def Log(message):
  """Writes |message| to stdout on one line, prefixed with 'split_link:'."""
  print('split_link: %s' % (message,))
def GetFlagsAndInputs(argv):
  """Parses the command line intended for link.exe and returns the flags and
  input files.

  Arguments of the form @file are replaced by the lines of that response
  file. The expanded command line is tokenized with CommandLineToArgvW so that
  quoting is handled the same way link.exe handles it.

  Args:
    argv: list of argument strings (without the program name).

  Returns:
    A (flags, inputs, intermediate_manifest) tuple. |inputs| holds the
    arguments that do not start with '/' and end in .obj, .lib or .res;
    |flags| holds everything else except /OUT:, /PDB: and /MANIFESTFILE:,
    which are dropped because replacements are generated per part.
    |intermediate_manifest| is the value of the last /MANIFESTFILE: argument,
    or '' if there was none.

  Raises:
    WindowsError: if CommandLineToArgvW fails.
  """
  rsp_expanded = []
  for arg in argv:
    # startswith() rather than arg[0] so an empty argument does not raise.
    if arg.startswith('@'):
      with open(arg[1:]) as rsp:
        rsp_expanded.extend(rsp.read().splitlines())
    else:
      rsp_expanded.append(arg)

  command_line = ' '.join(rsp_expanded)
  if not command_line.strip():
    # Given an empty string, CommandLineToArgvW returns the path of the
    # current executable, which would then be mistaken for a linker flag.
    return [], [], ''

  # Use CommandLineToArgvW so we match link.exe parsing.
  # Declare the prototypes explicitly: the default ctypes return type is a C
  # int, which would truncate the returned pointer on 64-bit Python.
  CommandLineToArgvW = ctypes.windll.shell32.CommandLineToArgvW
  CommandLineToArgvW.argtypes = [ctypes.c_wchar_p,
                                 ctypes.POINTER(ctypes.c_int)]
  CommandLineToArgvW.restype = ctypes.POINTER(ctypes.c_wchar_p)
  LocalFree = ctypes.windll.kernel32.LocalFree
  LocalFree.argtypes = [ctypes.c_void_p]
  LocalFree.restype = ctypes.c_void_p

  size = ctypes.c_int()
  ptr = CommandLineToArgvW(
      ctypes.create_unicode_buffer(command_line), ctypes.byref(size))
  if not ptr:
    raise ctypes.WinError()
  try:
    # Indexing copies each string, so the list stays valid after LocalFree.
    args = [ptr[i] for i in range(size.value)]
  finally:
    LocalFree(ptr)

  inputs = []
  flags = []
  intermediate_manifest = ''
  for arg in args:
    lower_arg = arg.lower()
    # We'll be replacing these ourselves.
    if lower_arg.startswith(('/out:', '/pdb:')):
      continue
    if lower_arg.startswith('/manifestfile:'):
      intermediate_manifest = arg[arg.index(':') + 1:]
      continue
    if (not lower_arg.startswith('/') and
        lower_arg.endswith(('.obj', '.lib', '.res'))):
      inputs.append(arg)
    else:
      flags.append(arg)
  return flags, inputs, intermediate_manifest
def GetRegistryValue(subkey):
  """Returns the path stored for |subkey| in the Chromium registry settings.

  The value is queried under HKEY_CURRENT_USER from the key formed by
  appending |subkey| to the Software/Chromium key path, and is only returned
  if it names a path that exists on disk.

  Raises:
    SystemExit: if the query fails or the stored path does not exist.
  """
  key_path = 'Software\\Chromium\\' + subkey
  try:
    stored_path = _winreg.QueryValue(_winreg.HKEY_CURRENT_USER, key_path)
    usable = os.path.exists(stored_path)
  except WindowsError:
    usable = False
  if usable:
    return stored_path
  raise SystemExit("Couldn't read from registry")
def GetOriginalLinkerPath():
  """Returns the path registered under 'split_link_installed', which the rest
  of this script invokes as the linker."""
  linker_path = GetRegistryValue('split_link_installed')
  return linker_path
def GetMtPath():
  """Returns the path registered under 'split_link_mt_path', which main()
  invokes to embed manifests."""
  mt_path = GetRegistryValue('split_link_mt_path')
  return mt_path
def PartFor(input_file, description_parts, description_all):
  """Determines which part a given link input should be put into (or all).

  |input_file| is lower-cased before being searched with each regex.

  Args:
    input_file: name of the linker input.
    description_parts: list of regex lists, one list per part.
    description_all: list of regexes for inputs that belong in every part.

  Returns:
    -1 if the input matches |description_all|, otherwise the index of the
    first entry of |description_parts| containing a matching regex.

  Raises:
    ValueError: if nothing matches.
  """
  lowered = input_file.lower()

  def MatchesAny(specs):
    for spec in specs:
      if re.search(spec, lowered):
        return True
    return False

  # Inputs destined for all parts take precedence over a specific part.
  if MatchesAny(description_all):
    return -1
  for index, spec_list in enumerate(description_parts):
    if MatchesAny(spec_list):
      return index
  raise ValueError("couldn't find location for %s" % lowered)
def ParseOutExternals(output):
  """Given the stdout of link.exe, parses the error messages to retrieve all
  symbols that are unresolved.

  Args:
    output: the complete linker output as a single string.

  Returns:
    A sorted list of the unique unresolved symbol names.

  Raises:
    SystemExit: (carrying |output|) if the output has no LNK1120 summary
        line, i.e. the link failed for some reason other than unresolved
        externals or the output could not be understood.
    AssertionError: if the number of symbols parsed differs from the count
        the linker reported in its LNK1120 line.
  """
  result = set()
  # Styles of messages for unresolved externals, and a boolean to indicate
  # whether the error message emits the symbols with or without a leading
  # underscore.
  unresolved_regexes = [
    (re.compile(r' : error LNK2019: unresolved external symbol ".*" \((.*)\)'
                r' referenced in function'),
     False),
    (re.compile(r' : error LNK2001: unresolved external symbol ".*" \((.*)\)$'),
     False),
    (re.compile(r' : error LNK2019: unresolved external symbol (.*)'
                r' referenced in function '),
     True),
    (re.compile(r' : error LNK2001: unresolved external symbol (.*)$'),
     True),
  ]
  for line in output.splitlines():
    line = line.strip()
    # The first matching style wins; the more specific quoted forms are
    # listed before the bare-symbol forms.
    for regex, strip_leading_underscore in unresolved_regexes:
      mo = regex.search(line)
      if mo:
        symbol = mo.group(1)
        result.add(symbol[1:] if strip_leading_underscore else symbol)
        break

  mo = re.search(r'fatal error LNK1120: (\d+) unresolved externals', output)
  if mo is None:
    # Without the summary line there is nothing to cross-check against (and
    # possibly nothing parsed at all); surface the linker output instead of
    # failing on a None match object.
    raise SystemExit(output)

  # Make sure we have the same number that the linker thinks we have.
  expected = int(mo.group(1))
  if len(result) != expected:
    print(output)
    print('Expecting %d, got %d' % (expected, len(result)))
    # Raised explicitly so the check also runs under python -O.
    raise AssertionError(
        'Expecting %d unresolved externals, got %d' % (expected, len(result)))
  return sorted(result)
def AsCommandLineArgs(items):
  """Intended for output to a response file. Wraps every item in double
  quotes and returns them joined one per line."""
  quoted = []
  for item in items:
    quoted.append('"' + item + '"')
  return '\n'.join(quoted)
def OutputNameForIndex(index):
  """Gets the final output DLL name, given a zero-based index: 'chrome.dll'
  for index 0, and 'chrome<index>.dll' for every other index."""
  if index != 0:
    return 'chrome%d.dll' % index
  return "chrome.dll"
def ManifestNameForIndex(index):
  """Gets the intermediate manifest file name for the DLL at |index|: the DLL
  name with '.intermediate.manifest' appended."""
  dll_name = OutputNameForIndex(index)
  return dll_name + '.intermediate.manifest'
def PdbNameForIndex(index):
  """Gets the .pdb file name for the DLL at |index|: the DLL name with '.pdb'
  appended."""
  dll_name = OutputNameForIndex(index)
  return dll_name + '.pdb'
def RunLinker(flags, index, inputs, phase, intermediate_manifest):
  """Invokes the linker and returns the stdout, returncode and target name.

  A response file named part<index>_<phase>.rsp is written to the current
  directory containing the quoted inputs and flags followed by the per-part
  /ENTRY, /OUT, /MANIFESTFILE and /PDB options, and the linker is run on it.

  Args:
    flags: list of linker flags.
    index: zero-based index of the part being linked.
    inputs: list of linker input files.
    phase: label used only in the response file name.
    intermediate_manifest: path the caller's build expects a manifest at, or
        a false value to skip the copy.

  Returns:
    A (stdout, returncode, output_name) tuple.
  """
  rspfile = 'part%d_%s.rsp' % (index, phase)
  with open(rspfile, 'w') as rsp:
    output_name = OutputNameForIndex(index)
    manifest_name = ManifestNameForIndex(index)
    rsp_lines = [
        AsCommandLineArgs(inputs),
        AsCommandLineArgs(flags),
        '/ENTRY:ChromeEmptyEntry@12',
        '/OUT:' + output_name,
        '/MANIFESTFILE:' + manifest_name,
        '/PDB:' + PdbNameForIndex(index),
    ]
    rsp.write('\n'.join(rsp_lines) + '\n')

  # To debug, dump the response file here:
  # Log('[[[\n' + open(rspfile).read() + '\n]]]')
  linker = subprocess.Popen(
      [GetOriginalLinkerPath(), '@' + rspfile], stdout=subprocess.PIPE)
  stdout, _ = linker.communicate()

  # Hack for ninja build. After the linker runs, it does some manifest
  # things and expects there to be a file in this location. We just put it
  # there so it's happy. This is a no-op.
  wants_manifest = (
      index == 0 and linker.returncode == 0 and intermediate_manifest)
  if wants_manifest and os.path.isdir(os.path.dirname(intermediate_manifest)):
    shutil.copyfile(manifest_name, intermediate_manifest)
  return stdout, linker.returncode, output_name
def GetLibObjList(lib):
  """Gets the list of object files contained in a .lib.

  Runs the linker with /lib /nologo /list on |lib| and returns the lines of
  its stdout.
  """
  command = [GetOriginalLinkerPath(), '/lib', '/nologo', '/list', lib]
  lister = subprocess.Popen(command, stdout=subprocess.PIPE)
  listing = lister.communicate()[0]
  return listing.splitlines()
def ExtractObjFromLib(lib, obj):
  """Extracts a .obj file contained in a .lib file. Returns the path of the
  temp file it was extracted to.

  Raises:
    subprocess.CalledProcessError: if the extraction command fails.
  """
  link_exe = GetOriginalLinkerPath()
  # Only a unique file name is needed; the file is closed right away and kept
  # on disk (delete=False) so the extraction command can write to that path.
  with tempfile.NamedTemporaryFile(
      prefix='split_link_', suffix='.obj', delete=False) as placeholder:
    extracted_path = placeholder.name
  command = [link_exe, '/lib', '/nologo', '/extract:' + obj, lib,
             '/out:' + extracted_path]
  subprocess.check_call(command)
  return extracted_path
def Unmangle(export):
  """Returns the human-presentable name of a mangled symbol.

  Raises:
    WindowsError: if UnDecorateSymbolName reports failure (returns 0).
  """
  # Use dbghelp.dll to demangle the name.
  # TODO(scottmg): Perhaps a simple cache? Seems pretty fast though.
  undecorate = ctypes.windll.dbghelp.UnDecorateSymbolName
  capacity = 2048
  demangled = ctypes.create_string_buffer(capacity)
  # The final argument is the flags value; 0 is passed.
  succeeded = undecorate(export, ctypes.byref(demangled), capacity, 0)
  if not succeeded:
    raise ctypes.WinError()
  return demangled.value
def IsDataDefinition(export):
  """Determines if a given name is data rather than a function. Always returns
  False for C-style (as opposed to C++-style names).

  Args:
    export: the (possibly mangled) symbol name. An empty name is treated as
        C-style.

  Returns:
    True if |export| starts with '?' and its unmangled form contains no '(',
    False otherwise.
  """
  # startswith() rather than export[0] so an empty symbol does not raise.
  if not export.startswith('?'):
    return False
  # If it contains a '(' we assume it's a function.
  return '(' not in Unmangle(export)
def GenerateDefFiles(unresolved_by_part):
  """Given a list of unresolved externals per part, generates one .def file
  per part that will cause those symbols to be exported.

  part<i>.def lists the symbols that every *other* part reported as
  unresolved. Symbols identified as data get a DATA suffix unless IGNORE_DATA
  is set.

  Returns:
    The list of generated .def file names, in part order.
  """
  deffiles = []
  Log('generating .def files')
  for i, _ in enumerate(unresolved_by_part):
    deffile = 'part%d.def' % i
    with open(deffile, 'w') as f:
      f.write('LIBRARY %s\n' % OutputNameForIndex(i))
      f.write('EXPORTS\n')
      for j, wanted_by_other in enumerate(unresolved_by_part):
        if j == i:
          # A part does not need to export what it is itself missing.
          continue
        entries = []
        for export in wanted_by_other:
          if IsDataDefinition(export) and not IGNORE_DATA:
            suffix = ' DATA'
          else:
            suffix = ''
          entries.append(' ' + export + suffix)
        f.write('\n'.join(entries) + '\n')
    deffiles.append(deffile)
  return deffiles
def BuildImportLibs(flags, inputs_by_part, deffiles):
  """Runs the linker once per part to generate an import library.

  Each part is linked with /IMPLIB:part<i>.lib and /DEF:<its .def file> added
  to |flags|. The linker's result is not inspected here.

  Returns:
    The list of import library names, in part order.
  """
  Log('building import libs')
  import_libs = []
  for index, (inputs, deffile) in enumerate(zip(inputs_by_part, deffiles)):
    libfile = 'part%d.lib' % index
    implib_flags = list(flags)
    implib_flags.append('/IMPLIB:%s' % libfile)
    implib_flags.append('/DEF:%s' % deffile)
    RunLinker(implib_flags, index, inputs, 'implib', None)
    import_libs.append(libfile)
  return import_libs
def AttemptLink(flags, inputs_by_part, unresolved_by_part, deffiles,
                import_libs, intermediate_manifest):
  """Tries to run the linker for all parts using the current round of
  generated import libs and .def files. If the link fails, updates the
  unresolved externals list per part.

  Args:
    flags: base linker flags shared by every part.
    inputs_by_part: list of input-file lists, one per part.
    unresolved_by_part: unresolved symbols accumulated so far, one list per
        part.
    deffiles: .def file name per part, or None entries if not yet generated.
    import_libs: import library name per part, or None entries if not yet
        generated.
    intermediate_manifest: forwarded to RunLinker.

  Returns:
    An (all_succeeded, dlls, combined_externals) tuple, where |dlls| is the
    output name of each part and |combined_externals| is, per part, the
    sorted union of the previous and newly found unresolved symbols.
  """
  dlls = []
  all_succeeded = True
  new_externals = []
  Log('unresolveds now: %r' % [len(part) for part in unresolved_by_part])
  for i, (inputs, deffile) in enumerate(zip(inputs_by_part, deffiles)):
    Log('running link, part %d' % i)
    # Link against every other part's import library, skipping the ones that
    # have not been generated yet.
    others_implibs = [
        lib for j, lib in enumerate(import_libs) if j != i and lib]
    inputs_with_implib = inputs + others_implibs
    # Build the flags for this part on a copy. Rebinding |flags| itself would
    # leak this part's /DEF (and a repeated /LTCG) into every later part.
    part_flags = list(flags)
    if deffile:
      part_flags += ['/DEF:%s' % deffile, '/LTCG']
    stdout, rc, output = RunLinker(
        part_flags, i, inputs_with_implib, 'final', intermediate_manifest)
    if rc != 0:
      all_succeeded = False
      new_externals.append(ParseOutExternals(stdout))
    else:
      new_externals.append([])
    dlls.append(output)
  combined_externals = [sorted(set(prev) | set(new))
                        for prev, new in zip(unresolved_by_part, new_externals)]
  return all_succeeded, dlls, combined_externals
def ExtractSubObjsTargetedAtAll(
    inputs,
    num_parts,
    description_parts,
    description_all,
    description_all_from_libs):
  """For (lib, obj) regex tuples in the all_from_libs section, finds the
  matching objs inside the matching input libs and extracts them.

  Returns a list of lists (one per part) naming the objs to add to the part
  that the containing .lib is *not* in.

  Raises:
    SystemExit: if a matched lib is already in all parts, or if a lib matched
        but none of its objs did.
  """
  by_parts = [[] for _ in range(num_parts)]
  for lib_spec, obj_spec in description_all_from_libs:
    for input_file in inputs:
      if not re.search(lib_spec, input_file):
        continue
      match_count = 0
      for obj in GetLibObjList(input_file):
        if not re.search(obj_spec, obj, re.I):
          continue
        # NOTE(review): the path returned by the extraction is not used; the
        # obj name as listed by the lib is what gets recorded below. Confirm
        # that this is intended.
        ExtractObjFromLib(input_file, obj)
        lib_part = PartFor(input_file, description_parts, description_all)
        if lib_part == -1:
          raise SystemExit(
              '%s is already in all parts, but matched '
              '%s in all_from_libs' % (input_file, obj))
        # See note in main().
        assert num_parts == 2, "Can't handle > 2 dlls currently"
        # With exactly two parts, 1 - lib_part is "the other part".
        by_parts[1 - lib_part].append(obj)
        match_count += 1
      if match_count == 0:
        raise SystemExit(
            '%s, %s matched a lib, but no objs' % (lib_spec, obj_spec))
  return by_parts
def main():
  """Splits the link described by the command line into multiple DLLs.

  Reads the partition description, assigns every linker input to a part,
  then repeatedly links all parts (up to five passes), regenerating .def
  files and import libraries from the unresolved externals after each failed
  pass. On success a manifest is embedded in each DLL.

  Returns:
    0 on success, 1 if the link still fails after the last pass.
  """
  flags, inputs, intermediate_manifest = GetFlagsAndInputs(sys.argv[1:])

  partition_file = os.path.normpath(
      os.path.join(BASE_DIR, '../../../build/split_link_partition.py'))
  with open(partition_file) as partition:
    # NOTE: eval() executes the file's contents as a Python expression, so
    # the partition file must only ever come from a trusted location.
    description = eval(partition.read())
  description_parts = description['parts']

  # We currently assume that if a symbol isn't in dll 0, then it's in dll 1
  # when generating def files. Otherwise, we'd need to do more complex things
  # to figure out where each symbol actually is to assign it to the correct
  # .def file.
  num_parts = len(description_parts)
  assert num_parts == 2, "Can't handle > 2 dlls currently"

  # Part assignment is computed against the reversed description, and each
  # per-part result is reversed back afterwards.
  description_parts.reverse()
  objs_from_libs = ExtractSubObjsTargetedAtAll(
      inputs,
      num_parts,
      description_parts,
      description['all'],
      description['all_from_libs'])
  objs_from_libs.reverse()

  inputs_by_part = [[] for _ in range(num_parts)]
  for input_file in inputs:
    part_index = PartFor(input_file, description_parts, description['all'])
    if part_index == -1:
      # -1 means the input belongs in every part.
      for part_inputs in inputs_by_part:
        part_inputs.append(input_file)
    else:
      inputs_by_part[part_index].append(input_file)
  inputs_by_part.reverse()

  # Put the subobjs on to the main list.
  for part_index, extra_objs in enumerate(objs_from_libs):
    Log('%d sub .objs added to part %d' % (len(extra_objs), part_index))
    inputs_by_part[part_index].extend(extra_objs)

  unresolved_by_part = [[] for _ in range(num_parts)]
  import_libs = [None] * num_parts
  deffiles = [None] * num_parts

  data_exports = 0
  for pass_number in range(5):
    Log('--- starting pass %d' % pass_number)
    ok, dlls, unresolved_by_part = AttemptLink(
        flags, inputs_by_part, unresolved_by_part, deffiles, import_libs,
        intermediate_manifest)
    if ok:
      break
    # Recount on every pass so the total reflects the latest unresolved set.
    data_exports = 0
    for part_index, exports in enumerate(unresolved_by_part):
      for export in exports:
        if IsDataDefinition(export):
          print('part %d contains data export: %s (aka %s)' % (
              part_index, Unmangle(export), export))
          data_exports += 1
    deffiles = GenerateDefFiles(unresolved_by_part)
    import_libs = BuildImportLibs(flags, inputs_by_part, deffiles)
  else:
    # Every pass failed (the loop finished without hitting the break).
    if data_exports and not IGNORE_DATA:
      print('%d data exports found, see report above.' % data_exports)
      print('These cannot be exported, and must be either duplicated to the '
            'target DLL (if constant), or wrapped in a function.')
    return 1

  mt_exe = GetMtPath()
  for part_index, dll in enumerate(dlls):
    Log('embedding manifest in %s' % dll)
    mt_command = [
        mt_exe,
        '-nologo',
        '-manifest',
        ManifestNameForIndex(part_index),
        description['manifest'],
        '-outputresource:%s;2' % dll,
    ]
    subprocess.check_call(mt_command)

  Log('built %r' % dlls)
  return 0
if __name__ == '__main__':
  # Use main()'s return value as the process exit status.
  sys.exit(main())