153 lines
4.7 KiB
Python
153 lines
4.7 KiB
Python
|
#!/usr/bin/env python
|
||
|
#
|
||
|
# Copyright 2013 The Chromium Authors. All rights reserved.
|
||
|
# Use of this source code is governed by a BSD-style license that can be
|
||
|
# found in the LICENSE file.
|
||
|
|
||
|
import base64
|
||
|
import os
|
||
|
import sys
|
||
|
import re
|
||
|
|
||
|
from optparse import OptionParser
|
||
|
|
||
|
"""Extracts the list of resident symbols of a library loaded in a process.
|
||
|
|
||
|
This script combines the extended output of memdump for a given process
|
||
|
(obtained through memdump -x PID) and the symbol table of a .so loaded in that
|
||
|
process (obtained through nm -C lib-with-symbols.so), filtering out only those
|
||
|
symbols that, at the time of the snapshot, were resident in memory (that is,
|
||
|
the symbols whose start address belongs to a mapped page of the .so which was
|
||
|
resident at the time of the snapshot).
|
||
|
The aim is to perform a "code coverage"-like profiling of a binary, intersecting
|
||
|
run-time information (list of resident pages) and debug symbols.
|
||
|
"""
|
||
|
|
||
|
# Size in bytes of one memory page. Mapping offsets and symbol addresses are
# divided by this value to obtain page indices.
_PAGE_SIZE = 4096
|
||
|
|
||
|
|
||
|
def _TestBit(word, bit):
    """Tells whether one bit of a byte-sized integer is set.

    Args:
      word: Integer holding the bits to inspect.
      bit: Zero-based bit position (0 is the least significant bit); must lie
          in the range [0, 7].

    Returns:
      True if the requested bit of word is 1, False otherwise.
    """
    assert 0 <= bit < 8
    mask = 1 << bit
    return (word & mask) != 0
|
||
|
|
||
|
|
||
|
def _HexAddr(addr):
    """Formats an integer as lowercase hex, zero-padded to at least 8 digits.

    %-formatting is used instead of slicing the output of hex(): on Python 2,
    hex() of a long value carries a trailing 'L' (e.g. '0x100000000L'), which
    would otherwise end up inside the formatted address.

    Args:
      addr: Non-negative integer (an address, offset or length).

    Returns:
      The hexadecimal digits of addr without any '0x' prefix, left-padded
      with zeros to a minimum width of 8 characters.
    """
    return '%08x' % addr
|
||
|
|
||
|
|
||
|
def _GetResidentPagesSet(memdump_contents, lib_name, verbose):
    """Parses the memdump output and extracts the resident page set for lib_name.

    Lines starting with '[ PID' are ignored. Every other line is expected to
    have the form:

      START-END PROT OFFSET ... "PATH" [BASE64_BITMAP]

    where START, END and OFFSET are lowercase hexadecimal numbers and
    BASE64_BITMAP decodes to a bitmap with one bit per mapped page (page i is
    bit i % 8 of byte i // 8). Lines that do not match are reported on stderr
    and skipped.

    Args:
      memdump_contents: Array of strings (lines) of a memdump output.
      lib_name: A string containing the name of the library.so to be matched.
      verbose: Print a verbose header for each mapping matched.

    Returns:
      A set of resident pages (the key is the page index, computed as the
      mapping's file offset in pages plus the page position inside the
      mapping) for all the mappings whose path ends with lib_name.
    """
    resident_pages = set()
    map_rx = re.compile(
        r'^([0-9a-f]+)-([0-9a-f]+) ([\w-]+) ([0-9a-f]+) .* "(.*)" \[(.*)\]$')
    for line in memdump_contents:
        line = line.rstrip('\r\n')
        if line.startswith('[ PID'):
            continue

        r = map_rx.match(line)
        if not r:
            # The name of the memdump file is not known in this function, so
            # the message only identifies the offending line.
            sys.stderr.write('Skipping %s from memdump output\n' % (line,))
            continue

        map_start = int(r.group(1), 16)
        map_end = int(r.group(2), 16)
        prot = r.group(3)
        offset = int(r.group(4), 16)
        assert offset % _PAGE_SIZE == 0
        lib = r.group(5)
        enc_bitmap = r.group(6)

        if not lib.endswith(lib_name):
            continue

        # bytearray yields integers when indexed on both Python 2 and 3.
        bitmap = bytearray(base64.b64decode(enc_bitmap))
        # Floor division keeps the page count an integer on Python 3 as well.
        map_pages_count = (map_end - map_start + 1) // _PAGE_SIZE
        bitmap_pages_count = len(bitmap) * 8

        if verbose:
            # Single-argument print(...) produces the same output whether it
            # is parsed as a Python 2 statement or a Python 3 function call.
            print('Found %s: mapped %d pages in mode %s @ offset %s.' % (
                lib, map_pages_count, prot, _HexAddr(offset)))
            print(' Map range in the process VA: [%s - %s]. Len: %s' % (
                _HexAddr(map_start),
                _HexAddr(map_end),
                _HexAddr(map_pages_count * _PAGE_SIZE)))
            print(' Corresponding addresses in the binary: [%s - %s]. Len: %s' % (
                _HexAddr(offset),
                _HexAddr(offset + map_end - map_start),
                _HexAddr(map_pages_count * _PAGE_SIZE)))
            print(' Bitmap: %d pages' % bitmap_pages_count)
            print('')

        # The bitmap must describe at least as many pages as the mapping has.
        assert bitmap_pages_count >= map_pages_count
        first_page = offset // _PAGE_SIZE
        for i in range(map_pages_count):
            byte_idx, bit_idx = divmod(i, 8)
            # The bounds check still protects the lookup when asserts are
            # disabled (python -O).
            if (byte_idx < len(bitmap) and
                    _TestBit(bitmap[byte_idx], bit_idx)):
                resident_pages.add(first_page + i)
    return resident_pages
|
||
|
|
||
|
|
||
|
def main(argv):
    """Prints the nm symbols that lie (or do not lie) on resident pages.

    Expects three positional arguments after the options: the memdump output
    file ('-' reads it from stdin), the nm output file and the library name.
    Lines of the nm file that start with a space are ignored, lines that do
    not start with a hexadecimal address are reported on stderr and skipped,
    and every remaining line is printed when its address falls on a resident
    page (or on a non-resident page if -r/--reverse is given).

    Args:
      argv: The full command line, including the program name at index 0.

    Returns:
      0 on success, 1 if the number of positional arguments is wrong.
    """
    nm_rx = re.compile(r'^([0-9a-f]+)\s+.*$')

    parser = OptionParser()
    parser.add_option("-r", "--reverse",
                      action="store_true", dest="reverse", default=False,
                      help="Print out non present symbols")
    parser.add_option("-v", "--verbose",
                      action="store_true", dest="verbose", default=False,
                      help="Print out verbose debug information.")

    # Parse the arguments that were actually passed in rather than silently
    # falling back to sys.argv.
    (options, args) = parser.parse_args(argv[1:])

    if len(args) != 3:
        print('Usage: %s [-v] memdump.file nm.file library.so' % (
            os.path.basename(argv[0])))
        return 1

    memdump_file, nm_file, lib_name = args

    if memdump_file == '-':
        memdump_contents = sys.stdin.readlines()
    else:
        with open(memdump_file, 'r') as memdump_fh:
            memdump_contents = memdump_fh.readlines()
    resident_pages = _GetResidentPagesSet(memdump_contents,
                                          lib_name,
                                          options.verbose)

    # Process the nm symbol table, filtering out the resident symbols.
    with open(nm_file, 'r') as nm_fh:
        for line in nm_fh:
            line = line.rstrip('\r\n')
            # Skip undefined symbols (lines with no address).
            if line.startswith(' '):
                continue

            r = nm_rx.match(line)
            if not r:
                sys.stderr.write('Skipping %s from %s\n' % (line, nm_file))
                continue

            sym_addr = int(r.group(1), 16)
            # Floor division: the page index must stay an integer to be found
            # in the resident set.
            sym_page = sym_addr // _PAGE_SIZE
            if (sym_page in resident_pages) != options.reverse:
                print(line)
    return 0
|
||
|
|
||
|
# Script entry point: run main() on the process command line and use its
# return value as the exit status.
if __name__ == '__main__':
    exit_status = main(sys.argv)
    sys.exit(exit_status)
|