# Given a path to llvm-objdump and a directory tree, spider the directory tree
# dumping every object file encountered with the correct options needed to
# demangle symbols in the object file, and collect statistics about failed /
# crashed demanglings.  Useful for stress testing the demangler against a
# large corpus of inputs.

from __future__ import print_function

import argparse
import functools
import os
import subprocess
import sys
import traceback
from multiprocessing import Pool
import multiprocessing

args = None


def parse_line(line):
    """Extract a (mangled, demangled) pair from one line of llvm-objdump
    output, or (None, None) if the line does not contain a mangled name
    (starting at the first '?') followed by its demangling in parentheses."""
    question = line.find('?')
    if question == -1:
        return None, None

    open_paren = line.find('(', question)
    if open_paren == -1:
        return None, None
    close_paren = line.rfind(')', open_paren)
    if close_paren == -1:
        return None, None

    mangled = line[question:open_paren]
    demangled = line[open_paren + 1:close_paren]
    return mangled.strip(), demangled.strip()


class Result(object):
    """Demangling statistics accumulated for one file or one directory."""
    def __init__(self):
        self.crashed = []
        self.file = None
        self.nsymbols = 0
        self.errors = set()
        self.nfiles = 0


class MapContext(object):
    """Bookkeeping for streaming (directory, object files) work through the
    process pool in fixed-size chunks.  rincomplete carries the partial
    Result of a directory that was split across a chunk boundary."""
    def __init__(self):
        self.rincomplete = None
        self.rcumulative = Result()
        self.pending_objs = []
        self.npending = 0


def process_file(path, objdump):
    """Dump the symbol table of one object file and record demangling
    errors; a nonzero objdump exit code is counted as a crash."""
    r = Result()
    r.file = path

    popen_args = [objdump, '-t', '-demangle', path]
    p = subprocess.Popen(popen_args, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        r.crashed = [r.file]
        return r

    output = stdout.decode('utf-8')

    for line in output.splitlines():
        mangled, demangled = parse_line(line)
        if mangled is None:
            continue
        r.nsymbols += 1
        if "invalid mangled name" in demangled:
            r.errors.add(mangled)
    return r


def add_results(r1, r2):
    r1.crashed.extend(r2.crashed)
    r1.errors.update(r2.errors)
    r1.nsymbols += r2.nsymbols
    r1.nfiles += r2.nfiles


def print_result_row(directory, result):
    print("[{0} files, {1} crashes, {2} errors, {3} symbols]: '{4}'".format(
        result.nfiles, len(result.crashed), len(result.errors),
        result.nsymbols, directory))


def process_one_chunk(pool, chunk_size, objdump, context):
    objs = []

    incomplete = False
    dir_results = {}
    ordered_dirs = []
    # Pull up to chunk_size object files off the pending queue, tracking
    # which directory each batch came from.
    while context.npending > 0 and len(objs) < chunk_size:
        this_dir = context.pending_objs[0][0]
        ordered_dirs.append(this_dir)
        dir_result = Result()
        if context.rincomplete is not None:
            # Resume accumulating into the partial result carried over from
            # the previous chunk.
            dir_result = context.rincomplete
            context.rincomplete = None

        dir_results[this_dir] = dir_result
        dir_result.file = this_dir

        nneeded = chunk_size - len(objs)
        objs_this_dir = context.pending_objs[0][1]
        navail = len(objs_this_dir)
        ntaken = min(nneeded, navail)
        objs.extend(objs_this_dir[0:ntaken])
        remaining_objs_this_dir = objs_this_dir[ntaken:]
        context.pending_objs[0] = (context.pending_objs[0][0],
                                   remaining_objs_this_dir)
        context.npending -= ntaken
        if ntaken == navail:
            context.pending_objs.pop(0)
        else:
            incomplete = True

        dir_result.nfiles += ntaken

    assert len(objs) == chunk_size or context.npending == 0

    copier = functools.partial(process_file, objdump=objdump)
    mapped_results = list(pool.map(copier, objs))

    for mr in mapped_results:
        result_dir = os.path.dirname(mr.file)
        result_entry = dir_results[result_dir]
        add_results(result_entry, mr)

    # It's only possible for a single item to be incomplete, and it has to
    # be the last item.
    if incomplete:
        context.rincomplete = dir_results[ordered_dirs[-1]]
        ordered_dirs.pop()

    # Now ordered_dirs contains a list of all directories which *did*
    # complete.
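    # The deferred Result stays keyed to its directory via
    # context.rincomplete, so the next chunk resumes accumulating into it
    # rather than printing a partial row for that directory here.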
    for c in ordered_dirs:
        dir_result = dir_results[c]
        add_results(context.rcumulative, dir_result)
        print_result_row(c, dir_result)


def process_pending_files(pool, chunk_size, objdump, context):
    while context.npending >= chunk_size:
        process_one_chunk(pool, chunk_size, objdump, context)


def go():
    global args

    obj_dir = args.dir
    extensions = args.extensions.split(',')
    extensions = [x if x[0] == '.' else '.' + x for x in extensions]

    pool_size = 48
    pool = Pool(processes=pool_size)

    try:
        nfiles = 0

        context = MapContext()

        for root, dirs, files in os.walk(obj_dir):
            root = os.path.normpath(root)
            pending = []
            for f in files:
                _, ext = os.path.splitext(f)
                if ext not in extensions:
                    continue

                nfiles += 1
                full_path = os.path.join(root, f)
                full_path = os.path.normpath(full_path)
                pending.append(full_path)

            # If this directory had no object files, just print a default
            # status line and continue with the next dir.
            if len(pending) == 0:
                print_result_row(root, Result())
                continue

            context.npending += len(pending)
            context.pending_objs.append((root, pending))
            # Drain the tasks, `pool_size` at a time, until we have less
            # than `pool_size` tasks remaining.
            process_pending_files(pool, pool_size, args.objdump, context)

        # Process whatever is left over (fewer than pool_size files).
        assert context.npending < pool_size
        process_one_chunk(pool, pool_size, args.objdump, context)

        total = context.rcumulative
        nfailed = len(total.errors)
        nsuccess = total.nsymbols - nfailed
        ncrashed = len(total.crashed)

        if nfailed > 0:
            print("Failures:")
            for m in sorted(total.errors):
                print("  " + m)
        if ncrashed > 0:
            print("Crashes:")
            for f in sorted(total.crashed):
                print("  " + f)

        print("Summary:")
        # Guard against division by zero when no symbols / files were seen.
        spct = float(nsuccess) / float(total.nsymbols) if total.nsymbols else 0.0
        fpct = float(nfailed) / float(total.nsymbols) if total.nsymbols else 0.0
        cpct = float(ncrashed) / float(nfiles) if nfiles else 0.0
        print("Processed {0} object files.".format(nfiles))
        print("{0}/{1} symbols successfully demangled ({2:.4%})".format(
            nsuccess, total.nsymbols, spct))
        print("{0} symbols could not be demangled ({1:.4%})".format(
            nfailed, fpct))
        print("{0} files crashed while demangling ({1:.4%})".format(
            ncrashed, cpct))

    except Exception:
        traceback.print_exc()

    pool.close()
    pool.join()


if __name__ == "__main__":
    def_obj = 'obj' if sys.platform == 'win32' else 'o'

    parser = argparse.ArgumentParser(
        description='Demangle all symbols in a tree of object files, '
                    'looking for failures.')
    parser.add_argument('dir', type=str,
                        help='the root directory at which to start crawling')
    parser.add_argument('--objdump', type=str, default='llvm-objdump',
                        help='path to llvm-objdump.  If not specified the '
                             'tool is located as if by `which llvm-objdump`.')
    parser.add_argument('--extensions', type=str, default=def_obj,
                        help='comma separated list of extensions to demangle '
                             '(e.g. `o,obj`).  By default this will be `obj` '
                             'on Windows and `o` otherwise.')
    args = parser.parse_args()

    multiprocessing.freeze_support()
    go()
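
# Example invocation (the paths below are illustrative, not part of the
# script):
#
#   python demangle_tree.py /path/to/build --objdump=/usr/bin/llvm-objdump \
#       --extensions=o,obj
#
# Each completed directory prints one status row via print_result_row, e.g.
# (numbers invented):
#
#   [12 files, 0 crashes, 3 errors, 4521 symbols]: '/path/to/build/lib'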