#!/usr/bin/env python3
"""
This script:
- Builds clang with user-defined flags
- Uses that clang to build an instrumented clang, which can be used to collect
  PGO samples
- Builds a user-defined set of sources (default: clang) to act as a
  "benchmark" to generate a PGO profile
- Builds clang once more with the PGO profile generated above

This is a total of four clean builds of clang (by default). This may take a
while. :)
"""

import argparse
import collections
import multiprocessing
import os
import shlex
import shutil
import subprocess
import sys

### User configuration


# If you want to use a different 'benchmark' than building clang, make this
# function do what you want. out_dir is the build directory for clang, so all
# of the clang binaries will live under "${out_dir}/bin/". Using clang in
# ${out_dir} will magically have the profiles go to the right place.
#
# You may assume that out_dir is a freshly-built directory that you can reach
# in to build more things, if you'd like.
def _run_benchmark(env, out_dir, include_debug_info):
    """The 'benchmark' we run to generate profile data.

    Runs `check-llvm` and `check-clang` in `out_dir`, then configures and
    builds `all` in the 'instrumentation_run' output subdirectory using the
    clang found in `out_dir`. If `include_debug_info` is true, that build is
    configured as RelWithDebInfo.
    """
    target_dir = env.output_subdir('instrumentation_run')

    # `check-llvm` and `check-clang` are cheap ways to increase coverage. The
    # former lets us touch on the non-x86 backends a bit if configured, and the
    # latter gives us more C to chew on (and will send us through diagnostic
    # paths a fair amount, though the `if (stuff_is_broken) { diag() ... }`
    # branches should still heavily be weighted in the not-taken direction,
    # since we built all of LLVM/etc).
    _build_things_in(env, out_dir, what=['check-llvm', 'check-clang'])

    # Building tblgen gets us coverage; don't skip it. (out_dir may also not
    # have them anyway, but that's less of an issue)
    cmake = _get_cmake_invocation_for_bootstrap_from(
        env, out_dir, skip_tablegens=False)

    if include_debug_info:
        cmake.add_flag('CMAKE_BUILD_TYPE', 'RelWithDebInfo')

    _run_fresh_cmake(env, cmake, target_dir)

    # Just build all the things. The more data we have, the better.
    _build_things_in(env, target_dir, what=['all'])


### Script


class CmakeInvocation:
    """Accumulates `-D` definitions and renders a cmake command line.

    The compiler- and linker-flag variables are pre-registered as lists, so
    values added for them are appended (and space-joined on output) rather
    than overwritten.
    """

    _cflags = ['CMAKE_C_FLAGS', 'CMAKE_CXX_FLAGS']
    _ldflags = [
        'CMAKE_EXE_LINKER_FLAGS',
        'CMAKE_MODULE_LINKER_FLAGS',
        'CMAKE_SHARED_LINKER_FLAGS',
    ]

    def __init__(self, cmake, maker, cmake_dir):
        """Start an invocation of `cmake -G maker cmake_dir`."""
        self._prefix = [cmake, '-G', maker, cmake_dir]

        # Map of str -> (list|str).
        self._flags = {}
        for flag in CmakeInvocation._cflags + CmakeInvocation._ldflags:
            self._flags[flag] = []

    def add_new_flag(self, key, value):
        """Like add_flag, but raises ValueError instead of overwriting."""
        self.add_flag(key, value, allow_overwrites=False)

    def add_flag(self, key, value, allow_overwrites=True):
        """Set `key` to `value`, or append `value` if `key` is list-valued.

        Raises ValueError if `key` already holds a non-list value and
        `allow_overwrites` is false.
        """
        if key not in self._flags:
            self._flags[key] = value
            return

        existing_value = self._flags[key]
        if isinstance(existing_value, list):
            existing_value.append(value)
            return

        if not allow_overwrites:
            raise ValueError('Invalid overwrite of %s requested' % key)

        self._flags[key] = value

    def add_cflags(self, flags):
        """Append each of `flags` to both the C and C++ compiler flags."""
        # No, I didn't intend to append ['-', 'O', '2'] to my flags, thanks :)
        assert not isinstance(flags, str)
        for f in CmakeInvocation._cflags:
            self._flags[f].extend(flags)

    def add_ldflags(self, flags):
        """Append each of `flags` to the exe/module/shared linker flags."""
        assert not isinstance(flags, str)
        for f in CmakeInvocation._ldflags:
            self._flags[f].extend(flags)

    def to_args(self):
        """Return the full argv list: the prefix, then sorted -D flags."""
        args = self._prefix.copy()
        for key, value in sorted(self._flags.items()):
            if isinstance(value, list):
                # We preload all of the list-y values (cflags, ...). If we've
                # nothing to add, don't.
                if not value:
                    continue
                value = ' '.join(value)

            arg = '-D' + key
            if value != '':
                arg += '=' + value
            args.append(arg)
        return args


class Env:
    """Build environment: source/output locations and how to run commands."""

    def __init__(self, llvm_dir, use_make, output_dir, default_cmake_args,
                 dry_run):
        self.llvm_dir = llvm_dir
        self.use_make = use_make
        self.output_dir = output_dir
        # Copied so later changes to the caller's dict don't leak in here.
        self.default_cmake_args = default_cmake_args.copy()
        self.dry_run = dry_run

    def get_default_cmake_args_kv(self):
        """Return the (key, value) pairs applied to every cmake invocation."""
        return self.default_cmake_args.items()

    def get_cmake_maker(self):
        """Return the cmake generator name matching `use_make`."""
        return 'Ninja' if not self.use_make else 'Unix Makefiles'

    def get_make_command(self):
        """Return the argv prefix of the build tool matching `use_make`."""
        if self.use_make:
            return ['make', '-j{}'.format(multiprocessing.cpu_count())]
        return ['ninja']

    def output_subdir(self, name):
        """Return the path of `name` inside the output directory."""
        return os.path.join(self.output_dir, name)

    def has_llvm_subproject(self, name):
        """Return whether the `name` subproject is checked out in llvm_dir.

        Only 'compiler-rt' and 'clang' are known; anything else raises
        ValueError.
        """
        if name == 'compiler-rt':
            subdir = 'projects/compiler-rt'
        elif name == 'clang':
            subdir = 'tools/clang'
        else:
            raise ValueError('Unknown subproject: %s' % name)

        return os.path.isdir(os.path.join(self.llvm_dir, subdir))

    # Note that we don't allow capturing stdout/stderr. This works quite nicely
    # with dry_run.
    def run_command(self,
                    cmd,
                    cwd=None,
                    check=False,
                    silent_unless_error=False):
        """Print `cmd`, then run it unless this is a dry run.

        With `silent_unless_error`, the command's combined stdout/stderr is
        captured and only printed if the command fails. With `check`, a
        failing command raises subprocess.CalledProcessError. Always returns
        None.
        """
        cmd_str = ' '.join(shlex.quote(s) for s in cmd)
        print(
            'Running `%s` in %s' % (cmd_str, shlex.quote(cwd or os.getcwd())))

        if self.dry_run:
            return

        if silent_unless_error:
            stdout, stderr = subprocess.PIPE, subprocess.STDOUT
        else:
            stdout, stderr = None, None

        # Don't use subprocess.run because it's >= py3.5 only, and it's not too
        # much extra effort to get what it gives us anyway.
        popen = subprocess.Popen(
            cmd,
            stdin=subprocess.DEVNULL,
            stdout=stdout,
            stderr=stderr,
            cwd=cwd)
        stdout, _ = popen.communicate()
        # communicate() has already waited for the process to exit.
        return_code = popen.returncode
        if not return_code:
            return

        if silent_unless_error:
            print(stdout.decode('utf-8', 'ignore'))

        if check:
            raise subprocess.CalledProcessError(
                returncode=return_code, cmd=cmd, output=stdout, stderr=None)


def _get_default_cmake_invocation(env):
    """Return a CmakeInvocation for env.llvm_dir with env's default args."""
    inv = CmakeInvocation(
        cmake='cmake', maker=env.get_cmake_maker(), cmake_dir=env.llvm_dir)
    for key, value in env.get_default_cmake_args_kv():
        inv.add_new_flag(key, value)
    return inv


def _get_cmake_invocation_for_bootstrap_from(env, out_dir,
                                             skip_tablegens=True):
    """Return a CmakeInvocation that compiles with the clang in `out_dir`.

    If `skip_tablegens` is true, the llvm-tblgen/clang-tblgen binaries in
    `out_dir` are reused (when present) instead of being rebuilt.
    """
    clang = os.path.join(out_dir, 'bin', 'clang')
    cmake = _get_default_cmake_invocation(env)
    cmake.add_new_flag('CMAKE_C_COMPILER', clang)
    cmake.add_new_flag('CMAKE_CXX_COMPILER', clang + '++')

    # We often get no value out of building new tblgens; the previous build
    # should have them. It's still correct to build them, just slower.
    def add_tablegen(key, binary):
        path = os.path.join(out_dir, 'bin', binary)

        # Check that this exists, since the user's allowed to specify their own
        # stage1 directory (which is generally where we'll source everything
        # from). Dry runs should hope for the best from our user, as well.
        if env.dry_run or os.path.exists(path):
            cmake.add_new_flag(key, path)

    if skip_tablegens:
        add_tablegen('LLVM_TABLEGEN', 'llvm-tblgen')
        add_tablegen('CLANG_TABLEGEN', 'clang-tblgen')

    return cmake


def _build_things_in(env, target_dir, what):
    """Run the build tool in `target_dir` for the list of targets `what`."""
    cmd = env.get_make_command() + what
    env.run_command(cmd, cwd=target_dir, check=True)


def _run_fresh_cmake(env, cmake, target_dir):
    """Recreate `target_dir` from scratch and run `cmake` inside it.

    On a dry run the directory is left untouched and the command is only
    printed.
    """
    if not env.dry_run:
        try:
            shutil.rmtree(target_dir)
        except FileNotFoundError:
            pass

        os.makedirs(target_dir, mode=0o755)

    cmake_args = cmake.to_args()
    env.run_command(
        cmake_args, cwd=target_dir, check=True, silent_unless_error=True)


def _build_stage1_clang(env):
    """Build clang, llvm-profdata and profile in 'stage1'; return its path."""
    target_dir = env.output_subdir('stage1')
    cmake = _get_default_cmake_invocation(env)
    _run_fresh_cmake(env, cmake, target_dir)
    _build_things_in(env, target_dir,
                     what=['clang', 'llvm-profdata', 'profile'])
    return target_dir


def _generate_instrumented_clang_profile(env, stage1_dir, profile_dir,
                                         output_file):
    """Merge the *.profraw files in `profile_dir` into `output_file`.

    Uses the llvm-profdata binary from `stage1_dir`. On a dry run the
    directory is not read; a literal '*.profraw' placeholder is printed
    instead.
    """
    llvm_profdata = os.path.join(stage1_dir, 'bin', 'llvm-profdata')
    if env.dry_run:
        profiles = [os.path.join(profile_dir, '*.profraw')]
    else:
        # Sorted so the merge command is the same from run to run.
        profiles = [
            os.path.join(profile_dir, f)
            for f in sorted(os.listdir(profile_dir))
            if f.endswith('.profraw')
        ]
    cmd = [llvm_profdata, 'merge', '-output=' + output_file] + profiles
    env.run_command(cmd, check=True)


def _build_instrumented_clang(env, stage1_dir):
    """Build an IR-instrumented clang and lld using the stage1 clang.

    `stage1_dir` must be an absolute path. Returns a tuple of
    (build directory, directory named 'profiles' inside it).
    """
    assert os.path.isabs(stage1_dir)

    target_dir = os.path.join(env.output_dir, 'instrumented')
    cmake = _get_cmake_invocation_for_bootstrap_from(env, stage1_dir)
    cmake.add_new_flag('LLVM_BUILD_INSTRUMENTED', 'IR')

    # libcxx's configure step messes with our link order: we'll link
    # libclang_rt.profile after libgcc, and the former requires atexit from the
    # latter. So, configure checks fail.
    #
    # Since we don't need libcxx or compiler-rt anyway, just disable them.
    cmake.add_new_flag('LLVM_BUILD_RUNTIME', 'No')

    _run_fresh_cmake(env, cmake, target_dir)
    _build_things_in(env, target_dir, what=['clang', 'lld'])

    profiles_dir = os.path.join(target_dir, 'profiles')
    return target_dir, profiles_dir


def _build_optimized_clang(env, stage1_dir, profdata_file):
    """Build clang in 'optimized' using `profdata_file`; return that path.

    Raises ValueError if `profdata_file` does not exist (not checked on a
    dry run).
    """
    if not env.dry_run and not os.path.exists(profdata_file):
        raise ValueError('Looks like the profdata file at %s doesn\'t exist' %
                         profdata_file)

    target_dir = os.path.join(env.output_dir, 'optimized')
    cmake = _get_cmake_invocation_for_bootstrap_from(env, stage1_dir)
    cmake.add_new_flag('LLVM_PROFDATA_FILE', os.path.abspath(profdata_file))

    # We'll get complaints about hash mismatches in `main` in tools/etc. Ignore
    # it.
    cmake.add_cflags(['-Wno-backend-plugin'])
    _run_fresh_cmake(env, cmake, target_dir)
    _build_things_in(env, target_dir, what=['clang'])
    return target_dir


Args = collections.namedtuple('Args', [
    'do_optimized_build',
    'include_debug_info',
    'profile_location',
    'stage1_dir',
])


def _parse_args(argv=None):
    """Parse command-line options into an (Env, Args) pair.

    `argv` is the list of arguments to parse; None (the default) means
    sys.argv[1:]. Raises ValueError for a --cmake-extra-arg that starts with
    '-' but is not a -D argument.
    """
    parser = argparse.ArgumentParser(
        description='Builds LLVM and Clang with instrumentation, collects '
        'instrumentation profiles for them, and (optionally) builds things '
        'with these PGO profiles. By default, it\'s assumed that you\'re '
        'running this from your LLVM root, and all build artifacts will be '
        'saved to $PWD/out.')
    parser.add_argument(
        '--cmake-extra-arg',
        action='append',
        default=[],
        help='an extra arg to pass to all cmake invocations. Note that this '
        'is interpreted as a -D argument, e.g. --cmake-extra-arg FOO=BAR will '
        'be passed as -DFOO=BAR. This may be specified multiple times.')
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='print commands instead of running them')
    parser.add_argument(
        '--llvm-dir',
        default='.',
        help='directory containing an LLVM checkout (default: $PWD)')
    parser.add_argument(
        '--no-optimized-build',
        action='store_true',
        help='disable the final, PGO-optimized build')
    parser.add_argument(
        '--out-dir',
        help='directory to write artifacts to (default: $llvm_dir/out)')
    parser.add_argument(
        '--profile-output',
        help='where to output the profile (default is $out/pgo_profile.prof)')
    parser.add_argument(
        '--stage1-dir',
        help='instead of having an initial build of everything, use the given '
        'directory. It is expected that this directory will have clang, '
        'llvm-profdata, and the appropriate libclang_rt.profile already built')
    parser.add_argument(
        '--use-debug-info-in-benchmark',
        action='store_true',
        help='use RelWithDebInfo instead of a regular build in the benchmark. '
        'This increases benchmark execution time and disk space requirements, '
        'but gives more coverage over debuginfo bits in LLVM and clang.')
    parser.add_argument(
        '--use-make',
        action='store_true',
        default=shutil.which('ninja') is None,
        help='use Makefiles instead of ninja')

    args = parser.parse_args(argv)

    llvm_dir = os.path.abspath(args.llvm_dir)
    if args.out_dir is None:
        output_dir = os.path.join(llvm_dir, 'out')
    else:
        output_dir = os.path.abspath(args.out_dir)

    extra_args = {'CMAKE_BUILD_TYPE': 'Release'}
    for arg in args.cmake_extra_arg:
        if arg.startswith('-D'):
            arg = arg[2:]
        elif arg.startswith('-'):
            raise ValueError('Unknown not- -D arg encountered; you may need '
                             'to tweak the source...')
        split = arg.split('=', 1)
        if len(split) == 1:
            key, val = split[0], ''
        else:
            key, val = split
        extra_args[key] = val

    env = Env(
        default_cmake_args=extra_args,
        dry_run=args.dry_run,
        llvm_dir=llvm_dir,
        output_dir=output_dir,
        use_make=args.use_make,
    )

    if args.profile_output is not None:
        profile_location = args.profile_output
    else:
        profile_location = os.path.join(env.output_dir, 'pgo_profile.prof')

    # The stage1 directory is referenced from commands that run inside other
    # build directories (and _build_instrumented_clang asserts that it is
    # absolute), so resolve a user-supplied relative path right away.
    stage1_dir = args.stage1_dir
    if stage1_dir is not None:
        stage1_dir = os.path.abspath(stage1_dir)

    result_args = Args(
        do_optimized_build=not args.no_optimized_build,
        include_debug_info=args.use_debug_info_in_benchmark,
        profile_location=profile_location,
        stage1_dir=stage1_dir,
    )

    return env, result_args


def _looks_like_llvm_dir(directory):
    """Arbitrary set of heuristics to determine if `directory` is an llvm dir.

    Errs on the side of false-positives. A `directory` that cannot be listed
    (e.g. it does not exist) is reported as not being an llvm dir.
    """
    try:
        contents = set(os.listdir(directory))
    except OSError:
        return False

    expected_contents = [
        'CODE_OWNERS.TXT',
        'cmake',
        'docs',
        'include',
        'utils',
    ]

    if not all(c in contents for c in expected_contents):
        return False

    try:
        include_listing = os.listdir(os.path.join(directory, 'include'))
    except OSError:
        return False

    return 'llvm' in include_listing


def _die(*args, **kwargs):
    """Print a message to stderr (print() arguments) and exit with status 1."""
    kwargs['file'] = sys.stderr
    print(*args, **kwargs)
    sys.exit(1)


def _main():
    """Run the stage1 / instrumented / benchmark / optimized pipeline."""
    env, args = _parse_args()

    if not _looks_like_llvm_dir(env.llvm_dir):
        _die('Looks like %s isn\'t an LLVM directory; please see --help' %
             env.llvm_dir)
    if not env.has_llvm_subproject('clang'):
        _die('Need a clang checkout at tools/clang')
    if not env.has_llvm_subproject('compiler-rt'):
        _die('Need a compiler-rt checkout at projects/compiler-rt')

    def status(*args):
        print(*args, file=sys.stderr)

    if args.stage1_dir is None:
        status('*** Building stage1 clang...')
        stage1_out = _build_stage1_clang(env)
    else:
        stage1_out = args.stage1_dir

    status('*** Building instrumented clang...')
    instrumented_out, profile_dir = _build_instrumented_clang(env, stage1_out)
    status('*** Running profdata benchmarks...')
    _run_benchmark(env, instrumented_out, args.include_debug_info)
    status('*** Generating profile...')
    _generate_instrumented_clang_profile(env, stage1_out, profile_dir,
                                         args.profile_location)

    print('Final profile:', args.profile_location)
    if args.do_optimized_build:
        status('*** Building PGO-optimized binaries...')
        optimized_out = _build_optimized_clang(env, stage1_out,
                                               args.profile_location)
        print('Final build directory:', optimized_out)


if __name__ == '__main__':
    _main()