#!/usr/bin/env python3 import os import sys import time import optparse import logging import re import subprocess import urllib import urllib.parse from xml.dom import minidom # Magic name for mirror destination host DEST_KEYWORD = "DESTINATION" log = logging.getLogger(__file__) class MirrorConfig(object): def __init__(self, fname): self.fname = fname self.host_map = {} self.parse() def store_host_info(self, host, host_vars, host_proj_map, host_wildcard_map): if host and self.get_bool(host_vars.get("active", "true")): self.host_map[host] = (host_vars, host_proj_map, host_wildcard_map) def get_bool(self, val): val2 = val.lower() if val2 in ("true", "on", "1"): return True if val2 in ("false", "off", "0"): return False assert False, "Syntax error: boolean value expected, got: " + val def parse(self): in_file = open(self.fname) host = None host_vars = {} host_proj_map = {} host_wildcard_map = {} for l in in_file: l = l.strip() if not l: continue if l[0] == "#": continue if l[0] == "[": self.store_host_info( host, host_vars, host_proj_map, host_wildcard_map) host = l[1:-1] if not host.startswith(DEST_KEYWORD): assert "://" in host, "URL schema is required in " + l host_proj_map = {} host_wildcard_map = {} host_vars = {} else: fields = l.split("=", 1) fields = [f.strip() for f in fields] if fields[0][0] == "$": # Variable spec host_vars[fields[0][1:]] = fields[1] else: # Repository spec # If there's no explicit remapping, treat it as remapping # to the same name if len(fields) == 1: fields.append(fields[0]) stars = fields[0].count("*") assert stars <= 1, "Only one * allowed in pattern: " + l if stars: host_wildcard_map[fields[0]] = fields[1] else: host_proj_map[fields[0]] = fields[1] # Store last block self.store_host_info(host, host_vars, host_proj_map, host_wildcard_map) in_file.close() def get_hosts(self, substr_match=""): return sorted( [h for h in self.host_map.keys() if substr_match in h and not h.startswith(DEST_KEYWORD)]) def get_var(self, host, var, default=None): return self.host_map[host][0].get(var, default) def has_host(self, host): return host in self.host_map def get_mirror_repo(self, host, source_repo): """Get mirror path for a repo. By default it is equal to source path, but if exception was defined, it is different.""" if source_repo in self.host_map[host][1]: return self.host_map[host][1][source_repo] for pat, repl in self.host_map[host][2].items(): # Convert glob-style pattern to regexp, then use it # for match and substitution pat = pat.replace("*", "(.+)") m = re.match(pat, source_repo) if m: return repl.replace("*", m.group(1)) return source_repo def get_repo_map(self, host): "Get all mirror repo paths as dict (key - src path, value - dest path)" return self.host_map[host][1] def run_command(cmdline, favor_dry_run=True): if favor_dry_run and options.dry_run: log.info("Would run: %s", cmdline) else: log.debug("Running: %s", cmdline) os.system(cmdline) def scan_git_projects(basedir): git_repos = [] for root, dirs, files in os.walk(basedir): for d in dirs: if d.endswith(".git"): abspath = os.path.join(root, d) relpath = abspath[len(basedir) + 1:] git_repos.append((os.path.abspath(abspath), relpath)) return git_repos def get_manifest_projects(host, manifest): if manifest.startswith("mirror:"): manifest = manifest.split(":", 1)[1] manifest, rest = manifest.split("#", 1) branch, fname = rest.split(";", 1) manifest_repo = host_url_to_path(host) + "/" + manifest + ".git" if not os.path.isdir(manifest_repo): log.warn("Cannot get project list - manifest repo %s hasn't been mirrored yet", manifest_repo) return [] f = os.popen("GIT_DIR=%s git show %s:%s" % (manifest_repo, branch, fname), "r") data = f.read() f.close() dom = minidom.parseString(data) else: dom = minidom.parse(manifest) return [p.getAttribute("name") for p in dom.getElementsByTagName("project")] def get_cached(get_func, get_params_array, cache_id=None, cache_ttl=120): if cache_id is None: cache_id = get_params_array[0] cache_file = re.sub(r"[ :/?&]", "_", cache_id) + ".cache" cache_file = os.path.join('/tmp', cache_file) if os.path.exists(cache_file): age = time.time() - os.stat(cache_file).st_mtime if age < cache_ttl: log.debug("Using cached file %s", cache_file) f = open(cache_file) projects = [l.strip() for l in f] f.close() return projects result = get_func(*get_params_array) f = open(cache_file, "w") f.write("\n".join(result) + "\n") f.close() return result def get_gerrit_projects(gerrit_host): parts = gerrit_host.split(":", 1) if len(parts) == 1: parts.append("29418") ssh_identity_option = get_ssh_identity_file_option() cmd = "ssh %s -p %s %s gerrit ls-projects --all" % ( ssh_identity_option, parts[1], parts[0]) log.debug("Running command: %s", cmd) return [x.strip() for x in subprocess.check_output(cmd, shell=True).decode().split('\n') if x] def get_url_project_list(url): f = urllib.urlopen(url) projects = [l.strip() for l in f] f.close() return projects def get_ssh_identity_file_option(): """Return an identity file option to use in SSH command based on config.""" ssh_identity = conf.get_var(options.dest, "ssh_identity") if ssh_identity is not None: ssh_identity_option = '-i %s' % ssh_identity else: ssh_identity_option = '' return ssh_identity_option #HERE def get_project_map_for_a_host(host): """Get {src: dest} project map for a host. These can be either: 1. Manually specified projects (with destination remaps or no) 2. Project list got from Gerrit for that host (as specified by $gerrit) 3. Project list from Gerrit, with manually specified remaps applied. """ gerrit_host = conf.get_var(host, "gerrit") projects = [] if options.manifest: print("Warning: global --manifest option is deprecated, " "use per-host $manifest setting in mirror.conf") projects = get_manifest_projects(host, options.manifest) elif conf.get_var(host, "manifest"): manifest = conf.get_var(host, "manifest") projects = get_manifest_projects(host, manifest) elif conf.get_var(host, "project_list"): url = conf.get_var(host, "project_list") projects = get_cached(get_url_project_list, (url,)) elif gerrit_host: ssh_supported = conf.get_var(host, "gerrit_ssh", "true") if ssh_supported == "true": projects = get_cached(get_gerrit_projects, (gerrit_host,)) else: log.warn( "SSH access disabled for %s, cannot get project list", host) projects.extend(conf.get_repo_map(host).keys()) return dict((p, conf.get_mirror_repo(host, p)) for p in projects) def create_gerrit_projects(projects): gerrit_port = conf.get_var(options.dest, "gerrit_port") gerrit_host = conf.get_var(options.dest, "gerrit_host") ssh_identity_option = get_ssh_identity_file_option() for project in projects: run_command("ssh %s -p %s %s gerrit create-project %s" % (ssh_identity_option, gerrit_port, gerrit_host, project)) def clone_repos(host, basedir, projects): "Clone-mirror repos from remote server" new_repos = [] for p in projects: if os.path.exists(basedir + "/" + p + ".git"): log.debug("%s already exists, skipping", p) continue dir = os.path.dirname(p) dir = os.path.join(basedir, dir) try: os.makedirs(dir) except OSError: pass log.info("cd'ing to %s", dir) os.chdir(dir) # --depth=1 cmd = "git clone --mirror %s/%s.git" % (clean_host(host), p) run_command(cmd) os.chdir(basedir) new_repos.append(p + '.git') return new_repos def fetch_repos(git_repos): "Update locally present mirror repos from remote server" for abspath, relpath in git_repos: log.info("Fetching in " + relpath) os.chdir(abspath) run_command("git fetch --prune") def push_repos(host, git_repos, force=False): "Push-mirror locally present mirror repos to remote Gerrit git server" for abspath, relpath in git_repos: log.info("Pushing in " + relpath) projname = relpath[:-len(".git")] mirror_projname = conf.get_mirror_repo(host, projname) os.chdir(abspath) extra_opts = "" if force: extra_opts += " --force" if not options.debug: extra_opts += " --no-progress" # allow to override repo_root repo_root = conf.get_var( host, "repo_root", conf.get_var(options.dest, "repo_root") ) cmdline = "time git push%s %s/%s.git 'refs/heads/*' 'refs/tags/*'" % ( extra_opts, repo_root, mirror_projname) run_command(cmdline) def update_grok(manifest, repo_root, repo): log.debug('updating grok manifest for %s', repo) cmd = '/usr/local/bin/grok-manifest -n -m %s -t %s -n %s' % ( manifest, repo_root, repo) cmd = 'cd %s; %s' % (repo_root, cmd) run_command(cmd) def handle_updated_repos(conf, host, repos): manifest = conf.get_var(options.dest, 'grok_manifest') if manifest: root = conf.get_var(options.dest, 'grok_repo_root') for p in repos: projname = p[:-len(".git")] mirror_projname = conf.get_mirror_repo(host, projname) + '.git' update_grok(manifest, root, mirror_projname) def host_url_to_path(host): "Convert host url from config to mirror dir path." parts = host.rsplit("#", 1) host = parts[0] host_p = urllib.parse.urlparse(host.rstrip("/")) path_adjusted = host_p.path.replace("/", "__") ret = options.mirror_dir + "/" + host_p.netloc + path_adjusted if len(parts) > 1: ret += "#" + parts[1] return ret def clean_host(host): "Split any #N trailers from config host url." return host.rsplit("#", 1)[0] def check_args(optparser, args, expected): if len(args) != expected: optparser.error("Wrong number of arguments") # gerrit-mirror.py clone git://android.git.kernel.org aosp-projects.txt # gerrit-mirror.py fetch # gerrit-mirror.py push optparser = optparse.OptionParser(usage="""%prog ... Command: create - Create any new projects in destination Gerrit (based on config) clone - Clone any new projects from source upstreams (based on config) fetch - Fetch upstream updates (based on working copy) push - Push updates into destination (based on working copy) ls-upstream - List upstream projects (based on config)""") optparser.add_option("--mirror-dir", metavar="DIR", help="Mirror root directory") optparser.add_option("--config", default="mirror.conf", help="Config file to use (%default)") optparser.add_option("--manifest", help=("Use manifest for list of upstream projects " "(overrides $gerrit in config)")) optparser.add_option("--upstream", metavar="SUBSTR", default="", help="Process only upstreams matching SUBSTR") optparser.add_option("--dest", default="", help=("Use alternative destination as specified in " "mirror.conf")) optparser.add_option("--force", action="store_true", help="Pass --force to git commands where applicable") optparser.add_option("--dry-run", action="store_true", help="Don't make any changes") optparser.add_option("--debug", action="store_true", help="Enable debug logging") optparser.add_option("--limit", metavar="N", type="int", help="Process max N projects") optparser.add_option("--quiet", action="store_true", help="Limit logging to errors") options, args = optparser.parse_args(sys.argv[1:]) if len(args) < 1: optparser.error("Wrong number of arguments") log_level = logging.INFO if options.debug: log_level = logging.DEBUG elif options.quiet: log_level = logging.ERROR logging.basicConfig(level=log_level) if options.mirror_dir: options.mirror_dir = os.path.abspath(options.mirror_dir) conf = MirrorConfig(options.config) dest = options.dest if dest: dest = "-" + dest options.dest = DEST_KEYWORD + dest if args[0] == "create": check_args(optparser, args, 1) gerrit_host = conf.get_var(options.dest, "gerrit_host") gerrit_port = conf.get_var(options.dest, "gerrit_port") repo_root = conf.get_var(options.dest, "repo_root") if not gerrit_host and repo_root[0] == "/": # Create repository locally for host in conf.get_hosts(options.upstream): log.debug("=== Processing: %s ===", host) projects = get_project_map_for_a_host(host).values() for p in projects: path = "%s/%s.git" % (repo_root, p) if not os.path.exists(path): if not options.dry_run: os.makedirs(path) run_command("GIT_DIR=%s git init --bare" % path) sys.exit() if not gerrit_host: log.warn("Non-local repo_root: Don't know how to create projects") sys.exit() log.debug("Getting list of projects in target Gerrit %s:%s", gerrit_host, gerrit_port) existing_projects = get_cached( get_gerrit_projects, ("%s:%s" % (gerrit_host, gerrit_port),)) for host in conf.get_hosts(options.upstream): log.debug("=== Processing: %s ===", host) projects = get_project_map_for_a_host(host).values() for p in projects: if p not in existing_projects: create_gerrit_projects([p]) else: log.debug("Skipping creating %s - already exists", p) elif args[0] == "clone": check_args(optparser, args, 1) if not options.mirror_dir: optparser.error("--mirror-dir is required") for host in conf.get_hosts(options.upstream): log.debug("=== Processing: %s ===", host) projects = get_project_map_for_a_host(host).keys() host_p = urllib.parse.urlparse(host) new_repos = clone_repos(host, host_url_to_path(host), projects) handle_updated_repos(conf, host, new_repos) elif args[0] == "fetch": check_args(optparser, args, 1) if not options.mirror_dir: optparser.error("--mirror-dir is required") for host in conf.get_hosts(options.upstream): host_p = urllib.parse.urlparse(host) git_repos = scan_git_projects(host_url_to_path(host)) log.info("=== Processing: %s (%d repositories) ===", host, len(git_repos)) fetch_repos(git_repos) elif args[0] == "push": check_args(optparser, args, 1) if not options.mirror_dir: optparser.error("--mirror-dir is required") for host in conf.get_hosts(options.upstream): host_p = urllib.parse.urlparse(host) git_repos = scan_git_projects(host_url_to_path(host)) log.info("=== Processing: %s (%d repositories) ===", host, len(git_repos)) push_repos(host, git_repos, force=options.force) handle_updated_repos(conf, host, [x[1] for x in git_repos]) elif args[0] in ("upstream-ls", "ls-upstream"): check_args(optparser, args, 1) for host in conf.get_hosts(options.upstream): print("%s:" % host) projects = sorted( get_project_map_for_a_host(host).keys()) for p in projects: print(p) print else: optparser.error("Unknown command")