From: Ralf Jung Date: Sun, 22 Feb 2015 15:14:46 +0000 (+0100) Subject: add support for github -> local sync X-Git-Url: https://git.ralfj.de/git-mirror.git/commitdiff_plain/7373610f7eb8e516b6610370411b57a20e7af2a0?ds=sidebyside;hp=2e049a06806faee3cfd40bb36adeffddacce9620 add support for github -> local sync --- diff --git a/update.py b/update.py index 80334f5..8d2c207 100755 --- a/update.py +++ b/update.py @@ -1,69 +1,122 @@ #!/usr/bin/python3 import sys, os, subprocess, argparse +import configparser, itertools, json, re +import traceback +import email.mime.text, email.utils, smtplib class GitCommand: def __getattr__(self, name): - def call(*args, get_stderr = False): + def call(*args, capture_stderr = False, check = True): + '''If , return stderr merged with stdout. Otherwise, return stdout and forward stderr to our own. + If is true, throw an exception of the process fails with non-zero exit code. Otherwise, do not. + In any case, return a pair of the captured output and the exit code.''' cmd = ["git", name.replace('_', '-')] + list(args) - output = subprocess.check_output(cmd, stderr=subprocess.STDOUT if get_stderr else None) - return output.decode('utf-8').strip('\n') + with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT if capture_stderr else None) as p: + (stdout, stderr) = p.communicate() + assert stderr is None + code = p.returncode + if check and code: + raise Exception("Error running {0}: Non-zero exit code".format(cmd)) + return (stdout.decode('utf-8').strip('\n'), code) return call - - def branches(self, *args): - b = self.branch(*args).split('\n') - b = map(lambda s: s[2:], b) - return list(b) git = GitCommand() -def is_all_zero(str): - return len(str.replace('0', '')) == 0 +def read_config(fname, defSection = 'DEFAULT'): + '''Reads a config file that may have options outside of any section.''' + config = configparser.ConfigParser() + with open(fname) as file: + stream = itertools.chain(("["+defSection+"]\n",), file) + config.read_file(stream) + return config + +def send_mail(subject, text, receivers, sender='post+webhook@ralfj.de', replyTo=None): + assert isinstance(receivers, list) + if not len(receivers): return # nothing to do + # construct content + msg = email.mime.text.MIMEText(text.encode('UTF-8'), 'plain', 'UTF-8') + msg['Subject'] = subject + msg['Date'] = email.utils.formatdate(localtime=True) + msg['From'] = sender + msg['To'] = ', '.join(receivers) + if replyTo is not None: + msg['Reply-To'] = replyTo + # put into envelope and send + s = smtplib.SMTP('localhost') + s.sendmail(sender, receivers, msg.as_string()) + s.quit() + +def get_github_payload(): + '''Reeturn the github-style JSON encoded payload (as if we were called as a github webhook)''' + try: + data = sys.stdin.buffer.read() + data = json.loads(data.decode('utf-8')) + return data + except: + return {} # nothing read class Repo: - def __init__(self, local, mirrors): - ''' is the directory containing the repository locally, a list of remote repositories''' - self.local = local - self.mirrors = mirrors + def __init__(self, conf): + '''Creates a repository from a section of the git-mirror configuration file''' + self.local = conf['local'] + self.mirrors = {} # maps mirrors to their URLs + mirror_prefix = 'mirror-' + for name in filter(lambda s: s.startswith(mirror_prefix), conf.keys()): + mirror = name[len(mirror_prefix):] + self.mirrors[mirror] = conf[name] -# This is old code, that may be useful again if we decide to care about racy pushes loosing commits. -# def pull(self, slavenr): -# slave = self.slaves[slavenr] -# slavename = "slave-"+str(slavenr) -# # make sure we have the remote -# try: -# git.remote("add", slavename, slave, get_stderr=True) -# except subprocess.CalledProcessError: # the remote already exists -# git.remote("set-url", slavename, slave) -# # get all the changes -# git.fetch(slavename, get_stderr=True) -# # merge them... or hope so... -# branches = git.branches("-r") -# for branch in filter(lambda s: s.startswith(slavename+"/"), branches): -# local = branch[len(slavename+"/"):] -# print(local, branch) + def find_mirror_by_url(self, match_urls): + for mirror, url in self.mirrors.items(): + if url in match_urls: + return mirror + return None - def update_mirror_ref(self, ref, mirror): - '''Update on to the local state. If is all-zero, the ref should be deleted.''' - git.push('--force', self.mirrors[mirror], ref) + def have_ref(self, ref, url=None): + '''Tests if a given ref exists, locally or (if the url is given) remotely''' + if url is None: + out, code = git.show_ref(ref, check = False) + if code and len(out): + raise Exception("Checking for a local ref failed") + else: + out, code = git.ls_remote(url, ref) + # the ref exists iff we have output + return len(out) > 0 + + def update_mirrors(self, ref, delete, exception = None, suppress_stderr = False): + '''Update on all mirrors except for to the local state, or delete it.''' + for mirror in self.mirrors: + if mirror == exception: + continue + # update this mirror + if not self.have_ref(ref): + # delete ref remotely + git.push(self.mirrors[mirror], ':'+ref, capture_stderr = suppress_stderr) + else: + # update ref remotely + git.push('--force', self.mirrors[mirror], ref, capture_stderr = suppress_stderr) - def update_ref(self, newsha, ref, source): - '''Update the to everywhere. is None if this update comes from the local repository, - or the name of a mirror. If is all-zero, the ref should be deleted.''' + def update_ref(self, ref, source, suppress_stderr = False): + '''Update the to its state in everywhere. is None to refer to the local repository, + or the name of a mirror.''' os.chdir(self.local) if source is None: # We already have the latest version locally. Update all the mirrors. - for mirror in self.mirrors: - self.update_mirror_ref(ref, mirror) + self.update_mirrors(ref, delete = not self.have_ref(ref), suppress_stderr = suppress_stderr) else: - raise Exception("Help, what should I do?") + # update our version of this ref. This may fail if the ref does not exist anymore. + url = self.mirrors[source] + if not self.have_ref(ref, url): + # delete ref locally + git.update_ref("-d", ref) + # and everywhere (except for the source) + self.update_mirrors(ref, delete = True, exception = source, suppress_stderr = suppress_stderr) + else: + # update local ref to remote state (yes, there's a race condition here - the ref could no longer exist by now) + git.fetch(url, ref+":"+ref) + # and everywhere else + self.update_mirrors(ref, delete = False, exception = source, suppress_stderr = suppress_stderr) -# for now, configuration is hard-coded here... - -repos = { - 'sync-test': Repo('/home/git/repositories/test.git', {'github': 'git@github.com:RalfJung/sync-test.git'}), -} - -def find_repo_by_directory(dir): +def find_repo_by_directory(repos, dir): for (name, repo) in repos.items(): if dir == repo.local: return name @@ -71,26 +124,71 @@ def find_repo_by_directory(dir): if __name__ == "__main__": parser = argparse.ArgumentParser(description='Keep git repositories in sync') - parser.add_argument("--hook", - action="store_true", dest="hook", - help="Act as git hook: Auto-detect the repository based on the working directoy, and fetch information from stdin") + parser.add_argument("--git-hook", + action="store_true", dest="git_hook", + help="Act as git hook: Auto-detect the repository based on the working directoy, and fetch information from stdin the way git encodes it") + parser.add_argument("--web-hook", + action="store_true", dest="web_hook", + help="Act as github-style web hook: Repository has to be given explicitly, all the rest is read from stdin JSON form") parser.add_argument("-r", "--repository", dest="repository", help="The name of the repository to act on") args = parser.parse_args() + if args.git_hook and args.web_hook: + raise Exception("I cannot be two hooks at once.") - reponame = args.repository - if reponame is None and args.hook: - reponame = find_repo_by_directory(os.getcwd()) - if reponame is None: - raise Exception("Unable to detect repository, please use --repository.") - - # now sync this repository - repo = repos[reponame] - if args.hook: - # parse the information we get from stdin - for line in sys.stdin: - (oldsha, newsha, ref) = line.split() - repo.update_ref(newsha, ref, source=None) - else: - raise Exception("I am unsure what to do here.") + try: + # All arguments are *untrusted* input, as we may be called via sudo from the webserver. So we fix the configuration file location. + conffile = os.path.join(os.path.dirname(__file__), 'git-mirror.conf') + conf = read_config(conffile) + repos = {} + for name, section in conf.items(): + if name != 'DEFAULT': + repos[name] = Repo(section) + + # find the repository we are dealing with + reponame = args.repository + if reponame is None and args.git_hook: + reponame = find_repo_by_directory(repos, os.getcwd()) + if reponame is None or reponame not in repos: + raise Exception("Unknown or missing repository name.") + + # now sync this repository + repo = repos[reponame] + if args.git_hook: + # parse the information we get from stdin + for line in sys.stdin: + (oldsha, newsha, ref) = line.split() + repo.update_ref(ref, source = None) + elif args.web_hook: + data = get_github_payload() + ref = data["ref"] + # validate the ref name + if re.match('refs/[a-z/]+', ref) is None: + raise Exception("Invalid ref name {0}".format(ref)) + # collect URLs of this repository + urls = [] + for key in ("git_url", "ssh_url", "clone_url"): + urls.append(data["repository"][key]) + source = repo.find_mirror_by_url(urls) + if source is None: + raise Exception("Could not find the source.") + repo.update_ref(ref, source = source, suppress_stderr = True) + # print an answer + print("Content-Type: text/plain") + print() + print("Updated {0}:{1} from source {2}".format(reponame, ref, source)) + else: + raise Exception("No manual mode is implemented so far.") + except Exception as e: + # don't leak filenames etc. when we are running as a hook + if args.web_hook: + print("Status: 500 Internal Server Error") + print("Content-Type: text/plain") + print() + print(str(e)) + elif args.git_hook: + #sys.stderr.write(str(e)) + traceback.print_exc() + else: + traceback.print_exc() diff --git a/webhook.py b/webhook.py new file mode 100755 index 0000000..a7ae5f8 --- /dev/null +++ b/webhook.py @@ -0,0 +1,22 @@ +#!/usr/bin/python3 +import urllib.request, urllib.parse, json, os, sys + +def is_github(remote_addr): + '''Returns whether the address is a github hook address. This function requires Python 3.3.''' + from ipaddress import ip_address, ip_network + remote_addr = ip_address(ip_network) + github = urllib.request.urlopen('https://api.github.com/meta').read() + github = json.loads(github.decode('utf-8')) + for net in github['hooks']: + if remote_addr in ip_network(net): + return True + +# get repository from query string +query = os.getenv("QUERY_STRING") +query = urllib.parse.parse_qs(query) +repository = query.get('repository', []) +repository = repository[0] if len(repository) else '' + +# execute the actual script +git_mirror = "/home/ralf/git-mirror/update.py" +os.execlp("sudo", "sudo", "-n", "-u", "git", git_mirror, "--web-hook", "--repository", repository)