"""Core of git-mirror: keep a local git repository and its mirrors in sync.

Force pushes are used only if necessary, ref updates check the expected old
value to avoid race conditions, and problems can be reported by email.
"""
import sys, os, subprocess
import configparser, itertools, json, re
import email.mime.text, email.utils, smtplib


class GitCommand:
    '''Proxy object that turns attribute access into git invocations.

    `git.ls_remote(url)` runs `git ls-remote url`: underscores in the
    attribute name are replaced by dashes to form the git subcommand.'''

    def __getattr__(self, name):
        # Private/special attributes (as probed e.g. by copy or pickle) are not
        # git subcommands; do not hand out a bogus command for them.
        if name.startswith('_'):
            raise AttributeError(name)

        def call(*args, capture_stderr=False, check=True):
            '''If <capture_stderr>, return stderr merged with stdout. Otherwise, return stdout and forward stderr to our own.
               If <check> is true, throw an exception if the process fails with non-zero exit code. Otherwise, do not.
               In any case, return a pair of the captured output and the exit code.'''
            cmd = ["git", name.replace('_', '-')] + list(args)
            stderr_target = subprocess.STDOUT if capture_stderr else None
            with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=stderr_target) as p:
                (stdout, stderr) = p.communicate()
                # stderr is either merged into stdout or inherited, never piped
                assert stderr is None
                code = p.returncode
            if check and code:
                raise Exception("Error running {0}: Non-zero exit code".format(cmd))
            return (stdout.decode('utf-8').strip('\n'), code)
        return call


git = GitCommand()
git_nullsha = 40*"0"
_sha_pattern = re.compile(r'[0-9a-f]{40}\Z')


def _is_sha(value):
    '''Tell whether <value> is a string of exactly 40 lowercase hex digits.'''
    return isinstance(value, str) and _sha_pattern.match(value) is not None


def git_is_forced_update(oldsha, newsha):
    '''Return True if going from <oldsha> to <newsha> is a forced (non-fast-forward) update.

    Raises an exception if git cannot answer the question. This must not be
    an assert: a git failure would otherwise be mistaken for a forced update
    when assertions are disabled.'''
    # "Check if the first <commit> is an ancestor of the second <commit>"
    out, code = git.merge_base("--is-ancestor", oldsha, newsha, check=False)
    if out or code not in (0, 1):
        raise Exception("Could not determine whether {0} is an ancestor of {1}.".format(oldsha, newsha))
    # if oldsha is an ancestor of newsha, then this was a "good" (non-forced) update
    return code != 0


def read_config(fname, defSection='DEFAULT'):
    '''Reads a config file that may have options outside of any section.

    Such options end up in the section named <defSection>. Returns the
    configparser.ConfigParser holding the parsed file.'''
    config = configparser.ConfigParser()
    with open(fname) as conf_file:
        # prepend a section header, so that leading options are accepted
        stream = itertools.chain(("["+defSection+"]\n",), conf_file)
        config.read_file(stream)
    return config


def send_mail(subject, text, receivers, sender='post+webhook@ralfj.de', replyTo=None):
    '''Send a plain-text UTF-8 mail to the list <receivers> via the SMTP server on localhost.

    Does nothing if <receivers> is empty. <replyTo>, if given, becomes the
    Reply-To header.'''
    assert isinstance(receivers, list)
    if not len(receivers): return # nothing to do
    # construct content
    msg = email.mime.text.MIMEText(text.encode('UTF-8'), 'plain', 'UTF-8')
    msg['Subject'] = subject
    msg['Date'] = email.utils.formatdate(localtime=True)
    msg['From'] = sender
    msg['To'] = ', '.join(receivers)
    if replyTo is not None:
        msg['Reply-To'] = replyTo
    # put into envelope and send; the context manager issues QUIT and closes
    # the connection even if sending fails
    with smtplib.SMTP('localhost') as s:
        s.sendmail(sender, receivers, msg.as_string())


def get_github_payload():
    '''Return the github-style JSON encoded payload (as if we were called as a github webhook).

    The payload is read from stdin. If it cannot be read or parsed, an empty
    dictionary is returned.'''
    try:
        data = sys.stdin.buffer.read()
        return json.loads(data.decode('utf-8'))
    except Exception:
        # deliberately best-effort; KeyboardInterrupt/SystemExit still propagate
        return {} # nothing read


class Repo:
    '''A local repository together with its owner and its mirrors.'''

    def __init__(self, name, conf):
        '''Creates a repository from a section of the git-mirror configuration file.

        <conf> must provide the keys 'local' and 'owner'; every key of the
        form 'mirror-<mirror>' declares the URL of a mirror.'''
        self.name = name
        self.local = conf['local']
        self.owner = conf['owner'] # email address to notify in case of problems
        self.mirrors = {} # maps mirrors to their URLs
        mirror_prefix = 'mirror-'
        for key in conf.keys():
            if key.startswith(mirror_prefix):
                self.mirrors[key[len(mirror_prefix):]] = conf[key]

    def mail_owner(self, msg):
        '''Send the text <msg> to the owner of this repository.'''
        send_mail("git-mirror {0}".format(self.name), msg, [self.owner])

    def find_mirror_by_url(self, match_urls):
        '''Return the name of a mirror whose URL is contained in <match_urls>, or None.'''
        for mirror, url in self.mirrors.items():
            if url in match_urls:
                return mirror
        return None

    def update_mirrors(self, ref, oldsha, newsha, except_mirrors=(), suppress_stderr=False):
        '''Update the <ref> from <oldsha> to <newsha> on all mirrors. The update must already have happened locally.

        Mirrors named in <except_mirrors> are skipped. If <suppress_stderr>,
        the stderr of git is captured instead of being forwarded.
        Raises an exception if the SHAs are malformed or a git command fails.'''
        if not (_is_sha(oldsha) and _is_sha(newsha)):
            raise Exception("These are not valid SHAs.")
        os.chdir(self.local)
        # check for a forced update
        is_forced = newsha != git_nullsha and oldsha != git_nullsha and git_is_forced_update(oldsha, newsha)
        # an empty source side of the refspec deletes the ref on the remote
        refspec = ("" if newsha == git_nullsha else newsha) + ":" + ref
        # force only if someone already did a force push and hence accepted data loss;
        # otherwise push nicely, which avoids data loss due to race conditions
        options = ['--force'] if is_forced else []
        # tell all the mirrors
        for mirror, url in self.mirrors.items():
            if mirror in except_mirrors:
                continue
            git.push(*(options + [url, refspec]), capture_stderr=suppress_stderr)

    def update_ref_from_mirror(self, ref, oldsha, newsha, mirror, suppress_stderr=False):
        '''Update the local version of this <ref> to what's currently on the given <mirror>.
           <oldsha> and <newsha> are checked. Then update all the other mirrors.'''
        os.chdir(self.local)
        url = self.mirrors[mirror]
        # first check whether the remote really is at newsha
        remote_state, code = git.ls_remote(url, ref)
        remote_sha = remote_state.split()[0] if remote_state else git_nullsha
        if newsha != remote_sha:
            raise Exception("Someone lied about the new SHA, which should be {0}.".format(newsha))
        # locally, we have to be at oldsha or newsha (the latter can happen if we already got this update, e.g. if it originated from us)
        local_state, code = git.show_ref(ref, check=False)
        if code == 0:
            local_sha = local_state.split()[0]
        else:
            if len(local_state):
                raise Exception("Something went wrong getting the local state of {0}.".format(ref))
            local_sha = git_nullsha
        if local_sha not in (oldsha, newsha):
            raise Exception("Someone lied about the old SHA.")
        # if we are already at newsha locally, we also ran the local hooks, so we do not have to do anything
        if local_sha == newsha:
            return
        # update local state from local_sha to newsha.
        if newsha != git_nullsha:
            # We *could* now fetch the remote ref and immediately update the local one. However, then we would have to
            # decide whether we want to allow a force-update or not. Also, the ref could already have changed remotely.
            # Instead, we just fetch without updating any local ref. If the remote side changed in such a way that
            # <newsha> is not actually fetched, that's a race and will be noticed when updating the local ref.
            git.fetch(url, ref, capture_stderr=suppress_stderr)
            # now update the ref, checking the old value is still local_sha
            # (the null SHA as old value means the ref must not exist yet)
            git.update_ref(ref, newsha, local_sha)
        else:
            # ref does not exist anymore. delete it.
            assert local_sha != git_nullsha, "Why didn't we bail out earlier if there is nothing to do...?"
            git.update_ref("-d", ref, local_sha) # this checks that the old value is still local_sha
        # update all the mirrors
        self.update_mirrors(ref, oldsha, newsha, [mirror], suppress_stderr)


def find_repo_by_directory(repos, dir):
    '''Return the name of the repository in <repos> whose local directory is <dir>, or None.'''
    for (name, repo) in repos.items():
        if dir == repo.local:
            return name
    return None


def load_repos():
    '''Load 'git-mirror.conf' from the directory of this file and return a dictionary mapping
       each section name (except 'DEFAULT') to its Repo.'''
    conffile = os.path.join(os.path.dirname(__file__), 'git-mirror.conf')
    conf = read_config(conffile)
    repos = {}
    for name, section in conf.items():
        if name != 'DEFAULT':
            repos[name] = Repo(name, section)
    return repos
#    Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#==============================================================================

# This is the hook called by git. It updates all mirrors to the status of the local repository.
# The updates to propagate are read from stdin, one "<oldsha> <newsha> <ref>" triple per line.
import traceback
from git_mirror import *


def main():
    '''Find the configured repository that lives in the current working directory and push
       every ref update listed on stdin to all of its mirrors. Problems are mailed to the
       owner of the repository (if it is known) and summarized on stderr.'''
    current_repo = None # we will try to use this during exception handling
    try:
        known_repos = load_repos()

        # find the repository we are dealing with
        key = find_repo_by_directory(known_repos, os.getcwd())
        if not (key is not None and key in known_repos):
            raise Exception("Unknown repository.")

        # now sync this repository
        current_repo = known_repos[key]
        # parse the information we get from stdin. we trust this information.
        for update in sys.stdin:
            (before, after, refname) = update.split()
            current_repo.update_mirrors(refname, before, after)
    except Exception as e:
        if current_repo is not None:
            report = "There was a problem running the git-mirror git hook:\n\n{0}".format(traceback.format_exc())
            current_repo.mail_owner(report)
        # do not print all the details
        summary = '\n'.join(traceback.format_exception_only(type(e), e))
        sys.stderr.write("We have a problem:\n{0}".format(summary))


if __name__ == "__main__":
    main()
- In any case, return a pair of the captured output and the exit code.''' - cmd = ["git", name.replace('_', '-')] + list(args) - with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT if capture_stderr else None) as p: - (stdout, stderr) = p.communicate() - assert stderr is None - code = p.returncode - if check and code: - raise Exception("Error running {0}: Non-zero exit code".format(cmd)) - return (stdout.decode('utf-8').strip('\n'), code) - return call - -git = GitCommand() - -def read_config(fname, defSection = 'DEFAULT'): - '''Reads a config file that may have options outside of any section.''' - config = configparser.ConfigParser() - with open(fname) as file: - stream = itertools.chain(("["+defSection+"]\n",), file) - config.read_file(stream) - return config - -def send_mail(subject, text, receivers, sender='post+webhook@ralfj.de', replyTo=None): - assert isinstance(receivers, list) - if not len(receivers): return # nothing to do - # construct content - msg = email.mime.text.MIMEText(text.encode('UTF-8'), 'plain', 'UTF-8') - msg['Subject'] = subject - msg['Date'] = email.utils.formatdate(localtime=True) - msg['From'] = sender - msg['To'] = ', '.join(receivers) - if replyTo is not None: - msg['Reply-To'] = replyTo - # put into envelope and send - s = smtplib.SMTP('localhost') - s.sendmail(sender, receivers, msg.as_string()) - s.quit() - -def get_github_payload(): - '''Reeturn the github-style JSON encoded payload (as if we were called as a github webhook)''' - try: - data = sys.stdin.buffer.read() - data = json.loads(data.decode('utf-8')) - return data - except: - return {} # nothing read - -class Repo: - def __init__(self, conf): - '''Creates a repository from a section of the git-mirror configuration file''' - self.local = conf['local'] - self.mirrors = {} # maps mirrors to their URLs - mirror_prefix = 'mirror-' - for name in filter(lambda s: s.startswith(mirror_prefix), conf.keys()): - mirror = name[len(mirror_prefix):] - 
self.mirrors[mirror] = conf[name] - - def find_mirror_by_url(self, match_urls): - for mirror, url in self.mirrors.items(): - if url in match_urls: - return mirror - return None - - def have_ref(self, ref, url=None): - '''Tests if a given ref exists, locally or (if the url is given) remotely''' - if url is None: - out, code = git.show_ref(ref, check = False) - if code and len(out): - raise Exception("Checking for a local ref failed") - else: - out, code = git.ls_remote(url, ref) - # the ref exists iff we have output - return len(out) > 0 - - def update_mirrors(self, ref, delete, exception = None, suppress_stderr = False): - '''Update on all mirrors except for to the local state, or delete it.''' - for mirror in self.mirrors: - if mirror == exception: - continue - # update this mirror - if not self.have_ref(ref): - # delete ref remotely - git.push(self.mirrors[mirror], ':'+ref, capture_stderr = suppress_stderr) - else: - # update ref remotely - git.push('--force', self.mirrors[mirror], ref, capture_stderr = suppress_stderr) - - def update_ref(self, ref, source, suppress_stderr = False): - '''Update the to its state in everywhere. is None to refer to the local repository, - or the name of a mirror.''' - os.chdir(self.local) - if source is None: - # We already have the latest version locally. Update all the mirrors. - self.update_mirrors(ref, delete = not self.have_ref(ref), suppress_stderr = suppress_stderr) - else: - # update our version of this ref. This may fail if the ref does not exist anymore. 
- url = self.mirrors[source] - if not self.have_ref(ref, url): - # delete ref locally - git.update_ref("-d", ref) - # and everywhere (except for the source) - self.update_mirrors(ref, delete = True, exception = source, suppress_stderr = suppress_stderr) - else: - # update local ref to remote state (yes, there's a race condition here - the ref could no longer exist by now) - git.fetch(url, ref+":"+ref) - # and everywhere else - self.update_mirrors(ref, delete = False, exception = source, suppress_stderr = suppress_stderr) - -def find_repo_by_directory(repos, dir): - for (name, repo) in repos.items(): - if dir == repo.local: - return name - return None - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Keep git repositories in sync') - parser.add_argument("--git-hook", - action="store_true", dest="git_hook", - help="Act as git hook: Auto-detect the repository based on the working directoy, and fetch information from stdin the way git encodes it") - parser.add_argument("--web-hook", - action="store_true", dest="web_hook", - help="Act as github-style web hook: Repository has to be given explicitly, all the rest is read from stdin JSON form") - parser.add_argument("-r", "--repository", - dest="repository", - help="The name of the repository to act on") - args = parser.parse_args() - if args.git_hook and args.web_hook: - raise Exception("I cannot be two hooks at once.") - - try: - # All arguments are *untrusted* input, as we may be called via sudo from the webserver. So we fix the configuration file location. 
- conffile = os.path.join(os.path.dirname(__file__), 'git-mirror.conf') - conf = read_config(conffile) - repos = {} - for name, section in conf.items(): - if name != 'DEFAULT': - repos[name] = Repo(section) - - # find the repository we are dealing with - reponame = args.repository - if reponame is None and args.git_hook: - reponame = find_repo_by_directory(repos, os.getcwd()) - if reponame is None or reponame not in repos: - raise Exception("Unknown or missing repository name.") - - # now sync this repository - repo = repos[reponame] - if args.git_hook: - # parse the information we get from stdin - for line in sys.stdin: - (oldsha, newsha, ref) = line.split() - repo.update_ref(ref, source = None) - elif args.web_hook: - data = get_github_payload() - ref = data["ref"] - # validate the ref name - if re.match('refs/[a-z/]+', ref) is None: - raise Exception("Invalid ref name {0}".format(ref)) - # collect URLs of this repository - urls = [] - for key in ("git_url", "ssh_url", "clone_url"): - urls.append(data["repository"][key]) - source = repo.find_mirror_by_url(urls) - if source is None: - raise Exception("Could not find the source.") - repo.update_ref(ref, source = source, suppress_stderr = True) - # print an answer - print("Content-Type: text/plain") - print() - print("Updated {0}:{1} from source {2}".format(reponame, ref, source)) - else: - raise Exception("No manual mode is implemented so far.") - except Exception as e: - # don't leak filenames etc. when we are running as a hook - if args.web_hook: - print("Status: 500 Internal Server Error") - print("Content-Type: text/plain") - print() - print(str(e)) - elif args.git_hook: - #sys.stderr.write(str(e)) - traceback.print_exc() - else: - traceback.print_exc() diff --git a/webhook-core.py b/webhook-core.py new file mode 100755 index 0000000..93b138c --- /dev/null +++ b/webhook-core.py @@ -0,0 +1,66 @@ +#!/usr/bin/python3 +# Copyright (c) 2014, Ralf Jung +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#============================================================================== + +# This is the core of the github-style web hook. It reads the JSON payload from stdin, updates the local repository from the mirror that was pushed to, and then updates all the other mirrors. 
import re
import sys, traceback
from git_mirror import *


def _parse_payload(data):
    '''Extract and validate the fields we need from the github-style payload <data>.

    The payload is *untrusted* input, and its values end up in git command
    lines and refspecs, so everything is checked here.
    Returns the tuple (ref, oldsha, newsha, urls), where <urls> is the list of
    URLs the payload gives for the repository.
    Raises an exception if the payload is incomplete or a value is invalid.'''
    try:
        ref = data["ref"]
        oldsha = data["before"]
        newsha = data["after"]
        # collect URLs of this repository, to find the mirror name
        urls = [data["repository"][key] for key in ("git_url", "ssh_url", "clone_url")]
    except (KeyError, TypeError):
        raise Exception("Incomplete or malformed payload.")
    # validate the SHAs
    for sha in (oldsha, newsha):
        if not isinstance(sha, str) or re.match(r'[0-9a-f]{40}\Z', sha) is None:
            raise Exception("Invalid SHA {0}".format(sha))
    # validate the ref name
    if not isinstance(ref, str) or re.match('refs/[a-z/]+', ref) is None:
        raise Exception("Invalid ref name {0}".format(ref))
    # The pattern above only constrains the beginning of the name. Let git check the
    # whole name, so that e.g. "refs/heads/a:refs/heads/b" cannot sneak a second ref
    # into a refspec.
    out, code = git.check_ref_format(ref, capture_stderr = True, check = False)
    if code != 0:
        raise Exception("Invalid ref name {0}".format(ref))
    return (ref, oldsha, newsha, urls)


def main():
    '''Handle a github-style web hook call for the repository named by the first
       command-line argument: read the JSON payload from stdin, update the local
       repository from the mirror that sent it, and then update all the other mirrors.
       Prints a CGI-style response; problems are also mailed to the owner of the
       repository (if it is known).'''
    repo = None # we will try to use this during exception handling
    try:
        repos = load_repos()
        reponame = sys.argv[1] if len(sys.argv) > 1 else None
        if reponame not in repos:
            raise Exception("Repository missing or not found.")
        repo = repos[reponame]

        # now sync this repository
        (ref, oldsha, newsha, urls) = _parse_payload(get_github_payload())
        mirror = repo.find_mirror_by_url(urls)
        if mirror is None:
            raise Exception("Could not find the mirror.")
        repo.update_ref_from_mirror(ref, oldsha, newsha, mirror, suppress_stderr = True)
        # print an answer
        print("Content-Type: text/plain")
        print()
        print("Updated {0}:{1} from mirror {2} from {3} to {4}".format(reponame, ref, mirror, oldsha, newsha))
    except Exception as e:
        if repo is not None:
            repo.mail_owner("There was a problem running the git-mirror webhook:\n\n{0}".format(traceback.format_exc()))
        # do not print all the details
        print("Status: 500 Internal Server Error")
        print("Content-Type: text/plain")
        print()
        print("We have a problem:\n{0}".format('\n'.join(traceback.format_exception_only(type(e), e))))


if __name__ == "__main__":
    main()
-os.execlp("sudo", "sudo", "-n", "-u", "git", git_mirror, "--web-hook", "--repository", repository) +webhook_core = "/home/ralf/git-mirror/webhook-core.py" +os.execlp("sudo", "sudo", "-n", "-u", "git", webhook_core, repository)