#!/usr/bin/env python3
#
# Copyright BitBake Contributors
#
# SPDX-License-Identifier: GPL-2.0-only
#

"""git-make-shallow: make the current git repository shallow

Remove the history of the specified revisions, then optionally filter the
available refs to those specified.
"""

import argparse
import collections
import errno
import itertools
import os
import subprocess
import sys
import warnings
warnings.simplefilter("default")

version = 1.0


# Base git invocation shared by every command this script runs.
git_cmd = ['git', '-c', 'safe.bareRepository=all']


def main():
    """Entry point: make the repository shallow at the requested revisions.

    Steps, in order:

    1. If a ``shallow`` file already exists, try ``git fetch --unshallow``;
       if that command fails, fall back to deleting the shallow file.
    2. Parse the command line and resolve refs/revisions (see process_args).
    3. Write the new shallow boundary (see make_shallow).
    4. Verify that every requested revision still reachable from the kept
       refs no longer reports any parents; exit with an error otherwise.
    5. Delete every ref that is not being kept (see filter_refs).
    6. With ``--shrink``, repack/prune the repository and run ``git fsck``.
    """
    if sys.version_info < (3, 4, 0):
        sys.exit('Python 3.4 or greater is required')

    git_dir = check_output(git_cmd + ['rev-parse', '--git-dir']).rstrip()
    shallow_file = os.path.join(git_dir, 'shallow')
    if os.path.exists(shallow_file):
        try:
            check_output(git_cmd + ['fetch', '--unshallow'])
        except subprocess.CalledProcessError:
            _remove_if_exists(shallow_file)

    # Arguments are resolved only after the unshallow attempt above, so the
    # rev-parse calls in process_args() run against the un-shallowed state.
    args = process_args()
    # Capture the full history of the requested revisions *before* the
    # shallow file is rewritten, for the verification pass below.
    revs = check_output(git_cmd + ['rev-list'] + args.revisions).splitlines()

    make_shallow(shallow_file, args.revisions, args.refs)

    ref_revs = check_output(git_cmd + ['rev-list'] + args.refs).splitlines()
    remaining_history = set(revs) & set(ref_revs)
    for rev in remaining_history:
        # '<rev>^@' expands to all parents of rev; any output means rev
        # still has parents and was therefore not cut off.
        if check_output(git_cmd + ['rev-parse', '{}^@'.format(rev)]):
            sys.exit('Error: %s was not made shallow' % rev)

    filter_refs(args.refs)

    if args.shrink:
        shrink_repo(git_dir)
        subprocess.check_call(git_cmd + ['fsck', '--unreachable'])


def process_args():
    """Parse ``sys.argv`` and resolve refs and revisions through git.

    Prints the help text and exits with status 2 when no arguments at all
    were given.

    Returns:
        The argparse namespace with:
        ``refs``      -- full symbolic ref names to keep (the ones passed via
                         ``--ref``, or otherwise every ref that is a commit or
                         a tag pointing at a commit), with names ending in
                         ``/HEAD`` dropped;
        ``revisions`` -- the requested revisions as printed by
                         ``git rev-parse <rev>^{}``;
        ``shrink``    -- whether ``--shrink`` was given.

    Raises:
        subprocess.CalledProcessError: if a git command fails.
    """
    # TODO: add argument to automatically keep local-only refs, since they
    # can't be easily restored with a git fetch.
    parser = argparse.ArgumentParser(description='Remove the history of the specified revisions, then optionally filter the available refs to those specified.')
    parser.add_argument('--ref', '-r', metavar='REF', action='append', dest='refs', help='remove all but the specified refs (cumulative)')
    parser.add_argument('--shrink', '-s', action='store_true', help='shrink the git repository by repacking and pruning')
    parser.add_argument('revisions', metavar='REVISION', nargs='+', help='a git revision/commit')
    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(2)

    args = parser.parse_args()

    if args.refs:
        args.refs = check_output(git_cmd + ['rev-parse', '--symbolic-full-name'] + args.refs).splitlines()
    else:
        # The filter receives (refname, objecttype, *objecttype), matching
        # the for-each-ref format used by get_all_refs().
        args.refs = get_all_refs(lambda r, t, tt: t == 'commit' or tt == 'commit')

    args.refs = [ref for ref in args.refs if not ref.endswith('/HEAD')]
    args.revisions = check_output(git_cmd + ['rev-parse'] + ['%s^{}' % i for i in args.revisions]).splitlines()
    return args


def check_output(cmd, input=None):
    """Run *cmd* and return its stdout as text.

    Args:
        cmd: the command as an argument list.
        input: optional text passed to the command's stdin.

    Raises:
        subprocess.CalledProcessError: if the command exits non-zero.
    """
    return subprocess.check_output(cmd, universal_newlines=True, input=input)


def make_shallow(shallow_file, revisions, refs):
    """Remove the history of the specified revisions.

    Appends each revision produced by follow_history_intersections() to
    *shallow_file*, one per line.
    """
    for rev in follow_history_intersections(revisions, refs):
        print("Processing %s" % rev)
        # The file is reopened and closed for every revision so the entry is
        # on disk before the generator resumes and runs further git commands.
        with open(shallow_file, 'a') as f:
            f.write(rev + '\n')


def get_all_refs(ref_filter=None):
    """Return all the existing refs in this repository, optionally filtering the refs.

    Args:
        ref_filter: optional callable taking ``(refname, objecttype,
            deref_objecttype)`` and returning true for refs to keep. Fields
            missing from a for-each-ref output line are passed as None.

    Returns:
        A list of ref names.
    """
    ref_output = check_output(git_cmd + ['for-each-ref', '--format=%(refname)\t%(objecttype)\t%(*objecttype)'])
    # Pad/truncate every record to exactly three fields so ref_filter can
    # always be called with three positional arguments.
    ref_split = [tuple(iter_extend(line.split('\t'), 3)) for line in ref_output.splitlines()]
    if ref_filter:
        ref_split = (e for e in ref_split if ref_filter(*e))
    refs = [r[0] for r in ref_split]
    return refs


def iter_extend(iterable, length, obj=None):
    """Ensure that iterable is the specified length by extending with obj.

    Longer iterables are truncated to *length*. Returns an iterator.
    """
    return itertools.islice(itertools.chain(iterable, itertools.repeat(obj)), length)


def filter_refs(refs):
    """Remove all but the specified refs from the git repository.

    Refs are deleted with ``git update-ref -d --no-deref``, one invocation
    per ref, driven through ``xargs -0``.
    """
    all_refs = get_all_refs()
    to_remove = set(all_refs) - set(refs)
    if to_remove:
        check_output(['xargs', '-0', '-n', '1'] + git_cmd + ['update-ref', '-d', '--no-deref'],
                     input=''.join(ref + '\0' for ref in to_remove))


def follow_history_intersections(revisions, refs):
    """Determine all the points where the history of the specified revisions intersects the specified refs.

    This is a generator: it yields each of the given revisions, then each
    merge base found between a yielded revision's parents and the refs, each
    at most once. Work happens lazily between yields, so a consumer that
    modifies the repository while iterating (as make_shallow() does) affects
    the git queries issued for later items.
    """
    queue = collections.deque(revisions)
    seen = set()

    for rev in iter_except(queue.popleft, IndexError):
        if rev in seen:
            continue

        # Parents are looked up *before* yielding; the consumer may cut the
        # history at rev as soon as it receives it.
        parents = check_output(git_cmd + ['rev-parse', '%s^@' % rev]).splitlines()

        yield rev
        seen.add(rev)

        if not parents:
            continue

        # NOTE(review): this looks loop-invariant but is deliberately not
        # hoisted. The consumer appends to the shallow file between yields,
        # which presumably changes which refs git reports as independent.
        # Confirm before optimising.
        check_refs = check_output(git_cmd + ['merge-base', '--independent'] + sorted(refs)).splitlines()
        for parent in parents:
            for ref in check_refs:
                print("Checking %s vs %s" % (parent, ref))
                try:
                    merge_base = check_output(git_cmd + ['merge-base', parent, ref]).rstrip()
                except subprocess.CalledProcessError:
                    # git exited non-zero for this pair; nothing to enqueue.
                    continue
                else:
                    queue.append(merge_base)


def iter_except(func, exception, start=None):
    """Yield the results of calling a function repeatedly until it raises an exception.

    Args:
        func: zero-argument callable invoked for each item.
        exception: exception type (or tuple of types) that ends iteration.
        start: optional zero-argument callable whose result is yielded first.
    """
    try:
        if start is not None:
            yield start()
        while True:
            yield func()
    except exception:
        pass


def shrink_repo(git_dir):
    """Shrink the newly shallow repository, removing the unreachable objects.

    Expires unreachable reflog entries, repacks, deletes
    ``objects/info/alternates`` under *git_dir* if present, then prunes.
    """
    subprocess.check_call(git_cmd + ['reflog', 'expire', '--expire-unreachable=now', '--all'])
    subprocess.check_call(git_cmd + ['repack', '-ad'])
    _remove_if_exists(os.path.join(git_dir, 'objects', 'info', 'alternates'))
    subprocess.check_call(git_cmd + ['prune', '--expire', 'now'])


def _remove_if_exists(path):
    """Delete *path*, treating an already-missing file as success.

    Any other OSError (permissions, path is a directory, ...) propagates.
    """
    try:
        os.unlink(path)
    except FileNotFoundError:
        pass


if __name__ == '__main__':
    main()