1#!/usr/bin/env python3
2#
3# Copyright BitBake Contributors
4#
5# SPDX-License-Identifier: GPL-2.0-only
6#
7
8"""git-make-shallow: make the current git repository shallow
9
10Remove the history of the specified revisions, then optionally filter the
11available refs to those specified.
12"""
13
14import argparse
15import collections
16import errno
17import itertools
18import os
19import subprocess
20import sys
21import warnings
# Install the "default" warning action for every warning category, so each
# distinct warning is printed the first time it occurs at a given location.
warnings.simplefilter("default")

# Version number of this script; nothing in the visible code reads it.
version = 1.0


# Base command line shared by every git invocation in this script. The
# "-c safe.bareRepository=all" override is passed on each call; presumably so
# the tool keeps working inside bare repositories even when the user's git
# configuration restricts them -- confirm against the git-config documentation.
git_cmd = ['git', '-c', 'safe.bareRepository=all']
28
def main():
    """Make the current repository shallow at the revisions given on the command line.

    The steps run in this order:

    1. If a ``shallow`` file already exists in the git directory, try
       ``git fetch --unshallow``; if that fails, delete the file instead.
    2. Parse the arguments and list the history of the requested revisions.
    3. Write the new shallow boundaries (see make_shallow()).
    4. Verify that no commit that is both in the previously listed history
       and still reachable from the kept refs has parents; exit with an
       error message otherwise.
    5. Delete every ref that is not being kept.
    6. With ``--shrink``, repack/prune the repository and run ``git fsck``.
    """
    minimum_python = (3, 4, 0)
    if sys.version_info < minimum_python:
        sys.exit('Python 3.4 or greater is required')

    git_dir = check_output(git_cmd + ['rev-parse', '--git-dir']).rstrip()
    shallow_file = os.path.join(git_dir, 'shallow')
    if os.path.exists(shallow_file):
        try:
            check_output(git_cmd + ['fetch', '--unshallow'])
        except subprocess.CalledProcessError:
            # Could not fetch the full history; fall back to discarding the
            # existing shallow file. A file that vanished meanwhile is fine.
            try:
                os.unlink(shallow_file)
            except FileNotFoundError:
                pass

    args = process_args()
    rev_list = git_cmd + ['rev-list']
    # This listing must be taken before make_shallow() rewrites the shallow
    # file, because it is compared against the post-shallow history below.
    revs = check_output(rev_list + args.revisions).splitlines()

    make_shallow(shallow_file, args.revisions, args.refs)

    ref_revs = check_output(rev_list + args.refs).splitlines()
    for rev in set(revs) & set(ref_revs):
        # Any output from "rev^@" means git still sees parents for this commit.
        still_has_parents = check_output(git_cmd + ['rev-parse', '{}^@'.format(rev)])
        if still_has_parents:
            sys.exit('Error: %s was not made shallow' % rev)

    filter_refs(args.refs)

    if not args.shrink:
        return
    shrink_repo(git_dir)
    subprocess.check_call(git_cmd + ['fsck', '--unreachable'])
61
62
def process_args():
    """Parse the command line and resolve refs and revisions through git.

    Prints the help text and exits with status 2 when the script is run
    without any arguments.

    Returns the argparse namespace with two attributes rewritten:

    * ``refs`` -- the full symbolic names of the ``--ref`` arguments, or, when
      none were given, every existing ref whose object type or dereferenced
      object type is ``commit``. Names ending in ``/HEAD`` are dropped.
    * ``revisions`` -- the output lines of ``git rev-parse`` applied to each
      ``REVISION^{}``.
    """
    # TODO: add argument to automatically keep local-only refs, since they
    # can't be easily restored with a git fetch.
    description = 'Remove the history of the specified revisions, then optionally filter the available refs to those specified.'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--ref', '-r', metavar='REF', action='append', dest='refs',
                        help='remove all but the specified refs (cumulative)')
    parser.add_argument('--shrink', '-s', action='store_true',
                        help='shrink the git repository by repacking and pruning')
    parser.add_argument('revisions', metavar='REVISION', nargs='+',
                        help='a git revision/commit')
    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(2)

    args = parser.parse_args()

    def is_commit_ref(refname, objecttype, deref_objecttype):
        # Accept refs pointing at a commit directly or via dereferencing.
        return 'commit' in (objecttype, deref_objecttype)

    if args.refs:
        rev_parse_names = git_cmd + ['rev-parse', '--symbolic-full-name']
        candidate_refs = check_output(rev_parse_names + args.refs).splitlines()
    else:
        candidate_refs = get_all_refs(is_commit_ref)
    args.refs = [ref for ref in candidate_refs if not ref.endswith('/HEAD')]

    dereferenced = ['%s^{}' % revision for revision in args.revisions]
    args.revisions = check_output(git_cmd + ['rev-parse'] + dereferenced).splitlines()
    return args
84
85
def check_output(cmd, input=None):
    """Run cmd and return its standard output as text.

    cmd is the argument list to execute and input is optional text for the
    command's standard input. input is always forwarded to
    subprocess.check_output(), even when it is None. A non-zero exit status
    raises subprocess.CalledProcessError.
    """
    options = {'universal_newlines': True, 'input': input}
    return subprocess.check_output(cmd, **options)
88
89
def make_shallow(shallow_file, revisions, refs):
    """Append every history intersection point to the shallow file.

    Each revision produced by follow_history_intersections(revisions, refs)
    is announced on standard output and then appended to shallow_file, one
    revision per line.
    """
    for boundary in follow_history_intersections(revisions, refs):
        print("Processing %s" % boundary)
        # The file is reopened and closed on every iteration, so the entry is
        # written out before the generator resumes. Presumably this lets the
        # git commands it runs next see the new boundary -- confirm before
        # batching these writes.
        with open(shallow_file, 'a') as shallow:
            print(boundary, file=shallow)
96
97
def get_all_refs(ref_filter=None):
    """Return the names of the refs in this repository.

    The refs come from ``git for-each-ref``. When ref_filter is given, it is
    called as ``ref_filter(refname, objecttype, deref_objecttype)`` for each
    ref and only refs for which it returns a true value are kept.
    """
    listing_format = '--format=%(refname)\t%(objecttype)\t%(*objecttype)'
    listing = check_output(git_cmd + ['for-each-ref', listing_format])
    # Every record is normalised to exactly three fields before filtering.
    records = (tuple(iter_extend(line.rsplit('\t'), 3))
               for line in listing.splitlines())
    if ref_filter:
        return [record[0] for record in records if ref_filter(*record)]
    return [record[0] for record in records]
106
107
def iter_extend(iterable, length, obj=None):
    """Return an iterator over exactly ``length`` items taken from iterable.

    When iterable runs out early the remainder is filled with obj; when it is
    longer than ``length`` the surplus items are not produced.
    """
    filler = itertools.repeat(obj)
    padded = itertools.chain(iterable, filler)
    return itertools.islice(padded, length)
111
112
def filter_refs(refs):
    """Remove all but the specified refs from the git repository.

    refs is an iterable of full ref names to keep. Every other ref reported
    by get_all_refs() is deleted without dereferencing symbolic refs.

    All deletions are handed to one ``git update-ref --stdin -z`` process
    instead of spawning a separate git process per ref through ``xargs``.
    This avoids the per-ref process start-up cost on repositories with many
    refs and removes the dependency on an external ``xargs``. Git applies
    the commands read from stdin as one transaction, so a failure deletes
    none of the refs.

    Raises subprocess.CalledProcessError if git reports a failure.
    """
    to_remove = set(get_all_refs()) - set(refs)
    if not to_remove:
        return

    # In -z mode each command is "delete SP <ref> NUL [<old-oid>] NUL"; the
    # old-oid field is left empty so no old-value check is performed.
    # Sorting only makes the command stream deterministic.
    commands = ''.join('delete %s\0\0' % ref for ref in sorted(to_remove))
    check_output(git_cmd + ['update-ref', '--no-deref', '--stdin', '-z'],
                 input=commands)
120
121
def follow_history_intersections(revisions, refs):
    """Yield every point where the history of revisions meets the given refs.

    This is a breadth-first walk seeded with revisions. Each revision not
    seen before is yielded once. For each of its parents, the merge base of
    that parent with every independent ref is queued for a later visit.
    """
    pending = collections.deque(revisions)
    visited = set()

    while pending:
        rev = pending.popleft()
        if rev in visited:
            continue

        # The parents are read before yielding, because the consumer may act
        # on the yielded revision before this generator resumes
        # (make_shallow() appends it to the shallow file).
        parents = check_output(git_cmd + ['rev-parse', '%s^@' % rev]).splitlines()

        yield rev
        visited.add(rev)

        if not parents:
            continue

        # This is recomputed on every pass rather than hoisted; the result may
        # depend on repository state that changes between yields.
        check_refs = check_output(git_cmd + ['merge-base', '--independent'] + sorted(refs)).splitlines()
        for parent, ref in itertools.product(parents, check_refs):
            print("Checking %s vs %s" % (parent, ref))
            try:
                merge_base = check_output(git_cmd + ['merge-base', parent, ref]).rstrip()
            except subprocess.CalledProcessError:
                # git found no merge base for this pair; nothing to queue.
                continue
            pending.append(merge_base)
149
150
def iter_except(func, exception, start=None):
    """Yield the results of calling func until it raises exception.

    When start is given, it is called once first and its result is yielded
    before any call to func. Raising exception -- from start or from func --
    ends the iteration quietly; any other exception propagates to the caller.
    """
    try:
        if start is not None:
            yield start()
        while True:
            result = func()
            yield result
    except exception:
        return
160
161
def shrink_repo(git_dir):
    """Repack and prune the repository found at git_dir.

    The steps are: expire unreachable reflog entries, repack everything into
    a fresh pack, remove ``objects/info/alternates`` if it exists, and prune
    unreachable objects immediately. A failing git command raises
    subprocess.CalledProcessError.
    """
    alternates = os.path.join(git_dir, 'objects', 'info', 'alternates')

    preparation = (
        ['reflog', 'expire', '--expire-unreachable=now', '--all'],
        ['repack', '-ad'],
    )
    for git_args in preparation:
        subprocess.check_call(git_cmd + git_args)

    # The alternates file is dropped only after the repack; a missing file is
    # not an error. Presumably the repack has made the repository
    # self-contained by this point -- confirm against git-repack docs.
    try:
        os.unlink(alternates)
    except FileNotFoundError:
        pass

    subprocess.check_call(git_cmd + ['prune', '--expire', 'now'])
172
173
# Run the tool only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
176