1#!/usr/bin/env python3
2#
3# SPDX-License-Identifier: GPL-2.0-only
4#
5
6"""git-make-shallow: make the current git repository shallow
7
8Remove the history of the specified revisions, then optionally filter the
9available refs to those specified.
10"""
11
12import argparse
13import collections
14import errno
15import itertools
16import os
17import subprocess
18import sys
19
# Version number of this script; nothing else in this file reads it.
version = 1.0
21
22
def main():
    """Script entry point: make the current repository shallow.

    The steps run in this order:

    1. Refuse to run on Python older than 3.4.
    2. If the repository already has a ``shallow`` file, try to fetch the
       full history; if that fails, delete the ``shallow`` file instead.
    3. Parse the command line and record every commit reachable from the
       requested revisions.
    4. Write the new shallow points, then verify that none of the recorded
       commits still reachable from the kept refs reports any parents.
    5. Delete the refs that were not requested and, when ``--shrink`` was
       given, repack/prune the repository and run ``git fsck``.

    Exits via ``sys.exit`` with a message if the Python version is too old
    or if a revision turns out not to have been made shallow.
    """
    if sys.version_info < (3, 4, 0):
        sys.exit('Python 3.4 or greater is required')

    git_dir = check_output(['git', 'rev-parse', '--git-dir']).rstrip()
    shallow_file = os.path.join(git_dir, 'shallow')

    if os.path.exists(shallow_file):
        try:
            check_output(['git', 'fetch', '--unshallow'])
        except subprocess.CalledProcessError:
            # The history could not be fetched, so just drop the existing
            # shallow file; a file that vanished in the meantime is fine.
            try:
                os.unlink(shallow_file)
            except FileNotFoundError:
                pass

    args = process_args()

    # Snapshot of the history of the requested revisions, taken before the
    # shallow file is rewritten by make_shallow() below.
    history_before = check_output(['git', 'rev-list'] + args.revisions).splitlines()

    make_shallow(shallow_file, args.revisions, args.refs)

    history_after = check_output(['git', 'rev-list'] + args.refs).splitlines()
    still_reachable = set(history_before) & set(history_after)
    for commit in still_reachable:
        # Any output from `rev^@` means git still reports parents for it.
        parents = check_output(['git', 'rev-parse', '{}^@'.format(commit)])
        if parents:
            sys.exit('Error: %s was not made shallow' % commit)

    filter_refs(args.refs)

    if not args.shrink:
        return

    shrink_repo(git_dir)
    subprocess.check_call(['git', 'fsck', '--unreachable'])
55
56
def process_args():
    """Parse the command line and normalise refs and revisions through git.

    Prints the help text and exits with status 2 when the script is run
    without any arguments.

    Returns the ``argparse`` namespace with:

    * ``refs`` -- the ``--ref`` values passed through
      ``git rev-parse --symbolic-full-name``, or, when no ``--ref`` was
      given, every ref whose object type or dereferenced object type is
      ``commit``.  Names ending in ``/HEAD`` are dropped in both cases.
    * ``revisions`` -- the output lines of ``git rev-parse`` for each
      positional revision with ``^{}`` appended.
    * ``shrink`` -- the ``--shrink`` flag.
    """
    # TODO: add argument to automatically keep local-only refs, since they
    # can't be easily restored with a git fetch.
    description = 'Remove the history of the specified revisions, then optionally filter the available refs to those specified.'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        '--ref', '-r',
        metavar='REF',
        action='append',
        dest='refs',
        help='remove all but the specified refs (cumulative)',
    )
    parser.add_argument(
        '--shrink', '-s',
        action='store_true',
        help='shrink the git repository by repacking and pruning',
    )
    parser.add_argument(
        'revisions',
        metavar='REVISION',
        nargs='+',
        help='a git revision/commit',
    )

    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit(2)

    args = parser.parse_args()

    def is_commit_ref(refname, objtype, deref_objtype):
        # Keep refs that are commits themselves or dereference to one.
        return objtype == 'commit' or deref_objtype == 'commit'

    if args.refs:
        full_name_cmd = ['git', 'rev-parse', '--symbolic-full-name'] + args.refs
        candidates = check_output(full_name_cmd).splitlines()
    else:
        candidates = get_all_refs(is_commit_ref)

    args.refs = [name for name in candidates if not name.endswith('/HEAD')]

    dereferenced = ['%s^{}' % revision for revision in args.revisions]
    args.revisions = check_output(['git', 'rev-parse'] + dereferenced).splitlines()
    return args
78
79
def check_output(cmd, input=None):
    """Run *cmd* and return what it wrote to stdout as text.

    ``input`` is forwarded unchanged to ``subprocess.check_output`` as the
    data for the child's stdin.  ``subprocess.CalledProcessError``
    propagates to the caller when the command exits with a non-zero status.
    """
    options = {'universal_newlines': True, 'input': input}
    return subprocess.check_output(cmd, **options)
82
83
def make_shallow(shallow_file, revisions, refs):
    """Append every history intersection point to *shallow_file*.

    Each revision produced by ``follow_history_intersections(revisions,
    refs)`` is announced on stdout and written to the file on its own line.
    The file is opened and closed once per revision, so each entry is on
    disk before the generator resumes and runs its next git command.
    """
    shallow_points = follow_history_intersections(revisions, refs)
    for commit in shallow_points:
        print("Processing %s" % commit)
        with open(shallow_file, 'a') as handle:
            handle.write(commit + '\n')
90
91
def get_all_refs(ref_filter=None):
    """List the names of the refs in this repository.

    ``git for-each-ref`` is asked for three tab-separated fields per ref:
    the ref name, its object type and its dereferenced object type.  When
    *ref_filter* is given it is called with those three values (padded with
    ``None`` if a line yields fewer fields) and only refs for which it
    returns a true value are kept.

    Returns a list of ref names.
    """
    listing = check_output(['git', 'for-each-ref', '--format=%(refname)\t%(objecttype)\t%(*objecttype)'])

    names = []
    for line in listing.splitlines():
        fields = tuple(iter_extend(line.rsplit('\t'), 3))
        if ref_filter and not ref_filter(*fields):
            continue
        names.append(fields[0])
    return names
100
101
def iter_extend(iterable, length, obj=None):
    """Return an iterator over exactly *length* items taken from *iterable*.

    If *iterable* is shorter than *length* the missing items are filled in
    with *obj*; if it is longer, the surplus items are not produced.
    """
    filler = itertools.repeat(obj)
    padded = itertools.chain(iterable, filler)
    return itertools.islice(padded, length)
105
106
def filter_refs(refs):
    """Delete every ref in the repository that is not listed in *refs*.

    The names to delete are passed NUL-terminated on stdin to ``xargs``,
    which runs ``git update-ref -d --no-deref`` once per ref.  Nothing is
    run when there is no ref to delete.
    """
    unwanted = set(get_all_refs()) - set(refs)
    if not unwanted:
        return

    # Every name, including the last one, is followed by a NUL byte.
    nul_terminated = '\0'.join(unwanted) + '\0'
    check_output(['xargs', '-0', '-n', '1', 'git', 'update-ref', '-d', '--no-deref'],
                 input=nul_terminated)
114
115
def follow_history_intersections(revisions, refs):
    """Yield the points where the history of *revisions* meets *refs*.

    Works through a FIFO queue seeded with *revisions*.  Each revision not
    seen before is yielded once; then, for each of its parents, the merge
    base of that parent with each ref reported by
    ``git merge-base --independent`` is queued for the same treatment.
    Parent/ref pairs for which ``git merge-base`` fails are skipped.
    """
    pending = collections.deque(revisions)
    visited = set()

    while pending:
        commit = pending.popleft()
        if commit in visited:
            continue

        # The parents are looked up before the yield on purpose: in this
        # file the consumer (make_shallow) writes each yielded revision to
        # the shallow file, presumably hiding its parents from git afterwards.
        parents = check_output(['git', 'rev-parse', '%s^@' % commit]).splitlines()

        yield commit
        visited.add(commit)

        if not parents:
            continue

        independent_refs = check_output(['git', 'merge-base', '--independent'] + sorted(refs)).splitlines()
        for parent in parents:
            for tip in independent_refs:
                print("Checking %s vs %s" % (parent, tip))
                try:
                    common = check_output(['git', 'merge-base', parent, tip]).rstrip()
                except subprocess.CalledProcessError:
                    # git reported a failure (presumably no common ancestor).
                    continue
                pending.append(common)
143
144
def iter_except(func, exception, start=None):
    """Yield the results of calling *func* until it raises *exception*.

    If *start* is given, it is called once first and its result is yielded
    before the first call to *func*.  The generator ends quietly as soon as
    *exception* is raised; any other exception propagates.
    """
    try:
        if start is not None:
            first = start()
            yield first
        while True:
            item = func()
            yield item
    except exception:
        return
154
155
def shrink_repo(git_dir):
    """Repack and prune the repository stored in *git_dir*.

    Runs, in order: reflog expiry of unreachable entries, ``git repack
    -ad``, removal of ``objects/info/alternates`` (a missing file is not an
    error), and ``git prune --expire now``.  A failing git command raises
    ``subprocess.CalledProcessError``.
    """
    alternates = os.path.join(git_dir, 'objects', 'info', 'alternates')

    subprocess.check_call(['git', 'reflog', 'expire', '--expire-unreachable=now', '--all'])
    subprocess.check_call(['git', 'repack', '-ad'])

    # Dropped only after the repack above; a repository without an
    # alternates file simply has nothing to remove.
    try:
        os.unlink(alternates)
    except FileNotFoundError:
        pass

    subprocess.check_call(['git', 'prune', '--expire', 'now'])
166
167
# Run the tool only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
170