xref: /openbmc/openbmc/poky/scripts/combo-layer (revision 73bd93f1)
1#!/usr/bin/env python3
2# ex:ts=4:sw=4:sts=4:et
3# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
4#
5# Copyright 2011 Intel Corporation
6# Authored-by:  Yu Ke <ke.yu@intel.com>
7#               Paul Eggleton <paul.eggleton@intel.com>
8#               Richard Purdie <richard.purdie@intel.com>
9#
10# SPDX-License-Identifier: GPL-2.0-only
11#
12
13import fnmatch
14import os, sys
15import optparse
16import logging
17import subprocess
18import tempfile
19import configparser
20import re
21import copy
22import shlex
23import shutil
24from string import Template
25from functools import reduce
26
27__version__ = "0.2.1"
28
def logger_create():
    """Set up the root logger and return it.

    A stream handler that prefixes every message with the current time
    (HH:MM:SS) is attached to the root logger, and the logger's level is
    set to INFO.
    """
    formatter = logging.Formatter("[%(asctime)s] %(message)s", "%H:%M:%S")
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)

    root = logging.getLogger("")
    root.addHandler(handler)
    root.setLevel(logging.INFO)
    return root

# Logger used by the functions in this script.
logger = logger_create()
38
def get_current_branch(repodir=None):
    """Return the branch that HEAD points to in repodir.

    repodir defaults to the current working directory. When no ".git"
    entry exists there, "master" is returned. When git cannot resolve
    HEAD to a symbolic ref, an empty string is returned. A leading
    "refs/heads/" is removed from the result.
    """
    gitdir = os.path.join(repodir or '', ".git")
    if not os.path.exists(gitdir):
        # Repo not created yet (i.e. during init) so just assume master
        return "master"

    try:
        ref = runcmd("git symbolic-ref HEAD 2>/dev/null", repodir).strip()
    except subprocess.CalledProcessError:
        return ""

    prefix = "refs/heads/"
    return ref[len(prefix):] if ref.startswith(prefix) else ref
50
class Configuration(object):
    """
    Manages the configuration

    For an example config file, see combo-layer.conf.example

    Every section of the main config file describes one component
    repository, except "combo-layer-settings" which holds global settings.
    If the config file name ends in ".conf" and a sibling "<name>-local.conf"
    exists, its sections are merged on top. A local section is named either
    "<repo>" or "<repo>|<combo branch>"; the latter only applies when the
    combo layer is currently on that branch.
    """
    def __init__(self, options):
        """Load the configuration named by options.conffile.

        Every attribute of options is copied onto the instance first, so
        command line options are available as attributes of the
        configuration object.
        """
        for key, val in options.__dict__.items():
            setattr(self, key, val)

        def readsection(parser, section, repo):
            # Copy all options of one parser section into self.repos[repo].
            for (name, value) in parser.items(section):
                if value.startswith("@"):
                    # NOTE(review): values starting with "@" are evaluated as
                    # Python expressions. Only use config files you trust.
                    self.repos[repo][name] = eval(value.strip("@"))
                else:
                    # Apply special type transformations for some properties.
                    # Type matches the RawConfigParser.get*() methods.
                    types = {'signoff': 'boolean', 'update': 'boolean', 'history': 'boolean'}
                    if name in types:
                        value = getattr(parser, 'get' + types[name])(section, name)
                    self.repos[repo][name] = value

        def readglobalsection(parser, section):
            # Only "commit_msg" is recognised in the global section.
            for (name, value) in parser.items(section):
                if name == "commit_msg":
                    self.commit_msg_template = value

        logger.debug("Loading config file %s" % self.conffile)
        self.parser = configparser.ConfigParser()
        with open(self.conffile) as f:
            self.parser.read_file(f)

        # initialize default values
        self.commit_msg_template = "Automatic commit to update last_revision"

        self.repos = {}
        for repo in self.parser.sections():
            if repo == "combo-layer-settings":
                # special handling for global settings
                readglobalsection(self.parser, repo)
            else:
                self.repos[repo] = {}
                readsection(self.parser, repo, repo)

        # Load local configuration, if available
        self.localconffile = None
        self.localparser = None
        self.combobranch = None
        suffix = '.conf'
        if self.conffile.endswith(suffix):
            # Only swap the trailing suffix; a ".conf" elsewhere in the path
            # (e.g. in a directory name) must stay untouched.
            lcfile = self.conffile[:-len(suffix)] + '-local.conf'
            if os.path.exists(lcfile):
                # Read combo layer branch
                self.combobranch = get_current_branch()
                logger.debug("Combo layer branch is %s" % self.combobranch)

                self.localconffile = lcfile
                logger.debug("Loading local config file %s" % self.localconffile)
                self.localparser = configparser.ConfigParser()
                with open(self.localconffile) as f:
                    # read_file() replaces readfp(), which no longer exists
                    # in Python 3.12 and later.
                    self.localparser.read_file(f)

                for section in self.localparser.sections():
                    if '|' in section:
                        sectionvals = section.split('|')
                        repo = sectionvals[0]
                        if sectionvals[1] != self.combobranch:
                            continue
                    else:
                        repo = section
                    if repo in self.repos:
                        readsection(self.localparser, section, repo)

    def update(self, repo, option, value, initmode=False):
        """Set option to value for repo and write the owning config file.

        The option is written to the main config file when it is already
        defined there or when no local config is loaded. Otherwise it goes
        into the "<repo>|<combo branch>" section of the local config file;
        with initmode set, that section is created if it is missing.
        """
        # If the main config has the option already, that is what we
        # are expected to modify.
        if self.localparser and not self.parser.has_option(repo, option):
            parser = self.localparser
            section = "%s|%s" % (repo, self.combobranch)
            conffile = self.localconffile
            if initmode and not parser.has_section(section):
                parser.add_section(section)
        else:
            parser = self.parser
            section = repo
            conffile = self.conffile
        parser.set(section, option, value)
        with open(conffile, "w") as f:
            parser.write(f)
        self.repos[repo][option] = value

    def sanity_check(self, initmode=False):
        """Verify the loaded configuration and exit the process on errors.

        Every component must define src_uri, local_repo_dir, dest_dir and,
        unless initmode is set, last_revision. dest_dir is normalised as a
        side effect. The filterdiff tool must also be found on PATH.
        """
        required_options=["src_uri", "local_repo_dir", "dest_dir", "last_revision"]
        if initmode:
            required_options.remove("last_revision")
        msg = ""
        missing_options = []
        for name in self.repos:
            for option in required_options:
                if option not in self.repos[name]:
                    msg = "%s\nOption %s is not defined for component %s" %(msg, option, name)
                    missing_options.append(option)
            # Sanitize dest_dir so that we do not have to deal with edge cases
            # (unset, empty string, double slashes) in the rest of the code.
            # It not being set will still be flagged as error because it is
            # listed as required option above; that could be changed now.
            dest_dir = os.path.normpath(self.repos[name].get("dest_dir", "."))
            self.repos[name]["dest_dir"] = "." if not dest_dir else dest_dir
        if msg != "":
            logger.error("configuration file %s has the following error: %s" % (self.conffile,msg))
            if self.localconffile and 'last_revision' in missing_options:
                logger.error("local configuration file %s may be missing configuration for combo branch %s" % (self.localconffile, self.combobranch))
            sys.exit(1)

        # filterdiff is required by action_splitpatch, so check its availability
        if shutil.which("filterdiff") is None:
            logger.error("ERROR: patchutils package is missing, please install it (e.g. # apt-get install patchutils)")
            sys.exit(1)
169
def runcmd(cmd,destdir=None,printerr=True,out=None,env=None):
    """
        execute command, raise CalledProcessError if fail
        return output if succeed

        cmd is run through the shell when it is a string and executed
        directly when it is an argument list. destdir is the working
        directory for the command (current directory if None). env
        replaces the environment of the command; os.environ is used when
        it is not given.

        Without out, stdout and stderr are captured together and returned.
        With out, stdout goes to that file object and only the captured
        stderr is returned. On failure the captured text is logged as an
        error unless printerr is false.
    """
    logger.debug("run cmd '%s' in %s" % (cmd, os.getcwd() if destdir is None else destdir))
    if not out:
        out = tempfile.TemporaryFile()
        err = out
    else:
        err = tempfile.TemporaryFile()
    # err is always a temporary file created above (it is the same object
    # as out when the caller passed none), so closing it in the finally
    # clause never touches a file owned by the caller.
    try:
        try:
            subprocess.check_call(cmd, stdout=out, stderr=err, cwd=destdir, shell=isinstance(cmd, str), env=env or os.environ)
        except subprocess.CalledProcessError:
            if printerr:
                err.seek(0)
                # Decode so that the log shows text instead of a bytes repr;
                # undecodable bytes must not hide the original failure.
                logger.error("%s" % err.read().decode('utf-8', errors='replace'))
            raise

        err.seek(0)
        output = err.read().decode('utf-8')
    finally:
        err.close()
    logger.debug("output: %s" % output.replace(chr(0), '\\0'))
    return output
193
def action_sync_revs(conf, args):
    """
    Fetch each selected component repository and store the revision at
    the tip of its remote branch (origin/<branch>, "master" unless
    configured otherwise) as the component's last_revision option.
    Handy when several people use combo-layer on the same components.
    """
    for component in get_repos(conf, args[1:]):
        settings = conf.repos[component]
        checkout = settings['local_repo_dir']
        tracked = settings.get('branch', "master")

        runcmd("git fetch", checkout)
        tip = runcmd('git rev-parse origin/%s' % tracked, checkout).strip()

        print("Updating %s to %s" % (component, tip))
        conf.update(component, "last_revision", tip)
210
def action_init(conf, args):
    """
        Clone component repositories
        Check git is initialised; if not, copy initial data from component repos

        Without conf.history the selected files of each component are
        extracted into the combo repository and staged. With conf.history
        each component's history is fetched into a branch named after the
        component, optionally rewritten (hook, dest_dir, file filters) and
        prepared for an octopus merge into master. In both cases the final
        commit is left to the user.

        args is not used by this action.
    """
    def copy_selected_files(initialrev, extract_dir, file_filter, exclude_patterns, ldir,
                            subdir=""):
        # Extract the files selected by file_filter from revision
        # initialrev of the repository in ldir into extract_dir, then
        # remove the extracted files matching one of exclude_patterns.
        #
        # When working inside a filtered branch which had the
        # files already moved, we need to prepend the
        # subdirectory to all filters, otherwise they would
        # not match.
        if subdir == '.':
            subdir = ''
        elif subdir:
            subdir = os.path.normpath(subdir)
            file_filter = ' '.join([subdir + '/' + x for x in file_filter.split()])
            exclude_patterns = [subdir + '/' + x for x in exclude_patterns]
        # To handle both cases, we cd into the target
        # directory and optionally tell tar to strip the path
        # prefix when the files were already moved.
        subdir_components = len(subdir.split(os.path.sep)) if subdir else 0
        strip=('--strip-components=%d' % subdir_components) if subdir else ''
        # TODO: file_filter wild cards do not work (and haven't worked before either), because
        # a) GNU tar requires a --wildcards parameter before turning on wild card matching.
        # b) The semantic is not as intended (src/*.c also matches src/foo/bar.c,
        #    in contrast to the other use of file_filter as parameter of "git archive"
        #    where it only matches .c files directly in src).
        files = runcmd("git archive %s %s | tar -x -v %s -C %s %s" %
                       (initialrev, subdir,
                        strip, extract_dir, file_filter),
                       ldir)
        if exclude_patterns:
            # Implement file removal by letting tar create the
            # file and then deleting it in the file system
            # again. Uses the list of files created by tar (easier
            # than walking the tree).
            for file in files.split('\n'):
                if file.endswith(os.path.sep):
                    continue
                for pattern in exclude_patterns:
                    if fnmatch.fnmatch(file, pattern):
                        os.unlink(os.path.join(*([extract_dir] + ['..'] * subdir_components + [file])))
                        break

    for name in conf.repos:
        ldir = conf.repos[name]['local_repo_dir']
        if not os.path.exists(ldir):
            logger.info("cloning %s to %s" %(conf.repos[name]['src_uri'], ldir))
            subprocess.check_call("git clone %s %s" % (conf.repos[name]['src_uri'], ldir), shell=True)
    if not os.path.exists(".git"):
        runcmd("git init")
        if conf.history:
            # Need a common ref for all trees.
            runcmd('git commit -m "initial empty commit" --allow-empty')
            startrev = runcmd('git rev-parse master').strip()

        for name in conf.repos:
            repo = conf.repos[name]
            ldir = repo['local_repo_dir']
            branch = repo.get('branch', "master")
            lastrev = repo.get('last_revision', None)
            if lastrev and lastrev != "HEAD":
                initialrev = lastrev
                if branch:
                    if not check_rev_branch(name, ldir, lastrev, branch):
                        sys.exit(1)
                logger.info("Copying data from %s at specified revision %s..." % (name, lastrev))
            else:
                lastrev = None
                initialrev = branch
                logger.info("Copying data from %s..." % name)
            # Sanity check initialrev and turn it into hash (required for copying history,
            # because resolving a name ref only works in the component repo).
            rev = runcmd('git rev-parse %s' % initialrev, ldir).strip()
            if rev != initialrev:
                try:
                    # split() instead of split('\n'): the trailing newline
                    # would otherwise add an empty entry and make every
                    # single match look ambiguous.
                    refs = runcmd('git show-ref -s %s' % initialrev, ldir).split()
                    if len(set(refs)) > 1:
                        # Happens for example when configured to track
                        # "master" and there is a refs/heads/master. The
                        # traditional behavior from "git archive" (preserved
                        # here) is to choose the first one. This might not be
                        # intended, so at least warn about it.
                        logger.warning("%s: initial revision '%s' not unique, picking result of rev-parse = %s" %
                                    (name, initialrev, refs[0]))
                except subprocess.CalledProcessError:
                    # show-ref fails for hashes. Skip the sanity warning in that case.
                    pass
                initialrev = rev
            dest_dir = repo['dest_dir']
            if dest_dir != ".":
                extract_dir = os.path.join(os.getcwd(), dest_dir)
                if not os.path.exists(extract_dir):
                    os.makedirs(extract_dir)
            else:
                extract_dir = os.getcwd()
            file_filter = repo.get('file_filter', "")
            exclude_patterns = repo.get('file_exclude', '').split()

            if not conf.history:
                copy_selected_files(initialrev, extract_dir, file_filter, exclude_patterns, ldir)
            else:
                # First fetch remote history into local repository.
                # We need a ref for that, so ensure that there is one.
                refname = "combo-layer-init-%s" % name
                runcmd("git branch -f %s %s" % (refname, initialrev), ldir)
                runcmd("git fetch %s %s" % (ldir, refname))
                runcmd("git branch -D %s" % refname, ldir)
                # Make that the head revision.
                runcmd("git checkout -b %s %s" % (name, initialrev))
                # Optional: cut the history by replacing the given
                # start point(s) with commits providing the same
                # content (aka tree), but with commit information that
                # makes it clear that this is an artificially created
                # commit and nothing the original authors had anything
                # to do with.
                since_rev = repo.get('since_revision', '')
                if since_rev:
                    committer = runcmd('git var GIT_AUTHOR_IDENT').strip()
                    # Same time stamp, no name.
                    author = re.sub(r'.* (\d+ [+-]\d+)', r'unknown <unknown> \1', committer)
                    logger.info('author %s' % author)
                    for rev in since_rev.split():
                        # Resolve in component repo...
                        rev = runcmd('git log --oneline --no-abbrev-commit -n1 %s' % rev, ldir).split()[0]
                        # ... and then get the tree in current
                        # one. The commit should be in both repos with
                        # the same tree, but better check here.
                        tree = runcmd('git show -s --pretty=format:%%T %s' % rev).strip()
                        with tempfile.NamedTemporaryFile(mode='wt') as editor:
                            editor.write('''cat >$1 <<EOF
tree %s
author %s
committer %s

%s: squashed import of component

This commit copies the entire set of files as found in
%s %s

For more information about previous commits, see the
upstream repository.

Commit created by combo-layer.
EOF
''' % (tree, author, committer, name, name, since_rev))
                            editor.flush()
                            # Hand the editor script to this one command only,
                            # so that the process environment does not keep
                            # pointing at a temporary file that is deleted
                            # when the with block ends.
                            editor_env = dict(os.environ, GIT_EDITOR='sh %s' % editor.name)
                            runcmd('git replace --edit %s' % rev, env=editor_env)

                # Optional: rewrite history to change commit messages or to move files.
                if 'hook' in repo or dest_dir != ".":
                    filter_branch = ['git', 'filter-branch', '--force']
                    with tempfile.NamedTemporaryFile(mode='wt') as hookwrapper:
                        if 'hook' in repo:
                            # Create a shell script wrapper around the original hook that
                            # can be used by git filter-branch. Hook may or may not have
                            # an absolute path.
                            hook = repo['hook']
                            hook = os.path.join(os.path.dirname(conf.conffile), '..', hook)
                            # The wrappers turns the commit message
                            # from stdin into a fake patch header.
                            # This is good enough for changing Subject
                            # and commit msg body with normal
                            # combo-layer hooks.
                            hookwrapper.write('''set -e
tmpname=$(mktemp)
trap "rm $tmpname" EXIT
echo -n 'Subject: [PATCH] ' >>$tmpname
cat >>$tmpname
if ! [ $(tail -c 1 $tmpname | od -A n -t x1) == '0a' ]; then
    echo >>$tmpname
fi
echo '---' >>$tmpname
%s $tmpname $GIT_COMMIT %s
tail -c +18 $tmpname | head -c -4
''' % (hook, name))
                            hookwrapper.flush()
                            filter_branch.extend(['--msg-filter', 'bash %s' % hookwrapper.name])
                        if dest_dir != ".":
                            parent = os.path.dirname(dest_dir)
                            if not parent:
                                parent = '.'
                            # May run outside of the current directory, so do not assume that .git exists.
                            filter_branch.extend(['--tree-filter', 'mkdir -p .git/tmptree && find . -mindepth 1 -maxdepth 1 ! -name .git -print0 | xargs -0 -I SOURCE mv SOURCE .git/tmptree && mkdir -p %s && mv .git/tmptree %s' % (parent, dest_dir)])
                        filter_branch.append('HEAD')
                        runcmd(filter_branch)
                        runcmd('git update-ref -d refs/original/refs/heads/%s' % name)
                repo['rewritten_revision'] = runcmd('git rev-parse HEAD').strip()
                repo['stripped_revision'] = repo['rewritten_revision']
                # Optional filter files: remove everything and re-populate using the normal filtering code.
                # Override any potential .gitignore.
                if file_filter or exclude_patterns:
                    runcmd('git rm -rf .')
                    if not os.path.exists(extract_dir):
                        os.makedirs(extract_dir)
                    copy_selected_files('HEAD', extract_dir, file_filter, exclude_patterns, '.',
                                        subdir=dest_dir)
                    runcmd('git add --all --force .')
                    if runcmd('git status --porcelain'):
                        # Something to commit.
                        runcmd(['git', 'commit', '-m',
                                '''%s: select file subset

Files from the component repository were chosen based on
the following filters:
file_filter = %s
file_exclude = %s''' % (name, file_filter or '<empty>', repo.get('file_exclude', '<empty>'))])
                        repo['stripped_revision'] = runcmd('git rev-parse HEAD').strip()

            if not lastrev:
                lastrev = runcmd('git rev-parse %s' % initialrev, ldir).strip()
                conf.update(name, "last_revision", lastrev, initmode=True)

        if not conf.history:
            runcmd("git add .")
        else:
            # Create Octopus merge commit according to http://stackoverflow.com/questions/10874149/git-octopus-merge-with-unrelated-repositoies
            runcmd('git checkout master')
            merge = ['git', 'merge', '--no-commit']
            for name in conf.repos:
                repo = conf.repos[name]
                # Use branch created earlier.
                merge.append(name)
                # Root all commits which have no parent in the common
                # ancestor in the new repository.
                for start in runcmd('git log --pretty=format:%%H --max-parents=0 %s --' % name).split('\n'):
                    runcmd('git replace --graft %s %s' % (start, startrev))
            try:
                runcmd(merge)
            except Exception as error:
                logger.info('''Merging component repository history failed, perhaps because of merge conflicts.
It may be possible to commit anyway after resolving these conflicts.

%s''' % error)
            # Create MERGE_HEAD and MERGE_MSG. "git merge" itself
            # does not create MERGE_HEAD in case of a (harmless) failure,
            # and we want certain auto-generated information in the
            # commit message for future reference and/or automation.
            with open('.git/MERGE_HEAD', 'w') as head:
                with open('.git/MERGE_MSG', 'w') as msg:
                    msg.write('repo: initial import of components\n\n')
                    # head.write('%s\n' % startrev)
                    for name in conf.repos:
                        repo = conf.repos[name]
                        # <upstream ref> <rewritten ref> <rewritten + files removed>
                        msg.write('combo-layer-%s: %s %s %s\n' % (name,
                                                                  repo['last_revision'],
                                                                  repo['rewritten_revision'],
                                                                  repo['stripped_revision']))
                        rev = runcmd('git rev-parse %s' % name).strip()
                        head.write('%s\n' % rev)

        if conf.localconffile:
            # The local config file must not be committed: unstage it if
            # it got added and list it in .gitignore.
            localadded = True
            try:
                runcmd("git rm --cached %s" % conf.localconffile, printerr=False)
            except subprocess.CalledProcessError:
                localadded = False
            if localadded:
                localrelpath = os.path.relpath(conf.localconffile)
                runcmd("grep -q %s .gitignore || echo %s >> .gitignore" % (localrelpath, localrelpath))
                runcmd("git add .gitignore")
                logger.info("Added local configuration file %s to .gitignore", localrelpath)
        logger.info("Initial combo layer repository data has been created; please make any changes if desired and then use 'git commit' to make the initial commit.")
    else:
        logger.info("Repository already initialised, nothing to do.")
478
479
def check_repo_clean(repodir):
    """
        Exit the process with status 1 if "git status --porcelain" reports
        anything for repodir other than untracked "patch-*/" directories.
    """
    ignorable = re.compile(r'\?\? patch-.*/')
    status = runcmd("git status --porcelain", repodir)
    for entry in status.splitlines():
        if ignorable.match(entry):
            continue
        # The first entry that is not an ignorable patch directory is enough.
        logger.error("git repo %s is dirty, please fix it first", repodir)
        sys.exit(1)
491
def check_patch(patchfile):
    """Indent any diff embedded in the commit message of patchfile.

    The commit message is everything up to the first line consisting of
    "---" only. If a line starting with "--- " appears before that, the
    message contains a diff of its own; the lines of that diff are
    prefixed with a space and the file is rewritten in place via
    "<patchfile>.tmp". A patch without such a diff is left untouched.
    """
    of = None
    in_patch = False
    beyond_msg = False
    pre_buf = b''
    try:
        with open(patchfile, 'rb') as f:
            for ln in f:
                if not beyond_msg:
                    if ln == b'---\n':
                        if not of:
                            # End of message reached without finding a
                            # diff: nothing to rewrite.
                            break
                        in_patch = False
                        beyond_msg = True
                    elif ln.startswith(b'--- '):
                        # We have a diff in the commit message
                        in_patch = True
                        if not of:
                            print('WARNING: %s contains a diff in its commit message, indenting to avoid failure during apply' % patchfile)
                            of = open(patchfile + '.tmp', 'wb')
                            of.write(pre_buf)
                            pre_buf = b''
                    elif in_patch and ln[0] not in b'+-@ \n\r':
                        # First line that cannot belong to a diff ends it.
                        in_patch = False
                if of:
                    if in_patch:
                        of.write(b' ' + ln)
                    else:
                        of.write(ln)
                else:
                    # Buffer until we know whether a rewrite is needed.
                    pre_buf += ln
    finally:
        # Close the output even when reading or writing failed.
        if of:
            of.close()
    if of:
        os.rename(of.name, patchfile)
528
def drop_to_shell(workdir=None):
    """Start an interactive shell in workdir and report how it ended.

    Returns False without starting a shell when stdin is not a TTY.
    Otherwise runs the program named by $SHELL ("bash" if unset) and
    returns True when it exits with status 0, False for any other status.
    """
    if not sys.stdin.isatty():
        print("Not a TTY so can't drop to shell for resolution, exiting.")
        return False

    shell = os.environ.get('SHELL', 'bash')
    banner = ('Dropping to shell "%s"\n'
              'When you are finished, run the following to continue:\n'
              '       exit    -- continue to apply the patches\n'
              '       exit 1  -- abort\n')
    print(banner % shell)

    if subprocess.call([shell], cwd=workdir) == 0:
        return True
    print("Aborting")
    return False
545
def check_rev_branch(component, repodir, rev, branch):
    """Check that revision rev is contained in branch of the repo at repodir.

    Logs an error naming component and returns False when
    "git branch --contains" lists nothing for rev (including the case
    where git exits with status 129) or when branch is not among the
    listed branches. Returns True otherwise. Any other git failure is
    propagated as CalledProcessError.
    """
    try:
        listing = runcmd("git branch --contains %s" % rev, repodir, printerr=False)
    except subprocess.CalledProcessError as failure:
        if failure.returncode != 129:
            raise
        listing = ""

    if not listing:
        logger.error("%s: specified revision %s is invalid!" % (component, rev))
        return False

    # The branch name is the last space-separated token of each line.
    containing = [line.strip().split(' ')[-1] for line in listing.split("\n")]
    if branch in containing:
        return True

    logger.error("%s: specified revision %s is not on specified branch %s!" % (component, rev, branch))
    return False
568
def get_repos(conf, repo_names):
    """Return the list of component names to operate on.

    Names are taken from repo_names up to the first entry that starts
    with "-". Each one must be a key of conf.repos, otherwise an error is
    logged and the process exits with status 1. When no name is given,
    all configured components are returned except those whose "update"
    option is false.
    """
    selected = []
    for candidate in repo_names:
        if candidate.startswith('-'):
            break
        selected.append(candidate)

    for candidate in selected:
        if candidate in conf.repos:
            continue
        logger.error("Specified component '%s' not found in configuration" % candidate)
        sys.exit(1)

    if selected:
        return selected
    return [repo for repo, settings in conf.repos.items() if settings.get("update", True)]
585
def action_pull(conf, args):
    """
        Bring the local checkouts of the selected component repositories
        up to date; the combo repository itself is not modified.

        All selected checkouts must be clean. With conf.hard_reset the
        configured branch is reset to FETCH_HEAD after a fetch; otherwise
        it is checked out and fast-forwarded.
    """
    selected = get_repos(conf, args[1:])

    # make sure all repos are clean
    for component in selected:
        check_repo_clean(conf.repos[component]['local_repo_dir'])

    for component in selected:
        settings = conf.repos[component]
        checkout = settings['local_repo_dir']
        tracked = settings.get('branch', "master")
        logger.info("update branch %s of component repo %s in %s ..." % (tracked, component, checkout))

        if conf.hard_reset:
            logger.info(runcmd("git fetch", checkout))
            runcmd("git checkout %s" % tracked, checkout)
            runcmd("git reset --hard FETCH_HEAD", checkout)
            continue

        # Try to pull only the configured branch. Beware that this may fail
        # when the branch is currently unknown (for example, after reconfiguring
        # combo-layer). In that case we need to fetch everything and try the check out
        # and pull again.
        try:
            runcmd("git checkout %s" % tracked, checkout, printerr=False)
        except subprocess.CalledProcessError:
            logger.info(runcmd("git fetch", checkout))
            runcmd("git checkout %s" % tracked, checkout)
            runcmd("git pull --ff-only", checkout)
        else:
            logger.info(runcmd("git pull --ff-only", checkout))
621
def action_update(conf, args):
    """
        Update the combo repository from its component repositories.

        Each argument after the first is "<component>" or
        "<component>:<revision>". Unless conf.nopull is set the component
        repositories are pulled first. The update is then done either by
        generating and applying patches, or, in history mode, by
        re-creating the component histories and merging them into the
        current branch with a merge commit.
    """
    requested = args[1:]
    components = [spec.split(':')[0] for spec in requested]
    revisions = {}
    for spec in requested:
        component, separator, revision = spec.partition(':')
        if separator:
            revisions[component] = revision
    repos = get_repos(conf, components)

    # make sure combo repo is clean
    check_repo_clean(os.getcwd())

    # Check whether we keep the component histories. Must be
    # set either via --history command line parameter or consistently
    # in combo-layer.conf. Mixing modes is (currently, and probably
    # permanently because it would be complicated) not supported.
    history = True if conf.history else None
    if history is None:
        for component in repos:
            wanted = conf.repos[component].get('history', False)
            if history is None:
                history = wanted
            elif history != wanted:
                logger.error("'history' property is set inconsistently")
                sys.exit(1)

    # Step 1: update the component repos
    if conf.nopull:
        logger.info("Skipping pull (-n)")
    else:
        action_pull(conf, ['arg0'] + components)

    updater = update_with_history if history else update_with_patches
    updater(conf, components, revisions, repos)
670
def update_with_patches(conf, components, revisions, repos):
    """Update the combined repository by exporting and re-applying patches.

    For every component named in ``repos`` the new commits are exported
    with ``git format-patch`` into a temporary ``patch-<uuid>`` directory,
    optionally passed through the component's hook, listed in a
    ``patchlist-<name>`` file and finally applied with apply_patchlist().

    conf       -- configuration object; ``conf.repos[name]`` holds the
                  per-component settings, ``conf.interactive`` enables the
                  manual editing step
    components -- component names, only used for the config commit message
    revisions  -- mapping of component name to the revision to update to;
                  components without an entry use their branch
    repos      -- names of the components to process

    Exits the process when a revision is not on the expected branch or
    when the interactive shell reports a failure.
    """
    import uuid
    patch_dir = "patch-%s" % uuid.uuid4()
    if not os.path.exists(patch_dir):
        os.mkdir(patch_dir)

    for name in repos:
        revision = revisions.get(name, None)
        repo = conf.repos[name]
        ldir = repo['local_repo_dir']
        dest_dir = repo['dest_dir']
        branch = repo.get('branch', "master")
        repo_patch_dir = os.path.join(os.getcwd(), patch_dir, name)

        # Step 2: generate the patch list and store to patch dir
        logger.info("Generating patches from %s..." % name)
        top_revision = revision or branch
        if not check_rev_branch(name, ldir, top_revision, branch):
            sys.exit(1)
        if dest_dir != ".":
            # Make the paths inside the patches point below dest_dir
            prefix = "--src-prefix=a/%s/ --dst-prefix=b/%s/" % (dest_dir, dest_dir)
        else:
            prefix = ""
        if repo['last_revision'] == "":
            logger.info("Warning: last_revision of component %s is not set, starting from the first commit" % name)
            patch_cmd_range = "--root %s" % top_revision
            rev_cmd_range = top_revision
        else:
            if not check_rev_branch(name, ldir, repo['last_revision'], branch):
                sys.exit(1)
            patch_cmd_range = "%s..%s" % (repo['last_revision'], top_revision)
            rev_cmd_range = patch_cmd_range

        file_filter = repo.get('file_filter',".")

        # Filter out unwanted files
        exclude = repo.get('file_exclude', '')
        if exclude:
            for path in exclude.split():
                p = "%s/%s" % (dest_dir, path) if dest_dir != '.' else path
                file_filter += " ':!%s'" % p

        patch_cmd = "git format-patch -N %s --output-directory %s %s -- %s" % \
            (prefix,repo_patch_dir, patch_cmd_range, file_filter)
        output = runcmd(patch_cmd, ldir)
        logger.debug("generated patch set:\n%s" % output)
        patchlist = output.splitlines()

        rev_cmd = "git rev-list --no-merges %s -- %s" % (rev_cmd_range, file_filter)
        revlist = runcmd(rev_cmd, ldir).splitlines()

        # Step 3: Call repo specific hook to adjust patch
        if 'hook' in repo:
            # hook parameter is: ./hook patchpath revision reponame
            # The revision list is walked backwards while the patch list
            # is walked forwards.
            count = len(revlist) - 1
            for patch in patchlist:
                runcmd("%s %s %s %s" % (repo['hook'], patch, revlist[count], name))
                count = count - 1

        # Step 4: write patch list and revision list to file, for user to edit later
        patchlist_file = os.path.join(os.getcwd(), patch_dir, "patchlist-%s" % name)
        repo['patchlist'] = patchlist_file
        # The context manager closes the list even if check_patch() raises
        # or exits part-way through.
        with open(patchlist_file, 'w') as f:
            count = len(revlist) - 1
            for patch in patchlist:
                f.write("%s %s\n" % (patch, revlist[count]))
                check_patch(os.path.join(patch_dir, patch))
                count = count - 1

    # Step 5: invoke bash for user to edit patch and patch list
    if conf.interactive:
        print('You may now edit the patch and patch list in %s\n' \
              'For example, you can remove unwanted patch entries from patchlist-*, so that they will be not applied later' % patch_dir)
        if not drop_to_shell(patch_dir):
            sys.exit(1)

    # Step 6: apply the generated and revised patch
    apply_patchlist(conf, repos)
    runcmd("rm -rf %s" % patch_dir)

    # Step 7: commit the updated config file if it's being tracked
    commit_conf_file(conf, components)
754
def conf_commit_msg(conf, components):
    """Build the commit message from conf.commit_msg_template.

    The template's ``components`` placeholder is replaced by the
    comma-separated component names, or by "all components" when the
    list of components is empty.
    """
    if len(components) > 0:
        # name the components that were actually changed
        described = ", ".join(components)
    else:
        described = "all components"

    # expand the template with known values
    return Template(conf.commit_msg_template).substitute(components=described)
766
def commit_conf_file(conf, components, commit=True):
    """Commit or stage the configuration file when git reports it as modified.

    conf       -- configuration object; ``conf.conffile`` is the file to check
    components -- component names used to build the commit message
    commit     -- True: create a commit containing the file;
                  False: only stage it with "git add"

    Returns True if the file was reported as modified (and was therefore
    committed or staged), False otherwise - including the case where the
    status query itself fails.
    """
    relpath = os.path.relpath(conf.conffile)
    try:
        # Argument list instead of a shell string so that a path containing
        # spaces or shell metacharacters is passed through unchanged.
        output = runcmd(["git", "status", "--porcelain", relpath], printerr=False)
    except Exception:
        # Outside the repository. Deliberately not a bare "except:" so that
        # KeyboardInterrupt and SystemExit still propagate.
        output = None

    if not output or not output.lstrip().startswith("M"):
        return False

    logger.info("Committing updated configuration file")
    if commit:
        msg = conf_commit_msg(conf, components)
        runcmd('git commit -m'.split() + [msg, relpath])
    else:
        runcmd(["git", "add", relpath])
    return True
784
def apply_patchlist(conf, repos):
    """
        Apply every component's patch list to the combo repo and record
        the resulting last_revision in the configuration.
    """
    for name in repos:
        repo = conf.repos[name]
        lastrev = repo["last_revision"]
        prevrev = lastrev

        # Collect the non-blank lines of the patch list file
        entries = []
        if os.path.exists(repo['patchlist']) or not conf.interactive:
            # Note: in non-interactive mode the file is expected to exist, so a
            # missing file is allowed to raise here
            with open(repo['patchlist']) as listing:
                entries = [text for text in (raw.rstrip() for raw in listing) if text]

        ldir = repo['local_repo_dir']
        branch = repo.get('branch', "master")
        branchrev = runcmd("git rev-parse %s" % branch, ldir).strip()

        if not entries:
            logger.info("No patches to apply from %s" % name)
            lastrev = branchrev
        else:
            logger.info("Applying patches from %s..." % name)
            total = len(entries)
            for position, entry in enumerate(entries, 1):
                # Each entry is "<patch file> <revision>"
                fields = entry.split()
                patchfile = fields[0]
                lastrev = fields[1]
                patchdisp = os.path.relpath(patchfile)
                if os.path.getsize(patchfile) == 0:
                    logger.info("(skipping %d/%d %s - no changes)" % (position, total, patchdisp))
                else:
                    signoff = '-s ' if repo.get('signoff', True) else ''
                    cmd = "git am --keep-cr %s-p1 %s" % (signoff, patchfile)
                    logger.info("Applying %d/%d: %s" % (position, total, patchdisp))
                    try:
                        runcmd(cmd)
                    except subprocess.CalledProcessError:
                        logger.info('Running "git am --abort" to cleanup repo')
                        runcmd("git am --abort")
                        logger.error('"%s" failed' % cmd)
                        logger.info("Please manually apply patch %s" % patchdisp)
                        logger.info("Note: if you exit and continue applying without manually applying the patch, it will be skipped")
                        if not drop_to_shell():
                            # Remember how far we got before giving up
                            if prevrev != repo['last_revision']:
                                conf.update(name, "last_revision", prevrev)
                            sys.exit(1)
                prevrev = lastrev

            # After the last patch, last_revision should point at the branch
            # head rather than at the last applied patch. They differ when
            # the head is a merge commit or when patches at the head were
            # intentionally excluded.
            #
            # Without this adjustment for a merge commit, the next run would
            # only exclude patches reachable from one of the merged branches
            # and try to re-apply already copied patches from the others.
            #
            # Intentionally excluded patches get presented again on the next
            # run. That may or may not be wanted, so only the "head is a
            # merge commit" case is handled here.
            if lastrev != branchrev:
                head_parents = runcmd("git show --pretty=format:%%P --no-patch %s" % branch, ldir).split()
                if len(head_parents) > 1:
                    lastrev = branchrev

        if lastrev != repo['last_revision']:
            conf.update(name, "last_revision", lastrev)
861
def action_splitpatch(conf, args):
    """
        export one commit of the combo repo as a patch and
        write a separate patch file for each component
    """
    logger.debug("action_splitpatch")
    commit = args[1] if len(args) > 1 else "HEAD"
    patchdir = "splitpatch-%s" % commit
    if not os.path.exists(patchdir):
        os.mkdir(patchdir)

    targets = [(name, conf.repos[name]['dest_dir']) for name in conf.repos]

    # The component living in the root "." is selected by excluding the
    # dest_dir of every other component: "-x repo1/* -x repo2/* ..."
    filerange_root = "".join(' -x "%s/*"' % subdir for _, subdir in targets if subdir != ".")

    for name, dest_dir in targets:
        patch_filename = "%s/%s.patch" % (patchdir, name)
        if dest_dir != ".":
            cmd = "git format-patch --no-prefix -n1 --stdout %s^..%s -- %s > %s" % (commit, commit, dest_dir, patch_filename)
        else:
            cmd = "git format-patch -n1 --stdout %s^..%s | filterdiff -p1 %s > %s" % (commit, commit, filerange_root, patch_filename)
        runcmd(cmd)

        # A patch counts as empty when the file has no content or when
        # filterdiff finds nothing in it (only preamble text left)
        has_changes = not (os.path.getsize(patch_filename) == 0 or
                           runcmd("filterdiff %s" % patch_filename) == "")
        if has_changes:
            logger.info(patch_filename)
            continue
        os.remove(patch_filename)
        logger.info("(skipping %s - no changes)", name)
900
def update_with_history(conf, components, revisions, repos):
    '''Update all components with full history.

    Works by importing all commits reachable from a component's
    current head revision.  If those commits are rooted in an already
    imported commit, their content gets mixed with the content of the
    combined repo of that commit (new or modified files overwritten,
    removed files removed).

    The last commit is an artificial merge commit that merges all the
    updated components into the combined repository.

    The HEAD ref only gets updated at the very end. All intermediate work
    happens in a worktree which will get garbage collected by git eventually
    after a failure.

    conf       -- configuration object; conf.repos[name] holds the
                  per-component settings
    components -- component names, used for the final commit message
    revisions  -- mapping of component name to the revision to import up
                  to; components without an entry use their branch
    repos      -- names of the components to process

    Exits the process when a revision is not on the expected branch.
    '''
    # Remember current HEAD and what we need to add to it.
    head = runcmd("git rev-parse HEAD").strip()
    additional_heads = {}

    # Track the mapping between original commit and commit in the
    # combined repo. We do not have to distinguish between components,
    # because commit hashes are different anyway. Often we can
    # skip find_revs() entirely (for example, when all new commits
    # are derived from the last imported revision).
    #
    # Using "head" (typically the merge commit) instead of the actual
    # commit for the component leads to a nicer history in the combined
    # repo.
    old2new_revs = {}
    for name in repos:
        repo = conf.repos[name]
        revision = repo['last_revision']
        if revision:
            old2new_revs[revision] = head

    def add_p(parents):
        '''Insert -p before each entry.'''
        parameters = []
        for p in parents:
            parameters.append('-p')
            parameters.append(p)
        return parameters

    # Do all intermediate work with a separate work dir and index,
    # chosen via env variables (can't use "git worktree", it is too
    # new). This is useful (no changes to current work tree unless the
    # update succeeds) and required (otherwise we end up temporarily
    # removing the combo-layer hooks that we currently use when
    # importing a new component).
    #
    # Not cleaned up after a failure at the moment.
    wdir = os.path.join(os.getcwd(), ".git", "combo-layer")
    windex = wdir + ".index"
    if os.path.isdir(wdir):
        shutil.rmtree(wdir)
    os.mkdir(wdir)
    # Plain dict snapshot: a deep copy of os.environ is still an environ
    # mapping whose item assignment calls putenv(), which would leak the
    # overrides below into the environment of this process itself.
    wenv = dict(os.environ)
    wenv["GIT_WORK_TREE"] = wdir
    wenv["GIT_INDEX_FILE"] = windex
    # This one turned out to be needed in practice.
    wenv["GIT_OBJECT_DIRECTORY"] = os.path.join(os.getcwd(), ".git", "objects")
    wargs = {"destdir": wdir, "env": wenv}

    for name in repos:
        revision = revisions.get(name, None)
        repo = conf.repos[name]
        ldir = repo['local_repo_dir']
        dest_dir = repo['dest_dir']
        branch = repo.get('branch', "master")
        hook = repo.get('hook', None)
        largs = {"destdir": ldir, "env": None}
        file_include = repo.get('file_filter', '').split()
        file_include.sort() # make sure that short entries like '.' come first.
        file_exclude = repo.get('file_exclude', '').split()

        def include_file(file):
            if not file_include:
                # No explicit filter set, include file.
                return True
            for filter in file_include:
                if filter == '.':
                    # Another special case: include current directory and thus all files.
                    return True
                if os.path.commonprefix((filter, file)) == filter:
                    # Included in directory or direct file match.
                    return True
                # Check for wildcard match *with* allowing * to match /, i.e.
                # src/*.c does match src/foobar/*.c. That's not how it is done elsewhere
                # when passing the filtering to "git archive", but it is unclear what
                # the intended semantic is (the comment on file_exclude that "append a * wildcard
                # at the end" to match the full content of a directories implies that
                # slashes are indeed not special), so here we simply do what's easy to
                # implement in Python.
                logger.debug('fnmatch(%s, %s)' % (file, filter))
                if fnmatch.fnmatchcase(file, filter):
                    return True
            return False

        def exclude_file(file):
            for filter in file_exclude:
                if fnmatch.fnmatchcase(file, filter):
                    return True
            return False

        def file_filter(files):
            '''Clean up file list so that only included files remain.'''
            index = 0
            while index < len(files):
                file = files[index]
                if not include_file(file) or exclude_file(file):
                    del files[index]
                else:
                    index += 1


        # Generate the revision list.
        logger.info("Analyzing commits from %s..." % name)
        top_revision = revision or branch
        if not check_rev_branch(name, ldir, top_revision, branch):
            sys.exit(1)

        last_revision = repo['last_revision']
        rev_list_args = "--full-history --sparse --topo-order --reverse"
        if not last_revision:
            logger.info("Warning: last_revision of component %s is not set, starting from the first commit" % name)
            rev_list_args = rev_list_args + ' ' + top_revision
        else:
            if not check_rev_branch(name, ldir, last_revision, branch):
                sys.exit(1)
            rev_list_args = "%s %s..%s" % (rev_list_args, last_revision, top_revision)

            # By definition, the current HEAD contains the latest imported
            # commit of each component. We use that as initial mapping even
            # though the commits do not match exactly because
            # a) it always works (in contrast to find_revs, which relies on special
            #    commit messages)
            # b) it is faster than find_revs, which will only be called on demand
            #    and can be skipped entirely in most cases
            # c) last but not least, the combined history looks nicer when all
            #    new commits are rooted in the same merge commit
            old2new_revs[last_revision] = head

        # We care about all commits (--full-history and --sparse) and
        # we want reconstruct the topology and thus do not care
        # about ordering by time (--topo-order). We ask for the ones
        # we need to import first to be listed first (--reverse).
        revs = runcmd("git rev-list %s" % rev_list_args, **largs).split()
        logger.debug("To be imported: %s" % revs)
        # Now 'revs' contains all revisions reachable from the top revision.
        # All revisions derived from the 'last_revision' definitely are new,
        # whereas the others may or may not have been imported before. For
        # a linear history in the component, that second set will be empty.
        # To distinguish between them, we also get the shorter list
        # of revisions starting at the ancestor.
        if last_revision:
            ancestor_revs = runcmd("git rev-list --ancestry-path %s" % rev_list_args, **largs).split()
        else:
            ancestor_revs = []
        logger.debug("Ancestors: %s" % ancestor_revs)

        # Sets for the membership tests done once per imported revision;
        # the ordered lists above are kept for iteration and logging.
        revs_set = set(revs)
        ancestor_set = set(ancestor_revs)

        # Now import each revision.
        logger.info("Importing commits from %s..." % name)
        def import_rev(rev):
            global scanned_revs

            # If it is part of the new commits, we definitely need
            # to import it. Otherwise we need to check, we might have
            # imported it before. If it was imported and we merely
            # fail to find it because commit messages did not track
            # the mapping, then we end up importing it again. So
            # combined repos using "updating with history" really should
            # enable the "From ... rev:" commit header modifications.
            if rev not in ancestor_set and rev not in old2new_revs and not scanned_revs:
                logger.debug("Revision %s triggers log analysis." % rev)
                find_revs(old2new_revs, head)
                scanned_revs = True
            new_rev = old2new_revs.get(rev, None)
            if new_rev:
                return new_rev

            # If the commit is not in the original list of revisions
            # to be imported, then it must be a parent of one of those
            # commits and it was skipped during earlier imports or not
            # found. Importing such merge commits leads to very ugly
            # history (long cascade of merge commits which all point
            # to to older commits) when switching from "update via
            # patches" to "update with history".
            #
            # We can avoid importing merge commits if all non-merge commits
            # reachable from it were already imported. In that case we
            # can root the new commits in the current head revision.
            def is_imported(prev):
                parents = runcmd("git show --no-patch --pretty=format:%P " + prev, **largs).split()
                if len(parents) > 1:
                    for p in parents:
                        if not is_imported(p):
                            logger.debug("Must import %s because %s is not imported." % (rev, p))
                            return False
                    return True
                elif prev in old2new_revs:
                    return True
                else:
                    logger.debug("Must import %s because %s is not imported." % (rev, prev))
                    return False
            if rev not in revs_set and is_imported(rev):
                old2new_revs[rev] = head
                return head

            # Need to import rev. Collect some information about it.
            logger.debug("Importing %s" % rev)
            (parents, author_name, author_email, author_timestamp, body) = \
                runcmd("git show --no-patch --pretty=format:%P%x00%an%x00%ae%x00%at%x00%B " + rev, **largs).split(chr(0))
            parents = parents.split()
            if parents:
                # Arbitrarily pick the first parent as base. It may or may not have
                # been imported before. For example, if the parent is a merge commit
                # and previously the combined repository used patching as update
                # method, then the actual merge commit parent never was imported.
                # To cover this, We recursively import parents.
                parent = parents[0]
                new_parent = import_rev(parent)
                # Clean index and working tree. TODO: can we combine this and the
                # next into one command with less file IO?
                # "git reset --hard" does not work, it changes HEAD of the parent
                # repo, which we wanted to avoid. Probably need to keep
                # track of the rev that corresponds to the index and use apply_commit().
                runcmd("git rm -q --ignore-unmatch -rf .", **wargs)
                # Update index and working tree to match the parent.
                runcmd("git checkout -q -f %s ." % new_parent, **wargs)
            else:
                parent = None
                # Clean index and working tree.
                runcmd("git rm -q --ignore-unmatch -rf .", **wargs)

            # Modify index and working tree such that it mirrors the commit.
            apply_commit(parent, rev, largs, wargs, dest_dir, file_filter=file_filter)

            # Now commit.
            new_tree = runcmd("git write-tree", **wargs).strip()
            env = dict(wenv)
            env['GIT_AUTHOR_NAME'] = author_name
            env['GIT_AUTHOR_EMAIL'] = author_email
            env['GIT_AUTHOR_DATE'] = author_timestamp
            if hook:
                # Need to turn the verbatim commit message into something resembling a patch header
                # for the hook.
                with tempfile.NamedTemporaryFile(mode='wt', delete=False) as patch:
                    patch.write('Subject: [PATCH] ')
                    patch.write(body)
                    patch.write('\n---\n')
                # The file is closed now, so the hook sees the complete content.
                try:
                    runcmd([hook, patch.name, rev, name])
                    with open(patch.name) as f:
                        body = f.read()[len('Subject: [PATCH] '):][:-len('\n---\n')]
                finally:
                    # delete=False leaves the removal to us; without it one
                    # temporary file per imported commit would stay behind.
                    try:
                        os.unlink(patch.name)
                    except OSError:
                        # Best-effort cleanup, must not mask an error from above.
                        pass

            # We can skip non-merge commits that did not change any files. Those are typically
            # the result of file filtering, although they could also have been introduced
            # intentionally upstream, in which case we drop some information here.
            if len(parents) == 1:
                parent_rev = import_rev(parents[0])
                old_tree = runcmd("git show -s --pretty=format:%T " + parent_rev, **wargs).strip()
                commit = old_tree != new_tree
                if not commit:
                    new_rev = parent_rev
            else:
                commit = True
            if commit:
                new_rev = runcmd("git commit-tree".split() + add_p([import_rev(p) for p in parents]) +
                                 ["-m", body, new_tree],
                                 env=env).strip()
            old2new_revs[rev] = new_rev

            return new_rev

        if revs:
            for rev in revs:
                import_rev(rev)
            # Remember how to update our current head. New components get added,
            # updated components get the delta between current head and the updated component
            # applied.
            additional_heads[old2new_revs[revs[-1]]] = head if repo['last_revision'] else None
            repo['last_revision'] = revs[-1]

    # Now construct the final merge commit. We create the tree by
    # starting with the head and applying the changes from each
    # components imported head revision.
    if additional_heads:
        runcmd("git reset --hard", **wargs)
        for rev, base in additional_heads.items():
            apply_commit(base, rev, wargs, wargs, None)

        # Commit with all component branches as parents as well as the previous head.
        logger.info("Writing final merge commit...")
        msg = conf_commit_msg(conf, components)
        new_tree = runcmd("git write-tree", **wargs).strip()
        new_rev = runcmd("git commit-tree".split() +
                         add_p([head] + list(additional_heads.keys())) +
                         ["-m", msg, new_tree],
                         **wargs).strip()
        # And done! This is the first time we change the HEAD in the actual work tree.
        runcmd("git reset --hard %s" % new_rev)

        # Update and stage the (potentially modified)
        # combo-layer.conf, but do not commit separately.
        for name in repos:
            repo = conf.repos[name]
            rev = repo['last_revision']
            conf.update(name, "last_revision", rev)
        if commit_conf_file(conf, components, False):
            # Must augment the previous commit.
            runcmd("git commit --amend -C HEAD")
1213
1214
# Becomes True once import_rev() has triggered find_revs(), so that the
# log of the combined repo is analyzed at most once.
scanned_revs = False
def find_revs(old2new, head):
    '''Extend old2new (original commit hash -> commit hash in the
    combined repo) with the mappings recorded in the commit messages
    reachable from head. Relies on the "From ... rev: ..." convention;
    entries already present in old2new are left untouched.'''
    logger.info("Analyzing log messages to find previously imported commits...")
    known_before = len(old2new)
    log_cmd = "git log --grep='From .* rev: [a-fA-F0-9][a-fA-F0-9]*' --pretty=format:%H%x00%B%x00 " + head
    fields = runcmd(log_cmd).split(chr(0))
    marker = re.compile(r'From .* rev: ([a-fA-F0-9]+)')
    # The output alternates between commit hash and message body; a
    # trailing field without a partner is ignored.
    for new_rev, body in zip(fields[0::2], fields[1::2]):
        # Use the last one, in the unlikely case there are more than one.
        matches = marker.findall(body)
        rev = matches[-1]
        if rev in old2new:
            continue
        old2new[rev] = new_rev.strip()
    logger.info("Found %d additional commits, leading to: %s" % (len(old2new) - known_before, old2new))
1230
1231
def apply_commit(parent, rev, largs, wargs, dest_dir, file_filter=None):
    '''Compare revision against parent, remove files deleted in the
    commit, re-write new or modified ones. Moves them into dest_dir.
    Optionally filters files.

    parent      -- revision to diff against; when false, all files of
                   rev are copied
    rev         -- revision whose content gets applied
    largs       -- keyword arguments for runcmd() selecting the repository
                   that contains parent and rev
    wargs       -- keyword arguments for runcmd() selecting the work tree
                   that gets modified; wargs["destdir"] is its directory
    dest_dir    -- directory below the work tree receiving the files;
                   a false value means "."
    file_filter -- optional callable which removes unwanted entries from
                   a list of paths in place
    '''
    if not dest_dir:
        dest_dir = "."
    # -r recurses into sub-directories, given is the full overview of
    # what changed.  We do not care about copy/edits or renames, so we
    # can disable those with --no-renames (but we still parse them,
    # because it was not clear from git documentation whether C and M
    # lines can still occur).
    logger.debug("Applying changes between %s and %s in %s" % (parent, rev, largs["destdir"]))
    delete = []
    update = []
    if parent:
        # Apply delta.
        changes = runcmd("git diff-tree --no-commit-id --no-renames --name-status -r --raw -z %s %s" % (parent, rev), **largs).split(chr(0))
        # Fields alternate between status and name; the empty field after
        # the final NUL has no partner and is dropped by zip().
        for status, name in zip(*[iter(changes)]*2):
            if status[0] in "ACMRT":
                update.append(name)
            elif status[0] == "D":
                delete.append(name)
            else:
                logger.error("Unknown status %s of file %s in revision %s" % (status, name, rev))
                sys.exit(1)
    else:
        # Copy all files. Every name is followed by NUL, so splitting
        # leaves an empty string behind the last one. It is not a file
        # and must not be passed on as a path.
        listing = runcmd("git ls-tree -r --name-only -z %s" % rev, **largs)
        update.extend(path for path in listing.split(chr(0)) if path)

    # Include/exclude files as define in the component config.
    # Both updated and deleted file lists get filtered, because it might happen
    # that a file gets excluded, pulled from a different component, and then the
    # excluded file gets deleted. In that case we must keep the copy.
    if file_filter:
        file_filter(update)
        file_filter(delete)

    # We export into a tar archive here and extract with tar because it is simple (no
    # need to implement file and symlink writing ourselves) and gives us some degree
    # of parallel IO. The downside is that we have to pass the list of files via
    # command line parameters - hopefully there will never be too many at once.
    if update:
        target = os.path.join(wargs["destdir"], dest_dir)
        if not os.path.isdir(target):
            os.makedirs(target)
        quoted_target = shlex.quote(target)
        # os.sysconf('SC_ARG_MAX') is lying: running a command with
        # string length 629343 already failed with "Argument list too
        # long" although SC_ARG_MAX = 2097152. "man execve" explains
        # the limitations, but those are pretty complicated. So here
        # we just hard-code a fixed value which is more likely to work.
        max_cmdsize = 64 * 1024
        # Only used in debug messages.
        arg_max = os.sysconf('SC_ARG_MAX')
        # Walk through the list with an index instead of repeatedly
        # removing the first element.
        position = 0
        while position < len(update):
            quoted_args = []
            unquoted_args = []
            cmdsize = 100 + len(quoted_target)
            while position < len(update):
                path = update[position]
                quoted_next = shlex.quote(path)
                size_next = len(quoted_next) + len(dest_dir) + 1
                logger.debug('cmdline length %d + %d < %d?' % (cmdsize, size_next, arg_max))
                # Every batch takes at least one entry, otherwise a single
                # oversized entry would keep the outer loop spinning forever.
                if quoted_args and cmdsize + size_next >= max_cmdsize:
                    logger.debug('Breaking the cmdline at length %d' % cmdsize)
                    break
                quoted_args.append(quoted_next)
                unquoted_args.append(path)
                cmdsize += size_next
                position += 1
            logger.debug('Final cmdline length %d / %d' % (cmdsize, arg_max))
            cmd = "git archive %s %s | tar -C %s -xf -" % (rev, ' '.join(quoted_args), quoted_target)
            logger.debug('First cmdline length %d' % len(cmd))
            runcmd(cmd, **largs)
            cmd = "git add -f".split() + [os.path.join(dest_dir, x) for x in unquoted_args]
            logger.debug('Second cmdline length %d' % reduce(lambda x, y: x + len(y), cmd, 0))
            runcmd(cmd, **wargs)
    if delete:
        runcmd("git rm -f --ignore-unmatch".split() + [os.path.join(dest_dir, x) for x in delete], **wargs)
1312
def action_error(conf, args):
    """Log that the action given as args[0] is invalid; conf is not used."""
    requested = args[0]
    message = "invalid action %s" % requested
    logger.info(message)
1315
# Dispatch table: maps each action name accepted on the command line to
# the function implementing it. main() reports any action that is not a
# key of this table as unsupported.
actions = {
    "init": action_init,
    "update": action_update,
    "pull": action_pull,
    "splitpatch": action_splitpatch,
    "sync-revs": action_sync_revs,
}
1323
def main():
    """
    Parse the command line and run the requested combo-layer action.

    Returns an exit status suitable for sys.exit(): 1 when no action was
    given, when the action is not in the actions table, or when the
    config file does not exist (an error is logged and the help text is
    printed in each case); 0 once the selected action handler has
    returned.
    """
    parser = optparse.OptionParser(
        version = "Combo Layer Repo Tool version %s" % __version__,
        usage = """%prog [options] action

Create and update a combination layer repository from multiple component repositories.

Action:
  init                   initialise the combo layer repo
  update [components]    get patches from component repos and apply them to the combo repo
  pull [components]      just pull component repos only
  sync-revs [components] update the config file's last_revision for each repository
  splitpatch [commit]    generate commit patch and split per component, default commit is HEAD""")

    parser.add_option("-c", "--conf", help = "specify the config file (conf/combo-layer.conf is the default).",
               action = "store", dest = "conffile", default = "conf/combo-layer.conf")

    parser.add_option("-i", "--interactive", help = "interactive mode, user can edit the patch list and patches",
               action = "store_true", dest = "interactive", default = False)

    parser.add_option("-D", "--debug", help = "output debug information",
               action = "store_true", dest = "debug", default = False)

    parser.add_option("-n", "--no-pull", help = "skip pulling component repos during update",
               action = "store_true", dest = "nopull", default = False)

    parser.add_option("--hard-reset",
               help = "instead of pull do fetch and hard-reset in component repos",
               action = "store_true", dest = "hard_reset", default = False)

    parser.add_option("-H", "--history", help = "import full history of components during init",
                      action = "store_true", default = False)

    # The whole of sys.argv is parsed, so args[0] is the program name and
    # the action, when present, is args[1].
    options, args = parser.parse_args(sys.argv)

    # Reject unusable invocations with a non-zero status so that callers
    # (shell scripts, CI jobs) can detect the failure.
    if len(args) == 1:
        logger.error("No action specified, exiting")
        parser.print_help()
        return 1

    action = args[1]
    if action not in actions:
        logger.error("Unsupported action %s, exiting\n" % action)
        parser.print_help()
        return 1

    if not os.path.exists(options.conffile):
        logger.error("No valid config file, exiting\n")
        parser.print_help()
        return 1

    if options.debug:
        logger.setLevel(logging.DEBUG)
    confdata = Configuration(options)
    initmode = (action == 'init')
    confdata.sanity_check(initmode)

    # Dispatch to action handler. Membership was checked above, so the
    # action_error fallback is purely defensive. The handler receives the
    # argument list starting at the action name.
    actions.get(action, action_error)(confdata, args[1:])
    return 0
1376
if __name__ == "__main__":
    # Script entry point: run main() and hand its result to sys.exit().
    # Start from the failure status; it is only replaced when main()
    # returns normally.
    exit_status = 1
    try:
        exit_status = main()
    except Exception:
        # Top-level boundary: show the traceback and exit with status 1.
        import traceback
        traceback.print_exc()
    sys.exit(exit_status)
1385