xref: /openbmc/openbmc/poky/scripts/combo-layer (revision 1a4b7ee2)
1#!/usr/bin/env python3
2# ex:ts=4:sw=4:sts=4:et
3# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
4#
5# Copyright 2011 Intel Corporation
6# Authored-by:  Yu Ke <ke.yu@intel.com>
7#               Paul Eggleton <paul.eggleton@intel.com>
8#               Richard Purdie <richard.purdie@intel.com>
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License version 2 as
12# published by the Free Software Foundation.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License along
20# with this program; if not, write to the Free Software Foundation, Inc.,
21# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22
23import fnmatch
24import os, sys
25import optparse
26import logging
27import subprocess
28import tempfile
29import configparser
30import re
31import copy
32import pipes
33import shutil
34from collections import OrderedDict
35from string import Template
36from functools import reduce
37
# Version string of this script.
__version__ = "0.2.1"
39
def logger_create():
    """
        Configure the root logger for console output and return it.

        A stream handler that prefixes each message with an
        [HH:MM:SS] time stamp is attached and the level is set to INFO.
    """
    formatter = logging.Formatter("[%(asctime)s] %(message)s", "%H:%M:%S")
    console = logging.StreamHandler()
    console.setFormatter(formatter)

    root = logging.getLogger("")
    root.addHandler(console)
    root.setLevel(logging.INFO)
    return root
47
# Module-wide logger used by the functions in this file; set up once at import time.
logger = logger_create()
49
def get_current_branch(repodir=None):
    """
        Return the name of the branch checked out in repodir
        (current directory when repodir is None/empty).

        Returns "master" when there is no .git entry yet, and an
        empty string when git cannot resolve HEAD to a symbolic ref.
        A leading "refs/heads/" is removed from the result.
    """
    heads_prefix = "refs/heads/"
    try:
        git_marker = os.path.join(repodir or '', ".git")
        if not os.path.exists(git_marker):
            # Repo not created yet (i.e. during init) so just assume master
            return "master"
        ref = runcmd("git symbolic-ref HEAD 2>/dev/null", repodir).strip()
    except subprocess.CalledProcessError:
        return ""

    if ref.startswith(heads_prefix):
        return ref[len(heads_prefix):]
    return ref
61
class Configuration(object):
    """
    Manages the configuration

    For an example config file, see combo-layer.conf.example

    Every attribute of the command line ``options`` object is copied onto
    the instance. The main config file (``self.conffile``) is parsed into
    ``self.repos``, a dict mapping each section (component) name to a dict
    of its options. The special section ``combo-layer-settings`` holds
    global settings instead of a component. If ``<name>-local.conf`` exists
    next to ``<name>.conf``, its sections are overlaid on top of the
    components from the main file.
    """
    def __init__(self, options):
        """
        Load the main and (optional) local configuration files.

        options: object whose attributes are copied onto this instance;
                 it must provide at least ``conffile``.
        """
        for key, val in options.__dict__.items():
            setattr(self, key, val)

        # Apply special type transformations for some properties.
        # Type matches the RawConfigParser.get*() methods.
        typed_options = {'signoff': 'boolean', 'update': 'boolean', 'history': 'boolean'}

        def readsection(parser, section, repo):
            # Copy all options of 'section' into self.repos[repo].
            for (name, value) in parser.items(section):
                if value.startswith("@"):
                    # NOTE(security): values prefixed with "@" are evaluated as
                    # Python expressions. Only use configuration files from a
                    # trusted source.
                    self.repos[repo][name] = eval(value.strip("@"))
                else:
                    if name in typed_options:
                        value = getattr(parser, 'get' + typed_options[name])(section, name)
                    self.repos[repo][name] = value

        def readglobalsection(parser, section):
            # Only "commit_msg" is recognised; other options are ignored.
            for (name, value) in parser.items(section):
                if name == "commit_msg":
                    self.commit_msg_template = value

        logger.debug("Loading config file %s" % self.conffile)
        self.parser = configparser.ConfigParser()
        with open(self.conffile) as f:
            # read_file() replaces readfp(), which was removed in Python 3.12.
            self.parser.read_file(f)

        # initialize default values
        self.commit_msg_template = "Automatic commit to update last_revision"

        self.repos = {}
        for repo in self.parser.sections():
            if repo == "combo-layer-settings":
                # special handling for global settings
                readglobalsection(self.parser, repo)
            else:
                self.repos[repo] = {}
                readsection(self.parser, repo, repo)

        # Load local configuration, if available
        self.localconffile = None
        self.localparser = None
        self.combobranch = None
        conf_suffix = '.conf'
        if self.conffile.endswith(conf_suffix):
            # Swap only the trailing ".conf"; a plain str.replace() would also
            # rewrite ".conf" occurring in a directory name of the path.
            lcfile = self.conffile[:-len(conf_suffix)] + '-local.conf'
            if os.path.exists(lcfile):
                # Read combo layer branch
                self.combobranch = get_current_branch()
                logger.debug("Combo layer branch is %s" % self.combobranch)

                self.localconffile = lcfile
                logger.debug("Loading local config file %s" % self.localconffile)
                self.localparser = configparser.ConfigParser()
                with open(self.localconffile) as f:
                    self.localparser.read_file(f)

                for section in self.localparser.sections():
                    if '|' in section:
                        # "<component>|<branch>" sections only apply when
                        # the combo repository is on that branch.
                        sectionvals = section.split('|')
                        repo = sectionvals[0]
                        if sectionvals[1] != self.combobranch:
                            continue
                    else:
                        repo = section
                    if repo in self.repos:
                        readsection(self.localparser, section, repo)

    def update(self, repo, option, value, initmode=False):
        """
        Set 'option' of component 'repo' to 'value' and write the change
        back to the configuration file it belongs to.

        The local config file (section "<repo>|<combo branch>") is used when
        a local config was loaded and the main config does not already
        define the option; otherwise the main config is rewritten.
        With initmode=True a missing local section is created first.
        """
        # If the main config has the option already, that is what we
        # are expected to modify.
        if self.localparser and not self.parser.has_option(repo, option):
            parser = self.localparser
            section = "%s|%s" % (repo, self.combobranch)
            conffile = self.localconffile
            if initmode and not parser.has_section(section):
                parser.add_section(section)
        else:
            parser = self.parser
            section = repo
            conffile = self.conffile
        parser.set(section, option, value)
        with open(conffile, "w") as f:
            parser.write(f)
        self.repos[repo][option] = value

    def sanity_check(self, initmode=False):
        """
        Verify that every component defines the required options and that
        the external 'filterdiff' tool is available; exit with status 1
        otherwise.

        With initmode=True "last_revision" is not required. As a side
        effect, each component's "dest_dir" is normalised (unset becomes ".").
        """
        required_options=["src_uri", "local_repo_dir", "dest_dir", "last_revision"]
        if initmode:
            required_options.remove("last_revision")
        msg = ""
        missing_options = []
        for name in self.repos:
            for option in required_options:
                if option not in self.repos[name]:
                    msg = "%s\nOption %s is not defined for component %s" %(msg, option, name)
                    missing_options.append(option)
            # Sanitize dest_dir so that we do not have to deal with edge cases
            # (unset, empty string, double slashes) in the rest of the code.
            # It not being set will still be flagged as error because it is
            # listed as required option above; that could be changed now.
            dest_dir = os.path.normpath(self.repos[name].get("dest_dir", "."))
            self.repos[name]["dest_dir"] = "." if not dest_dir else dest_dir
        if msg != "":
            logger.error("configuration file %s has the following error: %s" % (self.conffile,msg))
            if self.localconffile and 'last_revision' in missing_options:
                logger.error("local configuration file %s may be missing configuration for combo branch %s" % (self.localconffile, self.combobranch))
            sys.exit(1)

        # filterdiff is required by action_splitpatch, so check its availability
        if subprocess.call("which filterdiff > /dev/null 2>&1", shell=True) != 0:
            logger.error("ERROR: patchutils package is missing, please install it (e.g. # apt-get install patchutils)")
            sys.exit(1)
180
def runcmd(cmd,destdir=None,printerr=True,out=None,env=None):
    """
        execute command, raise CalledProcessError if fail
        return output if succeed

        cmd      -- command to run; a string is run through the shell,
                    a list is executed directly
        destdir  -- working directory for the command (default: current)
        printerr -- log the captured output as error when the command fails
        out      -- optional file object receiving stdout; when given, only
                    stderr is captured and returned
        env      -- environment for the command (default: os.environ)

        The returned text is stdout+stderr combined when 'out' is not
        given, otherwise stderr only, decoded as UTF-8.
    """
    logger.debug("run cmd '%s' in %s" % (cmd, os.getcwd() if destdir is None else destdir))
    # The temporary capture file is closed again when we are done with it,
    # regardless of whether the command succeeded.
    with tempfile.TemporaryFile() as err:
        if not out:
            # No separate stdout sink: capture both streams together.
            out = err
        try:
            subprocess.check_call(cmd, stdout=out, stderr=err, cwd=destdir, shell=isinstance(cmd, str), env=env or os.environ)
        except subprocess.CalledProcessError:
            err.seek(0)
            if printerr:
                # Decode so that the log shows the text and not a bytes repr;
                # be lenient because this is diagnostic output only.
                logger.error("%s" % err.read().decode('utf-8', errors='replace'))
            raise

        err.seek(0)
        output = err.read().decode('utf-8')
    logger.debug("output: %s" % output.replace(chr(0), '\\0'))
    return output
204
def action_init(conf, args):
    """
        Clone component repositories
        Check git is initialised; if not, copy initial data from component repos

        conf -- Configuration instance (uses conf.repos, conf.history,
                conf.conffile, conf.localconffile and conf.update())
        args -- positional command line arguments (unused here)

        Without conf.history the selected files of each component are
        extracted into the combo repository and staged. With conf.history
        each component's history is fetched into a branch named after the
        component, optionally rewritten (hook / dest_dir / since_revision),
        and an octopus merge of all components is prepared. In both cases
        the final commit is left to the user.
    """
    for name in conf.repos:
        ldir = conf.repos[name]['local_repo_dir']
        if not os.path.exists(ldir):
            logger.info("cloning %s to %s" %(conf.repos[name]['src_uri'], ldir))
            subprocess.check_call("git clone %s %s" % (conf.repos[name]['src_uri'], ldir), shell=True)
    if not os.path.exists(".git"):
        runcmd("git init")
        if conf.history:
            # Need a common ref for all trees.
            # NOTE(review): this relies on the initial branch being called
            # "master" -- confirm for git setups with another init.defaultBranch.
            runcmd('git commit -m "initial empty commit" --allow-empty')
            startrev = runcmd('git rev-parse master').strip()

        for name in conf.repos:
            repo = conf.repos[name]
            ldir = repo['local_repo_dir']
            branch = repo.get('branch', "master")
            lastrev = repo.get('last_revision', None)
            if lastrev and lastrev != "HEAD":
                initialrev = lastrev
                if branch:
                    if not check_rev_branch(name, ldir, lastrev, branch):
                        sys.exit(1)
                logger.info("Copying data from %s at specified revision %s..." % (name, lastrev))
            else:
                lastrev = None
                initialrev = branch
                logger.info("Copying data from %s..." % name)
            # Sanity check initialrev and turn it into hash (required for copying history,
            # because resolving a name ref only works in the component repo).
            rev = runcmd('git rev-parse %s' % initialrev, ldir).strip()
            if rev != initialrev:
                try:
                    refs = runcmd('git show-ref -s %s' % initialrev, ldir).split('\n')
                    if len(set(refs)) > 1:
                        # Happens for example when configured to track
                        # "master" and there is a refs/heads/master. The
                        # traditional behavior from "git archive" (preserved
                        # here) is to choose the first one. This might not be
                        # intended, so at least warn about it.
                        logger.warning("%s: initial revision '%s' not unique, picking result of rev-parse = %s" %
                                    (name, initialrev, refs[0]))
                        initialrev = rev
                except subprocess.CalledProcessError:
                    # show-ref fails for hashes. Skip the sanity warning in that case.
                    # Only the command failure is ignored, so that e.g. Ctrl-C
                    # still interrupts the script.
                    pass
                initialrev = rev
            dest_dir = repo['dest_dir']
            if dest_dir != ".":
                extract_dir = os.path.join(os.getcwd(), dest_dir)
                if not os.path.exists(extract_dir):
                    os.makedirs(extract_dir)
            else:
                extract_dir = os.getcwd()
            file_filter = repo.get('file_filter', "")
            exclude_patterns = repo.get('file_exclude', '').split()
            def copy_selected_files(initialrev, extract_dir, file_filter, exclude_patterns, ldir,
                                    subdir=""):
                # Extract the files of 'initialrev' (from the repository in
                # 'ldir') into 'extract_dir', honouring file_filter, and then
                # delete whatever matches one of exclude_patterns.
                #
                # When working inside a filtered branch which had the
                # files already moved, we need to prepend the
                # subdirectory to all filters, otherwise they would
                # not match.
                if subdir == '.':
                    subdir = ''
                elif subdir:
                    subdir = os.path.normpath(subdir)
                    file_filter = ' '.join([subdir + '/' + x for x in file_filter.split()])
                    exclude_patterns = [subdir + '/' + x for x in exclude_patterns]
                # To handle both cases, we cd into the target
                # directory and optionally tell tar to strip the path
                # prefix when the files were already moved.
                subdir_components = len(subdir.split(os.path.sep)) if subdir else 0
                strip=('--strip-components=%d' % subdir_components) if subdir else ''
                # TODO: file_filter wild cards do not work (and haven't worked before either), because
                # a) GNU tar requires a --wildcards parameter before turning on wild card matching.
                # b) The semantic is not as intended (src/*.c also matches src/foo/bar.c,
                #    in contrast to the other use of file_filter as parameter of "git archive"
                #    where it only matches .c files directly in src).
                files = runcmd("git archive %s %s | tar -x -v %s -C %s %s" %
                               (initialrev, subdir,
                                strip, extract_dir, file_filter),
                               ldir)
                if exclude_patterns:
                    # Implement file removal by letting tar create the
                    # file and then deleting it in the file system
                    # again. Uses the list of files created by tar (easier
                    # than walking the tree).
                    for file in files.split('\n'):
                        if file.endswith(os.path.sep):
                            continue
                        for pattern in exclude_patterns:
                            if fnmatch.fnmatch(file, pattern):
                                os.unlink(os.path.join(*([extract_dir] + ['..'] * subdir_components + [file])))
                                break

            if not conf.history:
                copy_selected_files(initialrev, extract_dir, file_filter, exclude_patterns, ldir)
            else:
                # First fetch remote history into local repository.
                # We need a ref for that, so ensure that there is one.
                refname = "combo-layer-init-%s" % name
                runcmd("git branch -f %s %s" % (refname, initialrev), ldir)
                runcmd("git fetch %s %s" % (ldir, refname))
                runcmd("git branch -D %s" % refname, ldir)
                # Make that the head revision.
                runcmd("git checkout -b %s %s" % (name, initialrev))
                # Optional: cut the history by replacing the given
                # start point(s) with commits providing the same
                # content (aka tree), but with commit information that
                # makes it clear that this is an artificially created
                # commit and nothing the original authors had anything
                # to do with.
                since_rev = repo.get('since_revision', '')
                if since_rev:
                    committer = runcmd('git var GIT_AUTHOR_IDENT').strip()
                    # Same time stamp, no name.
                    # Raw strings: "\d" is not a valid escape in a normal
                    # string literal.
                    author = re.sub(r'.* (\d+ [+-]\d+)', r'unknown <unknown> \1', committer)
                    logger.info('author %s' % author)
                    for rev in since_rev.split():
                        # Resolve in component repo...
                        rev = runcmd('git log --oneline --no-abbrev-commit -n1 %s' % rev, ldir).split()[0]
                        # ... and then get the tree in current
                        # one. The commit should be in both repos with
                        # the same tree, but better check here.
                        tree = runcmd('git show -s --pretty=format:%%T %s' % rev).strip()
                        # A throw-away shell script acts as GIT_EDITOR and
                        # overwrites the commit object text handed to it.
                        with tempfile.NamedTemporaryFile(mode='wt') as editor:
                            editor.write('''cat >$1 <<EOF
tree %s
author %s
committer %s

%s: squashed import of component

This commit copies the entire set of files as found in
%s %s

For more information about previous commits, see the
upstream repository.

Commit created by combo-layer.
EOF
''' % (tree, author, committer, name, name, since_rev))
                            editor.flush()
                            os.environ['GIT_EDITOR'] = 'sh %s' % editor.name
                            runcmd('git replace --edit %s' % rev)

                # Optional: rewrite history to change commit messages or to move files.
                if 'hook' in repo or dest_dir != ".":
                    filter_branch = ['git', 'filter-branch', '--force']
                    with tempfile.NamedTemporaryFile(mode='wt') as hookwrapper:
                        if 'hook' in repo:
                            # Create a shell script wrapper around the original hook that
                            # can be used by git filter-branch. Hook may or may not have
                            # an absolute path.
                            hook = repo['hook']
                            hook = os.path.join(os.path.dirname(conf.conffile), '..', hook)
                            # The wrappers turns the commit message
                            # from stdin into a fake patch header.
                            # This is good enough for changing Subject
                            # and commit msg body with normal
                            # combo-layer hooks.
                            hookwrapper.write('''set -e
tmpname=$(mktemp)
trap "rm $tmpname" EXIT
echo -n 'Subject: [PATCH] ' >>$tmpname
cat >>$tmpname
if ! [ $(tail -c 1 $tmpname | od -A n -t x1) == '0a' ]; then
    echo >>$tmpname
fi
echo '---' >>$tmpname
%s $tmpname $GIT_COMMIT %s
tail -c +18 $tmpname | head -c -4
''' % (hook, name))
                            hookwrapper.flush()
                            filter_branch.extend(['--msg-filter', 'bash %s' % hookwrapper.name])
                        if dest_dir != ".":
                            parent = os.path.dirname(dest_dir)
                            if not parent:
                                parent = '.'
                            # May run outside of the current directory, so do not assume that .git exists.
                            filter_branch.extend(['--tree-filter', 'mkdir -p .git/tmptree && find . -mindepth 1 -maxdepth 1 ! -name .git -print0 | xargs -0 -I SOURCE mv SOURCE .git/tmptree && mkdir -p %s && mv .git/tmptree %s' % (parent, dest_dir)])
                        filter_branch.append('HEAD')
                        runcmd(filter_branch)
                        runcmd('git update-ref -d refs/original/refs/heads/%s' % name)
                repo['rewritten_revision'] = runcmd('git rev-parse HEAD').strip()
                repo['stripped_revision'] = repo['rewritten_revision']
                # Optional filter files: remove everything and re-populate using the normal filtering code.
                # Override any potential .gitignore.
                if file_filter or exclude_patterns:
                    runcmd('git rm -rf .')
                    if not os.path.exists(extract_dir):
                        os.makedirs(extract_dir)
                    copy_selected_files('HEAD', extract_dir, file_filter, exclude_patterns, '.',
                                        subdir=dest_dir)
                    runcmd('git add --all --force .')
                    if runcmd('git status --porcelain'):
                        # Something to commit.
                        runcmd(['git', 'commit', '-m',
                                '''%s: select file subset

Files from the component repository were chosen based on
the following filters:
file_filter = %s
file_exclude = %s''' % (name, file_filter or '<empty>', repo.get('file_exclude', '<empty>'))])
                        repo['stripped_revision'] = runcmd('git rev-parse HEAD').strip()

            if not lastrev:
                # No explicit revision was configured: record the one we
                # actually imported so that later updates start from it.
                lastrev = runcmd('git rev-parse %s' % initialrev, ldir).strip()
                conf.update(name, "last_revision", lastrev, initmode=True)

        if not conf.history:
            runcmd("git add .")
        else:
            # Create Octopus merge commit according to http://stackoverflow.com/questions/10874149/git-octopus-merge-with-unrelated-repositoies
            runcmd('git checkout master')
            merge = ['git', 'merge', '--no-commit']
            for name in conf.repos:
                repo = conf.repos[name]
                # Use branch created earlier.
                merge.append(name)
                # Root all commits which have no parent in the common
                # ancestor in the new repository.
                for start in runcmd('git log --pretty=format:%%H --max-parents=0 %s --' % name).split('\n'):
                    runcmd('git replace --graft %s %s' % (start, startrev))
            try:
                runcmd(merge)
            except Exception as error:
                logger.info('''Merging component repository history failed, perhaps because of merge conflicts.
It may be possible to commit anyway after resolving these conflicts.

%s''' % error)
            # Create MERGE_HEAD and MERGE_MSG. "git merge" itself
            # does not create MERGE_HEAD in case of a (harmless) failure,
            # and we want certain auto-generated information in the
            # commit message for future reference and/or automation.
            with open('.git/MERGE_HEAD', 'w') as head:
                with open('.git/MERGE_MSG', 'w') as msg:
                    msg.write('repo: initial import of components\n\n')
                    # head.write('%s\n' % startrev)
                    for name in conf.repos:
                        repo = conf.repos[name]
                        # <upstream ref> <rewritten ref> <rewritten + files removed>
                        msg.write('combo-layer-%s: %s %s %s\n' % (name,
                                                                  repo['last_revision'],
                                                                  repo['rewritten_revision'],
                                                                  repo['stripped_revision']))
                        rev = runcmd('git rev-parse %s' % name).strip()
                        head.write('%s\n' % rev)

        if conf.localconffile:
            # The local config must not be committed: unstage it and, if it
            # had been staged, list it in .gitignore.
            localadded = True
            try:
                runcmd("git rm --cached %s" % conf.localconffile, printerr=False)
            except subprocess.CalledProcessError:
                localadded = False
            if localadded:
                localrelpath = os.path.relpath(conf.localconffile)
                runcmd("grep -q %s .gitignore || echo %s >> .gitignore" % (localrelpath, localrelpath))
                runcmd("git add .gitignore")
                logger.info("Added local configuration file %s to .gitignore", localrelpath)
        logger.info("Initial combo layer repository data has been created; please make any changes if desired and then use 'git commit' to make the initial commit.")
    else:
        logger.info("Repository already initialised, nothing to do.")
472
473
def check_repo_clean(repodir):
    """
        check if the repo is clean
        exit if repo is dirty

        Untracked "patch-*/" directories reported by
        "git status --porcelain" are ignored.
    """
    output=runcmd("git status --porcelain", repodir)
    # Raw string: "\?" is not a valid escape in a normal string literal.
    r = re.compile(r'\?\? patch-.*/')
    dirtyout = [item for item in output.splitlines() if not r.match(item)]
    if dirtyout:
        logger.error("git repo %s is dirty, please fix it first", repodir)
        sys.exit(1)
485
def check_patch(patchfile):
    """
        Neutralise diffs embedded in the commit message of a patch file.

        Lines of the message (everything before the first line that is
        exactly "---") that look like part of a diff, starting at a line
        beginning with "--- ", are indented by one space so that they are
        not mistaken for the real patch when it is applied. The file is
        rewritten in place (via "<patchfile>.tmp") only if such a diff was
        found; otherwise it is left untouched.
    """
    of = None            # rewritten copy, opened lazily on first embedded diff
    in_patch = False     # currently inside a diff embedded in the message
    beyond_msg = False   # past the "---" separator, i.e. in the real patch
    pending = []         # lines read before we know whether a rewrite is needed
    try:
        with open(patchfile, 'rb') as f:
            for ln in f:
                if not beyond_msg:
                    if ln == b'---\n':
                        if not of:
                            # Message is clean, nothing to rewrite.
                            break
                        in_patch = False
                        beyond_msg = True
                    elif ln.startswith(b'--- '):
                        # We have a diff in the commit message
                        in_patch = True
                        if not of:
                            print('WARNING: %s contains a diff in its commit message, indenting to avoid failure during apply' % patchfile)
                            of = open(patchfile + '.tmp', 'wb')
                            of.write(b''.join(pending))
                            pending = []
                    elif in_patch and ln[0] not in b'+-@ \n\r':
                        # First line that cannot belong to a diff ends it.
                        in_patch = False
                if of:
                    of.write(b' ' + ln if in_patch else ln)
                else:
                    pending.append(ln)
    finally:
        # Close the output even when reading or writing failed.
        if of:
            of.close()
    if of:
        os.rename(patchfile + '.tmp', patchfile)
522
def drop_to_shell(workdir=None):
    """
        Start an interactive shell in workdir so the user can resolve a
        problem by hand.

        Returns True when the shell exits with status 0, False when it
        exits with a non-zero status or when stdin is not a TTY.
    """
    if not sys.stdin.isatty():
        print("Not a TTY so can't drop to shell for resolution, exiting.")
        return False

    shell = os.environ.get('SHELL', 'bash')
    banner = ('Dropping to shell "%s"\n'
              'When you are finished, run the following to continue:\n'
              '       exit    -- continue to apply the patches\n'
              '       exit 1  -- abort\n')
    print(banner % shell)

    if subprocess.call([shell], cwd=workdir) != 0:
        print("Aborting")
        return False
    return True
539
def check_rev_branch(component, repodir, rev, branch):
    """
        Check that revision rev of the repository in repodir is contained
        in branch.

        Logs an error mentioning component and returns False when the
        revision is invalid or not on the branch; returns True otherwise.
    """
    try:
        containing = runcmd("git branch --contains %s" % rev, repodir, printerr=False)
    except subprocess.CalledProcessError as exc:
        # Exit status 129 is treated like "no branch found"; anything
        # else is an unexpected failure.
        if exc.returncode != 129:
            raise
        containing = ""

    if not containing:
        logger.error("%s: specified revision %s is invalid!" % (component, rev))
        return False

    # Keep only the last word of each output line.
    names = [entry.strip().split(' ')[-1] for entry in containing.split("\n")]
    if branch in names:
        return True

    logger.error("%s: specified revision %s is not on specified branch %s!" % (component, rev, branch))
    return False
562
def get_repos(conf, repo_names):
    """
        Return the list of component names to operate on.

        Names are taken from repo_names up to (not including) the first
        entry starting with '-'. Each must exist in conf.repos, otherwise
        an error is logged and the script exits. When no name is given,
        all components whose "update" option is not false are returned.
    """
    # Position of the first option-like argument, if any.
    cut = next((pos for pos, entry in enumerate(repo_names) if entry.startswith('-')),
               len(repo_names))
    selected = list(repo_names[:cut])

    unknown = [entry for entry in selected if entry not in conf.repos]
    if unknown:
        logger.error("Specified component '%s' not found in configuration" % unknown[0])
        sys.exit(1)

    if selected:
        return selected
    return [entry for entry in conf.repos if conf.repos[entry].get("update", True)]
579
def action_pull(conf, args):
    """
        Bring the local checkouts of the selected component repos up to
        date; the combo repository itself is not touched.

        args[1:] names the components (see get_repos). With
        conf.hard_reset the configured branch is reset to FETCH_HEAD,
        otherwise it is fast-forwarded.
    """
    selected = get_repos(conf, args[1:])

    # make sure all repos are clean
    for name in selected:
        check_repo_clean(conf.repos[name]['local_repo_dir'])

    for name in selected:
        settings = conf.repos[name]
        checkout_dir = settings['local_repo_dir']
        branch = settings.get('branch', "master")
        checkout_cmd = "git checkout %s" % branch
        logger.info("update branch %s of component repo %s in %s ..." % (branch, name, checkout_dir))

        if conf.hard_reset:
            logger.info(runcmd("git fetch", checkout_dir))
            runcmd(checkout_cmd, checkout_dir)
            runcmd("git reset --hard FETCH_HEAD", checkout_dir)
            continue

        # Try to pull only the configured branch. Beware that this may fail
        # when the branch is currently unknown (for example, after reconfiguring
        # combo-layer). In that case we need to fetch everything and try the check out
        # and pull again.
        try:
            runcmd(checkout_cmd, checkout_dir, printerr=False)
        except subprocess.CalledProcessError:
            logger.info(runcmd("git fetch", checkout_dir))
            runcmd(checkout_cmd, checkout_dir)
            runcmd("git pull --ff-only", checkout_dir)
        else:
            logger.info(runcmd("git pull --ff-only", checkout_dir))
615
def action_update(conf, args):
    """
        Update the component repos and import their changes.

        Each entry of args[1:] is "component" or "component:revision".
        Depending on the history setting, either
           a patch list is generated and the patches are applied
        or
           the entire component history is re-created and merged
           into the current branch with a merge commit
    """
    components = []
    revisions = {}
    for arg in args[1:]:
        component, colon, revision = arg.partition(':')
        components.append(component)
        if colon:
            revisions[component] = revision
    repos = get_repos(conf, components)

    # make sure combo repo is clean
    check_repo_clean(os.getcwd())

    # Check whether we keep the component histories. Must be
    # set either via --history command line parameter or consistently
    # in combo-layer.conf. Mixing modes is (currently, and probably
    # permanently because it would be complicated) not supported.
    history = True if conf.history else None
    if history is None:
        for name in repos:
            repo_history = conf.repos[name].get('history', False)
            if history is None:
                history = repo_history
            elif history != repo_history:
                logger.error("'history' property is set inconsistently")
                sys.exit(1)

    # Step 1: update the component repos
    if not conf.nopull:
        action_pull(conf, ['arg0'] + components)
    else:
        logger.info("Skipping pull (-n)")

    importer = update_with_history if history else update_with_patches
    importer(conf, components, revisions, repos)
664
def update_with_patches(conf, components, revisions, repos):
    """Update the combined repository by applying component commits as patches.

    For every component in ``repos`` the new commits are exported with
    "git format-patch" into a temporary patch directory, optionally passed
    through the component's hook, and recorded in a per-component patch list
    file.  In interactive mode the user may edit patches and patch lists in a
    shell before they are applied with apply_patchlist().  Afterwards the
    patch directory is removed and the configuration file gets committed.

    Args:
        conf: configuration object; ``conf.repos`` and ``conf.interactive``
            are read here, and each processed repo entry gets a
            ``'patchlist'`` key pointing to its patch list file.
        components: component names, handed on to commit_conf_file().
        revisions: mapping from component name to the revision to update to;
            components without an entry are updated to their branch.
        repos: names of the components to process (keys of ``conf.repos``).

    Exits the process with status 1 if a revision check fails or the
    interactive shell reports failure.
    """
    import uuid
    patch_dir = "patch-%s" % uuid.uuid4()
    if not os.path.exists(patch_dir):
        os.mkdir(patch_dir)

    for name in repos:
        revision = revisions.get(name, None)
        repo = conf.repos[name]
        ldir = repo['local_repo_dir']
        dest_dir = repo['dest_dir']
        branch = repo.get('branch', "master")
        repo_patch_dir = os.path.join(os.getcwd(), patch_dir, name)

        # Step 2: generate the patch list and store to patch dir
        logger.info("Generating patches from %s..." % name)
        top_revision = revision or branch
        if not check_rev_branch(name, ldir, top_revision, branch):
            sys.exit(1)
        if dest_dir != ".":
            # Make the paths inside the patches relative to the combined repo.
            prefix = "--src-prefix=a/%s/ --dst-prefix=b/%s/" % (dest_dir, dest_dir)
        else:
            prefix = ""
        if repo['last_revision'] == "":
            logger.info("Warning: last_revision of component %s is not set, starting from the first commit" % name)
            patch_cmd_range = "--root %s" % top_revision
            rev_cmd_range = top_revision
        else:
            if not check_rev_branch(name, ldir, repo['last_revision'], branch):
                sys.exit(1)
            patch_cmd_range = "%s..%s" % (repo['last_revision'], top_revision)
            rev_cmd_range = patch_cmd_range

        file_filter = repo.get('file_filter',".")

        # Filter out unwanted files
        exclude = repo.get('file_exclude', '')
        if exclude:
            for path in exclude.split():
                p = "%s/%s" % (dest_dir, path) if dest_dir != '.' else path
                file_filter += " ':!%s'" % p

        patch_cmd = "git format-patch -N %s --output-directory %s %s -- %s" % \
            (prefix,repo_patch_dir, patch_cmd_range, file_filter)
        output = runcmd(patch_cmd, ldir)
        logger.debug("generated patch set:\n%s" % output)
        patchlist = output.splitlines()

        rev_cmd = "git rev-list --no-merges %s -- %s" % (rev_cmd_range, file_filter)
        revlist = runcmd(rev_cmd, ldir).splitlines()

        # Step 3: Call repo specific hook to adjust patch
        if 'hook' in repo:
            # hook parameter is: ./hook patchpath revision reponame
            # The patch list is walked forwards while the revision list is
            # walked from its last entry backwards.
            count = len(revlist) - 1
            for patch in patchlist:
                runcmd("%s %s %s %s" % (repo['hook'], patch, revlist[count], name))
                count = count - 1

        # Step 4: write patch list and revision list to file, for user to edit later
        patchlist_file = os.path.join(os.getcwd(), patch_dir, "patchlist-%s" % name)
        repo['patchlist'] = patchlist_file
        # The context manager closes the file even if check_patch() fails.
        with open(patchlist_file, 'w') as f:
            count = len(revlist) - 1
            for patch in patchlist:
                f.write("%s %s\n" % (patch, revlist[count]))
                check_patch(os.path.join(patch_dir, patch))
                count = count - 1

    # Step 5: invoke bash for user to edit patch and patch list
    if conf.interactive:
        print('You may now edit the patch and patch list in %s\n' \
              'For example, you can remove unwanted patch entries from patchlist-*, so that they will be not applied later' % patch_dir)
        if not drop_to_shell(patch_dir):
            sys.exit(1)

    # Step 6: apply the generated and revised patch
    apply_patchlist(conf, repos)
    runcmd("rm -rf %s" % patch_dir)

    # Step 7: commit the updated config file if it's being tracked
    commit_conf_file(conf, components)
748
def conf_commit_msg(conf, components):
    """Build the commit message by expanding conf.commit_msg_template.

    The template's "components" placeholder is replaced by the
    comma-separated names in ``components``, or by the text
    "all components" when the list is empty.
    """
    if len(components) > 0:
        # Name exactly the components that were changed.
        changed = ", ".join(components)
    else:
        changed = "all components"

    return Template(conf.commit_msg_template).substitute(components=changed)
760
def commit_conf_file(conf, components, commit=True):
    """Commit or stage the configuration file if git reports it as modified.

    Args:
        conf: configuration object; ``conf.conffile`` is the file to check.
        components: component names, used to build the commit message.
        commit: when true, commit the file on its own; otherwise only
            stage it with "git add".

    Returns:
        True if the file was reported as modified and has been committed
        or staged, False otherwise (including when "git status" fails,
        e.g. because the file is outside the repository).
    """
    relpath = os.path.relpath(conf.conffile)
    try:
        # Pass the path as a separate argument so that unusual characters
        # in it (e.g. spaces) are not interpreted by a shell.
        output = runcmd(["git", "status", "--porcelain", relpath], printerr=False)
    except Exception:
        # Outside the repository. Deliberately best-effort, but do not
        # swallow KeyboardInterrupt or SystemExit.
        output = None
    if output and output.lstrip().startswith("M"):
        logger.info("Committing updated configuration file")
        if commit:
            msg = conf_commit_msg(conf, components)
            runcmd('git commit -m'.split() + [msg, relpath])
        else:
            runcmd(["git", "add", relpath])
        return True
    return False
778
def apply_patchlist(conf, repos):
    """
        Apply the patches recorded in each component's patch list file to
        the combo repo and update the component's last_revision in conf.
    """
    for name in repos:
        repo = conf.repos[name]
        lastrev = repo["last_revision"]
        prevrev = lastrev

        # Collect the non-blank lines of the patch list file.
        # Note: outside of interactive mode the file must exist, so opening
        # it is attempted unconditionally there and fails if it is missing.
        patchlist = []
        if os.path.exists(repo['patchlist']) or not conf.interactive:
            with open(repo['patchlist']) as f:
                for raw in f:
                    entry = raw.rstrip()
                    if entry:
                        patchlist.append(entry)

        ldir = repo['local_repo_dir']
        branch = repo.get('branch', "master")
        branchrev = runcmd("git rev-parse %s" % branch, ldir).strip()

        if not patchlist:
            logger.info("No patches to apply from %s" % name)
            lastrev = branchrev
        else:
            logger.info("Applying patches from %s..." % name)
            total = len(patchlist)
            for position, entry in enumerate(patchlist, 1):
                # Each entry is "<patch file> <revision>".
                fields = entry.split()
                patchfile = fields[0]
                lastrev = fields[1]
                patchdisp = os.path.relpath(patchfile)
                if os.path.getsize(patchfile) == 0:
                    logger.info("(skipping %d/%d %s - no changes)" % (position, total, patchdisp))
                else:
                    signoff = '-s ' if repo.get('signoff', True) else ''
                    cmd = "git am --keep-cr %s-p1 %s" % (signoff, patchfile)
                    logger.info("Applying %d/%d: %s" % (position, total, patchdisp))
                    try:
                        runcmd(cmd)
                    except subprocess.CalledProcessError:
                        logger.info('Running "git am --abort" to cleanup repo')
                        runcmd("git am --abort")
                        logger.error('"%s" failed' % cmd)
                        logger.info("Please manually apply patch %s" % patchdisp)
                        logger.info("Note: if you exit and continue applying without manually applying the patch, it will be skipped")
                        if not drop_to_shell():
                            # Giving up: remember how far we got before exiting.
                            if prevrev != repo['last_revision']:
                                conf.update(name, "last_revision", prevrev)
                            sys.exit(1)
                prevrev = lastrev

            # Once all patches are applied, last_revision should point to
            # the branch head rather than the last applied patch. The two
            # differ when the head is a merge commit or when patches at the
            # head were intentionally excluded.
            #
            # Without this, after a merge commit the next combo-layer run
            # would only exclude patches reachable from one of the merged
            # branches and try to re-apply already copied patches from the
            # other branches.
            #
            # Intentionally excluded patches will be presented again by the
            # next run. That may or may not be intended, so the code stays
            # conservative and only handles the "head is merge commit" case.
            if lastrev != branchrev:
                head_parents = runcmd("git show --pretty=format:%%P --no-patch %s" % branch, ldir).split()
                if len(head_parents) > 1:
                    lastrev = branchrev

        if lastrev != repo['last_revision']:
            conf.update(name, "last_revision", lastrev)
855
def action_splitpatch(conf, args):
    """
        Generate the patch for one commit of the combo repo (args[1],
        default HEAD) and split it into one patch file per component
        inside the directory "splitpatch-<commit>".
    """
    logger.debug("action_splitpatch")
    commit = args[1] if len(args) > 1 else "HEAD"
    patchdir = "splitpatch-%s" % commit
    if not os.path.exists(patchdir):
        os.mkdir(patchdir)

    # The component whose dest_dir is the root "." gets everything that
    # does not belong to another component, expressed for filterdiff as
    # "-x repo1/* -x repo2/* -x repo3/* ...".
    filerange_root = "".join(
        ' -x "%s/*"' % conf.repos[other]['dest_dir']
        for other in conf.repos
        if conf.repos[other]['dest_dir'] != ".")

    for name in conf.repos:
        dest_dir = conf.repos[name]['dest_dir']
        patch_filename = "%s/%s.patch" % (patchdir, name)
        if dest_dir != ".":
            cmd = "git format-patch --no-prefix -n1 --stdout %s^..%s -- %s > %s" % (commit, commit, dest_dir, patch_filename)
        else:
            cmd = "git format-patch -n1 --stdout %s^..%s | filterdiff -p1 %s > %s" % (commit, commit, filerange_root, patch_filename)
        runcmd(cmd)

        # A patch counts as empty if the file has no content at all or if
        # filterdiff finds nothing in it (only preamble text left over).
        patch_is_empty = os.path.getsize(patch_filename) == 0
        if not patch_is_empty:
            patch_is_empty = runcmd("filterdiff %s" % patch_filename) == ""

        if patch_is_empty:
            os.remove(patch_filename)
            logger.info("(skipping %s - no changes)", name)
        else:
            logger.info(patch_filename)
894
def update_with_history(conf, components, revisions, repos):
    '''Update all components with full history.

    Works by importing all commits reachable from a component's
    current head revision.  If those commits are rooted in an already
    imported commit, their content gets mixed with the content of the
    combined repo of that commit (new or modified files overwritten,
    removed files removed).

    The last commit is an artificial merge commit that merges all the
    updated components into the combined repository.

    The HEAD ref only gets updated at the very end. All intermediate work
    happens in a worktree which will get garbage collected by git eventually
    after a failure.

    Parameters:
    conf       -- configuration object; conf.repos holds the per-component
                  settings and conf.update() records the new last_revision
    components -- component names, only used for the final commit message
    revisions  -- maps a component name to the revision to import up to;
                  components without an entry are imported up to their branch
    repos      -- names of the components to process (keys of conf.repos)
    '''
    # Remember current HEAD and what we need to add to it.
    head = runcmd("git rev-parse HEAD").strip()
    additional_heads = {}

    # Track the mapping between original commit and commit in the
    # combined repo. We do not have to distinguish between components,
    # because commit hashes are different anyway. Often we can
    # skip find_revs() entirely (for example, when all new commits
    # are derived from the last imported revision).
    #
    # Using "head" (typically the merge commit) instead of the actual
    # commit for the component leads to a nicer history in the combined
    # repo.
    old2new_revs = {}
    for name in repos:
        repo = conf.repos[name]
        revision = repo['last_revision']
        if revision:
            old2new_revs[revision] = head

    def add_p(parents):
        '''Insert -p before each entry.'''
        parameters = []
        for p in parents:
            parameters.append('-p')
            parameters.append(p)
        return parameters

    # Do all intermediate work with a separate work dir and index,
    # chosen via env variables (can't use "git worktree", it is too
    # new). This is useful (no changes to current work tree unless the
    # update succeeds) and required (otherwise we end up temporarily
    # removing the combo-layer hooks that we currently use when
    # importing a new component).
    #
    # Not cleaned up after a failure at the moment.
    wdir = os.path.join(os.getcwd(), ".git", "combo-layer")
    windex = wdir + ".index"
    if os.path.isdir(wdir):
        shutil.rmtree(wdir)
    os.mkdir(wdir)
    # Plain dict copy: the assignments below only affect what gets passed
    # to child processes, never the environment of this process itself.
    wenv = dict(os.environ)
    wenv["GIT_WORK_TREE"] = wdir
    wenv["GIT_INDEX_FILE"] = windex
    # This one turned out to be needed in practice.
    wenv["GIT_OBJECT_DIRECTORY"] = os.path.join(os.getcwd(), ".git", "objects")
    wargs = {"destdir": wdir, "env": wenv}

    for name in repos:
        revision = revisions.get(name, None)
        repo = conf.repos[name]
        ldir = repo['local_repo_dir']
        dest_dir = repo['dest_dir']
        branch = repo.get('branch', "master")
        hook = repo.get('hook', None)
        largs = {"destdir": ldir, "env": None}
        file_include = repo.get('file_filter', '').split()
        file_include.sort() # make sure that short entries like '.' come first.
        file_exclude = repo.get('file_exclude', '').split()

        def include_file(path):
            '''True if path is selected by the component's file_filter.'''
            if not file_include:
                # No explicit filter set, include file.
                return True
            for pattern in file_include:
                if pattern == '.':
                    # Another special case: include current directory and thus all files.
                    return True
                if os.path.commonprefix((pattern, path)) == pattern:
                    # Included in directory or direct file match.
                    return True
                # Check for wildcard match *with* allowing * to match /, i.e.
                # src/*.c does match src/foobar/*.c. That's not how it is done elsewhere
                # when passing the filtering to "git archive", but it is unclear what
                # the intended semantic is (the comment on file_exclude that "append a * wildcard
                # at the end" to match the full content of a directories implies that
                # slashes are indeed not special), so here we simply do what's easy to
                # implement in Python.
                logger.debug('fnmatch(%s, %s)' % (path, pattern))
                if fnmatch.fnmatchcase(path, pattern):
                    return True
            return False

        def exclude_file(path):
            '''True if path matches one of the component's file_exclude patterns.'''
            for pattern in file_exclude:
                if fnmatch.fnmatchcase(path, pattern):
                    return True
            return False

        def file_filter(files):
            '''Clean up file list so that only included files remain.'''
            # Slice assignment modifies the caller's list in place.
            files[:] = [f for f in files if include_file(f) and not exclude_file(f)]


        # Generate the revision list.
        logger.info("Analyzing commits from %s..." % name)
        top_revision = revision or branch
        if not check_rev_branch(name, ldir, top_revision, branch):
            sys.exit(1)

        last_revision = repo['last_revision']
        rev_list_args = "--full-history --sparse --topo-order --reverse"
        if not last_revision:
            logger.info("Warning: last_revision of component %s is not set, starting from the first commit" % name)
            rev_list_args = rev_list_args + ' ' + top_revision
        else:
            if not check_rev_branch(name, ldir, last_revision, branch):
                sys.exit(1)
            rev_list_args = "%s %s..%s" % (rev_list_args, last_revision, top_revision)

            # By definition, the current HEAD contains the latest imported
            # commit of each component. We use that as initial mapping even
            # though the commits do not match exactly because
            # a) it always works (in contrast to find_revs, which relies on special
            #    commit messages)
            # b) it is faster than find_revs, which will only be called on demand
            #    and can be skipped entirely in most cases
            # c) last but not least, the combined history looks nicer when all
            #    new commits are rooted in the same merge commit
            old2new_revs[last_revision] = head

        # We care about all commits (--full-history and --sparse) and
        # we want to reconstruct the topology and thus do not care
        # about ordering by time (--topo-order). We ask for the ones
        # we need to import first to be listed first (--reverse).
        revs = runcmd("git rev-list %s" % rev_list_args, **largs).split()
        logger.debug("To be imported: %s" % revs)
        # Now 'revs' contains all revisions reachable from the top revision.
        # All revisions derived from the 'last_revision' definitely are new,
        # whereas the others may or may not have been imported before. For
        # a linear history in the component, that second set will be empty.
        # To distinguish between them, we also get the shorter list
        # of revisions starting at the ancestor.
        if last_revision:
            ancestor_revs = runcmd("git rev-list --ancestry-path %s" % rev_list_args, **largs).split()
        else:
            ancestor_revs = []
        logger.debug("Ancestors: %s" % ancestor_revs)

        # Now import each revision.
        logger.info("Importing commits from %s..." % name)
        def import_rev(rev):
            '''Import rev (and, recursively, its parents) into the combined
            repo if necessary and return the corresponding new revision.'''
            global scanned_revs

            # If it is part of the new commits, we definitely need
            # to import it. Otherwise we need to check, we might have
            # imported it before. If it was imported and we merely
            # fail to find it because commit messages did not track
            # the mapping, then we end up importing it again. So
            # combined repos using "updating with history" really should
            # enable the "From ... rev:" commit header modifications.
            if rev not in ancestor_revs and rev not in old2new_revs and not scanned_revs:
                logger.debug("Revision %s triggers log analysis." % rev)
                find_revs(old2new_revs, head)
                scanned_revs = True
            new_rev = old2new_revs.get(rev, None)
            if new_rev:
                return new_rev

            # If the commit is not in the original list of revisions
            # to be imported, then it must be a parent of one of those
            # commits and it was skipped during earlier imports or not
            # found. Importing such merge commits leads to very ugly
            # history (long cascade of merge commits which all point
            # to older commits) when switching from "update via
            # patches" to "update with history".
            #
            # We can avoid importing merge commits if all non-merge commits
            # reachable from it were already imported. In that case we
            # can root the new commits in the current head revision.
            def is_imported(prev):
                parents = runcmd("git show --no-patch --pretty=format:%P " + prev, **largs).split()
                if len(parents) > 1:
                    for p in parents:
                        if not is_imported(p):
                            logger.debug("Must import %s because %s is not imported." % (rev, p))
                            return False
                    return True
                elif prev in old2new_revs:
                    return True
                else:
                    logger.debug("Must import %s because %s is not imported." % (rev, prev))
                    return False
            if rev not in revs and is_imported(rev):
                old2new_revs[rev] = head
                return head

            # Need to import rev. Collect some information about it.
            logger.debug("Importing %s" % rev)
            (parents, author_name, author_email, author_timestamp, body) = \
                runcmd("git show --no-patch --pretty=format:%P%x00%an%x00%ae%x00%at%x00%B " + rev, **largs).split(chr(0))
            parents = parents.split()
            if parents:
                # Arbitrarily pick the first parent as base. It may or may not have
                # been imported before. For example, if the parent is a merge commit
                # and previously the combined repository used patching as update
                # method, then the actual merge commit parent never was imported.
                # To cover this, we recursively import parents.
                parent = parents[0]
                new_parent = import_rev(parent)
                # Clean index and working tree. TODO: can we combine this and the
                # next into one command with less file IO?
                # "git reset --hard" does not work, it changes HEAD of the parent
                # repo, which we wanted to avoid. Probably need to keep
                # track of the rev that corresponds to the index and use apply_commit().
                runcmd("git rm -q --ignore-unmatch -rf .", **wargs)
                # Update index and working tree to match the parent.
                runcmd("git checkout -q -f %s ." % new_parent, **wargs)
            else:
                parent = None
                # Clean index and working tree.
                runcmd("git rm -q --ignore-unmatch -rf .", **wargs)

            # Modify index and working tree such that it mirrors the commit.
            apply_commit(parent, rev, largs, wargs, dest_dir, file_filter=file_filter)

            # Now commit.
            new_tree = runcmd("git write-tree", **wargs).strip()
            env = dict(wenv)
            env['GIT_AUTHOR_NAME'] = author_name
            env['GIT_AUTHOR_EMAIL'] = author_email
            env['GIT_AUTHOR_DATE'] = author_timestamp
            if hook:
                # Need to turn the verbatim commit message into something resembling a patch header
                # for the hook.
                with tempfile.NamedTemporaryFile(mode='wt', delete=False) as patch:
                    patch.write('Subject: [PATCH] ')
                    patch.write(body)
                    patch.write('\n---\n')
                # The file is closed (and thus flushed) here, but still exists
                # because of delete=False; remove it ourselves once the hook
                # result has been read back.
                try:
                    runcmd([hook, patch.name, rev, name])
                    with open(patch.name) as f:
                        body = f.read()[len('Subject: [PATCH] '):][:-len('\n---\n')]
                finally:
                    if os.path.exists(patch.name):
                        os.unlink(patch.name)

            # We can skip non-merge commits that did not change any files. Those are typically
            # the result of file filtering, although they could also have been introduced
            # intentionally upstream, in which case we drop some information here.
            if len(parents) == 1:
                parent_rev = import_rev(parents[0])
                old_tree = runcmd("git show -s --pretty=format:%T " + parent_rev, **wargs).strip()
                commit = old_tree != new_tree
                if not commit:
                    new_rev = parent_rev
            else:
                commit = True
            if commit:
                new_rev = runcmd("git commit-tree".split() + add_p([import_rev(p) for p in parents]) +
                                 ["-m", body, new_tree],
                                 env=env).strip()
            old2new_revs[rev] = new_rev

            return new_rev

        if revs:
            for rev in revs:
                import_rev(rev)
            # Remember how to update our current head. New components get added,
            # updated components get the delta between current head and the updated component
            # applied.
            additional_heads[old2new_revs[revs[-1]]] = head if repo['last_revision'] else None
            repo['last_revision'] = revs[-1]

    # Now construct the final merge commit. We create the tree by
    # starting with the head and applying the changes from each
    # components imported head revision.
    if additional_heads:
        runcmd("git reset --hard", **wargs)
        for rev, base in additional_heads.items():
            apply_commit(base, rev, wargs, wargs, None)

        # Commit with all component branches as parents as well as the previous head.
        logger.info("Writing final merge commit...")
        msg = conf_commit_msg(conf, components)
        new_tree = runcmd("git write-tree", **wargs).strip()
        new_rev = runcmd("git commit-tree".split() +
                         add_p([head] + list(additional_heads.keys())) +
                         ["-m", msg, new_tree],
                         **wargs).strip()
        # And done! This is the first time we change the HEAD in the actual work tree.
        runcmd("git reset --hard %s" % new_rev)

        # Update and stage the (potentially modified)
        # combo-layer.conf, but do not commit separately.
        for name in repos:
            repo = conf.repos[name]
            rev = repo['last_revision']
            conf.update(name, "last_revision", rev)
        if commit_conf_file(conf, components, False):
            # Must augment the previous commit.
            runcmd("git commit --amend -C HEAD")
1207
1208
# Set to True by update_with_history() once find_revs() has been run, so
# that the log analysis happens at most once.
scanned_revs = False
def find_revs(old2new, head):
    '''Extend the old2new mapping (original commit hash -> commit hash
    in the combined repo) with what can be learned from the commit
    messages reachable from head. Depends on the
    "From ... rev: ..." convention. Existing entries are kept.'''
    logger.info("Analyzing log messages to find previously imported commits...")
    known_before = len(old2new)
    # The output is a NUL-separated sequence of alternating
    # commit hashes and commit messages.
    fields = runcmd("git log --grep='From .* rev: [a-fA-F0-9][a-fA-F0-9]*' --pretty=format:%H%x00%B%x00 " + head).split(chr(0))
    pattern = re.compile(r'From .* rev: ([a-fA-F0-9]+)')
    for combined_rev, message in zip(fields[0::2], fields[1::2]):
        # Use the last one, in the unlikely case there are more than one.
        original_rev = pattern.findall(message)[-1]
        old2new.setdefault(original_rev, combined_rev.strip())
    found = len(old2new) - known_before
    logger.info("Found %d additional commits, leading to: %s" % (found, old2new))
1224
1225
def apply_commit(parent, rev, largs, wargs, dest_dir, file_filter=None):
    '''Compare revision against parent, remove files deleted in the
    commit, re-write new or modified ones. Moves them into dest_dir.
    Optionally filters files.

    Parameters:
    parent      -- revision to compare against; if empty or None, all
                   files of rev are copied
    rev         -- revision whose content gets applied
    largs       -- runcmd() keyword arguments ("destdir", "env") for the
                   repository the commits are read from
    wargs       -- runcmd() keyword arguments for the work tree/index
                   that gets modified
    dest_dir    -- directory relative to the work tree into which the
                   files are placed; empty or None means "."
    file_filter -- optional callable which removes unwanted entries from
                   a list of file names in place
    '''
    # Function-scope import: shlex.quote is the supported replacement for
    # the quote() helper of the deprecated "pipes" module.
    import shlex

    if not dest_dir:
        dest_dir = "."
    # -r recurses into sub-directories, given is the full overview of
    # what changed.  We do not care about copy/edits or renames, so we
    # can disable those with --no-renames (but we still parse them,
    # because it was not clear from git documentation whether C and M
    # lines can still occur).
    logger.debug("Applying changes between %s and %s in %s" % (parent, rev, largs["destdir"]))
    delete = []
    update = []
    if parent:
        # Apply delta.
        changes = runcmd("git diff-tree --no-commit-id --no-renames --name-status -r --raw -z %s %s" % (parent, rev), **largs).split(chr(0))
        # The output alternates between status and file name.
        for status, name in zip(*[iter(changes)]*2):
            if status[0] in "ACMRT":
                update.append(name)
            elif status[0] == "D":
                delete.append(name)
            else:
                logger.error("Unknown status %s of file %s in revision %s" % (status, name, rev))
                sys.exit(1)
    else:
        # Copy all files. Splitting the NUL-separated listing can yield
        # empty strings (e.g. after a trailing NUL); those are not file
        # names and must not end up on a git command line.
        listing = runcmd("git ls-tree -r --name-only -z %s" % rev, **largs)
        update.extend(entry for entry in listing.split(chr(0)) if entry)

    # Include/exclude files as define in the component config.
    # Both updated and deleted file lists get filtered, because it might happen
    # that a file gets excluded, pulled from a different component, and then the
    # excluded file gets deleted. In that case we must keep the copy.
    if file_filter:
        file_filter(update)
        file_filter(delete)

    # We export into a tar archive here and extract with tar because it is simple (no
    # need to implement file and symlink writing ourselves) and gives us some degree
    # of parallel IO. The downside is that we have to pass the list of files via
    # command line parameters - hopefully there will never be too many at once.
    if update:
        target = os.path.join(wargs["destdir"], dest_dir)
        if not os.path.isdir(target):
            os.makedirs(target)
        quoted_target = shlex.quote(target)
        # os.sysconf('SC_ARG_MAX') is lying: running a command with
        # string length 629343 already failed with "Argument list too
        # long" although SC_ARG_MAX = 2097152. "man execve" explains
        # the limitations, but those are pretty complicated. So here
        # we just hard-code a fixed value which is more likely to work.
        max_cmdsize = 64 * 1024
        # Index of the first entry in 'update' that still has to be handled.
        pending = 0
        while pending < len(update):
            quoted_args = []
            unquoted_args = []
            cmdsize = 100 + len(quoted_target)
            while pending < len(update):
                quoted_next = shlex.quote(update[pending])
                size_next = len(quoted_next) + len(dest_dir) + 1
                logger.debug('cmdline length %d + %d < %d?' % (cmdsize, size_next, os.sysconf('SC_ARG_MAX')))
                # Always take at least one entry per chunk, otherwise a single
                # oversized entry would keep this loop from making progress.
                if not quoted_args or cmdsize + size_next < max_cmdsize:
                    quoted_args.append(quoted_next)
                    unquoted_args.append(update[pending])
                    pending += 1
                    cmdsize += size_next
                else:
                    logger.debug('Breaking the cmdline at length %d' % cmdsize)
                    break
            logger.debug('Final cmdline length %d / %d' % (cmdsize, os.sysconf('SC_ARG_MAX')))
            cmd = "git archive %s %s | tar -C %s -xf -" % (rev, ' '.join(quoted_args), quoted_target)
            logger.debug('First cmdline length %d' % len(cmd))
            runcmd(cmd, **largs)
            cmd = "git add -f".split() + [os.path.join(dest_dir, x) for x in unquoted_args]
            logger.debug('Second cmdline length %d' % reduce(lambda x, y: x + len(y), cmd, 0))
            runcmd(cmd, **wargs)
    if delete:
        runcmd("git rm -f --ignore-unmatch".split() + [os.path.join(dest_dir, x) for x in delete], **wargs)
1306
def action_error(conf, args):
    """Log that the action named by args[0] is invalid; conf is not used."""
    invalid_action = args[0]
    logger.info("invalid action %s" % invalid_action)
1309
# Dispatch table: maps the action name given on the command line to the
# function implementing it. main() rejects any action name that is not a
# key of this dictionary.
actions = {
    "init": action_init,
    "update": action_update,
    "pull": action_pull,
    "splitpatch": action_splitpatch,
}
1316
def main():
    """Parse the command line and dispatch to the requested action handler.

    Returns:
        0 after the selected action handler has returned, or 1 when the
        invocation is unusable (no action given, unsupported action, or the
        config file does not exist). The value is intended to be handed to
        sys.exit() so that usage errors produce a non-zero exit status.
    """
    parser = optparse.OptionParser(
        version = "Combo Layer Repo Tool version %s" % __version__,
        usage = """%prog [options] action

Create and update a combination layer repository from multiple component repositories.

Action:
  init                 initialise the combo layer repo
  update [components]  get patches from component repos and apply them to the combo repo
  pull [components]    just pull component repos only
  splitpatch [commit]  generate commit patch and split per component, default commit is HEAD""")

    parser.add_option("-c", "--conf", help = "specify the config file (conf/combo-layer.conf is the default).",
               action = "store", dest = "conffile", default = "conf/combo-layer.conf")

    parser.add_option("-i", "--interactive", help = "interactive mode, user can edit the patch list and patches",
               action = "store_true", dest = "interactive", default = False)

    parser.add_option("-D", "--debug", help = "output debug information",
               action = "store_true", dest = "debug", default = False)

    parser.add_option("-n", "--no-pull", help = "skip pulling component repos during update",
               action = "store_true", dest = "nopull", default = False)

    parser.add_option("--hard-reset",
               help = "instead of pull do fetch and hard-reset in component repos",
               action = "store_true", dest = "hard_reset", default = False)

    parser.add_option("-H", "--history", help = "import full history of components during init",
                      action = "store_true", default = False)

    # The full sys.argv is parsed, so args[0] is the program name and the
    # action, when present, is args[1].
    options, args = parser.parse_args(sys.argv)

    # Reject unusable invocations first; each one reports the problem, shows
    # the usage text and yields a failure status instead of falling through
    # to an implicit None (which sys.exit() would treat as success).
    if len(args) == 1:
        logger.error("No action specified, exiting")
        parser.print_help()
        return 1
    if args[1] not in actions:
        logger.error("Unsupported action %s, exiting\n" % (args[1]))
        parser.print_help()
        return 1
    if not os.path.exists(options.conffile):
        logger.error("No valid config file, exiting\n")
        parser.print_help()
        return 1

    if options.debug:
        logger.setLevel(logging.DEBUG)
    confdata = Configuration(options)
    initmode = (args[1] == 'init')
    confdata.sanity_check(initmode)
    # Membership in the table was verified above, so a direct lookup is safe.
    # The handler receives the action name followed by its own arguments.
    actions[args[1]](confdata, args[1:])
    return 0
1368
if __name__ == "__main__":
    import traceback

    # Start from the failure status; it is only replaced when main() returns
    # normally. An Exception escaping main() is printed with its traceback
    # and leaves the failure status in place.
    ret = 1
    try:
        ret = main()
    except Exception:
        traceback.print_exc()
    sys.exit(ret)
1377