xref: /openbmc/openbmc/poky/scripts/combo-layer (revision c342db35)
1#!/usr/bin/env python3
2# ex:ts=4:sw=4:sts=4:et
3# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
4#
5# Copyright 2011 Intel Corporation
6# Authored-by:  Yu Ke <ke.yu@intel.com>
7#               Paul Eggleton <paul.eggleton@intel.com>
8#               Richard Purdie <richard.purdie@intel.com>
9#
10# SPDX-License-Identifier: GPL-2.0-only
11#
12
13import fnmatch
14import os, sys
15import optparse
16import logging
17import subprocess
18import tempfile
19import configparser
20import re
21import copy
22import pipes
23import shutil
24from collections import OrderedDict
25from string import Template
26from functools import reduce
27
28__version__ = "0.2.1"
29
def logger_create():
    """
        Set up console logging on the root logger and return that logger.

        A stream handler printing "[HH:MM:SS] message" is attached and the
        level is set to INFO.
    """
    console = logging.StreamHandler()
    console.setFormatter(logging.Formatter(fmt="[%(asctime)s] %(message)s", datefmt="%H:%M:%S"))

    root = logging.getLogger("")
    root.addHandler(console)
    root.setLevel(logging.INFO)
    return root

# Logger shared by all functions in this script.
logger = logger_create()
39
def get_current_branch(repodir=None):
    """
        Return the name of the branch checked out in repodir (or in the
        current directory when repodir is not given).

        Returns "master" when there is no .git entry yet, and "" when
        git cannot resolve HEAD to a symbolic ref.
    """
    gitdir = os.path.join(repodir or '', ".git")
    if not os.path.exists(gitdir):
        # Repo not created yet (i.e. during init) so just assume master
        return "master"
    try:
        ref = runcmd("git symbolic-ref HEAD 2>/dev/null", repodir).strip()
    except subprocess.CalledProcessError:
        return ""
    heads_prefix = "refs/heads/"
    if ref.startswith(heads_prefix):
        return ref[len(heads_prefix):]
    return ref
51
class Configuration(object):
    """
    Manages the configuration

    For an example config file, see combo-layer.conf.example

    The constructor copies every attribute of the given options object onto
    the instance (it must provide at least ``conffile``) and then sets:

      parser / localparser      ConfigParser for the main / local config file
                                (localparser is None without a local file)
      localconffile             path of the local config file, or None
      combobranch               current branch of the combo repository, only
                                determined when a local config file exists
      commit_msg_template       value of "commit_msg" in the
                                [combo-layer-settings] section, or a default
      repos                     dict: component name -> dict of its settings
    """
    def __init__(self, options):
        for key, val in options.__dict__.items():
            setattr(self, key, val)

        def readsection(parser, section, repo):
            # Copy all settings of one config section into self.repos[repo].
            for (name, value) in parser.items(section):
                if value.startswith("@"):
                    # NOTE(review): values starting with "@" are evaluated as
                    # Python expressions. This executes arbitrary code from the
                    # configuration file, so config files must be trusted.
                    self.repos[repo][name] = eval(value.strip("@"))
                else:
                    # Apply special type transformations for some properties.
                    # Type matches the RawConfigParser.get*() methods.
                    types = {'signoff': 'boolean', 'update': 'boolean', 'history': 'boolean'}
                    if name in types:
                        value = getattr(parser, 'get' + types[name])(section, name)
                    self.repos[repo][name] = value

        def readglobalsection(parser, section):
            # Only "commit_msg" is recognised in the global section.
            for (name, value) in parser.items(section):
                if name == "commit_msg":
                    self.commit_msg_template = value

        logger.debug("Loading config file %s" % self.conffile)
        self.parser = configparser.ConfigParser()
        with open(self.conffile) as f:
            # readfp() was removed in Python 3.12; read_file() is its replacement.
            self.parser.read_file(f)

        # initialize default values
        self.commit_msg_template = "Automatic commit to update last_revision"

        self.repos = {}
        for repo in self.parser.sections():
            if repo == "combo-layer-settings":
                # special handling for global settings
                readglobalsection(self.parser, repo)
            else:
                self.repos[repo] = {}
                readsection(self.parser, repo, repo)

        # Load local configuration, if available
        self.localconffile = None
        self.localparser = None
        self.combobranch = None
        if self.conffile.endswith('.conf'):
            # Only swap the trailing suffix: str.replace() would also rewrite
            # a ".conf" appearing earlier in the path (e.g. "~/.config/...").
            lcfile = self.conffile[:-len('.conf')] + '-local.conf'
            if os.path.exists(lcfile):
                # Read combo layer branch
                self.combobranch = get_current_branch()
                logger.debug("Combo layer branch is %s" % self.combobranch)

                self.localconffile = lcfile
                logger.debug("Loading local config file %s" % self.localconffile)
                self.localparser = configparser.ConfigParser()
                with open(self.localconffile) as f:
                    self.localparser.read_file(f)

                # Local sections are named either "<repo>" or
                # "<repo>|<combo branch>"; the latter only applies while that
                # branch of the combo repository is checked out.
                for section in self.localparser.sections():
                    if '|' in section:
                        sectionvals = section.split('|')
                        repo = sectionvals[0]
                        if sectionvals[1] != self.combobranch:
                            continue
                    else:
                        repo = section
                    if repo in self.repos:
                        readsection(self.localparser, section, repo)

    def update(self, repo, option, value, initmode=False):
        """
        Set option to value for component repo, both in memory and on disk.

        The option is written to the local config file (section
        "<repo>|<combo branch>") when such a file is loaded and the main
        config does not define the option; otherwise to the main config
        file. With initmode set, a missing local section is created first.
        The chosen config file is rewritten completely.
        """
        # If the main config has the option already, that is what we
        # are expected to modify.
        if self.localparser and not self.parser.has_option(repo, option):
            parser = self.localparser
            section = "%s|%s" % (repo, self.combobranch)
            conffile = self.localconffile
            if initmode and not parser.has_section(section):
                parser.add_section(section)
        else:
            parser = self.parser
            section = repo
            conffile = self.conffile
        parser.set(section, option, value)
        with open(conffile, "w") as f:
            parser.write(f)
        self.repos[repo][option] = value

    def sanity_check(self, initmode=False):
        """
        Verify that every component defines the required options and that
        the filterdiff tool is available; log an error and exit(1) otherwise.

        "last_revision" is not required when initmode is set. As a side
        effect, each component's dest_dir is normalised ("." when unset).
        """
        required_options=["src_uri", "local_repo_dir", "dest_dir", "last_revision"]
        if initmode:
            required_options.remove("last_revision")
        msg = ""
        missing_options = []
        for name in self.repos:
            for option in required_options:
                if option not in self.repos[name]:
                    msg = "%s\nOption %s is not defined for component %s" %(msg, option, name)
                    missing_options.append(option)
            # Sanitize dest_dir so that we do not have to deal with edge cases
            # (unset, empty string, double slashes) in the rest of the code.
            # It not being set will still be flagged as error because it is
            # listed as required option above; that could be changed now.
            dest_dir = os.path.normpath(self.repos[name].get("dest_dir", "."))
            self.repos[name]["dest_dir"] = "." if not dest_dir else dest_dir
        if msg != "":
            logger.error("configuration file %s has the following error: %s" % (self.conffile,msg))
            if self.localconffile and 'last_revision' in missing_options:
                logger.error("local configuration file %s may be missing configuration for combo branch %s" % (self.localconffile, self.combobranch))
            sys.exit(1)

        # filterdiff is required by action_splitpatch, so check its availability
        if subprocess.call("which filterdiff > /dev/null 2>&1", shell=True) != 0:
            logger.error("ERROR: patchutils package is missing, please install it (e.g. # apt-get install patchutils)")
            sys.exit(1)
170
def runcmd(cmd,destdir=None,printerr=True,out=None,env=None):
    """
        execute command, raise CalledProcessError if fail
        return output if succeed

        cmd      -- command to run; a string is executed through the shell,
                    a list is executed directly
        destdir  -- working directory for the command (default: current dir)
        printerr -- log the captured output as an error when the command fails
        out      -- optional file object receiving stdout; when given, only
                    stderr is captured and returned
        env      -- environment for the command (default: os.environ)

        Returns the captured output decoded as UTF-8: stdout and stderr
        combined, or stderr only when out was given.
    """
    logger.debug("run cmd '%s' in %s" % (cmd, os.getcwd() if destdir is None else destdir))
    # The capture file always belongs to this function, so close it reliably
    # instead of waiting for garbage collection.
    with tempfile.TemporaryFile() as capture:
        stdout = out if out else capture
        try:
            subprocess.check_call(cmd, stdout=stdout, stderr=capture, cwd=destdir, shell=isinstance(cmd, str), env=env or os.environ)
        except subprocess.CalledProcessError:
            if printerr:
                capture.seek(0)
                # Decode leniently: we are already reporting a failure and
                # must not hide it behind a UnicodeDecodeError.
                logger.error("%s" % capture.read().decode('utf-8', errors='replace'))
            raise

        capture.seek(0)
        output = capture.read().decode('utf-8')
    logger.debug("output: %s" % output.replace(chr(0), '\\0'))
    return output
194
def action_init(conf, args):
    """
        Clone component repositories
        Check git is initialised; if not, copy initial data from component repos

        conf -- Configuration object; conf.repos, conf.history and
                conf.localconffile are used here
        args -- unused

        Without conf.history, the selected files of each component are
        extracted into the working tree and staged. With conf.history, each
        component's history is fetched into a branch named after the
        component, optionally rewritten, and all branches are prepared for
        an octopus merge. In both cases the final commit is left to the user.
    """
    for name in conf.repos:
        ldir = conf.repos[name]['local_repo_dir']
        if not os.path.exists(ldir):
            logger.info("cloning %s to %s" %(conf.repos[name]['src_uri'], ldir))
            subprocess.check_call("git clone %s %s" % (conf.repos[name]['src_uri'], ldir), shell=True)
    if not os.path.exists(".git"):
        runcmd("git init")
        if conf.history:
            # Need a common ref for all trees.
            runcmd('git commit -m "initial empty commit" --allow-empty')
            startrev = runcmd('git rev-parse master').strip()

        for name in conf.repos:
            repo = conf.repos[name]
            ldir = repo['local_repo_dir']
            branch = repo.get('branch', "master")
            lastrev = repo.get('last_revision', None)
            if lastrev and lastrev != "HEAD":
                initialrev = lastrev
                if branch:
                    if not check_rev_branch(name, ldir, lastrev, branch):
                        sys.exit(1)
                logger.info("Copying data from %s at specified revision %s..." % (name, lastrev))
            else:
                lastrev = None
                initialrev = branch
                logger.info("Copying data from %s..." % name)
            # Sanity check initialrev and turn it into hash (required for copying history,
            # because resolving a name ref only works in the component repo).
            rev = runcmd('git rev-parse %s' % initialrev, ldir).strip()
            if rev != initialrev:
                try:
                    # split() instead of split('\n'): the trailing newline of
                    # the output must not count as an additional "ref".
                    refs = runcmd('git show-ref -s %s' % initialrev, ldir).split()
                    if len(set(refs)) > 1:
                        # Happens for example when configured to track
                        # "master" and there is a refs/heads/master. The
                        # traditional behavior from "git archive" (preserved
                        # here) is to choose the first one. This might not be
                        # intended, so at least warn about it.
                        logger.warning("%s: initial revision '%s' not unique, picking result of rev-parse = %s" %
                                    (name, initialrev, refs[0]))
                except subprocess.CalledProcessError:
                    # show-ref fails for hashes. Skip the sanity warning in that case.
                    pass
                initialrev = rev
            dest_dir = repo['dest_dir']
            if dest_dir != ".":
                extract_dir = os.path.join(os.getcwd(), dest_dir)
                if not os.path.exists(extract_dir):
                    os.makedirs(extract_dir)
            else:
                extract_dir = os.getcwd()
            file_filter = repo.get('file_filter', "")
            exclude_patterns = repo.get('file_exclude', '').split()
            def copy_selected_files(initialrev, extract_dir, file_filter, exclude_patterns, ldir,
                                    subdir=""):
                # Extract the files of revision initialrev (resolved in the
                # repository at ldir) into extract_dir via "git archive | tar",
                # then delete those matching exclude_patterns.
                #
                # When working inside a filtered branch which had the
                # files already moved, we need to prepend the
                # subdirectory to all filters, otherwise they would
                # not match.
                if subdir == '.':
                    subdir = ''
                elif subdir:
                    subdir = os.path.normpath(subdir)
                    file_filter = ' '.join([subdir + '/' + x for x in file_filter.split()])
                    exclude_patterns = [subdir + '/' + x for x in exclude_patterns]
                # To handle both cases, we cd into the target
                # directory and optionally tell tar to strip the path
                # prefix when the files were already moved.
                subdir_components = len(subdir.split(os.path.sep)) if subdir else 0
                strip=('--strip-components=%d' % subdir_components) if subdir else ''
                # TODO: file_filter wild cards do not work (and haven't worked before either), because
                # a) GNU tar requires a --wildcards parameter before turning on wild card matching.
                # b) The semantic is not as intended (src/*.c also matches src/foo/bar.c,
                #    in contrast to the other use of file_filter as parameter of "git archive"
                #    where it only matches .c files directly in src).
                files = runcmd("git archive %s %s | tar -x -v %s -C %s %s" %
                               (initialrev, subdir,
                                strip, extract_dir, file_filter),
                               ldir)
                if exclude_patterns:
                    # Implement file removal by letting tar create the
                    # file and then deleting it in the file system
                    # again. Uses the list of files created by tar (easier
                    # than walking the tree).
                    for file in files.split('\n'):
                        if file.endswith(os.path.sep):
                            continue
                        for pattern in exclude_patterns:
                            if fnmatch.fnmatch(file, pattern):
                                os.unlink(os.path.join(*([extract_dir] + ['..'] * subdir_components + [file])))
                                break

            if not conf.history:
                copy_selected_files(initialrev, extract_dir, file_filter, exclude_patterns, ldir)
            else:
                # First fetch remote history into local repository.
                # We need a ref for that, so ensure that there is one.
                refname = "combo-layer-init-%s" % name
                runcmd("git branch -f %s %s" % (refname, initialrev), ldir)
                runcmd("git fetch %s %s" % (ldir, refname))
                runcmd("git branch -D %s" % refname, ldir)
                # Make that the head revision.
                runcmd("git checkout -b %s %s" % (name, initialrev))
                # Optional: cut the history by replacing the given
                # start point(s) with commits providing the same
                # content (aka tree), but with commit information that
                # makes it clear that this is an artificially created
                # commit and nothing the original authors had anything
                # to do with.
                since_rev = repo.get('since_revision', '')
                if since_rev:
                    committer = runcmd('git var GIT_AUTHOR_IDENT').strip()
                    # Same time stamp, no name.
                    # Raw string: "\d" in a normal literal is an invalid escape sequence.
                    author = re.sub(r'.* (\d+ [+-]\d+)', r'unknown <unknown> \1', committer)
                    logger.info('author %s' % author)
                    for rev in since_rev.split():
                        # Resolve in component repo...
                        rev = runcmd('git log --oneline --no-abbrev-commit -n1 %s' % rev, ldir).split()[0]
                        # ... and then get the tree in current
                        # one. The commit should be in both repos with
                        # the same tree, but better check here.
                        tree = runcmd('git show -s --pretty=format:%%T %s' % rev).strip()
                        with tempfile.NamedTemporaryFile(mode='wt') as editor:
                            editor.write('''cat >$1 <<EOF
tree %s
author %s
committer %s

%s: squashed import of component

This commit copies the entire set of files as found in
%s %s

For more information about previous commits, see the
upstream repository.

Commit created by combo-layer.
EOF
''' % (tree, author, committer, name, name, since_rev))
                            editor.flush()
                            # Hand the fake editor to this one command only,
                            # instead of leaving os.environ pointing at a
                            # temporary file that is deleted right afterwards.
                            editor_env = dict(os.environ, GIT_EDITOR='sh %s' % editor.name)
                            runcmd('git replace --edit %s' % rev, env=editor_env)

                # Optional: rewrite history to change commit messages or to move files.
                if 'hook' in repo or dest_dir != ".":
                    filter_branch = ['git', 'filter-branch', '--force']
                    with tempfile.NamedTemporaryFile(mode='wt') as hookwrapper:
                        if 'hook' in repo:
                            # Create a shell script wrapper around the original hook that
                            # can be used by git filter-branch. Hook may or may not have
                            # an absolute path.
                            hook = repo['hook']
                            hook = os.path.join(os.path.dirname(conf.conffile), '..', hook)
                            # The wrapper turns the commit message
                            # from stdin into a fake patch header.
                            # This is good enough for changing Subject
                            # and commit msg body with normal
                            # combo-layer hooks.
                            hookwrapper.write('''set -e
tmpname=$(mktemp)
trap "rm $tmpname" EXIT
echo -n 'Subject: [PATCH] ' >>$tmpname
cat >>$tmpname
if ! [ $(tail -c 1 $tmpname | od -A n -t x1) == '0a' ]; then
    echo >>$tmpname
fi
echo '---' >>$tmpname
%s $tmpname $GIT_COMMIT %s
tail -c +18 $tmpname | head -c -4
''' % (hook, name))
                            hookwrapper.flush()
                            filter_branch.extend(['--msg-filter', 'bash %s' % hookwrapper.name])
                        if dest_dir != ".":
                            parent = os.path.dirname(dest_dir)
                            if not parent:
                                parent = '.'
                            # May run outside of the current directory, so do not assume that .git exists.
                            filter_branch.extend(['--tree-filter', 'mkdir -p .git/tmptree && find . -mindepth 1 -maxdepth 1 ! -name .git -print0 | xargs -0 -I SOURCE mv SOURCE .git/tmptree && mkdir -p %s && mv .git/tmptree %s' % (parent, dest_dir)])
                        filter_branch.append('HEAD')
                        runcmd(filter_branch)
                        runcmd('git update-ref -d refs/original/refs/heads/%s' % name)
                repo['rewritten_revision'] = runcmd('git rev-parse HEAD').strip()
                repo['stripped_revision'] = repo['rewritten_revision']
                # Optional filter files: remove everything and re-populate using the normal filtering code.
                # Override any potential .gitignore.
                if file_filter or exclude_patterns:
                    runcmd('git rm -rf .')
                    if not os.path.exists(extract_dir):
                        os.makedirs(extract_dir)
                    copy_selected_files('HEAD', extract_dir, file_filter, exclude_patterns, '.',
                                        subdir=dest_dir)
                    runcmd('git add --all --force .')
                    if runcmd('git status --porcelain'):
                        # Something to commit.
                        runcmd(['git', 'commit', '-m',
                                '''%s: select file subset

Files from the component repository were chosen based on
the following filters:
file_filter = %s
file_exclude = %s''' % (name, file_filter or '<empty>', repo.get('file_exclude', '<empty>'))])
                        repo['stripped_revision'] = runcmd('git rev-parse HEAD').strip()

            if not lastrev:
                lastrev = runcmd('git rev-parse %s' % initialrev, ldir).strip()
                conf.update(name, "last_revision", lastrev, initmode=True)

        if not conf.history:
            runcmd("git add .")
        else:
            # Create Octopus merge commit according to http://stackoverflow.com/questions/10874149/git-octopus-merge-with-unrelated-repositoies
            runcmd('git checkout master')
            merge = ['git', 'merge', '--no-commit']
            for name in conf.repos:
                repo = conf.repos[name]
                # Use branch created earlier.
                merge.append(name)
                # Root all commits which have no parent in the common
                # ancestor in the new repository.
                for start in runcmd('git log --pretty=format:%%H --max-parents=0 %s --' % name).split('\n'):
                    runcmd('git replace --graft %s %s' % (start, startrev))
            try:
                runcmd(merge)
            except Exception as error:
                logger.info('''Merging component repository history failed, perhaps because of merge conflicts.
It may be possible to commit anyway after resolving these conflicts.

%s''' % error)
            # Create MERGE_HEAD and MERGE_MSG. "git merge" itself
            # does not create MERGE_HEAD in case of a (harmless) failure,
            # and we want certain auto-generated information in the
            # commit message for future reference and/or automation.
            with open('.git/MERGE_HEAD', 'w') as head:
                with open('.git/MERGE_MSG', 'w') as msg:
                    msg.write('repo: initial import of components\n\n')
                    # head.write('%s\n' % startrev)
                    for name in conf.repos:
                        repo = conf.repos[name]
                        # <upstream ref> <rewritten ref> <rewritten + files removed>
                        msg.write('combo-layer-%s: %s %s %s\n' % (name,
                                                                  repo['last_revision'],
                                                                  repo['rewritten_revision'],
                                                                  repo['stripped_revision']))
                        rev = runcmd('git rev-parse %s' % name).strip()
                        head.write('%s\n' % rev)

        if conf.localconffile:
            # The local config file must not become part of the combo
            # repository: unstage it if it got added and ignore it from now on.
            localadded = True
            try:
                runcmd("git rm --cached %s" % conf.localconffile, printerr=False)
            except subprocess.CalledProcessError:
                localadded = False
            if localadded:
                localrelpath = os.path.relpath(conf.localconffile)
                runcmd("grep -q %s .gitignore || echo %s >> .gitignore" % (localrelpath, localrelpath))
                runcmd("git add .gitignore")
                logger.info("Added local configuration file %s to .gitignore", localrelpath)
        logger.info("Initial combo layer repository data has been created; please make any changes if desired and then use 'git commit' to make the initial commit.")
    else:
        logger.info("Repository already initialised, nothing to do.")
462
463
def check_repo_clean(repodir):
    """
        check if the repo is clean
        exit if repo is dirty

        Untracked "patch-*/" directories, as reported by
        "git status --porcelain", are not counted as dirt.
    """
    output=runcmd("git status --porcelain", repodir)
    # Raw string: "\?" in a normal literal is an invalid escape sequence.
    r = re.compile(r'\?\? patch-.*/')
    dirtyout = [item for item in output.splitlines() if not r.match(item)]
    if dirtyout:
        logger.error("git repo %s is dirty, please fix it first", repodir)
        sys.exit(1)
475
def check_patch(patchfile):
    """
        Make a patch file safe to apply when its commit message contains a diff.

        Everything before the first line consisting of "---" is treated as
        commit message. Diff lines found there (starting at a "--- " line and
        continuing while lines begin with '+', '-', '@', space or a line
        break) are indented by one space. The file is rewritten in place via
        "<patchfile>.tmp"; it is left untouched when the message has no diff.
    """
    tmpfile = patchfile + '.tmp'
    of = None
    in_patch = False
    beyond_msg = False
    # Lines seen before we know whether the file needs rewriting at all.
    pre_lines = []
    try:
        with open(patchfile, 'rb') as f:
            for ln in f:
                if not beyond_msg:
                    if ln == b'---\n':
                        if not of:
                            # End of message without any diff in it: nothing to do.
                            break
                        in_patch = False
                        beyond_msg = True
                    elif ln.startswith(b'--- '):
                        # We have a diff in the commit message
                        in_patch = True
                        if not of:
                            print('WARNING: %s contains a diff in its commit message, indenting to avoid failure during apply' % patchfile)
                            of = open(tmpfile, 'wb')
                            of.writelines(pre_lines)
                            pre_lines = []
                    elif in_patch and ln[0] not in b'+-@ \n\r':
                        in_patch = False
                if of:
                    of.write(b' ' + ln if in_patch else ln)
                else:
                    pre_lines.append(ln)
    finally:
        # Close the output even when reading or writing failed.
        if of:
            of.close()
    if of:
        os.rename(tmpfile, patchfile)
512
def drop_to_shell(workdir=None):
    """
        Start an interactive shell (taken from $SHELL, default "bash") in
        workdir so that the user can fix things up manually.

        Returns True when the shell exits with status 0, False when it
        exits with any other status or when stdin is not a TTY.
    """
    if not sys.stdin.isatty():
        print("Not a TTY so can't drop to shell for resolution, exiting.")
        return False

    user_shell = os.environ.get('SHELL', 'bash')
    banner = ('Dropping to shell "%s"\n'
              'When you are finished, run the following to continue:\n'
              '       exit    -- continue to apply the patches\n'
              '       exit 1  -- abort\n')
    print(banner % user_shell)
    if subprocess.call([user_shell], cwd=workdir) == 0:
        return True
    print("Aborting")
    return False
529
def check_rev_branch(component, repodir, rev, branch):
    """
        Check that revision rev of the repository in repodir is contained
        in branch, according to "git branch --contains".

        Logs an error mentioning component and returns False when the
        revision is invalid or not on that branch; returns True otherwise.
    """
    try:
        containing = runcmd("git branch --contains %s" % rev, repodir, printerr=False)
    except subprocess.CalledProcessError as err:
        # Exit status 129 is handled like an empty result; anything else is
        # passed on to the caller.
        if err.returncode != 129:
            raise
        containing = ""

    if not containing:
        logger.error("%s: specified revision %s is invalid!" % (component, rev))
        return False

    # The branch name is the last space separated word of each output line.
    found = [line.strip().split(' ')[-1] for line in containing.split("\n")]
    if branch in found:
        return True

    logger.error("%s: specified revision %s is not on specified branch %s!" % (component, rev, branch))
    return False
552
def get_repos(conf, repo_names):
    """
        Return the list of component names to operate on.

        repo_names is read up to (not including) the first entry starting
        with '-'. Every remaining name must be configured in conf.repos,
        otherwise an error is logged and the script exits. When no names
        remain, all components whose "update" setting is true (the default)
        are returned.
    """
    selected = list(repo_names)
    for position, candidate in enumerate(selected):
        if candidate.startswith('-'):
            selected = selected[:position]
            break

    for candidate in selected:
        if candidate not in conf.repos:
            logger.error("Specified component '%s' not found in configuration" % candidate)
            sys.exit(1)

    if selected:
        return selected
    return [known for known in conf.repos if conf.repos[known].get("update", True)]
569
def action_pull(conf, args):
    """
        update the component repos only

        args[1:] selects the components (see get_repos). All selected
        component repositories must be clean. With conf.hard_reset the
        configured branch is reset to FETCH_HEAD after a fetch, otherwise
        it is updated with a fast-forward-only pull.
    """
    repos = get_repos(conf, args[1:])

    # make sure all repos are clean
    for name in repos:
        check_repo_clean(conf.repos[name]['local_repo_dir'])

    for name in repos:
        settings = conf.repos[name]
        workdir = settings['local_repo_dir']
        branch = settings.get('branch', "master")
        logger.info("update branch %s of component repo %s in %s ..." % (branch, name, workdir))
        checkout_cmd = "git checkout %s" % branch

        if conf.hard_reset:
            logger.info(runcmd("git fetch", workdir))
            runcmd(checkout_cmd, workdir)
            runcmd("git reset --hard FETCH_HEAD", workdir)
            continue

        # Try to pull only the configured branch. Beware that this may fail
        # when the branch is currently unknown (for example, after reconfiguring
        # combo-layer). In that case we need to fetch everything and try the check out
        # and pull again.
        try:
            runcmd(checkout_cmd, workdir, printerr=False)
        except subprocess.CalledProcessError:
            logger.info(runcmd("git fetch", workdir))
            runcmd(checkout_cmd, workdir)
            runcmd("git pull --ff-only", workdir)
        else:
            logger.info(runcmd("git pull --ff-only", workdir))
605
def action_update(conf, args):
    """
        update the component repos
        either:
           generate the patch list
           apply the generated patches
        or:
           re-creates the entire component history and merges them
           into the current branch with a merge commit

        Each entry of args[1:] is "component" or "component:revision".
    """
    components = []
    revisions = {}
    for arg in args[1:]:
        component, colon, revision = arg.partition(':')
        components.append(component)
        if colon:
            revisions[component] = revision
    repos = get_repos(conf, components)

    # make sure combo repo is clean
    check_repo_clean(os.getcwd())

    # Check whether we keep the component histories. Must be
    # set either via --history command line parameter or consistently
    # in combo-layer.conf. Mixing modes is (currently, and probably
    # permanently because it would be complicated) not supported.
    history = True if conf.history else None
    if history is None:
        for name in repos:
            wanted = conf.repos[name].get('history', False)
            if history is None:
                history = wanted
                continue
            if history != wanted:
                logger.error("'history' property is set inconsistently")
                sys.exit(1)

    # Step 1: update the component repos
    if not conf.nopull:
        action_pull(conf, ['arg0'] + components)
    else:
        logger.info("Skipping pull (-n)")

    updater = update_with_history if history else update_with_patches
    updater(conf, components, revisions, repos)
654
def update_with_patches(conf, components, revisions, repos):
    """Update the combo repo by exporting component commits as patches.

    For every component in ``repos`` the new commits are exported with
    "git format-patch" into a scratch directory, optionally adjusted by
    the component's hook, and recorded in a per-component patch list.
    In interactive mode the user may then edit patches and patch lists
    before they are applied with apply_patchlist(). Finally the scratch
    directory is removed and the config file is committed if tracked.

    conf        -- configuration object (conf.repos, conf.interactive)
    components  -- component names, passed on to commit_conf_file()
    revisions   -- mapping from component name to the revision to update
                   to; a missing entry means the component's branch
    repos       -- names of the components to update

    Calls sys.exit(1) when a revision/branch check fails or when the
    interactive shell reports failure.
    """
    import uuid
    # Unique scratch directory below the current working directory.
    patch_dir = "patch-%s" % uuid.uuid4()
    if not os.path.exists(patch_dir):
        os.mkdir(patch_dir)

    for name in repos:
        revision = revisions.get(name, None)
        repo = conf.repos[name]
        ldir = repo['local_repo_dir']
        dest_dir = repo['dest_dir']
        branch = repo.get('branch', "master")
        repo_patch_dir = os.path.join(os.getcwd(), patch_dir, name)

        # Step 2: generate the patch list and store to patch dir
        logger.info("Generating patches from %s..." % name)
        top_revision = revision or branch
        if not check_rev_branch(name, ldir, top_revision, branch):
            sys.exit(1)
        if dest_dir != ".":
            # Make the patches apply below dest_dir in the combo repo.
            prefix = "--src-prefix=a/%s/ --dst-prefix=b/%s/" % (dest_dir, dest_dir)
        else:
            prefix = ""
        if repo['last_revision'] == "":
            logger.info("Warning: last_revision of component %s is not set, starting from the first commit" % name)
            patch_cmd_range = "--root %s" % top_revision
            rev_cmd_range = top_revision
        else:
            if not check_rev_branch(name, ldir, repo['last_revision'], branch):
                sys.exit(1)
            patch_cmd_range = "%s..%s" % (repo['last_revision'], top_revision)
            rev_cmd_range = patch_cmd_range

        file_filter = repo.get('file_filter',".")

        # Filter out unwanted files
        exclude = repo.get('file_exclude', '')
        if exclude:
            for path in exclude.split():
                p = "%s/%s" % (dest_dir, path) if dest_dir != '.' else path
                file_filter += " ':!%s'" % p

        patch_cmd = "git format-patch -N %s --output-directory %s %s -- %s" % \
            (prefix,repo_patch_dir, patch_cmd_range, file_filter)
        output = runcmd(patch_cmd, ldir)
        logger.debug("generated patch set:\n%s" % output)
        patchlist = output.splitlines()

        rev_cmd = "git rev-list --no-merges %s -- %s" % (rev_cmd_range, file_filter)
        revlist = runcmd(rev_cmd, ldir).splitlines()

        # Step 3: Call repo specific hook to adjust patch
        if 'hook' in repo:
            # hook parameter is: ./hook patchpath revision reponame
            # Patches are listed in the opposite order to revlist, hence
            # the walk through revlist from its end.
            count=len(revlist)-1
            for patch in patchlist:
                runcmd("%s %s %s %s" % (repo['hook'], patch, revlist[count], name))
                count=count-1

        # Step 4: write patch list and revision list to file, for user to edit later
        patchlist_file = os.path.join(os.getcwd(), patch_dir, "patchlist-%s" % name)
        repo['patchlist'] = patchlist_file
        # The context manager closes the list even if check_patch() raises
        # or exits.
        with open(patchlist_file, 'w') as f:
            count=len(revlist)-1
            for patch in patchlist:
                f.write("%s %s\n" % (patch, revlist[count]))
                check_patch(os.path.join(patch_dir, patch))
                count=count-1

    # Step 5: invoke bash for user to edit patch and patch list
    if conf.interactive:
        print('You may now edit the patch and patch list in %s\n' \
              'For example, you can remove unwanted patch entries from patchlist-*, so that they will be not applied later' % patch_dir)
        if not drop_to_shell(patch_dir):
            sys.exit(1)

    # Step 6: apply the generated and revised patch
    apply_patchlist(conf, repos)
    runcmd("rm -rf %s" % patch_dir)

    # Step 7: commit the updated config file if it's being tracked
    commit_conf_file(conf, components)
738
def conf_commit_msg(conf, components):
    """Expand conf.commit_msg_template into a commit message.

    The template's ``components`` placeholder is replaced by the
    comma-separated component names, or by "all components" when no
    component was named explicitly.
    """
    if components:
        # tell which components were actually changed
        described = ", ".join(components)
    else:
        described = "all components"

    # expand the template with known values
    return Template(conf.commit_msg_template).substitute(components=described)
750
def commit_conf_file(conf, components, commit=True):
    """Commit or stage the config file when git reports it as modified.

    conf        -- configuration object; conf.conffile is the config path
    components  -- component names used to build the commit message
    commit      -- True: create a commit containing only the config file;
                   False: merely stage the file with "git add"

    Returns True when the file was committed or staged, False otherwise
    (including when "git status" fails, e.g. because the file lives
    outside the repository).
    """
    relpath = os.path.relpath(conf.conffile)
    try:
        # Argument lists instead of shell strings: the path reaches git
        # verbatim even if it contains spaces or shell metacharacters.
        output = runcmd(["git", "status", "--porcelain", "--", relpath], printerr=False)
    except Exception:
        # Best effort, typically "outside the repository". Unlike a bare
        # except this lets KeyboardInterrupt and SystemExit propagate.
        output = None
    if output and output.lstrip().startswith("M"):
        logger.info("Committing updated configuration file")
        if commit:
            msg = conf_commit_msg(conf, components)
            runcmd(["git", "commit", "-m", msg, "--", relpath])
        else:
            runcmd(["git", "add", "--", relpath])
        return True
    return False
768
def apply_patchlist(conf, repos):
    """
        Apply each component's patch list to the combo repo with "git am"
        and record the resulting last_revision in the configuration.
    """
    for name in repos:
        repo = conf.repos[name]
        lastrev = prevrev = repo["last_revision"]

        # Collect the non-blank entries of the patch list file.
        # Note: opening is meant to fail when the file is missing and we
        # are not in interactive mode, since it has to exist in that case.
        entries = []
        if os.path.exists(repo['patchlist']) or not conf.interactive:
            with open(repo['patchlist']) as listfile:
                entries = [raw.rstrip() for raw in listfile if raw.rstrip()]

        ldir = repo['local_repo_dir']
        branch = repo.get('branch', "master")
        branchrev = runcmd("git rev-parse %s" % branch, ldir).strip()

        if not entries:
            logger.info("No patches to apply from %s" % name)
            lastrev = branchrev
        else:
            logger.info("Applying patches from %s..." % name)
            total = len(entries)
            for position, entry in enumerate(entries, 1):
                # Each entry is "<patch file> <original revision>".
                fields = entry.split()
                patchfile = fields[0]
                lastrev = fields[1]
                patchdisp = os.path.relpath(patchfile)
                if os.path.getsize(patchfile) == 0:
                    logger.info("(skipping %d/%d %s - no changes)" % (position, total, patchdisp))
                else:
                    signoff = '-s ' if repo.get('signoff', True) else ''
                    cmd = "git am --keep-cr %s-p1 %s" % (signoff, patchfile)
                    logger.info("Applying %d/%d: %s" % (position, total, patchdisp))
                    try:
                        runcmd(cmd)
                    except subprocess.CalledProcessError:
                        logger.info('Running "git am --abort" to cleanup repo')
                        runcmd("git am --abort")
                        logger.error('"%s" failed' % cmd)
                        logger.info("Please manually apply patch %s" % patchdisp)
                        logger.info("Note: if you exit and continue applying without manually applying the patch, it will be skipped")
                        if not drop_to_shell():
                            # Giving up: remember the last revision that
                            # was handled before this patch.
                            if prevrev != repo['last_revision']:
                                conf.update(name, "last_revision", prevrev)
                            sys.exit(1)
                prevrev = lastrev

            # With everything applied, last_revision should be the branch
            # head rather than the last applied patch when that head is a
            # merge commit. Otherwise the next run would only exclude the
            # patches reachable from one of the merged branches and try to
            # re-apply the others. Patches at the head that were excluded
            # on purpose are deliberately not covered: the next run will
            # present them again.
            if lastrev != branchrev:
                head_parents = runcmd("git show --pretty=format:%%P --no-patch %s" % branch, ldir).split()
                if len(head_parents) > 1:
                    lastrev = branchrev

        if lastrev != repo['last_revision']:
            conf.update(name, "last_revision", lastrev)
845
def action_splitpatch(conf, args):
    """
        Export one commit of the combo repo (args[1], default HEAD) and
        split it into one patch file per component repo.
    """
    logger.debug("action_splitpatch")
    commit = args[1] if len(args) > 1 else "HEAD"
    patchdir = "splitpatch-%s" % commit
    if not os.path.exists(patchdir):
        os.mkdir(patchdir)

    # The repo whose dest_dir is the root "." gets everything that does not
    # belong to another repo, expressed to filterdiff as
    # "-x repo1/* -x repo2/* ...".
    dest_dirs = [conf.repos[name]['dest_dir'] for name in conf.repos]
    filerange_root = "".join(' -x "%s/*"' % d for d in dest_dirs if d != ".")

    for name in conf.repos:
        dest_dir = conf.repos[name]['dest_dir']
        patch_filename = "%s/%s.patch" % (patchdir, name)
        if dest_dir != ".":
            cmd = "git format-patch --no-prefix -n1 --stdout %s^..%s -- %s > %s" % (commit, commit, dest_dir, patch_filename)
        else:
            cmd = "git format-patch -n1 --stdout %s^..%s | filterdiff -p1 %s > %s" % (commit, commit, filerange_root, patch_filename)
        runcmd(cmd)
        # A patch counts as empty when the file has no bytes or when
        # filterdiff finds no diff in it (only preamble text left over).
        has_changes = not (os.path.getsize(patch_filename) == 0 or
                           runcmd("filterdiff %s" % patch_filename) == "")
        if has_changes:
            logger.info(patch_filename)
        else:
            os.remove(patch_filename)
            logger.info("(skipping %s - no changes)", name)
884
def update_with_history(conf, components, revisions, repos):
    '''Update all components with full history.

    Works by importing all commits reachable from a component's
    current head revision.  If those commits are rooted in an already
    imported commit, their content gets mixed with the content of the
    combined repo of that commit (new or modified files overwritten,
    removed files removed).

    The last commit is an artificial merge commit that merges all the
    updated components into the combined repository.

    The HEAD ref only gets updated at the very end. All intermediate work
    happens in a worktree which will get garbage collected by git eventually
    after a failure.

    Arguments:
    conf        -- configuration object; conf.repos[name] holds the
                   per-component settings read here (local_repo_dir,
                   dest_dir, branch, hook, file_filter, file_exclude,
                   last_revision)
    components  -- component names, passed on to conf_commit_msg() and
                   commit_conf_file()
    revisions   -- mapping from component name to the revision to import
                   up to; a missing entry means the component's branch
    repos       -- names of the components to update

    Calls sys.exit(1) when a revision/branch check fails.
    '''
    # Remember current HEAD and what we need to add to it.
    head = runcmd("git rev-parse HEAD").strip()
    additional_heads = {}

    # Track the mapping between original commit and commit in the
    # combined repo. We do not have to distinguish between components,
    # because commit hashes are different anyway. Often we can
    # skip find_revs() entirely (for example, when all new commits
    # are derived from the last imported revision).
    #
    # Using "head" (typically the merge commit) instead of the actual
    # commit for the component leads to a nicer history in the combined
    # repo.
    old2new_revs = {}
    for name in repos:
        repo = conf.repos[name]
        revision = repo['last_revision']
        if revision:
            old2new_revs[revision] = head

    def add_p(parents):
        '''Insert -p before each entry.'''
        parameters = []
        for p in parents:
            parameters.append('-p')
            parameters.append(p)
        return parameters

    # Do all intermediate work with a separate work dir and index,
    # chosen via env variables (can't use "git worktree", it is too
    # new). This is useful (no changes to current work tree unless the
    # update succeeds) and required (otherwise we end up temporarily
    # removing the combo-layer hooks that we currently use when
    # importing a new component).
    #
    # Not cleaned up after a failure at the moment.
    wdir = os.path.join(os.getcwd(), ".git", "combo-layer")
    windex = wdir + ".index"
    if os.path.isdir(wdir):
        shutil.rmtree(wdir)
    os.mkdir(wdir)
    # os.environ.copy() yields a plain dict. Assigning to a deep copy of
    # os.environ would go through os._Environ.__setitem__, which calls
    # putenv() and thereby alters the environment of this very process.
    wenv = os.environ.copy()
    wenv["GIT_WORK_TREE"] = wdir
    wenv["GIT_INDEX_FILE"] = windex
    # This one turned out to be needed in practice.
    wenv["GIT_OBJECT_DIRECTORY"] = os.path.join(os.getcwd(), ".git", "objects")
    # Keyword arguments for runcmd(): "wargs" runs in the scratch work tree
    # of the combined repo, "largs" (below) in the component's repo.
    wargs = {"destdir": wdir, "env": wenv}

    for name in repos:
        revision = revisions.get(name, None)
        repo = conf.repos[name]
        ldir = repo['local_repo_dir']
        dest_dir = repo['dest_dir']
        branch = repo.get('branch', "master")
        hook = repo.get('hook', None)
        largs = {"destdir": ldir, "env": None}
        file_include = repo.get('file_filter', '').split()
        file_include.sort() # make sure that short entries like '.' come first.
        file_exclude = repo.get('file_exclude', '').split()

        def include_file(path):
            '''True if path is selected by the component's file_filter.'''
            if not file_include:
                # No explicit filter set, include file.
                return True
            for pattern in file_include:
                if pattern == '.':
                    # Another special case: include current directory and thus all files.
                    return True
                if os.path.commonprefix((pattern, path)) == pattern:
                    # Included in directory or direct file match.
                    # NOTE(review): commonprefix() compares character by
                    # character, so "src" also selects "srcfoo/x".
                    return True
                # Check for wildcard match *with* allowing * to match /, i.e.
                # src/*.c does match src/foobar/*.c. That's not how it is done elsewhere
                # when passing the filtering to "git archive", but it is unclear what
                # the intended semantic is (the comment on file_exclude that "append a * wildcard
                # at the end" to match the full content of a directories implies that
                # slashes are indeed not special), so here we simply do what's easy to
                # implement in Python.
                logger.debug('fnmatch(%s, %s)' % (path, pattern))
                if fnmatch.fnmatchcase(path, pattern):
                    return True
            return False

        def exclude_file(path):
            '''True if path matches one of the file_exclude patterns.'''
            for pattern in file_exclude:
                if fnmatch.fnmatchcase(path, pattern):
                    return True
            return False

        def file_filter(files):
            '''Clean up file list so that only included files remain.'''
            # Slice assignment keeps the in-place semantic callers rely on
            # without the quadratic cost of repeated "del files[index]".
            files[:] = [f for f in files if include_file(f) and not exclude_file(f)]


        # Generate the revision list.
        logger.info("Analyzing commits from %s..." % name)
        top_revision = revision or branch
        if not check_rev_branch(name, ldir, top_revision, branch):
            sys.exit(1)

        last_revision = repo['last_revision']
        rev_list_args = "--full-history --sparse --topo-order --reverse"
        if not last_revision:
            logger.info("Warning: last_revision of component %s is not set, starting from the first commit" % name)
            rev_list_args = rev_list_args + ' ' + top_revision
        else:
            if not check_rev_branch(name, ldir, last_revision, branch):
                sys.exit(1)
            rev_list_args = "%s %s..%s" % (rev_list_args, last_revision, top_revision)

            # By definition, the current HEAD contains the latest imported
            # commit of each component. We use that as initial mapping even
            # though the commits do not match exactly because
            # a) it always works (in contrast to find_revs, which relies on special
            #    commit messages)
            # b) it is faster than find_revs, which will only be called on demand
            #    and can be skipped entirely in most cases
            # c) last but not least, the combined history looks nicer when all
            #    new commits are rooted in the same merge commit
            old2new_revs[last_revision] = head

        # We care about all commits (--full-history and --sparse) and
        # we want to reconstruct the topology and thus do not care
        # about ordering by time (--topo-order). We ask for the ones
        # we need to import first to be listed first (--reverse).
        revs = runcmd("git rev-list %s" % rev_list_args, **largs).split()
        logger.debug("To be imported: %s" % revs)
        # Now 'revs' contains all revisions reachable from the top revision.
        # All revisions derived from the 'last_revision' definitely are new,
        # whereas the others may or may not have been imported before. For
        # a linear history in the component, that second set will be empty.
        # To distinguish between them, we also get the shorter list
        # of revisions starting at the ancestor.
        if last_revision:
            ancestor_revs = runcmd("git rev-list --ancestry-path %s" % rev_list_args, **largs).split()
        else:
            ancestor_revs = []
        logger.debug("Ancestors: %s" % ancestor_revs)
        # Sets for the membership tests done once per imported revision;
        # the lists above are kept for ordering and logging.
        rev_set = set(revs)
        ancestor_set = set(ancestor_revs)

        # Now import each revision.
        logger.info("Importing commits from %s..." % name)
        def import_rev(rev):
            '''Import rev (and, recursively, its parents) into the combined
            repo if needed and return the corresponding new commit hash.'''
            global scanned_revs

            # If it is part of the new commits, we definitely need
            # to import it. Otherwise we need to check, we might have
            # imported it before. If it was imported and we merely
            # fail to find it because commit messages did not track
            # the mapping, then we end up importing it again. So
            # combined repos using "updating with history" really should
            # enable the "From ... rev:" commit header modifications.
            if rev not in ancestor_set and rev not in old2new_revs and not scanned_revs:
                logger.debug("Revision %s triggers log analysis." % rev)
                find_revs(old2new_revs, head)
                scanned_revs = True
            new_rev = old2new_revs.get(rev, None)
            if new_rev:
                return new_rev

            # If the commit is not in the original list of revisions
            # to be imported, then it must be a parent of one of those
            # commits and it was skipped during earlier imports or not
            # found. Importing such merge commits leads to very ugly
            # history (long cascade of merge commits which all point
            # to older commits) when switching from "update via
            # patches" to "update with history".
            #
            # We can avoid importing merge commits if all non-merge commits
            # reachable from it were already imported. In that case we
            # can root the new commits in the current head revision.
            def is_imported(prev):
                parents = runcmd("git show --no-patch --pretty=format:%P " + prev, **largs).split()
                if len(parents) > 1:
                    for p in parents:
                        if not is_imported(p):
                            logger.debug("Must import %s because %s is not imported." % (rev, p))
                            return False
                    return True
                elif prev in old2new_revs:
                    return True
                else:
                    logger.debug("Must import %s because %s is not imported." % (rev, prev))
                    return False
            if rev not in rev_set and is_imported(rev):
                old2new_revs[rev] = head
                return head

            # Need to import rev. Collect some information about it.
            logger.debug("Importing %s" % rev)
            (parents, author_name, author_email, author_timestamp, body) = \
                runcmd("git show --no-patch --pretty=format:%P%x00%an%x00%ae%x00%at%x00%B " + rev, **largs).split(chr(0))
            parents = parents.split()
            if parents:
                # Arbitrarily pick the first parent as base. It may or may not have
                # been imported before. For example, if the parent is a merge commit
                # and previously the combined repository used patching as update
                # method, then the actual merge commit parent never was imported.
                # To cover this, we recursively import parents.
                parent = parents[0]
                new_parent = import_rev(parent)
                # Clean index and working tree. TODO: can we combine this and the
                # next into one command with less file IO?
                # "git reset --hard" does not work, it changes HEAD of the parent
                # repo, which we wanted to avoid. Probably need to keep
                # track of the rev that corresponds to the index and use apply_commit().
                runcmd("git rm -q --ignore-unmatch -rf .", **wargs)
                # Update index and working tree to match the parent.
                runcmd("git checkout -q -f %s ." % new_parent, **wargs)
            else:
                parent = None
                # Clean index and working tree.
                runcmd("git rm -q --ignore-unmatch -rf .", **wargs)

            # Modify index and working tree such that it mirrors the commit.
            apply_commit(parent, rev, largs, wargs, dest_dir, file_filter=file_filter)

            # Now commit.
            new_tree = runcmd("git write-tree", **wargs).strip()
            env = dict(wenv)
            env['GIT_AUTHOR_NAME'] = author_name
            env['GIT_AUTHOR_EMAIL'] = author_email
            env['GIT_AUTHOR_DATE'] = author_timestamp
            if hook:
                # Need to turn the verbatim commit message into something resembling a patch header
                # for the hook.
                patch = tempfile.NamedTemporaryFile(mode='wt', delete=False)
                try:
                    # Leaving the "with" closes (and flushes) the file
                    # before the hook gets to see it.
                    with patch:
                        patch.write('Subject: [PATCH] ')
                        patch.write(body)
                        patch.write('\n---\n')
                    runcmd([hook, patch.name, rev, name])
                    with open(patch.name) as f:
                        body = f.read()[len('Subject: [PATCH] '):][:-len('\n---\n')]
                finally:
                    # delete=False means nobody else removes the file.
                    try:
                        os.unlink(patch.name)
                    except OSError:
                        # Already gone (e.g. removed by the hook); nothing
                        # left to clean up.
                        pass

            # We can skip non-merge commits that did not change any files. Those are typically
            # the result of file filtering, although they could also have been introduced
            # intentionally upstream, in which case we drop some information here.
            if len(parents) == 1:
                parent_rev = import_rev(parents[0])
                old_tree = runcmd("git show -s --pretty=format:%T " + parent_rev, **wargs).strip()
                commit = old_tree != new_tree
                if not commit:
                    new_rev = parent_rev
            else:
                commit = True
            if commit:
                new_rev = runcmd("git commit-tree".split() + add_p([import_rev(p) for p in parents]) +
                                 ["-m", body, new_tree],
                                 env=env).strip()
            old2new_revs[rev] = new_rev

            return new_rev

        if revs:
            for rev in revs:
                import_rev(rev)
            # Remember how to update our current head. New components get added,
            # updated components get the delta between current head and the updated component
            # applied.
            additional_heads[old2new_revs[revs[-1]]] = head if repo['last_revision'] else None
            repo['last_revision'] = revs[-1]

    # Now construct the final merge commit. We create the tree by
    # starting with the head and applying the changes from each
    # components imported head revision.
    if additional_heads:
        runcmd("git reset --hard", **wargs)
        for rev, base in additional_heads.items():
            apply_commit(base, rev, wargs, wargs, None)

        # Commit with all component branches as parents as well as the previous head.
        logger.info("Writing final merge commit...")
        msg = conf_commit_msg(conf, components)
        new_tree = runcmd("git write-tree", **wargs).strip()
        new_rev = runcmd("git commit-tree".split() +
                         add_p([head] + list(additional_heads.keys())) +
                         ["-m", msg, new_tree],
                         **wargs).strip()
        # And done! This is the first time we change the HEAD in the actual work tree.
        runcmd("git reset --hard %s" % new_rev)

        # Update and stage the (potentially modified)
        # combo-layer.conf, but do not commit separately.
        for name in repos:
            repo = conf.repos[name]
            rev = repo['last_revision']
            conf.update(name, "last_revision", rev)
        if commit_conf_file(conf, components, False):
            # Must augment the previous commit.
            runcmd("git commit --amend -C HEAD")
1197
1198
# Becomes True once the log of the combined repo has been scanned with
# find_revs(), so that the scan happens at most once per run.
scanned_revs = False
def find_revs(old2new, head):
    '''Extend old2new (original commit hash -> commit hash in the
    combined repo) with what the commit messages reachable from head
    reveal. Depends on the "From ... rev: ..." convention. Entries
    already present in old2new are left untouched.'''
    logger.info("Analyzing log messages to find previously imported commits...")
    known_before = len(old2new)
    log_cmd = "git log --grep='From .* rev: [a-fA-F0-9][a-fA-F0-9]*' --pretty=format:%H%x00%B%x00 " + head
    # NUL-separated stream alternating between commit hash and message.
    fields = runcmd(log_cmd).split(chr(0))
    marker = re.compile(r'From .* rev: ([a-fA-F0-9]+)')
    for combined_rev, message in zip(fields[0::2], fields[1::2]):
        # Use the last one, in the unlikely case there are more than one.
        original_rev = marker.findall(message)[-1]
        old2new.setdefault(original_rev, combined_rev.strip())
    logger.info("Found %d additional commits, leading to: %s" % (len(old2new) - known_before, old2new))
1214
1215
def apply_commit(parent, rev, largs, wargs, dest_dir, file_filter=None):
    '''Compare revision against parent, remove files deleted in the
    commit, re-write new or modified ones. Moves them into dest_dir.
    Optionally filters files.

    parent      -- revision to diff against; when false, every file of
                   rev is copied
    rev         -- revision whose content is to be mirrored
    largs       -- runcmd() keyword arguments (destdir, env) for the repo
                   the revisions are read from
    wargs       -- runcmd() keyword arguments (destdir, env) for the work
                   tree and index that get modified
    dest_dir    -- directory below the work tree receiving the files; a
                   false value means the work tree root
    file_filter -- optional callable that prunes a list of paths in place

    Calls sys.exit(1) on an unknown status letter from "git diff-tree".
    '''
    # shlex.quote is the function pipes.quote aliases; the pipes module
    # itself is deprecated, so take it from shlex directly.
    import shlex

    if not dest_dir:
        dest_dir = "."
    # -r recurses into sub-directories, given is the full overview of
    # what changed.  We do not care about copy/edits or renames, so we
    # can disable those with --no-renames (but we still parse them,
    # because it was not clear from git documentation whether C and M
    # lines can still occur).
    logger.debug("Applying changes between %s and %s in %s" % (parent, rev, largs["destdir"]))
    delete = []
    update = []
    if parent:
        # Apply delta.
        changes = runcmd("git diff-tree --no-commit-id --no-renames --name-status -r --raw -z %s %s" % (parent, rev), **largs).split(chr(0))
        # The output alternates between status and name; pairing them up
        # also drops the empty string left behind by the trailing NUL.
        for status, name in zip(*[iter(changes)]*2):
            if status[0] in "ACMRT":
                update.append(name)
            elif status[0] in "D":
                delete.append(name)
            else:
                logger.error("Unknown status %s of file %s in revision %s" % (status, name, rev))
                sys.exit(1)
    else:
        # Copy all files. Splitting the NUL-terminated listing leaves an
        # empty string behind; it must not be handed to "git archive" or
        # "git add" as if it were a path.
        listing = runcmd("git ls-tree -r --name-only -z %s" % rev, **largs)
        update.extend(name for name in listing.split(chr(0)) if name)

    # Include/exclude files as defined in the component config.
    # Both updated and deleted file lists get filtered, because it might happen
    # that a file gets excluded, pulled from a different component, and then the
    # excluded file gets deleted. In that case we must keep the copy.
    if file_filter:
        file_filter(update)
        file_filter(delete)

    # We export into a tar archive here and extract with tar because it is simple (no
    # need to implement file and symlink writing ourselves) and gives us some degree
    # of parallel IO. The downside is that we have to pass the list of files via
    # command line parameters - hopefully there will never be too many at once.
    if update:
        target = os.path.join(wargs["destdir"], dest_dir)
        if not os.path.isdir(target):
            os.makedirs(target)
        quoted_target = shlex.quote(target)
        # os.sysconf('SC_ARG_MAX') is lying: running a command with
        # string length 629343 already failed with "Argument list too
        # long" although SC_ARG_MAX = 2097152. "man execve" explains
        # the limitations, but those are pretty complicated. So here
        # we just hard-code a fixed value which is more likely to work.
        max_cmdsize = 64 * 1024
        arg_max = os.sysconf('SC_ARG_MAX')
        # Walk the list with an index instead of popping from the front,
        # which would shift the whole remaining list for every file.
        next_index = 0
        while next_index < len(update):
            quoted_args = []
            unquoted_args = []
            cmdsize = 100 + len(quoted_target)
            while next_index < len(update):
                name = update[next_index]
                quoted_next = shlex.quote(name)
                size_next = len(quoted_next) + len(dest_dir) + 1
                logger.debug('cmdline length %d + %d < %d?' % (cmdsize, size_next, arg_max))
                # Every batch takes at least one file, so an oversized
                # path cannot stall the loop.
                if quoted_args and cmdsize + size_next >= max_cmdsize:
                    logger.debug('Breaking the cmdline at length %d' % cmdsize)
                    break
                quoted_args.append(quoted_next)
                unquoted_args.append(name)
                cmdsize += size_next
                next_index += 1
            logger.debug('Final cmdline length %d / %d' % (cmdsize, arg_max))
            cmd = "git archive %s %s | tar -C %s -xf -" % (rev, ' '.join(quoted_args), quoted_target)
            logger.debug('First cmdline length %d' % len(cmd))
            runcmd(cmd, **largs)
            cmd = "git add -f".split() + [os.path.join(dest_dir, x) for x in unquoted_args]
            logger.debug('Second cmdline length %d' % reduce(lambda x, y: x + len(y), cmd, 0))
            runcmd(cmd, **wargs)
    if delete:
        runcmd("git rm -f --ignore-unmatch".split() + [os.path.join(dest_dir, x) for x in delete], **wargs)
1296
def action_error(conf, args):
    """Fallback action handler: log that args[0] is not a valid action."""
    requested = args[0]
    logger.info("invalid action %s" % requested)
1299
# Maps the action name given on the command line to the function
# implementing it; every handler is called as handler(conf, args).
actions = dict(
    init=action_init,
    update=action_update,
    pull=action_pull,
    splitpatch=action_splitpatch,
)
1306
def main():
    """Parse the command line and run the requested action.

    Returns 0 after the action handler has run and 1 when no action was
    given, the action is unknown or the config file does not exist. The
    error cases used to fall through without a return value, which made
    the script exit with status 0 despite the logged error.
    """
    parser = optparse.OptionParser(
        version = "Combo Layer Repo Tool version %s" % __version__,
        usage = """%prog [options] action

Create and update a combination layer repository from multiple component repositories.

Action:
  init                 initialise the combo layer repo
  update [components]  get patches from component repos and apply them to the combo repo
  pull [components]    just pull component repos only
  splitpatch [commit]  generate commit patch and split per component, default commit is HEAD""")

    parser.add_option("-c", "--conf", help = "specify the config file (conf/combo-layer.conf is the default).",
               action = "store", dest = "conffile", default = "conf/combo-layer.conf")

    parser.add_option("-i", "--interactive", help = "interactive mode, user can edit the patch list and patches",
               action = "store_true", dest = "interactive", default = False)

    parser.add_option("-D", "--debug", help = "output debug information",
               action = "store_true", dest = "debug", default = False)

    parser.add_option("-n", "--no-pull", help = "skip pulling component repos during update",
               action = "store_true", dest = "nopull", default = False)

    parser.add_option("--hard-reset",
               help = "instead of pull do fetch and hard-reset in component repos",
               action = "store_true", dest = "hard_reset", default = False)

    parser.add_option("-H", "--history", help = "import full history of components during init",
                      action = "store_true", default = False)

    # sys.argv is passed in full, so args[0] is the program name and
    # args[1] the action.
    options, args = parser.parse_args(sys.argv)

    # Dispatch to action handler
    if len(args) == 1:
        logger.error("No action specified, exiting")
        parser.print_help()
        return 1
    if args[1] not in actions:
        logger.error("Unsupported action %s, exiting\n" % (args[1]))
        parser.print_help()
        return 1
    if not os.path.exists(options.conffile):
        logger.error("No valid config file, exiting\n")
        parser.print_help()
        return 1

    if options.debug:
        logger.setLevel(logging.DEBUG)
    confdata = Configuration(options)
    initmode = (args[1] == 'init')
    confdata.sanity_check(initmode)
    # The handler receives the arguments starting with the action name.
    actions.get(args[1], action_error)(confdata, args[1:])
    return 0
1358
if __name__ == "__main__":
    # Exit with whatever main() returned; an uncaught exception is printed
    # with its traceback and turned into exit status 1.
    ret = 1
    try:
        ret = main()
    except Exception:
        import traceback
        traceback.print_exc()
    sys.exit(ret)
1367