xref: /openbmc/openbmc/poky/bitbake/lib/bb/fetch2/git.py (revision bccaff34)
1"""
2BitBake 'Fetch' git implementation
3
4The git fetcher supports SRC_URI entries with the format:
5SRC_URI = "git://some.host/somepath;OptionA=xxx;OptionB=xxx;..."
6
7Supported SRC_URI options are:
8
9- branch
10   The git branch to retrieve from. The default is "master"
11
12   This option also supports multiple branch fetching, with branches
13   separated by commas.  In multiple branches case, the name option
14   must have the same number of names to match the branches, which is
15   used to specify the SRC_REV for the branch
16   e.g:
17   SRC_URI="git://some.host/somepath;branch=branchX,branchY;name=nameX,nameY"
18   SRCREV_nameX = "xxxxxxxxxxxxxxxxxxxx"
19   SRCREV_nameY = "YYYYYYYYYYYYYYYYYYYY"
20
21- tag
22    The git tag to retrieve. The default is "master"
23
24- protocol
25   The method to use to access the repository. Common options are "git",
26   "http", "https", "file", "ssh" and "rsync". The default is "git".
27
28- rebaseable
29   rebaseable indicates that the upstream git repo may rebase in the future,
30   and current revision may disappear from upstream repo. This option will
31   remind fetcher to preserve local cache carefully for future use.
32   The default value is "0", set rebaseable=1 for rebaseable git repo.
33
34- nocheckout
35   Don't checkout source code when unpacking. Set this option for a recipe
36   that has its own routine to check out code.
37   The default is "0", set nocheckout=1 if needed.
38
39- bareclone
40   Create a bare clone of the source code and don't checkout the source code
41   when unpacking. Set this option for the recipe who has its own routine to
42   checkout code and tracking branch requirements.
43   The default is "0", set bareclone=1 if needed.
44
45- nobranch
46   Don't check the SHA validation for branch. set this option for the recipe
47   referring to commit which is valid in any namespace (branch, tag, ...)
48   instead of branch.
49   The default is "0", set nobranch=1 if needed.
50
51- usehead
52   For local git:// urls to use the current branch HEAD as the revision for use with
53   AUTOREV. Implies nobranch.
54
55"""
56
57# Copyright (C) 2005 Richard Purdie
58#
59# SPDX-License-Identifier: GPL-2.0-only
60#
61
62import collections
63import errno
64import fnmatch
65import os
66import re
67import shlex
68import subprocess
69import tempfile
70import bb
71import bb.progress
72from contextlib import contextmanager
73from   bb.fetch2 import FetchMethod
74from   bb.fetch2 import runfetchcmd
75from   bb.fetch2 import logger
76
77
78sha1_re = re.compile(r'^[0-9a-f]{40}$')
79slash_re = re.compile(r"/+")
80
class GitProgressHandler(bb.progress.LineFilterProgressHandler):
    """Extract progress information from git output"""

    def __init__(self, d):
        # Accumulates output between stage transitions (see write()).
        self._buffer = ''
        # Highest object count seen so far during 'Counting objects'.
        self._count = 0
        super(GitProgressHandler, self).__init__(d)
        # Send an initial progress event so the bar gets shown
        self._fire_progress(-1)

    def write(self, string):
        """Parse a chunk of git output and emit progress events.

        git reports work in sequential stages; each stage is mapped to a
        weighted slice of the overall bar so progress moves monotonically
        across the whole clone/fetch.
        """
        self._buffer += string
        stages = ['Counting objects', 'Compressing objects', 'Receiving objects', 'Resolving deltas']
        stage_weights = [0.2, 0.05, 0.5, 0.25]
        stagenum = 0
        # Scan in reverse so the most recent stage present in the buffer
        # wins; the buffer is reset once a stage marker is consumed.
        for i, stage in reversed(list(enumerate(stages))):
            if stage in self._buffer:
                stagenum = i
                self._buffer = ''
                break
        self._status = stages[stagenum]
        percs = re.findall(r'(\d+)%', string)
        if percs:
            # Overall progress = sum of the weights of completed stages
            # plus the current stage's percentage scaled by its weight.
            progress = int(round((int(percs[-1]) * stage_weights[stagenum]) + (sum(stage_weights[:stagenum]) * 100)))
            rates = re.findall(r'([\d.]+ [a-zA-Z]*/s+)', string)
            if rates:
                rate = rates[-1]
            else:
                rate = None
            self.update(progress, rate)
        else:
            if stagenum == 0:
                # 'Counting objects' prints no percentages; report the raw
                # (negative => indeterminate) object count instead.
                percs = re.findall(r': (\d+)', string)
                if percs:
                    count = int(percs[-1])
                    if count > self._count:
                        self._count = count
                        self._fire_progress(-count)
        super(GitProgressHandler, self).write(string)
119
120
class Git(FetchMethod):
    """Class to fetch a module or modules from git repositories"""

    # Root of the bitbake checkout (three levels up from this module),
    # used to locate the bundled 'git-make-shallow' helper script.
    bitbake_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.join(os.path.abspath(__file__))), '..', '..', '..'))
    make_shallow_path = os.path.join(bitbake_dir, 'bin', 'git-make-shallow')

    def init(self, d):
        # No git-wide initialisation is required.
        pass
128
129    def supports(self, ud, d):
130        """
131        Check to see if a given url can be fetched with git.
132        """
133        return ud.type in ['git']
134
135    def supports_checksum(self, urldata):
136        return False
137
    def urldata_init(self, ud, d):
        """
        init git specific variable within url data
        so that the git method like latest_revision() can work
        """
        # Transport protocol: an explicit ;protocol= parameter wins, a URL
        # without a host is treated as a local file repository.
        if 'protocol' in ud.parm:
            ud.proto = ud.parm['protocol']
        elif not ud.host:
            ud.proto = 'file'
        else:
            ud.proto = "git"
        if ud.host == "github.com" and ud.proto == "git":
            # github stopped supporting git protocol
            # https://github.blog/2021-09-01-improving-git-protocol-security-github/#no-more-unauthenticated-git
            ud.proto = "https"
            bb.warn("URL: %s uses git protocol which is no longer supported by github. Please change to ;protocol=https in the url." % ud.url)

        if not ud.proto in ('git', 'file', 'ssh', 'http', 'https', 'rsync'):
            raise bb.fetch2.ParameterError("Invalid protocol type", ud.url)

        # Boolean URL parameters; all default to off ("0").
        ud.nocheckout = ud.parm.get("nocheckout","0") == "1"

        ud.rebaseable = ud.parm.get("rebaseable","0") == "1"

        ud.nobranch = ud.parm.get("nobranch","0") == "1"

        # usehead implies nobranch
        ud.usehead = ud.parm.get("usehead","0") == "1"
        if ud.usehead:
            if ud.proto != "file":
                 raise bb.fetch2.ParameterError("The usehead option is only for use with local ('protocol=file') git repositories", ud.url)
            ud.nobranch = 1

        # bareclone implies nocheckout
        ud.bareclone = ud.parm.get("bareclone","0") == "1"
        if ud.bareclone:
            ud.nocheckout = 1

        # One branch (and unresolved rev) per name; the counts must match.
        ud.unresolvedrev = {}
        branches = ud.parm.get("branch", "").split(',')
        if branches == [""] and not ud.nobranch:
            bb.warn("URL: %s does not set any branch parameter. The future default branch used by tools and repositories is uncertain and we will therefore soon require this is set in all git urls." % ud.url)
            branches = ["master"]
        if len(branches) != len(ud.names):
            raise bb.fetch2.ParameterError("The number of name and branch parameters is not balanced", ud.url)

        ud.noshared = d.getVar("BB_GIT_NOSHARED") == "1"

        # Clone flags: -n (no checkout), -s (shared object store) unless
        # disabled, --mirror for bare clones.
        ud.cloneflags = "-n"
        if not ud.noshared:
            ud.cloneflags += " -s"
        if ud.bareclone:
            ud.cloneflags += " --mirror"

        ud.shallow = d.getVar("BB_GIT_SHALLOW") == "1"
        ud.shallow_extra_refs = (d.getVar("BB_GIT_SHALLOW_EXTRA_REFS") or "").split()

        # Global shallow-depth default; must parse as a non-negative int.
        depth_default = d.getVar("BB_GIT_SHALLOW_DEPTH")
        if depth_default is not None:
            try:
                depth_default = int(depth_default or 0)
            except ValueError:
                raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH: %s" % depth_default)
            else:
                if depth_default < 0:
                    raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH: %s" % depth_default)
        else:
            depth_default = 1
        ud.shallow_depths = collections.defaultdict(lambda: depth_default)

        revs_default = d.getVar("BB_GIT_SHALLOW_REVS")
        ud.shallow_revs = []
        ud.branches = {}
        for pos, name in enumerate(ud.names):
            branch = branches[pos]
            ud.branches[name] = branch
            ud.unresolvedrev[name] = branch

            # Per-name depth override, validated like the default above.
            shallow_depth = d.getVar("BB_GIT_SHALLOW_DEPTH_%s" % name)
            if shallow_depth is not None:
                try:
                    shallow_depth = int(shallow_depth or 0)
                except ValueError:
                    raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH_%s: %s" % (name, shallow_depth))
                else:
                    if shallow_depth < 0:
                        raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH_%s: %s" % (name, shallow_depth))
                    ud.shallow_depths[name] = shallow_depth

            # Per-name revs override falls back to the global default.
            revs = d.getVar("BB_GIT_SHALLOW_REVS_%s" % name)
            if revs is not None:
                ud.shallow_revs.extend(revs.split())
            elif revs_default is not None:
                ud.shallow_revs.extend(revs_default.split())

        if (ud.shallow and
                not ud.shallow_revs and
                all(ud.shallow_depths[n] == 0 for n in ud.names)):
            # Shallow disabled for this URL
            ud.shallow = False

        if ud.usehead:
            # When usehead is set let's associate 'HEAD' with the unresolved
            # rev of this repository. This will get resolved into a revision
            # later. If an actual revision happens to have also been provided
            # then this setting will be overridden.
            for name in ud.names:
                ud.unresolvedrev[name] = 'HEAD'

        ud.basecmd = d.getVar("FETCHCMD_git") or "git -c gc.autoDetach=false -c core.pager=cat"

        write_tarballs = d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0"
        # Rebaseable repos always get a mirror tarball (see comment below).
        ud.write_tarballs = write_tarballs != "0" or ud.rebaseable
        ud.write_shallow_tarballs = (d.getVar("BB_GENERATE_SHALLOW_TARBALLS") or write_tarballs) != "0"

        ud.setup_revisions(d)

        for name in ud.names:
            # Ensure any revision that doesn't look like a SHA-1 is translated into one
            if not sha1_re.match(ud.revisions[name] or ''):
                if ud.revisions[name]:
                    ud.unresolvedrev[name] = ud.revisions[name]
                ud.revisions[name] = self.latest_revision(ud, d, name)

        # Derive a filesystem-safe directory name from host + path.
        gitsrcname = '%s%s' % (ud.host.replace(':', '.'), ud.path.replace('/', '.').replace('*', '.').replace(' ','_'))
        if gitsrcname.startswith('.'):
            gitsrcname = gitsrcname[1:]

        # For a rebaseable git repo, it is necessary to keep a mirror tar ball
        # per revision, so that even if the revision disappears from the
        # upstream repo in the future, the mirror will remain intact and still
        # contain the revision
        if ud.rebaseable:
            for name in ud.names:
                gitsrcname = gitsrcname + '_' + ud.revisions[name]

        dl_dir = d.getVar("DL_DIR")
        gitdir = d.getVar("GITDIR") or (dl_dir + "/git2")
        ud.clonedir = os.path.join(gitdir, gitsrcname)
        ud.localfile = ud.clonedir

        mirrortarball = 'git2_%s.tar.gz' % gitsrcname
        ud.fullmirror = os.path.join(dl_dir, mirrortarball)
        ud.mirrortarballs = [mirrortarball]
        if ud.shallow:
            # Encode everything that affects the shallow clone's contents
            # (bare-ness, revisions, depths, kept refs) into the tarball name
            # so different configurations never collide.
            tarballname = gitsrcname
            if ud.bareclone:
                tarballname = "%s_bare" % tarballname

            if ud.shallow_revs:
                tarballname = "%s_%s" % (tarballname, "_".join(sorted(ud.shallow_revs)))

            for name, revision in sorted(ud.revisions.items()):
                tarballname = "%s_%s" % (tarballname, ud.revisions[name][:7])
                depth = ud.shallow_depths[name]
                if depth:
                    tarballname = "%s-%s" % (tarballname, depth)

            shallow_refs = []
            if not ud.nobranch:
                shallow_refs.extend(ud.branches.values())
            if ud.shallow_extra_refs:
                shallow_refs.extend(r.replace('refs/heads/', '').replace('*', 'ALL') for r in ud.shallow_extra_refs)
            if shallow_refs:
                tarballname = "%s_%s" % (tarballname, "_".join(sorted(shallow_refs)).replace('/', '.'))

            fetcher = self.__class__.__name__.lower()
            ud.shallowtarball = '%sshallow_%s.tar.gz' % (fetcher, tarballname)
            ud.fullshallow = os.path.join(dl_dir, ud.shallowtarball)
            # The shallow tarball is preferred over the full mirror tarball.
            ud.mirrortarballs.insert(0, ud.shallowtarball)
307            ud.mirrortarballs.insert(0, ud.shallowtarball)
308
    def localpath(self, ud, d):
        """Return the local path of the bare clone for this url."""
        return ud.clonedir
311
312    def need_update(self, ud, d):
313        return self.clonedir_need_update(ud, d) or self.shallow_tarball_need_update(ud) or self.tarball_need_update(ud)
314
315    def clonedir_need_update(self, ud, d):
316        if not os.path.exists(ud.clonedir):
317            return True
318        if ud.shallow and ud.write_shallow_tarballs and self.clonedir_need_shallow_revs(ud, d):
319            return True
320        for name in ud.names:
321            if not self._contains_ref(ud, d, name, ud.clonedir):
322                return True
323        return False
324
325    def clonedir_need_shallow_revs(self, ud, d):
326        for rev in ud.shallow_revs:
327            try:
328                runfetchcmd('%s rev-parse -q --verify %s' % (ud.basecmd, rev), d, quiet=True, workdir=ud.clonedir)
329            except bb.fetch2.FetchError:
330                return rev
331        return None
332
333    def shallow_tarball_need_update(self, ud):
334        return ud.shallow and ud.write_shallow_tarballs and not os.path.exists(ud.fullshallow)
335
336    def tarball_need_update(self, ud):
337        return ud.write_tarballs and not os.path.exists(ud.fullmirror)
338
339    def try_premirror(self, ud, d):
340        # If we don't do this, updating an existing checkout with only premirrors
341        # is not possible
342        if bb.utils.to_boolean(d.getVar("BB_FETCH_PREMIRRORONLY")):
343            return True
344        if os.path.exists(ud.clonedir):
345            return False
346        return True
347
    def download(self, ud, d):
        """Fetch url"""

        # A current clone is preferred to either tarball, a shallow tarball is
        # preferred to an out of date clone, and a missing clone will use
        # either tarball.
        if ud.shallow and os.path.exists(ud.fullshallow) and self.need_update(ud, d):
            ud.localpath = ud.fullshallow
            return
        elif os.path.exists(ud.fullmirror) and self.need_update(ud, d):
            # Seed or refresh the clone from the full mirror tarball.
            if not os.path.exists(ud.clonedir):
                bb.utils.mkdirhier(ud.clonedir)
                runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=ud.clonedir)
            else:
                # Unpack into a temporary directory and fetch from it so the
                # existing clone is updated rather than clobbered.
                tmpdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR'))
                runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=tmpdir)
                fetch_cmd = "LANG=C %s fetch -f --progress %s " % (ud.basecmd, shlex.quote(tmpdir))
                runfetchcmd(fetch_cmd, d, workdir=ud.clonedir)
        repourl = self._get_repo_url(ud)

        # If the repo still doesn't exist, fallback to cloning it
        if not os.path.exists(ud.clonedir):
            # We do this since git will use a "-l" option automatically for local urls where possible,
            # but it doesn't work when git/objects is a symlink, only works when it is a directory.
            if repourl.startswith("file://"):
                repourl_path = repourl[7:]
                objects = os.path.join(repourl_path, 'objects')
                if os.path.isdir(objects) and not os.path.islink(objects):
                    repourl = repourl_path
            clone_cmd = "LANG=C %s clone --bare --mirror %s %s --progress" % (ud.basecmd, shlex.quote(repourl), ud.clonedir)
            if ud.proto.lower() != 'file':
                bb.fetch2.check_network_access(d, clone_cmd, ud.url)
            progresshandler = GitProgressHandler(d)
            runfetchcmd(clone_cmd, d, log=progresshandler)

        # Update the checkout if needed
        if self.clonedir_need_update(ud, d):
            # Recreate 'origin' so the fetch below pulls from the current url.
            output = runfetchcmd("%s remote" % ud.basecmd, d, quiet=True, workdir=ud.clonedir)
            if "origin" in output:
                runfetchcmd("%s remote rm origin" % ud.basecmd, d, workdir=ud.clonedir)

            runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=ud.clonedir)

            # With nobranch, mirror every ref; otherwise only heads and tags.
            if ud.nobranch:
                fetch_cmd = "LANG=C %s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, shlex.quote(repourl))
            else:
                fetch_cmd = "LANG=C %s fetch -f --progress %s refs/heads/*:refs/heads/* refs/tags/*:refs/tags/*" % (ud.basecmd, shlex.quote(repourl))
            if ud.proto.lower() != 'file':
                bb.fetch2.check_network_access(d, fetch_cmd, ud.url)
            progresshandler = GitProgressHandler(d)
            runfetchcmd(fetch_cmd, d, log=progresshandler, workdir=ud.clonedir)
            # Compact the repository after the update.
            runfetchcmd("%s prune-packed" % ud.basecmd, d, workdir=ud.clonedir)
            runfetchcmd("%s pack-refs --all" % ud.basecmd, d, workdir=ud.clonedir)
            runfetchcmd("%s pack-redundant --all | xargs -r rm" % ud.basecmd, d, workdir=ud.clonedir)
            # The mirror tarball no longer matches the clone; remove it so it
            # is regenerated. A missing tarball is not an error.
            try:
                os.unlink(ud.fullmirror)
            except OSError as exc:
                if exc.errno != errno.ENOENT:
                    raise

        # Verify every requested revision is now actually present.
        for name in ud.names:
            if not self._contains_ref(ud, d, name, ud.clonedir):
                raise bb.fetch2.FetchError("Unable to find revision %s in branch %s even from upstream" % (ud.revisions[name], ud.branches[name]))

        if ud.shallow and ud.write_shallow_tarballs:
            missing_rev = self.clonedir_need_shallow_revs(ud, d)
            if missing_rev:
                raise bb.fetch2.FetchError("Unable to find revision %s even from upstream" % missing_rev)

        if self._contains_lfs(ud, d, ud.clonedir) and self._need_lfs(ud):
            # Unpack temporary working copy, use it to run 'git checkout' to force pre-fetching
            # of all LFS blobs needed at the srcrev.
            #
            # It would be nice to just do this inline here by running 'git-lfs fetch'
            # on the bare clonedir, but that operation requires a working copy on some
            # releases of Git LFS.
            with tempfile.TemporaryDirectory(dir=d.getVar('DL_DIR')) as tmpdir:
                # Do the checkout. This implicitly involves a Git LFS fetch.
                Git.unpack(self, ud, tmpdir, d)

                # Scoop up a copy of any stuff that Git LFS downloaded. Merge them into
                # the bare clonedir.
                #
                # As this procedure is invoked repeatedly on incremental fetches as
                # a recipe's SRCREV is bumped throughout its lifetime, this will
                # result in a gradual accumulation of LFS blobs in <ud.clonedir>/lfs
                # corresponding to all the blobs reachable from the different revs
                # fetched across time.
                #
                # Only do this if the unpack resulted in a .git/lfs directory being
                # created; this only happens if at least one blob needed to be
                # downloaded.
                if os.path.exists(os.path.join(tmpdir, "git", ".git", "lfs")):
                    runfetchcmd("tar -cf - lfs | tar -xf - -C %s" % ud.clonedir, d, workdir="%s/git/.git" % tmpdir)
442
    def build_mirror_data(self, ud, d):
        """Generate the mirror tarballs (shallow and/or full) from the clone."""

        # Create as a temp file and move atomically into position to avoid races
        @contextmanager
        def create_atomic(filename):
            fd, tfile = tempfile.mkstemp(dir=os.path.dirname(filename))
            try:
                yield tfile
                # os.umask() sets AND returns the previous mask; call it
                # twice to read the current umask without changing it, then
                # give the finished file conventional permissions (mkstemp
                # creates it 0600).
                umask = os.umask(0o666)
                os.umask(umask)
                os.chmod(tfile, (0o666 & ~umask))
                os.rename(tfile, filename)
            finally:
                os.close(fd)

        if ud.shallow and ud.write_shallow_tarballs:
            if not os.path.exists(ud.fullshallow):
                # exists() is False for a dangling symlink; remove it so the
                # rename below can succeed.
                if os.path.islink(ud.fullshallow):
                    os.unlink(ud.fullshallow)
                tempdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR'))
                shallowclone = os.path.join(tempdir, 'git')
                try:
                    self.clone_shallow_local(ud, shallowclone, d)

                    logger.info("Creating tarball of git repository")
                    with create_atomic(ud.fullshallow) as tfile:
                        runfetchcmd("tar -czf %s ." % tfile, d, workdir=shallowclone)
                    runfetchcmd("touch %s.done" % ud.fullshallow, d)
                finally:
                    bb.utils.remove(tempdir, recurse=True)
        elif ud.write_tarballs and not os.path.exists(ud.fullmirror):
            if os.path.islink(ud.fullmirror):
                os.unlink(ud.fullmirror)

            logger.info("Creating tarball of git repository")
            with create_atomic(ud.fullmirror) as tfile:
                # Stamp the tarball with the newest commit date so repeated
                # runs over the same history produce identical archives.
                mtime = runfetchcmd("git log --all -1 --format=%cD", d,
                        quiet=True, workdir=ud.clonedir)
                runfetchcmd("tar -czf %s --owner oe:0 --group oe:0 --mtime \"%s\" ."
                        % (tfile, mtime), d, workdir=ud.clonedir)
            runfetchcmd("touch %s.done" % ud.fullmirror, d)
484
    def clone_shallow_local(self, ud, dest, d):
        """Clone the repo and make it shallow.

        The upstream url of the new clone isn't set at this time, as it'll be
        set correctly when unpacked."""
        runfetchcmd("%s clone %s %s %s" % (ud.basecmd, ud.cloneflags, ud.clonedir, dest), d)

        to_parse, shallow_branches = [], []
        for name in ud.names:
            revision = ud.revisions[name]
            depth = ud.shallow_depths[name]
            # 'rev~(depth-1)^{}' names the oldest commit to retain for this
            # name; it is resolved to a SHA below.
            if depth:
                to_parse.append('%s~%d^{}' % (revision, depth - 1))

            # For nobranch, we need a ref, otherwise the commits will be
            # removed, and for non-nobranch, we truncate the branch to our
            # srcrev, to avoid keeping unnecessary history beyond that.
            branch = ud.branches[name]
            if ud.nobranch:
                ref = "refs/shallow/%s" % name
            elif ud.bareclone:
                ref = "refs/heads/%s" % branch
            else:
                ref = "refs/remotes/origin/%s" % branch

            shallow_branches.append(ref)
            runfetchcmd("%s update-ref %s %s" % (ud.basecmd, ref, revision), d, workdir=dest)

        # Map srcrev+depths to revisions
        parsed_depths = runfetchcmd("%s rev-parse %s" % (ud.basecmd, " ".join(to_parse)), d, workdir=dest)

        # Resolve specified revisions
        parsed_revs = runfetchcmd("%s rev-parse %s" % (ud.basecmd, " ".join('"%s^{}"' % r for r in ud.shallow_revs)), d, workdir=dest)
        shallow_revisions = parsed_depths.splitlines() + parsed_revs.splitlines()

        # Apply extra ref wildcards
        all_refs = runfetchcmd('%s for-each-ref "--format=%%(refname)"' % ud.basecmd,
                               d, workdir=dest).splitlines()
        for r in ud.shallow_extra_refs:
            # In a non-bare clone branches live under refs/remotes/origin/.
            if not ud.bareclone:
                r = r.replace('refs/heads/', 'refs/remotes/origin/')

            if '*' in r:
                matches = filter(lambda a: fnmatch.fnmatchcase(a, r), all_refs)
                shallow_branches.extend(matches)
            else:
                shallow_branches.append(r)

        # Make the repository shallow
        # Delegates the actual history truncation to the bundled
        # bin/git-make-shallow helper script.
        shallow_cmd = [self.make_shallow_path, '-s']
        for b in shallow_branches:
            shallow_cmd.append('-r')
            shallow_cmd.append(b)
        shallow_cmd.extend(shallow_revisions)
        runfetchcmd(subprocess.list2cmdline(shallow_cmd), d, workdir=dest)
540
    def unpack(self, ud, destdir, d):
        """ unpack the downloaded src to destdir"""

        subdir = ud.parm.get("subdir")
        subpath = ud.parm.get("subpath")
        readpathspec = ""
        def_destsuffix = "git/"

        # subpath restricts the checkout to a single path within the repo.
        if subpath:
            readpathspec = ":%s" % subpath
            def_destsuffix = "%s/" % os.path.basename(subpath.rstrip('/'))

        if subdir:
            # If 'subdir' param exists, create a dir and use it as destination for unpack cmd
            if os.path.isabs(subdir):
                # An absolute subdir must still live inside the unpack root.
                if not os.path.realpath(subdir).startswith(os.path.realpath(destdir)):
                    raise bb.fetch2.UnpackError("subdir argument isn't a subdirectory of unpack root %s" % destdir, ud.url)
                destdir = subdir
            else:
                destdir = os.path.join(destdir, subdir)
            def_destsuffix = ""

        destsuffix = ud.parm.get("destsuffix", def_destsuffix)
        destdir = ud.destdir = os.path.join(destdir, destsuffix)
        # Always start from a clean destination.
        if os.path.exists(destdir):
            bb.utils.prunedir(destdir)

        need_lfs = self._need_lfs(ud)

        if not need_lfs:
            # Stop git-lfs from downloading blobs during checkout.
            ud.basecmd = "GIT_LFS_SKIP_SMUDGE=1 " + ud.basecmd

        source_found = False
        source_error = []

        # Prefer cloning from an up-to-date local clone; fall back to
        # unpacking the shallow mirror tarball if that is unavailable.
        clonedir_is_up_to_date = not self.clonedir_need_update(ud, d)
        if clonedir_is_up_to_date:
            runfetchcmd("%s clone %s %s/ %s" % (ud.basecmd, ud.cloneflags, ud.clonedir, destdir), d)
            source_found = True
        else:
            source_error.append("clone directory not available or not up to date: " + ud.clonedir)

        if not source_found:
            if ud.shallow:
                if os.path.exists(ud.fullshallow):
                    bb.utils.mkdirhier(destdir)
                    runfetchcmd("tar -xzf %s" % ud.fullshallow, d, workdir=destdir)
                    source_found = True
                else:
                    source_error.append("shallow clone not available: " + ud.fullshallow)
            else:
                source_error.append("shallow clone not enabled")

        if not source_found:
            raise bb.fetch2.UnpackError("No up to date source found: " + "; ".join(source_error), ud.url)

        # Point origin at the real upstream (the clone above used clonedir).
        repourl = self._get_repo_url(ud)
        runfetchcmd("%s remote set-url origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=destdir)

        if self._contains_lfs(ud, d, destdir):
            if need_lfs and not self._find_git_lfs(d):
                raise bb.fetch2.FetchError("Repository %s has LFS content, install git-lfs on host to download (or set lfs=0 to ignore it)" % (repourl))
            elif not need_lfs:
                bb.note("Repository %s has LFS content but it is not being fetched" % (repourl))

        if not ud.nocheckout:
            if subpath:
                # Populate the index with just the requested subpath, then
                # materialize it in the working tree.
                runfetchcmd("%s read-tree %s%s" % (ud.basecmd, ud.revisions[ud.names[0]], readpathspec), d,
                            workdir=destdir)
                runfetchcmd("%s checkout-index -q -f -a" % ud.basecmd, d, workdir=destdir)
            elif not ud.nobranch:
                branchname =  ud.branches[ud.names[0]]
                runfetchcmd("%s checkout -B %s %s" % (ud.basecmd, branchname, \
                            ud.revisions[ud.names[0]]), d, workdir=destdir)
                runfetchcmd("%s branch %s --set-upstream-to origin/%s" % (ud.basecmd, branchname, \
                            branchname), d, workdir=destdir)
            else:
                # nobranch: detached checkout of the exact revision.
                runfetchcmd("%s checkout %s" % (ud.basecmd, ud.revisions[ud.names[0]]), d, workdir=destdir)

        return True
621
622    def clean(self, ud, d):
623        """ clean the git directory """
624
625        to_remove = [ud.localpath, ud.fullmirror, ud.fullmirror + ".done"]
626        # The localpath is a symlink to clonedir when it is cloned from a
627        # mirror, so remove both of them.
628        if os.path.islink(ud.localpath):
629            clonedir = os.path.realpath(ud.localpath)
630            to_remove.append(clonedir)
631
632        for r in to_remove:
633            if os.path.exists(r):
634                bb.note('Removing %s' % r)
635                bb.utils.remove(r, True)
636
637    def supports_srcrev(self):
638        return True
639
640    def _contains_ref(self, ud, d, name, wd):
641        cmd = ""
642        if ud.nobranch:
643            cmd = "%s log --pretty=oneline -n 1 %s -- 2> /dev/null | wc -l" % (
644                ud.basecmd, ud.revisions[name])
645        else:
646            cmd =  "%s branch --contains %s --list %s 2> /dev/null | wc -l" % (
647                ud.basecmd, ud.revisions[name], ud.branches[name])
648        try:
649            output = runfetchcmd(cmd, d, quiet=True, workdir=wd)
650        except bb.fetch2.FetchError:
651            return False
652        if len(output.split()) > 1:
653            raise bb.fetch2.FetchError("The command '%s' gave output with more then 1 line unexpectedly, output: '%s'" % (cmd, output))
654        return output.split()[0] != "0"
655
656    def _need_lfs(self, ud):
657        return ud.parm.get("lfs", "1") == "1"
658
659    def _contains_lfs(self, ud, d, wd):
660        """
661        Check if the repository has 'lfs' (large file) content
662        """
663
664        # The bare clonedir doesn't use the remote names; it has the branch immediately.
665        if wd == ud.clonedir:
666            refname = ud.branches[ud.names[0]]
667        else:
668            refname = "origin/%s" % ud.branches[ud.names[0]]
669
670        cmd = "%s grep lfs %s:.gitattributes | wc -l" % (
671            ud.basecmd, refname)
672
673        try:
674            output = runfetchcmd(cmd, d, quiet=True, workdir=wd)
675            if int(output) > 0:
676                return True
677        except (bb.fetch2.FetchError,ValueError):
678            pass
679        return False
680
681    def _find_git_lfs(self, d):
682        """
683        Return True if git-lfs can be found, False otherwise.
684        """
685        import shutil
686        return shutil.which("git-lfs", path=d.getVar('PATH')) is not None
687
688    def _get_repo_url(self, ud):
689        """
690        Return the repository URL
691        """
692        # Note that we do not support passwords directly in the git urls. There are several
693        # reasons. SRC_URI can be written out to things like buildhistory and people don't
694        # want to leak passwords like that. Its also all too easy to share metadata without
695        # removing the password. ssh keys, ~/.netrc and ~/.ssh/config files can be used as
696        # alternatives so we will not take patches adding password support here.
697        if ud.user:
698            username = ud.user + '@'
699        else:
700            username = ""
701        return "%s://%s%s%s" % (ud.proto, username, ud.host, ud.path)
702
703    def _revision_key(self, ud, d, name):
704        """
705        Return a unique key for the url
706        """
707        # Collapse adjacent slashes
708        return "git:" + ud.host + slash_re.sub(".", ud.path) + ud.unresolvedrev[name]
709
710    def _lsremote(self, ud, d, search):
711        """
712        Run git ls-remote with the specified search string
713        """
714        # Prevent recursion e.g. in OE if SRCPV is in PV, PV is in WORKDIR,
715        # and WORKDIR is in PATH (as a result of RSS), our call to
716        # runfetchcmd() exports PATH so this function will get called again (!)
717        # In this scenario the return call of the function isn't actually
718        # important - WORKDIR isn't needed in PATH to call git ls-remote
719        # anyway.
720        if d.getVar('_BB_GIT_IN_LSREMOTE', False):
721            return ''
722        d.setVar('_BB_GIT_IN_LSREMOTE', '1')
723        try:
724            repourl = self._get_repo_url(ud)
725            cmd = "%s ls-remote %s %s" % \
726                (ud.basecmd, shlex.quote(repourl), search)
727            if ud.proto.lower() != 'file':
728                bb.fetch2.check_network_access(d, cmd, repourl)
729            output = runfetchcmd(cmd, d, True)
730            if not output:
731                raise bb.fetch2.FetchError("The command %s gave empty output unexpectedly" % cmd, ud.url)
732        finally:
733            d.delVar('_BB_GIT_IN_LSREMOTE')
734        return output
735
736    def _latest_revision(self, ud, d, name):
737        """
738        Compute the HEAD revision for the url
739        """
740        if not d.getVar("__BBSRCREV_SEEN"):
741            raise bb.fetch2.FetchError("Recipe uses a floating tag/branch '%s' for repo '%s' without a fixed SRCREV yet doesn't call bb.fetch2.get_srcrev() (use SRCPV in PV for OE)." % (ud.unresolvedrev[name], ud.host+ud.path))
742
743        # Ensure we mark as not cached
744        bb.fetch2.mark_recipe_nocache(d)
745
746        output = self._lsremote(ud, d, "")
747        # Tags of the form ^{} may not work, need to fallback to other form
748        if ud.unresolvedrev[name][:5] == "refs/" or ud.usehead:
749            head = ud.unresolvedrev[name]
750            tag = ud.unresolvedrev[name]
751        else:
752            head = "refs/heads/%s" % ud.unresolvedrev[name]
753            tag = "refs/tags/%s" % ud.unresolvedrev[name]
754        for s in [head, tag + "^{}", tag]:
755            for l in output.strip().split('\n'):
756                sha1, ref = l.split()
757                if s == ref:
758                    return sha1
759        raise bb.fetch2.FetchError("Unable to resolve '%s' in upstream git repository in git ls-remote output for %s" % \
760            (ud.unresolvedrev[name], ud.host+ud.path))
761
762    def latest_versionstring(self, ud, d):
763        """
764        Compute the latest release name like "x.y.x" in "x.y.x+gitHASH"
765        by searching through the tags output of ls-remote, comparing
766        versions and returning the highest match.
767        """
768        pupver = ('', '')
769
770        tagregex = re.compile(d.getVar('UPSTREAM_CHECK_GITTAGREGEX') or r"(?P<pver>([0-9][\.|_]?)+)")
771        try:
772            output = self._lsremote(ud, d, "refs/tags/*")
773        except (bb.fetch2.FetchError, bb.fetch2.NetworkAccess) as e:
774            bb.note("Could not list remote: %s" % str(e))
775            return pupver
776
777        verstring = ""
778        revision = ""
779        for line in output.split("\n"):
780            if not line:
781                break
782
783            tag_head = line.split("/")[-1]
784            # Ignore non-released branches
785            m = re.search(r"(alpha|beta|rc|final)+", tag_head)
786            if m:
787                continue
788
789            # search for version in the line
790            tag = tagregex.search(tag_head)
791            if tag is None:
792                continue
793
794            tag = tag.group('pver')
795            tag = tag.replace("_", ".")
796
797            if verstring and bb.utils.vercmp(("0", tag, ""), ("0", verstring, "")) < 0:
798                continue
799
800            verstring = tag
801            revision = line.split()[0]
802            pupver = (verstring, revision)
803
804        return pupver
805
806    def _build_revision(self, ud, d, name):
807        return ud.revisions[name]
808
809    def gitpkgv_revision(self, ud, d, name):
810        """
811        Return a sortable revision number by counting commits in the history
812        Based on gitpkgv.bblass in meta-openembedded
813        """
814        rev = self._build_revision(ud, d, name)
815        localpath = ud.localpath
816        rev_file = os.path.join(localpath, "oe-gitpkgv_" + rev)
817        if not os.path.exists(localpath):
818            commits = None
819        else:
820            if not os.path.exists(rev_file) or not os.path.getsize(rev_file):
821                from pipes import quote
822                commits = bb.fetch2.runfetchcmd(
823                        "git rev-list %s -- | wc -l" % quote(rev),
824                        d, quiet=True).strip().lstrip('0')
825                if commits:
826                    open(rev_file, "w").write("%d\n" % int(commits))
827            else:
828                commits = open(rev_file, "r").readline(128).strip()
829        if commits:
830            return False, "%s+%s" % (commits, rev[:7])
831        else:
832            return True, str(rev)
833
834    def checkstatus(self, fetch, ud, d):
835        try:
836            self._lsremote(ud, d, "")
837            return True
838        except bb.fetch2.FetchError:
839            return False
840