xref: /openbmc/openbmc/poky/bitbake/lib/bb/fetch2/git.py (revision 03514f19)
1"""
2BitBake 'Fetch' git implementation
3
4git fetcher support the SRC_URI with format of:
5SRC_URI = "git://some.host/somepath;OptionA=xxx;OptionB=xxx;..."
6
7Supported SRC_URI options are:
8
9- branch
10   The git branch to retrieve from. The default is "master"
11
12   This option also supports multiple branch fetching, with branches
13   separated by commas.  In multiple branches case, the name option
14   must have the same number of names to match the branches, which is
15   used to specify the SRC_REV for the branch
16   e.g:
17   SRC_URI="git://some.host/somepath;branch=branchX,branchY;name=nameX,nameY"
18   SRCREV_nameX = "xxxxxxxxxxxxxxxxxxxx"
19   SRCREV_nameY = "YYYYYYYYYYYYYYYYYYYY"
20
21- tag
22    The git tag to retrieve. The default is "master"
23
24- protocol
25   The method to use to access the repository. Common options are "git",
26   "http", "https", "file", "ssh" and "rsync". The default is "git".
27
28- rebaseable
29   rebaseable indicates that the upstream git repo may rebase in the future,
30   and current revision may disappear from upstream repo. This option will
31   remind fetcher to preserve local cache carefully for future use.
32   The default value is "0", set rebaseable=1 for rebaseable git repo.
33
34- nocheckout
35   Don't checkout source code when unpacking. Set this option for a recipe
36   that has its own routine to checkout code.
37   The default is "0", set nocheckout=1 if needed.
38
39- bareclone
40   Create a bare clone of the source code and don't checkout the source code
41   when unpacking. Set this option for a recipe that has its own routine to
42   checkout code and tracking branch requirements.
43   The default is "0", set bareclone=1 if needed.
44
45- nobranch
46   Don't check the SHA validation for a branch. Set this option for a recipe
47   referring to a commit which is valid in any namespace (branch, tag, ...)
48   instead of a branch.
49   The default is "0", set nobranch=1 if needed.
50
51- subpath
52   Limit the checkout to a specific subpath of the tree.
53   By default, checkout the whole tree, set subpath=<path> if needed
54
55- destsuffix
56   The name of the path in which to place the checkout.
57   By default, the path is git/, set destsuffix=<suffix> if needed
58
59- usehead
60   For local git:// urls to use the current branch HEAD as the revision for use with
61   AUTOREV. Implies nobranch.
62
63- lfs
64    Enable the checkout to use LFS for large files. This will download all LFS files
65    in the download step, as the unpack step does not have network access.
66    The default is "1", set lfs=0 to skip.
67
68"""
69
70# Copyright (C) 2005 Richard Purdie
71#
72# SPDX-License-Identifier: GPL-2.0-only
73#
74
75import collections
76import errno
77import fnmatch
78import os
79import re
80import shlex
81import shutil
82import subprocess
83import tempfile
84import bb
85import bb.progress
86from contextlib import contextmanager
87from   bb.fetch2 import FetchMethod
88from   bb.fetch2 import runfetchcmd
89from   bb.fetch2 import logger
90from   bb.fetch2 import trusted_network
91
92
# Matches a complete, lowercase 40-hex-digit SHA-1 object name.
sha1_re = re.compile(r'^[0-9a-f]{40}$')
# Matches one or more consecutive slashes (for collapsing path separators).
slash_re = re.compile(r"/+")
95
class GitProgressHandler(bb.progress.LineFilterProgressHandler):
    """Extract progress information from git output"""
    def __init__(self, d):
        # Output accumulated since the last detected stage transition.
        self._buffer = ''
        # Highest object count reported so far (used while no percentages
        # are available yet).
        self._count = 0
        super(GitProgressHandler, self).__init__(d)
        # Send an initial progress event so the bar gets shown
        self._fire_progress(-1)

    def write(self, string):
        self._buffer += string
        # Git reports these phases in order; each weight is that phase's
        # assumed share of the total, so the bar advances smoothly.
        stages = ['Counting objects', 'Compressing objects', 'Receiving objects', 'Resolving deltas']
        stage_weights = [0.2, 0.05, 0.5, 0.25]
        stagenum = 0
        # Scan stages from last to first so the most advanced stage named in
        # the buffer wins; reset the buffer once a stage is recognised.
        for i, stage in reversed(list(enumerate(stages))):
            if stage in self._buffer:
                stagenum = i
                self._buffer = ''
                break
        self._status = stages[stagenum]
        percs = re.findall(r'(\d+)%', string)
        if percs:
            # Overall progress = (current stage % × its weight) + 100% of the
            # weights of all stages already completed.
            progress = int(round((int(percs[-1]) * stage_weights[stagenum]) + (sum(stage_weights[:stagenum]) * 100)))
            rates = re.findall(r'([\d.]+ [a-zA-Z]*/s+)', string)
            if rates:
                rate = rates[-1]
            else:
                rate = None
            self.update(progress, rate)
        else:
            if stagenum == 0:
                # 'Counting objects' prints counts, not percentages; forward
                # the count negated, matching the _fire_progress(-1)
                # convention used above.
                percs = re.findall(r': (\d+)', string)
                if percs:
                    count = int(percs[-1])
                    if count > self._count:
                        self._count = count
                        self._fire_progress(-count)
        super(GitProgressHandler, self).write(string)
134
135
136class Git(FetchMethod):
137    bitbake_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.join(os.path.abspath(__file__))), '..', '..', '..'))
138    make_shallow_path = os.path.join(bitbake_dir, 'bin', 'git-make-shallow')
139
140    """Class to fetch a module or modules from git repositories"""
    def init(self, d):
        # No git-specific initialisation is needed at fetcher creation time.
        pass
143
144    def supports(self, ud, d):
145        """
146        Check to see if a given url can be fetched with git.
147        """
148        return ud.type in ['git']
149
    def supports_checksum(self, urldata):
        # Git sources are pinned by revision, not by artifact checksum.
        return False
152
    def cleanup_upon_failure(self):
        # Do not remove the download on fetch failure.
        return False
155
    def urldata_init(self, ud, d):
        """
        init git specific variable within url data
        so that the git method like latest_revision() can work
        """
        # Protocol: explicit ;protocol= wins, a hostless URL means a local
        # file path, everything else defaults to the git protocol.
        if 'protocol' in ud.parm:
            ud.proto = ud.parm['protocol']
        elif not ud.host:
            ud.proto = 'file'
        else:
            ud.proto = "git"
        if ud.host == "github.com" and ud.proto == "git":
            # github stopped supporting git protocol
            # https://github.blog/2021-09-01-improving-git-protocol-security-github/#no-more-unauthenticated-git
            ud.proto = "https"
            bb.warn("URL: %s uses git protocol which is no longer supported by github. Please change to ;protocol=https in the url." % ud.url)

        if not ud.proto in ('git', 'file', 'ssh', 'http', 'https', 'rsync'):
            raise bb.fetch2.ParameterError("Invalid protocol type", ud.url)

        # Boolean URL parameters; all default to "0" (off).
        ud.nocheckout = ud.parm.get("nocheckout","0") == "1"

        ud.rebaseable = ud.parm.get("rebaseable","0") == "1"

        ud.nobranch = ud.parm.get("nobranch","0") == "1"

        # usehead implies nobranch
        ud.usehead = ud.parm.get("usehead","0") == "1"
        if ud.usehead:
            if ud.proto != "file":
                raise bb.fetch2.ParameterError("The usehead option is only for use with local ('protocol=file') git repositories", ud.url)
            ud.nobranch = 1

        # bareclone implies nocheckout
        ud.bareclone = ud.parm.get("bareclone","0") == "1"
        if ud.bareclone:
            ud.nocheckout = 1

        ud.unresolvedrev = {}
        # One branch per name; a missing branch parameter falls back to
        # "master" (with a warning) unless nobranch is set.
        branches = ud.parm.get("branch", "").split(',')
        if branches == [""] and not ud.nobranch:
            bb.warn("URL: %s does not set any branch parameter. The future default branch used by tools and repositories is uncertain and we will therefore soon require this is set in all git urls." % ud.url)
            branches = ["master"]
        if len(branches) != len(ud.names):
            raise bb.fetch2.ParameterError("The number of name and branch parameters is not balanced", ud.url)

        ud.noshared = d.getVar("BB_GIT_NOSHARED") == "1"

        # Clone flags: -n (no checkout); -s (shared object store) unless
        # disabled; --mirror for bare clones.
        ud.cloneflags = "-n"
        if not ud.noshared:
            ud.cloneflags += " -s"
        if ud.bareclone:
            ud.cloneflags += " --mirror"

        ud.shallow = d.getVar("BB_GIT_SHALLOW") == "1"
        ud.shallow_extra_refs = (d.getVar("BB_GIT_SHALLOW_EXTRA_REFS") or "").split()

        # Global shallow depth: must be a non-negative integer.  Note that
        # int(depth_default or 0) maps an empty string to 0.
        depth_default = d.getVar("BB_GIT_SHALLOW_DEPTH")
        if depth_default is not None:
            try:
                depth_default = int(depth_default or 0)
            except ValueError:
                raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH: %s" % depth_default)
            else:
                if depth_default < 0:
                    raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH: %s" % depth_default)
        else:
            depth_default = 1
        ud.shallow_depths = collections.defaultdict(lambda: depth_default)

        revs_default = d.getVar("BB_GIT_SHALLOW_REVS")
        ud.shallow_revs = []
        ud.branches = {}
        for pos, name in enumerate(ud.names):
            branch = branches[pos]
            ud.branches[name] = branch
            # Until setup_revisions() resolves it, the branch name acts as
            # the unresolved revision for this name.
            ud.unresolvedrev[name] = branch

            # Per-name shallow depth overrides the global default.
            shallow_depth = d.getVar("BB_GIT_SHALLOW_DEPTH_%s" % name)
            if shallow_depth is not None:
                try:
                    shallow_depth = int(shallow_depth or 0)
                except ValueError:
                    raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH_%s: %s" % (name, shallow_depth))
                else:
                    if shallow_depth < 0:
                        raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH_%s: %s" % (name, shallow_depth))
                    ud.shallow_depths[name] = shallow_depth

            # Per-name shallow revs override the global list.
            revs = d.getVar("BB_GIT_SHALLOW_REVS_%s" % name)
            if revs is not None:
                ud.shallow_revs.extend(revs.split())
            elif revs_default is not None:
                ud.shallow_revs.extend(revs_default.split())

        if (ud.shallow and
                not ud.shallow_revs and
                all(ud.shallow_depths[n] == 0 for n in ud.names)):
            # Shallow disabled for this URL
            ud.shallow = False

        if ud.usehead:
            # When usehead is set let's associate 'HEAD' with the unresolved
            # rev of this repository. This will get resolved into a revision
            # later. If an actual revision happens to have also been provided
            # then this setting will be overridden.
            for name in ud.names:
                ud.unresolvedrev[name] = 'HEAD'

        ud.basecmd = d.getVar("FETCHCMD_git") or "git -c gc.autoDetach=false -c core.pager=cat -c safe.bareRepository=all"

        write_tarballs = d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0"
        # Rebaseable repos always get a mirror tarball: upstream history may
        # be rewritten out from under us.
        ud.write_tarballs = write_tarballs != "0" or ud.rebaseable
        ud.write_shallow_tarballs = (d.getVar("BB_GENERATE_SHALLOW_TARBALLS") or write_tarballs) != "0"

        ud.setup_revisions(d)

        for name in ud.names:
            # Ensure any revision that doesn't look like a SHA-1 is translated into one
            if not sha1_re.match(ud.revisions[name] or ''):
                if ud.revisions[name]:
                    ud.unresolvedrev[name] = ud.revisions[name]
                ud.revisions[name] = self.latest_revision(ud, d, name)

        # Build a filesystem-safe directory name from host and path.
        gitsrcname = '%s%s' % (ud.host.replace(':', '.'), ud.path.replace('/', '.').replace('*', '.').replace(' ','_').replace('(', '_').replace(')', '_'))
        if gitsrcname.startswith('.'):
            gitsrcname = gitsrcname[1:]

        # For a rebaseable git repo, it is necessary to keep a mirror tar ball
        # per revision, so that even if the revision disappears from the
        # upstream repo in the future, the mirror will remain intact and still
        # contain the revision
        if ud.rebaseable:
            for name in ud.names:
                gitsrcname = gitsrcname + '_' + ud.revisions[name]

        dl_dir = d.getVar("DL_DIR")
        gitdir = d.getVar("GITDIR") or (dl_dir + "/git2")
        ud.clonedir = os.path.join(gitdir, gitsrcname)
        ud.localfile = ud.clonedir

        mirrortarball = 'git2_%s.tar.gz' % gitsrcname
        ud.fullmirror = os.path.join(dl_dir, mirrortarball)
        ud.mirrortarballs = [mirrortarball]
        if ud.shallow:
            # Encode everything that affects the shallow clone's contents
            # (bareness, revs, depths, refs) into the shallow tarball name.
            tarballname = gitsrcname
            if ud.bareclone:
                tarballname = "%s_bare" % tarballname

            if ud.shallow_revs:
                tarballname = "%s_%s" % (tarballname, "_".join(sorted(ud.shallow_revs)))

            for name, revision in sorted(ud.revisions.items()):
                tarballname = "%s_%s" % (tarballname, ud.revisions[name][:7])
                depth = ud.shallow_depths[name]
                if depth:
                    tarballname = "%s-%s" % (tarballname, depth)

            shallow_refs = []
            if not ud.nobranch:
                shallow_refs.extend(ud.branches.values())
            if ud.shallow_extra_refs:
                shallow_refs.extend(r.replace('refs/heads/', '').replace('*', 'ALL') for r in ud.shallow_extra_refs)
            if shallow_refs:
                tarballname = "%s_%s" % (tarballname, "_".join(sorted(shallow_refs)).replace('/', '.'))

            fetcher = self.__class__.__name__.lower()
            ud.shallowtarball = '%sshallow_%s.tar.gz' % (fetcher, tarballname)
            ud.fullshallow = os.path.join(dl_dir, ud.shallowtarball)
            # List the shallow tarball first so it is preferred over the
            # full mirror tarball.
            ud.mirrortarballs.insert(0, ud.shallowtarball)
326
    def localpath(self, ud, d):
        # The local artifact is the bare clone directory itself.
        return ud.clonedir
329
330    def need_update(self, ud, d):
331        return self.clonedir_need_update(ud, d) \
332                or self.shallow_tarball_need_update(ud) \
333                or self.tarball_need_update(ud) \
334                or self.lfs_need_update(ud, d)
335
336    def clonedir_need_update(self, ud, d):
337        if not os.path.exists(ud.clonedir):
338            return True
339        if ud.shallow and ud.write_shallow_tarballs and self.clonedir_need_shallow_revs(ud, d):
340            return True
341        for name in ud.names:
342            if not self._contains_ref(ud, d, name, ud.clonedir):
343                return True
344        return False
345
346    def lfs_need_update(self, ud, d):
347        if self.clonedir_need_update(ud, d):
348            return True
349
350        for name in ud.names:
351            if not self._lfs_objects_downloaded(ud, d, name, ud.clonedir):
352                return True
353        return False
354
355    def clonedir_need_shallow_revs(self, ud, d):
356        for rev in ud.shallow_revs:
357            try:
358                runfetchcmd('%s rev-parse -q --verify %s' % (ud.basecmd, rev), d, quiet=True, workdir=ud.clonedir)
359            except bb.fetch2.FetchError:
360                return rev
361        return None
362
363    def shallow_tarball_need_update(self, ud):
364        return ud.shallow and ud.write_shallow_tarballs and not os.path.exists(ud.fullshallow)
365
366    def tarball_need_update(self, ud):
367        return ud.write_tarballs and not os.path.exists(ud.fullmirror)
368
369    def try_premirror(self, ud, d):
370        # If we don't do this, updating an existing checkout with only premirrors
371        # is not possible
372        if bb.utils.to_boolean(d.getVar("BB_FETCH_PREMIRRORONLY")):
373            return True
374        # If the url is not in trusted network, that is, BB_NO_NETWORK is set to 0
375        # and BB_ALLOWED_NETWORKS does not contain the host that ud.url uses, then
376        # we need to try premirrors first as using upstream is destined to fail.
377        if not trusted_network(d, ud.url):
378            return True
379        # the following check is to ensure incremental fetch in downloads, this is
380        # because the premirror might be old and does not contain the new rev required,
381        # and this will cause a total removal and new clone. So if we can reach to
382        # network, we prefer upstream over premirror, though the premirror might contain
383        # the new rev.
384        if os.path.exists(ud.clonedir):
385            return False
386        return True
387
    def download(self, ud, d):
        """Fetch url"""

        # A current clone is preferred to either tarball, a shallow tarball is
        # preferred to an out of date clone, and a missing clone will use
        # either tarball.
        if ud.shallow and os.path.exists(ud.fullshallow) and self.need_update(ud, d):
            # The shallow tarball is used as-is; unpack() extracts it later.
            ud.localpath = ud.fullshallow
            return
        elif os.path.exists(ud.fullmirror) and self.need_update(ud, d):
            if not os.path.exists(ud.clonedir):
                # Seed the bare clone directly from the mirror tarball.
                bb.utils.mkdirhier(ud.clonedir)
                runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=ud.clonedir)
            else:
                # Clone exists but is stale: unpack the tarball elsewhere and
                # merge its objects/refs via a temporary 'mirror' remote.
                # NOTE(review): tmpdir is left behind in DL_DIR after the
                # merge — confirm whether cleanup is handled elsewhere.
                tmpdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR'))
                runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=tmpdir)
                output = runfetchcmd("%s remote" % ud.basecmd, d, quiet=True, workdir=ud.clonedir)
                if 'mirror' in output:
                    runfetchcmd("%s remote rm mirror" % ud.basecmd, d, workdir=ud.clonedir)
                runfetchcmd("%s remote add --mirror=fetch mirror %s" % (ud.basecmd, tmpdir), d, workdir=ud.clonedir)
                fetch_cmd = "LANG=C %s fetch -f --update-head-ok  --progress mirror " % (ud.basecmd)
                runfetchcmd(fetch_cmd, d, workdir=ud.clonedir)
        repourl = self._get_repo_url(ud)

        needs_clone = False
        if os.path.exists(ud.clonedir):
            # The directory may exist, but not be the top level of a bare git
            # repository in which case it needs to be deleted and re-cloned.
            try:
                # Since clones can be bare, use --absolute-git-dir instead of --show-toplevel
                output = runfetchcmd("LANG=C %s rev-parse --absolute-git-dir" % ud.basecmd, d, workdir=ud.clonedir)
                toplevel = output.rstrip()

                if not bb.utils.path_is_descendant(toplevel, ud.clonedir):
                    logger.warning("Top level directory '%s' is not a descendant of '%s'. Re-cloning", toplevel, ud.clonedir)
                    needs_clone = True
            except bb.fetch2.FetchError as e:
                logger.warning("Unable to get top level for %s (not a git directory?): %s", ud.clonedir, e)
                needs_clone = True
            except FileNotFoundError as e:
                logger.warning("%s", e)
                needs_clone = True

            if needs_clone:
                shutil.rmtree(ud.clonedir)
        else:
            needs_clone = True

        # If the repo still doesn't exist, fallback to cloning it
        if needs_clone:
            # We do this since git will use a "-l" option automatically for local urls where possible,
            # but it doesn't work when git/objects is a symlink, only works when it is a directory.
            if repourl.startswith("file://"):
                repourl_path = repourl[7:]
                objects = os.path.join(repourl_path, 'objects')
                if os.path.isdir(objects) and not os.path.islink(objects):
                    repourl = repourl_path
            clone_cmd = "LANG=C %s clone --bare --mirror %s %s --progress" % (ud.basecmd, shlex.quote(repourl), ud.clonedir)
            if ud.proto.lower() != 'file':
                bb.fetch2.check_network_access(d, clone_cmd, ud.url)
            progresshandler = GitProgressHandler(d)
            runfetchcmd(clone_cmd, d, log=progresshandler)

        # Update the checkout if needed
        if self.clonedir_need_update(ud, d):
            # Recreate 'origin' as a fetch mirror so upstream refs map 1:1.
            output = runfetchcmd("%s remote" % ud.basecmd, d, quiet=True, workdir=ud.clonedir)
            if "origin" in output:
                runfetchcmd("%s remote rm origin" % ud.basecmd, d, workdir=ud.clonedir)

            runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=ud.clonedir)

            if ud.nobranch:
                # The wanted rev may live outside heads/tags, so mirror every
                # ref namespace.
                fetch_cmd = "LANG=C %s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, shlex.quote(repourl))
            else:
                fetch_cmd = "LANG=C %s fetch -f --progress %s refs/heads/*:refs/heads/* refs/tags/*:refs/tags/*" % (ud.basecmd, shlex.quote(repourl))
            if ud.proto.lower() != 'file':
                bb.fetch2.check_network_access(d, fetch_cmd, ud.url)
            progresshandler = GitProgressHandler(d)
            runfetchcmd(fetch_cmd, d, log=progresshandler, workdir=ud.clonedir)
            # Post-fetch housekeeping on the bare repository.
            runfetchcmd("%s prune-packed" % ud.basecmd, d, workdir=ud.clonedir)
            runfetchcmd("%s pack-refs --all" % ud.basecmd, d, workdir=ud.clonedir)
            runfetchcmd("%s pack-redundant --all | xargs -r rm" % ud.basecmd, d, workdir=ud.clonedir)
            # Any existing mirror tarball no longer matches the refreshed
            # clone; drop it (ignore it already being absent).
            try:
                os.unlink(ud.fullmirror)
            except OSError as exc:
                if exc.errno != errno.ENOENT:
                    raise
        # Verify every requested revision is now actually present.
        for name in ud.names:
            if not self._contains_ref(ud, d, name, ud.clonedir):
                raise bb.fetch2.FetchError("Unable to find revision %s in branch %s even from upstream" % (ud.revisions[name], ud.branches[name]))

        if ud.shallow and ud.write_shallow_tarballs:
            missing_rev = self.clonedir_need_shallow_revs(ud, d)
            if missing_rev:
                raise bb.fetch2.FetchError("Unable to find revision %s even from upstream" % missing_rev)

        if self.lfs_need_update(ud, d):
            # Unpack temporary working copy, use it to run 'git checkout' to force pre-fetching
            # of all LFS blobs needed at the srcrev.
            #
            # It would be nice to just do this inline here by running 'git-lfs fetch'
            # on the bare clonedir, but that operation requires a working copy on some
            # releases of Git LFS.
            with tempfile.TemporaryDirectory(dir=d.getVar('DL_DIR')) as tmpdir:
                # Do the checkout. This implicitly involves a Git LFS fetch.
                Git.unpack(self, ud, tmpdir, d)

                # Scoop up a copy of any stuff that Git LFS downloaded. Merge them into
                # the bare clonedir.
                #
                # As this procedure is invoked repeatedly on incremental fetches as
                # a recipe's SRCREV is bumped throughout its lifetime, this will
                # result in a gradual accumulation of LFS blobs in <ud.clonedir>/lfs
                # corresponding to all the blobs reachable from the different revs
                # fetched across time.
                #
                # Only do this if the unpack resulted in a .git/lfs directory being
                # created; this only happens if at least one blob needed to be
                # downloaded.
                if os.path.exists(os.path.join(ud.destdir, ".git", "lfs")):
                    runfetchcmd("tar -cf - lfs | tar -xf - -C %s" % ud.clonedir, d, workdir="%s/.git" % ud.destdir)
510
    def build_mirror_data(self, ud, d):
        """Create the (shallow) mirror tarball for this URL if configured
        and not already present."""

        # Create as a temp file and move atomically into position to avoid races
        @contextmanager
        def create_atomic(filename):
            fd, tfile = tempfile.mkstemp(dir=os.path.dirname(filename))
            try:
                yield tfile
                # os.umask() returns the previous mask, so set-and-restore is
                # the only way to read the current one; then widen the
                # mkstemp 0600 file to a conventional 0666-minus-umask mode.
                umask = os.umask(0o666)
                os.umask(umask)
                os.chmod(tfile, (0o666 & ~umask))
                os.rename(tfile, filename)
            finally:
                os.close(fd)

        if ud.shallow and ud.write_shallow_tarballs:
            if not os.path.exists(ud.fullshallow):
                # exists() is False for a dangling symlink; remove it so the
                # rename above can land.
                if os.path.islink(ud.fullshallow):
                    os.unlink(ud.fullshallow)
                tempdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR'))
                shallowclone = os.path.join(tempdir, 'git')
                try:
                    self.clone_shallow_local(ud, shallowclone, d)

                    logger.info("Creating tarball of git repository")
                    with create_atomic(ud.fullshallow) as tfile:
                        runfetchcmd("tar -czf %s ." % tfile, d, workdir=shallowclone)
                    runfetchcmd("touch %s.done" % ud.fullshallow, d)
                finally:
                    bb.utils.remove(tempdir, recurse=True)
        elif ud.write_tarballs and not os.path.exists(ud.fullmirror):
            if os.path.islink(ud.fullmirror):
                os.unlink(ud.fullmirror)

            logger.info("Creating tarball of git repository")
            with create_atomic(ud.fullmirror) as tfile:
                # Stamp entries with the newest commit date and fixed
                # owner/group so the tarball is reproducible.
                mtime = runfetchcmd("{} log --all -1 --format=%cD".format(ud.basecmd), d,
                        quiet=True, workdir=ud.clonedir)
                runfetchcmd("tar -czf %s --owner oe:0 --group oe:0 --mtime \"%s\" ."
                        % (tfile, mtime), d, workdir=ud.clonedir)
            runfetchcmd("touch %s.done" % ud.fullmirror, d)
552
    def clone_shallow_local(self, ud, dest, d):
        """Clone the repo and make it shallow.

        The upstream url of the new clone isn't set at this time, as it'll be
        set correctly when unpacked."""
        runfetchcmd("%s clone %s %s %s" % (ud.basecmd, ud.cloneflags, ud.clonedir, dest), d)

        to_parse, shallow_branches = [], []
        for name in ud.names:
            revision = ud.revisions[name]
            depth = ud.shallow_depths[name]
            if depth:
                # rev~(depth-1), peeled to a commit with ^{}, is the oldest
                # commit this name needs to keep.
                to_parse.append('%s~%d^{}' % (revision, depth - 1))

            # For nobranch, we need a ref, otherwise the commits will be
            # removed, and for non-nobranch, we truncate the branch to our
            # srcrev, to avoid keeping unnecessary history beyond that.
            branch = ud.branches[name]
            if ud.nobranch:
                ref = "refs/shallow/%s" % name
            elif ud.bareclone:
                ref = "refs/heads/%s" % branch
            else:
                ref = "refs/remotes/origin/%s" % branch

            shallow_branches.append(ref)
            runfetchcmd("%s update-ref %s %s" % (ud.basecmd, ref, revision), d, workdir=dest)

        # Map srcrev+depths to revisions
        parsed_depths = runfetchcmd("%s rev-parse %s" % (ud.basecmd, " ".join(to_parse)), d, workdir=dest)

        # Resolve specified revisions
        parsed_revs = runfetchcmd("%s rev-parse %s" % (ud.basecmd, " ".join('"%s^{}"' % r for r in ud.shallow_revs)), d, workdir=dest)
        shallow_revisions = parsed_depths.splitlines() + parsed_revs.splitlines()

        # Apply extra ref wildcards
        all_refs = runfetchcmd('%s for-each-ref "--format=%%(refname)"' % ud.basecmd,
                               d, workdir=dest).splitlines()
        for r in ud.shallow_extra_refs:
            if not ud.bareclone:
                # In a non-bare clone, upstream branches live under
                # refs/remotes/origin/ rather than refs/heads/.
                r = r.replace('refs/heads/', 'refs/remotes/origin/')

            if '*' in r:
                matches = filter(lambda a: fnmatch.fnmatchcase(a, r), all_refs)
                shallow_branches.extend(matches)
            else:
                shallow_branches.append(r)

        # Make the repository shallow
        # Delegate the history truncation to bitbake's bundled
        # bin/git-make-shallow helper (see make_shallow_path).
        shallow_cmd = [self.make_shallow_path, '-s']
        for b in shallow_branches:
            shallow_cmd.append('-r')
            shallow_cmd.append(b)
        shallow_cmd.extend(shallow_revisions)
        runfetchcmd(subprocess.list2cmdline(shallow_cmd), d, workdir=dest)
608
    def unpack(self, ud, destdir, d):
        """ unpack the downloaded src to destdir"""

        subdir = ud.parm.get("subdir")
        subpath = ud.parm.get("subpath")
        readpathspec = ""
        def_destsuffix = "git/"

        if subpath:
            # Restrict read-tree to the requested subpath and name the
            # destination directory after its final path component.
            readpathspec = ":%s" % subpath
            def_destsuffix = "%s/" % os.path.basename(subpath.rstrip('/'))

        if subdir:
            # If 'subdir' param exists, create a dir and use it as destination for unpack cmd
            if os.path.isabs(subdir):
                if not os.path.realpath(subdir).startswith(os.path.realpath(destdir)):
                    raise bb.fetch2.UnpackError("subdir argument isn't a subdirectory of unpack root %s" % destdir, ud.url)
                destdir = subdir
            else:
                destdir = os.path.join(destdir, subdir)
            def_destsuffix = ""

        destsuffix = ud.parm.get("destsuffix", def_destsuffix)
        destdir = ud.destdir = os.path.join(destdir, destsuffix)
        if os.path.exists(destdir):
            # Always unpack into a clean destination.
            bb.utils.prunedir(destdir)
        if not ud.bareclone:
            ud.unpack_tracer.unpack("git", destdir)

        need_lfs = self._need_lfs(ud)

        if not need_lfs:
            # NOTE(review): this permanently mutates ud.basecmd, so repeated
            # unpacks of the same ud keep prepending the variable — confirm
            # this is intended rather than using a local command string.
            ud.basecmd = "GIT_LFS_SKIP_SMUDGE=1 " + ud.basecmd

        source_found = False
        source_error = []

        # Prefer cloning from the local bare clone when it is current.
        clonedir_is_up_to_date = not self.clonedir_need_update(ud, d)
        if clonedir_is_up_to_date:
            runfetchcmd("%s clone %s %s/ %s" % (ud.basecmd, ud.cloneflags, ud.clonedir, destdir), d)
            source_found = True
        else:
            source_error.append("clone directory not available or not up to date: " + ud.clonedir)

        if not source_found:
            # Fall back to extracting the shallow mirror tarball, if enabled.
            if ud.shallow:
                if os.path.exists(ud.fullshallow):
                    bb.utils.mkdirhier(destdir)
                    runfetchcmd("tar -xzf %s" % ud.fullshallow, d, workdir=destdir)
                    source_found = True
                else:
                    source_error.append("shallow clone not available: " + ud.fullshallow)
            else:
                source_error.append("shallow clone not enabled")

        if not source_found:
            raise bb.fetch2.UnpackError("No up to date source found: " + "; ".join(source_error), ud.url)

        # The clone above came from a local path; point origin back at the
        # real upstream URL.
        repourl = self._get_repo_url(ud)
        runfetchcmd("%s remote set-url origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=destdir)

        if self._contains_lfs(ud, d, destdir):
            if need_lfs and not self._find_git_lfs(d):
                raise bb.fetch2.FetchError("Repository %s has LFS content, install git-lfs on host to download (or set lfs=0 to ignore it)" % (repourl))
            elif not need_lfs:
                bb.note("Repository %s has LFS content but it is not being fetched" % (repourl))
            else:
                runfetchcmd("%s lfs install --local" % ud.basecmd, d, workdir=destdir)

        if not ud.nocheckout:
            if subpath:
                # Populate the index with only the subpath, then write the
                # index contents out to the working tree.
                runfetchcmd("%s read-tree %s%s" % (ud.basecmd, ud.revisions[ud.names[0]], readpathspec), d,
                            workdir=destdir)
                runfetchcmd("%s checkout-index -q -f -a" % ud.basecmd, d, workdir=destdir)
            elif not ud.nobranch:
                # Check out the configured branch at the resolved revision
                # and set it to track the origin branch.
                branchname =  ud.branches[ud.names[0]]
                runfetchcmd("%s checkout -B %s %s" % (ud.basecmd, branchname, \
                            ud.revisions[ud.names[0]]), d, workdir=destdir)
                runfetchcmd("%s branch %s --set-upstream-to origin/%s" % (ud.basecmd, branchname, \
                            branchname), d, workdir=destdir)
            else:
                # nobranch: detached checkout of the exact revision.
                runfetchcmd("%s checkout %s" % (ud.basecmd, ud.revisions[ud.names[0]]), d, workdir=destdir)

        return True
693
694    def clean(self, ud, d):
695        """ clean the git directory """
696
697        to_remove = [ud.localpath, ud.fullmirror, ud.fullmirror + ".done"]
698        # The localpath is a symlink to clonedir when it is cloned from a
699        # mirror, so remove both of them.
700        if os.path.islink(ud.localpath):
701            clonedir = os.path.realpath(ud.localpath)
702            to_remove.append(clonedir)
703
704        for r in to_remove:
705            if os.path.exists(r):
706                bb.note('Removing %s' % r)
707                bb.utils.remove(r, True)
708
709    def supports_srcrev(self):
710        return True
711
712    def _contains_ref(self, ud, d, name, wd):
713        cmd = ""
714        if ud.nobranch:
715            cmd = "%s log --pretty=oneline -n 1 %s -- 2> /dev/null | wc -l" % (
716                ud.basecmd, ud.revisions[name])
717        else:
718            cmd =  "%s branch --contains %s --list %s 2> /dev/null | wc -l" % (
719                ud.basecmd, ud.revisions[name], ud.branches[name])
720        try:
721            output = runfetchcmd(cmd, d, quiet=True, workdir=wd)
722        except bb.fetch2.FetchError:
723            return False
724        if len(output.split()) > 1:
725            raise bb.fetch2.FetchError("The command '%s' gave output with more then 1 line unexpectedly, output: '%s'" % (cmd, output))
726        return output.split()[0] != "0"
727
728    def _lfs_objects_downloaded(self, ud, d, name, wd):
729        """
730        Verifies whether the LFS objects for requested revisions have already been downloaded
731        """
732        # Bail out early if this repository doesn't use LFS
733        if not self._need_lfs(ud) or not self._contains_lfs(ud, d, wd):
734            return True
735
736        # The Git LFS specification specifies ([1]) the LFS folder layout so it should be safe to check for file
737        # existence.
738        # [1] https://github.com/git-lfs/git-lfs/blob/main/docs/spec.md#intercepting-git
739        cmd = "%s lfs ls-files -l %s" \
740                % (ud.basecmd, ud.revisions[name])
741        output = runfetchcmd(cmd, d, quiet=True, workdir=wd).rstrip()
742        # Do not do any further matching if no objects are managed by LFS
743        if not output:
744            return True
745
746        # Match all lines beginning with the hexadecimal OID
747        oid_regex = re.compile("^(([a-fA-F0-9]{2})([a-fA-F0-9]{2})[A-Fa-f0-9]+)")
748        for line in output.split("\n"):
749            oid = re.search(oid_regex, line)
750            if not oid:
751                bb.warn("git lfs ls-files output '%s' did not match expected format." % line)
752            if not os.path.exists(os.path.join(wd, "lfs", "objects", oid.group(2), oid.group(3), oid.group(1))):
753                return False
754
755        return True
756
757    def _need_lfs(self, ud):
758        return ud.parm.get("lfs", "1") == "1"
759
760    def _contains_lfs(self, ud, d, wd):
761        """
762        Check if the repository has 'lfs' (large file) content
763        """
764
765        if ud.nobranch:
766            # If no branch is specified, use the current git commit
767            refname = self._build_revision(ud, d, ud.names[0])
768        elif wd == ud.clonedir:
769            # The bare clonedir doesn't use the remote names; it has the branch immediately.
770            refname = ud.branches[ud.names[0]]
771        else:
772            refname = "origin/%s" % ud.branches[ud.names[0]]
773
774        cmd = "%s grep lfs %s:.gitattributes | wc -l" % (
775            ud.basecmd, refname)
776
777        try:
778            output = runfetchcmd(cmd, d, quiet=True, workdir=wd)
779            if int(output) > 0:
780                return True
781        except (bb.fetch2.FetchError,ValueError):
782            pass
783        return False
784
785    def _find_git_lfs(self, d):
786        """
787        Return True if git-lfs can be found, False otherwise.
788        """
789        import shutil
790        return shutil.which("git-lfs", path=d.getVar('PATH')) is not None
791
792    def _get_repo_url(self, ud):
793        """
794        Return the repository URL
795        """
796        # Note that we do not support passwords directly in the git urls. There are several
797        # reasons. SRC_URI can be written out to things like buildhistory and people don't
798        # want to leak passwords like that. Its also all too easy to share metadata without
799        # removing the password. ssh keys, ~/.netrc and ~/.ssh/config files can be used as
800        # alternatives so we will not take patches adding password support here.
801        if ud.user:
802            username = ud.user + '@'
803        else:
804            username = ""
805        return "%s://%s%s%s" % (ud.proto, username, ud.host, ud.path)
806
807    def _revision_key(self, ud, d, name):
808        """
809        Return a unique key for the url
810        """
811        # Collapse adjacent slashes
812        return "git:" + ud.host + slash_re.sub(".", ud.path) + ud.unresolvedrev[name]
813
814    def _lsremote(self, ud, d, search):
815        """
816        Run git ls-remote with the specified search string
817        """
818        # Prevent recursion e.g. in OE if SRCPV is in PV, PV is in WORKDIR,
819        # and WORKDIR is in PATH (as a result of RSS), our call to
820        # runfetchcmd() exports PATH so this function will get called again (!)
821        # In this scenario the return call of the function isn't actually
822        # important - WORKDIR isn't needed in PATH to call git ls-remote
823        # anyway.
824        if d.getVar('_BB_GIT_IN_LSREMOTE', False):
825            return ''
826        d.setVar('_BB_GIT_IN_LSREMOTE', '1')
827        try:
828            repourl = self._get_repo_url(ud)
829            cmd = "%s ls-remote %s %s" % \
830                (ud.basecmd, shlex.quote(repourl), search)
831            if ud.proto.lower() != 'file':
832                bb.fetch2.check_network_access(d, cmd, repourl)
833            output = runfetchcmd(cmd, d, True)
834            if not output:
835                raise bb.fetch2.FetchError("The command %s gave empty output unexpectedly" % cmd, ud.url)
836        finally:
837            d.delVar('_BB_GIT_IN_LSREMOTE')
838        return output
839
840    def _latest_revision(self, ud, d, name):
841        """
842        Compute the HEAD revision for the url
843        """
844        if not d.getVar("__BBSRCREV_SEEN"):
845            raise bb.fetch2.FetchError("Recipe uses a floating tag/branch '%s' for repo '%s' without a fixed SRCREV yet doesn't call bb.fetch2.get_srcrev() (use SRCPV in PV for OE)." % (ud.unresolvedrev[name], ud.host+ud.path))
846
847        # Ensure we mark as not cached
848        bb.fetch2.mark_recipe_nocache(d)
849
850        output = self._lsremote(ud, d, "")
851        # Tags of the form ^{} may not work, need to fallback to other form
852        if ud.unresolvedrev[name][:5] == "refs/" or ud.usehead:
853            head = ud.unresolvedrev[name]
854            tag = ud.unresolvedrev[name]
855        else:
856            head = "refs/heads/%s" % ud.unresolvedrev[name]
857            tag = "refs/tags/%s" % ud.unresolvedrev[name]
858        for s in [head, tag + "^{}", tag]:
859            for l in output.strip().split('\n'):
860                sha1, ref = l.split()
861                if s == ref:
862                    return sha1
863        raise bb.fetch2.FetchError("Unable to resolve '%s' in upstream git repository in git ls-remote output for %s" % \
864            (ud.unresolvedrev[name], ud.host+ud.path))
865
866    def latest_versionstring(self, ud, d):
867        """
868        Compute the latest release name like "x.y.x" in "x.y.x+gitHASH"
869        by searching through the tags output of ls-remote, comparing
870        versions and returning the highest match.
871        """
872        pupver = ('', '')
873
874        try:
875            output = self._lsremote(ud, d, "refs/tags/*")
876        except (bb.fetch2.FetchError, bb.fetch2.NetworkAccess) as e:
877            bb.note("Could not list remote: %s" % str(e))
878            return pupver
879
880        rev_tag_re = re.compile(r"([0-9a-f]{40})\s+refs/tags/(.*)")
881        pver_re = re.compile(d.getVar('UPSTREAM_CHECK_GITTAGREGEX') or r"(?P<pver>([0-9][\.|_]?)+)")
882        nonrel_re = re.compile(r"(alpha|beta|rc|final)+")
883
884        verstring = ""
885        for line in output.split("\n"):
886            if not line:
887                break
888
889            m = rev_tag_re.match(line)
890            if not m:
891                continue
892
893            (revision, tag) = m.groups()
894
895            # Ignore non-released branches
896            if nonrel_re.search(tag):
897                continue
898
899            # search for version in the line
900            m = pver_re.search(tag)
901            if not m:
902                continue
903
904            pver = m.group('pver').replace("_", ".")
905
906            if verstring and bb.utils.vercmp(("0", pver, ""), ("0", verstring, "")) < 0:
907                continue
908
909            verstring = pver
910            pupver = (verstring, revision)
911
912        return pupver
913
914    def _build_revision(self, ud, d, name):
915        return ud.revisions[name]
916
917    def gitpkgv_revision(self, ud, d, name):
918        """
919        Return a sortable revision number by counting commits in the history
920        Based on gitpkgv.bblass in meta-openembedded
921        """
922        rev = self._build_revision(ud, d, name)
923        localpath = ud.localpath
924        rev_file = os.path.join(localpath, "oe-gitpkgv_" + rev)
925        if not os.path.exists(localpath):
926            commits = None
927        else:
928            if not os.path.exists(rev_file) or not os.path.getsize(rev_file):
929                from pipes import quote
930                commits = bb.fetch2.runfetchcmd(
931                        "git rev-list %s -- | wc -l" % quote(rev),
932                        d, quiet=True).strip().lstrip('0')
933                if commits:
934                    open(rev_file, "w").write("%d\n" % int(commits))
935            else:
936                commits = open(rev_file, "r").readline(128).strip()
937        if commits:
938            return False, "%s+%s" % (commits, rev[:7])
939        else:
940            return True, str(rev)
941
942    def checkstatus(self, fetch, ud, d):
943        try:
944            self._lsremote(ud, d, "")
945            return True
946        except bb.fetch2.FetchError:
947            return False
948