xref: /openbmc/openbmc/poky/bitbake/lib/bb/fetch2/git.py (revision c9537f57ab488bf5d90132917b0184e2527970a5)
1"""
2BitBake 'Fetch' git implementation
3
4git fetcher support the SRC_URI with format of:
5SRC_URI = "git://some.host/somepath;OptionA=xxx;OptionB=xxx;..."
6
7Supported SRC_URI options are:
8
9- branch
10   The git branch to retrieve from. The default is "master"
11
12- tag
13    The git tag to retrieve. The default is "master"
14
15- protocol
16   The method to use to access the repository. Common options are "git",
17   "http", "https", "file", "ssh" and "rsync". The default is "git".
18
19- rebaseable
20   rebaseable indicates that the upstream git repo may rebase in the future,
21   and current revision may disappear from upstream repo. This option will
22   remind fetcher to preserve local cache carefully for future use.
23   The default value is "0", set rebaseable=1 for rebaseable git repo.
24
- nocheckout
   Don't check out source code when unpacking. Set this option for a recipe
   that has its own routine to check out code.
   The default is "0", set nocheckout=1 if needed.
29
30- bareclone
31   Create a bare clone of the source code and don't checkout the source code
32   when unpacking. Set this option for the recipe who has its own routine to
33   checkout code and tracking branch requirements.
34   The default is "0", set bareclone=1 if needed.
35
- nobranch
   Skip the check that the SHA-1 revision is on the given branch. Set this
   option for a recipe referring to a commit that is valid in any namespace
   (branch, tag, ...) instead of a branch.
   The default is "0", set nobranch=1 if needed.
41
42- subpath
43   Limit the checkout to a specific subpath of the tree.
44   By default, checkout the whole tree, set subpath=<path> if needed
45
46- destsuffix
47   The name of the path in which to place the checkout.
48   By default, the path is git/, set destsuffix=<suffix> if needed
49
50- usehead
51   For local git:// urls to use the current branch HEAD as the revision for use with
52   AUTOREV. Implies nobranch.
53
54- lfs
55    Enable the checkout to use LFS for large files. This will download all LFS files
56    in the download step, as the unpack step does not have network access.
57    The default is "1", set lfs=0 to skip.
58
59"""
60
61# Copyright (C) 2005 Richard Purdie
62#
63# SPDX-License-Identifier: GPL-2.0-only
64#
65
66import collections
67import errno
68import fnmatch
69import os
70import re
71import shlex
72import shutil
73import subprocess
74import tempfile
75import urllib
76import bb
77import bb.progress
78from contextlib import contextmanager
79from   bb.fetch2 import FetchMethod
80from   bb.fetch2 import runfetchcmd
81from   bb.fetch2 import logger
82from   bb.fetch2 import trusted_network
83
84
# Matches a fully-resolved 40-character lowercase hexadecimal SHA-1 revision.
sha1_re = re.compile(r'^[0-9a-f]{40}$')
# Matches runs of consecutive slashes; its consumer is not in this chunk —
# presumably used elsewhere in the file to normalise paths/refs.
slash_re = re.compile(r"/+")
87
class GitProgressHandler(bb.progress.LineFilterProgressHandler):
    """Extract progress information from git output"""
    def __init__(self, d):
        # Buffer of raw git output, reset each time a stage marker is found.
        self._buffer = ''
        # Highest object count seen so far during the 'Counting objects' stage.
        self._count = 0
        super(GitProgressHandler, self).__init__(d)
        # Send an initial progress event so the bar gets shown
        self._fire_progress(-1)

    def write(self, string):
        # Map each git stage's local percentage onto an overall 0-100 scale
        # using fixed per-stage weights.
        self._buffer += string
        stages = ['Counting objects', 'Compressing objects', 'Receiving objects', 'Resolving deltas']
        stage_weights = [0.2, 0.05, 0.5, 0.25]
        stagenum = 0
        # Scan from the last stage backwards so the most advanced stage
        # present in the buffered output wins.
        for i, stage in reversed(list(enumerate(stages))):
            if stage in self._buffer:
                stagenum = i
                self._buffer = ''
                break
        self._status = stages[stagenum]
        percs = re.findall(r'(\d+)%', string)
        if percs:
            # Overall progress = sum of completed stages' weights (as %)
            # plus the weighted share of the current stage's own percentage.
            progress = int(round((int(percs[-1]) * stage_weights[stagenum]) + (sum(stage_weights[:stagenum]) * 100)))
            rates = re.findall(r'([\d.]+ [a-zA-Z]*/s+)', string)
            if rates:
                rate = rates[-1]
            else:
                rate = None
            self.update(progress, rate)
        else:
            if stagenum == 0:
                # No percentages while counting objects; report the raw object
                # count instead. Negative mirrors the initial _fire_progress(-1)
                # event — presumably signals indeterminate progress (see
                # bb.progress for the convention).
                percs = re.findall(r': (\d+)', string)
                if percs:
                    count = int(percs[-1])
                    if count > self._count:
                        self._count = count
                        self._fire_progress(-count)
        super(GitProgressHandler, self).write(string)
126
127
128class Git(FetchMethod):
129    bitbake_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.join(os.path.abspath(__file__))), '..', '..', '..'))
130    make_shallow_path = os.path.join(bitbake_dir, 'bin', 'git-make-shallow')
131
132    """Class to fetch a module or modules from git repositories"""
    def init(self, d):
        # No git-specific fetcher initialisation is required.
        pass
135
136    def supports(self, ud, d):
137        """
138        Check to see if a given url can be fetched with git.
139        """
140        return ud.type in ['git']
141
    def supports_checksum(self, urldata):
        # Git sources are pinned by revision (SRCREV), not by file checksums.
        return False
144
    def cleanup_upon_failure(self):
        # Keep any partially-fetched clone on failure so a later attempt can
        # reuse or repair it instead of starting from scratch.
        return False
147
    def urldata_init(self, ud, d):
        """
        init git specific variable within url data
        so that the git method like latest_revision() can work

        Populates ud with: protocol, branch/revision options, shallow-clone
        configuration (BB_GIT_SHALLOW*), the resolved SHA-1 revision, and the
        local clone directory / mirror tarball paths.
        """
        # Protocol: explicit ;protocol= wins, otherwise 'file' for hostless
        # URLs and 'git' for everything else.
        if 'protocol' in ud.parm:
            ud.proto = ud.parm['protocol']
        elif not ud.host:
            ud.proto = 'file'
        else:
            ud.proto = "git"
        if ud.host == "github.com" and ud.proto == "git":
            # github stopped supporting git protocol
            # https://github.blog/2021-09-01-improving-git-protocol-security-github/#no-more-unauthenticated-git
            ud.proto = "https"
            bb.warn("URL: %s uses git protocol which is no longer supported by github. Please change to ;protocol=https in the url." % ud.url)

        if not ud.proto in ('git', 'file', 'ssh', 'http', 'https', 'rsync'):
            raise bb.fetch2.ParameterError("Invalid protocol type", ud.url)

        # Boolean URL options; each defaults to "0" (off).
        ud.nocheckout = ud.parm.get("nocheckout","0") == "1"

        ud.rebaseable = ud.parm.get("rebaseable","0") == "1"

        ud.nobranch = ud.parm.get("nobranch","0") == "1"

        # usehead implies nobranch
        ud.usehead = ud.parm.get("usehead","0") == "1"
        if ud.usehead:
            if ud.proto != "file":
                 raise bb.fetch2.ParameterError("The usehead option is only for use with local ('protocol=file') git repositories", ud.url)
            ud.nobranch = 1

        # bareclone implies nocheckout
        ud.bareclone = ud.parm.get("bareclone","0") == "1"
        if ud.bareclone:
            ud.nocheckout = 1

        ud.unresolvedrev = ""
        ud.branch = ud.parm.get("branch", "")
        if not ud.branch and not ud.nobranch:
            raise bb.fetch2.ParameterError("The url does not set any branch parameter or set nobranch=1.", ud.url)

        # BB_GIT_NOSHARED=1 disables "git clone -s" at unpack time.
        ud.noshared = d.getVar("BB_GIT_NOSHARED") == "1"

        ud.cloneflags = "-n"
        if not ud.noshared:
            ud.cloneflags += " -s"
        if ud.bareclone:
            ud.cloneflags += " --mirror"

        # Shallow clone configuration (BB_GIT_SHALLOW*).
        ud.shallow_skip_fast = False
        ud.shallow = d.getVar("BB_GIT_SHALLOW") == "1"
        ud.shallow_extra_refs = (d.getVar("BB_GIT_SHALLOW_EXTRA_REFS") or "").split()

        # Global default depth; must be a non-negative integer.
        depth_default = d.getVar("BB_GIT_SHALLOW_DEPTH")
        if depth_default is not None:
            try:
                depth_default = int(depth_default or 0)
            except ValueError:
                raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH: %s" % depth_default)
            else:
                if depth_default < 0:
                    raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH: %s" % depth_default)
        else:
            depth_default = 1
        # Per-name depths fall back to the global default.
        ud.shallow_depths = collections.defaultdict(lambda: depth_default)

        revs_default = d.getVar("BB_GIT_SHALLOW_REVS")
        ud.shallow_revs = []

        ud.unresolvedrev = ud.branch

        # Per-URL (keyed by ud.name) overrides for depth and excluded revs.
        shallow_depth = d.getVar("BB_GIT_SHALLOW_DEPTH_%s" % ud.name)
        if shallow_depth is not None:
            try:
                shallow_depth = int(shallow_depth or 0)
            except ValueError:
                raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH_%s: %s" % (ud.name, shallow_depth))
            else:
                if shallow_depth < 0:
                    raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH_%s: %s" % (ud.name, shallow_depth))
                ud.shallow_depths[ud.name] = shallow_depth

        revs = d.getVar("BB_GIT_SHALLOW_REVS_%s" % ud.name)
        if revs is not None:
            ud.shallow_revs.extend(revs.split())
        elif revs_default is not None:
            ud.shallow_revs.extend(revs_default.split())

        if ud.shallow and not ud.shallow_revs and ud.shallow_depths[ud.name] == 0:
            # Shallow disabled for this URL
            ud.shallow = False

        if ud.usehead:
            # When usehead is set let's associate 'HEAD' with the unresolved
            # rev of this repository. This will get resolved into a revision
            # later. If an actual revision happens to have also been provided
            # then this setting will be overridden.
            ud.unresolvedrev = 'HEAD'

        ud.basecmd = d.getVar("FETCHCMD_git") or "git -c gc.autoDetach=false -c core.pager=cat -c safe.bareRepository=all -c clone.defaultRemoteName=origin"

        write_tarballs = d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0"
        # Rebaseable repos always get a mirror tarball so the revision
        # survives an upstream rebase (see comment below).
        ud.write_tarballs = write_tarballs != "0" or ud.rebaseable
        ud.write_shallow_tarballs = (d.getVar("BB_GENERATE_SHALLOW_TARBALLS") or write_tarballs) != "0"

        ud.setup_revisions(d)

        # Ensure any revision that doesn't look like a SHA-1 is translated into one
        if not sha1_re.match(ud.revision or ''):
            if ud.revision:
                ud.unresolvedrev = ud.revision
            ud.revision = self.latest_revision(ud, d, ud.name)

        # Build a filesystem-safe directory name from the host and path.
        gitsrcname = '%s%s' % (ud.host.replace(':', '.'), ud.path.replace('/', '.').replace('*', '.').replace(' ','_').replace('(', '_').replace(')', '_'))
        if gitsrcname.startswith('.'):
            gitsrcname = gitsrcname[1:]

        # For a rebaseable git repo, it is necessary to keep a mirror tar ball
        # per revision, so that even if the revision disappears from the
        # upstream repo in the future, the mirror will remain intact and still
        # contain the revision
        if ud.rebaseable:
            gitsrcname = gitsrcname + '_' + ud.revision

        dl_dir = d.getVar("DL_DIR")
        gitdir = d.getVar("GITDIR") or (dl_dir + "/git2")
        ud.clonedir = os.path.join(gitdir, gitsrcname)
        ud.localfile = ud.clonedir

        mirrortarball = 'git2_%s.tar.gz' % gitsrcname
        ud.fullmirror = os.path.join(dl_dir, mirrortarball)
        ud.mirrortarballs = [mirrortarball]
        if ud.shallow:
            # Encode everything that affects the shallow tarball's contents
            # (bareness, excluded revs, revision, depth, refs) into its name.
            tarballname = gitsrcname
            if ud.bareclone:
                tarballname = "%s_bare" % tarballname

            if ud.shallow_revs:
                tarballname = "%s_%s" % (tarballname, "_".join(sorted(ud.shallow_revs)))

            tarballname = "%s_%s" % (tarballname, ud.revision[:7])
            depth = ud.shallow_depths[ud.name]
            if depth:
                tarballname = "%s-%s" % (tarballname, depth)

            shallow_refs = []
            if not ud.nobranch:
                shallow_refs.append(ud.branch)
            if ud.shallow_extra_refs:
                shallow_refs.extend(r.replace('refs/heads/', '').replace('*', 'ALL') for r in ud.shallow_extra_refs)
            if shallow_refs:
                tarballname = "%s_%s" % (tarballname, "_".join(sorted(shallow_refs)).replace('/', '.'))

            fetcher = self.__class__.__name__.lower()
            ud.shallowtarball = '%sshallow_%s.tar.gz' % (fetcher, tarballname)
            ud.fullshallow = os.path.join(dl_dir, ud.shallowtarball)
            # Prefer the shallow tarball over the full mirror when both exist.
            ud.mirrortarballs.insert(0, ud.shallowtarball)
307
    def localpath(self, ud, d):
        # The local artefact for a git URL is the bare clone directory
        # (also assigned to ud.localfile in urldata_init).
        return ud.clonedir
310
311    def need_update(self, ud, d):
312        return self.clonedir_need_update(ud, d) \
313                or self.shallow_tarball_need_update(ud) \
314                or self.tarball_need_update(ud) \
315                or self.lfs_need_update(ud, d)
316
317    def clonedir_need_update(self, ud, d):
318        if not os.path.exists(ud.clonedir):
319            return True
320        if ud.shallow and ud.write_shallow_tarballs and self.clonedir_need_shallow_revs(ud, d):
321            return True
322        if not self._contains_ref(ud, d, ud.name, ud.clonedir):
323            return True
324        return False
325
326    def lfs_need_update(self, ud, d):
327        if not self._need_lfs(ud):
328            return False
329
330        if self.clonedir_need_update(ud, d):
331            return True
332
333        if not self._lfs_objects_downloaded(ud, d, ud.clonedir):
334            return True
335        return False
336
337    def clonedir_need_shallow_revs(self, ud, d):
338        for rev in ud.shallow_revs:
339            try:
340                runfetchcmd('%s rev-parse -q --verify %s' % (ud.basecmd, rev), d, quiet=True, workdir=ud.clonedir)
341            except bb.fetch2.FetchError:
342                return rev
343        return None
344
345    def shallow_tarball_need_update(self, ud):
346        return ud.shallow and ud.write_shallow_tarballs and not os.path.exists(ud.fullshallow)
347
348    def tarball_need_update(self, ud):
349        return ud.write_tarballs and not os.path.exists(ud.fullmirror)
350
351    def update_mirror_links(self, ud, origud):
352        super().update_mirror_links(ud, origud)
353        # When using shallow mode, add a symlink to the original fullshallow
354        # path to ensure a valid symlink even in the `PREMIRRORS` case
355        if ud.shallow and not os.path.exists(origud.fullshallow):
356            self.ensure_symlink(ud.localpath, origud.fullshallow)
357
358    def try_premirror(self, ud, d):
359        # If we don't do this, updating an existing checkout with only premirrors
360        # is not possible
361        if bb.utils.to_boolean(d.getVar("BB_FETCH_PREMIRRORONLY")):
362            return True
363        # If the url is not in trusted network, that is, BB_NO_NETWORK is set to 0
364        # and BB_ALLOWED_NETWORKS does not contain the host that ud.url uses, then
365        # we need to try premirrors first as using upstream is destined to fail.
366        if not trusted_network(d, ud.url):
367            return True
368        # the following check is to ensure incremental fetch in downloads, this is
369        # because the premirror might be old and does not contain the new rev required,
370        # and this will cause a total removal and new clone. So if we can reach to
371        # network, we prefer upstream over premirror, though the premirror might contain
372        # the new rev.
373        if os.path.exists(ud.clonedir):
374            return False
375        return True
376
377    def download(self, ud, d):
378        """Fetch url"""
379
380        # A current clone is preferred to either tarball, a shallow tarball is
381        # preferred to an out of date clone, and a missing clone will use
382        # either tarball.
383        if ud.shallow and os.path.exists(ud.fullshallow) and self.need_update(ud, d):
384            ud.localpath = ud.fullshallow
385            return
386        elif os.path.exists(ud.fullmirror) and self.need_update(ud, d):
387            if not os.path.exists(ud.clonedir):
388                bb.utils.mkdirhier(ud.clonedir)
389                runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=ud.clonedir)
390            else:
391                tmpdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR'))
392                runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=tmpdir)
393                output = runfetchcmd("%s remote" % ud.basecmd, d, quiet=True, workdir=ud.clonedir)
394                if 'mirror' in output:
395                    runfetchcmd("%s remote rm mirror" % ud.basecmd, d, workdir=ud.clonedir)
396                runfetchcmd("%s remote add --mirror=fetch mirror %s" % (ud.basecmd, tmpdir), d, workdir=ud.clonedir)
397                fetch_cmd = "LANG=C %s fetch -f --update-head-ok  --progress mirror " % (ud.basecmd)
398                runfetchcmd(fetch_cmd, d, workdir=ud.clonedir)
399        repourl = self._get_repo_url(ud)
400
401        needs_clone = False
402        if os.path.exists(ud.clonedir):
403            # The directory may exist, but not be the top level of a bare git
404            # repository in which case it needs to be deleted and re-cloned.
405            try:
406                # Since clones can be bare, use --absolute-git-dir instead of --show-toplevel
407                output = runfetchcmd("LANG=C %s rev-parse --absolute-git-dir" % ud.basecmd, d, workdir=ud.clonedir)
408                toplevel = output.rstrip()
409
410                if not bb.utils.path_is_descendant(toplevel, ud.clonedir):
411                    logger.warning("Top level directory '%s' is not a descendant of '%s'. Re-cloning", toplevel, ud.clonedir)
412                    needs_clone = True
413            except bb.fetch2.FetchError as e:
414                logger.warning("Unable to get top level for %s (not a git directory?): %s", ud.clonedir, e)
415                needs_clone = True
416            except FileNotFoundError as e:
417                logger.warning("%s", e)
418                needs_clone = True
419
420            if needs_clone:
421                shutil.rmtree(ud.clonedir)
422        else:
423            needs_clone = True
424
425        # If the repo still doesn't exist, fallback to cloning it
426        if needs_clone:
427            # We do this since git will use a "-l" option automatically for local urls where possible,
428            # but it doesn't work when git/objects is a symlink, only works when it is a directory.
429            if repourl.startswith("file://"):
430                repourl_path = repourl[7:]
431                objects = os.path.join(repourl_path, 'objects')
432                if os.path.isdir(objects) and not os.path.islink(objects):
433                    repourl = repourl_path
434            clone_cmd = "LANG=C %s clone --bare --mirror %s %s --progress" % (ud.basecmd, shlex.quote(repourl), ud.clonedir)
435            if ud.proto.lower() != 'file':
436                bb.fetch2.check_network_access(d, clone_cmd, ud.url)
437            progresshandler = GitProgressHandler(d)
438
439            # Try creating a fast initial shallow clone
440            # Enabling ud.shallow_skip_fast will skip this
441            # If the Git error "Server does not allow request for unadvertised object"
442            # occurs, shallow_skip_fast is enabled automatically.
443            # This may happen if the Git server does not allow the request
444            # or if the Git client has issues with this functionality.
445            if ud.shallow and not ud.shallow_skip_fast:
446                try:
447                    self.clone_shallow_with_tarball(ud, d)
448                    # When the shallow clone has succeeded, use the shallow tarball
449                    ud.localpath = ud.fullshallow
450                    return
451                except:
452                    logger.warning("Creating fast initial shallow clone failed, try initial regular clone now.")
453
454            # When skipping fast initial shallow or the fast inital shallow clone failed:
455            # Try again with an initial regular clone
456            runfetchcmd(clone_cmd, d, log=progresshandler)
457
458        # Update the checkout if needed
459        if self.clonedir_need_update(ud, d):
460            output = runfetchcmd("%s remote" % ud.basecmd, d, quiet=True, workdir=ud.clonedir)
461            if "origin" in output:
462              runfetchcmd("%s remote rm origin" % ud.basecmd, d, workdir=ud.clonedir)
463
464            runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=ud.clonedir)
465
466            if ud.nobranch:
467                fetch_cmd = "LANG=C %s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, shlex.quote(repourl))
468            else:
469                fetch_cmd = "LANG=C %s fetch -f --progress %s refs/heads/*:refs/heads/* refs/tags/*:refs/tags/*" % (ud.basecmd, shlex.quote(repourl))
470            if ud.proto.lower() != 'file':
471                bb.fetch2.check_network_access(d, fetch_cmd, ud.url)
472            progresshandler = GitProgressHandler(d)
473            runfetchcmd(fetch_cmd, d, log=progresshandler, workdir=ud.clonedir)
474            runfetchcmd("%s prune-packed" % ud.basecmd, d, workdir=ud.clonedir)
475            runfetchcmd("%s pack-refs --all" % ud.basecmd, d, workdir=ud.clonedir)
476            runfetchcmd("%s pack-redundant --all | xargs -r rm" % ud.basecmd, d, workdir=ud.clonedir)
477            try:
478                os.unlink(ud.fullmirror)
479            except OSError as exc:
480                if exc.errno != errno.ENOENT:
481                    raise
482
483        if not self._contains_ref(ud, d, ud.name, ud.clonedir):
484            raise bb.fetch2.FetchError("Unable to find revision %s in branch %s even from upstream" % (ud.revision, ud.branch))
485
486        if ud.shallow and ud.write_shallow_tarballs:
487            missing_rev = self.clonedir_need_shallow_revs(ud, d)
488            if missing_rev:
489                raise bb.fetch2.FetchError("Unable to find revision %s even from upstream" % missing_rev)
490
491        if self.lfs_need_update(ud, d):
492            self.lfs_fetch(ud, d, ud.clonedir, ud.revision)
493
494    def lfs_fetch(self, ud, d, clonedir, revision, fetchall=False, progresshandler=None):
495        """Helper method for fetching Git LFS data"""
496        try:
497            if self._need_lfs(ud) and self._contains_lfs(ud, d, clonedir) and len(revision):
498                self._ensure_git_lfs(d, ud)
499
500                # Using worktree with the revision because .lfsconfig may exists
501                worktree_add_cmd = "%s worktree add wt %s" % (ud.basecmd, revision)
502                runfetchcmd(worktree_add_cmd, d, log=progresshandler, workdir=clonedir)
503                lfs_fetch_cmd = "%s lfs fetch %s" % (ud.basecmd, "--all" if fetchall else "")
504                runfetchcmd(lfs_fetch_cmd, d, log=progresshandler, workdir=(clonedir + "/wt"))
505                worktree_rem_cmd = "%s worktree remove -f wt" % ud.basecmd
506                runfetchcmd(worktree_rem_cmd, d, log=progresshandler, workdir=clonedir)
507        except:
508            logger.warning("Fetching LFS did not succeed.")
509
510    @contextmanager
511    def create_atomic(self, filename):
512        """Create as a temp file and move atomically into position to avoid races"""
513        fd, tfile = tempfile.mkstemp(dir=os.path.dirname(filename))
514        try:
515            yield tfile
516            umask = os.umask(0o666)
517            os.umask(umask)
518            os.chmod(tfile, (0o666 & ~umask))
519            os.rename(tfile, filename)
520        finally:
521            os.close(fd)
522
    def build_mirror_data(self, ud, d):
        """Generate the configured mirror tarball(s) for this URL.

        Creates a shallow tarball when shallow tarballs are enabled,
        otherwise a full clone tarball, unless one already exists.
        """
        if ud.shallow and ud.write_shallow_tarballs:
            if not os.path.exists(ud.fullshallow):
                # exists() is False for a dangling symlink; remove it so the
                # tarball can be created at that path.
                if os.path.islink(ud.fullshallow):
                    os.unlink(ud.fullshallow)
                self.clone_shallow_with_tarball(ud, d)
        elif ud.write_tarballs and not os.path.exists(ud.fullmirror):
            # Same dangling-symlink handling as above.
            if os.path.islink(ud.fullmirror):
                os.unlink(ud.fullmirror)

            logger.info("Creating tarball of git repository")
            with self.create_atomic(ud.fullmirror) as tfile:
                # Stamp entries with the newest commit date and fixed
                # ownership so the tarball is reproducible for a given
                # repository state.
                mtime = runfetchcmd("{} log --all -1 --format=%cD".format(ud.basecmd), d,
                        quiet=True, workdir=ud.clonedir)
                runfetchcmd("tar -czf %s --owner oe:0 --group oe:0 --mtime \"%s\" ."
                        % (tfile, mtime), d, workdir=ud.clonedir)
            runfetchcmd("touch %s.done" % ud.fullmirror, d)
540
541    def clone_shallow_with_tarball(self, ud, d):
542        ret = False
543        tempdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR'))
544        shallowclone = os.path.join(tempdir, 'git')
545        try:
546            try:
547                self.clone_shallow_local(ud, shallowclone, d)
548            except:
549                logger.warning("Fast shallow clone failed, try to skip fast mode now.")
550                bb.utils.remove(tempdir, recurse=True)
551                os.mkdir(tempdir)
552                ud.shallow_skip_fast = True
553                self.clone_shallow_local(ud, shallowclone, d)
554            logger.info("Creating tarball of git repository")
555            with self.create_atomic(ud.fullshallow) as tfile:
556                runfetchcmd("tar -czf %s ." % tfile, d, workdir=shallowclone)
557            runfetchcmd("touch %s.done" % ud.fullshallow, d)
558            ret = True
559        finally:
560            bb.utils.remove(tempdir, recurse=True)
561
562        return ret
563
564    def clone_shallow_local(self, ud, dest, d):
565        """
566        Shallow fetch from ud.clonedir (${DL_DIR}/git2/<gitrepo> by default):
567        - For BB_GIT_SHALLOW_DEPTH: git fetch --depth <depth> rev
568        - For BB_GIT_SHALLOW_REVS: git fetch --shallow-exclude=<revs> rev
569        """
570
571        progresshandler = GitProgressHandler(d)
572        repourl = self._get_repo_url(ud)
573        bb.utils.mkdirhier(dest)
574        init_cmd = "%s init -q" % ud.basecmd
575        if ud.bareclone:
576            init_cmd += " --bare"
577        runfetchcmd(init_cmd, d, workdir=dest)
578        # Use repourl when creating a fast initial shallow clone
579        # Prefer already existing full bare clones if available
580        if not ud.shallow_skip_fast and not os.path.exists(ud.clonedir):
581            remote = shlex.quote(repourl)
582        else:
583            remote = ud.clonedir
584        runfetchcmd("%s remote add origin %s" % (ud.basecmd, remote), d, workdir=dest)
585
586        # Check the histories which should be excluded
587        shallow_exclude = ''
588        for revision in ud.shallow_revs:
589            shallow_exclude += " --shallow-exclude=%s" % revision
590
591        revision = ud.revision
592        depth = ud.shallow_depths[ud.name]
593
594        # The --depth and --shallow-exclude can't be used together
595        if depth and shallow_exclude:
596            raise bb.fetch2.FetchError("BB_GIT_SHALLOW_REVS is set, but BB_GIT_SHALLOW_DEPTH is not 0.")
597
598        # For nobranch, we need a ref, otherwise the commits will be
599        # removed, and for non-nobranch, we truncate the branch to our
600        # srcrev, to avoid keeping unnecessary history beyond that.
601        branch = ud.branch
602        if ud.nobranch:
603            ref = "refs/shallow/%s" % ud.name
604        elif ud.bareclone:
605            ref = "refs/heads/%s" % branch
606        else:
607            ref = "refs/remotes/origin/%s" % branch
608
609        fetch_cmd = "%s fetch origin %s" % (ud.basecmd, revision)
610        if depth:
611            fetch_cmd += " --depth %s" % depth
612
613        if shallow_exclude:
614            fetch_cmd += shallow_exclude
615
616        # Advertise the revision for lower version git such as 2.25.1:
617        # error: Server does not allow request for unadvertised object.
618        # The ud.clonedir is a local temporary dir, will be removed when
619        # fetch is done, so we can do anything on it.
620        adv_cmd = 'git branch -f advertise-%s %s' % (revision, revision)
621        if ud.shallow_skip_fast:
622            runfetchcmd(adv_cmd, d, workdir=ud.clonedir)
623
624        runfetchcmd(fetch_cmd, d, workdir=dest)
625        runfetchcmd("%s update-ref %s %s" % (ud.basecmd, ref, revision), d, workdir=dest)
626        # Fetch Git LFS data
627        self.lfs_fetch(ud, d, dest, ud.revision)
628
629        # Apply extra ref wildcards
630        all_refs_remote = runfetchcmd("%s ls-remote origin 'refs/*'" % ud.basecmd, \
631                                        d, workdir=dest).splitlines()
632        all_refs = []
633        for line in all_refs_remote:
634            all_refs.append(line.split()[-1])
635        extra_refs = []
636        if 'tag' in ud.parm:
637            extra_refs.append(ud.parm['tag'])
638        for r in ud.shallow_extra_refs:
639            if not ud.bareclone:
640                r = r.replace('refs/heads/', 'refs/remotes/origin/')
641
642            if '*' in r:
643                matches = filter(lambda a: fnmatch.fnmatchcase(a, r), all_refs)
644                extra_refs.extend(matches)
645            else:
646                extra_refs.append(r)
647
648        for ref in extra_refs:
649            ref_fetch = ref.replace('refs/heads/', '').replace('refs/remotes/origin/', '').replace('refs/tags/', '')
650            runfetchcmd("%s fetch origin --depth 1 %s" % (ud.basecmd, ref_fetch), d, workdir=dest)
651            revision = runfetchcmd("%s rev-parse FETCH_HEAD" % ud.basecmd, d, workdir=dest)
652            runfetchcmd("%s update-ref %s %s" % (ud.basecmd, ref, revision), d, workdir=dest)
653
654        # The url is local ud.clonedir, set it to upstream one
655        runfetchcmd("%s remote set-url origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=dest)
656
    def unpack(self, ud, destdir, d):
        """
        Unpack the downloaded src to destdir.

        Clones from the local clone directory when it is up to date, falling
        back to extracting the shallow mirror tarball. Afterwards the origin
        remote is repointed at the upstream URL, LFS content is handled when
        enabled, and the requested revision is checked out unless
        ud.nocheckout is set. Returns True on success; raises UnpackError or
        FetchError on failure.
        """

        subdir = ud.parm.get("subdir")
        subpath = ud.parm.get("subpath")
        readpathspec = ""
        def_destsuffix = "git/"

        if subpath:
            # Limit the checkout to a subtree; the default destination
            # directory then becomes the subtree's basename.
            readpathspec = ":%s" % subpath
            def_destsuffix = "%s/" % os.path.basename(subpath.rstrip('/'))

        if subdir:
            # If 'subdir' param exists, create a dir and use it as destination for unpack cmd
            if os.path.isabs(subdir):
                if not os.path.realpath(subdir).startswith(os.path.realpath(destdir)):
                    raise bb.fetch2.UnpackError("subdir argument isn't a subdirectory of unpack root %s" % destdir, ud.url)
                destdir = subdir
            else:
                destdir = os.path.join(destdir, subdir)
            def_destsuffix = ""

        destsuffix = ud.parm.get("destsuffix", def_destsuffix)
        destdir = ud.destdir = os.path.join(destdir, destsuffix)
        # Always start from a clean destination directory.
        if os.path.exists(destdir):
            bb.utils.prunedir(destdir)
        if not ud.bareclone:
            ud.unpack_tracer.unpack("git", destdir)

        need_lfs = self._need_lfs(ud)

        if not need_lfs:
            # Stop git from downloading LFS blobs during clone/checkout.
            ud.basecmd = "GIT_LFS_SKIP_SMUDGE=1 " + ud.basecmd

        source_found = False
        source_error = []

        # Prefer cloning from the local clone directory when it already
        # contains everything we need.
        clonedir_is_up_to_date = not self.clonedir_need_update(ud, d)
        if clonedir_is_up_to_date:
            runfetchcmd("%s clone %s %s/ %s" % (ud.basecmd, ud.cloneflags, ud.clonedir, destdir), d)
            source_found = True
        else:
            source_error.append("clone directory not available or not up to date: " + ud.clonedir)

        if not source_found:
            # Fall back to the shallow mirror tarball, if one exists.
            if ud.shallow:
                if os.path.exists(ud.fullshallow):
                    bb.utils.mkdirhier(destdir)
                    runfetchcmd("tar -xzf %s" % ud.fullshallow, d, workdir=destdir)
                    source_found = True
                else:
                    source_error.append("shallow clone not available: " + ud.fullshallow)
            else:
                source_error.append("shallow clone not enabled")

        if not source_found:
            raise bb.fetch2.UnpackError("No up to date source found: " + "; ".join(source_error), ud.url)

        # If there is a tag parameter in the url and we also have a fixed srcrev, check the tag
        # matches the revision
        if 'tag' in ud.parm and sha1_re.match(ud.revision):
            output = runfetchcmd("%s rev-list -n 1 %s" % (ud.basecmd, ud.parm['tag']), d, workdir=destdir)
            output = output.strip()
            if output != ud.revision:
                # It is possible ud.revision is the revision on an annotated tag which won't match the output of rev-list
                # If it resolves to the same thing there isn't a problem.
                output2 = runfetchcmd("%s rev-list -n 1 %s" % (ud.basecmd, ud.revision), d, workdir=destdir)
                output2 = output2.strip()
                if output != output2:
                    raise bb.fetch2.FetchError("The revision the git tag '%s' resolved to didn't match the SRCREV in use (%s vs %s)" % (ud.parm['tag'], output, ud.revision), ud.url)

        # The clone's origin points at the local clone directory; repoint it
        # at the real upstream URL.
        repourl = self._get_repo_url(ud)
        runfetchcmd("%s remote set-url origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=destdir)

        if self._contains_lfs(ud, d, destdir):
            if not need_lfs:
                bb.note("Repository %s has LFS content but it is not being fetched" % (repourl))
            else:
                self._ensure_git_lfs(d, ud)

                runfetchcmd("%s lfs install --local" % ud.basecmd, d, workdir=destdir)

        if not ud.nocheckout:
            if subpath:
                # Check out only the requested subtree of the revision.
                runfetchcmd("%s read-tree %s%s" % (ud.basecmd, ud.revision, readpathspec), d,
                            workdir=destdir)
                runfetchcmd("%s checkout-index -q -f -a" % ud.basecmd, d, workdir=destdir)
            elif not ud.nobranch:
                # Create/reset the branch at the revision and set its upstream.
                branchname =  ud.branch
                runfetchcmd("%s checkout -B %s %s" % (ud.basecmd, branchname, \
                            ud.revision), d, workdir=destdir)
                runfetchcmd("%s branch %s --set-upstream-to origin/%s" % (ud.basecmd, branchname, \
                            branchname), d, workdir=destdir)
            else:
                # Detached checkout of the exact revision.
                runfetchcmd("%s checkout %s" % (ud.basecmd, ud.revision), d, workdir=destdir)

        return True
754
755    def clean(self, ud, d):
756        """ clean the git directory """
757
758        to_remove = [ud.localpath, ud.fullmirror, ud.fullmirror + ".done"]
759        # The localpath is a symlink to clonedir when it is cloned from a
760        # mirror, so remove both of them.
761        if os.path.islink(ud.localpath):
762            clonedir = os.path.realpath(ud.localpath)
763            to_remove.append(clonedir)
764
765        # Remove shallow mirror tarball
766        if ud.shallow:
767            to_remove.append(ud.fullshallow)
768            to_remove.append(ud.fullshallow + ".done")
769
770        for r in to_remove:
771            if os.path.exists(r) or os.path.islink(r):
772                bb.note('Removing %s' % r)
773                bb.utils.remove(r, True)
774
775    def supports_srcrev(self):
776        return True
777
778    def _contains_ref(self, ud, d, name, wd):
779        cmd = ""
780        if ud.nobranch:
781            cmd = "%s log --pretty=oneline -n 1 %s -- 2> /dev/null | wc -l" % (
782                ud.basecmd, ud.revision)
783        else:
784            cmd =  "%s branch --contains %s --list %s 2> /dev/null | wc -l" % (
785                ud.basecmd, ud.revision, ud.branch)
786        try:
787            output = runfetchcmd(cmd, d, quiet=True, workdir=wd)
788        except bb.fetch2.FetchError:
789            return False
790        if len(output.split()) > 1:
791            raise bb.fetch2.FetchError("The command '%s' gave output with more then 1 line unexpectedly, output: '%s'" % (cmd, output))
792        return output.split()[0] != "0"
793
794    def _lfs_objects_downloaded(self, ud, d, wd):
795        """
796        Verifies whether the LFS objects for requested revisions have already been downloaded
797        """
798        # Bail out early if this repository doesn't use LFS
799        if not self._contains_lfs(ud, d, wd):
800            return True
801
802        self._ensure_git_lfs(d, ud)
803
804        # The Git LFS specification specifies ([1]) the LFS folder layout so it should be safe to check for file
805        # existence.
806        # [1] https://github.com/git-lfs/git-lfs/blob/main/docs/spec.md#intercepting-git
807        cmd = "%s lfs ls-files -l %s" \
808                % (ud.basecmd, ud.revision)
809        output = runfetchcmd(cmd, d, quiet=True, workdir=wd).rstrip()
810        # Do not do any further matching if no objects are managed by LFS
811        if not output:
812            return True
813
814        # Match all lines beginning with the hexadecimal OID
815        oid_regex = re.compile("^(([a-fA-F0-9]{2})([a-fA-F0-9]{2})[A-Fa-f0-9]+)")
816        for line in output.split("\n"):
817            oid = re.search(oid_regex, line)
818            if not oid:
819                bb.warn("git lfs ls-files output '%s' did not match expected format." % line)
820            if not os.path.exists(os.path.join(wd, "lfs", "objects", oid.group(2), oid.group(3), oid.group(1))):
821                return False
822
823        return True
824
825    def _need_lfs(self, ud):
826        return ud.parm.get("lfs", "1") == "1"
827
828    def _contains_lfs(self, ud, d, wd):
829        """
830        Check if the repository has 'lfs' (large file) content
831        """
832        cmd = "%s grep lfs %s:.gitattributes | wc -l" % (
833            ud.basecmd, ud.revision)
834
835        try:
836            output = runfetchcmd(cmd, d, quiet=True, workdir=wd)
837            if int(output) > 0:
838                return True
839        except (bb.fetch2.FetchError,ValueError):
840            pass
841        return False
842
843    def _ensure_git_lfs(self, d, ud):
844        """
845        Ensures that git-lfs is available, raising a FetchError if it isn't.
846        """
847        if shutil.which("git-lfs", path=d.getVar('PATH')) is None:
848            raise bb.fetch2.FetchError(
849                "Repository %s has LFS content, install git-lfs on host to download (or set lfs=0 "
850                "to ignore it)" % self._get_repo_url(ud))
851
852    def _get_repo_url(self, ud):
853        """
854        Return the repository URL
855        """
856        # Note that we do not support passwords directly in the git urls. There are several
857        # reasons. SRC_URI can be written out to things like buildhistory and people don't
858        # want to leak passwords like that. Its also all too easy to share metadata without
859        # removing the password. ssh keys, ~/.netrc and ~/.ssh/config files can be used as
860        # alternatives so we will not take patches adding password support here.
861        if ud.user:
862            username = ud.user + '@'
863        else:
864            username = ""
865        return "%s://%s%s%s" % (ud.proto, username, ud.host, urllib.parse.quote(ud.path))
866
867    def _revision_key(self, ud, d, name):
868        """
869        Return a unique key for the url
870        """
871        # Collapse adjacent slashes
872        return "git:" + ud.host + slash_re.sub(".", ud.path) + ud.unresolvedrev
873
    def _lsremote(self, ud, d, search):
        """
        Run git ls-remote with the specified search string

        Returns the raw ls-remote output ('' when re-entered recursively);
        raises FetchError if the command unexpectedly gives no output.
        """
        # Prevent recursion e.g. in OE if SRCPV is in PV, PV is in WORKDIR,
        # and WORKDIR is in PATH (as a result of RSS), our call to
        # runfetchcmd() exports PATH so this function will get called again (!)
        # In this scenario the return call of the function isn't actually
        # important - WORKDIR isn't needed in PATH to call git ls-remote
        # anyway.
        if d.getVar('_BB_GIT_IN_LSREMOTE', False):
            return ''
        d.setVar('_BB_GIT_IN_LSREMOTE', '1')
        try:
            repourl = self._get_repo_url(ud)
            cmd = "%s ls-remote %s %s" % \
                (ud.basecmd, shlex.quote(repourl), search)
            # file:// urls are local, so no network access check is needed.
            if ud.proto.lower() != 'file':
                bb.fetch2.check_network_access(d, cmd, repourl)
            output = runfetchcmd(cmd, d, True)
            if not output:
                raise bb.fetch2.FetchError("The command %s gave empty output unexpectedly" % cmd, ud.url)
        finally:
            # Always clear the recursion guard, even on failure.
            d.delVar('_BB_GIT_IN_LSREMOTE')
        return output
899
    def _latest_revision(self, ud, d, name):
        """
        Compute the HEAD revision for the url

        Resolves ud.unresolvedrev (a branch or tag name) to a commit sha1
        using git ls-remote. Raises FetchError when the recipe never calls
        bb.fetch2.get_srcrev() or the ref cannot be found upstream.
        """
        if not d.getVar("__BBSRCREV_SEEN"):
            raise bb.fetch2.FetchError("Recipe uses a floating tag/branch '%s' for repo '%s' without a fixed SRCREV yet doesn't call bb.fetch2.get_srcrev() (use SRCPV in PV for OE)." % (ud.unresolvedrev, ud.host+ud.path))

        # Ensure we mark as not cached
        bb.fetch2.mark_recipe_nocache(d)

        output = self._lsremote(ud, d, "")
        # Tags of the form ^{} may not work, need to fallback to other form
        if ud.unresolvedrev[:5] == "refs/" or ud.usehead:
            head = ud.unresolvedrev
            tag = ud.unresolvedrev
        else:
            head = "refs/heads/%s" % ud.unresolvedrev
            tag = "refs/tags/%s" % ud.unresolvedrev
        # Try branch heads first, then the peeled (^{}) annotated-tag commit,
        # then the tag object itself.
        for s in [head, tag + "^{}", tag]:
            for l in output.strip().split('\n'):
                sha1, ref = l.split()
                if s == ref:
                    return sha1
        raise bb.fetch2.FetchError("Unable to resolve '%s' in upstream git repository in git ls-remote output for %s" % \
            (ud.unresolvedrev, ud.host+ud.path))
925
    def latest_versionstring(self, ud, d):
        """
        Compute the latest release name like "x.y.x" in "x.y.x+gitHASH"
        by searching through the tags output of ls-remote, comparing
        versions and returning the highest match.

        Returns a (version, revision) tuple, or ('', '') when the remote
        cannot be listed or no suitable tag is found.
        """
        pupver = ('', '')

        try:
            output = self._lsremote(ud, d, "refs/tags/*")
        except (bb.fetch2.FetchError, bb.fetch2.NetworkAccess) as e:
            bb.note("Could not list remote: %s" % str(e))
            return pupver

        # Lines look like "<40-hex sha1>\trefs/tags/<tag>"
        rev_tag_re = re.compile(r"([0-9a-f]{40})\s+refs/tags/(.*)")
        # Version pattern; recipes may override via UPSTREAM_CHECK_GITTAGREGEX
        pver_re = re.compile(d.getVar('UPSTREAM_CHECK_GITTAGREGEX') or r"(?P<pver>([0-9][\.|_]?)+)")
        # Pre-release style tags are skipped entirely
        nonrel_re = re.compile(r"(alpha|beta|rc|final)+")

        verstring = ""
        for line in output.split("\n"):
            # NOTE(review): stops at the first empty line (normally only the
            # trailing one); a blank line mid-output would truncate the scan.
            if not line:
                break

            m = rev_tag_re.match(line)
            if not m:
                continue

            (revision, tag) = m.groups()

            # Ignore non-released branches
            if nonrel_re.search(tag):
                continue

            # search for version in the line
            m = pver_re.search(tag)
            if not m:
                continue

            # Normalise 1_2_3 style tags to 1.2.3 for comparison
            pver = m.group('pver').replace("_", ".")

            # Keep only the highest version seen so far
            if verstring and bb.utils.vercmp(("0", pver, ""), ("0", verstring, "")) < 0:
                continue

            verstring = pver
            pupver = (verstring, revision)

        return pupver
973
974    def _build_revision(self, ud, d, name):
975        return ud.revision
976
977    def gitpkgv_revision(self, ud, d, name):
978        """
979        Return a sortable revision number by counting commits in the history
980        Based on gitpkgv.bblass in meta-openembedded
981        """
982        rev = ud.revision
983        localpath = ud.localpath
984        rev_file = os.path.join(localpath, "oe-gitpkgv_" + rev)
985        if not os.path.exists(localpath):
986            commits = None
987        else:
988            if not os.path.exists(rev_file) or not os.path.getsize(rev_file):
989                commits = bb.fetch2.runfetchcmd(
990                        "git rev-list %s -- | wc -l" % shlex.quote(rev),
991                        d, quiet=True).strip().lstrip('0')
992                if commits:
993                    open(rev_file, "w").write("%d\n" % int(commits))
994            else:
995                commits = open(rev_file, "r").readline(128).strip()
996        if commits:
997            return False, "%s+%s" % (commits, rev[:7])
998        else:
999            return True, str(rev)
1000
1001    def checkstatus(self, fetch, ud, d):
1002        try:
1003            self._lsremote(ud, d, "")
1004            return True
1005        except bb.fetch2.FetchError:
1006            return False
1007