xref: /openbmc/openbmc/poky/bitbake/lib/bb/fetch2/git.py (revision 09696fc3)
1"""
2BitBake 'Fetch' git implementation
3
The git fetcher supports SRC_URI entries with the format:
5SRC_URI = "git://some.host/somepath;OptionA=xxx;OptionB=xxx;..."
6
7Supported SRC_URI options are:
8
9- branch
10   The git branch to retrieve from. The default is "master"
11
12   This option also supports multiple branch fetching, with branches
13   separated by commas.  In multiple branches case, the name option
14   must have the same number of names to match the branches, which is
15   used to specify the SRC_REV for the branch
16   e.g:
17   SRC_URI="git://some.host/somepath;branch=branchX,branchY;name=nameX,nameY"
18   SRCREV_nameX = "xxxxxxxxxxxxxxxxxxxx"
19   SRCREV_nameY = "YYYYYYYYYYYYYYYYYYYY"
20
21- tag
22    The git tag to retrieve. The default is "master"
23
24- protocol
25   The method to use to access the repository. Common options are "git",
26   "http", "https", "file", "ssh" and "rsync". The default is "git".
27
28- rebaseable
29   rebaseable indicates that the upstream git repo may rebase in the future,
30   and current revision may disappear from upstream repo. This option will
31   remind fetcher to preserve local cache carefully for future use.
32   The default value is "0", set rebaseable=1 for rebaseable git repo.
33
34- nocheckout
35   Don't checkout source code when unpacking. set this option for the recipe
36   who has its own routine to checkout code.
37   The default is "0", set nocheckout=1 if needed.
38
39- bareclone
40   Create a bare clone of the source code and don't checkout the source code
41   when unpacking. Set this option for the recipe who has its own routine to
42   checkout code and tracking branch requirements.
43   The default is "0", set bareclone=1 if needed.
44
45- nobranch
46   Don't check the SHA validation for branch. set this option for the recipe
47   referring to commit which is valid in tag instead of branch.
48   The default is "0", set nobranch=1 if needed.
49
50- usehead
51   For local git:// urls to use the current branch HEAD as the revision for use with
52   AUTOREV. Implies nobranch.
53
54"""
55
56# Copyright (C) 2005 Richard Purdie
57#
58# SPDX-License-Identifier: GPL-2.0-only
59#
60
61import collections
62import errno
63import fnmatch
64import os
65import re
66import shlex
67import subprocess
68import tempfile
69import bb
70import bb.progress
71from contextlib import contextmanager
72from   bb.fetch2 import FetchMethod
73from   bb.fetch2 import runfetchcmd
74from   bb.fetch2 import logger
75
76
class GitProgressHandler(bb.progress.LineFilterProgressHandler):
    """Parse git's transfer output and emit bitbake progress events."""

    # Stages appear in git's output in this order; each contributes the
    # corresponding fraction of the overall progress bar.
    _stages = ('Counting objects', 'Compressing objects', 'Receiving objects', 'Resolving deltas')
    _weights = (0.2, 0.05, 0.5, 0.25)

    def __init__(self, d):
        self._buffer = ''
        self._count = 0
        super().__init__(d)
        # Emit an initial event so the progress bar appears immediately.
        self._fire_progress(-1)

    def write(self, string):
        self._buffer += string
        # Find the most advanced stage mentioned in the buffered output,
        # scanning from the last stage backwards.
        stagenum = 0
        for idx in range(len(self._stages) - 1, -1, -1):
            if self._stages[idx] in self._buffer:
                stagenum = idx
                self._buffer = ''
                break
        self._status = self._stages[stagenum]
        percentages = re.findall(r'(\d+)%', string)
        if percentages:
            # Scale this stage's percentage by its weight and add the
            # weight of all completed stages.
            weighted = int(percentages[-1]) * self._weights[stagenum]
            completed = sum(self._weights[:stagenum]) * 100
            progress = int(round(weighted + completed))
            rates = re.findall(r'([\d.]+ [a-zA-Z]*/s+)', string)
            rate = rates[-1] if rates else None
            self.update(progress, rate)
        elif stagenum == 0:
            # The counting stage reports no percentage; fire the raw
            # object count (negative => indeterminate) as it grows.
            counts = re.findall(r': (\d+)', string)
            if counts:
                count = int(counts[-1])
                if count > self._count:
                    self._count = count
                    self._fire_progress(-count)
        super().write(string)
115
116
117class Git(FetchMethod):
    # Root of the bitbake installation, derived from this module's location
    # (lib/bb/fetch2 -> three directories up).
    bitbake_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.join(os.path.abspath(__file__))), '..', '..', '..'))
    # Bundled helper script used to strip history when producing shallow clones.
    make_shallow_path = os.path.join(bitbake_dir, 'bin', 'git-make-shallow')
120
121    """Class to fetch a module or modules from git repositories"""
122    def init(self, d):
123        pass
124
125    def supports(self, ud, d):
126        """
127        Check to see if a given url can be fetched with git.
128        """
129        return ud.type in ['git']
130
131    def supports_checksum(self, urldata):
132        return False
133
    def urldata_init(self, ud, d):
        """
        Initialise git-specific state on the url data object (ud) so that
        git methods like latest_revision() can work.

        Parses the url parameters (protocol, branch, nocheckout, rebaseable,
        nobranch, usehead, bareclone, ...) and the BB_GIT_* datastore
        variables, resolves symbolic revisions, and computes the clone
        directory and mirror tarball names for this URL.
        """
        # Determine the transport: explicit ;protocol=..., else 'file' for
        # host-less (local) urls, else plain git.
        if 'protocol' in ud.parm:
            ud.proto = ud.parm['protocol']
        elif not ud.host:
            ud.proto = 'file'
        else:
            ud.proto = "git"
        if ud.host == "github.com" and ud.proto == "git":
            # github stopped supporting git protocol
            # https://github.blog/2021-09-01-improving-git-protocol-security-github/#no-more-unauthenticated-git
            ud.proto = "https"
            bb.warn("URL: %s uses git protocol which is no longer supported by github. Please change to ;protocol=https in the url." % ud.url)

        if not ud.proto in ('git', 'file', 'ssh', 'http', 'https', 'rsync'):
            raise bb.fetch2.ParameterError("Invalid protocol type", ud.url)

        # Boolean url options (see module docstring); all default to off.
        ud.nocheckout = ud.parm.get("nocheckout","0") == "1"

        ud.rebaseable = ud.parm.get("rebaseable","0") == "1"

        ud.nobranch = ud.parm.get("nobranch","0") == "1"

        # usehead implies nobranch
        ud.usehead = ud.parm.get("usehead","0") == "1"
        if ud.usehead:
            if ud.proto != "file":
                 raise bb.fetch2.ParameterError("The usehead option is only for use with local ('protocol=file') git repositories", ud.url)
            ud.nobranch = 1

        # bareclone implies nocheckout
        ud.bareclone = ud.parm.get("bareclone","0") == "1"
        if ud.bareclone:
            ud.nocheckout = 1

        ud.unresolvedrev = {}
        # One branch per entry in ud.names; a url with no branch parameter
        # currently defaults to "master" (with a deprecation warning).
        branches = ud.parm.get("branch", "").split(',')
        if branches == [""] and not ud.nobranch:
            bb.warn("URL: %s does not set any branch parameter. The future default branch used by tools and repositories is uncertain and we will therefore soon require this is set in all git urls." % ud.url)
            branches = ["master"]
        if len(branches) != len(ud.names):
            raise bb.fetch2.ParameterError("The number of name and branch parameters is not balanced", ud.url)

        # BB_GIT_NOSHARED=1 disables 'git clone -s' (shared object store).
        ud.noshared = d.getVar("BB_GIT_NOSHARED") == "1"

        ud.cloneflags = "-n"
        if not ud.noshared:
            ud.cloneflags += " -s"
        if ud.bareclone:
            ud.cloneflags += " --mirror"

        ud.shallow = d.getVar("BB_GIT_SHALLOW") == "1"
        ud.shallow_extra_refs = (d.getVar("BB_GIT_SHALLOW_EXTRA_REFS") or "").split()

        # Global shallow depth; must be a non-negative integer. A depth of 0
        # disables depth-based shallowing for a branch.
        depth_default = d.getVar("BB_GIT_SHALLOW_DEPTH")
        if depth_default is not None:
            try:
                depth_default = int(depth_default or 0)
            except ValueError:
                raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH: %s" % depth_default)
            else:
                if depth_default < 0:
                    raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH: %s" % depth_default)
        else:
            depth_default = 1
        ud.shallow_depths = collections.defaultdict(lambda: depth_default)

        revs_default = d.getVar("BB_GIT_SHALLOW_REVS")
        ud.shallow_revs = []
        ud.branches = {}
        for pos, name in enumerate(ud.names):
            branch = branches[pos]
            ud.branches[name] = branch
            ud.unresolvedrev[name] = branch

            # Per-name depth overrides the global default.
            shallow_depth = d.getVar("BB_GIT_SHALLOW_DEPTH_%s" % name)
            if shallow_depth is not None:
                try:
                    shallow_depth = int(shallow_depth or 0)
                except ValueError:
                    raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH_%s: %s" % (name, shallow_depth))
                else:
                    if shallow_depth < 0:
                        raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH_%s: %s" % (name, shallow_depth))
                    ud.shallow_depths[name] = shallow_depth

            # Extra revisions to preserve when making the clone shallow.
            revs = d.getVar("BB_GIT_SHALLOW_REVS_%s" % name)
            if revs is not None:
                ud.shallow_revs.extend(revs.split())
            elif revs_default is not None:
                ud.shallow_revs.extend(revs_default.split())

        if (ud.shallow and
                not ud.shallow_revs and
                all(ud.shallow_depths[n] == 0 for n in ud.names)):
            # Shallow disabled for this URL
            ud.shallow = False

        if ud.usehead:
            # When usehead is set let's associate 'HEAD' with the unresolved
            # rev of this repository. This will get resolved into a revision
            # later. If an actual revision happens to have also been provided
            # then this setting will be overridden.
            for name in ud.names:
                ud.unresolvedrev[name] = 'HEAD'

        ud.basecmd = d.getVar("FETCHCMD_git") or "git -c core.fsyncobjectfiles=0 -c gc.autoDetach=false"

        # Rebaseable repos always get a mirror tarball so revisions that
        # later vanish upstream remain available.
        write_tarballs = d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0"
        ud.write_tarballs = write_tarballs != "0" or ud.rebaseable
        ud.write_shallow_tarballs = (d.getVar("BB_GENERATE_SHALLOW_TARBALLS") or write_tarballs) != "0"

        ud.setup_revisions(d)

        for name in ud.names:
            # Ensure anything that doesn't look like a sha1 revision (40 hex
            # characters) is resolved into one via the remote
            if not ud.revisions[name] or len(ud.revisions[name]) != 40  or (False in [c in "abcdef0123456789" for c in ud.revisions[name]]):
                if ud.revisions[name]:
                    ud.unresolvedrev[name] = ud.revisions[name]
                ud.revisions[name] = self.latest_revision(ud, d, name)

        # Build a filesystem-safe directory name from the host and path.
        gitsrcname = '%s%s' % (ud.host.replace(':', '.'), ud.path.replace('/', '.').replace('*', '.').replace(' ','_'))
        if gitsrcname.startswith('.'):
            gitsrcname = gitsrcname[1:]

        # for rebaseable git repo, it is necessary to keep mirror tar ball
        # per revision, so that even the revision disappears from the
        # upstream repo in the future, the mirror will remain intact and still
        # contains the revision
        if ud.rebaseable:
            for name in ud.names:
                gitsrcname = gitsrcname + '_' + ud.revisions[name]

        dl_dir = d.getVar("DL_DIR")
        gitdir = d.getVar("GITDIR") or (dl_dir + "/git2")
        ud.clonedir = os.path.join(gitdir, gitsrcname)
        ud.localfile = ud.clonedir

        mirrortarball = 'git2_%s.tar.gz' % gitsrcname
        ud.fullmirror = os.path.join(dl_dir, mirrortarball)
        ud.mirrortarballs = [mirrortarball]
        if ud.shallow:
            # Encode everything that affects the shallow tarball's contents
            # (bareness, extra revs, srcrevs, depths, refs) into its name so
            # different shallow configurations don't collide.
            tarballname = gitsrcname
            if ud.bareclone:
                tarballname = "%s_bare" % tarballname

            if ud.shallow_revs:
                tarballname = "%s_%s" % (tarballname, "_".join(sorted(ud.shallow_revs)))

            for name, revision in sorted(ud.revisions.items()):
                tarballname = "%s_%s" % (tarballname, ud.revisions[name][:7])
                depth = ud.shallow_depths[name]
                if depth:
                    tarballname = "%s-%s" % (tarballname, depth)

            shallow_refs = []
            if not ud.nobranch:
                shallow_refs.extend(ud.branches.values())
            if ud.shallow_extra_refs:
                shallow_refs.extend(r.replace('refs/heads/', '').replace('*', 'ALL') for r in ud.shallow_extra_refs)
            if shallow_refs:
                tarballname = "%s_%s" % (tarballname, "_".join(sorted(shallow_refs)).replace('/', '.'))

            fetcher = self.__class__.__name__.lower()
            ud.shallowtarball = '%sshallow_%s.tar.gz' % (fetcher, tarballname)
            ud.fullshallow = os.path.join(dl_dir, ud.shallowtarball)
            # The shallow tarball is preferred over the full mirror.
            ud.mirrortarballs.insert(0, ud.shallowtarball)
304
305    def localpath(self, ud, d):
306        return ud.clonedir
307
308    def need_update(self, ud, d):
309        return self.clonedir_need_update(ud, d) or self.shallow_tarball_need_update(ud) or self.tarball_need_update(ud)
310
311    def clonedir_need_update(self, ud, d):
312        if not os.path.exists(ud.clonedir):
313            return True
314        if ud.shallow and ud.write_shallow_tarballs and self.clonedir_need_shallow_revs(ud, d):
315            return True
316        for name in ud.names:
317            if not self._contains_ref(ud, d, name, ud.clonedir):
318                return True
319        return False
320
321    def clonedir_need_shallow_revs(self, ud, d):
322        for rev in ud.shallow_revs:
323            try:
324                runfetchcmd('%s rev-parse -q --verify %s' % (ud.basecmd, rev), d, quiet=True, workdir=ud.clonedir)
325            except bb.fetch2.FetchError:
326                return rev
327        return None
328
329    def shallow_tarball_need_update(self, ud):
330        return ud.shallow and ud.write_shallow_tarballs and not os.path.exists(ud.fullshallow)
331
332    def tarball_need_update(self, ud):
333        return ud.write_tarballs and not os.path.exists(ud.fullmirror)
334
335    def try_premirror(self, ud, d):
336        # If we don't do this, updating an existing checkout with only premirrors
337        # is not possible
338        if bb.utils.to_boolean(d.getVar("BB_FETCH_PREMIRRORONLY")):
339            return True
340        if os.path.exists(ud.clonedir):
341            return False
342        return True
343
    def download(self, ud, d):
        """Fetch url"""

        # A current clone is preferred to either tarball, a shallow tarball is
        # preferred to an out of date clone, and a missing clone will use
        # either tarball.
        if ud.shallow and os.path.exists(ud.fullshallow) and self.need_update(ud, d):
            # Use the shallow tarball directly; it is unpacked at unpack time.
            ud.localpath = ud.fullshallow
            return
        elif os.path.exists(ud.fullmirror) and not os.path.exists(ud.clonedir):
            # Seed the clone directory from the full mirror tarball.
            bb.utils.mkdirhier(ud.clonedir)
            runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=ud.clonedir)

        repourl = self._get_repo_url(ud)

        # If the repo still doesn't exist, fallback to cloning it
        if not os.path.exists(ud.clonedir):
            # We do this since git will use a "-l" option automatically for local urls where possible
            if repourl.startswith("file://"):
                repourl = repourl[7:]
            clone_cmd = "LANG=C %s clone --bare --mirror %s %s --progress" % (ud.basecmd, shlex.quote(repourl), ud.clonedir)
            if ud.proto.lower() != 'file':
                bb.fetch2.check_network_access(d, clone_cmd, ud.url)
            progresshandler = GitProgressHandler(d)
            runfetchcmd(clone_cmd, d, log=progresshandler)

        # Update the checkout if needed
        if self.clonedir_need_update(ud, d):
            # Recreate 'origin' so a changed repourl (e.g. mirror vs upstream)
            # takes effect for this fetch.
            output = runfetchcmd("%s remote" % ud.basecmd, d, quiet=True, workdir=ud.clonedir)
            if "origin" in output:
              runfetchcmd("%s remote rm origin" % ud.basecmd, d, workdir=ud.clonedir)

            runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=ud.clonedir)
            fetch_cmd = "LANG=C %s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, shlex.quote(repourl))
            if ud.proto.lower() != 'file':
                bb.fetch2.check_network_access(d, fetch_cmd, ud.url)
            progresshandler = GitProgressHandler(d)
            runfetchcmd(fetch_cmd, d, log=progresshandler, workdir=ud.clonedir)
            # Compact the repository after updating.
            runfetchcmd("%s prune-packed" % ud.basecmd, d, workdir=ud.clonedir)
            runfetchcmd("%s pack-refs --all" % ud.basecmd, d, workdir=ud.clonedir)
            runfetchcmd("%s pack-redundant --all | xargs -r rm" % ud.basecmd, d, workdir=ud.clonedir)
            # The existing mirror tarball is now stale; remove it so it will
            # be regenerated (missing file is fine).
            try:
                os.unlink(ud.fullmirror)
            except OSError as exc:
                if exc.errno != errno.ENOENT:
                    raise

        # Sanity check: everything we were asked for must now be present.
        for name in ud.names:
            if not self._contains_ref(ud, d, name, ud.clonedir):
                raise bb.fetch2.FetchError("Unable to find revision %s in branch %s even from upstream" % (ud.revisions[name], ud.branches[name]))

        if ud.shallow and ud.write_shallow_tarballs:
            missing_rev = self.clonedir_need_shallow_revs(ud, d)
            if missing_rev:
                raise bb.fetch2.FetchError("Unable to find revision %s even from upstream" % missing_rev)

        if self._contains_lfs(ud, d, ud.clonedir) and self._need_lfs(ud):
            # Unpack temporary working copy, use it to run 'git checkout' to force pre-fetching
            # of all LFS blobs needed at the srcrev.
            #
            # It would be nice to just do this inline here by running 'git-lfs fetch'
            # on the bare clonedir, but that operation requires a working copy on some
            # releases of Git LFS.
            tmpdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR'))
            try:
                # Do the checkout. This implicitly involves a Git LFS fetch.
                Git.unpack(self, ud, tmpdir, d)

                # Scoop up a copy of any stuff that Git LFS downloaded. Merge them into
                # the bare clonedir.
                #
                # As this procedure is invoked repeatedly on incremental fetches as
                # a recipe's SRCREV is bumped throughout its lifetime, this will
                # result in a gradual accumulation of LFS blobs in <ud.clonedir>/lfs
                # corresponding to all the blobs reachable from the different revs
                # fetched across time.
                #
                # Only do this if the unpack resulted in a .git/lfs directory being
                # created; this only happens if at least one blob needed to be
                # downloaded.
                if os.path.exists(os.path.join(tmpdir, "git", ".git", "lfs")):
                    runfetchcmd("tar -cf - lfs | tar -xf - -C %s" % ud.clonedir, d, workdir="%s/git/.git" % tmpdir)
            finally:
                bb.utils.remove(tmpdir, recurse=True)
428
    def build_mirror_data(self, ud, d):
        """Create the (shallow) mirror tarball for this URL if configured to."""

        # Create as a temp file and move atomically into position to avoid races
        @contextmanager
        def create_atomic(filename):
            fd, tfile = tempfile.mkstemp(dir=os.path.dirname(filename))
            try:
                yield tfile
                # mkstemp creates the file 0600; re-apply the process umask
                # before renaming into place.
                umask = os.umask(0o666)
                os.umask(umask)
                os.chmod(tfile, (0o666 & ~umask))
                os.rename(tfile, filename)
            finally:
                os.close(fd)

        if ud.shallow and ud.write_shallow_tarballs:
            if not os.path.exists(ud.fullshallow):
                if os.path.islink(ud.fullshallow):
                    # Dangling symlink; remove before writing.
                    os.unlink(ud.fullshallow)
                tempdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR'))
                shallowclone = os.path.join(tempdir, 'git')
                try:
                    self.clone_shallow_local(ud, shallowclone, d)

                    logger.info("Creating tarball of git repository")
                    with create_atomic(ud.fullshallow) as tfile:
                        runfetchcmd("tar -czf %s ." % tfile, d, workdir=shallowclone)
                    runfetchcmd("touch %s.done" % ud.fullshallow, d)
                finally:
                    bb.utils.remove(tempdir, recurse=True)
        elif ud.write_tarballs and not os.path.exists(ud.fullmirror):
            if os.path.islink(ud.fullmirror):
                os.unlink(ud.fullmirror)

            logger.info("Creating tarball of git repository")
            with create_atomic(ud.fullmirror) as tfile:
                # Stamp tarball contents with the newest commit date so the
                # archive is reproducible.
                # NOTE(review): this uses plain "git" rather than ud.basecmd —
                # presumably intentional, but confirm.
                mtime = runfetchcmd("git log --all -1 --format=%cD", d,
                        quiet=True, workdir=ud.clonedir)
                runfetchcmd("tar -czf %s --owner pokybuild --group users --mtime \"%s\" ."
                        % (tfile, mtime), d, workdir=ud.clonedir)
            runfetchcmd("touch %s.done" % ud.fullmirror, d)
470
    def clone_shallow_local(self, ud, dest, d):
        """Clone the repo and make it shallow.

        The upstream url of the new clone isn't set at this time, as it'll be
        set correctly when unpacked."""
        runfetchcmd("%s clone %s %s %s" % (ud.basecmd, ud.cloneflags, ud.clonedir, dest), d)

        to_parse, shallow_branches = [], []
        for name in ud.names:
            revision = ud.revisions[name]
            depth = ud.shallow_depths[name]
            if depth:
                # Keep 'depth' commits ending at the srcrev; the history
                # boundary commit is revision~(depth-1).
                to_parse.append('%s~%d^{}' % (revision, depth - 1))

            # For nobranch, we need a ref, otherwise the commits will be
            # removed, and for non-nobranch, we truncate the branch to our
            # srcrev, to avoid keeping unnecessary history beyond that.
            branch = ud.branches[name]
            if ud.nobranch:
                ref = "refs/shallow/%s" % name
            elif ud.bareclone:
                ref = "refs/heads/%s" % branch
            else:
                ref = "refs/remotes/origin/%s" % branch

            shallow_branches.append(ref)
            runfetchcmd("%s update-ref %s %s" % (ud.basecmd, ref, revision), d, workdir=dest)

        # Map srcrev+depths to revisions
        parsed_depths = runfetchcmd("%s rev-parse %s" % (ud.basecmd, " ".join(to_parse)), d, workdir=dest)

        # Resolve specified revisions
        parsed_revs = runfetchcmd("%s rev-parse %s" % (ud.basecmd, " ".join('"%s^{}"' % r for r in ud.shallow_revs)), d, workdir=dest)
        shallow_revisions = parsed_depths.splitlines() + parsed_revs.splitlines()

        # Apply extra ref wildcards
        all_refs = runfetchcmd('%s for-each-ref "--format=%%(refname)"' % ud.basecmd,
                               d, workdir=dest).splitlines()
        for r in ud.shallow_extra_refs:
            if not ud.bareclone:
                # In a non-bare clone the upstream heads live under
                # refs/remotes/origin/.
                r = r.replace('refs/heads/', 'refs/remotes/origin/')

            if '*' in r:
                matches = filter(lambda a: fnmatch.fnmatchcase(a, r), all_refs)
                shallow_branches.extend(matches)
            else:
                shallow_branches.append(r)

        # Make the repository shallow
        # git-make-shallow strips history so that only the listed refs and
        # boundary revisions (and everything reachable between) remain.
        shallow_cmd = [self.make_shallow_path, '-s']
        for b in shallow_branches:
            shallow_cmd.append('-r')
            shallow_cmd.append(b)
        shallow_cmd.extend(shallow_revisions)
        runfetchcmd(subprocess.list2cmdline(shallow_cmd), d, workdir=dest)
526
    def unpack(self, ud, destdir, d):
        """ unpack the downloaded src to destdir"""

        subdir = ud.parm.get("subdir")
        subpath = ud.parm.get("subpath")
        readpathspec = ""
        def_destsuffix = "git/"

        if subpath:
            # Check out only the tree below subpath; name the destination
            # after its last path component.
            readpathspec = ":%s" % subpath
            def_destsuffix = "%s/" % os.path.basename(subpath.rstrip('/'))

        if subdir:
            # If 'subdir' param exists, create a dir and use it as destination for unpack cmd
            if os.path.isabs(subdir):
                if not os.path.realpath(subdir).startswith(os.path.realpath(destdir)):
                    raise bb.fetch2.UnpackError("subdir argument isn't a subdirectory of unpack root %s" % destdir, ud.url)
                destdir = subdir
            else:
                destdir = os.path.join(destdir, subdir)
            def_destsuffix = ""

        destsuffix = ud.parm.get("destsuffix", def_destsuffix)
        destdir = ud.destdir = os.path.join(destdir, destsuffix)
        if os.path.exists(destdir):
            bb.utils.prunedir(destdir)

        need_lfs = self._need_lfs(ud)

        if not need_lfs:
            ud.basecmd = "GIT_LFS_SKIP_SMUDGE=1 " + ud.basecmd

        # Try the available sources in order of preference, collecting the
        # reason each was skipped for the error message below.
        source_found = False
        source_error = []

        if not source_found:
            clonedir_is_up_to_date = not self.clonedir_need_update(ud, d)
            if clonedir_is_up_to_date:
                runfetchcmd("%s clone %s %s/ %s" % (ud.basecmd, ud.cloneflags, ud.clonedir, destdir), d)
                source_found = True
            else:
                source_error.append("clone directory not available or not up to date: " + ud.clonedir)

        if not source_found:
            if ud.shallow:
                if os.path.exists(ud.fullshallow):
                    bb.utils.mkdirhier(destdir)
                    runfetchcmd("tar -xzf %s" % ud.fullshallow, d, workdir=destdir)
                    source_found = True
                else:
                    source_error.append("shallow clone not available: " + ud.fullshallow)
            else:
                source_error.append("shallow clone not enabled")

        if not source_found:
            raise bb.fetch2.UnpackError("No up to date source found: " + "; ".join(source_error), ud.url)

        # Point origin at the real upstream (the local clone was used above).
        repourl = self._get_repo_url(ud)
        runfetchcmd("%s remote set-url origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=destdir)

        if self._contains_lfs(ud, d, destdir):
            if need_lfs and not self._find_git_lfs(d):
                raise bb.fetch2.FetchError("Repository %s has LFS content, install git-lfs on host to download (or set lfs=0 to ignore it)" % (repourl))
            elif not need_lfs:
                bb.note("Repository %s has LFS content but it is not being fetched" % (repourl))

        if not ud.nocheckout:
            if subpath:
                runfetchcmd("%s read-tree %s%s" % (ud.basecmd, ud.revisions[ud.names[0]], readpathspec), d,
                            workdir=destdir)
                runfetchcmd("%s checkout-index -q -f -a" % ud.basecmd, d, workdir=destdir)
            elif not ud.nobranch:
                branchname =  ud.branches[ud.names[0]]
                runfetchcmd("%s checkout -B %s %s" % (ud.basecmd, branchname, \
                            ud.revisions[ud.names[0]]), d, workdir=destdir)
                runfetchcmd("%s branch %s --set-upstream-to origin/%s" % (ud.basecmd, branchname, \
                            branchname), d, workdir=destdir)
            else:
                # nobranch: detached checkout of the exact revision.
                runfetchcmd("%s checkout %s" % (ud.basecmd, ud.revisions[ud.names[0]]), d, workdir=destdir)

        return True
608
609    def clean(self, ud, d):
610        """ clean the git directory """
611
612        to_remove = [ud.localpath, ud.fullmirror, ud.fullmirror + ".done"]
613        # The localpath is a symlink to clonedir when it is cloned from a
614        # mirror, so remove both of them.
615        if os.path.islink(ud.localpath):
616            clonedir = os.path.realpath(ud.localpath)
617            to_remove.append(clonedir)
618
619        for r in to_remove:
620            if os.path.exists(r):
621                bb.note('Removing %s' % r)
622                bb.utils.remove(r, True)
623
624    def supports_srcrev(self):
625        return True
626
627    def _contains_ref(self, ud, d, name, wd):
628        cmd = ""
629        if ud.nobranch:
630            cmd = "%s log --pretty=oneline -n 1 %s -- 2> /dev/null | wc -l" % (
631                ud.basecmd, ud.revisions[name])
632        else:
633            cmd =  "%s branch --contains %s --list %s 2> /dev/null | wc -l" % (
634                ud.basecmd, ud.revisions[name], ud.branches[name])
635        try:
636            output = runfetchcmd(cmd, d, quiet=True, workdir=wd)
637        except bb.fetch2.FetchError:
638            return False
639        if len(output.split()) > 1:
640            raise bb.fetch2.FetchError("The command '%s' gave output with more then 1 line unexpectedly, output: '%s'" % (cmd, output))
641        return output.split()[0] != "0"
642
643    def _need_lfs(self, ud):
644        return ud.parm.get("lfs", "1") == "1"
645
646    def _contains_lfs(self, ud, d, wd):
647        """
648        Check if the repository has 'lfs' (large file) content
649        """
650
651        if not ud.nobranch:
652            branchname = ud.branches[ud.names[0]]
653        else:
654            branchname = "master"
655
656        # The bare clonedir doesn't use the remote names; it has the branch immediately.
657        if wd == ud.clonedir:
658            refname = ud.branches[ud.names[0]]
659        else:
660            refname = "origin/%s" % ud.branches[ud.names[0]]
661
662        cmd = "%s grep lfs %s:.gitattributes | wc -l" % (
663            ud.basecmd, refname)
664
665        try:
666            output = runfetchcmd(cmd, d, quiet=True, workdir=wd)
667            if int(output) > 0:
668                return True
669        except (bb.fetch2.FetchError,ValueError):
670            pass
671        return False
672
673    def _find_git_lfs(self, d):
674        """
675        Return True if git-lfs can be found, False otherwise.
676        """
677        import shutil
678        return shutil.which("git-lfs", path=d.getVar('PATH')) is not None
679
680    def _get_repo_url(self, ud):
681        """
682        Return the repository URL
683        """
684        # Note that we do not support passwords directly in the git urls. There are several
685        # reasons. SRC_URI can be written out to things like buildhistory and people don't
686        # want to leak passwords like that. Its also all too easy to share metadata without
687        # removing the password. ssh keys, ~/.netrc and ~/.ssh/config files can be used as
688        # alternatives so we will not take patches adding password support here.
689        if ud.user:
690            username = ud.user + '@'
691        else:
692            username = ""
693        return "%s://%s%s%s" % (ud.proto, username, ud.host, ud.path)
694
695    def _revision_key(self, ud, d, name):
696        """
697        Return a unique key for the url
698        """
699        # Collapse adjacent slashes
700        slash_re = re.compile(r"/+")
701        return "git:" + ud.host + slash_re.sub(".", ud.path) + ud.unresolvedrev[name]
702
    def _lsremote(self, ud, d, search):
        """
        Run git ls-remote with the specified search string

        Returns the raw ls-remote output (or '' when re-entered recursively);
        raises FetchError if the command produces no output.
        """
        # Prevent recursion e.g. in OE if SRCPV is in PV, PV is in WORKDIR,
        # and WORKDIR is in PATH (as a result of RSS), our call to
        # runfetchcmd() exports PATH so this function will get called again (!)
        # In this scenario the return call of the function isn't actually
        # important - WORKDIR isn't needed in PATH to call git ls-remote
        # anyway.
        if d.getVar('_BB_GIT_IN_LSREMOTE', False):
            return ''
        d.setVar('_BB_GIT_IN_LSREMOTE', '1')
        try:
            repourl = self._get_repo_url(ud)
            cmd = "%s ls-remote %s %s" % \
                (ud.basecmd, shlex.quote(repourl), search)
            if ud.proto.lower() != 'file':
                bb.fetch2.check_network_access(d, cmd, repourl)
            output = runfetchcmd(cmd, d, True)
            if not output:
                raise bb.fetch2.FetchError("The command %s gave empty output unexpectedly" % cmd, ud.url)
        finally:
            # Always clear the recursion guard.
            d.delVar('_BB_GIT_IN_LSREMOTE')
        return output
728
729    def _latest_revision(self, ud, d, name):
730        """
731        Compute the HEAD revision for the url
732        """
733        if not d.getVar("__BBSEENSRCREV"):
734            raise bb.fetch2.FetchError("Recipe uses a floating tag/branch without a fixed SRCREV yet doesn't call bb.fetch2.get_srcrev() (use SRCPV in PV for OE).")
735
736        # Ensure we mark as not cached
737        bb.fetch2.get_autorev(d)
738
739        output = self._lsremote(ud, d, "")
740        # Tags of the form ^{} may not work, need to fallback to other form
741        if ud.unresolvedrev[name][:5] == "refs/" or ud.usehead:
742            head = ud.unresolvedrev[name]
743            tag = ud.unresolvedrev[name]
744        else:
745            head = "refs/heads/%s" % ud.unresolvedrev[name]
746            tag = "refs/tags/%s" % ud.unresolvedrev[name]
747        for s in [head, tag + "^{}", tag]:
748            for l in output.strip().split('\n'):
749                sha1, ref = l.split()
750                if s == ref:
751                    return sha1
752        raise bb.fetch2.FetchError("Unable to resolve '%s' in upstream git repository in git ls-remote output for %s" % \
753            (ud.unresolvedrev[name], ud.host+ud.path))
754
755    def latest_versionstring(self, ud, d):
756        """
757        Compute the latest release name like "x.y.x" in "x.y.x+gitHASH"
758        by searching through the tags output of ls-remote, comparing
759        versions and returning the highest match.
760        """
761        pupver = ('', '')
762
763        tagregex = re.compile(d.getVar('UPSTREAM_CHECK_GITTAGREGEX') or r"(?P<pver>([0-9][\.|_]?)+)")
764        try:
765            output = self._lsremote(ud, d, "refs/tags/*")
766        except (bb.fetch2.FetchError, bb.fetch2.NetworkAccess) as e:
767            bb.note("Could not list remote: %s" % str(e))
768            return pupver
769
770        verstring = ""
771        revision = ""
772        for line in output.split("\n"):
773            if not line:
774                break
775
776            tag_head = line.split("/")[-1]
777            # Ignore non-released branches
778            m = re.search(r"(alpha|beta|rc|final)+", tag_head)
779            if m:
780                continue
781
782            # search for version in the line
783            tag = tagregex.search(tag_head)
784            if tag is None:
785                continue
786
787            tag = tag.group('pver')
788            tag = tag.replace("_", ".")
789
790            if verstring and bb.utils.vercmp(("0", tag, ""), ("0", verstring, "")) < 0:
791                continue
792
793            verstring = tag
794            revision = line.split()[0]
795            pupver = (verstring, revision)
796
797        return pupver
798
799    def _build_revision(self, ud, d, name):
800        return ud.revisions[name]
801
802    def gitpkgv_revision(self, ud, d, name):
803        """
804        Return a sortable revision number by counting commits in the history
805        Based on gitpkgv.bblass in meta-openembedded
806        """
807        rev = self._build_revision(ud, d, name)
808        localpath = ud.localpath
809        rev_file = os.path.join(localpath, "oe-gitpkgv_" + rev)
810        if not os.path.exists(localpath):
811            commits = None
812        else:
813            if not os.path.exists(rev_file) or not os.path.getsize(rev_file):
814                from pipes import quote
815                commits = bb.fetch2.runfetchcmd(
816                        "git rev-list %s -- | wc -l" % quote(rev),
817                        d, quiet=True).strip().lstrip('0')
818                if commits:
819                    open(rev_file, "w").write("%d\n" % int(commits))
820            else:
821                commits = open(rev_file, "r").readline(128).strip()
822        if commits:
823            return False, "%s+%s" % (commits, rev[:7])
824        else:
825            return True, str(rev)
826
827    def checkstatus(self, fetch, ud, d):
828        try:
829            self._lsremote(ud, d, "")
830            return True
831        except bb.fetch2.FetchError:
832            return False
833