xref: /openbmc/openbmc/poky/bitbake/lib/bb/fetch2/git.py (revision 595f6308)
1"""
2BitBake 'Fetch' git implementation
3
git fetcher supports the SRC_URI with format of:
5SRC_URI = "git://some.host/somepath;OptionA=xxx;OptionB=xxx;..."
6
7Supported SRC_URI options are:
8
9- branch
10   The git branch to retrieve from. The default is "master"
11
12   This option also supports multiple branch fetching, with branches
13   separated by commas.  In multiple branches case, the name option
14   must have the same number of names to match the branches, which is
15   used to specify the SRC_REV for the branch
16   e.g:
17   SRC_URI="git://some.host/somepath;branch=branchX,branchY;name=nameX,nameY"
18   SRCREV_nameX = "xxxxxxxxxxxxxxxxxxxx"
19   SRCREV_nameY = "YYYYYYYYYYYYYYYYYYYY"
20
21- tag
22    The git tag to retrieve. The default is "master"
23
24- protocol
25   The method to use to access the repository. Common options are "git",
26   "http", "https", "file", "ssh" and "rsync". The default is "git".
27
28- rebaseable
29   rebaseable indicates that the upstream git repo may rebase in the future,
30   and current revision may disappear from upstream repo. This option will
31   remind fetcher to preserve local cache carefully for future use.
32   The default value is "0", set rebaseable=1 for rebaseable git repo.
33
34- nocheckout
   Don't checkout source code when unpacking. Set this option for a recipe
   that has its own routine to checkout code.
37   The default is "0", set nocheckout=1 if needed.
38
39- bareclone
40   Create a bare clone of the source code and don't checkout the source code
41   when unpacking. Set this option for the recipe who has its own routine to
42   checkout code and tracking branch requirements.
43   The default is "0", set bareclone=1 if needed.
44
45- nobranch
   Don't check the SHA validation for the branch. Set this option for a recipe
   referring to a commit which is valid in a tag instead of a branch.
48   The default is "0", set nobranch=1 if needed.
49
50- usehead
51   For local git:// urls to use the current branch HEAD as the revision for use with
52   AUTOREV. Implies nobranch.
53
54"""
55
56# Copyright (C) 2005 Richard Purdie
57#
58# SPDX-License-Identifier: GPL-2.0-only
59#
60
61import collections
62import errno
63import fnmatch
64import os
65import re
66import shlex
67import subprocess
68import tempfile
69import bb
70import bb.progress
71from contextlib import contextmanager
72from   bb.fetch2 import FetchMethod
73from   bb.fetch2 import runfetchcmd
74from   bb.fetch2 import logger
75
76
class GitProgressHandler(bb.progress.LineFilterProgressHandler):
    """Extract progress information from git output.

    Parses git's textual progress lines (e.g. "Receiving objects:  42% ...")
    and converts them into bitbake progress events.
    """
    def __init__(self, d):
        self._buffer = ''  # accumulated output since the last detected stage change
        self._count = 0    # highest raw object count seen during the first stage
        super(GitProgressHandler, self).__init__(d)
        # Send an initial progress event so the bar gets shown
        self._fire_progress(-1)

    def write(self, string):
        """Consume a chunk of git output and fire scaled progress updates."""
        self._buffer += string
        # Stages appear in this order in git's output; each weight is that
        # stage's share of the overall progress bar.
        stages = ['Counting objects', 'Compressing objects', 'Receiving objects', 'Resolving deltas']
        stage_weights = [0.2, 0.05, 0.5, 0.25]
        stagenum = 0
        # Scan in reverse so the latest stage mentioned in the buffer wins.
        for i, stage in reversed(list(enumerate(stages))):
            if stage in self._buffer:
                stagenum = i
                self._buffer = ''
                break
        self._status = stages[stagenum]
        percs = re.findall(r'(\d+)%', string)
        if percs:
            # Scale the stage-local percentage into the overall progress value
            # by adding the weights of all completed stages.
            progress = int(round((int(percs[-1]) * stage_weights[stagenum]) + (sum(stage_weights[:stagenum]) * 100)))
            rates = re.findall(r'([\d.]+ [a-zA-Z]*/s+)', string)
            if rates:
                rate = rates[-1]
            else:
                rate = None
            self.update(progress, rate)
        else:
            if stagenum == 0:
                # No percentage yet (e.g. "Counting objects: 123"); report the
                # raw count as an indeterminate (negative) progress event.
                percs = re.findall(r': (\d+)', string)
                if percs:
                    count = int(percs[-1])
                    if count > self._count:
                        self._count = count
                        self._fire_progress(-count)
        super(GitProgressHandler, self).write(string)
115
116
class Git(FetchMethod):
    # Root of the bitbake installation (three directory levels above this
    # file) and the bundled helper script used to make a clone shallow.
    bitbake_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.join(os.path.abspath(__file__))), '..', '..', '..'))
    make_shallow_path = os.path.join(bitbake_dir, 'bin', 'git-make-shallow')

    """Class to fetch a module or modules from git repositories"""
    def init(self, d):
        # No fetcher-wide initialisation needed for git.
        pass
124
125    def supports(self, ud, d):
126        """
127        Check to see if a given url can be fetched with git.
128        """
129        return ud.type in ['git']
130
131    def supports_checksum(self, urldata):
132        return False
133
    def urldata_init(self, ud, d):
        """
        Initialise git specific variables within url data (ud) so that git
        methods like latest_revision() can work.

        Populates, among others: ud.proto, ud.nocheckout, ud.rebaseable,
        ud.nobranch, ud.usehead, ud.bareclone, ud.branches, ud.unresolvedrev,
        ud.shallow*, ud.basecmd, ud.clonedir, ud.fullmirror and
        ud.mirrortarballs.
        """
        # Work out the transport protocol; a url with no host must be local.
        if 'protocol' in ud.parm:
            ud.proto = ud.parm['protocol']
        elif not ud.host:
            ud.proto = 'file'
        else:
            ud.proto = "git"
        if ud.host == "github.com" and ud.proto == "git":
            # github stopped supporting git protocol
            # https://github.blog/2021-09-01-improving-git-protocol-security-github/#no-more-unauthenticated-git
            ud.proto = "https"
            bb.warn("URL: %s uses git protocol which is no longer supported by github. Please change to ;protocol=https in the url." % ud.url)

        if not ud.proto in ('git', 'file', 'ssh', 'http', 'https', 'rsync'):
            raise bb.fetch2.ParameterError("Invalid protocol type", ud.url)

        # Boolean url parameters, all defaulting to off ("0").
        ud.nocheckout = ud.parm.get("nocheckout","0") == "1"

        ud.rebaseable = ud.parm.get("rebaseable","0") == "1"

        ud.nobranch = ud.parm.get("nobranch","0") == "1"

        # usehead implies nobranch
        ud.usehead = ud.parm.get("usehead","0") == "1"
        if ud.usehead:
            if ud.proto != "file":
                 raise bb.fetch2.ParameterError("The usehead option is only for use with local ('protocol=file') git repositories", ud.url)
            ud.nobranch = 1

        # bareclone implies nocheckout
        ud.bareclone = ud.parm.get("bareclone","0") == "1"
        if ud.bareclone:
            ud.nocheckout = 1

        # One branch per name; the two comma-separated lists must align.
        ud.unresolvedrev = {}
        branches = ud.parm.get("branch", "").split(',')
        if branches == [""] and not ud.nobranch:
            bb.warn("URL: %s does not set any branch parameter. The future default branch used by tools and repositories is uncertain and we will therefore soon require this is set in all git urls." % ud.url)
            branches = ["master"]
        if len(branches) != len(ud.names):
            raise bb.fetch2.ParameterError("The number of name and branch parameters is not balanced", ud.url)

        ud.noshared = d.getVar("BB_GIT_NOSHARED") == "1"

        # Flags passed to 'git clone' when unpacking: no checkout, shared
        # object store unless disabled, mirror for bare clones.
        ud.cloneflags = "-n"
        if not ud.noshared:
            ud.cloneflags += " -s"
        if ud.bareclone:
            ud.cloneflags += " --mirror"

        ud.shallow = d.getVar("BB_GIT_SHALLOW") == "1"
        ud.shallow_extra_refs = (d.getVar("BB_GIT_SHALLOW_EXTRA_REFS") or "").split()

        # Global shallow depth; must be a non-negative integer when set.
        depth_default = d.getVar("BB_GIT_SHALLOW_DEPTH")
        if depth_default is not None:
            try:
                depth_default = int(depth_default or 0)
            except ValueError:
                raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH: %s" % depth_default)
            else:
                if depth_default < 0:
                    raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH: %s" % depth_default)
        else:
            depth_default = 1
        ud.shallow_depths = collections.defaultdict(lambda: depth_default)

        # Per-name shallow depth and shallow revision overrides.
        revs_default = d.getVar("BB_GIT_SHALLOW_REVS")
        ud.shallow_revs = []
        ud.branches = {}
        for pos, name in enumerate(ud.names):
            branch = branches[pos]
            ud.branches[name] = branch
            ud.unresolvedrev[name] = branch

            shallow_depth = d.getVar("BB_GIT_SHALLOW_DEPTH_%s" % name)
            if shallow_depth is not None:
                try:
                    shallow_depth = int(shallow_depth or 0)
                except ValueError:
                    raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH_%s: %s" % (name, shallow_depth))
                else:
                    if shallow_depth < 0:
                        raise bb.fetch2.FetchError("Invalid depth for BB_GIT_SHALLOW_DEPTH_%s: %s" % (name, shallow_depth))
                    ud.shallow_depths[name] = shallow_depth

            revs = d.getVar("BB_GIT_SHALLOW_REVS_%s" % name)
            if revs is not None:
                ud.shallow_revs.extend(revs.split())
            elif revs_default is not None:
                ud.shallow_revs.extend(revs_default.split())

        if (ud.shallow and
                not ud.shallow_revs and
                all(ud.shallow_depths[n] == 0 for n in ud.names)):
            # Shallow disabled for this URL
            ud.shallow = False

        if ud.usehead:
            # When usehead is set let's associate 'HEAD' with the unresolved
            # rev of this repository. This will get resolved into a revision
            # later. If an actual revision happens to have also been provided
            # then this setting will be overridden.
            for name in ud.names:
                ud.unresolvedrev[name] = 'HEAD'

        ud.basecmd = d.getVar("FETCHCMD_git") or "git -c core.fsyncobjectfiles=0 -c gc.autoDetach=false"

        write_tarballs = d.getVar("BB_GENERATE_MIRROR_TARBALLS") or "0"
        ud.write_tarballs = write_tarballs != "0" or ud.rebaseable
        ud.write_shallow_tarballs = (d.getVar("BB_GENERATE_SHALLOW_TARBALLS") or write_tarballs) != "0"

        ud.setup_revisions(d)

        for name in ud.names:
            # Ensure anything that doesn't look like a 40-char hex SHA-1
            # revision is translated into one (branch/tag names get resolved).
            if not ud.revisions[name] or len(ud.revisions[name]) != 40  or (False in [c in "abcdef0123456789" for c in ud.revisions[name]]):
                if ud.revisions[name]:
                    ud.unresolvedrev[name] = ud.revisions[name]
                ud.revisions[name] = self.latest_revision(ud, d, name)

        # Build a filesystem-safe directory name from host and path.
        gitsrcname = '%s%s' % (ud.host.replace(':', '.'), ud.path.replace('/', '.').replace('*', '.').replace(' ','_'))
        if gitsrcname.startswith('.'):
            gitsrcname = gitsrcname[1:]

        # for rebaseable git repo, it is necessary to keep mirror tar ball
        # per revision, so that even the revision disappears from the
        # upstream repo in the future, the mirror will remain intact and still
        # contains the revision
        if ud.rebaseable:
            for name in ud.names:
                gitsrcname = gitsrcname + '_' + ud.revisions[name]

        dl_dir = d.getVar("DL_DIR")
        gitdir = d.getVar("GITDIR") or (dl_dir + "/git2")
        ud.clonedir = os.path.join(gitdir, gitsrcname)
        ud.localfile = ud.clonedir

        mirrortarball = 'git2_%s.tar.gz' % gitsrcname
        ud.fullmirror = os.path.join(dl_dir, mirrortarball)
        ud.mirrortarballs = [mirrortarball]
        if ud.shallow:
            # The shallow tarball name encodes everything that affects its
            # contents: bareness, shallow revs, per-name revision prefixes,
            # depths and the set of refs kept.
            tarballname = gitsrcname
            if ud.bareclone:
                tarballname = "%s_bare" % tarballname

            if ud.shallow_revs:
                tarballname = "%s_%s" % (tarballname, "_".join(sorted(ud.shallow_revs)))

            for name, revision in sorted(ud.revisions.items()):
                tarballname = "%s_%s" % (tarballname, ud.revisions[name][:7])
                depth = ud.shallow_depths[name]
                if depth:
                    tarballname = "%s-%s" % (tarballname, depth)

            shallow_refs = []
            if not ud.nobranch:
                shallow_refs.extend(ud.branches.values())
            if ud.shallow_extra_refs:
                shallow_refs.extend(r.replace('refs/heads/', '').replace('*', 'ALL') for r in ud.shallow_extra_refs)
            if shallow_refs:
                tarballname = "%s_%s" % (tarballname, "_".join(sorted(shallow_refs)).replace('/', '.'))

            fetcher = self.__class__.__name__.lower()
            ud.shallowtarball = '%sshallow_%s.tar.gz' % (fetcher, tarballname)
            ud.fullshallow = os.path.join(dl_dir, ud.shallowtarball)
            # Prefer the shallow tarball over the full mirror when both exist.
            ud.mirrortarballs.insert(0, ud.shallowtarball)
304
305    def localpath(self, ud, d):
306        return ud.clonedir
307
308    def need_update(self, ud, d):
309        return self.clonedir_need_update(ud, d) or self.shallow_tarball_need_update(ud) or self.tarball_need_update(ud)
310
311    def clonedir_need_update(self, ud, d):
312        if not os.path.exists(ud.clonedir):
313            return True
314        if ud.shallow and ud.write_shallow_tarballs and self.clonedir_need_shallow_revs(ud, d):
315            return True
316        for name in ud.names:
317            if not self._contains_ref(ud, d, name, ud.clonedir):
318                return True
319        return False
320
321    def clonedir_need_shallow_revs(self, ud, d):
322        for rev in ud.shallow_revs:
323            try:
324                runfetchcmd('%s rev-parse -q --verify %s' % (ud.basecmd, rev), d, quiet=True, workdir=ud.clonedir)
325            except bb.fetch2.FetchError:
326                return rev
327        return None
328
329    def shallow_tarball_need_update(self, ud):
330        return ud.shallow and ud.write_shallow_tarballs and not os.path.exists(ud.fullshallow)
331
332    def tarball_need_update(self, ud):
333        return ud.write_tarballs and not os.path.exists(ud.fullmirror)
334
335    def try_premirror(self, ud, d):
336        # If we don't do this, updating an existing checkout with only premirrors
337        # is not possible
338        if bb.utils.to_boolean(d.getVar("BB_FETCH_PREMIRRORONLY")):
339            return True
340        if os.path.exists(ud.clonedir):
341            return False
342        return True
343
    def download(self, ud, d):
        """Fetch url: populate (or update) the bare clone directory,
        unpacking a mirror tarball first if one is available, then pull in
        any Git LFS blobs the configured revisions need."""

        # A current clone is preferred to either tarball, a shallow tarball is
        # preferred to an out of date clone, and a missing clone will use
        # either tarball.
        if ud.shallow and os.path.exists(ud.fullshallow) and self.need_update(ud, d):
            ud.localpath = ud.fullshallow
            return
        elif os.path.exists(ud.fullmirror) and not os.path.exists(ud.clonedir):
            bb.utils.mkdirhier(ud.clonedir)
            runfetchcmd("tar -xzf %s" % ud.fullmirror, d, workdir=ud.clonedir)

        repourl = self._get_repo_url(ud)

        # If the repo still doesn't exist, fallback to cloning it
        if not os.path.exists(ud.clonedir):
            # We do this since git will use a "-l" option automatically for local urls where possible
            if repourl.startswith("file://"):
                repourl = repourl[7:]
            clone_cmd = "LANG=C %s clone --bare --mirror %s %s --progress" % (ud.basecmd, shlex.quote(repourl), ud.clonedir)
            if ud.proto.lower() != 'file':
                bb.fetch2.check_network_access(d, clone_cmd, ud.url)
            progresshandler = GitProgressHandler(d)
            runfetchcmd(clone_cmd, d, log=progresshandler)

        # Update the checkout if needed
        if self.clonedir_need_update(ud, d):
            # Recreate 'origin' from scratch so its url is always current.
            output = runfetchcmd("%s remote" % ud.basecmd, d, quiet=True, workdir=ud.clonedir)
            if "origin" in output:
              runfetchcmd("%s remote rm origin" % ud.basecmd, d, workdir=ud.clonedir)

            runfetchcmd("%s remote add --mirror=fetch origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=ud.clonedir)
            # Force-fetch every ref into the mirror clone.
            fetch_cmd = "LANG=C %s fetch -f --progress %s refs/*:refs/*" % (ud.basecmd, shlex.quote(repourl))
            if ud.proto.lower() != 'file':
                bb.fetch2.check_network_access(d, fetch_cmd, ud.url)
            progresshandler = GitProgressHandler(d)
            runfetchcmd(fetch_cmd, d, log=progresshandler, workdir=ud.clonedir)
            # Compact the repository after the fetch.
            runfetchcmd("%s prune-packed" % ud.basecmd, d, workdir=ud.clonedir)
            runfetchcmd("%s pack-refs --all" % ud.basecmd, d, workdir=ud.clonedir)
            runfetchcmd("%s pack-redundant --all | xargs -r rm" % ud.basecmd, d, workdir=ud.clonedir)
            # The old mirror tarball is now stale; removing a missing file is
            # not an error.
            try:
                os.unlink(ud.fullmirror)
            except OSError as exc:
                if exc.errno != errno.ENOENT:
                    raise
\x20
        # Sanity check: every requested revision must now be present.
        for name in ud.names:
            if not self._contains_ref(ud, d, name, ud.clonedir):
                raise bb.fetch2.FetchError("Unable to find revision %s in branch %s even from upstream" % (ud.revisions[name], ud.branches[name]))

        if ud.shallow and ud.write_shallow_tarballs:
            missing_rev = self.clonedir_need_shallow_revs(ud, d)
            if missing_rev:
                raise bb.fetch2.FetchError("Unable to find revision %s even from upstream" % missing_rev)

        if self._contains_lfs(ud, d, ud.clonedir) and self._need_lfs(ud):
            # Unpack temporary working copy, use it to run 'git checkout' to force pre-fetching
            # of all LFS blobs needed at the srcrev.
            #
            # It would be nice to just do this inline here by running 'git-lfs fetch'
            # on the bare clonedir, but that operation requires a working copy on some
            # releases of Git LFS.
            tmpdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR'))
            try:
                # Do the checkout. This implicitly involves a Git LFS fetch.
                Git.unpack(self, ud, tmpdir, d)

                # Scoop up a copy of any stuff that Git LFS downloaded. Merge them into
                # the bare clonedir.
                #
                # As this procedure is invoked repeatedly on incremental fetches as
                # a recipe's SRCREV is bumped throughout its lifetime, this will
                # result in a gradual accumulation of LFS blobs in <ud.clonedir>/lfs
                # corresponding to all the blobs reachable from the different revs
                # fetched across time.
                #
                # Only do this if the unpack resulted in a .git/lfs directory being
                # created; this only happens if at least one blob needed to be
                # downloaded.
                if os.path.exists(os.path.join(tmpdir, "git", ".git", "lfs")):
                    runfetchcmd("tar -cf - lfs | tar -xf - -C %s" % ud.clonedir, d, workdir="%s/git/.git" % tmpdir)
            finally:
                bb.utils.remove(tmpdir, recurse=True)
428
    def build_mirror_data(self, ud, d):
        """Create the mirror tarball(s) for this url if configured and not
        already present: a shallow tarball when shallow mirroring is enabled,
        otherwise a full tarball of the bare clone."""

        # Create as a temp file and move atomically into position to avoid races
        @contextmanager
        def create_atomic(filename):
            fd, tfile = tempfile.mkstemp(dir=os.path.dirname(filename))
            try:
                yield tfile
                # Read the current umask without changing it (set then restore),
                # and apply it to the finished file before the atomic rename.
                umask = os.umask(0o666)
                os.umask(umask)
                os.chmod(tfile, (0o666 & ~umask))
                os.rename(tfile, filename)
            finally:
                os.close(fd)

        if ud.shallow and ud.write_shallow_tarballs:
            if not os.path.exists(ud.fullshallow):
                # exists() is False for a dangling symlink; clear it out.
                if os.path.islink(ud.fullshallow):
                    os.unlink(ud.fullshallow)
                tempdir = tempfile.mkdtemp(dir=d.getVar('DL_DIR'))
                shallowclone = os.path.join(tempdir, 'git')
                try:
                    self.clone_shallow_local(ud, shallowclone, d)

                    logger.info("Creating tarball of git repository")
                    with create_atomic(ud.fullshallow) as tfile:
                        runfetchcmd("tar -czf %s ." % tfile, d, workdir=shallowclone)
                    runfetchcmd("touch %s.done" % ud.fullshallow, d)
                finally:
                    bb.utils.remove(tempdir, recurse=True)
        elif ud.write_tarballs and not os.path.exists(ud.fullmirror):
            if os.path.islink(ud.fullmirror):
                os.unlink(ud.fullmirror)

            logger.info("Creating tarball of git repository")
            with create_atomic(ud.fullmirror) as tfile:
                runfetchcmd("tar -czf %s ." % tfile, d, workdir=ud.clonedir)
            runfetchcmd("touch %s.done" % ud.fullmirror, d)
467
    def clone_shallow_local(self, ud, dest, d):
        """Clone the repo and make it shallow.

        The upstream url of the new clone isn't set at this time, as it'll be
        set correctly when unpacked."""
        runfetchcmd("%s clone %s %s %s" % (ud.basecmd, ud.cloneflags, ud.clonedir, dest), d)

        to_parse, shallow_branches = [], []
        for name in ud.names:
            revision = ud.revisions[name]
            depth = ud.shallow_depths[name]
            if depth:
                # '<rev>~<depth-1>^{}' names the commit at the bottom of the
                # wanted history window; it is resolved in one batch below.
                to_parse.append('%s~%d^{}' % (revision, depth - 1))

            # For nobranch, we need a ref, otherwise the commits will be
            # removed, and for non-nobranch, we truncate the branch to our
            # srcrev, to avoid keeping unnecessary history beyond that.
            branch = ud.branches[name]
            if ud.nobranch:
                ref = "refs/shallow/%s" % name
            elif ud.bareclone:
                ref = "refs/heads/%s" % branch
            else:
                ref = "refs/remotes/origin/%s" % branch

            shallow_branches.append(ref)
            runfetchcmd("%s update-ref %s %s" % (ud.basecmd, ref, revision), d, workdir=dest)

        # Map srcrev+depths to revisions
        parsed_depths = runfetchcmd("%s rev-parse %s" % (ud.basecmd, " ".join(to_parse)), d, workdir=dest)

        # Resolve specified revisions
        parsed_revs = runfetchcmd("%s rev-parse %s" % (ud.basecmd, " ".join('"%s^{}"' % r for r in ud.shallow_revs)), d, workdir=dest)
        shallow_revisions = parsed_depths.splitlines() + parsed_revs.splitlines()

        # Apply extra ref wildcards
        all_refs = runfetchcmd('%s for-each-ref "--format=%%(refname)"' % ud.basecmd,
                               d, workdir=dest).splitlines()
        for r in ud.shallow_extra_refs:
            if not ud.bareclone:
                # Non-bare clones only have remote-tracking branches.
                r = r.replace('refs/heads/', 'refs/remotes/origin/')

            if '*' in r:
                matches = filter(lambda a: fnmatch.fnmatchcase(a, r), all_refs)
                shallow_branches.extend(matches)
            else:
                shallow_branches.append(r)

        # Make the repository shallow
        shallow_cmd = [self.make_shallow_path, '-s']
        for b in shallow_branches:
            shallow_cmd.append('-r')
            shallow_cmd.append(b)
        shallow_cmd.extend(shallow_revisions)
        runfetchcmd(subprocess.list2cmdline(shallow_cmd), d, workdir=dest)
523
    def unpack(self, ud, destdir, d):
        """Unpack the downloaded src to destdir.

        Clones from the local bare clone (preferred) or extracts a shallow
        tarball, then checks out the configured revision unless nocheckout
        is set. Honours the subdir, subpath and destsuffix url parameters."""

        subdir = ud.parm.get("subdir")
        subpath = ud.parm.get("subpath")
        readpathspec = ""
        def_destsuffix = "git/"

        if subpath:
            # Only extract the given sub-tree of the repository.
            readpathspec = ":%s" % subpath
            def_destsuffix = "%s/" % os.path.basename(subpath.rstrip('/'))

        if subdir:
            # If 'subdir' param exists, create a dir and use it as destination for unpack cmd
            if os.path.isabs(subdir):
                if not os.path.realpath(subdir).startswith(os.path.realpath(destdir)):
                    raise bb.fetch2.UnpackError("subdir argument isn't a subdirectory of unpack root %s" % destdir, ud.url)
                destdir = subdir
            else:
                destdir = os.path.join(destdir, subdir)
            def_destsuffix = ""

        destsuffix = ud.parm.get("destsuffix", def_destsuffix)
        destdir = ud.destdir = os.path.join(destdir, destsuffix)
        # Start from a clean destination directory.
        if os.path.exists(destdir):
            bb.utils.prunedir(destdir)

        need_lfs = self._need_lfs(ud)

        if not need_lfs:
            # Tell git-lfs (if installed) not to fetch blobs during checkout.
            ud.basecmd = "GIT_LFS_SKIP_SMUDGE=1 " + ud.basecmd

        source_found = False
        source_error = []

        if not source_found:
            clonedir_is_up_to_date = not self.clonedir_need_update(ud, d)
            if clonedir_is_up_to_date:
                runfetchcmd("%s clone %s %s/ %s" % (ud.basecmd, ud.cloneflags, ud.clonedir, destdir), d)
                source_found = True
            else:
                source_error.append("clone directory not available or not up to date: " + ud.clonedir)

        if not source_found:
            # Fall back to the shallow tarball when the clone is unusable.
            if ud.shallow:
                if os.path.exists(ud.fullshallow):
                    bb.utils.mkdirhier(destdir)
                    runfetchcmd("tar -xzf %s" % ud.fullshallow, d, workdir=destdir)
                    source_found = True
                else:
                    source_error.append("shallow clone not available: " + ud.fullshallow)
            else:
                source_error.append("shallow clone not enabled")

        if not source_found:
            raise bb.fetch2.UnpackError("No up to date source found: " + "; ".join(source_error), ud.url)

        # Point origin at the real upstream url.
        repourl = self._get_repo_url(ud)
        runfetchcmd("%s remote set-url origin %s" % (ud.basecmd, shlex.quote(repourl)), d, workdir=destdir)

        if self._contains_lfs(ud, d, destdir):
            if need_lfs and not self._find_git_lfs(d):
                raise bb.fetch2.FetchError("Repository %s has LFS content, install git-lfs on host to download (or set lfs=0 to ignore it)" % (repourl))
            elif not need_lfs:
                bb.note("Repository %s has LFS content but it is not being fetched" % (repourl))

        if not ud.nocheckout:
            if subpath:
                # Populate the index from the sub-tree only, then write it out.
                runfetchcmd("%s read-tree %s%s" % (ud.basecmd, ud.revisions[ud.names[0]], readpathspec), d,
                            workdir=destdir)
                runfetchcmd("%s checkout-index -q -f -a" % ud.basecmd, d, workdir=destdir)
            elif not ud.nobranch:
                branchname =  ud.branches[ud.names[0]]
                runfetchcmd("%s checkout -B %s %s" % (ud.basecmd, branchname, \
                            ud.revisions[ud.names[0]]), d, workdir=destdir)
                runfetchcmd("%s branch %s --set-upstream-to origin/%s" % (ud.basecmd, branchname, \
                            branchname), d, workdir=destdir)
            else:
                # Detached checkout of the exact revision.
                runfetchcmd("%s checkout %s" % (ud.basecmd, ud.revisions[ud.names[0]]), d, workdir=destdir)

        return True
605
606    def clean(self, ud, d):
607        """ clean the git directory """
608
609        to_remove = [ud.localpath, ud.fullmirror, ud.fullmirror + ".done"]
610        # The localpath is a symlink to clonedir when it is cloned from a
611        # mirror, so remove both of them.
612        if os.path.islink(ud.localpath):
613            clonedir = os.path.realpath(ud.localpath)
614            to_remove.append(clonedir)
615
616        for r in to_remove:
617            if os.path.exists(r):
618                bb.note('Removing %s' % r)
619                bb.utils.remove(r, True)
620
621    def supports_srcrev(self):
622        return True
623
624    def _contains_ref(self, ud, d, name, wd):
625        cmd = ""
626        if ud.nobranch:
627            cmd = "%s log --pretty=oneline -n 1 %s -- 2> /dev/null | wc -l" % (
628                ud.basecmd, ud.revisions[name])
629        else:
630            cmd =  "%s branch --contains %s --list %s 2> /dev/null | wc -l" % (
631                ud.basecmd, ud.revisions[name], ud.branches[name])
632        try:
633            output = runfetchcmd(cmd, d, quiet=True, workdir=wd)
634        except bb.fetch2.FetchError:
635            return False
636        if len(output.split()) > 1:
637            raise bb.fetch2.FetchError("The command '%s' gave output with more then 1 line unexpectedly, output: '%s'" % (cmd, output))
638        return output.split()[0] != "0"
639
640    def _need_lfs(self, ud):
641        return ud.parm.get("lfs", "1") == "1"
642
643    def _contains_lfs(self, ud, d, wd):
644        """
645        Check if the repository has 'lfs' (large file) content
646        """
647
648        if not ud.nobranch:
649            branchname = ud.branches[ud.names[0]]
650        else:
651            branchname = "master"
652
653        # The bare clonedir doesn't use the remote names; it has the branch immediately.
654        if wd == ud.clonedir:
655            refname = ud.branches[ud.names[0]]
656        else:
657            refname = "origin/%s" % ud.branches[ud.names[0]]
658
659        cmd = "%s grep lfs %s:.gitattributes | wc -l" % (
660            ud.basecmd, refname)
661
662        try:
663            output = runfetchcmd(cmd, d, quiet=True, workdir=wd)
664            if int(output) > 0:
665                return True
666        except (bb.fetch2.FetchError,ValueError):
667            pass
668        return False
669
670    def _find_git_lfs(self, d):
671        """
672        Return True if git-lfs can be found, False otherwise.
673        """
674        import shutil
675        return shutil.which("git-lfs", path=d.getVar('PATH')) is not None
676
677    def _get_repo_url(self, ud):
678        """
679        Return the repository URL
680        """
681        # Note that we do not support passwords directly in the git urls. There are several
682        # reasons. SRC_URI can be written out to things like buildhistory and people don't
683        # want to leak passwords like that. Its also all too easy to share metadata without
684        # removing the password. ssh keys, ~/.netrc and ~/.ssh/config files can be used as
685        # alternatives so we will not take patches adding password support here.
686        if ud.user:
687            username = ud.user + '@'
688        else:
689            username = ""
690        return "%s://%s%s%s" % (ud.proto, username, ud.host, ud.path)
691
692    def _revision_key(self, ud, d, name):
693        """
694        Return a unique key for the url
695        """
696        # Collapse adjacent slashes
697        slash_re = re.compile(r"/+")
698        return "git:" + ud.host + slash_re.sub(".", ud.path) + ud.unresolvedrev[name]
699
700    def _lsremote(self, ud, d, search):
701        """
702        Run git ls-remote with the specified search string
703        """
704        # Prevent recursion e.g. in OE if SRCPV is in PV, PV is in WORKDIR,
705        # and WORKDIR is in PATH (as a result of RSS), our call to
706        # runfetchcmd() exports PATH so this function will get called again (!)
707        # In this scenario the return call of the function isn't actually
708        # important - WORKDIR isn't needed in PATH to call git ls-remote
709        # anyway.
710        if d.getVar('_BB_GIT_IN_LSREMOTE', False):
711            return ''
712        d.setVar('_BB_GIT_IN_LSREMOTE', '1')
713        try:
714            repourl = self._get_repo_url(ud)
715            cmd = "%s ls-remote %s %s" % \
716                (ud.basecmd, shlex.quote(repourl), search)
717            if ud.proto.lower() != 'file':
718                bb.fetch2.check_network_access(d, cmd, repourl)
719            output = runfetchcmd(cmd, d, True)
720            if not output:
721                raise bb.fetch2.FetchError("The command %s gave empty output unexpectedly" % cmd, ud.url)
722        finally:
723            d.delVar('_BB_GIT_IN_LSREMOTE')
724        return output
725
726    def _latest_revision(self, ud, d, name):
727        """
728        Compute the HEAD revision for the url
729        """
730        output = self._lsremote(ud, d, "")
731        # Tags of the form ^{} may not work, need to fallback to other form
732        if ud.unresolvedrev[name][:5] == "refs/" or ud.usehead:
733            head = ud.unresolvedrev[name]
734            tag = ud.unresolvedrev[name]
735        else:
736            head = "refs/heads/%s" % ud.unresolvedrev[name]
737            tag = "refs/tags/%s" % ud.unresolvedrev[name]
738        for s in [head, tag + "^{}", tag]:
739            for l in output.strip().split('\n'):
740                sha1, ref = l.split()
741                if s == ref:
742                    return sha1
743        raise bb.fetch2.FetchError("Unable to resolve '%s' in upstream git repository in git ls-remote output for %s" % \
744            (ud.unresolvedrev[name], ud.host+ud.path))
745
746    def latest_versionstring(self, ud, d):
747        """
748        Compute the latest release name like "x.y.x" in "x.y.x+gitHASH"
749        by searching through the tags output of ls-remote, comparing
750        versions and returning the highest match.
751        """
752        pupver = ('', '')
753
754        tagregex = re.compile(d.getVar('UPSTREAM_CHECK_GITTAGREGEX') or r"(?P<pver>([0-9][\.|_]?)+)")
755        try:
756            output = self._lsremote(ud, d, "refs/tags/*")
757        except (bb.fetch2.FetchError, bb.fetch2.NetworkAccess) as e:
758            bb.note("Could not list remote: %s" % str(e))
759            return pupver
760
761        verstring = ""
762        revision = ""
763        for line in output.split("\n"):
764            if not line:
765                break
766
767            tag_head = line.split("/")[-1]
768            # Ignore non-released branches
769            m = re.search(r"(alpha|beta|rc|final)+", tag_head)
770            if m:
771                continue
772
773            # search for version in the line
774            tag = tagregex.search(tag_head)
775            if tag is None:
776                continue
777
778            tag = tag.group('pver')
779            tag = tag.replace("_", ".")
780
781            if verstring and bb.utils.vercmp(("0", tag, ""), ("0", verstring, "")) < 0:
782                continue
783
784            verstring = tag
785            revision = line.split()[0]
786            pupver = (verstring, revision)
787
788        return pupver
789
790    def _build_revision(self, ud, d, name):
791        return ud.revisions[name]
792
793    def gitpkgv_revision(self, ud, d, name):
794        """
795        Return a sortable revision number by counting commits in the history
796        Based on gitpkgv.bblass in meta-openembedded
797        """
798        rev = self._build_revision(ud, d, name)
799        localpath = ud.localpath
800        rev_file = os.path.join(localpath, "oe-gitpkgv_" + rev)
801        if not os.path.exists(localpath):
802            commits = None
803        else:
804            if not os.path.exists(rev_file) or not os.path.getsize(rev_file):
805                from pipes import quote
806                commits = bb.fetch2.runfetchcmd(
807                        "git rev-list %s -- | wc -l" % quote(rev),
808                        d, quiet=True).strip().lstrip('0')
809                if commits:
810                    open(rev_file, "w").write("%d\n" % int(commits))
811            else:
812                commits = open(rev_file, "r").readline(128).strip()
813        if commits:
814            return False, "%s+%s" % (commits, rev[:7])
815        else:
816            return True, str(rev)
817
818    def checkstatus(self, fetch, ud, d):
819        try:
820            self._lsremote(ud, d, "")
821            return True
822        except bb.fetch2.FetchError:
823            return False
824