# xref: /openbmc/openbmc/poky/bitbake/lib/bb/fetch2/wget.py (revision c9537f57ab488bf5d90132917b0184e2527970a5)
1"""
2BitBake 'Fetch' implementations
3
4Classes for obtaining upstream sources for the
5BitBake build tools.
6
7"""
8
9# Copyright (C) 2003, 2004  Chris Larson
10#
11# SPDX-License-Identifier: GPL-2.0-only
12#
13# Based on functions from the base bb module, Copyright 2003 Holger Schurig
14
15import shlex
16import re
17import tempfile
18import os
19import errno
20import bb
21import bb.progress
22import socket
23import http.client
24import urllib.request, urllib.parse, urllib.error
25from   bb.fetch2 import FetchMethod
26from   bb.fetch2 import FetchError
27from   bb.fetch2 import logger
28from   bb.fetch2 import runfetchcmd
29from   bs4 import BeautifulSoup
30from   bs4 import SoupStrainer
31
32class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
33    """
34    Extract progress information from wget output.
35    Note: relies on --progress=dot (with -v or without -q/-nv) being
36    specified on the wget command line.
37    """
38    def __init__(self, d):
39        super(WgetProgressHandler, self).__init__(d)
40        # Send an initial progress event so the bar gets shown
41        self._fire_progress(0)
42
43    def writeline(self, line):
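        # A typical wget --progress=dot line looks roughly like (example):
        #   "  3100K .......... .......... 12%  813K 2m48s"
        # so the regex below picks out the last "<percent>% <rate>" pair.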
        percs = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if percs:
            progress = int(percs[-1][0])
            rate = percs[-1][1] + '/s'
            self.update(progress, rate)
            return False
        return True


class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""

    def check_certs(self, d):
        """
        Should certificates be checked?
        """
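        # Checking is enabled unless BB_CHECK_SSL_CERTS is explicitly set to "0".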
        return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0"

    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with wget.
        """
        return ud.type in ['http', 'https', 'ftp', 'ftps']

    def recommends_checksum(self, urldata):
        return True

    def urldata_init(self, ud, d):
        if 'protocol' in ud.parm:
            if ud.parm['protocol'] == 'git':
                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)

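        # The downloadfilename parameter lets a recipe choose the saved file
        # name, e.g. (hypothetical recipe line):
        #   SRC_URI = "https://example.com/download?id=42;downloadfilename=foo-1.0.tar.gz"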
        if 'downloadfilename' in ud.parm:
            ud.basename = ud.parm['downloadfilename']
        else:
            ud.basename = os.path.basename(ud.path)

        ud.localfile = ud.basename
        if not ud.localfile:
            ud.localfile = ud.host + ud.path.replace("/", ".")

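        # FETCHCMD_wget may be overridden by the distro or recipe; otherwise
        # the default command below is used.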
        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget --tries=2 --timeout=100"

        if ud.type == 'ftp' or ud.type == 'ftps':
            self.basecmd += " --passive-ftp"

        if not self.check_certs(d):
            self.basecmd += " --no-check-certificate"

    def _runwget(self, ud, d, command, quiet, workdir=None):

        progresshandler = WgetProgressHandler(d)

        logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command, ud.url)
        runfetchcmd(command + ' --progress=dot --verbose', d, quiet, log=progresshandler, workdir=workdir)

    def download(self, ud, d):
        """Fetch urls"""

        fetchcmd = self.basecmd

        dldir = os.path.realpath(d.getVar("DL_DIR"))
        localpath = os.path.join(dldir, ud.localfile) + ".tmp"
        bb.utils.mkdirhier(os.path.dirname(localpath))
        fetchcmd += " --output-document=%s" % shlex.quote(localpath)

        if ud.user and ud.pswd:
            fetchcmd += " --auth-no-challenge"
            if ud.parm.get("redirectauth", "1") == "1":
                # An undocumented feature of wget is that if the
                # username/password are specified on the URI, wget will only
                # send the Authorization header to the first host and not to
                # any hosts that it is redirected to.  With the increasing
                # usage of temporary AWS URLs, this difference now matters as
                # AWS will reject any request that has authentication both in
                # the query parameters (from the redirect) and in the
                # Authorization header.
                fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)

        uri = ud.url.split(";")[0]
        fetchcmd += " --continue --directory-prefix=%s '%s'" % (dldir, uri)
        self._runwget(ud, d, fetchcmd, False)

        # Sanity check since wget can pretend it succeeded when it didn't
        # Also, this used to happen if sourceforge sent us to the mirror page
        if not os.path.exists(localpath):
            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, localpath), uri)

        if os.path.getsize(localpath) == 0:
            os.remove(localpath)
            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)

        # Try and verify any checksum now, meaning if it isn't correct, we don't remove the
        # original file, which might be a race (imagine two recipes referencing the same
        # source, one with an incorrect checksum)
        bb.fetch2.verify_checksum(ud, d, localpath=localpath, fatal_nochecksum=False)

        # Remove the ".tmp" and move the file into position atomically
        # Our lock prevents multiple writers but mirroring code may grab incomplete files
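        # localpath ends in ".tmp" (set above), so localpath[:-4] is the final path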
        os.rename(localpath, localpath[:-4])

        return True

    def checkstatus(self, fetch, ud, d, try_again=True):
        class HTTPConnectionCache(http.client.HTTPConnection):
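            # connect() is only overridden when a connection cache is active;
            # otherwise the stock http.client.HTTPConnection.connect() is used.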
            if fetch.connection_cache:
                def connect(self):
                    """Connect to the host and port specified in __init__."""

                    sock = fetch.connection_cache.get_connection(self.host, self.port)
                    if sock:
                        self.sock = sock
                    else:
                        self.sock = socket.create_connection((self.host, self.port),
                                    self.timeout, self.source_address)
                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)

                    if self._tunnel_host:
                        self._tunnel()

        class CacheHTTPHandler(urllib.request.HTTPHandler):
            def http_open(self, req):
                return self.do_open(HTTPConnectionCache, req)

            def do_open(self, http_class, req):
                """Return an addinfourl object for the request, using http_class.

                http_class must implement the HTTPConnection API from httplib.
                The addinfourl return value is a file-like object.  It also
                has methods and attributes including:
                    - info(): return a mimetools.Message object for the headers
                    - geturl(): return the original request URL
                    - code: HTTP status code
                """
                host = req.host
                if not host:
                    raise urllib.error.URLError('no host given')

                h = http_class(host, timeout=req.timeout) # will parse host:port
                h.set_debuglevel(self._debuglevel)

                headers = dict(req.unredirected_hdrs)
                headers.update(dict((k, v) for k, v in list(req.headers.items())
                            if k not in headers))

                # We want to make an HTTP/1.1 request, but the addinfourl
                # class isn't prepared to deal with a persistent connection.
                # It will try to read all remaining data from the socket,
                # which will block while the server waits for the next request.
                # So make sure the connection gets closed after the (only)
                # request.

                # Don't close the connection when the connection cache is enabled.
                if fetch.connection_cache is None:
                    headers["Connection"] = "close"
                else:
                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0

                headers = dict(
                    (name.title(), val) for name, val in list(headers.items()))

                if req._tunnel_host:
                    tunnel_headers = {}
                    proxy_auth_hdr = "Proxy-Authorization"
                    if proxy_auth_hdr in headers:
                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                        # Proxy-Authorization should not be sent to origin
                        # server.
                        del headers[proxy_auth_hdr]
                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

                try:
                    h.request(req.get_method(), req.selector, req.data, headers)
                except socket.error as err: # XXX what error?
                    # Don't close connection when cache is enabled.
                    # Instead, try to detect connections that are no longer
                    # usable (for example, closed unexpectedly) and remove
                    # them from the cache.
                    if fetch.connection_cache is None:
                        h.close()
                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
                        # This happens when the server closes the connection despite the Keep-Alive.
                        # Apparently urllib then uses the file descriptor, expecting it to be
                        # connected, when in reality the connection is already gone.
                        # We let the request fail and expect it to be
                        # tried once more ("try_again" in check_status()),
                        # with the dead connection removed from the cache.
                        # If it still fails, we give up, which can happen for bad
                        # HTTP proxy settings.
                        fetch.connection_cache.remove_connection(h.host, h.port)
                    raise urllib.error.URLError(err)
                else:
                    try:
                        r = h.getresponse()
                    except TimeoutError as e:
                        if fetch.connection_cache:
                            fetch.connection_cache.remove_connection(h.host, h.port)
                        raise TimeoutError(e)

                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.

                # Wrap the HTTPResponse object in socket's file object adapter
                # for Windows.  That adapter calls recv(), so delegate recv()
                # to read().  This weird wrapping allows the returned object to
                # have readline() and readlines() methods.

                # XXX It might be better to extract the read buffering code
                # out of socket._fileobject() and into a base class.
                r.recv = r.read

                # no data, just have to read
                r.read()
                class fp_dummy(object):
                    def read(self):
                        return ""
                    def readline(self):
                        return ""
                    def close(self):
                        pass
                    closed = False

                resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
                resp.code = r.status
                resp.msg = r.reason

                # Close the connection when the server requests it.
                if fetch.connection_cache is not None:
                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                        fetch.connection_cache.remove_connection(h.host, h.port)

                return resp

        class HTTPMethodFallback(urllib.request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                fp.read()
                fp.close()

                if req.get_method() != 'GET':
                    newheaders = dict((k, v) for k, v in list(req.headers.items())
                                      if k.lower() not in ("content-length", "content-type"))
                    return self.parent.open(urllib.request.Request(req.get_full_url(),
                                                            headers=newheaders,
                                                            origin_req_host=req.origin_req_host,
                                                            unverifiable=True))

                raise urllib.request.HTTPError(req, code, msg, headers, None)

            # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
            # Forbidden when they actually mean 405 Method Not Allowed.
            http_error_403 = http_error_405


        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
            """
            urllib.request.HTTPRedirectHandler before 3.13 has two flaws:

            It resets the method to GET on redirect when we want to follow
            redirects using the original method (typically HEAD). This was fixed
            in 759e8e7.

            It also doesn't handle 308 (Permanent Redirect). This was fixed in
            c379bc5.

            Until we depend on Python 3.13 onwards, copy the redirect_request
            method to fix these issues.
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                m = req.get_method()
                if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                    or code in (301, 302, 303) and m == "POST")):
                    raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp)

                # Strictly (according to RFC 2616), 301 or 302 in response to
                # a POST MUST NOT cause a redirection without confirmation
                # from the user (of urllib.request, in this case).  In practice,
                # essentially all clients do redirect in this case, so we do
                # the same.

                # Be conciliant with URIs containing a space.  This is mainly
                # redundant with the more complete encoding done in http_error_302(),
                # but it is kept for compatibility with other callers.
                newurl = newurl.replace(' ', '%20')

                CONTENT_HEADERS = ("content-length", "content-type")
                newheaders = {k: v for k, v in req.headers.items()
                            if k.lower() not in CONTENT_HEADERS}
                return urllib.request.Request(newurl,
                            method="HEAD" if m == "HEAD" else "GET",
                            headers=newheaders,
                            origin_req_host=req.origin_req_host,
                            unverifiable=True)

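            # Handle 308 like 302; redirect_request() above preserves HEAD.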
            http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302

        # We need to update the environment here as both the proxy and HTTPS
        # handlers need variables set. The proxy needs http_proxy and friends to
        # be set, and HTTPSHandler ends up calling into openssl to load the
        # certificates. In buildtools configurations this will be looking at the
        # wrong place for certificates by default: we set SSL_CERT_FILE to the
        # right location in the buildtools environment script but as BitBake
        # prunes the environment, this is lost. When binaries are executed
        # runfetchcmd ensures these values are in the environment, but this is
        # pure Python so we need to update the environment.
        #
        # Avoid trampling the environment too much by using bb.utils.environment
        # to scope the changes to the build_opener request, which is when the
        # environment lookups happen.
        newenv = bb.fetch2.get_fetcher_environment(d)

        with bb.utils.environment(**newenv):
            import ssl

            if self.check_certs(d):
                context = ssl.create_default_context()
            else:
                context = ssl._create_unverified_context()

            handlers = [FixedHTTPRedirectHandler,
                        HTTPMethodFallback,
                        urllib.request.ProxyHandler(),
                        CacheHTTPHandler(),
                        urllib.request.HTTPSHandler(context=context)]
            opener = urllib.request.build_opener(*handlers)

            try:
                parts = urllib.parse.urlparse(ud.url.split(";")[0])
                uri = "{}://{}{}".format(parts.scheme, parts.netloc, parts.path)
                r = urllib.request.Request(uri)
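                # Use HEAD to keep the status check lightweight; the
                # HTTPMethodFallback handler above retries with GET if the
                # server rejects HEAD (405, or 403 from some servers).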
                r.get_method = lambda: "HEAD"
                # Some servers (FusionForge, as used on Alioth) require that the
                # optional Accept header is set.
                r.add_header("Accept", "*/*")
                r.add_header("User-Agent", "bitbake/{}".format(bb.__version__))
                def add_basic_auth(login_str, request):
                    '''Adds Basic auth to http request, pass in login:password as string'''
                    import base64
                    encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
                    authheader = "Basic %s" % encodeuser
                    r.add_header("Authorization", authheader)

                if ud.user and ud.pswd:
                    add_basic_auth(ud.user + ':' + ud.pswd, r)

                try:
                    import netrc
                    auth_data = netrc.netrc().authenticators(urllib.parse.urlparse(uri).hostname)
                    if auth_data:
                        login, _, password = auth_data
                        add_basic_auth("%s:%s" % (login, password), r)
                except (FileNotFoundError, netrc.NetrcParseError):
                    pass

                with opener.open(r, timeout=100) as response:
                    pass
            except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e:
                if try_again:
                    logger.debug2("checkstatus: trying again")
                    return self.checkstatus(fetch, ud, d, False)
                else:
                    # debug for now to avoid spamming the logs in e.g. remote sstate searches
                    logger.debug2("checkstatus() urlopen failed for %s: %s" % (uri, e))
                    return False

        return True

    def _parse_path(self, regex, s):
        """
        Find and group name, version and archive type in the given string s
        """
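        # e.g. with the default regexes built in _init_regexes(), a string like
        # "gnome-common-2.20.0.tar.gz" yields roughly
        # ("gnome-common-", "2.20.0", "tar.gz"); the exact groups depend on the
        # regex passed in.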

        m = regex.search(s)
        if m:
            pname = ''
            pver = ''
            ptype = ''

            mdict = m.groupdict()
            if 'name' in mdict.keys():
                pname = mdict['name']
            if 'pver' in mdict.keys():
                pver = mdict['pver']
            if 'type' in mdict.keys():
                ptype = mdict['type']

            bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))

            return (pname, pver, ptype)

        return None

    def _modelate_version(self, version):
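        # Normalise separators and map pre-release tags to numeric components
        # so bb.utils.vercmp() can order them: rc -> .1000., beta -> .100.,
        # alpha -> .10.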
        if version[0] in ['.', '-']:
            if version[1].isdigit():
                version = version[1] + version[0] + version[2:len(version)]
            else:
                version = version[1:len(version)]

        version = re.sub('-', '.', version)
        version = re.sub('_', '.', version)
        version = re.sub('(rc)+', '.1000.', version)
        version = re.sub('(beta)+', '.100.', version)
        version = re.sub('(alpha)+', '.10.', version)
        if version[0] == 'v':
            version = version[1:len(version)]
        return version

    def _vercmp(self, old, new):
        """
        Check whether 'new' is newer than the 'old' version. We use the existing
        vercmp() for this. PE is cleared for the comparison as it is not relevant
        to the build, and PR is cleared too for simplicity, as it is difficult
        to extract from the various upstream formats.
460        """
461
462        (oldpn, oldpv, oldsuffix) = old
463        (newpn, newpv, newsuffix) = new
464
465        # Check for a new suffix type that we have never heard of before
466        if newsuffix:
467            m = self.suffix_regex_comp.search(newsuffix)
468            if not m:
469                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
470                return False
471
472        # Not our package so ignore it
473        if oldpn != newpn:
474            return False
475
476        oldpv = self._modelate_version(oldpv)
477        newpv = self._modelate_version(newpv)
478
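        # A negative result means oldpv is older than newpv (bb.utils.vercmp() semantics).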
        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))

    def _fetch_index(self, uri, ud, d):
        """
        Run fetch checkstatus to get directory information
        """
        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
            fetchcmd = self.basecmd
            fetchcmd += " --output-document=%s '%s'" % (f.name, uri)
            try:
                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
                fetchresult = f.read()
            except bb.fetch2.BBFetchException:
                fetchresult = ""

        return fetchresult

    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
        """
        Return the latest version of a package inside a given directory path
        If error or no version, return ""
        """
        valid = 0
        version = ['', '', '']

        bb.debug(3, "VersionURL: %s" % (url))
        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            bb.debug(3, "*** %s NO SOUP" % (url))
            return ""

        for line in soup.find_all('a', href=True):
            bb.debug(3, "line['href'] = '%s'" % (line['href']))
            bb.debug(3, "line = '%s'" % (str(line)))

            newver = self._parse_path(package_regex, line['href'])
            if not newver:
                newver = self._parse_path(package_regex, str(line))

            if newver:
                bb.debug(3, "Upstream version found: %s" % newver[1])
                if valid == 0:
                    version = newver
                    valid = 1
                elif self._vercmp(version, newver) < 0:
                    version = newver

        pupver = re.sub('_', '.', version[1])

        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
                (package, pupver or "N/A", current_version[1]))

        if valid:
            return pupver

        return ""

    def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
        """
        Scan every directory in order to get upstream version.
        """
        version_dir = ['', '', '']
        version = ['', '', '']

        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))")
        s = dirver_regex.search(dirver)
        if s:
            version_dir[1] = s.group('ver')
        else:
            version_dir[1] = dirver

        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            return version[1]

        for line in soup.find_all('a', href=True):
            s = dirver_regex.search(line['href'].strip("/"))
            if s:
                sver = s.group('ver')

                # When a prefix is part of the version directory, make sure
                # that only the version directory itself is used, so strip any
                # preceding directories.
                #
                # Example: with pfx = '/dir1/dir2/v' and version = '2.5', the
                # expected result is 'v2.5'.
                spfx = s.group('pfx').split('/')[-1]

                version_dir_new = ['', sver, '']
                if self._vercmp(version_dir, version_dir_new) <= 0:
                    dirver_new = spfx + sver
                    path = ud.path.replace(dirver, dirver_new, True) \
                        .split(package)[0]
                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
                        ud.user, ud.pswd, {}])

                    pupver = self._check_latest_version(uri,
                            package, package_regex, current_version, ud, d)
                    if pupver:
                        version[1] = pupver

                    version_dir = version_dir_new

        return version[1]

    def _init_regexes(self, package, ud, d):
        """
        Match as many patterns as possible such as:
                gnome-common-2.20.0.tar.gz (most common format)
                gtk+-2.90.1.tar.gz
                xf86-input-synaptics-12.6.9.tar.gz
                dri2proto-2.3.tar.gz
                blktool_4.orig.tar.gz
                libid3tag-0.15.1b.tar.gz
                unzip552.tar.gz
                icu4c-3_6-src.tgz
                genext2fs_1.3.orig.tar.gz
                gst-fluendo-mp3
        """
        # match most patterns which use "-" as the separator before the version digits
        pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
        # a loose pattern such as for unzip552.tar.gz
        pn_prefix2 = r"[a-zA-Z]+"
        # a loose pattern such as for 80325-quicky-0.4.tar.gz
        pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
        # Save the Package Name (pn) Regex for use later
        pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

        # match version
        pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

        # match arch
        parch_regex = "-source|_all_"

        # The src.rpm extension was added only for rpm packages. It can be removed
        # if rpm packages will always be considered as having to be manually upgraded
        psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)"

        # match name, version and archive type of a package
        package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
                                                    % (pn_regex, pver_regex, parch_regex, psuffix_regex))
        self.suffix_regex_comp = re.compile(psuffix_regex)

        # Compile the regex; it can be package-specific (UPSTREAM_CHECK_REGEX) or the generic one built above
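        # e.g. a recipe might set UPSTREAM_CHECK_REGEX = r"myproject-(?P<pver>\d+(\.\d+)+)" (hypothetical value)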
        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
        if pn_regex:
            package_custom_regex_comp = re.compile(pn_regex)
        else:
            version = self._parse_path(package_regex_comp, package)
            if version:
                package_custom_regex_comp = re.compile(
                    r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
            else:
                package_custom_regex_comp = None

        return package_custom_regex_comp

    def latest_versionstring(self, ud, d):
        """
        Manipulate the URL and try to obtain the latest package version.

        Sanity check to ensure the package name and type match.
        """
        if 'downloadfilename' in ud.parm:
            package = ud.parm['downloadfilename']
        else:
            package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV'), '']

        # It is possible to have no version in the package name, e.g. spectrum-fw
        if not re.search(r"\d+", package):
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
            bb.debug(3, "latest_versionstring: no version found in %s" % package)
            return (current_version[1], '')

        package_regex = self._init_regexes(package, ud, d)
        if package_regex is None:
            bb.warn("latest_versionstring: package %s doesn't match the pattern" % (package))
            return ('', '')
        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

        uri = ""
        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
        if not regex_uri:
            path = ud.path.split(package)[0]

            # search for version matches on folders inside the path, like:
            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
            dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
            m = dirver_regex.findall(path)
            if m:
                pn = d.getVar('PN')
                dirver = m[-1][0]

                dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
                if not dirver_pn_regex.search(dirver):
                    return (self._check_latest_version_by_dir(dirver,
                        package, package_regex, current_version, ud, d), '')

            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
        else:
            uri = regex_uri

        return (self._check_latest_version(uri, package, package_regex,
                current_version, ud, d), '')
