1"""
2BitBake 'Fetch' implementations
3
4Classes for obtaining upstream sources for the
5BitBake build tools.
6
7"""
8
9# Copyright (C) 2003, 2004  Chris Larson
10#
11# SPDX-License-Identifier: GPL-2.0-only
12#
13# Based on functions from the base bb module, Copyright 2003 Holger Schurig
14
import shlex
import re
import tempfile
import os
import errno
import bb
import bb.progress
import socket
import http.client
import urllib.request, urllib.parse, urllib.error
from   bb.fetch2 import FetchMethod
from   bb.fetch2 import FetchError
from   bb.fetch2 import logger
from   bb.fetch2 import runfetchcmd
from   bs4 import BeautifulSoup
from   bs4 import SoupStrainer

class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Extract progress information from wget output.
    Note: relies on --progress=dot (with -v or without -q/-nv) being
    specified on the wget command line.
    """
    def __init__(self, d):
        super(WgetProgressHandler, self).__init__(d)
        # Send an initial progress event so the bar gets shown
        self._fire_progress(0)

    def writeline(self, line):
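        # A dot-style progress line typically looks like:
        #   "  4096K .......... .......... 37% 1.23M 12s"
        # so grab the last "<percent>% <rate>" pair on the line.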
        percs = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if percs:
            progress = int(percs[-1][0])
            rate = percs[-1][1] + '/s'
            self.update(progress, rate)
            return False
        return True


class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""

    def check_certs(self, d):
        """
        Should certificates be checked?
        """
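        # Checking is on unless BB_CHECK_SSL_CERTS = "0" is set (e.g. in
        # local.conf); any other value, or no value, leaves checking enabled.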
        return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0"

    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with wget.
        """
        return ud.type in ['http', 'https', 'ftp', 'ftps']

    def recommends_checksum(self, urldata):
        return True

    def urldata_init(self, ud, d):
        if 'protocol' in ud.parm:
            if ud.parm['protocol'] == 'git':
                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)

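        # The downloadfilename URL parameter overrides the basename taken
        # from the URL path, e.g.:
        #   SRC_URI = "https://example.com/download?id=42;downloadfilename=foo-1.0.tar.gz"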
        if 'downloadfilename' in ud.parm:
            ud.basename = ud.parm['downloadfilename']
        else:
            ud.basename = os.path.basename(ud.path)

        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
        if not ud.localfile:
            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))

        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 100"

        if ud.type == 'ftp' or ud.type == 'ftps':
            self.basecmd += " --passive-ftp"

        if not self.check_certs(d):
            self.basecmd += " --no-check-certificate"

    def _runwget(self, ud, d, command, quiet, workdir=None):

        progresshandler = WgetProgressHandler(d)

        logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command, ud.url)
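        # --progress=dot -v is appended so WgetProgressHandler above can parse
        # percentage and rate from the dot-style progress output.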
        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)

    def download(self, ud, d):
        """Fetch urls"""

        fetchcmd = self.basecmd

        dldir = os.path.realpath(d.getVar("DL_DIR"))
        localpath = os.path.join(dldir, ud.localfile) + ".tmp"
        bb.utils.mkdirhier(os.path.dirname(localpath))
        fetchcmd += " -O %s" % shlex.quote(localpath)

        if ud.user and ud.pswd:
            fetchcmd += " --auth-no-challenge"
            if ud.parm.get("redirectauth", "1") == "1":
                # An undocumented feature of wget is that if the
                # username/password are specified on the URI, wget will only
                # send the Authorization header to the first host and not to
                # any hosts that it is redirected to.  With the increasing
                # usage of temporary AWS URLs, this difference now matters as
                # AWS will reject any request that has authentication both in
                # the query parameters (from the redirect) and in the
                # Authorization header.
                fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)
        uri = ud.url.split(";")[0]
        if os.path.exists(ud.localpath):
            # file exists, but we didn't complete it, so try again
            fetchcmd += " -c -P " + dldir + " '" + uri + "'"
        else:
            fetchcmd += " -P " + dldir + " '" + uri + "'"

        self._runwget(ud, d, fetchcmd, False)

        # Sanity check since wget can pretend it succeeded when it didn't
        # Also, this used to happen if sourceforge sent us to the mirror page
        if not os.path.exists(localpath):
            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, localpath), uri)

        if os.path.getsize(localpath) == 0:
            os.remove(localpath)
            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)

        # Try and verify any checksum now, meaning if it isn't correct, we don't remove the
        # original file, which might be a race (imagine two recipes referencing the same
        # source, one with an incorrect checksum)
        bb.fetch2.verify_checksum(ud, d, localpath=localpath, fatal_nochecksum=False)

        # Remove the ".tmp" and move the file into position atomically
        # Our lock prevents multiple writers but mirroring code may grab incomplete files
        os.rename(localpath, localpath[:-4])

        return True

    def checkstatus(self, fetch, ud, d, try_again=True):
        class HTTPConnectionCache(http.client.HTTPConnection):
            if fetch.connection_cache:
                def connect(self):
                    """Connect to the host and port specified in __init__."""

                    sock = fetch.connection_cache.get_connection(self.host, self.port)
                    if sock:
                        self.sock = sock
                    else:
                        self.sock = socket.create_connection((self.host, self.port),
                                    self.timeout, self.source_address)
                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)

                    if self._tunnel_host:
                        self._tunnel()

        class CacheHTTPHandler(urllib.request.HTTPHandler):
            def http_open(self, req):
                return self.do_open(HTTPConnectionCache, req)

            def do_open(self, http_class, req):
                """Return an addinfourl object for the request, using http_class.

                http_class must implement the HTTPConnection API from httplib.
                The addinfourl return value is a file-like object.  It also
                has methods and attributes including:
                    - info(): return a mimetools.Message object for the headers
                    - geturl(): return the original request URL
                    - code: HTTP status code
                """
                host = req.host
                if not host:
                    raise urllib.error.URLError('no host given')

                h = http_class(host, timeout=req.timeout) # will parse host:port
                h.set_debuglevel(self._debuglevel)

                headers = dict(req.unredirected_hdrs)
                headers.update(dict((k, v) for k, v in list(req.headers.items())
                            if k not in headers))

                # We want to make an HTTP/1.1 request, but the addinfourl
                # class isn't prepared to deal with a persistent connection.
                # It will try to read all remaining data from the socket,
                # which will block while the server waits for the next request.
                # So make sure the connection gets closed after the (only)
                # request.

                # Don't close the connection when connection_cache is enabled.
                if fetch.connection_cache is None:
                    headers["Connection"] = "close"
                else:
                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0

                headers = dict(
                    (name.title(), val) for name, val in list(headers.items()))

                if req._tunnel_host:
                    tunnel_headers = {}
                    proxy_auth_hdr = "Proxy-Authorization"
                    if proxy_auth_hdr in headers:
                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                        # Proxy-Authorization should not be sent to origin
                        # server.
                        del headers[proxy_auth_hdr]
                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

                try:
                    h.request(req.get_method(), req.selector, req.data, headers)
                except socket.error as err: # XXX what error?
                    # Don't close connection when cache is enabled.
                    # Instead, try to detect connections that are no longer
                    # usable (for example, closed unexpectedly) and remove
                    # them from the cache.
                    if fetch.connection_cache is None:
                        h.close()
                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
                        # This happens when the server closes the connection despite the Keep-Alive.
                        # Apparently urllib then uses the file descriptor, expecting it to be
                        # connected, when in reality the connection is already gone.
                        # We let the request fail and expect it to be
                        # tried once more ("try_again" in checkstatus()),
                        # with the dead connection removed from the cache.
                        # If it still fails, we give up, which can happen for bad
                        # HTTP proxy settings.
                        fetch.connection_cache.remove_connection(h.host, h.port)
                    raise urllib.error.URLError(err)
                else:
                    try:
                        r = h.getresponse()
                    except TimeoutError as e:
                        if fetch.connection_cache:
                            fetch.connection_cache.remove_connection(h.host, h.port)
                        raise TimeoutError(e)

                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.

                # Wrap the HTTPResponse object in socket's file object adapter
                # for Windows.  That adapter calls recv(), so delegate recv()
                # to read().  This weird wrapping allows the returned object to
                # have readline() and readlines() methods.

                # XXX It might be better to extract the read buffering code
                # out of socket._fileobject() and into a base class.
                r.recv = r.read

                # no data, just have to read
                r.read()
                class fp_dummy(object):
                    def read(self):
                        return ""
                    def readline(self):
                        return ""
                    def close(self):
                        pass
                    closed = False

                resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
                resp.code = r.status
                resp.msg = r.reason

                # Close the connection when the server requests it.
                if fetch.connection_cache is not None:
                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                        fetch.connection_cache.remove_connection(h.host, h.port)

                return resp

        class HTTPMethodFallback(urllib.request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                fp.read()
                fp.close()

                if req.get_method() != 'GET':
                    newheaders = dict((k, v) for k, v in list(req.headers.items())
                                      if k.lower() not in ("content-length", "content-type"))
                    return self.parent.open(urllib.request.Request(req.get_full_url(),
                                                            headers=newheaders,
                                                            origin_req_host=req.origin_req_host,
                                                            unverifiable=True))

                raise urllib.request.HTTPError(req, code, msg, headers, None)

            # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
            # Forbidden when they actually mean 405 Method Not Allowed.
            http_error_403 = http_error_405


        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
            """
            urllib.request.HTTPRedirectHandler before Python 3.13 has two flaws:

            It resets the method to GET on redirect when we want to follow
            redirects using the original method (typically HEAD). This was fixed
            in 759e8e7.

            It also doesn't handle 308 (Permanent Redirect). This was fixed in
            c379bc5.

            Until we depend on Python 3.13 onwards, copy the redirect_request
            method to fix these issues.
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                m = req.get_method()
                if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                    or code in (301, 302, 303) and m == "POST")):
                    raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp)

                # Strictly (according to RFC 2616), 301 or 302 in response to
                # a POST MUST NOT cause a redirection without confirmation
                # from the user (of urllib.request, in this case).  In practice,
                # essentially all clients do redirect in this case, so we do
                # the same.

                # Be conciliant with URIs containing a space.  This is mainly
                # redundant with the more complete encoding done in http_error_302(),
                # but it is kept for compatibility with other callers.
                newurl = newurl.replace(' ', '%20')

                CONTENT_HEADERS = ("content-length", "content-type")
                newheaders = {k: v for k, v in req.headers.items()
                            if k.lower() not in CONTENT_HEADERS}
                return urllib.request.Request(newurl,
                            method="HEAD" if m == "HEAD" else "GET",
                            headers=newheaders,
                            origin_req_host=req.origin_req_host,
                            unverifiable=True)

            http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302
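            # 308 reuses the stock 302 handler; the actual request rewriting
            # happens in redirect_request() above, which knows about 308.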

        # We need to update the environment here as both the proxy and HTTPS
        # handlers need variables set. The proxy needs http_proxy and friends to
        # be set, and HTTPSHandler ends up calling into openssl to load the
        # certificates. In buildtools configurations this will be looking at the
        # wrong place for certificates by default: we set SSL_CERT_FILE to the
        # right location in the buildtools environment script but as BitBake
        # prunes the environment this is lost. When binaries are executed
        # runfetchcmd ensures these values are in the environment, but this is
        # pure Python so we need to update the environment.
        #
        # Avoid trampling the environment too much by using bb.utils.environment
        # to scope the changes to the build_opener request, which is when the
        # environment lookups happen.
        newenv = bb.fetch2.get_fetcher_environment(d)

        with bb.utils.environment(**newenv):
            import ssl

            if self.check_certs(d):
                context = ssl.create_default_context()
            else:
                context = ssl._create_unverified_context()

            handlers = [FixedHTTPRedirectHandler,
                        HTTPMethodFallback,
                        urllib.request.ProxyHandler(),
                        CacheHTTPHandler(),
                        urllib.request.HTTPSHandler(context=context)]
            opener = urllib.request.build_opener(*handlers)

            try:
                parts = urllib.parse.urlparse(ud.url.split(";")[0])
                uri = "{}://{}{}".format(parts.scheme, parts.netloc, parts.path)
                r = urllib.request.Request(uri)
                r.get_method = lambda: "HEAD"
                # Some servers (FusionForge, as used on Alioth) require that the
                # optional Accept header is set.
                r.add_header("Accept", "*/*")
                r.add_header("User-Agent", "bitbake/{}".format(bb.__version__))
                def add_basic_auth(login_str, request):
                    '''Adds Basic auth to http request, pass in login:password as string'''
                    import base64
                    encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
                    authheader = "Basic %s" % encodeuser
                    request.add_header("Authorization", authheader)

                if ud.user and ud.pswd:
                    add_basic_auth(ud.user + ':' + ud.pswd, r)

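                # Fall back to credentials from ~/.netrc, where an entry looks
                # like: machine example.com login myuser password mypass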
                try:
                    import netrc
                    auth_data = netrc.netrc().authenticators(urllib.parse.urlparse(uri).hostname)
                    if auth_data:
                        login, _, password = auth_data
                        add_basic_auth("%s:%s" % (login, password), r)
                except (FileNotFoundError, netrc.NetrcParseError):
                    pass

                with opener.open(r, timeout=100) as response:
                    pass
            except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e:
                if try_again:
                    logger.debug2("checkstatus: trying again")
                    return self.checkstatus(fetch, ud, d, False)
                else:
                    # debug for now to avoid spamming the logs in e.g. remote sstate searches
                    logger.debug2("checkstatus() urlopen failed for %s: %s" % (uri, e))
                    return False

        return True

    def _parse_path(self, regex, s):
        """
        Find and group name, version and archive type in the given string s
        """

        m = regex.search(s)
        if m:
            pname = ''
            pver = ''
            ptype = ''

            mdict = m.groupdict()
            if 'name' in mdict.keys():
                pname = mdict['name']
            if 'pver' in mdict.keys():
                pver = mdict['pver']
            if 'type' in mdict.keys():
                ptype = mdict['type']

            bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))

            return (pname, pver, ptype)

        return None

    def _modelate_version(self, version):
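        # Normalize a version string for numeric comparison: separators become
        # dots and pre-release tags map to numbers (alpha -> .10., beta -> .100.,
        # rc -> .1000.) so that alpha < beta < rc, e.g. "1.0rc1" -> "1.0.1000.1".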
        if version[0] in ['.', '-']:
            if version[1].isdigit():
                version = version[1] + version[0] + version[2:len(version)]
            else:
                version = version[1:len(version)]

        version = re.sub('-', '.', version)
        version = re.sub('_', '.', version)
        version = re.sub('(rc)+', '.1000.', version)
        version = re.sub('(beta)+', '.100.', version)
        version = re.sub('(alpha)+', '.10.', version)
        if version[0] == 'v':
            version = version[1:len(version)]
        return version

    def _vercmp(self, old, new):
        """
        Check whether 'new' is newer than 'old'. We use the existing vercmp()
        for this. PE is cleared in the comparison as it isn't relevant to the
        build, and PR is cleared too for simplicity, as it's difficult to
        extract from the various upstream formats.
        """

        (oldpn, oldpv, oldsuffix) = old
        (newpn, newpv, newsuffix) = new

        # Check for a new suffix type that we have never heard of before
        if newsuffix:
            m = self.suffix_regex_comp.search(newsuffix)
            if not m:
                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
                return False

        # Not our package so ignore it
        if oldpn != newpn:
            return False

        oldpv = self._modelate_version(oldpv)
        newpv = self._modelate_version(newpv)

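        # bb.utils.vercmp() is negative when its first argument is the older
        # version, so a negative return here means 'new' is newer than 'old'.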
        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))

    def _fetch_index(self, uri, ud, d):
        """
        Run fetch checkstatus to get directory information
        """
        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
            fetchcmd = self.basecmd
            fetchcmd += " -O " + f.name + " '" + uri + "'"
            try:
                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
                fetchresult = f.read()
            except bb.fetch2.BBFetchException:
                fetchresult = ""

        return fetchresult

    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
        """
        Return the latest version of a package inside a given directory path
        If error or no version, return ""
        """
        valid = 0
        version = ['', '', '']

        bb.debug(3, "VersionURL: %s" % (url))
        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            bb.debug(3, "*** %s NO SOUP" % (url))
            return ""

        for line in soup.find_all('a', href=True):
            bb.debug(3, "line['href'] = '%s'" % (line['href']))
            bb.debug(3, "line = '%s'" % (str(line)))

            newver = self._parse_path(package_regex, line['href'])
            if not newver:
                newver = self._parse_path(package_regex, str(line))

            if newver:
                bb.debug(3, "Upstream version found: %s" % newver[1])
                if valid == 0:
                    version = newver
                    valid = 1
                elif self._vercmp(version, newver) < 0:
                    version = newver

        pupver = re.sub('_', '.', version[1])

        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
                (package, pupver or "N/A", current_version[1]))

        if valid:
            return pupver

        return ""

    def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
        """
        Scan every directory in order to get upstream version.
        """
        version_dir = ['', '', '']
        version = ['', '', '']

        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))")
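        # Split a directory name into prefix and version, e.g. "v2.5" gives
        # pfx "v" and ver "2.5".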
        s = dirver_regex.search(dirver)
        if s:
            version_dir[1] = s.group('ver')
        else:
            version_dir[1] = dirver

        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            return version[1]

        for line in soup.find_all('a', href=True):
            s = dirver_regex.search(line['href'].strip("/"))
            if s:
                sver = s.group('ver')

                # When the prefix is part of the version directory we need to
                # ensure that only the version directory is used, so remove
                # any preceding directories.
                #
                # Example: pfx = '/dir1/dir2/v' and version = '2.5'; the expected
                # result is v2.5.
                spfx = s.group('pfx').split('/')[-1]

                version_dir_new = ['', sver, '']
                if self._vercmp(version_dir, version_dir_new) <= 0:
                    dirver_new = spfx + sver
                    path = ud.path.replace(dirver, dirver_new, True) \
                        .split(package)[0]
                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
                        ud.user, ud.pswd, {}])

                    pupver = self._check_latest_version(uri,
                            package, package_regex, current_version, ud, d)
                    if pupver:
                        version[1] = pupver

                    version_dir = version_dir_new

        return version[1]

    def _init_regexes(self, package, ud, d):
        """
        Match as many patterns as possible such as:
                gnome-common-2.20.0.tar.gz (most common format)
                gtk+-2.90.1.tar.gz
                xf86-input-synaptics-12.6.9.tar.gz
                dri2proto-2.3.tar.gz
                blktool_4.orig.tar.gz
                libid3tag-0.15.1b.tar.gz
                unzip552.tar.gz
                icu4c-3_6-src.tgz
                genext2fs_1.3.orig.tar.gz
                gst-fluendo-mp3
        """
        # match most patterns which use "-" as the separator before the version digits
        pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
        # a loose pattern such as for unzip552.tar.gz
        pn_prefix2 = r"[a-zA-Z]+"
        # a loose pattern such as for 80325-quicky-0.4.tar.gz
        pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
        # Save the Package Name (pn) Regex for use later
        pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

        # match version
        pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

        # match arch
        parch_regex = "-source|_all_"

        # src.rpm extension was added only for the rpm package. It can be removed
        # if the rpm package will always be considered as having to be manually
        # upgraded
        psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)"

        # match name, version and archive type of a package
        package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
                                                    % (pn_regex, pver_regex, parch_regex, psuffix_regex))
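        # e.g. "gnome-common-2.20.0.tar.gz" matches with pver "2.20.0" and
        # type "tar.gz".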
        self.suffix_regex_comp = re.compile(psuffix_regex)

        # Compile the regex: either a package-specific UPSTREAM_CHECK_REGEX or
        # the generic pattern built above
        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
        if pn_regex:
            package_custom_regex_comp = re.compile(pn_regex)
        else:
            version = self._parse_path(package_regex_comp, package)
            if version:
                package_custom_regex_comp = re.compile(
                    r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
            else:
                package_custom_regex_comp = None

        return package_custom_regex_comp

    def latest_versionstring(self, ud, d):
        """
        Manipulate the URL and try to obtain the latest package version.

        Sanity check to ensure same name and type.
        """
        package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV'), '']

        # it's possible to have no version in the package name, such as spectrum-fw
        if not re.search(r"\d+", package):
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
            return (current_version[1], '')

        package_regex = self._init_regexes(package, ud, d)
        if package_regex is None:
            bb.warn("latest_versionstring: package %s doesn't match pattern" % (package))
            return ('', '')
        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

        uri = ""
        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
        if not regex_uri:
            path = ud.path.split(package)[0]

            # search for version matches on folders inside the path, like:
            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
            dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
            m = dirver_regex.findall(path)
            if m:
                pn = d.getVar('PN')
                dirver = m[-1][0]

                dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
                if not dirver_pn_regex.search(dirver):
                    return (self._check_latest_version_by_dir(dirver,
                        package, package_regex, current_version, ud, d), '')

            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
        else:
            uri = regex_uri

        return (self._check_latest_version(uri, package, package_regex,
                current_version, ud, d), '')