xref: /openbmc/openbmc/poky/bitbake/lib/bb/fetch2/wget.py (revision 82c905dc)
1eb8dc403SDave Cobbley"""
2eb8dc403SDave CobbleyBitBake 'Fetch' implementations
3eb8dc403SDave Cobbley
4eb8dc403SDave CobbleyClasses for obtaining upstream sources for the
5eb8dc403SDave CobbleyBitBake build tools.
6eb8dc403SDave Cobbley
7eb8dc403SDave Cobbley"""
8eb8dc403SDave Cobbley
9eb8dc403SDave Cobbley# Copyright (C) 2003, 2004  Chris Larson
10eb8dc403SDave Cobbley#
11c342db35SBrad Bishop# SPDX-License-Identifier: GPL-2.0-only
12eb8dc403SDave Cobbley#
13eb8dc403SDave Cobbley# Based on functions from the base bb module, Copyright 2003 Holger Schurig
14eb8dc403SDave Cobbley
15*82c905dcSAndrew Geisslerimport shlex
16eb8dc403SDave Cobbleyimport re
17eb8dc403SDave Cobbleyimport tempfile
18eb8dc403SDave Cobbleyimport os
19eb8dc403SDave Cobbleyimport errno
20eb8dc403SDave Cobbleyimport bb
21eb8dc403SDave Cobbleyimport bb.progress
2219323693SBrad Bishopimport socket
2319323693SBrad Bishopimport http.client
24eb8dc403SDave Cobbleyimport urllib.request, urllib.parse, urllib.error
25eb8dc403SDave Cobbleyfrom   bb.fetch2 import FetchMethod
26eb8dc403SDave Cobbleyfrom   bb.fetch2 import FetchError
27eb8dc403SDave Cobbleyfrom   bb.fetch2 import logger
28eb8dc403SDave Cobbleyfrom   bb.fetch2 import runfetchcmd
29eb8dc403SDave Cobbleyfrom   bb.utils import export_proxies
30eb8dc403SDave Cobbleyfrom   bs4 import BeautifulSoup
31eb8dc403SDave Cobbleyfrom   bs4 import SoupStrainer
32eb8dc403SDave Cobbley
class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Progress handler that turns wget's dot-style output into progress
    updates.

    Note: only works when wget is run with --progress=dot and verbose
    output (-v, or at least neither -q nor -nv).
    """
    def __init__(self, d):
        super().__init__(d)
        # Fire a 0% event right away so that the progress bar gets shown
        self._fire_progress(0)

    def writeline(self, line):
        """
        Inspect one line of wget output.

        Returns False when the line carried progress information (which is
        passed on through update()), and True for any other line.
        """
        samples = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if not samples:
            return True

        # A line can hold several samples; only the last one is reported
        percent, speed = samples[-1]
        self.update(int(percent), speed + '/s')
        return False
52eb8dc403SDave Cobbley
53eb8dc403SDave Cobbley
54eb8dc403SDave Cobbleyclass Wget(FetchMethod):
55eb8dc403SDave Cobbley    """Class to fetch urls via 'wget'"""
56eb8dc403SDave Cobbley    def supports(self, ud, d):
57eb8dc403SDave Cobbley        """
58eb8dc403SDave Cobbley        Check to see if a given url can be fetched with wget.
59eb8dc403SDave Cobbley        """
60eb8dc403SDave Cobbley        return ud.type in ['http', 'https', 'ftp']
61eb8dc403SDave Cobbley
62eb8dc403SDave Cobbley    def recommends_checksum(self, urldata):
63eb8dc403SDave Cobbley        return True
64eb8dc403SDave Cobbley
65eb8dc403SDave Cobbley    def urldata_init(self, ud, d):
66eb8dc403SDave Cobbley        if 'protocol' in ud.parm:
67eb8dc403SDave Cobbley            if ud.parm['protocol'] == 'git':
68eb8dc403SDave Cobbley                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)
69eb8dc403SDave Cobbley
70eb8dc403SDave Cobbley        if 'downloadfilename' in ud.parm:
71eb8dc403SDave Cobbley            ud.basename = ud.parm['downloadfilename']
72eb8dc403SDave Cobbley        else:
73eb8dc403SDave Cobbley            ud.basename = os.path.basename(ud.path)
74eb8dc403SDave Cobbley
75eb8dc403SDave Cobbley        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
76eb8dc403SDave Cobbley        if not ud.localfile:
77eb8dc403SDave Cobbley            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))
78eb8dc403SDave Cobbley
79eb8dc403SDave Cobbley        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate"
80eb8dc403SDave Cobbley
81eb8dc403SDave Cobbley    def _runwget(self, ud, d, command, quiet, workdir=None):
82eb8dc403SDave Cobbley
83eb8dc403SDave Cobbley        progresshandler = WgetProgressHandler(d)
84eb8dc403SDave Cobbley
85eb8dc403SDave Cobbley        logger.debug(2, "Fetching %s using command '%s'" % (ud.url, command))
86eb8dc403SDave Cobbley        bb.fetch2.check_network_access(d, command, ud.url)
87eb8dc403SDave Cobbley        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)
88eb8dc403SDave Cobbley
89eb8dc403SDave Cobbley    def download(self, ud, d):
90eb8dc403SDave Cobbley        """Fetch urls"""
91eb8dc403SDave Cobbley
92eb8dc403SDave Cobbley        fetchcmd = self.basecmd
93eb8dc403SDave Cobbley
94eb8dc403SDave Cobbley        if 'downloadfilename' in ud.parm:
95*82c905dcSAndrew Geissler            localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile)
96*82c905dcSAndrew Geissler            bb.utils.mkdirhier(os.path.dirname(localpath))
97*82c905dcSAndrew Geissler            fetchcmd += " -O %s" % shlex.quote(localpath)
98eb8dc403SDave Cobbley
99eb8dc403SDave Cobbley        if ud.user and ud.pswd:
100eb8dc403SDave Cobbley            fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)
101eb8dc403SDave Cobbley
102eb8dc403SDave Cobbley        uri = ud.url.split(";")[0]
103eb8dc403SDave Cobbley        if os.path.exists(ud.localpath):
104eb8dc403SDave Cobbley            # file exists, but we didnt complete it.. trying again..
105eb8dc403SDave Cobbley            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
106eb8dc403SDave Cobbley        else:
107eb8dc403SDave Cobbley            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)
108eb8dc403SDave Cobbley
109eb8dc403SDave Cobbley        self._runwget(ud, d, fetchcmd, False)
110eb8dc403SDave Cobbley
111eb8dc403SDave Cobbley        # Sanity check since wget can pretend it succeed when it didn't
112eb8dc403SDave Cobbley        # Also, this used to happen if sourceforge sent us to the mirror page
113eb8dc403SDave Cobbley        if not os.path.exists(ud.localpath):
114eb8dc403SDave Cobbley            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)
115eb8dc403SDave Cobbley
116eb8dc403SDave Cobbley        if os.path.getsize(ud.localpath) == 0:
117eb8dc403SDave Cobbley            os.remove(ud.localpath)
118eb8dc403SDave Cobbley            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
119eb8dc403SDave Cobbley
120eb8dc403SDave Cobbley        return True
121eb8dc403SDave Cobbley
122eb8dc403SDave Cobbley    def checkstatus(self, fetch, ud, d, try_again=True):
123eb8dc403SDave Cobbley        class HTTPConnectionCache(http.client.HTTPConnection):
124eb8dc403SDave Cobbley            if fetch.connection_cache:
125eb8dc403SDave Cobbley                def connect(self):
126eb8dc403SDave Cobbley                    """Connect to the host and port specified in __init__."""
127eb8dc403SDave Cobbley
128eb8dc403SDave Cobbley                    sock = fetch.connection_cache.get_connection(self.host, self.port)
129eb8dc403SDave Cobbley                    if sock:
130eb8dc403SDave Cobbley                        self.sock = sock
131eb8dc403SDave Cobbley                    else:
132eb8dc403SDave Cobbley                        self.sock = socket.create_connection((self.host, self.port),
133eb8dc403SDave Cobbley                                    self.timeout, self.source_address)
134eb8dc403SDave Cobbley                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)
135eb8dc403SDave Cobbley
136eb8dc403SDave Cobbley                    if self._tunnel_host:
137eb8dc403SDave Cobbley                        self._tunnel()
138eb8dc403SDave Cobbley
139eb8dc403SDave Cobbley        class CacheHTTPHandler(urllib.request.HTTPHandler):
140eb8dc403SDave Cobbley            def http_open(self, req):
141eb8dc403SDave Cobbley                return self.do_open(HTTPConnectionCache, req)
142eb8dc403SDave Cobbley
143eb8dc403SDave Cobbley            def do_open(self, http_class, req):
144eb8dc403SDave Cobbley                """Return an addinfourl object for the request, using http_class.
145eb8dc403SDave Cobbley
146eb8dc403SDave Cobbley                http_class must implement the HTTPConnection API from httplib.
147eb8dc403SDave Cobbley                The addinfourl return value is a file-like object.  It also
148eb8dc403SDave Cobbley                has methods and attributes including:
149eb8dc403SDave Cobbley                    - info(): return a mimetools.Message object for the headers
150eb8dc403SDave Cobbley                    - geturl(): return the original request URL
151eb8dc403SDave Cobbley                    - code: HTTP status code
152eb8dc403SDave Cobbley                """
153eb8dc403SDave Cobbley                host = req.host
154eb8dc403SDave Cobbley                if not host:
15519323693SBrad Bishop                    raise urllib.error.URLError('no host given')
156eb8dc403SDave Cobbley
157eb8dc403SDave Cobbley                h = http_class(host, timeout=req.timeout) # will parse host:port
158eb8dc403SDave Cobbley                h.set_debuglevel(self._debuglevel)
159eb8dc403SDave Cobbley
160eb8dc403SDave Cobbley                headers = dict(req.unredirected_hdrs)
161eb8dc403SDave Cobbley                headers.update(dict((k, v) for k, v in list(req.headers.items())
162eb8dc403SDave Cobbley                            if k not in headers))
163eb8dc403SDave Cobbley
164eb8dc403SDave Cobbley                # We want to make an HTTP/1.1 request, but the addinfourl
165eb8dc403SDave Cobbley                # class isn't prepared to deal with a persistent connection.
166eb8dc403SDave Cobbley                # It will try to read all remaining data from the socket,
167eb8dc403SDave Cobbley                # which will block while the server waits for the next request.
168eb8dc403SDave Cobbley                # So make sure the connection gets closed after the (only)
169eb8dc403SDave Cobbley                # request.
170eb8dc403SDave Cobbley
171eb8dc403SDave Cobbley                # Don't close connection when connection_cache is enabled,
172eb8dc403SDave Cobbley                if fetch.connection_cache is None:
173eb8dc403SDave Cobbley                    headers["Connection"] = "close"
174eb8dc403SDave Cobbley                else:
175eb8dc403SDave Cobbley                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0
176eb8dc403SDave Cobbley
177eb8dc403SDave Cobbley                headers = dict(
178eb8dc403SDave Cobbley                    (name.title(), val) for name, val in list(headers.items()))
179eb8dc403SDave Cobbley
180eb8dc403SDave Cobbley                if req._tunnel_host:
181eb8dc403SDave Cobbley                    tunnel_headers = {}
182eb8dc403SDave Cobbley                    proxy_auth_hdr = "Proxy-Authorization"
183eb8dc403SDave Cobbley                    if proxy_auth_hdr in headers:
184eb8dc403SDave Cobbley                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
185eb8dc403SDave Cobbley                        # Proxy-Authorization should not be sent to origin
186eb8dc403SDave Cobbley                        # server.
187eb8dc403SDave Cobbley                        del headers[proxy_auth_hdr]
188eb8dc403SDave Cobbley                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)
189eb8dc403SDave Cobbley
190eb8dc403SDave Cobbley                try:
191eb8dc403SDave Cobbley                    h.request(req.get_method(), req.selector, req.data, headers)
192eb8dc403SDave Cobbley                except socket.error as err: # XXX what error?
193eb8dc403SDave Cobbley                    # Don't close connection when cache is enabled.
194eb8dc403SDave Cobbley                    # Instead, try to detect connections that are no longer
195eb8dc403SDave Cobbley                    # usable (for example, closed unexpectedly) and remove
196eb8dc403SDave Cobbley                    # them from the cache.
197eb8dc403SDave Cobbley                    if fetch.connection_cache is None:
198eb8dc403SDave Cobbley                        h.close()
199eb8dc403SDave Cobbley                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
200eb8dc403SDave Cobbley                        # This happens when the server closes the connection despite the Keep-Alive.
201eb8dc403SDave Cobbley                        # Apparently urllib then uses the file descriptor, expecting it to be
202eb8dc403SDave Cobbley                        # connected, when in reality the connection is already gone.
203eb8dc403SDave Cobbley                        # We let the request fail and expect it to be
204eb8dc403SDave Cobbley                        # tried once more ("try_again" in check_status()),
205eb8dc403SDave Cobbley                        # with the dead connection removed from the cache.
206eb8dc403SDave Cobbley                        # If it still fails, we give up, which can happend for bad
207eb8dc403SDave Cobbley                        # HTTP proxy settings.
208eb8dc403SDave Cobbley                        fetch.connection_cache.remove_connection(h.host, h.port)
209eb8dc403SDave Cobbley                    raise urllib.error.URLError(err)
210eb8dc403SDave Cobbley                else:
211eb8dc403SDave Cobbley                    try:
212eb8dc403SDave Cobbley                        r = h.getresponse(buffering=True)
213eb8dc403SDave Cobbley                    except TypeError: # buffering kw not supported
214eb8dc403SDave Cobbley                        r = h.getresponse()
215eb8dc403SDave Cobbley
216eb8dc403SDave Cobbley                # Pick apart the HTTPResponse object to get the addinfourl
217eb8dc403SDave Cobbley                # object initialized properly.
218eb8dc403SDave Cobbley
219eb8dc403SDave Cobbley                # Wrap the HTTPResponse object in socket's file object adapter
220eb8dc403SDave Cobbley                # for Windows.  That adapter calls recv(), so delegate recv()
221eb8dc403SDave Cobbley                # to read().  This weird wrapping allows the returned object to
222eb8dc403SDave Cobbley                # have readline() and readlines() methods.
223eb8dc403SDave Cobbley
224eb8dc403SDave Cobbley                # XXX It might be better to extract the read buffering code
225eb8dc403SDave Cobbley                # out of socket._fileobject() and into a base class.
226eb8dc403SDave Cobbley                r.recv = r.read
227eb8dc403SDave Cobbley
228eb8dc403SDave Cobbley                # no data, just have to read
229eb8dc403SDave Cobbley                r.read()
230eb8dc403SDave Cobbley                class fp_dummy(object):
231eb8dc403SDave Cobbley                    def read(self):
232eb8dc403SDave Cobbley                        return ""
233eb8dc403SDave Cobbley                    def readline(self):
234eb8dc403SDave Cobbley                        return ""
235eb8dc403SDave Cobbley                    def close(self):
236eb8dc403SDave Cobbley                        pass
237eb8dc403SDave Cobbley                    closed = False
238eb8dc403SDave Cobbley
23919323693SBrad Bishop                resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
240eb8dc403SDave Cobbley                resp.code = r.status
241eb8dc403SDave Cobbley                resp.msg = r.reason
242eb8dc403SDave Cobbley
243eb8dc403SDave Cobbley                # Close connection when server request it.
244eb8dc403SDave Cobbley                if fetch.connection_cache is not None:
245eb8dc403SDave Cobbley                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
246eb8dc403SDave Cobbley                        fetch.connection_cache.remove_connection(h.host, h.port)
247eb8dc403SDave Cobbley
248eb8dc403SDave Cobbley                return resp
249eb8dc403SDave Cobbley
250eb8dc403SDave Cobbley        class HTTPMethodFallback(urllib.request.BaseHandler):
251eb8dc403SDave Cobbley            """
252eb8dc403SDave Cobbley            Fallback to GET if HEAD is not allowed (405 HTTP error)
253eb8dc403SDave Cobbley            """
254eb8dc403SDave Cobbley            def http_error_405(self, req, fp, code, msg, headers):
255eb8dc403SDave Cobbley                fp.read()
256eb8dc403SDave Cobbley                fp.close()
257eb8dc403SDave Cobbley
25808902b01SBrad Bishop                if req.get_method() != 'GET':
259eb8dc403SDave Cobbley                    newheaders = dict((k, v) for k, v in list(req.headers.items())
260eb8dc403SDave Cobbley                                      if k.lower() not in ("content-length", "content-type"))
261eb8dc403SDave Cobbley                    return self.parent.open(urllib.request.Request(req.get_full_url(),
262eb8dc403SDave Cobbley                                                            headers=newheaders,
263eb8dc403SDave Cobbley                                                            origin_req_host=req.origin_req_host,
264eb8dc403SDave Cobbley                                                            unverifiable=True))
265eb8dc403SDave Cobbley
26608902b01SBrad Bishop                raise urllib.request.HTTPError(req, code, msg, headers, None)
26719323693SBrad Bishop
26819323693SBrad Bishop            # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
26919323693SBrad Bishop            # Forbidden when they actually mean 405 Method Not Allowed.
270eb8dc403SDave Cobbley            http_error_403 = http_error_405
271eb8dc403SDave Cobbley
272eb8dc403SDave Cobbley
273eb8dc403SDave Cobbley        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
274eb8dc403SDave Cobbley            """
275eb8dc403SDave Cobbley            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
276eb8dc403SDave Cobbley            when we want to follow redirects using the original method.
277eb8dc403SDave Cobbley            """
278eb8dc403SDave Cobbley            def redirect_request(self, req, fp, code, msg, headers, newurl):
279eb8dc403SDave Cobbley                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
28019323693SBrad Bishop                newreq.get_method = req.get_method
281eb8dc403SDave Cobbley                return newreq
282eb8dc403SDave Cobbley        exported_proxies = export_proxies(d)
283eb8dc403SDave Cobbley
284eb8dc403SDave Cobbley        handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback]
28519323693SBrad Bishop        if exported_proxies:
286eb8dc403SDave Cobbley            handlers.append(urllib.request.ProxyHandler())
287eb8dc403SDave Cobbley        handlers.append(CacheHTTPHandler())
28819323693SBrad Bishop        # Since Python 2.7.9 ssl cert validation is enabled by default
289eb8dc403SDave Cobbley        # see PEP-0476, this causes verification errors on some https servers
290eb8dc403SDave Cobbley        # so disable by default.
291eb8dc403SDave Cobbley        import ssl
292eb8dc403SDave Cobbley        if hasattr(ssl, '_create_unverified_context'):
293eb8dc403SDave Cobbley            handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context()))
294eb8dc403SDave Cobbley        opener = urllib.request.build_opener(*handlers)
295eb8dc403SDave Cobbley
296eb8dc403SDave Cobbley        try:
297eb8dc403SDave Cobbley            uri = ud.url.split(";")[0]
298eb8dc403SDave Cobbley            r = urllib.request.Request(uri)
299eb8dc403SDave Cobbley            r.get_method = lambda: "HEAD"
300eb8dc403SDave Cobbley            # Some servers (FusionForge, as used on Alioth) require that the
301eb8dc403SDave Cobbley            # optional Accept header is set.
302eb8dc403SDave Cobbley            r.add_header("Accept", "*/*")
303*82c905dcSAndrew Geissler            r.add_header("User-Agent", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12")
304eb8dc403SDave Cobbley            def add_basic_auth(login_str, request):
305eb8dc403SDave Cobbley                '''Adds Basic auth to http request, pass in login:password as string'''
306eb8dc403SDave Cobbley                import base64
307eb8dc403SDave Cobbley                encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
308eb8dc403SDave Cobbley                authheader = "Basic %s" % encodeuser
309eb8dc403SDave Cobbley                r.add_header("Authorization", authheader)
310eb8dc403SDave Cobbley
31119323693SBrad Bishop            if ud.user and ud.pswd:
31219323693SBrad Bishop                add_basic_auth(ud.user + ':' + ud.pswd, r)
313eb8dc403SDave Cobbley
314eb8dc403SDave Cobbley            try:
31519323693SBrad Bishop                import netrc
316eb8dc403SDave Cobbley                n = netrc.netrc()
317eb8dc403SDave Cobbley                login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
318eb8dc403SDave Cobbley                add_basic_auth("%s:%s" % (login, password), r)
319eb8dc403SDave Cobbley            except (TypeError, ImportError, IOError, netrc.NetrcParseError):
320eb8dc403SDave Cobbley                pass
321eb8dc403SDave Cobbley
322eb8dc403SDave Cobbley            with opener.open(r) as response:
323eb8dc403SDave Cobbley                pass
324eb8dc403SDave Cobbley        except urllib.error.URLError as e:
325eb8dc403SDave Cobbley            if try_again:
326eb8dc403SDave Cobbley                logger.debug(2, "checkstatus: trying again")
327eb8dc403SDave Cobbley                return self.checkstatus(fetch, ud, d, False)
328eb8dc403SDave Cobbley            else:
329eb8dc403SDave Cobbley                # debug for now to avoid spamming the logs in e.g. remote sstate searches
330eb8dc403SDave Cobbley                logger.debug(2, "checkstatus() urlopen failed: %s" % e)
331eb8dc403SDave Cobbley                return False
332eb8dc403SDave Cobbley        return True
333eb8dc403SDave Cobbley
334eb8dc403SDave Cobbley    def _parse_path(self, regex, s):
335eb8dc403SDave Cobbley        """
336eb8dc403SDave Cobbley        Find and group name, version and archive type in the given string s
337eb8dc403SDave Cobbley        """
338eb8dc403SDave Cobbley
339eb8dc403SDave Cobbley        m = regex.search(s)
340eb8dc403SDave Cobbley        if m:
341eb8dc403SDave Cobbley            pname = ''
342eb8dc403SDave Cobbley            pver = ''
343eb8dc403SDave Cobbley            ptype = ''
344eb8dc403SDave Cobbley
345eb8dc403SDave Cobbley            mdict = m.groupdict()
346eb8dc403SDave Cobbley            if 'name' in mdict.keys():
347eb8dc403SDave Cobbley                pname = mdict['name']
348eb8dc403SDave Cobbley            if 'pver' in mdict.keys():
349eb8dc403SDave Cobbley                pver = mdict['pver']
350eb8dc403SDave Cobbley            if 'type' in mdict.keys():
351eb8dc403SDave Cobbley                ptype = mdict['type']
352eb8dc403SDave Cobbley
353eb8dc403SDave Cobbley            bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))
354eb8dc403SDave Cobbley
355eb8dc403SDave Cobbley            return (pname, pver, ptype)
356eb8dc403SDave Cobbley
357eb8dc403SDave Cobbley        return None
358eb8dc403SDave Cobbley
359eb8dc403SDave Cobbley    def _modelate_version(self, version):
360eb8dc403SDave Cobbley        if version[0] in ['.', '-']:
361eb8dc403SDave Cobbley            if version[1].isdigit():
362eb8dc403SDave Cobbley                version = version[1] + version[0] + version[2:len(version)]
363eb8dc403SDave Cobbley            else:
364eb8dc403SDave Cobbley                version = version[1:len(version)]
365eb8dc403SDave Cobbley
366eb8dc403SDave Cobbley        version = re.sub('-', '.', version)
367eb8dc403SDave Cobbley        version = re.sub('_', '.', version)
368eb8dc403SDave Cobbley        version = re.sub('(rc)+', '.1000.', version)
369eb8dc403SDave Cobbley        version = re.sub('(beta)+', '.100.', version)
370eb8dc403SDave Cobbley        version = re.sub('(alpha)+', '.10.', version)
371eb8dc403SDave Cobbley        if version[0] == 'v':
372eb8dc403SDave Cobbley            version = version[1:len(version)]
373eb8dc403SDave Cobbley        return version
374eb8dc403SDave Cobbley
375eb8dc403SDave Cobbley    def _vercmp(self, old, new):
376eb8dc403SDave Cobbley        """
377eb8dc403SDave Cobbley        Check whether 'new' is newer than 'old' version. We use existing vercmp() for the
378eb8dc403SDave Cobbley        purpose. PE is cleared in comparison as it's not for build, and PR is cleared too
379eb8dc403SDave Cobbley        for simplicity as it's somehow difficult to get from various upstream format
380eb8dc403SDave Cobbley        """
381eb8dc403SDave Cobbley
382eb8dc403SDave Cobbley        (oldpn, oldpv, oldsuffix) = old
383eb8dc403SDave Cobbley        (newpn, newpv, newsuffix) = new
384eb8dc403SDave Cobbley
38519323693SBrad Bishop        # Check for a new suffix type that we have never heard of before
38619323693SBrad Bishop        if newsuffix:
387eb8dc403SDave Cobbley            m = self.suffix_regex_comp.search(newsuffix)
388eb8dc403SDave Cobbley            if not m:
389eb8dc403SDave Cobbley                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
390eb8dc403SDave Cobbley                return False
391eb8dc403SDave Cobbley
39219323693SBrad Bishop        # Not our package so ignore it
393eb8dc403SDave Cobbley        if oldpn != newpn:
394eb8dc403SDave Cobbley            return False
395eb8dc403SDave Cobbley
396eb8dc403SDave Cobbley        oldpv = self._modelate_version(oldpv)
397eb8dc403SDave Cobbley        newpv = self._modelate_version(newpv)
398eb8dc403SDave Cobbley
399eb8dc403SDave Cobbley        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))
400eb8dc403SDave Cobbley
401eb8dc403SDave Cobbley    def _fetch_index(self, uri, ud, d):
402eb8dc403SDave Cobbley        """
403eb8dc403SDave Cobbley        Run fetch checkstatus to get directory information
404eb8dc403SDave Cobbley        """
405eb8dc403SDave Cobbley        f = tempfile.NamedTemporaryFile()
406eb8dc403SDave Cobbley        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
407eb8dc403SDave Cobbley            agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12"
408eb8dc403SDave Cobbley            fetchcmd = self.basecmd
409eb8dc403SDave Cobbley            fetchcmd += " -O " + f.name + " --user-agent='" + agent + "' '" + uri + "'"
410eb8dc403SDave Cobbley            try:
411eb8dc403SDave Cobbley                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
412eb8dc403SDave Cobbley                fetchresult = f.read()
413eb8dc403SDave Cobbley            except bb.fetch2.BBFetchException:
414eb8dc403SDave Cobbley                fetchresult = ""
415eb8dc403SDave Cobbley
416eb8dc403SDave Cobbley        return fetchresult
417eb8dc403SDave Cobbley
418eb8dc403SDave Cobbley    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
419eb8dc403SDave Cobbley        """
420eb8dc403SDave Cobbley        Return the latest version of a package inside a given directory path
421eb8dc403SDave Cobbley        If error or no version, return ""
422eb8dc403SDave Cobbley        """
423eb8dc403SDave Cobbley        valid = 0
424eb8dc403SDave Cobbley        version = ['', '', '']
425eb8dc403SDave Cobbley
426eb8dc403SDave Cobbley        bb.debug(3, "VersionURL: %s" % (url))
427eb8dc403SDave Cobbley        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
428eb8dc403SDave Cobbley        if not soup:
429eb8dc403SDave Cobbley            bb.debug(3, "*** %s NO SOUP" % (url))
430eb8dc403SDave Cobbley            return ""
431eb8dc403SDave Cobbley
432eb8dc403SDave Cobbley        for line in soup.find_all('a', href=True):
433eb8dc403SDave Cobbley            bb.debug(3, "line['href'] = '%s'" % (line['href']))
434eb8dc403SDave Cobbley            bb.debug(3, "line = '%s'" % (str(line)))
435eb8dc403SDave Cobbley
436eb8dc403SDave Cobbley            newver = self._parse_path(package_regex, line['href'])
437eb8dc403SDave Cobbley            if not newver:
438eb8dc403SDave Cobbley                newver = self._parse_path(package_regex, str(line))
439eb8dc403SDave Cobbley
440eb8dc403SDave Cobbley            if newver:
441eb8dc403SDave Cobbley                bb.debug(3, "Upstream version found: %s" % newver[1])
442eb8dc403SDave Cobbley                if valid == 0:
443eb8dc403SDave Cobbley                    version = newver
444eb8dc403SDave Cobbley                    valid = 1
445eb8dc403SDave Cobbley                elif self._vercmp(version, newver) < 0:
446eb8dc403SDave Cobbley                    version = newver
447eb8dc403SDave Cobbley
448eb8dc403SDave Cobbley        pupver = re.sub('_', '.', version[1])
449eb8dc403SDave Cobbley
450eb8dc403SDave Cobbley        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
451eb8dc403SDave Cobbley                (package, pupver or "N/A", current_version[1]))
452eb8dc403SDave Cobbley
453eb8dc403SDave Cobbley        if valid:
454eb8dc403SDave Cobbley            return pupver
455eb8dc403SDave Cobbley
456eb8dc403SDave Cobbley        return ""
457eb8dc403SDave Cobbley
45819323693SBrad Bishop    def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
459eb8dc403SDave Cobbley        """
460eb8dc403SDave Cobbley        Scan every directory in order to get upstream version.
461eb8dc403SDave Cobbley        """
462eb8dc403SDave Cobbley        version_dir = ['', '', '']
463eb8dc403SDave Cobbley        version = ['', '', '']
464eb8dc403SDave Cobbley
46519323693SBrad Bishop        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))")
466eb8dc403SDave Cobbley        s = dirver_regex.search(dirver)
467eb8dc403SDave Cobbley        if s:
468eb8dc403SDave Cobbley            version_dir[1] = s.group('ver')
469eb8dc403SDave Cobbley        else:
470eb8dc403SDave Cobbley            version_dir[1] = dirver
471eb8dc403SDave Cobbley
472eb8dc403SDave Cobbley        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
473eb8dc403SDave Cobbley                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
474eb8dc403SDave Cobbley        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))
475eb8dc403SDave Cobbley
476eb8dc403SDave Cobbley        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
477eb8dc403SDave Cobbley        if not soup:
478eb8dc403SDave Cobbley            return version[1]
479eb8dc403SDave Cobbley
480eb8dc403SDave Cobbley        for line in soup.find_all('a', href=True):
481eb8dc403SDave Cobbley            s = dirver_regex.search(line['href'].strip("/"))
482eb8dc403SDave Cobbley            if s:
483eb8dc403SDave Cobbley                sver = s.group('ver')
484eb8dc403SDave Cobbley
485eb8dc403SDave Cobbley                # When prefix is part of the version directory it need to
486eb8dc403SDave Cobbley                # ensure that only version directory is used so remove previous
487eb8dc403SDave Cobbley                # directories if exists.
488eb8dc403SDave Cobbley                #
489eb8dc403SDave Cobbley                # Example: pfx = '/dir1/dir2/v' and version = '2.5' the expected
490eb8dc403SDave Cobbley                # result is v2.5.
491eb8dc403SDave Cobbley                spfx = s.group('pfx').split('/')[-1]
492eb8dc403SDave Cobbley
493eb8dc403SDave Cobbley                version_dir_new = ['', sver, '']
494eb8dc403SDave Cobbley                if self._vercmp(version_dir, version_dir_new) <= 0:
495eb8dc403SDave Cobbley                    dirver_new = spfx + sver
496eb8dc403SDave Cobbley                    path = ud.path.replace(dirver, dirver_new, True) \
497eb8dc403SDave Cobbley                        .split(package)[0]
498eb8dc403SDave Cobbley                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
499eb8dc403SDave Cobbley                        ud.user, ud.pswd, {}])
500eb8dc403SDave Cobbley
501eb8dc403SDave Cobbley                    pupver = self._check_latest_version(uri,
502eb8dc403SDave Cobbley                            package, package_regex, current_version, ud, d)
503eb8dc403SDave Cobbley                    if pupver:
504eb8dc403SDave Cobbley                        version[1] = pupver
505eb8dc403SDave Cobbley
506eb8dc403SDave Cobbley                    version_dir = version_dir_new
507eb8dc403SDave Cobbley
508eb8dc403SDave Cobbley        return version[1]
509eb8dc403SDave Cobbley
510eb8dc403SDave Cobbley    def _init_regexes(self, package, ud, d):
511eb8dc403SDave Cobbley        """
512eb8dc403SDave Cobbley        Match as many patterns as possible such as:
513eb8dc403SDave Cobbley                gnome-common-2.20.0.tar.gz (most common format)
514eb8dc403SDave Cobbley                gtk+-2.90.1.tar.gz
515eb8dc403SDave Cobbley                xf86-input-synaptics-12.6.9.tar.gz
516eb8dc403SDave Cobbley                dri2proto-2.3.tar.gz
517eb8dc403SDave Cobbley                blktool_4.orig.tar.gz
518eb8dc403SDave Cobbley                libid3tag-0.15.1b.tar.gz
519eb8dc403SDave Cobbley                unzip552.tar.gz
520eb8dc403SDave Cobbley                icu4c-3_6-src.tgz
521eb8dc403SDave Cobbley                genext2fs_1.3.orig.tar.gz
522eb8dc403SDave Cobbley                gst-fluendo-mp3
523eb8dc403SDave Cobbley        """
524eb8dc403SDave Cobbley        # match most patterns which uses "-" as separator to version digits
52519323693SBrad Bishop        pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
526eb8dc403SDave Cobbley        # a loose pattern such as for unzip552.tar.gz
52719323693SBrad Bishop        pn_prefix2 = r"[a-zA-Z]+"
528eb8dc403SDave Cobbley        # a loose pattern such as for 80325-quicky-0.4.tar.gz
52919323693SBrad Bishop        pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
530eb8dc403SDave Cobbley        # Save the Package Name (pn) Regex for use later
53119323693SBrad Bishop        pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)
532eb8dc403SDave Cobbley
533eb8dc403SDave Cobbley        # match version
53419323693SBrad Bishop        pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"
535eb8dc403SDave Cobbley
536eb8dc403SDave Cobbley        # match arch
537eb8dc403SDave Cobbley        parch_regex = "-source|_all_"
538eb8dc403SDave Cobbley
539eb8dc403SDave Cobbley        # src.rpm extension was added only for rpm package. Can be removed if the rpm
540eb8dc403SDave Cobbley        # packaged will always be considered as having to be manually upgraded
54119323693SBrad Bishop        psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"
542eb8dc403SDave Cobbley
543eb8dc403SDave Cobbley        # match name, version and archive type of a package
54419323693SBrad Bishop        package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
545eb8dc403SDave Cobbley                                                    % (pn_regex, pver_regex, parch_regex, psuffix_regex))
546eb8dc403SDave Cobbley        self.suffix_regex_comp = re.compile(psuffix_regex)
547eb8dc403SDave Cobbley
548eb8dc403SDave Cobbley        # compile regex, can be specific by package or generic regex
549eb8dc403SDave Cobbley        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
550eb8dc403SDave Cobbley        if pn_regex:
551eb8dc403SDave Cobbley            package_custom_regex_comp = re.compile(pn_regex)
552eb8dc403SDave Cobbley        else:
553eb8dc403SDave Cobbley            version = self._parse_path(package_regex_comp, package)
554eb8dc403SDave Cobbley            if version:
555eb8dc403SDave Cobbley                package_custom_regex_comp = re.compile(
55619323693SBrad Bishop                    r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
557eb8dc403SDave Cobbley                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
558eb8dc403SDave Cobbley            else:
559eb8dc403SDave Cobbley                package_custom_regex_comp = None
560eb8dc403SDave Cobbley
561eb8dc403SDave Cobbley        return package_custom_regex_comp
562eb8dc403SDave Cobbley
563eb8dc403SDave Cobbley    def latest_versionstring(self, ud, d):
564eb8dc403SDave Cobbley        """
565eb8dc403SDave Cobbley        Manipulate the URL and try to obtain the latest package version
566eb8dc403SDave Cobbley
567eb8dc403SDave Cobbley        sanity check to ensure same name and type.
568eb8dc403SDave Cobbley        """
569eb8dc403SDave Cobbley        package = ud.path.split("/")[-1]
570eb8dc403SDave Cobbley        current_version = ['', d.getVar('PV'), '']
571eb8dc403SDave Cobbley
572eb8dc403SDave Cobbley        """possible to have no version in pkg name, such as spectrum-fw"""
57319323693SBrad Bishop        if not re.search(r"\d+", package):
574eb8dc403SDave Cobbley            current_version[1] = re.sub('_', '.', current_version[1])
575eb8dc403SDave Cobbley            current_version[1] = re.sub('-', '.', current_version[1])
576eb8dc403SDave Cobbley            return (current_version[1], '')
577eb8dc403SDave Cobbley
578eb8dc403SDave Cobbley        package_regex = self._init_regexes(package, ud, d)
579eb8dc403SDave Cobbley        if package_regex is None:
580eb8dc403SDave Cobbley            bb.warn("latest_versionstring: package %s don't match pattern" % (package))
581eb8dc403SDave Cobbley            return ('', '')
582eb8dc403SDave Cobbley        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))
583eb8dc403SDave Cobbley
584eb8dc403SDave Cobbley        uri = ""
585eb8dc403SDave Cobbley        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
586eb8dc403SDave Cobbley        if not regex_uri:
587eb8dc403SDave Cobbley            path = ud.path.split(package)[0]
588eb8dc403SDave Cobbley
589eb8dc403SDave Cobbley            # search for version matches on folders inside the path, like:
590eb8dc403SDave Cobbley            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
59119323693SBrad Bishop            dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
592eb8dc403SDave Cobbley            m = dirver_regex.search(path)
593eb8dc403SDave Cobbley            if m:
594eb8dc403SDave Cobbley                pn = d.getVar('PN')
595eb8dc403SDave Cobbley                dirver = m.group('dirver')
596eb8dc403SDave Cobbley
59719323693SBrad Bishop                dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
598eb8dc403SDave Cobbley                if not dirver_pn_regex.search(dirver):
599eb8dc403SDave Cobbley                    return (self._check_latest_version_by_dir(dirver,
600eb8dc403SDave Cobbley                        package, package_regex, current_version, ud, d), '')
601eb8dc403SDave Cobbley
602eb8dc403SDave Cobbley            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
603eb8dc403SDave Cobbley        else:
604eb8dc403SDave Cobbley            uri = regex_uri
605eb8dc403SDave Cobbley
606eb8dc403SDave Cobbley        return (self._check_latest_version(uri, package, package_regex,
607eb8dc403SDave Cobbley                current_version, ud, d), '')
608