xref: /openbmc/openbmc/poky/bitbake/lib/bb/fetch2/wget.py (revision 19323693)
1eb8dc403SDave Cobbley# ex:ts=4:sw=4:sts=4:et
2eb8dc403SDave Cobbley# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
3eb8dc403SDave Cobbley"""
4eb8dc403SDave CobbleyBitBake 'Fetch' implementations
5eb8dc403SDave Cobbley
6eb8dc403SDave CobbleyClasses for obtaining upstream sources for the
7eb8dc403SDave CobbleyBitBake build tools.
8eb8dc403SDave Cobbley
9eb8dc403SDave Cobbley"""
10eb8dc403SDave Cobbley
# Copyright (C) 2003, 2004  Chris Larson
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Based on functions from the base bb module, Copyright 2003 Holger Schurig
27eb8dc403SDave Cobbley
28eb8dc403SDave Cobbleyimport re
29eb8dc403SDave Cobbleyimport tempfile
30eb8dc403SDave Cobbleyimport subprocess
31eb8dc403SDave Cobbleyimport os
32eb8dc403SDave Cobbleyimport logging
33eb8dc403SDave Cobbleyimport errno
34eb8dc403SDave Cobbleyimport bb
35eb8dc403SDave Cobbleyimport bb.progress
36*19323693SBrad Bishopimport socket
37*19323693SBrad Bishopimport http.client
38eb8dc403SDave Cobbleyimport urllib.request, urllib.parse, urllib.error
39eb8dc403SDave Cobbleyfrom   bb.fetch2 import FetchMethod
40eb8dc403SDave Cobbleyfrom   bb.fetch2 import FetchError
41eb8dc403SDave Cobbleyfrom   bb.fetch2 import logger
42eb8dc403SDave Cobbleyfrom   bb.fetch2 import runfetchcmd
43*19323693SBrad Bishopfrom   bb.fetch2 import FetchConnectionCache
44eb8dc403SDave Cobbleyfrom   bb.utils import export_proxies
45eb8dc403SDave Cobbleyfrom   bs4 import BeautifulSoup
46eb8dc403SDave Cobbleyfrom   bs4 import SoupStrainer
47eb8dc403SDave Cobbley
class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Turn the progress lines printed by wget into progress updates.
    Note: this only works when wget runs with --progress=dot and verbose
    output (-v, or at least without -q/-nv).
    """
    def __init__(self, d):
        super().__init__(d)
        # Report 0% straight away so the progress bar becomes visible
        self._fire_progress(0)

    def writeline(self, line):
        """
        Inspect one line of wget output.

        When the line holds one or more "<percent>% <rate>" pairs, the last
        pair is reported through update() and False is returned; any other
        line yields True (presumably telling the base class to keep the
        line - confirm against LineFilterProgressHandler).
        """
        matches = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if not matches:
            return True
        percent, speed = matches[-1]
        self.update(int(percent), speed + '/s')
        return False
67eb8dc403SDave Cobbley
68eb8dc403SDave Cobbley
69eb8dc403SDave Cobbleyclass Wget(FetchMethod):
70eb8dc403SDave Cobbley    """Class to fetch urls via 'wget'"""
71eb8dc403SDave Cobbley    def supports(self, ud, d):
72eb8dc403SDave Cobbley        """
73eb8dc403SDave Cobbley        Check to see if a given url can be fetched with wget.
74eb8dc403SDave Cobbley        """
75eb8dc403SDave Cobbley        return ud.type in ['http', 'https', 'ftp']
76eb8dc403SDave Cobbley
77eb8dc403SDave Cobbley    def recommends_checksum(self, urldata):
78eb8dc403SDave Cobbley        return True
79eb8dc403SDave Cobbley
80eb8dc403SDave Cobbley    def urldata_init(self, ud, d):
81eb8dc403SDave Cobbley        if 'protocol' in ud.parm:
82eb8dc403SDave Cobbley            if ud.parm['protocol'] == 'git':
83eb8dc403SDave Cobbley                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)
84eb8dc403SDave Cobbley
85eb8dc403SDave Cobbley        if 'downloadfilename' in ud.parm:
86eb8dc403SDave Cobbley            ud.basename = ud.parm['downloadfilename']
87eb8dc403SDave Cobbley        else:
88eb8dc403SDave Cobbley            ud.basename = os.path.basename(ud.path)
89eb8dc403SDave Cobbley
90eb8dc403SDave Cobbley        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
91eb8dc403SDave Cobbley        if not ud.localfile:
92eb8dc403SDave Cobbley            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))
93eb8dc403SDave Cobbley
94eb8dc403SDave Cobbley        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate"
95eb8dc403SDave Cobbley
96eb8dc403SDave Cobbley    def _runwget(self, ud, d, command, quiet, workdir=None):
97eb8dc403SDave Cobbley
98eb8dc403SDave Cobbley        progresshandler = WgetProgressHandler(d)
99eb8dc403SDave Cobbley
100eb8dc403SDave Cobbley        logger.debug(2, "Fetching %s using command '%s'" % (ud.url, command))
101eb8dc403SDave Cobbley        bb.fetch2.check_network_access(d, command, ud.url)
102eb8dc403SDave Cobbley        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)
103eb8dc403SDave Cobbley
104eb8dc403SDave Cobbley    def download(self, ud, d):
105eb8dc403SDave Cobbley        """Fetch urls"""
106eb8dc403SDave Cobbley
107eb8dc403SDave Cobbley        fetchcmd = self.basecmd
108eb8dc403SDave Cobbley
109eb8dc403SDave Cobbley        if 'downloadfilename' in ud.parm:
110eb8dc403SDave Cobbley            dldir = d.getVar("DL_DIR")
111eb8dc403SDave Cobbley            bb.utils.mkdirhier(os.path.dirname(dldir + os.sep + ud.localfile))
112eb8dc403SDave Cobbley            fetchcmd += " -O " + dldir + os.sep + ud.localfile
113eb8dc403SDave Cobbley
114eb8dc403SDave Cobbley        if ud.user and ud.pswd:
115eb8dc403SDave Cobbley            fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)
116eb8dc403SDave Cobbley
117eb8dc403SDave Cobbley        uri = ud.url.split(";")[0]
118eb8dc403SDave Cobbley        if os.path.exists(ud.localpath):
119eb8dc403SDave Cobbley            # file exists, but we didnt complete it.. trying again..
120eb8dc403SDave Cobbley            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
121eb8dc403SDave Cobbley        else:
122eb8dc403SDave Cobbley            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)
123eb8dc403SDave Cobbley
124eb8dc403SDave Cobbley        self._runwget(ud, d, fetchcmd, False)
125eb8dc403SDave Cobbley
126eb8dc403SDave Cobbley        # Sanity check since wget can pretend it succeed when it didn't
127eb8dc403SDave Cobbley        # Also, this used to happen if sourceforge sent us to the mirror page
128eb8dc403SDave Cobbley        if not os.path.exists(ud.localpath):
129eb8dc403SDave Cobbley            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)
130eb8dc403SDave Cobbley
131eb8dc403SDave Cobbley        if os.path.getsize(ud.localpath) == 0:
132eb8dc403SDave Cobbley            os.remove(ud.localpath)
133eb8dc403SDave Cobbley            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
134eb8dc403SDave Cobbley
135eb8dc403SDave Cobbley        return True
136eb8dc403SDave Cobbley
137eb8dc403SDave Cobbley    def checkstatus(self, fetch, ud, d, try_again=True):
138eb8dc403SDave Cobbley        class HTTPConnectionCache(http.client.HTTPConnection):
139eb8dc403SDave Cobbley            if fetch.connection_cache:
140eb8dc403SDave Cobbley                def connect(self):
141eb8dc403SDave Cobbley                    """Connect to the host and port specified in __init__."""
142eb8dc403SDave Cobbley
143eb8dc403SDave Cobbley                    sock = fetch.connection_cache.get_connection(self.host, self.port)
144eb8dc403SDave Cobbley                    if sock:
145eb8dc403SDave Cobbley                        self.sock = sock
146eb8dc403SDave Cobbley                    else:
147eb8dc403SDave Cobbley                        self.sock = socket.create_connection((self.host, self.port),
148eb8dc403SDave Cobbley                                    self.timeout, self.source_address)
149eb8dc403SDave Cobbley                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)
150eb8dc403SDave Cobbley
151eb8dc403SDave Cobbley                    if self._tunnel_host:
152eb8dc403SDave Cobbley                        self._tunnel()
153eb8dc403SDave Cobbley
154eb8dc403SDave Cobbley        class CacheHTTPHandler(urllib.request.HTTPHandler):
155eb8dc403SDave Cobbley            def http_open(self, req):
156eb8dc403SDave Cobbley                return self.do_open(HTTPConnectionCache, req)
157eb8dc403SDave Cobbley
158eb8dc403SDave Cobbley            def do_open(self, http_class, req):
159eb8dc403SDave Cobbley                """Return an addinfourl object for the request, using http_class.
160eb8dc403SDave Cobbley
161eb8dc403SDave Cobbley                http_class must implement the HTTPConnection API from httplib.
162eb8dc403SDave Cobbley                The addinfourl return value is a file-like object.  It also
163eb8dc403SDave Cobbley                has methods and attributes including:
164eb8dc403SDave Cobbley                    - info(): return a mimetools.Message object for the headers
165eb8dc403SDave Cobbley                    - geturl(): return the original request URL
166eb8dc403SDave Cobbley                    - code: HTTP status code
167eb8dc403SDave Cobbley                """
168eb8dc403SDave Cobbley                host = req.host
169eb8dc403SDave Cobbley                if not host:
170*19323693SBrad Bishop                    raise urllib.error.URLError('no host given')
171eb8dc403SDave Cobbley
172eb8dc403SDave Cobbley                h = http_class(host, timeout=req.timeout) # will parse host:port
173eb8dc403SDave Cobbley                h.set_debuglevel(self._debuglevel)
174eb8dc403SDave Cobbley
175eb8dc403SDave Cobbley                headers = dict(req.unredirected_hdrs)
176eb8dc403SDave Cobbley                headers.update(dict((k, v) for k, v in list(req.headers.items())
177eb8dc403SDave Cobbley                            if k not in headers))
178eb8dc403SDave Cobbley
179eb8dc403SDave Cobbley                # We want to make an HTTP/1.1 request, but the addinfourl
180eb8dc403SDave Cobbley                # class isn't prepared to deal with a persistent connection.
181eb8dc403SDave Cobbley                # It will try to read all remaining data from the socket,
182eb8dc403SDave Cobbley                # which will block while the server waits for the next request.
183eb8dc403SDave Cobbley                # So make sure the connection gets closed after the (only)
184eb8dc403SDave Cobbley                # request.
185eb8dc403SDave Cobbley
186eb8dc403SDave Cobbley                # Don't close connection when connection_cache is enabled,
187eb8dc403SDave Cobbley                if fetch.connection_cache is None:
188eb8dc403SDave Cobbley                    headers["Connection"] = "close"
189eb8dc403SDave Cobbley                else:
190eb8dc403SDave Cobbley                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0
191eb8dc403SDave Cobbley
192eb8dc403SDave Cobbley                headers = dict(
193eb8dc403SDave Cobbley                    (name.title(), val) for name, val in list(headers.items()))
194eb8dc403SDave Cobbley
195eb8dc403SDave Cobbley                if req._tunnel_host:
196eb8dc403SDave Cobbley                    tunnel_headers = {}
197eb8dc403SDave Cobbley                    proxy_auth_hdr = "Proxy-Authorization"
198eb8dc403SDave Cobbley                    if proxy_auth_hdr in headers:
199eb8dc403SDave Cobbley                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
200eb8dc403SDave Cobbley                        # Proxy-Authorization should not be sent to origin
201eb8dc403SDave Cobbley                        # server.
202eb8dc403SDave Cobbley                        del headers[proxy_auth_hdr]
203eb8dc403SDave Cobbley                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)
204eb8dc403SDave Cobbley
205eb8dc403SDave Cobbley                try:
206eb8dc403SDave Cobbley                    h.request(req.get_method(), req.selector, req.data, headers)
207eb8dc403SDave Cobbley                except socket.error as err: # XXX what error?
208eb8dc403SDave Cobbley                    # Don't close connection when cache is enabled.
209eb8dc403SDave Cobbley                    # Instead, try to detect connections that are no longer
210eb8dc403SDave Cobbley                    # usable (for example, closed unexpectedly) and remove
211eb8dc403SDave Cobbley                    # them from the cache.
212eb8dc403SDave Cobbley                    if fetch.connection_cache is None:
213eb8dc403SDave Cobbley                        h.close()
214eb8dc403SDave Cobbley                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
215eb8dc403SDave Cobbley                        # This happens when the server closes the connection despite the Keep-Alive.
216eb8dc403SDave Cobbley                        # Apparently urllib then uses the file descriptor, expecting it to be
217eb8dc403SDave Cobbley                        # connected, when in reality the connection is already gone.
218eb8dc403SDave Cobbley                        # We let the request fail and expect it to be
219eb8dc403SDave Cobbley                        # tried once more ("try_again" in check_status()),
220eb8dc403SDave Cobbley                        # with the dead connection removed from the cache.
221eb8dc403SDave Cobbley                        # If it still fails, we give up, which can happend for bad
222eb8dc403SDave Cobbley                        # HTTP proxy settings.
223eb8dc403SDave Cobbley                        fetch.connection_cache.remove_connection(h.host, h.port)
224eb8dc403SDave Cobbley                    raise urllib.error.URLError(err)
225eb8dc403SDave Cobbley                else:
226eb8dc403SDave Cobbley                    try:
227eb8dc403SDave Cobbley                        r = h.getresponse(buffering=True)
228eb8dc403SDave Cobbley                    except TypeError: # buffering kw not supported
229eb8dc403SDave Cobbley                        r = h.getresponse()
230eb8dc403SDave Cobbley
231eb8dc403SDave Cobbley                # Pick apart the HTTPResponse object to get the addinfourl
232eb8dc403SDave Cobbley                # object initialized properly.
233eb8dc403SDave Cobbley
234eb8dc403SDave Cobbley                # Wrap the HTTPResponse object in socket's file object adapter
235eb8dc403SDave Cobbley                # for Windows.  That adapter calls recv(), so delegate recv()
236eb8dc403SDave Cobbley                # to read().  This weird wrapping allows the returned object to
237eb8dc403SDave Cobbley                # have readline() and readlines() methods.
238eb8dc403SDave Cobbley
239eb8dc403SDave Cobbley                # XXX It might be better to extract the read buffering code
240eb8dc403SDave Cobbley                # out of socket._fileobject() and into a base class.
241eb8dc403SDave Cobbley                r.recv = r.read
242eb8dc403SDave Cobbley
243eb8dc403SDave Cobbley                # no data, just have to read
244eb8dc403SDave Cobbley                r.read()
245eb8dc403SDave Cobbley                class fp_dummy(object):
246eb8dc403SDave Cobbley                    def read(self):
247eb8dc403SDave Cobbley                        return ""
248eb8dc403SDave Cobbley                    def readline(self):
249eb8dc403SDave Cobbley                        return ""
250eb8dc403SDave Cobbley                    def close(self):
251eb8dc403SDave Cobbley                        pass
252eb8dc403SDave Cobbley                    closed = False
253eb8dc403SDave Cobbley
254*19323693SBrad Bishop                resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
255eb8dc403SDave Cobbley                resp.code = r.status
256eb8dc403SDave Cobbley                resp.msg = r.reason
257eb8dc403SDave Cobbley
258eb8dc403SDave Cobbley                # Close connection when server request it.
259eb8dc403SDave Cobbley                if fetch.connection_cache is not None:
260eb8dc403SDave Cobbley                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
261eb8dc403SDave Cobbley                        fetch.connection_cache.remove_connection(h.host, h.port)
262eb8dc403SDave Cobbley
263eb8dc403SDave Cobbley                return resp
264eb8dc403SDave Cobbley
265eb8dc403SDave Cobbley        class HTTPMethodFallback(urllib.request.BaseHandler):
266eb8dc403SDave Cobbley            """
267eb8dc403SDave Cobbley            Fallback to GET if HEAD is not allowed (405 HTTP error)
268eb8dc403SDave Cobbley            """
269eb8dc403SDave Cobbley            def http_error_405(self, req, fp, code, msg, headers):
270eb8dc403SDave Cobbley                fp.read()
271eb8dc403SDave Cobbley                fp.close()
272eb8dc403SDave Cobbley
273eb8dc403SDave Cobbley                newheaders = dict((k, v) for k, v in list(req.headers.items())
274eb8dc403SDave Cobbley                                  if k.lower() not in ("content-length", "content-type"))
275eb8dc403SDave Cobbley                return self.parent.open(urllib.request.Request(req.get_full_url(),
276eb8dc403SDave Cobbley                                                        headers=newheaders,
277eb8dc403SDave Cobbley                                                        origin_req_host=req.origin_req_host,
278eb8dc403SDave Cobbley                                                        unverifiable=True))
279eb8dc403SDave Cobbley
280*19323693SBrad Bishop
281*19323693SBrad Bishop            # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
282*19323693SBrad Bishop            # Forbidden when they actually mean 405 Method Not Allowed.
283eb8dc403SDave Cobbley            http_error_403 = http_error_405
284eb8dc403SDave Cobbley
285eb8dc403SDave Cobbley
286eb8dc403SDave Cobbley        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
287eb8dc403SDave Cobbley            """
288eb8dc403SDave Cobbley            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
289eb8dc403SDave Cobbley            when we want to follow redirects using the original method.
290eb8dc403SDave Cobbley            """
291eb8dc403SDave Cobbley            def redirect_request(self, req, fp, code, msg, headers, newurl):
292eb8dc403SDave Cobbley                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
293*19323693SBrad Bishop                newreq.get_method = req.get_method
294eb8dc403SDave Cobbley                return newreq
295eb8dc403SDave Cobbley        exported_proxies = export_proxies(d)
296eb8dc403SDave Cobbley
297eb8dc403SDave Cobbley        handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback]
298*19323693SBrad Bishop        if exported_proxies:
299eb8dc403SDave Cobbley            handlers.append(urllib.request.ProxyHandler())
300eb8dc403SDave Cobbley        handlers.append(CacheHTTPHandler())
301*19323693SBrad Bishop        # Since Python 2.7.9 ssl cert validation is enabled by default
302eb8dc403SDave Cobbley        # see PEP-0476, this causes verification errors on some https servers
303eb8dc403SDave Cobbley        # so disable by default.
304eb8dc403SDave Cobbley        import ssl
305eb8dc403SDave Cobbley        if hasattr(ssl, '_create_unverified_context'):
306eb8dc403SDave Cobbley            handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context()))
307eb8dc403SDave Cobbley        opener = urllib.request.build_opener(*handlers)
308eb8dc403SDave Cobbley
309eb8dc403SDave Cobbley        try:
310eb8dc403SDave Cobbley            uri = ud.url.split(";")[0]
311eb8dc403SDave Cobbley            r = urllib.request.Request(uri)
312eb8dc403SDave Cobbley            r.get_method = lambda: "HEAD"
313eb8dc403SDave Cobbley            # Some servers (FusionForge, as used on Alioth) require that the
314eb8dc403SDave Cobbley            # optional Accept header is set.
315eb8dc403SDave Cobbley            r.add_header("Accept", "*/*")
316eb8dc403SDave Cobbley            def add_basic_auth(login_str, request):
317eb8dc403SDave Cobbley                '''Adds Basic auth to http request, pass in login:password as string'''
318eb8dc403SDave Cobbley                import base64
319eb8dc403SDave Cobbley                encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
320eb8dc403SDave Cobbley                authheader = "Basic %s" % encodeuser
321eb8dc403SDave Cobbley                r.add_header("Authorization", authheader)
322eb8dc403SDave Cobbley
323*19323693SBrad Bishop            if ud.user and ud.pswd:
324*19323693SBrad Bishop                add_basic_auth(ud.user + ':' + ud.pswd, r)
325eb8dc403SDave Cobbley
326eb8dc403SDave Cobbley            try:
327*19323693SBrad Bishop                import netrc
328eb8dc403SDave Cobbley                n = netrc.netrc()
329eb8dc403SDave Cobbley                login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
330eb8dc403SDave Cobbley                add_basic_auth("%s:%s" % (login, password), r)
331eb8dc403SDave Cobbley            except (TypeError, ImportError, IOError, netrc.NetrcParseError):
332eb8dc403SDave Cobbley                pass
333eb8dc403SDave Cobbley
334eb8dc403SDave Cobbley            with opener.open(r) as response:
335eb8dc403SDave Cobbley                pass
336eb8dc403SDave Cobbley        except urllib.error.URLError as e:
337eb8dc403SDave Cobbley            if try_again:
338eb8dc403SDave Cobbley                logger.debug(2, "checkstatus: trying again")
339eb8dc403SDave Cobbley                return self.checkstatus(fetch, ud, d, False)
340eb8dc403SDave Cobbley            else:
341eb8dc403SDave Cobbley                # debug for now to avoid spamming the logs in e.g. remote sstate searches
342eb8dc403SDave Cobbley                logger.debug(2, "checkstatus() urlopen failed: %s" % e)
343eb8dc403SDave Cobbley                return False
344eb8dc403SDave Cobbley        return True
345eb8dc403SDave Cobbley
346eb8dc403SDave Cobbley    def _parse_path(self, regex, s):
347eb8dc403SDave Cobbley        """
348eb8dc403SDave Cobbley        Find and group name, version and archive type in the given string s
349eb8dc403SDave Cobbley        """
350eb8dc403SDave Cobbley
351eb8dc403SDave Cobbley        m = regex.search(s)
352eb8dc403SDave Cobbley        if m:
353eb8dc403SDave Cobbley            pname = ''
354eb8dc403SDave Cobbley            pver = ''
355eb8dc403SDave Cobbley            ptype = ''
356eb8dc403SDave Cobbley
357eb8dc403SDave Cobbley            mdict = m.groupdict()
358eb8dc403SDave Cobbley            if 'name' in mdict.keys():
359eb8dc403SDave Cobbley                pname = mdict['name']
360eb8dc403SDave Cobbley            if 'pver' in mdict.keys():
361eb8dc403SDave Cobbley                pver = mdict['pver']
362eb8dc403SDave Cobbley            if 'type' in mdict.keys():
363eb8dc403SDave Cobbley                ptype = mdict['type']
364eb8dc403SDave Cobbley
365eb8dc403SDave Cobbley            bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))
366eb8dc403SDave Cobbley
367eb8dc403SDave Cobbley            return (pname, pver, ptype)
368eb8dc403SDave Cobbley
369eb8dc403SDave Cobbley        return None
370eb8dc403SDave Cobbley
371eb8dc403SDave Cobbley    def _modelate_version(self, version):
372eb8dc403SDave Cobbley        if version[0] in ['.', '-']:
373eb8dc403SDave Cobbley            if version[1].isdigit():
374eb8dc403SDave Cobbley                version = version[1] + version[0] + version[2:len(version)]
375eb8dc403SDave Cobbley            else:
376eb8dc403SDave Cobbley                version = version[1:len(version)]
377eb8dc403SDave Cobbley
378eb8dc403SDave Cobbley        version = re.sub('-', '.', version)
379eb8dc403SDave Cobbley        version = re.sub('_', '.', version)
380eb8dc403SDave Cobbley        version = re.sub('(rc)+', '.1000.', version)
381eb8dc403SDave Cobbley        version = re.sub('(beta)+', '.100.', version)
382eb8dc403SDave Cobbley        version = re.sub('(alpha)+', '.10.', version)
383eb8dc403SDave Cobbley        if version[0] == 'v':
384eb8dc403SDave Cobbley            version = version[1:len(version)]
385eb8dc403SDave Cobbley        return version
386eb8dc403SDave Cobbley
387eb8dc403SDave Cobbley    def _vercmp(self, old, new):
388eb8dc403SDave Cobbley        """
389eb8dc403SDave Cobbley        Check whether 'new' is newer than 'old' version. We use existing vercmp() for the
390eb8dc403SDave Cobbley        purpose. PE is cleared in comparison as it's not for build, and PR is cleared too
391eb8dc403SDave Cobbley        for simplicity as it's somehow difficult to get from various upstream format
392eb8dc403SDave Cobbley        """
393eb8dc403SDave Cobbley
394eb8dc403SDave Cobbley        (oldpn, oldpv, oldsuffix) = old
395eb8dc403SDave Cobbley        (newpn, newpv, newsuffix) = new
396eb8dc403SDave Cobbley
397*19323693SBrad Bishop        # Check for a new suffix type that we have never heard of before
398*19323693SBrad Bishop        if newsuffix:
399eb8dc403SDave Cobbley            m = self.suffix_regex_comp.search(newsuffix)
400eb8dc403SDave Cobbley            if not m:
401eb8dc403SDave Cobbley                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
402eb8dc403SDave Cobbley                return False
403eb8dc403SDave Cobbley
404*19323693SBrad Bishop        # Not our package so ignore it
405eb8dc403SDave Cobbley        if oldpn != newpn:
406eb8dc403SDave Cobbley            return False
407eb8dc403SDave Cobbley
408eb8dc403SDave Cobbley        oldpv = self._modelate_version(oldpv)
409eb8dc403SDave Cobbley        newpv = self._modelate_version(newpv)
410eb8dc403SDave Cobbley
411eb8dc403SDave Cobbley        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))
412eb8dc403SDave Cobbley
413eb8dc403SDave Cobbley    def _fetch_index(self, uri, ud, d):
414eb8dc403SDave Cobbley        """
415eb8dc403SDave Cobbley        Run fetch checkstatus to get directory information
416eb8dc403SDave Cobbley        """
417eb8dc403SDave Cobbley        f = tempfile.NamedTemporaryFile()
418eb8dc403SDave Cobbley        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
419eb8dc403SDave Cobbley            agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12"
420eb8dc403SDave Cobbley            fetchcmd = self.basecmd
421eb8dc403SDave Cobbley            fetchcmd += " -O " + f.name + " --user-agent='" + agent + "' '" + uri + "'"
422eb8dc403SDave Cobbley            try:
423eb8dc403SDave Cobbley                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
424eb8dc403SDave Cobbley                fetchresult = f.read()
425eb8dc403SDave Cobbley            except bb.fetch2.BBFetchException:
426eb8dc403SDave Cobbley                fetchresult = ""
427eb8dc403SDave Cobbley
428eb8dc403SDave Cobbley        return fetchresult
429eb8dc403SDave Cobbley
430eb8dc403SDave Cobbley    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
431eb8dc403SDave Cobbley        """
432eb8dc403SDave Cobbley        Return the latest version of a package inside a given directory path
433eb8dc403SDave Cobbley        If error or no version, return ""
434eb8dc403SDave Cobbley        """
435eb8dc403SDave Cobbley        valid = 0
436eb8dc403SDave Cobbley        version = ['', '', '']
437eb8dc403SDave Cobbley
438eb8dc403SDave Cobbley        bb.debug(3, "VersionURL: %s" % (url))
439eb8dc403SDave Cobbley        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
440eb8dc403SDave Cobbley        if not soup:
441eb8dc403SDave Cobbley            bb.debug(3, "*** %s NO SOUP" % (url))
442eb8dc403SDave Cobbley            return ""
443eb8dc403SDave Cobbley
444eb8dc403SDave Cobbley        for line in soup.find_all('a', href=True):
445eb8dc403SDave Cobbley            bb.debug(3, "line['href'] = '%s'" % (line['href']))
446eb8dc403SDave Cobbley            bb.debug(3, "line = '%s'" % (str(line)))
447eb8dc403SDave Cobbley
448eb8dc403SDave Cobbley            newver = self._parse_path(package_regex, line['href'])
449eb8dc403SDave Cobbley            if not newver:
450eb8dc403SDave Cobbley                newver = self._parse_path(package_regex, str(line))
451eb8dc403SDave Cobbley
452eb8dc403SDave Cobbley            if newver:
453eb8dc403SDave Cobbley                bb.debug(3, "Upstream version found: %s" % newver[1])
454eb8dc403SDave Cobbley                if valid == 0:
455eb8dc403SDave Cobbley                    version = newver
456eb8dc403SDave Cobbley                    valid = 1
457eb8dc403SDave Cobbley                elif self._vercmp(version, newver) < 0:
458eb8dc403SDave Cobbley                    version = newver
459eb8dc403SDave Cobbley
460eb8dc403SDave Cobbley        pupver = re.sub('_', '.', version[1])
461eb8dc403SDave Cobbley
462eb8dc403SDave Cobbley        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
463eb8dc403SDave Cobbley                (package, pupver or "N/A", current_version[1]))
464eb8dc403SDave Cobbley
465eb8dc403SDave Cobbley        if valid:
466eb8dc403SDave Cobbley            return pupver
467eb8dc403SDave Cobbley
468eb8dc403SDave Cobbley        return ""
469eb8dc403SDave Cobbley
470*19323693SBrad Bishop    def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
471eb8dc403SDave Cobbley        """
472eb8dc403SDave Cobbley        Scan every directory in order to get upstream version.
473eb8dc403SDave Cobbley        """
474eb8dc403SDave Cobbley        version_dir = ['', '', '']
475eb8dc403SDave Cobbley        version = ['', '', '']
476eb8dc403SDave Cobbley
477*19323693SBrad Bishop        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))")
478eb8dc403SDave Cobbley        s = dirver_regex.search(dirver)
479eb8dc403SDave Cobbley        if s:
480eb8dc403SDave Cobbley            version_dir[1] = s.group('ver')
481eb8dc403SDave Cobbley        else:
482eb8dc403SDave Cobbley            version_dir[1] = dirver
483eb8dc403SDave Cobbley
484eb8dc403SDave Cobbley        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
485eb8dc403SDave Cobbley                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
486eb8dc403SDave Cobbley        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))
487eb8dc403SDave Cobbley
488eb8dc403SDave Cobbley        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
489eb8dc403SDave Cobbley        if not soup:
490eb8dc403SDave Cobbley            return version[1]
491eb8dc403SDave Cobbley
492eb8dc403SDave Cobbley        for line in soup.find_all('a', href=True):
493eb8dc403SDave Cobbley            s = dirver_regex.search(line['href'].strip("/"))
494eb8dc403SDave Cobbley            if s:
495eb8dc403SDave Cobbley                sver = s.group('ver')
496eb8dc403SDave Cobbley
497eb8dc403SDave Cobbley                # When prefix is part of the version directory it need to
498eb8dc403SDave Cobbley                # ensure that only version directory is used so remove previous
499eb8dc403SDave Cobbley                # directories if exists.
500eb8dc403SDave Cobbley                #
501eb8dc403SDave Cobbley                # Example: pfx = '/dir1/dir2/v' and version = '2.5' the expected
502eb8dc403SDave Cobbley                # result is v2.5.
503eb8dc403SDave Cobbley                spfx = s.group('pfx').split('/')[-1]
504eb8dc403SDave Cobbley
505eb8dc403SDave Cobbley                version_dir_new = ['', sver, '']
506eb8dc403SDave Cobbley                if self._vercmp(version_dir, version_dir_new) <= 0:
507eb8dc403SDave Cobbley                    dirver_new = spfx + sver
508eb8dc403SDave Cobbley                    path = ud.path.replace(dirver, dirver_new, True) \
509eb8dc403SDave Cobbley                        .split(package)[0]
510eb8dc403SDave Cobbley                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
511eb8dc403SDave Cobbley                        ud.user, ud.pswd, {}])
512eb8dc403SDave Cobbley
513eb8dc403SDave Cobbley                    pupver = self._check_latest_version(uri,
514eb8dc403SDave Cobbley                            package, package_regex, current_version, ud, d)
515eb8dc403SDave Cobbley                    if pupver:
516eb8dc403SDave Cobbley                        version[1] = pupver
517eb8dc403SDave Cobbley
518eb8dc403SDave Cobbley                    version_dir = version_dir_new
519eb8dc403SDave Cobbley
520eb8dc403SDave Cobbley        return version[1]
521eb8dc403SDave Cobbley
522eb8dc403SDave Cobbley    def _init_regexes(self, package, ud, d):
523eb8dc403SDave Cobbley        """
524eb8dc403SDave Cobbley        Match as many patterns as possible such as:
525eb8dc403SDave Cobbley                gnome-common-2.20.0.tar.gz (most common format)
526eb8dc403SDave Cobbley                gtk+-2.90.1.tar.gz
527eb8dc403SDave Cobbley                xf86-input-synaptics-12.6.9.tar.gz
528eb8dc403SDave Cobbley                dri2proto-2.3.tar.gz
529eb8dc403SDave Cobbley                blktool_4.orig.tar.gz
530eb8dc403SDave Cobbley                libid3tag-0.15.1b.tar.gz
531eb8dc403SDave Cobbley                unzip552.tar.gz
532eb8dc403SDave Cobbley                icu4c-3_6-src.tgz
533eb8dc403SDave Cobbley                genext2fs_1.3.orig.tar.gz
534eb8dc403SDave Cobbley                gst-fluendo-mp3
535eb8dc403SDave Cobbley        """
536eb8dc403SDave Cobbley        # match most patterns which uses "-" as separator to version digits
537*19323693SBrad Bishop        pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
538eb8dc403SDave Cobbley        # a loose pattern such as for unzip552.tar.gz
539*19323693SBrad Bishop        pn_prefix2 = r"[a-zA-Z]+"
540eb8dc403SDave Cobbley        # a loose pattern such as for 80325-quicky-0.4.tar.gz
541*19323693SBrad Bishop        pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
542eb8dc403SDave Cobbley        # Save the Package Name (pn) Regex for use later
543*19323693SBrad Bishop        pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)
544eb8dc403SDave Cobbley
545eb8dc403SDave Cobbley        # match version
546*19323693SBrad Bishop        pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"
547eb8dc403SDave Cobbley
548eb8dc403SDave Cobbley        # match arch
549eb8dc403SDave Cobbley        parch_regex = "-source|_all_"
550eb8dc403SDave Cobbley
551eb8dc403SDave Cobbley        # src.rpm extension was added only for rpm package. Can be removed if the rpm
552eb8dc403SDave Cobbley        # packaged will always be considered as having to be manually upgraded
553*19323693SBrad Bishop        psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"
554eb8dc403SDave Cobbley
555eb8dc403SDave Cobbley        # match name, version and archive type of a package
556*19323693SBrad Bishop        package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
557eb8dc403SDave Cobbley                                                    % (pn_regex, pver_regex, parch_regex, psuffix_regex))
558eb8dc403SDave Cobbley        self.suffix_regex_comp = re.compile(psuffix_regex)
559eb8dc403SDave Cobbley
560eb8dc403SDave Cobbley        # compile regex, can be specific by package or generic regex
561eb8dc403SDave Cobbley        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
562eb8dc403SDave Cobbley        if pn_regex:
563eb8dc403SDave Cobbley            package_custom_regex_comp = re.compile(pn_regex)
564eb8dc403SDave Cobbley        else:
565eb8dc403SDave Cobbley            version = self._parse_path(package_regex_comp, package)
566eb8dc403SDave Cobbley            if version:
567eb8dc403SDave Cobbley                package_custom_regex_comp = re.compile(
568*19323693SBrad Bishop                    r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
569eb8dc403SDave Cobbley                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
570eb8dc403SDave Cobbley            else:
571eb8dc403SDave Cobbley                package_custom_regex_comp = None
572eb8dc403SDave Cobbley
573eb8dc403SDave Cobbley        return package_custom_regex_comp
574eb8dc403SDave Cobbley
575eb8dc403SDave Cobbley    def latest_versionstring(self, ud, d):
576eb8dc403SDave Cobbley        """
577eb8dc403SDave Cobbley        Manipulate the URL and try to obtain the latest package version
578eb8dc403SDave Cobbley
579eb8dc403SDave Cobbley        sanity check to ensure same name and type.
580eb8dc403SDave Cobbley        """
581eb8dc403SDave Cobbley        package = ud.path.split("/")[-1]
582eb8dc403SDave Cobbley        current_version = ['', d.getVar('PV'), '']
583eb8dc403SDave Cobbley
584eb8dc403SDave Cobbley        """possible to have no version in pkg name, such as spectrum-fw"""
585*19323693SBrad Bishop        if not re.search(r"\d+", package):
586eb8dc403SDave Cobbley            current_version[1] = re.sub('_', '.', current_version[1])
587eb8dc403SDave Cobbley            current_version[1] = re.sub('-', '.', current_version[1])
588eb8dc403SDave Cobbley            return (current_version[1], '')
589eb8dc403SDave Cobbley
590eb8dc403SDave Cobbley        package_regex = self._init_regexes(package, ud, d)
591eb8dc403SDave Cobbley        if package_regex is None:
592eb8dc403SDave Cobbley            bb.warn("latest_versionstring: package %s don't match pattern" % (package))
593eb8dc403SDave Cobbley            return ('', '')
594eb8dc403SDave Cobbley        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))
595eb8dc403SDave Cobbley
596eb8dc403SDave Cobbley        uri = ""
597eb8dc403SDave Cobbley        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
598eb8dc403SDave Cobbley        if not regex_uri:
599eb8dc403SDave Cobbley            path = ud.path.split(package)[0]
600eb8dc403SDave Cobbley
601eb8dc403SDave Cobbley            # search for version matches on folders inside the path, like:
602eb8dc403SDave Cobbley            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
603*19323693SBrad Bishop            dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
604eb8dc403SDave Cobbley            m = dirver_regex.search(path)
605eb8dc403SDave Cobbley            if m:
606eb8dc403SDave Cobbley                pn = d.getVar('PN')
607eb8dc403SDave Cobbley                dirver = m.group('dirver')
608eb8dc403SDave Cobbley
609*19323693SBrad Bishop                dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
610eb8dc403SDave Cobbley                if not dirver_pn_regex.search(dirver):
611eb8dc403SDave Cobbley                    return (self._check_latest_version_by_dir(dirver,
612eb8dc403SDave Cobbley                        package, package_regex, current_version, ud, d), '')
613eb8dc403SDave Cobbley
614eb8dc403SDave Cobbley            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
615eb8dc403SDave Cobbley        else:
616eb8dc403SDave Cobbley            uri = regex_uri
617eb8dc403SDave Cobbley
618eb8dc403SDave Cobbley        return (self._check_latest_version(uri, package, package_regex,
619eb8dc403SDave Cobbley                current_version, ud, d), '')
620