xref: /openbmc/openbmc/poky/bitbake/lib/bb/fetch2/wget.py (revision 0ca19ccf)
1eb8dc403SDave Cobbley"""
2eb8dc403SDave CobbleyBitBake 'Fetch' implementations
3eb8dc403SDave Cobbley
4eb8dc403SDave CobbleyClasses for obtaining upstream sources for the
5eb8dc403SDave CobbleyBitBake build tools.
6eb8dc403SDave Cobbley
7eb8dc403SDave Cobbley"""
8eb8dc403SDave Cobbley
9eb8dc403SDave Cobbley# Copyright (C) 2003, 2004  Chris Larson
10eb8dc403SDave Cobbley#
11c342db35SBrad Bishop# SPDX-License-Identifier: GPL-2.0-only
12eb8dc403SDave Cobbley#
13eb8dc403SDave Cobbley# Based on functions from the base bb module, Copyright 2003 Holger Schurig
14eb8dc403SDave Cobbley
1582c905dcSAndrew Geisslerimport shlex
16eb8dc403SDave Cobbleyimport re
17eb8dc403SDave Cobbleyimport tempfile
18eb8dc403SDave Cobbleyimport os
19eb8dc403SDave Cobbleyimport errno
20eb8dc403SDave Cobbleyimport bb
21eb8dc403SDave Cobbleyimport bb.progress
2219323693SBrad Bishopimport socket
2319323693SBrad Bishopimport http.client
24eb8dc403SDave Cobbleyimport urllib.request, urllib.parse, urllib.error
25eb8dc403SDave Cobbleyfrom   bb.fetch2 import FetchMethod
26eb8dc403SDave Cobbleyfrom   bb.fetch2 import FetchError
27eb8dc403SDave Cobbleyfrom   bb.fetch2 import logger
28eb8dc403SDave Cobbleyfrom   bb.fetch2 import runfetchcmd
29eb8dc403SDave Cobbleyfrom   bb.utils import export_proxies
30eb8dc403SDave Cobbleyfrom   bs4 import BeautifulSoup
31eb8dc403SDave Cobbleyfrom   bs4 import SoupStrainer
32eb8dc403SDave Cobbley
33eb8dc403SDave Cobbleyclass WgetProgressHandler(bb.progress.LineFilterProgressHandler):
34eb8dc403SDave Cobbley    """
35eb8dc403SDave Cobbley    Extract progress information from wget output.
36eb8dc403SDave Cobbley    Note: relies on --progress=dot (with -v or without -q/-nv) being
37eb8dc403SDave Cobbley    specified on the wget command line.
38eb8dc403SDave Cobbley    """
39eb8dc403SDave Cobbley    def __init__(self, d):
40eb8dc403SDave Cobbley        super(WgetProgressHandler, self).__init__(d)
41eb8dc403SDave Cobbley        # Send an initial progress event so the bar gets shown
42eb8dc403SDave Cobbley        self._fire_progress(0)
43eb8dc403SDave Cobbley
44eb8dc403SDave Cobbley    def writeline(self, line):
45eb8dc403SDave Cobbley        percs = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
46eb8dc403SDave Cobbley        if percs:
47eb8dc403SDave Cobbley            progress = int(percs[-1][0])
48eb8dc403SDave Cobbley            rate = percs[-1][1] + '/s'
49eb8dc403SDave Cobbley            self.update(progress, rate)
50eb8dc403SDave Cobbley            return False
51eb8dc403SDave Cobbley        return True
52eb8dc403SDave Cobbley
53eb8dc403SDave Cobbley
54eb8dc403SDave Cobbleyclass Wget(FetchMethod):
55*0ca19ccfSPatrick Williams    """Class to fetch urls via 'wget'"""
56d1e89497SAndrew Geissler
57d1e89497SAndrew Geissler    # CDNs like CloudFlare may do a 'browser integrity test' which can fail
58d1e89497SAndrew Geissler    # with the standard wget/urllib User-Agent, so pretend to be a modern
59d1e89497SAndrew Geissler    # browser.
60d1e89497SAndrew Geissler    user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"
61d1e89497SAndrew Geissler
62*0ca19ccfSPatrick Williams    def check_certs(self, d):
63*0ca19ccfSPatrick Williams        """
64*0ca19ccfSPatrick Williams        Should certificates be checked?
65*0ca19ccfSPatrick Williams        """
66*0ca19ccfSPatrick Williams        return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0"
67*0ca19ccfSPatrick Williams
68eb8dc403SDave Cobbley    def supports(self, ud, d):
69eb8dc403SDave Cobbley        """
70eb8dc403SDave Cobbley        Check to see if a given url can be fetched with wget.
71eb8dc403SDave Cobbley        """
72eb8dc403SDave Cobbley        return ud.type in ['http', 'https', 'ftp']
73eb8dc403SDave Cobbley
74eb8dc403SDave Cobbley    def recommends_checksum(self, urldata):
75eb8dc403SDave Cobbley        return True
76eb8dc403SDave Cobbley
77eb8dc403SDave Cobbley    def urldata_init(self, ud, d):
78eb8dc403SDave Cobbley        if 'protocol' in ud.parm:
79eb8dc403SDave Cobbley            if ud.parm['protocol'] == 'git':
80eb8dc403SDave Cobbley                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)
81eb8dc403SDave Cobbley
82eb8dc403SDave Cobbley        if 'downloadfilename' in ud.parm:
83eb8dc403SDave Cobbley            ud.basename = ud.parm['downloadfilename']
84eb8dc403SDave Cobbley        else:
85eb8dc403SDave Cobbley            ud.basename = os.path.basename(ud.path)
86eb8dc403SDave Cobbley
87eb8dc403SDave Cobbley        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
88eb8dc403SDave Cobbley        if not ud.localfile:
89eb8dc403SDave Cobbley            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))
90eb8dc403SDave Cobbley
91*0ca19ccfSPatrick Williams        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp"
92*0ca19ccfSPatrick Williams
93*0ca19ccfSPatrick Williams        if not self.check_certs(d):
94*0ca19ccfSPatrick Williams            self.basecmd += " --no-check-certificate"
95eb8dc403SDave Cobbley
96eb8dc403SDave Cobbley    def _runwget(self, ud, d, command, quiet, workdir=None):
97eb8dc403SDave Cobbley
98eb8dc403SDave Cobbley        progresshandler = WgetProgressHandler(d)
99eb8dc403SDave Cobbley
100d1e89497SAndrew Geissler        logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
101eb8dc403SDave Cobbley        bb.fetch2.check_network_access(d, command, ud.url)
102eb8dc403SDave Cobbley        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)
103eb8dc403SDave Cobbley
104eb8dc403SDave Cobbley    def download(self, ud, d):
105eb8dc403SDave Cobbley        """Fetch urls"""
106eb8dc403SDave Cobbley
107eb8dc403SDave Cobbley        fetchcmd = self.basecmd
108eb8dc403SDave Cobbley
109eb8dc403SDave Cobbley        if 'downloadfilename' in ud.parm:
11082c905dcSAndrew Geissler            localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile)
11182c905dcSAndrew Geissler            bb.utils.mkdirhier(os.path.dirname(localpath))
11282c905dcSAndrew Geissler            fetchcmd += " -O %s" % shlex.quote(localpath)
113eb8dc403SDave Cobbley
114eb8dc403SDave Cobbley        if ud.user and ud.pswd:
115eb8dc403SDave Cobbley            fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)
116eb8dc403SDave Cobbley
117eb8dc403SDave Cobbley        uri = ud.url.split(";")[0]
118eb8dc403SDave Cobbley        if os.path.exists(ud.localpath):
119eb8dc403SDave Cobbley            # file exists, but we didnt complete it.. trying again..
120eb8dc403SDave Cobbley            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
121eb8dc403SDave Cobbley        else:
122eb8dc403SDave Cobbley            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)
123eb8dc403SDave Cobbley
124eb8dc403SDave Cobbley        self._runwget(ud, d, fetchcmd, False)
125eb8dc403SDave Cobbley
126eb8dc403SDave Cobbley        # Sanity check since wget can pretend it succeed when it didn't
127eb8dc403SDave Cobbley        # Also, this used to happen if sourceforge sent us to the mirror page
128eb8dc403SDave Cobbley        if not os.path.exists(ud.localpath):
129eb8dc403SDave Cobbley            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)
130eb8dc403SDave Cobbley
131eb8dc403SDave Cobbley        if os.path.getsize(ud.localpath) == 0:
132eb8dc403SDave Cobbley            os.remove(ud.localpath)
133eb8dc403SDave Cobbley            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
134eb8dc403SDave Cobbley
135eb8dc403SDave Cobbley        return True
136eb8dc403SDave Cobbley
137eb8dc403SDave Cobbley    def checkstatus(self, fetch, ud, d, try_again=True):
138eb8dc403SDave Cobbley        class HTTPConnectionCache(http.client.HTTPConnection):
139eb8dc403SDave Cobbley            if fetch.connection_cache:
140eb8dc403SDave Cobbley                def connect(self):
141eb8dc403SDave Cobbley                    """Connect to the host and port specified in __init__."""
142eb8dc403SDave Cobbley
143eb8dc403SDave Cobbley                    sock = fetch.connection_cache.get_connection(self.host, self.port)
144eb8dc403SDave Cobbley                    if sock:
145eb8dc403SDave Cobbley                        self.sock = sock
146eb8dc403SDave Cobbley                    else:
147eb8dc403SDave Cobbley                        self.sock = socket.create_connection((self.host, self.port),
148eb8dc403SDave Cobbley                                    self.timeout, self.source_address)
149eb8dc403SDave Cobbley                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)
150eb8dc403SDave Cobbley
151eb8dc403SDave Cobbley                    if self._tunnel_host:
152eb8dc403SDave Cobbley                        self._tunnel()
153eb8dc403SDave Cobbley
154eb8dc403SDave Cobbley        class CacheHTTPHandler(urllib.request.HTTPHandler):
155eb8dc403SDave Cobbley            def http_open(self, req):
156eb8dc403SDave Cobbley                return self.do_open(HTTPConnectionCache, req)
157eb8dc403SDave Cobbley
158eb8dc403SDave Cobbley            def do_open(self, http_class, req):
159eb8dc403SDave Cobbley                """Return an addinfourl object for the request, using http_class.
160eb8dc403SDave Cobbley
161eb8dc403SDave Cobbley                http_class must implement the HTTPConnection API from httplib.
162eb8dc403SDave Cobbley                The addinfourl return value is a file-like object.  It also
163eb8dc403SDave Cobbley                has methods and attributes including:
164eb8dc403SDave Cobbley                    - info(): return a mimetools.Message object for the headers
165eb8dc403SDave Cobbley                    - geturl(): return the original request URL
166eb8dc403SDave Cobbley                    - code: HTTP status code
167eb8dc403SDave Cobbley                """
168eb8dc403SDave Cobbley                host = req.host
169eb8dc403SDave Cobbley                if not host:
17019323693SBrad Bishop                    raise urllib.error.URLError('no host given')
171eb8dc403SDave Cobbley
172eb8dc403SDave Cobbley                h = http_class(host, timeout=req.timeout) # will parse host:port
173eb8dc403SDave Cobbley                h.set_debuglevel(self._debuglevel)
174eb8dc403SDave Cobbley
175eb8dc403SDave Cobbley                headers = dict(req.unredirected_hdrs)
176eb8dc403SDave Cobbley                headers.update(dict((k, v) for k, v in list(req.headers.items())
177eb8dc403SDave Cobbley                            if k not in headers))
178eb8dc403SDave Cobbley
179eb8dc403SDave Cobbley                # We want to make an HTTP/1.1 request, but the addinfourl
180eb8dc403SDave Cobbley                # class isn't prepared to deal with a persistent connection.
181eb8dc403SDave Cobbley                # It will try to read all remaining data from the socket,
182eb8dc403SDave Cobbley                # which will block while the server waits for the next request.
183eb8dc403SDave Cobbley                # So make sure the connection gets closed after the (only)
184eb8dc403SDave Cobbley                # request.
185eb8dc403SDave Cobbley
186eb8dc403SDave Cobbley                # Don't close connection when connection_cache is enabled,
187eb8dc403SDave Cobbley                if fetch.connection_cache is None:
188eb8dc403SDave Cobbley                    headers["Connection"] = "close"
189eb8dc403SDave Cobbley                else:
190eb8dc403SDave Cobbley                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0
191eb8dc403SDave Cobbley
192eb8dc403SDave Cobbley                headers = dict(
193eb8dc403SDave Cobbley                    (name.title(), val) for name, val in list(headers.items()))
194eb8dc403SDave Cobbley
195eb8dc403SDave Cobbley                if req._tunnel_host:
196eb8dc403SDave Cobbley                    tunnel_headers = {}
197eb8dc403SDave Cobbley                    proxy_auth_hdr = "Proxy-Authorization"
198eb8dc403SDave Cobbley                    if proxy_auth_hdr in headers:
199eb8dc403SDave Cobbley                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
200eb8dc403SDave Cobbley                        # Proxy-Authorization should not be sent to origin
201eb8dc403SDave Cobbley                        # server.
202eb8dc403SDave Cobbley                        del headers[proxy_auth_hdr]
203eb8dc403SDave Cobbley                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)
204eb8dc403SDave Cobbley
205eb8dc403SDave Cobbley                try:
206eb8dc403SDave Cobbley                    h.request(req.get_method(), req.selector, req.data, headers)
207eb8dc403SDave Cobbley                except socket.error as err: # XXX what error?
208eb8dc403SDave Cobbley                    # Don't close connection when cache is enabled.
209eb8dc403SDave Cobbley                    # Instead, try to detect connections that are no longer
210eb8dc403SDave Cobbley                    # usable (for example, closed unexpectedly) and remove
211eb8dc403SDave Cobbley                    # them from the cache.
212eb8dc403SDave Cobbley                    if fetch.connection_cache is None:
213eb8dc403SDave Cobbley                        h.close()
214eb8dc403SDave Cobbley                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
215eb8dc403SDave Cobbley                        # This happens when the server closes the connection despite the Keep-Alive.
216eb8dc403SDave Cobbley                        # Apparently urllib then uses the file descriptor, expecting it to be
217eb8dc403SDave Cobbley                        # connected, when in reality the connection is already gone.
218eb8dc403SDave Cobbley                        # We let the request fail and expect it to be
219eb8dc403SDave Cobbley                        # tried once more ("try_again" in check_status()),
220eb8dc403SDave Cobbley                        # with the dead connection removed from the cache.
221eb8dc403SDave Cobbley                        # If it still fails, we give up, which can happend for bad
222eb8dc403SDave Cobbley                        # HTTP proxy settings.
223eb8dc403SDave Cobbley                        fetch.connection_cache.remove_connection(h.host, h.port)
224eb8dc403SDave Cobbley                    raise urllib.error.URLError(err)
225eb8dc403SDave Cobbley                else:
226eb8dc403SDave Cobbley                    r = h.getresponse()
227eb8dc403SDave Cobbley
228eb8dc403SDave Cobbley                # Pick apart the HTTPResponse object to get the addinfourl
229eb8dc403SDave Cobbley                # object initialized properly.
230eb8dc403SDave Cobbley
231eb8dc403SDave Cobbley                # Wrap the HTTPResponse object in socket's file object adapter
232eb8dc403SDave Cobbley                # for Windows.  That adapter calls recv(), so delegate recv()
233eb8dc403SDave Cobbley                # to read().  This weird wrapping allows the returned object to
234eb8dc403SDave Cobbley                # have readline() and readlines() methods.
235eb8dc403SDave Cobbley
236eb8dc403SDave Cobbley                # XXX It might be better to extract the read buffering code
237eb8dc403SDave Cobbley                # out of socket._fileobject() and into a base class.
238eb8dc403SDave Cobbley                r.recv = r.read
239eb8dc403SDave Cobbley
240eb8dc403SDave Cobbley                # no data, just have to read
241eb8dc403SDave Cobbley                r.read()
242eb8dc403SDave Cobbley                class fp_dummy(object):
243eb8dc403SDave Cobbley                    def read(self):
244eb8dc403SDave Cobbley                        return ""
245eb8dc403SDave Cobbley                    def readline(self):
246eb8dc403SDave Cobbley                        return ""
247eb8dc403SDave Cobbley                    def close(self):
248eb8dc403SDave Cobbley                        pass
249eb8dc403SDave Cobbley                    closed = False
250eb8dc403SDave Cobbley
25119323693SBrad Bishop                resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
252eb8dc403SDave Cobbley                resp.code = r.status
253eb8dc403SDave Cobbley                resp.msg = r.reason
254eb8dc403SDave Cobbley
255eb8dc403SDave Cobbley                # Close connection when server request it.
256eb8dc403SDave Cobbley                if fetch.connection_cache is not None:
257eb8dc403SDave Cobbley                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
258eb8dc403SDave Cobbley                        fetch.connection_cache.remove_connection(h.host, h.port)
259eb8dc403SDave Cobbley
260eb8dc403SDave Cobbley                return resp
261eb8dc403SDave Cobbley
262eb8dc403SDave Cobbley        class HTTPMethodFallback(urllib.request.BaseHandler):
263eb8dc403SDave Cobbley            """
264eb8dc403SDave Cobbley            Fallback to GET if HEAD is not allowed (405 HTTP error)
265eb8dc403SDave Cobbley            """
266eb8dc403SDave Cobbley            def http_error_405(self, req, fp, code, msg, headers):
267eb8dc403SDave Cobbley                fp.read()
268eb8dc403SDave Cobbley                fp.close()
269eb8dc403SDave Cobbley
27008902b01SBrad Bishop                if req.get_method() != 'GET':
271eb8dc403SDave Cobbley                    newheaders = dict((k, v) for k, v in list(req.headers.items())
272eb8dc403SDave Cobbley                                      if k.lower() not in ("content-length", "content-type"))
273eb8dc403SDave Cobbley                    return self.parent.open(urllib.request.Request(req.get_full_url(),
274eb8dc403SDave Cobbley                                                            headers=newheaders,
275eb8dc403SDave Cobbley                                                            origin_req_host=req.origin_req_host,
276eb8dc403SDave Cobbley                                                            unverifiable=True))
277eb8dc403SDave Cobbley
27808902b01SBrad Bishop                raise urllib.request.HTTPError(req, code, msg, headers, None)
27919323693SBrad Bishop
28019323693SBrad Bishop            # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
28119323693SBrad Bishop            # Forbidden when they actually mean 405 Method Not Allowed.
282eb8dc403SDave Cobbley            http_error_403 = http_error_405
283eb8dc403SDave Cobbley
284eb8dc403SDave Cobbley
285eb8dc403SDave Cobbley        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
286eb8dc403SDave Cobbley            """
287eb8dc403SDave Cobbley            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
288eb8dc403SDave Cobbley            when we want to follow redirects using the original method.
289eb8dc403SDave Cobbley            """
290eb8dc403SDave Cobbley            def redirect_request(self, req, fp, code, msg, headers, newurl):
291eb8dc403SDave Cobbley                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
29219323693SBrad Bishop                newreq.get_method = req.get_method
293eb8dc403SDave Cobbley                return newreq
294eb8dc403SDave Cobbley
295*0ca19ccfSPatrick Williams        # We need to update the environment here as both the proxy and HTTPS
296*0ca19ccfSPatrick Williams        # handlers need variables set. The proxy needs http_proxy and friends to
297*0ca19ccfSPatrick Williams        # be set, and HTTPSHandler ends up calling into openssl to load the
298*0ca19ccfSPatrick Williams        # certificates. In buildtools configurations this will be looking at the
299*0ca19ccfSPatrick Williams        # wrong place for certificates by default: we set SSL_CERT_FILE to the
300*0ca19ccfSPatrick Williams        # right location in the buildtools environment script but as BitBake
301*0ca19ccfSPatrick Williams        # prunes prunes the environment this is lost. When binaries are executed
302*0ca19ccfSPatrick Williams        # runfetchcmd ensures these values are in the environment, but this is
303*0ca19ccfSPatrick Williams        # pure Python so we need to update the environment.
304*0ca19ccfSPatrick Williams        #
305*0ca19ccfSPatrick Williams        # Avoid tramping the environment too much by using bb.utils.environment
306*0ca19ccfSPatrick Williams        # to scope the changes to the build_opener request, which is when the
307*0ca19ccfSPatrick Williams        # environment lookups happen.
308*0ca19ccfSPatrick Williams        newenv = {}
309*0ca19ccfSPatrick Williams        for name in bb.fetch2.FETCH_EXPORT_VARS:
310*0ca19ccfSPatrick Williams            value = d.getVar(name)
311*0ca19ccfSPatrick Williams            if not value:
312*0ca19ccfSPatrick Williams                origenv = d.getVar("BB_ORIGENV")
313*0ca19ccfSPatrick Williams                if origenv:
314*0ca19ccfSPatrick Williams                    value = origenv.getVar(name)
315*0ca19ccfSPatrick Williams            if value:
316*0ca19ccfSPatrick Williams                newenv[name] = value
317*0ca19ccfSPatrick Williams
318*0ca19ccfSPatrick Williams        with bb.utils.environment(**newenv):
319eb8dc403SDave Cobbley            import ssl
320*0ca19ccfSPatrick Williams
321*0ca19ccfSPatrick Williams            if self.check_certs(d):
322*0ca19ccfSPatrick Williams                context = ssl.create_default_context()
323*0ca19ccfSPatrick Williams            else:
324*0ca19ccfSPatrick Williams                context = ssl._create_unverified_context()
325*0ca19ccfSPatrick Williams
326*0ca19ccfSPatrick Williams            handlers = [FixedHTTPRedirectHandler,
327*0ca19ccfSPatrick Williams                        HTTPMethodFallback,
328*0ca19ccfSPatrick Williams                        urllib.request.ProxyHandler(),
329*0ca19ccfSPatrick Williams                        CacheHTTPHandler(),
330*0ca19ccfSPatrick Williams                        urllib.request.HTTPSHandler(context=context)]
331eb8dc403SDave Cobbley            opener = urllib.request.build_opener(*handlers)
332eb8dc403SDave Cobbley
333eb8dc403SDave Cobbley        try:
334eb8dc403SDave Cobbley            uri = ud.url.split(";")[0]
335eb8dc403SDave Cobbley            r = urllib.request.Request(uri)
336eb8dc403SDave Cobbley            r.get_method = lambda: "HEAD"
337eb8dc403SDave Cobbley            # Some servers (FusionForge, as used on Alioth) require that the
338eb8dc403SDave Cobbley            # optional Accept header is set.
339eb8dc403SDave Cobbley            r.add_header("Accept", "*/*")
340d1e89497SAndrew Geissler            r.add_header("User-Agent", self.user_agent)
341eb8dc403SDave Cobbley            def add_basic_auth(login_str, request):
342eb8dc403SDave Cobbley                '''Adds Basic auth to http request, pass in login:password as string'''
343eb8dc403SDave Cobbley                import base64
344eb8dc403SDave Cobbley                encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
345eb8dc403SDave Cobbley                authheader = "Basic %s" % encodeuser
346eb8dc403SDave Cobbley                r.add_header("Authorization", authheader)
347eb8dc403SDave Cobbley
34819323693SBrad Bishop            if ud.user and ud.pswd:
34919323693SBrad Bishop                add_basic_auth(ud.user + ':' + ud.pswd, r)
350eb8dc403SDave Cobbley
351eb8dc403SDave Cobbley            try:
35219323693SBrad Bishop                import netrc
353eb8dc403SDave Cobbley                n = netrc.netrc()
354eb8dc403SDave Cobbley                login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
355eb8dc403SDave Cobbley                add_basic_auth("%s:%s" % (login, password), r)
356eb8dc403SDave Cobbley            except (TypeError, ImportError, IOError, netrc.NetrcParseError):
357eb8dc403SDave Cobbley                pass
358eb8dc403SDave Cobbley
359eb8dc403SDave Cobbley            with opener.open(r) as response:
360eb8dc403SDave Cobbley                pass
361eb8dc403SDave Cobbley        except urllib.error.URLError as e:
362eb8dc403SDave Cobbley            if try_again:
363d1e89497SAndrew Geissler                logger.debug2("checkstatus: trying again")
364eb8dc403SDave Cobbley                return self.checkstatus(fetch, ud, d, False)
365eb8dc403SDave Cobbley            else:
366eb8dc403SDave Cobbley                # debug for now to avoid spamming the logs in e.g. remote sstate searches
367d1e89497SAndrew Geissler                logger.debug2("checkstatus() urlopen failed: %s" % e)
368eb8dc403SDave Cobbley                return False
36990fd73cbSAndrew Geissler        except ConnectionResetError as e:
37090fd73cbSAndrew Geissler            if try_again:
37190fd73cbSAndrew Geissler                logger.debug2("checkstatus: trying again")
37290fd73cbSAndrew Geissler                return self.checkstatus(fetch, ud, d, False)
37390fd73cbSAndrew Geissler            else:
37490fd73cbSAndrew Geissler                # debug for now to avoid spamming the logs in e.g. remote sstate searches
37590fd73cbSAndrew Geissler                logger.debug2("checkstatus() urlopen failed: %s" % e)
37690fd73cbSAndrew Geissler                return False
377eb8dc403SDave Cobbley        return True
378eb8dc403SDave Cobbley
379eb8dc403SDave Cobbley    def _parse_path(self, regex, s):
380eb8dc403SDave Cobbley        """
381eb8dc403SDave Cobbley        Find and group name, version and archive type in the given string s
382eb8dc403SDave Cobbley        """
383eb8dc403SDave Cobbley
384eb8dc403SDave Cobbley        m = regex.search(s)
385eb8dc403SDave Cobbley        if m:
386eb8dc403SDave Cobbley            pname = ''
387eb8dc403SDave Cobbley            pver = ''
388eb8dc403SDave Cobbley            ptype = ''
389eb8dc403SDave Cobbley
390eb8dc403SDave Cobbley            mdict = m.groupdict()
391eb8dc403SDave Cobbley            if 'name' in mdict.keys():
392eb8dc403SDave Cobbley                pname = mdict['name']
393eb8dc403SDave Cobbley            if 'pver' in mdict.keys():
394eb8dc403SDave Cobbley                pver = mdict['pver']
395eb8dc403SDave Cobbley            if 'type' in mdict.keys():
396eb8dc403SDave Cobbley                ptype = mdict['type']
397eb8dc403SDave Cobbley
398eb8dc403SDave Cobbley            bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))
399eb8dc403SDave Cobbley
400eb8dc403SDave Cobbley            return (pname, pver, ptype)
401eb8dc403SDave Cobbley
402eb8dc403SDave Cobbley        return None
403eb8dc403SDave Cobbley
404eb8dc403SDave Cobbley    def _modelate_version(self, version):
405eb8dc403SDave Cobbley        if version[0] in ['.', '-']:
406eb8dc403SDave Cobbley            if version[1].isdigit():
407eb8dc403SDave Cobbley                version = version[1] + version[0] + version[2:len(version)]
408eb8dc403SDave Cobbley            else:
409eb8dc403SDave Cobbley                version = version[1:len(version)]
410eb8dc403SDave Cobbley
411eb8dc403SDave Cobbley        version = re.sub('-', '.', version)
412eb8dc403SDave Cobbley        version = re.sub('_', '.', version)
413eb8dc403SDave Cobbley        version = re.sub('(rc)+', '.1000.', version)
414eb8dc403SDave Cobbley        version = re.sub('(beta)+', '.100.', version)
415eb8dc403SDave Cobbley        version = re.sub('(alpha)+', '.10.', version)
416eb8dc403SDave Cobbley        if version[0] == 'v':
417eb8dc403SDave Cobbley            version = version[1:len(version)]
418eb8dc403SDave Cobbley        return version
419eb8dc403SDave Cobbley
420eb8dc403SDave Cobbley    def _vercmp(self, old, new):
421eb8dc403SDave Cobbley        """
422eb8dc403SDave Cobbley        Check whether 'new' is newer than 'old' version. We use existing vercmp() for the
423eb8dc403SDave Cobbley        purpose. PE is cleared in comparison as it's not for build, and PR is cleared too
424eb8dc403SDave Cobbley        for simplicity as it's somehow difficult to get from various upstream format
425eb8dc403SDave Cobbley        """
426eb8dc403SDave Cobbley
427eb8dc403SDave Cobbley        (oldpn, oldpv, oldsuffix) = old
428eb8dc403SDave Cobbley        (newpn, newpv, newsuffix) = new
429eb8dc403SDave Cobbley
43019323693SBrad Bishop        # Check for a new suffix type that we have never heard of before
43119323693SBrad Bishop        if newsuffix:
432eb8dc403SDave Cobbley            m = self.suffix_regex_comp.search(newsuffix)
433eb8dc403SDave Cobbley            if not m:
434eb8dc403SDave Cobbley                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
435eb8dc403SDave Cobbley                return False
436eb8dc403SDave Cobbley
43719323693SBrad Bishop        # Not our package so ignore it
438eb8dc403SDave Cobbley        if oldpn != newpn:
439eb8dc403SDave Cobbley            return False
440eb8dc403SDave Cobbley
441eb8dc403SDave Cobbley        oldpv = self._modelate_version(oldpv)
442eb8dc403SDave Cobbley        newpv = self._modelate_version(newpv)
443eb8dc403SDave Cobbley
444eb8dc403SDave Cobbley        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))
445eb8dc403SDave Cobbley
446eb8dc403SDave Cobbley    def _fetch_index(self, uri, ud, d):
447eb8dc403SDave Cobbley        """
448eb8dc403SDave Cobbley        Run fetch checkstatus to get directory information
449eb8dc403SDave Cobbley        """
450eb8dc403SDave Cobbley        f = tempfile.NamedTemporaryFile()
451eb8dc403SDave Cobbley        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
452eb8dc403SDave Cobbley            fetchcmd = self.basecmd
453d1e89497SAndrew Geissler            fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'"
454eb8dc403SDave Cobbley            try:
455eb8dc403SDave Cobbley                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
456eb8dc403SDave Cobbley                fetchresult = f.read()
457eb8dc403SDave Cobbley            except bb.fetch2.BBFetchException:
458eb8dc403SDave Cobbley                fetchresult = ""
459eb8dc403SDave Cobbley
460eb8dc403SDave Cobbley        return fetchresult
461eb8dc403SDave Cobbley
462eb8dc403SDave Cobbley    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
463eb8dc403SDave Cobbley        """
464eb8dc403SDave Cobbley        Return the latest version of a package inside a given directory path
465eb8dc403SDave Cobbley        If error or no version, return ""
466eb8dc403SDave Cobbley        """
467eb8dc403SDave Cobbley        valid = 0
468eb8dc403SDave Cobbley        version = ['', '', '']
469eb8dc403SDave Cobbley
470eb8dc403SDave Cobbley        bb.debug(3, "VersionURL: %s" % (url))
471eb8dc403SDave Cobbley        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
472eb8dc403SDave Cobbley        if not soup:
473eb8dc403SDave Cobbley            bb.debug(3, "*** %s NO SOUP" % (url))
474eb8dc403SDave Cobbley            return ""
475eb8dc403SDave Cobbley
476eb8dc403SDave Cobbley        for line in soup.find_all('a', href=True):
477eb8dc403SDave Cobbley            bb.debug(3, "line['href'] = '%s'" % (line['href']))
478eb8dc403SDave Cobbley            bb.debug(3, "line = '%s'" % (str(line)))
479eb8dc403SDave Cobbley
480eb8dc403SDave Cobbley            newver = self._parse_path(package_regex, line['href'])
481eb8dc403SDave Cobbley            if not newver:
482eb8dc403SDave Cobbley                newver = self._parse_path(package_regex, str(line))
483eb8dc403SDave Cobbley
484eb8dc403SDave Cobbley            if newver:
485eb8dc403SDave Cobbley                bb.debug(3, "Upstream version found: %s" % newver[1])
486eb8dc403SDave Cobbley                if valid == 0:
487eb8dc403SDave Cobbley                    version = newver
488eb8dc403SDave Cobbley                    valid = 1
489eb8dc403SDave Cobbley                elif self._vercmp(version, newver) < 0:
490eb8dc403SDave Cobbley                    version = newver
491eb8dc403SDave Cobbley
492eb8dc403SDave Cobbley        pupver = re.sub('_', '.', version[1])
493eb8dc403SDave Cobbley
494eb8dc403SDave Cobbley        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
495eb8dc403SDave Cobbley                (package, pupver or "N/A", current_version[1]))
496eb8dc403SDave Cobbley
497eb8dc403SDave Cobbley        if valid:
498eb8dc403SDave Cobbley            return pupver
499eb8dc403SDave Cobbley
500eb8dc403SDave Cobbley        return ""
501eb8dc403SDave Cobbley
50219323693SBrad Bishop    def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
503eb8dc403SDave Cobbley        """
504eb8dc403SDave Cobbley        Scan every directory in order to get upstream version.
505eb8dc403SDave Cobbley        """
506eb8dc403SDave Cobbley        version_dir = ['', '', '']
507eb8dc403SDave Cobbley        version = ['', '', '']
508eb8dc403SDave Cobbley
509ac69b488SWilliam A. Kennington III        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))")
510eb8dc403SDave Cobbley        s = dirver_regex.search(dirver)
511eb8dc403SDave Cobbley        if s:
512eb8dc403SDave Cobbley            version_dir[1] = s.group('ver')
513eb8dc403SDave Cobbley        else:
514eb8dc403SDave Cobbley            version_dir[1] = dirver
515eb8dc403SDave Cobbley
516eb8dc403SDave Cobbley        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
517eb8dc403SDave Cobbley                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
518eb8dc403SDave Cobbley        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))
519eb8dc403SDave Cobbley
520eb8dc403SDave Cobbley        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
521eb8dc403SDave Cobbley        if not soup:
522eb8dc403SDave Cobbley            return version[1]
523eb8dc403SDave Cobbley
524eb8dc403SDave Cobbley        for line in soup.find_all('a', href=True):
525eb8dc403SDave Cobbley            s = dirver_regex.search(line['href'].strip("/"))
526eb8dc403SDave Cobbley            if s:
527eb8dc403SDave Cobbley                sver = s.group('ver')
528eb8dc403SDave Cobbley
529eb8dc403SDave Cobbley                # When prefix is part of the version directory it need to
530eb8dc403SDave Cobbley                # ensure that only version directory is used so remove previous
531eb8dc403SDave Cobbley                # directories if exists.
532eb8dc403SDave Cobbley                #
533eb8dc403SDave Cobbley                # Example: pfx = '/dir1/dir2/v' and version = '2.5' the expected
534eb8dc403SDave Cobbley                # result is v2.5.
535eb8dc403SDave Cobbley                spfx = s.group('pfx').split('/')[-1]
536eb8dc403SDave Cobbley
537eb8dc403SDave Cobbley                version_dir_new = ['', sver, '']
538eb8dc403SDave Cobbley                if self._vercmp(version_dir, version_dir_new) <= 0:
539eb8dc403SDave Cobbley                    dirver_new = spfx + sver
540eb8dc403SDave Cobbley                    path = ud.path.replace(dirver, dirver_new, True) \
541eb8dc403SDave Cobbley                        .split(package)[0]
542eb8dc403SDave Cobbley                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
543eb8dc403SDave Cobbley                        ud.user, ud.pswd, {}])
544eb8dc403SDave Cobbley
545eb8dc403SDave Cobbley                    pupver = self._check_latest_version(uri,
546eb8dc403SDave Cobbley                            package, package_regex, current_version, ud, d)
547eb8dc403SDave Cobbley                    if pupver:
548eb8dc403SDave Cobbley                        version[1] = pupver
549eb8dc403SDave Cobbley
550eb8dc403SDave Cobbley                    version_dir = version_dir_new
551eb8dc403SDave Cobbley
552eb8dc403SDave Cobbley        return version[1]
553eb8dc403SDave Cobbley
554eb8dc403SDave Cobbley    def _init_regexes(self, package, ud, d):
555eb8dc403SDave Cobbley        """
556eb8dc403SDave Cobbley        Match as many patterns as possible such as:
557eb8dc403SDave Cobbley                gnome-common-2.20.0.tar.gz (most common format)
558eb8dc403SDave Cobbley                gtk+-2.90.1.tar.gz
559eb8dc403SDave Cobbley                xf86-input-synaptics-12.6.9.tar.gz
560eb8dc403SDave Cobbley                dri2proto-2.3.tar.gz
561eb8dc403SDave Cobbley                blktool_4.orig.tar.gz
562eb8dc403SDave Cobbley                libid3tag-0.15.1b.tar.gz
563eb8dc403SDave Cobbley                unzip552.tar.gz
564eb8dc403SDave Cobbley                icu4c-3_6-src.tgz
565eb8dc403SDave Cobbley                genext2fs_1.3.orig.tar.gz
566eb8dc403SDave Cobbley                gst-fluendo-mp3
567eb8dc403SDave Cobbley        """
568eb8dc403SDave Cobbley        # match most patterns which uses "-" as separator to version digits
56919323693SBrad Bishop        pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
570eb8dc403SDave Cobbley        # a loose pattern such as for unzip552.tar.gz
57119323693SBrad Bishop        pn_prefix2 = r"[a-zA-Z]+"
572eb8dc403SDave Cobbley        # a loose pattern such as for 80325-quicky-0.4.tar.gz
57319323693SBrad Bishop        pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
574eb8dc403SDave Cobbley        # Save the Package Name (pn) Regex for use later
57519323693SBrad Bishop        pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)
576eb8dc403SDave Cobbley
577eb8dc403SDave Cobbley        # match version
57819323693SBrad Bishop        pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"
579eb8dc403SDave Cobbley
580eb8dc403SDave Cobbley        # match arch
581eb8dc403SDave Cobbley        parch_regex = "-source|_all_"
582eb8dc403SDave Cobbley
583eb8dc403SDave Cobbley        # src.rpm extension was added only for rpm package. Can be removed if the rpm
584eb8dc403SDave Cobbley        # packaged will always be considered as having to be manually upgraded
58519323693SBrad Bishop        psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"
586eb8dc403SDave Cobbley
587eb8dc403SDave Cobbley        # match name, version and archive type of a package
58819323693SBrad Bishop        package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
589eb8dc403SDave Cobbley                                                    % (pn_regex, pver_regex, parch_regex, psuffix_regex))
590eb8dc403SDave Cobbley        self.suffix_regex_comp = re.compile(psuffix_regex)
591eb8dc403SDave Cobbley
592eb8dc403SDave Cobbley        # compile regex, can be specific by package or generic regex
593eb8dc403SDave Cobbley        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
594eb8dc403SDave Cobbley        if pn_regex:
595eb8dc403SDave Cobbley            package_custom_regex_comp = re.compile(pn_regex)
596eb8dc403SDave Cobbley        else:
597eb8dc403SDave Cobbley            version = self._parse_path(package_regex_comp, package)
598eb8dc403SDave Cobbley            if version:
599eb8dc403SDave Cobbley                package_custom_regex_comp = re.compile(
60019323693SBrad Bishop                    r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
601eb8dc403SDave Cobbley                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
602eb8dc403SDave Cobbley            else:
603eb8dc403SDave Cobbley                package_custom_regex_comp = None
604eb8dc403SDave Cobbley
605eb8dc403SDave Cobbley        return package_custom_regex_comp
606eb8dc403SDave Cobbley
607eb8dc403SDave Cobbley    def latest_versionstring(self, ud, d):
608eb8dc403SDave Cobbley        """
609eb8dc403SDave Cobbley        Manipulate the URL and try to obtain the latest package version
610eb8dc403SDave Cobbley
611eb8dc403SDave Cobbley        sanity check to ensure same name and type.
612eb8dc403SDave Cobbley        """
613eb8dc403SDave Cobbley        package = ud.path.split("/")[-1]
614eb8dc403SDave Cobbley        current_version = ['', d.getVar('PV'), '']
615eb8dc403SDave Cobbley
616eb8dc403SDave Cobbley        """possible to have no version in pkg name, such as spectrum-fw"""
61719323693SBrad Bishop        if not re.search(r"\d+", package):
618eb8dc403SDave Cobbley            current_version[1] = re.sub('_', '.', current_version[1])
619eb8dc403SDave Cobbley            current_version[1] = re.sub('-', '.', current_version[1])
620eb8dc403SDave Cobbley            return (current_version[1], '')
621eb8dc403SDave Cobbley
622eb8dc403SDave Cobbley        package_regex = self._init_regexes(package, ud, d)
623eb8dc403SDave Cobbley        if package_regex is None:
624eb8dc403SDave Cobbley            bb.warn("latest_versionstring: package %s don't match pattern" % (package))
625eb8dc403SDave Cobbley            return ('', '')
626eb8dc403SDave Cobbley        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))
627eb8dc403SDave Cobbley
628eb8dc403SDave Cobbley        uri = ""
629eb8dc403SDave Cobbley        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
630eb8dc403SDave Cobbley        if not regex_uri:
631eb8dc403SDave Cobbley            path = ud.path.split(package)[0]
632eb8dc403SDave Cobbley
633eb8dc403SDave Cobbley            # search for version matches on folders inside the path, like:
634eb8dc403SDave Cobbley            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
63519323693SBrad Bishop            dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
636eb8dc403SDave Cobbley            m = dirver_regex.search(path)
637eb8dc403SDave Cobbley            if m:
638eb8dc403SDave Cobbley                pn = d.getVar('PN')
639eb8dc403SDave Cobbley                dirver = m.group('dirver')
640eb8dc403SDave Cobbley
64119323693SBrad Bishop                dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
642eb8dc403SDave Cobbley                if not dirver_pn_regex.search(dirver):
643eb8dc403SDave Cobbley                    return (self._check_latest_version_by_dir(dirver,
644eb8dc403SDave Cobbley                        package, package_regex, current_version, ud, d), '')
645eb8dc403SDave Cobbley
646eb8dc403SDave Cobbley            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
647eb8dc403SDave Cobbley        else:
648eb8dc403SDave Cobbley            uri = regex_uri
649eb8dc403SDave Cobbley
650eb8dc403SDave Cobbley        return (self._check_latest_version(uri, package, package_regex,
651eb8dc403SDave Cobbley                current_version, ud, d), '')
652