xref: /openbmc/openbmc/poky/bitbake/lib/bb/fetch2/wget.py (revision 87f5cff0)
1eb8dc403SDave Cobbley"""
2eb8dc403SDave CobbleyBitBake 'Fetch' implementations
3eb8dc403SDave Cobbley
4eb8dc403SDave CobbleyClasses for obtaining upstream sources for the
5eb8dc403SDave CobbleyBitBake build tools.
6eb8dc403SDave Cobbley
7eb8dc403SDave Cobbley"""
8eb8dc403SDave Cobbley
9eb8dc403SDave Cobbley# Copyright (C) 2003, 2004  Chris Larson
10eb8dc403SDave Cobbley#
11c342db35SBrad Bishop# SPDX-License-Identifier: GPL-2.0-only
12eb8dc403SDave Cobbley#
13eb8dc403SDave Cobbley# Based on functions from the base bb module, Copyright 2003 Holger Schurig
14eb8dc403SDave Cobbley
1582c905dcSAndrew Geisslerimport shlex
16eb8dc403SDave Cobbleyimport re
17eb8dc403SDave Cobbleyimport tempfile
18eb8dc403SDave Cobbleyimport os
19eb8dc403SDave Cobbleyimport errno
20eb8dc403SDave Cobbleyimport bb
21eb8dc403SDave Cobbleyimport bb.progress
2219323693SBrad Bishopimport socket
2319323693SBrad Bishopimport http.client
24eb8dc403SDave Cobbleyimport urllib.request, urllib.parse, urllib.error
25eb8dc403SDave Cobbleyfrom   bb.fetch2 import FetchMethod
26eb8dc403SDave Cobbleyfrom   bb.fetch2 import FetchError
27eb8dc403SDave Cobbleyfrom   bb.fetch2 import logger
28eb8dc403SDave Cobbleyfrom   bb.fetch2 import runfetchcmd
29eb8dc403SDave Cobbleyfrom   bb.utils import export_proxies
30eb8dc403SDave Cobbleyfrom   bs4 import BeautifulSoup
31eb8dc403SDave Cobbleyfrom   bs4 import SoupStrainer
32eb8dc403SDave Cobbley
class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Turn wget's dot-style progress output into progress updates.

    This only works when wget is run with --progress=dot and verbose
    output enabled (-v, or at least neither -q nor -nv).
    """
    def __init__(self, d):
        super().__init__(d)
        # Report 0% straight away so that the progress bar is displayed
        self._fire_progress(0)

    def writeline(self, line):
        """
        Inspect one line of wget output.

        A line carrying "<percent>% <rate>" pairs is turned into a progress
        update (using the last pair on the line) and False is returned; any
        other line returns True.
        """
        matches = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if not matches:
            # NOTE(review): True presumably tells the base class to keep the
            # line in the log - confirm against LineFilterProgressHandler
            return True

        percent, speed = matches[-1]
        self.update(int(percent), speed + '/s')
        return False
52eb8dc403SDave Cobbley
53eb8dc403SDave Cobbley
54eb8dc403SDave Cobbleyclass Wget(FetchMethod):
550ca19ccfSPatrick Williams    """Class to fetch urls via 'wget'"""
56d1e89497SAndrew Geissler
57d1e89497SAndrew Geissler    # CDNs like CloudFlare may do a 'browser integrity test' which can fail
58d1e89497SAndrew Geissler    # with the standard wget/urllib User-Agent, so pretend to be a modern
59d1e89497SAndrew Geissler    # browser.
60d1e89497SAndrew Geissler    user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"
61d1e89497SAndrew Geissler
620ca19ccfSPatrick Williams    def check_certs(self, d):
630ca19ccfSPatrick Williams        """
640ca19ccfSPatrick Williams        Should certificates be checked?
650ca19ccfSPatrick Williams        """
660ca19ccfSPatrick Williams        return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0"
670ca19ccfSPatrick Williams
68eb8dc403SDave Cobbley    def supports(self, ud, d):
69eb8dc403SDave Cobbley        """
70eb8dc403SDave Cobbley        Check to see if a given url can be fetched with wget.
71eb8dc403SDave Cobbley        """
725199d831SAndrew Geissler        return ud.type in ['http', 'https', 'ftp', 'ftps']
73eb8dc403SDave Cobbley
74eb8dc403SDave Cobbley    def recommends_checksum(self, urldata):
75eb8dc403SDave Cobbley        return True
76eb8dc403SDave Cobbley
77eb8dc403SDave Cobbley    def urldata_init(self, ud, d):
78eb8dc403SDave Cobbley        if 'protocol' in ud.parm:
79eb8dc403SDave Cobbley            if ud.parm['protocol'] == 'git':
80eb8dc403SDave Cobbley                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)
81eb8dc403SDave Cobbley
82eb8dc403SDave Cobbley        if 'downloadfilename' in ud.parm:
83eb8dc403SDave Cobbley            ud.basename = ud.parm['downloadfilename']
84eb8dc403SDave Cobbley        else:
85eb8dc403SDave Cobbley            ud.basename = os.path.basename(ud.path)
86eb8dc403SDave Cobbley
87eb8dc403SDave Cobbley        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
88eb8dc403SDave Cobbley        if not ud.localfile:
89eb8dc403SDave Cobbley            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))
90eb8dc403SDave Cobbley
910ca19ccfSPatrick Williams        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp"
920ca19ccfSPatrick Williams
930ca19ccfSPatrick Williams        if not self.check_certs(d):
940ca19ccfSPatrick Williams            self.basecmd += " --no-check-certificate"
95eb8dc403SDave Cobbley
96eb8dc403SDave Cobbley    def _runwget(self, ud, d, command, quiet, workdir=None):
97eb8dc403SDave Cobbley
98eb8dc403SDave Cobbley        progresshandler = WgetProgressHandler(d)
99eb8dc403SDave Cobbley
100d1e89497SAndrew Geissler        logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
101eb8dc403SDave Cobbley        bb.fetch2.check_network_access(d, command, ud.url)
102eb8dc403SDave Cobbley        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)
103eb8dc403SDave Cobbley
104eb8dc403SDave Cobbley    def download(self, ud, d):
105eb8dc403SDave Cobbley        """Fetch urls"""
106eb8dc403SDave Cobbley
107eb8dc403SDave Cobbley        fetchcmd = self.basecmd
108eb8dc403SDave Cobbley
10978b72798SAndrew Geissler        localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + ".tmp"
11082c905dcSAndrew Geissler        bb.utils.mkdirhier(os.path.dirname(localpath))
11182c905dcSAndrew Geissler        fetchcmd += " -O %s" % shlex.quote(localpath)
112eb8dc403SDave Cobbley
113eb8dc403SDave Cobbley        if ud.user and ud.pswd:
114595f6308SAndrew Geissler            fetchcmd += " --auth-no-challenge"
115595f6308SAndrew Geissler            if ud.parm.get("redirectauth", "1") == "1":
116595f6308SAndrew Geissler                # An undocumented feature of wget is that if the
117595f6308SAndrew Geissler                # username/password are specified on the URI, wget will only
118595f6308SAndrew Geissler                # send the Authorization header to the first host and not to
119595f6308SAndrew Geissler                # any hosts that it is redirected to.  With the increasing
120595f6308SAndrew Geissler                # usage of temporary AWS URLs, this difference now matters as
121595f6308SAndrew Geissler                # AWS will reject any request that has authentication both in
122595f6308SAndrew Geissler                # the query parameters (from the redirect) and in the
123595f6308SAndrew Geissler                # Authorization header.
124595f6308SAndrew Geissler                fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)
125eb8dc403SDave Cobbley
126eb8dc403SDave Cobbley        uri = ud.url.split(";")[0]
127eb8dc403SDave Cobbley        if os.path.exists(ud.localpath):
128eb8dc403SDave Cobbley            # file exists, but we didnt complete it.. trying again..
129eb8dc403SDave Cobbley            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
130eb8dc403SDave Cobbley        else:
131eb8dc403SDave Cobbley            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)
132eb8dc403SDave Cobbley
133eb8dc403SDave Cobbley        self._runwget(ud, d, fetchcmd, False)
134eb8dc403SDave Cobbley
135*87f5cff0SAndrew Geissler        # Try and verify any checksum now, meaning if it isn't correct, we don't remove the
136*87f5cff0SAndrew Geissler        # original file, which might be a race (imagine two recipes referencing the same
137*87f5cff0SAndrew Geissler        # source, one with an incorrect checksum)
138*87f5cff0SAndrew Geissler        bb.fetch2.verify_checksum(ud, d, localpath=localpath, fatal_nochecksum=False)
139*87f5cff0SAndrew Geissler
14078b72798SAndrew Geissler        # Remove the ".tmp" and move the file into position atomically
14178b72798SAndrew Geissler        # Our lock prevents multiple writers but mirroring code may grab incomplete files
14278b72798SAndrew Geissler        os.rename(localpath, localpath[:-4])
14378b72798SAndrew Geissler
144eb8dc403SDave Cobbley        # Sanity check since wget can pretend it succeed when it didn't
145eb8dc403SDave Cobbley        # Also, this used to happen if sourceforge sent us to the mirror page
146eb8dc403SDave Cobbley        if not os.path.exists(ud.localpath):
147eb8dc403SDave Cobbley            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)
148eb8dc403SDave Cobbley
149eb8dc403SDave Cobbley        if os.path.getsize(ud.localpath) == 0:
150eb8dc403SDave Cobbley            os.remove(ud.localpath)
151eb8dc403SDave Cobbley            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
152eb8dc403SDave Cobbley
153eb8dc403SDave Cobbley        return True
154eb8dc403SDave Cobbley
155eb8dc403SDave Cobbley    def checkstatus(self, fetch, ud, d, try_again=True):
156eb8dc403SDave Cobbley        class HTTPConnectionCache(http.client.HTTPConnection):
157eb8dc403SDave Cobbley            if fetch.connection_cache:
158eb8dc403SDave Cobbley                def connect(self):
159eb8dc403SDave Cobbley                    """Connect to the host and port specified in __init__."""
160eb8dc403SDave Cobbley
161eb8dc403SDave Cobbley                    sock = fetch.connection_cache.get_connection(self.host, self.port)
162eb8dc403SDave Cobbley                    if sock:
163eb8dc403SDave Cobbley                        self.sock = sock
164eb8dc403SDave Cobbley                    else:
165eb8dc403SDave Cobbley                        self.sock = socket.create_connection((self.host, self.port),
166eb8dc403SDave Cobbley                                    self.timeout, self.source_address)
167eb8dc403SDave Cobbley                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)
168eb8dc403SDave Cobbley
169eb8dc403SDave Cobbley                    if self._tunnel_host:
170eb8dc403SDave Cobbley                        self._tunnel()
171eb8dc403SDave Cobbley
172eb8dc403SDave Cobbley        class CacheHTTPHandler(urllib.request.HTTPHandler):
173eb8dc403SDave Cobbley            def http_open(self, req):
174eb8dc403SDave Cobbley                return self.do_open(HTTPConnectionCache, req)
175eb8dc403SDave Cobbley
176eb8dc403SDave Cobbley            def do_open(self, http_class, req):
177eb8dc403SDave Cobbley                """Return an addinfourl object for the request, using http_class.
178eb8dc403SDave Cobbley
179eb8dc403SDave Cobbley                http_class must implement the HTTPConnection API from httplib.
180eb8dc403SDave Cobbley                The addinfourl return value is a file-like object.  It also
181eb8dc403SDave Cobbley                has methods and attributes including:
182eb8dc403SDave Cobbley                    - info(): return a mimetools.Message object for the headers
183eb8dc403SDave Cobbley                    - geturl(): return the original request URL
184eb8dc403SDave Cobbley                    - code: HTTP status code
185eb8dc403SDave Cobbley                """
186eb8dc403SDave Cobbley                host = req.host
187eb8dc403SDave Cobbley                if not host:
18819323693SBrad Bishop                    raise urllib.error.URLError('no host given')
189eb8dc403SDave Cobbley
190eb8dc403SDave Cobbley                h = http_class(host, timeout=req.timeout) # will parse host:port
191eb8dc403SDave Cobbley                h.set_debuglevel(self._debuglevel)
192eb8dc403SDave Cobbley
193eb8dc403SDave Cobbley                headers = dict(req.unredirected_hdrs)
194eb8dc403SDave Cobbley                headers.update(dict((k, v) for k, v in list(req.headers.items())
195eb8dc403SDave Cobbley                            if k not in headers))
196eb8dc403SDave Cobbley
197eb8dc403SDave Cobbley                # We want to make an HTTP/1.1 request, but the addinfourl
198eb8dc403SDave Cobbley                # class isn't prepared to deal with a persistent connection.
199eb8dc403SDave Cobbley                # It will try to read all remaining data from the socket,
200eb8dc403SDave Cobbley                # which will block while the server waits for the next request.
201eb8dc403SDave Cobbley                # So make sure the connection gets closed after the (only)
202eb8dc403SDave Cobbley                # request.
203eb8dc403SDave Cobbley
204eb8dc403SDave Cobbley                # Don't close connection when connection_cache is enabled,
205eb8dc403SDave Cobbley                if fetch.connection_cache is None:
206eb8dc403SDave Cobbley                    headers["Connection"] = "close"
207eb8dc403SDave Cobbley                else:
208eb8dc403SDave Cobbley                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0
209eb8dc403SDave Cobbley
210eb8dc403SDave Cobbley                headers = dict(
211eb8dc403SDave Cobbley                    (name.title(), val) for name, val in list(headers.items()))
212eb8dc403SDave Cobbley
213eb8dc403SDave Cobbley                if req._tunnel_host:
214eb8dc403SDave Cobbley                    tunnel_headers = {}
215eb8dc403SDave Cobbley                    proxy_auth_hdr = "Proxy-Authorization"
216eb8dc403SDave Cobbley                    if proxy_auth_hdr in headers:
217eb8dc403SDave Cobbley                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
218eb8dc403SDave Cobbley                        # Proxy-Authorization should not be sent to origin
219eb8dc403SDave Cobbley                        # server.
220eb8dc403SDave Cobbley                        del headers[proxy_auth_hdr]
221eb8dc403SDave Cobbley                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)
222eb8dc403SDave Cobbley
223eb8dc403SDave Cobbley                try:
224eb8dc403SDave Cobbley                    h.request(req.get_method(), req.selector, req.data, headers)
225eb8dc403SDave Cobbley                except socket.error as err: # XXX what error?
226eb8dc403SDave Cobbley                    # Don't close connection when cache is enabled.
227eb8dc403SDave Cobbley                    # Instead, try to detect connections that are no longer
228eb8dc403SDave Cobbley                    # usable (for example, closed unexpectedly) and remove
229eb8dc403SDave Cobbley                    # them from the cache.
230eb8dc403SDave Cobbley                    if fetch.connection_cache is None:
231eb8dc403SDave Cobbley                        h.close()
232eb8dc403SDave Cobbley                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
233eb8dc403SDave Cobbley                        # This happens when the server closes the connection despite the Keep-Alive.
234eb8dc403SDave Cobbley                        # Apparently urllib then uses the file descriptor, expecting it to be
235eb8dc403SDave Cobbley                        # connected, when in reality the connection is already gone.
236eb8dc403SDave Cobbley                        # We let the request fail and expect it to be
237eb8dc403SDave Cobbley                        # tried once more ("try_again" in check_status()),
238eb8dc403SDave Cobbley                        # with the dead connection removed from the cache.
2397e0e3c0cSAndrew Geissler                        # If it still fails, we give up, which can happen for bad
240eb8dc403SDave Cobbley                        # HTTP proxy settings.
241eb8dc403SDave Cobbley                        fetch.connection_cache.remove_connection(h.host, h.port)
242eb8dc403SDave Cobbley                    raise urllib.error.URLError(err)
243eb8dc403SDave Cobbley                else:
244eb8dc403SDave Cobbley                    r = h.getresponse()
245eb8dc403SDave Cobbley
246eb8dc403SDave Cobbley                # Pick apart the HTTPResponse object to get the addinfourl
247eb8dc403SDave Cobbley                # object initialized properly.
248eb8dc403SDave Cobbley
249eb8dc403SDave Cobbley                # Wrap the HTTPResponse object in socket's file object adapter
250eb8dc403SDave Cobbley                # for Windows.  That adapter calls recv(), so delegate recv()
251eb8dc403SDave Cobbley                # to read().  This weird wrapping allows the returned object to
252eb8dc403SDave Cobbley                # have readline() and readlines() methods.
253eb8dc403SDave Cobbley
254eb8dc403SDave Cobbley                # XXX It might be better to extract the read buffering code
255eb8dc403SDave Cobbley                # out of socket._fileobject() and into a base class.
256eb8dc403SDave Cobbley                r.recv = r.read
257eb8dc403SDave Cobbley
258eb8dc403SDave Cobbley                # no data, just have to read
259eb8dc403SDave Cobbley                r.read()
260eb8dc403SDave Cobbley                class fp_dummy(object):
261eb8dc403SDave Cobbley                    def read(self):
262eb8dc403SDave Cobbley                        return ""
263eb8dc403SDave Cobbley                    def readline(self):
264eb8dc403SDave Cobbley                        return ""
265eb8dc403SDave Cobbley                    def close(self):
266eb8dc403SDave Cobbley                        pass
267eb8dc403SDave Cobbley                    closed = False
268eb8dc403SDave Cobbley
26919323693SBrad Bishop                resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
270eb8dc403SDave Cobbley                resp.code = r.status
271eb8dc403SDave Cobbley                resp.msg = r.reason
272eb8dc403SDave Cobbley
273eb8dc403SDave Cobbley                # Close connection when server request it.
274eb8dc403SDave Cobbley                if fetch.connection_cache is not None:
275eb8dc403SDave Cobbley                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
276eb8dc403SDave Cobbley                        fetch.connection_cache.remove_connection(h.host, h.port)
277eb8dc403SDave Cobbley
278eb8dc403SDave Cobbley                return resp
279eb8dc403SDave Cobbley
280eb8dc403SDave Cobbley        class HTTPMethodFallback(urllib.request.BaseHandler):
281eb8dc403SDave Cobbley            """
282eb8dc403SDave Cobbley            Fallback to GET if HEAD is not allowed (405 HTTP error)
283eb8dc403SDave Cobbley            """
284eb8dc403SDave Cobbley            def http_error_405(self, req, fp, code, msg, headers):
285eb8dc403SDave Cobbley                fp.read()
286eb8dc403SDave Cobbley                fp.close()
287eb8dc403SDave Cobbley
28808902b01SBrad Bishop                if req.get_method() != 'GET':
289eb8dc403SDave Cobbley                    newheaders = dict((k, v) for k, v in list(req.headers.items())
290eb8dc403SDave Cobbley                                      if k.lower() not in ("content-length", "content-type"))
291eb8dc403SDave Cobbley                    return self.parent.open(urllib.request.Request(req.get_full_url(),
292eb8dc403SDave Cobbley                                                            headers=newheaders,
293eb8dc403SDave Cobbley                                                            origin_req_host=req.origin_req_host,
294eb8dc403SDave Cobbley                                                            unverifiable=True))
295eb8dc403SDave Cobbley
29608902b01SBrad Bishop                raise urllib.request.HTTPError(req, code, msg, headers, None)
29719323693SBrad Bishop
29819323693SBrad Bishop            # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
29919323693SBrad Bishop            # Forbidden when they actually mean 405 Method Not Allowed.
300eb8dc403SDave Cobbley            http_error_403 = http_error_405
301eb8dc403SDave Cobbley
302eb8dc403SDave Cobbley
303eb8dc403SDave Cobbley        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
304eb8dc403SDave Cobbley            """
305eb8dc403SDave Cobbley            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
306eb8dc403SDave Cobbley            when we want to follow redirects using the original method.
307eb8dc403SDave Cobbley            """
308eb8dc403SDave Cobbley            def redirect_request(self, req, fp, code, msg, headers, newurl):
309eb8dc403SDave Cobbley                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
31019323693SBrad Bishop                newreq.get_method = req.get_method
311eb8dc403SDave Cobbley                return newreq
312eb8dc403SDave Cobbley
3130ca19ccfSPatrick Williams        # We need to update the environment here as both the proxy and HTTPS
3140ca19ccfSPatrick Williams        # handlers need variables set. The proxy needs http_proxy and friends to
3150ca19ccfSPatrick Williams        # be set, and HTTPSHandler ends up calling into openssl to load the
3160ca19ccfSPatrick Williams        # certificates. In buildtools configurations this will be looking at the
3170ca19ccfSPatrick Williams        # wrong place for certificates by default: we set SSL_CERT_FILE to the
3180ca19ccfSPatrick Williams        # right location in the buildtools environment script but as BitBake
3190ca19ccfSPatrick Williams        # prunes prunes the environment this is lost. When binaries are executed
3200ca19ccfSPatrick Williams        # runfetchcmd ensures these values are in the environment, but this is
3210ca19ccfSPatrick Williams        # pure Python so we need to update the environment.
3220ca19ccfSPatrick Williams        #
3230ca19ccfSPatrick Williams        # Avoid tramping the environment too much by using bb.utils.environment
3240ca19ccfSPatrick Williams        # to scope the changes to the build_opener request, which is when the
3250ca19ccfSPatrick Williams        # environment lookups happen.
3267e0e3c0cSAndrew Geissler        newenv = bb.fetch2.get_fetcher_environment(d)
3270ca19ccfSPatrick Williams
3280ca19ccfSPatrick Williams        with bb.utils.environment(**newenv):
329eb8dc403SDave Cobbley            import ssl
3300ca19ccfSPatrick Williams
3310ca19ccfSPatrick Williams            if self.check_certs(d):
3320ca19ccfSPatrick Williams                context = ssl.create_default_context()
3330ca19ccfSPatrick Williams            else:
3340ca19ccfSPatrick Williams                context = ssl._create_unverified_context()
3350ca19ccfSPatrick Williams
3360ca19ccfSPatrick Williams            handlers = [FixedHTTPRedirectHandler,
3370ca19ccfSPatrick Williams                        HTTPMethodFallback,
3380ca19ccfSPatrick Williams                        urllib.request.ProxyHandler(),
3390ca19ccfSPatrick Williams                        CacheHTTPHandler(),
3400ca19ccfSPatrick Williams                        urllib.request.HTTPSHandler(context=context)]
341eb8dc403SDave Cobbley            opener = urllib.request.build_opener(*handlers)
342eb8dc403SDave Cobbley
343eb8dc403SDave Cobbley            try:
344eb8dc403SDave Cobbley                uri = ud.url.split(";")[0]
345eb8dc403SDave Cobbley                r = urllib.request.Request(uri)
346eb8dc403SDave Cobbley                r.get_method = lambda: "HEAD"
347eb8dc403SDave Cobbley                # Some servers (FusionForge, as used on Alioth) require that the
348eb8dc403SDave Cobbley                # optional Accept header is set.
349eb8dc403SDave Cobbley                r.add_header("Accept", "*/*")
350d1e89497SAndrew Geissler                r.add_header("User-Agent", self.user_agent)
351eb8dc403SDave Cobbley                def add_basic_auth(login_str, request):
352eb8dc403SDave Cobbley                    '''Adds Basic auth to http request, pass in login:password as string'''
353eb8dc403SDave Cobbley                    import base64
354eb8dc403SDave Cobbley                    encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
355eb8dc403SDave Cobbley                    authheader = "Basic %s" % encodeuser
356eb8dc403SDave Cobbley                    r.add_header("Authorization", authheader)
357eb8dc403SDave Cobbley
35819323693SBrad Bishop                if ud.user and ud.pswd:
35919323693SBrad Bishop                    add_basic_auth(ud.user + ':' + ud.pswd, r)
360eb8dc403SDave Cobbley
361eb8dc403SDave Cobbley                try:
36219323693SBrad Bishop                    import netrc
363eb8dc403SDave Cobbley                    n = netrc.netrc()
364eb8dc403SDave Cobbley                    login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
365eb8dc403SDave Cobbley                    add_basic_auth("%s:%s" % (login, password), r)
366eb8dc403SDave Cobbley                except (TypeError, ImportError, IOError, netrc.NetrcParseError):
367eb8dc403SDave Cobbley                    pass
368eb8dc403SDave Cobbley
369595f6308SAndrew Geissler                with opener.open(r, timeout=30) as response:
370eb8dc403SDave Cobbley                    pass
371eb8dc403SDave Cobbley            except urllib.error.URLError as e:
372eb8dc403SDave Cobbley                if try_again:
373d1e89497SAndrew Geissler                    logger.debug2("checkstatus: trying again")
374eb8dc403SDave Cobbley                    return self.checkstatus(fetch, ud, d, False)
375eb8dc403SDave Cobbley                else:
376eb8dc403SDave Cobbley                    # debug for now to avoid spamming the logs in e.g. remote sstate searches
377d1e89497SAndrew Geissler                    logger.debug2("checkstatus() urlopen failed: %s" % e)
378eb8dc403SDave Cobbley                    return False
37990fd73cbSAndrew Geissler            except ConnectionResetError as e:
38090fd73cbSAndrew Geissler                if try_again:
38190fd73cbSAndrew Geissler                    logger.debug2("checkstatus: trying again")
38290fd73cbSAndrew Geissler                    return self.checkstatus(fetch, ud, d, False)
38390fd73cbSAndrew Geissler                else:
38490fd73cbSAndrew Geissler                    # debug for now to avoid spamming the logs in e.g. remote sstate searches
38590fd73cbSAndrew Geissler                    logger.debug2("checkstatus() urlopen failed: %s" % e)
38690fd73cbSAndrew Geissler                    return False
387d159c7fbSAndrew Geissler
388eb8dc403SDave Cobbley        return True
389eb8dc403SDave Cobbley
390eb8dc403SDave Cobbley    def _parse_path(self, regex, s):
391eb8dc403SDave Cobbley        """
392eb8dc403SDave Cobbley        Find and group name, version and archive type in the given string s
393eb8dc403SDave Cobbley        """
394eb8dc403SDave Cobbley
395eb8dc403SDave Cobbley        m = regex.search(s)
396eb8dc403SDave Cobbley        if m:
397eb8dc403SDave Cobbley            pname = ''
398eb8dc403SDave Cobbley            pver = ''
399eb8dc403SDave Cobbley            ptype = ''
400eb8dc403SDave Cobbley
401eb8dc403SDave Cobbley            mdict = m.groupdict()
402eb8dc403SDave Cobbley            if 'name' in mdict.keys():
403eb8dc403SDave Cobbley                pname = mdict['name']
404eb8dc403SDave Cobbley            if 'pver' in mdict.keys():
405eb8dc403SDave Cobbley                pver = mdict['pver']
406eb8dc403SDave Cobbley            if 'type' in mdict.keys():
407eb8dc403SDave Cobbley                ptype = mdict['type']
408eb8dc403SDave Cobbley
409eb8dc403SDave Cobbley            bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))
410eb8dc403SDave Cobbley
411eb8dc403SDave Cobbley            return (pname, pver, ptype)
412eb8dc403SDave Cobbley
413eb8dc403SDave Cobbley        return None
414eb8dc403SDave Cobbley
415eb8dc403SDave Cobbley    def _modelate_version(self, version):
416eb8dc403SDave Cobbley        if version[0] in ['.', '-']:
417eb8dc403SDave Cobbley            if version[1].isdigit():
418eb8dc403SDave Cobbley                version = version[1] + version[0] + version[2:len(version)]
419eb8dc403SDave Cobbley            else:
420eb8dc403SDave Cobbley                version = version[1:len(version)]
421eb8dc403SDave Cobbley
422eb8dc403SDave Cobbley        version = re.sub('-', '.', version)
423eb8dc403SDave Cobbley        version = re.sub('_', '.', version)
424eb8dc403SDave Cobbley        version = re.sub('(rc)+', '.1000.', version)
425eb8dc403SDave Cobbley        version = re.sub('(beta)+', '.100.', version)
426eb8dc403SDave Cobbley        version = re.sub('(alpha)+', '.10.', version)
427eb8dc403SDave Cobbley        if version[0] == 'v':
428eb8dc403SDave Cobbley            version = version[1:len(version)]
429eb8dc403SDave Cobbley        return version
430eb8dc403SDave Cobbley
431eb8dc403SDave Cobbley    def _vercmp(self, old, new):
432eb8dc403SDave Cobbley        """
433eb8dc403SDave Cobbley        Check whether 'new' is newer than 'old' version. We use existing vercmp() for the
434eb8dc403SDave Cobbley        purpose. PE is cleared in comparison as it's not for build, and PR is cleared too
435eb8dc403SDave Cobbley        for simplicity as it's somehow difficult to get from various upstream format
436eb8dc403SDave Cobbley        """
437eb8dc403SDave Cobbley
438eb8dc403SDave Cobbley        (oldpn, oldpv, oldsuffix) = old
439eb8dc403SDave Cobbley        (newpn, newpv, newsuffix) = new
440eb8dc403SDave Cobbley
44119323693SBrad Bishop        # Check for a new suffix type that we have never heard of before
44219323693SBrad Bishop        if newsuffix:
443eb8dc403SDave Cobbley            m = self.suffix_regex_comp.search(newsuffix)
444eb8dc403SDave Cobbley            if not m:
445eb8dc403SDave Cobbley                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
446eb8dc403SDave Cobbley                return False
447eb8dc403SDave Cobbley
44819323693SBrad Bishop        # Not our package so ignore it
449eb8dc403SDave Cobbley        if oldpn != newpn:
450eb8dc403SDave Cobbley            return False
451eb8dc403SDave Cobbley
452eb8dc403SDave Cobbley        oldpv = self._modelate_version(oldpv)
453eb8dc403SDave Cobbley        newpv = self._modelate_version(newpv)
454eb8dc403SDave Cobbley
455eb8dc403SDave Cobbley        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))
456eb8dc403SDave Cobbley
457eb8dc403SDave Cobbley    def _fetch_index(self, uri, ud, d):
458eb8dc403SDave Cobbley        """
459eb8dc403SDave Cobbley        Run fetch checkstatus to get directory information
460eb8dc403SDave Cobbley        """
461eb8dc403SDave Cobbley        f = tempfile.NamedTemporaryFile()
462eb8dc403SDave Cobbley        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
463eb8dc403SDave Cobbley            fetchcmd = self.basecmd
464d1e89497SAndrew Geissler            fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'"
465eb8dc403SDave Cobbley            try:
466eb8dc403SDave Cobbley                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
467eb8dc403SDave Cobbley                fetchresult = f.read()
468eb8dc403SDave Cobbley            except bb.fetch2.BBFetchException:
469eb8dc403SDave Cobbley                fetchresult = ""
470eb8dc403SDave Cobbley
471eb8dc403SDave Cobbley        return fetchresult
472eb8dc403SDave Cobbley
473eb8dc403SDave Cobbley    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
474eb8dc403SDave Cobbley        """
475eb8dc403SDave Cobbley        Return the latest version of a package inside a given directory path
476eb8dc403SDave Cobbley        If error or no version, return ""
477eb8dc403SDave Cobbley        """
478eb8dc403SDave Cobbley        valid = 0
479eb8dc403SDave Cobbley        version = ['', '', '']
480eb8dc403SDave Cobbley
481eb8dc403SDave Cobbley        bb.debug(3, "VersionURL: %s" % (url))
482eb8dc403SDave Cobbley        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
483eb8dc403SDave Cobbley        if not soup:
484eb8dc403SDave Cobbley            bb.debug(3, "*** %s NO SOUP" % (url))
485eb8dc403SDave Cobbley            return ""
486eb8dc403SDave Cobbley
487eb8dc403SDave Cobbley        for line in soup.find_all('a', href=True):
488eb8dc403SDave Cobbley            bb.debug(3, "line['href'] = '%s'" % (line['href']))
489eb8dc403SDave Cobbley            bb.debug(3, "line = '%s'" % (str(line)))
490eb8dc403SDave Cobbley
491eb8dc403SDave Cobbley            newver = self._parse_path(package_regex, line['href'])
492eb8dc403SDave Cobbley            if not newver:
493eb8dc403SDave Cobbley                newver = self._parse_path(package_regex, str(line))
494eb8dc403SDave Cobbley
495eb8dc403SDave Cobbley            if newver:
496eb8dc403SDave Cobbley                bb.debug(3, "Upstream version found: %s" % newver[1])
497eb8dc403SDave Cobbley                if valid == 0:
498eb8dc403SDave Cobbley                    version = newver
499eb8dc403SDave Cobbley                    valid = 1
500eb8dc403SDave Cobbley                elif self._vercmp(version, newver) < 0:
501eb8dc403SDave Cobbley                    version = newver
502eb8dc403SDave Cobbley
503eb8dc403SDave Cobbley        pupver = re.sub('_', '.', version[1])
504eb8dc403SDave Cobbley
505eb8dc403SDave Cobbley        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
506eb8dc403SDave Cobbley                (package, pupver or "N/A", current_version[1]))
507eb8dc403SDave Cobbley
508eb8dc403SDave Cobbley        if valid:
509eb8dc403SDave Cobbley            return pupver
510eb8dc403SDave Cobbley
511eb8dc403SDave Cobbley        return ""
512eb8dc403SDave Cobbley
51319323693SBrad Bishop    def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
514eb8dc403SDave Cobbley        """
515eb8dc403SDave Cobbley        Scan every directory in order to get upstream version.
516eb8dc403SDave Cobbley        """
517eb8dc403SDave Cobbley        version_dir = ['', '', '']
518eb8dc403SDave Cobbley        version = ['', '', '']
519eb8dc403SDave Cobbley
520ac69b488SWilliam A. Kennington III        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))")
521eb8dc403SDave Cobbley        s = dirver_regex.search(dirver)
522eb8dc403SDave Cobbley        if s:
523eb8dc403SDave Cobbley            version_dir[1] = s.group('ver')
524eb8dc403SDave Cobbley        else:
525eb8dc403SDave Cobbley            version_dir[1] = dirver
526eb8dc403SDave Cobbley
527eb8dc403SDave Cobbley        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
528eb8dc403SDave Cobbley                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
529eb8dc403SDave Cobbley        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))
530eb8dc403SDave Cobbley
531eb8dc403SDave Cobbley        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
532eb8dc403SDave Cobbley        if not soup:
533eb8dc403SDave Cobbley            return version[1]
534eb8dc403SDave Cobbley
535eb8dc403SDave Cobbley        for line in soup.find_all('a', href=True):
536eb8dc403SDave Cobbley            s = dirver_regex.search(line['href'].strip("/"))
537eb8dc403SDave Cobbley            if s:
538eb8dc403SDave Cobbley                sver = s.group('ver')
539eb8dc403SDave Cobbley
540eb8dc403SDave Cobbley                # When prefix is part of the version directory it need to
541eb8dc403SDave Cobbley                # ensure that only version directory is used so remove previous
542eb8dc403SDave Cobbley                # directories if exists.
543eb8dc403SDave Cobbley                #
544eb8dc403SDave Cobbley                # Example: pfx = '/dir1/dir2/v' and version = '2.5' the expected
545eb8dc403SDave Cobbley                # result is v2.5.
546eb8dc403SDave Cobbley                spfx = s.group('pfx').split('/')[-1]
547eb8dc403SDave Cobbley
548eb8dc403SDave Cobbley                version_dir_new = ['', sver, '']
549eb8dc403SDave Cobbley                if self._vercmp(version_dir, version_dir_new) <= 0:
550eb8dc403SDave Cobbley                    dirver_new = spfx + sver
551eb8dc403SDave Cobbley                    path = ud.path.replace(dirver, dirver_new, True) \
552eb8dc403SDave Cobbley                        .split(package)[0]
553eb8dc403SDave Cobbley                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
554eb8dc403SDave Cobbley                        ud.user, ud.pswd, {}])
555eb8dc403SDave Cobbley
556eb8dc403SDave Cobbley                    pupver = self._check_latest_version(uri,
557eb8dc403SDave Cobbley                            package, package_regex, current_version, ud, d)
558eb8dc403SDave Cobbley                    if pupver:
559eb8dc403SDave Cobbley                        version[1] = pupver
560eb8dc403SDave Cobbley
561eb8dc403SDave Cobbley                    version_dir = version_dir_new
562eb8dc403SDave Cobbley
563eb8dc403SDave Cobbley        return version[1]
564eb8dc403SDave Cobbley
565eb8dc403SDave Cobbley    def _init_regexes(self, package, ud, d):
566eb8dc403SDave Cobbley        """
567eb8dc403SDave Cobbley        Match as many patterns as possible such as:
568eb8dc403SDave Cobbley                gnome-common-2.20.0.tar.gz (most common format)
569eb8dc403SDave Cobbley                gtk+-2.90.1.tar.gz
570eb8dc403SDave Cobbley                xf86-input-synaptics-12.6.9.tar.gz
571eb8dc403SDave Cobbley                dri2proto-2.3.tar.gz
572eb8dc403SDave Cobbley                blktool_4.orig.tar.gz
573eb8dc403SDave Cobbley                libid3tag-0.15.1b.tar.gz
574eb8dc403SDave Cobbley                unzip552.tar.gz
575eb8dc403SDave Cobbley                icu4c-3_6-src.tgz
576eb8dc403SDave Cobbley                genext2fs_1.3.orig.tar.gz
577eb8dc403SDave Cobbley                gst-fluendo-mp3
578eb8dc403SDave Cobbley        """
579eb8dc403SDave Cobbley        # match most patterns which uses "-" as separator to version digits
58019323693SBrad Bishop        pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
581eb8dc403SDave Cobbley        # a loose pattern such as for unzip552.tar.gz
58219323693SBrad Bishop        pn_prefix2 = r"[a-zA-Z]+"
583eb8dc403SDave Cobbley        # a loose pattern such as for 80325-quicky-0.4.tar.gz
58419323693SBrad Bishop        pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
585eb8dc403SDave Cobbley        # Save the Package Name (pn) Regex for use later
58619323693SBrad Bishop        pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)
587eb8dc403SDave Cobbley
588eb8dc403SDave Cobbley        # match version
58919323693SBrad Bishop        pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"
590eb8dc403SDave Cobbley
591eb8dc403SDave Cobbley        # match arch
592eb8dc403SDave Cobbley        parch_regex = "-source|_all_"
593eb8dc403SDave Cobbley
594eb8dc403SDave Cobbley        # src.rpm extension was added only for rpm package. Can be removed if the rpm
595eb8dc403SDave Cobbley        # packaged will always be considered as having to be manually upgraded
596595f6308SAndrew Geissler        psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)"
597eb8dc403SDave Cobbley
598eb8dc403SDave Cobbley        # match name, version and archive type of a package
59919323693SBrad Bishop        package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
600eb8dc403SDave Cobbley                                                    % (pn_regex, pver_regex, parch_regex, psuffix_regex))
601eb8dc403SDave Cobbley        self.suffix_regex_comp = re.compile(psuffix_regex)
602eb8dc403SDave Cobbley
603eb8dc403SDave Cobbley        # compile regex, can be specific by package or generic regex
604eb8dc403SDave Cobbley        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
605eb8dc403SDave Cobbley        if pn_regex:
606eb8dc403SDave Cobbley            package_custom_regex_comp = re.compile(pn_regex)
607eb8dc403SDave Cobbley        else:
608eb8dc403SDave Cobbley            version = self._parse_path(package_regex_comp, package)
609eb8dc403SDave Cobbley            if version:
610eb8dc403SDave Cobbley                package_custom_regex_comp = re.compile(
61119323693SBrad Bishop                    r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
612eb8dc403SDave Cobbley                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
613eb8dc403SDave Cobbley            else:
614eb8dc403SDave Cobbley                package_custom_regex_comp = None
615eb8dc403SDave Cobbley
616eb8dc403SDave Cobbley        return package_custom_regex_comp
617eb8dc403SDave Cobbley
618eb8dc403SDave Cobbley    def latest_versionstring(self, ud, d):
619eb8dc403SDave Cobbley        """
620eb8dc403SDave Cobbley        Manipulate the URL and try to obtain the latest package version
621eb8dc403SDave Cobbley
622eb8dc403SDave Cobbley        sanity check to ensure same name and type.
623eb8dc403SDave Cobbley        """
624eb8dc403SDave Cobbley        package = ud.path.split("/")[-1]
625eb8dc403SDave Cobbley        current_version = ['', d.getVar('PV'), '']
626eb8dc403SDave Cobbley
627eb8dc403SDave Cobbley        """possible to have no version in pkg name, such as spectrum-fw"""
62819323693SBrad Bishop        if not re.search(r"\d+", package):
629eb8dc403SDave Cobbley            current_version[1] = re.sub('_', '.', current_version[1])
630eb8dc403SDave Cobbley            current_version[1] = re.sub('-', '.', current_version[1])
631eb8dc403SDave Cobbley            return (current_version[1], '')
632eb8dc403SDave Cobbley
633eb8dc403SDave Cobbley        package_regex = self._init_regexes(package, ud, d)
634eb8dc403SDave Cobbley        if package_regex is None:
635eb8dc403SDave Cobbley            bb.warn("latest_versionstring: package %s don't match pattern" % (package))
636eb8dc403SDave Cobbley            return ('', '')
637eb8dc403SDave Cobbley        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))
638eb8dc403SDave Cobbley
639eb8dc403SDave Cobbley        uri = ""
640eb8dc403SDave Cobbley        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
641eb8dc403SDave Cobbley        if not regex_uri:
642eb8dc403SDave Cobbley            path = ud.path.split(package)[0]
643eb8dc403SDave Cobbley
644eb8dc403SDave Cobbley            # search for version matches on folders inside the path, like:
645eb8dc403SDave Cobbley            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
64619323693SBrad Bishop            dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
647eb8dc403SDave Cobbley            m = dirver_regex.search(path)
648eb8dc403SDave Cobbley            if m:
649eb8dc403SDave Cobbley                pn = d.getVar('PN')
650eb8dc403SDave Cobbley                dirver = m.group('dirver')
651eb8dc403SDave Cobbley
65219323693SBrad Bishop                dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
653eb8dc403SDave Cobbley                if not dirver_pn_regex.search(dirver):
654eb8dc403SDave Cobbley                    return (self._check_latest_version_by_dir(dirver,
655eb8dc403SDave Cobbley                        package, package_regex, current_version, ud, d), '')
656eb8dc403SDave Cobbley
657eb8dc403SDave Cobbley            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
658eb8dc403SDave Cobbley        else:
659eb8dc403SDave Cobbley            uri = regex_uri
660eb8dc403SDave Cobbley
661eb8dc403SDave Cobbley        return (self._check_latest_version(uri, package, package_regex,
662eb8dc403SDave Cobbley                current_version, ud, d), '')
663