"""
BitBake 'Fetch' implementations

Classes for obtaining upstream sources for the
BitBake build tools.

"""

# Copyright (C) 2003, 2004 Chris Larson
#
# SPDX-License-Identifier: GPL-2.0-only
#
# Based on functions from the base bb module, Copyright 2003 Holger Schurig

import shlex
import re
import tempfile
import os
import errno
import bb
import bb.progress
import socket
import http.client
import urllib.request, urllib.parse, urllib.error
from bb.fetch2 import FetchMethod
from bb.fetch2 import FetchError
from bb.fetch2 import logger
from bb.fetch2 import runfetchcmd
from bb.utils import export_proxies
from bs4 import BeautifulSoup
from bs4 import SoupStrainer

class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Extract progress information from wget output.
    Note: relies on --progress=dot (with -v or without -q/-nv) being
    specified on the wget command line.
    """
    def __init__(self, d):
        super().__init__(d)
        # Send an initial progress event so the bar gets shown
        self._fire_progress(0)

    def writeline(self, line):
        """
        Parse one line of wget output.

        When the line carries one or more "<percent>% <rate>" pairs, the last
        pair is reported through self.update() and False is returned;
        otherwise True is returned.
        NOTE(review): the return value presumably tells the base class whether
        to pass the line through to the log - confirm against bb.progress.
        """
        percs = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if percs:
            progress = int(percs[-1][0])
            rate = percs[-1][1] + '/s'
            self.update(progress, rate)
            return False
        return True


class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""

    # CDNs like CloudFlare may do a 'browser integrity test' which can fail
    # with the standard wget/urllib User-Agent, so pretend to be a modern
    # browser.
    user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0"

    def check_certs(self, d):
        """
        Should certificates be checked?

        Returns False only when BB_CHECK_SSL_CERTS is set to "0"; an unset or
        empty variable means certificates are checked.
        """
        return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0"

    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with wget.
        """
        return ud.type in ['http', 'https', 'ftp']

    def recommends_checksum(self, urldata):
        """Always returns True for this fetcher."""
        return True

    def urldata_init(self, ud, d):
        """
        Initialise the wget specific parts of ud and the base wget command.

        Sets ud.basename and ud.localfile (from the 'downloadfilename' URL
        parameter when present, else from the URL path) and self.basecmd
        (FETCHCMD_wget or a built-in default, plus --no-check-certificate when
        certificate checking is disabled).

        Raises bb.fetch2.ParameterError when the URL carries protocol=git.
        """
        if 'protocol' in ud.parm:
            if ud.parm['protocol'] == 'git':
                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)

        if 'downloadfilename' in ud.parm:
            ud.basename = ud.parm['downloadfilename']
        else:
            ud.basename = os.path.basename(ud.path)

        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
        if not ud.localfile:
            # The URL has no basename (e.g. it ends in "/"), so derive a file
            # name from host and path instead.
            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))

        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp"

        if not self.check_certs(d):
            self.basecmd += " --no-check-certificate"

    def _runwget(self, ud, d, command, quiet, workdir=None):
        """
        Run the given wget command line with dot-style progress reporting.

        ' --progress=dot -v' is appended to command so that
        WgetProgressHandler can parse the output.
        """

        progresshandler = WgetProgressHandler(d)

        logger.debug2("Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command, ud.url)
        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)

    def download(self, ud, d):
        """
        Fetch urls

        Returns True on success. Raises FetchError when wget reported success
        but the local file is missing or has zero size.
        """

        fetchcmd = self.basecmd

        if 'downloadfilename' in ud.parm:
            localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile)
            bb.utils.mkdirhier(os.path.dirname(localpath))
            fetchcmd += " -O %s" % shlex.quote(localpath)

        if ud.user and ud.pswd:
            # The command is a shell string, so quote the credentials:
            # otherwise characters such as spaces, '&', ';' or '$' would break
            # or alter the command line.
            fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (
                shlex.quote(ud.user), shlex.quote(ud.pswd))

        uri = ud.url.split(";")[0]
        if os.path.exists(ud.localpath):
            # file exists, but we didn't complete it.. trying again..
            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
        else:
            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)

        self._runwget(ud, d, fetchcmd, False)

        # Sanity check since wget can pretend it succeed when it didn't
        # Also, this used to happen if sourceforge sent us to the mirror page
        if not os.path.exists(ud.localpath):
            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)

        if os.path.getsize(ud.localpath) == 0:
            os.remove(ud.localpath)
            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)

        return True

    def checkstatus(self, fetch, ud, d, try_again=True):
        """
        Check that ud.url is reachable using a HEAD request sent via urllib.

        fetch.connection_cache, when set, is used to reuse sockets between
        requests. On URLError or ConnectionResetError the check is retried
        once (try_again); a second failure returns False. Returns True when
        the request succeeds.
        """
        class HTTPConnectionCache(http.client.HTTPConnection):
            if fetch.connection_cache:
                def connect(self):
                    """Connect to the host and port specified in __init__."""

                    sock = fetch.connection_cache.get_connection(self.host, self.port)
                    if sock:
                        self.sock = sock
                    else:
                        self.sock = socket.create_connection((self.host, self.port),
                                    self.timeout, self.source_address)
                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)

                    if self._tunnel_host:
                        self._tunnel()

        class CacheHTTPHandler(urllib.request.HTTPHandler):
            def http_open(self, req):
                return self.do_open(HTTPConnectionCache, req)

            def do_open(self, http_class, req):
                """Return an addinfourl object for the request, using http_class.

                http_class must implement the HTTPConnection API from httplib.
                The addinfourl return value is a file-like object.  It also
                has methods and attributes including:
                    - info(): return a mimetools.Message object for the headers
                    - geturl(): return the original request URL
                    - code: HTTP status code
                """
                host = req.host
                if not host:
                    raise urllib.error.URLError('no host given')

                h = http_class(host, timeout=req.timeout) # will parse host:port
                h.set_debuglevel(self._debuglevel)

                headers = dict(req.unredirected_hdrs)
                headers.update({k: v for k, v in req.headers.items()
                                if k not in headers})

                # We want to make an HTTP/1.1 request, but the addinfourl
                # class isn't prepared to deal with a persistent connection.
                # It will try to read all remaining data from the socket,
                # which will block while the server waits for the next request.
                # So make sure the connection gets closed after the (only)
                # request.

                # Don't close connection when connection_cache is enabled,
                if fetch.connection_cache is None:
                    headers["Connection"] = "close"
                else:
                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0

                headers = {name.title(): val for name, val in headers.items()}

                if req._tunnel_host:
                    tunnel_headers = {}
                    proxy_auth_hdr = "Proxy-Authorization"
                    if proxy_auth_hdr in headers:
                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                        # Proxy-Authorization should not be sent to origin
                        # server.
                        del headers[proxy_auth_hdr]
                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

                try:
                    h.request(req.get_method(), req.selector, req.data, headers)
                except socket.error as err: # XXX what error?
                    # Don't close connection when cache is enabled.
                    # Instead, try to detect connections that are no longer
                    # usable (for example, closed unexpectedly) and remove
                    # them from the cache.
                    if fetch.connection_cache is None:
                        h.close()
                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
                        # This happens when the server closes the connection despite the Keep-Alive.
                        # Apparently urllib then uses the file descriptor, expecting it to be
                        # connected, when in reality the connection is already gone.
                        # We let the request fail and expect it to be
                        # tried once more ("try_again" in check_status()),
                        # with the dead connection removed from the cache.
                        # If it still fails, we give up, which can happen for bad
                        # HTTP proxy settings.
                        fetch.connection_cache.remove_connection(h.host, h.port)
                    raise urllib.error.URLError(err)
                else:
                    r = h.getresponse()

                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.

                # Wrap the HTTPResponse object in socket's file object adapter
                # for Windows.  That adapter calls recv(), so delegate recv()
                # to read().  This weird wrapping allows the returned object to
                # have readline() and readlines() methods.

                # XXX It might be better to extract the read buffering code
                # out of socket._fileobject() and into a base class.
                r.recv = r.read

                # no data, just have to read
                r.read()
                class fp_dummy(object):
                    def read(self):
                        return ""
                    def readline(self):
                        return ""
                    def close(self):
                        pass
                    closed = False

                resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
                resp.code = r.status
                resp.msg = r.reason

                # Close connection when server request it.
                if fetch.connection_cache is not None:
                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                        fetch.connection_cache.remove_connection(h.host, h.port)

                return resp

        class HTTPMethodFallback(urllib.request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                fp.read()
                fp.close()

                if req.get_method() != 'GET':
                    newheaders = {k: v for k, v in req.headers.items()
                                  if k.lower() not in ("content-length", "content-type")}
                    return self.parent.open(urllib.request.Request(req.get_full_url(),
                                                            headers=newheaders,
                                                            origin_req_host=req.origin_req_host,
                                                            unverifiable=True))

                # HTTPError takes the URL (a string) as its first argument,
                # not the Request object.
                raise urllib.error.HTTPError(req.get_full_url(), code, msg, headers, None)

            # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
            # Forbidden when they actually mean 405 Method Not Allowed.
            http_error_403 = http_error_405


        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
            """
            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
            when we want to follow redirects using the original method.
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
                newreq.get_method = req.get_method
                return newreq

        # We need to update the environment here as both the proxy and HTTPS
        # handlers need variables set. The proxy needs http_proxy and friends to
        # be set, and HTTPSHandler ends up calling into openssl to load the
        # certificates. In buildtools configurations this will be looking at the
        # wrong place for certificates by default: we set SSL_CERT_FILE to the
        # right location in the buildtools environment script but as BitBake
        # prunes the environment this is lost. When binaries are executed
        # runfetchcmd ensures these values are in the environment, but this is
        # pure Python so we need to update the environment.
        #
        # Avoid trampling the environment too much by using bb.utils.environment
        # to scope the changes to the build_opener request, which is when the
        # environment lookups happen.
        newenv = {}
        for name in bb.fetch2.FETCH_EXPORT_VARS:
            value = d.getVar(name)
            if not value:
                origenv = d.getVar("BB_ORIGENV")
                if origenv:
                    value = origenv.getVar(name)
            if value:
                newenv[name] = value

        with bb.utils.environment(**newenv):
            import ssl

            if self.check_certs(d):
                context = ssl.create_default_context()
            else:
                context = ssl._create_unverified_context()

            handlers = [FixedHTTPRedirectHandler,
                        HTTPMethodFallback,
                        urllib.request.ProxyHandler(),
                        CacheHTTPHandler(),
                        urllib.request.HTTPSHandler(context=context)]
            opener = urllib.request.build_opener(*handlers)

            try:
                uri = ud.url.split(";")[0]
                r = urllib.request.Request(uri)
                r.get_method = lambda: "HEAD"
                # Some servers (FusionForge, as used on Alioth) require that the
                # optional Accept header is set.
                r.add_header("Accept", "*/*")
                r.add_header("User-Agent", self.user_agent)
                def add_basic_auth(login_str, request):
                    '''Adds Basic auth to http request, pass in login:password as string'''
                    import base64
                    encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
                    authheader = "Basic %s" % encodeuser
                    request.add_header("Authorization", authheader)

                if ud.user and ud.pswd:
                    add_basic_auth(ud.user + ':' + ud.pswd, r)

                # Imported outside the try below so that the except clause can
                # always resolve netrc.NetrcParseError.
                import netrc
                try:
                    n = netrc.netrc()
                    # authenticators() returns None for unknown hosts, which
                    # makes the unpacking raise TypeError (handled below).
                    login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
                    add_basic_auth("%s:%s" % (login, password), r)
                except (TypeError, IOError, netrc.NetrcParseError):
                    # No usable netrc entry: carry on without it.
                    pass

                with opener.open(r) as response:
                    pass
            except urllib.error.URLError as e:
                if try_again:
                    logger.debug2("checkstatus: trying again")
                    return self.checkstatus(fetch, ud, d, False)
                else:
                    # debug for now to avoid spamming the logs in e.g. remote sstate searches
                    logger.debug2("checkstatus() urlopen failed: %s" % e)
                    return False
            except ConnectionResetError as e:
                if try_again:
                    logger.debug2("checkstatus: trying again")
                    return self.checkstatus(fetch, ud, d, False)
                else:
                    # debug for now to avoid spamming the logs in e.g. remote sstate searches
                    logger.debug2("checkstatus() urlopen failed: %s" % e)
                    return False
        return True

    def _parse_path(self, regex, s):
        """
        Find and group name, version and archive type in the given string s

        Returns a (name, pver, type) tuple taken from the named groups of
        regex (a missing group name yields ''), or None when regex does not
        match s.
        """

        m = regex.search(s)
        if m:
            mdict = m.groupdict()
            pname = mdict.get('name', '')
            pver = mdict.get('pver', '')
            ptype = mdict.get('type', '')

            bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))

            return (pname, pver, ptype)

        return None

    def _modelate_version(self, version):
        """
        Normalise a version string so that it can be compared.

        A leading '.' or '-' is swapped with a following digit or dropped,
        '-' and '_' become '.', runs of 'rc', 'beta' and 'alpha' are replaced
        by '.1000.', '.100.' and '.10.' respectively, and a leading 'v' is
        removed. Empty and one-character strings are handled without raising.
        """
        if version[:1] in ('.', '-'):
            if version[1:2].isdigit():
                version = version[1] + version[0] + version[2:]
            else:
                version = version[1:]

        version = re.sub('-', '.', version)
        version = re.sub('_', '.', version)
        version = re.sub('(rc)+', '.1000.', version)
        version = re.sub('(beta)+', '.100.', version)
        version = re.sub('(alpha)+', '.10.', version)
        if version.startswith('v'):
            version = version[1:]
        return version

    def _vercmp(self, old, new):
        """
        Check whether 'new' is newer than 'old' version. We use existing vercmp() for the
        purpose. PE is cleared in comparison as it's not for build, and PR is cleared too
        for simplicity as it's somehow difficult to get from various upstream format

        old and new are (name, version, suffix) sequences. Returns the result
        of bb.utils.vercmp(), or False when the new suffix is unknown or the
        names differ.
        """

        (oldpn, oldpv, oldsuffix) = old
        (newpn, newpv, newsuffix) = new

        # Check for a new suffix type that we have never heard of before
        if newsuffix:
            m = self.suffix_regex_comp.search(newsuffix)
            if not m:
                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
                return False

        # Not our package so ignore it
        if oldpn != newpn:
            return False

        oldpv = self._modelate_version(oldpv)
        newpv = self._modelate_version(newpv)

        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))

    def _fetch_index(self, uri, ud, d):
        """
        Run fetch checkstatus to get directory information

        Downloads uri with wget into a temporary file and returns its
        content, or "" when the fetch raises bb.fetch2.BBFetchException.
        """
        # Both the directory and the listing file are removed when the with
        # block exits.
        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
            fetchcmd = self.basecmd
            fetchcmd += " -O %s --user-agent=%s %s" % (
                shlex.quote(f.name), shlex.quote(self.user_agent), shlex.quote(uri))
            try:
                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
                fetchresult = f.read()
            except bb.fetch2.BBFetchException:
                fetchresult = ""

        return fetchresult

    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
        """
        Return the latest version of a package inside a given directory path
        If error or no version, return ""
        """
        valid = 0
        version = ['', '', '']

        bb.debug(3, "VersionURL: %s" % (url))
        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            bb.debug(3, "*** %s NO SOUP" % (url))
            return ""

        for line in soup.find_all('a', href=True):
            bb.debug(3, "line['href'] = '%s'" % (line['href']))
            bb.debug(3, "line = '%s'" % (str(line)))

            # Try the link target first, then the whole anchor element.
            newver = self._parse_path(package_regex, line['href'])
            if not newver:
                newver = self._parse_path(package_regex, str(line))

            if newver:
                bb.debug(3, "Upstream version found: %s" % newver[1])
                if valid == 0:
                    version = newver
                    valid = 1
                elif self._vercmp(version, newver) < 0:
                    version = newver

        pupver = re.sub('_', '.', version[1])

        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
                (package, pupver or "N/A", current_version[1]))

        if valid:
            return pupver

        return ""

    def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
        """
        Scan every directory in order to get upstream version.

        dirver is the versioned directory component found in ud.path. Returns
        the version found, or '' when there is none.
        """
        version_dir = ['', '', '']
        version = ['', '', '']

        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))")
        s = dirver_regex.search(dirver)
        if s:
            version_dir[1] = s.group('ver')
        else:
            version_dir[1] = dirver

        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            return version[1]

        for line in soup.find_all('a', href=True):
            s = dirver_regex.search(line['href'].strip("/"))
            if s:
                sver = s.group('ver')

                # When prefix is part of the version directory it need to
                # ensure that only version directory is used so remove previous
                # directories if exists.
                #
                # Example: pfx = '/dir1/dir2/v' and version = '2.5' the expected
                # result is v2.5.
                spfx = s.group('pfx').split('/')[-1]

                version_dir_new = ['', sver, '']
                if self._vercmp(version_dir, version_dir_new) <= 0:
                    dirver_new = spfx + sver
                    # Only the first occurrence of dirver is replaced.
                    path = ud.path.replace(dirver, dirver_new, 1) \
                        .split(package)[0]
                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
                        ud.user, ud.pswd, {}])

                    pupver = self._check_latest_version(uri,
                            package, package_regex, current_version, ud, d)
                    if pupver:
                        version[1] = pupver

                    version_dir = version_dir_new

        return version[1]

    def _init_regexes(self, package, ud, d):
        """
        Match as many patterns as possible such as:
                gnome-common-2.20.0.tar.gz (most common format)
                gtk+-2.90.1.tar.gz
                xf86-input-synaptics-12.6.9.tar.gz
                dri2proto-2.3.tar.gz
                blktool_4.orig.tar.gz
                libid3tag-0.15.1b.tar.gz
                unzip552.tar.gz
                icu4c-3_6-src.tgz
                genext2fs_1.3.orig.tar.gz
                gst-fluendo-mp3

        Sets self.suffix_regex_comp and returns the compiled regex used to
        recognise upstream files of this package: UPSTREAM_CHECK_REGEX when
        set, else a regex built from the name parsed out of package, or None
        when package does not match the generic pattern.
        """
        # match most patterns which uses "-" as separator to version digits
        pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
        # a loose pattern such as for unzip552.tar.gz
        pn_prefix2 = r"[a-zA-Z]+"
        # a loose pattern such as for 80325-quicky-0.4.tar.gz
        pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
        # Save the Package Name (pn) Regex for use later
        pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

        # match version
        pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

        # match arch
        parch_regex = "-source|_all_"

        # src.rpm extension was added only for rpm package. Can be removed if the rpm
        # packaged will always be considered as having to be manually upgraded
        psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"

        # match name, version and archive type of a package
        package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
                                                    % (pn_regex, pver_regex, parch_regex, psuffix_regex))
        self.suffix_regex_comp = re.compile(psuffix_regex)

        # compile regex, can be specific by package or generic regex
        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
        if pn_regex:
            package_custom_regex_comp = re.compile(pn_regex)
        else:
            version = self._parse_path(package_regex_comp, package)
            if version:
                package_custom_regex_comp = re.compile(
                    r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
            else:
                package_custom_regex_comp = None

        return package_custom_regex_comp

    def latest_versionstring(self, ud, d):
        """
        Manipulate the URL and try to obtain the latest package version

        sanity check to ensure same name and type.

        Returns a (version, '') tuple; version is '' when nothing was found.
        """
        package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV'), '']

        # possible to have no version in pkg name, such as spectrum-fw
        if not re.search(r"\d+", package):
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
            return (current_version[1], '')

        package_regex = self._init_regexes(package, ud, d)
        if package_regex is None:
            bb.warn("latest_versionstring: package %s don't match pattern" % (package))
            return ('', '')
        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

        uri = ""
        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
        if not regex_uri:
            path = ud.path.split(package)[0]

            # search for version matches on folders inside the path, like:
            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
            dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
            m = dirver_regex.search(path)
            if m:
                pn = d.getVar('PN')
                dirver = m.group('dirver')

                # A directory that is just the package name (optionally with
                # one trailing digit) is not treated as a version directory.
                dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
                if not dirver_pn_regex.search(dirver):
                    return (self._check_latest_version_by_dir(dirver,
                        package, package_regex, current_version, ud, d), '')

            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
        else:
            uri = regex_uri

        return (self._check_latest_version(uri, package, package_regex,
                current_version, ud, d), '')