1eb8dc403SDave Cobbley""" 2eb8dc403SDave CobbleyBitBake 'Fetch' implementations 3eb8dc403SDave Cobbley 4eb8dc403SDave CobbleyClasses for obtaining upstream sources for the 5eb8dc403SDave CobbleyBitBake build tools. 6eb8dc403SDave Cobbley 7eb8dc403SDave Cobbley""" 8eb8dc403SDave Cobbley 9eb8dc403SDave Cobbley# Copyright (C) 2003, 2004 Chris Larson 10eb8dc403SDave Cobbley# 11c342db35SBrad Bishop# SPDX-License-Identifier: GPL-2.0-only 12eb8dc403SDave Cobbley# 13eb8dc403SDave Cobbley# Based on functions from the base bb module, Copyright 2003 Holger Schurig 14eb8dc403SDave Cobbley 1582c905dcSAndrew Geisslerimport shlex 16eb8dc403SDave Cobbleyimport re 17eb8dc403SDave Cobbleyimport tempfile 18eb8dc403SDave Cobbleyimport os 19eb8dc403SDave Cobbleyimport errno 20eb8dc403SDave Cobbleyimport bb 21eb8dc403SDave Cobbleyimport bb.progress 2219323693SBrad Bishopimport socket 2319323693SBrad Bishopimport http.client 24eb8dc403SDave Cobbleyimport urllib.request, urllib.parse, urllib.error 25eb8dc403SDave Cobbleyfrom bb.fetch2 import FetchMethod 26eb8dc403SDave Cobbleyfrom bb.fetch2 import FetchError 27eb8dc403SDave Cobbleyfrom bb.fetch2 import logger 28eb8dc403SDave Cobbleyfrom bb.fetch2 import runfetchcmd 29eb8dc403SDave Cobbleyfrom bb.utils import export_proxies 30eb8dc403SDave Cobbleyfrom bs4 import BeautifulSoup 31eb8dc403SDave Cobbleyfrom bs4 import SoupStrainer 32eb8dc403SDave Cobbley 33eb8dc403SDave Cobbleyclass WgetProgressHandler(bb.progress.LineFilterProgressHandler): 34eb8dc403SDave Cobbley """ 35eb8dc403SDave Cobbley Extract progress information from wget output. 36eb8dc403SDave Cobbley Note: relies on --progress=dot (with -v or without -q/-nv) being 37eb8dc403SDave Cobbley specified on the wget command line. 
38eb8dc403SDave Cobbley """ 39eb8dc403SDave Cobbley def __init__(self, d): 40eb8dc403SDave Cobbley super(WgetProgressHandler, self).__init__(d) 41eb8dc403SDave Cobbley # Send an initial progress event so the bar gets shown 42eb8dc403SDave Cobbley self._fire_progress(0) 43eb8dc403SDave Cobbley 44eb8dc403SDave Cobbley def writeline(self, line): 45eb8dc403SDave Cobbley percs = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line) 46eb8dc403SDave Cobbley if percs: 47eb8dc403SDave Cobbley progress = int(percs[-1][0]) 48eb8dc403SDave Cobbley rate = percs[-1][1] + '/s' 49eb8dc403SDave Cobbley self.update(progress, rate) 50eb8dc403SDave Cobbley return False 51eb8dc403SDave Cobbley return True 52eb8dc403SDave Cobbley 53eb8dc403SDave Cobbley 54eb8dc403SDave Cobbleyclass Wget(FetchMethod): 550ca19ccfSPatrick Williams """Class to fetch urls via 'wget'""" 56d1e89497SAndrew Geissler 57d1e89497SAndrew Geissler # CDNs like CloudFlare may do a 'browser integrity test' which can fail 58d1e89497SAndrew Geissler # with the standard wget/urllib User-Agent, so pretend to be a modern 59d1e89497SAndrew Geissler # browser. 60d1e89497SAndrew Geissler user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0" 61d1e89497SAndrew Geissler 620ca19ccfSPatrick Williams def check_certs(self, d): 630ca19ccfSPatrick Williams """ 640ca19ccfSPatrick Williams Should certificates be checked? 650ca19ccfSPatrick Williams """ 660ca19ccfSPatrick Williams return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0" 670ca19ccfSPatrick Williams 68eb8dc403SDave Cobbley def supports(self, ud, d): 69eb8dc403SDave Cobbley """ 70eb8dc403SDave Cobbley Check to see if a given url can be fetched with wget. 
71eb8dc403SDave Cobbley """ 725199d831SAndrew Geissler return ud.type in ['http', 'https', 'ftp', 'ftps'] 73eb8dc403SDave Cobbley 74eb8dc403SDave Cobbley def recommends_checksum(self, urldata): 75eb8dc403SDave Cobbley return True 76eb8dc403SDave Cobbley 77eb8dc403SDave Cobbley def urldata_init(self, ud, d): 78eb8dc403SDave Cobbley if 'protocol' in ud.parm: 79eb8dc403SDave Cobbley if ud.parm['protocol'] == 'git': 80eb8dc403SDave Cobbley raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url) 81eb8dc403SDave Cobbley 82eb8dc403SDave Cobbley if 'downloadfilename' in ud.parm: 83eb8dc403SDave Cobbley ud.basename = ud.parm['downloadfilename'] 84eb8dc403SDave Cobbley else: 85eb8dc403SDave Cobbley ud.basename = os.path.basename(ud.path) 86eb8dc403SDave Cobbley 87eb8dc403SDave Cobbley ud.localfile = d.expand(urllib.parse.unquote(ud.basename)) 88eb8dc403SDave Cobbley if not ud.localfile: 89eb8dc403SDave Cobbley ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", ".")) 90eb8dc403SDave Cobbley 910ca19ccfSPatrick Williams self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp" 920ca19ccfSPatrick Williams 930ca19ccfSPatrick Williams if not self.check_certs(d): 940ca19ccfSPatrick Williams self.basecmd += " --no-check-certificate" 95eb8dc403SDave Cobbley 96eb8dc403SDave Cobbley def _runwget(self, ud, d, command, quiet, workdir=None): 97eb8dc403SDave Cobbley 98eb8dc403SDave Cobbley progresshandler = WgetProgressHandler(d) 99eb8dc403SDave Cobbley 100d1e89497SAndrew Geissler logger.debug2("Fetching %s using command '%s'" % (ud.url, command)) 101eb8dc403SDave Cobbley bb.fetch2.check_network_access(d, command, ud.url) 102eb8dc403SDave Cobbley runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir) 103eb8dc403SDave Cobbley 104eb8dc403SDave Cobbley def download(self, ud, d): 
105eb8dc403SDave Cobbley """Fetch urls""" 106eb8dc403SDave Cobbley 107eb8dc403SDave Cobbley fetchcmd = self.basecmd 108eb8dc403SDave Cobbley 10978b72798SAndrew Geissler localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + ".tmp" 11082c905dcSAndrew Geissler bb.utils.mkdirhier(os.path.dirname(localpath)) 11182c905dcSAndrew Geissler fetchcmd += " -O %s" % shlex.quote(localpath) 112eb8dc403SDave Cobbley 113eb8dc403SDave Cobbley if ud.user and ud.pswd: 114595f6308SAndrew Geissler fetchcmd += " --auth-no-challenge" 115595f6308SAndrew Geissler if ud.parm.get("redirectauth", "1") == "1": 116595f6308SAndrew Geissler # An undocumented feature of wget is that if the 117595f6308SAndrew Geissler # username/password are specified on the URI, wget will only 118595f6308SAndrew Geissler # send the Authorization header to the first host and not to 119595f6308SAndrew Geissler # any hosts that it is redirected to. With the increasing 120595f6308SAndrew Geissler # usage of temporary AWS URLs, this difference now matters as 121595f6308SAndrew Geissler # AWS will reject any request that has authentication both in 122595f6308SAndrew Geissler # the query parameters (from the redirect) and in the 123595f6308SAndrew Geissler # Authorization header. 124595f6308SAndrew Geissler fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd) 125eb8dc403SDave Cobbley 126eb8dc403SDave Cobbley uri = ud.url.split(";")[0] 127eb8dc403SDave Cobbley if os.path.exists(ud.localpath): 128eb8dc403SDave Cobbley # file exists, but we didnt complete it.. trying again.. 
129eb8dc403SDave Cobbley fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri) 130eb8dc403SDave Cobbley else: 131eb8dc403SDave Cobbley fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri) 132eb8dc403SDave Cobbley 133eb8dc403SDave Cobbley self._runwget(ud, d, fetchcmd, False) 134eb8dc403SDave Cobbley 135*87f5cff0SAndrew Geissler # Try and verify any checksum now, meaning if it isn't correct, we don't remove the 136*87f5cff0SAndrew Geissler # original file, which might be a race (imagine two recipes referencing the same 137*87f5cff0SAndrew Geissler # source, one with an incorrect checksum) 138*87f5cff0SAndrew Geissler bb.fetch2.verify_checksum(ud, d, localpath=localpath, fatal_nochecksum=False) 139*87f5cff0SAndrew Geissler 14078b72798SAndrew Geissler # Remove the ".tmp" and move the file into position atomically 14178b72798SAndrew Geissler # Our lock prevents multiple writers but mirroring code may grab incomplete files 14278b72798SAndrew Geissler os.rename(localpath, localpath[:-4]) 14378b72798SAndrew Geissler 144eb8dc403SDave Cobbley # Sanity check since wget can pretend it succeed when it didn't 145eb8dc403SDave Cobbley # Also, this used to happen if sourceforge sent us to the mirror page 146eb8dc403SDave Cobbley if not os.path.exists(ud.localpath): 147eb8dc403SDave Cobbley raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri) 148eb8dc403SDave Cobbley 149eb8dc403SDave Cobbley if os.path.getsize(ud.localpath) == 0: 150eb8dc403SDave Cobbley os.remove(ud.localpath) 151eb8dc403SDave Cobbley raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." 
% (uri), uri) 152eb8dc403SDave Cobbley 153eb8dc403SDave Cobbley return True 154eb8dc403SDave Cobbley 155eb8dc403SDave Cobbley def checkstatus(self, fetch, ud, d, try_again=True): 156eb8dc403SDave Cobbley class HTTPConnectionCache(http.client.HTTPConnection): 157eb8dc403SDave Cobbley if fetch.connection_cache: 158eb8dc403SDave Cobbley def connect(self): 159eb8dc403SDave Cobbley """Connect to the host and port specified in __init__.""" 160eb8dc403SDave Cobbley 161eb8dc403SDave Cobbley sock = fetch.connection_cache.get_connection(self.host, self.port) 162eb8dc403SDave Cobbley if sock: 163eb8dc403SDave Cobbley self.sock = sock 164eb8dc403SDave Cobbley else: 165eb8dc403SDave Cobbley self.sock = socket.create_connection((self.host, self.port), 166eb8dc403SDave Cobbley self.timeout, self.source_address) 167eb8dc403SDave Cobbley fetch.connection_cache.add_connection(self.host, self.port, self.sock) 168eb8dc403SDave Cobbley 169eb8dc403SDave Cobbley if self._tunnel_host: 170eb8dc403SDave Cobbley self._tunnel() 171eb8dc403SDave Cobbley 172eb8dc403SDave Cobbley class CacheHTTPHandler(urllib.request.HTTPHandler): 173eb8dc403SDave Cobbley def http_open(self, req): 174eb8dc403SDave Cobbley return self.do_open(HTTPConnectionCache, req) 175eb8dc403SDave Cobbley 176eb8dc403SDave Cobbley def do_open(self, http_class, req): 177eb8dc403SDave Cobbley """Return an addinfourl object for the request, using http_class. 178eb8dc403SDave Cobbley 179eb8dc403SDave Cobbley http_class must implement the HTTPConnection API from httplib. 180eb8dc403SDave Cobbley The addinfourl return value is a file-like object. 
It also 181eb8dc403SDave Cobbley has methods and attributes including: 182eb8dc403SDave Cobbley - info(): return a mimetools.Message object for the headers 183eb8dc403SDave Cobbley - geturl(): return the original request URL 184eb8dc403SDave Cobbley - code: HTTP status code 185eb8dc403SDave Cobbley """ 186eb8dc403SDave Cobbley host = req.host 187eb8dc403SDave Cobbley if not host: 18819323693SBrad Bishop raise urllib.error.URLError('no host given') 189eb8dc403SDave Cobbley 190eb8dc403SDave Cobbley h = http_class(host, timeout=req.timeout) # will parse host:port 191eb8dc403SDave Cobbley h.set_debuglevel(self._debuglevel) 192eb8dc403SDave Cobbley 193eb8dc403SDave Cobbley headers = dict(req.unredirected_hdrs) 194eb8dc403SDave Cobbley headers.update(dict((k, v) for k, v in list(req.headers.items()) 195eb8dc403SDave Cobbley if k not in headers)) 196eb8dc403SDave Cobbley 197eb8dc403SDave Cobbley # We want to make an HTTP/1.1 request, but the addinfourl 198eb8dc403SDave Cobbley # class isn't prepared to deal with a persistent connection. 199eb8dc403SDave Cobbley # It will try to read all remaining data from the socket, 200eb8dc403SDave Cobbley # which will block while the server waits for the next request. 201eb8dc403SDave Cobbley # So make sure the connection gets closed after the (only) 202eb8dc403SDave Cobbley # request. 
203eb8dc403SDave Cobbley 204eb8dc403SDave Cobbley # Don't close connection when connection_cache is enabled, 205eb8dc403SDave Cobbley if fetch.connection_cache is None: 206eb8dc403SDave Cobbley headers["Connection"] = "close" 207eb8dc403SDave Cobbley else: 208eb8dc403SDave Cobbley headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0 209eb8dc403SDave Cobbley 210eb8dc403SDave Cobbley headers = dict( 211eb8dc403SDave Cobbley (name.title(), val) for name, val in list(headers.items())) 212eb8dc403SDave Cobbley 213eb8dc403SDave Cobbley if req._tunnel_host: 214eb8dc403SDave Cobbley tunnel_headers = {} 215eb8dc403SDave Cobbley proxy_auth_hdr = "Proxy-Authorization" 216eb8dc403SDave Cobbley if proxy_auth_hdr in headers: 217eb8dc403SDave Cobbley tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr] 218eb8dc403SDave Cobbley # Proxy-Authorization should not be sent to origin 219eb8dc403SDave Cobbley # server. 220eb8dc403SDave Cobbley del headers[proxy_auth_hdr] 221eb8dc403SDave Cobbley h.set_tunnel(req._tunnel_host, headers=tunnel_headers) 222eb8dc403SDave Cobbley 223eb8dc403SDave Cobbley try: 224eb8dc403SDave Cobbley h.request(req.get_method(), req.selector, req.data, headers) 225eb8dc403SDave Cobbley except socket.error as err: # XXX what error? 226eb8dc403SDave Cobbley # Don't close connection when cache is enabled. 227eb8dc403SDave Cobbley # Instead, try to detect connections that are no longer 228eb8dc403SDave Cobbley # usable (for example, closed unexpectedly) and remove 229eb8dc403SDave Cobbley # them from the cache. 230eb8dc403SDave Cobbley if fetch.connection_cache is None: 231eb8dc403SDave Cobbley h.close() 232eb8dc403SDave Cobbley elif isinstance(err, OSError) and err.errno == errno.EBADF: 233eb8dc403SDave Cobbley # This happens when the server closes the connection despite the Keep-Alive. 
234eb8dc403SDave Cobbley # Apparently urllib then uses the file descriptor, expecting it to be 235eb8dc403SDave Cobbley # connected, when in reality the connection is already gone. 236eb8dc403SDave Cobbley # We let the request fail and expect it to be 237eb8dc403SDave Cobbley # tried once more ("try_again" in check_status()), 238eb8dc403SDave Cobbley # with the dead connection removed from the cache. 2397e0e3c0cSAndrew Geissler # If it still fails, we give up, which can happen for bad 240eb8dc403SDave Cobbley # HTTP proxy settings. 241eb8dc403SDave Cobbley fetch.connection_cache.remove_connection(h.host, h.port) 242eb8dc403SDave Cobbley raise urllib.error.URLError(err) 243eb8dc403SDave Cobbley else: 244eb8dc403SDave Cobbley r = h.getresponse() 245eb8dc403SDave Cobbley 246eb8dc403SDave Cobbley # Pick apart the HTTPResponse object to get the addinfourl 247eb8dc403SDave Cobbley # object initialized properly. 248eb8dc403SDave Cobbley 249eb8dc403SDave Cobbley # Wrap the HTTPResponse object in socket's file object adapter 250eb8dc403SDave Cobbley # for Windows. That adapter calls recv(), so delegate recv() 251eb8dc403SDave Cobbley # to read(). This weird wrapping allows the returned object to 252eb8dc403SDave Cobbley # have readline() and readlines() methods. 253eb8dc403SDave Cobbley 254eb8dc403SDave Cobbley # XXX It might be better to extract the read buffering code 255eb8dc403SDave Cobbley # out of socket._fileobject() and into a base class. 
256eb8dc403SDave Cobbley r.recv = r.read 257eb8dc403SDave Cobbley 258eb8dc403SDave Cobbley # no data, just have to read 259eb8dc403SDave Cobbley r.read() 260eb8dc403SDave Cobbley class fp_dummy(object): 261eb8dc403SDave Cobbley def read(self): 262eb8dc403SDave Cobbley return "" 263eb8dc403SDave Cobbley def readline(self): 264eb8dc403SDave Cobbley return "" 265eb8dc403SDave Cobbley def close(self): 266eb8dc403SDave Cobbley pass 267eb8dc403SDave Cobbley closed = False 268eb8dc403SDave Cobbley 26919323693SBrad Bishop resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url()) 270eb8dc403SDave Cobbley resp.code = r.status 271eb8dc403SDave Cobbley resp.msg = r.reason 272eb8dc403SDave Cobbley 273eb8dc403SDave Cobbley # Close connection when server request it. 274eb8dc403SDave Cobbley if fetch.connection_cache is not None: 275eb8dc403SDave Cobbley if 'Connection' in r.msg and r.msg['Connection'] == 'close': 276eb8dc403SDave Cobbley fetch.connection_cache.remove_connection(h.host, h.port) 277eb8dc403SDave Cobbley 278eb8dc403SDave Cobbley return resp 279eb8dc403SDave Cobbley 280eb8dc403SDave Cobbley class HTTPMethodFallback(urllib.request.BaseHandler): 281eb8dc403SDave Cobbley """ 282eb8dc403SDave Cobbley Fallback to GET if HEAD is not allowed (405 HTTP error) 283eb8dc403SDave Cobbley """ 284eb8dc403SDave Cobbley def http_error_405(self, req, fp, code, msg, headers): 285eb8dc403SDave Cobbley fp.read() 286eb8dc403SDave Cobbley fp.close() 287eb8dc403SDave Cobbley 28808902b01SBrad Bishop if req.get_method() != 'GET': 289eb8dc403SDave Cobbley newheaders = dict((k, v) for k, v in list(req.headers.items()) 290eb8dc403SDave Cobbley if k.lower() not in ("content-length", "content-type")) 291eb8dc403SDave Cobbley return self.parent.open(urllib.request.Request(req.get_full_url(), 292eb8dc403SDave Cobbley headers=newheaders, 293eb8dc403SDave Cobbley origin_req_host=req.origin_req_host, 294eb8dc403SDave Cobbley unverifiable=True)) 295eb8dc403SDave Cobbley 29608902b01SBrad 
Bishop raise urllib.request.HTTPError(req, code, msg, headers, None) 29719323693SBrad Bishop 29819323693SBrad Bishop # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403 29919323693SBrad Bishop # Forbidden when they actually mean 405 Method Not Allowed. 300eb8dc403SDave Cobbley http_error_403 = http_error_405 301eb8dc403SDave Cobbley 302eb8dc403SDave Cobbley 303eb8dc403SDave Cobbley class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler): 304eb8dc403SDave Cobbley """ 305eb8dc403SDave Cobbley urllib2.HTTPRedirectHandler resets the method to GET on redirect, 306eb8dc403SDave Cobbley when we want to follow redirects using the original method. 307eb8dc403SDave Cobbley """ 308eb8dc403SDave Cobbley def redirect_request(self, req, fp, code, msg, headers, newurl): 309eb8dc403SDave Cobbley newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl) 31019323693SBrad Bishop newreq.get_method = req.get_method 311eb8dc403SDave Cobbley return newreq 312eb8dc403SDave Cobbley 3130ca19ccfSPatrick Williams # We need to update the environment here as both the proxy and HTTPS 3140ca19ccfSPatrick Williams # handlers need variables set. The proxy needs http_proxy and friends to 3150ca19ccfSPatrick Williams # be set, and HTTPSHandler ends up calling into openssl to load the 3160ca19ccfSPatrick Williams # certificates. In buildtools configurations this will be looking at the 3170ca19ccfSPatrick Williams # wrong place for certificates by default: we set SSL_CERT_FILE to the 3180ca19ccfSPatrick Williams # right location in the buildtools environment script but as BitBake 3190ca19ccfSPatrick Williams # prunes prunes the environment this is lost. When binaries are executed 3200ca19ccfSPatrick Williams # runfetchcmd ensures these values are in the environment, but this is 3210ca19ccfSPatrick Williams # pure Python so we need to update the environment. 
3220ca19ccfSPatrick Williams # 3230ca19ccfSPatrick Williams # Avoid tramping the environment too much by using bb.utils.environment 3240ca19ccfSPatrick Williams # to scope the changes to the build_opener request, which is when the 3250ca19ccfSPatrick Williams # environment lookups happen. 3267e0e3c0cSAndrew Geissler newenv = bb.fetch2.get_fetcher_environment(d) 3270ca19ccfSPatrick Williams 3280ca19ccfSPatrick Williams with bb.utils.environment(**newenv): 329eb8dc403SDave Cobbley import ssl 3300ca19ccfSPatrick Williams 3310ca19ccfSPatrick Williams if self.check_certs(d): 3320ca19ccfSPatrick Williams context = ssl.create_default_context() 3330ca19ccfSPatrick Williams else: 3340ca19ccfSPatrick Williams context = ssl._create_unverified_context() 3350ca19ccfSPatrick Williams 3360ca19ccfSPatrick Williams handlers = [FixedHTTPRedirectHandler, 3370ca19ccfSPatrick Williams HTTPMethodFallback, 3380ca19ccfSPatrick Williams urllib.request.ProxyHandler(), 3390ca19ccfSPatrick Williams CacheHTTPHandler(), 3400ca19ccfSPatrick Williams urllib.request.HTTPSHandler(context=context)] 341eb8dc403SDave Cobbley opener = urllib.request.build_opener(*handlers) 342eb8dc403SDave Cobbley 343eb8dc403SDave Cobbley try: 344eb8dc403SDave Cobbley uri = ud.url.split(";")[0] 345eb8dc403SDave Cobbley r = urllib.request.Request(uri) 346eb8dc403SDave Cobbley r.get_method = lambda: "HEAD" 347eb8dc403SDave Cobbley # Some servers (FusionForge, as used on Alioth) require that the 348eb8dc403SDave Cobbley # optional Accept header is set. 
349eb8dc403SDave Cobbley r.add_header("Accept", "*/*") 350d1e89497SAndrew Geissler r.add_header("User-Agent", self.user_agent) 351eb8dc403SDave Cobbley def add_basic_auth(login_str, request): 352eb8dc403SDave Cobbley '''Adds Basic auth to http request, pass in login:password as string''' 353eb8dc403SDave Cobbley import base64 354eb8dc403SDave Cobbley encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8") 355eb8dc403SDave Cobbley authheader = "Basic %s" % encodeuser 356eb8dc403SDave Cobbley r.add_header("Authorization", authheader) 357eb8dc403SDave Cobbley 35819323693SBrad Bishop if ud.user and ud.pswd: 35919323693SBrad Bishop add_basic_auth(ud.user + ':' + ud.pswd, r) 360eb8dc403SDave Cobbley 361eb8dc403SDave Cobbley try: 36219323693SBrad Bishop import netrc 363eb8dc403SDave Cobbley n = netrc.netrc() 364eb8dc403SDave Cobbley login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname) 365eb8dc403SDave Cobbley add_basic_auth("%s:%s" % (login, password), r) 366eb8dc403SDave Cobbley except (TypeError, ImportError, IOError, netrc.NetrcParseError): 367eb8dc403SDave Cobbley pass 368eb8dc403SDave Cobbley 369595f6308SAndrew Geissler with opener.open(r, timeout=30) as response: 370eb8dc403SDave Cobbley pass 371eb8dc403SDave Cobbley except urllib.error.URLError as e: 372eb8dc403SDave Cobbley if try_again: 373d1e89497SAndrew Geissler logger.debug2("checkstatus: trying again") 374eb8dc403SDave Cobbley return self.checkstatus(fetch, ud, d, False) 375eb8dc403SDave Cobbley else: 376eb8dc403SDave Cobbley # debug for now to avoid spamming the logs in e.g. 
remote sstate searches 377d1e89497SAndrew Geissler logger.debug2("checkstatus() urlopen failed: %s" % e) 378eb8dc403SDave Cobbley return False 37990fd73cbSAndrew Geissler except ConnectionResetError as e: 38090fd73cbSAndrew Geissler if try_again: 38190fd73cbSAndrew Geissler logger.debug2("checkstatus: trying again") 38290fd73cbSAndrew Geissler return self.checkstatus(fetch, ud, d, False) 38390fd73cbSAndrew Geissler else: 38490fd73cbSAndrew Geissler # debug for now to avoid spamming the logs in e.g. remote sstate searches 38590fd73cbSAndrew Geissler logger.debug2("checkstatus() urlopen failed: %s" % e) 38690fd73cbSAndrew Geissler return False 387d159c7fbSAndrew Geissler 388eb8dc403SDave Cobbley return True 389eb8dc403SDave Cobbley 390eb8dc403SDave Cobbley def _parse_path(self, regex, s): 391eb8dc403SDave Cobbley """ 392eb8dc403SDave Cobbley Find and group name, version and archive type in the given string s 393eb8dc403SDave Cobbley """ 394eb8dc403SDave Cobbley 395eb8dc403SDave Cobbley m = regex.search(s) 396eb8dc403SDave Cobbley if m: 397eb8dc403SDave Cobbley pname = '' 398eb8dc403SDave Cobbley pver = '' 399eb8dc403SDave Cobbley ptype = '' 400eb8dc403SDave Cobbley 401eb8dc403SDave Cobbley mdict = m.groupdict() 402eb8dc403SDave Cobbley if 'name' in mdict.keys(): 403eb8dc403SDave Cobbley pname = mdict['name'] 404eb8dc403SDave Cobbley if 'pver' in mdict.keys(): 405eb8dc403SDave Cobbley pver = mdict['pver'] 406eb8dc403SDave Cobbley if 'type' in mdict.keys(): 407eb8dc403SDave Cobbley ptype = mdict['type'] 408eb8dc403SDave Cobbley 409eb8dc403SDave Cobbley bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype)) 410eb8dc403SDave Cobbley 411eb8dc403SDave Cobbley return (pname, pver, ptype) 412eb8dc403SDave Cobbley 413eb8dc403SDave Cobbley return None 414eb8dc403SDave Cobbley 415eb8dc403SDave Cobbley def _modelate_version(self, version): 416eb8dc403SDave Cobbley if version[0] in ['.', '-']: 417eb8dc403SDave Cobbley if version[1].isdigit(): 418eb8dc403SDave Cobbley 
version = version[1] + version[0] + version[2:len(version)] 419eb8dc403SDave Cobbley else: 420eb8dc403SDave Cobbley version = version[1:len(version)] 421eb8dc403SDave Cobbley 422eb8dc403SDave Cobbley version = re.sub('-', '.', version) 423eb8dc403SDave Cobbley version = re.sub('_', '.', version) 424eb8dc403SDave Cobbley version = re.sub('(rc)+', '.1000.', version) 425eb8dc403SDave Cobbley version = re.sub('(beta)+', '.100.', version) 426eb8dc403SDave Cobbley version = re.sub('(alpha)+', '.10.', version) 427eb8dc403SDave Cobbley if version[0] == 'v': 428eb8dc403SDave Cobbley version = version[1:len(version)] 429eb8dc403SDave Cobbley return version 430eb8dc403SDave Cobbley 431eb8dc403SDave Cobbley def _vercmp(self, old, new): 432eb8dc403SDave Cobbley """ 433eb8dc403SDave Cobbley Check whether 'new' is newer than 'old' version. We use existing vercmp() for the 434eb8dc403SDave Cobbley purpose. PE is cleared in comparison as it's not for build, and PR is cleared too 435eb8dc403SDave Cobbley for simplicity as it's somehow difficult to get from various upstream format 436eb8dc403SDave Cobbley """ 437eb8dc403SDave Cobbley 438eb8dc403SDave Cobbley (oldpn, oldpv, oldsuffix) = old 439eb8dc403SDave Cobbley (newpn, newpv, newsuffix) = new 440eb8dc403SDave Cobbley 44119323693SBrad Bishop # Check for a new suffix type that we have never heard of before 44219323693SBrad Bishop if newsuffix: 443eb8dc403SDave Cobbley m = self.suffix_regex_comp.search(newsuffix) 444eb8dc403SDave Cobbley if not m: 445eb8dc403SDave Cobbley bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix)) 446eb8dc403SDave Cobbley return False 447eb8dc403SDave Cobbley 44819323693SBrad Bishop # Not our package so ignore it 449eb8dc403SDave Cobbley if oldpn != newpn: 450eb8dc403SDave Cobbley return False 451eb8dc403SDave Cobbley 452eb8dc403SDave Cobbley oldpv = self._modelate_version(oldpv) 453eb8dc403SDave Cobbley newpv = self._modelate_version(newpv) 454eb8dc403SDave Cobbley 455eb8dc403SDave Cobbley 
return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, "")) 456eb8dc403SDave Cobbley 457eb8dc403SDave Cobbley def _fetch_index(self, uri, ud, d): 458eb8dc403SDave Cobbley """ 459eb8dc403SDave Cobbley Run fetch checkstatus to get directory information 460eb8dc403SDave Cobbley """ 461eb8dc403SDave Cobbley f = tempfile.NamedTemporaryFile() 462eb8dc403SDave Cobbley with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f: 463eb8dc403SDave Cobbley fetchcmd = self.basecmd 464d1e89497SAndrew Geissler fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'" 465eb8dc403SDave Cobbley try: 466eb8dc403SDave Cobbley self._runwget(ud, d, fetchcmd, True, workdir=workdir) 467eb8dc403SDave Cobbley fetchresult = f.read() 468eb8dc403SDave Cobbley except bb.fetch2.BBFetchException: 469eb8dc403SDave Cobbley fetchresult = "" 470eb8dc403SDave Cobbley 471eb8dc403SDave Cobbley return fetchresult 472eb8dc403SDave Cobbley 473eb8dc403SDave Cobbley def _check_latest_version(self, url, package, package_regex, current_version, ud, d): 474eb8dc403SDave Cobbley """ 475eb8dc403SDave Cobbley Return the latest version of a package inside a given directory path 476eb8dc403SDave Cobbley If error or no version, return "" 477eb8dc403SDave Cobbley """ 478eb8dc403SDave Cobbley valid = 0 479eb8dc403SDave Cobbley version = ['', '', ''] 480eb8dc403SDave Cobbley 481eb8dc403SDave Cobbley bb.debug(3, "VersionURL: %s" % (url)) 482eb8dc403SDave Cobbley soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a")) 483eb8dc403SDave Cobbley if not soup: 484eb8dc403SDave Cobbley bb.debug(3, "*** %s NO SOUP" % (url)) 485eb8dc403SDave Cobbley return "" 486eb8dc403SDave Cobbley 487eb8dc403SDave Cobbley for line in soup.find_all('a', href=True): 488eb8dc403SDave Cobbley bb.debug(3, "line['href'] = '%s'" % (line['href'])) 489eb8dc403SDave Cobbley bb.debug(3, "line = '%s'" % 
(str(line))) 490eb8dc403SDave Cobbley 491eb8dc403SDave Cobbley newver = self._parse_path(package_regex, line['href']) 492eb8dc403SDave Cobbley if not newver: 493eb8dc403SDave Cobbley newver = self._parse_path(package_regex, str(line)) 494eb8dc403SDave Cobbley 495eb8dc403SDave Cobbley if newver: 496eb8dc403SDave Cobbley bb.debug(3, "Upstream version found: %s" % newver[1]) 497eb8dc403SDave Cobbley if valid == 0: 498eb8dc403SDave Cobbley version = newver 499eb8dc403SDave Cobbley valid = 1 500eb8dc403SDave Cobbley elif self._vercmp(version, newver) < 0: 501eb8dc403SDave Cobbley version = newver 502eb8dc403SDave Cobbley 503eb8dc403SDave Cobbley pupver = re.sub('_', '.', version[1]) 504eb8dc403SDave Cobbley 505eb8dc403SDave Cobbley bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" % 506eb8dc403SDave Cobbley (package, pupver or "N/A", current_version[1])) 507eb8dc403SDave Cobbley 508eb8dc403SDave Cobbley if valid: 509eb8dc403SDave Cobbley return pupver 510eb8dc403SDave Cobbley 511eb8dc403SDave Cobbley return "" 512eb8dc403SDave Cobbley 51319323693SBrad Bishop def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d): 514eb8dc403SDave Cobbley """ 515eb8dc403SDave Cobbley Scan every directory in order to get upstream version. 516eb8dc403SDave Cobbley """ 517eb8dc403SDave Cobbley version_dir = ['', '', ''] 518eb8dc403SDave Cobbley version = ['', '', ''] 519eb8dc403SDave Cobbley 520ac69b488SWilliam A. 
Kennington III dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))") 521eb8dc403SDave Cobbley s = dirver_regex.search(dirver) 522eb8dc403SDave Cobbley if s: 523eb8dc403SDave Cobbley version_dir[1] = s.group('ver') 524eb8dc403SDave Cobbley else: 525eb8dc403SDave Cobbley version_dir[1] = dirver 526eb8dc403SDave Cobbley 527eb8dc403SDave Cobbley dirs_uri = bb.fetch.encodeurl([ud.type, ud.host, 528eb8dc403SDave Cobbley ud.path.split(dirver)[0], ud.user, ud.pswd, {}]) 529eb8dc403SDave Cobbley bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package)) 530eb8dc403SDave Cobbley 531eb8dc403SDave Cobbley soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a")) 532eb8dc403SDave Cobbley if not soup: 533eb8dc403SDave Cobbley return version[1] 534eb8dc403SDave Cobbley 535eb8dc403SDave Cobbley for line in soup.find_all('a', href=True): 536eb8dc403SDave Cobbley s = dirver_regex.search(line['href'].strip("/")) 537eb8dc403SDave Cobbley if s: 538eb8dc403SDave Cobbley sver = s.group('ver') 539eb8dc403SDave Cobbley 540eb8dc403SDave Cobbley # When prefix is part of the version directory it need to 541eb8dc403SDave Cobbley # ensure that only version directory is used so remove previous 542eb8dc403SDave Cobbley # directories if exists. 543eb8dc403SDave Cobbley # 544eb8dc403SDave Cobbley # Example: pfx = '/dir1/dir2/v' and version = '2.5' the expected 545eb8dc403SDave Cobbley # result is v2.5. 
def _init_regexes(self, package, ud, d):
    """
    Build the regex used to recognise upstream releases of a package.

    A generic name/version/arch/type pattern is assembled first. It is
    meant to match as many file name layouts as possible, such as:
        gnome-common-2.20.0.tar.gz (most common format)
        gtk+-2.90.1.tar.gz
        xf86-input-synaptics-12.6.9.tar.gz
        dri2proto-2.3.tar.gz
        blktool_4.orig.tar.gz
        libid3tag-0.15.1b.tar.gz
        unzip552.tar.gz
        icu4c-3_6-src.tgz
        genext2fs_1.3.orig.tar.gz
        gst-fluendo-mp3

    package: file name of the current source archive.
    ud: fetch data for the URL (not used by this method).
    d: datastore, read for UPSTREAM_CHECK_REGEX.

    Side effect: self.suffix_regex_comp is set to the compiled archive
    suffix pattern.

    Returns the compiled UPSTREAM_CHECK_REGEX when that variable is set.
    Otherwise returns a pattern whose name part is pinned to the first
    element of what self._parse_path() returns for 'package', or None
    when self._parse_path() returns nothing.
    """
    # match most patterns which use "-" as separator to version digits
    pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
    # a loose pattern such as for unzip552.tar.gz
    pn_prefix2 = r"[a-zA-Z]+"
    # a loose pattern such as for 80325-quicky-0.4.tar.gz
    pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
    # Package Name (pn) regex: any of the three prefixes above
    pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

    # match version
    pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

    # match arch
    parch_regex = r"-source|_all_"

    # src.rpm extension was added only for rpm packages. It can be removed
    # if rpm packages are always considered as having to be manually upgraded
    psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)"

    # match name, version and archive type of a package
    package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
                                    % (pn_regex, pver_regex, parch_regex, psuffix_regex))
    self.suffix_regex_comp = re.compile(psuffix_regex)

    # A recipe-specific regex takes precedence over the generic one
    custom_regex = d.getVar('UPSTREAM_CHECK_REGEX')
    if custom_regex:
        return re.compile(custom_regex)

    version = self._parse_path(package_regex_comp, package)
    if not version:
        return None

    # Same shape as the generic pattern, but with the name fixed to the one
    # found in 'package' and without the end-of-string anchor on the type
    return re.compile(
        r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
        (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))


def latest_versionstring(self, ud, d):
    """
    Manipulate the URL and try to obtain the latest package version

    sanity check to ensure same name and type.

    ud: fetch data; its path, type, host, user and pswd are read.
    d: datastore; PV, PN and UPSTREAM_CHECK_URI are read here.

    Returns a 2-tuple whose first element is the version string and whose
    second element is always ''. ('', '') is returned when no package
    regex could be built for the file name.
    """
    package = ud.path.split("/")[-1]
    current_version = ['', d.getVar('PV'), '']

    # possible to have no version in pkg name, such as spectrum-fw:
    # report PV itself with '_' and '-' turned into '.'
    if not re.search(r"\d+", package):
        return (re.sub(r"[_-]", ".", current_version[1]), '')

    package_regex = self._init_regexes(package, ud, d)
    if package_regex is None:
        bb.warn("latest_versionstring: package %s don't match pattern" % (package))
        return ('', '')
    bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

    uri = d.getVar("UPSTREAM_CHECK_URI")
    if not uri:
        path = ud.path.split(package)[0]

        # search for version matches on folders inside the path, like:
        # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
        dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
        m = dirver_regex.search(path)
        if m:
            pn = d.getVar('PN')
            dirver = m.group('dirver')

            # Only walk the sibling version directories when the matched
            # directory name does not contain PN; otherwise fall through
            # and list the directory that holds the file itself.
            dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
            if not dirver_pn_regex.search(dirver):
                return (self._check_latest_version_by_dir(dirver,
                        package, package_regex, current_version, ud, d), '')

        uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])

    return (self._check_latest_version(uri, package, package_regex,
            current_version, ud, d), '')