1eb8dc403SDave Cobbley""" 2eb8dc403SDave CobbleyBitBake 'Fetch' implementations 3eb8dc403SDave Cobbley 4eb8dc403SDave CobbleyClasses for obtaining upstream sources for the 5eb8dc403SDave CobbleyBitBake build tools. 6eb8dc403SDave Cobbley 7eb8dc403SDave Cobbley""" 8eb8dc403SDave Cobbley 9eb8dc403SDave Cobbley# Copyright (C) 2003, 2004 Chris Larson 10eb8dc403SDave Cobbley# 11c342db35SBrad Bishop# SPDX-License-Identifier: GPL-2.0-only 12eb8dc403SDave Cobbley# 13eb8dc403SDave Cobbley# Based on functions from the base bb module, Copyright 2003 Holger Schurig 14eb8dc403SDave Cobbley 1582c905dcSAndrew Geisslerimport shlex 16eb8dc403SDave Cobbleyimport re 17eb8dc403SDave Cobbleyimport tempfile 18eb8dc403SDave Cobbleyimport os 19eb8dc403SDave Cobbleyimport errno 20eb8dc403SDave Cobbleyimport bb 21eb8dc403SDave Cobbleyimport bb.progress 2219323693SBrad Bishopimport socket 2319323693SBrad Bishopimport http.client 24eb8dc403SDave Cobbleyimport urllib.request, urllib.parse, urllib.error 25eb8dc403SDave Cobbleyfrom bb.fetch2 import FetchMethod 26eb8dc403SDave Cobbleyfrom bb.fetch2 import FetchError 27eb8dc403SDave Cobbleyfrom bb.fetch2 import logger 28eb8dc403SDave Cobbleyfrom bb.fetch2 import runfetchcmd 29eb8dc403SDave Cobbleyfrom bs4 import BeautifulSoup 30eb8dc403SDave Cobbleyfrom bs4 import SoupStrainer 31eb8dc403SDave Cobbley 32eb8dc403SDave Cobbleyclass WgetProgressHandler(bb.progress.LineFilterProgressHandler): 33eb8dc403SDave Cobbley """ 34eb8dc403SDave Cobbley Extract progress information from wget output. 35eb8dc403SDave Cobbley Note: relies on --progress=dot (with -v or without -q/-nv) being 36eb8dc403SDave Cobbley specified on the wget command line. 37eb8dc403SDave Cobbley """ 38eb8dc403SDave Cobbley def __init__(self, d): 39eb8dc403SDave Cobbley super(WgetProgressHandler, self).__init__(d) 40eb8dc403SDave Cobbley # Send an initial progress event so the bar gets shown 41eb8dc403SDave Cobbley self._fire_progress(0) 42eb8dc403SDave Cobbley 43eb8dc403SDave Cobbley def writeline(self, line): 44eb8dc403SDave Cobbley percs = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line) 45eb8dc403SDave Cobbley if percs: 46eb8dc403SDave Cobbley progress = int(percs[-1][0]) 47eb8dc403SDave Cobbley rate = percs[-1][1] + '/s' 48eb8dc403SDave Cobbley self.update(progress, rate) 49eb8dc403SDave Cobbley return False 50eb8dc403SDave Cobbley return True 51eb8dc403SDave Cobbley 52eb8dc403SDave Cobbley 53eb8dc403SDave Cobbleyclass Wget(FetchMethod): 540ca19ccfSPatrick Williams """Class to fetch urls via 'wget'""" 55d1e89497SAndrew Geissler 56d1e89497SAndrew Geissler # CDNs like CloudFlare may do a 'browser integrity test' which can fail 57d1e89497SAndrew Geissler # with the standard wget/urllib User-Agent, so pretend to be a modern 58d1e89497SAndrew Geissler # browser. 59d1e89497SAndrew Geissler user_agent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0" 60d1e89497SAndrew Geissler 610ca19ccfSPatrick Williams def check_certs(self, d): 620ca19ccfSPatrick Williams """ 630ca19ccfSPatrick Williams Should certificates be checked? 640ca19ccfSPatrick Williams """ 650ca19ccfSPatrick Williams return (d.getVar("BB_CHECK_SSL_CERTS") or "1") != "0" 660ca19ccfSPatrick Williams 67eb8dc403SDave Cobbley def supports(self, ud, d): 68eb8dc403SDave Cobbley """ 69eb8dc403SDave Cobbley Check to see if a given url can be fetched with wget. 70eb8dc403SDave Cobbley """ 715199d831SAndrew Geissler return ud.type in ['http', 'https', 'ftp', 'ftps'] 72eb8dc403SDave Cobbley 73eb8dc403SDave Cobbley def recommends_checksum(self, urldata): 74eb8dc403SDave Cobbley return True 75eb8dc403SDave Cobbley 76eb8dc403SDave Cobbley def urldata_init(self, ud, d): 77eb8dc403SDave Cobbley if 'protocol' in ud.parm: 78eb8dc403SDave Cobbley if ud.parm['protocol'] == 'git': 79eb8dc403SDave Cobbley raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url) 80eb8dc403SDave Cobbley 81eb8dc403SDave Cobbley if 'downloadfilename' in ud.parm: 82eb8dc403SDave Cobbley ud.basename = ud.parm['downloadfilename'] 83eb8dc403SDave Cobbley else: 84eb8dc403SDave Cobbley ud.basename = os.path.basename(ud.path) 85eb8dc403SDave Cobbley 86eb8dc403SDave Cobbley ud.localfile = d.expand(urllib.parse.unquote(ud.basename)) 87eb8dc403SDave Cobbley if not ud.localfile: 88eb8dc403SDave Cobbley ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", ".")) 89eb8dc403SDave Cobbley 90*44b3caf2SPatrick Williams self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30" 91*44b3caf2SPatrick Williams 92*44b3caf2SPatrick Williams if ud.type == 'ftp' or ud.type == 'ftps': 93*44b3caf2SPatrick Williams self.basecmd += " --passive-ftp" 940ca19ccfSPatrick Williams 950ca19ccfSPatrick Williams if not self.check_certs(d): 960ca19ccfSPatrick Williams self.basecmd += " --no-check-certificate" 97eb8dc403SDave Cobbley 98eb8dc403SDave Cobbley def _runwget(self, ud, d, command, quiet, workdir=None): 99eb8dc403SDave Cobbley 100eb8dc403SDave Cobbley progresshandler = WgetProgressHandler(d) 101eb8dc403SDave Cobbley 102d1e89497SAndrew Geissler logger.debug2("Fetching %s using command '%s'" % (ud.url, command)) 103eb8dc403SDave Cobbley bb.fetch2.check_network_access(d, command, ud.url) 104eb8dc403SDave Cobbley runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir) 105eb8dc403SDave Cobbley 106eb8dc403SDave Cobbley def download(self, ud, d): 107eb8dc403SDave Cobbley """Fetch urls""" 108eb8dc403SDave Cobbley 109eb8dc403SDave Cobbley fetchcmd = self.basecmd 110eb8dc403SDave Cobbley 11178b72798SAndrew Geissler localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) + ".tmp" 11282c905dcSAndrew Geissler bb.utils.mkdirhier(os.path.dirname(localpath)) 11382c905dcSAndrew Geissler fetchcmd += " -O %s" % shlex.quote(localpath) 114eb8dc403SDave Cobbley 115eb8dc403SDave Cobbley if ud.user and ud.pswd: 116595f6308SAndrew Geissler fetchcmd += " --auth-no-challenge" 117595f6308SAndrew Geissler if ud.parm.get("redirectauth", "1") == "1": 118595f6308SAndrew Geissler # An undocumented feature of wget is that if the 119595f6308SAndrew Geissler # username/password are specified on the URI, wget will only 120595f6308SAndrew Geissler # send the Authorization header to the first host and not to 121595f6308SAndrew Geissler # any hosts that it is redirected to. With the increasing 122595f6308SAndrew Geissler # usage of temporary AWS URLs, this difference now matters as 123595f6308SAndrew Geissler # AWS will reject any request that has authentication both in 124595f6308SAndrew Geissler # the query parameters (from the redirect) and in the 125595f6308SAndrew Geissler # Authorization header. 126595f6308SAndrew Geissler fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd) 127eb8dc403SDave Cobbley 128eb8dc403SDave Cobbley uri = ud.url.split(";")[0] 129eb8dc403SDave Cobbley if os.path.exists(ud.localpath): 130eb8dc403SDave Cobbley # file exists, but we didnt complete it.. trying again.. 131eb8dc403SDave Cobbley fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri) 132eb8dc403SDave Cobbley else: 133eb8dc403SDave Cobbley fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri) 134eb8dc403SDave Cobbley 135eb8dc403SDave Cobbley self._runwget(ud, d, fetchcmd, False) 136eb8dc403SDave Cobbley 13787f5cff0SAndrew Geissler # Try and verify any checksum now, meaning if it isn't correct, we don't remove the 13887f5cff0SAndrew Geissler # original file, which might be a race (imagine two recipes referencing the same 13987f5cff0SAndrew Geissler # source, one with an incorrect checksum) 14087f5cff0SAndrew Geissler bb.fetch2.verify_checksum(ud, d, localpath=localpath, fatal_nochecksum=False) 14187f5cff0SAndrew Geissler 14278b72798SAndrew Geissler # Remove the ".tmp" and move the file into position atomically 14378b72798SAndrew Geissler # Our lock prevents multiple writers but mirroring code may grab incomplete files 14478b72798SAndrew Geissler os.rename(localpath, localpath[:-4]) 14578b72798SAndrew Geissler 146eb8dc403SDave Cobbley # Sanity check since wget can pretend it succeed when it didn't 147eb8dc403SDave Cobbley # Also, this used to happen if sourceforge sent us to the mirror page 148eb8dc403SDave Cobbley if not os.path.exists(ud.localpath): 149eb8dc403SDave Cobbley raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri) 150eb8dc403SDave Cobbley 151eb8dc403SDave Cobbley if os.path.getsize(ud.localpath) == 0: 152eb8dc403SDave Cobbley os.remove(ud.localpath) 153eb8dc403SDave Cobbley raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri) 154eb8dc403SDave Cobbley 155eb8dc403SDave Cobbley return True 156eb8dc403SDave Cobbley 157eb8dc403SDave Cobbley def checkstatus(self, fetch, ud, d, try_again=True): 158eb8dc403SDave Cobbley class HTTPConnectionCache(http.client.HTTPConnection): 159eb8dc403SDave Cobbley if fetch.connection_cache: 160eb8dc403SDave Cobbley def connect(self): 161eb8dc403SDave Cobbley """Connect to the host and port specified in __init__.""" 162eb8dc403SDave Cobbley 163eb8dc403SDave Cobbley sock = fetch.connection_cache.get_connection(self.host, self.port) 164eb8dc403SDave Cobbley if sock: 165eb8dc403SDave Cobbley self.sock = sock 166eb8dc403SDave Cobbley else: 167eb8dc403SDave Cobbley self.sock = socket.create_connection((self.host, self.port), 168eb8dc403SDave Cobbley self.timeout, self.source_address) 169eb8dc403SDave Cobbley fetch.connection_cache.add_connection(self.host, self.port, self.sock) 170eb8dc403SDave Cobbley 171eb8dc403SDave Cobbley if self._tunnel_host: 172eb8dc403SDave Cobbley self._tunnel() 173eb8dc403SDave Cobbley 174eb8dc403SDave Cobbley class CacheHTTPHandler(urllib.request.HTTPHandler): 175eb8dc403SDave Cobbley def http_open(self, req): 176eb8dc403SDave Cobbley return self.do_open(HTTPConnectionCache, req) 177eb8dc403SDave Cobbley 178eb8dc403SDave Cobbley def do_open(self, http_class, req): 179eb8dc403SDave Cobbley """Return an addinfourl object for the request, using http_class. 180eb8dc403SDave Cobbley 181eb8dc403SDave Cobbley http_class must implement the HTTPConnection API from httplib. 182eb8dc403SDave Cobbley The addinfourl return value is a file-like object. It also 183eb8dc403SDave Cobbley has methods and attributes including: 184eb8dc403SDave Cobbley - info(): return a mimetools.Message object for the headers 185eb8dc403SDave Cobbley - geturl(): return the original request URL 186eb8dc403SDave Cobbley - code: HTTP status code 187eb8dc403SDave Cobbley """ 188eb8dc403SDave Cobbley host = req.host 189eb8dc403SDave Cobbley if not host: 19019323693SBrad Bishop raise urllib.error.URLError('no host given') 191eb8dc403SDave Cobbley 192eb8dc403SDave Cobbley h = http_class(host, timeout=req.timeout) # will parse host:port 193eb8dc403SDave Cobbley h.set_debuglevel(self._debuglevel) 194eb8dc403SDave Cobbley 195eb8dc403SDave Cobbley headers = dict(req.unredirected_hdrs) 196eb8dc403SDave Cobbley headers.update(dict((k, v) for k, v in list(req.headers.items()) 197eb8dc403SDave Cobbley if k not in headers)) 198eb8dc403SDave Cobbley 199eb8dc403SDave Cobbley # We want to make an HTTP/1.1 request, but the addinfourl 200eb8dc403SDave Cobbley # class isn't prepared to deal with a persistent connection. 201eb8dc403SDave Cobbley # It will try to read all remaining data from the socket, 202eb8dc403SDave Cobbley # which will block while the server waits for the next request. 203eb8dc403SDave Cobbley # So make sure the connection gets closed after the (only) 204eb8dc403SDave Cobbley # request. 205eb8dc403SDave Cobbley 206eb8dc403SDave Cobbley # Don't close connection when connection_cache is enabled, 207eb8dc403SDave Cobbley if fetch.connection_cache is None: 208eb8dc403SDave Cobbley headers["Connection"] = "close" 209eb8dc403SDave Cobbley else: 210eb8dc403SDave Cobbley headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0 211eb8dc403SDave Cobbley 212eb8dc403SDave Cobbley headers = dict( 213eb8dc403SDave Cobbley (name.title(), val) for name, val in list(headers.items())) 214eb8dc403SDave Cobbley 215eb8dc403SDave Cobbley if req._tunnel_host: 216eb8dc403SDave Cobbley tunnel_headers = {} 217eb8dc403SDave Cobbley proxy_auth_hdr = "Proxy-Authorization" 218eb8dc403SDave Cobbley if proxy_auth_hdr in headers: 219eb8dc403SDave Cobbley tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr] 220eb8dc403SDave Cobbley # Proxy-Authorization should not be sent to origin 221eb8dc403SDave Cobbley # server. 222eb8dc403SDave Cobbley del headers[proxy_auth_hdr] 223eb8dc403SDave Cobbley h.set_tunnel(req._tunnel_host, headers=tunnel_headers) 224eb8dc403SDave Cobbley 225eb8dc403SDave Cobbley try: 226eb8dc403SDave Cobbley h.request(req.get_method(), req.selector, req.data, headers) 227eb8dc403SDave Cobbley except socket.error as err: # XXX what error? 228eb8dc403SDave Cobbley # Don't close connection when cache is enabled. 229eb8dc403SDave Cobbley # Instead, try to detect connections that are no longer 230eb8dc403SDave Cobbley # usable (for example, closed unexpectedly) and remove 231eb8dc403SDave Cobbley # them from the cache. 232eb8dc403SDave Cobbley if fetch.connection_cache is None: 233eb8dc403SDave Cobbley h.close() 234eb8dc403SDave Cobbley elif isinstance(err, OSError) and err.errno == errno.EBADF: 235eb8dc403SDave Cobbley # This happens when the server closes the connection despite the Keep-Alive. 236eb8dc403SDave Cobbley # Apparently urllib then uses the file descriptor, expecting it to be 237eb8dc403SDave Cobbley # connected, when in reality the connection is already gone. 238eb8dc403SDave Cobbley # We let the request fail and expect it to be 239eb8dc403SDave Cobbley # tried once more ("try_again" in check_status()), 240eb8dc403SDave Cobbley # with the dead connection removed from the cache. 2417e0e3c0cSAndrew Geissler # If it still fails, we give up, which can happen for bad 242eb8dc403SDave Cobbley # HTTP proxy settings. 243eb8dc403SDave Cobbley fetch.connection_cache.remove_connection(h.host, h.port) 244eb8dc403SDave Cobbley raise urllib.error.URLError(err) 245eb8dc403SDave Cobbley else: 246eb8dc403SDave Cobbley r = h.getresponse() 247eb8dc403SDave Cobbley 248eb8dc403SDave Cobbley # Pick apart the HTTPResponse object to get the addinfourl 249eb8dc403SDave Cobbley # object initialized properly. 250eb8dc403SDave Cobbley 251eb8dc403SDave Cobbley # Wrap the HTTPResponse object in socket's file object adapter 252eb8dc403SDave Cobbley # for Windows. That adapter calls recv(), so delegate recv() 253eb8dc403SDave Cobbley # to read(). This weird wrapping allows the returned object to 254eb8dc403SDave Cobbley # have readline() and readlines() methods. 255eb8dc403SDave Cobbley 256eb8dc403SDave Cobbley # XXX It might be better to extract the read buffering code 257eb8dc403SDave Cobbley # out of socket._fileobject() and into a base class. 258eb8dc403SDave Cobbley r.recv = r.read 259eb8dc403SDave Cobbley 260eb8dc403SDave Cobbley # no data, just have to read 261eb8dc403SDave Cobbley r.read() 262eb8dc403SDave Cobbley class fp_dummy(object): 263eb8dc403SDave Cobbley def read(self): 264eb8dc403SDave Cobbley return "" 265eb8dc403SDave Cobbley def readline(self): 266eb8dc403SDave Cobbley return "" 267eb8dc403SDave Cobbley def close(self): 268eb8dc403SDave Cobbley pass 269eb8dc403SDave Cobbley closed = False 270eb8dc403SDave Cobbley 27119323693SBrad Bishop resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url()) 272eb8dc403SDave Cobbley resp.code = r.status 273eb8dc403SDave Cobbley resp.msg = r.reason 274eb8dc403SDave Cobbley 275eb8dc403SDave Cobbley # Close connection when server request it. 276eb8dc403SDave Cobbley if fetch.connection_cache is not None: 277eb8dc403SDave Cobbley if 'Connection' in r.msg and r.msg['Connection'] == 'close': 278eb8dc403SDave Cobbley fetch.connection_cache.remove_connection(h.host, h.port) 279eb8dc403SDave Cobbley 280eb8dc403SDave Cobbley return resp 281eb8dc403SDave Cobbley 282eb8dc403SDave Cobbley class HTTPMethodFallback(urllib.request.BaseHandler): 283eb8dc403SDave Cobbley """ 284eb8dc403SDave Cobbley Fallback to GET if HEAD is not allowed (405 HTTP error) 285eb8dc403SDave Cobbley """ 286eb8dc403SDave Cobbley def http_error_405(self, req, fp, code, msg, headers): 287eb8dc403SDave Cobbley fp.read() 288eb8dc403SDave Cobbley fp.close() 289eb8dc403SDave Cobbley 29008902b01SBrad Bishop if req.get_method() != 'GET': 291eb8dc403SDave Cobbley newheaders = dict((k, v) for k, v in list(req.headers.items()) 292eb8dc403SDave Cobbley if k.lower() not in ("content-length", "content-type")) 293eb8dc403SDave Cobbley return self.parent.open(urllib.request.Request(req.get_full_url(), 294eb8dc403SDave Cobbley headers=newheaders, 295eb8dc403SDave Cobbley origin_req_host=req.origin_req_host, 296eb8dc403SDave Cobbley unverifiable=True)) 297eb8dc403SDave Cobbley 29808902b01SBrad Bishop raise urllib.request.HTTPError(req, code, msg, headers, None) 29919323693SBrad Bishop 30019323693SBrad Bishop # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403 30119323693SBrad Bishop # Forbidden when they actually mean 405 Method Not Allowed. 302eb8dc403SDave Cobbley http_error_403 = http_error_405 303eb8dc403SDave Cobbley 304eb8dc403SDave Cobbley 305eb8dc403SDave Cobbley class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler): 306eb8dc403SDave Cobbley """ 307eb8dc403SDave Cobbley urllib2.HTTPRedirectHandler resets the method to GET on redirect, 308eb8dc403SDave Cobbley when we want to follow redirects using the original method. 309eb8dc403SDave Cobbley """ 310eb8dc403SDave Cobbley def redirect_request(self, req, fp, code, msg, headers, newurl): 311eb8dc403SDave Cobbley newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl) 31219323693SBrad Bishop newreq.get_method = req.get_method 313eb8dc403SDave Cobbley return newreq 314eb8dc403SDave Cobbley 3150ca19ccfSPatrick Williams # We need to update the environment here as both the proxy and HTTPS 3160ca19ccfSPatrick Williams # handlers need variables set. The proxy needs http_proxy and friends to 3170ca19ccfSPatrick Williams # be set, and HTTPSHandler ends up calling into openssl to load the 3180ca19ccfSPatrick Williams # certificates. In buildtools configurations this will be looking at the 3190ca19ccfSPatrick Williams # wrong place for certificates by default: we set SSL_CERT_FILE to the 3200ca19ccfSPatrick Williams # right location in the buildtools environment script but as BitBake 3210ca19ccfSPatrick Williams # prunes prunes the environment this is lost. When binaries are executed 3220ca19ccfSPatrick Williams # runfetchcmd ensures these values are in the environment, but this is 3230ca19ccfSPatrick Williams # pure Python so we need to update the environment. 3240ca19ccfSPatrick Williams # 3250ca19ccfSPatrick Williams # Avoid tramping the environment too much by using bb.utils.environment 3260ca19ccfSPatrick Williams # to scope the changes to the build_opener request, which is when the 3270ca19ccfSPatrick Williams # environment lookups happen. 3287e0e3c0cSAndrew Geissler newenv = bb.fetch2.get_fetcher_environment(d) 3290ca19ccfSPatrick Williams 3300ca19ccfSPatrick Williams with bb.utils.environment(**newenv): 331eb8dc403SDave Cobbley import ssl 3320ca19ccfSPatrick Williams 3330ca19ccfSPatrick Williams if self.check_certs(d): 3340ca19ccfSPatrick Williams context = ssl.create_default_context() 3350ca19ccfSPatrick Williams else: 3360ca19ccfSPatrick Williams context = ssl._create_unverified_context() 3370ca19ccfSPatrick Williams 3380ca19ccfSPatrick Williams handlers = [FixedHTTPRedirectHandler, 3390ca19ccfSPatrick Williams HTTPMethodFallback, 3400ca19ccfSPatrick Williams urllib.request.ProxyHandler(), 3410ca19ccfSPatrick Williams CacheHTTPHandler(), 3420ca19ccfSPatrick Williams urllib.request.HTTPSHandler(context=context)] 343eb8dc403SDave Cobbley opener = urllib.request.build_opener(*handlers) 344eb8dc403SDave Cobbley 345eb8dc403SDave Cobbley try: 346517393d9SAndrew Geissler uri_base = ud.url.split(";")[0] 347517393d9SAndrew Geissler uri = "{}://{}{}".format(urllib.parse.urlparse(uri_base).scheme, ud.host, ud.path) 348eb8dc403SDave Cobbley r = urllib.request.Request(uri) 349eb8dc403SDave Cobbley r.get_method = lambda: "HEAD" 350eb8dc403SDave Cobbley # Some servers (FusionForge, as used on Alioth) require that the 351eb8dc403SDave Cobbley # optional Accept header is set. 352eb8dc403SDave Cobbley r.add_header("Accept", "*/*") 353d1e89497SAndrew Geissler r.add_header("User-Agent", self.user_agent) 354eb8dc403SDave Cobbley def add_basic_auth(login_str, request): 355eb8dc403SDave Cobbley '''Adds Basic auth to http request, pass in login:password as string''' 356eb8dc403SDave Cobbley import base64 357eb8dc403SDave Cobbley encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8") 358eb8dc403SDave Cobbley authheader = "Basic %s" % encodeuser 359eb8dc403SDave Cobbley r.add_header("Authorization", authheader) 360eb8dc403SDave Cobbley 36119323693SBrad Bishop if ud.user and ud.pswd: 36219323693SBrad Bishop add_basic_auth(ud.user + ':' + ud.pswd, r) 363eb8dc403SDave Cobbley 364eb8dc403SDave Cobbley try: 36519323693SBrad Bishop import netrc 3666aa7eec5SAndrew Geissler auth_data = netrc.netrc().authenticators(urllib.parse.urlparse(uri).hostname) 3676aa7eec5SAndrew Geissler if auth_data: 3686aa7eec5SAndrew Geissler login, _, password = auth_data 369eb8dc403SDave Cobbley add_basic_auth("%s:%s" % (login, password), r) 3706aa7eec5SAndrew Geissler except (FileNotFoundError, netrc.NetrcParseError): 371eb8dc403SDave Cobbley pass 372eb8dc403SDave Cobbley 373595f6308SAndrew Geissler with opener.open(r, timeout=30) as response: 374eb8dc403SDave Cobbley pass 375fc113eadSAndrew Geissler except (urllib.error.URLError, ConnectionResetError, TimeoutError) as e: 37690fd73cbSAndrew Geissler if try_again: 37790fd73cbSAndrew Geissler logger.debug2("checkstatus: trying again") 37890fd73cbSAndrew Geissler return self.checkstatus(fetch, ud, d, False) 37990fd73cbSAndrew Geissler else: 38090fd73cbSAndrew Geissler # debug for now to avoid spamming the logs in e.g. remote sstate searches 381705982a5SPatrick Williams logger.debug2("checkstatus() urlopen failed for %s: %s" % (uri,e)) 38290fd73cbSAndrew Geissler return False 383d159c7fbSAndrew Geissler 384eb8dc403SDave Cobbley return True 385eb8dc403SDave Cobbley 386eb8dc403SDave Cobbley def _parse_path(self, regex, s): 387eb8dc403SDave Cobbley """ 388eb8dc403SDave Cobbley Find and group name, version and archive type in the given string s 389eb8dc403SDave Cobbley """ 390eb8dc403SDave Cobbley 391eb8dc403SDave Cobbley m = regex.search(s) 392eb8dc403SDave Cobbley if m: 393eb8dc403SDave Cobbley pname = '' 394eb8dc403SDave Cobbley pver = '' 395eb8dc403SDave Cobbley ptype = '' 396eb8dc403SDave Cobbley 397eb8dc403SDave Cobbley mdict = m.groupdict() 398eb8dc403SDave Cobbley if 'name' in mdict.keys(): 399eb8dc403SDave Cobbley pname = mdict['name'] 400eb8dc403SDave Cobbley if 'pver' in mdict.keys(): 401eb8dc403SDave Cobbley pver = mdict['pver'] 402eb8dc403SDave Cobbley if 'type' in mdict.keys(): 403eb8dc403SDave Cobbley ptype = mdict['type'] 404eb8dc403SDave Cobbley 405eb8dc403SDave Cobbley bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype)) 406eb8dc403SDave Cobbley 407eb8dc403SDave Cobbley return (pname, pver, ptype) 408eb8dc403SDave Cobbley 409eb8dc403SDave Cobbley return None 410eb8dc403SDave Cobbley 411eb8dc403SDave Cobbley def _modelate_version(self, version): 412eb8dc403SDave Cobbley if version[0] in ['.', '-']: 413eb8dc403SDave Cobbley if version[1].isdigit(): 414eb8dc403SDave Cobbley version = version[1] + version[0] + version[2:len(version)] 415eb8dc403SDave Cobbley else: 416eb8dc403SDave Cobbley version = version[1:len(version)] 417eb8dc403SDave Cobbley 418eb8dc403SDave Cobbley version = re.sub('-', '.', version) 419eb8dc403SDave Cobbley version = re.sub('_', '.', version) 420eb8dc403SDave Cobbley version = re.sub('(rc)+', '.1000.', version) 421eb8dc403SDave Cobbley version = re.sub('(beta)+', '.100.', version) 422eb8dc403SDave Cobbley version = re.sub('(alpha)+', '.10.', version) 423eb8dc403SDave Cobbley if version[0] == 'v': 424eb8dc403SDave Cobbley version = version[1:len(version)] 425eb8dc403SDave Cobbley return version 426eb8dc403SDave Cobbley 427eb8dc403SDave Cobbley def _vercmp(self, old, new): 428eb8dc403SDave Cobbley """ 429eb8dc403SDave Cobbley Check whether 'new' is newer than 'old' version. We use existing vercmp() for the 430eb8dc403SDave Cobbley purpose. PE is cleared in comparison as it's not for build, and PR is cleared too 431eb8dc403SDave Cobbley for simplicity as it's somehow difficult to get from various upstream format 432eb8dc403SDave Cobbley """ 433eb8dc403SDave Cobbley 434eb8dc403SDave Cobbley (oldpn, oldpv, oldsuffix) = old 435eb8dc403SDave Cobbley (newpn, newpv, newsuffix) = new 436eb8dc403SDave Cobbley 43719323693SBrad Bishop # Check for a new suffix type that we have never heard of before 43819323693SBrad Bishop if newsuffix: 439eb8dc403SDave Cobbley m = self.suffix_regex_comp.search(newsuffix) 440eb8dc403SDave Cobbley if not m: 441eb8dc403SDave Cobbley bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix)) 442eb8dc403SDave Cobbley return False 443eb8dc403SDave Cobbley 44419323693SBrad Bishop # Not our package so ignore it 445eb8dc403SDave Cobbley if oldpn != newpn: 446eb8dc403SDave Cobbley return False 447eb8dc403SDave Cobbley 448eb8dc403SDave Cobbley oldpv = self._modelate_version(oldpv) 449eb8dc403SDave Cobbley newpv = self._modelate_version(newpv) 450eb8dc403SDave Cobbley 451eb8dc403SDave Cobbley return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, "")) 452eb8dc403SDave Cobbley 453eb8dc403SDave Cobbley def _fetch_index(self, uri, ud, d): 454eb8dc403SDave Cobbley """ 455eb8dc403SDave Cobbley Run fetch checkstatus to get directory information 456eb8dc403SDave Cobbley """ 457eb8dc403SDave Cobbley f = tempfile.NamedTemporaryFile() 458eb8dc403SDave Cobbley with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f: 459eb8dc403SDave Cobbley fetchcmd = self.basecmd 460d1e89497SAndrew Geissler fetchcmd += " -O " + f.name + " --user-agent='" + self.user_agent + "' '" + uri + "'" 461eb8dc403SDave Cobbley try: 462eb8dc403SDave Cobbley self._runwget(ud, d, fetchcmd, True, workdir=workdir) 463eb8dc403SDave Cobbley fetchresult = f.read() 464eb8dc403SDave Cobbley except bb.fetch2.BBFetchException: 465eb8dc403SDave Cobbley fetchresult = "" 466eb8dc403SDave Cobbley 467eb8dc403SDave Cobbley return fetchresult 468eb8dc403SDave Cobbley 469eb8dc403SDave Cobbley def _check_latest_version(self, url, package, package_regex, current_version, ud, d): 470eb8dc403SDave Cobbley """ 471eb8dc403SDave Cobbley Return the latest version of a package inside a given directory path 472eb8dc403SDave Cobbley If error or no version, return "" 473eb8dc403SDave Cobbley """ 474eb8dc403SDave Cobbley valid = 0 475eb8dc403SDave Cobbley version = ['', '', ''] 476eb8dc403SDave Cobbley 477eb8dc403SDave Cobbley bb.debug(3, "VersionURL: %s" % (url)) 478eb8dc403SDave Cobbley soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a")) 479eb8dc403SDave Cobbley if not soup: 480eb8dc403SDave Cobbley bb.debug(3, "*** %s NO SOUP" % (url)) 481eb8dc403SDave Cobbley return "" 482eb8dc403SDave Cobbley 483eb8dc403SDave Cobbley for line in soup.find_all('a', href=True): 484eb8dc403SDave Cobbley bb.debug(3, "line['href'] = '%s'" % (line['href'])) 485eb8dc403SDave Cobbley bb.debug(3, "line = '%s'" % (str(line))) 486eb8dc403SDave Cobbley 487eb8dc403SDave Cobbley newver = self._parse_path(package_regex, line['href']) 488eb8dc403SDave Cobbley if not newver: 489eb8dc403SDave Cobbley newver = self._parse_path(package_regex, str(line)) 490eb8dc403SDave Cobbley 491eb8dc403SDave Cobbley if newver: 492eb8dc403SDave Cobbley bb.debug(3, "Upstream version found: %s" % newver[1]) 493eb8dc403SDave Cobbley if valid == 0: 494eb8dc403SDave Cobbley version = newver 495eb8dc403SDave Cobbley valid = 1 496eb8dc403SDave Cobbley elif self._vercmp(version, newver) < 0: 497eb8dc403SDave Cobbley version = newver 498eb8dc403SDave Cobbley 499eb8dc403SDave Cobbley pupver = re.sub('_', '.', version[1]) 500eb8dc403SDave Cobbley 501eb8dc403SDave Cobbley bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" % 502eb8dc403SDave Cobbley (package, pupver or "N/A", current_version[1])) 503eb8dc403SDave Cobbley 504eb8dc403SDave Cobbley if valid: 505eb8dc403SDave Cobbley return pupver 506eb8dc403SDave Cobbley 507eb8dc403SDave Cobbley return "" 508eb8dc403SDave Cobbley 50919323693SBrad Bishop def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d): 510eb8dc403SDave Cobbley """ 511eb8dc403SDave Cobbley Scan every directory in order to get upstream version. 512eb8dc403SDave Cobbley """ 513eb8dc403SDave Cobbley version_dir = ['', '', ''] 514eb8dc403SDave Cobbley version = ['', '', ''] 515eb8dc403SDave Cobbley 516ac69b488SWilliam A. Kennington III dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])*(\d+))") 517eb8dc403SDave Cobbley s = dirver_regex.search(dirver) 518eb8dc403SDave Cobbley if s: 519eb8dc403SDave Cobbley version_dir[1] = s.group('ver') 520eb8dc403SDave Cobbley else: 521eb8dc403SDave Cobbley version_dir[1] = dirver 522eb8dc403SDave Cobbley 523eb8dc403SDave Cobbley dirs_uri = bb.fetch.encodeurl([ud.type, ud.host, 524eb8dc403SDave Cobbley ud.path.split(dirver)[0], ud.user, ud.pswd, {}]) 525eb8dc403SDave Cobbley bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package)) 526eb8dc403SDave Cobbley 527eb8dc403SDave Cobbley soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a")) 528eb8dc403SDave Cobbley if not soup: 529eb8dc403SDave Cobbley return version[1] 530eb8dc403SDave Cobbley 531eb8dc403SDave Cobbley for line in soup.find_all('a', href=True): 532eb8dc403SDave Cobbley s = dirver_regex.search(line['href'].strip("/")) 533eb8dc403SDave Cobbley if s: 534eb8dc403SDave Cobbley sver = s.group('ver') 535eb8dc403SDave Cobbley 536eb8dc403SDave Cobbley # When prefix is part of the version directory it need to 537eb8dc403SDave Cobbley # ensure that only version directory is used so remove previous 538eb8dc403SDave Cobbley # directories if exists. 539eb8dc403SDave Cobbley # 540eb8dc403SDave Cobbley # Example: pfx = '/dir1/dir2/v' and version = '2.5' the expected 541eb8dc403SDave Cobbley # result is v2.5. 542eb8dc403SDave Cobbley spfx = s.group('pfx').split('/')[-1] 543eb8dc403SDave Cobbley 544eb8dc403SDave Cobbley version_dir_new = ['', sver, ''] 545eb8dc403SDave Cobbley if self._vercmp(version_dir, version_dir_new) <= 0: 546eb8dc403SDave Cobbley dirver_new = spfx + sver 547eb8dc403SDave Cobbley path = ud.path.replace(dirver, dirver_new, True) \ 548eb8dc403SDave Cobbley .split(package)[0] 549eb8dc403SDave Cobbley uri = bb.fetch.encodeurl([ud.type, ud.host, path, 550eb8dc403SDave Cobbley ud.user, ud.pswd, {}]) 551eb8dc403SDave Cobbley 552eb8dc403SDave Cobbley pupver = self._check_latest_version(uri, 553eb8dc403SDave Cobbley package, package_regex, current_version, ud, d) 554eb8dc403SDave Cobbley if pupver: 555eb8dc403SDave Cobbley version[1] = pupver 556eb8dc403SDave Cobbley 557eb8dc403SDave Cobbley version_dir = version_dir_new 558eb8dc403SDave Cobbley 559eb8dc403SDave Cobbley return version[1] 560eb8dc403SDave Cobbley 561eb8dc403SDave Cobbley def _init_regexes(self, package, ud, d): 562eb8dc403SDave Cobbley """ 563eb8dc403SDave Cobbley Match as many patterns as possible such as: 564eb8dc403SDave Cobbley gnome-common-2.20.0.tar.gz (most common format) 565eb8dc403SDave Cobbley gtk+-2.90.1.tar.gz 566eb8dc403SDave Cobbley xf86-input-synaptics-12.6.9.tar.gz 567eb8dc403SDave Cobbley dri2proto-2.3.tar.gz 568eb8dc403SDave Cobbley blktool_4.orig.tar.gz 569eb8dc403SDave Cobbley libid3tag-0.15.1b.tar.gz 570eb8dc403SDave Cobbley unzip552.tar.gz 571eb8dc403SDave Cobbley icu4c-3_6-src.tgz 572eb8dc403SDave Cobbley genext2fs_1.3.orig.tar.gz 573eb8dc403SDave Cobbley gst-fluendo-mp3 574eb8dc403SDave Cobbley """ 575eb8dc403SDave Cobbley # match most patterns which uses "-" as separator to version digits 57619323693SBrad Bishop pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]" 577eb8dc403SDave Cobbley # a loose pattern such as for unzip552.tar.gz 57819323693SBrad Bishop pn_prefix2 = r"[a-zA-Z]+" 579eb8dc403SDave Cobbley # a loose pattern such as for 80325-quicky-0.4.tar.gz 58019323693SBrad Bishop pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+" 581eb8dc403SDave Cobbley # Save the Package Name (pn) Regex for use later 58219323693SBrad Bishop pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3) 583eb8dc403SDave Cobbley 584eb8dc403SDave Cobbley # match version 58519323693SBrad Bishop pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)" 586eb8dc403SDave Cobbley 587eb8dc403SDave Cobbley # match arch 588eb8dc403SDave Cobbley parch_regex = "-source|_all_" 589eb8dc403SDave Cobbley 590eb8dc403SDave Cobbley # src.rpm extension was added only for rpm package. Can be removed if the rpm 591eb8dc403SDave Cobbley # packaged will always be considered as having to be manually upgraded 592595f6308SAndrew Geissler psuffix_regex = r"(tar\.\w+|tgz|zip|xz|rpm|bz2|orig\.tar\.\w+|src\.tar\.\w+|src\.tgz|svnr\d+\.tar\.\w+|stable\.tar\.\w+|src\.rpm)" 593eb8dc403SDave Cobbley 594eb8dc403SDave Cobbley # match name, version and archive type of a package 59519323693SBrad Bishop package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)" 596eb8dc403SDave Cobbley % (pn_regex, pver_regex, parch_regex, psuffix_regex)) 597eb8dc403SDave Cobbley self.suffix_regex_comp = re.compile(psuffix_regex) 598eb8dc403SDave Cobbley 599eb8dc403SDave Cobbley # compile regex, can be specific by package or generic regex 600eb8dc403SDave Cobbley pn_regex = d.getVar('UPSTREAM_CHECK_REGEX') 601eb8dc403SDave Cobbley if pn_regex: 602eb8dc403SDave Cobbley package_custom_regex_comp = re.compile(pn_regex) 603eb8dc403SDave Cobbley else: 604eb8dc403SDave Cobbley version = self._parse_path(package_regex_comp, package) 605eb8dc403SDave Cobbley if version: 606eb8dc403SDave Cobbley package_custom_regex_comp = re.compile( 60719323693SBrad Bishop r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" % 608eb8dc403SDave Cobbley (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex)) 609eb8dc403SDave Cobbley else: 610eb8dc403SDave Cobbley package_custom_regex_comp = None 611eb8dc403SDave Cobbley 612eb8dc403SDave Cobbley return package_custom_regex_comp 613eb8dc403SDave Cobbley 614eb8dc403SDave Cobbley def latest_versionstring(self, ud, d): 615eb8dc403SDave Cobbley """ 616eb8dc403SDave Cobbley Manipulate the URL and try to obtain the latest package version 617eb8dc403SDave Cobbley 618eb8dc403SDave Cobbley sanity check to ensure same name and type. 619eb8dc403SDave Cobbley """ 620eb8dc403SDave Cobbley package = ud.path.split("/")[-1] 621eb8dc403SDave Cobbley current_version = ['', d.getVar('PV'), ''] 622eb8dc403SDave Cobbley 623eb8dc403SDave Cobbley """possible to have no version in pkg name, such as spectrum-fw""" 62419323693SBrad Bishop if not re.search(r"\d+", package): 625eb8dc403SDave Cobbley current_version[1] = re.sub('_', '.', current_version[1]) 626eb8dc403SDave Cobbley current_version[1] = re.sub('-', '.', current_version[1]) 627eb8dc403SDave Cobbley return (current_version[1], '') 628eb8dc403SDave Cobbley 629eb8dc403SDave Cobbley package_regex = self._init_regexes(package, ud, d) 630eb8dc403SDave Cobbley if package_regex is None: 631eb8dc403SDave Cobbley bb.warn("latest_versionstring: package %s don't match pattern" % (package)) 632eb8dc403SDave Cobbley return ('', '') 633eb8dc403SDave Cobbley bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern)) 634eb8dc403SDave Cobbley 635eb8dc403SDave Cobbley uri = "" 636eb8dc403SDave Cobbley regex_uri = d.getVar("UPSTREAM_CHECK_URI") 637eb8dc403SDave Cobbley if not regex_uri: 638eb8dc403SDave Cobbley path = ud.path.split(package)[0] 639eb8dc403SDave Cobbley 640eb8dc403SDave Cobbley # search for version matches on folders inside the path, like: 641eb8dc403SDave Cobbley # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz 64219323693SBrad Bishop dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/") 643517393d9SAndrew Geissler m = dirver_regex.findall(path) 644eb8dc403SDave Cobbley if m: 645eb8dc403SDave Cobbley pn = d.getVar('PN') 646517393d9SAndrew Geissler dirver = m[-1][0] 647eb8dc403SDave Cobbley 64819323693SBrad Bishop dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn))) 649eb8dc403SDave Cobbley if not dirver_pn_regex.search(dirver): 650eb8dc403SDave Cobbley return (self._check_latest_version_by_dir(dirver, 651eb8dc403SDave Cobbley package, package_regex, current_version, ud, d), '') 652eb8dc403SDave Cobbley 653eb8dc403SDave Cobbley uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}]) 654eb8dc403SDave Cobbley else: 655eb8dc403SDave Cobbley uri = regex_uri 656eb8dc403SDave Cobbley 657eb8dc403SDave Cobbley return (self._check_latest_version(uri, package, package_regex, 658eb8dc403SDave Cobbley current_version, ud, d), '') 659