"""
BitBake 'Fetch' implementations

Classes for obtaining upstream sources for the
BitBake build tools.

"""

# Copyright (C) 2003, 2004  Chris Larson
#
# SPDX-License-Identifier: GPL-2.0-only
#
# Based on functions from the base bb module, Copyright 2003 Holger Schurig

import shlex
import re
import tempfile
import os
import errno
import bb
import bb.progress
import socket
import http.client
import urllib.request
import urllib.parse
import urllib.error
import urllib.response
from bb.fetch2 import FetchMethod
from bb.fetch2 import FetchError
from bb.fetch2 import logger
from bb.fetch2 import runfetchcmd
from bb.utils import export_proxies
from bs4 import BeautifulSoup
from bs4 import SoupStrainer


class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Extract progress information from wget output.
    Note: relies on --progress=dot (with -v or without -q/-nv) being
    specified on the wget command line.
    """
    def __init__(self, d):
        super().__init__(d)
        # Send an initial progress event so the bar gets shown
        self._fire_progress(0)

    def writeline(self, line):
        """
        Parse one line of wget output.

        When the line carries "<percent>% <rate>" pairs, the last pair is
        reported through update() and False is returned; otherwise True is
        returned.
        """
        percs = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if percs:
            progress = int(percs[-1][0])
            rate = percs[-1][1] + '/s'
            self.update(progress, rate)
            return False
        return True


class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""

    # User agent sent both by the HEAD probe in checkstatus() and by the
    # index download in _fetch_index().
    user_agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12"

    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with wget.
        """
        return ud.type in ['http', 'https', 'ftp']

    def recommends_checksum(self, urldata):
        """Always recommend a checksum for wget downloads."""
        return True

    def urldata_init(self, ud, d):
        """
        Derive ud.basename / ud.localfile from the URL and its parameters and
        set the base wget command line (FETCHCMD_wget or a built-in default).

        Raises bb.fetch2.ParameterError when the URL carries protocol=git.
        """
        if 'protocol' in ud.parm:
            if ud.parm['protocol'] == 'git':
                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)

        if 'downloadfilename' in ud.parm:
            ud.basename = ud.parm['downloadfilename']
        else:
            ud.basename = os.path.basename(ud.path)

        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
        if not ud.localfile:
            # No usable basename (e.g. the path ends in "/"): build a name
            # from host and path instead.
            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))

        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate"

    def _runwget(self, ud, d, command, quiet, workdir=None):
        """
        Run the given wget command line with dot-style progress reporting
        enabled, after checking that network access is permitted.
        """
        progresshandler = WgetProgressHandler(d)

        logger.debug(2, "Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command, ud.url)
        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)

    def download(self, ud, d):
        """
        Fetch urls

        Returns True on success. Raises FetchError when wget reports success
        but the local file is missing or empty.
        """

        fetchcmd = self.basecmd

        if 'downloadfilename' in ud.parm:
            localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile)
            bb.utils.mkdirhier(os.path.dirname(localpath))
            fetchcmd += " -O %s" % shlex.quote(localpath)

        if ud.user and ud.pswd:
            # The command is run through a shell, so credentials must be
            # quoted: they may contain spaces or shell metacharacters.
            fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (
                shlex.quote(ud.user), shlex.quote(ud.pswd))

        uri = ud.url.split(";")[0]
        if os.path.exists(ud.localpath):
            # file exists, but we didn't complete it.. trying again..
            fetchcmd += " -c"
        fetchcmd += d.expand(" -P ${DL_DIR} %s" % shlex.quote(uri))

        self._runwget(ud, d, fetchcmd, False)

        # Sanity check since wget can pretend it succeed when it didn't
        # Also, this used to happen if sourceforge sent us to the mirror page
        if not os.path.exists(ud.localpath):
            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)

        if os.path.getsize(ud.localpath) == 0:
            os.remove(ud.localpath)
            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)

        return True

    def checkstatus(self, fetch, ud, d, try_again=True):
        """
        Probe the URL with a HEAD request (falling back to GET on 403/405).

        Returns True when the request succeeds and False when it fails with a
        urllib URLError; one retry is made when try_again is set. When
        fetch.connection_cache is set, sockets are taken from and stored in
        that cache instead of being closed after each request.
        """
        class HTTPConnectionCache(http.client.HTTPConnection):
            if fetch.connection_cache:
                def connect(self):
                    """Connect to the host and port specified in __init__."""

                    sock = fetch.connection_cache.get_connection(self.host, self.port)
                    if sock:
                        self.sock = sock
                    else:
                        self.sock = socket.create_connection((self.host, self.port),
                                                             self.timeout, self.source_address)
                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)

                    if self._tunnel_host:
                        self._tunnel()

        class CacheHTTPHandler(urllib.request.HTTPHandler):
            def http_open(self, req):
                return self.do_open(HTTPConnectionCache, req)

            def do_open(self, http_class, req):
                """Return an addinfourl object for the request, using http_class.

                http_class must implement the HTTPConnection API from httplib.
                The addinfourl return value is a file-like object.  It also
                has methods and attributes including:
                    - info(): return a mimetools.Message object for the headers
                    - geturl(): return the original request URL
                    - code: HTTP status code
                """
                host = req.host
                if not host:
                    raise urllib.error.URLError('no host given')

                h = http_class(host, timeout=req.timeout)  # will parse host:port
                h.set_debuglevel(self._debuglevel)

                headers = dict(req.unredirected_hdrs)
                headers.update({k: v for k, v in req.headers.items()
                                if k not in headers})

                # We want to make an HTTP/1.1 request, but the addinfourl
                # class isn't prepared to deal with a persistent connection.
                # It will try to read all remaining data from the socket,
                # which will block while the server waits for the next request.
                # So make sure the connection gets closed after the (only)
                # request.

                # Don't close connection when connection_cache is enabled,
                if fetch.connection_cache is None:
                    headers["Connection"] = "close"
                else:
                    headers["Connection"] = "Keep-Alive"  # Works for HTTP/1.0

                headers = {name.title(): val for name, val in headers.items()}

                if req._tunnel_host:
                    tunnel_headers = {}
                    proxy_auth_hdr = "Proxy-Authorization"
                    if proxy_auth_hdr in headers:
                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                        # Proxy-Authorization should not be sent to origin
                        # server.
                        del headers[proxy_auth_hdr]
                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

                try:
                    h.request(req.get_method(), req.selector, req.data, headers)
                except socket.error as err:  # XXX what error?
                    # Don't close connection when cache is enabled.
                    # Instead, try to detect connections that are no longer
                    # usable (for example, closed unexpectedly) and remove
                    # them from the cache.
                    if fetch.connection_cache is None:
                        h.close()
                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
                        # This happens when the server closes the connection despite the Keep-Alive.
                        # Apparently urllib then uses the file descriptor, expecting it to be
                        # connected, when in reality the connection is already gone.
                        # We let the request fail and expect it to be
                        # tried once more ("try_again" in check_status()),
                        # with the dead connection removed from the cache.
                        # If it still fails, we give up, which can happen for bad
                        # HTTP proxy settings.
                        fetch.connection_cache.remove_connection(h.host, h.port)
                    raise urllib.error.URLError(err)
                else:
                    r = h.getresponse()

                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.

                # Wrap the HTTPResponse object in socket's file object adapter
                # for Windows.  That adapter calls recv(), so delegate recv()
                # to read().  This weird wrapping allows the returned object to
                # have readline() and readlines() methods.

                # XXX It might be better to extract the read buffering code
                # out of socket._fileobject() and into a base class.
                r.recv = r.read

                # no data, just have to read
                r.read()

                class fp_dummy(object):
                    def read(self):
                        return ""

                    def readline(self):
                        return ""

                    def close(self):
                        pass
                    closed = False

                resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
                resp.code = r.status
                resp.msg = r.reason

                # Close connection when server request it.
                if fetch.connection_cache is not None:
                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                        fetch.connection_cache.remove_connection(h.host, h.port)

                return resp

        class HTTPMethodFallback(urllib.request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                fp.read()
                fp.close()

                if req.get_method() != 'GET':
                    newheaders = {k: v for k, v in req.headers.items()
                                  if k.lower() not in ("content-length", "content-type")}
                    return self.parent.open(urllib.request.Request(req.get_full_url(),
                                                                   headers=newheaders,
                                                                   origin_req_host=req.origin_req_host,
                                                                   unverifiable=True))

                # HTTPError expects the URL string, not the Request object.
                raise urllib.error.HTTPError(req.get_full_url(), code, msg, headers, None)

            # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
            # Forbidden when they actually mean 405 Method Not Allowed.
            http_error_403 = http_error_405

        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
            """
            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
            when we want to follow redirects using the original method.
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
                newreq.get_method = req.get_method
                return newreq

        def add_basic_auth(login_str, request):
            '''Adds Basic auth to http request, pass in login:password as string'''
            import base64
            encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
            authheader = "Basic %s" % encodeuser
            request.add_header("Authorization", authheader)

        exported_proxies = export_proxies(d)

        handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback]
        if exported_proxies:
            handlers.append(urllib.request.ProxyHandler())
        handlers.append(CacheHTTPHandler())
        # Since Python 2.7.9 ssl cert validation is enabled by default
        # see PEP-0476, this causes verification errors on some https servers
        # so disable by default.
        import ssl
        if hasattr(ssl, '_create_unverified_context'):
            handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context()))
        opener = urllib.request.build_opener(*handlers)

        # Imported outside the try block below so that the name is always
        # bound when the except clause refers to netrc.NetrcParseError.
        import netrc

        try:
            uri = ud.url.split(";")[0]
            r = urllib.request.Request(uri)
            r.get_method = lambda: "HEAD"
            # Some servers (FusionForge, as used on Alioth) require that the
            # optional Accept header is set.
            r.add_header("Accept", "*/*")
            r.add_header("User-Agent", self.user_agent)

            if ud.user and ud.pswd:
                add_basic_auth(ud.user + ':' + ud.pswd, r)

            # Best effort: credentials from ~/.netrc, when present for this
            # host, replace the Authorization header set above.
            try:
                auth_data = netrc.netrc().authenticators(urllib.parse.urlparse(uri).hostname)
                if auth_data:
                    login, _, password = auth_data
                    add_basic_auth("%s:%s" % (login, password), r)
            except (OSError, netrc.NetrcParseError):
                pass

            with opener.open(r):
                pass
        except urllib.error.URLError as e:
            if try_again:
                logger.debug(2, "checkstatus: trying again")
                return self.checkstatus(fetch, ud, d, False)
            else:
                # debug for now to avoid spamming the logs in e.g. remote sstate searches
                logger.debug(2, "checkstatus() urlopen failed: %s" % e)
                return False
        return True

    def _parse_path(self, regex, s):
        """
        Find and group name, version and archive type in the given string s

        Returns a (name, pver, type) tuple taken from the regex's named
        groups (an empty string for a group the regex does not define), or
        None when the regex does not match.
        """

        m = regex.search(s)
        if not m:
            return None

        mdict = m.groupdict()
        pname = mdict.get('name', '')
        pver = mdict.get('pver', '')
        ptype = mdict.get('type', '')

        bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))

        return (pname, pver, ptype)

    def _modelate_version(self, version):
        """
        Normalise an upstream version string so that it can be compared:
        a leading '.'/'-' is moved or dropped, '-' and '_' become '.',
        rc/beta/alpha markers become numeric components and a leading 'v'
        is removed. An empty (or None) version yields "".
        """
        if not version:
            return ""

        if version[0] in ['.', '-']:
            if len(version) > 1 and version[1].isdigit():
                version = version[1] + version[0] + version[2:]
            else:
                version = version[1:]

        version = re.sub('-', '.', version)
        version = re.sub('_', '.', version)
        version = re.sub('(rc)+', '.1000.', version)
        version = re.sub('(beta)+', '.100.', version)
        version = re.sub('(alpha)+', '.10.', version)
        if version.startswith('v'):
            version = version[1:]
        return version

    def _vercmp(self, old, new):
        """
        Check whether 'new' is newer than 'old' version. We use existing vercmp() for the
        purpose. PE is cleared in comparison as it's not for build, and PR is cleared too
        for simplicity as it's somehow difficult to get from various upstream format

        Both arguments are (name, version, suffix) triples. Returns the
        result of bb.utils.vercmp(), or False when 'new' has an unrecognised
        suffix or a different package name.
        """

        (oldpn, oldpv, oldsuffix) = old
        (newpn, newpv, newsuffix) = new

        # Check for a new suffix type that we have never heard of before
        if newsuffix:
            m = self.suffix_regex_comp.search(newsuffix)
            if not m:
                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
                return False

        # Not our package so ignore it
        if oldpn != newpn:
            return False

        oldpv = self._modelate_version(oldpv)
        newpv = self._modelate_version(newpv)

        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))

    def _fetch_index(self, uri, ud, d):
        """
        Run fetch checkstatus to get directory information

        Downloads uri with wget into a temporary file and returns the file's
        content, or "" when the fetch raises a BBFetchException.
        """
        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, \
                tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
            fetchcmd = "%s -O %s --user-agent=%s %s" % (
                self.basecmd, shlex.quote(f.name),
                shlex.quote(self.user_agent), shlex.quote(uri))
            try:
                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
                fetchresult = f.read()
            except bb.fetch2.BBFetchException:
                fetchresult = ""

        return fetchresult

    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
        """
        Return the latest version of a package inside a given directory path
        If error or no version, return ""
        """
        valid = 0
        version = ['', '', '']

        bb.debug(3, "VersionURL: %s" % (url))
        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            bb.debug(3, "*** %s NO SOUP" % (url))
            return ""

        for line in soup.find_all('a', href=True):
            bb.debug(3, "line['href'] = '%s'" % (line['href']))
            bb.debug(3, "line = '%s'" % (str(line)))

            # Try the link target first, then the whole anchor element.
            newver = self._parse_path(package_regex, line['href'])
            if not newver:
                newver = self._parse_path(package_regex, str(line))

            if newver:
                bb.debug(3, "Upstream version found: %s" % newver[1])
                if valid == 0:
                    version = newver
                    valid = 1
                elif self._vercmp(version, newver) < 0:
                    version = newver

        pupver = re.sub('_', '.', version[1])

        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
                 (package, pupver or "N/A", current_version[1]))

        if valid:
            return pupver

        return ""

    def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
        """
        Scan every directory in order to get upstream version.

        Returns the version string found in the newest versioned directory
        that contains a matching package, or "" when none is found.
        """
        version_dir = ['', '', '']
        version = ['', '', '']

        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))")
        s = dirver_regex.search(dirver)
        if s:
            version_dir[1] = s.group('ver')
        else:
            version_dir[1] = dirver

        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
                                       ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            return version[1]

        for line in soup.find_all('a', href=True):
            s = dirver_regex.search(line['href'].strip("/"))
            if s:
                sver = s.group('ver')

                # When prefix is part of the version directory it need to
                # ensure that only version directory is used so remove previous
                # directories if exists.
                #
                # Example: pfx = '/dir1/dir2/v' and version = '2.5' the expected
                # result is v2.5.
                spfx = s.group('pfx').split('/')[-1]

                version_dir_new = ['', sver, '']
                if self._vercmp(version_dir, version_dir_new) <= 0:
                    dirver_new = spfx + sver
                    # Only the first occurrence of the directory version is
                    # substituted.
                    path = ud.path.replace(dirver, dirver_new, 1) \
                        .split(package)[0]
                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
                                              ud.user, ud.pswd, {}])

                    pupver = self._check_latest_version(uri,
                                                        package, package_regex, current_version, ud, d)
                    if pupver:
                        version[1] = pupver

                    version_dir = version_dir_new

        return version[1]

    def _init_regexes(self, package, ud, d):
        """
        Match as many patterns as possible such as:
                gnome-common-2.20.0.tar.gz (most common format)
                gtk+-2.90.1.tar.gz
                xf86-input-synaptics-12.6.9.tar.gz
                dri2proto-2.3.tar.gz
                blktool_4.orig.tar.gz
                libid3tag-0.15.1b.tar.gz
                unzip552.tar.gz
                icu4c-3_6-src.tgz
                genext2fs_1.3.orig.tar.gz
                gst-fluendo-mp3

        Also sets self.suffix_regex_comp. Returns the compiled regex used to
        recognise upstream files (UPSTREAM_CHECK_REGEX when set, otherwise
        one derived from the package file name), or None when the package
        name does not match the generic pattern.
        """
        # match most patterns which uses "-" as separator to version digits
        pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
        # a loose pattern such as for unzip552.tar.gz
        pn_prefix2 = r"[a-zA-Z]+"
        # a loose pattern such as for 80325-quicky-0.4.tar.gz
        pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
        # Save the Package Name (pn) Regex for use later
        pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

        # match version
        pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

        # match arch
        parch_regex = "-source|_all_"

        # src.rpm extension was added only for rpm package. Can be removed if the rpm
        # packaged will always be considered as having to be manually upgraded
        psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"

        # match name, version and archive type of a package
        package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
                                        % (pn_regex, pver_regex, parch_regex, psuffix_regex))
        self.suffix_regex_comp = re.compile(psuffix_regex)

        # compile regex, can be specific by package or generic regex
        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
        if pn_regex:
            package_custom_regex_comp = re.compile(pn_regex)
        else:
            version = self._parse_path(package_regex_comp, package)
            if version:
                package_custom_regex_comp = re.compile(
                    r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
            else:
                package_custom_regex_comp = None

        return package_custom_regex_comp

    def latest_versionstring(self, ud, d):
        """
        Manipulate the URL and try to obtain the latest package version

        sanity check to ensure same name and type.

        Returns a (version, '') tuple; version is "" when nothing could be
        determined.
        """
        package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV'), '']

        # possible to have no version in pkg name, such as spectrum-fw
        if not re.search(r"\d+", package):
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
            return (current_version[1], '')

        package_regex = self._init_regexes(package, ud, d)
        if package_regex is None:
            bb.warn("latest_versionstring: package %s don't match pattern" % (package))
            return ('', '')
        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

        uri = ""
        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
        if not regex_uri:
            path = ud.path.split(package)[0]

            # search for version matches on folders inside the path, like:
            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
            dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
            m = dirver_regex.search(path)
            if m:
                pn = d.getVar('PN')
                dirver = m.group('dirver')

                # A directory that merely repeats the package name (optionally
                # followed by a digit) is not treated as a version directory.
                dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
                if not dirver_pn_regex.search(dirver):
                    return (self._check_latest_version_by_dir(dirver,
                                                              package, package_regex, current_version, ud, d), '')

            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
        else:
            uri = regex_uri

        return (self._check_latest_version(uri, package, package_regex,
                                           current_version, ud, d), '')
Cobbley if m: 594eb8dc403SDave Cobbley pn = d.getVar('PN') 595eb8dc403SDave Cobbley dirver = m.group('dirver') 596eb8dc403SDave Cobbley 59719323693SBrad Bishop dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn))) 598eb8dc403SDave Cobbley if not dirver_pn_regex.search(dirver): 599eb8dc403SDave Cobbley return (self._check_latest_version_by_dir(dirver, 600eb8dc403SDave Cobbley package, package_regex, current_version, ud, d), '') 601eb8dc403SDave Cobbley 602eb8dc403SDave Cobbley uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}]) 603eb8dc403SDave Cobbley else: 604eb8dc403SDave Cobbley uri = regex_uri 605eb8dc403SDave Cobbley 606eb8dc403SDave Cobbley return (self._check_latest_version(uri, package, package_regex, 607eb8dc403SDave Cobbley current_version, ud, d), '') 608