# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
"""
BitBake 'Fetch' implementations

Classes for obtaining upstream sources for the
BitBake build tools.

"""

# Copyright (C) 2003, 2004 Chris Larson
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Based on functions from the base bb module, Copyright 2003 Holger Schurig

import re
import tempfile
import subprocess
import os
import logging
import errno
import bb
import bb.progress
import socket
import http.client
import urllib.request, urllib.parse, urllib.error
# urllib.response.addinfourl is used below; import it explicitly instead of
# relying on urllib.request pulling it in as a side effect.
import urllib.response
from bb.fetch2 import FetchMethod
from bb.fetch2 import FetchError
from bb.fetch2 import logger
from bb.fetch2 import runfetchcmd
from bb.fetch2 import FetchConnectionCache
from bb.utils import export_proxies
from bs4 import BeautifulSoup
from bs4 import SoupStrainer


class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Extract progress information from wget output.
    Note: relies on --progress=dot (with -v or without -q/-nv) being
    specified on the wget command line.
    """
    def __init__(self, d):
        super(WgetProgressHandler, self).__init__(d)
        # Send an initial progress event so the bar gets shown
        self._fire_progress(0)

    def writeline(self, line):
        """
        Parse one line of wget output.

        When the line contains one or more "<percent>% <rate>" pairs, the
        last pair is reported through self.update() and False is returned;
        otherwise True is returned.
        NOTE(review): presumably the base class uses the return value to
        decide whether the line is passed through - confirm against
        bb.progress.LineFilterProgressHandler.
        """
        percs = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if percs:
            # Only the most recent figure on the line is of interest
            progress = int(percs[-1][0])
            rate = percs[-1][1] + '/s'
            self.update(progress, rate)
            return False
        return True


class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""
    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with wget.
        """
        return ud.type in ['http', 'https', 'ftp']

    def recommends_checksum(self, urldata):
        """Always recommend a checksum for urls handled by this fetcher."""
        return True

    def urldata_init(self, ud, d):
        """
        Initialise ud.basename, ud.localfile and self.basecmd for a url.

        Raises bb.fetch2.ParameterError when the url carries protocol=git,
        since such urls have to use the git:// prefix instead.
        """
        if 'protocol' in ud.parm:
            if ud.parm['protocol'] == 'git':
                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)

        if 'downloadfilename' in ud.parm:
            ud.basename = ud.parm['downloadfilename']
        else:
            ud.basename = os.path.basename(ud.path)

        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
        if not ud.localfile:
            # The path has no basename (e.g. it ends in "/"): derive a file
            # name from host and path instead.
            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))

        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate"

    def _runwget(self, ud, d, command, quiet, workdir=None):
        """
        Run 'command' through runfetchcmd() with wget progress reporting.

        ' --progress=dot -v' is appended to the command because
        WgetProgressHandler relies on that output format.
        """
        progresshandler = WgetProgressHandler(d)

        logger.debug(2, "Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command, ud.url)
        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)

    def download(self, ud, d):
        """
        Fetch urls

        Builds the wget command line from self.basecmd and runs it.
        Returns True on success. Raises FetchError when wget reported
        success but the local file is missing or empty (an empty file is
        removed first).
        """

        fetchcmd = self.basecmd

        if 'downloadfilename' in ud.parm:
            dldir = d.getVar("DL_DIR")
            bb.utils.mkdirhier(os.path.dirname(dldir + os.sep + ud.localfile))
            fetchcmd += " -O " + dldir + os.sep + ud.localfile

        if ud.user and ud.pswd:
            fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)

        # Strip the ";key=value" parameters from the url
        uri = ud.url.split(";")[0]
        if os.path.exists(ud.localpath):
            # file exists, but we didn't complete it.. trying again..
            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
        else:
            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)

        self._runwget(ud, d, fetchcmd, False)

        # Sanity check since wget can pretend it succeed when it didn't
        # Also, this used to happen if sourceforge sent us to the mirror page
        if not os.path.exists(ud.localpath):
            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)

        if os.path.getsize(ud.localpath) == 0:
            os.remove(ud.localpath)
            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)

        return True

    def checkstatus(self, fetch, ud, d, try_again=True):
        """
        Check that ud.url can be opened, using an HTTP HEAD request.

        A 405 or 403 answer is retried as a plain request by
        HTTPMethodFallback. When fetch.connection_cache is set, sockets
        are obtained from and stored in that cache.

        Returns True when the url could be opened. On urllib.error.URLError
        the check is repeated once (try_again) and False is returned if the
        second attempt fails as well.
        """
        class HTTPConnectionCache(http.client.HTTPConnection):
            # connect() is only overridden when a connection cache is in use
            if fetch.connection_cache:
                def connect(self):
                    """Connect to the host and port specified in __init__."""

                    sock = fetch.connection_cache.get_connection(self.host, self.port)
                    if sock:
                        self.sock = sock
                    else:
                        self.sock = socket.create_connection((self.host, self.port),
                                    self.timeout, self.source_address)
                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)

                    if self._tunnel_host:
                        self._tunnel()

        class CacheHTTPHandler(urllib.request.HTTPHandler):
            def http_open(self, req):
                return self.do_open(HTTPConnectionCache, req)

            def do_open(self, http_class, req):
                """Return an addinfourl object for the request, using http_class.

                http_class must implement the HTTPConnection API from httplib.
                The addinfourl return value is a file-like object.  It also
                has methods and attributes including:
                    - info(): return a mimetools.Message object for the headers
                    - geturl(): return the original request URL
                    - code: HTTP status code
                """
                host = req.host
                if not host:
                    raise urllib.error.URLError('no host given')

                h = http_class(host, timeout=req.timeout) # will parse host:port
                h.set_debuglevel(self._debuglevel)

                headers = dict(req.unredirected_hdrs)
                headers.update(dict((k, v) for k, v in list(req.headers.items())
                            if k not in headers))

                # We want to make an HTTP/1.1 request, but the addinfourl
                # class isn't prepared to deal with a persistent connection.
                # It will try to read all remaining data from the socket,
                # which will block while the server waits for the next request.
                # So make sure the connection gets closed after the (only)
                # request.

                # Don't close connection when connection_cache is enabled,
                if fetch.connection_cache is None:
                    headers["Connection"] = "close"
                else:
                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0

                headers = dict(
                    (name.title(), val) for name, val in list(headers.items()))

                if req._tunnel_host:
                    tunnel_headers = {}
                    proxy_auth_hdr = "Proxy-Authorization"
                    if proxy_auth_hdr in headers:
                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                        # Proxy-Authorization should not be sent to origin
                        # server.
                        del headers[proxy_auth_hdr]
                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

                try:
                    h.request(req.get_method(), req.selector, req.data, headers)
                except socket.error as err: # XXX what error?
                    # Don't close connection when cache is enabled.
                    # Instead, try to detect connections that are no longer
                    # usable (for example, closed unexpectedly) and remove
                    # them from the cache.
                    if fetch.connection_cache is None:
                        h.close()
                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
                        # This happens when the server closes the connection despite the Keep-Alive.
                        # Apparently urllib then uses the file descriptor, expecting it to be
                        # connected, when in reality the connection is already gone.
                        # We let the request fail and expect it to be
                        # tried once more ("try_again" in check_status()),
                        # with the dead connection removed from the cache.
                        # If it still fails, we give up, which can happen for bad
                        # HTTP proxy settings.
                        fetch.connection_cache.remove_connection(h.host, h.port)
                    raise urllib.error.URLError(err)
                else:
                    try:
                        r = h.getresponse(buffering=True)
                    except TypeError: # buffering kw not supported
                        r = h.getresponse()

                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.

                # Wrap the HTTPResponse object in socket's file object adapter
                # for Windows.  That adapter calls recv(), so delegate recv()
                # to read().  This weird wrapping allows the returned object to
                # have readline() and readlines() methods.

                # XXX It might be better to extract the read buffering code
                # out of socket._fileobject() and into a base class.
                r.recv = r.read

                # no data, just have to read
                r.read()

                # The body has been consumed above, so hand addinfourl an
                # empty stand-in file object.
                class fp_dummy(object):
                    def read(self):
                        return ""
                    def readline(self):
                        return ""
                    def close(self):
                        pass
                    closed = False

                resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url())
                resp.code = r.status
                resp.msg = r.reason

                # Close connection when server request it.
                if fetch.connection_cache is not None:
                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                        fetch.connection_cache.remove_connection(h.host, h.port)

                return resp

        class HTTPMethodFallback(urllib.request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                fp.read()
                fp.close()

                newheaders = dict((k, v) for k, v in list(req.headers.items())
                                  if k.lower() not in ("content-length", "content-type"))
                return self.parent.open(urllib.request.Request(req.get_full_url(),
                                                        headers=newheaders,
                                                        origin_req_host=req.origin_req_host,
                                                        unverifiable=True))

            # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
            # Forbidden when they actually mean 405 Method Not Allowed.
            http_error_403 = http_error_405

        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
            """
            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
            when we want to follow redirects using the original method.
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
                newreq.get_method = req.get_method
                return newreq

        # NOTE(review): presumably this exports the proxy settings from the
        # datastore into the environment so ProxyHandler picks them up -
        # confirm against bb.utils.export_proxies.
        exported_proxies = export_proxies(d)

        handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback]
        if exported_proxies:
            handlers.append(urllib.request.ProxyHandler())
        handlers.append(CacheHTTPHandler())
        # Since Python 2.7.9 ssl cert validation is enabled by default
        # see PEP-0476, this causes verification errors on some https servers
        # so disable by default.
        import ssl
        if hasattr(ssl, '_create_unverified_context'):
            handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context()))
        opener = urllib.request.build_opener(*handlers)

        try:
            uri = ud.url.split(";")[0]
            r = urllib.request.Request(uri)
            r.get_method = lambda: "HEAD"
            # Some servers (FusionForge, as used on Alioth) require that the
            # optional Accept header is set.
            r.add_header("Accept", "*/*")

            def add_basic_auth(login_str, request):
                '''Adds Basic auth to http request, pass in login:password as string'''
                import base64
                encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
                authheader = "Basic %s" % encodeuser
                # Act on the request that was passed in rather than on the
                # enclosing scope's 'r'.
                request.add_header("Authorization", authheader)

            if ud.user and ud.pswd:
                add_basic_auth(ud.user + ':' + ud.pswd, r)

            # The import is guarded on its own: the except clause below
            # refers to netrc.NetrcParseError, which would itself fail if
            # the import had not succeeded.
            try:
                import netrc
            except ImportError:
                netrc = None

            if netrc is not None:
                try:
                    n = netrc.netrc()
                    # authenticators() returns None when there is no entry
                    # for the host; the unpacking then raises TypeError.
                    login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
                    add_basic_auth("%s:%s" % (login, password), r)
                except (TypeError, IOError, netrc.NetrcParseError):
                    pass

            with opener.open(r) as response:
                pass
        except urllib.error.URLError as e:
            if try_again:
                logger.debug(2, "checkstatus: trying again")
                return self.checkstatus(fetch, ud, d, False)
            else:
                # debug for now to avoid spamming the logs in e.g. remote sstate searches
                logger.debug(2, "checkstatus() urlopen failed: %s" % e)
                return False
        return True

    def _parse_path(self, regex, s):
        """
        Find and group name, version and archive type in the given string s

        Returns a (name, pver, type) tuple taken from the named groups of
        the first match of 'regex' in 's' (missing groups yield ''), or
        None when there is no match.
        """

        m = regex.search(s)
        if m:
            pname = ''
            pver = ''
            ptype = ''

            mdict = m.groupdict()
            if 'name' in mdict.keys():
                pname = mdict['name']
            if 'pver' in mdict.keys():
                pver = mdict['pver']
            if 'type' in mdict.keys():
                ptype = mdict['type']

            bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))

            return (pname, pver, ptype)

        return None

    def _modelate_version(self, version):
        """
        Rewrite a version string into a dot separated form for comparison.

        A leading '.' or '-' is moved behind the first digit (or dropped
        when no digit follows), '-' and '_' become '.', runs of "rc",
        "beta" and "alpha" become '.1000.', '.100.' and '.10.', and a
        leading 'v' is removed.
        """
        if version[0] in ['.', '-']:
            if version[1].isdigit():
                version = version[1] + version[0] + version[2:len(version)]
            else:
                version = version[1:len(version)]

        version = re.sub('-', '.', version)
        version = re.sub('_', '.', version)
        version = re.sub('(rc)+', '.1000.', version)
        version = re.sub('(beta)+', '.100.', version)
        version = re.sub('(alpha)+', '.10.', version)
        if version[0] == 'v':
            version = version[1:len(version)]
        return version

    def _vercmp(self, old, new):
        """
        Check whether 'new' is newer than 'old' version. We use existing vercmp() for the
        purpose. PE is cleared in comparison as it's not for build, and PR is cleared too
        for simplicity as it's somehow difficult to get from various upstream format

        'old' and 'new' are (name, version, suffix) triples. Returns False
        when 'new' has an unrecognised suffix or a different name, otherwise
        the result of bb.utils.vercmp() on the normalised versions.
        Requires self.suffix_regex_comp, which _init_regexes() sets.
        """

        (oldpn, oldpv, oldsuffix) = old
        (newpn, newpv, newsuffix) = new

        # Check for a new suffix type that we have never heard of before
        if newsuffix:
            m = self.suffix_regex_comp.search(newsuffix)
            if not m:
                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
                return False

        # Not our package so ignore it
        if oldpn != newpn:
            return False

        oldpv = self._modelate_version(oldpv)
        newpv = self._modelate_version(newpv)

        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))

    def _fetch_index(self, uri, ud, d):
        """
        Run fetch checkstatus to get directory information

        Downloads 'uri' with wget into a temporary file and returns that
        file's content, or "" when the fetch raises
        bb.fetch2.BBFetchException. The temporary directory and file are
        removed when the 'with' block exits.
        """
        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
            agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12"
            fetchcmd = self.basecmd
            fetchcmd += " -O " + f.name + " --user-agent='" + agent + "' '" + uri + "'"
            try:
                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
                fetchresult = f.read()
            except bb.fetch2.BBFetchException:
                fetchresult = ""

        return fetchresult

    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
        """
        Return the latest version of a package inside a given directory path
        If error or no version, return ""
        """
        valid = 0
        version = ['', '', '']

        bb.debug(3, "VersionURL: %s" % (url))
        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            bb.debug(3, "*** %s NO SOUP" % (url))
            return ""

        for line in soup.find_all('a', href=True):
            bb.debug(3, "line['href'] = '%s'" % (line['href']))
            bb.debug(3, "line = '%s'" % (str(line)))

            # Try the link target first, then the whole anchor element
            newver = self._parse_path(package_regex, line['href'])
            if not newver:
                newver = self._parse_path(package_regex, str(line))

            if newver:
                bb.debug(3, "Upstream version found: %s" % newver[1])
                if valid == 0:
                    version = newver
                    valid = 1
                elif self._vercmp(version, newver) < 0:
                    version = newver

        pupver = re.sub('_', '.', version[1])

        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
                (package, pupver or "N/A", current_version[1]))

        if valid:
            return pupver

        return ""

    def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d):
        """
        Scan every directory in order to get upstream version.

        'dirver' is the versioned directory component of ud.path. Links on
        the parent index page whose version is not older than the best one
        seen so far are searched with _check_latest_version(). Returns the
        last version found that way, or '' when none was found.
        """
        version_dir = ['', '', '']
        version = ['', '', '']

        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))")
        s = dirver_regex.search(dirver)
        if s:
            version_dir[1] = s.group('ver')
        else:
            version_dir[1] = dirver

        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            return version[1]

        for line in soup.find_all('a', href=True):
            s = dirver_regex.search(line['href'].strip("/"))
            if s:
                sver = s.group('ver')

                # When prefix is part of the version directory it need to
                # ensure that only version directory is used so remove previous
                # directories if exists.
                #
                # Example: pfx = '/dir1/dir2/v' and version = '2.5' the expected
                # result is v2.5.
                spfx = s.group('pfx').split('/')[-1]

                version_dir_new = ['', sver, '']
                if self._vercmp(version_dir, version_dir_new) <= 0:
                    dirver_new = spfx + sver
                    # The third argument is the replacement count; True
                    # limits the substitution to the first occurrence.
                    path = ud.path.replace(dirver, dirver_new, True) \
                        .split(package)[0]
                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
                        ud.user, ud.pswd, {}])

                    pupver = self._check_latest_version(uri,
                            package, package_regex, current_version, ud, d)
                    if pupver:
                        version[1] = pupver

                    version_dir = version_dir_new

        return version[1]

    def _init_regexes(self, package, ud, d):
        """
        Match as many patterns as possible such as:
                gnome-common-2.20.0.tar.gz (most common format)
                gtk+-2.90.1.tar.gz
                xf86-input-synaptics-12.6.9.tar.gz
                dri2proto-2.3.tar.gz
                blktool_4.orig.tar.gz
                libid3tag-0.15.1b.tar.gz
                unzip552.tar.gz
                icu4c-3_6-src.tgz
                genext2fs_1.3.orig.tar.gz
                gst-fluendo-mp3

        Sets self.suffix_regex_comp and returns the compiled regex used to
        recognise upstream file names: UPSTREAM_CHECK_REGEX when that is
        set, otherwise one built from the name parsed out of 'package', or
        None when 'package' does not match the generic pattern.
        """
        # match most patterns which uses "-" as separator to version digits
        pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
        # a loose pattern such as for unzip552.tar.gz
        pn_prefix2 = r"[a-zA-Z]+"
        # a loose pattern such as for 80325-quicky-0.4.tar.gz
        pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
        # Save the Package Name (pn) Regex for use later
        pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

        # match version
        pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

        # match arch
        parch_regex = "-source|_all_"

        # src.rpm extension was added only for rpm package. Can be removed if the rpm
        # packaged will always be considered as having to be manually upgraded
        psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"

        # match name, version and archive type of a package
        package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
                                                    % (pn_regex, pver_regex, parch_regex, psuffix_regex))
        self.suffix_regex_comp = re.compile(psuffix_regex)

        # compile regex, can be specific by package or generic regex
        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
        if pn_regex:
            package_custom_regex_comp = re.compile(pn_regex)
        else:
            version = self._parse_path(package_regex_comp, package)
            if version:
                package_custom_regex_comp = re.compile(
                    r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
            else:
                package_custom_regex_comp = None

        return package_custom_regex_comp

    def latest_versionstring(self, ud, d):
        """
        Manipulate the URL and try to obtain the latest package version

        sanity check to ensure same name and type.

        Returns a (version, '') tuple; version is '' when the package name
        matches no known pattern or no upstream version was found.
        """
        package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV'), '']

        # possible to have no version in pkg name, such as spectrum-fw
        if not re.search(r"\d+", package):
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
            return (current_version[1], '')

        package_regex = self._init_regexes(package, ud, d)
        if package_regex is None:
            bb.warn("latest_versionstring: package %s don't match pattern" % (package))
            return ('', '')
        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

        uri = ""
        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
        if not regex_uri:
            path = ud.path.split(package)[0]

            # search for version matches on folders inside the path, like:
            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
            dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
            m = dirver_regex.search(path)
            if m:
                pn = d.getVar('PN')
                dirver = m.group('dirver')

                # A directory that merely contains the package name
                # (optionally followed by one digit) is not treated as a
                # version directory.
                dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
                if not dirver_pn_regex.search(dirver):
                    return (self._check_latest_version_by_dir(dirver,
                        package, package_regex, current_version, ud, d), '')

            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
        else:
            uri = regex_uri

        return (self._check_latest_version(uri, package, package_regex,
                current_version, ud, d), '')