# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
"""
BitBake 'Fetch' implementations

Classes for obtaining upstream sources for the
BitBake build tools.

"""

# Copyright (C) 2003, 2004 Chris Larson
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Based on functions from the base bb module, Copyright 2003 Holger Schurig

import re
import tempfile
import subprocess
import os
import logging
import errno
import bb
import bb.progress
import urllib.request, urllib.parse, urllib.error
from bb.fetch2 import FetchMethod
from bb.fetch2 import FetchError
from bb.fetch2 import logger
from bb.fetch2 import runfetchcmd
from bb.utils import export_proxies
from bs4 import BeautifulSoup
from bs4 import SoupStrainer

class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Extract progress information from wget output.
    Note: relies on --progress=dot (with -v or without -q/-nv) being
    specified on the wget command line.
    """
    def __init__(self, d):
        super(WgetProgressHandler, self).__init__(d)
        # Send an initial progress event so the bar gets shown
        self._fire_progress(0)

    def writeline(self, line):
        """
        Parse one line of wget output.

        When the line carries "<percent>% <rate>" pairs, the last pair is
        reported through update() and False is returned; any other line
        returns True.
        NOTE(review): the return value presumably tells the base class
        whether to pass the line through - confirm against
        LineFilterProgressHandler.
        """
        percs = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if percs:
            progress = int(percs[-1][0])
            rate = percs[-1][1] + '/s'
            self.update(progress, rate)
            return False
        return True


class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""
    def supports(self, ud, d):
        """
        Check to see if a given url can be fetched with wget.
        """
        return ud.type in ['http', 'https', 'ftp']

    def recommends_checksum(self, urldata):
        """Checksums are always recommended for wget downloads."""
        return True

    def urldata_init(self, ud, d):
        """
        Derive the local file name for the url and set the base wget command.

        Raises bb.fetch2.ParameterError when protocol=git is given.
        """
        if 'protocol' in ud.parm:
            if ud.parm['protocol'] == 'git':
                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)

        if 'downloadfilename' in ud.parm:
            ud.basename = ud.parm['downloadfilename']
        else:
            ud.basename = os.path.basename(ud.path)

        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
        if not ud.localfile:
            # The path has no basename (e.g. it ends in "/"): build a file
            # name from host and path instead.
            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))

        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate"

    def _runwget(self, ud, d, command, quiet, workdir=None):
        """
        Run the given wget command line with dot progress reporting enabled.
        """

        progresshandler = WgetProgressHandler(d)

        logger.debug(2, "Fetching %s using command '%s'" % (ud.url, command))
        bb.fetch2.check_network_access(d, command, ud.url)
        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)

    def download(self, ud, d):
        """Fetch urls"""

        fetchcmd = self.basecmd

        if 'downloadfilename' in ud.parm:
            dldir = d.getVar("DL_DIR")
            bb.utils.mkdirhier(os.path.dirname(dldir + os.sep + ud.localfile))
            fetchcmd += " -O " + dldir + os.sep + ud.localfile

        if ud.user and ud.pswd:
            fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)

        uri = ud.url.split(";")[0]
        if os.path.exists(ud.localpath):
            # file exists, but we didnt complete it.. trying again..
            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
        else:
            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)

        self._runwget(ud, d, fetchcmd, False)

        # Sanity check since wget can pretend it succeed when it didn't
        # Also, this used to happen if sourceforge sent us to the mirror page
        if not os.path.exists(ud.localpath):
            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)

        if os.path.getsize(ud.localpath) == 0:
            os.remove(ud.localpath)
            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)

        return True

    def checkstatus(self, fetch, ud, d, try_again=True):
        """
        Check that the url is reachable by issuing an HTTP HEAD request.

        Returns True when the request succeeds. On urllib.error.URLError the
        check is repeated once (try_again) and False is returned if the
        second attempt fails as well.
        """
        import urllib.request, urllib.error, urllib.parse, socket, http.client
        from urllib.response import addinfourl
        from bb.fetch2 import FetchConnectionCache

        class HTTPConnectionCache(http.client.HTTPConnection):
            if fetch.connection_cache:
                def connect(self):
                    """Connect to the host and port specified in __init__."""

                    sock = fetch.connection_cache.get_connection(self.host, self.port)
                    if sock:
                        self.sock = sock
                    else:
                        self.sock = socket.create_connection((self.host, self.port),
                                    self.timeout, self.source_address)
                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)

                    if self._tunnel_host:
                        self._tunnel()

        class CacheHTTPHandler(urllib.request.HTTPHandler):
            def http_open(self, req):
                return self.do_open(HTTPConnectionCache, req)

            def do_open(self, http_class, req):
                """Return an addinfourl object for the request, using http_class.

                http_class must implement the HTTPConnection API from httplib.
                The addinfourl return value is a file-like object.  It also
                has methods and attributes including:
                    - info(): return a mimetools.Message object for the headers
                    - geturl(): return the original request URL
                    - code: HTTP status code
                """
                host = req.host
                if not host:
                    raise urllib.error.URLError('no host given')

                h = http_class(host, timeout=req.timeout) # will parse host:port
                h.set_debuglevel(self._debuglevel)

                headers = dict(req.unredirected_hdrs)
                headers.update({k: v for k, v in req.headers.items()
                                if k not in headers})

                # We want to make an HTTP/1.1 request, but the addinfourl
                # class isn't prepared to deal with a persistent connection.
                # It will try to read all remaining data from the socket,
                # which will block while the server waits for the next request.
                # So make sure the connection gets closed after the (only)
                # request.

                # Don't close connection when connection_cache is enabled,
                if fetch.connection_cache is None:
                    headers["Connection"] = "close"
                else:
                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0

                headers = {name.title(): val for name, val in headers.items()}

                if req._tunnel_host:
                    tunnel_headers = {}
                    proxy_auth_hdr = "Proxy-Authorization"
                    if proxy_auth_hdr in headers:
                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                        # Proxy-Authorization should not be sent to origin
                        # server.
                        del headers[proxy_auth_hdr]
                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

                try:
                    h.request(req.get_method(), req.selector, req.data, headers)
                except socket.error as err: # XXX what error?
                    # Don't close connection when cache is enabled.
                    # Instead, try to detect connections that are no longer
                    # usable (for example, closed unexpectedly) and remove
                    # them from the cache.
                    if fetch.connection_cache is None:
                        h.close()
                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
                        # This happens when the server closes the connection despite the Keep-Alive.
                        # Apparently urllib then uses the file descriptor, expecting it to be
                        # connected, when in reality the connection is already gone.
                        # We let the request fail and expect it to be
                        # tried once more ("try_again" in check_status()),
                        # with the dead connection removed from the cache.
                        # If it still fails, we give up, which can happen for bad
                        # HTTP proxy settings.
                        fetch.connection_cache.remove_connection(h.host, h.port)
                    raise urllib.error.URLError(err)
                else:
                    try:
                        r = h.getresponse(buffering=True)
                    except TypeError: # buffering kw not supported
                        r = h.getresponse()

                # Pick apart the HTTPResponse object to get the addinfourl
                # object initialized properly.

                # Wrap the HTTPResponse object in socket's file object adapter
                # for Windows.  That adapter calls recv(), so delegate recv()
                # to read().  This weird wrapping allows the returned object to
                # have readline() and readlines() methods.

                # XXX It might be better to extract the read buffering code
                # out of socket._fileobject() and into a base class.
                r.recv = r.read

                # no data, just have to read
                r.read()
                class fp_dummy(object):
                    def read(self):
                        return ""
                    def readline(self):
                        return ""
                    def close(self):
                        pass
                    closed = False

                resp = addinfourl(fp_dummy(), r.msg, req.get_full_url())
                resp.code = r.status
                resp.msg = r.reason

                # Close connection when server request it.
                if fetch.connection_cache is not None:
                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
                        fetch.connection_cache.remove_connection(h.host, h.port)

                return resp

        class HTTPMethodFallback(urllib.request.BaseHandler):
            """
            Fallback to GET if HEAD is not allowed (405 HTTP error)
            """
            def http_error_405(self, req, fp, code, msg, headers):
                fp.read()
                fp.close()

                newheaders = {k: v for k, v in req.headers.items()
                              if k.lower() not in ("content-length", "content-type")}
                return self.parent.open(urllib.request.Request(req.get_full_url(),
                                                        headers=newheaders,
                                                        origin_req_host=req.origin_req_host,
                                                        unverifiable=True))

            # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
            # Forbidden when they actually mean 405 Method Not Allowed.
            http_error_403 = http_error_405


        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
            """
            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
            when we want to follow redirects using the original method.
            """
            def redirect_request(self, req, fp, code, msg, headers, newurl):
                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
                newreq.get_method = lambda: req.get_method()
                return newreq
        exported_proxies = export_proxies(d)

        handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback]
        # Test the result of export_proxies(), not the function object itself
        # (which is always truthy).
        if exported_proxies:
            handlers.append(urllib.request.ProxyHandler())
        handlers.append(CacheHTTPHandler())
        # XXX: Since Python 2.7.9 ssl cert validation is enabled by default
        # see PEP-0476, this causes verification errors on some https servers
        # so disable by default.
        import ssl
        if hasattr(ssl, '_create_unverified_context'):
            handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context()))
        opener = urllib.request.build_opener(*handlers)

        try:
            uri = ud.url.split(";")[0]
            r = urllib.request.Request(uri)
            r.get_method = lambda: "HEAD"
            # Some servers (FusionForge, as used on Alioth) require that the
            # optional Accept header is set.
            r.add_header("Accept", "*/*")
            def add_basic_auth(login_str, request):
                '''Adds Basic auth to http request, pass in login:password as string'''
                import base64
                encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
                authheader = "Basic %s" % encodeuser
                request.add_header("Authorization", authheader)

            if ud.user and ud.pswd:
                # Basic auth needs "login:password"; sending only the user
                # name silently drops the password.
                add_basic_auth(ud.user + ':' + ud.pswd, r)
            elif ud.user:
                add_basic_auth(ud.user, r)

            try:
                import netrc, urllib.parse
                n = netrc.netrc()
                # authenticators() returns None for unknown hosts; the
                # resulting TypeError on unpacking is caught below.
                login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
                add_basic_auth("%s:%s" % (login, password), r)
            except (TypeError, ImportError, IOError, netrc.NetrcParseError):
                pass

            with opener.open(r) as response:
                pass
        except urllib.error.URLError as e:
            if try_again:
                logger.debug(2, "checkstatus: trying again")
                return self.checkstatus(fetch, ud, d, False)
            else:
                # debug for now to avoid spamming the logs in e.g. remote sstate searches
                logger.debug(2, "checkstatus() urlopen failed: %s" % e)
                return False
        return True

    def _parse_path(self, regex, s):
        """
        Find and group name, version and archive type in the given string s

        Returns a (name, pver, type) tuple, using '' for groups the regex
        does not define, or None when the regex does not match.
        """

        m = regex.search(s)
        if m:
            mdict = m.groupdict()
            pname = mdict.get('name', '')
            pver = mdict.get('pver', '')
            ptype = mdict.get('type', '')

            bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))

            return (pname, pver, ptype)

        return None

    def _modelate_version(self, version):
        """
        Normalise an upstream version string for comparison.

        A leading '.' or '-' is moved behind the first digit (or dropped if
        no digit follows), '-' and '_' become '.', rc/beta/alpha markers are
        replaced by numeric tokens and a leading 'v' is removed.
        """
        # Slicing instead of indexing so that empty or one-character strings
        # do not raise IndexError.
        if version[:1] in ('.', '-'):
            if version[1:2].isdigit():
                version = version[1] + version[0] + version[2:]
            else:
                version = version[1:]

        version = re.sub('-', '.', version)
        version = re.sub('_', '.', version)
        version = re.sub('(rc)+', '.1000.', version)
        version = re.sub('(beta)+', '.100.', version)
        version = re.sub('(alpha)+', '.10.', version)
        if version.startswith('v'):
            version = version[1:]
        return version

    def _vercmp(self, old, new):
        """
        Check whether 'new' is newer than 'old' version. We use existing vercmp() for the
        purpose. PE is cleared in comparison as it's not for build, and PR is cleared too
        for simplicity as it's somehow difficult to get from various upstream format

        Both arguments are (name, version, suffix) sequences. Returns False
        when the new suffix is unknown or the names differ, otherwise the
        result of bb.utils.vercmp().
        """

        (oldpn, oldpv, oldsuffix) = old
        (newpn, newpv, newsuffix) = new

        # Check for a new suffix type that we have never heard of before
        if (newsuffix):
            m = self.suffix_regex_comp.search(newsuffix)
            if not m:
                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
                return False

        # Not our package so ignore it
        if oldpn != newpn:
            return False

        oldpv = self._modelate_version(oldpv)
        newpv = self._modelate_version(newpv)

        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))

    def _fetch_index(self, uri, ud, d):
        """
        Run fetch checkstatus to get directory information

        Downloads uri into a temporary file with wget and returns its
        contents, or "" when the fetch raises bb.fetch2.BBFetchException.
        """
        # Both the working directory and the listing file are owned by the
        # with-statement, so nothing is left open or on disk afterwards.
        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
            agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12"
            fetchcmd = self.basecmd
            fetchcmd += " -O " + f.name + " --user-agent='" + agent + "' '" + uri + "'"
            try:
                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
                fetchresult = f.read()
            except bb.fetch2.BBFetchException:
                fetchresult = ""

        return fetchresult

    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
        """
        Return the latest version of a package inside a given directory path
        If error or no version, return ""
        """
        valid = 0
        version = ['', '', '']

        bb.debug(3, "VersionURL: %s" % (url))
        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            bb.debug(3, "*** %s NO SOUP" % (url))
            return ""

        for line in soup.find_all('a', href=True):
            bb.debug(3, "line['href'] = '%s'" % (line['href']))
            bb.debug(3, "line = '%s'" % (str(line)))

            # Try the link target first, then the whole anchor element.
            newver = self._parse_path(package_regex, line['href'])
            if not newver:
                newver = self._parse_path(package_regex, str(line))

            if newver:
                bb.debug(3, "Upstream version found: %s" % newver[1])
                if valid == 0:
                    version = newver
                    valid = 1
                elif self._vercmp(version, newver) < 0:
                    version = newver

        pupver = re.sub('_', '.', version[1])

        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
                (package, pupver or "N/A", current_version[1]))

        if valid:
            return pupver

        return ""

    def _check_latest_version_by_dir(self, dirver, package, package_regex,
            current_version, ud, d):
        """
        Scan every directory in order to get upstream version.
        """
        version_dir = ['', '', '']
        version = ['', '', '']

        dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))")
        s = dirver_regex.search(dirver)
        if s:
            version_dir[1] = s.group('ver')
        else:
            version_dir[1] = dirver

        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            return version[1]

        for line in soup.find_all('a', href=True):
            s = dirver_regex.search(line['href'].strip("/"))
            if s:
                sver = s.group('ver')

                # When prefix is part of the version directory it need to
                # ensure that only version directory is used so remove previous
                # directories if exists.
                #
                # Example: pfx = '/dir1/dir2/v' and version = '2.5' the expected
                # result is v2.5.
                spfx = s.group('pfx').split('/')[-1]

                version_dir_new = ['', sver, '']
                if self._vercmp(version_dir, version_dir_new) <= 0:
                    dirver_new = spfx + sver
                    # Replace only the first occurrence of the directory version.
                    path = ud.path.replace(dirver, dirver_new, 1) \
                        .split(package)[0]
                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
                        ud.user, ud.pswd, {}])

                    pupver = self._check_latest_version(uri,
                            package, package_regex, current_version, ud, d)
                    if pupver:
                        version[1] = pupver

                    version_dir = version_dir_new

        return version[1]

    def _init_regexes(self, package, ud, d):
        """
        Match as many patterns as possible such as:
                gnome-common-2.20.0.tar.gz (most common format)
                gtk+-2.90.1.tar.gz
                xf86-input-synaptics-12.6.9.tar.gz
                dri2proto-2.3.tar.gz
                blktool_4.orig.tar.gz
                libid3tag-0.15.1b.tar.gz
                unzip552.tar.gz
                icu4c-3_6-src.tgz
                genext2fs_1.3.orig.tar.gz
                gst-fluendo-mp3

        Returns the compiled regex used to recognise upstream file names
        (UPSTREAM_CHECK_REGEX if set), or None when the package name does
        not match the generic pattern. Also sets self.suffix_regex_comp.
        """
        # match most patterns which uses "-" as separator to version digits
        pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
        # a loose pattern such as for unzip552.tar.gz
        pn_prefix2 = r"[a-zA-Z]+"
        # a loose pattern such as for 80325-quicky-0.4.tar.gz
        pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+"
        # Save the Package Name (pn) Regex for use later
        pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)

        # match version
        pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"

        # match arch
        parch_regex = "-source|_all_"

        # src.rpm extension was added only for rpm package. Can be removed if the rpm
        # packaged will always be considered as having to be manually upgraded
        psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"

        # match name, version and archive type of a package
        package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
                                                    % (pn_regex, pver_regex, parch_regex, psuffix_regex))
        self.suffix_regex_comp = re.compile(psuffix_regex)

        # compile regex, can be specific by package or generic regex
        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
        if pn_regex:
            package_custom_regex_comp = re.compile(pn_regex)
        else:
            version = self._parse_path(package_regex_comp, package)
            if version:
                package_custom_regex_comp = re.compile(
                    r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
            else:
                package_custom_regex_comp = None

        return package_custom_regex_comp

    def latest_versionstring(self, ud, d):
        """
        Manipulate the URL and try to obtain the latest package version

        sanity check to ensure same name and type.

        Returns a (version, '') tuple; version is '' when nothing was found.
        """
        package = ud.path.split("/")[-1]
        current_version = ['', d.getVar('PV'), '']

        # possible to have no version in pkg name, such as spectrum-fw
        if not re.search(r"\d+", package):
            current_version[1] = re.sub('_', '.', current_version[1])
            current_version[1] = re.sub('-', '.', current_version[1])
            return (current_version[1], '')

        package_regex = self._init_regexes(package, ud, d)
        if package_regex is None:
            bb.warn("latest_versionstring: package %s don't match pattern" % (package))
            return ('', '')
        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))

        uri = ""
        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
        if not regex_uri:
            path = ud.path.split(package)[0]

            # search for version matches on folders inside the path, like:
            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
            dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
            m = dirver_regex.search(path)
            if m:
                pn = d.getVar('PN')
                dirver = m.group('dirver')

                # Skip the directory scan when the matched directory is just
                # the package name (optionally followed by a digit).
                dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn)))
                if not dirver_pn_regex.search(dirver):
                    return (self._check_latest_version_by_dir(dirver,
                        package, package_regex, current_version, ud, d), '')

            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
        else:
            uri = regex_uri

        return (self._check_latest_version(uri, package, package_regex,
                current_version, ud, d), '')