1""" 2BitBake 'Fetch' implementations 3 4Classes for obtaining upstream sources for the 5BitBake build tools. 6 7""" 8 9# Copyright (C) 2003, 2004 Chris Larson 10# 11# SPDX-License-Identifier: GPL-2.0-only 12# 13# Based on functions from the base bb module, Copyright 2003 Holger Schurig 14 15import shlex 16import re 17import tempfile 18import os 19import errno 20import bb 21import bb.progress 22import socket 23import http.client 24import urllib.request, urllib.parse, urllib.error 25from bb.fetch2 import FetchMethod 26from bb.fetch2 import FetchError 27from bb.fetch2 import logger 28from bb.fetch2 import runfetchcmd 29from bb.utils import export_proxies 30from bs4 import BeautifulSoup 31from bs4 import SoupStrainer 32 33class WgetProgressHandler(bb.progress.LineFilterProgressHandler): 34 """ 35 Extract progress information from wget output. 36 Note: relies on --progress=dot (with -v or without -q/-nv) being 37 specified on the wget command line. 38 """ 39 def __init__(self, d): 40 super(WgetProgressHandler, self).__init__(d) 41 # Send an initial progress event so the bar gets shown 42 self._fire_progress(0) 43 44 def writeline(self, line): 45 percs = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line) 46 if percs: 47 progress = int(percs[-1][0]) 48 rate = percs[-1][1] + '/s' 49 self.update(progress, rate) 50 return False 51 return True 52 53 54class Wget(FetchMethod): 55 """Class to fetch urls via 'wget'""" 56 def supports(self, ud, d): 57 """ 58 Check to see if a given url can be fetched with wget. 59 """ 60 return ud.type in ['http', 'https', 'ftp'] 61 62 def recommends_checksum(self, urldata): 63 return True 64 65 def urldata_init(self, ud, d): 66 if 'protocol' in ud.parm: 67 if ud.parm['protocol'] == 'git': 68 raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url) 69 70 if 'downloadfilename' in ud.parm: 71 ud.basename = ud.parm['downloadfilename'] 72 else: 73 ud.basename = os.path.basename(ud.path) 74 75 ud.localfile = d.expand(urllib.parse.unquote(ud.basename)) 76 if not ud.localfile: 77 ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", ".")) 78 79 self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate" 80 81 def _runwget(self, ud, d, command, quiet, workdir=None): 82 83 progresshandler = WgetProgressHandler(d) 84 85 logger.debug(2, "Fetching %s using command '%s'" % (ud.url, command)) 86 bb.fetch2.check_network_access(d, command, ud.url) 87 runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir) 88 89 def download(self, ud, d): 90 """Fetch urls""" 91 92 fetchcmd = self.basecmd 93 94 if 'downloadfilename' in ud.parm: 95 localpath = os.path.join(d.getVar("DL_DIR"), ud.localfile) 96 bb.utils.mkdirhier(os.path.dirname(localpath)) 97 fetchcmd += " -O %s" % shlex.quote(localpath) 98 99 if ud.user and ud.pswd: 100 fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd) 101 102 uri = ud.url.split(";")[0] 103 if os.path.exists(ud.localpath): 104 # file exists, but we didnt complete it.. trying again.. 105 fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri) 106 else: 107 fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri) 108 109 self._runwget(ud, d, fetchcmd, False) 110 111 # Sanity check since wget can pretend it succeed when it didn't 112 # Also, this used to happen if sourceforge sent us to the mirror page 113 if not os.path.exists(ud.localpath): 114 raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri) 115 116 if os.path.getsize(ud.localpath) == 0: 117 os.remove(ud.localpath) 118 raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri) 119 120 return True 121 122 def checkstatus(self, fetch, ud, d, try_again=True): 123 class HTTPConnectionCache(http.client.HTTPConnection): 124 if fetch.connection_cache: 125 def connect(self): 126 """Connect to the host and port specified in __init__.""" 127 128 sock = fetch.connection_cache.get_connection(self.host, self.port) 129 if sock: 130 self.sock = sock 131 else: 132 self.sock = socket.create_connection((self.host, self.port), 133 self.timeout, self.source_address) 134 fetch.connection_cache.add_connection(self.host, self.port, self.sock) 135 136 if self._tunnel_host: 137 self._tunnel() 138 139 class CacheHTTPHandler(urllib.request.HTTPHandler): 140 def http_open(self, req): 141 return self.do_open(HTTPConnectionCache, req) 142 143 def do_open(self, http_class, req): 144 """Return an addinfourl object for the request, using http_class. 145 146 http_class must implement the HTTPConnection API from httplib. 147 The addinfourl return value is a file-like object. It also 148 has methods and attributes including: 149 - info(): return a mimetools.Message object for the headers 150 - geturl(): return the original request URL 151 - code: HTTP status code 152 """ 153 host = req.host 154 if not host: 155 raise urllib.error.URLError('no host given') 156 157 h = http_class(host, timeout=req.timeout) # will parse host:port 158 h.set_debuglevel(self._debuglevel) 159 160 headers = dict(req.unredirected_hdrs) 161 headers.update(dict((k, v) for k, v in list(req.headers.items()) 162 if k not in headers)) 163 164 # We want to make an HTTP/1.1 request, but the addinfourl 165 # class isn't prepared to deal with a persistent connection. 166 # It will try to read all remaining data from the socket, 167 # which will block while the server waits for the next request. 168 # So make sure the connection gets closed after the (only) 169 # request. 170 171 # Don't close connection when connection_cache is enabled, 172 if fetch.connection_cache is None: 173 headers["Connection"] = "close" 174 else: 175 headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0 176 177 headers = dict( 178 (name.title(), val) for name, val in list(headers.items())) 179 180 if req._tunnel_host: 181 tunnel_headers = {} 182 proxy_auth_hdr = "Proxy-Authorization" 183 if proxy_auth_hdr in headers: 184 tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr] 185 # Proxy-Authorization should not be sent to origin 186 # server. 187 del headers[proxy_auth_hdr] 188 h.set_tunnel(req._tunnel_host, headers=tunnel_headers) 189 190 try: 191 h.request(req.get_method(), req.selector, req.data, headers) 192 except socket.error as err: # XXX what error? 193 # Don't close connection when cache is enabled. 194 # Instead, try to detect connections that are no longer 195 # usable (for example, closed unexpectedly) and remove 196 # them from the cache. 197 if fetch.connection_cache is None: 198 h.close() 199 elif isinstance(err, OSError) and err.errno == errno.EBADF: 200 # This happens when the server closes the connection despite the Keep-Alive. 201 # Apparently urllib then uses the file descriptor, expecting it to be 202 # connected, when in reality the connection is already gone. 203 # We let the request fail and expect it to be 204 # tried once more ("try_again" in check_status()), 205 # with the dead connection removed from the cache. 206 # If it still fails, we give up, which can happend for bad 207 # HTTP proxy settings. 208 fetch.connection_cache.remove_connection(h.host, h.port) 209 raise urllib.error.URLError(err) 210 else: 211 try: 212 r = h.getresponse(buffering=True) 213 except TypeError: # buffering kw not supported 214 r = h.getresponse() 215 216 # Pick apart the HTTPResponse object to get the addinfourl 217 # object initialized properly. 218 219 # Wrap the HTTPResponse object in socket's file object adapter 220 # for Windows. That adapter calls recv(), so delegate recv() 221 # to read(). This weird wrapping allows the returned object to 222 # have readline() and readlines() methods. 223 224 # XXX It might be better to extract the read buffering code 225 # out of socket._fileobject() and into a base class. 226 r.recv = r.read 227 228 # no data, just have to read 229 r.read() 230 class fp_dummy(object): 231 def read(self): 232 return "" 233 def readline(self): 234 return "" 235 def close(self): 236 pass 237 closed = False 238 239 resp = urllib.response.addinfourl(fp_dummy(), r.msg, req.get_full_url()) 240 resp.code = r.status 241 resp.msg = r.reason 242 243 # Close connection when server request it. 244 if fetch.connection_cache is not None: 245 if 'Connection' in r.msg and r.msg['Connection'] == 'close': 246 fetch.connection_cache.remove_connection(h.host, h.port) 247 248 return resp 249 250 class HTTPMethodFallback(urllib.request.BaseHandler): 251 """ 252 Fallback to GET if HEAD is not allowed (405 HTTP error) 253 """ 254 def http_error_405(self, req, fp, code, msg, headers): 255 fp.read() 256 fp.close() 257 258 if req.get_method() != 'GET': 259 newheaders = dict((k, v) for k, v in list(req.headers.items()) 260 if k.lower() not in ("content-length", "content-type")) 261 return self.parent.open(urllib.request.Request(req.get_full_url(), 262 headers=newheaders, 263 origin_req_host=req.origin_req_host, 264 unverifiable=True)) 265 266 raise urllib.request.HTTPError(req, code, msg, headers, None) 267 268 # Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403 269 # Forbidden when they actually mean 405 Method Not Allowed. 270 http_error_403 = http_error_405 271 272 273 class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler): 274 """ 275 urllib2.HTTPRedirectHandler resets the method to GET on redirect, 276 when we want to follow redirects using the original method. 277 """ 278 def redirect_request(self, req, fp, code, msg, headers, newurl): 279 newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl) 280 newreq.get_method = req.get_method 281 return newreq 282 exported_proxies = export_proxies(d) 283 284 handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback] 285 if exported_proxies: 286 handlers.append(urllib.request.ProxyHandler()) 287 handlers.append(CacheHTTPHandler()) 288 # Since Python 2.7.9 ssl cert validation is enabled by default 289 # see PEP-0476, this causes verification errors on some https servers 290 # so disable by default. 291 import ssl 292 if hasattr(ssl, '_create_unverified_context'): 293 handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context())) 294 opener = urllib.request.build_opener(*handlers) 295 296 try: 297 uri = ud.url.split(";")[0] 298 r = urllib.request.Request(uri) 299 r.get_method = lambda: "HEAD" 300 # Some servers (FusionForge, as used on Alioth) require that the 301 # optional Accept header is set. 302 r.add_header("Accept", "*/*") 303 r.add_header("User-Agent", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12") 304 def add_basic_auth(login_str, request): 305 '''Adds Basic auth to http request, pass in login:password as string''' 306 import base64 307 encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8") 308 authheader = "Basic %s" % encodeuser 309 r.add_header("Authorization", authheader) 310 311 if ud.user and ud.pswd: 312 add_basic_auth(ud.user + ':' + ud.pswd, r) 313 314 try: 315 import netrc 316 n = netrc.netrc() 317 login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname) 318 add_basic_auth("%s:%s" % (login, password), r) 319 except (TypeError, ImportError, IOError, netrc.NetrcParseError): 320 pass 321 322 with opener.open(r) as response: 323 pass 324 except urllib.error.URLError as e: 325 if try_again: 326 logger.debug(2, "checkstatus: trying again") 327 return self.checkstatus(fetch, ud, d, False) 328 else: 329 # debug for now to avoid spamming the logs in e.g. remote sstate searches 330 logger.debug(2, "checkstatus() urlopen failed: %s" % e) 331 return False 332 return True 333 334 def _parse_path(self, regex, s): 335 """ 336 Find and group name, version and archive type in the given string s 337 """ 338 339 m = regex.search(s) 340 if m: 341 pname = '' 342 pver = '' 343 ptype = '' 344 345 mdict = m.groupdict() 346 if 'name' in mdict.keys(): 347 pname = mdict['name'] 348 if 'pver' in mdict.keys(): 349 pver = mdict['pver'] 350 if 'type' in mdict.keys(): 351 ptype = mdict['type'] 352 353 bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype)) 354 355 return (pname, pver, ptype) 356 357 return None 358 359 def _modelate_version(self, version): 360 if version[0] in ['.', '-']: 361 if version[1].isdigit(): 362 version = version[1] + version[0] + version[2:len(version)] 363 else: 364 version = version[1:len(version)] 365 366 version = re.sub('-', '.', version) 367 version = re.sub('_', '.', version) 368 version = re.sub('(rc)+', '.1000.', version) 369 version = re.sub('(beta)+', '.100.', version) 370 version = re.sub('(alpha)+', '.10.', version) 371 if version[0] == 'v': 372 version = version[1:len(version)] 373 return version 374 375 def _vercmp(self, old, new): 376 """ 377 Check whether 'new' is newer than 'old' version. We use existing vercmp() for the 378 purpose. PE is cleared in comparison as it's not for build, and PR is cleared too 379 for simplicity as it's somehow difficult to get from various upstream format 380 """ 381 382 (oldpn, oldpv, oldsuffix) = old 383 (newpn, newpv, newsuffix) = new 384 385 # Check for a new suffix type that we have never heard of before 386 if newsuffix: 387 m = self.suffix_regex_comp.search(newsuffix) 388 if not m: 389 bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix)) 390 return False 391 392 # Not our package so ignore it 393 if oldpn != newpn: 394 return False 395 396 oldpv = self._modelate_version(oldpv) 397 newpv = self._modelate_version(newpv) 398 399 return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, "")) 400 401 def _fetch_index(self, uri, ud, d): 402 """ 403 Run fetch checkstatus to get directory information 404 """ 405 f = tempfile.NamedTemporaryFile() 406 with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f: 407 agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12" 408 fetchcmd = self.basecmd 409 fetchcmd += " -O " + f.name + " --user-agent='" + agent + "' '" + uri + "'" 410 try: 411 self._runwget(ud, d, fetchcmd, True, workdir=workdir) 412 fetchresult = f.read() 413 except bb.fetch2.BBFetchException: 414 fetchresult = "" 415 416 return fetchresult 417 418 def _check_latest_version(self, url, package, package_regex, current_version, ud, d): 419 """ 420 Return the latest version of a package inside a given directory path 421 If error or no version, return "" 422 """ 423 valid = 0 424 version = ['', '', ''] 425 426 bb.debug(3, "VersionURL: %s" % (url)) 427 soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a")) 428 if not soup: 429 bb.debug(3, "*** %s NO SOUP" % (url)) 430 return "" 431 432 for line in soup.find_all('a', href=True): 433 bb.debug(3, "line['href'] = '%s'" % (line['href'])) 434 bb.debug(3, "line = '%s'" % (str(line))) 435 436 newver = self._parse_path(package_regex, line['href']) 437 if not newver: 438 newver = self._parse_path(package_regex, str(line)) 439 440 if newver: 441 bb.debug(3, "Upstream version found: %s" % newver[1]) 442 if valid == 0: 443 version = newver 444 valid = 1 445 elif self._vercmp(version, newver) < 0: 446 version = newver 447 448 pupver = re.sub('_', '.', version[1]) 449 450 bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" % 451 (package, pupver or "N/A", current_version[1])) 452 453 if valid: 454 return pupver 455 456 return "" 457 458 def _check_latest_version_by_dir(self, dirver, package, package_regex, current_version, ud, d): 459 """ 460 Scan every directory in order to get upstream version. 461 """ 462 version_dir = ['', '', ''] 463 version = ['', '', ''] 464 465 dirver_regex = re.compile(r"(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))") 466 s = dirver_regex.search(dirver) 467 if s: 468 version_dir[1] = s.group('ver') 469 else: 470 version_dir[1] = dirver 471 472 dirs_uri = bb.fetch.encodeurl([ud.type, ud.host, 473 ud.path.split(dirver)[0], ud.user, ud.pswd, {}]) 474 bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package)) 475 476 soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a")) 477 if not soup: 478 return version[1] 479 480 for line in soup.find_all('a', href=True): 481 s = dirver_regex.search(line['href'].strip("/")) 482 if s: 483 sver = s.group('ver') 484 485 # When prefix is part of the version directory it need to 486 # ensure that only version directory is used so remove previous 487 # directories if exists. 488 # 489 # Example: pfx = '/dir1/dir2/v' and version = '2.5' the expected 490 # result is v2.5. 491 spfx = s.group('pfx').split('/')[-1] 492 493 version_dir_new = ['', sver, ''] 494 if self._vercmp(version_dir, version_dir_new) <= 0: 495 dirver_new = spfx + sver 496 path = ud.path.replace(dirver, dirver_new, True) \ 497 .split(package)[0] 498 uri = bb.fetch.encodeurl([ud.type, ud.host, path, 499 ud.user, ud.pswd, {}]) 500 501 pupver = self._check_latest_version(uri, 502 package, package_regex, current_version, ud, d) 503 if pupver: 504 version[1] = pupver 505 506 version_dir = version_dir_new 507 508 return version[1] 509 510 def _init_regexes(self, package, ud, d): 511 """ 512 Match as many patterns as possible such as: 513 gnome-common-2.20.0.tar.gz (most common format) 514 gtk+-2.90.1.tar.gz 515 xf86-input-synaptics-12.6.9.tar.gz 516 dri2proto-2.3.tar.gz 517 blktool_4.orig.tar.gz 518 libid3tag-0.15.1b.tar.gz 519 unzip552.tar.gz 520 icu4c-3_6-src.tgz 521 genext2fs_1.3.orig.tar.gz 522 gst-fluendo-mp3 523 """ 524 # match most patterns which uses "-" as separator to version digits 525 pn_prefix1 = r"[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]" 526 # a loose pattern such as for unzip552.tar.gz 527 pn_prefix2 = r"[a-zA-Z]+" 528 # a loose pattern such as for 80325-quicky-0.4.tar.gz 529 pn_prefix3 = r"[0-9]+[-]?[a-zA-Z]+" 530 # Save the Package Name (pn) Regex for use later 531 pn_regex = r"(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3) 532 533 # match version 534 pver_regex = r"(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)" 535 536 # match arch 537 parch_regex = "-source|_all_" 538 539 # src.rpm extension was added only for rpm package. Can be removed if the rpm 540 # packaged will always be considered as having to be manually upgraded 541 psuffix_regex = r"(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)" 542 543 # match name, version and archive type of a package 544 package_regex_comp = re.compile(r"(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)" 545 % (pn_regex, pver_regex, parch_regex, psuffix_regex)) 546 self.suffix_regex_comp = re.compile(psuffix_regex) 547 548 # compile regex, can be specific by package or generic regex 549 pn_regex = d.getVar('UPSTREAM_CHECK_REGEX') 550 if pn_regex: 551 package_custom_regex_comp = re.compile(pn_regex) 552 else: 553 version = self._parse_path(package_regex_comp, package) 554 if version: 555 package_custom_regex_comp = re.compile( 556 r"(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" % 557 (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex)) 558 else: 559 package_custom_regex_comp = None 560 561 return package_custom_regex_comp 562 563 def latest_versionstring(self, ud, d): 564 """ 565 Manipulate the URL and try to obtain the latest package version 566 567 sanity check to ensure same name and type. 568 """ 569 package = ud.path.split("/")[-1] 570 current_version = ['', d.getVar('PV'), ''] 571 572 """possible to have no version in pkg name, such as spectrum-fw""" 573 if not re.search(r"\d+", package): 574 current_version[1] = re.sub('_', '.', current_version[1]) 575 current_version[1] = re.sub('-', '.', current_version[1]) 576 return (current_version[1], '') 577 578 package_regex = self._init_regexes(package, ud, d) 579 if package_regex is None: 580 bb.warn("latest_versionstring: package %s don't match pattern" % (package)) 581 return ('', '') 582 bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern)) 583 584 uri = "" 585 regex_uri = d.getVar("UPSTREAM_CHECK_URI") 586 if not regex_uri: 587 path = ud.path.split(package)[0] 588 589 # search for version matches on folders inside the path, like: 590 # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz 591 dirver_regex = re.compile(r"(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/") 592 m = dirver_regex.search(path) 593 if m: 594 pn = d.getVar('PN') 595 dirver = m.group('dirver') 596 597 dirver_pn_regex = re.compile(r"%s\d?" % (re.escape(pn))) 598 if not dirver_pn_regex.search(dirver): 599 return (self._check_latest_version_by_dir(dirver, 600 package, package_regex, current_version, ud, d), '') 601 602 uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}]) 603 else: 604 uri = regex_uri 605 606 return (self._check_latest_version(uri, package, package_regex, 607 current_version, ud, d), '') 608