xref: /openbmc/openbmc/poky/bitbake/lib/bb/fetch2/wget.py (revision eb8dc403)
1*eb8dc403SDave Cobbley# ex:ts=4:sw=4:sts=4:et
2*eb8dc403SDave Cobbley# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
3*eb8dc403SDave Cobbley"""
4*eb8dc403SDave CobbleyBitBake 'Fetch' implementations
5*eb8dc403SDave Cobbley
6*eb8dc403SDave CobbleyClasses for obtaining upstream sources for the
7*eb8dc403SDave CobbleyBitBake build tools.
8*eb8dc403SDave Cobbley
9*eb8dc403SDave Cobbley"""
10*eb8dc403SDave Cobbley
11*eb8dc403SDave Cobbley# Copyright (C) 2003, 2004  Chris Larson
12*eb8dc403SDave Cobbley#
13*eb8dc403SDave Cobbley# This program is free software; you can redistribute it and/or modify
14*eb8dc403SDave Cobbley# it under the terms of the GNU General Public License version 2 as
15*eb8dc403SDave Cobbley# published by the Free Software Foundation.
16*eb8dc403SDave Cobbley#
17*eb8dc403SDave Cobbley# This program is distributed in the hope that it will be useful,
18*eb8dc403SDave Cobbley# but WITHOUT ANY WARRANTY; without even the implied warranty of
19*eb8dc403SDave Cobbley# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20*eb8dc403SDave Cobbley# GNU General Public License for more details.
21*eb8dc403SDave Cobbley#
22*eb8dc403SDave Cobbley# You should have received a copy of the GNU General Public License along
23*eb8dc403SDave Cobbley# with this program; if not, write to the Free Software Foundation, Inc.,
24*eb8dc403SDave Cobbley# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25*eb8dc403SDave Cobbley#
26*eb8dc403SDave Cobbley# Based on functions from the base bb module, Copyright 2003 Holger Schurig
27*eb8dc403SDave Cobbley
28*eb8dc403SDave Cobbleyimport re
29*eb8dc403SDave Cobbleyimport tempfile
30*eb8dc403SDave Cobbleyimport subprocess
31*eb8dc403SDave Cobbleyimport os
32*eb8dc403SDave Cobbleyimport logging
33*eb8dc403SDave Cobbleyimport errno
34*eb8dc403SDave Cobbleyimport bb
35*eb8dc403SDave Cobbleyimport bb.progress
36*eb8dc403SDave Cobbleyimport urllib.request, urllib.parse, urllib.error
37*eb8dc403SDave Cobbleyfrom   bb.fetch2 import FetchMethod
38*eb8dc403SDave Cobbleyfrom   bb.fetch2 import FetchError
39*eb8dc403SDave Cobbleyfrom   bb.fetch2 import logger
40*eb8dc403SDave Cobbleyfrom   bb.fetch2 import runfetchcmd
41*eb8dc403SDave Cobbleyfrom   bb.utils import export_proxies
42*eb8dc403SDave Cobbleyfrom   bs4 import BeautifulSoup
43*eb8dc403SDave Cobbleyfrom   bs4 import SoupStrainer
44*eb8dc403SDave Cobbley
class WgetProgressHandler(bb.progress.LineFilterProgressHandler):
    """
    Progress handler that scrapes percentage and rate figures out of wget's
    dot-style progress output.

    Note: wget has to be run with --progress=dot and verbose output (-v, or
    at least without -q/-nv), otherwise there is nothing to parse.
    """
    def __init__(self, d):
        super().__init__(d)
        # Fire a 0% event straight away so the progress bar gets shown
        self._fire_progress(0)

    def writeline(self, line):
        """
        Inspect one line of wget output.

        Returns False when the line carried progress figures (they are
        reported through update()), True otherwise.
        """
        matches = re.findall(r'(\d+)%\s+([\d.]+[A-Z])', line)
        if not matches:
            return True

        # Only the last percentage/rate pair on the line is reported
        percent, speed = matches[-1]
        self.update(int(percent), speed + '/s')
        return False
64*eb8dc403SDave Cobbley
65*eb8dc403SDave Cobbley
66*eb8dc403SDave Cobbleyclass Wget(FetchMethod):
67*eb8dc403SDave Cobbley    """Class to fetch urls via 'wget'"""
68*eb8dc403SDave Cobbley    def supports(self, ud, d):
69*eb8dc403SDave Cobbley        """
70*eb8dc403SDave Cobbley        Check to see if a given url can be fetched with wget.
71*eb8dc403SDave Cobbley        """
72*eb8dc403SDave Cobbley        return ud.type in ['http', 'https', 'ftp']
73*eb8dc403SDave Cobbley
74*eb8dc403SDave Cobbley    def recommends_checksum(self, urldata):
75*eb8dc403SDave Cobbley        return True
76*eb8dc403SDave Cobbley
77*eb8dc403SDave Cobbley    def urldata_init(self, ud, d):
78*eb8dc403SDave Cobbley        if 'protocol' in ud.parm:
79*eb8dc403SDave Cobbley            if ud.parm['protocol'] == 'git':
80*eb8dc403SDave Cobbley                raise bb.fetch2.ParameterError("Invalid protocol - if you wish to fetch from a git repository using http, you need to instead use the git:// prefix with protocol=http", ud.url)
81*eb8dc403SDave Cobbley
82*eb8dc403SDave Cobbley        if 'downloadfilename' in ud.parm:
83*eb8dc403SDave Cobbley            ud.basename = ud.parm['downloadfilename']
84*eb8dc403SDave Cobbley        else:
85*eb8dc403SDave Cobbley            ud.basename = os.path.basename(ud.path)
86*eb8dc403SDave Cobbley
87*eb8dc403SDave Cobbley        ud.localfile = d.expand(urllib.parse.unquote(ud.basename))
88*eb8dc403SDave Cobbley        if not ud.localfile:
89*eb8dc403SDave Cobbley            ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))
90*eb8dc403SDave Cobbley
91*eb8dc403SDave Cobbley        self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate"
92*eb8dc403SDave Cobbley
93*eb8dc403SDave Cobbley    def _runwget(self, ud, d, command, quiet, workdir=None):
94*eb8dc403SDave Cobbley
95*eb8dc403SDave Cobbley        progresshandler = WgetProgressHandler(d)
96*eb8dc403SDave Cobbley
97*eb8dc403SDave Cobbley        logger.debug(2, "Fetching %s using command '%s'" % (ud.url, command))
98*eb8dc403SDave Cobbley        bb.fetch2.check_network_access(d, command, ud.url)
99*eb8dc403SDave Cobbley        runfetchcmd(command + ' --progress=dot -v', d, quiet, log=progresshandler, workdir=workdir)
100*eb8dc403SDave Cobbley
101*eb8dc403SDave Cobbley    def download(self, ud, d):
102*eb8dc403SDave Cobbley        """Fetch urls"""
103*eb8dc403SDave Cobbley
104*eb8dc403SDave Cobbley        fetchcmd = self.basecmd
105*eb8dc403SDave Cobbley
106*eb8dc403SDave Cobbley        if 'downloadfilename' in ud.parm:
107*eb8dc403SDave Cobbley            dldir = d.getVar("DL_DIR")
108*eb8dc403SDave Cobbley            bb.utils.mkdirhier(os.path.dirname(dldir + os.sep + ud.localfile))
109*eb8dc403SDave Cobbley            fetchcmd += " -O " + dldir + os.sep + ud.localfile
110*eb8dc403SDave Cobbley
111*eb8dc403SDave Cobbley        if ud.user and ud.pswd:
112*eb8dc403SDave Cobbley            fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)
113*eb8dc403SDave Cobbley
114*eb8dc403SDave Cobbley        uri = ud.url.split(";")[0]
115*eb8dc403SDave Cobbley        if os.path.exists(ud.localpath):
116*eb8dc403SDave Cobbley            # file exists, but we didnt complete it.. trying again..
117*eb8dc403SDave Cobbley            fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
118*eb8dc403SDave Cobbley        else:
119*eb8dc403SDave Cobbley            fetchcmd += d.expand(" -P ${DL_DIR} '%s'" % uri)
120*eb8dc403SDave Cobbley
121*eb8dc403SDave Cobbley        self._runwget(ud, d, fetchcmd, False)
122*eb8dc403SDave Cobbley
123*eb8dc403SDave Cobbley        # Sanity check since wget can pretend it succeed when it didn't
124*eb8dc403SDave Cobbley        # Also, this used to happen if sourceforge sent us to the mirror page
125*eb8dc403SDave Cobbley        if not os.path.exists(ud.localpath):
126*eb8dc403SDave Cobbley            raise FetchError("The fetch command returned success for url %s but %s doesn't exist?!" % (uri, ud.localpath), uri)
127*eb8dc403SDave Cobbley
128*eb8dc403SDave Cobbley        if os.path.getsize(ud.localpath) == 0:
129*eb8dc403SDave Cobbley            os.remove(ud.localpath)
130*eb8dc403SDave Cobbley            raise FetchError("The fetch of %s resulted in a zero size file?! Deleting and failing since this isn't right." % (uri), uri)
131*eb8dc403SDave Cobbley
132*eb8dc403SDave Cobbley        return True
133*eb8dc403SDave Cobbley
134*eb8dc403SDave Cobbley    def checkstatus(self, fetch, ud, d, try_again=True):
135*eb8dc403SDave Cobbley        import urllib.request, urllib.error, urllib.parse, socket, http.client
136*eb8dc403SDave Cobbley        from urllib.response import addinfourl
137*eb8dc403SDave Cobbley        from bb.fetch2 import FetchConnectionCache
138*eb8dc403SDave Cobbley
139*eb8dc403SDave Cobbley        class HTTPConnectionCache(http.client.HTTPConnection):
140*eb8dc403SDave Cobbley            if fetch.connection_cache:
141*eb8dc403SDave Cobbley                def connect(self):
142*eb8dc403SDave Cobbley                    """Connect to the host and port specified in __init__."""
143*eb8dc403SDave Cobbley
144*eb8dc403SDave Cobbley                    sock = fetch.connection_cache.get_connection(self.host, self.port)
145*eb8dc403SDave Cobbley                    if sock:
146*eb8dc403SDave Cobbley                        self.sock = sock
147*eb8dc403SDave Cobbley                    else:
148*eb8dc403SDave Cobbley                        self.sock = socket.create_connection((self.host, self.port),
149*eb8dc403SDave Cobbley                                    self.timeout, self.source_address)
150*eb8dc403SDave Cobbley                        fetch.connection_cache.add_connection(self.host, self.port, self.sock)
151*eb8dc403SDave Cobbley
152*eb8dc403SDave Cobbley                    if self._tunnel_host:
153*eb8dc403SDave Cobbley                        self._tunnel()
154*eb8dc403SDave Cobbley
155*eb8dc403SDave Cobbley        class CacheHTTPHandler(urllib.request.HTTPHandler):
156*eb8dc403SDave Cobbley            def http_open(self, req):
157*eb8dc403SDave Cobbley                return self.do_open(HTTPConnectionCache, req)
158*eb8dc403SDave Cobbley
159*eb8dc403SDave Cobbley            def do_open(self, http_class, req):
160*eb8dc403SDave Cobbley                """Return an addinfourl object for the request, using http_class.
161*eb8dc403SDave Cobbley
162*eb8dc403SDave Cobbley                http_class must implement the HTTPConnection API from httplib.
163*eb8dc403SDave Cobbley                The addinfourl return value is a file-like object.  It also
164*eb8dc403SDave Cobbley                has methods and attributes including:
165*eb8dc403SDave Cobbley                    - info(): return a mimetools.Message object for the headers
166*eb8dc403SDave Cobbley                    - geturl(): return the original request URL
167*eb8dc403SDave Cobbley                    - code: HTTP status code
168*eb8dc403SDave Cobbley                """
169*eb8dc403SDave Cobbley                host = req.host
170*eb8dc403SDave Cobbley                if not host:
171*eb8dc403SDave Cobbley                    raise urlllib2.URLError('no host given')
172*eb8dc403SDave Cobbley
173*eb8dc403SDave Cobbley                h = http_class(host, timeout=req.timeout) # will parse host:port
174*eb8dc403SDave Cobbley                h.set_debuglevel(self._debuglevel)
175*eb8dc403SDave Cobbley
176*eb8dc403SDave Cobbley                headers = dict(req.unredirected_hdrs)
177*eb8dc403SDave Cobbley                headers.update(dict((k, v) for k, v in list(req.headers.items())
178*eb8dc403SDave Cobbley                            if k not in headers))
179*eb8dc403SDave Cobbley
180*eb8dc403SDave Cobbley                # We want to make an HTTP/1.1 request, but the addinfourl
181*eb8dc403SDave Cobbley                # class isn't prepared to deal with a persistent connection.
182*eb8dc403SDave Cobbley                # It will try to read all remaining data from the socket,
183*eb8dc403SDave Cobbley                # which will block while the server waits for the next request.
184*eb8dc403SDave Cobbley                # So make sure the connection gets closed after the (only)
185*eb8dc403SDave Cobbley                # request.
186*eb8dc403SDave Cobbley
187*eb8dc403SDave Cobbley                # Don't close connection when connection_cache is enabled,
188*eb8dc403SDave Cobbley                if fetch.connection_cache is None:
189*eb8dc403SDave Cobbley                    headers["Connection"] = "close"
190*eb8dc403SDave Cobbley                else:
191*eb8dc403SDave Cobbley                    headers["Connection"] = "Keep-Alive" # Works for HTTP/1.0
192*eb8dc403SDave Cobbley
193*eb8dc403SDave Cobbley                headers = dict(
194*eb8dc403SDave Cobbley                    (name.title(), val) for name, val in list(headers.items()))
195*eb8dc403SDave Cobbley
196*eb8dc403SDave Cobbley                if req._tunnel_host:
197*eb8dc403SDave Cobbley                    tunnel_headers = {}
198*eb8dc403SDave Cobbley                    proxy_auth_hdr = "Proxy-Authorization"
199*eb8dc403SDave Cobbley                    if proxy_auth_hdr in headers:
200*eb8dc403SDave Cobbley                        tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
201*eb8dc403SDave Cobbley                        # Proxy-Authorization should not be sent to origin
202*eb8dc403SDave Cobbley                        # server.
203*eb8dc403SDave Cobbley                        del headers[proxy_auth_hdr]
204*eb8dc403SDave Cobbley                    h.set_tunnel(req._tunnel_host, headers=tunnel_headers)
205*eb8dc403SDave Cobbley
206*eb8dc403SDave Cobbley                try:
207*eb8dc403SDave Cobbley                    h.request(req.get_method(), req.selector, req.data, headers)
208*eb8dc403SDave Cobbley                except socket.error as err: # XXX what error?
209*eb8dc403SDave Cobbley                    # Don't close connection when cache is enabled.
210*eb8dc403SDave Cobbley                    # Instead, try to detect connections that are no longer
211*eb8dc403SDave Cobbley                    # usable (for example, closed unexpectedly) and remove
212*eb8dc403SDave Cobbley                    # them from the cache.
213*eb8dc403SDave Cobbley                    if fetch.connection_cache is None:
214*eb8dc403SDave Cobbley                        h.close()
215*eb8dc403SDave Cobbley                    elif isinstance(err, OSError) and err.errno == errno.EBADF:
216*eb8dc403SDave Cobbley                        # This happens when the server closes the connection despite the Keep-Alive.
217*eb8dc403SDave Cobbley                        # Apparently urllib then uses the file descriptor, expecting it to be
218*eb8dc403SDave Cobbley                        # connected, when in reality the connection is already gone.
219*eb8dc403SDave Cobbley                        # We let the request fail and expect it to be
220*eb8dc403SDave Cobbley                        # tried once more ("try_again" in check_status()),
221*eb8dc403SDave Cobbley                        # with the dead connection removed from the cache.
222*eb8dc403SDave Cobbley                        # If it still fails, we give up, which can happend for bad
223*eb8dc403SDave Cobbley                        # HTTP proxy settings.
224*eb8dc403SDave Cobbley                        fetch.connection_cache.remove_connection(h.host, h.port)
225*eb8dc403SDave Cobbley                    raise urllib.error.URLError(err)
226*eb8dc403SDave Cobbley                else:
227*eb8dc403SDave Cobbley                    try:
228*eb8dc403SDave Cobbley                        r = h.getresponse(buffering=True)
229*eb8dc403SDave Cobbley                    except TypeError: # buffering kw not supported
230*eb8dc403SDave Cobbley                        r = h.getresponse()
231*eb8dc403SDave Cobbley
232*eb8dc403SDave Cobbley                # Pick apart the HTTPResponse object to get the addinfourl
233*eb8dc403SDave Cobbley                # object initialized properly.
234*eb8dc403SDave Cobbley
235*eb8dc403SDave Cobbley                # Wrap the HTTPResponse object in socket's file object adapter
236*eb8dc403SDave Cobbley                # for Windows.  That adapter calls recv(), so delegate recv()
237*eb8dc403SDave Cobbley                # to read().  This weird wrapping allows the returned object to
238*eb8dc403SDave Cobbley                # have readline() and readlines() methods.
239*eb8dc403SDave Cobbley
240*eb8dc403SDave Cobbley                # XXX It might be better to extract the read buffering code
241*eb8dc403SDave Cobbley                # out of socket._fileobject() and into a base class.
242*eb8dc403SDave Cobbley                r.recv = r.read
243*eb8dc403SDave Cobbley
244*eb8dc403SDave Cobbley                # no data, just have to read
245*eb8dc403SDave Cobbley                r.read()
246*eb8dc403SDave Cobbley                class fp_dummy(object):
247*eb8dc403SDave Cobbley                    def read(self):
248*eb8dc403SDave Cobbley                        return ""
249*eb8dc403SDave Cobbley                    def readline(self):
250*eb8dc403SDave Cobbley                        return ""
251*eb8dc403SDave Cobbley                    def close(self):
252*eb8dc403SDave Cobbley                        pass
253*eb8dc403SDave Cobbley                    closed = False
254*eb8dc403SDave Cobbley
255*eb8dc403SDave Cobbley                resp = addinfourl(fp_dummy(), r.msg, req.get_full_url())
256*eb8dc403SDave Cobbley                resp.code = r.status
257*eb8dc403SDave Cobbley                resp.msg = r.reason
258*eb8dc403SDave Cobbley
259*eb8dc403SDave Cobbley                # Close connection when server request it.
260*eb8dc403SDave Cobbley                if fetch.connection_cache is not None:
261*eb8dc403SDave Cobbley                    if 'Connection' in r.msg and r.msg['Connection'] == 'close':
262*eb8dc403SDave Cobbley                        fetch.connection_cache.remove_connection(h.host, h.port)
263*eb8dc403SDave Cobbley
264*eb8dc403SDave Cobbley                return resp
265*eb8dc403SDave Cobbley
266*eb8dc403SDave Cobbley        class HTTPMethodFallback(urllib.request.BaseHandler):
267*eb8dc403SDave Cobbley            """
268*eb8dc403SDave Cobbley            Fallback to GET if HEAD is not allowed (405 HTTP error)
269*eb8dc403SDave Cobbley            """
270*eb8dc403SDave Cobbley            def http_error_405(self, req, fp, code, msg, headers):
271*eb8dc403SDave Cobbley                fp.read()
272*eb8dc403SDave Cobbley                fp.close()
273*eb8dc403SDave Cobbley
274*eb8dc403SDave Cobbley                newheaders = dict((k,v) for k,v in list(req.headers.items())
275*eb8dc403SDave Cobbley                                  if k.lower() not in ("content-length", "content-type"))
276*eb8dc403SDave Cobbley                return self.parent.open(urllib.request.Request(req.get_full_url(),
277*eb8dc403SDave Cobbley                                                        headers=newheaders,
278*eb8dc403SDave Cobbley                                                        origin_req_host=req.origin_req_host,
279*eb8dc403SDave Cobbley                                                        unverifiable=True))
280*eb8dc403SDave Cobbley
281*eb8dc403SDave Cobbley            """
282*eb8dc403SDave Cobbley            Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
283*eb8dc403SDave Cobbley            Forbidden when they actually mean 405 Method Not Allowed.
284*eb8dc403SDave Cobbley            """
285*eb8dc403SDave Cobbley            http_error_403 = http_error_405
286*eb8dc403SDave Cobbley
287*eb8dc403SDave Cobbley
288*eb8dc403SDave Cobbley        class FixedHTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
289*eb8dc403SDave Cobbley            """
290*eb8dc403SDave Cobbley            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
291*eb8dc403SDave Cobbley            when we want to follow redirects using the original method.
292*eb8dc403SDave Cobbley            """
293*eb8dc403SDave Cobbley            def redirect_request(self, req, fp, code, msg, headers, newurl):
294*eb8dc403SDave Cobbley                newreq = urllib.request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
295*eb8dc403SDave Cobbley                newreq.get_method = lambda: req.get_method()
296*eb8dc403SDave Cobbley                return newreq
297*eb8dc403SDave Cobbley        exported_proxies = export_proxies(d)
298*eb8dc403SDave Cobbley
299*eb8dc403SDave Cobbley        handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback]
300*eb8dc403SDave Cobbley        if export_proxies:
301*eb8dc403SDave Cobbley            handlers.append(urllib.request.ProxyHandler())
302*eb8dc403SDave Cobbley        handlers.append(CacheHTTPHandler())
303*eb8dc403SDave Cobbley        # XXX: Since Python 2.7.9 ssl cert validation is enabled by default
304*eb8dc403SDave Cobbley        # see PEP-0476, this causes verification errors on some https servers
305*eb8dc403SDave Cobbley        # so disable by default.
306*eb8dc403SDave Cobbley        import ssl
307*eb8dc403SDave Cobbley        if hasattr(ssl, '_create_unverified_context'):
308*eb8dc403SDave Cobbley            handlers.append(urllib.request.HTTPSHandler(context=ssl._create_unverified_context()))
309*eb8dc403SDave Cobbley        opener = urllib.request.build_opener(*handlers)
310*eb8dc403SDave Cobbley
311*eb8dc403SDave Cobbley        try:
312*eb8dc403SDave Cobbley            uri = ud.url.split(";")[0]
313*eb8dc403SDave Cobbley            r = urllib.request.Request(uri)
314*eb8dc403SDave Cobbley            r.get_method = lambda: "HEAD"
315*eb8dc403SDave Cobbley            # Some servers (FusionForge, as used on Alioth) require that the
316*eb8dc403SDave Cobbley            # optional Accept header is set.
317*eb8dc403SDave Cobbley            r.add_header("Accept", "*/*")
318*eb8dc403SDave Cobbley            def add_basic_auth(login_str, request):
319*eb8dc403SDave Cobbley                '''Adds Basic auth to http request, pass in login:password as string'''
320*eb8dc403SDave Cobbley                import base64
321*eb8dc403SDave Cobbley                encodeuser = base64.b64encode(login_str.encode('utf-8')).decode("utf-8")
322*eb8dc403SDave Cobbley                authheader =  "Basic %s" % encodeuser
323*eb8dc403SDave Cobbley                r.add_header("Authorization", authheader)
324*eb8dc403SDave Cobbley
325*eb8dc403SDave Cobbley            if ud.user:
326*eb8dc403SDave Cobbley                add_basic_auth(ud.user, r)
327*eb8dc403SDave Cobbley
328*eb8dc403SDave Cobbley            try:
329*eb8dc403SDave Cobbley                import netrc, urllib.parse
330*eb8dc403SDave Cobbley                n = netrc.netrc()
331*eb8dc403SDave Cobbley                login, unused, password = n.authenticators(urllib.parse.urlparse(uri).hostname)
332*eb8dc403SDave Cobbley                add_basic_auth("%s:%s" % (login, password), r)
333*eb8dc403SDave Cobbley            except (TypeError, ImportError, IOError, netrc.NetrcParseError):
334*eb8dc403SDave Cobbley                 pass
335*eb8dc403SDave Cobbley
336*eb8dc403SDave Cobbley            with opener.open(r) as response:
337*eb8dc403SDave Cobbley                pass
338*eb8dc403SDave Cobbley        except urllib.error.URLError as e:
339*eb8dc403SDave Cobbley            if try_again:
340*eb8dc403SDave Cobbley                logger.debug(2, "checkstatus: trying again")
341*eb8dc403SDave Cobbley                return self.checkstatus(fetch, ud, d, False)
342*eb8dc403SDave Cobbley            else:
343*eb8dc403SDave Cobbley                # debug for now to avoid spamming the logs in e.g. remote sstate searches
344*eb8dc403SDave Cobbley                logger.debug(2, "checkstatus() urlopen failed: %s" % e)
345*eb8dc403SDave Cobbley                return False
346*eb8dc403SDave Cobbley        return True
347*eb8dc403SDave Cobbley
348*eb8dc403SDave Cobbley    def _parse_path(self, regex, s):
349*eb8dc403SDave Cobbley        """
350*eb8dc403SDave Cobbley        Find and group name, version and archive type in the given string s
351*eb8dc403SDave Cobbley        """
352*eb8dc403SDave Cobbley
353*eb8dc403SDave Cobbley        m = regex.search(s)
354*eb8dc403SDave Cobbley        if m:
355*eb8dc403SDave Cobbley            pname = ''
356*eb8dc403SDave Cobbley            pver = ''
357*eb8dc403SDave Cobbley            ptype = ''
358*eb8dc403SDave Cobbley
359*eb8dc403SDave Cobbley            mdict = m.groupdict()
360*eb8dc403SDave Cobbley            if 'name' in mdict.keys():
361*eb8dc403SDave Cobbley                pname = mdict['name']
362*eb8dc403SDave Cobbley            if 'pver' in mdict.keys():
363*eb8dc403SDave Cobbley                pver = mdict['pver']
364*eb8dc403SDave Cobbley            if 'type' in mdict.keys():
365*eb8dc403SDave Cobbley                ptype = mdict['type']
366*eb8dc403SDave Cobbley
367*eb8dc403SDave Cobbley            bb.debug(3, "_parse_path: %s, %s, %s" % (pname, pver, ptype))
368*eb8dc403SDave Cobbley
369*eb8dc403SDave Cobbley            return (pname, pver, ptype)
370*eb8dc403SDave Cobbley
371*eb8dc403SDave Cobbley        return None
372*eb8dc403SDave Cobbley
373*eb8dc403SDave Cobbley    def _modelate_version(self, version):
374*eb8dc403SDave Cobbley        if version[0] in ['.', '-']:
375*eb8dc403SDave Cobbley            if version[1].isdigit():
376*eb8dc403SDave Cobbley                version = version[1] + version[0] + version[2:len(version)]
377*eb8dc403SDave Cobbley            else:
378*eb8dc403SDave Cobbley                version = version[1:len(version)]
379*eb8dc403SDave Cobbley
380*eb8dc403SDave Cobbley        version = re.sub('-', '.', version)
381*eb8dc403SDave Cobbley        version = re.sub('_', '.', version)
382*eb8dc403SDave Cobbley        version = re.sub('(rc)+', '.1000.', version)
383*eb8dc403SDave Cobbley        version = re.sub('(beta)+', '.100.', version)
384*eb8dc403SDave Cobbley        version = re.sub('(alpha)+', '.10.', version)
385*eb8dc403SDave Cobbley        if version[0] == 'v':
386*eb8dc403SDave Cobbley            version = version[1:len(version)]
387*eb8dc403SDave Cobbley        return version
388*eb8dc403SDave Cobbley
389*eb8dc403SDave Cobbley    def _vercmp(self, old, new):
390*eb8dc403SDave Cobbley        """
391*eb8dc403SDave Cobbley        Check whether 'new' is newer than 'old' version. We use existing vercmp() for the
392*eb8dc403SDave Cobbley        purpose. PE is cleared in comparison as it's not for build, and PR is cleared too
393*eb8dc403SDave Cobbley        for simplicity as it's somehow difficult to get from various upstream format
394*eb8dc403SDave Cobbley        """
395*eb8dc403SDave Cobbley
396*eb8dc403SDave Cobbley        (oldpn, oldpv, oldsuffix) = old
397*eb8dc403SDave Cobbley        (newpn, newpv, newsuffix) = new
398*eb8dc403SDave Cobbley
399*eb8dc403SDave Cobbley        """
400*eb8dc403SDave Cobbley        Check for a new suffix type that we have never heard of before
401*eb8dc403SDave Cobbley        """
402*eb8dc403SDave Cobbley        if (newsuffix):
403*eb8dc403SDave Cobbley            m = self.suffix_regex_comp.search(newsuffix)
404*eb8dc403SDave Cobbley            if not m:
405*eb8dc403SDave Cobbley                bb.warn("%s has a possible unknown suffix: %s" % (newpn, newsuffix))
406*eb8dc403SDave Cobbley                return False
407*eb8dc403SDave Cobbley
408*eb8dc403SDave Cobbley        """
409*eb8dc403SDave Cobbley        Not our package so ignore it
410*eb8dc403SDave Cobbley        """
411*eb8dc403SDave Cobbley        if oldpn != newpn:
412*eb8dc403SDave Cobbley            return False
413*eb8dc403SDave Cobbley
414*eb8dc403SDave Cobbley        oldpv = self._modelate_version(oldpv)
415*eb8dc403SDave Cobbley        newpv = self._modelate_version(newpv)
416*eb8dc403SDave Cobbley
417*eb8dc403SDave Cobbley        return bb.utils.vercmp(("0", oldpv, ""), ("0", newpv, ""))
418*eb8dc403SDave Cobbley
419*eb8dc403SDave Cobbley    def _fetch_index(self, uri, ud, d):
420*eb8dc403SDave Cobbley        """
421*eb8dc403SDave Cobbley        Run fetch checkstatus to get directory information
422*eb8dc403SDave Cobbley        """
423*eb8dc403SDave Cobbley        f = tempfile.NamedTemporaryFile()
424*eb8dc403SDave Cobbley        with tempfile.TemporaryDirectory(prefix="wget-index-") as workdir, tempfile.NamedTemporaryFile(dir=workdir, prefix="wget-listing-") as f:
425*eb8dc403SDave Cobbley            agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Ubuntu/9.10 (karmic) Firefox/3.6.12"
426*eb8dc403SDave Cobbley            fetchcmd = self.basecmd
427*eb8dc403SDave Cobbley            fetchcmd += " -O " + f.name + " --user-agent='" + agent + "' '" + uri + "'"
428*eb8dc403SDave Cobbley            try:
429*eb8dc403SDave Cobbley                self._runwget(ud, d, fetchcmd, True, workdir=workdir)
430*eb8dc403SDave Cobbley                fetchresult = f.read()
431*eb8dc403SDave Cobbley            except bb.fetch2.BBFetchException:
432*eb8dc403SDave Cobbley                fetchresult = ""
433*eb8dc403SDave Cobbley
434*eb8dc403SDave Cobbley        return fetchresult
435*eb8dc403SDave Cobbley
436*eb8dc403SDave Cobbley    def _check_latest_version(self, url, package, package_regex, current_version, ud, d):
437*eb8dc403SDave Cobbley        """
438*eb8dc403SDave Cobbley        Return the latest version of a package inside a given directory path
439*eb8dc403SDave Cobbley        If error or no version, return ""
440*eb8dc403SDave Cobbley        """
441*eb8dc403SDave Cobbley        valid = 0
442*eb8dc403SDave Cobbley        version = ['', '', '']
443*eb8dc403SDave Cobbley
444*eb8dc403SDave Cobbley        bb.debug(3, "VersionURL: %s" % (url))
445*eb8dc403SDave Cobbley        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
446*eb8dc403SDave Cobbley        if not soup:
447*eb8dc403SDave Cobbley            bb.debug(3, "*** %s NO SOUP" % (url))
448*eb8dc403SDave Cobbley            return ""
449*eb8dc403SDave Cobbley
450*eb8dc403SDave Cobbley        for line in soup.find_all('a', href=True):
451*eb8dc403SDave Cobbley            bb.debug(3, "line['href'] = '%s'" % (line['href']))
452*eb8dc403SDave Cobbley            bb.debug(3, "line = '%s'" % (str(line)))
453*eb8dc403SDave Cobbley
454*eb8dc403SDave Cobbley            newver = self._parse_path(package_regex, line['href'])
455*eb8dc403SDave Cobbley            if not newver:
456*eb8dc403SDave Cobbley                newver = self._parse_path(package_regex, str(line))
457*eb8dc403SDave Cobbley
458*eb8dc403SDave Cobbley            if newver:
459*eb8dc403SDave Cobbley                bb.debug(3, "Upstream version found: %s" % newver[1])
460*eb8dc403SDave Cobbley                if valid == 0:
461*eb8dc403SDave Cobbley                    version = newver
462*eb8dc403SDave Cobbley                    valid = 1
463*eb8dc403SDave Cobbley                elif self._vercmp(version, newver) < 0:
464*eb8dc403SDave Cobbley                    version = newver
465*eb8dc403SDave Cobbley
466*eb8dc403SDave Cobbley        pupver = re.sub('_', '.', version[1])
467*eb8dc403SDave Cobbley
468*eb8dc403SDave Cobbley        bb.debug(3, "*** %s -> UpstreamVersion = %s (CurrentVersion = %s)" %
469*eb8dc403SDave Cobbley                (package, pupver or "N/A", current_version[1]))
470*eb8dc403SDave Cobbley
471*eb8dc403SDave Cobbley        if valid:
472*eb8dc403SDave Cobbley            return pupver
473*eb8dc403SDave Cobbley
474*eb8dc403SDave Cobbley        return ""
475*eb8dc403SDave Cobbley
476*eb8dc403SDave Cobbley    def _check_latest_version_by_dir(self, dirver, package, package_regex,
477*eb8dc403SDave Cobbley            current_version, ud, d):
478*eb8dc403SDave Cobbley        """
479*eb8dc403SDave Cobbley            Scan every directory in order to get upstream version.
480*eb8dc403SDave Cobbley        """
481*eb8dc403SDave Cobbley        version_dir = ['', '', '']
482*eb8dc403SDave Cobbley        version = ['', '', '']
483*eb8dc403SDave Cobbley
484*eb8dc403SDave Cobbley        dirver_regex = re.compile("(?P<pfx>\D*)(?P<ver>(\d+[\.\-_])+(\d+))")
485*eb8dc403SDave Cobbley        s = dirver_regex.search(dirver)
486*eb8dc403SDave Cobbley        if s:
487*eb8dc403SDave Cobbley            version_dir[1] = s.group('ver')
488*eb8dc403SDave Cobbley        else:
489*eb8dc403SDave Cobbley            version_dir[1] = dirver
490*eb8dc403SDave Cobbley
491*eb8dc403SDave Cobbley        dirs_uri = bb.fetch.encodeurl([ud.type, ud.host,
492*eb8dc403SDave Cobbley                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
493*eb8dc403SDave Cobbley        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))
494*eb8dc403SDave Cobbley
495*eb8dc403SDave Cobbley        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
496*eb8dc403SDave Cobbley        if not soup:
497*eb8dc403SDave Cobbley            return version[1]
498*eb8dc403SDave Cobbley
499*eb8dc403SDave Cobbley        for line in soup.find_all('a', href=True):
500*eb8dc403SDave Cobbley            s = dirver_regex.search(line['href'].strip("/"))
501*eb8dc403SDave Cobbley            if s:
502*eb8dc403SDave Cobbley                sver = s.group('ver')
503*eb8dc403SDave Cobbley
504*eb8dc403SDave Cobbley                # When prefix is part of the version directory it need to
505*eb8dc403SDave Cobbley                # ensure that only version directory is used so remove previous
506*eb8dc403SDave Cobbley                # directories if exists.
507*eb8dc403SDave Cobbley                #
508*eb8dc403SDave Cobbley                # Example: pfx = '/dir1/dir2/v' and version = '2.5' the expected
509*eb8dc403SDave Cobbley                # result is v2.5.
510*eb8dc403SDave Cobbley                spfx = s.group('pfx').split('/')[-1]
511*eb8dc403SDave Cobbley
512*eb8dc403SDave Cobbley                version_dir_new = ['', sver, '']
513*eb8dc403SDave Cobbley                if self._vercmp(version_dir, version_dir_new) <= 0:
514*eb8dc403SDave Cobbley                    dirver_new = spfx + sver
515*eb8dc403SDave Cobbley                    path = ud.path.replace(dirver, dirver_new, True) \
516*eb8dc403SDave Cobbley                        .split(package)[0]
517*eb8dc403SDave Cobbley                    uri = bb.fetch.encodeurl([ud.type, ud.host, path,
518*eb8dc403SDave Cobbley                        ud.user, ud.pswd, {}])
519*eb8dc403SDave Cobbley
520*eb8dc403SDave Cobbley                    pupver = self._check_latest_version(uri,
521*eb8dc403SDave Cobbley                            package, package_regex, current_version, ud, d)
522*eb8dc403SDave Cobbley                    if pupver:
523*eb8dc403SDave Cobbley                        version[1] = pupver
524*eb8dc403SDave Cobbley
525*eb8dc403SDave Cobbley                    version_dir = version_dir_new
526*eb8dc403SDave Cobbley
527*eb8dc403SDave Cobbley        return version[1]
528*eb8dc403SDave Cobbley
529*eb8dc403SDave Cobbley    def _init_regexes(self, package, ud, d):
530*eb8dc403SDave Cobbley        """
531*eb8dc403SDave Cobbley        Match as many patterns as possible such as:
532*eb8dc403SDave Cobbley                gnome-common-2.20.0.tar.gz (most common format)
533*eb8dc403SDave Cobbley                gtk+-2.90.1.tar.gz
534*eb8dc403SDave Cobbley                xf86-input-synaptics-12.6.9.tar.gz
535*eb8dc403SDave Cobbley                dri2proto-2.3.tar.gz
536*eb8dc403SDave Cobbley                blktool_4.orig.tar.gz
537*eb8dc403SDave Cobbley                libid3tag-0.15.1b.tar.gz
538*eb8dc403SDave Cobbley                unzip552.tar.gz
539*eb8dc403SDave Cobbley                icu4c-3_6-src.tgz
540*eb8dc403SDave Cobbley                genext2fs_1.3.orig.tar.gz
541*eb8dc403SDave Cobbley                gst-fluendo-mp3
542*eb8dc403SDave Cobbley        """
543*eb8dc403SDave Cobbley        # match most patterns which uses "-" as separator to version digits
544*eb8dc403SDave Cobbley        pn_prefix1 = "[a-zA-Z][a-zA-Z0-9]*([-_][a-zA-Z]\w+)*\+?[-_]"
545*eb8dc403SDave Cobbley        # a loose pattern such as for unzip552.tar.gz
546*eb8dc403SDave Cobbley        pn_prefix2 = "[a-zA-Z]+"
547*eb8dc403SDave Cobbley        # a loose pattern such as for 80325-quicky-0.4.tar.gz
548*eb8dc403SDave Cobbley        pn_prefix3 = "[0-9]+[-]?[a-zA-Z]+"
549*eb8dc403SDave Cobbley        # Save the Package Name (pn) Regex for use later
550*eb8dc403SDave Cobbley        pn_regex = "(%s|%s|%s)" % (pn_prefix1, pn_prefix2, pn_prefix3)
551*eb8dc403SDave Cobbley
552*eb8dc403SDave Cobbley        # match version
553*eb8dc403SDave Cobbley        pver_regex = "(([A-Z]*\d+[a-zA-Z]*[\.\-_]*)+)"
554*eb8dc403SDave Cobbley
555*eb8dc403SDave Cobbley        # match arch
556*eb8dc403SDave Cobbley        parch_regex = "-source|_all_"
557*eb8dc403SDave Cobbley
558*eb8dc403SDave Cobbley        # src.rpm extension was added only for rpm package. Can be removed if the rpm
559*eb8dc403SDave Cobbley        # packaged will always be considered as having to be manually upgraded
560*eb8dc403SDave Cobbley        psuffix_regex = "(tar\.gz|tgz|tar\.bz2|zip|xz|tar\.lz|rpm|bz2|orig\.tar\.gz|tar\.xz|src\.tar\.gz|src\.tgz|svnr\d+\.tar\.bz2|stable\.tar\.gz|src\.rpm)"
561*eb8dc403SDave Cobbley
562*eb8dc403SDave Cobbley        # match name, version and archive type of a package
563*eb8dc403SDave Cobbley        package_regex_comp = re.compile("(?P<name>%s?\.?v?)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s$)"
564*eb8dc403SDave Cobbley                                                    % (pn_regex, pver_regex, parch_regex, psuffix_regex))
565*eb8dc403SDave Cobbley        self.suffix_regex_comp = re.compile(psuffix_regex)
566*eb8dc403SDave Cobbley
567*eb8dc403SDave Cobbley        # compile regex, can be specific by package or generic regex
568*eb8dc403SDave Cobbley        pn_regex = d.getVar('UPSTREAM_CHECK_REGEX')
569*eb8dc403SDave Cobbley        if pn_regex:
570*eb8dc403SDave Cobbley            package_custom_regex_comp = re.compile(pn_regex)
571*eb8dc403SDave Cobbley        else:
572*eb8dc403SDave Cobbley            version = self._parse_path(package_regex_comp, package)
573*eb8dc403SDave Cobbley            if version:
574*eb8dc403SDave Cobbley                package_custom_regex_comp = re.compile(
575*eb8dc403SDave Cobbley                    "(?P<name>%s)(?P<pver>%s)(?P<arch>%s)?[\.-](?P<type>%s)" %
576*eb8dc403SDave Cobbley                    (re.escape(version[0]), pver_regex, parch_regex, psuffix_regex))
577*eb8dc403SDave Cobbley            else:
578*eb8dc403SDave Cobbley                package_custom_regex_comp = None
579*eb8dc403SDave Cobbley
580*eb8dc403SDave Cobbley        return package_custom_regex_comp
581*eb8dc403SDave Cobbley
582*eb8dc403SDave Cobbley    def latest_versionstring(self, ud, d):
583*eb8dc403SDave Cobbley        """
584*eb8dc403SDave Cobbley        Manipulate the URL and try to obtain the latest package version
585*eb8dc403SDave Cobbley
586*eb8dc403SDave Cobbley        sanity check to ensure same name and type.
587*eb8dc403SDave Cobbley        """
588*eb8dc403SDave Cobbley        package = ud.path.split("/")[-1]
589*eb8dc403SDave Cobbley        current_version = ['', d.getVar('PV'), '']
590*eb8dc403SDave Cobbley
591*eb8dc403SDave Cobbley        """possible to have no version in pkg name, such as spectrum-fw"""
592*eb8dc403SDave Cobbley        if not re.search("\d+", package):
593*eb8dc403SDave Cobbley            current_version[1] = re.sub('_', '.', current_version[1])
594*eb8dc403SDave Cobbley            current_version[1] = re.sub('-', '.', current_version[1])
595*eb8dc403SDave Cobbley            return (current_version[1], '')
596*eb8dc403SDave Cobbley
597*eb8dc403SDave Cobbley        package_regex = self._init_regexes(package, ud, d)
598*eb8dc403SDave Cobbley        if package_regex is None:
599*eb8dc403SDave Cobbley            bb.warn("latest_versionstring: package %s don't match pattern" % (package))
600*eb8dc403SDave Cobbley            return ('', '')
601*eb8dc403SDave Cobbley        bb.debug(3, "latest_versionstring, regex: %s" % (package_regex.pattern))
602*eb8dc403SDave Cobbley
603*eb8dc403SDave Cobbley        uri = ""
604*eb8dc403SDave Cobbley        regex_uri = d.getVar("UPSTREAM_CHECK_URI")
605*eb8dc403SDave Cobbley        if not regex_uri:
606*eb8dc403SDave Cobbley            path = ud.path.split(package)[0]
607*eb8dc403SDave Cobbley
608*eb8dc403SDave Cobbley            # search for version matches on folders inside the path, like:
609*eb8dc403SDave Cobbley            # "5.7" in http://download.gnome.org/sources/${PN}/5.7/${PN}-${PV}.tar.gz
610*eb8dc403SDave Cobbley            dirver_regex = re.compile("(?P<dirver>[^/]*(\d+\.)*\d+([-_]r\d+)*)/")
611*eb8dc403SDave Cobbley            m = dirver_regex.search(path)
612*eb8dc403SDave Cobbley            if m:
613*eb8dc403SDave Cobbley                pn = d.getVar('PN')
614*eb8dc403SDave Cobbley                dirver = m.group('dirver')
615*eb8dc403SDave Cobbley
616*eb8dc403SDave Cobbley                dirver_pn_regex = re.compile("%s\d?" % (re.escape(pn)))
617*eb8dc403SDave Cobbley                if not dirver_pn_regex.search(dirver):
618*eb8dc403SDave Cobbley                    return (self._check_latest_version_by_dir(dirver,
619*eb8dc403SDave Cobbley                        package, package_regex, current_version, ud, d), '')
620*eb8dc403SDave Cobbley
621*eb8dc403SDave Cobbley            uri = bb.fetch.encodeurl([ud.type, ud.host, path, ud.user, ud.pswd, {}])
622*eb8dc403SDave Cobbley        else:
623*eb8dc403SDave Cobbley            uri = regex_uri
624*eb8dc403SDave Cobbley
625*eb8dc403SDave Cobbley        return (self._check_latest_version(uri, package, package_regex,
626*eb8dc403SDave Cobbley                current_version, ud, d), '')
627