xref: /openbmc/openbmc/poky/meta/lib/oe/sstatesig.py (revision 2f814a6d)
1#
2# Copyright OpenEmbedded Contributors
3#
4# SPDX-License-Identifier: GPL-2.0-only
5#
import netrc
import os

import bb.runqueue
import bb.siggen

import oe
10
def sstate_rundepfilter(siggen, fn, recipename, task, dep, depname, dataCaches):
    """
    Decide whether the dependency 'dep' should contribute to the signature
    of 'task' in recipe 'fn'/'recipename'.

    Used as the rundep_check hook by the OE signature generators below.
    Returns True to keep the dependency in the signature calculation,
    False to drop it.
    """
    # Return True if we should keep the dependency, False to drop it
    def isNative(x):
        return x.endswith("-native")
    def isCross(x):
        return "-cross-" in x
    def isNativeSDK(x):
        return x.startswith("nativesdk-")
    def isKernel(mc, fn):
        # Kernel or out-of-tree kernel module recipes
        inherits = " ".join(dataCaches[mc].inherits[fn])
        return inherits.find("/module-base.bbclass") != -1 or inherits.find("/linux-kernel-base.bbclass") != -1
    def isPackageGroup(mc, fn):
        inherits = " ".join(dataCaches[mc].inherits[fn])
        return "/packagegroup.bbclass" in inherits
    def isAllArch(mc, fn):
        inherits = " ".join(dataCaches[mc].inherits[fn])
        return "/allarch.bbclass" in inherits
    def isImage(mc, fn):
        return "/image.bbclass" in " ".join(dataCaches[mc].inherits[fn])

    depmc, _, deptaskname, depmcfn = bb.runqueue.split_tid_mcfn(dep)
    mc, _ = bb.runqueue.split_mc(fn)

    # We can skip the rm_work task signature to avoid running the task
    # when we remove some tasks from the dependency chain
    # i.e. INHERIT:remove = "create-spdx" will trigger the do_rm_work
    if task == "do_rm_work":
        return False

    # (Almost) always include our own inter-task dependencies (unless it comes
    # from a mcdepends). The exception is the special
    # do_kernel_configme->do_unpack_and_patch dependency from archiver.bbclass.
    if recipename == depname and depmc == mc:
        if task == "do_kernel_configme" and deptaskname == "do_unpack_and_patch":
            return False
        return True

    # Exclude well defined recipe->dependency
    if "%s->%s" % (recipename, depname) in siggen.saferecipedeps:
        return False

    # Check for special wildcard
    if "*->%s" % depname in siggen.saferecipedeps and recipename != depname:
        return False

    # Don't change native/cross/nativesdk recipe dependencies any further
    if isNative(recipename) or isCross(recipename) or isNativeSDK(recipename):
        return True

    # Only target packages beyond here

    # allarch packagegroups are assumed to have well behaved names which don't change between architectures/tunes
    if isPackageGroup(mc, fn) and isAllArch(mc, fn) and not isNative(depname):
        return False

    # Exclude well defined machine specific configurations which don't change ABI
    if depname in siggen.abisaferecipes and not isImage(mc, fn):
        return False

    # Kernel modules are well namespaced. We don't want to depend on the kernel's checksum
    # if we're just doing an RRECOMMENDS:xxx = "kernel-module-*", not least because the checksum
    # is machine specific.
    # Therefore if we're not a kernel or a module recipe (inheriting the kernel classes)
    # and we recommend a kernel-module, we exclude the dependency.
    if dataCaches and isKernel(depmc, depmcfn) and not isKernel(mc, fn):
        for pkg in dataCaches[mc].runrecs[fn]:
            if " ".join(dataCaches[mc].runrecs[fn][pkg]).find("kernel-module-") != -1:
                return False

    # Default to keep dependencies
    return True
82
def sstate_lockedsigs(d):
    """
    Parse the locked signature variables from the datastore.

    SIGGEN_LOCKEDSIGS_TYPES names a set of suffixes; for each suffix t, the
    variable SIGGEN_LOCKEDSIGS_<t> holds space-separated "pn:task:hash"
    entries (the hash itself may contain further colons).

    Returns a nested dict: {pn: {task: [hash, source_variable_name]}}.
    """
    sigs = {}
    for sigtype in (d.getVar("SIGGEN_LOCKEDSIGS_TYPES") or "").split():
        varname = "SIGGEN_LOCKEDSIGS_%s" % sigtype
        for entry in (d.getVar(varname) or "").split():
            # maxsplit=2 keeps any ':' characters inside the hash intact
            pn, task, sighash = entry.split(":", 2)
            sigs.setdefault(pn, {})[task] = [sighash, varname]
    return sigs
95
class SignatureGeneratorOEBasicHashMixIn(object):
    """
    OpenEmbedded additions to bitbake's signature generators.

    Provides the OE dependency filter (sstate_rundepfilter), support for
    locked task signatures (SIGGEN_LOCKEDSIGS_*) including mismatch
    reporting, and helpers to dump locked signatures and signature lists.
    """
    supports_multiconfig_datacaches = True

    def init_rundepcheck(self, data):
        # Recipes whose output is considered ABI safe, and explicit
        # "recipe->dep" pairs to exclude; both feed sstate_rundepfilter()
        self.abisaferecipes = (data.getVar("SIGGEN_EXCLUDERECIPES_ABISAFE") or "").split()
        self.saferecipedeps = (data.getVar("SIGGEN_EXCLUDE_SAFE_RECIPE_DEPS") or "").split()
        # Locked signatures parsed from SIGGEN_LOCKEDSIGS_* variables
        self.lockedsigs = sstate_lockedsigs(data)
        # tid -> locked hash (or False once a task is known not to be locked)
        self.lockedhashes = {}
        # fn -> PN and fn -> hash filename, filled in by get_taskhash()
        self.lockedpnmap = {}
        self.lockedhashfn = {}
        self.machine = data.getVar("MACHINE")
        self.mismatch_msgs = []
        self.mismatch_number = 0
        self.lockedsigs_msgs = ""
        self.unlockedrecipes = (data.getVar("SIGGEN_UNLOCKED_RECIPES") or
                                "").split()
        self.unlockedrecipes = { k: "" for k in self.unlockedrecipes }
        # Set while get_taskhash() queries a unihash for a locked task so
        # get_cached_unihash() doesn't short-circuit to the locked value
        self._internal = False
        pass

    def tasks_resolved(self, virtmap, virtpnmap, dataCache):
        # Translate virtual/xxx entries to PN values
        newabisafe = []
        for a in self.abisaferecipes:
            if a in virtpnmap:
                newabisafe.append(virtpnmap[a])
            else:
                newabisafe.append(a)
        self.abisaferecipes = newabisafe
        newsafedeps = []
        for a in self.saferecipedeps:
            # Entries have the form "recipe->dependency"; translate each side
            a1, a2 = a.split("->")
            if a1 in virtpnmap:
                a1 = virtpnmap[a1]
            if a2 in virtpnmap:
                a2 = virtpnmap[a2]
            newsafedeps.append(a1 + "->" + a2)
        self.saferecipedeps = newsafedeps

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches = None):
        # Hook called by bitbake for each task dependency during signature
        # computation; delegates to the module-level filter above
        return sstate_rundepfilter(self, fn, recipename, task, dep, depname, dataCaches)

    def get_taskdata(self):
        # Extra state to serialise alongside the base class data
        return (self.lockedpnmap, self.lockedhashfn, self.lockedhashes) + super().get_taskdata()

    def set_taskdata(self, data):
        # Mirror of get_taskdata(): peel off our three items first
        self.lockedpnmap, self.lockedhashfn, self.lockedhashes = data[:3]
        super().set_taskdata(data[3:])

    def dump_sigs(self, dataCache, options):
        if 'lockedsigs' in options:
            sigfile = os.getcwd() + "/locked-sigs.inc"
            bb.plain("Writing locked sigs to %s" % sigfile)
            self.dump_lockedsigs(sigfile)
        # Skips bb.siggen.SignatureGeneratorBasicHash in the MRO, calling
        # the implementation above it instead
        return super(bb.siggen.SignatureGeneratorBasicHash, self).dump_sigs(dataCache, options)


    def get_taskhash(self, tid, deps, dataCaches):
        """
        Compute the task hash for tid, substituting the locked signature
        when one applies and recording a mismatch message if the computed
        hash disagrees with the locked one.
        """
        if tid in self.lockedhashes:
            # Already decided for this tid: either return the locked hash
            # or fall through to the normal computation
            if self.lockedhashes[tid]:
                return self.lockedhashes[tid]
            else:
                return super().get_taskhash(tid, deps, dataCaches)

        h = super().get_taskhash(tid, deps, dataCaches)

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        recipename = dataCaches[mc].pkg_fn[fn]
        self.lockedpnmap[fn] = recipename
        self.lockedhashfn[fn] = dataCaches[mc].hashfn[fn]

        unlocked = False
        if recipename in self.unlockedrecipes:
            unlocked = True
        else:
            def recipename_from_dep(dep):
                (depmc, _, _, depfn) = bb.runqueue.split_tid_mcfn(dep)
                return dataCaches[depmc].pkg_fn[depfn]

            # If any unlocked recipe is in the direct dependencies then the
            # current recipe should be unlocked as well.
            depnames = [ recipename_from_dep(x) for x in deps if mc == bb.runqueue.mc_from_tid(x)]
            if any(x in y for y in depnames for x in self.unlockedrecipes):
                self.unlockedrecipes[recipename] = ''
                unlocked = True

        if not unlocked and recipename in self.lockedsigs:
            if task in self.lockedsigs[recipename]:
                h_locked = self.lockedsigs[recipename][task][0]
                var = self.lockedsigs[recipename][task][1]
                self.lockedhashes[tid] = h_locked
                # _internal stops get_cached_unihash() returning the locked
                # hash we just stored while we query the real unihash
                self._internal = True
                unihash = self.get_unihash(tid)
                self._internal = False
                #bb.warn("Using %s %s %s" % (recipename, task, h))

                # Only report a mismatch when the locked hash matches
                # neither the computed hash nor the unihash
                if h != h_locked and h_locked != unihash:
                    self.mismatch_number += 1
                    self.mismatch_msgs.append('The %s:%s sig is computed to be %s, but the sig is locked to %s in %s'
                                          % (recipename, task, h, h_locked, var))

                return h_locked

        # Remember that this tid has no locked signature
        self.lockedhashes[tid] = False
        #bb.warn("%s %s %s" % (recipename, task, h))
        return h

    def get_stampfile_hash(self, tid):
        # Locked hash takes precedence for stamp file naming
        if tid in self.lockedhashes and self.lockedhashes[tid]:
            return self.lockedhashes[tid]
        return super().get_stampfile_hash(tid)

    def get_cached_unihash(self, tid):
        # Locked hash takes precedence unless get_taskhash() is mid-query
        # (see _internal above)
        if tid in self.lockedhashes and self.lockedhashes[tid] and not self._internal:
            return self.lockedhashes[tid]
        return super().get_cached_unihash(tid)

    def dump_sigtask(self, fn, task, stampbase, runtime):
        # Don't write signature data for tasks using a locked hash
        tid = fn + ":" + task
        if tid in self.lockedhashes and self.lockedhashes[tid]:
            return
        super(bb.siggen.SignatureGeneratorBasicHash, self).dump_sigtask(fn, task, stampbase, runtime)

    def dump_lockedsigs(self, sigfile, taskfilter=None):
        """
        Write the current task unihashes to sigfile in the
        SIGGEN_LOCKEDSIGS_* format parsed by sstate_lockedsigs(), grouped
        by a type derived from each recipe's hash filename.
        """
        types = {}
        for tid in self.runtaskdeps:
            # Bitbake changed this to a tuple in newer versions
            if isinstance(tid, tuple):
                tid = tid[1]
            if taskfilter:
                if not tid in taskfilter:
                    continue
            fn = bb.runqueue.fn_from_tid(tid)
            # Derive the grouping type from field 5 of the hashfn string,
            # e.g. "t-core2-64"
            t = self.lockedhashfn[fn].split(" ")[1].split(":")[5]
            t = 't-' + t.replace('_', '-')
            if t not in types:
                types[t] = []
            types[t].append(tid)

        with open(sigfile, "w") as f:
            l = sorted(types)
            for t in l:
                f.write('SIGGEN_LOCKEDSIGS_%s = "\\\n' % t)
                types[t].sort()
                # Sort entries by PN for stable, reviewable output
                sortedtid = sorted(types[t], key=lambda tid: self.lockedpnmap[bb.runqueue.fn_from_tid(tid)])
                for tid in sortedtid:
                    (_, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)
                    if tid not in self.taskhash:
                        continue
                    f.write("    " + self.lockedpnmap[fn] + ":" + task + ":" + self.get_unihash(tid) + " \\\n")
                f.write('    "\n')
            f.write('SIGGEN_LOCKEDSIGS_TYPES:%s = "%s"' % (self.machine, " ".join(l)))

    def dump_siglist(self, sigfile, path_prefix_strip=None):
        """
        Write one "pn:task fn taskhash" line per known task to sigfile,
        optionally stripping path_prefix_strip from recipe file paths.
        """
        def strip_fn(fn):
            nonlocal path_prefix_strip
            if not path_prefix_strip:
                return fn

            # Only the final (path) component of a virtual:...:path fn
            # carries the prefix
            fn_exp = fn.split(":")
            if fn_exp[-1].startswith(path_prefix_strip):
                fn_exp[-1] = fn_exp[-1][len(path_prefix_strip):]

            return ":".join(fn_exp)

        with open(sigfile, "w") as f:
            tasks = []
            for taskitem in self.taskhash:
                (fn, task) = taskitem.rsplit(":", 1)
                pn = self.lockedpnmap[fn]
                tasks.append((pn, task, strip_fn(fn), self.taskhash[taskitem]))
            for (pn, task, fn, taskhash) in sorted(tasks):
                f.write('%s:%s %s %s\n' % (pn, task, fn, taskhash))

    def checkhashes(self, sq_data, missed, found, d):
        """
        Report on locked signatures after an sstate availability check:
        summarise which recipes are locked, flag locked hashes that were
        not found in the sstate cache, and emit any signature mismatch
        messages collected in get_taskhash().  Severity is controlled by
        SIGGEN_LOCKEDSIGS_TASKSIG_CHECK and
        SIGGEN_LOCKEDSIGS_SSTATE_EXISTS_CHECK.
        """
        warn_msgs = []
        error_msgs = []
        sstate_missing_msgs = []
        info_msgs = None

        if self.lockedsigs:
            if len(self.lockedsigs) > 10:
                self.lockedsigs_msgs = "There are %s recipes with locked tasks (%s task(s) have non matching signature)" % (len(self.lockedsigs), self.mismatch_number)
            else:
                self.lockedsigs_msgs = "The following recipes have locked tasks:"
                for pn in self.lockedsigs:
                    self.lockedsigs_msgs += " %s" % (pn)

        for tid in sq_data['hash']:
            if tid not in found:
                # A locked hash that isn't present in sstate is suspicious
                # (except do_shared_workdir, which has no sstate object)
                for pn in self.lockedsigs:
                    taskname = bb.runqueue.taskname_from_tid(tid)
                    if sq_data['hash'][tid] in iter(self.lockedsigs[pn].values()):
                        if taskname == 'do_shared_workdir':
                            continue
                        sstate_missing_msgs.append("Locked sig is set for %s:%s (%s) yet not in sstate cache?"
                                               % (pn, taskname, sq_data['hash'][tid]))

        checklevel = d.getVar("SIGGEN_LOCKEDSIGS_TASKSIG_CHECK")
        if checklevel == 'info':
            info_msgs = self.lockedsigs_msgs
        if checklevel == 'warn' or checklevel == 'info':
            warn_msgs += self.mismatch_msgs
        elif checklevel == 'error':
            error_msgs += self.mismatch_msgs

        checklevel = d.getVar("SIGGEN_LOCKEDSIGS_SSTATE_EXISTS_CHECK")
        if checklevel == 'warn':
            warn_msgs += sstate_missing_msgs
        elif checklevel == 'error':
            error_msgs += sstate_missing_msgs

        if info_msgs:
            bb.note(info_msgs)
        if warn_msgs:
            bb.warn("\n".join(warn_msgs))
        if error_msgs:
            bb.fatal("\n".join(error_msgs))
315
class SignatureGeneratorOEBasicHash(SignatureGeneratorOEBasicHashMixIn, bb.siggen.SignatureGeneratorBasicHash):
    """Basic hash signature generator with the OE mixin behaviour."""
    name = "OEBasicHash"
318
class SignatureGeneratorOEEquivHash(SignatureGeneratorOEBasicHashMixIn, bb.siggen.SignatureGeneratorUniHashMixIn, bb.siggen.SignatureGeneratorBasicHash):
    """Hash-equivalence signature generator with the OE mixin behaviour."""
    name = "OEEquivHash"

    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        # Both the hash equivalence server address and the output hash
        # method are mandatory for this generator
        self.server = data.getVar('BB_HASHSERVE')
        if not self.server:
            bb.fatal("OEEquivHash requires BB_HASHSERVE to be set")
        self.method = data.getVar('SSTATE_HASHEQUIV_METHOD')
        if not self.method:
            bb.fatal("OEEquivHash requires SSTATE_HASHEQUIV_METHOD to be set")
        self.max_parallel = int(data.getVar('BB_HASHSERVE_MAX_PARALLEL') or 1)
        self.username = data.getVar("BB_HASHSERVE_USERNAME")
        self.password = data.getVar("BB_HASHSERVE_PASSWORD")
        if not (self.username and self.password):
            # Fall back to ~/.netrc for server credentials; a missing
            # netrc file is fine, a malformed one gets a warning
            auth = None
            try:
                auth = netrc.netrc().authenticators(self.server)
            except FileNotFoundError:
                pass
            except netrc.NetrcParseError as e:
                bb.warn("Error parsing %s:%s: %s" % (e.filename, str(e.lineno), e.msg))
            if auth is not None:
                self.username, _, self.password = auth
343
# Insert these classes into siggen's namespace so it can see and select them
bb.siggen.SignatureGeneratorOEBasicHash = SignatureGeneratorOEBasicHash
bb.siggen.SignatureGeneratorOEEquivHash = SignatureGeneratorOEEquivHash
347
348
def find_siginfo(pn, taskname, taskhashlist, d):
    """
    Find signature data files for comparison purposes.

    Searches the stamps directory first, then the sstate cache.  'pn' may
    be a plain PN (with 'taskname' given separately) or a combined
    "pn:task" / "mc:<mc>:pn:task" key when taskname is empty.  If
    'taskhashlist' is set, only files for those specific hashes are
    considered.

    Returns a dict: {hash: {'path': ..., 'sstate': bool, 'time': mtime}}.
    """
    import glob

    if not taskname:
        # We have to derive pn and taskname
        key = pn
        if key.startswith("mc:"):
            # mc:<mc>:<pn>:<task>
            _, _, pn, taskname = key.split(':', 3)
        else:
            # <pn>:<task>
            pn, taskname = key.split(':', 1)

    hashfiles = {}

    def get_hashval(siginfo):
        # sstate siginfo names embed the hash between the last ':' and the
        # first '_'; stamp sigdata names simply end in the hash
        if siginfo.endswith('.siginfo'):
            return siginfo.rpartition(':')[2].partition('_')[0]
        else:
            return siginfo.rpartition('.')[2]

    def get_time(fullpath):
        return os.stat(fullpath).st_mtime

    # First search in stamps dir
    localdata = d.createCopy()
    localdata.setVar('MULTIMACH_TARGET_SYS', '*')
    localdata.setVar('PN', pn)
    localdata.setVar('PV', '*')
    localdata.setVar('PR', '*')
    localdata.setVar('EXTENDPE', '')
    stamp = localdata.getVar('STAMP')
    if pn.startswith("gcc-source"):
        # gcc-source shared workdir is a special case :(
        stamp = localdata.expand("${STAMPS_DIR}/work-shared/gcc-${PV}-${PR}")

    filespec = '%s.%s.sigdata.*' % (stamp, taskname)
    foundall = False
    bb.debug(1, "Calling glob.glob on {}".format(filespec))
    for fullpath in glob.glob(filespec):
        if taskhashlist:
            for taskhash in taskhashlist:
                if fullpath.endswith('.%s' % taskhash):
                    hashfiles[taskhash] = {'path':fullpath, 'sstate':False, 'time':get_time(fullpath)}
                    if len(hashfiles) == len(taskhashlist):
                        foundall = True
                        break
        else:
            hashval = get_hashval(fullpath)
            hashfiles[hashval] = {'path':fullpath, 'sstate':False, 'time':get_time(fullpath)}

    if not taskhashlist or (len(hashfiles) < 2 and not foundall):
        # That didn't work, look in sstate-cache
        hashes = taskhashlist or ['?' * 64]
        localdata = bb.data.createCopy(d)
        for hashval in hashes:
            localdata.setVar('PACKAGE_ARCH', '*')
            localdata.setVar('TARGET_VENDOR', '*')
            localdata.setVar('TARGET_OS', '*')
            localdata.setVar('PN', pn)
            # gcc-source is a special case, same as with local stamps above
            if pn.startswith("gcc-source"):
                localdata.setVar('PN', "gcc")
            localdata.setVar('PV', '*')
            localdata.setVar('PR', '*')
            localdata.setVar('BB_TASKHASH', hashval)
            localdata.setVar('SSTATE_CURRTASK', taskname[3:])
            swspec = localdata.getVar('SSTATE_SWSPEC')
            if taskname in ['do_fetch', 'do_unpack', 'do_patch', 'do_populate_lic', 'do_preconfigure'] and swspec:
                localdata.setVar('SSTATE_PKGSPEC', '${SSTATE_SWSPEC}')
            elif pn.endswith('-native') or "-cross-" in pn or "-crosssdk-" in pn:
                localdata.setVar('SSTATE_EXTRAPATH', "${NATIVELSBSTRING}/")
            filespec = '%s.siginfo' % localdata.getVar('SSTATE_PKG')

            bb.debug(1, "Calling glob.glob on {}".format(filespec))
            matchedfiles = glob.glob(filespec)
            for fullpath in matchedfiles:
                actual_hashval = get_hashval(fullpath)
                if actual_hashval in hashfiles:
                    continue
                hashfiles[actual_hashval] = {'path':fullpath, 'sstate':True, 'time':get_time(fullpath)}

    return hashfiles
437
# Publish this implementation (and its interface version) to bitbake's siggen
bb.siggen.find_siginfo = find_siginfo
bb.siggen.find_siginfo_version = 2
440
441
def sstate_get_manifest_filename(task, d):
    """
    Return the sstate manifest file path for a particular task.
    Also returns the datastore that can be used to query related variables.
    """
    localdata = d.createCopy()
    # Honour any machine-specific stamp info the task declares (e.g. for
    # machine-arch tasks), so the manifest path matches the stamp
    machine_info = d.getVarFlag("do_" + task, 'stamp-extra-info')
    if machine_info:
        localdata.setVar("SSTATE_MANMACH", machine_info)
    return (localdata.expand("${SSTATE_MANFILEPREFIX}.%s" % task), localdata)
452
def find_sstate_manifest(taskdata, taskdata2, taskname, d, multilibcache):
    """
    Locate the sstate manifest for a task, trying every package
    architecture the recipe could have been built for.

    'taskdata' is the recipe name, 'taskdata2' the (possibly
    "virtual:multilib:..." prefixed) provider string.  Returns
    (manifest_path, datastore); calls bb.fatal if no manifest exists.
    """
    d2 = d
    variant = ''
    curr_variant = ''
    # Work out the multilib variant of the current context and of the
    # requested provider; if they differ, switch to a datastore configured
    # for the requested variant (cached in multilibcache)
    if d.getVar("BBEXTENDCURR") == "multilib":
        curr_variant = d.getVar("BBEXTENDVARIANT")
        if "virtclass-multilib" not in d.getVar("OVERRIDES"):
            curr_variant = "invalid"
    if taskdata2.startswith("virtual:multilib"):
        variant = taskdata2.split(":")[2]
    if curr_variant != variant:
        if variant not in multilibcache:
            multilibcache[variant] = oe.utils.get_multilib_datastore(variant, d)
        d2 = multilibcache[variant]

    # Candidate package architectures, most specific first
    if taskdata.endswith("-native"):
        pkgarchs = ["${BUILD_ARCH}", "${BUILD_ARCH}_${ORIGNATIVELSBSTRING}"]
    elif taskdata.startswith("nativesdk-"):
        pkgarchs = ["${SDK_ARCH}_${SDK_OS}", "allarch"]
    elif "-cross-canadian" in taskdata:
        pkgarchs = ["${SDK_ARCH}_${SDK_ARCH}-${SDKPKGSUFFIX}"]
    elif "-cross-" in taskdata:
        pkgarchs = ["${BUILD_ARCH}"]
    elif "-crosssdk" in taskdata:
        pkgarchs = ["${BUILD_ARCH}_${SDK_ARCH}_${SDK_OS}"]
    else:
        # Target recipes: machine arch, then the extra archs (most specific
        # first), then allarch and the SDK arch
        pkgarchs = ['${MACHINE_ARCH}']
        pkgarchs += reversed(d2.getVar("PACKAGE_EXTRA_ARCHS").split())
        pkgarchs += ['allarch', '${SDK_ARCH}_${SDK_ARCH}-${SDKPKGSUFFIX}']

    searched_manifests = []

    for pkgarch in pkgarchs:
        manifest = d2.expand("${SSTATE_MANIFESTS}/manifest-%s-%s.%s" % (pkgarch, taskdata, taskname))
        if os.path.exists(manifest):
            return manifest, d2
        searched_manifests.append(manifest)

    # Nothing matched: report everything we tried (bb.fatal raises)
    bb.fatal("The sstate manifest for task '%s:%s' (multilib variant '%s') could not be found.\nThe pkgarchs considered were: %s.\nBut none of these manifests exists:\n    %s"
            % (taskdata, taskname, variant, d2.expand(", ".join(pkgarchs)),"\n    ".join(searched_manifests)))
    return None, d2
494
def OEOuthashBasic(path, sigfile, task, d):
    """
    Basic output hash function

    Calculates the output hash of a task by hashing all output file metadata,
    and file contents.

    Walks 'path' and feeds one ls-style line per file/directory/symlink into
    a sha256 (and, if given, mirrors the bytes to 'sigfile').  Which metadata
    is included depends on the task: ownership only under pseudo, timestamps
    only for do_package.  Returns the hex digest.
    """
    import hashlib
    import stat
    import pwd
    import grp
    import re
    import fnmatch

    def update_hash(s):
        # Every byte hashed is also mirrored to sigfile (when given) so the
        # exact hash input can be inspected
        s = s.encode('utf-8')
        h.update(s)
        if sigfile:
            sigfile.write(s)

    h = hashlib.sha256()
    prev_dir = os.getcwd()
    corebase = d.getVar("COREBASE")
    tmpdir = d.getVar("TMPDIR")
    # Ownership is only meaningful when running under pseudo
    include_owners = os.environ.get('PSEUDO_DISABLED') == '0'
    if "package_write_" in task or task == "package_qa":
        include_owners = False
    include_timestamps = False
    include_root = True
    if task == "package":
        include_timestamps = True
        include_root = False
    hash_version = d.getVar('HASHEQUIV_HASH_VERSION')
    extra_sigdata = d.getVar("HASHEQUIV_EXTRA_SIGDATA")

    # SSTATE_HASHEQUIV_FILEMAP entries are "task:glob:replacement"; for
    # matching files the replacements are stripped from the contents before
    # hashing (and the file size is omitted)
    filemaps = {}
    for m in (d.getVar('SSTATE_HASHEQUIV_FILEMAP') or '').split():
        entry = m.split(":")
        if len(entry) != 3 or entry[0] != task:
            continue
        filemaps.setdefault(entry[1], [])
        filemaps[entry[1]].append(entry[2])

    try:
        os.chdir(path)
        basepath = os.path.normpath(path)

        # Header: identifies the hash scheme and version so changing either
        # invalidates old hashes
        update_hash("OEOuthashBasic\n")
        if hash_version:
            update_hash(hash_version + "\n")

        if extra_sigdata:
            update_hash(extra_sigdata + "\n")

        # It is only currently useful to get equivalent hashes for things that
        # can be restored from sstate. Since the sstate object is named using
        # SSTATE_PKGSPEC and the task name, those should be included in the
        # output hash calculation.
        update_hash("SSTATE_PKGSPEC=%s\n" % d.getVar('SSTATE_PKGSPEC'))
        update_hash("task=%s\n" % task)

        for root, dirs, files in os.walk('.', topdown=True):
            # Sort directories to ensure consistent ordering when recursing
            dirs.sort()
            files.sort()

            def process(path):
                # Emit one ls -l style record for this entry; the byte
                # layout here defines the hash, so keep it stable
                s = os.lstat(path)

                # File type character
                if stat.S_ISDIR(s.st_mode):
                    update_hash('d')
                elif stat.S_ISCHR(s.st_mode):
                    update_hash('c')
                elif stat.S_ISBLK(s.st_mode):
                    update_hash('b')
                elif stat.S_ISSOCK(s.st_mode):
                    update_hash('s')
                elif stat.S_ISLNK(s.st_mode):
                    update_hash('l')
                elif stat.S_ISFIFO(s.st_mode):
                    update_hash('p')
                else:
                    update_hash('-')

                def add_perm(mask, on, off='-'):
                    if mask & s.st_mode:
                        update_hash(on)
                    else:
                        update_hash(off)

                # Permission bits, rwx style, with setuid/setgid/sticky
                add_perm(stat.S_IRUSR, 'r')
                add_perm(stat.S_IWUSR, 'w')
                if stat.S_ISUID & s.st_mode:
                    add_perm(stat.S_IXUSR, 's', 'S')
                else:
                    add_perm(stat.S_IXUSR, 'x')

                if include_owners:
                    # Group/other permissions are only relevant in pseudo context
                    add_perm(stat.S_IRGRP, 'r')
                    add_perm(stat.S_IWGRP, 'w')
                    if stat.S_ISGID & s.st_mode:
                        add_perm(stat.S_IXGRP, 's', 'S')
                    else:
                        add_perm(stat.S_IXGRP, 'x')

                    add_perm(stat.S_IROTH, 'r')
                    add_perm(stat.S_IWOTH, 'w')
                    if stat.S_ISVTX & s.st_mode:
                        update_hash('t')
                    else:
                        add_perm(stat.S_IXOTH, 'x')

                    try:
                        update_hash(" %10s" % pwd.getpwuid(s.st_uid).pw_name)
                        update_hash(" %10s" % grp.getgrgid(s.st_gid).gr_name)
                    except KeyError as e:
                        msg = ("KeyError: %s\nPath %s is owned by uid %d, gid %d, which doesn't match "
                            "any user/group on target. This may be due to host contamination." %
                            (e, os.path.abspath(path), s.st_uid, s.st_gid))
                        raise Exception(msg).with_traceback(e.__traceback__)

                if include_timestamps:
                    update_hash(" %10d" % s.st_mtime)

                # Device numbers for block/char devices, blank otherwise
                update_hash(" ")
                if stat.S_ISBLK(s.st_mode) or stat.S_ISCHR(s.st_mode):
                    update_hash("%9s" % ("%d.%d" % (os.major(s.st_rdev), os.minor(s.st_rdev))))
                else:
                    update_hash(" " * 9)

                # Does any SSTATE_HASHEQUIV_FILEMAP glob apply to this file?
                filterfile = False
                for entry in filemaps:
                    if fnmatch.fnmatch(path, entry):
                        filterfile = True

                # Size is omitted for filtered files since their contents
                # are rewritten before hashing below
                update_hash(" ")
                if stat.S_ISREG(s.st_mode) and not filterfile:
                    update_hash("%10d" % s.st_size)
                else:
                    update_hash(" " * 10)

                update_hash(" ")
                fh = hashlib.sha256()
                if stat.S_ISREG(s.st_mode):
                    # Hash file contents
                    # NOTE(review): 'd' here shadows the datastore parameter;
                    # harmless since d is no longer used past this point
                    if filterfile:
                        # Need to ignore paths in crossscripts and postinst-useradd files.
                        with open(path, 'rb') as d:
                            chunk = d.read()
                            chunk = chunk.replace(bytes(basepath, encoding='utf8'), b'')
                            for entry in filemaps:
                                if not fnmatch.fnmatch(path, entry):
                                    continue
                                for r in filemaps[entry]:
                                    if r.startswith("regex-"):
                                        chunk = re.sub(bytes(r[6:], encoding='utf8'), b'', chunk)
                                    else:
                                        chunk = chunk.replace(bytes(r, encoding='utf8'), b'')
                            fh.update(chunk)
                    else:
                        with open(path, 'rb') as d:
                            for chunk in iter(lambda: d.read(4096), b""):
                                fh.update(chunk)
                    update_hash(fh.hexdigest())
                else:
                    # Keep the record width constant for non-regular files
                    update_hash(" " * len(fh.hexdigest()))

                update_hash(" %s" % path)

                if stat.S_ISLNK(s.st_mode):
                    update_hash(" -> %s" % os.readlink(path))

                update_hash("\n")

            # Process this directory and all its child files
            if include_root or root != ".":
                process(root)
            for f in files:
                if f == 'fixmepath':
                    continue
                process(os.path.join(root, f))

            # Directory symlinks are not descended into by os.walk; hash
            # them as entries here
            for dir in dirs:
                if os.path.islink(os.path.join(root, dir)):
                    process(os.path.join(root, dir))
    finally:
        # Always restore the working directory, even on error
        os.chdir(prev_dir)

    return h.hexdigest()
685
686
687