xref: /openbmc/openbmc/poky/bitbake/lib/bb/checksum.py (revision 03907ee1)
1# Local file checksum cache implementation
2#
3# Copyright (C) 2012 Intel Corporation
4#
5# SPDX-License-Identifier: GPL-2.0-only
6#
7
8import glob
9import operator
10import os
11import stat
12import bb.utils
13import logging
14import re
15from bb.cache import MultiProcessCache
16
# Module logger, registered under the "BitBake.Cache" name.
logger = logging.getLogger("BitBake.Cache")

# Splits a file-checksums string into its entries. The split point is any run
# of whitespace that immediately follows a ":True" or ":False" suffix (the
# look-behinds keep that suffix attached to the entry), so whitespace anywhere
# else inside an entry does not break it apart.
filelist_regex = re.compile(r'(?:(?<=:True)|(?<=:False))\s+')
20
# mtime cache (non-persistent)
# Based on the assumption that files do not change during a bitbake run.
class FileMtimeCache(object):
    """In-memory map from a file path to the mtime recorded for it.

    The backing dict is a class attribute, so every instance in the
    process reads and writes the same table. Nothing is persisted.
    """

    # path -> value of os.stat(path)[stat.ST_MTIME] at the time it was recorded
    cache = {}

    def cached_mtime(self, f):
        """Return the recorded mtime of *f*, stat'ing the file only if unknown.

        Raises OSError when *f* is not yet recorded and cannot be stat'ed.
        """
        known = self.cache
        if f in known:
            return known[f]
        # Tuple-style indexing is deliberate: it yields whole seconds, whereas
        # the st_mtime attribute would be a float.
        mtime = os.stat(f)[stat.ST_MTIME]
        known[f] = mtime
        return mtime

    def cached_mtime_noerror(self, f):
        """Like cached_mtime(), but return 0 if *f* cannot be stat'ed.

        A failed lookup is not recorded, so the next call stats the file again.
        """
        known = self.cache
        if f in known:
            return known[f]
        try:
            mtime = os.stat(f)[stat.ST_MTIME]
        except OSError:
            return 0
        known[f] = mtime
        return mtime

    def update_mtime(self, f):
        """Re-stat *f* unconditionally, record the new mtime and return it.

        Raises OSError when *f* cannot be stat'ed.
        """
        mtime = self.cache[f] = os.stat(f)[stat.ST_MTIME]
        return mtime

    def clear(self):
        """Forget every recorded mtime (affects all instances)."""
        self.cache.clear()
45
46# Checksum + mtime cache (persistent)
class FileChecksumCache(MultiProcessCache):
    """Checksum cache for local files, keyed by normalised path.

    Entries are ``(mtime, checksum)`` tuples. A stored checksum is reused
    only while the file's current mtime equals the recorded one; otherwise
    the checksum is recomputed.
    """
    cache_file_name = "local_file_checksum_cache.dat"
    CACHE_VERSION = 1

    def __init__(self):
        # mtime_cache is assigned before the base initialiser runs.
        self.mtime_cache = FileMtimeCache()
        MultiProcessCache.__init__(self)

    def get_checksum(self, f):
        """Return the checksum of file *f*, using the cached value if still valid.

        The path is normalised with os.path.normpath() before lookup. Existing
        entries are read from ``cachedata[0]``; a freshly computed checksum is
        written to ``cachedata_extras[0]``.

        Raises OSError if the file cannot be stat'ed.
        """
        f = os.path.normpath(f)
        entry = self.cachedata[0].get(f)
        cmtime = self.mtime_cache.cached_mtime(f)
        if entry:
            (mtime, hashval) = entry
            if cmtime == mtime:
                return hashval
            bb.debug(2, "file %s changed mtime, recompute checksum" % f)

        hashval = bb.utils.md5_file(f)
        self.cachedata_extras[0][f] = (cmtime, hashval)
        return hashval

    def merge_data(self, source, dest):
        """Merge the entries of ``source[0]`` into ``dest[0]`` in place.

        An entry missing from the destination is copied. For a path present
        on both sides, the source entry wins only if its mtime is strictly
        newer than the destination's.
        """
        src_entries = source[0]
        dest_entries = dest[0]
        for path, src_entry in src_entries.items():
            # Membership must be tested against the dict inside ``dest``;
            # testing the outer container never matches a path key, which
            # would make the mtime comparison below unreachable.
            if path in dest_entries:
                (smtime, _) = src_entry
                (dmtime, _) = dest_entries[path]
                if smtime > dmtime:
                    dest_entries[path] = src_entry
            else:
                dest_entries[path] = src_entry

    def get_checksums(self, filelist, pn, localdirsexclude):
        """Get checksums for a list of files.

        Parameters:
            filelist: string of ``path:True`` / ``path:False`` entries,
                separated as defined by ``filelist_regex``. Entries flagged
                ``False`` are skipped. A path containing ``*`` is expanded
                with glob.glob().
            pn: name interpolated into the warning emitted when a file cannot
                be checksummed (presumably the recipe name; confirm with callers).
            localdirsexclude: container of directory names that are pruned
                while walking a directory.

        Returns:
            A list of ``(path, checksum)`` tuples sorted by checksum. Files
            found by walking a directory are reported with a ``/./`` marker
            inserted after the directory that was walked. Directories that
            are symbolic links are skipped, and files whose checksum could
            not be obtained are omitted.

        Raises:
            IndexError: if an entry contains no ``:`` separator.
        """

        def checksum_file(f):
            # Best effort: an unreadable file is reported and skipped.
            try:
                checksum = self.get_checksum(f)
            except OSError as e:
                bb.warn("Unable to get checksum for %s SRC_URI entry %s: %s" % (pn, os.path.basename(f), e))
                return None
            return checksum

        #
        # Changing the format of file-checksums is problematic as both OE and Bitbake have
        # knowledge of them. We need to encode a new piece of data, the portion of the path
        # we care about from a checksum perspective. This means that files that change subdirectory
        # are tracked by the task hashes. To do this, we do something horrible and put a "/./" into
        # the path. The filesystem handles it but it gives us a marker to know which subsection
        # of the path to cache.
        #
        def checksum_dir(pth):
            # Handle directories recursively
            if pth == "/":
                bb.fatal("Refusing to checksum /")
            pth = pth.rstrip("/")
            marked_root = os.path.join(pth, ".")
            dirchecksums = []
            for root, dirs, files in os.walk(pth, topdown=True):
                # Prune excluded directories in place so os.walk() does not
                # descend into them.
                dirs[:] = [d for d in dirs if d not in localdirsexclude]
                for name in files:
                    # Every walked path starts with ``pth``; rewrite only that
                    # leading occurrence so a later repeat of the same text in
                    # the path is left untouched.
                    fullpth = os.path.join(root, name).replace(pth, marked_root, 1)
                    checksum = checksum_file(fullpth)
                    if checksum:
                        dirchecksums.append((fullpth, checksum))
            return dirchecksums

        def checksum_path(p):
            # Dispatch a single concrete path: walk real directories, ignore
            # symlinked directories, checksum anything else as a file.
            if os.path.isdir(p):
                if os.path.islink(p):
                    return []
                return checksum_dir(p)
            checksum = checksum_file(p)
            if checksum:
                return [(p, checksum)]
            return []

        checksums = []
        for pth in filelist_regex.split(filelist):
            if not pth:
                continue
            pth = pth.strip()
            if not pth:
                continue
            # The existence flag is whatever follows the last colon, so split
            # from the right; this keeps paths that themselves contain ':'
            # intact. An entry without any colon raises IndexError here.
            parts = pth.rsplit(":", 1)
            exist = parts[1]
            if exist == "False":
                continue
            pth = parts[0]
            if '*' in pth:
                # Handle globs
                for f in glob.glob(pth):
                    checksums.extend(checksum_path(f))
            else:
                checksums.extend(checksum_path(pth))

        checksums.sort(key=operator.itemgetter(1))
        return checksums
145