xref: /openbmc/openbmc/poky/scripts/lib/wic/filemap.py (revision fc113ead)
1#
2# Copyright (c) 2012 Intel, Inc.
3#
4# SPDX-License-Identifier: GPL-2.0-only
5#
6
7"""
This module implements a way to get the block mapping of a file. Two methods
9are supported - the FIEMAP ioctl and the 'SEEK_HOLE / SEEK_DATA' features of
10the file seek syscall. The former is implemented by the 'FilemapFiemap' class,
11the latter is implemented by the 'FilemapSeek' class. Both classes provide the
12same API. The 'filemap' function automatically selects which class can be used
13and returns an instance of the class.
14"""
15
16# Disable the following pylint recommendations:
17#   * Too many instance attributes (R0902)
18# pylint: disable=R0902
19
20import errno
21import os
22import struct
23import array
24import fcntl
25import tempfile
26import logging
27
def get_block_size(file_obj):
    """
    Returns block size for file object 'file_obj'. Errors are indicated by the
    'IOError' exception.

    The size is first queried with the FIGETBSZ ioctl. If the ioctl fails or
    reports zero, the 'st_blksize' field returned by 'os.fstat()' is used
    instead. The result is capped at 4KiB.
    """
    # Get the block size of the host file-system for the image file by calling
    # the FIGETBSZ ioctl (number 2).
    try:
        binary_data = fcntl.ioctl(file_obj, 2, struct.pack('I', 0))
        bsize = struct.unpack('I', binary_data)[0]
    except OSError:
        bsize = None

    # If the ioctl raised OSError or reported a zero block size, fall back to
    # os.fstat. A missing or zero 'st_blksize' is reported as an error rather
    # than returned, because a zero block size is unusable for the callers
    # (they divide by it).
    if not bsize:
        bsize = getattr(os.fstat(file_obj.fileno()), 'st_blksize', None)
        if not bsize:
            raise IOError("Unable to determine block size")

    # The logic in this script only supports a maximum of a 4KB
    # block size
    max_block_size = 4 * 1024
    return min(bsize, max_block_size)
57
class ErrorNotSupp(Exception):
    """
    Raised when the 'FIEMAP' or 'SEEK_HOLE' feature is unavailable, either
    because the kernel or because the file-system does not support it.
    """
64
class Error(Exception):
    """Generic exception type for every other failure raised by this module."""
68
69
class _FilemapBase(object):
    """
    This is a base class for a couple of other classes in this module. This
    class simply performs the common parts of the initialization process: opens
    the image file, gets its size, etc. The 'log' parameter is the logger object
    to use for printing messages.
    """

    def __init__(self, image, log=None):
        """
        Initialize a class instance. The 'image' argument is full path to the
        file or file object to operate on.

        On success the instance exposes 'image_size' (bytes), 'block_size'
        (bytes) and 'blocks_cnt' (image size rounded up to whole blocks).
        Failures to stat, flush or synchronize the image are reported with
        the 'Error' exception.
        """

        self._log = log
        if self._log is None:
            self._log = logging.getLogger(__name__)

        # Only files opened by this class are closed by the destructor
        self._f_image_needs_close = False

        if hasattr(image, "fileno"):
            self._f_image = image
            self._image_path = image.name
        else:
            self._image_path = image
            self._open_image_file()

        try:
            self.image_size = os.fstat(self._f_image.fileno()).st_size
        except IOError as err:
            raise Error("cannot get information about file '%s': %s"
                        % (self._image_path, err))

        try:
            self.block_size = get_block_size(self._f_image)
        except IOError as err:
            raise Error("cannot get block size for '%s': %s"
                        % (self._image_path, err))

        # Round the image size up to a whole number of blocks
        self.blocks_cnt = self.image_size + self.block_size - 1
        self.blocks_cnt //= self.block_size

        try:
            self._f_image.flush()
        except IOError as err:
            raise Error("cannot flush image file '%s': %s"
                        % (self._image_path, err))

        try:
            os.fsync(self._f_image.fileno())
        except OSError as err:
            raise Error("cannot synchronize image file '%s': %s "
                        % (self._image_path, err.strerror))

        self._log.debug("opened image \"%s\"" % self._image_path)
        self._log.debug("block size %d, blocks count %d, image size %d"
                        % (self.block_size, self.blocks_cnt, self.image_size))

    def __del__(self):
        """The class destructor which just closes the image file."""
        # The destructor also runs for instances whose initialization did not
        # get as far as setting the attributes, so do not assume they exist.
        if getattr(self, "_f_image_needs_close", False):
            # Clear the flag first so that the file is closed only once
            self._f_image_needs_close = False
            self._f_image.close()

    def _open_image_file(self):
        """Open the image file."""
        try:
            self._f_image = open(self._image_path, 'rb')
        except IOError as err:
            raise Error("cannot open image file '%s': %s"
                        % (self._image_path, err))

        self._f_image_needs_close = True

    def block_is_mapped(self, block): # pylint: disable=W0613,R0201
        """
        This method has to be implemented by child classes. It returns
        'True' if block number 'block' of the image file is mapped and 'False'
        otherwise.
        """

        raise Error("the method is not implemented")

    def get_mapped_ranges(self, start, count): # pylint: disable=W0613,R0201
        """
        This method has to be implemented by child classes. This is a
        generator which yields ranges of mapped blocks in the file. The ranges
        are tuples of 2 elements: [first, last], where 'first' is the first
        mapped block and 'last' is the last mapped block.

        The ranges are yielded for the area of the file of size 'count' blocks,
        starting from block 'start'.
        """

        raise Error("the method is not implemented")
164
165
# The 'SEEK_HOLE' and 'SEEK_DATA' options of the file seek system call. The
# values exported by the 'os' module are preferred when they are available;
# the literals are the Linux values and serve as a fallback.
_SEEK_DATA = getattr(os, "SEEK_DATA", 3)
_SEEK_HOLE = getattr(os, "SEEK_HOLE", 4)
169
def _lseek(file_obj, offset, whence):
    """Helper which calls 'os.lseek' on the descriptor of 'file_obj' with the
    given 'offset' and 'whence', where 'whence' is expected to be '_SEEK_DATA'
    or '_SEEK_HOLE'. Returns the position of the data or hole that was found,
    or '-1' when there is no more data or hole starting from 'offset'. Raises
    'ErrorNotSupp' when the seek mode is rejected as invalid."""

    try:
        position = os.lseek(file_obj.fileno(), offset, whence)
    except OSError as exc:
        code = exc.errno
        if code == errno.EINVAL:
            raise ErrorNotSupp("the kernel or file-system does not support "
                               "\"SEEK_HOLE\" and \"SEEK_DATA\"")
        if code != errno.ENXIO:
            raise
        # ENXIO is how the 'lseek' system call reports that there is no data
        # or hole starting from the specified offset.
        position = -1

    return position
189
class FilemapSeek(_FilemapBase):
    """
    This class uses the 'SEEK_HOLE' and 'SEEK_DATA' to find file block mapping.
    Unfortunately, the current implementation requires the caller to have write
    access to the directory containing the image file, because a temporary
    file is created there to probe for 'SEEK_HOLE' support.
    """

    def __init__(self, image, log=None):
        """Refer the '_FilemapBase' class for the documentation."""

        # Call the base class constructor first
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapSeek: initializing")

        self._probe_seek_hole()

    def _probe_seek_hole(self):
        """
        Check whether the system implements 'SEEK_HOLE' and 'SEEK_DATA'.
        Unfortunately, there seems to be no clean way for detecting this,
        because often the system just fakes them by just assuming that all
        files are fully mapped, so 'SEEK_HOLE' always returns EOF and
        'SEEK_DATA' always returns the requested offset.

        I could not invent a better way of detecting the fake 'SEEK_HOLE'
        implementation than just to create a temporary file in the same
        directory where the image file resides. It would be nice to change this
        to something better.

        Raises 'ErrorNotSupp' if the temporary file cannot be created or
        truncated, or if only a stub implementation is detected.
        """

        directory = os.path.dirname(self._image_path)

        try:
            tmp_obj = tempfile.TemporaryFile("w+", dir=directory)
        except IOError as err:
            raise ErrorNotSupp("cannot create a temporary in \"%s\": %s"
                               % (directory, err))

        # The 'with' statement makes sure the temporary file is closed on every
        # exit path, including the error paths below.
        with tmp_obj:
            try:
                # Make the file one block long without writing any data to it
                os.ftruncate(tmp_obj.fileno(), self.block_size)
            except OSError as err:
                raise ErrorNotSupp("cannot truncate temporary file in \"%s\": %s"
                                   % (directory, err))

            offs = _lseek(tmp_obj, 0, _SEEK_HOLE)

        if offs != 0:
            # We are dealing with the stub 'SEEK_HOLE' implementation which
            # always returns EOF.
            self._log.debug("lseek(0, SEEK_HOLE) returned %d" % offs)
            raise ErrorNotSupp("the file-system does not support "
                               "\"SEEK_HOLE\" and \"SEEK_DATA\" but only "
                               "provides a stub implementation")

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        offs = _lseek(self._f_image, block * self.block_size, _SEEK_DATA)
        if offs == -1:
            result = False
        else:
            # The block is mapped only if the nearest data starts inside it
            result = (offs // self.block_size == block)

        self._log.debug("FilemapSeek: block_is_mapped(%d) returns %s"
                        % (block, result))
        return result

    def _get_ranges(self, start, count, whence1, whence2):
        """
        This function implements 'get_mapped_ranges()' depending
        on what is passed in the 'whence1' and 'whence2' arguments.

        'whence1' is used to find the beginning of each range and 'whence2' to
        find its end. Ranges are yielded as (first block, last block) tuples
        and are limited to 'count' blocks starting from block 'start'.
        """

        assert whence1 != whence2
        end = start * self.block_size
        limit = end + count * self.block_size

        while True:
            start = _lseek(self._f_image, end, whence1)
            if start == -1 or start >= limit or start == self.image_size:
                break

            end = _lseek(self._f_image, start, whence2)
            if end == -1 or end == self.image_size:
                # The range runs up to the end of the file: round the end up
                # to the end of the last block.
                end = self.blocks_cnt * self.block_size
            if end > limit:
                end = limit

            start_blk = start // self.block_size
            end_blk = end // self.block_size - 1
            self._log.debug("FilemapSeek: yielding range (%d, %d)"
                            % (start_blk, end_blk))
            yield (start_blk, end_blk)

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapSeek: get_mapped_ranges(%d,  %d(%d))"
                        % (start, count, start + count - 1))
        return self._get_ranges(start, count, _SEEK_DATA, _SEEK_HOLE)
289
290
# What follows is the FIEMAP ioctl implementation. It is not very readable
# because the FIEMAP ioctl itself is rather complex; to follow the code you
# need to know the FIEMAP interface, which is documented in the
# "Documentation/filesystems/fiemap.txt" file in the Linux kernel sources.

# 'struct' format strings describing 'struct fiemap' and
# 'struct fiemap_extent'
_FIEMAP_FORMAT = "=QQLLLL"
_FIEMAP_EXTENT_FORMAT = "=QQQQQLLLL"

# sizeof(struct fiemap) and sizeof(struct fiemap_extent)
_FIEMAP_SIZE = struct.calcsize(_FIEMAP_FORMAT)
_FIEMAP_EXTENT_SIZE = struct.calcsize(_FIEMAP_EXTENT_FORMAT)

# The FIEMAP ioctl number
_FIEMAP_IOCTL = 0xC020660B

# FIEMAP ioctl flag asking the kernel to sync the file before reading the
# block map
_FIEMAP_FLAG_SYNC = 0x1

# Size of the buffer holding the 'struct fiemap_extent' elements passed to the
# FIEMAP ioctl. The larger the buffer, the fewer times the FIEMAP ioctl has to
# be invoked.
_FIEMAP_BUFFER_SIZE = 256 << 10
313
class FilemapFiemap(_FilemapBase):
    """
    This class provides API to the FIEMAP ioctl. Namely, it allows to iterate
    over all mapped blocks and over all holes.

    This class synchronizes the image file every time it invokes the FIEMAP
    ioctl in order to work-around early FIEMAP implementation kernel bugs.
    """

    def __init__(self, image, log=None):
        """
        Initialize a class instance. The 'image' argument is the full path to
        the file or the file object to operate on.

        Raises 'ErrorNotSupp' if the FIEMAP ioctl is not supported.
        """

        # Call the base class constructor first
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapFiemap: initializing")

        self._buf_size = _FIEMAP_BUFFER_SIZE

        # Calculate how many 'struct fiemap_extent' elements fit the buffer
        self._buf_size -= _FIEMAP_SIZE
        self._fiemap_extent_cnt = self._buf_size // _FIEMAP_EXTENT_SIZE
        assert self._fiemap_extent_cnt > 0
        self._buf_size = self._fiemap_extent_cnt * _FIEMAP_EXTENT_SIZE
        self._buf_size += _FIEMAP_SIZE

        # Allocate a mutable buffer for the FIEMAP ioctl
        self._buf = array.array('B', [0] * self._buf_size)

        # Check if the FIEMAP ioctl is supported
        self.block_is_mapped(0)

    def _invoke_fiemap(self, block, count):
        """
        Invoke the FIEMAP ioctl for 'count' blocks of the file starting from
        block number 'block'.

        The full result of the operation is stored in 'self._buf' on exit.
        Returns the unpacked 'struct fiemap' data structure in form of a python
        tuple (just like 'struct.unpack()').

        Raises 'ErrorNotSupp' if the ioctl is not supported by the kernel or
        the file-system, and 'Error' for a bad block number or any other ioctl
        failure.
        """

        if self.blocks_cnt != 0 and (block < 0 or block >= self.blocks_cnt):
            # The last valid block number is 'blocks_cnt - 1'
            raise Error("bad block number %d, should be within [0, %d]"
                        % (block, self.blocks_cnt - 1))

        # Initialize the 'struct fiemap' part of the buffer. We use the
        # '_FIEMAP_FLAG_SYNC' flag in order to make sure the file is
        # synchronized. The reason for this is that early FIEMAP
        # implementations had many bugs related to cached dirty data, and
        # synchronizing the file is a necessary work-around.
        struct.pack_into(_FIEMAP_FORMAT, self._buf, 0, block * self.block_size,
                         count * self.block_size, _FIEMAP_FLAG_SYNC, 0,
                         self._fiemap_extent_cnt, 0)

        try:
            # The last argument makes the ioctl write its result back into
            # 'self._buf'.
            fcntl.ioctl(self._f_image, _FIEMAP_IOCTL, self._buf, 1)
        except IOError as err:
            # Note, the FIEMAP ioctl is supported by the Linux kernel starting
            # from version 2.6.28 (year 2008).
            if err.errno == errno.EOPNOTSUPP:
                errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
                         "by the file-system"
                self._log.debug(errstr)
                raise ErrorNotSupp(errstr)
            if err.errno == errno.ENOTTY:
                errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
                         "by the kernel"
                self._log.debug(errstr)
                raise ErrorNotSupp(errstr)
            raise Error("the FIEMAP ioctl failed for '%s': %s"
                        % (self._image_path, err))

        # Unpack in place, without copying the header out of the buffer
        return struct.unpack_from(_FIEMAP_FORMAT, self._buf, 0)

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        struct_fiemap = self._invoke_fiemap(block, 1)

        # Element number 3 of 'struct_fiemap' (counting from zero) is the
        # 'fm_mapped_extents' field. If it contains zero, the block is not
        # mapped, otherwise it is mapped.
        result = bool(struct_fiemap[3])
        self._log.debug("FilemapFiemap: block_is_mapped(%d) returns %s"
                        % (block, result))
        return result

    def _unpack_fiemap_extent(self, index):
        """
        Unpack a 'struct fiemap_extent' structure object number 'index' from
        the internal 'self._buf' buffer.
        """

        offset = _FIEMAP_SIZE + _FIEMAP_EXTENT_SIZE * index
        return struct.unpack_from(_FIEMAP_EXTENT_FORMAT, self._buf, offset)

    def _do_get_mapped_ranges(self, start, count):
        """
        Implements most the functionality for the 'get_mapped_ranges()'
        generator: invokes the FIEMAP ioctl, walks through the mapped extents
        and yields mapped block ranges. However, the ranges may be adjacent
        (e.g., (1, 100), (101, 200)) and 'get_mapped_ranges()' simply merges
        them.
        """

        block = start
        while block < start + count:
            struct_fiemap = self._invoke_fiemap(block, count)

            mapped_extents = struct_fiemap[3]
            if mapped_extents == 0:
                # No more mapped blocks
                return

            extent = 0
            while extent < mapped_extents:
                fiemap_extent = self._unpack_fiemap_extent(extent)

                # Start of the extent
                extent_start = fiemap_extent[0]
                # Starting block number of the extent
                extent_block = extent_start // self.block_size
                # Length of the extent
                extent_len = fiemap_extent[2]
                # Count of blocks in the extent
                extent_count = extent_len // self.block_size

                # Extent length and offset have to be block-aligned
                assert extent_start % self.block_size == 0
                assert extent_len % self.block_size == 0

                if extent_block > start + count - 1:
                    return

                # Clip the extent to the requested area
                first = max(extent_block, block)
                last = min(extent_block + extent_count, start + count) - 1
                yield (first, last)

                extent += 1

            # Continue from the block following the last extent seen
            block = extent_block + extent_count

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapFiemap: get_mapped_ranges(%d,  %d(%d))"
                        % (start, count, start + count - 1))
        iterator = self._do_get_mapped_ranges(start, count)

        try:
            first_prev, last_prev = next(iterator)
        except StopIteration:
            # There are no mapped blocks in the requested area. Letting
            # 'StopIteration' escape from a generator is an error (it is
            # turned into 'RuntimeError'), so finish explicitly.
            return

        for first, last in iterator:
            if last_prev == first - 1:
                # Adjacent to the previous range - merge them
                last_prev = last
            else:
                self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                                % (first_prev, last_prev))
                yield (first_prev, last_prev)
                first_prev, last_prev = first, last

        self._log.debug("FilemapFiemap: yielding range (%d, %d)"
                        % (first_prev, last_prev))
        yield (first_prev, last_prev)
478
class FilemapNobmap(_FilemapBase):
    """
    Fallback used when neither 'SEEK_DATA/HOLE' nor FIEMAP is supported by
    the filesystem or kernel. Every block is reported as mapped.
    """

    def __init__(self, image, log=None):
        """Refer the '_FilemapBase' class for the documentation."""

        # The base class does all of the initialization work
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapNobmap: initializing")

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        return True

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        last = start + count - 1
        self._log.debug("FilemapNobmap: get_mapped_ranges(%d,  %d(%d))"
                        % (start, count, last))
        # The whole requested area is reported as a single mapped range
        yield (start, last)
501
def filemap(image, log=None):
    """
    Create and return an instance of a Filemap class for 'image', depending on
    what the system we run on supports. 'FilemapFiemap' is tried first; if it
    raises 'ErrorNotSupp', 'FilemapSeek' is tried; if that raises
    'ErrorNotSupp' as well, an instance of 'FilemapNobmap' is returned.
    """

    # Try the implementations in order of preference
    for candidate in (FilemapFiemap, FilemapSeek):
        try:
            return candidate(image, log)
        except ErrorNotSupp:
            continue

    return FilemapNobmap(image, log)
519
def sparse_copy(src_fname, dst_fname, skip=0, seek=0,
                length=0, api=None):
    """
    Efficiently copy sparse file to or into another file.

    src_fname: path to source file
    dst_fname: path to destination file
    skip: skip N bytes at the start of src
    seek: seek N bytes from the start of dst
    length: read N bytes from src and write them to dst (0 means no limit)
    api: callable which takes the source path and returns a filemap object,
         e.g. the FilemapFiemap or FilemapSeek class; 'filemap' is used when
         it is not specified

    Only the mapped ranges of the source are read and written. If the
    destination cannot be opened for update it is created and sized with
    'truncate' first.
    """
    if not api:
        api = filemap
    fmap = api(src_fname)

    try:
        dst_file = open(dst_fname, 'r+b')
        created = False
    except IOError:
        dst_file = open(dst_fname, 'wb')
        created = True

    # The 'with' statement closes the destination on every exit path,
    # including exceptions raised while sizing or copying.
    with dst_file:
        if created:
            if length:
                dst_size = length + seek
            else:
                dst_size = os.path.getsize(src_fname) + seek - skip
            dst_file.truncate(dst_size)

        written = 0
        for first, last in fmap.get_mapped_ranges(0, fmap.blocks_cnt):
            start = first * fmap.block_size
            end = (last + 1) * fmap.block_size

            # The whole range lies in the skipped area
            if skip >= end:
                continue

            # The range straddles the skip offset - start from the offset
            if start < skip < end:
                start = skip

            fmap._f_image.seek(start, os.SEEK_SET)

            # Account for the hole which precedes this range
            written = start - skip
            if length and written >= length:
                return

            dst_file.seek(seek + start - skip, os.SEEK_SET)

            chunk_size = 1024 * 1024
            to_read = end - start
            read = 0

            while read < to_read:
                if read + chunk_size > to_read:
                    chunk_size = to_read - read
                size = chunk_size
                # Do not copy more than 'length' bytes in total
                if length and written + size > length:
                    size = length - written
                chunk = fmap._f_image.read(size)
                dst_file.write(chunk)
                read += size
                written += size
                if written == length:
                    return
584