#
# Copyright (c) 2012 Intel, Inc.
#
# SPDX-License-Identifier: GPL-2.0-only
#

"""
This module implements a way to get the block map of a file, i.e. to find out
which blocks of the file are mapped and which are holes. Two methods are
supported - the FIEMAP ioctl and the 'SEEK_HOLE / SEEK_DATA' features of the
file seek syscall. The former is implemented by the 'FilemapFiemap' class,
the latter is implemented by the 'FilemapSeek' class. A third class,
'FilemapNobmap', reports the whole file as mapped and is used when neither
method is available. All classes provide the same API. The 'filemap' function
automatically selects which class can be used and returns an instance of the
class.
"""

# Disable the following pylint recommendations:
#   * Too many instance attributes (R0902)
# pylint: disable=R0902

import errno
import os
import struct
import array
import fcntl
import tempfile
import logging

# The FIGETBSZ ioctl number
_FIGETBSZ_IOCTL = 2
# The logic in this module only supports a maximum of a 4KB block size
_MAX_BLOCK_SIZE = 4 * 1024


def get_block_size(file_obj):
    """
    Returns block size for file object 'file_obj'. Errors are indicated by the
    'IOError' exception.

    The FIGETBSZ ioctl is tried first. If it fails with 'OSError' or reports
    zero, the 'st_blksize' field of 'os.fstat()' is used instead. The result
    is capped at 4KB.
    """
    # Get the block size of the host file-system for the image file by calling
    # the FIGETBSZ ioctl (number 2).
    try:
        binary_data = fcntl.ioctl(file_obj, _FIGETBSZ_IOCTL,
                                  struct.pack('I', 0))
        bsize = struct.unpack('I', binary_data)[0]
    except OSError:
        bsize = None

    # If the ioctl causes OSError or gives a zero block size, fall back to
    # os.fstat
    if not bsize:
        bsize = getattr(os.fstat(file_obj.fileno()), 'st_blksize', 0)
        if not bsize:
            # A zero block size would cause a division by zero in the callers
            raise IOError("Unable to determine block size")

    return min(bsize, _MAX_BLOCK_SIZE)


class ErrorNotSupp(Exception):
    """
    An exception of this type is raised when the 'FIEMAP' or 'SEEK_HOLE' feature
    is not supported either by the kernel or the file-system.
    """
    pass


class Error(Exception):
    """A class for all the other exceptions raised by this module."""
    pass


class _FilemapBase(object):
    """
    This is a base class for a couple of other classes in this module. This
    class simply performs the common parts of the initialization process: opens
    the image file, gets its size, etc. The 'log' parameter is the logger object
    to use for printing messages.

    After initialization the following public attributes are available:
      * image_size - size of the image file in bytes
      * block_size - block size as returned by 'get_block_size()'
      * blocks_cnt - image size in blocks, rounded up
    """

    def __init__(self, image, log=None):
        """
        Initialize a class instance. The 'image' argument is full path to the
        file or file object to operate on. The 'log' argument is the logger
        to use; when it is 'None', the logger of this module is used.

        Raises 'Error' if the file cannot be opened, inspected, flushed or
        synchronized.
        """

        self._log = log
        if self._log is None:
            self._log = logging.getLogger(__name__)

        # Only files opened by this class are closed by this class
        self._f_image_needs_close = False

        if hasattr(image, "fileno"):
            self._f_image = image
            self._image_path = image.name
        else:
            self._image_path = image
            self._open_image_file()

        try:
            self.image_size = os.fstat(self._f_image.fileno()).st_size
        except (IOError, OSError) as err:
            raise Error("cannot get information about file '%s': %s"
                        % (self._image_path, err))

        try:
            self.block_size = get_block_size(self._f_image)
        except IOError as err:
            raise Error("cannot get block size for '%s': %s"
                        % (self._image_path, err))

        # Image size in blocks, rounded up
        self.blocks_cnt = self.image_size + self.block_size - 1
        self.blocks_cnt //= self.block_size

        try:
            self._f_image.flush()
        except IOError as err:
            raise Error("cannot flush image file '%s': %s"
                        % (self._image_path, err))

        try:
            os.fsync(self._f_image.fileno())
        except OSError as err:
            raise Error("cannot synchronize image file '%s': %s "
                        % (self._image_path, err.strerror))

        self._log.debug("opened image \"%s\"", self._image_path)
        self._log.debug("block size %d, blocks count %d, image size %d",
                        self.block_size, self.blocks_cnt, self.image_size)

    def __del__(self):
        """The class destructor which just closes the image file."""
        # Use 'getattr()' because the destructor may run on an instance whose
        # initialization did not get far enough to set the attributes.
        if getattr(self, "_f_image_needs_close", False):
            self._f_image.close()

    def _open_image_file(self):
        """
        Open the image file 'self._image_path' for reading in binary mode and
        remember that it has to be closed by this class. Raises 'Error' if the
        file cannot be opened.
        """
        try:
            self._f_image = open(self._image_path, 'rb')
        except IOError as err:
            raise Error("cannot open image file '%s': %s"
                        % (self._image_path, err))

        self._f_image_needs_close = True

    def block_is_mapped(self, block): # pylint: disable=W0613,R0201
        """
        This method has to be implemented by child classes. It returns
        'True' if block number 'block' of the image file is mapped and 'False'
        otherwise.
        """

        raise Error("the method is not implemented")

    def get_mapped_ranges(self, start, count): # pylint: disable=W0613,R0201
        """
        This method has to be implemented by child classes. This is a
        generator which yields ranges of mapped blocks in the file. The ranges
        are tuples of 2 elements: (first, last), where 'first' is the first
        mapped block and 'last' is the last mapped block.

        The ranges are yielded for the area of the file of size 'count' blocks,
        starting from block 'start'.
        """

        raise Error("the method is not implemented")


# The 'SEEK_HOLE' and 'SEEK_DATA' options of the file seek system call
_SEEK_DATA = 3
_SEEK_HOLE = 4


def _lseek(file_obj, offset, whence):
    """
    This is a helper function which invokes 'os.lseek' for file object
    'file_obj' and with specified 'offset' and 'whence'. The 'whence'
    argument is supposed to be either '_SEEK_DATA' or '_SEEK_HOLE'. When
    there is no more data or hole starting from 'offset', this function
    returns '-1'. Otherwise the data or hole position is returned.

    Raises 'ErrorNotSupp' if 'os.lseek' fails with EINVAL. Any other
    'OSError' is propagated unchanged.
    """

    try:
        return os.lseek(file_obj.fileno(), offset, whence)
    except OSError as err:
        # The 'lseek' system call returns the ENXIO if there is no data or
        # hole starting from the specified offset.
        if err.errno == errno.ENXIO:
            return -1
        if err.errno == errno.EINVAL:
            raise ErrorNotSupp("the kernel or file-system does not support "
                               "\"SEEK_HOLE\" and \"SEEK_DATA\"")
        raise


class FilemapSeek(_FilemapBase):
    """
    This class uses the 'SEEK_HOLE' and 'SEEK_DATA' to find file block mapping.
    Unfortunately, the current implementation has to create a temporary file
    in the directory of the image file in order to detect whether 'SEEK_HOLE'
    is really supported, so that directory has to be writable.
    """

    def __init__(self, image, log=None):
        """
        Refer the '_FilemapBase' class for the documentation. Additionally
        raises 'ErrorNotSupp' if 'SEEK_HOLE' is not usable.
        """

        # Call the base class constructor first
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapSeek: initializing")

        self._probe_seek_hole()

    def _probe_seek_hole(self):
        """
        Check whether the system implements 'SEEK_HOLE' and 'SEEK_DATA'.
        Unfortunately, there seems to be no clean way for detecting this,
        because often the system just fakes them by just assuming that all
        files are fully mapped, so 'SEEK_HOLE' always returns EOF and
        'SEEK_DATA' always returns the requested offset.

        I could not invent a better way of detecting the fake 'SEEK_HOLE'
        implementation than just to create a temporary file in the same
        directory where the image file resides. It would be nice to change this
        to something better.

        Raises 'ErrorNotSupp' if the temporary file cannot be created or
        truncated, or if 'SEEK_HOLE' turns out to be a stub.
        """

        directory = os.path.dirname(self._image_path)

        try:
            tmp_obj = tempfile.TemporaryFile("w+", dir=directory)
        except IOError as err:
            raise ErrorNotSupp("cannot create a temporary in \"%s\": %s"
                               % (directory, err))

        # Make sure the temporary file is closed on every exit path,
        # including the "not supported" ones.
        try:
            # Extend the empty file to one block without writing any data, so
            # that the whole file is a hole.
            try:
                os.ftruncate(tmp_obj.fileno(), self.block_size)
            except OSError as err:
                raise ErrorNotSupp("cannot truncate temporary file in "
                                   "\"%s\": %s" % (directory, err))

            offs = _lseek(tmp_obj, 0, _SEEK_HOLE)
            if offs != 0:
                # We are dealing with the stub 'SEEK_HOLE' implementation
                # which always returns EOF.
                self._log.debug("lseek(0, SEEK_HOLE) returned %d", offs)
                raise ErrorNotSupp("the file-system does not support "
                                   "\"SEEK_HOLE\" and \"SEEK_DATA\" but only "
                                   "provides a stub implementation")
        finally:
            tmp_obj.close()

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        offs = _lseek(self._f_image, block * self.block_size, _SEEK_DATA)
        if offs == -1:
            result = False
        else:
            # The block is mapped if the nearest data starts within it
            result = (offs // self.block_size == block)

        self._log.debug("FilemapSeek: block_is_mapped(%d) returns %s",
                        block, result)
        return result

    def _get_ranges(self, start, count, whence1, whence2):
        """
        This function implements 'get_mapped_ranges()' depending
        on what is passed in the 'whence1' and 'whence2' arguments: 'whence1'
        is used to find the beginning of each range and 'whence2' is used to
        find its end.
        """

        assert whence1 != whence2
        end = start * self.block_size
        limit = end + count * self.block_size

        while True:
            start = _lseek(self._f_image, end, whence1)
            if start == -1 or start >= limit or start == self.image_size:
                break

            end = _lseek(self._f_image, start, whence2)
            if end == -1 or end == self.image_size:
                # The range lasts till the end of the file, round the end up
                # to the block boundary
                end = self.blocks_cnt * self.block_size
            if end > limit:
                end = limit

            start_blk = start // self.block_size
            end_blk = end // self.block_size - 1
            self._log.debug("FilemapSeek: yielding range (%d, %d)",
                            start_blk, end_blk)
            yield (start_blk, end_blk)

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapSeek: get_mapped_ranges(%d, %d(%d))",
                        start, count, start + count - 1)
        return self._get_ranges(start, count, _SEEK_DATA, _SEEK_HOLE)


# Below goes the FIEMAP ioctl implementation, which is not very readable
# because it deals with the rather complex FIEMAP ioctl. To understand the
# code, you need to know the FIEMAP interface, which is documented in the
# "Documentation/filesystems/fiemap.txt" file in the Linux kernel sources.

# Format string for 'struct fiemap'
_FIEMAP_FORMAT = "=QQLLLL"
# sizeof(struct fiemap)
_FIEMAP_SIZE = struct.calcsize(_FIEMAP_FORMAT)
# Format string for 'struct fiemap_extent'
_FIEMAP_EXTENT_FORMAT = "=QQQQQLLLL"
# sizeof(struct fiemap_extent)
_FIEMAP_EXTENT_SIZE = struct.calcsize(_FIEMAP_EXTENT_FORMAT)
# The FIEMAP ioctl number
_FIEMAP_IOCTL = 0xC020660B
# This FIEMAP ioctl flag which instructs the kernel to sync the file before
# reading the block map
_FIEMAP_FLAG_SYNC = 0x00000001
# Size of the buffer for 'struct fiemap_extent' elements which will be used
# when invoking the FIEMAP ioctl. The larger is the buffer, the less times the
# FIEMAP ioctl will be invoked.
_FIEMAP_BUFFER_SIZE = 256 * 1024


class FilemapFiemap(_FilemapBase):
    """
    This class provides API to the FIEMAP ioctl. Namely, it allows to check
    whether a block is mapped and to iterate over all mapped block ranges.

    This class synchronizes the image file every time it invokes the FIEMAP
    ioctl in order to work-around early FIEMAP implementation kernel bugs.
    """

    def __init__(self, image, log=None):
        """
        Initialize a class instance. The 'image' argument is full path to the
        file or file object to operate on. Raises 'ErrorNotSupp' if the FIEMAP
        ioctl is not supported.
        """

        # Call the base class constructor first
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapFiemap: initializing")

        self._buf_size = _FIEMAP_BUFFER_SIZE

        # Calculate how many 'struct fiemap_extent' elements fit the buffer
        self._buf_size -= _FIEMAP_SIZE
        self._fiemap_extent_cnt = self._buf_size // _FIEMAP_EXTENT_SIZE
        assert self._fiemap_extent_cnt > 0
        self._buf_size = self._fiemap_extent_cnt * _FIEMAP_EXTENT_SIZE
        self._buf_size += _FIEMAP_SIZE

        # Allocate a mutable, zero-filled buffer for the FIEMAP ioctl.
        # Repeating a one-element array avoids building a huge python list.
        self._buf = array.array('B', [0]) * self._buf_size

        # Check if the FIEMAP ioctl is supported
        self.block_is_mapped(0)

    def _invoke_fiemap(self, block, count):
        """
        Invoke the FIEMAP ioctl for 'count' blocks of the file starting from
        block number 'block'.

        The full result of the operation is stored in 'self._buf' on exit.
        Returns the unpacked 'struct fiemap' data structure in form of a python
        tuple (just like 'struct.unpack()').

        Raises 'ErrorNotSupp' if the ioctl is not supported by the kernel or
        the file-system, and 'Error' for bad block numbers and all the other
        ioctl failures.
        """

        if self.blocks_cnt != 0 and (block < 0 or block >= self.blocks_cnt):
            raise Error("bad block number %d, should be within [0, %d]"
                        % (block, self.blocks_cnt - 1))

        # Initialize the 'struct fiemap' part of the buffer. We use the
        # '_FIEMAP_FLAG_SYNC' flag in order to make sure the file is
        # synchronized. The reason for this is that early FIEMAP
        # implementations had many bugs related to cached dirty data, and
        # synchronizing the file is a necessary work-around.
        struct.pack_into(_FIEMAP_FORMAT, self._buf, 0, block * self.block_size,
                         count * self.block_size, _FIEMAP_FLAG_SYNC, 0,
                         self._fiemap_extent_cnt, 0)

        try:
            # The last argument makes the ioctl write its result back into
            # the mutable buffer
            fcntl.ioctl(self._f_image, _FIEMAP_IOCTL, self._buf, 1)
        except IOError as err:
            # Note, the FIEMAP ioctl is supported by the Linux kernel starting
            # from version 2.6.28 (year 2008).
            if err.errno == errno.EOPNOTSUPP:
                errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
                         "by the file-system"
                self._log.debug(errstr)
                raise ErrorNotSupp(errstr)
            if err.errno == errno.ENOTTY:
                errstr = "FilemapFiemap: the FIEMAP ioctl is not supported " \
                         "by the kernel"
                self._log.debug(errstr)
                raise ErrorNotSupp(errstr)
            raise Error("the FIEMAP ioctl failed for '%s': %s"
                        % (self._image_path, err))

        # Unpack in place, without copying the beginning of the buffer
        return struct.unpack_from(_FIEMAP_FORMAT, self._buf, 0)

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        struct_fiemap = self._invoke_fiemap(block, 1)

        # Element number 3 (counting from zero) of 'struct_fiemap' is the
        # 'fm_mapped_extents' field. If it contains zero, the block is not
        # mapped, otherwise it is mapped.
        result = bool(struct_fiemap[3])
        self._log.debug("FilemapFiemap: block_is_mapped(%d) returns %s",
                        block, result)
        return result

    def _unpack_fiemap_extent(self, index):
        """
        Unpack a 'struct fiemap_extent' structure object number 'index' from
        the internal 'self._buf' buffer.
        """

        offset = _FIEMAP_SIZE + _FIEMAP_EXTENT_SIZE * index
        return struct.unpack_from(_FIEMAP_EXTENT_FORMAT, self._buf, offset)

    def _do_get_mapped_ranges(self, start, count):
        """
        Implements most the functionality for the 'get_mapped_ranges()'
        generator: invokes the FIEMAP ioctl, walks through the mapped extents
        and yields mapped block ranges. However, the ranges may be consecutive
        (e.g., (1, 100), (101, 200)) and 'get_mapped_ranges()' simply merges
        them.
        """

        block = start
        while block < start + count:
            struct_fiemap = self._invoke_fiemap(block, count)

            mapped_extents = struct_fiemap[3]
            if mapped_extents == 0:
                # No more mapped blocks
                return

            extent = 0
            while extent < mapped_extents:
                fiemap_extent = self._unpack_fiemap_extent(extent)

                # Start of the extent
                extent_start = fiemap_extent[0]
                # Starting block number of the extent
                extent_block = extent_start // self.block_size
                # Length of the extent
                extent_len = fiemap_extent[2]
                # Count of blocks in the extent
                extent_count = extent_len // self.block_size

                # Extent length and offset have to be block-aligned
                assert extent_start % self.block_size == 0
                assert extent_len % self.block_size == 0

                if extent_block > start + count - 1:
                    return

                first = max(extent_block, block)
                last = min(extent_block + extent_count, start + count) - 1
                yield (first, last)

                extent += 1

            # Continue from the block following the last extent seen
            block = extent_block + extent_count

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapFiemap: get_mapped_ranges(%d, %d(%d))",
                        start, count, start + count - 1)
        iterator = self._do_get_mapped_ranges(start, count)

        # There may be no mapped blocks at all. 'StopIteration' must not
        # escape from a generator (it is turned into 'RuntimeError'), so
        # handle it explicitly and yield nothing.
        try:
            first_prev, last_prev = next(iterator)
        except StopIteration:
            return

        for first, last in iterator:
            if last_prev == first - 1:
                # Adjacent to the previous range, merge them
                last_prev = last
            else:
                self._log.debug("FilemapFiemap: yielding range (%d, %d)",
                                first_prev, last_prev)
                yield (first_prev, last_prev)
                first_prev, last_prev = first, last

        self._log.debug("FilemapFiemap: yielding range (%d, %d)",
                        first_prev, last_prev)
        yield (first_prev, last_prev)


class FilemapNobmap(_FilemapBase):
    """
    This class is used when both the 'SEEK_DATA/HOLE' and FIEMAP are not
    supported by the filesystem or kernel. It reports every block as mapped.
    """

    def __init__(self, image, log=None):
        """Refer the '_FilemapBase' class for the documentation."""

        # Call the base class constructor first
        _FilemapBase.__init__(self, image, log)
        self._log.debug("FilemapNobmap: initializing")

    def block_is_mapped(self, block):
        """Refer the '_FilemapBase' class for the documentation."""
        return True

    def get_mapped_ranges(self, start, count):
        """Refer the '_FilemapBase' class for the documentation."""
        self._log.debug("FilemapNobmap: get_mapped_ranges(%d, %d(%d))",
                        start, count, start + count - 1)
        yield (start, start + count - 1)


def filemap(image, log=None):
    """
    Create and return an instance of a Filemap class - 'FilemapFiemap',
    'FilemapSeek' or 'FilemapNobmap', depending on what the system we run on
    supports. If the FIEMAP ioctl is supported, an instance of the
    'FilemapFiemap' class is returned. Otherwise, if 'SEEK_HOLE' is supported
    an instance of the 'FilemapSeek' class is returned. If none of these are
    supported, an instance of the 'FilemapNobmap' class is returned, which
    treats the whole file as mapped.
    """

    try:
        return FilemapFiemap(image, log)
    except ErrorNotSupp:
        try:
            return FilemapSeek(image, log)
        except ErrorNotSupp:
            return FilemapNobmap(image, log)


def sparse_copy(src_fname, dst_fname, skip=0, seek=0,
                length=0, api=None):
    """
    Efficiently copy sparse file to or into another file. Only the mapped
    ranges of the source file are read and written.

    src_fname: path to source file
    dst_fname: path to destination file. If it cannot be opened for update,
               it is created and extended to the resulting size
    skip: skip N bytes at the start of src
    seek: seek N bytes from the start of dst
    length: read N bytes from src and write them to dst; zero means "till the
            end of src"
    api: a callable which takes the source path and returns a Filemap object,
         e.g. the 'FilemapFiemap' or 'FilemapSeek' class; 'filemap' is used
         by default
    """
    if not api:
        api = filemap
    fmap = api(src_fname)
    src_file = fmap._f_image

    try:
        dst_file = open(dst_fname, 'r+b')
        dst_created = False
    except IOError:
        dst_file = open(dst_fname, 'wb')
        dst_created = True

    # Make sure the destination file is closed even if the copy fails
    try:
        if dst_created:
            # Give the new file its final size up front, so that the holes
            # which are never written still count towards the file size
            if length:
                dst_size = length + seek
            else:
                dst_size = os.path.getsize(src_fname) + seek - skip
            dst_file.truncate(dst_size)

        for first, last in fmap.get_mapped_ranges(0, fmap.blocks_cnt):
            start = first * fmap.block_size
            end = (last + 1) * fmap.block_size

            # The whole range lies in the skipped area
            if skip >= end:
                continue

            # The range is partially skipped
            if start < skip < end:
                start = skip

            src_file.seek(start, os.SEEK_SET)

            # Account for the hole preceding this range as if it was written
            written = start - skip
            if length and written >= length:
                return

            dst_file.seek(seek + start - skip, os.SEEK_SET)

            chunk_size = 1024 * 1024
            to_read = end - start
            read = 0

            while read < to_read:
                size = min(chunk_size, to_read - read)
                if length and written + size > length:
                    size = length - written
                dst_file.write(src_file.read(size))
                read += size
                written += size
                if length and written >= length:
                    return
    finally:
        dst_file.close()