/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#ifndef BLOCK_COMMON_H
#define BLOCK_COMMON_H

#include "block/aio.h"
#include "block/aio-wait.h"
#include "qemu/iov.h"
#include "qemu/coroutine.h"
#include "block/accounting.h"
#include "block/dirty-bitmap.h"
#include "block/blockjob.h"
#include "qemu/hbitmap.h"
#include "qemu/transactions.h"

/*
 * generated_co_wrapper
 *
 * Function specifier, which does nothing but mark functions to be
 * generated by scripts/block-coroutine-wrapper.py
 *
 * The macro expands to nothing, so it has no effect on the compiled
 * declaration.
 *
 * Read more in docs/devel/block-coroutine-wrapper.rst
 */
#define generated_co_wrapper

/*
 * block.c
 *
 * Forward declarations only; the structures themselves are defined
 * elsewhere, so this header can only refer to them through pointers.
 */
typedef struct BlockDriver BlockDriver;
typedef struct BdrvChild BdrvChild;
typedef struct BdrvChildClass BdrvChildClass;

/*
 * Summary information about an image, described field by field below.
 * NOTE(review): who fills this in is not visible from this header.
 */
typedef struct BlockDriverInfo {
    /* in bytes, 0 if irrelevant */
    int cluster_size;
    /* offset at which the VM state can be saved (0 if not possible) */
    int64_t vm_state_offset;
    /* NOTE(review): the exact meaning of "dirty" is not defined here */
    bool is_dirty;
    /*
     * True if this block driver only supports compressed writes
     */
    bool needs_compressed_writes;
} BlockDriverInfo;

/*
 * Cluster counters. This structure is embedded in BdrvCheckResult (member
 * "bfi"), so it is presumably filled in while checking an image -- confirm
 * against the users of BdrvCheckResult.
 */
typedef struct BlockFragInfo {
    uint64_t allocated_clusters;
    uint64_t total_clusters;
    uint64_t fragmented_clusters;
    uint64_t compressed_clusters;
} BlockFragInfo;

/*
 * Request flags. Every flag is a distinct bit, so several flags can be
 * combined in one value; BDRV_REQ_MASK is the mask of accepted bits.
 */
typedef enum {
    BDRV_REQ_COPY_ON_READ       = 0x1,
    BDRV_REQ_ZERO_WRITE         = 0x2,

    /*
     * The BDRV_REQ_MAY_UNMAP flag is used in write_zeroes requests to indicate
     * that the block driver should unmap (discard) blocks if it is guaranteed
     * that the result will read back as zeroes. The flag is only passed to the
     * driver if the block device is opened with BDRV_O_UNMAP.
     */
    BDRV_REQ_MAY_UNMAP          = 0x4,

    /* Note: bit 0x8 is not assigned to any flag in this enum. */

    BDRV_REQ_FUA                = 0x10,
    BDRV_REQ_WRITE_COMPRESSED   = 0x20,

    /*
     * Signifies that this write request will not change the visible disk
     * content.
     */
    BDRV_REQ_WRITE_UNCHANGED    = 0x40,

    /*
     * Forces request serialisation. Use only with write requests.
     */
    BDRV_REQ_SERIALISING        = 0x80,

    /*
     * Execute the request only if the operation can be offloaded or otherwise
     * be executed efficiently, but return an error instead of using a slow
     * fallback.
     */
    BDRV_REQ_NO_FALLBACK        = 0x100,

    /*
     * BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read
     * (i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR
     * filter is involved), in which case it signals that the COR operation
     * need not read the data into memory (qiov) but only ensure they are
     * copied to the top layer (i.e., that COR operation is done).
     */
    BDRV_REQ_PREFETCH  = 0x200,

    /*
     * If we need to wait for other requests, just fail immediately. Used
     * only together with BDRV_REQ_SERIALISING. Used only with requests aligned
     * to request_alignment (corresponding assertions are in block/io.c).
     */
    BDRV_REQ_NO_WAIT = 0x400,

    /*
     * Mask of valid flags.
     * NOTE(review): 0x7ff also covers bit 0x8, which no flag above uses.
     */
    BDRV_REQ_MASK               = 0x7ff,
} BdrvRequestFlags;

/*
 * Open flags. Every flag is a distinct bit; bit 0x0040 is not assigned to
 * any flag in this list.
 */
#define BDRV_O_NO_SHARE    0x0001 /* don't share permissions */
#define BDRV_O_RDWR        0x0002
#define BDRV_O_RESIZE      0x0004 /* request permission for resizing the node */
#define BDRV_O_SNAPSHOT    0x0008 /* open the file read only and save
                                     writes in a snapshot */
#define BDRV_O_TEMPORARY   0x0010 /* delete the file after use */
#define BDRV_O_NOCACHE     0x0020 /* do not use the host page cache */
#define BDRV_O_NATIVE_AIO  0x0080 /* use native AIO instead of the
                                     thread pool */
#define BDRV_O_NO_BACKING  0x0100 /* don't open the backing file */
#define BDRV_O_NO_FLUSH    0x0200 /* disable flushing on this disk */
#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
#define BDRV_O_INACTIVE    0x0800  /* consistency hint for migration handoff */
#define BDRV_O_CHECK       0x1000  /* open solely for consistency check */
#define BDRV_O_ALLOW_RDWR  0x2000  /* allow reopen to change from r/o to r/w */
#define BDRV_O_UNMAP       0x4000  /* execute guest UNMAP/TRIM operations */
#define BDRV_O_PROTOCOL    0x8000  /* if no block driver is explicitly given:
                                      select an appropriate protocol driver,
                                      ignoring the format layer */
#define BDRV_O_NO_IO       0x10000 /* don't initialize for I/O */
#define BDRV_O_AUTO_RDONLY 0x20000 /* degrade to read-only if opening
                                      read-write fails */
#define BDRV_O_IO_URING    0x40000 /* use io_uring instead of the thread pool */

/* The open flags that describe the cache mode */
#define BDRV_O_CACHE_MASK  (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)


/* Option names of options parsed by the block layer */

#define BDRV_OPT_CACHE_WB       "cache.writeback"
#define BDRV_OPT_CACHE_DIRECT   "cache.direct"
#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush"
#define BDRV_OPT_READ_ONLY      "read-only"
#define BDRV_OPT_AUTO_READ_ONLY "auto-read-only"
#define BDRV_OPT_DISCARD        "discard"
#define BDRV_OPT_FORCE_SHARE    "force-share"


/* A sector is 1 << 9 = 512 bytes */
#define BDRV_SECTOR_BITS   9
#define BDRV_SECTOR_SIZE   (1ULL << BDRV_SECTOR_BITS)

/*
 * Largest request size, in sectors and in bytes. The sector count is
 * derived from SIZE_MAX and INT_MAX, each converted to sectors.
 * NOTE(review): MIN_CONST is defined elsewhere; presumably it yields the
 * smaller of its two arguments as a constant expression -- confirm.
 */
#define BDRV_REQUEST_MAX_SECTORS MIN_CONST(SIZE_MAX >> BDRV_SECTOR_BITS, \
                                           INT_MAX >> BDRV_SECTOR_BITS)
#define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS)

/*
 * We want to allow aligning requests and disk length up to any 32bit
 * alignment without having to worry about overflow.
 * To achieve that, and at the same time use a reasonably round number as the
 * maximum disk size, the maximum "length" (a limit for any offset/bytes
 * request and for disk size) is defined as INT64_MAX aligned down to
 * BDRV_MAX_ALIGNMENT.
 * NOTE(review): QEMU_ALIGN_DOWN is defined elsewhere; this assumes it rounds
 * its first argument down to a multiple of the second -- confirm.
 */
#define BDRV_MAX_ALIGNMENT (1L << 30)
#define BDRV_MAX_LENGTH (QEMU_ALIGN_DOWN(INT64_MAX, BDRV_MAX_ALIGNMENT))

/*
 * Allocation status flags for bdrv_block_status() and friends.
 *
 * Public flags:
 * BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer
 * BDRV_BLOCK_ZERO: offset reads as zero
 * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data
 * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
 *                       layer rather than any backing, set by block layer
 * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this
 *                 layer, set by block layer
 *
 * Internal flags:
 * BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
 *                 that the block layer recompute the answer from the returned
 *                 BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID.
 * BDRV_BLOCK_RECURSE: request that the block layer will recursively search for
 *                     zeroes in file child of current block node inside
 *                     returned region. Only valid together with both
 *                     BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not
 *                     appear with BDRV_BLOCK_ZERO.
 *
 * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the
 * host offset within the returned BDS that is allocated for the
 * corresponding raw guest data.  However, whether that offset
 * actually contains data also depends on BDRV_BLOCK_DATA, as follows:
 *
 * DATA ZERO OFFSET_VALID
 *  t    t        t       sectors read as zero, returned file is zero at offset
 *  t    f        t       sectors read as valid from file at offset
 *  f    t        t       sectors preallocated, read as zero, returned file not
 *                        necessarily zero at offset
 *  f    f        t       sectors preallocated but read from backing_hd,
 *                        returned file contains garbage at offset
 *  t    t        f       sectors preallocated, read as zero, unknown offset
 *  t    f        f       sectors read from unknown file or offset
 *  f    t        f       not allocated or unknown offset, read as zero
 *  f    f        f       not allocated or unknown offset, read from backing_hd
 */
#define BDRV_BLOCK_DATA         0x01
#define BDRV_BLOCK_ZERO         0x02
#define BDRV_BLOCK_OFFSET_VALID 0x04
#define BDRV_BLOCK_RAW          0x08
#define BDRV_BLOCK_ALLOCATED    0x10
#define BDRV_BLOCK_EOF          0x20
#define BDRV_BLOCK_RECURSE      0x40

/* Tail queue of BlockReopenQueueEntry elements (entry type defined elsewhere) */
typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;

/*
 * State kept for one block node while it is being reopened.
 * NOTE(review): field semantics beyond the inline comments are not visible
 * from this header; confirm against the reopen code before relying on them.
 */
typedef struct BDRVReopenState {
    BlockDriverState *bs;
    int flags;
    BlockdevDetectZeroesOptions detect_zeroes;
    bool backing_missing;
    BlockDriverState *old_backing_bs; /* keep pointer for permissions update */
    BlockDriverState *old_file_bs; /* keep pointer for permissions update */
    QDict *options;
    QDict *explicit_options;
    void *opaque;
} BDRVReopenState;

/*
 * Block operation types
 *
 * The enumerators are numbered implicitly from 0, so BLOCK_OP_TYPE_MAX is
 * not an operation itself: its value equals the number of operation types
 * listed before it.
 */
typedef enum BlockOpType {
    BLOCK_OP_TYPE_BACKUP_SOURCE,
    BLOCK_OP_TYPE_BACKUP_TARGET,
    BLOCK_OP_TYPE_CHANGE,
    BLOCK_OP_TYPE_COMMIT_SOURCE,
    BLOCK_OP_TYPE_COMMIT_TARGET,
    BLOCK_OP_TYPE_DATAPLANE,
    BLOCK_OP_TYPE_DRIVE_DEL,
    BLOCK_OP_TYPE_EJECT,
    BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT,
    BLOCK_OP_TYPE_INTERNAL_SNAPSHOT,
    BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE,
    BLOCK_OP_TYPE_MIRROR_SOURCE,
    BLOCK_OP_TYPE_MIRROR_TARGET,
    BLOCK_OP_TYPE_RESIZE,
    BLOCK_OP_TYPE_STREAM,
    BLOCK_OP_TYPE_REPLACE,
    BLOCK_OP_TYPE_MAX,
} BlockOpType;

/* Block node permission constants */
enum {
    /**
     * A user that has the "permission" of consistent reads is guaranteed that
     * their view of the contents of the block device is complete and
     * self-consistent, representing the contents of a disk at a specific
     * point.
     *
     * For most block devices (including their backing files) this is true, but
     * the property cannot be maintained in a few situations like for
     * intermediate nodes of a commit block job.
     */
    BLK_PERM_CONSISTENT_READ    = 0x01,

    /** This permission is required to change the visible disk contents. */
    BLK_PERM_WRITE              = 0x02,

    /**
     * This permission (which is weaker than BLK_PERM_WRITE) is both enough and
     * required for writes to the block node when the caller promises that
     * the visible disk content doesn't change.
     *
     * As the BLK_PERM_WRITE permission is strictly stronger, either is
     * sufficient to perform an unchanging write.
     */
    BLK_PERM_WRITE_UNCHANGED    = 0x04,

    /** This permission is required to change the size of a block node. */
    BLK_PERM_RESIZE             = 0x08,

    /**
     * There was a now-removed bit BLK_PERM_GRAPH_MOD, with value of 0x10. QEMU
     * 6.1 and earlier may still lock the corresponding byte in block/file-posix
     * locking.  So, implementing some new permission should be very careful to
     * not interfere with this old unused thing.
     */

    /** Union of the four permission bits defined above. */
    BLK_PERM_ALL                = 0x0f,

    DEFAULT_PERM_PASSTHROUGH    = BLK_PERM_CONSISTENT_READ
                                 | BLK_PERM_WRITE
                                 | BLK_PERM_WRITE_UNCHANGED
                                 | BLK_PERM_RESIZE,

    /**
     * Every permission that is not part of DEFAULT_PERM_PASSTHROUGH. With the
     * constants currently defined, DEFAULT_PERM_PASSTHROUGH equals
     * BLK_PERM_ALL, so this evaluates to 0.
     */
    DEFAULT_PERM_UNCHANGED      = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH,
};

/*
 * Flags that parent nodes assign to child nodes to specify what kind of
 * role(s) they take.
 *
 * At least one of DATA, METADATA, FILTERED, or COW must be set for
 * every child.
 */
enum BdrvChildRoleBits {
    /*
     * This child stores data.
     * Any node may have an arbitrary number of such children.
     */
    BDRV_CHILD_DATA         = (1 << 0),

    /*
     * This child stores metadata.
     * Any node may have an arbitrary number of metadata-storing
     * children.
     */
    BDRV_CHILD_METADATA     = (1 << 1),

    /*
     * A child that always presents exactly the same visible data as
     * the parent, e.g. by virtue of the parent forwarding all reads
     * and writes.
     * This flag is mutually exclusive with DATA, METADATA, and COW.
     * Any node may have at most one filtered child at a time.
     */
    BDRV_CHILD_FILTERED     = (1 << 2),

    /*
     * Child from which to read all data that isn't allocated in the
     * parent (i.e., the backing child); such data is copied to the
     * parent through COW (and optionally COR).
     * This flag is mutually exclusive with DATA, METADATA, and
     * FILTERED.
     * Any node may have at most one such backing child at a time.
     */
    BDRV_CHILD_COW          = (1 << 3),

    /*
     * The primary child.  For most drivers, this is the child whose
     * filename applies best to the parent node.
     * Any node may have at most one primary child at a time.
     */
    BDRV_CHILD_PRIMARY      = (1 << 4),

    /* Useful combination of flags */
    BDRV_CHILD_IMAGE        = BDRV_CHILD_DATA
                              | BDRV_CHILD_METADATA
                              | BDRV_CHILD_PRIMARY,
};

/* Mask of BdrvChildRoleBits values */
typedef unsigned int BdrvChildRole;

/*
 * Counters and results of an image check.
 * NOTE(review): the units and exact meaning of each counter are not visible
 * from this header; the names suggest numbers of problems found and fixed --
 * confirm against the code that fills this structure.
 */
typedef struct BdrvCheckResult {
    int corruptions;
    int leaks;
    int check_errors;
    int corruptions_fixed;
    int leaks_fixed;
    int64_t image_end_offset;
    BlockFragInfo bfi;
} BdrvCheckResult;

/*
 * Repair modes for an image check. The two values are distinct bits (1 and
 * 2), so they can be combined in a single value.
 */
typedef enum {
    BDRV_FIX_LEAKS    = 1,
    BDRV_FIX_ERRORS   = 2,
} BdrvCheckMode;

/*
 * A pair of block sizes.
 * NOTE(review): presumably "phys" is the physical and "log" the logical
 * block size; the unit is not stated here -- confirm.
 */
typedef struct BlockSizes {
    uint32_t phys;
    uint32_t log;
} BlockSizes;

/* Disk geometry expressed as heads, sectors and cylinders */
typedef struct HDGeometry {
    uint32_t heads;
    uint32_t sectors;
    uint32_t cylinders;
} HDGeometry;

/*
 * Common functions that are neither I/O nor Global State.
 *
 * These functions must never call any function from other categories
 * (I/O, "I/O or GS", Global State) except this one, but can be invoked by
 * all of them.
 *
 * NOTE(review): the implementations are not visible from this header.
 * Several functions return a non-const char *; presumably the caller owns
 * and must free the returned string -- confirm against the definitions.
 */

char *bdrv_perm_names(uint64_t perm);
uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm);

void bdrv_init_with_whitelist(void);
bool bdrv_uses_whitelist(void);
int bdrv_is_whitelisted(BlockDriver *drv, bool read_only);

/* Parsers for option strings; results are stored through the out pointers */
int bdrv_parse_aio(const char *mode, int *flags);
int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
int bdrv_parse_discard_flags(const char *mode, int *flags);

int path_has_protocol(const char *path);
int path_is_absolute(const char *path);
char *path_combine(const char *base_path, const char *filename);

char *bdrv_get_full_backing_filename_from_filename(const char *backed,
                                                   const char *backing,
                                                   Error **errp);

#endif /* BLOCK_COMMON_H */