1 /* 2 * Block driver for the Virtual Disk Image (VDI) format 3 * 4 * Copyright (c) 2009, 2012 Stefan Weil 5 * 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation, either version 2 of the License, or 9 * (at your option) version 3 or any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program. If not, see <http://www.gnu.org/licenses/>. 18 * 19 * Reference: 20 * http://forums.virtualbox.org/viewtopic.php?t=8046 21 * 22 * This driver supports create / read / write operations on VDI images. 23 * 24 * Todo (see also TODO in code): 25 * 26 * Some features like snapshots are still missing. 27 * 28 * Deallocation of zero-filled blocks and shrinking images are missing, too 29 * (might be added to common block layer). 30 * 31 * Allocation of blocks could be optimized (less writes to block map and 32 * header). 33 * 34 * Read and write of adjacent blocks could be done in one operation 35 * (current code uses one operation per block (1 MiB). 36 * 37 * The code is not thread safe (missing locks for changes in header and 38 * block table, no problem with current QEMU). 39 * 40 * Hints: 41 * 42 * Blocks (VDI documentation) correspond to clusters (QEMU). 43 * QEMU's backing files could be implemented using VDI snapshot files (TODO). 44 * VDI snapshot files may also contain the complete machine state. 45 * Maybe this machine state can be converted to QEMU PC machine snapshot data. 46 * 47 * The driver keeps a block cache (little endian entries) in memory. 48 * For the standard block size (1 MiB), a 1 TiB disk will use 4 MiB RAM, 49 * so this seems to be reasonable. 50 */ 51 52 #include "qemu/osdep.h" 53 #include "qapi/error.h" 54 #include "qapi/qmp/qdict.h" 55 #include "qapi/qobject-input-visitor.h" 56 #include "qapi/qapi-visit-block-core.h" 57 #include "block/block_int.h" 58 #include "sysemu/block-backend.h" 59 #include "qemu/module.h" 60 #include "qemu/option.h" 61 #include "qemu/bswap.h" 62 #include "migration/blocker.h" 63 #include "qemu/coroutine.h" 64 #include "qemu/cutils.h" 65 #include "qemu/uuid.h" 66 67 /* Code configuration options. */ 68 69 /* Enable debug messages. */ 70 //~ #define CONFIG_VDI_DEBUG 71 72 /* Support write operations on VDI images. */ 73 #define CONFIG_VDI_WRITE 74 75 /* Support non-standard block (cluster) size. This is untested. 76 * Maybe it will be needed for very large images. 77 */ 78 //~ #define CONFIG_VDI_BLOCK_SIZE 79 80 /* Support static (fixed, pre-allocated) images. */ 81 #define CONFIG_VDI_STATIC_IMAGE 82 83 /* Command line option for static images. */ 84 #define BLOCK_OPT_STATIC "static" 85 86 #define KiB 1024 87 #define MiB (KiB * KiB) 88 89 #define SECTOR_SIZE 512 90 #define DEFAULT_CLUSTER_SIZE (1 * MiB) 91 92 #if defined(CONFIG_VDI_DEBUG) 93 #define VDI_DEBUG 1 94 #else 95 #define VDI_DEBUG 0 96 #endif 97 98 #define logout(fmt, ...) \ 99 do { \ 100 if (VDI_DEBUG) { \ 101 fprintf(stderr, "vdi\t%-24s" fmt, __func__, ##__VA_ARGS__); \ 102 } \ 103 } while (0) 104 105 /* Image signature. */ 106 #define VDI_SIGNATURE 0xbeda107f 107 108 /* Image version. */ 109 #define VDI_VERSION_1_1 0x00010001 110 111 /* Image type. */ 112 #define VDI_TYPE_DYNAMIC 1 113 #define VDI_TYPE_STATIC 2 114 115 /* Innotek / SUN images use these strings in header.text: 116 * "<<< innotek VirtualBox Disk Image >>>\n" 117 * "<<< Sun xVM VirtualBox Disk Image >>>\n" 118 * "<<< Sun VirtualBox Disk Image >>>\n" 119 * The value does not matter, so QEMU created images use a different text. 120 */ 121 #define VDI_TEXT "<<< QEMU VM Virtual Disk Image >>>\n" 122 123 /* A never-allocated block; semantically arbitrary content. */ 124 #define VDI_UNALLOCATED 0xffffffffU 125 126 /* A discarded (no longer allocated) block; semantically zero-filled. */ 127 #define VDI_DISCARDED 0xfffffffeU 128 129 #define VDI_IS_ALLOCATED(X) ((X) < VDI_DISCARDED) 130 131 /* The bmap will take up VDI_BLOCKS_IN_IMAGE_MAX * sizeof(uint32_t) bytes; since 132 * the bmap is read and written in a single operation, its size needs to be 133 * limited to INT_MAX; furthermore, when opening an image, the bmap size is 134 * rounded up to be aligned on BDRV_SECTOR_SIZE. 135 * Therefore this should satisfy the following: 136 * VDI_BLOCKS_IN_IMAGE_MAX * sizeof(uint32_t) + BDRV_SECTOR_SIZE == INT_MAX + 1 137 * (INT_MAX + 1 is the first value not representable as an int) 138 * This guarantees that any value below or equal to the constant will, when 139 * multiplied by sizeof(uint32_t) and rounded up to a BDRV_SECTOR_SIZE boundary, 140 * still be below or equal to INT_MAX. */ 141 #define VDI_BLOCKS_IN_IMAGE_MAX \ 142 ((unsigned)((INT_MAX + 1u - BDRV_SECTOR_SIZE) / sizeof(uint32_t))) 143 #define VDI_DISK_SIZE_MAX ((uint64_t)VDI_BLOCKS_IN_IMAGE_MAX * \ 144 (uint64_t)DEFAULT_CLUSTER_SIZE) 145 146 static QemuOptsList vdi_create_opts; 147 148 typedef struct { 149 char text[0x40]; 150 uint32_t signature; 151 uint32_t version; 152 uint32_t header_size; 153 uint32_t image_type; 154 uint32_t image_flags; 155 char description[256]; 156 uint32_t offset_bmap; 157 uint32_t offset_data; 158 uint32_t cylinders; /* disk geometry, unused here */ 159 uint32_t heads; /* disk geometry, unused here */ 160 uint32_t sectors; /* disk geometry, unused here */ 161 uint32_t sector_size; 162 uint32_t unused1; 163 uint64_t disk_size; 164 uint32_t block_size; 165 uint32_t block_extra; /* unused here */ 166 uint32_t blocks_in_image; 167 uint32_t blocks_allocated; 168 QemuUUID uuid_image; 169 QemuUUID uuid_last_snap; 170 QemuUUID uuid_link; 171 QemuUUID uuid_parent; 172 uint64_t unused2[7]; 173 } QEMU_PACKED VdiHeader; 174 175 typedef struct { 176 /* The block map entries are little endian (even in memory). */ 177 uint32_t *bmap; 178 /* Size of block (bytes). */ 179 uint32_t block_size; 180 /* First sector of block map. */ 181 uint32_t bmap_sector; 182 /* VDI header (converted to host endianness). */ 183 VdiHeader header; 184 185 CoRwlock bmap_lock; 186 187 Error *migration_blocker; 188 } BDRVVdiState; 189 190 static void vdi_header_to_cpu(VdiHeader *header) 191 { 192 le32_to_cpus(&header->signature); 193 le32_to_cpus(&header->version); 194 le32_to_cpus(&header->header_size); 195 le32_to_cpus(&header->image_type); 196 le32_to_cpus(&header->image_flags); 197 le32_to_cpus(&header->offset_bmap); 198 le32_to_cpus(&header->offset_data); 199 le32_to_cpus(&header->cylinders); 200 le32_to_cpus(&header->heads); 201 le32_to_cpus(&header->sectors); 202 le32_to_cpus(&header->sector_size); 203 le64_to_cpus(&header->disk_size); 204 le32_to_cpus(&header->block_size); 205 le32_to_cpus(&header->block_extra); 206 le32_to_cpus(&header->blocks_in_image); 207 le32_to_cpus(&header->blocks_allocated); 208 qemu_uuid_bswap(&header->uuid_image); 209 qemu_uuid_bswap(&header->uuid_last_snap); 210 qemu_uuid_bswap(&header->uuid_link); 211 qemu_uuid_bswap(&header->uuid_parent); 212 } 213 214 static void vdi_header_to_le(VdiHeader *header) 215 { 216 cpu_to_le32s(&header->signature); 217 cpu_to_le32s(&header->version); 218 cpu_to_le32s(&header->header_size); 219 cpu_to_le32s(&header->image_type); 220 cpu_to_le32s(&header->image_flags); 221 cpu_to_le32s(&header->offset_bmap); 222 cpu_to_le32s(&header->offset_data); 223 cpu_to_le32s(&header->cylinders); 224 cpu_to_le32s(&header->heads); 225 cpu_to_le32s(&header->sectors); 226 cpu_to_le32s(&header->sector_size); 227 cpu_to_le64s(&header->disk_size); 228 cpu_to_le32s(&header->block_size); 229 cpu_to_le32s(&header->block_extra); 230 cpu_to_le32s(&header->blocks_in_image); 231 cpu_to_le32s(&header->blocks_allocated); 232 qemu_uuid_bswap(&header->uuid_image); 233 qemu_uuid_bswap(&header->uuid_last_snap); 234 qemu_uuid_bswap(&header->uuid_link); 235 qemu_uuid_bswap(&header->uuid_parent); 236 } 237 238 #if defined(CONFIG_VDI_DEBUG) 239 static void vdi_header_print(VdiHeader *header) 240 { 241 char uuid[37]; 242 logout("text %s", header->text); 243 logout("signature 0x%08x\n", header->signature); 244 logout("header size 0x%04x\n", header->header_size); 245 logout("image type 0x%04x\n", header->image_type); 246 logout("image flags 0x%04x\n", header->image_flags); 247 logout("description %s\n", header->description); 248 logout("offset bmap 0x%04x\n", header->offset_bmap); 249 logout("offset data 0x%04x\n", header->offset_data); 250 logout("cylinders 0x%04x\n", header->cylinders); 251 logout("heads 0x%04x\n", header->heads); 252 logout("sectors 0x%04x\n", header->sectors); 253 logout("sector size 0x%04x\n", header->sector_size); 254 logout("image size 0x%" PRIx64 " B (%" PRIu64 " MiB)\n", 255 header->disk_size, header->disk_size / MiB); 256 logout("block size 0x%04x\n", header->block_size); 257 logout("block extra 0x%04x\n", header->block_extra); 258 logout("blocks tot. 0x%04x\n", header->blocks_in_image); 259 logout("blocks all. 0x%04x\n", header->blocks_allocated); 260 uuid_unparse(header->uuid_image, uuid); 261 logout("uuid image %s\n", uuid); 262 uuid_unparse(header->uuid_last_snap, uuid); 263 logout("uuid snap %s\n", uuid); 264 uuid_unparse(header->uuid_link, uuid); 265 logout("uuid link %s\n", uuid); 266 uuid_unparse(header->uuid_parent, uuid); 267 logout("uuid parent %s\n", uuid); 268 } 269 #endif 270 271 static int coroutine_fn vdi_co_check(BlockDriverState *bs, BdrvCheckResult *res, 272 BdrvCheckMode fix) 273 { 274 /* TODO: additional checks possible. */ 275 BDRVVdiState *s = (BDRVVdiState *)bs->opaque; 276 uint32_t blocks_allocated = 0; 277 uint32_t block; 278 uint32_t *bmap; 279 logout("\n"); 280 281 if (fix) { 282 return -ENOTSUP; 283 } 284 285 bmap = g_try_new(uint32_t, s->header.blocks_in_image); 286 if (s->header.blocks_in_image && bmap == NULL) { 287 res->check_errors++; 288 return -ENOMEM; 289 } 290 291 memset(bmap, 0xff, s->header.blocks_in_image * sizeof(uint32_t)); 292 293 /* Check block map and value of blocks_allocated. */ 294 for (block = 0; block < s->header.blocks_in_image; block++) { 295 uint32_t bmap_entry = le32_to_cpu(s->bmap[block]); 296 if (VDI_IS_ALLOCATED(bmap_entry)) { 297 if (bmap_entry < s->header.blocks_in_image) { 298 blocks_allocated++; 299 if (!VDI_IS_ALLOCATED(bmap[bmap_entry])) { 300 bmap[bmap_entry] = bmap_entry; 301 } else { 302 fprintf(stderr, "ERROR: block index %" PRIu32 303 " also used by %" PRIu32 "\n", bmap[bmap_entry], bmap_entry); 304 res->corruptions++; 305 } 306 } else { 307 fprintf(stderr, "ERROR: block index %" PRIu32 308 " too large, is %" PRIu32 "\n", block, bmap_entry); 309 res->corruptions++; 310 } 311 } 312 } 313 if (blocks_allocated != s->header.blocks_allocated) { 314 fprintf(stderr, "ERROR: allocated blocks mismatch, is %" PRIu32 315 ", should be %" PRIu32 "\n", 316 blocks_allocated, s->header.blocks_allocated); 317 res->corruptions++; 318 } 319 320 g_free(bmap); 321 322 return 0; 323 } 324 325 static int vdi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 326 { 327 /* TODO: vdi_get_info would be needed for machine snapshots. 328 vm_state_offset is still missing. */ 329 BDRVVdiState *s = (BDRVVdiState *)bs->opaque; 330 logout("\n"); 331 bdi->cluster_size = s->block_size; 332 bdi->vm_state_offset = 0; 333 bdi->unallocated_blocks_are_zero = true; 334 return 0; 335 } 336 337 static int vdi_make_empty(BlockDriverState *bs) 338 { 339 /* TODO: missing code. */ 340 logout("\n"); 341 /* The return value for missing code must be 0, see block.c. */ 342 return 0; 343 } 344 345 static int vdi_probe(const uint8_t *buf, int buf_size, const char *filename) 346 { 347 const VdiHeader *header = (const VdiHeader *)buf; 348 int ret = 0; 349 350 logout("\n"); 351 352 if (buf_size < sizeof(*header)) { 353 /* Header too small, no VDI. */ 354 } else if (le32_to_cpu(header->signature) == VDI_SIGNATURE) { 355 ret = 100; 356 } 357 358 if (ret == 0) { 359 logout("no vdi image\n"); 360 } else { 361 logout("%s", header->text); 362 } 363 364 return ret; 365 } 366 367 static int vdi_open(BlockDriverState *bs, QDict *options, int flags, 368 Error **errp) 369 { 370 BDRVVdiState *s = bs->opaque; 371 VdiHeader header; 372 size_t bmap_size; 373 int ret; 374 Error *local_err = NULL; 375 376 bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, 377 false, errp); 378 if (!bs->file) { 379 return -EINVAL; 380 } 381 382 logout("\n"); 383 384 ret = bdrv_read(bs->file, 0, (uint8_t *)&header, 1); 385 if (ret < 0) { 386 goto fail; 387 } 388 389 vdi_header_to_cpu(&header); 390 #if defined(CONFIG_VDI_DEBUG) 391 vdi_header_print(&header); 392 #endif 393 394 if (header.disk_size > VDI_DISK_SIZE_MAX) { 395 error_setg(errp, "Unsupported VDI image size (size is 0x%" PRIx64 396 ", max supported is 0x%" PRIx64 ")", 397 header.disk_size, VDI_DISK_SIZE_MAX); 398 ret = -ENOTSUP; 399 goto fail; 400 } 401 402 if (header.disk_size % SECTOR_SIZE != 0) { 403 /* 'VBoxManage convertfromraw' can create images with odd disk sizes. 404 We accept them but round the disk size to the next multiple of 405 SECTOR_SIZE. */ 406 logout("odd disk size %" PRIu64 " B, round up\n", header.disk_size); 407 header.disk_size = ROUND_UP(header.disk_size, SECTOR_SIZE); 408 } 409 410 if (header.signature != VDI_SIGNATURE) { 411 error_setg(errp, "Image not in VDI format (bad signature %08" PRIx32 412 ")", header.signature); 413 ret = -EINVAL; 414 goto fail; 415 } else if (header.version != VDI_VERSION_1_1) { 416 error_setg(errp, "unsupported VDI image (version %" PRIu32 ".%" PRIu32 417 ")", header.version >> 16, header.version & 0xffff); 418 ret = -ENOTSUP; 419 goto fail; 420 } else if (header.offset_bmap % SECTOR_SIZE != 0) { 421 /* We only support block maps which start on a sector boundary. */ 422 error_setg(errp, "unsupported VDI image (unaligned block map offset " 423 "0x%" PRIx32 ")", header.offset_bmap); 424 ret = -ENOTSUP; 425 goto fail; 426 } else if (header.offset_data % SECTOR_SIZE != 0) { 427 /* We only support data blocks which start on a sector boundary. */ 428 error_setg(errp, "unsupported VDI image (unaligned data offset 0x%" 429 PRIx32 ")", header.offset_data); 430 ret = -ENOTSUP; 431 goto fail; 432 } else if (header.sector_size != SECTOR_SIZE) { 433 error_setg(errp, "unsupported VDI image (sector size %" PRIu32 434 " is not %u)", header.sector_size, SECTOR_SIZE); 435 ret = -ENOTSUP; 436 goto fail; 437 } else if (header.block_size != DEFAULT_CLUSTER_SIZE) { 438 error_setg(errp, "unsupported VDI image (block size %" PRIu32 439 " is not %u)", header.block_size, DEFAULT_CLUSTER_SIZE); 440 ret = -ENOTSUP; 441 goto fail; 442 } else if (header.disk_size > 443 (uint64_t)header.blocks_in_image * header.block_size) { 444 error_setg(errp, "unsupported VDI image (disk size %" PRIu64 ", " 445 "image bitmap has room for %" PRIu64 ")", 446 header.disk_size, 447 (uint64_t)header.blocks_in_image * header.block_size); 448 ret = -ENOTSUP; 449 goto fail; 450 } else if (!qemu_uuid_is_null(&header.uuid_link)) { 451 error_setg(errp, "unsupported VDI image (non-NULL link UUID)"); 452 ret = -ENOTSUP; 453 goto fail; 454 } else if (!qemu_uuid_is_null(&header.uuid_parent)) { 455 error_setg(errp, "unsupported VDI image (non-NULL parent UUID)"); 456 ret = -ENOTSUP; 457 goto fail; 458 } else if (header.blocks_in_image > VDI_BLOCKS_IN_IMAGE_MAX) { 459 error_setg(errp, "unsupported VDI image " 460 "(too many blocks %u, max is %u)", 461 header.blocks_in_image, VDI_BLOCKS_IN_IMAGE_MAX); 462 ret = -ENOTSUP; 463 goto fail; 464 } 465 466 bs->total_sectors = header.disk_size / SECTOR_SIZE; 467 468 s->block_size = header.block_size; 469 s->bmap_sector = header.offset_bmap / SECTOR_SIZE; 470 s->header = header; 471 472 bmap_size = header.blocks_in_image * sizeof(uint32_t); 473 bmap_size = DIV_ROUND_UP(bmap_size, SECTOR_SIZE); 474 s->bmap = qemu_try_blockalign(bs->file->bs, bmap_size * SECTOR_SIZE); 475 if (s->bmap == NULL) { 476 ret = -ENOMEM; 477 goto fail; 478 } 479 480 ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, 481 bmap_size); 482 if (ret < 0) { 483 goto fail_free_bmap; 484 } 485 486 /* Disable migration when vdi images are used */ 487 error_setg(&s->migration_blocker, "The vdi format used by node '%s' " 488 "does not support live migration", 489 bdrv_get_device_or_node_name(bs)); 490 ret = migrate_add_blocker(s->migration_blocker, &local_err); 491 if (local_err) { 492 error_propagate(errp, local_err); 493 error_free(s->migration_blocker); 494 goto fail_free_bmap; 495 } 496 497 qemu_co_rwlock_init(&s->bmap_lock); 498 499 return 0; 500 501 fail_free_bmap: 502 qemu_vfree(s->bmap); 503 504 fail: 505 return ret; 506 } 507 508 static int vdi_reopen_prepare(BDRVReopenState *state, 509 BlockReopenQueue *queue, Error **errp) 510 { 511 return 0; 512 } 513 514 static int coroutine_fn vdi_co_block_status(BlockDriverState *bs, 515 bool want_zero, 516 int64_t offset, int64_t bytes, 517 int64_t *pnum, int64_t *map, 518 BlockDriverState **file) 519 { 520 BDRVVdiState *s = (BDRVVdiState *)bs->opaque; 521 size_t bmap_index = offset / s->block_size; 522 size_t index_in_block = offset % s->block_size; 523 uint32_t bmap_entry = le32_to_cpu(s->bmap[bmap_index]); 524 int result; 525 526 logout("%p, %" PRId64 ", %" PRId64 ", %p\n", bs, offset, bytes, pnum); 527 *pnum = MIN(s->block_size - index_in_block, bytes); 528 result = VDI_IS_ALLOCATED(bmap_entry); 529 if (!result) { 530 return 0; 531 } 532 533 *map = s->header.offset_data + (uint64_t)bmap_entry * s->block_size + 534 index_in_block; 535 *file = bs->file->bs; 536 return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; 537 } 538 539 static int coroutine_fn 540 vdi_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, 541 QEMUIOVector *qiov, int flags) 542 { 543 BDRVVdiState *s = bs->opaque; 544 QEMUIOVector local_qiov; 545 uint32_t bmap_entry; 546 uint32_t block_index; 547 uint32_t offset_in_block; 548 uint32_t n_bytes; 549 uint64_t bytes_done = 0; 550 int ret = 0; 551 552 logout("\n"); 553 554 qemu_iovec_init(&local_qiov, qiov->niov); 555 556 while (ret >= 0 && bytes > 0) { 557 block_index = offset / s->block_size; 558 offset_in_block = offset % s->block_size; 559 n_bytes = MIN(bytes, s->block_size - offset_in_block); 560 561 logout("will read %u bytes starting at offset %" PRIu64 "\n", 562 n_bytes, offset); 563 564 /* prepare next AIO request */ 565 qemu_co_rwlock_rdlock(&s->bmap_lock); 566 bmap_entry = le32_to_cpu(s->bmap[block_index]); 567 qemu_co_rwlock_unlock(&s->bmap_lock); 568 if (!VDI_IS_ALLOCATED(bmap_entry)) { 569 /* Block not allocated, return zeros, no need to wait. */ 570 qemu_iovec_memset(qiov, bytes_done, 0, n_bytes); 571 ret = 0; 572 } else { 573 uint64_t data_offset = s->header.offset_data + 574 (uint64_t)bmap_entry * s->block_size + 575 offset_in_block; 576 577 qemu_iovec_reset(&local_qiov); 578 qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes); 579 580 ret = bdrv_co_preadv(bs->file, data_offset, n_bytes, 581 &local_qiov, 0); 582 } 583 logout("%u bytes read\n", n_bytes); 584 585 bytes -= n_bytes; 586 offset += n_bytes; 587 bytes_done += n_bytes; 588 } 589 590 qemu_iovec_destroy(&local_qiov); 591 592 return ret; 593 } 594 595 static int coroutine_fn 596 vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, 597 QEMUIOVector *qiov, int flags) 598 { 599 BDRVVdiState *s = bs->opaque; 600 QEMUIOVector local_qiov; 601 uint32_t bmap_entry; 602 uint32_t block_index; 603 uint32_t offset_in_block; 604 uint32_t n_bytes; 605 uint64_t data_offset; 606 uint32_t bmap_first = VDI_UNALLOCATED; 607 uint32_t bmap_last = VDI_UNALLOCATED; 608 uint8_t *block = NULL; 609 uint64_t bytes_done = 0; 610 int ret = 0; 611 612 logout("\n"); 613 614 qemu_iovec_init(&local_qiov, qiov->niov); 615 616 while (ret >= 0 && bytes > 0) { 617 block_index = offset / s->block_size; 618 offset_in_block = offset % s->block_size; 619 n_bytes = MIN(bytes, s->block_size - offset_in_block); 620 621 logout("will write %u bytes starting at offset %" PRIu64 "\n", 622 n_bytes, offset); 623 624 /* prepare next AIO request */ 625 qemu_co_rwlock_rdlock(&s->bmap_lock); 626 bmap_entry = le32_to_cpu(s->bmap[block_index]); 627 if (!VDI_IS_ALLOCATED(bmap_entry)) { 628 /* Allocate new block and write to it. */ 629 uint64_t data_offset; 630 qemu_co_rwlock_upgrade(&s->bmap_lock); 631 bmap_entry = le32_to_cpu(s->bmap[block_index]); 632 if (VDI_IS_ALLOCATED(bmap_entry)) { 633 /* A concurrent allocation did the work for us. */ 634 qemu_co_rwlock_downgrade(&s->bmap_lock); 635 goto nonallocating_write; 636 } 637 638 bmap_entry = s->header.blocks_allocated; 639 s->bmap[block_index] = cpu_to_le32(bmap_entry); 640 s->header.blocks_allocated++; 641 data_offset = s->header.offset_data + 642 (uint64_t)bmap_entry * s->block_size; 643 if (block == NULL) { 644 block = g_malloc(s->block_size); 645 bmap_first = block_index; 646 } 647 bmap_last = block_index; 648 /* Copy data to be written to new block and zero unused parts. */ 649 memset(block, 0, offset_in_block); 650 qemu_iovec_to_buf(qiov, bytes_done, block + offset_in_block, 651 n_bytes); 652 memset(block + offset_in_block + n_bytes, 0, 653 s->block_size - n_bytes - offset_in_block); 654 655 /* Write the new block under CoRwLock write-side protection, 656 * so this full-cluster write does not overlap a partial write 657 * of the same cluster, issued from the "else" branch. 658 */ 659 ret = bdrv_pwrite(bs->file, data_offset, block, s->block_size); 660 qemu_co_rwlock_unlock(&s->bmap_lock); 661 } else { 662 nonallocating_write: 663 data_offset = s->header.offset_data + 664 (uint64_t)bmap_entry * s->block_size + 665 offset_in_block; 666 qemu_co_rwlock_unlock(&s->bmap_lock); 667 668 qemu_iovec_reset(&local_qiov); 669 qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes); 670 671 ret = bdrv_co_pwritev(bs->file, data_offset, n_bytes, 672 &local_qiov, 0); 673 } 674 675 bytes -= n_bytes; 676 offset += n_bytes; 677 bytes_done += n_bytes; 678 679 logout("%u bytes written\n", n_bytes); 680 } 681 682 qemu_iovec_destroy(&local_qiov); 683 684 logout("finished data write\n"); 685 if (ret < 0) { 686 return ret; 687 } 688 689 if (block) { 690 /* One or more new blocks were allocated. */ 691 VdiHeader *header = (VdiHeader *) block; 692 uint8_t *base; 693 uint64_t offset; 694 uint32_t n_sectors; 695 696 logout("now writing modified header\n"); 697 assert(VDI_IS_ALLOCATED(bmap_first)); 698 *header = s->header; 699 vdi_header_to_le(header); 700 ret = bdrv_write(bs->file, 0, block, 1); 701 g_free(block); 702 block = NULL; 703 704 if (ret < 0) { 705 return ret; 706 } 707 708 logout("now writing modified block map entry %u...%u\n", 709 bmap_first, bmap_last); 710 /* Write modified sectors from block map. */ 711 bmap_first /= (SECTOR_SIZE / sizeof(uint32_t)); 712 bmap_last /= (SECTOR_SIZE / sizeof(uint32_t)); 713 n_sectors = bmap_last - bmap_first + 1; 714 offset = s->bmap_sector + bmap_first; 715 base = ((uint8_t *)&s->bmap[0]) + bmap_first * SECTOR_SIZE; 716 logout("will write %u block map sectors starting from entry %u\n", 717 n_sectors, bmap_first); 718 ret = bdrv_write(bs->file, offset, base, n_sectors); 719 } 720 721 return ret; 722 } 723 724 static int coroutine_fn vdi_co_do_create(BlockdevCreateOptions *create_options, 725 size_t block_size, Error **errp) 726 { 727 BlockdevCreateOptionsVdi *vdi_opts; 728 int ret = 0; 729 uint64_t bytes = 0; 730 uint32_t blocks; 731 uint32_t image_type = VDI_TYPE_DYNAMIC; 732 VdiHeader header; 733 size_t i; 734 size_t bmap_size; 735 int64_t offset = 0; 736 BlockDriverState *bs_file = NULL; 737 BlockBackend *blk = NULL; 738 uint32_t *bmap = NULL; 739 740 assert(create_options->driver == BLOCKDEV_DRIVER_VDI); 741 vdi_opts = &create_options->u.vdi; 742 743 logout("\n"); 744 745 /* Validate options and set default values */ 746 bytes = vdi_opts->size; 747 if (vdi_opts->q_static) { 748 image_type = VDI_TYPE_STATIC; 749 } 750 #ifndef CONFIG_VDI_STATIC_IMAGE 751 if (image_type == VDI_TYPE_STATIC) { 752 ret = -ENOTSUP; 753 error_setg(errp, "Statically allocated images cannot be created in " 754 "this build"); 755 goto exit; 756 } 757 #endif 758 #ifndef CONFIG_VDI_BLOCK_SIZE 759 if (block_size != DEFAULT_CLUSTER_SIZE) { 760 ret = -ENOTSUP; 761 error_setg(errp, 762 "A non-default cluster size is not supported in this build"); 763 goto exit; 764 } 765 #endif 766 767 if (bytes > VDI_DISK_SIZE_MAX) { 768 ret = -ENOTSUP; 769 error_setg(errp, "Unsupported VDI image size (size is 0x%" PRIx64 770 ", max supported is 0x%" PRIx64 ")", 771 bytes, VDI_DISK_SIZE_MAX); 772 goto exit; 773 } 774 775 /* Create BlockBackend to write to the image */ 776 bs_file = bdrv_open_blockdev_ref(vdi_opts->file, errp); 777 if (!bs_file) { 778 ret = -EIO; 779 goto exit; 780 } 781 782 blk = blk_new(BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL); 783 ret = blk_insert_bs(blk, bs_file, errp); 784 if (ret < 0) { 785 goto exit; 786 } 787 788 blk_set_allow_write_beyond_eof(blk, true); 789 790 /* We need enough blocks to store the given disk size, 791 so always round up. */ 792 blocks = DIV_ROUND_UP(bytes, block_size); 793 794 bmap_size = blocks * sizeof(uint32_t); 795 bmap_size = ROUND_UP(bmap_size, SECTOR_SIZE); 796 797 memset(&header, 0, sizeof(header)); 798 pstrcpy(header.text, sizeof(header.text), VDI_TEXT); 799 header.signature = VDI_SIGNATURE; 800 header.version = VDI_VERSION_1_1; 801 header.header_size = 0x180; 802 header.image_type = image_type; 803 header.offset_bmap = 0x200; 804 header.offset_data = 0x200 + bmap_size; 805 header.sector_size = SECTOR_SIZE; 806 header.disk_size = bytes; 807 header.block_size = block_size; 808 header.blocks_in_image = blocks; 809 if (image_type == VDI_TYPE_STATIC) { 810 header.blocks_allocated = blocks; 811 } 812 qemu_uuid_generate(&header.uuid_image); 813 qemu_uuid_generate(&header.uuid_last_snap); 814 /* There is no need to set header.uuid_link or header.uuid_parent here. */ 815 #if defined(CONFIG_VDI_DEBUG) 816 vdi_header_print(&header); 817 #endif 818 vdi_header_to_le(&header); 819 ret = blk_pwrite(blk, offset, &header, sizeof(header), 0); 820 if (ret < 0) { 821 error_setg(errp, "Error writing header"); 822 goto exit; 823 } 824 offset += sizeof(header); 825 826 if (bmap_size > 0) { 827 bmap = g_try_malloc0(bmap_size); 828 if (bmap == NULL) { 829 ret = -ENOMEM; 830 error_setg(errp, "Could not allocate bmap"); 831 goto exit; 832 } 833 for (i = 0; i < blocks; i++) { 834 if (image_type == VDI_TYPE_STATIC) { 835 bmap[i] = i; 836 } else { 837 bmap[i] = VDI_UNALLOCATED; 838 } 839 } 840 ret = blk_pwrite(blk, offset, bmap, bmap_size, 0); 841 if (ret < 0) { 842 error_setg(errp, "Error writing bmap"); 843 goto exit; 844 } 845 offset += bmap_size; 846 } 847 848 if (image_type == VDI_TYPE_STATIC) { 849 ret = blk_truncate(blk, offset + blocks * block_size, 850 PREALLOC_MODE_OFF, errp); 851 if (ret < 0) { 852 error_prepend(errp, "Failed to statically allocate file"); 853 goto exit; 854 } 855 } 856 857 exit: 858 blk_unref(blk); 859 bdrv_unref(bs_file); 860 g_free(bmap); 861 return ret; 862 } 863 864 static int coroutine_fn vdi_co_create(BlockdevCreateOptions *create_options, 865 Error **errp) 866 { 867 return vdi_co_do_create(create_options, DEFAULT_CLUSTER_SIZE, errp); 868 } 869 870 static int coroutine_fn vdi_co_create_opts(const char *filename, QemuOpts *opts, 871 Error **errp) 872 { 873 QDict *qdict = NULL; 874 BlockdevCreateOptions *create_options = NULL; 875 BlockDriverState *bs_file = NULL; 876 uint64_t block_size = DEFAULT_CLUSTER_SIZE; 877 Visitor *v; 878 Error *local_err = NULL; 879 int ret; 880 881 /* Parse options and convert legacy syntax. 882 * 883 * Since CONFIG_VDI_BLOCK_SIZE is disabled by default, 884 * cluster-size is not part of the QAPI schema; therefore we have 885 * to parse it before creating the QAPI object. */ 886 #if defined(CONFIG_VDI_BLOCK_SIZE) 887 block_size = qemu_opt_get_size_del(opts, 888 BLOCK_OPT_CLUSTER_SIZE, 889 DEFAULT_CLUSTER_SIZE); 890 if (block_size < BDRV_SECTOR_SIZE || block_size > UINT32_MAX || 891 !is_power_of_2(block_size)) 892 { 893 error_setg(errp, "Invalid cluster size"); 894 ret = -EINVAL; 895 goto done; 896 } 897 #endif 898 899 qdict = qemu_opts_to_qdict_filtered(opts, NULL, &vdi_create_opts, true); 900 901 /* Create and open the file (protocol layer) */ 902 ret = bdrv_create_file(filename, opts, errp); 903 if (ret < 0) { 904 goto done; 905 } 906 907 bs_file = bdrv_open(filename, NULL, NULL, 908 BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp); 909 if (!bs_file) { 910 ret = -EIO; 911 goto done; 912 } 913 914 qdict_put_str(qdict, "driver", "vdi"); 915 qdict_put_str(qdict, "file", bs_file->node_name); 916 917 /* Get the QAPI object */ 918 v = qobject_input_visitor_new_keyval(QOBJECT(qdict)); 919 visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err); 920 visit_free(v); 921 922 if (local_err) { 923 error_propagate(errp, local_err); 924 ret = -EINVAL; 925 goto done; 926 } 927 928 /* Silently round up size */ 929 assert(create_options->driver == BLOCKDEV_DRIVER_VDI); 930 create_options->u.vdi.size = ROUND_UP(create_options->u.vdi.size, 931 BDRV_SECTOR_SIZE); 932 933 /* Create the vdi image (format layer) */ 934 ret = vdi_co_do_create(create_options, block_size, errp); 935 done: 936 QDECREF(qdict); 937 qapi_free_BlockdevCreateOptions(create_options); 938 bdrv_unref(bs_file); 939 return ret; 940 } 941 942 static void vdi_close(BlockDriverState *bs) 943 { 944 BDRVVdiState *s = bs->opaque; 945 946 qemu_vfree(s->bmap); 947 948 migrate_del_blocker(s->migration_blocker); 949 error_free(s->migration_blocker); 950 } 951 952 static QemuOptsList vdi_create_opts = { 953 .name = "vdi-create-opts", 954 .head = QTAILQ_HEAD_INITIALIZER(vdi_create_opts.head), 955 .desc = { 956 { 957 .name = BLOCK_OPT_SIZE, 958 .type = QEMU_OPT_SIZE, 959 .help = "Virtual disk size" 960 }, 961 #if defined(CONFIG_VDI_BLOCK_SIZE) 962 { 963 .name = BLOCK_OPT_CLUSTER_SIZE, 964 .type = QEMU_OPT_SIZE, 965 .help = "VDI cluster (block) size", 966 .def_value_str = stringify(DEFAULT_CLUSTER_SIZE) 967 }, 968 #endif 969 #if defined(CONFIG_VDI_STATIC_IMAGE) 970 { 971 .name = BLOCK_OPT_STATIC, 972 .type = QEMU_OPT_BOOL, 973 .help = "VDI static (pre-allocated) image", 974 .def_value_str = "off" 975 }, 976 #endif 977 /* TODO: An additional option to set UUID values might be useful. */ 978 { /* end of list */ } 979 } 980 }; 981 982 static BlockDriver bdrv_vdi = { 983 .format_name = "vdi", 984 .instance_size = sizeof(BDRVVdiState), 985 .bdrv_probe = vdi_probe, 986 .bdrv_open = vdi_open, 987 .bdrv_close = vdi_close, 988 .bdrv_reopen_prepare = vdi_reopen_prepare, 989 .bdrv_child_perm = bdrv_format_default_perms, 990 .bdrv_co_create = vdi_co_create, 991 .bdrv_co_create_opts = vdi_co_create_opts, 992 .bdrv_has_zero_init = bdrv_has_zero_init_1, 993 .bdrv_co_block_status = vdi_co_block_status, 994 .bdrv_make_empty = vdi_make_empty, 995 996 .bdrv_co_preadv = vdi_co_preadv, 997 #if defined(CONFIG_VDI_WRITE) 998 .bdrv_co_pwritev = vdi_co_pwritev, 999 #endif 1000 1001 .bdrv_get_info = vdi_get_info, 1002 1003 .create_opts = &vdi_create_opts, 1004 .bdrv_co_check = vdi_co_check, 1005 }; 1006 1007 static void bdrv_vdi_init(void) 1008 { 1009 logout("\n"); 1010 bdrv_register(&bdrv_vdi); 1011 } 1012 1013 block_init(bdrv_vdi_init); 1014