1 /* 2 * Block driver for the Virtual Disk Image (VDI) format 3 * 4 * Copyright (c) 2009 Stefan Weil 5 * 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation, either version 2 of the License, or 9 * (at your option) version 3 or any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program. If not, see <http://www.gnu.org/licenses/>. 18 * 19 * Reference: 20 * http://forums.virtualbox.org/viewtopic.php?t=8046 21 * 22 * This driver supports create / read / write operations on VDI images. 23 * 24 * Todo (see also TODO in code): 25 * 26 * Some features like snapshots are still missing. 27 * 28 * Deallocation of zero-filled blocks and shrinking images are missing, too 29 * (might be added to common block layer). 30 * 31 * Allocation of blocks could be optimized (less writes to block map and 32 * header). 33 * 34 * Read and write of adjacents blocks could be done in one operation 35 * (current code uses one operation per block (1 MiB). 36 * 37 * The code is not thread safe (missing locks for changes in header and 38 * block table, no problem with current QEMU). 39 * 40 * Hints: 41 * 42 * Blocks (VDI documentation) correspond to clusters (QEMU). 43 * QEMU's backing files could be implemented using VDI snapshot files (TODO). 44 * VDI snapshot files may also contain the complete machine state. 45 * Maybe this machine state can be converted to QEMU PC machine snapshot data. 46 * 47 * The driver keeps a block cache (little endian entries) in memory. 48 * For the standard block size (1 MiB), a 1 TiB disk will use 4 MiB RAM, 49 * so this seems to be reasonable. 50 */ 51 52 #include "qemu-common.h" 53 #include "block_int.h" 54 #include "module.h" 55 56 #if defined(CONFIG_UUID) 57 #include <uuid/uuid.h> 58 #else 59 /* TODO: move uuid emulation to some central place in QEMU. */ 60 #include "sysemu.h" /* UUID_FMT */ 61 typedef unsigned char uuid_t[16]; 62 void uuid_generate(uuid_t out); 63 int uuid_is_null(const uuid_t uu); 64 void uuid_unparse(const uuid_t uu, char *out); 65 #endif 66 67 /* Code configuration options. */ 68 69 /* Enable debug messages. */ 70 //~ #define CONFIG_VDI_DEBUG 71 72 /* Support write operations on VDI images. */ 73 #define CONFIG_VDI_WRITE 74 75 /* Support non-standard block (cluster) size. This is untested. 76 * Maybe it will be needed for very large images. 77 */ 78 //~ #define CONFIG_VDI_BLOCK_SIZE 79 80 /* Support static (fixed, pre-allocated) images. */ 81 #define CONFIG_VDI_STATIC_IMAGE 82 83 /* Command line option for static images. */ 84 #define BLOCK_OPT_STATIC "static" 85 86 #define KiB 1024 87 #define MiB (KiB * KiB) 88 89 #define SECTOR_SIZE 512 90 91 #if defined(CONFIG_VDI_DEBUG) 92 #define logout(fmt, ...) \ 93 fprintf(stderr, "vdi\t%-24s" fmt, __func__, ##__VA_ARGS__) 94 #else 95 #define logout(fmt, ...) ((void)0) 96 #endif 97 98 /* Image signature. */ 99 #define VDI_SIGNATURE 0xbeda107f 100 101 /* Image version. */ 102 #define VDI_VERSION_1_1 0x00010001 103 104 /* Image type. */ 105 #define VDI_TYPE_DYNAMIC 1 106 #define VDI_TYPE_STATIC 2 107 108 /* Innotek / SUN images use these strings in header.text: 109 * "<<< innotek VirtualBox Disk Image >>>\n" 110 * "<<< Sun xVM VirtualBox Disk Image >>>\n" 111 * "<<< Sun VirtualBox Disk Image >>>\n" 112 * The value does not matter, so QEMU created images use a different text. 113 */ 114 #define VDI_TEXT "<<< QEMU VM Virtual Disk Image >>>\n" 115 116 /* Unallocated blocks use this index (no need to convert endianness). */ 117 #define VDI_UNALLOCATED UINT32_MAX 118 119 #if !defined(CONFIG_UUID) 120 void uuid_generate(uuid_t out) 121 { 122 memset(out, 0, sizeof(uuid_t)); 123 } 124 125 int uuid_is_null(const uuid_t uu) 126 { 127 uuid_t null_uuid = { 0 }; 128 return memcmp(uu, null_uuid, sizeof(uuid_t)) == 0; 129 } 130 131 void uuid_unparse(const uuid_t uu, char *out) 132 { 133 snprintf(out, 37, UUID_FMT, 134 uu[0], uu[1], uu[2], uu[3], uu[4], uu[5], uu[6], uu[7], 135 uu[8], uu[9], uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]); 136 } 137 #endif 138 139 typedef struct { 140 BlockDriverAIOCB common; 141 int64_t sector_num; 142 QEMUIOVector *qiov; 143 uint8_t *buf; 144 /* Total number of sectors. */ 145 int nb_sectors; 146 /* Number of sectors for current AIO. */ 147 int n_sectors; 148 /* New allocated block map entry. */ 149 uint32_t bmap_first; 150 uint32_t bmap_last; 151 /* Buffer for new allocated block. */ 152 void *block_buffer; 153 void *orig_buf; 154 int header_modified; 155 BlockDriverAIOCB *hd_aiocb; 156 struct iovec hd_iov; 157 QEMUIOVector hd_qiov; 158 QEMUBH *bh; 159 } VdiAIOCB; 160 161 typedef struct { 162 char text[0x40]; 163 uint32_t signature; 164 uint32_t version; 165 uint32_t header_size; 166 uint32_t image_type; 167 uint32_t image_flags; 168 char description[256]; 169 uint32_t offset_bmap; 170 uint32_t offset_data; 171 uint32_t cylinders; /* disk geometry, unused here */ 172 uint32_t heads; /* disk geometry, unused here */ 173 uint32_t sectors; /* disk geometry, unused here */ 174 uint32_t sector_size; 175 uint32_t unused1; 176 uint64_t disk_size; 177 uint32_t block_size; 178 uint32_t block_extra; /* unused here */ 179 uint32_t blocks_in_image; 180 uint32_t blocks_allocated; 181 uuid_t uuid_image; 182 uuid_t uuid_last_snap; 183 uuid_t uuid_link; 184 uuid_t uuid_parent; 185 uint64_t unused2[7]; 186 } VdiHeader; 187 188 typedef struct { 189 /* The block map entries are little endian (even in memory). */ 190 uint32_t *bmap; 191 /* Size of block (bytes). */ 192 uint32_t block_size; 193 /* Size of block (sectors). */ 194 uint32_t block_sectors; 195 /* First sector of block map. */ 196 uint32_t bmap_sector; 197 /* VDI header (converted to host endianness). */ 198 VdiHeader header; 199 } BDRVVdiState; 200 201 /* Change UUID from little endian (IPRT = VirtualBox format) to big endian 202 * format (network byte order, standard, see RFC 4122) and vice versa. 203 */ 204 static void uuid_convert(uuid_t uuid) 205 { 206 bswap32s((uint32_t *)&uuid[0]); 207 bswap16s((uint16_t *)&uuid[4]); 208 bswap16s((uint16_t *)&uuid[6]); 209 } 210 211 static void vdi_header_to_cpu(VdiHeader *header) 212 { 213 le32_to_cpus(&header->signature); 214 le32_to_cpus(&header->version); 215 le32_to_cpus(&header->header_size); 216 le32_to_cpus(&header->image_type); 217 le32_to_cpus(&header->image_flags); 218 le32_to_cpus(&header->offset_bmap); 219 le32_to_cpus(&header->offset_data); 220 le32_to_cpus(&header->cylinders); 221 le32_to_cpus(&header->heads); 222 le32_to_cpus(&header->sectors); 223 le32_to_cpus(&header->sector_size); 224 le64_to_cpus(&header->disk_size); 225 le32_to_cpus(&header->block_size); 226 le32_to_cpus(&header->block_extra); 227 le32_to_cpus(&header->blocks_in_image); 228 le32_to_cpus(&header->blocks_allocated); 229 uuid_convert(header->uuid_image); 230 uuid_convert(header->uuid_last_snap); 231 uuid_convert(header->uuid_link); 232 uuid_convert(header->uuid_parent); 233 } 234 235 static void vdi_header_to_le(VdiHeader *header) 236 { 237 cpu_to_le32s(&header->signature); 238 cpu_to_le32s(&header->version); 239 cpu_to_le32s(&header->header_size); 240 cpu_to_le32s(&header->image_type); 241 cpu_to_le32s(&header->image_flags); 242 cpu_to_le32s(&header->offset_bmap); 243 cpu_to_le32s(&header->offset_data); 244 cpu_to_le32s(&header->cylinders); 245 cpu_to_le32s(&header->heads); 246 cpu_to_le32s(&header->sectors); 247 cpu_to_le32s(&header->sector_size); 248 cpu_to_le64s(&header->disk_size); 249 cpu_to_le32s(&header->block_size); 250 cpu_to_le32s(&header->block_extra); 251 cpu_to_le32s(&header->blocks_in_image); 252 cpu_to_le32s(&header->blocks_allocated); 253 cpu_to_le32s(&header->blocks_allocated); 254 uuid_convert(header->uuid_image); 255 uuid_convert(header->uuid_last_snap); 256 uuid_convert(header->uuid_link); 257 uuid_convert(header->uuid_parent); 258 } 259 260 #if defined(CONFIG_VDI_DEBUG) 261 static void vdi_header_print(VdiHeader *header) 262 { 263 char uuid[37]; 264 logout("text %s", header->text); 265 logout("signature 0x%04x\n", header->signature); 266 logout("header size 0x%04x\n", header->header_size); 267 logout("image type 0x%04x\n", header->image_type); 268 logout("image flags 0x%04x\n", header->image_flags); 269 logout("description %s\n", header->description); 270 logout("offset bmap 0x%04x\n", header->offset_bmap); 271 logout("offset data 0x%04x\n", header->offset_data); 272 logout("cylinders 0x%04x\n", header->cylinders); 273 logout("heads 0x%04x\n", header->heads); 274 logout("sectors 0x%04x\n", header->sectors); 275 logout("sector size 0x%04x\n", header->sector_size); 276 logout("image size 0x%" PRIx64 " B (%" PRIu64 " MiB)\n", 277 header->disk_size, header->disk_size / MiB); 278 logout("block size 0x%04x\n", header->block_size); 279 logout("block extra 0x%04x\n", header->block_extra); 280 logout("blocks tot. 0x%04x\n", header->blocks_in_image); 281 logout("blocks all. 0x%04x\n", header->blocks_allocated); 282 uuid_unparse(header->uuid_image, uuid); 283 logout("uuid image %s\n", uuid); 284 uuid_unparse(header->uuid_last_snap, uuid); 285 logout("uuid snap %s\n", uuid); 286 uuid_unparse(header->uuid_link, uuid); 287 logout("uuid link %s\n", uuid); 288 uuid_unparse(header->uuid_parent, uuid); 289 logout("uuid parent %s\n", uuid); 290 } 291 #endif 292 293 static int vdi_check(BlockDriverState *bs, BdrvCheckResult *res) 294 { 295 /* TODO: additional checks possible. */ 296 BDRVVdiState *s = (BDRVVdiState *)bs->opaque; 297 uint32_t blocks_allocated = 0; 298 uint32_t block; 299 uint32_t *bmap; 300 logout("\n"); 301 302 bmap = qemu_malloc(s->header.blocks_in_image * sizeof(uint32_t)); 303 memset(bmap, 0xff, s->header.blocks_in_image * sizeof(uint32_t)); 304 305 /* Check block map and value of blocks_allocated. */ 306 for (block = 0; block < s->header.blocks_in_image; block++) { 307 uint32_t bmap_entry = le32_to_cpu(s->bmap[block]); 308 if (bmap_entry != VDI_UNALLOCATED) { 309 if (bmap_entry < s->header.blocks_in_image) { 310 blocks_allocated++; 311 if (bmap[bmap_entry] == VDI_UNALLOCATED) { 312 bmap[bmap_entry] = bmap_entry; 313 } else { 314 fprintf(stderr, "ERROR: block index %" PRIu32 315 " also used by %" PRIu32 "\n", bmap[bmap_entry], bmap_entry); 316 res->corruptions++; 317 } 318 } else { 319 fprintf(stderr, "ERROR: block index %" PRIu32 320 " too large, is %" PRIu32 "\n", block, bmap_entry); 321 res->corruptions++; 322 } 323 } 324 } 325 if (blocks_allocated != s->header.blocks_allocated) { 326 fprintf(stderr, "ERROR: allocated blocks mismatch, is %" PRIu32 327 ", should be %" PRIu32 "\n", 328 blocks_allocated, s->header.blocks_allocated); 329 res->corruptions++; 330 } 331 332 qemu_free(bmap); 333 334 return 0; 335 } 336 337 static int vdi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) 338 { 339 /* TODO: vdi_get_info would be needed for machine snapshots. 340 vm_state_offset is still missing. */ 341 BDRVVdiState *s = (BDRVVdiState *)bs->opaque; 342 logout("\n"); 343 bdi->cluster_size = s->block_size; 344 bdi->vm_state_offset = 0; 345 return 0; 346 } 347 348 static int vdi_make_empty(BlockDriverState *bs) 349 { 350 /* TODO: missing code. */ 351 logout("\n"); 352 /* The return value for missing code must be 0, see block.c. */ 353 return 0; 354 } 355 356 static int vdi_probe(const uint8_t *buf, int buf_size, const char *filename) 357 { 358 const VdiHeader *header = (const VdiHeader *)buf; 359 int result = 0; 360 361 logout("\n"); 362 363 if (buf_size < sizeof(*header)) { 364 /* Header too small, no VDI. */ 365 } else if (le32_to_cpu(header->signature) == VDI_SIGNATURE) { 366 result = 100; 367 } 368 369 if (result == 0) { 370 logout("no vdi image\n"); 371 } else { 372 logout("%s", header->text); 373 } 374 375 return result; 376 } 377 378 static int vdi_open(BlockDriverState *bs, int flags) 379 { 380 BDRVVdiState *s = bs->opaque; 381 VdiHeader header; 382 size_t bmap_size; 383 384 logout("\n"); 385 386 if (bdrv_read(bs->file, 0, (uint8_t *)&header, 1) < 0) { 387 goto fail; 388 } 389 390 vdi_header_to_cpu(&header); 391 #if defined(CONFIG_VDI_DEBUG) 392 vdi_header_print(&header); 393 #endif 394 395 if (header.disk_size % SECTOR_SIZE != 0) { 396 /* 'VBoxManage convertfromraw' can create images with odd disk sizes. 397 We accept them but round the disk size to the next multiple of 398 SECTOR_SIZE. */ 399 logout("odd disk size %" PRIu64 " B, round up\n", header.disk_size); 400 header.disk_size += SECTOR_SIZE - 1; 401 header.disk_size &= ~(SECTOR_SIZE - 1); 402 } 403 404 if (header.version != VDI_VERSION_1_1) { 405 logout("unsupported version %u.%u\n", 406 header.version >> 16, header.version & 0xffff); 407 goto fail; 408 } else if (header.offset_bmap % SECTOR_SIZE != 0) { 409 /* We only support block maps which start on a sector boundary. */ 410 logout("unsupported block map offset 0x%x B\n", header.offset_bmap); 411 goto fail; 412 } else if (header.offset_data % SECTOR_SIZE != 0) { 413 /* We only support data blocks which start on a sector boundary. */ 414 logout("unsupported data offset 0x%x B\n", header.offset_data); 415 goto fail; 416 } else if (header.sector_size != SECTOR_SIZE) { 417 logout("unsupported sector size %u B\n", header.sector_size); 418 goto fail; 419 } else if (header.block_size != 1 * MiB) { 420 logout("unsupported block size %u B\n", header.block_size); 421 goto fail; 422 } else if (header.disk_size > 423 (uint64_t)header.blocks_in_image * header.block_size) { 424 logout("unsupported disk size %" PRIu64 " B\n", header.disk_size); 425 goto fail; 426 } else if (!uuid_is_null(header.uuid_link)) { 427 logout("link uuid != 0, unsupported\n"); 428 goto fail; 429 } else if (!uuid_is_null(header.uuid_parent)) { 430 logout("parent uuid != 0, unsupported\n"); 431 goto fail; 432 } 433 434 bs->total_sectors = header.disk_size / SECTOR_SIZE; 435 436 s->block_size = header.block_size; 437 s->block_sectors = header.block_size / SECTOR_SIZE; 438 s->bmap_sector = header.offset_bmap / SECTOR_SIZE; 439 s->header = header; 440 441 bmap_size = header.blocks_in_image * sizeof(uint32_t); 442 bmap_size = (bmap_size + SECTOR_SIZE - 1) / SECTOR_SIZE; 443 if (bmap_size > 0) { 444 s->bmap = qemu_malloc(bmap_size * SECTOR_SIZE); 445 } 446 if (bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size) < 0) { 447 goto fail_free_bmap; 448 } 449 450 return 0; 451 452 fail_free_bmap: 453 qemu_free(s->bmap); 454 455 fail: 456 return -1; 457 } 458 459 static int vdi_is_allocated(BlockDriverState *bs, int64_t sector_num, 460 int nb_sectors, int *pnum) 461 { 462 /* TODO: Check for too large sector_num (in bdrv_is_allocated or here). */ 463 BDRVVdiState *s = (BDRVVdiState *)bs->opaque; 464 size_t bmap_index = sector_num / s->block_sectors; 465 size_t sector_in_block = sector_num % s->block_sectors; 466 int n_sectors = s->block_sectors - sector_in_block; 467 uint32_t bmap_entry = le32_to_cpu(s->bmap[bmap_index]); 468 logout("%p, %" PRId64 ", %d, %p\n", bs, sector_num, nb_sectors, pnum); 469 if (n_sectors > nb_sectors) { 470 n_sectors = nb_sectors; 471 } 472 *pnum = n_sectors; 473 return bmap_entry != VDI_UNALLOCATED; 474 } 475 476 static void vdi_aio_cancel(BlockDriverAIOCB *blockacb) 477 { 478 /* TODO: This code is untested. How can I get it executed? */ 479 VdiAIOCB *acb = container_of(blockacb, VdiAIOCB, common); 480 logout("\n"); 481 if (acb->hd_aiocb) { 482 bdrv_aio_cancel(acb->hd_aiocb); 483 } 484 qemu_aio_release(acb); 485 } 486 487 static AIOPool vdi_aio_pool = { 488 .aiocb_size = sizeof(VdiAIOCB), 489 .cancel = vdi_aio_cancel, 490 }; 491 492 static VdiAIOCB *vdi_aio_setup(BlockDriverState *bs, int64_t sector_num, 493 QEMUIOVector *qiov, int nb_sectors, 494 BlockDriverCompletionFunc *cb, void *opaque, int is_write) 495 { 496 VdiAIOCB *acb; 497 498 logout("%p, %" PRId64 ", %p, %d, %p, %p, %d\n", 499 bs, sector_num, qiov, nb_sectors, cb, opaque, is_write); 500 501 acb = qemu_aio_get(&vdi_aio_pool, bs, cb, opaque); 502 if (acb) { 503 acb->hd_aiocb = NULL; 504 acb->sector_num = sector_num; 505 acb->qiov = qiov; 506 if (qiov->niov > 1) { 507 acb->buf = qemu_blockalign(bs, qiov->size); 508 acb->orig_buf = acb->buf; 509 if (is_write) { 510 qemu_iovec_to_buffer(qiov, acb->buf); 511 } 512 } else { 513 acb->buf = (uint8_t *)qiov->iov->iov_base; 514 } 515 acb->nb_sectors = nb_sectors; 516 acb->n_sectors = 0; 517 acb->bmap_first = VDI_UNALLOCATED; 518 acb->bmap_last = VDI_UNALLOCATED; 519 acb->block_buffer = NULL; 520 acb->header_modified = 0; 521 } 522 return acb; 523 } 524 525 static int vdi_schedule_bh(QEMUBHFunc *cb, VdiAIOCB *acb) 526 { 527 logout("\n"); 528 529 if (acb->bh) { 530 return -EIO; 531 } 532 533 acb->bh = qemu_bh_new(cb, acb); 534 if (!acb->bh) { 535 return -EIO; 536 } 537 538 qemu_bh_schedule(acb->bh); 539 540 return 0; 541 } 542 543 static void vdi_aio_read_cb(void *opaque, int ret); 544 545 static void vdi_aio_read_bh(void *opaque) 546 { 547 VdiAIOCB *acb = opaque; 548 logout("\n"); 549 qemu_bh_delete(acb->bh); 550 acb->bh = NULL; 551 vdi_aio_read_cb(opaque, 0); 552 } 553 554 static void vdi_aio_read_cb(void *opaque, int ret) 555 { 556 VdiAIOCB *acb = opaque; 557 BlockDriverState *bs = acb->common.bs; 558 BDRVVdiState *s = bs->opaque; 559 uint32_t bmap_entry; 560 uint32_t block_index; 561 uint32_t sector_in_block; 562 uint32_t n_sectors; 563 564 logout("%u sectors read\n", acb->n_sectors); 565 566 acb->hd_aiocb = NULL; 567 568 if (ret < 0) { 569 goto done; 570 } 571 572 acb->nb_sectors -= acb->n_sectors; 573 574 if (acb->nb_sectors == 0) { 575 /* request completed */ 576 ret = 0; 577 goto done; 578 } 579 580 acb->sector_num += acb->n_sectors; 581 acb->buf += acb->n_sectors * SECTOR_SIZE; 582 583 block_index = acb->sector_num / s->block_sectors; 584 sector_in_block = acb->sector_num % s->block_sectors; 585 n_sectors = s->block_sectors - sector_in_block; 586 if (n_sectors > acb->nb_sectors) { 587 n_sectors = acb->nb_sectors; 588 } 589 590 logout("will read %u sectors starting at sector %" PRIu64 "\n", 591 n_sectors, acb->sector_num); 592 593 /* prepare next AIO request */ 594 acb->n_sectors = n_sectors; 595 bmap_entry = le32_to_cpu(s->bmap[block_index]); 596 if (bmap_entry == VDI_UNALLOCATED) { 597 /* Block not allocated, return zeros, no need to wait. */ 598 memset(acb->buf, 0, n_sectors * SECTOR_SIZE); 599 ret = vdi_schedule_bh(vdi_aio_read_bh, acb); 600 if (ret < 0) { 601 goto done; 602 } 603 } else { 604 uint64_t offset = s->header.offset_data / SECTOR_SIZE + 605 (uint64_t)bmap_entry * s->block_sectors + 606 sector_in_block; 607 acb->hd_iov.iov_base = (void *)acb->buf; 608 acb->hd_iov.iov_len = n_sectors * SECTOR_SIZE; 609 qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); 610 acb->hd_aiocb = bdrv_aio_readv(bs->file, offset, &acb->hd_qiov, 611 n_sectors, vdi_aio_read_cb, acb); 612 if (acb->hd_aiocb == NULL) { 613 ret = -EIO; 614 goto done; 615 } 616 } 617 return; 618 done: 619 if (acb->qiov->niov > 1) { 620 qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size); 621 qemu_vfree(acb->orig_buf); 622 } 623 acb->common.cb(acb->common.opaque, ret); 624 qemu_aio_release(acb); 625 } 626 627 static BlockDriverAIOCB *vdi_aio_readv(BlockDriverState *bs, 628 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 629 BlockDriverCompletionFunc *cb, void *opaque) 630 { 631 VdiAIOCB *acb; 632 logout("\n"); 633 acb = vdi_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); 634 if (!acb) { 635 return NULL; 636 } 637 vdi_aio_read_cb(acb, 0); 638 return &acb->common; 639 } 640 641 static void vdi_aio_write_cb(void *opaque, int ret) 642 { 643 VdiAIOCB *acb = opaque; 644 BlockDriverState *bs = acb->common.bs; 645 BDRVVdiState *s = bs->opaque; 646 uint32_t bmap_entry; 647 uint32_t block_index; 648 uint32_t sector_in_block; 649 uint32_t n_sectors; 650 651 acb->hd_aiocb = NULL; 652 653 if (ret < 0) { 654 goto done; 655 } 656 657 acb->nb_sectors -= acb->n_sectors; 658 acb->sector_num += acb->n_sectors; 659 acb->buf += acb->n_sectors * SECTOR_SIZE; 660 661 if (acb->nb_sectors == 0) { 662 logout("finished data write\n"); 663 acb->n_sectors = 0; 664 if (acb->header_modified) { 665 VdiHeader *header = acb->block_buffer; 666 logout("now writing modified header\n"); 667 assert(acb->bmap_first != VDI_UNALLOCATED); 668 *header = s->header; 669 vdi_header_to_le(header); 670 acb->header_modified = 0; 671 acb->hd_iov.iov_base = acb->block_buffer; 672 acb->hd_iov.iov_len = SECTOR_SIZE; 673 qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); 674 acb->hd_aiocb = bdrv_aio_writev(bs->file, 0, &acb->hd_qiov, 1, 675 vdi_aio_write_cb, acb); 676 if (acb->hd_aiocb == NULL) { 677 ret = -EIO; 678 goto done; 679 } 680 return; 681 } else if (acb->bmap_first != VDI_UNALLOCATED) { 682 /* One or more new blocks were allocated. */ 683 uint64_t offset; 684 uint32_t bmap_first; 685 uint32_t bmap_last; 686 qemu_free(acb->block_buffer); 687 acb->block_buffer = NULL; 688 bmap_first = acb->bmap_first; 689 bmap_last = acb->bmap_last; 690 logout("now writing modified block map entry %u...%u\n", 691 bmap_first, bmap_last); 692 /* Write modified sectors from block map. */ 693 bmap_first /= (SECTOR_SIZE / sizeof(uint32_t)); 694 bmap_last /= (SECTOR_SIZE / sizeof(uint32_t)); 695 n_sectors = bmap_last - bmap_first + 1; 696 offset = s->bmap_sector + bmap_first; 697 acb->bmap_first = VDI_UNALLOCATED; 698 acb->hd_iov.iov_base = (void *)((uint8_t *)&s->bmap[0] + 699 bmap_first * SECTOR_SIZE); 700 acb->hd_iov.iov_len = n_sectors * SECTOR_SIZE; 701 qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); 702 logout("will write %u block map sectors starting from entry %u\n", 703 n_sectors, bmap_first); 704 acb->hd_aiocb = bdrv_aio_writev(bs->file, offset, &acb->hd_qiov, 705 n_sectors, vdi_aio_write_cb, acb); 706 if (acb->hd_aiocb == NULL) { 707 ret = -EIO; 708 goto done; 709 } 710 return; 711 } 712 ret = 0; 713 goto done; 714 } 715 716 logout("%u sectors written\n", acb->n_sectors); 717 718 block_index = acb->sector_num / s->block_sectors; 719 sector_in_block = acb->sector_num % s->block_sectors; 720 n_sectors = s->block_sectors - sector_in_block; 721 if (n_sectors > acb->nb_sectors) { 722 n_sectors = acb->nb_sectors; 723 } 724 725 logout("will write %u sectors starting at sector %" PRIu64 "\n", 726 n_sectors, acb->sector_num); 727 728 /* prepare next AIO request */ 729 acb->n_sectors = n_sectors; 730 bmap_entry = le32_to_cpu(s->bmap[block_index]); 731 if (bmap_entry == VDI_UNALLOCATED) { 732 /* Allocate new block and write to it. */ 733 uint64_t offset; 734 uint8_t *block; 735 bmap_entry = s->header.blocks_allocated; 736 s->bmap[block_index] = cpu_to_le32(bmap_entry); 737 s->header.blocks_allocated++; 738 offset = s->header.offset_data / SECTOR_SIZE + 739 (uint64_t)bmap_entry * s->block_sectors; 740 block = acb->block_buffer; 741 if (block == NULL) { 742 block = qemu_mallocz(s->block_size); 743 acb->block_buffer = block; 744 acb->bmap_first = block_index; 745 assert(!acb->header_modified); 746 acb->header_modified = 1; 747 } 748 acb->bmap_last = block_index; 749 memcpy(block + sector_in_block * SECTOR_SIZE, 750 acb->buf, n_sectors * SECTOR_SIZE); 751 acb->hd_iov.iov_base = (void *)block; 752 acb->hd_iov.iov_len = s->block_size; 753 qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); 754 acb->hd_aiocb = bdrv_aio_writev(bs->file, offset, 755 &acb->hd_qiov, s->block_sectors, 756 vdi_aio_write_cb, acb); 757 if (acb->hd_aiocb == NULL) { 758 ret = -EIO; 759 goto done; 760 } 761 } else { 762 uint64_t offset = s->header.offset_data / SECTOR_SIZE + 763 (uint64_t)bmap_entry * s->block_sectors + 764 sector_in_block; 765 acb->hd_iov.iov_base = (void *)acb->buf; 766 acb->hd_iov.iov_len = n_sectors * SECTOR_SIZE; 767 qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1); 768 acb->hd_aiocb = bdrv_aio_writev(bs->file, offset, &acb->hd_qiov, 769 n_sectors, vdi_aio_write_cb, acb); 770 if (acb->hd_aiocb == NULL) { 771 ret = -EIO; 772 goto done; 773 } 774 } 775 776 return; 777 778 done: 779 if (acb->qiov->niov > 1) { 780 qemu_vfree(acb->orig_buf); 781 } 782 acb->common.cb(acb->common.opaque, ret); 783 qemu_aio_release(acb); 784 } 785 786 static BlockDriverAIOCB *vdi_aio_writev(BlockDriverState *bs, 787 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, 788 BlockDriverCompletionFunc *cb, void *opaque) 789 { 790 VdiAIOCB *acb; 791 logout("\n"); 792 acb = vdi_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); 793 if (!acb) { 794 return NULL; 795 } 796 vdi_aio_write_cb(acb, 0); 797 return &acb->common; 798 } 799 800 static int vdi_create(const char *filename, QEMUOptionParameter *options) 801 { 802 int fd; 803 int result = 0; 804 uint64_t bytes = 0; 805 uint32_t blocks; 806 size_t block_size = 1 * MiB; 807 uint32_t image_type = VDI_TYPE_DYNAMIC; 808 VdiHeader header; 809 size_t i; 810 size_t bmap_size; 811 uint32_t *bmap; 812 813 logout("\n"); 814 815 /* Read out options. */ 816 while (options && options->name) { 817 if (!strcmp(options->name, BLOCK_OPT_SIZE)) { 818 bytes = options->value.n; 819 #if defined(CONFIG_VDI_BLOCK_SIZE) 820 } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) { 821 if (options->value.n) { 822 /* TODO: Additional checks (SECTOR_SIZE * 2^n, ...). */ 823 block_size = options->value.n; 824 } 825 #endif 826 #if defined(CONFIG_VDI_STATIC_IMAGE) 827 } else if (!strcmp(options->name, BLOCK_OPT_STATIC)) { 828 if (options->value.n) { 829 image_type = VDI_TYPE_STATIC; 830 } 831 #endif 832 } 833 options++; 834 } 835 836 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 837 0644); 838 if (fd < 0) { 839 return -errno; 840 } 841 842 /* We need enough blocks to store the given disk size, 843 so always round up. */ 844 blocks = (bytes + block_size - 1) / block_size; 845 846 bmap_size = blocks * sizeof(uint32_t); 847 bmap_size = ((bmap_size + SECTOR_SIZE - 1) & ~(SECTOR_SIZE -1)); 848 849 memset(&header, 0, sizeof(header)); 850 pstrcpy(header.text, sizeof(header.text), VDI_TEXT); 851 header.signature = VDI_SIGNATURE; 852 header.version = VDI_VERSION_1_1; 853 header.header_size = 0x180; 854 header.image_type = image_type; 855 header.offset_bmap = 0x200; 856 header.offset_data = 0x200 + bmap_size; 857 header.sector_size = SECTOR_SIZE; 858 header.disk_size = bytes; 859 header.block_size = block_size; 860 header.blocks_in_image = blocks; 861 if (image_type == VDI_TYPE_STATIC) { 862 header.blocks_allocated = blocks; 863 } 864 uuid_generate(header.uuid_image); 865 uuid_generate(header.uuid_last_snap); 866 /* There is no need to set header.uuid_link or header.uuid_parent here. */ 867 #if defined(CONFIG_VDI_DEBUG) 868 vdi_header_print(&header); 869 #endif 870 vdi_header_to_le(&header); 871 if (write(fd, &header, sizeof(header)) < 0) { 872 result = -errno; 873 } 874 875 bmap = NULL; 876 if (bmap_size > 0) { 877 bmap = (uint32_t *)qemu_mallocz(bmap_size); 878 } 879 for (i = 0; i < blocks; i++) { 880 if (image_type == VDI_TYPE_STATIC) { 881 bmap[i] = i; 882 } else { 883 bmap[i] = VDI_UNALLOCATED; 884 } 885 } 886 if (write(fd, bmap, bmap_size) < 0) { 887 result = -errno; 888 } 889 qemu_free(bmap); 890 if (image_type == VDI_TYPE_STATIC) { 891 if (ftruncate(fd, sizeof(header) + bmap_size + blocks * block_size)) { 892 result = -errno; 893 } 894 } 895 896 if (close(fd) < 0) { 897 result = -errno; 898 } 899 900 return result; 901 } 902 903 static void vdi_close(BlockDriverState *bs) 904 { 905 } 906 907 static int vdi_flush(BlockDriverState *bs) 908 { 909 logout("\n"); 910 return bdrv_flush(bs->file); 911 } 912 913 914 static QEMUOptionParameter vdi_create_options[] = { 915 { 916 .name = BLOCK_OPT_SIZE, 917 .type = OPT_SIZE, 918 .help = "Virtual disk size" 919 }, 920 #if defined(CONFIG_VDI_BLOCK_SIZE) 921 { 922 .name = BLOCK_OPT_CLUSTER_SIZE, 923 .type = OPT_SIZE, 924 .help = "VDI cluster (block) size" 925 }, 926 #endif 927 #if defined(CONFIG_VDI_STATIC_IMAGE) 928 { 929 .name = BLOCK_OPT_STATIC, 930 .type = OPT_FLAG, 931 .help = "VDI static (pre-allocated) image" 932 }, 933 #endif 934 /* TODO: An additional option to set UUID values might be useful. */ 935 { NULL } 936 }; 937 938 static BlockDriver bdrv_vdi = { 939 .format_name = "vdi", 940 .instance_size = sizeof(BDRVVdiState), 941 .bdrv_probe = vdi_probe, 942 .bdrv_open = vdi_open, 943 .bdrv_close = vdi_close, 944 .bdrv_create = vdi_create, 945 .bdrv_flush = vdi_flush, 946 .bdrv_is_allocated = vdi_is_allocated, 947 .bdrv_make_empty = vdi_make_empty, 948 949 .bdrv_aio_readv = vdi_aio_readv, 950 #if defined(CONFIG_VDI_WRITE) 951 .bdrv_aio_writev = vdi_aio_writev, 952 #endif 953 954 .bdrv_get_info = vdi_get_info, 955 956 .create_options = vdi_create_options, 957 .bdrv_check = vdi_check, 958 }; 959 960 static void bdrv_vdi_init(void) 961 { 962 logout("\n"); 963 bdrv_register(&bdrv_vdi); 964 } 965 966 block_init(bdrv_vdi_init); 967