1 /* 2 * Block driver for Parallels disk image format 3 * 4 * Copyright (c) 2007 Alex Beregszaszi 5 * Copyright (c) 2015 Denis V. Lunev <den@openvz.org> 6 * 7 * This code was originally based on comparing different disk images created 8 * by Parallels. Currently it is based on opened OpenVZ sources 9 * available at 10 * http://git.openvz.org/?p=ploop;a=summary 11 * 12 * Permission is hereby granted, free of charge, to any person obtaining a copy 13 * of this software and associated documentation files (the "Software"), to deal 14 * in the Software without restriction, including without limitation the rights 15 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 16 * copies of the Software, and to permit persons to whom the Software is 17 * furnished to do so, subject to the following conditions: 18 * 19 * The above copyright notice and this permission notice shall be included in 20 * all copies or substantial portions of the Software. 21 * 22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 23 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 25 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 26 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 27 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 28 * THE SOFTWARE. 29 */ 30 31 #include "qemu/osdep.h" 32 #include "qemu/error-report.h" 33 #include "qapi/error.h" 34 #include "block/block_int.h" 35 #include "block/qdict.h" 36 #include "sysemu/block-backend.h" 37 #include "qemu/module.h" 38 #include "qemu/option.h" 39 #include "qapi/qmp/qdict.h" 40 #include "qapi/qobject-input-visitor.h" 41 #include "qapi/qapi-visit-block-core.h" 42 #include "qemu/bswap.h" 43 #include "qemu/bitmap.h" 44 #include "qemu/memalign.h" 45 #include "migration/blocker.h" 46 #include "parallels.h" 47 48 /**************************************************************/ 49 50 #define HEADER_MAGIC "WithoutFreeSpace" 51 #define HEADER_MAGIC2 "WithouFreSpacExt" 52 #define HEADER_VERSION 2 53 #define HEADER_INUSE_MAGIC (0x746F6E59) 54 #define MAX_PARALLELS_IMAGE_FACTOR (1ull << 32) 55 56 static QEnumLookup prealloc_mode_lookup = { 57 .array = (const char *const[]) { 58 "falloc", 59 "truncate", 60 }, 61 .size = PRL_PREALLOC_MODE__MAX 62 }; 63 64 #define PARALLELS_OPT_PREALLOC_MODE "prealloc-mode" 65 #define PARALLELS_OPT_PREALLOC_SIZE "prealloc-size" 66 67 static QemuOptsList parallels_runtime_opts = { 68 .name = "parallels", 69 .head = QTAILQ_HEAD_INITIALIZER(parallels_runtime_opts.head), 70 .desc = { 71 { 72 .name = PARALLELS_OPT_PREALLOC_SIZE, 73 .type = QEMU_OPT_SIZE, 74 .help = "Preallocation size on image expansion", 75 .def_value_str = "128M", 76 }, 77 { 78 .name = PARALLELS_OPT_PREALLOC_MODE, 79 .type = QEMU_OPT_STRING, 80 .help = "Preallocation mode on image expansion " 81 "(allowed values: falloc, truncate)", 82 .def_value_str = "falloc", 83 }, 84 { /* end of list */ }, 85 }, 86 }; 87 88 static QemuOptsList parallels_create_opts = { 89 .name = "parallels-create-opts", 90 .head = QTAILQ_HEAD_INITIALIZER(parallels_create_opts.head), 91 .desc = { 92 { 93 .name = BLOCK_OPT_SIZE, 94 .type = QEMU_OPT_SIZE, 95 .help = "Virtual disk size", 96 }, 97 { 98 .name = BLOCK_OPT_CLUSTER_SIZE, 99 .type = QEMU_OPT_SIZE, 100 .help = "Parallels image cluster size", 101 .def_value_str = stringify(DEFAULT_CLUSTER_SIZE), 102 }, 103 { /* end of list */ } 104 } 105 }; 106 107 108 static int64_t bat2sect(BDRVParallelsState *s, uint32_t idx) 109 { 110 return (uint64_t)le32_to_cpu(s->bat_bitmap[idx]) * s->off_multiplier; 111 } 112 113 static uint32_t bat_entry_off(uint32_t idx) 114 { 115 return sizeof(ParallelsHeader) + sizeof(uint32_t) * idx; 116 } 117 118 static int64_t seek_to_sector(BDRVParallelsState *s, int64_t sector_num) 119 { 120 uint32_t index, offset; 121 122 index = sector_num / s->tracks; 123 offset = sector_num % s->tracks; 124 125 /* not allocated */ 126 if ((index >= s->bat_size) || (s->bat_bitmap[index] == 0)) { 127 return -1; 128 } 129 return bat2sect(s, index) + offset; 130 } 131 132 static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num, 133 int nb_sectors) 134 { 135 int ret = s->tracks - sector_num % s->tracks; 136 return MIN(nb_sectors, ret); 137 } 138 139 static uint32_t host_cluster_index(BDRVParallelsState *s, int64_t off) 140 { 141 off -= s->data_start << BDRV_SECTOR_BITS; 142 return off / s->cluster_size; 143 } 144 145 static int64_t block_status(BDRVParallelsState *s, int64_t sector_num, 146 int nb_sectors, int *pnum) 147 { 148 int64_t start_off = -2, prev_end_off = -2; 149 150 *pnum = 0; 151 while (nb_sectors > 0 || start_off == -2) { 152 int64_t offset = seek_to_sector(s, sector_num); 153 int to_end; 154 155 if (start_off == -2) { 156 start_off = offset; 157 prev_end_off = offset; 158 } else if (offset != prev_end_off) { 159 break; 160 } 161 162 to_end = cluster_remainder(s, sector_num, nb_sectors); 163 nb_sectors -= to_end; 164 sector_num += to_end; 165 *pnum += to_end; 166 167 if (offset > 0) { 168 prev_end_off += to_end; 169 } 170 } 171 return start_off; 172 } 173 174 static void parallels_set_bat_entry(BDRVParallelsState *s, 175 uint32_t index, uint32_t offset) 176 { 177 s->bat_bitmap[index] = cpu_to_le32(offset); 178 bitmap_set(s->bat_dirty_bmap, bat_entry_off(index) / s->bat_dirty_block, 1); 179 } 180 181 static int64_t coroutine_fn GRAPH_RDLOCK 182 allocate_clusters(BlockDriverState *bs, int64_t sector_num, 183 int nb_sectors, int *pnum) 184 { 185 int ret = 0; 186 BDRVParallelsState *s = bs->opaque; 187 int64_t pos, space, idx, to_allocate, i, len; 188 189 pos = block_status(s, sector_num, nb_sectors, pnum); 190 if (pos > 0) { 191 return pos; 192 } 193 194 idx = sector_num / s->tracks; 195 to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx; 196 197 /* 198 * This function is called only by parallels_co_writev(), which will never 199 * pass a sector_num at or beyond the end of the image (because the block 200 * layer never passes such a sector_num to that function). Therefore, idx 201 * is always below s->bat_size. 202 * block_status() will limit *pnum so that sector_num + *pnum will not 203 * exceed the image end. Therefore, idx + to_allocate cannot exceed 204 * s->bat_size. 205 * Note that s->bat_size is an unsigned int, therefore idx + to_allocate 206 * will always fit into a uint32_t. 207 */ 208 assert(idx < s->bat_size && idx + to_allocate <= s->bat_size); 209 210 space = to_allocate * s->tracks; 211 len = bdrv_co_getlength(bs->file->bs); 212 if (len < 0) { 213 return len; 214 } 215 if (s->data_end + space > (len >> BDRV_SECTOR_BITS)) { 216 space += s->prealloc_size; 217 /* 218 * We require the expanded size to read back as zero. If the 219 * user permitted truncation, we try that; but if it fails, we 220 * force the safer-but-slower fallocate. 221 */ 222 if (s->prealloc_mode == PRL_PREALLOC_MODE_TRUNCATE) { 223 ret = bdrv_co_truncate(bs->file, 224 (s->data_end + space) << BDRV_SECTOR_BITS, 225 false, PREALLOC_MODE_OFF, 226 BDRV_REQ_ZERO_WRITE, NULL); 227 if (ret == -ENOTSUP) { 228 s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE; 229 } 230 } 231 if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) { 232 ret = bdrv_co_pwrite_zeroes(bs->file, 233 s->data_end << BDRV_SECTOR_BITS, 234 space << BDRV_SECTOR_BITS, 0); 235 } 236 if (ret < 0) { 237 return ret; 238 } 239 } 240 241 /* 242 * Try to read from backing to fill empty clusters 243 * FIXME: 1. previous write_zeroes may be redundant 244 * 2. most of data we read from backing will be rewritten by 245 * parallels_co_writev. On aligned-to-cluster write we do not need 246 * this read at all. 247 * 3. it would be good to combine write of data from backing and new 248 * data into one write call. 249 */ 250 if (bs->backing) { 251 int64_t nb_cow_sectors = to_allocate * s->tracks; 252 int64_t nb_cow_bytes = nb_cow_sectors << BDRV_SECTOR_BITS; 253 void *buf = qemu_blockalign(bs, nb_cow_bytes); 254 255 ret = bdrv_co_pread(bs->backing, idx * s->tracks * BDRV_SECTOR_SIZE, 256 nb_cow_bytes, buf, 0); 257 if (ret < 0) { 258 qemu_vfree(buf); 259 return ret; 260 } 261 262 ret = bdrv_co_pwrite(bs->file, s->data_end * BDRV_SECTOR_SIZE, 263 nb_cow_bytes, buf, 0); 264 qemu_vfree(buf); 265 if (ret < 0) { 266 return ret; 267 } 268 } 269 270 for (i = 0; i < to_allocate; i++) { 271 parallels_set_bat_entry(s, idx + i, s->data_end / s->off_multiplier); 272 s->data_end += s->tracks; 273 } 274 275 return bat2sect(s, idx) + sector_num % s->tracks; 276 } 277 278 279 static int coroutine_fn GRAPH_RDLOCK 280 parallels_co_flush_to_os(BlockDriverState *bs) 281 { 282 BDRVParallelsState *s = bs->opaque; 283 unsigned long size = DIV_ROUND_UP(s->header_size, s->bat_dirty_block); 284 unsigned long bit; 285 286 qemu_co_mutex_lock(&s->lock); 287 288 bit = find_first_bit(s->bat_dirty_bmap, size); 289 while (bit < size) { 290 uint32_t off = bit * s->bat_dirty_block; 291 uint32_t to_write = s->bat_dirty_block; 292 int ret; 293 294 if (off + to_write > s->header_size) { 295 to_write = s->header_size - off; 296 } 297 ret = bdrv_co_pwrite(bs->file, off, to_write, 298 (uint8_t *)s->header + off, 0); 299 if (ret < 0) { 300 qemu_co_mutex_unlock(&s->lock); 301 return ret; 302 } 303 bit = find_next_bit(s->bat_dirty_bmap, size, bit + 1); 304 } 305 bitmap_zero(s->bat_dirty_bmap, size); 306 307 qemu_co_mutex_unlock(&s->lock); 308 return 0; 309 } 310 311 312 static int coroutine_fn parallels_co_block_status(BlockDriverState *bs, 313 bool want_zero, 314 int64_t offset, 315 int64_t bytes, 316 int64_t *pnum, 317 int64_t *map, 318 BlockDriverState **file) 319 { 320 BDRVParallelsState *s = bs->opaque; 321 int count; 322 323 assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE)); 324 qemu_co_mutex_lock(&s->lock); 325 offset = block_status(s, offset >> BDRV_SECTOR_BITS, 326 bytes >> BDRV_SECTOR_BITS, &count); 327 qemu_co_mutex_unlock(&s->lock); 328 329 *pnum = count * BDRV_SECTOR_SIZE; 330 if (offset < 0) { 331 return 0; 332 } 333 334 *map = offset * BDRV_SECTOR_SIZE; 335 *file = bs->file->bs; 336 return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; 337 } 338 339 static int coroutine_fn GRAPH_RDLOCK 340 parallels_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, 341 QEMUIOVector *qiov, int flags) 342 { 343 BDRVParallelsState *s = bs->opaque; 344 uint64_t bytes_done = 0; 345 QEMUIOVector hd_qiov; 346 int ret = 0; 347 348 qemu_iovec_init(&hd_qiov, qiov->niov); 349 350 while (nb_sectors > 0) { 351 int64_t position; 352 int n, nbytes; 353 354 qemu_co_mutex_lock(&s->lock); 355 position = allocate_clusters(bs, sector_num, nb_sectors, &n); 356 qemu_co_mutex_unlock(&s->lock); 357 if (position < 0) { 358 ret = (int)position; 359 break; 360 } 361 362 nbytes = n << BDRV_SECTOR_BITS; 363 364 qemu_iovec_reset(&hd_qiov); 365 qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes); 366 367 ret = bdrv_co_pwritev(bs->file, position * BDRV_SECTOR_SIZE, nbytes, 368 &hd_qiov, 0); 369 if (ret < 0) { 370 break; 371 } 372 373 nb_sectors -= n; 374 sector_num += n; 375 bytes_done += nbytes; 376 } 377 378 qemu_iovec_destroy(&hd_qiov); 379 return ret; 380 } 381 382 static int coroutine_fn GRAPH_RDLOCK 383 parallels_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, 384 QEMUIOVector *qiov) 385 { 386 BDRVParallelsState *s = bs->opaque; 387 uint64_t bytes_done = 0; 388 QEMUIOVector hd_qiov; 389 int ret = 0; 390 391 qemu_iovec_init(&hd_qiov, qiov->niov); 392 393 while (nb_sectors > 0) { 394 int64_t position; 395 int n, nbytes; 396 397 qemu_co_mutex_lock(&s->lock); 398 position = block_status(s, sector_num, nb_sectors, &n); 399 qemu_co_mutex_unlock(&s->lock); 400 401 nbytes = n << BDRV_SECTOR_BITS; 402 403 qemu_iovec_reset(&hd_qiov); 404 qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes); 405 406 if (position < 0) { 407 if (bs->backing) { 408 ret = bdrv_co_preadv(bs->backing, sector_num * BDRV_SECTOR_SIZE, 409 nbytes, &hd_qiov, 0); 410 if (ret < 0) { 411 break; 412 } 413 } else { 414 qemu_iovec_memset(&hd_qiov, 0, 0, nbytes); 415 } 416 } else { 417 ret = bdrv_co_preadv(bs->file, position * BDRV_SECTOR_SIZE, nbytes, 418 &hd_qiov, 0); 419 if (ret < 0) { 420 break; 421 } 422 } 423 424 nb_sectors -= n; 425 sector_num += n; 426 bytes_done += nbytes; 427 } 428 429 qemu_iovec_destroy(&hd_qiov); 430 return ret; 431 } 432 433 static void parallels_check_unclean(BlockDriverState *bs, 434 BdrvCheckResult *res, 435 BdrvCheckMode fix) 436 { 437 BDRVParallelsState *s = bs->opaque; 438 439 if (!s->header_unclean) { 440 return; 441 } 442 443 fprintf(stderr, "%s image was not closed correctly\n", 444 fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR"); 445 res->corruptions++; 446 if (fix & BDRV_FIX_ERRORS) { 447 /* parallels_close will do the job right */ 448 res->corruptions_fixed++; 449 s->header_unclean = false; 450 } 451 } 452 453 /* 454 * Returns true if data_off is correct, otherwise false. In both cases 455 * correct_offset is set to the proper value. 456 */ 457 static bool parallels_test_data_off(BDRVParallelsState *s, 458 int64_t file_nb_sectors, 459 uint32_t *correct_offset) 460 { 461 uint32_t data_off, min_off; 462 bool old_magic; 463 464 /* 465 * There are two slightly different image formats: with "WithoutFreeSpace" 466 * or "WithouFreSpacExt" magic words. Call the first one as "old magic". 467 * In such images data_off field can be zero. In this case the offset is 468 * calculated as the end of BAT table plus some padding to ensure sector 469 * size alignment. 470 */ 471 old_magic = !memcmp(s->header->magic, HEADER_MAGIC, 16); 472 473 min_off = DIV_ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE); 474 if (!old_magic) { 475 min_off = ROUND_UP(min_off, s->cluster_size / BDRV_SECTOR_SIZE); 476 } 477 478 if (correct_offset) { 479 *correct_offset = min_off; 480 } 481 482 data_off = le32_to_cpu(s->header->data_off); 483 if (data_off == 0 && old_magic) { 484 return true; 485 } 486 487 if (data_off < min_off || data_off > file_nb_sectors) { 488 return false; 489 } 490 491 if (correct_offset) { 492 *correct_offset = data_off; 493 } 494 495 return true; 496 } 497 498 static int coroutine_fn GRAPH_RDLOCK 499 parallels_check_data_off(BlockDriverState *bs, BdrvCheckResult *res, 500 BdrvCheckMode fix) 501 { 502 BDRVParallelsState *s = bs->opaque; 503 int64_t file_size; 504 uint32_t data_off; 505 506 file_size = bdrv_co_nb_sectors(bs->file->bs); 507 if (file_size < 0) { 508 res->check_errors++; 509 return file_size; 510 } 511 512 if (parallels_test_data_off(s, file_size, &data_off)) { 513 return 0; 514 } 515 516 res->corruptions++; 517 if (fix & BDRV_FIX_ERRORS) { 518 s->header->data_off = cpu_to_le32(data_off); 519 res->corruptions_fixed++; 520 } 521 522 fprintf(stderr, "%s data_off field has incorrect value\n", 523 fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR"); 524 525 return 0; 526 } 527 528 static int coroutine_fn GRAPH_RDLOCK 529 parallels_check_outside_image(BlockDriverState *bs, BdrvCheckResult *res, 530 BdrvCheckMode fix) 531 { 532 BDRVParallelsState *s = bs->opaque; 533 uint32_t i; 534 int64_t off, high_off, size; 535 536 size = bdrv_co_getlength(bs->file->bs); 537 if (size < 0) { 538 res->check_errors++; 539 return size; 540 } 541 542 high_off = 0; 543 for (i = 0; i < s->bat_size; i++) { 544 off = bat2sect(s, i) << BDRV_SECTOR_BITS; 545 if (off + s->cluster_size > size) { 546 fprintf(stderr, "%s cluster %u is outside image\n", 547 fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); 548 res->corruptions++; 549 if (fix & BDRV_FIX_ERRORS) { 550 parallels_set_bat_entry(s, i, 0); 551 res->corruptions_fixed++; 552 } 553 continue; 554 } 555 if (high_off < off) { 556 high_off = off; 557 } 558 } 559 560 if (high_off == 0) { 561 res->image_end_offset = s->data_end << BDRV_SECTOR_BITS; 562 } else { 563 res->image_end_offset = high_off + s->cluster_size; 564 s->data_end = res->image_end_offset >> BDRV_SECTOR_BITS; 565 } 566 567 return 0; 568 } 569 570 static int coroutine_fn GRAPH_RDLOCK 571 parallels_check_leak(BlockDriverState *bs, BdrvCheckResult *res, 572 BdrvCheckMode fix, bool explicit) 573 { 574 BDRVParallelsState *s = bs->opaque; 575 int64_t size; 576 int ret; 577 578 size = bdrv_co_getlength(bs->file->bs); 579 if (size < 0) { 580 res->check_errors++; 581 return size; 582 } 583 584 if (size > res->image_end_offset) { 585 int64_t count; 586 count = DIV_ROUND_UP(size - res->image_end_offset, s->cluster_size); 587 if (explicit) { 588 fprintf(stderr, 589 "%s space leaked at the end of the image %" PRId64 "\n", 590 fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", 591 size - res->image_end_offset); 592 res->leaks += count; 593 } 594 if (fix & BDRV_FIX_LEAKS) { 595 Error *local_err = NULL; 596 597 /* 598 * In order to really repair the image, we must shrink it. 599 * That means we have to pass exact=true. 600 */ 601 ret = bdrv_co_truncate(bs->file, res->image_end_offset, true, 602 PREALLOC_MODE_OFF, 0, &local_err); 603 if (ret < 0) { 604 error_report_err(local_err); 605 res->check_errors++; 606 return ret; 607 } 608 if (explicit) { 609 res->leaks_fixed += count; 610 } 611 } 612 } 613 614 return 0; 615 } 616 617 static int coroutine_fn GRAPH_RDLOCK 618 parallels_check_duplicate(BlockDriverState *bs, BdrvCheckResult *res, 619 BdrvCheckMode fix) 620 { 621 BDRVParallelsState *s = bs->opaque; 622 int64_t host_off, host_sector, guest_sector; 623 unsigned long *bitmap; 624 uint32_t i, bitmap_size, cluster_index, bat_entry; 625 int n, ret = 0; 626 uint64_t *buf = NULL; 627 bool fixed = false; 628 629 /* 630 * Create a bitmap of used clusters. 631 * If a bit is set, there is a BAT entry pointing to this cluster. 632 * Loop through the BAT entries, check bits relevant to an entry offset. 633 * If bit is set, this entry is duplicated. Otherwise set the bit. 634 * 635 * We shouldn't worry about newly allocated clusters outside the image 636 * because they are created higher then any existing cluster pointed by 637 * a BAT entry. 638 */ 639 bitmap_size = host_cluster_index(s, res->image_end_offset); 640 if (bitmap_size == 0) { 641 return 0; 642 } 643 if (res->image_end_offset % s->cluster_size) { 644 /* A not aligned image end leads to a bitmap shorter by 1 */ 645 bitmap_size++; 646 } 647 648 bitmap = bitmap_new(bitmap_size); 649 650 buf = qemu_blockalign(bs, s->cluster_size); 651 652 for (i = 0; i < s->bat_size; i++) { 653 host_off = bat2sect(s, i) << BDRV_SECTOR_BITS; 654 if (host_off == 0) { 655 continue; 656 } 657 658 cluster_index = host_cluster_index(s, host_off); 659 assert(cluster_index < bitmap_size); 660 if (!test_bit(cluster_index, bitmap)) { 661 bitmap_set(bitmap, cluster_index, 1); 662 continue; 663 } 664 665 /* this cluster duplicates another one */ 666 fprintf(stderr, "%s duplicate offset in BAT entry %u\n", 667 fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); 668 669 res->corruptions++; 670 671 if (!(fix & BDRV_FIX_ERRORS)) { 672 continue; 673 } 674 675 /* 676 * Reset the entry and allocate a new cluster 677 * for the relevant guest offset. In this way we let 678 * the lower layer to place the new cluster properly. 679 * Copy the original cluster to the allocated one. 680 * But before save the old offset value for repairing 681 * if we have an error. 682 */ 683 bat_entry = s->bat_bitmap[i]; 684 parallels_set_bat_entry(s, i, 0); 685 686 ret = bdrv_co_pread(bs->file, host_off, s->cluster_size, buf, 0); 687 if (ret < 0) { 688 res->check_errors++; 689 goto out_repair_bat; 690 } 691 692 guest_sector = (i * (int64_t)s->cluster_size) >> BDRV_SECTOR_BITS; 693 host_sector = allocate_clusters(bs, guest_sector, s->tracks, &n); 694 if (host_sector < 0) { 695 res->check_errors++; 696 goto out_repair_bat; 697 } 698 host_off = host_sector << BDRV_SECTOR_BITS; 699 700 ret = bdrv_co_pwrite(bs->file, host_off, s->cluster_size, buf, 0); 701 if (ret < 0) { 702 res->check_errors++; 703 goto out_repair_bat; 704 } 705 706 if (host_off + s->cluster_size > res->image_end_offset) { 707 res->image_end_offset = host_off + s->cluster_size; 708 } 709 710 /* 711 * In the future allocate_cluster() will reuse holed offsets 712 * inside the image. Keep the used clusters bitmap content 713 * consistent for the new allocated clusters too. 714 * 715 * Note, clusters allocated outside the current image are not 716 * considered, and the bitmap size doesn't change. 717 */ 718 cluster_index = host_cluster_index(s, host_off); 719 if (cluster_index < bitmap_size) { 720 bitmap_set(bitmap, cluster_index, 1); 721 } 722 723 fixed = true; 724 res->corruptions_fixed++; 725 726 } 727 728 if (fixed) { 729 /* 730 * When new clusters are allocated, the file size increases by 731 * 128 Mb. We need to truncate the file to the right size. Let 732 * the leak fix code make its job without res changing. 733 */ 734 ret = parallels_check_leak(bs, res, fix, false); 735 } 736 737 out_free: 738 g_free(buf); 739 g_free(bitmap); 740 return ret; 741 /* 742 * We can get here only from places where index and old_offset have 743 * meaningful values. 744 */ 745 out_repair_bat: 746 s->bat_bitmap[i] = bat_entry; 747 goto out_free; 748 } 749 750 static void parallels_collect_statistics(BlockDriverState *bs, 751 BdrvCheckResult *res, 752 BdrvCheckMode fix) 753 { 754 BDRVParallelsState *s = bs->opaque; 755 int64_t off, prev_off; 756 uint32_t i; 757 758 res->bfi.total_clusters = s->bat_size; 759 res->bfi.compressed_clusters = 0; /* compression is not supported */ 760 761 prev_off = 0; 762 for (i = 0; i < s->bat_size; i++) { 763 off = bat2sect(s, i) << BDRV_SECTOR_BITS; 764 /* 765 * If BDRV_FIX_ERRORS is not set, out-of-image BAT entries were not 766 * fixed. Skip not allocated and out-of-image BAT entries. 767 */ 768 if (off == 0 || off + s->cluster_size > res->image_end_offset) { 769 prev_off = 0; 770 continue; 771 } 772 773 if (prev_off != 0 && (prev_off + s->cluster_size) != off) { 774 res->bfi.fragmented_clusters++; 775 } 776 prev_off = off; 777 res->bfi.allocated_clusters++; 778 } 779 } 780 781 static int coroutine_fn GRAPH_RDLOCK 782 parallels_co_check(BlockDriverState *bs, BdrvCheckResult *res, 783 BdrvCheckMode fix) 784 { 785 BDRVParallelsState *s = bs->opaque; 786 int ret; 787 788 WITH_QEMU_LOCK_GUARD(&s->lock) { 789 parallels_check_unclean(bs, res, fix); 790 791 ret = parallels_check_data_off(bs, res, fix); 792 if (ret < 0) { 793 return ret; 794 } 795 796 ret = parallels_check_outside_image(bs, res, fix); 797 if (ret < 0) { 798 return ret; 799 } 800 801 ret = parallels_check_leak(bs, res, fix, true); 802 if (ret < 0) { 803 return ret; 804 } 805 806 ret = parallels_check_duplicate(bs, res, fix); 807 if (ret < 0) { 808 return ret; 809 } 810 811 parallels_collect_statistics(bs, res, fix); 812 } 813 814 ret = bdrv_co_flush(bs); 815 if (ret < 0) { 816 res->check_errors++; 817 } 818 819 return ret; 820 } 821 822 823 static int coroutine_fn GRAPH_UNLOCKED 824 parallels_co_create(BlockdevCreateOptions* opts, Error **errp) 825 { 826 BlockdevCreateOptionsParallels *parallels_opts; 827 BlockDriverState *bs; 828 BlockBackend *blk; 829 int64_t total_size, cl_size; 830 uint32_t bat_entries, bat_sectors; 831 ParallelsHeader header; 832 uint8_t tmp[BDRV_SECTOR_SIZE]; 833 int ret; 834 835 assert(opts->driver == BLOCKDEV_DRIVER_PARALLELS); 836 parallels_opts = &opts->u.parallels; 837 838 /* Sanity checks */ 839 total_size = parallels_opts->size; 840 841 if (parallels_opts->has_cluster_size) { 842 cl_size = parallels_opts->cluster_size; 843 } else { 844 cl_size = DEFAULT_CLUSTER_SIZE; 845 } 846 847 /* XXX What is the real limit here? This is an insanely large maximum. */ 848 if (cl_size >= INT64_MAX / MAX_PARALLELS_IMAGE_FACTOR) { 849 error_setg(errp, "Cluster size is too large"); 850 return -EINVAL; 851 } 852 if (total_size >= MAX_PARALLELS_IMAGE_FACTOR * cl_size) { 853 error_setg(errp, "Image size is too large for this cluster size"); 854 return -E2BIG; 855 } 856 857 if (!QEMU_IS_ALIGNED(total_size, BDRV_SECTOR_SIZE)) { 858 error_setg(errp, "Image size must be a multiple of 512 bytes"); 859 return -EINVAL; 860 } 861 862 if (!QEMU_IS_ALIGNED(cl_size, BDRV_SECTOR_SIZE)) { 863 error_setg(errp, "Cluster size must be a multiple of 512 bytes"); 864 return -EINVAL; 865 } 866 867 /* Create BlockBackend to write to the image */ 868 bs = bdrv_co_open_blockdev_ref(parallels_opts->file, errp); 869 if (bs == NULL) { 870 return -EIO; 871 } 872 873 blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE, BLK_PERM_ALL, 874 errp); 875 if (!blk) { 876 ret = -EPERM; 877 goto out; 878 } 879 blk_set_allow_write_beyond_eof(blk, true); 880 881 /* Create image format */ 882 bat_entries = DIV_ROUND_UP(total_size, cl_size); 883 bat_sectors = DIV_ROUND_UP(bat_entry_off(bat_entries), cl_size); 884 bat_sectors = (bat_sectors * cl_size) >> BDRV_SECTOR_BITS; 885 886 memset(&header, 0, sizeof(header)); 887 memcpy(header.magic, HEADER_MAGIC2, sizeof(header.magic)); 888 header.version = cpu_to_le32(HEADER_VERSION); 889 /* don't care much about geometry, it is not used on image level */ 890 header.heads = cpu_to_le32(HEADS_NUMBER); 891 header.cylinders = cpu_to_le32(total_size / BDRV_SECTOR_SIZE 892 / HEADS_NUMBER / SEC_IN_CYL); 893 header.tracks = cpu_to_le32(cl_size >> BDRV_SECTOR_BITS); 894 header.bat_entries = cpu_to_le32(bat_entries); 895 header.nb_sectors = cpu_to_le64(DIV_ROUND_UP(total_size, BDRV_SECTOR_SIZE)); 896 header.data_off = cpu_to_le32(bat_sectors); 897 898 /* write all the data */ 899 memset(tmp, 0, sizeof(tmp)); 900 memcpy(tmp, &header, sizeof(header)); 901 902 ret = blk_co_pwrite(blk, 0, BDRV_SECTOR_SIZE, tmp, 0); 903 if (ret < 0) { 904 goto exit; 905 } 906 ret = blk_co_pwrite_zeroes(blk, BDRV_SECTOR_SIZE, 907 (bat_sectors - 1) << BDRV_SECTOR_BITS, 0); 908 if (ret < 0) { 909 goto exit; 910 } 911 912 ret = 0; 913 out: 914 blk_co_unref(blk); 915 bdrv_co_unref(bs); 916 return ret; 917 918 exit: 919 error_setg_errno(errp, -ret, "Failed to create Parallels image"); 920 goto out; 921 } 922 923 static int coroutine_fn GRAPH_UNLOCKED 924 parallels_co_create_opts(BlockDriver *drv, const char *filename, 925 QemuOpts *opts, Error **errp) 926 { 927 BlockdevCreateOptions *create_options = NULL; 928 BlockDriverState *bs = NULL; 929 QDict *qdict; 930 Visitor *v; 931 int ret; 932 933 static const QDictRenames opt_renames[] = { 934 { BLOCK_OPT_CLUSTER_SIZE, "cluster-size" }, 935 { NULL, NULL }, 936 }; 937 938 /* Parse options and convert legacy syntax */ 939 qdict = qemu_opts_to_qdict_filtered(opts, NULL, ¶llels_create_opts, 940 true); 941 942 if (!qdict_rename_keys(qdict, opt_renames, errp)) { 943 ret = -EINVAL; 944 goto done; 945 } 946 947 /* Create and open the file (protocol layer) */ 948 ret = bdrv_co_create_file(filename, opts, errp); 949 if (ret < 0) { 950 goto done; 951 } 952 953 bs = bdrv_co_open(filename, NULL, NULL, 954 BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp); 955 if (bs == NULL) { 956 ret = -EIO; 957 goto done; 958 } 959 960 /* Now get the QAPI type BlockdevCreateOptions */ 961 qdict_put_str(qdict, "driver", "parallels"); 962 qdict_put_str(qdict, "file", bs->node_name); 963 964 v = qobject_input_visitor_new_flat_confused(qdict, errp); 965 if (!v) { 966 ret = -EINVAL; 967 goto done; 968 } 969 970 visit_type_BlockdevCreateOptions(v, NULL, &create_options, errp); 971 visit_free(v); 972 if (!create_options) { 973 ret = -EINVAL; 974 goto done; 975 } 976 977 /* Silently round up sizes */ 978 create_options->u.parallels.size = 979 ROUND_UP(create_options->u.parallels.size, BDRV_SECTOR_SIZE); 980 create_options->u.parallels.cluster_size = 981 ROUND_UP(create_options->u.parallels.cluster_size, BDRV_SECTOR_SIZE); 982 983 /* Create the Parallels image (format layer) */ 984 ret = parallels_co_create(create_options, errp); 985 if (ret < 0) { 986 goto done; 987 } 988 ret = 0; 989 990 done: 991 qobject_unref(qdict); 992 bdrv_co_unref(bs); 993 qapi_free_BlockdevCreateOptions(create_options); 994 return ret; 995 } 996 997 998 static int parallels_probe(const uint8_t *buf, int buf_size, 999 const char *filename) 1000 { 1001 const ParallelsHeader *ph = (const void *)buf; 1002 1003 if (buf_size < sizeof(ParallelsHeader)) { 1004 return 0; 1005 } 1006 1007 if ((!memcmp(ph->magic, HEADER_MAGIC, 16) || 1008 !memcmp(ph->magic, HEADER_MAGIC2, 16)) && 1009 (le32_to_cpu(ph->version) == HEADER_VERSION)) { 1010 return 100; 1011 } 1012 1013 return 0; 1014 } 1015 1016 static int parallels_update_header(BlockDriverState *bs) 1017 { 1018 BDRVParallelsState *s = bs->opaque; 1019 unsigned size = MAX(bdrv_opt_mem_align(bs->file->bs), 1020 sizeof(ParallelsHeader)); 1021 1022 if (size > s->header_size) { 1023 size = s->header_size; 1024 } 1025 return bdrv_pwrite_sync(bs->file, 0, size, s->header, 0); 1026 } 1027 1028 static int parallels_open(BlockDriverState *bs, QDict *options, int flags, 1029 Error **errp) 1030 { 1031 BDRVParallelsState *s = bs->opaque; 1032 ParallelsHeader ph; 1033 int ret, size, i; 1034 int64_t file_nb_sectors, sector; 1035 uint32_t data_start; 1036 QemuOpts *opts = NULL; 1037 Error *local_err = NULL; 1038 char *buf; 1039 bool data_off_is_correct; 1040 1041 ret = bdrv_open_file_child(NULL, options, "file", bs, errp); 1042 if (ret < 0) { 1043 return ret; 1044 } 1045 1046 file_nb_sectors = bdrv_nb_sectors(bs->file->bs); 1047 if (file_nb_sectors < 0) { 1048 return -EINVAL; 1049 } 1050 1051 ret = bdrv_pread(bs->file, 0, sizeof(ph), &ph, 0); 1052 if (ret < 0) { 1053 goto fail; 1054 } 1055 1056 bs->total_sectors = le64_to_cpu(ph.nb_sectors); 1057 1058 if (le32_to_cpu(ph.version) != HEADER_VERSION) { 1059 goto fail_format; 1060 } 1061 if (!memcmp(ph.magic, HEADER_MAGIC, 16)) { 1062 s->off_multiplier = 1; 1063 bs->total_sectors = 0xffffffff & bs->total_sectors; 1064 } else if (!memcmp(ph.magic, HEADER_MAGIC2, 16)) { 1065 s->off_multiplier = le32_to_cpu(ph.tracks); 1066 } else { 1067 goto fail_format; 1068 } 1069 1070 s->tracks = le32_to_cpu(ph.tracks); 1071 if (s->tracks == 0) { 1072 error_setg(errp, "Invalid image: Zero sectors per track"); 1073 ret = -EINVAL; 1074 goto fail; 1075 } 1076 if (s->tracks > INT32_MAX/513) { 1077 error_setg(errp, "Invalid image: Too big cluster"); 1078 ret = -EFBIG; 1079 goto fail; 1080 } 1081 s->cluster_size = s->tracks << BDRV_SECTOR_BITS; 1082 1083 s->bat_size = le32_to_cpu(ph.bat_entries); 1084 if (s->bat_size > INT_MAX / sizeof(uint32_t)) { 1085 error_setg(errp, "Catalog too large"); 1086 ret = -EFBIG; 1087 goto fail; 1088 } 1089 1090 size = bat_entry_off(s->bat_size); 1091 s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file->bs)); 1092 s->header = qemu_try_blockalign(bs->file->bs, s->header_size); 1093 if (s->header == NULL) { 1094 ret = -ENOMEM; 1095 goto fail; 1096 } 1097 1098 ret = bdrv_pread(bs->file, 0, s->header_size, s->header, 0); 1099 if (ret < 0) { 1100 goto fail; 1101 } 1102 s->bat_bitmap = (uint32_t *)(s->header + 1); 1103 1104 if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) { 1105 s->header_unclean = true; 1106 } 1107 1108 data_off_is_correct = parallels_test_data_off(s, file_nb_sectors, 1109 &data_start); 1110 s->data_start = data_start; 1111 s->data_end = s->data_start; 1112 if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) { 1113 /* 1114 * There is not enough unused space to fit to block align between BAT 1115 * and actual data. We can't avoid read-modify-write... 1116 */ 1117 s->header_size = size; 1118 } 1119 1120 opts = qemu_opts_create(¶llels_runtime_opts, NULL, 0, errp); 1121 if (!opts) { 1122 goto fail_options; 1123 } 1124 1125 if (!qemu_opts_absorb_qdict(opts, options, errp)) { 1126 goto fail_options; 1127 } 1128 1129 s->prealloc_size = 1130 qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0); 1131 s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS); 1132 buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE); 1133 /* prealloc_mode can be downgraded later during allocate_clusters */ 1134 s->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf, 1135 PRL_PREALLOC_MODE_FALLOCATE, 1136 &local_err); 1137 g_free(buf); 1138 if (local_err != NULL) { 1139 error_propagate(errp, local_err); 1140 goto fail_options; 1141 } 1142 1143 if (ph.ext_off) { 1144 if (flags & BDRV_O_RDWR) { 1145 /* 1146 * It's unsafe to open image RW if there is an extension (as we 1147 * don't support it). But parallels driver in QEMU historically 1148 * ignores the extension, so print warning and don't care. 1149 */ 1150 warn_report("Format Extension ignored in RW mode"); 1151 } else { 1152 ret = parallels_read_format_extension( 1153 bs, le64_to_cpu(ph.ext_off) << BDRV_SECTOR_BITS, errp); 1154 if (ret < 0) { 1155 goto fail; 1156 } 1157 } 1158 } 1159 1160 if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_INACTIVE)) { 1161 s->header->inuse = cpu_to_le32(HEADER_INUSE_MAGIC); 1162 ret = parallels_update_header(bs); 1163 if (ret < 0) { 1164 goto fail; 1165 } 1166 } 1167 1168 s->bat_dirty_block = 4 * qemu_real_host_page_size(); 1169 s->bat_dirty_bmap = 1170 bitmap_new(DIV_ROUND_UP(s->header_size, s->bat_dirty_block)); 1171 1172 /* Disable migration until bdrv_activate method is added */ 1173 error_setg(&s->migration_blocker, "The Parallels format used by node '%s' " 1174 "does not support live migration", 1175 bdrv_get_device_or_node_name(bs)); 1176 ret = migrate_add_blocker(s->migration_blocker, errp); 1177 if (ret < 0) { 1178 error_setg(errp, "Migration blocker error"); 1179 goto fail; 1180 } 1181 qemu_co_mutex_init(&s->lock); 1182 1183 for (i = 0; i < s->bat_size; i++) { 1184 sector = bat2sect(s, i); 1185 if (sector + s->tracks > s->data_end) { 1186 s->data_end = sector + s->tracks; 1187 } 1188 } 1189 1190 /* 1191 * We don't repair the image here if it's opened for checks. Also we don't 1192 * want to change inactive images and can't change readonly images. 1193 */ 1194 if ((flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) || !(flags & BDRV_O_RDWR)) { 1195 return 0; 1196 } 1197 1198 /* 1199 * Repair the image if it's dirty or 1200 * out-of-image corruption was detected. 1201 */ 1202 if (s->data_end > file_nb_sectors || s->header_unclean 1203 || !data_off_is_correct) { 1204 BdrvCheckResult res; 1205 ret = bdrv_check(bs, &res, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS); 1206 if (ret < 0) { 1207 error_setg_errno(errp, -ret, "Could not repair corrupted image"); 1208 migrate_del_blocker(s->migration_blocker); 1209 goto fail; 1210 } 1211 } 1212 1213 return 0; 1214 1215 fail_format: 1216 error_setg(errp, "Image not in Parallels format"); 1217 fail_options: 1218 ret = -EINVAL; 1219 fail: 1220 /* 1221 * "s" object was allocated by g_malloc0 so we can safely 1222 * try to free its fields even they were not allocated. 1223 */ 1224 error_free(s->migration_blocker); 1225 g_free(s->bat_dirty_bmap); 1226 qemu_vfree(s->header); 1227 return ret; 1228 } 1229 1230 1231 static void parallels_close(BlockDriverState *bs) 1232 { 1233 BDRVParallelsState *s = bs->opaque; 1234 1235 if ((bs->open_flags & BDRV_O_RDWR) && !(bs->open_flags & BDRV_O_INACTIVE)) { 1236 s->header->inuse = 0; 1237 parallels_update_header(bs); 1238 1239 /* errors are ignored, so we might as well pass exact=true */ 1240 bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS, true, 1241 PREALLOC_MODE_OFF, 0, NULL); 1242 } 1243 1244 g_free(s->bat_dirty_bmap); 1245 qemu_vfree(s->header); 1246 1247 migrate_del_blocker(s->migration_blocker); 1248 error_free(s->migration_blocker); 1249 } 1250 1251 static BlockDriver bdrv_parallels = { 1252 .format_name = "parallels", 1253 .instance_size = sizeof(BDRVParallelsState), 1254 .bdrv_probe = parallels_probe, 1255 .bdrv_open = parallels_open, 1256 .bdrv_close = parallels_close, 1257 .bdrv_child_perm = bdrv_default_perms, 1258 .bdrv_co_block_status = parallels_co_block_status, 1259 .bdrv_has_zero_init = bdrv_has_zero_init_1, 1260 .bdrv_co_flush_to_os = parallels_co_flush_to_os, 1261 .bdrv_co_readv = parallels_co_readv, 1262 .bdrv_co_writev = parallels_co_writev, 1263 .is_format = true, 1264 .supports_backing = true, 1265 .bdrv_co_create = parallels_co_create, 1266 .bdrv_co_create_opts = parallels_co_create_opts, 1267 .bdrv_co_check = parallels_co_check, 1268 .create_opts = ¶llels_create_opts, 1269 }; 1270 1271 static void bdrv_parallels_init(void) 1272 { 1273 bdrv_register(&bdrv_parallels); 1274 } 1275 1276 block_init(bdrv_parallels_init); 1277