1 /* 2 * Block driver for the QCOW version 2 format 3 * 4 * Copyright (c) 2004-2006 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25 #include "qemu-common.h" 26 #include "block/block_int.h" 27 #include "block/qcow2.h" 28 29 typedef struct QEMU_PACKED QCowSnapshotHeader { 30 /* header is 8 byte aligned */ 31 uint64_t l1_table_offset; 32 33 uint32_t l1_size; 34 uint16_t id_str_size; 35 uint16_t name_size; 36 37 uint32_t date_sec; 38 uint32_t date_nsec; 39 40 uint64_t vm_clock_nsec; 41 42 uint32_t vm_state_size; 43 uint32_t extra_data_size; /* for extension */ 44 /* extra data follows */ 45 /* id_str follows */ 46 /* name follows */ 47 } QCowSnapshotHeader; 48 49 typedef struct QEMU_PACKED QCowSnapshotExtraData { 50 uint64_t vm_state_size_large; 51 uint64_t disk_size; 52 } QCowSnapshotExtraData; 53 54 void qcow2_free_snapshots(BlockDriverState *bs) 55 { 56 BDRVQcowState *s = bs->opaque; 57 int i; 58 59 for(i = 0; i < s->nb_snapshots; i++) { 60 g_free(s->snapshots[i].name); 61 g_free(s->snapshots[i].id_str); 62 } 63 g_free(s->snapshots); 64 s->snapshots = NULL; 65 s->nb_snapshots = 0; 66 } 67 68 int qcow2_read_snapshots(BlockDriverState *bs) 69 { 70 BDRVQcowState *s = bs->opaque; 71 QCowSnapshotHeader h; 72 QCowSnapshotExtraData extra; 73 QCowSnapshot *sn; 74 int i, id_str_size, name_size; 75 int64_t offset; 76 uint32_t extra_data_size; 77 int ret; 78 79 if (!s->nb_snapshots) { 80 s->snapshots = NULL; 81 s->snapshots_size = 0; 82 return 0; 83 } 84 85 offset = s->snapshots_offset; 86 s->snapshots = g_malloc0(s->nb_snapshots * sizeof(QCowSnapshot)); 87 88 for(i = 0; i < s->nb_snapshots; i++) { 89 /* Read statically sized part of the snapshot header */ 90 offset = align_offset(offset, 8); 91 ret = bdrv_pread(bs->file, offset, &h, sizeof(h)); 92 if (ret < 0) { 93 goto fail; 94 } 95 96 offset += sizeof(h); 97 sn = s->snapshots + i; 98 sn->l1_table_offset = be64_to_cpu(h.l1_table_offset); 99 sn->l1_size = be32_to_cpu(h.l1_size); 100 sn->vm_state_size = be32_to_cpu(h.vm_state_size); 101 sn->date_sec = be32_to_cpu(h.date_sec); 102 sn->date_nsec = be32_to_cpu(h.date_nsec); 103 sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec); 104 extra_data_size = be32_to_cpu(h.extra_data_size); 105 106 id_str_size = be16_to_cpu(h.id_str_size); 107 name_size = be16_to_cpu(h.name_size); 108 109 /* Read extra data */ 110 ret = bdrv_pread(bs->file, offset, &extra, 111 MIN(sizeof(extra), extra_data_size)); 112 if (ret < 0) { 113 goto fail; 114 } 115 offset += extra_data_size; 116 117 if (extra_data_size >= 8) { 118 sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large); 119 } 120 121 if (extra_data_size >= 16) { 122 sn->disk_size = be64_to_cpu(extra.disk_size); 123 } else { 124 sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE; 125 } 126 127 /* Read snapshot ID */ 128 sn->id_str = g_malloc(id_str_size + 1); 129 ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size); 130 if (ret < 0) { 131 goto fail; 132 } 133 offset += id_str_size; 134 sn->id_str[id_str_size] = '\0'; 135 136 /* Read snapshot name */ 137 sn->name = g_malloc(name_size + 1); 138 ret = bdrv_pread(bs->file, offset, sn->name, name_size); 139 if (ret < 0) { 140 goto fail; 141 } 142 offset += name_size; 143 sn->name[name_size] = '\0'; 144 } 145 146 s->snapshots_size = offset - s->snapshots_offset; 147 return 0; 148 149 fail: 150 qcow2_free_snapshots(bs); 151 return ret; 152 } 153 154 /* add at the end of the file a new list of snapshots */ 155 static int qcow2_write_snapshots(BlockDriverState *bs) 156 { 157 BDRVQcowState *s = bs->opaque; 158 QCowSnapshot *sn; 159 QCowSnapshotHeader h; 160 QCowSnapshotExtraData extra; 161 int i, name_size, id_str_size, snapshots_size; 162 struct { 163 uint32_t nb_snapshots; 164 uint64_t snapshots_offset; 165 } QEMU_PACKED header_data; 166 int64_t offset, snapshots_offset; 167 int ret; 168 169 /* compute the size of the snapshots */ 170 offset = 0; 171 for(i = 0; i < s->nb_snapshots; i++) { 172 sn = s->snapshots + i; 173 offset = align_offset(offset, 8); 174 offset += sizeof(h); 175 offset += sizeof(extra); 176 offset += strlen(sn->id_str); 177 offset += strlen(sn->name); 178 } 179 snapshots_size = offset; 180 181 /* Allocate space for the new snapshot list */ 182 snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size); 183 offset = snapshots_offset; 184 if (offset < 0) { 185 return offset; 186 } 187 ret = bdrv_flush(bs); 188 if (ret < 0) { 189 return ret; 190 } 191 192 /* The snapshot list position has not yet been updated, so these clusters 193 * must indeed be completely free */ 194 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, offset, 195 s->snapshots_size); 196 if (ret < 0) { 197 return ret; 198 } 199 200 201 /* Write all snapshots to the new list */ 202 for(i = 0; i < s->nb_snapshots; i++) { 203 sn = s->snapshots + i; 204 memset(&h, 0, sizeof(h)); 205 h.l1_table_offset = cpu_to_be64(sn->l1_table_offset); 206 h.l1_size = cpu_to_be32(sn->l1_size); 207 /* If it doesn't fit in 32 bit, older implementations should treat it 208 * as a disk-only snapshot rather than truncate the VM state */ 209 if (sn->vm_state_size <= 0xffffffff) { 210 h.vm_state_size = cpu_to_be32(sn->vm_state_size); 211 } 212 h.date_sec = cpu_to_be32(sn->date_sec); 213 h.date_nsec = cpu_to_be32(sn->date_nsec); 214 h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec); 215 h.extra_data_size = cpu_to_be32(sizeof(extra)); 216 217 memset(&extra, 0, sizeof(extra)); 218 extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size); 219 extra.disk_size = cpu_to_be64(sn->disk_size); 220 221 id_str_size = strlen(sn->id_str); 222 name_size = strlen(sn->name); 223 h.id_str_size = cpu_to_be16(id_str_size); 224 h.name_size = cpu_to_be16(name_size); 225 offset = align_offset(offset, 8); 226 227 ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h)); 228 if (ret < 0) { 229 goto fail; 230 } 231 offset += sizeof(h); 232 233 ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra)); 234 if (ret < 0) { 235 goto fail; 236 } 237 offset += sizeof(extra); 238 239 ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size); 240 if (ret < 0) { 241 goto fail; 242 } 243 offset += id_str_size; 244 245 ret = bdrv_pwrite(bs->file, offset, sn->name, name_size); 246 if (ret < 0) { 247 goto fail; 248 } 249 offset += name_size; 250 } 251 252 /* 253 * Update the header to point to the new snapshot table. This requires the 254 * new table and its refcounts to be stable on disk. 255 */ 256 ret = bdrv_flush(bs); 257 if (ret < 0) { 258 goto fail; 259 } 260 261 QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) != 262 offsetof(QCowHeader, nb_snapshots) + sizeof(header_data.nb_snapshots)); 263 264 header_data.nb_snapshots = cpu_to_be32(s->nb_snapshots); 265 header_data.snapshots_offset = cpu_to_be64(snapshots_offset); 266 267 ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots), 268 &header_data, sizeof(header_data)); 269 if (ret < 0) { 270 goto fail; 271 } 272 273 /* free the old snapshot table */ 274 qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size, 275 QCOW2_DISCARD_SNAPSHOT); 276 s->snapshots_offset = snapshots_offset; 277 s->snapshots_size = snapshots_size; 278 return 0; 279 280 fail: 281 return ret; 282 } 283 284 static void find_new_snapshot_id(BlockDriverState *bs, 285 char *id_str, int id_str_size) 286 { 287 BDRVQcowState *s = bs->opaque; 288 QCowSnapshot *sn; 289 int i, id, id_max = 0; 290 291 for(i = 0; i < s->nb_snapshots; i++) { 292 sn = s->snapshots + i; 293 id = strtoul(sn->id_str, NULL, 10); 294 if (id > id_max) 295 id_max = id; 296 } 297 snprintf(id_str, id_str_size, "%d", id_max + 1); 298 } 299 300 static int find_snapshot_by_id(BlockDriverState *bs, const char *id_str) 301 { 302 BDRVQcowState *s = bs->opaque; 303 int i; 304 305 for(i = 0; i < s->nb_snapshots; i++) { 306 if (!strcmp(s->snapshots[i].id_str, id_str)) 307 return i; 308 } 309 return -1; 310 } 311 312 static int find_snapshot_by_id_or_name(BlockDriverState *bs, const char *name) 313 { 314 BDRVQcowState *s = bs->opaque; 315 int i, ret; 316 317 ret = find_snapshot_by_id(bs, name); 318 if (ret >= 0) 319 return ret; 320 for(i = 0; i < s->nb_snapshots; i++) { 321 if (!strcmp(s->snapshots[i].name, name)) 322 return i; 323 } 324 return -1; 325 } 326 327 /* if no id is provided, a new one is constructed */ 328 int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) 329 { 330 BDRVQcowState *s = bs->opaque; 331 QCowSnapshot *new_snapshot_list = NULL; 332 QCowSnapshot *old_snapshot_list = NULL; 333 QCowSnapshot sn1, *sn = &sn1; 334 int i, ret; 335 uint64_t *l1_table = NULL; 336 int64_t l1_table_offset; 337 338 memset(sn, 0, sizeof(*sn)); 339 340 /* Generate an ID if it wasn't passed */ 341 if (sn_info->id_str[0] == '\0') { 342 find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str)); 343 } 344 345 /* Check that the ID is unique */ 346 if (find_snapshot_by_id(bs, sn_info->id_str) >= 0) { 347 return -EEXIST; 348 } 349 350 /* Populate sn with passed data */ 351 sn->id_str = g_strdup(sn_info->id_str); 352 sn->name = g_strdup(sn_info->name); 353 354 sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE; 355 sn->vm_state_size = sn_info->vm_state_size; 356 sn->date_sec = sn_info->date_sec; 357 sn->date_nsec = sn_info->date_nsec; 358 sn->vm_clock_nsec = sn_info->vm_clock_nsec; 359 360 /* Allocate the L1 table of the snapshot and copy the current one there. */ 361 l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t)); 362 if (l1_table_offset < 0) { 363 ret = l1_table_offset; 364 goto fail; 365 } 366 367 sn->l1_table_offset = l1_table_offset; 368 sn->l1_size = s->l1_size; 369 370 l1_table = g_malloc(s->l1_size * sizeof(uint64_t)); 371 for(i = 0; i < s->l1_size; i++) { 372 l1_table[i] = cpu_to_be64(s->l1_table[i]); 373 } 374 375 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, 376 sn->l1_table_offset, s->l1_size * sizeof(uint64_t)); 377 if (ret < 0) { 378 goto fail; 379 } 380 381 ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table, 382 s->l1_size * sizeof(uint64_t)); 383 if (ret < 0) { 384 goto fail; 385 } 386 387 g_free(l1_table); 388 l1_table = NULL; 389 390 /* 391 * Increase the refcounts of all clusters and make sure everything is 392 * stable on disk before updating the snapshot table to contain a pointer 393 * to the new L1 table. 394 */ 395 ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1); 396 if (ret < 0) { 397 goto fail; 398 } 399 400 /* Append the new snapshot to the snapshot list */ 401 new_snapshot_list = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot)); 402 if (s->snapshots) { 403 memcpy(new_snapshot_list, s->snapshots, 404 s->nb_snapshots * sizeof(QCowSnapshot)); 405 old_snapshot_list = s->snapshots; 406 } 407 s->snapshots = new_snapshot_list; 408 s->snapshots[s->nb_snapshots++] = *sn; 409 410 ret = qcow2_write_snapshots(bs); 411 if (ret < 0) { 412 g_free(s->snapshots); 413 s->snapshots = old_snapshot_list; 414 goto fail; 415 } 416 417 g_free(old_snapshot_list); 418 419 #ifdef DEBUG_ALLOC 420 { 421 BdrvCheckResult result = {0}; 422 qcow2_check_refcounts(bs, &result, 0); 423 } 424 #endif 425 return 0; 426 427 fail: 428 g_free(sn->id_str); 429 g_free(sn->name); 430 g_free(l1_table); 431 432 return ret; 433 } 434 435 /* copy the snapshot 'snapshot_name' into the current disk image */ 436 int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) 437 { 438 BDRVQcowState *s = bs->opaque; 439 QCowSnapshot *sn; 440 int i, snapshot_index; 441 int cur_l1_bytes, sn_l1_bytes; 442 int ret; 443 uint64_t *sn_l1_table = NULL; 444 445 /* Search the snapshot */ 446 snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id); 447 if (snapshot_index < 0) { 448 return -ENOENT; 449 } 450 sn = &s->snapshots[snapshot_index]; 451 452 if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) { 453 error_report("qcow2: Loading snapshots with different disk " 454 "size is not implemented"); 455 ret = -ENOTSUP; 456 goto fail; 457 } 458 459 /* 460 * Make sure that the current L1 table is big enough to contain the whole 461 * L1 table of the snapshot. If the snapshot L1 table is smaller, the 462 * current one must be padded with zeros. 463 */ 464 ret = qcow2_grow_l1_table(bs, sn->l1_size, true); 465 if (ret < 0) { 466 goto fail; 467 } 468 469 cur_l1_bytes = s->l1_size * sizeof(uint64_t); 470 sn_l1_bytes = sn->l1_size * sizeof(uint64_t); 471 472 /* 473 * Copy the snapshot L1 table to the current L1 table. 474 * 475 * Before overwriting the old current L1 table on disk, make sure to 476 * increase all refcounts for the clusters referenced by the new one. 477 * Decrease the refcount referenced by the old one only when the L1 478 * table is overwritten. 479 */ 480 sn_l1_table = g_malloc0(cur_l1_bytes); 481 482 ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes); 483 if (ret < 0) { 484 goto fail; 485 } 486 487 ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset, 488 sn->l1_size, 1); 489 if (ret < 0) { 490 goto fail; 491 } 492 493 ret = qcow2_pre_write_overlap_check(bs, 494 QCOW2_OL_DEFAULT & ~QCOW2_OL_ACTIVE_L1, 495 s->l1_table_offset, cur_l1_bytes); 496 if (ret < 0) { 497 goto fail; 498 } 499 500 ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table, 501 cur_l1_bytes); 502 if (ret < 0) { 503 goto fail; 504 } 505 506 /* 507 * Decrease refcount of clusters of current L1 table. 508 * 509 * At this point, the in-memory s->l1_table points to the old L1 table, 510 * whereas on disk we already have the new one. 511 * 512 * qcow2_update_snapshot_refcount special cases the current L1 table to use 513 * the in-memory data instead of really using the offset to load a new one, 514 * which is why this works. 515 */ 516 ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, 517 s->l1_size, -1); 518 519 /* 520 * Now update the in-memory L1 table to be in sync with the on-disk one. We 521 * need to do this even if updating refcounts failed. 522 */ 523 for(i = 0;i < s->l1_size; i++) { 524 s->l1_table[i] = be64_to_cpu(sn_l1_table[i]); 525 } 526 527 if (ret < 0) { 528 goto fail; 529 } 530 531 g_free(sn_l1_table); 532 sn_l1_table = NULL; 533 534 /* 535 * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed 536 * when we decreased the refcount of the old snapshot. 537 */ 538 ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0); 539 if (ret < 0) { 540 goto fail; 541 } 542 543 #ifdef DEBUG_ALLOC 544 { 545 BdrvCheckResult result = {0}; 546 qcow2_check_refcounts(bs, &result, 0); 547 } 548 #endif 549 return 0; 550 551 fail: 552 g_free(sn_l1_table); 553 return ret; 554 } 555 556 int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id) 557 { 558 BDRVQcowState *s = bs->opaque; 559 QCowSnapshot sn; 560 int snapshot_index, ret; 561 562 /* Search the snapshot */ 563 snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id); 564 if (snapshot_index < 0) { 565 return -ENOENT; 566 } 567 sn = s->snapshots[snapshot_index]; 568 569 /* Remove it from the snapshot list */ 570 memmove(s->snapshots + snapshot_index, 571 s->snapshots + snapshot_index + 1, 572 (s->nb_snapshots - snapshot_index - 1) * sizeof(sn)); 573 s->nb_snapshots--; 574 ret = qcow2_write_snapshots(bs); 575 if (ret < 0) { 576 return ret; 577 } 578 579 /* 580 * The snapshot is now unused, clean up. If we fail after this point, we 581 * won't recover but just leak clusters. 582 */ 583 g_free(sn.id_str); 584 g_free(sn.name); 585 586 /* 587 * Now decrease the refcounts of clusters referenced by the snapshot and 588 * free the L1 table. 589 */ 590 ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset, 591 sn.l1_size, -1); 592 if (ret < 0) { 593 return ret; 594 } 595 qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t), 596 QCOW2_DISCARD_SNAPSHOT); 597 598 /* must update the copied flag on the current cluster offsets */ 599 ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0); 600 if (ret < 0) { 601 return ret; 602 } 603 604 #ifdef DEBUG_ALLOC 605 { 606 BdrvCheckResult result = {0}; 607 qcow2_check_refcounts(bs, &result, 0); 608 } 609 #endif 610 return 0; 611 } 612 613 int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) 614 { 615 BDRVQcowState *s = bs->opaque; 616 QEMUSnapshotInfo *sn_tab, *sn_info; 617 QCowSnapshot *sn; 618 int i; 619 620 if (!s->nb_snapshots) { 621 *psn_tab = NULL; 622 return s->nb_snapshots; 623 } 624 625 sn_tab = g_malloc0(s->nb_snapshots * sizeof(QEMUSnapshotInfo)); 626 for(i = 0; i < s->nb_snapshots; i++) { 627 sn_info = sn_tab + i; 628 sn = s->snapshots + i; 629 pstrcpy(sn_info->id_str, sizeof(sn_info->id_str), 630 sn->id_str); 631 pstrcpy(sn_info->name, sizeof(sn_info->name), 632 sn->name); 633 sn_info->vm_state_size = sn->vm_state_size; 634 sn_info->date_sec = sn->date_sec; 635 sn_info->date_nsec = sn->date_nsec; 636 sn_info->vm_clock_nsec = sn->vm_clock_nsec; 637 } 638 *psn_tab = sn_tab; 639 return s->nb_snapshots; 640 } 641 642 int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name) 643 { 644 int i, snapshot_index; 645 BDRVQcowState *s = bs->opaque; 646 QCowSnapshot *sn; 647 uint64_t *new_l1_table; 648 int new_l1_bytes; 649 int ret; 650 651 assert(bs->read_only); 652 653 /* Search the snapshot */ 654 snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_name); 655 if (snapshot_index < 0) { 656 return -ENOENT; 657 } 658 sn = &s->snapshots[snapshot_index]; 659 660 /* Allocate and read in the snapshot's L1 table */ 661 new_l1_bytes = s->l1_size * sizeof(uint64_t); 662 new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512)); 663 664 ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes); 665 if (ret < 0) { 666 g_free(new_l1_table); 667 return ret; 668 } 669 670 /* Switch the L1 table */ 671 g_free(s->l1_table); 672 673 s->l1_size = sn->l1_size; 674 s->l1_table_offset = sn->l1_table_offset; 675 s->l1_table = new_l1_table; 676 677 for(i = 0;i < s->l1_size; i++) { 678 be64_to_cpus(&s->l1_table[i]); 679 } 680 681 return 0; 682 } 683