/*
 * dm-snapshot.c
 *
 * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-snap.h"
#include "dm-io.h"
#include "kcopyd.h"

#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>

/*-----------------------------------------------------------------
 * Persistent snapshots; by persistent we mean that the snapshot
 * will survive a reboot.
 *---------------------------------------------------------------*/

/*
 * We need to store a record of which parts of the origin have
 * been copied to the snapshot device.  The snapshot code
 * requires that we copy exception chunks to chunk aligned areas
 * of the COW store.  It makes sense, therefore, to store the
 * metadata in chunk size blocks.
 *
 * There is no backward or forward compatibility implemented;
 * snapshots with a disk version different from the kernel's will
 * not be usable.  It is expected that "lvcreate" will blank out
 * the start of a fresh COW device before calling the snapshot
 * constructor.
 *
 * The first chunk of the COW device just contains the header.
 * After this there is a chunk filled with exception metadata,
 * followed by as many exception chunks as can fit in the
 * metadata areas.
 *
 * All on disk structures are in little-endian format.  The end
 * of the exceptions info is indicated by an exception with a
 * new_chunk of 0, which is invalid since it would point to the
 * header chunk.
 */

/*
 * Magic for persistent snapshots: "SnAp" - Feeble, isn't it?
 */
#define SNAP_MAGIC 0x70416e53

/*
 * The on-disk version of the metadata.
 */
#define SNAPSHOT_DISK_VERSION 1

struct disk_header {
	uint32_t magic;

	/*
	 * Is this snapshot valid?  There is no way of recovering
	 * an invalid snapshot.
	 */
	uint32_t valid;

	/*
	 * Simple, incrementing version.  No backward
	 * compatibility.
	 */
	uint32_t version;

	/* In sectors */
	uint32_t chunk_size;
};

struct disk_exception {
	uint64_t old_chunk;
	uint64_t new_chunk;
};

struct commit_callback {
	void (*callback)(void *, int success);
	void *context;
};

/*
 * The top level structure for a persistent exception store.
 */
struct pstore {
	struct dm_snapshot *snap;	/* up pointer to my snapshot */
	int version;
	int valid;
	uint32_t chunk_size;
	uint32_t exceptions_per_area;

	/*
	 * Now that we have an asynchronous kcopyd there is no
	 * need for large chunk sizes, so it won't hurt to have a
	 * whole chunk's worth of metadata in memory at once.
	 */
	void *area;

	/*
	 * Used to keep track of which metadata area the data in
	 * 'area' refers to.
	 */
	uint32_t current_area;

	/*
	 * The next free chunk for an exception.
	 */
	uint32_t next_free;

	/*
	 * The index of the next free exception in the current
	 * metadata area.
	 */
	uint32_t current_committed;

	atomic_t pending_count;
	uint32_t callback_count;
	struct commit_callback *callbacks;
};
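
/*
 * A worked example of the layout described above (illustrative
 * numbers only; nothing below depends on them): with a chunk_size
 * of 16 sectors (8KiB) and the 16 byte struct disk_exception,
 * exceptions_per_area = 8192 / 16 = 512, so the COW device is
 * laid out as:
 *
 *	chunk 0			header
 *	chunk 1			metadata area 0
 *	chunks 2 - 513		data chunks for area 0
 *	chunk 514		metadata area 1
 *	chunks 515 - 1026	data chunks for area 1
 *	...
 */
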
static inline unsigned int sectors_to_pages(unsigned int sectors)
{
	return sectors / (PAGE_SIZE >> 9);
}

static int alloc_area(struct pstore *ps)
{
	int r = -ENOMEM;
	size_t len;

	len = ps->chunk_size << SECTOR_SHIFT;

	/*
	 * Allocate the chunk_size block of memory that will hold
	 * a single metadata area.
	 */
	ps->area = vmalloc(len);
	if (!ps->area)
		return r;

	return 0;
}

static void free_area(struct pstore *ps)
{
	vfree(ps->area);
}

/*
 * Read or write a chunk aligned and sized block of data from a device.
 */
static int chunk_io(struct pstore *ps, uint32_t chunk, int rw)
{
	struct io_region where;
	unsigned long bits;

	where.bdev = ps->snap->cow->bdev;
	where.sector = ps->chunk_size * chunk;
	where.count = ps->chunk_size;

	return dm_io_sync_vm(1, &where, rw, ps->area, &bits);
}

/*
 * Read or write a metadata area, remembering to skip the first
 * chunk, which holds the header.
 */
static int area_io(struct pstore *ps, uint32_t area, int rw)
{
	int r;
	uint32_t chunk;

	/* convert a metadata area index to a chunk index */
	chunk = 1 + ((ps->exceptions_per_area + 1) * area);

	r = chunk_io(ps, chunk, rw);
	if (r)
		return r;

	ps->current_area = area;
	return 0;
}
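
/*
 * Continuing the illustrative layout above: with 512 exceptions
 * per area, the stride between metadata areas is 513 chunks, so
 * the conversion in area_io() maps area 0 to chunk 1 + 513 * 0 = 1
 * and area 1 to chunk 1 + 513 * 1 = 514.
 */
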
static int zero_area(struct pstore *ps, uint32_t area)
{
	memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT);
	return area_io(ps, area, WRITE);
}

static int read_header(struct pstore *ps, int *new_snapshot)
{
	int r;
	struct disk_header *dh;

	r = chunk_io(ps, 0, READ);
	if (r)
		return r;

	dh = (struct disk_header *) ps->area;

	if (le32_to_cpu(dh->magic) == 0) {
		*new_snapshot = 1;

	} else if (le32_to_cpu(dh->magic) == SNAP_MAGIC) {
		*new_snapshot = 0;
		ps->valid = le32_to_cpu(dh->valid);
		ps->version = le32_to_cpu(dh->version);
		ps->chunk_size = le32_to_cpu(dh->chunk_size);

	} else {
		DMWARN("Invalid/corrupt snapshot");
		r = -ENXIO;
	}

	return r;
}

static int write_header(struct pstore *ps)
{
	struct disk_header *dh;

	memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT);

	dh = (struct disk_header *) ps->area;
	dh->magic = cpu_to_le32(SNAP_MAGIC);
	dh->valid = cpu_to_le32(ps->valid);
	dh->version = cpu_to_le32(ps->version);
	dh->chunk_size = cpu_to_le32(ps->chunk_size);

	return chunk_io(ps, 0, WRITE);
}

/*
 * Access functions for the disk exceptions; these do the endian conversions.
 */
static struct disk_exception *get_exception(struct pstore *ps, uint32_t index)
{
	if (index >= ps->exceptions_per_area)
		return NULL;

	return ((struct disk_exception *) ps->area) + index;
}

static int read_exception(struct pstore *ps,
			  uint32_t index, struct disk_exception *result)
{
	struct disk_exception *e;

	e = get_exception(ps, index);
	if (!e)
		return -EINVAL;

	/* copy it */
	result->old_chunk = le64_to_cpu(e->old_chunk);
	result->new_chunk = le64_to_cpu(e->new_chunk);

	return 0;
}

static int write_exception(struct pstore *ps,
			   uint32_t index, struct disk_exception *de)
{
	struct disk_exception *e;

	e = get_exception(ps, index);
	if (!e)
		return -EINVAL;

	/* copy it */
	e->old_chunk = cpu_to_le64(de->old_chunk);
	e->new_chunk = cpu_to_le64(de->new_chunk);

	return 0;
}

/*
 * Registers the exceptions that are present in the current area.
 * 'full' is filled in to indicate whether the area has been
 * filled.
 */
static int insert_exceptions(struct pstore *ps, int *full)
{
	int r;
	unsigned int i;
	struct disk_exception de;

	/* presume the area is full */
	*full = 1;

	for (i = 0; i < ps->exceptions_per_area; i++) {
		r = read_exception(ps, i, &de);
		if (r)
			return r;

		/*
		 * If new_chunk is pointing at the start of the COW
		 * device, where the header chunk lives, we know
		 * that we've hit the end of the exceptions.
		 * Therefore the area is not full.
		 */
		if (de.new_chunk == 0LL) {
			ps->current_committed = i;
			*full = 0;
			break;
		}

		/*
		 * Keep track of the start of the free chunks.
		 */
		if (ps->next_free <= de.new_chunk)
			ps->next_free = de.new_chunk + 1;

		/*
		 * Otherwise we add the exception to the snapshot.
		 */
		r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk);
		if (r)
			return r;
	}

	return 0;
}

static int read_exceptions(struct pstore *ps)
{
	uint32_t area;
	int r, full = 1;

	/*
	 * Keep reading chunks and inserting exceptions until
	 * we find a partially full area.
	 */
	for (area = 0; full; area++) {
		r = area_io(ps, area, READ);
		if (r)
			return r;

		r = insert_exceptions(ps, &full);
		if (r)
			return r;
	}

	return 0;
}
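
/*
 * For example (illustrative only), an area holding three committed
 * exceptions followed by a new_chunk == 0 entry is decoded by the
 * scan in insert_exceptions() above as: slots 0-2 are added to the
 * snapshot's exception table, slot 3 stops the scan,
 * current_committed is set to 3 and the area is reported as not
 * full, which in turn stops the loop in read_exceptions().
 */
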
static inline struct pstore *get_info(struct exception_store *store)
{
	return (struct pstore *) store->context;
}

static void persistent_fraction_full(struct exception_store *store,
				     sector_t *numerator, sector_t *denominator)
{
	*numerator = get_info(store)->next_free * store->snap->chunk_size;
	*denominator = get_dev_size(store->snap->cow->bdev);
}

static void persistent_destroy(struct exception_store *store)
{
	struct pstore *ps = get_info(store);

	dm_io_put(sectors_to_pages(ps->chunk_size));
	vfree(ps->callbacks);
	free_area(ps);
	kfree(ps);
}

static int persistent_read_metadata(struct exception_store *store)
{
	int r, new_snapshot;
	struct pstore *ps = get_info(store);

	/*
	 * Read the snapshot header.
	 */
	r = read_header(ps, &new_snapshot);
	if (r)
		return r;

	/*
	 * Do we need to set up a new snapshot?
	 */
	if (new_snapshot) {
		r = write_header(ps);
		if (r) {
			DMWARN("write_header failed");
			return r;
		}

		r = zero_area(ps, 0);
		if (r) {
			DMWARN("zero_area(0) failed");
			return r;
		}

	} else {
		/*
		 * Sanity checks.
		 */
		if (!ps->valid) {
			DMWARN("snapshot is marked invalid");
			return -EINVAL;
		}

		if (ps->version != SNAPSHOT_DISK_VERSION) {
			DMWARN("unable to handle snapshot disk version %d",
			       ps->version);
			return -EINVAL;
		}

		/*
		 * Read the metadata.
		 */
		r = read_exceptions(ps);
		if (r)
			return r;
	}

	return 0;
}

static int persistent_prepare(struct exception_store *store,
			      struct exception *e)
{
	struct pstore *ps = get_info(store);
	uint32_t stride;
	sector_t size = get_dev_size(store->snap->cow->bdev);

	/* Is there enough room? */
	if (size < ((ps->next_free + 1) * store->snap->chunk_size))
		return -ENOSPC;

	e->new_chunk = ps->next_free;

	/*
	 * Move on to the next free chunk, making sure to take
	 * the location of the metadata chunks into account.
	 */
	stride = (ps->exceptions_per_area + 1);
	if ((++ps->next_free % stride) == 1)
		ps->next_free++;

	atomic_inc(&ps->pending_count);
	return 0;
}
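
/*
 * Sticking with the illustrative 512-exceptions-per-area layout:
 * the stride is 513, so when persistent_prepare() increments
 * next_free from 513 to 514 it lands on metadata area 1
 * (514 % 513 == 1) and is bumped once more to 515, the first data
 * chunk of the second area.
 */
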
static void persistent_commit(struct exception_store *store,
			      struct exception *e,
			      void (*callback) (void *, int success),
			      void *callback_context)
{
	int r;
	unsigned int i;
	struct pstore *ps = get_info(store);
	struct disk_exception de;
	struct commit_callback *cb;

	de.old_chunk = e->old_chunk;
	de.new_chunk = e->new_chunk;
	write_exception(ps, ps->current_committed++, &de);

	/*
	 * Add the callback to the back of the array.  This code
	 * is the only place where the callback array is
	 * manipulated, and we know that it will never be called
	 * multiple times concurrently.
	 */
	cb = ps->callbacks + ps->callback_count++;
	cb->callback = callback;
	cb->context = callback_context;

	/*
	 * If there are no more exceptions in flight, or we have
	 * filled this metadata area, we commit the exceptions to
	 * disk.
	 */
	if (atomic_dec_and_test(&ps->pending_count) ||
	    (ps->current_committed == ps->exceptions_per_area)) {
		r = area_io(ps, ps->current_area, WRITE);
		if (r)
			ps->valid = 0;

		for (i = 0; i < ps->callback_count; i++) {
			cb = ps->callbacks + i;
			cb->callback(cb->context, r == 0 ? 1 : 0);
		}

		ps->callback_count = 0;
	}

	/*
	 * Have we completely filled the current area?
	 */
	if (ps->current_committed == ps->exceptions_per_area) {
		ps->current_committed = 0;
		r = zero_area(ps, ps->current_area + 1);
		if (r)
			ps->valid = 0;
	}
}
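
/*
 * An illustrative commit sequence (hypothetical numbers): three
 * exceptions are prepared (pending_count == 3) and then committed
 * one by one.  The first two calls to persistent_commit() only
 * stage their disk_exceptions and callbacks; the third drops
 * pending_count to zero, writes the whole metadata area and then
 * fires all three callbacks, so the area hits the disk once
 * rather than three times.
 */
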
static void persistent_drop(struct exception_store *store)
{
	struct pstore *ps = get_info(store);

	ps->valid = 0;
	if (write_header(ps))
		DMWARN("write header failed");
}

int dm_create_persistent(struct exception_store *store, uint32_t chunk_size)
{
	int r;
	struct pstore *ps;

	r = dm_io_get(sectors_to_pages(chunk_size));
	if (r)
		return r;

	/* allocate the pstore */
	ps = kmalloc(sizeof(*ps), GFP_KERNEL);
	if (!ps) {
		r = -ENOMEM;
		goto bad;
	}

	ps->snap = store->snap;
	ps->valid = 1;
	ps->version = SNAPSHOT_DISK_VERSION;
	ps->chunk_size = chunk_size;
	ps->exceptions_per_area = (chunk_size << SECTOR_SHIFT) /
				  sizeof(struct disk_exception);
	ps->next_free = 2;	/* skipping the header and first area */
	ps->current_committed = 0;

	r = alloc_area(ps);
	if (r)
		goto bad;

	/*
	 * Allocate space for all the callbacks.
	 */
	ps->callback_count = 0;
	atomic_set(&ps->pending_count, 0);
	ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
				   sizeof(*ps->callbacks));

	if (!ps->callbacks) {
		r = -ENOMEM;
		goto bad;
	}

	store->destroy = persistent_destroy;
	store->read_metadata = persistent_read_metadata;
	store->prepare_exception = persistent_prepare;
	store->commit_exception = persistent_commit;
	store->drop_snapshot = persistent_drop;
	store->fraction_full = persistent_fraction_full;
	store->context = ps;

	return 0;

 bad:
	dm_io_put(sectors_to_pages(chunk_size));
	if (ps) {
		if (ps->area)
			free_area(ps);

		kfree(ps);
	}
	return r;
}

/*-----------------------------------------------------------------
 * Implementation of the store for non-persistent snapshots.
 *---------------------------------------------------------------*/
struct transient_c {
	sector_t next_free;
};

static void transient_destroy(struct exception_store *store)
{
	kfree(store->context);
}

static int transient_read_metadata(struct exception_store *store)
{
	return 0;
}

static int transient_prepare(struct exception_store *store, struct exception *e)
{
	struct transient_c *tc = (struct transient_c *) store->context;
	sector_t size = get_dev_size(store->snap->cow->bdev);

	if (size < (tc->next_free + store->snap->chunk_size))
		return -ENOSPC;

	e->new_chunk = sector_to_chunk(store->snap, tc->next_free);
	tc->next_free += store->snap->chunk_size;

	return 0;
}

static void transient_commit(struct exception_store *store,
			     struct exception *e,
			     void (*callback) (void *, int success),
			     void *callback_context)
{
	/* Just succeed */
	callback(callback_context, 1);
}

static void transient_fraction_full(struct exception_store *store,
				    sector_t *numerator, sector_t *denominator)
{
	*numerator = ((struct transient_c *) store->context)->next_free;
	*denominator = get_dev_size(store->snap->cow->bdev);
}

int dm_create_transient(struct exception_store *store,
			struct dm_snapshot *s, int blocksize)
{
	struct transient_c *tc;

	memset(store, 0, sizeof(*store));
	store->destroy = transient_destroy;
	store->read_metadata = transient_read_metadata;
	store->prepare_exception = transient_prepare;
	store->commit_exception = transient_commit;
	store->fraction_full = transient_fraction_full;
	store->snap = s;

	tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
	if (!tc)
		return -ENOMEM;

	tc->next_free = 0;
	store->context = tc;

	return 0;
}
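
/*
 * Illustrative caller (a sketch only; the real call sites live in
 * the snapshot target constructor in dm-snap.c, and the
 * 'persistent', 's' and 'blocksize' identifiers below are
 * assumptions, not names from this file):
 *
 *	if (persistent)
 *		r = dm_create_persistent(&s->store, s->chunk_size);
 *	else
 *		r = dm_create_transient(&s->store, s, blocksize);
 */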