/*
 * Copyright (C) 2003 Sistina Software
 *
 * This file is released under the LGPL.
 */

#include <linux/init.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/vmalloc.h>

#include "dm-log.h"
#include "dm-io.h"

static LIST_HEAD(_log_types);
static DEFINE_SPINLOCK(_lock);

int dm_register_dirty_log_type(struct dirty_log_type *type)
{
	spin_lock(&_lock);
	type->use_count = 0;
	list_add(&type->list, &_log_types);
	spin_unlock(&_lock);

	return 0;
}

int dm_unregister_dirty_log_type(struct dirty_log_type *type)
{
	spin_lock(&_lock);

	if (type->use_count)
		DMWARN("Attempt to unregister a log type that is still in use");
	else
		list_del(&type->list);

	spin_unlock(&_lock);

	return 0;
}

static struct dirty_log_type *get_type(const char *type_name)
{
	struct dirty_log_type *type;

	spin_lock(&_lock);
	list_for_each_entry (type, &_log_types, list)
		if (!strcmp(type_name, type->name)) {
			if (!type->use_count && !try_module_get(type->module)) {
				spin_unlock(&_lock);
				return NULL;
			}
			type->use_count++;
			spin_unlock(&_lock);
			return type;
		}

	spin_unlock(&_lock);
	return NULL;
}

static void put_type(struct dirty_log_type *type)
{
	spin_lock(&_lock);
	if (!--type->use_count)
		module_put(type->module);
	spin_unlock(&_lock);
}

struct dirty_log *dm_create_dirty_log(const char *type_name, struct dm_target *ti,
				      unsigned int argc, char **argv)
{
	struct dirty_log_type *type;
	struct dirty_log *log;

	log = kmalloc(sizeof(*log), GFP_KERNEL);
	if (!log)
		return NULL;

	type = get_type(type_name);
	if (!type) {
		kfree(log);
		return NULL;
	}

	log->type = type;
	if (type->ctr(log, ti, argc, argv)) {
		kfree(log);
		put_type(type);
		return NULL;
	}

	return log;
}

void dm_destroy_dirty_log(struct dirty_log *log)
{
	log->type->dtr(log);
	put_type(log->type);
	kfree(log);
}
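
/*
 * Illustrative usage sketch (not part of this file): a target constructor
 * would typically pass its remaining table arguments to
 * dm_create_dirty_log() and keep the returned handle for the lifetime of
 * the target.  The argument offsets below are hypothetical.
 *
 *	struct dirty_log *log;
 *
 *	log = dm_create_dirty_log("disk", ti, argc - 2, argv + 2);
 *	if (!log)
 *		return -EINVAL;
 *	...
 *	dm_destroy_dirty_log(log);
 */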

/*-----------------------------------------------------------------
 * Persistent and core logs share a lot of their implementation.
 * FIXME: need a reload method to be called from a resume
 *---------------------------------------------------------------*/
/*
 * Magic for persistent mirrors: "MiRr"
 */
#define MIRROR_MAGIC 0x4D695272

/*
 * The on-disk version of the metadata.
 */
#define MIRROR_DISK_VERSION 1
#define LOG_OFFSET 2

struct log_header {
	uint32_t magic;

	/*
	 * Simple, incrementing version. no backward
	 * compatibility.
	 */
	uint32_t version;
	sector_t nr_regions;
};

struct log_c {
	struct dm_target *ti;
	int touched;
	uint32_t region_size;
	unsigned int region_count;
	region_t sync_count;

	unsigned bitset_uint32_count;
	uint32_t *clean_bits;
	uint32_t *sync_bits;
	uint32_t *recovering_bits;	/* FIXME: this seems excessive */

	int sync_search;

	/* Resync flag */
	enum sync {
		DEFAULTSYNC,	/* Synchronize if necessary */
		NOSYNC,		/* Devices known to be already in sync */
		FORCESYNC,	/* Force a sync to happen */
	} sync;

	/*
	 * Disk log fields
	 */
	struct dm_dev *log_dev;
	struct log_header header;

	struct io_region header_location;
	struct log_header *disk_header;

	struct io_region bits_location;
	uint32_t *disk_bits;
};

/*
 * The touched member needs to be updated every time we access
 * one of the bitsets.
 */
static inline int log_test_bit(uint32_t *bs, unsigned bit)
{
	return test_bit(bit, (unsigned long *) bs) ? 1 : 0;
}

static inline void log_set_bit(struct log_c *l,
			       uint32_t *bs, unsigned bit)
{
	set_bit(bit, (unsigned long *) bs);
	l->touched = 1;
}

static inline void log_clear_bit(struct log_c *l,
				 uint32_t *bs, unsigned bit)
{
	clear_bit(bit, (unsigned long *) bs);
	l->touched = 1;
}
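
/*
 * Rough on-disk layout of the persistent ("disk") log, as implied by
 * disk_ctr() below (a descriptive sketch, not an authoritative format
 * definition):
 *
 *	sector 0		log_header (magic, version, nr_regions),
 *				stored little-endian
 *	sector 1		unused; the header is read and written as a
 *				whole 512-byte sector
 *	sector LOG_OFFSET (2)+	the clean/dirty bitset, one bit per region,
 *				as little-endian uint32_t words rounded up
 *				to a whole number of sectors
 */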

/*----------------------------------------------------------------
 * Header IO
 *--------------------------------------------------------------*/
static void header_to_disk(struct log_header *core, struct log_header *disk)
{
	disk->magic = cpu_to_le32(core->magic);
	disk->version = cpu_to_le32(core->version);
	disk->nr_regions = cpu_to_le64(core->nr_regions);
}

static void header_from_disk(struct log_header *core, struct log_header *disk)
{
	core->magic = le32_to_cpu(disk->magic);
	core->version = le32_to_cpu(disk->version);
	core->nr_regions = le64_to_cpu(disk->nr_regions);
}

static int read_header(struct log_c *log)
{
	int r;
	unsigned long ebits;

	r = dm_io_sync_vm(1, &log->header_location, READ,
			  log->disk_header, &ebits);
	if (r)
		return r;

	header_from_disk(&log->header, log->disk_header);

	/* New log required? */
	if (log->sync != DEFAULTSYNC || log->header.magic != MIRROR_MAGIC) {
		log->header.magic = MIRROR_MAGIC;
		log->header.version = MIRROR_DISK_VERSION;
		log->header.nr_regions = 0;
	}

	if (log->header.version != MIRROR_DISK_VERSION) {
		DMWARN("incompatible disk log version");
		return -EINVAL;
	}

	return 0;
}

static inline int write_header(struct log_c *log)
{
	unsigned long ebits;

	header_to_disk(&log->header, log->disk_header);
	return dm_io_sync_vm(1, &log->header_location, WRITE,
			     log->disk_header, &ebits);
}

/*----------------------------------------------------------------
 * Bits IO
 *--------------------------------------------------------------*/
static inline void bits_to_core(uint32_t *core, uint32_t *disk, unsigned count)
{
	unsigned i;

	for (i = 0; i < count; i++)
		core[i] = le32_to_cpu(disk[i]);
}

static inline void bits_to_disk(uint32_t *core, uint32_t *disk, unsigned count)
{
	unsigned i;

	/* copy across the clean/dirty bitset */
	for (i = 0; i < count; i++)
		disk[i] = cpu_to_le32(core[i]);
}

static int read_bits(struct log_c *log)
{
	int r;
	unsigned long ebits;

	r = dm_io_sync_vm(1, &log->bits_location, READ,
			  log->disk_bits, &ebits);
	if (r)
		return r;

	bits_to_core(log->clean_bits, log->disk_bits,
		     log->bitset_uint32_count);
	return 0;
}

static int write_bits(struct log_c *log)
{
	unsigned long ebits;

	bits_to_disk(log->clean_bits, log->disk_bits,
		     log->bitset_uint32_count);
	return dm_io_sync_vm(1, &log->bits_location, WRITE,
			     log->disk_bits, &ebits);
}

/*----------------------------------------------------------------
 * core log constructor/destructor
 *
 * argv contains region_size followed optionally by [no]sync
 *--------------------------------------------------------------*/
#define BYTE_SHIFT 3
static int core_ctr(struct dirty_log *log, struct dm_target *ti,
		    unsigned int argc, char **argv)
{
	enum sync sync = DEFAULTSYNC;

	struct log_c *lc;
	uint32_t region_size;
	unsigned int region_count;
	size_t bitset_size;

	if (argc < 1 || argc > 2) {
		DMWARN("wrong number of arguments to mirror log");
		return -EINVAL;
	}

	if (argc > 1) {
		if (!strcmp(argv[1], "sync"))
			sync = FORCESYNC;
		else if (!strcmp(argv[1], "nosync"))
			sync = NOSYNC;
		else {
			DMWARN("unrecognised sync argument to mirror log: %s",
			       argv[1]);
			return -EINVAL;
		}
	}

	if (sscanf(argv[0], "%u", &region_size) != 1) {
		DMWARN("invalid region size string");
		return -EINVAL;
	}

	region_count = dm_sector_div_up(ti->len, region_size);

	lc = kmalloc(sizeof(*lc), GFP_KERNEL);
	if (!lc) {
		DMWARN("couldn't allocate core log");
		return -ENOMEM;
	}

	lc->ti = ti;
	lc->touched = 0;
	lc->region_size = region_size;
	lc->region_count = region_count;
	lc->sync = sync;

	/*
	 * Work out how many "unsigned long"s we need to hold the bitset.
	 */
	bitset_size = dm_round_up(region_count,
				  sizeof(unsigned long) << BYTE_SHIFT);
	bitset_size >>= BYTE_SHIFT;

	lc->bitset_uint32_count = bitset_size / 4;
	lc->clean_bits = vmalloc(bitset_size);
	if (!lc->clean_bits) {
		DMWARN("couldn't allocate clean bitset");
		kfree(lc);
		return -ENOMEM;
	}
	memset(lc->clean_bits, -1, bitset_size);

	lc->sync_bits = vmalloc(bitset_size);
	if (!lc->sync_bits) {
		DMWARN("couldn't allocate sync bitset");
		vfree(lc->clean_bits);
		kfree(lc);
		return -ENOMEM;
	}
	memset(lc->sync_bits, (sync == NOSYNC) ? -1 : 0, bitset_size);
	lc->sync_count = (sync == NOSYNC) ? region_count : 0;

	lc->recovering_bits = vmalloc(bitset_size);
	if (!lc->recovering_bits) {
		DMWARN("couldn't allocate recovering bitset");
		vfree(lc->sync_bits);
		vfree(lc->clean_bits);
		kfree(lc);
		return -ENOMEM;
	}
	memset(lc->recovering_bits, 0, bitset_size);
	lc->sync_search = 0;
	log->context = lc;
	return 0;
}

static void core_dtr(struct dirty_log *log)
{
	struct log_c *lc = (struct log_c *) log->context;
	vfree(lc->clean_bits);
	vfree(lc->sync_bits);
	vfree(lc->recovering_bits);
	kfree(lc);
}
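
/*
 * Worked example of the core log parameters (a sketch; all numbers are
 * hypothetical).  A mirror table fragment of
 *
 *	... core 2 1024 nosync ...
 *
 * reaches core_ctr() as argc = 2, argv = { "1024", "nosync" }, i.e. a
 * region size of 1024 sectors with no initial resync (this matches the
 * format emitted by core_status() below).  For a 1000000-sector target,
 * region_count = dm_sector_div_up(1000000, 1024) = 977; the bitset is
 * rounded up to a whole number of unsigned longs, so on a 64-bit host
 * bitset_size = 1024 bits = 128 bytes and bitset_uint32_count = 32.
 */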

/*----------------------------------------------------------------
 * disk log constructor/destructor
 *
 * argv contains log_device region_size followed optionally by [no]sync
 *--------------------------------------------------------------*/
static int disk_ctr(struct dirty_log *log, struct dm_target *ti,
		    unsigned int argc, char **argv)
{
	int r;
	size_t size;
	struct log_c *lc;
	struct dm_dev *dev;

	if (argc < 2 || argc > 3) {
		DMWARN("wrong number of arguments to disk mirror log");
		return -EINVAL;
	}

	r = dm_get_device(ti, argv[0], 0, 0 /* FIXME */,
			  FMODE_READ | FMODE_WRITE, &dev);
	if (r)
		return r;

	r = core_ctr(log, ti, argc - 1, argv + 1);
	if (r) {
		dm_put_device(ti, dev);
		return r;
	}

	lc = (struct log_c *) log->context;
	lc->log_dev = dev;

	/* setup the disk header fields */
	lc->header_location.bdev = lc->log_dev->bdev;
	lc->header_location.sector = 0;
	lc->header_location.count = 1;

	/*
	 * We can't read less than this amount, even though we'll
	 * not be using most of this space.
	 */
	lc->disk_header = vmalloc(1 << SECTOR_SHIFT);
	if (!lc->disk_header)
		goto bad;

	/* setup the disk bitset fields */
	lc->bits_location.bdev = lc->log_dev->bdev;
	lc->bits_location.sector = LOG_OFFSET;

	size = dm_round_up(lc->bitset_uint32_count * sizeof(uint32_t),
			   1 << SECTOR_SHIFT);
	lc->bits_location.count = size >> SECTOR_SHIFT;
	lc->disk_bits = vmalloc(size);
	if (!lc->disk_bits) {
		vfree(lc->disk_header);
		goto bad;
	}
	return 0;

 bad:
	dm_put_device(ti, lc->log_dev);
	core_dtr(log);
	return -ENOMEM;
}

static void disk_dtr(struct dirty_log *log)
{
	struct log_c *lc = (struct log_c *) log->context;
	dm_put_device(lc->ti, lc->log_dev);
	vfree(lc->disk_header);
	vfree(lc->disk_bits);
	core_dtr(log);
}
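
/*
 * Worked example of the disk log parameters (a sketch; the device name and
 * numbers are hypothetical).  A mirror table fragment of
 *
 *	... disk 3 /dev/sdc1 1024 nosync ...
 *
 * reaches disk_ctr() as argv = { "/dev/sdc1", "1024", "nosync" }; the last
 * two arguments are handed on to core_ctr().  With bitset_uint32_count of
 * 32, the on-disk bitset occupies dm_round_up(32 * 4, 512) = 512 bytes, so
 * bits_location.count is one sector starting at sector LOG_OFFSET.
 */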

static int count_bits32(uint32_t *addr, unsigned size)
{
	int count = 0, i;

	for (i = 0; i < size; i++) {
		count += hweight32(*(addr+i));
	}
	return count;
}

static int disk_resume(struct dirty_log *log)
{
	int r;
	unsigned i;
	struct log_c *lc = (struct log_c *) log->context;
	size_t size = lc->bitset_uint32_count * sizeof(uint32_t);

	/* read the disk header */
	r = read_header(lc);
	if (r)
		return r;

	/* read the bits */
	r = read_bits(lc);
	if (r)
		return r;

	/* set or clear any new bits */
	if (lc->sync == NOSYNC)
		for (i = lc->header.nr_regions; i < lc->region_count; i++)
			/* FIXME: amazingly inefficient */
			log_set_bit(lc, lc->clean_bits, i);
	else
		for (i = lc->header.nr_regions; i < lc->region_count; i++)
			/* FIXME: amazingly inefficient */
			log_clear_bit(lc, lc->clean_bits, i);

	/* copy clean across to sync */
	memcpy(lc->sync_bits, lc->clean_bits, size);
	lc->sync_count = count_bits32(lc->clean_bits, lc->bitset_uint32_count);

	/* write the bits */
	r = write_bits(lc);
	if (r)
		return r;

	/* set the correct number of regions in the header */
	lc->header.nr_regions = lc->region_count;

	/* write the new header */
	return write_header(lc);
}

static uint32_t core_get_region_size(struct dirty_log *log)
{
	struct log_c *lc = (struct log_c *) log->context;
	return lc->region_size;
}

static int core_is_clean(struct dirty_log *log, region_t region)
{
	struct log_c *lc = (struct log_c *) log->context;
	return log_test_bit(lc->clean_bits, region);
}

static int core_in_sync(struct dirty_log *log, region_t region, int block)
{
	struct log_c *lc = (struct log_c *) log->context;
	return log_test_bit(lc->sync_bits, region);
}

static int core_flush(struct dirty_log *log)
{
	/* no op */
	return 0;
}

static int disk_flush(struct dirty_log *log)
{
	int r;
	struct log_c *lc = (struct log_c *) log->context;

	/* only write if the log has changed */
	if (!lc->touched)
		return 0;

	r = write_bits(lc);
	if (!r)
		lc->touched = 0;

	return r;
}

static void core_mark_region(struct dirty_log *log, region_t region)
{
	struct log_c *lc = (struct log_c *) log->context;
	log_clear_bit(lc, lc->clean_bits, region);
}

static void core_clear_region(struct dirty_log *log, region_t region)
{
	struct log_c *lc = (struct log_c *) log->context;
	log_set_bit(lc, lc->clean_bits, region);
}
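
/*
 * Descriptive note on the expected calling pattern (the caller lives
 * outside this file): mark_region() drops a region's clean bit before
 * writes are issued to it, clear_region() sets the bit again once the
 * caller considers the region quiescent, and flush() persists the result
 * -- a no-op for the core log, a synchronous write of the bitset for the
 * disk log.
 */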

static int core_get_resync_work(struct dirty_log *log, region_t *region)
{
	struct log_c *lc = (struct log_c *) log->context;

	if (lc->sync_search >= lc->region_count)
		return 0;

	do {
		*region = find_next_zero_bit((unsigned long *) lc->sync_bits,
					     lc->region_count,
					     lc->sync_search);
		lc->sync_search = *region + 1;

		if (*region >= lc->region_count)
			return 0;

	} while (log_test_bit(lc->recovering_bits, *region));

	log_set_bit(lc, lc->recovering_bits, *region);
	return 1;
}

static void core_complete_resync_work(struct dirty_log *log, region_t region,
				      int success)
{
	struct log_c *lc = (struct log_c *) log->context;

	log_clear_bit(lc, lc->recovering_bits, region);
	if (success) {
		log_set_bit(lc, lc->sync_bits, region);
		lc->sync_count++;
	}
}

static region_t core_get_sync_count(struct dirty_log *log)
{
	struct log_c *lc = (struct log_c *) log->context;

	return lc->sync_count;
}

#define DMEMIT_SYNC \
	if (lc->sync != DEFAULTSYNC) \
		DMEMIT("%ssync ", lc->sync == NOSYNC ? "no" : "")

static int core_status(struct dirty_log *log, status_type_t status,
		       char *result, unsigned int maxlen)
{
	int sz = 0;
	struct log_c *lc = log->context;

	switch(status) {
	case STATUSTYPE_INFO:
		break;

	case STATUSTYPE_TABLE:
		DMEMIT("%s %u %u ", log->type->name,
		       lc->sync == DEFAULTSYNC ? 1 : 2, lc->region_size);
		DMEMIT_SYNC;
	}

	return sz;
}

static int disk_status(struct dirty_log *log, status_type_t status,
		       char *result, unsigned int maxlen)
{
	int sz = 0;
	char buffer[16];
	struct log_c *lc = log->context;

	switch(status) {
	case STATUSTYPE_INFO:
		break;

	case STATUSTYPE_TABLE:
		format_dev_t(buffer, lc->log_dev->bdev->bd_dev);
		DMEMIT("%s %u %s %u ", log->type->name,
		       lc->sync == DEFAULTSYNC ? 2 : 3, buffer,
		       lc->region_size);
		DMEMIT_SYNC;
	}

	return sz;
}
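
/*
 * Example STATUSTYPE_TABLE output (illustrative values only), as built by
 * core_status() and disk_status() above:
 *
 *	core 2 1024 nosync
 *	disk 3 253:4 1024 nosync
 *
 * i.e. the log type name, the number of parameters that follow, then the
 * parameters in constructor order.
 */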

static struct dirty_log_type _core_type = {
	.name = "core",
	.module = THIS_MODULE,
	.ctr = core_ctr,
	.dtr = core_dtr,
	.get_region_size = core_get_region_size,
	.is_clean = core_is_clean,
	.in_sync = core_in_sync,
	.flush = core_flush,
	.mark_region = core_mark_region,
	.clear_region = core_clear_region,
	.get_resync_work = core_get_resync_work,
	.complete_resync_work = core_complete_resync_work,
	.get_sync_count = core_get_sync_count,
	.status = core_status,
};

static struct dirty_log_type _disk_type = {
	.name = "disk",
	.module = THIS_MODULE,
	.ctr = disk_ctr,
	.dtr = disk_dtr,
	.suspend = disk_flush,
	.resume = disk_resume,
	.get_region_size = core_get_region_size,
	.is_clean = core_is_clean,
	.in_sync = core_in_sync,
	.flush = disk_flush,
	.mark_region = core_mark_region,
	.clear_region = core_clear_region,
	.get_resync_work = core_get_resync_work,
	.complete_resync_work = core_complete_resync_work,
	.get_sync_count = core_get_sync_count,
	.status = disk_status,
};

int __init dm_dirty_log_init(void)
{
	int r;

	r = dm_register_dirty_log_type(&_core_type);
	if (r)
		DMWARN("couldn't register core log");

	r = dm_register_dirty_log_type(&_disk_type);
	if (r) {
		DMWARN("couldn't register disk type");
		dm_unregister_dirty_log_type(&_core_type);
	}

	return r;
}

void dm_dirty_log_exit(void)
{
	dm_unregister_dirty_log_type(&_disk_type);
	dm_unregister_dirty_log_type(&_core_type);
}

EXPORT_SYMBOL(dm_register_dirty_log_type);
EXPORT_SYMBOL(dm_unregister_dirty_log_type);
EXPORT_SYMBOL(dm_create_dirty_log);
EXPORT_SYMBOL(dm_destroy_dirty_log);