1 /* 2 * Copyright (C) 2003 Sistina Software 3 * 4 * This file is released under the LGPL. 5 */ 6 7 #include <linux/init.h> 8 #include <linux/slab.h> 9 #include <linux/module.h> 10 #include <linux/vmalloc.h> 11 12 #include "dm-log.h" 13 #include "dm-io.h" 14 15 #define DM_MSG_PREFIX "mirror log" 16 17 static LIST_HEAD(_log_types); 18 static DEFINE_SPINLOCK(_lock); 19 20 int dm_register_dirty_log_type(struct dirty_log_type *type) 21 { 22 spin_lock(&_lock); 23 type->use_count = 0; 24 list_add(&type->list, &_log_types); 25 spin_unlock(&_lock); 26 27 return 0; 28 } 29 30 int dm_unregister_dirty_log_type(struct dirty_log_type *type) 31 { 32 spin_lock(&_lock); 33 34 if (type->use_count) 35 DMWARN("Attempt to unregister a log type that is still in use"); 36 else 37 list_del(&type->list); 38 39 spin_unlock(&_lock); 40 41 return 0; 42 } 43 44 static struct dirty_log_type *_get_type(const char *type_name) 45 { 46 struct dirty_log_type *type; 47 48 spin_lock(&_lock); 49 list_for_each_entry (type, &_log_types, list) 50 if (!strcmp(type_name, type->name)) { 51 if (!type->use_count && !try_module_get(type->module)){ 52 spin_unlock(&_lock); 53 return NULL; 54 } 55 type->use_count++; 56 spin_unlock(&_lock); 57 return type; 58 } 59 60 spin_unlock(&_lock); 61 return NULL; 62 } 63 64 /* 65 * get_type 66 * @type_name 67 * 68 * Attempt to retrieve the dirty_log_type by name. If not already 69 * available, attempt to load the appropriate module. 70 * 71 * Log modules are named "dm-log-" followed by the 'type_name'. 72 * Modules may contain multiple types. 73 * This function will first try the module "dm-log-<type_name>", 74 * then truncate 'type_name' on the last '-' and try again. 75 * 76 * For example, if type_name was "clustered-disk", it would search 77 * 'dm-log-clustered-disk' then 'dm-log-clustered'. 78 * 79 * Returns: dirty_log_type* on success, NULL on failure 80 */ 81 static struct dirty_log_type *get_type(const char *type_name) 82 { 83 char *p, *type_name_dup; 84 struct dirty_log_type *type; 85 86 type = _get_type(type_name); 87 if (type) 88 return type; 89 90 type_name_dup = kstrdup(type_name, GFP_KERNEL); 91 if (!type_name_dup) { 92 DMWARN("No memory left to attempt log module load for \"%s\"", 93 type_name); 94 return NULL; 95 } 96 97 while (request_module("dm-log-%s", type_name_dup) || 98 !(type = _get_type(type_name))) { 99 p = strrchr(type_name_dup, '-'); 100 if (!p) 101 break; 102 p[0] = '\0'; 103 } 104 105 if (!type) 106 DMWARN("Module for logging type \"%s\" not found.", type_name); 107 108 kfree(type_name_dup); 109 110 return type; 111 } 112 113 static void put_type(struct dirty_log_type *type) 114 { 115 spin_lock(&_lock); 116 if (!--type->use_count) 117 module_put(type->module); 118 spin_unlock(&_lock); 119 } 120 121 struct dirty_log *dm_create_dirty_log(const char *type_name, struct dm_target *ti, 122 unsigned int argc, char **argv) 123 { 124 struct dirty_log_type *type; 125 struct dirty_log *log; 126 127 log = kmalloc(sizeof(*log), GFP_KERNEL); 128 if (!log) 129 return NULL; 130 131 type = get_type(type_name); 132 if (!type) { 133 kfree(log); 134 return NULL; 135 } 136 137 log->type = type; 138 if (type->ctr(log, ti, argc, argv)) { 139 kfree(log); 140 put_type(type); 141 return NULL; 142 } 143 144 return log; 145 } 146 147 void dm_destroy_dirty_log(struct dirty_log *log) 148 { 149 log->type->dtr(log); 150 put_type(log->type); 151 kfree(log); 152 } 153 154 /*----------------------------------------------------------------- 155 * Persistent and core logs share a lot of their implementation. 156 * FIXME: need a reload method to be called from a resume 157 *---------------------------------------------------------------*/ 158 /* 159 * Magic for persistent mirrors: "MiRr" 160 */ 161 #define MIRROR_MAGIC 0x4D695272 162 163 /* 164 * The on-disk version of the metadata. 165 */ 166 #define MIRROR_DISK_VERSION 2 167 #define LOG_OFFSET 2 168 169 struct log_header { 170 uint32_t magic; 171 172 /* 173 * Simple, incrementing version. no backward 174 * compatibility. 175 */ 176 uint32_t version; 177 sector_t nr_regions; 178 }; 179 180 struct log_c { 181 struct dm_target *ti; 182 int touched; 183 uint32_t region_size; 184 unsigned int region_count; 185 region_t sync_count; 186 187 unsigned bitset_uint32_count; 188 uint32_t *clean_bits; 189 uint32_t *sync_bits; 190 uint32_t *recovering_bits; /* FIXME: this seems excessive */ 191 192 int sync_search; 193 194 /* Resync flag */ 195 enum sync { 196 DEFAULTSYNC, /* Synchronize if necessary */ 197 NOSYNC, /* Devices known to be already in sync */ 198 FORCESYNC, /* Force a sync to happen */ 199 } sync; 200 201 struct dm_io_request io_req; 202 203 /* 204 * Disk log fields 205 */ 206 int log_dev_failed; 207 struct dm_dev *log_dev; 208 struct log_header header; 209 210 struct io_region header_location; 211 struct log_header *disk_header; 212 }; 213 214 /* 215 * The touched member needs to be updated every time we access 216 * one of the bitsets. 217 */ 218 static inline int log_test_bit(uint32_t *bs, unsigned bit) 219 { 220 return ext2_test_bit(bit, (unsigned long *) bs) ? 1 : 0; 221 } 222 223 static inline void log_set_bit(struct log_c *l, 224 uint32_t *bs, unsigned bit) 225 { 226 ext2_set_bit(bit, (unsigned long *) bs); 227 l->touched = 1; 228 } 229 230 static inline void log_clear_bit(struct log_c *l, 231 uint32_t *bs, unsigned bit) 232 { 233 ext2_clear_bit(bit, (unsigned long *) bs); 234 l->touched = 1; 235 } 236 237 /*---------------------------------------------------------------- 238 * Header IO 239 *--------------------------------------------------------------*/ 240 static void header_to_disk(struct log_header *core, struct log_header *disk) 241 { 242 disk->magic = cpu_to_le32(core->magic); 243 disk->version = cpu_to_le32(core->version); 244 disk->nr_regions = cpu_to_le64(core->nr_regions); 245 } 246 247 static void header_from_disk(struct log_header *core, struct log_header *disk) 248 { 249 core->magic = le32_to_cpu(disk->magic); 250 core->version = le32_to_cpu(disk->version); 251 core->nr_regions = le64_to_cpu(disk->nr_regions); 252 } 253 254 static int rw_header(struct log_c *lc, int rw) 255 { 256 lc->io_req.bi_rw = rw; 257 lc->io_req.mem.ptr.vma = lc->disk_header; 258 lc->io_req.notify.fn = NULL; 259 260 return dm_io(&lc->io_req, 1, &lc->header_location, NULL); 261 } 262 263 static int read_header(struct log_c *log) 264 { 265 int r; 266 267 r = rw_header(log, READ); 268 if (r) 269 return r; 270 271 header_from_disk(&log->header, log->disk_header); 272 273 /* New log required? */ 274 if (log->sync != DEFAULTSYNC || log->header.magic != MIRROR_MAGIC) { 275 log->header.magic = MIRROR_MAGIC; 276 log->header.version = MIRROR_DISK_VERSION; 277 log->header.nr_regions = 0; 278 } 279 280 #ifdef __LITTLE_ENDIAN 281 if (log->header.version == 1) 282 log->header.version = 2; 283 #endif 284 285 if (log->header.version != MIRROR_DISK_VERSION) { 286 DMWARN("incompatible disk log version"); 287 return -EINVAL; 288 } 289 290 return 0; 291 } 292 293 static inline int write_header(struct log_c *log) 294 { 295 header_to_disk(&log->header, log->disk_header); 296 return rw_header(log, WRITE); 297 } 298 299 /*---------------------------------------------------------------- 300 * core log constructor/destructor 301 * 302 * argv contains region_size followed optionally by [no]sync 303 *--------------------------------------------------------------*/ 304 #define BYTE_SHIFT 3 305 static int create_log_context(struct dirty_log *log, struct dm_target *ti, 306 unsigned int argc, char **argv, 307 struct dm_dev *dev) 308 { 309 enum sync sync = DEFAULTSYNC; 310 311 struct log_c *lc; 312 uint32_t region_size; 313 unsigned int region_count; 314 size_t bitset_size, buf_size; 315 int r; 316 317 if (argc < 1 || argc > 2) { 318 DMWARN("wrong number of arguments to mirror log"); 319 return -EINVAL; 320 } 321 322 if (argc > 1) { 323 if (!strcmp(argv[1], "sync")) 324 sync = FORCESYNC; 325 else if (!strcmp(argv[1], "nosync")) 326 sync = NOSYNC; 327 else { 328 DMWARN("unrecognised sync argument to mirror log: %s", 329 argv[1]); 330 return -EINVAL; 331 } 332 } 333 334 if (sscanf(argv[0], "%u", ®ion_size) != 1) { 335 DMWARN("invalid region size string"); 336 return -EINVAL; 337 } 338 339 region_count = dm_sector_div_up(ti->len, region_size); 340 341 lc = kmalloc(sizeof(*lc), GFP_KERNEL); 342 if (!lc) { 343 DMWARN("couldn't allocate core log"); 344 return -ENOMEM; 345 } 346 347 lc->ti = ti; 348 lc->touched = 0; 349 lc->region_size = region_size; 350 lc->region_count = region_count; 351 lc->sync = sync; 352 353 /* 354 * Work out how many "unsigned long"s we need to hold the bitset. 355 */ 356 bitset_size = dm_round_up(region_count, 357 sizeof(*lc->clean_bits) << BYTE_SHIFT); 358 bitset_size >>= BYTE_SHIFT; 359 360 lc->bitset_uint32_count = bitset_size / sizeof(*lc->clean_bits); 361 362 /* 363 * Disk log? 364 */ 365 if (!dev) { 366 lc->clean_bits = vmalloc(bitset_size); 367 if (!lc->clean_bits) { 368 DMWARN("couldn't allocate clean bitset"); 369 kfree(lc); 370 return -ENOMEM; 371 } 372 lc->disk_header = NULL; 373 } else { 374 lc->log_dev = dev; 375 lc->log_dev_failed = 0; 376 lc->header_location.bdev = lc->log_dev->bdev; 377 lc->header_location.sector = 0; 378 379 /* 380 * Buffer holds both header and bitset. 381 */ 382 buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) + 383 bitset_size, ti->limits.hardsect_size); 384 lc->header_location.count = buf_size >> SECTOR_SHIFT; 385 lc->io_req.mem.type = DM_IO_VMA; 386 lc->io_req.client = dm_io_client_create(dm_div_up(buf_size, 387 PAGE_SIZE)); 388 if (IS_ERR(lc->io_req.client)) { 389 r = PTR_ERR(lc->io_req.client); 390 DMWARN("couldn't allocate disk io client"); 391 kfree(lc); 392 return -ENOMEM; 393 } 394 395 lc->disk_header = vmalloc(buf_size); 396 if (!lc->disk_header) { 397 DMWARN("couldn't allocate disk log buffer"); 398 kfree(lc); 399 return -ENOMEM; 400 } 401 402 lc->clean_bits = (void *)lc->disk_header + 403 (LOG_OFFSET << SECTOR_SHIFT); 404 } 405 406 memset(lc->clean_bits, -1, bitset_size); 407 408 lc->sync_bits = vmalloc(bitset_size); 409 if (!lc->sync_bits) { 410 DMWARN("couldn't allocate sync bitset"); 411 if (!dev) 412 vfree(lc->clean_bits); 413 vfree(lc->disk_header); 414 kfree(lc); 415 return -ENOMEM; 416 } 417 memset(lc->sync_bits, (sync == NOSYNC) ? -1 : 0, bitset_size); 418 lc->sync_count = (sync == NOSYNC) ? region_count : 0; 419 420 lc->recovering_bits = vmalloc(bitset_size); 421 if (!lc->recovering_bits) { 422 DMWARN("couldn't allocate sync bitset"); 423 vfree(lc->sync_bits); 424 if (!dev) 425 vfree(lc->clean_bits); 426 vfree(lc->disk_header); 427 kfree(lc); 428 return -ENOMEM; 429 } 430 memset(lc->recovering_bits, 0, bitset_size); 431 lc->sync_search = 0; 432 log->context = lc; 433 434 return 0; 435 } 436 437 static int core_ctr(struct dirty_log *log, struct dm_target *ti, 438 unsigned int argc, char **argv) 439 { 440 return create_log_context(log, ti, argc, argv, NULL); 441 } 442 443 static void destroy_log_context(struct log_c *lc) 444 { 445 vfree(lc->sync_bits); 446 vfree(lc->recovering_bits); 447 kfree(lc); 448 } 449 450 static void core_dtr(struct dirty_log *log) 451 { 452 struct log_c *lc = (struct log_c *) log->context; 453 454 vfree(lc->clean_bits); 455 destroy_log_context(lc); 456 } 457 458 /*---------------------------------------------------------------- 459 * disk log constructor/destructor 460 * 461 * argv contains log_device region_size followed optionally by [no]sync 462 *--------------------------------------------------------------*/ 463 static int disk_ctr(struct dirty_log *log, struct dm_target *ti, 464 unsigned int argc, char **argv) 465 { 466 int r; 467 struct dm_dev *dev; 468 469 if (argc < 2 || argc > 3) { 470 DMWARN("wrong number of arguments to disk mirror log"); 471 return -EINVAL; 472 } 473 474 r = dm_get_device(ti, argv[0], 0, 0 /* FIXME */, 475 FMODE_READ | FMODE_WRITE, &dev); 476 if (r) 477 return r; 478 479 r = create_log_context(log, ti, argc - 1, argv + 1, dev); 480 if (r) { 481 dm_put_device(ti, dev); 482 return r; 483 } 484 485 return 0; 486 } 487 488 static void disk_dtr(struct dirty_log *log) 489 { 490 struct log_c *lc = (struct log_c *) log->context; 491 492 dm_put_device(lc->ti, lc->log_dev); 493 vfree(lc->disk_header); 494 dm_io_client_destroy(lc->io_req.client); 495 destroy_log_context(lc); 496 } 497 498 static int count_bits32(uint32_t *addr, unsigned size) 499 { 500 int count = 0, i; 501 502 for (i = 0; i < size; i++) { 503 count += hweight32(*(addr+i)); 504 } 505 return count; 506 } 507 508 static void fail_log_device(struct log_c *lc) 509 { 510 if (lc->log_dev_failed) 511 return; 512 513 lc->log_dev_failed = 1; 514 dm_table_event(lc->ti->table); 515 } 516 517 static int disk_resume(struct dirty_log *log) 518 { 519 int r; 520 unsigned i; 521 struct log_c *lc = (struct log_c *) log->context; 522 size_t size = lc->bitset_uint32_count * sizeof(uint32_t); 523 524 /* read the disk header */ 525 r = read_header(lc); 526 if (r) { 527 DMWARN("%s: Failed to read header on mirror log device", 528 lc->log_dev->name); 529 fail_log_device(lc); 530 /* 531 * If the log device cannot be read, we must assume 532 * all regions are out-of-sync. If we simply return 533 * here, the state will be uninitialized and could 534 * lead us to return 'in-sync' status for regions 535 * that are actually 'out-of-sync'. 536 */ 537 lc->header.nr_regions = 0; 538 } 539 540 /* set or clear any new bits -- device has grown */ 541 if (lc->sync == NOSYNC) 542 for (i = lc->header.nr_regions; i < lc->region_count; i++) 543 /* FIXME: amazingly inefficient */ 544 log_set_bit(lc, lc->clean_bits, i); 545 else 546 for (i = lc->header.nr_regions; i < lc->region_count; i++) 547 /* FIXME: amazingly inefficient */ 548 log_clear_bit(lc, lc->clean_bits, i); 549 550 /* clear any old bits -- device has shrunk */ 551 for (i = lc->region_count; i % (sizeof(*lc->clean_bits) << BYTE_SHIFT); i++) 552 log_clear_bit(lc, lc->clean_bits, i); 553 554 /* copy clean across to sync */ 555 memcpy(lc->sync_bits, lc->clean_bits, size); 556 lc->sync_count = count_bits32(lc->clean_bits, lc->bitset_uint32_count); 557 lc->sync_search = 0; 558 559 /* set the correct number of regions in the header */ 560 lc->header.nr_regions = lc->region_count; 561 562 /* write the new header */ 563 r = write_header(lc); 564 if (r) { 565 DMWARN("%s: Failed to write header on mirror log device", 566 lc->log_dev->name); 567 fail_log_device(lc); 568 } 569 570 return r; 571 } 572 573 static uint32_t core_get_region_size(struct dirty_log *log) 574 { 575 struct log_c *lc = (struct log_c *) log->context; 576 return lc->region_size; 577 } 578 579 static int core_resume(struct dirty_log *log) 580 { 581 struct log_c *lc = (struct log_c *) log->context; 582 lc->sync_search = 0; 583 return 0; 584 } 585 586 static int core_is_clean(struct dirty_log *log, region_t region) 587 { 588 struct log_c *lc = (struct log_c *) log->context; 589 return log_test_bit(lc->clean_bits, region); 590 } 591 592 static int core_in_sync(struct dirty_log *log, region_t region, int block) 593 { 594 struct log_c *lc = (struct log_c *) log->context; 595 return log_test_bit(lc->sync_bits, region); 596 } 597 598 static int core_flush(struct dirty_log *log) 599 { 600 /* no op */ 601 return 0; 602 } 603 604 static int disk_flush(struct dirty_log *log) 605 { 606 int r; 607 struct log_c *lc = (struct log_c *) log->context; 608 609 /* only write if the log has changed */ 610 if (!lc->touched) 611 return 0; 612 613 r = write_header(lc); 614 if (r) 615 fail_log_device(lc); 616 else 617 lc->touched = 0; 618 619 return r; 620 } 621 622 static void core_mark_region(struct dirty_log *log, region_t region) 623 { 624 struct log_c *lc = (struct log_c *) log->context; 625 log_clear_bit(lc, lc->clean_bits, region); 626 } 627 628 static void core_clear_region(struct dirty_log *log, region_t region) 629 { 630 struct log_c *lc = (struct log_c *) log->context; 631 log_set_bit(lc, lc->clean_bits, region); 632 } 633 634 static int core_get_resync_work(struct dirty_log *log, region_t *region) 635 { 636 struct log_c *lc = (struct log_c *) log->context; 637 638 if (lc->sync_search >= lc->region_count) 639 return 0; 640 641 do { 642 *region = ext2_find_next_zero_bit( 643 (unsigned long *) lc->sync_bits, 644 lc->region_count, 645 lc->sync_search); 646 lc->sync_search = *region + 1; 647 648 if (*region >= lc->region_count) 649 return 0; 650 651 } while (log_test_bit(lc->recovering_bits, *region)); 652 653 log_set_bit(lc, lc->recovering_bits, *region); 654 return 1; 655 } 656 657 static void core_set_region_sync(struct dirty_log *log, region_t region, 658 int in_sync) 659 { 660 struct log_c *lc = (struct log_c *) log->context; 661 662 log_clear_bit(lc, lc->recovering_bits, region); 663 if (in_sync) { 664 log_set_bit(lc, lc->sync_bits, region); 665 lc->sync_count++; 666 } else if (log_test_bit(lc->sync_bits, region)) { 667 lc->sync_count--; 668 log_clear_bit(lc, lc->sync_bits, region); 669 } 670 } 671 672 static region_t core_get_sync_count(struct dirty_log *log) 673 { 674 struct log_c *lc = (struct log_c *) log->context; 675 676 return lc->sync_count; 677 } 678 679 #define DMEMIT_SYNC \ 680 if (lc->sync != DEFAULTSYNC) \ 681 DMEMIT("%ssync ", lc->sync == NOSYNC ? "no" : "") 682 683 static int core_status(struct dirty_log *log, status_type_t status, 684 char *result, unsigned int maxlen) 685 { 686 int sz = 0; 687 struct log_c *lc = log->context; 688 689 switch(status) { 690 case STATUSTYPE_INFO: 691 DMEMIT("1 %s", log->type->name); 692 break; 693 694 case STATUSTYPE_TABLE: 695 DMEMIT("%s %u %u ", log->type->name, 696 lc->sync == DEFAULTSYNC ? 1 : 2, lc->region_size); 697 DMEMIT_SYNC; 698 } 699 700 return sz; 701 } 702 703 static int disk_status(struct dirty_log *log, status_type_t status, 704 char *result, unsigned int maxlen) 705 { 706 int sz = 0; 707 struct log_c *lc = log->context; 708 709 switch(status) { 710 case STATUSTYPE_INFO: 711 DMEMIT("3 %s %s %c", log->type->name, lc->log_dev->name, 712 lc->log_dev_failed ? 'D' : 'A'); 713 break; 714 715 case STATUSTYPE_TABLE: 716 DMEMIT("%s %u %s %u ", log->type->name, 717 lc->sync == DEFAULTSYNC ? 2 : 3, lc->log_dev->name, 718 lc->region_size); 719 DMEMIT_SYNC; 720 } 721 722 return sz; 723 } 724 725 static struct dirty_log_type _core_type = { 726 .name = "core", 727 .module = THIS_MODULE, 728 .ctr = core_ctr, 729 .dtr = core_dtr, 730 .resume = core_resume, 731 .get_region_size = core_get_region_size, 732 .is_clean = core_is_clean, 733 .in_sync = core_in_sync, 734 .flush = core_flush, 735 .mark_region = core_mark_region, 736 .clear_region = core_clear_region, 737 .get_resync_work = core_get_resync_work, 738 .set_region_sync = core_set_region_sync, 739 .get_sync_count = core_get_sync_count, 740 .status = core_status, 741 }; 742 743 static struct dirty_log_type _disk_type = { 744 .name = "disk", 745 .module = THIS_MODULE, 746 .ctr = disk_ctr, 747 .dtr = disk_dtr, 748 .postsuspend = disk_flush, 749 .resume = disk_resume, 750 .get_region_size = core_get_region_size, 751 .is_clean = core_is_clean, 752 .in_sync = core_in_sync, 753 .flush = disk_flush, 754 .mark_region = core_mark_region, 755 .clear_region = core_clear_region, 756 .get_resync_work = core_get_resync_work, 757 .set_region_sync = core_set_region_sync, 758 .get_sync_count = core_get_sync_count, 759 .status = disk_status, 760 }; 761 762 int __init dm_dirty_log_init(void) 763 { 764 int r; 765 766 r = dm_register_dirty_log_type(&_core_type); 767 if (r) 768 DMWARN("couldn't register core log"); 769 770 r = dm_register_dirty_log_type(&_disk_type); 771 if (r) { 772 DMWARN("couldn't register disk type"); 773 dm_unregister_dirty_log_type(&_core_type); 774 } 775 776 return r; 777 } 778 779 void dm_dirty_log_exit(void) 780 { 781 dm_unregister_dirty_log_type(&_disk_type); 782 dm_unregister_dirty_log_type(&_core_type); 783 } 784 785 EXPORT_SYMBOL(dm_register_dirty_log_type); 786 EXPORT_SYMBOL(dm_unregister_dirty_log_type); 787 EXPORT_SYMBOL(dm_create_dirty_log); 788 EXPORT_SYMBOL(dm_destroy_dirty_log); 789