/*
 * Copyright (C) 2003 Sistina Software
 *
 * This file is released under the LGPL.
 */

#include <linux/init.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/vmalloc.h>

#include "dm-log.h"
#include "dm-io.h"

#define DM_MSG_PREFIX "mirror log"

static LIST_HEAD(_log_types);
static DEFINE_SPINLOCK(_lock);

int dm_register_dirty_log_type(struct dirty_log_type *type)
{
	spin_lock(&_lock);
	type->use_count = 0;
	list_add(&type->list, &_log_types);
	spin_unlock(&_lock);

	return 0;
}

int dm_unregister_dirty_log_type(struct dirty_log_type *type)
{
	spin_lock(&_lock);

	if (type->use_count)
		DMWARN("Attempt to unregister a log type that is still in use");
	else
		list_del(&type->list);

	spin_unlock(&_lock);

	return 0;
}

static struct dirty_log_type *get_type(const char *type_name)
{
	struct dirty_log_type *type;

	spin_lock(&_lock);
	list_for_each_entry(type, &_log_types, list)
		if (!strcmp(type_name, type->name)) {
			if (!type->use_count && !try_module_get(type->module)) {
				spin_unlock(&_lock);
				return NULL;
			}
			type->use_count++;
			spin_unlock(&_lock);
			return type;
		}

	spin_unlock(&_lock);
	return NULL;
}

static void put_type(struct dirty_log_type *type)
{
	spin_lock(&_lock);
	if (!--type->use_count)
		module_put(type->module);
	spin_unlock(&_lock);
}

struct dirty_log *dm_create_dirty_log(const char *type_name, struct dm_target *ti,
				      unsigned int argc, char **argv)
{
	struct dirty_log_type *type;
	struct dirty_log *log;

	log = kmalloc(sizeof(*log), GFP_KERNEL);
	if (!log)
		return NULL;

	type = get_type(type_name);
	if (!type) {
		kfree(log);
		return NULL;
	}

	log->type = type;
	if (type->ctr(log, ti, argc, argv)) {
		kfree(log);
		put_type(type);
		return NULL;
	}

	return log;
}

void dm_destroy_dirty_log(struct dirty_log *log)
{
	log->type->dtr(log);
	put_type(log->type);
	kfree(log);
}

/*-----------------------------------------------------------------
 * Persistent and core logs share a lot of their implementation.
 * FIXME: need a reload method to be called from a resume
 *---------------------------------------------------------------*/
/*
 * Magic for persistent mirrors: "MiRr"
 */
#define MIRROR_MAGIC 0x4D695272

/*
 * The on-disk version of the metadata.
 */
#define MIRROR_DISK_VERSION 2
#define LOG_OFFSET 2

struct log_header {
	uint32_t magic;

	/*
	 * Simple, incrementing version. no backward
	 * compatibility.
	 */
	uint32_t version;
	sector_t nr_regions;
};

struct log_c {
	struct dm_target *ti;
	int touched;
	uint32_t region_size;
	unsigned int region_count;
	region_t sync_count;

	unsigned bitset_uint32_count;
	uint32_t *clean_bits;
	uint32_t *sync_bits;
	uint32_t *recovering_bits;	/* FIXME: this seems excessive */

	int sync_search;

	/* Resync flag */
	enum sync {
		DEFAULTSYNC,	/* Synchronize if necessary */
		NOSYNC,		/* Devices known to be already in sync */
		FORCESYNC,	/* Force a sync to happen */
	} sync;

	struct dm_io_request io_req;

	/*
	 * Disk log fields
	 */
	int log_dev_failed;
	struct dm_dev *log_dev;
	struct log_header header;

	struct io_region header_location;
	struct log_header *disk_header;
};

/*
 * The touched member needs to be updated every time we access
 * one of the bitsets.
 */
static inline int log_test_bit(uint32_t *bs, unsigned bit)
{
	return ext2_test_bit(bit, (unsigned long *) bs) ? 1 : 0;
}

static inline void log_set_bit(struct log_c *l,
			       uint32_t *bs, unsigned bit)
{
	ext2_set_bit(bit, (unsigned long *) bs);
	l->touched = 1;
}

static inline void log_clear_bit(struct log_c *l,
				 uint32_t *bs, unsigned bit)
{
	ext2_clear_bit(bit, (unsigned long *) bs);
	l->touched = 1;
}

/*----------------------------------------------------------------
 * Header IO
 *--------------------------------------------------------------*/
static void header_to_disk(struct log_header *core, struct log_header *disk)
{
	disk->magic = cpu_to_le32(core->magic);
	disk->version = cpu_to_le32(core->version);
	disk->nr_regions = cpu_to_le64(core->nr_regions);
}

static void header_from_disk(struct log_header *core, struct log_header *disk)
{
	core->magic = le32_to_cpu(disk->magic);
	core->version = le32_to_cpu(disk->version);
	core->nr_regions = le64_to_cpu(disk->nr_regions);
}

static int rw_header(struct log_c *lc, int rw)
{
	lc->io_req.bi_rw = rw;
	lc->io_req.mem.ptr.vma = lc->disk_header;
	lc->io_req.notify.fn = NULL;

	return dm_io(&lc->io_req, 1, &lc->header_location, NULL);
}

static int read_header(struct log_c *log)
{
	int r;

	r = rw_header(log, READ);
	if (r)
		return r;

	header_from_disk(&log->header, log->disk_header);

	/* New log required? */
	if (log->sync != DEFAULTSYNC || log->header.magic != MIRROR_MAGIC) {
		log->header.magic = MIRROR_MAGIC;
		log->header.version = MIRROR_DISK_VERSION;
		log->header.nr_regions = 0;
	}

#ifdef __LITTLE_ENDIAN
	if (log->header.version == 1)
		log->header.version = 2;
#endif

	if (log->header.version != MIRROR_DISK_VERSION) {
		DMWARN("incompatible disk log version");
		return -EINVAL;
	}

	return 0;
}

static inline int write_header(struct log_c *log)
{
	header_to_disk(&log->header, log->disk_header);
	return rw_header(log, WRITE);
}

/*----------------------------------------------------------------
 * core log constructor/destructor
 *
 * argv contains region_size followed optionally by [no]sync
 *--------------------------------------------------------------*/
#define BYTE_SHIFT 3
static int create_log_context(struct dirty_log *log, struct dm_target *ti,
			      unsigned int argc, char **argv,
			      struct dm_dev *dev)
{
	enum sync sync = DEFAULTSYNC;

	struct log_c *lc;
	uint32_t region_size;
	unsigned int region_count;
	size_t bitset_size, buf_size;
	int r;

	if (argc < 1 || argc > 2) {
		DMWARN("wrong number of arguments to mirror log");
		return -EINVAL;
	}

	if (argc > 1) {
		if (!strcmp(argv[1], "sync"))
			sync = FORCESYNC;
		else if (!strcmp(argv[1], "nosync"))
			sync = NOSYNC;
		else {
			DMWARN("unrecognised sync argument to mirror log: %s",
			       argv[1]);
			return -EINVAL;
		}
	}

	if (sscanf(argv[0], "%u", &region_size) != 1) {
		DMWARN("invalid region size string");
		return -EINVAL;
	}

	region_count = dm_sector_div_up(ti->len, region_size);

	lc = kmalloc(sizeof(*lc), GFP_KERNEL);
	if (!lc) {
		DMWARN("couldn't allocate core log");
		return -ENOMEM;
	}

	lc->ti = ti;
	lc->touched = 0;
	lc->region_size = region_size;
	lc->region_count = region_count;
	lc->sync = sync;

	/*
	 * Work out how many "unsigned long"s we need to hold the bitset.
	 */
	bitset_size = dm_round_up(region_count,
				  sizeof(*lc->clean_bits) << BYTE_SHIFT);
	bitset_size >>= BYTE_SHIFT;

	lc->bitset_uint32_count = bitset_size / sizeof(*lc->clean_bits);

	/*
	 * Disk log?
	 */
	if (!dev) {
		lc->clean_bits = vmalloc(bitset_size);
		if (!lc->clean_bits) {
			DMWARN("couldn't allocate clean bitset");
			kfree(lc);
			return -ENOMEM;
		}
		lc->disk_header = NULL;
	} else {
		lc->log_dev = dev;
		lc->log_dev_failed = 0;
		lc->header_location.bdev = lc->log_dev->bdev;
		lc->header_location.sector = 0;

		/*
		 * Buffer holds both header and bitset.
		 */
		buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) +
				       bitset_size, ti->limits.hardsect_size);
		lc->header_location.count = buf_size >> SECTOR_SHIFT;
		lc->io_req.mem.type = DM_IO_VMA;
		lc->io_req.client = dm_io_client_create(dm_div_up(buf_size,
								   PAGE_SIZE));
		if (IS_ERR(lc->io_req.client)) {
			r = PTR_ERR(lc->io_req.client);
			DMWARN("couldn't allocate disk io client");
			kfree(lc);
			return r;
		}

		lc->disk_header = vmalloc(buf_size);
		if (!lc->disk_header) {
			DMWARN("couldn't allocate disk log buffer");
			dm_io_client_destroy(lc->io_req.client);
			kfree(lc);
			return -ENOMEM;
		}

		lc->clean_bits = (void *)lc->disk_header +
				 (LOG_OFFSET << SECTOR_SHIFT);
	}

	memset(lc->clean_bits, -1, bitset_size);

	lc->sync_bits = vmalloc(bitset_size);
	if (!lc->sync_bits) {
		DMWARN("couldn't allocate sync bitset");
		if (!dev)
			vfree(lc->clean_bits);
		else
			dm_io_client_destroy(lc->io_req.client);
		vfree(lc->disk_header);
		kfree(lc);
		return -ENOMEM;
	}
	memset(lc->sync_bits, (sync == NOSYNC) ? -1 : 0, bitset_size);
	lc->sync_count = (sync == NOSYNC) ? region_count : 0;

	lc->recovering_bits = vmalloc(bitset_size);
	if (!lc->recovering_bits) {
		DMWARN("couldn't allocate recovering bitset");
		vfree(lc->sync_bits);
		if (!dev)
			vfree(lc->clean_bits);
		else
			dm_io_client_destroy(lc->io_req.client);
		vfree(lc->disk_header);
		kfree(lc);
		return -ENOMEM;
	}
	memset(lc->recovering_bits, 0, bitset_size);
	lc->sync_search = 0;
	log->context = lc;

	return 0;
}

static int core_ctr(struct dirty_log *log, struct dm_target *ti,
		    unsigned int argc, char **argv)
{
	return create_log_context(log, ti, argc, argv, NULL);
}

static void destroy_log_context(struct log_c *lc)
{
	vfree(lc->sync_bits);
	vfree(lc->recovering_bits);
	kfree(lc);
}

static void core_dtr(struct dirty_log *log)
{
	struct log_c *lc = (struct log_c *) log->context;

	vfree(lc->clean_bits);
	destroy_log_context(lc);
}

/*----------------------------------------------------------------
 * disk log constructor/destructor
 *
 * argv contains log_device region_size followed optionally by [no]sync
 *--------------------------------------------------------------*/
static int disk_ctr(struct dirty_log *log, struct dm_target *ti,
		    unsigned int argc, char **argv)
{
	int r;
	struct dm_dev *dev;

	if (argc < 2 || argc > 3) {
		DMWARN("wrong number of arguments to disk mirror log");
		return -EINVAL;
	}

	r = dm_get_device(ti, argv[0], 0, 0 /* FIXME */,
			  FMODE_READ | FMODE_WRITE, &dev);
	if (r)
		return r;

	r = create_log_context(log, ti, argc - 1, argv + 1, dev);
	if (r) {
		dm_put_device(ti, dev);
		return r;
	}

	return 0;
}

static void disk_dtr(struct dirty_log *log)
{
	struct log_c *lc = (struct log_c *) log->context;

	dm_put_device(lc->ti, lc->log_dev);
	vfree(lc->disk_header);
	dm_io_client_destroy(lc->io_req.client);
	destroy_log_context(lc);
}

static int count_bits32(uint32_t *addr, unsigned size)
{
	int count = 0, i;

	for (i = 0; i < size; i++) {
		count += hweight32(*(addr+i));
	}
	return count;
}

static void fail_log_device(struct log_c *lc)
{
	if (lc->log_dev_failed)
		return;

	lc->log_dev_failed = 1;
	dm_table_event(lc->ti->table);
}

static int disk_resume(struct dirty_log *log)
{
	int r;
	unsigned i;
	struct log_c *lc = (struct log_c *) log->context;
	size_t size = lc->bitset_uint32_count * sizeof(uint32_t);

	/* read the disk header */
	r = read_header(lc);
	if (r) {
		DMWARN("%s: Failed to read header on mirror log device",
		       lc->log_dev->name);
		fail_log_device(lc);
		/*
		 * If the log device cannot be read, we must assume
		 * all regions are out-of-sync. If we simply return
		 * here, the state will be uninitialized and could
		 * lead us to return 'in-sync' status for regions
		 * that are actually 'out-of-sync'.
		 */
		lc->header.nr_regions = 0;
	}

	/* set or clear any new bits -- device has grown */
	if (lc->sync == NOSYNC)
		for (i = lc->header.nr_regions; i < lc->region_count; i++)
			/* FIXME: amazingly inefficient */
			log_set_bit(lc, lc->clean_bits, i);
	else
		for (i = lc->header.nr_regions; i < lc->region_count; i++)
			/* FIXME: amazingly inefficient */
			log_clear_bit(lc, lc->clean_bits, i);

	/* clear any old bits -- device has shrunk */
	for (i = lc->region_count; i % (sizeof(*lc->clean_bits) << BYTE_SHIFT); i++)
		log_clear_bit(lc, lc->clean_bits, i);

	/* copy clean across to sync */
	memcpy(lc->sync_bits, lc->clean_bits, size);
	lc->sync_count = count_bits32(lc->clean_bits, lc->bitset_uint32_count);
	lc->sync_search = 0;

	/* set the correct number of regions in the header */
	lc->header.nr_regions = lc->region_count;

	/* write the new header */
	r = write_header(lc);
	if (r) {
		DMWARN("%s: Failed to write header on mirror log device",
		       lc->log_dev->name);
		fail_log_device(lc);
	}

	return r;
}

static uint32_t core_get_region_size(struct dirty_log *log)
{
	struct log_c *lc = (struct log_c *) log->context;
	return lc->region_size;
}

static int core_resume(struct dirty_log *log)
{
	struct log_c *lc = (struct log_c *) log->context;
	lc->sync_search = 0;
	return 0;
}

static int core_is_clean(struct dirty_log *log, region_t region)
{
	struct log_c *lc = (struct log_c *) log->context;
	return log_test_bit(lc->clean_bits, region);
}

static int core_in_sync(struct dirty_log *log, region_t region, int block)
{
	struct log_c *lc = (struct log_c *) log->context;
	return log_test_bit(lc->sync_bits, region);
}

static int core_flush(struct dirty_log *log)
{
	/* no op */
	return 0;
}

static int disk_flush(struct dirty_log *log)
{
	int r;
	struct log_c *lc = (struct log_c *) log->context;

	/* only write if the log has changed */
	if (!lc->touched)
		return 0;

	r = write_header(lc);
	if (r)
		fail_log_device(lc);
	else
		lc->touched = 0;

	return r;
}

static void core_mark_region(struct dirty_log *log, region_t region)
{
	struct log_c *lc = (struct log_c *) log->context;
	log_clear_bit(lc, lc->clean_bits, region);
}

static void core_clear_region(struct dirty_log *log, region_t region)
{
	struct log_c *lc = (struct log_c *) log->context;
	log_set_bit(lc, lc->clean_bits, region);
}

static int core_get_resync_work(struct dirty_log *log, region_t *region)
{
	struct log_c *lc = (struct log_c *) log->context;

	if (lc->sync_search >= lc->region_count)
		return 0;

	do {
		*region = ext2_find_next_zero_bit(
					(unsigned long *) lc->sync_bits,
					lc->region_count,
					lc->sync_search);
		lc->sync_search = *region + 1;

		if (*region >= lc->region_count)
			return 0;

	} while (log_test_bit(lc->recovering_bits, *region));

	log_set_bit(lc, lc->recovering_bits, *region);
	return 1;
}

static void core_set_region_sync(struct dirty_log *log, region_t region,
				 int in_sync)
{
	struct log_c *lc = (struct log_c *) log->context;

	log_clear_bit(lc, lc->recovering_bits, region);
	if (in_sync) {
		log_set_bit(lc, lc->sync_bits, region);
		lc->sync_count++;
	} else if (log_test_bit(lc->sync_bits, region)) {
		lc->sync_count--;
		log_clear_bit(lc, lc->sync_bits, region);
	}
}

static region_t core_get_sync_count(struct dirty_log *log)
{
	struct log_c *lc = (struct log_c *) log->context;

	return lc->sync_count;
}

#define DMEMIT_SYNC \
	if (lc->sync != DEFAULTSYNC) \
		DMEMIT("%ssync ", lc->sync == NOSYNC ? "no" : "")

static int core_status(struct dirty_log *log, status_type_t status,
		       char *result, unsigned int maxlen)
{
	int sz = 0;
	struct log_c *lc = log->context;

	switch(status) {
	case STATUSTYPE_INFO:
		DMEMIT("1 %s", log->type->name);
		break;

	case STATUSTYPE_TABLE:
		DMEMIT("%s %u %u ", log->type->name,
		       lc->sync == DEFAULTSYNC ? 1 : 2, lc->region_size);
		DMEMIT_SYNC;
	}

	return sz;
}

static int disk_status(struct dirty_log *log, status_type_t status,
		       char *result, unsigned int maxlen)
{
	int sz = 0;
	struct log_c *lc = log->context;

	switch(status) {
	case STATUSTYPE_INFO:
		DMEMIT("3 %s %s %c", log->type->name, lc->log_dev->name,
		       lc->log_dev_failed ? 'D' : 'A');
		break;

	case STATUSTYPE_TABLE:
		DMEMIT("%s %u %s %u ", log->type->name,
		       lc->sync == DEFAULTSYNC ? 2 : 3, lc->log_dev->name,
		       lc->region_size);
		DMEMIT_SYNC;
	}

	return sz;
}

static struct dirty_log_type _core_type = {
	.name = "core",
	.module = THIS_MODULE,
	.ctr = core_ctr,
	.dtr = core_dtr,
	.resume = core_resume,
	.get_region_size = core_get_region_size,
	.is_clean = core_is_clean,
	.in_sync = core_in_sync,
	.flush = core_flush,
	.mark_region = core_mark_region,
	.clear_region = core_clear_region,
	.get_resync_work = core_get_resync_work,
	.set_region_sync = core_set_region_sync,
	.get_sync_count = core_get_sync_count,
	.status = core_status,
};

static struct dirty_log_type _disk_type = {
	.name = "disk",
	.module = THIS_MODULE,
	.ctr = disk_ctr,
	.dtr = disk_dtr,
	.suspend = disk_flush,
	.resume = disk_resume,
	.get_region_size = core_get_region_size,
	.is_clean = core_is_clean,
	.in_sync = core_in_sync,
	.flush = disk_flush,
	.mark_region = core_mark_region,
	.clear_region = core_clear_region,
	.get_resync_work = core_get_resync_work,
	.set_region_sync = core_set_region_sync,
	.get_sync_count = core_get_sync_count,
	.status = disk_status,
};

int __init dm_dirty_log_init(void)
{
	int r;

	r = dm_register_dirty_log_type(&_core_type);
	if (r)
		DMWARN("couldn't register core log");

	r = dm_register_dirty_log_type(&_disk_type);
	if (r) {
		DMWARN("couldn't register disk type");
		dm_unregister_dirty_log_type(&_core_type);
	}

	return r;
}

void dm_dirty_log_exit(void)
{
	dm_unregister_dirty_log_type(&_disk_type);
	dm_unregister_dirty_log_type(&_core_type);
}

EXPORT_SYMBOL(dm_register_dirty_log_type);
EXPORT_SYMBOL(dm_unregister_dirty_log_type);
EXPORT_SYMBOL(dm_create_dirty_log);
EXPORT_SYMBOL(dm_destroy_dirty_log);