1 /* 2 * Copyright (C) 2006-2009 Red Hat, Inc. 3 * 4 * This file is released under the LGPL. 5 */ 6 7 #include <linux/bio.h> 8 #include <linux/slab.h> 9 #include <linux/dm-dirty-log.h> 10 #include <linux/device-mapper.h> 11 #include <linux/dm-log-userspace.h> 12 13 #include "dm-log-userspace-transfer.h" 14 15 struct flush_entry { 16 int type; 17 region_t region; 18 struct list_head list; 19 }; 20 21 struct log_c { 22 struct dm_target *ti; 23 uint32_t region_size; 24 region_t region_count; 25 uint64_t luid; 26 char uuid[DM_UUID_LEN]; 27 28 char *usr_argv_str; 29 uint32_t usr_argc; 30 31 /* 32 * in_sync_hint gets set when doing is_remote_recovering. It 33 * represents the first region that needs recovery. IOW, the 34 * first zero bit of sync_bits. This can be useful for to limit 35 * traffic for calls like is_remote_recovering and get_resync_work, 36 * but be take care in its use for anything else. 37 */ 38 uint64_t in_sync_hint; 39 40 spinlock_t flush_lock; 41 struct list_head flush_list; /* only for clear and mark requests */ 42 }; 43 44 static mempool_t *flush_entry_pool; 45 46 static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data) 47 { 48 return kmalloc(sizeof(struct flush_entry), gfp_mask); 49 } 50 51 static void flush_entry_free(void *element, void *pool_data) 52 { 53 kfree(element); 54 } 55 56 static int userspace_do_request(struct log_c *lc, const char *uuid, 57 int request_type, char *data, size_t data_size, 58 char *rdata, size_t *rdata_size) 59 { 60 int r; 61 62 /* 63 * If the server isn't there, -ESRCH is returned, 64 * and we must keep trying until the server is 65 * restored. 66 */ 67 retry: 68 r = dm_consult_userspace(uuid, lc->luid, request_type, data, 69 data_size, rdata, rdata_size); 70 71 if (r != -ESRCH) 72 return r; 73 74 DMERR(" Userspace log server not found."); 75 while (1) { 76 set_current_state(TASK_INTERRUPTIBLE); 77 schedule_timeout(2*HZ); 78 DMWARN("Attempting to contact userspace log server..."); 79 r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_CTR, 80 lc->usr_argv_str, 81 strlen(lc->usr_argv_str) + 1, 82 NULL, NULL); 83 if (!r) 84 break; 85 } 86 DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete"); 87 r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_RESUME, NULL, 88 0, NULL, NULL); 89 if (!r) 90 goto retry; 91 92 DMERR("Error trying to resume userspace log: %d", r); 93 94 return -ESRCH; 95 } 96 97 static int build_constructor_string(struct dm_target *ti, 98 unsigned argc, char **argv, 99 char **ctr_str) 100 { 101 int i, str_size; 102 char *str = NULL; 103 104 *ctr_str = NULL; 105 106 for (i = 0, str_size = 0; i < argc; i++) 107 str_size += strlen(argv[i]) + 1; /* +1 for space between args */ 108 109 str_size += 20; /* Max number of chars in a printed u64 number */ 110 111 str = kzalloc(str_size, GFP_KERNEL); 112 if (!str) { 113 DMWARN("Unable to allocate memory for constructor string"); 114 return -ENOMEM; 115 } 116 117 str_size = sprintf(str, "%llu", (unsigned long long)ti->len); 118 for (i = 0; i < argc; i++) 119 str_size += sprintf(str + str_size, " %s", argv[i]); 120 121 *ctr_str = str; 122 return str_size; 123 } 124 125 /* 126 * userspace_ctr 127 * 128 * argv contains: 129 * <UUID> <other args> 130 * Where 'other args' is the userspace implementation specific log 131 * arguments. An example might be: 132 * <UUID> clustered_disk <arg count> <log dev> <region_size> [[no]sync] 133 * 134 * So, this module will strip off the <UUID> for identification purposes 135 * when communicating with userspace about a log; but will pass on everything 136 * else. 137 */ 138 static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, 139 unsigned argc, char **argv) 140 { 141 int r = 0; 142 int str_size; 143 char *ctr_str = NULL; 144 struct log_c *lc = NULL; 145 uint64_t rdata; 146 size_t rdata_size = sizeof(rdata); 147 148 if (argc < 3) { 149 DMWARN("Too few arguments to userspace dirty log"); 150 return -EINVAL; 151 } 152 153 lc = kmalloc(sizeof(*lc), GFP_KERNEL); 154 if (!lc) { 155 DMWARN("Unable to allocate userspace log context."); 156 return -ENOMEM; 157 } 158 159 /* The ptr value is sufficient for local unique id */ 160 lc->luid = (unsigned long)lc; 161 162 lc->ti = ti; 163 164 if (strlen(argv[0]) > (DM_UUID_LEN - 1)) { 165 DMWARN("UUID argument too long."); 166 kfree(lc); 167 return -EINVAL; 168 } 169 170 strncpy(lc->uuid, argv[0], DM_UUID_LEN); 171 spin_lock_init(&lc->flush_lock); 172 INIT_LIST_HEAD(&lc->flush_list); 173 174 str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str); 175 if (str_size < 0) { 176 kfree(lc); 177 return str_size; 178 } 179 180 /* Send table string */ 181 r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR, 182 ctr_str, str_size, NULL, NULL); 183 184 if (r < 0) { 185 if (r == -ESRCH) 186 DMERR("Userspace log server not found"); 187 else 188 DMERR("Userspace log server failed to create log"); 189 goto out; 190 } 191 192 /* Since the region size does not change, get it now */ 193 rdata_size = sizeof(rdata); 194 r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_GET_REGION_SIZE, 195 NULL, 0, (char *)&rdata, &rdata_size); 196 197 if (r) { 198 DMERR("Failed to get region size of dirty log"); 199 goto out; 200 } 201 202 lc->region_size = (uint32_t)rdata; 203 lc->region_count = dm_sector_div_up(ti->len, lc->region_size); 204 205 out: 206 if (r) { 207 kfree(lc); 208 kfree(ctr_str); 209 } else { 210 lc->usr_argv_str = ctr_str; 211 lc->usr_argc = argc; 212 log->context = lc; 213 } 214 215 return r; 216 } 217 218 static void userspace_dtr(struct dm_dirty_log *log) 219 { 220 struct log_c *lc = log->context; 221 222 (void) dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR, 223 NULL, 0, 224 NULL, NULL); 225 226 kfree(lc->usr_argv_str); 227 kfree(lc); 228 229 return; 230 } 231 232 static int userspace_presuspend(struct dm_dirty_log *log) 233 { 234 int r; 235 struct log_c *lc = log->context; 236 237 r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_PRESUSPEND, 238 NULL, 0, 239 NULL, NULL); 240 241 return r; 242 } 243 244 static int userspace_postsuspend(struct dm_dirty_log *log) 245 { 246 int r; 247 struct log_c *lc = log->context; 248 249 r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_POSTSUSPEND, 250 NULL, 0, 251 NULL, NULL); 252 253 return r; 254 } 255 256 static int userspace_resume(struct dm_dirty_log *log) 257 { 258 int r; 259 struct log_c *lc = log->context; 260 261 lc->in_sync_hint = 0; 262 r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_RESUME, 263 NULL, 0, 264 NULL, NULL); 265 266 return r; 267 } 268 269 static uint32_t userspace_get_region_size(struct dm_dirty_log *log) 270 { 271 struct log_c *lc = log->context; 272 273 return lc->region_size; 274 } 275 276 /* 277 * userspace_is_clean 278 * 279 * Check whether a region is clean. If there is any sort of 280 * failure when consulting the server, we return not clean. 281 * 282 * Returns: 1 if clean, 0 otherwise 283 */ 284 static int userspace_is_clean(struct dm_dirty_log *log, region_t region) 285 { 286 int r; 287 uint64_t region64 = (uint64_t)region; 288 int64_t is_clean; 289 size_t rdata_size; 290 struct log_c *lc = log->context; 291 292 rdata_size = sizeof(is_clean); 293 r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN, 294 (char *)®ion64, sizeof(region64), 295 (char *)&is_clean, &rdata_size); 296 297 return (r) ? 0 : (int)is_clean; 298 } 299 300 /* 301 * userspace_in_sync 302 * 303 * Check if the region is in-sync. If there is any sort 304 * of failure when consulting the server, we assume that 305 * the region is not in sync. 306 * 307 * If 'can_block' is set, return immediately 308 * 309 * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK 310 */ 311 static int userspace_in_sync(struct dm_dirty_log *log, region_t region, 312 int can_block) 313 { 314 int r; 315 uint64_t region64 = region; 316 int64_t in_sync; 317 size_t rdata_size; 318 struct log_c *lc = log->context; 319 320 /* 321 * We can never respond directly - even if in_sync_hint is 322 * set. This is because another machine could see a device 323 * failure and mark the region out-of-sync. If we don't go 324 * to userspace to ask, we might think the region is in-sync 325 * and allow a read to pick up data that is stale. (This is 326 * very unlikely if a device actually fails; but it is very 327 * likely if a connection to one device from one machine fails.) 328 * 329 * There still might be a problem if the mirror caches the region 330 * state as in-sync... but then this call would not be made. So, 331 * that is a mirror problem. 332 */ 333 if (!can_block) 334 return -EWOULDBLOCK; 335 336 rdata_size = sizeof(in_sync); 337 r = userspace_do_request(lc, lc->uuid, DM_ULOG_IN_SYNC, 338 (char *)®ion64, sizeof(region64), 339 (char *)&in_sync, &rdata_size); 340 return (r) ? 0 : (int)in_sync; 341 } 342 343 /* 344 * userspace_flush 345 * 346 * This function is ok to block. 347 * The flush happens in two stages. First, it sends all 348 * clear/mark requests that are on the list. Then it 349 * tells the server to commit them. This gives the 350 * server a chance to optimise the commit, instead of 351 * doing it for every request. 352 * 353 * Additionally, we could implement another thread that 354 * sends the requests up to the server - reducing the 355 * load on flush. Then the flush would have less in 356 * the list and be responsible for the finishing commit. 357 * 358 * Returns: 0 on success, < 0 on failure 359 */ 360 static int userspace_flush(struct dm_dirty_log *log) 361 { 362 int r = 0; 363 unsigned long flags; 364 struct log_c *lc = log->context; 365 LIST_HEAD(flush_list); 366 struct flush_entry *fe, *tmp_fe; 367 368 spin_lock_irqsave(&lc->flush_lock, flags); 369 list_splice_init(&lc->flush_list, &flush_list); 370 spin_unlock_irqrestore(&lc->flush_lock, flags); 371 372 if (list_empty(&flush_list)) 373 return 0; 374 375 /* 376 * FIXME: Count up requests, group request types, 377 * allocate memory to stick all requests in and 378 * send to server in one go. Failing the allocation, 379 * do it one by one. 380 */ 381 382 list_for_each_entry(fe, &flush_list, list) { 383 r = userspace_do_request(lc, lc->uuid, fe->type, 384 (char *)&fe->region, 385 sizeof(fe->region), 386 NULL, NULL); 387 if (r) 388 goto fail; 389 } 390 391 r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, 392 NULL, 0, NULL, NULL); 393 394 fail: 395 /* 396 * We can safely remove these entries, even if failure. 397 * Calling code will receive an error and will know that 398 * the log facility has failed. 399 */ 400 list_for_each_entry_safe(fe, tmp_fe, &flush_list, list) { 401 list_del(&fe->list); 402 mempool_free(fe, flush_entry_pool); 403 } 404 405 if (r) 406 dm_table_event(lc->ti->table); 407 408 return r; 409 } 410 411 /* 412 * userspace_mark_region 413 * 414 * This function should avoid blocking unless absolutely required. 415 * (Memory allocation is valid for blocking.) 416 */ 417 static void userspace_mark_region(struct dm_dirty_log *log, region_t region) 418 { 419 unsigned long flags; 420 struct log_c *lc = log->context; 421 struct flush_entry *fe; 422 423 /* Wait for an allocation, but _never_ fail */ 424 fe = mempool_alloc(flush_entry_pool, GFP_NOIO); 425 BUG_ON(!fe); 426 427 spin_lock_irqsave(&lc->flush_lock, flags); 428 fe->type = DM_ULOG_MARK_REGION; 429 fe->region = region; 430 list_add(&fe->list, &lc->flush_list); 431 spin_unlock_irqrestore(&lc->flush_lock, flags); 432 433 return; 434 } 435 436 /* 437 * userspace_clear_region 438 * 439 * This function must not block. 440 * So, the alloc can't block. In the worst case, it is ok to 441 * fail. It would simply mean we can't clear the region. 442 * Does nothing to current sync context, but does mean 443 * the region will be re-sync'ed on a reload of the mirror 444 * even though it is in-sync. 445 */ 446 static void userspace_clear_region(struct dm_dirty_log *log, region_t region) 447 { 448 unsigned long flags; 449 struct log_c *lc = log->context; 450 struct flush_entry *fe; 451 452 /* 453 * If we fail to allocate, we skip the clearing of 454 * the region. This doesn't hurt us in any way, except 455 * to cause the region to be resync'ed when the 456 * device is activated next time. 457 */ 458 fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC); 459 if (!fe) { 460 DMERR("Failed to allocate memory to clear region."); 461 return; 462 } 463 464 spin_lock_irqsave(&lc->flush_lock, flags); 465 fe->type = DM_ULOG_CLEAR_REGION; 466 fe->region = region; 467 list_add(&fe->list, &lc->flush_list); 468 spin_unlock_irqrestore(&lc->flush_lock, flags); 469 470 return; 471 } 472 473 /* 474 * userspace_get_resync_work 475 * 476 * Get a region that needs recovery. It is valid to return 477 * an error for this function. 478 * 479 * Returns: 1 if region filled, 0 if no work, <0 on error 480 */ 481 static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region) 482 { 483 int r; 484 size_t rdata_size; 485 struct log_c *lc = log->context; 486 struct { 487 int64_t i; /* 64-bit for mix arch compatibility */ 488 region_t r; 489 } pkg; 490 491 if (lc->in_sync_hint >= lc->region_count) 492 return 0; 493 494 rdata_size = sizeof(pkg); 495 r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK, 496 NULL, 0, 497 (char *)&pkg, &rdata_size); 498 499 *region = pkg.r; 500 return (r) ? r : (int)pkg.i; 501 } 502 503 /* 504 * userspace_set_region_sync 505 * 506 * Set the sync status of a given region. This function 507 * must not fail. 508 */ 509 static void userspace_set_region_sync(struct dm_dirty_log *log, 510 region_t region, int in_sync) 511 { 512 int r; 513 struct log_c *lc = log->context; 514 struct { 515 region_t r; 516 int64_t i; 517 } pkg; 518 519 pkg.r = region; 520 pkg.i = (int64_t)in_sync; 521 522 r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC, 523 (char *)&pkg, sizeof(pkg), 524 NULL, NULL); 525 526 /* 527 * It would be nice to be able to report failures. 528 * However, it is easy emough to detect and resolve. 529 */ 530 return; 531 } 532 533 /* 534 * userspace_get_sync_count 535 * 536 * If there is any sort of failure when consulting the server, 537 * we assume that the sync count is zero. 538 * 539 * Returns: sync count on success, 0 on failure 540 */ 541 static region_t userspace_get_sync_count(struct dm_dirty_log *log) 542 { 543 int r; 544 size_t rdata_size; 545 uint64_t sync_count; 546 struct log_c *lc = log->context; 547 548 rdata_size = sizeof(sync_count); 549 r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT, 550 NULL, 0, 551 (char *)&sync_count, &rdata_size); 552 553 if (r) 554 return 0; 555 556 if (sync_count >= lc->region_count) 557 lc->in_sync_hint = lc->region_count; 558 559 return (region_t)sync_count; 560 } 561 562 /* 563 * userspace_status 564 * 565 * Returns: amount of space consumed 566 */ 567 static int userspace_status(struct dm_dirty_log *log, status_type_t status_type, 568 char *result, unsigned maxlen) 569 { 570 int r = 0; 571 char *table_args; 572 size_t sz = (size_t)maxlen; 573 struct log_c *lc = log->context; 574 575 switch (status_type) { 576 case STATUSTYPE_INFO: 577 r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO, 578 NULL, 0, 579 result, &sz); 580 581 if (r) { 582 sz = 0; 583 DMEMIT("%s 1 COM_FAILURE", log->type->name); 584 } 585 break; 586 case STATUSTYPE_TABLE: 587 sz = 0; 588 table_args = strchr(lc->usr_argv_str, ' '); 589 BUG_ON(!table_args); /* There will always be a ' ' */ 590 table_args++; 591 592 DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc, 593 lc->uuid, table_args); 594 break; 595 } 596 return (r) ? 0 : (int)sz; 597 } 598 599 /* 600 * userspace_is_remote_recovering 601 * 602 * Returns: 1 if region recovering, 0 otherwise 603 */ 604 static int userspace_is_remote_recovering(struct dm_dirty_log *log, 605 region_t region) 606 { 607 int r; 608 uint64_t region64 = region; 609 struct log_c *lc = log->context; 610 static unsigned long long limit; 611 struct { 612 int64_t is_recovering; 613 uint64_t in_sync_hint; 614 } pkg; 615 size_t rdata_size = sizeof(pkg); 616 617 /* 618 * Once the mirror has been reported to be in-sync, 619 * it will never again ask for recovery work. So, 620 * we can safely say there is not a remote machine 621 * recovering if the device is in-sync. (in_sync_hint 622 * must be reset at resume time.) 623 */ 624 if (region < lc->in_sync_hint) 625 return 0; 626 else if (jiffies < limit) 627 return 1; 628 629 limit = jiffies + (HZ / 4); 630 r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_REMOTE_RECOVERING, 631 (char *)®ion64, sizeof(region64), 632 (char *)&pkg, &rdata_size); 633 if (r) 634 return 1; 635 636 lc->in_sync_hint = pkg.in_sync_hint; 637 638 return (int)pkg.is_recovering; 639 } 640 641 static struct dm_dirty_log_type _userspace_type = { 642 .name = "userspace", 643 .module = THIS_MODULE, 644 .ctr = userspace_ctr, 645 .dtr = userspace_dtr, 646 .presuspend = userspace_presuspend, 647 .postsuspend = userspace_postsuspend, 648 .resume = userspace_resume, 649 .get_region_size = userspace_get_region_size, 650 .is_clean = userspace_is_clean, 651 .in_sync = userspace_in_sync, 652 .flush = userspace_flush, 653 .mark_region = userspace_mark_region, 654 .clear_region = userspace_clear_region, 655 .get_resync_work = userspace_get_resync_work, 656 .set_region_sync = userspace_set_region_sync, 657 .get_sync_count = userspace_get_sync_count, 658 .status = userspace_status, 659 .is_remote_recovering = userspace_is_remote_recovering, 660 }; 661 662 static int __init userspace_dirty_log_init(void) 663 { 664 int r = 0; 665 666 flush_entry_pool = mempool_create(100, flush_entry_alloc, 667 flush_entry_free, NULL); 668 669 if (!flush_entry_pool) { 670 DMWARN("Unable to create flush_entry_pool: No memory."); 671 return -ENOMEM; 672 } 673 674 r = dm_ulog_tfr_init(); 675 if (r) { 676 DMWARN("Unable to initialize userspace log communications"); 677 mempool_destroy(flush_entry_pool); 678 return r; 679 } 680 681 r = dm_dirty_log_type_register(&_userspace_type); 682 if (r) { 683 DMWARN("Couldn't register userspace dirty log type"); 684 dm_ulog_tfr_exit(); 685 mempool_destroy(flush_entry_pool); 686 return r; 687 } 688 689 DMINFO("version 1.0.0 loaded"); 690 return 0; 691 } 692 693 static void __exit userspace_dirty_log_exit(void) 694 { 695 dm_dirty_log_type_unregister(&_userspace_type); 696 dm_ulog_tfr_exit(); 697 mempool_destroy(flush_entry_pool); 698 699 DMINFO("version 1.0.0 unloaded"); 700 return; 701 } 702 703 module_init(userspace_dirty_log_init); 704 module_exit(userspace_dirty_log_exit); 705 706 MODULE_DESCRIPTION(DM_NAME " userspace dirty log link"); 707 MODULE_AUTHOR("Jonathan Brassow <dm-devel@redhat.com>"); 708 MODULE_LICENSE("GPL"); 709