1 /* 2 * Copyright (C) 2006-2009 Red Hat, Inc. 3 * 4 * This file is released under the LGPL. 5 */ 6 7 #include <linux/bio.h> 8 #include <linux/dm-dirty-log.h> 9 #include <linux/device-mapper.h> 10 #include <linux/dm-log-userspace.h> 11 12 #include "dm-log-userspace-transfer.h" 13 14 struct flush_entry { 15 int type; 16 region_t region; 17 struct list_head list; 18 }; 19 20 struct log_c { 21 struct dm_target *ti; 22 uint32_t region_size; 23 region_t region_count; 24 uint64_t luid; 25 char uuid[DM_UUID_LEN]; 26 27 char *usr_argv_str; 28 uint32_t usr_argc; 29 30 /* 31 * in_sync_hint gets set when doing is_remote_recovering. It 32 * represents the first region that needs recovery. IOW, the 33 * first zero bit of sync_bits. This can be useful for to limit 34 * traffic for calls like is_remote_recovering and get_resync_work, 35 * but be take care in its use for anything else. 36 */ 37 uint64_t in_sync_hint; 38 39 spinlock_t flush_lock; 40 struct list_head flush_list; /* only for clear and mark requests */ 41 }; 42 43 static mempool_t *flush_entry_pool; 44 45 static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data) 46 { 47 return kmalloc(sizeof(struct flush_entry), gfp_mask); 48 } 49 50 static void flush_entry_free(void *element, void *pool_data) 51 { 52 kfree(element); 53 } 54 55 static int userspace_do_request(struct log_c *lc, const char *uuid, 56 int request_type, char *data, size_t data_size, 57 char *rdata, size_t *rdata_size) 58 { 59 int r; 60 61 /* 62 * If the server isn't there, -ESRCH is returned, 63 * and we must keep trying until the server is 64 * restored. 65 */ 66 retry: 67 r = dm_consult_userspace(uuid, lc->luid, request_type, data, 68 data_size, rdata, rdata_size); 69 70 if (r != -ESRCH) 71 return r; 72 73 DMERR(" Userspace log server not found."); 74 while (1) { 75 set_current_state(TASK_INTERRUPTIBLE); 76 schedule_timeout(2*HZ); 77 DMWARN("Attempting to contact userspace log server..."); 78 r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_CTR, 79 lc->usr_argv_str, 80 strlen(lc->usr_argv_str) + 1, 81 NULL, NULL); 82 if (!r) 83 break; 84 } 85 DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete"); 86 r = dm_consult_userspace(uuid, lc->luid, DM_ULOG_RESUME, NULL, 87 0, NULL, NULL); 88 if (!r) 89 goto retry; 90 91 DMERR("Error trying to resume userspace log: %d", r); 92 93 return -ESRCH; 94 } 95 96 static int build_constructor_string(struct dm_target *ti, 97 unsigned argc, char **argv, 98 char **ctr_str) 99 { 100 int i, str_size; 101 char *str = NULL; 102 103 *ctr_str = NULL; 104 105 for (i = 0, str_size = 0; i < argc; i++) 106 str_size += strlen(argv[i]) + 1; /* +1 for space between args */ 107 108 str_size += 20; /* Max number of chars in a printed u64 number */ 109 110 str = kzalloc(str_size, GFP_KERNEL); 111 if (!str) { 112 DMWARN("Unable to allocate memory for constructor string"); 113 return -ENOMEM; 114 } 115 116 str_size = sprintf(str, "%llu", (unsigned long long)ti->len); 117 for (i = 0; i < argc; i++) 118 str_size += sprintf(str + str_size, " %s", argv[i]); 119 120 *ctr_str = str; 121 return str_size; 122 } 123 124 /* 125 * userspace_ctr 126 * 127 * argv contains: 128 * <UUID> <other args> 129 * Where 'other args' is the userspace implementation specific log 130 * arguments. An example might be: 131 * <UUID> clustered_disk <arg count> <log dev> <region_size> [[no]sync] 132 * 133 * So, this module will strip off the <UUID> for identification purposes 134 * when communicating with userspace about a log; but will pass on everything 135 * else. 136 */ 137 static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, 138 unsigned argc, char **argv) 139 { 140 int r = 0; 141 int str_size; 142 char *ctr_str = NULL; 143 struct log_c *lc = NULL; 144 uint64_t rdata; 145 size_t rdata_size = sizeof(rdata); 146 147 if (argc < 3) { 148 DMWARN("Too few arguments to userspace dirty log"); 149 return -EINVAL; 150 } 151 152 lc = kmalloc(sizeof(*lc), GFP_KERNEL); 153 if (!lc) { 154 DMWARN("Unable to allocate userspace log context."); 155 return -ENOMEM; 156 } 157 158 /* The ptr value is sufficient for local unique id */ 159 lc->luid = (unsigned long)lc; 160 161 lc->ti = ti; 162 163 if (strlen(argv[0]) > (DM_UUID_LEN - 1)) { 164 DMWARN("UUID argument too long."); 165 kfree(lc); 166 return -EINVAL; 167 } 168 169 strncpy(lc->uuid, argv[0], DM_UUID_LEN); 170 spin_lock_init(&lc->flush_lock); 171 INIT_LIST_HEAD(&lc->flush_list); 172 173 str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str); 174 if (str_size < 0) { 175 kfree(lc); 176 return str_size; 177 } 178 179 /* Send table string */ 180 r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR, 181 ctr_str, str_size, NULL, NULL); 182 183 if (r == -ESRCH) { 184 DMERR("Userspace log server not found"); 185 goto out; 186 } 187 188 /* Since the region size does not change, get it now */ 189 rdata_size = sizeof(rdata); 190 r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_GET_REGION_SIZE, 191 NULL, 0, (char *)&rdata, &rdata_size); 192 193 if (r) { 194 DMERR("Failed to get region size of dirty log"); 195 goto out; 196 } 197 198 lc->region_size = (uint32_t)rdata; 199 lc->region_count = dm_sector_div_up(ti->len, lc->region_size); 200 201 out: 202 if (r) { 203 kfree(lc); 204 kfree(ctr_str); 205 } else { 206 lc->usr_argv_str = ctr_str; 207 lc->usr_argc = argc; 208 log->context = lc; 209 } 210 211 return r; 212 } 213 214 static void userspace_dtr(struct dm_dirty_log *log) 215 { 216 int r; 217 struct log_c *lc = log->context; 218 219 r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR, 220 NULL, 0, 221 NULL, NULL); 222 223 kfree(lc->usr_argv_str); 224 kfree(lc); 225 226 return; 227 } 228 229 static int userspace_presuspend(struct dm_dirty_log *log) 230 { 231 int r; 232 struct log_c *lc = log->context; 233 234 r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_PRESUSPEND, 235 NULL, 0, 236 NULL, NULL); 237 238 return r; 239 } 240 241 static int userspace_postsuspend(struct dm_dirty_log *log) 242 { 243 int r; 244 struct log_c *lc = log->context; 245 246 r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_POSTSUSPEND, 247 NULL, 0, 248 NULL, NULL); 249 250 return r; 251 } 252 253 static int userspace_resume(struct dm_dirty_log *log) 254 { 255 int r; 256 struct log_c *lc = log->context; 257 258 lc->in_sync_hint = 0; 259 r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_RESUME, 260 NULL, 0, 261 NULL, NULL); 262 263 return r; 264 } 265 266 static uint32_t userspace_get_region_size(struct dm_dirty_log *log) 267 { 268 struct log_c *lc = log->context; 269 270 return lc->region_size; 271 } 272 273 /* 274 * userspace_is_clean 275 * 276 * Check whether a region is clean. If there is any sort of 277 * failure when consulting the server, we return not clean. 278 * 279 * Returns: 1 if clean, 0 otherwise 280 */ 281 static int userspace_is_clean(struct dm_dirty_log *log, region_t region) 282 { 283 int r; 284 uint64_t region64 = (uint64_t)region; 285 int64_t is_clean; 286 size_t rdata_size; 287 struct log_c *lc = log->context; 288 289 rdata_size = sizeof(is_clean); 290 r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN, 291 (char *)®ion64, sizeof(region64), 292 (char *)&is_clean, &rdata_size); 293 294 return (r) ? 0 : (int)is_clean; 295 } 296 297 /* 298 * userspace_in_sync 299 * 300 * Check if the region is in-sync. If there is any sort 301 * of failure when consulting the server, we assume that 302 * the region is not in sync. 303 * 304 * If 'can_block' is set, return immediately 305 * 306 * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK 307 */ 308 static int userspace_in_sync(struct dm_dirty_log *log, region_t region, 309 int can_block) 310 { 311 int r; 312 uint64_t region64 = region; 313 int64_t in_sync; 314 size_t rdata_size; 315 struct log_c *lc = log->context; 316 317 /* 318 * We can never respond directly - even if in_sync_hint is 319 * set. This is because another machine could see a device 320 * failure and mark the region out-of-sync. If we don't go 321 * to userspace to ask, we might think the region is in-sync 322 * and allow a read to pick up data that is stale. (This is 323 * very unlikely if a device actually fails; but it is very 324 * likely if a connection to one device from one machine fails.) 325 * 326 * There still might be a problem if the mirror caches the region 327 * state as in-sync... but then this call would not be made. So, 328 * that is a mirror problem. 329 */ 330 if (!can_block) 331 return -EWOULDBLOCK; 332 333 rdata_size = sizeof(in_sync); 334 r = userspace_do_request(lc, lc->uuid, DM_ULOG_IN_SYNC, 335 (char *)®ion64, sizeof(region64), 336 (char *)&in_sync, &rdata_size); 337 return (r) ? 0 : (int)in_sync; 338 } 339 340 /* 341 * userspace_flush 342 * 343 * This function is ok to block. 344 * The flush happens in two stages. First, it sends all 345 * clear/mark requests that are on the list. Then it 346 * tells the server to commit them. This gives the 347 * server a chance to optimise the commit, instead of 348 * doing it for every request. 349 * 350 * Additionally, we could implement another thread that 351 * sends the requests up to the server - reducing the 352 * load on flush. Then the flush would have less in 353 * the list and be responsible for the finishing commit. 354 * 355 * Returns: 0 on success, < 0 on failure 356 */ 357 static int userspace_flush(struct dm_dirty_log *log) 358 { 359 int r = 0; 360 unsigned long flags; 361 struct log_c *lc = log->context; 362 LIST_HEAD(flush_list); 363 struct flush_entry *fe, *tmp_fe; 364 365 spin_lock_irqsave(&lc->flush_lock, flags); 366 list_splice_init(&lc->flush_list, &flush_list); 367 spin_unlock_irqrestore(&lc->flush_lock, flags); 368 369 if (list_empty(&flush_list)) 370 return 0; 371 372 /* 373 * FIXME: Count up requests, group request types, 374 * allocate memory to stick all requests in and 375 * send to server in one go. Failing the allocation, 376 * do it one by one. 377 */ 378 379 list_for_each_entry(fe, &flush_list, list) { 380 r = userspace_do_request(lc, lc->uuid, fe->type, 381 (char *)&fe->region, 382 sizeof(fe->region), 383 NULL, NULL); 384 if (r) 385 goto fail; 386 } 387 388 r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, 389 NULL, 0, NULL, NULL); 390 391 fail: 392 /* 393 * We can safely remove these entries, even if failure. 394 * Calling code will receive an error and will know that 395 * the log facility has failed. 396 */ 397 list_for_each_entry_safe(fe, tmp_fe, &flush_list, list) { 398 list_del(&fe->list); 399 mempool_free(fe, flush_entry_pool); 400 } 401 402 if (r) 403 dm_table_event(lc->ti->table); 404 405 return r; 406 } 407 408 /* 409 * userspace_mark_region 410 * 411 * This function should avoid blocking unless absolutely required. 412 * (Memory allocation is valid for blocking.) 413 */ 414 static void userspace_mark_region(struct dm_dirty_log *log, region_t region) 415 { 416 unsigned long flags; 417 struct log_c *lc = log->context; 418 struct flush_entry *fe; 419 420 /* Wait for an allocation, but _never_ fail */ 421 fe = mempool_alloc(flush_entry_pool, GFP_NOIO); 422 BUG_ON(!fe); 423 424 spin_lock_irqsave(&lc->flush_lock, flags); 425 fe->type = DM_ULOG_MARK_REGION; 426 fe->region = region; 427 list_add(&fe->list, &lc->flush_list); 428 spin_unlock_irqrestore(&lc->flush_lock, flags); 429 430 return; 431 } 432 433 /* 434 * userspace_clear_region 435 * 436 * This function must not block. 437 * So, the alloc can't block. In the worst case, it is ok to 438 * fail. It would simply mean we can't clear the region. 439 * Does nothing to current sync context, but does mean 440 * the region will be re-sync'ed on a reload of the mirror 441 * even though it is in-sync. 442 */ 443 static void userspace_clear_region(struct dm_dirty_log *log, region_t region) 444 { 445 unsigned long flags; 446 struct log_c *lc = log->context; 447 struct flush_entry *fe; 448 449 /* 450 * If we fail to allocate, we skip the clearing of 451 * the region. This doesn't hurt us in any way, except 452 * to cause the region to be resync'ed when the 453 * device is activated next time. 454 */ 455 fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC); 456 if (!fe) { 457 DMERR("Failed to allocate memory to clear region."); 458 return; 459 } 460 461 spin_lock_irqsave(&lc->flush_lock, flags); 462 fe->type = DM_ULOG_CLEAR_REGION; 463 fe->region = region; 464 list_add(&fe->list, &lc->flush_list); 465 spin_unlock_irqrestore(&lc->flush_lock, flags); 466 467 return; 468 } 469 470 /* 471 * userspace_get_resync_work 472 * 473 * Get a region that needs recovery. It is valid to return 474 * an error for this function. 475 * 476 * Returns: 1 if region filled, 0 if no work, <0 on error 477 */ 478 static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region) 479 { 480 int r; 481 size_t rdata_size; 482 struct log_c *lc = log->context; 483 struct { 484 int64_t i; /* 64-bit for mix arch compatibility */ 485 region_t r; 486 } pkg; 487 488 if (lc->in_sync_hint >= lc->region_count) 489 return 0; 490 491 rdata_size = sizeof(pkg); 492 r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK, 493 NULL, 0, 494 (char *)&pkg, &rdata_size); 495 496 *region = pkg.r; 497 return (r) ? r : (int)pkg.i; 498 } 499 500 /* 501 * userspace_set_region_sync 502 * 503 * Set the sync status of a given region. This function 504 * must not fail. 505 */ 506 static void userspace_set_region_sync(struct dm_dirty_log *log, 507 region_t region, int in_sync) 508 { 509 int r; 510 struct log_c *lc = log->context; 511 struct { 512 region_t r; 513 int64_t i; 514 } pkg; 515 516 pkg.r = region; 517 pkg.i = (int64_t)in_sync; 518 519 r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC, 520 (char *)&pkg, sizeof(pkg), 521 NULL, NULL); 522 523 /* 524 * It would be nice to be able to report failures. 525 * However, it is easy emough to detect and resolve. 526 */ 527 return; 528 } 529 530 /* 531 * userspace_get_sync_count 532 * 533 * If there is any sort of failure when consulting the server, 534 * we assume that the sync count is zero. 535 * 536 * Returns: sync count on success, 0 on failure 537 */ 538 static region_t userspace_get_sync_count(struct dm_dirty_log *log) 539 { 540 int r; 541 size_t rdata_size; 542 uint64_t sync_count; 543 struct log_c *lc = log->context; 544 545 rdata_size = sizeof(sync_count); 546 r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT, 547 NULL, 0, 548 (char *)&sync_count, &rdata_size); 549 550 if (r) 551 return 0; 552 553 if (sync_count >= lc->region_count) 554 lc->in_sync_hint = lc->region_count; 555 556 return (region_t)sync_count; 557 } 558 559 /* 560 * userspace_status 561 * 562 * Returns: amount of space consumed 563 */ 564 static int userspace_status(struct dm_dirty_log *log, status_type_t status_type, 565 char *result, unsigned maxlen) 566 { 567 int r = 0; 568 char *table_args; 569 size_t sz = (size_t)maxlen; 570 struct log_c *lc = log->context; 571 572 switch (status_type) { 573 case STATUSTYPE_INFO: 574 r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO, 575 NULL, 0, 576 result, &sz); 577 578 if (r) { 579 sz = 0; 580 DMEMIT("%s 1 COM_FAILURE", log->type->name); 581 } 582 break; 583 case STATUSTYPE_TABLE: 584 sz = 0; 585 table_args = strchr(lc->usr_argv_str, ' '); 586 BUG_ON(!table_args); /* There will always be a ' ' */ 587 table_args++; 588 589 DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc, 590 lc->uuid, table_args); 591 break; 592 } 593 return (r) ? 0 : (int)sz; 594 } 595 596 /* 597 * userspace_is_remote_recovering 598 * 599 * Returns: 1 if region recovering, 0 otherwise 600 */ 601 static int userspace_is_remote_recovering(struct dm_dirty_log *log, 602 region_t region) 603 { 604 int r; 605 uint64_t region64 = region; 606 struct log_c *lc = log->context; 607 static unsigned long long limit; 608 struct { 609 int64_t is_recovering; 610 uint64_t in_sync_hint; 611 } pkg; 612 size_t rdata_size = sizeof(pkg); 613 614 /* 615 * Once the mirror has been reported to be in-sync, 616 * it will never again ask for recovery work. So, 617 * we can safely say there is not a remote machine 618 * recovering if the device is in-sync. (in_sync_hint 619 * must be reset at resume time.) 620 */ 621 if (region < lc->in_sync_hint) 622 return 0; 623 else if (jiffies < limit) 624 return 1; 625 626 limit = jiffies + (HZ / 4); 627 r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_REMOTE_RECOVERING, 628 (char *)®ion64, sizeof(region64), 629 (char *)&pkg, &rdata_size); 630 if (r) 631 return 1; 632 633 lc->in_sync_hint = pkg.in_sync_hint; 634 635 return (int)pkg.is_recovering; 636 } 637 638 static struct dm_dirty_log_type _userspace_type = { 639 .name = "userspace", 640 .module = THIS_MODULE, 641 .ctr = userspace_ctr, 642 .dtr = userspace_dtr, 643 .presuspend = userspace_presuspend, 644 .postsuspend = userspace_postsuspend, 645 .resume = userspace_resume, 646 .get_region_size = userspace_get_region_size, 647 .is_clean = userspace_is_clean, 648 .in_sync = userspace_in_sync, 649 .flush = userspace_flush, 650 .mark_region = userspace_mark_region, 651 .clear_region = userspace_clear_region, 652 .get_resync_work = userspace_get_resync_work, 653 .set_region_sync = userspace_set_region_sync, 654 .get_sync_count = userspace_get_sync_count, 655 .status = userspace_status, 656 .is_remote_recovering = userspace_is_remote_recovering, 657 }; 658 659 static int __init userspace_dirty_log_init(void) 660 { 661 int r = 0; 662 663 flush_entry_pool = mempool_create(100, flush_entry_alloc, 664 flush_entry_free, NULL); 665 666 if (!flush_entry_pool) { 667 DMWARN("Unable to create flush_entry_pool: No memory."); 668 return -ENOMEM; 669 } 670 671 r = dm_ulog_tfr_init(); 672 if (r) { 673 DMWARN("Unable to initialize userspace log communications"); 674 mempool_destroy(flush_entry_pool); 675 return r; 676 } 677 678 r = dm_dirty_log_type_register(&_userspace_type); 679 if (r) { 680 DMWARN("Couldn't register userspace dirty log type"); 681 dm_ulog_tfr_exit(); 682 mempool_destroy(flush_entry_pool); 683 return r; 684 } 685 686 DMINFO("version 1.0.0 loaded"); 687 return 0; 688 } 689 690 static void __exit userspace_dirty_log_exit(void) 691 { 692 dm_dirty_log_type_unregister(&_userspace_type); 693 dm_ulog_tfr_exit(); 694 mempool_destroy(flush_entry_pool); 695 696 DMINFO("version 1.0.0 unloaded"); 697 return; 698 } 699 700 module_init(userspace_dirty_log_init); 701 module_exit(userspace_dirty_log_exit); 702 703 MODULE_DESCRIPTION(DM_NAME " userspace dirty log link"); 704 MODULE_AUTHOR("Jonathan Brassow <dm-devel@redhat.com>"); 705 MODULE_LICENSE("GPL"); 706