1 /* 2 * Copyright (C) 2006-2009 Red Hat, Inc. 3 * 4 * This file is released under the LGPL. 5 */ 6 7 #include <linux/bio.h> 8 #include <linux/dm-dirty-log.h> 9 #include <linux/device-mapper.h> 10 #include <linux/dm-log-userspace.h> 11 12 #include "dm-log-userspace-transfer.h" 13 14 struct flush_entry { 15 int type; 16 region_t region; 17 struct list_head list; 18 }; 19 20 struct log_c { 21 struct dm_target *ti; 22 uint32_t region_size; 23 region_t region_count; 24 char uuid[DM_UUID_LEN]; 25 26 char *usr_argv_str; 27 uint32_t usr_argc; 28 29 /* 30 * in_sync_hint gets set when doing is_remote_recovering. It 31 * represents the first region that needs recovery. IOW, the 32 * first zero bit of sync_bits. This can be useful for to limit 33 * traffic for calls like is_remote_recovering and get_resync_work, 34 * but be take care in its use for anything else. 35 */ 36 uint64_t in_sync_hint; 37 38 spinlock_t flush_lock; 39 struct list_head flush_list; /* only for clear and mark requests */ 40 }; 41 42 static mempool_t *flush_entry_pool; 43 44 static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data) 45 { 46 return kmalloc(sizeof(struct flush_entry), gfp_mask); 47 } 48 49 static void flush_entry_free(void *element, void *pool_data) 50 { 51 kfree(element); 52 } 53 54 static int userspace_do_request(struct log_c *lc, const char *uuid, 55 int request_type, char *data, size_t data_size, 56 char *rdata, size_t *rdata_size) 57 { 58 int r; 59 60 /* 61 * If the server isn't there, -ESRCH is returned, 62 * and we must keep trying until the server is 63 * restored. 64 */ 65 retry: 66 r = dm_consult_userspace(uuid, request_type, data, 67 data_size, rdata, rdata_size); 68 69 if (r != -ESRCH) 70 return r; 71 72 DMERR(" Userspace log server not found."); 73 while (1) { 74 set_current_state(TASK_INTERRUPTIBLE); 75 schedule_timeout(2*HZ); 76 DMWARN("Attempting to contact userspace log server..."); 77 r = dm_consult_userspace(uuid, DM_ULOG_CTR, lc->usr_argv_str, 78 strlen(lc->usr_argv_str) + 1, 79 NULL, NULL); 80 if (!r) 81 break; 82 } 83 DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete"); 84 r = dm_consult_userspace(uuid, DM_ULOG_RESUME, NULL, 85 0, NULL, NULL); 86 if (!r) 87 goto retry; 88 89 DMERR("Error trying to resume userspace log: %d", r); 90 91 return -ESRCH; 92 } 93 94 static int build_constructor_string(struct dm_target *ti, 95 unsigned argc, char **argv, 96 char **ctr_str) 97 { 98 int i, str_size; 99 char *str = NULL; 100 101 *ctr_str = NULL; 102 103 for (i = 0, str_size = 0; i < argc; i++) 104 str_size += strlen(argv[i]) + 1; /* +1 for space between args */ 105 106 str_size += 20; /* Max number of chars in a printed u64 number */ 107 108 str = kzalloc(str_size, GFP_KERNEL); 109 if (!str) { 110 DMWARN("Unable to allocate memory for constructor string"); 111 return -ENOMEM; 112 } 113 114 str_size = sprintf(str, "%llu", (unsigned long long)ti->len); 115 for (i = 0; i < argc; i++) 116 str_size += sprintf(str + str_size, " %s", argv[i]); 117 118 *ctr_str = str; 119 return str_size; 120 } 121 122 /* 123 * userspace_ctr 124 * 125 * argv contains: 126 * <UUID> <other args> 127 * Where 'other args' is the userspace implementation specific log 128 * arguments. An example might be: 129 * <UUID> clustered_disk <arg count> <log dev> <region_size> [[no]sync] 130 * 131 * So, this module will strip off the <UUID> for identification purposes 132 * when communicating with userspace about a log; but will pass on everything 133 * else. 134 */ 135 static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, 136 unsigned argc, char **argv) 137 { 138 int r = 0; 139 int str_size; 140 char *ctr_str = NULL; 141 struct log_c *lc = NULL; 142 uint64_t rdata; 143 size_t rdata_size = sizeof(rdata); 144 145 if (argc < 3) { 146 DMWARN("Too few arguments to userspace dirty log"); 147 return -EINVAL; 148 } 149 150 lc = kmalloc(sizeof(*lc), GFP_KERNEL); 151 if (!lc) { 152 DMWARN("Unable to allocate userspace log context."); 153 return -ENOMEM; 154 } 155 156 lc->ti = ti; 157 158 if (strlen(argv[0]) > (DM_UUID_LEN - 1)) { 159 DMWARN("UUID argument too long."); 160 kfree(lc); 161 return -EINVAL; 162 } 163 164 strncpy(lc->uuid, argv[0], DM_UUID_LEN); 165 spin_lock_init(&lc->flush_lock); 166 INIT_LIST_HEAD(&lc->flush_list); 167 168 str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str); 169 if (str_size < 0) { 170 kfree(lc); 171 return str_size; 172 } 173 174 /* Send table string */ 175 r = dm_consult_userspace(lc->uuid, DM_ULOG_CTR, 176 ctr_str, str_size, NULL, NULL); 177 178 if (r == -ESRCH) { 179 DMERR("Userspace log server not found"); 180 goto out; 181 } 182 183 /* Since the region size does not change, get it now */ 184 rdata_size = sizeof(rdata); 185 r = dm_consult_userspace(lc->uuid, DM_ULOG_GET_REGION_SIZE, 186 NULL, 0, (char *)&rdata, &rdata_size); 187 188 if (r) { 189 DMERR("Failed to get region size of dirty log"); 190 goto out; 191 } 192 193 lc->region_size = (uint32_t)rdata; 194 lc->region_count = dm_sector_div_up(ti->len, lc->region_size); 195 196 out: 197 if (r) { 198 kfree(lc); 199 kfree(ctr_str); 200 } else { 201 lc->usr_argv_str = ctr_str; 202 lc->usr_argc = argc; 203 log->context = lc; 204 } 205 206 return r; 207 } 208 209 static void userspace_dtr(struct dm_dirty_log *log) 210 { 211 int r; 212 struct log_c *lc = log->context; 213 214 r = dm_consult_userspace(lc->uuid, DM_ULOG_DTR, 215 NULL, 0, 216 NULL, NULL); 217 218 kfree(lc->usr_argv_str); 219 kfree(lc); 220 221 return; 222 } 223 224 static int userspace_presuspend(struct dm_dirty_log *log) 225 { 226 int r; 227 struct log_c *lc = log->context; 228 229 r = dm_consult_userspace(lc->uuid, DM_ULOG_PRESUSPEND, 230 NULL, 0, 231 NULL, NULL); 232 233 return r; 234 } 235 236 static int userspace_postsuspend(struct dm_dirty_log *log) 237 { 238 int r; 239 struct log_c *lc = log->context; 240 241 r = dm_consult_userspace(lc->uuid, DM_ULOG_POSTSUSPEND, 242 NULL, 0, 243 NULL, NULL); 244 245 return r; 246 } 247 248 static int userspace_resume(struct dm_dirty_log *log) 249 { 250 int r; 251 struct log_c *lc = log->context; 252 253 lc->in_sync_hint = 0; 254 r = dm_consult_userspace(lc->uuid, DM_ULOG_RESUME, 255 NULL, 0, 256 NULL, NULL); 257 258 return r; 259 } 260 261 static uint32_t userspace_get_region_size(struct dm_dirty_log *log) 262 { 263 struct log_c *lc = log->context; 264 265 return lc->region_size; 266 } 267 268 /* 269 * userspace_is_clean 270 * 271 * Check whether a region is clean. If there is any sort of 272 * failure when consulting the server, we return not clean. 273 * 274 * Returns: 1 if clean, 0 otherwise 275 */ 276 static int userspace_is_clean(struct dm_dirty_log *log, region_t region) 277 { 278 int r; 279 uint64_t region64 = (uint64_t)region; 280 int64_t is_clean; 281 size_t rdata_size; 282 struct log_c *lc = log->context; 283 284 rdata_size = sizeof(is_clean); 285 r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN, 286 (char *)®ion64, sizeof(region64), 287 (char *)&is_clean, &rdata_size); 288 289 return (r) ? 0 : (int)is_clean; 290 } 291 292 /* 293 * userspace_in_sync 294 * 295 * Check if the region is in-sync. If there is any sort 296 * of failure when consulting the server, we assume that 297 * the region is not in sync. 298 * 299 * If 'can_block' is set, return immediately 300 * 301 * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK 302 */ 303 static int userspace_in_sync(struct dm_dirty_log *log, region_t region, 304 int can_block) 305 { 306 int r; 307 uint64_t region64 = region; 308 int64_t in_sync; 309 size_t rdata_size; 310 struct log_c *lc = log->context; 311 312 /* 313 * We can never respond directly - even if in_sync_hint is 314 * set. This is because another machine could see a device 315 * failure and mark the region out-of-sync. If we don't go 316 * to userspace to ask, we might think the region is in-sync 317 * and allow a read to pick up data that is stale. (This is 318 * very unlikely if a device actually fails; but it is very 319 * likely if a connection to one device from one machine fails.) 320 * 321 * There still might be a problem if the mirror caches the region 322 * state as in-sync... but then this call would not be made. So, 323 * that is a mirror problem. 324 */ 325 if (!can_block) 326 return -EWOULDBLOCK; 327 328 rdata_size = sizeof(in_sync); 329 r = userspace_do_request(lc, lc->uuid, DM_ULOG_IN_SYNC, 330 (char *)®ion64, sizeof(region64), 331 (char *)&in_sync, &rdata_size); 332 return (r) ? 0 : (int)in_sync; 333 } 334 335 /* 336 * userspace_flush 337 * 338 * This function is ok to block. 339 * The flush happens in two stages. First, it sends all 340 * clear/mark requests that are on the list. Then it 341 * tells the server to commit them. This gives the 342 * server a chance to optimise the commit, instead of 343 * doing it for every request. 344 * 345 * Additionally, we could implement another thread that 346 * sends the requests up to the server - reducing the 347 * load on flush. Then the flush would have less in 348 * the list and be responsible for the finishing commit. 349 * 350 * Returns: 0 on success, < 0 on failure 351 */ 352 static int userspace_flush(struct dm_dirty_log *log) 353 { 354 int r = 0; 355 unsigned long flags; 356 struct log_c *lc = log->context; 357 LIST_HEAD(flush_list); 358 struct flush_entry *fe, *tmp_fe; 359 360 spin_lock_irqsave(&lc->flush_lock, flags); 361 list_splice_init(&lc->flush_list, &flush_list); 362 spin_unlock_irqrestore(&lc->flush_lock, flags); 363 364 if (list_empty(&flush_list)) 365 return 0; 366 367 /* 368 * FIXME: Count up requests, group request types, 369 * allocate memory to stick all requests in and 370 * send to server in one go. Failing the allocation, 371 * do it one by one. 372 */ 373 374 list_for_each_entry(fe, &flush_list, list) { 375 r = userspace_do_request(lc, lc->uuid, fe->type, 376 (char *)&fe->region, 377 sizeof(fe->region), 378 NULL, NULL); 379 if (r) 380 goto fail; 381 } 382 383 r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, 384 NULL, 0, NULL, NULL); 385 386 fail: 387 /* 388 * We can safely remove these entries, even if failure. 389 * Calling code will receive an error and will know that 390 * the log facility has failed. 391 */ 392 list_for_each_entry_safe(fe, tmp_fe, &flush_list, list) { 393 list_del(&fe->list); 394 mempool_free(fe, flush_entry_pool); 395 } 396 397 if (r) 398 dm_table_event(lc->ti->table); 399 400 return r; 401 } 402 403 /* 404 * userspace_mark_region 405 * 406 * This function should avoid blocking unless absolutely required. 407 * (Memory allocation is valid for blocking.) 408 */ 409 static void userspace_mark_region(struct dm_dirty_log *log, region_t region) 410 { 411 unsigned long flags; 412 struct log_c *lc = log->context; 413 struct flush_entry *fe; 414 415 /* Wait for an allocation, but _never_ fail */ 416 fe = mempool_alloc(flush_entry_pool, GFP_NOIO); 417 BUG_ON(!fe); 418 419 spin_lock_irqsave(&lc->flush_lock, flags); 420 fe->type = DM_ULOG_MARK_REGION; 421 fe->region = region; 422 list_add(&fe->list, &lc->flush_list); 423 spin_unlock_irqrestore(&lc->flush_lock, flags); 424 425 return; 426 } 427 428 /* 429 * userspace_clear_region 430 * 431 * This function must not block. 432 * So, the alloc can't block. In the worst case, it is ok to 433 * fail. It would simply mean we can't clear the region. 434 * Does nothing to current sync context, but does mean 435 * the region will be re-sync'ed on a reload of the mirror 436 * even though it is in-sync. 437 */ 438 static void userspace_clear_region(struct dm_dirty_log *log, region_t region) 439 { 440 unsigned long flags; 441 struct log_c *lc = log->context; 442 struct flush_entry *fe; 443 444 /* 445 * If we fail to allocate, we skip the clearing of 446 * the region. This doesn't hurt us in any way, except 447 * to cause the region to be resync'ed when the 448 * device is activated next time. 449 */ 450 fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC); 451 if (!fe) { 452 DMERR("Failed to allocate memory to clear region."); 453 return; 454 } 455 456 spin_lock_irqsave(&lc->flush_lock, flags); 457 fe->type = DM_ULOG_CLEAR_REGION; 458 fe->region = region; 459 list_add(&fe->list, &lc->flush_list); 460 spin_unlock_irqrestore(&lc->flush_lock, flags); 461 462 return; 463 } 464 465 /* 466 * userspace_get_resync_work 467 * 468 * Get a region that needs recovery. It is valid to return 469 * an error for this function. 470 * 471 * Returns: 1 if region filled, 0 if no work, <0 on error 472 */ 473 static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region) 474 { 475 int r; 476 size_t rdata_size; 477 struct log_c *lc = log->context; 478 struct { 479 int64_t i; /* 64-bit for mix arch compatibility */ 480 region_t r; 481 } pkg; 482 483 if (lc->in_sync_hint >= lc->region_count) 484 return 0; 485 486 rdata_size = sizeof(pkg); 487 r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK, 488 NULL, 0, 489 (char *)&pkg, &rdata_size); 490 491 *region = pkg.r; 492 return (r) ? r : (int)pkg.i; 493 } 494 495 /* 496 * userspace_set_region_sync 497 * 498 * Set the sync status of a given region. This function 499 * must not fail. 500 */ 501 static void userspace_set_region_sync(struct dm_dirty_log *log, 502 region_t region, int in_sync) 503 { 504 int r; 505 struct log_c *lc = log->context; 506 struct { 507 region_t r; 508 int64_t i; 509 } pkg; 510 511 pkg.r = region; 512 pkg.i = (int64_t)in_sync; 513 514 r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC, 515 (char *)&pkg, sizeof(pkg), 516 NULL, NULL); 517 518 /* 519 * It would be nice to be able to report failures. 520 * However, it is easy emough to detect and resolve. 521 */ 522 return; 523 } 524 525 /* 526 * userspace_get_sync_count 527 * 528 * If there is any sort of failure when consulting the server, 529 * we assume that the sync count is zero. 530 * 531 * Returns: sync count on success, 0 on failure 532 */ 533 static region_t userspace_get_sync_count(struct dm_dirty_log *log) 534 { 535 int r; 536 size_t rdata_size; 537 uint64_t sync_count; 538 struct log_c *lc = log->context; 539 540 rdata_size = sizeof(sync_count); 541 r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT, 542 NULL, 0, 543 (char *)&sync_count, &rdata_size); 544 545 if (r) 546 return 0; 547 548 if (sync_count >= lc->region_count) 549 lc->in_sync_hint = lc->region_count; 550 551 return (region_t)sync_count; 552 } 553 554 /* 555 * userspace_status 556 * 557 * Returns: amount of space consumed 558 */ 559 static int userspace_status(struct dm_dirty_log *log, status_type_t status_type, 560 char *result, unsigned maxlen) 561 { 562 int r = 0; 563 char *table_args; 564 size_t sz = (size_t)maxlen; 565 struct log_c *lc = log->context; 566 567 switch (status_type) { 568 case STATUSTYPE_INFO: 569 r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO, 570 NULL, 0, 571 result, &sz); 572 573 if (r) { 574 sz = 0; 575 DMEMIT("%s 1 COM_FAILURE", log->type->name); 576 } 577 break; 578 case STATUSTYPE_TABLE: 579 sz = 0; 580 table_args = strstr(lc->usr_argv_str, " "); 581 BUG_ON(!table_args); /* There will always be a ' ' */ 582 table_args++; 583 584 DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc, 585 lc->uuid, table_args); 586 break; 587 } 588 return (r) ? 0 : (int)sz; 589 } 590 591 /* 592 * userspace_is_remote_recovering 593 * 594 * Returns: 1 if region recovering, 0 otherwise 595 */ 596 static int userspace_is_remote_recovering(struct dm_dirty_log *log, 597 region_t region) 598 { 599 int r; 600 uint64_t region64 = region; 601 struct log_c *lc = log->context; 602 static unsigned long long limit; 603 struct { 604 int64_t is_recovering; 605 uint64_t in_sync_hint; 606 } pkg; 607 size_t rdata_size = sizeof(pkg); 608 609 /* 610 * Once the mirror has been reported to be in-sync, 611 * it will never again ask for recovery work. So, 612 * we can safely say there is not a remote machine 613 * recovering if the device is in-sync. (in_sync_hint 614 * must be reset at resume time.) 615 */ 616 if (region < lc->in_sync_hint) 617 return 0; 618 else if (jiffies < limit) 619 return 1; 620 621 limit = jiffies + (HZ / 4); 622 r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_REMOTE_RECOVERING, 623 (char *)®ion64, sizeof(region64), 624 (char *)&pkg, &rdata_size); 625 if (r) 626 return 1; 627 628 lc->in_sync_hint = pkg.in_sync_hint; 629 630 return (int)pkg.is_recovering; 631 } 632 633 static struct dm_dirty_log_type _userspace_type = { 634 .name = "userspace", 635 .module = THIS_MODULE, 636 .ctr = userspace_ctr, 637 .dtr = userspace_dtr, 638 .presuspend = userspace_presuspend, 639 .postsuspend = userspace_postsuspend, 640 .resume = userspace_resume, 641 .get_region_size = userspace_get_region_size, 642 .is_clean = userspace_is_clean, 643 .in_sync = userspace_in_sync, 644 .flush = userspace_flush, 645 .mark_region = userspace_mark_region, 646 .clear_region = userspace_clear_region, 647 .get_resync_work = userspace_get_resync_work, 648 .set_region_sync = userspace_set_region_sync, 649 .get_sync_count = userspace_get_sync_count, 650 .status = userspace_status, 651 .is_remote_recovering = userspace_is_remote_recovering, 652 }; 653 654 static int __init userspace_dirty_log_init(void) 655 { 656 int r = 0; 657 658 flush_entry_pool = mempool_create(100, flush_entry_alloc, 659 flush_entry_free, NULL); 660 661 if (!flush_entry_pool) { 662 DMWARN("Unable to create flush_entry_pool: No memory."); 663 return -ENOMEM; 664 } 665 666 r = dm_ulog_tfr_init(); 667 if (r) { 668 DMWARN("Unable to initialize userspace log communications"); 669 mempool_destroy(flush_entry_pool); 670 return r; 671 } 672 673 r = dm_dirty_log_type_register(&_userspace_type); 674 if (r) { 675 DMWARN("Couldn't register userspace dirty log type"); 676 dm_ulog_tfr_exit(); 677 mempool_destroy(flush_entry_pool); 678 return r; 679 } 680 681 DMINFO("version 1.0.0 loaded"); 682 return 0; 683 } 684 685 static void __exit userspace_dirty_log_exit(void) 686 { 687 dm_dirty_log_type_unregister(&_userspace_type); 688 dm_ulog_tfr_exit(); 689 mempool_destroy(flush_entry_pool); 690 691 DMINFO("version 1.0.0 unloaded"); 692 return; 693 } 694 695 module_init(userspace_dirty_log_init); 696 module_exit(userspace_dirty_log_exit); 697 698 MODULE_DESCRIPTION(DM_NAME " userspace dirty log link"); 699 MODULE_AUTHOR("Jonathan Brassow <dm-devel@redhat.com>"); 700 MODULE_LICENSE("GPL"); 701