1 /* 2 * Copyright (C) 2006-2009 Red Hat, Inc. 3 * 4 * This file is released under the LGPL. 5 */ 6 7 #include <linux/bio.h> 8 #include <linux/dm-dirty-log.h> 9 #include <linux/device-mapper.h> 10 #include <linux/dm-log-userspace.h> 11 12 #include "dm-log-userspace-transfer.h" 13 14 struct flush_entry { 15 int type; 16 region_t region; 17 struct list_head list; 18 }; 19 20 struct log_c { 21 struct dm_target *ti; 22 uint32_t region_size; 23 region_t region_count; 24 char uuid[DM_UUID_LEN]; 25 26 char *usr_argv_str; 27 uint32_t usr_argc; 28 29 /* 30 * in_sync_hint gets set when doing is_remote_recovering. It 31 * represents the first region that needs recovery. IOW, the 32 * first zero bit of sync_bits. This can be useful for to limit 33 * traffic for calls like is_remote_recovering and get_resync_work, 34 * but be take care in its use for anything else. 35 */ 36 uint64_t in_sync_hint; 37 38 spinlock_t flush_lock; 39 struct list_head flush_list; /* only for clear and mark requests */ 40 }; 41 42 static mempool_t *flush_entry_pool; 43 44 static void *flush_entry_alloc(gfp_t gfp_mask, void *pool_data) 45 { 46 return kmalloc(sizeof(struct flush_entry), gfp_mask); 47 } 48 49 static void flush_entry_free(void *element, void *pool_data) 50 { 51 kfree(element); 52 } 53 54 static int userspace_do_request(struct log_c *lc, const char *uuid, 55 int request_type, char *data, size_t data_size, 56 char *rdata, size_t *rdata_size) 57 { 58 int r; 59 60 /* 61 * If the server isn't there, -ESRCH is returned, 62 * and we must keep trying until the server is 63 * restored. 64 */ 65 retry: 66 r = dm_consult_userspace(uuid, request_type, data, 67 data_size, rdata, rdata_size); 68 69 if (r != -ESRCH) 70 return r; 71 72 DMERR(" Userspace log server not found."); 73 while (1) { 74 set_current_state(TASK_INTERRUPTIBLE); 75 schedule_timeout(2*HZ); 76 DMWARN("Attempting to contact userspace log server..."); 77 r = dm_consult_userspace(uuid, DM_ULOG_CTR, lc->usr_argv_str, 78 strlen(lc->usr_argv_str) + 1, 79 NULL, NULL); 80 if (!r) 81 break; 82 } 83 DMINFO("Reconnected to userspace log server... DM_ULOG_CTR complete"); 84 r = dm_consult_userspace(uuid, DM_ULOG_RESUME, NULL, 85 0, NULL, NULL); 86 if (!r) 87 goto retry; 88 89 DMERR("Error trying to resume userspace log: %d", r); 90 91 return -ESRCH; 92 } 93 94 static int build_constructor_string(struct dm_target *ti, 95 unsigned argc, char **argv, 96 char **ctr_str) 97 { 98 int i, str_size; 99 char *str = NULL; 100 101 *ctr_str = NULL; 102 103 for (i = 0, str_size = 0; i < argc; i++) 104 str_size += strlen(argv[i]) + 1; /* +1 for space between args */ 105 106 str_size += 20; /* Max number of chars in a printed u64 number */ 107 108 str = kzalloc(str_size, GFP_KERNEL); 109 if (!str) { 110 DMWARN("Unable to allocate memory for constructor string"); 111 return -ENOMEM; 112 } 113 114 for (i = 0, str_size = 0; i < argc; i++) 115 str_size += sprintf(str + str_size, "%s ", argv[i]); 116 str_size += sprintf(str + str_size, "%llu", 117 (unsigned long long)ti->len); 118 119 *ctr_str = str; 120 return str_size; 121 } 122 123 /* 124 * userspace_ctr 125 * 126 * argv contains: 127 * <UUID> <other args> 128 * Where 'other args' is the userspace implementation specific log 129 * arguments. An example might be: 130 * <UUID> clustered_disk <arg count> <log dev> <region_size> [[no]sync] 131 * 132 * So, this module will strip off the <UUID> for identification purposes 133 * when communicating with userspace about a log; but will pass on everything 134 * else. 135 */ 136 static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, 137 unsigned argc, char **argv) 138 { 139 int r = 0; 140 int str_size; 141 char *ctr_str = NULL; 142 struct log_c *lc = NULL; 143 uint64_t rdata; 144 size_t rdata_size = sizeof(rdata); 145 146 if (argc < 3) { 147 DMWARN("Too few arguments to userspace dirty log"); 148 return -EINVAL; 149 } 150 151 lc = kmalloc(sizeof(*lc), GFP_KERNEL); 152 if (!lc) { 153 DMWARN("Unable to allocate userspace log context."); 154 return -ENOMEM; 155 } 156 157 lc->ti = ti; 158 159 if (strlen(argv[0]) > (DM_UUID_LEN - 1)) { 160 DMWARN("UUID argument too long."); 161 kfree(lc); 162 return -EINVAL; 163 } 164 165 strncpy(lc->uuid, argv[0], DM_UUID_LEN); 166 spin_lock_init(&lc->flush_lock); 167 INIT_LIST_HEAD(&lc->flush_list); 168 169 str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str); 170 if (str_size < 0) { 171 kfree(lc); 172 return str_size; 173 } 174 175 /* Send table string */ 176 r = dm_consult_userspace(lc->uuid, DM_ULOG_CTR, 177 ctr_str, str_size, NULL, NULL); 178 179 if (r == -ESRCH) { 180 DMERR("Userspace log server not found"); 181 goto out; 182 } 183 184 /* Since the region size does not change, get it now */ 185 rdata_size = sizeof(rdata); 186 r = dm_consult_userspace(lc->uuid, DM_ULOG_GET_REGION_SIZE, 187 NULL, 0, (char *)&rdata, &rdata_size); 188 189 if (r) { 190 DMERR("Failed to get region size of dirty log"); 191 goto out; 192 } 193 194 lc->region_size = (uint32_t)rdata; 195 lc->region_count = dm_sector_div_up(ti->len, lc->region_size); 196 197 out: 198 if (r) { 199 kfree(lc); 200 kfree(ctr_str); 201 } else { 202 lc->usr_argv_str = ctr_str; 203 lc->usr_argc = argc; 204 log->context = lc; 205 } 206 207 return r; 208 } 209 210 static void userspace_dtr(struct dm_dirty_log *log) 211 { 212 int r; 213 struct log_c *lc = log->context; 214 215 r = dm_consult_userspace(lc->uuid, DM_ULOG_DTR, 216 NULL, 0, 217 NULL, NULL); 218 219 kfree(lc->usr_argv_str); 220 kfree(lc); 221 222 return; 223 } 224 225 static int userspace_presuspend(struct dm_dirty_log *log) 226 { 227 int r; 228 struct log_c *lc = log->context; 229 230 r = dm_consult_userspace(lc->uuid, DM_ULOG_PRESUSPEND, 231 NULL, 0, 232 NULL, NULL); 233 234 return r; 235 } 236 237 static int userspace_postsuspend(struct dm_dirty_log *log) 238 { 239 int r; 240 struct log_c *lc = log->context; 241 242 r = dm_consult_userspace(lc->uuid, DM_ULOG_POSTSUSPEND, 243 NULL, 0, 244 NULL, NULL); 245 246 return r; 247 } 248 249 static int userspace_resume(struct dm_dirty_log *log) 250 { 251 int r; 252 struct log_c *lc = log->context; 253 254 lc->in_sync_hint = 0; 255 r = dm_consult_userspace(lc->uuid, DM_ULOG_RESUME, 256 NULL, 0, 257 NULL, NULL); 258 259 return r; 260 } 261 262 static uint32_t userspace_get_region_size(struct dm_dirty_log *log) 263 { 264 struct log_c *lc = log->context; 265 266 return lc->region_size; 267 } 268 269 /* 270 * userspace_is_clean 271 * 272 * Check whether a region is clean. If there is any sort of 273 * failure when consulting the server, we return not clean. 274 * 275 * Returns: 1 if clean, 0 otherwise 276 */ 277 static int userspace_is_clean(struct dm_dirty_log *log, region_t region) 278 { 279 int r; 280 uint64_t region64 = (uint64_t)region; 281 int64_t is_clean; 282 size_t rdata_size; 283 struct log_c *lc = log->context; 284 285 rdata_size = sizeof(is_clean); 286 r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_CLEAN, 287 (char *)®ion64, sizeof(region64), 288 (char *)&is_clean, &rdata_size); 289 290 return (r) ? 0 : (int)is_clean; 291 } 292 293 /* 294 * userspace_in_sync 295 * 296 * Check if the region is in-sync. If there is any sort 297 * of failure when consulting the server, we assume that 298 * the region is not in sync. 299 * 300 * If 'can_block' is set, return immediately 301 * 302 * Returns: 1 if in-sync, 0 if not-in-sync, -EWOULDBLOCK 303 */ 304 static int userspace_in_sync(struct dm_dirty_log *log, region_t region, 305 int can_block) 306 { 307 int r; 308 uint64_t region64 = region; 309 int64_t in_sync; 310 size_t rdata_size; 311 struct log_c *lc = log->context; 312 313 /* 314 * We can never respond directly - even if in_sync_hint is 315 * set. This is because another machine could see a device 316 * failure and mark the region out-of-sync. If we don't go 317 * to userspace to ask, we might think the region is in-sync 318 * and allow a read to pick up data that is stale. (This is 319 * very unlikely if a device actually fails; but it is very 320 * likely if a connection to one device from one machine fails.) 321 * 322 * There still might be a problem if the mirror caches the region 323 * state as in-sync... but then this call would not be made. So, 324 * that is a mirror problem. 325 */ 326 if (!can_block) 327 return -EWOULDBLOCK; 328 329 rdata_size = sizeof(in_sync); 330 r = userspace_do_request(lc, lc->uuid, DM_ULOG_IN_SYNC, 331 (char *)®ion64, sizeof(region64), 332 (char *)&in_sync, &rdata_size); 333 return (r) ? 0 : (int)in_sync; 334 } 335 336 /* 337 * userspace_flush 338 * 339 * This function is ok to block. 340 * The flush happens in two stages. First, it sends all 341 * clear/mark requests that are on the list. Then it 342 * tells the server to commit them. This gives the 343 * server a chance to optimise the commit, instead of 344 * doing it for every request. 345 * 346 * Additionally, we could implement another thread that 347 * sends the requests up to the server - reducing the 348 * load on flush. Then the flush would have less in 349 * the list and be responsible for the finishing commit. 350 * 351 * Returns: 0 on success, < 0 on failure 352 */ 353 static int userspace_flush(struct dm_dirty_log *log) 354 { 355 int r = 0; 356 unsigned long flags; 357 struct log_c *lc = log->context; 358 LIST_HEAD(flush_list); 359 struct flush_entry *fe, *tmp_fe; 360 361 spin_lock_irqsave(&lc->flush_lock, flags); 362 list_splice_init(&lc->flush_list, &flush_list); 363 spin_unlock_irqrestore(&lc->flush_lock, flags); 364 365 if (list_empty(&flush_list)) 366 return 0; 367 368 /* 369 * FIXME: Count up requests, group request types, 370 * allocate memory to stick all requests in and 371 * send to server in one go. Failing the allocation, 372 * do it one by one. 373 */ 374 375 list_for_each_entry(fe, &flush_list, list) { 376 r = userspace_do_request(lc, lc->uuid, fe->type, 377 (char *)&fe->region, 378 sizeof(fe->region), 379 NULL, NULL); 380 if (r) 381 goto fail; 382 } 383 384 r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, 385 NULL, 0, NULL, NULL); 386 387 fail: 388 /* 389 * We can safely remove these entries, even if failure. 390 * Calling code will receive an error and will know that 391 * the log facility has failed. 392 */ 393 list_for_each_entry_safe(fe, tmp_fe, &flush_list, list) { 394 list_del(&fe->list); 395 mempool_free(fe, flush_entry_pool); 396 } 397 398 if (r) 399 dm_table_event(lc->ti->table); 400 401 return r; 402 } 403 404 /* 405 * userspace_mark_region 406 * 407 * This function should avoid blocking unless absolutely required. 408 * (Memory allocation is valid for blocking.) 409 */ 410 static void userspace_mark_region(struct dm_dirty_log *log, region_t region) 411 { 412 unsigned long flags; 413 struct log_c *lc = log->context; 414 struct flush_entry *fe; 415 416 /* Wait for an allocation, but _never_ fail */ 417 fe = mempool_alloc(flush_entry_pool, GFP_NOIO); 418 BUG_ON(!fe); 419 420 spin_lock_irqsave(&lc->flush_lock, flags); 421 fe->type = DM_ULOG_MARK_REGION; 422 fe->region = region; 423 list_add(&fe->list, &lc->flush_list); 424 spin_unlock_irqrestore(&lc->flush_lock, flags); 425 426 return; 427 } 428 429 /* 430 * userspace_clear_region 431 * 432 * This function must not block. 433 * So, the alloc can't block. In the worst case, it is ok to 434 * fail. It would simply mean we can't clear the region. 435 * Does nothing to current sync context, but does mean 436 * the region will be re-sync'ed on a reload of the mirror 437 * even though it is in-sync. 438 */ 439 static void userspace_clear_region(struct dm_dirty_log *log, region_t region) 440 { 441 unsigned long flags; 442 struct log_c *lc = log->context; 443 struct flush_entry *fe; 444 445 /* 446 * If we fail to allocate, we skip the clearing of 447 * the region. This doesn't hurt us in any way, except 448 * to cause the region to be resync'ed when the 449 * device is activated next time. 450 */ 451 fe = mempool_alloc(flush_entry_pool, GFP_ATOMIC); 452 if (!fe) { 453 DMERR("Failed to allocate memory to clear region."); 454 return; 455 } 456 457 spin_lock_irqsave(&lc->flush_lock, flags); 458 fe->type = DM_ULOG_CLEAR_REGION; 459 fe->region = region; 460 list_add(&fe->list, &lc->flush_list); 461 spin_unlock_irqrestore(&lc->flush_lock, flags); 462 463 return; 464 } 465 466 /* 467 * userspace_get_resync_work 468 * 469 * Get a region that needs recovery. It is valid to return 470 * an error for this function. 471 * 472 * Returns: 1 if region filled, 0 if no work, <0 on error 473 */ 474 static int userspace_get_resync_work(struct dm_dirty_log *log, region_t *region) 475 { 476 int r; 477 size_t rdata_size; 478 struct log_c *lc = log->context; 479 struct { 480 int64_t i; /* 64-bit for mix arch compatibility */ 481 region_t r; 482 } pkg; 483 484 if (lc->in_sync_hint >= lc->region_count) 485 return 0; 486 487 rdata_size = sizeof(pkg); 488 r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_RESYNC_WORK, 489 NULL, 0, 490 (char *)&pkg, &rdata_size); 491 492 *region = pkg.r; 493 return (r) ? r : (int)pkg.i; 494 } 495 496 /* 497 * userspace_set_region_sync 498 * 499 * Set the sync status of a given region. This function 500 * must not fail. 501 */ 502 static void userspace_set_region_sync(struct dm_dirty_log *log, 503 region_t region, int in_sync) 504 { 505 int r; 506 struct log_c *lc = log->context; 507 struct { 508 region_t r; 509 int64_t i; 510 } pkg; 511 512 pkg.r = region; 513 pkg.i = (int64_t)in_sync; 514 515 r = userspace_do_request(lc, lc->uuid, DM_ULOG_SET_REGION_SYNC, 516 (char *)&pkg, sizeof(pkg), 517 NULL, NULL); 518 519 /* 520 * It would be nice to be able to report failures. 521 * However, it is easy emough to detect and resolve. 522 */ 523 return; 524 } 525 526 /* 527 * userspace_get_sync_count 528 * 529 * If there is any sort of failure when consulting the server, 530 * we assume that the sync count is zero. 531 * 532 * Returns: sync count on success, 0 on failure 533 */ 534 static region_t userspace_get_sync_count(struct dm_dirty_log *log) 535 { 536 int r; 537 size_t rdata_size; 538 uint64_t sync_count; 539 struct log_c *lc = log->context; 540 541 rdata_size = sizeof(sync_count); 542 r = userspace_do_request(lc, lc->uuid, DM_ULOG_GET_SYNC_COUNT, 543 NULL, 0, 544 (char *)&sync_count, &rdata_size); 545 546 if (r) 547 return 0; 548 549 if (sync_count >= lc->region_count) 550 lc->in_sync_hint = lc->region_count; 551 552 return (region_t)sync_count; 553 } 554 555 /* 556 * userspace_status 557 * 558 * Returns: amount of space consumed 559 */ 560 static int userspace_status(struct dm_dirty_log *log, status_type_t status_type, 561 char *result, unsigned maxlen) 562 { 563 int r = 0; 564 size_t sz = (size_t)maxlen; 565 struct log_c *lc = log->context; 566 567 switch (status_type) { 568 case STATUSTYPE_INFO: 569 r = userspace_do_request(lc, lc->uuid, DM_ULOG_STATUS_INFO, 570 NULL, 0, 571 result, &sz); 572 573 if (r) { 574 sz = 0; 575 DMEMIT("%s 1 COM_FAILURE", log->type->name); 576 } 577 break; 578 case STATUSTYPE_TABLE: 579 sz = 0; 580 DMEMIT("%s %u %s %s ", log->type->name, lc->usr_argc + 1, 581 lc->uuid, lc->usr_argv_str); 582 break; 583 } 584 return (r) ? 0 : (int)sz; 585 } 586 587 /* 588 * userspace_is_remote_recovering 589 * 590 * Returns: 1 if region recovering, 0 otherwise 591 */ 592 static int userspace_is_remote_recovering(struct dm_dirty_log *log, 593 region_t region) 594 { 595 int r; 596 uint64_t region64 = region; 597 struct log_c *lc = log->context; 598 static unsigned long long limit; 599 struct { 600 int64_t is_recovering; 601 uint64_t in_sync_hint; 602 } pkg; 603 size_t rdata_size = sizeof(pkg); 604 605 /* 606 * Once the mirror has been reported to be in-sync, 607 * it will never again ask for recovery work. So, 608 * we can safely say there is not a remote machine 609 * recovering if the device is in-sync. (in_sync_hint 610 * must be reset at resume time.) 611 */ 612 if (region < lc->in_sync_hint) 613 return 0; 614 else if (jiffies < limit) 615 return 1; 616 617 limit = jiffies + (HZ / 4); 618 r = userspace_do_request(lc, lc->uuid, DM_ULOG_IS_REMOTE_RECOVERING, 619 (char *)®ion64, sizeof(region64), 620 (char *)&pkg, &rdata_size); 621 if (r) 622 return 1; 623 624 lc->in_sync_hint = pkg.in_sync_hint; 625 626 return (int)pkg.is_recovering; 627 } 628 629 static struct dm_dirty_log_type _userspace_type = { 630 .name = "userspace", 631 .module = THIS_MODULE, 632 .ctr = userspace_ctr, 633 .dtr = userspace_dtr, 634 .presuspend = userspace_presuspend, 635 .postsuspend = userspace_postsuspend, 636 .resume = userspace_resume, 637 .get_region_size = userspace_get_region_size, 638 .is_clean = userspace_is_clean, 639 .in_sync = userspace_in_sync, 640 .flush = userspace_flush, 641 .mark_region = userspace_mark_region, 642 .clear_region = userspace_clear_region, 643 .get_resync_work = userspace_get_resync_work, 644 .set_region_sync = userspace_set_region_sync, 645 .get_sync_count = userspace_get_sync_count, 646 .status = userspace_status, 647 .is_remote_recovering = userspace_is_remote_recovering, 648 }; 649 650 static int __init userspace_dirty_log_init(void) 651 { 652 int r = 0; 653 654 flush_entry_pool = mempool_create(100, flush_entry_alloc, 655 flush_entry_free, NULL); 656 657 if (!flush_entry_pool) { 658 DMWARN("Unable to create flush_entry_pool: No memory."); 659 return -ENOMEM; 660 } 661 662 r = dm_ulog_tfr_init(); 663 if (r) { 664 DMWARN("Unable to initialize userspace log communications"); 665 mempool_destroy(flush_entry_pool); 666 return r; 667 } 668 669 r = dm_dirty_log_type_register(&_userspace_type); 670 if (r) { 671 DMWARN("Couldn't register userspace dirty log type"); 672 dm_ulog_tfr_exit(); 673 mempool_destroy(flush_entry_pool); 674 return r; 675 } 676 677 DMINFO("version 1.0.0 loaded"); 678 return 0; 679 } 680 681 static void __exit userspace_dirty_log_exit(void) 682 { 683 dm_dirty_log_type_unregister(&_userspace_type); 684 dm_ulog_tfr_exit(); 685 mempool_destroy(flush_entry_pool); 686 687 DMINFO("version 1.0.0 unloaded"); 688 return; 689 } 690 691 module_init(userspace_dirty_log_init); 692 module_exit(userspace_dirty_log_exit); 693 694 MODULE_DESCRIPTION(DM_NAME " userspace dirty log link"); 695 MODULE_AUTHOR("Jonathan Brassow <dm-devel@redhat.com>"); 696 MODULE_LICENSE("GPL"); 697