1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * stackglue.c 5 * 6 * Code which implements an OCFS2 specific interface to underlying 7 * cluster stacks. 8 * 9 * Copyright (C) 2007, 2009 Oracle. All rights reserved. 10 * 11 * This program is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU General Public 13 * License as published by the Free Software Foundation, version 2. 14 * 15 * This program is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * General Public License for more details. 19 */ 20 21 #include <linux/list.h> 22 #include <linux/spinlock.h> 23 #include <linux/module.h> 24 #include <linux/slab.h> 25 #include <linux/kmod.h> 26 #include <linux/fs.h> 27 #include <linux/kobject.h> 28 #include <linux/sysfs.h> 29 #include <linux/sysctl.h> 30 31 #include "ocfs2_fs.h" 32 33 #include "stackglue.h" 34 35 #define OCFS2_STACK_PLUGIN_O2CB "o2cb" 36 #define OCFS2_STACK_PLUGIN_USER "user" 37 #define OCFS2_MAX_HB_CTL_PATH 256 38 39 static struct ocfs2_locking_protocol *lproto; 40 static DEFINE_SPINLOCK(ocfs2_stack_lock); 41 static LIST_HEAD(ocfs2_stack_list); 42 static char cluster_stack_name[OCFS2_STACK_LABEL_LEN + 1]; 43 static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; 44 45 /* 46 * The stack currently in use. If not null, active_stack->sp_count > 0, 47 * the module is pinned, and the locking protocol cannot be changed. 48 */ 49 static struct ocfs2_stack_plugin *active_stack; 50 51 static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name) 52 { 53 struct ocfs2_stack_plugin *p; 54 55 assert_spin_locked(&ocfs2_stack_lock); 56 57 list_for_each_entry(p, &ocfs2_stack_list, sp_list) { 58 if (!strcmp(p->sp_name, name)) 59 return p; 60 } 61 62 return NULL; 63 } 64 65 static int ocfs2_stack_driver_request(const char *stack_name, 66 const char *plugin_name) 67 { 68 int rc; 69 struct ocfs2_stack_plugin *p; 70 71 spin_lock(&ocfs2_stack_lock); 72 73 /* 74 * If the stack passed by the filesystem isn't the selected one, 75 * we can't continue. 76 */ 77 if (strcmp(stack_name, cluster_stack_name)) { 78 rc = -EBUSY; 79 goto out; 80 } 81 82 if (active_stack) { 83 /* 84 * If the active stack isn't the one we want, it cannot 85 * be selected right now. 86 */ 87 if (!strcmp(active_stack->sp_name, plugin_name)) 88 rc = 0; 89 else 90 rc = -EBUSY; 91 goto out; 92 } 93 94 p = ocfs2_stack_lookup(plugin_name); 95 if (!p || !try_module_get(p->sp_owner)) { 96 rc = -ENOENT; 97 goto out; 98 } 99 100 active_stack = p; 101 rc = 0; 102 103 out: 104 /* If we found it, pin it */ 105 if (!rc) 106 active_stack->sp_count++; 107 108 spin_unlock(&ocfs2_stack_lock); 109 return rc; 110 } 111 112 /* 113 * This function looks up the appropriate stack and makes it active. If 114 * there is no stack, it tries to load it. It will fail if the stack still 115 * cannot be found. It will also fail if a different stack is in use. 116 */ 117 static int ocfs2_stack_driver_get(const char *stack_name) 118 { 119 int rc; 120 char *plugin_name = OCFS2_STACK_PLUGIN_O2CB; 121 122 /* 123 * Classic stack does not pass in a stack name. This is 124 * compatible with older tools as well. 125 */ 126 if (!stack_name || !*stack_name) 127 stack_name = OCFS2_STACK_PLUGIN_O2CB; 128 129 if (strlen(stack_name) != OCFS2_STACK_LABEL_LEN) { 130 printk(KERN_ERR 131 "ocfs2 passed an invalid cluster stack label: \"%s\"\n", 132 stack_name); 133 return -EINVAL; 134 } 135 136 /* Anything that isn't the classic stack is a user stack */ 137 if (strcmp(stack_name, OCFS2_STACK_PLUGIN_O2CB)) 138 plugin_name = OCFS2_STACK_PLUGIN_USER; 139 140 rc = ocfs2_stack_driver_request(stack_name, plugin_name); 141 if (rc == -ENOENT) { 142 request_module("ocfs2_stack_%s", plugin_name); 143 rc = ocfs2_stack_driver_request(stack_name, plugin_name); 144 } 145 146 if (rc == -ENOENT) { 147 printk(KERN_ERR 148 "ocfs2: Cluster stack driver \"%s\" cannot be found\n", 149 plugin_name); 150 } else if (rc == -EBUSY) { 151 printk(KERN_ERR 152 "ocfs2: A different cluster stack is in use\n"); 153 } 154 155 return rc; 156 } 157 158 static void ocfs2_stack_driver_put(void) 159 { 160 spin_lock(&ocfs2_stack_lock); 161 BUG_ON(active_stack == NULL); 162 BUG_ON(active_stack->sp_count == 0); 163 164 active_stack->sp_count--; 165 if (!active_stack->sp_count) { 166 module_put(active_stack->sp_owner); 167 active_stack = NULL; 168 } 169 spin_unlock(&ocfs2_stack_lock); 170 } 171 172 int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin) 173 { 174 int rc; 175 176 spin_lock(&ocfs2_stack_lock); 177 if (!ocfs2_stack_lookup(plugin->sp_name)) { 178 plugin->sp_count = 0; 179 plugin->sp_proto = lproto; 180 list_add(&plugin->sp_list, &ocfs2_stack_list); 181 printk(KERN_INFO "ocfs2: Registered cluster interface %s\n", 182 plugin->sp_name); 183 rc = 0; 184 } else { 185 printk(KERN_ERR "ocfs2: Stack \"%s\" already registered\n", 186 plugin->sp_name); 187 rc = -EEXIST; 188 } 189 spin_unlock(&ocfs2_stack_lock); 190 191 return rc; 192 } 193 EXPORT_SYMBOL_GPL(ocfs2_stack_glue_register); 194 195 void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin) 196 { 197 struct ocfs2_stack_plugin *p; 198 199 spin_lock(&ocfs2_stack_lock); 200 p = ocfs2_stack_lookup(plugin->sp_name); 201 if (p) { 202 BUG_ON(p != plugin); 203 BUG_ON(plugin == active_stack); 204 BUG_ON(plugin->sp_count != 0); 205 list_del_init(&plugin->sp_list); 206 printk(KERN_INFO "ocfs2: Unregistered cluster interface %s\n", 207 plugin->sp_name); 208 } else { 209 printk(KERN_ERR "Stack \"%s\" is not registered\n", 210 plugin->sp_name); 211 } 212 spin_unlock(&ocfs2_stack_lock); 213 } 214 EXPORT_SYMBOL_GPL(ocfs2_stack_glue_unregister); 215 216 void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto) 217 { 218 struct ocfs2_stack_plugin *p; 219 220 BUG_ON(proto == NULL); 221 222 spin_lock(&ocfs2_stack_lock); 223 BUG_ON(active_stack != NULL); 224 225 lproto = proto; 226 list_for_each_entry(p, &ocfs2_stack_list, sp_list) { 227 p->sp_proto = lproto; 228 } 229 230 spin_unlock(&ocfs2_stack_lock); 231 } 232 EXPORT_SYMBOL_GPL(ocfs2_stack_glue_set_locking_protocol); 233 234 235 /* 236 * The ocfs2_dlm_lock() and ocfs2_dlm_unlock() functions take 237 * "struct ocfs2_lock_res *astarg" instead of "void *astarg" because the 238 * underlying stack plugins need to pilfer the lksb off of the lock_res. 239 * If some other structure needs to be passed as an astarg, the plugins 240 * will need to be given a different avenue to the lksb. 241 */ 242 int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, 243 int mode, 244 union ocfs2_dlm_lksb *lksb, 245 u32 flags, 246 void *name, 247 unsigned int namelen, 248 struct ocfs2_lock_res *astarg) 249 { 250 BUG_ON(lproto == NULL); 251 252 return active_stack->sp_ops->dlm_lock(conn, mode, lksb, flags, 253 name, namelen, astarg); 254 } 255 EXPORT_SYMBOL_GPL(ocfs2_dlm_lock); 256 257 int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, 258 union ocfs2_dlm_lksb *lksb, 259 u32 flags, 260 struct ocfs2_lock_res *astarg) 261 { 262 BUG_ON(lproto == NULL); 263 264 return active_stack->sp_ops->dlm_unlock(conn, lksb, flags, astarg); 265 } 266 EXPORT_SYMBOL_GPL(ocfs2_dlm_unlock); 267 268 int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb) 269 { 270 return active_stack->sp_ops->lock_status(lksb); 271 } 272 EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status); 273 274 int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb) 275 { 276 return active_stack->sp_ops->lvb_valid(lksb); 277 } 278 EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb_valid); 279 280 void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb) 281 { 282 return active_stack->sp_ops->lock_lvb(lksb); 283 } 284 EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb); 285 286 void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) 287 { 288 active_stack->sp_ops->dump_lksb(lksb); 289 } 290 EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb); 291 292 int ocfs2_stack_supports_plocks(void) 293 { 294 return active_stack && active_stack->sp_ops->plock; 295 } 296 EXPORT_SYMBOL_GPL(ocfs2_stack_supports_plocks); 297 298 /* 299 * ocfs2_plock() can only be safely called if 300 * ocfs2_stack_supports_plocks() returned true 301 */ 302 int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino, 303 struct file *file, int cmd, struct file_lock *fl) 304 { 305 WARN_ON_ONCE(active_stack->sp_ops->plock == NULL); 306 if (active_stack->sp_ops->plock) 307 return active_stack->sp_ops->plock(conn, ino, file, cmd, fl); 308 return -EOPNOTSUPP; 309 } 310 EXPORT_SYMBOL_GPL(ocfs2_plock); 311 312 int ocfs2_cluster_connect(const char *stack_name, 313 const char *group, 314 int grouplen, 315 void (*recovery_handler)(int node_num, 316 void *recovery_data), 317 void *recovery_data, 318 struct ocfs2_cluster_connection **conn) 319 { 320 int rc = 0; 321 struct ocfs2_cluster_connection *new_conn; 322 323 BUG_ON(group == NULL); 324 BUG_ON(conn == NULL); 325 BUG_ON(recovery_handler == NULL); 326 327 if (grouplen > GROUP_NAME_MAX) { 328 rc = -EINVAL; 329 goto out; 330 } 331 332 new_conn = kzalloc(sizeof(struct ocfs2_cluster_connection), 333 GFP_KERNEL); 334 if (!new_conn) { 335 rc = -ENOMEM; 336 goto out; 337 } 338 339 memcpy(new_conn->cc_name, group, grouplen); 340 new_conn->cc_namelen = grouplen; 341 new_conn->cc_recovery_handler = recovery_handler; 342 new_conn->cc_recovery_data = recovery_data; 343 344 /* Start the new connection at our maximum compatibility level */ 345 new_conn->cc_version = lproto->lp_max_version; 346 347 /* This will pin the stack driver if successful */ 348 rc = ocfs2_stack_driver_get(stack_name); 349 if (rc) 350 goto out_free; 351 352 rc = active_stack->sp_ops->connect(new_conn); 353 if (rc) { 354 ocfs2_stack_driver_put(); 355 goto out_free; 356 } 357 358 *conn = new_conn; 359 360 out_free: 361 if (rc) 362 kfree(new_conn); 363 364 out: 365 return rc; 366 } 367 EXPORT_SYMBOL_GPL(ocfs2_cluster_connect); 368 369 /* If hangup_pending is 0, the stack driver will be dropped */ 370 int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, 371 int hangup_pending) 372 { 373 int ret; 374 375 BUG_ON(conn == NULL); 376 377 ret = active_stack->sp_ops->disconnect(conn); 378 379 /* XXX Should we free it anyway? */ 380 if (!ret) { 381 kfree(conn); 382 if (!hangup_pending) 383 ocfs2_stack_driver_put(); 384 } 385 386 return ret; 387 } 388 EXPORT_SYMBOL_GPL(ocfs2_cluster_disconnect); 389 390 /* 391 * Leave the group for this filesystem. This is executed by a userspace 392 * program (stored in ocfs2_hb_ctl_path). 393 */ 394 static void ocfs2_leave_group(const char *group) 395 { 396 int ret; 397 char *argv[5], *envp[3]; 398 399 argv[0] = ocfs2_hb_ctl_path; 400 argv[1] = "-K"; 401 argv[2] = "-u"; 402 argv[3] = (char *)group; 403 argv[4] = NULL; 404 405 /* minimal command environment taken from cpu_run_sbin_hotplug */ 406 envp[0] = "HOME=/"; 407 envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; 408 envp[2] = NULL; 409 410 ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); 411 if (ret < 0) { 412 printk(KERN_ERR 413 "ocfs2: Error %d running user helper " 414 "\"%s %s %s %s\"\n", 415 ret, argv[0], argv[1], argv[2], argv[3]); 416 } 417 } 418 419 /* 420 * Hangup is a required post-umount. ocfs2-tools software expects the 421 * filesystem to call "ocfs2_hb_ctl" during unmount. This happens 422 * regardless of whether the DLM got started, so we can't do it 423 * in ocfs2_cluster_disconnect(). The ocfs2_leave_group() function does 424 * the actual work. 425 */ 426 void ocfs2_cluster_hangup(const char *group, int grouplen) 427 { 428 BUG_ON(group == NULL); 429 BUG_ON(group[grouplen] != '\0'); 430 431 ocfs2_leave_group(group); 432 433 /* cluster_disconnect() was called with hangup_pending==1 */ 434 ocfs2_stack_driver_put(); 435 } 436 EXPORT_SYMBOL_GPL(ocfs2_cluster_hangup); 437 438 int ocfs2_cluster_this_node(unsigned int *node) 439 { 440 return active_stack->sp_ops->this_node(node); 441 } 442 EXPORT_SYMBOL_GPL(ocfs2_cluster_this_node); 443 444 445 /* 446 * Sysfs bits 447 */ 448 449 static ssize_t ocfs2_max_locking_protocol_show(struct kobject *kobj, 450 struct kobj_attribute *attr, 451 char *buf) 452 { 453 ssize_t ret = 0; 454 455 spin_lock(&ocfs2_stack_lock); 456 if (lproto) 457 ret = snprintf(buf, PAGE_SIZE, "%u.%u\n", 458 lproto->lp_max_version.pv_major, 459 lproto->lp_max_version.pv_minor); 460 spin_unlock(&ocfs2_stack_lock); 461 462 return ret; 463 } 464 465 static struct kobj_attribute ocfs2_attr_max_locking_protocol = 466 __ATTR(max_locking_protocol, S_IFREG | S_IRUGO, 467 ocfs2_max_locking_protocol_show, NULL); 468 469 static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj, 470 struct kobj_attribute *attr, 471 char *buf) 472 { 473 ssize_t ret = 0, total = 0, remain = PAGE_SIZE; 474 struct ocfs2_stack_plugin *p; 475 476 spin_lock(&ocfs2_stack_lock); 477 list_for_each_entry(p, &ocfs2_stack_list, sp_list) { 478 ret = snprintf(buf, remain, "%s\n", 479 p->sp_name); 480 if (ret < 0) { 481 total = ret; 482 break; 483 } 484 if (ret == remain) { 485 /* snprintf() didn't fit */ 486 total = -E2BIG; 487 break; 488 } 489 total += ret; 490 remain -= ret; 491 } 492 spin_unlock(&ocfs2_stack_lock); 493 494 return total; 495 } 496 497 static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins = 498 __ATTR(loaded_cluster_plugins, S_IFREG | S_IRUGO, 499 ocfs2_loaded_cluster_plugins_show, NULL); 500 501 static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj, 502 struct kobj_attribute *attr, 503 char *buf) 504 { 505 ssize_t ret = 0; 506 507 spin_lock(&ocfs2_stack_lock); 508 if (active_stack) { 509 ret = snprintf(buf, PAGE_SIZE, "%s\n", 510 active_stack->sp_name); 511 if (ret == PAGE_SIZE) 512 ret = -E2BIG; 513 } 514 spin_unlock(&ocfs2_stack_lock); 515 516 return ret; 517 } 518 519 static struct kobj_attribute ocfs2_attr_active_cluster_plugin = 520 __ATTR(active_cluster_plugin, S_IFREG | S_IRUGO, 521 ocfs2_active_cluster_plugin_show, NULL); 522 523 static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj, 524 struct kobj_attribute *attr, 525 char *buf) 526 { 527 ssize_t ret; 528 spin_lock(&ocfs2_stack_lock); 529 ret = snprintf(buf, PAGE_SIZE, "%s\n", cluster_stack_name); 530 spin_unlock(&ocfs2_stack_lock); 531 532 return ret; 533 } 534 535 static ssize_t ocfs2_cluster_stack_store(struct kobject *kobj, 536 struct kobj_attribute *attr, 537 const char *buf, size_t count) 538 { 539 size_t len = count; 540 ssize_t ret; 541 542 if (len == 0) 543 return len; 544 545 if (buf[len - 1] == '\n') 546 len--; 547 548 if ((len != OCFS2_STACK_LABEL_LEN) || 549 (strnlen(buf, len) != len)) 550 return -EINVAL; 551 552 spin_lock(&ocfs2_stack_lock); 553 if (active_stack) { 554 if (!strncmp(buf, cluster_stack_name, len)) 555 ret = count; 556 else 557 ret = -EBUSY; 558 } else { 559 memcpy(cluster_stack_name, buf, len); 560 ret = count; 561 } 562 spin_unlock(&ocfs2_stack_lock); 563 564 return ret; 565 } 566 567 568 static struct kobj_attribute ocfs2_attr_cluster_stack = 569 __ATTR(cluster_stack, S_IFREG | S_IRUGO | S_IWUSR, 570 ocfs2_cluster_stack_show, 571 ocfs2_cluster_stack_store); 572 573 static struct attribute *ocfs2_attrs[] = { 574 &ocfs2_attr_max_locking_protocol.attr, 575 &ocfs2_attr_loaded_cluster_plugins.attr, 576 &ocfs2_attr_active_cluster_plugin.attr, 577 &ocfs2_attr_cluster_stack.attr, 578 NULL, 579 }; 580 581 static struct attribute_group ocfs2_attr_group = { 582 .attrs = ocfs2_attrs, 583 }; 584 585 static struct kset *ocfs2_kset; 586 587 static void ocfs2_sysfs_exit(void) 588 { 589 kset_unregister(ocfs2_kset); 590 } 591 592 static int ocfs2_sysfs_init(void) 593 { 594 int ret; 595 596 ocfs2_kset = kset_create_and_add("ocfs2", NULL, fs_kobj); 597 if (!ocfs2_kset) 598 return -ENOMEM; 599 600 ret = sysfs_create_group(&ocfs2_kset->kobj, &ocfs2_attr_group); 601 if (ret) 602 goto error; 603 604 return 0; 605 606 error: 607 kset_unregister(ocfs2_kset); 608 return ret; 609 } 610 611 /* 612 * Sysctl bits 613 * 614 * The sysctl lives at /proc/sys/fs/ocfs2/nm/hb_ctl_path. The 'nm' doesn't 615 * make as much sense in a multiple cluster stack world, but it's safer 616 * and easier to preserve the name. 617 */ 618 619 #define FS_OCFS2_NM 1 620 621 static ctl_table ocfs2_nm_table[] = { 622 { 623 .procname = "hb_ctl_path", 624 .data = ocfs2_hb_ctl_path, 625 .maxlen = OCFS2_MAX_HB_CTL_PATH, 626 .mode = 0644, 627 .proc_handler = proc_dostring, 628 }, 629 { } 630 }; 631 632 static ctl_table ocfs2_mod_table[] = { 633 { 634 .procname = "nm", 635 .data = NULL, 636 .maxlen = 0, 637 .mode = 0555, 638 .child = ocfs2_nm_table 639 }, 640 { } 641 }; 642 643 static ctl_table ocfs2_kern_table[] = { 644 { 645 .procname = "ocfs2", 646 .data = NULL, 647 .maxlen = 0, 648 .mode = 0555, 649 .child = ocfs2_mod_table 650 }, 651 { } 652 }; 653 654 static ctl_table ocfs2_root_table[] = { 655 { 656 .procname = "fs", 657 .data = NULL, 658 .maxlen = 0, 659 .mode = 0555, 660 .child = ocfs2_kern_table 661 }, 662 { } 663 }; 664 665 static struct ctl_table_header *ocfs2_table_header = NULL; 666 667 668 /* 669 * Initialization 670 */ 671 672 static int __init ocfs2_stack_glue_init(void) 673 { 674 strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB); 675 676 ocfs2_table_header = register_sysctl_table(ocfs2_root_table); 677 if (!ocfs2_table_header) { 678 printk(KERN_ERR 679 "ocfs2 stack glue: unable to register sysctl\n"); 680 return -ENOMEM; /* or something. */ 681 } 682 683 return ocfs2_sysfs_init(); 684 } 685 686 static void __exit ocfs2_stack_glue_exit(void) 687 { 688 lproto = NULL; 689 ocfs2_sysfs_exit(); 690 if (ocfs2_table_header) 691 unregister_sysctl_table(ocfs2_table_header); 692 } 693 694 MODULE_AUTHOR("Oracle"); 695 MODULE_DESCRIPTION("ocfs2 cluter stack glue layer"); 696 MODULE_LICENSE("GPL"); 697 module_init(ocfs2_stack_glue_init); 698 module_exit(ocfs2_stack_glue_exit); 699