1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * stackglue.c 5 * 6 * Code which implements an OCFS2 specific interface to underlying 7 * cluster stacks. 8 * 9 * Copyright (C) 2007 Oracle. All rights reserved. 10 * 11 * This program is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU General Public 13 * License as published by the Free Software Foundation, version 2. 14 * 15 * This program is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * General Public License for more details. 19 */ 20 21 #include <linux/list.h> 22 #include <linux/spinlock.h> 23 #include <linux/module.h> 24 #include <linux/slab.h> 25 #include <linux/kmod.h> 26 #include <linux/fs.h> 27 #include <linux/kobject.h> 28 #include <linux/sysfs.h> 29 #include <linux/sysctl.h> 30 31 #include "ocfs2_fs.h" 32 33 #include "stackglue.h" 34 35 #define OCFS2_STACK_PLUGIN_O2CB "o2cb" 36 #define OCFS2_STACK_PLUGIN_USER "user" 37 #define OCFS2_MAX_HB_CTL_PATH 256 38 39 static struct ocfs2_locking_protocol *lproto; 40 static DEFINE_SPINLOCK(ocfs2_stack_lock); 41 static LIST_HEAD(ocfs2_stack_list); 42 static char cluster_stack_name[OCFS2_STACK_LABEL_LEN + 1]; 43 static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; 44 45 /* 46 * The stack currently in use. If not null, active_stack->sp_count > 0, 47 * the module is pinned, and the locking protocol cannot be changed. 48 */ 49 static struct ocfs2_stack_plugin *active_stack; 50 51 static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name) 52 { 53 struct ocfs2_stack_plugin *p; 54 55 assert_spin_locked(&ocfs2_stack_lock); 56 57 list_for_each_entry(p, &ocfs2_stack_list, sp_list) { 58 if (!strcmp(p->sp_name, name)) 59 return p; 60 } 61 62 return NULL; 63 } 64 65 static int ocfs2_stack_driver_request(const char *stack_name, 66 const char *plugin_name) 67 { 68 int rc; 69 struct ocfs2_stack_plugin *p; 70 71 spin_lock(&ocfs2_stack_lock); 72 73 /* 74 * If the stack passed by the filesystem isn't the selected one, 75 * we can't continue. 76 */ 77 if (strcmp(stack_name, cluster_stack_name)) { 78 rc = -EBUSY; 79 goto out; 80 } 81 82 if (active_stack) { 83 /* 84 * If the active stack isn't the one we want, it cannot 85 * be selected right now. 86 */ 87 if (!strcmp(active_stack->sp_name, plugin_name)) 88 rc = 0; 89 else 90 rc = -EBUSY; 91 goto out; 92 } 93 94 p = ocfs2_stack_lookup(plugin_name); 95 if (!p || !try_module_get(p->sp_owner)) { 96 rc = -ENOENT; 97 goto out; 98 } 99 100 active_stack = p; 101 rc = 0; 102 103 out: 104 /* If we found it, pin it */ 105 if (!rc) 106 active_stack->sp_count++; 107 108 spin_unlock(&ocfs2_stack_lock); 109 return rc; 110 } 111 112 /* 113 * This function looks up the appropriate stack and makes it active. If 114 * there is no stack, it tries to load it. It will fail if the stack still 115 * cannot be found. It will also fail if a different stack is in use. 116 */ 117 static int ocfs2_stack_driver_get(const char *stack_name) 118 { 119 int rc; 120 char *plugin_name = OCFS2_STACK_PLUGIN_O2CB; 121 122 /* 123 * Classic stack does not pass in a stack name. This is 124 * compatible with older tools as well. 125 */ 126 if (!stack_name || !*stack_name) 127 stack_name = OCFS2_STACK_PLUGIN_O2CB; 128 129 if (strlen(stack_name) != OCFS2_STACK_LABEL_LEN) { 130 printk(KERN_ERR 131 "ocfs2 passed an invalid cluster stack label: \"%s\"\n", 132 stack_name); 133 return -EINVAL; 134 } 135 136 /* Anything that isn't the classic stack is a user stack */ 137 if (strcmp(stack_name, OCFS2_STACK_PLUGIN_O2CB)) 138 plugin_name = OCFS2_STACK_PLUGIN_USER; 139 140 rc = ocfs2_stack_driver_request(stack_name, plugin_name); 141 if (rc == -ENOENT) { 142 request_module("ocfs2_stack_%s", plugin_name); 143 rc = ocfs2_stack_driver_request(stack_name, plugin_name); 144 } 145 146 if (rc == -ENOENT) { 147 printk(KERN_ERR 148 "ocfs2: Cluster stack driver \"%s\" cannot be found\n", 149 plugin_name); 150 } else if (rc == -EBUSY) { 151 printk(KERN_ERR 152 "ocfs2: A different cluster stack is in use\n"); 153 } 154 155 return rc; 156 } 157 158 static void ocfs2_stack_driver_put(void) 159 { 160 spin_lock(&ocfs2_stack_lock); 161 BUG_ON(active_stack == NULL); 162 BUG_ON(active_stack->sp_count == 0); 163 164 active_stack->sp_count--; 165 if (!active_stack->sp_count) { 166 module_put(active_stack->sp_owner); 167 active_stack = NULL; 168 } 169 spin_unlock(&ocfs2_stack_lock); 170 } 171 172 int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin) 173 { 174 int rc; 175 176 spin_lock(&ocfs2_stack_lock); 177 if (!ocfs2_stack_lookup(plugin->sp_name)) { 178 plugin->sp_count = 0; 179 plugin->sp_proto = lproto; 180 list_add(&plugin->sp_list, &ocfs2_stack_list); 181 printk(KERN_INFO "ocfs2: Registered cluster interface %s\n", 182 plugin->sp_name); 183 rc = 0; 184 } else { 185 printk(KERN_ERR "ocfs2: Stack \"%s\" already registered\n", 186 plugin->sp_name); 187 rc = -EEXIST; 188 } 189 spin_unlock(&ocfs2_stack_lock); 190 191 return rc; 192 } 193 EXPORT_SYMBOL_GPL(ocfs2_stack_glue_register); 194 195 void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin) 196 { 197 struct ocfs2_stack_plugin *p; 198 199 spin_lock(&ocfs2_stack_lock); 200 p = ocfs2_stack_lookup(plugin->sp_name); 201 if (p) { 202 BUG_ON(p != plugin); 203 BUG_ON(plugin == active_stack); 204 BUG_ON(plugin->sp_count != 0); 205 list_del_init(&plugin->sp_list); 206 printk(KERN_INFO "ocfs2: Unregistered cluster interface %s\n", 207 plugin->sp_name); 208 } else { 209 printk(KERN_ERR "Stack \"%s\" is not registered\n", 210 plugin->sp_name); 211 } 212 spin_unlock(&ocfs2_stack_lock); 213 } 214 EXPORT_SYMBOL_GPL(ocfs2_stack_glue_unregister); 215 216 void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto) 217 { 218 struct ocfs2_stack_plugin *p; 219 220 BUG_ON(proto == NULL); 221 222 spin_lock(&ocfs2_stack_lock); 223 BUG_ON(active_stack != NULL); 224 225 lproto = proto; 226 list_for_each_entry(p, &ocfs2_stack_list, sp_list) { 227 p->sp_proto = lproto; 228 } 229 230 spin_unlock(&ocfs2_stack_lock); 231 } 232 EXPORT_SYMBOL_GPL(ocfs2_stack_glue_set_locking_protocol); 233 234 235 /* 236 * The ocfs2_dlm_lock() and ocfs2_dlm_unlock() functions take 237 * "struct ocfs2_lock_res *astarg" instead of "void *astarg" because the 238 * underlying stack plugins need to pilfer the lksb off of the lock_res. 239 * If some other structure needs to be passed as an astarg, the plugins 240 * will need to be given a different avenue to the lksb. 241 */ 242 int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, 243 int mode, 244 union ocfs2_dlm_lksb *lksb, 245 u32 flags, 246 void *name, 247 unsigned int namelen, 248 struct ocfs2_lock_res *astarg) 249 { 250 BUG_ON(lproto == NULL); 251 252 return active_stack->sp_ops->dlm_lock(conn, mode, lksb, flags, 253 name, namelen, astarg); 254 } 255 EXPORT_SYMBOL_GPL(ocfs2_dlm_lock); 256 257 int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, 258 union ocfs2_dlm_lksb *lksb, 259 u32 flags, 260 struct ocfs2_lock_res *astarg) 261 { 262 BUG_ON(lproto == NULL); 263 264 return active_stack->sp_ops->dlm_unlock(conn, lksb, flags, astarg); 265 } 266 EXPORT_SYMBOL_GPL(ocfs2_dlm_unlock); 267 268 int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb) 269 { 270 return active_stack->sp_ops->lock_status(lksb); 271 } 272 EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status); 273 274 /* 275 * Why don't we cast to ocfs2_meta_lvb? The "clean" answer is that we 276 * don't cast at the glue level. The real answer is that the header 277 * ordering is nigh impossible. 278 */ 279 void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb) 280 { 281 return active_stack->sp_ops->lock_lvb(lksb); 282 } 283 EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb); 284 285 void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) 286 { 287 active_stack->sp_ops->dump_lksb(lksb); 288 } 289 EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb); 290 291 int ocfs2_stack_supports_plocks(void) 292 { 293 return active_stack && active_stack->sp_ops->plock; 294 } 295 EXPORT_SYMBOL_GPL(ocfs2_stack_supports_plocks); 296 297 /* 298 * ocfs2_plock() can only be safely called if 299 * ocfs2_stack_supports_plocks() returned true 300 */ 301 int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino, 302 struct file *file, int cmd, struct file_lock *fl) 303 { 304 WARN_ON_ONCE(active_stack->sp_ops->plock == NULL); 305 if (active_stack->sp_ops->plock) 306 return active_stack->sp_ops->plock(conn, ino, file, cmd, fl); 307 return -EOPNOTSUPP; 308 } 309 EXPORT_SYMBOL_GPL(ocfs2_plock); 310 311 int ocfs2_cluster_connect(const char *stack_name, 312 const char *group, 313 int grouplen, 314 void (*recovery_handler)(int node_num, 315 void *recovery_data), 316 void *recovery_data, 317 struct ocfs2_cluster_connection **conn) 318 { 319 int rc = 0; 320 struct ocfs2_cluster_connection *new_conn; 321 322 BUG_ON(group == NULL); 323 BUG_ON(conn == NULL); 324 BUG_ON(recovery_handler == NULL); 325 326 if (grouplen > GROUP_NAME_MAX) { 327 rc = -EINVAL; 328 goto out; 329 } 330 331 new_conn = kzalloc(sizeof(struct ocfs2_cluster_connection), 332 GFP_KERNEL); 333 if (!new_conn) { 334 rc = -ENOMEM; 335 goto out; 336 } 337 338 memcpy(new_conn->cc_name, group, grouplen); 339 new_conn->cc_namelen = grouplen; 340 new_conn->cc_recovery_handler = recovery_handler; 341 new_conn->cc_recovery_data = recovery_data; 342 343 /* Start the new connection at our maximum compatibility level */ 344 new_conn->cc_version = lproto->lp_max_version; 345 346 /* This will pin the stack driver if successful */ 347 rc = ocfs2_stack_driver_get(stack_name); 348 if (rc) 349 goto out_free; 350 351 rc = active_stack->sp_ops->connect(new_conn); 352 if (rc) { 353 ocfs2_stack_driver_put(); 354 goto out_free; 355 } 356 357 *conn = new_conn; 358 359 out_free: 360 if (rc) 361 kfree(new_conn); 362 363 out: 364 return rc; 365 } 366 EXPORT_SYMBOL_GPL(ocfs2_cluster_connect); 367 368 /* If hangup_pending is 0, the stack driver will be dropped */ 369 int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, 370 int hangup_pending) 371 { 372 int ret; 373 374 BUG_ON(conn == NULL); 375 376 ret = active_stack->sp_ops->disconnect(conn); 377 378 /* XXX Should we free it anyway? */ 379 if (!ret) { 380 kfree(conn); 381 if (!hangup_pending) 382 ocfs2_stack_driver_put(); 383 } 384 385 return ret; 386 } 387 EXPORT_SYMBOL_GPL(ocfs2_cluster_disconnect); 388 389 /* 390 * Leave the group for this filesystem. This is executed by a userspace 391 * program (stored in ocfs2_hb_ctl_path). 392 */ 393 static void ocfs2_leave_group(const char *group) 394 { 395 int ret; 396 char *argv[5], *envp[3]; 397 398 argv[0] = ocfs2_hb_ctl_path; 399 argv[1] = "-K"; 400 argv[2] = "-u"; 401 argv[3] = (char *)group; 402 argv[4] = NULL; 403 404 /* minimal command environment taken from cpu_run_sbin_hotplug */ 405 envp[0] = "HOME=/"; 406 envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; 407 envp[2] = NULL; 408 409 ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); 410 if (ret < 0) { 411 printk(KERN_ERR 412 "ocfs2: Error %d running user helper " 413 "\"%s %s %s %s\"\n", 414 ret, argv[0], argv[1], argv[2], argv[3]); 415 } 416 } 417 418 /* 419 * Hangup is a required post-umount. ocfs2-tools software expects the 420 * filesystem to call "ocfs2_hb_ctl" during unmount. This happens 421 * regardless of whether the DLM got started, so we can't do it 422 * in ocfs2_cluster_disconnect(). The ocfs2_leave_group() function does 423 * the actual work. 424 */ 425 void ocfs2_cluster_hangup(const char *group, int grouplen) 426 { 427 BUG_ON(group == NULL); 428 BUG_ON(group[grouplen] != '\0'); 429 430 ocfs2_leave_group(group); 431 432 /* cluster_disconnect() was called with hangup_pending==1 */ 433 ocfs2_stack_driver_put(); 434 } 435 EXPORT_SYMBOL_GPL(ocfs2_cluster_hangup); 436 437 int ocfs2_cluster_this_node(unsigned int *node) 438 { 439 return active_stack->sp_ops->this_node(node); 440 } 441 EXPORT_SYMBOL_GPL(ocfs2_cluster_this_node); 442 443 444 /* 445 * Sysfs bits 446 */ 447 448 static ssize_t ocfs2_max_locking_protocol_show(struct kobject *kobj, 449 struct kobj_attribute *attr, 450 char *buf) 451 { 452 ssize_t ret = 0; 453 454 spin_lock(&ocfs2_stack_lock); 455 if (lproto) 456 ret = snprintf(buf, PAGE_SIZE, "%u.%u\n", 457 lproto->lp_max_version.pv_major, 458 lproto->lp_max_version.pv_minor); 459 spin_unlock(&ocfs2_stack_lock); 460 461 return ret; 462 } 463 464 static struct kobj_attribute ocfs2_attr_max_locking_protocol = 465 __ATTR(max_locking_protocol, S_IFREG | S_IRUGO, 466 ocfs2_max_locking_protocol_show, NULL); 467 468 static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj, 469 struct kobj_attribute *attr, 470 char *buf) 471 { 472 ssize_t ret = 0, total = 0, remain = PAGE_SIZE; 473 struct ocfs2_stack_plugin *p; 474 475 spin_lock(&ocfs2_stack_lock); 476 list_for_each_entry(p, &ocfs2_stack_list, sp_list) { 477 ret = snprintf(buf, remain, "%s\n", 478 p->sp_name); 479 if (ret < 0) { 480 total = ret; 481 break; 482 } 483 if (ret == remain) { 484 /* snprintf() didn't fit */ 485 total = -E2BIG; 486 break; 487 } 488 total += ret; 489 remain -= ret; 490 } 491 spin_unlock(&ocfs2_stack_lock); 492 493 return total; 494 } 495 496 static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins = 497 __ATTR(loaded_cluster_plugins, S_IFREG | S_IRUGO, 498 ocfs2_loaded_cluster_plugins_show, NULL); 499 500 static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj, 501 struct kobj_attribute *attr, 502 char *buf) 503 { 504 ssize_t ret = 0; 505 506 spin_lock(&ocfs2_stack_lock); 507 if (active_stack) { 508 ret = snprintf(buf, PAGE_SIZE, "%s\n", 509 active_stack->sp_name); 510 if (ret == PAGE_SIZE) 511 ret = -E2BIG; 512 } 513 spin_unlock(&ocfs2_stack_lock); 514 515 return ret; 516 } 517 518 static struct kobj_attribute ocfs2_attr_active_cluster_plugin = 519 __ATTR(active_cluster_plugin, S_IFREG | S_IRUGO, 520 ocfs2_active_cluster_plugin_show, NULL); 521 522 static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj, 523 struct kobj_attribute *attr, 524 char *buf) 525 { 526 ssize_t ret; 527 spin_lock(&ocfs2_stack_lock); 528 ret = snprintf(buf, PAGE_SIZE, "%s\n", cluster_stack_name); 529 spin_unlock(&ocfs2_stack_lock); 530 531 return ret; 532 } 533 534 static ssize_t ocfs2_cluster_stack_store(struct kobject *kobj, 535 struct kobj_attribute *attr, 536 const char *buf, size_t count) 537 { 538 size_t len = count; 539 ssize_t ret; 540 541 if (len == 0) 542 return len; 543 544 if (buf[len - 1] == '\n') 545 len--; 546 547 if ((len != OCFS2_STACK_LABEL_LEN) || 548 (strnlen(buf, len) != len)) 549 return -EINVAL; 550 551 spin_lock(&ocfs2_stack_lock); 552 if (active_stack) { 553 if (!strncmp(buf, cluster_stack_name, len)) 554 ret = count; 555 else 556 ret = -EBUSY; 557 } else { 558 memcpy(cluster_stack_name, buf, len); 559 ret = count; 560 } 561 spin_unlock(&ocfs2_stack_lock); 562 563 return ret; 564 } 565 566 567 static struct kobj_attribute ocfs2_attr_cluster_stack = 568 __ATTR(cluster_stack, S_IFREG | S_IRUGO | S_IWUSR, 569 ocfs2_cluster_stack_show, 570 ocfs2_cluster_stack_store); 571 572 static struct attribute *ocfs2_attrs[] = { 573 &ocfs2_attr_max_locking_protocol.attr, 574 &ocfs2_attr_loaded_cluster_plugins.attr, 575 &ocfs2_attr_active_cluster_plugin.attr, 576 &ocfs2_attr_cluster_stack.attr, 577 NULL, 578 }; 579 580 static struct attribute_group ocfs2_attr_group = { 581 .attrs = ocfs2_attrs, 582 }; 583 584 static struct kset *ocfs2_kset; 585 586 static void ocfs2_sysfs_exit(void) 587 { 588 kset_unregister(ocfs2_kset); 589 } 590 591 static int ocfs2_sysfs_init(void) 592 { 593 int ret; 594 595 ocfs2_kset = kset_create_and_add("ocfs2", NULL, fs_kobj); 596 if (!ocfs2_kset) 597 return -ENOMEM; 598 599 ret = sysfs_create_group(&ocfs2_kset->kobj, &ocfs2_attr_group); 600 if (ret) 601 goto error; 602 603 return 0; 604 605 error: 606 kset_unregister(ocfs2_kset); 607 return ret; 608 } 609 610 /* 611 * Sysctl bits 612 * 613 * The sysctl lives at /proc/sys/fs/ocfs2/nm/hb_ctl_path. The 'nm' doesn't 614 * make as much sense in a multiple cluster stack world, but it's safer 615 * and easier to preserve the name. 616 */ 617 618 #define FS_OCFS2_NM 1 619 620 static ctl_table ocfs2_nm_table[] = { 621 { 622 .ctl_name = 1, 623 .procname = "hb_ctl_path", 624 .data = ocfs2_hb_ctl_path, 625 .maxlen = OCFS2_MAX_HB_CTL_PATH, 626 .mode = 0644, 627 .proc_handler = &proc_dostring, 628 .strategy = &sysctl_string, 629 }, 630 { .ctl_name = 0 } 631 }; 632 633 static ctl_table ocfs2_mod_table[] = { 634 { 635 .ctl_name = FS_OCFS2_NM, 636 .procname = "nm", 637 .data = NULL, 638 .maxlen = 0, 639 .mode = 0555, 640 .child = ocfs2_nm_table 641 }, 642 { .ctl_name = 0} 643 }; 644 645 static ctl_table ocfs2_kern_table[] = { 646 { 647 .ctl_name = FS_OCFS2, 648 .procname = "ocfs2", 649 .data = NULL, 650 .maxlen = 0, 651 .mode = 0555, 652 .child = ocfs2_mod_table 653 }, 654 { .ctl_name = 0} 655 }; 656 657 static ctl_table ocfs2_root_table[] = { 658 { 659 .ctl_name = CTL_FS, 660 .procname = "fs", 661 .data = NULL, 662 .maxlen = 0, 663 .mode = 0555, 664 .child = ocfs2_kern_table 665 }, 666 { .ctl_name = 0 } 667 }; 668 669 static struct ctl_table_header *ocfs2_table_header = NULL; 670 671 672 /* 673 * Initialization 674 */ 675 676 static int __init ocfs2_stack_glue_init(void) 677 { 678 strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB); 679 680 ocfs2_table_header = register_sysctl_table(ocfs2_root_table); 681 if (!ocfs2_table_header) { 682 printk(KERN_ERR 683 "ocfs2 stack glue: unable to register sysctl\n"); 684 return -ENOMEM; /* or something. */ 685 } 686 687 return ocfs2_sysfs_init(); 688 } 689 690 static void __exit ocfs2_stack_glue_exit(void) 691 { 692 lproto = NULL; 693 ocfs2_sysfs_exit(); 694 if (ocfs2_table_header) 695 unregister_sysctl_table(ocfs2_table_header); 696 } 697 698 MODULE_AUTHOR("Oracle"); 699 MODULE_DESCRIPTION("ocfs2 cluter stack glue layer"); 700 MODULE_LICENSE("GPL"); 701 module_init(ocfs2_stack_glue_init); 702 module_exit(ocfs2_stack_glue_exit); 703