1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * stackglue.c 5 * 6 * Code which implements an OCFS2 specific interface to underlying 7 * cluster stacks. 8 * 9 * Copyright (C) 2007 Oracle. All rights reserved. 10 * 11 * This program is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU General Public 13 * License as published by the Free Software Foundation, version 2. 14 * 15 * This program is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * General Public License for more details. 19 */ 20 21 #include <linux/list.h> 22 #include <linux/spinlock.h> 23 #include <linux/module.h> 24 #include <linux/slab.h> 25 #include <linux/kmod.h> 26 #include <linux/fs.h> 27 #include <linux/kobject.h> 28 #include <linux/sysfs.h> 29 #include <linux/sysctl.h> 30 31 #include "ocfs2_fs.h" 32 33 #include "stackglue.h" 34 35 #define OCFS2_STACK_PLUGIN_O2CB "o2cb" 36 #define OCFS2_STACK_PLUGIN_USER "user" 37 #define OCFS2_MAX_HB_CTL_PATH 256 38 39 static struct ocfs2_locking_protocol *lproto; 40 static DEFINE_SPINLOCK(ocfs2_stack_lock); 41 static LIST_HEAD(ocfs2_stack_list); 42 static char cluster_stack_name[OCFS2_STACK_LABEL_LEN + 1]; 43 static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; 44 45 /* 46 * The stack currently in use. If not null, active_stack->sp_count > 0, 47 * the module is pinned, and the locking protocol cannot be changed. 48 */ 49 static struct ocfs2_stack_plugin *active_stack; 50 51 static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name) 52 { 53 struct ocfs2_stack_plugin *p; 54 55 assert_spin_locked(&ocfs2_stack_lock); 56 57 list_for_each_entry(p, &ocfs2_stack_list, sp_list) { 58 if (!strcmp(p->sp_name, name)) 59 return p; 60 } 61 62 return NULL; 63 } 64 65 static int ocfs2_stack_driver_request(const char *stack_name, 66 const char *plugin_name) 67 { 68 int rc; 69 struct ocfs2_stack_plugin *p; 70 71 spin_lock(&ocfs2_stack_lock); 72 73 /* 74 * If the stack passed by the filesystem isn't the selected one, 75 * we can't continue. 76 */ 77 if (strcmp(stack_name, cluster_stack_name)) { 78 rc = -EBUSY; 79 goto out; 80 } 81 82 if (active_stack) { 83 /* 84 * If the active stack isn't the one we want, it cannot 85 * be selected right now. 86 */ 87 if (!strcmp(active_stack->sp_name, plugin_name)) 88 rc = 0; 89 else 90 rc = -EBUSY; 91 goto out; 92 } 93 94 p = ocfs2_stack_lookup(plugin_name); 95 if (!p || !try_module_get(p->sp_owner)) { 96 rc = -ENOENT; 97 goto out; 98 } 99 100 active_stack = p; 101 rc = 0; 102 103 out: 104 /* If we found it, pin it */ 105 if (!rc) 106 active_stack->sp_count++; 107 108 spin_unlock(&ocfs2_stack_lock); 109 return rc; 110 } 111 112 /* 113 * This function looks up the appropriate stack and makes it active. If 114 * there is no stack, it tries to load it. It will fail if the stack still 115 * cannot be found. It will also fail if a different stack is in use. 116 */ 117 static int ocfs2_stack_driver_get(const char *stack_name) 118 { 119 int rc; 120 char *plugin_name = OCFS2_STACK_PLUGIN_O2CB; 121 122 /* 123 * Classic stack does not pass in a stack name. This is 124 * compatible with older tools as well. 125 */ 126 if (!stack_name || !*stack_name) 127 stack_name = OCFS2_STACK_PLUGIN_O2CB; 128 129 if (strlen(stack_name) != OCFS2_STACK_LABEL_LEN) { 130 printk(KERN_ERR 131 "ocfs2 passed an invalid cluster stack label: \"%s\"\n", 132 stack_name); 133 return -EINVAL; 134 } 135 136 /* Anything that isn't the classic stack is a user stack */ 137 if (strcmp(stack_name, OCFS2_STACK_PLUGIN_O2CB)) 138 plugin_name = OCFS2_STACK_PLUGIN_USER; 139 140 rc = ocfs2_stack_driver_request(stack_name, plugin_name); 141 if (rc == -ENOENT) { 142 request_module("ocfs2_stack_%s", plugin_name); 143 rc = ocfs2_stack_driver_request(stack_name, plugin_name); 144 } 145 146 if (rc == -ENOENT) { 147 printk(KERN_ERR 148 "ocfs2: Cluster stack driver \"%s\" cannot be found\n", 149 plugin_name); 150 } else if (rc == -EBUSY) { 151 printk(KERN_ERR 152 "ocfs2: A different cluster stack is in use\n"); 153 } 154 155 return rc; 156 } 157 158 static void ocfs2_stack_driver_put(void) 159 { 160 spin_lock(&ocfs2_stack_lock); 161 BUG_ON(active_stack == NULL); 162 BUG_ON(active_stack->sp_count == 0); 163 164 active_stack->sp_count--; 165 if (!active_stack->sp_count) { 166 module_put(active_stack->sp_owner); 167 active_stack = NULL; 168 } 169 spin_unlock(&ocfs2_stack_lock); 170 } 171 172 int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin) 173 { 174 int rc; 175 176 spin_lock(&ocfs2_stack_lock); 177 if (!ocfs2_stack_lookup(plugin->sp_name)) { 178 plugin->sp_count = 0; 179 plugin->sp_proto = lproto; 180 list_add(&plugin->sp_list, &ocfs2_stack_list); 181 printk(KERN_INFO "ocfs2: Registered cluster interface %s\n", 182 plugin->sp_name); 183 rc = 0; 184 } else { 185 printk(KERN_ERR "ocfs2: Stack \"%s\" already registered\n", 186 plugin->sp_name); 187 rc = -EEXIST; 188 } 189 spin_unlock(&ocfs2_stack_lock); 190 191 return rc; 192 } 193 EXPORT_SYMBOL_GPL(ocfs2_stack_glue_register); 194 195 void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin) 196 { 197 struct ocfs2_stack_plugin *p; 198 199 spin_lock(&ocfs2_stack_lock); 200 p = ocfs2_stack_lookup(plugin->sp_name); 201 if (p) { 202 BUG_ON(p != plugin); 203 BUG_ON(plugin == active_stack); 204 BUG_ON(plugin->sp_count != 0); 205 list_del_init(&plugin->sp_list); 206 printk(KERN_INFO "ocfs2: Unregistered cluster interface %s\n", 207 plugin->sp_name); 208 } else { 209 printk(KERN_ERR "Stack \"%s\" is not registered\n", 210 plugin->sp_name); 211 } 212 spin_unlock(&ocfs2_stack_lock); 213 } 214 EXPORT_SYMBOL_GPL(ocfs2_stack_glue_unregister); 215 216 void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto) 217 { 218 struct ocfs2_stack_plugin *p; 219 220 BUG_ON(proto == NULL); 221 222 spin_lock(&ocfs2_stack_lock); 223 BUG_ON(active_stack != NULL); 224 225 lproto = proto; 226 list_for_each_entry(p, &ocfs2_stack_list, sp_list) { 227 p->sp_proto = lproto; 228 } 229 230 spin_unlock(&ocfs2_stack_lock); 231 } 232 EXPORT_SYMBOL_GPL(ocfs2_stack_glue_set_locking_protocol); 233 234 235 /* 236 * The ocfs2_dlm_lock() and ocfs2_dlm_unlock() functions take 237 * "struct ocfs2_lock_res *astarg" instead of "void *astarg" because the 238 * underlying stack plugins need to pilfer the lksb off of the lock_res. 239 * If some other structure needs to be passed as an astarg, the plugins 240 * will need to be given a different avenue to the lksb. 241 */ 242 int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn, 243 int mode, 244 union ocfs2_dlm_lksb *lksb, 245 u32 flags, 246 void *name, 247 unsigned int namelen, 248 struct ocfs2_lock_res *astarg) 249 { 250 BUG_ON(lproto == NULL); 251 252 return active_stack->sp_ops->dlm_lock(conn, mode, lksb, flags, 253 name, namelen, astarg); 254 } 255 EXPORT_SYMBOL_GPL(ocfs2_dlm_lock); 256 257 int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, 258 union ocfs2_dlm_lksb *lksb, 259 u32 flags, 260 struct ocfs2_lock_res *astarg) 261 { 262 BUG_ON(lproto == NULL); 263 264 return active_stack->sp_ops->dlm_unlock(conn, lksb, flags, astarg); 265 } 266 EXPORT_SYMBOL_GPL(ocfs2_dlm_unlock); 267 268 int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb) 269 { 270 return active_stack->sp_ops->lock_status(lksb); 271 } 272 EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status); 273 274 /* 275 * Why don't we cast to ocfs2_meta_lvb? The "clean" answer is that we 276 * don't cast at the glue level. The real answer is that the header 277 * ordering is nigh impossible. 278 */ 279 void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb) 280 { 281 return active_stack->sp_ops->lock_lvb(lksb); 282 } 283 EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb); 284 285 void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) 286 { 287 active_stack->sp_ops->dump_lksb(lksb); 288 } 289 EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb); 290 291 int ocfs2_cluster_connect(const char *stack_name, 292 const char *group, 293 int grouplen, 294 void (*recovery_handler)(int node_num, 295 void *recovery_data), 296 void *recovery_data, 297 struct ocfs2_cluster_connection **conn) 298 { 299 int rc = 0; 300 struct ocfs2_cluster_connection *new_conn; 301 302 BUG_ON(group == NULL); 303 BUG_ON(conn == NULL); 304 BUG_ON(recovery_handler == NULL); 305 306 if (grouplen > GROUP_NAME_MAX) { 307 rc = -EINVAL; 308 goto out; 309 } 310 311 new_conn = kzalloc(sizeof(struct ocfs2_cluster_connection), 312 GFP_KERNEL); 313 if (!new_conn) { 314 rc = -ENOMEM; 315 goto out; 316 } 317 318 memcpy(new_conn->cc_name, group, grouplen); 319 new_conn->cc_namelen = grouplen; 320 new_conn->cc_recovery_handler = recovery_handler; 321 new_conn->cc_recovery_data = recovery_data; 322 323 /* Start the new connection at our maximum compatibility level */ 324 new_conn->cc_version = lproto->lp_max_version; 325 326 /* This will pin the stack driver if successful */ 327 rc = ocfs2_stack_driver_get(stack_name); 328 if (rc) 329 goto out_free; 330 331 rc = active_stack->sp_ops->connect(new_conn); 332 if (rc) { 333 ocfs2_stack_driver_put(); 334 goto out_free; 335 } 336 337 *conn = new_conn; 338 339 out_free: 340 if (rc) 341 kfree(new_conn); 342 343 out: 344 return rc; 345 } 346 EXPORT_SYMBOL_GPL(ocfs2_cluster_connect); 347 348 /* If hangup_pending is 0, the stack driver will be dropped */ 349 int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn, 350 int hangup_pending) 351 { 352 int ret; 353 354 BUG_ON(conn == NULL); 355 356 ret = active_stack->sp_ops->disconnect(conn); 357 358 /* XXX Should we free it anyway? */ 359 if (!ret) { 360 kfree(conn); 361 if (!hangup_pending) 362 ocfs2_stack_driver_put(); 363 } 364 365 return ret; 366 } 367 EXPORT_SYMBOL_GPL(ocfs2_cluster_disconnect); 368 369 /* 370 * Leave the group for this filesystem. This is executed by a userspace 371 * program (stored in ocfs2_hb_ctl_path). 372 */ 373 static void ocfs2_leave_group(const char *group) 374 { 375 int ret; 376 char *argv[5], *envp[3]; 377 378 argv[0] = ocfs2_hb_ctl_path; 379 argv[1] = "-K"; 380 argv[2] = "-u"; 381 argv[3] = (char *)group; 382 argv[4] = NULL; 383 384 /* minimal command environment taken from cpu_run_sbin_hotplug */ 385 envp[0] = "HOME=/"; 386 envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; 387 envp[2] = NULL; 388 389 ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); 390 if (ret < 0) { 391 printk(KERN_ERR 392 "ocfs2: Error %d running user helper " 393 "\"%s %s %s %s\"\n", 394 ret, argv[0], argv[1], argv[2], argv[3]); 395 } 396 } 397 398 /* 399 * Hangup is a required post-umount. ocfs2-tools software expects the 400 * filesystem to call "ocfs2_hb_ctl" during unmount. This happens 401 * regardless of whether the DLM got started, so we can't do it 402 * in ocfs2_cluster_disconnect(). The ocfs2_leave_group() function does 403 * the actual work. 404 */ 405 void ocfs2_cluster_hangup(const char *group, int grouplen) 406 { 407 BUG_ON(group == NULL); 408 BUG_ON(group[grouplen] != '\0'); 409 410 ocfs2_leave_group(group); 411 412 /* cluster_disconnect() was called with hangup_pending==1 */ 413 ocfs2_stack_driver_put(); 414 } 415 EXPORT_SYMBOL_GPL(ocfs2_cluster_hangup); 416 417 int ocfs2_cluster_this_node(unsigned int *node) 418 { 419 return active_stack->sp_ops->this_node(node); 420 } 421 EXPORT_SYMBOL_GPL(ocfs2_cluster_this_node); 422 423 424 /* 425 * Sysfs bits 426 */ 427 428 static ssize_t ocfs2_max_locking_protocol_show(struct kobject *kobj, 429 struct kobj_attribute *attr, 430 char *buf) 431 { 432 ssize_t ret = 0; 433 434 spin_lock(&ocfs2_stack_lock); 435 if (lproto) 436 ret = snprintf(buf, PAGE_SIZE, "%u.%u\n", 437 lproto->lp_max_version.pv_major, 438 lproto->lp_max_version.pv_minor); 439 spin_unlock(&ocfs2_stack_lock); 440 441 return ret; 442 } 443 444 static struct kobj_attribute ocfs2_attr_max_locking_protocol = 445 __ATTR(max_locking_protocol, S_IFREG | S_IRUGO, 446 ocfs2_max_locking_protocol_show, NULL); 447 448 static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj, 449 struct kobj_attribute *attr, 450 char *buf) 451 { 452 ssize_t ret = 0, total = 0, remain = PAGE_SIZE; 453 struct ocfs2_stack_plugin *p; 454 455 spin_lock(&ocfs2_stack_lock); 456 list_for_each_entry(p, &ocfs2_stack_list, sp_list) { 457 ret = snprintf(buf, remain, "%s\n", 458 p->sp_name); 459 if (ret < 0) { 460 total = ret; 461 break; 462 } 463 if (ret == remain) { 464 /* snprintf() didn't fit */ 465 total = -E2BIG; 466 break; 467 } 468 total += ret; 469 remain -= ret; 470 } 471 spin_unlock(&ocfs2_stack_lock); 472 473 return total; 474 } 475 476 static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins = 477 __ATTR(loaded_cluster_plugins, S_IFREG | S_IRUGO, 478 ocfs2_loaded_cluster_plugins_show, NULL); 479 480 static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj, 481 struct kobj_attribute *attr, 482 char *buf) 483 { 484 ssize_t ret = 0; 485 486 spin_lock(&ocfs2_stack_lock); 487 if (active_stack) { 488 ret = snprintf(buf, PAGE_SIZE, "%s\n", 489 active_stack->sp_name); 490 if (ret == PAGE_SIZE) 491 ret = -E2BIG; 492 } 493 spin_unlock(&ocfs2_stack_lock); 494 495 return ret; 496 } 497 498 static struct kobj_attribute ocfs2_attr_active_cluster_plugin = 499 __ATTR(active_cluster_plugin, S_IFREG | S_IRUGO, 500 ocfs2_active_cluster_plugin_show, NULL); 501 502 static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj, 503 struct kobj_attribute *attr, 504 char *buf) 505 { 506 ssize_t ret; 507 spin_lock(&ocfs2_stack_lock); 508 ret = snprintf(buf, PAGE_SIZE, "%s\n", cluster_stack_name); 509 spin_unlock(&ocfs2_stack_lock); 510 511 return ret; 512 } 513 514 static ssize_t ocfs2_cluster_stack_store(struct kobject *kobj, 515 struct kobj_attribute *attr, 516 const char *buf, size_t count) 517 { 518 size_t len = count; 519 ssize_t ret; 520 521 if (len == 0) 522 return len; 523 524 if (buf[len - 1] == '\n') 525 len--; 526 527 if ((len != OCFS2_STACK_LABEL_LEN) || 528 (strnlen(buf, len) != len)) 529 return -EINVAL; 530 531 spin_lock(&ocfs2_stack_lock); 532 if (active_stack) { 533 if (!strncmp(buf, cluster_stack_name, len)) 534 ret = count; 535 else 536 ret = -EBUSY; 537 } else { 538 memcpy(cluster_stack_name, buf, len); 539 ret = count; 540 } 541 spin_unlock(&ocfs2_stack_lock); 542 543 return ret; 544 } 545 546 547 static struct kobj_attribute ocfs2_attr_cluster_stack = 548 __ATTR(cluster_stack, S_IFREG | S_IRUGO | S_IWUSR, 549 ocfs2_cluster_stack_show, 550 ocfs2_cluster_stack_store); 551 552 static struct attribute *ocfs2_attrs[] = { 553 &ocfs2_attr_max_locking_protocol.attr, 554 &ocfs2_attr_loaded_cluster_plugins.attr, 555 &ocfs2_attr_active_cluster_plugin.attr, 556 &ocfs2_attr_cluster_stack.attr, 557 NULL, 558 }; 559 560 static struct attribute_group ocfs2_attr_group = { 561 .attrs = ocfs2_attrs, 562 }; 563 564 static struct kset *ocfs2_kset; 565 566 static void ocfs2_sysfs_exit(void) 567 { 568 kset_unregister(ocfs2_kset); 569 } 570 571 static int ocfs2_sysfs_init(void) 572 { 573 int ret; 574 575 ocfs2_kset = kset_create_and_add("ocfs2", NULL, fs_kobj); 576 if (!ocfs2_kset) 577 return -ENOMEM; 578 579 ret = sysfs_create_group(&ocfs2_kset->kobj, &ocfs2_attr_group); 580 if (ret) 581 goto error; 582 583 return 0; 584 585 error: 586 kset_unregister(ocfs2_kset); 587 return ret; 588 } 589 590 /* 591 * Sysctl bits 592 * 593 * The sysctl lives at /proc/sys/fs/ocfs2/nm/hb_ctl_path. The 'nm' doesn't 594 * make as much sense in a multiple cluster stack world, but it's safer 595 * and easier to preserve the name. 596 */ 597 598 #define FS_OCFS2_NM 1 599 600 static ctl_table ocfs2_nm_table[] = { 601 { 602 .ctl_name = 1, 603 .procname = "hb_ctl_path", 604 .data = ocfs2_hb_ctl_path, 605 .maxlen = OCFS2_MAX_HB_CTL_PATH, 606 .mode = 0644, 607 .proc_handler = &proc_dostring, 608 .strategy = &sysctl_string, 609 }, 610 { .ctl_name = 0 } 611 }; 612 613 static ctl_table ocfs2_mod_table[] = { 614 { 615 .ctl_name = FS_OCFS2_NM, 616 .procname = "nm", 617 .data = NULL, 618 .maxlen = 0, 619 .mode = 0555, 620 .child = ocfs2_nm_table 621 }, 622 { .ctl_name = 0} 623 }; 624 625 static ctl_table ocfs2_kern_table[] = { 626 { 627 .ctl_name = FS_OCFS2, 628 .procname = "ocfs2", 629 .data = NULL, 630 .maxlen = 0, 631 .mode = 0555, 632 .child = ocfs2_mod_table 633 }, 634 { .ctl_name = 0} 635 }; 636 637 static ctl_table ocfs2_root_table[] = { 638 { 639 .ctl_name = CTL_FS, 640 .procname = "fs", 641 .data = NULL, 642 .maxlen = 0, 643 .mode = 0555, 644 .child = ocfs2_kern_table 645 }, 646 { .ctl_name = 0 } 647 }; 648 649 static struct ctl_table_header *ocfs2_table_header = NULL; 650 651 652 /* 653 * Initialization 654 */ 655 656 static int __init ocfs2_stack_glue_init(void) 657 { 658 strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB); 659 660 ocfs2_table_header = register_sysctl_table(ocfs2_root_table); 661 if (!ocfs2_table_header) { 662 printk(KERN_ERR 663 "ocfs2 stack glue: unable to register sysctl\n"); 664 return -ENOMEM; /* or something. */ 665 } 666 667 return ocfs2_sysfs_init(); 668 } 669 670 static void __exit ocfs2_stack_glue_exit(void) 671 { 672 lproto = NULL; 673 ocfs2_sysfs_exit(); 674 if (ocfs2_table_header) 675 unregister_sysctl_table(ocfs2_table_header); 676 } 677 678 MODULE_AUTHOR("Oracle"); 679 MODULE_DESCRIPTION("ocfs2 cluter stack glue layer"); 680 MODULE_LICENSE("GPL"); 681 module_init(ocfs2_stack_glue_init); 682 module_exit(ocfs2_stack_glue_exit); 683