1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * stack_user.c 5 * 6 * Code which interfaces ocfs2 with fs/dlm and a userspace stack. 7 * 8 * Copyright (C) 2007 Oracle. All rights reserved. 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public 12 * License as published by the Free Software Foundation, version 2. 13 * 14 * This program is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * General Public License for more details. 18 */ 19 20 #include <linux/module.h> 21 #include <linux/fs.h> 22 #include <linux/miscdevice.h> 23 #include <linux/mutex.h> 24 #include <linux/smp_lock.h> 25 #include <linux/reboot.h> 26 #include <asm/uaccess.h> 27 28 #include "ocfs2.h" /* For struct ocfs2_lock_res */ 29 #include "stackglue.h" 30 31 #include <linux/dlm_plock.h> 32 33 /* 34 * The control protocol starts with a handshake. Until the handshake 35 * is complete, the control device will fail all write(2)s. 36 * 37 * The handshake is simple. First, the client reads until EOF. Each line 38 * of output is a supported protocol tag. All protocol tags are a single 39 * character followed by a two hex digit version number. Currently the 40 * only things supported is T01, for "Text-base version 0x01". Next, the 41 * client writes the version they would like to use, including the newline. 42 * Thus, the protocol tag is 'T01\n'. If the version tag written is 43 * unknown, -EINVAL is returned. Once the negotiation is complete, the 44 * client can start sending messages. 45 * 46 * The T01 protocol has three messages. First is the "SETN" message. 47 * It has the following syntax: 48 * 49 * SETN<space><8-char-hex-nodenum><newline> 50 * 51 * This is 14 characters. 52 * 53 * The "SETN" message must be the first message following the protocol. 54 * It tells ocfs2_control the local node number. 55 * 56 * Next comes the "SETV" message. It has the following syntax: 57 * 58 * SETV<space><2-char-hex-major><space><2-char-hex-minor><newline> 59 * 60 * This is 11 characters. 61 * 62 * The "SETV" message sets the filesystem locking protocol version as 63 * negotiated by the client. The client negotiates based on the maximum 64 * version advertised in /sys/fs/ocfs2/max_locking_protocol. The major 65 * number from the "SETV" message must match 66 * ocfs2_user_plugin.sp_proto->lp_max_version.pv_major, and the minor number 67 * must be less than or equal to ...->lp_max_version.pv_minor. 68 * 69 * Once this information has been set, mounts will be allowed. From this 70 * point on, the "DOWN" message can be sent for node down notification. 71 * It has the following syntax: 72 * 73 * DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline> 74 * 75 * eg: 76 * 77 * DOWN 632A924FDD844190BDA93C0DF6B94899 00000001\n 78 * 79 * This is 47 characters. 80 */ 81 82 /* 83 * Whether or not the client has done the handshake. 84 * For now, we have just one protocol version. 85 */ 86 #define OCFS2_CONTROL_PROTO "T01\n" 87 #define OCFS2_CONTROL_PROTO_LEN 4 88 89 /* Handshake states */ 90 #define OCFS2_CONTROL_HANDSHAKE_INVALID (0) 91 #define OCFS2_CONTROL_HANDSHAKE_READ (1) 92 #define OCFS2_CONTROL_HANDSHAKE_PROTOCOL (2) 93 #define OCFS2_CONTROL_HANDSHAKE_VALID (3) 94 95 /* Messages */ 96 #define OCFS2_CONTROL_MESSAGE_OP_LEN 4 97 #define OCFS2_CONTROL_MESSAGE_SETNODE_OP "SETN" 98 #define OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN 14 99 #define OCFS2_CONTROL_MESSAGE_SETVERSION_OP "SETV" 100 #define OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN 11 101 #define OCFS2_CONTROL_MESSAGE_DOWN_OP "DOWN" 102 #define OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN 47 103 #define OCFS2_TEXT_UUID_LEN 32 104 #define OCFS2_CONTROL_MESSAGE_VERNUM_LEN 2 105 #define OCFS2_CONTROL_MESSAGE_NODENUM_LEN 8 106 107 /* 108 * ocfs2_live_connection is refcounted because the filesystem and 109 * miscdevice sides can detach in different order. Let's just be safe. 110 */ 111 struct ocfs2_live_connection { 112 struct list_head oc_list; 113 struct ocfs2_cluster_connection *oc_conn; 114 }; 115 116 struct ocfs2_control_private { 117 struct list_head op_list; 118 int op_state; 119 int op_this_node; 120 struct ocfs2_protocol_version op_proto; 121 }; 122 123 /* SETN<space><8-char-hex-nodenum><newline> */ 124 struct ocfs2_control_message_setn { 125 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; 126 char space; 127 char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN]; 128 char newline; 129 }; 130 131 /* SETV<space><2-char-hex-major><space><2-char-hex-minor><newline> */ 132 struct ocfs2_control_message_setv { 133 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; 134 char space1; 135 char major[OCFS2_CONTROL_MESSAGE_VERNUM_LEN]; 136 char space2; 137 char minor[OCFS2_CONTROL_MESSAGE_VERNUM_LEN]; 138 char newline; 139 }; 140 141 /* DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline> */ 142 struct ocfs2_control_message_down { 143 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; 144 char space1; 145 char uuid[OCFS2_TEXT_UUID_LEN]; 146 char space2; 147 char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN]; 148 char newline; 149 }; 150 151 union ocfs2_control_message { 152 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; 153 struct ocfs2_control_message_setn u_setn; 154 struct ocfs2_control_message_setv u_setv; 155 struct ocfs2_control_message_down u_down; 156 }; 157 158 static struct ocfs2_stack_plugin ocfs2_user_plugin; 159 160 static atomic_t ocfs2_control_opened; 161 static int ocfs2_control_this_node = -1; 162 static struct ocfs2_protocol_version running_proto; 163 164 static LIST_HEAD(ocfs2_live_connection_list); 165 static LIST_HEAD(ocfs2_control_private_list); 166 static DEFINE_MUTEX(ocfs2_control_lock); 167 168 static inline void ocfs2_control_set_handshake_state(struct file *file, 169 int state) 170 { 171 struct ocfs2_control_private *p = file->private_data; 172 p->op_state = state; 173 } 174 175 static inline int ocfs2_control_get_handshake_state(struct file *file) 176 { 177 struct ocfs2_control_private *p = file->private_data; 178 return p->op_state; 179 } 180 181 static struct ocfs2_live_connection *ocfs2_connection_find(const char *name) 182 { 183 size_t len = strlen(name); 184 struct ocfs2_live_connection *c; 185 186 BUG_ON(!mutex_is_locked(&ocfs2_control_lock)); 187 188 list_for_each_entry(c, &ocfs2_live_connection_list, oc_list) { 189 if ((c->oc_conn->cc_namelen == len) && 190 !strncmp(c->oc_conn->cc_name, name, len)) 191 return c; 192 } 193 194 return c; 195 } 196 197 /* 198 * ocfs2_live_connection structures are created underneath the ocfs2 199 * mount path. Since the VFS prevents multiple calls to 200 * fill_super(), we can't get dupes here. 201 */ 202 static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn, 203 struct ocfs2_live_connection **c_ret) 204 { 205 int rc = 0; 206 struct ocfs2_live_connection *c; 207 208 c = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL); 209 if (!c) 210 return -ENOMEM; 211 212 mutex_lock(&ocfs2_control_lock); 213 c->oc_conn = conn; 214 215 if (atomic_read(&ocfs2_control_opened)) 216 list_add(&c->oc_list, &ocfs2_live_connection_list); 217 else { 218 printk(KERN_ERR 219 "ocfs2: Userspace control daemon is not present\n"); 220 rc = -ESRCH; 221 } 222 223 mutex_unlock(&ocfs2_control_lock); 224 225 if (!rc) 226 *c_ret = c; 227 else 228 kfree(c); 229 230 return rc; 231 } 232 233 /* 234 * This function disconnects the cluster connection from ocfs2_control. 235 * Afterwards, userspace can't affect the cluster connection. 236 */ 237 static void ocfs2_live_connection_drop(struct ocfs2_live_connection *c) 238 { 239 mutex_lock(&ocfs2_control_lock); 240 list_del_init(&c->oc_list); 241 c->oc_conn = NULL; 242 mutex_unlock(&ocfs2_control_lock); 243 244 kfree(c); 245 } 246 247 static int ocfs2_control_cfu(void *target, size_t target_len, 248 const char __user *buf, size_t count) 249 { 250 /* The T01 expects write(2) calls to have exactly one command */ 251 if ((count != target_len) || 252 (count > sizeof(union ocfs2_control_message))) 253 return -EINVAL; 254 255 if (copy_from_user(target, buf, target_len)) 256 return -EFAULT; 257 258 return 0; 259 } 260 261 static ssize_t ocfs2_control_validate_protocol(struct file *file, 262 const char __user *buf, 263 size_t count) 264 { 265 ssize_t ret; 266 char kbuf[OCFS2_CONTROL_PROTO_LEN]; 267 268 ret = ocfs2_control_cfu(kbuf, OCFS2_CONTROL_PROTO_LEN, 269 buf, count); 270 if (ret) 271 return ret; 272 273 if (strncmp(kbuf, OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN)) 274 return -EINVAL; 275 276 ocfs2_control_set_handshake_state(file, 277 OCFS2_CONTROL_HANDSHAKE_PROTOCOL); 278 279 return count; 280 } 281 282 static void ocfs2_control_send_down(const char *uuid, 283 int nodenum) 284 { 285 struct ocfs2_live_connection *c; 286 287 mutex_lock(&ocfs2_control_lock); 288 289 c = ocfs2_connection_find(uuid); 290 if (c) { 291 BUG_ON(c->oc_conn == NULL); 292 c->oc_conn->cc_recovery_handler(nodenum, 293 c->oc_conn->cc_recovery_data); 294 } 295 296 mutex_unlock(&ocfs2_control_lock); 297 } 298 299 /* 300 * Called whenever configuration elements are sent to /dev/ocfs2_control. 301 * If all configuration elements are present, try to set the global 302 * values. If there is a problem, return an error. Skip any missing 303 * elements, and only bump ocfs2_control_opened when we have all elements 304 * and are successful. 305 */ 306 static int ocfs2_control_install_private(struct file *file) 307 { 308 int rc = 0; 309 int set_p = 1; 310 struct ocfs2_control_private *p = file->private_data; 311 312 BUG_ON(p->op_state != OCFS2_CONTROL_HANDSHAKE_PROTOCOL); 313 314 mutex_lock(&ocfs2_control_lock); 315 316 if (p->op_this_node < 0) { 317 set_p = 0; 318 } else if ((ocfs2_control_this_node >= 0) && 319 (ocfs2_control_this_node != p->op_this_node)) { 320 rc = -EINVAL; 321 goto out_unlock; 322 } 323 324 if (!p->op_proto.pv_major) { 325 set_p = 0; 326 } else if (!list_empty(&ocfs2_live_connection_list) && 327 ((running_proto.pv_major != p->op_proto.pv_major) || 328 (running_proto.pv_minor != p->op_proto.pv_minor))) { 329 rc = -EINVAL; 330 goto out_unlock; 331 } 332 333 if (set_p) { 334 ocfs2_control_this_node = p->op_this_node; 335 running_proto.pv_major = p->op_proto.pv_major; 336 running_proto.pv_minor = p->op_proto.pv_minor; 337 } 338 339 out_unlock: 340 mutex_unlock(&ocfs2_control_lock); 341 342 if (!rc && set_p) { 343 /* We set the global values successfully */ 344 atomic_inc(&ocfs2_control_opened); 345 ocfs2_control_set_handshake_state(file, 346 OCFS2_CONTROL_HANDSHAKE_VALID); 347 } 348 349 return rc; 350 } 351 352 static int ocfs2_control_get_this_node(void) 353 { 354 int rc; 355 356 mutex_lock(&ocfs2_control_lock); 357 if (ocfs2_control_this_node < 0) 358 rc = -EINVAL; 359 else 360 rc = ocfs2_control_this_node; 361 mutex_unlock(&ocfs2_control_lock); 362 363 return rc; 364 } 365 366 static int ocfs2_control_do_setnode_msg(struct file *file, 367 struct ocfs2_control_message_setn *msg) 368 { 369 long nodenum; 370 char *ptr = NULL; 371 struct ocfs2_control_private *p = file->private_data; 372 373 if (ocfs2_control_get_handshake_state(file) != 374 OCFS2_CONTROL_HANDSHAKE_PROTOCOL) 375 return -EINVAL; 376 377 if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP, 378 OCFS2_CONTROL_MESSAGE_OP_LEN)) 379 return -EINVAL; 380 381 if ((msg->space != ' ') || (msg->newline != '\n')) 382 return -EINVAL; 383 msg->space = msg->newline = '\0'; 384 385 nodenum = simple_strtol(msg->nodestr, &ptr, 16); 386 if (!ptr || *ptr) 387 return -EINVAL; 388 389 if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) || 390 (nodenum > INT_MAX) || (nodenum < 0)) 391 return -ERANGE; 392 p->op_this_node = nodenum; 393 394 return ocfs2_control_install_private(file); 395 } 396 397 static int ocfs2_control_do_setversion_msg(struct file *file, 398 struct ocfs2_control_message_setv *msg) 399 { 400 long major, minor; 401 char *ptr = NULL; 402 struct ocfs2_control_private *p = file->private_data; 403 struct ocfs2_protocol_version *max = 404 &ocfs2_user_plugin.sp_proto->lp_max_version; 405 406 if (ocfs2_control_get_handshake_state(file) != 407 OCFS2_CONTROL_HANDSHAKE_PROTOCOL) 408 return -EINVAL; 409 410 if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP, 411 OCFS2_CONTROL_MESSAGE_OP_LEN)) 412 return -EINVAL; 413 414 if ((msg->space1 != ' ') || (msg->space2 != ' ') || 415 (msg->newline != '\n')) 416 return -EINVAL; 417 msg->space1 = msg->space2 = msg->newline = '\0'; 418 419 major = simple_strtol(msg->major, &ptr, 16); 420 if (!ptr || *ptr) 421 return -EINVAL; 422 minor = simple_strtol(msg->minor, &ptr, 16); 423 if (!ptr || *ptr) 424 return -EINVAL; 425 426 /* 427 * The major must be between 1 and 255, inclusive. The minor 428 * must be between 0 and 255, inclusive. The version passed in 429 * must be within the maximum version supported by the filesystem. 430 */ 431 if ((major == LONG_MIN) || (major == LONG_MAX) || 432 (major > (u8)-1) || (major < 1)) 433 return -ERANGE; 434 if ((minor == LONG_MIN) || (minor == LONG_MAX) || 435 (minor > (u8)-1) || (minor < 0)) 436 return -ERANGE; 437 if ((major != max->pv_major) || 438 (minor > max->pv_minor)) 439 return -EINVAL; 440 441 p->op_proto.pv_major = major; 442 p->op_proto.pv_minor = minor; 443 444 return ocfs2_control_install_private(file); 445 } 446 447 static int ocfs2_control_do_down_msg(struct file *file, 448 struct ocfs2_control_message_down *msg) 449 { 450 long nodenum; 451 char *p = NULL; 452 453 if (ocfs2_control_get_handshake_state(file) != 454 OCFS2_CONTROL_HANDSHAKE_VALID) 455 return -EINVAL; 456 457 if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_DOWN_OP, 458 OCFS2_CONTROL_MESSAGE_OP_LEN)) 459 return -EINVAL; 460 461 if ((msg->space1 != ' ') || (msg->space2 != ' ') || 462 (msg->newline != '\n')) 463 return -EINVAL; 464 msg->space1 = msg->space2 = msg->newline = '\0'; 465 466 nodenum = simple_strtol(msg->nodestr, &p, 16); 467 if (!p || *p) 468 return -EINVAL; 469 470 if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) || 471 (nodenum > INT_MAX) || (nodenum < 0)) 472 return -ERANGE; 473 474 ocfs2_control_send_down(msg->uuid, nodenum); 475 476 return 0; 477 } 478 479 static ssize_t ocfs2_control_message(struct file *file, 480 const char __user *buf, 481 size_t count) 482 { 483 ssize_t ret; 484 union ocfs2_control_message msg; 485 486 /* Try to catch padding issues */ 487 WARN_ON(offsetof(struct ocfs2_control_message_down, uuid) != 488 (sizeof(msg.u_down.tag) + sizeof(msg.u_down.space1))); 489 490 memset(&msg, 0, sizeof(union ocfs2_control_message)); 491 ret = ocfs2_control_cfu(&msg, count, buf, count); 492 if (ret) 493 goto out; 494 495 if ((count == OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN) && 496 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP, 497 OCFS2_CONTROL_MESSAGE_OP_LEN)) 498 ret = ocfs2_control_do_setnode_msg(file, &msg.u_setn); 499 else if ((count == OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN) && 500 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP, 501 OCFS2_CONTROL_MESSAGE_OP_LEN)) 502 ret = ocfs2_control_do_setversion_msg(file, &msg.u_setv); 503 else if ((count == OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN) && 504 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_DOWN_OP, 505 OCFS2_CONTROL_MESSAGE_OP_LEN)) 506 ret = ocfs2_control_do_down_msg(file, &msg.u_down); 507 else 508 ret = -EINVAL; 509 510 out: 511 return ret ? ret : count; 512 } 513 514 static ssize_t ocfs2_control_write(struct file *file, 515 const char __user *buf, 516 size_t count, 517 loff_t *ppos) 518 { 519 ssize_t ret; 520 521 switch (ocfs2_control_get_handshake_state(file)) { 522 case OCFS2_CONTROL_HANDSHAKE_INVALID: 523 ret = -EINVAL; 524 break; 525 526 case OCFS2_CONTROL_HANDSHAKE_READ: 527 ret = ocfs2_control_validate_protocol(file, buf, 528 count); 529 break; 530 531 case OCFS2_CONTROL_HANDSHAKE_PROTOCOL: 532 case OCFS2_CONTROL_HANDSHAKE_VALID: 533 ret = ocfs2_control_message(file, buf, count); 534 break; 535 536 default: 537 BUG(); 538 ret = -EIO; 539 break; 540 } 541 542 return ret; 543 } 544 545 /* 546 * This is a naive version. If we ever have a new protocol, we'll expand 547 * it. Probably using seq_file. 548 */ 549 static ssize_t ocfs2_control_read(struct file *file, 550 char __user *buf, 551 size_t count, 552 loff_t *ppos) 553 { 554 ssize_t ret; 555 556 ret = simple_read_from_buffer(buf, count, ppos, 557 OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN); 558 559 /* Have we read the whole protocol list? */ 560 if (ret > 0 && *ppos >= OCFS2_CONTROL_PROTO_LEN) 561 ocfs2_control_set_handshake_state(file, 562 OCFS2_CONTROL_HANDSHAKE_READ); 563 564 return ret; 565 } 566 567 static int ocfs2_control_release(struct inode *inode, struct file *file) 568 { 569 struct ocfs2_control_private *p = file->private_data; 570 571 mutex_lock(&ocfs2_control_lock); 572 573 if (ocfs2_control_get_handshake_state(file) != 574 OCFS2_CONTROL_HANDSHAKE_VALID) 575 goto out; 576 577 if (atomic_dec_and_test(&ocfs2_control_opened)) { 578 if (!list_empty(&ocfs2_live_connection_list)) { 579 /* XXX: Do bad things! */ 580 printk(KERN_ERR 581 "ocfs2: Unexpected release of ocfs2_control!\n" 582 " Loss of cluster connection requires " 583 "an emergency restart!\n"); 584 emergency_restart(); 585 } 586 /* 587 * Last valid close clears the node number and resets 588 * the locking protocol version 589 */ 590 ocfs2_control_this_node = -1; 591 running_proto.pv_major = 0; 592 running_proto.pv_major = 0; 593 } 594 595 out: 596 list_del_init(&p->op_list); 597 file->private_data = NULL; 598 599 mutex_unlock(&ocfs2_control_lock); 600 601 kfree(p); 602 603 return 0; 604 } 605 606 static int ocfs2_control_open(struct inode *inode, struct file *file) 607 { 608 struct ocfs2_control_private *p; 609 610 p = kzalloc(sizeof(struct ocfs2_control_private), GFP_KERNEL); 611 if (!p) 612 return -ENOMEM; 613 p->op_this_node = -1; 614 615 lock_kernel(); 616 mutex_lock(&ocfs2_control_lock); 617 file->private_data = p; 618 list_add(&p->op_list, &ocfs2_control_private_list); 619 mutex_unlock(&ocfs2_control_lock); 620 unlock_kernel(); 621 622 return 0; 623 } 624 625 static const struct file_operations ocfs2_control_fops = { 626 .open = ocfs2_control_open, 627 .release = ocfs2_control_release, 628 .read = ocfs2_control_read, 629 .write = ocfs2_control_write, 630 .owner = THIS_MODULE, 631 }; 632 633 static struct miscdevice ocfs2_control_device = { 634 .minor = MISC_DYNAMIC_MINOR, 635 .name = "ocfs2_control", 636 .fops = &ocfs2_control_fops, 637 }; 638 639 static int ocfs2_control_init(void) 640 { 641 int rc; 642 643 atomic_set(&ocfs2_control_opened, 0); 644 645 rc = misc_register(&ocfs2_control_device); 646 if (rc) 647 printk(KERN_ERR 648 "ocfs2: Unable to register ocfs2_control device " 649 "(errno %d)\n", 650 -rc); 651 652 return rc; 653 } 654 655 static void ocfs2_control_exit(void) 656 { 657 int rc; 658 659 rc = misc_deregister(&ocfs2_control_device); 660 if (rc) 661 printk(KERN_ERR 662 "ocfs2: Unable to deregister ocfs2_control device " 663 "(errno %d)\n", 664 -rc); 665 } 666 667 static struct dlm_lksb *fsdlm_astarg_to_lksb(void *astarg) 668 { 669 struct ocfs2_lock_res *res = astarg; 670 return &res->l_lksb.lksb_fsdlm; 671 } 672 673 static void fsdlm_lock_ast_wrapper(void *astarg) 674 { 675 struct dlm_lksb *lksb = fsdlm_astarg_to_lksb(astarg); 676 int status = lksb->sb_status; 677 678 BUG_ON(ocfs2_user_plugin.sp_proto == NULL); 679 680 /* 681 * For now we're punting on the issue of other non-standard errors 682 * where we can't tell if the unlock_ast or lock_ast should be called. 683 * The main "other error" that's possible is EINVAL which means the 684 * function was called with invalid args, which shouldn't be possible 685 * since the caller here is under our control. Other non-standard 686 * errors probably fall into the same category, or otherwise are fatal 687 * which means we can't carry on anyway. 688 */ 689 690 if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL) 691 ocfs2_user_plugin.sp_proto->lp_unlock_ast(astarg, 0); 692 else 693 ocfs2_user_plugin.sp_proto->lp_lock_ast(astarg); 694 } 695 696 static void fsdlm_blocking_ast_wrapper(void *astarg, int level) 697 { 698 BUG_ON(ocfs2_user_plugin.sp_proto == NULL); 699 700 ocfs2_user_plugin.sp_proto->lp_blocking_ast(astarg, level); 701 } 702 703 static int user_dlm_lock(struct ocfs2_cluster_connection *conn, 704 int mode, 705 union ocfs2_dlm_lksb *lksb, 706 u32 flags, 707 void *name, 708 unsigned int namelen, 709 void *astarg) 710 { 711 int ret; 712 713 if (!lksb->lksb_fsdlm.sb_lvbptr) 714 lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb + 715 sizeof(struct dlm_lksb); 716 717 ret = dlm_lock(conn->cc_lockspace, mode, &lksb->lksb_fsdlm, 718 flags|DLM_LKF_NODLCKWT, name, namelen, 0, 719 fsdlm_lock_ast_wrapper, astarg, 720 fsdlm_blocking_ast_wrapper); 721 return ret; 722 } 723 724 static int user_dlm_unlock(struct ocfs2_cluster_connection *conn, 725 union ocfs2_dlm_lksb *lksb, 726 u32 flags, 727 void *astarg) 728 { 729 int ret; 730 731 ret = dlm_unlock(conn->cc_lockspace, lksb->lksb_fsdlm.sb_lkid, 732 flags, &lksb->lksb_fsdlm, astarg); 733 return ret; 734 } 735 736 static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb) 737 { 738 return lksb->lksb_fsdlm.sb_status; 739 } 740 741 static int user_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb) 742 { 743 int invalid = lksb->lksb_fsdlm.sb_flags & DLM_SBF_VALNOTVALID; 744 745 return !invalid; 746 } 747 748 static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb) 749 { 750 if (!lksb->lksb_fsdlm.sb_lvbptr) 751 lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb + 752 sizeof(struct dlm_lksb); 753 return (void *)(lksb->lksb_fsdlm.sb_lvbptr); 754 } 755 756 static void user_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) 757 { 758 } 759 760 static int user_plock(struct ocfs2_cluster_connection *conn, 761 u64 ino, 762 struct file *file, 763 int cmd, 764 struct file_lock *fl) 765 { 766 /* 767 * This more or less just demuxes the plock request into any 768 * one of three dlm calls. 769 * 770 * Internally, fs/dlm will pass these to a misc device, which 771 * a userspace daemon will read and write to. 772 * 773 * For now, cancel requests (which happen internally only), 774 * are turned into unlocks. Most of this function taken from 775 * gfs2_lock. 776 */ 777 778 if (cmd == F_CANCELLK) { 779 cmd = F_SETLK; 780 fl->fl_type = F_UNLCK; 781 } 782 783 if (IS_GETLK(cmd)) 784 return dlm_posix_get(conn->cc_lockspace, ino, file, fl); 785 else if (fl->fl_type == F_UNLCK) 786 return dlm_posix_unlock(conn->cc_lockspace, ino, file, fl); 787 else 788 return dlm_posix_lock(conn->cc_lockspace, ino, file, cmd, fl); 789 } 790 791 /* 792 * Compare a requested locking protocol version against the current one. 793 * 794 * If the major numbers are different, they are incompatible. 795 * If the current minor is greater than the request, they are incompatible. 796 * If the current minor is less than or equal to the request, they are 797 * compatible, and the requester should run at the current minor version. 798 */ 799 static int fs_protocol_compare(struct ocfs2_protocol_version *existing, 800 struct ocfs2_protocol_version *request) 801 { 802 if (existing->pv_major != request->pv_major) 803 return 1; 804 805 if (existing->pv_minor > request->pv_minor) 806 return 1; 807 808 if (existing->pv_minor < request->pv_minor) 809 request->pv_minor = existing->pv_minor; 810 811 return 0; 812 } 813 814 static int user_cluster_connect(struct ocfs2_cluster_connection *conn) 815 { 816 dlm_lockspace_t *fsdlm; 817 struct ocfs2_live_connection *control; 818 int rc = 0; 819 820 BUG_ON(conn == NULL); 821 822 rc = ocfs2_live_connection_new(conn, &control); 823 if (rc) 824 goto out; 825 826 /* 827 * running_proto must have been set before we allowed any mounts 828 * to proceed. 829 */ 830 if (fs_protocol_compare(&running_proto, &conn->cc_version)) { 831 printk(KERN_ERR 832 "Unable to mount with fs locking protocol version " 833 "%u.%u because the userspace control daemon has " 834 "negotiated %u.%u\n", 835 conn->cc_version.pv_major, conn->cc_version.pv_minor, 836 running_proto.pv_major, running_proto.pv_minor); 837 rc = -EPROTO; 838 ocfs2_live_connection_drop(control); 839 goto out; 840 } 841 842 rc = dlm_new_lockspace(conn->cc_name, strlen(conn->cc_name), 843 &fsdlm, DLM_LSFL_FS, DLM_LVB_LEN); 844 if (rc) { 845 ocfs2_live_connection_drop(control); 846 goto out; 847 } 848 849 conn->cc_private = control; 850 conn->cc_lockspace = fsdlm; 851 out: 852 return rc; 853 } 854 855 static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn) 856 { 857 dlm_release_lockspace(conn->cc_lockspace, 2); 858 conn->cc_lockspace = NULL; 859 ocfs2_live_connection_drop(conn->cc_private); 860 conn->cc_private = NULL; 861 return 0; 862 } 863 864 static int user_cluster_this_node(unsigned int *this_node) 865 { 866 int rc; 867 868 rc = ocfs2_control_get_this_node(); 869 if (rc < 0) 870 return rc; 871 872 *this_node = rc; 873 return 0; 874 } 875 876 static struct ocfs2_stack_operations ocfs2_user_plugin_ops = { 877 .connect = user_cluster_connect, 878 .disconnect = user_cluster_disconnect, 879 .this_node = user_cluster_this_node, 880 .dlm_lock = user_dlm_lock, 881 .dlm_unlock = user_dlm_unlock, 882 .lock_status = user_dlm_lock_status, 883 .lvb_valid = user_dlm_lvb_valid, 884 .lock_lvb = user_dlm_lvb, 885 .plock = user_plock, 886 .dump_lksb = user_dlm_dump_lksb, 887 }; 888 889 static struct ocfs2_stack_plugin ocfs2_user_plugin = { 890 .sp_name = "user", 891 .sp_ops = &ocfs2_user_plugin_ops, 892 .sp_owner = THIS_MODULE, 893 }; 894 895 896 static int __init ocfs2_user_plugin_init(void) 897 { 898 int rc; 899 900 rc = ocfs2_control_init(); 901 if (!rc) { 902 rc = ocfs2_stack_glue_register(&ocfs2_user_plugin); 903 if (rc) 904 ocfs2_control_exit(); 905 } 906 907 return rc; 908 } 909 910 static void __exit ocfs2_user_plugin_exit(void) 911 { 912 ocfs2_stack_glue_unregister(&ocfs2_user_plugin); 913 ocfs2_control_exit(); 914 } 915 916 MODULE_AUTHOR("Oracle"); 917 MODULE_DESCRIPTION("ocfs2 driver for userspace cluster stacks"); 918 MODULE_LICENSE("GPL"); 919 module_init(ocfs2_user_plugin_init); 920 module_exit(ocfs2_user_plugin_exit); 921