/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * stack_user.c
 *
 * Code which interfaces ocfs2 with fs/dlm and a userspace stack.
 *
 * Copyright (C) 2007 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/reboot.h>
#include <asm/uaccess.h>

#include "stackglue.h"

#include <linux/dlm_plock.h>

/*
 * The control protocol starts with a handshake. Until the handshake
 * is complete, the control device will fail all write(2)s.
 *
 * The handshake is simple. First, the client reads until EOF. Each line
 * of output is a supported protocol tag. All protocol tags are a single
 * character followed by a two hex digit version number. Currently the
 * only tag supported is T01, for "Text-based version 0x01". Next, the
 * client writes the version they would like to use, including the newline.
 * Thus, the protocol tag is 'T01\n'. If the version tag written is
 * unknown, -EINVAL is returned. Once the negotiation is complete, the
 * client can start sending messages.
 *
 * The T01 protocol has three messages. First is the "SETN" message.
 * It has the following syntax:
 *
 *  SETN<space><8-char-hex-nodenum><newline>
 *
 * This is 14 characters.
 *
 * The "SETN" message must be the first message following the protocol.
 * It tells ocfs2_control the local node number.
 *
 * Next comes the "SETV" message. It has the following syntax:
 *
 *  SETV<space><2-char-hex-major><space><2-char-hex-minor><newline>
 *
 * This is 11 characters.
 *
 * The "SETV" message sets the filesystem locking protocol version as
 * negotiated by the client. The client negotiates based on the maximum
 * version advertised in /sys/fs/ocfs2/max_locking_protocol. The major
 * number from the "SETV" message must match
 * ocfs2_user_plugin.sp_max_proto.pv_major, and the minor number
 * must be less than or equal to ocfs2_user_plugin.sp_max_proto.pv_minor.
 *
 * Once this information has been set, mounts will be allowed. From this
 * point on, the "DOWN" message can be sent for node down notification.
 * It has the following syntax:
 *
 *  DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline>
 *
 * eg:
 *
 *  DOWN 632A924FDD844190BDA93C0DF6B94899 00000001\n
 *
 * This is 47 characters.
 */

/*
 * Whether or not the client has done the handshake.
 * For now, we have just one protocol version.
84 */ 85 #define OCFS2_CONTROL_PROTO "T01\n" 86 #define OCFS2_CONTROL_PROTO_LEN 4 87 88 /* Handshake states */ 89 #define OCFS2_CONTROL_HANDSHAKE_INVALID (0) 90 #define OCFS2_CONTROL_HANDSHAKE_READ (1) 91 #define OCFS2_CONTROL_HANDSHAKE_PROTOCOL (2) 92 #define OCFS2_CONTROL_HANDSHAKE_VALID (3) 93 94 /* Messages */ 95 #define OCFS2_CONTROL_MESSAGE_OP_LEN 4 96 #define OCFS2_CONTROL_MESSAGE_SETNODE_OP "SETN" 97 #define OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN 14 98 #define OCFS2_CONTROL_MESSAGE_SETVERSION_OP "SETV" 99 #define OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN 11 100 #define OCFS2_CONTROL_MESSAGE_DOWN_OP "DOWN" 101 #define OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN 47 102 #define OCFS2_TEXT_UUID_LEN 32 103 #define OCFS2_CONTROL_MESSAGE_VERNUM_LEN 2 104 #define OCFS2_CONTROL_MESSAGE_NODENUM_LEN 8 105 106 /* 107 * ocfs2_live_connection is refcounted because the filesystem and 108 * miscdevice sides can detach in different order. Let's just be safe. 109 */ 110 struct ocfs2_live_connection { 111 struct list_head oc_list; 112 struct ocfs2_cluster_connection *oc_conn; 113 }; 114 115 struct ocfs2_control_private { 116 struct list_head op_list; 117 int op_state; 118 int op_this_node; 119 struct ocfs2_protocol_version op_proto; 120 }; 121 122 /* SETN<space><8-char-hex-nodenum><newline> */ 123 struct ocfs2_control_message_setn { 124 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; 125 char space; 126 char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN]; 127 char newline; 128 }; 129 130 /* SETV<space><2-char-hex-major><space><2-char-hex-minor><newline> */ 131 struct ocfs2_control_message_setv { 132 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; 133 char space1; 134 char major[OCFS2_CONTROL_MESSAGE_VERNUM_LEN]; 135 char space2; 136 char minor[OCFS2_CONTROL_MESSAGE_VERNUM_LEN]; 137 char newline; 138 }; 139 140 /* DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline> */ 141 struct ocfs2_control_message_down { 142 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; 143 char space1; 144 char 
uuid[OCFS2_TEXT_UUID_LEN]; 145 char space2; 146 char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN]; 147 char newline; 148 }; 149 150 union ocfs2_control_message { 151 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; 152 struct ocfs2_control_message_setn u_setn; 153 struct ocfs2_control_message_setv u_setv; 154 struct ocfs2_control_message_down u_down; 155 }; 156 157 static struct ocfs2_stack_plugin ocfs2_user_plugin; 158 159 static atomic_t ocfs2_control_opened; 160 static int ocfs2_control_this_node = -1; 161 static struct ocfs2_protocol_version running_proto; 162 163 static LIST_HEAD(ocfs2_live_connection_list); 164 static LIST_HEAD(ocfs2_control_private_list); 165 static DEFINE_MUTEX(ocfs2_control_lock); 166 167 static inline void ocfs2_control_set_handshake_state(struct file *file, 168 int state) 169 { 170 struct ocfs2_control_private *p = file->private_data; 171 p->op_state = state; 172 } 173 174 static inline int ocfs2_control_get_handshake_state(struct file *file) 175 { 176 struct ocfs2_control_private *p = file->private_data; 177 return p->op_state; 178 } 179 180 static struct ocfs2_live_connection *ocfs2_connection_find(const char *name) 181 { 182 size_t len = strlen(name); 183 struct ocfs2_live_connection *c; 184 185 BUG_ON(!mutex_is_locked(&ocfs2_control_lock)); 186 187 list_for_each_entry(c, &ocfs2_live_connection_list, oc_list) { 188 if ((c->oc_conn->cc_namelen == len) && 189 !strncmp(c->oc_conn->cc_name, name, len)) 190 return c; 191 } 192 193 return NULL; 194 } 195 196 /* 197 * ocfs2_live_connection structures are created underneath the ocfs2 198 * mount path. Since the VFS prevents multiple calls to 199 * fill_super(), we can't get dupes here. 
200 */ 201 static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn, 202 struct ocfs2_live_connection **c_ret) 203 { 204 int rc = 0; 205 struct ocfs2_live_connection *c; 206 207 c = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL); 208 if (!c) 209 return -ENOMEM; 210 211 mutex_lock(&ocfs2_control_lock); 212 c->oc_conn = conn; 213 214 if (atomic_read(&ocfs2_control_opened)) 215 list_add(&c->oc_list, &ocfs2_live_connection_list); 216 else { 217 printk(KERN_ERR 218 "ocfs2: Userspace control daemon is not present\n"); 219 rc = -ESRCH; 220 } 221 222 mutex_unlock(&ocfs2_control_lock); 223 224 if (!rc) 225 *c_ret = c; 226 else 227 kfree(c); 228 229 return rc; 230 } 231 232 /* 233 * This function disconnects the cluster connection from ocfs2_control. 234 * Afterwards, userspace can't affect the cluster connection. 235 */ 236 static void ocfs2_live_connection_drop(struct ocfs2_live_connection *c) 237 { 238 mutex_lock(&ocfs2_control_lock); 239 list_del_init(&c->oc_list); 240 c->oc_conn = NULL; 241 mutex_unlock(&ocfs2_control_lock); 242 243 kfree(c); 244 } 245 246 static int ocfs2_control_cfu(void *target, size_t target_len, 247 const char __user *buf, size_t count) 248 { 249 /* The T01 expects write(2) calls to have exactly one command */ 250 if ((count != target_len) || 251 (count > sizeof(union ocfs2_control_message))) 252 return -EINVAL; 253 254 if (copy_from_user(target, buf, target_len)) 255 return -EFAULT; 256 257 return 0; 258 } 259 260 static ssize_t ocfs2_control_validate_protocol(struct file *file, 261 const char __user *buf, 262 size_t count) 263 { 264 ssize_t ret; 265 char kbuf[OCFS2_CONTROL_PROTO_LEN]; 266 267 ret = ocfs2_control_cfu(kbuf, OCFS2_CONTROL_PROTO_LEN, 268 buf, count); 269 if (ret) 270 return ret; 271 272 if (strncmp(kbuf, OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN)) 273 return -EINVAL; 274 275 ocfs2_control_set_handshake_state(file, 276 OCFS2_CONTROL_HANDSHAKE_PROTOCOL); 277 278 return count; 279 } 280 281 static void 
ocfs2_control_send_down(const char *uuid, 282 int nodenum) 283 { 284 struct ocfs2_live_connection *c; 285 286 mutex_lock(&ocfs2_control_lock); 287 288 c = ocfs2_connection_find(uuid); 289 if (c) { 290 BUG_ON(c->oc_conn == NULL); 291 c->oc_conn->cc_recovery_handler(nodenum, 292 c->oc_conn->cc_recovery_data); 293 } 294 295 mutex_unlock(&ocfs2_control_lock); 296 } 297 298 /* 299 * Called whenever configuration elements are sent to /dev/ocfs2_control. 300 * If all configuration elements are present, try to set the global 301 * values. If there is a problem, return an error. Skip any missing 302 * elements, and only bump ocfs2_control_opened when we have all elements 303 * and are successful. 304 */ 305 static int ocfs2_control_install_private(struct file *file) 306 { 307 int rc = 0; 308 int set_p = 1; 309 struct ocfs2_control_private *p = file->private_data; 310 311 BUG_ON(p->op_state != OCFS2_CONTROL_HANDSHAKE_PROTOCOL); 312 313 mutex_lock(&ocfs2_control_lock); 314 315 if (p->op_this_node < 0) { 316 set_p = 0; 317 } else if ((ocfs2_control_this_node >= 0) && 318 (ocfs2_control_this_node != p->op_this_node)) { 319 rc = -EINVAL; 320 goto out_unlock; 321 } 322 323 if (!p->op_proto.pv_major) { 324 set_p = 0; 325 } else if (!list_empty(&ocfs2_live_connection_list) && 326 ((running_proto.pv_major != p->op_proto.pv_major) || 327 (running_proto.pv_minor != p->op_proto.pv_minor))) { 328 rc = -EINVAL; 329 goto out_unlock; 330 } 331 332 if (set_p) { 333 ocfs2_control_this_node = p->op_this_node; 334 running_proto.pv_major = p->op_proto.pv_major; 335 running_proto.pv_minor = p->op_proto.pv_minor; 336 } 337 338 out_unlock: 339 mutex_unlock(&ocfs2_control_lock); 340 341 if (!rc && set_p) { 342 /* We set the global values successfully */ 343 atomic_inc(&ocfs2_control_opened); 344 ocfs2_control_set_handshake_state(file, 345 OCFS2_CONTROL_HANDSHAKE_VALID); 346 } 347 348 return rc; 349 } 350 351 static int ocfs2_control_get_this_node(void) 352 { 353 int rc; 354 355 
mutex_lock(&ocfs2_control_lock); 356 if (ocfs2_control_this_node < 0) 357 rc = -EINVAL; 358 else 359 rc = ocfs2_control_this_node; 360 mutex_unlock(&ocfs2_control_lock); 361 362 return rc; 363 } 364 365 static int ocfs2_control_do_setnode_msg(struct file *file, 366 struct ocfs2_control_message_setn *msg) 367 { 368 long nodenum; 369 char *ptr = NULL; 370 struct ocfs2_control_private *p = file->private_data; 371 372 if (ocfs2_control_get_handshake_state(file) != 373 OCFS2_CONTROL_HANDSHAKE_PROTOCOL) 374 return -EINVAL; 375 376 if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP, 377 OCFS2_CONTROL_MESSAGE_OP_LEN)) 378 return -EINVAL; 379 380 if ((msg->space != ' ') || (msg->newline != '\n')) 381 return -EINVAL; 382 msg->space = msg->newline = '\0'; 383 384 nodenum = simple_strtol(msg->nodestr, &ptr, 16); 385 if (!ptr || *ptr) 386 return -EINVAL; 387 388 if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) || 389 (nodenum > INT_MAX) || (nodenum < 0)) 390 return -ERANGE; 391 p->op_this_node = nodenum; 392 393 return ocfs2_control_install_private(file); 394 } 395 396 static int ocfs2_control_do_setversion_msg(struct file *file, 397 struct ocfs2_control_message_setv *msg) 398 { 399 long major, minor; 400 char *ptr = NULL; 401 struct ocfs2_control_private *p = file->private_data; 402 struct ocfs2_protocol_version *max = 403 &ocfs2_user_plugin.sp_max_proto; 404 405 if (ocfs2_control_get_handshake_state(file) != 406 OCFS2_CONTROL_HANDSHAKE_PROTOCOL) 407 return -EINVAL; 408 409 if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP, 410 OCFS2_CONTROL_MESSAGE_OP_LEN)) 411 return -EINVAL; 412 413 if ((msg->space1 != ' ') || (msg->space2 != ' ') || 414 (msg->newline != '\n')) 415 return -EINVAL; 416 msg->space1 = msg->space2 = msg->newline = '\0'; 417 418 major = simple_strtol(msg->major, &ptr, 16); 419 if (!ptr || *ptr) 420 return -EINVAL; 421 minor = simple_strtol(msg->minor, &ptr, 16); 422 if (!ptr || *ptr) 423 return -EINVAL; 424 425 /* 426 * The major must be between 1 
and 255, inclusive. The minor 427 * must be between 0 and 255, inclusive. The version passed in 428 * must be within the maximum version supported by the filesystem. 429 */ 430 if ((major == LONG_MIN) || (major == LONG_MAX) || 431 (major > (u8)-1) || (major < 1)) 432 return -ERANGE; 433 if ((minor == LONG_MIN) || (minor == LONG_MAX) || 434 (minor > (u8)-1) || (minor < 0)) 435 return -ERANGE; 436 if ((major != max->pv_major) || 437 (minor > max->pv_minor)) 438 return -EINVAL; 439 440 p->op_proto.pv_major = major; 441 p->op_proto.pv_minor = minor; 442 443 return ocfs2_control_install_private(file); 444 } 445 446 static int ocfs2_control_do_down_msg(struct file *file, 447 struct ocfs2_control_message_down *msg) 448 { 449 long nodenum; 450 char *p = NULL; 451 452 if (ocfs2_control_get_handshake_state(file) != 453 OCFS2_CONTROL_HANDSHAKE_VALID) 454 return -EINVAL; 455 456 if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_DOWN_OP, 457 OCFS2_CONTROL_MESSAGE_OP_LEN)) 458 return -EINVAL; 459 460 if ((msg->space1 != ' ') || (msg->space2 != ' ') || 461 (msg->newline != '\n')) 462 return -EINVAL; 463 msg->space1 = msg->space2 = msg->newline = '\0'; 464 465 nodenum = simple_strtol(msg->nodestr, &p, 16); 466 if (!p || *p) 467 return -EINVAL; 468 469 if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) || 470 (nodenum > INT_MAX) || (nodenum < 0)) 471 return -ERANGE; 472 473 ocfs2_control_send_down(msg->uuid, nodenum); 474 475 return 0; 476 } 477 478 static ssize_t ocfs2_control_message(struct file *file, 479 const char __user *buf, 480 size_t count) 481 { 482 ssize_t ret; 483 union ocfs2_control_message msg; 484 485 /* Try to catch padding issues */ 486 WARN_ON(offsetof(struct ocfs2_control_message_down, uuid) != 487 (sizeof(msg.u_down.tag) + sizeof(msg.u_down.space1))); 488 489 memset(&msg, 0, sizeof(union ocfs2_control_message)); 490 ret = ocfs2_control_cfu(&msg, count, buf, count); 491 if (ret) 492 goto out; 493 494 if ((count == OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN) && 495 
!strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP, 496 OCFS2_CONTROL_MESSAGE_OP_LEN)) 497 ret = ocfs2_control_do_setnode_msg(file, &msg.u_setn); 498 else if ((count == OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN) && 499 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP, 500 OCFS2_CONTROL_MESSAGE_OP_LEN)) 501 ret = ocfs2_control_do_setversion_msg(file, &msg.u_setv); 502 else if ((count == OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN) && 503 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_DOWN_OP, 504 OCFS2_CONTROL_MESSAGE_OP_LEN)) 505 ret = ocfs2_control_do_down_msg(file, &msg.u_down); 506 else 507 ret = -EINVAL; 508 509 out: 510 return ret ? ret : count; 511 } 512 513 static ssize_t ocfs2_control_write(struct file *file, 514 const char __user *buf, 515 size_t count, 516 loff_t *ppos) 517 { 518 ssize_t ret; 519 520 switch (ocfs2_control_get_handshake_state(file)) { 521 case OCFS2_CONTROL_HANDSHAKE_INVALID: 522 ret = -EINVAL; 523 break; 524 525 case OCFS2_CONTROL_HANDSHAKE_READ: 526 ret = ocfs2_control_validate_protocol(file, buf, 527 count); 528 break; 529 530 case OCFS2_CONTROL_HANDSHAKE_PROTOCOL: 531 case OCFS2_CONTROL_HANDSHAKE_VALID: 532 ret = ocfs2_control_message(file, buf, count); 533 break; 534 535 default: 536 BUG(); 537 ret = -EIO; 538 break; 539 } 540 541 return ret; 542 } 543 544 /* 545 * This is a naive version. If we ever have a new protocol, we'll expand 546 * it. Probably using seq_file. 547 */ 548 static ssize_t ocfs2_control_read(struct file *file, 549 char __user *buf, 550 size_t count, 551 loff_t *ppos) 552 { 553 ssize_t ret; 554 555 ret = simple_read_from_buffer(buf, count, ppos, 556 OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN); 557 558 /* Have we read the whole protocol list? 
*/ 559 if (ret > 0 && *ppos >= OCFS2_CONTROL_PROTO_LEN) 560 ocfs2_control_set_handshake_state(file, 561 OCFS2_CONTROL_HANDSHAKE_READ); 562 563 return ret; 564 } 565 566 static int ocfs2_control_release(struct inode *inode, struct file *file) 567 { 568 struct ocfs2_control_private *p = file->private_data; 569 570 mutex_lock(&ocfs2_control_lock); 571 572 if (ocfs2_control_get_handshake_state(file) != 573 OCFS2_CONTROL_HANDSHAKE_VALID) 574 goto out; 575 576 if (atomic_dec_and_test(&ocfs2_control_opened)) { 577 if (!list_empty(&ocfs2_live_connection_list)) { 578 /* XXX: Do bad things! */ 579 printk(KERN_ERR 580 "ocfs2: Unexpected release of ocfs2_control!\n" 581 " Loss of cluster connection requires " 582 "an emergency restart!\n"); 583 emergency_restart(); 584 } 585 /* 586 * Last valid close clears the node number and resets 587 * the locking protocol version 588 */ 589 ocfs2_control_this_node = -1; 590 running_proto.pv_major = 0; 591 running_proto.pv_major = 0; 592 } 593 594 out: 595 list_del_init(&p->op_list); 596 file->private_data = NULL; 597 598 mutex_unlock(&ocfs2_control_lock); 599 600 kfree(p); 601 602 return 0; 603 } 604 605 static int ocfs2_control_open(struct inode *inode, struct file *file) 606 { 607 struct ocfs2_control_private *p; 608 609 p = kzalloc(sizeof(struct ocfs2_control_private), GFP_KERNEL); 610 if (!p) 611 return -ENOMEM; 612 p->op_this_node = -1; 613 614 mutex_lock(&ocfs2_control_lock); 615 file->private_data = p; 616 list_add(&p->op_list, &ocfs2_control_private_list); 617 mutex_unlock(&ocfs2_control_lock); 618 619 return 0; 620 } 621 622 static const struct file_operations ocfs2_control_fops = { 623 .open = ocfs2_control_open, 624 .release = ocfs2_control_release, 625 .read = ocfs2_control_read, 626 .write = ocfs2_control_write, 627 .owner = THIS_MODULE, 628 .llseek = default_llseek, 629 }; 630 631 static struct miscdevice ocfs2_control_device = { 632 .minor = MISC_DYNAMIC_MINOR, 633 .name = "ocfs2_control", 634 .fops = &ocfs2_control_fops, 
635 }; 636 637 static int ocfs2_control_init(void) 638 { 639 int rc; 640 641 atomic_set(&ocfs2_control_opened, 0); 642 643 rc = misc_register(&ocfs2_control_device); 644 if (rc) 645 printk(KERN_ERR 646 "ocfs2: Unable to register ocfs2_control device " 647 "(errno %d)\n", 648 -rc); 649 650 return rc; 651 } 652 653 static void ocfs2_control_exit(void) 654 { 655 int rc; 656 657 rc = misc_deregister(&ocfs2_control_device); 658 if (rc) 659 printk(KERN_ERR 660 "ocfs2: Unable to deregister ocfs2_control device " 661 "(errno %d)\n", 662 -rc); 663 } 664 665 static void fsdlm_lock_ast_wrapper(void *astarg) 666 { 667 struct ocfs2_dlm_lksb *lksb = astarg; 668 int status = lksb->lksb_fsdlm.sb_status; 669 670 /* 671 * For now we're punting on the issue of other non-standard errors 672 * where we can't tell if the unlock_ast or lock_ast should be called. 673 * The main "other error" that's possible is EINVAL which means the 674 * function was called with invalid args, which shouldn't be possible 675 * since the caller here is under our control. Other non-standard 676 * errors probably fall into the same category, or otherwise are fatal 677 * which means we can't carry on anyway. 
678 */ 679 680 if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL) 681 lksb->lksb_conn->cc_proto->lp_unlock_ast(lksb, 0); 682 else 683 lksb->lksb_conn->cc_proto->lp_lock_ast(lksb); 684 } 685 686 static void fsdlm_blocking_ast_wrapper(void *astarg, int level) 687 { 688 struct ocfs2_dlm_lksb *lksb = astarg; 689 690 lksb->lksb_conn->cc_proto->lp_blocking_ast(lksb, level); 691 } 692 693 static int user_dlm_lock(struct ocfs2_cluster_connection *conn, 694 int mode, 695 struct ocfs2_dlm_lksb *lksb, 696 u32 flags, 697 void *name, 698 unsigned int namelen) 699 { 700 int ret; 701 702 if (!lksb->lksb_fsdlm.sb_lvbptr) 703 lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb + 704 sizeof(struct dlm_lksb); 705 706 ret = dlm_lock(conn->cc_lockspace, mode, &lksb->lksb_fsdlm, 707 flags|DLM_LKF_NODLCKWT, name, namelen, 0, 708 fsdlm_lock_ast_wrapper, lksb, 709 fsdlm_blocking_ast_wrapper); 710 return ret; 711 } 712 713 static int user_dlm_unlock(struct ocfs2_cluster_connection *conn, 714 struct ocfs2_dlm_lksb *lksb, 715 u32 flags) 716 { 717 int ret; 718 719 ret = dlm_unlock(conn->cc_lockspace, lksb->lksb_fsdlm.sb_lkid, 720 flags, &lksb->lksb_fsdlm, lksb); 721 return ret; 722 } 723 724 static int user_dlm_lock_status(struct ocfs2_dlm_lksb *lksb) 725 { 726 return lksb->lksb_fsdlm.sb_status; 727 } 728 729 static int user_dlm_lvb_valid(struct ocfs2_dlm_lksb *lksb) 730 { 731 int invalid = lksb->lksb_fsdlm.sb_flags & DLM_SBF_VALNOTVALID; 732 733 return !invalid; 734 } 735 736 static void *user_dlm_lvb(struct ocfs2_dlm_lksb *lksb) 737 { 738 if (!lksb->lksb_fsdlm.sb_lvbptr) 739 lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb + 740 sizeof(struct dlm_lksb); 741 return (void *)(lksb->lksb_fsdlm.sb_lvbptr); 742 } 743 744 static void user_dlm_dump_lksb(struct ocfs2_dlm_lksb *lksb) 745 { 746 } 747 748 static int user_plock(struct ocfs2_cluster_connection *conn, 749 u64 ino, 750 struct file *file, 751 int cmd, 752 struct file_lock *fl) 753 { 754 /* 755 * This more or less just demuxes the plock request into any 
756 * one of three dlm calls. 757 * 758 * Internally, fs/dlm will pass these to a misc device, which 759 * a userspace daemon will read and write to. 760 * 761 * For now, cancel requests (which happen internally only), 762 * are turned into unlocks. Most of this function taken from 763 * gfs2_lock. 764 */ 765 766 if (cmd == F_CANCELLK) { 767 cmd = F_SETLK; 768 fl->fl_type = F_UNLCK; 769 } 770 771 if (IS_GETLK(cmd)) 772 return dlm_posix_get(conn->cc_lockspace, ino, file, fl); 773 else if (fl->fl_type == F_UNLCK) 774 return dlm_posix_unlock(conn->cc_lockspace, ino, file, fl); 775 else 776 return dlm_posix_lock(conn->cc_lockspace, ino, file, cmd, fl); 777 } 778 779 /* 780 * Compare a requested locking protocol version against the current one. 781 * 782 * If the major numbers are different, they are incompatible. 783 * If the current minor is greater than the request, they are incompatible. 784 * If the current minor is less than or equal to the request, they are 785 * compatible, and the requester should run at the current minor version. 786 */ 787 static int fs_protocol_compare(struct ocfs2_protocol_version *existing, 788 struct ocfs2_protocol_version *request) 789 { 790 if (existing->pv_major != request->pv_major) 791 return 1; 792 793 if (existing->pv_minor > request->pv_minor) 794 return 1; 795 796 if (existing->pv_minor < request->pv_minor) 797 request->pv_minor = existing->pv_minor; 798 799 return 0; 800 } 801 802 static int user_cluster_connect(struct ocfs2_cluster_connection *conn) 803 { 804 dlm_lockspace_t *fsdlm; 805 struct ocfs2_live_connection *uninitialized_var(control); 806 int rc = 0; 807 808 BUG_ON(conn == NULL); 809 810 rc = ocfs2_live_connection_new(conn, &control); 811 if (rc) 812 goto out; 813 814 /* 815 * running_proto must have been set before we allowed any mounts 816 * to proceed. 
817 */ 818 if (fs_protocol_compare(&running_proto, &conn->cc_version)) { 819 printk(KERN_ERR 820 "Unable to mount with fs locking protocol version " 821 "%u.%u because the userspace control daemon has " 822 "negotiated %u.%u\n", 823 conn->cc_version.pv_major, conn->cc_version.pv_minor, 824 running_proto.pv_major, running_proto.pv_minor); 825 rc = -EPROTO; 826 ocfs2_live_connection_drop(control); 827 goto out; 828 } 829 830 rc = dlm_new_lockspace(conn->cc_name, NULL, DLM_LSFL_FS, DLM_LVB_LEN, 831 NULL, NULL, NULL, &fsdlm); 832 if (rc) { 833 ocfs2_live_connection_drop(control); 834 goto out; 835 } 836 837 conn->cc_private = control; 838 conn->cc_lockspace = fsdlm; 839 out: 840 return rc; 841 } 842 843 static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn) 844 { 845 dlm_release_lockspace(conn->cc_lockspace, 2); 846 conn->cc_lockspace = NULL; 847 ocfs2_live_connection_drop(conn->cc_private); 848 conn->cc_private = NULL; 849 return 0; 850 } 851 852 static int user_cluster_this_node(unsigned int *this_node) 853 { 854 int rc; 855 856 rc = ocfs2_control_get_this_node(); 857 if (rc < 0) 858 return rc; 859 860 *this_node = rc; 861 return 0; 862 } 863 864 static struct ocfs2_stack_operations ocfs2_user_plugin_ops = { 865 .connect = user_cluster_connect, 866 .disconnect = user_cluster_disconnect, 867 .this_node = user_cluster_this_node, 868 .dlm_lock = user_dlm_lock, 869 .dlm_unlock = user_dlm_unlock, 870 .lock_status = user_dlm_lock_status, 871 .lvb_valid = user_dlm_lvb_valid, 872 .lock_lvb = user_dlm_lvb, 873 .plock = user_plock, 874 .dump_lksb = user_dlm_dump_lksb, 875 }; 876 877 static struct ocfs2_stack_plugin ocfs2_user_plugin = { 878 .sp_name = "user", 879 .sp_ops = &ocfs2_user_plugin_ops, 880 .sp_owner = THIS_MODULE, 881 }; 882 883 884 static int __init ocfs2_user_plugin_init(void) 885 { 886 int rc; 887 888 rc = ocfs2_control_init(); 889 if (!rc) { 890 rc = ocfs2_stack_glue_register(&ocfs2_user_plugin); 891 if (rc) 892 ocfs2_control_exit(); 893 } 894 
895 return rc; 896 } 897 898 static void __exit ocfs2_user_plugin_exit(void) 899 { 900 ocfs2_stack_glue_unregister(&ocfs2_user_plugin); 901 ocfs2_control_exit(); 902 } 903 904 MODULE_AUTHOR("Oracle"); 905 MODULE_DESCRIPTION("ocfs2 driver for userspace cluster stacks"); 906 MODULE_LICENSE("GPL"); 907 module_init(ocfs2_user_plugin_init); 908 module_exit(ocfs2_user_plugin_exit); 909