1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * stack_user.c 5 * 6 * Code which interfaces ocfs2 with fs/dlm and a userspace stack. 7 * 8 * Copyright (C) 2007 Oracle. All rights reserved. 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public 12 * License as published by the Free Software Foundation, version 2. 13 * 14 * This program is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * General Public License for more details. 18 */ 19 20 #include <linux/module.h> 21 #include <linux/fs.h> 22 #include <linux/miscdevice.h> 23 #include <linux/mutex.h> 24 #include <linux/slab.h> 25 #include <linux/reboot.h> 26 #include <asm/uaccess.h> 27 28 #include "stackglue.h" 29 30 #include <linux/dlm_plock.h> 31 32 /* 33 * The control protocol starts with a handshake. Until the handshake 34 * is complete, the control device will fail all write(2)s. 35 * 36 * The handshake is simple. First, the client reads until EOF. Each line 37 * of output is a supported protocol tag. All protocol tags are a single 38 * character followed by a two hex digit version number. Currently the 39 * only things supported is T01, for "Text-base version 0x01". Next, the 40 * client writes the version they would like to use, including the newline. 41 * Thus, the protocol tag is 'T01\n'. If the version tag written is 42 * unknown, -EINVAL is returned. Once the negotiation is complete, the 43 * client can start sending messages. 44 * 45 * The T01 protocol has three messages. First is the "SETN" message. 46 * It has the following syntax: 47 * 48 * SETN<space><8-char-hex-nodenum><newline> 49 * 50 * This is 14 characters. 51 * 52 * The "SETN" message must be the first message following the protocol. 53 * It tells ocfs2_control the local node number. 54 * 55 * Next comes the "SETV" message. It has the following syntax: 56 * 57 * SETV<space><2-char-hex-major><space><2-char-hex-minor><newline> 58 * 59 * This is 11 characters. 60 * 61 * The "SETV" message sets the filesystem locking protocol version as 62 * negotiated by the client. The client negotiates based on the maximum 63 * version advertised in /sys/fs/ocfs2/max_locking_protocol. The major 64 * number from the "SETV" message must match 65 * ocfs2_user_plugin.sp_max_proto.pv_major, and the minor number 66 * must be less than or equal to ...sp_max_version.pv_minor. 67 * 68 * Once this information has been set, mounts will be allowed. From this 69 * point on, the "DOWN" message can be sent for node down notification. 70 * It has the following syntax: 71 * 72 * DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline> 73 * 74 * eg: 75 * 76 * DOWN 632A924FDD844190BDA93C0DF6B94899 00000001\n 77 * 78 * This is 47 characters. 79 */ 80 81 /* 82 * Whether or not the client has done the handshake. 83 * For now, we have just one protocol version. 84 */ 85 #define OCFS2_CONTROL_PROTO "T01\n" 86 #define OCFS2_CONTROL_PROTO_LEN 4 87 88 /* Handshake states */ 89 #define OCFS2_CONTROL_HANDSHAKE_INVALID (0) 90 #define OCFS2_CONTROL_HANDSHAKE_READ (1) 91 #define OCFS2_CONTROL_HANDSHAKE_PROTOCOL (2) 92 #define OCFS2_CONTROL_HANDSHAKE_VALID (3) 93 94 /* Messages */ 95 #define OCFS2_CONTROL_MESSAGE_OP_LEN 4 96 #define OCFS2_CONTROL_MESSAGE_SETNODE_OP "SETN" 97 #define OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN 14 98 #define OCFS2_CONTROL_MESSAGE_SETVERSION_OP "SETV" 99 #define OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN 11 100 #define OCFS2_CONTROL_MESSAGE_DOWN_OP "DOWN" 101 #define OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN 47 102 #define OCFS2_TEXT_UUID_LEN 32 103 #define OCFS2_CONTROL_MESSAGE_VERNUM_LEN 2 104 #define OCFS2_CONTROL_MESSAGE_NODENUM_LEN 8 105 106 /* 107 * ocfs2_live_connection is refcounted because the filesystem and 108 * miscdevice sides can detach in different order. Let's just be safe. 109 */ 110 struct ocfs2_live_connection { 111 struct list_head oc_list; 112 struct ocfs2_cluster_connection *oc_conn; 113 atomic_t oc_this_node; 114 int oc_our_slot; 115 }; 116 117 struct ocfs2_control_private { 118 struct list_head op_list; 119 int op_state; 120 int op_this_node; 121 struct ocfs2_protocol_version op_proto; 122 }; 123 124 /* SETN<space><8-char-hex-nodenum><newline> */ 125 struct ocfs2_control_message_setn { 126 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; 127 char space; 128 char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN]; 129 char newline; 130 }; 131 132 /* SETV<space><2-char-hex-major><space><2-char-hex-minor><newline> */ 133 struct ocfs2_control_message_setv { 134 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; 135 char space1; 136 char major[OCFS2_CONTROL_MESSAGE_VERNUM_LEN]; 137 char space2; 138 char minor[OCFS2_CONTROL_MESSAGE_VERNUM_LEN]; 139 char newline; 140 }; 141 142 /* DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline> */ 143 struct ocfs2_control_message_down { 144 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; 145 char space1; 146 char uuid[OCFS2_TEXT_UUID_LEN]; 147 char space2; 148 char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN]; 149 char newline; 150 }; 151 152 union ocfs2_control_message { 153 char tag[OCFS2_CONTROL_MESSAGE_OP_LEN]; 154 struct ocfs2_control_message_setn u_setn; 155 struct ocfs2_control_message_setv u_setv; 156 struct ocfs2_control_message_down u_down; 157 }; 158 159 static struct ocfs2_stack_plugin ocfs2_user_plugin; 160 161 static atomic_t ocfs2_control_opened; 162 static int ocfs2_control_this_node = -1; 163 static struct ocfs2_protocol_version running_proto; 164 165 static LIST_HEAD(ocfs2_live_connection_list); 166 static LIST_HEAD(ocfs2_control_private_list); 167 static DEFINE_MUTEX(ocfs2_control_lock); 168 169 static inline void ocfs2_control_set_handshake_state(struct file *file, 170 int state) 171 { 172 struct ocfs2_control_private *p = file->private_data; 173 p->op_state = state; 174 } 175 176 static inline int ocfs2_control_get_handshake_state(struct file *file) 177 { 178 struct ocfs2_control_private *p = file->private_data; 179 return p->op_state; 180 } 181 182 static struct ocfs2_live_connection *ocfs2_connection_find(const char *name) 183 { 184 size_t len = strlen(name); 185 struct ocfs2_live_connection *c; 186 187 BUG_ON(!mutex_is_locked(&ocfs2_control_lock)); 188 189 list_for_each_entry(c, &ocfs2_live_connection_list, oc_list) { 190 if ((c->oc_conn->cc_namelen == len) && 191 !strncmp(c->oc_conn->cc_name, name, len)) 192 return c; 193 } 194 195 return NULL; 196 } 197 198 /* 199 * ocfs2_live_connection structures are created underneath the ocfs2 200 * mount path. Since the VFS prevents multiple calls to 201 * fill_super(), we can't get dupes here. 202 */ 203 static int ocfs2_live_connection_attach(struct ocfs2_cluster_connection *conn, 204 struct ocfs2_live_connection *c) 205 { 206 int rc = 0; 207 208 mutex_lock(&ocfs2_control_lock); 209 c->oc_conn = conn; 210 211 if (atomic_read(&ocfs2_control_opened)) 212 list_add(&c->oc_list, &ocfs2_live_connection_list); 213 else { 214 printk(KERN_ERR 215 "ocfs2: Userspace control daemon is not present\n"); 216 rc = -ESRCH; 217 } 218 219 mutex_unlock(&ocfs2_control_lock); 220 return rc; 221 } 222 223 /* 224 * This function disconnects the cluster connection from ocfs2_control. 225 * Afterwards, userspace can't affect the cluster connection. 226 */ 227 static void ocfs2_live_connection_drop(struct ocfs2_live_connection *c) 228 { 229 mutex_lock(&ocfs2_control_lock); 230 list_del_init(&c->oc_list); 231 c->oc_conn = NULL; 232 mutex_unlock(&ocfs2_control_lock); 233 234 kfree(c); 235 } 236 237 static int ocfs2_control_cfu(void *target, size_t target_len, 238 const char __user *buf, size_t count) 239 { 240 /* The T01 expects write(2) calls to have exactly one command */ 241 if ((count != target_len) || 242 (count > sizeof(union ocfs2_control_message))) 243 return -EINVAL; 244 245 if (copy_from_user(target, buf, target_len)) 246 return -EFAULT; 247 248 return 0; 249 } 250 251 static ssize_t ocfs2_control_validate_protocol(struct file *file, 252 const char __user *buf, 253 size_t count) 254 { 255 ssize_t ret; 256 char kbuf[OCFS2_CONTROL_PROTO_LEN]; 257 258 ret = ocfs2_control_cfu(kbuf, OCFS2_CONTROL_PROTO_LEN, 259 buf, count); 260 if (ret) 261 return ret; 262 263 if (strncmp(kbuf, OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN)) 264 return -EINVAL; 265 266 ocfs2_control_set_handshake_state(file, 267 OCFS2_CONTROL_HANDSHAKE_PROTOCOL); 268 269 return count; 270 } 271 272 static void ocfs2_control_send_down(const char *uuid, 273 int nodenum) 274 { 275 struct ocfs2_live_connection *c; 276 277 mutex_lock(&ocfs2_control_lock); 278 279 c = ocfs2_connection_find(uuid); 280 if (c) { 281 BUG_ON(c->oc_conn == NULL); 282 c->oc_conn->cc_recovery_handler(nodenum, 283 c->oc_conn->cc_recovery_data); 284 } 285 286 mutex_unlock(&ocfs2_control_lock); 287 } 288 289 /* 290 * Called whenever configuration elements are sent to /dev/ocfs2_control. 291 * If all configuration elements are present, try to set the global 292 * values. If there is a problem, return an error. Skip any missing 293 * elements, and only bump ocfs2_control_opened when we have all elements 294 * and are successful. 295 */ 296 static int ocfs2_control_install_private(struct file *file) 297 { 298 int rc = 0; 299 int set_p = 1; 300 struct ocfs2_control_private *p = file->private_data; 301 302 BUG_ON(p->op_state != OCFS2_CONTROL_HANDSHAKE_PROTOCOL); 303 304 mutex_lock(&ocfs2_control_lock); 305 306 if (p->op_this_node < 0) { 307 set_p = 0; 308 } else if ((ocfs2_control_this_node >= 0) && 309 (ocfs2_control_this_node != p->op_this_node)) { 310 rc = -EINVAL; 311 goto out_unlock; 312 } 313 314 if (!p->op_proto.pv_major) { 315 set_p = 0; 316 } else if (!list_empty(&ocfs2_live_connection_list) && 317 ((running_proto.pv_major != p->op_proto.pv_major) || 318 (running_proto.pv_minor != p->op_proto.pv_minor))) { 319 rc = -EINVAL; 320 goto out_unlock; 321 } 322 323 if (set_p) { 324 ocfs2_control_this_node = p->op_this_node; 325 running_proto.pv_major = p->op_proto.pv_major; 326 running_proto.pv_minor = p->op_proto.pv_minor; 327 } 328 329 out_unlock: 330 mutex_unlock(&ocfs2_control_lock); 331 332 if (!rc && set_p) { 333 /* We set the global values successfully */ 334 atomic_inc(&ocfs2_control_opened); 335 ocfs2_control_set_handshake_state(file, 336 OCFS2_CONTROL_HANDSHAKE_VALID); 337 } 338 339 return rc; 340 } 341 342 static int ocfs2_control_get_this_node(void) 343 { 344 int rc; 345 346 mutex_lock(&ocfs2_control_lock); 347 if (ocfs2_control_this_node < 0) 348 rc = -EINVAL; 349 else 350 rc = ocfs2_control_this_node; 351 mutex_unlock(&ocfs2_control_lock); 352 353 return rc; 354 } 355 356 static int ocfs2_control_do_setnode_msg(struct file *file, 357 struct ocfs2_control_message_setn *msg) 358 { 359 long nodenum; 360 char *ptr = NULL; 361 struct ocfs2_control_private *p = file->private_data; 362 363 if (ocfs2_control_get_handshake_state(file) != 364 OCFS2_CONTROL_HANDSHAKE_PROTOCOL) 365 return -EINVAL; 366 367 if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP, 368 OCFS2_CONTROL_MESSAGE_OP_LEN)) 369 return -EINVAL; 370 371 if ((msg->space != ' ') || (msg->newline != '\n')) 372 return -EINVAL; 373 msg->space = msg->newline = '\0'; 374 375 nodenum = simple_strtol(msg->nodestr, &ptr, 16); 376 if (!ptr || *ptr) 377 return -EINVAL; 378 379 if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) || 380 (nodenum > INT_MAX) || (nodenum < 0)) 381 return -ERANGE; 382 p->op_this_node = nodenum; 383 384 return ocfs2_control_install_private(file); 385 } 386 387 static int ocfs2_control_do_setversion_msg(struct file *file, 388 struct ocfs2_control_message_setv *msg) 389 { 390 long major, minor; 391 char *ptr = NULL; 392 struct ocfs2_control_private *p = file->private_data; 393 struct ocfs2_protocol_version *max = 394 &ocfs2_user_plugin.sp_max_proto; 395 396 if (ocfs2_control_get_handshake_state(file) != 397 OCFS2_CONTROL_HANDSHAKE_PROTOCOL) 398 return -EINVAL; 399 400 if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP, 401 OCFS2_CONTROL_MESSAGE_OP_LEN)) 402 return -EINVAL; 403 404 if ((msg->space1 != ' ') || (msg->space2 != ' ') || 405 (msg->newline != '\n')) 406 return -EINVAL; 407 msg->space1 = msg->space2 = msg->newline = '\0'; 408 409 major = simple_strtol(msg->major, &ptr, 16); 410 if (!ptr || *ptr) 411 return -EINVAL; 412 minor = simple_strtol(msg->minor, &ptr, 16); 413 if (!ptr || *ptr) 414 return -EINVAL; 415 416 /* 417 * The major must be between 1 and 255, inclusive. The minor 418 * must be between 0 and 255, inclusive. The version passed in 419 * must be within the maximum version supported by the filesystem. 420 */ 421 if ((major == LONG_MIN) || (major == LONG_MAX) || 422 (major > (u8)-1) || (major < 1)) 423 return -ERANGE; 424 if ((minor == LONG_MIN) || (minor == LONG_MAX) || 425 (minor > (u8)-1) || (minor < 0)) 426 return -ERANGE; 427 if ((major != max->pv_major) || 428 (minor > max->pv_minor)) 429 return -EINVAL; 430 431 p->op_proto.pv_major = major; 432 p->op_proto.pv_minor = minor; 433 434 return ocfs2_control_install_private(file); 435 } 436 437 static int ocfs2_control_do_down_msg(struct file *file, 438 struct ocfs2_control_message_down *msg) 439 { 440 long nodenum; 441 char *p = NULL; 442 443 if (ocfs2_control_get_handshake_state(file) != 444 OCFS2_CONTROL_HANDSHAKE_VALID) 445 return -EINVAL; 446 447 if (strncmp(msg->tag, OCFS2_CONTROL_MESSAGE_DOWN_OP, 448 OCFS2_CONTROL_MESSAGE_OP_LEN)) 449 return -EINVAL; 450 451 if ((msg->space1 != ' ') || (msg->space2 != ' ') || 452 (msg->newline != '\n')) 453 return -EINVAL; 454 msg->space1 = msg->space2 = msg->newline = '\0'; 455 456 nodenum = simple_strtol(msg->nodestr, &p, 16); 457 if (!p || *p) 458 return -EINVAL; 459 460 if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) || 461 (nodenum > INT_MAX) || (nodenum < 0)) 462 return -ERANGE; 463 464 ocfs2_control_send_down(msg->uuid, nodenum); 465 466 return 0; 467 } 468 469 static ssize_t ocfs2_control_message(struct file *file, 470 const char __user *buf, 471 size_t count) 472 { 473 ssize_t ret; 474 union ocfs2_control_message msg; 475 476 /* Try to catch padding issues */ 477 WARN_ON(offsetof(struct ocfs2_control_message_down, uuid) != 478 (sizeof(msg.u_down.tag) + sizeof(msg.u_down.space1))); 479 480 memset(&msg, 0, sizeof(union ocfs2_control_message)); 481 ret = ocfs2_control_cfu(&msg, count, buf, count); 482 if (ret) 483 goto out; 484 485 if ((count == OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN) && 486 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETNODE_OP, 487 OCFS2_CONTROL_MESSAGE_OP_LEN)) 488 ret = ocfs2_control_do_setnode_msg(file, &msg.u_setn); 489 else if ((count == OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN) && 490 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_SETVERSION_OP, 491 OCFS2_CONTROL_MESSAGE_OP_LEN)) 492 ret = ocfs2_control_do_setversion_msg(file, &msg.u_setv); 493 else if ((count == OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN) && 494 !strncmp(msg.tag, OCFS2_CONTROL_MESSAGE_DOWN_OP, 495 OCFS2_CONTROL_MESSAGE_OP_LEN)) 496 ret = ocfs2_control_do_down_msg(file, &msg.u_down); 497 else 498 ret = -EINVAL; 499 500 out: 501 return ret ? ret : count; 502 } 503 504 static ssize_t ocfs2_control_write(struct file *file, 505 const char __user *buf, 506 size_t count, 507 loff_t *ppos) 508 { 509 ssize_t ret; 510 511 switch (ocfs2_control_get_handshake_state(file)) { 512 case OCFS2_CONTROL_HANDSHAKE_INVALID: 513 ret = -EINVAL; 514 break; 515 516 case OCFS2_CONTROL_HANDSHAKE_READ: 517 ret = ocfs2_control_validate_protocol(file, buf, 518 count); 519 break; 520 521 case OCFS2_CONTROL_HANDSHAKE_PROTOCOL: 522 case OCFS2_CONTROL_HANDSHAKE_VALID: 523 ret = ocfs2_control_message(file, buf, count); 524 break; 525 526 default: 527 BUG(); 528 ret = -EIO; 529 break; 530 } 531 532 return ret; 533 } 534 535 /* 536 * This is a naive version. If we ever have a new protocol, we'll expand 537 * it. Probably using seq_file. 538 */ 539 static ssize_t ocfs2_control_read(struct file *file, 540 char __user *buf, 541 size_t count, 542 loff_t *ppos) 543 { 544 ssize_t ret; 545 546 ret = simple_read_from_buffer(buf, count, ppos, 547 OCFS2_CONTROL_PROTO, OCFS2_CONTROL_PROTO_LEN); 548 549 /* Have we read the whole protocol list? */ 550 if (ret > 0 && *ppos >= OCFS2_CONTROL_PROTO_LEN) 551 ocfs2_control_set_handshake_state(file, 552 OCFS2_CONTROL_HANDSHAKE_READ); 553 554 return ret; 555 } 556 557 static int ocfs2_control_release(struct inode *inode, struct file *file) 558 { 559 struct ocfs2_control_private *p = file->private_data; 560 561 mutex_lock(&ocfs2_control_lock); 562 563 if (ocfs2_control_get_handshake_state(file) != 564 OCFS2_CONTROL_HANDSHAKE_VALID) 565 goto out; 566 567 if (atomic_dec_and_test(&ocfs2_control_opened)) { 568 if (!list_empty(&ocfs2_live_connection_list)) { 569 /* XXX: Do bad things! */ 570 printk(KERN_ERR 571 "ocfs2: Unexpected release of ocfs2_control!\n" 572 " Loss of cluster connection requires " 573 "an emergency restart!\n"); 574 emergency_restart(); 575 } 576 /* 577 * Last valid close clears the node number and resets 578 * the locking protocol version 579 */ 580 ocfs2_control_this_node = -1; 581 running_proto.pv_major = 0; 582 running_proto.pv_major = 0; 583 } 584 585 out: 586 list_del_init(&p->op_list); 587 file->private_data = NULL; 588 589 mutex_unlock(&ocfs2_control_lock); 590 591 kfree(p); 592 593 return 0; 594 } 595 596 static int ocfs2_control_open(struct inode *inode, struct file *file) 597 { 598 struct ocfs2_control_private *p; 599 600 p = kzalloc(sizeof(struct ocfs2_control_private), GFP_KERNEL); 601 if (!p) 602 return -ENOMEM; 603 p->op_this_node = -1; 604 605 mutex_lock(&ocfs2_control_lock); 606 file->private_data = p; 607 list_add(&p->op_list, &ocfs2_control_private_list); 608 mutex_unlock(&ocfs2_control_lock); 609 610 return 0; 611 } 612 613 static const struct file_operations ocfs2_control_fops = { 614 .open = ocfs2_control_open, 615 .release = ocfs2_control_release, 616 .read = ocfs2_control_read, 617 .write = ocfs2_control_write, 618 .owner = THIS_MODULE, 619 .llseek = default_llseek, 620 }; 621 622 static struct miscdevice ocfs2_control_device = { 623 .minor = MISC_DYNAMIC_MINOR, 624 .name = "ocfs2_control", 625 .fops = &ocfs2_control_fops, 626 }; 627 628 static int ocfs2_control_init(void) 629 { 630 int rc; 631 632 atomic_set(&ocfs2_control_opened, 0); 633 634 rc = misc_register(&ocfs2_control_device); 635 if (rc) 636 printk(KERN_ERR 637 "ocfs2: Unable to register ocfs2_control device " 638 "(errno %d)\n", 639 -rc); 640 641 return rc; 642 } 643 644 static void ocfs2_control_exit(void) 645 { 646 int rc; 647 648 rc = misc_deregister(&ocfs2_control_device); 649 if (rc) 650 printk(KERN_ERR 651 "ocfs2: Unable to deregister ocfs2_control device " 652 "(errno %d)\n", 653 -rc); 654 } 655 656 static void fsdlm_lock_ast_wrapper(void *astarg) 657 { 658 struct ocfs2_dlm_lksb *lksb = astarg; 659 int status = lksb->lksb_fsdlm.sb_status; 660 661 /* 662 * For now we're punting on the issue of other non-standard errors 663 * where we can't tell if the unlock_ast or lock_ast should be called. 664 * The main "other error" that's possible is EINVAL which means the 665 * function was called with invalid args, which shouldn't be possible 666 * since the caller here is under our control. Other non-standard 667 * errors probably fall into the same category, or otherwise are fatal 668 * which means we can't carry on anyway. 669 */ 670 671 if (status == -DLM_EUNLOCK || status == -DLM_ECANCEL) 672 lksb->lksb_conn->cc_proto->lp_unlock_ast(lksb, 0); 673 else 674 lksb->lksb_conn->cc_proto->lp_lock_ast(lksb); 675 } 676 677 static void fsdlm_blocking_ast_wrapper(void *astarg, int level) 678 { 679 struct ocfs2_dlm_lksb *lksb = astarg; 680 681 lksb->lksb_conn->cc_proto->lp_blocking_ast(lksb, level); 682 } 683 684 static int user_dlm_lock(struct ocfs2_cluster_connection *conn, 685 int mode, 686 struct ocfs2_dlm_lksb *lksb, 687 u32 flags, 688 void *name, 689 unsigned int namelen) 690 { 691 int ret; 692 693 if (!lksb->lksb_fsdlm.sb_lvbptr) 694 lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb + 695 sizeof(struct dlm_lksb); 696 697 ret = dlm_lock(conn->cc_lockspace, mode, &lksb->lksb_fsdlm, 698 flags|DLM_LKF_NODLCKWT, name, namelen, 0, 699 fsdlm_lock_ast_wrapper, lksb, 700 fsdlm_blocking_ast_wrapper); 701 return ret; 702 } 703 704 static int user_dlm_unlock(struct ocfs2_cluster_connection *conn, 705 struct ocfs2_dlm_lksb *lksb, 706 u32 flags) 707 { 708 int ret; 709 710 ret = dlm_unlock(conn->cc_lockspace, lksb->lksb_fsdlm.sb_lkid, 711 flags, &lksb->lksb_fsdlm, lksb); 712 return ret; 713 } 714 715 static int user_dlm_lock_status(struct ocfs2_dlm_lksb *lksb) 716 { 717 return lksb->lksb_fsdlm.sb_status; 718 } 719 720 static int user_dlm_lvb_valid(struct ocfs2_dlm_lksb *lksb) 721 { 722 int invalid = lksb->lksb_fsdlm.sb_flags & DLM_SBF_VALNOTVALID; 723 724 return !invalid; 725 } 726 727 static void *user_dlm_lvb(struct ocfs2_dlm_lksb *lksb) 728 { 729 if (!lksb->lksb_fsdlm.sb_lvbptr) 730 lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb + 731 sizeof(struct dlm_lksb); 732 return (void *)(lksb->lksb_fsdlm.sb_lvbptr); 733 } 734 735 static void user_dlm_dump_lksb(struct ocfs2_dlm_lksb *lksb) 736 { 737 } 738 739 static int user_plock(struct ocfs2_cluster_connection *conn, 740 u64 ino, 741 struct file *file, 742 int cmd, 743 struct file_lock *fl) 744 { 745 /* 746 * This more or less just demuxes the plock request into any 747 * one of three dlm calls. 748 * 749 * Internally, fs/dlm will pass these to a misc device, which 750 * a userspace daemon will read and write to. 751 * 752 * For now, cancel requests (which happen internally only), 753 * are turned into unlocks. Most of this function taken from 754 * gfs2_lock. 755 */ 756 757 if (cmd == F_CANCELLK) { 758 cmd = F_SETLK; 759 fl->fl_type = F_UNLCK; 760 } 761 762 if (IS_GETLK(cmd)) 763 return dlm_posix_get(conn->cc_lockspace, ino, file, fl); 764 else if (fl->fl_type == F_UNLCK) 765 return dlm_posix_unlock(conn->cc_lockspace, ino, file, fl); 766 else 767 return dlm_posix_lock(conn->cc_lockspace, ino, file, cmd, fl); 768 } 769 770 /* 771 * Compare a requested locking protocol version against the current one. 772 * 773 * If the major numbers are different, they are incompatible. 774 * If the current minor is greater than the request, they are incompatible. 775 * If the current minor is less than or equal to the request, they are 776 * compatible, and the requester should run at the current minor version. 777 */ 778 static int fs_protocol_compare(struct ocfs2_protocol_version *existing, 779 struct ocfs2_protocol_version *request) 780 { 781 if (existing->pv_major != request->pv_major) 782 return 1; 783 784 if (existing->pv_minor > request->pv_minor) 785 return 1; 786 787 if (existing->pv_minor < request->pv_minor) 788 request->pv_minor = existing->pv_minor; 789 790 return 0; 791 } 792 793 static void user_recover_prep(void *arg) 794 { 795 } 796 797 static void user_recover_slot(void *arg, struct dlm_slot *slot) 798 { 799 struct ocfs2_cluster_connection *conn = arg; 800 printk(KERN_INFO "ocfs2: Node %d/%d down. Initiating recovery.\n", 801 slot->nodeid, slot->slot); 802 conn->cc_recovery_handler(slot->nodeid, conn->cc_recovery_data); 803 804 } 805 806 static void user_recover_done(void *arg, struct dlm_slot *slots, 807 int num_slots, int our_slot, 808 uint32_t generation) 809 { 810 struct ocfs2_cluster_connection *conn = arg; 811 struct ocfs2_live_connection *lc = conn->cc_private; 812 int i; 813 814 for (i = 0; i < num_slots; i++) 815 if (slots[i].slot == our_slot) { 816 atomic_set(&lc->oc_this_node, slots[i].nodeid); 817 break; 818 } 819 820 lc->oc_our_slot = our_slot; 821 } 822 823 const struct dlm_lockspace_ops ocfs2_ls_ops = { 824 .recover_prep = user_recover_prep, 825 .recover_slot = user_recover_slot, 826 .recover_done = user_recover_done, 827 }; 828 829 static int user_cluster_connect(struct ocfs2_cluster_connection *conn) 830 { 831 dlm_lockspace_t *fsdlm; 832 struct ocfs2_live_connection *lc; 833 int rc; 834 835 BUG_ON(conn == NULL); 836 837 lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL); 838 if (!lc) { 839 rc = -ENOMEM; 840 goto out; 841 } 842 843 rc = ocfs2_live_connection_attach(conn, lc); 844 if (rc) 845 goto out; 846 847 /* 848 * running_proto must have been set before we allowed any mounts 849 * to proceed. 850 */ 851 if (fs_protocol_compare(&running_proto, &conn->cc_version)) { 852 printk(KERN_ERR 853 "Unable to mount with fs locking protocol version " 854 "%u.%u because the userspace control daemon has " 855 "negotiated %u.%u\n", 856 conn->cc_version.pv_major, conn->cc_version.pv_minor, 857 running_proto.pv_major, running_proto.pv_minor); 858 rc = -EPROTO; 859 ocfs2_live_connection_drop(lc); 860 lc = NULL; 861 goto out; 862 } 863 864 rc = dlm_new_lockspace(conn->cc_name, NULL, DLM_LSFL_FS, DLM_LVB_LEN, 865 NULL, NULL, NULL, &fsdlm); 866 if (rc) { 867 ocfs2_live_connection_drop(lc); 868 lc = NULL; 869 goto out; 870 } 871 872 conn->cc_private = lc; 873 conn->cc_lockspace = fsdlm; 874 out: 875 if (rc && lc) 876 kfree(lc); 877 return rc; 878 } 879 880 static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn) 881 { 882 dlm_release_lockspace(conn->cc_lockspace, 2); 883 conn->cc_lockspace = NULL; 884 ocfs2_live_connection_drop(conn->cc_private); 885 conn->cc_private = NULL; 886 return 0; 887 } 888 889 static int user_cluster_this_node(unsigned int *this_node) 890 { 891 int rc; 892 893 rc = ocfs2_control_get_this_node(); 894 if (rc < 0) 895 return rc; 896 897 *this_node = rc; 898 return 0; 899 } 900 901 static struct ocfs2_stack_operations ocfs2_user_plugin_ops = { 902 .connect = user_cluster_connect, 903 .disconnect = user_cluster_disconnect, 904 .this_node = user_cluster_this_node, 905 .dlm_lock = user_dlm_lock, 906 .dlm_unlock = user_dlm_unlock, 907 .lock_status = user_dlm_lock_status, 908 .lvb_valid = user_dlm_lvb_valid, 909 .lock_lvb = user_dlm_lvb, 910 .plock = user_plock, 911 .dump_lksb = user_dlm_dump_lksb, 912 }; 913 914 static struct ocfs2_stack_plugin ocfs2_user_plugin = { 915 .sp_name = "user", 916 .sp_ops = &ocfs2_user_plugin_ops, 917 .sp_owner = THIS_MODULE, 918 }; 919 920 921 static int __init ocfs2_user_plugin_init(void) 922 { 923 int rc; 924 925 rc = ocfs2_control_init(); 926 if (!rc) { 927 rc = ocfs2_stack_glue_register(&ocfs2_user_plugin); 928 if (rc) 929 ocfs2_control_exit(); 930 } 931 932 return rc; 933 } 934 935 static void __exit ocfs2_user_plugin_exit(void) 936 { 937 ocfs2_stack_glue_unregister(&ocfs2_user_plugin); 938 ocfs2_control_exit(); 939 } 940 941 MODULE_AUTHOR("Oracle"); 942 MODULE_DESCRIPTION("ocfs2 driver for userspace cluster stacks"); 943 MODULE_LICENSE("GPL"); 944 module_init(ocfs2_user_plugin_init); 945 module_exit(ocfs2_user_plugin_exit); 946