// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * userdlm.c
 *
 * Code which implements the kernel side of a minimal userspace
 * interface to our DLM.
 *
 * Many of the functions here are pared down versions of dlmglue.c
 * functions.
 *
 * Copyright (C) 2003, 2004 Oracle. All rights reserved.
 */

#include <linux/signal.h>
#include <linux/sched/signal.h>

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/crc32.h>

#include "../ocfs2_lockingver.h"
#include "../stackglue.h"
#include "userdlm.h"

#define MLOG_MASK_PREFIX ML_DLMFS
#include "../cluster/masklog.h"


static inline struct user_lock_res *user_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb)
{
	return container_of(lksb, struct user_lock_res, l_lksb);
}

static inline int user_check_wait_flag(struct user_lock_res *lockres,
				       int flag)
{
	int ret;

	spin_lock(&lockres->l_lock);
	ret = lockres->l_flags & flag;
	spin_unlock(&lockres->l_lock);

	return ret;
}

static inline void user_wait_on_busy_lock(struct user_lock_res *lockres)
{
	wait_event(lockres->l_event,
		   !user_check_wait_flag(lockres, USER_LOCK_BUSY));
}

static inline void user_wait_on_blocked_lock(struct user_lock_res *lockres)
{
	wait_event(lockres->l_event,
		   !user_check_wait_flag(lockres, USER_LOCK_BLOCKED));
}

/* I heart container_of... */
static inline struct ocfs2_cluster_connection *
cluster_connection_from_user_lockres(struct user_lock_res *lockres)
{
	struct dlmfs_inode_private *ip;

	ip = container_of(lockres,
			  struct dlmfs_inode_private,
			  ip_lockres);
	return ip->ip_conn;
}

static struct inode *
user_dlm_inode_from_user_lockres(struct user_lock_res *lockres)
{
	struct dlmfs_inode_private *ip;

	ip = container_of(lockres,
			  struct dlmfs_inode_private,
			  ip_lockres);
	return &ip->ip_vfs_inode;
}

static inline void user_recover_from_dlm_error(struct user_lock_res *lockres)
{
	spin_lock(&lockres->l_lock);
	lockres->l_flags &= ~USER_LOCK_BUSY;
	spin_unlock(&lockres->l_lock);
}

#define user_log_dlm_error(_func, _stat, _lockres) do {		\
	mlog(ML_ERROR, "Dlm error %d while calling %s on "	\
		"resource %.*s\n", _stat, _func,		\
		_lockres->l_namelen, _lockres->l_name);		\
} while (0)

/* WARNING: This function lives in a world where the only three lock
 * levels are EX, PR, and NL. It *will* have to be adjusted when more
 * lock types are added. */
static inline int user_highest_compat_lock_level(int level)
{
	int new_level = DLM_LOCK_EX;

	if (level == DLM_LOCK_EX)
		new_level = DLM_LOCK_NL;
	else if (level == DLM_LOCK_PR)
		new_level = DLM_LOCK_PR;
	return new_level;
}
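
/*
 * Lock AST: called by the DLM stack once a new lock or convert request
 * on this lksb has been granted.  Promotes l_level to the granted
 * (l_requested) level, clears USER_LOCK_BUSY (and USER_LOCK_BLOCKED if
 * a downconvert has satisfied the blocking request) and wakes anyone
 * sleeping in user_wait_on_busy_lock()/user_wait_on_blocked_lock().
 * If the lksb carries an error status, the callback only logs it and
 * leaves the lockres untouched.
 */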
static void user_ast(struct ocfs2_dlm_lksb *lksb)
{
	struct user_lock_res *lockres = user_lksb_to_lock_res(lksb);
	int status;

	mlog(ML_BASTS, "AST fired for lockres %.*s, level %d => %d\n",
	     lockres->l_namelen, lockres->l_name, lockres->l_level,
	     lockres->l_requested);

	spin_lock(&lockres->l_lock);

	status = ocfs2_dlm_lock_status(&lockres->l_lksb);
	if (status) {
		mlog(ML_ERROR, "lksb status value of %u on lockres %.*s\n",
		     status, lockres->l_namelen, lockres->l_name);
		spin_unlock(&lockres->l_lock);
		return;
	}

	mlog_bug_on_msg(lockres->l_requested == DLM_LOCK_IV,
			"Lockres %.*s, requested ivmode. flags 0x%x\n",
			lockres->l_namelen, lockres->l_name, lockres->l_flags);

	/* we're downconverting. */
	if (lockres->l_requested < lockres->l_level) {
		if (lockres->l_requested <=
		    user_highest_compat_lock_level(lockres->l_blocking)) {
			lockres->l_blocking = DLM_LOCK_NL;
			lockres->l_flags &= ~USER_LOCK_BLOCKED;
		}
	}

	lockres->l_level = lockres->l_requested;
	lockres->l_requested = DLM_LOCK_IV;
	lockres->l_flags |= USER_LOCK_ATTACHED;
	lockres->l_flags &= ~USER_LOCK_BUSY;

	spin_unlock(&lockres->l_lock);

	wake_up(&lockres->l_event);
}

static inline void user_dlm_grab_inode_ref(struct user_lock_res *lockres)
{
	struct inode *inode;
	inode = user_dlm_inode_from_user_lockres(lockres);
	if (!igrab(inode))
		BUG();
}

static void user_dlm_unblock_lock(struct work_struct *work);

static void __user_dlm_queue_lockres(struct user_lock_res *lockres)
{
	if (!(lockres->l_flags & USER_LOCK_QUEUED)) {
		user_dlm_grab_inode_ref(lockres);

		INIT_WORK(&lockres->l_work, user_dlm_unblock_lock);

		queue_work(user_dlm_worker, &lockres->l_work);
		lockres->l_flags |= USER_LOCK_QUEUED;
	}
}

static void __user_dlm_cond_queue_lockres(struct user_lock_res *lockres)
{
	int queue = 0;

	if (!(lockres->l_flags & USER_LOCK_BLOCKED))
		return;

	switch (lockres->l_blocking) {
	case DLM_LOCK_EX:
		if (!lockres->l_ex_holders && !lockres->l_ro_holders)
			queue = 1;
		break;
	case DLM_LOCK_PR:
		if (!lockres->l_ex_holders)
			queue = 1;
		break;
	default:
		BUG();
	}

	if (queue)
		__user_dlm_queue_lockres(lockres);
}

static void user_bast(struct ocfs2_dlm_lksb *lksb, int level)
{
	struct user_lock_res *lockres = user_lksb_to_lock_res(lksb);

	mlog(ML_BASTS, "BAST fired for lockres %.*s, blocking %d, level %d\n",
	     lockres->l_namelen, lockres->l_name, level, lockres->l_level);

	spin_lock(&lockres->l_lock);
	lockres->l_flags |= USER_LOCK_BLOCKED;
	if (level > lockres->l_blocking)
		lockres->l_blocking = level;

	__user_dlm_queue_lockres(lockres);
	spin_unlock(&lockres->l_lock);

	wake_up(&lockres->l_event);
}
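
/*
 * Unlock AST: completion callback for ocfs2_dlm_unlock().  Three cases
 * are told apart here: the final unlock during teardown (l_level falls
 * back to DLM_LOCK_IV), a convert cancel that lost the race to the
 * grant (DLM_CANCELGRANT - the lock AST has already cleared
 * USER_LOCK_BUSY, so it is not touched here), and a successful cancel,
 * which re-queues the lockres so the unblock worker can look at it
 * again.
 */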
static void user_unlock_ast(struct ocfs2_dlm_lksb *lksb, int status)
{
	struct user_lock_res *lockres = user_lksb_to_lock_res(lksb);

	mlog(ML_BASTS, "UNLOCK AST fired for lockres %.*s, flags 0x%x\n",
	     lockres->l_namelen, lockres->l_name, lockres->l_flags);

	if (status)
		mlog(ML_ERROR, "dlm returns status %d\n", status);

	spin_lock(&lockres->l_lock);
	/* The teardown flag gets set early during the unlock process,
	 * so test the cancel flag to make sure that this ast isn't
	 * for a concurrent cancel. */
	if (lockres->l_flags & USER_LOCK_IN_TEARDOWN
	    && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) {
		lockres->l_level = DLM_LOCK_IV;
	} else if (status == DLM_CANCELGRANT) {
		/* We tried to cancel a convert request, but it was
		 * already granted. Don't clear the busy flag - the
		 * ast should've done this already. */
		BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
		lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
		goto out_noclear;
	} else {
		BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
		/* Cancel succeeded, we want to re-queue */
		lockres->l_requested = DLM_LOCK_IV; /* cancel an
						     * upconvert
						     * request. */
		lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
		/* we want the unblock thread to look at it again
		 * now. */
		if (lockres->l_flags & USER_LOCK_BLOCKED)
			__user_dlm_queue_lockres(lockres);
	}

	lockres->l_flags &= ~USER_LOCK_BUSY;
out_noclear:
	spin_unlock(&lockres->l_lock);

	wake_up(&lockres->l_event);
}

/*
 * This is the userdlmfs locking protocol version.
 *
 * See fs/ocfs2/dlmglue.c for more details on locking versions.
 */
static struct ocfs2_locking_protocol user_dlm_lproto = {
	.lp_max_version = {
		.pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR,
		.pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR,
	},
	.lp_lock_ast = user_ast,
	.lp_blocking_ast = user_bast,
	.lp_unlock_ast = user_unlock_ast,
};

static inline void user_dlm_drop_inode_ref(struct user_lock_res *lockres)
{
	struct inode *inode;
	inode = user_dlm_inode_from_user_lockres(lockres);
	iput(inode);
}
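
/*
 * Unblock worker: queued on user_dlm_worker by __user_dlm_queue_lockres()
 * whenever another node blocks on this lock (see user_bast()).  If a
 * convert is still in flight it is cancelled; otherwise, once all
 * incompatible local holders have gone away, the lock is downconverted
 * to the highest level still compatible with the blocking request.
 * The inode reference taken at queue time is dropped on exit.
 */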
static void user_dlm_unblock_lock(struct work_struct *work)
{
	int new_level, status;
	struct user_lock_res *lockres =
		container_of(work, struct user_lock_res, l_work);
	struct ocfs2_cluster_connection *conn =
		cluster_connection_from_user_lockres(lockres);

	mlog(0, "lockres %.*s\n", lockres->l_namelen, lockres->l_name);

	spin_lock(&lockres->l_lock);

	mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED),
			"Lockres %.*s, flags 0x%x\n",
			lockres->l_namelen, lockres->l_name, lockres->l_flags);

	/* notice that we don't clear USER_LOCK_BLOCKED here. If it's
	 * set, we want user_ast to clear it. */
	lockres->l_flags &= ~USER_LOCK_QUEUED;

	/* It's valid to get here and no longer be blocked - if we get
	 * several basts in a row, we might be queued by the first
	 * one, the unblock thread might run and clear the queued
	 * flag, and finally we might get another bast which re-queues
	 * us before our ast for the downconvert is called. */
	if (!(lockres->l_flags & USER_LOCK_BLOCKED)) {
		mlog(ML_BASTS, "lockres %.*s USER_LOCK_BLOCKED\n",
		     lockres->l_namelen, lockres->l_name);
		spin_unlock(&lockres->l_lock);
		goto drop_ref;
	}

	if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
		mlog(ML_BASTS, "lockres %.*s USER_LOCK_IN_TEARDOWN\n",
		     lockres->l_namelen, lockres->l_name);
		spin_unlock(&lockres->l_lock);
		goto drop_ref;
	}

	if (lockres->l_flags & USER_LOCK_BUSY) {
		if (lockres->l_flags & USER_LOCK_IN_CANCEL) {
			mlog(ML_BASTS, "lockres %.*s USER_LOCK_IN_CANCEL\n",
			     lockres->l_namelen, lockres->l_name);
			spin_unlock(&lockres->l_lock);
			goto drop_ref;
		}

		lockres->l_flags |= USER_LOCK_IN_CANCEL;
		spin_unlock(&lockres->l_lock);

		status = ocfs2_dlm_unlock(conn, &lockres->l_lksb,
					  DLM_LKF_CANCEL);
		if (status)
			user_log_dlm_error("ocfs2_dlm_unlock", status, lockres);
		goto drop_ref;
	}

	/* If there are still incompat holders, we can exit safely
	 * without worrying about re-queueing this lock as that will
	 * happen on the last call to user_dlm_cluster_unlock. */
	if ((lockres->l_blocking == DLM_LOCK_EX)
	    && (lockres->l_ex_holders || lockres->l_ro_holders)) {
		spin_unlock(&lockres->l_lock);
		mlog(ML_BASTS, "lockres %.*s, EX/PR Holders %u,%u\n",
		     lockres->l_namelen, lockres->l_name,
		     lockres->l_ex_holders, lockres->l_ro_holders);
		goto drop_ref;
	}

	if ((lockres->l_blocking == DLM_LOCK_PR)
	    && lockres->l_ex_holders) {
		spin_unlock(&lockres->l_lock);
		mlog(ML_BASTS, "lockres %.*s, EX Holders %u\n",
		     lockres->l_namelen, lockres->l_name,
		     lockres->l_ex_holders);
		goto drop_ref;
	}

	/* yay, we can downconvert now. */
	new_level = user_highest_compat_lock_level(lockres->l_blocking);
	lockres->l_requested = new_level;
	lockres->l_flags |= USER_LOCK_BUSY;
	mlog(ML_BASTS, "lockres %.*s, downconvert %d => %d\n",
	     lockres->l_namelen, lockres->l_name, lockres->l_level, new_level);
	spin_unlock(&lockres->l_lock);

	/* need lock downconvert request now... */
	status = ocfs2_dlm_lock(conn, new_level, &lockres->l_lksb,
				DLM_LKF_CONVERT|DLM_LKF_VALBLK,
				lockres->l_name,
				lockres->l_namelen);
	if (status) {
		user_log_dlm_error("ocfs2_dlm_lock", status, lockres);
		user_recover_from_dlm_error(lockres);
	}

drop_ref:
	user_dlm_drop_inode_ref(lockres);
}

static inline void user_dlm_inc_holders(struct user_lock_res *lockres,
					int level)
{
	switch(level) {
	case DLM_LOCK_EX:
		lockres->l_ex_holders++;
		break;
	case DLM_LOCK_PR:
		lockres->l_ro_holders++;
		break;
	default:
		BUG();
	}
}

/* predict what lock level we'll be dropping down to on behalf
 * of another node, and return true if the currently wanted
 * level will be compatible with it. */
static inline int
user_may_continue_on_blocked_lock(struct user_lock_res *lockres,
				  int wanted)
{
	BUG_ON(!(lockres->l_flags & USER_LOCK_BLOCKED));

	return wanted <= user_highest_compat_lock_level(lockres->l_blocking);
}
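
/*
 * Take a cluster lock at DLM_LOCK_PR or DLM_LOCK_EX on behalf of a
 * local holder, looping until the requested level is granted (or a
 * signal arrives, in which case -ERESTARTSYS is returned).  With
 * DLM_LKF_NOQUEUE in lkm_flags, a request that cannot be granted
 * immediately is expected to fail with -EAGAIN, which is passed back
 * without being logged as an error.  A successful call must be
 * balanced by user_dlm_cluster_unlock() at the same level, e.g.
 * (illustrative sketch only, error handling elided; lockres here is
 * &DLMFS_I(inode)->ip_lockres and buf/len are placeholders):
 *
 *	if (!user_dlm_cluster_lock(lockres, DLM_LOCK_EX, 0)) {
 *		user_dlm_write_lvb(inode, buf, len);
 *		user_dlm_cluster_unlock(lockres, DLM_LOCK_EX);
 *	}
 */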
int user_dlm_cluster_lock(struct user_lock_res *lockres,
			  int level,
			  int lkm_flags)
{
	int status, local_flags;
	struct ocfs2_cluster_connection *conn =
		cluster_connection_from_user_lockres(lockres);

	if (level != DLM_LOCK_EX &&
	    level != DLM_LOCK_PR) {
		mlog(ML_ERROR, "lockres %.*s: invalid request!\n",
		     lockres->l_namelen, lockres->l_name);
		status = -EINVAL;
		goto bail;
	}

	mlog(ML_BASTS, "lockres %.*s, level %d, flags = 0x%x\n",
	     lockres->l_namelen, lockres->l_name, level, lkm_flags);

again:
	if (signal_pending(current)) {
		status = -ERESTARTSYS;
		goto bail;
	}

	spin_lock(&lockres->l_lock);

	/* We only compare against the currently granted level
	 * here. If the lock is blocked waiting on a downconvert,
	 * we'll get caught below. */
	if ((lockres->l_flags & USER_LOCK_BUSY) &&
	    (level > lockres->l_level)) {
		/* is someone sitting in dlm_lock? If so, wait on
		 * them. */
		spin_unlock(&lockres->l_lock);

		user_wait_on_busy_lock(lockres);
		goto again;
	}

	if ((lockres->l_flags & USER_LOCK_BLOCKED) &&
	    (!user_may_continue_on_blocked_lock(lockres, level))) {
		/* is the lock currently blocked on behalf of
		 * another node? */
		spin_unlock(&lockres->l_lock);

		user_wait_on_blocked_lock(lockres);
		goto again;
	}

	if (level > lockres->l_level) {
		local_flags = lkm_flags | DLM_LKF_VALBLK;
		if (lockres->l_level != DLM_LOCK_IV)
			local_flags |= DLM_LKF_CONVERT;

		lockres->l_requested = level;
		lockres->l_flags |= USER_LOCK_BUSY;
		spin_unlock(&lockres->l_lock);

		BUG_ON(level == DLM_LOCK_IV);
		BUG_ON(level == DLM_LOCK_NL);

		/* call dlm_lock to upgrade lock now */
		status = ocfs2_dlm_lock(conn, level, &lockres->l_lksb,
					local_flags, lockres->l_name,
					lockres->l_namelen);
		if (status) {
			if ((lkm_flags & DLM_LKF_NOQUEUE) &&
			    (status != -EAGAIN))
				user_log_dlm_error("ocfs2_dlm_lock",
						   status, lockres);
			user_recover_from_dlm_error(lockres);
			goto bail;
		}

		user_wait_on_busy_lock(lockres);
		goto again;
	}

	user_dlm_inc_holders(lockres, level);
	spin_unlock(&lockres->l_lock);

	status = 0;
bail:
	return status;
}

static inline void user_dlm_dec_holders(struct user_lock_res *lockres,
					int level)
{
	switch(level) {
	case DLM_LOCK_EX:
		BUG_ON(!lockres->l_ex_holders);
		lockres->l_ex_holders--;
		break;
	case DLM_LOCK_PR:
		BUG_ON(!lockres->l_ro_holders);
		lockres->l_ro_holders--;
		break;
	default:
		BUG();
	}
}

void user_dlm_cluster_unlock(struct user_lock_res *lockres,
			     int level)
{
	if (level != DLM_LOCK_EX &&
	    level != DLM_LOCK_PR) {
		mlog(ML_ERROR, "lockres %.*s: invalid request!\n",
		     lockres->l_namelen, lockres->l_name);
		return;
	}

	spin_lock(&lockres->l_lock);
	user_dlm_dec_holders(lockres, level);
	__user_dlm_cond_queue_lockres(lockres);
	spin_unlock(&lockres->l_lock);
}

void user_dlm_write_lvb(struct inode *inode,
			const char *val,
			unsigned int len)
{
	struct user_lock_res *lockres = &DLMFS_I(inode)->ip_lockres;
	char *lvb;

	BUG_ON(len > DLM_LVB_LEN);

	spin_lock(&lockres->l_lock);

	BUG_ON(lockres->l_level < DLM_LOCK_EX);
	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
	memcpy(lvb, val, len);

	spin_unlock(&lockres->l_lock);
}

bool user_dlm_read_lvb(struct inode *inode, char *val)
{
	struct user_lock_res *lockres = &DLMFS_I(inode)->ip_lockres;
	char *lvb;
	bool ret = true;

	spin_lock(&lockres->l_lock);

	BUG_ON(lockres->l_level < DLM_LOCK_PR);
	if (ocfs2_dlm_lvb_valid(&lockres->l_lksb)) {
		lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
		memcpy(val, lvb, DLM_LVB_LEN);
	} else
		ret = false;

	spin_unlock(&lockres->l_lock);
	return ret;
}

void user_dlm_lock_res_init(struct user_lock_res *lockres,
			    struct dentry *dentry)
{
	memset(lockres, 0, sizeof(*lockres));

	spin_lock_init(&lockres->l_lock);
	init_waitqueue_head(&lockres->l_event);
	lockres->l_level = DLM_LOCK_IV;
	lockres->l_requested = DLM_LOCK_IV;
	lockres->l_blocking = DLM_LOCK_IV;

	/* should have been checked before getting here. */
	BUG_ON(dentry->d_name.len >= USER_DLM_LOCK_ID_MAX_LEN);

	memcpy(lockres->l_name,
	       dentry->d_name.name,
	       dentry->d_name.len);
	lockres->l_namelen = dentry->d_name.len;
}
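
/*
 * Tear a lockres down before the backing inode goes away.  Returns 0 if
 * teardown is already in progress or once the lock has been dropped
 * from the DLM, and -EBUSY if local holders still reference the lock.
 * Any in-flight lock or convert is waited on before the final unlock,
 * which is issued with DLM_LKF_VALBLK so the LVB is written back.
 */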
int user_dlm_destroy_lock(struct user_lock_res *lockres)
{
	int status = -EBUSY;
	struct ocfs2_cluster_connection *conn =
		cluster_connection_from_user_lockres(lockres);

	mlog(ML_BASTS, "lockres %.*s\n", lockres->l_namelen, lockres->l_name);

	spin_lock(&lockres->l_lock);
	if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
		spin_unlock(&lockres->l_lock);
		return 0;
	}

	lockres->l_flags |= USER_LOCK_IN_TEARDOWN;

	while (lockres->l_flags & USER_LOCK_BUSY) {
		spin_unlock(&lockres->l_lock);

		user_wait_on_busy_lock(lockres);

		spin_lock(&lockres->l_lock);
	}

	if (lockres->l_ro_holders || lockres->l_ex_holders) {
		spin_unlock(&lockres->l_lock);
		goto bail;
	}

	status = 0;
	if (!(lockres->l_flags & USER_LOCK_ATTACHED)) {
		spin_unlock(&lockres->l_lock);
		goto bail;
	}

	lockres->l_flags &= ~USER_LOCK_ATTACHED;
	lockres->l_flags |= USER_LOCK_BUSY;
	spin_unlock(&lockres->l_lock);

	status = ocfs2_dlm_unlock(conn, &lockres->l_lksb, DLM_LKF_VALBLK);
	if (status) {
		user_log_dlm_error("ocfs2_dlm_unlock", status, lockres);
		goto bail;
	}

	user_wait_on_busy_lock(lockres);

	status = 0;
bail:
	return status;
}

static void user_dlm_recovery_handler_noop(int node_num,
					   void *recovery_data)
{
	/* We ignore recovery events */
	return;
}

void user_dlm_set_locking_protocol(void)
{
	ocfs2_stack_glue_set_max_proto_version(&user_dlm_lproto.lp_max_version);
}

struct ocfs2_cluster_connection *user_dlm_register(const struct qstr *name)
{
	int rc;
	struct ocfs2_cluster_connection *conn;

	rc = ocfs2_cluster_connect_agnostic(name->name, name->len,
					    &user_dlm_lproto,
					    user_dlm_recovery_handler_noop,
					    NULL, &conn);
	if (rc)
		mlog_errno(rc);

	return rc ? ERR_PTR(rc) : conn;
}

void user_dlm_unregister(struct ocfs2_cluster_connection *conn)
{
	ocfs2_cluster_disconnect(conn, 0);
}
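
/*
 * Rough lifecycle of the API above, as a dlmfs-style consumer would
 * drive it (illustrative ordering only, not lifted from a real caller;
 * domain_name, dentry, inode and buf are placeholders):
 *
 *	conn = user_dlm_register(&domain_name);
 *	user_dlm_lock_res_init(lockres, dentry);
 *
 *	user_dlm_cluster_lock(lockres, DLM_LOCK_PR, 0);
 *	user_dlm_read_lvb(inode, buf);
 *	user_dlm_cluster_unlock(lockres, DLM_LOCK_PR);
 *
 *	user_dlm_destroy_lock(lockres);
 *	user_dlm_unregister(conn);
 */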