// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
 */

#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/miscdevice.h>
#include <linux/poll.h>
#include <linux/dlm.h>
#include <linux/dlm_plock.h>
#include <linux/slab.h>

#include <trace/events/dlm.h>

#include "dlm_internal.h"
#include "lockspace.h"

/*
 * POSIX lock (plock) requests are not resolved in the kernel.  Each
 * request is packaged as a plock_op, queued on send_list, and handed to
 * userspace through the misc device below (dev_read).  The result comes
 * back through dev_write, which matches it against an op parked on
 * recv_list.  ops_lock protects both lists.
 */
static DEFINE_SPINLOCK(ops_lock);
static LIST_HEAD(send_list);	/* ops waiting to be read by userspace */
static LIST_HEAD(recv_list);	/* ops waiting for a userspace result */
static DECLARE_WAIT_QUEUE_HEAD(send_wq);	/* woken by send_op() for dev_read/dev_poll */
static DECLARE_WAIT_QUEUE_HEAD(recv_wq);	/* woken by dev_write() when an op completes */

/* extra state carried by ops that complete asynchronously via lm_grant */
struct plock_async_data {
	void *fl;
	void *file;
	struct file_lock flc;
	/* callback (fl_lmops->lm_grant) invoked with the final result */
	int (*callback)(struct file_lock *fl, int result);
};

struct plock_op {
	struct list_head list;
	int done;	/* set (under ops_lock) once userspace has answered */
	struct dlm_plock_info info;
	/* if set indicates async handling */
	struct plock_async_data *data;
};

/* stamp the kernel's plock interface version into an outgoing info */
static inline void set_version(struct dlm_plock_info *info)
{
	info->version[0] = DLM_PLOCK_VERSION_MAJOR;
	info->version[1] = DLM_PLOCK_VERSION_MINOR;
	info->version[2] = DLM_PLOCK_VERSION_PATCH;
}

/*
 * Find the waiting (SETLKW) op on recv_list that matches all identifying
 * fields of @info, or NULL.  Both callers (dev_write, dlm_posix_cancel)
 * hold ops_lock around this lookup.
 */
static struct plock_op *plock_lookup_waiter(const struct dlm_plock_info *info)
{
	struct plock_op *op = NULL, *iter;

	list_for_each_entry(iter, &recv_list, list) {
		if (iter->info.fsid == info->fsid &&
		    iter->info.number == info->number &&
		    iter->info.owner == info->owner &&
		    iter->info.pid == info->pid &&
		    iter->info.start == info->start &&
		    iter->info.end == info->end &&
		    iter->info.ex == info->ex &&
		    iter->info.wait) {
			op = iter;
			break;
		}
	}

	return op;
}

/*
 * Reject results from an incompatible userspace: the major version must
 * match exactly, and the user's minor version must not be newer than ours.
 */
static int check_version(struct dlm_plock_info *info)
{
	if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) ||
	    (DLM_PLOCK_VERSION_MINOR < info->version[1])) {
		log_print("plock device version mismatch: "
			  "kernel (%u.%u.%u), user (%u.%u.%u)",
			  DLM_PLOCK_VERSION_MAJOR,
			  DLM_PLOCK_VERSION_MINOR,
			  DLM_PLOCK_VERSION_PATCH,
			  info->version[0],
			  info->version[1],
			  info->version[2]);
		return -EINVAL;
	}
	return 0;
}

/* free an op and its (optional) async data; kfree(NULL) is a no-op */
static void dlm_release_plock_op(struct plock_op *op)
{
	kfree(op->data);
	kfree(op);
}

/* queue an op for userspace and wake any reader/poller of the device */
static void send_op(struct plock_op *op)
{
	set_version(&op->info);
	spin_lock(&ops_lock);
	list_add_tail(&op->list, &send_list);
	spin_unlock(&ops_lock);
	wake_up(&send_wq);
}

/*
 * Send a synchronous CANCEL for the pending request described by
 * @orig_info and wait (uninterruptibly) for the userspace answer.
 * Returns the rv userspace wrote back (0 on successful cancel,
 * -ENOENT if there was nothing left to cancel).
 */
static int do_lock_cancel(const struct dlm_plock_info *orig_info)
{
	struct plock_op *op;
	int rv;

	op = kzalloc(sizeof(*op), GFP_NOFS);
	if (!op)
		return -ENOMEM;

	op->info = *orig_info;
	op->info.optype = DLM_PLOCK_OP_CANCEL;
	op->info.wait = 0;

	send_op(op);
	wait_event(recv_wq, (op->done != 0));

	rv = op->info.rv;

	dlm_release_plock_op(op);
	return rv;
}

/*
 * Acquire a cluster POSIX lock on (@lockspace, @number) for @fl.
 *
 * Async callers (fl_lmops->lm_grant set, i.e. lockd-style) get
 * FILE_LOCK_DEFERRED immediately and are completed later through
 * dlm_plock_callback().  Synchronous SETLKW waits interruptibly; on a
 * signal the request is cancelled in userspace via do_lock_cancel(),
 * with careful handling of the race where the answer arrives first.
 */
int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
		   int cmd, struct file_lock *fl)
{
	struct plock_async_data *op_data;
	struct dlm_ls *ls;
	struct plock_op *op;
	int rv;

	ls = dlm_find_lockspace_local(lockspace);
	if (!ls)
		return -EINVAL;

	op = kzalloc(sizeof(*op), GFP_NOFS);
	if (!op) {
		rv = -ENOMEM;
		goto out;
	}

	op->info.optype = DLM_PLOCK_OP_LOCK;
	op->info.pid = fl->fl_pid;
	op->info.ex = (fl->fl_type == F_WRLCK);
	op->info.wait = IS_SETLKW(cmd);
	op->info.fsid = ls->ls_global_id;
	op->info.number = number;
	op->info.start = fl->fl_start;
	op->info.end = fl->fl_end;
	/* async handling */
	if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
		op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
		if (!op_data) {
			dlm_release_plock_op(op);
			rv = -ENOMEM;
			goto out;
		}

		/* fl_owner is lockd which doesn't distinguish
		   processes on the nfs client */
		op->info.owner = (__u64) fl->fl_pid;
		op_data->callback = fl->fl_lmops->lm_grant;
		locks_init_lock(&op_data->flc);
		locks_copy_lock(&op_data->flc, fl);
		op_data->fl = fl;
		op_data->file = file;

		op->data = op_data;

		send_op(op);
		rv = FILE_LOCK_DEFERRED;
		goto out;
	} else {
		op->info.owner = (__u64)(long) fl->fl_owner;
	}

	send_op(op);

	if (op->info.wait) {
		rv = wait_event_interruptible(recv_wq, (op->done != 0));
		if (rv == -ERESTARTSYS) {
			spin_lock(&ops_lock);
			/* recheck under ops_lock if we got a done != 0,
			 * if so this interrupt case should be ignored
			 */
			if (op->done != 0) {
				spin_unlock(&ops_lock);
				goto do_lock_wait;
			}
			spin_unlock(&ops_lock);

			rv = do_lock_cancel(&op->info);
			switch (rv) {
			case 0:
				/* waiter was deleted in user space, answer will never come
				 * remove original request. The original request must be
				 * on recv_list because the answer of do_lock_cancel()
				 * synchronized it.
				 */
				spin_lock(&ops_lock);
				list_del(&op->list);
				spin_unlock(&ops_lock);
				rv = -EINTR;
				break;
			case -ENOENT:
				/* cancellation wasn't successful but op should be done */
				fallthrough;
			default:
				/* internal error doing cancel we need to wait */
				goto wait;
			}

			log_debug(ls, "%s: wait interrupted %x %llx pid %d",
				  __func__, ls->ls_global_id,
				  (unsigned long long)number, op->info.pid);
			dlm_release_plock_op(op);
			goto out;
		}
	} else {
wait:
		wait_event(recv_wq, (op->done != 0));
	}

do_lock_wait:

	WARN_ON(!list_empty(&op->list));

	rv = op->info.rv;

	if (!rv) {
		/* cluster lock granted; mirror it in the local VFS lock table */
		if (locks_lock_file_wait(file, fl) < 0)
			log_error(ls, "dlm_posix_lock: vfs lock error %llx",
				  (unsigned long long)number);
	}

	dlm_release_plock_op(op);
out:
	dlm_put_lockspace(ls);
	return rv;
}
EXPORT_SYMBOL_GPL(dlm_posix_lock);

/* Returns failure iff a successful lock operation should be canceled */
static int dlm_plock_callback(struct plock_op *op)
{
	struct plock_async_data *op_data = op->data;
	struct file *file;
	struct file_lock *fl;
	struct file_lock *flc;
	int (*notify)(struct file_lock *fl, int result) = NULL;
	int rv = 0;

	WARN_ON(!list_empty(&op->list));

	/* check if the following 2 are still valid or make a copy */
	file = op_data->file;
	flc = &op_data->flc;
	fl = op_data->fl;
	notify = op_data->callback;

	/* userspace reported failure: just report it to the lock manager */
	if (op->info.rv) {
		notify(fl, op->info.rv);
		goto out;
	}

	/* got fs lock; bookkeep locally as well: */
	flc->fl_flags &= ~FL_SLEEP;
	if (posix_lock_file(file, flc, NULL)) {
		/*
		 * This can only happen in the case of kmalloc() failure.
		 * The filesystem's own lock is the authoritative lock,
		 * so a failure to get the lock locally is not a disaster.
		 * As long as the fs cannot reliably cancel locks (especially
		 * in a low-memory situation), we're better off ignoring
		 * this failure than trying to recover.
		 */
		log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p",
			  (unsigned long long)op->info.number, file, fl);
	}

	rv = notify(fl, 0);
	if (rv) {
		/* XXX: We need to cancel the fs lock here: */
		log_print("%s: lock granted after lock request failed; dangling lock!",
			  __func__);
		goto out;
	}

out:
	dlm_release_plock_op(op);
	return rv;
}

/*
 * Release a cluster POSIX lock: drop the local VFS lock first, then tell
 * userspace.  For unlocks generated by file close (FL_CLOSE) no reply is
 * needed; the op is then freed by dev_read() rather than here.
 */
int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
		     struct file_lock *fl)
{
	struct dlm_ls *ls;
	struct plock_op *op;
	int rv;
	unsigned char fl_flags = fl->fl_flags;	/* restored before returning */

	ls = dlm_find_lockspace_local(lockspace);
	if (!ls)
		return -EINVAL;

	op = kzalloc(sizeof(*op), GFP_NOFS);
	if (!op) {
		rv = -ENOMEM;
		goto out;
	}

	/* cause the vfs unlock to return ENOENT if lock is not found */
	fl->fl_flags |= FL_EXISTS;

	rv = locks_lock_file_wait(file, fl);
	if (rv == -ENOENT) {
		/* no local lock existed, nothing to tell userspace */
		rv = 0;
		goto out_free;
	}
	if (rv < 0) {
		log_error(ls, "dlm_posix_unlock: vfs unlock error %d %llx",
			  rv, (unsigned long long)number);
	}

	op->info.optype = DLM_PLOCK_OP_UNLOCK;
	op->info.pid = fl->fl_pid;
	op->info.fsid = ls->ls_global_id;
	op->info.number = number;
	op->info.start = fl->fl_start;
	op->info.end = fl->fl_end;
	if (fl->fl_lmops && fl->fl_lmops->lm_grant)
		op->info.owner = (__u64) fl->fl_pid;
	else
		op->info.owner = (__u64)(long) fl->fl_owner;

	if (fl->fl_flags & FL_CLOSE) {
		op->info.flags |= DLM_PLOCK_FL_CLOSE;
		send_op(op);
		/* op ownership passes to dev_read(), which frees it */
		rv = 0;
		goto out;
	}

	send_op(op);
	wait_event(recv_wq, (op->done != 0));

	WARN_ON(!list_empty(&op->list));

	rv = op->info.rv;

	if (rv == -ENOENT)
		rv = 0;

out_free:
	dlm_release_plock_op(op);
out:
	dlm_put_lockspace(ls);
	fl->fl_flags = fl_flags;
	return rv;
}
EXPORT_SYMBOL_GPL(dlm_posix_unlock);

/*
 * NOTE: This implementation can only handle async lock requests as nfs
 * do it. It cannot handle cancellation of a pending lock request sitting
 * in wait_event(), but for now only nfs is the only user local kernel
 * user.
 */
int dlm_posix_cancel(dlm_lockspace_t *lockspace, u64 number, struct file *file,
		     struct file_lock *fl)
{
	struct dlm_plock_info info;
	struct plock_op *op;
	struct dlm_ls *ls;
	int rv;

	/* this only works for async request for now and nfs is the only
	 * kernel user right now.
	 */
	if (WARN_ON_ONCE(!fl->fl_lmops || !fl->fl_lmops->lm_grant))
		return -EOPNOTSUPP;

	ls = dlm_find_lockspace_local(lockspace);
	if (!ls)
		return -EINVAL;

	/* build the same identifying fields dlm_posix_lock() sent */
	memset(&info, 0, sizeof(info));
	info.pid = fl->fl_pid;
	info.ex = (fl->fl_type == F_WRLCK);
	info.fsid = ls->ls_global_id;
	dlm_put_lockspace(ls);
	info.number = number;
	info.start = fl->fl_start;
	info.end = fl->fl_end;
	info.owner = (__u64)fl->fl_pid;

	rv = do_lock_cancel(&info);
	switch (rv) {
	case 0:
		spin_lock(&ops_lock);
		/* lock request to cancel must be on recv_list because
		 * do_lock_cancel() synchronizes it.
		 */
		op = plock_lookup_waiter(&info);
		if (WARN_ON_ONCE(!op)) {
			spin_unlock(&ops_lock);
			rv = -ENOLCK;
			break;
		}

		list_del(&op->list);
		spin_unlock(&ops_lock);
		WARN_ON(op->info.optype != DLM_PLOCK_OP_LOCK);
		/* complete the async request with -EINTR instead of a grant */
		op->data->callback(op->data->fl, -EINTR);
		dlm_release_plock_op(op);
		rv = -EINTR;
		break;
	case -ENOENT:
		/* if cancel wasn't successful we probably were to late
		 * or it was a non-blocking lock request, so just unlock it.
		 */
		rv = dlm_posix_unlock(lockspace, number, file, fl);
		break;
	default:
		break;
	}

	return rv;
}
EXPORT_SYMBOL_GPL(dlm_posix_cancel);

/*
 * GETLK: ask userspace whether a conflicting lock exists and, if so,
 * fill @fl with the conflicting lock's details; otherwise set F_UNLCK.
 */
int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
		  struct file_lock *fl)
{
	struct dlm_ls *ls;
	struct plock_op *op;
	int rv;

	ls = dlm_find_lockspace_local(lockspace);
	if (!ls)
		return -EINVAL;

	op = kzalloc(sizeof(*op), GFP_NOFS);
	if (!op) {
		rv = -ENOMEM;
		goto out;
	}

	op->info.optype = DLM_PLOCK_OP_GET;
	op->info.pid = fl->fl_pid;
	op->info.ex = (fl->fl_type == F_WRLCK);
	op->info.fsid = ls->ls_global_id;
	op->info.number = number;
	op->info.start = fl->fl_start;
	op->info.end = fl->fl_end;
	if (fl->fl_lmops && fl->fl_lmops->lm_grant)
		op->info.owner = (__u64) fl->fl_pid;
	else
		op->info.owner = (__u64)(long) fl->fl_owner;

	send_op(op);
	wait_event(recv_wq, (op->done != 0));

	WARN_ON(!list_empty(&op->list));

	/* info.rv from userspace is 1 for conflict, 0 for no-conflict,
	   -ENOENT if there are no locks on the file */

	rv = op->info.rv;

	fl->fl_type = F_UNLCK;
	if (rv == -ENOENT)
		rv = 0;
	else if (rv > 0) {
		locks_init_lock(fl);
		fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
		fl->fl_flags = FL_POSIX;
		fl->fl_pid = op->info.pid;
		/* remote holders are reported with a negated pid */
		if (op->info.nodeid != dlm_our_nodeid())
			fl->fl_pid = -fl->fl_pid;
		fl->fl_start = op->info.start;
		fl->fl_end = op->info.end;
		rv = 0;
	}

	dlm_release_plock_op(op);
out:
	dlm_put_lockspace(ls);
	return rv;
}
EXPORT_SYMBOL_GPL(dlm_posix_get);

/* a read copies out one plock request from the send list */
static ssize_t dev_read(struct file *file, char __user *u, size_t count,
			loff_t *ppos)
{
	struct dlm_plock_info info;
	struct plock_op *op = NULL;

	if (count < sizeof(info))
		return -EINVAL;

	spin_lock(&ops_lock);
	if (!list_empty(&send_list)) {
		op = list_first_entry(&send_list, struct plock_op, list);
		/* CLOSE ops expect no reply, so they never go to recv_list */
		if (op->info.flags & DLM_PLOCK_FL_CLOSE)
			list_del(&op->list);
		else
			list_move_tail(&op->list, &recv_list);
		/* copy under the lock; the op itself may be gone below */
		memcpy(&info, &op->info, sizeof(info));
	}
	spin_unlock(&ops_lock);

	if (!op)
		return -EAGAIN;

	trace_dlm_plock_read(&info);

	/* there is no need to get a reply from userspace for unlocks
	   that were generated by the vfs cleaning up for a close
	   (the process did not make an unlock call). */

	if (op->info.flags & DLM_PLOCK_FL_CLOSE)
		dlm_release_plock_op(op);

	/* copy_to_user uses the local info snapshot, not the op */
	if (copy_to_user(u, &info, sizeof(info)))
		return -EFAULT;
	return sizeof(info);
}

/* a write copies in one plock result that should match a plock_op
   on the recv list */
static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
			 loff_t *ppos)
{
	struct plock_op *op = NULL, *iter;
	struct dlm_plock_info info;
	int do_callback = 0;

	if (count != sizeof(info))
		return -EINVAL;

	if (copy_from_user(&info, u, sizeof(info)))
		return -EFAULT;

	trace_dlm_plock_write(&info);

	if (check_version(&info))
		return -EINVAL;

	/*
	 * The results for waiting ops (SETLKW) can be returned in any
	 * order, so match all fields to find the op. The results for
	 * non-waiting ops are returned in the order that they were sent
	 * to userspace, so match the result with the first non-waiting op.
	 */
	spin_lock(&ops_lock);
	if (info.wait) {
		op = plock_lookup_waiter(&info);
	} else {
		list_for_each_entry(iter, &recv_list, list) {
			if (!iter->info.wait &&
			    iter->info.fsid == info.fsid) {
				op = iter;
				break;
			}
		}
	}

	if (op) {
		/* Sanity check that op and info match. */
		if (info.wait)
			WARN_ON(op->info.optype != DLM_PLOCK_OP_LOCK);
		else
			WARN_ON(op->info.number != info.number ||
				op->info.owner != info.owner ||
				op->info.optype != info.optype);

		list_del_init(&op->list);
		memcpy(&op->info, &info, sizeof(info));
		/* async ops are completed via callback, sync ops by waking
		 * the waiter; op->done is only meaningful for the latter */
		if (op->data)
			do_callback = 1;
		else
			op->done = 1;
	}
	spin_unlock(&ops_lock);

	if (op) {
		if (do_callback)
			dlm_plock_callback(op);
		else
			wake_up(&recv_wq);
	} else
		pr_debug("%s: no op %x %llx", __func__,
			 info.fsid, (unsigned long long)info.number);
	return count;
}

/* report the device readable whenever there is a pending request */
static __poll_t dev_poll(struct file *file, poll_table *wait)
{
	__poll_t mask = 0;

	poll_wait(file, &send_wq, wait);

	spin_lock(&ops_lock);
	if (!list_empty(&send_list))
		mask = EPOLLIN | EPOLLRDNORM;
	spin_unlock(&ops_lock);

	return mask;
}

static const struct file_operations dev_fops = {
	.read    = dev_read,
	.write   = dev_write,
	.poll    = dev_poll,
	.owner   = THIS_MODULE,
	.llseek  = noop_llseek,
};

static struct miscdevice plock_dev_misc = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = DLM_PLOCK_MISC_NAME,
	.fops = &dev_fops
};

/* register the plock misc device; returns 0 or misc_register()'s error */
int dlm_plock_init(void)
{
	int rv;

	rv = misc_register(&plock_dev_misc);
	if (rv)
		log_print("dlm_plock_init: misc_register failed %d", rv);
	return rv;
}

/* unregister the device; both lists should be empty by now */
void dlm_plock_exit(void)
{
	misc_deregister(&plock_dev_misc);
	WARN_ON(!list_empty(&send_list));
	WARN_ON(!list_empty(&recv_list));
}