1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. 4 */ 5 6 #include <linux/fs.h> 7 #include <linux/filelock.h> 8 #include <linux/miscdevice.h> 9 #include <linux/poll.h> 10 #include <linux/dlm.h> 11 #include <linux/dlm_plock.h> 12 #include <linux/slab.h> 13 14 #include "dlm_internal.h" 15 #include "lockspace.h" 16 17 static DEFINE_SPINLOCK(ops_lock); 18 static LIST_HEAD(send_list); 19 static LIST_HEAD(recv_list); 20 static DECLARE_WAIT_QUEUE_HEAD(send_wq); 21 static DECLARE_WAIT_QUEUE_HEAD(recv_wq); 22 23 struct plock_async_data { 24 void *fl; 25 void *file; 26 struct file_lock flc; 27 int (*callback)(struct file_lock *fl, int result); 28 }; 29 30 struct plock_op { 31 struct list_head list; 32 int done; 33 struct dlm_plock_info info; 34 /* if set indicates async handling */ 35 struct plock_async_data *data; 36 }; 37 38 static inline void set_version(struct dlm_plock_info *info) 39 { 40 info->version[0] = DLM_PLOCK_VERSION_MAJOR; 41 info->version[1] = DLM_PLOCK_VERSION_MINOR; 42 info->version[2] = DLM_PLOCK_VERSION_PATCH; 43 } 44 45 static int check_version(struct dlm_plock_info *info) 46 { 47 if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) || 48 (DLM_PLOCK_VERSION_MINOR < info->version[1])) { 49 log_print("plock device version mismatch: " 50 "kernel (%u.%u.%u), user (%u.%u.%u)", 51 DLM_PLOCK_VERSION_MAJOR, 52 DLM_PLOCK_VERSION_MINOR, 53 DLM_PLOCK_VERSION_PATCH, 54 info->version[0], 55 info->version[1], 56 info->version[2]); 57 return -EINVAL; 58 } 59 return 0; 60 } 61 62 static void dlm_release_plock_op(struct plock_op *op) 63 { 64 kfree(op->data); 65 kfree(op); 66 } 67 68 static void send_op(struct plock_op *op) 69 { 70 set_version(&op->info); 71 spin_lock(&ops_lock); 72 list_add_tail(&op->list, &send_list); 73 spin_unlock(&ops_lock); 74 wake_up(&send_wq); 75 } 76 77 /* If a process was killed while waiting for the only plock on a file, 78 locks_remove_posix will not see any lock on the file so it won't 79 send an unlock-close to us to pass on to userspace to clean up the 80 abandoned waiter. So, we have to insert the unlock-close when the 81 lock call is interrupted. */ 82 83 static void do_unlock_close(const struct dlm_plock_info *info) 84 { 85 struct plock_op *op; 86 87 op = kzalloc(sizeof(*op), GFP_NOFS); 88 if (!op) 89 return; 90 91 op->info.optype = DLM_PLOCK_OP_UNLOCK; 92 op->info.pid = info->pid; 93 op->info.fsid = info->fsid; 94 op->info.number = info->number; 95 op->info.start = 0; 96 op->info.end = OFFSET_MAX; 97 op->info.owner = info->owner; 98 99 op->info.flags |= DLM_PLOCK_FL_CLOSE; 100 send_op(op); 101 } 102 103 int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, 104 int cmd, struct file_lock *fl) 105 { 106 struct plock_async_data *op_data; 107 struct dlm_ls *ls; 108 struct plock_op *op; 109 int rv; 110 111 ls = dlm_find_lockspace_local(lockspace); 112 if (!ls) 113 return -EINVAL; 114 115 op = kzalloc(sizeof(*op), GFP_NOFS); 116 if (!op) { 117 rv = -ENOMEM; 118 goto out; 119 } 120 121 op->info.optype = DLM_PLOCK_OP_LOCK; 122 op->info.pid = fl->fl_pid; 123 op->info.ex = (fl->fl_type == F_WRLCK); 124 op->info.wait = IS_SETLKW(cmd); 125 op->info.fsid = ls->ls_global_id; 126 op->info.number = number; 127 op->info.start = fl->fl_start; 128 op->info.end = fl->fl_end; 129 /* async handling */ 130 if (fl->fl_lmops && fl->fl_lmops->lm_grant) { 131 op_data = kzalloc(sizeof(*op_data), GFP_NOFS); 132 if (!op_data) { 133 dlm_release_plock_op(op); 134 rv = -ENOMEM; 135 goto out; 136 } 137 138 /* fl_owner is lockd which doesn't distinguish 139 processes on the nfs client */ 140 op->info.owner = (__u64) fl->fl_pid; 141 op_data->callback = fl->fl_lmops->lm_grant; 142 locks_init_lock(&op_data->flc); 143 locks_copy_lock(&op_data->flc, fl); 144 op_data->fl = fl; 145 op_data->file = file; 146 147 op->data = op_data; 148 149 send_op(op); 150 rv = FILE_LOCK_DEFERRED; 151 goto out; 152 } else { 153 op->info.owner = (__u64)(long) fl->fl_owner; 154 } 155 156 send_op(op); 157 158 if (op->info.wait) { 159 rv = wait_event_killable(recv_wq, (op->done != 0)); 160 if (rv == -ERESTARTSYS) { 161 spin_lock(&ops_lock); 162 /* recheck under ops_lock if we got a done != 0, 163 * if so this interrupt case should be ignored 164 */ 165 if (op->done != 0) { 166 spin_unlock(&ops_lock); 167 goto do_lock_wait; 168 } 169 list_del(&op->list); 170 spin_unlock(&ops_lock); 171 172 log_debug(ls, "%s: wait interrupted %x %llx pid %d", 173 __func__, ls->ls_global_id, 174 (unsigned long long)number, op->info.pid); 175 do_unlock_close(&op->info); 176 dlm_release_plock_op(op); 177 goto out; 178 } 179 } else { 180 wait_event(recv_wq, (op->done != 0)); 181 } 182 183 do_lock_wait: 184 185 WARN_ON(!list_empty(&op->list)); 186 187 rv = op->info.rv; 188 189 if (!rv) { 190 if (locks_lock_file_wait(file, fl) < 0) 191 log_error(ls, "dlm_posix_lock: vfs lock error %llx", 192 (unsigned long long)number); 193 } 194 195 dlm_release_plock_op(op); 196 out: 197 dlm_put_lockspace(ls); 198 return rv; 199 } 200 EXPORT_SYMBOL_GPL(dlm_posix_lock); 201 202 /* Returns failure iff a successful lock operation should be canceled */ 203 static int dlm_plock_callback(struct plock_op *op) 204 { 205 struct plock_async_data *op_data = op->data; 206 struct file *file; 207 struct file_lock *fl; 208 struct file_lock *flc; 209 int (*notify)(struct file_lock *fl, int result) = NULL; 210 int rv = 0; 211 212 WARN_ON(!list_empty(&op->list)); 213 214 /* check if the following 2 are still valid or make a copy */ 215 file = op_data->file; 216 flc = &op_data->flc; 217 fl = op_data->fl; 218 notify = op_data->callback; 219 220 if (op->info.rv) { 221 notify(fl, op->info.rv); 222 goto out; 223 } 224 225 /* got fs lock; bookkeep locally as well: */ 226 flc->fl_flags &= ~FL_SLEEP; 227 if (posix_lock_file(file, flc, NULL)) { 228 /* 229 * This can only happen in the case of kmalloc() failure. 230 * The filesystem's own lock is the authoritative lock, 231 * so a failure to get the lock locally is not a disaster. 232 * As long as the fs cannot reliably cancel locks (especially 233 * in a low-memory situation), we're better off ignoring 234 * this failure than trying to recover. 235 */ 236 log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p", 237 (unsigned long long)op->info.number, file, fl); 238 } 239 240 rv = notify(fl, 0); 241 if (rv) { 242 /* XXX: We need to cancel the fs lock here: */ 243 log_print("dlm_plock_callback: lock granted after lock request " 244 "failed; dangling lock!\n"); 245 goto out; 246 } 247 248 out: 249 dlm_release_plock_op(op); 250 return rv; 251 } 252 253 int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, 254 struct file_lock *fl) 255 { 256 struct dlm_ls *ls; 257 struct plock_op *op; 258 int rv; 259 unsigned char fl_flags = fl->fl_flags; 260 261 ls = dlm_find_lockspace_local(lockspace); 262 if (!ls) 263 return -EINVAL; 264 265 op = kzalloc(sizeof(*op), GFP_NOFS); 266 if (!op) { 267 rv = -ENOMEM; 268 goto out; 269 } 270 271 /* cause the vfs unlock to return ENOENT if lock is not found */ 272 fl->fl_flags |= FL_EXISTS; 273 274 rv = locks_lock_file_wait(file, fl); 275 if (rv == -ENOENT) { 276 rv = 0; 277 goto out_free; 278 } 279 if (rv < 0) { 280 log_error(ls, "dlm_posix_unlock: vfs unlock error %d %llx", 281 rv, (unsigned long long)number); 282 } 283 284 op->info.optype = DLM_PLOCK_OP_UNLOCK; 285 op->info.pid = fl->fl_pid; 286 op->info.fsid = ls->ls_global_id; 287 op->info.number = number; 288 op->info.start = fl->fl_start; 289 op->info.end = fl->fl_end; 290 if (fl->fl_lmops && fl->fl_lmops->lm_grant) 291 op->info.owner = (__u64) fl->fl_pid; 292 else 293 op->info.owner = (__u64)(long) fl->fl_owner; 294 295 if (fl->fl_flags & FL_CLOSE) { 296 op->info.flags |= DLM_PLOCK_FL_CLOSE; 297 send_op(op); 298 rv = 0; 299 goto out; 300 } 301 302 send_op(op); 303 wait_event(recv_wq, (op->done != 0)); 304 305 WARN_ON(!list_empty(&op->list)); 306 307 rv = op->info.rv; 308 309 if (rv == -ENOENT) 310 rv = 0; 311 312 out_free: 313 dlm_release_plock_op(op); 314 out: 315 dlm_put_lockspace(ls); 316 fl->fl_flags = fl_flags; 317 return rv; 318 } 319 EXPORT_SYMBOL_GPL(dlm_posix_unlock); 320 321 int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, 322 struct file_lock *fl) 323 { 324 struct dlm_ls *ls; 325 struct plock_op *op; 326 int rv; 327 328 ls = dlm_find_lockspace_local(lockspace); 329 if (!ls) 330 return -EINVAL; 331 332 op = kzalloc(sizeof(*op), GFP_NOFS); 333 if (!op) { 334 rv = -ENOMEM; 335 goto out; 336 } 337 338 op->info.optype = DLM_PLOCK_OP_GET; 339 op->info.pid = fl->fl_pid; 340 op->info.ex = (fl->fl_type == F_WRLCK); 341 op->info.fsid = ls->ls_global_id; 342 op->info.number = number; 343 op->info.start = fl->fl_start; 344 op->info.end = fl->fl_end; 345 if (fl->fl_lmops && fl->fl_lmops->lm_grant) 346 op->info.owner = (__u64) fl->fl_pid; 347 else 348 op->info.owner = (__u64)(long) fl->fl_owner; 349 350 send_op(op); 351 wait_event(recv_wq, (op->done != 0)); 352 353 WARN_ON(!list_empty(&op->list)); 354 355 /* info.rv from userspace is 1 for conflict, 0 for no-conflict, 356 -ENOENT if there are no locks on the file */ 357 358 rv = op->info.rv; 359 360 fl->fl_type = F_UNLCK; 361 if (rv == -ENOENT) 362 rv = 0; 363 else if (rv > 0) { 364 locks_init_lock(fl); 365 fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK; 366 fl->fl_flags = FL_POSIX; 367 fl->fl_pid = op->info.pid; 368 if (op->info.nodeid != dlm_our_nodeid()) 369 fl->fl_pid = -fl->fl_pid; 370 fl->fl_start = op->info.start; 371 fl->fl_end = op->info.end; 372 rv = 0; 373 } 374 375 dlm_release_plock_op(op); 376 out: 377 dlm_put_lockspace(ls); 378 return rv; 379 } 380 EXPORT_SYMBOL_GPL(dlm_posix_get); 381 382 /* a read copies out one plock request from the send list */ 383 static ssize_t dev_read(struct file *file, char __user *u, size_t count, 384 loff_t *ppos) 385 { 386 struct dlm_plock_info info; 387 struct plock_op *op = NULL; 388 389 if (count < sizeof(info)) 390 return -EINVAL; 391 392 spin_lock(&ops_lock); 393 if (!list_empty(&send_list)) { 394 op = list_first_entry(&send_list, struct plock_op, list); 395 if (op->info.flags & DLM_PLOCK_FL_CLOSE) 396 list_del(&op->list); 397 else 398 list_move_tail(&op->list, &recv_list); 399 memcpy(&info, &op->info, sizeof(info)); 400 } 401 spin_unlock(&ops_lock); 402 403 if (!op) 404 return -EAGAIN; 405 406 /* there is no need to get a reply from userspace for unlocks 407 that were generated by the vfs cleaning up for a close 408 (the process did not make an unlock call). */ 409 410 if (op->info.flags & DLM_PLOCK_FL_CLOSE) 411 dlm_release_plock_op(op); 412 413 if (copy_to_user(u, &info, sizeof(info))) 414 return -EFAULT; 415 return sizeof(info); 416 } 417 418 /* a write copies in one plock result that should match a plock_op 419 on the recv list */ 420 static ssize_t dev_write(struct file *file, const char __user *u, size_t count, 421 loff_t *ppos) 422 { 423 struct plock_op *op = NULL, *iter; 424 struct dlm_plock_info info; 425 int do_callback = 0; 426 427 if (count != sizeof(info)) 428 return -EINVAL; 429 430 if (copy_from_user(&info, u, sizeof(info))) 431 return -EFAULT; 432 433 if (check_version(&info)) 434 return -EINVAL; 435 436 /* 437 * The results for waiting ops (SETLKW) can be returned in any 438 * order, so match all fields to find the op. The results for 439 * non-waiting ops are returned in the order that they were sent 440 * to userspace, so match the result with the first non-waiting op. 441 */ 442 spin_lock(&ops_lock); 443 if (info.wait) { 444 list_for_each_entry(iter, &recv_list, list) { 445 if (iter->info.fsid == info.fsid && 446 iter->info.number == info.number && 447 iter->info.owner == info.owner && 448 iter->info.pid == info.pid && 449 iter->info.start == info.start && 450 iter->info.end == info.end && 451 iter->info.ex == info.ex && 452 iter->info.wait) { 453 op = iter; 454 break; 455 } 456 } 457 } else { 458 list_for_each_entry(iter, &recv_list, list) { 459 if (!iter->info.wait) { 460 op = iter; 461 break; 462 } 463 } 464 } 465 466 if (op) { 467 /* Sanity check that op and info match. */ 468 if (info.wait) 469 WARN_ON(op->info.optype != DLM_PLOCK_OP_LOCK); 470 else 471 WARN_ON(op->info.fsid != info.fsid || 472 op->info.number != info.number || 473 op->info.owner != info.owner || 474 op->info.optype != info.optype); 475 476 list_del_init(&op->list); 477 memcpy(&op->info, &info, sizeof(info)); 478 if (op->data) 479 do_callback = 1; 480 else 481 op->done = 1; 482 } 483 spin_unlock(&ops_lock); 484 485 if (op) { 486 if (do_callback) 487 dlm_plock_callback(op); 488 else 489 wake_up(&recv_wq); 490 } else 491 pr_debug("%s: no op %x %llx", __func__, 492 info.fsid, (unsigned long long)info.number); 493 return count; 494 } 495 496 static __poll_t dev_poll(struct file *file, poll_table *wait) 497 { 498 __poll_t mask = 0; 499 500 poll_wait(file, &send_wq, wait); 501 502 spin_lock(&ops_lock); 503 if (!list_empty(&send_list)) 504 mask = EPOLLIN | EPOLLRDNORM; 505 spin_unlock(&ops_lock); 506 507 return mask; 508 } 509 510 static const struct file_operations dev_fops = { 511 .read = dev_read, 512 .write = dev_write, 513 .poll = dev_poll, 514 .owner = THIS_MODULE, 515 .llseek = noop_llseek, 516 }; 517 518 static struct miscdevice plock_dev_misc = { 519 .minor = MISC_DYNAMIC_MINOR, 520 .name = DLM_PLOCK_MISC_NAME, 521 .fops = &dev_fops 522 }; 523 524 int dlm_plock_init(void) 525 { 526 int rv; 527 528 rv = misc_register(&plock_dev_misc); 529 if (rv) 530 log_print("dlm_plock_init: misc_register failed %d", rv); 531 return rv; 532 } 533 534 void dlm_plock_exit(void) 535 { 536 misc_deregister(&plock_dev_misc); 537 } 538 539