xref: /openbmc/linux/fs/dlm/plock.c (revision 7c53e847)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2005-2008 Red Hat, Inc.  All rights reserved.
4  */
5 
6 #include <linux/fs.h>
7 #include <linux/filelock.h>
8 #include <linux/miscdevice.h>
9 #include <linux/poll.h>
10 #include <linux/dlm.h>
11 #include <linux/dlm_plock.h>
12 #include <linux/slab.h>
13 
14 #include <trace/events/dlm.h>
15 
16 #include "dlm_internal.h"
17 #include "lockspace.h"
18 
/* ops_lock guards send_list, recv_list and the list linkage of queued ops. */
19 static DEFINE_SPINLOCK(ops_lock);
/* ops queued by send_op() that dev_read() has not yet handed to userspace */
20 static LIST_HEAD(send_list);
/* ops dev_read() passed to userspace; dev_write() matches replies against it */
21 static LIST_HEAD(recv_list);
/* woken by send_op() after queueing; dev_poll() registers on it */
22 static DECLARE_WAIT_QUEUE_HEAD(send_wq);
/* woken by dev_write() when a synchronous op is marked done */
23 static DECLARE_WAIT_QUEUE_HEAD(recv_wq);
24 
/*
 * Extra state attached to an asynchronous lock request. It is filled in by
 * the lm_grant branch of dlm_posix_lock() and consumed by
 * dlm_plock_callback() and dlm_posix_cancel().
 */
25 struct plock_async_data {
	/* the caller's struct file_lock; passed back to ->callback */
26 	void *fl;
	/* the struct file the lock applies to; used for posix_lock_file() */
27 	void *file;
	/* private copy of the request made with locks_copy_lock() */
28 	struct file_lock flc;
	/* completion hook, taken from fl->fl_lmops->lm_grant */
29 	int (*callback)(struct file_lock *fl, int result);
30 };
31 
/*
 * One plock request travelling between the kernel and the userspace daemon
 * via the misc device (see dev_read() and dev_write()).
 */
32 struct plock_op {
	/* links the op into send_list or recv_list */
33 	struct list_head list;
	/* set to 1 by dev_write() once the reply for a synchronous op arrived */
34 	int done;
	/* request sent to userspace; overwritten with the reply by dev_write() */
35 	struct dlm_plock_info info;
36 	/* if set indicates async handling; NULL for synchronous requests */
37 	struct plock_async_data *data;
38 };
39 
set_version(struct dlm_plock_info * info)40 static inline void set_version(struct dlm_plock_info *info)
41 {
42 	info->version[0] = DLM_PLOCK_VERSION_MAJOR;
43 	info->version[1] = DLM_PLOCK_VERSION_MINOR;
44 	info->version[2] = DLM_PLOCK_VERSION_PATCH;
45 }
46 
plock_lookup_waiter(const struct dlm_plock_info * info)47 static struct plock_op *plock_lookup_waiter(const struct dlm_plock_info *info)
48 {
49 	struct plock_op *op = NULL, *iter;
50 
51 	list_for_each_entry(iter, &recv_list, list) {
52 		if (iter->info.fsid == info->fsid &&
53 		    iter->info.number == info->number &&
54 		    iter->info.owner == info->owner &&
55 		    iter->info.pid == info->pid &&
56 		    iter->info.start == info->start &&
57 		    iter->info.end == info->end &&
58 		    iter->info.ex == info->ex &&
59 		    iter->info.wait) {
60 			op = iter;
61 			break;
62 		}
63 	}
64 
65 	return op;
66 }
67 
check_version(struct dlm_plock_info * info)68 static int check_version(struct dlm_plock_info *info)
69 {
70 	if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) ||
71 	    (DLM_PLOCK_VERSION_MINOR < info->version[1])) {
72 		log_print("plock device version mismatch: "
73 			  "kernel (%u.%u.%u), user (%u.%u.%u)",
74 			  DLM_PLOCK_VERSION_MAJOR,
75 			  DLM_PLOCK_VERSION_MINOR,
76 			  DLM_PLOCK_VERSION_PATCH,
77 			  info->version[0],
78 			  info->version[1],
79 			  info->version[2]);
80 		return -EINVAL;
81 	}
82 	return 0;
83 }
84 
dlm_release_plock_op(struct plock_op * op)85 static void dlm_release_plock_op(struct plock_op *op)
86 {
87 	kfree(op->data);
88 	kfree(op);
89 }
90 
send_op(struct plock_op * op)91 static void send_op(struct plock_op *op)
92 {
93 	set_version(&op->info);
94 	spin_lock(&ops_lock);
95 	list_add_tail(&op->list, &send_list);
96 	spin_unlock(&ops_lock);
97 	wake_up(&send_wq);
98 }
99 
do_lock_cancel(const struct dlm_plock_info * orig_info)100 static int do_lock_cancel(const struct dlm_plock_info *orig_info)
101 {
102 	struct plock_op *op;
103 	int rv;
104 
105 	op = kzalloc(sizeof(*op), GFP_NOFS);
106 	if (!op)
107 		return -ENOMEM;
108 
109 	op->info = *orig_info;
110 	op->info.optype = DLM_PLOCK_OP_CANCEL;
111 	op->info.wait = 0;
112 
113 	send_op(op);
114 	wait_event(recv_wq, (op->done != 0));
115 
116 	rv = op->info.rv;
117 
118 	dlm_release_plock_op(op);
119 	return rv;
120 }
121 
/*
 * dlm_posix_lock() - send a POSIX lock request to the userspace plock daemon
 * @lockspace: lockspace handle, resolved with dlm_find_lockspace_local()
 * @number: resource number, passed through to userspace in info.number
 * @file: file the lock applies to
 * @cmd: fcntl command; IS_SETLKW(cmd) selects a blocking request
 * @fl: the lock being requested
 *
 * Asynchronous callers (fl->fl_lmops->lm_grant set): the request is queued
 * and FILE_LOCK_DEFERRED is returned; dlm_plock_callback() finishes the job
 * when userspace replies.
 *
 * Synchronous callers sleep until userspace replies.  A blocking request
 * sleeps interruptibly; when interrupted before a reply arrived, a cancel
 * is sent and, if userspace confirms it, -EINTR is returned.  If the cancel
 * is not confirmed the function falls back to an uninterruptible wait for
 * the original reply.
 *
 * Returns -EINVAL if the lockspace is not found, -ENOMEM on allocation
 * failure, FILE_LOCK_DEFERRED for async requests, -EINTR for a cancelled
 * wait, otherwise the rv reported by userspace.
 */
int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
		   int cmd, struct file_lock *fl)
{
	struct plock_async_data *async;
	struct plock_op *op;
	struct dlm_ls *ls;
	int still_pending;
	int rv;

	ls = dlm_find_lockspace_local(lockspace);
	if (!ls)
		return -EINVAL;

	op = kzalloc(sizeof(*op), GFP_NOFS);
	if (!op) {
		rv = -ENOMEM;
		goto out;
	}

	op->info.optype = DLM_PLOCK_OP_LOCK;
	op->info.pid = fl->fl_pid;
	op->info.ex = (fl->fl_type == F_WRLCK);
	op->info.wait = IS_SETLKW(cmd);
	op->info.fsid = ls->ls_global_id;
	op->info.number = number;
	op->info.start = fl->fl_start;
	op->info.end = fl->fl_end;

	/* async handling */
	if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
		async = kzalloc(sizeof(*async), GFP_NOFS);
		if (!async) {
			dlm_release_plock_op(op);
			rv = -ENOMEM;
			goto out;
		}

		/* fl_owner is lockd which doesn't distinguish
		   processes on the nfs client */
		op->info.owner = (__u64) fl->fl_pid;

		async->callback = fl->fl_lmops->lm_grant;
		locks_init_lock(&async->flc);
		locks_copy_lock(&async->flc, fl);
		async->fl = fl;
		async->file = file;
		op->data = async;

		/* the op now belongs to the reply path; do not touch it again */
		send_op(op);
		rv = FILE_LOCK_DEFERRED;
		goto out;
	}

	op->info.owner = (__u64)(long) fl->fl_owner;
	send_op(op);

	if (!op->info.wait) {
		wait_event(recv_wq, (op->done != 0));
	} else {
		rv = wait_event_interruptible(recv_wq, (op->done != 0));
		if (rv == -ERESTARTSYS) {
			/* recheck under ops_lock whether the reply arrived
			 * anyway; if so the interruption is ignored
			 */
			spin_lock(&ops_lock);
			still_pending = (op->done == 0);
			spin_unlock(&ops_lock);

			if (still_pending) {
				rv = do_lock_cancel(&op->info);
				if (rv == 0) {
					/* waiter was deleted in user space,
					 * answer will never come; remove the
					 * original request.  It must be on
					 * recv_list because the answer of
					 * do_lock_cancel() synchronized it.
					 */
					spin_lock(&ops_lock);
					list_del(&op->list);
					spin_unlock(&ops_lock);
					rv = -EINTR;

					log_debug(ls, "%s: wait interrupted %x %llx pid %d",
						  __func__, ls->ls_global_id,
						  (unsigned long long)number, op->info.pid);
					dlm_release_plock_op(op);
					goto out;
				}

				/* -ENOENT: cancellation wasn't successful but
				 * the op should be done; any other value is an
				 * internal error doing the cancel.  Either way
				 * we need to wait for the original reply.
				 */
				wait_event(recv_wq, (op->done != 0));
			}
		}
	}

	/* dev_write() unlinked the op before marking it done */
	WARN_ON(!list_empty(&op->list));

	rv = op->info.rv;

	/* granted by userspace: bookkeep the lock in the local vfs as well */
	if (!rv && locks_lock_file_wait(file, fl) < 0)
		log_error(ls, "dlm_posix_lock: vfs lock error %llx",
			  (unsigned long long)number);

	dlm_release_plock_op(op);
out:
	dlm_put_lockspace(ls);
	return rv;
}
239 EXPORT_SYMBOL_GPL(dlm_posix_lock);
240 
241 /* Returns failure iff a successful lock operation should be canceled */
dlm_plock_callback(struct plock_op * op)242 static int dlm_plock_callback(struct plock_op *op)
243 {
244 	struct plock_async_data *op_data = op->data;
245 	struct file *file;
246 	struct file_lock *fl;
247 	struct file_lock *flc;
248 	int (*notify)(struct file_lock *fl, int result) = NULL;
249 	int rv = 0;
250 
251 	WARN_ON(!list_empty(&op->list));
252 
253 	/* check if the following 2 are still valid or make a copy */
254 	file = op_data->file;
255 	flc = &op_data->flc;
256 	fl = op_data->fl;
257 	notify = op_data->callback;
258 
259 	if (op->info.rv) {
260 		notify(fl, op->info.rv);
261 		goto out;
262 	}
263 
264 	/* got fs lock; bookkeep locally as well: */
265 	flc->fl_flags &= ~FL_SLEEP;
266 	if (posix_lock_file(file, flc, NULL)) {
267 		/*
268 		 * This can only happen in the case of kmalloc() failure.
269 		 * The filesystem's own lock is the authoritative lock,
270 		 * so a failure to get the lock locally is not a disaster.
271 		 * As long as the fs cannot reliably cancel locks (especially
272 		 * in a low-memory situation), we're better off ignoring
273 		 * this failure than trying to recover.
274 		 */
275 		log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p",
276 			  (unsigned long long)op->info.number, file, fl);
277 	}
278 
279 	rv = notify(fl, 0);
280 	if (rv) {
281 		/* XXX: We need to cancel the fs lock here: */
282 		log_print("%s: lock granted after lock request failed; dangling lock!",
283 			  __func__);
284 		goto out;
285 	}
286 
287 out:
288 	dlm_release_plock_op(op);
289 	return rv;
290 }
291 
/*
 * dlm_posix_unlock() - drop a POSIX lock locally and tell userspace
 * @lockspace: lockspace handle, resolved with dlm_find_lockspace_local()
 * @number: resource number, passed through to userspace in info.number
 * @file: file the lock applies to
 * @fl: the unlock request; fl->fl_flags is temporarily modified and
 *      restored before returning
 *
 * The lock is first removed from the local vfs.  If the vfs reports that no
 * such lock exists (-ENOENT) nothing is sent to userspace and 0 is returned.
 * An unlock flagged FL_CLOSE is queued without waiting for a reply; in that
 * case dev_read() frees the op after copying it out.
 *
 * Returns -EINVAL if the lockspace is not found, -ENOMEM on allocation
 * failure, otherwise the rv reported by userspace with -ENOENT mapped to 0.
 */
int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
		     struct file_lock *fl)
{
	struct dlm_ls *ls;
	struct plock_op *op;
	int rv;
	/*
	 * Keep the saved copy as wide as fl_flags itself; a one-byte copy
	 * would drop every flag bit above 0xff when it is written back.
	 */
	unsigned int fl_flags = fl->fl_flags;

	ls = dlm_find_lockspace_local(lockspace);
	if (!ls)
		return -EINVAL;

	op = kzalloc(sizeof(*op), GFP_NOFS);
	if (!op) {
		rv = -ENOMEM;
		goto out;
	}

	/* cause the vfs unlock to return ENOENT if lock is not found */
	fl->fl_flags |= FL_EXISTS;

	rv = locks_lock_file_wait(file, fl);
	if (rv == -ENOENT) {
		rv = 0;
		goto out_free;
	}
	if (rv < 0) {
		/* logged only; the unlock is still forwarded to userspace */
		log_error(ls, "dlm_posix_unlock: vfs unlock error %d %llx",
			  rv, (unsigned long long)number);
	}

	op->info.optype		= DLM_PLOCK_OP_UNLOCK;
	op->info.pid		= fl->fl_pid;
	op->info.fsid		= ls->ls_global_id;
	op->info.number		= number;
	op->info.start		= fl->fl_start;
	op->info.end		= fl->fl_end;
	/* same owner encoding as dlm_posix_lock() used for the request */
	if (fl->fl_lmops && fl->fl_lmops->lm_grant)
		op->info.owner	= (__u64) fl->fl_pid;
	else
		op->info.owner	= (__u64)(long) fl->fl_owner;

	if (fl->fl_flags & FL_CLOSE) {
		/* fire and forget: dev_read() releases the op, not us */
		op->info.flags |= DLM_PLOCK_FL_CLOSE;
		send_op(op);
		rv = 0;
		goto out;
	}

	send_op(op);
	wait_event(recv_wq, (op->done != 0));

	WARN_ON(!list_empty(&op->list));

	rv = op->info.rv;

	if (rv == -ENOENT)
		rv = 0;

out_free:
	dlm_release_plock_op(op);
out:
	dlm_put_lockspace(ls);
	/* undo the FL_EXISTS tweak (and anything else) for the caller */
	fl->fl_flags = fl_flags;
	return rv;
}
358 EXPORT_SYMBOL_GPL(dlm_posix_unlock);
359 
360 /*
361  * NOTE: This implementation can only handle async lock requests as nfs
362  * do it. It cannot handle cancellation of a pending lock request sitting
363  * in wait_event(), but for now only nfs is the only user local kernel
364  * user.
365  */
dlm_posix_cancel(dlm_lockspace_t * lockspace,u64 number,struct file * file,struct file_lock * fl)366 int dlm_posix_cancel(dlm_lockspace_t *lockspace, u64 number, struct file *file,
367 		     struct file_lock *fl)
368 {
369 	struct dlm_plock_info info;
370 	struct plock_op *op;
371 	struct dlm_ls *ls;
372 	int rv;
373 
374 	/* this only works for async request for now and nfs is the only
375 	 * kernel user right now.
376 	 */
377 	if (WARN_ON_ONCE(!fl->fl_lmops || !fl->fl_lmops->lm_grant))
378 		return -EOPNOTSUPP;
379 
380 	ls = dlm_find_lockspace_local(lockspace);
381 	if (!ls)
382 		return -EINVAL;
383 
384 	memset(&info, 0, sizeof(info));
385 	info.pid = fl->fl_pid;
386 	info.ex = (fl->fl_type == F_WRLCK);
387 	info.fsid = ls->ls_global_id;
388 	dlm_put_lockspace(ls);
389 	info.number = number;
390 	info.start = fl->fl_start;
391 	info.end = fl->fl_end;
392 	info.owner = (__u64)fl->fl_pid;
393 
394 	rv = do_lock_cancel(&info);
395 	switch (rv) {
396 	case 0:
397 		spin_lock(&ops_lock);
398 		/* lock request to cancel must be on recv_list because
399 		 * do_lock_cancel() synchronizes it.
400 		 */
401 		op = plock_lookup_waiter(&info);
402 		if (WARN_ON_ONCE(!op)) {
403 			spin_unlock(&ops_lock);
404 			rv = -ENOLCK;
405 			break;
406 		}
407 
408 		list_del(&op->list);
409 		spin_unlock(&ops_lock);
410 		WARN_ON(op->info.optype != DLM_PLOCK_OP_LOCK);
411 		op->data->callback(op->data->fl, -EINTR);
412 		dlm_release_plock_op(op);
413 		rv = -EINTR;
414 		break;
415 	case -ENOENT:
416 		/* if cancel wasn't successful we probably were to late
417 		 * or it was a non-blocking lock request, so just unlock it.
418 		 */
419 		rv = dlm_posix_unlock(lockspace, number, file, fl);
420 		break;
421 	default:
422 		break;
423 	}
424 
425 	return rv;
426 }
427 EXPORT_SYMBOL_GPL(dlm_posix_cancel);
428 
/*
 * dlm_posix_get() - ask userspace whether a lock would conflict (F_GETLK)
 * @lockspace: lockspace handle, resolved with dlm_find_lockspace_local()
 * @number: resource number, passed through to userspace in info.number
 * @file: file the query applies to (not used by this function)
 * @fl: in: the lock to test; out: F_UNLCK if there is no conflict, else a
 *      description of the conflicting lock
 *
 * For a conflicting lock held on another node (info.nodeid differs from
 * dlm_our_nodeid()) the reported pid is negated.
 *
 * Returns -EINVAL if the lockspace is not found, -ENOMEM on allocation
 * failure, a negative rv other than -ENOENT as reported by userspace, and
 * 0 otherwise.
 */
int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
		  struct file_lock *fl)
{
	struct plock_op *query;
	struct dlm_ls *ls;
	int rv;

	ls = dlm_find_lockspace_local(lockspace);
	if (!ls)
		return -EINVAL;

	query = kzalloc(sizeof(*query), GFP_NOFS);
	if (!query) {
		rv = -ENOMEM;
		goto out;
	}

	query->info.optype = DLM_PLOCK_OP_GET;
	query->info.pid = fl->fl_pid;
	query->info.ex = (fl->fl_type == F_WRLCK);
	query->info.fsid = ls->ls_global_id;
	query->info.number = number;
	query->info.start = fl->fl_start;
	query->info.end = fl->fl_end;
	if (fl->fl_lmops && fl->fl_lmops->lm_grant)
		query->info.owner = (__u64) fl->fl_pid;
	else
		query->info.owner = (__u64)(long) fl->fl_owner;

	send_op(query);
	wait_event(recv_wq, (query->done != 0));

	WARN_ON(!list_empty(&query->list));

	/* info.rv from userspace is 1 for conflict, 0 for no-conflict,
	   -ENOENT if there are no locks on the file */
	rv = query->info.rv;

	/* default answer: nothing in the way */
	fl->fl_type = F_UNLCK;

	if (rv == -ENOENT) {
		rv = 0;
	} else if (rv > 0) {
		/* conflict: report the lock userspace described */
		locks_init_lock(fl);
		fl->fl_type = (query->info.ex) ? F_WRLCK : F_RDLCK;
		fl->fl_flags = FL_POSIX;
		fl->fl_pid = query->info.pid;
		if (query->info.nodeid != dlm_our_nodeid())
			fl->fl_pid = -fl->fl_pid;
		fl->fl_start = query->info.start;
		fl->fl_end = query->info.end;
		rv = 0;
	}

	dlm_release_plock_op(query);
out:
	dlm_put_lockspace(ls);
	return rv;
}
488 EXPORT_SYMBOL_GPL(dlm_posix_get);
489 
490 /* a read copies out one plock request from the send list */
dev_read(struct file * file,char __user * u,size_t count,loff_t * ppos)491 static ssize_t dev_read(struct file *file, char __user *u, size_t count,
492 			loff_t *ppos)
493 {
494 	struct dlm_plock_info info;
495 	struct plock_op *op = NULL;
496 
497 	if (count < sizeof(info))
498 		return -EINVAL;
499 
500 	spin_lock(&ops_lock);
501 	if (!list_empty(&send_list)) {
502 		op = list_first_entry(&send_list, struct plock_op, list);
503 		if (op->info.flags & DLM_PLOCK_FL_CLOSE)
504 			list_del(&op->list);
505 		else
506 			list_move_tail(&op->list, &recv_list);
507 		memcpy(&info, &op->info, sizeof(info));
508 	}
509 	spin_unlock(&ops_lock);
510 
511 	if (!op)
512 		return -EAGAIN;
513 
514 	trace_dlm_plock_read(&info);
515 
516 	/* there is no need to get a reply from userspace for unlocks
517 	   that were generated by the vfs cleaning up for a close
518 	   (the process did not make an unlock call). */
519 
520 	if (op->info.flags & DLM_PLOCK_FL_CLOSE)
521 		dlm_release_plock_op(op);
522 
523 	if (copy_to_user(u, &info, sizeof(info)))
524 		return -EFAULT;
525 	return sizeof(info);
526 }
527 
528 /* a write copies in one plock result that should match a plock_op
529    on the recv list */
dev_write(struct file * file,const char __user * u,size_t count,loff_t * ppos)530 static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
531 			 loff_t *ppos)
532 {
533 	struct plock_op *op = NULL, *iter;
534 	struct dlm_plock_info info;
535 	int do_callback = 0;
536 
537 	if (count != sizeof(info))
538 		return -EINVAL;
539 
540 	if (copy_from_user(&info, u, sizeof(info)))
541 		return -EFAULT;
542 
543 	trace_dlm_plock_write(&info);
544 
545 	if (check_version(&info))
546 		return -EINVAL;
547 
548 	/*
549 	 * The results for waiting ops (SETLKW) can be returned in any
550 	 * order, so match all fields to find the op.  The results for
551 	 * non-waiting ops are returned in the order that they were sent
552 	 * to userspace, so match the result with the first non-waiting op.
553 	 */
554 	spin_lock(&ops_lock);
555 	if (info.wait) {
556 		op = plock_lookup_waiter(&info);
557 	} else {
558 		list_for_each_entry(iter, &recv_list, list) {
559 			if (!iter->info.wait &&
560 			    iter->info.fsid == info.fsid) {
561 				op = iter;
562 				break;
563 			}
564 		}
565 	}
566 
567 	if (op) {
568 		/* Sanity check that op and info match. */
569 		if (info.wait)
570 			WARN_ON(op->info.optype != DLM_PLOCK_OP_LOCK);
571 		else
572 			WARN_ON(op->info.number != info.number ||
573 				op->info.owner != info.owner ||
574 				op->info.optype != info.optype);
575 
576 		list_del_init(&op->list);
577 		memcpy(&op->info, &info, sizeof(info));
578 		if (op->data)
579 			do_callback = 1;
580 		else
581 			op->done = 1;
582 	}
583 	spin_unlock(&ops_lock);
584 
585 	if (op) {
586 		if (do_callback)
587 			dlm_plock_callback(op);
588 		else
589 			wake_up(&recv_wq);
590 	} else
591 		pr_debug("%s: no op %x %llx", __func__,
592 			 info.fsid, (unsigned long long)info.number);
593 	return count;
594 }
595 
dev_poll(struct file * file,poll_table * wait)596 static __poll_t dev_poll(struct file *file, poll_table *wait)
597 {
598 	__poll_t mask = 0;
599 
600 	poll_wait(file, &send_wq, wait);
601 
602 	spin_lock(&ops_lock);
603 	if (!list_empty(&send_list))
604 		mask = EPOLLIN | EPOLLRDNORM;
605 	spin_unlock(&ops_lock);
606 
607 	return mask;
608 }
609 
/*
 * File operations of the plock misc device: userspace reads requests
 * (dev_read), writes results (dev_write) and polls for pending requests
 * (dev_poll).
 */
610 static const struct file_operations dev_fops = {
611 	.read    = dev_read,
612 	.write   = dev_write,
613 	.poll    = dev_poll,
614 	.owner   = THIS_MODULE,
615 	.llseek  = noop_llseek,
616 };
617 
/*
 * The plock misc device, registered in dlm_plock_init() and removed in
 * dlm_plock_exit().  It is named DLM_PLOCK_MISC_NAME and asks for a
 * dynamically assigned minor number.
 */
618 static struct miscdevice plock_dev_misc = {
619 	.minor = MISC_DYNAMIC_MINOR,
620 	.name = DLM_PLOCK_MISC_NAME,
621 	.fops = &dev_fops
622 };
623 
dlm_plock_init(void)624 int dlm_plock_init(void)
625 {
626 	int rv;
627 
628 	rv = misc_register(&plock_dev_misc);
629 	if (rv)
630 		log_print("dlm_plock_init: misc_register failed %d", rv);
631 	return rv;
632 }
633 
dlm_plock_exit(void)634 void dlm_plock_exit(void)
635 {
636 	misc_deregister(&plock_dev_misc);
637 	WARN_ON(!list_empty(&send_list));
638 	WARN_ON(!list_empty(&recv_list));
639 }
640 
641