xref: /openbmc/linux/fs/dlm/plock.c (revision d3741027)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2005-2008 Red Hat, Inc.  All rights reserved.
4  */
5 
6 #include <linux/fs.h>
7 #include <linux/miscdevice.h>
8 #include <linux/poll.h>
9 #include <linux/dlm.h>
10 #include <linux/dlm_plock.h>
11 #include <linux/slab.h>
12 
13 #include "dlm_internal.h"
14 #include "lockspace.h"
15 
/* Taken around every access to send_list and recv_list in this file. */
static DEFINE_SPINLOCK(ops_lock);

/* Ops queued by send_op() until dev_read() picks them up. */
static LIST_HEAD(send_list);
/* Woken by send_op(); dev_poll() registers on it. */
static DECLARE_WAIT_QUEUE_HEAD(send_wq);

/* Ops moved here by dev_read() until dev_write() finds a match. */
static LIST_HEAD(recv_list);
/* Woken by dev_write(); synchronous callers sleep on it until op->done. */
static DECLARE_WAIT_QUEUE_HEAD(recv_wq);
21 
22 struct plock_async_data {
23 	void *fl;
24 	void *file;
25 	struct file_lock flc;
26 	int (*callback)(struct file_lock *fl, int result);
27 };
28 
29 struct plock_op {
30 	struct list_head list;
31 	int done;
32 	struct dlm_plock_info info;
33 	/* if set indicates async handling */
34 	struct plock_async_data *data;
35 };
36 
37 static inline void set_version(struct dlm_plock_info *info)
38 {
39 	info->version[0] = DLM_PLOCK_VERSION_MAJOR;
40 	info->version[1] = DLM_PLOCK_VERSION_MINOR;
41 	info->version[2] = DLM_PLOCK_VERSION_PATCH;
42 }
43 
44 static int check_version(struct dlm_plock_info *info)
45 {
46 	if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) ||
47 	    (DLM_PLOCK_VERSION_MINOR < info->version[1])) {
48 		log_print("plock device version mismatch: "
49 			  "kernel (%u.%u.%u), user (%u.%u.%u)",
50 			  DLM_PLOCK_VERSION_MAJOR,
51 			  DLM_PLOCK_VERSION_MINOR,
52 			  DLM_PLOCK_VERSION_PATCH,
53 			  info->version[0],
54 			  info->version[1],
55 			  info->version[2]);
56 		return -EINVAL;
57 	}
58 	return 0;
59 }
60 
61 static void dlm_release_plock_op(struct plock_op *op)
62 {
63 	kfree(op->data);
64 	kfree(op);
65 }
66 
67 static void send_op(struct plock_op *op)
68 {
69 	set_version(&op->info);
70 	spin_lock(&ops_lock);
71 	list_add_tail(&op->list, &send_list);
72 	spin_unlock(&ops_lock);
73 	wake_up(&send_wq);
74 }
75 
76 /* If a process was killed while waiting for the only plock on a file,
77    locks_remove_posix will not see any lock on the file so it won't
78    send an unlock-close to us to pass on to userspace to clean up the
79    abandoned waiter.  So, we have to insert the unlock-close when the
80    lock call is interrupted. */
81 
82 static void do_unlock_close(struct dlm_ls *ls, u64 number,
83 			    struct file *file, struct file_lock *fl)
84 {
85 	struct plock_op *op;
86 
87 	op = kzalloc(sizeof(*op), GFP_NOFS);
88 	if (!op)
89 		return;
90 
91 	op->info.optype		= DLM_PLOCK_OP_UNLOCK;
92 	op->info.pid		= fl->fl_pid;
93 	op->info.fsid		= ls->ls_global_id;
94 	op->info.number		= number;
95 	op->info.start		= 0;
96 	op->info.end		= OFFSET_MAX;
97 	if (fl->fl_lmops && fl->fl_lmops->lm_grant)
98 		op->info.owner	= (__u64) fl->fl_pid;
99 	else
100 		op->info.owner	= (__u64)(long) fl->fl_owner;
101 
102 	op->info.flags |= DLM_PLOCK_FL_CLOSE;
103 	send_op(op);
104 }
105 
106 int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
107 		   int cmd, struct file_lock *fl)
108 {
109 	struct plock_async_data *op_data;
110 	struct dlm_ls *ls;
111 	struct plock_op *op;
112 	int rv;
113 
114 	ls = dlm_find_lockspace_local(lockspace);
115 	if (!ls)
116 		return -EINVAL;
117 
118 	op = kzalloc(sizeof(*op), GFP_NOFS);
119 	if (!op) {
120 		rv = -ENOMEM;
121 		goto out;
122 	}
123 
124 	op->info.optype		= DLM_PLOCK_OP_LOCK;
125 	op->info.pid		= fl->fl_pid;
126 	op->info.ex		= (fl->fl_type == F_WRLCK);
127 	op->info.wait		= IS_SETLKW(cmd);
128 	op->info.fsid		= ls->ls_global_id;
129 	op->info.number		= number;
130 	op->info.start		= fl->fl_start;
131 	op->info.end		= fl->fl_end;
132 	/* async handling */
133 	if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
134 		op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
135 		if (!op_data) {
136 			dlm_release_plock_op(op);
137 			rv = -ENOMEM;
138 			goto out;
139 		}
140 
141 		/* fl_owner is lockd which doesn't distinguish
142 		   processes on the nfs client */
143 		op->info.owner	= (__u64) fl->fl_pid;
144 		op_data->callback = fl->fl_lmops->lm_grant;
145 		locks_init_lock(&op_data->flc);
146 		locks_copy_lock(&op_data->flc, fl);
147 		op_data->fl		= fl;
148 		op_data->file	= file;
149 
150 		op->data = op_data;
151 
152 		send_op(op);
153 		rv = FILE_LOCK_DEFERRED;
154 		goto out;
155 	} else {
156 		op->info.owner	= (__u64)(long) fl->fl_owner;
157 	}
158 
159 	send_op(op);
160 
161 	rv = wait_event_interruptible(recv_wq, (op->done != 0));
162 	if (rv == -ERESTARTSYS) {
163 		spin_lock(&ops_lock);
164 		list_del(&op->list);
165 		spin_unlock(&ops_lock);
166 		log_print("%s: wait interrupted %x %llx, op removed",
167 			  __func__, ls->ls_global_id,
168 			  (unsigned long long)number);
169 		dlm_release_plock_op(op);
170 		do_unlock_close(ls, number, file, fl);
171 		goto out;
172 	}
173 
174 	WARN_ON(!list_empty(&op->list));
175 
176 	rv = op->info.rv;
177 
178 	if (!rv) {
179 		if (locks_lock_file_wait(file, fl) < 0)
180 			log_error(ls, "dlm_posix_lock: vfs lock error %llx",
181 				  (unsigned long long)number);
182 	}
183 
184 	dlm_release_plock_op(op);
185 out:
186 	dlm_put_lockspace(ls);
187 	return rv;
188 }
189 EXPORT_SYMBOL_GPL(dlm_posix_lock);
190 
191 /* Returns failure iff a successful lock operation should be canceled */
192 static int dlm_plock_callback(struct plock_op *op)
193 {
194 	struct plock_async_data *op_data = op->data;
195 	struct file *file;
196 	struct file_lock *fl;
197 	struct file_lock *flc;
198 	int (*notify)(struct file_lock *fl, int result) = NULL;
199 	int rv = 0;
200 
201 	WARN_ON(!list_empty(&op->list));
202 
203 	/* check if the following 2 are still valid or make a copy */
204 	file = op_data->file;
205 	flc = &op_data->flc;
206 	fl = op_data->fl;
207 	notify = op_data->callback;
208 
209 	if (op->info.rv) {
210 		notify(fl, op->info.rv);
211 		goto out;
212 	}
213 
214 	/* got fs lock; bookkeep locally as well: */
215 	flc->fl_flags &= ~FL_SLEEP;
216 	if (posix_lock_file(file, flc, NULL)) {
217 		/*
218 		 * This can only happen in the case of kmalloc() failure.
219 		 * The filesystem's own lock is the authoritative lock,
220 		 * so a failure to get the lock locally is not a disaster.
221 		 * As long as the fs cannot reliably cancel locks (especially
222 		 * in a low-memory situation), we're better off ignoring
223 		 * this failure than trying to recover.
224 		 */
225 		log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p",
226 			  (unsigned long long)op->info.number, file, fl);
227 	}
228 
229 	rv = notify(fl, 0);
230 	if (rv) {
231 		/* XXX: We need to cancel the fs lock here: */
232 		log_print("dlm_plock_callback: lock granted after lock request "
233 			  "failed; dangling lock!\n");
234 		goto out;
235 	}
236 
237 out:
238 	dlm_release_plock_op(op);
239 	return rv;
240 }
241 
242 int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
243 		     struct file_lock *fl)
244 {
245 	struct dlm_ls *ls;
246 	struct plock_op *op;
247 	int rv;
248 	unsigned char fl_flags = fl->fl_flags;
249 
250 	ls = dlm_find_lockspace_local(lockspace);
251 	if (!ls)
252 		return -EINVAL;
253 
254 	op = kzalloc(sizeof(*op), GFP_NOFS);
255 	if (!op) {
256 		rv = -ENOMEM;
257 		goto out;
258 	}
259 
260 	/* cause the vfs unlock to return ENOENT if lock is not found */
261 	fl->fl_flags |= FL_EXISTS;
262 
263 	rv = locks_lock_file_wait(file, fl);
264 	if (rv == -ENOENT) {
265 		rv = 0;
266 		goto out_free;
267 	}
268 	if (rv < 0) {
269 		log_error(ls, "dlm_posix_unlock: vfs unlock error %d %llx",
270 			  rv, (unsigned long long)number);
271 	}
272 
273 	op->info.optype		= DLM_PLOCK_OP_UNLOCK;
274 	op->info.pid		= fl->fl_pid;
275 	op->info.fsid		= ls->ls_global_id;
276 	op->info.number		= number;
277 	op->info.start		= fl->fl_start;
278 	op->info.end		= fl->fl_end;
279 	if (fl->fl_lmops && fl->fl_lmops->lm_grant)
280 		op->info.owner	= (__u64) fl->fl_pid;
281 	else
282 		op->info.owner	= (__u64)(long) fl->fl_owner;
283 
284 	if (fl->fl_flags & FL_CLOSE) {
285 		op->info.flags |= DLM_PLOCK_FL_CLOSE;
286 		send_op(op);
287 		rv = 0;
288 		goto out;
289 	}
290 
291 	send_op(op);
292 	wait_event(recv_wq, (op->done != 0));
293 
294 	WARN_ON(!list_empty(&op->list));
295 
296 	rv = op->info.rv;
297 
298 	if (rv == -ENOENT)
299 		rv = 0;
300 
301 out_free:
302 	dlm_release_plock_op(op);
303 out:
304 	dlm_put_lockspace(ls);
305 	fl->fl_flags = fl_flags;
306 	return rv;
307 }
308 EXPORT_SYMBOL_GPL(dlm_posix_unlock);
309 
310 int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
311 		  struct file_lock *fl)
312 {
313 	struct dlm_ls *ls;
314 	struct plock_op *op;
315 	int rv;
316 
317 	ls = dlm_find_lockspace_local(lockspace);
318 	if (!ls)
319 		return -EINVAL;
320 
321 	op = kzalloc(sizeof(*op), GFP_NOFS);
322 	if (!op) {
323 		rv = -ENOMEM;
324 		goto out;
325 	}
326 
327 	op->info.optype		= DLM_PLOCK_OP_GET;
328 	op->info.pid		= fl->fl_pid;
329 	op->info.ex		= (fl->fl_type == F_WRLCK);
330 	op->info.fsid		= ls->ls_global_id;
331 	op->info.number		= number;
332 	op->info.start		= fl->fl_start;
333 	op->info.end		= fl->fl_end;
334 	if (fl->fl_lmops && fl->fl_lmops->lm_grant)
335 		op->info.owner	= (__u64) fl->fl_pid;
336 	else
337 		op->info.owner	= (__u64)(long) fl->fl_owner;
338 
339 	send_op(op);
340 	wait_event(recv_wq, (op->done != 0));
341 
342 	WARN_ON(!list_empty(&op->list));
343 
344 	/* info.rv from userspace is 1 for conflict, 0 for no-conflict,
345 	   -ENOENT if there are no locks on the file */
346 
347 	rv = op->info.rv;
348 
349 	fl->fl_type = F_UNLCK;
350 	if (rv == -ENOENT)
351 		rv = 0;
352 	else if (rv > 0) {
353 		locks_init_lock(fl);
354 		fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
355 		fl->fl_flags = FL_POSIX;
356 		fl->fl_pid = -op->info.pid;
357 		fl->fl_start = op->info.start;
358 		fl->fl_end = op->info.end;
359 		rv = 0;
360 	}
361 
362 	dlm_release_plock_op(op);
363 out:
364 	dlm_put_lockspace(ls);
365 	return rv;
366 }
367 EXPORT_SYMBOL_GPL(dlm_posix_get);
368 
369 /* a read copies out one plock request from the send list */
370 static ssize_t dev_read(struct file *file, char __user *u, size_t count,
371 			loff_t *ppos)
372 {
373 	struct dlm_plock_info info;
374 	struct plock_op *op = NULL;
375 
376 	if (count < sizeof(info))
377 		return -EINVAL;
378 
379 	spin_lock(&ops_lock);
380 	if (!list_empty(&send_list)) {
381 		op = list_entry(send_list.next, struct plock_op, list);
382 		if (op->info.flags & DLM_PLOCK_FL_CLOSE)
383 			list_del(&op->list);
384 		else
385 			list_move(&op->list, &recv_list);
386 		memcpy(&info, &op->info, sizeof(info));
387 	}
388 	spin_unlock(&ops_lock);
389 
390 	if (!op)
391 		return -EAGAIN;
392 
393 	/* there is no need to get a reply from userspace for unlocks
394 	   that were generated by the vfs cleaning up for a close
395 	   (the process did not make an unlock call). */
396 
397 	if (op->info.flags & DLM_PLOCK_FL_CLOSE)
398 		dlm_release_plock_op(op);
399 
400 	if (copy_to_user(u, &info, sizeof(info)))
401 		return -EFAULT;
402 	return sizeof(info);
403 }
404 
405 /* a write copies in one plock result that should match a plock_op
406    on the recv list */
407 static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
408 			 loff_t *ppos)
409 {
410 	struct plock_op *op = NULL, *iter;
411 	struct dlm_plock_info info;
412 	int do_callback = 0;
413 
414 	if (count != sizeof(info))
415 		return -EINVAL;
416 
417 	if (copy_from_user(&info, u, sizeof(info)))
418 		return -EFAULT;
419 
420 	if (check_version(&info))
421 		return -EINVAL;
422 
423 	spin_lock(&ops_lock);
424 	list_for_each_entry(iter, &recv_list, list) {
425 		if (iter->info.fsid == info.fsid &&
426 		    iter->info.number == info.number &&
427 		    iter->info.owner == info.owner) {
428 			list_del_init(&iter->list);
429 			memcpy(&iter->info, &info, sizeof(info));
430 			if (iter->data)
431 				do_callback = 1;
432 			else
433 				iter->done = 1;
434 			op = iter;
435 			break;
436 		}
437 	}
438 	spin_unlock(&ops_lock);
439 
440 	if (op) {
441 		if (do_callback)
442 			dlm_plock_callback(op);
443 		else
444 			wake_up(&recv_wq);
445 	} else
446 		log_print("%s: no op %x %llx - may got interrupted?", __func__,
447 			  info.fsid, (unsigned long long)info.number);
448 	return count;
449 }
450 
451 static __poll_t dev_poll(struct file *file, poll_table *wait)
452 {
453 	__poll_t mask = 0;
454 
455 	poll_wait(file, &send_wq, wait);
456 
457 	spin_lock(&ops_lock);
458 	if (!list_empty(&send_list))
459 		mask = EPOLLIN | EPOLLRDNORM;
460 	spin_unlock(&ops_lock);
461 
462 	return mask;
463 }
464 
465 static const struct file_operations dev_fops = {
466 	.read    = dev_read,
467 	.write   = dev_write,
468 	.poll    = dev_poll,
469 	.owner   = THIS_MODULE,
470 	.llseek  = noop_llseek,
471 };
472 
473 static struct miscdevice plock_dev_misc = {
474 	.minor = MISC_DYNAMIC_MINOR,
475 	.name = DLM_PLOCK_MISC_NAME,
476 	.fops = &dev_fops
477 };
478 
479 int dlm_plock_init(void)
480 {
481 	int rv;
482 
483 	rv = misc_register(&plock_dev_misc);
484 	if (rv)
485 		log_print("dlm_plock_init: misc_register failed %d", rv);
486 	return rv;
487 }
488 
489 void dlm_plock_exit(void)
490 {
491 	misc_deregister(&plock_dev_misc);
492 }
493 
494