xref: /openbmc/linux/fs/locks.c (revision 5f2fb52fac15a8a8e10ce020dd532504a8abfc4e)
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   *  linux/fs/locks.c
4   *
5   *  Provide support for fcntl()'s F_GETLK, F_SETLK, and F_SETLKW calls.
6   *  Doug Evans (dje@spiff.uucp), August 07, 1992
7   *
8   *  Deadlock detection added.
9   *  FIXME: one thing isn't handled yet:
10   *	- mandatory locks (requires lots of changes elsewhere)
11   *  Kelly Carmichael (kelly@[142.24.8.65]), September 17, 1994.
12   *
13   *  Miscellaneous edits, and a total rewrite of posix_lock_file() code.
14   *  Kai Petzke (wpp@marie.physik.tu-berlin.de), 1994
15   *
16   *  Converted file_lock_table to a linked list from an array, which eliminates
17   *  the limits on how many active file locks are open.
18   *  Chad Page (pageone@netcom.com), November 27, 1994
19   *
20   *  Removed dependency on file descriptors. dup()'ed file descriptors now
21   *  get the same locks as the original file descriptors, and a close() on
22   *  any file descriptor removes ALL the locks on the file for the current
23   *  process. Since locks still depend on the process id, locks are inherited
24   *  after an exec() but not after a fork(). This agrees with POSIX, and both
25   *  BSD and SVR4 practice.
26   *  Andy Walker (andy@lysaker.kvaerner.no), February 14, 1995
27   *
28   *  Scrapped free list which is redundant now that we allocate locks
29   *  dynamically with kmalloc()/kfree().
30   *  Andy Walker (andy@lysaker.kvaerner.no), February 21, 1995
31   *
32   *  Implemented two lock personalities - FL_FLOCK and FL_POSIX.
33   *
34   *  FL_POSIX locks are created with calls to fcntl() and lockf() through the
35   *  fcntl() system call. They have the semantics described above.
36   *
37   *  FL_FLOCK locks are created with calls to flock(), through the flock()
38   *  system call, which is new. Old C libraries implement flock() via fcntl()
39   *  and will continue to use the old, broken implementation.
40   *
41   *  FL_FLOCK locks follow the 4.4 BSD flock() semantics. They are associated
42   *  with a file pointer (filp). As a result they can be shared by a parent
43   *  process and its children after a fork(). They are removed when the last
44   *  file descriptor referring to the file pointer is closed (unless explicitly
45   *  unlocked).
46   *
47   *  FL_FLOCK locks never deadlock, an existing lock is always removed before
48   *  upgrading from shared to exclusive (or vice versa). When this happens
49   *  any processes blocked by the current lock are woken up and allowed to
50   *  run before the new lock is applied.
51   *  Andy Walker (andy@lysaker.kvaerner.no), June 09, 1995
52   *
53   *  Removed some race conditions in flock_lock_file(), marked other possible
54   *  races. Just grep for FIXME to see them.
55   *  Dmitry Gorodchanin (pgmdsg@ibi.com), February 09, 1996.
56   *
57   *  Addressed Dmitry's concerns. Deadlock checking no longer recursive.
58   *  Lock allocation changed to GFP_ATOMIC as we can't afford to sleep
59   *  once we've checked for blocking and deadlocking.
60   *  Andy Walker (andy@lysaker.kvaerner.no), April 03, 1996.
61   *
62   *  Initial implementation of mandatory locks. SunOS turned out to be
63   *  a rotten model, so I implemented the "obvious" semantics.
64   *  See 'Documentation/filesystems/mandatory-locking.txt' for details.
65   *  Andy Walker (andy@lysaker.kvaerner.no), April 06, 1996.
66   *
67   *  Don't allow mandatory locks on mmap()'ed files. Added simple functions to
68   *  check if a file has mandatory locks, used by mmap(), open() and creat() to
69   *  see if system call should be rejected. Ref. HP-UX/SunOS/Solaris Reference
70   *  Manual, Section 2.
71   *  Andy Walker (andy@lysaker.kvaerner.no), April 09, 1996.
72   *
73   *  Tidied up block list handling. Added '/proc/locks' interface.
74   *  Andy Walker (andy@lysaker.kvaerner.no), April 24, 1996.
75   *
76   *  Fixed deadlock condition for pathological code that mixes calls to
77   *  flock() and fcntl().
78   *  Andy Walker (andy@lysaker.kvaerner.no), April 29, 1996.
79   *
80   *  Allow only one type of locking scheme (FL_POSIX or FL_FLOCK) to be in use
81   *  for a given file at a time. Changed the CONFIG_LOCK_MANDATORY scheme to
82   *  guarantee sensible behaviour in the case where file system modules might
83   *  be compiled with different options than the kernel itself.
84   *  Andy Walker (andy@lysaker.kvaerner.no), May 15, 1996.
85   *
86   *  Added a couple of missing wake_up() calls. Thanks to Thomas Meckel
87   *  (Thomas.Meckel@mni.fh-giessen.de) for spotting this.
88   *  Andy Walker (andy@lysaker.kvaerner.no), May 15, 1996.
89   *
90   *  Changed FL_POSIX locks to use the block list in the same way as FL_FLOCK
91   *  locks. Changed process synchronisation to avoid dereferencing locks that
92   *  have already been freed.
93   *  Andy Walker (andy@lysaker.kvaerner.no), Sep 21, 1996.
94   *
95   *  Made the block list a circular list to minimise searching in the list.
96   *  Andy Walker (andy@lysaker.kvaerner.no), Sep 25, 1996.
97   *
98   *  Made mandatory locking a mount option. Default is not to allow mandatory
99   *  locking.
100   *  Andy Walker (andy@lysaker.kvaerner.no), Oct 04, 1996.
101   *
102   *  Some adaptations for NFS support.
103   *  Olaf Kirch (okir@monad.swb.de), Dec 1996.
104   *
105   *  Fixed /proc/locks interface so that we can't overrun the buffer we are handed.
106   *  Andy Walker (andy@lysaker.kvaerner.no), May 12, 1997.
107   *
108   *  Use slab allocator instead of kmalloc/kfree.
109   *  Use generic list implementation from <linux/list.h>.
110   *  Sped up posix_locks_deadlock by only considering blocked locks.
111   *  Matthew Wilcox <willy@debian.org>, March, 2000.
112   *
113   *  Leases and LOCK_MAND
114   *  Matthew Wilcox <willy@debian.org>, June, 2000.
115   *  Stephen Rothwell <sfr@canb.auug.org.au>, June, 2000.
116   *
117   * Locking conflicts and dependencies:
118   * If multiple threads attempt to lock the same byte (or flock the same file),
119   * only one can be granted the lock, and the others must wait their turn.
120   * The first lock has been "applied" or "granted"; the others are "waiting"
121   * and are "blocked" by the "applied" lock.
122   *
123   * Waiting and applied locks are all kept in trees whose properties are:
124   *
125   *	- the root of a tree may be an applied or waiting lock.
126   *	- every other node in the tree is a waiting lock that
127   *	  conflicts with every ancestor of that node.
128   *
129   * Every such tree begins life as a waiting singleton which obviously
130   * satisfies the above properties.
131   *
132   * The only ways we modify trees preserve these properties:
133   *
134   *	1. We may add a new leaf node, but only after first verifying that it
135   *	   conflicts with all of its ancestors.
136   *	2. We may remove the root of a tree, creating a new singleton
137   *	   tree from the root and N new trees rooted in the immediate
138   *	   children.
139   *	3. If the root of a tree is not currently an applied lock, we may
140   *	   apply it (if possible).
141   *	4. We may upgrade the root of the tree (either extend its range,
142   *	   or upgrade its entire range from read to write).
143   *
144   * When an applied lock is modified in a way that reduces or downgrades any
145   * part of its range, we remove all its children (2 above).  This particularly
146   * happens when a lock is unlocked.
147   *
148   * For each of those child trees we "wake up" the thread which is
149   * waiting for the lock so it can continue handling as follows: if the
150   * root of the tree applies, we do so (3).  If it doesn't, it must
151   * conflict with some applied lock.  We remove (wake up) all of its children
152   * (2), and add it is a new leaf to the tree rooted in the applied
153   * lock (1).  We then repeat the process recursively with those
154   * children.
155   *
156   */
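
/*
 * A concrete example of the tree behaviour described above (illustrative
 * only; "rule N" refers to the numbered list in the comment):
 *
 *	P1 holds a write lock on bytes 0-9 (an applied root).
 *	P2 requests a write lock on bytes 0-4; it conflicts with P1's lock
 *	and becomes a waiting child of that root (rule 1).
 *	P3 requests a write lock on bytes 2-3; it conflicts with P2's
 *	request (and with P1's lock), so it is inserted beneath P2.
 *
 * When P1 unlocks, rule 2 detaches P2's subtree and P2 is woken.  P2's
 * request now applies (rule 3); P3 still conflicts with it, so P3 simply
 * stays blocked beneath P2 and is never woken spuriously.
 */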
157  
158  #include <linux/capability.h>
159  #include <linux/file.h>
160  #include <linux/fdtable.h>
161  #include <linux/fs.h>
162  #include <linux/init.h>
163  #include <linux/security.h>
164  #include <linux/slab.h>
165  #include <linux/syscalls.h>
166  #include <linux/time.h>
167  #include <linux/rcupdate.h>
168  #include <linux/pid_namespace.h>
169  #include <linux/hashtable.h>
170  #include <linux/percpu.h>
171  
172  #define CREATE_TRACE_POINTS
173  #include <trace/events/filelock.h>
174  
175  #include <linux/uaccess.h>
176  
177  #define IS_POSIX(fl)	(fl->fl_flags & FL_POSIX)
178  #define IS_FLOCK(fl)	(fl->fl_flags & FL_FLOCK)
179  #define IS_LEASE(fl)	(fl->fl_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT))
180  #define IS_OFDLCK(fl)	(fl->fl_flags & FL_OFDLCK)
181  #define IS_REMOTELCK(fl)	(fl->fl_pid <= 0)
182  
183  static bool lease_breaking(struct file_lock *fl)
184  {
185  	return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING);
186  }
187  
188  static int target_leasetype(struct file_lock *fl)
189  {
190  	if (fl->fl_flags & FL_UNLOCK_PENDING)
191  		return F_UNLCK;
192  	if (fl->fl_flags & FL_DOWNGRADE_PENDING)
193  		return F_RDLCK;
194  	return fl->fl_type;
195  }
196  
197  int leases_enable = 1;
198  int lease_break_time = 45;
199  
200  /*
201   * The global file_lock_list is only used for displaying /proc/locks, so we
202   * keep a list on each CPU, with each list protected by its own spinlock.
203   * Global serialization is done using file_rwsem.
204   *
205   * Note that alterations to the list also require that the relevant flc_lock is
206   * held.
207   */
208  struct file_lock_list_struct {
209  	spinlock_t		lock;
210  	struct hlist_head	hlist;
211  };
212  static DEFINE_PER_CPU(struct file_lock_list_struct, file_lock_list);
213  DEFINE_STATIC_PERCPU_RWSEM(file_rwsem);
214  
215  
216  /*
217   * The blocked_hash is used to find POSIX lock loops for deadlock detection.
218   * It is protected by blocked_lock_lock.
219   *
220   * We hash locks by lockowner in order to optimize searching for the lock a
221   * particular lockowner is waiting on.
222   *
223   * FIXME: make this value scale via some heuristic? We generally will want more
224   * buckets when we have more lockowners holding locks, but that's a little
225   * difficult to determine without knowing what the workload will look like.
226   */
227  #define BLOCKED_HASH_BITS	7
228  static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS);
229  
230  /*
231   * This lock protects the blocked_hash. Generally, if you're accessing it, you
232   * want to be holding this lock.
233   *
234   * In addition, it also protects the fl->fl_blocked_requests list, and the
235   * fl->fl_blocker pointer for file_lock structures that are acting as lock
236   * requests (in contrast to those that are acting as records of acquired locks).
237   *
238   * Note that when we acquire this lock in order to change the above fields,
239   * we often hold the flc_lock as well. In certain cases, when reading the fields
240   * protected by this lock, we can skip acquiring it iff we already hold the
241   * flc_lock.
242   */
243  static DEFINE_SPINLOCK(blocked_lock_lock);
244  
245  static struct kmem_cache *flctx_cache __read_mostly;
246  static struct kmem_cache *filelock_cache __read_mostly;
247  
248  static struct file_lock_context *
249  locks_get_lock_context(struct inode *inode, int type)
250  {
251  	struct file_lock_context *ctx;
252  
253  	/* paired with cmpxchg() below */
254  	ctx = smp_load_acquire(&inode->i_flctx);
255  	if (likely(ctx) || type == F_UNLCK)
256  		goto out;
257  
258  	ctx = kmem_cache_alloc(flctx_cache, GFP_KERNEL);
259  	if (!ctx)
260  		goto out;
261  
262  	spin_lock_init(&ctx->flc_lock);
263  	INIT_LIST_HEAD(&ctx->flc_flock);
264  	INIT_LIST_HEAD(&ctx->flc_posix);
265  	INIT_LIST_HEAD(&ctx->flc_lease);
266  
267  	/*
268  	 * Assign the pointer if it's not already assigned. If it is, then
269  	 * free the context we just allocated.
270  	 */
271  	if (cmpxchg(&inode->i_flctx, NULL, ctx)) {
272  		kmem_cache_free(flctx_cache, ctx);
273  		ctx = smp_load_acquire(&inode->i_flctx);
274  	}
275  out:
276  	trace_locks_get_lock_context(inode, type, ctx);
277  	return ctx;
278  }
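
/*
 * The allocation above follows a common lockless-publication pattern: read
 * the pointer with smp_load_acquire(), allocate outside any lock, and
 * publish with cmpxchg() so that concurrent allocators race harmlessly and
 * the loser frees its copy.  A minimal sketch of the same idea, using a
 * hypothetical object type:
 *
 *	struct foo *get_foo(struct bar *b)
 *	{
 *		struct foo *f = smp_load_acquire(&b->foo);
 *
 *		if (f)
 *			return f;
 *		f = kzalloc(sizeof(*f), GFP_KERNEL);
 *		if (!f)
 *			return NULL;
 *		if (cmpxchg(&b->foo, NULL, f)) {
 *			kfree(f);		// somebody beat us to it
 *			f = smp_load_acquire(&b->foo);
 *		}
 *		return f;
 *	}
 */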
279  
280  static void
281  locks_dump_ctx_list(struct list_head *list, char *list_type)
282  {
283  	struct file_lock *fl;
284  
285  	list_for_each_entry(fl, list, fl_list) {
286  		pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
287  	}
288  }
289  
290  static void
291  locks_check_ctx_lists(struct inode *inode)
292  {
293  	struct file_lock_context *ctx = inode->i_flctx;
294  
295  	if (unlikely(!list_empty(&ctx->flc_flock) ||
296  		     !list_empty(&ctx->flc_posix) ||
297  		     !list_empty(&ctx->flc_lease))) {
298  		pr_warn("Leaked locks on dev=0x%x:0x%x ino=0x%lx:\n",
299  			MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
300  			inode->i_ino);
301  		locks_dump_ctx_list(&ctx->flc_flock, "FLOCK");
302  		locks_dump_ctx_list(&ctx->flc_posix, "POSIX");
303  		locks_dump_ctx_list(&ctx->flc_lease, "LEASE");
304  	}
305  }
306  
307  static void
308  locks_check_ctx_file_list(struct file *filp, struct list_head *list,
309  				char *list_type)
310  {
311  	struct file_lock *fl;
312  	struct inode *inode = locks_inode(filp);
313  
314  	list_for_each_entry(fl, list, fl_list)
315  		if (fl->fl_file == filp)
316  			pr_warn("Leaked %s lock on dev=0x%x:0x%x ino=0x%lx "
317  				" fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n",
318  				list_type, MAJOR(inode->i_sb->s_dev),
319  				MINOR(inode->i_sb->s_dev), inode->i_ino,
320  				fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
321  }
322  
323  void
324  locks_free_lock_context(struct inode *inode)
325  {
326  	struct file_lock_context *ctx = inode->i_flctx;
327  
328  	if (unlikely(ctx)) {
329  		locks_check_ctx_lists(inode);
330  		kmem_cache_free(flctx_cache, ctx);
331  	}
332  }
333  
334  static void locks_init_lock_heads(struct file_lock *fl)
335  {
336  	INIT_HLIST_NODE(&fl->fl_link);
337  	INIT_LIST_HEAD(&fl->fl_list);
338  	INIT_LIST_HEAD(&fl->fl_blocked_requests);
339  	INIT_LIST_HEAD(&fl->fl_blocked_member);
340  	init_waitqueue_head(&fl->fl_wait);
341  }
342  
343  /* Allocate an empty lock structure. */
344  struct file_lock *locks_alloc_lock(void)
345  {
346  	struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL);
347  
348  	if (fl)
349  		locks_init_lock_heads(fl);
350  
351  	return fl;
352  }
353  EXPORT_SYMBOL_GPL(locks_alloc_lock);
354  
355  void locks_release_private(struct file_lock *fl)
356  {
357  	BUG_ON(waitqueue_active(&fl->fl_wait));
358  	BUG_ON(!list_empty(&fl->fl_list));
359  	BUG_ON(!list_empty(&fl->fl_blocked_requests));
360  	BUG_ON(!list_empty(&fl->fl_blocked_member));
361  	BUG_ON(!hlist_unhashed(&fl->fl_link));
362  
363  	if (fl->fl_ops) {
364  		if (fl->fl_ops->fl_release_private)
365  			fl->fl_ops->fl_release_private(fl);
366  		fl->fl_ops = NULL;
367  	}
368  
369  	if (fl->fl_lmops) {
370  		if (fl->fl_lmops->lm_put_owner) {
371  			fl->fl_lmops->lm_put_owner(fl->fl_owner);
372  			fl->fl_owner = NULL;
373  		}
374  		fl->fl_lmops = NULL;
375  	}
376  }
377  EXPORT_SYMBOL_GPL(locks_release_private);
378  
379  /* Free a lock which is not in use. */
380  void locks_free_lock(struct file_lock *fl)
381  {
382  	locks_release_private(fl);
383  	kmem_cache_free(filelock_cache, fl);
384  }
385  EXPORT_SYMBOL(locks_free_lock);
386  
387  static void
388  locks_dispose_list(struct list_head *dispose)
389  {
390  	struct file_lock *fl;
391  
392  	while (!list_empty(dispose)) {
393  		fl = list_first_entry(dispose, struct file_lock, fl_list);
394  		list_del_init(&fl->fl_list);
395  		locks_free_lock(fl);
396  	}
397  }
398  
399  void locks_init_lock(struct file_lock *fl)
400  {
401  	memset(fl, 0, sizeof(struct file_lock));
402  	locks_init_lock_heads(fl);
403  }
404  EXPORT_SYMBOL(locks_init_lock);
405  
406  /*
407   * Initialize a new lock from an existing file_lock structure.
408   */
409  void locks_copy_conflock(struct file_lock *new, struct file_lock *fl)
410  {
411  	new->fl_owner = fl->fl_owner;
412  	new->fl_pid = fl->fl_pid;
413  	new->fl_file = NULL;
414  	new->fl_flags = fl->fl_flags;
415  	new->fl_type = fl->fl_type;
416  	new->fl_start = fl->fl_start;
417  	new->fl_end = fl->fl_end;
418  	new->fl_lmops = fl->fl_lmops;
419  	new->fl_ops = NULL;
420  
421  	if (fl->fl_lmops) {
422  		if (fl->fl_lmops->lm_get_owner)
423  			fl->fl_lmops->lm_get_owner(fl->fl_owner);
424  	}
425  }
426  EXPORT_SYMBOL(locks_copy_conflock);
427  
428  void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
429  {
430  	/* "new" must be a freshly-initialized lock */
431  	WARN_ON_ONCE(new->fl_ops);
432  
433  	locks_copy_conflock(new, fl);
434  
435  	new->fl_file = fl->fl_file;
436  	new->fl_ops = fl->fl_ops;
437  
438  	if (fl->fl_ops) {
439  		if (fl->fl_ops->fl_copy_lock)
440  			fl->fl_ops->fl_copy_lock(new, fl);
441  	}
442  }
443  EXPORT_SYMBOL(locks_copy_lock);
444  
445  static void locks_move_blocks(struct file_lock *new, struct file_lock *fl)
446  {
447  	struct file_lock *f;
448  
449  	/*
450  	 * As ctx->flc_lock is held, new requests cannot be added to
451  	 * ->fl_blocked_requests, so we don't need a lock to check if it
452  	 * is empty.
453  	 */
454  	if (list_empty(&fl->fl_blocked_requests))
455  		return;
456  	spin_lock(&blocked_lock_lock);
457  	list_splice_init(&fl->fl_blocked_requests, &new->fl_blocked_requests);
458  	list_for_each_entry(f, &new->fl_blocked_requests, fl_blocked_member)
459  		f->fl_blocker = new;
460  	spin_unlock(&blocked_lock_lock);
461  }
462  
463  static inline int flock_translate_cmd(int cmd) {
464  	if (cmd & LOCK_MAND)
465  		return cmd & (LOCK_MAND | LOCK_RW);
466  	switch (cmd) {
467  	case LOCK_SH:
468  		return F_RDLCK;
469  	case LOCK_EX:
470  		return F_WRLCK;
471  	case LOCK_UN:
472  		return F_UNLCK;
473  	}
474  	return -EINVAL;
475  }
476  
477  /* Fill in a file_lock structure with an appropriate FLOCK lock. */
478  static struct file_lock *
479  flock_make_lock(struct file *filp, unsigned int cmd, struct file_lock *fl)
480  {
481  	int type = flock_translate_cmd(cmd);
482  
483  	if (type < 0)
484  		return ERR_PTR(type);
485  
486  	if (fl == NULL) {
487  		fl = locks_alloc_lock();
488  		if (fl == NULL)
489  			return ERR_PTR(-ENOMEM);
490  	} else {
491  		locks_init_lock(fl);
492  	}
493  
494  	fl->fl_file = filp;
495  	fl->fl_owner = filp;
496  	fl->fl_pid = current->tgid;
497  	fl->fl_flags = FL_FLOCK;
498  	fl->fl_type = type;
499  	fl->fl_end = OFFSET_MAX;
500  
501  	return fl;
502  }
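
/*
 * Rough userspace view of the mapping done by flock_translate_cmd() and
 * flock_make_lock() (illustrative sketch; error handling omitted):
 *
 *	#include <sys/file.h>
 *
 *	flock(fd, LOCK_SH);	// becomes an FL_FLOCK lock of type F_RDLCK
 *	flock(fd, LOCK_EX);	// becomes an FL_FLOCK lock of type F_WRLCK
 *	flock(fd, LOCK_UN);	// becomes an FL_FLOCK request of type F_UNLCK
 *
 * All three describe the whole file (fl_start 0, fl_end OFFSET_MAX) and are
 * owned by the struct file rather than the calling task, which is why
 * dup()ed and inherited descriptors share the same flock() lock.
 */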
503  
504  static int assign_type(struct file_lock *fl, long type)
505  {
506  	switch (type) {
507  	case F_RDLCK:
508  	case F_WRLCK:
509  	case F_UNLCK:
510  		fl->fl_type = type;
511  		break;
512  	default:
513  		return -EINVAL;
514  	}
515  	return 0;
516  }
517  
518  static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
519  				 struct flock64 *l)
520  {
521  	switch (l->l_whence) {
522  	case SEEK_SET:
523  		fl->fl_start = 0;
524  		break;
525  	case SEEK_CUR:
526  		fl->fl_start = filp->f_pos;
527  		break;
528  	case SEEK_END:
529  		fl->fl_start = i_size_read(file_inode(filp));
530  		break;
531  	default:
532  		return -EINVAL;
533  	}
534  	if (l->l_start > OFFSET_MAX - fl->fl_start)
535  		return -EOVERFLOW;
536  	fl->fl_start += l->l_start;
537  	if (fl->fl_start < 0)
538  		return -EINVAL;
539  
540  	/* POSIX-1996 leaves the case l->l_len < 0 undefined;
541  	   POSIX-2001 defines it. */
542  	if (l->l_len > 0) {
543  		if (l->l_len - 1 > OFFSET_MAX - fl->fl_start)
544  			return -EOVERFLOW;
545  		fl->fl_end = fl->fl_start + l->l_len - 1;
546  
547  	} else if (l->l_len < 0) {
548  		if (fl->fl_start + l->l_len < 0)
549  			return -EINVAL;
550  		fl->fl_end = fl->fl_start - 1;
551  		fl->fl_start += l->l_len;
552  	} else
553  		fl->fl_end = OFFSET_MAX;
554  
555  	fl->fl_owner = current->files;
556  	fl->fl_pid = current->tgid;
557  	fl->fl_file = filp;
558  	fl->fl_flags = FL_POSIX;
559  	fl->fl_ops = NULL;
560  	fl->fl_lmops = NULL;
561  
562  	return assign_type(fl, l->l_type);
563  }
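
/*
 * Example of the conversion above (illustrative): with the file position at
 * 1000, a caller passing
 *
 *	l_whence = SEEK_CUR, l_start = -100, l_len = 50
 *
 * gets fl_start = 900 and fl_end = 949, while
 *
 *	l_whence = SEEK_CUR, l_start = 0, l_len = -100
 *
 * locks the 100 bytes *before* the file position: fl_start = 900,
 * fl_end = 999.  l_len == 0 always means "to the end of the file"
 * (fl_end = OFFSET_MAX).
 */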
564  
565  /* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX
566   * style lock.
567   */
568  static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,
569  			       struct flock *l)
570  {
571  	struct flock64 ll = {
572  		.l_type = l->l_type,
573  		.l_whence = l->l_whence,
574  		.l_start = l->l_start,
575  		.l_len = l->l_len,
576  	};
577  
578  	return flock64_to_posix_lock(filp, fl, &ll);
579  }
580  
581  /* default lease lock manager operations */
582  static bool
583  lease_break_callback(struct file_lock *fl)
584  {
585  	kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG);
586  	return false;
587  }
588  
589  static void
590  lease_setup(struct file_lock *fl, void **priv)
591  {
592  	struct file *filp = fl->fl_file;
593  	struct fasync_struct *fa = *priv;
594  
595  	/*
596  	 * fasync_insert_entry() returns the old entry if any. If there was no
597  	 * old entry, then it used "priv" and inserted it into the fasync list.
598  	 * Clear the pointer to indicate that it shouldn't be freed.
599  	 */
600  	if (!fasync_insert_entry(fa->fa_fd, filp, &fl->fl_fasync, fa))
601  		*priv = NULL;
602  
603  	__f_setown(filp, task_pid(current), PIDTYPE_TGID, 0);
604  }
605  
606  static const struct lock_manager_operations lease_manager_ops = {
607  	.lm_break = lease_break_callback,
608  	.lm_change = lease_modify,
609  	.lm_setup = lease_setup,
610  };
611  
612  /*
613   * Initialize a lease, use the default lock manager operations
614   */
615  static int lease_init(struct file *filp, long type, struct file_lock *fl)
616  {
617  	if (assign_type(fl, type) != 0)
618  		return -EINVAL;
619  
620  	fl->fl_owner = filp;
621  	fl->fl_pid = current->tgid;
622  
623  	fl->fl_file = filp;
624  	fl->fl_flags = FL_LEASE;
625  	fl->fl_start = 0;
626  	fl->fl_end = OFFSET_MAX;
627  	fl->fl_ops = NULL;
628  	fl->fl_lmops = &lease_manager_ops;
629  	return 0;
630  }
631  
632  /* Allocate a file_lock initialised to this type of lease */
633  static struct file_lock *lease_alloc(struct file *filp, long type)
634  {
635  	struct file_lock *fl = locks_alloc_lock();
636  	int error = -ENOMEM;
637  
638  	if (fl == NULL)
639  		return ERR_PTR(error);
640  
641  	error = lease_init(filp, type, fl);
642  	if (error) {
643  		locks_free_lock(fl);
644  		return ERR_PTR(error);
645  	}
646  	return fl;
647  }
648  
649  /* Check if two locks overlap each other.
650   */
651  static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2)
652  {
653  	return ((fl1->fl_end >= fl2->fl_start) &&
654  		(fl2->fl_end >= fl1->fl_start));
655  }
656  
657  /*
658   * Check whether two locks have the same owner.
659   */
660  static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
661  {
662  	return fl1->fl_owner == fl2->fl_owner;
663  }
664  
665  /* Must be called with the flc_lock held! */
666  static void locks_insert_global_locks(struct file_lock *fl)
667  {
668  	struct file_lock_list_struct *fll = this_cpu_ptr(&file_lock_list);
669  
670  	percpu_rwsem_assert_held(&file_rwsem);
671  
672  	spin_lock(&fll->lock);
673  	fl->fl_link_cpu = smp_processor_id();
674  	hlist_add_head(&fl->fl_link, &fll->hlist);
675  	spin_unlock(&fll->lock);
676  }
677  
678  /* Must be called with the flc_lock held! */
679  static void locks_delete_global_locks(struct file_lock *fl)
680  {
681  	struct file_lock_list_struct *fll;
682  
683  	percpu_rwsem_assert_held(&file_rwsem);
684  
685  	/*
686  	 * Avoid taking lock if already unhashed. This is safe since this check
687  	 * is done while holding the flc_lock, and new insertions into the list
688  	 * also require that it be held.
689  	 */
690  	if (hlist_unhashed(&fl->fl_link))
691  		return;
692  
693  	fll = per_cpu_ptr(&file_lock_list, fl->fl_link_cpu);
694  	spin_lock(&fll->lock);
695  	hlist_del_init(&fl->fl_link);
696  	spin_unlock(&fll->lock);
697  }
698  
699  static unsigned long
700  posix_owner_key(struct file_lock *fl)
701  {
702  	return (unsigned long)fl->fl_owner;
703  }
704  
705  static void locks_insert_global_blocked(struct file_lock *waiter)
706  {
707  	lockdep_assert_held(&blocked_lock_lock);
708  
709  	hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter));
710  }
711  
712  static void locks_delete_global_blocked(struct file_lock *waiter)
713  {
714  	lockdep_assert_held(&blocked_lock_lock);
715  
716  	hash_del(&waiter->fl_link);
717  }
718  
719  /* Remove waiter from blocker's block list.
720   * When blocker ends up pointing to itself then the list is empty.
721   *
722   * Must be called with blocked_lock_lock held.
723   */
724  static void __locks_delete_block(struct file_lock *waiter)
725  {
726  	locks_delete_global_blocked(waiter);
727  	list_del_init(&waiter->fl_blocked_member);
728  	waiter->fl_blocker = NULL;
729  }
730  
731  static void __locks_wake_up_blocks(struct file_lock *blocker)
732  {
733  	while (!list_empty(&blocker->fl_blocked_requests)) {
734  		struct file_lock *waiter;
735  
736  		waiter = list_first_entry(&blocker->fl_blocked_requests,
737  					  struct file_lock, fl_blocked_member);
738  		__locks_delete_block(waiter);
739  		if (waiter->fl_lmops && waiter->fl_lmops->lm_notify)
740  			waiter->fl_lmops->lm_notify(waiter);
741  		else
742  			wake_up(&waiter->fl_wait);
743  	}
744  }
745  
746  /**
747   *	locks_delete_block - stop waiting for a file lock
748   *	@waiter: the lock which was waiting
749   *
750   *	lockd/nfsd need to disconnect the lock while working on it.
751   */
752  int locks_delete_block(struct file_lock *waiter)
753  {
754  	int status = -ENOENT;
755  
756  	/*
757  	 * If fl_blocker is NULL, it won't be set again as this thread
758  	 * "owns" the lock and is the only one that might try to claim
759  	 * the lock.  So it is safe to test fl_blocker locklessly.
760  	 * Also if fl_blocker is NULL, this waiter is not listed on
761  	 * fl_blocked_requests for some lock, so no other request can
762  	 * be added to the list of fl_blocked_requests for this
763  	 * request.  So if fl_blocker is NULL, it is safe to
764  	 * locklessly check if fl_blocked_requests is empty.  If both
765  	 * of these checks succeed, there is no need to take the lock.
766  	 */
767  	if (waiter->fl_blocker == NULL &&
768  	    list_empty(&waiter->fl_blocked_requests))
769  		return status;
770  	spin_lock(&blocked_lock_lock);
771  	if (waiter->fl_blocker)
772  		status = 0;
773  	__locks_wake_up_blocks(waiter);
774  	__locks_delete_block(waiter);
775  	spin_unlock(&blocked_lock_lock);
776  	return status;
777  }
778  EXPORT_SYMBOL(locks_delete_block);
779  
780  /* Insert waiter into blocker's block list.
781   * We use a circular list so that processes can be easily woken up in
782   * the order they blocked. The documentation doesn't require this but
783   * it seems like the reasonable thing to do.
784   *
785   * Must be called with both the flc_lock and blocked_lock_lock held. The
786   * fl_blocked_requests list itself is protected by the blocked_lock_lock,
787   * but by ensuring that the flc_lock is also held on insertions we can avoid
788   * taking the blocked_lock_lock in some cases when we see that the
789   * fl_blocked_requests list is empty.
790   *
791   * Rather than just adding to the list, we check for conflicts with any existing
792   * waiters, and add beneath any waiter that blocks the new waiter.
793   * Thus wakeups don't happen until needed.
794   */
795  static void __locks_insert_block(struct file_lock *blocker,
796  				 struct file_lock *waiter,
797  				 bool conflict(struct file_lock *,
798  					       struct file_lock *))
799  {
800  	struct file_lock *fl;
801  	BUG_ON(!list_empty(&waiter->fl_blocked_member));
802  
803  new_blocker:
804  	list_for_each_entry(fl, &blocker->fl_blocked_requests, fl_blocked_member)
805  		if (conflict(fl, waiter)) {
806  			blocker = fl;
807  			goto new_blocker;
808  		}
809  	waiter->fl_blocker = blocker;
810  	list_add_tail(&waiter->fl_blocked_member, &blocker->fl_blocked_requests);
811  	if (IS_POSIX(blocker) && !IS_OFDLCK(blocker))
812  		locks_insert_global_blocked(waiter);
813  
814  	/* The requests in waiter->fl_blocked_requests are known to conflict with
815  	 * waiter, but might not conflict with blocker, or the requests
816  	 * and lock which block it.  So they all need to be woken.
817  	 */
818  	__locks_wake_up_blocks(waiter);
819  }
820  
821  /* Must be called with flc_lock held. */
822  static void locks_insert_block(struct file_lock *blocker,
823  			       struct file_lock *waiter,
824  			       bool conflict(struct file_lock *,
825  					     struct file_lock *))
826  {
827  	spin_lock(&blocked_lock_lock);
828  	__locks_insert_block(blocker, waiter, conflict);
829  	spin_unlock(&blocked_lock_lock);
830  }
831  
832  /*
833   * Wake up processes blocked waiting for blocker.
834   *
835   * Must be called with the inode->flc_lock held!
836   */
837  static void locks_wake_up_blocks(struct file_lock *blocker)
838  {
839  	/*
840  	 * Avoid taking global lock if list is empty. This is safe since new
841  	 * blocked requests are only added to the list under the flc_lock, and
842  	 * the flc_lock is always held here. Note that removal from the
843  	 * fl_blocked_requests list does not require the flc_lock, so we must
844  	 * recheck list_empty() after acquiring the blocked_lock_lock.
845  	 */
846  	if (list_empty(&blocker->fl_blocked_requests))
847  		return;
848  
849  	spin_lock(&blocked_lock_lock);
850  	__locks_wake_up_blocks(blocker);
851  	spin_unlock(&blocked_lock_lock);
852  }
853  
854  static void
855  locks_insert_lock_ctx(struct file_lock *fl, struct list_head *before)
856  {
857  	list_add_tail(&fl->fl_list, before);
858  	locks_insert_global_locks(fl);
859  }
860  
861  static void
862  locks_unlink_lock_ctx(struct file_lock *fl)
863  {
864  	locks_delete_global_locks(fl);
865  	list_del_init(&fl->fl_list);
866  	locks_wake_up_blocks(fl);
867  }
868  
869  static void
870  locks_delete_lock_ctx(struct file_lock *fl, struct list_head *dispose)
871  {
872  	locks_unlink_lock_ctx(fl);
873  	if (dispose)
874  		list_add(&fl->fl_list, dispose);
875  	else
876  		locks_free_lock(fl);
877  }
878  
879  /* Determine if lock sys_fl blocks lock caller_fl. Common functionality
880   * checks for shared/exclusive status of overlapping locks.
881   */
882  static bool locks_conflict(struct file_lock *caller_fl,
883  			   struct file_lock *sys_fl)
884  {
885  	if (sys_fl->fl_type == F_WRLCK)
886  		return true;
887  	if (caller_fl->fl_type == F_WRLCK)
888  		return true;
889  	return false;
890  }
891  
892  /* Determine if lock sys_fl blocks lock caller_fl. POSIX specific
893   * checking before calling locks_conflict().
894   */
895  static bool posix_locks_conflict(struct file_lock *caller_fl,
896  				 struct file_lock *sys_fl)
897  {
898  	/* POSIX locks owned by the same process do not conflict with
899  	 * each other.
900  	 */
901  	if (posix_same_owner(caller_fl, sys_fl))
902  		return false;
903  
904  	/* Check whether they overlap */
905  	if (!locks_overlap(caller_fl, sys_fl))
906  		return false;
907  
908  	return locks_conflict(caller_fl, sys_fl);
909  }
910  
911  /* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific
912   * checking before calling locks_conflict().
913   */
914  static bool flock_locks_conflict(struct file_lock *caller_fl,
915  				 struct file_lock *sys_fl)
916  {
917  	/* FLOCK locks referring to the same filp do not conflict with
918  	 * each other.
919  	 */
920  	if (caller_fl->fl_file == sys_fl->fl_file)
921  		return false;
922  	if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND))
923  		return false;
924  
925  	return locks_conflict(caller_fl, sys_fl);
926  }
927  
928  void
929  posix_test_lock(struct file *filp, struct file_lock *fl)
930  {
931  	struct file_lock *cfl;
932  	struct file_lock_context *ctx;
933  	struct inode *inode = locks_inode(filp);
934  
935  	ctx = smp_load_acquire(&inode->i_flctx);
936  	if (!ctx || list_empty_careful(&ctx->flc_posix)) {
937  		fl->fl_type = F_UNLCK;
938  		return;
939  	}
940  
941  	spin_lock(&ctx->flc_lock);
942  	list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
943  		if (posix_locks_conflict(fl, cfl)) {
944  			locks_copy_conflock(fl, cfl);
945  			goto out;
946  		}
947  	}
948  	fl->fl_type = F_UNLCK;
949  out:
950  	spin_unlock(&ctx->flc_lock);
951  	return;
952  }
953  EXPORT_SYMBOL(posix_test_lock);
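
/*
 * posix_test_lock() is what ultimately services fcntl(F_GETLK).  A rough
 * userspace sketch, given an open descriptor fd (illustrative, error
 * handling omitted):
 *
 *	struct flock fl = {
 *		.l_type   = F_WRLCK,
 *		.l_whence = SEEK_SET,
 *		.l_start  = 0,
 *		.l_len    = 0,		// whole file
 *	};
 *
 *	fcntl(fd, F_GETLK, &fl);
 *	if (fl.l_type == F_UNLCK)
 *		;	// nothing would block the described write lock
 *	else
 *		;	// fl now describes one conflicting lock, incl. l_pid
 */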
954  
955  /*
956   * Deadlock detection:
957   *
958   * We attempt to detect deadlocks that are due purely to posix file
959   * locks.
960   *
961   * We assume that a task can be waiting for at most one lock at a time.
962   * So for any acquired lock, the process holding that lock may be
963   * waiting on at most one other lock.  That lock in turn may be held by
964   * someone waiting for at most one other lock.  Given a requested lock
965   * caller_fl which is about to wait for a conflicting lock block_fl, we
966   * follow this chain of waiters to ensure we are not about to create a
967   * cycle.
968   *
969   * Since we do this before we ever put a process to sleep on a lock, we
970   * are ensured that there is never a cycle; that is what guarantees that
971   * the while() loop in posix_locks_deadlock() eventually completes.
972   *
973   * Note: the above assumption may not be true when handling lock
974   * requests from a broken NFS client. It may also fail in the presence
975   * of tasks (such as posix threads) sharing the same open file table.
976   * To handle those cases, we just bail out after a few iterations.
977   *
978   * For FL_OFDLCK locks, the owner is the filp, not the files_struct.
979   * Because the owner is not even nominally tied to a thread of
980   * execution, the deadlock detection below can't reasonably work well. Just
981   * skip it for those.
982   *
983   * In principle, we could do a more limited deadlock detection on FL_OFDLCK
984   * locks that just checks for the case where two tasks are attempting to
985   * upgrade from read to write locks on the same inode.
986   */
987  
988  #define MAX_DEADLK_ITERATIONS 10
989  
990  /* Find a lock that the owner of the given block_fl is blocking on. */
991  static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl)
992  {
993  	struct file_lock *fl;
994  
995  	hash_for_each_possible(blocked_hash, fl, fl_link, posix_owner_key(block_fl)) {
996  		if (posix_same_owner(fl, block_fl)) {
997  			while (fl->fl_blocker)
998  				fl = fl->fl_blocker;
999  			return fl;
1000  		}
1001  	}
1002  	return NULL;
1003  }
1004  
1005  /* Must be called with the blocked_lock_lock held! */
1006  static int posix_locks_deadlock(struct file_lock *caller_fl,
1007  				struct file_lock *block_fl)
1008  {
1009  	int i = 0;
1010  
1011  	lockdep_assert_held(&blocked_lock_lock);
1012  
1013  	/*
1014  	 * This deadlock detector can't reasonably detect deadlocks with
1015  	 * FL_OFDLCK locks, since they aren't owned by a process, per se.
1016  	 */
1017  	if (IS_OFDLCK(caller_fl))
1018  		return 0;
1019  
1020  	while ((block_fl = what_owner_is_waiting_for(block_fl))) {
1021  		if (i++ > MAX_DEADLK_ITERATIONS)
1022  			return 0;
1023  		if (posix_same_owner(caller_fl, block_fl))
1024  			return 1;
1025  	}
1026  	return 0;
1027  }
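
/*
 * Example of the cycle this catches (illustrative): process A holds a write
 * lock on byte 0 and process B holds a write lock on byte 1.  If A now does
 * a blocking F_SETLKW on byte 1 and then B does a blocking F_SETLKW on
 * byte 0, B's request would create the cycle A -> B -> A, so
 * posix_locks_deadlock() returns 1 and B's fcntl() fails with EDEADLK
 * instead of sleeping forever.
 */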
1028  
1029  /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks
1030   * after any leases, but before any posix locks.
1031   *
1032   * Note that if called with an FL_EXISTS argument, the caller may determine
1033   * whether or not a lock was successfully freed by testing the return
1034   * value for -ENOENT.
1035   */
1036  static int flock_lock_inode(struct inode *inode, struct file_lock *request)
1037  {
1038  	struct file_lock *new_fl = NULL;
1039  	struct file_lock *fl;
1040  	struct file_lock_context *ctx;
1041  	int error = 0;
1042  	bool found = false;
1043  	LIST_HEAD(dispose);
1044  
1045  	ctx = locks_get_lock_context(inode, request->fl_type);
1046  	if (!ctx) {
1047  		if (request->fl_type != F_UNLCK)
1048  			return -ENOMEM;
1049  		return (request->fl_flags & FL_EXISTS) ? -ENOENT : 0;
1050  	}
1051  
1052  	if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) {
1053  		new_fl = locks_alloc_lock();
1054  		if (!new_fl)
1055  			return -ENOMEM;
1056  	}
1057  
1058  	percpu_down_read(&file_rwsem);
1059  	spin_lock(&ctx->flc_lock);
1060  	if (request->fl_flags & FL_ACCESS)
1061  		goto find_conflict;
1062  
1063  	list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
1064  		if (request->fl_file != fl->fl_file)
1065  			continue;
1066  		if (request->fl_type == fl->fl_type)
1067  			goto out;
1068  		found = true;
1069  		locks_delete_lock_ctx(fl, &dispose);
1070  		break;
1071  	}
1072  
1073  	if (request->fl_type == F_UNLCK) {
1074  		if ((request->fl_flags & FL_EXISTS) && !found)
1075  			error = -ENOENT;
1076  		goto out;
1077  	}
1078  
1079  find_conflict:
1080  	list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
1081  		if (!flock_locks_conflict(request, fl))
1082  			continue;
1083  		error = -EAGAIN;
1084  		if (!(request->fl_flags & FL_SLEEP))
1085  			goto out;
1086  		error = FILE_LOCK_DEFERRED;
1087  		locks_insert_block(fl, request, flock_locks_conflict);
1088  		goto out;
1089  	}
1090  	if (request->fl_flags & FL_ACCESS)
1091  		goto out;
1092  	locks_copy_lock(new_fl, request);
1093  	locks_move_blocks(new_fl, request);
1094  	locks_insert_lock_ctx(new_fl, &ctx->flc_flock);
1095  	new_fl = NULL;
1096  	error = 0;
1097  
1098  out:
1099  	spin_unlock(&ctx->flc_lock);
1100  	percpu_up_read(&file_rwsem);
1101  	if (new_fl)
1102  		locks_free_lock(new_fl);
1103  	locks_dispose_list(&dispose);
1104  	trace_flock_lock_inode(inode, request, error);
1105  	return error;
1106  }
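
/*
 * As noted at the top of the file, an flock() upgrade or downgrade is not
 * atomic: the existing FL_FLOCK lock is deleted above before the new one is
 * acquired.  Illustrative consequence for userspace:
 *
 *	flock(fd, LOCK_SH);
 *	...
 *	flock(fd, LOCK_EX);	// may block; another LOCK_EX holder can
 *				// slip in after the shared lock is dropped
 */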
1107  
1108  static int posix_lock_inode(struct inode *inode, struct file_lock *request,
1109  			    struct file_lock *conflock)
1110  {
1111  	struct file_lock *fl, *tmp;
1112  	struct file_lock *new_fl = NULL;
1113  	struct file_lock *new_fl2 = NULL;
1114  	struct file_lock *left = NULL;
1115  	struct file_lock *right = NULL;
1116  	struct file_lock_context *ctx;
1117  	int error;
1118  	bool added = false;
1119  	LIST_HEAD(dispose);
1120  
1121  	ctx = locks_get_lock_context(inode, request->fl_type);
1122  	if (!ctx)
1123  		return (request->fl_type == F_UNLCK) ? 0 : -ENOMEM;
1124  
1125  	/*
1126  	 * We may need two file_lock structures for this operation,
1127  	 * so we get them in advance to avoid races.
1128  	 *
1129  	 * In some cases we can be sure that no new locks will be needed.
1130  	 */
1131  	if (!(request->fl_flags & FL_ACCESS) &&
1132  	    (request->fl_type != F_UNLCK ||
1133  	     request->fl_start != 0 || request->fl_end != OFFSET_MAX)) {
1134  		new_fl = locks_alloc_lock();
1135  		new_fl2 = locks_alloc_lock();
1136  	}
1137  
1138  	percpu_down_read(&file_rwsem);
1139  	spin_lock(&ctx->flc_lock);
1140  	/*
1141  	 * New lock request. Walk all POSIX locks and look for conflicts. If
1142  	 * there are any, either return error or put the request on the
1143  	 * blocker's list of waiters and the global blocked_hash.
1144  	 */
1145  	if (request->fl_type != F_UNLCK) {
1146  		list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
1147  			if (!posix_locks_conflict(request, fl))
1148  				continue;
1149  			if (conflock)
1150  				locks_copy_conflock(conflock, fl);
1151  			error = -EAGAIN;
1152  			if (!(request->fl_flags & FL_SLEEP))
1153  				goto out;
1154  			/*
1155  			 * Deadlock detection and insertion into the blocked
1156  			 * locks list must be done while holding the same lock!
1157  			 */
1158  			error = -EDEADLK;
1159  			spin_lock(&blocked_lock_lock);
1160  			/*
1161  			 * Ensure that we don't find any locks blocked on this
1162  			 * request during deadlock detection.
1163  			 */
1164  			__locks_wake_up_blocks(request);
1165  			if (likely(!posix_locks_deadlock(request, fl))) {
1166  				error = FILE_LOCK_DEFERRED;
1167  				__locks_insert_block(fl, request,
1168  						     posix_locks_conflict);
1169  			}
1170  			spin_unlock(&blocked_lock_lock);
1171  			goto out;
1172  		}
1173  	}
1174  
1175  	/* If we're just looking for a conflict, we're done. */
1176  	error = 0;
1177  	if (request->fl_flags & FL_ACCESS)
1178  		goto out;
1179  
1180  	/* Find the first old lock with the same owner as the new lock */
1181  	list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
1182  		if (posix_same_owner(request, fl))
1183  			break;
1184  	}
1185  
1186  	/* Process locks with this owner. */
1187  	list_for_each_entry_safe_from(fl, tmp, &ctx->flc_posix, fl_list) {
1188  		if (!posix_same_owner(request, fl))
1189  			break;
1190  
1191  		/* Detect adjacent or overlapping regions (if same lock type) */
1192  		if (request->fl_type == fl->fl_type) {
1193  			/* In all comparisons of start vs end, use
1194  			 * "start - 1" rather than "end + 1". If end
1195  			 * is OFFSET_MAX, end + 1 will become negative.
1196  			 */
1197  			if (fl->fl_end < request->fl_start - 1)
1198  				continue;
1199  			/* If the next lock in the list has entirely bigger
1200  			 * addresses than the new one, insert the lock here.
1201  			 */
1202  			if (fl->fl_start - 1 > request->fl_end)
1203  				break;
1204  
1205  			/* If we come here, the new and old lock are of the
1206  			 * same type and adjacent or overlapping. Merge them into
1207  			 * one lock spanning from the lower of the two start
1208  			 * addresses to the higher of the two end addresses.
1209  			 */
1210  			if (fl->fl_start > request->fl_start)
1211  				fl->fl_start = request->fl_start;
1212  			else
1213  				request->fl_start = fl->fl_start;
1214  			if (fl->fl_end < request->fl_end)
1215  				fl->fl_end = request->fl_end;
1216  			else
1217  				request->fl_end = fl->fl_end;
1218  			if (added) {
1219  				locks_delete_lock_ctx(fl, &dispose);
1220  				continue;
1221  			}
1222  			request = fl;
1223  			added = true;
1224  		} else {
1225  			/* Processing for different lock types is a bit
1226  			 * more complex.
1227  			 */
1228  			if (fl->fl_end < request->fl_start)
1229  				continue;
1230  			if (fl->fl_start > request->fl_end)
1231  				break;
1232  			if (request->fl_type == F_UNLCK)
1233  				added = true;
1234  			if (fl->fl_start < request->fl_start)
1235  				left = fl;
1236  			/* If the next lock in the list has a higher end
1237  			 * address than the new one, insert the new one here.
1238  			 */
1239  			if (fl->fl_end > request->fl_end) {
1240  				right = fl;
1241  				break;
1242  			}
1243  			if (fl->fl_start >= request->fl_start) {
1244  				/* The new lock completely replaces an old
1245  				 * one (This may happen several times).
1246  				 */
1247  				if (added) {
1248  					locks_delete_lock_ctx(fl, &dispose);
1249  					continue;
1250  				}
1251  				/*
1252  				 * Replace the old lock with new_fl, and
1253  				 * remove the old one. It's safe to do the
1254  				 * insert here since we know that we won't be
1255  				 * using new_fl later, and that the lock is
1256  				 * just replacing an existing lock.
1257  				 */
1258  				error = -ENOLCK;
1259  				if (!new_fl)
1260  					goto out;
1261  				locks_copy_lock(new_fl, request);
1262  				request = new_fl;
1263  				new_fl = NULL;
1264  				locks_insert_lock_ctx(request, &fl->fl_list);
1265  				locks_delete_lock_ctx(fl, &dispose);
1266  				added = true;
1267  			}
1268  		}
1269  	}
1270  
1271  	/*
1272  	 * The above code only modifies existing locks in case of merging or
1273  	 * replacing. If new lock(s) need to be inserted, all modifications are
1274  	 * done below this point, so it is still safe to bail out.
1275  	 */
1276  	error = -ENOLCK; /* "no luck" */
1277  	if (right && left == right && !new_fl2)
1278  		goto out;
1279  
1280  	error = 0;
1281  	if (!added) {
1282  		if (request->fl_type == F_UNLCK) {
1283  			if (request->fl_flags & FL_EXISTS)
1284  				error = -ENOENT;
1285  			goto out;
1286  		}
1287  
1288  		if (!new_fl) {
1289  			error = -ENOLCK;
1290  			goto out;
1291  		}
1292  		locks_copy_lock(new_fl, request);
1293  		locks_move_blocks(new_fl, request);
1294  		locks_insert_lock_ctx(new_fl, &fl->fl_list);
1295  		fl = new_fl;
1296  		new_fl = NULL;
1297  	}
1298  	if (right) {
1299  		if (left == right) {
1300  			/* The new lock breaks the old one in two pieces,
1301  			 * so we have to use the second new lock.
1302  			 */
1303  			left = new_fl2;
1304  			new_fl2 = NULL;
1305  			locks_copy_lock(left, right);
1306  			locks_insert_lock_ctx(left, &fl->fl_list);
1307  		}
1308  		right->fl_start = request->fl_end + 1;
1309  		locks_wake_up_blocks(right);
1310  	}
1311  	if (left) {
1312  		left->fl_end = request->fl_start - 1;
1313  		locks_wake_up_blocks(left);
1314  	}
1315   out:
1316  	spin_unlock(&ctx->flc_lock);
1317  	percpu_up_read(&file_rwsem);
1318  	/*
1319  	 * Free any unused locks.
1320  	 */
1321  	if (new_fl)
1322  		locks_free_lock(new_fl);
1323  	if (new_fl2)
1324  		locks_free_lock(new_fl2);
1325  	locks_dispose_list(&dispose);
1326  	trace_posix_lock_inode(inode, request, error);
1327  
1328  	return error;
1329  }
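
/*
 * Worked example of the splitting above (illustrative): an owner holds an
 * F_WRLCK lock on bytes 0-99 and requests F_UNLCK on bytes 40-59.  The scan
 * finds that the old lock straddles the request, so left == right and the
 * second pre-allocated lock (new_fl2) is required: the old lock is trimmed
 * to the upper piece and the copy becomes the lower piece.
 *
 *	before:  [0 ........................ 99]	F_WRLCK
 *	after:   [0 ..... 39]      [60 ..... 99]	F_WRLCK + F_WRLCK
 *
 * A whole-file unlock can never split a lock, which is why the allocations
 * at the top of the function are skipped in that case.
 */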
1330  
1331  /**
1332   * posix_lock_file - Apply a POSIX-style lock to a file
1333   * @filp: The file to apply the lock to
1334   * @fl: The lock to be applied
1335   * @conflock: Place to return a copy of the conflicting lock, if found.
1336   *
1337   * Add a POSIX style lock to a file.
1338   * We merge adjacent & overlapping locks whenever possible.
1339   * POSIX locks are sorted by owner task, then by starting address
1340   *
1341   * Note that if called with an FL_EXISTS argument, the caller may determine
1342   * whether or not a lock was successfully freed by testing the return
1343   * value for -ENOENT.
1344   */
1345  int posix_lock_file(struct file *filp, struct file_lock *fl,
1346  			struct file_lock *conflock)
1347  {
1348  	return posix_lock_inode(locks_inode(filp), fl, conflock);
1349  }
1350  EXPORT_SYMBOL(posix_lock_file);
1351  
1352  /**
1353   * posix_lock_inode_wait - Apply a POSIX-style lock to a file
1354   * @inode: inode of file to which lock request should be applied
1355   * @fl: The lock to be applied
1356   *
1357   * Apply a POSIX style lock request to an inode.
1358   */
1359  static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
1360  {
1361  	int error;
1362  	might_sleep();
1363  	for (;;) {
1364  		error = posix_lock_inode(inode, fl, NULL);
1365  		if (error != FILE_LOCK_DEFERRED)
1366  			break;
1367  		error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker);
1368  		if (error)
1369  			break;
1370  	}
1371  	locks_delete_block(fl);
1372  	return error;
1373  }
1374  
1375  #ifdef CONFIG_MANDATORY_FILE_LOCKING
1376  /**
1377   * locks_mandatory_locked - Check for an active lock
1378   * @file: the file to check
1379   *
1380   * Searches the inode's list of locks to find any POSIX locks which conflict.
1381   * This function is called from locks_verify_locked() only.
1382   */
1383  int locks_mandatory_locked(struct file *file)
1384  {
1385  	int ret;
1386  	struct inode *inode = locks_inode(file);
1387  	struct file_lock_context *ctx;
1388  	struct file_lock *fl;
1389  
1390  	ctx = smp_load_acquire(&inode->i_flctx);
1391  	if (!ctx || list_empty_careful(&ctx->flc_posix))
1392  		return 0;
1393  
1394  	/*
1395  	 * Search the lock list for this inode for any POSIX locks.
1396  	 */
1397  	spin_lock(&ctx->flc_lock);
1398  	ret = 0;
1399  	list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
1400  		if (fl->fl_owner != current->files &&
1401  		    fl->fl_owner != file) {
1402  			ret = -EAGAIN;
1403  			break;
1404  		}
1405  	}
1406  	spin_unlock(&ctx->flc_lock);
1407  	return ret;
1408  }
1409  
1410  /**
1411   * locks_mandatory_area - Check for a conflicting lock
1412   * @inode:	the file to check
1413   * @filp:       how the file was opened (if it was)
1414   * @start:	first byte in the file to check
1415   * @end:	last byte in the file to check
1416   * @type:	%F_WRLCK for a write lock, else %F_RDLCK
1417   *
1418   * Searches the inode's list of locks to find any POSIX locks which conflict.
1419   */
1420  int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start,
1421  			 loff_t end, unsigned char type)
1422  {
1423  	struct file_lock fl;
1424  	int error;
1425  	bool sleep = false;
1426  
1427  	locks_init_lock(&fl);
1428  	fl.fl_pid = current->tgid;
1429  	fl.fl_file = filp;
1430  	fl.fl_flags = FL_POSIX | FL_ACCESS;
1431  	if (filp && !(filp->f_flags & O_NONBLOCK))
1432  		sleep = true;
1433  	fl.fl_type = type;
1434  	fl.fl_start = start;
1435  	fl.fl_end = end;
1436  
1437  	for (;;) {
1438  		if (filp) {
1439  			fl.fl_owner = filp;
1440  			fl.fl_flags &= ~FL_SLEEP;
1441  			error = posix_lock_inode(inode, &fl, NULL);
1442  			if (!error)
1443  				break;
1444  		}
1445  
1446  		if (sleep)
1447  			fl.fl_flags |= FL_SLEEP;
1448  		fl.fl_owner = current->files;
1449  		error = posix_lock_inode(inode, &fl, NULL);
1450  		if (error != FILE_LOCK_DEFERRED)
1451  			break;
1452  		error = wait_event_interruptible(fl.fl_wait, !fl.fl_blocker);
1453  		if (!error) {
1454  			/*
1455  			 * If we've been sleeping someone might have
1456  			 * changed the permissions behind our back.
1457  			 */
1458  			if (__mandatory_lock(inode))
1459  				continue;
1460  		}
1461  
1462  		break;
1463  	}
1464  	locks_delete_block(&fl);
1465  
1466  	return error;
1467  }
1468  EXPORT_SYMBOL(locks_mandatory_area);
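
/*
 * Reminder (see Documentation/filesystems/mandatory-locking.txt): besides
 * CONFIG_MANDATORY_FILE_LOCKING, these checks only take effect when the
 * filesystem is mounted with the "mand" option and the file has the setgid
 * bit set with the group-execute bit clear, e.g. (illustrative):
 *
 *	mount -o remount,mand /mnt
 *	chmod g+s,g-x /mnt/file
 *
 * Otherwise POSIX locks remain purely advisory.
 */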
1469  #endif /* CONFIG_MANDATORY_FILE_LOCKING */
1470  
1471  static void lease_clear_pending(struct file_lock *fl, int arg)
1472  {
1473  	switch (arg) {
1474  	case F_UNLCK:
1475  		fl->fl_flags &= ~FL_UNLOCK_PENDING;
1476  		/* fall through */
1477  	case F_RDLCK:
1478  		fl->fl_flags &= ~FL_DOWNGRADE_PENDING;
1479  	}
1480  }
1481  
1482  /* We already had a lease on this file; just change its type */
1483  int lease_modify(struct file_lock *fl, int arg, struct list_head *dispose)
1484  {
1485  	int error = assign_type(fl, arg);
1486  
1487  	if (error)
1488  		return error;
1489  	lease_clear_pending(fl, arg);
1490  	locks_wake_up_blocks(fl);
1491  	if (arg == F_UNLCK) {
1492  		struct file *filp = fl->fl_file;
1493  
1494  		f_delown(filp);
1495  		filp->f_owner.signum = 0;
1496  		fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync);
1497  		if (fl->fl_fasync != NULL) {
1498  			printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync);
1499  			fl->fl_fasync = NULL;
1500  		}
1501  		locks_delete_lock_ctx(fl, dispose);
1502  	}
1503  	return 0;
1504  }
1505  EXPORT_SYMBOL(lease_modify);
1506  
1507  static bool past_time(unsigned long then)
1508  {
1509  	if (!then)
1510  		/* 0 is a special value meaning "this never expires": */
1511  		return false;
1512  	return time_after(jiffies, then);
1513  }
1514  
1515  static void time_out_leases(struct inode *inode, struct list_head *dispose)
1516  {
1517  	struct file_lock_context *ctx = inode->i_flctx;
1518  	struct file_lock *fl, *tmp;
1519  
1520  	lockdep_assert_held(&ctx->flc_lock);
1521  
1522  	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) {
1523  		trace_time_out_leases(inode, fl);
1524  		if (past_time(fl->fl_downgrade_time))
1525  			lease_modify(fl, F_RDLCK, dispose);
1526  		if (past_time(fl->fl_break_time))
1527  			lease_modify(fl, F_UNLCK, dispose);
1528  	}
1529  }
1530  
1531  static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker)
1532  {
1533  	bool rc;
1534  
1535  	if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) {
1536  		rc = false;
1537  		goto trace;
1538  	}
1539  	if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) {
1540  		rc = false;
1541  		goto trace;
1542  	}
1543  
1544  	rc = locks_conflict(breaker, lease);
1545  trace:
1546  	trace_leases_conflict(rc, lease, breaker);
1547  	return rc;
1548  }
1549  
1550  static bool
1551  any_leases_conflict(struct inode *inode, struct file_lock *breaker)
1552  {
1553  	struct file_lock_context *ctx = inode->i_flctx;
1554  	struct file_lock *fl;
1555  
1556  	lockdep_assert_held(&ctx->flc_lock);
1557  
1558  	list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1559  		if (leases_conflict(fl, breaker))
1560  			return true;
1561  	}
1562  	return false;
1563  }
1564  
1565  /**
1566   *	__break_lease	-	revoke all outstanding leases on file
1567   *	@inode: the inode of the file to return
1568   *	@mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR:
1569   *	    break all leases
1570   *	@type: FL_LEASE: break leases and delegations; FL_DELEG: break
1571   *	    only delegations
1572   *
1573   *	break_lease (inlined for speed) has checked there already is at least
1574   *	some kind of lock (maybe a lease) on this file.  Leases are broken on
1575   *	a call to open() or truncate().  This function can sleep unless you
1576   *	specified %O_NONBLOCK to your open().
1577   */
1578  int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1579  {
1580  	int error = 0;
1581  	struct file_lock_context *ctx;
1582  	struct file_lock *new_fl, *fl, *tmp;
1583  	unsigned long break_time;
1584  	int want_write = (mode & O_ACCMODE) != O_RDONLY;
1585  	LIST_HEAD(dispose);
1586  
1587  	new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK);
1588  	if (IS_ERR(new_fl))
1589  		return PTR_ERR(new_fl);
1590  	new_fl->fl_flags = type;
1591  
1592  	/* typically we will check that ctx is non-NULL before calling */
1593  	ctx = smp_load_acquire(&inode->i_flctx);
1594  	if (!ctx) {
1595  		WARN_ON_ONCE(1);
1596  		goto free_lock;
1597  	}
1598  
1599  	percpu_down_read(&file_rwsem);
1600  	spin_lock(&ctx->flc_lock);
1601  
1602  	time_out_leases(inode, &dispose);
1603  
1604  	if (!any_leases_conflict(inode, new_fl))
1605  		goto out;
1606  
1607  	break_time = 0;
1608  	if (lease_break_time > 0) {
1609  		break_time = jiffies + lease_break_time * HZ;
1610  		if (break_time == 0)
1611  			break_time++;	/* so that 0 means no break time */
1612  	}
1613  
1614  	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) {
1615  		if (!leases_conflict(fl, new_fl))
1616  			continue;
1617  		if (want_write) {
1618  			if (fl->fl_flags & FL_UNLOCK_PENDING)
1619  				continue;
1620  			fl->fl_flags |= FL_UNLOCK_PENDING;
1621  			fl->fl_break_time = break_time;
1622  		} else {
1623  			if (lease_breaking(fl))
1624  				continue;
1625  			fl->fl_flags |= FL_DOWNGRADE_PENDING;
1626  			fl->fl_downgrade_time = break_time;
1627  		}
1628  		if (fl->fl_lmops->lm_break(fl))
1629  			locks_delete_lock_ctx(fl, &dispose);
1630  	}
1631  
1632  	if (list_empty(&ctx->flc_lease))
1633  		goto out;
1634  
1635  	if (mode & O_NONBLOCK) {
1636  		trace_break_lease_noblock(inode, new_fl);
1637  		error = -EWOULDBLOCK;
1638  		goto out;
1639  	}
1640  
1641  restart:
1642  	fl = list_first_entry(&ctx->flc_lease, struct file_lock, fl_list);
1643  	break_time = fl->fl_break_time;
1644  	if (break_time != 0)
1645  		break_time -= jiffies;
1646  	if (break_time == 0)
1647  		break_time++;
1648  	locks_insert_block(fl, new_fl, leases_conflict);
1649  	trace_break_lease_block(inode, new_fl);
1650  	spin_unlock(&ctx->flc_lock);
1651  	percpu_up_read(&file_rwsem);
1652  
1653  	locks_dispose_list(&dispose);
1654  	error = wait_event_interruptible_timeout(new_fl->fl_wait,
1655  						!new_fl->fl_blocker, break_time);
1656  
1657  	percpu_down_read(&file_rwsem);
1658  	spin_lock(&ctx->flc_lock);
1659  	trace_break_lease_unblock(inode, new_fl);
1660  	locks_delete_block(new_fl);
1661  	if (error >= 0) {
1662  		/*
1663  		 * Wait for the next conflicting lease that has not been
1664  		 * broken yet
1665  		 */
1666  		if (error == 0)
1667  			time_out_leases(inode, &dispose);
1668  		if (any_leases_conflict(inode, new_fl))
1669  			goto restart;
1670  		error = 0;
1671  	}
1672  out:
1673  	spin_unlock(&ctx->flc_lock);
1674  	percpu_up_read(&file_rwsem);
1675  	locks_dispose_list(&dispose);
1676  free_lock:
1677  	locks_free_lock(new_fl);
1678  	return error;
1679  }
1680  EXPORT_SYMBOL(__break_lease);
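
/*
 * Illustrative timeline of a lease break: task A holds an F_WRLCK lease on
 * a file and task B calls open(..., O_RDWR).  __break_lease() marks A's
 * lease FL_UNLOCK_PENDING, lm_break() sends A a SIGIO, and B sleeps (unless
 * it passed O_NONBLOCK, in which case it gets EWOULDBLOCK).  A is expected
 * to flush its state and call fcntl(fd, F_SETLEASE, F_UNLCK); if it has not
 * done so within lease_break_time seconds (45 by default, tunable via
 * /proc/sys/fs/lease-break-time), the lease is timed out and B's open()
 * completes anyway.
 */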
1681  
1682  /**
1683   *	lease_get_mtime - update modified time of an inode with exclusive lease
1684   *	@inode: the inode
1685   *      @time:  pointer to a timespec which contains the last modified time
1686   *
1687   * This is to force NFS clients to flush their caches for files with
1688   * exclusive leases.  The justification is that if someone has an
1689   * exclusive lease, then they could be modifying it.
1690   */
1691  void lease_get_mtime(struct inode *inode, struct timespec64 *time)
1692  {
1693  	bool has_lease = false;
1694  	struct file_lock_context *ctx;
1695  	struct file_lock *fl;
1696  
1697  	ctx = smp_load_acquire(&inode->i_flctx);
1698  	if (ctx && !list_empty_careful(&ctx->flc_lease)) {
1699  		spin_lock(&ctx->flc_lock);
1700  		fl = list_first_entry_or_null(&ctx->flc_lease,
1701  					      struct file_lock, fl_list);
1702  		if (fl && (fl->fl_type == F_WRLCK))
1703  			has_lease = true;
1704  		spin_unlock(&ctx->flc_lock);
1705  	}
1706  
1707  	if (has_lease)
1708  		*time = current_time(inode);
1709  }
1710  EXPORT_SYMBOL(lease_get_mtime);
1711  
1712  /**
1713   *	fcntl_getlease - Enquire what lease is currently active
1714   *	@filp: the file
1715   *
1716   *	The value returned by this function will be one of
1717   *	(if no lease break is pending):
1718   *
1719   *	%F_RDLCK to indicate a shared lease is held.
1720   *
1721   *	%F_WRLCK to indicate an exclusive lease is held.
1722   *
1723   *	%F_UNLCK to indicate no lease is held.
1724   *
1725   *	(if a lease break is pending):
1726   *
1727   *	%F_RDLCK to indicate an exclusive lease needs to be
1728   *		changed to a shared lease (or removed).
1729   *
1730   *	%F_UNLCK to indicate the lease needs to be removed.
1731   *
1732   *	XXX: sfr & willy disagree over whether F_INPROGRESS
1733   *	should be returned to userspace.
1734   */
1735  int fcntl_getlease(struct file *filp)
1736  {
1737  	struct file_lock *fl;
1738  	struct inode *inode = locks_inode(filp);
1739  	struct file_lock_context *ctx;
1740  	int type = F_UNLCK;
1741  	LIST_HEAD(dispose);
1742  
1743  	ctx = smp_load_acquire(&inode->i_flctx);
1744  	if (ctx && !list_empty_careful(&ctx->flc_lease)) {
1745  		percpu_down_read(&file_rwsem);
1746  		spin_lock(&ctx->flc_lock);
1747  		time_out_leases(inode, &dispose);
1748  		list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1749  			if (fl->fl_file != filp)
1750  				continue;
1751  			type = target_leasetype(fl);
1752  			break;
1753  		}
1754  		spin_unlock(&ctx->flc_lock);
1755  		percpu_up_read(&file_rwsem);
1756  
1757  		locks_dispose_list(&dispose);
1758  	}
1759  	return type;
1760  }
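
/*
 * Example (userspace, illustrative only; not part of this file): querying
 * the current lease state on an already-open descriptor "fd".  The return
 * value is F_RDLCK, F_WRLCK or F_UNLCK as described above.
 *
 *	int lease = fcntl(fd, F_GETLEASE);
 *
 *	if (lease == F_WRLCK)
 *		printf("exclusive lease held\n");
 *	else if (lease == F_RDLCK)
 *		printf("shared lease held\n");
 *	else
 *		printf("no lease held\n");
 */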
1761  
1762  /**
1763   * check_conflicting_open - see if the given file points to an inode that has
1764   *			    an existing open that would conflict with the
1765   *			    desired lease.
1766   * @filp:	file to check
1767   * @arg:	type of lease that we're trying to acquire
1768   * @flags:	current lock flags
1769   *
1770   * Check to see if there's an existing open fd on this file that would
1771   * conflict with the lease we're trying to set.
1772   */
1773  static int
1774  check_conflicting_open(struct file *filp, const long arg, int flags)
1775  {
1776  	struct inode *inode = locks_inode(filp);
1777  	int self_wcount = 0, self_rcount = 0;
1778  
1779  	if (flags & FL_LAYOUT)
1780  		return 0;
1781  
1782  	if (arg == F_RDLCK)
1783  		return inode_is_open_for_write(inode) ? -EAGAIN : 0;
1784  	else if (arg != F_WRLCK)
1785  		return 0;
1786  
1787  	/*
1788  	 * Make sure that the only read/write count is from the lease requestor.
1789  	 * Note that this will result in denying write leases when i_writecount
1790  	 * is negative, which is what we want.  (We shouldn't grant write leases
1791  	 * on files open for execution.)
1792  	 */
1793  	if (filp->f_mode & FMODE_WRITE)
1794  		self_wcount = 1;
1795  	else if (filp->f_mode & FMODE_READ)
1796  		self_rcount = 1;
1797  
1798  	if (atomic_read(&inode->i_writecount) != self_wcount ||
1799  	    atomic_read(&inode->i_readcount) != self_rcount)
1800  		return -EAGAIN;
1801  
1802  	return 0;
1803  }
1804  
1805  static int
1806  generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv)
1807  {
1808  	struct file_lock *fl, *my_fl = NULL, *lease;
1809  	struct inode *inode = locks_inode(filp);
1810  	struct file_lock_context *ctx;
1811  	bool is_deleg = (*flp)->fl_flags & FL_DELEG;
1812  	int error;
1813  	LIST_HEAD(dispose);
1814  
1815  	lease = *flp;
1816  	trace_generic_add_lease(inode, lease);
1817  
1818  	/* Note that arg is never F_UNLCK here */
1819  	ctx = locks_get_lock_context(inode, arg);
1820  	if (!ctx)
1821  		return -ENOMEM;
1822  
1823  	/*
1824  	 * In the delegation case we need mutual exclusion with
1825  	 * a number of operations that take the i_mutex.  We trylock
1826  	 * because delegations are an optional optimization, and if
1827  	 * there's some chance of a conflict, we'd rather not
1828  	 * bother; maybe that's a sign this just isn't a good file to
1829  	 * hand out a delegation on.
1830  	 */
1831  	if (is_deleg && !inode_trylock(inode))
1832  		return -EAGAIN;
1833  
1834  	if (is_deleg && arg == F_WRLCK) {
1835  		/* Write delegations are not currently supported: */
1836  		inode_unlock(inode);
1837  		WARN_ON_ONCE(1);
1838  		return -EINVAL;
1839  	}
1840  
1841  	percpu_down_read(&file_rwsem);
1842  	spin_lock(&ctx->flc_lock);
1843  	time_out_leases(inode, &dispose);
1844  	error = check_conflicting_open(filp, arg, lease->fl_flags);
1845  	if (error)
1846  		goto out;
1847  
1848  	/*
1849  	 * At this point, we know that if there is an exclusive
1850  	 * lease on this file, then we hold it on this filp
1851  	 * (otherwise our open of this file would have blocked).
1852  	 * And if we are trying to acquire an exclusive lease,
1853  	 * then the file is not open by anyone (including us)
1854  	 * except for this filp.
1855  	 */
1856  	error = -EAGAIN;
1857  	list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1858  		if (fl->fl_file == filp &&
1859  		    fl->fl_owner == lease->fl_owner) {
1860  			my_fl = fl;
1861  			continue;
1862  		}
1863  
1864  		/*
1865  		 * No exclusive leases if someone else has a lease on
1866  		 * this file:
1867  		 */
1868  		if (arg == F_WRLCK)
1869  			goto out;
1870  		/*
1871  		 * Modifying our existing lease is OK, but we can't get a
1872  		 * new lease if someone else is opening for write:
1873  		 */
1874  		if (fl->fl_flags & FL_UNLOCK_PENDING)
1875  			goto out;
1876  	}
1877  
1878  	if (my_fl != NULL) {
1879  		lease = my_fl;
1880  		error = lease->fl_lmops->lm_change(lease, arg, &dispose);
1881  		if (error)
1882  			goto out;
1883  		goto out_setup;
1884  	}
1885  
1886  	error = -EINVAL;
1887  	if (!leases_enable)
1888  		goto out;
1889  
1890  	locks_insert_lock_ctx(lease, &ctx->flc_lease);
1891  	/*
1892  	 * The check in break_lease() is lockless. It's possible for another
1893  	 * open to race in after we did the earlier check for a conflicting
1894  	 * open but before the lease was inserted. Check again for a
1895  	 * conflicting open and cancel the lease if there is one.
1896  	 *
1897  	 * We also add a barrier here to ensure that the insertion of the lock
1898  	 * precedes these checks.
1899  	 */
1900  	smp_mb();
1901  	error = check_conflicting_open(filp, arg, lease->fl_flags);
1902  	if (error) {
1903  		locks_unlink_lock_ctx(lease);
1904  		goto out;
1905  	}
1906  
1907  out_setup:
1908  	if (lease->fl_lmops->lm_setup)
1909  		lease->fl_lmops->lm_setup(lease, priv);
1910  out:
1911  	spin_unlock(&ctx->flc_lock);
1912  	percpu_up_read(&file_rwsem);
1913  	locks_dispose_list(&dispose);
1914  	if (is_deleg)
1915  		inode_unlock(inode);
1916  	if (!error && !my_fl)
1917  		*flp = NULL;
1918  	return error;
1919  }
1920  
1921  static int generic_delete_lease(struct file *filp, void *owner)
1922  {
1923  	int error = -EAGAIN;
1924  	struct file_lock *fl, *victim = NULL;
1925  	struct inode *inode = locks_inode(filp);
1926  	struct file_lock_context *ctx;
1927  	LIST_HEAD(dispose);
1928  
1929  	ctx = smp_load_acquire(&inode->i_flctx);
1930  	if (!ctx) {
1931  		trace_generic_delete_lease(inode, NULL);
1932  		return error;
1933  	}
1934  
1935  	percpu_down_read(&file_rwsem);
1936  	spin_lock(&ctx->flc_lock);
1937  	list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1938  		if (fl->fl_file == filp &&
1939  		    fl->fl_owner == owner) {
1940  			victim = fl;
1941  			break;
1942  		}
1943  	}
1944  	trace_generic_delete_lease(inode, victim);
1945  	if (victim)
1946  		error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
1947  	spin_unlock(&ctx->flc_lock);
1948  	percpu_up_read(&file_rwsem);
1949  	locks_dispose_list(&dispose);
1950  	return error;
1951  }
1952  
1953  /**
1954   *	generic_setlease	-	sets a lease on an open file
1955   *	@filp:	file pointer
1956   *	@arg:	type of lease to obtain
1957   *	@flp:	input - file_lock to use, output - file_lock inserted
1958   *	@priv:	private data for lm_setup (may be NULL if lm_setup
1959   *		doesn't require it)
1960   *
1961   *	The (input) flp->fl_lmops->lm_break function is required
1962   *	by break_lease().
1963   */
1964  int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
1965  			void **priv)
1966  {
1967  	struct inode *inode = locks_inode(filp);
1968  	int error;
1969  
1970  	if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE))
1971  		return -EACCES;
1972  	if (!S_ISREG(inode->i_mode))
1973  		return -EINVAL;
1974  	error = security_file_lock(filp, arg);
1975  	if (error)
1976  		return error;
1977  
1978  	switch (arg) {
1979  	case F_UNLCK:
1980  		return generic_delete_lease(filp, *priv);
1981  	case F_RDLCK:
1982  	case F_WRLCK:
1983  		if (!(*flp)->fl_lmops->lm_break) {
1984  			WARN_ON_ONCE(1);
1985  			return -ENOLCK;
1986  		}
1987  
1988  		return generic_add_lease(filp, arg, flp, priv);
1989  	default:
1990  		return -EINVAL;
1991  	}
1992  }
1993  EXPORT_SYMBOL(generic_setlease);
1994  
1995  #if IS_ENABLED(CONFIG_SRCU)
1996  /*
1997   * Kernel subsystems can register to be notified on any attempt to set
1998   * a new lease with the lease_notifier_chain. This is used by (e.g.) nfsd
1999   * to close files that it may have cached when there is an attempt to set a
2000   * conflicting lease.
2001   */
2002  static struct srcu_notifier_head lease_notifier_chain;
2003  
2004  static inline void
2005  lease_notifier_chain_init(void)
2006  {
2007  	srcu_init_notifier_head(&lease_notifier_chain);
2008  }
2009  
2010  static inline void
2011  setlease_notifier(long arg, struct file_lock *lease)
2012  {
2013  	if (arg != F_UNLCK)
2014  		srcu_notifier_call_chain(&lease_notifier_chain, arg, lease);
2015  }
2016  
2017  int lease_register_notifier(struct notifier_block *nb)
2018  {
2019  	return srcu_notifier_chain_register(&lease_notifier_chain, nb);
2020  }
2021  EXPORT_SYMBOL_GPL(lease_register_notifier);
2022  
2023  void lease_unregister_notifier(struct notifier_block *nb)
2024  {
2025  	srcu_notifier_chain_unregister(&lease_notifier_chain, nb);
2026  }
2027  EXPORT_SYMBOL_GPL(lease_unregister_notifier);
2028  
2029  #else /* !IS_ENABLED(CONFIG_SRCU) */
2030  static inline void
2031  lease_notifier_chain_init(void)
2032  {
2033  }
2034  
2035  static inline void
2036  setlease_notifier(long arg, struct file_lock *lease)
2037  {
2038  }
2039  
2040  int lease_register_notifier(struct notifier_block *nb)
2041  {
2042  	return 0;
2043  }
2044  EXPORT_SYMBOL_GPL(lease_register_notifier);
2045  
2046  void lease_unregister_notifier(struct notifier_block *nb)
2047  {
2048  }
2049  EXPORT_SYMBOL_GPL(lease_unregister_notifier);
2050  
2051  #endif /* IS_ENABLED(CONFIG_SRCU) */
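
/*
 * Sketch (illustrative only, names assumed): how a subsystem such as nfsd
 * might hook the lease notifier chain described above.  The callback
 * receives the requested lease type as @arg and the lease itself as @data.
 *
 *	static int my_lease_event(struct notifier_block *nb,
 *				  unsigned long arg, void *data)
 *	{
 *		struct file_lock *lease = data;
 *
 *		// close any cached opens that would conflict with a lease
 *		// of type "arg" (F_RDLCK or F_WRLCK) on lease->fl_file
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_lease_nb = {
 *		.notifier_call = my_lease_event,
 *	};
 *
 *	// at module init/exit:
 *	//	lease_register_notifier(&my_lease_nb);
 *	//	lease_unregister_notifier(&my_lease_nb);
 */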
2052  
2053  /**
2054   * vfs_setlease        -       sets a lease on an open file
2055   * @filp:	file pointer
2056   * @arg:	type of lease to obtain
2057   * @lease:	file_lock to use when adding a lease
2058   * @priv:	private info for lm_setup when adding a lease (may be
2059   *		NULL if lm_setup doesn't require it)
2060   *
2061   * Call this to establish a lease on the file. The "lease" argument is not
2062   * used for F_UNLCK requests and may be NULL. For commands that set or alter
2063   * an existing lease, the ``(*lease)->fl_lmops->lm_break`` operation must be
2064   * set; if not, this function will return -ENOLCK (and generate a scary-looking
2065   * stack trace).
2066   *
2067   * The "priv" pointer is passed directly to the lm_setup function as-is. It
2068   * may be NULL if the lm_setup operation doesn't require it.
2069   */
2070  int
2071  vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
2072  {
2073  	if (lease)
2074  		setlease_notifier(arg, *lease);
2075  	if (filp->f_op->setlease)
2076  		return filp->f_op->setlease(filp, arg, lease, priv);
2077  	else
2078  		return generic_setlease(filp, arg, lease, priv);
2079  }
2080  EXPORT_SYMBOL_GPL(vfs_setlease);
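
/*
 * Sketch (illustrative only, not how this file is used internally): an
 * in-kernel caller such as nfsd typically allocates a lease, points
 * fl_lmops at its own lock_manager_operations (which must provide
 * lm_break), and hands the lease to vfs_setlease().  "my_lease_ops",
 * "my_owner" and "my_priv" below are assumed names.
 *
 *	struct file_lock *fl = locks_alloc_lock();
 *	int error;
 *
 *	if (!fl)
 *		return -ENOMEM;
 *	fl->fl_lmops = &my_lease_ops;
 *	fl->fl_flags = FL_DELEG;
 *	fl->fl_type  = F_RDLCK;
 *	fl->fl_end   = OFFSET_MAX;
 *	fl->fl_owner = my_owner;
 *	fl->fl_pid   = current->tgid;
 *	fl->fl_file  = filp;
 *	error = vfs_setlease(filp, fl->fl_type, &fl, &my_priv);
 *	if (fl)
 *		locks_free_lock(fl);	// freed unless a new lease was installed
 */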
2081  
2082  static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
2083  {
2084  	struct file_lock *fl;
2085  	struct fasync_struct *new;
2086  	int error;
2087  
2088  	fl = lease_alloc(filp, arg);
2089  	if (IS_ERR(fl))
2090  		return PTR_ERR(fl);
2091  
2092  	new = fasync_alloc();
2093  	if (!new) {
2094  		locks_free_lock(fl);
2095  		return -ENOMEM;
2096  	}
2097  	new->fa_fd = fd;
2098  
2099  	error = vfs_setlease(filp, arg, &fl, (void **)&new);
2100  	if (fl)
2101  		locks_free_lock(fl);
2102  	if (new)
2103  		fasync_free(new);
2104  	return error;
2105  }
2106  
2107  /**
2108   *	fcntl_setlease	-	sets a lease on an open file
2109   *	@fd: open file descriptor
2110   *	@filp: file pointer
2111   *	@arg: type of lease to obtain
2112   *
2113   *	Call this fcntl to establish a lease on the file.
2114   *	Note that you also need to call %F_SETSIG to
2115   *	receive a signal when the lease is broken.
2116   */
2117  int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
2118  {
2119  	if (arg == F_UNLCK)
2120  		return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp);
2121  	return do_fcntl_add_lease(fd, filp, arg);
2122  }
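
/*
 * Example (userspace, illustrative only): taking a write lease and asking
 * for a specific signal to be delivered when it is broken (see the note
 * about %F_SETSIG above).
 *
 *	fcntl(fd, F_SETSIG, SIGRTMIN);		// signal to deliver on lease break
 *	if (fcntl(fd, F_SETLEASE, F_WRLCK) == -1)
 *		perror("F_SETLEASE");
 *	// ... on notification, finish up and release the lease:
 *	fcntl(fd, F_SETLEASE, F_UNLCK);
 */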
2123  
2124  /**
2125   * flock_lock_inode_wait - Apply a FLOCK-style lock to a file
2126   * @inode: inode of the file to apply to
2127   * @fl: The lock to be applied
2128   *
2129   * Apply a FLOCK style lock request to an inode.
2130   */
2131  static int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl)
2132  {
2133  	int error;
2134  	might_sleep();
2135  	for (;;) {
2136  		error = flock_lock_inode(inode, fl);
2137  		if (error != FILE_LOCK_DEFERRED)
2138  			break;
2139  		error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker);
2140  		if (error)
2141  			break;
2142  	}
2143  	locks_delete_block(fl);
2144  	return error;
2145  }
2146  
2147  /**
2148   * locks_lock_inode_wait - Apply a lock to an inode
2149   * @inode: inode of the file to apply to
2150   * @fl: The lock to be applied
2151   *
2152   * Apply a POSIX or FLOCK style lock request to an inode.
2153   */
2154  int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl)
2155  {
2156  	int res = 0;
2157  	switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) {
2158  	case FL_POSIX:
2159  		res = posix_lock_inode_wait(inode, fl);
2160  		break;
2161  	case FL_FLOCK:
2162  		res = flock_lock_inode_wait(inode, fl);
2163  		break;
2164  	default:
2165  		BUG();
2166  	}
2167  	return res;
2168  }
2169  EXPORT_SYMBOL(locks_lock_inode_wait);
2170  
2171  /**
2172   *	sys_flock: - flock() system call.
2173   *	@fd: the file descriptor to lock.
2174   *	@cmd: the type of lock to apply.
2175   *
2176   *	Apply a %FL_FLOCK style lock to an open file descriptor.
2177   *	The @cmd can be one of:
2178   *
2179   *	- %LOCK_SH -- a shared lock.
2180   *	- %LOCK_EX -- an exclusive lock.
2181   *	- %LOCK_UN -- remove an existing lock.
2182   *	- %LOCK_MAND -- a 'mandatory' flock.
2183   *	  This exists to emulate Windows Share Modes.
2184   *
2185   *	%LOCK_MAND can be combined with %LOCK_READ or %LOCK_WRITE to allow other
2186   *	processes read and write access respectively.
2187   */
2188  SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
2189  {
2190  	struct fd f = fdget(fd);
2191  	struct file_lock *lock;
2192  	int can_sleep, unlock;
2193  	int error;
2194  
2195  	error = -EBADF;
2196  	if (!f.file)
2197  		goto out;
2198  
2199  	can_sleep = !(cmd & LOCK_NB);
2200  	cmd &= ~LOCK_NB;
2201  	unlock = (cmd == LOCK_UN);
2202  
2203  	if (!unlock && !(cmd & LOCK_MAND) &&
2204  	    !(f.file->f_mode & (FMODE_READ|FMODE_WRITE)))
2205  		goto out_putf;
2206  
2207  	lock = flock_make_lock(f.file, cmd, NULL);
2208  	if (IS_ERR(lock)) {
2209  		error = PTR_ERR(lock);
2210  		goto out_putf;
2211  	}
2212  
2213  	if (can_sleep)
2214  		lock->fl_flags |= FL_SLEEP;
2215  
2216  	error = security_file_lock(f.file, lock->fl_type);
2217  	if (error)
2218  		goto out_free;
2219  
2220  	if (f.file->f_op->flock)
2221  		error = f.file->f_op->flock(f.file,
2222  					  (can_sleep) ? F_SETLKW : F_SETLK,
2223  					  lock);
2224  	else
2225  		error = locks_lock_file_wait(f.file, lock);
2226  
2227   out_free:
2228  	locks_free_lock(lock);
2229  
2230   out_putf:
2231  	fdput(f);
2232   out:
2233  	return error;
2234  }
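
/*
 * Example (userspace, illustrative only): the syscall above backs the
 * flock(2) library call.
 *
 *	if (flock(fd, LOCK_EX | LOCK_NB) == -1) {
 *		if (errno == EWOULDBLOCK)
 *			;	// another open file description holds a conflicting lock
 *	}
 *	// ... critical section ...
 *	flock(fd, LOCK_UN);
 */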
2235  
2236  /**
2237   * vfs_test_lock - test file byte range lock
2238   * @filp: The file to test lock for
2239   * @fl: The lock to test; also used to hold result
2240   *
2241   * Returns -ERRNO on failure.  Indicates presence of conflicting lock by
2242   * setting conf->fl_type to something other than F_UNLCK.
2243   */
2244  int vfs_test_lock(struct file *filp, struct file_lock *fl)
2245  {
2246  	if (filp->f_op->lock)
2247  		return filp->f_op->lock(filp, F_GETLK, fl);
2248  	posix_test_lock(filp, fl);
2249  	return 0;
2250  }
2251  EXPORT_SYMBOL_GPL(vfs_test_lock);
2252  
2253  /**
2254   * locks_translate_pid - translate a file_lock's fl_pid number into a namespace
2255   * @fl: The file_lock whose fl_pid should be translated
2256   * @ns: The namespace into which the pid should be translated
2257   *
2258   * Used to translate a fl_pid into a namespace virtual pid number
2259   */
2260  static pid_t locks_translate_pid(struct file_lock *fl, struct pid_namespace *ns)
2261  {
2262  	pid_t vnr;
2263  	struct pid *pid;
2264  
2265  	if (IS_OFDLCK(fl))
2266  		return -1;
2267  	if (IS_REMOTELCK(fl))
2268  		return fl->fl_pid;
2269  	/*
2270  	 * If the flock owner process is dead and its pid has already been
2271  	 * freed, the translation below won't work, but we still want to show
2272  	 * the flock owner's pid number in the init pidns.
2273  	 */
2274  	if (ns == &init_pid_ns)
2275  		return (pid_t)fl->fl_pid;
2276  
2277  	rcu_read_lock();
2278  	pid = find_pid_ns(fl->fl_pid, &init_pid_ns);
2279  	vnr = pid_nr_ns(pid, ns);
2280  	rcu_read_unlock();
2281  	return vnr;
2282  }
2283  
2284  static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
2285  {
2286  	flock->l_pid = locks_translate_pid(fl, task_active_pid_ns(current));
2287  #if BITS_PER_LONG == 32
2288  	/*
2289  	 * Make sure we can represent the posix lock via
2290  	 * legacy 32bit flock.
2291  	 */
2292  	if (fl->fl_start > OFFT_OFFSET_MAX)
2293  		return -EOVERFLOW;
2294  	if (fl->fl_end != OFFSET_MAX && fl->fl_end > OFFT_OFFSET_MAX)
2295  		return -EOVERFLOW;
2296  #endif
2297  	flock->l_start = fl->fl_start;
2298  	flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
2299  		fl->fl_end - fl->fl_start + 1;
2300  	flock->l_whence = 0;
2301  	flock->l_type = fl->fl_type;
2302  	return 0;
2303  }
2304  
2305  #if BITS_PER_LONG == 32
2306  static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
2307  {
2308  	flock->l_pid = locks_translate_pid(fl, task_active_pid_ns(current));
2309  	flock->l_start = fl->fl_start;
2310  	flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
2311  		fl->fl_end - fl->fl_start + 1;
2312  	flock->l_whence = 0;
2313  	flock->l_type = fl->fl_type;
2314  }
2315  #endif
2316  
2317  /* Report the first existing lock that would conflict with l.
2318   * This implements the F_GETLK command of fcntl().
2319   */
2320  int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock *flock)
2321  {
2322  	struct file_lock *fl;
2323  	int error;
2324  
2325  	fl = locks_alloc_lock();
2326  	if (fl == NULL)
2327  		return -ENOMEM;
2328  	error = -EINVAL;
2329  	if (flock->l_type != F_RDLCK && flock->l_type != F_WRLCK)
2330  		goto out;
2331  
2332  	error = flock_to_posix_lock(filp, fl, flock);
2333  	if (error)
2334  		goto out;
2335  
2336  	if (cmd == F_OFD_GETLK) {
2337  		error = -EINVAL;
2338  		if (flock->l_pid != 0)
2339  			goto out;
2340  
2341  		cmd = F_GETLK;
2342  		fl->fl_flags |= FL_OFDLCK;
2343  		fl->fl_owner = filp;
2344  	}
2345  
2346  	error = vfs_test_lock(filp, fl);
2347  	if (error)
2348  		goto out;
2349  
2350  	flock->l_type = fl->fl_type;
2351  	if (fl->fl_type != F_UNLCK) {
2352  		error = posix_lock_to_flock(flock, fl);
2353  		if (error)
2354  			goto out;
2355  	}
2356  out:
2357  	locks_free_lock(fl);
2358  	return error;
2359  }
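
/*
 * Example (userspace, illustrative only): probing for a conflicting
 * byte-range lock.  On return l_type is F_UNLCK if the whole range could
 * be locked, otherwise the struct describes one conflicting lock.
 *
 *	struct flock fl = {
 *		.l_type   = F_WRLCK,
 *		.l_whence = SEEK_SET,
 *		.l_start  = 0,
 *		.l_len    = 0,		// 0 means "to end of file"
 *	};
 *
 *	if (fcntl(fd, F_GETLK, &fl) == 0 && fl.l_type != F_UNLCK)
 *		printf("conflicting lock held by pid %d\n", (int)fl.l_pid);
 */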
2360  
2361  /**
2362   * vfs_lock_file - file byte range lock
2363   * @filp: The file to apply the lock to
2364   * @cmd: type of locking operation (F_SETLK, F_GETLK, etc.)
2365   * @fl: The lock to be applied
2366   * @conf: Place to return a copy of the conflicting lock, if found.
2367   *
2368   * A caller that doesn't care about the conflicting lock may pass NULL
2369   * as the final argument.
2370   *
2371   * If the filesystem defines a private ->lock() method, then @conf will
2372   * be left unchanged; so a caller that cares should initialize it to
2373   * some acceptable default.
2374   *
2375   * To avoid blocking kernel daemons, such as lockd, that need to acquire POSIX
2376   * locks, the ->lock() interface may return asynchronously, before the lock has
2377   * been granted or denied by the underlying filesystem, if (and only if)
2378   * lm_grant is set. Callers expecting ->lock() to return asynchronously
2379   * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if)
2380   * the request is for a blocking lock. When ->lock() does return asynchronously,
2381   * it must return FILE_LOCK_DEFERRED, and call ->lm_grant() when the lock
2382   * request completes.
2383   * If the request is for a non-blocking lock, the filesystem should return
2384   * FILE_LOCK_DEFERRED, then try to get the lock and call the callback routine
2385   * with the result. If the request timed out, the callback routine will return
2386   * a nonzero return code and the filesystem should release the lock. The
2387   * filesystem is also responsible for keeping a corresponding POSIX lock when
2388   * it grants a lock, so the VFS can find out which locks are locally held and
2389   * do the correct lock cleanup when required.
2390   * The underlying filesystem must not drop the kernel lock or call
2391   * ->lm_grant() before returning to the caller with a FILE_LOCK_DEFERRED
2392   * return code.
2393   */
2394  int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
2395  {
2396  	if (filp->f_op->lock)
2397  		return filp->f_op->lock(filp, cmd, fl);
2398  	else
2399  		return posix_lock_file(filp, fl, conf);
2400  }
2401  EXPORT_SYMBOL_GPL(vfs_lock_file);
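
/*
 * Sketch (illustrative only, names assumed): the shape of an asynchronous
 * ->lock() implementation as described above.  The filesystem queues the
 * request, returns FILE_LOCK_DEFERRED, and later reports the outcome
 * through fl->fl_lmops->lm_grant().
 *
 *	static int myfs_lock(struct file *filp, int cmd, struct file_lock *fl)
 *	{
 *		if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
 *			myfs_queue_lock_request(filp, cmd, fl);	// assumed helper
 *			return FILE_LOCK_DEFERRED;
 *		}
 *		return posix_lock_file(filp, fl, NULL);	// synchronous fallback
 *	}
 *
 *	// later, when the queued request completes:
 *	//	if (fl->fl_lmops->lm_grant(fl, result))
 *	//		; // caller timed out; release the lock again
 */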
2402  
2403  static int do_lock_file_wait(struct file *filp, unsigned int cmd,
2404  			     struct file_lock *fl)
2405  {
2406  	int error;
2407  
2408  	error = security_file_lock(filp, fl->fl_type);
2409  	if (error)
2410  		return error;
2411  
2412  	for (;;) {
2413  		error = vfs_lock_file(filp, cmd, fl, NULL);
2414  		if (error != FILE_LOCK_DEFERRED)
2415  			break;
2416  		error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker);
2417  		if (error)
2418  			break;
2419  	}
2420  	locks_delete_block(fl);
2421  
2422  	return error;
2423  }
2424  
2425  /* Ensure that fl->fl_file has compatible f_mode for F_SETLK calls */
2426  static int
2427  check_fmode_for_setlk(struct file_lock *fl)
2428  {
2429  	switch (fl->fl_type) {
2430  	case F_RDLCK:
2431  		if (!(fl->fl_file->f_mode & FMODE_READ))
2432  			return -EBADF;
2433  		break;
2434  	case F_WRLCK:
2435  		if (!(fl->fl_file->f_mode & FMODE_WRITE))
2436  			return -EBADF;
2437  	}
2438  	return 0;
2439  }
2440  
2441  /* Apply the lock described by l to an open file descriptor.
2442   * This implements both the F_SETLK and F_SETLKW commands of fcntl().
2443   */
2444  int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
2445  		struct flock *flock)
2446  {
2447  	struct file_lock *file_lock = locks_alloc_lock();
2448  	struct inode *inode = locks_inode(filp);
2449  	struct file *f;
2450  	int error;
2451  
2452  	if (file_lock == NULL)
2453  		return -ENOLCK;
2454  
2455  	/* Don't allow mandatory locks on files that may be memory mapped
2456  	 * and shared.
2457  	 */
2458  	if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) {
2459  		error = -EAGAIN;
2460  		goto out;
2461  	}
2462  
2463  	error = flock_to_posix_lock(filp, file_lock, flock);
2464  	if (error)
2465  		goto out;
2466  
2467  	error = check_fmode_for_setlk(file_lock);
2468  	if (error)
2469  		goto out;
2470  
2471  	/*
2472  	 * If the cmd is requesting file-private locks, then set the
2473  	 * FL_OFDLCK flag and override the owner.
2474  	 */
2475  	switch (cmd) {
2476  	case F_OFD_SETLK:
2477  		error = -EINVAL;
2478  		if (flock->l_pid != 0)
2479  			goto out;
2480  
2481  		cmd = F_SETLK;
2482  		file_lock->fl_flags |= FL_OFDLCK;
2483  		file_lock->fl_owner = filp;
2484  		break;
2485  	case F_OFD_SETLKW:
2486  		error = -EINVAL;
2487  		if (flock->l_pid != 0)
2488  			goto out;
2489  
2490  		cmd = F_SETLKW;
2491  		file_lock->fl_flags |= FL_OFDLCK;
2492  		file_lock->fl_owner = filp;
2493  		/* Fallthrough */
2494  	case F_SETLKW:
2495  		file_lock->fl_flags |= FL_SLEEP;
2496  	}
2497  
2498  	error = do_lock_file_wait(filp, cmd, file_lock);
2499  
2500  	/*
2501  	 * Attempt to detect a close/fcntl race and recover by releasing the
2502  	 * lock that was just acquired. There is no need to do that when we're
2503  	 * unlocking though, or for OFD locks.
2504  	 */
2505  	if (!error && file_lock->fl_type != F_UNLCK &&
2506  	    !(file_lock->fl_flags & FL_OFDLCK)) {
2507  		/*
2508  		 * We need that spin_lock here - it prevents reordering between
2509  		 * update of i_flctx->flc_posix and check for it done in
2510  		 * close(). rcu_read_lock() wouldn't do.
2511  		 */
2512  		spin_lock(&current->files->file_lock);
2513  		f = fcheck(fd);
2514  		spin_unlock(&current->files->file_lock);
2515  		if (f != filp) {
2516  			file_lock->fl_type = F_UNLCK;
2517  			error = do_lock_file_wait(filp, cmd, file_lock);
2518  			WARN_ON_ONCE(error);
2519  			error = -EBADF;
2520  		}
2521  	}
2522  out:
2523  	trace_fcntl_setlk(inode, file_lock, error);
2524  	locks_free_lock(file_lock);
2525  	return error;
2526  }
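
/*
 * Example (userspace, illustrative only): an open file description lock
 * via F_OFD_SETLK.  As required above, l_pid must be zero in the request.
 *
 *	struct flock fl = {
 *		.l_type   = F_WRLCK,
 *		.l_whence = SEEK_SET,
 *		.l_start  = 0,
 *		.l_len    = 0,
 *		.l_pid    = 0,		// mandatory for OFD commands
 *	};
 *
 *	if (fcntl(fd, F_OFD_SETLK, &fl) == -1)
 *		perror("F_OFD_SETLK");
 */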
2527  
2528  #if BITS_PER_LONG == 32
2529  /* Report the first existing lock that would conflict with l.
2530   * This implements the F_GETLK command of fcntl().
2531   */
2532  int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 *flock)
2533  {
2534  	struct file_lock *fl;
2535  	int error;
2536  
2537  	fl = locks_alloc_lock();
2538  	if (fl == NULL)
2539  		return -ENOMEM;
2540  
2541  	error = -EINVAL;
2542  	if (flock->l_type != F_RDLCK && flock->l_type != F_WRLCK)
2543  		goto out;
2544  
2545  	error = flock64_to_posix_lock(filp, fl, flock);
2546  	if (error)
2547  		goto out;
2548  
2549  	if (cmd == F_OFD_GETLK) {
2550  		error = -EINVAL;
2551  		if (flock->l_pid != 0)
2552  			goto out;
2553  
2554  		cmd = F_GETLK64;
2555  		fl->fl_flags |= FL_OFDLCK;
2556  		fl->fl_owner = filp;
2557  	}
2558  
2559  	error = vfs_test_lock(filp, fl);
2560  	if (error)
2561  		goto out;
2562  
2563  	flock->l_type = fl->fl_type;
2564  	if (fl->fl_type != F_UNLCK)
2565  		posix_lock_to_flock64(flock, fl);
2566  
2567  out:
2568  	locks_free_lock(fl);
2569  	return error;
2570  }
2571  
2572  /* Apply the lock described by l to an open file descriptor.
2573   * This implements both the F_SETLK and F_SETLKW commands of fcntl().
2574   */
2575  int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
2576  		struct flock64 *flock)
2577  {
2578  	struct file_lock *file_lock = locks_alloc_lock();
2579  	struct inode *inode = locks_inode(filp);
2580  	struct file *f;
2581  	int error;
2582  
2583  	if (file_lock == NULL)
2584  		return -ENOLCK;
2585  
2586  	/* Don't allow mandatory locks on files that may be memory mapped
2587  	 * and shared.
2588  	 */
2589  	if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) {
2590  		error = -EAGAIN;
2591  		goto out;
2592  	}
2593  
2594  	error = flock64_to_posix_lock(filp, file_lock, flock);
2595  	if (error)
2596  		goto out;
2597  
2598  	error = check_fmode_for_setlk(file_lock);
2599  	if (error)
2600  		goto out;
2601  
2602  	/*
2603  	 * If the cmd is requesting file-private locks, then set the
2604  	 * FL_OFDLCK flag and override the owner.
2605  	 */
2606  	switch (cmd) {
2607  	case F_OFD_SETLK:
2608  		error = -EINVAL;
2609  		if (flock->l_pid != 0)
2610  			goto out;
2611  
2612  		cmd = F_SETLK64;
2613  		file_lock->fl_flags |= FL_OFDLCK;
2614  		file_lock->fl_owner = filp;
2615  		break;
2616  	case F_OFD_SETLKW:
2617  		error = -EINVAL;
2618  		if (flock->l_pid != 0)
2619  			goto out;
2620  
2621  		cmd = F_SETLKW64;
2622  		file_lock->fl_flags |= FL_OFDLCK;
2623  		file_lock->fl_owner = filp;
2624  		/* Fallthrough */
2625  	case F_SETLKW64:
2626  		file_lock->fl_flags |= FL_SLEEP;
2627  	}
2628  
2629  	error = do_lock_file_wait(filp, cmd, file_lock);
2630  
2631  	/*
2632  	 * Attempt to detect a close/fcntl race and recover by releasing the
2633  	 * lock that was just acquired. There is no need to do that when we're
2634  	 * unlocking though, or for OFD locks.
2635  	 */
2636  	if (!error && file_lock->fl_type != F_UNLCK &&
2637  	    !(file_lock->fl_flags & FL_OFDLCK)) {
2638  		/*
2639  		 * We need that spin_lock here - it prevents reordering between
2640  		 * update of i_flctx->flc_posix and check for it done in
2641  		 * close(). rcu_read_lock() wouldn't do.
2642  		 */
2643  		spin_lock(&current->files->file_lock);
2644  		f = fcheck(fd);
2645  		spin_unlock(&current->files->file_lock);
2646  		if (f != filp) {
2647  			file_lock->fl_type = F_UNLCK;
2648  			error = do_lock_file_wait(filp, cmd, file_lock);
2649  			WARN_ON_ONCE(error);
2650  			error = -EBADF;
2651  		}
2652  	}
2653  out:
2654  	locks_free_lock(file_lock);
2655  	return error;
2656  }
2657  #endif /* BITS_PER_LONG == 32 */
2658  
2659  /*
2660   * This function is called when the file is being removed
2661   * from the task's fd array.  POSIX locks belonging to this task
2662   * are deleted at this time.
2663   */
2664  void locks_remove_posix(struct file *filp, fl_owner_t owner)
2665  {
2666  	int error;
2667  	struct inode *inode = locks_inode(filp);
2668  	struct file_lock lock;
2669  	struct file_lock_context *ctx;
2670  
2671  	/*
2672  	 * If there are no locks held on this file, we don't need to call
2673  	 * posix_lock_file().  Another process could be setting a lock on this
2674  	 * file at the same time, but we wouldn't remove that lock anyway.
2675  	 */
2676  	ctx = smp_load_acquire(&inode->i_flctx);
2677  	if (!ctx || list_empty(&ctx->flc_posix))
2678  		return;
2679  
2680  	locks_init_lock(&lock);
2681  	lock.fl_type = F_UNLCK;
2682  	lock.fl_flags = FL_POSIX | FL_CLOSE;
2683  	lock.fl_start = 0;
2684  	lock.fl_end = OFFSET_MAX;
2685  	lock.fl_owner = owner;
2686  	lock.fl_pid = current->tgid;
2687  	lock.fl_file = filp;
2688  	lock.fl_ops = NULL;
2689  	lock.fl_lmops = NULL;
2690  
2691  	error = vfs_lock_file(filp, F_SETLK, &lock, NULL);
2692  
2693  	if (lock.fl_ops && lock.fl_ops->fl_release_private)
2694  		lock.fl_ops->fl_release_private(&lock);
2695  	trace_locks_remove_posix(inode, &lock, error);
2696  }
2697  EXPORT_SYMBOL(locks_remove_posix);
2698  
2699  /* The i_flctx must be valid when calling into here */
2700  static void
2701  locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
2702  {
2703  	struct file_lock fl;
2704  	struct inode *inode = locks_inode(filp);
2705  
2706  	if (list_empty(&flctx->flc_flock))
2707  		return;
2708  
2709  	flock_make_lock(filp, LOCK_UN, &fl);
2710  	fl.fl_flags |= FL_CLOSE;
2711  
2712  	if (filp->f_op->flock)
2713  		filp->f_op->flock(filp, F_SETLKW, &fl);
2714  	else
2715  		flock_lock_inode(inode, &fl);
2716  
2717  	if (fl.fl_ops && fl.fl_ops->fl_release_private)
2718  		fl.fl_ops->fl_release_private(&fl);
2719  }
2720  
2721  /* The i_flctx must be valid when calling into here */
2722  static void
2723  locks_remove_lease(struct file *filp, struct file_lock_context *ctx)
2724  {
2725  	struct file_lock *fl, *tmp;
2726  	LIST_HEAD(dispose);
2727  
2728  	if (list_empty(&ctx->flc_lease))
2729  		return;
2730  
2731  	percpu_down_read(&file_rwsem);
2732  	spin_lock(&ctx->flc_lock);
2733  	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list)
2734  		if (filp == fl->fl_file)
2735  			lease_modify(fl, F_UNLCK, &dispose);
2736  	spin_unlock(&ctx->flc_lock);
2737  	percpu_up_read(&file_rwsem);
2738  
2739  	locks_dispose_list(&dispose);
2740  }
2741  
2742  /*
2743   * This function is called on the last close of an open file.
2744   */
2745  void locks_remove_file(struct file *filp)
2746  {
2747  	struct file_lock_context *ctx;
2748  
2749  	ctx = smp_load_acquire(&locks_inode(filp)->i_flctx);
2750  	if (!ctx)
2751  		return;
2752  
2753  	/* remove any OFD locks */
2754  	locks_remove_posix(filp, filp);
2755  
2756  	/* remove flock locks */
2757  	locks_remove_flock(filp, ctx);
2758  
2759  	/* remove any leases */
2760  	locks_remove_lease(filp, ctx);
2761  
2762  	spin_lock(&ctx->flc_lock);
2763  	locks_check_ctx_file_list(filp, &ctx->flc_posix, "POSIX");
2764  	locks_check_ctx_file_list(filp, &ctx->flc_flock, "FLOCK");
2765  	locks_check_ctx_file_list(filp, &ctx->flc_lease, "LEASE");
2766  	spin_unlock(&ctx->flc_lock);
2767  }
2768  
2769  /**
2770   * vfs_cancel_lock - file byte range unblock lock
2771   * @filp: The file to apply the unblock to
2772   * @fl: The lock to be unblocked
2773   *
2774   * Used by lock managers to cancel blocked requests
2775   */
2776  int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
2777  {
2778  	if (filp->f_op->lock)
2779  		return filp->f_op->lock(filp, F_CANCELLK, fl);
2780  	return 0;
2781  }
2782  EXPORT_SYMBOL_GPL(vfs_cancel_lock);
2783  
2784  #ifdef CONFIG_PROC_FS
2785  #include <linux/proc_fs.h>
2786  #include <linux/seq_file.h>
2787  
2788  struct locks_iterator {
2789  	int	li_cpu;
2790  	loff_t	li_pos;
2791  };
2792  
2793  static void lock_get_status(struct seq_file *f, struct file_lock *fl,
2794  			    loff_t id, char *pfx)
2795  {
2796  	struct inode *inode = NULL;
2797  	unsigned int fl_pid;
2798  	struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
2799  
2800  	fl_pid = locks_translate_pid(fl, proc_pidns);
2801  	/*
2802  	 * If the lock owner is dead (and its pid has been freed) or not visible
2803  	 * in the current pidns, zero is shown as the pid value. Check lock info
2804  	 * from init_pid_ns to get the saved lock pid value.
2805  	 */
2806  
2807  	if (fl->fl_file != NULL)
2808  		inode = locks_inode(fl->fl_file);
2809  
2810  	seq_printf(f, "%lld:%s ", id, pfx);
2811  	if (IS_POSIX(fl)) {
2812  		if (fl->fl_flags & FL_ACCESS)
2813  			seq_puts(f, "ACCESS");
2814  		else if (IS_OFDLCK(fl))
2815  			seq_puts(f, "OFDLCK");
2816  		else
2817  			seq_puts(f, "POSIX ");
2818  
2819  		seq_printf(f, " %s ",
2820  			     (inode == NULL) ? "*NOINODE*" :
2821  			     mandatory_lock(inode) ? "MANDATORY" : "ADVISORY ");
2822  	} else if (IS_FLOCK(fl)) {
2823  		if (fl->fl_type & LOCK_MAND) {
2824  			seq_puts(f, "FLOCK  MSNFS     ");
2825  		} else {
2826  			seq_puts(f, "FLOCK  ADVISORY  ");
2827  		}
2828  	} else if (IS_LEASE(fl)) {
2829  		if (fl->fl_flags & FL_DELEG)
2830  			seq_puts(f, "DELEG  ");
2831  		else
2832  			seq_puts(f, "LEASE  ");
2833  
2834  		if (lease_breaking(fl))
2835  			seq_puts(f, "BREAKING  ");
2836  		else if (fl->fl_file)
2837  			seq_puts(f, "ACTIVE    ");
2838  		else
2839  			seq_puts(f, "BREAKER   ");
2840  	} else {
2841  		seq_puts(f, "UNKNOWN UNKNOWN  ");
2842  	}
2843  	if (fl->fl_type & LOCK_MAND) {
2844  		seq_printf(f, "%s ",
2845  			       (fl->fl_type & LOCK_READ)
2846  			       ? (fl->fl_type & LOCK_WRITE) ? "RW   " : "READ "
2847  			       : (fl->fl_type & LOCK_WRITE) ? "WRITE" : "NONE ");
2848  	} else {
2849  		int type = IS_LEASE(fl) ? target_leasetype(fl) : fl->fl_type;
2850  
2851  		seq_printf(f, "%s ", (type == F_WRLCK) ? "WRITE" :
2852  				     (type == F_RDLCK) ? "READ" : "UNLCK");
2853  	}
2854  	if (inode) {
2855  		/* userspace relies on this representation of dev_t */
2856  		seq_printf(f, "%d %02x:%02x:%lu ", fl_pid,
2857  				MAJOR(inode->i_sb->s_dev),
2858  				MINOR(inode->i_sb->s_dev), inode->i_ino);
2859  	} else {
2860  		seq_printf(f, "%d <none>:0 ", fl_pid);
2861  	}
2862  	if (IS_POSIX(fl)) {
2863  		if (fl->fl_end == OFFSET_MAX)
2864  			seq_printf(f, "%Ld EOF\n", fl->fl_start);
2865  		else
2866  			seq_printf(f, "%Ld %Ld\n", fl->fl_start, fl->fl_end);
2867  	} else {
2868  		seq_puts(f, "0 EOF\n");
2869  	}
2870  }
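
/*
 * For reference, the resulting /proc/locks lines look like the following
 * (pid, device and inode values here are illustrative only):
 *
 *	1: POSIX  ADVISORY  WRITE 1234 08:01:54321 0 EOF
 *	2: FLOCK  ADVISORY  WRITE 1234 08:01:54322 0 EOF
 *	3: LEASE  ACTIVE    READ 1234 08:01:54323 0 EOF
 *
 * Blocked requests are printed below their blocker with a " ->" prefix
 * (see locks_show() below).
 */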
2871  
2872  static int locks_show(struct seq_file *f, void *v)
2873  {
2874  	struct locks_iterator *iter = f->private;
2875  	struct file_lock *fl, *bfl;
2876  	struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
2877  
2878  	fl = hlist_entry(v, struct file_lock, fl_link);
2879  
2880  	if (locks_translate_pid(fl, proc_pidns) == 0)
2881  		return 0;
2882  
2883  	lock_get_status(f, fl, iter->li_pos, "");
2884  
2885  	list_for_each_entry(bfl, &fl->fl_blocked_requests, fl_blocked_member)
2886  		lock_get_status(f, bfl, iter->li_pos, " ->");
2887  
2888  	return 0;
2889  }
2890  
2891  static void __show_fd_locks(struct seq_file *f,
2892  			struct list_head *head, int *id,
2893  			struct file *filp, struct files_struct *files)
2894  {
2895  	struct file_lock *fl;
2896  
2897  	list_for_each_entry(fl, head, fl_list) {
2898  
2899  		if (filp != fl->fl_file)
2900  			continue;
2901  		if (fl->fl_owner != files &&
2902  		    fl->fl_owner != filp)
2903  			continue;
2904  
2905  		(*id)++;
2906  		seq_puts(f, "lock:\t");
2907  		lock_get_status(f, fl, *id, "");
2908  	}
2909  }
2910  
2911  void show_fd_locks(struct seq_file *f,
2912  		  struct file *filp, struct files_struct *files)
2913  {
2914  	struct inode *inode = locks_inode(filp);
2915  	struct file_lock_context *ctx;
2916  	int id = 0;
2917  
2918  	ctx = smp_load_acquire(&inode->i_flctx);
2919  	if (!ctx)
2920  		return;
2921  
2922  	spin_lock(&ctx->flc_lock);
2923  	__show_fd_locks(f, &ctx->flc_flock, &id, filp, files);
2924  	__show_fd_locks(f, &ctx->flc_posix, &id, filp, files);
2925  	__show_fd_locks(f, &ctx->flc_lease, &id, filp, files);
2926  	spin_unlock(&ctx->flc_lock);
2927  }
2928  
2929  static void *locks_start(struct seq_file *f, loff_t *pos)
2930  	__acquires(&blocked_lock_lock)
2931  {
2932  	struct locks_iterator *iter = f->private;
2933  
2934  	iter->li_pos = *pos + 1;
2935  	percpu_down_write(&file_rwsem);
2936  	spin_lock(&blocked_lock_lock);
2937  	return seq_hlist_start_percpu(&file_lock_list.hlist, &iter->li_cpu, *pos);
2938  }
2939  
2940  static void *locks_next(struct seq_file *f, void *v, loff_t *pos)
2941  {
2942  	struct locks_iterator *iter = f->private;
2943  
2944  	++iter->li_pos;
2945  	return seq_hlist_next_percpu(v, &file_lock_list.hlist, &iter->li_cpu, pos);
2946  }
2947  
2948  static void locks_stop(struct seq_file *f, void *v)
2949  	__releases(&blocked_lock_lock)
2950  {
2951  	spin_unlock(&blocked_lock_lock);
2952  	percpu_up_write(&file_rwsem);
2953  }
2954  
2955  static const struct seq_operations locks_seq_operations = {
2956  	.start	= locks_start,
2957  	.next	= locks_next,
2958  	.stop	= locks_stop,
2959  	.show	= locks_show,
2960  };
2961  
2962  static int __init proc_locks_init(void)
2963  {
2964  	proc_create_seq_private("locks", 0, NULL, &locks_seq_operations,
2965  			sizeof(struct locks_iterator), NULL);
2966  	return 0;
2967  }
2968  fs_initcall(proc_locks_init);
2969  #endif
2970  
2971  static int __init filelock_init(void)
2972  {
2973  	int i;
2974  
2975  	flctx_cache = kmem_cache_create("file_lock_ctx",
2976  			sizeof(struct file_lock_context), 0, SLAB_PANIC, NULL);
2977  
2978  	filelock_cache = kmem_cache_create("file_lock_cache",
2979  			sizeof(struct file_lock), 0, SLAB_PANIC, NULL);
2980  
2981  	for_each_possible_cpu(i) {
2982  		struct file_lock_list_struct *fll = per_cpu_ptr(&file_lock_list, i);
2983  
2984  		spin_lock_init(&fll->lock);
2985  		INIT_HLIST_HEAD(&fll->hlist);
2986  	}
2987  
2988  	lease_notifier_chain_init();
2989  	return 0;
2990  }
2991  core_initcall(filelock_init);
2992