xref: /openbmc/linux/ipc/sem.c (revision c900529f3d9161bfde5cca0754f83b4d3c3e0220)
1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   * linux/ipc/sem.c
4   * Copyright (C) 1992 Krishna Balasubramanian
5   * Copyright (C) 1995 Eric Schenk, Bruno Haible
6   *
7   * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
8   *
9   * SMP-threaded, sysctl's added
10   * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
11   * Enforced range limit on SEM_UNDO
12   * (c) 2001 Red Hat Inc
13   * Lockless wakeup
14   * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
15   * (c) 2016 Davidlohr Bueso <dave@stgolabs.net>
16   * Further wakeup optimizations, documentation
17   * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
18   *
19   * support for audit of ipc object properties and permission changes
20   * Dustin Kirkland <dustin.kirkland@us.ibm.com>
21   *
22   * namespaces support
23   * OpenVZ, SWsoft Inc.
24   * Pavel Emelianov <xemul@openvz.org>
25   *
26   * Implementation notes: (May 2010)
27   * This file implements System V semaphores.
28   *
29   * User space visible behavior:
30   * - FIFO ordering for semop() operations (just FIFO, not starvation
31   *   protection)
32   * - multiple semaphore operations that alter the same semaphore in
33   *   one semop() are handled.
34   * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
35   *   SETALL calls.
36   * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
37   * - undo adjustments at process exit are limited to 0..SEMVMX.
38   * - namespaces are supported.
39   * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
40   *   to /proc/sys/kernel/sem.
41   * - statistics about the usage are reported in /proc/sysvipc/sem.
42   *
43   * Internals:
44   * - scalability:
45   *   - all global variables are read-mostly.
46   *   - semop() calls and semctl(RMID) are synchronized by RCU.
47   * - most operations perform writes (actually: spin_lock calls) to
48   *     the per-semaphore array structure.
49   *   Thus: Perfect SMP scaling between independent semaphore arrays.
50   *         If multiple semaphores in one array are used, then cache line
51   *         thrashing on the semaphore array spinlock will limit the scaling.
52   * - semncnt and semzcnt are calculated on demand in count_semcnt()
53   * - the task that performs a successful semop() scans the list of all
54   *   sleeping tasks and completes any pending operations that can be fulfilled.
55   *   Semaphores are actively given to waiting tasks (necessary for FIFO).
56   *   (see update_queue())
57   * - To improve the scalability, the actual wake-up calls are performed after
58   *   dropping all locks. (see wake_up_sem_queue_prepare())
59   * - All work is done by the waker, the woken up task does not have to do
60   *   anything - not even acquiring a lock or dropping a refcount.
61   * - A woken up task may not even touch the semaphore array anymore, it may
62   *   have been destroyed already by a semctl(RMID).
63   * - UNDO values are stored in an array (one per process and per
64   *   semaphore array, lazily allocated). For backwards compatibility, multiple
65   *   modes for the UNDO variables are supported (per process, per thread)
66   *   (see copy_semundo, CLONE_SYSVSEM)
67   * - There are two lists of the pending operations: a per-array list
68   *   and a per-semaphore list (stored in the array). This makes it possible
69   *   to achieve FIFO ordering without always scanning all pending operations.
70   *   The worst-case behavior is nevertheless O(N^2) for N wakeups.
71   */
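/*
 * Purely illustrative user-space sketch (not part of this file): it shows the
 * behavior described above using the standard sysvipc API from <sys/sem.h>.
 * The union semun definition follows the semctl(2) convention that the caller
 * must provide it; error handling is omitted.
 *
 *	#include <sys/sem.h>
 *
 *	union semun { int val; };
 *
 *	int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
 *	union semun arg = { .val = 1 };
 *	semctl(id, 0, SETVAL, arg);		(updates sem_ctime)
 *
 *	struct sembuf dec = {
 *		.sem_num = 0, .sem_op = -1, .sem_flg = SEM_UNDO,
 *	};
 *	semop(id, &dec, 1);	may sleep until semval >= 1; the task is queued
 *				FIFO and the SEM_UNDO adjustment is recorded
 *				for automatic rollback at exit (see exit_sem())
 *	semctl(id, 0, IPC_RMID, arg);	destroys the set and wakes all
 *					sleepers with -EIDRM
 */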
72  
73  #include <linux/compat.h>
74  #include <linux/slab.h>
75  #include <linux/spinlock.h>
76  #include <linux/init.h>
77  #include <linux/proc_fs.h>
78  #include <linux/time.h>
79  #include <linux/security.h>
80  #include <linux/syscalls.h>
81  #include <linux/audit.h>
82  #include <linux/capability.h>
83  #include <linux/seq_file.h>
84  #include <linux/rwsem.h>
85  #include <linux/nsproxy.h>
86  #include <linux/ipc_namespace.h>
87  #include <linux/sched/wake_q.h>
88  #include <linux/nospec.h>
89  #include <linux/rhashtable.h>
90  
91  #include <linux/uaccess.h>
92  #include "util.h"
93  
94  /* One semaphore structure for each semaphore in the system. */
95  struct sem {
96  	int	semval;		/* current value */
97  	/*
98  	 * PID of the process that last modified the semaphore. For
99  	 * Linux, specifically these are:
100  	 *  - semop
101  	 *  - semctl, via SETVAL and SETALL.
102  	 *  - at task exit when performing undo adjustments (see exit_sem).
103  	 */
104  	struct pid *sempid;
105  	spinlock_t	lock;	/* spinlock for fine-grained semtimedop */
106  	struct list_head pending_alter; /* pending single-sop operations */
107  					/* that alter the semaphore */
108  	struct list_head pending_const; /* pending single-sop operations */
109  					/* that do not alter the semaphore*/
110  	time64_t	 sem_otime;	/* candidate for sem_otime */
111  } ____cacheline_aligned_in_smp;
112  
113  /* One sem_array data structure for each set of semaphores in the system. */
114  struct sem_array {
115  	struct kern_ipc_perm	sem_perm;	/* permissions .. see ipc.h */
116  	time64_t		sem_ctime;	/* create/last semctl() time */
117  	struct list_head	pending_alter;	/* pending operations */
118  						/* that alter the array */
119  	struct list_head	pending_const;	/* pending complex operations */
120  						/* that do not alter semvals */
121  	struct list_head	list_id;	/* undo requests on this array */
122  	int			sem_nsems;	/* no. of semaphores in array */
123  	int			complex_count;	/* pending complex operations */
124  	unsigned int		use_global_lock;/* >0: global lock required */
125  
126  	struct sem		sems[];
127  } __randomize_layout;
128  
129  /* One queue for each sleeping process in the system. */
130  struct sem_queue {
131  	struct list_head	list;	 /* queue of pending operations */
132  	struct task_struct	*sleeper; /* this process */
133  	struct sem_undo		*undo;	 /* undo structure */
134  	struct pid		*pid;	 /* process id of requesting process */
135  	int			status;	 /* completion status of operation */
136  	struct sembuf		*sops;	 /* array of pending operations */
137  	struct sembuf		*blocking; /* the operation that blocked */
138  	int			nsops;	 /* number of operations */
139  	bool			alter;	 /* does *sops alter the array? */
140  	bool                    dupsop;	 /* sops on more than one sem_num */
141  };
142  
143  /* Each task has a list of undo requests. They are executed automatically
144   * when the process exits.
145   */
146  struct sem_undo {
147  	struct list_head	list_proc;	/* per-process list: *
148  						 * all undos from one process
149  						 * rcu protected */
150  	struct rcu_head		rcu;		/* rcu struct for sem_undo */
151  	struct sem_undo_list	*ulp;		/* back ptr to sem_undo_list */
152  	struct list_head	list_id;	/* per semaphore array list:
153  						 * all undos for one array */
154  	int			semid;		/* semaphore set identifier */
155  	short			semadj[];	/* array of adjustments */
156  						/* one per semaphore */
157  };
158  
159  /* sem_undo_list controls shared access to the list of sem_undo structures
160   * that may be shared among all tasks of a CLONE_SYSVSEM task group.
161   */
162  struct sem_undo_list {
163  	refcount_t		refcnt;
164  	spinlock_t		lock;
165  	struct list_head	list_proc;
166  };
167  
168  
169  #define sem_ids(ns)	((ns)->ids[IPC_SEM_IDS])
170  
171  static int newary(struct ipc_namespace *, struct ipc_params *);
172  static void freeary(struct ipc_namespace *, struct kern_ipc_perm *);
173  #ifdef CONFIG_PROC_FS
174  static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
175  #endif
176  
177  #define SEMMSL_FAST	256 /* 512 bytes on stack */
178  #define SEMOPM_FAST	64  /* ~ 372 bytes on stack */
179  
180  /*
181   * Switching from the mode suitable for simple ops
182   * to the mode for complex ops is costly. Therefore:
183   * use some hysteresis
184   */
185  #define USE_GLOBAL_LOCK_HYSTERESIS	10
186  
187  /*
188   * Locking:
189   * a) global sem_lock() for read/write
190   *	sem_undo.list_id,
191   *	sem_array.complex_count,
192   *	sem_array.pending{_alter,_const},
193   *	sem_array.list_id
194   *
195   * b) global or semaphore sem_lock() for read/write:
196   *	sem_array.sems[i].pending_{const,alter}:
197   *
198   * c) special:
199   *	sem_undo_list.list_proc:
200   *	* undo_list->lock for write
201   *	* rcu for read
202   *	use_global_lock:
203   *	* global sem_lock() for write
204   *	* either local or global sem_lock() for read.
205   *
206   * Memory ordering:
207   * Most ordering is enforced by using spin_lock() and spin_unlock().
208   *
209   * Exceptions:
210   * 1) use_global_lock: (SEM_BARRIER_1)
211   * Setting it from non-zero to 0 is a RELEASE; this is ensured by
212   * using smp_store_release(): immediately after setting it to 0,
213   * a simple op can start.
214   * Testing whether it is non-zero is an ACQUIRE; this is ensured by using
215   * smp_load_acquire().
216   * Setting it from 0 to non-zero must be ordered with regard to
217   * this smp_load_acquire(); this is guaranteed because the smp_load_acquire()
218   * is inside a spin_lock() and, after a write from 0 to non-zero, a
219   * spin_lock()+spin_unlock() is done.
220   * To prevent the compiler/CPU from temporarily writing 0 to use_global_lock,
221   * READ_ONCE()/WRITE_ONCE() is used.
222   *
223   * 2) queue.status: (SEM_BARRIER_2)
224   * Initialization is done while holding sem_lock(), so no further barrier is
225   * required.
226   * Setting it to a result code is a RELEASE; this is ensured both by
227   * smp_store_release() (for case a) and by holding sem_lock()
228   * (for case b).
229   * The ACQUIRE when reading the result code without holding sem_lock() is
230   * achieved by using READ_ONCE() + smp_acquire__after_ctrl_dep()
231   * (case a above).
232   * Reading the result code while holding sem_lock() needs no further barriers;
233   * the locks inside sem_lock() enforce ordering (case b above).
234   *
235   * 3) current->state:
236   * current->state is set to TASK_INTERRUPTIBLE while holding sem_lock().
237   * The wakeup is handled using the wake_q infrastructure. wake_q wakeups may
238   * happen immediately after calling wake_q_add. As wake_q_add_safe() is called
239   * when holding sem_lock(), no further barriers are required.
240   *
241   * See also ipc/mqueue.c for more details on the covered races.
242   */
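/*
 * A minimal user-space analogue of the SEM_BARRIER_1 pairing above, using
 * C11 <stdatomic.h> in place of the kernel's smp_store_release() and
 * smp_load_acquire(). This is only a sketch of the ordering argument; the
 * per-semaphore and global spinlocks of the real code are omitted.
 *
 *	#include <stdatomic.h>
 *
 *	static _Atomic int use_global_lock = 1;
 *	static int per_sem_state;		stand-in for the per-semaphore queues
 *
 *	void leave_complex_mode(void)		writer, cf. complexmode_tryleave()
 *	{
 *		per_sem_state = 42;		all queue updates happen first
 *		atomic_store_explicit(&use_global_lock, 0,
 *				      memory_order_release);
 *	}
 *
 *	int try_fast_path(void)			reader, cf. sem_lock() fast path
 *	{
 *		if (atomic_load_explicit(&use_global_lock,
 *					 memory_order_acquire) == 0)
 *			return per_sem_state;	guaranteed to observe 42
 *		return -1;			fall back to the global lock
 *	}
 */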
243  
244  #define sc_semmsl	sem_ctls[0]
245  #define sc_semmns	sem_ctls[1]
246  #define sc_semopm	sem_ctls[2]
247  #define sc_semmni	sem_ctls[3]
248  
249  void sem_init_ns(struct ipc_namespace *ns)
250  {
251  	ns->sc_semmsl = SEMMSL;
252  	ns->sc_semmns = SEMMNS;
253  	ns->sc_semopm = SEMOPM;
254  	ns->sc_semmni = SEMMNI;
255  	ns->used_sems = 0;
256  	ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
257  }
258  
259  #ifdef CONFIG_IPC_NS
260  void sem_exit_ns(struct ipc_namespace *ns)
261  {
262  	free_ipcs(ns, &sem_ids(ns), freeary);
263  	idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr);
264  	rhashtable_destroy(&ns->ids[IPC_SEM_IDS].key_ht);
265  }
266  #endif
267  
268  void __init sem_init(void)
269  {
270  	sem_init_ns(&init_ipc_ns);
271  	ipc_init_proc_interface("sysvipc/sem",
272  				"       key      semid perms      nsems   uid   gid  cuid  cgid      otime      ctime\n",
273  				IPC_SEM_IDS, sysvipc_sem_proc_show);
274  }
275  
276  /**
277   * unmerge_queues - unmerge queues, if possible.
278   * @sma: semaphore array
279   *
280   * The function unmerges the wait queues if complex_count is 0.
281   * It must be called prior to dropping the global semaphore array lock.
282   */
283  static void unmerge_queues(struct sem_array *sma)
284  {
285  	struct sem_queue *q, *tq;
286  
287  	/* complex operations still around? */
288  	if (sma->complex_count)
289  		return;
290  	/*
291  	 * We will switch back to simple mode.
292  	 * Move all pending operation back into the per-semaphore
293  	 * queues.
294  	 */
295  	list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
296  		struct sem *curr;
297  		curr = &sma->sems[q->sops[0].sem_num];
298  
299  		list_add_tail(&q->list, &curr->pending_alter);
300  	}
301  	INIT_LIST_HEAD(&sma->pending_alter);
302  }
303  
304  /**
305   * merge_queues - merge single semop queues into global queue
306   * @sma: semaphore array
307   *
308   * This function merges all per-semaphore queues into the global queue.
309   * It is necessary to achieve FIFO ordering for the pending single-sop
310   * operations when a multi-semop operation must sleep.
311   * Only the alter operations must be moved, the const operations can stay.
312   */
313  static void merge_queues(struct sem_array *sma)
314  {
315  	int i;
316  	for (i = 0; i < sma->sem_nsems; i++) {
317  		struct sem *sem = &sma->sems[i];
318  
319  		list_splice_init(&sem->pending_alter, &sma->pending_alter);
320  	}
321  }
322  
323  static void sem_rcu_free(struct rcu_head *head)
324  {
325  	struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
326  	struct sem_array *sma = container_of(p, struct sem_array, sem_perm);
327  
328  	security_sem_free(&sma->sem_perm);
329  	kvfree(sma);
330  }
331  
332  /*
333   * Enter the mode suitable for non-simple operations:
334   * Caller must own sem_perm.lock.
335   */
336  static void complexmode_enter(struct sem_array *sma)
337  {
338  	int i;
339  	struct sem *sem;
340  
341  	if (sma->use_global_lock > 0)  {
342  		/*
343  		 * We are already in global lock mode.
344  		 * Nothing to do, just reset the
345  		 * counter until we return to simple mode.
346  		 */
347  		WRITE_ONCE(sma->use_global_lock, USE_GLOBAL_LOCK_HYSTERESIS);
348  		return;
349  	}
350  	WRITE_ONCE(sma->use_global_lock, USE_GLOBAL_LOCK_HYSTERESIS);
351  
352  	for (i = 0; i < sma->sem_nsems; i++) {
353  		sem = &sma->sems[i];
354  		spin_lock(&sem->lock);
355  		spin_unlock(&sem->lock);
356  	}
357  }
358  
359  /*
360   * Try to leave the mode that disallows simple operations:
361   * Caller must own sem_perm.lock.
362   */
363  static void complexmode_tryleave(struct sem_array *sma)
364  {
365  	if (sma->complex_count)  {
366  		/* Complex ops are sleeping.
367  		 * We must stay in complex mode
368  		 */
369  		return;
370  	}
371  	if (sma->use_global_lock == 1) {
372  
373  		/* See SEM_BARRIER_1 for purpose/pairing */
374  		smp_store_release(&sma->use_global_lock, 0);
375  	} else {
376  		WRITE_ONCE(sma->use_global_lock,
377  				sma->use_global_lock-1);
378  	}
379  }
380  
381  #define SEM_GLOBAL_LOCK	(-1)
382  /*
383   * If the request contains only one semaphore operation, and there are
384   * no complex transactions pending, lock only the semaphore involved.
385   * Otherwise, lock the entire semaphore array, since we either have
386   * multiple semaphores in our own semops, or we need to look at
387   * semaphores from other pending complex operations.
388   */
389  static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
390  			      int nsops)
391  {
392  	struct sem *sem;
393  	int idx;
394  
395  	if (nsops != 1) {
396  		/* Complex operation - acquire a full lock */
397  		ipc_lock_object(&sma->sem_perm);
398  
399  		/* Prevent parallel simple ops */
400  		complexmode_enter(sma);
401  		return SEM_GLOBAL_LOCK;
402  	}
403  
404  	/*
405  	 * Only one semaphore affected - try to optimize locking.
406  	 * Optimized locking is possible if no complex operation
407  	 * is either enqueued or processed right now.
408  	 *
409  	 * Both facts are tracked by use_global_lock.
410  	 */
411  	idx = array_index_nospec(sops->sem_num, sma->sem_nsems);
412  	sem = &sma->sems[idx];
413  
414  	/*
415  	 * Initial check for use_global_lock. Just an optimization,
416  	 * no locking, no memory barrier.
417  	 */
418  	if (!READ_ONCE(sma->use_global_lock)) {
419  		/*
420  		 * It appears that no complex operation is around.
421  		 * Acquire the per-semaphore lock.
422  		 */
423  		spin_lock(&sem->lock);
424  
425  		/* see SEM_BARRIER_1 for purpose/pairing */
426  		if (!smp_load_acquire(&sma->use_global_lock)) {
427  			/* fast path successful! */
428  			return sops->sem_num;
429  		}
430  		spin_unlock(&sem->lock);
431  	}
432  
433  	/* slow path: acquire the full lock */
434  	ipc_lock_object(&sma->sem_perm);
435  
436  	if (sma->use_global_lock == 0) {
437  		/*
438  		 * The use_global_lock mode ended while we waited for
439  		 * sma->sem_perm.lock. Thus we must switch to locking
440  		 * with sem->lock.
441  		 * Unlike in the fast path, there is no need to recheck
442  		 * sma->use_global_lock after we have acquired sem->lock:
443  		 * We own sma->sem_perm.lock, thus use_global_lock cannot
444  		 * change.
445  		 */
446  		spin_lock(&sem->lock);
447  
448  		ipc_unlock_object(&sma->sem_perm);
449  		return sops->sem_num;
450  	} else {
451  		/*
452  		 * Not a false alarm, thus continue to use the global lock
453  		 * mode. No need for complexmode_enter(), this was done by
454  		 * the caller that has set use_global_lock to non-zero.
455  		 */
456  		return SEM_GLOBAL_LOCK;
457  	}
458  }
459  
460  static inline void sem_unlock(struct sem_array *sma, int locknum)
461  {
462  	if (locknum == SEM_GLOBAL_LOCK) {
463  		unmerge_queues(sma);
464  		complexmode_tryleave(sma);
465  		ipc_unlock_object(&sma->sem_perm);
466  	} else {
467  		struct sem *sem = &sma->sems[locknum];
468  		spin_unlock(&sem->lock);
469  	}
470  }
471  
472  /*
473   * sem_lock_(check_) routines are called in the paths where the rwsem
474   * is not held.
475   *
476   * The caller holds the RCU read lock.
477   */
478  static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id)
479  {
480  	struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id);
481  
482  	if (IS_ERR(ipcp))
483  		return ERR_CAST(ipcp);
484  
485  	return container_of(ipcp, struct sem_array, sem_perm);
486  }
487  
488  static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns,
489  							int id)
490  {
491  	struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id);
492  
493  	if (IS_ERR(ipcp))
494  		return ERR_CAST(ipcp);
495  
496  	return container_of(ipcp, struct sem_array, sem_perm);
497  }
498  
499  static inline void sem_lock_and_putref(struct sem_array *sma)
500  {
501  	sem_lock(sma, NULL, -1);
502  	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
503  }
504  
505  static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
506  {
507  	ipc_rmid(&sem_ids(ns), &s->sem_perm);
508  }
509  
510  static struct sem_array *sem_alloc(size_t nsems)
511  {
512  	struct sem_array *sma;
513  
514  	if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0]))
515  		return NULL;
516  
517  	sma = kvzalloc(struct_size(sma, sems, nsems), GFP_KERNEL_ACCOUNT);
518  	if (unlikely(!sma))
519  		return NULL;
520  
521  	return sma;
522  }
523  
524  /**
525   * newary - Create a new semaphore set
526   * @ns: namespace
527   * @params: ptr to the structure that contains key, semflg and nsems
528   *
529   * Called with sem_ids.rwsem held (as a writer)
530   */
531  static int newary(struct ipc_namespace *ns, struct ipc_params *params)
532  {
533  	int retval;
534  	struct sem_array *sma;
535  	key_t key = params->key;
536  	int nsems = params->u.nsems;
537  	int semflg = params->flg;
538  	int i;
539  
540  	if (!nsems)
541  		return -EINVAL;
542  	if (ns->used_sems + nsems > ns->sc_semmns)
543  		return -ENOSPC;
544  
545  	sma = sem_alloc(nsems);
546  	if (!sma)
547  		return -ENOMEM;
548  
549  	sma->sem_perm.mode = (semflg & S_IRWXUGO);
550  	sma->sem_perm.key = key;
551  
552  	sma->sem_perm.security = NULL;
553  	retval = security_sem_alloc(&sma->sem_perm);
554  	if (retval) {
555  		kvfree(sma);
556  		return retval;
557  	}
558  
559  	for (i = 0; i < nsems; i++) {
560  		INIT_LIST_HEAD(&sma->sems[i].pending_alter);
561  		INIT_LIST_HEAD(&sma->sems[i].pending_const);
562  		spin_lock_init(&sma->sems[i].lock);
563  	}
564  
565  	sma->complex_count = 0;
566  	sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
567  	INIT_LIST_HEAD(&sma->pending_alter);
568  	INIT_LIST_HEAD(&sma->pending_const);
569  	INIT_LIST_HEAD(&sma->list_id);
570  	sma->sem_nsems = nsems;
571  	sma->sem_ctime = ktime_get_real_seconds();
572  
573  	/* ipc_addid() locks sma upon success. */
574  	retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
575  	if (retval < 0) {
576  		ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
577  		return retval;
578  	}
579  	ns->used_sems += nsems;
580  
581  	sem_unlock(sma, -1);
582  	rcu_read_unlock();
583  
584  	return sma->sem_perm.id;
585  }
586  
587  
588  /*
589   * Called with sem_ids.rwsem and ipcp locked.
590   */
591  static int sem_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params *params)
592  {
593  	struct sem_array *sma;
594  
595  	sma = container_of(ipcp, struct sem_array, sem_perm);
596  	if (params->u.nsems > sma->sem_nsems)
597  		return -EINVAL;
598  
599  	return 0;
600  }
601  
602  long ksys_semget(key_t key, int nsems, int semflg)
603  {
604  	struct ipc_namespace *ns;
605  	static const struct ipc_ops sem_ops = {
606  		.getnew = newary,
607  		.associate = security_sem_associate,
608  		.more_checks = sem_more_checks,
609  	};
610  	struct ipc_params sem_params;
611  
612  	ns = current->nsproxy->ipc_ns;
613  
614  	if (nsems < 0 || nsems > ns->sc_semmsl)
615  		return -EINVAL;
616  
617  	sem_params.key = key;
618  	sem_params.flg = semflg;
619  	sem_params.u.nsems = nsems;
620  
621  	return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
622  }
623  
624  SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
625  {
626  	return ksys_semget(key, nsems, semflg);
627  }
628  
629  /**
630   * perform_atomic_semop[_slow] - Attempt to perform semaphore
631   *                               operations on a given array.
632   * @sma: semaphore array
633   * @q: struct sem_queue that describes the operation
634   *
635   * Whether the caller blocks depends on the value of the semaphore
636   * operation (sem_op), as follows:
637   *
638   *  (1) >0 never blocks.
639   *  (2)  0 (wait-for-zero operation): blocks if semval is non-zero.
640   *  (3) <0 blocks if decrementing semval would take it below zero.
641   *
642   * Returns 0 if the operation was possible.
643   * Returns 1 if the operation is impossible, the caller must sleep.
644   * Returns <0 for error codes.
645   */
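/*
 * A short illustration of the three cases above from the caller's side
 * (user-space sketch; assumes "id" came from semget(), semval starts at 0,
 * and IPC_NOWAIT is used so that "must sleep" surfaces as -EAGAIN):
 *
 *	struct sembuf op = { .sem_num = 0, .sem_op = -1, .sem_flg = IPC_NOWAIT };
 *	semop(id, &op, 1);	fails with EAGAIN: decrement below 0, case (3)
 *	op.sem_op = 1;
 *	semop(id, &op, 1);	succeeds: increments never block, case (1);
 *				semval is now 1
 *	op.sem_op = 0;
 *	semop(id, &op, 1);	fails with EAGAIN: wait-for-zero while
 *				semval != 0, case (2)
 */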
646  static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q)
647  {
648  	int result, sem_op, nsops;
649  	struct pid *pid;
650  	struct sembuf *sop;
651  	struct sem *curr;
652  	struct sembuf *sops;
653  	struct sem_undo *un;
654  
655  	sops = q->sops;
656  	nsops = q->nsops;
657  	un = q->undo;
658  
659  	for (sop = sops; sop < sops + nsops; sop++) {
660  		int idx = array_index_nospec(sop->sem_num, sma->sem_nsems);
661  		curr = &sma->sems[idx];
662  		sem_op = sop->sem_op;
663  		result = curr->semval;
664  
665  		if (!sem_op && result)
666  			goto would_block;
667  
668  		result += sem_op;
669  		if (result < 0)
670  			goto would_block;
671  		if (result > SEMVMX)
672  			goto out_of_range;
673  
674  		if (sop->sem_flg & SEM_UNDO) {
675  			int undo = un->semadj[sop->sem_num] - sem_op;
676  			/* Exceeding the undo range is an error. */
677  			if (undo < (-SEMAEM - 1) || undo > SEMAEM)
678  				goto out_of_range;
679  			un->semadj[sop->sem_num] = undo;
680  		}
681  
682  		curr->semval = result;
683  	}
684  
685  	sop--;
686  	pid = q->pid;
687  	while (sop >= sops) {
688  		ipc_update_pid(&sma->sems[sop->sem_num].sempid, pid);
689  		sop--;
690  	}
691  
692  	return 0;
693  
694  out_of_range:
695  	result = -ERANGE;
696  	goto undo;
697  
698  would_block:
699  	q->blocking = sop;
700  
701  	if (sop->sem_flg & IPC_NOWAIT)
702  		result = -EAGAIN;
703  	else
704  		result = 1;
705  
706  undo:
707  	sop--;
708  	while (sop >= sops) {
709  		sem_op = sop->sem_op;
710  		sma->sems[sop->sem_num].semval -= sem_op;
711  		if (sop->sem_flg & SEM_UNDO)
712  			un->semadj[sop->sem_num] += sem_op;
713  		sop--;
714  	}
715  
716  	return result;
717  }
718  
719  static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
720  {
721  	int result, sem_op, nsops;
722  	struct sembuf *sop;
723  	struct sem *curr;
724  	struct sembuf *sops;
725  	struct sem_undo *un;
726  
727  	sops = q->sops;
728  	nsops = q->nsops;
729  	un = q->undo;
730  
731  	if (unlikely(q->dupsop))
732  		return perform_atomic_semop_slow(sma, q);
733  
734  	/*
735  	 * We scan the semaphore set twice: first to ensure that the entire
736  	 * operation can succeed, which avoids pointless writes to shared
737  	 * memory that would have to be undone again before blocking until
738  	 * the operations can go through.
739  	 */
740  	for (sop = sops; sop < sops + nsops; sop++) {
741  		int idx = array_index_nospec(sop->sem_num, sma->sem_nsems);
742  
743  		curr = &sma->sems[idx];
744  		sem_op = sop->sem_op;
745  		result = curr->semval;
746  
747  		if (!sem_op && result)
748  			goto would_block; /* wait-for-zero */
749  
750  		result += sem_op;
751  		if (result < 0)
752  			goto would_block;
753  
754  		if (result > SEMVMX)
755  			return -ERANGE;
756  
757  		if (sop->sem_flg & SEM_UNDO) {
758  			int undo = un->semadj[sop->sem_num] - sem_op;
759  
760  			/* Exceeding the undo range is an error. */
761  			if (undo < (-SEMAEM - 1) || undo > SEMAEM)
762  				return -ERANGE;
763  		}
764  	}
765  
766  	for (sop = sops; sop < sops + nsops; sop++) {
767  		curr = &sma->sems[sop->sem_num];
768  		sem_op = sop->sem_op;
769  
770  		if (sop->sem_flg & SEM_UNDO) {
771  			int undo = un->semadj[sop->sem_num] - sem_op;
772  
773  			un->semadj[sop->sem_num] = undo;
774  		}
775  		curr->semval += sem_op;
776  		ipc_update_pid(&curr->sempid, q->pid);
777  	}
778  
779  	return 0;
780  
781  would_block:
782  	q->blocking = sop;
783  	return sop->sem_flg & IPC_NOWAIT ? -EAGAIN : 1;
784  }
785  
786  static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error,
787  					     struct wake_q_head *wake_q)
788  {
789  	struct task_struct *sleeper;
790  
791  	sleeper = get_task_struct(q->sleeper);
792  
793  	/* see SEM_BARRIER_2 for purpose/pairing */
794  	smp_store_release(&q->status, error);
795  
796  	wake_q_add_safe(wake_q, sleeper);
797  }
798  
799  static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
800  {
801  	list_del(&q->list);
802  	if (q->nsops > 1)
803  		sma->complex_count--;
804  }
805  
806  /** check_restart(sma, q)
807   * @sma: semaphore array
808   * @q: the operation that just completed
809   *
810   * update_queue is O(N^2) when it restarts scanning the whole queue of
811   * waiting operations. Therefore this function checks if the restart is
812   * really necessary. It is called after a previously waiting operation
813   * modified the array.
814   * Note that wait-for-zero operations are handled without restart.
815   */
816  static inline int check_restart(struct sem_array *sma, struct sem_queue *q)
817  {
818  	/* pending complex alter operations are too difficult to analyse */
819  	if (!list_empty(&sma->pending_alter))
820  		return 1;
821  
822  	/* we were a sleeping complex operation. Too difficult */
823  	if (q->nsops > 1)
824  		return 1;
825  
826  	/* It is impossible that someone waits for the new value:
827  	 * - complex operations always restart.
828  	 * - wait-for-zero are handled separately.
829  	 * - q is a previously sleeping simple operation that
830  	 *   altered the array. It must be a decrement, because
831  	 *   simple increments never sleep.
832  	 * - If there are older (higher priority) decrements
833  	 *   in the queue, then they have observed the original
834  	 *   semval value and couldn't proceed. The operation
835   *   decremented the value - thus they won't proceed either.
836  	 */
837  	return 0;
838  }
839  
840  /**
841   * wake_const_ops - wake up non-alter tasks
842   * @sma: semaphore array.
843   * @semnum: semaphore that was modified.
844   * @wake_q: lockless wake-queue head.
845   *
846   * wake_const_ops must be called after a semaphore in a semaphore array
847   * was set to 0. If complex const operations are pending, wake_const_ops must
848   * be called with semnum = -1, as well as with the number of each modified
849   * semaphore.
850   * The tasks that must be woken up are added to @wake_q. The return code
851   * is stored in q->status.
852   * The function returns 1 if at least one operation was completed successfully.
853   */
854  static int wake_const_ops(struct sem_array *sma, int semnum,
855  			  struct wake_q_head *wake_q)
856  {
857  	struct sem_queue *q, *tmp;
858  	struct list_head *pending_list;
859  	int semop_completed = 0;
860  
861  	if (semnum == -1)
862  		pending_list = &sma->pending_const;
863  	else
864  		pending_list = &sma->sems[semnum].pending_const;
865  
866  	list_for_each_entry_safe(q, tmp, pending_list, list) {
867  		int error = perform_atomic_semop(sma, q);
868  
869  		if (error > 0)
870  			continue;
871  		/* operation completed, remove from queue & wakeup */
872  		unlink_queue(sma, q);
873  
874  		wake_up_sem_queue_prepare(q, error, wake_q);
875  		if (error == 0)
876  			semop_completed = 1;
877  	}
878  
879  	return semop_completed;
880  }
881  
882  /**
883   * do_smart_wakeup_zero - wake up all wait-for-zero tasks
884   * @sma: semaphore array
885   * @sops: operations that were performed
886   * @nsops: number of operations
887   * @wake_q: lockless wake-queue head
888   *
889   * Checks all required queues for wait-for-zero operations, based
890   * on the actual changes that were performed on the semaphore array.
891   * The function returns 1 if at least one operation was completed successfully.
892   */
893  static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
894  				int nsops, struct wake_q_head *wake_q)
895  {
896  	int i;
897  	int semop_completed = 0;
898  	int got_zero = 0;
899  
900  	/* first: the per-semaphore queues, if known */
901  	if (sops) {
902  		for (i = 0; i < nsops; i++) {
903  			int num = sops[i].sem_num;
904  
905  			if (sma->sems[num].semval == 0) {
906  				got_zero = 1;
907  				semop_completed |= wake_const_ops(sma, num, wake_q);
908  			}
909  		}
910  	} else {
911  		/*
912  		 * No sops means modified semaphores not known.
913  		 * Assume all were changed.
914  		 */
915  		for (i = 0; i < sma->sem_nsems; i++) {
916  			if (sma->sems[i].semval == 0) {
917  				got_zero = 1;
918  				semop_completed |= wake_const_ops(sma, i, wake_q);
919  			}
920  		}
921  	}
922  	/*
923  	 * If one of the modified semaphores got 0,
924  	 * then check the global queue, too.
925  	 */
926  	if (got_zero)
927  		semop_completed |= wake_const_ops(sma, -1, wake_q);
928  
929  	return semop_completed;
930  }
931  
932  
933  /**
934   * update_queue - look for tasks that can be completed.
935   * @sma: semaphore array.
936   * @semnum: semaphore that was modified.
937   * @wake_q: lockless wake-queue head.
938   *
939   * update_queue must be called after a semaphore in a semaphore array
940   * was modified. If multiple semaphores were modified, update_queue must
941   * be called with semnum = -1, as well as with the number of each modified
942   * semaphore.
943   * The tasks that must be woken up are added to @wake_q. The return code
944   * is stored in q->status.
945   * The function internally checks if const operations can now succeed.
946   *
947   * The function returns 1 if at least one semop was completed successfully.
948   */
949  static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *wake_q)
950  {
951  	struct sem_queue *q, *tmp;
952  	struct list_head *pending_list;
953  	int semop_completed = 0;
954  
955  	if (semnum == -1)
956  		pending_list = &sma->pending_alter;
957  	else
958  		pending_list = &sma->sems[semnum].pending_alter;
959  
960  again:
961  	list_for_each_entry_safe(q, tmp, pending_list, list) {
962  		int error, restart;
963  
964  		/* If we are scanning the single sop, per-semaphore list of
965  		 * one semaphore and that semaphore is 0, then it is not
966  		 * necessary to scan further: simple increments
967  		 * that affect only one entry succeed immediately and cannot
968  		 * be in the per-semaphore pending queue, and decrements
969  		 * cannot be successful if the value is already 0.
970  		 */
971  		if (semnum != -1 && sma->sems[semnum].semval == 0)
972  			break;
973  
974  		error = perform_atomic_semop(sma, q);
975  
976  		/* Does q->sleeper still need to sleep? */
977  		if (error > 0)
978  			continue;
979  
980  		unlink_queue(sma, q);
981  
982  		if (error) {
983  			restart = 0;
984  		} else {
985  			semop_completed = 1;
986  			do_smart_wakeup_zero(sma, q->sops, q->nsops, wake_q);
987  			restart = check_restart(sma, q);
988  		}
989  
990  		wake_up_sem_queue_prepare(q, error, wake_q);
991  		if (restart)
992  			goto again;
993  	}
994  	return semop_completed;
995  }
996  
997  /**
998   * set_semotime - set sem_otime
999   * @sma: semaphore array
1000   * @sops: operations that modified the array, may be NULL
1001   *
1002   * sem_otime is replicated to avoid cache line thrashing.
1003   * This function sets one instance to the current time.
1004   */
1005  static void set_semotime(struct sem_array *sma, struct sembuf *sops)
1006  {
1007  	if (sops == NULL) {
1008  		sma->sems[0].sem_otime = ktime_get_real_seconds();
1009  	} else {
1010  		sma->sems[sops[0].sem_num].sem_otime =
1011  						ktime_get_real_seconds();
1012  	}
1013  }
1014  
1015  /**
1016   * do_smart_update - optimized update_queue
1017   * @sma: semaphore array
1018   * @sops: operations that were performed
1019   * @nsops: number of operations
1020   * @otime: force setting otime
1021   * @wake_q: lockless wake-queue head
1022   *
1023   * do_smart_update() does the required calls to update_queue() and do_smart_wakeup_zero(),
1024   * based on the actual changes that were performed on the semaphore array.
1025   * Note that the function does not do the actual wake-up: the caller is
1026   * responsible for calling wake_up_q().
1027   * It is safe to perform this call after dropping all locks.
1028   */
1029  static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops,
1030  			    int otime, struct wake_q_head *wake_q)
1031  {
1032  	int i;
1033  
1034  	otime |= do_smart_wakeup_zero(sma, sops, nsops, wake_q);
1035  
1036  	if (!list_empty(&sma->pending_alter)) {
1037  		/* semaphore array uses the global queue - just process it. */
1038  		otime |= update_queue(sma, -1, wake_q);
1039  	} else {
1040  		if (!sops) {
1041  			/*
1042  			 * No sops, thus the modified semaphores are not
1043  			 * known. Check all.
1044  			 */
1045  			for (i = 0; i < sma->sem_nsems; i++)
1046  				otime |= update_queue(sma, i, wake_q);
1047  		} else {
1048  			/*
1049  			 * Check the semaphores that were increased:
1050  			 * - No complex ops, thus all sleeping ops are
1051  			 *   decrements.
1052  			 * - if we decreased the value, then any sleeping
1053  			 *   semaphore ops won't be able to run: If the
1054  			 *   previous value was too small, then the new
1055  			 *   value will be too small, too.
1056  			 */
1057  			for (i = 0; i < nsops; i++) {
1058  				if (sops[i].sem_op > 0) {
1059  					otime |= update_queue(sma,
1060  							      sops[i].sem_num, wake_q);
1061  				}
1062  			}
1063  		}
1064  	}
1065  	if (otime)
1066  		set_semotime(sma, sops);
1067  }
1068  
1069  /*
1070   * check_qop: Test if a queued operation sleeps on the semaphore semnum
1071   */
1072  static int check_qop(struct sem_array *sma, int semnum, struct sem_queue *q,
1073  			bool count_zero)
1074  {
1075  	struct sembuf *sop = q->blocking;
1076  
1077  	/*
1078  	 * Linux always (since 0.99.10) reported a task as sleeping on all
1079  	 * semaphores. This violates SUS, therefore it was changed to the
1080  	 * standard-compliant behavior.
1081  	 * Give the administrators a chance to notice that an application
1082  	 * might misbehave because it relies on the Linux behavior.
1083  	 */
1084  	pr_info_once("semctl(GETNCNT/GETZCNT) is since 3.16 Single Unix Specification compliant.\n"
1085  			"The task %s (%d) triggered the difference, watch for misbehavior.\n",
1086  			current->comm, task_pid_nr(current));
1087  
1088  	if (sop->sem_num != semnum)
1089  		return 0;
1090  
1091  	if (count_zero && sop->sem_op == 0)
1092  		return 1;
1093  	if (!count_zero && sop->sem_op < 0)
1094  		return 1;
1095  
1096  	return 0;
1097  }
1098  
1099  /* The following counts are associated with each semaphore:
1100   *   semncnt        number of tasks waiting for semval to increase
1101   *   semzcnt        number of tasks waiting for semval to become zero
1102   *
1103   * By definition, a task waits only on the semaphore of the first semop
1104   * that cannot proceed, even if additional operations would block, too.
1105   */
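/*
 * Worked example (illustrative only): assume semval of semaphore 0 is 1,
 * semval of semaphore 1 is 0, and three tasks sleep in semop():
 *	A: { .sem_num = 0, .sem_op = -2 }			simple decrement
 *	B: { .sem_num = 0, .sem_op =  0 }			wait-for-zero
 *	C: { { .sem_num = 1, .sem_op = -1 },
 *	     { .sem_num = 0, .sem_op = -1 } }			complex operation
 * C blocks on semaphore 1, its first sop that cannot proceed (q->blocking).
 * Therefore GETNCNT(0) == 1 (only A), GETZCNT(0) == 1 (only B) and
 * GETNCNT(1) == 1 (C), even though C's second sop also touches semaphore 0.
 */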
1106  static int count_semcnt(struct sem_array *sma, ushort semnum,
1107  			bool count_zero)
1108  {
1109  	struct list_head *l;
1110  	struct sem_queue *q;
1111  	int semcnt;
1112  
1113  	semcnt = 0;
1114  	/* First: check the simple operations. They are easy to evaluate */
1115  	if (count_zero)
1116  		l = &sma->sems[semnum].pending_const;
1117  	else
1118  		l = &sma->sems[semnum].pending_alter;
1119  
1120  	list_for_each_entry(q, l, list) {
1121  		/* all tasks on a per-semaphore list sleep on exactly
1122  		 * that semaphore
1123  		 */
1124  		semcnt++;
1125  	}
1126  
1127  	/* Then: check the complex operations. */
1128  	list_for_each_entry(q, &sma->pending_alter, list) {
1129  		semcnt += check_qop(sma, semnum, q, count_zero);
1130  	}
1131  	if (count_zero) {
1132  		list_for_each_entry(q, &sma->pending_const, list) {
1133  			semcnt += check_qop(sma, semnum, q, count_zero);
1134  		}
1135  	}
1136  	return semcnt;
1137  }
1138  
1139  /* Free a semaphore set. freeary() is called with sem_ids.rwsem locked
1140   * as a writer and the spinlock for this semaphore set held. sem_ids.rwsem
1141   * remains locked on exit.
1142   */
1143  static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
1144  {
1145  	struct sem_undo *un, *tu;
1146  	struct sem_queue *q, *tq;
1147  	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
1148  	int i;
1149  	DEFINE_WAKE_Q(wake_q);
1150  
1151  	/* Free the existing undo structures for this semaphore set.  */
1152  	ipc_assert_locked_object(&sma->sem_perm);
1153  	list_for_each_entry_safe(un, tu, &sma->list_id, list_id) {
1154  		list_del(&un->list_id);
1155  		spin_lock(&un->ulp->lock);
1156  		un->semid = -1;
1157  		list_del_rcu(&un->list_proc);
1158  		spin_unlock(&un->ulp->lock);
1159  		kvfree_rcu(un, rcu);
1160  	}
1161  
1162  	/* Wake up all pending processes and let them fail with EIDRM. */
1163  	list_for_each_entry_safe(q, tq, &sma->pending_const, list) {
1164  		unlink_queue(sma, q);
1165  		wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
1166  	}
1167  
1168  	list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
1169  		unlink_queue(sma, q);
1170  		wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
1171  	}
1172  	for (i = 0; i < sma->sem_nsems; i++) {
1173  		struct sem *sem = &sma->sems[i];
1174  		list_for_each_entry_safe(q, tq, &sem->pending_const, list) {
1175  			unlink_queue(sma, q);
1176  			wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
1177  		}
1178  		list_for_each_entry_safe(q, tq, &sem->pending_alter, list) {
1179  			unlink_queue(sma, q);
1180  			wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
1181  		}
1182  		ipc_update_pid(&sem->sempid, NULL);
1183  	}
1184  
1185  	/* Remove the semaphore set from the IDR */
1186  	sem_rmid(ns, sma);
1187  	sem_unlock(sma, -1);
1188  	rcu_read_unlock();
1189  
1190  	wake_up_q(&wake_q);
1191  	ns->used_sems -= sma->sem_nsems;
1192  	ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
1193  }
1194  
1195  static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
1196  {
1197  	switch (version) {
1198  	case IPC_64:
1199  		return copy_to_user(buf, in, sizeof(*in));
1200  	case IPC_OLD:
1201  	    {
1202  		struct semid_ds out;
1203  
1204  		memset(&out, 0, sizeof(out));
1205  
1206  		ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm);
1207  
1208  		out.sem_otime	= in->sem_otime;
1209  		out.sem_ctime	= in->sem_ctime;
1210  		out.sem_nsems	= in->sem_nsems;
1211  
1212  		return copy_to_user(buf, &out, sizeof(out));
1213  	    }
1214  	default:
1215  		return -EINVAL;
1216  	}
1217  }
1218  
1219  static time64_t get_semotime(struct sem_array *sma)
1220  {
1221  	int i;
1222  	time64_t res;
1223  
1224  	res = sma->sems[0].sem_otime;
1225  	for (i = 1; i < sma->sem_nsems; i++) {
1226  		time64_t to = sma->sems[i].sem_otime;
1227  
1228  		if (to > res)
1229  			res = to;
1230  	}
1231  	return res;
1232  }
1233  
1234  static int semctl_stat(struct ipc_namespace *ns, int semid,
1235  			 int cmd, struct semid64_ds *semid64)
1236  {
1237  	struct sem_array *sma;
1238  	time64_t semotime;
1239  	int err;
1240  
1241  	memset(semid64, 0, sizeof(*semid64));
1242  
1243  	rcu_read_lock();
1244  	if (cmd == SEM_STAT || cmd == SEM_STAT_ANY) {
1245  		sma = sem_obtain_object(ns, semid);
1246  		if (IS_ERR(sma)) {
1247  			err = PTR_ERR(sma);
1248  			goto out_unlock;
1249  		}
1250  	} else { /* IPC_STAT */
1251  		sma = sem_obtain_object_check(ns, semid);
1252  		if (IS_ERR(sma)) {
1253  			err = PTR_ERR(sma);
1254  			goto out_unlock;
1255  		}
1256  	}
1257  
1258  	/* see comment for SHM_STAT_ANY */
1259  	if (cmd == SEM_STAT_ANY)
1260  		audit_ipc_obj(&sma->sem_perm);
1261  	else {
1262  		err = -EACCES;
1263  		if (ipcperms(ns, &sma->sem_perm, S_IRUGO))
1264  			goto out_unlock;
1265  	}
1266  
1267  	err = security_sem_semctl(&sma->sem_perm, cmd);
1268  	if (err)
1269  		goto out_unlock;
1270  
1271  	ipc_lock_object(&sma->sem_perm);
1272  
1273  	if (!ipc_valid_object(&sma->sem_perm)) {
1274  		ipc_unlock_object(&sma->sem_perm);
1275  		err = -EIDRM;
1276  		goto out_unlock;
1277  	}
1278  
1279  	kernel_to_ipc64_perm(&sma->sem_perm, &semid64->sem_perm);
1280  	semotime = get_semotime(sma);
1281  	semid64->sem_otime = semotime;
1282  	semid64->sem_ctime = sma->sem_ctime;
1283  #ifndef CONFIG_64BIT
1284  	semid64->sem_otime_high = semotime >> 32;
1285  	semid64->sem_ctime_high = sma->sem_ctime >> 32;
1286  #endif
1287  	semid64->sem_nsems = sma->sem_nsems;
1288  
1289  	if (cmd == IPC_STAT) {
1290  		/*
1291  		 * As defined in SUS:
1292  		 * Return 0 on success
1293  		 */
1294  		err = 0;
1295  	} else {
1296  		/*
1297  		 * SEM_STAT and SEM_STAT_ANY (both Linux specific)
1298  		 * Return the full id, including the sequence number
1299  		 */
1300  		err = sma->sem_perm.id;
1301  	}
1302  	ipc_unlock_object(&sma->sem_perm);
1303  out_unlock:
1304  	rcu_read_unlock();
1305  	return err;
1306  }
1307  
1308  static int semctl_info(struct ipc_namespace *ns, int semid,
1309  			 int cmd, void __user *p)
1310  {
1311  	struct seminfo seminfo;
1312  	int max_idx;
1313  	int err;
1314  
1315  	err = security_sem_semctl(NULL, cmd);
1316  	if (err)
1317  		return err;
1318  
1319  	memset(&seminfo, 0, sizeof(seminfo));
1320  	seminfo.semmni = ns->sc_semmni;
1321  	seminfo.semmns = ns->sc_semmns;
1322  	seminfo.semmsl = ns->sc_semmsl;
1323  	seminfo.semopm = ns->sc_semopm;
1324  	seminfo.semvmx = SEMVMX;
1325  	seminfo.semmnu = SEMMNU;
1326  	seminfo.semmap = SEMMAP;
1327  	seminfo.semume = SEMUME;
1328  	down_read(&sem_ids(ns).rwsem);
1329  	if (cmd == SEM_INFO) {
1330  		seminfo.semusz = sem_ids(ns).in_use;
1331  		seminfo.semaem = ns->used_sems;
1332  	} else {
1333  		seminfo.semusz = SEMUSZ;
1334  		seminfo.semaem = SEMAEM;
1335  	}
1336  	max_idx = ipc_get_maxidx(&sem_ids(ns));
1337  	up_read(&sem_ids(ns).rwsem);
1338  	if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
1339  		return -EFAULT;
1340  	return (max_idx < 0) ? 0 : max_idx;
1341  }
1342  
1343  static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
1344  		int val)
1345  {
1346  	struct sem_undo *un;
1347  	struct sem_array *sma;
1348  	struct sem *curr;
1349  	int err;
1350  	DEFINE_WAKE_Q(wake_q);
1351  
1352  	if (val > SEMVMX || val < 0)
1353  		return -ERANGE;
1354  
1355  	rcu_read_lock();
1356  	sma = sem_obtain_object_check(ns, semid);
1357  	if (IS_ERR(sma)) {
1358  		rcu_read_unlock();
1359  		return PTR_ERR(sma);
1360  	}
1361  
1362  	if (semnum < 0 || semnum >= sma->sem_nsems) {
1363  		rcu_read_unlock();
1364  		return -EINVAL;
1365  	}
1366  
1367  
1368  	if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) {
1369  		rcu_read_unlock();
1370  		return -EACCES;
1371  	}
1372  
1373  	err = security_sem_semctl(&sma->sem_perm, SETVAL);
1374  	if (err) {
1375  		rcu_read_unlock();
1376  		return -EACCES;
1377  	}
1378  
1379  	sem_lock(sma, NULL, -1);
1380  
1381  	if (!ipc_valid_object(&sma->sem_perm)) {
1382  		sem_unlock(sma, -1);
1383  		rcu_read_unlock();
1384  		return -EIDRM;
1385  	}
1386  
1387  	semnum = array_index_nospec(semnum, sma->sem_nsems);
1388  	curr = &sma->sems[semnum];
1389  
1390  	ipc_assert_locked_object(&sma->sem_perm);
1391  	list_for_each_entry(un, &sma->list_id, list_id)
1392  		un->semadj[semnum] = 0;
1393  
1394  	curr->semval = val;
1395  	ipc_update_pid(&curr->sempid, task_tgid(current));
1396  	sma->sem_ctime = ktime_get_real_seconds();
1397  	/* maybe some queued-up processes were waiting for this */
1398  	do_smart_update(sma, NULL, 0, 0, &wake_q);
1399  	sem_unlock(sma, -1);
1400  	rcu_read_unlock();
1401  	wake_up_q(&wake_q);
1402  	return 0;
1403  }
1404  
1405  static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
1406  		int cmd, void __user *p)
1407  {
1408  	struct sem_array *sma;
1409  	struct sem *curr;
1410  	int err, nsems;
1411  	ushort fast_sem_io[SEMMSL_FAST];
1412  	ushort *sem_io = fast_sem_io;
1413  	DEFINE_WAKE_Q(wake_q);
1414  
1415  	rcu_read_lock();
1416  	sma = sem_obtain_object_check(ns, semid);
1417  	if (IS_ERR(sma)) {
1418  		rcu_read_unlock();
1419  		return PTR_ERR(sma);
1420  	}
1421  
1422  	nsems = sma->sem_nsems;
1423  
1424  	err = -EACCES;
1425  	if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? S_IWUGO : S_IRUGO))
1426  		goto out_rcu_wakeup;
1427  
1428  	err = security_sem_semctl(&sma->sem_perm, cmd);
1429  	if (err)
1430  		goto out_rcu_wakeup;
1431  
1432  	switch (cmd) {
1433  	case GETALL:
1434  	{
1435  		ushort __user *array = p;
1436  		int i;
1437  
1438  		sem_lock(sma, NULL, -1);
1439  		if (!ipc_valid_object(&sma->sem_perm)) {
1440  			err = -EIDRM;
1441  			goto out_unlock;
1442  		}
1443  		if (nsems > SEMMSL_FAST) {
1444  			if (!ipc_rcu_getref(&sma->sem_perm)) {
1445  				err = -EIDRM;
1446  				goto out_unlock;
1447  			}
1448  			sem_unlock(sma, -1);
1449  			rcu_read_unlock();
1450  			sem_io = kvmalloc_array(nsems, sizeof(ushort),
1451  						GFP_KERNEL);
1452  			if (sem_io == NULL) {
1453  				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
1454  				return -ENOMEM;
1455  			}
1456  
1457  			rcu_read_lock();
1458  			sem_lock_and_putref(sma);
1459  			if (!ipc_valid_object(&sma->sem_perm)) {
1460  				err = -EIDRM;
1461  				goto out_unlock;
1462  			}
1463  		}
1464  		for (i = 0; i < sma->sem_nsems; i++)
1465  			sem_io[i] = sma->sems[i].semval;
1466  		sem_unlock(sma, -1);
1467  		rcu_read_unlock();
1468  		err = 0;
1469  		if (copy_to_user(array, sem_io, nsems*sizeof(ushort)))
1470  			err = -EFAULT;
1471  		goto out_free;
1472  	}
1473  	case SETALL:
1474  	{
1475  		int i;
1476  		struct sem_undo *un;
1477  
1478  		if (!ipc_rcu_getref(&sma->sem_perm)) {
1479  			err = -EIDRM;
1480  			goto out_rcu_wakeup;
1481  		}
1482  		rcu_read_unlock();
1483  
1484  		if (nsems > SEMMSL_FAST) {
1485  			sem_io = kvmalloc_array(nsems, sizeof(ushort),
1486  						GFP_KERNEL);
1487  			if (sem_io == NULL) {
1488  				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
1489  				return -ENOMEM;
1490  			}
1491  		}
1492  
1493  		if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
1494  			ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
1495  			err = -EFAULT;
1496  			goto out_free;
1497  		}
1498  
1499  		for (i = 0; i < nsems; i++) {
1500  			if (sem_io[i] > SEMVMX) {
1501  				ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
1502  				err = -ERANGE;
1503  				goto out_free;
1504  			}
1505  		}
1506  		rcu_read_lock();
1507  		sem_lock_and_putref(sma);
1508  		if (!ipc_valid_object(&sma->sem_perm)) {
1509  			err = -EIDRM;
1510  			goto out_unlock;
1511  		}
1512  
1513  		for (i = 0; i < nsems; i++) {
1514  			sma->sems[i].semval = sem_io[i];
1515  			ipc_update_pid(&sma->sems[i].sempid, task_tgid(current));
1516  		}
1517  
1518  		ipc_assert_locked_object(&sma->sem_perm);
1519  		list_for_each_entry(un, &sma->list_id, list_id) {
1520  			for (i = 0; i < nsems; i++)
1521  				un->semadj[i] = 0;
1522  		}
1523  		sma->sem_ctime = ktime_get_real_seconds();
1524  		/* maybe some queued-up processes were waiting for this */
1525  		do_smart_update(sma, NULL, 0, 0, &wake_q);
1526  		err = 0;
1527  		goto out_unlock;
1528  	}
1529  	/* GETVAL, GETPID, GETNCNT, GETZCNT: fall-through */
1530  	}
1531  	err = -EINVAL;
1532  	if (semnum < 0 || semnum >= nsems)
1533  		goto out_rcu_wakeup;
1534  
1535  	sem_lock(sma, NULL, -1);
1536  	if (!ipc_valid_object(&sma->sem_perm)) {
1537  		err = -EIDRM;
1538  		goto out_unlock;
1539  	}
1540  
1541  	semnum = array_index_nospec(semnum, nsems);
1542  	curr = &sma->sems[semnum];
1543  
1544  	switch (cmd) {
1545  	case GETVAL:
1546  		err = curr->semval;
1547  		goto out_unlock;
1548  	case GETPID:
1549  		err = pid_vnr(curr->sempid);
1550  		goto out_unlock;
1551  	case GETNCNT:
1552  		err = count_semcnt(sma, semnum, 0);
1553  		goto out_unlock;
1554  	case GETZCNT:
1555  		err = count_semcnt(sma, semnum, 1);
1556  		goto out_unlock;
1557  	}
1558  
1559  out_unlock:
1560  	sem_unlock(sma, -1);
1561  out_rcu_wakeup:
1562  	rcu_read_unlock();
1563  	wake_up_q(&wake_q);
1564  out_free:
1565  	if (sem_io != fast_sem_io)
1566  		kvfree(sem_io);
1567  	return err;
1568  }
1569  
1570  static inline unsigned long
1571  copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
1572  {
1573  	switch (version) {
1574  	case IPC_64:
1575  		if (copy_from_user(out, buf, sizeof(*out)))
1576  			return -EFAULT;
1577  		return 0;
1578  	case IPC_OLD:
1579  	    {
1580  		struct semid_ds tbuf_old;
1581  
1582  		if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
1583  			return -EFAULT;
1584  
1585  		out->sem_perm.uid	= tbuf_old.sem_perm.uid;
1586  		out->sem_perm.gid	= tbuf_old.sem_perm.gid;
1587  		out->sem_perm.mode	= tbuf_old.sem_perm.mode;
1588  
1589  		return 0;
1590  	    }
1591  	default:
1592  		return -EINVAL;
1593  	}
1594  }
1595  
1596  /*
1597   * This function handles some semctl commands which require the rwsem
1598   * to be held in write mode.
1599   * NOTE: no locks may be held on entry; the rwsem is taken inside this function.
1600   */
1601  static int semctl_down(struct ipc_namespace *ns, int semid,
1602  		       int cmd, struct semid64_ds *semid64)
1603  {
1604  	struct sem_array *sma;
1605  	int err;
1606  	struct kern_ipc_perm *ipcp;
1607  
1608  	down_write(&sem_ids(ns).rwsem);
1609  	rcu_read_lock();
1610  
1611  	ipcp = ipcctl_obtain_check(ns, &sem_ids(ns), semid, cmd,
1612  				      &semid64->sem_perm, 0);
1613  	if (IS_ERR(ipcp)) {
1614  		err = PTR_ERR(ipcp);
1615  		goto out_unlock1;
1616  	}
1617  
1618  	sma = container_of(ipcp, struct sem_array, sem_perm);
1619  
1620  	err = security_sem_semctl(&sma->sem_perm, cmd);
1621  	if (err)
1622  		goto out_unlock1;
1623  
1624  	switch (cmd) {
1625  	case IPC_RMID:
1626  		sem_lock(sma, NULL, -1);
1627  		/* freeary unlocks the ipc object and rcu */
1628  		freeary(ns, ipcp);
1629  		goto out_up;
1630  	case IPC_SET:
1631  		sem_lock(sma, NULL, -1);
1632  		err = ipc_update_perm(&semid64->sem_perm, ipcp);
1633  		if (err)
1634  			goto out_unlock0;
1635  		sma->sem_ctime = ktime_get_real_seconds();
1636  		break;
1637  	default:
1638  		err = -EINVAL;
1639  		goto out_unlock1;
1640  	}
1641  
1642  out_unlock0:
1643  	sem_unlock(sma, -1);
1644  out_unlock1:
1645  	rcu_read_unlock();
1646  out_up:
1647  	up_write(&sem_ids(ns).rwsem);
1648  	return err;
1649  }
1650  
1651  static long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg, int version)
1652  {
1653  	struct ipc_namespace *ns;
1654  	void __user *p = (void __user *)arg;
1655  	struct semid64_ds semid64;
1656  	int err;
1657  
1658  	if (semid < 0)
1659  		return -EINVAL;
1660  
1661  	ns = current->nsproxy->ipc_ns;
1662  
1663  	switch (cmd) {
1664  	case IPC_INFO:
1665  	case SEM_INFO:
1666  		return semctl_info(ns, semid, cmd, p);
1667  	case IPC_STAT:
1668  	case SEM_STAT:
1669  	case SEM_STAT_ANY:
1670  		err = semctl_stat(ns, semid, cmd, &semid64);
1671  		if (err < 0)
1672  			return err;
1673  		if (copy_semid_to_user(p, &semid64, version))
1674  			err = -EFAULT;
1675  		return err;
1676  	case GETALL:
1677  	case GETVAL:
1678  	case GETPID:
1679  	case GETNCNT:
1680  	case GETZCNT:
1681  	case SETALL:
1682  		return semctl_main(ns, semid, semnum, cmd, p);
1683  	case SETVAL: {
1684  		int val;
1685  #if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN)
1686  		/* big-endian 64bit */
1687  		val = arg >> 32;
1688  #else
1689  		/* 32bit or little-endian 64bit */
1690  		val = arg;
1691  #endif
1692  		return semctl_setval(ns, semid, semnum, val);
1693  	}
1694  	case IPC_SET:
1695  		if (copy_semid_from_user(&semid64, p, version))
1696  			return -EFAULT;
1697  		fallthrough;
1698  	case IPC_RMID:
1699  		return semctl_down(ns, semid, cmd, &semid64);
1700  	default:
1701  		return -EINVAL;
1702  	}
1703  }
1704  
1705  SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
1706  {
1707  	return ksys_semctl(semid, semnum, cmd, arg, IPC_64);
1708  }
1709  
1710  #ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION
1711  long ksys_old_semctl(int semid, int semnum, int cmd, unsigned long arg)
1712  {
1713  	int version = ipc_parse_version(&cmd);
1714  
1715  	return ksys_semctl(semid, semnum, cmd, arg, version);
1716  }
1717  
1718  SYSCALL_DEFINE4(old_semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
1719  {
1720  	return ksys_old_semctl(semid, semnum, cmd, arg);
1721  }
1722  #endif
1723  
1724  #ifdef CONFIG_COMPAT
1725  
1726  struct compat_semid_ds {
1727  	struct compat_ipc_perm sem_perm;
1728  	old_time32_t sem_otime;
1729  	old_time32_t sem_ctime;
1730  	compat_uptr_t sem_base;
1731  	compat_uptr_t sem_pending;
1732  	compat_uptr_t sem_pending_last;
1733  	compat_uptr_t undo;
1734  	unsigned short sem_nsems;
1735  };
1736  
1737  static int copy_compat_semid_from_user(struct semid64_ds *out, void __user *buf,
1738  					int version)
1739  {
1740  	memset(out, 0, sizeof(*out));
1741  	if (version == IPC_64) {
1742  		struct compat_semid64_ds __user *p = buf;
1743  		return get_compat_ipc64_perm(&out->sem_perm, &p->sem_perm);
1744  	} else {
1745  		struct compat_semid_ds __user *p = buf;
1746  		return get_compat_ipc_perm(&out->sem_perm, &p->sem_perm);
1747  	}
1748  }
1749  
1750  static int copy_compat_semid_to_user(void __user *buf, struct semid64_ds *in,
1751  					int version)
1752  {
1753  	if (version == IPC_64) {
1754  		struct compat_semid64_ds v;
1755  		memset(&v, 0, sizeof(v));
1756  		to_compat_ipc64_perm(&v.sem_perm, &in->sem_perm);
1757  		v.sem_otime	 = lower_32_bits(in->sem_otime);
1758  		v.sem_otime_high = upper_32_bits(in->sem_otime);
1759  		v.sem_ctime	 = lower_32_bits(in->sem_ctime);
1760  		v.sem_ctime_high = upper_32_bits(in->sem_ctime);
1761  		v.sem_nsems = in->sem_nsems;
1762  		return copy_to_user(buf, &v, sizeof(v));
1763  	} else {
1764  		struct compat_semid_ds v;
1765  		memset(&v, 0, sizeof(v));
1766  		to_compat_ipc_perm(&v.sem_perm, &in->sem_perm);
1767  		v.sem_otime = in->sem_otime;
1768  		v.sem_ctime = in->sem_ctime;
1769  		v.sem_nsems = in->sem_nsems;
1770  		return copy_to_user(buf, &v, sizeof(v));
1771  	}
1772  }
1773  
1774  static long compat_ksys_semctl(int semid, int semnum, int cmd, int arg, int version)
1775  {
1776  	void __user *p = compat_ptr(arg);
1777  	struct ipc_namespace *ns;
1778  	struct semid64_ds semid64;
1779  	int err;
1780  
1781  	ns = current->nsproxy->ipc_ns;
1782  
1783  	if (semid < 0)
1784  		return -EINVAL;
1785  
1786  	switch (cmd & (~IPC_64)) {
1787  	case IPC_INFO:
1788  	case SEM_INFO:
1789  		return semctl_info(ns, semid, cmd, p);
1790  	case IPC_STAT:
1791  	case SEM_STAT:
1792  	case SEM_STAT_ANY:
1793  		err = semctl_stat(ns, semid, cmd, &semid64);
1794  		if (err < 0)
1795  			return err;
1796  		if (copy_compat_semid_to_user(p, &semid64, version))
1797  			err = -EFAULT;
1798  		return err;
1799  	case GETVAL:
1800  	case GETPID:
1801  	case GETNCNT:
1802  	case GETZCNT:
1803  	case GETALL:
1804  	case SETALL:
1805  		return semctl_main(ns, semid, semnum, cmd, p);
1806  	case SETVAL:
1807  		return semctl_setval(ns, semid, semnum, arg);
1808  	case IPC_SET:
1809  		if (copy_compat_semid_from_user(&semid64, p, version))
1810  			return -EFAULT;
1811  		fallthrough;
1812  	case IPC_RMID:
1813  		return semctl_down(ns, semid, cmd, &semid64);
1814  	default:
1815  		return -EINVAL;
1816  	}
1817  }
1818  
1819  COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg)
1820  {
1821  	return compat_ksys_semctl(semid, semnum, cmd, arg, IPC_64);
1822  }
1823  
1824  #ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION
1825  long compat_ksys_old_semctl(int semid, int semnum, int cmd, int arg)
1826  {
1827  	int version = compat_ipc_parse_version(&cmd);
1828  
1829  	return compat_ksys_semctl(semid, semnum, cmd, arg, version);
1830  }
1831  
1832  COMPAT_SYSCALL_DEFINE4(old_semctl, int, semid, int, semnum, int, cmd, int, arg)
1833  {
1834  	return compat_ksys_old_semctl(semid, semnum, cmd, arg);
1835  }
1836  #endif
1837  #endif
1838  
1839  /* If the task doesn't already have an undo_list, then allocate one
1840   * here.  We guarantee there is only one thread using this undo list,
1841   * and current is THE ONE
1842   *
1843   * If this allocation and assignment succeeds, but later
1844   * portions of this code fail, there is no need to free the sem_undo_list.
1845   * Just let it stay associated with the task, and it'll be freed later
1846   * at exit time.
1847   *
1848   * This can block, so callers must hold no locks.
1849   */
1850  static inline int get_undo_list(struct sem_undo_list **undo_listp)
1851  {
1852  	struct sem_undo_list *undo_list;
1853  
1854  	undo_list = current->sysvsem.undo_list;
1855  	if (!undo_list) {
1856  		undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL_ACCOUNT);
1857  		if (undo_list == NULL)
1858  			return -ENOMEM;
1859  		spin_lock_init(&undo_list->lock);
1860  		refcount_set(&undo_list->refcnt, 1);
1861  		INIT_LIST_HEAD(&undo_list->list_proc);
1862  
1863  		current->sysvsem.undo_list = undo_list;
1864  	}
1865  	*undo_listp = undo_list;
1866  	return 0;
1867  }
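/*
 * The undo_list is allocated lazily: the first semop() of this task that
 * carries SEM_UNDO reaches here via find_alloc_undo(), as does a clone()
 * with CLONE_SYSVSEM (see copy_semundo() below). User-space sketch,
 * assuming an existing semid:
 *
 *	struct sembuf sop = { .sem_num = 0, .sem_op = -1, .sem_flg = SEM_UNDO };
 *
 *	semop(semid, &sop, 1);	// records a +1 adjustment, applied at exit
 */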
1868  
1869  static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid)
1870  {
1871  	struct sem_undo *un;
1872  
1873  	list_for_each_entry_rcu(un, &ulp->list_proc, list_proc,
1874  				spin_is_locked(&ulp->lock)) {
1875  		if (un->semid == semid)
1876  			return un;
1877  	}
1878  	return NULL;
1879  }
1880  
1881  static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
1882  {
1883  	struct sem_undo *un;
1884  
1885  	assert_spin_locked(&ulp->lock);
1886  
1887  	un = __lookup_undo(ulp, semid);
1888  	if (un) {
1889  		list_del_rcu(&un->list_proc);
1890  		list_add_rcu(&un->list_proc, &ulp->list_proc);
1891  	}
1892  	return un;
1893  }
1894  
1895  /**
1896   * find_alloc_undo - lookup (and if not present create) undo array
1897   * @ns: namespace
1898   * @semid: semaphore array id
1899   *
1900   * The function looks up (and if not present creates) the undo structure.
1901   * The size of the undo structure depends on the size of the semaphore
1902   * array, thus the alloc path is not that straightforward.
1903   * Lifetime rules: sem_undo is rcu-protected; on success, the function
1904   * performs an rcu_read_lock().
1905   */
1906  static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
1907  {
1908  	struct sem_array *sma;
1909  	struct sem_undo_list *ulp;
1910  	struct sem_undo *un, *new;
1911  	int nsems, error;
1912  
1913  	error = get_undo_list(&ulp);
1914  	if (error)
1915  		return ERR_PTR(error);
1916  
1917  	rcu_read_lock();
1918  	spin_lock(&ulp->lock);
1919  	un = lookup_undo(ulp, semid);
1920  	spin_unlock(&ulp->lock);
1921  	if (likely(un != NULL))
1922  		goto out;
1923  
1924  	/* no undo structure around - allocate one. */
1925  	/* step 1: figure out the size of the semaphore array */
1926  	sma = sem_obtain_object_check(ns, semid);
1927  	if (IS_ERR(sma)) {
1928  		rcu_read_unlock();
1929  		return ERR_CAST(sma);
1930  	}
1931  
1932  	nsems = sma->sem_nsems;
1933  	if (!ipc_rcu_getref(&sma->sem_perm)) {
1934  		rcu_read_unlock();
1935  		un = ERR_PTR(-EIDRM);
1936  		goto out;
1937  	}
1938  	rcu_read_unlock();
1939  
1940  	/* step 2: allocate new undo structure */
1941  	new = kvzalloc(struct_size(new, semadj, nsems), GFP_KERNEL_ACCOUNT);
1942  	if (!new) {
1943  		ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
1944  		return ERR_PTR(-ENOMEM);
1945  	}
1946  
1947  	/* step 3: Acquire the lock on semaphore array */
1948  	rcu_read_lock();
1949  	sem_lock_and_putref(sma);
1950  	if (!ipc_valid_object(&sma->sem_perm)) {
1951  		sem_unlock(sma, -1);
1952  		rcu_read_unlock();
1953  		kvfree(new);
1954  		un = ERR_PTR(-EIDRM);
1955  		goto out;
1956  	}
1957  	spin_lock(&ulp->lock);
1958  
1959  	/*
1960  	 * step 4: check for races: did someone else allocate the undo struct?
1961  	 */
1962  	un = lookup_undo(ulp, semid);
1963  	if (un) {
1964  		spin_unlock(&ulp->lock);
1965  		kvfree(new);
1966  		goto success;
1967  	}
1968  	/* step 5: initialize & link new undo structure */
1969  	new->ulp = ulp;
1970  	new->semid = semid;
1971  	assert_spin_locked(&ulp->lock);
1972  	list_add_rcu(&new->list_proc, &ulp->list_proc);
1973  	ipc_assert_locked_object(&sma->sem_perm);
1974  	list_add(&new->list_id, &sma->list_id);
1975  	un = new;
1976  	spin_unlock(&ulp->lock);
1977  success:
1978  	sem_unlock(sma, -1);
1979  out:
1980  	return un;
1981  }
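/*
 * The allocation above is the usual "allocate unlocked, re-check under the
 * lock" pattern. A condensed sketch of its shape (hypothetical helpers, not
 * kernel API):
 *
 *	new = alloc(size);		// no locks held, allocation may sleep
 *	lock();
 *	obj = lookup(key);
 *	if (obj) {			// somebody else won the race
 *		unlock();
 *		free(new);
 *		return obj;
 *	}
 *	publish(new, key);		// we won: link our copy into the lists
 *	unlock();
 *	return new;
 */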
1982  
1983  long __do_semtimedop(int semid, struct sembuf *sops,
1984  		unsigned nsops, const struct timespec64 *timeout,
1985  		struct ipc_namespace *ns)
1986  {
1987  	int error = -EINVAL;
1988  	struct sem_array *sma;
1989  	struct sembuf *sop;
1990  	struct sem_undo *un;
1991  	int max, locknum;
1992  	bool undos = false, alter = false, dupsop = false;
1993  	struct sem_queue queue;
1994  	unsigned long dup = 0;
1995  	ktime_t expires, *exp = NULL;
1996  	bool timed_out = false;
1997  
1998  	if (nsops < 1 || semid < 0)
1999  		return -EINVAL;
2000  	if (nsops > ns->sc_semopm)
2001  		return -E2BIG;
2002  
2003  	if (timeout) {
2004  		if (!timespec64_valid(timeout))
2005  			return -EINVAL;
2006  		expires = ktime_add_safe(ktime_get(),
2007  				timespec64_to_ktime(*timeout));
2008  		exp = &expires;
2009  	}
2010  
2011  
2012  	max = 0;
2013  	for (sop = sops; sop < sops + nsops; sop++) {
2014  		unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG);
2015  
2016  		if (sop->sem_num >= max)
2017  			max = sop->sem_num;
2018  		if (sop->sem_flg & SEM_UNDO)
2019  			undos = true;
2020  		if (dup & mask) {
2021  			/*
2022  			 * There was a previous alter access that appears
2023  			 * to have accessed the same semaphore, thus use
2024  			 * the dupsop logic. "appears", because the detection
2025  			 * can only check sem_num % BITS_PER_LONG.
2026  			 */
2027  			dupsop = true;
2028  		}
2029  		if (sop->sem_op != 0) {
2030  			alter = true;
2031  			dup |= mask;
2032  		}
2033  	}
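	/*
	 * Worked example of the conservative duplicate detection above,
	 * assuming BITS_PER_LONG == 64: alter operations on sem_num 1 and
	 * sem_num 65 both map to bit 1, so dupsop is set even though they
	 * touch different semaphores. A false positive only costs the slower
	 * dupsop handling; a real duplicate is never missed.
	 */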
2034  
2035  	if (undos) {
2036  		/* On success, find_alloc_undo takes the rcu_read_lock */
2037  		un = find_alloc_undo(ns, semid);
2038  		if (IS_ERR(un)) {
2039  			error = PTR_ERR(un);
2040  			goto out;
2041  		}
2042  	} else {
2043  		un = NULL;
2044  		rcu_read_lock();
2045  	}
2046  
2047  	sma = sem_obtain_object_check(ns, semid);
2048  	if (IS_ERR(sma)) {
2049  		rcu_read_unlock();
2050  		error = PTR_ERR(sma);
2051  		goto out;
2052  	}
2053  
2054  	error = -EFBIG;
2055  	if (max >= sma->sem_nsems) {
2056  		rcu_read_unlock();
2057  		goto out;
2058  	}
2059  
2060  	error = -EACCES;
2061  	if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) {
2062  		rcu_read_unlock();
2063  		goto out;
2064  	}
2065  
2066  	error = security_sem_semop(&sma->sem_perm, sops, nsops, alter);
2067  	if (error) {
2068  		rcu_read_unlock();
2069  		goto out;
2070  	}
2071  
2072  	error = -EIDRM;
2073  	locknum = sem_lock(sma, sops, nsops);
2074  	/*
2075  	 * We eventually might perform the following check in a lockless
2076  	 * fashion, considering ipc_valid_object() locking constraints.
2077  	 * If nsops == 1 and there is no contention for sem_perm.lock, then
2078  	 * only a per-semaphore lock is held and it's OK to proceed with the
2079  	 * check below. More details on the fine-grained locking scheme
2080  	 * entangled here, and why it is RMID race safe, are in the comments at sem_lock().
2081  	 */
2082  	if (!ipc_valid_object(&sma->sem_perm))
2083  		goto out_unlock;
2084  	/*
2085  	 * semid identifiers are not unique - find_alloc_undo may have
2086  	 * allocated an undo structure that was then invalidated by an RMID,
2087  	 * and now a new array has received the same id. Check and fail.
2088  	 * This case can be detected checking un->semid. The existence of
2089  	 * "un" itself is guaranteed by rcu.
2090  	 */
2091  	if (un && un->semid == -1)
2092  		goto out_unlock;
2093  
2094  	queue.sops = sops;
2095  	queue.nsops = nsops;
2096  	queue.undo = un;
2097  	queue.pid = task_tgid(current);
2098  	queue.alter = alter;
2099  	queue.dupsop = dupsop;
2100  
2101  	error = perform_atomic_semop(sma, &queue);
2102  	if (error == 0) { /* non-blocking successful path */
2103  		DEFINE_WAKE_Q(wake_q);
2104  
2105  		/*
2106  		 * If the operation was successful, then do
2107  		 * the required updates.
2108  		 */
2109  		if (alter)
2110  			do_smart_update(sma, sops, nsops, 1, &wake_q);
2111  		else
2112  			set_semotime(sma, sops);
2113  
2114  		sem_unlock(sma, locknum);
2115  		rcu_read_unlock();
2116  		wake_up_q(&wake_q);
2117  
2118  		goto out;
2119  	}
2120  	if (error < 0) /* non-blocking error path */
2121  		goto out_unlock;
2122  
2123  	/*
2124  	 * We need to sleep on this operation, so we put the current
2125  	 * task into the pending queue and go to sleep.
2126  	 */
2127  	if (nsops == 1) {
2128  		struct sem *curr;
2129  		int idx = array_index_nospec(sops->sem_num, sma->sem_nsems);
2130  		curr = &sma->sems[idx];
2131  
2132  		if (alter) {
2133  			if (sma->complex_count) {
2134  				list_add_tail(&queue.list,
2135  						&sma->pending_alter);
2136  			} else {
2137  
2138  				list_add_tail(&queue.list,
2139  						&curr->pending_alter);
2140  			}
2141  		} else {
2142  			list_add_tail(&queue.list, &curr->pending_const);
2143  		}
2144  	} else {
2145  		if (!sma->complex_count)
2146  			merge_queues(sma);
2147  
2148  		if (alter)
2149  			list_add_tail(&queue.list, &sma->pending_alter);
2150  		else
2151  			list_add_tail(&queue.list, &sma->pending_const);
2152  
2153  		sma->complex_count++;
2154  	}
2155  
2156  	do {
2157  		/* memory ordering ensured by the lock in sem_lock() */
2158  		WRITE_ONCE(queue.status, -EINTR);
2159  		queue.sleeper = current;
2160  
2161  		/* memory ordering is ensured by the lock in sem_lock() */
2162  		__set_current_state(TASK_INTERRUPTIBLE);
2163  		sem_unlock(sma, locknum);
2164  		rcu_read_unlock();
2165  
2166  		timed_out = !schedule_hrtimeout_range(exp,
2167  				current->timer_slack_ns, HRTIMER_MODE_ABS);
2168  
2169  		/*
2170  		 * fastpath: the semop has completed; whether it succeeded or
2171  		 * not is, from the syscall's point of view, irrelevant to us at
2172  		 * this point; we're done.
2173  		 *
2174  		 * We _do_ care, nonetheless, about being awoken by a signal or
2175  		 * spuriously.  The queue.status is checked again in the
2176  		 * slowpath (aka after taking sem_lock), such that we can detect
2177  		 * scenarios where we were awakened externally, during the
2178  		 * window between wake_q_add() and wake_up_q().
2179  		 */
2180  		rcu_read_lock();
2181  		error = READ_ONCE(queue.status);
2182  		if (error != -EINTR) {
2183  			/* see SEM_BARRIER_2 for purpose/pairing */
2184  			smp_acquire__after_ctrl_dep();
2185  			rcu_read_unlock();
2186  			goto out;
2187  		}
2188  
2189  		locknum = sem_lock(sma, sops, nsops);
2190  
2191  		if (!ipc_valid_object(&sma->sem_perm))
2192  			goto out_unlock;
2193  
2194  		/*
2195  		 * No need for any barrier: we are protected by sem_lock().
2196  		 */
2197  		error = READ_ONCE(queue.status);
2198  
2199  		/*
2200  		 * If queue.status != -EINTR, we were woken up by another process.
2201  		 * Leave without unlink_queue(), but with sem_unlock().
2202  		 */
2203  		if (error != -EINTR)
2204  			goto out_unlock;
2205  
2206  		/*
2207  		 * If an interrupt occurred we have to clean up the queue.
2208  		 */
2209  		if (timed_out)
2210  			error = -EAGAIN;
2211  	} while (error == -EINTR && !signal_pending(current)); /* spurious */
2212  
2213  	unlink_queue(sma, &queue);
2214  
2215  out_unlock:
2216  	sem_unlock(sma, locknum);
2217  	rcu_read_unlock();
2218  out:
2219  	return error;
2220  }
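/*
 * User-space sketch of the blocking path above: a "P" operation that cannot
 * complete is queued on pending_alter/pending_const and the task sleeps until
 * a "V" operation from another process satisfies it, the timeout expires
 * (-EAGAIN) or a signal arrives (-EINTR). Assumes one semaphore initialised
 * to 0:
 *
 *	struct sembuf p = { .sem_num = 0, .sem_op = -1, .sem_flg = 0 };
 *	struct sembuf v = { .sem_num = 0, .sem_op = +1, .sem_flg = 0 };
 *	struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
 *
 *	// waiter: sleeps in __do_semtimedop() for at most 5 seconds
 *	if (semtimedop(semid, &p, 1, &ts) < 0 && errno == EAGAIN)
 *		;	// timed out
 *
 *	// waker (another process): wakes the sleeper via the wake_q
 *	semop(semid, &v, 1);
 */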
2221  
2222  static long do_semtimedop(int semid, struct sembuf __user *tsops,
2223  		unsigned nsops, const struct timespec64 *timeout)
2224  {
2225  	struct sembuf fast_sops[SEMOPM_FAST];
2226  	struct sembuf *sops = fast_sops;
2227  	struct ipc_namespace *ns;
2228  	int ret;
2229  
2230  	ns = current->nsproxy->ipc_ns;
2231  	if (nsops > ns->sc_semopm)
2232  		return -E2BIG;
2233  	if (nsops < 1)
2234  		return -EINVAL;
2235  
2236  	if (nsops > SEMOPM_FAST) {
2237  		sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL);
2238  		if (sops == NULL)
2239  			return -ENOMEM;
2240  	}
2241  
2242  	if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) {
2243  		ret =  -EFAULT;
2244  		goto out_free;
2245  	}
2246  
2247  	ret = __do_semtimedop(semid, sops, nsops, timeout, ns);
2248  
2249  out_free:
2250  	if (sops != fast_sops)
2251  		kvfree(sops);
2252  
2253  	return ret;
2254  }
2255  
2256  long ksys_semtimedop(int semid, struct sembuf __user *tsops,
2257  		     unsigned int nsops, const struct __kernel_timespec __user *timeout)
2258  {
2259  	if (timeout) {
2260  		struct timespec64 ts;
2261  		if (get_timespec64(&ts, timeout))
2262  			return -EFAULT;
2263  		return do_semtimedop(semid, tsops, nsops, &ts);
2264  	}
2265  	return do_semtimedop(semid, tsops, nsops, NULL);
2266  }
2267  
2268  SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
2269  		unsigned int, nsops, const struct __kernel_timespec __user *, timeout)
2270  {
2271  	return ksys_semtimedop(semid, tsops, nsops, timeout);
2272  }
2273  
2274  #ifdef CONFIG_COMPAT_32BIT_TIME
2275  long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,
2276  			    unsigned int nsops,
2277  			    const struct old_timespec32 __user *timeout)
2278  {
2279  	if (timeout) {
2280  		struct timespec64 ts;
2281  		if (get_old_timespec32(&ts, timeout))
2282  			return -EFAULT;
2283  		return do_semtimedop(semid, tsems, nsops, &ts);
2284  	}
2285  	return do_semtimedop(semid, tsems, nsops, NULL);
2286  }
2287  
2288  SYSCALL_DEFINE4(semtimedop_time32, int, semid, struct sembuf __user *, tsems,
2289  		       unsigned int, nsops,
2290  		       const struct old_timespec32 __user *, timeout)
2291  {
2292  	return compat_ksys_semtimedop(semid, tsems, nsops, timeout);
2293  }
2294  #endif
2295  
2296  SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops,
2297  		unsigned, nsops)
2298  {
2299  	return do_semtimedop(semid, tsops, nsops, NULL);
2300  }
2301  
2302  /* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
2303   * parent and child tasks.
2304   */
2305  
2306  int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
2307  {
2308  	struct sem_undo_list *undo_list;
2309  	int error;
2310  
2311  	if (clone_flags & CLONE_SYSVSEM) {
2312  		error = get_undo_list(&undo_list);
2313  		if (error)
2314  			return error;
2315  		refcount_inc(&undo_list->refcnt);
2316  		tsk->sysvsem.undo_list = undo_list;
2317  	} else
2318  		tsk->sysvsem.undo_list = NULL;
2319  
2320  	return 0;
2321  }
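/*
 * Sharing sketch (hedged): a task created with CLONE_SYSVSEM shares its
 * parent's sem_undo_list, so SEM_UNDO adjustments are applied only once,
 * when the last sharer exits. A plain fork() does not set the flag, so the
 * child starts with undo_list == NULL; glibc's pthread_create() typically
 * does pass CLONE_SYSVSEM, so all threads of a process share one undo list.
 * (worker() below is a placeholder thread function.)
 *
 *	pid_t child = fork();			// no sharing: child's list is NULL
 *
 *	pthread_t t;
 *	pthread_create(&t, NULL, worker, NULL);	// NPTL clones with CLONE_SYSVSEM
 */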
2322  
2323  /*
2324   * add semadj values to semaphores, free undo structures.
2325   * undo structures are not freed when semaphore arrays are destroyed
2326   * so some of them may be out of date.
2327   * IMPLEMENTATION NOTE: There is some confusion over whether the
2328   * set of adjustments that needs to be done should be done in an atomic
2329   * manner or not. That is, if we are attempting to decrement the semval
2330   * should we queue up and wait until we can do so legally?
2331   * The original implementation attempted to do this (queue and wait).
2332   * The current implementation does not do so. The POSIX standard
2333   * and SVID should be consulted to determine what behavior is mandated.
2334   */
2335  void exit_sem(struct task_struct *tsk)
2336  {
2337  	struct sem_undo_list *ulp;
2338  
2339  	ulp = tsk->sysvsem.undo_list;
2340  	if (!ulp)
2341  		return;
2342  	tsk->sysvsem.undo_list = NULL;
2343  
2344  	if (!refcount_dec_and_test(&ulp->refcnt))
2345  		return;
2346  
2347  	for (;;) {
2348  		struct sem_array *sma;
2349  		struct sem_undo *un;
2350  		int semid, i;
2351  		DEFINE_WAKE_Q(wake_q);
2352  
2353  		cond_resched();
2354  
2355  		rcu_read_lock();
2356  		un = list_entry_rcu(ulp->list_proc.next,
2357  				    struct sem_undo, list_proc);
2358  		if (&un->list_proc == &ulp->list_proc) {
2359  			/*
2360  			 * We must wait for freeary() before freeing this ulp,
2361  			 * in case we raced with the last sem_undo. There is a small
2362  			 * window where we exit before freeary() has finished
2363  			 * unlocking the sem_undo_list.
2364  			 */
2365  			spin_lock(&ulp->lock);
2366  			spin_unlock(&ulp->lock);
2367  			rcu_read_unlock();
2368  			break;
2369  		}
2370  		spin_lock(&ulp->lock);
2371  		semid = un->semid;
2372  		spin_unlock(&ulp->lock);
2373  
2374  		/* exit_sem raced with IPC_RMID, nothing to do */
2375  		if (semid == -1) {
2376  			rcu_read_unlock();
2377  			continue;
2378  		}
2379  
2380  		sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid);
2381  		/* exit_sem raced with IPC_RMID, nothing to do */
2382  		if (IS_ERR(sma)) {
2383  			rcu_read_unlock();
2384  			continue;
2385  		}
2386  
2387  		sem_lock(sma, NULL, -1);
2388  		/* exit_sem raced with IPC_RMID, nothing to do */
2389  		if (!ipc_valid_object(&sma->sem_perm)) {
2390  			sem_unlock(sma, -1);
2391  			rcu_read_unlock();
2392  			continue;
2393  		}
2394  		un = __lookup_undo(ulp, semid);
2395  		if (un == NULL) {
2396  			/* exit_sem raced with IPC_RMID+semget() that created
2397  			 * exactly the same semid. Nothing to do.
2398  			 */
2399  			sem_unlock(sma, -1);
2400  			rcu_read_unlock();
2401  			continue;
2402  		}
2403  
2404  		/* remove un from the linked lists */
2405  		ipc_assert_locked_object(&sma->sem_perm);
2406  		list_del(&un->list_id);
2407  
2408  		spin_lock(&ulp->lock);
2409  		list_del_rcu(&un->list_proc);
2410  		spin_unlock(&ulp->lock);
2411  
2412  		/* perform adjustments registered in un */
2413  		for (i = 0; i < sma->sem_nsems; i++) {
2414  			struct sem *semaphore = &sma->sems[i];
2415  			if (un->semadj[i]) {
2416  				semaphore->semval += un->semadj[i];
2417  				/*
2418  				 * Range checks of the new semaphore value,
2419  				 * not defined by SUS (the Single UNIX Specification):
2420  				 * - Some unices ignore the undo entirely
2421  				 *   (e.g. HP UX 11i 11.22, Tru64 V5.1)
2422  				 * - some cap the value (e.g. FreeBSD caps
2423  				 *   at 0, but doesn't enforce SEMVMX)
2424  				 *
2425  				 * Linux caps the semaphore value, both at 0
2426  				 * and at SEMVMX.
2427  				 *
2428  				 *	Manfred <manfred@colorfullife.com>
2429  				 */
2430  				if (semaphore->semval < 0)
2431  					semaphore->semval = 0;
2432  				if (semaphore->semval > SEMVMX)
2433  					semaphore->semval = SEMVMX;
2434  				ipc_update_pid(&semaphore->sempid, task_tgid(current));
2435  			}
2436  		}
2437  		/* maybe some queued-up processes were waiting for this */
2438  		do_smart_update(sma, NULL, 0, 1, &wake_q);
2439  		sem_unlock(sma, -1);
2440  		rcu_read_unlock();
2441  		wake_up_q(&wake_q);
2442  
2443  		kvfree_rcu(un, rcu);
2444  	}
2445  	kfree(ulp);
2446  }
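/*
 * Worked example of the exit-time adjustment above: a task performs
 * "sem_op = -2" with SEM_UNDO on a semaphore whose value is 3, so semval
 * drops to 1 and semadj becomes +2. If the task exits without reversing the
 * operation, exit_sem() adds the +2 back and semval returns to 3. Should the
 * addition overshoot, the result is clamped to the [0, SEMVMX] range as the
 * comment above describes.
 */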
2447  
2448  #ifdef CONFIG_PROC_FS
2449  static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
2450  {
2451  	struct user_namespace *user_ns = seq_user_ns(s);
2452  	struct kern_ipc_perm *ipcp = it;
2453  	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
2454  	time64_t sem_otime;
2455  
2456  	/*
2457  	 * The proc interface isn't aware of sem_lock(); it calls
2458  	 * ipc_lock_object(), i.e. spin_lock(&sma->sem_perm.lock)
2459  	 * (in sysvipc_find_ipc).
2460  	 * In order to stay compatible with sem_lock(), we must
2461  	 * enter / leave complex_mode.
2462  	 */
2463  	complexmode_enter(sma);
2464  
2465  	sem_otime = get_semotime(sma);
2466  
2467  	seq_printf(s,
2468  		   "%10d %10d  %4o %10u %5u %5u %5u %5u %10llu %10llu\n",
2469  		   sma->sem_perm.key,
2470  		   sma->sem_perm.id,
2471  		   sma->sem_perm.mode,
2472  		   sma->sem_nsems,
2473  		   from_kuid_munged(user_ns, sma->sem_perm.uid),
2474  		   from_kgid_munged(user_ns, sma->sem_perm.gid),
2475  		   from_kuid_munged(user_ns, sma->sem_perm.cuid),
2476  		   from_kgid_munged(user_ns, sma->sem_perm.cgid),
2477  		   sem_otime,
2478  		   sma->sem_ctime);
2479  
2480  	complexmode_tryleave(sma);
2481  
2482  	return 0;
2483  }
2484  #endif
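/*
 * Column order of each /proc/sysvipc/sem row produced above: key, semid,
 * perms (octal), nsems, uid, gid, cuid, cgid, otime, ctime. A hypothetical
 * row therefore looks like:
 *
 *	$ cat /proc/sysvipc/sem
 *	 123456          0   600         1  1000  1000  1000  1000 1700000000 1700000000
 */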
2485