// SPDX-License-Identifier: GPL-2.0
/*
 * linux/ipc/sem.c
 * Copyright (C) 1992 Krishna Balasubramanian
 * Copyright (C) 1995 Eric Schenk, Bruno Haible
 *
 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * SMP-threaded, sysctl's added
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
 * Enforced range limit on SEM_UNDO
 * (c) 2001 Red Hat Inc
 * Lockless wakeup
 * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
 * (c) 2016 Davidlohr Bueso <dave@stgolabs.net>
 * Further wakeup optimizations, documentation
 * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 *
 * Implementation notes: (May 2010)
 * This file implements System V semaphores.
 *
 * User space visible behavior:
 * - FIFO ordering for semop() operations (just FIFO, not starvation
 *   protection)
 * - multiple semaphore operations that alter the same semaphore in
 *   one semop() are handled.
 * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
 *   SETALL calls.
 * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
 * - undo adjustments at process exit are limited to 0..SEMVMX.
 * - namespaces are supported.
 * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
 *   to /proc/sys/kernel/sem.
 * - statistics about the usage are reported in /proc/sysvipc/sem.
 *
 * Internals:
 * - scalability:
 *   - all global variables are read-mostly.
 *   - semop() calls and semctl(RMID) are synchronized by RCU.
 *   - most operations do write operations (actually: spin_lock calls) to
 *     the per-semaphore array structure.
 *   Thus: Perfect SMP scaling between independent semaphore arrays.
 *         If multiple semaphores in one array are used, then cache line
 *         thrashing on the semaphore array spinlock will limit the scaling.
 * - semncnt and semzcnt are calculated on demand in count_semcnt()
 * - the task that performs a successful semop() scans the list of all
 *   sleeping tasks and completes any pending operations that can be fulfilled.
 *   Semaphores are actively given to waiting tasks (necessary for FIFO).
 *   (see update_queue())
 * - To improve the scalability, the actual wake-up calls are performed after
 *   dropping all locks. (see wake_up_sem_queue_prepare())
 * - All work is done by the waker, the woken up task does not have to do
 *   anything - not even acquiring a lock or dropping a refcount.
 * - A woken up task may not even touch the semaphore array anymore, it may
 *   have been destroyed already by a semctl(RMID).
 * - UNDO values are stored in an array (one per process and per
 *   semaphore array, lazily allocated). For backwards compatibility, multiple
 *   modes for the UNDO variables are supported (per process, per thread)
 *   (see copy_semundo, CLONE_SYSVSEM)
 * - There are two lists of the pending operations: a per-array list
 *   and per-semaphore list (stored in the array). This allows to achieve FIFO
 *   ordering without always scanning all pending operations.
 *   The worst-case behavior is nevertheless O(N^2) for N wakeups.
 */

#include <linux/compat.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/time.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
#include <linux/sched/wake_q.h>
#include <linux/nospec.h>
#include <linux/rhashtable.h>

#include <linux/uaccess.h>
#include "util.h"

/*
 * One semaphore structure for each semaphore in the system.
 * The structure is cache line aligned on SMP builds
 * (____cacheline_aligned_in_smp).
 */
struct sem {
	int	semval;		/* current value */
	/*
	 * PID of the process that last modified the semaphore. For
	 * Linux, specifically these are:
	 *  - semop
	 *  - semctl, via SETVAL and SETALL.
	 *  - at task exit when performing undo adjustments (see exit_sem).
	 */
	struct pid *sempid;
	/*
	 * Per-semaphore spinlock: taken by sem_lock() for single-sop
	 * operations (fine-grained semtimedop locking).
	 */
	spinlock_t	lock;
	/* pending single-sop operations that alter the semaphore */
	struct list_head pending_alter;
	/* pending single-sop operations that do not alter the semaphore */
	struct list_head pending_const;
	/*
	 * Candidate for sem_otime: set_semotime() updates one per-semaphore
	 * instance instead of a single shared field.
	 */
	time64_t	 sem_otime;
} ____cacheline_aligned_in_smp;

/* One sem_array data structure for each set of semaphores in the system. */
struct sem_array {
	struct kern_ipc_perm	sem_perm;	/* permissions .. see ipc.h */
	time64_t		sem_ctime;	/* create/last semctl() time */
	/* pending operations that alter the array */
	struct list_head	pending_alter;
	/* pending complex operations that do not alter semvals */
	struct list_head	pending_const;
	struct list_head	list_id;	/* undo requests on this array */
	int			sem_nsems;	/* no. of semaphores in array */
	/*
	 * Number of pending complex operations; unlink_queue() decrements
	 * it for every dequeued operation with nsops > 1.
	 */
	int			complex_count;
	/*
	 * >0: global lock required. Set to USE_GLOBAL_LOCK_HYSTERESIS by
	 * complexmode_enter() and counted down by complexmode_tryleave().
	 */
	unsigned int		use_global_lock;

	struct sem		sems[];		/* the semaphores, sem_nsems entries */
} __randomize_layout;

/* One queue for each sleeping process in the system. */
struct sem_queue {
	struct list_head	list;	 /* queue of pending operations */
	struct task_struct	*sleeper; /* this process */
	struct sem_undo		*undo;	 /* undo structure */
	struct pid		*pid;	 /* process id of requesting process */
	/*
	 * Completion status of the operation; published with
	 * smp_store_release() by wake_up_sem_queue_prepare().
	 */
	int			status;
	struct sembuf		*sops;	 /* array of pending operations */
	/*
	 * The operation that blocked; recorded by
	 * perform_atomic_semop[_slow]() when the request cannot proceed.
	 */
	struct sembuf		*blocking;
	int			nsops;	 /* number of operations */
	bool			alter;	 /* does *sops alter the array? */
	bool			dupsop;	 /* sops on more than one sem_num */
};

/* Each task has a list of undo requests. They are executed automatically
 * when the process exits.
 */
struct sem_undo {
	/* per-process list: all undos from one process, rcu protected */
	struct list_head	list_proc;
	struct rcu_head		rcu;		/* rcu struct for sem_undo */
	struct sem_undo_list	*ulp;		/* back ptr to sem_undo_list */
	/* per semaphore array list: all undos for one array */
	struct list_head	list_id;
	/*
	 * Semaphore set identifier; freeary() sets it to -1 when the
	 * array is removed.
	 */
	int			semid;
	short			*semadj;	/* array of adjustments, one per semaphore */
};

/* sem_undo_list controls shared access to the list of sem_undo structures
 * that may be shared among all a CLONE_SYSVSEM task group.
 */
struct sem_undo_list {
	refcount_t		refcnt;		/* reference count of this list */
	spinlock_t		lock;		/* taken for writes to list_proc */
	struct list_head	list_proc;	/* sem_undo entries; read under rcu */
};


/* The ipc id set of namespace @ns that holds the semaphore arrays. */
#define sem_ids(ns)	((ns)->ids[IPC_SEM_IDS])

static int newary(struct ipc_namespace *, struct ipc_params *);
static void freeary(struct ipc_namespace *, struct kern_ipc_perm *);
#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
#endif

#define SEMMSL_FAST	256 /* 512 bytes on stack */
#define SEMOPM_FAST	64  /* ~ 372 bytes on stack */

/*
 * Switching from the mode suitable for simple ops
 * to the mode for complex ops is costly. Therefore:
 * use some hysteresis
 *
 * This is the value use_global_lock is (re)armed with by
 * complexmode_enter(); complexmode_tryleave() counts it down.
 */
#define USE_GLOBAL_LOCK_HYSTERESIS	10

/*
 * Locking:
 * a) global sem_lock() for read/write
 *	sem_undo.id_next,
 *	sem_array.complex_count,
 *	sem_array.pending{_alter,_const},
 *	sem_array.sem_undo
 *
 * b) global or semaphore sem_lock() for read/write:
 *	sem_array.sems[i].pending_{const,alter}:
 *
 * c) special:
 *	sem_undo_list.list_proc:
 *	* undo_list->lock for write
 *	* rcu for read
 *	use_global_lock:
 *	* global sem_lock() for write
 *	* either local or global sem_lock() for read.
 *
 * Memory ordering:
 * Most ordering is enforced by using spin_lock() and spin_unlock().
 *
 * Exceptions:
 * 1) use_global_lock: (SEM_BARRIER_1)
 * Setting it from non-zero to 0 is a RELEASE, this is ensured by
 * using smp_store_release(): Immediately after setting it to 0,
 * a simple op can start.
 * Testing if it is non-zero is an ACQUIRE, this is ensured by using
 * smp_load_acquire().
 * Setting it from 0 to non-zero must be ordered with regards to
 * this smp_load_acquire(), this is guaranteed because the smp_load_acquire()
 * is inside a spin_lock() and after a write from 0 to non-zero a
 * spin_lock()+spin_unlock() is done.
220 * 221 * 2) queue.status: (SEM_BARRIER_2) 222 * Initialization is done while holding sem_lock(), so no further barrier is 223 * required. 224 * Setting it to a result code is a RELEASE, this is ensured by both a 225 * smp_store_release() (for case a) and while holding sem_lock() 226 * (for case b). 227 * The AQUIRE when reading the result code without holding sem_lock() is 228 * achieved by using READ_ONCE() + smp_acquire__after_ctrl_dep(). 229 * (case a above). 230 * Reading the result code while holding sem_lock() needs no further barriers, 231 * the locks inside sem_lock() enforce ordering (case b above) 232 * 233 * 3) current->state: 234 * current->state is set to TASK_INTERRUPTIBLE while holding sem_lock(). 235 * The wakeup is handled using the wake_q infrastructure. wake_q wakeups may 236 * happen immediately after calling wake_q_add. As wake_q_add_safe() is called 237 * when holding sem_lock(), no further barriers are required. 238 * 239 * See also ipc/mqueue.c for more details on the covered races. 240 */ 241 242 #define sc_semmsl sem_ctls[0] 243 #define sc_semmns sem_ctls[1] 244 #define sc_semopm sem_ctls[2] 245 #define sc_semmni sem_ctls[3] 246 247 void sem_init_ns(struct ipc_namespace *ns) 248 { 249 ns->sc_semmsl = SEMMSL; 250 ns->sc_semmns = SEMMNS; 251 ns->sc_semopm = SEMOPM; 252 ns->sc_semmni = SEMMNI; 253 ns->used_sems = 0; 254 ipc_init_ids(&ns->ids[IPC_SEM_IDS]); 255 } 256 257 #ifdef CONFIG_IPC_NS 258 void sem_exit_ns(struct ipc_namespace *ns) 259 { 260 free_ipcs(ns, &sem_ids(ns), freeary); 261 idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr); 262 rhashtable_destroy(&ns->ids[IPC_SEM_IDS].key_ht); 263 } 264 #endif 265 266 void __init sem_init(void) 267 { 268 sem_init_ns(&init_ipc_ns); 269 ipc_init_proc_interface("sysvipc/sem", 270 " key semid perms nsems uid gid cuid cgid otime ctime\n", 271 IPC_SEM_IDS, sysvipc_sem_proc_show); 272 } 273 274 /** 275 * unmerge_queues - unmerge queues, if possible. 
 * @sma: semaphore array
 *
 * The function unmerges the wait queues if complex_count is 0.
 * It must be called prior to dropping the global semaphore array lock.
 */
static void unmerge_queues(struct sem_array *sma)
{
	struct sem_queue *q, *tq;

	/* complex operations still around? */
	if (sma->complex_count)
		return;
	/*
	 * We will switch back to simple mode.
	 * Move all pending operation back into the per-semaphore
	 * queues.
	 *
	 * The entries are re-linked without list_del(): the global list
	 * head is reinitialised as a whole once the loop is done.
	 */
	list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
		struct sem *curr;
		curr = &sma->sems[q->sops[0].sem_num];

		list_add_tail(&q->list, &curr->pending_alter);
	}
	INIT_LIST_HEAD(&sma->pending_alter);
}

/**
 * merge_queues - merge single semop queues into global queue
 * @sma: semaphore array
 *
 * This function merges all per-semaphore queues into the global queue.
 * It is necessary to achieve FIFO ordering for the pending single-sop
 * operations when a multi-semop operation must sleep.
 * Only the alter operations must be moved, the const operations can stay.
 */
static void merge_queues(struct sem_array *sma)
{
	int i;
	for (i = 0; i < sma->sem_nsems; i++) {
		struct sem *sem = &sma->sems[i];

		/* moves the entries and leaves the per-semaphore list empty */
		list_splice_init(&sem->pending_alter, &sma->pending_alter);
	}
}

/*
 * RCU callback that releases a semaphore array: @head is the rcu member
 * embedded in the array's kern_ipc_perm. Frees the security state first,
 * then the array itself.
 */
static void sem_rcu_free(struct rcu_head *head)
{
	struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
	struct sem_array *sma = container_of(p, struct sem_array, sem_perm);

	security_sem_free(&sma->sem_perm);
	kvfree(sma);
}

/*
 * Enter the mode suitable for non-simple operations:
 * Caller must own sem_perm.lock.
 */
static void complexmode_enter(struct sem_array *sma)
{
	int i;
	struct sem *sem;

	if (sma->use_global_lock > 0)  {
		/*
		 * We are already in global lock mode.
		 * Nothing to do, just reset the
		 * counter until we return to simple mode.
		 */
		sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
		return;
	}
	sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;

	/*
	 * Take and drop every per-semaphore lock once. This is the
	 * spin_lock()+spin_unlock() pair that SEM_BARRIER_1 relies on after
	 * use_global_lock went from 0 to non-zero.
	 */
	for (i = 0; i < sma->sem_nsems; i++) {
		sem = &sma->sems[i];
		spin_lock(&sem->lock);
		spin_unlock(&sem->lock);
	}
}

/*
 * Try to leave the mode that disallows simple operations:
 * Caller must own sem_perm.lock.
 */
static void complexmode_tryleave(struct sem_array *sma)
{
	if (sma->complex_count)  {
		/* Complex ops are sleeping.
		 * We must stay in complex mode
		 */
		return;
	}
	if (sma->use_global_lock == 1) {

		/* See SEM_BARRIER_1 for purpose/pairing */
		smp_store_release(&sma->use_global_lock, 0);
	} else {
		/* hysteresis: just count down, stay in global lock mode */
		sma->use_global_lock--;
	}
}

/* Value returned by sem_lock() when the whole array is locked. */
#define SEM_GLOBAL_LOCK	(-1)
/*
 * If the request contains only one semaphore operation, and there are
 * no complex transactions pending, lock only the semaphore involved.
 * Otherwise, lock the entire semaphore array, since we either have
 * multiple semaphores in our own semops, or we need to look at
 * semaphores from other pending complex operations.
 *
 * Returns SEM_GLOBAL_LOCK if the whole array was locked, otherwise the
 * number of the semaphore whose lock was taken. The value must be handed
 * to sem_unlock().
 */
static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
			      int nsops)
{
	struct sem *sem;
	int idx;

	if (nsops != 1) {
		/* Complex operation - acquire a full lock */
		ipc_lock_object(&sma->sem_perm);

		/* Prevent parallel simple ops */
		complexmode_enter(sma);
		return SEM_GLOBAL_LOCK;
	}

	/*
	 * Only one semaphore affected - try to optimize locking.
	 * Optimized locking is possible if no complex operation
	 * is either enqueued or processed right now.
	 *
	 * Both facts are tracked by use_global_lock.
	 */
	idx = array_index_nospec(sops->sem_num, sma->sem_nsems);
	sem = &sma->sems[idx];

	/*
	 * Initial check for use_global_lock. Just an optimization,
	 * no locking, no memory barrier.
	 */
	if (!sma->use_global_lock) {
		/*
		 * It appears that no complex operation is around.
		 * Acquire the per-semaphore lock.
		 */
		spin_lock(&sem->lock);

		/* see SEM_BARRIER_1 for purpose/pairing */
		if (!smp_load_acquire(&sma->use_global_lock)) {
			/* fast path successful! */
			return sops->sem_num;
		}
		spin_unlock(&sem->lock);
	}

	/* slow path: acquire the full lock */
	ipc_lock_object(&sma->sem_perm);

	if (sma->use_global_lock == 0) {
		/*
		 * The use_global_lock mode ended while we waited for
		 * sma->sem_perm.lock. Thus we must switch to locking
		 * with sem->lock.
		 * Unlike in the fast path, there is no need to recheck
		 * sma->use_global_lock after we have acquired sem->lock:
		 * We own sma->sem_perm.lock, thus use_global_lock cannot
		 * change.
		 */
		spin_lock(&sem->lock);

		ipc_unlock_object(&sma->sem_perm);
		return sops->sem_num;
	} else {
		/*
		 * Not a false alarm, thus continue to use the global lock
		 * mode. No need for complexmode_enter(), this was done by
		 * the caller that has set use_global_lock to non-zero.
		 */
		return SEM_GLOBAL_LOCK;
	}
}

/*
 * Counterpart of sem_lock(). @locknum is the value sem_lock() returned:
 * for SEM_GLOBAL_LOCK the queues are unmerged (if possible), leaving
 * complex mode is attempted and the array lock is dropped; otherwise only
 * the lock of semaphore @locknum is released.
 */
static inline void sem_unlock(struct sem_array *sma, int locknum)
{
	if (locknum == SEM_GLOBAL_LOCK) {
		unmerge_queues(sma);
		complexmode_tryleave(sma);
		ipc_unlock_object(&sma->sem_perm);
	} else {
		struct sem *sem = &sma->sems[locknum];
		spin_unlock(&sem->lock);
	}
}

/*
 * sem_lock_(check_) routines are called in the paths where the rwsem
 * is not held.
 *
 * The caller holds the RCU read lock.
474 */ 475 static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id) 476 { 477 struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id); 478 479 if (IS_ERR(ipcp)) 480 return ERR_CAST(ipcp); 481 482 return container_of(ipcp, struct sem_array, sem_perm); 483 } 484 485 static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns, 486 int id) 487 { 488 struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id); 489 490 if (IS_ERR(ipcp)) 491 return ERR_CAST(ipcp); 492 493 return container_of(ipcp, struct sem_array, sem_perm); 494 } 495 496 static inline void sem_lock_and_putref(struct sem_array *sma) 497 { 498 sem_lock(sma, NULL, -1); 499 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 500 } 501 502 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) 503 { 504 ipc_rmid(&sem_ids(ns), &s->sem_perm); 505 } 506 507 static struct sem_array *sem_alloc(size_t nsems) 508 { 509 struct sem_array *sma; 510 511 if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0])) 512 return NULL; 513 514 sma = kvzalloc(struct_size(sma, sems, nsems), GFP_KERNEL); 515 if (unlikely(!sma)) 516 return NULL; 517 518 return sma; 519 } 520 521 /** 522 * newary - Create a new semaphore set 523 * @ns: namespace 524 * @params: ptr to the structure that contains key, semflg and nsems 525 * 526 * Called with sem_ids.rwsem held (as a writer) 527 */ 528 static int newary(struct ipc_namespace *ns, struct ipc_params *params) 529 { 530 int retval; 531 struct sem_array *sma; 532 key_t key = params->key; 533 int nsems = params->u.nsems; 534 int semflg = params->flg; 535 int i; 536 537 if (!nsems) 538 return -EINVAL; 539 if (ns->used_sems + nsems > ns->sc_semmns) 540 return -ENOSPC; 541 542 sma = sem_alloc(nsems); 543 if (!sma) 544 return -ENOMEM; 545 546 sma->sem_perm.mode = (semflg & S_IRWXUGO); 547 sma->sem_perm.key = key; 548 549 sma->sem_perm.security = NULL; 550 retval = security_sem_alloc(&sma->sem_perm); 551 if 
(retval) { 552 kvfree(sma); 553 return retval; 554 } 555 556 for (i = 0; i < nsems; i++) { 557 INIT_LIST_HEAD(&sma->sems[i].pending_alter); 558 INIT_LIST_HEAD(&sma->sems[i].pending_const); 559 spin_lock_init(&sma->sems[i].lock); 560 } 561 562 sma->complex_count = 0; 563 sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; 564 INIT_LIST_HEAD(&sma->pending_alter); 565 INIT_LIST_HEAD(&sma->pending_const); 566 INIT_LIST_HEAD(&sma->list_id); 567 sma->sem_nsems = nsems; 568 sma->sem_ctime = ktime_get_real_seconds(); 569 570 /* ipc_addid() locks sma upon success. */ 571 retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); 572 if (retval < 0) { 573 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 574 return retval; 575 } 576 ns->used_sems += nsems; 577 578 sem_unlock(sma, -1); 579 rcu_read_unlock(); 580 581 return sma->sem_perm.id; 582 } 583 584 585 /* 586 * Called with sem_ids.rwsem and ipcp locked. 587 */ 588 static int sem_more_checks(struct kern_ipc_perm *ipcp, struct ipc_params *params) 589 { 590 struct sem_array *sma; 591 592 sma = container_of(ipcp, struct sem_array, sem_perm); 593 if (params->u.nsems > sma->sem_nsems) 594 return -EINVAL; 595 596 return 0; 597 } 598 599 long ksys_semget(key_t key, int nsems, int semflg) 600 { 601 struct ipc_namespace *ns; 602 static const struct ipc_ops sem_ops = { 603 .getnew = newary, 604 .associate = security_sem_associate, 605 .more_checks = sem_more_checks, 606 }; 607 struct ipc_params sem_params; 608 609 ns = current->nsproxy->ipc_ns; 610 611 if (nsems < 0 || nsems > ns->sc_semmsl) 612 return -EINVAL; 613 614 sem_params.key = key; 615 sem_params.flg = semflg; 616 sem_params.u.nsems = nsems; 617 618 return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); 619 } 620 621 SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) 622 { 623 return ksys_semget(key, nsems, semflg); 624 } 625 626 /** 627 * perform_atomic_semop[_slow] - Attempt to perform semaphore 628 * operations on a given array. 
629 * @sma: semaphore array 630 * @q: struct sem_queue that describes the operation 631 * 632 * Caller blocking are as follows, based the value 633 * indicated by the semaphore operation (sem_op): 634 * 635 * (1) >0 never blocks. 636 * (2) 0 (wait-for-zero operation): semval is non-zero. 637 * (3) <0 attempting to decrement semval to a value smaller than zero. 638 * 639 * Returns 0 if the operation was possible. 640 * Returns 1 if the operation is impossible, the caller must sleep. 641 * Returns <0 for error codes. 642 */ 643 static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q) 644 { 645 int result, sem_op, nsops; 646 struct pid *pid; 647 struct sembuf *sop; 648 struct sem *curr; 649 struct sembuf *sops; 650 struct sem_undo *un; 651 652 sops = q->sops; 653 nsops = q->nsops; 654 un = q->undo; 655 656 for (sop = sops; sop < sops + nsops; sop++) { 657 int idx = array_index_nospec(sop->sem_num, sma->sem_nsems); 658 curr = &sma->sems[idx]; 659 sem_op = sop->sem_op; 660 result = curr->semval; 661 662 if (!sem_op && result) 663 goto would_block; 664 665 result += sem_op; 666 if (result < 0) 667 goto would_block; 668 if (result > SEMVMX) 669 goto out_of_range; 670 671 if (sop->sem_flg & SEM_UNDO) { 672 int undo = un->semadj[sop->sem_num] - sem_op; 673 /* Exceeding the undo range is an error. 
*/ 674 if (undo < (-SEMAEM - 1) || undo > SEMAEM) 675 goto out_of_range; 676 un->semadj[sop->sem_num] = undo; 677 } 678 679 curr->semval = result; 680 } 681 682 sop--; 683 pid = q->pid; 684 while (sop >= sops) { 685 ipc_update_pid(&sma->sems[sop->sem_num].sempid, pid); 686 sop--; 687 } 688 689 return 0; 690 691 out_of_range: 692 result = -ERANGE; 693 goto undo; 694 695 would_block: 696 q->blocking = sop; 697 698 if (sop->sem_flg & IPC_NOWAIT) 699 result = -EAGAIN; 700 else 701 result = 1; 702 703 undo: 704 sop--; 705 while (sop >= sops) { 706 sem_op = sop->sem_op; 707 sma->sems[sop->sem_num].semval -= sem_op; 708 if (sop->sem_flg & SEM_UNDO) 709 un->semadj[sop->sem_num] += sem_op; 710 sop--; 711 } 712 713 return result; 714 } 715 716 static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q) 717 { 718 int result, sem_op, nsops; 719 struct sembuf *sop; 720 struct sem *curr; 721 struct sembuf *sops; 722 struct sem_undo *un; 723 724 sops = q->sops; 725 nsops = q->nsops; 726 un = q->undo; 727 728 if (unlikely(q->dupsop)) 729 return perform_atomic_semop_slow(sma, q); 730 731 /* 732 * We scan the semaphore set twice, first to ensure that the entire 733 * operation can succeed, therefore avoiding any pointless writes 734 * to shared memory and having to undo such changes in order to block 735 * until the operations can go through. 736 */ 737 for (sop = sops; sop < sops + nsops; sop++) { 738 int idx = array_index_nospec(sop->sem_num, sma->sem_nsems); 739 740 curr = &sma->sems[idx]; 741 sem_op = sop->sem_op; 742 result = curr->semval; 743 744 if (!sem_op && result) 745 goto would_block; /* wait-for-zero */ 746 747 result += sem_op; 748 if (result < 0) 749 goto would_block; 750 751 if (result > SEMVMX) 752 return -ERANGE; 753 754 if (sop->sem_flg & SEM_UNDO) { 755 int undo = un->semadj[sop->sem_num] - sem_op; 756 757 /* Exceeding the undo range is an error. 
*/ 758 if (undo < (-SEMAEM - 1) || undo > SEMAEM) 759 return -ERANGE; 760 } 761 } 762 763 for (sop = sops; sop < sops + nsops; sop++) { 764 curr = &sma->sems[sop->sem_num]; 765 sem_op = sop->sem_op; 766 result = curr->semval; 767 768 if (sop->sem_flg & SEM_UNDO) { 769 int undo = un->semadj[sop->sem_num] - sem_op; 770 771 un->semadj[sop->sem_num] = undo; 772 } 773 curr->semval += sem_op; 774 ipc_update_pid(&curr->sempid, q->pid); 775 } 776 777 return 0; 778 779 would_block: 780 q->blocking = sop; 781 return sop->sem_flg & IPC_NOWAIT ? -EAGAIN : 1; 782 } 783 784 static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error, 785 struct wake_q_head *wake_q) 786 { 787 get_task_struct(q->sleeper); 788 789 /* see SEM_BARRIER_2 for purpuse/pairing */ 790 smp_store_release(&q->status, error); 791 792 wake_q_add_safe(wake_q, q->sleeper); 793 } 794 795 static void unlink_queue(struct sem_array *sma, struct sem_queue *q) 796 { 797 list_del(&q->list); 798 if (q->nsops > 1) 799 sma->complex_count--; 800 } 801 802 /** check_restart(sma, q) 803 * @sma: semaphore array 804 * @q: the operation that just completed 805 * 806 * update_queue is O(N^2) when it restarts scanning the whole queue of 807 * waiting operations. Therefore this function checks if the restart is 808 * really necessary. It is called after a previously waiting operation 809 * modified the array. 810 * Note that wait-for-zero operations are handled without restart. 811 */ 812 static inline int check_restart(struct sem_array *sma, struct sem_queue *q) 813 { 814 /* pending complex alter operations are too difficult to analyse */ 815 if (!list_empty(&sma->pending_alter)) 816 return 1; 817 818 /* we were a sleeping complex operation. Too difficult */ 819 if (q->nsops > 1) 820 return 1; 821 822 /* It is impossible that someone waits for the new value: 823 * - complex operations always restart. 824 * - wait-for-zero are handled seperately. 
825 * - q is a previously sleeping simple operation that 826 * altered the array. It must be a decrement, because 827 * simple increments never sleep. 828 * - If there are older (higher priority) decrements 829 * in the queue, then they have observed the original 830 * semval value and couldn't proceed. The operation 831 * decremented to value - thus they won't proceed either. 832 */ 833 return 0; 834 } 835 836 /** 837 * wake_const_ops - wake up non-alter tasks 838 * @sma: semaphore array. 839 * @semnum: semaphore that was modified. 840 * @wake_q: lockless wake-queue head. 841 * 842 * wake_const_ops must be called after a semaphore in a semaphore array 843 * was set to 0. If complex const operations are pending, wake_const_ops must 844 * be called with semnum = -1, as well as with the number of each modified 845 * semaphore. 846 * The tasks that must be woken up are added to @wake_q. The return code 847 * is stored in q->pid. 848 * The function returns 1 if at least one operation was completed successfully. 
 */
static int wake_const_ops(struct sem_array *sma, int semnum,
			  struct wake_q_head *wake_q)
{
	struct sem_queue *q, *tmp;
	struct list_head *pending_list;
	int semop_completed = 0;

	/* semnum == -1 selects the array-wide list of const operations */
	if (semnum == -1)
		pending_list = &sma->pending_const;
	else
		pending_list = &sma->sems[semnum].pending_const;

	list_for_each_entry_safe(q, tmp, pending_list, list) {
		int error = perform_atomic_semop(sma, q);

		/* > 0: the operation still has to sleep */
		if (error > 0)
			continue;
		/* operation completed, remove from queue & wakeup */
		unlink_queue(sma, q);

		wake_up_sem_queue_prepare(q, error, wake_q);
		if (error == 0)
			semop_completed = 1;
	}

	return semop_completed;
}

/**
 * do_smart_wakeup_zero - wakeup all wait for zero tasks
 * @sma: semaphore array
 * @sops: operations that were performed
 * @nsops: number of operations
 * @wake_q: lockless wake-queue head
 *
 * Checks all required queue for wait-for-zero operations, based
 * on the actual changes that were performed on the semaphore array.
 * The function returns 1 if at least one operation was completed successfully.
 */
static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
				int nsops, struct wake_q_head *wake_q)
{
	int i;
	int semop_completed = 0;
	int got_zero = 0;

	/* first: the per-semaphore queues, if known */
	if (sops) {
		for (i = 0; i < nsops; i++) {
			int num = sops[i].sem_num;

			if (sma->sems[num].semval == 0) {
				got_zero = 1;
				semop_completed |= wake_const_ops(sma, num, wake_q);
			}
		}
	} else {
		/*
		 * No sops means modified semaphores not known.
		 * Assume all were changed.
		 */
		for (i = 0; i < sma->sem_nsems; i++) {
			if (sma->sems[i].semval == 0) {
				got_zero = 1;
				semop_completed |= wake_const_ops(sma, i, wake_q);
			}
		}
	}
	/*
	 * If one of the modified semaphores got 0,
	 * then check the global queue, too.
	 */
	if (got_zero)
		semop_completed |= wake_const_ops(sma, -1, wake_q);

	return semop_completed;
}


/**
 * update_queue - look for tasks that can be completed.
 * @sma: semaphore array.
 * @semnum: semaphore that was modified.
 * @wake_q: lockless wake-queue head.
 *
 * update_queue must be called after a semaphore in a semaphore array
 * was modified. If multiple semaphores were modified, update_queue must
 * be called with semnum = -1, as well as with the number of each modified
 * semaphore.
 * The tasks that must be woken up are added to @wake_q. The return code
 * is stored in q->status.
 * The function internally checks if const operations can now succeed.
 *
 * The function returns 1 if at least one semop was completed successfully.
 */
static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *wake_q)
{
	struct sem_queue *q, *tmp;
	struct list_head *pending_list;
	int semop_completed = 0;

	/* semnum == -1 selects the array-wide list of alter operations */
	if (semnum == -1)
		pending_list = &sma->pending_alter;
	else
		pending_list = &sma->sems[semnum].pending_alter;

again:
	list_for_each_entry_safe(q, tmp, pending_list, list) {
		int error, restart;

		/* If we are scanning the single sop, per-semaphore list of
		 * one semaphore and that semaphore is 0, then it is not
		 * necessary to scan further: simple increments
		 * that affect only one entry succeed immediately and cannot
		 * be in the  per semaphore pending queue, and decrements
		 * cannot be successful if the value is already 0.
		 */
		if (semnum != -1 && sma->sems[semnum].semval == 0)
			break;

		error = perform_atomic_semop(sma, q);

		/* Does q->sleeper still need to sleep? */
		if (error > 0)
			continue;

		unlink_queue(sma, q);

		if (error) {
			/* failed operation: the array was not modified */
			restart = 0;
		} else {
			semop_completed = 1;
			do_smart_wakeup_zero(sma, q->sops, q->nsops, wake_q);
			restart = check_restart(sma, q);
		}

		wake_up_sem_queue_prepare(q, error, wake_q);
		if (restart)
			goto again;
	}
	return semop_completed;
}

/**
 * set_semotime - set sem_otime
 * @sma: semaphore array
 * @sops: operations that modified the array, may be NULL
 *
 * sem_otime is replicated to avoid cache line thrashing.
 * This function sets one instance to the current time: the one of
 * semaphore 0 if @sops is NULL, otherwise the one of the semaphore named
 * by the first operation.
 */
static void set_semotime(struct sem_array *sma, struct sembuf *sops)
{
	if (sops == NULL) {
		sma->sems[0].sem_otime = ktime_get_real_seconds();
	} else {
		sma->sems[sops[0].sem_num].sem_otime =
						ktime_get_real_seconds();
	}
}

/**
 * do_smart_update - optimized update_queue
 * @sma: semaphore array
 * @sops: operations that were performed
 * @nsops: number of operations
 * @otime: force setting otime
 * @wake_q: lockless wake-queue head
 *
 * do_smart_update() does the required calls to update_queue and wakeup_zero,
 * based on the actual changes that were performed on the semaphore array.
 * Note that the function does not do the actual wake-up: the caller is
 * responsible for calling wake_up_q().
 * It is safe to perform this call after dropping all locks.
 */
static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops,
			    int otime, struct wake_q_head *wake_q)
{
	int i;

	otime |= do_smart_wakeup_zero(sma, sops, nsops, wake_q);

	if (!list_empty(&sma->pending_alter)) {
		/* semaphore array uses the global queue - just process it. */
		otime |= update_queue(sma, -1, wake_q);
	} else {
		if (!sops) {
			/*
			 * No sops, thus the modified semaphores are not
			 * known. Check all.
			 */
			for (i = 0; i < sma->sem_nsems; i++)
				otime |= update_queue(sma, i, wake_q);
		} else {
			/*
			 * Check the semaphores that were increased:
			 * - No complex ops, thus all sleeping ops are
			 *   decrease.
			 * - if we decreased the value, then any sleeping
			 *   semaphore ops won't be able to run: If the
			 *   previous value was too small, then the new
			 *   value will be too small, too.
			 */
			for (i = 0; i < nsops; i++) {
				if (sops[i].sem_op > 0) {
					otime |= update_queue(sma,
							      sops[i].sem_num, wake_q);
				}
			}
		}
	}
	/* set if forced by the caller or if any queued operation completed */
	if (otime)
		set_semotime(sma, sops);
}

/*
 * check_qop: Test if a queued operation sleeps on the semaphore semnum
 *
 * Returns 1 if the operation that blocked @q targets @semnum and is of
 * the requested kind (wait-for-zero if @count_zero, decrement otherwise),
 * 0 if not.
 */
static int check_qop(struct sem_array *sma, int semnum, struct sem_queue *q,
			bool count_zero)
{
	struct sembuf *sop = q->blocking;

	/*
	 * Linux always (since 0.99.10) reported a task as sleeping on all
	 * semaphores. This violates SUS, therefore it was changed to the
	 * standard compliant behavior.
	 * Give the administrators a chance to notice that an application
	 * might misbehave because it relies on the Linux behavior.
	 */
	pr_info_once("semctl(GETNCNT/GETZCNT) is since 3.16 Single Unix Specification compliant.\n"
			"The task %s (%d) triggered the difference, watch for misbehavior.\n",
			current->comm, task_pid_nr(current));

	if (sop->sem_num != semnum)
		return 0;

	if (count_zero && sop->sem_op == 0)
		return 1;
	if (!count_zero && sop->sem_op < 0)
		return 1;

	return 0;
}

/* The following counts are associated to each semaphore:
 *   semncnt        number of tasks waiting on semval being nonzero
 *   semzcnt        number of tasks waiting on semval being zero
 *
 * Per definition, a task waits only on the semaphore of the first semop
 * that cannot proceed, even if additional operation would block, too.
1101 */ 1102 static int count_semcnt(struct sem_array *sma, ushort semnum, 1103 bool count_zero) 1104 { 1105 struct list_head *l; 1106 struct sem_queue *q; 1107 int semcnt; 1108 1109 semcnt = 0; 1110 /* First: check the simple operations. They are easy to evaluate */ 1111 if (count_zero) 1112 l = &sma->sems[semnum].pending_const; 1113 else 1114 l = &sma->sems[semnum].pending_alter; 1115 1116 list_for_each_entry(q, l, list) { 1117 /* all task on a per-semaphore list sleep on exactly 1118 * that semaphore 1119 */ 1120 semcnt++; 1121 } 1122 1123 /* Then: check the complex operations. */ 1124 list_for_each_entry(q, &sma->pending_alter, list) { 1125 semcnt += check_qop(sma, semnum, q, count_zero); 1126 } 1127 if (count_zero) { 1128 list_for_each_entry(q, &sma->pending_const, list) { 1129 semcnt += check_qop(sma, semnum, q, count_zero); 1130 } 1131 } 1132 return semcnt; 1133 } 1134 1135 /* Free a semaphore set. freeary() is called with sem_ids.rwsem locked 1136 * as a writer and the spinlock for this semaphore set hold. sem_ids.rwsem 1137 * remains locked on exit. 1138 */ 1139 static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 1140 { 1141 struct sem_undo *un, *tu; 1142 struct sem_queue *q, *tq; 1143 struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); 1144 int i; 1145 DEFINE_WAKE_Q(wake_q); 1146 1147 /* Free the existing undo structures for this semaphore set. */ 1148 ipc_assert_locked_object(&sma->sem_perm); 1149 list_for_each_entry_safe(un, tu, &sma->list_id, list_id) { 1150 list_del(&un->list_id); 1151 spin_lock(&un->ulp->lock); 1152 un->semid = -1; 1153 list_del_rcu(&un->list_proc); 1154 spin_unlock(&un->ulp->lock); 1155 kfree_rcu(un, rcu); 1156 } 1157 1158 /* Wake up all pending processes and let them fail with EIDRM. 
*/ 1159 list_for_each_entry_safe(q, tq, &sma->pending_const, list) { 1160 unlink_queue(sma, q); 1161 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1162 } 1163 1164 list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { 1165 unlink_queue(sma, q); 1166 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1167 } 1168 for (i = 0; i < sma->sem_nsems; i++) { 1169 struct sem *sem = &sma->sems[i]; 1170 list_for_each_entry_safe(q, tq, &sem->pending_const, list) { 1171 unlink_queue(sma, q); 1172 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1173 } 1174 list_for_each_entry_safe(q, tq, &sem->pending_alter, list) { 1175 unlink_queue(sma, q); 1176 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1177 } 1178 ipc_update_pid(&sem->sempid, NULL); 1179 } 1180 1181 /* Remove the semaphore set from the IDR */ 1182 sem_rmid(ns, sma); 1183 sem_unlock(sma, -1); 1184 rcu_read_unlock(); 1185 1186 wake_up_q(&wake_q); 1187 ns->used_sems -= sma->sem_nsems; 1188 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1189 } 1190 1191 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) 1192 { 1193 switch (version) { 1194 case IPC_64: 1195 return copy_to_user(buf, in, sizeof(*in)); 1196 case IPC_OLD: 1197 { 1198 struct semid_ds out; 1199 1200 memset(&out, 0, sizeof(out)); 1201 1202 ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm); 1203 1204 out.sem_otime = in->sem_otime; 1205 out.sem_ctime = in->sem_ctime; 1206 out.sem_nsems = in->sem_nsems; 1207 1208 return copy_to_user(buf, &out, sizeof(out)); 1209 } 1210 default: 1211 return -EINVAL; 1212 } 1213 } 1214 1215 static time64_t get_semotime(struct sem_array *sma) 1216 { 1217 int i; 1218 time64_t res; 1219 1220 res = sma->sems[0].sem_otime; 1221 for (i = 1; i < sma->sem_nsems; i++) { 1222 time64_t to = sma->sems[i].sem_otime; 1223 1224 if (to > res) 1225 res = to; 1226 } 1227 return res; 1228 } 1229 1230 static int semctl_stat(struct ipc_namespace *ns, int semid, 1231 int cmd, struct semid64_ds *semid64) 1232 { 
1233 struct sem_array *sma; 1234 time64_t semotime; 1235 int err; 1236 1237 memset(semid64, 0, sizeof(*semid64)); 1238 1239 rcu_read_lock(); 1240 if (cmd == SEM_STAT || cmd == SEM_STAT_ANY) { 1241 sma = sem_obtain_object(ns, semid); 1242 if (IS_ERR(sma)) { 1243 err = PTR_ERR(sma); 1244 goto out_unlock; 1245 } 1246 } else { /* IPC_STAT */ 1247 sma = sem_obtain_object_check(ns, semid); 1248 if (IS_ERR(sma)) { 1249 err = PTR_ERR(sma); 1250 goto out_unlock; 1251 } 1252 } 1253 1254 /* see comment for SHM_STAT_ANY */ 1255 if (cmd == SEM_STAT_ANY) 1256 audit_ipc_obj(&sma->sem_perm); 1257 else { 1258 err = -EACCES; 1259 if (ipcperms(ns, &sma->sem_perm, S_IRUGO)) 1260 goto out_unlock; 1261 } 1262 1263 err = security_sem_semctl(&sma->sem_perm, cmd); 1264 if (err) 1265 goto out_unlock; 1266 1267 ipc_lock_object(&sma->sem_perm); 1268 1269 if (!ipc_valid_object(&sma->sem_perm)) { 1270 ipc_unlock_object(&sma->sem_perm); 1271 err = -EIDRM; 1272 goto out_unlock; 1273 } 1274 1275 kernel_to_ipc64_perm(&sma->sem_perm, &semid64->sem_perm); 1276 semotime = get_semotime(sma); 1277 semid64->sem_otime = semotime; 1278 semid64->sem_ctime = sma->sem_ctime; 1279 #ifndef CONFIG_64BIT 1280 semid64->sem_otime_high = semotime >> 32; 1281 semid64->sem_ctime_high = sma->sem_ctime >> 32; 1282 #endif 1283 semid64->sem_nsems = sma->sem_nsems; 1284 1285 if (cmd == IPC_STAT) { 1286 /* 1287 * As defined in SUS: 1288 * Return 0 on success 1289 */ 1290 err = 0; 1291 } else { 1292 /* 1293 * SEM_STAT and SEM_STAT_ANY (both Linux specific) 1294 * Return the full id, including the sequence number 1295 */ 1296 err = sma->sem_perm.id; 1297 } 1298 ipc_unlock_object(&sma->sem_perm); 1299 out_unlock: 1300 rcu_read_unlock(); 1301 return err; 1302 } 1303 1304 static int semctl_info(struct ipc_namespace *ns, int semid, 1305 int cmd, void __user *p) 1306 { 1307 struct seminfo seminfo; 1308 int max_idx; 1309 int err; 1310 1311 err = security_sem_semctl(NULL, cmd); 1312 if (err) 1313 return err; 1314 1315 
memset(&seminfo, 0, sizeof(seminfo)); 1316 seminfo.semmni = ns->sc_semmni; 1317 seminfo.semmns = ns->sc_semmns; 1318 seminfo.semmsl = ns->sc_semmsl; 1319 seminfo.semopm = ns->sc_semopm; 1320 seminfo.semvmx = SEMVMX; 1321 seminfo.semmnu = SEMMNU; 1322 seminfo.semmap = SEMMAP; 1323 seminfo.semume = SEMUME; 1324 down_read(&sem_ids(ns).rwsem); 1325 if (cmd == SEM_INFO) { 1326 seminfo.semusz = sem_ids(ns).in_use; 1327 seminfo.semaem = ns->used_sems; 1328 } else { 1329 seminfo.semusz = SEMUSZ; 1330 seminfo.semaem = SEMAEM; 1331 } 1332 max_idx = ipc_get_maxidx(&sem_ids(ns)); 1333 up_read(&sem_ids(ns).rwsem); 1334 if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) 1335 return -EFAULT; 1336 return (max_idx < 0) ? 0 : max_idx; 1337 } 1338 1339 static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, 1340 int val) 1341 { 1342 struct sem_undo *un; 1343 struct sem_array *sma; 1344 struct sem *curr; 1345 int err; 1346 DEFINE_WAKE_Q(wake_q); 1347 1348 if (val > SEMVMX || val < 0) 1349 return -ERANGE; 1350 1351 rcu_read_lock(); 1352 sma = sem_obtain_object_check(ns, semid); 1353 if (IS_ERR(sma)) { 1354 rcu_read_unlock(); 1355 return PTR_ERR(sma); 1356 } 1357 1358 if (semnum < 0 || semnum >= sma->sem_nsems) { 1359 rcu_read_unlock(); 1360 return -EINVAL; 1361 } 1362 1363 1364 if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) { 1365 rcu_read_unlock(); 1366 return -EACCES; 1367 } 1368 1369 err = security_sem_semctl(&sma->sem_perm, SETVAL); 1370 if (err) { 1371 rcu_read_unlock(); 1372 return -EACCES; 1373 } 1374 1375 sem_lock(sma, NULL, -1); 1376 1377 if (!ipc_valid_object(&sma->sem_perm)) { 1378 sem_unlock(sma, -1); 1379 rcu_read_unlock(); 1380 return -EIDRM; 1381 } 1382 1383 semnum = array_index_nospec(semnum, sma->sem_nsems); 1384 curr = &sma->sems[semnum]; 1385 1386 ipc_assert_locked_object(&sma->sem_perm); 1387 list_for_each_entry(un, &sma->list_id, list_id) 1388 un->semadj[semnum] = 0; 1389 1390 curr->semval = val; 1391 ipc_update_pid(&curr->sempid, 
task_tgid(current)); 1392 sma->sem_ctime = ktime_get_real_seconds(); 1393 /* maybe some queued-up processes were waiting for this */ 1394 do_smart_update(sma, NULL, 0, 0, &wake_q); 1395 sem_unlock(sma, -1); 1396 rcu_read_unlock(); 1397 wake_up_q(&wake_q); 1398 return 0; 1399 } 1400 1401 static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, 1402 int cmd, void __user *p) 1403 { 1404 struct sem_array *sma; 1405 struct sem *curr; 1406 int err, nsems; 1407 ushort fast_sem_io[SEMMSL_FAST]; 1408 ushort *sem_io = fast_sem_io; 1409 DEFINE_WAKE_Q(wake_q); 1410 1411 rcu_read_lock(); 1412 sma = sem_obtain_object_check(ns, semid); 1413 if (IS_ERR(sma)) { 1414 rcu_read_unlock(); 1415 return PTR_ERR(sma); 1416 } 1417 1418 nsems = sma->sem_nsems; 1419 1420 err = -EACCES; 1421 if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? S_IWUGO : S_IRUGO)) 1422 goto out_rcu_wakeup; 1423 1424 err = security_sem_semctl(&sma->sem_perm, cmd); 1425 if (err) 1426 goto out_rcu_wakeup; 1427 1428 err = -EACCES; 1429 switch (cmd) { 1430 case GETALL: 1431 { 1432 ushort __user *array = p; 1433 int i; 1434 1435 sem_lock(sma, NULL, -1); 1436 if (!ipc_valid_object(&sma->sem_perm)) { 1437 err = -EIDRM; 1438 goto out_unlock; 1439 } 1440 if (nsems > SEMMSL_FAST) { 1441 if (!ipc_rcu_getref(&sma->sem_perm)) { 1442 err = -EIDRM; 1443 goto out_unlock; 1444 } 1445 sem_unlock(sma, -1); 1446 rcu_read_unlock(); 1447 sem_io = kvmalloc_array(nsems, sizeof(ushort), 1448 GFP_KERNEL); 1449 if (sem_io == NULL) { 1450 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1451 return -ENOMEM; 1452 } 1453 1454 rcu_read_lock(); 1455 sem_lock_and_putref(sma); 1456 if (!ipc_valid_object(&sma->sem_perm)) { 1457 err = -EIDRM; 1458 goto out_unlock; 1459 } 1460 } 1461 for (i = 0; i < sma->sem_nsems; i++) 1462 sem_io[i] = sma->sems[i].semval; 1463 sem_unlock(sma, -1); 1464 rcu_read_unlock(); 1465 err = 0; 1466 if (copy_to_user(array, sem_io, nsems*sizeof(ushort))) 1467 err = -EFAULT; 1468 goto out_free; 1469 } 1470 case 
SETALL: 1471 { 1472 int i; 1473 struct sem_undo *un; 1474 1475 if (!ipc_rcu_getref(&sma->sem_perm)) { 1476 err = -EIDRM; 1477 goto out_rcu_wakeup; 1478 } 1479 rcu_read_unlock(); 1480 1481 if (nsems > SEMMSL_FAST) { 1482 sem_io = kvmalloc_array(nsems, sizeof(ushort), 1483 GFP_KERNEL); 1484 if (sem_io == NULL) { 1485 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1486 return -ENOMEM; 1487 } 1488 } 1489 1490 if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) { 1491 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1492 err = -EFAULT; 1493 goto out_free; 1494 } 1495 1496 for (i = 0; i < nsems; i++) { 1497 if (sem_io[i] > SEMVMX) { 1498 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1499 err = -ERANGE; 1500 goto out_free; 1501 } 1502 } 1503 rcu_read_lock(); 1504 sem_lock_and_putref(sma); 1505 if (!ipc_valid_object(&sma->sem_perm)) { 1506 err = -EIDRM; 1507 goto out_unlock; 1508 } 1509 1510 for (i = 0; i < nsems; i++) { 1511 sma->sems[i].semval = sem_io[i]; 1512 ipc_update_pid(&sma->sems[i].sempid, task_tgid(current)); 1513 } 1514 1515 ipc_assert_locked_object(&sma->sem_perm); 1516 list_for_each_entry(un, &sma->list_id, list_id) { 1517 for (i = 0; i < nsems; i++) 1518 un->semadj[i] = 0; 1519 } 1520 sma->sem_ctime = ktime_get_real_seconds(); 1521 /* maybe some queued-up processes were waiting for this */ 1522 do_smart_update(sma, NULL, 0, 0, &wake_q); 1523 err = 0; 1524 goto out_unlock; 1525 } 1526 /* GETVAL, GETPID, GETNCTN, GETZCNT: fall-through */ 1527 } 1528 err = -EINVAL; 1529 if (semnum < 0 || semnum >= nsems) 1530 goto out_rcu_wakeup; 1531 1532 sem_lock(sma, NULL, -1); 1533 if (!ipc_valid_object(&sma->sem_perm)) { 1534 err = -EIDRM; 1535 goto out_unlock; 1536 } 1537 1538 semnum = array_index_nospec(semnum, nsems); 1539 curr = &sma->sems[semnum]; 1540 1541 switch (cmd) { 1542 case GETVAL: 1543 err = curr->semval; 1544 goto out_unlock; 1545 case GETPID: 1546 err = pid_vnr(curr->sempid); 1547 goto out_unlock; 1548 case GETNCNT: 1549 err = count_semcnt(sma, semnum, 0); 
1550 goto out_unlock; 1551 case GETZCNT: 1552 err = count_semcnt(sma, semnum, 1); 1553 goto out_unlock; 1554 } 1555 1556 out_unlock: 1557 sem_unlock(sma, -1); 1558 out_rcu_wakeup: 1559 rcu_read_unlock(); 1560 wake_up_q(&wake_q); 1561 out_free: 1562 if (sem_io != fast_sem_io) 1563 kvfree(sem_io); 1564 return err; 1565 } 1566 1567 static inline unsigned long 1568 copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version) 1569 { 1570 switch (version) { 1571 case IPC_64: 1572 if (copy_from_user(out, buf, sizeof(*out))) 1573 return -EFAULT; 1574 return 0; 1575 case IPC_OLD: 1576 { 1577 struct semid_ds tbuf_old; 1578 1579 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 1580 return -EFAULT; 1581 1582 out->sem_perm.uid = tbuf_old.sem_perm.uid; 1583 out->sem_perm.gid = tbuf_old.sem_perm.gid; 1584 out->sem_perm.mode = tbuf_old.sem_perm.mode; 1585 1586 return 0; 1587 } 1588 default: 1589 return -EINVAL; 1590 } 1591 } 1592 1593 /* 1594 * This function handles some semctl commands which require the rwsem 1595 * to be held in write mode. 1596 * NOTE: no locks must be held, the rwsem is taken inside this function. 
1597 */ 1598 static int semctl_down(struct ipc_namespace *ns, int semid, 1599 int cmd, struct semid64_ds *semid64) 1600 { 1601 struct sem_array *sma; 1602 int err; 1603 struct kern_ipc_perm *ipcp; 1604 1605 down_write(&sem_ids(ns).rwsem); 1606 rcu_read_lock(); 1607 1608 ipcp = ipcctl_obtain_check(ns, &sem_ids(ns), semid, cmd, 1609 &semid64->sem_perm, 0); 1610 if (IS_ERR(ipcp)) { 1611 err = PTR_ERR(ipcp); 1612 goto out_unlock1; 1613 } 1614 1615 sma = container_of(ipcp, struct sem_array, sem_perm); 1616 1617 err = security_sem_semctl(&sma->sem_perm, cmd); 1618 if (err) 1619 goto out_unlock1; 1620 1621 switch (cmd) { 1622 case IPC_RMID: 1623 sem_lock(sma, NULL, -1); 1624 /* freeary unlocks the ipc object and rcu */ 1625 freeary(ns, ipcp); 1626 goto out_up; 1627 case IPC_SET: 1628 sem_lock(sma, NULL, -1); 1629 err = ipc_update_perm(&semid64->sem_perm, ipcp); 1630 if (err) 1631 goto out_unlock0; 1632 sma->sem_ctime = ktime_get_real_seconds(); 1633 break; 1634 default: 1635 err = -EINVAL; 1636 goto out_unlock1; 1637 } 1638 1639 out_unlock0: 1640 sem_unlock(sma, -1); 1641 out_unlock1: 1642 rcu_read_unlock(); 1643 out_up: 1644 up_write(&sem_ids(ns).rwsem); 1645 return err; 1646 } 1647 1648 static long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg, int version) 1649 { 1650 struct ipc_namespace *ns; 1651 void __user *p = (void __user *)arg; 1652 struct semid64_ds semid64; 1653 int err; 1654 1655 if (semid < 0) 1656 return -EINVAL; 1657 1658 ns = current->nsproxy->ipc_ns; 1659 1660 switch (cmd) { 1661 case IPC_INFO: 1662 case SEM_INFO: 1663 return semctl_info(ns, semid, cmd, p); 1664 case IPC_STAT: 1665 case SEM_STAT: 1666 case SEM_STAT_ANY: 1667 err = semctl_stat(ns, semid, cmd, &semid64); 1668 if (err < 0) 1669 return err; 1670 if (copy_semid_to_user(p, &semid64, version)) 1671 err = -EFAULT; 1672 return err; 1673 case GETALL: 1674 case GETVAL: 1675 case GETPID: 1676 case GETNCNT: 1677 case GETZCNT: 1678 case SETALL: 1679 return semctl_main(ns, semid, 
semnum, cmd, p); 1680 case SETVAL: { 1681 int val; 1682 #if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN) 1683 /* big-endian 64bit */ 1684 val = arg >> 32; 1685 #else 1686 /* 32bit or little-endian 64bit */ 1687 val = arg; 1688 #endif 1689 return semctl_setval(ns, semid, semnum, val); 1690 } 1691 case IPC_SET: 1692 if (copy_semid_from_user(&semid64, p, version)) 1693 return -EFAULT; 1694 /* fall through */ 1695 case IPC_RMID: 1696 return semctl_down(ns, semid, cmd, &semid64); 1697 default: 1698 return -EINVAL; 1699 } 1700 } 1701 1702 SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg) 1703 { 1704 return ksys_semctl(semid, semnum, cmd, arg, IPC_64); 1705 } 1706 1707 #ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION 1708 long ksys_old_semctl(int semid, int semnum, int cmd, unsigned long arg) 1709 { 1710 int version = ipc_parse_version(&cmd); 1711 1712 return ksys_semctl(semid, semnum, cmd, arg, version); 1713 } 1714 1715 SYSCALL_DEFINE4(old_semctl, int, semid, int, semnum, int, cmd, unsigned long, arg) 1716 { 1717 return ksys_old_semctl(semid, semnum, cmd, arg); 1718 } 1719 #endif 1720 1721 #ifdef CONFIG_COMPAT 1722 1723 struct compat_semid_ds { 1724 struct compat_ipc_perm sem_perm; 1725 old_time32_t sem_otime; 1726 old_time32_t sem_ctime; 1727 compat_uptr_t sem_base; 1728 compat_uptr_t sem_pending; 1729 compat_uptr_t sem_pending_last; 1730 compat_uptr_t undo; 1731 unsigned short sem_nsems; 1732 }; 1733 1734 static int copy_compat_semid_from_user(struct semid64_ds *out, void __user *buf, 1735 int version) 1736 { 1737 memset(out, 0, sizeof(*out)); 1738 if (version == IPC_64) { 1739 struct compat_semid64_ds __user *p = buf; 1740 return get_compat_ipc64_perm(&out->sem_perm, &p->sem_perm); 1741 } else { 1742 struct compat_semid_ds __user *p = buf; 1743 return get_compat_ipc_perm(&out->sem_perm, &p->sem_perm); 1744 } 1745 } 1746 1747 static int copy_compat_semid_to_user(void __user *buf, struct semid64_ds *in, 1748 int version) 1749 { 1750 if (version 
== IPC_64) { 1751 struct compat_semid64_ds v; 1752 memset(&v, 0, sizeof(v)); 1753 to_compat_ipc64_perm(&v.sem_perm, &in->sem_perm); 1754 v.sem_otime = lower_32_bits(in->sem_otime); 1755 v.sem_otime_high = upper_32_bits(in->sem_otime); 1756 v.sem_ctime = lower_32_bits(in->sem_ctime); 1757 v.sem_ctime_high = upper_32_bits(in->sem_ctime); 1758 v.sem_nsems = in->sem_nsems; 1759 return copy_to_user(buf, &v, sizeof(v)); 1760 } else { 1761 struct compat_semid_ds v; 1762 memset(&v, 0, sizeof(v)); 1763 to_compat_ipc_perm(&v.sem_perm, &in->sem_perm); 1764 v.sem_otime = in->sem_otime; 1765 v.sem_ctime = in->sem_ctime; 1766 v.sem_nsems = in->sem_nsems; 1767 return copy_to_user(buf, &v, sizeof(v)); 1768 } 1769 } 1770 1771 static long compat_ksys_semctl(int semid, int semnum, int cmd, int arg, int version) 1772 { 1773 void __user *p = compat_ptr(arg); 1774 struct ipc_namespace *ns; 1775 struct semid64_ds semid64; 1776 int err; 1777 1778 ns = current->nsproxy->ipc_ns; 1779 1780 if (semid < 0) 1781 return -EINVAL; 1782 1783 switch (cmd & (~IPC_64)) { 1784 case IPC_INFO: 1785 case SEM_INFO: 1786 return semctl_info(ns, semid, cmd, p); 1787 case IPC_STAT: 1788 case SEM_STAT: 1789 case SEM_STAT_ANY: 1790 err = semctl_stat(ns, semid, cmd, &semid64); 1791 if (err < 0) 1792 return err; 1793 if (copy_compat_semid_to_user(p, &semid64, version)) 1794 err = -EFAULT; 1795 return err; 1796 case GETVAL: 1797 case GETPID: 1798 case GETNCNT: 1799 case GETZCNT: 1800 case GETALL: 1801 case SETALL: 1802 return semctl_main(ns, semid, semnum, cmd, p); 1803 case SETVAL: 1804 return semctl_setval(ns, semid, semnum, arg); 1805 case IPC_SET: 1806 if (copy_compat_semid_from_user(&semid64, p, version)) 1807 return -EFAULT; 1808 /* fallthru */ 1809 case IPC_RMID: 1810 return semctl_down(ns, semid, cmd, &semid64); 1811 default: 1812 return -EINVAL; 1813 } 1814 } 1815 1816 COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg) 1817 { 1818 return compat_ksys_semctl(semid, semnum, cmd, arg, 
IPC_64); 1819 } 1820 1821 #ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION 1822 long compat_ksys_old_semctl(int semid, int semnum, int cmd, int arg) 1823 { 1824 int version = compat_ipc_parse_version(&cmd); 1825 1826 return compat_ksys_semctl(semid, semnum, cmd, arg, version); 1827 } 1828 1829 COMPAT_SYSCALL_DEFINE4(old_semctl, int, semid, int, semnum, int, cmd, int, arg) 1830 { 1831 return compat_ksys_old_semctl(semid, semnum, cmd, arg); 1832 } 1833 #endif 1834 #endif 1835 1836 /* If the task doesn't already have a undo_list, then allocate one 1837 * here. We guarantee there is only one thread using this undo list, 1838 * and current is THE ONE 1839 * 1840 * If this allocation and assignment succeeds, but later 1841 * portions of this code fail, there is no need to free the sem_undo_list. 1842 * Just let it stay associated with the task, and it'll be freed later 1843 * at exit time. 1844 * 1845 * This can block, so callers must hold no locks. 1846 */ 1847 static inline int get_undo_list(struct sem_undo_list **undo_listp) 1848 { 1849 struct sem_undo_list *undo_list; 1850 1851 undo_list = current->sysvsem.undo_list; 1852 if (!undo_list) { 1853 undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL); 1854 if (undo_list == NULL) 1855 return -ENOMEM; 1856 spin_lock_init(&undo_list->lock); 1857 refcount_set(&undo_list->refcnt, 1); 1858 INIT_LIST_HEAD(&undo_list->list_proc); 1859 1860 current->sysvsem.undo_list = undo_list; 1861 } 1862 *undo_listp = undo_list; 1863 return 0; 1864 } 1865 1866 static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid) 1867 { 1868 struct sem_undo *un; 1869 1870 list_for_each_entry_rcu(un, &ulp->list_proc, list_proc, 1871 spin_is_locked(&ulp->lock)) { 1872 if (un->semid == semid) 1873 return un; 1874 } 1875 return NULL; 1876 } 1877 1878 static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid) 1879 { 1880 struct sem_undo *un; 1881 1882 assert_spin_locked(&ulp->lock); 1883 1884 un = __lookup_undo(ulp, semid); 1885 
if (un) { 1886 list_del_rcu(&un->list_proc); 1887 list_add_rcu(&un->list_proc, &ulp->list_proc); 1888 } 1889 return un; 1890 } 1891 1892 /** 1893 * find_alloc_undo - lookup (and if not present create) undo array 1894 * @ns: namespace 1895 * @semid: semaphore array id 1896 * 1897 * The function looks up (and if not present creates) the undo structure. 1898 * The size of the undo structure depends on the size of the semaphore 1899 * array, thus the alloc path is not that straightforward. 1900 * Lifetime-rules: sem_undo is rcu-protected, on success, the function 1901 * performs a rcu_read_lock(). 1902 */ 1903 static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) 1904 { 1905 struct sem_array *sma; 1906 struct sem_undo_list *ulp; 1907 struct sem_undo *un, *new; 1908 int nsems, error; 1909 1910 error = get_undo_list(&ulp); 1911 if (error) 1912 return ERR_PTR(error); 1913 1914 rcu_read_lock(); 1915 spin_lock(&ulp->lock); 1916 un = lookup_undo(ulp, semid); 1917 spin_unlock(&ulp->lock); 1918 if (likely(un != NULL)) 1919 goto out; 1920 1921 /* no undo structure around - allocate one. 
*/ 1922 /* step 1: figure out the size of the semaphore array */ 1923 sma = sem_obtain_object_check(ns, semid); 1924 if (IS_ERR(sma)) { 1925 rcu_read_unlock(); 1926 return ERR_CAST(sma); 1927 } 1928 1929 nsems = sma->sem_nsems; 1930 if (!ipc_rcu_getref(&sma->sem_perm)) { 1931 rcu_read_unlock(); 1932 un = ERR_PTR(-EIDRM); 1933 goto out; 1934 } 1935 rcu_read_unlock(); 1936 1937 /* step 2: allocate new undo structure */ 1938 new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); 1939 if (!new) { 1940 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1941 return ERR_PTR(-ENOMEM); 1942 } 1943 1944 /* step 3: Acquire the lock on semaphore array */ 1945 rcu_read_lock(); 1946 sem_lock_and_putref(sma); 1947 if (!ipc_valid_object(&sma->sem_perm)) { 1948 sem_unlock(sma, -1); 1949 rcu_read_unlock(); 1950 kfree(new); 1951 un = ERR_PTR(-EIDRM); 1952 goto out; 1953 } 1954 spin_lock(&ulp->lock); 1955 1956 /* 1957 * step 4: check for races: did someone else allocate the undo struct? 1958 */ 1959 un = lookup_undo(ulp, semid); 1960 if (un) { 1961 kfree(new); 1962 goto success; 1963 } 1964 /* step 5: initialize & link new undo structure */ 1965 new->semadj = (short *) &new[1]; 1966 new->ulp = ulp; 1967 new->semid = semid; 1968 assert_spin_locked(&ulp->lock); 1969 list_add_rcu(&new->list_proc, &ulp->list_proc); 1970 ipc_assert_locked_object(&sma->sem_perm); 1971 list_add(&new->list_id, &sma->list_id); 1972 un = new; 1973 1974 success: 1975 spin_unlock(&ulp->lock); 1976 sem_unlock(sma, -1); 1977 out: 1978 return un; 1979 } 1980 1981 static long do_semtimedop(int semid, struct sembuf __user *tsops, 1982 unsigned nsops, const struct timespec64 *timeout) 1983 { 1984 int error = -EINVAL; 1985 struct sem_array *sma; 1986 struct sembuf fast_sops[SEMOPM_FAST]; 1987 struct sembuf *sops = fast_sops, *sop; 1988 struct sem_undo *un; 1989 int max, locknum; 1990 bool undos = false, alter = false, dupsop = false; 1991 struct sem_queue queue; 1992 unsigned long dup = 0, jiffies_left = 
0; 1993 struct ipc_namespace *ns; 1994 1995 ns = current->nsproxy->ipc_ns; 1996 1997 if (nsops < 1 || semid < 0) 1998 return -EINVAL; 1999 if (nsops > ns->sc_semopm) 2000 return -E2BIG; 2001 if (nsops > SEMOPM_FAST) { 2002 sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL); 2003 if (sops == NULL) 2004 return -ENOMEM; 2005 } 2006 2007 if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) { 2008 error = -EFAULT; 2009 goto out_free; 2010 } 2011 2012 if (timeout) { 2013 if (timeout->tv_sec < 0 || timeout->tv_nsec < 0 || 2014 timeout->tv_nsec >= 1000000000L) { 2015 error = -EINVAL; 2016 goto out_free; 2017 } 2018 jiffies_left = timespec64_to_jiffies(timeout); 2019 } 2020 2021 max = 0; 2022 for (sop = sops; sop < sops + nsops; sop++) { 2023 unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG); 2024 2025 if (sop->sem_num >= max) 2026 max = sop->sem_num; 2027 if (sop->sem_flg & SEM_UNDO) 2028 undos = true; 2029 if (dup & mask) { 2030 /* 2031 * There was a previous alter access that appears 2032 * to have accessed the same semaphore, thus use 2033 * the dupsop logic. "appears", because the detection 2034 * can only check % BITS_PER_LONG. 2035 */ 2036 dupsop = true; 2037 } 2038 if (sop->sem_op != 0) { 2039 alter = true; 2040 dup |= mask; 2041 } 2042 } 2043 2044 if (undos) { 2045 /* On success, find_alloc_undo takes the rcu_read_lock */ 2046 un = find_alloc_undo(ns, semid); 2047 if (IS_ERR(un)) { 2048 error = PTR_ERR(un); 2049 goto out_free; 2050 } 2051 } else { 2052 un = NULL; 2053 rcu_read_lock(); 2054 } 2055 2056 sma = sem_obtain_object_check(ns, semid); 2057 if (IS_ERR(sma)) { 2058 rcu_read_unlock(); 2059 error = PTR_ERR(sma); 2060 goto out_free; 2061 } 2062 2063 error = -EFBIG; 2064 if (max >= sma->sem_nsems) { 2065 rcu_read_unlock(); 2066 goto out_free; 2067 } 2068 2069 error = -EACCES; 2070 if (ipcperms(ns, &sma->sem_perm, alter ? 
S_IWUGO : S_IRUGO)) { 2071 rcu_read_unlock(); 2072 goto out_free; 2073 } 2074 2075 error = security_sem_semop(&sma->sem_perm, sops, nsops, alter); 2076 if (error) { 2077 rcu_read_unlock(); 2078 goto out_free; 2079 } 2080 2081 error = -EIDRM; 2082 locknum = sem_lock(sma, sops, nsops); 2083 /* 2084 * We eventually might perform the following check in a lockless 2085 * fashion, considering ipc_valid_object() locking constraints. 2086 * If nsops == 1 and there is no contention for sem_perm.lock, then 2087 * only a per-semaphore lock is held and it's OK to proceed with the 2088 * check below. More details on the fine grained locking scheme 2089 * entangled here and why it's RMID race safe on comments at sem_lock() 2090 */ 2091 if (!ipc_valid_object(&sma->sem_perm)) 2092 goto out_unlock_free; 2093 /* 2094 * semid identifiers are not unique - find_alloc_undo may have 2095 * allocated an undo structure, it was invalidated by an RMID 2096 * and now a new array with received the same id. Check and fail. 2097 * This case can be detected checking un->semid. The existence of 2098 * "un" itself is guaranteed by rcu. 2099 */ 2100 if (un && un->semid == -1) 2101 goto out_unlock_free; 2102 2103 queue.sops = sops; 2104 queue.nsops = nsops; 2105 queue.undo = un; 2106 queue.pid = task_tgid(current); 2107 queue.alter = alter; 2108 queue.dupsop = dupsop; 2109 2110 error = perform_atomic_semop(sma, &queue); 2111 if (error == 0) { /* non-blocking succesfull path */ 2112 DEFINE_WAKE_Q(wake_q); 2113 2114 /* 2115 * If the operation was successful, then do 2116 * the required updates. 
2117 */ 2118 if (alter) 2119 do_smart_update(sma, sops, nsops, 1, &wake_q); 2120 else 2121 set_semotime(sma, sops); 2122 2123 sem_unlock(sma, locknum); 2124 rcu_read_unlock(); 2125 wake_up_q(&wake_q); 2126 2127 goto out_free; 2128 } 2129 if (error < 0) /* non-blocking error path */ 2130 goto out_unlock_free; 2131 2132 /* 2133 * We need to sleep on this operation, so we put the current 2134 * task into the pending queue and go to sleep. 2135 */ 2136 if (nsops == 1) { 2137 struct sem *curr; 2138 int idx = array_index_nospec(sops->sem_num, sma->sem_nsems); 2139 curr = &sma->sems[idx]; 2140 2141 if (alter) { 2142 if (sma->complex_count) { 2143 list_add_tail(&queue.list, 2144 &sma->pending_alter); 2145 } else { 2146 2147 list_add_tail(&queue.list, 2148 &curr->pending_alter); 2149 } 2150 } else { 2151 list_add_tail(&queue.list, &curr->pending_const); 2152 } 2153 } else { 2154 if (!sma->complex_count) 2155 merge_queues(sma); 2156 2157 if (alter) 2158 list_add_tail(&queue.list, &sma->pending_alter); 2159 else 2160 list_add_tail(&queue.list, &sma->pending_const); 2161 2162 sma->complex_count++; 2163 } 2164 2165 do { 2166 /* memory ordering ensured by the lock in sem_lock() */ 2167 WRITE_ONCE(queue.status, -EINTR); 2168 queue.sleeper = current; 2169 2170 /* memory ordering is ensured by the lock in sem_lock() */ 2171 __set_current_state(TASK_INTERRUPTIBLE); 2172 sem_unlock(sma, locknum); 2173 rcu_read_unlock(); 2174 2175 if (timeout) 2176 jiffies_left = schedule_timeout(jiffies_left); 2177 else 2178 schedule(); 2179 2180 /* 2181 * fastpath: the semop has completed, either successfully or 2182 * not, from the syscall pov, is quite irrelevant to us at this 2183 * point; we're done. 2184 * 2185 * We _do_ care, nonetheless, about being awoken by a signal or 2186 * spuriously. 
The queue.status is checked again in the 2187 * slowpath (aka after taking sem_lock), such that we can detect 2188 * scenarios where we were awakened externally, during the 2189 * window between wake_q_add() and wake_up_q(). 2190 */ 2191 error = READ_ONCE(queue.status); 2192 if (error != -EINTR) { 2193 /* see SEM_BARRIER_2 for purpose/pairing */ 2194 smp_acquire__after_ctrl_dep(); 2195 goto out_free; 2196 } 2197 2198 rcu_read_lock(); 2199 locknum = sem_lock(sma, sops, nsops); 2200 2201 if (!ipc_valid_object(&sma->sem_perm)) 2202 goto out_unlock_free; 2203 2204 /* 2205 * No necessity for any barrier: We are protect by sem_lock() 2206 */ 2207 error = READ_ONCE(queue.status); 2208 2209 /* 2210 * If queue.status != -EINTR we are woken up by another process. 2211 * Leave without unlink_queue(), but with sem_unlock(). 2212 */ 2213 if (error != -EINTR) 2214 goto out_unlock_free; 2215 2216 /* 2217 * If an interrupt occurred we have to clean up the queue. 2218 */ 2219 if (timeout && jiffies_left == 0) 2220 error = -EAGAIN; 2221 } while (error == -EINTR && !signal_pending(current)); /* spurious */ 2222 2223 unlink_queue(sma, &queue); 2224 2225 out_unlock_free: 2226 sem_unlock(sma, locknum); 2227 rcu_read_unlock(); 2228 out_free: 2229 if (sops != fast_sops) 2230 kvfree(sops); 2231 return error; 2232 } 2233 2234 long ksys_semtimedop(int semid, struct sembuf __user *tsops, 2235 unsigned int nsops, const struct __kernel_timespec __user *timeout) 2236 { 2237 if (timeout) { 2238 struct timespec64 ts; 2239 if (get_timespec64(&ts, timeout)) 2240 return -EFAULT; 2241 return do_semtimedop(semid, tsops, nsops, &ts); 2242 } 2243 return do_semtimedop(semid, tsops, nsops, NULL); 2244 } 2245 2246 SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, 2247 unsigned int, nsops, const struct __kernel_timespec __user *, timeout) 2248 { 2249 return ksys_semtimedop(semid, tsops, nsops, timeout); 2250 } 2251 2252 #ifdef CONFIG_COMPAT_32BIT_TIME 2253 long compat_ksys_semtimedop(int 
semid, struct sembuf __user *tsems, 2254 unsigned int nsops, 2255 const struct old_timespec32 __user *timeout) 2256 { 2257 if (timeout) { 2258 struct timespec64 ts; 2259 if (get_old_timespec32(&ts, timeout)) 2260 return -EFAULT; 2261 return do_semtimedop(semid, tsems, nsops, &ts); 2262 } 2263 return do_semtimedop(semid, tsems, nsops, NULL); 2264 } 2265 2266 SYSCALL_DEFINE4(semtimedop_time32, int, semid, struct sembuf __user *, tsems, 2267 unsigned int, nsops, 2268 const struct old_timespec32 __user *, timeout) 2269 { 2270 return compat_ksys_semtimedop(semid, tsems, nsops, timeout); 2271 } 2272 #endif 2273 2274 SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops, 2275 unsigned, nsops) 2276 { 2277 return do_semtimedop(semid, tsops, nsops, NULL); 2278 } 2279 2280 /* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between 2281 * parent and child tasks. 2282 */ 2283 2284 int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) 2285 { 2286 struct sem_undo_list *undo_list; 2287 int error; 2288 2289 if (clone_flags & CLONE_SYSVSEM) { 2290 error = get_undo_list(&undo_list); 2291 if (error) 2292 return error; 2293 refcount_inc(&undo_list->refcnt); 2294 tsk->sysvsem.undo_list = undo_list; 2295 } else 2296 tsk->sysvsem.undo_list = NULL; 2297 2298 return 0; 2299 } 2300 2301 /* 2302 * add semadj values to semaphores, free undo structures. 2303 * undo structures are not freed when semaphore arrays are destroyed 2304 * so some of them may be out of date. 2305 * IMPLEMENTATION NOTE: There is some confusion over whether the 2306 * set of adjustments that needs to be done should be done in an atomic 2307 * manner or not. That is, if we are attempting to decrement the semval 2308 * should we queue up and wait until we can do so legally? 2309 * The original implementation attempted to do this (queue and wait). 2310 * The current implementation does not do so. 
The POSIX standard 2311 * and SVID should be consulted to determine what behavior is mandated. 2312 */ 2313 void exit_sem(struct task_struct *tsk) 2314 { 2315 struct sem_undo_list *ulp; 2316 2317 ulp = tsk->sysvsem.undo_list; 2318 if (!ulp) 2319 return; 2320 tsk->sysvsem.undo_list = NULL; 2321 2322 if (!refcount_dec_and_test(&ulp->refcnt)) 2323 return; 2324 2325 for (;;) { 2326 struct sem_array *sma; 2327 struct sem_undo *un; 2328 int semid, i; 2329 DEFINE_WAKE_Q(wake_q); 2330 2331 cond_resched(); 2332 2333 rcu_read_lock(); 2334 un = list_entry_rcu(ulp->list_proc.next, 2335 struct sem_undo, list_proc); 2336 if (&un->list_proc == &ulp->list_proc) { 2337 /* 2338 * We must wait for freeary() before freeing this ulp, 2339 * in case we raced with last sem_undo. There is a small 2340 * possibility where we exit while freeary() didn't 2341 * finish unlocking sem_undo_list. 2342 */ 2343 spin_lock(&ulp->lock); 2344 spin_unlock(&ulp->lock); 2345 rcu_read_unlock(); 2346 break; 2347 } 2348 spin_lock(&ulp->lock); 2349 semid = un->semid; 2350 spin_unlock(&ulp->lock); 2351 2352 /* exit_sem raced with IPC_RMID, nothing to do */ 2353 if (semid == -1) { 2354 rcu_read_unlock(); 2355 continue; 2356 } 2357 2358 sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid); 2359 /* exit_sem raced with IPC_RMID, nothing to do */ 2360 if (IS_ERR(sma)) { 2361 rcu_read_unlock(); 2362 continue; 2363 } 2364 2365 sem_lock(sma, NULL, -1); 2366 /* exit_sem raced with IPC_RMID, nothing to do */ 2367 if (!ipc_valid_object(&sma->sem_perm)) { 2368 sem_unlock(sma, -1); 2369 rcu_read_unlock(); 2370 continue; 2371 } 2372 un = __lookup_undo(ulp, semid); 2373 if (un == NULL) { 2374 /* exit_sem raced with IPC_RMID+semget() that created 2375 * exactly the same semid. Nothing to do. 
2376 */ 2377 sem_unlock(sma, -1); 2378 rcu_read_unlock(); 2379 continue; 2380 } 2381 2382 /* remove un from the linked lists */ 2383 ipc_assert_locked_object(&sma->sem_perm); 2384 list_del(&un->list_id); 2385 2386 spin_lock(&ulp->lock); 2387 list_del_rcu(&un->list_proc); 2388 spin_unlock(&ulp->lock); 2389 2390 /* perform adjustments registered in un */ 2391 for (i = 0; i < sma->sem_nsems; i++) { 2392 struct sem *semaphore = &sma->sems[i]; 2393 if (un->semadj[i]) { 2394 semaphore->semval += un->semadj[i]; 2395 /* 2396 * Range checks of the new semaphore value, 2397 * not defined by sus: 2398 * - Some unices ignore the undo entirely 2399 * (e.g. HP UX 11i 11.22, Tru64 V5.1) 2400 * - some cap the value (e.g. FreeBSD caps 2401 * at 0, but doesn't enforce SEMVMX) 2402 * 2403 * Linux caps the semaphore value, both at 0 2404 * and at SEMVMX. 2405 * 2406 * Manfred <manfred@colorfullife.com> 2407 */ 2408 if (semaphore->semval < 0) 2409 semaphore->semval = 0; 2410 if (semaphore->semval > SEMVMX) 2411 semaphore->semval = SEMVMX; 2412 ipc_update_pid(&semaphore->sempid, task_tgid(current)); 2413 } 2414 } 2415 /* maybe some queued-up processes were waiting for this */ 2416 do_smart_update(sma, NULL, 0, 1, &wake_q); 2417 sem_unlock(sma, -1); 2418 rcu_read_unlock(); 2419 wake_up_q(&wake_q); 2420 2421 kfree_rcu(un, rcu); 2422 } 2423 kfree(ulp); 2424 } 2425 2426 #ifdef CONFIG_PROC_FS 2427 static int sysvipc_sem_proc_show(struct seq_file *s, void *it) 2428 { 2429 struct user_namespace *user_ns = seq_user_ns(s); 2430 struct kern_ipc_perm *ipcp = it; 2431 struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); 2432 time64_t sem_otime; 2433 2434 /* 2435 * The proc interface isn't aware of sem_lock(), it calls 2436 * ipc_lock_object() directly (in sysvipc_find_ipc). 2437 * In order to stay compatible with sem_lock(), we must 2438 * enter / leave complex_mode. 
2439 */ 2440 complexmode_enter(sma); 2441 2442 sem_otime = get_semotime(sma); 2443 2444 seq_printf(s, 2445 "%10d %10d %4o %10u %5u %5u %5u %5u %10llu %10llu\n", 2446 sma->sem_perm.key, 2447 sma->sem_perm.id, 2448 sma->sem_perm.mode, 2449 sma->sem_nsems, 2450 from_kuid_munged(user_ns, sma->sem_perm.uid), 2451 from_kgid_munged(user_ns, sma->sem_perm.gid), 2452 from_kuid_munged(user_ns, sma->sem_perm.cuid), 2453 from_kgid_munged(user_ns, sma->sem_perm.cgid), 2454 sem_otime, 2455 sma->sem_ctime); 2456 2457 complexmode_tryleave(sma); 2458 2459 return 0; 2460 } 2461 #endif 2462