1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * linux/ipc/sem.c 4 * Copyright (C) 1992 Krishna Balasubramanian 5 * Copyright (C) 1995 Eric Schenk, Bruno Haible 6 * 7 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com> 8 * 9 * SMP-threaded, sysctl's added 10 * (c) 1999 Manfred Spraul <manfred@colorfullife.com> 11 * Enforced range limit on SEM_UNDO 12 * (c) 2001 Red Hat Inc 13 * Lockless wakeup 14 * (c) 2003 Manfred Spraul <manfred@colorfullife.com> 15 * (c) 2016 Davidlohr Bueso <dave@stgolabs.net> 16 * Further wakeup optimizations, documentation 17 * (c) 2010 Manfred Spraul <manfred@colorfullife.com> 18 * 19 * support for audit of ipc object properties and permission changes 20 * Dustin Kirkland <dustin.kirkland@us.ibm.com> 21 * 22 * namespaces support 23 * OpenVZ, SWsoft Inc. 24 * Pavel Emelianov <xemul@openvz.org> 25 * 26 * Implementation notes: (May 2010) 27 * This file implements System V semaphores. 28 * 29 * User space visible behavior: 30 * - FIFO ordering for semop() operations (just FIFO, not starvation 31 * protection) 32 * - multiple semaphore operations that alter the same semaphore in 33 * one semop() are handled. 34 * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and 35 * SETALL calls. 36 * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO. 37 * - undo adjustments at process exit are limited to 0..SEMVMX. 38 * - namespace are supported. 39 * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtine by writing 40 * to /proc/sys/kernel/sem. 41 * - statistics about the usage are reported in /proc/sysvipc/sem. 42 * 43 * Internals: 44 * - scalability: 45 * - all global variables are read-mostly. 46 * - semop() calls and semctl(RMID) are synchronized by RCU. 47 * - most operations do write operations (actually: spin_lock calls) to 48 * the per-semaphore array structure. 49 * Thus: Perfect SMP scaling between independent semaphore arrays. 50 * If multiple semaphores in one array are used, then cache line 51 * trashing on the semaphore array spinlock will limit the scaling. 52 * - semncnt and semzcnt are calculated on demand in count_semcnt() 53 * - the task that performs a successful semop() scans the list of all 54 * sleeping tasks and completes any pending operations that can be fulfilled. 55 * Semaphores are actively given to waiting tasks (necessary for FIFO). 56 * (see update_queue()) 57 * - To improve the scalability, the actual wake-up calls are performed after 58 * dropping all locks. (see wake_up_sem_queue_prepare()) 59 * - All work is done by the waker, the woken up task does not have to do 60 * anything - not even acquiring a lock or dropping a refcount. 61 * - A woken up task may not even touch the semaphore array anymore, it may 62 * have been destroyed already by a semctl(RMID). 63 * - UNDO values are stored in an array (one per process and per 64 * semaphore array, lazily allocated). For backwards compatibility, multiple 65 * modes for the UNDO variables are supported (per process, per thread) 66 * (see copy_semundo, CLONE_SYSVSEM) 67 * - There are two lists of the pending operations: a per-array list 68 * and per-semaphore list (stored in the array). This allows to achieve FIFO 69 * ordering without always scanning all pending operations. 70 * The worst-case behavior is nevertheless O(N^2) for N wakeups. 71 */ 72 73 #include <linux/slab.h> 74 #include <linux/spinlock.h> 75 #include <linux/init.h> 76 #include <linux/proc_fs.h> 77 #include <linux/time.h> 78 #include <linux/security.h> 79 #include <linux/syscalls.h> 80 #include <linux/audit.h> 81 #include <linux/capability.h> 82 #include <linux/seq_file.h> 83 #include <linux/rwsem.h> 84 #include <linux/nsproxy.h> 85 #include <linux/ipc_namespace.h> 86 #include <linux/sched/wake_q.h> 87 88 #include <linux/uaccess.h> 89 #include "util.h" 90 91 92 /* One queue for each sleeping process in the system. */ 93 struct sem_queue { 94 struct list_head list; /* queue of pending operations */ 95 struct task_struct *sleeper; /* this process */ 96 struct sem_undo *undo; /* undo structure */ 97 int pid; /* process id of requesting process */ 98 int status; /* completion status of operation */ 99 struct sembuf *sops; /* array of pending operations */ 100 struct sembuf *blocking; /* the operation that blocked */ 101 int nsops; /* number of operations */ 102 bool alter; /* does *sops alter the array? */ 103 bool dupsop; /* sops on more than one sem_num */ 104 }; 105 106 /* Each task has a list of undo requests. They are executed automatically 107 * when the process exits. 108 */ 109 struct sem_undo { 110 struct list_head list_proc; /* per-process list: * 111 * all undos from one process 112 * rcu protected */ 113 struct rcu_head rcu; /* rcu struct for sem_undo */ 114 struct sem_undo_list *ulp; /* back ptr to sem_undo_list */ 115 struct list_head list_id; /* per semaphore array list: 116 * all undos for one array */ 117 int semid; /* semaphore set identifier */ 118 short *semadj; /* array of adjustments */ 119 /* one per semaphore */ 120 }; 121 122 /* sem_undo_list controls shared access to the list of sem_undo structures 123 * that may be shared among all a CLONE_SYSVSEM task group. 124 */ 125 struct sem_undo_list { 126 refcount_t refcnt; 127 spinlock_t lock; 128 struct list_head list_proc; 129 }; 130 131 132 #define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS]) 133 134 static int newary(struct ipc_namespace *, struct ipc_params *); 135 static void freeary(struct ipc_namespace *, struct kern_ipc_perm *); 136 #ifdef CONFIG_PROC_FS 137 static int sysvipc_sem_proc_show(struct seq_file *s, void *it); 138 #endif 139 140 #define SEMMSL_FAST 256 /* 512 bytes on stack */ 141 #define SEMOPM_FAST 64 /* ~ 372 bytes on stack */ 142 143 /* 144 * Switching from the mode suitable for simple ops 145 * to the mode for complex ops is costly. Therefore: 146 * use some hysteresis 147 */ 148 #define USE_GLOBAL_LOCK_HYSTERESIS 10 149 150 /* 151 * Locking: 152 * a) global sem_lock() for read/write 153 * sem_undo.id_next, 154 * sem_array.complex_count, 155 * sem_array.pending{_alter,_const}, 156 * sem_array.sem_undo 157 * 158 * b) global or semaphore sem_lock() for read/write: 159 * sem_array.sems[i].pending_{const,alter}: 160 * 161 * c) special: 162 * sem_undo_list.list_proc: 163 * * undo_list->lock for write 164 * * rcu for read 165 * use_global_lock: 166 * * global sem_lock() for write 167 * * either local or global sem_lock() for read. 168 * 169 * Memory ordering: 170 * Most ordering is enforced by using spin_lock() and spin_unlock(). 171 * The special case is use_global_lock: 172 * Setting it from non-zero to 0 is a RELEASE, this is ensured by 173 * using smp_store_release(). 174 * Testing if it is non-zero is an ACQUIRE, this is ensured by using 175 * smp_load_acquire(). 176 * Setting it from 0 to non-zero must be ordered with regards to 177 * this smp_load_acquire(), this is guaranteed because the smp_load_acquire() 178 * is inside a spin_lock() and after a write from 0 to non-zero a 179 * spin_lock()+spin_unlock() is done. 180 */ 181 182 #define sc_semmsl sem_ctls[0] 183 #define sc_semmns sem_ctls[1] 184 #define sc_semopm sem_ctls[2] 185 #define sc_semmni sem_ctls[3] 186 187 int sem_init_ns(struct ipc_namespace *ns) 188 { 189 ns->sc_semmsl = SEMMSL; 190 ns->sc_semmns = SEMMNS; 191 ns->sc_semopm = SEMOPM; 192 ns->sc_semmni = SEMMNI; 193 ns->used_sems = 0; 194 return ipc_init_ids(&ns->ids[IPC_SEM_IDS]); 195 } 196 197 #ifdef CONFIG_IPC_NS 198 void sem_exit_ns(struct ipc_namespace *ns) 199 { 200 free_ipcs(ns, &sem_ids(ns), freeary); 201 idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr); 202 rhashtable_destroy(&ns->ids[IPC_SEM_IDS].key_ht); 203 } 204 #endif 205 206 int __init sem_init(void) 207 { 208 const int err = sem_init_ns(&init_ipc_ns); 209 210 ipc_init_proc_interface("sysvipc/sem", 211 " key semid perms nsems uid gid cuid cgid otime ctime\n", 212 IPC_SEM_IDS, sysvipc_sem_proc_show); 213 return err; 214 } 215 216 /** 217 * unmerge_queues - unmerge queues, if possible. 218 * @sma: semaphore array 219 * 220 * The function unmerges the wait queues if complex_count is 0. 221 * It must be called prior to dropping the global semaphore array lock. 222 */ 223 static void unmerge_queues(struct sem_array *sma) 224 { 225 struct sem_queue *q, *tq; 226 227 /* complex operations still around? */ 228 if (sma->complex_count) 229 return; 230 /* 231 * We will switch back to simple mode. 232 * Move all pending operation back into the per-semaphore 233 * queues. 234 */ 235 list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { 236 struct sem *curr; 237 curr = &sma->sems[q->sops[0].sem_num]; 238 239 list_add_tail(&q->list, &curr->pending_alter); 240 } 241 INIT_LIST_HEAD(&sma->pending_alter); 242 } 243 244 /** 245 * merge_queues - merge single semop queues into global queue 246 * @sma: semaphore array 247 * 248 * This function merges all per-semaphore queues into the global queue. 249 * It is necessary to achieve FIFO ordering for the pending single-sop 250 * operations when a multi-semop operation must sleep. 251 * Only the alter operations must be moved, the const operations can stay. 252 */ 253 static void merge_queues(struct sem_array *sma) 254 { 255 int i; 256 for (i = 0; i < sma->sem_nsems; i++) { 257 struct sem *sem = &sma->sems[i]; 258 259 list_splice_init(&sem->pending_alter, &sma->pending_alter); 260 } 261 } 262 263 static void sem_rcu_free(struct rcu_head *head) 264 { 265 struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu); 266 struct sem_array *sma = container_of(p, struct sem_array, sem_perm); 267 268 security_sem_free(sma); 269 kvfree(sma); 270 } 271 272 /* 273 * Enter the mode suitable for non-simple operations: 274 * Caller must own sem_perm.lock. 275 */ 276 static void complexmode_enter(struct sem_array *sma) 277 { 278 int i; 279 struct sem *sem; 280 281 if (sma->use_global_lock > 0) { 282 /* 283 * We are already in global lock mode. 284 * Nothing to do, just reset the 285 * counter until we return to simple mode. 286 */ 287 sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; 288 return; 289 } 290 sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; 291 292 for (i = 0; i < sma->sem_nsems; i++) { 293 sem = &sma->sems[i]; 294 spin_lock(&sem->lock); 295 spin_unlock(&sem->lock); 296 } 297 } 298 299 /* 300 * Try to leave the mode that disallows simple operations: 301 * Caller must own sem_perm.lock. 302 */ 303 static void complexmode_tryleave(struct sem_array *sma) 304 { 305 if (sma->complex_count) { 306 /* Complex ops are sleeping. 307 * We must stay in complex mode 308 */ 309 return; 310 } 311 if (sma->use_global_lock == 1) { 312 /* 313 * Immediately after setting use_global_lock to 0, 314 * a simple op can start. Thus: all memory writes 315 * performed by the current operation must be visible 316 * before we set use_global_lock to 0. 317 */ 318 smp_store_release(&sma->use_global_lock, 0); 319 } else { 320 sma->use_global_lock--; 321 } 322 } 323 324 #define SEM_GLOBAL_LOCK (-1) 325 /* 326 * If the request contains only one semaphore operation, and there are 327 * no complex transactions pending, lock only the semaphore involved. 328 * Otherwise, lock the entire semaphore array, since we either have 329 * multiple semaphores in our own semops, or we need to look at 330 * semaphores from other pending complex operations. 331 */ 332 static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, 333 int nsops) 334 { 335 struct sem *sem; 336 337 if (nsops != 1) { 338 /* Complex operation - acquire a full lock */ 339 ipc_lock_object(&sma->sem_perm); 340 341 /* Prevent parallel simple ops */ 342 complexmode_enter(sma); 343 return SEM_GLOBAL_LOCK; 344 } 345 346 /* 347 * Only one semaphore affected - try to optimize locking. 348 * Optimized locking is possible if no complex operation 349 * is either enqueued or processed right now. 350 * 351 * Both facts are tracked by use_global_mode. 352 */ 353 sem = &sma->sems[sops->sem_num]; 354 355 /* 356 * Initial check for use_global_lock. Just an optimization, 357 * no locking, no memory barrier. 358 */ 359 if (!sma->use_global_lock) { 360 /* 361 * It appears that no complex operation is around. 362 * Acquire the per-semaphore lock. 363 */ 364 spin_lock(&sem->lock); 365 366 /* pairs with smp_store_release() */ 367 if (!smp_load_acquire(&sma->use_global_lock)) { 368 /* fast path successful! */ 369 return sops->sem_num; 370 } 371 spin_unlock(&sem->lock); 372 } 373 374 /* slow path: acquire the full lock */ 375 ipc_lock_object(&sma->sem_perm); 376 377 if (sma->use_global_lock == 0) { 378 /* 379 * The use_global_lock mode ended while we waited for 380 * sma->sem_perm.lock. Thus we must switch to locking 381 * with sem->lock. 382 * Unlike in the fast path, there is no need to recheck 383 * sma->use_global_lock after we have acquired sem->lock: 384 * We own sma->sem_perm.lock, thus use_global_lock cannot 385 * change. 386 */ 387 spin_lock(&sem->lock); 388 389 ipc_unlock_object(&sma->sem_perm); 390 return sops->sem_num; 391 } else { 392 /* 393 * Not a false alarm, thus continue to use the global lock 394 * mode. No need for complexmode_enter(), this was done by 395 * the caller that has set use_global_mode to non-zero. 396 */ 397 return SEM_GLOBAL_LOCK; 398 } 399 } 400 401 static inline void sem_unlock(struct sem_array *sma, int locknum) 402 { 403 if (locknum == SEM_GLOBAL_LOCK) { 404 unmerge_queues(sma); 405 complexmode_tryleave(sma); 406 ipc_unlock_object(&sma->sem_perm); 407 } else { 408 struct sem *sem = &sma->sems[locknum]; 409 spin_unlock(&sem->lock); 410 } 411 } 412 413 /* 414 * sem_lock_(check_) routines are called in the paths where the rwsem 415 * is not held. 416 * 417 * The caller holds the RCU read lock. 418 */ 419 static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id) 420 { 421 struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id); 422 423 if (IS_ERR(ipcp)) 424 return ERR_CAST(ipcp); 425 426 return container_of(ipcp, struct sem_array, sem_perm); 427 } 428 429 static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns, 430 int id) 431 { 432 struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id); 433 434 if (IS_ERR(ipcp)) 435 return ERR_CAST(ipcp); 436 437 return container_of(ipcp, struct sem_array, sem_perm); 438 } 439 440 static inline void sem_lock_and_putref(struct sem_array *sma) 441 { 442 sem_lock(sma, NULL, -1); 443 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 444 } 445 446 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) 447 { 448 ipc_rmid(&sem_ids(ns), &s->sem_perm); 449 } 450 451 static struct sem_array *sem_alloc(size_t nsems) 452 { 453 struct sem_array *sma; 454 size_t size; 455 456 if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0])) 457 return NULL; 458 459 size = sizeof(*sma) + nsems * sizeof(sma->sems[0]); 460 sma = kvmalloc(size, GFP_KERNEL); 461 if (unlikely(!sma)) 462 return NULL; 463 464 memset(sma, 0, size); 465 466 return sma; 467 } 468 469 /** 470 * newary - Create a new semaphore set 471 * @ns: namespace 472 * @params: ptr to the structure that contains key, semflg and nsems 473 * 474 * Called with sem_ids.rwsem held (as a writer) 475 */ 476 static int newary(struct ipc_namespace *ns, struct ipc_params *params) 477 { 478 int retval; 479 struct sem_array *sma; 480 key_t key = params->key; 481 int nsems = params->u.nsems; 482 int semflg = params->flg; 483 int i; 484 485 if (!nsems) 486 return -EINVAL; 487 if (ns->used_sems + nsems > ns->sc_semmns) 488 return -ENOSPC; 489 490 sma = sem_alloc(nsems); 491 if (!sma) 492 return -ENOMEM; 493 494 sma->sem_perm.mode = (semflg & S_IRWXUGO); 495 sma->sem_perm.key = key; 496 497 sma->sem_perm.security = NULL; 498 retval = security_sem_alloc(sma); 499 if (retval) { 500 kvfree(sma); 501 return retval; 502 } 503 504 for (i = 0; i < nsems; i++) { 505 INIT_LIST_HEAD(&sma->sems[i].pending_alter); 506 INIT_LIST_HEAD(&sma->sems[i].pending_const); 507 spin_lock_init(&sma->sems[i].lock); 508 } 509 510 sma->complex_count = 0; 511 sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; 512 INIT_LIST_HEAD(&sma->pending_alter); 513 INIT_LIST_HEAD(&sma->pending_const); 514 INIT_LIST_HEAD(&sma->list_id); 515 sma->sem_nsems = nsems; 516 sma->sem_ctime = ktime_get_real_seconds(); 517 518 /* ipc_addid() locks sma upon success. */ 519 retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); 520 if (retval < 0) { 521 call_rcu(&sma->sem_perm.rcu, sem_rcu_free); 522 return retval; 523 } 524 ns->used_sems += nsems; 525 526 sem_unlock(sma, -1); 527 rcu_read_unlock(); 528 529 return sma->sem_perm.id; 530 } 531 532 533 /* 534 * Called with sem_ids.rwsem and ipcp locked. 535 */ 536 static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg) 537 { 538 struct sem_array *sma; 539 540 sma = container_of(ipcp, struct sem_array, sem_perm); 541 return security_sem_associate(sma, semflg); 542 } 543 544 /* 545 * Called with sem_ids.rwsem and ipcp locked. 546 */ 547 static inline int sem_more_checks(struct kern_ipc_perm *ipcp, 548 struct ipc_params *params) 549 { 550 struct sem_array *sma; 551 552 sma = container_of(ipcp, struct sem_array, sem_perm); 553 if (params->u.nsems > sma->sem_nsems) 554 return -EINVAL; 555 556 return 0; 557 } 558 559 SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) 560 { 561 struct ipc_namespace *ns; 562 static const struct ipc_ops sem_ops = { 563 .getnew = newary, 564 .associate = sem_security, 565 .more_checks = sem_more_checks, 566 }; 567 struct ipc_params sem_params; 568 569 ns = current->nsproxy->ipc_ns; 570 571 if (nsems < 0 || nsems > ns->sc_semmsl) 572 return -EINVAL; 573 574 sem_params.key = key; 575 sem_params.flg = semflg; 576 sem_params.u.nsems = nsems; 577 578 return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); 579 } 580 581 /** 582 * perform_atomic_semop[_slow] - Attempt to perform semaphore 583 * operations on a given array. 584 * @sma: semaphore array 585 * @q: struct sem_queue that describes the operation 586 * 587 * Caller blocking are as follows, based the value 588 * indicated by the semaphore operation (sem_op): 589 * 590 * (1) >0 never blocks. 591 * (2) 0 (wait-for-zero operation): semval is non-zero. 592 * (3) <0 attempting to decrement semval to a value smaller than zero. 593 * 594 * Returns 0 if the operation was possible. 595 * Returns 1 if the operation is impossible, the caller must sleep. 596 * Returns <0 for error codes. 597 */ 598 static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q) 599 { 600 int result, sem_op, nsops, pid; 601 struct sembuf *sop; 602 struct sem *curr; 603 struct sembuf *sops; 604 struct sem_undo *un; 605 606 sops = q->sops; 607 nsops = q->nsops; 608 un = q->undo; 609 610 for (sop = sops; sop < sops + nsops; sop++) { 611 curr = &sma->sems[sop->sem_num]; 612 sem_op = sop->sem_op; 613 result = curr->semval; 614 615 if (!sem_op && result) 616 goto would_block; 617 618 result += sem_op; 619 if (result < 0) 620 goto would_block; 621 if (result > SEMVMX) 622 goto out_of_range; 623 624 if (sop->sem_flg & SEM_UNDO) { 625 int undo = un->semadj[sop->sem_num] - sem_op; 626 /* Exceeding the undo range is an error. */ 627 if (undo < (-SEMAEM - 1) || undo > SEMAEM) 628 goto out_of_range; 629 un->semadj[sop->sem_num] = undo; 630 } 631 632 curr->semval = result; 633 } 634 635 sop--; 636 pid = q->pid; 637 while (sop >= sops) { 638 sma->sems[sop->sem_num].sempid = pid; 639 sop--; 640 } 641 642 return 0; 643 644 out_of_range: 645 result = -ERANGE; 646 goto undo; 647 648 would_block: 649 q->blocking = sop; 650 651 if (sop->sem_flg & IPC_NOWAIT) 652 result = -EAGAIN; 653 else 654 result = 1; 655 656 undo: 657 sop--; 658 while (sop >= sops) { 659 sem_op = sop->sem_op; 660 sma->sems[sop->sem_num].semval -= sem_op; 661 if (sop->sem_flg & SEM_UNDO) 662 un->semadj[sop->sem_num] += sem_op; 663 sop--; 664 } 665 666 return result; 667 } 668 669 static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q) 670 { 671 int result, sem_op, nsops; 672 struct sembuf *sop; 673 struct sem *curr; 674 struct sembuf *sops; 675 struct sem_undo *un; 676 677 sops = q->sops; 678 nsops = q->nsops; 679 un = q->undo; 680 681 if (unlikely(q->dupsop)) 682 return perform_atomic_semop_slow(sma, q); 683 684 /* 685 * We scan the semaphore set twice, first to ensure that the entire 686 * operation can succeed, therefore avoiding any pointless writes 687 * to shared memory and having to undo such changes in order to block 688 * until the operations can go through. 689 */ 690 for (sop = sops; sop < sops + nsops; sop++) { 691 curr = &sma->sems[sop->sem_num]; 692 sem_op = sop->sem_op; 693 result = curr->semval; 694 695 if (!sem_op && result) 696 goto would_block; /* wait-for-zero */ 697 698 result += sem_op; 699 if (result < 0) 700 goto would_block; 701 702 if (result > SEMVMX) 703 return -ERANGE; 704 705 if (sop->sem_flg & SEM_UNDO) { 706 int undo = un->semadj[sop->sem_num] - sem_op; 707 708 /* Exceeding the undo range is an error. */ 709 if (undo < (-SEMAEM - 1) || undo > SEMAEM) 710 return -ERANGE; 711 } 712 } 713 714 for (sop = sops; sop < sops + nsops; sop++) { 715 curr = &sma->sems[sop->sem_num]; 716 sem_op = sop->sem_op; 717 result = curr->semval; 718 719 if (sop->sem_flg & SEM_UNDO) { 720 int undo = un->semadj[sop->sem_num] - sem_op; 721 722 un->semadj[sop->sem_num] = undo; 723 } 724 curr->semval += sem_op; 725 curr->sempid = q->pid; 726 } 727 728 return 0; 729 730 would_block: 731 q->blocking = sop; 732 return sop->sem_flg & IPC_NOWAIT ? -EAGAIN : 1; 733 } 734 735 static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error, 736 struct wake_q_head *wake_q) 737 { 738 wake_q_add(wake_q, q->sleeper); 739 /* 740 * Rely on the above implicit barrier, such that we can 741 * ensure that we hold reference to the task before setting 742 * q->status. Otherwise we could race with do_exit if the 743 * task is awoken by an external event before calling 744 * wake_up_process(). 745 */ 746 WRITE_ONCE(q->status, error); 747 } 748 749 static void unlink_queue(struct sem_array *sma, struct sem_queue *q) 750 { 751 list_del(&q->list); 752 if (q->nsops > 1) 753 sma->complex_count--; 754 } 755 756 /** check_restart(sma, q) 757 * @sma: semaphore array 758 * @q: the operation that just completed 759 * 760 * update_queue is O(N^2) when it restarts scanning the whole queue of 761 * waiting operations. Therefore this function checks if the restart is 762 * really necessary. It is called after a previously waiting operation 763 * modified the array. 764 * Note that wait-for-zero operations are handled without restart. 765 */ 766 static inline int check_restart(struct sem_array *sma, struct sem_queue *q) 767 { 768 /* pending complex alter operations are too difficult to analyse */ 769 if (!list_empty(&sma->pending_alter)) 770 return 1; 771 772 /* we were a sleeping complex operation. Too difficult */ 773 if (q->nsops > 1) 774 return 1; 775 776 /* It is impossible that someone waits for the new value: 777 * - complex operations always restart. 778 * - wait-for-zero are handled seperately. 779 * - q is a previously sleeping simple operation that 780 * altered the array. It must be a decrement, because 781 * simple increments never sleep. 782 * - If there are older (higher priority) decrements 783 * in the queue, then they have observed the original 784 * semval value and couldn't proceed. The operation 785 * decremented to value - thus they won't proceed either. 786 */ 787 return 0; 788 } 789 790 /** 791 * wake_const_ops - wake up non-alter tasks 792 * @sma: semaphore array. 793 * @semnum: semaphore that was modified. 794 * @wake_q: lockless wake-queue head. 795 * 796 * wake_const_ops must be called after a semaphore in a semaphore array 797 * was set to 0. If complex const operations are pending, wake_const_ops must 798 * be called with semnum = -1, as well as with the number of each modified 799 * semaphore. 800 * The tasks that must be woken up are added to @wake_q. The return code 801 * is stored in q->pid. 802 * The function returns 1 if at least one operation was completed successfully. 803 */ 804 static int wake_const_ops(struct sem_array *sma, int semnum, 805 struct wake_q_head *wake_q) 806 { 807 struct sem_queue *q, *tmp; 808 struct list_head *pending_list; 809 int semop_completed = 0; 810 811 if (semnum == -1) 812 pending_list = &sma->pending_const; 813 else 814 pending_list = &sma->sems[semnum].pending_const; 815 816 list_for_each_entry_safe(q, tmp, pending_list, list) { 817 int error = perform_atomic_semop(sma, q); 818 819 if (error > 0) 820 continue; 821 /* operation completed, remove from queue & wakeup */ 822 unlink_queue(sma, q); 823 824 wake_up_sem_queue_prepare(q, error, wake_q); 825 if (error == 0) 826 semop_completed = 1; 827 } 828 829 return semop_completed; 830 } 831 832 /** 833 * do_smart_wakeup_zero - wakeup all wait for zero tasks 834 * @sma: semaphore array 835 * @sops: operations that were performed 836 * @nsops: number of operations 837 * @wake_q: lockless wake-queue head 838 * 839 * Checks all required queue for wait-for-zero operations, based 840 * on the actual changes that were performed on the semaphore array. 841 * The function returns 1 if at least one operation was completed successfully. 842 */ 843 static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops, 844 int nsops, struct wake_q_head *wake_q) 845 { 846 int i; 847 int semop_completed = 0; 848 int got_zero = 0; 849 850 /* first: the per-semaphore queues, if known */ 851 if (sops) { 852 for (i = 0; i < nsops; i++) { 853 int num = sops[i].sem_num; 854 855 if (sma->sems[num].semval == 0) { 856 got_zero = 1; 857 semop_completed |= wake_const_ops(sma, num, wake_q); 858 } 859 } 860 } else { 861 /* 862 * No sops means modified semaphores not known. 863 * Assume all were changed. 864 */ 865 for (i = 0; i < sma->sem_nsems; i++) { 866 if (sma->sems[i].semval == 0) { 867 got_zero = 1; 868 semop_completed |= wake_const_ops(sma, i, wake_q); 869 } 870 } 871 } 872 /* 873 * If one of the modified semaphores got 0, 874 * then check the global queue, too. 875 */ 876 if (got_zero) 877 semop_completed |= wake_const_ops(sma, -1, wake_q); 878 879 return semop_completed; 880 } 881 882 883 /** 884 * update_queue - look for tasks that can be completed. 885 * @sma: semaphore array. 886 * @semnum: semaphore that was modified. 887 * @wake_q: lockless wake-queue head. 888 * 889 * update_queue must be called after a semaphore in a semaphore array 890 * was modified. If multiple semaphores were modified, update_queue must 891 * be called with semnum = -1, as well as with the number of each modified 892 * semaphore. 893 * The tasks that must be woken up are added to @wake_q. The return code 894 * is stored in q->pid. 895 * The function internally checks if const operations can now succeed. 896 * 897 * The function return 1 if at least one semop was completed successfully. 898 */ 899 static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *wake_q) 900 { 901 struct sem_queue *q, *tmp; 902 struct list_head *pending_list; 903 int semop_completed = 0; 904 905 if (semnum == -1) 906 pending_list = &sma->pending_alter; 907 else 908 pending_list = &sma->sems[semnum].pending_alter; 909 910 again: 911 list_for_each_entry_safe(q, tmp, pending_list, list) { 912 int error, restart; 913 914 /* If we are scanning the single sop, per-semaphore list of 915 * one semaphore and that semaphore is 0, then it is not 916 * necessary to scan further: simple increments 917 * that affect only one entry succeed immediately and cannot 918 * be in the per semaphore pending queue, and decrements 919 * cannot be successful if the value is already 0. 920 */ 921 if (semnum != -1 && sma->sems[semnum].semval == 0) 922 break; 923 924 error = perform_atomic_semop(sma, q); 925 926 /* Does q->sleeper still need to sleep? */ 927 if (error > 0) 928 continue; 929 930 unlink_queue(sma, q); 931 932 if (error) { 933 restart = 0; 934 } else { 935 semop_completed = 1; 936 do_smart_wakeup_zero(sma, q->sops, q->nsops, wake_q); 937 restart = check_restart(sma, q); 938 } 939 940 wake_up_sem_queue_prepare(q, error, wake_q); 941 if (restart) 942 goto again; 943 } 944 return semop_completed; 945 } 946 947 /** 948 * set_semotime - set sem_otime 949 * @sma: semaphore array 950 * @sops: operations that modified the array, may be NULL 951 * 952 * sem_otime is replicated to avoid cache line trashing. 953 * This function sets one instance to the current time. 954 */ 955 static void set_semotime(struct sem_array *sma, struct sembuf *sops) 956 { 957 if (sops == NULL) { 958 sma->sems[0].sem_otime = get_seconds(); 959 } else { 960 sma->sems[sops[0].sem_num].sem_otime = 961 get_seconds(); 962 } 963 } 964 965 /** 966 * do_smart_update - optimized update_queue 967 * @sma: semaphore array 968 * @sops: operations that were performed 969 * @nsops: number of operations 970 * @otime: force setting otime 971 * @wake_q: lockless wake-queue head 972 * 973 * do_smart_update() does the required calls to update_queue and wakeup_zero, 974 * based on the actual changes that were performed on the semaphore array. 975 * Note that the function does not do the actual wake-up: the caller is 976 * responsible for calling wake_up_q(). 977 * It is safe to perform this call after dropping all locks. 978 */ 979 static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops, 980 int otime, struct wake_q_head *wake_q) 981 { 982 int i; 983 984 otime |= do_smart_wakeup_zero(sma, sops, nsops, wake_q); 985 986 if (!list_empty(&sma->pending_alter)) { 987 /* semaphore array uses the global queue - just process it. */ 988 otime |= update_queue(sma, -1, wake_q); 989 } else { 990 if (!sops) { 991 /* 992 * No sops, thus the modified semaphores are not 993 * known. Check all. 994 */ 995 for (i = 0; i < sma->sem_nsems; i++) 996 otime |= update_queue(sma, i, wake_q); 997 } else { 998 /* 999 * Check the semaphores that were increased: 1000 * - No complex ops, thus all sleeping ops are 1001 * decrease. 1002 * - if we decreased the value, then any sleeping 1003 * semaphore ops wont be able to run: If the 1004 * previous value was too small, then the new 1005 * value will be too small, too. 1006 */ 1007 for (i = 0; i < nsops; i++) { 1008 if (sops[i].sem_op > 0) { 1009 otime |= update_queue(sma, 1010 sops[i].sem_num, wake_q); 1011 } 1012 } 1013 } 1014 } 1015 if (otime) 1016 set_semotime(sma, sops); 1017 } 1018 1019 /* 1020 * check_qop: Test if a queued operation sleeps on the semaphore semnum 1021 */ 1022 static int check_qop(struct sem_array *sma, int semnum, struct sem_queue *q, 1023 bool count_zero) 1024 { 1025 struct sembuf *sop = q->blocking; 1026 1027 /* 1028 * Linux always (since 0.99.10) reported a task as sleeping on all 1029 * semaphores. This violates SUS, therefore it was changed to the 1030 * standard compliant behavior. 1031 * Give the administrators a chance to notice that an application 1032 * might misbehave because it relies on the Linux behavior. 1033 */ 1034 pr_info_once("semctl(GETNCNT/GETZCNT) is since 3.16 Single Unix Specification compliant.\n" 1035 "The task %s (%d) triggered the difference, watch for misbehavior.\n", 1036 current->comm, task_pid_nr(current)); 1037 1038 if (sop->sem_num != semnum) 1039 return 0; 1040 1041 if (count_zero && sop->sem_op == 0) 1042 return 1; 1043 if (!count_zero && sop->sem_op < 0) 1044 return 1; 1045 1046 return 0; 1047 } 1048 1049 /* The following counts are associated to each semaphore: 1050 * semncnt number of tasks waiting on semval being nonzero 1051 * semzcnt number of tasks waiting on semval being zero 1052 * 1053 * Per definition, a task waits only on the semaphore of the first semop 1054 * that cannot proceed, even if additional operation would block, too. 1055 */ 1056 static int count_semcnt(struct sem_array *sma, ushort semnum, 1057 bool count_zero) 1058 { 1059 struct list_head *l; 1060 struct sem_queue *q; 1061 int semcnt; 1062 1063 semcnt = 0; 1064 /* First: check the simple operations. They are easy to evaluate */ 1065 if (count_zero) 1066 l = &sma->sems[semnum].pending_const; 1067 else 1068 l = &sma->sems[semnum].pending_alter; 1069 1070 list_for_each_entry(q, l, list) { 1071 /* all task on a per-semaphore list sleep on exactly 1072 * that semaphore 1073 */ 1074 semcnt++; 1075 } 1076 1077 /* Then: check the complex operations. */ 1078 list_for_each_entry(q, &sma->pending_alter, list) { 1079 semcnt += check_qop(sma, semnum, q, count_zero); 1080 } 1081 if (count_zero) { 1082 list_for_each_entry(q, &sma->pending_const, list) { 1083 semcnt += check_qop(sma, semnum, q, count_zero); 1084 } 1085 } 1086 return semcnt; 1087 } 1088 1089 /* Free a semaphore set. freeary() is called with sem_ids.rwsem locked 1090 * as a writer and the spinlock for this semaphore set hold. sem_ids.rwsem 1091 * remains locked on exit. 1092 */ 1093 static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 1094 { 1095 struct sem_undo *un, *tu; 1096 struct sem_queue *q, *tq; 1097 struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); 1098 int i; 1099 DEFINE_WAKE_Q(wake_q); 1100 1101 /* Free the existing undo structures for this semaphore set. */ 1102 ipc_assert_locked_object(&sma->sem_perm); 1103 list_for_each_entry_safe(un, tu, &sma->list_id, list_id) { 1104 list_del(&un->list_id); 1105 spin_lock(&un->ulp->lock); 1106 un->semid = -1; 1107 list_del_rcu(&un->list_proc); 1108 spin_unlock(&un->ulp->lock); 1109 kfree_rcu(un, rcu); 1110 } 1111 1112 /* Wake up all pending processes and let them fail with EIDRM. */ 1113 list_for_each_entry_safe(q, tq, &sma->pending_const, list) { 1114 unlink_queue(sma, q); 1115 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1116 } 1117 1118 list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { 1119 unlink_queue(sma, q); 1120 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1121 } 1122 for (i = 0; i < sma->sem_nsems; i++) { 1123 struct sem *sem = &sma->sems[i]; 1124 list_for_each_entry_safe(q, tq, &sem->pending_const, list) { 1125 unlink_queue(sma, q); 1126 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1127 } 1128 list_for_each_entry_safe(q, tq, &sem->pending_alter, list) { 1129 unlink_queue(sma, q); 1130 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1131 } 1132 } 1133 1134 /* Remove the semaphore set from the IDR */ 1135 sem_rmid(ns, sma); 1136 sem_unlock(sma, -1); 1137 rcu_read_unlock(); 1138 1139 wake_up_q(&wake_q); 1140 ns->used_sems -= sma->sem_nsems; 1141 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1142 } 1143 1144 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) 1145 { 1146 switch (version) { 1147 case IPC_64: 1148 return copy_to_user(buf, in, sizeof(*in)); 1149 case IPC_OLD: 1150 { 1151 struct semid_ds out; 1152 1153 memset(&out, 0, sizeof(out)); 1154 1155 ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm); 1156 1157 out.sem_otime = in->sem_otime; 1158 out.sem_ctime = in->sem_ctime; 1159 out.sem_nsems = in->sem_nsems; 1160 1161 return copy_to_user(buf, &out, sizeof(out)); 1162 } 1163 default: 1164 return -EINVAL; 1165 } 1166 } 1167 1168 static time64_t get_semotime(struct sem_array *sma) 1169 { 1170 int i; 1171 time64_t res; 1172 1173 res = sma->sems[0].sem_otime; 1174 for (i = 1; i < sma->sem_nsems; i++) { 1175 time64_t to = sma->sems[i].sem_otime; 1176 1177 if (to > res) 1178 res = to; 1179 } 1180 return res; 1181 } 1182 1183 static int semctl_stat(struct ipc_namespace *ns, int semid, 1184 int cmd, struct semid64_ds *semid64) 1185 { 1186 struct sem_array *sma; 1187 int id = 0; 1188 int err; 1189 1190 memset(semid64, 0, sizeof(*semid64)); 1191 1192 rcu_read_lock(); 1193 if (cmd == SEM_STAT) { 1194 sma = sem_obtain_object(ns, semid); 1195 if (IS_ERR(sma)) { 1196 err = PTR_ERR(sma); 1197 goto out_unlock; 1198 } 1199 id = sma->sem_perm.id; 1200 } else { 1201 sma = sem_obtain_object_check(ns, semid); 1202 if (IS_ERR(sma)) { 1203 err = PTR_ERR(sma); 1204 goto out_unlock; 1205 } 1206 } 1207 1208 err = -EACCES; 1209 if (ipcperms(ns, &sma->sem_perm, S_IRUGO)) 1210 goto out_unlock; 1211 1212 err = security_sem_semctl(sma, cmd); 1213 if (err) 1214 goto out_unlock; 1215 1216 kernel_to_ipc64_perm(&sma->sem_perm, &semid64->sem_perm); 1217 semid64->sem_otime = get_semotime(sma); 1218 semid64->sem_ctime = sma->sem_ctime; 1219 semid64->sem_nsems = sma->sem_nsems; 1220 rcu_read_unlock(); 1221 return id; 1222 1223 out_unlock: 1224 rcu_read_unlock(); 1225 return err; 1226 } 1227 1228 static int semctl_info(struct ipc_namespace *ns, int semid, 1229 int cmd, void __user *p) 1230 { 1231 struct seminfo seminfo; 1232 int max_id; 1233 int err; 1234 1235 err = security_sem_semctl(NULL, cmd); 1236 if (err) 1237 return err; 1238 1239 memset(&seminfo, 0, sizeof(seminfo)); 1240 seminfo.semmni = ns->sc_semmni; 1241 seminfo.semmns = ns->sc_semmns; 1242 seminfo.semmsl = ns->sc_semmsl; 1243 seminfo.semopm = ns->sc_semopm; 1244 seminfo.semvmx = SEMVMX; 1245 seminfo.semmnu = SEMMNU; 1246 seminfo.semmap = SEMMAP; 1247 seminfo.semume = SEMUME; 1248 down_read(&sem_ids(ns).rwsem); 1249 if (cmd == SEM_INFO) { 1250 seminfo.semusz = sem_ids(ns).in_use; 1251 seminfo.semaem = ns->used_sems; 1252 } else { 1253 seminfo.semusz = SEMUSZ; 1254 seminfo.semaem = SEMAEM; 1255 } 1256 max_id = ipc_get_maxid(&sem_ids(ns)); 1257 up_read(&sem_ids(ns).rwsem); 1258 if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) 1259 return -EFAULT; 1260 return (max_id < 0) ? 0 : max_id; 1261 } 1262 1263 static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, 1264 int val) 1265 { 1266 struct sem_undo *un; 1267 struct sem_array *sma; 1268 struct sem *curr; 1269 int err; 1270 DEFINE_WAKE_Q(wake_q); 1271 1272 if (val > SEMVMX || val < 0) 1273 return -ERANGE; 1274 1275 rcu_read_lock(); 1276 sma = sem_obtain_object_check(ns, semid); 1277 if (IS_ERR(sma)) { 1278 rcu_read_unlock(); 1279 return PTR_ERR(sma); 1280 } 1281 1282 if (semnum < 0 || semnum >= sma->sem_nsems) { 1283 rcu_read_unlock(); 1284 return -EINVAL; 1285 } 1286 1287 1288 if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) { 1289 rcu_read_unlock(); 1290 return -EACCES; 1291 } 1292 1293 err = security_sem_semctl(sma, SETVAL); 1294 if (err) { 1295 rcu_read_unlock(); 1296 return -EACCES; 1297 } 1298 1299 sem_lock(sma, NULL, -1); 1300 1301 if (!ipc_valid_object(&sma->sem_perm)) { 1302 sem_unlock(sma, -1); 1303 rcu_read_unlock(); 1304 return -EIDRM; 1305 } 1306 1307 curr = &sma->sems[semnum]; 1308 1309 ipc_assert_locked_object(&sma->sem_perm); 1310 list_for_each_entry(un, &sma->list_id, list_id) 1311 un->semadj[semnum] = 0; 1312 1313 curr->semval = val; 1314 curr->sempid = task_tgid_vnr(current); 1315 sma->sem_ctime = ktime_get_real_seconds(); 1316 /* maybe some queued-up processes were waiting for this */ 1317 do_smart_update(sma, NULL, 0, 0, &wake_q); 1318 sem_unlock(sma, -1); 1319 rcu_read_unlock(); 1320 wake_up_q(&wake_q); 1321 return 0; 1322 } 1323 1324 static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, 1325 int cmd, void __user *p) 1326 { 1327 struct sem_array *sma; 1328 struct sem *curr; 1329 int err, nsems; 1330 ushort fast_sem_io[SEMMSL_FAST]; 1331 ushort *sem_io = fast_sem_io; 1332 DEFINE_WAKE_Q(wake_q); 1333 1334 rcu_read_lock(); 1335 sma = sem_obtain_object_check(ns, semid); 1336 if (IS_ERR(sma)) { 1337 rcu_read_unlock(); 1338 return PTR_ERR(sma); 1339 } 1340 1341 nsems = sma->sem_nsems; 1342 1343 err = -EACCES; 1344 if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? S_IWUGO : S_IRUGO)) 1345 goto out_rcu_wakeup; 1346 1347 err = security_sem_semctl(sma, cmd); 1348 if (err) 1349 goto out_rcu_wakeup; 1350 1351 err = -EACCES; 1352 switch (cmd) { 1353 case GETALL: 1354 { 1355 ushort __user *array = p; 1356 int i; 1357 1358 sem_lock(sma, NULL, -1); 1359 if (!ipc_valid_object(&sma->sem_perm)) { 1360 err = -EIDRM; 1361 goto out_unlock; 1362 } 1363 if (nsems > SEMMSL_FAST) { 1364 if (!ipc_rcu_getref(&sma->sem_perm)) { 1365 err = -EIDRM; 1366 goto out_unlock; 1367 } 1368 sem_unlock(sma, -1); 1369 rcu_read_unlock(); 1370 sem_io = kvmalloc_array(nsems, sizeof(ushort), 1371 GFP_KERNEL); 1372 if (sem_io == NULL) { 1373 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1374 return -ENOMEM; 1375 } 1376 1377 rcu_read_lock(); 1378 sem_lock_and_putref(sma); 1379 if (!ipc_valid_object(&sma->sem_perm)) { 1380 err = -EIDRM; 1381 goto out_unlock; 1382 } 1383 } 1384 for (i = 0; i < sma->sem_nsems; i++) 1385 sem_io[i] = sma->sems[i].semval; 1386 sem_unlock(sma, -1); 1387 rcu_read_unlock(); 1388 err = 0; 1389 if (copy_to_user(array, sem_io, nsems*sizeof(ushort))) 1390 err = -EFAULT; 1391 goto out_free; 1392 } 1393 case SETALL: 1394 { 1395 int i; 1396 struct sem_undo *un; 1397 1398 if (!ipc_rcu_getref(&sma->sem_perm)) { 1399 err = -EIDRM; 1400 goto out_rcu_wakeup; 1401 } 1402 rcu_read_unlock(); 1403 1404 if (nsems > SEMMSL_FAST) { 1405 sem_io = kvmalloc_array(nsems, sizeof(ushort), 1406 GFP_KERNEL); 1407 if (sem_io == NULL) { 1408 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1409 return -ENOMEM; 1410 } 1411 } 1412 1413 if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) { 1414 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1415 err = -EFAULT; 1416 goto out_free; 1417 } 1418 1419 for (i = 0; i < nsems; i++) { 1420 if (sem_io[i] > SEMVMX) { 1421 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1422 err = -ERANGE; 1423 goto out_free; 1424 } 1425 } 1426 rcu_read_lock(); 1427 sem_lock_and_putref(sma); 1428 if (!ipc_valid_object(&sma->sem_perm)) { 1429 err = -EIDRM; 1430 goto out_unlock; 1431 } 1432 1433 for (i = 0; i < nsems; i++) { 1434 sma->sems[i].semval = sem_io[i]; 1435 sma->sems[i].sempid = task_tgid_vnr(current); 1436 } 1437 1438 ipc_assert_locked_object(&sma->sem_perm); 1439 list_for_each_entry(un, &sma->list_id, list_id) { 1440 for (i = 0; i < nsems; i++) 1441 un->semadj[i] = 0; 1442 } 1443 sma->sem_ctime = ktime_get_real_seconds(); 1444 /* maybe some queued-up processes were waiting for this */ 1445 do_smart_update(sma, NULL, 0, 0, &wake_q); 1446 err = 0; 1447 goto out_unlock; 1448 } 1449 /* GETVAL, GETPID, GETNCTN, GETZCNT: fall-through */ 1450 } 1451 err = -EINVAL; 1452 if (semnum < 0 || semnum >= nsems) 1453 goto out_rcu_wakeup; 1454 1455 sem_lock(sma, NULL, -1); 1456 if (!ipc_valid_object(&sma->sem_perm)) { 1457 err = -EIDRM; 1458 goto out_unlock; 1459 } 1460 curr = &sma->sems[semnum]; 1461 1462 switch (cmd) { 1463 case GETVAL: 1464 err = curr->semval; 1465 goto out_unlock; 1466 case GETPID: 1467 err = curr->sempid; 1468 goto out_unlock; 1469 case GETNCNT: 1470 err = count_semcnt(sma, semnum, 0); 1471 goto out_unlock; 1472 case GETZCNT: 1473 err = count_semcnt(sma, semnum, 1); 1474 goto out_unlock; 1475 } 1476 1477 out_unlock: 1478 sem_unlock(sma, -1); 1479 out_rcu_wakeup: 1480 rcu_read_unlock(); 1481 wake_up_q(&wake_q); 1482 out_free: 1483 if (sem_io != fast_sem_io) 1484 kvfree(sem_io); 1485 return err; 1486 } 1487 1488 static inline unsigned long 1489 copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version) 1490 { 1491 switch (version) { 1492 case IPC_64: 1493 if (copy_from_user(out, buf, sizeof(*out))) 1494 return -EFAULT; 1495 return 0; 1496 case IPC_OLD: 1497 { 1498 struct semid_ds tbuf_old; 1499 1500 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 1501 return -EFAULT; 1502 1503 out->sem_perm.uid = tbuf_old.sem_perm.uid; 1504 out->sem_perm.gid = tbuf_old.sem_perm.gid; 1505 out->sem_perm.mode = tbuf_old.sem_perm.mode; 1506 1507 return 0; 1508 } 1509 default: 1510 return -EINVAL; 1511 } 1512 } 1513 1514 /* 1515 * This function handles some semctl commands which require the rwsem 1516 * to be held in write mode. 1517 * NOTE: no locks must be held, the rwsem is taken inside this function. 1518 */ 1519 static int semctl_down(struct ipc_namespace *ns, int semid, 1520 int cmd, struct semid64_ds *semid64) 1521 { 1522 struct sem_array *sma; 1523 int err; 1524 struct kern_ipc_perm *ipcp; 1525 1526 down_write(&sem_ids(ns).rwsem); 1527 rcu_read_lock(); 1528 1529 ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd, 1530 &semid64->sem_perm, 0); 1531 if (IS_ERR(ipcp)) { 1532 err = PTR_ERR(ipcp); 1533 goto out_unlock1; 1534 } 1535 1536 sma = container_of(ipcp, struct sem_array, sem_perm); 1537 1538 err = security_sem_semctl(sma, cmd); 1539 if (err) 1540 goto out_unlock1; 1541 1542 switch (cmd) { 1543 case IPC_RMID: 1544 sem_lock(sma, NULL, -1); 1545 /* freeary unlocks the ipc object and rcu */ 1546 freeary(ns, ipcp); 1547 goto out_up; 1548 case IPC_SET: 1549 sem_lock(sma, NULL, -1); 1550 err = ipc_update_perm(&semid64->sem_perm, ipcp); 1551 if (err) 1552 goto out_unlock0; 1553 sma->sem_ctime = ktime_get_real_seconds(); 1554 break; 1555 default: 1556 err = -EINVAL; 1557 goto out_unlock1; 1558 } 1559 1560 out_unlock0: 1561 sem_unlock(sma, -1); 1562 out_unlock1: 1563 rcu_read_unlock(); 1564 out_up: 1565 up_write(&sem_ids(ns).rwsem); 1566 return err; 1567 } 1568 1569 SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg) 1570 { 1571 int version; 1572 struct ipc_namespace *ns; 1573 void __user *p = (void __user *)arg; 1574 struct semid64_ds semid64; 1575 int err; 1576 1577 if (semid < 0) 1578 return -EINVAL; 1579 1580 version = ipc_parse_version(&cmd); 1581 ns = current->nsproxy->ipc_ns; 1582 1583 switch (cmd) { 1584 case IPC_INFO: 1585 case SEM_INFO: 1586 return semctl_info(ns, semid, cmd, p); 1587 case IPC_STAT: 1588 case SEM_STAT: 1589 err = semctl_stat(ns, semid, cmd, &semid64); 1590 if (err < 0) 1591 return err; 1592 if (copy_semid_to_user(p, &semid64, version)) 1593 err = -EFAULT; 1594 return err; 1595 case GETALL: 1596 case GETVAL: 1597 case GETPID: 1598 case GETNCNT: 1599 case GETZCNT: 1600 case SETALL: 1601 return semctl_main(ns, semid, semnum, cmd, p); 1602 case SETVAL: { 1603 int val; 1604 #if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN) 1605 /* big-endian 64bit */ 1606 val = arg >> 32; 1607 #else 1608 /* 32bit or little-endian 64bit */ 1609 val = arg; 1610 #endif 1611 return semctl_setval(ns, semid, semnum, val); 1612 } 1613 case IPC_SET: 1614 if (copy_semid_from_user(&semid64, p, version)) 1615 return -EFAULT; 1616 case IPC_RMID: 1617 return semctl_down(ns, semid, cmd, &semid64); 1618 default: 1619 return -EINVAL; 1620 } 1621 } 1622 1623 #ifdef CONFIG_COMPAT 1624 1625 struct compat_semid_ds { 1626 struct compat_ipc_perm sem_perm; 1627 compat_time_t sem_otime; 1628 compat_time_t sem_ctime; 1629 compat_uptr_t sem_base; 1630 compat_uptr_t sem_pending; 1631 compat_uptr_t sem_pending_last; 1632 compat_uptr_t undo; 1633 unsigned short sem_nsems; 1634 }; 1635 1636 static int copy_compat_semid_from_user(struct semid64_ds *out, void __user *buf, 1637 int version) 1638 { 1639 memset(out, 0, sizeof(*out)); 1640 if (version == IPC_64) { 1641 struct compat_semid64_ds __user *p = buf; 1642 return get_compat_ipc64_perm(&out->sem_perm, &p->sem_perm); 1643 } else { 1644 struct compat_semid_ds __user *p = buf; 1645 return get_compat_ipc_perm(&out->sem_perm, &p->sem_perm); 1646 } 1647 } 1648 1649 static int copy_compat_semid_to_user(void __user *buf, struct semid64_ds *in, 1650 int version) 1651 { 1652 if (version == IPC_64) { 1653 struct compat_semid64_ds v; 1654 memset(&v, 0, sizeof(v)); 1655 to_compat_ipc64_perm(&v.sem_perm, &in->sem_perm); 1656 v.sem_otime = in->sem_otime; 1657 v.sem_ctime = in->sem_ctime; 1658 v.sem_nsems = in->sem_nsems; 1659 return copy_to_user(buf, &v, sizeof(v)); 1660 } else { 1661 struct compat_semid_ds v; 1662 memset(&v, 0, sizeof(v)); 1663 to_compat_ipc_perm(&v.sem_perm, &in->sem_perm); 1664 v.sem_otime = in->sem_otime; 1665 v.sem_ctime = in->sem_ctime; 1666 v.sem_nsems = in->sem_nsems; 1667 return copy_to_user(buf, &v, sizeof(v)); 1668 } 1669 } 1670 1671 COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg) 1672 { 1673 void __user *p = compat_ptr(arg); 1674 struct ipc_namespace *ns; 1675 struct semid64_ds semid64; 1676 int version = compat_ipc_parse_version(&cmd); 1677 int err; 1678 1679 ns = current->nsproxy->ipc_ns; 1680 1681 if (semid < 0) 1682 return -EINVAL; 1683 1684 switch (cmd & (~IPC_64)) { 1685 case IPC_INFO: 1686 case SEM_INFO: 1687 return semctl_info(ns, semid, cmd, p); 1688 case IPC_STAT: 1689 case SEM_STAT: 1690 err = semctl_stat(ns, semid, cmd, &semid64); 1691 if (err < 0) 1692 return err; 1693 if (copy_compat_semid_to_user(p, &semid64, version)) 1694 err = -EFAULT; 1695 return err; 1696 case GETVAL: 1697 case GETPID: 1698 case GETNCNT: 1699 case GETZCNT: 1700 case GETALL: 1701 case SETALL: 1702 return semctl_main(ns, semid, semnum, cmd, p); 1703 case SETVAL: 1704 return semctl_setval(ns, semid, semnum, arg); 1705 case IPC_SET: 1706 if (copy_compat_semid_from_user(&semid64, p, version)) 1707 return -EFAULT; 1708 /* fallthru */ 1709 case IPC_RMID: 1710 return semctl_down(ns, semid, cmd, &semid64); 1711 default: 1712 return -EINVAL; 1713 } 1714 } 1715 #endif 1716 1717 /* If the task doesn't already have a undo_list, then allocate one 1718 * here. We guarantee there is only one thread using this undo list, 1719 * and current is THE ONE 1720 * 1721 * If this allocation and assignment succeeds, but later 1722 * portions of this code fail, there is no need to free the sem_undo_list. 1723 * Just let it stay associated with the task, and it'll be freed later 1724 * at exit time. 1725 * 1726 * This can block, so callers must hold no locks. 1727 */ 1728 static inline int get_undo_list(struct sem_undo_list **undo_listp) 1729 { 1730 struct sem_undo_list *undo_list; 1731 1732 undo_list = current->sysvsem.undo_list; 1733 if (!undo_list) { 1734 undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL); 1735 if (undo_list == NULL) 1736 return -ENOMEM; 1737 spin_lock_init(&undo_list->lock); 1738 refcount_set(&undo_list->refcnt, 1); 1739 INIT_LIST_HEAD(&undo_list->list_proc); 1740 1741 current->sysvsem.undo_list = undo_list; 1742 } 1743 *undo_listp = undo_list; 1744 return 0; 1745 } 1746 1747 static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid) 1748 { 1749 struct sem_undo *un; 1750 1751 list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) { 1752 if (un->semid == semid) 1753 return un; 1754 } 1755 return NULL; 1756 } 1757 1758 static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid) 1759 { 1760 struct sem_undo *un; 1761 1762 assert_spin_locked(&ulp->lock); 1763 1764 un = __lookup_undo(ulp, semid); 1765 if (un) { 1766 list_del_rcu(&un->list_proc); 1767 list_add_rcu(&un->list_proc, &ulp->list_proc); 1768 } 1769 return un; 1770 } 1771 1772 /** 1773 * find_alloc_undo - lookup (and if not present create) undo array 1774 * @ns: namespace 1775 * @semid: semaphore array id 1776 * 1777 * The function looks up (and if not present creates) the undo structure. 1778 * The size of the undo structure depends on the size of the semaphore 1779 * array, thus the alloc path is not that straightforward. 1780 * Lifetime-rules: sem_undo is rcu-protected, on success, the function 1781 * performs a rcu_read_lock(). 1782 */ 1783 static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) 1784 { 1785 struct sem_array *sma; 1786 struct sem_undo_list *ulp; 1787 struct sem_undo *un, *new; 1788 int nsems, error; 1789 1790 error = get_undo_list(&ulp); 1791 if (error) 1792 return ERR_PTR(error); 1793 1794 rcu_read_lock(); 1795 spin_lock(&ulp->lock); 1796 un = lookup_undo(ulp, semid); 1797 spin_unlock(&ulp->lock); 1798 if (likely(un != NULL)) 1799 goto out; 1800 1801 /* no undo structure around - allocate one. */ 1802 /* step 1: figure out the size of the semaphore array */ 1803 sma = sem_obtain_object_check(ns, semid); 1804 if (IS_ERR(sma)) { 1805 rcu_read_unlock(); 1806 return ERR_CAST(sma); 1807 } 1808 1809 nsems = sma->sem_nsems; 1810 if (!ipc_rcu_getref(&sma->sem_perm)) { 1811 rcu_read_unlock(); 1812 un = ERR_PTR(-EIDRM); 1813 goto out; 1814 } 1815 rcu_read_unlock(); 1816 1817 /* step 2: allocate new undo structure */ 1818 new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); 1819 if (!new) { 1820 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1821 return ERR_PTR(-ENOMEM); 1822 } 1823 1824 /* step 3: Acquire the lock on semaphore array */ 1825 rcu_read_lock(); 1826 sem_lock_and_putref(sma); 1827 if (!ipc_valid_object(&sma->sem_perm)) { 1828 sem_unlock(sma, -1); 1829 rcu_read_unlock(); 1830 kfree(new); 1831 un = ERR_PTR(-EIDRM); 1832 goto out; 1833 } 1834 spin_lock(&ulp->lock); 1835 1836 /* 1837 * step 4: check for races: did someone else allocate the undo struct? 1838 */ 1839 un = lookup_undo(ulp, semid); 1840 if (un) { 1841 kfree(new); 1842 goto success; 1843 } 1844 /* step 5: initialize & link new undo structure */ 1845 new->semadj = (short *) &new[1]; 1846 new->ulp = ulp; 1847 new->semid = semid; 1848 assert_spin_locked(&ulp->lock); 1849 list_add_rcu(&new->list_proc, &ulp->list_proc); 1850 ipc_assert_locked_object(&sma->sem_perm); 1851 list_add(&new->list_id, &sma->list_id); 1852 un = new; 1853 1854 success: 1855 spin_unlock(&ulp->lock); 1856 sem_unlock(sma, -1); 1857 out: 1858 return un; 1859 } 1860 1861 static long do_semtimedop(int semid, struct sembuf __user *tsops, 1862 unsigned nsops, const struct timespec64 *timeout) 1863 { 1864 int error = -EINVAL; 1865 struct sem_array *sma; 1866 struct sembuf fast_sops[SEMOPM_FAST]; 1867 struct sembuf *sops = fast_sops, *sop; 1868 struct sem_undo *un; 1869 int max, locknum; 1870 bool undos = false, alter = false, dupsop = false; 1871 struct sem_queue queue; 1872 unsigned long dup = 0, jiffies_left = 0; 1873 struct ipc_namespace *ns; 1874 1875 ns = current->nsproxy->ipc_ns; 1876 1877 if (nsops < 1 || semid < 0) 1878 return -EINVAL; 1879 if (nsops > ns->sc_semopm) 1880 return -E2BIG; 1881 if (nsops > SEMOPM_FAST) { 1882 sops = kvmalloc(sizeof(*sops)*nsops, GFP_KERNEL); 1883 if (sops == NULL) 1884 return -ENOMEM; 1885 } 1886 1887 if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) { 1888 error = -EFAULT; 1889 goto out_free; 1890 } 1891 1892 if (timeout) { 1893 if (timeout->tv_sec < 0 || timeout->tv_nsec < 0 || 1894 timeout->tv_nsec >= 1000000000L) { 1895 error = -EINVAL; 1896 goto out_free; 1897 } 1898 jiffies_left = timespec64_to_jiffies(timeout); 1899 } 1900 1901 max = 0; 1902 for (sop = sops; sop < sops + nsops; sop++) { 1903 unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG); 1904 1905 if (sop->sem_num >= max) 1906 max = sop->sem_num; 1907 if (sop->sem_flg & SEM_UNDO) 1908 undos = true; 1909 if (dup & mask) { 1910 /* 1911 * There was a previous alter access that appears 1912 * to have accessed the same semaphore, thus use 1913 * the dupsop logic. "appears", because the detection 1914 * can only check % BITS_PER_LONG. 1915 */ 1916 dupsop = true; 1917 } 1918 if (sop->sem_op != 0) { 1919 alter = true; 1920 dup |= mask; 1921 } 1922 } 1923 1924 if (undos) { 1925 /* On success, find_alloc_undo takes the rcu_read_lock */ 1926 un = find_alloc_undo(ns, semid); 1927 if (IS_ERR(un)) { 1928 error = PTR_ERR(un); 1929 goto out_free; 1930 } 1931 } else { 1932 un = NULL; 1933 rcu_read_lock(); 1934 } 1935 1936 sma = sem_obtain_object_check(ns, semid); 1937 if (IS_ERR(sma)) { 1938 rcu_read_unlock(); 1939 error = PTR_ERR(sma); 1940 goto out_free; 1941 } 1942 1943 error = -EFBIG; 1944 if (max >= sma->sem_nsems) { 1945 rcu_read_unlock(); 1946 goto out_free; 1947 } 1948 1949 error = -EACCES; 1950 if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) { 1951 rcu_read_unlock(); 1952 goto out_free; 1953 } 1954 1955 error = security_sem_semop(sma, sops, nsops, alter); 1956 if (error) { 1957 rcu_read_unlock(); 1958 goto out_free; 1959 } 1960 1961 error = -EIDRM; 1962 locknum = sem_lock(sma, sops, nsops); 1963 /* 1964 * We eventually might perform the following check in a lockless 1965 * fashion, considering ipc_valid_object() locking constraints. 1966 * If nsops == 1 and there is no contention for sem_perm.lock, then 1967 * only a per-semaphore lock is held and it's OK to proceed with the 1968 * check below. More details on the fine grained locking scheme 1969 * entangled here and why it's RMID race safe on comments at sem_lock() 1970 */ 1971 if (!ipc_valid_object(&sma->sem_perm)) 1972 goto out_unlock_free; 1973 /* 1974 * semid identifiers are not unique - find_alloc_undo may have 1975 * allocated an undo structure, it was invalidated by an RMID 1976 * and now a new array with received the same id. Check and fail. 1977 * This case can be detected checking un->semid. The existence of 1978 * "un" itself is guaranteed by rcu. 1979 */ 1980 if (un && un->semid == -1) 1981 goto out_unlock_free; 1982 1983 queue.sops = sops; 1984 queue.nsops = nsops; 1985 queue.undo = un; 1986 queue.pid = task_tgid_vnr(current); 1987 queue.alter = alter; 1988 queue.dupsop = dupsop; 1989 1990 error = perform_atomic_semop(sma, &queue); 1991 if (error == 0) { /* non-blocking succesfull path */ 1992 DEFINE_WAKE_Q(wake_q); 1993 1994 /* 1995 * If the operation was successful, then do 1996 * the required updates. 1997 */ 1998 if (alter) 1999 do_smart_update(sma, sops, nsops, 1, &wake_q); 2000 else 2001 set_semotime(sma, sops); 2002 2003 sem_unlock(sma, locknum); 2004 rcu_read_unlock(); 2005 wake_up_q(&wake_q); 2006 2007 goto out_free; 2008 } 2009 if (error < 0) /* non-blocking error path */ 2010 goto out_unlock_free; 2011 2012 /* 2013 * We need to sleep on this operation, so we put the current 2014 * task into the pending queue and go to sleep. 2015 */ 2016 if (nsops == 1) { 2017 struct sem *curr; 2018 curr = &sma->sems[sops->sem_num]; 2019 2020 if (alter) { 2021 if (sma->complex_count) { 2022 list_add_tail(&queue.list, 2023 &sma->pending_alter); 2024 } else { 2025 2026 list_add_tail(&queue.list, 2027 &curr->pending_alter); 2028 } 2029 } else { 2030 list_add_tail(&queue.list, &curr->pending_const); 2031 } 2032 } else { 2033 if (!sma->complex_count) 2034 merge_queues(sma); 2035 2036 if (alter) 2037 list_add_tail(&queue.list, &sma->pending_alter); 2038 else 2039 list_add_tail(&queue.list, &sma->pending_const); 2040 2041 sma->complex_count++; 2042 } 2043 2044 do { 2045 queue.status = -EINTR; 2046 queue.sleeper = current; 2047 2048 __set_current_state(TASK_INTERRUPTIBLE); 2049 sem_unlock(sma, locknum); 2050 rcu_read_unlock(); 2051 2052 if (timeout) 2053 jiffies_left = schedule_timeout(jiffies_left); 2054 else 2055 schedule(); 2056 2057 /* 2058 * fastpath: the semop has completed, either successfully or 2059 * not, from the syscall pov, is quite irrelevant to us at this 2060 * point; we're done. 2061 * 2062 * We _do_ care, nonetheless, about being awoken by a signal or 2063 * spuriously. The queue.status is checked again in the 2064 * slowpath (aka after taking sem_lock), such that we can detect 2065 * scenarios where we were awakened externally, during the 2066 * window between wake_q_add() and wake_up_q(). 2067 */ 2068 error = READ_ONCE(queue.status); 2069 if (error != -EINTR) { 2070 /* 2071 * User space could assume that semop() is a memory 2072 * barrier: Without the mb(), the cpu could 2073 * speculatively read in userspace stale data that was 2074 * overwritten by the previous owner of the semaphore. 2075 */ 2076 smp_mb(); 2077 goto out_free; 2078 } 2079 2080 rcu_read_lock(); 2081 locknum = sem_lock(sma, sops, nsops); 2082 2083 if (!ipc_valid_object(&sma->sem_perm)) 2084 goto out_unlock_free; 2085 2086 error = READ_ONCE(queue.status); 2087 2088 /* 2089 * If queue.status != -EINTR we are woken up by another process. 2090 * Leave without unlink_queue(), but with sem_unlock(). 2091 */ 2092 if (error != -EINTR) 2093 goto out_unlock_free; 2094 2095 /* 2096 * If an interrupt occurred we have to clean up the queue. 2097 */ 2098 if (timeout && jiffies_left == 0) 2099 error = -EAGAIN; 2100 } while (error == -EINTR && !signal_pending(current)); /* spurious */ 2101 2102 unlink_queue(sma, &queue); 2103 2104 out_unlock_free: 2105 sem_unlock(sma, locknum); 2106 rcu_read_unlock(); 2107 out_free: 2108 if (sops != fast_sops) 2109 kvfree(sops); 2110 return error; 2111 } 2112 2113 SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, 2114 unsigned, nsops, const struct timespec __user *, timeout) 2115 { 2116 if (timeout) { 2117 struct timespec64 ts; 2118 if (get_timespec64(&ts, timeout)) 2119 return -EFAULT; 2120 return do_semtimedop(semid, tsops, nsops, &ts); 2121 } 2122 return do_semtimedop(semid, tsops, nsops, NULL); 2123 } 2124 2125 #ifdef CONFIG_COMPAT 2126 COMPAT_SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsems, 2127 unsigned, nsops, 2128 const struct compat_timespec __user *, timeout) 2129 { 2130 if (timeout) { 2131 struct timespec64 ts; 2132 if (compat_get_timespec64(&ts, timeout)) 2133 return -EFAULT; 2134 return do_semtimedop(semid, tsems, nsops, &ts); 2135 } 2136 return do_semtimedop(semid, tsems, nsops, NULL); 2137 } 2138 #endif 2139 2140 SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops, 2141 unsigned, nsops) 2142 { 2143 return do_semtimedop(semid, tsops, nsops, NULL); 2144 } 2145 2146 /* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between 2147 * parent and child tasks. 2148 */ 2149 2150 int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) 2151 { 2152 struct sem_undo_list *undo_list; 2153 int error; 2154 2155 if (clone_flags & CLONE_SYSVSEM) { 2156 error = get_undo_list(&undo_list); 2157 if (error) 2158 return error; 2159 refcount_inc(&undo_list->refcnt); 2160 tsk->sysvsem.undo_list = undo_list; 2161 } else 2162 tsk->sysvsem.undo_list = NULL; 2163 2164 return 0; 2165 } 2166 2167 /* 2168 * add semadj values to semaphores, free undo structures. 2169 * undo structures are not freed when semaphore arrays are destroyed 2170 * so some of them may be out of date. 2171 * IMPLEMENTATION NOTE: There is some confusion over whether the 2172 * set of adjustments that needs to be done should be done in an atomic 2173 * manner or not. That is, if we are attempting to decrement the semval 2174 * should we queue up and wait until we can do so legally? 2175 * The original implementation attempted to do this (queue and wait). 2176 * The current implementation does not do so. The POSIX standard 2177 * and SVID should be consulted to determine what behavior is mandated. 2178 */ 2179 void exit_sem(struct task_struct *tsk) 2180 { 2181 struct sem_undo_list *ulp; 2182 2183 ulp = tsk->sysvsem.undo_list; 2184 if (!ulp) 2185 return; 2186 tsk->sysvsem.undo_list = NULL; 2187 2188 if (!refcount_dec_and_test(&ulp->refcnt)) 2189 return; 2190 2191 for (;;) { 2192 struct sem_array *sma; 2193 struct sem_undo *un; 2194 int semid, i; 2195 DEFINE_WAKE_Q(wake_q); 2196 2197 cond_resched(); 2198 2199 rcu_read_lock(); 2200 un = list_entry_rcu(ulp->list_proc.next, 2201 struct sem_undo, list_proc); 2202 if (&un->list_proc == &ulp->list_proc) { 2203 /* 2204 * We must wait for freeary() before freeing this ulp, 2205 * in case we raced with last sem_undo. There is a small 2206 * possibility where we exit while freeary() didn't 2207 * finish unlocking sem_undo_list. 2208 */ 2209 spin_lock(&ulp->lock); 2210 spin_unlock(&ulp->lock); 2211 rcu_read_unlock(); 2212 break; 2213 } 2214 spin_lock(&ulp->lock); 2215 semid = un->semid; 2216 spin_unlock(&ulp->lock); 2217 2218 /* exit_sem raced with IPC_RMID, nothing to do */ 2219 if (semid == -1) { 2220 rcu_read_unlock(); 2221 continue; 2222 } 2223 2224 sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid); 2225 /* exit_sem raced with IPC_RMID, nothing to do */ 2226 if (IS_ERR(sma)) { 2227 rcu_read_unlock(); 2228 continue; 2229 } 2230 2231 sem_lock(sma, NULL, -1); 2232 /* exit_sem raced with IPC_RMID, nothing to do */ 2233 if (!ipc_valid_object(&sma->sem_perm)) { 2234 sem_unlock(sma, -1); 2235 rcu_read_unlock(); 2236 continue; 2237 } 2238 un = __lookup_undo(ulp, semid); 2239 if (un == NULL) { 2240 /* exit_sem raced with IPC_RMID+semget() that created 2241 * exactly the same semid. Nothing to do. 2242 */ 2243 sem_unlock(sma, -1); 2244 rcu_read_unlock(); 2245 continue; 2246 } 2247 2248 /* remove un from the linked lists */ 2249 ipc_assert_locked_object(&sma->sem_perm); 2250 list_del(&un->list_id); 2251 2252 /* we are the last process using this ulp, acquiring ulp->lock 2253 * isn't required. Besides that, we are also protected against 2254 * IPC_RMID as we hold sma->sem_perm lock now 2255 */ 2256 list_del_rcu(&un->list_proc); 2257 2258 /* perform adjustments registered in un */ 2259 for (i = 0; i < sma->sem_nsems; i++) { 2260 struct sem *semaphore = &sma->sems[i]; 2261 if (un->semadj[i]) { 2262 semaphore->semval += un->semadj[i]; 2263 /* 2264 * Range checks of the new semaphore value, 2265 * not defined by sus: 2266 * - Some unices ignore the undo entirely 2267 * (e.g. HP UX 11i 11.22, Tru64 V5.1) 2268 * - some cap the value (e.g. FreeBSD caps 2269 * at 0, but doesn't enforce SEMVMX) 2270 * 2271 * Linux caps the semaphore value, both at 0 2272 * and at SEMVMX. 2273 * 2274 * Manfred <manfred@colorfullife.com> 2275 */ 2276 if (semaphore->semval < 0) 2277 semaphore->semval = 0; 2278 if (semaphore->semval > SEMVMX) 2279 semaphore->semval = SEMVMX; 2280 semaphore->sempid = task_tgid_vnr(current); 2281 } 2282 } 2283 /* maybe some queued-up processes were waiting for this */ 2284 do_smart_update(sma, NULL, 0, 1, &wake_q); 2285 sem_unlock(sma, -1); 2286 rcu_read_unlock(); 2287 wake_up_q(&wake_q); 2288 2289 kfree_rcu(un, rcu); 2290 } 2291 kfree(ulp); 2292 } 2293 2294 #ifdef CONFIG_PROC_FS 2295 static int sysvipc_sem_proc_show(struct seq_file *s, void *it) 2296 { 2297 struct user_namespace *user_ns = seq_user_ns(s); 2298 struct kern_ipc_perm *ipcp = it; 2299 struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); 2300 time64_t sem_otime; 2301 2302 /* 2303 * The proc interface isn't aware of sem_lock(), it calls 2304 * ipc_lock_object() directly (in sysvipc_find_ipc). 2305 * In order to stay compatible with sem_lock(), we must 2306 * enter / leave complex_mode. 2307 */ 2308 complexmode_enter(sma); 2309 2310 sem_otime = get_semotime(sma); 2311 2312 seq_printf(s, 2313 "%10d %10d %4o %10u %5u %5u %5u %5u %10llu %10llu\n", 2314 sma->sem_perm.key, 2315 sma->sem_perm.id, 2316 sma->sem_perm.mode, 2317 sma->sem_nsems, 2318 from_kuid_munged(user_ns, sma->sem_perm.uid), 2319 from_kgid_munged(user_ns, sma->sem_perm.gid), 2320 from_kuid_munged(user_ns, sma->sem_perm.cuid), 2321 from_kgid_munged(user_ns, sma->sem_perm.cgid), 2322 sem_otime, 2323 sma->sem_ctime); 2324 2325 complexmode_tryleave(sma); 2326 2327 return 0; 2328 } 2329 #endif 2330