// SPDX-License-Identifier: GPL-2.0
/*
 * linux/ipc/sem.c
 * Copyright (C) 1992 Krishna Balasubramanian
 * Copyright (C) 1995 Eric Schenk, Bruno Haible
 *
 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * SMP-threaded, sysctl's added
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
 * Enforced range limit on SEM_UNDO
 * (c) 2001 Red Hat Inc
 * Lockless wakeup
 * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
 * (c) 2016 Davidlohr Bueso <dave@stgolabs.net>
 * Further wakeup optimizations, documentation
 * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 *
 * Implementation notes: (May 2010)
 * This file implements System V semaphores.
 *
 * User space visible behavior:
 * - FIFO ordering for semop() operations (just FIFO, not starvation
 *   protection)
 * - multiple semaphore operations that alter the same semaphore in
 *   one semop() are handled.
 * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
 *   SETALL calls.
 * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
 * - undo adjustments at process exit are limited to 0..SEMVMX.
 * - namespaces are supported.
 * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
 *   to /proc/sys/kernel/sem.
 * - statistics about the usage are reported in /proc/sysvipc/sem.
 *
 * Internals:
 * - scalability:
 *   - all global variables are read-mostly.
 *   - semop() calls and semctl(RMID) are synchronized by RCU.
 *   - most operations do write operations (actually: spin_lock calls) to
 *     the per-semaphore array structure.
 *     Thus: Perfect SMP scaling between independent semaphore arrays.
 *     If multiple semaphores in one array are used, then cache line
 *     thrashing on the semaphore array spinlock will limit the scaling.
 *   - semncnt and semzcnt are calculated on demand in count_semcnt()
 * - the task that performs a successful semop() scans the list of all
 *   sleeping tasks and completes any pending operations that can be fulfilled.
 *   Semaphores are actively given to waiting tasks (necessary for FIFO).
 *   (see update_queue())
 * - To improve the scalability, the actual wake-up calls are performed after
 *   dropping all locks. (see wake_up_sem_queue_prepare())
 * - All work is done by the waker, the woken up task does not have to do
 *   anything - not even acquiring a lock or dropping a refcount.
 * - A woken up task may not even touch the semaphore array anymore, it may
 *   have been destroyed already by a semctl(RMID).
 * - UNDO values are stored in an array (one per process and per
 *   semaphore array, lazily allocated). For backwards compatibility, multiple
 *   modes for the UNDO variables are supported (per process, per thread)
 *   (see copy_semundo, CLONE_SYSVSEM)
 * - There are two lists of the pending operations: a per-array list
 *   and a per-semaphore list (stored in the array). This allows achieving FIFO
 *   ordering without always scanning all pending operations.
 *   The worst-case behavior is nevertheless O(N^2) for N wakeups.
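 *
 * Illustrative user-space sketch (not part of this file) of the SEM_UNDO
 * behavior documented above; "semid" is assumed to name an existing set:
 *
 *	struct sembuf op = { .sem_num = 0, .sem_op = -1, .sem_flg = SEM_UNDO };
 *	semop(semid, &op, 1);
 *
 * The kernel records a +1 adjustment in the caller's semadj array; at
 * process exit the adjustment is applied, clamped to 0..SEMVMX (see
 * exit_sem() below).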
 */

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/time.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
#include <linux/sched/wake_q.h>

#include <linux/uaccess.h>
#include "util.h"


/* One queue for each sleeping process in the system. */
struct sem_queue {
	struct list_head	list;	 /* queue of pending operations */
	struct task_struct	*sleeper; /* this process */
	struct sem_undo		*undo;	 /* undo structure */
	int			pid;	 /* process id of requesting process */
	int			status;	 /* completion status of operation */
	struct sembuf		*sops;	 /* array of pending operations */
	struct sembuf		*blocking; /* the operation that blocked */
	int			nsops;	 /* number of operations */
	bool			alter;	 /* does *sops alter the array? */
	bool			dupsop;	 /* sops on more than one sem_num */
};

/* Each task has a list of undo requests. They are executed automatically
 * when the process exits.
 */
struct sem_undo {
	struct list_head	list_proc;	/* per-process list: *
						 * all undos from one process
						 * rcu protected */
	struct rcu_head		rcu;		/* rcu struct for sem_undo */
	struct sem_undo_list	*ulp;		/* back ptr to sem_undo_list */
	struct list_head	list_id;	/* per semaphore array list:
						 * all undos for one array */
	int			semid;		/* semaphore set identifier */
	short			*semadj;	/* array of adjustments */
						/* one per semaphore */
};

/* sem_undo_list controls shared access to the list of sem_undo structures
 * that may be shared among all tasks of a CLONE_SYSVSEM task group.
 */
struct sem_undo_list {
	refcount_t		refcnt;
	spinlock_t		lock;
	struct list_head	list_proc;
};


#define sem_ids(ns)	((ns)->ids[IPC_SEM_IDS])

static int newary(struct ipc_namespace *, struct ipc_params *);
static void freeary(struct ipc_namespace *, struct kern_ipc_perm *);
#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
#endif

#define SEMMSL_FAST	256 /* 512 bytes on stack */
#define SEMOPM_FAST	64  /* ~ 372 bytes on stack */

/*
 * Switching from the mode suitable for simple ops
 * to the mode for complex ops is costly. Therefore:
 * use some hysteresis
 */
#define USE_GLOBAL_LOCK_HYSTERESIS	10

/*
 * Locking:
 * a) global sem_lock() for read/write
 *	sem_undo.id_next,
 *	sem_array.complex_count,
 *	sem_array.pending{_alter,_const},
 *	sem_array.sem_undo
 *
 * b) global or semaphore sem_lock() for read/write:
 *	sem_array.sems[i].pending_{const,alter}:
 *
 * c) special:
 *	sem_undo_list.list_proc:
 *	* undo_list->lock for write
 *	* rcu for read
 *	use_global_lock:
 *	* global sem_lock() for write
 *	* either local or global sem_lock() for read.
 *
 * Memory ordering:
 * Most ordering is enforced by using spin_lock() and spin_unlock().
 * The special case is use_global_lock:
 * Setting it from non-zero to 0 is a RELEASE, this is ensured by
 * using smp_store_release().
 * Testing if it is non-zero is an ACQUIRE, this is ensured by using
 * smp_load_acquire().
 * Setting it from 0 to non-zero must be ordered with regards to
 * this smp_load_acquire(), this is guaranteed because the smp_load_acquire()
 * is inside a spin_lock() and after a write from 0 to non-zero a
 * spin_lock()+spin_unlock() is done.
 */

#define sc_semmsl	sem_ctls[0]
#define sc_semmns	sem_ctls[1]
#define sc_semopm	sem_ctls[2]
#define sc_semmni	sem_ctls[3]

int sem_init_ns(struct ipc_namespace *ns)
{
	ns->sc_semmsl = SEMMSL;
	ns->sc_semmns = SEMMNS;
	ns->sc_semopm = SEMOPM;
	ns->sc_semmni = SEMMNI;
	ns->used_sems = 0;
	return ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
}

#ifdef CONFIG_IPC_NS
void sem_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &sem_ids(ns), freeary);
	idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr);
	rhashtable_destroy(&ns->ids[IPC_SEM_IDS].key_ht);
}
#endif

int __init sem_init(void)
{
	const int err = sem_init_ns(&init_ipc_ns);

	ipc_init_proc_interface("sysvipc/sem",
				" key semid perms nsems uid gid cuid cgid otime ctime\n",
				IPC_SEM_IDS, sysvipc_sem_proc_show);
	return err;
}

/**
 * unmerge_queues - unmerge queues, if possible.
 * @sma: semaphore array
 *
 * The function unmerges the wait queues if complex_count is 0.
 * It must be called prior to dropping the global semaphore array lock.
 */
static void unmerge_queues(struct sem_array *sma)
{
	struct sem_queue *q, *tq;

	/* complex operations still around? */
	if (sma->complex_count)
		return;
	/*
	 * We will switch back to simple mode.
	 * Move all pending operations back into the per-semaphore
	 * queues.
	 */
	list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
		struct sem *curr;
		curr = &sma->sems[q->sops[0].sem_num];

		list_add_tail(&q->list, &curr->pending_alter);
	}
	INIT_LIST_HEAD(&sma->pending_alter);
}

/**
 * merge_queues - merge single semop queues into global queue
 * @sma: semaphore array
 *
 * This function merges all per-semaphore queues into the global queue.
 * It is necessary to achieve FIFO ordering for the pending single-sop
 * operations when a multi-semop operation must sleep.
 * Only the alter operations must be moved, the const operations can stay.
 */
static void merge_queues(struct sem_array *sma)
{
	int i;
	for (i = 0; i < sma->sem_nsems; i++) {
		struct sem *sem = &sma->sems[i];

		list_splice_init(&sem->pending_alter, &sma->pending_alter);
	}
}

static void sem_rcu_free(struct rcu_head *head)
{
	struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
	struct sem_array *sma = container_of(p, struct sem_array, sem_perm);

	security_sem_free(sma);
	kvfree(sma);
}

/*
 * Enter the mode suitable for non-simple operations:
 * Caller must own sem_perm.lock.
 */
static void complexmode_enter(struct sem_array *sma)
{
	int i;
	struct sem *sem;

	if (sma->use_global_lock > 0) {
		/*
		 * We are already in global lock mode.
		 * Nothing to do, just reset the
		 * counter until we return to simple mode.
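		 * (complexmode_tryleave() decrements this counter on each
		 * global unlock and only drops back to simple mode once it
		 * reaches zero.)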
		 */
		sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
		return;
	}
	sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;

	for (i = 0; i < sma->sem_nsems; i++) {
		sem = &sma->sems[i];
		spin_lock(&sem->lock);
		spin_unlock(&sem->lock);
	}
}

/*
 * Try to leave the mode that disallows simple operations:
 * Caller must own sem_perm.lock.
 */
static void complexmode_tryleave(struct sem_array *sma)
{
	if (sma->complex_count) {
		/* Complex ops are sleeping.
		 * We must stay in complex mode
		 */
		return;
	}
	if (sma->use_global_lock == 1) {
		/*
		 * Immediately after setting use_global_lock to 0,
		 * a simple op can start. Thus: all memory writes
		 * performed by the current operation must be visible
		 * before we set use_global_lock to 0.
		 */
		smp_store_release(&sma->use_global_lock, 0);
	} else {
		sma->use_global_lock--;
	}
}

#define SEM_GLOBAL_LOCK	(-1)
/*
 * If the request contains only one semaphore operation, and there are
 * no complex transactions pending, lock only the semaphore involved.
 * Otherwise, lock the entire semaphore array, since we either have
 * multiple semaphores in our own semops, or we need to look at
 * semaphores from other pending complex operations.
 */
static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
			   int nsops)
{
	struct sem *sem;

	if (nsops != 1) {
		/* Complex operation - acquire a full lock */
		ipc_lock_object(&sma->sem_perm);

		/* Prevent parallel simple ops */
		complexmode_enter(sma);
		return SEM_GLOBAL_LOCK;
	}

	/*
	 * Only one semaphore affected - try to optimize locking.
	 * Optimized locking is possible if no complex operation
	 * is either enqueued or processed right now.
	 *
	 * Both facts are tracked by use_global_lock.
	 */
	sem = &sma->sems[sops->sem_num];

	/*
	 * Initial check for use_global_lock. Just an optimization,
	 * no locking, no memory barrier.
	 */
	if (!sma->use_global_lock) {
		/*
		 * It appears that no complex operation is around.
		 * Acquire the per-semaphore lock.
		 */
		spin_lock(&sem->lock);

		/* pairs with smp_store_release() */
		if (!smp_load_acquire(&sma->use_global_lock)) {
			/* fast path successful! */
			return sops->sem_num;
		}
		spin_unlock(&sem->lock);
	}

	/* slow path: acquire the full lock */
	ipc_lock_object(&sma->sem_perm);

	if (sma->use_global_lock == 0) {
		/*
		 * The use_global_lock mode ended while we waited for
		 * sma->sem_perm.lock. Thus we must switch to locking
		 * with sem->lock.
		 * Unlike in the fast path, there is no need to recheck
		 * sma->use_global_lock after we have acquired sem->lock:
		 * We own sma->sem_perm.lock, thus use_global_lock cannot
		 * change.
		 */
		spin_lock(&sem->lock);

		ipc_unlock_object(&sma->sem_perm);
		return sops->sem_num;
	} else {
		/*
		 * Not a false alarm, thus continue to use the global lock
		 * mode. No need for complexmode_enter(), this was done by
		 * the caller that has set use_global_lock to non-zero.
396 */ 397 return SEM_GLOBAL_LOCK; 398 } 399 } 400 401 static inline void sem_unlock(struct sem_array *sma, int locknum) 402 { 403 if (locknum == SEM_GLOBAL_LOCK) { 404 unmerge_queues(sma); 405 complexmode_tryleave(sma); 406 ipc_unlock_object(&sma->sem_perm); 407 } else { 408 struct sem *sem = &sma->sems[locknum]; 409 spin_unlock(&sem->lock); 410 } 411 } 412 413 /* 414 * sem_lock_(check_) routines are called in the paths where the rwsem 415 * is not held. 416 * 417 * The caller holds the RCU read lock. 418 */ 419 static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id) 420 { 421 struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id); 422 423 if (IS_ERR(ipcp)) 424 return ERR_CAST(ipcp); 425 426 return container_of(ipcp, struct sem_array, sem_perm); 427 } 428 429 static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns, 430 int id) 431 { 432 struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id); 433 434 if (IS_ERR(ipcp)) 435 return ERR_CAST(ipcp); 436 437 return container_of(ipcp, struct sem_array, sem_perm); 438 } 439 440 static inline void sem_lock_and_putref(struct sem_array *sma) 441 { 442 sem_lock(sma, NULL, -1); 443 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 444 } 445 446 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) 447 { 448 ipc_rmid(&sem_ids(ns), &s->sem_perm); 449 } 450 451 static struct sem_array *sem_alloc(size_t nsems) 452 { 453 struct sem_array *sma; 454 size_t size; 455 456 if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0])) 457 return NULL; 458 459 size = sizeof(*sma) + nsems * sizeof(sma->sems[0]); 460 sma = kvmalloc(size, GFP_KERNEL); 461 if (unlikely(!sma)) 462 return NULL; 463 464 memset(sma, 0, size); 465 466 return sma; 467 } 468 469 /** 470 * newary - Create a new semaphore set 471 * @ns: namespace 472 * @params: ptr to the structure that contains key, semflg and nsems 473 * 474 * Called with sem_ids.rwsem held (as a writer) 475 */ 476 static int newary(struct ipc_namespace *ns, struct ipc_params *params) 477 { 478 int retval; 479 struct sem_array *sma; 480 key_t key = params->key; 481 int nsems = params->u.nsems; 482 int semflg = params->flg; 483 int i; 484 485 if (!nsems) 486 return -EINVAL; 487 if (ns->used_sems + nsems > ns->sc_semmns) 488 return -ENOSPC; 489 490 sma = sem_alloc(nsems); 491 if (!sma) 492 return -ENOMEM; 493 494 sma->sem_perm.mode = (semflg & S_IRWXUGO); 495 sma->sem_perm.key = key; 496 497 sma->sem_perm.security = NULL; 498 retval = security_sem_alloc(sma); 499 if (retval) { 500 kvfree(sma); 501 return retval; 502 } 503 504 for (i = 0; i < nsems; i++) { 505 INIT_LIST_HEAD(&sma->sems[i].pending_alter); 506 INIT_LIST_HEAD(&sma->sems[i].pending_const); 507 spin_lock_init(&sma->sems[i].lock); 508 } 509 510 sma->complex_count = 0; 511 sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS; 512 INIT_LIST_HEAD(&sma->pending_alter); 513 INIT_LIST_HEAD(&sma->pending_const); 514 INIT_LIST_HEAD(&sma->list_id); 515 sma->sem_nsems = nsems; 516 sma->sem_ctime = ktime_get_real_seconds(); 517 518 /* ipc_addid() locks sma upon success. */ 519 retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); 520 if (retval < 0) { 521 call_rcu(&sma->sem_perm.rcu, sem_rcu_free); 522 return retval; 523 } 524 ns->used_sems += nsems; 525 526 sem_unlock(sma, -1); 527 rcu_read_unlock(); 528 529 return sma->sem_perm.id; 530 } 531 532 533 /* 534 * Called with sem_ids.rwsem and ipcp locked. 
 */
static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
{
	struct sem_array *sma;

	sma = container_of(ipcp, struct sem_array, sem_perm);
	return security_sem_associate(sma, semflg);
}

/*
 * Called with sem_ids.rwsem and ipcp locked.
 */
static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
				  struct ipc_params *params)
{
	struct sem_array *sma;

	sma = container_of(ipcp, struct sem_array, sem_perm);
	if (params->u.nsems > sma->sem_nsems)
		return -EINVAL;

	return 0;
}

SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
{
	struct ipc_namespace *ns;
	static const struct ipc_ops sem_ops = {
		.getnew = newary,
		.associate = sem_security,
		.more_checks = sem_more_checks,
	};
	struct ipc_params sem_params;

	ns = current->nsproxy->ipc_ns;

	if (nsems < 0 || nsems > ns->sc_semmsl)
		return -EINVAL;

	sem_params.key = key;
	sem_params.flg = semflg;
	sem_params.u.nsems = nsems;

	return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
}

/**
 * perform_atomic_semop[_slow] - Attempt to perform semaphore
 *                               operations on a given array.
 * @sma: semaphore array
 * @q: struct sem_queue that describes the operation
 *
 * Whether the caller blocks depends on the value indicated by the
 * semaphore operation (sem_op):
 *
 * (1) >0 never blocks.
 * (2) 0 (wait-for-zero operation): blocks if semval is non-zero.
 * (3) <0 blocks if the operation would decrement semval below zero.
 *
 * Returns 0 if the operation was possible.
 * Returns 1 if the operation is impossible, the caller must sleep.
 * Returns <0 for error codes.
 */
static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q)
{
	int result, sem_op, nsops, pid;
	struct sembuf *sop;
	struct sem *curr;
	struct sembuf *sops;
	struct sem_undo *un;

	sops = q->sops;
	nsops = q->nsops;
	un = q->undo;

	for (sop = sops; sop < sops + nsops; sop++) {
		curr = &sma->sems[sop->sem_num];
		sem_op = sop->sem_op;
		result = curr->semval;

		if (!sem_op && result)
			goto would_block;

		result += sem_op;
		if (result < 0)
			goto would_block;
		if (result > SEMVMX)
			goto out_of_range;

		if (sop->sem_flg & SEM_UNDO) {
			int undo = un->semadj[sop->sem_num] - sem_op;
			/* Exceeding the undo range is an error.
*/ 627 if (undo < (-SEMAEM - 1) || undo > SEMAEM) 628 goto out_of_range; 629 un->semadj[sop->sem_num] = undo; 630 } 631 632 curr->semval = result; 633 } 634 635 sop--; 636 pid = q->pid; 637 while (sop >= sops) { 638 sma->sems[sop->sem_num].sempid = pid; 639 sop--; 640 } 641 642 return 0; 643 644 out_of_range: 645 result = -ERANGE; 646 goto undo; 647 648 would_block: 649 q->blocking = sop; 650 651 if (sop->sem_flg & IPC_NOWAIT) 652 result = -EAGAIN; 653 else 654 result = 1; 655 656 undo: 657 sop--; 658 while (sop >= sops) { 659 sem_op = sop->sem_op; 660 sma->sems[sop->sem_num].semval -= sem_op; 661 if (sop->sem_flg & SEM_UNDO) 662 un->semadj[sop->sem_num] += sem_op; 663 sop--; 664 } 665 666 return result; 667 } 668 669 static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q) 670 { 671 int result, sem_op, nsops; 672 struct sembuf *sop; 673 struct sem *curr; 674 struct sembuf *sops; 675 struct sem_undo *un; 676 677 sops = q->sops; 678 nsops = q->nsops; 679 un = q->undo; 680 681 if (unlikely(q->dupsop)) 682 return perform_atomic_semop_slow(sma, q); 683 684 /* 685 * We scan the semaphore set twice, first to ensure that the entire 686 * operation can succeed, therefore avoiding any pointless writes 687 * to shared memory and having to undo such changes in order to block 688 * until the operations can go through. 689 */ 690 for (sop = sops; sop < sops + nsops; sop++) { 691 curr = &sma->sems[sop->sem_num]; 692 sem_op = sop->sem_op; 693 result = curr->semval; 694 695 if (!sem_op && result) 696 goto would_block; /* wait-for-zero */ 697 698 result += sem_op; 699 if (result < 0) 700 goto would_block; 701 702 if (result > SEMVMX) 703 return -ERANGE; 704 705 if (sop->sem_flg & SEM_UNDO) { 706 int undo = un->semadj[sop->sem_num] - sem_op; 707 708 /* Exceeding the undo range is an error. */ 709 if (undo < (-SEMAEM - 1) || undo > SEMAEM) 710 return -ERANGE; 711 } 712 } 713 714 for (sop = sops; sop < sops + nsops; sop++) { 715 curr = &sma->sems[sop->sem_num]; 716 sem_op = sop->sem_op; 717 result = curr->semval; 718 719 if (sop->sem_flg & SEM_UNDO) { 720 int undo = un->semadj[sop->sem_num] - sem_op; 721 722 un->semadj[sop->sem_num] = undo; 723 } 724 curr->semval += sem_op; 725 curr->sempid = q->pid; 726 } 727 728 return 0; 729 730 would_block: 731 q->blocking = sop; 732 return sop->sem_flg & IPC_NOWAIT ? -EAGAIN : 1; 733 } 734 735 static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error, 736 struct wake_q_head *wake_q) 737 { 738 wake_q_add(wake_q, q->sleeper); 739 /* 740 * Rely on the above implicit barrier, such that we can 741 * ensure that we hold reference to the task before setting 742 * q->status. Otherwise we could race with do_exit if the 743 * task is awoken by an external event before calling 744 * wake_up_process(). 745 */ 746 WRITE_ONCE(q->status, error); 747 } 748 749 static void unlink_queue(struct sem_array *sma, struct sem_queue *q) 750 { 751 list_del(&q->list); 752 if (q->nsops > 1) 753 sma->complex_count--; 754 } 755 756 /** check_restart(sma, q) 757 * @sma: semaphore array 758 * @q: the operation that just completed 759 * 760 * update_queue is O(N^2) when it restarts scanning the whole queue of 761 * waiting operations. Therefore this function checks if the restart is 762 * really necessary. It is called after a previously waiting operation 763 * modified the array. 764 * Note that wait-for-zero operations are handled without restart. 
 */
static inline int check_restart(struct sem_array *sma, struct sem_queue *q)
{
	/* pending complex alter operations are too difficult to analyse */
	if (!list_empty(&sma->pending_alter))
		return 1;

	/* we were a sleeping complex operation. Too difficult */
	if (q->nsops > 1)
		return 1;

	/* It is impossible that someone waits for the new value:
	 * - complex operations always restart.
	 * - wait-for-zero are handled separately.
	 * - q is a previously sleeping simple operation that
	 *   altered the array. It must be a decrement, because
	 *   simple increments never sleep.
	 * - If there are older (higher priority) decrements
	 *   in the queue, then they have observed the original
	 *   semval value and couldn't proceed. The operation
	 *   decremented the value - thus they won't proceed either.
	 */
	return 0;
}

/**
 * wake_const_ops - wake up non-alter tasks
 * @sma: semaphore array.
 * @semnum: semaphore that was modified.
 * @wake_q: lockless wake-queue head.
 *
 * wake_const_ops must be called after a semaphore in a semaphore array
 * was set to 0. If complex const operations are pending, wake_const_ops must
 * be called with semnum = -1, as well as with the number of each modified
 * semaphore.
 * The tasks that must be woken up are added to @wake_q. The return code
 * is stored in q->status.
 * The function returns 1 if at least one operation was completed successfully.
 */
static int wake_const_ops(struct sem_array *sma, int semnum,
			  struct wake_q_head *wake_q)
{
	struct sem_queue *q, *tmp;
	struct list_head *pending_list;
	int semop_completed = 0;

	if (semnum == -1)
		pending_list = &sma->pending_const;
	else
		pending_list = &sma->sems[semnum].pending_const;

	list_for_each_entry_safe(q, tmp, pending_list, list) {
		int error = perform_atomic_semop(sma, q);

		if (error > 0)
			continue;
		/* operation completed, remove from queue & wakeup */
		unlink_queue(sma, q);

		wake_up_sem_queue_prepare(q, error, wake_q);
		if (error == 0)
			semop_completed = 1;
	}

	return semop_completed;
}

/**
 * do_smart_wakeup_zero - wakeup all wait for zero tasks
 * @sma: semaphore array
 * @sops: operations that were performed
 * @nsops: number of operations
 * @wake_q: lockless wake-queue head
 *
 * Checks all required queues for wait-for-zero operations, based
 * on the actual changes that were performed on the semaphore array.
 * The function returns 1 if at least one operation was completed successfully.
 */
static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
				int nsops, struct wake_q_head *wake_q)
{
	int i;
	int semop_completed = 0;
	int got_zero = 0;

	/* first: the per-semaphore queues, if known */
	if (sops) {
		for (i = 0; i < nsops; i++) {
			int num = sops[i].sem_num;

			if (sma->sems[num].semval == 0) {
				got_zero = 1;
				semop_completed |= wake_const_ops(sma, num, wake_q);
			}
		}
	} else {
		/*
		 * No sops means modified semaphores not known.
		 * Assume all were changed.
		 */
		for (i = 0; i < sma->sem_nsems; i++) {
			if (sma->sems[i].semval == 0) {
				got_zero = 1;
				semop_completed |= wake_const_ops(sma, i, wake_q);
			}
		}
	}
	/*
	 * If one of the modified semaphores got 0,
	 * then check the global queue, too.
	 */
	if (got_zero)
		semop_completed |= wake_const_ops(sma, -1, wake_q);

	return semop_completed;
}


/**
 * update_queue - look for tasks that can be completed.
 * @sma: semaphore array.
 * @semnum: semaphore that was modified.
 * @wake_q: lockless wake-queue head.
 *
 * update_queue must be called after a semaphore in a semaphore array
 * was modified. If multiple semaphores were modified, update_queue must
 * be called with semnum = -1, as well as with the number of each modified
 * semaphore.
 * The tasks that must be woken up are added to @wake_q. The return code
 * is stored in q->status.
 * The function internally checks if const operations can now succeed.
 *
 * The function returns 1 if at least one semop was completed successfully.
 */
static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *wake_q)
{
	struct sem_queue *q, *tmp;
	struct list_head *pending_list;
	int semop_completed = 0;

	if (semnum == -1)
		pending_list = &sma->pending_alter;
	else
		pending_list = &sma->sems[semnum].pending_alter;

again:
	list_for_each_entry_safe(q, tmp, pending_list, list) {
		int error, restart;

		/* If we are scanning the single sop, per-semaphore list of
		 * one semaphore and that semaphore is 0, then it is not
		 * necessary to scan further: simple increments
		 * that affect only one entry succeed immediately and cannot
		 * be in the per-semaphore pending queue, and decrements
		 * cannot be successful if the value is already 0.
		 */
		if (semnum != -1 && sma->sems[semnum].semval == 0)
			break;

		error = perform_atomic_semop(sma, q);

		/* Does q->sleeper still need to sleep? */
		if (error > 0)
			continue;

		unlink_queue(sma, q);

		if (error) {
			restart = 0;
		} else {
			semop_completed = 1;
			do_smart_wakeup_zero(sma, q->sops, q->nsops, wake_q);
			restart = check_restart(sma, q);
		}

		wake_up_sem_queue_prepare(q, error, wake_q);
		if (restart)
			goto again;
	}
	return semop_completed;
}

/**
 * set_semotime - set sem_otime
 * @sma: semaphore array
 * @sops: operations that modified the array, may be NULL
 *
 * sem_otime is replicated to avoid cache line thrashing.
 * This function sets one instance to the current time.
 */
static void set_semotime(struct sem_array *sma, struct sembuf *sops)
{
	if (sops == NULL) {
		sma->sems[0].sem_otime = get_seconds();
	} else {
		sma->sems[sops[0].sem_num].sem_otime =
						get_seconds();
	}
}

/**
 * do_smart_update - optimized update_queue
 * @sma: semaphore array
 * @sops: operations that were performed
 * @nsops: number of operations
 * @otime: force setting otime
 * @wake_q: lockless wake-queue head
 *
 * do_smart_update() does the required calls to update_queue and wakeup_zero,
 * based on the actual changes that were performed on the semaphore array.
 * Note that the function does not do the actual wake-up: the caller is
 * responsible for calling wake_up_q().
 * It is safe to perform this call after dropping all locks.
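 *
 * Typical caller pattern in this file (sketch):
 *	sem_lock(sma, sops, nsops);
 *	... modify semaphore values ...
 *	do_smart_update(sma, sops, nsops, otime, &wake_q);
 *	sem_unlock(sma, locknum);
 *	rcu_read_unlock();
 *	wake_up_q(&wake_q);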
 */
static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops,
			    int otime, struct wake_q_head *wake_q)
{
	int i;

	otime |= do_smart_wakeup_zero(sma, sops, nsops, wake_q);

	if (!list_empty(&sma->pending_alter)) {
		/* semaphore array uses the global queue - just process it. */
		otime |= update_queue(sma, -1, wake_q);
	} else {
		if (!sops) {
			/*
			 * No sops, thus the modified semaphores are not
			 * known. Check all.
			 */
			for (i = 0; i < sma->sem_nsems; i++)
				otime |= update_queue(sma, i, wake_q);
		} else {
			/*
			 * Check the semaphores that were increased:
			 * - No complex ops, thus all sleeping ops are
			 *   decrements.
			 * - if we decreased the value, then any sleeping
			 *   semaphore ops won't be able to run: If the
			 *   previous value was too small, then the new
			 *   value will be too small, too.
			 */
			for (i = 0; i < nsops; i++) {
				if (sops[i].sem_op > 0) {
					otime |= update_queue(sma,
							      sops[i].sem_num, wake_q);
				}
			}
		}
	}
	if (otime)
		set_semotime(sma, sops);
}

/*
 * check_qop: Test if a queued operation sleeps on the semaphore semnum
 */
static int check_qop(struct sem_array *sma, int semnum, struct sem_queue *q,
		     bool count_zero)
{
	struct sembuf *sop = q->blocking;

	/*
	 * Linux always (since 0.99.10) reported a task as sleeping on all
	 * semaphores. This violates SUS, therefore it was changed to the
	 * standard compliant behavior.
	 * Give the administrators a chance to notice that an application
	 * might misbehave because it relies on the Linux behavior.
	 */
	pr_info_once("semctl(GETNCNT/GETZCNT) is since 3.16 Single Unix Specification compliant.\n"
		     "The task %s (%d) triggered the difference, watch for misbehavior.\n",
		     current->comm, task_pid_nr(current));

	if (sop->sem_num != semnum)
		return 0;

	if (count_zero && sop->sem_op == 0)
		return 1;
	if (!count_zero && sop->sem_op < 0)
		return 1;

	return 0;
}

/* The following counts are associated to each semaphore:
 *	semncnt		number of tasks waiting on semval being nonzero
 *	semzcnt		number of tasks waiting on semval being zero
 *
 * By definition, a task waits only on the semaphore of the first semop
 * that cannot proceed, even if additional operations would block, too.
 */
static int count_semcnt(struct sem_array *sma, ushort semnum,
			bool count_zero)
{
	struct list_head *l;
	struct sem_queue *q;
	int semcnt;

	semcnt = 0;
	/* First: check the simple operations. They are easy to evaluate */
	if (count_zero)
		l = &sma->sems[semnum].pending_const;
	else
		l = &sma->sems[semnum].pending_alter;

	list_for_each_entry(q, l, list) {
		/* all tasks on a per-semaphore list sleep on exactly
		 * that semaphore
		 */
		semcnt++;
	}

	/* Then: check the complex operations. */
	list_for_each_entry(q, &sma->pending_alter, list) {
		semcnt += check_qop(sma, semnum, q, count_zero);
	}
	if (count_zero) {
		list_for_each_entry(q, &sma->pending_const, list) {
			semcnt += check_qop(sma, semnum, q, count_zero);
		}
	}
	return semcnt;
}

/* Free a semaphore set. freeary() is called with sem_ids.rwsem locked
 * as a writer and the spinlock for this semaphore set held. sem_ids.rwsem
 * remains locked on exit.
1092 */ 1093 static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 1094 { 1095 struct sem_undo *un, *tu; 1096 struct sem_queue *q, *tq; 1097 struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); 1098 int i; 1099 DEFINE_WAKE_Q(wake_q); 1100 1101 /* Free the existing undo structures for this semaphore set. */ 1102 ipc_assert_locked_object(&sma->sem_perm); 1103 list_for_each_entry_safe(un, tu, &sma->list_id, list_id) { 1104 list_del(&un->list_id); 1105 spin_lock(&un->ulp->lock); 1106 un->semid = -1; 1107 list_del_rcu(&un->list_proc); 1108 spin_unlock(&un->ulp->lock); 1109 kfree_rcu(un, rcu); 1110 } 1111 1112 /* Wake up all pending processes and let them fail with EIDRM. */ 1113 list_for_each_entry_safe(q, tq, &sma->pending_const, list) { 1114 unlink_queue(sma, q); 1115 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1116 } 1117 1118 list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { 1119 unlink_queue(sma, q); 1120 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1121 } 1122 for (i = 0; i < sma->sem_nsems; i++) { 1123 struct sem *sem = &sma->sems[i]; 1124 list_for_each_entry_safe(q, tq, &sem->pending_const, list) { 1125 unlink_queue(sma, q); 1126 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1127 } 1128 list_for_each_entry_safe(q, tq, &sem->pending_alter, list) { 1129 unlink_queue(sma, q); 1130 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1131 } 1132 } 1133 1134 /* Remove the semaphore set from the IDR */ 1135 sem_rmid(ns, sma); 1136 sem_unlock(sma, -1); 1137 rcu_read_unlock(); 1138 1139 wake_up_q(&wake_q); 1140 ns->used_sems -= sma->sem_nsems; 1141 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1142 } 1143 1144 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) 1145 { 1146 switch (version) { 1147 case IPC_64: 1148 return copy_to_user(buf, in, sizeof(*in)); 1149 case IPC_OLD: 1150 { 1151 struct semid_ds out; 1152 1153 memset(&out, 0, sizeof(out)); 1154 1155 ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm); 1156 1157 out.sem_otime = in->sem_otime; 1158 out.sem_ctime = in->sem_ctime; 1159 out.sem_nsems = in->sem_nsems; 1160 1161 return copy_to_user(buf, &out, sizeof(out)); 1162 } 1163 default: 1164 return -EINVAL; 1165 } 1166 } 1167 1168 static time64_t get_semotime(struct sem_array *sma) 1169 { 1170 int i; 1171 time64_t res; 1172 1173 res = sma->sems[0].sem_otime; 1174 for (i = 1; i < sma->sem_nsems; i++) { 1175 time64_t to = sma->sems[i].sem_otime; 1176 1177 if (to > res) 1178 res = to; 1179 } 1180 return res; 1181 } 1182 1183 static int semctl_stat(struct ipc_namespace *ns, int semid, 1184 int cmd, struct semid64_ds *semid64) 1185 { 1186 struct sem_array *sma; 1187 int id = 0; 1188 int err; 1189 1190 memset(semid64, 0, sizeof(*semid64)); 1191 1192 rcu_read_lock(); 1193 if (cmd == SEM_STAT) { 1194 sma = sem_obtain_object(ns, semid); 1195 if (IS_ERR(sma)) { 1196 err = PTR_ERR(sma); 1197 goto out_unlock; 1198 } 1199 id = sma->sem_perm.id; 1200 } else { 1201 sma = sem_obtain_object_check(ns, semid); 1202 if (IS_ERR(sma)) { 1203 err = PTR_ERR(sma); 1204 goto out_unlock; 1205 } 1206 } 1207 1208 err = -EACCES; 1209 if (ipcperms(ns, &sma->sem_perm, S_IRUGO)) 1210 goto out_unlock; 1211 1212 err = security_sem_semctl(sma, cmd); 1213 if (err) 1214 goto out_unlock; 1215 1216 ipc_lock_object(&sma->sem_perm); 1217 1218 if (!ipc_valid_object(&sma->sem_perm)) { 1219 ipc_unlock_object(&sma->sem_perm); 1220 err = -EIDRM; 1221 goto out_unlock; 1222 } 1223 1224 kernel_to_ipc64_perm(&sma->sem_perm, &semid64->sem_perm); 
1225 semid64->sem_otime = get_semotime(sma); 1226 semid64->sem_ctime = sma->sem_ctime; 1227 semid64->sem_nsems = sma->sem_nsems; 1228 1229 ipc_unlock_object(&sma->sem_perm); 1230 rcu_read_unlock(); 1231 return id; 1232 1233 out_unlock: 1234 rcu_read_unlock(); 1235 return err; 1236 } 1237 1238 static int semctl_info(struct ipc_namespace *ns, int semid, 1239 int cmd, void __user *p) 1240 { 1241 struct seminfo seminfo; 1242 int max_id; 1243 int err; 1244 1245 err = security_sem_semctl(NULL, cmd); 1246 if (err) 1247 return err; 1248 1249 memset(&seminfo, 0, sizeof(seminfo)); 1250 seminfo.semmni = ns->sc_semmni; 1251 seminfo.semmns = ns->sc_semmns; 1252 seminfo.semmsl = ns->sc_semmsl; 1253 seminfo.semopm = ns->sc_semopm; 1254 seminfo.semvmx = SEMVMX; 1255 seminfo.semmnu = SEMMNU; 1256 seminfo.semmap = SEMMAP; 1257 seminfo.semume = SEMUME; 1258 down_read(&sem_ids(ns).rwsem); 1259 if (cmd == SEM_INFO) { 1260 seminfo.semusz = sem_ids(ns).in_use; 1261 seminfo.semaem = ns->used_sems; 1262 } else { 1263 seminfo.semusz = SEMUSZ; 1264 seminfo.semaem = SEMAEM; 1265 } 1266 max_id = ipc_get_maxid(&sem_ids(ns)); 1267 up_read(&sem_ids(ns).rwsem); 1268 if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) 1269 return -EFAULT; 1270 return (max_id < 0) ? 0 : max_id; 1271 } 1272 1273 static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, 1274 int val) 1275 { 1276 struct sem_undo *un; 1277 struct sem_array *sma; 1278 struct sem *curr; 1279 int err; 1280 DEFINE_WAKE_Q(wake_q); 1281 1282 if (val > SEMVMX || val < 0) 1283 return -ERANGE; 1284 1285 rcu_read_lock(); 1286 sma = sem_obtain_object_check(ns, semid); 1287 if (IS_ERR(sma)) { 1288 rcu_read_unlock(); 1289 return PTR_ERR(sma); 1290 } 1291 1292 if (semnum < 0 || semnum >= sma->sem_nsems) { 1293 rcu_read_unlock(); 1294 return -EINVAL; 1295 } 1296 1297 1298 if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) { 1299 rcu_read_unlock(); 1300 return -EACCES; 1301 } 1302 1303 err = security_sem_semctl(sma, SETVAL); 1304 if (err) { 1305 rcu_read_unlock(); 1306 return -EACCES; 1307 } 1308 1309 sem_lock(sma, NULL, -1); 1310 1311 if (!ipc_valid_object(&sma->sem_perm)) { 1312 sem_unlock(sma, -1); 1313 rcu_read_unlock(); 1314 return -EIDRM; 1315 } 1316 1317 curr = &sma->sems[semnum]; 1318 1319 ipc_assert_locked_object(&sma->sem_perm); 1320 list_for_each_entry(un, &sma->list_id, list_id) 1321 un->semadj[semnum] = 0; 1322 1323 curr->semval = val; 1324 curr->sempid = task_tgid_vnr(current); 1325 sma->sem_ctime = ktime_get_real_seconds(); 1326 /* maybe some queued-up processes were waiting for this */ 1327 do_smart_update(sma, NULL, 0, 0, &wake_q); 1328 sem_unlock(sma, -1); 1329 rcu_read_unlock(); 1330 wake_up_q(&wake_q); 1331 return 0; 1332 } 1333 1334 static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, 1335 int cmd, void __user *p) 1336 { 1337 struct sem_array *sma; 1338 struct sem *curr; 1339 int err, nsems; 1340 ushort fast_sem_io[SEMMSL_FAST]; 1341 ushort *sem_io = fast_sem_io; 1342 DEFINE_WAKE_Q(wake_q); 1343 1344 rcu_read_lock(); 1345 sma = sem_obtain_object_check(ns, semid); 1346 if (IS_ERR(sma)) { 1347 rcu_read_unlock(); 1348 return PTR_ERR(sma); 1349 } 1350 1351 nsems = sma->sem_nsems; 1352 1353 err = -EACCES; 1354 if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? 
S_IWUGO : S_IRUGO)) 1355 goto out_rcu_wakeup; 1356 1357 err = security_sem_semctl(sma, cmd); 1358 if (err) 1359 goto out_rcu_wakeup; 1360 1361 err = -EACCES; 1362 switch (cmd) { 1363 case GETALL: 1364 { 1365 ushort __user *array = p; 1366 int i; 1367 1368 sem_lock(sma, NULL, -1); 1369 if (!ipc_valid_object(&sma->sem_perm)) { 1370 err = -EIDRM; 1371 goto out_unlock; 1372 } 1373 if (nsems > SEMMSL_FAST) { 1374 if (!ipc_rcu_getref(&sma->sem_perm)) { 1375 err = -EIDRM; 1376 goto out_unlock; 1377 } 1378 sem_unlock(sma, -1); 1379 rcu_read_unlock(); 1380 sem_io = kvmalloc_array(nsems, sizeof(ushort), 1381 GFP_KERNEL); 1382 if (sem_io == NULL) { 1383 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1384 return -ENOMEM; 1385 } 1386 1387 rcu_read_lock(); 1388 sem_lock_and_putref(sma); 1389 if (!ipc_valid_object(&sma->sem_perm)) { 1390 err = -EIDRM; 1391 goto out_unlock; 1392 } 1393 } 1394 for (i = 0; i < sma->sem_nsems; i++) 1395 sem_io[i] = sma->sems[i].semval; 1396 sem_unlock(sma, -1); 1397 rcu_read_unlock(); 1398 err = 0; 1399 if (copy_to_user(array, sem_io, nsems*sizeof(ushort))) 1400 err = -EFAULT; 1401 goto out_free; 1402 } 1403 case SETALL: 1404 { 1405 int i; 1406 struct sem_undo *un; 1407 1408 if (!ipc_rcu_getref(&sma->sem_perm)) { 1409 err = -EIDRM; 1410 goto out_rcu_wakeup; 1411 } 1412 rcu_read_unlock(); 1413 1414 if (nsems > SEMMSL_FAST) { 1415 sem_io = kvmalloc_array(nsems, sizeof(ushort), 1416 GFP_KERNEL); 1417 if (sem_io == NULL) { 1418 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1419 return -ENOMEM; 1420 } 1421 } 1422 1423 if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) { 1424 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1425 err = -EFAULT; 1426 goto out_free; 1427 } 1428 1429 for (i = 0; i < nsems; i++) { 1430 if (sem_io[i] > SEMVMX) { 1431 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1432 err = -ERANGE; 1433 goto out_free; 1434 } 1435 } 1436 rcu_read_lock(); 1437 sem_lock_and_putref(sma); 1438 if (!ipc_valid_object(&sma->sem_perm)) { 1439 err = -EIDRM; 1440 goto out_unlock; 1441 } 1442 1443 for (i = 0; i < nsems; i++) { 1444 sma->sems[i].semval = sem_io[i]; 1445 sma->sems[i].sempid = task_tgid_vnr(current); 1446 } 1447 1448 ipc_assert_locked_object(&sma->sem_perm); 1449 list_for_each_entry(un, &sma->list_id, list_id) { 1450 for (i = 0; i < nsems; i++) 1451 un->semadj[i] = 0; 1452 } 1453 sma->sem_ctime = ktime_get_real_seconds(); 1454 /* maybe some queued-up processes were waiting for this */ 1455 do_smart_update(sma, NULL, 0, 0, &wake_q); 1456 err = 0; 1457 goto out_unlock; 1458 } 1459 /* GETVAL, GETPID, GETNCTN, GETZCNT: fall-through */ 1460 } 1461 err = -EINVAL; 1462 if (semnum < 0 || semnum >= nsems) 1463 goto out_rcu_wakeup; 1464 1465 sem_lock(sma, NULL, -1); 1466 if (!ipc_valid_object(&sma->sem_perm)) { 1467 err = -EIDRM; 1468 goto out_unlock; 1469 } 1470 curr = &sma->sems[semnum]; 1471 1472 switch (cmd) { 1473 case GETVAL: 1474 err = curr->semval; 1475 goto out_unlock; 1476 case GETPID: 1477 err = curr->sempid; 1478 goto out_unlock; 1479 case GETNCNT: 1480 err = count_semcnt(sma, semnum, 0); 1481 goto out_unlock; 1482 case GETZCNT: 1483 err = count_semcnt(sma, semnum, 1); 1484 goto out_unlock; 1485 } 1486 1487 out_unlock: 1488 sem_unlock(sma, -1); 1489 out_rcu_wakeup: 1490 rcu_read_unlock(); 1491 wake_up_q(&wake_q); 1492 out_free: 1493 if (sem_io != fast_sem_io) 1494 kvfree(sem_io); 1495 return err; 1496 } 1497 1498 static inline unsigned long 1499 copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version) 1500 { 1501 switch (version) { 1502 case IPC_64: 
1503 if (copy_from_user(out, buf, sizeof(*out))) 1504 return -EFAULT; 1505 return 0; 1506 case IPC_OLD: 1507 { 1508 struct semid_ds tbuf_old; 1509 1510 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 1511 return -EFAULT; 1512 1513 out->sem_perm.uid = tbuf_old.sem_perm.uid; 1514 out->sem_perm.gid = tbuf_old.sem_perm.gid; 1515 out->sem_perm.mode = tbuf_old.sem_perm.mode; 1516 1517 return 0; 1518 } 1519 default: 1520 return -EINVAL; 1521 } 1522 } 1523 1524 /* 1525 * This function handles some semctl commands which require the rwsem 1526 * to be held in write mode. 1527 * NOTE: no locks must be held, the rwsem is taken inside this function. 1528 */ 1529 static int semctl_down(struct ipc_namespace *ns, int semid, 1530 int cmd, struct semid64_ds *semid64) 1531 { 1532 struct sem_array *sma; 1533 int err; 1534 struct kern_ipc_perm *ipcp; 1535 1536 down_write(&sem_ids(ns).rwsem); 1537 rcu_read_lock(); 1538 1539 ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd, 1540 &semid64->sem_perm, 0); 1541 if (IS_ERR(ipcp)) { 1542 err = PTR_ERR(ipcp); 1543 goto out_unlock1; 1544 } 1545 1546 sma = container_of(ipcp, struct sem_array, sem_perm); 1547 1548 err = security_sem_semctl(sma, cmd); 1549 if (err) 1550 goto out_unlock1; 1551 1552 switch (cmd) { 1553 case IPC_RMID: 1554 sem_lock(sma, NULL, -1); 1555 /* freeary unlocks the ipc object and rcu */ 1556 freeary(ns, ipcp); 1557 goto out_up; 1558 case IPC_SET: 1559 sem_lock(sma, NULL, -1); 1560 err = ipc_update_perm(&semid64->sem_perm, ipcp); 1561 if (err) 1562 goto out_unlock0; 1563 sma->sem_ctime = ktime_get_real_seconds(); 1564 break; 1565 default: 1566 err = -EINVAL; 1567 goto out_unlock1; 1568 } 1569 1570 out_unlock0: 1571 sem_unlock(sma, -1); 1572 out_unlock1: 1573 rcu_read_unlock(); 1574 out_up: 1575 up_write(&sem_ids(ns).rwsem); 1576 return err; 1577 } 1578 1579 SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg) 1580 { 1581 int version; 1582 struct ipc_namespace *ns; 1583 void __user *p = (void __user *)arg; 1584 struct semid64_ds semid64; 1585 int err; 1586 1587 if (semid < 0) 1588 return -EINVAL; 1589 1590 version = ipc_parse_version(&cmd); 1591 ns = current->nsproxy->ipc_ns; 1592 1593 switch (cmd) { 1594 case IPC_INFO: 1595 case SEM_INFO: 1596 return semctl_info(ns, semid, cmd, p); 1597 case IPC_STAT: 1598 case SEM_STAT: 1599 err = semctl_stat(ns, semid, cmd, &semid64); 1600 if (err < 0) 1601 return err; 1602 if (copy_semid_to_user(p, &semid64, version)) 1603 err = -EFAULT; 1604 return err; 1605 case GETALL: 1606 case GETVAL: 1607 case GETPID: 1608 case GETNCNT: 1609 case GETZCNT: 1610 case SETALL: 1611 return semctl_main(ns, semid, semnum, cmd, p); 1612 case SETVAL: { 1613 int val; 1614 #if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN) 1615 /* big-endian 64bit */ 1616 val = arg >> 32; 1617 #else 1618 /* 32bit or little-endian 64bit */ 1619 val = arg; 1620 #endif 1621 return semctl_setval(ns, semid, semnum, val); 1622 } 1623 case IPC_SET: 1624 if (copy_semid_from_user(&semid64, p, version)) 1625 return -EFAULT; 1626 case IPC_RMID: 1627 return semctl_down(ns, semid, cmd, &semid64); 1628 default: 1629 return -EINVAL; 1630 } 1631 } 1632 1633 #ifdef CONFIG_COMPAT 1634 1635 struct compat_semid_ds { 1636 struct compat_ipc_perm sem_perm; 1637 compat_time_t sem_otime; 1638 compat_time_t sem_ctime; 1639 compat_uptr_t sem_base; 1640 compat_uptr_t sem_pending; 1641 compat_uptr_t sem_pending_last; 1642 compat_uptr_t undo; 1643 unsigned short sem_nsems; 1644 }; 1645 1646 static int copy_compat_semid_from_user(struct 
semid64_ds *out, void __user *buf,
					int version)
{
	memset(out, 0, sizeof(*out));
	if (version == IPC_64) {
		struct compat_semid64_ds __user *p = buf;
		return get_compat_ipc64_perm(&out->sem_perm, &p->sem_perm);
	} else {
		struct compat_semid_ds __user *p = buf;
		return get_compat_ipc_perm(&out->sem_perm, &p->sem_perm);
	}
}

static int copy_compat_semid_to_user(void __user *buf, struct semid64_ds *in,
					int version)
{
	if (version == IPC_64) {
		struct compat_semid64_ds v;
		memset(&v, 0, sizeof(v));
		to_compat_ipc64_perm(&v.sem_perm, &in->sem_perm);
		v.sem_otime = in->sem_otime;
		v.sem_ctime = in->sem_ctime;
		v.sem_nsems = in->sem_nsems;
		return copy_to_user(buf, &v, sizeof(v));
	} else {
		struct compat_semid_ds v;
		memset(&v, 0, sizeof(v));
		to_compat_ipc_perm(&v.sem_perm, &in->sem_perm);
		v.sem_otime = in->sem_otime;
		v.sem_ctime = in->sem_ctime;
		v.sem_nsems = in->sem_nsems;
		return copy_to_user(buf, &v, sizeof(v));
	}
}

COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg)
{
	void __user *p = compat_ptr(arg);
	struct ipc_namespace *ns;
	struct semid64_ds semid64;
	int version = compat_ipc_parse_version(&cmd);
	int err;

	ns = current->nsproxy->ipc_ns;

	if (semid < 0)
		return -EINVAL;

	switch (cmd & (~IPC_64)) {
	case IPC_INFO:
	case SEM_INFO:
		return semctl_info(ns, semid, cmd, p);
	case IPC_STAT:
	case SEM_STAT:
		err = semctl_stat(ns, semid, cmd, &semid64);
		if (err < 0)
			return err;
		if (copy_compat_semid_to_user(p, &semid64, version))
			err = -EFAULT;
		return err;
	case GETVAL:
	case GETPID:
	case GETNCNT:
	case GETZCNT:
	case GETALL:
	case SETALL:
		return semctl_main(ns, semid, semnum, cmd, p);
	case SETVAL:
		return semctl_setval(ns, semid, semnum, arg);
	case IPC_SET:
		if (copy_compat_semid_from_user(&semid64, p, version))
			return -EFAULT;
		/* fallthru */
	case IPC_RMID:
		return semctl_down(ns, semid, cmd, &semid64);
	default:
		return -EINVAL;
	}
}
#endif

/* If the task doesn't already have an undo_list, then allocate one
 * here. We guarantee there is only one thread using this undo list,
 * and current is THE ONE
 *
 * If this allocation and assignment succeeds, but later
 * portions of this code fail, there is no need to free the sem_undo_list.
 * Just let it stay associated with the task, and it'll be freed later
 * at exit time.
 *
 * This can block, so callers must hold no locks.
1737 */ 1738 static inline int get_undo_list(struct sem_undo_list **undo_listp) 1739 { 1740 struct sem_undo_list *undo_list; 1741 1742 undo_list = current->sysvsem.undo_list; 1743 if (!undo_list) { 1744 undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL); 1745 if (undo_list == NULL) 1746 return -ENOMEM; 1747 spin_lock_init(&undo_list->lock); 1748 refcount_set(&undo_list->refcnt, 1); 1749 INIT_LIST_HEAD(&undo_list->list_proc); 1750 1751 current->sysvsem.undo_list = undo_list; 1752 } 1753 *undo_listp = undo_list; 1754 return 0; 1755 } 1756 1757 static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid) 1758 { 1759 struct sem_undo *un; 1760 1761 list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) { 1762 if (un->semid == semid) 1763 return un; 1764 } 1765 return NULL; 1766 } 1767 1768 static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid) 1769 { 1770 struct sem_undo *un; 1771 1772 assert_spin_locked(&ulp->lock); 1773 1774 un = __lookup_undo(ulp, semid); 1775 if (un) { 1776 list_del_rcu(&un->list_proc); 1777 list_add_rcu(&un->list_proc, &ulp->list_proc); 1778 } 1779 return un; 1780 } 1781 1782 /** 1783 * find_alloc_undo - lookup (and if not present create) undo array 1784 * @ns: namespace 1785 * @semid: semaphore array id 1786 * 1787 * The function looks up (and if not present creates) the undo structure. 1788 * The size of the undo structure depends on the size of the semaphore 1789 * array, thus the alloc path is not that straightforward. 1790 * Lifetime-rules: sem_undo is rcu-protected, on success, the function 1791 * performs a rcu_read_lock(). 1792 */ 1793 static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) 1794 { 1795 struct sem_array *sma; 1796 struct sem_undo_list *ulp; 1797 struct sem_undo *un, *new; 1798 int nsems, error; 1799 1800 error = get_undo_list(&ulp); 1801 if (error) 1802 return ERR_PTR(error); 1803 1804 rcu_read_lock(); 1805 spin_lock(&ulp->lock); 1806 un = lookup_undo(ulp, semid); 1807 spin_unlock(&ulp->lock); 1808 if (likely(un != NULL)) 1809 goto out; 1810 1811 /* no undo structure around - allocate one. */ 1812 /* step 1: figure out the size of the semaphore array */ 1813 sma = sem_obtain_object_check(ns, semid); 1814 if (IS_ERR(sma)) { 1815 rcu_read_unlock(); 1816 return ERR_CAST(sma); 1817 } 1818 1819 nsems = sma->sem_nsems; 1820 if (!ipc_rcu_getref(&sma->sem_perm)) { 1821 rcu_read_unlock(); 1822 un = ERR_PTR(-EIDRM); 1823 goto out; 1824 } 1825 rcu_read_unlock(); 1826 1827 /* step 2: allocate new undo structure */ 1828 new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); 1829 if (!new) { 1830 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free); 1831 return ERR_PTR(-ENOMEM); 1832 } 1833 1834 /* step 3: Acquire the lock on semaphore array */ 1835 rcu_read_lock(); 1836 sem_lock_and_putref(sma); 1837 if (!ipc_valid_object(&sma->sem_perm)) { 1838 sem_unlock(sma, -1); 1839 rcu_read_unlock(); 1840 kfree(new); 1841 un = ERR_PTR(-EIDRM); 1842 goto out; 1843 } 1844 spin_lock(&ulp->lock); 1845 1846 /* 1847 * step 4: check for races: did someone else allocate the undo struct? 
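	 * (Both ulp->lock and sma->sem_perm.lock are held at this point,
	 * so the re-check and the list insertions below cannot race with a
	 * concurrent find_alloc_undo() or freeary().)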
1848 */ 1849 un = lookup_undo(ulp, semid); 1850 if (un) { 1851 kfree(new); 1852 goto success; 1853 } 1854 /* step 5: initialize & link new undo structure */ 1855 new->semadj = (short *) &new[1]; 1856 new->ulp = ulp; 1857 new->semid = semid; 1858 assert_spin_locked(&ulp->lock); 1859 list_add_rcu(&new->list_proc, &ulp->list_proc); 1860 ipc_assert_locked_object(&sma->sem_perm); 1861 list_add(&new->list_id, &sma->list_id); 1862 un = new; 1863 1864 success: 1865 spin_unlock(&ulp->lock); 1866 sem_unlock(sma, -1); 1867 out: 1868 return un; 1869 } 1870 1871 static long do_semtimedop(int semid, struct sembuf __user *tsops, 1872 unsigned nsops, const struct timespec64 *timeout) 1873 { 1874 int error = -EINVAL; 1875 struct sem_array *sma; 1876 struct sembuf fast_sops[SEMOPM_FAST]; 1877 struct sembuf *sops = fast_sops, *sop; 1878 struct sem_undo *un; 1879 int max, locknum; 1880 bool undos = false, alter = false, dupsop = false; 1881 struct sem_queue queue; 1882 unsigned long dup = 0, jiffies_left = 0; 1883 struct ipc_namespace *ns; 1884 1885 ns = current->nsproxy->ipc_ns; 1886 1887 if (nsops < 1 || semid < 0) 1888 return -EINVAL; 1889 if (nsops > ns->sc_semopm) 1890 return -E2BIG; 1891 if (nsops > SEMOPM_FAST) { 1892 sops = kvmalloc(sizeof(*sops)*nsops, GFP_KERNEL); 1893 if (sops == NULL) 1894 return -ENOMEM; 1895 } 1896 1897 if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) { 1898 error = -EFAULT; 1899 goto out_free; 1900 } 1901 1902 if (timeout) { 1903 if (timeout->tv_sec < 0 || timeout->tv_nsec < 0 || 1904 timeout->tv_nsec >= 1000000000L) { 1905 error = -EINVAL; 1906 goto out_free; 1907 } 1908 jiffies_left = timespec64_to_jiffies(timeout); 1909 } 1910 1911 max = 0; 1912 for (sop = sops; sop < sops + nsops; sop++) { 1913 unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG); 1914 1915 if (sop->sem_num >= max) 1916 max = sop->sem_num; 1917 if (sop->sem_flg & SEM_UNDO) 1918 undos = true; 1919 if (dup & mask) { 1920 /* 1921 * There was a previous alter access that appears 1922 * to have accessed the same semaphore, thus use 1923 * the dupsop logic. "appears", because the detection 1924 * can only check % BITS_PER_LONG. 1925 */ 1926 dupsop = true; 1927 } 1928 if (sop->sem_op != 0) { 1929 alter = true; 1930 dup |= mask; 1931 } 1932 } 1933 1934 if (undos) { 1935 /* On success, find_alloc_undo takes the rcu_read_lock */ 1936 un = find_alloc_undo(ns, semid); 1937 if (IS_ERR(un)) { 1938 error = PTR_ERR(un); 1939 goto out_free; 1940 } 1941 } else { 1942 un = NULL; 1943 rcu_read_lock(); 1944 } 1945 1946 sma = sem_obtain_object_check(ns, semid); 1947 if (IS_ERR(sma)) { 1948 rcu_read_unlock(); 1949 error = PTR_ERR(sma); 1950 goto out_free; 1951 } 1952 1953 error = -EFBIG; 1954 if (max >= sma->sem_nsems) { 1955 rcu_read_unlock(); 1956 goto out_free; 1957 } 1958 1959 error = -EACCES; 1960 if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) { 1961 rcu_read_unlock(); 1962 goto out_free; 1963 } 1964 1965 error = security_sem_semop(sma, sops, nsops, alter); 1966 if (error) { 1967 rcu_read_unlock(); 1968 goto out_free; 1969 } 1970 1971 error = -EIDRM; 1972 locknum = sem_lock(sma, sops, nsops); 1973 /* 1974 * We eventually might perform the following check in a lockless 1975 * fashion, considering ipc_valid_object() locking constraints. 1976 * If nsops == 1 and there is no contention for sem_perm.lock, then 1977 * only a per-semaphore lock is held and it's OK to proceed with the 1978 * check below. 
	 * More details on the fine grained locking scheme entangled here,
	 * and why it's RMID race safe, are in the comments at sem_lock().
	 */
	if (!ipc_valid_object(&sma->sem_perm))
		goto out_unlock_free;
	/*
	 * semid identifiers are not unique - find_alloc_undo may have
	 * allocated an undo structure, it was invalidated by an RMID
	 * and now a new array has received the same id. Check and fail.
	 * This case can be detected checking un->semid. The existence of
	 * "un" itself is guaranteed by rcu.
	 */
	if (un && un->semid == -1)
		goto out_unlock_free;

	queue.sops = sops;
	queue.nsops = nsops;
	queue.undo = un;
	queue.pid = task_tgid_vnr(current);
	queue.alter = alter;
	queue.dupsop = dupsop;

	error = perform_atomic_semop(sma, &queue);
	if (error == 0) { /* non-blocking successful path */
		DEFINE_WAKE_Q(wake_q);

		/*
		 * If the operation was successful, then do
		 * the required updates.
		 */
		if (alter)
			do_smart_update(sma, sops, nsops, 1, &wake_q);
		else
			set_semotime(sma, sops);

		sem_unlock(sma, locknum);
		rcu_read_unlock();
		wake_up_q(&wake_q);

		goto out_free;
	}
	if (error < 0) /* non-blocking error path */
		goto out_unlock_free;

	/*
	 * We need to sleep on this operation, so we put the current
	 * task into the pending queue and go to sleep.
	 */
	if (nsops == 1) {
		struct sem *curr;
		curr = &sma->sems[sops->sem_num];

		if (alter) {
			if (sma->complex_count) {
				list_add_tail(&queue.list,
						&sma->pending_alter);
			} else {

				list_add_tail(&queue.list,
						&curr->pending_alter);
			}
		} else {
			list_add_tail(&queue.list, &curr->pending_const);
		}
	} else {
		if (!sma->complex_count)
			merge_queues(sma);

		if (alter)
			list_add_tail(&queue.list, &sma->pending_alter);
		else
			list_add_tail(&queue.list, &sma->pending_const);

		sma->complex_count++;
	}

	do {
		queue.status = -EINTR;
		queue.sleeper = current;

		__set_current_state(TASK_INTERRUPTIBLE);
		sem_unlock(sma, locknum);
		rcu_read_unlock();

		if (timeout)
			jiffies_left = schedule_timeout(jiffies_left);
		else
			schedule();

		/*
		 * fastpath: the semop has completed, either successfully or
		 * not, from the syscall pov, is quite irrelevant to us at this
		 * point; we're done.
		 *
		 * We _do_ care, nonetheless, about being awoken by a signal or
		 * spuriously. The queue.status is checked again in the
		 * slowpath (aka after taking sem_lock), such that we can detect
		 * scenarios where we were awakened externally, during the
		 * window between wake_q_add() and wake_up_q().
		 */
		error = READ_ONCE(queue.status);
		if (error != -EINTR) {
			/*
			 * User space could assume that semop() is a memory
			 * barrier: Without the mb(), the cpu could
			 * speculatively read in userspace stale data that was
			 * overwritten by the previous owner of the semaphore.
			 */
			smp_mb();
			goto out_free;
		}

		rcu_read_lock();
		locknum = sem_lock(sma, sops, nsops);

		if (!ipc_valid_object(&sma->sem_perm))
			goto out_unlock_free;

		error = READ_ONCE(queue.status);

		/*
		 * If queue.status != -EINTR we are woken up by another process.
		 * Leave without unlink_queue(), but with sem_unlock().
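		 * The waker already removed us from the pending queues
		 * (see unlink_queue() in update_queue()/wake_const_ops()),
		 * so unlinking again here would corrupt the lists.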
		 */
		if (error != -EINTR)
			goto out_unlock_free;

		/*
		 * If an interrupt occurred we have to clean up the queue.
		 */
		if (timeout && jiffies_left == 0)
			error = -EAGAIN;
	} while (error == -EINTR && !signal_pending(current)); /* spurious */

	unlink_queue(sma, &queue);

out_unlock_free:
	sem_unlock(sma, locknum);
	rcu_read_unlock();
out_free:
	if (sops != fast_sops)
		kvfree(sops);
	return error;
}

SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
		unsigned, nsops, const struct timespec __user *, timeout)
{
	if (timeout) {
		struct timespec64 ts;
		if (get_timespec64(&ts, timeout))
			return -EFAULT;
		return do_semtimedop(semid, tsops, nsops, &ts);
	}
	return do_semtimedop(semid, tsops, nsops, NULL);
}

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsems,
		unsigned, nsops,
		const struct compat_timespec __user *, timeout)
{
	if (timeout) {
		struct timespec64 ts;
		if (compat_get_timespec64(&ts, timeout))
			return -EFAULT;
		return do_semtimedop(semid, tsems, nsops, &ts);
	}
	return do_semtimedop(semid, tsems, nsops, NULL);
}
#endif

SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops,
		unsigned, nsops)
{
	return do_semtimedop(semid, tsops, nsops, NULL);
}

/* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
 * parent and child tasks.
 */

int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
{
	struct sem_undo_list *undo_list;
	int error;

	if (clone_flags & CLONE_SYSVSEM) {
		error = get_undo_list(&undo_list);
		if (error)
			return error;
		refcount_inc(&undo_list->refcnt);
		tsk->sysvsem.undo_list = undo_list;
	} else
		tsk->sysvsem.undo_list = NULL;

	return 0;
}

/*
 * add semadj values to semaphores, free undo structures.
 * undo structures are not freed when semaphore arrays are destroyed,
 * so some of them may be out of date.
 * IMPLEMENTATION NOTE: There is some confusion over whether the set
 * of adjustments should be applied atomically or not. That is, if we
 * are attempting to decrement the semval, should we queue up and wait
 * until we can do so legally?
 * The original implementation attempted to do this (queue and wait).
 * The current implementation does not do so. The POSIX standard
 * and SVID should be consulted to determine what behavior is mandated.
 */
void exit_sem(struct task_struct *tsk)
{
	struct sem_undo_list *ulp;

	ulp = tsk->sysvsem.undo_list;
	if (!ulp)
		return;
	tsk->sysvsem.undo_list = NULL;

	if (!refcount_dec_and_test(&ulp->refcnt))
		return;

	for (;;) {
		struct sem_array *sma;
		struct sem_undo *un;
		int semid, i;
		DEFINE_WAKE_Q(wake_q);

		cond_resched();

		rcu_read_lock();
		un = list_entry_rcu(ulp->list_proc.next,
				    struct sem_undo, list_proc);
		if (&un->list_proc == &ulp->list_proc) {
			/*
			 * We must wait for freeary() before freeing this
			 * ulp, in case we raced with the last sem_undo.
			 * There is a small window in which we exit while
			 * freeary() has not yet finished unlocking the
			 * sem_undo_list.
			 */
			spin_lock(&ulp->lock);
			spin_unlock(&ulp->lock);
			rcu_read_unlock();
			break;
		}
		spin_lock(&ulp->lock);
		semid = un->semid;
		spin_unlock(&ulp->lock);

		/* exit_sem raced with IPC_RMID, nothing to do */
		if (semid == -1) {
			rcu_read_unlock();
			continue;
		}

		sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid);
		/* exit_sem raced with IPC_RMID, nothing to do */
		if (IS_ERR(sma)) {
			rcu_read_unlock();
			continue;
		}

		sem_lock(sma, NULL, -1);
		/* exit_sem raced with IPC_RMID, nothing to do */
		if (!ipc_valid_object(&sma->sem_perm)) {
			sem_unlock(sma, -1);
			rcu_read_unlock();
			continue;
		}
		un = __lookup_undo(ulp, semid);
		if (un == NULL) {
			/* exit_sem raced with IPC_RMID+semget() that created
			 * exactly the same semid. Nothing to do.
			 */
			sem_unlock(sma, -1);
			rcu_read_unlock();
			continue;
		}

		/* remove un from the linked lists */
		ipc_assert_locked_object(&sma->sem_perm);
		list_del(&un->list_id);

		/* We are the last process using this ulp, so acquiring
		 * ulp->lock isn't required. Besides that, we are also
		 * protected against IPC_RMID as we hold the sma->sem_perm
		 * lock now.
		 */
		list_del_rcu(&un->list_proc);

		/* perform adjustments registered in un */
		for (i = 0; i < sma->sem_nsems; i++) {
			struct sem *semaphore = &sma->sems[i];
			if (un->semadj[i]) {
				semaphore->semval += un->semadj[i];
				/*
				 * Range checks of the new semaphore value,
				 * not defined by SUS:
				 * - Some unices ignore the undo entirely
				 *   (e.g. HP UX 11i 11.22, Tru64 V5.1)
				 * - some cap the value (e.g. FreeBSD caps
				 *   at 0, but doesn't enforce SEMVMX)
				 *
				 * Linux caps the semaphore value, both at 0
				 * and at SEMVMX.
				 *
				 * Manfred <manfred@colorfullife.com>
				 */
				if (semaphore->semval < 0)
					semaphore->semval = 0;
				if (semaphore->semval > SEMVMX)
					semaphore->semval = SEMVMX;
				semaphore->sempid = task_tgid_vnr(current);
			}
		}
		/* maybe some queued-up processes were waiting for this */
		do_smart_update(sma, NULL, 0, 1, &wake_q);
		sem_unlock(sma, -1);
		rcu_read_unlock();
		wake_up_q(&wake_q);

		kfree_rcu(un, rcu);
	}
	kfree(ulp);
}

#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
{
	struct user_namespace *user_ns = seq_user_ns(s);
	struct kern_ipc_perm *ipcp = it;
	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
	time64_t sem_otime;

	/*
	 * The proc interface isn't aware of sem_lock(), it calls
	 * ipc_lock_object() directly (in sysvipc_find_ipc).
	 * In order to stay compatible with sem_lock(), we must
	 * enter / leave complex_mode.
	 */
	complexmode_enter(sma);

	sem_otime = get_semotime(sma);

	seq_printf(s,
		   "%10d %10d %4o %10u %5u %5u %5u %5u %10llu %10llu\n",
		   sma->sem_perm.key,
		   sma->sem_perm.id,
		   sma->sem_perm.mode,
		   sma->sem_nsems,
		   from_kuid_munged(user_ns, sma->sem_perm.uid),
		   from_kgid_munged(user_ns, sma->sem_perm.gid),
		   from_kuid_munged(user_ns, sma->sem_perm.cuid),
		   from_kgid_munged(user_ns, sma->sem_perm.cgid),
		   sem_otime,
		   sma->sem_ctime);

	complexmode_tryleave(sma);

	return 0;
}
#endif
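
/*
 * Illustrative user-space sketch (not kernel code): one plausible way a
 * process might exercise the semop()/semtimedop() paths implemented above.
 * The semaphore set id passed in ("semid") and the one-second timeout are
 * assumptions made for the example only; error handling is left to the
 * caller, which sees -1 with errno set (e.g. EAGAIN on timeout).
 *
 *	#define _GNU_SOURCE		// for semtimedop() on glibc
 *	#include <sys/types.h>
 *	#include <sys/ipc.h>
 *	#include <sys/sem.h>
 *	#include <time.h>
 *
 *	static int lock_with_undo(int semid)
 *	{
 *		// P operation on semaphore 0 of the set: decrement by 1,
 *		// sleeping if the value would go negative. SEM_UNDO asks
 *		// the kernel to record an adjustment that exit_sem()
 *		// applies if the process dies while holding the semaphore.
 *		struct sembuf sop = {
 *			.sem_num = 0,
 *			.sem_op  = -1,
 *			.sem_flg = SEM_UNDO,
 *		};
 *		// Bounded wait: do_semtimedop() converts this to jiffies
 *		// and the call fails with EAGAIN once the timeout expires.
 *		struct timespec timeout = { .tv_sec = 1, .tv_nsec = 0 };
 *
 *		return semtimedop(semid, &sop, 1, &timeout);
 *	}
 */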