/*
 * linux/ipc/sem.c
 * Copyright (C) 1992 Krishna Balasubramanian
 * Copyright (C) 1995 Eric Schenk, Bruno Haible
 *
 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * SMP-threaded, sysctl's added
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
 * Enforced range limit on SEM_UNDO
 * (c) 2001 Red Hat Inc
 * Lockless wakeup
 * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
 * Further wakeup optimizations, documentation
 * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 *
 * Implementation notes: (May 2010)
 * This file implements System V semaphores.
 *
 * User space visible behavior:
 * - FIFO ordering for semop() operations (just FIFO, not starvation
 *   protection)
 * - multiple semaphore operations that alter the same semaphore in
 *   one semop() are handled.
 * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
 *   SETALL calls.
 * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
 * - undo adjustments at process exit are limited to 0..SEMVMX.
 * - namespaces are supported.
 * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
 *   to /proc/sys/kernel/sem.
 * - statistics about the usage are reported in /proc/sysvipc/sem.
 *
 * Internals:
 * - scalability:
 *   - all global variables are read-mostly.
 *   - semop() calls and semctl(RMID) are synchronized by RCU.
 *   - most operations do write operations (actually: spin_lock calls) to
 *     the per-semaphore array structure.
 *     Thus: Perfect SMP scaling between independent semaphore arrays.
 *     If multiple semaphores in one array are used, then cache line
 *     thrashing on the semaphore array spinlock will limit the scaling.
 * - semncnt and semzcnt are calculated on demand in count_semcnt()
 * - the task that performs a successful semop() scans the list of all
 *   sleeping tasks and completes any pending operations that can be fulfilled.
 *   Semaphores are actively given to waiting tasks (necessary for FIFO).
 *   (see update_queue())
 * - To improve the scalability, the actual wake-up calls are performed after
 *   dropping all locks. (see wake_up_sem_queue_prepare(),
 *   wake_up_sem_queue_do())
 * - All work is done by the waker, the woken up task does not have to do
 *   anything - not even acquiring a lock or dropping a refcount.
 * - A woken up task may not even touch the semaphore array anymore, it may
 *   have been destroyed already by a semctl(RMID).
 * - The synchronization between wake-ups due to a timeout/signal and a
 *   wake-up due to a completed semaphore operation is achieved by using an
 *   intermediate state (IN_WAKEUP).
 * - UNDO values are stored in an array (one per process and per
 *   semaphore array, lazily allocated). For backwards compatibility, multiple
 *   modes for the UNDO variables are supported (per process, per thread)
 *   (see copy_semundo, CLONE_SYSVSEM)
 * - There are two lists of the pending operations: a per-array list
 *   and a per-semaphore list (stored in the array). This allows FIFO
 *   ordering to be achieved without always scanning all pending operations.
 *   The worst-case behavior is nevertheless O(N^2) for N wakeups.
 */
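
/*
 * For orientation only: a minimal user-space sketch of the semop() behavior
 * documented above. It is illustrative, not kernel code and not part of the
 * build; it assumes nothing beyond the standard SysV API (<sys/sem.h>) and
 * omits error handling.
 *
 *	int id = semget(IPC_PRIVATE, 2, IPC_CREAT | 0600);
 *
 *	struct sembuf up   = { .sem_num = 0, .sem_op = +1, .sem_flg = 0 };
 *	struct sembuf down = { .sem_num = 0, .sem_op = -1, .sem_flg = 0 };
 *
 *	semop(id, &up, 1);	// single-sop increment, never blocks
 *	semop(id, &down, 1);	// single-sop decrement, blocks until semval > 0
 *
 *	struct sembuf both[2] = {
 *		{ .sem_num = 0, .sem_op = +1, .sem_flg = 0 },
 *		{ .sem_num = 1, .sem_op = +1, .sem_flg = SEM_UNDO },
 *	};
 *	semop(id, both, 2);	// multi-sop: applied atomically, see below
 *
 * Whether a single-sop call really avoids the per-array lock depends on
 * complex_mode; see sem_lock() further down.
 */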

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/time.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>

#include <linux/uaccess.h>
#include "util.h"

/* One semaphore structure for each semaphore in the system. */
struct sem {
	int	semval;		/* current value */
	/*
	 * PID of the process that last modified the semaphore. For
	 * Linux, specifically these are:
	 *  - semop
	 *  - semctl, via SETVAL and SETALL.
	 *  - at task exit when performing undo adjustments (see exit_sem).
	 */
	int	sempid;
	spinlock_t	lock;	/* spinlock for fine-grained semtimedop */
	struct list_head pending_alter; /* pending single-sop operations */
					/* that alter the semaphore */
	struct list_head pending_const; /* pending single-sop operations */
					/* that do not alter the semaphore */
	time_t	sem_otime;	/* candidate for sem_otime */
} ____cacheline_aligned_in_smp;

/* One queue for each sleeping process in the system. */
struct sem_queue {
	struct list_head	list;	 /* queue of pending operations */
	struct task_struct	*sleeper; /* this process */
	struct sem_undo		*undo;	 /* undo structure */
	int			pid;	 /* process id of requesting process */
	int			status;	 /* completion status of operation */
	struct sembuf		*sops;	 /* array of pending operations */
	struct sembuf		*blocking; /* the operation that blocked */
	int			nsops;	 /* number of operations */
	int			alter;	 /* does *sops alter the array? */
};

/* Each task has a list of undo requests. They are executed automatically
 * when the process exits.
 */
struct sem_undo {
	struct list_head	list_proc;	/* per-process list:
						 * all undos from one process,
						 * rcu protected */
	struct rcu_head		rcu;		/* rcu struct for sem_undo */
	struct sem_undo_list	*ulp;		/* back ptr to sem_undo_list */
	struct list_head	list_id;	/* per semaphore array list:
						 * all undos for one array */
	int			semid;		/* semaphore set identifier */
	short			*semadj;	/* array of adjustments */
						/* one per semaphore */
};

/* sem_undo_list controls shared access to the list of sem_undo structures
 * that may be shared among the tasks of a CLONE_SYSVSEM task group.
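 *
 * An illustrative user-space fragment (not kernel code; the glibc clone()
 * wrapper and the names below are only an example) of what "shared" means
 * here: a child created with CLONE_SYSVSEM uses the parent's undo list, so
 * SEM_UNDO adjustments made by either task are applied once, when the last
 * sharer exits.
 *
 *	clone(child_fn, stack_top, CLONE_SYSVSEM | SIGCHLD, NULL);
 *
 *	// without CLONE_SYSVSEM (e.g. plain fork()), the child starts
 *	// with an empty undo list of its own (see copy_semundo())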
142 */ 143 struct sem_undo_list { 144 atomic_t refcnt; 145 spinlock_t lock; 146 struct list_head list_proc; 147 }; 148 149 150 #define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS]) 151 152 #define sem_checkid(sma, semid) ipc_checkid(&sma->sem_perm, semid) 153 154 static int newary(struct ipc_namespace *, struct ipc_params *); 155 static void freeary(struct ipc_namespace *, struct kern_ipc_perm *); 156 #ifdef CONFIG_PROC_FS 157 static int sysvipc_sem_proc_show(struct seq_file *s, void *it); 158 #endif 159 160 #define SEMMSL_FAST 256 /* 512 bytes on stack */ 161 #define SEMOPM_FAST 64 /* ~ 372 bytes on stack */ 162 163 /* 164 * Locking: 165 * a) global sem_lock() for read/write 166 * sem_undo.id_next, 167 * sem_array.complex_count, 168 * sem_array.complex_mode 169 * sem_array.pending{_alter,_const}, 170 * sem_array.sem_undo 171 * 172 * b) global or semaphore sem_lock() for read/write: 173 * sem_array.sem_base[i].pending_{const,alter}: 174 * sem_array.complex_mode (for read) 175 * 176 * c) special: 177 * sem_undo_list.list_proc: 178 * * undo_list->lock for write 179 * * rcu for read 180 */ 181 182 #define sc_semmsl sem_ctls[0] 183 #define sc_semmns sem_ctls[1] 184 #define sc_semopm sem_ctls[2] 185 #define sc_semmni sem_ctls[3] 186 187 void sem_init_ns(struct ipc_namespace *ns) 188 { 189 ns->sc_semmsl = SEMMSL; 190 ns->sc_semmns = SEMMNS; 191 ns->sc_semopm = SEMOPM; 192 ns->sc_semmni = SEMMNI; 193 ns->used_sems = 0; 194 ipc_init_ids(&ns->ids[IPC_SEM_IDS]); 195 } 196 197 #ifdef CONFIG_IPC_NS 198 void sem_exit_ns(struct ipc_namespace *ns) 199 { 200 free_ipcs(ns, &sem_ids(ns), freeary); 201 idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr); 202 } 203 #endif 204 205 void __init sem_init(void) 206 { 207 sem_init_ns(&init_ipc_ns); 208 ipc_init_proc_interface("sysvipc/sem", 209 " key semid perms nsems uid gid cuid cgid otime ctime\n", 210 IPC_SEM_IDS, sysvipc_sem_proc_show); 211 } 212 213 /** 214 * unmerge_queues - unmerge queues, if possible. 215 * @sma: semaphore array 216 * 217 * The function unmerges the wait queues if complex_count is 0. 218 * It must be called prior to dropping the global semaphore array lock. 219 */ 220 static void unmerge_queues(struct sem_array *sma) 221 { 222 struct sem_queue *q, *tq; 223 224 /* complex operations still around? */ 225 if (sma->complex_count) 226 return; 227 /* 228 * We will switch back to simple mode. 229 * Move all pending operation back into the per-semaphore 230 * queues. 231 */ 232 list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { 233 struct sem *curr; 234 curr = &sma->sem_base[q->sops[0].sem_num]; 235 236 list_add_tail(&q->list, &curr->pending_alter); 237 } 238 INIT_LIST_HEAD(&sma->pending_alter); 239 } 240 241 /** 242 * merge_queues - merge single semop queues into global queue 243 * @sma: semaphore array 244 * 245 * This function merges all per-semaphore queues into the global queue. 246 * It is necessary to achieve FIFO ordering for the pending single-sop 247 * operations when a multi-semop operation must sleep. 248 * Only the alter operations must be moved, the const operations can stay. 
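 *
 * An illustrative, hypothetical scenario (sembuf entries abbreviated to
 * {sem_num, sem_op}): semaphore 0 has the value 0 and two operations arrive
 * in this order:
 *	A = { {0, -1} }			- single-sop, sleeps on sem 0's queue
 *	B = { {0, -1}, {1, -1} }	- multi-sop, sleeps on the global queue
 * If A stayed on the per-semaphore queue, the wakeup code could no longer
 * tell that A arrived before B; splicing A into the global queue ahead of B
 * preserves the FIFO promise made in the header comment.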
249 */ 250 static void merge_queues(struct sem_array *sma) 251 { 252 int i; 253 for (i = 0; i < sma->sem_nsems; i++) { 254 struct sem *sem = sma->sem_base + i; 255 256 list_splice_init(&sem->pending_alter, &sma->pending_alter); 257 } 258 } 259 260 static void sem_rcu_free(struct rcu_head *head) 261 { 262 struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); 263 struct sem_array *sma = ipc_rcu_to_struct(p); 264 265 security_sem_free(sma); 266 ipc_rcu_free(head); 267 } 268 269 /* 270 * Enter the mode suitable for non-simple operations: 271 * Caller must own sem_perm.lock. 272 */ 273 static void complexmode_enter(struct sem_array *sma) 274 { 275 int i; 276 struct sem *sem; 277 278 if (sma->complex_mode) { 279 /* We are already in complex_mode. Nothing to do */ 280 return; 281 } 282 283 /* We need a full barrier after seting complex_mode: 284 * The write to complex_mode must be visible 285 * before we read the first sem->lock spinlock state. 286 */ 287 smp_store_mb(sma->complex_mode, true); 288 289 for (i = 0; i < sma->sem_nsems; i++) { 290 sem = sma->sem_base + i; 291 spin_unlock_wait(&sem->lock); 292 } 293 /* 294 * spin_unlock_wait() is not a memory barriers, it is only a 295 * control barrier. The code must pair with spin_unlock(&sem->lock), 296 * thus just the control barrier is insufficient. 297 * 298 * smp_rmb() is sufficient, as writes cannot pass the control barrier. 299 */ 300 smp_rmb(); 301 } 302 303 /* 304 * Try to leave the mode that disallows simple operations: 305 * Caller must own sem_perm.lock. 306 */ 307 static void complexmode_tryleave(struct sem_array *sma) 308 { 309 if (sma->complex_count) { 310 /* Complex ops are sleeping. 311 * We must stay in complex mode 312 */ 313 return; 314 } 315 /* 316 * Immediately after setting complex_mode to false, 317 * a simple op can start. Thus: all memory writes 318 * performed by the current operation must be visible 319 * before we set complex_mode to false. 320 */ 321 smp_store_release(&sma->complex_mode, false); 322 } 323 324 #define SEM_GLOBAL_LOCK (-1) 325 /* 326 * If the request contains only one semaphore operation, and there are 327 * no complex transactions pending, lock only the semaphore involved. 328 * Otherwise, lock the entire semaphore array, since we either have 329 * multiple semaphores in our own semops, or we need to look at 330 * semaphores from other pending complex operations. 331 */ 332 static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, 333 int nsops) 334 { 335 struct sem *sem; 336 337 if (nsops != 1) { 338 /* Complex operation - acquire a full lock */ 339 ipc_lock_object(&sma->sem_perm); 340 341 /* Prevent parallel simple ops */ 342 complexmode_enter(sma); 343 return SEM_GLOBAL_LOCK; 344 } 345 346 /* 347 * Only one semaphore affected - try to optimize locking. 348 * Optimized locking is possible if no complex operation 349 * is either enqueued or processed right now. 350 * 351 * Both facts are tracked by complex_mode. 352 */ 353 sem = sma->sem_base + sops->sem_num; 354 355 /* 356 * Initial check for complex_mode. Just an optimization, 357 * no locking, no memory barrier. 358 */ 359 if (!sma->complex_mode) { 360 /* 361 * It appears that no complex operation is around. 362 * Acquire the per-semaphore lock. 
363 */ 364 spin_lock(&sem->lock); 365 366 /* 367 * See 51d7d5205d33 368 * ("powerpc: Add smp_mb() to arch_spin_is_locked()"): 369 * A full barrier is required: the write of sem->lock 370 * must be visible before the read is executed 371 */ 372 smp_mb(); 373 374 if (!smp_load_acquire(&sma->complex_mode)) { 375 /* fast path successful! */ 376 return sops->sem_num; 377 } 378 spin_unlock(&sem->lock); 379 } 380 381 /* slow path: acquire the full lock */ 382 ipc_lock_object(&sma->sem_perm); 383 384 if (sma->complex_count == 0) { 385 /* False alarm: 386 * There is no complex operation, thus we can switch 387 * back to the fast path. 388 */ 389 spin_lock(&sem->lock); 390 ipc_unlock_object(&sma->sem_perm); 391 return sops->sem_num; 392 } else { 393 /* Not a false alarm, thus complete the sequence for a 394 * full lock. 395 */ 396 complexmode_enter(sma); 397 return SEM_GLOBAL_LOCK; 398 } 399 } 400 401 static inline void sem_unlock(struct sem_array *sma, int locknum) 402 { 403 if (locknum == SEM_GLOBAL_LOCK) { 404 unmerge_queues(sma); 405 complexmode_tryleave(sma); 406 ipc_unlock_object(&sma->sem_perm); 407 } else { 408 struct sem *sem = sma->sem_base + locknum; 409 spin_unlock(&sem->lock); 410 } 411 } 412 413 /* 414 * sem_lock_(check_) routines are called in the paths where the rwsem 415 * is not held. 416 * 417 * The caller holds the RCU read lock. 418 */ 419 static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns, 420 int id, struct sembuf *sops, int nsops, int *locknum) 421 { 422 struct kern_ipc_perm *ipcp; 423 struct sem_array *sma; 424 425 ipcp = ipc_obtain_object_idr(&sem_ids(ns), id); 426 if (IS_ERR(ipcp)) 427 return ERR_CAST(ipcp); 428 429 sma = container_of(ipcp, struct sem_array, sem_perm); 430 *locknum = sem_lock(sma, sops, nsops); 431 432 /* ipc_rmid() may have already freed the ID while sem_lock 433 * was spinning: verify that the structure is still valid 434 */ 435 if (ipc_valid_object(ipcp)) 436 return container_of(ipcp, struct sem_array, sem_perm); 437 438 sem_unlock(sma, *locknum); 439 return ERR_PTR(-EINVAL); 440 } 441 442 static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id) 443 { 444 struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id); 445 446 if (IS_ERR(ipcp)) 447 return ERR_CAST(ipcp); 448 449 return container_of(ipcp, struct sem_array, sem_perm); 450 } 451 452 static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns, 453 int id) 454 { 455 struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id); 456 457 if (IS_ERR(ipcp)) 458 return ERR_CAST(ipcp); 459 460 return container_of(ipcp, struct sem_array, sem_perm); 461 } 462 463 static inline void sem_lock_and_putref(struct sem_array *sma) 464 { 465 sem_lock(sma, NULL, -1); 466 ipc_rcu_putref(sma, sem_rcu_free); 467 } 468 469 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) 470 { 471 ipc_rmid(&sem_ids(ns), &s->sem_perm); 472 } 473 474 /* 475 * Lockless wakeup algorithm: 476 * Without the check/retry algorithm a lockless wakeup is possible: 477 * - queue.status is initialized to -EINTR before blocking. 478 * - wakeup is performed by 479 * * unlinking the queue entry from the pending list 480 * * setting queue.status to IN_WAKEUP 481 * This is the notification for the blocked thread that a 482 * result value is imminent. 483 * * call wake_up_process 484 * * set queue.status to the final value. 
485 * - the previously blocked thread checks queue.status: 486 * * if it's IN_WAKEUP, then it must wait until the value changes 487 * * if it's not -EINTR, then the operation was completed by 488 * update_queue. semtimedop can return queue.status without 489 * performing any operation on the sem array. 490 * * otherwise it must acquire the spinlock and check what's up. 491 * 492 * The two-stage algorithm is necessary to protect against the following 493 * races: 494 * - if queue.status is set after wake_up_process, then the woken up idle 495 * thread could race forward and try (and fail) to acquire sma->lock 496 * before update_queue had a chance to set queue.status 497 * - if queue.status is written before wake_up_process and if the 498 * blocked process is woken up by a signal between writing 499 * queue.status and the wake_up_process, then the woken up 500 * process could return from semtimedop and die by calling 501 * sys_exit before wake_up_process is called. Then wake_up_process 502 * will oops, because the task structure is already invalid. 503 * (yes, this happened on s390 with sysv msg). 504 * 505 */ 506 #define IN_WAKEUP 1 507 508 /** 509 * newary - Create a new semaphore set 510 * @ns: namespace 511 * @params: ptr to the structure that contains key, semflg and nsems 512 * 513 * Called with sem_ids.rwsem held (as a writer) 514 */ 515 static int newary(struct ipc_namespace *ns, struct ipc_params *params) 516 { 517 int id; 518 int retval; 519 struct sem_array *sma; 520 int size; 521 key_t key = params->key; 522 int nsems = params->u.nsems; 523 int semflg = params->flg; 524 int i; 525 526 if (!nsems) 527 return -EINVAL; 528 if (ns->used_sems + nsems > ns->sc_semmns) 529 return -ENOSPC; 530 531 size = sizeof(*sma) + nsems * sizeof(struct sem); 532 sma = ipc_rcu_alloc(size); 533 if (!sma) 534 return -ENOMEM; 535 536 memset(sma, 0, size); 537 538 sma->sem_perm.mode = (semflg & S_IRWXUGO); 539 sma->sem_perm.key = key; 540 541 sma->sem_perm.security = NULL; 542 retval = security_sem_alloc(sma); 543 if (retval) { 544 ipc_rcu_putref(sma, ipc_rcu_free); 545 return retval; 546 } 547 548 sma->sem_base = (struct sem *) &sma[1]; 549 550 for (i = 0; i < nsems; i++) { 551 INIT_LIST_HEAD(&sma->sem_base[i].pending_alter); 552 INIT_LIST_HEAD(&sma->sem_base[i].pending_const); 553 spin_lock_init(&sma->sem_base[i].lock); 554 } 555 556 sma->complex_count = 0; 557 sma->complex_mode = true; /* dropped by sem_unlock below */ 558 INIT_LIST_HEAD(&sma->pending_alter); 559 INIT_LIST_HEAD(&sma->pending_const); 560 INIT_LIST_HEAD(&sma->list_id); 561 sma->sem_nsems = nsems; 562 sma->sem_ctime = get_seconds(); 563 564 id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); 565 if (id < 0) { 566 ipc_rcu_putref(sma, sem_rcu_free); 567 return id; 568 } 569 ns->used_sems += nsems; 570 571 sem_unlock(sma, -1); 572 rcu_read_unlock(); 573 574 return sma->sem_perm.id; 575 } 576 577 578 /* 579 * Called with sem_ids.rwsem and ipcp locked. 580 */ 581 static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg) 582 { 583 struct sem_array *sma; 584 585 sma = container_of(ipcp, struct sem_array, sem_perm); 586 return security_sem_associate(sma, semflg); 587 } 588 589 /* 590 * Called with sem_ids.rwsem and ipcp locked. 
591 */ 592 static inline int sem_more_checks(struct kern_ipc_perm *ipcp, 593 struct ipc_params *params) 594 { 595 struct sem_array *sma; 596 597 sma = container_of(ipcp, struct sem_array, sem_perm); 598 if (params->u.nsems > sma->sem_nsems) 599 return -EINVAL; 600 601 return 0; 602 } 603 604 SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) 605 { 606 struct ipc_namespace *ns; 607 static const struct ipc_ops sem_ops = { 608 .getnew = newary, 609 .associate = sem_security, 610 .more_checks = sem_more_checks, 611 }; 612 struct ipc_params sem_params; 613 614 ns = current->nsproxy->ipc_ns; 615 616 if (nsems < 0 || nsems > ns->sc_semmsl) 617 return -EINVAL; 618 619 sem_params.key = key; 620 sem_params.flg = semflg; 621 sem_params.u.nsems = nsems; 622 623 return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); 624 } 625 626 /** 627 * perform_atomic_semop - Perform (if possible) a semaphore operation 628 * @sma: semaphore array 629 * @q: struct sem_queue that describes the operation 630 * 631 * Returns 0 if the operation was possible. 632 * Returns 1 if the operation is impossible, the caller must sleep. 633 * Negative values are error codes. 634 */ 635 static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q) 636 { 637 int result, sem_op, nsops, pid; 638 struct sembuf *sop; 639 struct sem *curr; 640 struct sembuf *sops; 641 struct sem_undo *un; 642 643 sops = q->sops; 644 nsops = q->nsops; 645 un = q->undo; 646 647 for (sop = sops; sop < sops + nsops; sop++) { 648 curr = sma->sem_base + sop->sem_num; 649 sem_op = sop->sem_op; 650 result = curr->semval; 651 652 if (!sem_op && result) 653 goto would_block; 654 655 result += sem_op; 656 if (result < 0) 657 goto would_block; 658 if (result > SEMVMX) 659 goto out_of_range; 660 661 if (sop->sem_flg & SEM_UNDO) { 662 int undo = un->semadj[sop->sem_num] - sem_op; 663 /* Exceeding the undo range is an error. */ 664 if (undo < (-SEMAEM - 1) || undo > SEMAEM) 665 goto out_of_range; 666 un->semadj[sop->sem_num] = undo; 667 } 668 669 curr->semval = result; 670 } 671 672 sop--; 673 pid = q->pid; 674 while (sop >= sops) { 675 sma->sem_base[sop->sem_num].sempid = pid; 676 sop--; 677 } 678 679 return 0; 680 681 out_of_range: 682 result = -ERANGE; 683 goto undo; 684 685 would_block: 686 q->blocking = sop; 687 688 if (sop->sem_flg & IPC_NOWAIT) 689 result = -EAGAIN; 690 else 691 result = 1; 692 693 undo: 694 sop--; 695 while (sop >= sops) { 696 sem_op = sop->sem_op; 697 sma->sem_base[sop->sem_num].semval -= sem_op; 698 if (sop->sem_flg & SEM_UNDO) 699 un->semadj[sop->sem_num] += sem_op; 700 sop--; 701 } 702 703 return result; 704 } 705 706 /** wake_up_sem_queue_prepare(q, error): Prepare wake-up 707 * @q: queue entry that must be signaled 708 * @error: Error value for the signal 709 * 710 * Prepare the wake-up of the queue entry q. 711 */ 712 static void wake_up_sem_queue_prepare(struct list_head *pt, 713 struct sem_queue *q, int error) 714 { 715 if (list_empty(pt)) { 716 /* 717 * Hold preempt off so that we don't get preempted and have the 718 * wakee busy-wait until we're scheduled back on. 719 */ 720 preempt_disable(); 721 } 722 q->status = IN_WAKEUP; 723 q->pid = error; 724 725 list_add_tail(&q->list, pt); 726 } 727 728 /** 729 * wake_up_sem_queue_do - do the actual wake-up 730 * @pt: list of tasks to be woken up 731 * 732 * Do the actual wake-up. 
 * The function is called without any locks held, thus the semaphore array
 * could be destroyed already and the tasks can disappear as soon as the
 * status is set to the actual return code.
 */
static void wake_up_sem_queue_do(struct list_head *pt)
{
	struct sem_queue *q, *t;
	int did_something;

	did_something = !list_empty(pt);
	list_for_each_entry_safe(q, t, pt, list) {
		wake_up_process(q->sleeper);
		/* q can disappear immediately after writing q->status. */
		smp_wmb();
		q->status = q->pid;
	}
	if (did_something)
		preempt_enable();
}

static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
{
	list_del(&q->list);
	if (q->nsops > 1)
		sma->complex_count--;
}

/** check_restart(sma, q)
 * @sma: semaphore array
 * @q: the operation that just completed
 *
 * update_queue is O(N^2) when it restarts scanning the whole queue of
 * waiting operations. Therefore this function checks if the restart is
 * really necessary. It is called after a previously waiting operation
 * modified the array.
 * Note that wait-for-zero operations are handled without restart.
 */
static int check_restart(struct sem_array *sma, struct sem_queue *q)
{
	/* pending complex alter operations are too difficult to analyse */
	if (!list_empty(&sma->pending_alter))
		return 1;

	/* we were a sleeping complex operation. Too difficult */
	if (q->nsops > 1)
		return 1;

	/* It is impossible that someone waits for the new value:
	 * - complex operations always restart.
	 * - wait-for-zero operations are handled separately.
	 * - q is a previously sleeping simple operation that
	 *   altered the array. It must be a decrement, because
	 *   simple increments never sleep.
	 * - If there are older (higher priority) decrements
	 *   in the queue, then they have observed the original
	 *   semval value and couldn't proceed. The completed operation
	 *   only decreased the value - thus they won't proceed either.
	 */
	return 0;
}

/**
 * wake_const_ops - wake up non-alter tasks
 * @sma: semaphore array.
 * @semnum: semaphore that was modified.
 * @pt: list head for the tasks that must be woken up.
 *
 * wake_const_ops must be called after a semaphore in a semaphore array
 * was set to 0. If complex const operations are pending, wake_const_ops must
 * be called with semnum = -1, as well as with the number of each modified
 * semaphore.
 * The tasks that must be woken up are added to @pt. The return code
 * is stored in q->pid.
 * The function returns 1 if at least one operation was completed successfully.
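 *
 * For reference, a wait-for-zero ("const") operation is simply a sembuf
 * with sem_op == 0; a minimal user-space sketch (illustrative only, not
 * kernel code):
 *
 *	struct sembuf wait_zero = { .sem_num = 0, .sem_op = 0, .sem_flg = 0 };
 *	semop(id, &wait_zero, 1);	// blocks until semval of sem 0 is 0
 *
 * Such operations end up on the pending_const lists scanned here.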
 */
static int wake_const_ops(struct sem_array *sma, int semnum,
			  struct list_head *pt)
{
	struct sem_queue *q;
	struct list_head *walk;
	struct list_head *pending_list;
	int semop_completed = 0;

	if (semnum == -1)
		pending_list = &sma->pending_const;
	else
		pending_list = &sma->sem_base[semnum].pending_const;

	walk = pending_list->next;
	while (walk != pending_list) {
		int error;

		q = container_of(walk, struct sem_queue, list);
		walk = walk->next;

		error = perform_atomic_semop(sma, q);

		if (error <= 0) {
			/* operation completed, remove from queue & wakeup */

			unlink_queue(sma, q);

			wake_up_sem_queue_prepare(pt, q, error);
			if (error == 0)
				semop_completed = 1;
		}
	}
	return semop_completed;
}

/**
 * do_smart_wakeup_zero - wakeup all wait for zero tasks
 * @sma: semaphore array
 * @sops: operations that were performed
 * @nsops: number of operations
 * @pt: list head of the tasks that must be woken up.
 *
 * Checks all required queues for wait-for-zero operations, based
 * on the actual changes that were performed on the semaphore array.
 * The function returns 1 if at least one operation was completed successfully.
 */
static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
				int nsops, struct list_head *pt)
{
	int i;
	int semop_completed = 0;
	int got_zero = 0;

	/* first: the per-semaphore queues, if known */
	if (sops) {
		for (i = 0; i < nsops; i++) {
			int num = sops[i].sem_num;

			if (sma->sem_base[num].semval == 0) {
				got_zero = 1;
				semop_completed |= wake_const_ops(sma, num, pt);
			}
		}
	} else {
		/*
		 * No sops means modified semaphores not known.
		 * Assume all were changed.
		 */
		for (i = 0; i < sma->sem_nsems; i++) {
			if (sma->sem_base[i].semval == 0) {
				got_zero = 1;
				semop_completed |= wake_const_ops(sma, i, pt);
			}
		}
	}
	/*
	 * If one of the modified semaphores got 0,
	 * then check the global queue, too.
	 */
	if (got_zero)
		semop_completed |= wake_const_ops(sma, -1, pt);

	return semop_completed;
}


/**
 * update_queue - look for tasks that can be completed.
 * @sma: semaphore array.
 * @semnum: semaphore that was modified.
 * @pt: list head for the tasks that must be woken up.
 *
 * update_queue must be called after a semaphore in a semaphore array
 * was modified. If multiple semaphores were modified, update_queue must
 * be called with semnum = -1, as well as with the number of each modified
 * semaphore.
 * The tasks that must be woken up are added to @pt. The return code
 * is stored in q->pid.
 * The function internally checks if const operations can now succeed.
 *
 * The function returns 1 if at least one semop was completed successfully.
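 *
 * A condensed sketch of the typical caller pattern (this is an outline of
 * what do_smart_update() and the semop()/semtimedop() path below effectively
 * do, not copy-paste code):
 *
 *	struct list_head tasks;
 *	INIT_LIST_HEAD(&tasks);
 *
 *	locknum = sem_lock(sma, sops, nsops);
 *	...modify semaphore values...
 *	update_queue(sma, semnum, &tasks);	// collect wakeable sleepers
 *	sem_unlock(sma, locknum);
 *	rcu_read_unlock();
 *	wake_up_sem_queue_do(&tasks);		// wake them, no locks held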
909 */ 910 static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt) 911 { 912 struct sem_queue *q; 913 struct list_head *walk; 914 struct list_head *pending_list; 915 int semop_completed = 0; 916 917 if (semnum == -1) 918 pending_list = &sma->pending_alter; 919 else 920 pending_list = &sma->sem_base[semnum].pending_alter; 921 922 again: 923 walk = pending_list->next; 924 while (walk != pending_list) { 925 int error, restart; 926 927 q = container_of(walk, struct sem_queue, list); 928 walk = walk->next; 929 930 /* If we are scanning the single sop, per-semaphore list of 931 * one semaphore and that semaphore is 0, then it is not 932 * necessary to scan further: simple increments 933 * that affect only one entry succeed immediately and cannot 934 * be in the per semaphore pending queue, and decrements 935 * cannot be successful if the value is already 0. 936 */ 937 if (semnum != -1 && sma->sem_base[semnum].semval == 0) 938 break; 939 940 error = perform_atomic_semop(sma, q); 941 942 /* Does q->sleeper still need to sleep? */ 943 if (error > 0) 944 continue; 945 946 unlink_queue(sma, q); 947 948 if (error) { 949 restart = 0; 950 } else { 951 semop_completed = 1; 952 do_smart_wakeup_zero(sma, q->sops, q->nsops, pt); 953 restart = check_restart(sma, q); 954 } 955 956 wake_up_sem_queue_prepare(pt, q, error); 957 if (restart) 958 goto again; 959 } 960 return semop_completed; 961 } 962 963 /** 964 * set_semotime - set sem_otime 965 * @sma: semaphore array 966 * @sops: operations that modified the array, may be NULL 967 * 968 * sem_otime is replicated to avoid cache line trashing. 969 * This function sets one instance to the current time. 970 */ 971 static void set_semotime(struct sem_array *sma, struct sembuf *sops) 972 { 973 if (sops == NULL) { 974 sma->sem_base[0].sem_otime = get_seconds(); 975 } else { 976 sma->sem_base[sops[0].sem_num].sem_otime = 977 get_seconds(); 978 } 979 } 980 981 /** 982 * do_smart_update - optimized update_queue 983 * @sma: semaphore array 984 * @sops: operations that were performed 985 * @nsops: number of operations 986 * @otime: force setting otime 987 * @pt: list head of the tasks that must be woken up. 988 * 989 * do_smart_update() does the required calls to update_queue and wakeup_zero, 990 * based on the actual changes that were performed on the semaphore array. 991 * Note that the function does not do the actual wake-up: the caller is 992 * responsible for calling wake_up_sem_queue_do(@pt). 993 * It is safe to perform this call after dropping all locks. 994 */ 995 static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops, 996 int otime, struct list_head *pt) 997 { 998 int i; 999 1000 otime |= do_smart_wakeup_zero(sma, sops, nsops, pt); 1001 1002 if (!list_empty(&sma->pending_alter)) { 1003 /* semaphore array uses the global queue - just process it. */ 1004 otime |= update_queue(sma, -1, pt); 1005 } else { 1006 if (!sops) { 1007 /* 1008 * No sops, thus the modified semaphores are not 1009 * known. Check all. 1010 */ 1011 for (i = 0; i < sma->sem_nsems; i++) 1012 otime |= update_queue(sma, i, pt); 1013 } else { 1014 /* 1015 * Check the semaphores that were increased: 1016 * - No complex ops, thus all sleeping ops are 1017 * decrease. 1018 * - if we decreased the value, then any sleeping 1019 * semaphore ops wont be able to run: If the 1020 * previous value was too small, then the new 1021 * value will be too small, too. 
 */
			for (i = 0; i < nsops; i++) {
				if (sops[i].sem_op > 0) {
					otime |= update_queue(sma,
							sops[i].sem_num, pt);
				}
			}
		}
	}
	if (otime)
		set_semotime(sma, sops);
}

/*
 * check_qop: Test if a queued operation sleeps on the semaphore semnum
 */
static int check_qop(struct sem_array *sma, int semnum, struct sem_queue *q,
			bool count_zero)
{
	struct sembuf *sop = q->blocking;

	/*
	 * Linux always (since 0.99.10) reported a task as sleeping on all
	 * semaphores. This violates SUS, therefore it was changed to the
	 * standard compliant behavior.
	 * Give the administrators a chance to notice that an application
	 * might misbehave because it relies on the Linux behavior.
	 */
	pr_info_once("semctl(GETNCNT/GETZCNT) is since 3.16 Single Unix Specification compliant.\n"
			"The task %s (%d) triggered the difference, watch for misbehavior.\n",
			current->comm, task_pid_nr(current));

	if (sop->sem_num != semnum)
		return 0;

	if (count_zero && sop->sem_op == 0)
		return 1;
	if (!count_zero && sop->sem_op < 0)
		return 1;

	return 0;
}

/* The following counts are associated with each semaphore:
 *   semncnt	number of tasks waiting on semval being nonzero
 *   semzcnt	number of tasks waiting on semval being zero
 *
 * By definition, a task waits only on the semaphore of the first semop
 * that cannot proceed, even if additional operations would block, too.
 */
static int count_semcnt(struct sem_array *sma, ushort semnum,
			bool count_zero)
{
	struct list_head *l;
	struct sem_queue *q;
	int semcnt;

	semcnt = 0;
	/* First: check the simple operations. They are easy to evaluate */
	if (count_zero)
		l = &sma->sem_base[semnum].pending_const;
	else
		l = &sma->sem_base[semnum].pending_alter;

	list_for_each_entry(q, l, list) {
		/* all tasks on a per-semaphore list sleep on exactly
		 * that semaphore
		 */
		semcnt++;
	}

	/* Then: check the complex operations. */
	list_for_each_entry(q, &sma->pending_alter, list) {
		semcnt += check_qop(sma, semnum, q, count_zero);
	}
	if (count_zero) {
		list_for_each_entry(q, &sma->pending_const, list) {
			semcnt += check_qop(sma, semnum, q, count_zero);
		}
	}
	return semcnt;
}

/* Free a semaphore set. freeary() is called with sem_ids.rwsem locked
 * as a writer and the spinlock for this semaphore set held. sem_ids.rwsem
 * remains locked on exit.
 */
static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
{
	struct sem_undo *un, *tu;
	struct sem_queue *q, *tq;
	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
	struct list_head tasks;
	int i;

	/* Free the existing undo structures for this semaphore set. */
	ipc_assert_locked_object(&sma->sem_perm);
	list_for_each_entry_safe(un, tu, &sma->list_id, list_id) {
		list_del(&un->list_id);
		spin_lock(&un->ulp->lock);
		un->semid = -1;
		list_del_rcu(&un->list_proc);
		spin_unlock(&un->ulp->lock);
		kfree_rcu(un, rcu);
	}

	/* Wake up all pending processes and let them fail with EIDRM.
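 *
 * (From user space this is the usual System V behavior: a task that was
 * blocked in semop() on the removed set sees the call fail, e.g.
 *
 *	if (semop(id, &op, 1) == -1 && errno == EIDRM)
 *		;	// the semaphore set was removed while we slept
 *
 * The fragment above is an illustrative user-space sketch, not kernel code.)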
*/ 1129 INIT_LIST_HEAD(&tasks); 1130 list_for_each_entry_safe(q, tq, &sma->pending_const, list) { 1131 unlink_queue(sma, q); 1132 wake_up_sem_queue_prepare(&tasks, q, -EIDRM); 1133 } 1134 1135 list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { 1136 unlink_queue(sma, q); 1137 wake_up_sem_queue_prepare(&tasks, q, -EIDRM); 1138 } 1139 for (i = 0; i < sma->sem_nsems; i++) { 1140 struct sem *sem = sma->sem_base + i; 1141 list_for_each_entry_safe(q, tq, &sem->pending_const, list) { 1142 unlink_queue(sma, q); 1143 wake_up_sem_queue_prepare(&tasks, q, -EIDRM); 1144 } 1145 list_for_each_entry_safe(q, tq, &sem->pending_alter, list) { 1146 unlink_queue(sma, q); 1147 wake_up_sem_queue_prepare(&tasks, q, -EIDRM); 1148 } 1149 } 1150 1151 /* Remove the semaphore set from the IDR */ 1152 sem_rmid(ns, sma); 1153 sem_unlock(sma, -1); 1154 rcu_read_unlock(); 1155 1156 wake_up_sem_queue_do(&tasks); 1157 ns->used_sems -= sma->sem_nsems; 1158 ipc_rcu_putref(sma, sem_rcu_free); 1159 } 1160 1161 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) 1162 { 1163 switch (version) { 1164 case IPC_64: 1165 return copy_to_user(buf, in, sizeof(*in)); 1166 case IPC_OLD: 1167 { 1168 struct semid_ds out; 1169 1170 memset(&out, 0, sizeof(out)); 1171 1172 ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm); 1173 1174 out.sem_otime = in->sem_otime; 1175 out.sem_ctime = in->sem_ctime; 1176 out.sem_nsems = in->sem_nsems; 1177 1178 return copy_to_user(buf, &out, sizeof(out)); 1179 } 1180 default: 1181 return -EINVAL; 1182 } 1183 } 1184 1185 static time_t get_semotime(struct sem_array *sma) 1186 { 1187 int i; 1188 time_t res; 1189 1190 res = sma->sem_base[0].sem_otime; 1191 for (i = 1; i < sma->sem_nsems; i++) { 1192 time_t to = sma->sem_base[i].sem_otime; 1193 1194 if (to > res) 1195 res = to; 1196 } 1197 return res; 1198 } 1199 1200 static int semctl_nolock(struct ipc_namespace *ns, int semid, 1201 int cmd, int version, void __user *p) 1202 { 1203 int err; 1204 struct sem_array *sma; 1205 1206 switch (cmd) { 1207 case IPC_INFO: 1208 case SEM_INFO: 1209 { 1210 struct seminfo seminfo; 1211 int max_id; 1212 1213 err = security_sem_semctl(NULL, cmd); 1214 if (err) 1215 return err; 1216 1217 memset(&seminfo, 0, sizeof(seminfo)); 1218 seminfo.semmni = ns->sc_semmni; 1219 seminfo.semmns = ns->sc_semmns; 1220 seminfo.semmsl = ns->sc_semmsl; 1221 seminfo.semopm = ns->sc_semopm; 1222 seminfo.semvmx = SEMVMX; 1223 seminfo.semmnu = SEMMNU; 1224 seminfo.semmap = SEMMAP; 1225 seminfo.semume = SEMUME; 1226 down_read(&sem_ids(ns).rwsem); 1227 if (cmd == SEM_INFO) { 1228 seminfo.semusz = sem_ids(ns).in_use; 1229 seminfo.semaem = ns->used_sems; 1230 } else { 1231 seminfo.semusz = SEMUSZ; 1232 seminfo.semaem = SEMAEM; 1233 } 1234 max_id = ipc_get_maxid(&sem_ids(ns)); 1235 up_read(&sem_ids(ns).rwsem); 1236 if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) 1237 return -EFAULT; 1238 return (max_id < 0) ? 
0 : max_id; 1239 } 1240 case IPC_STAT: 1241 case SEM_STAT: 1242 { 1243 struct semid64_ds tbuf; 1244 int id = 0; 1245 1246 memset(&tbuf, 0, sizeof(tbuf)); 1247 1248 rcu_read_lock(); 1249 if (cmd == SEM_STAT) { 1250 sma = sem_obtain_object(ns, semid); 1251 if (IS_ERR(sma)) { 1252 err = PTR_ERR(sma); 1253 goto out_unlock; 1254 } 1255 id = sma->sem_perm.id; 1256 } else { 1257 sma = sem_obtain_object_check(ns, semid); 1258 if (IS_ERR(sma)) { 1259 err = PTR_ERR(sma); 1260 goto out_unlock; 1261 } 1262 } 1263 1264 err = -EACCES; 1265 if (ipcperms(ns, &sma->sem_perm, S_IRUGO)) 1266 goto out_unlock; 1267 1268 err = security_sem_semctl(sma, cmd); 1269 if (err) 1270 goto out_unlock; 1271 1272 kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm); 1273 tbuf.sem_otime = get_semotime(sma); 1274 tbuf.sem_ctime = sma->sem_ctime; 1275 tbuf.sem_nsems = sma->sem_nsems; 1276 rcu_read_unlock(); 1277 if (copy_semid_to_user(p, &tbuf, version)) 1278 return -EFAULT; 1279 return id; 1280 } 1281 default: 1282 return -EINVAL; 1283 } 1284 out_unlock: 1285 rcu_read_unlock(); 1286 return err; 1287 } 1288 1289 static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, 1290 unsigned long arg) 1291 { 1292 struct sem_undo *un; 1293 struct sem_array *sma; 1294 struct sem *curr; 1295 int err; 1296 struct list_head tasks; 1297 int val; 1298 #if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN) 1299 /* big-endian 64bit */ 1300 val = arg >> 32; 1301 #else 1302 /* 32bit or little-endian 64bit */ 1303 val = arg; 1304 #endif 1305 1306 if (val > SEMVMX || val < 0) 1307 return -ERANGE; 1308 1309 INIT_LIST_HEAD(&tasks); 1310 1311 rcu_read_lock(); 1312 sma = sem_obtain_object_check(ns, semid); 1313 if (IS_ERR(sma)) { 1314 rcu_read_unlock(); 1315 return PTR_ERR(sma); 1316 } 1317 1318 if (semnum < 0 || semnum >= sma->sem_nsems) { 1319 rcu_read_unlock(); 1320 return -EINVAL; 1321 } 1322 1323 1324 if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) { 1325 rcu_read_unlock(); 1326 return -EACCES; 1327 } 1328 1329 err = security_sem_semctl(sma, SETVAL); 1330 if (err) { 1331 rcu_read_unlock(); 1332 return -EACCES; 1333 } 1334 1335 sem_lock(sma, NULL, -1); 1336 1337 if (!ipc_valid_object(&sma->sem_perm)) { 1338 sem_unlock(sma, -1); 1339 rcu_read_unlock(); 1340 return -EIDRM; 1341 } 1342 1343 curr = &sma->sem_base[semnum]; 1344 1345 ipc_assert_locked_object(&sma->sem_perm); 1346 list_for_each_entry(un, &sma->list_id, list_id) 1347 un->semadj[semnum] = 0; 1348 1349 curr->semval = val; 1350 curr->sempid = task_tgid_vnr(current); 1351 sma->sem_ctime = get_seconds(); 1352 /* maybe some queued-up processes were waiting for this */ 1353 do_smart_update(sma, NULL, 0, 0, &tasks); 1354 sem_unlock(sma, -1); 1355 rcu_read_unlock(); 1356 wake_up_sem_queue_do(&tasks); 1357 return 0; 1358 } 1359 1360 static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, 1361 int cmd, void __user *p) 1362 { 1363 struct sem_array *sma; 1364 struct sem *curr; 1365 int err, nsems; 1366 ushort fast_sem_io[SEMMSL_FAST]; 1367 ushort *sem_io = fast_sem_io; 1368 struct list_head tasks; 1369 1370 INIT_LIST_HEAD(&tasks); 1371 1372 rcu_read_lock(); 1373 sma = sem_obtain_object_check(ns, semid); 1374 if (IS_ERR(sma)) { 1375 rcu_read_unlock(); 1376 return PTR_ERR(sma); 1377 } 1378 1379 nsems = sma->sem_nsems; 1380 1381 err = -EACCES; 1382 if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? 
S_IWUGO : S_IRUGO)) 1383 goto out_rcu_wakeup; 1384 1385 err = security_sem_semctl(sma, cmd); 1386 if (err) 1387 goto out_rcu_wakeup; 1388 1389 err = -EACCES; 1390 switch (cmd) { 1391 case GETALL: 1392 { 1393 ushort __user *array = p; 1394 int i; 1395 1396 sem_lock(sma, NULL, -1); 1397 if (!ipc_valid_object(&sma->sem_perm)) { 1398 err = -EIDRM; 1399 goto out_unlock; 1400 } 1401 if (nsems > SEMMSL_FAST) { 1402 if (!ipc_rcu_getref(sma)) { 1403 err = -EIDRM; 1404 goto out_unlock; 1405 } 1406 sem_unlock(sma, -1); 1407 rcu_read_unlock(); 1408 sem_io = ipc_alloc(sizeof(ushort)*nsems); 1409 if (sem_io == NULL) { 1410 ipc_rcu_putref(sma, sem_rcu_free); 1411 return -ENOMEM; 1412 } 1413 1414 rcu_read_lock(); 1415 sem_lock_and_putref(sma); 1416 if (!ipc_valid_object(&sma->sem_perm)) { 1417 err = -EIDRM; 1418 goto out_unlock; 1419 } 1420 } 1421 for (i = 0; i < sma->sem_nsems; i++) 1422 sem_io[i] = sma->sem_base[i].semval; 1423 sem_unlock(sma, -1); 1424 rcu_read_unlock(); 1425 err = 0; 1426 if (copy_to_user(array, sem_io, nsems*sizeof(ushort))) 1427 err = -EFAULT; 1428 goto out_free; 1429 } 1430 case SETALL: 1431 { 1432 int i; 1433 struct sem_undo *un; 1434 1435 if (!ipc_rcu_getref(sma)) { 1436 err = -EIDRM; 1437 goto out_rcu_wakeup; 1438 } 1439 rcu_read_unlock(); 1440 1441 if (nsems > SEMMSL_FAST) { 1442 sem_io = ipc_alloc(sizeof(ushort)*nsems); 1443 if (sem_io == NULL) { 1444 ipc_rcu_putref(sma, sem_rcu_free); 1445 return -ENOMEM; 1446 } 1447 } 1448 1449 if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) { 1450 ipc_rcu_putref(sma, sem_rcu_free); 1451 err = -EFAULT; 1452 goto out_free; 1453 } 1454 1455 for (i = 0; i < nsems; i++) { 1456 if (sem_io[i] > SEMVMX) { 1457 ipc_rcu_putref(sma, sem_rcu_free); 1458 err = -ERANGE; 1459 goto out_free; 1460 } 1461 } 1462 rcu_read_lock(); 1463 sem_lock_and_putref(sma); 1464 if (!ipc_valid_object(&sma->sem_perm)) { 1465 err = -EIDRM; 1466 goto out_unlock; 1467 } 1468 1469 for (i = 0; i < nsems; i++) { 1470 sma->sem_base[i].semval = sem_io[i]; 1471 sma->sem_base[i].sempid = task_tgid_vnr(current); 1472 } 1473 1474 ipc_assert_locked_object(&sma->sem_perm); 1475 list_for_each_entry(un, &sma->list_id, list_id) { 1476 for (i = 0; i < nsems; i++) 1477 un->semadj[i] = 0; 1478 } 1479 sma->sem_ctime = get_seconds(); 1480 /* maybe some queued-up processes were waiting for this */ 1481 do_smart_update(sma, NULL, 0, 0, &tasks); 1482 err = 0; 1483 goto out_unlock; 1484 } 1485 /* GETVAL, GETPID, GETNCTN, GETZCNT: fall-through */ 1486 } 1487 err = -EINVAL; 1488 if (semnum < 0 || semnum >= nsems) 1489 goto out_rcu_wakeup; 1490 1491 sem_lock(sma, NULL, -1); 1492 if (!ipc_valid_object(&sma->sem_perm)) { 1493 err = -EIDRM; 1494 goto out_unlock; 1495 } 1496 curr = &sma->sem_base[semnum]; 1497 1498 switch (cmd) { 1499 case GETVAL: 1500 err = curr->semval; 1501 goto out_unlock; 1502 case GETPID: 1503 err = curr->sempid; 1504 goto out_unlock; 1505 case GETNCNT: 1506 err = count_semcnt(sma, semnum, 0); 1507 goto out_unlock; 1508 case GETZCNT: 1509 err = count_semcnt(sma, semnum, 1); 1510 goto out_unlock; 1511 } 1512 1513 out_unlock: 1514 sem_unlock(sma, -1); 1515 out_rcu_wakeup: 1516 rcu_read_unlock(); 1517 wake_up_sem_queue_do(&tasks); 1518 out_free: 1519 if (sem_io != fast_sem_io) 1520 ipc_free(sem_io); 1521 return err; 1522 } 1523 1524 static inline unsigned long 1525 copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version) 1526 { 1527 switch (version) { 1528 case IPC_64: 1529 if (copy_from_user(out, buf, sizeof(*out))) 1530 return -EFAULT; 1531 return 0; 1532 case 
IPC_OLD: 1533 { 1534 struct semid_ds tbuf_old; 1535 1536 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 1537 return -EFAULT; 1538 1539 out->sem_perm.uid = tbuf_old.sem_perm.uid; 1540 out->sem_perm.gid = tbuf_old.sem_perm.gid; 1541 out->sem_perm.mode = tbuf_old.sem_perm.mode; 1542 1543 return 0; 1544 } 1545 default: 1546 return -EINVAL; 1547 } 1548 } 1549 1550 /* 1551 * This function handles some semctl commands which require the rwsem 1552 * to be held in write mode. 1553 * NOTE: no locks must be held, the rwsem is taken inside this function. 1554 */ 1555 static int semctl_down(struct ipc_namespace *ns, int semid, 1556 int cmd, int version, void __user *p) 1557 { 1558 struct sem_array *sma; 1559 int err; 1560 struct semid64_ds semid64; 1561 struct kern_ipc_perm *ipcp; 1562 1563 if (cmd == IPC_SET) { 1564 if (copy_semid_from_user(&semid64, p, version)) 1565 return -EFAULT; 1566 } 1567 1568 down_write(&sem_ids(ns).rwsem); 1569 rcu_read_lock(); 1570 1571 ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd, 1572 &semid64.sem_perm, 0); 1573 if (IS_ERR(ipcp)) { 1574 err = PTR_ERR(ipcp); 1575 goto out_unlock1; 1576 } 1577 1578 sma = container_of(ipcp, struct sem_array, sem_perm); 1579 1580 err = security_sem_semctl(sma, cmd); 1581 if (err) 1582 goto out_unlock1; 1583 1584 switch (cmd) { 1585 case IPC_RMID: 1586 sem_lock(sma, NULL, -1); 1587 /* freeary unlocks the ipc object and rcu */ 1588 freeary(ns, ipcp); 1589 goto out_up; 1590 case IPC_SET: 1591 sem_lock(sma, NULL, -1); 1592 err = ipc_update_perm(&semid64.sem_perm, ipcp); 1593 if (err) 1594 goto out_unlock0; 1595 sma->sem_ctime = get_seconds(); 1596 break; 1597 default: 1598 err = -EINVAL; 1599 goto out_unlock1; 1600 } 1601 1602 out_unlock0: 1603 sem_unlock(sma, -1); 1604 out_unlock1: 1605 rcu_read_unlock(); 1606 out_up: 1607 up_write(&sem_ids(ns).rwsem); 1608 return err; 1609 } 1610 1611 SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg) 1612 { 1613 int version; 1614 struct ipc_namespace *ns; 1615 void __user *p = (void __user *)arg; 1616 1617 if (semid < 0) 1618 return -EINVAL; 1619 1620 version = ipc_parse_version(&cmd); 1621 ns = current->nsproxy->ipc_ns; 1622 1623 switch (cmd) { 1624 case IPC_INFO: 1625 case SEM_INFO: 1626 case IPC_STAT: 1627 case SEM_STAT: 1628 return semctl_nolock(ns, semid, cmd, version, p); 1629 case GETALL: 1630 case GETVAL: 1631 case GETPID: 1632 case GETNCNT: 1633 case GETZCNT: 1634 case SETALL: 1635 return semctl_main(ns, semid, semnum, cmd, p); 1636 case SETVAL: 1637 return semctl_setval(ns, semid, semnum, arg); 1638 case IPC_RMID: 1639 case IPC_SET: 1640 return semctl_down(ns, semid, cmd, version, p); 1641 default: 1642 return -EINVAL; 1643 } 1644 } 1645 1646 /* If the task doesn't already have a undo_list, then allocate one 1647 * here. We guarantee there is only one thread using this undo list, 1648 * and current is THE ONE 1649 * 1650 * If this allocation and assignment succeeds, but later 1651 * portions of this code fail, there is no need to free the sem_undo_list. 1652 * Just let it stay associated with the task, and it'll be freed later 1653 * at exit time. 1654 * 1655 * This can block, so callers must hold no locks. 
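 *
 * For context, the undo list exists to implement SEM_UNDO. An illustrative
 * user-space fragment (not kernel code) that relies on it:
 *
 *	struct sembuf dec = { .sem_num = 0, .sem_op = -1, .sem_flg = SEM_UNDO };
 *	semop(id, &dec, 1);
 *	// if the process exits without releasing the semaphore, exit_sem()
 *	// below applies the recorded +1 adjustment on its behalf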
1656 */ 1657 static inline int get_undo_list(struct sem_undo_list **undo_listp) 1658 { 1659 struct sem_undo_list *undo_list; 1660 1661 undo_list = current->sysvsem.undo_list; 1662 if (!undo_list) { 1663 undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL); 1664 if (undo_list == NULL) 1665 return -ENOMEM; 1666 spin_lock_init(&undo_list->lock); 1667 atomic_set(&undo_list->refcnt, 1); 1668 INIT_LIST_HEAD(&undo_list->list_proc); 1669 1670 current->sysvsem.undo_list = undo_list; 1671 } 1672 *undo_listp = undo_list; 1673 return 0; 1674 } 1675 1676 static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid) 1677 { 1678 struct sem_undo *un; 1679 1680 list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) { 1681 if (un->semid == semid) 1682 return un; 1683 } 1684 return NULL; 1685 } 1686 1687 static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid) 1688 { 1689 struct sem_undo *un; 1690 1691 assert_spin_locked(&ulp->lock); 1692 1693 un = __lookup_undo(ulp, semid); 1694 if (un) { 1695 list_del_rcu(&un->list_proc); 1696 list_add_rcu(&un->list_proc, &ulp->list_proc); 1697 } 1698 return un; 1699 } 1700 1701 /** 1702 * find_alloc_undo - lookup (and if not present create) undo array 1703 * @ns: namespace 1704 * @semid: semaphore array id 1705 * 1706 * The function looks up (and if not present creates) the undo structure. 1707 * The size of the undo structure depends on the size of the semaphore 1708 * array, thus the alloc path is not that straightforward. 1709 * Lifetime-rules: sem_undo is rcu-protected, on success, the function 1710 * performs a rcu_read_lock(). 1711 */ 1712 static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) 1713 { 1714 struct sem_array *sma; 1715 struct sem_undo_list *ulp; 1716 struct sem_undo *un, *new; 1717 int nsems, error; 1718 1719 error = get_undo_list(&ulp); 1720 if (error) 1721 return ERR_PTR(error); 1722 1723 rcu_read_lock(); 1724 spin_lock(&ulp->lock); 1725 un = lookup_undo(ulp, semid); 1726 spin_unlock(&ulp->lock); 1727 if (likely(un != NULL)) 1728 goto out; 1729 1730 /* no undo structure around - allocate one. */ 1731 /* step 1: figure out the size of the semaphore array */ 1732 sma = sem_obtain_object_check(ns, semid); 1733 if (IS_ERR(sma)) { 1734 rcu_read_unlock(); 1735 return ERR_CAST(sma); 1736 } 1737 1738 nsems = sma->sem_nsems; 1739 if (!ipc_rcu_getref(sma)) { 1740 rcu_read_unlock(); 1741 un = ERR_PTR(-EIDRM); 1742 goto out; 1743 } 1744 rcu_read_unlock(); 1745 1746 /* step 2: allocate new undo structure */ 1747 new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); 1748 if (!new) { 1749 ipc_rcu_putref(sma, sem_rcu_free); 1750 return ERR_PTR(-ENOMEM); 1751 } 1752 1753 /* step 3: Acquire the lock on semaphore array */ 1754 rcu_read_lock(); 1755 sem_lock_and_putref(sma); 1756 if (!ipc_valid_object(&sma->sem_perm)) { 1757 sem_unlock(sma, -1); 1758 rcu_read_unlock(); 1759 kfree(new); 1760 un = ERR_PTR(-EIDRM); 1761 goto out; 1762 } 1763 spin_lock(&ulp->lock); 1764 1765 /* 1766 * step 4: check for races: did someone else allocate the undo struct? 
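 *	   (Background: the GFP_KERNEL allocation in step 2 had to run
 *	   without ulp->lock and without the semaphore lock held, so another
 *	   task may have installed an undo structure for the same semid in
 *	   the meantime; if it did, the freshly allocated one is freed again.)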
1767 */ 1768 un = lookup_undo(ulp, semid); 1769 if (un) { 1770 kfree(new); 1771 goto success; 1772 } 1773 /* step 5: initialize & link new undo structure */ 1774 new->semadj = (short *) &new[1]; 1775 new->ulp = ulp; 1776 new->semid = semid; 1777 assert_spin_locked(&ulp->lock); 1778 list_add_rcu(&new->list_proc, &ulp->list_proc); 1779 ipc_assert_locked_object(&sma->sem_perm); 1780 list_add(&new->list_id, &sma->list_id); 1781 un = new; 1782 1783 success: 1784 spin_unlock(&ulp->lock); 1785 sem_unlock(sma, -1); 1786 out: 1787 return un; 1788 } 1789 1790 1791 /** 1792 * get_queue_result - retrieve the result code from sem_queue 1793 * @q: Pointer to queue structure 1794 * 1795 * Retrieve the return code from the pending queue. If IN_WAKEUP is found in 1796 * q->status, then we must loop until the value is replaced with the final 1797 * value: This may happen if a task is woken up by an unrelated event (e.g. 1798 * signal) and in parallel the task is woken up by another task because it got 1799 * the requested semaphores. 1800 * 1801 * The function can be called with or without holding the semaphore spinlock. 1802 */ 1803 static int get_queue_result(struct sem_queue *q) 1804 { 1805 int error; 1806 1807 error = q->status; 1808 while (unlikely(error == IN_WAKEUP)) { 1809 cpu_relax(); 1810 error = q->status; 1811 } 1812 1813 return error; 1814 } 1815 1816 SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, 1817 unsigned, nsops, const struct timespec __user *, timeout) 1818 { 1819 int error = -EINVAL; 1820 struct sem_array *sma; 1821 struct sembuf fast_sops[SEMOPM_FAST]; 1822 struct sembuf *sops = fast_sops, *sop; 1823 struct sem_undo *un; 1824 int undos = 0, alter = 0, max, locknum; 1825 struct sem_queue queue; 1826 unsigned long jiffies_left = 0; 1827 struct ipc_namespace *ns; 1828 struct list_head tasks; 1829 1830 ns = current->nsproxy->ipc_ns; 1831 1832 if (nsops < 1 || semid < 0) 1833 return -EINVAL; 1834 if (nsops > ns->sc_semopm) 1835 return -E2BIG; 1836 if (nsops > SEMOPM_FAST) { 1837 sops = kmalloc(sizeof(*sops)*nsops, GFP_KERNEL); 1838 if (sops == NULL) 1839 return -ENOMEM; 1840 } 1841 if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) { 1842 error = -EFAULT; 1843 goto out_free; 1844 } 1845 if (timeout) { 1846 struct timespec _timeout; 1847 if (copy_from_user(&_timeout, timeout, sizeof(*timeout))) { 1848 error = -EFAULT; 1849 goto out_free; 1850 } 1851 if (_timeout.tv_sec < 0 || _timeout.tv_nsec < 0 || 1852 _timeout.tv_nsec >= 1000000000L) { 1853 error = -EINVAL; 1854 goto out_free; 1855 } 1856 jiffies_left = timespec_to_jiffies(&_timeout); 1857 } 1858 max = 0; 1859 for (sop = sops; sop < sops + nsops; sop++) { 1860 if (sop->sem_num >= max) 1861 max = sop->sem_num; 1862 if (sop->sem_flg & SEM_UNDO) 1863 undos = 1; 1864 if (sop->sem_op != 0) 1865 alter = 1; 1866 } 1867 1868 INIT_LIST_HEAD(&tasks); 1869 1870 if (undos) { 1871 /* On success, find_alloc_undo takes the rcu_read_lock */ 1872 un = find_alloc_undo(ns, semid); 1873 if (IS_ERR(un)) { 1874 error = PTR_ERR(un); 1875 goto out_free; 1876 } 1877 } else { 1878 un = NULL; 1879 rcu_read_lock(); 1880 } 1881 1882 sma = sem_obtain_object_check(ns, semid); 1883 if (IS_ERR(sma)) { 1884 rcu_read_unlock(); 1885 error = PTR_ERR(sma); 1886 goto out_free; 1887 } 1888 1889 error = -EFBIG; 1890 if (max >= sma->sem_nsems) 1891 goto out_rcu_wakeup; 1892 1893 error = -EACCES; 1894 if (ipcperms(ns, &sma->sem_perm, alter ? 
S_IWUGO : S_IRUGO)) 1895 goto out_rcu_wakeup; 1896 1897 error = security_sem_semop(sma, sops, nsops, alter); 1898 if (error) 1899 goto out_rcu_wakeup; 1900 1901 error = -EIDRM; 1902 locknum = sem_lock(sma, sops, nsops); 1903 /* 1904 * We eventually might perform the following check in a lockless 1905 * fashion, considering ipc_valid_object() locking constraints. 1906 * If nsops == 1 and there is no contention for sem_perm.lock, then 1907 * only a per-semaphore lock is held and it's OK to proceed with the 1908 * check below. More details on the fine grained locking scheme 1909 * entangled here and why it's RMID race safe on comments at sem_lock() 1910 */ 1911 if (!ipc_valid_object(&sma->sem_perm)) 1912 goto out_unlock_free; 1913 /* 1914 * semid identifiers are not unique - find_alloc_undo may have 1915 * allocated an undo structure, it was invalidated by an RMID 1916 * and now a new array with received the same id. Check and fail. 1917 * This case can be detected checking un->semid. The existence of 1918 * "un" itself is guaranteed by rcu. 1919 */ 1920 if (un && un->semid == -1) 1921 goto out_unlock_free; 1922 1923 queue.sops = sops; 1924 queue.nsops = nsops; 1925 queue.undo = un; 1926 queue.pid = task_tgid_vnr(current); 1927 queue.alter = alter; 1928 1929 error = perform_atomic_semop(sma, &queue); 1930 if (error == 0) { 1931 /* If the operation was successful, then do 1932 * the required updates. 1933 */ 1934 if (alter) 1935 do_smart_update(sma, sops, nsops, 1, &tasks); 1936 else 1937 set_semotime(sma, sops); 1938 } 1939 if (error <= 0) 1940 goto out_unlock_free; 1941 1942 /* We need to sleep on this operation, so we put the current 1943 * task into the pending queue and go to sleep. 1944 */ 1945 1946 if (nsops == 1) { 1947 struct sem *curr; 1948 curr = &sma->sem_base[sops->sem_num]; 1949 1950 if (alter) { 1951 if (sma->complex_count) { 1952 list_add_tail(&queue.list, 1953 &sma->pending_alter); 1954 } else { 1955 1956 list_add_tail(&queue.list, 1957 &curr->pending_alter); 1958 } 1959 } else { 1960 list_add_tail(&queue.list, &curr->pending_const); 1961 } 1962 } else { 1963 if (!sma->complex_count) 1964 merge_queues(sma); 1965 1966 if (alter) 1967 list_add_tail(&queue.list, &sma->pending_alter); 1968 else 1969 list_add_tail(&queue.list, &sma->pending_const); 1970 1971 sma->complex_count++; 1972 } 1973 1974 queue.status = -EINTR; 1975 queue.sleeper = current; 1976 1977 sleep_again: 1978 __set_current_state(TASK_INTERRUPTIBLE); 1979 sem_unlock(sma, locknum); 1980 rcu_read_unlock(); 1981 1982 if (timeout) 1983 jiffies_left = schedule_timeout(jiffies_left); 1984 else 1985 schedule(); 1986 1987 error = get_queue_result(&queue); 1988 1989 if (error != -EINTR) { 1990 /* fast path: update_queue already obtained all requested 1991 * resources. 1992 * Perform a smp_mb(): User space could assume that semop() 1993 * is a memory barrier: Without the mb(), the cpu could 1994 * speculatively read in user space stale data that was 1995 * overwritten by the previous owner of the semaphore. 1996 */ 1997 smp_mb(); 1998 1999 goto out_free; 2000 } 2001 2002 rcu_read_lock(); 2003 sma = sem_obtain_lock(ns, semid, sops, nsops, &locknum); 2004 2005 /* 2006 * Wait until it's guaranteed that no wakeup_sem_queue_do() is ongoing. 2007 */ 2008 error = get_queue_result(&queue); 2009 2010 /* 2011 * Array removed? If yes, leave without sem_unlock(). 2012 */ 2013 if (IS_ERR(sma)) { 2014 rcu_read_unlock(); 2015 goto out_free; 2016 } 2017 2018 2019 /* 2020 * If queue.status != -EINTR we are woken up by another process. 

	/*
	 * If queue.status != -EINTR, we were woken up by another process.
	 * Leave without unlink_queue(), but with sem_unlock().
	 */
	if (error != -EINTR)
		goto out_unlock_free;

	/*
	 * If an interrupt occurred, we have to clean up the queue.
	 */
	if (timeout && jiffies_left == 0)
		error = -EAGAIN;

	/*
	 * If the wakeup was spurious, just retry.
	 */
	if (error == -EINTR && !signal_pending(current))
		goto sleep_again;

	unlink_queue(sma, &queue);

out_unlock_free:
	sem_unlock(sma, locknum);
out_rcu_wakeup:
	rcu_read_unlock();
	wake_up_sem_queue_do(&tasks);
out_free:
	if (sops != fast_sops)
		kfree(sops);
	return error;
}

SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops,
		unsigned, nsops)
{
	return sys_semtimedop(semid, tsops, nsops, NULL);
}

/* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
 * parent and child tasks.
 */

int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
{
	struct sem_undo_list *undo_list;
	int error;

	if (clone_flags & CLONE_SYSVSEM) {
		error = get_undo_list(&undo_list);
		if (error)
			return error;
		atomic_inc(&undo_list->refcnt);
		tsk->sysvsem.undo_list = undo_list;
	} else
		tsk->sysvsem.undo_list = NULL;

	return 0;
}
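
/*
 * Illustrative note (a minimal userspace sketch, not part of this file's
 * interface): what the shared undo list set up above and consumed by
 * exit_sem() below means for a caller. "semid" is assumed to come from
 * semget(); the values are only an example.
 *
 *	struct sembuf dec = { .sem_num = 0, .sem_op = -1, .sem_flg = SEM_UNDO };
 *
 *	semop(semid, &dec, 1);	// semval drops by 1, semadj[0] becomes +1
 *	exit(0);		// exit_sem() adds semadj back: semval is restored
 *
 * Tasks created with CLONE_SYSVSEM share one sem_undo_list; since
 * exit_sem() only walks the list when the refcount drops to zero, the
 * adjustments are applied once, by the last task using the list.
 */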

/*
 * Add semadj values to the semaphores, then free the undo structures.
 * Undo structures are not freed when semaphore arrays are destroyed, so
 * some of them may be out of date.
 * IMPLEMENTATION NOTE: There is some confusion over whether the set of
 * adjustments should be applied atomically or not. That is, if we are
 * attempting to decrement the semval, should we queue up and wait until
 * we can do so legally? The original implementation attempted to do this
 * (queue and wait). The current implementation does not do so. The POSIX
 * standard and SVID should be consulted to determine what behavior is
 * mandated.
 */
void exit_sem(struct task_struct *tsk)
{
	struct sem_undo_list *ulp;

	ulp = tsk->sysvsem.undo_list;
	if (!ulp)
		return;
	tsk->sysvsem.undo_list = NULL;

	if (!atomic_dec_and_test(&ulp->refcnt))
		return;

	for (;;) {
		struct sem_array *sma;
		struct sem_undo *un;
		struct list_head tasks;
		int semid, i;

		cond_resched();

		rcu_read_lock();
		un = list_entry_rcu(ulp->list_proc.next,
				    struct sem_undo, list_proc);
		if (&un->list_proc == &ulp->list_proc) {
			/*
			 * We must wait for freeary() before freeing this ulp,
			 * in case we raced with the last sem_undo: there is a
			 * small window in which we could exit while freeary()
			 * has not yet finished unlocking the sem_undo_list.
			 */
			spin_unlock_wait(&ulp->lock);
			rcu_read_unlock();
			break;
		}
		spin_lock(&ulp->lock);
		semid = un->semid;
		spin_unlock(&ulp->lock);

		/* exit_sem raced with IPC_RMID, nothing to do */
		if (semid == -1) {
			rcu_read_unlock();
			continue;
		}

		sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid);
		/* exit_sem raced with IPC_RMID, nothing to do */
		if (IS_ERR(sma)) {
			rcu_read_unlock();
			continue;
		}

		sem_lock(sma, NULL, -1);
		/* exit_sem raced with IPC_RMID, nothing to do */
		if (!ipc_valid_object(&sma->sem_perm)) {
			sem_unlock(sma, -1);
			rcu_read_unlock();
			continue;
		}
		un = __lookup_undo(ulp, semid);
		if (un == NULL) {
			/* exit_sem raced with IPC_RMID+semget() that created
			 * exactly the same semid. Nothing to do.
			 */
			sem_unlock(sma, -1);
			rcu_read_unlock();
			continue;
		}

		/* remove un from the linked lists */
		ipc_assert_locked_object(&sma->sem_perm);
		list_del(&un->list_id);

		/* We are the last process using this ulp, so acquiring
		 * ulp->lock isn't required. Besides that, we are also
		 * protected against IPC_RMID, as we hold the sma->sem_perm
		 * lock now.
		 */
		list_del_rcu(&un->list_proc);

		/* perform adjustments registered in un */
		for (i = 0; i < sma->sem_nsems; i++) {
			struct sem *semaphore = &sma->sem_base[i];
			if (un->semadj[i]) {
				semaphore->semval += un->semadj[i];
				/*
				 * Range checks of the new semaphore value,
				 * not defined by SUS:
				 * - Some Unix variants ignore the undo
				 *   entirely (e.g. HP UX 11i 11.22,
				 *   Tru64 V5.1)
				 * - some cap the value (e.g. FreeBSD caps
				 *   at 0, but doesn't enforce SEMVMX)
				 *
				 * Linux caps the semaphore value, both at 0
				 * and at SEMVMX.
				 *
				 * Manfred <manfred@colorfullife.com>
				 */
				if (semaphore->semval < 0)
					semaphore->semval = 0;
				if (semaphore->semval > SEMVMX)
					semaphore->semval = SEMVMX;
				semaphore->sempid = task_tgid_vnr(current);
			}
		}
		/* maybe some queued-up processes were waiting for this */
		INIT_LIST_HEAD(&tasks);
		do_smart_update(sma, NULL, 0, 1, &tasks);
		sem_unlock(sma, -1);
		rcu_read_unlock();
		wake_up_sem_queue_do(&tasks);

		kfree_rcu(un, rcu);
	}
	kfree(ulp);
}

#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
{
	struct user_namespace *user_ns = seq_user_ns(s);
	struct sem_array *sma = it;
	time_t sem_otime;

	/*
	 * The proc interface isn't aware of sem_lock(); it calls
	 * ipc_lock_object() directly (in sysvipc_find_ipc).
	 * In order to stay compatible with sem_lock(), we must
	 * enter / leave complex_mode.
	 */
	complexmode_enter(sma);

	sem_otime = get_semotime(sma);

	seq_printf(s,
		   "%10d %10d %4o %10u %5u %5u %5u %5u %10lu %10lu\n",
		   sma->sem_perm.key,
		   sma->sem_perm.id,
		   sma->sem_perm.mode,
		   sma->sem_nsems,
		   from_kuid_munged(user_ns, sma->sem_perm.uid),
		   from_kgid_munged(user_ns, sma->sem_perm.gid),
		   from_kuid_munged(user_ns, sma->sem_perm.cuid),
		   from_kgid_munged(user_ns, sma->sem_perm.cgid),
		   sem_otime,
		   sma->sem_ctime);

	complexmode_tryleave(sma);

	return 0;
}
#endif
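
/*
 * Illustrative note (an assumed sample; the numbers below are placeholders
 * for the example, not real data): with the format string used in
 * sysvipc_sem_proc_show() above, one record of /proc/sysvipc/sem carries,
 * in order,
 *
 *	key  semid  perms(octal)  nsems  uid  gid  cuid  cgid  otime  ctime
 *
 * so a line might look roughly like
 *
 *	32768      65537  600          1  1000  1000  1000  1000          0 1489493201
 */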