/*
 * linux/ipc/sem.c
 * Copyright (C) 1992 Krishna Balasubramanian
 * Copyright (C) 1995 Eric Schenk, Bruno Haible
 *
 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * SMP-threaded, sysctl's added
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
 * Enforced range limit on SEM_UNDO
 * (c) 2001 Red Hat Inc
 * Lockless wakeup
 * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
 * (c) 2016 Davidlohr Bueso <dave@stgolabs.net>
 * Further wakeup optimizations, documentation
 * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 *
 * Implementation notes: (May 2010)
 * This file implements System V semaphores.
 *
 * User space visible behavior:
 * - FIFO ordering for semop() operations (just FIFO, not starvation
 *   protection)
 * - multiple semaphore operations that alter the same semaphore in
 *   one semop() are handled.
 * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
 *   SETALL calls.
 * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
 * - undo adjustments at process exit are limited to 0..SEMVMX.
 * - namespaces are supported.
 * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
 *   to /proc/sys/kernel/sem.
 * - statistics about the usage are reported in /proc/sysvipc/sem.
 *
 * Internals:
 * - scalability:
 *   - all global variables are read-mostly.
 *   - semop() calls and semctl(RMID) are synchronized by RCU.
 *   - most operations do write operations (actually: spin_lock calls) to
 *     the per-semaphore array structure.
 *     Thus: Perfect SMP scaling between independent semaphore arrays.
 *     If multiple semaphores in one array are used, then cache line
 *     thrashing on the semaphore array spinlock will limit the scaling.
 * - semncnt and semzcnt are calculated on demand in count_semcnt()
 * - the task that performs a successful semop() scans the list of all
 *   sleeping tasks and completes any pending operations that can be
 *   fulfilled. Semaphores are actively given to waiting tasks (necessary
 *   for FIFO). (see update_queue())
 * - To improve the scalability, the actual wake-up calls are performed after
 *   dropping all locks. (see wake_up_sem_queue_prepare())
 * - All work is done by the waker; the woken up task does not have to do
 *   anything - not even acquire a lock or drop a refcount.
 * - A woken up task may not even touch the semaphore array anymore; it may
 *   have been destroyed already by a semctl(RMID).
 * - UNDO values are stored in an array (one per process and per
 *   semaphore array, lazily allocated). For backwards compatibility, multiple
 *   modes for the UNDO variables are supported (per process, per thread)
 *   (see copy_semundo, CLONE_SYSVSEM)
 * - There are two lists of the pending operations: a per-array list
 *   and a per-semaphore list (stored in the array). This makes it possible
 *   to achieve FIFO ordering without always scanning all pending operations.
 *   The worst-case behavior is nevertheless O(N^2) for N wakeups.
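 *
 * Illustrative user-space sketch (not part of the kernel sources, error
 * checking omitted): a minimal program exercising the behavior described
 * above - semget()/semctl() to create and initialize a set, then semop()
 * with SEM_UNDO so the exit-time undo adjustment applies:
 *
 *	#include <sys/ipc.h>
 *	#include <sys/sem.h>
 *
 *	union semun { int val; };	// caller-defined on Linux, see semctl(2)
 *
 *	int main(void)
 *	{
 *		int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
 *		union semun arg = { .val = 1 };
 *		struct sembuf down = { .sem_num = 0, .sem_op = -1,
 *				       .sem_flg = SEM_UNDO };
 *
 *		semctl(id, 0, SETVAL, arg);	// updates sem_ctime
 *		semop(id, &down, 1);		// FIFO-ordered if it must sleep
 *		// the SEM_UNDO adjustment restores the value at exit
 *		semctl(id, 0, IPC_RMID);
 *		return 0;
 *	}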
70 */ 71 72 #include <linux/slab.h> 73 #include <linux/spinlock.h> 74 #include <linux/init.h> 75 #include <linux/proc_fs.h> 76 #include <linux/time.h> 77 #include <linux/security.h> 78 #include <linux/syscalls.h> 79 #include <linux/audit.h> 80 #include <linux/capability.h> 81 #include <linux/seq_file.h> 82 #include <linux/rwsem.h> 83 #include <linux/nsproxy.h> 84 #include <linux/ipc_namespace.h> 85 86 #include <linux/uaccess.h> 87 #include "util.h" 88 89 /* One semaphore structure for each semaphore in the system. */ 90 struct sem { 91 int semval; /* current value */ 92 /* 93 * PID of the process that last modified the semaphore. For 94 * Linux, specifically these are: 95 * - semop 96 * - semctl, via SETVAL and SETALL. 97 * - at task exit when performing undo adjustments (see exit_sem). 98 */ 99 int sempid; 100 spinlock_t lock; /* spinlock for fine-grained semtimedop */ 101 struct list_head pending_alter; /* pending single-sop operations */ 102 /* that alter the semaphore */ 103 struct list_head pending_const; /* pending single-sop operations */ 104 /* that do not alter the semaphore*/ 105 time_t sem_otime; /* candidate for sem_otime */ 106 } ____cacheline_aligned_in_smp; 107 108 /* One queue for each sleeping process in the system. */ 109 struct sem_queue { 110 struct list_head list; /* queue of pending operations */ 111 struct task_struct *sleeper; /* this process */ 112 struct sem_undo *undo; /* undo structure */ 113 int pid; /* process id of requesting process */ 114 int status; /* completion status of operation */ 115 struct sembuf *sops; /* array of pending operations */ 116 struct sembuf *blocking; /* the operation that blocked */ 117 int nsops; /* number of operations */ 118 bool alter; /* does *sops alter the array? */ 119 bool dupsop; /* sops on more than one sem_num */ 120 }; 121 122 /* Each task has a list of undo requests. They are executed automatically 123 * when the process exits. 124 */ 125 struct sem_undo { 126 struct list_head list_proc; /* per-process list: * 127 * all undos from one process 128 * rcu protected */ 129 struct rcu_head rcu; /* rcu struct for sem_undo */ 130 struct sem_undo_list *ulp; /* back ptr to sem_undo_list */ 131 struct list_head list_id; /* per semaphore array list: 132 * all undos for one array */ 133 int semid; /* semaphore set identifier */ 134 short *semadj; /* array of adjustments */ 135 /* one per semaphore */ 136 }; 137 138 /* sem_undo_list controls shared access to the list of sem_undo structures 139 * that may be shared among all a CLONE_SYSVSEM task group. 
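 *
 * (User-space sketch, assuming the glibc clone(3) wrapper and hypothetical
 * child_fn/child_stack names: a task created with CLONE_SYSVSEM shares this
 * structure, so its SEM_UNDO adjustments are merged with the parent's and
 * applied once, when the last sharer exits - see copy_semundo() and
 * exit_sem() below.)
 *
 *	pid = clone(child_fn, child_stack + STACK_SIZE,
 *		    CLONE_SYSVSEM | SIGCHLD, NULL);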
140 */ 141 struct sem_undo_list { 142 atomic_t refcnt; 143 spinlock_t lock; 144 struct list_head list_proc; 145 }; 146 147 148 #define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS]) 149 150 #define sem_checkid(sma, semid) ipc_checkid(&sma->sem_perm, semid) 151 152 static int newary(struct ipc_namespace *, struct ipc_params *); 153 static void freeary(struct ipc_namespace *, struct kern_ipc_perm *); 154 #ifdef CONFIG_PROC_FS 155 static int sysvipc_sem_proc_show(struct seq_file *s, void *it); 156 #endif 157 158 #define SEMMSL_FAST 256 /* 512 bytes on stack */ 159 #define SEMOPM_FAST 64 /* ~ 372 bytes on stack */ 160 161 /* 162 * Locking: 163 * a) global sem_lock() for read/write 164 * sem_undo.id_next, 165 * sem_array.complex_count, 166 * sem_array.complex_mode 167 * sem_array.pending{_alter,_const}, 168 * sem_array.sem_undo 169 * 170 * b) global or semaphore sem_lock() for read/write: 171 * sem_array.sem_base[i].pending_{const,alter}: 172 * sem_array.complex_mode (for read) 173 * 174 * c) special: 175 * sem_undo_list.list_proc: 176 * * undo_list->lock for write 177 * * rcu for read 178 */ 179 180 #define sc_semmsl sem_ctls[0] 181 #define sc_semmns sem_ctls[1] 182 #define sc_semopm sem_ctls[2] 183 #define sc_semmni sem_ctls[3] 184 185 void sem_init_ns(struct ipc_namespace *ns) 186 { 187 ns->sc_semmsl = SEMMSL; 188 ns->sc_semmns = SEMMNS; 189 ns->sc_semopm = SEMOPM; 190 ns->sc_semmni = SEMMNI; 191 ns->used_sems = 0; 192 ipc_init_ids(&ns->ids[IPC_SEM_IDS]); 193 } 194 195 #ifdef CONFIG_IPC_NS 196 void sem_exit_ns(struct ipc_namespace *ns) 197 { 198 free_ipcs(ns, &sem_ids(ns), freeary); 199 idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr); 200 } 201 #endif 202 203 void __init sem_init(void) 204 { 205 sem_init_ns(&init_ipc_ns); 206 ipc_init_proc_interface("sysvipc/sem", 207 " key semid perms nsems uid gid cuid cgid otime ctime\n", 208 IPC_SEM_IDS, sysvipc_sem_proc_show); 209 } 210 211 /** 212 * unmerge_queues - unmerge queues, if possible. 213 * @sma: semaphore array 214 * 215 * The function unmerges the wait queues if complex_count is 0. 216 * It must be called prior to dropping the global semaphore array lock. 217 */ 218 static void unmerge_queues(struct sem_array *sma) 219 { 220 struct sem_queue *q, *tq; 221 222 /* complex operations still around? */ 223 if (sma->complex_count) 224 return; 225 /* 226 * We will switch back to simple mode. 227 * Move all pending operation back into the per-semaphore 228 * queues. 229 */ 230 list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { 231 struct sem *curr; 232 curr = &sma->sem_base[q->sops[0].sem_num]; 233 234 list_add_tail(&q->list, &curr->pending_alter); 235 } 236 INIT_LIST_HEAD(&sma->pending_alter); 237 } 238 239 /** 240 * merge_queues - merge single semop queues into global queue 241 * @sma: semaphore array 242 * 243 * This function merges all per-semaphore queues into the global queue. 244 * It is necessary to achieve FIFO ordering for the pending single-sop 245 * operations when a multi-semop operation must sleep. 246 * Only the alter operations must be moved, the const operations can stay. 
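 *
 * Worked example (descriptive only): a single-sop decrement D1 sleeps on the
 * per-semaphore queue of semaphore 0 when a complex operation C arrives and
 * must sleep as well. While complex_count is nonzero, new single-sop alter
 * operations are queued on the global list (see the sleep path in
 * sys_semtimedop()), so D1 is spliced onto the global list too - otherwise a
 * younger decrement D2 on the global list could be completed ahead of the
 * older D1, breaking the FIFO guarantee.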
 */
static void merge_queues(struct sem_array *sma)
{
	int i;
	for (i = 0; i < sma->sem_nsems; i++) {
		struct sem *sem = sma->sem_base + i;

		list_splice_init(&sem->pending_alter, &sma->pending_alter);
	}
}

static void sem_rcu_free(struct rcu_head *head)
{
	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
	struct sem_array *sma = ipc_rcu_to_struct(p);

	security_sem_free(sma);
	ipc_rcu_free(head);
}

/*
 * Enter the mode suitable for non-simple operations:
 * Caller must own sem_perm.lock.
 */
static void complexmode_enter(struct sem_array *sma)
{
	int i;
	struct sem *sem;

	if (sma->complex_mode) {
		/* We are already in complex_mode. Nothing to do */
		return;
	}

	/* We need a full barrier after setting complex_mode:
	 * The write to complex_mode must be visible
	 * before we read the first sem->lock spinlock state.
	 */
	smp_store_mb(sma->complex_mode, true);

	for (i = 0; i < sma->sem_nsems; i++) {
		sem = sma->sem_base + i;
		spin_unlock_wait(&sem->lock);
	}
	/*
	 * spin_unlock_wait() is not a memory barrier, it is only a
	 * control barrier. The code must pair with spin_unlock(&sem->lock),
	 * so the control barrier alone is insufficient.
	 *
	 * smp_rmb() is sufficient, as writes cannot pass the control barrier.
	 */
	smp_rmb();
}

/*
 * Try to leave the mode that disallows simple operations:
 * Caller must own sem_perm.lock.
 */
static void complexmode_tryleave(struct sem_array *sma)
{
	if (sma->complex_count) {
		/* Complex ops are sleeping.
		 * We must stay in complex mode
		 */
		return;
	}
	/*
	 * Immediately after setting complex_mode to false,
	 * a simple op can start. Thus: all memory writes
	 * performed by the current operation must be visible
	 * before we set complex_mode to false.
	 */
	smp_store_release(&sma->complex_mode, false);
}

#define SEM_GLOBAL_LOCK	(-1)
/*
 * If the request contains only one semaphore operation, and there are
 * no complex transactions pending, lock only the semaphore involved.
 * Otherwise, lock the entire semaphore array, since we either have
 * multiple semaphores in our own semops, or we need to look at
 * semaphores from other pending complex operations.
 */
static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
			   int nsops)
{
	struct sem *sem;

	if (nsops != 1) {
		/* Complex operation - acquire a full lock */
		ipc_lock_object(&sma->sem_perm);

		/* Prevent parallel simple ops */
		complexmode_enter(sma);
		return SEM_GLOBAL_LOCK;
	}

	/*
	 * Only one semaphore affected - try to optimize locking.
	 * Optimized locking is possible if no complex operation
	 * is either enqueued or processed right now.
	 *
	 * Both facts are tracked by complex_mode.
	 */
	sem = sma->sem_base + sops->sem_num;

	/*
	 * Initial check for complex_mode. Just an optimization,
	 * no locking, no memory barrier.
	 */
	if (!sma->complex_mode) {
		/*
		 * It appears that no complex operation is around.
		 * Acquire the per-semaphore lock.
361 */ 362 spin_lock(&sem->lock); 363 364 /* 365 * See 51d7d5205d33 366 * ("powerpc: Add smp_mb() to arch_spin_is_locked()"): 367 * A full barrier is required: the write of sem->lock 368 * must be visible before the read is executed 369 */ 370 smp_mb(); 371 372 if (!smp_load_acquire(&sma->complex_mode)) { 373 /* fast path successful! */ 374 return sops->sem_num; 375 } 376 spin_unlock(&sem->lock); 377 } 378 379 /* slow path: acquire the full lock */ 380 ipc_lock_object(&sma->sem_perm); 381 382 if (sma->complex_count == 0) { 383 /* False alarm: 384 * There is no complex operation, thus we can switch 385 * back to the fast path. 386 */ 387 spin_lock(&sem->lock); 388 ipc_unlock_object(&sma->sem_perm); 389 return sops->sem_num; 390 } else { 391 /* Not a false alarm, thus complete the sequence for a 392 * full lock. 393 */ 394 complexmode_enter(sma); 395 return SEM_GLOBAL_LOCK; 396 } 397 } 398 399 static inline void sem_unlock(struct sem_array *sma, int locknum) 400 { 401 if (locknum == SEM_GLOBAL_LOCK) { 402 unmerge_queues(sma); 403 complexmode_tryleave(sma); 404 ipc_unlock_object(&sma->sem_perm); 405 } else { 406 struct sem *sem = sma->sem_base + locknum; 407 spin_unlock(&sem->lock); 408 } 409 } 410 411 /* 412 * sem_lock_(check_) routines are called in the paths where the rwsem 413 * is not held. 414 * 415 * The caller holds the RCU read lock. 416 */ 417 static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id) 418 { 419 struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id); 420 421 if (IS_ERR(ipcp)) 422 return ERR_CAST(ipcp); 423 424 return container_of(ipcp, struct sem_array, sem_perm); 425 } 426 427 static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns, 428 int id) 429 { 430 struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id); 431 432 if (IS_ERR(ipcp)) 433 return ERR_CAST(ipcp); 434 435 return container_of(ipcp, struct sem_array, sem_perm); 436 } 437 438 static inline void sem_lock_and_putref(struct sem_array *sma) 439 { 440 sem_lock(sma, NULL, -1); 441 ipc_rcu_putref(sma, sem_rcu_free); 442 } 443 444 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) 445 { 446 ipc_rmid(&sem_ids(ns), &s->sem_perm); 447 } 448 449 /** 450 * newary - Create a new semaphore set 451 * @ns: namespace 452 * @params: ptr to the structure that contains key, semflg and nsems 453 * 454 * Called with sem_ids.rwsem held (as a writer) 455 */ 456 static int newary(struct ipc_namespace *ns, struct ipc_params *params) 457 { 458 int id; 459 int retval; 460 struct sem_array *sma; 461 int size; 462 key_t key = params->key; 463 int nsems = params->u.nsems; 464 int semflg = params->flg; 465 int i; 466 467 if (!nsems) 468 return -EINVAL; 469 if (ns->used_sems + nsems > ns->sc_semmns) 470 return -ENOSPC; 471 472 size = sizeof(*sma) + nsems * sizeof(struct sem); 473 sma = ipc_rcu_alloc(size); 474 if (!sma) 475 return -ENOMEM; 476 477 memset(sma, 0, size); 478 479 sma->sem_perm.mode = (semflg & S_IRWXUGO); 480 sma->sem_perm.key = key; 481 482 sma->sem_perm.security = NULL; 483 retval = security_sem_alloc(sma); 484 if (retval) { 485 ipc_rcu_putref(sma, ipc_rcu_free); 486 return retval; 487 } 488 489 sma->sem_base = (struct sem *) &sma[1]; 490 491 for (i = 0; i < nsems; i++) { 492 INIT_LIST_HEAD(&sma->sem_base[i].pending_alter); 493 INIT_LIST_HEAD(&sma->sem_base[i].pending_const); 494 spin_lock_init(&sma->sem_base[i].lock); 495 } 496 497 sma->complex_count = 0; 498 sma->complex_mode = true; /* dropped by sem_unlock below */ 
499 INIT_LIST_HEAD(&sma->pending_alter); 500 INIT_LIST_HEAD(&sma->pending_const); 501 INIT_LIST_HEAD(&sma->list_id); 502 sma->sem_nsems = nsems; 503 sma->sem_ctime = get_seconds(); 504 505 id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); 506 if (id < 0) { 507 ipc_rcu_putref(sma, sem_rcu_free); 508 return id; 509 } 510 ns->used_sems += nsems; 511 512 sem_unlock(sma, -1); 513 rcu_read_unlock(); 514 515 return sma->sem_perm.id; 516 } 517 518 519 /* 520 * Called with sem_ids.rwsem and ipcp locked. 521 */ 522 static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg) 523 { 524 struct sem_array *sma; 525 526 sma = container_of(ipcp, struct sem_array, sem_perm); 527 return security_sem_associate(sma, semflg); 528 } 529 530 /* 531 * Called with sem_ids.rwsem and ipcp locked. 532 */ 533 static inline int sem_more_checks(struct kern_ipc_perm *ipcp, 534 struct ipc_params *params) 535 { 536 struct sem_array *sma; 537 538 sma = container_of(ipcp, struct sem_array, sem_perm); 539 if (params->u.nsems > sma->sem_nsems) 540 return -EINVAL; 541 542 return 0; 543 } 544 545 SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) 546 { 547 struct ipc_namespace *ns; 548 static const struct ipc_ops sem_ops = { 549 .getnew = newary, 550 .associate = sem_security, 551 .more_checks = sem_more_checks, 552 }; 553 struct ipc_params sem_params; 554 555 ns = current->nsproxy->ipc_ns; 556 557 if (nsems < 0 || nsems > ns->sc_semmsl) 558 return -EINVAL; 559 560 sem_params.key = key; 561 sem_params.flg = semflg; 562 sem_params.u.nsems = nsems; 563 564 return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); 565 } 566 567 /** 568 * perform_atomic_semop[_slow] - Attempt to perform semaphore 569 * operations on a given array. 570 * @sma: semaphore array 571 * @q: struct sem_queue that describes the operation 572 * 573 * Caller blocking are as follows, based the value 574 * indicated by the semaphore operation (sem_op): 575 * 576 * (1) >0 never blocks. 577 * (2) 0 (wait-for-zero operation): semval is non-zero. 578 * (3) <0 attempting to decrement semval to a value smaller than zero. 579 * 580 * Returns 0 if the operation was possible. 581 * Returns 1 if the operation is impossible, the caller must sleep. 582 * Returns <0 for error codes. 583 */ 584 static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q) 585 { 586 int result, sem_op, nsops, pid; 587 struct sembuf *sop; 588 struct sem *curr; 589 struct sembuf *sops; 590 struct sem_undo *un; 591 592 sops = q->sops; 593 nsops = q->nsops; 594 un = q->undo; 595 596 for (sop = sops; sop < sops + nsops; sop++) { 597 curr = sma->sem_base + sop->sem_num; 598 sem_op = sop->sem_op; 599 result = curr->semval; 600 601 if (!sem_op && result) 602 goto would_block; 603 604 result += sem_op; 605 if (result < 0) 606 goto would_block; 607 if (result > SEMVMX) 608 goto out_of_range; 609 610 if (sop->sem_flg & SEM_UNDO) { 611 int undo = un->semadj[sop->sem_num] - sem_op; 612 /* Exceeding the undo range is an error. 
*/ 613 if (undo < (-SEMAEM - 1) || undo > SEMAEM) 614 goto out_of_range; 615 un->semadj[sop->sem_num] = undo; 616 } 617 618 curr->semval = result; 619 } 620 621 sop--; 622 pid = q->pid; 623 while (sop >= sops) { 624 sma->sem_base[sop->sem_num].sempid = pid; 625 sop--; 626 } 627 628 return 0; 629 630 out_of_range: 631 result = -ERANGE; 632 goto undo; 633 634 would_block: 635 q->blocking = sop; 636 637 if (sop->sem_flg & IPC_NOWAIT) 638 result = -EAGAIN; 639 else 640 result = 1; 641 642 undo: 643 sop--; 644 while (sop >= sops) { 645 sem_op = sop->sem_op; 646 sma->sem_base[sop->sem_num].semval -= sem_op; 647 if (sop->sem_flg & SEM_UNDO) 648 un->semadj[sop->sem_num] += sem_op; 649 sop--; 650 } 651 652 return result; 653 } 654 655 static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q) 656 { 657 int result, sem_op, nsops; 658 struct sembuf *sop; 659 struct sem *curr; 660 struct sembuf *sops; 661 struct sem_undo *un; 662 663 sops = q->sops; 664 nsops = q->nsops; 665 un = q->undo; 666 667 if (unlikely(q->dupsop)) 668 return perform_atomic_semop_slow(sma, q); 669 670 /* 671 * We scan the semaphore set twice, first to ensure that the entire 672 * operation can succeed, therefore avoiding any pointless writes 673 * to shared memory and having to undo such changes in order to block 674 * until the operations can go through. 675 */ 676 for (sop = sops; sop < sops + nsops; sop++) { 677 curr = sma->sem_base + sop->sem_num; 678 sem_op = sop->sem_op; 679 result = curr->semval; 680 681 if (!sem_op && result) 682 goto would_block; /* wait-for-zero */ 683 684 result += sem_op; 685 if (result < 0) 686 goto would_block; 687 688 if (result > SEMVMX) 689 return -ERANGE; 690 691 if (sop->sem_flg & SEM_UNDO) { 692 int undo = un->semadj[sop->sem_num] - sem_op; 693 694 /* Exceeding the undo range is an error. */ 695 if (undo < (-SEMAEM - 1) || undo > SEMAEM) 696 return -ERANGE; 697 } 698 } 699 700 for (sop = sops; sop < sops + nsops; sop++) { 701 curr = sma->sem_base + sop->sem_num; 702 sem_op = sop->sem_op; 703 result = curr->semval; 704 705 if (sop->sem_flg & SEM_UNDO) { 706 int undo = un->semadj[sop->sem_num] - sem_op; 707 708 un->semadj[sop->sem_num] = undo; 709 } 710 curr->semval += sem_op; 711 curr->sempid = q->pid; 712 } 713 714 return 0; 715 716 would_block: 717 q->blocking = sop; 718 return sop->sem_flg & IPC_NOWAIT ? -EAGAIN : 1; 719 } 720 721 static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error, 722 struct wake_q_head *wake_q) 723 { 724 wake_q_add(wake_q, q->sleeper); 725 /* 726 * Rely on the above implicit barrier, such that we can 727 * ensure that we hold reference to the task before setting 728 * q->status. Otherwise we could race with do_exit if the 729 * task is awoken by an external event before calling 730 * wake_up_process(). 731 */ 732 WRITE_ONCE(q->status, error); 733 } 734 735 static void unlink_queue(struct sem_array *sma, struct sem_queue *q) 736 { 737 list_del(&q->list); 738 if (q->nsops > 1) 739 sma->complex_count--; 740 } 741 742 /** check_restart(sma, q) 743 * @sma: semaphore array 744 * @q: the operation that just completed 745 * 746 * update_queue is O(N^2) when it restarts scanning the whole queue of 747 * waiting operations. Therefore this function checks if the restart is 748 * really necessary. It is called after a previously waiting operation 749 * modified the array. 750 * Note that wait-for-zero operations are handled without restart. 
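 *
 * Worked example (descriptive only): if q was a single-sop decrement that
 * took semaphore 5 from 2 down to 1, then any older decrement of semaphore 5
 * already saw the larger value 2 and still had to sleep; it certainly cannot
 * proceed at 1, so a rescan of the queue would find no new candidate and the
 * restart can be skipped.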
 */
static inline int check_restart(struct sem_array *sma, struct sem_queue *q)
{
	/* pending complex alter operations are too difficult to analyse */
	if (!list_empty(&sma->pending_alter))
		return 1;

	/* we were a sleeping complex operation. Too difficult */
	if (q->nsops > 1)
		return 1;

	/* It is impossible that someone waits for the new value:
	 * - complex operations always restart.
	 * - wait-for-zero are handled separately.
	 * - q is a previously sleeping simple operation that
	 *   altered the array. It must be a decrement, because
	 *   simple increments never sleep.
	 * - If there are older (higher priority) decrements
	 *   in the queue, then they have observed the original
	 *   semval value and couldn't proceed. The operation
	 *   decremented the value - thus they won't proceed either.
	 */
	return 0;
}

/**
 * wake_const_ops - wake up non-alter tasks
 * @sma: semaphore array.
 * @semnum: semaphore that was modified.
 * @wake_q: lockless wake-queue head.
 *
 * wake_const_ops must be called after a semaphore in a semaphore array
 * was set to 0. If complex const operations are pending, wake_const_ops must
 * be called with semnum = -1, as well as with the number of each modified
 * semaphore.
 * The tasks that must be woken up are added to @wake_q. The return code
 * is stored in q->status.
 * The function returns 1 if at least one operation was completed successfully.
 */
static int wake_const_ops(struct sem_array *sma, int semnum,
			  struct wake_q_head *wake_q)
{
	struct sem_queue *q, *tmp;
	struct list_head *pending_list;
	int semop_completed = 0;

	if (semnum == -1)
		pending_list = &sma->pending_const;
	else
		pending_list = &sma->sem_base[semnum].pending_const;

	list_for_each_entry_safe(q, tmp, pending_list, list) {
		int error = perform_atomic_semop(sma, q);

		if (error > 0)
			continue;
		/* operation completed, remove from queue & wakeup */
		unlink_queue(sma, q);

		wake_up_sem_queue_prepare(q, error, wake_q);
		if (error == 0)
			semop_completed = 1;
	}

	return semop_completed;
}

/**
 * do_smart_wakeup_zero - wake up all wait-for-zero tasks
 * @sma: semaphore array
 * @sops: operations that were performed
 * @nsops: number of operations
 * @wake_q: lockless wake-queue head
 *
 * Checks all required queues for wait-for-zero operations, based
 * on the actual changes that were performed on the semaphore array.
 * The function returns 1 if at least one operation was completed successfully.
 */
static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
				int nsops, struct wake_q_head *wake_q)
{
	int i;
	int semop_completed = 0;
	int got_zero = 0;

	/* first: the per-semaphore queues, if known */
	if (sops) {
		for (i = 0; i < nsops; i++) {
			int num = sops[i].sem_num;

			if (sma->sem_base[num].semval == 0) {
				got_zero = 1;
				semop_completed |= wake_const_ops(sma, num, wake_q);
			}
		}
	} else {
		/*
		 * No sops means modified semaphores not known.
		 * Assume all were changed.
		 */
		for (i = 0; i < sma->sem_nsems; i++) {
			if (sma->sem_base[i].semval == 0) {
				got_zero = 1;
				semop_completed |= wake_const_ops(sma, i, wake_q);
			}
		}
	}
	/*
	 * If one of the modified semaphores got 0,
	 * then check the global queue, too.
	 */
	if (got_zero)
		semop_completed |= wake_const_ops(sma, -1, wake_q);

	return semop_completed;
}


/**
 * update_queue - look for tasks that can be completed.
 * @sma: semaphore array.
 * @semnum: semaphore that was modified.
 * @wake_q: lockless wake-queue head.
 *
 * update_queue must be called after a semaphore in a semaphore array
 * was modified. If multiple semaphores were modified, update_queue must
 * be called with semnum = -1, as well as with the number of each modified
 * semaphore.
 * The tasks that must be woken up are added to @wake_q. The return code
 * is stored in q->status.
 * The function internally checks if const operations can now succeed.
 *
 * The function returns 1 if at least one semop was completed successfully.
 */
static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *wake_q)
{
	struct sem_queue *q, *tmp;
	struct list_head *pending_list;
	int semop_completed = 0;

	if (semnum == -1)
		pending_list = &sma->pending_alter;
	else
		pending_list = &sma->sem_base[semnum].pending_alter;

again:
	list_for_each_entry_safe(q, tmp, pending_list, list) {
		int error, restart;

		/* If we are scanning the single sop, per-semaphore list of
		 * one semaphore and that semaphore is 0, then it is not
		 * necessary to scan further: simple increments
		 * that affect only one entry succeed immediately and cannot
		 * be in the per-semaphore pending queue, and decrements
		 * cannot be successful if the value is already 0.
		 */
		if (semnum != -1 && sma->sem_base[semnum].semval == 0)
			break;

		error = perform_atomic_semop(sma, q);

		/* Does q->sleeper still need to sleep? */
		if (error > 0)
			continue;

		unlink_queue(sma, q);

		if (error) {
			restart = 0;
		} else {
			semop_completed = 1;
			do_smart_wakeup_zero(sma, q->sops, q->nsops, wake_q);
			restart = check_restart(sma, q);
		}

		wake_up_sem_queue_prepare(q, error, wake_q);
		if (restart)
			goto again;
	}
	return semop_completed;
}

/**
 * set_semotime - set sem_otime
 * @sma: semaphore array
 * @sops: operations that modified the array, may be NULL
 *
 * sem_otime is replicated to avoid cache line thrashing.
 * This function sets one instance to the current time.
 */
static void set_semotime(struct sem_array *sma, struct sembuf *sops)
{
	if (sops == NULL) {
		sma->sem_base[0].sem_otime = get_seconds();
	} else {
		sma->sem_base[sops[0].sem_num].sem_otime =
						get_seconds();
	}
}

/**
 * do_smart_update - optimized update_queue
 * @sma: semaphore array
 * @sops: operations that were performed
 * @nsops: number of operations
 * @otime: force setting otime
 * @wake_q: lockless wake-queue head
 *
 * do_smart_update() does the required calls to update_queue and
 * do_smart_wakeup_zero, based on the actual changes that were performed on
 * the semaphore array.
 * Note that the function does not do the actual wake-up: the caller is
 * responsible for calling wake_up_q().
 * It is safe to perform this call after dropping all locks.
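 *
 * Typical caller pattern (a sketch only; it mirrors semctl_setval() below):
 *
 *	DEFINE_WAKE_Q(wake_q);
 *
 *	rcu_read_lock();
 *	...
 *	sem_lock(sma, NULL, -1);
 *	... modify semaphore values ...
 *	do_smart_update(sma, NULL, 0, 0, &wake_q);
 *	sem_unlock(sma, -1);
 *	rcu_read_unlock();
 *	wake_up_q(&wake_q);	// the actual wake-ups, after all locks are dropped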
 */
static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops,
			    int otime, struct wake_q_head *wake_q)
{
	int i;

	otime |= do_smart_wakeup_zero(sma, sops, nsops, wake_q);

	if (!list_empty(&sma->pending_alter)) {
		/* semaphore array uses the global queue - just process it. */
		otime |= update_queue(sma, -1, wake_q);
	} else {
		if (!sops) {
			/*
			 * No sops, thus the modified semaphores are not
			 * known. Check all.
			 */
			for (i = 0; i < sma->sem_nsems; i++)
				otime |= update_queue(sma, i, wake_q);
		} else {
			/*
			 * Check the semaphores that were increased:
			 * - No complex ops, thus all sleeping ops are
			 *   decrements.
			 * - if we decreased the value, then any sleeping
			 *   semaphore ops won't be able to run: If the
			 *   previous value was too small, then the new
			 *   value will be too small, too.
			 */
			for (i = 0; i < nsops; i++) {
				if (sops[i].sem_op > 0) {
					otime |= update_queue(sma,
							      sops[i].sem_num, wake_q);
				}
			}
		}
	}
	if (otime)
		set_semotime(sma, sops);
}

/*
 * check_qop: Test if a queued operation sleeps on the semaphore semnum
 */
static int check_qop(struct sem_array *sma, int semnum, struct sem_queue *q,
			bool count_zero)
{
	struct sembuf *sop = q->blocking;

	/*
	 * Linux always (since 0.99.10) reported a task as sleeping on all
	 * semaphores. This violates SUS, therefore it was changed to the
	 * standard compliant behavior.
	 * Give the administrators a chance to notice that an application
	 * might misbehave because it relies on the Linux behavior.
	 */
	pr_info_once("semctl(GETNCNT/GETZCNT) is since 3.16 Single Unix Specification compliant.\n"
			"The task %s (%d) triggered the difference, watch for misbehavior.\n",
			current->comm, task_pid_nr(current));

	if (sop->sem_num != semnum)
		return 0;

	if (count_zero && sop->sem_op == 0)
		return 1;
	if (!count_zero && sop->sem_op < 0)
		return 1;

	return 0;
}

/* The following counts are associated with each semaphore:
 *   semncnt        number of tasks waiting on semval being nonzero
 *   semzcnt        number of tasks waiting on semval being zero
 *
 * By definition, a task waits only on the semaphore of the first semop
 * that cannot proceed, even if additional operations would block, too.
 */
static int count_semcnt(struct sem_array *sma, ushort semnum,
			bool count_zero)
{
	struct list_head *l;
	struct sem_queue *q;
	int semcnt;

	semcnt = 0;
	/* First: check the simple operations. They are easy to evaluate */
	if (count_zero)
		l = &sma->sem_base[semnum].pending_const;
	else
		l = &sma->sem_base[semnum].pending_alter;

	list_for_each_entry(q, l, list) {
		/* all tasks on a per-semaphore list sleep on exactly
		 * that semaphore
		 */
		semcnt++;
	}

	/* Then: check the complex operations. */
	list_for_each_entry(q, &sma->pending_alter, list) {
		semcnt += check_qop(sma, semnum, q, count_zero);
	}
	if (count_zero) {
		list_for_each_entry(q, &sma->pending_const, list) {
			semcnt += check_qop(sma, semnum, q, count_zero);
		}
	}
	return semcnt;
}

/* Free a semaphore set. freeary() is called with sem_ids.rwsem locked
 * as a writer and the spinlock for this semaphore set held. sem_ids.rwsem
 * remains locked on exit.
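 *
 * freeary() is reached from semctl_down(IPC_RMID) and, via free_ipcs(), from
 * sem_exit_ns(). A sketch of the RMID path (cf. semctl_down() below):
 *
 *	down_write(&sem_ids(ns).rwsem);
 *	rcu_read_lock();
 *	...
 *	sem_lock(sma, NULL, -1);
 *	freeary(ns, ipcp);	// drops the ipc object lock and the RCU read lock
 *	up_write(&sem_ids(ns).rwsem);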
1078 */ 1079 static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 1080 { 1081 struct sem_undo *un, *tu; 1082 struct sem_queue *q, *tq; 1083 struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); 1084 int i; 1085 DEFINE_WAKE_Q(wake_q); 1086 1087 /* Free the existing undo structures for this semaphore set. */ 1088 ipc_assert_locked_object(&sma->sem_perm); 1089 list_for_each_entry_safe(un, tu, &sma->list_id, list_id) { 1090 list_del(&un->list_id); 1091 spin_lock(&un->ulp->lock); 1092 un->semid = -1; 1093 list_del_rcu(&un->list_proc); 1094 spin_unlock(&un->ulp->lock); 1095 kfree_rcu(un, rcu); 1096 } 1097 1098 /* Wake up all pending processes and let them fail with EIDRM. */ 1099 list_for_each_entry_safe(q, tq, &sma->pending_const, list) { 1100 unlink_queue(sma, q); 1101 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1102 } 1103 1104 list_for_each_entry_safe(q, tq, &sma->pending_alter, list) { 1105 unlink_queue(sma, q); 1106 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1107 } 1108 for (i = 0; i < sma->sem_nsems; i++) { 1109 struct sem *sem = sma->sem_base + i; 1110 list_for_each_entry_safe(q, tq, &sem->pending_const, list) { 1111 unlink_queue(sma, q); 1112 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1113 } 1114 list_for_each_entry_safe(q, tq, &sem->pending_alter, list) { 1115 unlink_queue(sma, q); 1116 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); 1117 } 1118 } 1119 1120 /* Remove the semaphore set from the IDR */ 1121 sem_rmid(ns, sma); 1122 sem_unlock(sma, -1); 1123 rcu_read_unlock(); 1124 1125 wake_up_q(&wake_q); 1126 ns->used_sems -= sma->sem_nsems; 1127 ipc_rcu_putref(sma, sem_rcu_free); 1128 } 1129 1130 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) 1131 { 1132 switch (version) { 1133 case IPC_64: 1134 return copy_to_user(buf, in, sizeof(*in)); 1135 case IPC_OLD: 1136 { 1137 struct semid_ds out; 1138 1139 memset(&out, 0, sizeof(out)); 1140 1141 ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm); 1142 1143 out.sem_otime = in->sem_otime; 1144 out.sem_ctime = in->sem_ctime; 1145 out.sem_nsems = in->sem_nsems; 1146 1147 return copy_to_user(buf, &out, sizeof(out)); 1148 } 1149 default: 1150 return -EINVAL; 1151 } 1152 } 1153 1154 static time_t get_semotime(struct sem_array *sma) 1155 { 1156 int i; 1157 time_t res; 1158 1159 res = sma->sem_base[0].sem_otime; 1160 for (i = 1; i < sma->sem_nsems; i++) { 1161 time_t to = sma->sem_base[i].sem_otime; 1162 1163 if (to > res) 1164 res = to; 1165 } 1166 return res; 1167 } 1168 1169 static int semctl_nolock(struct ipc_namespace *ns, int semid, 1170 int cmd, int version, void __user *p) 1171 { 1172 int err; 1173 struct sem_array *sma; 1174 1175 switch (cmd) { 1176 case IPC_INFO: 1177 case SEM_INFO: 1178 { 1179 struct seminfo seminfo; 1180 int max_id; 1181 1182 err = security_sem_semctl(NULL, cmd); 1183 if (err) 1184 return err; 1185 1186 memset(&seminfo, 0, sizeof(seminfo)); 1187 seminfo.semmni = ns->sc_semmni; 1188 seminfo.semmns = ns->sc_semmns; 1189 seminfo.semmsl = ns->sc_semmsl; 1190 seminfo.semopm = ns->sc_semopm; 1191 seminfo.semvmx = SEMVMX; 1192 seminfo.semmnu = SEMMNU; 1193 seminfo.semmap = SEMMAP; 1194 seminfo.semume = SEMUME; 1195 down_read(&sem_ids(ns).rwsem); 1196 if (cmd == SEM_INFO) { 1197 seminfo.semusz = sem_ids(ns).in_use; 1198 seminfo.semaem = ns->used_sems; 1199 } else { 1200 seminfo.semusz = SEMUSZ; 1201 seminfo.semaem = SEMAEM; 1202 } 1203 max_id = ipc_get_maxid(&sem_ids(ns)); 1204 up_read(&sem_ids(ns).rwsem); 1205 if (copy_to_user(p, 
&seminfo, sizeof(struct seminfo))) 1206 return -EFAULT; 1207 return (max_id < 0) ? 0 : max_id; 1208 } 1209 case IPC_STAT: 1210 case SEM_STAT: 1211 { 1212 struct semid64_ds tbuf; 1213 int id = 0; 1214 1215 memset(&tbuf, 0, sizeof(tbuf)); 1216 1217 rcu_read_lock(); 1218 if (cmd == SEM_STAT) { 1219 sma = sem_obtain_object(ns, semid); 1220 if (IS_ERR(sma)) { 1221 err = PTR_ERR(sma); 1222 goto out_unlock; 1223 } 1224 id = sma->sem_perm.id; 1225 } else { 1226 sma = sem_obtain_object_check(ns, semid); 1227 if (IS_ERR(sma)) { 1228 err = PTR_ERR(sma); 1229 goto out_unlock; 1230 } 1231 } 1232 1233 err = -EACCES; 1234 if (ipcperms(ns, &sma->sem_perm, S_IRUGO)) 1235 goto out_unlock; 1236 1237 err = security_sem_semctl(sma, cmd); 1238 if (err) 1239 goto out_unlock; 1240 1241 kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm); 1242 tbuf.sem_otime = get_semotime(sma); 1243 tbuf.sem_ctime = sma->sem_ctime; 1244 tbuf.sem_nsems = sma->sem_nsems; 1245 rcu_read_unlock(); 1246 if (copy_semid_to_user(p, &tbuf, version)) 1247 return -EFAULT; 1248 return id; 1249 } 1250 default: 1251 return -EINVAL; 1252 } 1253 out_unlock: 1254 rcu_read_unlock(); 1255 return err; 1256 } 1257 1258 static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, 1259 unsigned long arg) 1260 { 1261 struct sem_undo *un; 1262 struct sem_array *sma; 1263 struct sem *curr; 1264 int err, val; 1265 DEFINE_WAKE_Q(wake_q); 1266 1267 #if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN) 1268 /* big-endian 64bit */ 1269 val = arg >> 32; 1270 #else 1271 /* 32bit or little-endian 64bit */ 1272 val = arg; 1273 #endif 1274 1275 if (val > SEMVMX || val < 0) 1276 return -ERANGE; 1277 1278 rcu_read_lock(); 1279 sma = sem_obtain_object_check(ns, semid); 1280 if (IS_ERR(sma)) { 1281 rcu_read_unlock(); 1282 return PTR_ERR(sma); 1283 } 1284 1285 if (semnum < 0 || semnum >= sma->sem_nsems) { 1286 rcu_read_unlock(); 1287 return -EINVAL; 1288 } 1289 1290 1291 if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) { 1292 rcu_read_unlock(); 1293 return -EACCES; 1294 } 1295 1296 err = security_sem_semctl(sma, SETVAL); 1297 if (err) { 1298 rcu_read_unlock(); 1299 return -EACCES; 1300 } 1301 1302 sem_lock(sma, NULL, -1); 1303 1304 if (!ipc_valid_object(&sma->sem_perm)) { 1305 sem_unlock(sma, -1); 1306 rcu_read_unlock(); 1307 return -EIDRM; 1308 } 1309 1310 curr = &sma->sem_base[semnum]; 1311 1312 ipc_assert_locked_object(&sma->sem_perm); 1313 list_for_each_entry(un, &sma->list_id, list_id) 1314 un->semadj[semnum] = 0; 1315 1316 curr->semval = val; 1317 curr->sempid = task_tgid_vnr(current); 1318 sma->sem_ctime = get_seconds(); 1319 /* maybe some queued-up processes were waiting for this */ 1320 do_smart_update(sma, NULL, 0, 0, &wake_q); 1321 sem_unlock(sma, -1); 1322 rcu_read_unlock(); 1323 wake_up_q(&wake_q); 1324 return 0; 1325 } 1326 1327 static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, 1328 int cmd, void __user *p) 1329 { 1330 struct sem_array *sma; 1331 struct sem *curr; 1332 int err, nsems; 1333 ushort fast_sem_io[SEMMSL_FAST]; 1334 ushort *sem_io = fast_sem_io; 1335 DEFINE_WAKE_Q(wake_q); 1336 1337 rcu_read_lock(); 1338 sma = sem_obtain_object_check(ns, semid); 1339 if (IS_ERR(sma)) { 1340 rcu_read_unlock(); 1341 return PTR_ERR(sma); 1342 } 1343 1344 nsems = sma->sem_nsems; 1345 1346 err = -EACCES; 1347 if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? 
S_IWUGO : S_IRUGO)) 1348 goto out_rcu_wakeup; 1349 1350 err = security_sem_semctl(sma, cmd); 1351 if (err) 1352 goto out_rcu_wakeup; 1353 1354 err = -EACCES; 1355 switch (cmd) { 1356 case GETALL: 1357 { 1358 ushort __user *array = p; 1359 int i; 1360 1361 sem_lock(sma, NULL, -1); 1362 if (!ipc_valid_object(&sma->sem_perm)) { 1363 err = -EIDRM; 1364 goto out_unlock; 1365 } 1366 if (nsems > SEMMSL_FAST) { 1367 if (!ipc_rcu_getref(sma)) { 1368 err = -EIDRM; 1369 goto out_unlock; 1370 } 1371 sem_unlock(sma, -1); 1372 rcu_read_unlock(); 1373 sem_io = ipc_alloc(sizeof(ushort)*nsems); 1374 if (sem_io == NULL) { 1375 ipc_rcu_putref(sma, sem_rcu_free); 1376 return -ENOMEM; 1377 } 1378 1379 rcu_read_lock(); 1380 sem_lock_and_putref(sma); 1381 if (!ipc_valid_object(&sma->sem_perm)) { 1382 err = -EIDRM; 1383 goto out_unlock; 1384 } 1385 } 1386 for (i = 0; i < sma->sem_nsems; i++) 1387 sem_io[i] = sma->sem_base[i].semval; 1388 sem_unlock(sma, -1); 1389 rcu_read_unlock(); 1390 err = 0; 1391 if (copy_to_user(array, sem_io, nsems*sizeof(ushort))) 1392 err = -EFAULT; 1393 goto out_free; 1394 } 1395 case SETALL: 1396 { 1397 int i; 1398 struct sem_undo *un; 1399 1400 if (!ipc_rcu_getref(sma)) { 1401 err = -EIDRM; 1402 goto out_rcu_wakeup; 1403 } 1404 rcu_read_unlock(); 1405 1406 if (nsems > SEMMSL_FAST) { 1407 sem_io = ipc_alloc(sizeof(ushort)*nsems); 1408 if (sem_io == NULL) { 1409 ipc_rcu_putref(sma, sem_rcu_free); 1410 return -ENOMEM; 1411 } 1412 } 1413 1414 if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) { 1415 ipc_rcu_putref(sma, sem_rcu_free); 1416 err = -EFAULT; 1417 goto out_free; 1418 } 1419 1420 for (i = 0; i < nsems; i++) { 1421 if (sem_io[i] > SEMVMX) { 1422 ipc_rcu_putref(sma, sem_rcu_free); 1423 err = -ERANGE; 1424 goto out_free; 1425 } 1426 } 1427 rcu_read_lock(); 1428 sem_lock_and_putref(sma); 1429 if (!ipc_valid_object(&sma->sem_perm)) { 1430 err = -EIDRM; 1431 goto out_unlock; 1432 } 1433 1434 for (i = 0; i < nsems; i++) { 1435 sma->sem_base[i].semval = sem_io[i]; 1436 sma->sem_base[i].sempid = task_tgid_vnr(current); 1437 } 1438 1439 ipc_assert_locked_object(&sma->sem_perm); 1440 list_for_each_entry(un, &sma->list_id, list_id) { 1441 for (i = 0; i < nsems; i++) 1442 un->semadj[i] = 0; 1443 } 1444 sma->sem_ctime = get_seconds(); 1445 /* maybe some queued-up processes were waiting for this */ 1446 do_smart_update(sma, NULL, 0, 0, &wake_q); 1447 err = 0; 1448 goto out_unlock; 1449 } 1450 /* GETVAL, GETPID, GETNCTN, GETZCNT: fall-through */ 1451 } 1452 err = -EINVAL; 1453 if (semnum < 0 || semnum >= nsems) 1454 goto out_rcu_wakeup; 1455 1456 sem_lock(sma, NULL, -1); 1457 if (!ipc_valid_object(&sma->sem_perm)) { 1458 err = -EIDRM; 1459 goto out_unlock; 1460 } 1461 curr = &sma->sem_base[semnum]; 1462 1463 switch (cmd) { 1464 case GETVAL: 1465 err = curr->semval; 1466 goto out_unlock; 1467 case GETPID: 1468 err = curr->sempid; 1469 goto out_unlock; 1470 case GETNCNT: 1471 err = count_semcnt(sma, semnum, 0); 1472 goto out_unlock; 1473 case GETZCNT: 1474 err = count_semcnt(sma, semnum, 1); 1475 goto out_unlock; 1476 } 1477 1478 out_unlock: 1479 sem_unlock(sma, -1); 1480 out_rcu_wakeup: 1481 rcu_read_unlock(); 1482 wake_up_q(&wake_q); 1483 out_free: 1484 if (sem_io != fast_sem_io) 1485 ipc_free(sem_io); 1486 return err; 1487 } 1488 1489 static inline unsigned long 1490 copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version) 1491 { 1492 switch (version) { 1493 case IPC_64: 1494 if (copy_from_user(out, buf, sizeof(*out))) 1495 return -EFAULT; 1496 return 0; 1497 case IPC_OLD: 
1498 { 1499 struct semid_ds tbuf_old; 1500 1501 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 1502 return -EFAULT; 1503 1504 out->sem_perm.uid = tbuf_old.sem_perm.uid; 1505 out->sem_perm.gid = tbuf_old.sem_perm.gid; 1506 out->sem_perm.mode = tbuf_old.sem_perm.mode; 1507 1508 return 0; 1509 } 1510 default: 1511 return -EINVAL; 1512 } 1513 } 1514 1515 /* 1516 * This function handles some semctl commands which require the rwsem 1517 * to be held in write mode. 1518 * NOTE: no locks must be held, the rwsem is taken inside this function. 1519 */ 1520 static int semctl_down(struct ipc_namespace *ns, int semid, 1521 int cmd, int version, void __user *p) 1522 { 1523 struct sem_array *sma; 1524 int err; 1525 struct semid64_ds semid64; 1526 struct kern_ipc_perm *ipcp; 1527 1528 if (cmd == IPC_SET) { 1529 if (copy_semid_from_user(&semid64, p, version)) 1530 return -EFAULT; 1531 } 1532 1533 down_write(&sem_ids(ns).rwsem); 1534 rcu_read_lock(); 1535 1536 ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd, 1537 &semid64.sem_perm, 0); 1538 if (IS_ERR(ipcp)) { 1539 err = PTR_ERR(ipcp); 1540 goto out_unlock1; 1541 } 1542 1543 sma = container_of(ipcp, struct sem_array, sem_perm); 1544 1545 err = security_sem_semctl(sma, cmd); 1546 if (err) 1547 goto out_unlock1; 1548 1549 switch (cmd) { 1550 case IPC_RMID: 1551 sem_lock(sma, NULL, -1); 1552 /* freeary unlocks the ipc object and rcu */ 1553 freeary(ns, ipcp); 1554 goto out_up; 1555 case IPC_SET: 1556 sem_lock(sma, NULL, -1); 1557 err = ipc_update_perm(&semid64.sem_perm, ipcp); 1558 if (err) 1559 goto out_unlock0; 1560 sma->sem_ctime = get_seconds(); 1561 break; 1562 default: 1563 err = -EINVAL; 1564 goto out_unlock1; 1565 } 1566 1567 out_unlock0: 1568 sem_unlock(sma, -1); 1569 out_unlock1: 1570 rcu_read_unlock(); 1571 out_up: 1572 up_write(&sem_ids(ns).rwsem); 1573 return err; 1574 } 1575 1576 SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg) 1577 { 1578 int version; 1579 struct ipc_namespace *ns; 1580 void __user *p = (void __user *)arg; 1581 1582 if (semid < 0) 1583 return -EINVAL; 1584 1585 version = ipc_parse_version(&cmd); 1586 ns = current->nsproxy->ipc_ns; 1587 1588 switch (cmd) { 1589 case IPC_INFO: 1590 case SEM_INFO: 1591 case IPC_STAT: 1592 case SEM_STAT: 1593 return semctl_nolock(ns, semid, cmd, version, p); 1594 case GETALL: 1595 case GETVAL: 1596 case GETPID: 1597 case GETNCNT: 1598 case GETZCNT: 1599 case SETALL: 1600 return semctl_main(ns, semid, semnum, cmd, p); 1601 case SETVAL: 1602 return semctl_setval(ns, semid, semnum, arg); 1603 case IPC_RMID: 1604 case IPC_SET: 1605 return semctl_down(ns, semid, cmd, version, p); 1606 default: 1607 return -EINVAL; 1608 } 1609 } 1610 1611 /* If the task doesn't already have a undo_list, then allocate one 1612 * here. We guarantee there is only one thread using this undo list, 1613 * and current is THE ONE 1614 * 1615 * If this allocation and assignment succeeds, but later 1616 * portions of this code fail, there is no need to free the sem_undo_list. 1617 * Just let it stay associated with the task, and it'll be freed later 1618 * at exit time. 1619 * 1620 * This can block, so callers must hold no locks. 
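 *
 * The undo_list is reference counted: copy_semundo() takes an extra
 * reference for CLONE_SYSVSEM children and exit_sem() drops it, so the list
 * is freed only when the last sharer exits. Caller sketch (this is how
 * find_alloc_undo() below uses it):
 *
 *	struct sem_undo_list *ulp;
 *
 *	error = get_undo_list(&ulp);
 *	if (error)
 *		return ERR_PTR(error);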
1621 */ 1622 static inline int get_undo_list(struct sem_undo_list **undo_listp) 1623 { 1624 struct sem_undo_list *undo_list; 1625 1626 undo_list = current->sysvsem.undo_list; 1627 if (!undo_list) { 1628 undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL); 1629 if (undo_list == NULL) 1630 return -ENOMEM; 1631 spin_lock_init(&undo_list->lock); 1632 atomic_set(&undo_list->refcnt, 1); 1633 INIT_LIST_HEAD(&undo_list->list_proc); 1634 1635 current->sysvsem.undo_list = undo_list; 1636 } 1637 *undo_listp = undo_list; 1638 return 0; 1639 } 1640 1641 static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid) 1642 { 1643 struct sem_undo *un; 1644 1645 list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) { 1646 if (un->semid == semid) 1647 return un; 1648 } 1649 return NULL; 1650 } 1651 1652 static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid) 1653 { 1654 struct sem_undo *un; 1655 1656 assert_spin_locked(&ulp->lock); 1657 1658 un = __lookup_undo(ulp, semid); 1659 if (un) { 1660 list_del_rcu(&un->list_proc); 1661 list_add_rcu(&un->list_proc, &ulp->list_proc); 1662 } 1663 return un; 1664 } 1665 1666 /** 1667 * find_alloc_undo - lookup (and if not present create) undo array 1668 * @ns: namespace 1669 * @semid: semaphore array id 1670 * 1671 * The function looks up (and if not present creates) the undo structure. 1672 * The size of the undo structure depends on the size of the semaphore 1673 * array, thus the alloc path is not that straightforward. 1674 * Lifetime-rules: sem_undo is rcu-protected, on success, the function 1675 * performs a rcu_read_lock(). 1676 */ 1677 static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) 1678 { 1679 struct sem_array *sma; 1680 struct sem_undo_list *ulp; 1681 struct sem_undo *un, *new; 1682 int nsems, error; 1683 1684 error = get_undo_list(&ulp); 1685 if (error) 1686 return ERR_PTR(error); 1687 1688 rcu_read_lock(); 1689 spin_lock(&ulp->lock); 1690 un = lookup_undo(ulp, semid); 1691 spin_unlock(&ulp->lock); 1692 if (likely(un != NULL)) 1693 goto out; 1694 1695 /* no undo structure around - allocate one. */ 1696 /* step 1: figure out the size of the semaphore array */ 1697 sma = sem_obtain_object_check(ns, semid); 1698 if (IS_ERR(sma)) { 1699 rcu_read_unlock(); 1700 return ERR_CAST(sma); 1701 } 1702 1703 nsems = sma->sem_nsems; 1704 if (!ipc_rcu_getref(sma)) { 1705 rcu_read_unlock(); 1706 un = ERR_PTR(-EIDRM); 1707 goto out; 1708 } 1709 rcu_read_unlock(); 1710 1711 /* step 2: allocate new undo structure */ 1712 new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); 1713 if (!new) { 1714 ipc_rcu_putref(sma, sem_rcu_free); 1715 return ERR_PTR(-ENOMEM); 1716 } 1717 1718 /* step 3: Acquire the lock on semaphore array */ 1719 rcu_read_lock(); 1720 sem_lock_and_putref(sma); 1721 if (!ipc_valid_object(&sma->sem_perm)) { 1722 sem_unlock(sma, -1); 1723 rcu_read_unlock(); 1724 kfree(new); 1725 un = ERR_PTR(-EIDRM); 1726 goto out; 1727 } 1728 spin_lock(&ulp->lock); 1729 1730 /* 1731 * step 4: check for races: did someone else allocate the undo struct? 
1732 */ 1733 un = lookup_undo(ulp, semid); 1734 if (un) { 1735 kfree(new); 1736 goto success; 1737 } 1738 /* step 5: initialize & link new undo structure */ 1739 new->semadj = (short *) &new[1]; 1740 new->ulp = ulp; 1741 new->semid = semid; 1742 assert_spin_locked(&ulp->lock); 1743 list_add_rcu(&new->list_proc, &ulp->list_proc); 1744 ipc_assert_locked_object(&sma->sem_perm); 1745 list_add(&new->list_id, &sma->list_id); 1746 un = new; 1747 1748 success: 1749 spin_unlock(&ulp->lock); 1750 sem_unlock(sma, -1); 1751 out: 1752 return un; 1753 } 1754 1755 SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, 1756 unsigned, nsops, const struct timespec __user *, timeout) 1757 { 1758 int error = -EINVAL; 1759 struct sem_array *sma; 1760 struct sembuf fast_sops[SEMOPM_FAST]; 1761 struct sembuf *sops = fast_sops, *sop; 1762 struct sem_undo *un; 1763 int max, locknum; 1764 bool undos = false, alter = false, dupsop = false; 1765 struct sem_queue queue; 1766 unsigned long dup = 0, jiffies_left = 0; 1767 struct ipc_namespace *ns; 1768 1769 ns = current->nsproxy->ipc_ns; 1770 1771 if (nsops < 1 || semid < 0) 1772 return -EINVAL; 1773 if (nsops > ns->sc_semopm) 1774 return -E2BIG; 1775 if (nsops > SEMOPM_FAST) { 1776 sops = kmalloc(sizeof(*sops)*nsops, GFP_KERNEL); 1777 if (sops == NULL) 1778 return -ENOMEM; 1779 } 1780 1781 if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) { 1782 error = -EFAULT; 1783 goto out_free; 1784 } 1785 1786 if (timeout) { 1787 struct timespec _timeout; 1788 if (copy_from_user(&_timeout, timeout, sizeof(*timeout))) { 1789 error = -EFAULT; 1790 goto out_free; 1791 } 1792 if (_timeout.tv_sec < 0 || _timeout.tv_nsec < 0 || 1793 _timeout.tv_nsec >= 1000000000L) { 1794 error = -EINVAL; 1795 goto out_free; 1796 } 1797 jiffies_left = timespec_to_jiffies(&_timeout); 1798 } 1799 1800 max = 0; 1801 for (sop = sops; sop < sops + nsops; sop++) { 1802 unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG); 1803 1804 if (sop->sem_num >= max) 1805 max = sop->sem_num; 1806 if (sop->sem_flg & SEM_UNDO) 1807 undos = true; 1808 if (dup & mask) { 1809 /* 1810 * There was a previous alter access that appears 1811 * to have accessed the same semaphore, thus use 1812 * the dupsop logic. "appears", because the detection 1813 * can only check % BITS_PER_LONG. 1814 */ 1815 dupsop = true; 1816 } 1817 if (sop->sem_op != 0) { 1818 alter = true; 1819 dup |= mask; 1820 } 1821 } 1822 1823 if (undos) { 1824 /* On success, find_alloc_undo takes the rcu_read_lock */ 1825 un = find_alloc_undo(ns, semid); 1826 if (IS_ERR(un)) { 1827 error = PTR_ERR(un); 1828 goto out_free; 1829 } 1830 } else { 1831 un = NULL; 1832 rcu_read_lock(); 1833 } 1834 1835 sma = sem_obtain_object_check(ns, semid); 1836 if (IS_ERR(sma)) { 1837 rcu_read_unlock(); 1838 error = PTR_ERR(sma); 1839 goto out_free; 1840 } 1841 1842 error = -EFBIG; 1843 if (max >= sma->sem_nsems) { 1844 rcu_read_unlock(); 1845 goto out_free; 1846 } 1847 1848 error = -EACCES; 1849 if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) { 1850 rcu_read_unlock(); 1851 goto out_free; 1852 } 1853 1854 error = security_sem_semop(sma, sops, nsops, alter); 1855 if (error) { 1856 rcu_read_unlock(); 1857 goto out_free; 1858 } 1859 1860 error = -EIDRM; 1861 locknum = sem_lock(sma, sops, nsops); 1862 /* 1863 * We eventually might perform the following check in a lockless 1864 * fashion, considering ipc_valid_object() locking constraints. 
1865 * If nsops == 1 and there is no contention for sem_perm.lock, then 1866 * only a per-semaphore lock is held and it's OK to proceed with the 1867 * check below. More details on the fine grained locking scheme 1868 * entangled here and why it's RMID race safe on comments at sem_lock() 1869 */ 1870 if (!ipc_valid_object(&sma->sem_perm)) 1871 goto out_unlock_free; 1872 /* 1873 * semid identifiers are not unique - find_alloc_undo may have 1874 * allocated an undo structure, it was invalidated by an RMID 1875 * and now a new array with received the same id. Check and fail. 1876 * This case can be detected checking un->semid. The existence of 1877 * "un" itself is guaranteed by rcu. 1878 */ 1879 if (un && un->semid == -1) 1880 goto out_unlock_free; 1881 1882 queue.sops = sops; 1883 queue.nsops = nsops; 1884 queue.undo = un; 1885 queue.pid = task_tgid_vnr(current); 1886 queue.alter = alter; 1887 queue.dupsop = dupsop; 1888 1889 error = perform_atomic_semop(sma, &queue); 1890 if (error == 0) { /* non-blocking succesfull path */ 1891 DEFINE_WAKE_Q(wake_q); 1892 1893 /* 1894 * If the operation was successful, then do 1895 * the required updates. 1896 */ 1897 if (alter) 1898 do_smart_update(sma, sops, nsops, 1, &wake_q); 1899 else 1900 set_semotime(sma, sops); 1901 1902 sem_unlock(sma, locknum); 1903 rcu_read_unlock(); 1904 wake_up_q(&wake_q); 1905 1906 goto out_free; 1907 } 1908 if (error < 0) /* non-blocking error path */ 1909 goto out_unlock_free; 1910 1911 /* 1912 * We need to sleep on this operation, so we put the current 1913 * task into the pending queue and go to sleep. 1914 */ 1915 if (nsops == 1) { 1916 struct sem *curr; 1917 curr = &sma->sem_base[sops->sem_num]; 1918 1919 if (alter) { 1920 if (sma->complex_count) { 1921 list_add_tail(&queue.list, 1922 &sma->pending_alter); 1923 } else { 1924 1925 list_add_tail(&queue.list, 1926 &curr->pending_alter); 1927 } 1928 } else { 1929 list_add_tail(&queue.list, &curr->pending_const); 1930 } 1931 } else { 1932 if (!sma->complex_count) 1933 merge_queues(sma); 1934 1935 if (alter) 1936 list_add_tail(&queue.list, &sma->pending_alter); 1937 else 1938 list_add_tail(&queue.list, &sma->pending_const); 1939 1940 sma->complex_count++; 1941 } 1942 1943 do { 1944 queue.status = -EINTR; 1945 queue.sleeper = current; 1946 1947 __set_current_state(TASK_INTERRUPTIBLE); 1948 sem_unlock(sma, locknum); 1949 rcu_read_unlock(); 1950 1951 if (timeout) 1952 jiffies_left = schedule_timeout(jiffies_left); 1953 else 1954 schedule(); 1955 1956 /* 1957 * fastpath: the semop has completed, either successfully or 1958 * not, from the syscall pov, is quite irrelevant to us at this 1959 * point; we're done. 1960 * 1961 * We _do_ care, nonetheless, about being awoken by a signal or 1962 * spuriously. The queue.status is checked again in the 1963 * slowpath (aka after taking sem_lock), such that we can detect 1964 * scenarios where we were awakened externally, during the 1965 * window between wake_q_add() and wake_up_q(). 1966 */ 1967 error = READ_ONCE(queue.status); 1968 if (error != -EINTR) { 1969 /* 1970 * User space could assume that semop() is a memory 1971 * barrier: Without the mb(), the cpu could 1972 * speculatively read in userspace stale data that was 1973 * overwritten by the previous owner of the semaphore. 
1974 */ 1975 smp_mb(); 1976 goto out_free; 1977 } 1978 1979 rcu_read_lock(); 1980 locknum = sem_lock(sma, sops, nsops); 1981 1982 if (!ipc_valid_object(&sma->sem_perm)) 1983 goto out_unlock_free; 1984 1985 error = READ_ONCE(queue.status); 1986 1987 /* 1988 * If queue.status != -EINTR we are woken up by another process. 1989 * Leave without unlink_queue(), but with sem_unlock(). 1990 */ 1991 if (error != -EINTR) 1992 goto out_unlock_free; 1993 1994 /* 1995 * If an interrupt occurred we have to clean up the queue. 1996 */ 1997 if (timeout && jiffies_left == 0) 1998 error = -EAGAIN; 1999 } while (error == -EINTR && !signal_pending(current)); /* spurious */ 2000 2001 unlink_queue(sma, &queue); 2002 2003 out_unlock_free: 2004 sem_unlock(sma, locknum); 2005 rcu_read_unlock(); 2006 out_free: 2007 if (sops != fast_sops) 2008 kfree(sops); 2009 return error; 2010 } 2011 2012 SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops, 2013 unsigned, nsops) 2014 { 2015 return sys_semtimedop(semid, tsops, nsops, NULL); 2016 } 2017 2018 /* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between 2019 * parent and child tasks. 2020 */ 2021 2022 int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) 2023 { 2024 struct sem_undo_list *undo_list; 2025 int error; 2026 2027 if (clone_flags & CLONE_SYSVSEM) { 2028 error = get_undo_list(&undo_list); 2029 if (error) 2030 return error; 2031 atomic_inc(&undo_list->refcnt); 2032 tsk->sysvsem.undo_list = undo_list; 2033 } else 2034 tsk->sysvsem.undo_list = NULL; 2035 2036 return 0; 2037 } 2038 2039 /* 2040 * add semadj values to semaphores, free undo structures. 2041 * undo structures are not freed when semaphore arrays are destroyed 2042 * so some of them may be out of date. 2043 * IMPLEMENTATION NOTE: There is some confusion over whether the 2044 * set of adjustments that needs to be done should be done in an atomic 2045 * manner or not. That is, if we are attempting to decrement the semval 2046 * should we queue up and wait until we can do so legally? 2047 * The original implementation attempted to do this (queue and wait). 2048 * The current implementation does not do so. The POSIX standard 2049 * and SVID should be consulted to determine what behavior is mandated. 2050 */ 2051 void exit_sem(struct task_struct *tsk) 2052 { 2053 struct sem_undo_list *ulp; 2054 2055 ulp = tsk->sysvsem.undo_list; 2056 if (!ulp) 2057 return; 2058 tsk->sysvsem.undo_list = NULL; 2059 2060 if (!atomic_dec_and_test(&ulp->refcnt)) 2061 return; 2062 2063 for (;;) { 2064 struct sem_array *sma; 2065 struct sem_undo *un; 2066 int semid, i; 2067 DEFINE_WAKE_Q(wake_q); 2068 2069 cond_resched(); 2070 2071 rcu_read_lock(); 2072 un = list_entry_rcu(ulp->list_proc.next, 2073 struct sem_undo, list_proc); 2074 if (&un->list_proc == &ulp->list_proc) { 2075 /* 2076 * We must wait for freeary() before freeing this ulp, 2077 * in case we raced with last sem_undo. There is a small 2078 * possibility where we exit while freeary() didn't 2079 * finish unlocking sem_undo_list. 
2080 */ 2081 spin_unlock_wait(&ulp->lock); 2082 rcu_read_unlock(); 2083 break; 2084 } 2085 spin_lock(&ulp->lock); 2086 semid = un->semid; 2087 spin_unlock(&ulp->lock); 2088 2089 /* exit_sem raced with IPC_RMID, nothing to do */ 2090 if (semid == -1) { 2091 rcu_read_unlock(); 2092 continue; 2093 } 2094 2095 sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid); 2096 /* exit_sem raced with IPC_RMID, nothing to do */ 2097 if (IS_ERR(sma)) { 2098 rcu_read_unlock(); 2099 continue; 2100 } 2101 2102 sem_lock(sma, NULL, -1); 2103 /* exit_sem raced with IPC_RMID, nothing to do */ 2104 if (!ipc_valid_object(&sma->sem_perm)) { 2105 sem_unlock(sma, -1); 2106 rcu_read_unlock(); 2107 continue; 2108 } 2109 un = __lookup_undo(ulp, semid); 2110 if (un == NULL) { 2111 /* exit_sem raced with IPC_RMID+semget() that created 2112 * exactly the same semid. Nothing to do. 2113 */ 2114 sem_unlock(sma, -1); 2115 rcu_read_unlock(); 2116 continue; 2117 } 2118 2119 /* remove un from the linked lists */ 2120 ipc_assert_locked_object(&sma->sem_perm); 2121 list_del(&un->list_id); 2122 2123 /* we are the last process using this ulp, acquiring ulp->lock 2124 * isn't required. Besides that, we are also protected against 2125 * IPC_RMID as we hold sma->sem_perm lock now 2126 */ 2127 list_del_rcu(&un->list_proc); 2128 2129 /* perform adjustments registered in un */ 2130 for (i = 0; i < sma->sem_nsems; i++) { 2131 struct sem *semaphore = &sma->sem_base[i]; 2132 if (un->semadj[i]) { 2133 semaphore->semval += un->semadj[i]; 2134 /* 2135 * Range checks of the new semaphore value, 2136 * not defined by sus: 2137 * - Some unices ignore the undo entirely 2138 * (e.g. HP UX 11i 11.22, Tru64 V5.1) 2139 * - some cap the value (e.g. FreeBSD caps 2140 * at 0, but doesn't enforce SEMVMX) 2141 * 2142 * Linux caps the semaphore value, both at 0 2143 * and at SEMVMX. 2144 * 2145 * Manfred <manfred@colorfullife.com> 2146 */ 2147 if (semaphore->semval < 0) 2148 semaphore->semval = 0; 2149 if (semaphore->semval > SEMVMX) 2150 semaphore->semval = SEMVMX; 2151 semaphore->sempid = task_tgid_vnr(current); 2152 } 2153 } 2154 /* maybe some queued-up processes were waiting for this */ 2155 do_smart_update(sma, NULL, 0, 1, &wake_q); 2156 sem_unlock(sma, -1); 2157 rcu_read_unlock(); 2158 wake_up_q(&wake_q); 2159 2160 kfree_rcu(un, rcu); 2161 } 2162 kfree(ulp); 2163 } 2164 2165 #ifdef CONFIG_PROC_FS 2166 static int sysvipc_sem_proc_show(struct seq_file *s, void *it) 2167 { 2168 struct user_namespace *user_ns = seq_user_ns(s); 2169 struct sem_array *sma = it; 2170 time_t sem_otime; 2171 2172 /* 2173 * The proc interface isn't aware of sem_lock(), it calls 2174 * ipc_lock_object() directly (in sysvipc_find_ipc). 2175 * In order to stay compatible with sem_lock(), we must 2176 * enter / leave complex_mode. 2177 */ 2178 complexmode_enter(sma); 2179 2180 sem_otime = get_semotime(sma); 2181 2182 seq_printf(s, 2183 "%10d %10d %4o %10u %5u %5u %5u %5u %10lu %10lu\n", 2184 sma->sem_perm.key, 2185 sma->sem_perm.id, 2186 sma->sem_perm.mode, 2187 sma->sem_nsems, 2188 from_kuid_munged(user_ns, sma->sem_perm.uid), 2189 from_kgid_munged(user_ns, sma->sem_perm.gid), 2190 from_kuid_munged(user_ns, sma->sem_perm.cuid), 2191 from_kgid_munged(user_ns, sma->sem_perm.cgid), 2192 sem_otime, 2193 sma->sem_ctime); 2194 2195 complexmode_tryleave(sma); 2196 2197 return 0; 2198 } 2199 #endif 2200
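
/*
 * Illustrative user-space sketch (not part of the kernel build, hypothetical
 * variable names, error handling omitted): a task blocked in semop() is woken
 * with EIDRM once another task removes the set with semctl(IPC_RMID),
 * matching the freeary() wake-up path above. "id" is assumed to come from an
 * earlier semget(), and the semaphore value is assumed to be nonzero so that
 * the wait-for-zero operation really sleeps:
 *
 *	// waiter
 *	struct sembuf wait_for_zero = { .sem_num = 0, .sem_op = 0 };
 *	if (semop(id, &wait_for_zero, 1) == -1 && errno == EIDRM)
 *		printf("semaphore set was removed while we slept\n");
 *
 *	// remover
 *	semctl(id, 0, IPC_RMID);
 */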