/*
 * linux/ipc/sem.c
 * Copyright (C) 1992 Krishna Balasubramanian
 * Copyright (C) 1995 Eric Schenk, Bruno Haible
 *
 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * SMP-threaded, sysctl's added
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
 * Enforced range limit on SEM_UNDO
 * (c) 2001 Red Hat Inc
 * Lockless wakeup
 * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
 * Further wakeup optimizations, documentation
 * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 *
 * Implementation notes: (May 2010)
 * This file implements System V semaphores.
 *
 * User space visible behavior:
 * - FIFO ordering for semop() operations (just FIFO, not starvation
 *   protection)
 * - multiple semaphore operations that alter the same semaphore in
 *   one semop() are handled.
 * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
 *   SETALL calls.
 * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
 * - undo adjustments at process exit are limited to 0..SEMVMX.
 * - namespaces are supported.
 * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
 *   to /proc/sys/kernel/sem.
 * - statistics about the usage are reported in /proc/sysvipc/sem.
 *
 * Internals:
 * - scalability:
 *   - all global variables are read-mostly.
 *   - semop() calls and semctl(RMID) are synchronized by RCU.
 *   - most operations do write operations (actually: spin_lock calls) to
 *     the per-semaphore array structure.
 *     Thus: Perfect SMP scaling between independent semaphore arrays.
 *     If multiple semaphores in one array are used, then cache line
 *     thrashing on the semaphore array spinlock will limit the scaling.
 * - semncnt and semzcnt are calculated on demand in count_semncnt() and
 *   count_semzcnt()
 * - the task that performs a successful semop() scans the list of all
 *   sleeping tasks and completes any pending operations that can be fulfilled.
 *   Semaphores are actively given to waiting tasks (necessary for FIFO).
 *   (see update_queue())
 * - To improve the scalability, the actual wake-up calls are performed after
 *   dropping all locks. (see wake_up_sem_queue_prepare(),
 *   wake_up_sem_queue_do())
 * - All work is done by the waker, the woken up task does not have to do
 *   anything - not even acquiring a lock or dropping a refcount.
 * - A woken up task may not even touch the semaphore array anymore, it may
 *   have been destroyed already by a semctl(RMID).
 * - The synchronization between wake-ups due to a timeout/signal and a
 *   wake-up due to a completed semaphore operation is achieved by using an
 *   intermediate state (IN_WAKEUP).
 * - UNDO values are stored in an array (one per process and per
 *   semaphore array, lazily allocated). For backwards compatibility, multiple
 *   modes for the UNDO variables are supported (per process, per thread)
 *   (see copy_semundo, CLONE_SYSVSEM)
 * - There are two lists of the pending operations: a per-array list
 *   and a per-semaphore list (stored in the array). This allows achieving FIFO
 *   ordering without always scanning all pending operations.
 *   The worst-case behavior is nevertheless O(N^2) for N wakeups.
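 *   For example, a semop() with a single sembuf that has to sleep is queued
 *   on the per-semaphore list of the semaphore it touches, whereas a semop()
 *   with two or more sembufs is queued on the per-array list and increments
 *   sma->complex_count.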
 */

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/time.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>

#include <asm/uaccess.h>
#include "util.h"

/* One semaphore structure for each semaphore in the system. */
struct sem {
	int	semval;		/* current value */
	int	sempid;		/* pid of last operation */
	spinlock_t	lock;	/* spinlock for fine-grained semtimedop */
	struct list_head sem_pending; /* pending single-sop operations */
};

/* One queue for each sleeping process in the system. */
struct sem_queue {
	struct list_head	list;	 /* queue of pending operations */
	struct task_struct	*sleeper; /* this process */
	struct sem_undo		*undo;	 /* undo structure */
	int			pid;	 /* process id of requesting process */
	int			status;	 /* completion status of operation */
	struct sembuf		*sops;	 /* array of pending operations */
	int			nsops;	 /* number of operations */
	int			alter;	 /* does *sops alter the array? */
};

/* Each task has a list of undo requests. They are executed automatically
 * when the process exits.
 */
struct sem_undo {
	struct list_head	list_proc;	/* per-process list: *
						 * all undos from one process
						 * rcu protected */
	struct rcu_head		rcu;		/* rcu struct for sem_undo */
	struct sem_undo_list	*ulp;		/* back ptr to sem_undo_list */
	struct list_head	list_id;	/* per semaphore array list:
						 * all undos for one array */
	int			semid;		/* semaphore set identifier */
	short			*semadj;	/* array of adjustments */
						/* one per semaphore */
};

/* sem_undo_list controls shared access to the list of sem_undo structures
 * that may be shared among all tasks of a CLONE_SYSVSEM task group.
 */
struct sem_undo_list {
	atomic_t		refcnt;
	spinlock_t		lock;
	struct list_head	list_proc;
};


#define sem_ids(ns)	((ns)->ids[IPC_SEM_IDS])

#define sem_checkid(sma, semid)	ipc_checkid(&sma->sem_perm, semid)

static int newary(struct ipc_namespace *, struct ipc_params *);
static void freeary(struct ipc_namespace *, struct kern_ipc_perm *);
#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
#endif

#define SEMMSL_FAST	256 /* 512 bytes on stack */
#define SEMOPM_FAST	64  /* ~ 372 bytes on stack */

/*
 * linked list protection:
 *	sem_undo.id_next,
 *	sem_array.sem_pending{,last},
 *	sem_array.sem_undo: sem_lock() for read/write
 *	sem_undo.proc_next: only "current" is allowed to read/write that field.
 *
 */

#define sc_semmsl	sem_ctls[0]
#define sc_semmns	sem_ctls[1]
#define sc_semopm	sem_ctls[2]
#define sc_semmni	sem_ctls[3]

void sem_init_ns(struct ipc_namespace *ns)
{
	ns->sc_semmsl = SEMMSL;
	ns->sc_semmns = SEMMNS;
	ns->sc_semopm = SEMOPM;
	ns->sc_semmni = SEMMNI;
	ns->used_sems = 0;
	ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
}

#ifdef CONFIG_IPC_NS
void sem_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &sem_ids(ns), freeary);
	idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr);
}
#endif

void __init sem_init (void)
{
	sem_init_ns(&init_ipc_ns);
	ipc_init_proc_interface("sysvipc/sem",
				"       key      semid perms      nsems   uid   gid  cuid  cgid      otime      ctime\n",
				IPC_SEM_IDS, sysvipc_sem_proc_show);
}

/*
 * If the request contains only one semaphore operation, and there are
 * no complex transactions pending, lock only the semaphore involved.
 * Otherwise, lock the entire semaphore array, since we either have
 * multiple semaphores in our own semops, or we need to look at
 * semaphores from other pending complex operations.
 *
 * Carefully guard against sma->complex_count changing between zero
 * and non-zero while we are spinning for the lock. The value of
 * sma->complex_count cannot change while we are holding the lock,
 * so sem_unlock should be fine.
 *
 * The global lock path checks that all the local locks have been released,
 * checking each local lock once. This means that the local lock paths
 * cannot start their critical sections while the global lock is held.
 */
static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
			      int nsops)
{
	int locknum;
again:
	if (nsops == 1 && !sma->complex_count) {
		struct sem *sem = sma->sem_base + sops->sem_num;

		/* Lock just the semaphore we are interested in. */
		spin_lock(&sem->lock);

		/*
		 * If sma->complex_count was set while we were spinning,
		 * we may need to look at things we did not lock here.
		 */
		if (unlikely(sma->complex_count)) {
			spin_unlock(&sem->lock);
			goto lock_array;
		}

		/*
		 * Another process is holding the global lock on the
		 * sem_array; we cannot enter our critical section,
		 * but have to wait for the global lock to be released.
		 */
		if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
			spin_unlock(&sem->lock);
			spin_unlock_wait(&sma->sem_perm.lock);
			goto again;
		}

		locknum = sops->sem_num;
	} else {
		int i;
		/*
		 * Lock the semaphore array, and wait for all of the
		 * individual semaphore locks to go away.  The code
		 * above ensures no new single-lock holders will enter
		 * their critical section while the array lock is held.
		 */
lock_array:
		spin_lock(&sma->sem_perm.lock);
		for (i = 0; i < sma->sem_nsems; i++) {
			struct sem *sem = sma->sem_base + i;
			spin_unlock_wait(&sem->lock);
		}
		locknum = -1;
	}
	return locknum;
}

static inline void sem_unlock(struct sem_array *sma, int locknum)
{
	if (locknum == -1) {
		spin_unlock(&sma->sem_perm.lock);
	} else {
		struct sem *sem = sma->sem_base + locknum;
		spin_unlock(&sem->lock);
	}
}

/*
 * sem_lock_(check_) routines are called in the paths where the rw_mutex
 * is not held.
 *
 * The caller holds the RCU read lock.
 */
static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns,
			int id, struct sembuf *sops, int nsops, int *locknum)
{
	struct kern_ipc_perm *ipcp;
	struct sem_array *sma;

	ipcp = ipc_obtain_object(&sem_ids(ns), id);
	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);

	sma = container_of(ipcp, struct sem_array, sem_perm);
	*locknum = sem_lock(sma, sops, nsops);

	/* ipc_rmid() may have already freed the ID while sem_lock
	 * was spinning: verify that the structure is still valid
	 */
	if (!ipcp->deleted)
		return container_of(ipcp, struct sem_array, sem_perm);

	sem_unlock(sma, *locknum);
	return ERR_PTR(-EINVAL);
}

static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id)
{
	struct kern_ipc_perm *ipcp = ipc_obtain_object(&sem_ids(ns), id);

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);

	return container_of(ipcp, struct sem_array, sem_perm);
}

static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns,
							int id)
{
	struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id);

	if (IS_ERR(ipcp))
		return ERR_CAST(ipcp);

	return container_of(ipcp, struct sem_array, sem_perm);
}

static inline void sem_lock_and_putref(struct sem_array *sma)
{
	sem_lock(sma, NULL, -1);
	ipc_rcu_putref(sma);
}

static inline void sem_putref(struct sem_array *sma)
{
	ipc_rcu_putref(sma);
}

static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
{
	ipc_rmid(&sem_ids(ns), &s->sem_perm);
}

/*
 * Lockless wakeup algorithm:
 * Without the check/retry algorithm a lockless wakeup is possible:
 * - queue.status is initialized to -EINTR before blocking.
 * - wakeup is performed by
 *	* unlinking the queue entry from sma->sem_pending
 *	* setting queue.status to IN_WAKEUP
 *	  This is the notification for the blocked thread that a
 *	  result value is imminent.
 *	* call wake_up_process
 *	* set queue.status to the final value.
 * - the previously blocked thread checks queue.status:
 *	* if it's IN_WAKEUP, then it must wait until the value changes
 *	* if it's not -EINTR, then the operation was completed by
 *	  update_queue. semtimedop can return queue.status without
 *	  performing any operation on the sem array.
 *	* otherwise it must acquire the spinlock and check what's up.
 *
 * The two-stage algorithm is necessary to protect against the following
 * races:
 * - if queue.status is set after wake_up_process, then the woken up idle
 *   thread could race forward and try (and fail) to acquire sma->lock
 *   before update_queue had a chance to set queue.status
 * - if queue.status is written before wake_up_process and if the
 *   blocked process is woken up by a signal between writing
 *   queue.status and the wake_up_process, then the woken up
 *   process could return from semtimedop and die by calling
 *   sys_exit before wake_up_process is called. Then wake_up_process
 *   will oops, because the task structure is already invalid.
 *   (yes, this happened on s390 with sysv msg).
 *
 */
#define IN_WAKEUP	1

/**
 * newary - Create a new semaphore set
 * @ns: namespace
 * @params: ptr to the structure that contains key, semflg and nsems
 *
 * Called with sem_ids.rw_mutex held (as a writer)
 */

static int newary(struct ipc_namespace *ns, struct ipc_params *params)
{
	int id;
	int retval;
	struct sem_array *sma;
	int size;
	key_t key = params->key;
	int nsems = params->u.nsems;
	int semflg = params->flg;
	int i;

	if (!nsems)
		return -EINVAL;
	if (ns->used_sems + nsems > ns->sc_semmns)
		return -ENOSPC;

	size = sizeof (*sma) + nsems * sizeof (struct sem);
	sma = ipc_rcu_alloc(size);
	if (!sma) {
		return -ENOMEM;
	}
	memset (sma, 0, size);

	sma->sem_perm.mode = (semflg & S_IRWXUGO);
	sma->sem_perm.key = key;

	sma->sem_perm.security = NULL;
	retval = security_sem_alloc(sma);
	if (retval) {
		ipc_rcu_putref(sma);
		return retval;
	}

	id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
	if (id < 0) {
		security_sem_free(sma);
		ipc_rcu_putref(sma);
		return id;
	}
	ns->used_sems += nsems;

	sma->sem_base = (struct sem *) &sma[1];

	for (i = 0; i < nsems; i++) {
		INIT_LIST_HEAD(&sma->sem_base[i].sem_pending);
		spin_lock_init(&sma->sem_base[i].lock);
	}

	sma->complex_count = 0;
	INIT_LIST_HEAD(&sma->sem_pending);
	INIT_LIST_HEAD(&sma->list_id);
	sma->sem_nsems = nsems;
	sma->sem_ctime = get_seconds();
	sem_unlock(sma, -1);
	rcu_read_unlock();

	return sma->sem_perm.id;
}


/*
 * Called with sem_ids.rw_mutex and ipcp locked.
 */
static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
{
	struct sem_array *sma;

	sma = container_of(ipcp, struct sem_array, sem_perm);
	return security_sem_associate(sma, semflg);
}

/*
 * Called with sem_ids.rw_mutex and ipcp locked.
 */
static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
				struct ipc_params *params)
{
	struct sem_array *sma;

	sma = container_of(ipcp, struct sem_array, sem_perm);
	if (params->u.nsems > sma->sem_nsems)
		return -EINVAL;

	return 0;
}

SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
{
	struct ipc_namespace *ns;
	struct ipc_ops sem_ops;
	struct ipc_params sem_params;

	ns = current->nsproxy->ipc_ns;

	if (nsems < 0 || nsems > ns->sc_semmsl)
		return -EINVAL;

	sem_ops.getnew = newary;
	sem_ops.associate = sem_security;
	sem_ops.more_checks = sem_more_checks;

	sem_params.key = key;
	sem_params.flg = semflg;
	sem_params.u.nsems = nsems;

	return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
}

/*
 * Determine whether a sequence of semaphore operations would succeed
 * all at once. Return 0 if yes, 1 if need to sleep, else return error code.
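 *
 * For example, a single sembuf { .sem_num = 0, .sem_op = -1, .sem_flg = 0 }
 * applied to a semaphore whose semval is 0 returns 1 (the caller must sleep);
 * with IPC_NOWAIT set in sem_flg it returns -EAGAIN instead.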
488 */ 489 490 static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops, 491 int nsops, struct sem_undo *un, int pid) 492 { 493 int result, sem_op; 494 struct sembuf *sop; 495 struct sem * curr; 496 497 for (sop = sops; sop < sops + nsops; sop++) { 498 curr = sma->sem_base + sop->sem_num; 499 sem_op = sop->sem_op; 500 result = curr->semval; 501 502 if (!sem_op && result) 503 goto would_block; 504 505 result += sem_op; 506 if (result < 0) 507 goto would_block; 508 if (result > SEMVMX) 509 goto out_of_range; 510 if (sop->sem_flg & SEM_UNDO) { 511 int undo = un->semadj[sop->sem_num] - sem_op; 512 /* 513 * Exceeding the undo range is an error. 514 */ 515 if (undo < (-SEMAEM - 1) || undo > SEMAEM) 516 goto out_of_range; 517 } 518 curr->semval = result; 519 } 520 521 sop--; 522 while (sop >= sops) { 523 sma->sem_base[sop->sem_num].sempid = pid; 524 if (sop->sem_flg & SEM_UNDO) 525 un->semadj[sop->sem_num] -= sop->sem_op; 526 sop--; 527 } 528 529 return 0; 530 531 out_of_range: 532 result = -ERANGE; 533 goto undo; 534 535 would_block: 536 if (sop->sem_flg & IPC_NOWAIT) 537 result = -EAGAIN; 538 else 539 result = 1; 540 541 undo: 542 sop--; 543 while (sop >= sops) { 544 sma->sem_base[sop->sem_num].semval -= sop->sem_op; 545 sop--; 546 } 547 548 return result; 549 } 550 551 /** wake_up_sem_queue_prepare(q, error): Prepare wake-up 552 * @q: queue entry that must be signaled 553 * @error: Error value for the signal 554 * 555 * Prepare the wake-up of the queue entry q. 556 */ 557 static void wake_up_sem_queue_prepare(struct list_head *pt, 558 struct sem_queue *q, int error) 559 { 560 if (list_empty(pt)) { 561 /* 562 * Hold preempt off so that we don't get preempted and have the 563 * wakee busy-wait until we're scheduled back on. 564 */ 565 preempt_disable(); 566 } 567 q->status = IN_WAKEUP; 568 q->pid = error; 569 570 list_add_tail(&q->list, pt); 571 } 572 573 /** 574 * wake_up_sem_queue_do(pt) - do the actual wake-up 575 * @pt: list of tasks to be woken up 576 * 577 * Do the actual wake-up. 578 * The function is called without any locks held, thus the semaphore array 579 * could be destroyed already and the tasks can disappear as soon as the 580 * status is set to the actual return code. 581 */ 582 static void wake_up_sem_queue_do(struct list_head *pt) 583 { 584 struct sem_queue *q, *t; 585 int did_something; 586 587 did_something = !list_empty(pt); 588 list_for_each_entry_safe(q, t, pt, list) { 589 wake_up_process(q->sleeper); 590 /* q can disappear immediately after writing q->status. */ 591 smp_wmb(); 592 q->status = q->pid; 593 } 594 if (did_something) 595 preempt_enable(); 596 } 597 598 static void unlink_queue(struct sem_array *sma, struct sem_queue *q) 599 { 600 list_del(&q->list); 601 if (q->nsops > 1) 602 sma->complex_count--; 603 } 604 605 /** check_restart(sma, q) 606 * @sma: semaphore array 607 * @q: the operation that just completed 608 * 609 * update_queue is O(N^2) when it restarts scanning the whole queue of 610 * waiting operations. Therefore this function checks if the restart is 611 * really necessary. It is called after a previously waiting operation 612 * was completed. 
 */
static int check_restart(struct sem_array *sma, struct sem_queue *q)
{
	struct sem *curr;
	struct sem_queue *h;

	/* if the operation didn't modify the array, then no restart */
	if (q->alter == 0)
		return 0;

	/* pending complex operations are too difficult to analyse */
	if (sma->complex_count)
		return 1;

	/* we were a sleeping complex operation. Too difficult */
	if (q->nsops > 1)
		return 1;

	curr = sma->sem_base + q->sops[0].sem_num;

	/* No-one waits on this queue */
	if (list_empty(&curr->sem_pending))
		return 0;

	/* the new semaphore value */
	if (curr->semval) {
		/* It is impossible that someone waits for the new value:
		 * - q is a previously sleeping simple operation that
		 *   altered the array. It must be a decrement, because
		 *   simple increments never sleep.
		 * - The value is not 0, thus wait-for-zero won't proceed.
		 * - If there are older (higher priority) decrements
		 *   in the queue, then they have observed the original
		 *   semval value and couldn't proceed. The operation
		 *   decremented the value - thus they won't proceed either.
		 */
		BUG_ON(q->sops[0].sem_op >= 0);
		return 0;
	}
	/*
	 * semval is 0. Check if there are wait-for-zero semops.
	 * They must be the first entries in the per-semaphore queue
	 */
	h = list_first_entry(&curr->sem_pending, struct sem_queue, list);
	BUG_ON(h->nsops != 1);
	BUG_ON(h->sops[0].sem_num != q->sops[0].sem_num);

	/* Yes, there is a wait-for-zero semop. Restart */
	if (h->sops[0].sem_op == 0)
		return 1;

	/* Again - no-one is waiting for the new value. */
	return 0;
}


/**
 * update_queue(sma, semnum): Look for tasks that can be completed.
 * @sma: semaphore array.
 * @semnum: semaphore that was modified.
 * @pt: list head for the tasks that must be woken up.
 *
 * update_queue must be called after a semaphore in a semaphore array
 * was modified. If multiple semaphores were modified, update_queue must
 * be called with semnum = -1, as well as with the number of each modified
 * semaphore.
 * The tasks that must be woken up are added to @pt. The return code
 * is stored in q->pid.
 * The function returns 1 if at least one semop was completed successfully.
 */
static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt)
{
	struct sem_queue *q;
	struct list_head *walk;
	struct list_head *pending_list;
	int semop_completed = 0;

	if (semnum == -1)
		pending_list = &sma->sem_pending;
	else
		pending_list = &sma->sem_base[semnum].sem_pending;

again:
	walk = pending_list->next;
	while (walk != pending_list) {
		int error, restart;

		q = container_of(walk, struct sem_queue, list);
		walk = walk->next;

		/* If we are scanning the single sop, per-semaphore list of
		 * one semaphore and that semaphore is 0, then it is not
		 * necessary to scan the "alter" entries: simple increments
		 * that affect only one entry succeed immediately and cannot
		 * be in the per semaphore pending queue, and decrements
		 * cannot be successful if the value is already 0.
		 */
		if (semnum != -1 && sma->sem_base[semnum].semval == 0 &&
				q->alter)
			break;

		error = try_atomic_semop(sma, q->sops, q->nsops,
					 q->undo, q->pid);

		/* Does q->sleeper still need to sleep?
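		 * A return value of 1 from try_atomic_semop() means the
		 * operation would still block: leave q on the pending list
		 * and move on to the next entry.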
		 */
		if (error > 0)
			continue;

		unlink_queue(sma, q);

		if (error) {
			restart = 0;
		} else {
			semop_completed = 1;
			restart = check_restart(sma, q);
		}

		wake_up_sem_queue_prepare(pt, q, error);
		if (restart)
			goto again;
	}
	return semop_completed;
}

/**
 * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue
 * @sma: semaphore array
 * @sops: operations that were performed
 * @nsops: number of operations
 * @otime: force setting otime
 * @pt: list head of the tasks that must be woken up.
 *
 * do_smart_update() does the required calls to update_queue, based on the
 * actual changes that were performed on the semaphore array.
 * Note that the function does not do the actual wake-up: the caller is
 * responsible for calling wake_up_sem_queue_do(@pt).
 * It is safe to perform this call after dropping all locks.
 */
static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops,
			int otime, struct list_head *pt)
{
	int i;
	int progress;

	progress = 1;
retry_global:
	if (sma->complex_count) {
		if (update_queue(sma, -1, pt)) {
			progress = 1;
			otime = 1;
			sops = NULL;
		}
	}
	if (!progress)
		goto done;

	if (!sops) {
		/* No semops; something special is going on. */
		for (i = 0; i < sma->sem_nsems; i++) {
			if (update_queue(sma, i, pt)) {
				otime = 1;
				progress = 1;
			}
		}
		goto done_checkretry;
	}

	/* Check the semaphores that were modified. */
	for (i = 0; i < nsops; i++) {
		if (sops[i].sem_op > 0 ||
			(sops[i].sem_op < 0 &&
				sma->sem_base[sops[i].sem_num].semval == 0))
			if (update_queue(sma, sops[i].sem_num, pt)) {
				otime = 1;
				progress = 1;
			}
	}
done_checkretry:
	if (progress) {
		progress = 0;
		goto retry_global;
	}
done:
	if (otime)
		sma->sem_otime = get_seconds();
}


/* The following counts are associated with each semaphore:
 *   semncnt        number of tasks waiting on semval being nonzero
 *   semzcnt        number of tasks waiting on semval being zero
 * This model assumes that a task waits on exactly one semaphore.
 * Since semaphore operations are to be performed atomically, tasks actually
 * wait on a whole sequence of semaphores simultaneously.
 * The counts we return here are a rough approximation, but still
 * warrant that semncnt+semzcnt>0 if the task is on the pending queue.
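 * For example, a task sleeping on the two operations { {0, -1, 0}, {1, 0, 0} }
 * is counted both in semncnt of semaphore 0 and in semzcnt of semaphore 1.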
809 */ 810 static int count_semncnt (struct sem_array * sma, ushort semnum) 811 { 812 int semncnt; 813 struct sem_queue * q; 814 815 semncnt = 0; 816 list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) { 817 struct sembuf * sops = q->sops; 818 BUG_ON(sops->sem_num != semnum); 819 if ((sops->sem_op < 0) && !(sops->sem_flg & IPC_NOWAIT)) 820 semncnt++; 821 } 822 823 list_for_each_entry(q, &sma->sem_pending, list) { 824 struct sembuf * sops = q->sops; 825 int nsops = q->nsops; 826 int i; 827 for (i = 0; i < nsops; i++) 828 if (sops[i].sem_num == semnum 829 && (sops[i].sem_op < 0) 830 && !(sops[i].sem_flg & IPC_NOWAIT)) 831 semncnt++; 832 } 833 return semncnt; 834 } 835 836 static int count_semzcnt (struct sem_array * sma, ushort semnum) 837 { 838 int semzcnt; 839 struct sem_queue * q; 840 841 semzcnt = 0; 842 list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) { 843 struct sembuf * sops = q->sops; 844 BUG_ON(sops->sem_num != semnum); 845 if ((sops->sem_op == 0) && !(sops->sem_flg & IPC_NOWAIT)) 846 semzcnt++; 847 } 848 849 list_for_each_entry(q, &sma->sem_pending, list) { 850 struct sembuf * sops = q->sops; 851 int nsops = q->nsops; 852 int i; 853 for (i = 0; i < nsops; i++) 854 if (sops[i].sem_num == semnum 855 && (sops[i].sem_op == 0) 856 && !(sops[i].sem_flg & IPC_NOWAIT)) 857 semzcnt++; 858 } 859 return semzcnt; 860 } 861 862 /* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked 863 * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex 864 * remains locked on exit. 865 */ 866 static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 867 { 868 struct sem_undo *un, *tu; 869 struct sem_queue *q, *tq; 870 struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); 871 struct list_head tasks; 872 int i; 873 874 /* Free the existing undo structures for this semaphore set. */ 875 assert_spin_locked(&sma->sem_perm.lock); 876 list_for_each_entry_safe(un, tu, &sma->list_id, list_id) { 877 list_del(&un->list_id); 878 spin_lock(&un->ulp->lock); 879 un->semid = -1; 880 list_del_rcu(&un->list_proc); 881 spin_unlock(&un->ulp->lock); 882 kfree_rcu(un, rcu); 883 } 884 885 /* Wake up all pending processes and let them fail with EIDRM. 
	 */
	INIT_LIST_HEAD(&tasks);
	list_for_each_entry_safe(q, tq, &sma->sem_pending, list) {
		unlink_queue(sma, q);
		wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
	}
	for (i = 0; i < sma->sem_nsems; i++) {
		struct sem *sem = sma->sem_base + i;
		list_for_each_entry_safe(q, tq, &sem->sem_pending, list) {
			unlink_queue(sma, q);
			wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
		}
	}

	/* Remove the semaphore set from the IDR */
	sem_rmid(ns, sma);
	sem_unlock(sma, -1);
	rcu_read_unlock();

	wake_up_sem_queue_do(&tasks);
	ns->used_sems -= sma->sem_nsems;
	security_sem_free(sma);
	ipc_rcu_putref(sma);
}

static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
{
	switch(version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct semid_ds out;

		memset(&out, 0, sizeof(out));

		ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm);

		out.sem_otime	= in->sem_otime;
		out.sem_ctime	= in->sem_ctime;
		out.sem_nsems	= in->sem_nsems;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}

static int semctl_nolock(struct ipc_namespace *ns, int semid,
			 int cmd, int version, void __user *p)
{
	int err;
	struct sem_array *sma;

	switch(cmd) {
	case IPC_INFO:
	case SEM_INFO:
	{
		struct seminfo seminfo;
		int max_id;

		err = security_sem_semctl(NULL, cmd);
		if (err)
			return err;

		memset(&seminfo,0,sizeof(seminfo));
		seminfo.semmni = ns->sc_semmni;
		seminfo.semmns = ns->sc_semmns;
		seminfo.semmsl = ns->sc_semmsl;
		seminfo.semopm = ns->sc_semopm;
		seminfo.semvmx = SEMVMX;
		seminfo.semmnu = SEMMNU;
		seminfo.semmap = SEMMAP;
		seminfo.semume = SEMUME;
		down_read(&sem_ids(ns).rw_mutex);
		if (cmd == SEM_INFO) {
			seminfo.semusz = sem_ids(ns).in_use;
			seminfo.semaem = ns->used_sems;
		} else {
			seminfo.semusz = SEMUSZ;
			seminfo.semaem = SEMAEM;
		}
		max_id = ipc_get_maxid(&sem_ids(ns));
		up_read(&sem_ids(ns).rw_mutex);
		if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
			return -EFAULT;
		return (max_id < 0) ? 0 : max_id;
	}
	case IPC_STAT:
	case SEM_STAT:
	{
		struct semid64_ds tbuf;
		int id = 0;

		memset(&tbuf, 0, sizeof(tbuf));

		rcu_read_lock();
		if (cmd == SEM_STAT) {
			sma = sem_obtain_object(ns, semid);
			if (IS_ERR(sma)) {
				err = PTR_ERR(sma);
				goto out_unlock;
			}
			id = sma->sem_perm.id;
		} else {
			sma = sem_obtain_object_check(ns, semid);
			if (IS_ERR(sma)) {
				err = PTR_ERR(sma);
				goto out_unlock;
			}
		}

		err = -EACCES;
		if (ipcperms(ns, &sma->sem_perm, S_IRUGO))
			goto out_unlock;

		err = security_sem_semctl(sma, cmd);
		if (err)
			goto out_unlock;

		kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
		tbuf.sem_otime	= sma->sem_otime;
		tbuf.sem_ctime	= sma->sem_ctime;
		tbuf.sem_nsems	= sma->sem_nsems;
		rcu_read_unlock();
		if (copy_semid_to_user(p, &tbuf, version))
			return -EFAULT;
		return id;
	}
	default:
		return -EINVAL;
	}
out_unlock:
	rcu_read_unlock();
	return err;
}

static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
		unsigned long arg)
{
	struct sem_undo *un;
	struct sem_array *sma;
	struct sem* curr;
	int err;
	struct list_head tasks;
	int val;
#if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN)
	/* big-endian 64bit */
	val = arg >> 32;
#else
	/* 32bit or little-endian 64bit */
	val = arg;
#endif

	if (val > SEMVMX || val < 0)
		return -ERANGE;

	INIT_LIST_HEAD(&tasks);

	rcu_read_lock();
	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		return PTR_ERR(sma);
	}

	if (semnum < 0 || semnum >= sma->sem_nsems) {
		rcu_read_unlock();
		return -EINVAL;
	}


	if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) {
		rcu_read_unlock();
		return -EACCES;
	}

	err = security_sem_semctl(sma, SETVAL);
	if (err) {
		rcu_read_unlock();
		return -EACCES;
	}

	sem_lock(sma, NULL, -1);

	curr = &sma->sem_base[semnum];

	assert_spin_locked(&sma->sem_perm.lock);
	list_for_each_entry(un, &sma->list_id, list_id)
		un->semadj[semnum] = 0;

	curr->semval = val;
	curr->sempid = task_tgid_vnr(current);
	sma->sem_ctime = get_seconds();
	/* maybe some queued-up processes were waiting for this */
	do_smart_update(sma, NULL, 0, 0, &tasks);
	sem_unlock(sma, -1);
	rcu_read_unlock();
	wake_up_sem_queue_do(&tasks);
	return 0;
}

static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
		int cmd, void __user *p)
{
	struct sem_array *sma;
	struct sem* curr;
	int err, nsems;
	ushort fast_sem_io[SEMMSL_FAST];
	ushort* sem_io = fast_sem_io;
	struct list_head tasks;

	INIT_LIST_HEAD(&tasks);

	rcu_read_lock();
	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		return PTR_ERR(sma);
	}

	nsems = sma->sem_nsems;

	err = -EACCES;
	if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? S_IWUGO : S_IRUGO))
		goto out_rcu_wakeup;

	err = security_sem_semctl(sma, cmd);
	if (err)
		goto out_rcu_wakeup;

	err = -EACCES;
	switch (cmd) {
	case GETALL:
	{
		ushort __user *array = p;
		int i;

		sem_lock(sma, NULL, -1);
		if(nsems > SEMMSL_FAST) {
			if (!ipc_rcu_getref(sma)) {
				sem_unlock(sma, -1);
				rcu_read_unlock();
				err = -EIDRM;
				goto out_free;
			}
			sem_unlock(sma, -1);
			rcu_read_unlock();
			sem_io = ipc_alloc(sizeof(ushort)*nsems);
			if(sem_io == NULL) {
				sem_putref(sma);
				return -ENOMEM;
			}

			rcu_read_lock();
			sem_lock_and_putref(sma);
			if (sma->sem_perm.deleted) {
				sem_unlock(sma, -1);
				rcu_read_unlock();
				err = -EIDRM;
				goto out_free;
			}
		}
		for (i = 0; i < sma->sem_nsems; i++)
			sem_io[i] = sma->sem_base[i].semval;
		sem_unlock(sma, -1);
		rcu_read_unlock();
		err = 0;
		if(copy_to_user(array, sem_io, nsems*sizeof(ushort)))
			err = -EFAULT;
		goto out_free;
	}
	case SETALL:
	{
		int i;
		struct sem_undo *un;

		if (!ipc_rcu_getref(sma)) {
			rcu_read_unlock();
			return -EIDRM;
		}
		rcu_read_unlock();

		if(nsems > SEMMSL_FAST) {
			sem_io = ipc_alloc(sizeof(ushort)*nsems);
			if(sem_io == NULL) {
				sem_putref(sma);
				return -ENOMEM;
			}
		}

		if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) {
			sem_putref(sma);
			err = -EFAULT;
			goto out_free;
		}

		for (i = 0; i < nsems; i++) {
			if (sem_io[i] > SEMVMX) {
				sem_putref(sma);
				err = -ERANGE;
				goto out_free;
			}
		}
		rcu_read_lock();
		sem_lock_and_putref(sma);
		if (sma->sem_perm.deleted) {
			sem_unlock(sma, -1);
			rcu_read_unlock();
			err = -EIDRM;
			goto out_free;
		}

		for (i = 0; i < nsems; i++)
			sma->sem_base[i].semval = sem_io[i];

		assert_spin_locked(&sma->sem_perm.lock);
		list_for_each_entry(un, &sma->list_id, list_id) {
			for (i = 0; i < nsems; i++)
				un->semadj[i] = 0;
		}
		sma->sem_ctime = get_seconds();
		/* maybe some queued-up processes were waiting for this */
		do_smart_update(sma, NULL, 0, 0, &tasks);
		err = 0;
		goto out_unlock;
	}
	/* GETVAL, GETPID, GETNCNT, GETZCNT: fall-through */
	}
	err = -EINVAL;
	if (semnum < 0 || semnum >= nsems)
		goto out_rcu_wakeup;

	sem_lock(sma, NULL, -1);
	curr = &sma->sem_base[semnum];

	switch (cmd) {
	case GETVAL:
		err = curr->semval;
		goto out_unlock;
	case GETPID:
		err = curr->sempid;
		goto out_unlock;
	case GETNCNT:
		err = count_semncnt(sma,semnum);
		goto out_unlock;
	case GETZCNT:
		err = count_semzcnt(sma,semnum);
		goto out_unlock;
	}

out_unlock:
	sem_unlock(sma, -1);
out_rcu_wakeup:
	rcu_read_unlock();
	wake_up_sem_queue_do(&tasks);
out_free:
	if(sem_io != fast_sem_io)
		ipc_free(sem_io, sizeof(ushort)*nsems);
	return err;
}

static inline unsigned long
copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
{
	switch(version) {
	case IPC_64:
		if (copy_from_user(out, buf, sizeof(*out)))
			return -EFAULT;
		return 0;
	case IPC_OLD:
	    {
		struct semid_ds tbuf_old;

		if(copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->sem_perm.uid	= tbuf_old.sem_perm.uid;
		out->sem_perm.gid	= tbuf_old.sem_perm.gid;
		out->sem_perm.mode	= tbuf_old.sem_perm.mode;

		return 0;
	    }
	default:
		return -EINVAL;
	}
}

/*
 * This function handles some semctl commands which require the rw_mutex
 * to be held in write mode.
 * NOTE: no locks must be held, the rw_mutex is taken inside this function.
 */
static int semctl_down(struct ipc_namespace *ns, int semid,
		       int cmd, int version, void __user *p)
{
	struct sem_array *sma;
	int err;
	struct semid64_ds semid64;
	struct kern_ipc_perm *ipcp;

	if(cmd == IPC_SET) {
		if (copy_semid_from_user(&semid64, p, version))
			return -EFAULT;
	}

	ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd,
				      &semid64.sem_perm, 0);
	if (IS_ERR(ipcp))
		return PTR_ERR(ipcp);

	sma = container_of(ipcp, struct sem_array, sem_perm);

	err = security_sem_semctl(sma, cmd);
	if (err) {
		rcu_read_unlock();
		goto out_up;
	}

	switch(cmd){
	case IPC_RMID:
		sem_lock(sma, NULL, -1);
		freeary(ns, ipcp);
		goto out_up;
	case IPC_SET:
		sem_lock(sma, NULL, -1);
		err = ipc_update_perm(&semid64.sem_perm, ipcp);
		if (err)
			goto out_unlock;
		sma->sem_ctime = get_seconds();
		break;
	default:
		rcu_read_unlock();
		err = -EINVAL;
		goto out_up;
	}

out_unlock:
	sem_unlock(sma, -1);
	rcu_read_unlock();
out_up:
	up_write(&sem_ids(ns).rw_mutex);
	return err;
}

SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
{
	int version;
	struct ipc_namespace *ns;
	void __user *p = (void __user *)arg;

	if (semid < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);
	ns = current->nsproxy->ipc_ns;

	switch(cmd) {
	case IPC_INFO:
	case SEM_INFO:
	case IPC_STAT:
	case SEM_STAT:
		return semctl_nolock(ns, semid, cmd, version, p);
	case GETALL:
	case GETVAL:
	case GETPID:
	case GETNCNT:
	case GETZCNT:
	case SETALL:
		return semctl_main(ns, semid, semnum, cmd, p);
	case SETVAL:
		return semctl_setval(ns, semid, semnum, arg);
	case IPC_RMID:
	case IPC_SET:
		return semctl_down(ns, semid, cmd, version, p);
	default:
		return -EINVAL;
	}
}

/* If the task doesn't already have an undo_list, then allocate one
 * here.  We guarantee there is only one thread using this undo list,
 * and current is THE ONE
 *
 * If this allocation and assignment succeeds, but later
 * portions of this code fail, there is no need to free the sem_undo_list.
 * Just let it stay associated with the task, and it'll be freed later
 * at exit time.
 *
 * This can block, so callers must hold no locks.
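 * (it can block because of the GFP_KERNEL allocation below)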
 */
static inline int get_undo_list(struct sem_undo_list **undo_listp)
{
	struct sem_undo_list *undo_list;

	undo_list = current->sysvsem.undo_list;
	if (!undo_list) {
		undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL);
		if (undo_list == NULL)
			return -ENOMEM;
		spin_lock_init(&undo_list->lock);
		atomic_set(&undo_list->refcnt, 1);
		INIT_LIST_HEAD(&undo_list->list_proc);

		current->sysvsem.undo_list = undo_list;
	}
	*undo_listp = undo_list;
	return 0;
}

static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid)
{
	struct sem_undo *un;

	list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) {
		if (un->semid == semid)
			return un;
	}
	return NULL;
}

static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
{
	struct sem_undo *un;

	assert_spin_locked(&ulp->lock);

	un = __lookup_undo(ulp, semid);
	if (un) {
		list_del_rcu(&un->list_proc);
		list_add_rcu(&un->list_proc, &ulp->list_proc);
	}
	return un;
}

/**
 * find_alloc_undo - Lookup (and if not present create) undo array
 * @ns: namespace
 * @semid: semaphore array id
 *
 * The function looks up (and if not present creates) the undo structure.
 * The size of the undo structure depends on the size of the semaphore
 * array, thus the alloc path is not that straightforward.
 * Lifetime-rules: sem_undo is rcu-protected, on success, the function
 * performs a rcu_read_lock().
 */
static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
{
	struct sem_array *sma;
	struct sem_undo_list *ulp;
	struct sem_undo *un, *new;
	int nsems, error;

	error = get_undo_list(&ulp);
	if (error)
		return ERR_PTR(error);

	rcu_read_lock();
	spin_lock(&ulp->lock);
	un = lookup_undo(ulp, semid);
	spin_unlock(&ulp->lock);
	if (likely(un!=NULL))
		goto out;

	/* no undo structure around - allocate one. */
	/* step 1: figure out the size of the semaphore array */
	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		return ERR_CAST(sma);
	}

	nsems = sma->sem_nsems;
	if (!ipc_rcu_getref(sma)) {
		rcu_read_unlock();
		un = ERR_PTR(-EIDRM);
		goto out;
	}
	rcu_read_unlock();

	/* step 2: allocate new undo structure */
	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
	if (!new) {
		sem_putref(sma);
		return ERR_PTR(-ENOMEM);
	}

	/* step 3: Acquire the lock on semaphore array */
	rcu_read_lock();
	sem_lock_and_putref(sma);
	if (sma->sem_perm.deleted) {
		sem_unlock(sma, -1);
		rcu_read_unlock();
		kfree(new);
		un = ERR_PTR(-EIDRM);
		goto out;
	}
	spin_lock(&ulp->lock);

	/*
	 * step 4: check for races: did someone else allocate the undo struct?
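	 * (both ulp->lock and the semaphore array lock are held at this
	 * point, so the lookup and the list insertions below are atomic
	 * with respect to concurrent allocators)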
1487 */ 1488 un = lookup_undo(ulp, semid); 1489 if (un) { 1490 kfree(new); 1491 goto success; 1492 } 1493 /* step 5: initialize & link new undo structure */ 1494 new->semadj = (short *) &new[1]; 1495 new->ulp = ulp; 1496 new->semid = semid; 1497 assert_spin_locked(&ulp->lock); 1498 list_add_rcu(&new->list_proc, &ulp->list_proc); 1499 assert_spin_locked(&sma->sem_perm.lock); 1500 list_add(&new->list_id, &sma->list_id); 1501 un = new; 1502 1503 success: 1504 spin_unlock(&ulp->lock); 1505 sem_unlock(sma, -1); 1506 out: 1507 return un; 1508 } 1509 1510 1511 /** 1512 * get_queue_result - Retrieve the result code from sem_queue 1513 * @q: Pointer to queue structure 1514 * 1515 * Retrieve the return code from the pending queue. If IN_WAKEUP is found in 1516 * q->status, then we must loop until the value is replaced with the final 1517 * value: This may happen if a task is woken up by an unrelated event (e.g. 1518 * signal) and in parallel the task is woken up by another task because it got 1519 * the requested semaphores. 1520 * 1521 * The function can be called with or without holding the semaphore spinlock. 1522 */ 1523 static int get_queue_result(struct sem_queue *q) 1524 { 1525 int error; 1526 1527 error = q->status; 1528 while (unlikely(error == IN_WAKEUP)) { 1529 cpu_relax(); 1530 error = q->status; 1531 } 1532 1533 return error; 1534 } 1535 1536 1537 SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, 1538 unsigned, nsops, const struct timespec __user *, timeout) 1539 { 1540 int error = -EINVAL; 1541 struct sem_array *sma; 1542 struct sembuf fast_sops[SEMOPM_FAST]; 1543 struct sembuf* sops = fast_sops, *sop; 1544 struct sem_undo *un; 1545 int undos = 0, alter = 0, max, locknum; 1546 struct sem_queue queue; 1547 unsigned long jiffies_left = 0; 1548 struct ipc_namespace *ns; 1549 struct list_head tasks; 1550 1551 ns = current->nsproxy->ipc_ns; 1552 1553 if (nsops < 1 || semid < 0) 1554 return -EINVAL; 1555 if (nsops > ns->sc_semopm) 1556 return -E2BIG; 1557 if(nsops > SEMOPM_FAST) { 1558 sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL); 1559 if(sops==NULL) 1560 return -ENOMEM; 1561 } 1562 if (copy_from_user (sops, tsops, nsops * sizeof(*tsops))) { 1563 error=-EFAULT; 1564 goto out_free; 1565 } 1566 if (timeout) { 1567 struct timespec _timeout; 1568 if (copy_from_user(&_timeout, timeout, sizeof(*timeout))) { 1569 error = -EFAULT; 1570 goto out_free; 1571 } 1572 if (_timeout.tv_sec < 0 || _timeout.tv_nsec < 0 || 1573 _timeout.tv_nsec >= 1000000000L) { 1574 error = -EINVAL; 1575 goto out_free; 1576 } 1577 jiffies_left = timespec_to_jiffies(&_timeout); 1578 } 1579 max = 0; 1580 for (sop = sops; sop < sops + nsops; sop++) { 1581 if (sop->sem_num >= max) 1582 max = sop->sem_num; 1583 if (sop->sem_flg & SEM_UNDO) 1584 undos = 1; 1585 if (sop->sem_op != 0) 1586 alter = 1; 1587 } 1588 1589 INIT_LIST_HEAD(&tasks); 1590 1591 if (undos) { 1592 /* On success, find_alloc_undo takes the rcu_read_lock */ 1593 un = find_alloc_undo(ns, semid); 1594 if (IS_ERR(un)) { 1595 error = PTR_ERR(un); 1596 goto out_free; 1597 } 1598 } else { 1599 un = NULL; 1600 rcu_read_lock(); 1601 } 1602 1603 sma = sem_obtain_object_check(ns, semid); 1604 if (IS_ERR(sma)) { 1605 rcu_read_unlock(); 1606 error = PTR_ERR(sma); 1607 goto out_free; 1608 } 1609 1610 error = -EFBIG; 1611 if (max >= sma->sem_nsems) 1612 goto out_rcu_wakeup; 1613 1614 error = -EACCES; 1615 if (ipcperms(ns, &sma->sem_perm, alter ? 
		goto out_rcu_wakeup;

	error = security_sem_semop(sma, sops, nsops, alter);
	if (error)
		goto out_rcu_wakeup;

	/*
	 * semid identifiers are not unique - find_alloc_undo may have
	 * allocated an undo structure, it was invalidated by an RMID
	 * and now a new array has received the same id. Check and fail.
	 * This case can be detected by checking un->semid. The existence of
	 * "un" itself is guaranteed by rcu.
	 */
	error = -EIDRM;
	locknum = sem_lock(sma, sops, nsops);
	if (un && un->semid == -1)
		goto out_unlock_free;

	error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current));
	if (error <= 0) {
		if (alter && error == 0)
			do_smart_update(sma, sops, nsops, 1, &tasks);

		goto out_unlock_free;
	}

	/* We need to sleep on this operation, so we put the current
	 * task into the pending queue and go to sleep.
	 */

	queue.sops = sops;
	queue.nsops = nsops;
	queue.undo = un;
	queue.pid = task_tgid_vnr(current);
	queue.alter = alter;

	if (nsops == 1) {
		struct sem *curr;
		curr = &sma->sem_base[sops->sem_num];

		if (alter)
			list_add_tail(&queue.list, &curr->sem_pending);
		else
			list_add(&queue.list, &curr->sem_pending);
	} else {
		if (alter)
			list_add_tail(&queue.list, &sma->sem_pending);
		else
			list_add(&queue.list, &sma->sem_pending);
		sma->complex_count++;
	}

	queue.status = -EINTR;
	queue.sleeper = current;

sleep_again:
	current->state = TASK_INTERRUPTIBLE;
	sem_unlock(sma, locknum);
	rcu_read_unlock();

	if (timeout)
		jiffies_left = schedule_timeout(jiffies_left);
	else
		schedule();

	error = get_queue_result(&queue);

	if (error != -EINTR) {
		/* fast path: update_queue already obtained all requested
		 * resources.
		 * Perform a smp_mb(): User space could assume that semop()
		 * is a memory barrier: Without the mb(), the cpu could
		 * speculatively read in user space stale data that was
		 * overwritten by the previous owner of the semaphore.
		 */
		smp_mb();

		goto out_free;
	}

	rcu_read_lock();
	sma = sem_obtain_lock(ns, semid, sops, nsops, &locknum);

	/*
	 * Wait until it's guaranteed that no wakeup_sem_queue_do() is ongoing.
	 */
	error = get_queue_result(&queue);

	/*
	 * Array removed? If yes, leave without sem_unlock().
	 */
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		goto out_free;
	}


	/*
	 * If queue.status != -EINTR we are woken up by another process.
	 * Leave without unlink_queue(), but with sem_unlock().
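	 * (the waker already removed us from the pending list inside
	 * update_queue(), so there is nothing left to unlink)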
	 */

	if (error != -EINTR) {
		goto out_unlock_free;
	}

	/*
	 * If an interrupt occurred we have to clean up the queue
	 */
	if (timeout && jiffies_left == 0)
		error = -EAGAIN;

	/*
	 * If the wakeup was spurious, just retry
	 */
	if (error == -EINTR && !signal_pending(current))
		goto sleep_again;

	unlink_queue(sma, &queue);

out_unlock_free:
	sem_unlock(sma, locknum);
out_rcu_wakeup:
	rcu_read_unlock();
	wake_up_sem_queue_do(&tasks);
out_free:
	if(sops != fast_sops)
		kfree(sops);
	return error;
}

SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops,
		unsigned, nsops)
{
	return sys_semtimedop(semid, tsops, nsops, NULL);
}

/* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
 * parent and child tasks.
 */

int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
{
	struct sem_undo_list *undo_list;
	int error;

	if (clone_flags & CLONE_SYSVSEM) {
		error = get_undo_list(&undo_list);
		if (error)
			return error;
		atomic_inc(&undo_list->refcnt);
		tsk->sysvsem.undo_list = undo_list;
	} else
		tsk->sysvsem.undo_list = NULL;

	return 0;
}

/*
 * add semadj values to semaphores, free undo structures.
 * undo structures are not freed when semaphore arrays are destroyed
 * so some of them may be out of date.
 * IMPLEMENTATION NOTE: There is some confusion over whether the
 * set of adjustments that needs to be done should be done in an atomic
 * manner or not. That is, if we are attempting to decrement the semval
 * should we queue up and wait until we can do so legally?
 * The original implementation attempted to do this (queue and wait).
 * The current implementation does not do so. The POSIX standard
 * and SVID should be consulted to determine what behavior is mandated.
 */
void exit_sem(struct task_struct *tsk)
{
	struct sem_undo_list *ulp;

	ulp = tsk->sysvsem.undo_list;
	if (!ulp)
		return;
	tsk->sysvsem.undo_list = NULL;

	if (!atomic_dec_and_test(&ulp->refcnt))
		return;

	for (;;) {
		struct sem_array *sma;
		struct sem_undo *un;
		struct list_head tasks;
		int semid, i;

		rcu_read_lock();
		un = list_entry_rcu(ulp->list_proc.next,
				    struct sem_undo, list_proc);
		if (&un->list_proc == &ulp->list_proc)
			semid = -1;
		else
			semid = un->semid;

		if (semid == -1) {
			rcu_read_unlock();
			break;
		}

		sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, un->semid);
		/* exit_sem raced with IPC_RMID, nothing to do */
		if (IS_ERR(sma)) {
			rcu_read_unlock();
			continue;
		}

		sem_lock(sma, NULL, -1);
		un = __lookup_undo(ulp, semid);
		if (un == NULL) {
			/* exit_sem raced with IPC_RMID+semget() that created
			 * exactly the same semid. Nothing to do.
1829 */ 1830 sem_unlock(sma, -1); 1831 rcu_read_unlock(); 1832 continue; 1833 } 1834 1835 /* remove un from the linked lists */ 1836 assert_spin_locked(&sma->sem_perm.lock); 1837 list_del(&un->list_id); 1838 1839 spin_lock(&ulp->lock); 1840 list_del_rcu(&un->list_proc); 1841 spin_unlock(&ulp->lock); 1842 1843 /* perform adjustments registered in un */ 1844 for (i = 0; i < sma->sem_nsems; i++) { 1845 struct sem * semaphore = &sma->sem_base[i]; 1846 if (un->semadj[i]) { 1847 semaphore->semval += un->semadj[i]; 1848 /* 1849 * Range checks of the new semaphore value, 1850 * not defined by sus: 1851 * - Some unices ignore the undo entirely 1852 * (e.g. HP UX 11i 11.22, Tru64 V5.1) 1853 * - some cap the value (e.g. FreeBSD caps 1854 * at 0, but doesn't enforce SEMVMX) 1855 * 1856 * Linux caps the semaphore value, both at 0 1857 * and at SEMVMX. 1858 * 1859 * Manfred <manfred@colorfullife.com> 1860 */ 1861 if (semaphore->semval < 0) 1862 semaphore->semval = 0; 1863 if (semaphore->semval > SEMVMX) 1864 semaphore->semval = SEMVMX; 1865 semaphore->sempid = task_tgid_vnr(current); 1866 } 1867 } 1868 /* maybe some queued-up processes were waiting for this */ 1869 INIT_LIST_HEAD(&tasks); 1870 do_smart_update(sma, NULL, 0, 1, &tasks); 1871 sem_unlock(sma, -1); 1872 rcu_read_unlock(); 1873 wake_up_sem_queue_do(&tasks); 1874 1875 kfree_rcu(un, rcu); 1876 } 1877 kfree(ulp); 1878 } 1879 1880 #ifdef CONFIG_PROC_FS 1881 static int sysvipc_sem_proc_show(struct seq_file *s, void *it) 1882 { 1883 struct user_namespace *user_ns = seq_user_ns(s); 1884 struct sem_array *sma = it; 1885 1886 return seq_printf(s, 1887 "%10d %10d %4o %10u %5u %5u %5u %5u %10lu %10lu\n", 1888 sma->sem_perm.key, 1889 sma->sem_perm.id, 1890 sma->sem_perm.mode, 1891 sma->sem_nsems, 1892 from_kuid_munged(user_ns, sma->sem_perm.uid), 1893 from_kgid_munged(user_ns, sma->sem_perm.gid), 1894 from_kuid_munged(user_ns, sma->sem_perm.cuid), 1895 from_kgid_munged(user_ns, sma->sem_perm.cgid), 1896 sma->sem_otime, 1897 sma->sem_ctime); 1898 } 1899 #endif 1900