/*
 * linux/ipc/sem.c
 * Copyright (C) 1992 Krishna Balasubramanian
 * Copyright (C) 1995 Eric Schenk, Bruno Haible
 *
 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * SMP-threaded, sysctl's added
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
 * Enforced range limit on SEM_UNDO
 * (c) 2001 Red Hat Inc
 * Lockless wakeup
 * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
 * Further wakeup optimizations, documentation
 * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 *
 * Implementation notes: (May 2010)
 * This file implements System V semaphores.
 *
 * User space visible behavior:
 * - FIFO ordering for semop() operations (just FIFO, not starvation
 *   protection)
 * - multiple semaphore operations that alter the same semaphore in
 *   one semop() are handled.
 * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
 *   SETALL calls.
 * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
 * - undo adjustments at process exit are limited to 0..SEMVMX.
 * - namespaces are supported.
 * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
 *   to /proc/sys/kernel/sem.
 * - statistics about the usage are reported in /proc/sysvipc/sem.
 *
 * Internals:
 * - scalability:
 *   - all global variables are read-mostly.
 *   - semop() calls and semctl(RMID) are synchronized by RCU.
 *   - most operations do write operations (actually: spin_lock calls) to
 *     the per-semaphore array structure.
 *   Thus: Perfect SMP scaling between independent semaphore arrays.
 *         If multiple semaphores in one array are used, then cache line
 *         thrashing on the semaphore array spinlock will limit the scaling.
 * - semncnt and semzcnt are calculated on demand in count_semncnt() and
 *   count_semzcnt()
 * - the task that performs a successful semop() scans the list of all
 *   sleeping tasks and completes any pending operations that can be fulfilled.
 *   Semaphores are actively given to waiting tasks (necessary for FIFO).
 *   (see update_queue())
 * - To improve the scalability, the actual wake-up calls are performed after
 *   dropping all locks. (see wake_up_sem_queue_prepare(),
 *   wake_up_sem_queue_do())
 * - All work is done by the waker, the woken up task does not have to do
 *   anything - not even acquiring a lock or dropping a refcount.
 * - A woken up task may not even touch the semaphore array anymore, it may
 *   have been destroyed already by a semctl(RMID).
 * - The synchronization between wake-ups due to a timeout/signal and a
 *   wake-up due to a completed semaphore operation is achieved by using an
 *   intermediate state (IN_WAKEUP).
 * - UNDO values are stored in an array (one per process and per
 *   semaphore array, lazily allocated). For backwards compatibility, multiple
 *   modes for the UNDO variables are supported (per process, per thread)
 *   (see copy_semundo, CLONE_SYSVSEM)
 * - There are two lists of the pending operations: a per-array list
 *   and a per-semaphore list (stored in the array). This allows FIFO
 *   ordering to be achieved without always scanning all pending operations.
 *   The worst-case behavior is nevertheless O(N^2) for N wakeups.
74 */ 75 76 #include <linux/slab.h> 77 #include <linux/spinlock.h> 78 #include <linux/init.h> 79 #include <linux/proc_fs.h> 80 #include <linux/time.h> 81 #include <linux/security.h> 82 #include <linux/syscalls.h> 83 #include <linux/audit.h> 84 #include <linux/capability.h> 85 #include <linux/seq_file.h> 86 #include <linux/rwsem.h> 87 #include <linux/nsproxy.h> 88 #include <linux/ipc_namespace.h> 89 90 #include <asm/uaccess.h> 91 #include "util.h" 92 93 /* One semaphore structure for each semaphore in the system. */ 94 struct sem { 95 int semval; /* current value */ 96 int sempid; /* pid of last operation */ 97 spinlock_t lock; /* spinlock for fine-grained semtimedop */ 98 struct list_head sem_pending; /* pending single-sop operations */ 99 }; 100 101 /* One queue for each sleeping process in the system. */ 102 struct sem_queue { 103 struct list_head list; /* queue of pending operations */ 104 struct task_struct *sleeper; /* this process */ 105 struct sem_undo *undo; /* undo structure */ 106 int pid; /* process id of requesting process */ 107 int status; /* completion status of operation */ 108 struct sembuf *sops; /* array of pending operations */ 109 int nsops; /* number of operations */ 110 int alter; /* does *sops alter the array? */ 111 }; 112 113 /* Each task has a list of undo requests. They are executed automatically 114 * when the process exits. 115 */ 116 struct sem_undo { 117 struct list_head list_proc; /* per-process list: * 118 * all undos from one process 119 * rcu protected */ 120 struct rcu_head rcu; /* rcu struct for sem_undo */ 121 struct sem_undo_list *ulp; /* back ptr to sem_undo_list */ 122 struct list_head list_id; /* per semaphore array list: 123 * all undos for one array */ 124 int semid; /* semaphore set identifier */ 125 short *semadj; /* array of adjustments */ 126 /* one per semaphore */ 127 }; 128 129 /* sem_undo_list controls shared access to the list of sem_undo structures 130 * that may be shared among all a CLONE_SYSVSEM task group. 
131 */ 132 struct sem_undo_list { 133 atomic_t refcnt; 134 spinlock_t lock; 135 struct list_head list_proc; 136 }; 137 138 139 #define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS]) 140 141 #define sem_checkid(sma, semid) ipc_checkid(&sma->sem_perm, semid) 142 143 static int newary(struct ipc_namespace *, struct ipc_params *); 144 static void freeary(struct ipc_namespace *, struct kern_ipc_perm *); 145 #ifdef CONFIG_PROC_FS 146 static int sysvipc_sem_proc_show(struct seq_file *s, void *it); 147 #endif 148 149 #define SEMMSL_FAST 256 /* 512 bytes on stack */ 150 #define SEMOPM_FAST 64 /* ~ 372 bytes on stack */ 151 152 /* 153 * linked list protection: 154 * sem_undo.id_next, 155 * sem_array.sem_pending{,last}, 156 * sem_array.sem_undo: sem_lock() for read/write 157 * sem_undo.proc_next: only "current" is allowed to read/write that field. 158 * 159 */ 160 161 #define sc_semmsl sem_ctls[0] 162 #define sc_semmns sem_ctls[1] 163 #define sc_semopm sem_ctls[2] 164 #define sc_semmni sem_ctls[3] 165 166 void sem_init_ns(struct ipc_namespace *ns) 167 { 168 ns->sc_semmsl = SEMMSL; 169 ns->sc_semmns = SEMMNS; 170 ns->sc_semopm = SEMOPM; 171 ns->sc_semmni = SEMMNI; 172 ns->used_sems = 0; 173 ipc_init_ids(&ns->ids[IPC_SEM_IDS]); 174 } 175 176 #ifdef CONFIG_IPC_NS 177 void sem_exit_ns(struct ipc_namespace *ns) 178 { 179 free_ipcs(ns, &sem_ids(ns), freeary); 180 idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr); 181 } 182 #endif 183 184 void __init sem_init (void) 185 { 186 sem_init_ns(&init_ipc_ns); 187 ipc_init_proc_interface("sysvipc/sem", 188 " key semid perms nsems uid gid cuid cgid otime ctime\n", 189 IPC_SEM_IDS, sysvipc_sem_proc_show); 190 } 191 192 /* 193 * If the request contains only one semaphore operation, and there are 194 * no complex transactions pending, lock only the semaphore involved. 195 * Otherwise, lock the entire semaphore array, since we either have 196 * multiple semaphores in our own semops, or we need to look at 197 * semaphores from other pending complex operations. 
198 * 199 * Carefully guard against sma->complex_count changing between zero 200 * and non-zero while we are spinning for the lock. The value of 201 * sma->complex_count cannot change while we are holding the lock, 202 * so sem_unlock should be fine. 203 * 204 * The global lock path checks that all the local locks have been released, 205 * checking each local lock once. This means that the local lock paths 206 * cannot start their critical sections while the global lock is held. 207 */ 208 static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, 209 int nsops) 210 { 211 int locknum; 212 again: 213 if (nsops == 1 && !sma->complex_count) { 214 struct sem *sem = sma->sem_base + sops->sem_num; 215 216 /* Lock just the semaphore we are interested in. */ 217 spin_lock(&sem->lock); 218 219 /* 220 * If sma->complex_count was set while we were spinning, 221 * we may need to look at things we did not lock here. 222 */ 223 if (unlikely(sma->complex_count)) { 224 spin_unlock(&sem->lock); 225 goto lock_array; 226 } 227 228 /* 229 * Another process is holding the global lock on the 230 * sem_array; we cannot enter our critical section, 231 * but have to wait for the global lock to be released. 232 */ 233 if (unlikely(spin_is_locked(&sma->sem_perm.lock))) { 234 spin_unlock(&sem->lock); 235 spin_unlock_wait(&sma->sem_perm.lock); 236 goto again; 237 } 238 239 locknum = sops->sem_num; 240 } else { 241 int i; 242 /* 243 * Lock the semaphore array, and wait for all of the 244 * individual semaphore locks to go away. The code 245 * above ensures no new single-lock holders will enter 246 * their critical section while the array lock is held. 
247 */ 248 lock_array: 249 spin_lock(&sma->sem_perm.lock); 250 for (i = 0; i < sma->sem_nsems; i++) { 251 struct sem *sem = sma->sem_base + i; 252 spin_unlock_wait(&sem->lock); 253 } 254 locknum = -1; 255 } 256 return locknum; 257 } 258 259 static inline void sem_unlock(struct sem_array *sma, int locknum) 260 { 261 if (locknum == -1) { 262 spin_unlock(&sma->sem_perm.lock); 263 } else { 264 struct sem *sem = sma->sem_base + locknum; 265 spin_unlock(&sem->lock); 266 } 267 } 268 269 /* 270 * sem_lock_(check_) routines are called in the paths where the rw_mutex 271 * is not held. 272 * 273 * The caller holds the RCU read lock. 274 */ 275 static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns, 276 int id, struct sembuf *sops, int nsops, int *locknum) 277 { 278 struct kern_ipc_perm *ipcp; 279 struct sem_array *sma; 280 281 ipcp = ipc_obtain_object(&sem_ids(ns), id); 282 if (IS_ERR(ipcp)) 283 return ERR_CAST(ipcp); 284 285 sma = container_of(ipcp, struct sem_array, sem_perm); 286 *locknum = sem_lock(sma, sops, nsops); 287 288 /* ipc_rmid() may have already freed the ID while sem_lock 289 * was spinning: verify that the structure is still valid 290 */ 291 if (!ipcp->deleted) 292 return container_of(ipcp, struct sem_array, sem_perm); 293 294 sem_unlock(sma, *locknum); 295 return ERR_PTR(-EINVAL); 296 } 297 298 static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id) 299 { 300 struct kern_ipc_perm *ipcp = ipc_obtain_object(&sem_ids(ns), id); 301 302 if (IS_ERR(ipcp)) 303 return ERR_CAST(ipcp); 304 305 return container_of(ipcp, struct sem_array, sem_perm); 306 } 307 308 static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns, 309 int id) 310 { 311 struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id); 312 313 if (IS_ERR(ipcp)) 314 return ERR_CAST(ipcp); 315 316 return container_of(ipcp, struct sem_array, sem_perm); 317 } 318 319 static inline void sem_lock_and_putref(struct sem_array *sma) 
320 { 321 sem_lock(sma, NULL, -1); 322 ipc_rcu_putref(sma); 323 } 324 325 static inline void sem_putref(struct sem_array *sma) 326 { 327 ipc_rcu_putref(sma); 328 } 329 330 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) 331 { 332 ipc_rmid(&sem_ids(ns), &s->sem_perm); 333 } 334 335 /* 336 * Lockless wakeup algorithm: 337 * Without the check/retry algorithm a lockless wakeup is possible: 338 * - queue.status is initialized to -EINTR before blocking. 339 * - wakeup is performed by 340 * * unlinking the queue entry from sma->sem_pending 341 * * setting queue.status to IN_WAKEUP 342 * This is the notification for the blocked thread that a 343 * result value is imminent. 344 * * call wake_up_process 345 * * set queue.status to the final value. 346 * - the previously blocked thread checks queue.status: 347 * * if it's IN_WAKEUP, then it must wait until the value changes 348 * * if it's not -EINTR, then the operation was completed by 349 * update_queue. semtimedop can return queue.status without 350 * performing any operation on the sem array. 351 * * otherwise it must acquire the spinlock and check what's up. 352 * 353 * The two-stage algorithm is necessary to protect against the following 354 * races: 355 * - if queue.status is set after wake_up_process, then the woken up idle 356 * thread could race forward and try (and fail) to acquire sma->lock 357 * before update_queue had a chance to set queue.status 358 * - if queue.status is written before wake_up_process and if the 359 * blocked process is woken up by a signal between writing 360 * queue.status and the wake_up_process, then the woken up 361 * process could return from semtimedop and die by calling 362 * sys_exit before wake_up_process is called. Then wake_up_process 363 * will oops, because the task structure is already invalid. 364 * (yes, this happened on s390 with sysv msg). 
365 * 366 */ 367 #define IN_WAKEUP 1 368 369 /** 370 * newary - Create a new semaphore set 371 * @ns: namespace 372 * @params: ptr to the structure that contains key, semflg and nsems 373 * 374 * Called with sem_ids.rw_mutex held (as a writer) 375 */ 376 377 static int newary(struct ipc_namespace *ns, struct ipc_params *params) 378 { 379 int id; 380 int retval; 381 struct sem_array *sma; 382 int size; 383 key_t key = params->key; 384 int nsems = params->u.nsems; 385 int semflg = params->flg; 386 int i; 387 388 if (!nsems) 389 return -EINVAL; 390 if (ns->used_sems + nsems > ns->sc_semmns) 391 return -ENOSPC; 392 393 size = sizeof (*sma) + nsems * sizeof (struct sem); 394 sma = ipc_rcu_alloc(size); 395 if (!sma) { 396 return -ENOMEM; 397 } 398 memset (sma, 0, size); 399 400 sma->sem_perm.mode = (semflg & S_IRWXUGO); 401 sma->sem_perm.key = key; 402 403 sma->sem_perm.security = NULL; 404 retval = security_sem_alloc(sma); 405 if (retval) { 406 ipc_rcu_putref(sma); 407 return retval; 408 } 409 410 id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); 411 if (id < 0) { 412 security_sem_free(sma); 413 ipc_rcu_putref(sma); 414 return id; 415 } 416 ns->used_sems += nsems; 417 418 sma->sem_base = (struct sem *) &sma[1]; 419 420 for (i = 0; i < nsems; i++) { 421 INIT_LIST_HEAD(&sma->sem_base[i].sem_pending); 422 spin_lock_init(&sma->sem_base[i].lock); 423 } 424 425 sma->complex_count = 0; 426 INIT_LIST_HEAD(&sma->sem_pending); 427 INIT_LIST_HEAD(&sma->list_id); 428 sma->sem_nsems = nsems; 429 sma->sem_ctime = get_seconds(); 430 sem_unlock(sma, -1); 431 rcu_read_unlock(); 432 433 return sma->sem_perm.id; 434 } 435 436 437 /* 438 * Called with sem_ids.rw_mutex and ipcp locked. 439 */ 440 static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg) 441 { 442 struct sem_array *sma; 443 444 sma = container_of(ipcp, struct sem_array, sem_perm); 445 return security_sem_associate(sma, semflg); 446 } 447 448 /* 449 * Called with sem_ids.rw_mutex and ipcp locked. 
450 */ 451 static inline int sem_more_checks(struct kern_ipc_perm *ipcp, 452 struct ipc_params *params) 453 { 454 struct sem_array *sma; 455 456 sma = container_of(ipcp, struct sem_array, sem_perm); 457 if (params->u.nsems > sma->sem_nsems) 458 return -EINVAL; 459 460 return 0; 461 } 462 463 SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) 464 { 465 struct ipc_namespace *ns; 466 struct ipc_ops sem_ops; 467 struct ipc_params sem_params; 468 469 ns = current->nsproxy->ipc_ns; 470 471 if (nsems < 0 || nsems > ns->sc_semmsl) 472 return -EINVAL; 473 474 sem_ops.getnew = newary; 475 sem_ops.associate = sem_security; 476 sem_ops.more_checks = sem_more_checks; 477 478 sem_params.key = key; 479 sem_params.flg = semflg; 480 sem_params.u.nsems = nsems; 481 482 return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); 483 } 484 485 /* 486 * Determine whether a sequence of semaphore operations would succeed 487 * all at once. Return 0 if yes, 1 if need to sleep, else return error code. 488 */ 489 490 static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops, 491 int nsops, struct sem_undo *un, int pid) 492 { 493 int result, sem_op; 494 struct sembuf *sop; 495 struct sem * curr; 496 497 for (sop = sops; sop < sops + nsops; sop++) { 498 curr = sma->sem_base + sop->sem_num; 499 sem_op = sop->sem_op; 500 result = curr->semval; 501 502 if (!sem_op && result) 503 goto would_block; 504 505 result += sem_op; 506 if (result < 0) 507 goto would_block; 508 if (result > SEMVMX) 509 goto out_of_range; 510 if (sop->sem_flg & SEM_UNDO) { 511 int undo = un->semadj[sop->sem_num] - sem_op; 512 /* 513 * Exceeding the undo range is an error. 
514 */ 515 if (undo < (-SEMAEM - 1) || undo > SEMAEM) 516 goto out_of_range; 517 } 518 curr->semval = result; 519 } 520 521 sop--; 522 while (sop >= sops) { 523 sma->sem_base[sop->sem_num].sempid = pid; 524 if (sop->sem_flg & SEM_UNDO) 525 un->semadj[sop->sem_num] -= sop->sem_op; 526 sop--; 527 } 528 529 return 0; 530 531 out_of_range: 532 result = -ERANGE; 533 goto undo; 534 535 would_block: 536 if (sop->sem_flg & IPC_NOWAIT) 537 result = -EAGAIN; 538 else 539 result = 1; 540 541 undo: 542 sop--; 543 while (sop >= sops) { 544 sma->sem_base[sop->sem_num].semval -= sop->sem_op; 545 sop--; 546 } 547 548 return result; 549 } 550 551 /** wake_up_sem_queue_prepare(q, error): Prepare wake-up 552 * @q: queue entry that must be signaled 553 * @error: Error value for the signal 554 * 555 * Prepare the wake-up of the queue entry q. 556 */ 557 static void wake_up_sem_queue_prepare(struct list_head *pt, 558 struct sem_queue *q, int error) 559 { 560 if (list_empty(pt)) { 561 /* 562 * Hold preempt off so that we don't get preempted and have the 563 * wakee busy-wait until we're scheduled back on. 564 */ 565 preempt_disable(); 566 } 567 q->status = IN_WAKEUP; 568 q->pid = error; 569 570 list_add_tail(&q->list, pt); 571 } 572 573 /** 574 * wake_up_sem_queue_do(pt) - do the actual wake-up 575 * @pt: list of tasks to be woken up 576 * 577 * Do the actual wake-up. 578 * The function is called without any locks held, thus the semaphore array 579 * could be destroyed already and the tasks can disappear as soon as the 580 * status is set to the actual return code. 581 */ 582 static void wake_up_sem_queue_do(struct list_head *pt) 583 { 584 struct sem_queue *q, *t; 585 int did_something; 586 587 did_something = !list_empty(pt); 588 list_for_each_entry_safe(q, t, pt, list) { 589 wake_up_process(q->sleeper); 590 /* q can disappear immediately after writing q->status. 
*/ 591 smp_wmb(); 592 q->status = q->pid; 593 } 594 if (did_something) 595 preempt_enable(); 596 } 597 598 static void unlink_queue(struct sem_array *sma, struct sem_queue *q) 599 { 600 list_del(&q->list); 601 if (q->nsops > 1) 602 sma->complex_count--; 603 } 604 605 /** check_restart(sma, q) 606 * @sma: semaphore array 607 * @q: the operation that just completed 608 * 609 * update_queue is O(N^2) when it restarts scanning the whole queue of 610 * waiting operations. Therefore this function checks if the restart is 611 * really necessary. It is called after a previously waiting operation 612 * was completed. 613 */ 614 static int check_restart(struct sem_array *sma, struct sem_queue *q) 615 { 616 struct sem *curr; 617 struct sem_queue *h; 618 619 /* if the operation didn't modify the array, then no restart */ 620 if (q->alter == 0) 621 return 0; 622 623 /* pending complex operations are too difficult to analyse */ 624 if (sma->complex_count) 625 return 1; 626 627 /* we were a sleeping complex operation. Too difficult */ 628 if (q->nsops > 1) 629 return 1; 630 631 curr = sma->sem_base + q->sops[0].sem_num; 632 633 /* No-one waits on this queue */ 634 if (list_empty(&curr->sem_pending)) 635 return 0; 636 637 /* the new semaphore value */ 638 if (curr->semval) { 639 /* It is impossible that someone waits for the new value: 640 * - q is a previously sleeping simple operation that 641 * altered the array. It must be a decrement, because 642 * simple increments never sleep. 643 * - The value is not 0, thus wait-for-zero won't proceed. 644 * - If there are older (higher priority) decrements 645 * in the queue, then they have observed the original 646 * semval value and couldn't proceed. The operation 647 * decremented to value - thus they won't proceed either. 648 */ 649 BUG_ON(q->sops[0].sem_op >= 0); 650 return 0; 651 } 652 /* 653 * semval is 0. Check if there are wait-for-zero semops. 
654 * They must be the first entries in the per-semaphore queue 655 */ 656 h = list_first_entry(&curr->sem_pending, struct sem_queue, list); 657 BUG_ON(h->nsops != 1); 658 BUG_ON(h->sops[0].sem_num != q->sops[0].sem_num); 659 660 /* Yes, there is a wait-for-zero semop. Restart */ 661 if (h->sops[0].sem_op == 0) 662 return 1; 663 664 /* Again - no-one is waiting for the new value. */ 665 return 0; 666 } 667 668 669 /** 670 * update_queue(sma, semnum): Look for tasks that can be completed. 671 * @sma: semaphore array. 672 * @semnum: semaphore that was modified. 673 * @pt: list head for the tasks that must be woken up. 674 * 675 * update_queue must be called after a semaphore in a semaphore array 676 * was modified. If multiple semaphores were modified, update_queue must 677 * be called with semnum = -1, as well as with the number of each modified 678 * semaphore. 679 * The tasks that must be woken up are added to @pt. The return code 680 * is stored in q->pid. 681 * The function return 1 if at least one semop was completed successfully. 682 */ 683 static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt) 684 { 685 struct sem_queue *q; 686 struct list_head *walk; 687 struct list_head *pending_list; 688 int semop_completed = 0; 689 690 if (semnum == -1) 691 pending_list = &sma->sem_pending; 692 else 693 pending_list = &sma->sem_base[semnum].sem_pending; 694 695 again: 696 walk = pending_list->next; 697 while (walk != pending_list) { 698 int error, restart; 699 700 q = container_of(walk, struct sem_queue, list); 701 walk = walk->next; 702 703 /* If we are scanning the single sop, per-semaphore list of 704 * one semaphore and that semaphore is 0, then it is not 705 * necessary to scan the "alter" entries: simple increments 706 * that affect only one entry succeed immediately and cannot 707 * be in the per semaphore pending queue, and decrements 708 * cannot be successful if the value is already 0. 
709 */ 710 if (semnum != -1 && sma->sem_base[semnum].semval == 0 && 711 q->alter) 712 break; 713 714 error = try_atomic_semop(sma, q->sops, q->nsops, 715 q->undo, q->pid); 716 717 /* Does q->sleeper still need to sleep? */ 718 if (error > 0) 719 continue; 720 721 unlink_queue(sma, q); 722 723 if (error) { 724 restart = 0; 725 } else { 726 semop_completed = 1; 727 restart = check_restart(sma, q); 728 } 729 730 wake_up_sem_queue_prepare(pt, q, error); 731 if (restart) 732 goto again; 733 } 734 return semop_completed; 735 } 736 737 /** 738 * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue 739 * @sma: semaphore array 740 * @sops: operations that were performed 741 * @nsops: number of operations 742 * @otime: force setting otime 743 * @pt: list head of the tasks that must be woken up. 744 * 745 * do_smart_update() does the required called to update_queue, based on the 746 * actual changes that were performed on the semaphore array. 747 * Note that the function does not do the actual wake-up: the caller is 748 * responsible for calling wake_up_sem_queue_do(@pt). 749 * It is safe to perform this call after dropping all locks. 750 */ 751 static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops, 752 int otime, struct list_head *pt) 753 { 754 int i; 755 756 if (sma->complex_count || sops == NULL) { 757 if (update_queue(sma, -1, pt)) 758 otime = 1; 759 } 760 761 if (!sops) { 762 /* No semops; something special is going on. */ 763 for (i = 0; i < sma->sem_nsems; i++) { 764 if (update_queue(sma, i, pt)) 765 otime = 1; 766 } 767 goto done; 768 } 769 770 /* Check the semaphores that were modified. 
*/ 771 for (i = 0; i < nsops; i++) { 772 if (sops[i].sem_op > 0 || 773 (sops[i].sem_op < 0 && 774 sma->sem_base[sops[i].sem_num].semval == 0)) 775 if (update_queue(sma, sops[i].sem_num, pt)) 776 otime = 1; 777 } 778 done: 779 if (otime) 780 sma->sem_otime = get_seconds(); 781 } 782 783 784 /* The following counts are associated to each semaphore: 785 * semncnt number of tasks waiting on semval being nonzero 786 * semzcnt number of tasks waiting on semval being zero 787 * This model assumes that a task waits on exactly one semaphore. 788 * Since semaphore operations are to be performed atomically, tasks actually 789 * wait on a whole sequence of semaphores simultaneously. 790 * The counts we return here are a rough approximation, but still 791 * warrant that semncnt+semzcnt>0 if the task is on the pending queue. 792 */ 793 static int count_semncnt (struct sem_array * sma, ushort semnum) 794 { 795 int semncnt; 796 struct sem_queue * q; 797 798 semncnt = 0; 799 list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) { 800 struct sembuf * sops = q->sops; 801 BUG_ON(sops->sem_num != semnum); 802 if ((sops->sem_op < 0) && !(sops->sem_flg & IPC_NOWAIT)) 803 semncnt++; 804 } 805 806 list_for_each_entry(q, &sma->sem_pending, list) { 807 struct sembuf * sops = q->sops; 808 int nsops = q->nsops; 809 int i; 810 for (i = 0; i < nsops; i++) 811 if (sops[i].sem_num == semnum 812 && (sops[i].sem_op < 0) 813 && !(sops[i].sem_flg & IPC_NOWAIT)) 814 semncnt++; 815 } 816 return semncnt; 817 } 818 819 static int count_semzcnt (struct sem_array * sma, ushort semnum) 820 { 821 int semzcnt; 822 struct sem_queue * q; 823 824 semzcnt = 0; 825 list_for_each_entry(q, &sma->sem_base[semnum].sem_pending, list) { 826 struct sembuf * sops = q->sops; 827 BUG_ON(sops->sem_num != semnum); 828 if ((sops->sem_op == 0) && !(sops->sem_flg & IPC_NOWAIT)) 829 semzcnt++; 830 } 831 832 list_for_each_entry(q, &sma->sem_pending, list) { 833 struct sembuf * sops = q->sops; 834 int nsops = q->nsops; 
835 int i; 836 for (i = 0; i < nsops; i++) 837 if (sops[i].sem_num == semnum 838 && (sops[i].sem_op == 0) 839 && !(sops[i].sem_flg & IPC_NOWAIT)) 840 semzcnt++; 841 } 842 return semzcnt; 843 } 844 845 /* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked 846 * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex 847 * remains locked on exit. 848 */ 849 static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 850 { 851 struct sem_undo *un, *tu; 852 struct sem_queue *q, *tq; 853 struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); 854 struct list_head tasks; 855 int i; 856 857 /* Free the existing undo structures for this semaphore set. */ 858 assert_spin_locked(&sma->sem_perm.lock); 859 list_for_each_entry_safe(un, tu, &sma->list_id, list_id) { 860 list_del(&un->list_id); 861 spin_lock(&un->ulp->lock); 862 un->semid = -1; 863 list_del_rcu(&un->list_proc); 864 spin_unlock(&un->ulp->lock); 865 kfree_rcu(un, rcu); 866 } 867 868 /* Wake up all pending processes and let them fail with EIDRM. 
*/ 869 INIT_LIST_HEAD(&tasks); 870 list_for_each_entry_safe(q, tq, &sma->sem_pending, list) { 871 unlink_queue(sma, q); 872 wake_up_sem_queue_prepare(&tasks, q, -EIDRM); 873 } 874 for (i = 0; i < sma->sem_nsems; i++) { 875 struct sem *sem = sma->sem_base + i; 876 list_for_each_entry_safe(q, tq, &sem->sem_pending, list) { 877 unlink_queue(sma, q); 878 wake_up_sem_queue_prepare(&tasks, q, -EIDRM); 879 } 880 } 881 882 /* Remove the semaphore set from the IDR */ 883 sem_rmid(ns, sma); 884 sem_unlock(sma, -1); 885 rcu_read_unlock(); 886 887 wake_up_sem_queue_do(&tasks); 888 ns->used_sems -= sma->sem_nsems; 889 security_sem_free(sma); 890 ipc_rcu_putref(sma); 891 } 892 893 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) 894 { 895 switch(version) { 896 case IPC_64: 897 return copy_to_user(buf, in, sizeof(*in)); 898 case IPC_OLD: 899 { 900 struct semid_ds out; 901 902 memset(&out, 0, sizeof(out)); 903 904 ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm); 905 906 out.sem_otime = in->sem_otime; 907 out.sem_ctime = in->sem_ctime; 908 out.sem_nsems = in->sem_nsems; 909 910 return copy_to_user(buf, &out, sizeof(out)); 911 } 912 default: 913 return -EINVAL; 914 } 915 } 916 917 static int semctl_nolock(struct ipc_namespace *ns, int semid, 918 int cmd, int version, void __user *p) 919 { 920 int err; 921 struct sem_array *sma; 922 923 switch(cmd) { 924 case IPC_INFO: 925 case SEM_INFO: 926 { 927 struct seminfo seminfo; 928 int max_id; 929 930 err = security_sem_semctl(NULL, cmd); 931 if (err) 932 return err; 933 934 memset(&seminfo,0,sizeof(seminfo)); 935 seminfo.semmni = ns->sc_semmni; 936 seminfo.semmns = ns->sc_semmns; 937 seminfo.semmsl = ns->sc_semmsl; 938 seminfo.semopm = ns->sc_semopm; 939 seminfo.semvmx = SEMVMX; 940 seminfo.semmnu = SEMMNU; 941 seminfo.semmap = SEMMAP; 942 seminfo.semume = SEMUME; 943 down_read(&sem_ids(ns).rw_mutex); 944 if (cmd == SEM_INFO) { 945 seminfo.semusz = sem_ids(ns).in_use; 946 seminfo.semaem = 
ns->used_sems; 947 } else { 948 seminfo.semusz = SEMUSZ; 949 seminfo.semaem = SEMAEM; 950 } 951 max_id = ipc_get_maxid(&sem_ids(ns)); 952 up_read(&sem_ids(ns).rw_mutex); 953 if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) 954 return -EFAULT; 955 return (max_id < 0) ? 0: max_id; 956 } 957 case IPC_STAT: 958 case SEM_STAT: 959 { 960 struct semid64_ds tbuf; 961 int id = 0; 962 963 memset(&tbuf, 0, sizeof(tbuf)); 964 965 rcu_read_lock(); 966 if (cmd == SEM_STAT) { 967 sma = sem_obtain_object(ns, semid); 968 if (IS_ERR(sma)) { 969 err = PTR_ERR(sma); 970 goto out_unlock; 971 } 972 id = sma->sem_perm.id; 973 } else { 974 sma = sem_obtain_object_check(ns, semid); 975 if (IS_ERR(sma)) { 976 err = PTR_ERR(sma); 977 goto out_unlock; 978 } 979 } 980 981 err = -EACCES; 982 if (ipcperms(ns, &sma->sem_perm, S_IRUGO)) 983 goto out_unlock; 984 985 err = security_sem_semctl(sma, cmd); 986 if (err) 987 goto out_unlock; 988 989 kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm); 990 tbuf.sem_otime = sma->sem_otime; 991 tbuf.sem_ctime = sma->sem_ctime; 992 tbuf.sem_nsems = sma->sem_nsems; 993 rcu_read_unlock(); 994 if (copy_semid_to_user(p, &tbuf, version)) 995 return -EFAULT; 996 return id; 997 } 998 default: 999 return -EINVAL; 1000 } 1001 out_unlock: 1002 rcu_read_unlock(); 1003 return err; 1004 } 1005 1006 static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, 1007 unsigned long arg) 1008 { 1009 struct sem_undo *un; 1010 struct sem_array *sma; 1011 struct sem* curr; 1012 int err; 1013 struct list_head tasks; 1014 int val; 1015 #if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN) 1016 /* big-endian 64bit */ 1017 val = arg >> 32; 1018 #else 1019 /* 32bit or little-endian 64bit */ 1020 val = arg; 1021 #endif 1022 1023 if (val > SEMVMX || val < 0) 1024 return -ERANGE; 1025 1026 INIT_LIST_HEAD(&tasks); 1027 1028 rcu_read_lock(); 1029 sma = sem_obtain_object_check(ns, semid); 1030 if (IS_ERR(sma)) { 1031 rcu_read_unlock(); 1032 return PTR_ERR(sma); 1033 } 1034 
1035 if (semnum < 0 || semnum >= sma->sem_nsems) { 1036 rcu_read_unlock(); 1037 return -EINVAL; 1038 } 1039 1040 1041 if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) { 1042 rcu_read_unlock(); 1043 return -EACCES; 1044 } 1045 1046 err = security_sem_semctl(sma, SETVAL); 1047 if (err) { 1048 rcu_read_unlock(); 1049 return -EACCES; 1050 } 1051 1052 sem_lock(sma, NULL, -1); 1053 1054 curr = &sma->sem_base[semnum]; 1055 1056 assert_spin_locked(&sma->sem_perm.lock); 1057 list_for_each_entry(un, &sma->list_id, list_id) 1058 un->semadj[semnum] = 0; 1059 1060 curr->semval = val; 1061 curr->sempid = task_tgid_vnr(current); 1062 sma->sem_ctime = get_seconds(); 1063 /* maybe some queued-up processes were waiting for this */ 1064 do_smart_update(sma, NULL, 0, 0, &tasks); 1065 sem_unlock(sma, -1); 1066 rcu_read_unlock(); 1067 wake_up_sem_queue_do(&tasks); 1068 return 0; 1069 } 1070 1071 static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, 1072 int cmd, void __user *p) 1073 { 1074 struct sem_array *sma; 1075 struct sem* curr; 1076 int err, nsems; 1077 ushort fast_sem_io[SEMMSL_FAST]; 1078 ushort* sem_io = fast_sem_io; 1079 struct list_head tasks; 1080 1081 INIT_LIST_HEAD(&tasks); 1082 1083 rcu_read_lock(); 1084 sma = sem_obtain_object_check(ns, semid); 1085 if (IS_ERR(sma)) { 1086 rcu_read_unlock(); 1087 return PTR_ERR(sma); 1088 } 1089 1090 nsems = sma->sem_nsems; 1091 1092 err = -EACCES; 1093 if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? 
S_IWUGO : S_IRUGO)) 1094 goto out_rcu_wakeup; 1095 1096 err = security_sem_semctl(sma, cmd); 1097 if (err) 1098 goto out_rcu_wakeup; 1099 1100 err = -EACCES; 1101 switch (cmd) { 1102 case GETALL: 1103 { 1104 ushort __user *array = p; 1105 int i; 1106 1107 sem_lock(sma, NULL, -1); 1108 if(nsems > SEMMSL_FAST) { 1109 if (!ipc_rcu_getref(sma)) { 1110 sem_unlock(sma, -1); 1111 rcu_read_unlock(); 1112 err = -EIDRM; 1113 goto out_free; 1114 } 1115 sem_unlock(sma, -1); 1116 rcu_read_unlock(); 1117 sem_io = ipc_alloc(sizeof(ushort)*nsems); 1118 if(sem_io == NULL) { 1119 sem_putref(sma); 1120 return -ENOMEM; 1121 } 1122 1123 rcu_read_lock(); 1124 sem_lock_and_putref(sma); 1125 if (sma->sem_perm.deleted) { 1126 sem_unlock(sma, -1); 1127 rcu_read_unlock(); 1128 err = -EIDRM; 1129 goto out_free; 1130 } 1131 } 1132 for (i = 0; i < sma->sem_nsems; i++) 1133 sem_io[i] = sma->sem_base[i].semval; 1134 sem_unlock(sma, -1); 1135 rcu_read_unlock(); 1136 err = 0; 1137 if(copy_to_user(array, sem_io, nsems*sizeof(ushort))) 1138 err = -EFAULT; 1139 goto out_free; 1140 } 1141 case SETALL: 1142 { 1143 int i; 1144 struct sem_undo *un; 1145 1146 if (!ipc_rcu_getref(sma)) { 1147 rcu_read_unlock(); 1148 return -EIDRM; 1149 } 1150 rcu_read_unlock(); 1151 1152 if(nsems > SEMMSL_FAST) { 1153 sem_io = ipc_alloc(sizeof(ushort)*nsems); 1154 if(sem_io == NULL) { 1155 sem_putref(sma); 1156 return -ENOMEM; 1157 } 1158 } 1159 1160 if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) { 1161 sem_putref(sma); 1162 err = -EFAULT; 1163 goto out_free; 1164 } 1165 1166 for (i = 0; i < nsems; i++) { 1167 if (sem_io[i] > SEMVMX) { 1168 sem_putref(sma); 1169 err = -ERANGE; 1170 goto out_free; 1171 } 1172 } 1173 rcu_read_lock(); 1174 sem_lock_and_putref(sma); 1175 if (sma->sem_perm.deleted) { 1176 sem_unlock(sma, -1); 1177 rcu_read_unlock(); 1178 err = -EIDRM; 1179 goto out_free; 1180 } 1181 1182 for (i = 0; i < nsems; i++) 1183 sma->sem_base[i].semval = sem_io[i]; 1184 1185 
assert_spin_locked(&sma->sem_perm.lock); 1186 list_for_each_entry(un, &sma->list_id, list_id) { 1187 for (i = 0; i < nsems; i++) 1188 un->semadj[i] = 0; 1189 } 1190 sma->sem_ctime = get_seconds(); 1191 /* maybe some queued-up processes were waiting for this */ 1192 do_smart_update(sma, NULL, 0, 0, &tasks); 1193 err = 0; 1194 goto out_unlock; 1195 } 1196 /* GETVAL, GETPID, GETNCTN, GETZCNT: fall-through */ 1197 } 1198 err = -EINVAL; 1199 if (semnum < 0 || semnum >= nsems) 1200 goto out_rcu_wakeup; 1201 1202 sem_lock(sma, NULL, -1); 1203 curr = &sma->sem_base[semnum]; 1204 1205 switch (cmd) { 1206 case GETVAL: 1207 err = curr->semval; 1208 goto out_unlock; 1209 case GETPID: 1210 err = curr->sempid; 1211 goto out_unlock; 1212 case GETNCNT: 1213 err = count_semncnt(sma,semnum); 1214 goto out_unlock; 1215 case GETZCNT: 1216 err = count_semzcnt(sma,semnum); 1217 goto out_unlock; 1218 } 1219 1220 out_unlock: 1221 sem_unlock(sma, -1); 1222 out_rcu_wakeup: 1223 rcu_read_unlock(); 1224 wake_up_sem_queue_do(&tasks); 1225 out_free: 1226 if(sem_io != fast_sem_io) 1227 ipc_free(sem_io, sizeof(ushort)*nsems); 1228 return err; 1229 } 1230 1231 static inline unsigned long 1232 copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version) 1233 { 1234 switch(version) { 1235 case IPC_64: 1236 if (copy_from_user(out, buf, sizeof(*out))) 1237 return -EFAULT; 1238 return 0; 1239 case IPC_OLD: 1240 { 1241 struct semid_ds tbuf_old; 1242 1243 if(copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 1244 return -EFAULT; 1245 1246 out->sem_perm.uid = tbuf_old.sem_perm.uid; 1247 out->sem_perm.gid = tbuf_old.sem_perm.gid; 1248 out->sem_perm.mode = tbuf_old.sem_perm.mode; 1249 1250 return 0; 1251 } 1252 default: 1253 return -EINVAL; 1254 } 1255 } 1256 1257 /* 1258 * This function handles some semctl commands which require the rw_mutex 1259 * to be held in write mode. 1260 * NOTE: no locks must be held, the rw_mutex is taken inside this function. 
1261 */ 1262 static int semctl_down(struct ipc_namespace *ns, int semid, 1263 int cmd, int version, void __user *p) 1264 { 1265 struct sem_array *sma; 1266 int err; 1267 struct semid64_ds semid64; 1268 struct kern_ipc_perm *ipcp; 1269 1270 if(cmd == IPC_SET) { 1271 if (copy_semid_from_user(&semid64, p, version)) 1272 return -EFAULT; 1273 } 1274 1275 ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd, 1276 &semid64.sem_perm, 0); 1277 if (IS_ERR(ipcp)) 1278 return PTR_ERR(ipcp); 1279 1280 sma = container_of(ipcp, struct sem_array, sem_perm); 1281 1282 err = security_sem_semctl(sma, cmd); 1283 if (err) { 1284 rcu_read_unlock(); 1285 goto out_up; 1286 } 1287 1288 switch(cmd){ 1289 case IPC_RMID: 1290 sem_lock(sma, NULL, -1); 1291 freeary(ns, ipcp); 1292 goto out_up; 1293 case IPC_SET: 1294 sem_lock(sma, NULL, -1); 1295 err = ipc_update_perm(&semid64.sem_perm, ipcp); 1296 if (err) 1297 goto out_unlock; 1298 sma->sem_ctime = get_seconds(); 1299 break; 1300 default: 1301 rcu_read_unlock(); 1302 err = -EINVAL; 1303 goto out_up; 1304 } 1305 1306 out_unlock: 1307 sem_unlock(sma, -1); 1308 rcu_read_unlock(); 1309 out_up: 1310 up_write(&sem_ids(ns).rw_mutex); 1311 return err; 1312 } 1313 1314 SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg) 1315 { 1316 int version; 1317 struct ipc_namespace *ns; 1318 void __user *p = (void __user *)arg; 1319 1320 if (semid < 0) 1321 return -EINVAL; 1322 1323 version = ipc_parse_version(&cmd); 1324 ns = current->nsproxy->ipc_ns; 1325 1326 switch(cmd) { 1327 case IPC_INFO: 1328 case SEM_INFO: 1329 case IPC_STAT: 1330 case SEM_STAT: 1331 return semctl_nolock(ns, semid, cmd, version, p); 1332 case GETALL: 1333 case GETVAL: 1334 case GETPID: 1335 case GETNCNT: 1336 case GETZCNT: 1337 case SETALL: 1338 return semctl_main(ns, semid, semnum, cmd, p); 1339 case SETVAL: 1340 return semctl_setval(ns, semid, semnum, arg); 1341 case IPC_RMID: 1342 case IPC_SET: 1343 return semctl_down(ns, semid, cmd, version, p); 1344 
default: 1345 return -EINVAL; 1346 } 1347 } 1348 1349 /* If the task doesn't already have a undo_list, then allocate one 1350 * here. We guarantee there is only one thread using this undo list, 1351 * and current is THE ONE 1352 * 1353 * If this allocation and assignment succeeds, but later 1354 * portions of this code fail, there is no need to free the sem_undo_list. 1355 * Just let it stay associated with the task, and it'll be freed later 1356 * at exit time. 1357 * 1358 * This can block, so callers must hold no locks. 1359 */ 1360 static inline int get_undo_list(struct sem_undo_list **undo_listp) 1361 { 1362 struct sem_undo_list *undo_list; 1363 1364 undo_list = current->sysvsem.undo_list; 1365 if (!undo_list) { 1366 undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL); 1367 if (undo_list == NULL) 1368 return -ENOMEM; 1369 spin_lock_init(&undo_list->lock); 1370 atomic_set(&undo_list->refcnt, 1); 1371 INIT_LIST_HEAD(&undo_list->list_proc); 1372 1373 current->sysvsem.undo_list = undo_list; 1374 } 1375 *undo_listp = undo_list; 1376 return 0; 1377 } 1378 1379 static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid) 1380 { 1381 struct sem_undo *un; 1382 1383 list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) { 1384 if (un->semid == semid) 1385 return un; 1386 } 1387 return NULL; 1388 } 1389 1390 static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid) 1391 { 1392 struct sem_undo *un; 1393 1394 assert_spin_locked(&ulp->lock); 1395 1396 un = __lookup_undo(ulp, semid); 1397 if (un) { 1398 list_del_rcu(&un->list_proc); 1399 list_add_rcu(&un->list_proc, &ulp->list_proc); 1400 } 1401 return un; 1402 } 1403 1404 /** 1405 * find_alloc_undo - Lookup (and if not present create) undo array 1406 * @ns: namespace 1407 * @semid: semaphore array id 1408 * 1409 * The function looks up (and if not present creates) the undo structure. 
/**
 * find_alloc_undo - Lookup (and if not present create) undo array
 * @ns: namespace
 * @semid: semaphore array id
 *
 * The function looks up (and if not present creates) the undo structure.
 * The size of the undo structure depends on the size of the semaphore
 * array, thus the alloc path is not that straightforward: the array is
 * looked up to learn its size, all locks are dropped for the allocation
 * (a reference keeps the array alive meanwhile), and after relocking both
 * the array state and the undo list have to be checked again.
 * Lifetime-rules: sem_undo is rcu-protected, on success, the function
 * performs a rcu_read_lock().
 *
 * Returns the undo structure with the rcu read lock held, or an ERR_PTR()
 * value with the rcu read lock released: the error from get_undo_list()
 * or sem_obtain_object_check(), -ENOMEM, or -EIDRM if the array went away.
 */
static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
{
	struct sem_array *sma;
	struct sem_undo_list *ulp;
	struct sem_undo *un, *new;
	int nsems, error;

	/* may allocate, so it runs before any lock is taken */
	error = get_undo_list(&ulp);
	if (error)
		return ERR_PTR(error);

	rcu_read_lock();
	spin_lock(&ulp->lock);
	un = lookup_undo(ulp, semid);
	spin_unlock(&ulp->lock);
	/* common case: leave with the rcu read lock still held */
	if (likely(un!=NULL))
		goto out;

	/* no undo structure around - allocate one. */
	/* step 1: figure out the size of the semaphore array */
	sma = sem_obtain_object_check(ns, semid);
	if (IS_ERR(sma)) {
		rcu_read_unlock();
		return ERR_CAST(sma);
	}

	nsems = sma->sem_nsems;
	/* pin the array so that it survives the unlocked allocation below */
	if (!ipc_rcu_getref(sma)) {
		rcu_read_unlock();
		un = ERR_PTR(-EIDRM);
		goto out;
	}
	rcu_read_unlock();

	/* step 2: allocate new undo structure */
	/* one short of adjustment per semaphore follows the struct itself */
	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
	if (!new) {
		sem_putref(sma);
		return ERR_PTR(-ENOMEM);
	}

	/* step 3: Acquire the lock on semaphore array */
	rcu_read_lock();
	sem_lock_and_putref(sma);
	/* the array may have been removed while no lock was held */
	if (sma->sem_perm.deleted) {
		sem_unlock(sma, -1);
		rcu_read_unlock();
		kfree(new);
		un = ERR_PTR(-EIDRM);
		goto out;
	}
	spin_lock(&ulp->lock);

	/*
	 * step 4: check for races: did someone else allocate the undo struct?
	 */
	un = lookup_undo(ulp, semid);
	if (un) {
		kfree(new);
		goto success;
	}
	/* step 5: initialize & link new undo structure */
	/* semadj points at the storage allocated right behind *new */
	new->semadj = (short *) &new[1];
	new->ulp = ulp;
	new->semid = semid;
	assert_spin_locked(&ulp->lock);
	list_add_rcu(&new->list_proc, &ulp->list_proc);
	assert_spin_locked(&sma->sem_perm.lock);
	list_add(&new->list_id, &sma->list_id);
	un = new;

success:
	/* both locks are dropped, the rcu read lock stays held for the caller */
	spin_unlock(&ulp->lock);
	sem_unlock(sma, -1);
out:
	return un;
}
1505 */ 1506 static int get_queue_result(struct sem_queue *q) 1507 { 1508 int error; 1509 1510 error = q->status; 1511 while (unlikely(error == IN_WAKEUP)) { 1512 cpu_relax(); 1513 error = q->status; 1514 } 1515 1516 return error; 1517 } 1518 1519 1520 SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, 1521 unsigned, nsops, const struct timespec __user *, timeout) 1522 { 1523 int error = -EINVAL; 1524 struct sem_array *sma; 1525 struct sembuf fast_sops[SEMOPM_FAST]; 1526 struct sembuf* sops = fast_sops, *sop; 1527 struct sem_undo *un; 1528 int undos = 0, alter = 0, max, locknum; 1529 struct sem_queue queue; 1530 unsigned long jiffies_left = 0; 1531 struct ipc_namespace *ns; 1532 struct list_head tasks; 1533 1534 ns = current->nsproxy->ipc_ns; 1535 1536 if (nsops < 1 || semid < 0) 1537 return -EINVAL; 1538 if (nsops > ns->sc_semopm) 1539 return -E2BIG; 1540 if(nsops > SEMOPM_FAST) { 1541 sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL); 1542 if(sops==NULL) 1543 return -ENOMEM; 1544 } 1545 if (copy_from_user (sops, tsops, nsops * sizeof(*tsops))) { 1546 error=-EFAULT; 1547 goto out_free; 1548 } 1549 if (timeout) { 1550 struct timespec _timeout; 1551 if (copy_from_user(&_timeout, timeout, sizeof(*timeout))) { 1552 error = -EFAULT; 1553 goto out_free; 1554 } 1555 if (_timeout.tv_sec < 0 || _timeout.tv_nsec < 0 || 1556 _timeout.tv_nsec >= 1000000000L) { 1557 error = -EINVAL; 1558 goto out_free; 1559 } 1560 jiffies_left = timespec_to_jiffies(&_timeout); 1561 } 1562 max = 0; 1563 for (sop = sops; sop < sops + nsops; sop++) { 1564 if (sop->sem_num >= max) 1565 max = sop->sem_num; 1566 if (sop->sem_flg & SEM_UNDO) 1567 undos = 1; 1568 if (sop->sem_op != 0) 1569 alter = 1; 1570 } 1571 1572 INIT_LIST_HEAD(&tasks); 1573 1574 if (undos) { 1575 /* On success, find_alloc_undo takes the rcu_read_lock */ 1576 un = find_alloc_undo(ns, semid); 1577 if (IS_ERR(un)) { 1578 error = PTR_ERR(un); 1579 goto out_free; 1580 } 1581 } else { 1582 un = NULL; 1583 
rcu_read_lock(); 1584 } 1585 1586 sma = sem_obtain_object_check(ns, semid); 1587 if (IS_ERR(sma)) { 1588 rcu_read_unlock(); 1589 error = PTR_ERR(sma); 1590 goto out_free; 1591 } 1592 1593 error = -EFBIG; 1594 if (max >= sma->sem_nsems) 1595 goto out_rcu_wakeup; 1596 1597 error = -EACCES; 1598 if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) 1599 goto out_rcu_wakeup; 1600 1601 error = security_sem_semop(sma, sops, nsops, alter); 1602 if (error) 1603 goto out_rcu_wakeup; 1604 1605 /* 1606 * semid identifiers are not unique - find_alloc_undo may have 1607 * allocated an undo structure, it was invalidated by an RMID 1608 * and now a new array with received the same id. Check and fail. 1609 * This case can be detected checking un->semid. The existence of 1610 * "un" itself is guaranteed by rcu. 1611 */ 1612 error = -EIDRM; 1613 locknum = sem_lock(sma, sops, nsops); 1614 if (un && un->semid == -1) 1615 goto out_unlock_free; 1616 1617 error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current)); 1618 if (error <= 0) { 1619 if (alter && error == 0) 1620 do_smart_update(sma, sops, nsops, 1, &tasks); 1621 1622 goto out_unlock_free; 1623 } 1624 1625 /* We need to sleep on this operation, so we put the current 1626 * task into the pending queue and go to sleep. 
1627 */ 1628 1629 queue.sops = sops; 1630 queue.nsops = nsops; 1631 queue.undo = un; 1632 queue.pid = task_tgid_vnr(current); 1633 queue.alter = alter; 1634 1635 if (nsops == 1) { 1636 struct sem *curr; 1637 curr = &sma->sem_base[sops->sem_num]; 1638 1639 if (alter) 1640 list_add_tail(&queue.list, &curr->sem_pending); 1641 else 1642 list_add(&queue.list, &curr->sem_pending); 1643 } else { 1644 if (alter) 1645 list_add_tail(&queue.list, &sma->sem_pending); 1646 else 1647 list_add(&queue.list, &sma->sem_pending); 1648 sma->complex_count++; 1649 } 1650 1651 queue.status = -EINTR; 1652 queue.sleeper = current; 1653 1654 sleep_again: 1655 current->state = TASK_INTERRUPTIBLE; 1656 sem_unlock(sma, locknum); 1657 rcu_read_unlock(); 1658 1659 if (timeout) 1660 jiffies_left = schedule_timeout(jiffies_left); 1661 else 1662 schedule(); 1663 1664 error = get_queue_result(&queue); 1665 1666 if (error != -EINTR) { 1667 /* fast path: update_queue already obtained all requested 1668 * resources. 1669 * Perform a smp_mb(): User space could assume that semop() 1670 * is a memory barrier: Without the mb(), the cpu could 1671 * speculatively read in user space stale data that was 1672 * overwritten by the previous owner of the semaphore. 1673 */ 1674 smp_mb(); 1675 1676 goto out_free; 1677 } 1678 1679 rcu_read_lock(); 1680 sma = sem_obtain_lock(ns, semid, sops, nsops, &locknum); 1681 1682 /* 1683 * Wait until it's guaranteed that no wakeup_sem_queue_do() is ongoing. 1684 */ 1685 error = get_queue_result(&queue); 1686 1687 /* 1688 * Array removed? If yes, leave without sem_unlock(). 1689 */ 1690 if (IS_ERR(sma)) { 1691 rcu_read_unlock(); 1692 goto out_free; 1693 } 1694 1695 1696 /* 1697 * If queue.status != -EINTR we are woken up by another process. 1698 * Leave without unlink_queue(), but with sem_unlock(). 
1699 */ 1700 1701 if (error != -EINTR) { 1702 goto out_unlock_free; 1703 } 1704 1705 /* 1706 * If an interrupt occurred we have to clean up the queue 1707 */ 1708 if (timeout && jiffies_left == 0) 1709 error = -EAGAIN; 1710 1711 /* 1712 * If the wakeup was spurious, just retry 1713 */ 1714 if (error == -EINTR && !signal_pending(current)) 1715 goto sleep_again; 1716 1717 unlink_queue(sma, &queue); 1718 1719 out_unlock_free: 1720 sem_unlock(sma, locknum); 1721 out_rcu_wakeup: 1722 rcu_read_unlock(); 1723 wake_up_sem_queue_do(&tasks); 1724 out_free: 1725 if(sops != fast_sops) 1726 kfree(sops); 1727 return error; 1728 } 1729 1730 SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops, 1731 unsigned, nsops) 1732 { 1733 return sys_semtimedop(semid, tsops, nsops, NULL); 1734 } 1735 1736 /* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between 1737 * parent and child tasks. 1738 */ 1739 1740 int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) 1741 { 1742 struct sem_undo_list *undo_list; 1743 int error; 1744 1745 if (clone_flags & CLONE_SYSVSEM) { 1746 error = get_undo_list(&undo_list); 1747 if (error) 1748 return error; 1749 atomic_inc(&undo_list->refcnt); 1750 tsk->sysvsem.undo_list = undo_list; 1751 } else 1752 tsk->sysvsem.undo_list = NULL; 1753 1754 return 0; 1755 } 1756 1757 /* 1758 * add semadj values to semaphores, free undo structures. 1759 * undo structures are not freed when semaphore arrays are destroyed 1760 * so some of them may be out of date. 1761 * IMPLEMENTATION NOTE: There is some confusion over whether the 1762 * set of adjustments that needs to be done should be done in an atomic 1763 * manner or not. That is, if we are attempting to decrement the semval 1764 * should we queue up and wait until we can do so legally? 1765 * The original implementation attempted to do this (queue and wait). 1766 * The current implementation does not do so. 
The POSIX standard 1767 * and SVID should be consulted to determine what behavior is mandated. 1768 */ 1769 void exit_sem(struct task_struct *tsk) 1770 { 1771 struct sem_undo_list *ulp; 1772 1773 ulp = tsk->sysvsem.undo_list; 1774 if (!ulp) 1775 return; 1776 tsk->sysvsem.undo_list = NULL; 1777 1778 if (!atomic_dec_and_test(&ulp->refcnt)) 1779 return; 1780 1781 for (;;) { 1782 struct sem_array *sma; 1783 struct sem_undo *un; 1784 struct list_head tasks; 1785 int semid, i; 1786 1787 rcu_read_lock(); 1788 un = list_entry_rcu(ulp->list_proc.next, 1789 struct sem_undo, list_proc); 1790 if (&un->list_proc == &ulp->list_proc) 1791 semid = -1; 1792 else 1793 semid = un->semid; 1794 1795 if (semid == -1) { 1796 rcu_read_unlock(); 1797 break; 1798 } 1799 1800 sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, un->semid); 1801 /* exit_sem raced with IPC_RMID, nothing to do */ 1802 if (IS_ERR(sma)) { 1803 rcu_read_unlock(); 1804 continue; 1805 } 1806 1807 sem_lock(sma, NULL, -1); 1808 un = __lookup_undo(ulp, semid); 1809 if (un == NULL) { 1810 /* exit_sem raced with IPC_RMID+semget() that created 1811 * exactly the same semid. Nothing to do. 1812 */ 1813 sem_unlock(sma, -1); 1814 rcu_read_unlock(); 1815 continue; 1816 } 1817 1818 /* remove un from the linked lists */ 1819 assert_spin_locked(&sma->sem_perm.lock); 1820 list_del(&un->list_id); 1821 1822 spin_lock(&ulp->lock); 1823 list_del_rcu(&un->list_proc); 1824 spin_unlock(&ulp->lock); 1825 1826 /* perform adjustments registered in un */ 1827 for (i = 0; i < sma->sem_nsems; i++) { 1828 struct sem * semaphore = &sma->sem_base[i]; 1829 if (un->semadj[i]) { 1830 semaphore->semval += un->semadj[i]; 1831 /* 1832 * Range checks of the new semaphore value, 1833 * not defined by sus: 1834 * - Some unices ignore the undo entirely 1835 * (e.g. HP UX 11i 11.22, Tru64 V5.1) 1836 * - some cap the value (e.g. FreeBSD caps 1837 * at 0, but doesn't enforce SEMVMX) 1838 * 1839 * Linux caps the semaphore value, both at 0 1840 * and at SEMVMX. 
1841 * 1842 * Manfred <manfred@colorfullife.com> 1843 */ 1844 if (semaphore->semval < 0) 1845 semaphore->semval = 0; 1846 if (semaphore->semval > SEMVMX) 1847 semaphore->semval = SEMVMX; 1848 semaphore->sempid = task_tgid_vnr(current); 1849 } 1850 } 1851 /* maybe some queued-up processes were waiting for this */ 1852 INIT_LIST_HEAD(&tasks); 1853 do_smart_update(sma, NULL, 0, 1, &tasks); 1854 sem_unlock(sma, -1); 1855 rcu_read_unlock(); 1856 wake_up_sem_queue_do(&tasks); 1857 1858 kfree_rcu(un, rcu); 1859 } 1860 kfree(ulp); 1861 } 1862 1863 #ifdef CONFIG_PROC_FS 1864 static int sysvipc_sem_proc_show(struct seq_file *s, void *it) 1865 { 1866 struct user_namespace *user_ns = seq_user_ns(s); 1867 struct sem_array *sma = it; 1868 1869 return seq_printf(s, 1870 "%10d %10d %4o %10u %5u %5u %5u %5u %10lu %10lu\n", 1871 sma->sem_perm.key, 1872 sma->sem_perm.id, 1873 sma->sem_perm.mode, 1874 sma->sem_nsems, 1875 from_kuid_munged(user_ns, sma->sem_perm.uid), 1876 from_kgid_munged(user_ns, sma->sem_perm.gid), 1877 from_kuid_munged(user_ns, sma->sem_perm.cuid), 1878 from_kgid_munged(user_ns, sma->sem_perm.cgid), 1879 sma->sem_otime, 1880 sma->sem_ctime); 1881 } 1882 #endif 1883