/*
 * linux/ipc/sem.c
 * Copyright (C) 1992 Krishna Balasubramanian
 * Copyright (C) 1995 Eric Schenk, Bruno Haible
 *
 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * SMP-threaded, sysctl's added
 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
 * Enforced range limit on SEM_UNDO
 * (c) 2001 Red Hat Inc
 * Lockless wakeup
 * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
 * Further wakeup optimizations, documentation
 * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
 *
 * support for audit of ipc object properties and permission changes
 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
 *
 * namespaces support
 * OpenVZ, SWsoft Inc.
 * Pavel Emelianov <xemul@openvz.org>
 *
 * Implementation notes: (May 2010)
 * This file implements System V semaphores.
 *
 * User space visible behavior:
 * - FIFO ordering for semop() operations (just FIFO, not starvation
 *   protection)
 * - multiple semaphore operations that alter the same semaphore in
 *   one semop() are handled.
 * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
 *   SETALL calls.
 * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
 * - undo adjustments at process exit are limited to 0..SEMVMX.
 * - namespaces are supported.
 * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
 *   to /proc/sys/kernel/sem.
 * - statistics about the usage are reported in /proc/sysvipc/sem.
 *
 * Internals:
 * - scalability:
 *   - all global variables are read-mostly.
 *   - semop() calls and semctl(RMID) are synchronized by RCU.
 *   - most operations do write operations (actually: spin_lock calls) to
 *     the per-semaphore array structure.
 *   Thus: Perfect SMP scaling between independent semaphore arrays.
 *         If multiple semaphores in one array are used, then cache line
 *         thrashing on the semaphore array spinlock will limit the scaling.
 * - semncnt and semzcnt are calculated on demand in count_semncnt() and
 *   count_semzcnt()
 * - the task that performs a successful semop() scans the list of all
 *   sleeping tasks and completes any pending operations that can be fulfilled.
 *   Semaphores are actively given to waiting tasks (necessary for FIFO).
 *   (see update_queue())
 * - To improve the scalability, the actual wake-up calls are performed after
 *   dropping all locks. (see wake_up_sem_queue_prepare(),
 *   wake_up_sem_queue_do())
 * - All work is done by the waker, the woken up task does not have to do
 *   anything - not even acquiring a lock or dropping a refcount.
 * - A woken up task may not even touch the semaphore array anymore, it may
 *   have been destroyed already by a semctl(RMID).
 * - The synchronization between a wake-up due to a timeout/signal and a
 *   wake-up due to a completed semaphore operation is achieved by using an
 *   intermediate state (IN_WAKEUP).
 * - UNDO values are stored in an array (one per process and per
 *   semaphore array, lazily allocated). For backwards compatibility, multiple
 *   modes for the UNDO variables are supported (per process, per thread)
 *   (see copy_semundo, CLONE_SYSVSEM)
 * - There are two lists of the pending operations: a per-array list
 *   and a per-semaphore list (stored in the array). This makes it possible to
 *   achieve FIFO ordering without always scanning all pending operations.
 *   The worst-case behavior is nevertheless O(N^2) for N wakeups.
74 */ 75 76 #include <linux/slab.h> 77 #include <linux/spinlock.h> 78 #include <linux/init.h> 79 #include <linux/proc_fs.h> 80 #include <linux/time.h> 81 #include <linux/security.h> 82 #include <linux/syscalls.h> 83 #include <linux/audit.h> 84 #include <linux/capability.h> 85 #include <linux/seq_file.h> 86 #include <linux/rwsem.h> 87 #include <linux/nsproxy.h> 88 #include <linux/ipc_namespace.h> 89 90 #include <asm/uaccess.h> 91 #include "util.h" 92 93 /* One semaphore structure for each semaphore in the system. */ 94 struct sem { 95 int semval; /* current value */ 96 int sempid; /* pid of last operation */ 97 struct list_head sem_pending; /* pending single-sop operations */ 98 }; 99 100 /* One queue for each sleeping process in the system. */ 101 struct sem_queue { 102 struct list_head simple_list; /* queue of pending operations */ 103 struct list_head list; /* queue of pending operations */ 104 struct task_struct *sleeper; /* this process */ 105 struct sem_undo *undo; /* undo structure */ 106 int pid; /* process id of requesting process */ 107 int status; /* completion status of operation */ 108 struct sembuf *sops; /* array of pending operations */ 109 int nsops; /* number of operations */ 110 int alter; /* does *sops alter the array? */ 111 }; 112 113 /* Each task has a list of undo requests. They are executed automatically 114 * when the process exits. 
115 */ 116 struct sem_undo { 117 struct list_head list_proc; /* per-process list: * 118 * all undos from one process 119 * rcu protected */ 120 struct rcu_head rcu; /* rcu struct for sem_undo */ 121 struct sem_undo_list *ulp; /* back ptr to sem_undo_list */ 122 struct list_head list_id; /* per semaphore array list: 123 * all undos for one array */ 124 int semid; /* semaphore set identifier */ 125 short *semadj; /* array of adjustments */ 126 /* one per semaphore */ 127 }; 128 129 /* sem_undo_list controls shared access to the list of sem_undo structures 130 * that may be shared among all a CLONE_SYSVSEM task group. 131 */ 132 struct sem_undo_list { 133 atomic_t refcnt; 134 spinlock_t lock; 135 struct list_head list_proc; 136 }; 137 138 139 #define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS]) 140 141 #define sem_unlock(sma) ipc_unlock(&(sma)->sem_perm) 142 #define sem_checkid(sma, semid) ipc_checkid(&sma->sem_perm, semid) 143 144 static int newary(struct ipc_namespace *, struct ipc_params *); 145 static void freeary(struct ipc_namespace *, struct kern_ipc_perm *); 146 #ifdef CONFIG_PROC_FS 147 static int sysvipc_sem_proc_show(struct seq_file *s, void *it); 148 #endif 149 150 #define SEMMSL_FAST 256 /* 512 bytes on stack */ 151 #define SEMOPM_FAST 64 /* ~ 372 bytes on stack */ 152 153 /* 154 * linked list protection: 155 * sem_undo.id_next, 156 * sem_array.sem_pending{,last}, 157 * sem_array.sem_undo: sem_lock() for read/write 158 * sem_undo.proc_next: only "current" is allowed to read/write that field. 
159 * 160 */ 161 162 #define sc_semmsl sem_ctls[0] 163 #define sc_semmns sem_ctls[1] 164 #define sc_semopm sem_ctls[2] 165 #define sc_semmni sem_ctls[3] 166 167 void sem_init_ns(struct ipc_namespace *ns) 168 { 169 ns->sc_semmsl = SEMMSL; 170 ns->sc_semmns = SEMMNS; 171 ns->sc_semopm = SEMOPM; 172 ns->sc_semmni = SEMMNI; 173 ns->used_sems = 0; 174 ipc_init_ids(&ns->ids[IPC_SEM_IDS]); 175 } 176 177 #ifdef CONFIG_IPC_NS 178 void sem_exit_ns(struct ipc_namespace *ns) 179 { 180 free_ipcs(ns, &sem_ids(ns), freeary); 181 idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr); 182 } 183 #endif 184 185 void __init sem_init (void) 186 { 187 sem_init_ns(&init_ipc_ns); 188 ipc_init_proc_interface("sysvipc/sem", 189 " key semid perms nsems uid gid cuid cgid otime ctime\n", 190 IPC_SEM_IDS, sysvipc_sem_proc_show); 191 } 192 193 /* 194 * sem_lock_(check_) routines are called in the paths where the rw_mutex 195 * is not held. 196 */ 197 static inline struct sem_array *sem_lock(struct ipc_namespace *ns, int id) 198 { 199 struct kern_ipc_perm *ipcp = ipc_lock(&sem_ids(ns), id); 200 201 if (IS_ERR(ipcp)) 202 return (struct sem_array *)ipcp; 203 204 return container_of(ipcp, struct sem_array, sem_perm); 205 } 206 207 static inline struct sem_array *sem_lock_check(struct ipc_namespace *ns, 208 int id) 209 { 210 struct kern_ipc_perm *ipcp = ipc_lock_check(&sem_ids(ns), id); 211 212 if (IS_ERR(ipcp)) 213 return (struct sem_array *)ipcp; 214 215 return container_of(ipcp, struct sem_array, sem_perm); 216 } 217 218 static inline void sem_lock_and_putref(struct sem_array *sma) 219 { 220 ipc_lock_by_ptr(&sma->sem_perm); 221 ipc_rcu_putref(sma); 222 } 223 224 static inline void sem_getref_and_unlock(struct sem_array *sma) 225 { 226 ipc_rcu_getref(sma); 227 ipc_unlock(&(sma)->sem_perm); 228 } 229 230 static inline void sem_putref(struct sem_array *sma) 231 { 232 ipc_lock_by_ptr(&sma->sem_perm); 233 ipc_rcu_putref(sma); 234 ipc_unlock(&(sma)->sem_perm); 235 } 236 237 static inline void sem_rmid(struct 
ipc_namespace *ns, struct sem_array *s) 238 { 239 ipc_rmid(&sem_ids(ns), &s->sem_perm); 240 } 241 242 /* 243 * Lockless wakeup algorithm: 244 * Without the check/retry algorithm a lockless wakeup is possible: 245 * - queue.status is initialized to -EINTR before blocking. 246 * - wakeup is performed by 247 * * unlinking the queue entry from sma->sem_pending 248 * * setting queue.status to IN_WAKEUP 249 * This is the notification for the blocked thread that a 250 * result value is imminent. 251 * * call wake_up_process 252 * * set queue.status to the final value. 253 * - the previously blocked thread checks queue.status: 254 * * if it's IN_WAKEUP, then it must wait until the value changes 255 * * if it's not -EINTR, then the operation was completed by 256 * update_queue. semtimedop can return queue.status without 257 * performing any operation on the sem array. 258 * * otherwise it must acquire the spinlock and check what's up. 259 * 260 * The two-stage algorithm is necessary to protect against the following 261 * races: 262 * - if queue.status is set after wake_up_process, then the woken up idle 263 * thread could race forward and try (and fail) to acquire sma->lock 264 * before update_queue had a chance to set queue.status 265 * - if queue.status is written before wake_up_process and if the 266 * blocked process is woken up by a signal between writing 267 * queue.status and the wake_up_process, then the woken up 268 * process could return from semtimedop and die by calling 269 * sys_exit before wake_up_process is called. Then wake_up_process 270 * will oops, because the task structure is already invalid. 271 * (yes, this happened on s390 with sysv msg). 
272 * 273 */ 274 #define IN_WAKEUP 1 275 276 /** 277 * newary - Create a new semaphore set 278 * @ns: namespace 279 * @params: ptr to the structure that contains key, semflg and nsems 280 * 281 * Called with sem_ids.rw_mutex held (as a writer) 282 */ 283 284 static int newary(struct ipc_namespace *ns, struct ipc_params *params) 285 { 286 int id; 287 int retval; 288 struct sem_array *sma; 289 int size; 290 key_t key = params->key; 291 int nsems = params->u.nsems; 292 int semflg = params->flg; 293 int i; 294 295 if (!nsems) 296 return -EINVAL; 297 if (ns->used_sems + nsems > ns->sc_semmns) 298 return -ENOSPC; 299 300 size = sizeof (*sma) + nsems * sizeof (struct sem); 301 sma = ipc_rcu_alloc(size); 302 if (!sma) { 303 return -ENOMEM; 304 } 305 memset (sma, 0, size); 306 307 sma->sem_perm.mode = (semflg & S_IRWXUGO); 308 sma->sem_perm.key = key; 309 310 sma->sem_perm.security = NULL; 311 retval = security_sem_alloc(sma); 312 if (retval) { 313 ipc_rcu_putref(sma); 314 return retval; 315 } 316 317 id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); 318 if (id < 0) { 319 security_sem_free(sma); 320 ipc_rcu_putref(sma); 321 return id; 322 } 323 ns->used_sems += nsems; 324 325 sma->sem_base = (struct sem *) &sma[1]; 326 327 for (i = 0; i < nsems; i++) 328 INIT_LIST_HEAD(&sma->sem_base[i].sem_pending); 329 330 sma->complex_count = 0; 331 INIT_LIST_HEAD(&sma->sem_pending); 332 INIT_LIST_HEAD(&sma->list_id); 333 sma->sem_nsems = nsems; 334 sma->sem_ctime = get_seconds(); 335 sem_unlock(sma); 336 337 return sma->sem_perm.id; 338 } 339 340 341 /* 342 * Called with sem_ids.rw_mutex and ipcp locked. 343 */ 344 static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg) 345 { 346 struct sem_array *sma; 347 348 sma = container_of(ipcp, struct sem_array, sem_perm); 349 return security_sem_associate(sma, semflg); 350 } 351 352 /* 353 * Called with sem_ids.rw_mutex and ipcp locked. 
354 */ 355 static inline int sem_more_checks(struct kern_ipc_perm *ipcp, 356 struct ipc_params *params) 357 { 358 struct sem_array *sma; 359 360 sma = container_of(ipcp, struct sem_array, sem_perm); 361 if (params->u.nsems > sma->sem_nsems) 362 return -EINVAL; 363 364 return 0; 365 } 366 367 SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) 368 { 369 struct ipc_namespace *ns; 370 struct ipc_ops sem_ops; 371 struct ipc_params sem_params; 372 373 ns = current->nsproxy->ipc_ns; 374 375 if (nsems < 0 || nsems > ns->sc_semmsl) 376 return -EINVAL; 377 378 sem_ops.getnew = newary; 379 sem_ops.associate = sem_security; 380 sem_ops.more_checks = sem_more_checks; 381 382 sem_params.key = key; 383 sem_params.flg = semflg; 384 sem_params.u.nsems = nsems; 385 386 return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); 387 } 388 389 /* 390 * Determine whether a sequence of semaphore operations would succeed 391 * all at once. Return 0 if yes, 1 if need to sleep, else return error code. 392 */ 393 394 static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops, 395 int nsops, struct sem_undo *un, int pid) 396 { 397 int result, sem_op; 398 struct sembuf *sop; 399 struct sem * curr; 400 401 for (sop = sops; sop < sops + nsops; sop++) { 402 curr = sma->sem_base + sop->sem_num; 403 sem_op = sop->sem_op; 404 result = curr->semval; 405 406 if (!sem_op && result) 407 goto would_block; 408 409 result += sem_op; 410 if (result < 0) 411 goto would_block; 412 if (result > SEMVMX) 413 goto out_of_range; 414 if (sop->sem_flg & SEM_UNDO) { 415 int undo = un->semadj[sop->sem_num] - sem_op; 416 /* 417 * Exceeding the undo range is an error. 
418 */ 419 if (undo < (-SEMAEM - 1) || undo > SEMAEM) 420 goto out_of_range; 421 } 422 curr->semval = result; 423 } 424 425 sop--; 426 while (sop >= sops) { 427 sma->sem_base[sop->sem_num].sempid = pid; 428 if (sop->sem_flg & SEM_UNDO) 429 un->semadj[sop->sem_num] -= sop->sem_op; 430 sop--; 431 } 432 433 return 0; 434 435 out_of_range: 436 result = -ERANGE; 437 goto undo; 438 439 would_block: 440 if (sop->sem_flg & IPC_NOWAIT) 441 result = -EAGAIN; 442 else 443 result = 1; 444 445 undo: 446 sop--; 447 while (sop >= sops) { 448 sma->sem_base[sop->sem_num].semval -= sop->sem_op; 449 sop--; 450 } 451 452 return result; 453 } 454 455 /** wake_up_sem_queue_prepare(q, error): Prepare wake-up 456 * @q: queue entry that must be signaled 457 * @error: Error value for the signal 458 * 459 * Prepare the wake-up of the queue entry q. 460 */ 461 static void wake_up_sem_queue_prepare(struct list_head *pt, 462 struct sem_queue *q, int error) 463 { 464 if (list_empty(pt)) { 465 /* 466 * Hold preempt off so that we don't get preempted and have the 467 * wakee busy-wait until we're scheduled back on. 468 */ 469 preempt_disable(); 470 } 471 q->status = IN_WAKEUP; 472 q->pid = error; 473 474 list_add_tail(&q->simple_list, pt); 475 } 476 477 /** 478 * wake_up_sem_queue_do(pt) - do the actual wake-up 479 * @pt: list of tasks to be woken up 480 * 481 * Do the actual wake-up. 482 * The function is called without any locks held, thus the semaphore array 483 * could be destroyed already and the tasks can disappear as soon as the 484 * status is set to the actual return code. 485 */ 486 static void wake_up_sem_queue_do(struct list_head *pt) 487 { 488 struct sem_queue *q, *t; 489 int did_something; 490 491 did_something = !list_empty(pt); 492 list_for_each_entry_safe(q, t, pt, simple_list) { 493 wake_up_process(q->sleeper); 494 /* q can disappear immediately after writing q->status. 
*/ 495 smp_wmb(); 496 q->status = q->pid; 497 } 498 if (did_something) 499 preempt_enable(); 500 } 501 502 static void unlink_queue(struct sem_array *sma, struct sem_queue *q) 503 { 504 list_del(&q->list); 505 if (q->nsops == 1) 506 list_del(&q->simple_list); 507 else 508 sma->complex_count--; 509 } 510 511 /** check_restart(sma, q) 512 * @sma: semaphore array 513 * @q: the operation that just completed 514 * 515 * update_queue is O(N^2) when it restarts scanning the whole queue of 516 * waiting operations. Therefore this function checks if the restart is 517 * really necessary. It is called after a previously waiting operation 518 * was completed. 519 */ 520 static int check_restart(struct sem_array *sma, struct sem_queue *q) 521 { 522 struct sem *curr; 523 struct sem_queue *h; 524 525 /* if the operation didn't modify the array, then no restart */ 526 if (q->alter == 0) 527 return 0; 528 529 /* pending complex operations are too difficult to analyse */ 530 if (sma->complex_count) 531 return 1; 532 533 /* we were a sleeping complex operation. Too difficult */ 534 if (q->nsops > 1) 535 return 1; 536 537 curr = sma->sem_base + q->sops[0].sem_num; 538 539 /* No-one waits on this queue */ 540 if (list_empty(&curr->sem_pending)) 541 return 0; 542 543 /* the new semaphore value */ 544 if (curr->semval) { 545 /* It is impossible that someone waits for the new value: 546 * - q is a previously sleeping simple operation that 547 * altered the array. It must be a decrement, because 548 * simple increments never sleep. 549 * - The value is not 0, thus wait-for-zero won't proceed. 550 * - If there are older (higher priority) decrements 551 * in the queue, then they have observed the original 552 * semval value and couldn't proceed. The operation 553 * decremented to value - thus they won't proceed either. 554 */ 555 BUG_ON(q->sops[0].sem_op >= 0); 556 return 0; 557 } 558 /* 559 * semval is 0. Check if there are wait-for-zero semops. 
560 * They must be the first entries in the per-semaphore simple queue 561 */ 562 h = list_first_entry(&curr->sem_pending, struct sem_queue, simple_list); 563 BUG_ON(h->nsops != 1); 564 BUG_ON(h->sops[0].sem_num != q->sops[0].sem_num); 565 566 /* Yes, there is a wait-for-zero semop. Restart */ 567 if (h->sops[0].sem_op == 0) 568 return 1; 569 570 /* Again - no-one is waiting for the new value. */ 571 return 0; 572 } 573 574 575 /** 576 * update_queue(sma, semnum): Look for tasks that can be completed. 577 * @sma: semaphore array. 578 * @semnum: semaphore that was modified. 579 * @pt: list head for the tasks that must be woken up. 580 * 581 * update_queue must be called after a semaphore in a semaphore array 582 * was modified. If multiple semaphore were modified, then @semnum 583 * must be set to -1. 584 * The tasks that must be woken up are added to @pt. The return code 585 * is stored in q->pid. 586 * The function return 1 if at least one semop was completed successfully. 587 */ 588 static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt) 589 { 590 struct sem_queue *q; 591 struct list_head *walk; 592 struct list_head *pending_list; 593 int offset; 594 int semop_completed = 0; 595 596 /* if there are complex operations around, then knowing the semaphore 597 * that was modified doesn't help us. Assume that multiple semaphores 598 * were modified. 
599 */ 600 if (sma->complex_count) 601 semnum = -1; 602 603 if (semnum == -1) { 604 pending_list = &sma->sem_pending; 605 offset = offsetof(struct sem_queue, list); 606 } else { 607 pending_list = &sma->sem_base[semnum].sem_pending; 608 offset = offsetof(struct sem_queue, simple_list); 609 } 610 611 again: 612 walk = pending_list->next; 613 while (walk != pending_list) { 614 int error, restart; 615 616 q = (struct sem_queue *)((char *)walk - offset); 617 walk = walk->next; 618 619 /* If we are scanning the single sop, per-semaphore list of 620 * one semaphore and that semaphore is 0, then it is not 621 * necessary to scan the "alter" entries: simple increments 622 * that affect only one entry succeed immediately and cannot 623 * be in the per semaphore pending queue, and decrements 624 * cannot be successful if the value is already 0. 625 */ 626 if (semnum != -1 && sma->sem_base[semnum].semval == 0 && 627 q->alter) 628 break; 629 630 error = try_atomic_semop(sma, q->sops, q->nsops, 631 q->undo, q->pid); 632 633 /* Does q->sleeper still need to sleep? */ 634 if (error > 0) 635 continue; 636 637 unlink_queue(sma, q); 638 639 if (error) { 640 restart = 0; 641 } else { 642 semop_completed = 1; 643 restart = check_restart(sma, q); 644 } 645 646 wake_up_sem_queue_prepare(pt, q, error); 647 if (restart) 648 goto again; 649 } 650 return semop_completed; 651 } 652 653 /** 654 * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue 655 * @sma: semaphore array 656 * @sops: operations that were performed 657 * @nsops: number of operations 658 * @otime: force setting otime 659 * @pt: list head of the tasks that must be woken up. 660 * 661 * do_smart_update() does the required called to update_queue, based on the 662 * actual changes that were performed on the semaphore array. 663 * Note that the function does not do the actual wake-up: the caller is 664 * responsible for calling wake_up_sem_queue_do(@pt). 
665 * It is safe to perform this call after dropping all locks. 666 */ 667 static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops, 668 int otime, struct list_head *pt) 669 { 670 int i; 671 672 if (sma->complex_count || sops == NULL) { 673 if (update_queue(sma, -1, pt)) 674 otime = 1; 675 goto done; 676 } 677 678 for (i = 0; i < nsops; i++) { 679 if (sops[i].sem_op > 0 || 680 (sops[i].sem_op < 0 && 681 sma->sem_base[sops[i].sem_num].semval == 0)) 682 if (update_queue(sma, sops[i].sem_num, pt)) 683 otime = 1; 684 } 685 done: 686 if (otime) 687 sma->sem_otime = get_seconds(); 688 } 689 690 691 /* The following counts are associated to each semaphore: 692 * semncnt number of tasks waiting on semval being nonzero 693 * semzcnt number of tasks waiting on semval being zero 694 * This model assumes that a task waits on exactly one semaphore. 695 * Since semaphore operations are to be performed atomically, tasks actually 696 * wait on a whole sequence of semaphores simultaneously. 697 * The counts we return here are a rough approximation, but still 698 * warrant that semncnt+semzcnt>0 if the task is on the pending queue. 
699 */ 700 static int count_semncnt (struct sem_array * sma, ushort semnum) 701 { 702 int semncnt; 703 struct sem_queue * q; 704 705 semncnt = 0; 706 list_for_each_entry(q, &sma->sem_pending, list) { 707 struct sembuf * sops = q->sops; 708 int nsops = q->nsops; 709 int i; 710 for (i = 0; i < nsops; i++) 711 if (sops[i].sem_num == semnum 712 && (sops[i].sem_op < 0) 713 && !(sops[i].sem_flg & IPC_NOWAIT)) 714 semncnt++; 715 } 716 return semncnt; 717 } 718 719 static int count_semzcnt (struct sem_array * sma, ushort semnum) 720 { 721 int semzcnt; 722 struct sem_queue * q; 723 724 semzcnt = 0; 725 list_for_each_entry(q, &sma->sem_pending, list) { 726 struct sembuf * sops = q->sops; 727 int nsops = q->nsops; 728 int i; 729 for (i = 0; i < nsops; i++) 730 if (sops[i].sem_num == semnum 731 && (sops[i].sem_op == 0) 732 && !(sops[i].sem_flg & IPC_NOWAIT)) 733 semzcnt++; 734 } 735 return semzcnt; 736 } 737 738 /* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked 739 * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex 740 * remains locked on exit. 741 */ 742 static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) 743 { 744 struct sem_undo *un, *tu; 745 struct sem_queue *q, *tq; 746 struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); 747 struct list_head tasks; 748 749 /* Free the existing undo structures for this semaphore set. */ 750 assert_spin_locked(&sma->sem_perm.lock); 751 list_for_each_entry_safe(un, tu, &sma->list_id, list_id) { 752 list_del(&un->list_id); 753 spin_lock(&un->ulp->lock); 754 un->semid = -1; 755 list_del_rcu(&un->list_proc); 756 spin_unlock(&un->ulp->lock); 757 kfree_rcu(un, rcu); 758 } 759 760 /* Wake up all pending processes and let them fail with EIDRM. 
*/ 761 INIT_LIST_HEAD(&tasks); 762 list_for_each_entry_safe(q, tq, &sma->sem_pending, list) { 763 unlink_queue(sma, q); 764 wake_up_sem_queue_prepare(&tasks, q, -EIDRM); 765 } 766 767 /* Remove the semaphore set from the IDR */ 768 sem_rmid(ns, sma); 769 sem_unlock(sma); 770 771 wake_up_sem_queue_do(&tasks); 772 ns->used_sems -= sma->sem_nsems; 773 security_sem_free(sma); 774 ipc_rcu_putref(sma); 775 } 776 777 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) 778 { 779 switch(version) { 780 case IPC_64: 781 return copy_to_user(buf, in, sizeof(*in)); 782 case IPC_OLD: 783 { 784 struct semid_ds out; 785 786 memset(&out, 0, sizeof(out)); 787 788 ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm); 789 790 out.sem_otime = in->sem_otime; 791 out.sem_ctime = in->sem_ctime; 792 out.sem_nsems = in->sem_nsems; 793 794 return copy_to_user(buf, &out, sizeof(out)); 795 } 796 default: 797 return -EINVAL; 798 } 799 } 800 801 static int semctl_nolock(struct ipc_namespace *ns, int semid, 802 int cmd, int version, union semun arg) 803 { 804 int err; 805 struct sem_array *sma; 806 807 switch(cmd) { 808 case IPC_INFO: 809 case SEM_INFO: 810 { 811 struct seminfo seminfo; 812 int max_id; 813 814 err = security_sem_semctl(NULL, cmd); 815 if (err) 816 return err; 817 818 memset(&seminfo,0,sizeof(seminfo)); 819 seminfo.semmni = ns->sc_semmni; 820 seminfo.semmns = ns->sc_semmns; 821 seminfo.semmsl = ns->sc_semmsl; 822 seminfo.semopm = ns->sc_semopm; 823 seminfo.semvmx = SEMVMX; 824 seminfo.semmnu = SEMMNU; 825 seminfo.semmap = SEMMAP; 826 seminfo.semume = SEMUME; 827 down_read(&sem_ids(ns).rw_mutex); 828 if (cmd == SEM_INFO) { 829 seminfo.semusz = sem_ids(ns).in_use; 830 seminfo.semaem = ns->used_sems; 831 } else { 832 seminfo.semusz = SEMUSZ; 833 seminfo.semaem = SEMAEM; 834 } 835 max_id = ipc_get_maxid(&sem_ids(ns)); 836 up_read(&sem_ids(ns).rw_mutex); 837 if (copy_to_user (arg.__buf, &seminfo, sizeof(struct seminfo))) 838 return -EFAULT; 839 
return (max_id < 0) ? 0: max_id; 840 } 841 case IPC_STAT: 842 case SEM_STAT: 843 { 844 struct semid64_ds tbuf; 845 int id; 846 847 if (cmd == SEM_STAT) { 848 sma = sem_lock(ns, semid); 849 if (IS_ERR(sma)) 850 return PTR_ERR(sma); 851 id = sma->sem_perm.id; 852 } else { 853 sma = sem_lock_check(ns, semid); 854 if (IS_ERR(sma)) 855 return PTR_ERR(sma); 856 id = 0; 857 } 858 859 err = -EACCES; 860 if (ipcperms(ns, &sma->sem_perm, S_IRUGO)) 861 goto out_unlock; 862 863 err = security_sem_semctl(sma, cmd); 864 if (err) 865 goto out_unlock; 866 867 memset(&tbuf, 0, sizeof(tbuf)); 868 869 kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm); 870 tbuf.sem_otime = sma->sem_otime; 871 tbuf.sem_ctime = sma->sem_ctime; 872 tbuf.sem_nsems = sma->sem_nsems; 873 sem_unlock(sma); 874 if (copy_semid_to_user (arg.buf, &tbuf, version)) 875 return -EFAULT; 876 return id; 877 } 878 default: 879 return -EINVAL; 880 } 881 out_unlock: 882 sem_unlock(sma); 883 return err; 884 } 885 886 static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, 887 int cmd, int version, union semun arg) 888 { 889 struct sem_array *sma; 890 struct sem* curr; 891 int err; 892 ushort fast_sem_io[SEMMSL_FAST]; 893 ushort* sem_io = fast_sem_io; 894 int nsems; 895 struct list_head tasks; 896 897 sma = sem_lock_check(ns, semid); 898 if (IS_ERR(sma)) 899 return PTR_ERR(sma); 900 901 INIT_LIST_HEAD(&tasks); 902 nsems = sma->sem_nsems; 903 904 err = -EACCES; 905 if (ipcperms(ns, &sma->sem_perm, 906 (cmd == SETVAL || cmd == SETALL) ? 
S_IWUGO : S_IRUGO)) 907 goto out_unlock; 908 909 err = security_sem_semctl(sma, cmd); 910 if (err) 911 goto out_unlock; 912 913 err = -EACCES; 914 switch (cmd) { 915 case GETALL: 916 { 917 ushort __user *array = arg.array; 918 int i; 919 920 if(nsems > SEMMSL_FAST) { 921 sem_getref_and_unlock(sma); 922 923 sem_io = ipc_alloc(sizeof(ushort)*nsems); 924 if(sem_io == NULL) { 925 sem_putref(sma); 926 return -ENOMEM; 927 } 928 929 sem_lock_and_putref(sma); 930 if (sma->sem_perm.deleted) { 931 sem_unlock(sma); 932 err = -EIDRM; 933 goto out_free; 934 } 935 } 936 937 for (i = 0; i < sma->sem_nsems; i++) 938 sem_io[i] = sma->sem_base[i].semval; 939 sem_unlock(sma); 940 err = 0; 941 if(copy_to_user(array, sem_io, nsems*sizeof(ushort))) 942 err = -EFAULT; 943 goto out_free; 944 } 945 case SETALL: 946 { 947 int i; 948 struct sem_undo *un; 949 950 sem_getref_and_unlock(sma); 951 952 if(nsems > SEMMSL_FAST) { 953 sem_io = ipc_alloc(sizeof(ushort)*nsems); 954 if(sem_io == NULL) { 955 sem_putref(sma); 956 return -ENOMEM; 957 } 958 } 959 960 if (copy_from_user (sem_io, arg.array, nsems*sizeof(ushort))) { 961 sem_putref(sma); 962 err = -EFAULT; 963 goto out_free; 964 } 965 966 for (i = 0; i < nsems; i++) { 967 if (sem_io[i] > SEMVMX) { 968 sem_putref(sma); 969 err = -ERANGE; 970 goto out_free; 971 } 972 } 973 sem_lock_and_putref(sma); 974 if (sma->sem_perm.deleted) { 975 sem_unlock(sma); 976 err = -EIDRM; 977 goto out_free; 978 } 979 980 for (i = 0; i < nsems; i++) 981 sma->sem_base[i].semval = sem_io[i]; 982 983 assert_spin_locked(&sma->sem_perm.lock); 984 list_for_each_entry(un, &sma->list_id, list_id) { 985 for (i = 0; i < nsems; i++) 986 un->semadj[i] = 0; 987 } 988 sma->sem_ctime = get_seconds(); 989 /* maybe some queued-up processes were waiting for this */ 990 do_smart_update(sma, NULL, 0, 0, &tasks); 991 err = 0; 992 goto out_unlock; 993 } 994 /* GETVAL, GETPID, GETNCTN, GETZCNT, SETVAL: fall-through */ 995 } 996 err = -EINVAL; 997 if(semnum < 0 || semnum >= nsems) 998 goto 
out_unlock; 999 1000 curr = &sma->sem_base[semnum]; 1001 1002 switch (cmd) { 1003 case GETVAL: 1004 err = curr->semval; 1005 goto out_unlock; 1006 case GETPID: 1007 err = curr->sempid; 1008 goto out_unlock; 1009 case GETNCNT: 1010 err = count_semncnt(sma,semnum); 1011 goto out_unlock; 1012 case GETZCNT: 1013 err = count_semzcnt(sma,semnum); 1014 goto out_unlock; 1015 case SETVAL: 1016 { 1017 int val = arg.val; 1018 struct sem_undo *un; 1019 1020 err = -ERANGE; 1021 if (val > SEMVMX || val < 0) 1022 goto out_unlock; 1023 1024 assert_spin_locked(&sma->sem_perm.lock); 1025 list_for_each_entry(un, &sma->list_id, list_id) 1026 un->semadj[semnum] = 0; 1027 1028 curr->semval = val; 1029 curr->sempid = task_tgid_vnr(current); 1030 sma->sem_ctime = get_seconds(); 1031 /* maybe some queued-up processes were waiting for this */ 1032 do_smart_update(sma, NULL, 0, 0, &tasks); 1033 err = 0; 1034 goto out_unlock; 1035 } 1036 } 1037 out_unlock: 1038 sem_unlock(sma); 1039 wake_up_sem_queue_do(&tasks); 1040 1041 out_free: 1042 if(sem_io != fast_sem_io) 1043 ipc_free(sem_io, sizeof(ushort)*nsems); 1044 return err; 1045 } 1046 1047 static inline unsigned long 1048 copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version) 1049 { 1050 switch(version) { 1051 case IPC_64: 1052 if (copy_from_user(out, buf, sizeof(*out))) 1053 return -EFAULT; 1054 return 0; 1055 case IPC_OLD: 1056 { 1057 struct semid_ds tbuf_old; 1058 1059 if(copy_from_user(&tbuf_old, buf, sizeof(tbuf_old))) 1060 return -EFAULT; 1061 1062 out->sem_perm.uid = tbuf_old.sem_perm.uid; 1063 out->sem_perm.gid = tbuf_old.sem_perm.gid; 1064 out->sem_perm.mode = tbuf_old.sem_perm.mode; 1065 1066 return 0; 1067 } 1068 default: 1069 return -EINVAL; 1070 } 1071 } 1072 1073 /* 1074 * This function handles some semctl commands which require the rw_mutex 1075 * to be held in write mode. 1076 * NOTE: no locks must be held, the rw_mutex is taken inside this function. 
1077 */ 1078 static int semctl_down(struct ipc_namespace *ns, int semid, 1079 int cmd, int version, union semun arg) 1080 { 1081 struct sem_array *sma; 1082 int err; 1083 struct semid64_ds semid64; 1084 struct kern_ipc_perm *ipcp; 1085 1086 if(cmd == IPC_SET) { 1087 if (copy_semid_from_user(&semid64, arg.buf, version)) 1088 return -EFAULT; 1089 } 1090 1091 ipcp = ipcctl_pre_down(ns, &sem_ids(ns), semid, cmd, 1092 &semid64.sem_perm, 0); 1093 if (IS_ERR(ipcp)) 1094 return PTR_ERR(ipcp); 1095 1096 sma = container_of(ipcp, struct sem_array, sem_perm); 1097 1098 err = security_sem_semctl(sma, cmd); 1099 if (err) 1100 goto out_unlock; 1101 1102 switch(cmd){ 1103 case IPC_RMID: 1104 freeary(ns, ipcp); 1105 goto out_up; 1106 case IPC_SET: 1107 err = ipc_update_perm(&semid64.sem_perm, ipcp); 1108 if (err) 1109 goto out_unlock; 1110 sma->sem_ctime = get_seconds(); 1111 break; 1112 default: 1113 err = -EINVAL; 1114 } 1115 1116 out_unlock: 1117 sem_unlock(sma); 1118 out_up: 1119 up_write(&sem_ids(ns).rw_mutex); 1120 return err; 1121 } 1122 1123 SYSCALL_DEFINE(semctl)(int semid, int semnum, int cmd, union semun arg) 1124 { 1125 int err = -EINVAL; 1126 int version; 1127 struct ipc_namespace *ns; 1128 1129 if (semid < 0) 1130 return -EINVAL; 1131 1132 version = ipc_parse_version(&cmd); 1133 ns = current->nsproxy->ipc_ns; 1134 1135 switch(cmd) { 1136 case IPC_INFO: 1137 case SEM_INFO: 1138 case IPC_STAT: 1139 case SEM_STAT: 1140 err = semctl_nolock(ns, semid, cmd, version, arg); 1141 return err; 1142 case GETALL: 1143 case GETVAL: 1144 case GETPID: 1145 case GETNCNT: 1146 case GETZCNT: 1147 case SETVAL: 1148 case SETALL: 1149 err = semctl_main(ns,semid,semnum,cmd,version,arg); 1150 return err; 1151 case IPC_RMID: 1152 case IPC_SET: 1153 err = semctl_down(ns, semid, cmd, version, arg); 1154 return err; 1155 default: 1156 return -EINVAL; 1157 } 1158 } 1159 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS 1160 asmlinkage long SyS_semctl(int semid, int semnum, int cmd, union semun arg) 1161 { 1162 
return SYSC_semctl((int) semid, (int) semnum, (int) cmd, arg); 1163 } 1164 SYSCALL_ALIAS(sys_semctl, SyS_semctl); 1165 #endif 1166 1167 /* If the task doesn't already have a undo_list, then allocate one 1168 * here. We guarantee there is only one thread using this undo list, 1169 * and current is THE ONE 1170 * 1171 * If this allocation and assignment succeeds, but later 1172 * portions of this code fail, there is no need to free the sem_undo_list. 1173 * Just let it stay associated with the task, and it'll be freed later 1174 * at exit time. 1175 * 1176 * This can block, so callers must hold no locks. 1177 */ 1178 static inline int get_undo_list(struct sem_undo_list **undo_listp) 1179 { 1180 struct sem_undo_list *undo_list; 1181 1182 undo_list = current->sysvsem.undo_list; 1183 if (!undo_list) { 1184 undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL); 1185 if (undo_list == NULL) 1186 return -ENOMEM; 1187 spin_lock_init(&undo_list->lock); 1188 atomic_set(&undo_list->refcnt, 1); 1189 INIT_LIST_HEAD(&undo_list->list_proc); 1190 1191 current->sysvsem.undo_list = undo_list; 1192 } 1193 *undo_listp = undo_list; 1194 return 0; 1195 } 1196 1197 static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid) 1198 { 1199 struct sem_undo *un; 1200 1201 list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) { 1202 if (un->semid == semid) 1203 return un; 1204 } 1205 return NULL; 1206 } 1207 1208 static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid) 1209 { 1210 struct sem_undo *un; 1211 1212 assert_spin_locked(&ulp->lock); 1213 1214 un = __lookup_undo(ulp, semid); 1215 if (un) { 1216 list_del_rcu(&un->list_proc); 1217 list_add_rcu(&un->list_proc, &ulp->list_proc); 1218 } 1219 return un; 1220 } 1221 1222 /** 1223 * find_alloc_undo - Lookup (and if not present create) undo array 1224 * @ns: namespace 1225 * @semid: semaphore array id 1226 * 1227 * The function looks up (and if not present creates) the undo structure. 
1228 * The size of the undo structure depends on the size of the semaphore 1229 * array, thus the alloc path is not that straightforward. 1230 * Lifetime-rules: sem_undo is rcu-protected, on success, the function 1231 * performs a rcu_read_lock(). 1232 */ 1233 static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) 1234 { 1235 struct sem_array *sma; 1236 struct sem_undo_list *ulp; 1237 struct sem_undo *un, *new; 1238 int nsems; 1239 int error; 1240 1241 error = get_undo_list(&ulp); 1242 if (error) 1243 return ERR_PTR(error); 1244 1245 rcu_read_lock(); 1246 spin_lock(&ulp->lock); 1247 un = lookup_undo(ulp, semid); 1248 spin_unlock(&ulp->lock); 1249 if (likely(un!=NULL)) 1250 goto out; 1251 rcu_read_unlock(); 1252 1253 /* no undo structure around - allocate one. */ 1254 /* step 1: figure out the size of the semaphore array */ 1255 sma = sem_lock_check(ns, semid); 1256 if (IS_ERR(sma)) 1257 return ERR_CAST(sma); 1258 1259 nsems = sma->sem_nsems; 1260 sem_getref_and_unlock(sma); 1261 1262 /* step 2: allocate new undo structure */ 1263 new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); 1264 if (!new) { 1265 sem_putref(sma); 1266 return ERR_PTR(-ENOMEM); 1267 } 1268 1269 /* step 3: Acquire the lock on semaphore array */ 1270 sem_lock_and_putref(sma); 1271 if (sma->sem_perm.deleted) { 1272 sem_unlock(sma); 1273 kfree(new); 1274 un = ERR_PTR(-EIDRM); 1275 goto out; 1276 } 1277 spin_lock(&ulp->lock); 1278 1279 /* 1280 * step 4: check for races: did someone else allocate the undo struct? 
1281 */ 1282 un = lookup_undo(ulp, semid); 1283 if (un) { 1284 kfree(new); 1285 goto success; 1286 } 1287 /* step 5: initialize & link new undo structure */ 1288 new->semadj = (short *) &new[1]; 1289 new->ulp = ulp; 1290 new->semid = semid; 1291 assert_spin_locked(&ulp->lock); 1292 list_add_rcu(&new->list_proc, &ulp->list_proc); 1293 assert_spin_locked(&sma->sem_perm.lock); 1294 list_add(&new->list_id, &sma->list_id); 1295 un = new; 1296 1297 success: 1298 spin_unlock(&ulp->lock); 1299 rcu_read_lock(); 1300 sem_unlock(sma); 1301 out: 1302 return un; 1303 } 1304 1305 1306 /** 1307 * get_queue_result - Retrieve the result code from sem_queue 1308 * @q: Pointer to queue structure 1309 * 1310 * Retrieve the return code from the pending queue. If IN_WAKEUP is found in 1311 * q->status, then we must loop until the value is replaced with the final 1312 * value: This may happen if a task is woken up by an unrelated event (e.g. 1313 * signal) and in parallel the task is woken up by another task because it got 1314 * the requested semaphores. 1315 * 1316 * The function can be called with or without holding the semaphore spinlock. 
1317 */ 1318 static int get_queue_result(struct sem_queue *q) 1319 { 1320 int error; 1321 1322 error = q->status; 1323 while (unlikely(error == IN_WAKEUP)) { 1324 cpu_relax(); 1325 error = q->status; 1326 } 1327 1328 return error; 1329 } 1330 1331 1332 SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, 1333 unsigned, nsops, const struct timespec __user *, timeout) 1334 { 1335 int error = -EINVAL; 1336 struct sem_array *sma; 1337 struct sembuf fast_sops[SEMOPM_FAST]; 1338 struct sembuf* sops = fast_sops, *sop; 1339 struct sem_undo *un; 1340 int undos = 0, alter = 0, max; 1341 struct sem_queue queue; 1342 unsigned long jiffies_left = 0; 1343 struct ipc_namespace *ns; 1344 struct list_head tasks; 1345 1346 ns = current->nsproxy->ipc_ns; 1347 1348 if (nsops < 1 || semid < 0) 1349 return -EINVAL; 1350 if (nsops > ns->sc_semopm) 1351 return -E2BIG; 1352 if(nsops > SEMOPM_FAST) { 1353 sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL); 1354 if(sops==NULL) 1355 return -ENOMEM; 1356 } 1357 if (copy_from_user (sops, tsops, nsops * sizeof(*tsops))) { 1358 error=-EFAULT; 1359 goto out_free; 1360 } 1361 if (timeout) { 1362 struct timespec _timeout; 1363 if (copy_from_user(&_timeout, timeout, sizeof(*timeout))) { 1364 error = -EFAULT; 1365 goto out_free; 1366 } 1367 if (_timeout.tv_sec < 0 || _timeout.tv_nsec < 0 || 1368 _timeout.tv_nsec >= 1000000000L) { 1369 error = -EINVAL; 1370 goto out_free; 1371 } 1372 jiffies_left = timespec_to_jiffies(&_timeout); 1373 } 1374 max = 0; 1375 for (sop = sops; sop < sops + nsops; sop++) { 1376 if (sop->sem_num >= max) 1377 max = sop->sem_num; 1378 if (sop->sem_flg & SEM_UNDO) 1379 undos = 1; 1380 if (sop->sem_op != 0) 1381 alter = 1; 1382 } 1383 1384 if (undos) { 1385 un = find_alloc_undo(ns, semid); 1386 if (IS_ERR(un)) { 1387 error = PTR_ERR(un); 1388 goto out_free; 1389 } 1390 } else 1391 un = NULL; 1392 1393 INIT_LIST_HEAD(&tasks); 1394 1395 sma = sem_lock_check(ns, semid); 1396 if (IS_ERR(sma)) { 1397 if (un) 1398 
rcu_read_unlock(); 1399 error = PTR_ERR(sma); 1400 goto out_free; 1401 } 1402 1403 /* 1404 * semid identifiers are not unique - find_alloc_undo may have 1405 * allocated an undo structure, it was invalidated by an RMID 1406 * and now a new array with received the same id. Check and fail. 1407 * This case can be detected checking un->semid. The existence of 1408 * "un" itself is guaranteed by rcu. 1409 */ 1410 error = -EIDRM; 1411 if (un) { 1412 if (un->semid == -1) { 1413 rcu_read_unlock(); 1414 goto out_unlock_free; 1415 } else { 1416 /* 1417 * rcu lock can be released, "un" cannot disappear: 1418 * - sem_lock is acquired, thus IPC_RMID is 1419 * impossible. 1420 * - exit_sem is impossible, it always operates on 1421 * current (or a dead task). 1422 */ 1423 1424 rcu_read_unlock(); 1425 } 1426 } 1427 1428 error = -EFBIG; 1429 if (max >= sma->sem_nsems) 1430 goto out_unlock_free; 1431 1432 error = -EACCES; 1433 if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) 1434 goto out_unlock_free; 1435 1436 error = security_sem_semop(sma, sops, nsops, alter); 1437 if (error) 1438 goto out_unlock_free; 1439 1440 error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current)); 1441 if (error <= 0) { 1442 if (alter && error == 0) 1443 do_smart_update(sma, sops, nsops, 1, &tasks); 1444 1445 goto out_unlock_free; 1446 } 1447 1448 /* We need to sleep on this operation, so we put the current 1449 * task into the pending queue and go to sleep. 
1450 */ 1451 1452 queue.sops = sops; 1453 queue.nsops = nsops; 1454 queue.undo = un; 1455 queue.pid = task_tgid_vnr(current); 1456 queue.alter = alter; 1457 if (alter) 1458 list_add_tail(&queue.list, &sma->sem_pending); 1459 else 1460 list_add(&queue.list, &sma->sem_pending); 1461 1462 if (nsops == 1) { 1463 struct sem *curr; 1464 curr = &sma->sem_base[sops->sem_num]; 1465 1466 if (alter) 1467 list_add_tail(&queue.simple_list, &curr->sem_pending); 1468 else 1469 list_add(&queue.simple_list, &curr->sem_pending); 1470 } else { 1471 INIT_LIST_HEAD(&queue.simple_list); 1472 sma->complex_count++; 1473 } 1474 1475 queue.status = -EINTR; 1476 queue.sleeper = current; 1477 1478 sleep_again: 1479 current->state = TASK_INTERRUPTIBLE; 1480 sem_unlock(sma); 1481 1482 if (timeout) 1483 jiffies_left = schedule_timeout(jiffies_left); 1484 else 1485 schedule(); 1486 1487 error = get_queue_result(&queue); 1488 1489 if (error != -EINTR) { 1490 /* fast path: update_queue already obtained all requested 1491 * resources. 1492 * Perform a smp_mb(): User space could assume that semop() 1493 * is a memory barrier: Without the mb(), the cpu could 1494 * speculatively read in user space stale data that was 1495 * overwritten by the previous owner of the semaphore. 1496 */ 1497 smp_mb(); 1498 1499 goto out_free; 1500 } 1501 1502 sma = sem_lock(ns, semid); 1503 1504 /* 1505 * Wait until it's guaranteed that no wakeup_sem_queue_do() is ongoing. 1506 */ 1507 error = get_queue_result(&queue); 1508 1509 /* 1510 * Array removed? If yes, leave without sem_unlock(). 1511 */ 1512 if (IS_ERR(sma)) { 1513 goto out_free; 1514 } 1515 1516 1517 /* 1518 * If queue.status != -EINTR we are woken up by another process. 1519 * Leave without unlink_queue(), but with sem_unlock(). 
1520 */ 1521 1522 if (error != -EINTR) { 1523 goto out_unlock_free; 1524 } 1525 1526 /* 1527 * If an interrupt occurred we have to clean up the queue 1528 */ 1529 if (timeout && jiffies_left == 0) 1530 error = -EAGAIN; 1531 1532 /* 1533 * If the wakeup was spurious, just retry 1534 */ 1535 if (error == -EINTR && !signal_pending(current)) 1536 goto sleep_again; 1537 1538 unlink_queue(sma, &queue); 1539 1540 out_unlock_free: 1541 sem_unlock(sma); 1542 1543 wake_up_sem_queue_do(&tasks); 1544 out_free: 1545 if(sops != fast_sops) 1546 kfree(sops); 1547 return error; 1548 } 1549 1550 SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops, 1551 unsigned, nsops) 1552 { 1553 return sys_semtimedop(semid, tsops, nsops, NULL); 1554 } 1555 1556 /* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between 1557 * parent and child tasks. 1558 */ 1559 1560 int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) 1561 { 1562 struct sem_undo_list *undo_list; 1563 int error; 1564 1565 if (clone_flags & CLONE_SYSVSEM) { 1566 error = get_undo_list(&undo_list); 1567 if (error) 1568 return error; 1569 atomic_inc(&undo_list->refcnt); 1570 tsk->sysvsem.undo_list = undo_list; 1571 } else 1572 tsk->sysvsem.undo_list = NULL; 1573 1574 return 0; 1575 } 1576 1577 /* 1578 * add semadj values to semaphores, free undo structures. 1579 * undo structures are not freed when semaphore arrays are destroyed 1580 * so some of them may be out of date. 1581 * IMPLEMENTATION NOTE: There is some confusion over whether the 1582 * set of adjustments that needs to be done should be done in an atomic 1583 * manner or not. That is, if we are attempting to decrement the semval 1584 * should we queue up and wait until we can do so legally? 1585 * The original implementation attempted to do this (queue and wait). 1586 * The current implementation does not do so. The POSIX standard 1587 * and SVID should be consulted to determine what behavior is mandated. 
1588 */ 1589 void exit_sem(struct task_struct *tsk) 1590 { 1591 struct sem_undo_list *ulp; 1592 1593 ulp = tsk->sysvsem.undo_list; 1594 if (!ulp) 1595 return; 1596 tsk->sysvsem.undo_list = NULL; 1597 1598 if (!atomic_dec_and_test(&ulp->refcnt)) 1599 return; 1600 1601 for (;;) { 1602 struct sem_array *sma; 1603 struct sem_undo *un; 1604 struct list_head tasks; 1605 int semid; 1606 int i; 1607 1608 rcu_read_lock(); 1609 un = list_entry_rcu(ulp->list_proc.next, 1610 struct sem_undo, list_proc); 1611 if (&un->list_proc == &ulp->list_proc) 1612 semid = -1; 1613 else 1614 semid = un->semid; 1615 rcu_read_unlock(); 1616 1617 if (semid == -1) 1618 break; 1619 1620 sma = sem_lock_check(tsk->nsproxy->ipc_ns, un->semid); 1621 1622 /* exit_sem raced with IPC_RMID, nothing to do */ 1623 if (IS_ERR(sma)) 1624 continue; 1625 1626 un = __lookup_undo(ulp, semid); 1627 if (un == NULL) { 1628 /* exit_sem raced with IPC_RMID+semget() that created 1629 * exactly the same semid. Nothing to do. 1630 */ 1631 sem_unlock(sma); 1632 continue; 1633 } 1634 1635 /* remove un from the linked lists */ 1636 assert_spin_locked(&sma->sem_perm.lock); 1637 list_del(&un->list_id); 1638 1639 spin_lock(&ulp->lock); 1640 list_del_rcu(&un->list_proc); 1641 spin_unlock(&ulp->lock); 1642 1643 /* perform adjustments registered in un */ 1644 for (i = 0; i < sma->sem_nsems; i++) { 1645 struct sem * semaphore = &sma->sem_base[i]; 1646 if (un->semadj[i]) { 1647 semaphore->semval += un->semadj[i]; 1648 /* 1649 * Range checks of the new semaphore value, 1650 * not defined by sus: 1651 * - Some unices ignore the undo entirely 1652 * (e.g. HP UX 11i 11.22, Tru64 V5.1) 1653 * - some cap the value (e.g. FreeBSD caps 1654 * at 0, but doesn't enforce SEMVMX) 1655 * 1656 * Linux caps the semaphore value, both at 0 1657 * and at SEMVMX. 
1658 * 1659 * Manfred <manfred@colorfullife.com> 1660 */ 1661 if (semaphore->semval < 0) 1662 semaphore->semval = 0; 1663 if (semaphore->semval > SEMVMX) 1664 semaphore->semval = SEMVMX; 1665 semaphore->sempid = task_tgid_vnr(current); 1666 } 1667 } 1668 /* maybe some queued-up processes were waiting for this */ 1669 INIT_LIST_HEAD(&tasks); 1670 do_smart_update(sma, NULL, 0, 1, &tasks); 1671 sem_unlock(sma); 1672 wake_up_sem_queue_do(&tasks); 1673 1674 kfree_rcu(un, rcu); 1675 } 1676 kfree(ulp); 1677 } 1678 1679 #ifdef CONFIG_PROC_FS 1680 static int sysvipc_sem_proc_show(struct seq_file *s, void *it) 1681 { 1682 struct user_namespace *user_ns = seq_user_ns(s); 1683 struct sem_array *sma = it; 1684 1685 return seq_printf(s, 1686 "%10d %10d %4o %10u %5u %5u %5u %5u %10lu %10lu\n", 1687 sma->sem_perm.key, 1688 sma->sem_perm.id, 1689 sma->sem_perm.mode, 1690 sma->sem_nsems, 1691 from_kuid_munged(user_ns, sma->sem_perm.uid), 1692 from_kgid_munged(user_ns, sma->sem_perm.gid), 1693 from_kuid_munged(user_ns, sma->sem_perm.cuid), 1694 from_kgid_munged(user_ns, sma->sem_perm.cgid), 1695 sma->sem_otime, 1696 sma->sem_ctime); 1697 } 1698 #endif 1699