/*
 *  linux/kernel/sys.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/module.h>
#include <linux/mm.h>
#include <linux/utsname.h>
#include <linux/mman.h>
#include <linux/smp_lock.h>
#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/prctl.h>
#include <linux/highuid.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
#include <linux/workqueue.h>
#include <linux/capability.h>
#include <linux/device.h>
#include <linux/key.h>
#include <linux/times.h>
#include <linux/posix-timers.h>
#include <linux/security.h>
#include <linux/dcookies.h>
#include <linux/suspend.h>
#include <linux/tty.h>
#include <linux/signal.h>
#include <linux/cn_proc.h>
#include <linux/getcpu.h>

#include <linux/compat.h>
#include <linux/syscalls.h>
#include <linux/kprobes.h>

#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/unistd.h>

#ifndef SET_UNALIGN_CTL
# define SET_UNALIGN_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_UNALIGN_CTL
# define GET_UNALIGN_CTL(a,b)	(-EINVAL)
#endif
#ifndef SET_FPEMU_CTL
# define SET_FPEMU_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_FPEMU_CTL
# define GET_FPEMU_CTL(a,b)	(-EINVAL)
#endif
#ifndef SET_FPEXC_CTL
# define SET_FPEXC_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_FPEXC_CTL
# define GET_FPEXC_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_ENDIAN
# define GET_ENDIAN(a,b)	(-EINVAL)
#endif
#ifndef SET_ENDIAN
# define SET_ENDIAN(a,b)	(-EINVAL)
#endif

/*
 * this is where the system-wide overflow UID and GID are defined, for
 * architectures that now have 32-bit UID/GID but didn't in the past
 */

int overflowuid = DEFAULT_OVERFLOWUID;
int overflowgid = DEFAULT_OVERFLOWGID;

#ifdef CONFIG_UID16
EXPORT_SYMBOL(overflowuid);
EXPORT_SYMBOL(overflowgid);
#endif

/*
 * the same as above, but for filesystems which can only store a 16-bit
 * UID and GID. as such, this is needed on all architectures
 */

int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
int fs_overflowgid = DEFAULT_FS_OVERFLOWGID;

EXPORT_SYMBOL(fs_overflowuid);
EXPORT_SYMBOL(fs_overflowgid);

/*
 * this indicates whether you can reboot with ctrl-alt-del: the default is yes
 */

int C_A_D = 1;
struct pid *cad_pid;
EXPORT_SYMBOL(cad_pid);

/*
 * Notifier list for kernel code which wants to be called
 * at shutdown. This is used to stop any idling DMA operations
 * and the like.
 */

static BLOCKING_NOTIFIER_HEAD(reboot_notifier_list);

/*
 *	Notifier chain core routines.  The exported routines below
 *	are layered on top of these, with appropriate locking added.
 */

static int notifier_chain_register(struct notifier_block **nl,
		struct notifier_block *n)
{
	while ((*nl) != NULL) {
		if (n->priority > (*nl)->priority)
			break;
		nl = &((*nl)->next);
	}
	n->next = *nl;
	rcu_assign_pointer(*nl, n);
	return 0;
}

static int notifier_chain_unregister(struct notifier_block **nl,
		struct notifier_block *n)
{
	while ((*nl) != NULL) {
		if ((*nl) == n) {
			rcu_assign_pointer(*nl, n->next);
			return 0;
		}
		nl = &((*nl)->next);
	}
	return -ENOENT;
}

static int __kprobes notifier_call_chain(struct notifier_block **nl,
		unsigned long val, void *v)
{
	int ret = NOTIFY_DONE;
	struct notifier_block *nb, *next_nb;

	nb = rcu_dereference(*nl);
	while (nb) {
		next_nb = rcu_dereference(nb->next);
		ret = nb->notifier_call(nb, val, v);
		if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK)
			break;
		nb = next_nb;
	}
	return ret;
}

/*
 *	Atomic notifier chain routines.  Registration and unregistration
 *	use a spinlock, and call_chain is synchronized by RCU (no locks).
 */

/**
 *	atomic_notifier_chain_register - Add notifier to an atomic notifier chain
 *	@nh: Pointer to head of the atomic notifier chain
 *	@n: New entry in notifier chain
 *
 *	Adds a notifier to an atomic notifier chain.
 *
 *	Currently always returns zero.
 */

int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
		struct notifier_block *n)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&nh->lock, flags);
	ret = notifier_chain_register(&nh->head, n);
	spin_unlock_irqrestore(&nh->lock, flags);
	return ret;
}

EXPORT_SYMBOL_GPL(atomic_notifier_chain_register);

/**
 *	atomic_notifier_chain_unregister - Remove notifier from an atomic notifier chain
 *	@nh: Pointer to head of the atomic notifier chain
 *	@n: Entry to remove from notifier chain
 *
 *	Removes a notifier from an atomic notifier chain.
 *
 *	Returns zero on success or %-ENOENT on failure.
 */
int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh,
		struct notifier_block *n)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&nh->lock, flags);
	ret = notifier_chain_unregister(&nh->head, n);
	spin_unlock_irqrestore(&nh->lock, flags);
	synchronize_rcu();
	return ret;
}

EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);

/**
 *	atomic_notifier_call_chain - Call functions in an atomic notifier chain
 *	@nh: Pointer to head of the atomic notifier chain
 *	@val: Value passed unmodified to notifier function
 *	@v: Pointer passed unmodified to notifier function
 *
 *	Calls each function in a notifier chain in turn.  The functions
 *	run in an atomic context, so they must not block.
 *	This routine uses RCU to synchronize with changes to the chain.
 *
 *	If the return value of the notifier can be and'ed
 *	with %NOTIFY_STOP_MASK then atomic_notifier_call_chain
 *	will return immediately, with the return value of
 *	the notifier function which halted execution.
 *	Otherwise the return value is the return value
 *	of the last notifier function called.
 */

int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh,
		unsigned long val, void *v)
{
	int ret;

	rcu_read_lock();
	ret = notifier_call_chain(&nh->head, val, v);
	rcu_read_unlock();
	return ret;
}

EXPORT_SYMBOL_GPL(atomic_notifier_call_chain);

/*
 *	Blocking notifier chain routines.  All access to the chain is
 *	synchronized by an rwsem.
 */

/**
 *	blocking_notifier_chain_register - Add notifier to a blocking notifier chain
 *	@nh: Pointer to head of the blocking notifier chain
 *	@n: New entry in notifier chain
 *
 *	Adds a notifier to a blocking notifier chain.
 *	Must be called in process context.
 *
 *	Currently always returns zero.
 */

int blocking_notifier_chain_register(struct blocking_notifier_head *nh,
		struct notifier_block *n)
{
	int ret;

	/*
	 * This code gets used during boot-up, when task switching is
	 * not yet working and interrupts must remain disabled.  At
	 * such times we must not call down_write().
	 */
	if (unlikely(system_state == SYSTEM_BOOTING))
		return notifier_chain_register(&nh->head, n);

	down_write(&nh->rwsem);
	ret = notifier_chain_register(&nh->head, n);
	up_write(&nh->rwsem);
	return ret;
}

EXPORT_SYMBOL_GPL(blocking_notifier_chain_register);

/**
 *	blocking_notifier_chain_unregister - Remove notifier from a blocking notifier chain
 *	@nh: Pointer to head of the blocking notifier chain
 *	@n: Entry to remove from notifier chain
 *
 *	Removes a notifier from a blocking notifier chain.
 *	Must be called from process context.
 *
 *	Returns zero on success or %-ENOENT on failure.
 */
int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh,
		struct notifier_block *n)
{
	int ret;

	/*
	 * This code gets used during boot-up, when task switching is
	 * not yet working and interrupts must remain disabled.  At
	 * such times we must not call down_write().
	 */
	if (unlikely(system_state == SYSTEM_BOOTING))
		return notifier_chain_unregister(&nh->head, n);

	down_write(&nh->rwsem);
	ret = notifier_chain_unregister(&nh->head, n);
	up_write(&nh->rwsem);
	return ret;
}

EXPORT_SYMBOL_GPL(blocking_notifier_chain_unregister);

/**
 *	blocking_notifier_call_chain - Call functions in a blocking notifier chain
 *	@nh: Pointer to head of the blocking notifier chain
 *	@val: Value passed unmodified to notifier function
 *	@v: Pointer passed unmodified to notifier function
 *
 *	Calls each function in a notifier chain in turn.  The functions
 *	run in a process context, so they are allowed to block.
 *
 *	If the return value of the notifier can be and'ed
 *	with %NOTIFY_STOP_MASK then blocking_notifier_call_chain
 *	will return immediately, with the return value of
 *	the notifier function which halted execution.
 *	Otherwise the return value is the return value
 *	of the last notifier function called.
 */

int blocking_notifier_call_chain(struct blocking_notifier_head *nh,
		unsigned long val, void *v)
{
	int ret;

	down_read(&nh->rwsem);
	ret = notifier_call_chain(&nh->head, val, v);
	up_read(&nh->rwsem);
	return ret;
}

EXPORT_SYMBOL_GPL(blocking_notifier_call_chain);
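
/*
 * A minimal usage sketch (illustrative only; the handler, chain and
 * event names are hypothetical, not defined in this file).  A client
 * supplies a notifier_block and hooks it onto a chain:
 *
 *	static int my_handler(struct notifier_block *nb,
 *			      unsigned long event, void *data)
 *	{
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_nb = {
 *		.notifier_call	= my_handler,
 *		.priority	= 0,
 *	};
 *
 *	blocking_notifier_chain_register(&my_chain, &my_nb);
 *	blocking_notifier_call_chain(&my_chain, MY_EVENT, NULL);
 *	blocking_notifier_chain_unregister(&my_chain, &my_nb);
 */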
/*
 *	Raw notifier chain routines.  There is no protection;
 *	the caller must provide it.  Use at your own risk!
 */

/**
 *	raw_notifier_chain_register - Add notifier to a raw notifier chain
 *	@nh: Pointer to head of the raw notifier chain
 *	@n: New entry in notifier chain
 *
 *	Adds a notifier to a raw notifier chain.
 *	All locking must be provided by the caller.
 *
 *	Currently always returns zero.
 */

int raw_notifier_chain_register(struct raw_notifier_head *nh,
		struct notifier_block *n)
{
	return notifier_chain_register(&nh->head, n);
}

EXPORT_SYMBOL_GPL(raw_notifier_chain_register);

/**
 *	raw_notifier_chain_unregister - Remove notifier from a raw notifier chain
 *	@nh: Pointer to head of the raw notifier chain
 *	@n: Entry to remove from notifier chain
 *
 *	Removes a notifier from a raw notifier chain.
 *	All locking must be provided by the caller.
 *
 *	Returns zero on success or %-ENOENT on failure.
 */
int raw_notifier_chain_unregister(struct raw_notifier_head *nh,
		struct notifier_block *n)
{
	return notifier_chain_unregister(&nh->head, n);
}

EXPORT_SYMBOL_GPL(raw_notifier_chain_unregister);

/**
 *	raw_notifier_call_chain - Call functions in a raw notifier chain
 *	@nh: Pointer to head of the raw notifier chain
 *	@val: Value passed unmodified to notifier function
 *	@v: Pointer passed unmodified to notifier function
 *
 *	Calls each function in a notifier chain in turn.  The functions
 *	run in an undefined context.
 *	All locking must be provided by the caller.
 *
 *	If the return value of the notifier can be and'ed
 *	with %NOTIFY_STOP_MASK then raw_notifier_call_chain
 *	will return immediately, with the return value of
 *	the notifier function which halted execution.
 *	Otherwise the return value is the return value
 *	of the last notifier function called.
 */

int raw_notifier_call_chain(struct raw_notifier_head *nh,
		unsigned long val, void *v)
{
	return notifier_call_chain(&nh->head, val, v);
}

EXPORT_SYMBOL_GPL(raw_notifier_call_chain);

/*
 *	SRCU notifier chain routines.  Registration and unregistration
 *	use a mutex, and call_chain is synchronized by SRCU (no locks).
 */

/**
 *	srcu_notifier_chain_register - Add notifier to an SRCU notifier chain
 *	@nh: Pointer to head of the SRCU notifier chain
 *	@n: New entry in notifier chain
 *
 *	Adds a notifier to an SRCU notifier chain.
 *	Must be called in process context.
 *
 *	Currently always returns zero.
 */

int srcu_notifier_chain_register(struct srcu_notifier_head *nh,
		struct notifier_block *n)
{
	int ret;

	/*
	 * This code gets used during boot-up, when task switching is
	 * not yet working and interrupts must remain disabled.  At
	 * such times we must not call mutex_lock().
	 */
	if (unlikely(system_state == SYSTEM_BOOTING))
		return notifier_chain_register(&nh->head, n);

	mutex_lock(&nh->mutex);
	ret = notifier_chain_register(&nh->head, n);
	mutex_unlock(&nh->mutex);
	return ret;
}

EXPORT_SYMBOL_GPL(srcu_notifier_chain_register);

/**
 *	srcu_notifier_chain_unregister - Remove notifier from an SRCU notifier chain
 *	@nh: Pointer to head of the SRCU notifier chain
 *	@n: Entry to remove from notifier chain
 *
 *	Removes a notifier from an SRCU notifier chain.
 *	Must be called from process context.
 *
 *	Returns zero on success or %-ENOENT on failure.
 */
int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh,
		struct notifier_block *n)
{
	int ret;

	/*
	 * This code gets used during boot-up, when task switching is
	 * not yet working and interrupts must remain disabled.  At
	 * such times we must not call mutex_lock().
	 */
	if (unlikely(system_state == SYSTEM_BOOTING))
		return notifier_chain_unregister(&nh->head, n);

	mutex_lock(&nh->mutex);
	ret = notifier_chain_unregister(&nh->head, n);
	mutex_unlock(&nh->mutex);
	synchronize_srcu(&nh->srcu);
	return ret;
}

EXPORT_SYMBOL_GPL(srcu_notifier_chain_unregister);

/**
 *	srcu_notifier_call_chain - Call functions in an SRCU notifier chain
 *	@nh: Pointer to head of the SRCU notifier chain
 *	@val: Value passed unmodified to notifier function
 *	@v: Pointer passed unmodified to notifier function
 *
 *	Calls each function in a notifier chain in turn.  The functions
 *	run in a process context, so they are allowed to block.
 *
 *	If the return value of the notifier can be and'ed
 *	with %NOTIFY_STOP_MASK then srcu_notifier_call_chain
 *	will return immediately, with the return value of
 *	the notifier function which halted execution.
 *	Otherwise the return value is the return value
 *	of the last notifier function called.
 */

int srcu_notifier_call_chain(struct srcu_notifier_head *nh,
		unsigned long val, void *v)
{
	int ret;
	int idx;

	idx = srcu_read_lock(&nh->srcu);
	ret = notifier_call_chain(&nh->head, val, v);
	srcu_read_unlock(&nh->srcu, idx);
	return ret;
}

EXPORT_SYMBOL_GPL(srcu_notifier_call_chain);

/**
 *	srcu_init_notifier_head - Initialize an SRCU notifier head
 *	@nh: Pointer to head of the srcu notifier chain
 *
 *	Unlike other sorts of notifier heads, SRCU notifier heads require
 *	dynamic initialization.  Be sure to call this routine before
 *	calling any of the other SRCU notifier routines for this head.
 *
 *	If an SRCU notifier head is deallocated, it must first be cleaned
 *	up by calling srcu_cleanup_notifier_head().  Otherwise the head's
 *	per-cpu data (used by the SRCU mechanism) will leak.
 */

void srcu_init_notifier_head(struct srcu_notifier_head *nh)
{
	mutex_init(&nh->mutex);
	if (init_srcu_struct(&nh->srcu) < 0)
		BUG();
	nh->head = NULL;
}

EXPORT_SYMBOL_GPL(srcu_init_notifier_head);

/**
 *	register_reboot_notifier - Register function to be called at reboot time
 *	@nb: Info about notifier function to be called
 *
 *	Registers a function with the list of functions
 *	to be called at reboot time.
 *
 *	Currently always returns zero, as blocking_notifier_chain_register
 *	always returns zero.
 */

int register_reboot_notifier(struct notifier_block * nb)
{
	return blocking_notifier_chain_register(&reboot_notifier_list, nb);
}

EXPORT_SYMBOL(register_reboot_notifier);

/**
 *	unregister_reboot_notifier - Unregister previously registered reboot notifier
 *	@nb: Hook to be unregistered
 *
 *	Unregisters a previously registered reboot
 *	notifier function.
 *
 *	Returns zero on success, or %-ENOENT on failure.
 */

int unregister_reboot_notifier(struct notifier_block * nb)
{
	return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
}

EXPORT_SYMBOL(unregister_reboot_notifier);
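
/*
 * A minimal sketch of a reboot notifier client (hypothetical names, not
 * part of this file).  The callback runs off the blocking chain above,
 * so it may sleep; @event is SYS_RESTART, SYS_HALT or SYS_POWER_OFF:
 *
 *	static int my_reboot_handler(struct notifier_block *nb,
 *				     unsigned long event, void *unused)
 *	{
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_reboot_nb = {
 *		.notifier_call	= my_reboot_handler,
 *	};
 *
 *	register_reboot_notifier(&my_reboot_nb);
 *	...
 *	unregister_reboot_notifier(&my_reboot_nb);
 */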
static int set_one_prio(struct task_struct *p, int niceval, int error)
{
	int no_nice;

	if (p->uid != current->euid &&
	    p->euid != current->euid && !capable(CAP_SYS_NICE)) {
		error = -EPERM;
		goto out;
	}
	if (niceval < task_nice(p) && !can_nice(p, niceval)) {
		error = -EACCES;
		goto out;
	}
	no_nice = security_task_setnice(p, niceval);
	if (no_nice) {
		error = no_nice;
		goto out;
	}
	if (error == -ESRCH)
		error = 0;
	set_user_nice(p, niceval);
out:
	return error;
}

asmlinkage long sys_setpriority(int which, int who, int niceval)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	int error = -EINVAL;

	if (which > 2 || which < 0)
		goto out;

	/* normalize: avoid signed division (rounding problems) */
	error = -ESRCH;
	if (niceval < -20)
		niceval = -20;
	if (niceval > 19)
		niceval = 19;

	read_lock(&tasklist_lock);
	switch (which) {
	case PRIO_PROCESS:
		if (!who)
			who = current->pid;
		p = find_task_by_pid(who);
		if (p)
			error = set_one_prio(p, niceval, error);
		break;
	case PRIO_PGRP:
		if (!who)
			who = process_group(current);
		do_each_task_pid(who, PIDTYPE_PGID, p) {
			error = set_one_prio(p, niceval, error);
		} while_each_task_pid(who, PIDTYPE_PGID, p);
		break;
	case PRIO_USER:
		user = current->user;
		if (!who)
			who = current->uid;
		else
			if ((who != current->uid) && !(user = find_user(who)))
				goto out_unlock;	/* No processes for this user */

		do_each_thread(g, p)
			if (p->uid == who)
				error = set_one_prio(p, niceval, error);
		while_each_thread(g, p);
		if (who != current->uid)
			free_uid(user);		/* For find_user() */
		break;
	}
out_unlock:
	read_unlock(&tasklist_lock);
out:
	return error;
}
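
/*
 * A userspace sketch of the call above (illustrative, not part of this
 * file): renicing the caller's own process group.  A "who" of 0 selects
 * the caller, as handled in the switch above:
 *
 *	setpriority(PRIO_PGRP, 0, 10);
 */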
/*
 * Ugh. To avoid negative return values, "getpriority()" will
 * not return the normal nice-value, but a negated value that
 * has been offset by 20 (ie it returns 40..1 instead of -20..19)
 * to stay compatible.
 */
asmlinkage long sys_getpriority(int which, int who)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	long niceval, retval = -ESRCH;

	if (which > 2 || which < 0)
		return -EINVAL;

	read_lock(&tasklist_lock);
	switch (which) {
	case PRIO_PROCESS:
		if (!who)
			who = current->pid;
		p = find_task_by_pid(who);
		if (p) {
			niceval = 20 - task_nice(p);
			if (niceval > retval)
				retval = niceval;
		}
		break;
	case PRIO_PGRP:
		if (!who)
			who = process_group(current);
		do_each_task_pid(who, PIDTYPE_PGID, p) {
			niceval = 20 - task_nice(p);
			if (niceval > retval)
				retval = niceval;
		} while_each_task_pid(who, PIDTYPE_PGID, p);
		break;
	case PRIO_USER:
		user = current->user;
		if (!who)
			who = current->uid;
		else
			if ((who != current->uid) && !(user = find_user(who)))
				goto out_unlock;	/* No processes for this user */

		do_each_thread(g, p)
			if (p->uid == who) {
				niceval = 20 - task_nice(p);
				if (niceval > retval)
					retval = niceval;
			}
		while_each_thread(g, p);
		if (who != current->uid)
			free_uid(user);		/* for find_user() */
		break;
	}
out_unlock:
	read_unlock(&tasklist_lock);

	return retval;
}

/**
 *	emergency_restart - reboot the system
 *
 *	Without shutting down any hardware or taking any locks
 *	reboot the system.  This is called when we know we are in
 *	trouble so this is our best effort to reboot.  This is
 *	safe to call in interrupt context.
 */
void emergency_restart(void)
{
	machine_emergency_restart();
}
EXPORT_SYMBOL_GPL(emergency_restart);

static void kernel_restart_prepare(char *cmd)
{
	blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
	system_state = SYSTEM_RESTART;
	device_shutdown();
}

/**
 *	kernel_restart - reboot the system
 *	@cmd: pointer to buffer containing command to execute for restart
 *		or %NULL
 *
 *	Shutdown everything and perform a clean reboot.
 *	This is not safe to call in interrupt context.
 */
void kernel_restart(char *cmd)
{
	kernel_restart_prepare(cmd);
	if (!cmd)
		printk(KERN_EMERG "Restarting system.\n");
	else
		printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
	machine_restart(cmd);
}
EXPORT_SYMBOL_GPL(kernel_restart);

/**
 *	kernel_kexec - reboot the system
 *
 *	Move into place and start executing a preloaded standalone
 *	executable.  If nothing was preloaded return an error.
 */
static void kernel_kexec(void)
{
#ifdef CONFIG_KEXEC
	struct kimage *image;
	image = xchg(&kexec_image, NULL);
	if (!image)
		return;
	kernel_restart_prepare(NULL);
	printk(KERN_EMERG "Starting new kernel\n");
	machine_shutdown();
	machine_kexec(image);
#endif
}

void kernel_shutdown_prepare(enum system_states state)
{
	blocking_notifier_call_chain(&reboot_notifier_list,
		(state == SYSTEM_HALT) ? SYS_HALT : SYS_POWER_OFF, NULL);
	system_state = state;
	device_shutdown();
}

/**
 *	kernel_halt - halt the system
 *
 *	Shutdown everything and perform a clean system halt.
 */
void kernel_halt(void)
{
	kernel_shutdown_prepare(SYSTEM_HALT);
	printk(KERN_EMERG "System halted.\n");
	machine_halt();
}

EXPORT_SYMBOL_GPL(kernel_halt);

/**
 *	kernel_power_off - power_off the system
 *
 *	Shutdown everything and perform a clean system power_off.
 */
void kernel_power_off(void)
{
	kernel_shutdown_prepare(SYSTEM_POWER_OFF);
	printk(KERN_EMERG "Power down.\n");
	machine_power_off();
}
EXPORT_SYMBOL_GPL(kernel_power_off);

/*
 * Reboot system call: for obvious reasons only root may call it,
 * and even root needs to set up some magic numbers in the registers
 * so that some mistake won't make this reboot the whole machine.
 * You can also set the meaning of the ctrl-alt-del-key here.
 *
 * reboot doesn't sync: do that yourself before calling this.
 */
asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user * arg)
{
	char buffer[256];

	/* We only trust the superuser with rebooting the system. */
	if (!capable(CAP_SYS_BOOT))
		return -EPERM;

	/* For safety, we require "magic" arguments. */
	if (magic1 != LINUX_REBOOT_MAGIC1 ||
	    (magic2 != LINUX_REBOOT_MAGIC2 &&
	     magic2 != LINUX_REBOOT_MAGIC2A &&
	     magic2 != LINUX_REBOOT_MAGIC2B &&
	     magic2 != LINUX_REBOOT_MAGIC2C))
		return -EINVAL;

	/* Instead of trying to make the power_off code look like
	 * halt when pm_power_off is not set do it the easy way.
	 */
	if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
		cmd = LINUX_REBOOT_CMD_HALT;

	lock_kernel();
	switch (cmd) {
	case LINUX_REBOOT_CMD_RESTART:
		kernel_restart(NULL);
		break;

	case LINUX_REBOOT_CMD_CAD_ON:
		C_A_D = 1;
		break;

	case LINUX_REBOOT_CMD_CAD_OFF:
		C_A_D = 0;
		break;

	case LINUX_REBOOT_CMD_HALT:
		kernel_halt();
		unlock_kernel();
		do_exit(0);
		break;

	case LINUX_REBOOT_CMD_POWER_OFF:
		kernel_power_off();
		unlock_kernel();
		do_exit(0);
		break;

	case LINUX_REBOOT_CMD_RESTART2:
		if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) {
			unlock_kernel();
			return -EFAULT;
		}
		buffer[sizeof(buffer) - 1] = '\0';

		kernel_restart(buffer);
		break;

	case LINUX_REBOOT_CMD_KEXEC:
		kernel_kexec();
		unlock_kernel();
		return -EINVAL;

#ifdef CONFIG_SOFTWARE_SUSPEND
	case LINUX_REBOOT_CMD_SW_SUSPEND:
	{
		int ret = software_suspend();
		unlock_kernel();
		return ret;
	}
#endif

	default:
		unlock_kernel();
		return -EINVAL;
	}
	unlock_kernel();
	return 0;
}

static void deferred_cad(struct work_struct *dummy)
{
	kernel_restart(NULL);
}

/*
 * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
 * As it's called within an interrupt, it may NOT sync: the only choice
 * is whether to reboot at once, or just ignore the ctrl-alt-del.
 */
void ctrl_alt_del(void)
{
	static DECLARE_WORK(cad_work, deferred_cad);

	if (C_A_D)
		schedule_work(&cad_work);
	else
		kill_cad_pid(SIGINT, 1);
}
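
/*
 * A userspace sketch of the magic-number handshake sys_reboot() demands
 * (illustrative only, not part of this file; constants come from
 * <linux/reboot.h> and the raw syscall stands in for a libc wrapper):
 *
 *	sync();		reboot(2) itself does not sync
 *	syscall(__NR_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2,
 *		LINUX_REBOOT_CMD_RESTART, NULL);
 */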
/*
 * Unprivileged users may change the real gid to the effective gid
 * or vice versa.  (BSD-style)
 *
 * If you set the real gid at all, or set the effective gid to a value not
 * equal to the real gid, then the saved gid is set to the new effective gid.
 *
 * This makes it possible for a setgid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setregid() will be
 * 100% compatible with BSD.  A program which uses just setgid() will be
 * 100% compatible with POSIX with saved IDs.
 *
 * SMP: There are no races, the GIDs are checked only by filesystem
 *      operations (as far as semantic preservation is concerned).
 */
asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
{
	int old_rgid = current->gid;
	int old_egid = current->egid;
	int new_rgid = old_rgid;
	int new_egid = old_egid;
	int retval;

	retval = security_task_setgid(rgid, egid, (gid_t)-1, LSM_SETID_RE);
	if (retval)
		return retval;

	if (rgid != (gid_t) -1) {
		if ((old_rgid == rgid) ||
		    (current->egid == rgid) ||
		    capable(CAP_SETGID))
			new_rgid = rgid;
		else
			return -EPERM;
	}
	if (egid != (gid_t) -1) {
		if ((old_rgid == egid) ||
		    (current->egid == egid) ||
		    (current->sgid == egid) ||
		    capable(CAP_SETGID))
			new_egid = egid;
		else
			return -EPERM;
	}
	if (new_egid != old_egid) {
		current->mm->dumpable = suid_dumpable;
		smp_wmb();
	}
	if (rgid != (gid_t) -1 ||
	    (egid != (gid_t) -1 && egid != old_rgid))
		current->sgid = new_egid;
	current->fsgid = new_egid;
	current->egid = new_egid;
	current->gid = new_rgid;
	key_fsgid_changed(current);
	proc_id_connector(current, PROC_EVENT_GID);
	return 0;
}

/*
 * setgid() is implemented like SysV w/ SAVED_IDS
 *
 * SMP: Same implicit races as above.
 */
asmlinkage long sys_setgid(gid_t gid)
{
	int old_egid = current->egid;
	int retval;

	retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID);
	if (retval)
		return retval;

	if (capable(CAP_SETGID)) {
		if (old_egid != gid) {
			current->mm->dumpable = suid_dumpable;
			smp_wmb();
		}
		current->gid = current->egid = current->sgid = current->fsgid = gid;
	} else if ((gid == current->gid) || (gid == current->sgid)) {
		if (old_egid != gid) {
			current->mm->dumpable = suid_dumpable;
			smp_wmb();
		}
		current->egid = current->fsgid = gid;
	}
	else
		return -EPERM;

	key_fsgid_changed(current);
	proc_id_connector(current, PROC_EVENT_GID);
	return 0;
}

static int set_user(uid_t new_ruid, int dumpclear)
{
	struct user_struct *new_user;

	new_user = alloc_uid(new_ruid);
	if (!new_user)
		return -EAGAIN;

	if (atomic_read(&new_user->processes) >=
			current->signal->rlim[RLIMIT_NPROC].rlim_cur &&
	    new_user != &root_user) {
		free_uid(new_user);
		return -EAGAIN;
	}

	switch_uid(new_user);

	if (dumpclear) {
		current->mm->dumpable = suid_dumpable;
		smp_wmb();
	}
	current->uid = new_ruid;
	return 0;
}

/*
 * Unprivileged users may change the real uid to the effective uid
 * or vice versa.  (BSD-style)
 *
 * If you set the real uid at all, or set the effective uid to a value not
 * equal to the real uid, then the saved uid is set to the new effective uid.
 *
 * This makes it possible for a setuid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setreuid() will be
 * 100% compatible with BSD.  A program which uses just setuid() will be
 * 100% compatible with POSIX with saved IDs.
 */
asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
{
	int old_ruid, old_euid, old_suid, new_ruid, new_euid;
	int retval;

	retval = security_task_setuid(ruid, euid, (uid_t)-1, LSM_SETID_RE);
	if (retval)
		return retval;

	new_ruid = old_ruid = current->uid;
	new_euid = old_euid = current->euid;
	old_suid = current->suid;

	if (ruid != (uid_t) -1) {
		new_ruid = ruid;
		if ((old_ruid != ruid) &&
		    (current->euid != ruid) &&
		    !capable(CAP_SETUID))
			return -EPERM;
	}

	if (euid != (uid_t) -1) {
		new_euid = euid;
		if ((old_ruid != euid) &&
		    (current->euid != euid) &&
		    (current->suid != euid) &&
		    !capable(CAP_SETUID))
			return -EPERM;
	}

	if (new_ruid != old_ruid && set_user(new_ruid, new_euid != old_euid) < 0)
		return -EAGAIN;

	if (new_euid != old_euid) {
		current->mm->dumpable = suid_dumpable;
		smp_wmb();
	}
	current->fsuid = current->euid = new_euid;
	if (ruid != (uid_t) -1 ||
	    (euid != (uid_t) -1 && euid != old_ruid))
		current->suid = current->euid;
	current->fsuid = current->euid;

	key_fsuid_changed(current);
	proc_id_connector(current, PROC_EVENT_UID);

	return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE);
}

/*
 * setuid() is implemented like SysV with SAVED_IDS
 *
 * Note that SAVED_ID's is deficient in that a setuid root program
 * like sendmail, for example, cannot set its uid to be a normal
 * user and then switch back, because if you're root, setuid() sets
 * the saved uid too.  If you don't like this, blame the bright people
 * in the POSIX committee and/or USG.  Note that the BSD-style setreuid()
 * will allow a root program to temporarily drop privileges and be able to
 * regain them by swapping the real and effective uid.
 */
asmlinkage long sys_setuid(uid_t uid)
{
	int old_euid = current->euid;
	int old_ruid, old_suid, new_suid;
	int retval;

	retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID);
	if (retval)
		return retval;

	old_ruid = current->uid;
	old_suid = current->suid;
	new_suid = old_suid;

	if (capable(CAP_SETUID)) {
		if (uid != old_ruid && set_user(uid, old_euid != uid) < 0)
			return -EAGAIN;
		new_suid = uid;
	} else if ((uid != current->uid) && (uid != new_suid))
		return -EPERM;

	if (old_euid != uid) {
		current->mm->dumpable = suid_dumpable;
		smp_wmb();
	}
	current->fsuid = current->euid = uid;
	current->suid = new_suid;

	key_fsuid_changed(current);
	proc_id_connector(current, PROC_EVENT_UID);

	return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID);
}
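
/*
 * A sketch of the permanent privilege drop described in the comments
 * above (illustrative userspace code, not part of this file).  Because
 * setreuid() also rewrites the saved uid when the real uid is set, a
 * setuid-root program cannot regain root after this:
 *
 *	uid_t real = getuid();
 *	if (setreuid(real, real) < 0)
 *		abort();	still privileged, refuse to continue
 */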
/*
 * This function implements a generic ability to update ruid, euid,
 * and suid.  This allows you to implement the 4.4 compatible seteuid().
 */
asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
{
	int old_ruid = current->uid;
	int old_euid = current->euid;
	int old_suid = current->suid;
	int retval;

	retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES);
	if (retval)
		return retval;

	if (!capable(CAP_SETUID)) {
		if ((ruid != (uid_t) -1) && (ruid != current->uid) &&
		    (ruid != current->euid) && (ruid != current->suid))
			return -EPERM;
		if ((euid != (uid_t) -1) && (euid != current->uid) &&
		    (euid != current->euid) && (euid != current->suid))
			return -EPERM;
		if ((suid != (uid_t) -1) && (suid != current->uid) &&
		    (suid != current->euid) && (suid != current->suid))
			return -EPERM;
	}
	if (ruid != (uid_t) -1) {
		if (ruid != current->uid && set_user(ruid, euid != current->euid) < 0)
			return -EAGAIN;
	}
	if (euid != (uid_t) -1) {
		if (euid != current->euid) {
			current->mm->dumpable = suid_dumpable;
			smp_wmb();
		}
		current->euid = euid;
	}
	current->fsuid = current->euid;
	if (suid != (uid_t) -1)
		current->suid = suid;

	key_fsuid_changed(current);
	proc_id_connector(current, PROC_EVENT_UID);

	return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES);
}

asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid)
{
	int retval;

	if (!(retval = put_user(current->uid, ruid)) &&
	    !(retval = put_user(current->euid, euid)))
		retval = put_user(current->suid, suid);

	return retval;
}

/*
 * Same as above, but for rgid, egid, sgid.
 */
asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
{
	int retval;

	retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES);
	if (retval)
		return retval;

	if (!capable(CAP_SETGID)) {
		if ((rgid != (gid_t) -1) && (rgid != current->gid) &&
		    (rgid != current->egid) && (rgid != current->sgid))
			return -EPERM;
		if ((egid != (gid_t) -1) && (egid != current->gid) &&
		    (egid != current->egid) && (egid != current->sgid))
			return -EPERM;
		if ((sgid != (gid_t) -1) && (sgid != current->gid) &&
		    (sgid != current->egid) && (sgid != current->sgid))
			return -EPERM;
	}
	if (egid != (gid_t) -1) {
		if (egid != current->egid) {
			current->mm->dumpable = suid_dumpable;
			smp_wmb();
		}
		current->egid = egid;
	}
	current->fsgid = current->egid;
	if (rgid != (gid_t) -1)
		current->gid = rgid;
	if (sgid != (gid_t) -1)
		current->sgid = sgid;

	key_fsgid_changed(current);
	proc_id_connector(current, PROC_EVENT_GID);
	return 0;
}

asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid)
{
	int retval;

	if (!(retval = put_user(current->gid, rgid)) &&
	    !(retval = put_user(current->egid, egid)))
		retval = put_user(current->sgid, sgid);

	return retval;
}

/*
 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
 * is used for "access()" and for the NFS daemon (letting nfsd stay at
 * whatever uid it wants to). It normally shadows "euid", except when
 * explicitly set by setfsuid() or for access..
 */
asmlinkage long sys_setfsuid(uid_t uid)
{
	int old_fsuid;

	old_fsuid = current->fsuid;
	if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS))
		return old_fsuid;

	if (uid == current->uid || uid == current->euid ||
	    uid == current->suid || uid == current->fsuid ||
	    capable(CAP_SETUID)) {
		if (uid != old_fsuid) {
			current->mm->dumpable = suid_dumpable;
			smp_wmb();
		}
		current->fsuid = uid;
	}

	key_fsuid_changed(current);
	proc_id_connector(current, PROC_EVENT_UID);

	security_task_post_setuid(old_fsuid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS);

	return old_fsuid;
}

/*
 * Samma på svenska.. ("same thing, in Swedish")
 */
asmlinkage long sys_setfsgid(gid_t gid)
{
	int old_fsgid;

	old_fsgid = current->fsgid;
	if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS))
		return old_fsgid;

	if (gid == current->gid || gid == current->egid ||
	    gid == current->sgid || gid == current->fsgid ||
	    capable(CAP_SETGID)) {
		if (gid != old_fsgid) {
			current->mm->dumpable = suid_dumpable;
			smp_wmb();
		}
		current->fsgid = gid;
		key_fsgid_changed(current);
		proc_id_connector(current, PROC_EVENT_GID);
	}
	return old_fsgid;
}

asmlinkage long sys_times(struct tms __user * tbuf)
{
	/*
	 *	In the SMP world we might just be unlucky and have one of
	 *	the times increment as we use it. Since the value is an
	 *	atomically safe type this is just fine. Conceptually its
	 *	as if the syscall took an instant longer to occur.
	 */
	if (tbuf) {
		struct tms tmp;
		struct task_struct *tsk = current;
		struct task_struct *t;
		cputime_t utime, stime, cutime, cstime;

		spin_lock_irq(&tsk->sighand->siglock);
		utime = tsk->signal->utime;
		stime = tsk->signal->stime;
		t = tsk;
		do {
			utime = cputime_add(utime, t->utime);
			stime = cputime_add(stime, t->stime);
			t = next_thread(t);
		} while (t != tsk);

		cutime = tsk->signal->cutime;
		cstime = tsk->signal->cstime;
		spin_unlock_irq(&tsk->sighand->siglock);

		tmp.tms_utime = cputime_to_clock_t(utime);
		tmp.tms_stime = cputime_to_clock_t(stime);
		tmp.tms_cutime = cputime_to_clock_t(cutime);
		tmp.tms_cstime = cputime_to_clock_t(cstime);
		if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
			return -EFAULT;
	}
	return (long) jiffies_64_to_clock_t(get_jiffies_64());
}

/*
 * This needs some heavy checking ...
 * I just haven't the stomach for it. I also don't fully
 * understand sessions/pgrp etc. Let somebody who does explain it.
 *
 * OK, I think I have the protection semantics right.... this is really
 * only important on a multi-user system anyway, to make sure one user
 * can't send a signal to a process owned by another.  -TYT, 12/12/91
 *
 * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
 * LBT 04.03.94
 */
asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
{
	struct task_struct *p;
	struct task_struct *group_leader = current->group_leader;
	int err = -EINVAL;

	if (!pid)
		pid = group_leader->pid;
	if (!pgid)
		pgid = pid;
	if (pgid < 0)
		return -EINVAL;

	/* From this point forward we keep holding onto the tasklist lock
	 * so that our parent does not change from under us. -DaveM
	 */
	write_lock_irq(&tasklist_lock);

	err = -ESRCH;
	p = find_task_by_pid(pid);
	if (!p)
		goto out;

	err = -EINVAL;
	if (!thread_group_leader(p))
		goto out;

	if (p->real_parent == group_leader) {
		err = -EPERM;
		if (process_session(p) != process_session(group_leader))
			goto out;
		err = -EACCES;
		if (p->did_exec)
			goto out;
	} else {
		err = -ESRCH;
		if (p != group_leader)
			goto out;
	}

	err = -EPERM;
	if (p->signal->leader)
		goto out;

	if (pgid != pid) {
		struct task_struct *g =
			find_task_by_pid_type(PIDTYPE_PGID, pgid);

		if (!g || process_session(g) != process_session(group_leader))
			goto out;
	}

	err = security_task_setpgid(p, pgid);
	if (err)
		goto out;

	if (process_group(p) != pgid) {
		detach_pid(p, PIDTYPE_PGID);
		p->signal->pgrp = pgid;
		attach_pid(p, PIDTYPE_PGID, pgid);
	}

	err = 0;
out:
	/* All paths lead to here, thus we are safe. -DaveM */
	write_unlock_irq(&tasklist_lock);
	return err;
}

asmlinkage long sys_getpgid(pid_t pid)
{
	if (!pid)
		return process_group(current);
	else {
		int retval;
		struct task_struct *p;

		read_lock(&tasklist_lock);
		p = find_task_by_pid(pid);

		retval = -ESRCH;
		if (p) {
			retval = security_task_getpgid(p);
			if (!retval)
				retval = process_group(p);
		}
		read_unlock(&tasklist_lock);
		return retval;
	}
}

#ifdef __ARCH_WANT_SYS_GETPGRP

asmlinkage long sys_getpgrp(void)
{
	/* SMP - assuming writes are word atomic this is fine */
	return process_group(current);
}

#endif

asmlinkage long sys_getsid(pid_t pid)
{
	if (!pid)
		return process_session(current);
	else {
		int retval;
		struct task_struct *p;

		read_lock(&tasklist_lock);
		p = find_task_by_pid(pid);

		retval = -ESRCH;
		if (p) {
			retval = security_task_getsid(p);
			if (!retval)
				retval = process_session(p);
		}
		read_unlock(&tasklist_lock);
		return retval;
	}
}

asmlinkage long sys_setsid(void)
{
	struct task_struct *group_leader = current->group_leader;
	pid_t session;
	int err = -EPERM;

	write_lock_irq(&tasklist_lock);

	/* Fail if I am already a session leader */
	if (group_leader->signal->leader)
		goto out;

	session = group_leader->pid;
	/* Fail if a process group id already exists that equals the
	 * proposed session id.
	 *
	 * Don't check if session id == 1 because kernel threads use this
	 * session id and so the check will always fail and make it so
	 * init cannot successfully call setsid.
	 */
	if (session > 1 && find_task_by_pid_type(PIDTYPE_PGID, session))
		goto out;

	group_leader->signal->leader = 1;
	__set_special_pids(session, session);

	spin_lock(&group_leader->sighand->siglock);
	group_leader->signal->tty = NULL;
	group_leader->signal->tty_old_pgrp = 0;
	spin_unlock(&group_leader->sighand->siglock);

	err = process_group(group_leader);
out:
	write_unlock_irq(&tasklist_lock);
	return err;
}

/*
 * Supplementary group IDs
 */

/* init to 2 - one for init_task, one to ensure it is never freed */
struct group_info init_groups = { .usage = ATOMIC_INIT(2) };

struct group_info *groups_alloc(int gidsetsize)
{
	struct group_info *group_info;
	int nblocks;
	int i;

	nblocks = (gidsetsize + NGROUPS_PER_BLOCK - 1) / NGROUPS_PER_BLOCK;
	/* Make sure we always allocate at least one indirect block pointer */
	nblocks = nblocks ? : 1;
	group_info = kmalloc(sizeof(*group_info) + nblocks*sizeof(gid_t *), GFP_USER);
	if (!group_info)
		return NULL;
	group_info->ngroups = gidsetsize;
	group_info->nblocks = nblocks;
	atomic_set(&group_info->usage, 1);

	if (gidsetsize <= NGROUPS_SMALL)
		group_info->blocks[0] = group_info->small_block;
	else {
		for (i = 0; i < nblocks; i++) {
			gid_t *b;
			b = (void *)__get_free_page(GFP_USER);
			if (!b)
				goto out_undo_partial_alloc;
			group_info->blocks[i] = b;
		}
	}
	return group_info;

out_undo_partial_alloc:
	while (--i >= 0) {
		free_page((unsigned long)group_info->blocks[i]);
	}
	kfree(group_info);
	return NULL;
}

EXPORT_SYMBOL(groups_alloc);

void groups_free(struct group_info *group_info)
{
	if (group_info->blocks[0] != group_info->small_block) {
		int i;
		for (i = 0; i < group_info->nblocks; i++)
			free_page((unsigned long)group_info->blocks[i]);
	}
	kfree(group_info);
}

EXPORT_SYMBOL(groups_free);

/* export the group_info to a user-space array */
static int groups_to_user(gid_t __user *grouplist,
		struct group_info *group_info)
{
	int i;
	int count = group_info->ngroups;

	for (i = 0; i < group_info->nblocks; i++) {
		int cp_count = min(NGROUPS_PER_BLOCK, count);
		int off = i * NGROUPS_PER_BLOCK;
		int len = cp_count * sizeof(*grouplist);

		if (copy_to_user(grouplist+off, group_info->blocks[i], len))
			return -EFAULT;

		count -= cp_count;
	}
	return 0;
}

/* fill a group_info from a user-space array - it must be allocated already */
static int groups_from_user(struct group_info *group_info,
		gid_t __user *grouplist)
{
	int i;
	int count = group_info->ngroups;

	for (i = 0; i < group_info->nblocks; i++) {
		int cp_count = min(NGROUPS_PER_BLOCK, count);
		int off = i * NGROUPS_PER_BLOCK;
		int len = cp_count * sizeof(*grouplist);

		if (copy_from_user(group_info->blocks[i], grouplist+off, len))
			return -EFAULT;

		count -= cp_count;
	}
	return 0;
}
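
/*
 * For reference when reading the block arithmetic above and below:
 * GROUP_AT is defined in <linux/sched.h> (quoted here from memory,
 * check the header for the authoritative version) roughly as
 *
 *	#define GROUP_AT(gi, i) \
 *		((gi)->blocks[(i) / NGROUPS_PER_BLOCK][(i) % NGROUPS_PER_BLOCK])
 */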
/* a simple Shell sort */
static void groups_sort(struct group_info *group_info)
{
	int base, max, stride;
	int gidsetsize = group_info->ngroups;

	for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1)
		; /* nothing */
	stride /= 3;

	while (stride) {
		max = gidsetsize - stride;
		for (base = 0; base < max; base++) {
			int left = base;
			int right = left + stride;
			gid_t tmp = GROUP_AT(group_info, right);

			while (left >= 0 && GROUP_AT(group_info, left) > tmp) {
				GROUP_AT(group_info, right) =
				    GROUP_AT(group_info, left);
				right = left;
				left -= stride;
			}
			GROUP_AT(group_info, right) = tmp;
		}
		stride /= 3;
	}
}

/* a simple bsearch */
int groups_search(struct group_info *group_info, gid_t grp)
{
	unsigned int left, right;

	if (!group_info)
		return 0;

	left = 0;
	right = group_info->ngroups;
	while (left < right) {
		unsigned int mid = (left+right)/2;
		int cmp = grp - GROUP_AT(group_info, mid);
		if (cmp > 0)
			left = mid + 1;
		else if (cmp < 0)
			right = mid;
		else
			return 1;
	}
	return 0;
}

/* validate and set current->group_info */
int set_current_groups(struct group_info *group_info)
{
	int retval;
	struct group_info *old_info;

	retval = security_task_setgroups(group_info);
	if (retval)
		return retval;

	groups_sort(group_info);
	get_group_info(group_info);

	task_lock(current);
	old_info = current->group_info;
	current->group_info = group_info;
	task_unlock(current);

	put_group_info(old_info);

	return 0;
}

EXPORT_SYMBOL(set_current_groups);

asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist)
{
	int i = 0;

	/*
	 *	SMP: Nobody else can change our grouplist. Thus we are
	 *	safe.
	 */

	if (gidsetsize < 0)
		return -EINVAL;

	/* no need to grab task_lock here; it cannot change */
	i = current->group_info->ngroups;
	if (gidsetsize) {
		if (i > gidsetsize) {
			i = -EINVAL;
			goto out;
		}
		if (groups_to_user(grouplist, current->group_info)) {
			i = -EFAULT;
			goto out;
		}
	}
out:
	return i;
}

/*
 * SMP: Our groups are copy-on-write. We can set them safely
 * without another task interfering.
 */

asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist)
{
	struct group_info *group_info;
	int retval;

	if (!capable(CAP_SETGID))
		return -EPERM;
	if ((unsigned)gidsetsize > NGROUPS_MAX)
		return -EINVAL;

	group_info = groups_alloc(gidsetsize);
	if (!group_info)
		return -ENOMEM;
	retval = groups_from_user(group_info, grouplist);
	if (retval) {
		put_group_info(group_info);
		return retval;
	}

	retval = set_current_groups(group_info);
	put_group_info(group_info);

	return retval;
}
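
/*
 * A userspace sketch of the two calls above (illustrative, not part of
 * this file): size the list first, then fetch it; writing it back
 * requires CAP_SETGID, as enforced in sys_setgroups():
 *
 *	int n = getgroups(0, NULL);		returns the count
 *	gid_t *list = calloc(n, sizeof(gid_t));
 *	getgroups(n, list);
 *	setgroups(n, list);			needs CAP_SETGID
 */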
/*
 * Check whether we're fsgid/egid or in the supplemental group..
 */
int in_group_p(gid_t grp)
{
	int retval = 1;
	if (grp != current->fsgid)
		retval = groups_search(current->group_info, grp);
	return retval;
}

EXPORT_SYMBOL(in_group_p);

int in_egroup_p(gid_t grp)
{
	int retval = 1;
	if (grp != current->egid)
		retval = groups_search(current->group_info, grp);
	return retval;
}

EXPORT_SYMBOL(in_egroup_p);

DECLARE_RWSEM(uts_sem);

EXPORT_SYMBOL(uts_sem);

asmlinkage long sys_newuname(struct new_utsname __user * name)
{
	int errno = 0;

	down_read(&uts_sem);
	if (copy_to_user(name, utsname(), sizeof *name))
		errno = -EFAULT;
	up_read(&uts_sem);
	return errno;
}

asmlinkage long sys_sethostname(char __user *name, int len)
{
	int errno;
	char tmp[__NEW_UTS_LEN];

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;
	down_write(&uts_sem);
	errno = -EFAULT;
	if (!copy_from_user(tmp, name, len)) {
		memcpy(utsname()->nodename, tmp, len);
		utsname()->nodename[len] = 0;
		errno = 0;
	}
	up_write(&uts_sem);
	return errno;
}

#ifdef __ARCH_WANT_SYS_GETHOSTNAME

asmlinkage long sys_gethostname(char __user *name, int len)
{
	int i, errno;

	if (len < 0)
		return -EINVAL;
	down_read(&uts_sem);
	i = 1 + strlen(utsname()->nodename);
	if (i > len)
		i = len;
	errno = 0;
	if (copy_to_user(name, utsname()->nodename, i))
		errno = -EFAULT;
	up_read(&uts_sem);
	return errno;
}

#endif

/*
 * Only setdomainname; getdomainname can be implemented by calling
 * uname()
 */
asmlinkage long sys_setdomainname(char __user *name, int len)
{
	int errno;
	char tmp[__NEW_UTS_LEN];

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;

	down_write(&uts_sem);
	errno = -EFAULT;
	if (!copy_from_user(tmp, name, len)) {
		memcpy(utsname()->domainname, tmp, len);
		utsname()->domainname[len] = 0;
		errno = 0;
	}
	up_write(&uts_sem);
	return errno;
}

asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit __user *rlim)
{
	if (resource >= RLIM_NLIMITS)
		return -EINVAL;
	else {
		struct rlimit value;
		task_lock(current->group_leader);
		value = current->signal->rlim[resource];
		task_unlock(current->group_leader);
		return copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;
	}
}

#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT

/*
 * Back compatibility for getrlimit. Needed for some apps.
 */

asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim)
{
	struct rlimit x;
	if (resource >= RLIM_NLIMITS)
		return -EINVAL;

	task_lock(current->group_leader);
	x = current->signal->rlim[resource];
	task_unlock(current->group_leader);
	if (x.rlim_cur > 0x7FFFFFFF)
		x.rlim_cur = 0x7FFFFFFF;
	if (x.rlim_max > 0x7FFFFFFF)
		x.rlim_max = 0x7FFFFFFF;
	return copy_to_user(rlim, &x, sizeof(x)) ? -EFAULT : 0;
}

#endif

asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
{
	struct rlimit new_rlim, *old_rlim;
	unsigned long it_prof_secs;
	int retval;

	if (resource >= RLIM_NLIMITS)
		return -EINVAL;
	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
		return -EFAULT;
	if (new_rlim.rlim_cur > new_rlim.rlim_max)
		return -EINVAL;
	old_rlim = current->signal->rlim + resource;
	if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
	    !capable(CAP_SYS_RESOURCE))
		return -EPERM;
	if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > NR_OPEN)
		return -EPERM;

	retval = security_task_setrlimit(resource, &new_rlim);
	if (retval)
		return retval;

	task_lock(current->group_leader);
	*old_rlim = new_rlim;
	task_unlock(current->group_leader);

	if (resource != RLIMIT_CPU)
		goto out;

	/*
	 * RLIMIT_CPU handling.  Note that the kernel fails to return an error
	 * code if it rejected the user's attempt to set RLIMIT_CPU.  This is a
	 * very long-standing error, and fixing it now risks breakage of
	 * applications, so we live with it
	 */
	if (new_rlim.rlim_cur == RLIM_INFINITY)
		goto out;

	it_prof_secs = cputime_to_secs(current->signal->it_prof_expires);
	if (it_prof_secs == 0 || new_rlim.rlim_cur <= it_prof_secs) {
		unsigned long rlim_cur = new_rlim.rlim_cur;
		cputime_t cputime;

		if (rlim_cur == 0) {
			/*
			 * The caller is asking for an immediate RLIMIT_CPU
			 * expiry.  But we use the zero value to mean "it was
			 * never set".  So let's cheat and make it one second
			 * instead
			 */
			rlim_cur = 1;
		}
		cputime = secs_to_cputime(rlim_cur);
		read_lock(&tasklist_lock);
		spin_lock_irq(&current->sighand->siglock);
		set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
		spin_unlock_irq(&current->sighand->siglock);
		read_unlock(&tasklist_lock);
	}
out:
	return 0;
}
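
/*
 * A userspace sketch of the RLIMIT_CPU case handled above (illustrative,
 * not part of this file).  The limits are seconds of CPU time, not wall
 * time; SIGXCPU is delivered at the soft limit, SIGKILL at the hard one:
 *
 *	struct rlimit rl = { .rlim_cur = 1, .rlim_max = 2 };
 *	setrlimit(RLIMIT_CPU, &rl);
 */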
/*
 * It would make sense to put struct rusage in the task_struct,
 * except that would make the task_struct be *really big*.  After
 * task_struct gets moved into malloc'ed memory, it would
 * make sense to do this.  It will make moving the rest of the information
 * a lot simpler!  (Which we're not doing right now because we're not
 * measuring them yet).
 *
 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
 * races with threads incrementing their own counters.  But since word
 * reads are atomic, we either get new values or old values and we don't
 * care which for the sums.  We always take the siglock to protect reading
 * the c* fields from p->signal from races with exit.c updating those
 * fields when reaping, so a sample either gets all the additions of a
 * given child after it's reaped, or none so this sample is before reaping.
 *
 * Locking:
 * We need to take the siglock for CHILDREN, SELF and BOTH
 * for the cases current multithreaded, non-current single threaded, and
 * non-current multithreaded.  Thread traversal is now safe with
 * the siglock held.
 * Strictly speaking, we do not need to take the siglock if we are current and
 * single threaded, as no one else can take our signal_struct away, no one
 * else can reap the children to update signal->c* counters, and no one else
 * can race with the signal-> fields.  If we do not take any lock, the
 * signal-> fields could be read out of order while another thread was just
 * exiting.  So we should place a read memory barrier when we avoid the lock.
 * On the writer side, a write memory barrier is implied in __exit_signal,
 * as __exit_signal releases the siglock spinlock after updating the signal->
 * fields.  But we don't do this yet to keep things simple.
 */
static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
{
	struct task_struct *t;
	unsigned long flags;
	cputime_t utime, stime;

	memset((char *) r, 0, sizeof *r);
	utime = stime = cputime_zero;

	rcu_read_lock();
	if (!lock_task_sighand(p, &flags)) {
		rcu_read_unlock();
		return;
	}

	switch (who) {
	case RUSAGE_BOTH:
	case RUSAGE_CHILDREN:
		utime = p->signal->cutime;
		stime = p->signal->cstime;
		r->ru_nvcsw = p->signal->cnvcsw;
		r->ru_nivcsw = p->signal->cnivcsw;
		r->ru_minflt = p->signal->cmin_flt;
		r->ru_majflt = p->signal->cmaj_flt;

		if (who == RUSAGE_CHILDREN)
			break;

	case RUSAGE_SELF:
		utime = cputime_add(utime, p->signal->utime);
		stime = cputime_add(stime, p->signal->stime);
		r->ru_nvcsw += p->signal->nvcsw;
		r->ru_nivcsw += p->signal->nivcsw;
		r->ru_minflt += p->signal->min_flt;
		r->ru_majflt += p->signal->maj_flt;
		t = p;
		do {
			utime = cputime_add(utime, t->utime);
			stime = cputime_add(stime, t->stime);
			r->ru_nvcsw += t->nvcsw;
			r->ru_nivcsw += t->nivcsw;
			r->ru_minflt += t->min_flt;
			r->ru_majflt += t->maj_flt;
			t = next_thread(t);
		} while (t != p);
		break;

	default:
		BUG();
	}

	unlock_task_sighand(p, &flags);
	rcu_read_unlock();

	cputime_to_timeval(utime, &r->ru_utime);
	cputime_to_timeval(stime, &r->ru_stime);
}

int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
{
	struct rusage r;
	k_getrusage(p, who, &r);
	return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
}
asmlinkage long sys_getrusage(int who, struct rusage __user *ru)
{
	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
		return -EINVAL;
	return getrusage(current, who, ru);
}

asmlinkage long sys_umask(int mask)
{
	mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
	return mask;
}

asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
			  unsigned long arg4, unsigned long arg5)
{
	long error;

	error = security_task_prctl(option, arg2, arg3, arg4, arg5);
	if (error)
		return error;

	switch (option) {
	case PR_SET_PDEATHSIG:
		if (!valid_signal(arg2)) {
			error = -EINVAL;
			break;
		}
		current->pdeath_signal = arg2;
		break;
	case PR_GET_PDEATHSIG:
		error = put_user(current->pdeath_signal, (int __user *)arg2);
		break;
	case PR_GET_DUMPABLE:
		error = current->mm->dumpable;
		break;
	case PR_SET_DUMPABLE:
		if (arg2 < 0 || arg2 > 1) {
			error = -EINVAL;
			break;
		}
		current->mm->dumpable = arg2;
		break;

	case PR_SET_UNALIGN:
		error = SET_UNALIGN_CTL(current, arg2);
		break;
	case PR_GET_UNALIGN:
		error = GET_UNALIGN_CTL(current, arg2);
		break;
	case PR_SET_FPEMU:
		error = SET_FPEMU_CTL(current, arg2);
		break;
	case PR_GET_FPEMU:
		error = GET_FPEMU_CTL(current, arg2);
		break;
	case PR_SET_FPEXC:
		error = SET_FPEXC_CTL(current, arg2);
		break;
	case PR_GET_FPEXC:
		error = GET_FPEXC_CTL(current, arg2);
		break;
	case PR_GET_TIMING:
		error = PR_TIMING_STATISTICAL;
		break;
	case PR_SET_TIMING:
		if (arg2 == PR_TIMING_STATISTICAL)
			error = 0;
		else
			error = -EINVAL;
		break;

	case PR_GET_KEEPCAPS:
		if (current->keep_capabilities)
			error = 1;
		break;
	case PR_SET_KEEPCAPS:
		if (arg2 != 0 && arg2 != 1) {
			error = -EINVAL;
			break;
		}
		current->keep_capabilities = arg2;
		break;
	case PR_SET_NAME: {
		struct task_struct *me = current;
		unsigned char ncomm[sizeof(me->comm)];

		ncomm[sizeof(me->comm)-1] = 0;
		if (strncpy_from_user(ncomm, (char __user *)arg2,
					sizeof(me->comm)-1) < 0)
			return -EFAULT;
		set_task_comm(me, ncomm);
		return 0;
	}
	case PR_GET_NAME: {
		struct task_struct *me = current;
		unsigned char tcomm[sizeof(me->comm)];

		get_task_comm(tcomm, me);
		if (copy_to_user((char __user *)arg2, tcomm, sizeof(tcomm)))
			return -EFAULT;
		return 0;
	}
	case PR_GET_ENDIAN:
		error = GET_ENDIAN(current, arg2);
		break;
	case PR_SET_ENDIAN:
		error = SET_ENDIAN(current, arg2);
		break;

	default:
		error = -EINVAL;
		break;
	}
	return error;
}

asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep,
			   struct getcpu_cache __user *cache)
{
	int err = 0;
	int cpu = raw_smp_processor_id();
	if (cpup)
		err |= put_user(cpu, cpup);
	if (nodep)
		err |= put_user(cpu_to_node(cpu), nodep);
	if (cache) {
		/*
		 * The cache is not needed for this implementation,
		 * but make sure user programs pass something
		 * valid. vsyscall implementations can instead make
		 * good use of the cache. Only use t0 and t1 because
		 * these are available in both 32bit and 64bit ABI (no
		 * need for a compat_getcpu).
		 * 32bit has enough padding.
		 */
		unsigned long t0, t1;
		get_user(t0, &cache->blob[0]);
		get_user(t1, &cache->blob[1]);
		t0++;
		t1++;
		put_user(t0, &cache->blob[0]);
		put_user(t1, &cache->blob[1]);
	}
	return err ? -EFAULT : 0;
}
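
/*
 * A userspace sketch of the syscall above (illustrative, not part of
 * this file).  Systems without a libc wrapper can issue the raw
 * syscall; passing a NULL cache is fine for this implementation:
 *
 *	unsigned cpu, node;
 *	syscall(__NR_getcpu, &cpu, &node, NULL);
 */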