/*
 * linux/kernel/sys.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 */

#include <linux/export.h>
#include <linux/mm.h>
#include <linux/utsname.h>
#include <linux/mman.h>
#include <linux/reboot.h>
#include <linux/prctl.h>
#include <linux/highuid.h>
#include <linux/fs.h>
#include <linux/kmod.h>
#include <linux/perf_event.h>
#include <linux/resource.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
#include <linux/workqueue.h>
#include <linux/capability.h>
#include <linux/device.h>
#include <linux/key.h>
#include <linux/times.h>
#include <linux/posix-timers.h>
#include <linux/security.h>
#include <linux/dcookies.h>
#include <linux/suspend.h>
#include <linux/tty.h>
#include <linux/signal.h>
#include <linux/cn_proc.h>
#include <linux/getcpu.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/seccomp.h>
#include <linux/cpu.h>
#include <linux/personality.h>
#include <linux/ptrace.h>
#include <linux/fs_struct.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/gfp.h>
#include <linux/syscore_ops.h>
#include <linux/version.h>
#include <linux/ctype.h>

#include <linux/compat.h>
#include <linux/syscalls.h>
#include <linux/kprobes.h>
#include <linux/user_namespace.h>
#include <linux/binfmts.h>

#include <linux/kmsg_dump.h>
/* Move somewhere else to avoid recompiling? */
#include <generated/utsrelease.h>

#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/unistd.h>

#ifndef SET_UNALIGN_CTL
# define SET_UNALIGN_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_UNALIGN_CTL
# define GET_UNALIGN_CTL(a,b)	(-EINVAL)
#endif
#ifndef SET_FPEMU_CTL
# define SET_FPEMU_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_FPEMU_CTL
# define GET_FPEMU_CTL(a,b)	(-EINVAL)
#endif
#ifndef SET_FPEXC_CTL
# define SET_FPEXC_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_FPEXC_CTL
# define GET_FPEXC_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_ENDIAN
# define GET_ENDIAN(a,b)	(-EINVAL)
#endif
#ifndef SET_ENDIAN
# define SET_ENDIAN(a,b)	(-EINVAL)
#endif
#ifndef GET_TSC_CTL
# define GET_TSC_CTL(a)		(-EINVAL)
#endif
#ifndef SET_TSC_CTL
# define SET_TSC_CTL(a)		(-EINVAL)
#endif

/*
 * this is where the system-wide overflow UID and GID are defined, for
 * architectures that now have 32-bit UID/GID but didn't in the past
 */

int overflowuid = DEFAULT_OVERFLOWUID;
int overflowgid = DEFAULT_OVERFLOWGID;

EXPORT_SYMBOL(overflowuid);
EXPORT_SYMBOL(overflowgid);

/*
 * the same as above, but for filesystems which can only store a 16-bit
 * UID and GID. as such, this is needed on all architectures
 */

int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
int fs_overflowgid = DEFAULT_FS_OVERFLOWGID;

EXPORT_SYMBOL(fs_overflowuid);
EXPORT_SYMBOL(fs_overflowgid);
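
/*
 * Usage sketch (illustrative, not part of the original file): these
 * defaults are what the munged ID conversions fall back to when a
 * kuid/kgid has no mapping in the target namespace, e.g.
 *
 *	uid_t uid = from_kuid_munged(ns, kuid);
 *
 * yields overflowuid (65534 by default) if kuid is unmapped.
 */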

/*
 * this indicates whether you can reboot with ctrl-alt-del: the default is yes
 */

int C_A_D = 1;
struct pid *cad_pid;
EXPORT_SYMBOL(cad_pid);

/*
 * If set, this is used for preparing the system to power off.
 */

void (*pm_power_off_prepare)(void);

/*
 * Returns true if current's euid is the same as p's uid or euid,
 * or if current has CAP_SYS_NICE in p's user_ns.
 *
 * Called with rcu_read_lock, creds are safe
 */
static bool set_one_prio_perm(struct task_struct *p)
{
	const struct cred *cred = current_cred(), *pcred = __task_cred(p);

	if (uid_eq(pcred->uid,  cred->euid) ||
	    uid_eq(pcred->euid, cred->euid))
		return true;
	if (ns_capable(pcred->user_ns, CAP_SYS_NICE))
		return true;
	return false;
}

/*
 * set the priority of a task
 * - the caller must hold the RCU read lock
 */
static int set_one_prio(struct task_struct *p, int niceval, int error)
{
	int no_nice;

	if (!set_one_prio_perm(p)) {
		error = -EPERM;
		goto out;
	}
	if (niceval < task_nice(p) && !can_nice(p, niceval)) {
		error = -EACCES;
		goto out;
	}
	no_nice = security_task_setnice(p, niceval);
	if (no_nice) {
		error = no_nice;
		goto out;
	}
	if (error == -ESRCH)
		error = 0;
	set_user_nice(p, niceval);
out:
	return error;
}

SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	const struct cred *cred = current_cred();
	int error = -EINVAL;
	struct pid *pgrp;
	kuid_t uid;

	if (which > PRIO_USER || which < PRIO_PROCESS)
		goto out;

	/* normalize: avoid signed division (rounding problems) */
	error = -ESRCH;
	if (niceval < -20)
		niceval = -20;
	if (niceval > 19)
		niceval = 19;

	rcu_read_lock();
	read_lock(&tasklist_lock);
	switch (which) {
	case PRIO_PROCESS:
		if (who)
			p = find_task_by_vpid(who);
		else
			p = current;
		if (p)
			error = set_one_prio(p, niceval, error);
		break;
	case PRIO_PGRP:
		if (who)
			pgrp = find_vpid(who);
		else
			pgrp = task_pgrp(current);
		do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
			error = set_one_prio(p, niceval, error);
		} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
		break;
	case PRIO_USER:
		uid = make_kuid(cred->user_ns, who);
		user = cred->user;
		if (!who)
			uid = cred->uid;
		else if (!uid_eq(uid, cred->uid) &&
			 !(user = find_user(uid)))
			goto out_unlock;	/* No processes for this user */

		do_each_thread(g, p) {
			if (uid_eq(task_uid(p), uid))
				error = set_one_prio(p, niceval, error);
		} while_each_thread(g, p);
		if (!uid_eq(uid, cred->uid))
			free_uid(user);		/* For find_user() */
		break;
	}
out_unlock:
	read_unlock(&tasklist_lock);
	rcu_read_unlock();
out:
	return error;
}

/*
 * Ugh. To avoid negative return values, "getpriority()" will
 * not return the normal nice-value, but a negated value that
 * has been offset by 20 (ie it returns 40..1 instead of -20..19)
 * to stay compatible.
 */
SYSCALL_DEFINE2(getpriority, int, which, int, who)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	const struct cred *cred = current_cred();
	long niceval, retval = -ESRCH;
	struct pid *pgrp;
	kuid_t uid;

	if (which > PRIO_USER || which < PRIO_PROCESS)
		return -EINVAL;

	rcu_read_lock();
	read_lock(&tasklist_lock);
	switch (which) {
	case PRIO_PROCESS:
		if (who)
			p = find_task_by_vpid(who);
		else
			p = current;
		if (p) {
			niceval = 20 - task_nice(p);
			if (niceval > retval)
				retval = niceval;
		}
		break;
	case PRIO_PGRP:
		if (who)
			pgrp = find_vpid(who);
		else
			pgrp = task_pgrp(current);
		do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
			niceval = 20 - task_nice(p);
			if (niceval > retval)
				retval = niceval;
		} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
		break;
	case PRIO_USER:
		uid = make_kuid(cred->user_ns, who);
		user = cred->user;
		if (!who)
			uid = cred->uid;
		else if (!uid_eq(uid, cred->uid) &&
			 !(user = find_user(uid)))
			goto out_unlock;	/* No processes for this user */

		do_each_thread(g, p) {
			if (uid_eq(task_uid(p), uid)) {
				niceval = 20 - task_nice(p);
				if (niceval > retval)
					retval = niceval;
			}
		} while_each_thread(g, p);
		if (!uid_eq(uid, cred->uid))
			free_uid(user);		/* for find_user() */
		break;
	}
out_unlock:
	read_unlock(&tasklist_lock);
	rcu_read_unlock();

	return retval;
}
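
/*
 * Usage sketch (illustrative, not part of the original file): a raw
 * caller must undo the bias itself; the C library wrapper normally
 * does this on its behalf:
 *
 *	long ret = syscall(SYS_getpriority, PRIO_PROCESS, 0);
 *
 * ret is in 40..1 on success, so the conventional nice value is 20 - ret.
 */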

/**
 * emergency_restart - reboot the system
 *
 * Without shutting down any hardware or taking any locks
 * reboot the system. This is called when we know we are in
 * trouble so this is our best effort to reboot. This is
 * safe to call in interrupt context.
 */
void emergency_restart(void)
{
	kmsg_dump(KMSG_DUMP_EMERG);
	machine_emergency_restart();
}
EXPORT_SYMBOL_GPL(emergency_restart);

void kernel_restart_prepare(char *cmd)
{
	blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
	system_state = SYSTEM_RESTART;
	usermodehelper_disable();
	device_shutdown();
	syscore_shutdown();
}

/**
 * register_reboot_notifier - Register function to be called at reboot time
 * @nb: Info about notifier function to be called
 *
 * Registers a function with the list of functions
 * to be called at reboot time.
 *
 * Currently always returns zero, as blocking_notifier_chain_register()
 * always returns zero.
 */
int register_reboot_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&reboot_notifier_list, nb);
}
EXPORT_SYMBOL(register_reboot_notifier);

/**
 * unregister_reboot_notifier - Unregister previously registered reboot notifier
 * @nb: Hook to be unregistered
 *
 * Unregisters a previously registered reboot
 * notifier function.
 *
 * Returns zero on success, or %-ENOENT on failure.
 */
int unregister_reboot_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
}
EXPORT_SYMBOL(unregister_reboot_notifier);

/**
 * kernel_restart - reboot the system
 * @cmd: pointer to buffer containing command to execute for restart
 *	 or %NULL
 *
 * Shutdown everything and perform a clean reboot.
 * This is not safe to call in interrupt context.
 */
void kernel_restart(char *cmd)
{
	kernel_restart_prepare(cmd);
	disable_nonboot_cpus();
	if (!cmd)
		printk(KERN_EMERG "Restarting system.\n");
	else
		printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
	kmsg_dump(KMSG_DUMP_RESTART);
	machine_restart(cmd);
}
EXPORT_SYMBOL_GPL(kernel_restart);

static void kernel_shutdown_prepare(enum system_states state)
{
	blocking_notifier_call_chain(&reboot_notifier_list,
		(state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL);
	system_state = state;
	usermodehelper_disable();
	device_shutdown();
}
/**
 * kernel_halt - halt the system
 *
 * Shutdown everything and perform a clean system halt.
 */
void kernel_halt(void)
{
	kernel_shutdown_prepare(SYSTEM_HALT);
	syscore_shutdown();
	printk(KERN_EMERG "System halted.\n");
	kmsg_dump(KMSG_DUMP_HALT);
	machine_halt();
}

EXPORT_SYMBOL_GPL(kernel_halt);

/**
 * kernel_power_off - power_off the system
 *
 * Shutdown everything and perform a clean system power_off.
 */
void kernel_power_off(void)
{
	kernel_shutdown_prepare(SYSTEM_POWER_OFF);
	if (pm_power_off_prepare)
		pm_power_off_prepare();
	disable_nonboot_cpus();
	syscore_shutdown();
	printk(KERN_EMERG "Power down.\n");
	kmsg_dump(KMSG_DUMP_POWEROFF);
	machine_power_off();
}
EXPORT_SYMBOL_GPL(kernel_power_off);

static DEFINE_MUTEX(reboot_mutex);

/*
 * Reboot system call: for obvious reasons only root may call it,
 * and even root needs to set up some magic numbers in the registers
 * so that some mistake won't make this reboot the whole machine.
 * You can also set the meaning of the ctrl-alt-del-key here.
 *
 * reboot doesn't sync: do that yourself before calling this.
 */
SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
		void __user *, arg)
{
	struct pid_namespace *pid_ns = task_active_pid_ns(current);
	char buffer[256];
	int ret = 0;

	/* We only trust the superuser with rebooting the system. */
	if (!ns_capable(pid_ns->user_ns, CAP_SYS_BOOT))
		return -EPERM;

	/* For safety, we require "magic" arguments. */
	if (magic1 != LINUX_REBOOT_MAGIC1 ||
	    (magic2 != LINUX_REBOOT_MAGIC2 &&
	     magic2 != LINUX_REBOOT_MAGIC2A &&
	     magic2 != LINUX_REBOOT_MAGIC2B &&
	     magic2 != LINUX_REBOOT_MAGIC2C))
		return -EINVAL;

	/*
	 * If pid namespaces are enabled and the current task is in a child
	 * pid_namespace, the command is handled by reboot_pid_ns() which will
	 * call do_exit().
	 */
	ret = reboot_pid_ns(pid_ns, cmd);
	if (ret)
		return ret;

	/* Instead of trying to make the power_off code look like
	 * halt when pm_power_off is not set do it the easy way.
	 */
	if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
		cmd = LINUX_REBOOT_CMD_HALT;

	mutex_lock(&reboot_mutex);
	switch (cmd) {
	case LINUX_REBOOT_CMD_RESTART:
		kernel_restart(NULL);
		break;

	case LINUX_REBOOT_CMD_CAD_ON:
		C_A_D = 1;
		break;

	case LINUX_REBOOT_CMD_CAD_OFF:
		C_A_D = 0;
		break;

	case LINUX_REBOOT_CMD_HALT:
		kernel_halt();
		do_exit(0);
		panic("cannot halt");

	case LINUX_REBOOT_CMD_POWER_OFF:
		kernel_power_off();
		do_exit(0);
		break;

	case LINUX_REBOOT_CMD_RESTART2:
		if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) {
			ret = -EFAULT;
			break;
		}
		buffer[sizeof(buffer) - 1] = '\0';

		kernel_restart(buffer);
		break;

#ifdef CONFIG_KEXEC
	case LINUX_REBOOT_CMD_KEXEC:
		ret = kernel_kexec();
		break;
#endif

#ifdef CONFIG_HIBERNATION
	case LINUX_REBOOT_CMD_SW_SUSPEND:
		ret = hibernate();
		break;
#endif

	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&reboot_mutex);
	return ret;
}
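
/*
 * Usage sketch (illustrative, not part of the original file): a
 * privileged userspace caller must pass both magic numbers, and should
 * sync beforehand since reboot(2) does not do it:
 *
 *	sync();
 *	syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2,
 *		LINUX_REBOOT_CMD_RESTART, NULL);
 */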

static void deferred_cad(struct work_struct *dummy)
{
	kernel_restart(NULL);
}

/*
 * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
 * As it's called within an interrupt, it may NOT sync: the only choice
 * is whether to reboot at once, or just ignore the ctrl-alt-del.
 */
void ctrl_alt_del(void)
{
	static DECLARE_WORK(cad_work, deferred_cad);

	if (C_A_D)
		schedule_work(&cad_work);
	else
		kill_cad_pid(SIGINT, 1);
}

/*
 * Unprivileged users may change the real gid to the effective gid
 * or vice versa. (BSD-style)
 *
 * If you set the real gid at all, or set the effective gid to a value not
 * equal to the real gid, then the saved gid is set to the new effective gid.
 *
 * This makes it possible for a setgid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setregid() will be
 * 100% compatible with BSD. A program which uses just setgid() will be
 * 100% compatible with POSIX with saved IDs.
 *
 * SMP: There are no races, the GIDs are checked only by filesystem
 *      operations (as far as semantic preservation is concerned).
 */
SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kgid_t krgid, kegid;

	krgid = make_kgid(ns, rgid);
	kegid = make_kgid(ns, egid);

	if ((rgid != (gid_t) -1) && !gid_valid(krgid))
		return -EINVAL;
	if ((egid != (gid_t) -1) && !gid_valid(kegid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (rgid != (gid_t) -1) {
		if (gid_eq(old->gid, krgid) ||
		    gid_eq(old->egid, krgid) ||
		    nsown_capable(CAP_SETGID))
			new->gid = krgid;
		else
			goto error;
	}
	if (egid != (gid_t) -1) {
		if (gid_eq(old->gid, kegid) ||
		    gid_eq(old->egid, kegid) ||
		    gid_eq(old->sgid, kegid) ||
		    nsown_capable(CAP_SETGID))
			new->egid = kegid;
		else
			goto error;
	}

	if (rgid != (gid_t) -1 ||
	    (egid != (gid_t) -1 && !gid_eq(kegid, old->gid)))
		new->sgid = new->egid;
	new->fsgid = new->egid;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

/*
 * setgid() is implemented like SysV w/ SAVED_IDS
 *
 * SMP: Same implicit races as above.
 */
SYSCALL_DEFINE1(setgid, gid_t, gid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kgid_t kgid;

	kgid = make_kgid(ns, gid);
	if (!gid_valid(kgid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (nsown_capable(CAP_SETGID))
		new->gid = new->egid = new->sgid = new->fsgid = kgid;
	else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid))
		new->egid = new->fsgid = kgid;
	else
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

/*
 * change the user struct in a credentials set to match the new UID
 */
static int set_user(struct cred *new)
{
	struct user_struct *new_user;

	new_user = alloc_uid(new->uid);
	if (!new_user)
		return -EAGAIN;

	/*
	 * We don't fail in case of NPROC limit excess here because too many
	 * poorly written programs don't check set*uid() return code, assuming
	 * it never fails if called by root. We may still enforce NPROC limit
	 * for programs doing set*uid()+execve() by harmlessly deferring the
	 * failure to the execve() stage.
	 */
	if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) &&
	    new_user != INIT_USER)
		current->flags |= PF_NPROC_EXCEEDED;
	else
		current->flags &= ~PF_NPROC_EXCEEDED;

	free_uid(new->user);
	new->user = new_user;
	return 0;
}

/*
 * Unprivileged users may change the real uid to the effective uid
 * or vice versa. (BSD-style)
 *
 * If you set the real uid at all, or set the effective uid to a value not
 * equal to the real uid, then the saved uid is set to the new effective uid.
 *
 * This makes it possible for a setuid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setreuid() will be
 * 100% compatible with BSD. A program which uses just setuid() will be
 * 100% compatible with POSIX with saved IDs.
 */
SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kuid_t kruid, keuid;

	kruid = make_kuid(ns, ruid);
	keuid = make_kuid(ns, euid);

	if ((ruid != (uid_t) -1) && !uid_valid(kruid))
		return -EINVAL;
	if ((euid != (uid_t) -1) && !uid_valid(keuid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (ruid != (uid_t) -1) {
		new->uid = kruid;
		if (!uid_eq(old->uid, kruid) &&
		    !uid_eq(old->euid, kruid) &&
		    !nsown_capable(CAP_SETUID))
			goto error;
	}

	if (euid != (uid_t) -1) {
		new->euid = keuid;
		if (!uid_eq(old->uid, keuid) &&
		    !uid_eq(old->euid, keuid) &&
		    !uid_eq(old->suid, keuid) &&
		    !nsown_capable(CAP_SETUID))
			goto error;
	}

	if (!uid_eq(new->uid, old->uid)) {
		retval = set_user(new);
		if (retval < 0)
			goto error;
	}
	if (ruid != (uid_t) -1 ||
	    (euid != (uid_t) -1 && !uid_eq(keuid, old->uid)))
		new->suid = new->euid;
	new->fsuid = new->euid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_RE);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}
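
/*
 * Usage sketch (illustrative, not part of the original file): the
 * BSD-style swap described above lets a set-uid program drop
 * privileges temporarily and regain them later:
 *
 *	setreuid(geteuid(), getuid());	// swap real and effective
 *	... do unprivileged work ...
 *	setreuid(geteuid(), getuid());	// swap back
 */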

/*
 * setuid() is implemented like SysV with SAVED_IDS
 *
 * Note that SAVED_ID's is deficient in that a setuid root program
 * like sendmail, for example, cannot set its uid to be a normal
 * user and then switch back, because if you're root, setuid() sets
 * the saved uid too. If you don't like this, blame the bright people
 * in the POSIX committee and/or USG. Note that the BSD-style setreuid()
 * will allow a root program to temporarily drop privileges and be able to
 * regain them by swapping the real and effective uid.
 */
SYSCALL_DEFINE1(setuid, uid_t, uid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kuid_t kuid;

	kuid = make_kuid(ns, uid);
	if (!uid_valid(kuid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (nsown_capable(CAP_SETUID)) {
		new->suid = new->uid = kuid;
		if (!uid_eq(kuid, old->uid)) {
			retval = set_user(new);
			if (retval < 0)
				goto error;
		}
	} else if (!uid_eq(kuid, old->uid) && !uid_eq(kuid, new->suid)) {
		goto error;
	}

	new->fsuid = new->euid = kuid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_ID);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}


/*
 * This function implements a generic ability to update ruid, euid,
 * and suid. This allows you to implement the 4.4 compatible seteuid().
 */
SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kuid_t kruid, keuid, ksuid;

	kruid = make_kuid(ns, ruid);
	keuid = make_kuid(ns, euid);
	ksuid = make_kuid(ns, suid);

	if ((ruid != (uid_t) -1) && !uid_valid(kruid))
		return -EINVAL;

	if ((euid != (uid_t) -1) && !uid_valid(keuid))
		return -EINVAL;

	if ((suid != (uid_t) -1) && !uid_valid(ksuid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;

	old = current_cred();

	retval = -EPERM;
	if (!nsown_capable(CAP_SETUID)) {
		if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) &&
		    !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
			goto error;
		if (euid != (uid_t) -1 && !uid_eq(keuid, old->uid) &&
		    !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid))
			goto error;
		if (suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) &&
		    !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid))
			goto error;
	}

	if (ruid != (uid_t) -1) {
		new->uid = kruid;
		if (!uid_eq(kruid, old->uid)) {
			retval = set_user(new);
			if (retval < 0)
				goto error;
		}
	}
	if (euid != (uid_t) -1)
		new->euid = keuid;
	if (suid != (uid_t) -1)
		new->suid = ksuid;
	new->fsuid = new->euid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_RES);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}
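
/*
 * Usage sketch (illustrative, not part of the original file): setting
 * all three IDs at once drops privileges permanently, since no saved
 * uid is left to switch back to (unpriv_uid is a placeholder):
 *
 *	if (setresuid(unpriv_uid, unpriv_uid, unpriv_uid) != 0)
 *		abort();
 */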

SYSCALL_DEFINE3(getresuid, uid_t __user *, ruidp, uid_t __user *, euidp, uid_t __user *, suidp)
{
	const struct cred *cred = current_cred();
	int retval;
	uid_t ruid, euid, suid;

	ruid = from_kuid_munged(cred->user_ns, cred->uid);
	euid = from_kuid_munged(cred->user_ns, cred->euid);
	suid = from_kuid_munged(cred->user_ns, cred->suid);

	if (!(retval = put_user(ruid, ruidp)) &&
	    !(retval = put_user(euid, euidp)))
		retval = put_user(suid, suidp);

	return retval;
}

/*
 * Same as above, but for rgid, egid, sgid.
 */
SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kgid_t krgid, kegid, ksgid;

	krgid = make_kgid(ns, rgid);
	kegid = make_kgid(ns, egid);
	ksgid = make_kgid(ns, sgid);

	if ((rgid != (gid_t) -1) && !gid_valid(krgid))
		return -EINVAL;
	if ((egid != (gid_t) -1) && !gid_valid(kegid))
		return -EINVAL;
	if ((sgid != (gid_t) -1) && !gid_valid(ksgid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (!nsown_capable(CAP_SETGID)) {
		if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) &&
		    !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid))
			goto error;
		if (egid != (gid_t) -1 && !gid_eq(kegid, old->gid) &&
		    !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid))
			goto error;
		if (sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) &&
		    !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid))
			goto error;
	}

	if (rgid != (gid_t) -1)
		new->gid = krgid;
	if (egid != (gid_t) -1)
		new->egid = kegid;
	if (sgid != (gid_t) -1)
		new->sgid = ksgid;
	new->fsgid = new->egid;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t __user *, sgidp)
{
	const struct cred *cred = current_cred();
	int retval;
	gid_t rgid, egid, sgid;

	rgid = from_kgid_munged(cred->user_ns, cred->gid);
	egid = from_kgid_munged(cred->user_ns, cred->egid);
	sgid = from_kgid_munged(cred->user_ns, cred->sgid);

	if (!(retval = put_user(rgid, rgidp)) &&
	    !(retval = put_user(egid, egidp)))
		retval = put_user(sgid, sgidp);

	return retval;
}


/*
 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
 * is used for "access()" and for the NFS daemon (letting nfsd stay at
 * whatever uid it wants to). It normally shadows "euid", except when
 * explicitly set by setfsuid() or for access..
 */
SYSCALL_DEFINE1(setfsuid, uid_t, uid)
{
	const struct cred *old;
	struct cred *new;
	uid_t old_fsuid;
	kuid_t kuid;

	old = current_cred();
	old_fsuid = from_kuid_munged(old->user_ns, old->fsuid);

	kuid = make_kuid(old->user_ns, uid);
	if (!uid_valid(kuid))
		return old_fsuid;

	new = prepare_creds();
	if (!new)
		return old_fsuid;

	if (uid_eq(kuid, old->uid)  || uid_eq(kuid, old->euid) ||
	    uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) ||
	    nsown_capable(CAP_SETUID)) {
		if (!uid_eq(kuid, old->fsuid)) {
			new->fsuid = kuid;
			if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
				goto change_okay;
		}
	}

	abort_creds(new);
	return old_fsuid;

change_okay:
	commit_creds(new);
	return old_fsuid;
}

/*
 * Same as above, but for the fsgid ("Samma på svenska" - Swedish for
 * "the same, in Swedish").
 */
SYSCALL_DEFINE1(setfsgid, gid_t, gid)
{
	const struct cred *old;
	struct cred *new;
	gid_t old_fsgid;
	kgid_t kgid;

	old = current_cred();
	old_fsgid = from_kgid_munged(old->user_ns, old->fsgid);

	kgid = make_kgid(old->user_ns, gid);
	if (!gid_valid(kgid))
		return old_fsgid;

	new = prepare_creds();
	if (!new)
		return old_fsgid;

	if (gid_eq(kgid, old->gid)  || gid_eq(kgid, old->egid) ||
	    gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) ||
	    nsown_capable(CAP_SETGID)) {
		if (!gid_eq(kgid, old->fsgid)) {
			new->fsgid = kgid;
			goto change_okay;
		}
	}

	abort_creds(new);
	return old_fsgid;

change_okay:
	commit_creds(new);
	return old_fsgid;
}

void do_sys_times(struct tms *tms)
{
	cputime_t tgutime, tgstime, cutime, cstime;

	spin_lock_irq(&current->sighand->siglock);
	thread_group_cputime_adjusted(current, &tgutime, &tgstime);
	cutime = current->signal->cutime;
	cstime = current->signal->cstime;
	spin_unlock_irq(&current->sighand->siglock);
	tms->tms_utime = cputime_to_clock_t(tgutime);
	tms->tms_stime = cputime_to_clock_t(tgstime);
	tms->tms_cutime = cputime_to_clock_t(cutime);
	tms->tms_cstime = cputime_to_clock_t(cstime);
}

SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
{
	if (tbuf) {
		struct tms tmp;

		do_sys_times(&tmp);
		if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
			return -EFAULT;
	}
	force_successful_syscall_return();
	return (long) jiffies_64_to_clock_t(get_jiffies_64());
}

/*
 * This needs some heavy checking ...
 * I just haven't the stomach for it. I also don't fully
 * understand sessions/pgrp etc. Let somebody who does explain it.
 *
 * OK, I think I have the protection semantics right.... this is really
 * only important on a multi-user system anyway, to make sure one user
 * can't send a signal to a process owned by another. -TYT, 12/12/91
 *
 * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
 * LBT 04.03.94
 */
SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
{
	struct task_struct *p;
	struct task_struct *group_leader = current->group_leader;
	struct pid *pgrp;
	int err;

	if (!pid)
		pid = task_pid_vnr(group_leader);
	if (!pgid)
		pgid = pid;
	if (pgid < 0)
		return -EINVAL;
	rcu_read_lock();

	/* From this point forward we keep holding onto the tasklist lock
	 * so that our parent does not change from under us. -DaveM
	 */
	write_lock_irq(&tasklist_lock);

	err = -ESRCH;
	p = find_task_by_vpid(pid);
	if (!p)
		goto out;

	err = -EINVAL;
	if (!thread_group_leader(p))
		goto out;

	if (same_thread_group(p->real_parent, group_leader)) {
		err = -EPERM;
		if (task_session(p) != task_session(group_leader))
			goto out;
		err = -EACCES;
		if (p->did_exec)
			goto out;
	} else {
		err = -ESRCH;
		if (p != group_leader)
			goto out;
	}

	err = -EPERM;
	if (p->signal->leader)
		goto out;

	pgrp = task_pid(p);
	if (pgid != pid) {
		struct task_struct *g;

		pgrp = find_vpid(pgid);
		g = pid_task(pgrp, PIDTYPE_PGID);
		if (!g || task_session(g) != task_session(group_leader))
			goto out;
	}

	err = security_task_setpgid(p, pgid);
	if (err)
		goto out;

	if (task_pgrp(p) != pgrp)
		change_pid(p, PIDTYPE_PGID, pgrp);

	err = 0;
out:
	/* All paths lead to here, thus we are safe. -DaveM */
	write_unlock_irq(&tasklist_lock);
	rcu_read_unlock();
	return err;
}

SYSCALL_DEFINE1(getpgid, pid_t, pid)
{
	struct task_struct *p;
	struct pid *grp;
	int retval;

	rcu_read_lock();
	if (!pid)
		grp = task_pgrp(current);
	else {
		retval = -ESRCH;
		p = find_task_by_vpid(pid);
		if (!p)
			goto out;
		grp = task_pgrp(p);
		if (!grp)
			goto out;

		retval = security_task_getpgid(p);
		if (retval)
			goto out;
	}
	retval = pid_vnr(grp);
out:
	rcu_read_unlock();
	return retval;
}

#ifdef __ARCH_WANT_SYS_GETPGRP

SYSCALL_DEFINE0(getpgrp)
{
	return sys_getpgid(0);
}

#endif

SYSCALL_DEFINE1(getsid, pid_t, pid)
{
	struct task_struct *p;
	struct pid *sid;
	int retval;

	rcu_read_lock();
	if (!pid)
		sid = task_session(current);
	else {
		retval = -ESRCH;
		p = find_task_by_vpid(pid);
		if (!p)
			goto out;
		sid = task_session(p);
		if (!sid)
			goto out;

		retval = security_task_getsid(p);
		if (retval)
			goto out;
	}
	retval = pid_vnr(sid);
out:
	rcu_read_unlock();
	return retval;
}

SYSCALL_DEFINE0(setsid)
{
	struct task_struct *group_leader = current->group_leader;
	struct pid *sid = task_pid(group_leader);
	pid_t session = pid_vnr(sid);
	int err = -EPERM;

	write_lock_irq(&tasklist_lock);
	/* Fail if I am already a session leader */
	if (group_leader->signal->leader)
		goto out;

	/* Fail if a process group id already exists that equals the
	 * proposed session id.
	 */
	if (pid_task(sid, PIDTYPE_PGID))
		goto out;

	group_leader->signal->leader = 1;
	__set_special_pids(sid);

	proc_clear_tty(group_leader);

	err = session;
out:
	write_unlock_irq(&tasklist_lock);
	if (err > 0) {
		proc_sid_connector(group_leader);
		sched_autogroup_create_attach(group_leader);
	}
	return err;
}

DECLARE_RWSEM(uts_sem);

#ifdef COMPAT_UTS_MACHINE
#define override_architecture(name) \
	(personality(current->personality) == PER_LINUX32 && \
	 copy_to_user(name->machine, COMPAT_UTS_MACHINE, \
		      sizeof(COMPAT_UTS_MACHINE)))
#else
#define override_architecture(name)	0
#endif

/*
 * Work around broken programs that cannot handle "Linux 3.0".
 * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
 */
static int override_release(char __user *release, size_t len)
{
	int ret = 0;

	if (current->personality & UNAME26) {
		const char *rest = UTS_RELEASE;
		char buf[65] = { 0 };
		int ndots = 0;
		unsigned v;
		size_t copy;

		while (*rest) {
			if (*rest == '.' && ++ndots >= 3)
				break;
			if (!isdigit(*rest) && *rest != '.')
				break;
			rest++;
		}
		v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40;
		copy = clamp_t(size_t, len, 1, sizeof(buf));
		copy = scnprintf(buf, copy, "2.6.%u%s", v, rest);
		ret = copy_to_user(release, buf, copy + 1);
	}
	return ret;
}
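
/*
 * Worked example (illustrative, not part of the original file): with a
 * LINUX_VERSION_CODE for 3.8 and an UTS_RELEASE of "3.8.0-rc1", the
 * digits-and-dots scan above stops at the '-', v is 8 + 40 = 48, and a
 * UNAME26 task is shown "2.6.48-rc1".
 */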

SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
{
	int errno = 0;

	down_read(&uts_sem);
	if (copy_to_user(name, utsname(), sizeof *name))
		errno = -EFAULT;
	up_read(&uts_sem);

	if (!errno && override_release(name->release, sizeof(name->release)))
		errno = -EFAULT;
	if (!errno && override_architecture(name))
		errno = -EFAULT;
	return errno;
}

#ifdef __ARCH_WANT_SYS_OLD_UNAME
/*
 * Old cruft
 */
SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
{
	int error = 0;

	if (!name)
		return -EFAULT;

	down_read(&uts_sem);
	if (copy_to_user(name, utsname(), sizeof(*name)))
		error = -EFAULT;
	up_read(&uts_sem);

	if (!error && override_release(name->release, sizeof(name->release)))
		error = -EFAULT;
	if (!error && override_architecture(name))
		error = -EFAULT;
	return error;
}

SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
{
	int error;

	if (!name)
		return -EFAULT;
	if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
		return -EFAULT;

	down_read(&uts_sem);
	error = __copy_to_user(&name->sysname, &utsname()->sysname,
			       __OLD_UTS_LEN);
	error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->nodename, &utsname()->nodename,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->release, &utsname()->release,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->release + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->version, &utsname()->version,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->version + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->machine, &utsname()->machine,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->machine + __OLD_UTS_LEN);
	up_read(&uts_sem);

	if (!error && override_architecture(name))
		error = -EFAULT;
	if (!error && override_release(name->release, sizeof(name->release)))
		error = -EFAULT;
	return error ? -EFAULT : 0;
}
#endif

SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
{
	int errno;
	char tmp[__NEW_UTS_LEN];

	if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
		return -EPERM;

	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;
	down_write(&uts_sem);
	errno = -EFAULT;
	if (!copy_from_user(tmp, name, len)) {
		struct new_utsname *u = utsname();

		memcpy(u->nodename, tmp, len);
		memset(u->nodename + len, 0, sizeof(u->nodename) - len);
		errno = 0;
		uts_proc_notify(UTS_PROC_HOSTNAME);
	}
	up_write(&uts_sem);
	return errno;
}

#ifdef __ARCH_WANT_SYS_GETHOSTNAME

SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
{
	int i, errno;
	struct new_utsname *u;

	if (len < 0)
		return -EINVAL;
	down_read(&uts_sem);
	u = utsname();
	i = 1 + strlen(u->nodename);
	if (i > len)
		i = len;
	errno = 0;
	if (copy_to_user(name, u->nodename, i))
		errno = -EFAULT;
	up_read(&uts_sem);
	return errno;
}

#endif

/*
 * Only setdomainname; getdomainname can be implemented by calling
 * uname()
 */
SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
{
	int errno;
	char tmp[__NEW_UTS_LEN];

	if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
		return -EPERM;
	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;

	down_write(&uts_sem);
	errno = -EFAULT;
	if (!copy_from_user(tmp, name, len)) {
		struct new_utsname *u = utsname();

		memcpy(u->domainname, tmp, len);
		memset(u->domainname + len, 0, sizeof(u->domainname) - len);
		errno = 0;
		uts_proc_notify(UTS_PROC_DOMAINNAME);
	}
	up_write(&uts_sem);
	return errno;
}

SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
	struct rlimit value;
	int ret;

	ret = do_prlimit(current, resource, NULL, &value);
	if (!ret)
		ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;

	return ret;
}

#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT

/*
 * Back compatibility for getrlimit. Needed for some apps.
 */

SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
		struct rlimit __user *, rlim)
{
	struct rlimit x;
	if (resource >= RLIM_NLIMITS)
		return -EINVAL;

	task_lock(current->group_leader);
	x = current->signal->rlim[resource];
	task_unlock(current->group_leader);
	if (x.rlim_cur > 0x7FFFFFFF)
		x.rlim_cur = 0x7FFFFFFF;
	if (x.rlim_max > 0x7FFFFFFF)
		x.rlim_max = 0x7FFFFFFF;
	return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0;
}

#endif

static inline bool rlim64_is_infinity(__u64 rlim64)
{
#if BITS_PER_LONG < 64
	return rlim64 >= ULONG_MAX;
#else
	return rlim64 == RLIM64_INFINITY;
#endif
}

static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64)
{
	if (rlim->rlim_cur == RLIM_INFINITY)
		rlim64->rlim_cur = RLIM64_INFINITY;
	else
		rlim64->rlim_cur = rlim->rlim_cur;
	if (rlim->rlim_max == RLIM_INFINITY)
		rlim64->rlim_max = RLIM64_INFINITY;
	else
		rlim64->rlim_max = rlim->rlim_max;
}

static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim)
{
	if (rlim64_is_infinity(rlim64->rlim_cur))
		rlim->rlim_cur = RLIM_INFINITY;
	else
		rlim->rlim_cur = (unsigned long)rlim64->rlim_cur;
	if (rlim64_is_infinity(rlim64->rlim_max))
		rlim->rlim_max = RLIM_INFINITY;
	else
		rlim->rlim_max = (unsigned long)rlim64->rlim_max;
}

/* make sure you are allowed to change @tsk limits before calling this */
int do_prlimit(struct task_struct *tsk, unsigned int resource,
		struct rlimit *new_rlim, struct rlimit *old_rlim)
{
	struct rlimit *rlim;
	int retval = 0;

	if (resource >= RLIM_NLIMITS)
		return -EINVAL;
	if (new_rlim) {
		if (new_rlim->rlim_cur > new_rlim->rlim_max)
			return -EINVAL;
		if (resource == RLIMIT_NOFILE &&
		    new_rlim->rlim_max > sysctl_nr_open)
			return -EPERM;
	}

	/* protect tsk->signal and tsk->sighand from disappearing */
	read_lock(&tasklist_lock);
	if (!tsk->sighand) {
		retval = -ESRCH;
		goto out;
	}

	rlim = tsk->signal->rlim + resource;
	task_lock(tsk->group_leader);
	if (new_rlim) {
		/* Keep the capable check against init_user_ns until
		   cgroups can contain all limits */
		if (new_rlim->rlim_max > rlim->rlim_max &&
		    !capable(CAP_SYS_RESOURCE))
			retval = -EPERM;
		if (!retval)
			retval = security_task_setrlimit(tsk->group_leader,
					resource, new_rlim);
		if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
			/*
			 * The caller is asking for an immediate RLIMIT_CPU
			 * expiry. But we use the zero value to mean "it was
			 * never set". So let's cheat and make it one second
			 * instead
			 */
			new_rlim->rlim_cur = 1;
		}
	}
	if (!retval) {
		if (old_rlim)
			*old_rlim = *rlim;
		if (new_rlim)
			*rlim = *new_rlim;
	}
	task_unlock(tsk->group_leader);

	/*
	 * RLIMIT_CPU handling. Note that the kernel fails to return an error
	 * code if it rejected the user's attempt to set RLIMIT_CPU. This is a
	 * very long-standing error, and fixing it now risks breakage of
	 * applications, so we live with it
	 */
	if (!retval && new_rlim && resource == RLIMIT_CPU &&
	    new_rlim->rlim_cur != RLIM_INFINITY)
		update_rlimit_cpu(tsk, new_rlim->rlim_cur);
out:
	read_unlock(&tasklist_lock);
	return retval;
}

/* rcu lock must be held */
static int check_prlimit_permission(struct task_struct *task)
{
	const struct cred *cred = current_cred(), *tcred;

	if (current == task)
		return 0;

	tcred = __task_cred(task);
	if (uid_eq(cred->uid, tcred->euid) &&
	    uid_eq(cred->uid, tcred->suid) &&
	    uid_eq(cred->uid, tcred->uid)  &&
	    gid_eq(cred->gid, tcred->egid) &&
	    gid_eq(cred->gid, tcred->sgid) &&
	    gid_eq(cred->gid, tcred->gid))
		return 0;
	if (ns_capable(tcred->user_ns, CAP_SYS_RESOURCE))
		return 0;

	return -EPERM;
}

SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
		const struct rlimit64 __user *, new_rlim,
		struct rlimit64 __user *, old_rlim)
{
	struct rlimit64 old64, new64;
	struct rlimit old, new;
	struct task_struct *tsk;
	int ret;

	if (new_rlim) {
		if (copy_from_user(&new64, new_rlim, sizeof(new64)))
			return -EFAULT;
		rlim64_to_rlim(&new64, &new);
	}

	rcu_read_lock();
	tsk = pid ? find_task_by_vpid(pid) : current;
	if (!tsk) {
		rcu_read_unlock();
		return -ESRCH;
	}
	ret = check_prlimit_permission(tsk);
	if (ret) {
		rcu_read_unlock();
		return ret;
	}
	get_task_struct(tsk);
	rcu_read_unlock();

	ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL,
			old_rlim ? &old : NULL);

	if (!ret && old_rlim) {
		rlim_to_rlim64(&old, &old64);
		if (copy_to_user(old_rlim, &old64, sizeof(old64)))
			ret = -EFAULT;
	}

	put_task_struct(tsk);
	return ret;
}

SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
	struct rlimit new_rlim;

	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
		return -EFAULT;
	return do_prlimit(current, resource, &new_rlim, NULL);
}
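
/*
 * Usage sketch (illustrative, not part of the original file): glibc
 * exposes this syscall as prlimit(2), so another task's limit can be
 * changed and the old pair read back in one call:
 *
 *	struct rlimit new = { 4096, 4096 }, old;
 *	prlimit(pid, RLIMIT_NOFILE, &new, &old);
 */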

/*
 * It would make sense to put struct rusage in the task_struct,
 * except that would make the task_struct be *really big*. After
 * task_struct gets moved into malloc'ed memory, it would
 * make sense to do this. It will make moving the rest of the information
 * a lot simpler! (Which we're not doing right now because we're not
 * measuring them yet).
 *
 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
 * races with threads incrementing their own counters. But since word
 * reads are atomic, we either get new values or old values and we don't
 * care which for the sums. We always take the siglock to protect reading
 * the c* fields from p->signal from races with exit.c updating those
 * fields when reaping, so a sample either gets all the additions of a
 * given child after it's reaped, or none so this sample is before reaping.
 *
 * Locking:
 * We need to take the siglock for CHILDREN, SELF and BOTH
 * for the cases current multithreaded, non-current single threaded
 * non-current multithreaded. Thread traversal is now safe with
 * the siglock held.
 * Strictly speaking, we do not need to take the siglock if we are current and
 * single threaded, as no one else can take our signal_struct away, no one
 * else can reap the children to update signal->c* counters, and no one else
 * can race with the signal-> fields. If we do not take any lock, the
 * signal-> fields could be read out of order while another thread was just
 * exiting. So we should place a read memory barrier when we avoid the lock.
 * On the writer side, write memory barrier is implied in __exit_signal
 * as __exit_signal releases the siglock spinlock after updating the signal->
 * fields. But we don't do this yet to keep things simple.
 *
 */

static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
{
	r->ru_nvcsw += t->nvcsw;
	r->ru_nivcsw += t->nivcsw;
	r->ru_minflt += t->min_flt;
	r->ru_majflt += t->maj_flt;
	r->ru_inblock += task_io_get_inblock(t);
	r->ru_oublock += task_io_get_oublock(t);
}

static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
{
	struct task_struct *t;
	unsigned long flags;
	cputime_t tgutime, tgstime, utime, stime;
	unsigned long maxrss = 0;

	memset((char *) r, 0, sizeof *r);
	utime = stime = 0;

	if (who == RUSAGE_THREAD) {
		task_cputime_adjusted(current, &utime, &stime);
		accumulate_thread_rusage(p, r);
		maxrss = p->signal->maxrss;
		goto out;
	}

	if (!lock_task_sighand(p, &flags))
		return;

	switch (who) {
	case RUSAGE_BOTH:
	case RUSAGE_CHILDREN:
		utime = p->signal->cutime;
		stime = p->signal->cstime;
		r->ru_nvcsw = p->signal->cnvcsw;
		r->ru_nivcsw = p->signal->cnivcsw;
		r->ru_minflt = p->signal->cmin_flt;
		r->ru_majflt = p->signal->cmaj_flt;
		r->ru_inblock = p->signal->cinblock;
		r->ru_oublock = p->signal->coublock;
		maxrss = p->signal->cmaxrss;

		if (who == RUSAGE_CHILDREN)
			break;

	case RUSAGE_SELF:
		thread_group_cputime_adjusted(p, &tgutime, &tgstime);
		utime += tgutime;
		stime += tgstime;
		r->ru_nvcsw += p->signal->nvcsw;
		r->ru_nivcsw += p->signal->nivcsw;
		r->ru_minflt += p->signal->min_flt;
		r->ru_majflt += p->signal->maj_flt;
		r->ru_inblock += p->signal->inblock;
		r->ru_oublock += p->signal->oublock;
		if (maxrss < p->signal->maxrss)
			maxrss = p->signal->maxrss;
		t = p;
		do {
			accumulate_thread_rusage(t, r);
			t = next_thread(t);
		} while (t != p);
		break;

	default:
		BUG();
	}
	unlock_task_sighand(p, &flags);

out:
	cputime_to_timeval(utime, &r->ru_utime);
	cputime_to_timeval(stime, &r->ru_stime);

	if (who != RUSAGE_CHILDREN) {
		struct mm_struct *mm = get_task_mm(p);
		if (mm) {
			setmax_mm_hiwater_rss(&maxrss, mm);
			mmput(mm);
		}
	}
	r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
}

int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
{
	struct rusage r;
	k_getrusage(p, who, &r);
	return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
}

SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
{
	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
	    who != RUSAGE_THREAD)
		return -EINVAL;
	return getrusage(current, who, ru);
}

SYSCALL_DEFINE1(umask, int, mask)
{
	mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
	return mask;
}
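
/*
 * Usage sketch (illustrative, not part of the original file): since
 * the exchange above returns the previous mask, the classic way for
 * userspace to read the mask without disturbing it is to set and
 * restore it:
 *
 *	mode_t old = umask(0);
 *	umask(old);
 */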

#ifdef CONFIG_CHECKPOINT_RESTORE
static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
{
	struct fd exe;
	struct inode *inode;
	int err;

	exe = fdget(fd);
	if (!exe.file)
		return -EBADF;

	inode = file_inode(exe.file);

	/*
	 * Because the original mm->exe_file points to executable file, make
	 * sure that this one is executable as well, to avoid breaking an
	 * overall picture.
	 */
	err = -EACCES;
	if (!S_ISREG(inode->i_mode) ||
	    exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC)
		goto exit;

	err = inode_permission(inode, MAY_EXEC);
	if (err)
		goto exit;

	down_write(&mm->mmap_sem);

	/*
	 * Forbid mm->exe_file change if old file still mapped.
	 */
	err = -EBUSY;
	if (mm->exe_file) {
		struct vm_area_struct *vma;

		for (vma = mm->mmap; vma; vma = vma->vm_next)
			if (vma->vm_file &&
			    path_equal(&vma->vm_file->f_path,
				       &mm->exe_file->f_path))
				goto exit_unlock;
	}

	/*
	 * The symlink can be changed only once, just to disallow arbitrary
	 * transitions malicious software might bring in. This means one
	 * could make a snapshot over all processes running and monitor
	 * /proc/pid/exe changes to notice unusual activity if needed.
	 */
	err = -EPERM;
	if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags))
		goto exit_unlock;

	err = 0;
	set_mm_exe_file(mm, exe.file);	/* this grabs a reference to exe.file */
exit_unlock:
	up_write(&mm->mmap_sem);

exit:
	fdput(exe);
	return err;
}

static int prctl_set_mm(int opt, unsigned long addr,
			unsigned long arg4, unsigned long arg5)
{
	unsigned long rlim = rlimit(RLIMIT_DATA);
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int error;

	if (arg5 || (arg4 && opt != PR_SET_MM_AUXV))
		return -EINVAL;

	if (!capable(CAP_SYS_RESOURCE))
		return -EPERM;

	if (opt == PR_SET_MM_EXE_FILE)
		return prctl_set_mm_exe_file(mm, (unsigned int)addr);

	if (addr >= TASK_SIZE || addr < mmap_min_addr)
		return -EINVAL;

	error = -EINVAL;

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, addr);

	switch (opt) {
	case PR_SET_MM_START_CODE:
		mm->start_code = addr;
		break;
	case PR_SET_MM_END_CODE:
		mm->end_code = addr;
		break;
	case PR_SET_MM_START_DATA:
		mm->start_data = addr;
		break;
	case PR_SET_MM_END_DATA:
		mm->end_data = addr;
		break;

	case PR_SET_MM_START_BRK:
		if (addr <= mm->end_data)
			goto out;

		if (rlim < RLIM_INFINITY &&
		    (mm->brk - addr) +
		    (mm->end_data - mm->start_data) > rlim)
			goto out;

		mm->start_brk = addr;
		break;

	case PR_SET_MM_BRK:
		if (addr <= mm->end_data)
			goto out;

		if (rlim < RLIM_INFINITY &&
		    (addr - mm->start_brk) +
		    (mm->end_data - mm->start_data) > rlim)
			goto out;

		mm->brk = addr;
		break;

	/*
	 * If command line arguments and environment
	 * are placed somewhere else on stack, we can
	 * set them up here, ARG_START/END to setup
	 * command line arguments and ENV_START/END
	 * for environment.
	 */
	case PR_SET_MM_START_STACK:
	case PR_SET_MM_ARG_START:
	case PR_SET_MM_ARG_END:
	case PR_SET_MM_ENV_START:
	case PR_SET_MM_ENV_END:
		if (!vma) {
			error = -EFAULT;
			goto out;
		}
		if (opt == PR_SET_MM_START_STACK)
			mm->start_stack = addr;
		else if (opt == PR_SET_MM_ARG_START)
			mm->arg_start = addr;
		else if (opt == PR_SET_MM_ARG_END)
			mm->arg_end = addr;
		else if (opt == PR_SET_MM_ENV_START)
			mm->env_start = addr;
		else if (opt == PR_SET_MM_ENV_END)
			mm->env_end = addr;
		break;

	/*
	 * This doesn't move auxiliary vector itself
	 * since it's pinned to mm_struct, but allow
	 * to fill vector with new values. It's up
	 * to a caller to provide sane values here
	 * otherwise user space tools which use this
	 * vector might be unhappy.
	 */
	case PR_SET_MM_AUXV: {
		unsigned long user_auxv[AT_VECTOR_SIZE];

		if (arg4 > sizeof(user_auxv))
			goto out;
		up_read(&mm->mmap_sem);

		if (copy_from_user(user_auxv, (const void __user *)addr, arg4))
			return -EFAULT;

		/* Make sure the last entry is always AT_NULL */
		user_auxv[AT_VECTOR_SIZE - 2] = 0;
		user_auxv[AT_VECTOR_SIZE - 1] = 0;

		BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));

		task_lock(current);
		memcpy(mm->saved_auxv, user_auxv, arg4);
		task_unlock(current);

		return 0;
	}
	default:
		goto out;
	}

	error = 0;
out:
	up_read(&mm->mmap_sem);
	return error;
}

static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
{
	return put_user(me->clear_child_tid, tid_addr);
}

#else /* CONFIG_CHECKPOINT_RESTORE */
static int prctl_set_mm(int opt, unsigned long addr,
			unsigned long arg4, unsigned long arg5)
{
	return -EINVAL;
}
static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
{
	return -EINVAL;
}
#endif
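
/*
 * Usage sketch (illustrative, not part of the original file): a
 * checkpoint/restore tool running with CAP_SYS_RESOURCE might rewrite
 * the heap bounds it is about to restore (start_brk and brk_val are
 * placeholders):
 *
 *	prctl(PR_SET_MM, PR_SET_MM_START_BRK, start_brk, 0, 0);
 *	prctl(PR_SET_MM, PR_SET_MM_BRK, brk_val, 0, 0);
 */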

SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
		unsigned long, arg4, unsigned long, arg5)
{
	struct task_struct *me = current;
	unsigned char comm[sizeof(me->comm)];
	long error;

	error = security_task_prctl(option, arg2, arg3, arg4, arg5);
	if (error != -ENOSYS)
		return error;

	error = 0;
	switch (option) {
	case PR_SET_PDEATHSIG:
		if (!valid_signal(arg2)) {
			error = -EINVAL;
			break;
		}
		me->pdeath_signal = arg2;
		break;
	case PR_GET_PDEATHSIG:
		error = put_user(me->pdeath_signal, (int __user *)arg2);
		break;
	case PR_GET_DUMPABLE:
		error = get_dumpable(me->mm);
		break;
	case PR_SET_DUMPABLE:
		if (arg2 != SUID_DUMP_DISABLE && arg2 != SUID_DUMP_USER) {
			error = -EINVAL;
			break;
		}
		set_dumpable(me->mm, arg2);
		break;

	case PR_SET_UNALIGN:
		error = SET_UNALIGN_CTL(me, arg2);
		break;
	case PR_GET_UNALIGN:
		error = GET_UNALIGN_CTL(me, arg2);
		break;
	case PR_SET_FPEMU:
		error = SET_FPEMU_CTL(me, arg2);
		break;
	case PR_GET_FPEMU:
		error = GET_FPEMU_CTL(me, arg2);
		break;
	case PR_SET_FPEXC:
		error = SET_FPEXC_CTL(me, arg2);
		break;
	case PR_GET_FPEXC:
		error = GET_FPEXC_CTL(me, arg2);
		break;
	case PR_GET_TIMING:
		error = PR_TIMING_STATISTICAL;
		break;
	case PR_SET_TIMING:
		if (arg2 != PR_TIMING_STATISTICAL)
			error = -EINVAL;
		break;
	case PR_SET_NAME:
		comm[sizeof(me->comm) - 1] = 0;
		if (strncpy_from_user(comm, (char __user *)arg2,
				      sizeof(me->comm) - 1) < 0)
			return -EFAULT;
		set_task_comm(me, comm);
		proc_comm_connector(me);
		break;
	case PR_GET_NAME:
		get_task_comm(comm, me);
		if (copy_to_user((char __user *)arg2, comm, sizeof(comm)))
			return -EFAULT;
		break;
	case PR_GET_ENDIAN:
		error = GET_ENDIAN(me, arg2);
		break;
	case PR_SET_ENDIAN:
		error = SET_ENDIAN(me, arg2);
		break;
	case PR_GET_SECCOMP:
		error = prctl_get_seccomp();
		break;
	case PR_SET_SECCOMP:
		error = prctl_set_seccomp(arg2, (char __user *)arg3);
		break;
	case PR_GET_TSC:
		error = GET_TSC_CTL(arg2);
		break;
	case PR_SET_TSC:
		error = SET_TSC_CTL(arg2);
		break;
	case PR_TASK_PERF_EVENTS_DISABLE:
		error = perf_event_task_disable();
		break;
	case PR_TASK_PERF_EVENTS_ENABLE:
		error = perf_event_task_enable();
		break;
	case PR_GET_TIMERSLACK:
		error = current->timer_slack_ns;
		break;
	case PR_SET_TIMERSLACK:
		if (arg2 <= 0)
			current->timer_slack_ns =
					current->default_timer_slack_ns;
		else
			current->timer_slack_ns = arg2;
		break;
	case PR_MCE_KILL:
		if (arg4 | arg5)
			return -EINVAL;
		switch (arg2) {
		case PR_MCE_KILL_CLEAR:
			if (arg3 != 0)
				return -EINVAL;
			current->flags &= ~PF_MCE_PROCESS;
			break;
		case PR_MCE_KILL_SET:
			current->flags |= PF_MCE_PROCESS;
			if (arg3 == PR_MCE_KILL_EARLY)
				current->flags |= PF_MCE_EARLY;
			else if (arg3 == PR_MCE_KILL_LATE)
				current->flags &= ~PF_MCE_EARLY;
			else if (arg3 == PR_MCE_KILL_DEFAULT)
				current->flags &=
						~(PF_MCE_EARLY|PF_MCE_PROCESS);
			else
				return -EINVAL;
			break;
		default:
			return -EINVAL;
		}
		break;
	case PR_MCE_KILL_GET:
		if (arg2 | arg3 | arg4 | arg5)
			return -EINVAL;
		if (current->flags & PF_MCE_PROCESS)
			error = (current->flags & PF_MCE_EARLY) ?
				PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE;
		else
			error = PR_MCE_KILL_DEFAULT;
		break;
	case PR_SET_MM:
		error = prctl_set_mm(arg2, arg3, arg4, arg5);
		break;
	case PR_GET_TID_ADDRESS:
		error = prctl_get_tid_address(me, (int __user **)arg2);
		break;
	case PR_SET_CHILD_SUBREAPER:
		me->signal->is_child_subreaper = !!arg2;
		break;
	case PR_GET_CHILD_SUBREAPER:
		error = put_user(me->signal->is_child_subreaper,
				 (int __user *)arg2);
		break;
	case PR_SET_NO_NEW_PRIVS:
		if (arg2 != 1 || arg3 || arg4 || arg5)
			return -EINVAL;

		current->no_new_privs = 1;
		break;
	case PR_GET_NO_NEW_PRIVS:
		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;
		return current->no_new_privs ? 1 : 0;
	default:
		error = -EINVAL;
		break;
	}
	return error;
}

SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
		struct getcpu_cache __user *, unused)
{
	int err = 0;
	int cpu = raw_smp_processor_id();
	if (cpup)
		err |= put_user(cpu, cpup);
	if (nodep)
		err |= put_user(cpu_to_node(cpu), nodep);
	return err ? -EFAULT : 0;
}
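
/*
 * Usage sketch (illustrative, not part of the original file): glibc's
 * sched_getcpu() is a thin wrapper around this call, roughly
 *
 *	unsigned cpu;
 *	syscall(SYS_getcpu, &cpu, NULL, NULL);
 *
 * after which cpu holds the CPU the calling thread is running on.
 */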

char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";

static int __orderly_poweroff(void)
{
	int argc;
	char **argv;
	static char *envp[] = {
		"HOME=/",
		"PATH=/sbin:/bin:/usr/sbin:/usr/bin",
		NULL
	};
	int ret;

	argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc);
	if (argv == NULL) {
		printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
		       __func__, poweroff_cmd);
		return -ENOMEM;
	}

	ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_WAIT_EXEC,
				      NULL, NULL, NULL);
	argv_free(argv);

	return ret;
}

/**
 * orderly_poweroff - Trigger an orderly system poweroff
 * @force: force poweroff if command execution fails
 *
 * This may be called from any context to trigger a system shutdown.
 * If the orderly shutdown fails, it will force an immediate shutdown.
 */
int orderly_poweroff(bool force)
{
	int ret = __orderly_poweroff();

	if (ret && force) {
		printk(KERN_WARNING "Failed to start orderly shutdown: "
		       "forcing the issue\n");

		/*
		 * I guess this should try to kick off some daemon to sync and
		 * poweroff asap. Or not even bother syncing if we're doing an
		 * emergency shutdown?
		 */
		emergency_sync();
		kernel_power_off();
	}

	return ret;
}
EXPORT_SYMBOL_GPL(orderly_poweroff);
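
/*
 * Usage sketch (illustrative, not part of the original file): an
 * in-kernel caller, e.g. a driver reacting to a critical thermal trip
 * point, can request a clean shutdown and fall back to a forced
 * power-off:
 *
 *	orderly_poweroff(true);
 */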