1 /* 2 * linux/kernel/sys.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/export.h> 8 #include <linux/mm.h> 9 #include <linux/utsname.h> 10 #include <linux/mman.h> 11 #include <linux/reboot.h> 12 #include <linux/prctl.h> 13 #include <linux/highuid.h> 14 #include <linux/fs.h> 15 #include <linux/kmod.h> 16 #include <linux/perf_event.h> 17 #include <linux/resource.h> 18 #include <linux/kernel.h> 19 #include <linux/kexec.h> 20 #include <linux/workqueue.h> 21 #include <linux/capability.h> 22 #include <linux/device.h> 23 #include <linux/key.h> 24 #include <linux/times.h> 25 #include <linux/posix-timers.h> 26 #include <linux/security.h> 27 #include <linux/dcookies.h> 28 #include <linux/suspend.h> 29 #include <linux/tty.h> 30 #include <linux/signal.h> 31 #include <linux/cn_proc.h> 32 #include <linux/getcpu.h> 33 #include <linux/task_io_accounting_ops.h> 34 #include <linux/seccomp.h> 35 #include <linux/cpu.h> 36 #include <linux/personality.h> 37 #include <linux/ptrace.h> 38 #include <linux/fs_struct.h> 39 #include <linux/file.h> 40 #include <linux/mount.h> 41 #include <linux/gfp.h> 42 #include <linux/syscore_ops.h> 43 #include <linux/version.h> 44 #include <linux/ctype.h> 45 46 #include <linux/compat.h> 47 #include <linux/syscalls.h> 48 #include <linux/kprobes.h> 49 #include <linux/user_namespace.h> 50 51 #include <linux/kmsg_dump.h> 52 /* Move somewhere else to avoid recompiling? */ 53 #include <generated/utsrelease.h> 54 55 #include <asm/uaccess.h> 56 #include <asm/io.h> 57 #include <asm/unistd.h> 58 59 #ifndef SET_UNALIGN_CTL 60 # define SET_UNALIGN_CTL(a,b) (-EINVAL) 61 #endif 62 #ifndef GET_UNALIGN_CTL 63 # define GET_UNALIGN_CTL(a,b) (-EINVAL) 64 #endif 65 #ifndef SET_FPEMU_CTL 66 # define SET_FPEMU_CTL(a,b) (-EINVAL) 67 #endif 68 #ifndef GET_FPEMU_CTL 69 # define GET_FPEMU_CTL(a,b) (-EINVAL) 70 #endif 71 #ifndef SET_FPEXC_CTL 72 # define SET_FPEXC_CTL(a,b) (-EINVAL) 73 #endif 74 #ifndef GET_FPEXC_CTL 75 # define GET_FPEXC_CTL(a,b) (-EINVAL) 76 #endif 77 #ifndef GET_ENDIAN 78 # define GET_ENDIAN(a,b) (-EINVAL) 79 #endif 80 #ifndef SET_ENDIAN 81 # define SET_ENDIAN(a,b) (-EINVAL) 82 #endif 83 #ifndef GET_TSC_CTL 84 # define GET_TSC_CTL(a) (-EINVAL) 85 #endif 86 #ifndef SET_TSC_CTL 87 # define SET_TSC_CTL(a) (-EINVAL) 88 #endif 89 90 /* 91 * this is where the system-wide overflow UID and GID are defined, for 92 * architectures that now have 32-bit UID/GID but didn't in the past 93 */ 94 95 int overflowuid = DEFAULT_OVERFLOWUID; 96 int overflowgid = DEFAULT_OVERFLOWGID; 97 98 EXPORT_SYMBOL(overflowuid); 99 EXPORT_SYMBOL(overflowgid); 100 101 /* 102 * the same as above, but for filesystems which can only store a 16-bit 103 * UID and GID. as such, this is needed on all architectures 104 */ 105 106 int fs_overflowuid = DEFAULT_FS_OVERFLOWUID; 107 int fs_overflowgid = DEFAULT_FS_OVERFLOWUID; 108 109 EXPORT_SYMBOL(fs_overflowuid); 110 EXPORT_SYMBOL(fs_overflowgid); 111 112 /* 113 * this indicates whether you can reboot with ctrl-alt-del: the default is yes 114 */ 115 116 int C_A_D = 1; 117 struct pid *cad_pid; 118 EXPORT_SYMBOL(cad_pid); 119 120 /* 121 * If set, this is used for preparing the system to power off. 122 */ 123 124 void (*pm_power_off_prepare)(void); 125 126 /* 127 * Returns true if current's euid is same as p's uid or euid, 128 * or has CAP_SYS_NICE to p's user_ns. 129 * 130 * Called with rcu_read_lock, creds are safe 131 */ 132 static bool set_one_prio_perm(struct task_struct *p) 133 { 134 const struct cred *cred = current_cred(), *pcred = __task_cred(p); 135 136 if (uid_eq(pcred->uid, cred->euid) || 137 uid_eq(pcred->euid, cred->euid)) 138 return true; 139 if (ns_capable(pcred->user_ns, CAP_SYS_NICE)) 140 return true; 141 return false; 142 } 143 144 /* 145 * set the priority of a task 146 * - the caller must hold the RCU read lock 147 */ 148 static int set_one_prio(struct task_struct *p, int niceval, int error) 149 { 150 int no_nice; 151 152 if (!set_one_prio_perm(p)) { 153 error = -EPERM; 154 goto out; 155 } 156 if (niceval < task_nice(p) && !can_nice(p, niceval)) { 157 error = -EACCES; 158 goto out; 159 } 160 no_nice = security_task_setnice(p, niceval); 161 if (no_nice) { 162 error = no_nice; 163 goto out; 164 } 165 if (error == -ESRCH) 166 error = 0; 167 set_user_nice(p, niceval); 168 out: 169 return error; 170 } 171 172 SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) 173 { 174 struct task_struct *g, *p; 175 struct user_struct *user; 176 const struct cred *cred = current_cred(); 177 int error = -EINVAL; 178 struct pid *pgrp; 179 kuid_t uid; 180 181 if (which > PRIO_USER || which < PRIO_PROCESS) 182 goto out; 183 184 /* normalize: avoid signed division (rounding problems) */ 185 error = -ESRCH; 186 if (niceval < -20) 187 niceval = -20; 188 if (niceval > 19) 189 niceval = 19; 190 191 rcu_read_lock(); 192 read_lock(&tasklist_lock); 193 switch (which) { 194 case PRIO_PROCESS: 195 if (who) 196 p = find_task_by_vpid(who); 197 else 198 p = current; 199 if (p) 200 error = set_one_prio(p, niceval, error); 201 break; 202 case PRIO_PGRP: 203 if (who) 204 pgrp = find_vpid(who); 205 else 206 pgrp = task_pgrp(current); 207 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { 208 error = set_one_prio(p, niceval, error); 209 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); 210 break; 211 case PRIO_USER: 212 uid = make_kuid(cred->user_ns, who); 213 user = cred->user; 214 if (!who) 215 uid = cred->uid; 216 else if (!uid_eq(uid, cred->uid) && 217 !(user = find_user(uid))) 218 goto out_unlock; /* No processes for this user */ 219 220 do_each_thread(g, p) { 221 if (uid_eq(task_uid(p), uid)) 222 error = set_one_prio(p, niceval, error); 223 } while_each_thread(g, p); 224 if (!uid_eq(uid, cred->uid)) 225 free_uid(user); /* For find_user() */ 226 break; 227 } 228 out_unlock: 229 read_unlock(&tasklist_lock); 230 rcu_read_unlock(); 231 out: 232 return error; 233 } 234 235 /* 236 * Ugh. To avoid negative return values, "getpriority()" will 237 * not return the normal nice-value, but a negated value that 238 * has been offset by 20 (ie it returns 40..1 instead of -20..19) 239 * to stay compatible. 240 */ 241 SYSCALL_DEFINE2(getpriority, int, which, int, who) 242 { 243 struct task_struct *g, *p; 244 struct user_struct *user; 245 const struct cred *cred = current_cred(); 246 long niceval, retval = -ESRCH; 247 struct pid *pgrp; 248 kuid_t uid; 249 250 if (which > PRIO_USER || which < PRIO_PROCESS) 251 return -EINVAL; 252 253 rcu_read_lock(); 254 read_lock(&tasklist_lock); 255 switch (which) { 256 case PRIO_PROCESS: 257 if (who) 258 p = find_task_by_vpid(who); 259 else 260 p = current; 261 if (p) { 262 niceval = 20 - task_nice(p); 263 if (niceval > retval) 264 retval = niceval; 265 } 266 break; 267 case PRIO_PGRP: 268 if (who) 269 pgrp = find_vpid(who); 270 else 271 pgrp = task_pgrp(current); 272 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { 273 niceval = 20 - task_nice(p); 274 if (niceval > retval) 275 retval = niceval; 276 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); 277 break; 278 case PRIO_USER: 279 uid = make_kuid(cred->user_ns, who); 280 user = cred->user; 281 if (!who) 282 uid = cred->uid; 283 else if (!uid_eq(uid, cred->uid) && 284 !(user = find_user(uid))) 285 goto out_unlock; /* No processes for this user */ 286 287 do_each_thread(g, p) { 288 if (uid_eq(task_uid(p), uid)) { 289 niceval = 20 - task_nice(p); 290 if (niceval > retval) 291 retval = niceval; 292 } 293 } while_each_thread(g, p); 294 if (!uid_eq(uid, cred->uid)) 295 free_uid(user); /* for find_user() */ 296 break; 297 } 298 out_unlock: 299 read_unlock(&tasklist_lock); 300 rcu_read_unlock(); 301 302 return retval; 303 } 304 305 /** 306 * emergency_restart - reboot the system 307 * 308 * Without shutting down any hardware or taking any locks 309 * reboot the system. This is called when we know we are in 310 * trouble so this is our best effort to reboot. This is 311 * safe to call in interrupt context. 312 */ 313 void emergency_restart(void) 314 { 315 kmsg_dump(KMSG_DUMP_EMERG); 316 machine_emergency_restart(); 317 } 318 EXPORT_SYMBOL_GPL(emergency_restart); 319 320 void kernel_restart_prepare(char *cmd) 321 { 322 blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); 323 system_state = SYSTEM_RESTART; 324 usermodehelper_disable(); 325 device_shutdown(); 326 syscore_shutdown(); 327 } 328 329 /** 330 * register_reboot_notifier - Register function to be called at reboot time 331 * @nb: Info about notifier function to be called 332 * 333 * Registers a function with the list of functions 334 * to be called at reboot time. 335 * 336 * Currently always returns zero, as blocking_notifier_chain_register() 337 * always returns zero. 338 */ 339 int register_reboot_notifier(struct notifier_block *nb) 340 { 341 return blocking_notifier_chain_register(&reboot_notifier_list, nb); 342 } 343 EXPORT_SYMBOL(register_reboot_notifier); 344 345 /** 346 * unregister_reboot_notifier - Unregister previously registered reboot notifier 347 * @nb: Hook to be unregistered 348 * 349 * Unregisters a previously registered reboot 350 * notifier function. 351 * 352 * Returns zero on success, or %-ENOENT on failure. 353 */ 354 int unregister_reboot_notifier(struct notifier_block *nb) 355 { 356 return blocking_notifier_chain_unregister(&reboot_notifier_list, nb); 357 } 358 EXPORT_SYMBOL(unregister_reboot_notifier); 359 360 /** 361 * kernel_restart - reboot the system 362 * @cmd: pointer to buffer containing command to execute for restart 363 * or %NULL 364 * 365 * Shutdown everything and perform a clean reboot. 366 * This is not safe to call in interrupt context. 367 */ 368 void kernel_restart(char *cmd) 369 { 370 kernel_restart_prepare(cmd); 371 if (!cmd) 372 printk(KERN_EMERG "Restarting system.\n"); 373 else 374 printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd); 375 kmsg_dump(KMSG_DUMP_RESTART); 376 machine_restart(cmd); 377 } 378 EXPORT_SYMBOL_GPL(kernel_restart); 379 380 static void kernel_shutdown_prepare(enum system_states state) 381 { 382 blocking_notifier_call_chain(&reboot_notifier_list, 383 (state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL); 384 system_state = state; 385 usermodehelper_disable(); 386 device_shutdown(); 387 } 388 /** 389 * kernel_halt - halt the system 390 * 391 * Shutdown everything and perform a clean system halt. 392 */ 393 void kernel_halt(void) 394 { 395 kernel_shutdown_prepare(SYSTEM_HALT); 396 syscore_shutdown(); 397 printk(KERN_EMERG "System halted.\n"); 398 kmsg_dump(KMSG_DUMP_HALT); 399 machine_halt(); 400 } 401 402 EXPORT_SYMBOL_GPL(kernel_halt); 403 404 /** 405 * kernel_power_off - power_off the system 406 * 407 * Shutdown everything and perform a clean system power_off. 408 */ 409 void kernel_power_off(void) 410 { 411 kernel_shutdown_prepare(SYSTEM_POWER_OFF); 412 if (pm_power_off_prepare) 413 pm_power_off_prepare(); 414 disable_nonboot_cpus(); 415 syscore_shutdown(); 416 printk(KERN_EMERG "Power down.\n"); 417 kmsg_dump(KMSG_DUMP_POWEROFF); 418 machine_power_off(); 419 } 420 EXPORT_SYMBOL_GPL(kernel_power_off); 421 422 static DEFINE_MUTEX(reboot_mutex); 423 424 /* 425 * Reboot system call: for obvious reasons only root may call it, 426 * and even root needs to set up some magic numbers in the registers 427 * so that some mistake won't make this reboot the whole machine. 428 * You can also set the meaning of the ctrl-alt-del-key here. 429 * 430 * reboot doesn't sync: do that yourself before calling this. 431 */ 432 SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, 433 void __user *, arg) 434 { 435 char buffer[256]; 436 int ret = 0; 437 438 /* We only trust the superuser with rebooting the system. */ 439 if (!capable(CAP_SYS_BOOT)) 440 return -EPERM; 441 442 /* For safety, we require "magic" arguments. */ 443 if (magic1 != LINUX_REBOOT_MAGIC1 || 444 (magic2 != LINUX_REBOOT_MAGIC2 && 445 magic2 != LINUX_REBOOT_MAGIC2A && 446 magic2 != LINUX_REBOOT_MAGIC2B && 447 magic2 != LINUX_REBOOT_MAGIC2C)) 448 return -EINVAL; 449 450 /* 451 * If pid namespaces are enabled and the current task is in a child 452 * pid_namespace, the command is handled by reboot_pid_ns() which will 453 * call do_exit(). 454 */ 455 ret = reboot_pid_ns(task_active_pid_ns(current), cmd); 456 if (ret) 457 return ret; 458 459 /* Instead of trying to make the power_off code look like 460 * halt when pm_power_off is not set do it the easy way. 461 */ 462 if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off) 463 cmd = LINUX_REBOOT_CMD_HALT; 464 465 mutex_lock(&reboot_mutex); 466 switch (cmd) { 467 case LINUX_REBOOT_CMD_RESTART: 468 kernel_restart(NULL); 469 break; 470 471 case LINUX_REBOOT_CMD_CAD_ON: 472 C_A_D = 1; 473 break; 474 475 case LINUX_REBOOT_CMD_CAD_OFF: 476 C_A_D = 0; 477 break; 478 479 case LINUX_REBOOT_CMD_HALT: 480 kernel_halt(); 481 do_exit(0); 482 panic("cannot halt"); 483 484 case LINUX_REBOOT_CMD_POWER_OFF: 485 kernel_power_off(); 486 do_exit(0); 487 break; 488 489 case LINUX_REBOOT_CMD_RESTART2: 490 if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) { 491 ret = -EFAULT; 492 break; 493 } 494 buffer[sizeof(buffer) - 1] = '\0'; 495 496 kernel_restart(buffer); 497 break; 498 499 #ifdef CONFIG_KEXEC 500 case LINUX_REBOOT_CMD_KEXEC: 501 ret = kernel_kexec(); 502 break; 503 #endif 504 505 #ifdef CONFIG_HIBERNATION 506 case LINUX_REBOOT_CMD_SW_SUSPEND: 507 ret = hibernate(); 508 break; 509 #endif 510 511 default: 512 ret = -EINVAL; 513 break; 514 } 515 mutex_unlock(&reboot_mutex); 516 return ret; 517 } 518 519 static void deferred_cad(struct work_struct *dummy) 520 { 521 kernel_restart(NULL); 522 } 523 524 /* 525 * This function gets called by ctrl-alt-del - ie the keyboard interrupt. 526 * As it's called within an interrupt, it may NOT sync: the only choice 527 * is whether to reboot at once, or just ignore the ctrl-alt-del. 528 */ 529 void ctrl_alt_del(void) 530 { 531 static DECLARE_WORK(cad_work, deferred_cad); 532 533 if (C_A_D) 534 schedule_work(&cad_work); 535 else 536 kill_cad_pid(SIGINT, 1); 537 } 538 539 /* 540 * Unprivileged users may change the real gid to the effective gid 541 * or vice versa. (BSD-style) 542 * 543 * If you set the real gid at all, or set the effective gid to a value not 544 * equal to the real gid, then the saved gid is set to the new effective gid. 545 * 546 * This makes it possible for a setgid program to completely drop its 547 * privileges, which is often a useful assertion to make when you are doing 548 * a security audit over a program. 549 * 550 * The general idea is that a program which uses just setregid() will be 551 * 100% compatible with BSD. A program which uses just setgid() will be 552 * 100% compatible with POSIX with saved IDs. 553 * 554 * SMP: There are not races, the GIDs are checked only by filesystem 555 * operations (as far as semantic preservation is concerned). 556 */ 557 SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) 558 { 559 struct user_namespace *ns = current_user_ns(); 560 const struct cred *old; 561 struct cred *new; 562 int retval; 563 kgid_t krgid, kegid; 564 565 krgid = make_kgid(ns, rgid); 566 kegid = make_kgid(ns, egid); 567 568 if ((rgid != (gid_t) -1) && !gid_valid(krgid)) 569 return -EINVAL; 570 if ((egid != (gid_t) -1) && !gid_valid(kegid)) 571 return -EINVAL; 572 573 new = prepare_creds(); 574 if (!new) 575 return -ENOMEM; 576 old = current_cred(); 577 578 retval = -EPERM; 579 if (rgid != (gid_t) -1) { 580 if (gid_eq(old->gid, krgid) || 581 gid_eq(old->egid, krgid) || 582 nsown_capable(CAP_SETGID)) 583 new->gid = krgid; 584 else 585 goto error; 586 } 587 if (egid != (gid_t) -1) { 588 if (gid_eq(old->gid, kegid) || 589 gid_eq(old->egid, kegid) || 590 gid_eq(old->sgid, kegid) || 591 nsown_capable(CAP_SETGID)) 592 new->egid = kegid; 593 else 594 goto error; 595 } 596 597 if (rgid != (gid_t) -1 || 598 (egid != (gid_t) -1 && !gid_eq(kegid, old->gid))) 599 new->sgid = new->egid; 600 new->fsgid = new->egid; 601 602 return commit_creds(new); 603 604 error: 605 abort_creds(new); 606 return retval; 607 } 608 609 /* 610 * setgid() is implemented like SysV w/ SAVED_IDS 611 * 612 * SMP: Same implicit races as above. 613 */ 614 SYSCALL_DEFINE1(setgid, gid_t, gid) 615 { 616 struct user_namespace *ns = current_user_ns(); 617 const struct cred *old; 618 struct cred *new; 619 int retval; 620 kgid_t kgid; 621 622 kgid = make_kgid(ns, gid); 623 if (!gid_valid(kgid)) 624 return -EINVAL; 625 626 new = prepare_creds(); 627 if (!new) 628 return -ENOMEM; 629 old = current_cred(); 630 631 retval = -EPERM; 632 if (nsown_capable(CAP_SETGID)) 633 new->gid = new->egid = new->sgid = new->fsgid = kgid; 634 else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid)) 635 new->egid = new->fsgid = kgid; 636 else 637 goto error; 638 639 return commit_creds(new); 640 641 error: 642 abort_creds(new); 643 return retval; 644 } 645 646 /* 647 * change the user struct in a credentials set to match the new UID 648 */ 649 static int set_user(struct cred *new) 650 { 651 struct user_struct *new_user; 652 653 new_user = alloc_uid(new->uid); 654 if (!new_user) 655 return -EAGAIN; 656 657 /* 658 * We don't fail in case of NPROC limit excess here because too many 659 * poorly written programs don't check set*uid() return code, assuming 660 * it never fails if called by root. We may still enforce NPROC limit 661 * for programs doing set*uid()+execve() by harmlessly deferring the 662 * failure to the execve() stage. 663 */ 664 if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) && 665 new_user != INIT_USER) 666 current->flags |= PF_NPROC_EXCEEDED; 667 else 668 current->flags &= ~PF_NPROC_EXCEEDED; 669 670 free_uid(new->user); 671 new->user = new_user; 672 return 0; 673 } 674 675 /* 676 * Unprivileged users may change the real uid to the effective uid 677 * or vice versa. (BSD-style) 678 * 679 * If you set the real uid at all, or set the effective uid to a value not 680 * equal to the real uid, then the saved uid is set to the new effective uid. 681 * 682 * This makes it possible for a setuid program to completely drop its 683 * privileges, which is often a useful assertion to make when you are doing 684 * a security audit over a program. 685 * 686 * The general idea is that a program which uses just setreuid() will be 687 * 100% compatible with BSD. A program which uses just setuid() will be 688 * 100% compatible with POSIX with saved IDs. 689 */ 690 SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) 691 { 692 struct user_namespace *ns = current_user_ns(); 693 const struct cred *old; 694 struct cred *new; 695 int retval; 696 kuid_t kruid, keuid; 697 698 kruid = make_kuid(ns, ruid); 699 keuid = make_kuid(ns, euid); 700 701 if ((ruid != (uid_t) -1) && !uid_valid(kruid)) 702 return -EINVAL; 703 if ((euid != (uid_t) -1) && !uid_valid(keuid)) 704 return -EINVAL; 705 706 new = prepare_creds(); 707 if (!new) 708 return -ENOMEM; 709 old = current_cred(); 710 711 retval = -EPERM; 712 if (ruid != (uid_t) -1) { 713 new->uid = kruid; 714 if (!uid_eq(old->uid, kruid) && 715 !uid_eq(old->euid, kruid) && 716 !nsown_capable(CAP_SETUID)) 717 goto error; 718 } 719 720 if (euid != (uid_t) -1) { 721 new->euid = keuid; 722 if (!uid_eq(old->uid, keuid) && 723 !uid_eq(old->euid, keuid) && 724 !uid_eq(old->suid, keuid) && 725 !nsown_capable(CAP_SETUID)) 726 goto error; 727 } 728 729 if (!uid_eq(new->uid, old->uid)) { 730 retval = set_user(new); 731 if (retval < 0) 732 goto error; 733 } 734 if (ruid != (uid_t) -1 || 735 (euid != (uid_t) -1 && !uid_eq(keuid, old->uid))) 736 new->suid = new->euid; 737 new->fsuid = new->euid; 738 739 retval = security_task_fix_setuid(new, old, LSM_SETID_RE); 740 if (retval < 0) 741 goto error; 742 743 return commit_creds(new); 744 745 error: 746 abort_creds(new); 747 return retval; 748 } 749 750 /* 751 * setuid() is implemented like SysV with SAVED_IDS 752 * 753 * Note that SAVED_ID's is deficient in that a setuid root program 754 * like sendmail, for example, cannot set its uid to be a normal 755 * user and then switch back, because if you're root, setuid() sets 756 * the saved uid too. If you don't like this, blame the bright people 757 * in the POSIX committee and/or USG. Note that the BSD-style setreuid() 758 * will allow a root program to temporarily drop privileges and be able to 759 * regain them by swapping the real and effective uid. 760 */ 761 SYSCALL_DEFINE1(setuid, uid_t, uid) 762 { 763 struct user_namespace *ns = current_user_ns(); 764 const struct cred *old; 765 struct cred *new; 766 int retval; 767 kuid_t kuid; 768 769 kuid = make_kuid(ns, uid); 770 if (!uid_valid(kuid)) 771 return -EINVAL; 772 773 new = prepare_creds(); 774 if (!new) 775 return -ENOMEM; 776 old = current_cred(); 777 778 retval = -EPERM; 779 if (nsown_capable(CAP_SETUID)) { 780 new->suid = new->uid = kuid; 781 if (!uid_eq(kuid, old->uid)) { 782 retval = set_user(new); 783 if (retval < 0) 784 goto error; 785 } 786 } else if (!uid_eq(kuid, old->uid) && !uid_eq(kuid, new->suid)) { 787 goto error; 788 } 789 790 new->fsuid = new->euid = kuid; 791 792 retval = security_task_fix_setuid(new, old, LSM_SETID_ID); 793 if (retval < 0) 794 goto error; 795 796 return commit_creds(new); 797 798 error: 799 abort_creds(new); 800 return retval; 801 } 802 803 804 /* 805 * This function implements a generic ability to update ruid, euid, 806 * and suid. This allows you to implement the 4.4 compatible seteuid(). 807 */ 808 SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) 809 { 810 struct user_namespace *ns = current_user_ns(); 811 const struct cred *old; 812 struct cred *new; 813 int retval; 814 kuid_t kruid, keuid, ksuid; 815 816 kruid = make_kuid(ns, ruid); 817 keuid = make_kuid(ns, euid); 818 ksuid = make_kuid(ns, suid); 819 820 if ((ruid != (uid_t) -1) && !uid_valid(kruid)) 821 return -EINVAL; 822 823 if ((euid != (uid_t) -1) && !uid_valid(keuid)) 824 return -EINVAL; 825 826 if ((suid != (uid_t) -1) && !uid_valid(ksuid)) 827 return -EINVAL; 828 829 new = prepare_creds(); 830 if (!new) 831 return -ENOMEM; 832 833 old = current_cred(); 834 835 retval = -EPERM; 836 if (!nsown_capable(CAP_SETUID)) { 837 if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) && 838 !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid)) 839 goto error; 840 if (euid != (uid_t) -1 && !uid_eq(keuid, old->uid) && 841 !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid)) 842 goto error; 843 if (suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) && 844 !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid)) 845 goto error; 846 } 847 848 if (ruid != (uid_t) -1) { 849 new->uid = kruid; 850 if (!uid_eq(kruid, old->uid)) { 851 retval = set_user(new); 852 if (retval < 0) 853 goto error; 854 } 855 } 856 if (euid != (uid_t) -1) 857 new->euid = keuid; 858 if (suid != (uid_t) -1) 859 new->suid = ksuid; 860 new->fsuid = new->euid; 861 862 retval = security_task_fix_setuid(new, old, LSM_SETID_RES); 863 if (retval < 0) 864 goto error; 865 866 return commit_creds(new); 867 868 error: 869 abort_creds(new); 870 return retval; 871 } 872 873 SYSCALL_DEFINE3(getresuid, uid_t __user *, ruidp, uid_t __user *, euidp, uid_t __user *, suidp) 874 { 875 const struct cred *cred = current_cred(); 876 int retval; 877 uid_t ruid, euid, suid; 878 879 ruid = from_kuid_munged(cred->user_ns, cred->uid); 880 euid = from_kuid_munged(cred->user_ns, cred->euid); 881 suid = from_kuid_munged(cred->user_ns, cred->suid); 882 883 if (!(retval = put_user(ruid, ruidp)) && 884 !(retval = put_user(euid, euidp))) 885 retval = put_user(suid, suidp); 886 887 return retval; 888 } 889 890 /* 891 * Same as above, but for rgid, egid, sgid. 892 */ 893 SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid) 894 { 895 struct user_namespace *ns = current_user_ns(); 896 const struct cred *old; 897 struct cred *new; 898 int retval; 899 kgid_t krgid, kegid, ksgid; 900 901 krgid = make_kgid(ns, rgid); 902 kegid = make_kgid(ns, egid); 903 ksgid = make_kgid(ns, sgid); 904 905 if ((rgid != (gid_t) -1) && !gid_valid(krgid)) 906 return -EINVAL; 907 if ((egid != (gid_t) -1) && !gid_valid(kegid)) 908 return -EINVAL; 909 if ((sgid != (gid_t) -1) && !gid_valid(ksgid)) 910 return -EINVAL; 911 912 new = prepare_creds(); 913 if (!new) 914 return -ENOMEM; 915 old = current_cred(); 916 917 retval = -EPERM; 918 if (!nsown_capable(CAP_SETGID)) { 919 if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) && 920 !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid)) 921 goto error; 922 if (egid != (gid_t) -1 && !gid_eq(kegid, old->gid) && 923 !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid)) 924 goto error; 925 if (sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) && 926 !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid)) 927 goto error; 928 } 929 930 if (rgid != (gid_t) -1) 931 new->gid = krgid; 932 if (egid != (gid_t) -1) 933 new->egid = kegid; 934 if (sgid != (gid_t) -1) 935 new->sgid = ksgid; 936 new->fsgid = new->egid; 937 938 return commit_creds(new); 939 940 error: 941 abort_creds(new); 942 return retval; 943 } 944 945 SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t __user *, sgidp) 946 { 947 const struct cred *cred = current_cred(); 948 int retval; 949 gid_t rgid, egid, sgid; 950 951 rgid = from_kgid_munged(cred->user_ns, cred->gid); 952 egid = from_kgid_munged(cred->user_ns, cred->egid); 953 sgid = from_kgid_munged(cred->user_ns, cred->sgid); 954 955 if (!(retval = put_user(rgid, rgidp)) && 956 !(retval = put_user(egid, egidp))) 957 retval = put_user(sgid, sgidp); 958 959 return retval; 960 } 961 962 963 /* 964 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This 965 * is used for "access()" and for the NFS daemon (letting nfsd stay at 966 * whatever uid it wants to). It normally shadows "euid", except when 967 * explicitly set by setfsuid() or for access.. 968 */ 969 SYSCALL_DEFINE1(setfsuid, uid_t, uid) 970 { 971 const struct cred *old; 972 struct cred *new; 973 uid_t old_fsuid; 974 kuid_t kuid; 975 976 old = current_cred(); 977 old_fsuid = from_kuid_munged(old->user_ns, old->fsuid); 978 979 kuid = make_kuid(old->user_ns, uid); 980 if (!uid_valid(kuid)) 981 return old_fsuid; 982 983 new = prepare_creds(); 984 if (!new) 985 return old_fsuid; 986 987 if (uid_eq(kuid, old->uid) || uid_eq(kuid, old->euid) || 988 uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) || 989 nsown_capable(CAP_SETUID)) { 990 if (!uid_eq(kuid, old->fsuid)) { 991 new->fsuid = kuid; 992 if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) 993 goto change_okay; 994 } 995 } 996 997 abort_creds(new); 998 return old_fsuid; 999 1000 change_okay: 1001 commit_creds(new); 1002 return old_fsuid; 1003 } 1004 1005 /* 1006 * Samma pÃ¥ svenska.. 1007 */ 1008 SYSCALL_DEFINE1(setfsgid, gid_t, gid) 1009 { 1010 const struct cred *old; 1011 struct cred *new; 1012 gid_t old_fsgid; 1013 kgid_t kgid; 1014 1015 old = current_cred(); 1016 old_fsgid = from_kgid_munged(old->user_ns, old->fsgid); 1017 1018 kgid = make_kgid(old->user_ns, gid); 1019 if (!gid_valid(kgid)) 1020 return old_fsgid; 1021 1022 new = prepare_creds(); 1023 if (!new) 1024 return old_fsgid; 1025 1026 if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->egid) || 1027 gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) || 1028 nsown_capable(CAP_SETGID)) { 1029 if (!gid_eq(kgid, old->fsgid)) { 1030 new->fsgid = kgid; 1031 goto change_okay; 1032 } 1033 } 1034 1035 abort_creds(new); 1036 return old_fsgid; 1037 1038 change_okay: 1039 commit_creds(new); 1040 return old_fsgid; 1041 } 1042 1043 void do_sys_times(struct tms *tms) 1044 { 1045 cputime_t tgutime, tgstime, cutime, cstime; 1046 1047 spin_lock_irq(¤t->sighand->siglock); 1048 thread_group_times(current, &tgutime, &tgstime); 1049 cutime = current->signal->cutime; 1050 cstime = current->signal->cstime; 1051 spin_unlock_irq(¤t->sighand->siglock); 1052 tms->tms_utime = cputime_to_clock_t(tgutime); 1053 tms->tms_stime = cputime_to_clock_t(tgstime); 1054 tms->tms_cutime = cputime_to_clock_t(cutime); 1055 tms->tms_cstime = cputime_to_clock_t(cstime); 1056 } 1057 1058 SYSCALL_DEFINE1(times, struct tms __user *, tbuf) 1059 { 1060 if (tbuf) { 1061 struct tms tmp; 1062 1063 do_sys_times(&tmp); 1064 if (copy_to_user(tbuf, &tmp, sizeof(struct tms))) 1065 return -EFAULT; 1066 } 1067 force_successful_syscall_return(); 1068 return (long) jiffies_64_to_clock_t(get_jiffies_64()); 1069 } 1070 1071 /* 1072 * This needs some heavy checking ... 1073 * I just haven't the stomach for it. I also don't fully 1074 * understand sessions/pgrp etc. Let somebody who does explain it. 1075 * 1076 * OK, I think I have the protection semantics right.... this is really 1077 * only important on a multi-user system anyway, to make sure one user 1078 * can't send a signal to a process owned by another. -TYT, 12/12/91 1079 * 1080 * Auch. Had to add the 'did_exec' flag to conform completely to POSIX. 1081 * LBT 04.03.94 1082 */ 1083 SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid) 1084 { 1085 struct task_struct *p; 1086 struct task_struct *group_leader = current->group_leader; 1087 struct pid *pgrp; 1088 int err; 1089 1090 if (!pid) 1091 pid = task_pid_vnr(group_leader); 1092 if (!pgid) 1093 pgid = pid; 1094 if (pgid < 0) 1095 return -EINVAL; 1096 rcu_read_lock(); 1097 1098 /* From this point forward we keep holding onto the tasklist lock 1099 * so that our parent does not change from under us. -DaveM 1100 */ 1101 write_lock_irq(&tasklist_lock); 1102 1103 err = -ESRCH; 1104 p = find_task_by_vpid(pid); 1105 if (!p) 1106 goto out; 1107 1108 err = -EINVAL; 1109 if (!thread_group_leader(p)) 1110 goto out; 1111 1112 if (same_thread_group(p->real_parent, group_leader)) { 1113 err = -EPERM; 1114 if (task_session(p) != task_session(group_leader)) 1115 goto out; 1116 err = -EACCES; 1117 if (p->did_exec) 1118 goto out; 1119 } else { 1120 err = -ESRCH; 1121 if (p != group_leader) 1122 goto out; 1123 } 1124 1125 err = -EPERM; 1126 if (p->signal->leader) 1127 goto out; 1128 1129 pgrp = task_pid(p); 1130 if (pgid != pid) { 1131 struct task_struct *g; 1132 1133 pgrp = find_vpid(pgid); 1134 g = pid_task(pgrp, PIDTYPE_PGID); 1135 if (!g || task_session(g) != task_session(group_leader)) 1136 goto out; 1137 } 1138 1139 err = security_task_setpgid(p, pgid); 1140 if (err) 1141 goto out; 1142 1143 if (task_pgrp(p) != pgrp) 1144 change_pid(p, PIDTYPE_PGID, pgrp); 1145 1146 err = 0; 1147 out: 1148 /* All paths lead to here, thus we are safe. -DaveM */ 1149 write_unlock_irq(&tasklist_lock); 1150 rcu_read_unlock(); 1151 return err; 1152 } 1153 1154 SYSCALL_DEFINE1(getpgid, pid_t, pid) 1155 { 1156 struct task_struct *p; 1157 struct pid *grp; 1158 int retval; 1159 1160 rcu_read_lock(); 1161 if (!pid) 1162 grp = task_pgrp(current); 1163 else { 1164 retval = -ESRCH; 1165 p = find_task_by_vpid(pid); 1166 if (!p) 1167 goto out; 1168 grp = task_pgrp(p); 1169 if (!grp) 1170 goto out; 1171 1172 retval = security_task_getpgid(p); 1173 if (retval) 1174 goto out; 1175 } 1176 retval = pid_vnr(grp); 1177 out: 1178 rcu_read_unlock(); 1179 return retval; 1180 } 1181 1182 #ifdef __ARCH_WANT_SYS_GETPGRP 1183 1184 SYSCALL_DEFINE0(getpgrp) 1185 { 1186 return sys_getpgid(0); 1187 } 1188 1189 #endif 1190 1191 SYSCALL_DEFINE1(getsid, pid_t, pid) 1192 { 1193 struct task_struct *p; 1194 struct pid *sid; 1195 int retval; 1196 1197 rcu_read_lock(); 1198 if (!pid) 1199 sid = task_session(current); 1200 else { 1201 retval = -ESRCH; 1202 p = find_task_by_vpid(pid); 1203 if (!p) 1204 goto out; 1205 sid = task_session(p); 1206 if (!sid) 1207 goto out; 1208 1209 retval = security_task_getsid(p); 1210 if (retval) 1211 goto out; 1212 } 1213 retval = pid_vnr(sid); 1214 out: 1215 rcu_read_unlock(); 1216 return retval; 1217 } 1218 1219 SYSCALL_DEFINE0(setsid) 1220 { 1221 struct task_struct *group_leader = current->group_leader; 1222 struct pid *sid = task_pid(group_leader); 1223 pid_t session = pid_vnr(sid); 1224 int err = -EPERM; 1225 1226 write_lock_irq(&tasklist_lock); 1227 /* Fail if I am already a session leader */ 1228 if (group_leader->signal->leader) 1229 goto out; 1230 1231 /* Fail if a process group id already exists that equals the 1232 * proposed session id. 1233 */ 1234 if (pid_task(sid, PIDTYPE_PGID)) 1235 goto out; 1236 1237 group_leader->signal->leader = 1; 1238 __set_special_pids(sid); 1239 1240 proc_clear_tty(group_leader); 1241 1242 err = session; 1243 out: 1244 write_unlock_irq(&tasklist_lock); 1245 if (err > 0) { 1246 proc_sid_connector(group_leader); 1247 sched_autogroup_create_attach(group_leader); 1248 } 1249 return err; 1250 } 1251 1252 DECLARE_RWSEM(uts_sem); 1253 1254 #ifdef COMPAT_UTS_MACHINE 1255 #define override_architecture(name) \ 1256 (personality(current->personality) == PER_LINUX32 && \ 1257 copy_to_user(name->machine, COMPAT_UTS_MACHINE, \ 1258 sizeof(COMPAT_UTS_MACHINE))) 1259 #else 1260 #define override_architecture(name) 0 1261 #endif 1262 1263 /* 1264 * Work around broken programs that cannot handle "Linux 3.0". 1265 * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40 1266 */ 1267 static int override_release(char __user *release, int len) 1268 { 1269 int ret = 0; 1270 char buf[65]; 1271 1272 if (current->personality & UNAME26) { 1273 char *rest = UTS_RELEASE; 1274 int ndots = 0; 1275 unsigned v; 1276 1277 while (*rest) { 1278 if (*rest == '.' && ++ndots >= 3) 1279 break; 1280 if (!isdigit(*rest) && *rest != '.') 1281 break; 1282 rest++; 1283 } 1284 v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40; 1285 snprintf(buf, len, "2.6.%u%s", v, rest); 1286 ret = copy_to_user(release, buf, len); 1287 } 1288 return ret; 1289 } 1290 1291 SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) 1292 { 1293 int errno = 0; 1294 1295 down_read(&uts_sem); 1296 if (copy_to_user(name, utsname(), sizeof *name)) 1297 errno = -EFAULT; 1298 up_read(&uts_sem); 1299 1300 if (!errno && override_release(name->release, sizeof(name->release))) 1301 errno = -EFAULT; 1302 if (!errno && override_architecture(name)) 1303 errno = -EFAULT; 1304 return errno; 1305 } 1306 1307 #ifdef __ARCH_WANT_SYS_OLD_UNAME 1308 /* 1309 * Old cruft 1310 */ 1311 SYSCALL_DEFINE1(uname, struct old_utsname __user *, name) 1312 { 1313 int error = 0; 1314 1315 if (!name) 1316 return -EFAULT; 1317 1318 down_read(&uts_sem); 1319 if (copy_to_user(name, utsname(), sizeof(*name))) 1320 error = -EFAULT; 1321 up_read(&uts_sem); 1322 1323 if (!error && override_release(name->release, sizeof(name->release))) 1324 error = -EFAULT; 1325 if (!error && override_architecture(name)) 1326 error = -EFAULT; 1327 return error; 1328 } 1329 1330 SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name) 1331 { 1332 int error; 1333 1334 if (!name) 1335 return -EFAULT; 1336 if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) 1337 return -EFAULT; 1338 1339 down_read(&uts_sem); 1340 error = __copy_to_user(&name->sysname, &utsname()->sysname, 1341 __OLD_UTS_LEN); 1342 error |= __put_user(0, name->sysname + __OLD_UTS_LEN); 1343 error |= __copy_to_user(&name->nodename, &utsname()->nodename, 1344 __OLD_UTS_LEN); 1345 error |= __put_user(0, name->nodename + __OLD_UTS_LEN); 1346 error |= __copy_to_user(&name->release, &utsname()->release, 1347 __OLD_UTS_LEN); 1348 error |= __put_user(0, name->release + __OLD_UTS_LEN); 1349 error |= __copy_to_user(&name->version, &utsname()->version, 1350 __OLD_UTS_LEN); 1351 error |= __put_user(0, name->version + __OLD_UTS_LEN); 1352 error |= __copy_to_user(&name->machine, &utsname()->machine, 1353 __OLD_UTS_LEN); 1354 error |= __put_user(0, name->machine + __OLD_UTS_LEN); 1355 up_read(&uts_sem); 1356 1357 if (!error && override_architecture(name)) 1358 error = -EFAULT; 1359 if (!error && override_release(name->release, sizeof(name->release))) 1360 error = -EFAULT; 1361 return error ? -EFAULT : 0; 1362 } 1363 #endif 1364 1365 SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) 1366 { 1367 int errno; 1368 char tmp[__NEW_UTS_LEN]; 1369 1370 if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) 1371 return -EPERM; 1372 1373 if (len < 0 || len > __NEW_UTS_LEN) 1374 return -EINVAL; 1375 down_write(&uts_sem); 1376 errno = -EFAULT; 1377 if (!copy_from_user(tmp, name, len)) { 1378 struct new_utsname *u = utsname(); 1379 1380 memcpy(u->nodename, tmp, len); 1381 memset(u->nodename + len, 0, sizeof(u->nodename) - len); 1382 errno = 0; 1383 uts_proc_notify(UTS_PROC_HOSTNAME); 1384 } 1385 up_write(&uts_sem); 1386 return errno; 1387 } 1388 1389 #ifdef __ARCH_WANT_SYS_GETHOSTNAME 1390 1391 SYSCALL_DEFINE2(gethostname, char __user *, name, int, len) 1392 { 1393 int i, errno; 1394 struct new_utsname *u; 1395 1396 if (len < 0) 1397 return -EINVAL; 1398 down_read(&uts_sem); 1399 u = utsname(); 1400 i = 1 + strlen(u->nodename); 1401 if (i > len) 1402 i = len; 1403 errno = 0; 1404 if (copy_to_user(name, u->nodename, i)) 1405 errno = -EFAULT; 1406 up_read(&uts_sem); 1407 return errno; 1408 } 1409 1410 #endif 1411 1412 /* 1413 * Only setdomainname; getdomainname can be implemented by calling 1414 * uname() 1415 */ 1416 SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) 1417 { 1418 int errno; 1419 char tmp[__NEW_UTS_LEN]; 1420 1421 if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) 1422 return -EPERM; 1423 if (len < 0 || len > __NEW_UTS_LEN) 1424 return -EINVAL; 1425 1426 down_write(&uts_sem); 1427 errno = -EFAULT; 1428 if (!copy_from_user(tmp, name, len)) { 1429 struct new_utsname *u = utsname(); 1430 1431 memcpy(u->domainname, tmp, len); 1432 memset(u->domainname + len, 0, sizeof(u->domainname) - len); 1433 errno = 0; 1434 uts_proc_notify(UTS_PROC_DOMAINNAME); 1435 } 1436 up_write(&uts_sem); 1437 return errno; 1438 } 1439 1440 SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim) 1441 { 1442 struct rlimit value; 1443 int ret; 1444 1445 ret = do_prlimit(current, resource, NULL, &value); 1446 if (!ret) 1447 ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0; 1448 1449 return ret; 1450 } 1451 1452 #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT 1453 1454 /* 1455 * Back compatibility for getrlimit. Needed for some apps. 1456 */ 1457 1458 SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource, 1459 struct rlimit __user *, rlim) 1460 { 1461 struct rlimit x; 1462 if (resource >= RLIM_NLIMITS) 1463 return -EINVAL; 1464 1465 task_lock(current->group_leader); 1466 x = current->signal->rlim[resource]; 1467 task_unlock(current->group_leader); 1468 if (x.rlim_cur > 0x7FFFFFFF) 1469 x.rlim_cur = 0x7FFFFFFF; 1470 if (x.rlim_max > 0x7FFFFFFF) 1471 x.rlim_max = 0x7FFFFFFF; 1472 return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0; 1473 } 1474 1475 #endif 1476 1477 static inline bool rlim64_is_infinity(__u64 rlim64) 1478 { 1479 #if BITS_PER_LONG < 64 1480 return rlim64 >= ULONG_MAX; 1481 #else 1482 return rlim64 == RLIM64_INFINITY; 1483 #endif 1484 } 1485 1486 static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64) 1487 { 1488 if (rlim->rlim_cur == RLIM_INFINITY) 1489 rlim64->rlim_cur = RLIM64_INFINITY; 1490 else 1491 rlim64->rlim_cur = rlim->rlim_cur; 1492 if (rlim->rlim_max == RLIM_INFINITY) 1493 rlim64->rlim_max = RLIM64_INFINITY; 1494 else 1495 rlim64->rlim_max = rlim->rlim_max; 1496 } 1497 1498 static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim) 1499 { 1500 if (rlim64_is_infinity(rlim64->rlim_cur)) 1501 rlim->rlim_cur = RLIM_INFINITY; 1502 else 1503 rlim->rlim_cur = (unsigned long)rlim64->rlim_cur; 1504 if (rlim64_is_infinity(rlim64->rlim_max)) 1505 rlim->rlim_max = RLIM_INFINITY; 1506 else 1507 rlim->rlim_max = (unsigned long)rlim64->rlim_max; 1508 } 1509 1510 /* make sure you are allowed to change @tsk limits before calling this */ 1511 int do_prlimit(struct task_struct *tsk, unsigned int resource, 1512 struct rlimit *new_rlim, struct rlimit *old_rlim) 1513 { 1514 struct rlimit *rlim; 1515 int retval = 0; 1516 1517 if (resource >= RLIM_NLIMITS) 1518 return -EINVAL; 1519 if (new_rlim) { 1520 if (new_rlim->rlim_cur > new_rlim->rlim_max) 1521 return -EINVAL; 1522 if (resource == RLIMIT_NOFILE && 1523 new_rlim->rlim_max > sysctl_nr_open) 1524 return -EPERM; 1525 } 1526 1527 /* protect tsk->signal and tsk->sighand from disappearing */ 1528 read_lock(&tasklist_lock); 1529 if (!tsk->sighand) { 1530 retval = -ESRCH; 1531 goto out; 1532 } 1533 1534 rlim = tsk->signal->rlim + resource; 1535 task_lock(tsk->group_leader); 1536 if (new_rlim) { 1537 /* Keep the capable check against init_user_ns until 1538 cgroups can contain all limits */ 1539 if (new_rlim->rlim_max > rlim->rlim_max && 1540 !capable(CAP_SYS_RESOURCE)) 1541 retval = -EPERM; 1542 if (!retval) 1543 retval = security_task_setrlimit(tsk->group_leader, 1544 resource, new_rlim); 1545 if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) { 1546 /* 1547 * The caller is asking for an immediate RLIMIT_CPU 1548 * expiry. But we use the zero value to mean "it was 1549 * never set". So let's cheat and make it one second 1550 * instead 1551 */ 1552 new_rlim->rlim_cur = 1; 1553 } 1554 } 1555 if (!retval) { 1556 if (old_rlim) 1557 *old_rlim = *rlim; 1558 if (new_rlim) 1559 *rlim = *new_rlim; 1560 } 1561 task_unlock(tsk->group_leader); 1562 1563 /* 1564 * RLIMIT_CPU handling. Note that the kernel fails to return an error 1565 * code if it rejected the user's attempt to set RLIMIT_CPU. This is a 1566 * very long-standing error, and fixing it now risks breakage of 1567 * applications, so we live with it 1568 */ 1569 if (!retval && new_rlim && resource == RLIMIT_CPU && 1570 new_rlim->rlim_cur != RLIM_INFINITY) 1571 update_rlimit_cpu(tsk, new_rlim->rlim_cur); 1572 out: 1573 read_unlock(&tasklist_lock); 1574 return retval; 1575 } 1576 1577 /* rcu lock must be held */ 1578 static int check_prlimit_permission(struct task_struct *task) 1579 { 1580 const struct cred *cred = current_cred(), *tcred; 1581 1582 if (current == task) 1583 return 0; 1584 1585 tcred = __task_cred(task); 1586 if (uid_eq(cred->uid, tcred->euid) && 1587 uid_eq(cred->uid, tcred->suid) && 1588 uid_eq(cred->uid, tcred->uid) && 1589 gid_eq(cred->gid, tcred->egid) && 1590 gid_eq(cred->gid, tcred->sgid) && 1591 gid_eq(cred->gid, tcred->gid)) 1592 return 0; 1593 if (ns_capable(tcred->user_ns, CAP_SYS_RESOURCE)) 1594 return 0; 1595 1596 return -EPERM; 1597 } 1598 1599 SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, 1600 const struct rlimit64 __user *, new_rlim, 1601 struct rlimit64 __user *, old_rlim) 1602 { 1603 struct rlimit64 old64, new64; 1604 struct rlimit old, new; 1605 struct task_struct *tsk; 1606 int ret; 1607 1608 if (new_rlim) { 1609 if (copy_from_user(&new64, new_rlim, sizeof(new64))) 1610 return -EFAULT; 1611 rlim64_to_rlim(&new64, &new); 1612 } 1613 1614 rcu_read_lock(); 1615 tsk = pid ? find_task_by_vpid(pid) : current; 1616 if (!tsk) { 1617 rcu_read_unlock(); 1618 return -ESRCH; 1619 } 1620 ret = check_prlimit_permission(tsk); 1621 if (ret) { 1622 rcu_read_unlock(); 1623 return ret; 1624 } 1625 get_task_struct(tsk); 1626 rcu_read_unlock(); 1627 1628 ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL, 1629 old_rlim ? &old : NULL); 1630 1631 if (!ret && old_rlim) { 1632 rlim_to_rlim64(&old, &old64); 1633 if (copy_to_user(old_rlim, &old64, sizeof(old64))) 1634 ret = -EFAULT; 1635 } 1636 1637 put_task_struct(tsk); 1638 return ret; 1639 } 1640 1641 SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim) 1642 { 1643 struct rlimit new_rlim; 1644 1645 if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) 1646 return -EFAULT; 1647 return do_prlimit(current, resource, &new_rlim, NULL); 1648 } 1649 1650 /* 1651 * It would make sense to put struct rusage in the task_struct, 1652 * except that would make the task_struct be *really big*. After 1653 * task_struct gets moved into malloc'ed memory, it would 1654 * make sense to do this. It will make moving the rest of the information 1655 * a lot simpler! (Which we're not doing right now because we're not 1656 * measuring them yet). 1657 * 1658 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have 1659 * races with threads incrementing their own counters. But since word 1660 * reads are atomic, we either get new values or old values and we don't 1661 * care which for the sums. We always take the siglock to protect reading 1662 * the c* fields from p->signal from races with exit.c updating those 1663 * fields when reaping, so a sample either gets all the additions of a 1664 * given child after it's reaped, or none so this sample is before reaping. 1665 * 1666 * Locking: 1667 * We need to take the siglock for CHILDEREN, SELF and BOTH 1668 * for the cases current multithreaded, non-current single threaded 1669 * non-current multithreaded. Thread traversal is now safe with 1670 * the siglock held. 1671 * Strictly speaking, we donot need to take the siglock if we are current and 1672 * single threaded, as no one else can take our signal_struct away, no one 1673 * else can reap the children to update signal->c* counters, and no one else 1674 * can race with the signal-> fields. If we do not take any lock, the 1675 * signal-> fields could be read out of order while another thread was just 1676 * exiting. So we should place a read memory barrier when we avoid the lock. 1677 * On the writer side, write memory barrier is implied in __exit_signal 1678 * as __exit_signal releases the siglock spinlock after updating the signal-> 1679 * fields. But we don't do this yet to keep things simple. 1680 * 1681 */ 1682 1683 static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r) 1684 { 1685 r->ru_nvcsw += t->nvcsw; 1686 r->ru_nivcsw += t->nivcsw; 1687 r->ru_minflt += t->min_flt; 1688 r->ru_majflt += t->maj_flt; 1689 r->ru_inblock += task_io_get_inblock(t); 1690 r->ru_oublock += task_io_get_oublock(t); 1691 } 1692 1693 static void k_getrusage(struct task_struct *p, int who, struct rusage *r) 1694 { 1695 struct task_struct *t; 1696 unsigned long flags; 1697 cputime_t tgutime, tgstime, utime, stime; 1698 unsigned long maxrss = 0; 1699 1700 memset((char *) r, 0, sizeof *r); 1701 utime = stime = 0; 1702 1703 if (who == RUSAGE_THREAD) { 1704 task_times(current, &utime, &stime); 1705 accumulate_thread_rusage(p, r); 1706 maxrss = p->signal->maxrss; 1707 goto out; 1708 } 1709 1710 if (!lock_task_sighand(p, &flags)) 1711 return; 1712 1713 switch (who) { 1714 case RUSAGE_BOTH: 1715 case RUSAGE_CHILDREN: 1716 utime = p->signal->cutime; 1717 stime = p->signal->cstime; 1718 r->ru_nvcsw = p->signal->cnvcsw; 1719 r->ru_nivcsw = p->signal->cnivcsw; 1720 r->ru_minflt = p->signal->cmin_flt; 1721 r->ru_majflt = p->signal->cmaj_flt; 1722 r->ru_inblock = p->signal->cinblock; 1723 r->ru_oublock = p->signal->coublock; 1724 maxrss = p->signal->cmaxrss; 1725 1726 if (who == RUSAGE_CHILDREN) 1727 break; 1728 1729 case RUSAGE_SELF: 1730 thread_group_times(p, &tgutime, &tgstime); 1731 utime += tgutime; 1732 stime += tgstime; 1733 r->ru_nvcsw += p->signal->nvcsw; 1734 r->ru_nivcsw += p->signal->nivcsw; 1735 r->ru_minflt += p->signal->min_flt; 1736 r->ru_majflt += p->signal->maj_flt; 1737 r->ru_inblock += p->signal->inblock; 1738 r->ru_oublock += p->signal->oublock; 1739 if (maxrss < p->signal->maxrss) 1740 maxrss = p->signal->maxrss; 1741 t = p; 1742 do { 1743 accumulate_thread_rusage(t, r); 1744 t = next_thread(t); 1745 } while (t != p); 1746 break; 1747 1748 default: 1749 BUG(); 1750 } 1751 unlock_task_sighand(p, &flags); 1752 1753 out: 1754 cputime_to_timeval(utime, &r->ru_utime); 1755 cputime_to_timeval(stime, &r->ru_stime); 1756 1757 if (who != RUSAGE_CHILDREN) { 1758 struct mm_struct *mm = get_task_mm(p); 1759 if (mm) { 1760 setmax_mm_hiwater_rss(&maxrss, mm); 1761 mmput(mm); 1762 } 1763 } 1764 r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */ 1765 } 1766 1767 int getrusage(struct task_struct *p, int who, struct rusage __user *ru) 1768 { 1769 struct rusage r; 1770 k_getrusage(p, who, &r); 1771 return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0; 1772 } 1773 1774 SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru) 1775 { 1776 if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN && 1777 who != RUSAGE_THREAD) 1778 return -EINVAL; 1779 return getrusage(current, who, ru); 1780 } 1781 1782 SYSCALL_DEFINE1(umask, int, mask) 1783 { 1784 mask = xchg(¤t->fs->umask, mask & S_IRWXUGO); 1785 return mask; 1786 } 1787 1788 #ifdef CONFIG_CHECKPOINT_RESTORE 1789 static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) 1790 { 1791 struct file *exe_file; 1792 struct dentry *dentry; 1793 int err; 1794 1795 exe_file = fget(fd); 1796 if (!exe_file) 1797 return -EBADF; 1798 1799 dentry = exe_file->f_path.dentry; 1800 1801 /* 1802 * Because the original mm->exe_file points to executable file, make 1803 * sure that this one is executable as well, to avoid breaking an 1804 * overall picture. 1805 */ 1806 err = -EACCES; 1807 if (!S_ISREG(dentry->d_inode->i_mode) || 1808 exe_file->f_path.mnt->mnt_flags & MNT_NOEXEC) 1809 goto exit; 1810 1811 err = inode_permission(dentry->d_inode, MAY_EXEC); 1812 if (err) 1813 goto exit; 1814 1815 down_write(&mm->mmap_sem); 1816 1817 /* 1818 * Forbid mm->exe_file change if old file still mapped. 1819 */ 1820 err = -EBUSY; 1821 if (mm->exe_file) { 1822 struct vm_area_struct *vma; 1823 1824 for (vma = mm->mmap; vma; vma = vma->vm_next) 1825 if (vma->vm_file && 1826 path_equal(&vma->vm_file->f_path, 1827 &mm->exe_file->f_path)) 1828 goto exit_unlock; 1829 } 1830 1831 /* 1832 * The symlink can be changed only once, just to disallow arbitrary 1833 * transitions malicious software might bring in. This means one 1834 * could make a snapshot over all processes running and monitor 1835 * /proc/pid/exe changes to notice unusual activity if needed. 1836 */ 1837 err = -EPERM; 1838 if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags)) 1839 goto exit_unlock; 1840 1841 err = 0; 1842 set_mm_exe_file(mm, exe_file); 1843 exit_unlock: 1844 up_write(&mm->mmap_sem); 1845 1846 exit: 1847 fput(exe_file); 1848 return err; 1849 } 1850 1851 static int prctl_set_mm(int opt, unsigned long addr, 1852 unsigned long arg4, unsigned long arg5) 1853 { 1854 unsigned long rlim = rlimit(RLIMIT_DATA); 1855 struct mm_struct *mm = current->mm; 1856 struct vm_area_struct *vma; 1857 int error; 1858 1859 if (arg5 || (arg4 && opt != PR_SET_MM_AUXV)) 1860 return -EINVAL; 1861 1862 if (!capable(CAP_SYS_RESOURCE)) 1863 return -EPERM; 1864 1865 if (opt == PR_SET_MM_EXE_FILE) 1866 return prctl_set_mm_exe_file(mm, (unsigned int)addr); 1867 1868 if (addr >= TASK_SIZE || addr < mmap_min_addr) 1869 return -EINVAL; 1870 1871 error = -EINVAL; 1872 1873 down_read(&mm->mmap_sem); 1874 vma = find_vma(mm, addr); 1875 1876 switch (opt) { 1877 case PR_SET_MM_START_CODE: 1878 mm->start_code = addr; 1879 break; 1880 case PR_SET_MM_END_CODE: 1881 mm->end_code = addr; 1882 break; 1883 case PR_SET_MM_START_DATA: 1884 mm->start_data = addr; 1885 break; 1886 case PR_SET_MM_END_DATA: 1887 mm->end_data = addr; 1888 break; 1889 1890 case PR_SET_MM_START_BRK: 1891 if (addr <= mm->end_data) 1892 goto out; 1893 1894 if (rlim < RLIM_INFINITY && 1895 (mm->brk - addr) + 1896 (mm->end_data - mm->start_data) > rlim) 1897 goto out; 1898 1899 mm->start_brk = addr; 1900 break; 1901 1902 case PR_SET_MM_BRK: 1903 if (addr <= mm->end_data) 1904 goto out; 1905 1906 if (rlim < RLIM_INFINITY && 1907 (addr - mm->start_brk) + 1908 (mm->end_data - mm->start_data) > rlim) 1909 goto out; 1910 1911 mm->brk = addr; 1912 break; 1913 1914 /* 1915 * If command line arguments and environment 1916 * are placed somewhere else on stack, we can 1917 * set them up here, ARG_START/END to setup 1918 * command line argumets and ENV_START/END 1919 * for environment. 1920 */ 1921 case PR_SET_MM_START_STACK: 1922 case PR_SET_MM_ARG_START: 1923 case PR_SET_MM_ARG_END: 1924 case PR_SET_MM_ENV_START: 1925 case PR_SET_MM_ENV_END: 1926 if (!vma) { 1927 error = -EFAULT; 1928 goto out; 1929 } 1930 if (opt == PR_SET_MM_START_STACK) 1931 mm->start_stack = addr; 1932 else if (opt == PR_SET_MM_ARG_START) 1933 mm->arg_start = addr; 1934 else if (opt == PR_SET_MM_ARG_END) 1935 mm->arg_end = addr; 1936 else if (opt == PR_SET_MM_ENV_START) 1937 mm->env_start = addr; 1938 else if (opt == PR_SET_MM_ENV_END) 1939 mm->env_end = addr; 1940 break; 1941 1942 /* 1943 * This doesn't move auxiliary vector itself 1944 * since it's pinned to mm_struct, but allow 1945 * to fill vector with new values. It's up 1946 * to a caller to provide sane values here 1947 * otherwise user space tools which use this 1948 * vector might be unhappy. 1949 */ 1950 case PR_SET_MM_AUXV: { 1951 unsigned long user_auxv[AT_VECTOR_SIZE]; 1952 1953 if (arg4 > sizeof(user_auxv)) 1954 goto out; 1955 up_read(&mm->mmap_sem); 1956 1957 if (copy_from_user(user_auxv, (const void __user *)addr, arg4)) 1958 return -EFAULT; 1959 1960 /* Make sure the last entry is always AT_NULL */ 1961 user_auxv[AT_VECTOR_SIZE - 2] = 0; 1962 user_auxv[AT_VECTOR_SIZE - 1] = 0; 1963 1964 BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv)); 1965 1966 task_lock(current); 1967 memcpy(mm->saved_auxv, user_auxv, arg4); 1968 task_unlock(current); 1969 1970 return 0; 1971 } 1972 default: 1973 goto out; 1974 } 1975 1976 error = 0; 1977 out: 1978 up_read(&mm->mmap_sem); 1979 return error; 1980 } 1981 1982 static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) 1983 { 1984 return put_user(me->clear_child_tid, tid_addr); 1985 } 1986 1987 #else /* CONFIG_CHECKPOINT_RESTORE */ 1988 static int prctl_set_mm(int opt, unsigned long addr, 1989 unsigned long arg4, unsigned long arg5) 1990 { 1991 return -EINVAL; 1992 } 1993 static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) 1994 { 1995 return -EINVAL; 1996 } 1997 #endif 1998 1999 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, 2000 unsigned long, arg4, unsigned long, arg5) 2001 { 2002 struct task_struct *me = current; 2003 unsigned char comm[sizeof(me->comm)]; 2004 long error; 2005 2006 error = security_task_prctl(option, arg2, arg3, arg4, arg5); 2007 if (error != -ENOSYS) 2008 return error; 2009 2010 error = 0; 2011 switch (option) { 2012 case PR_SET_PDEATHSIG: 2013 if (!valid_signal(arg2)) { 2014 error = -EINVAL; 2015 break; 2016 } 2017 me->pdeath_signal = arg2; 2018 error = 0; 2019 break; 2020 case PR_GET_PDEATHSIG: 2021 error = put_user(me->pdeath_signal, (int __user *)arg2); 2022 break; 2023 case PR_GET_DUMPABLE: 2024 error = get_dumpable(me->mm); 2025 break; 2026 case PR_SET_DUMPABLE: 2027 if (arg2 < 0 || arg2 > 1) { 2028 error = -EINVAL; 2029 break; 2030 } 2031 set_dumpable(me->mm, arg2); 2032 error = 0; 2033 break; 2034 2035 case PR_SET_UNALIGN: 2036 error = SET_UNALIGN_CTL(me, arg2); 2037 break; 2038 case PR_GET_UNALIGN: 2039 error = GET_UNALIGN_CTL(me, arg2); 2040 break; 2041 case PR_SET_FPEMU: 2042 error = SET_FPEMU_CTL(me, arg2); 2043 break; 2044 case PR_GET_FPEMU: 2045 error = GET_FPEMU_CTL(me, arg2); 2046 break; 2047 case PR_SET_FPEXC: 2048 error = SET_FPEXC_CTL(me, arg2); 2049 break; 2050 case PR_GET_FPEXC: 2051 error = GET_FPEXC_CTL(me, arg2); 2052 break; 2053 case PR_GET_TIMING: 2054 error = PR_TIMING_STATISTICAL; 2055 break; 2056 case PR_SET_TIMING: 2057 if (arg2 != PR_TIMING_STATISTICAL) 2058 error = -EINVAL; 2059 else 2060 error = 0; 2061 break; 2062 2063 case PR_SET_NAME: 2064 comm[sizeof(me->comm)-1] = 0; 2065 if (strncpy_from_user(comm, (char __user *)arg2, 2066 sizeof(me->comm) - 1) < 0) 2067 return -EFAULT; 2068 set_task_comm(me, comm); 2069 proc_comm_connector(me); 2070 return 0; 2071 case PR_GET_NAME: 2072 get_task_comm(comm, me); 2073 if (copy_to_user((char __user *)arg2, comm, 2074 sizeof(comm))) 2075 return -EFAULT; 2076 return 0; 2077 case PR_GET_ENDIAN: 2078 error = GET_ENDIAN(me, arg2); 2079 break; 2080 case PR_SET_ENDIAN: 2081 error = SET_ENDIAN(me, arg2); 2082 break; 2083 2084 case PR_GET_SECCOMP: 2085 error = prctl_get_seccomp(); 2086 break; 2087 case PR_SET_SECCOMP: 2088 error = prctl_set_seccomp(arg2, (char __user *)arg3); 2089 break; 2090 case PR_GET_TSC: 2091 error = GET_TSC_CTL(arg2); 2092 break; 2093 case PR_SET_TSC: 2094 error = SET_TSC_CTL(arg2); 2095 break; 2096 case PR_TASK_PERF_EVENTS_DISABLE: 2097 error = perf_event_task_disable(); 2098 break; 2099 case PR_TASK_PERF_EVENTS_ENABLE: 2100 error = perf_event_task_enable(); 2101 break; 2102 case PR_GET_TIMERSLACK: 2103 error = current->timer_slack_ns; 2104 break; 2105 case PR_SET_TIMERSLACK: 2106 if (arg2 <= 0) 2107 current->timer_slack_ns = 2108 current->default_timer_slack_ns; 2109 else 2110 current->timer_slack_ns = arg2; 2111 error = 0; 2112 break; 2113 case PR_MCE_KILL: 2114 if (arg4 | arg5) 2115 return -EINVAL; 2116 switch (arg2) { 2117 case PR_MCE_KILL_CLEAR: 2118 if (arg3 != 0) 2119 return -EINVAL; 2120 current->flags &= ~PF_MCE_PROCESS; 2121 break; 2122 case PR_MCE_KILL_SET: 2123 current->flags |= PF_MCE_PROCESS; 2124 if (arg3 == PR_MCE_KILL_EARLY) 2125 current->flags |= PF_MCE_EARLY; 2126 else if (arg3 == PR_MCE_KILL_LATE) 2127 current->flags &= ~PF_MCE_EARLY; 2128 else if (arg3 == PR_MCE_KILL_DEFAULT) 2129 current->flags &= 2130 ~(PF_MCE_EARLY|PF_MCE_PROCESS); 2131 else 2132 return -EINVAL; 2133 break; 2134 default: 2135 return -EINVAL; 2136 } 2137 error = 0; 2138 break; 2139 case PR_MCE_KILL_GET: 2140 if (arg2 | arg3 | arg4 | arg5) 2141 return -EINVAL; 2142 if (current->flags & PF_MCE_PROCESS) 2143 error = (current->flags & PF_MCE_EARLY) ? 2144 PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE; 2145 else 2146 error = PR_MCE_KILL_DEFAULT; 2147 break; 2148 case PR_SET_MM: 2149 error = prctl_set_mm(arg2, arg3, arg4, arg5); 2150 break; 2151 case PR_GET_TID_ADDRESS: 2152 error = prctl_get_tid_address(me, (int __user **)arg2); 2153 break; 2154 case PR_SET_CHILD_SUBREAPER: 2155 me->signal->is_child_subreaper = !!arg2; 2156 error = 0; 2157 break; 2158 case PR_GET_CHILD_SUBREAPER: 2159 error = put_user(me->signal->is_child_subreaper, 2160 (int __user *) arg2); 2161 break; 2162 case PR_SET_NO_NEW_PRIVS: 2163 if (arg2 != 1 || arg3 || arg4 || arg5) 2164 return -EINVAL; 2165 2166 current->no_new_privs = 1; 2167 break; 2168 case PR_GET_NO_NEW_PRIVS: 2169 if (arg2 || arg3 || arg4 || arg5) 2170 return -EINVAL; 2171 return current->no_new_privs ? 1 : 0; 2172 default: 2173 error = -EINVAL; 2174 break; 2175 } 2176 return error; 2177 } 2178 2179 SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, 2180 struct getcpu_cache __user *, unused) 2181 { 2182 int err = 0; 2183 int cpu = raw_smp_processor_id(); 2184 if (cpup) 2185 err |= put_user(cpu, cpup); 2186 if (nodep) 2187 err |= put_user(cpu_to_node(cpu), nodep); 2188 return err ? -EFAULT : 0; 2189 } 2190 2191 char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; 2192 2193 static void argv_cleanup(struct subprocess_info *info) 2194 { 2195 argv_free(info->argv); 2196 } 2197 2198 /** 2199 * orderly_poweroff - Trigger an orderly system poweroff 2200 * @force: force poweroff if command execution fails 2201 * 2202 * This may be called from any context to trigger a system shutdown. 2203 * If the orderly shutdown fails, it will force an immediate shutdown. 2204 */ 2205 int orderly_poweroff(bool force) 2206 { 2207 int argc; 2208 char **argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc); 2209 static char *envp[] = { 2210 "HOME=/", 2211 "PATH=/sbin:/bin:/usr/sbin:/usr/bin", 2212 NULL 2213 }; 2214 int ret = -ENOMEM; 2215 2216 if (argv == NULL) { 2217 printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", 2218 __func__, poweroff_cmd); 2219 goto out; 2220 } 2221 2222 ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_NO_WAIT, 2223 NULL, argv_cleanup, NULL); 2224 out: 2225 if (likely(!ret)) 2226 return 0; 2227 2228 if (ret == -ENOMEM) 2229 argv_free(argv); 2230 2231 if (force) { 2232 printk(KERN_WARNING "Failed to start orderly shutdown: " 2233 "forcing the issue\n"); 2234 2235 /* I guess this should try to kick off some daemon to 2236 sync and poweroff asap. Or not even bother syncing 2237 if we're doing an emergency shutdown? */ 2238 emergency_sync(); 2239 kernel_power_off(); 2240 } 2241 2242 return ret; 2243 } 2244 EXPORT_SYMBOL_GPL(orderly_poweroff); 2245