/*
 *  linux/kernel/sys.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/export.h>
#include <linux/mm.h>
#include <linux/utsname.h>
#include <linux/mman.h>
#include <linux/reboot.h>
#include <linux/prctl.h>
#include <linux/highuid.h>
#include <linux/fs.h>
#include <linux/kmod.h>
#include <linux/perf_event.h>
#include <linux/resource.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
#include <linux/workqueue.h>
#include <linux/capability.h>
#include <linux/device.h>
#include <linux/key.h>
#include <linux/times.h>
#include <linux/posix-timers.h>
#include <linux/security.h>
#include <linux/dcookies.h>
#include <linux/suspend.h>
#include <linux/tty.h>
#include <linux/signal.h>
#include <linux/cn_proc.h>
#include <linux/getcpu.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/seccomp.h>
#include <linux/cpu.h>
#include <linux/personality.h>
#include <linux/ptrace.h>
#include <linux/fs_struct.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/gfp.h>
#include <linux/syscore_ops.h>
#include <linux/version.h>
#include <linux/ctype.h>

#include <linux/compat.h>
#include <linux/syscalls.h>
#include <linux/kprobes.h>
#include <linux/user_namespace.h>
#include <linux/binfmts.h>

#include <linux/sched.h>
#include <linux/rcupdate.h>
#include <linux/uidgid.h>
#include <linux/cred.h>

#include <linux/kmsg_dump.h>
/* Move somewhere else to avoid recompiling? */
#include <generated/utsrelease.h>

#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/unistd.h>

#ifndef SET_UNALIGN_CTL
# define SET_UNALIGN_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_UNALIGN_CTL
# define GET_UNALIGN_CTL(a,b)	(-EINVAL)
#endif
#ifndef SET_FPEMU_CTL
# define SET_FPEMU_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_FPEMU_CTL
# define GET_FPEMU_CTL(a,b)	(-EINVAL)
#endif
#ifndef SET_FPEXC_CTL
# define SET_FPEXC_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_FPEXC_CTL
# define GET_FPEXC_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_ENDIAN
# define GET_ENDIAN(a,b)	(-EINVAL)
#endif
#ifndef SET_ENDIAN
# define SET_ENDIAN(a,b)	(-EINVAL)
#endif
#ifndef GET_TSC_CTL
# define GET_TSC_CTL(a)		(-EINVAL)
#endif
#ifndef SET_TSC_CTL
# define SET_TSC_CTL(a)		(-EINVAL)
#endif

/*
 * this is where the system-wide overflow UID and GID are defined, for
 * architectures that now have 32-bit UID/GID but didn't in the past
 */

int overflowuid = DEFAULT_OVERFLOWUID;
int overflowgid = DEFAULT_OVERFLOWGID;

EXPORT_SYMBOL(overflowuid);
EXPORT_SYMBOL(overflowgid);

/*
 * the same as above, but for filesystems which can only store a 16-bit
 * UID and GID. as such, this is needed on all architectures
 */

int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
int fs_overflowgid = DEFAULT_FS_OVERFLOWGID;

EXPORT_SYMBOL(fs_overflowuid);
EXPORT_SYMBOL(fs_overflowgid);

/*
 * this indicates whether you can reboot with ctrl-alt-del: the default is yes
 */

int C_A_D = 1;
struct pid *cad_pid;
EXPORT_SYMBOL(cad_pid);

/*
 * If set, this is used for preparing the system to power off.
 */

void (*pm_power_off_prepare)(void);

/*
 * Returns true if current's euid is same as p's uid or euid,
 * or has CAP_SYS_NICE to p's user_ns.
 *
 * Called with rcu_read_lock, creds are safe
 */
static bool set_one_prio_perm(struct task_struct *p)
{
	const struct cred *cred = current_cred(), *pcred = __task_cred(p);

	if (uid_eq(pcred->uid, cred->euid) ||
	    uid_eq(pcred->euid, cred->euid))
		return true;
	if (ns_capable(pcred->user_ns, CAP_SYS_NICE))
		return true;
	return false;
}

/*
 * set the priority of a task
 * - the caller must hold the RCU read lock
 */
static int set_one_prio(struct task_struct *p, int niceval, int error)
{
	int no_nice;

	if (!set_one_prio_perm(p)) {
		error = -EPERM;
		goto out;
	}
	if (niceval < task_nice(p) && !can_nice(p, niceval)) {
		error = -EACCES;
		goto out;
	}
	no_nice = security_task_setnice(p, niceval);
	if (no_nice) {
		error = no_nice;
		goto out;
	}
	if (error == -ESRCH)
		error = 0;
	set_user_nice(p, niceval);
out:
	return error;
}

SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	const struct cred *cred = current_cred();
	int error = -EINVAL;
	struct pid *pgrp;
	kuid_t uid;

	if (which > PRIO_USER || which < PRIO_PROCESS)
		goto out;

	/* normalize: avoid signed division (rounding problems) */
	error = -ESRCH;
	if (niceval < -20)
		niceval = -20;
	if (niceval > 19)
		niceval = 19;

	rcu_read_lock();
	read_lock(&tasklist_lock);
	switch (which) {
	case PRIO_PROCESS:
		if (who)
			p = find_task_by_vpid(who);
		else
			p = current;
		if (p)
			error = set_one_prio(p, niceval, error);
		break;
	case PRIO_PGRP:
		if (who)
			pgrp = find_vpid(who);
		else
			pgrp = task_pgrp(current);
		do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
			error = set_one_prio(p, niceval, error);
		} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
		break;
	case PRIO_USER:
		uid = make_kuid(cred->user_ns, who);
		user = cred->user;
		if (!who)
			uid = cred->uid;
		else if (!uid_eq(uid, cred->uid) &&
			 !(user = find_user(uid)))
			goto out_unlock;	/* No processes for this user */

		do_each_thread(g, p) {
			if (uid_eq(task_uid(p), uid))
				error = set_one_prio(p, niceval, error);
		} while_each_thread(g, p);
		if (!uid_eq(uid, cred->uid))
			free_uid(user);		/* For find_user() */
		break;
	}
out_unlock:
	read_unlock(&tasklist_lock);
	rcu_read_unlock();
out:
	return error;
}

/*
 * Ugh. To avoid negative return values, "getpriority()" will
 * not return the normal nice-value, but a negated value that
 * has been offset by 20 (ie it returns 40..1 instead of -20..19)
 * to stay compatible.
 */
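/*
 * Illustrative userspace sketch (not part of this file): because of the
 * offset described above, a caller using the raw syscall must clear errno
 * and undo the offset itself, e.g.
 *
 *	errno = 0;
 *	long ret = syscall(SYS_getpriority, PRIO_PROCESS, 0);
 *	if (ret == -1 && errno)
 *		perror("getpriority");
 *	else
 *		nice = 20 - ret;
 *
 * (glibc's getpriority() wrapper performs this translation itself.)
 */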
SYSCALL_DEFINE2(getpriority, int, which, int, who)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	const struct cred *cred = current_cred();
	long niceval, retval = -ESRCH;
	struct pid *pgrp;
	kuid_t uid;

	if (which > PRIO_USER || which < PRIO_PROCESS)
		return -EINVAL;

	rcu_read_lock();
	read_lock(&tasklist_lock);
	switch (which) {
	case PRIO_PROCESS:
		if (who)
			p = find_task_by_vpid(who);
		else
			p = current;
		if (p) {
			niceval = 20 - task_nice(p);
			if (niceval > retval)
				retval = niceval;
		}
		break;
	case PRIO_PGRP:
		if (who)
			pgrp = find_vpid(who);
		else
			pgrp = task_pgrp(current);
		do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
			niceval = 20 - task_nice(p);
			if (niceval > retval)
				retval = niceval;
		} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
		break;
	case PRIO_USER:
		uid = make_kuid(cred->user_ns, who);
		user = cred->user;
		if (!who)
			uid = cred->uid;
		else if (!uid_eq(uid, cred->uid) &&
			 !(user = find_user(uid)))
			goto out_unlock;	/* No processes for this user */

		do_each_thread(g, p) {
			if (uid_eq(task_uid(p), uid)) {
				niceval = 20 - task_nice(p);
				if (niceval > retval)
					retval = niceval;
			}
		} while_each_thread(g, p);
		if (!uid_eq(uid, cred->uid))
			free_uid(user);		/* for find_user() */
		break;
	}
out_unlock:
	read_unlock(&tasklist_lock);
	rcu_read_unlock();

	return retval;
}

/**
 * emergency_restart - reboot the system
 *
 * Without shutting down any hardware or taking any locks
 * reboot the system.  This is called when we know we are in
 * trouble so this is our best effort to reboot.  This is
 * safe to call in interrupt context.
 */
void emergency_restart(void)
{
	kmsg_dump(KMSG_DUMP_EMERG);
	machine_emergency_restart();
}
EXPORT_SYMBOL_GPL(emergency_restart);

void kernel_restart_prepare(char *cmd)
{
	blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
	system_state = SYSTEM_RESTART;
	usermodehelper_disable();
	device_shutdown();
}

/**
 * register_reboot_notifier - Register function to be called at reboot time
 * @nb: Info about notifier function to be called
 *
 * Registers a function with the list of functions
 * to be called at reboot time.
 *
 * Currently always returns zero, as blocking_notifier_chain_register()
 * always returns zero.
 */
int register_reboot_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&reboot_notifier_list, nb);
}
EXPORT_SYMBOL(register_reboot_notifier);

/**
 * unregister_reboot_notifier - Unregister previously registered reboot notifier
 * @nb: Hook to be unregistered
 *
 * Unregisters a previously registered reboot
 * notifier function.
 *
 * Returns zero on success, or %-ENOENT on failure.
 */
int unregister_reboot_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
}
EXPORT_SYMBOL(unregister_reboot_notifier);

/**
 * kernel_restart - reboot the system
 * @cmd: pointer to buffer containing command to execute for restart
 *	 or %NULL
 *
 * Shutdown everything and perform a clean reboot.
 * This is not safe to call in interrupt context.
 */
void kernel_restart(char *cmd)
{
	kernel_restart_prepare(cmd);
	disable_nonboot_cpus();
	syscore_shutdown();
	if (!cmd)
		printk(KERN_EMERG "Restarting system.\n");
	else
		printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
	kmsg_dump(KMSG_DUMP_RESTART);
	machine_restart(cmd);
}
EXPORT_SYMBOL_GPL(kernel_restart);

static void kernel_shutdown_prepare(enum system_states state)
{
	blocking_notifier_call_chain(&reboot_notifier_list,
		(state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL);
	system_state = state;
	usermodehelper_disable();
	device_shutdown();
}
/**
 * kernel_halt - halt the system
 *
 * Shutdown everything and perform a clean system halt.
 */
void kernel_halt(void)
{
	kernel_shutdown_prepare(SYSTEM_HALT);
	disable_nonboot_cpus();
	syscore_shutdown();
	printk(KERN_EMERG "System halted.\n");
	kmsg_dump(KMSG_DUMP_HALT);
	machine_halt();
}

EXPORT_SYMBOL_GPL(kernel_halt);

/**
 * kernel_power_off - power_off the system
 *
 * Shutdown everything and perform a clean system power_off.
 */
void kernel_power_off(void)
{
	kernel_shutdown_prepare(SYSTEM_POWER_OFF);
	if (pm_power_off_prepare)
		pm_power_off_prepare();
	disable_nonboot_cpus();
	syscore_shutdown();
	printk(KERN_EMERG "Power down.\n");
	kmsg_dump(KMSG_DUMP_POWEROFF);
	machine_power_off();
}
EXPORT_SYMBOL_GPL(kernel_power_off);

static DEFINE_MUTEX(reboot_mutex);

/*
 * Reboot system call: for obvious reasons only root may call it,
 * and even root needs to set up some magic numbers in the registers
 * so that some mistake won't make this reboot the whole machine.
 * You can also set the meaning of the ctrl-alt-del-key here.
 *
 * reboot doesn't sync: do that yourself before calling this.
 */
SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
		void __user *, arg)
{
	struct pid_namespace *pid_ns = task_active_pid_ns(current);
	char buffer[256];
	int ret = 0;

	/* We only trust the superuser with rebooting the system. */
	if (!ns_capable(pid_ns->user_ns, CAP_SYS_BOOT))
		return -EPERM;

	/* For safety, we require "magic" arguments. */
	if (magic1 != LINUX_REBOOT_MAGIC1 ||
	    (magic2 != LINUX_REBOOT_MAGIC2 &&
	     magic2 != LINUX_REBOOT_MAGIC2A &&
	     magic2 != LINUX_REBOOT_MAGIC2B &&
	     magic2 != LINUX_REBOOT_MAGIC2C))
		return -EINVAL;

	/*
	 * If pid namespaces are enabled and the current task is in a child
	 * pid_namespace, the command is handled by reboot_pid_ns() which will
	 * call do_exit().
	 */
	ret = reboot_pid_ns(pid_ns, cmd);
	if (ret)
		return ret;

	/* Instead of trying to make the power_off code look like
	 * halt when pm_power_off is not set do it the easy way.
	 */
	if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
		cmd = LINUX_REBOOT_CMD_HALT;

	mutex_lock(&reboot_mutex);
	switch (cmd) {
	case LINUX_REBOOT_CMD_RESTART:
		kernel_restart(NULL);
		break;

	case LINUX_REBOOT_CMD_CAD_ON:
		C_A_D = 1;
		break;

	case LINUX_REBOOT_CMD_CAD_OFF:
		C_A_D = 0;
		break;

	case LINUX_REBOOT_CMD_HALT:
		kernel_halt();
		do_exit(0);
		panic("cannot halt");

	case LINUX_REBOOT_CMD_POWER_OFF:
		kernel_power_off();
		do_exit(0);
		break;

	case LINUX_REBOOT_CMD_RESTART2:
		if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) {
			ret = -EFAULT;
			break;
		}
		buffer[sizeof(buffer) - 1] = '\0';

		kernel_restart(buffer);
		break;

#ifdef CONFIG_KEXEC
	case LINUX_REBOOT_CMD_KEXEC:
		ret = kernel_kexec();
		break;
#endif

#ifdef CONFIG_HIBERNATION
	case LINUX_REBOOT_CMD_SW_SUSPEND:
		ret = hibernate();
		break;
#endif

	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&reboot_mutex);
	return ret;
}

static void deferred_cad(struct work_struct *dummy)
{
	kernel_restart(NULL);
}

/*
 * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
 * As it's called within an interrupt, it may NOT sync: the only choice
 * is whether to reboot at once, or just ignore the ctrl-alt-del.
 */
void ctrl_alt_del(void)
{
	static DECLARE_WORK(cad_work, deferred_cad);

	if (C_A_D)
		schedule_work(&cad_work);
	else
		kill_cad_pid(SIGINT, 1);
}
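/*
 * Illustrative userspace sketch (not part of this file): invoking the
 * reboot syscall above requires CAP_SYS_BOOT plus both magic numbers, and
 * the caller is expected to sync() first since the kernel will not:
 *
 *	sync();
 *	syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2,
 *		LINUX_REBOOT_CMD_RESTART2, "bootloader-arg");
 *
 * "bootloader-arg" stands in for whatever string the platform restart
 * handler expects (a hypothetical value here).
 */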
/*
 * Unprivileged users may change the real gid to the effective gid
 * or vice versa.  (BSD-style)
 *
 * If you set the real gid at all, or set the effective gid to a value not
 * equal to the real gid, then the saved gid is set to the new effective gid.
 *
 * This makes it possible for a setgid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setregid() will be
 * 100% compatible with BSD.  A program which uses just setgid() will be
 * 100% compatible with POSIX with saved IDs.
 *
 * SMP: There are no races, the GIDs are checked only by filesystem
 *      operations (as far as semantic preservation is concerned).
 */
SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kgid_t krgid, kegid;

	krgid = make_kgid(ns, rgid);
	kegid = make_kgid(ns, egid);

	if ((rgid != (gid_t) -1) && !gid_valid(krgid))
		return -EINVAL;
	if ((egid != (gid_t) -1) && !gid_valid(kegid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (rgid != (gid_t) -1) {
		if (gid_eq(old->gid, krgid) ||
		    gid_eq(old->egid, krgid) ||
		    nsown_capable(CAP_SETGID))
			new->gid = krgid;
		else
			goto error;
	}
	if (egid != (gid_t) -1) {
		if (gid_eq(old->gid, kegid) ||
		    gid_eq(old->egid, kegid) ||
		    gid_eq(old->sgid, kegid) ||
		    nsown_capable(CAP_SETGID))
			new->egid = kegid;
		else
			goto error;
	}

	if (rgid != (gid_t) -1 ||
	    (egid != (gid_t) -1 && !gid_eq(kegid, old->gid)))
		new->sgid = new->egid;
	new->fsgid = new->egid;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

/*
 * setgid() is implemented like SysV w/ SAVED_IDS
 *
 * SMP: Same implicit races as above.
 */
SYSCALL_DEFINE1(setgid, gid_t, gid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kgid_t kgid;

	kgid = make_kgid(ns, gid);
	if (!gid_valid(kgid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (nsown_capable(CAP_SETGID))
		new->gid = new->egid = new->sgid = new->fsgid = kgid;
	else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid))
		new->egid = new->fsgid = kgid;
	else
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

/*
 * change the user struct in a credentials set to match the new UID
 */
static int set_user(struct cred *new)
{
	struct user_struct *new_user;

	new_user = alloc_uid(new->uid);
	if (!new_user)
		return -EAGAIN;

	/*
	 * We don't fail in case of NPROC limit excess here because too many
	 * poorly written programs don't check set*uid() return code, assuming
	 * it never fails if called by root.  We may still enforce NPROC limit
	 * for programs doing set*uid()+execve() by harmlessly deferring the
	 * failure to the execve() stage.
	 */
	if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) &&
			new_user != INIT_USER)
		current->flags |= PF_NPROC_EXCEEDED;
	else
		current->flags &= ~PF_NPROC_EXCEEDED;

	free_uid(new->user);
	new->user = new_user;
	return 0;
}

/*
 * Unprivileged users may change the real uid to the effective uid
 * or vice versa.  (BSD-style)
 *
 * If you set the real uid at all, or set the effective uid to a value not
 * equal to the real uid, then the saved uid is set to the new effective uid.
 *
 * This makes it possible for a setuid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setreuid() will be
 * 100% compatible with BSD.  A program which uses just setuid() will be
 * 100% compatible with POSIX with saved IDs.
 */
SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kuid_t kruid, keuid;

	kruid = make_kuid(ns, ruid);
	keuid = make_kuid(ns, euid);

	if ((ruid != (uid_t) -1) && !uid_valid(kruid))
		return -EINVAL;
	if ((euid != (uid_t) -1) && !uid_valid(keuid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (ruid != (uid_t) -1) {
		new->uid = kruid;
		if (!uid_eq(old->uid, kruid) &&
		    !uid_eq(old->euid, kruid) &&
		    !nsown_capable(CAP_SETUID))
			goto error;
	}

	if (euid != (uid_t) -1) {
		new->euid = keuid;
		if (!uid_eq(old->uid, keuid) &&
		    !uid_eq(old->euid, keuid) &&
		    !uid_eq(old->suid, keuid) &&
		    !nsown_capable(CAP_SETUID))
			goto error;
	}

	if (!uid_eq(new->uid, old->uid)) {
		retval = set_user(new);
		if (retval < 0)
			goto error;
	}
	if (ruid != (uid_t) -1 ||
	    (euid != (uid_t) -1 && !uid_eq(keuid, old->uid)))
		new->suid = new->euid;
	new->fsuid = new->euid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_RE);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

/*
 * setuid() is implemented like SysV with SAVED_IDS
 *
 * Note that SAVED_ID's is deficient in that a setuid root program
 * like sendmail, for example, cannot set its uid to be a normal
 * user and then switch back, because if you're root, setuid() sets
 * the saved uid too.  If you don't like this, blame the bright people
 * in the POSIX committee and/or USG.  Note that the BSD-style setreuid()
 * will allow a root program to temporarily drop privileges and be able to
 * regain them by swapping the real and effective uid.
 */
SYSCALL_DEFINE1(setuid, uid_t, uid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kuid_t kuid;

	kuid = make_kuid(ns, uid);
	if (!uid_valid(kuid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (nsown_capable(CAP_SETUID)) {
		new->suid = new->uid = kuid;
		if (!uid_eq(kuid, old->uid)) {
			retval = set_user(new);
			if (retval < 0)
				goto error;
		}
	} else if (!uid_eq(kuid, old->uid) && !uid_eq(kuid, new->suid)) {
		goto error;
	}

	new->fsuid = new->euid = kuid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_ID);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}


/*
 * This function implements a generic ability to update ruid, euid,
 * and suid.  This allows you to implement the 4.4 compatible seteuid().
 */
SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kuid_t kruid, keuid, ksuid;

	kruid = make_kuid(ns, ruid);
	keuid = make_kuid(ns, euid);
	ksuid = make_kuid(ns, suid);

	if ((ruid != (uid_t) -1) && !uid_valid(kruid))
		return -EINVAL;

	if ((euid != (uid_t) -1) && !uid_valid(keuid))
		return -EINVAL;

	if ((suid != (uid_t) -1) && !uid_valid(ksuid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;

	old = current_cred();

	retval = -EPERM;
	if (!nsown_capable(CAP_SETUID)) {
		if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) &&
		    !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
			goto error;
		if (euid != (uid_t) -1 && !uid_eq(keuid, old->uid) &&
		    !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid))
			goto error;
		if (suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) &&
		    !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid))
			goto error;
	}

	if (ruid != (uid_t) -1) {
		new->uid = kruid;
		if (!uid_eq(kruid, old->uid)) {
			retval = set_user(new);
			if (retval < 0)
				goto error;
		}
	}
	if (euid != (uid_t) -1)
		new->euid = keuid;
	if (suid != (uid_t) -1)
		new->suid = ksuid;
	new->fsuid = new->euid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_RES);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

SYSCALL_DEFINE3(getresuid, uid_t __user *, ruidp, uid_t __user *, euidp, uid_t __user *, suidp)
{
	const struct cred *cred = current_cred();
	int retval;
	uid_t ruid, euid, suid;

	ruid = from_kuid_munged(cred->user_ns, cred->uid);
	euid = from_kuid_munged(cred->user_ns, cred->euid);
	suid = from_kuid_munged(cred->user_ns, cred->suid);

	if (!(retval = put_user(ruid, ruidp)) &&
	    !(retval = put_user(euid, euidp)))
		retval = put_user(suid, suidp);

	return retval;
}
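/*
 * Illustrative userspace sketch (not part of this file): the usual way for
 * a privileged program to drop to an unprivileged uid irrevocably is to set
 * all three ids with setresuid() and then verify with getresuid(), e.g.
 *
 *	uid_t r, e, s;
 *	if (setresuid(uid, uid, uid) < 0)
 *		abort();
 *	if (getresuid(&r, &e, &s) < 0 || r != uid || e != uid || s != uid)
 *		abort();
 */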
/*
 * Same as above, but for rgid, egid, sgid.
 */
SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kgid_t krgid, kegid, ksgid;

	krgid = make_kgid(ns, rgid);
	kegid = make_kgid(ns, egid);
	ksgid = make_kgid(ns, sgid);

	if ((rgid != (gid_t) -1) && !gid_valid(krgid))
		return -EINVAL;
	if ((egid != (gid_t) -1) && !gid_valid(kegid))
		return -EINVAL;
	if ((sgid != (gid_t) -1) && !gid_valid(ksgid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (!nsown_capable(CAP_SETGID)) {
		if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) &&
		    !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid))
			goto error;
		if (egid != (gid_t) -1 && !gid_eq(kegid, old->gid) &&
		    !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid))
			goto error;
		if (sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) &&
		    !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid))
			goto error;
	}

	if (rgid != (gid_t) -1)
		new->gid = krgid;
	if (egid != (gid_t) -1)
		new->egid = kegid;
	if (sgid != (gid_t) -1)
		new->sgid = ksgid;
	new->fsgid = new->egid;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t __user *, sgidp)
{
	const struct cred *cred = current_cred();
	int retval;
	gid_t rgid, egid, sgid;

	rgid = from_kgid_munged(cred->user_ns, cred->gid);
	egid = from_kgid_munged(cred->user_ns, cred->egid);
	sgid = from_kgid_munged(cred->user_ns, cred->sgid);

	if (!(retval = put_user(rgid, rgidp)) &&
	    !(retval = put_user(egid, egidp)))
		retval = put_user(sgid, sgidp);

	return retval;
}


/*
 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
 * is used for "access()" and for the NFS daemon (letting nfsd stay at
 * whatever uid it wants to). It normally shadows "euid", except when
 * explicitly set by setfsuid() or for access..
 */
SYSCALL_DEFINE1(setfsuid, uid_t, uid)
{
	const struct cred *old;
	struct cred *new;
	uid_t old_fsuid;
	kuid_t kuid;

	old = current_cred();
	old_fsuid = from_kuid_munged(old->user_ns, old->fsuid);

	kuid = make_kuid(old->user_ns, uid);
	if (!uid_valid(kuid))
		return old_fsuid;

	new = prepare_creds();
	if (!new)
		return old_fsuid;

	if (uid_eq(kuid, old->uid) || uid_eq(kuid, old->euid) ||
	    uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) ||
	    nsown_capable(CAP_SETUID)) {
		if (!uid_eq(kuid, old->fsuid)) {
			new->fsuid = kuid;
			if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
				goto change_okay;
		}
	}

	abort_creds(new);
	return old_fsuid;

change_okay:
	commit_creds(new);
	return old_fsuid;
}
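/*
 * Note: setfsuid() above returns the previous fsuid whether or not the
 * requested change was applied, so userspace cannot detect failure from a
 * single call; the usual idiom (illustrative, not part of this file) is to
 * read the value back with a second, no-op call:
 *
 *	setfsuid(uid);
 *	if ((uid_t)setfsuid(-1) != uid)
 *		... the change was refused ...
 */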
/*
 * Samma på svenska.. ("same thing in Swedish" - the fsgid counterpart)
 */
SYSCALL_DEFINE1(setfsgid, gid_t, gid)
{
	const struct cred *old;
	struct cred *new;
	gid_t old_fsgid;
	kgid_t kgid;

	old = current_cred();
	old_fsgid = from_kgid_munged(old->user_ns, old->fsgid);

	kgid = make_kgid(old->user_ns, gid);
	if (!gid_valid(kgid))
		return old_fsgid;

	new = prepare_creds();
	if (!new)
		return old_fsgid;

	if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->egid) ||
	    gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) ||
	    nsown_capable(CAP_SETGID)) {
		if (!gid_eq(kgid, old->fsgid)) {
			new->fsgid = kgid;
			goto change_okay;
		}
	}

	abort_creds(new);
	return old_fsgid;

change_okay:
	commit_creds(new);
	return old_fsgid;
}

/**
 * sys_getpid - return the thread group id of the current process
 *
 * Note, despite the name, this returns the tgid not the pid.  The tgid and
 * the pid are identical unless CLONE_THREAD was specified on clone() in
 * which case the tgid is the same in all threads of the same group.
 *
 * This is SMP safe as current->tgid does not change.
 */
SYSCALL_DEFINE0(getpid)
{
	return task_tgid_vnr(current);
}

/* Thread ID - the internal kernel "pid" */
SYSCALL_DEFINE0(gettid)
{
	return task_pid_vnr(current);
}

/*
 * Accessing ->real_parent is not SMP-safe, it could
 * change from under us. However, we can use a stale
 * value of ->real_parent under rcu_read_lock(), see
 * release_task()->call_rcu(delayed_put_task_struct).
 */
SYSCALL_DEFINE0(getppid)
{
	int pid;

	rcu_read_lock();
	pid = task_tgid_vnr(rcu_dereference(current->real_parent));
	rcu_read_unlock();

	return pid;
}

SYSCALL_DEFINE0(getuid)
{
	/* Only we change this so SMP safe */
	return from_kuid_munged(current_user_ns(), current_uid());
}

SYSCALL_DEFINE0(geteuid)
{
	/* Only we change this so SMP safe */
	return from_kuid_munged(current_user_ns(), current_euid());
}

SYSCALL_DEFINE0(getgid)
{
	/* Only we change this so SMP safe */
	return from_kgid_munged(current_user_ns(), current_gid());
}

SYSCALL_DEFINE0(getegid)
{
	/* Only we change this so SMP safe */
	return from_kgid_munged(current_user_ns(), current_egid());
}

void do_sys_times(struct tms *tms)
{
	cputime_t tgutime, tgstime, cutime, cstime;

	spin_lock_irq(&current->sighand->siglock);
	thread_group_cputime_adjusted(current, &tgutime, &tgstime);
	cutime = current->signal->cutime;
	cstime = current->signal->cstime;
	spin_unlock_irq(&current->sighand->siglock);
	tms->tms_utime = cputime_to_clock_t(tgutime);
	tms->tms_stime = cputime_to_clock_t(tgstime);
	tms->tms_cutime = cputime_to_clock_t(cutime);
	tms->tms_cstime = cputime_to_clock_t(cstime);
}

SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
{
	if (tbuf) {
		struct tms tmp;

		do_sys_times(&tmp);
		if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
			return -EFAULT;
	}
	force_successful_syscall_return();
	return (long) jiffies_64_to_clock_t(get_jiffies_64());
}
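/*
 * Note: the tms fields filled in above and the return value of times() are
 * expressed in USER_HZ clock ticks (see cputime_to_clock_t() and
 * jiffies_64_to_clock_t()), so userspace converts with sysconf(_SC_CLK_TCK)
 * rather than the kernel's internal HZ.  Illustrative sketch (not part of
 * this file):
 *
 *	struct tms t;
 *	times(&t);
 *	double user_secs = (double)t.tms_utime / sysconf(_SC_CLK_TCK);
 */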
/*
 * This needs some heavy checking ...
 * I just haven't the stomach for it. I also don't fully
 * understand sessions/pgrp etc. Let somebody who does explain it.
 *
 * OK, I think I have the protection semantics right.... this is really
 * only important on a multi-user system anyway, to make sure one user
 * can't send a signal to a process owned by another.  -TYT, 12/12/91
 *
 * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
 * LBT 04.03.94
 */
SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
{
	struct task_struct *p;
	struct task_struct *group_leader = current->group_leader;
	struct pid *pgrp;
	int err;

	if (!pid)
		pid = task_pid_vnr(group_leader);
	if (!pgid)
		pgid = pid;
	if (pgid < 0)
		return -EINVAL;
	rcu_read_lock();

	/* From this point forward we keep holding onto the tasklist lock
	 * so that our parent does not change from under us. -DaveM
	 */
	write_lock_irq(&tasklist_lock);

	err = -ESRCH;
	p = find_task_by_vpid(pid);
	if (!p)
		goto out;

	err = -EINVAL;
	if (!thread_group_leader(p))
		goto out;

	if (same_thread_group(p->real_parent, group_leader)) {
		err = -EPERM;
		if (task_session(p) != task_session(group_leader))
			goto out;
		err = -EACCES;
		if (p->did_exec)
			goto out;
	} else {
		err = -ESRCH;
		if (p != group_leader)
			goto out;
	}

	err = -EPERM;
	if (p->signal->leader)
		goto out;

	pgrp = task_pid(p);
	if (pgid != pid) {
		struct task_struct *g;

		pgrp = find_vpid(pgid);
		g = pid_task(pgrp, PIDTYPE_PGID);
		if (!g || task_session(g) != task_session(group_leader))
			goto out;
	}

	err = security_task_setpgid(p, pgid);
	if (err)
		goto out;

	if (task_pgrp(p) != pgrp)
		change_pid(p, PIDTYPE_PGID, pgrp);

	err = 0;
out:
	/* All paths lead to here, thus we are safe. -DaveM */
	write_unlock_irq(&tasklist_lock);
	rcu_read_unlock();
	return err;
}

SYSCALL_DEFINE1(getpgid, pid_t, pid)
{
	struct task_struct *p;
	struct pid *grp;
	int retval;

	rcu_read_lock();
	if (!pid)
		grp = task_pgrp(current);
	else {
		retval = -ESRCH;
		p = find_task_by_vpid(pid);
		if (!p)
			goto out;
		grp = task_pgrp(p);
		if (!grp)
			goto out;

		retval = security_task_getpgid(p);
		if (retval)
			goto out;
	}
	retval = pid_vnr(grp);
out:
	rcu_read_unlock();
	return retval;
}

#ifdef __ARCH_WANT_SYS_GETPGRP

SYSCALL_DEFINE0(getpgrp)
{
	return sys_getpgid(0);
}

#endif

SYSCALL_DEFINE1(getsid, pid_t, pid)
{
	struct task_struct *p;
	struct pid *sid;
	int retval;

	rcu_read_lock();
	if (!pid)
		sid = task_session(current);
	else {
		retval = -ESRCH;
		p = find_task_by_vpid(pid);
		if (!p)
			goto out;
		sid = task_session(p);
		if (!sid)
			goto out;

		retval = security_task_getsid(p);
		if (retval)
			goto out;
	}
	retval = pid_vnr(sid);
out:
	rcu_read_unlock();
	return retval;
}

SYSCALL_DEFINE0(setsid)
{
	struct task_struct *group_leader = current->group_leader;
	struct pid *sid = task_pid(group_leader);
	pid_t session = pid_vnr(sid);
	int err = -EPERM;

	write_lock_irq(&tasklist_lock);
	/* Fail if I am already a session leader */
	if (group_leader->signal->leader)
		goto out;

	/* Fail if a process group id already exists that equals the
	 * proposed session id.
	 */
	if (pid_task(sid, PIDTYPE_PGID))
		goto out;

	group_leader->signal->leader = 1;
	__set_special_pids(sid);

	proc_clear_tty(group_leader);

	err = session;
out:
	write_unlock_irq(&tasklist_lock);
	if (err > 0) {
		proc_sid_connector(group_leader);
		sched_autogroup_create_attach(group_leader);
	}
	return err;
}
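/*
 * Note: setsid() above fails with -EPERM when the caller is already a
 * session leader or when a process group with the caller's pid already
 * exists (i.e. the caller is a process-group leader).  That is why the
 * classic userspace daemonization sequence forks first, so the child is
 * guaranteed not to be a group leader (illustrative, not part of this file):
 *
 *	if (fork() > 0)
 *		exit(0);
 *	if (setsid() < 0)
 *		perror("setsid");
 */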
DECLARE_RWSEM(uts_sem);

#ifdef COMPAT_UTS_MACHINE
#define override_architecture(name) \
	(personality(current->personality) == PER_LINUX32 && \
	 copy_to_user(name->machine, COMPAT_UTS_MACHINE, \
		      sizeof(COMPAT_UTS_MACHINE)))
#else
#define override_architecture(name)	0
#endif

/*
 * Work around broken programs that cannot handle "Linux 3.0".
 * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
 */
static int override_release(char __user *release, size_t len)
{
	int ret = 0;

	if (current->personality & UNAME26) {
		const char *rest = UTS_RELEASE;
		char buf[65] = { 0 };
		int ndots = 0;
		unsigned v;
		size_t copy;

		while (*rest) {
			if (*rest == '.' && ++ndots >= 3)
				break;
			if (!isdigit(*rest) && *rest != '.')
				break;
			rest++;
		}
		v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40;
		copy = clamp_t(size_t, len, 1, sizeof(buf));
		copy = scnprintf(buf, copy, "2.6.%u%s", v, rest);
		ret = copy_to_user(release, buf, copy + 1);
	}
	return ret;
}

SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
{
	int errno = 0;

	down_read(&uts_sem);
	if (copy_to_user(name, utsname(), sizeof *name))
		errno = -EFAULT;
	up_read(&uts_sem);

	if (!errno && override_release(name->release, sizeof(name->release)))
		errno = -EFAULT;
	if (!errno && override_architecture(name))
		errno = -EFAULT;
	return errno;
}

#ifdef __ARCH_WANT_SYS_OLD_UNAME
/*
 * Old cruft
 */
SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
{
	int error = 0;

	if (!name)
		return -EFAULT;

	down_read(&uts_sem);
	if (copy_to_user(name, utsname(), sizeof(*name)))
		error = -EFAULT;
	up_read(&uts_sem);

	if (!error && override_release(name->release, sizeof(name->release)))
		error = -EFAULT;
	if (!error && override_architecture(name))
		error = -EFAULT;
	return error;
}

SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
{
	int error;

	if (!name)
		return -EFAULT;
	if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
		return -EFAULT;

	down_read(&uts_sem);
	error = __copy_to_user(&name->sysname, &utsname()->sysname,
			       __OLD_UTS_LEN);
	error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->nodename, &utsname()->nodename,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->release, &utsname()->release,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->release + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->version, &utsname()->version,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->version + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->machine, &utsname()->machine,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->machine + __OLD_UTS_LEN);
	up_read(&uts_sem);

	if (!error && override_architecture(name))
		error = -EFAULT;
	if (!error && override_release(name->release, sizeof(name->release)))
		error = -EFAULT;
	return error ? -EFAULT : 0;
}
#endif

SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
{
	int errno;
	char tmp[__NEW_UTS_LEN];

	if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
		return -EPERM;

	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;
	down_write(&uts_sem);
	errno = -EFAULT;
	if (!copy_from_user(tmp, name, len)) {
		struct new_utsname *u = utsname();

		memcpy(u->nodename, tmp, len);
		memset(u->nodename + len, 0, sizeof(u->nodename) - len);
		errno = 0;
		uts_proc_notify(UTS_PROC_HOSTNAME);
	}
	up_write(&uts_sem);
	return errno;
}

#ifdef __ARCH_WANT_SYS_GETHOSTNAME

SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
{
	int i, errno;
	struct new_utsname *u;

	if (len < 0)
		return -EINVAL;
	down_read(&uts_sem);
	u = utsname();
	i = 1 + strlen(u->nodename);
	if (i > len)
		i = len;
	errno = 0;
	if (copy_to_user(name, u->nodename, i))
		errno = -EFAULT;
	up_read(&uts_sem);
	return errno;
}

#endif

/*
 * Only setdomainname; getdomainname can be implemented by calling
 * uname()
 */
SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
{
	int errno;
	char tmp[__NEW_UTS_LEN];

	if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
		return -EPERM;
	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;

	down_write(&uts_sem);
	errno = -EFAULT;
	if (!copy_from_user(tmp, name, len)) {
		struct new_utsname *u = utsname();

		memcpy(u->domainname, tmp, len);
		memset(u->domainname + len, 0, sizeof(u->domainname) - len);
		errno = 0;
		uts_proc_notify(UTS_PROC_DOMAINNAME);
	}
	up_write(&uts_sem);
	return errno;
}

SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
	struct rlimit value;
	int ret;

	ret = do_prlimit(current, resource, NULL, &value);
	if (!ret)
		ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;

	return ret;
}

#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT

/*
 * Back compatibility for getrlimit. Needed for some apps.
 */

SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
		struct rlimit __user *, rlim)
{
	struct rlimit x;
	if (resource >= RLIM_NLIMITS)
		return -EINVAL;

	task_lock(current->group_leader);
	x = current->signal->rlim[resource];
	task_unlock(current->group_leader);
	if (x.rlim_cur > 0x7FFFFFFF)
		x.rlim_cur = 0x7FFFFFFF;
	if (x.rlim_max > 0x7FFFFFFF)
		x.rlim_max = 0x7FFFFFFF;
	return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0;
}

#endif

static inline bool rlim64_is_infinity(__u64 rlim64)
{
#if BITS_PER_LONG < 64
	return rlim64 >= ULONG_MAX;
#else
	return rlim64 == RLIM64_INFINITY;
#endif
}

static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64)
{
	if (rlim->rlim_cur == RLIM_INFINITY)
		rlim64->rlim_cur = RLIM64_INFINITY;
	else
		rlim64->rlim_cur = rlim->rlim_cur;
	if (rlim->rlim_max == RLIM_INFINITY)
		rlim64->rlim_max = RLIM64_INFINITY;
	else
		rlim64->rlim_max = rlim->rlim_max;
}

static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim)
{
	if (rlim64_is_infinity(rlim64->rlim_cur))
		rlim->rlim_cur = RLIM_INFINITY;
	else
		rlim->rlim_cur = (unsigned long)rlim64->rlim_cur;
	if (rlim64_is_infinity(rlim64->rlim_max))
		rlim->rlim_max = RLIM_INFINITY;
	else
		rlim->rlim_max = (unsigned long)rlim64->rlim_max;
}

/* make sure you are allowed to change @tsk limits before calling this */
int do_prlimit(struct task_struct *tsk, unsigned int resource,
		struct rlimit *new_rlim, struct rlimit *old_rlim)
{
	struct rlimit *rlim;
	int retval = 0;

	if (resource >= RLIM_NLIMITS)
		return -EINVAL;
	if (new_rlim) {
		if (new_rlim->rlim_cur > new_rlim->rlim_max)
			return -EINVAL;
		if (resource == RLIMIT_NOFILE &&
				new_rlim->rlim_max > sysctl_nr_open)
			return -EPERM;
	}

	/* protect tsk->signal and tsk->sighand from disappearing */
	read_lock(&tasklist_lock);
	if (!tsk->sighand) {
		retval = -ESRCH;
		goto out;
	}

	rlim = tsk->signal->rlim + resource;
	task_lock(tsk->group_leader);
	if (new_rlim) {
		/* Keep the capable check against init_user_ns until
		   cgroups can contain all limits */
		if (new_rlim->rlim_max > rlim->rlim_max &&
				!capable(CAP_SYS_RESOURCE))
			retval = -EPERM;
		if (!retval)
			retval = security_task_setrlimit(tsk->group_leader,
					resource, new_rlim);
		if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
			/*
			 * The caller is asking for an immediate RLIMIT_CPU
			 * expiry.  But we use the zero value to mean "it was
			 * never set".  So let's cheat and make it one second
			 * instead
			 */
			new_rlim->rlim_cur = 1;
		}
	}
	if (!retval) {
		if (old_rlim)
			*old_rlim = *rlim;
		if (new_rlim)
			*rlim = *new_rlim;
	}
	task_unlock(tsk->group_leader);

	/*
	 * RLIMIT_CPU handling.  Note that the kernel fails to return an error
	 * code if it rejected the user's attempt to set RLIMIT_CPU.  This is a
	 * very long-standing error, and fixing it now risks breakage of
	 * applications, so we live with it
	 */
	if (!retval && new_rlim && resource == RLIMIT_CPU &&
	    new_rlim->rlim_cur != RLIM_INFINITY)
		update_rlimit_cpu(tsk, new_rlim->rlim_cur);
out:
	read_unlock(&tasklist_lock);
	return retval;
}

/* rcu lock must be held */
static int check_prlimit_permission(struct task_struct *task)
{
	const struct cred *cred = current_cred(), *tcred;

	if (current == task)
		return 0;

	tcred = __task_cred(task);
	if (uid_eq(cred->uid, tcred->euid) &&
	    uid_eq(cred->uid, tcred->suid) &&
	    uid_eq(cred->uid, tcred->uid) &&
	    gid_eq(cred->gid, tcred->egid) &&
	    gid_eq(cred->gid, tcred->sgid) &&
	    gid_eq(cred->gid, tcred->gid))
		return 0;
	if (ns_capable(tcred->user_ns, CAP_SYS_RESOURCE))
		return 0;

	return -EPERM;
}

SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
		const struct rlimit64 __user *, new_rlim,
		struct rlimit64 __user *, old_rlim)
{
	struct rlimit64 old64, new64;
	struct rlimit old, new;
	struct task_struct *tsk;
	int ret;

	if (new_rlim) {
		if (copy_from_user(&new64, new_rlim, sizeof(new64)))
			return -EFAULT;
		rlim64_to_rlim(&new64, &new);
	}

	rcu_read_lock();
	tsk = pid ? find_task_by_vpid(pid) : current;
	if (!tsk) {
		rcu_read_unlock();
		return -ESRCH;
	}
	ret = check_prlimit_permission(tsk);
	if (ret) {
		rcu_read_unlock();
		return ret;
	}
	get_task_struct(tsk);
	rcu_read_unlock();

	ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL,
			old_rlim ? &old : NULL);

	if (!ret && old_rlim) {
		rlim_to_rlim64(&old, &old64);
		if (copy_to_user(old_rlim, &old64, sizeof(old64)))
			ret = -EFAULT;
	}

	put_task_struct(tsk);
	return ret;
}

SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
	struct rlimit new_rlim;

	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
		return -EFAULT;
	return do_prlimit(current, resource, &new_rlim, NULL);
}
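/*
 * Illustrative userspace sketch (not part of this file): prlimit64 reads
 * and/or writes another task's limits in one call; passing NULL for either
 * pointer skips that direction.  glibc exposes it as prlimit(), e.g. to
 * raise RLIMIT_NOFILE of process <pid>:
 *
 *	struct rlimit new = { .rlim_cur = 4096, .rlim_max = 4096 }, old;
 *	if (prlimit(pid, RLIMIT_NOFILE, &new, &old) < 0)
 *		perror("prlimit");
 */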
/*
 * It would make sense to put struct rusage in the task_struct,
 * except that would make the task_struct be *really big*.  After
 * task_struct gets moved into malloc'ed memory, it would
 * make sense to do this.  It will make moving the rest of the information
 * a lot simpler!  (Which we're not doing right now because we're not
 * measuring them yet).
 *
 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
 * races with threads incrementing their own counters.  But since word
 * reads are atomic, we either get new values or old values and we don't
 * care which for the sums.  We always take the siglock to protect reading
 * the c* fields from p->signal from races with exit.c updating those
 * fields when reaping, so a sample either gets all the additions of a
 * given child after it's reaped, or none so this sample is before reaping.
 *
 * Locking:
 * We need to take the siglock for CHILDREN, SELF and BOTH
 * for the cases current multithreaded, non-current single threaded
 * non-current multithreaded.  Thread traversal is now safe with
 * the siglock held.
 * Strictly speaking, we do not need to take the siglock if we are current and
 * single threaded, as no one else can take our signal_struct away, no one
 * else can reap the children to update signal->c* counters, and no one else
 * can race with the signal-> fields.  If we do not take any lock, the
 * signal-> fields could be read out of order while another thread was just
 * exiting.  So we should place a read memory barrier when we avoid the lock.
 * On the writer side, a write memory barrier is implied in __exit_signal,
 * as __exit_signal releases the siglock spinlock after updating the signal->
 * fields.  But we don't do this yet to keep things simple.
 *
 */

static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
{
	r->ru_nvcsw += t->nvcsw;
	r->ru_nivcsw += t->nivcsw;
	r->ru_minflt += t->min_flt;
	r->ru_majflt += t->maj_flt;
	r->ru_inblock += task_io_get_inblock(t);
	r->ru_oublock += task_io_get_oublock(t);
}

static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
{
	struct task_struct *t;
	unsigned long flags;
	cputime_t tgutime, tgstime, utime, stime;
	unsigned long maxrss = 0;

	memset((char *) r, 0, sizeof *r);
	utime = stime = 0;

	if (who == RUSAGE_THREAD) {
		task_cputime_adjusted(current, &utime, &stime);
		accumulate_thread_rusage(p, r);
		maxrss = p->signal->maxrss;
		goto out;
	}

	if (!lock_task_sighand(p, &flags))
		return;

	switch (who) {
	case RUSAGE_BOTH:
	case RUSAGE_CHILDREN:
		utime = p->signal->cutime;
		stime = p->signal->cstime;
		r->ru_nvcsw = p->signal->cnvcsw;
		r->ru_nivcsw = p->signal->cnivcsw;
		r->ru_minflt = p->signal->cmin_flt;
		r->ru_majflt = p->signal->cmaj_flt;
		r->ru_inblock = p->signal->cinblock;
		r->ru_oublock = p->signal->coublock;
		maxrss = p->signal->cmaxrss;

		if (who == RUSAGE_CHILDREN)
			break;

	case RUSAGE_SELF:
		thread_group_cputime_adjusted(p, &tgutime, &tgstime);
		utime += tgutime;
		stime += tgstime;
		r->ru_nvcsw += p->signal->nvcsw;
		r->ru_nivcsw += p->signal->nivcsw;
		r->ru_minflt += p->signal->min_flt;
		r->ru_majflt += p->signal->maj_flt;
		r->ru_inblock += p->signal->inblock;
		r->ru_oublock += p->signal->oublock;
		if (maxrss < p->signal->maxrss)
			maxrss = p->signal->maxrss;
		t = p;
		do {
			accumulate_thread_rusage(t, r);
			t = next_thread(t);
		} while (t != p);
		break;

	default:
		BUG();
	}
	unlock_task_sighand(p, &flags);

out:
	cputime_to_timeval(utime, &r->ru_utime);
	cputime_to_timeval(stime, &r->ru_stime);

	if (who != RUSAGE_CHILDREN) {
		struct mm_struct *mm = get_task_mm(p);
		if (mm) {
			setmax_mm_hiwater_rss(&maxrss, mm);
			mmput(mm);
		}
	}
	r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
}

int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
{
	struct rusage r;
	k_getrusage(p, who, &r);
	return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
}

SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
{
	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
	    who != RUSAGE_THREAD)
		return -EINVAL;
	return getrusage(current, who, ru);
}

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE2(getrusage, int, who, struct compat_rusage __user *, ru)
{
	struct rusage r;

	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
	    who != RUSAGE_THREAD)
		return -EINVAL;

	k_getrusage(current, who, &r);
	return put_compat_rusage(&r, ru);
}
#endif

SYSCALL_DEFINE1(umask, int, mask)
{
	mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
	return mask;
}
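/*
 * Note: umask() above returns the previous mask and there is no separate
 * "get" operation, so userspace that only wants to inspect the mask sets a
 * temporary value and immediately restores it (illustrative, not part of
 * this file):
 *
 *	mode_t cur = umask(0);
 *	umask(cur);
 */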
static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
{
	struct fd exe;
	struct inode *inode;
	int err;

	exe = fdget(fd);
	if (!exe.file)
		return -EBADF;

	inode = file_inode(exe.file);

	/*
	 * Because the original mm->exe_file points to executable file, make
	 * sure that this one is executable as well, to avoid breaking an
	 * overall picture.
	 */
	err = -EACCES;
	if (!S_ISREG(inode->i_mode) ||
	    exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC)
		goto exit;

	err = inode_permission(inode, MAY_EXEC);
	if (err)
		goto exit;

	down_write(&mm->mmap_sem);

	/*
	 * Forbid mm->exe_file change if old file still mapped.
	 */
	err = -EBUSY;
	if (mm->exe_file) {
		struct vm_area_struct *vma;

		for (vma = mm->mmap; vma; vma = vma->vm_next)
			if (vma->vm_file &&
			    path_equal(&vma->vm_file->f_path,
				       &mm->exe_file->f_path))
				goto exit_unlock;
	}

	/*
	 * The symlink can be changed only once, just to disallow arbitrary
	 * transitions malicious software might bring in. This means one
	 * could make a snapshot over all processes running and monitor
	 * /proc/pid/exe changes to notice unusual activity if needed.
	 */
	err = -EPERM;
	if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags))
		goto exit_unlock;

	err = 0;
	set_mm_exe_file(mm, exe.file);	/* this grabs a reference to exe.file */
exit_unlock:
	up_write(&mm->mmap_sem);

exit:
	fdput(exe);
	return err;
}

static int prctl_set_mm(int opt, unsigned long addr,
			unsigned long arg4, unsigned long arg5)
{
	unsigned long rlim = rlimit(RLIMIT_DATA);
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int error;

	if (arg5 || (arg4 && opt != PR_SET_MM_AUXV))
		return -EINVAL;

	if (!capable(CAP_SYS_RESOURCE))
		return -EPERM;

	if (opt == PR_SET_MM_EXE_FILE)
		return prctl_set_mm_exe_file(mm, (unsigned int)addr);

	if (addr >= TASK_SIZE || addr < mmap_min_addr)
		return -EINVAL;

	error = -EINVAL;

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, addr);

	switch (opt) {
	case PR_SET_MM_START_CODE:
		mm->start_code = addr;
		break;
	case PR_SET_MM_END_CODE:
		mm->end_code = addr;
		break;
	case PR_SET_MM_START_DATA:
		mm->start_data = addr;
		break;
	case PR_SET_MM_END_DATA:
		mm->end_data = addr;
		break;

	case PR_SET_MM_START_BRK:
		if (addr <= mm->end_data)
			goto out;

		if (rlim < RLIM_INFINITY &&
		    (mm->brk - addr) +
		    (mm->end_data - mm->start_data) > rlim)
			goto out;

		mm->start_brk = addr;
		break;

	case PR_SET_MM_BRK:
		if (addr <= mm->end_data)
			goto out;

		if (rlim < RLIM_INFINITY &&
		    (addr - mm->start_brk) +
		    (mm->end_data - mm->start_data) > rlim)
			goto out;

		mm->brk = addr;
		break;

	/*
	 * If command line arguments and environment
	 * are placed somewhere else on stack, we can
	 * set them up here, ARG_START/END to setup
	 * command line arguments and ENV_START/END
	 * for environment.
	 */
	case PR_SET_MM_START_STACK:
	case PR_SET_MM_ARG_START:
	case PR_SET_MM_ARG_END:
	case PR_SET_MM_ENV_START:
	case PR_SET_MM_ENV_END:
		if (!vma) {
			error = -EFAULT;
			goto out;
		}
		if (opt == PR_SET_MM_START_STACK)
			mm->start_stack = addr;
		else if (opt == PR_SET_MM_ARG_START)
			mm->arg_start = addr;
		else if (opt == PR_SET_MM_ARG_END)
			mm->arg_end = addr;
		else if (opt == PR_SET_MM_ENV_START)
			mm->env_start = addr;
		else if (opt == PR_SET_MM_ENV_END)
			mm->env_end = addr;
		break;

	/*
	 * This doesn't move auxiliary vector itself
	 * since it's pinned to mm_struct, but allow
	 * to fill vector with new values.  It's up
	 * to a caller to provide sane values here
	 * otherwise user space tools which use this
	 * vector might be unhappy.
	 */
	case PR_SET_MM_AUXV: {
		unsigned long user_auxv[AT_VECTOR_SIZE];

		if (arg4 > sizeof(user_auxv))
			goto out;
		up_read(&mm->mmap_sem);

		if (copy_from_user(user_auxv, (const void __user *)addr, arg4))
			return -EFAULT;

		/* Make sure the last entry is always AT_NULL */
		user_auxv[AT_VECTOR_SIZE - 2] = 0;
		user_auxv[AT_VECTOR_SIZE - 1] = 0;

		BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));

		task_lock(current);
		memcpy(mm->saved_auxv, user_auxv, arg4);
		task_unlock(current);

		return 0;
	}
	default:
		goto out;
	}

	error = 0;
out:
	up_read(&mm->mmap_sem);
	return error;
}

#ifdef CONFIG_CHECKPOINT_RESTORE
static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
{
	return put_user(me->clear_child_tid, tid_addr);
}
#else
static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
{
	return -EINVAL;
}
#endif

SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
		unsigned long, arg4, unsigned long, arg5)
{
	struct task_struct *me = current;
	unsigned char comm[sizeof(me->comm)];
	long error;

	error = security_task_prctl(option, arg2, arg3, arg4, arg5);
	if (error != -ENOSYS)
		return error;

	error = 0;
	switch (option) {
	case PR_SET_PDEATHSIG:
		if (!valid_signal(arg2)) {
			error = -EINVAL;
			break;
		}
		me->pdeath_signal = arg2;
		break;
	case PR_GET_PDEATHSIG:
		error = put_user(me->pdeath_signal, (int __user *)arg2);
		break;
	case PR_GET_DUMPABLE:
		error = get_dumpable(me->mm);
		break;
	case PR_SET_DUMPABLE:
		if (arg2 != SUID_DUMP_DISABLE && arg2 != SUID_DUMP_USER) {
			error = -EINVAL;
			break;
		}
		set_dumpable(me->mm, arg2);
		break;

	case PR_SET_UNALIGN:
		error = SET_UNALIGN_CTL(me, arg2);
		break;
	case PR_GET_UNALIGN:
		error = GET_UNALIGN_CTL(me, arg2);
		break;
	case PR_SET_FPEMU:
		error = SET_FPEMU_CTL(me, arg2);
		break;
	case PR_GET_FPEMU:
		error = GET_FPEMU_CTL(me, arg2);
		break;
	case PR_SET_FPEXC:
		error = SET_FPEXC_CTL(me, arg2);
		break;
	case PR_GET_FPEXC:
		error = GET_FPEXC_CTL(me, arg2);
		break;
	case PR_GET_TIMING:
		error = PR_TIMING_STATISTICAL;
		break;
	case PR_SET_TIMING:
		if (arg2 != PR_TIMING_STATISTICAL)
			error = -EINVAL;
		break;
	case PR_SET_NAME:
		comm[sizeof(me->comm) - 1] = 0;
		if (strncpy_from_user(comm, (char __user *)arg2,
				      sizeof(me->comm) - 1) < 0)
			return -EFAULT;
		set_task_comm(me, comm);
		proc_comm_connector(me);
		break;
	case PR_GET_NAME:
		get_task_comm(comm, me);
		if (copy_to_user((char __user *)arg2, comm, sizeof(comm)))
			return -EFAULT;
		break;
	case PR_GET_ENDIAN:
		error = GET_ENDIAN(me, arg2);
		break;
	case PR_SET_ENDIAN:
		error = SET_ENDIAN(me, arg2);
		break;
	case PR_GET_SECCOMP:
		error = prctl_get_seccomp();
		break;
	case PR_SET_SECCOMP:
		error = prctl_set_seccomp(arg2, (char __user *)arg3);
		break;
	case PR_GET_TSC:
		error = GET_TSC_CTL(arg2);
		break;
	case PR_SET_TSC:
		error = SET_TSC_CTL(arg2);
		break;
	case PR_TASK_PERF_EVENTS_DISABLE:
		error = perf_event_task_disable();
		break;
	case PR_TASK_PERF_EVENTS_ENABLE:
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
		unsigned long, arg4, unsigned long, arg5)
{
	struct task_struct *me = current;
	unsigned char comm[sizeof(me->comm)];
	long error;

	error = security_task_prctl(option, arg2, arg3, arg4, arg5);
	if (error != -ENOSYS)
		return error;

	error = 0;
	switch (option) {
	case PR_SET_PDEATHSIG:
		if (!valid_signal(arg2)) {
			error = -EINVAL;
			break;
		}
		me->pdeath_signal = arg2;
		break;
	case PR_GET_PDEATHSIG:
		error = put_user(me->pdeath_signal, (int __user *)arg2);
		break;
	case PR_GET_DUMPABLE:
		error = get_dumpable(me->mm);
		break;
	case PR_SET_DUMPABLE:
		if (arg2 != SUID_DUMP_DISABLE && arg2 != SUID_DUMP_USER) {
			error = -EINVAL;
			break;
		}
		set_dumpable(me->mm, arg2);
		break;

	case PR_SET_UNALIGN:
		error = SET_UNALIGN_CTL(me, arg2);
		break;
	case PR_GET_UNALIGN:
		error = GET_UNALIGN_CTL(me, arg2);
		break;
	case PR_SET_FPEMU:
		error = SET_FPEMU_CTL(me, arg2);
		break;
	case PR_GET_FPEMU:
		error = GET_FPEMU_CTL(me, arg2);
		break;
	case PR_SET_FPEXC:
		error = SET_FPEXC_CTL(me, arg2);
		break;
	case PR_GET_FPEXC:
		error = GET_FPEXC_CTL(me, arg2);
		break;
	case PR_GET_TIMING:
		error = PR_TIMING_STATISTICAL;
		break;
	case PR_SET_TIMING:
		if (arg2 != PR_TIMING_STATISTICAL)
			error = -EINVAL;
		break;
	case PR_SET_NAME:
		comm[sizeof(me->comm) - 1] = 0;
		if (strncpy_from_user(comm, (char __user *)arg2,
				      sizeof(me->comm) - 1) < 0)
			return -EFAULT;
		set_task_comm(me, comm);
		proc_comm_connector(me);
		break;
	case PR_GET_NAME:
		get_task_comm(comm, me);
		if (copy_to_user((char __user *)arg2, comm, sizeof(comm)))
			return -EFAULT;
		break;
	case PR_GET_ENDIAN:
		error = GET_ENDIAN(me, arg2);
		break;
	case PR_SET_ENDIAN:
		error = SET_ENDIAN(me, arg2);
		break;
	case PR_GET_SECCOMP:
		error = prctl_get_seccomp();
		break;
	case PR_SET_SECCOMP:
		error = prctl_set_seccomp(arg2, (char __user *)arg3);
		break;
	case PR_GET_TSC:
		error = GET_TSC_CTL(arg2);
		break;
	case PR_SET_TSC:
		error = SET_TSC_CTL(arg2);
		break;
	case PR_TASK_PERF_EVENTS_DISABLE:
		error = perf_event_task_disable();
		break;
	case PR_TASK_PERF_EVENTS_ENABLE:
		error = perf_event_task_enable();
		break;
	case PR_GET_TIMERSLACK:
		error = current->timer_slack_ns;
		break;
	case PR_SET_TIMERSLACK:
		if (arg2 <= 0)
			current->timer_slack_ns =
					current->default_timer_slack_ns;
		else
			current->timer_slack_ns = arg2;
		break;
	case PR_MCE_KILL:
		if (arg4 | arg5)
			return -EINVAL;
		switch (arg2) {
		case PR_MCE_KILL_CLEAR:
			if (arg3 != 0)
				return -EINVAL;
			current->flags &= ~PF_MCE_PROCESS;
			break;
		case PR_MCE_KILL_SET:
			current->flags |= PF_MCE_PROCESS;
			if (arg3 == PR_MCE_KILL_EARLY)
				current->flags |= PF_MCE_EARLY;
			else if (arg3 == PR_MCE_KILL_LATE)
				current->flags &= ~PF_MCE_EARLY;
			else if (arg3 == PR_MCE_KILL_DEFAULT)
				current->flags &=
						~(PF_MCE_EARLY|PF_MCE_PROCESS);
			else
				return -EINVAL;
			break;
		default:
			return -EINVAL;
		}
		break;
	case PR_MCE_KILL_GET:
		if (arg2 | arg3 | arg4 | arg5)
			return -EINVAL;
		if (current->flags & PF_MCE_PROCESS)
			error = (current->flags & PF_MCE_EARLY) ?
				PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE;
		else
			error = PR_MCE_KILL_DEFAULT;
		break;
	case PR_SET_MM:
		error = prctl_set_mm(arg2, arg3, arg4, arg5);
		break;
	case PR_GET_TID_ADDRESS:
		error = prctl_get_tid_address(me, (int __user **)arg2);
		break;
	case PR_SET_CHILD_SUBREAPER:
		me->signal->is_child_subreaper = !!arg2;
		break;
	case PR_GET_CHILD_SUBREAPER:
		error = put_user(me->signal->is_child_subreaper,
				 (int __user *)arg2);
		break;
	case PR_SET_NO_NEW_PRIVS:
		if (arg2 != 1 || arg3 || arg4 || arg5)
			return -EINVAL;

		current->no_new_privs = 1;
		break;
	case PR_GET_NO_NEW_PRIVS:
		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;
		return current->no_new_privs ? 1 : 0;
	default:
		error = -EINVAL;
		break;
	}
	return error;
}
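/*
 * Example (illustrative userspace usage of the prctl() options handled
 * above): task names are limited to sizeof(task_struct.comm) bytes
 * (TASK_COMM_LEN, 16 including the trailing NUL), so longer names are
 * silently truncated by PR_SET_NAME:
 *
 *	char name[16];
 *
 *	prctl(PR_SET_NAME, "worker-thread", 0, 0, 0);
 *	prctl(PR_GET_NAME, name, 0, 0, 0);
 */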
SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
		struct getcpu_cache __user *, unused)
{
	int err = 0;
	int cpu = raw_smp_processor_id();
	if (cpup)
		err |= put_user(cpu, cpup);
	if (nodep)
		err |= put_user(cpu_to_node(cpu), nodep);
	return err ? -EFAULT : 0;
}

char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";

static int __orderly_poweroff(bool force)
{
	char **argv;
	static char *envp[] = {
		"HOME=/",
		"PATH=/sbin:/bin:/usr/sbin:/usr/bin",
		NULL
	};
	int ret;

	argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL);
	if (argv) {
		ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
		argv_free(argv);
	} else {
		printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
		       __func__, poweroff_cmd);
		ret = -ENOMEM;
	}

	if (ret && force) {
		printk(KERN_WARNING "Failed to start orderly shutdown: "
		       "forcing the issue\n");
		/*
		 * I guess this should try to kick off some daemon to sync and
		 * poweroff asap. Or not even bother syncing if we're doing an
		 * emergency shutdown?
		 */
		emergency_sync();
		kernel_power_off();
	}

	return ret;
}

static bool poweroff_force;

static void poweroff_work_func(struct work_struct *work)
{
	__orderly_poweroff(poweroff_force);
}

static DECLARE_WORK(poweroff_work, poweroff_work_func);

/**
 * orderly_poweroff - Trigger an orderly system poweroff
 * @force: force poweroff if command execution fails
 *
 * This may be called from any context to trigger a system shutdown.
 * If the orderly shutdown fails, it will force an immediate shutdown.
 */
int orderly_poweroff(bool force)
{
	if (force) /* do not override the pending "true" */
		poweroff_force = true;
	schedule_work(&poweroff_work);
	return 0;
}
EXPORT_SYMBOL_GPL(orderly_poweroff);
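/*
 * Example (illustrative): a driver that detects a critical condition, e.g.
 * an unrecoverable thermal trip, can request a clean shutdown and fall back
 * to an immediate power-off if the userspace helper cannot be started:
 *
 *	orderly_poweroff(true);
 *
 * Because the actual shutdown runs from the poweroff_work item above, the
 * call only queues work and is safe from atomic context.
 */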
/**
 * do_sysinfo - fill in sysinfo struct
 * @info: pointer to buffer to fill
 */
static int do_sysinfo(struct sysinfo *info)
{
	unsigned long mem_total, sav_total;
	unsigned int mem_unit, bitcount;
	struct timespec tp;

	memset(info, 0, sizeof(struct sysinfo));

	ktime_get_ts(&tp);
	monotonic_to_bootbased(&tp);
	info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);

	get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);

	info->procs = nr_threads;

	si_meminfo(info);
	si_swapinfo(info);

	/*
	 * If the sum of all the available memory (i.e. ram + swap)
	 * is less than can be stored in a 32 bit unsigned long then
	 * we can be binary compatible with 2.2.x kernels.  If not,
	 * well, in that case 2.2.x was broken anyways...
	 *
	 *  -Erik Andersen <andersee@debian.org>
	 */

	mem_total = info->totalram + info->totalswap;
	if (mem_total < info->totalram || mem_total < info->totalswap)
		goto out;
	bitcount = 0;
	mem_unit = info->mem_unit;
	while (mem_unit > 1) {
		bitcount++;
		mem_unit >>= 1;
		sav_total = mem_total;
		mem_total <<= 1;
		if (mem_total < sav_total)
			goto out;
	}

	/*
	 * If mem_total did not overflow, multiply all memory values by
	 * info->mem_unit and set it to 1.  This leaves things compatible
	 * with 2.2.x, and also retains compatibility with earlier 2.4.x
	 * kernels...
	 */

	info->mem_unit = 1;
	info->totalram <<= bitcount;
	info->freeram <<= bitcount;
	info->sharedram <<= bitcount;
	info->bufferram <<= bitcount;
	info->totalswap <<= bitcount;
	info->freeswap <<= bitcount;
	info->totalhigh <<= bitcount;
	info->freehigh <<= bitcount;

out:
	return 0;
}

SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info)
{
	struct sysinfo val;

	do_sysinfo(&val);

	if (copy_to_user(info, &val, sizeof(struct sysinfo)))
		return -EFAULT;

	return 0;
}

#ifdef CONFIG_COMPAT
struct compat_sysinfo {
	s32 uptime;
	u32 loads[3];
	u32 totalram;
	u32 freeram;
	u32 sharedram;
	u32 bufferram;
	u32 totalswap;
	u32 freeswap;
	u16 procs;
	u16 pad;
	u32 totalhigh;
	u32 freehigh;
	u32 mem_unit;
	char _f[20-2*sizeof(u32)-sizeof(int)];
};

COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info)
{
	struct sysinfo s;

	do_sysinfo(&s);

	/* Check to see if any memory value is too large for 32-bit and scale
	 * down if needed
	 */
	if ((s.totalram >> 32) || (s.totalswap >> 32)) {
		int bitcount = 0;

		while (s.mem_unit < PAGE_SIZE) {
			s.mem_unit <<= 1;
			bitcount++;
		}

		s.totalram >>= bitcount;
		s.freeram >>= bitcount;
		s.sharedram >>= bitcount;
		s.bufferram >>= bitcount;
		s.totalswap >>= bitcount;
		s.freeswap >>= bitcount;
		s.totalhigh >>= bitcount;
		s.freehigh >>= bitcount;
	}

	if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) ||
	    __put_user(s.uptime, &info->uptime) ||
	    __put_user(s.loads[0], &info->loads[0]) ||
	    __put_user(s.loads[1], &info->loads[1]) ||
	    __put_user(s.loads[2], &info->loads[2]) ||
	    __put_user(s.totalram, &info->totalram) ||
	    __put_user(s.freeram, &info->freeram) ||
	    __put_user(s.sharedram, &info->sharedram) ||
	    __put_user(s.bufferram, &info->bufferram) ||
	    __put_user(s.totalswap, &info->totalswap) ||
	    __put_user(s.freeswap, &info->freeswap) ||
	    __put_user(s.procs, &info->procs) ||
	    __put_user(s.totalhigh, &info->totalhigh) ||
	    __put_user(s.freehigh, &info->freehigh) ||
	    __put_user(s.mem_unit, &info->mem_unit))
		return -EFAULT;

	return 0;
}
#endif /* CONFIG_COMPAT */
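/*
 * Worked example of the mem_unit scaling above (illustrative, assuming a
 * 64-bit kernel with 4 KiB pages and 8 GiB of RAM): do_sysinfo() converts
 * the page counts from si_meminfo() to bytes, so totalram = 2^33 with
 * mem_unit = 1.  That does not fit in the compat u32 field, so the compat
 * path raises mem_unit back to PAGE_SIZE (bitcount = 12) and shifts the
 * values right, giving totalram = 2^21 = 2097152.  A 32-bit caller then
 * recovers the real size as totalram * mem_unit = 8 GiB.
 */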