1 /* 2 * linux/kernel/sys.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/export.h> 8 #include <linux/mm.h> 9 #include <linux/utsname.h> 10 #include <linux/mman.h> 11 #include <linux/reboot.h> 12 #include <linux/prctl.h> 13 #include <linux/highuid.h> 14 #include <linux/fs.h> 15 #include <linux/kmod.h> 16 #include <linux/perf_event.h> 17 #include <linux/resource.h> 18 #include <linux/kernel.h> 19 #include <linux/workqueue.h> 20 #include <linux/capability.h> 21 #include <linux/device.h> 22 #include <linux/key.h> 23 #include <linux/times.h> 24 #include <linux/posix-timers.h> 25 #include <linux/security.h> 26 #include <linux/dcookies.h> 27 #include <linux/suspend.h> 28 #include <linux/tty.h> 29 #include <linux/signal.h> 30 #include <linux/cn_proc.h> 31 #include <linux/getcpu.h> 32 #include <linux/task_io_accounting_ops.h> 33 #include <linux/seccomp.h> 34 #include <linux/cpu.h> 35 #include <linux/personality.h> 36 #include <linux/ptrace.h> 37 #include <linux/fs_struct.h> 38 #include <linux/file.h> 39 #include <linux/mount.h> 40 #include <linux/gfp.h> 41 #include <linux/syscore_ops.h> 42 #include <linux/version.h> 43 #include <linux/ctype.h> 44 45 #include <linux/compat.h> 46 #include <linux/syscalls.h> 47 #include <linux/kprobes.h> 48 #include <linux/user_namespace.h> 49 #include <linux/binfmts.h> 50 51 #include <linux/sched.h> 52 #include <linux/rcupdate.h> 53 #include <linux/uidgid.h> 54 #include <linux/cred.h> 55 56 #include <linux/kmsg_dump.h> 57 /* Move somewhere else to avoid recompiling? 
*/ 58 #include <generated/utsrelease.h> 59 60 #include <asm/uaccess.h> 61 #include <asm/io.h> 62 #include <asm/unistd.h> 63 64 #ifndef SET_UNALIGN_CTL 65 # define SET_UNALIGN_CTL(a, b) (-EINVAL) 66 #endif 67 #ifndef GET_UNALIGN_CTL 68 # define GET_UNALIGN_CTL(a, b) (-EINVAL) 69 #endif 70 #ifndef SET_FPEMU_CTL 71 # define SET_FPEMU_CTL(a, b) (-EINVAL) 72 #endif 73 #ifndef GET_FPEMU_CTL 74 # define GET_FPEMU_CTL(a, b) (-EINVAL) 75 #endif 76 #ifndef SET_FPEXC_CTL 77 # define SET_FPEXC_CTL(a, b) (-EINVAL) 78 #endif 79 #ifndef GET_FPEXC_CTL 80 # define GET_FPEXC_CTL(a, b) (-EINVAL) 81 #endif 82 #ifndef GET_ENDIAN 83 # define GET_ENDIAN(a, b) (-EINVAL) 84 #endif 85 #ifndef SET_ENDIAN 86 # define SET_ENDIAN(a, b) (-EINVAL) 87 #endif 88 #ifndef GET_TSC_CTL 89 # define GET_TSC_CTL(a) (-EINVAL) 90 #endif 91 #ifndef SET_TSC_CTL 92 # define SET_TSC_CTL(a) (-EINVAL) 93 #endif 94 #ifndef MPX_ENABLE_MANAGEMENT 95 # define MPX_ENABLE_MANAGEMENT(a) (-EINVAL) 96 #endif 97 #ifndef MPX_DISABLE_MANAGEMENT 98 # define MPX_DISABLE_MANAGEMENT(a) (-EINVAL) 99 #endif 100 101 /* 102 * this is where the system-wide overflow UID and GID are defined, for 103 * architectures that now have 32-bit UID/GID but didn't in the past 104 */ 105 106 int overflowuid = DEFAULT_OVERFLOWUID; 107 int overflowgid = DEFAULT_OVERFLOWGID; 108 109 EXPORT_SYMBOL(overflowuid); 110 EXPORT_SYMBOL(overflowgid); 111 112 /* 113 * the same as above, but for filesystems which can only store a 16-bit 114 * UID and GID. as such, this is needed on all architectures 115 */ 116 117 int fs_overflowuid = DEFAULT_FS_OVERFLOWUID; 118 int fs_overflowgid = DEFAULT_FS_OVERFLOWUID; 119 120 EXPORT_SYMBOL(fs_overflowuid); 121 EXPORT_SYMBOL(fs_overflowgid); 122 123 /* 124 * Returns true if current's euid is same as p's uid or euid, 125 * or has CAP_SYS_NICE to p's user_ns. 
126 * 127 * Called with rcu_read_lock, creds are safe 128 */ 129 static bool set_one_prio_perm(struct task_struct *p) 130 { 131 const struct cred *cred = current_cred(), *pcred = __task_cred(p); 132 133 if (uid_eq(pcred->uid, cred->euid) || 134 uid_eq(pcred->euid, cred->euid)) 135 return true; 136 if (ns_capable(pcred->user_ns, CAP_SYS_NICE)) 137 return true; 138 return false; 139 } 140 141 /* 142 * set the priority of a task 143 * - the caller must hold the RCU read lock 144 */ 145 static int set_one_prio(struct task_struct *p, int niceval, int error) 146 { 147 int no_nice; 148 149 if (!set_one_prio_perm(p)) { 150 error = -EPERM; 151 goto out; 152 } 153 if (niceval < task_nice(p) && !can_nice(p, niceval)) { 154 error = -EACCES; 155 goto out; 156 } 157 no_nice = security_task_setnice(p, niceval); 158 if (no_nice) { 159 error = no_nice; 160 goto out; 161 } 162 if (error == -ESRCH) 163 error = 0; 164 set_user_nice(p, niceval); 165 out: 166 return error; 167 } 168 169 SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) 170 { 171 struct task_struct *g, *p; 172 struct user_struct *user; 173 const struct cred *cred = current_cred(); 174 int error = -EINVAL; 175 struct pid *pgrp; 176 kuid_t uid; 177 178 if (which > PRIO_USER || which < PRIO_PROCESS) 179 goto out; 180 181 /* normalize: avoid signed division (rounding problems) */ 182 error = -ESRCH; 183 if (niceval < MIN_NICE) 184 niceval = MIN_NICE; 185 if (niceval > MAX_NICE) 186 niceval = MAX_NICE; 187 188 rcu_read_lock(); 189 read_lock(&tasklist_lock); 190 switch (which) { 191 case PRIO_PROCESS: 192 if (who) 193 p = find_task_by_vpid(who); 194 else 195 p = current; 196 if (p) 197 error = set_one_prio(p, niceval, error); 198 break; 199 case PRIO_PGRP: 200 if (who) 201 pgrp = find_vpid(who); 202 else 203 pgrp = task_pgrp(current); 204 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { 205 error = set_one_prio(p, niceval, error); 206 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); 207 break; 208 case PRIO_USER: 209 
uid = make_kuid(cred->user_ns, who); 210 user = cred->user; 211 if (!who) 212 uid = cred->uid; 213 else if (!uid_eq(uid, cred->uid)) { 214 user = find_user(uid); 215 if (!user) 216 goto out_unlock; /* No processes for this user */ 217 } 218 do_each_thread(g, p) { 219 if (uid_eq(task_uid(p), uid)) 220 error = set_one_prio(p, niceval, error); 221 } while_each_thread(g, p); 222 if (!uid_eq(uid, cred->uid)) 223 free_uid(user); /* For find_user() */ 224 break; 225 } 226 out_unlock: 227 read_unlock(&tasklist_lock); 228 rcu_read_unlock(); 229 out: 230 return error; 231 } 232 233 /* 234 * Ugh. To avoid negative return values, "getpriority()" will 235 * not return the normal nice-value, but a negated value that 236 * has been offset by 20 (ie it returns 40..1 instead of -20..19) 237 * to stay compatible. 238 */ 239 SYSCALL_DEFINE2(getpriority, int, which, int, who) 240 { 241 struct task_struct *g, *p; 242 struct user_struct *user; 243 const struct cred *cred = current_cred(); 244 long niceval, retval = -ESRCH; 245 struct pid *pgrp; 246 kuid_t uid; 247 248 if (which > PRIO_USER || which < PRIO_PROCESS) 249 return -EINVAL; 250 251 rcu_read_lock(); 252 read_lock(&tasklist_lock); 253 switch (which) { 254 case PRIO_PROCESS: 255 if (who) 256 p = find_task_by_vpid(who); 257 else 258 p = current; 259 if (p) { 260 niceval = nice_to_rlimit(task_nice(p)); 261 if (niceval > retval) 262 retval = niceval; 263 } 264 break; 265 case PRIO_PGRP: 266 if (who) 267 pgrp = find_vpid(who); 268 else 269 pgrp = task_pgrp(current); 270 do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { 271 niceval = nice_to_rlimit(task_nice(p)); 272 if (niceval > retval) 273 retval = niceval; 274 } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); 275 break; 276 case PRIO_USER: 277 uid = make_kuid(cred->user_ns, who); 278 user = cred->user; 279 if (!who) 280 uid = cred->uid; 281 else if (!uid_eq(uid, cred->uid)) { 282 user = find_user(uid); 283 if (!user) 284 goto out_unlock; /* No processes for this user */ 285 } 286 
do_each_thread(g, p) { 287 if (uid_eq(task_uid(p), uid)) { 288 niceval = nice_to_rlimit(task_nice(p)); 289 if (niceval > retval) 290 retval = niceval; 291 } 292 } while_each_thread(g, p); 293 if (!uid_eq(uid, cred->uid)) 294 free_uid(user); /* for find_user() */ 295 break; 296 } 297 out_unlock: 298 read_unlock(&tasklist_lock); 299 rcu_read_unlock(); 300 301 return retval; 302 } 303 304 /* 305 * Unprivileged users may change the real gid to the effective gid 306 * or vice versa. (BSD-style) 307 * 308 * If you set the real gid at all, or set the effective gid to a value not 309 * equal to the real gid, then the saved gid is set to the new effective gid. 310 * 311 * This makes it possible for a setgid program to completely drop its 312 * privileges, which is often a useful assertion to make when you are doing 313 * a security audit over a program. 314 * 315 * The general idea is that a program which uses just setregid() will be 316 * 100% compatible with BSD. A program which uses just setgid() will be 317 * 100% compatible with POSIX with saved IDs. 318 * 319 * SMP: There are not races, the GIDs are checked only by filesystem 320 * operations (as far as semantic preservation is concerned). 
321 */ 322 SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) 323 { 324 struct user_namespace *ns = current_user_ns(); 325 const struct cred *old; 326 struct cred *new; 327 int retval; 328 kgid_t krgid, kegid; 329 330 krgid = make_kgid(ns, rgid); 331 kegid = make_kgid(ns, egid); 332 333 if ((rgid != (gid_t) -1) && !gid_valid(krgid)) 334 return -EINVAL; 335 if ((egid != (gid_t) -1) && !gid_valid(kegid)) 336 return -EINVAL; 337 338 new = prepare_creds(); 339 if (!new) 340 return -ENOMEM; 341 old = current_cred(); 342 343 retval = -EPERM; 344 if (rgid != (gid_t) -1) { 345 if (gid_eq(old->gid, krgid) || 346 gid_eq(old->egid, krgid) || 347 ns_capable(old->user_ns, CAP_SETGID)) 348 new->gid = krgid; 349 else 350 goto error; 351 } 352 if (egid != (gid_t) -1) { 353 if (gid_eq(old->gid, kegid) || 354 gid_eq(old->egid, kegid) || 355 gid_eq(old->sgid, kegid) || 356 ns_capable(old->user_ns, CAP_SETGID)) 357 new->egid = kegid; 358 else 359 goto error; 360 } 361 362 if (rgid != (gid_t) -1 || 363 (egid != (gid_t) -1 && !gid_eq(kegid, old->gid))) 364 new->sgid = new->egid; 365 new->fsgid = new->egid; 366 367 return commit_creds(new); 368 369 error: 370 abort_creds(new); 371 return retval; 372 } 373 374 /* 375 * setgid() is implemented like SysV w/ SAVED_IDS 376 * 377 * SMP: Same implicit races as above. 
378 */ 379 SYSCALL_DEFINE1(setgid, gid_t, gid) 380 { 381 struct user_namespace *ns = current_user_ns(); 382 const struct cred *old; 383 struct cred *new; 384 int retval; 385 kgid_t kgid; 386 387 kgid = make_kgid(ns, gid); 388 if (!gid_valid(kgid)) 389 return -EINVAL; 390 391 new = prepare_creds(); 392 if (!new) 393 return -ENOMEM; 394 old = current_cred(); 395 396 retval = -EPERM; 397 if (ns_capable(old->user_ns, CAP_SETGID)) 398 new->gid = new->egid = new->sgid = new->fsgid = kgid; 399 else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid)) 400 new->egid = new->fsgid = kgid; 401 else 402 goto error; 403 404 return commit_creds(new); 405 406 error: 407 abort_creds(new); 408 return retval; 409 } 410 411 /* 412 * change the user struct in a credentials set to match the new UID 413 */ 414 static int set_user(struct cred *new) 415 { 416 struct user_struct *new_user; 417 418 new_user = alloc_uid(new->uid); 419 if (!new_user) 420 return -EAGAIN; 421 422 /* 423 * We don't fail in case of NPROC limit excess here because too many 424 * poorly written programs don't check set*uid() return code, assuming 425 * it never fails if called by root. We may still enforce NPROC limit 426 * for programs doing set*uid()+execve() by harmlessly deferring the 427 * failure to the execve() stage. 428 */ 429 if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) && 430 new_user != INIT_USER) 431 current->flags |= PF_NPROC_EXCEEDED; 432 else 433 current->flags &= ~PF_NPROC_EXCEEDED; 434 435 free_uid(new->user); 436 new->user = new_user; 437 return 0; 438 } 439 440 /* 441 * Unprivileged users may change the real uid to the effective uid 442 * or vice versa. (BSD-style) 443 * 444 * If you set the real uid at all, or set the effective uid to a value not 445 * equal to the real uid, then the saved uid is set to the new effective uid. 
446 * 447 * This makes it possible for a setuid program to completely drop its 448 * privileges, which is often a useful assertion to make when you are doing 449 * a security audit over a program. 450 * 451 * The general idea is that a program which uses just setreuid() will be 452 * 100% compatible with BSD. A program which uses just setuid() will be 453 * 100% compatible with POSIX with saved IDs. 454 */ 455 SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) 456 { 457 struct user_namespace *ns = current_user_ns(); 458 const struct cred *old; 459 struct cred *new; 460 int retval; 461 kuid_t kruid, keuid; 462 463 kruid = make_kuid(ns, ruid); 464 keuid = make_kuid(ns, euid); 465 466 if ((ruid != (uid_t) -1) && !uid_valid(kruid)) 467 return -EINVAL; 468 if ((euid != (uid_t) -1) && !uid_valid(keuid)) 469 return -EINVAL; 470 471 new = prepare_creds(); 472 if (!new) 473 return -ENOMEM; 474 old = current_cred(); 475 476 retval = -EPERM; 477 if (ruid != (uid_t) -1) { 478 new->uid = kruid; 479 if (!uid_eq(old->uid, kruid) && 480 !uid_eq(old->euid, kruid) && 481 !ns_capable(old->user_ns, CAP_SETUID)) 482 goto error; 483 } 484 485 if (euid != (uid_t) -1) { 486 new->euid = keuid; 487 if (!uid_eq(old->uid, keuid) && 488 !uid_eq(old->euid, keuid) && 489 !uid_eq(old->suid, keuid) && 490 !ns_capable(old->user_ns, CAP_SETUID)) 491 goto error; 492 } 493 494 if (!uid_eq(new->uid, old->uid)) { 495 retval = set_user(new); 496 if (retval < 0) 497 goto error; 498 } 499 if (ruid != (uid_t) -1 || 500 (euid != (uid_t) -1 && !uid_eq(keuid, old->uid))) 501 new->suid = new->euid; 502 new->fsuid = new->euid; 503 504 retval = security_task_fix_setuid(new, old, LSM_SETID_RE); 505 if (retval < 0) 506 goto error; 507 508 return commit_creds(new); 509 510 error: 511 abort_creds(new); 512 return retval; 513 } 514 515 /* 516 * setuid() is implemented like SysV with SAVED_IDS 517 * 518 * Note that SAVED_ID's is deficient in that a setuid root program 519 * like sendmail, for example, cannot set its 
uid to be a normal 520 * user and then switch back, because if you're root, setuid() sets 521 * the saved uid too. If you don't like this, blame the bright people 522 * in the POSIX committee and/or USG. Note that the BSD-style setreuid() 523 * will allow a root program to temporarily drop privileges and be able to 524 * regain them by swapping the real and effective uid. 525 */ 526 SYSCALL_DEFINE1(setuid, uid_t, uid) 527 { 528 struct user_namespace *ns = current_user_ns(); 529 const struct cred *old; 530 struct cred *new; 531 int retval; 532 kuid_t kuid; 533 534 kuid = make_kuid(ns, uid); 535 if (!uid_valid(kuid)) 536 return -EINVAL; 537 538 new = prepare_creds(); 539 if (!new) 540 return -ENOMEM; 541 old = current_cred(); 542 543 retval = -EPERM; 544 if (ns_capable(old->user_ns, CAP_SETUID)) { 545 new->suid = new->uid = kuid; 546 if (!uid_eq(kuid, old->uid)) { 547 retval = set_user(new); 548 if (retval < 0) 549 goto error; 550 } 551 } else if (!uid_eq(kuid, old->uid) && !uid_eq(kuid, new->suid)) { 552 goto error; 553 } 554 555 new->fsuid = new->euid = kuid; 556 557 retval = security_task_fix_setuid(new, old, LSM_SETID_ID); 558 if (retval < 0) 559 goto error; 560 561 return commit_creds(new); 562 563 error: 564 abort_creds(new); 565 return retval; 566 } 567 568 569 /* 570 * This function implements a generic ability to update ruid, euid, 571 * and suid. This allows you to implement the 4.4 compatible seteuid(). 
572 */ 573 SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) 574 { 575 struct user_namespace *ns = current_user_ns(); 576 const struct cred *old; 577 struct cred *new; 578 int retval; 579 kuid_t kruid, keuid, ksuid; 580 581 kruid = make_kuid(ns, ruid); 582 keuid = make_kuid(ns, euid); 583 ksuid = make_kuid(ns, suid); 584 585 if ((ruid != (uid_t) -1) && !uid_valid(kruid)) 586 return -EINVAL; 587 588 if ((euid != (uid_t) -1) && !uid_valid(keuid)) 589 return -EINVAL; 590 591 if ((suid != (uid_t) -1) && !uid_valid(ksuid)) 592 return -EINVAL; 593 594 new = prepare_creds(); 595 if (!new) 596 return -ENOMEM; 597 598 old = current_cred(); 599 600 retval = -EPERM; 601 if (!ns_capable(old->user_ns, CAP_SETUID)) { 602 if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) && 603 !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid)) 604 goto error; 605 if (euid != (uid_t) -1 && !uid_eq(keuid, old->uid) && 606 !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid)) 607 goto error; 608 if (suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) && 609 !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid)) 610 goto error; 611 } 612 613 if (ruid != (uid_t) -1) { 614 new->uid = kruid; 615 if (!uid_eq(kruid, old->uid)) { 616 retval = set_user(new); 617 if (retval < 0) 618 goto error; 619 } 620 } 621 if (euid != (uid_t) -1) 622 new->euid = keuid; 623 if (suid != (uid_t) -1) 624 new->suid = ksuid; 625 new->fsuid = new->euid; 626 627 retval = security_task_fix_setuid(new, old, LSM_SETID_RES); 628 if (retval < 0) 629 goto error; 630 631 return commit_creds(new); 632 633 error: 634 abort_creds(new); 635 return retval; 636 } 637 638 SYSCALL_DEFINE3(getresuid, uid_t __user *, ruidp, uid_t __user *, euidp, uid_t __user *, suidp) 639 { 640 const struct cred *cred = current_cred(); 641 int retval; 642 uid_t ruid, euid, suid; 643 644 ruid = from_kuid_munged(cred->user_ns, cred->uid); 645 euid = from_kuid_munged(cred->user_ns, cred->euid); 646 suid = from_kuid_munged(cred->user_ns, 
cred->suid); 647 648 retval = put_user(ruid, ruidp); 649 if (!retval) { 650 retval = put_user(euid, euidp); 651 if (!retval) 652 return put_user(suid, suidp); 653 } 654 return retval; 655 } 656 657 /* 658 * Same as above, but for rgid, egid, sgid. 659 */ 660 SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid) 661 { 662 struct user_namespace *ns = current_user_ns(); 663 const struct cred *old; 664 struct cred *new; 665 int retval; 666 kgid_t krgid, kegid, ksgid; 667 668 krgid = make_kgid(ns, rgid); 669 kegid = make_kgid(ns, egid); 670 ksgid = make_kgid(ns, sgid); 671 672 if ((rgid != (gid_t) -1) && !gid_valid(krgid)) 673 return -EINVAL; 674 if ((egid != (gid_t) -1) && !gid_valid(kegid)) 675 return -EINVAL; 676 if ((sgid != (gid_t) -1) && !gid_valid(ksgid)) 677 return -EINVAL; 678 679 new = prepare_creds(); 680 if (!new) 681 return -ENOMEM; 682 old = current_cred(); 683 684 retval = -EPERM; 685 if (!ns_capable(old->user_ns, CAP_SETGID)) { 686 if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) && 687 !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid)) 688 goto error; 689 if (egid != (gid_t) -1 && !gid_eq(kegid, old->gid) && 690 !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid)) 691 goto error; 692 if (sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) && 693 !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid)) 694 goto error; 695 } 696 697 if (rgid != (gid_t) -1) 698 new->gid = krgid; 699 if (egid != (gid_t) -1) 700 new->egid = kegid; 701 if (sgid != (gid_t) -1) 702 new->sgid = ksgid; 703 new->fsgid = new->egid; 704 705 return commit_creds(new); 706 707 error: 708 abort_creds(new); 709 return retval; 710 } 711 712 SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t __user *, sgidp) 713 { 714 const struct cred *cred = current_cred(); 715 int retval; 716 gid_t rgid, egid, sgid; 717 718 rgid = from_kgid_munged(cred->user_ns, cred->gid); 719 egid = from_kgid_munged(cred->user_ns, cred->egid); 720 sgid = 
from_kgid_munged(cred->user_ns, cred->sgid); 721 722 retval = put_user(rgid, rgidp); 723 if (!retval) { 724 retval = put_user(egid, egidp); 725 if (!retval) 726 retval = put_user(sgid, sgidp); 727 } 728 729 return retval; 730 } 731 732 733 /* 734 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This 735 * is used for "access()" and for the NFS daemon (letting nfsd stay at 736 * whatever uid it wants to). It normally shadows "euid", except when 737 * explicitly set by setfsuid() or for access.. 738 */ 739 SYSCALL_DEFINE1(setfsuid, uid_t, uid) 740 { 741 const struct cred *old; 742 struct cred *new; 743 uid_t old_fsuid; 744 kuid_t kuid; 745 746 old = current_cred(); 747 old_fsuid = from_kuid_munged(old->user_ns, old->fsuid); 748 749 kuid = make_kuid(old->user_ns, uid); 750 if (!uid_valid(kuid)) 751 return old_fsuid; 752 753 new = prepare_creds(); 754 if (!new) 755 return old_fsuid; 756 757 if (uid_eq(kuid, old->uid) || uid_eq(kuid, old->euid) || 758 uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) || 759 ns_capable(old->user_ns, CAP_SETUID)) { 760 if (!uid_eq(kuid, old->fsuid)) { 761 new->fsuid = kuid; 762 if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) 763 goto change_okay; 764 } 765 } 766 767 abort_creds(new); 768 return old_fsuid; 769 770 change_okay: 771 commit_creds(new); 772 return old_fsuid; 773 } 774 775 /* 776 * Samma på svenska.. 
777 */ 778 SYSCALL_DEFINE1(setfsgid, gid_t, gid) 779 { 780 const struct cred *old; 781 struct cred *new; 782 gid_t old_fsgid; 783 kgid_t kgid; 784 785 old = current_cred(); 786 old_fsgid = from_kgid_munged(old->user_ns, old->fsgid); 787 788 kgid = make_kgid(old->user_ns, gid); 789 if (!gid_valid(kgid)) 790 return old_fsgid; 791 792 new = prepare_creds(); 793 if (!new) 794 return old_fsgid; 795 796 if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->egid) || 797 gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) || 798 ns_capable(old->user_ns, CAP_SETGID)) { 799 if (!gid_eq(kgid, old->fsgid)) { 800 new->fsgid = kgid; 801 goto change_okay; 802 } 803 } 804 805 abort_creds(new); 806 return old_fsgid; 807 808 change_okay: 809 commit_creds(new); 810 return old_fsgid; 811 } 812 813 /** 814 * sys_getpid - return the thread group id of the current process 815 * 816 * Note, despite the name, this returns the tgid not the pid. The tgid and 817 * the pid are identical unless CLONE_THREAD was specified on clone() in 818 * which case the tgid is the same in all threads of the same group. 819 * 820 * This is SMP safe as current->tgid does not change. 821 */ 822 SYSCALL_DEFINE0(getpid) 823 { 824 return task_tgid_vnr(current); 825 } 826 827 /* Thread ID - the internal kernel "pid" */ 828 SYSCALL_DEFINE0(gettid) 829 { 830 return task_pid_vnr(current); 831 } 832 833 /* 834 * Accessing ->real_parent is not SMP-safe, it could 835 * change from under us. However, we can use a stale 836 * value of ->real_parent under rcu_read_lock(), see 837 * release_task()->call_rcu(delayed_put_task_struct). 
838 */ 839 SYSCALL_DEFINE0(getppid) 840 { 841 int pid; 842 843 rcu_read_lock(); 844 pid = task_tgid_vnr(rcu_dereference(current->real_parent)); 845 rcu_read_unlock(); 846 847 return pid; 848 } 849 850 SYSCALL_DEFINE0(getuid) 851 { 852 /* Only we change this so SMP safe */ 853 return from_kuid_munged(current_user_ns(), current_uid()); 854 } 855 856 SYSCALL_DEFINE0(geteuid) 857 { 858 /* Only we change this so SMP safe */ 859 return from_kuid_munged(current_user_ns(), current_euid()); 860 } 861 862 SYSCALL_DEFINE0(getgid) 863 { 864 /* Only we change this so SMP safe */ 865 return from_kgid_munged(current_user_ns(), current_gid()); 866 } 867 868 SYSCALL_DEFINE0(getegid) 869 { 870 /* Only we change this so SMP safe */ 871 return from_kgid_munged(current_user_ns(), current_egid()); 872 } 873 874 void do_sys_times(struct tms *tms) 875 { 876 cputime_t tgutime, tgstime, cutime, cstime; 877 878 thread_group_cputime_adjusted(current, &tgutime, &tgstime); 879 cutime = current->signal->cutime; 880 cstime = current->signal->cstime; 881 tms->tms_utime = cputime_to_clock_t(tgutime); 882 tms->tms_stime = cputime_to_clock_t(tgstime); 883 tms->tms_cutime = cputime_to_clock_t(cutime); 884 tms->tms_cstime = cputime_to_clock_t(cstime); 885 } 886 887 SYSCALL_DEFINE1(times, struct tms __user *, tbuf) 888 { 889 if (tbuf) { 890 struct tms tmp; 891 892 do_sys_times(&tmp); 893 if (copy_to_user(tbuf, &tmp, sizeof(struct tms))) 894 return -EFAULT; 895 } 896 force_successful_syscall_return(); 897 return (long) jiffies_64_to_clock_t(get_jiffies_64()); 898 } 899 900 /* 901 * This needs some heavy checking ... 902 * I just haven't the stomach for it. I also don't fully 903 * understand sessions/pgrp etc. Let somebody who does explain it. 904 * 905 * OK, I think I have the protection semantics right.... this is really 906 * only important on a multi-user system anyway, to make sure one user 907 * can't send a signal to a process owned by another. 
-TYT, 12/12/91 908 * 909 * !PF_FORKNOEXEC check to conform completely to POSIX. 910 */ 911 SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid) 912 { 913 struct task_struct *p; 914 struct task_struct *group_leader = current->group_leader; 915 struct pid *pgrp; 916 int err; 917 918 if (!pid) 919 pid = task_pid_vnr(group_leader); 920 if (!pgid) 921 pgid = pid; 922 if (pgid < 0) 923 return -EINVAL; 924 rcu_read_lock(); 925 926 /* From this point forward we keep holding onto the tasklist lock 927 * so that our parent does not change from under us. -DaveM 928 */ 929 write_lock_irq(&tasklist_lock); 930 931 err = -ESRCH; 932 p = find_task_by_vpid(pid); 933 if (!p) 934 goto out; 935 936 err = -EINVAL; 937 if (!thread_group_leader(p)) 938 goto out; 939 940 if (same_thread_group(p->real_parent, group_leader)) { 941 err = -EPERM; 942 if (task_session(p) != task_session(group_leader)) 943 goto out; 944 err = -EACCES; 945 if (!(p->flags & PF_FORKNOEXEC)) 946 goto out; 947 } else { 948 err = -ESRCH; 949 if (p != group_leader) 950 goto out; 951 } 952 953 err = -EPERM; 954 if (p->signal->leader) 955 goto out; 956 957 pgrp = task_pid(p); 958 if (pgid != pid) { 959 struct task_struct *g; 960 961 pgrp = find_vpid(pgid); 962 g = pid_task(pgrp, PIDTYPE_PGID); 963 if (!g || task_session(g) != task_session(group_leader)) 964 goto out; 965 } 966 967 err = security_task_setpgid(p, pgid); 968 if (err) 969 goto out; 970 971 if (task_pgrp(p) != pgrp) 972 change_pid(p, PIDTYPE_PGID, pgrp); 973 974 err = 0; 975 out: 976 /* All paths lead to here, thus we are safe. 
-DaveM */ 977 write_unlock_irq(&tasklist_lock); 978 rcu_read_unlock(); 979 return err; 980 } 981 982 SYSCALL_DEFINE1(getpgid, pid_t, pid) 983 { 984 struct task_struct *p; 985 struct pid *grp; 986 int retval; 987 988 rcu_read_lock(); 989 if (!pid) 990 grp = task_pgrp(current); 991 else { 992 retval = -ESRCH; 993 p = find_task_by_vpid(pid); 994 if (!p) 995 goto out; 996 grp = task_pgrp(p); 997 if (!grp) 998 goto out; 999 1000 retval = security_task_getpgid(p); 1001 if (retval) 1002 goto out; 1003 } 1004 retval = pid_vnr(grp); 1005 out: 1006 rcu_read_unlock(); 1007 return retval; 1008 } 1009 1010 #ifdef __ARCH_WANT_SYS_GETPGRP 1011 1012 SYSCALL_DEFINE0(getpgrp) 1013 { 1014 return sys_getpgid(0); 1015 } 1016 1017 #endif 1018 1019 SYSCALL_DEFINE1(getsid, pid_t, pid) 1020 { 1021 struct task_struct *p; 1022 struct pid *sid; 1023 int retval; 1024 1025 rcu_read_lock(); 1026 if (!pid) 1027 sid = task_session(current); 1028 else { 1029 retval = -ESRCH; 1030 p = find_task_by_vpid(pid); 1031 if (!p) 1032 goto out; 1033 sid = task_session(p); 1034 if (!sid) 1035 goto out; 1036 1037 retval = security_task_getsid(p); 1038 if (retval) 1039 goto out; 1040 } 1041 retval = pid_vnr(sid); 1042 out: 1043 rcu_read_unlock(); 1044 return retval; 1045 } 1046 1047 static void set_special_pids(struct pid *pid) 1048 { 1049 struct task_struct *curr = current->group_leader; 1050 1051 if (task_session(curr) != pid) 1052 change_pid(curr, PIDTYPE_SID, pid); 1053 1054 if (task_pgrp(curr) != pid) 1055 change_pid(curr, PIDTYPE_PGID, pid); 1056 } 1057 1058 SYSCALL_DEFINE0(setsid) 1059 { 1060 struct task_struct *group_leader = current->group_leader; 1061 struct pid *sid = task_pid(group_leader); 1062 pid_t session = pid_vnr(sid); 1063 int err = -EPERM; 1064 1065 write_lock_irq(&tasklist_lock); 1066 /* Fail if I am already a session leader */ 1067 if (group_leader->signal->leader) 1068 goto out; 1069 1070 /* Fail if a process group id already exists that equals the 1071 * proposed session id. 
1072 */ 1073 if (pid_task(sid, PIDTYPE_PGID)) 1074 goto out; 1075 1076 group_leader->signal->leader = 1; 1077 set_special_pids(sid); 1078 1079 proc_clear_tty(group_leader); 1080 1081 err = session; 1082 out: 1083 write_unlock_irq(&tasklist_lock); 1084 if (err > 0) { 1085 proc_sid_connector(group_leader); 1086 sched_autogroup_create_attach(group_leader); 1087 } 1088 return err; 1089 } 1090 1091 DECLARE_RWSEM(uts_sem); 1092 1093 #ifdef COMPAT_UTS_MACHINE 1094 #define override_architecture(name) \ 1095 (personality(current->personality) == PER_LINUX32 && \ 1096 copy_to_user(name->machine, COMPAT_UTS_MACHINE, \ 1097 sizeof(COMPAT_UTS_MACHINE))) 1098 #else 1099 #define override_architecture(name) 0 1100 #endif 1101 1102 /* 1103 * Work around broken programs that cannot handle "Linux 3.0". 1104 * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40 1105 */ 1106 static int override_release(char __user *release, size_t len) 1107 { 1108 int ret = 0; 1109 1110 if (current->personality & UNAME26) { 1111 const char *rest = UTS_RELEASE; 1112 char buf[65] = { 0 }; 1113 int ndots = 0; 1114 unsigned v; 1115 size_t copy; 1116 1117 while (*rest) { 1118 if (*rest == '.' 
&& ++ndots >= 3) 1119 break; 1120 if (!isdigit(*rest) && *rest != '.') 1121 break; 1122 rest++; 1123 } 1124 v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40; 1125 copy = clamp_t(size_t, len, 1, sizeof(buf)); 1126 copy = scnprintf(buf, copy, "2.6.%u%s", v, rest); 1127 ret = copy_to_user(release, buf, copy + 1); 1128 } 1129 return ret; 1130 } 1131 1132 SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) 1133 { 1134 int errno = 0; 1135 1136 down_read(&uts_sem); 1137 if (copy_to_user(name, utsname(), sizeof *name)) 1138 errno = -EFAULT; 1139 up_read(&uts_sem); 1140 1141 if (!errno && override_release(name->release, sizeof(name->release))) 1142 errno = -EFAULT; 1143 if (!errno && override_architecture(name)) 1144 errno = -EFAULT; 1145 return errno; 1146 } 1147 1148 #ifdef __ARCH_WANT_SYS_OLD_UNAME 1149 /* 1150 * Old cruft 1151 */ 1152 SYSCALL_DEFINE1(uname, struct old_utsname __user *, name) 1153 { 1154 int error = 0; 1155 1156 if (!name) 1157 return -EFAULT; 1158 1159 down_read(&uts_sem); 1160 if (copy_to_user(name, utsname(), sizeof(*name))) 1161 error = -EFAULT; 1162 up_read(&uts_sem); 1163 1164 if (!error && override_release(name->release, sizeof(name->release))) 1165 error = -EFAULT; 1166 if (!error && override_architecture(name)) 1167 error = -EFAULT; 1168 return error; 1169 } 1170 1171 SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name) 1172 { 1173 int error; 1174 1175 if (!name) 1176 return -EFAULT; 1177 if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) 1178 return -EFAULT; 1179 1180 down_read(&uts_sem); 1181 error = __copy_to_user(&name->sysname, &utsname()->sysname, 1182 __OLD_UTS_LEN); 1183 error |= __put_user(0, name->sysname + __OLD_UTS_LEN); 1184 error |= __copy_to_user(&name->nodename, &utsname()->nodename, 1185 __OLD_UTS_LEN); 1186 error |= __put_user(0, name->nodename + __OLD_UTS_LEN); 1187 error |= __copy_to_user(&name->release, &utsname()->release, 1188 __OLD_UTS_LEN); 1189 error |= __put_user(0, name->release + 
__OLD_UTS_LEN); 1190 error |= __copy_to_user(&name->version, &utsname()->version, 1191 __OLD_UTS_LEN); 1192 error |= __put_user(0, name->version + __OLD_UTS_LEN); 1193 error |= __copy_to_user(&name->machine, &utsname()->machine, 1194 __OLD_UTS_LEN); 1195 error |= __put_user(0, name->machine + __OLD_UTS_LEN); 1196 up_read(&uts_sem); 1197 1198 if (!error && override_architecture(name)) 1199 error = -EFAULT; 1200 if (!error && override_release(name->release, sizeof(name->release))) 1201 error = -EFAULT; 1202 return error ? -EFAULT : 0; 1203 } 1204 #endif 1205 1206 SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) 1207 { 1208 int errno; 1209 char tmp[__NEW_UTS_LEN]; 1210 1211 if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) 1212 return -EPERM; 1213 1214 if (len < 0 || len > __NEW_UTS_LEN) 1215 return -EINVAL; 1216 down_write(&uts_sem); 1217 errno = -EFAULT; 1218 if (!copy_from_user(tmp, name, len)) { 1219 struct new_utsname *u = utsname(); 1220 1221 memcpy(u->nodename, tmp, len); 1222 memset(u->nodename + len, 0, sizeof(u->nodename) - len); 1223 errno = 0; 1224 uts_proc_notify(UTS_PROC_HOSTNAME); 1225 } 1226 up_write(&uts_sem); 1227 return errno; 1228 } 1229 1230 #ifdef __ARCH_WANT_SYS_GETHOSTNAME 1231 1232 SYSCALL_DEFINE2(gethostname, char __user *, name, int, len) 1233 { 1234 int i, errno; 1235 struct new_utsname *u; 1236 1237 if (len < 0) 1238 return -EINVAL; 1239 down_read(&uts_sem); 1240 u = utsname(); 1241 i = 1 + strlen(u->nodename); 1242 if (i > len) 1243 i = len; 1244 errno = 0; 1245 if (copy_to_user(name, u->nodename, i)) 1246 errno = -EFAULT; 1247 up_read(&uts_sem); 1248 return errno; 1249 } 1250 1251 #endif 1252 1253 /* 1254 * Only setdomainname; getdomainname can be implemented by calling 1255 * uname() 1256 */ 1257 SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) 1258 { 1259 int errno; 1260 char tmp[__NEW_UTS_LEN]; 1261 1262 if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) 1263 return -EPERM; 1264 
if (len < 0 || len > __NEW_UTS_LEN) 1265 return -EINVAL; 1266 1267 down_write(&uts_sem); 1268 errno = -EFAULT; 1269 if (!copy_from_user(tmp, name, len)) { 1270 struct new_utsname *u = utsname(); 1271 1272 memcpy(u->domainname, tmp, len); 1273 memset(u->domainname + len, 0, sizeof(u->domainname) - len); 1274 errno = 0; 1275 uts_proc_notify(UTS_PROC_DOMAINNAME); 1276 } 1277 up_write(&uts_sem); 1278 return errno; 1279 } 1280 1281 SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim) 1282 { 1283 struct rlimit value; 1284 int ret; 1285 1286 ret = do_prlimit(current, resource, NULL, &value); 1287 if (!ret) 1288 ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0; 1289 1290 return ret; 1291 } 1292 1293 #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT 1294 1295 /* 1296 * Back compatibility for getrlimit. Needed for some apps. 1297 */ 1298 SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource, 1299 struct rlimit __user *, rlim) 1300 { 1301 struct rlimit x; 1302 if (resource >= RLIM_NLIMITS) 1303 return -EINVAL; 1304 1305 task_lock(current->group_leader); 1306 x = current->signal->rlim[resource]; 1307 task_unlock(current->group_leader); 1308 if (x.rlim_cur > 0x7FFFFFFF) 1309 x.rlim_cur = 0x7FFFFFFF; 1310 if (x.rlim_max > 0x7FFFFFFF) 1311 x.rlim_max = 0x7FFFFFFF; 1312 return copy_to_user(rlim, &x, sizeof(x)) ? 
-EFAULT : 0; 1313 } 1314 1315 #endif 1316 1317 static inline bool rlim64_is_infinity(__u64 rlim64) 1318 { 1319 #if BITS_PER_LONG < 64 1320 return rlim64 >= ULONG_MAX; 1321 #else 1322 return rlim64 == RLIM64_INFINITY; 1323 #endif 1324 } 1325 1326 static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64) 1327 { 1328 if (rlim->rlim_cur == RLIM_INFINITY) 1329 rlim64->rlim_cur = RLIM64_INFINITY; 1330 else 1331 rlim64->rlim_cur = rlim->rlim_cur; 1332 if (rlim->rlim_max == RLIM_INFINITY) 1333 rlim64->rlim_max = RLIM64_INFINITY; 1334 else 1335 rlim64->rlim_max = rlim->rlim_max; 1336 } 1337 1338 static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim) 1339 { 1340 if (rlim64_is_infinity(rlim64->rlim_cur)) 1341 rlim->rlim_cur = RLIM_INFINITY; 1342 else 1343 rlim->rlim_cur = (unsigned long)rlim64->rlim_cur; 1344 if (rlim64_is_infinity(rlim64->rlim_max)) 1345 rlim->rlim_max = RLIM_INFINITY; 1346 else 1347 rlim->rlim_max = (unsigned long)rlim64->rlim_max; 1348 } 1349 1350 /* make sure you are allowed to change @tsk limits before calling this */ 1351 int do_prlimit(struct task_struct *tsk, unsigned int resource, 1352 struct rlimit *new_rlim, struct rlimit *old_rlim) 1353 { 1354 struct rlimit *rlim; 1355 int retval = 0; 1356 1357 if (resource >= RLIM_NLIMITS) 1358 return -EINVAL; 1359 if (new_rlim) { 1360 if (new_rlim->rlim_cur > new_rlim->rlim_max) 1361 return -EINVAL; 1362 if (resource == RLIMIT_NOFILE && 1363 new_rlim->rlim_max > sysctl_nr_open) 1364 return -EPERM; 1365 } 1366 1367 /* protect tsk->signal and tsk->sighand from disappearing */ 1368 read_lock(&tasklist_lock); 1369 if (!tsk->sighand) { 1370 retval = -ESRCH; 1371 goto out; 1372 } 1373 1374 rlim = tsk->signal->rlim + resource; 1375 task_lock(tsk->group_leader); 1376 if (new_rlim) { 1377 /* Keep the capable check against init_user_ns until 1378 cgroups can contain all limits */ 1379 if (new_rlim->rlim_max > rlim->rlim_max && 1380 !capable(CAP_SYS_RESOURCE)) 1381 retval = 
-EPERM; 1382 if (!retval) 1383 retval = security_task_setrlimit(tsk->group_leader, 1384 resource, new_rlim); 1385 if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) { 1386 /* 1387 * The caller is asking for an immediate RLIMIT_CPU 1388 * expiry. But we use the zero value to mean "it was 1389 * never set". So let's cheat and make it one second 1390 * instead 1391 */ 1392 new_rlim->rlim_cur = 1; 1393 } 1394 } 1395 if (!retval) { 1396 if (old_rlim) 1397 *old_rlim = *rlim; 1398 if (new_rlim) 1399 *rlim = *new_rlim; 1400 } 1401 task_unlock(tsk->group_leader); 1402 1403 /* 1404 * RLIMIT_CPU handling. Note that the kernel fails to return an error 1405 * code if it rejected the user's attempt to set RLIMIT_CPU. This is a 1406 * very long-standing error, and fixing it now risks breakage of 1407 * applications, so we live with it 1408 */ 1409 if (!retval && new_rlim && resource == RLIMIT_CPU && 1410 new_rlim->rlim_cur != RLIM_INFINITY) 1411 update_rlimit_cpu(tsk, new_rlim->rlim_cur); 1412 out: 1413 read_unlock(&tasklist_lock); 1414 return retval; 1415 } 1416 1417 /* rcu lock must be held */ 1418 static int check_prlimit_permission(struct task_struct *task) 1419 { 1420 const struct cred *cred = current_cred(), *tcred; 1421 1422 if (current == task) 1423 return 0; 1424 1425 tcred = __task_cred(task); 1426 if (uid_eq(cred->uid, tcred->euid) && 1427 uid_eq(cred->uid, tcred->suid) && 1428 uid_eq(cred->uid, tcred->uid) && 1429 gid_eq(cred->gid, tcred->egid) && 1430 gid_eq(cred->gid, tcred->sgid) && 1431 gid_eq(cred->gid, tcred->gid)) 1432 return 0; 1433 if (ns_capable(tcred->user_ns, CAP_SYS_RESOURCE)) 1434 return 0; 1435 1436 return -EPERM; 1437 } 1438 1439 SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, 1440 const struct rlimit64 __user *, new_rlim, 1441 struct rlimit64 __user *, old_rlim) 1442 { 1443 struct rlimit64 old64, new64; 1444 struct rlimit old, new; 1445 struct task_struct *tsk; 1446 int ret; 1447 1448 if (new_rlim) { 1449 if (copy_from_user(&new64, 
new_rlim, sizeof(new64))) 1450 return -EFAULT; 1451 rlim64_to_rlim(&new64, &new); 1452 } 1453 1454 rcu_read_lock(); 1455 tsk = pid ? find_task_by_vpid(pid) : current; 1456 if (!tsk) { 1457 rcu_read_unlock(); 1458 return -ESRCH; 1459 } 1460 ret = check_prlimit_permission(tsk); 1461 if (ret) { 1462 rcu_read_unlock(); 1463 return ret; 1464 } 1465 get_task_struct(tsk); 1466 rcu_read_unlock(); 1467 1468 ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL, 1469 old_rlim ? &old : NULL); 1470 1471 if (!ret && old_rlim) { 1472 rlim_to_rlim64(&old, &old64); 1473 if (copy_to_user(old_rlim, &old64, sizeof(old64))) 1474 ret = -EFAULT; 1475 } 1476 1477 put_task_struct(tsk); 1478 return ret; 1479 } 1480 1481 SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim) 1482 { 1483 struct rlimit new_rlim; 1484 1485 if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) 1486 return -EFAULT; 1487 return do_prlimit(current, resource, &new_rlim, NULL); 1488 } 1489 1490 /* 1491 * It would make sense to put struct rusage in the task_struct, 1492 * except that would make the task_struct be *really big*. After 1493 * task_struct gets moved into malloc'ed memory, it would 1494 * make sense to do this. It will make moving the rest of the information 1495 * a lot simpler! (Which we're not doing right now because we're not 1496 * measuring them yet). 1497 * 1498 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have 1499 * races with threads incrementing their own counters. But since word 1500 * reads are atomic, we either get new values or old values and we don't 1501 * care which for the sums. We always take the siglock to protect reading 1502 * the c* fields from p->signal from races with exit.c updating those 1503 * fields when reaping, so a sample either gets all the additions of a 1504 * given child after it's reaped, or none so this sample is before reaping. 
*
 * Locking:
 * We need to take the siglock for CHILDREN, SELF and BOTH
 * for the cases: current multithreaded, non-current single threaded,
 * non-current multithreaded.  Thread traversal is now safe with
 * the siglock held.
 * Strictly speaking, we do not need to take the siglock if we are current and
 * single threaded, as no one else can take our signal_struct away, no one
 * else can reap the children to update signal->c* counters, and no one else
 * can race with the signal-> fields. If we do not take any lock, the
 * signal-> fields could be read out of order while another thread was just
 * exiting. So we should place a read memory barrier when we avoid the lock.
 * On the writer side, write memory barrier is implied in __exit_signal
 * as __exit_signal releases the siglock spinlock after updating the signal->
 * fields. But we don't do this yet to keep things simple.
 *
 */

/* Add the per-thread counters of @t (switches, faults, block I/O) to @r. */
static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
{
	r->ru_nvcsw += t->nvcsw;
	r->ru_nivcsw += t->nivcsw;
	r->ru_minflt += t->min_flt;
	r->ru_majflt += t->maj_flt;
	r->ru_inblock += task_io_get_inblock(t);
	r->ru_oublock += task_io_get_oublock(t);
}

/*
 * k_getrusage - fill the kernel-space rusage @r for task @p.
 * @p:   task to sample
 * @who: RUSAGE_SELF, RUSAGE_CHILDREN, RUSAGE_BOTH or RUSAGE_THREAD;
 *       any other value hits BUG()
 * @r:   result, zeroed first
 *
 * If the sighand lock of @p cannot be taken, @r is left zeroed.
 */
static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
{
	struct task_struct *t;
	unsigned long flags;
	cputime_t tgutime, tgstime, utime, stime;
	unsigned long maxrss = 0;

	memset((char *)r, 0, sizeof (*r));
	utime = stime = 0;

	if (who == RUSAGE_THREAD) {
		/*
		 * NOTE(review): CPU time is sampled from current while the
		 * counters come from @p -- presumably callers only pass
		 * p == current for RUSAGE_THREAD; confirm.
		 */
		task_cputime_adjusted(current, &utime, &stime);
		accumulate_thread_rusage(p, r);
		maxrss = p->signal->maxrss;
		goto out;
	}

	if (!lock_task_sighand(p, &flags))
		return;

	switch (who) {
	case RUSAGE_BOTH:
	case RUSAGE_CHILDREN:
		/* start from the totals accumulated for reaped children */
		utime = p->signal->cutime;
		stime = p->signal->cstime;
		r->ru_nvcsw = p->signal->cnvcsw;
		r->ru_nivcsw = p->signal->cnivcsw;
		r->ru_minflt = p->signal->cmin_flt;
		r->ru_majflt = p->signal->cmaj_flt;
		r->ru_inblock = p->signal->cinblock;
		r->ru_oublock = p->signal->coublock;
		maxrss = p->signal->cmaxrss;

		if (who == RUSAGE_CHILDREN)
			break;

		/* RUSAGE_BOTH: fall through and add the group's own usage */
	case RUSAGE_SELF:
		thread_group_cputime_adjusted(p, &tgutime, &tgstime);
		utime += tgutime;
		stime += tgstime;
		r->ru_nvcsw += p->signal->nvcsw;
		r->ru_nivcsw += p->signal->nivcsw;
		r->ru_minflt += p->signal->min_flt;
		r->ru_majflt += p->signal->maj_flt;
		r->ru_inblock += p->signal->inblock;
		r->ru_oublock += p->signal->oublock;
		if (maxrss < p->signal->maxrss)
			maxrss = p->signal->maxrss;
		/* then add the counters of every thread in the group */
		t = p;
		do {
			accumulate_thread_rusage(t, r);
		} while_each_thread(p, t);
		break;

	default:
		BUG();
	}
	unlock_task_sighand(p, &flags);

out:
	cputime_to_timeval(utime, &r->ru_utime);
	cputime_to_timeval(stime, &r->ru_stime);

	if (who != RUSAGE_CHILDREN) {
		struct mm_struct *mm = get_task_mm(p);

		if (mm) {
			setmax_mm_hiwater_rss(&maxrss, mm);
			mmput(mm);
		}
	}
	r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
}

/*
 * getrusage - collect the usage of @p for @who and copy it to the user
 * buffer @ru.  Returns 0, or -EFAULT if @ru cannot be written.
 */
int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
{
	struct rusage r;

	k_getrusage(p, who, &r);
	return copy_to_user(ru, &r, sizeof(r)) ?
-EFAULT : 0; 1613 } 1614 1615 SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru) 1616 { 1617 if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN && 1618 who != RUSAGE_THREAD) 1619 return -EINVAL; 1620 return getrusage(current, who, ru); 1621 } 1622 1623 #ifdef CONFIG_COMPAT 1624 COMPAT_SYSCALL_DEFINE2(getrusage, int, who, struct compat_rusage __user *, ru) 1625 { 1626 struct rusage r; 1627 1628 if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN && 1629 who != RUSAGE_THREAD) 1630 return -EINVAL; 1631 1632 k_getrusage(current, who, &r); 1633 return put_compat_rusage(&r, ru); 1634 } 1635 #endif 1636 1637 SYSCALL_DEFINE1(umask, int, mask) 1638 { 1639 mask = xchg(¤t->fs->umask, mask & S_IRWXUGO); 1640 return mask; 1641 } 1642 1643 static int prctl_set_mm_exe_file_locked(struct mm_struct *mm, unsigned int fd) 1644 { 1645 struct fd exe; 1646 struct inode *inode; 1647 int err; 1648 1649 VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm); 1650 1651 exe = fdget(fd); 1652 if (!exe.file) 1653 return -EBADF; 1654 1655 inode = file_inode(exe.file); 1656 1657 /* 1658 * Because the original mm->exe_file points to executable file, make 1659 * sure that this one is executable as well, to avoid breaking an 1660 * overall picture. 1661 */ 1662 err = -EACCES; 1663 if (!S_ISREG(inode->i_mode) || 1664 exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC) 1665 goto exit; 1666 1667 err = inode_permission(inode, MAY_EXEC); 1668 if (err) 1669 goto exit; 1670 1671 /* 1672 * Forbid mm->exe_file change if old file still mapped. 1673 */ 1674 err = -EBUSY; 1675 if (mm->exe_file) { 1676 struct vm_area_struct *vma; 1677 1678 for (vma = mm->mmap; vma; vma = vma->vm_next) 1679 if (vma->vm_file && 1680 path_equal(&vma->vm_file->f_path, 1681 &mm->exe_file->f_path)) 1682 goto exit; 1683 } 1684 1685 /* 1686 * The symlink can be changed only once, just to disallow arbitrary 1687 * transitions malicious software might bring in. 
This means one 1688 * could make a snapshot over all processes running and monitor 1689 * /proc/pid/exe changes to notice unusual activity if needed. 1690 */ 1691 err = -EPERM; 1692 if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags)) 1693 goto exit; 1694 1695 err = 0; 1696 set_mm_exe_file(mm, exe.file); /* this grabs a reference to exe.file */ 1697 exit: 1698 fdput(exe); 1699 return err; 1700 } 1701 1702 #ifdef CONFIG_CHECKPOINT_RESTORE 1703 /* 1704 * WARNING: we don't require any capability here so be very careful 1705 * in what is allowed for modification from userspace. 1706 */ 1707 static int validate_prctl_map(struct prctl_mm_map *prctl_map) 1708 { 1709 unsigned long mmap_max_addr = TASK_SIZE; 1710 struct mm_struct *mm = current->mm; 1711 int error = -EINVAL, i; 1712 1713 static const unsigned char offsets[] = { 1714 offsetof(struct prctl_mm_map, start_code), 1715 offsetof(struct prctl_mm_map, end_code), 1716 offsetof(struct prctl_mm_map, start_data), 1717 offsetof(struct prctl_mm_map, end_data), 1718 offsetof(struct prctl_mm_map, start_brk), 1719 offsetof(struct prctl_mm_map, brk), 1720 offsetof(struct prctl_mm_map, start_stack), 1721 offsetof(struct prctl_mm_map, arg_start), 1722 offsetof(struct prctl_mm_map, arg_end), 1723 offsetof(struct prctl_mm_map, env_start), 1724 offsetof(struct prctl_mm_map, env_end), 1725 }; 1726 1727 /* 1728 * Make sure the members are not somewhere outside 1729 * of allowed address space. 1730 */ 1731 for (i = 0; i < ARRAY_SIZE(offsets); i++) { 1732 u64 val = *(u64 *)((char *)prctl_map + offsets[i]); 1733 1734 if ((unsigned long)val >= mmap_max_addr || 1735 (unsigned long)val < mmap_min_addr) 1736 goto out; 1737 } 1738 1739 /* 1740 * Make sure the pairs are ordered. 1741 */ 1742 #define __prctl_check_order(__m1, __op, __m2) \ 1743 ((unsigned long)prctl_map->__m1 __op \ 1744 (unsigned long)prctl_map->__m2) ? 
0 : -EINVAL 1745 error = __prctl_check_order(start_code, <, end_code); 1746 error |= __prctl_check_order(start_data, <, end_data); 1747 error |= __prctl_check_order(start_brk, <=, brk); 1748 error |= __prctl_check_order(arg_start, <=, arg_end); 1749 error |= __prctl_check_order(env_start, <=, env_end); 1750 if (error) 1751 goto out; 1752 #undef __prctl_check_order 1753 1754 error = -EINVAL; 1755 1756 /* 1757 * @brk should be after @end_data in traditional maps. 1758 */ 1759 if (prctl_map->start_brk <= prctl_map->end_data || 1760 prctl_map->brk <= prctl_map->end_data) 1761 goto out; 1762 1763 /* 1764 * Neither we should allow to override limits if they set. 1765 */ 1766 if (check_data_rlimit(rlimit(RLIMIT_DATA), prctl_map->brk, 1767 prctl_map->start_brk, prctl_map->end_data, 1768 prctl_map->start_data)) 1769 goto out; 1770 1771 /* 1772 * Someone is trying to cheat the auxv vector. 1773 */ 1774 if (prctl_map->auxv_size) { 1775 if (!prctl_map->auxv || prctl_map->auxv_size > sizeof(mm->saved_auxv)) 1776 goto out; 1777 } 1778 1779 /* 1780 * Finally, make sure the caller has the rights to 1781 * change /proc/pid/exe link: only local root should 1782 * be allowed to. 
1783 */ 1784 if (prctl_map->exe_fd != (u32)-1) { 1785 struct user_namespace *ns = current_user_ns(); 1786 const struct cred *cred = current_cred(); 1787 1788 if (!uid_eq(cred->uid, make_kuid(ns, 0)) || 1789 !gid_eq(cred->gid, make_kgid(ns, 0))) 1790 goto out; 1791 } 1792 1793 error = 0; 1794 out: 1795 return error; 1796 } 1797 1798 static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data_size) 1799 { 1800 struct prctl_mm_map prctl_map = { .exe_fd = (u32)-1, }; 1801 unsigned long user_auxv[AT_VECTOR_SIZE]; 1802 struct mm_struct *mm = current->mm; 1803 int error; 1804 1805 BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv)); 1806 BUILD_BUG_ON(sizeof(struct prctl_mm_map) > 256); 1807 1808 if (opt == PR_SET_MM_MAP_SIZE) 1809 return put_user((unsigned int)sizeof(prctl_map), 1810 (unsigned int __user *)addr); 1811 1812 if (data_size != sizeof(prctl_map)) 1813 return -EINVAL; 1814 1815 if (copy_from_user(&prctl_map, addr, sizeof(prctl_map))) 1816 return -EFAULT; 1817 1818 error = validate_prctl_map(&prctl_map); 1819 if (error) 1820 return error; 1821 1822 if (prctl_map.auxv_size) { 1823 memset(user_auxv, 0, sizeof(user_auxv)); 1824 if (copy_from_user(user_auxv, 1825 (const void __user *)prctl_map.auxv, 1826 prctl_map.auxv_size)) 1827 return -EFAULT; 1828 1829 /* Last entry must be AT_NULL as specification requires */ 1830 user_auxv[AT_VECTOR_SIZE - 2] = AT_NULL; 1831 user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL; 1832 } 1833 1834 down_write(&mm->mmap_sem); 1835 if (prctl_map.exe_fd != (u32)-1) 1836 error = prctl_set_mm_exe_file_locked(mm, prctl_map.exe_fd); 1837 downgrade_write(&mm->mmap_sem); 1838 if (error) 1839 goto out; 1840 1841 /* 1842 * We don't validate if these members are pointing to 1843 * real present VMAs because application may have correspond 1844 * VMAs already unmapped and kernel uses these members for statistics 1845 * output in procfs mostly, except 1846 * 1847 * - @start_brk/@brk which are used in do_brk but kernel lookups 1848 
*    for VMAs when updating these members so anything wrong written
	 *    here cause kernel to swear at userspace program but won't lead
	 *    to any problem in kernel itself
	 */

	mm->start_code	= prctl_map.start_code;
	mm->end_code	= prctl_map.end_code;
	mm->start_data	= prctl_map.start_data;
	mm->end_data	= prctl_map.end_data;
	mm->start_brk	= prctl_map.start_brk;
	mm->brk		= prctl_map.brk;
	mm->start_stack	= prctl_map.start_stack;
	mm->arg_start	= prctl_map.arg_start;
	mm->arg_end	= prctl_map.arg_end;
	mm->env_start	= prctl_map.env_start;
	mm->env_end	= prctl_map.env_end;

	/*
	 * Note this update of @saved_auxv is lockless thus
	 * if someone reads this member in procfs while we're
	 * updating -- it may get partly updated results. It's
	 * known and acceptable trade off: we leave it as is to
	 * not introduce additional locks here making the kernel
	 * more complex.
	 */
	if (prctl_map.auxv_size)
		memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv));

	error = 0;
out:
	/* mmap_sem was downgraded to a read lock above */
	up_read(&mm->mmap_sem);
	return error;
}
#endif /* CONFIG_CHECKPOINT_RESTORE */

/*
 * prctl_set_mm - implement prctl(PR_SET_MM, opt, addr, arg4, arg5).
 * @opt:  which mm field to change (PR_SET_MM_*)
 * @addr: new address, a file descriptor for PR_SET_MM_EXE_FILE, or a user
 *        pointer for the AUXV/MAP options
 * @arg4: size argument; must be 0 unless @opt is PR_SET_MM_AUXV,
 *        PR_SET_MM_MAP or PR_SET_MM_MAP_SIZE
 * @arg5: must be 0
 *
 * Everything except the MAP options requires CAP_SYS_RESOURCE.
 * Returns 0 on success or a negative errno.
 */
static int prctl_set_mm(int opt, unsigned long addr,
			unsigned long arg4, unsigned long arg5)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int error;

	if (arg5 || (arg4 && (opt != PR_SET_MM_AUXV &&
			      opt != PR_SET_MM_MAP &&
			      opt != PR_SET_MM_MAP_SIZE)))
		return -EINVAL;

#ifdef CONFIG_CHECKPOINT_RESTORE
	if (opt == PR_SET_MM_MAP || opt == PR_SET_MM_MAP_SIZE)
		return prctl_set_mm_map(opt, (const void __user *)addr, arg4);
#endif

	if (!capable(CAP_SYS_RESOURCE))
		return -EPERM;

	if (opt == PR_SET_MM_EXE_FILE) {
		down_write(&mm->mmap_sem);
		error = prctl_set_mm_exe_file_locked(mm, (unsigned int)addr);
		up_write(&mm->mmap_sem);
		return error;
	}

	if (addr >= TASK_SIZE || addr < mmap_min_addr)
		return -EINVAL;

	error = -EINVAL;

	down_read(&mm->mmap_sem);
	vma = find_vma(mm, addr);

	switch (opt) {
	case PR_SET_MM_START_CODE:
		mm->start_code = addr;
		break;
	case PR_SET_MM_END_CODE:
		mm->end_code = addr;
		break;
	case PR_SET_MM_START_DATA:
		mm->start_data = addr;
		break;
	case PR_SET_MM_END_DATA:
		mm->end_data = addr;
		break;

	case PR_SET_MM_START_BRK:
		if (addr <= mm->end_data)
			goto out;

		if (check_data_rlimit(rlimit(RLIMIT_DATA), mm->brk, addr,
				      mm->end_data, mm->start_data))
			goto out;

		mm->start_brk = addr;
		break;

	case PR_SET_MM_BRK:
		if (addr <= mm->end_data)
			goto out;

		if (check_data_rlimit(rlimit(RLIMIT_DATA), addr, mm->start_brk,
				      mm->end_data, mm->start_data))
			goto out;

		mm->brk = addr;
		break;

	/*
	 * If command line arguments and environment
	 * are placed somewhere else on stack, we can
	 * set them up here, ARG_START/END to setup
	 * command line arguments and ENV_START/END
	 * for environment.
	 */
	case PR_SET_MM_START_STACK:
	case PR_SET_MM_ARG_START:
	case PR_SET_MM_ARG_END:
	case PR_SET_MM_ENV_START:
	case PR_SET_MM_ENV_END:
		/* these need find_vma() to have returned a VMA for @addr */
		if (!vma) {
			error = -EFAULT;
			goto out;
		}
		if (opt == PR_SET_MM_START_STACK)
			mm->start_stack = addr;
		else if (opt == PR_SET_MM_ARG_START)
			mm->arg_start = addr;
		else if (opt == PR_SET_MM_ARG_END)
			mm->arg_end = addr;
		else if (opt == PR_SET_MM_ENV_START)
			mm->env_start = addr;
		else if (opt == PR_SET_MM_ENV_END)
			mm->env_end = addr;
		break;

	/*
	 * This doesn't move auxiliary vector itself
	 * since it's pinned to mm_struct, but allow
	 * to fill vector with new values. It's up
	 * to a caller to provide sane values here
	 * otherwise user space tools which use this
	 * vector might be unhappy.
	 */
	case PR_SET_MM_AUXV: {
		unsigned long user_auxv[AT_VECTOR_SIZE];

		if (arg4 > sizeof(user_auxv))
			goto out;
		/* drop mmap_sem before touching user memory */
		up_read(&mm->mmap_sem);

		if (copy_from_user(user_auxv, (const void __user *)addr, arg4))
			return -EFAULT;

		/* Make sure the last entry is always AT_NULL */
		user_auxv[AT_VECTOR_SIZE - 2] = 0;
		user_auxv[AT_VECTOR_SIZE - 1] = 0;

		BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));

		/* only the first arg4 bytes of the saved vector are replaced */
		task_lock(current);
		memcpy(mm->saved_auxv, user_auxv, arg4);
		task_unlock(current);

		return 0;
	}
	default:
		goto out;
	}

	error = 0;
out:
	up_read(&mm->mmap_sem);
	return error;
}

#ifdef CONFIG_CHECKPOINT_RESTORE
/* Store @me's clear_child_tid pointer at the user address @tid_addr. */
static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
{
	return put_user(me->clear_child_tid, tid_addr);
}
#else
/* Without CONFIG_CHECKPOINT_RESTORE the option is rejected. */
static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
{
	return -EINVAL;
}
#endif

/*
 * prctl - per-task control operations selected by @option; the meaning
 * of @arg2..@arg5 depends on the option.  The security module is asked
 * first; any answer other than -ENOSYS is returned as is.
 */
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
		unsigned long, arg4, unsigned long, arg5)
{
	struct task_struct *me = current;
	unsigned char comm[sizeof(me->comm)];
	long error;

	error = security_task_prctl(option, arg2, arg3, arg4, arg5);
	if (error != -ENOSYS)
		return error;

	error = 0;
	switch (option) {
	case PR_SET_PDEATHSIG:
		if (!valid_signal(arg2)) {
			error = -EINVAL;
			break;
		}
		me->pdeath_signal = arg2;
		break;
	case PR_GET_PDEATHSIG:
		error = put_user(me->pdeath_signal, (int __user *)arg2);
		break;
	case PR_GET_DUMPABLE:
		error = get_dumpable(me->mm);
		break;
	case PR_SET_DUMPABLE:
		if (arg2 != SUID_DUMP_DISABLE && arg2 !=
SUID_DUMP_USER) {
			error = -EINVAL;
			break;
		}
		set_dumpable(me->mm, arg2);
		break;

	/*
	 * Architecture hooks; the fallbacks defined at the top of this
	 * file evaluate to -EINVAL.
	 */
	case PR_SET_UNALIGN:
		error = SET_UNALIGN_CTL(me, arg2);
		break;
	case PR_GET_UNALIGN:
		error = GET_UNALIGN_CTL(me, arg2);
		break;
	case PR_SET_FPEMU:
		error = SET_FPEMU_CTL(me, arg2);
		break;
	case PR_GET_FPEMU:
		error = GET_FPEMU_CTL(me, arg2);
		break;
	case PR_SET_FPEXC:
		error = SET_FPEXC_CTL(me, arg2);
		break;
	case PR_GET_FPEXC:
		error = GET_FPEXC_CTL(me, arg2);
		break;
	case PR_GET_TIMING:
		error = PR_TIMING_STATISTICAL;
		break;
	case PR_SET_TIMING:
		/* PR_TIMING_STATISTICAL is the only mode accepted */
		if (arg2 != PR_TIMING_STATISTICAL)
			error = -EINVAL;
		break;
	case PR_SET_NAME:
		/* terminate up front; at most sizeof(comm) - 1 bytes are read */
		comm[sizeof(me->comm) - 1] = 0;
		if (strncpy_from_user(comm, (char __user *)arg2,
				      sizeof(me->comm) - 1) < 0)
			return -EFAULT;
		set_task_comm(me, comm);
		proc_comm_connector(me);
		break;
	case PR_GET_NAME:
		/* always copies the full sizeof(comm) bytes to userspace */
		get_task_comm(comm, me);
		if (copy_to_user((char __user *)arg2, comm, sizeof(comm)))
			return -EFAULT;
		break;
	case PR_GET_ENDIAN:
		error = GET_ENDIAN(me, arg2);
		break;
	case PR_SET_ENDIAN:
		error = SET_ENDIAN(me, arg2);
		break;
	case PR_GET_SECCOMP:
		error = prctl_get_seccomp();
		break;
	case PR_SET_SECCOMP:
		error = prctl_set_seccomp(arg2, (char __user *)arg3);
		break;
	case PR_GET_TSC:
		error = GET_TSC_CTL(arg2);
		break;
	case PR_SET_TSC:
		error = SET_TSC_CTL(arg2);
		break;
	case PR_TASK_PERF_EVENTS_DISABLE:
		error = perf_event_task_disable();
		break;
	case PR_TASK_PERF_EVENTS_ENABLE:
		error = perf_event_task_enable();
		break;
	case PR_GET_TIMERSLACK:
		error = current->timer_slack_ns;
		break;
	case PR_SET_TIMERSLACK:
		/* arg2 is unsigned, so this selects the default only for 0 */
		if (arg2 <= 0)
			current->timer_slack_ns =
					current->default_timer_slack_ns;
		else
			current->timer_slack_ns = arg2;
		break;
	case PR_MCE_KILL:
		if (arg4 | arg5)
			return -EINVAL;
		switch (arg2) {
		case PR_MCE_KILL_CLEAR:
			if (arg3 != 0)
				return -EINVAL;
			current->flags &= ~PF_MCE_PROCESS;
			break;
		case PR_MCE_KILL_SET:
			/*
			 * PF_MCE_PROCESS is set before arg3 is examined, so
			 * it stays set when an invalid arg3 yields -EINVAL.
			 */
			current->flags |= PF_MCE_PROCESS;
			if (arg3 == PR_MCE_KILL_EARLY)
				current->flags |= PF_MCE_EARLY;
			else if (arg3 == PR_MCE_KILL_LATE)
				current->flags &= ~PF_MCE_EARLY;
			else if (arg3 == PR_MCE_KILL_DEFAULT)
				current->flags &=
						~(PF_MCE_EARLY|PF_MCE_PROCESS);
			else
				return -EINVAL;
			break;
		default:
			return -EINVAL;
		}
		break;
	case PR_MCE_KILL_GET:
		if (arg2 | arg3 | arg4 | arg5)
			return -EINVAL;
		if (current->flags & PF_MCE_PROCESS)
			error = (current->flags & PF_MCE_EARLY) ?
				PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE;
		else
			error = PR_MCE_KILL_DEFAULT;
		break;
	case PR_SET_MM:
		error = prctl_set_mm(arg2, arg3, arg4, arg5);
		break;
	case PR_GET_TID_ADDRESS:
		error = prctl_get_tid_address(me, (int __user **)arg2);
		break;
	case PR_SET_CHILD_SUBREAPER:
		me->signal->is_child_subreaper = !!arg2;
		break;
	case PR_GET_CHILD_SUBREAPER:
		error = put_user(me->signal->is_child_subreaper,
				 (int __user *)arg2);
		break;
	case PR_SET_NO_NEW_PRIVS:
		/* can only be switched on: arg2 must be exactly 1 */
		if (arg2 != 1 || arg3 || arg4 || arg5)
			return -EINVAL;

		task_set_no_new_privs(current);
		break;
	case PR_GET_NO_NEW_PRIVS:
		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;
		return task_no_new_privs(current) ?
1 : 0;
	case PR_GET_THP_DISABLE:
		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;
		error = !!(me->mm->def_flags & VM_NOHUGEPAGE);
		break;
	case PR_SET_THP_DISABLE:
		if (arg3 || arg4 || arg5)
			return -EINVAL;
		/* def_flags is modified under the mmap_sem write lock */
		down_write(&me->mm->mmap_sem);
		if (arg2)
			me->mm->def_flags |= VM_NOHUGEPAGE;
		else
			me->mm->def_flags &= ~VM_NOHUGEPAGE;
		up_write(&me->mm->mmap_sem);
		break;
	case PR_MPX_ENABLE_MANAGEMENT:
		error = MPX_ENABLE_MANAGEMENT(me);
		break;
	case PR_MPX_DISABLE_MANAGEMENT:
		error = MPX_DISABLE_MANAGEMENT(me);
		break;
	default:
		error = -EINVAL;
		break;
	}
	return error;
}

/*
 * getcpu - report the CPU the caller is running on and that CPU's node.
 * @cpup:   receives the CPU number, may be NULL
 * @nodep:  receives the node number, may be NULL
 * @unused: not referenced
 *
 * Both stores are attempted; returns -EFAULT if either failed, else 0.
 */
SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
		struct getcpu_cache __user *, unused)
{
	int err = 0;
	int cpu = raw_smp_processor_id();

	if (cpup)
		err |= put_user(cpu, cpup);
	if (nodep)
		err |= put_user(cpu_to_node(cpu), nodep);
	return err ? -EFAULT : 0;
}

/**
 * do_sysinfo - fill in sysinfo struct
 * @info: pointer to buffer to fill
 *
 * Memory values are converted to bytes (mem_unit == 1) when the total of
 * ram + swap fits in an unsigned long after scaling; otherwise they are
 * left in units of info->mem_unit.  Always returns 0.
 */
static int do_sysinfo(struct sysinfo *info)
{
	unsigned long mem_total, sav_total;
	unsigned int mem_unit, bitcount;
	struct timespec tp;

	memset(info, 0, sizeof(struct sysinfo));

	get_monotonic_boottime(&tp);
	/* round a partial second up */
	info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);

	get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);

	info->procs = nr_threads;

	si_meminfo(info);
	si_swapinfo(info);

	/*
	 * If the sum of all the available memory (i.e. ram + swap)
	 * is less than can be stored in a 32 bit unsigned long then
	 * we can be binary compatible with 2.2.x kernels.  If not,
	 * well, in that case 2.2.x was broken anyways...
	 *
	 *  -Erik Andersen <andersee@debian.org>
	 */

	mem_total = info->totalram + info->totalswap;
	/* the sum wrapped around: keep the values in mem_unit units */
	if (mem_total < info->totalram || mem_total < info->totalswap)
		goto out;
	bitcount = 0;
	mem_unit = info->mem_unit;
	/* count the shift for mem_unit, bailing out if mem_total overflows */
	while (mem_unit > 1) {
		bitcount++;
		mem_unit >>= 1;
		sav_total = mem_total;
		mem_total <<= 1;
		if (mem_total < sav_total)
			goto out;
	}

	/*
	 * If mem_total did not overflow, multiply all memory values by
	 * info->mem_unit and set it to 1.  This leaves things compatible
	 * with 2.2.x, and also retains compatibility with earlier 2.4.x
	 * kernels...
	 */

	info->mem_unit = 1;
	info->totalram <<= bitcount;
	info->freeram <<= bitcount;
	info->sharedram <<= bitcount;
	info->bufferram <<= bitcount;
	info->totalswap <<= bitcount;
	info->freeswap <<= bitcount;
	info->totalhigh <<= bitcount;
	info->freehigh <<= bitcount;

out:
	return 0;
}

/*
 * sysinfo - copy system statistics to the user buffer @info.
 * Returns 0, or -EFAULT if @info cannot be written.
 */
SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info)
{
	struct sysinfo val;

	do_sysinfo(&val);

	if (copy_to_user(info, &val, sizeof(struct sysinfo)))
		return -EFAULT;

	return 0;
}

#ifdef CONFIG_COMPAT
/* sysinfo layout used by the compat syscall: all memory fields are 32-bit */
struct compat_sysinfo {
	s32 uptime;
	u32 loads[3];
	u32 totalram;
	u32 freeram;
	u32 sharedram;
	u32 bufferram;
	u32 totalswap;
	u32 freeswap;
	u16 procs;
	u16 pad;
	u32 totalhigh;
	u32 freehigh;
	u32 mem_unit;
	char _f[20-2*sizeof(u32)-sizeof(int)];
};

/*
 * Compat sysinfo: like sysinfo, but the result is stored field by field
 * into a struct compat_sysinfo.  The pad and _f members are not written.
 * Returns 0, or -EFAULT if @info cannot be written.
 */
COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info)
{
	struct sysinfo s;

	do_sysinfo(&s);

	/* Check to see if any memory value is too large for 32-bit and scale
	 *  down if needed
	 */
	if (upper_32_bits(s.totalram) || upper_32_bits(s.totalswap)) {
		int bitcount = 0;

		/* raise mem_unit up to PAGE_SIZE and shrink the values to match */
		while (s.mem_unit < PAGE_SIZE) {
			s.mem_unit <<= 1;
			bitcount++;
		}

		s.totalram >>= bitcount;
		s.freeram >>= bitcount;
		s.sharedram >>= bitcount;
		s.bufferram >>= bitcount;
		s.totalswap >>= bitcount;
		s.freeswap >>= bitcount;
		s.totalhigh >>= bitcount;
		s.freehigh >>= bitcount;
	}

	if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) ||
	    __put_user(s.uptime, &info->uptime) ||
	    __put_user(s.loads[0], &info->loads[0]) ||
	    __put_user(s.loads[1], &info->loads[1]) ||
	    __put_user(s.loads[2], &info->loads[2]) ||
	    __put_user(s.totalram, &info->totalram) ||
	    __put_user(s.freeram, &info->freeram) ||
	    __put_user(s.sharedram, &info->sharedram) ||
	    __put_user(s.bufferram, &info->bufferram) ||
	    __put_user(s.totalswap, &info->totalswap) ||
	    __put_user(s.freeswap, &info->freeswap) ||
	    __put_user(s.procs, &info->procs) ||
	    __put_user(s.totalhigh, &info->totalhigh) ||
	    __put_user(s.freehigh, &info->freehigh) ||
	    __put_user(s.mem_unit, &info->mem_unit))
		return -EFAULT;

	return 0;
}
#endif /* CONFIG_COMPAT */