/*
 *  linux/kernel/sys.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/export.h>
#include <linux/mm.h>
#include <linux/utsname.h>
#include <linux/mman.h>
#include <linux/reboot.h>
#include <linux/prctl.h>
#include <linux/highuid.h>
#include <linux/fs.h>
#include <linux/kmod.h>
#include <linux/perf_event.h>
#include <linux/resource.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/capability.h>
#include <linux/device.h>
#include <linux/key.h>
#include <linux/times.h>
#include <linux/posix-timers.h>
#include <linux/security.h>
#include <linux/dcookies.h>
#include <linux/suspend.h>
#include <linux/tty.h>
#include <linux/signal.h>
#include <linux/cn_proc.h>
#include <linux/getcpu.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/seccomp.h>
#include <linux/cpu.h>
#include <linux/personality.h>
#include <linux/ptrace.h>
#include <linux/fs_struct.h>
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/gfp.h>
#include <linux/syscore_ops.h>
#include <linux/version.h>
#include <linux/ctype.h>

#include <linux/compat.h>
#include <linux/syscalls.h>
#include <linux/kprobes.h>
#include <linux/user_namespace.h>
#include <linux/binfmts.h>

#include <linux/sched.h>
#include <linux/sched/autogroup.h>
#include <linux/sched/loadavg.h>
#include <linux/sched/mm.h>
#include <linux/sched/coredump.h>
#include <linux/rcupdate.h>
#include <linux/uidgid.h>
#include <linux/cred.h>

#include <linux/kmsg_dump.h>
/* Move somewhere else to avoid recompiling? */
#include <generated/utsrelease.h>

#include <linux/uaccess.h>
#include <asm/io.h>
#include <asm/unistd.h>

#ifndef SET_UNALIGN_CTL
# define SET_UNALIGN_CTL(a, b)		(-EINVAL)
#endif
#ifndef GET_UNALIGN_CTL
# define GET_UNALIGN_CTL(a, b)		(-EINVAL)
#endif
#ifndef SET_FPEMU_CTL
# define SET_FPEMU_CTL(a, b)		(-EINVAL)
#endif
#ifndef GET_FPEMU_CTL
# define GET_FPEMU_CTL(a, b)		(-EINVAL)
#endif
#ifndef SET_FPEXC_CTL
# define SET_FPEXC_CTL(a, b)		(-EINVAL)
#endif
#ifndef GET_FPEXC_CTL
# define GET_FPEXC_CTL(a, b)		(-EINVAL)
#endif
#ifndef GET_ENDIAN
# define GET_ENDIAN(a, b)		(-EINVAL)
#endif
#ifndef SET_ENDIAN
# define SET_ENDIAN(a, b)		(-EINVAL)
#endif
#ifndef GET_TSC_CTL
# define GET_TSC_CTL(a)			(-EINVAL)
#endif
#ifndef SET_TSC_CTL
# define SET_TSC_CTL(a)			(-EINVAL)
#endif
#ifndef MPX_ENABLE_MANAGEMENT
# define MPX_ENABLE_MANAGEMENT()	(-EINVAL)
#endif
#ifndef MPX_DISABLE_MANAGEMENT
# define MPX_DISABLE_MANAGEMENT()	(-EINVAL)
#endif
#ifndef GET_FP_MODE
# define GET_FP_MODE(a)			(-EINVAL)
#endif
#ifndef SET_FP_MODE
# define SET_FP_MODE(a, b)		(-EINVAL)
#endif

/*
 * this is where the system-wide overflow UID and GID are defined, for
 * architectures that now have 32-bit UID/GID but didn't in the past
 */

int overflowuid = DEFAULT_OVERFLOWUID;
int overflowgid = DEFAULT_OVERFLOWGID;

EXPORT_SYMBOL(overflowuid);
EXPORT_SYMBOL(overflowgid);
/*
 * the same as above, but for filesystems which can only store a 16-bit
 * UID and GID. as such, this is needed on all architectures
 */

int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
int fs_overflowgid = DEFAULT_FS_OVERFLOWGID;

EXPORT_SYMBOL(fs_overflowuid);
EXPORT_SYMBOL(fs_overflowgid);

/*
 * Returns true if current's euid is same as p's uid or euid,
 * or has CAP_SYS_NICE to p's user_ns.
 *
 * Called with rcu_read_lock, creds are safe
 */
static bool set_one_prio_perm(struct task_struct *p)
{
	const struct cred *cred = current_cred(), *pcred = __task_cred(p);

	if (uid_eq(pcred->uid,  cred->euid) ||
	    uid_eq(pcred->euid, cred->euid))
		return true;
	if (ns_capable(pcred->user_ns, CAP_SYS_NICE))
		return true;
	return false;
}

/*
 * set the priority of a task
 * - the caller must hold the RCU read lock
 */
static int set_one_prio(struct task_struct *p, int niceval, int error)
{
	int no_nice;

	if (!set_one_prio_perm(p)) {
		error = -EPERM;
		goto out;
	}
	if (niceval < task_nice(p) && !can_nice(p, niceval)) {
		error = -EACCES;
		goto out;
	}
	no_nice = security_task_setnice(p, niceval);
	if (no_nice) {
		error = no_nice;
		goto out;
	}
	if (error == -ESRCH)
		error = 0;
	set_user_nice(p, niceval);
out:
	return error;
}

SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	const struct cred *cred = current_cred();
	int error = -EINVAL;
	struct pid *pgrp;
	kuid_t uid;

	if (which > PRIO_USER || which < PRIO_PROCESS)
		goto out;

	/* normalize: avoid signed division (rounding problems) */
	error = -ESRCH;
	if (niceval < MIN_NICE)
		niceval = MIN_NICE;
	if (niceval > MAX_NICE)
		niceval = MAX_NICE;

	rcu_read_lock();
	read_lock(&tasklist_lock);
	switch (which) {
	case PRIO_PROCESS:
		if (who)
			p = find_task_by_vpid(who);
		else
			p = current;
		if (p)
			error = set_one_prio(p, niceval, error);
		break;
	case PRIO_PGRP:
		if (who)
			pgrp = find_vpid(who);
		else
			pgrp = task_pgrp(current);
		do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
			error = set_one_prio(p, niceval, error);
		} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
		break;
	case PRIO_USER:
		uid = make_kuid(cred->user_ns, who);
		user = cred->user;
		if (!who)
			uid = cred->uid;
		else if (!uid_eq(uid, cred->uid)) {
			user = find_user(uid);
			if (!user)
				goto out_unlock;	/* No processes for this user */
		}
		do_each_thread(g, p) {
			if (uid_eq(task_uid(p), uid) && task_pid_vnr(p))
				error = set_one_prio(p, niceval, error);
		} while_each_thread(g, p);
		if (!uid_eq(uid, cred->uid))
			free_uid(user);		/* For find_user() */
		break;
	}
out_unlock:
	read_unlock(&tasklist_lock);
	rcu_read_unlock();
out:
	return error;
}
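/*
 * Illustrative userspace sketch (editor's addition, not kernel code;
 * assumes the glibc wrappers, with pgid/uid standing for some existing
 * process group id / user id): the three "which" modes above address one
 * thread group, a process group, or all of a user's processes:
 *
 *	#include <sys/resource.h>
 *
 *	setpriority(PRIO_PROCESS, 0, 10);	// who == 0: the caller
 *	setpriority(PRIO_PGRP, pgid, 5);	// a whole process group
 *	setpriority(PRIO_USER, uid, 19);	// every process of a user
 *
 * For the group/user cases the call succeeds if at least one task was
 * reniced: the initial -ESRCH is cleared by set_one_prio() on the first
 * target that passes the permission checks.
 */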
/*
 * Ugh. To avoid negative return values, "getpriority()" will
 * not return the normal nice-value, but a negated value that
 * has been offset by 20 (ie it returns 40..1 instead of -20..19)
 * to stay compatible.
 */
SYSCALL_DEFINE2(getpriority, int, which, int, who)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	const struct cred *cred = current_cred();
	long niceval, retval = -ESRCH;
	struct pid *pgrp;
	kuid_t uid;

	if (which > PRIO_USER || which < PRIO_PROCESS)
		return -EINVAL;

	rcu_read_lock();
	read_lock(&tasklist_lock);
	switch (which) {
	case PRIO_PROCESS:
		if (who)
			p = find_task_by_vpid(who);
		else
			p = current;
		if (p) {
			niceval = nice_to_rlimit(task_nice(p));
			if (niceval > retval)
				retval = niceval;
		}
		break;
	case PRIO_PGRP:
		if (who)
			pgrp = find_vpid(who);
		else
			pgrp = task_pgrp(current);
		do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
			niceval = nice_to_rlimit(task_nice(p));
			if (niceval > retval)
				retval = niceval;
		} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
		break;
	case PRIO_USER:
		uid = make_kuid(cred->user_ns, who);
		user = cred->user;
		if (!who)
			uid = cred->uid;
		else if (!uid_eq(uid, cred->uid)) {
			user = find_user(uid);
			if (!user)
				goto out_unlock;	/* No processes for this user */
		}
		do_each_thread(g, p) {
			if (uid_eq(task_uid(p), uid) && task_pid_vnr(p)) {
				niceval = nice_to_rlimit(task_nice(p));
				if (niceval > retval)
					retval = niceval;
			}
		} while_each_thread(g, p);
		if (!uid_eq(uid, cred->uid))
			free_uid(user);		/* for find_user() */
		break;
	}
out_unlock:
	read_unlock(&tasklist_lock);
	rcu_read_unlock();

	return retval;
}
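/*
 * Illustrative userspace sketch (editor's addition, not kernel code):
 * because of the 40..1 encoding described above, a caller using the raw
 * syscall must undo the offset itself (the glibc wrapper already does):
 *
 *	long ret = syscall(SYS_getpriority, PRIO_PROCESS, 0);
 *	if (ret >= 0)
 *		printf("nice = %ld\n", 20 - ret);	// 40..1 -> -20..19
 *
 * This is the inverse of the nice_to_rlimit() mapping used above.
 */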
/*
 * Unprivileged users may change the real gid to the effective gid
 * or vice versa.  (BSD-style)
 *
 * If you set the real gid at all, or set the effective gid to a value not
 * equal to the real gid, then the saved gid is set to the new effective gid.
 *
 * This makes it possible for a setgid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setregid() will be
 * 100% compatible with BSD.  A program which uses just setgid() will be
 * 100% compatible with POSIX with saved IDs.
 *
 * SMP: There are no races, the GIDs are checked only by filesystem
 *      operations (as far as semantic preservation is concerned).
 */
#ifdef CONFIG_MULTIUSER
SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kgid_t krgid, kegid;

	krgid = make_kgid(ns, rgid);
	kegid = make_kgid(ns, egid);

	if ((rgid != (gid_t) -1) && !gid_valid(krgid))
		return -EINVAL;
	if ((egid != (gid_t) -1) && !gid_valid(kegid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (rgid != (gid_t) -1) {
		if (gid_eq(old->gid, krgid) ||
		    gid_eq(old->egid, krgid) ||
		    ns_capable(old->user_ns, CAP_SETGID))
			new->gid = krgid;
		else
			goto error;
	}
	if (egid != (gid_t) -1) {
		if (gid_eq(old->gid, kegid) ||
		    gid_eq(old->egid, kegid) ||
		    gid_eq(old->sgid, kegid) ||
		    ns_capable(old->user_ns, CAP_SETGID))
			new->egid = kegid;
		else
			goto error;
	}

	if (rgid != (gid_t) -1 ||
	    (egid != (gid_t) -1 && !gid_eq(kegid, old->gid)))
		new->sgid = new->egid;
	new->fsgid = new->egid;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}
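/*
 * Illustrative userspace sketch (editor's addition, not kernel code): the
 * privilege drop described in the comment above, for a setgid program:
 *
 *	gid_t rgid = getgid();		// the real, unprivileged gid
 *
 *	if (setregid(rgid, rgid) != 0)
 *		abort();		// refuse to continue half-privileged
 *
 * Because the real gid is being set, the code above also rewrites the
 * saved gid to the new effective gid, so the setgid group truly cannot
 * be regained afterwards.
 */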
/*
 * setgid() is implemented like SysV w/ SAVED_IDS
 *
 * SMP: Same implicit races as above.
 */
SYSCALL_DEFINE1(setgid, gid_t, gid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kgid_t kgid;

	kgid = make_kgid(ns, gid);
	if (!gid_valid(kgid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (ns_capable(old->user_ns, CAP_SETGID))
		new->gid = new->egid = new->sgid = new->fsgid = kgid;
	else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid))
		new->egid = new->fsgid = kgid;
	else
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

/*
 * change the user struct in a credentials set to match the new UID
 */
static int set_user(struct cred *new)
{
	struct user_struct *new_user;

	new_user = alloc_uid(new->uid);
	if (!new_user)
		return -EAGAIN;

	/*
	 * We don't fail in case of NPROC limit excess here because too many
	 * poorly written programs don't check set*uid() return code, assuming
	 * it never fails if called by root.  We may still enforce NPROC limit
	 * for programs doing set*uid()+execve() by harmlessly deferring the
	 * failure to the execve() stage.
	 */
	if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) &&
			new_user != INIT_USER)
		current->flags |= PF_NPROC_EXCEEDED;
	else
		current->flags &= ~PF_NPROC_EXCEEDED;

	free_uid(new->user);
	new->user = new_user;
	return 0;
}

/*
 * Unprivileged users may change the real uid to the effective uid
 * or vice versa.  (BSD-style)
 *
 * If you set the real uid at all, or set the effective uid to a value not
 * equal to the real uid, then the saved uid is set to the new effective uid.
 *
 * This makes it possible for a setuid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setreuid() will be
 * 100% compatible with BSD.  A program which uses just setuid() will be
 * 100% compatible with POSIX with saved IDs.
 */
SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kuid_t kruid, keuid;

	kruid = make_kuid(ns, ruid);
	keuid = make_kuid(ns, euid);

	if ((ruid != (uid_t) -1) && !uid_valid(kruid))
		return -EINVAL;
	if ((euid != (uid_t) -1) && !uid_valid(keuid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (ruid != (uid_t) -1) {
		new->uid = kruid;
		if (!uid_eq(old->uid, kruid) &&
		    !uid_eq(old->euid, kruid) &&
		    !ns_capable(old->user_ns, CAP_SETUID))
			goto error;
	}

	if (euid != (uid_t) -1) {
		new->euid = keuid;
		if (!uid_eq(old->uid, keuid) &&
		    !uid_eq(old->euid, keuid) &&
		    !uid_eq(old->suid, keuid) &&
		    !ns_capable(old->user_ns, CAP_SETUID))
			goto error;
	}

	if (!uid_eq(new->uid, old->uid)) {
		retval = set_user(new);
		if (retval < 0)
			goto error;
	}
	if (ruid != (uid_t) -1 ||
	    (euid != (uid_t) -1 && !uid_eq(keuid, old->uid)))
		new->suid = new->euid;
	new->fsuid = new->euid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_RE);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

/*
 * setuid() is implemented like SysV with SAVED_IDS
 *
 * Note that SAVED_ID's is deficient in that a setuid root program
 * like sendmail, for example, cannot set its uid to be a normal
 * user and then switch back, because if you're root, setuid() sets
 * the saved uid too.  If you don't like this, blame the bright people
 * in the POSIX committee and/or USG.  Note that the BSD-style setreuid()
 * will allow a root program to temporarily drop privileges and be able to
 * regain them by swapping the real and effective uid.
 */
SYSCALL_DEFINE1(setuid, uid_t, uid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kuid_t kuid;

	kuid = make_kuid(ns, uid);
	if (!uid_valid(kuid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (ns_capable(old->user_ns, CAP_SETUID)) {
		new->suid = new->uid = kuid;
		if (!uid_eq(kuid, old->uid)) {
			retval = set_user(new);
			if (retval < 0)
				goto error;
		}
	} else if (!uid_eq(kuid, old->uid) && !uid_eq(kuid, new->suid)) {
		goto error;
	}

	new->fsuid = new->euid = kuid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_ID);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}
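/*
 * Illustrative userspace sketch (editor's addition, not kernel code): the
 * BSD-style swap referred to above, letting a setuid program shed its
 * privilege for a stretch of code and take it back later:
 *
 *	uid_t ruid = getuid(), euid = geteuid();
 *
 *	setreuid(euid, ruid);	// swap: run with the real uid effective
 *	// ... untrusted work ...
 *	setreuid(ruid, euid);	// swap back: privileged again
 *
 * setuid() cannot express this for root, because it rewrites the saved
 * uid as well - the POSIX deficiency the comment above complains about.
 */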
/*
 * This function implements a generic ability to update ruid, euid,
 * and suid.  This allows you to implement the 4.4 compatible seteuid().
 */
SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kuid_t kruid, keuid, ksuid;

	kruid = make_kuid(ns, ruid);
	keuid = make_kuid(ns, euid);
	ksuid = make_kuid(ns, suid);

	if ((ruid != (uid_t) -1) && !uid_valid(kruid))
		return -EINVAL;

	if ((euid != (uid_t) -1) && !uid_valid(keuid))
		return -EINVAL;

	if ((suid != (uid_t) -1) && !uid_valid(ksuid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;

	old = current_cred();

	retval = -EPERM;
	if (!ns_capable(old->user_ns, CAP_SETUID)) {
		if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) &&
		    !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
			goto error;
		if (euid != (uid_t) -1 && !uid_eq(keuid, old->uid) &&
		    !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid))
			goto error;
		if (suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) &&
		    !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid))
			goto error;
	}

	if (ruid != (uid_t) -1) {
		new->uid = kruid;
		if (!uid_eq(kruid, old->uid)) {
			retval = set_user(new);
			if (retval < 0)
				goto error;
		}
	}
	if (euid != (uid_t) -1)
		new->euid = keuid;
	if (suid != (uid_t) -1)
		new->suid = ksuid;
	new->fsuid = new->euid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_RES);
	if (retval < 0)
		goto error;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

SYSCALL_DEFINE3(getresuid, uid_t __user *, ruidp, uid_t __user *, euidp, uid_t __user *, suidp)
{
	const struct cred *cred = current_cred();
	int retval;
	uid_t ruid, euid, suid;

	ruid = from_kuid_munged(cred->user_ns, cred->uid);
	euid = from_kuid_munged(cred->user_ns, cred->euid);
	suid = from_kuid_munged(cred->user_ns, cred->suid);

	retval = put_user(ruid, ruidp);
	if (!retval) {
		retval = put_user(euid, euidp);
		if (!retval)
			return put_user(suid, suidp);
	}
	return retval;
}
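/*
 * Illustrative userspace sketch (editor's addition, not kernel code):
 * setresuid() makes a permanent drop explicit and verifiable, which the
 * setreuid()/setuid() combinations above only achieve indirectly:
 *
 *	uid_t r, e, s, target = getuid();
 *
 *	if (setresuid(target, target, target) != 0 ||
 *	    getresuid(&r, &e, &s) != 0 ||
 *	    r != target || e != target || s != target)
 *		abort();	// drop failed; do not continue
 */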
/*
 * Same as above, but for rgid, egid, sgid.
 */
SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
{
	struct user_namespace *ns = current_user_ns();
	const struct cred *old;
	struct cred *new;
	int retval;
	kgid_t krgid, kegid, ksgid;

	krgid = make_kgid(ns, rgid);
	kegid = make_kgid(ns, egid);
	ksgid = make_kgid(ns, sgid);

	if ((rgid != (gid_t) -1) && !gid_valid(krgid))
		return -EINVAL;
	if ((egid != (gid_t) -1) && !gid_valid(kegid))
		return -EINVAL;
	if ((sgid != (gid_t) -1) && !gid_valid(ksgid))
		return -EINVAL;

	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
	if (!ns_capable(old->user_ns, CAP_SETGID)) {
		if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) &&
		    !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid))
			goto error;
		if (egid != (gid_t) -1 && !gid_eq(kegid, old->gid) &&
		    !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid))
			goto error;
		if (sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) &&
		    !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid))
			goto error;
	}

	if (rgid != (gid_t) -1)
		new->gid = krgid;
	if (egid != (gid_t) -1)
		new->egid = kegid;
	if (sgid != (gid_t) -1)
		new->sgid = ksgid;
	new->fsgid = new->egid;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
}

SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t __user *, sgidp)
{
	const struct cred *cred = current_cred();
	int retval;
	gid_t rgid, egid, sgid;

	rgid = from_kgid_munged(cred->user_ns, cred->gid);
	egid = from_kgid_munged(cred->user_ns, cred->egid);
	sgid = from_kgid_munged(cred->user_ns, cred->sgid);

	retval = put_user(rgid, rgidp);
	if (!retval) {
		retval = put_user(egid, egidp);
		if (!retval)
			retval = put_user(sgid, sgidp);
	}

	return retval;
}


/*
 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
 * is used for "access()" and for the NFS daemon (letting nfsd stay at
 * whatever uid it wants to). It normally shadows "euid", except when
 * explicitly set by setfsuid() or for access..
 */
SYSCALL_DEFINE1(setfsuid, uid_t, uid)
{
	const struct cred *old;
	struct cred *new;
	uid_t old_fsuid;
	kuid_t kuid;

	old = current_cred();
	old_fsuid = from_kuid_munged(old->user_ns, old->fsuid);

	kuid = make_kuid(old->user_ns, uid);
	if (!uid_valid(kuid))
		return old_fsuid;

	new = prepare_creds();
	if (!new)
		return old_fsuid;

	if (uid_eq(kuid, old->uid)  || uid_eq(kuid, old->euid) ||
	    uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) ||
	    ns_capable(old->user_ns, CAP_SETUID)) {
		if (!uid_eq(kuid, old->fsuid)) {
			new->fsuid = kuid;
			if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
				goto change_okay;
		}
	}

	abort_creds(new);
	return old_fsuid;

change_okay:
	commit_creds(new);
	return old_fsuid;
}
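/*
 * Illustrative userspace sketch (editor's addition, not kernel code): as
 * the code above shows, setfsuid() returns the previous fsuid in both the
 * success and the failure case, so the only way to detect a refused
 * change is a second, no-op call:
 *
 *	setfsuid(uid);
 *	if ((uid_t)setfsuid(-1) != uid)
 *		;	// the first call was refused
 *
 * (-1 never maps to a valid kuid, so the second call takes the early
 * "return old_fsuid" path above without changing anything.)
 */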
/*
 * Samma på svenska.. ("same thing, in Swedish": setfsgid() mirrors
 * setfsuid() above, but for the filesystem gid)
 */
SYSCALL_DEFINE1(setfsgid, gid_t, gid)
{
	const struct cred *old;
	struct cred *new;
	gid_t old_fsgid;
	kgid_t kgid;

	old = current_cred();
	old_fsgid = from_kgid_munged(old->user_ns, old->fsgid);

	kgid = make_kgid(old->user_ns, gid);
	if (!gid_valid(kgid))
		return old_fsgid;

	new = prepare_creds();
	if (!new)
		return old_fsgid;

	if (gid_eq(kgid, old->gid)  || gid_eq(kgid, old->egid) ||
	    gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) ||
	    ns_capable(old->user_ns, CAP_SETGID)) {
		if (!gid_eq(kgid, old->fsgid)) {
			new->fsgid = kgid;
			goto change_okay;
		}
	}

	abort_creds(new);
	return old_fsgid;

change_okay:
	commit_creds(new);
	return old_fsgid;
}
#endif /* CONFIG_MULTIUSER */

/**
 * sys_getpid - return the thread group id of the current process
 *
 * Note, despite the name, this returns the tgid not the pid.  The tgid and
 * the pid are identical unless CLONE_THREAD was specified on clone() in
 * which case the tgid is the same in all threads of the same group.
 *
 * This is SMP safe as current->tgid does not change.
 */
SYSCALL_DEFINE0(getpid)
{
	return task_tgid_vnr(current);
}

/* Thread ID - the internal kernel "pid" */
SYSCALL_DEFINE0(gettid)
{
	return task_pid_vnr(current);
}

/*
 * Accessing ->real_parent is not SMP-safe, it could
 * change from under us. However, we can use a stale
 * value of ->real_parent under rcu_read_lock(), see
 * release_task()->call_rcu(delayed_put_task_struct).
 */
SYSCALL_DEFINE0(getppid)
{
	int pid;

	rcu_read_lock();
	pid = task_tgid_vnr(rcu_dereference(current->real_parent));
	rcu_read_unlock();

	return pid;
}

SYSCALL_DEFINE0(getuid)
{
	/* Only we change this so SMP safe */
	return from_kuid_munged(current_user_ns(), current_uid());
}

SYSCALL_DEFINE0(geteuid)
{
	/* Only we change this so SMP safe */
	return from_kuid_munged(current_user_ns(), current_euid());
}

SYSCALL_DEFINE0(getgid)
{
	/* Only we change this so SMP safe */
	return from_kgid_munged(current_user_ns(), current_gid());
}

SYSCALL_DEFINE0(getegid)
{
	/* Only we change this so SMP safe */
	return from_kgid_munged(current_user_ns(), current_egid());
}

void do_sys_times(struct tms *tms)
{
	u64 tgutime, tgstime, cutime, cstime;

	thread_group_cputime_adjusted(current, &tgutime, &tgstime);
	cutime = current->signal->cutime;
	cstime = current->signal->cstime;
	tms->tms_utime = nsec_to_clock_t(tgutime);
	tms->tms_stime = nsec_to_clock_t(tgstime);
	tms->tms_cutime = nsec_to_clock_t(cutime);
	tms->tms_cstime = nsec_to_clock_t(cstime);
}

SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
{
	if (tbuf) {
		struct tms tmp;

		do_sys_times(&tmp);
		if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
			return -EFAULT;
	}
	force_successful_syscall_return();
	return (long) jiffies_64_to_clock_t(get_jiffies_64());
}
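/*
 * Illustrative userspace sketch (editor's addition, not kernel code):
 * everything do_sys_times() fills in is in clock ticks, so userspace
 * divides by sysconf(_SC_CLK_TCK):
 *
 *	struct tms t;
 *	clock_t elapsed = times(&t);
 *	long hz = sysconf(_SC_CLK_TCK);
 *
 *	printf("user %.2fs, system %.2fs\n",
 *	       (double)t.tms_utime / hz, (double)t.tms_stime / hz);
 *
 * The raw return value (ticks since an arbitrary point) can grow large
 * enough to look like a -errno, which is why sys_times() calls
 * force_successful_syscall_return().
 */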
/*
 * This needs some heavy checking ...
 * I just haven't the stomach for it. I also don't fully
 * understand sessions/pgrp etc. Let somebody who does explain it.
 *
 * OK, I think I have the protection semantics right.... this is really
 * only important on a multi-user system anyway, to make sure one user
 * can't send a signal to a process owned by another.  -TYT, 12/12/91
 *
 * !PF_FORKNOEXEC check to conform completely to POSIX.
 */
SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
{
	struct task_struct *p;
	struct task_struct *group_leader = current->group_leader;
	struct pid *pgrp;
	int err;

	if (!pid)
		pid = task_pid_vnr(group_leader);
	if (!pgid)
		pgid = pid;
	if (pgid < 0)
		return -EINVAL;
	rcu_read_lock();

	/* From this point forward we keep holding onto the tasklist lock
	 * so that our parent does not change from under us. -DaveM
	 */
	write_lock_irq(&tasklist_lock);

	err = -ESRCH;
	p = find_task_by_vpid(pid);
	if (!p)
		goto out;

	err = -EINVAL;
	if (!thread_group_leader(p))
		goto out;

	if (same_thread_group(p->real_parent, group_leader)) {
		err = -EPERM;
		if (task_session(p) != task_session(group_leader))
			goto out;
		err = -EACCES;
		if (!(p->flags & PF_FORKNOEXEC))
			goto out;
	} else {
		err = -ESRCH;
		if (p != group_leader)
			goto out;
	}

	err = -EPERM;
	if (p->signal->leader)
		goto out;

	pgrp = task_pid(p);
	if (pgid != pid) {
		struct task_struct *g;

		pgrp = find_vpid(pgid);
		g = pid_task(pgrp, PIDTYPE_PGID);
		if (!g || task_session(g) != task_session(group_leader))
			goto out;
	}

	err = security_task_setpgid(p, pgid);
	if (err)
		goto out;

	if (task_pgrp(p) != pgrp)
		change_pid(p, PIDTYPE_PGID, pgrp);

	err = 0;
out:
	/* All paths lead to here, thus we are safe. -DaveM */
	write_unlock_irq(&tasklist_lock);
	rcu_read_unlock();
	return err;
}

SYSCALL_DEFINE1(getpgid, pid_t, pid)
{
	struct task_struct *p;
	struct pid *grp;
	int retval;

	rcu_read_lock();
	if (!pid)
		grp = task_pgrp(current);
	else {
		retval = -ESRCH;
		p = find_task_by_vpid(pid);
		if (!p)
			goto out;
		grp = task_pgrp(p);
		if (!grp)
			goto out;

		retval = security_task_getpgid(p);
		if (retval)
			goto out;
	}
	retval = pid_vnr(grp);
out:
	rcu_read_unlock();
	return retval;
}

#ifdef __ARCH_WANT_SYS_GETPGRP

SYSCALL_DEFINE0(getpgrp)
{
	return sys_getpgid(0);
}

#endif

SYSCALL_DEFINE1(getsid, pid_t, pid)
{
	struct task_struct *p;
	struct pid *sid;
	int retval;

	rcu_read_lock();
	if (!pid)
		sid = task_session(current);
	else {
		retval = -ESRCH;
		p = find_task_by_vpid(pid);
		if (!p)
			goto out;
		sid = task_session(p);
		if (!sid)
			goto out;

		retval = security_task_getsid(p);
		if (retval)
			goto out;
	}
	retval = pid_vnr(sid);
out:
	rcu_read_unlock();
	return retval;
}

static void set_special_pids(struct pid *pid)
{
	struct task_struct *curr = current->group_leader;

	if (task_session(curr) != pid)
		change_pid(curr, PIDTYPE_SID, pid);

	if (task_pgrp(curr) != pid)
		change_pid(curr, PIDTYPE_PGID, pid);
}

SYSCALL_DEFINE0(setsid)
{
	struct task_struct *group_leader = current->group_leader;
	struct pid *sid = task_pid(group_leader);
	pid_t session = pid_vnr(sid);
	int err = -EPERM;

	write_lock_irq(&tasklist_lock);
	/* Fail if I am already a session leader */
	if (group_leader->signal->leader)
		goto out;

	/* Fail if a process group id already exists that equals the
	 * proposed session id.
	 */
	if (pid_task(sid, PIDTYPE_PGID))
		goto out;

	group_leader->signal->leader = 1;
	set_special_pids(sid);

	proc_clear_tty(group_leader);

	err = session;
out:
	write_unlock_irq(&tasklist_lock);
	if (err > 0) {
		proc_sid_connector(group_leader);
		sched_autogroup_create_attach(group_leader);
	}
	return err;
}
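/*
 * Illustrative userspace sketch (editor's addition, not kernel code): the
 * -EPERM path above fires when the caller already leads a process group,
 * which is why the classic daemonization sequence forks first:
 *
 *	pid_t pid = fork();
 *
 *	if (pid > 0)
 *		_exit(0);	// parent exits
 *	if (pid == 0)
 *		setsid();	// child is no group leader, so this succeeds
 *
 * Afterwards the child is session and process group leader with no
 * controlling tty (see proc_clear_tty() above).
 */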
DECLARE_RWSEM(uts_sem);

#ifdef COMPAT_UTS_MACHINE
#define override_architecture(name) \
	(personality(current->personality) == PER_LINUX32 && \
	 copy_to_user(name->machine, COMPAT_UTS_MACHINE, \
		      sizeof(COMPAT_UTS_MACHINE)))
#else
#define override_architecture(name)	0
#endif

/*
 * Work around broken programs that cannot handle "Linux 3.0".
 * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
 * And we map 4.x to 2.6.60+x, so 4.0 would be 2.6.60.
 */
static int override_release(char __user *release, size_t len)
{
	int ret = 0;

	if (current->personality & UNAME26) {
		const char *rest = UTS_RELEASE;
		char buf[65] = { 0 };
		int ndots = 0;
		unsigned v;
		size_t copy;

		while (*rest) {
			if (*rest == '.' && ++ndots >= 3)
				break;
			if (!isdigit(*rest) && *rest != '.')
				break;
			rest++;
		}
		v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 60;
		copy = clamp_t(size_t, len, 1, sizeof(buf));
		copy = scnprintf(buf, copy, "2.6.%u%s", v, rest);
		ret = copy_to_user(release, buf, copy + 1);
	}
	return ret;
}
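/*
 * Worked example of the mapping above (editor's note): this file is from
 * a 4.x kernel, hence the "+ 60" - under UNAME26 a "4.2.0" release is
 * reported as "2.6.62", and "4.12.0-rc1" as "2.6.72-rc1". Only the
 * leading numeric components of UTS_RELEASE are rewritten; whatever
 * "rest" points at after the scan loop is appended unchanged.
 */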
SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
{
	int errno = 0;

	down_read(&uts_sem);
	if (copy_to_user(name, utsname(), sizeof *name))
		errno = -EFAULT;
	up_read(&uts_sem);

	if (!errno && override_release(name->release, sizeof(name->release)))
		errno = -EFAULT;
	if (!errno && override_architecture(name))
		errno = -EFAULT;
	return errno;
}

#ifdef __ARCH_WANT_SYS_OLD_UNAME
/*
 * Old cruft
 */
SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
{
	int error = 0;

	if (!name)
		return -EFAULT;

	down_read(&uts_sem);
	if (copy_to_user(name, utsname(), sizeof(*name)))
		error = -EFAULT;
	up_read(&uts_sem);

	if (!error && override_release(name->release, sizeof(name->release)))
		error = -EFAULT;
	if (!error && override_architecture(name))
		error = -EFAULT;
	return error;
}

SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
{
	int error;

	if (!name)
		return -EFAULT;
	if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
		return -EFAULT;

	down_read(&uts_sem);
	error = __copy_to_user(&name->sysname, &utsname()->sysname,
			       __OLD_UTS_LEN);
	error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->nodename, &utsname()->nodename,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->release, &utsname()->release,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->release + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->version, &utsname()->version,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->version + __OLD_UTS_LEN);
	error |= __copy_to_user(&name->machine, &utsname()->machine,
				__OLD_UTS_LEN);
	error |= __put_user(0, name->machine + __OLD_UTS_LEN);
	up_read(&uts_sem);

	if (!error && override_architecture(name))
		error = -EFAULT;
	if (!error && override_release(name->release, sizeof(name->release)))
		error = -EFAULT;
	return error ? -EFAULT : 0;
}
#endif

SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
{
	int errno;
	char tmp[__NEW_UTS_LEN];

	if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
		return -EPERM;

	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;
	down_write(&uts_sem);
	errno = -EFAULT;
	if (!copy_from_user(tmp, name, len)) {
		struct new_utsname *u = utsname();

		memcpy(u->nodename, tmp, len);
		memset(u->nodename + len, 0, sizeof(u->nodename) - len);
		errno = 0;
		uts_proc_notify(UTS_PROC_HOSTNAME);
	}
	up_write(&uts_sem);
	return errno;
}

#ifdef __ARCH_WANT_SYS_GETHOSTNAME

SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
{
	int i, errno;
	struct new_utsname *u;

	if (len < 0)
		return -EINVAL;
	down_read(&uts_sem);
	u = utsname();
	i = 1 + strlen(u->nodename);
	if (i > len)
		i = len;
	errno = 0;
	if (copy_to_user(name, u->nodename, i))
		errno = -EFAULT;
	up_read(&uts_sem);
	return errno;
}

#endif

/*
 * Only setdomainname; getdomainname can be implemented by calling
 * uname()
 */
SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
{
	int errno;
	char tmp[__NEW_UTS_LEN];

	if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
		return -EPERM;
	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;

	down_write(&uts_sem);
	errno = -EFAULT;
	if (!copy_from_user(tmp, name, len)) {
		struct new_utsname *u = utsname();

		memcpy(u->domainname, tmp, len);
		memset(u->domainname + len, 0, sizeof(u->domainname) - len);
		errno = 0;
		uts_proc_notify(UTS_PROC_DOMAINNAME);
	}
	up_write(&uts_sem);
	return errno;
}

SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
	struct rlimit value;
	int ret;

	ret = do_prlimit(current, resource, NULL, &value);
	if (!ret)
		ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0;

	return ret;
}

#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT

/*
 * Back compatibility for getrlimit. Needed for some apps.
 */
SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
		struct rlimit __user *, rlim)
{
	struct rlimit x;
	if (resource >= RLIM_NLIMITS)
		return -EINVAL;

	task_lock(current->group_leader);
	x = current->signal->rlim[resource];
	task_unlock(current->group_leader);
	if (x.rlim_cur > 0x7FFFFFFF)
		x.rlim_cur = 0x7FFFFFFF;
	if (x.rlim_max > 0x7FFFFFFF)
		x.rlim_max = 0x7FFFFFFF;
	return copy_to_user(rlim, &x, sizeof(x)) ? -EFAULT : 0;
}

#endif
static inline bool rlim64_is_infinity(__u64 rlim64)
{
#if BITS_PER_LONG < 64
	return rlim64 >= ULONG_MAX;
#else
	return rlim64 == RLIM64_INFINITY;
#endif
}

static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64)
{
	if (rlim->rlim_cur == RLIM_INFINITY)
		rlim64->rlim_cur = RLIM64_INFINITY;
	else
		rlim64->rlim_cur = rlim->rlim_cur;
	if (rlim->rlim_max == RLIM_INFINITY)
		rlim64->rlim_max = RLIM64_INFINITY;
	else
		rlim64->rlim_max = rlim->rlim_max;
}

static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim)
{
	if (rlim64_is_infinity(rlim64->rlim_cur))
		rlim->rlim_cur = RLIM_INFINITY;
	else
		rlim->rlim_cur = (unsigned long)rlim64->rlim_cur;
	if (rlim64_is_infinity(rlim64->rlim_max))
		rlim->rlim_max = RLIM_INFINITY;
	else
		rlim->rlim_max = (unsigned long)rlim64->rlim_max;
}
/* make sure you are allowed to change @tsk limits before calling this */
int do_prlimit(struct task_struct *tsk, unsigned int resource,
		struct rlimit *new_rlim, struct rlimit *old_rlim)
{
	struct rlimit *rlim;
	int retval = 0;

	if (resource >= RLIM_NLIMITS)
		return -EINVAL;
	if (new_rlim) {
		if (new_rlim->rlim_cur > new_rlim->rlim_max)
			return -EINVAL;
		if (resource == RLIMIT_NOFILE &&
				new_rlim->rlim_max > sysctl_nr_open)
			return -EPERM;
	}

	/* protect tsk->signal and tsk->sighand from disappearing */
	read_lock(&tasklist_lock);
	if (!tsk->sighand) {
		retval = -ESRCH;
		goto out;
	}

	rlim = tsk->signal->rlim + resource;
	task_lock(tsk->group_leader);
	if (new_rlim) {
		/* Keep the capable check against init_user_ns until
		   cgroups can contain all limits */
		if (new_rlim->rlim_max > rlim->rlim_max &&
				!capable(CAP_SYS_RESOURCE))
			retval = -EPERM;
		if (!retval)
			retval = security_task_setrlimit(tsk->group_leader,
					resource, new_rlim);
		if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
			/*
			 * The caller is asking for an immediate RLIMIT_CPU
			 * expiry.  But we use the zero value to mean "it was
			 * never set".  So let's cheat and make it one second
			 * instead
			 */
			new_rlim->rlim_cur = 1;
		}
	}
	if (!retval) {
		if (old_rlim)
			*old_rlim = *rlim;
		if (new_rlim)
			*rlim = *new_rlim;
	}
	task_unlock(tsk->group_leader);

	/*
	 * RLIMIT_CPU handling.  Note that the kernel fails to return an error
	 * code if it rejected the user's attempt to set RLIMIT_CPU.  This is a
	 * very long-standing error, and fixing it now risks breakage of
	 * applications, so we live with it
	 */
	if (!retval && new_rlim && resource == RLIMIT_CPU &&
	    new_rlim->rlim_cur != RLIM_INFINITY &&
	    IS_ENABLED(CONFIG_POSIX_TIMERS))
		update_rlimit_cpu(tsk, new_rlim->rlim_cur);
out:
	read_unlock(&tasklist_lock);
	return retval;
}

/* rcu lock must be held */
static int check_prlimit_permission(struct task_struct *task)
{
	const struct cred *cred = current_cred(), *tcred;

	if (current == task)
		return 0;

	tcred = __task_cred(task);
	if (uid_eq(cred->uid, tcred->euid) &&
	    uid_eq(cred->uid, tcred->suid) &&
	    uid_eq(cred->uid, tcred->uid)  &&
	    gid_eq(cred->gid, tcred->egid) &&
	    gid_eq(cred->gid, tcred->sgid) &&
	    gid_eq(cred->gid, tcred->gid))
		return 0;
	if (ns_capable(tcred->user_ns, CAP_SYS_RESOURCE))
		return 0;

	return -EPERM;
}

SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
		const struct rlimit64 __user *, new_rlim,
		struct rlimit64 __user *, old_rlim)
{
	struct rlimit64 old64, new64;
	struct rlimit old, new;
	struct task_struct *tsk;
	int ret;

	if (new_rlim) {
		if (copy_from_user(&new64, new_rlim, sizeof(new64)))
			return -EFAULT;
		rlim64_to_rlim(&new64, &new);
	}

	rcu_read_lock();
	tsk = pid ? find_task_by_vpid(pid) : current;
	if (!tsk) {
		rcu_read_unlock();
		return -ESRCH;
	}
	ret = check_prlimit_permission(tsk);
	if (ret) {
		rcu_read_unlock();
		return ret;
	}
	get_task_struct(tsk);
	rcu_read_unlock();

	ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL,
			old_rlim ? &old : NULL);

	if (!ret && old_rlim) {
		rlim_to_rlim64(&old, &old64);
		if (copy_to_user(old_rlim, &old64, sizeof(old64)))
			ret = -EFAULT;
	}

	put_task_struct(tsk);
	return ret;
}

SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
	struct rlimit new_rlim;

	if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
		return -EFAULT;
	return do_prlimit(current, resource, &new_rlim, NULL);
}
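/*
 * Illustrative userspace sketch (editor's addition, not kernel code;
 * shown via the glibc prlimit() wrapper): prlimit64() above is the one
 * variant that reads the old limit and installs a new one atomically,
 * optionally for another task:
 *
 *	struct rlimit new = { .rlim_cur = 4096, .rlim_max = 8192 };
 *	struct rlimit old;
 *
 *	if (prlimit(pid, RLIMIT_NOFILE, &new, &old) == 0)
 *		printf("was %ju/%ju\n",
 *		       (uintmax_t)old.rlim_cur, (uintmax_t)old.rlim_max);
 *
 * pid == 0 targets the caller, matching the find_task_by_vpid() fallback
 * above; raising rlim_max beyond the current hard limit still requires
 * CAP_SYS_RESOURCE, as checked in do_prlimit().
 */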
/*
 * It would make sense to put struct rusage in the task_struct,
 * except that would make the task_struct be *really big*.  After
 * task_struct gets moved into malloc'ed memory, it would
 * make sense to do this.  It will make moving the rest of the information
 * a lot simpler!  (Which we're not doing right now because we're not
 * measuring them yet).
 *
 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
 * races with threads incrementing their own counters.  But since word
 * reads are atomic, we either get new values or old values and we don't
 * care which for the sums.  We always take the siglock to protect reading
 * the c* fields from p->signal from races with exit.c updating those
 * fields when reaping, so a sample either gets all the additions of a
 * given child after it's reaped, or none so this sample is before reaping.
 *
 * Locking:
 * We need to take the siglock for CHILDREN, SELF and BOTH
 * for the cases current multithreaded, non-current single threaded, and
 * non-current multithreaded.  Thread traversal is now safe with
 * the siglock held.
 * Strictly speaking, we do not need to take the siglock if we are current and
 * single threaded, as no one else can take our signal_struct away, no one
 * else can reap the children to update signal->c* counters, and no one else
 * can race with the signal-> fields. If we do not take any lock, the
 * signal-> fields could be read out of order while another thread was just
 * exiting. So we should place a read memory barrier when we avoid the lock.
 * On the writer side, write memory barrier is implied in __exit_signal
 * as __exit_signal releases the siglock spinlock after updating the signal->
 * fields. But we don't do this yet to keep things simple.
 *
 */

static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
{
	r->ru_nvcsw += t->nvcsw;
	r->ru_nivcsw += t->nivcsw;
	r->ru_minflt += t->min_flt;
	r->ru_majflt += t->maj_flt;
	r->ru_inblock += task_io_get_inblock(t);
	r->ru_oublock += task_io_get_oublock(t);
}

static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
{
	struct task_struct *t;
	unsigned long flags;
	u64 tgutime, tgstime, utime, stime;
	unsigned long maxrss = 0;

	memset((char *)r, 0, sizeof(*r));
	utime = stime = 0;

	if (who == RUSAGE_THREAD) {
		task_cputime_adjusted(current, &utime, &stime);
		accumulate_thread_rusage(p, r);
		maxrss = p->signal->maxrss;
		goto out;
	}

	if (!lock_task_sighand(p, &flags))
		return;

	switch (who) {
	case RUSAGE_BOTH:
	case RUSAGE_CHILDREN:
		utime = p->signal->cutime;
		stime = p->signal->cstime;
		r->ru_nvcsw = p->signal->cnvcsw;
		r->ru_nivcsw = p->signal->cnivcsw;
		r->ru_minflt = p->signal->cmin_flt;
		r->ru_majflt = p->signal->cmaj_flt;
		r->ru_inblock = p->signal->cinblock;
		r->ru_oublock = p->signal->coublock;
		maxrss = p->signal->cmaxrss;

		if (who == RUSAGE_CHILDREN)
			break;

	case RUSAGE_SELF:
		thread_group_cputime_adjusted(p, &tgutime, &tgstime);
		utime += tgutime;
		stime += tgstime;
		r->ru_nvcsw += p->signal->nvcsw;
		r->ru_nivcsw += p->signal->nivcsw;
		r->ru_minflt += p->signal->min_flt;
		r->ru_majflt += p->signal->maj_flt;
		r->ru_inblock += p->signal->inblock;
		r->ru_oublock += p->signal->oublock;
		if (maxrss < p->signal->maxrss)
			maxrss = p->signal->maxrss;
		t = p;
		do {
			accumulate_thread_rusage(t, r);
		} while_each_thread(p, t);
		break;

	default:
		BUG();
	}
	unlock_task_sighand(p, &flags);

out:
	r->ru_utime = ns_to_timeval(utime);
	r->ru_stime = ns_to_timeval(stime);

	if (who != RUSAGE_CHILDREN) {
		struct mm_struct *mm = get_task_mm(p);

		if (mm) {
			setmax_mm_hiwater_rss(&maxrss, mm);
			mmput(mm);
		}
	}
	r->ru_maxrss = maxrss * (PAGE_SIZE / 1024);	/* convert pages to KBs */
}

int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
{
	struct rusage r;

	k_getrusage(p, who, &r);
	return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
}
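/*
 * Illustrative userspace sketch (editor's addition, not kernel code): the
 * maxrss tracking above surfaces as ru_maxrss, already scaled to
 * kilobytes by k_getrusage():
 *
 *	struct rusage ru;
 *
 *	if (getrusage(RUSAGE_SELF, &ru) == 0)
 *		printf("peak RSS: %ld kB\n", ru.ru_maxrss);
 *
 * RUSAGE_CHILDREN only reflects children that have already been waited
 * for - exactly the signal->c* accounting the comment above describes.
 */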
SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
{
	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
	    who != RUSAGE_THREAD)
		return -EINVAL;
	return getrusage(current, who, ru);
}

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE2(getrusage, int, who, struct compat_rusage __user *, ru)
{
	struct rusage r;

	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN &&
	    who != RUSAGE_THREAD)
		return -EINVAL;

	k_getrusage(current, who, &r);
	return put_compat_rusage(&r, ru);
}
#endif

SYSCALL_DEFINE1(umask, int, mask)
{
	mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
	return mask;
}

static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
{
	struct fd exe;
	struct file *old_exe, *exe_file;
	struct inode *inode;
	int err;

	exe = fdget(fd);
	if (!exe.file)
		return -EBADF;

	inode = file_inode(exe.file);

	/*
	 * Because the original mm->exe_file points to executable file, make
	 * sure that this one is executable as well, to avoid breaking an
	 * overall picture.
	 */
	err = -EACCES;
	if (!S_ISREG(inode->i_mode) || path_noexec(&exe.file->f_path))
		goto exit;

	err = inode_permission(inode, MAY_EXEC);
	if (err)
		goto exit;

	/*
	 * Forbid mm->exe_file change if old file still mapped.
	 */
	exe_file = get_mm_exe_file(mm);
	err = -EBUSY;
	if (exe_file) {
		struct vm_area_struct *vma;

		down_read(&mm->mmap_sem);
		for (vma = mm->mmap; vma; vma = vma->vm_next) {
			if (!vma->vm_file)
				continue;
			if (path_equal(&vma->vm_file->f_path,
				       &exe_file->f_path))
				goto exit_err;
		}

		up_read(&mm->mmap_sem);
		fput(exe_file);
	}

	err = 0;
	/* set the new file, lockless */
	get_file(exe.file);
	old_exe = xchg(&mm->exe_file, exe.file);
	if (old_exe)
		fput(old_exe);
exit:
	fdput(exe);
	return err;
exit_err:
	up_read(&mm->mmap_sem);
	fput(exe_file);
	goto exit;
}

/*
 * WARNING: we don't require any capability here so be very careful
 * in what is allowed for modification from userspace.
 */
static int validate_prctl_map(struct prctl_mm_map *prctl_map)
{
	unsigned long mmap_max_addr = TASK_SIZE;
	struct mm_struct *mm = current->mm;
	int error = -EINVAL, i;

	static const unsigned char offsets[] = {
		offsetof(struct prctl_mm_map, start_code),
		offsetof(struct prctl_mm_map, end_code),
		offsetof(struct prctl_mm_map, start_data),
		offsetof(struct prctl_mm_map, end_data),
		offsetof(struct prctl_mm_map, start_brk),
		offsetof(struct prctl_mm_map, brk),
		offsetof(struct prctl_mm_map, start_stack),
		offsetof(struct prctl_mm_map, arg_start),
		offsetof(struct prctl_mm_map, arg_end),
		offsetof(struct prctl_mm_map, env_start),
		offsetof(struct prctl_mm_map, env_end),
	};

	/*
	 * Make sure the members are not somewhere outside
	 * of allowed address space.
	 */
	for (i = 0; i < ARRAY_SIZE(offsets); i++) {
		u64 val = *(u64 *)((char *)prctl_map + offsets[i]);

		if ((unsigned long)val >= mmap_max_addr ||
		    (unsigned long)val < mmap_min_addr)
			goto out;
	}

	/*
	 * Make sure the pairs are ordered.
	 */
#define __prctl_check_order(__m1, __op, __m2)				\
	((unsigned long)prctl_map->__m1 __op				\
	 (unsigned long)prctl_map->__m2) ? 0 : -EINVAL
	error  = __prctl_check_order(start_code, <, end_code);
	error |= __prctl_check_order(start_data, <, end_data);
	error |= __prctl_check_order(start_brk, <=, brk);
	error |= __prctl_check_order(arg_start, <=, arg_end);
	error |= __prctl_check_order(env_start, <=, env_end);
	if (error)
		goto out;
#undef __prctl_check_order

	error = -EINVAL;

	/*
	 * @brk should be after @end_data in traditional maps.
	 */
	if (prctl_map->start_brk <= prctl_map->end_data ||
	    prctl_map->brk <= prctl_map->end_data)
		goto out;

	/*
	 * Nor should we allow the limits to be overridden if they are set.
	 */
	if (check_data_rlimit(rlimit(RLIMIT_DATA), prctl_map->brk,
			      prctl_map->start_brk, prctl_map->end_data,
			      prctl_map->start_data))
		goto out;

	/*
	 * Someone is trying to cheat the auxv vector.
	 */
	if (prctl_map->auxv_size) {
		if (!prctl_map->auxv || prctl_map->auxv_size > sizeof(mm->saved_auxv))
			goto out;
	}

	/*
	 * Finally, make sure the caller has the rights to
	 * change /proc/pid/exe link: only local root should
	 * be allowed to.
	 */
	if (prctl_map->exe_fd != (u32)-1) {
		struct user_namespace *ns = current_user_ns();
		const struct cred *cred = current_cred();

		if (!uid_eq(cred->uid, make_kuid(ns, 0)) ||
		    !gid_eq(cred->gid, make_kgid(ns, 0)))
			goto out;
	}

	error = 0;
out:
	return error;
}

#ifdef CONFIG_CHECKPOINT_RESTORE
static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data_size)
{
	struct prctl_mm_map prctl_map = { .exe_fd = (u32)-1, };
	unsigned long user_auxv[AT_VECTOR_SIZE];
	struct mm_struct *mm = current->mm;
	int error;

	BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
	BUILD_BUG_ON(sizeof(struct prctl_mm_map) > 256);

	if (opt == PR_SET_MM_MAP_SIZE)
		return put_user((unsigned int)sizeof(prctl_map),
				(unsigned int __user *)addr);

	if (data_size != sizeof(prctl_map))
		return -EINVAL;

	if (copy_from_user(&prctl_map, addr, sizeof(prctl_map)))
		return -EFAULT;

	error = validate_prctl_map(&prctl_map);
	if (error)
		return error;

	if (prctl_map.auxv_size) {
		memset(user_auxv, 0, sizeof(user_auxv));
		if (copy_from_user(user_auxv,
				   (const void __user *)prctl_map.auxv,
				   prctl_map.auxv_size))
			return -EFAULT;

		/* Last entry must be AT_NULL as specification requires */
		user_auxv[AT_VECTOR_SIZE - 2] = AT_NULL;
		user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL;
	}

	if (prctl_map.exe_fd != (u32)-1) {
		error = prctl_set_mm_exe_file(mm, prctl_map.exe_fd);
		if (error)
			return error;
	}

	down_write(&mm->mmap_sem);

	/*
	 * We don't validate that these members point to real, currently
	 * present VMAs, because the application may already have unmapped
	 * the corresponding VMAs, and the kernel uses these members mostly
	 * for statistics output in procfs, except
	 *
	 * - @start_brk/@brk, which are used in do_brk; but the kernel looks
	 *   up VMAs when updating these members, so anything wrong written
	 *   here will cause the kernel to swear at the userspace program
	 *   but won't lead to any problem in the kernel itself
	 */
	mm->start_code	= prctl_map.start_code;
	mm->end_code	= prctl_map.end_code;
	mm->start_data	= prctl_map.start_data;
	mm->end_data	= prctl_map.end_data;
	mm->start_brk	= prctl_map.start_brk;
	mm->brk		= prctl_map.brk;
	mm->start_stack	= prctl_map.start_stack;
	mm->arg_start	= prctl_map.arg_start;
	mm->arg_end	= prctl_map.arg_end;
	mm->env_start	= prctl_map.env_start;
	mm->env_end	= prctl_map.env_end;

	/*
	 * Note this update of @saved_auxv is lockless thus
	 * if someone reads this member in procfs while we're
	 * updating -- it may get partly updated results. It's
	 * known and acceptable trade off: we leave it as is to
	 * not introduce additional locks here making the kernel
	 * more complex.
	 */
	if (prctl_map.auxv_size)
		memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv));

	up_write(&mm->mmap_sem);
	return 0;
}
#endif /* CONFIG_CHECKPOINT_RESTORE */

static int prctl_set_auxv(struct mm_struct *mm, unsigned long addr,
			  unsigned long len)
{
	/*
	 * This doesn't move the auxiliary vector itself since it's pinned to
	 * mm_struct, but it permits filling the vector with new values.  It's
	 * up to the caller to provide sane values here, otherwise userspace
	 * tools which use this vector might be unhappy.
	 */
	unsigned long user_auxv[AT_VECTOR_SIZE];

	if (len > sizeof(user_auxv))
		return -EINVAL;

	if (copy_from_user(user_auxv, (const void __user *)addr, len))
		return -EFAULT;

	/* Make sure the last entry is always AT_NULL */
	user_auxv[AT_VECTOR_SIZE - 2] = 0;
	user_auxv[AT_VECTOR_SIZE - 1] = 0;

	BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));

	task_lock(current);
	memcpy(mm->saved_auxv, user_auxv, len);
	task_unlock(current);

	return 0;
}

static int prctl_set_mm(int opt, unsigned long addr,
			unsigned long arg4, unsigned long arg5)
{
	struct mm_struct *mm = current->mm;
	struct prctl_mm_map prctl_map;
	struct vm_area_struct *vma;
	int error;

	if (arg5 || (arg4 && (opt != PR_SET_MM_AUXV &&
			      opt != PR_SET_MM_MAP &&
			      opt != PR_SET_MM_MAP_SIZE)))
		return -EINVAL;

#ifdef CONFIG_CHECKPOINT_RESTORE
	if (opt == PR_SET_MM_MAP || opt == PR_SET_MM_MAP_SIZE)
		return prctl_set_mm_map(opt, (const void __user *)addr, arg4);
#endif

	if (!capable(CAP_SYS_RESOURCE))
		return -EPERM;

	if (opt == PR_SET_MM_EXE_FILE)
		return prctl_set_mm_exe_file(mm, (unsigned int)addr);

	if (opt == PR_SET_MM_AUXV)
		return prctl_set_auxv(mm, addr, arg4);

	if (addr >= TASK_SIZE || addr < mmap_min_addr)
		return -EINVAL;

	error = -EINVAL;

	down_write(&mm->mmap_sem);
	vma = find_vma(mm, addr);

	prctl_map.start_code	= mm->start_code;
	prctl_map.end_code	= mm->end_code;
	prctl_map.start_data	= mm->start_data;
	prctl_map.end_data	= mm->end_data;
	prctl_map.start_brk	= mm->start_brk;
	prctl_map.brk		= mm->brk;
	prctl_map.start_stack	= mm->start_stack;
	prctl_map.arg_start	= mm->arg_start;
	prctl_map.arg_end	= mm->arg_end;
	prctl_map.env_start	= mm->env_start;
	prctl_map.env_end	= mm->env_end;
	prctl_map.auxv		= NULL;
	prctl_map.auxv_size	= 0;
	prctl_map.exe_fd	= -1;

	switch (opt) {
	case PR_SET_MM_START_CODE:
		prctl_map.start_code = addr;
		break;
	case PR_SET_MM_END_CODE:
		prctl_map.end_code = addr;
		break;
	case PR_SET_MM_START_DATA:
		prctl_map.start_data = addr;
		break;
	case PR_SET_MM_END_DATA:
		prctl_map.end_data = addr;
		break;
	case PR_SET_MM_START_STACK:
		prctl_map.start_stack = addr;
		break;
	case PR_SET_MM_START_BRK:
		prctl_map.start_brk = addr;
		break;
	case PR_SET_MM_BRK:
		prctl_map.brk = addr;
		break;
	case PR_SET_MM_ARG_START:
		prctl_map.arg_start = addr;
		break;
	case PR_SET_MM_ARG_END:
		prctl_map.arg_end = addr;
		break;
	case PR_SET_MM_ENV_START:
		prctl_map.env_start = addr;
		break;
	case PR_SET_MM_ENV_END:
		prctl_map.env_end = addr;
		break;
	default:
		goto out;
	}

	error = validate_prctl_map(&prctl_map);
	if (error)
		goto out;

	switch (opt) {
	/*
	 * If command line arguments and environment
	 * are placed somewhere else on stack, we can
	 * set them up here, ARG_START/END to setup
	 * command line arguments and ENV_START/END
	 * for environment.
	 */
	case PR_SET_MM_START_STACK:
	case PR_SET_MM_ARG_START:
	case PR_SET_MM_ARG_END:
	case PR_SET_MM_ENV_START:
	case PR_SET_MM_ENV_END:
		if (!vma) {
			error = -EFAULT;
			goto out;
		}
	}

	mm->start_code	= prctl_map.start_code;
	mm->end_code	= prctl_map.end_code;
	mm->start_data	= prctl_map.start_data;
	mm->end_data	= prctl_map.end_data;
	mm->start_brk	= prctl_map.start_brk;
	mm->brk		= prctl_map.brk;
	mm->start_stack	= prctl_map.start_stack;
	mm->arg_start	= prctl_map.arg_start;
	mm->arg_end	= prctl_map.arg_end;
	mm->env_start	= prctl_map.env_start;
	mm->env_end	= prctl_map.env_end;

	error = 0;
out:
	up_write(&mm->mmap_sem);
	return error;
}

#ifdef CONFIG_CHECKPOINT_RESTORE
static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
{
	return put_user(me->clear_child_tid, tid_addr);
}
#else
static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
{
	return -EINVAL;
}
#endif

static int propagate_has_child_subreaper(struct task_struct *p, void *data)
{
	/*
	 * If task has has_child_subreaper - all its descendants
	 * already have this flag too and new descendants will
	 * inherit it on fork, skip them.
	 *
	 * If we've found child_reaper - skip descendants in
	 * its subtree as they will never get out of this pidns.
	 */
	if (p->signal->has_child_subreaper ||
	    is_child_reaper(task_pid(p)))
		return 0;

	p->signal->has_child_subreaper = 1;
	return 1;
}
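/*
 * Illustrative userspace sketch (editor's addition, not kernel code): a
 * service manager marks itself a child subreaper so that orphaned
 * descendants are reparented to it rather than to init; the helper above
 * then propagates the flag through any pre-existing subtree:
 *
 *	#include <sys/prctl.h>
 *
 *	if (prctl(PR_SET_CHILD_SUBREAPER, 1) == 0)
 *		;	// double-forking daemons now become our children
 */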
static int propagate_has_child_subreaper(struct task_struct *p, void *data)
{
	/*
	 * If the task already has has_child_subreaper set, all its
	 * descendants have the flag too and new descendants will
	 * inherit it on fork, so skip them.
	 *
	 * If we've found the child_reaper, skip descendants in its
	 * subtree as they will never leave this pidns.
	 */
	if (p->signal->has_child_subreaper ||
	    is_child_reaper(task_pid(p)))
		return 0;

	p->signal->has_child_subreaper = 1;
	return 1;
}

SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
		unsigned long, arg4, unsigned long, arg5)
{
	struct task_struct *me = current;
	unsigned char comm[sizeof(me->comm)];
	long error;

	error = security_task_prctl(option, arg2, arg3, arg4, arg5);
	if (error != -ENOSYS)
		return error;

	error = 0;
	switch (option) {
	case PR_SET_PDEATHSIG:
		if (!valid_signal(arg2)) {
			error = -EINVAL;
			break;
		}
		me->pdeath_signal = arg2;
		break;
	case PR_GET_PDEATHSIG:
		error = put_user(me->pdeath_signal, (int __user *)arg2);
		break;
	case PR_GET_DUMPABLE:
		error = get_dumpable(me->mm);
		break;
	case PR_SET_DUMPABLE:
		if (arg2 != SUID_DUMP_DISABLE && arg2 != SUID_DUMP_USER) {
			error = -EINVAL;
			break;
		}
		set_dumpable(me->mm, arg2);
		break;

	case PR_SET_UNALIGN:
		error = SET_UNALIGN_CTL(me, arg2);
		break;
	case PR_GET_UNALIGN:
		error = GET_UNALIGN_CTL(me, arg2);
		break;
	case PR_SET_FPEMU:
		error = SET_FPEMU_CTL(me, arg2);
		break;
	case PR_GET_FPEMU:
		error = GET_FPEMU_CTL(me, arg2);
		break;
	case PR_SET_FPEXC:
		error = SET_FPEXC_CTL(me, arg2);
		break;
	case PR_GET_FPEXC:
		error = GET_FPEXC_CTL(me, arg2);
		break;
	case PR_GET_TIMING:
		error = PR_TIMING_STATISTICAL;
		break;
	case PR_SET_TIMING:
		if (arg2 != PR_TIMING_STATISTICAL)
			error = -EINVAL;
		break;
	case PR_SET_NAME:
		comm[sizeof(me->comm) - 1] = 0;
		if (strncpy_from_user(comm, (char __user *)arg2,
				      sizeof(me->comm) - 1) < 0)
			return -EFAULT;
		set_task_comm(me, comm);
		proc_comm_connector(me);
		break;
	case PR_GET_NAME:
		get_task_comm(comm, me);
		if (copy_to_user((char __user *)arg2, comm, sizeof(comm)))
			return -EFAULT;
		break;
	case PR_GET_ENDIAN:
		error = GET_ENDIAN(me, arg2);
		break;
	case PR_SET_ENDIAN:
		error = SET_ENDIAN(me, arg2);
		break;
	case PR_GET_SECCOMP:
		error = prctl_get_seccomp();
		break;
	case PR_SET_SECCOMP:
		error = prctl_set_seccomp(arg2, (char __user *)arg3);
		break;
	case PR_GET_TSC:
		error = GET_TSC_CTL(arg2);
		break;
	case PR_SET_TSC:
		error = SET_TSC_CTL(arg2);
		break;
	case PR_TASK_PERF_EVENTS_DISABLE:
		error = perf_event_task_disable();
		break;
	case PR_TASK_PERF_EVENTS_ENABLE:
		error = perf_event_task_enable();
		break;
	case PR_GET_TIMERSLACK:
		if (current->timer_slack_ns > ULONG_MAX)
			error = ULONG_MAX;
		else
			error = current->timer_slack_ns;
		break;
	case PR_SET_TIMERSLACK:
		if (arg2 <= 0)
			current->timer_slack_ns =
					current->default_timer_slack_ns;
		else
			current->timer_slack_ns = arg2;
		break;
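	/*
	 * PR_MCE_KILL selects this task's policy for hardware memory
	 * corruption: "early" kill signals the task as soon as poisoned
	 * memory is detected in its address space, "late" only once the
	 * corrupted page is actually accessed.  Illustrative userspace
	 * usage (a sketch, not part of this file), assuming <sys/prctl.h>:
	 *
	 *	prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
	 */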
	case PR_MCE_KILL:
		if (arg4 | arg5)
			return -EINVAL;
		switch (arg2) {
		case PR_MCE_KILL_CLEAR:
			if (arg3 != 0)
				return -EINVAL;
			current->flags &= ~PF_MCE_PROCESS;
			break;
		case PR_MCE_KILL_SET:
			current->flags |= PF_MCE_PROCESS;
			if (arg3 == PR_MCE_KILL_EARLY)
				current->flags |= PF_MCE_EARLY;
			else if (arg3 == PR_MCE_KILL_LATE)
				current->flags &= ~PF_MCE_EARLY;
			else if (arg3 == PR_MCE_KILL_DEFAULT)
				current->flags &=
						~(PF_MCE_EARLY|PF_MCE_PROCESS);
			else
				return -EINVAL;
			break;
		default:
			return -EINVAL;
		}
		break;
	case PR_MCE_KILL_GET:
		if (arg2 | arg3 | arg4 | arg5)
			return -EINVAL;
		if (current->flags & PF_MCE_PROCESS)
			error = (current->flags & PF_MCE_EARLY) ?
				PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE;
		else
			error = PR_MCE_KILL_DEFAULT;
		break;
	case PR_SET_MM:
		error = prctl_set_mm(arg2, arg3, arg4, arg5);
		break;
	case PR_GET_TID_ADDRESS:
		error = prctl_get_tid_address(me, (int __user **)arg2);
		break;
	case PR_SET_CHILD_SUBREAPER:
		me->signal->is_child_subreaper = !!arg2;
		if (!arg2)
			break;

		walk_process_tree(me, propagate_has_child_subreaper, NULL);
		break;
	case PR_GET_CHILD_SUBREAPER:
		error = put_user(me->signal->is_child_subreaper,
				 (int __user *)arg2);
		break;
	case PR_SET_NO_NEW_PRIVS:
		if (arg2 != 1 || arg3 || arg4 || arg5)
			return -EINVAL;

		task_set_no_new_privs(current);
		break;
	case PR_GET_NO_NEW_PRIVS:
		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;
		return task_no_new_privs(current) ? 1 : 0;
	case PR_GET_THP_DISABLE:
		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;
		error = !!(me->mm->def_flags & VM_NOHUGEPAGE);
		break;
	case PR_SET_THP_DISABLE:
		if (arg3 || arg4 || arg5)
			return -EINVAL;
		if (down_write_killable(&me->mm->mmap_sem))
			return -EINTR;
		if (arg2)
			me->mm->def_flags |= VM_NOHUGEPAGE;
		else
			me->mm->def_flags &= ~VM_NOHUGEPAGE;
		up_write(&me->mm->mmap_sem);
		break;
	case PR_MPX_ENABLE_MANAGEMENT:
		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;
		error = MPX_ENABLE_MANAGEMENT();
		break;
	case PR_MPX_DISABLE_MANAGEMENT:
		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;
		error = MPX_DISABLE_MANAGEMENT();
		break;
	case PR_SET_FP_MODE:
		error = SET_FP_MODE(me, arg2);
		break;
	case PR_GET_FP_MODE:
		error = GET_FP_MODE(me);
		break;
	default:
		error = -EINVAL;
		break;
	}
	return error;
}

SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
		struct getcpu_cache __user *, unused)
{
	int err = 0;
	int cpu = raw_smp_processor_id();

	if (cpup)
		err |= put_user(cpu, cpup);
	if (nodep)
		err |= put_user(cpu_to_node(cpu), nodep);
	return err ? -EFAULT : 0;
}

/**
 * do_sysinfo - fill in sysinfo struct
 * @info: pointer to buffer to fill
 */
static int do_sysinfo(struct sysinfo *info)
{
	unsigned long mem_total, sav_total;
	unsigned int mem_unit, bitcount;
	struct timespec tp;

	memset(info, 0, sizeof(struct sysinfo));

	get_monotonic_boottime(&tp);
	info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);

	get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);

	info->procs = nr_threads;

	si_meminfo(info);
	si_swapinfo(info);

	/*
	 * If the sum of all the available memory (i.e. ram + swap)
	 * is less than can be stored in a 32 bit unsigned long then
	 * we can be binary compatible with 2.2.x kernels.  If not,
	 * well, in that case 2.2.x was broken anyways...
	 *
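	 * A worked example of the check below (illustrative, added for
	 * clarity): with mem_unit == 4096 the loop shifts mem_total left
	 * 12 times; if totalram + totalswap is 2^20 pages on a 32-bit
	 * kernel, the final shift wraps around (2^20 << 12 == 2^32), the
	 * wrapped value compares below sav_total, and we bail out leaving
	 * the values in mem_unit granularity.
	 *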
2337 * 2338 * -Erik Andersen <andersee@debian.org> 2339 */ 2340 2341 mem_total = info->totalram + info->totalswap; 2342 if (mem_total < info->totalram || mem_total < info->totalswap) 2343 goto out; 2344 bitcount = 0; 2345 mem_unit = info->mem_unit; 2346 while (mem_unit > 1) { 2347 bitcount++; 2348 mem_unit >>= 1; 2349 sav_total = mem_total; 2350 mem_total <<= 1; 2351 if (mem_total < sav_total) 2352 goto out; 2353 } 2354 2355 /* 2356 * If mem_total did not overflow, multiply all memory values by 2357 * info->mem_unit and set it to 1. This leaves things compatible 2358 * with 2.2.x, and also retains compatibility with earlier 2.4.x 2359 * kernels... 2360 */ 2361 2362 info->mem_unit = 1; 2363 info->totalram <<= bitcount; 2364 info->freeram <<= bitcount; 2365 info->sharedram <<= bitcount; 2366 info->bufferram <<= bitcount; 2367 info->totalswap <<= bitcount; 2368 info->freeswap <<= bitcount; 2369 info->totalhigh <<= bitcount; 2370 info->freehigh <<= bitcount; 2371 2372 out: 2373 return 0; 2374 } 2375 2376 SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info) 2377 { 2378 struct sysinfo val; 2379 2380 do_sysinfo(&val); 2381 2382 if (copy_to_user(info, &val, sizeof(struct sysinfo))) 2383 return -EFAULT; 2384 2385 return 0; 2386 } 2387 2388 #ifdef CONFIG_COMPAT 2389 struct compat_sysinfo { 2390 s32 uptime; 2391 u32 loads[3]; 2392 u32 totalram; 2393 u32 freeram; 2394 u32 sharedram; 2395 u32 bufferram; 2396 u32 totalswap; 2397 u32 freeswap; 2398 u16 procs; 2399 u16 pad; 2400 u32 totalhigh; 2401 u32 freehigh; 2402 u32 mem_unit; 2403 char _f[20-2*sizeof(u32)-sizeof(int)]; 2404 }; 2405 2406 COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info) 2407 { 2408 struct sysinfo s; 2409 2410 do_sysinfo(&s); 2411 2412 /* Check to see if any memory value is too large for 32-bit and scale 2413 * down if needed 2414 */ 2415 if (upper_32_bits(s.totalram) || upper_32_bits(s.totalswap)) { 2416 int bitcount = 0; 2417 2418 while (s.mem_unit < PAGE_SIZE) { 2419 s.mem_unit <<= 1; 2420 bitcount++; 2421 } 2422 2423 s.totalram >>= bitcount; 2424 s.freeram >>= bitcount; 2425 s.sharedram >>= bitcount; 2426 s.bufferram >>= bitcount; 2427 s.totalswap >>= bitcount; 2428 s.freeswap >>= bitcount; 2429 s.totalhigh >>= bitcount; 2430 s.freehigh >>= bitcount; 2431 } 2432 2433 if (!access_ok(VERIFY_WRITE, info, sizeof(struct compat_sysinfo)) || 2434 __put_user(s.uptime, &info->uptime) || 2435 __put_user(s.loads[0], &info->loads[0]) || 2436 __put_user(s.loads[1], &info->loads[1]) || 2437 __put_user(s.loads[2], &info->loads[2]) || 2438 __put_user(s.totalram, &info->totalram) || 2439 __put_user(s.freeram, &info->freeram) || 2440 __put_user(s.sharedram, &info->sharedram) || 2441 __put_user(s.bufferram, &info->bufferram) || 2442 __put_user(s.totalswap, &info->totalswap) || 2443 __put_user(s.freeswap, &info->freeswap) || 2444 __put_user(s.procs, &info->procs) || 2445 __put_user(s.totalhigh, &info->totalhigh) || 2446 __put_user(s.freehigh, &info->freehigh) || 2447 __put_user(s.mem_unit, &info->mem_unit)) 2448 return -EFAULT; 2449 2450 return 0; 2451 } 2452 #endif /* CONFIG_COMPAT */ 2453