1 /* Common capabilities, needed by capability.o and root_plug.o 2 * 3 * This program is free software; you can redistribute it and/or modify 4 * it under the terms of the GNU General Public License as published by 5 * the Free Software Foundation; either version 2 of the License, or 6 * (at your option) any later version. 7 * 8 */ 9 10 #include <linux/capability.h> 11 #include <linux/module.h> 12 #include <linux/init.h> 13 #include <linux/kernel.h> 14 #include <linux/security.h> 15 #include <linux/file.h> 16 #include <linux/mm.h> 17 #include <linux/mman.h> 18 #include <linux/pagemap.h> 19 #include <linux/swap.h> 20 #include <linux/skbuff.h> 21 #include <linux/netlink.h> 22 #include <linux/ptrace.h> 23 #include <linux/xattr.h> 24 #include <linux/hugetlb.h> 25 #include <linux/mount.h> 26 #include <linux/sched.h> 27 #include <linux/prctl.h> 28 #include <linux/securebits.h> 29 30 int cap_netlink_send(struct sock *sk, struct sk_buff *skb) 31 { 32 NETLINK_CB(skb).eff_cap = current->cap_effective; 33 return 0; 34 } 35 36 int cap_netlink_recv(struct sk_buff *skb, int cap) 37 { 38 if (!cap_raised(NETLINK_CB(skb).eff_cap, cap)) 39 return -EPERM; 40 return 0; 41 } 42 43 EXPORT_SYMBOL(cap_netlink_recv); 44 45 /* 46 * NOTE WELL: cap_capable() cannot be used like the kernel's capable() 47 * function. That is, it has the reverse semantics: cap_capable() 48 * returns 0 when a task has a capability, but the kernel's capable() 49 * returns 1 for this case. 50 */ 51 int cap_capable (struct task_struct *tsk, int cap) 52 { 53 /* Derived from include/linux/sched.h:capable. */ 54 if (cap_raised(tsk->cap_effective, cap)) 55 return 0; 56 return -EPERM; 57 } 58 59 int cap_settime(struct timespec *ts, struct timezone *tz) 60 { 61 if (!capable(CAP_SYS_TIME)) 62 return -EPERM; 63 return 0; 64 } 65 66 int cap_ptrace_may_access(struct task_struct *child, unsigned int mode) 67 { 68 /* Derived from arch/i386/kernel/ptrace.c:sys_ptrace. */ 69 if (cap_issubset(child->cap_permitted, current->cap_permitted)) 70 return 0; 71 if (capable(CAP_SYS_PTRACE)) 72 return 0; 73 return -EPERM; 74 } 75 76 int cap_ptrace_traceme(struct task_struct *parent) 77 { 78 /* Derived from arch/i386/kernel/ptrace.c:sys_ptrace. */ 79 if (cap_issubset(current->cap_permitted, parent->cap_permitted)) 80 return 0; 81 if (has_capability(parent, CAP_SYS_PTRACE)) 82 return 0; 83 return -EPERM; 84 } 85 86 int cap_capget (struct task_struct *target, kernel_cap_t *effective, 87 kernel_cap_t *inheritable, kernel_cap_t *permitted) 88 { 89 /* Derived from kernel/capability.c:sys_capget. */ 90 *effective = target->cap_effective; 91 *inheritable = target->cap_inheritable; 92 *permitted = target->cap_permitted; 93 return 0; 94 } 95 96 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES 97 98 static inline int cap_block_setpcap(struct task_struct *target) 99 { 100 /* 101 * No support for remote process capability manipulation with 102 * filesystem capability support. 103 */ 104 return (target != current); 105 } 106 107 static inline int cap_inh_is_capped(void) 108 { 109 /* 110 * Return 1 if changes to the inheritable set are limited 111 * to the old permitted set. That is, if the current task 112 * does *not* possess the CAP_SETPCAP capability. 113 */ 114 return (cap_capable(current, CAP_SETPCAP) != 0); 115 } 116 117 static inline int cap_limit_ptraced_target(void) { return 1; } 118 119 #else /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */ 120 121 static inline int cap_block_setpcap(struct task_struct *t) { return 0; } 122 static inline int cap_inh_is_capped(void) { return 1; } 123 static inline int cap_limit_ptraced_target(void) 124 { 125 return !capable(CAP_SETPCAP); 126 } 127 128 #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ 129 130 int cap_capset_check (struct task_struct *target, kernel_cap_t *effective, 131 kernel_cap_t *inheritable, kernel_cap_t *permitted) 132 { 133 if (cap_block_setpcap(target)) { 134 return -EPERM; 135 } 136 if (cap_inh_is_capped() 137 && !cap_issubset(*inheritable, 138 cap_combine(target->cap_inheritable, 139 current->cap_permitted))) { 140 /* incapable of using this inheritable set */ 141 return -EPERM; 142 } 143 if (!cap_issubset(*inheritable, 144 cap_combine(target->cap_inheritable, 145 current->cap_bset))) { 146 /* no new pI capabilities outside bounding set */ 147 return -EPERM; 148 } 149 150 /* verify restrictions on target's new Permitted set */ 151 if (!cap_issubset (*permitted, 152 cap_combine (target->cap_permitted, 153 current->cap_permitted))) { 154 return -EPERM; 155 } 156 157 /* verify the _new_Effective_ is a subset of the _new_Permitted_ */ 158 if (!cap_issubset (*effective, *permitted)) { 159 return -EPERM; 160 } 161 162 return 0; 163 } 164 165 void cap_capset_set (struct task_struct *target, kernel_cap_t *effective, 166 kernel_cap_t *inheritable, kernel_cap_t *permitted) 167 { 168 target->cap_effective = *effective; 169 target->cap_inheritable = *inheritable; 170 target->cap_permitted = *permitted; 171 } 172 173 static inline void bprm_clear_caps(struct linux_binprm *bprm) 174 { 175 cap_clear(bprm->cap_post_exec_permitted); 176 bprm->cap_effective = false; 177 } 178 179 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES 180 181 int cap_inode_need_killpriv(struct dentry *dentry) 182 { 183 struct inode *inode = dentry->d_inode; 184 int error; 185 186 if (!inode->i_op || !inode->i_op->getxattr) 187 return 0; 188 189 error = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, NULL, 0); 190 if (error <= 0) 191 return 0; 192 return 1; 193 } 194 195 int cap_inode_killpriv(struct dentry *dentry) 196 { 197 struct inode *inode = dentry->d_inode; 198 199 if (!inode->i_op || !inode->i_op->removexattr) 200 return 0; 201 202 return inode->i_op->removexattr(dentry, XATTR_NAME_CAPS); 203 } 204 205 static inline int cap_from_disk(struct vfs_cap_data *caps, 206 struct linux_binprm *bprm, unsigned size) 207 { 208 __u32 magic_etc; 209 unsigned tocopy, i; 210 int ret; 211 212 if (size < sizeof(magic_etc)) 213 return -EINVAL; 214 215 magic_etc = le32_to_cpu(caps->magic_etc); 216 217 switch ((magic_etc & VFS_CAP_REVISION_MASK)) { 218 case VFS_CAP_REVISION_1: 219 if (size != XATTR_CAPS_SZ_1) 220 return -EINVAL; 221 tocopy = VFS_CAP_U32_1; 222 break; 223 case VFS_CAP_REVISION_2: 224 if (size != XATTR_CAPS_SZ_2) 225 return -EINVAL; 226 tocopy = VFS_CAP_U32_2; 227 break; 228 default: 229 return -EINVAL; 230 } 231 232 if (magic_etc & VFS_CAP_FLAGS_EFFECTIVE) { 233 bprm->cap_effective = true; 234 } else { 235 bprm->cap_effective = false; 236 } 237 238 ret = 0; 239 240 CAP_FOR_EACH_U32(i) { 241 __u32 value_cpu; 242 243 if (i >= tocopy) { 244 /* 245 * Legacy capability sets have no upper bits 246 */ 247 bprm->cap_post_exec_permitted.cap[i] = 0; 248 continue; 249 } 250 /* 251 * pP' = (X & fP) | (pI & fI) 252 */ 253 value_cpu = le32_to_cpu(caps->data[i].permitted); 254 bprm->cap_post_exec_permitted.cap[i] = 255 (current->cap_bset.cap[i] & value_cpu) | 256 (current->cap_inheritable.cap[i] & 257 le32_to_cpu(caps->data[i].inheritable)); 258 if (value_cpu & ~bprm->cap_post_exec_permitted.cap[i]) { 259 /* 260 * insufficient to execute correctly 261 */ 262 ret = -EPERM; 263 } 264 } 265 266 /* 267 * For legacy apps, with no internal support for recognizing they 268 * do not have enough capabilities, we return an error if they are 269 * missing some "forced" (aka file-permitted) capabilities. 270 */ 271 return bprm->cap_effective ? ret : 0; 272 } 273 274 /* Locate any VFS capabilities: */ 275 static int get_file_caps(struct linux_binprm *bprm) 276 { 277 struct dentry *dentry; 278 int rc = 0; 279 struct vfs_cap_data vcaps; 280 struct inode *inode; 281 282 if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID) { 283 bprm_clear_caps(bprm); 284 return 0; 285 } 286 287 dentry = dget(bprm->file->f_dentry); 288 inode = dentry->d_inode; 289 if (!inode->i_op || !inode->i_op->getxattr) 290 goto out; 291 292 rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, &vcaps, 293 XATTR_CAPS_SZ); 294 if (rc == -ENODATA || rc == -EOPNOTSUPP) { 295 /* no data, that's ok */ 296 rc = 0; 297 goto out; 298 } 299 if (rc < 0) 300 goto out; 301 302 rc = cap_from_disk(&vcaps, bprm, rc); 303 if (rc == -EINVAL) 304 printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n", 305 __func__, rc, bprm->filename); 306 307 out: 308 dput(dentry); 309 if (rc) 310 bprm_clear_caps(bprm); 311 312 return rc; 313 } 314 315 #else 316 int cap_inode_need_killpriv(struct dentry *dentry) 317 { 318 return 0; 319 } 320 321 int cap_inode_killpriv(struct dentry *dentry) 322 { 323 return 0; 324 } 325 326 static inline int get_file_caps(struct linux_binprm *bprm) 327 { 328 bprm_clear_caps(bprm); 329 return 0; 330 } 331 #endif 332 333 int cap_bprm_set_security (struct linux_binprm *bprm) 334 { 335 int ret; 336 337 ret = get_file_caps(bprm); 338 339 if (!issecure(SECURE_NOROOT)) { 340 /* 341 * To support inheritance of root-permissions and suid-root 342 * executables under compatibility mode, we override the 343 * capability sets for the file. 344 * 345 * If only the real uid is 0, we do not set the effective 346 * bit. 347 */ 348 if (bprm->e_uid == 0 || current->uid == 0) { 349 /* pP' = (cap_bset & ~0) | (pI & ~0) */ 350 bprm->cap_post_exec_permitted = cap_combine( 351 current->cap_bset, current->cap_inheritable 352 ); 353 bprm->cap_effective = (bprm->e_uid == 0); 354 ret = 0; 355 } 356 } 357 358 return ret; 359 } 360 361 void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) 362 { 363 if (bprm->e_uid != current->uid || bprm->e_gid != current->gid || 364 !cap_issubset(bprm->cap_post_exec_permitted, 365 current->cap_permitted)) { 366 set_dumpable(current->mm, suid_dumpable); 367 current->pdeath_signal = 0; 368 369 if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) { 370 if (!capable(CAP_SETUID)) { 371 bprm->e_uid = current->uid; 372 bprm->e_gid = current->gid; 373 } 374 if (cap_limit_ptraced_target()) { 375 bprm->cap_post_exec_permitted = cap_intersect( 376 bprm->cap_post_exec_permitted, 377 current->cap_permitted); 378 } 379 } 380 } 381 382 current->suid = current->euid = current->fsuid = bprm->e_uid; 383 current->sgid = current->egid = current->fsgid = bprm->e_gid; 384 385 /* For init, we want to retain the capabilities set 386 * in the init_task struct. Thus we skip the usual 387 * capability rules */ 388 if (!is_global_init(current)) { 389 current->cap_permitted = bprm->cap_post_exec_permitted; 390 if (bprm->cap_effective) 391 current->cap_effective = bprm->cap_post_exec_permitted; 392 else 393 cap_clear(current->cap_effective); 394 } 395 396 /* AUD: Audit candidate if current->cap_effective is set */ 397 398 current->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); 399 } 400 401 int cap_bprm_secureexec (struct linux_binprm *bprm) 402 { 403 if (current->uid != 0) { 404 if (bprm->cap_effective) 405 return 1; 406 if (!cap_isclear(bprm->cap_post_exec_permitted)) 407 return 1; 408 } 409 410 return (current->euid != current->uid || 411 current->egid != current->gid); 412 } 413 414 int cap_inode_setxattr(struct dentry *dentry, const char *name, 415 const void *value, size_t size, int flags) 416 { 417 if (!strcmp(name, XATTR_NAME_CAPS)) { 418 if (!capable(CAP_SETFCAP)) 419 return -EPERM; 420 return 0; 421 } else if (!strncmp(name, XATTR_SECURITY_PREFIX, 422 sizeof(XATTR_SECURITY_PREFIX) - 1) && 423 !capable(CAP_SYS_ADMIN)) 424 return -EPERM; 425 return 0; 426 } 427 428 int cap_inode_removexattr(struct dentry *dentry, const char *name) 429 { 430 if (!strcmp(name, XATTR_NAME_CAPS)) { 431 if (!capable(CAP_SETFCAP)) 432 return -EPERM; 433 return 0; 434 } else if (!strncmp(name, XATTR_SECURITY_PREFIX, 435 sizeof(XATTR_SECURITY_PREFIX) - 1) && 436 !capable(CAP_SYS_ADMIN)) 437 return -EPERM; 438 return 0; 439 } 440 441 /* moved from kernel/sys.c. */ 442 /* 443 * cap_emulate_setxuid() fixes the effective / permitted capabilities of 444 * a process after a call to setuid, setreuid, or setresuid. 445 * 446 * 1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of 447 * {r,e,s}uid != 0, the permitted and effective capabilities are 448 * cleared. 449 * 450 * 2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective 451 * capabilities of the process are cleared. 452 * 453 * 3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective 454 * capabilities are set to the permitted capabilities. 455 * 456 * fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should 457 * never happen. 458 * 459 * -astor 460 * 461 * cevans - New behaviour, Oct '99 462 * A process may, via prctl(), elect to keep its capabilities when it 463 * calls setuid() and switches away from uid==0. Both permitted and 464 * effective sets will be retained. 465 * Without this change, it was impossible for a daemon to drop only some 466 * of its privilege. The call to setuid(!=0) would drop all privileges! 467 * Keeping uid 0 is not an option because uid 0 owns too many vital 468 * files.. 469 * Thanks to Olaf Kirch and Peter Benie for spotting this. 470 */ 471 static inline void cap_emulate_setxuid (int old_ruid, int old_euid, 472 int old_suid) 473 { 474 if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) && 475 (current->uid != 0 && current->euid != 0 && current->suid != 0) && 476 !issecure(SECURE_KEEP_CAPS)) { 477 cap_clear (current->cap_permitted); 478 cap_clear (current->cap_effective); 479 } 480 if (old_euid == 0 && current->euid != 0) { 481 cap_clear (current->cap_effective); 482 } 483 if (old_euid != 0 && current->euid == 0) { 484 current->cap_effective = current->cap_permitted; 485 } 486 } 487 488 int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, 489 int flags) 490 { 491 switch (flags) { 492 case LSM_SETID_RE: 493 case LSM_SETID_ID: 494 case LSM_SETID_RES: 495 /* Copied from kernel/sys.c:setreuid/setuid/setresuid. */ 496 if (!issecure (SECURE_NO_SETUID_FIXUP)) { 497 cap_emulate_setxuid (old_ruid, old_euid, old_suid); 498 } 499 break; 500 case LSM_SETID_FS: 501 { 502 uid_t old_fsuid = old_ruid; 503 504 /* Copied from kernel/sys.c:setfsuid. */ 505 506 /* 507 * FIXME - is fsuser used for all CAP_FS_MASK capabilities? 508 * if not, we might be a bit too harsh here. 509 */ 510 511 if (!issecure (SECURE_NO_SETUID_FIXUP)) { 512 if (old_fsuid == 0 && current->fsuid != 0) { 513 current->cap_effective = 514 cap_drop_fs_set( 515 current->cap_effective); 516 } 517 if (old_fsuid != 0 && current->fsuid == 0) { 518 current->cap_effective = 519 cap_raise_fs_set( 520 current->cap_effective, 521 current->cap_permitted); 522 } 523 } 524 break; 525 } 526 default: 527 return -EINVAL; 528 } 529 530 return 0; 531 } 532 533 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES 534 /* 535 * Rationale: code calling task_setscheduler, task_setioprio, and 536 * task_setnice, assumes that 537 * . if capable(cap_sys_nice), then those actions should be allowed 538 * . if not capable(cap_sys_nice), but acting on your own processes, 539 * then those actions should be allowed 540 * This is insufficient now since you can call code without suid, but 541 * yet with increased caps. 542 * So we check for increased caps on the target process. 543 */ 544 static int cap_safe_nice(struct task_struct *p) 545 { 546 if (!cap_issubset(p->cap_permitted, current->cap_permitted) && 547 !capable(CAP_SYS_NICE)) 548 return -EPERM; 549 return 0; 550 } 551 552 int cap_task_setscheduler (struct task_struct *p, int policy, 553 struct sched_param *lp) 554 { 555 return cap_safe_nice(p); 556 } 557 558 int cap_task_setioprio (struct task_struct *p, int ioprio) 559 { 560 return cap_safe_nice(p); 561 } 562 563 int cap_task_setnice (struct task_struct *p, int nice) 564 { 565 return cap_safe_nice(p); 566 } 567 568 /* 569 * called from kernel/sys.c for prctl(PR_CABSET_DROP) 570 * done without task_capability_lock() because it introduces 571 * no new races - i.e. only another task doing capget() on 572 * this task could get inconsistent info. There can be no 573 * racing writer bc a task can only change its own caps. 574 */ 575 static long cap_prctl_drop(unsigned long cap) 576 { 577 if (!capable(CAP_SETPCAP)) 578 return -EPERM; 579 if (!cap_valid(cap)) 580 return -EINVAL; 581 cap_lower(current->cap_bset, cap); 582 return 0; 583 } 584 585 #else 586 int cap_task_setscheduler (struct task_struct *p, int policy, 587 struct sched_param *lp) 588 { 589 return 0; 590 } 591 int cap_task_setioprio (struct task_struct *p, int ioprio) 592 { 593 return 0; 594 } 595 int cap_task_setnice (struct task_struct *p, int nice) 596 { 597 return 0; 598 } 599 #endif 600 601 int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, 602 unsigned long arg4, unsigned long arg5, long *rc_p) 603 { 604 long error = 0; 605 606 switch (option) { 607 case PR_CAPBSET_READ: 608 if (!cap_valid(arg2)) 609 error = -EINVAL; 610 else 611 error = !!cap_raised(current->cap_bset, arg2); 612 break; 613 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES 614 case PR_CAPBSET_DROP: 615 error = cap_prctl_drop(arg2); 616 break; 617 618 /* 619 * The next four prctl's remain to assist with transitioning a 620 * system from legacy UID=0 based privilege (when filesystem 621 * capabilities are not in use) to a system using filesystem 622 * capabilities only - as the POSIX.1e draft intended. 623 * 624 * Note: 625 * 626 * PR_SET_SECUREBITS = 627 * issecure_mask(SECURE_KEEP_CAPS_LOCKED) 628 * | issecure_mask(SECURE_NOROOT) 629 * | issecure_mask(SECURE_NOROOT_LOCKED) 630 * | issecure_mask(SECURE_NO_SETUID_FIXUP) 631 * | issecure_mask(SECURE_NO_SETUID_FIXUP_LOCKED) 632 * 633 * will ensure that the current process and all of its 634 * children will be locked into a pure 635 * capability-based-privilege environment. 636 */ 637 case PR_SET_SECUREBITS: 638 if ((((current->securebits & SECURE_ALL_LOCKS) >> 1) 639 & (current->securebits ^ arg2)) /*[1]*/ 640 || ((current->securebits & SECURE_ALL_LOCKS 641 & ~arg2)) /*[2]*/ 642 || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/ 643 || (cap_capable(current, CAP_SETPCAP) != 0)) { /*[4]*/ 644 /* 645 * [1] no changing of bits that are locked 646 * [2] no unlocking of locks 647 * [3] no setting of unsupported bits 648 * [4] doing anything requires privilege (go read about 649 * the "sendmail capabilities bug") 650 */ 651 error = -EPERM; /* cannot change a locked bit */ 652 } else { 653 current->securebits = arg2; 654 } 655 break; 656 case PR_GET_SECUREBITS: 657 error = current->securebits; 658 break; 659 660 #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ 661 662 case PR_GET_KEEPCAPS: 663 if (issecure(SECURE_KEEP_CAPS)) 664 error = 1; 665 break; 666 case PR_SET_KEEPCAPS: 667 if (arg2 > 1) /* Note, we rely on arg2 being unsigned here */ 668 error = -EINVAL; 669 else if (issecure(SECURE_KEEP_CAPS_LOCKED)) 670 error = -EPERM; 671 else if (arg2) 672 current->securebits |= issecure_mask(SECURE_KEEP_CAPS); 673 else 674 current->securebits &= 675 ~issecure_mask(SECURE_KEEP_CAPS); 676 break; 677 678 default: 679 /* No functionality available - continue with default */ 680 return 0; 681 } 682 683 /* Functionality provided */ 684 *rc_p = error; 685 return 1; 686 } 687 688 void cap_task_reparent_to_init (struct task_struct *p) 689 { 690 cap_set_init_eff(p->cap_effective); 691 cap_clear(p->cap_inheritable); 692 cap_set_full(p->cap_permitted); 693 p->securebits = SECUREBITS_DEFAULT; 694 return; 695 } 696 697 int cap_syslog (int type) 698 { 699 if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN)) 700 return -EPERM; 701 return 0; 702 } 703 704 int cap_vm_enough_memory(struct mm_struct *mm, long pages) 705 { 706 int cap_sys_admin = 0; 707 708 if (cap_capable(current, CAP_SYS_ADMIN) == 0) 709 cap_sys_admin = 1; 710 return __vm_enough_memory(mm, pages, cap_sys_admin); 711 } 712 713