1 /* Common capabilities, needed by capability.o and root_plug.o 2 * 3 * This program is free software; you can redistribute it and/or modify 4 * it under the terms of the GNU General Public License as published by 5 * the Free Software Foundation; either version 2 of the License, or 6 * (at your option) any later version. 7 * 8 */ 9 10 #include <linux/capability.h> 11 #include <linux/module.h> 12 #include <linux/init.h> 13 #include <linux/kernel.h> 14 #include <linux/security.h> 15 #include <linux/file.h> 16 #include <linux/mm.h> 17 #include <linux/mman.h> 18 #include <linux/pagemap.h> 19 #include <linux/swap.h> 20 #include <linux/skbuff.h> 21 #include <linux/netlink.h> 22 #include <linux/ptrace.h> 23 #include <linux/xattr.h> 24 #include <linux/hugetlb.h> 25 #include <linux/mount.h> 26 #include <linux/sched.h> 27 #include <linux/prctl.h> 28 #include <linux/securebits.h> 29 30 int cap_netlink_send(struct sock *sk, struct sk_buff *skb) 31 { 32 NETLINK_CB(skb).eff_cap = current->cap_effective; 33 return 0; 34 } 35 36 int cap_netlink_recv(struct sk_buff *skb, int cap) 37 { 38 if (!cap_raised(NETLINK_CB(skb).eff_cap, cap)) 39 return -EPERM; 40 return 0; 41 } 42 43 EXPORT_SYMBOL(cap_netlink_recv); 44 45 /* 46 * NOTE WELL: cap_capable() cannot be used like the kernel's capable() 47 * function. That is, it has the reverse semantics: cap_capable() 48 * returns 0 when a task has a capability, but the kernel's capable() 49 * returns 1 for this case. 50 */ 51 int cap_capable (struct task_struct *tsk, int cap) 52 { 53 /* Derived from include/linux/sched.h:capable. */ 54 if (cap_raised(tsk->cap_effective, cap)) 55 return 0; 56 return -EPERM; 57 } 58 59 int cap_settime(struct timespec *ts, struct timezone *tz) 60 { 61 if (!capable(CAP_SYS_TIME)) 62 return -EPERM; 63 return 0; 64 } 65 66 int cap_ptrace (struct task_struct *parent, struct task_struct *child, 67 unsigned int mode) 68 { 69 /* Derived from arch/i386/kernel/ptrace.c:sys_ptrace. */ 70 if (!cap_issubset(child->cap_permitted, parent->cap_permitted) && 71 !__capable(parent, CAP_SYS_PTRACE)) 72 return -EPERM; 73 return 0; 74 } 75 76 int cap_capget (struct task_struct *target, kernel_cap_t *effective, 77 kernel_cap_t *inheritable, kernel_cap_t *permitted) 78 { 79 /* Derived from kernel/capability.c:sys_capget. */ 80 *effective = target->cap_effective; 81 *inheritable = target->cap_inheritable; 82 *permitted = target->cap_permitted; 83 return 0; 84 } 85 86 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES 87 88 static inline int cap_block_setpcap(struct task_struct *target) 89 { 90 /* 91 * No support for remote process capability manipulation with 92 * filesystem capability support. 93 */ 94 return (target != current); 95 } 96 97 static inline int cap_inh_is_capped(void) 98 { 99 /* 100 * Return 1 if changes to the inheritable set are limited 101 * to the old permitted set. That is, if the current task 102 * does *not* possess the CAP_SETPCAP capability. 103 */ 104 return (cap_capable(current, CAP_SETPCAP) != 0); 105 } 106 107 static inline int cap_limit_ptraced_target(void) { return 1; } 108 109 #else /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */ 110 111 static inline int cap_block_setpcap(struct task_struct *t) { return 0; } 112 static inline int cap_inh_is_capped(void) { return 1; } 113 static inline int cap_limit_ptraced_target(void) 114 { 115 return !capable(CAP_SETPCAP); 116 } 117 118 #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ 119 120 int cap_capset_check (struct task_struct *target, kernel_cap_t *effective, 121 kernel_cap_t *inheritable, kernel_cap_t *permitted) 122 { 123 if (cap_block_setpcap(target)) { 124 return -EPERM; 125 } 126 if (cap_inh_is_capped() 127 && !cap_issubset(*inheritable, 128 cap_combine(target->cap_inheritable, 129 current->cap_permitted))) { 130 /* incapable of using this inheritable set */ 131 return -EPERM; 132 } 133 if (!cap_issubset(*inheritable, 134 cap_combine(target->cap_inheritable, 135 current->cap_bset))) { 136 /* no new pI capabilities outside bounding set */ 137 return -EPERM; 138 } 139 140 /* verify restrictions on target's new Permitted set */ 141 if (!cap_issubset (*permitted, 142 cap_combine (target->cap_permitted, 143 current->cap_permitted))) { 144 return -EPERM; 145 } 146 147 /* verify the _new_Effective_ is a subset of the _new_Permitted_ */ 148 if (!cap_issubset (*effective, *permitted)) { 149 return -EPERM; 150 } 151 152 return 0; 153 } 154 155 void cap_capset_set (struct task_struct *target, kernel_cap_t *effective, 156 kernel_cap_t *inheritable, kernel_cap_t *permitted) 157 { 158 target->cap_effective = *effective; 159 target->cap_inheritable = *inheritable; 160 target->cap_permitted = *permitted; 161 } 162 163 static inline void bprm_clear_caps(struct linux_binprm *bprm) 164 { 165 cap_clear(bprm->cap_post_exec_permitted); 166 bprm->cap_effective = false; 167 } 168 169 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES 170 171 int cap_inode_need_killpriv(struct dentry *dentry) 172 { 173 struct inode *inode = dentry->d_inode; 174 int error; 175 176 if (!inode->i_op || !inode->i_op->getxattr) 177 return 0; 178 179 error = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, NULL, 0); 180 if (error <= 0) 181 return 0; 182 return 1; 183 } 184 185 int cap_inode_killpriv(struct dentry *dentry) 186 { 187 struct inode *inode = dentry->d_inode; 188 189 if (!inode->i_op || !inode->i_op->removexattr) 190 return 0; 191 192 return inode->i_op->removexattr(dentry, XATTR_NAME_CAPS); 193 } 194 195 static inline int cap_from_disk(struct vfs_cap_data *caps, 196 struct linux_binprm *bprm, unsigned size) 197 { 198 __u32 magic_etc; 199 unsigned tocopy, i; 200 int ret; 201 202 if (size < sizeof(magic_etc)) 203 return -EINVAL; 204 205 magic_etc = le32_to_cpu(caps->magic_etc); 206 207 switch ((magic_etc & VFS_CAP_REVISION_MASK)) { 208 case VFS_CAP_REVISION_1: 209 if (size != XATTR_CAPS_SZ_1) 210 return -EINVAL; 211 tocopy = VFS_CAP_U32_1; 212 break; 213 case VFS_CAP_REVISION_2: 214 if (size != XATTR_CAPS_SZ_2) 215 return -EINVAL; 216 tocopy = VFS_CAP_U32_2; 217 break; 218 default: 219 return -EINVAL; 220 } 221 222 if (magic_etc & VFS_CAP_FLAGS_EFFECTIVE) { 223 bprm->cap_effective = true; 224 } else { 225 bprm->cap_effective = false; 226 } 227 228 ret = 0; 229 230 CAP_FOR_EACH_U32(i) { 231 __u32 value_cpu; 232 233 if (i >= tocopy) { 234 /* 235 * Legacy capability sets have no upper bits 236 */ 237 bprm->cap_post_exec_permitted.cap[i] = 0; 238 continue; 239 } 240 /* 241 * pP' = (X & fP) | (pI & fI) 242 */ 243 value_cpu = le32_to_cpu(caps->data[i].permitted); 244 bprm->cap_post_exec_permitted.cap[i] = 245 (current->cap_bset.cap[i] & value_cpu) | 246 (current->cap_inheritable.cap[i] & 247 le32_to_cpu(caps->data[i].inheritable)); 248 if (value_cpu & ~bprm->cap_post_exec_permitted.cap[i]) { 249 /* 250 * insufficient to execute correctly 251 */ 252 ret = -EPERM; 253 } 254 } 255 256 /* 257 * For legacy apps, with no internal support for recognizing they 258 * do not have enough capabilities, we return an error if they are 259 * missing some "forced" (aka file-permitted) capabilities. 260 */ 261 return bprm->cap_effective ? ret : 0; 262 } 263 264 /* Locate any VFS capabilities: */ 265 static int get_file_caps(struct linux_binprm *bprm) 266 { 267 struct dentry *dentry; 268 int rc = 0; 269 struct vfs_cap_data vcaps; 270 struct inode *inode; 271 272 if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID) { 273 bprm_clear_caps(bprm); 274 return 0; 275 } 276 277 dentry = dget(bprm->file->f_dentry); 278 inode = dentry->d_inode; 279 if (!inode->i_op || !inode->i_op->getxattr) 280 goto out; 281 282 rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, &vcaps, 283 XATTR_CAPS_SZ); 284 if (rc == -ENODATA || rc == -EOPNOTSUPP) { 285 /* no data, that's ok */ 286 rc = 0; 287 goto out; 288 } 289 if (rc < 0) 290 goto out; 291 292 rc = cap_from_disk(&vcaps, bprm, rc); 293 if (rc == -EINVAL) 294 printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n", 295 __func__, rc, bprm->filename); 296 297 out: 298 dput(dentry); 299 if (rc) 300 bprm_clear_caps(bprm); 301 302 return rc; 303 } 304 305 #else 306 int cap_inode_need_killpriv(struct dentry *dentry) 307 { 308 return 0; 309 } 310 311 int cap_inode_killpriv(struct dentry *dentry) 312 { 313 return 0; 314 } 315 316 static inline int get_file_caps(struct linux_binprm *bprm) 317 { 318 bprm_clear_caps(bprm); 319 return 0; 320 } 321 #endif 322 323 int cap_bprm_set_security (struct linux_binprm *bprm) 324 { 325 int ret; 326 327 ret = get_file_caps(bprm); 328 329 if (!issecure(SECURE_NOROOT)) { 330 /* 331 * To support inheritance of root-permissions and suid-root 332 * executables under compatibility mode, we override the 333 * capability sets for the file. 334 * 335 * If only the real uid is 0, we do not set the effective 336 * bit. 337 */ 338 if (bprm->e_uid == 0 || current->uid == 0) { 339 /* pP' = (cap_bset & ~0) | (pI & ~0) */ 340 bprm->cap_post_exec_permitted = cap_combine( 341 current->cap_bset, current->cap_inheritable 342 ); 343 bprm->cap_effective = (bprm->e_uid == 0); 344 ret = 0; 345 } 346 } 347 348 return ret; 349 } 350 351 void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) 352 { 353 if (bprm->e_uid != current->uid || bprm->e_gid != current->gid || 354 !cap_issubset(bprm->cap_post_exec_permitted, 355 current->cap_permitted)) { 356 set_dumpable(current->mm, suid_dumpable); 357 current->pdeath_signal = 0; 358 359 if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) { 360 if (!capable(CAP_SETUID)) { 361 bprm->e_uid = current->uid; 362 bprm->e_gid = current->gid; 363 } 364 if (cap_limit_ptraced_target()) { 365 bprm->cap_post_exec_permitted = cap_intersect( 366 bprm->cap_post_exec_permitted, 367 current->cap_permitted); 368 } 369 } 370 } 371 372 current->suid = current->euid = current->fsuid = bprm->e_uid; 373 current->sgid = current->egid = current->fsgid = bprm->e_gid; 374 375 /* For init, we want to retain the capabilities set 376 * in the init_task struct. Thus we skip the usual 377 * capability rules */ 378 if (!is_global_init(current)) { 379 current->cap_permitted = bprm->cap_post_exec_permitted; 380 if (bprm->cap_effective) 381 current->cap_effective = bprm->cap_post_exec_permitted; 382 else 383 cap_clear(current->cap_effective); 384 } 385 386 /* AUD: Audit candidate if current->cap_effective is set */ 387 388 current->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); 389 } 390 391 int cap_bprm_secureexec (struct linux_binprm *bprm) 392 { 393 if (current->uid != 0) { 394 if (bprm->cap_effective) 395 return 1; 396 if (!cap_isclear(bprm->cap_post_exec_permitted)) 397 return 1; 398 } 399 400 return (current->euid != current->uid || 401 current->egid != current->gid); 402 } 403 404 int cap_inode_setxattr(struct dentry *dentry, const char *name, 405 const void *value, size_t size, int flags) 406 { 407 if (!strcmp(name, XATTR_NAME_CAPS)) { 408 if (!capable(CAP_SETFCAP)) 409 return -EPERM; 410 return 0; 411 } else if (!strncmp(name, XATTR_SECURITY_PREFIX, 412 sizeof(XATTR_SECURITY_PREFIX) - 1) && 413 !capable(CAP_SYS_ADMIN)) 414 return -EPERM; 415 return 0; 416 } 417 418 int cap_inode_removexattr(struct dentry *dentry, const char *name) 419 { 420 if (!strcmp(name, XATTR_NAME_CAPS)) { 421 if (!capable(CAP_SETFCAP)) 422 return -EPERM; 423 return 0; 424 } else if (!strncmp(name, XATTR_SECURITY_PREFIX, 425 sizeof(XATTR_SECURITY_PREFIX) - 1) && 426 !capable(CAP_SYS_ADMIN)) 427 return -EPERM; 428 return 0; 429 } 430 431 /* moved from kernel/sys.c. */ 432 /* 433 * cap_emulate_setxuid() fixes the effective / permitted capabilities of 434 * a process after a call to setuid, setreuid, or setresuid. 435 * 436 * 1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of 437 * {r,e,s}uid != 0, the permitted and effective capabilities are 438 * cleared. 439 * 440 * 2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective 441 * capabilities of the process are cleared. 442 * 443 * 3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective 444 * capabilities are set to the permitted capabilities. 445 * 446 * fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should 447 * never happen. 448 * 449 * -astor 450 * 451 * cevans - New behaviour, Oct '99 452 * A process may, via prctl(), elect to keep its capabilities when it 453 * calls setuid() and switches away from uid==0. Both permitted and 454 * effective sets will be retained. 455 * Without this change, it was impossible for a daemon to drop only some 456 * of its privilege. The call to setuid(!=0) would drop all privileges! 457 * Keeping uid 0 is not an option because uid 0 owns too many vital 458 * files.. 459 * Thanks to Olaf Kirch and Peter Benie for spotting this. 460 */ 461 static inline void cap_emulate_setxuid (int old_ruid, int old_euid, 462 int old_suid) 463 { 464 if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) && 465 (current->uid != 0 && current->euid != 0 && current->suid != 0) && 466 !issecure(SECURE_KEEP_CAPS)) { 467 cap_clear (current->cap_permitted); 468 cap_clear (current->cap_effective); 469 } 470 if (old_euid == 0 && current->euid != 0) { 471 cap_clear (current->cap_effective); 472 } 473 if (old_euid != 0 && current->euid == 0) { 474 current->cap_effective = current->cap_permitted; 475 } 476 } 477 478 int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, 479 int flags) 480 { 481 switch (flags) { 482 case LSM_SETID_RE: 483 case LSM_SETID_ID: 484 case LSM_SETID_RES: 485 /* Copied from kernel/sys.c:setreuid/setuid/setresuid. */ 486 if (!issecure (SECURE_NO_SETUID_FIXUP)) { 487 cap_emulate_setxuid (old_ruid, old_euid, old_suid); 488 } 489 break; 490 case LSM_SETID_FS: 491 { 492 uid_t old_fsuid = old_ruid; 493 494 /* Copied from kernel/sys.c:setfsuid. */ 495 496 /* 497 * FIXME - is fsuser used for all CAP_FS_MASK capabilities? 498 * if not, we might be a bit too harsh here. 499 */ 500 501 if (!issecure (SECURE_NO_SETUID_FIXUP)) { 502 if (old_fsuid == 0 && current->fsuid != 0) { 503 current->cap_effective = 504 cap_drop_fs_set( 505 current->cap_effective); 506 } 507 if (old_fsuid != 0 && current->fsuid == 0) { 508 current->cap_effective = 509 cap_raise_fs_set( 510 current->cap_effective, 511 current->cap_permitted); 512 } 513 } 514 break; 515 } 516 default: 517 return -EINVAL; 518 } 519 520 return 0; 521 } 522 523 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES 524 /* 525 * Rationale: code calling task_setscheduler, task_setioprio, and 526 * task_setnice, assumes that 527 * . if capable(cap_sys_nice), then those actions should be allowed 528 * . if not capable(cap_sys_nice), but acting on your own processes, 529 * then those actions should be allowed 530 * This is insufficient now since you can call code without suid, but 531 * yet with increased caps. 532 * So we check for increased caps on the target process. 533 */ 534 static inline int cap_safe_nice(struct task_struct *p) 535 { 536 if (!cap_issubset(p->cap_permitted, current->cap_permitted) && 537 !__capable(current, CAP_SYS_NICE)) 538 return -EPERM; 539 return 0; 540 } 541 542 int cap_task_setscheduler (struct task_struct *p, int policy, 543 struct sched_param *lp) 544 { 545 return cap_safe_nice(p); 546 } 547 548 int cap_task_setioprio (struct task_struct *p, int ioprio) 549 { 550 return cap_safe_nice(p); 551 } 552 553 int cap_task_setnice (struct task_struct *p, int nice) 554 { 555 return cap_safe_nice(p); 556 } 557 558 /* 559 * called from kernel/sys.c for prctl(PR_CABSET_DROP) 560 * done without task_capability_lock() because it introduces 561 * no new races - i.e. only another task doing capget() on 562 * this task could get inconsistent info. There can be no 563 * racing writer bc a task can only change its own caps. 564 */ 565 static long cap_prctl_drop(unsigned long cap) 566 { 567 if (!capable(CAP_SETPCAP)) 568 return -EPERM; 569 if (!cap_valid(cap)) 570 return -EINVAL; 571 cap_lower(current->cap_bset, cap); 572 return 0; 573 } 574 575 #else 576 int cap_task_setscheduler (struct task_struct *p, int policy, 577 struct sched_param *lp) 578 { 579 return 0; 580 } 581 int cap_task_setioprio (struct task_struct *p, int ioprio) 582 { 583 return 0; 584 } 585 int cap_task_setnice (struct task_struct *p, int nice) 586 { 587 return 0; 588 } 589 #endif 590 591 int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, 592 unsigned long arg4, unsigned long arg5, long *rc_p) 593 { 594 long error = 0; 595 596 switch (option) { 597 case PR_CAPBSET_READ: 598 if (!cap_valid(arg2)) 599 error = -EINVAL; 600 else 601 error = !!cap_raised(current->cap_bset, arg2); 602 break; 603 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES 604 case PR_CAPBSET_DROP: 605 error = cap_prctl_drop(arg2); 606 break; 607 608 /* 609 * The next four prctl's remain to assist with transitioning a 610 * system from legacy UID=0 based privilege (when filesystem 611 * capabilities are not in use) to a system using filesystem 612 * capabilities only - as the POSIX.1e draft intended. 613 * 614 * Note: 615 * 616 * PR_SET_SECUREBITS = 617 * issecure_mask(SECURE_KEEP_CAPS_LOCKED) 618 * | issecure_mask(SECURE_NOROOT) 619 * | issecure_mask(SECURE_NOROOT_LOCKED) 620 * | issecure_mask(SECURE_NO_SETUID_FIXUP) 621 * | issecure_mask(SECURE_NO_SETUID_FIXUP_LOCKED) 622 * 623 * will ensure that the current process and all of its 624 * children will be locked into a pure 625 * capability-based-privilege environment. 626 */ 627 case PR_SET_SECUREBITS: 628 if ((((current->securebits & SECURE_ALL_LOCKS) >> 1) 629 & (current->securebits ^ arg2)) /*[1]*/ 630 || ((current->securebits & SECURE_ALL_LOCKS 631 & ~arg2)) /*[2]*/ 632 || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/ 633 || (cap_capable(current, CAP_SETPCAP) != 0)) { /*[4]*/ 634 /* 635 * [1] no changing of bits that are locked 636 * [2] no unlocking of locks 637 * [3] no setting of unsupported bits 638 * [4] doing anything requires privilege (go read about 639 * the "sendmail capabilities bug") 640 */ 641 error = -EPERM; /* cannot change a locked bit */ 642 } else { 643 current->securebits = arg2; 644 } 645 break; 646 case PR_GET_SECUREBITS: 647 error = current->securebits; 648 break; 649 650 #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ 651 652 case PR_GET_KEEPCAPS: 653 if (issecure(SECURE_KEEP_CAPS)) 654 error = 1; 655 break; 656 case PR_SET_KEEPCAPS: 657 if (arg2 > 1) /* Note, we rely on arg2 being unsigned here */ 658 error = -EINVAL; 659 else if (issecure(SECURE_KEEP_CAPS_LOCKED)) 660 error = -EPERM; 661 else if (arg2) 662 current->securebits |= issecure_mask(SECURE_KEEP_CAPS); 663 else 664 current->securebits &= 665 ~issecure_mask(SECURE_KEEP_CAPS); 666 break; 667 668 default: 669 /* No functionality available - continue with default */ 670 return 0; 671 } 672 673 /* Functionality provided */ 674 *rc_p = error; 675 return 1; 676 } 677 678 void cap_task_reparent_to_init (struct task_struct *p) 679 { 680 cap_set_init_eff(p->cap_effective); 681 cap_clear(p->cap_inheritable); 682 cap_set_full(p->cap_permitted); 683 p->securebits = SECUREBITS_DEFAULT; 684 return; 685 } 686 687 int cap_syslog (int type) 688 { 689 if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN)) 690 return -EPERM; 691 return 0; 692 } 693 694 int cap_vm_enough_memory(struct mm_struct *mm, long pages) 695 { 696 int cap_sys_admin = 0; 697 698 if (cap_capable(current, CAP_SYS_ADMIN) == 0) 699 cap_sys_admin = 1; 700 return __vm_enough_memory(mm, pages, cap_sys_admin); 701 } 702 703