1 /* Common capabilities, needed by capability.o and root_plug.o 2 * 3 * This program is free software; you can redistribute it and/or modify 4 * it under the terms of the GNU General Public License as published by 5 * the Free Software Foundation; either version 2 of the License, or 6 * (at your option) any later version. 7 * 8 */ 9 10 #include <linux/capability.h> 11 #include <linux/module.h> 12 #include <linux/init.h> 13 #include <linux/kernel.h> 14 #include <linux/security.h> 15 #include <linux/file.h> 16 #include <linux/mm.h> 17 #include <linux/mman.h> 18 #include <linux/pagemap.h> 19 #include <linux/swap.h> 20 #include <linux/skbuff.h> 21 #include <linux/netlink.h> 22 #include <linux/ptrace.h> 23 #include <linux/xattr.h> 24 #include <linux/hugetlb.h> 25 #include <linux/mount.h> 26 #include <linux/sched.h> 27 #include <linux/prctl.h> 28 #include <linux/securebits.h> 29 30 int cap_netlink_send(struct sock *sk, struct sk_buff *skb) 31 { 32 NETLINK_CB(skb).eff_cap = current->cap_effective; 33 return 0; 34 } 35 36 int cap_netlink_recv(struct sk_buff *skb, int cap) 37 { 38 if (!cap_raised(NETLINK_CB(skb).eff_cap, cap)) 39 return -EPERM; 40 return 0; 41 } 42 43 EXPORT_SYMBOL(cap_netlink_recv); 44 45 /* 46 * NOTE WELL: cap_capable() cannot be used like the kernel's capable() 47 * function. That is, it has the reverse semantics: cap_capable() 48 * returns 0 when a task has a capability, but the kernel's capable() 49 * returns 1 for this case. 50 */ 51 int cap_capable (struct task_struct *tsk, int cap) 52 { 53 /* Derived from include/linux/sched.h:capable. */ 54 if (cap_raised(tsk->cap_effective, cap)) 55 return 0; 56 return -EPERM; 57 } 58 59 int cap_settime(struct timespec *ts, struct timezone *tz) 60 { 61 if (!capable(CAP_SYS_TIME)) 62 return -EPERM; 63 return 0; 64 } 65 66 int cap_ptrace (struct task_struct *parent, struct task_struct *child) 67 { 68 /* Derived from arch/i386/kernel/ptrace.c:sys_ptrace. */ 69 if (!cap_issubset(child->cap_permitted, parent->cap_permitted) && 70 !__capable(parent, CAP_SYS_PTRACE)) 71 return -EPERM; 72 return 0; 73 } 74 75 int cap_capget (struct task_struct *target, kernel_cap_t *effective, 76 kernel_cap_t *inheritable, kernel_cap_t *permitted) 77 { 78 /* Derived from kernel/capability.c:sys_capget. */ 79 *effective = target->cap_effective; 80 *inheritable = target->cap_inheritable; 81 *permitted = target->cap_permitted; 82 return 0; 83 } 84 85 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES 86 87 static inline int cap_block_setpcap(struct task_struct *target) 88 { 89 /* 90 * No support for remote process capability manipulation with 91 * filesystem capability support. 92 */ 93 return (target != current); 94 } 95 96 static inline int cap_inh_is_capped(void) 97 { 98 /* 99 * Return 1 if changes to the inheritable set are limited 100 * to the old permitted set. That is, if the current task 101 * does *not* possess the CAP_SETPCAP capability. 102 */ 103 return (cap_capable(current, CAP_SETPCAP) != 0); 104 } 105 106 #else /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */ 107 108 static inline int cap_block_setpcap(struct task_struct *t) { return 0; } 109 static inline int cap_inh_is_capped(void) { return 1; } 110 111 #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ 112 113 int cap_capset_check (struct task_struct *target, kernel_cap_t *effective, 114 kernel_cap_t *inheritable, kernel_cap_t *permitted) 115 { 116 if (cap_block_setpcap(target)) { 117 return -EPERM; 118 } 119 if (cap_inh_is_capped() 120 && !cap_issubset(*inheritable, 121 cap_combine(target->cap_inheritable, 122 current->cap_permitted))) { 123 /* incapable of using this inheritable set */ 124 return -EPERM; 125 } 126 if (!cap_issubset(*inheritable, 127 cap_combine(target->cap_inheritable, 128 current->cap_bset))) { 129 /* no new pI capabilities outside bounding set */ 130 return -EPERM; 131 } 132 133 /* verify restrictions on target's new Permitted set */ 134 if (!cap_issubset (*permitted, 135 cap_combine (target->cap_permitted, 136 current->cap_permitted))) { 137 return -EPERM; 138 } 139 140 /* verify the _new_Effective_ is a subset of the _new_Permitted_ */ 141 if (!cap_issubset (*effective, *permitted)) { 142 return -EPERM; 143 } 144 145 return 0; 146 } 147 148 void cap_capset_set (struct task_struct *target, kernel_cap_t *effective, 149 kernel_cap_t *inheritable, kernel_cap_t *permitted) 150 { 151 target->cap_effective = *effective; 152 target->cap_inheritable = *inheritable; 153 target->cap_permitted = *permitted; 154 } 155 156 static inline void bprm_clear_caps(struct linux_binprm *bprm) 157 { 158 cap_clear(bprm->cap_inheritable); 159 cap_clear(bprm->cap_permitted); 160 bprm->cap_effective = false; 161 } 162 163 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES 164 165 int cap_inode_need_killpriv(struct dentry *dentry) 166 { 167 struct inode *inode = dentry->d_inode; 168 int error; 169 170 if (!inode->i_op || !inode->i_op->getxattr) 171 return 0; 172 173 error = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, NULL, 0); 174 if (error <= 0) 175 return 0; 176 return 1; 177 } 178 179 int cap_inode_killpriv(struct dentry *dentry) 180 { 181 struct inode *inode = dentry->d_inode; 182 183 if (!inode->i_op || !inode->i_op->removexattr) 184 return 0; 185 186 return inode->i_op->removexattr(dentry, XATTR_NAME_CAPS); 187 } 188 189 static inline int cap_from_disk(struct vfs_cap_data *caps, 190 struct linux_binprm *bprm, unsigned size) 191 { 192 __u32 magic_etc; 193 unsigned tocopy, i; 194 195 if (size < sizeof(magic_etc)) 196 return -EINVAL; 197 198 magic_etc = le32_to_cpu(caps->magic_etc); 199 200 switch ((magic_etc & VFS_CAP_REVISION_MASK)) { 201 case VFS_CAP_REVISION_1: 202 if (size != XATTR_CAPS_SZ_1) 203 return -EINVAL; 204 tocopy = VFS_CAP_U32_1; 205 break; 206 case VFS_CAP_REVISION_2: 207 if (size != XATTR_CAPS_SZ_2) 208 return -EINVAL; 209 tocopy = VFS_CAP_U32_2; 210 break; 211 default: 212 return -EINVAL; 213 } 214 215 if (magic_etc & VFS_CAP_FLAGS_EFFECTIVE) { 216 bprm->cap_effective = true; 217 } else { 218 bprm->cap_effective = false; 219 } 220 221 for (i = 0; i < tocopy; ++i) { 222 bprm->cap_permitted.cap[i] = 223 le32_to_cpu(caps->data[i].permitted); 224 bprm->cap_inheritable.cap[i] = 225 le32_to_cpu(caps->data[i].inheritable); 226 } 227 while (i < VFS_CAP_U32) { 228 bprm->cap_permitted.cap[i] = 0; 229 bprm->cap_inheritable.cap[i] = 0; 230 i++; 231 } 232 233 return 0; 234 } 235 236 /* Locate any VFS capabilities: */ 237 static int get_file_caps(struct linux_binprm *bprm) 238 { 239 struct dentry *dentry; 240 int rc = 0; 241 struct vfs_cap_data vcaps; 242 struct inode *inode; 243 244 if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID) { 245 bprm_clear_caps(bprm); 246 return 0; 247 } 248 249 dentry = dget(bprm->file->f_dentry); 250 inode = dentry->d_inode; 251 if (!inode->i_op || !inode->i_op->getxattr) 252 goto out; 253 254 rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, &vcaps, 255 XATTR_CAPS_SZ); 256 if (rc == -ENODATA || rc == -EOPNOTSUPP) { 257 /* no data, that's ok */ 258 rc = 0; 259 goto out; 260 } 261 if (rc < 0) 262 goto out; 263 264 rc = cap_from_disk(&vcaps, bprm, rc); 265 if (rc) 266 printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n", 267 __func__, rc, bprm->filename); 268 269 out: 270 dput(dentry); 271 if (rc) 272 bprm_clear_caps(bprm); 273 274 return rc; 275 } 276 277 #else 278 int cap_inode_need_killpriv(struct dentry *dentry) 279 { 280 return 0; 281 } 282 283 int cap_inode_killpriv(struct dentry *dentry) 284 { 285 return 0; 286 } 287 288 static inline int get_file_caps(struct linux_binprm *bprm) 289 { 290 bprm_clear_caps(bprm); 291 return 0; 292 } 293 #endif 294 295 int cap_bprm_set_security (struct linux_binprm *bprm) 296 { 297 int ret; 298 299 ret = get_file_caps(bprm); 300 if (ret) 301 printk(KERN_NOTICE "%s: get_file_caps returned %d for %s\n", 302 __func__, ret, bprm->filename); 303 304 /* To support inheritance of root-permissions and suid-root 305 * executables under compatibility mode, we raise all three 306 * capability sets for the file. 307 * 308 * If only the real uid is 0, we only raise the inheritable 309 * and permitted sets of the executable file. 310 */ 311 312 if (!issecure (SECURE_NOROOT)) { 313 if (bprm->e_uid == 0 || current->uid == 0) { 314 cap_set_full (bprm->cap_inheritable); 315 cap_set_full (bprm->cap_permitted); 316 } 317 if (bprm->e_uid == 0) 318 bprm->cap_effective = true; 319 } 320 321 return ret; 322 } 323 324 void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) 325 { 326 /* Derived from fs/exec.c:compute_creds. */ 327 kernel_cap_t new_permitted, working; 328 329 new_permitted = cap_intersect(bprm->cap_permitted, 330 current->cap_bset); 331 working = cap_intersect(bprm->cap_inheritable, 332 current->cap_inheritable); 333 new_permitted = cap_combine(new_permitted, working); 334 335 if (bprm->e_uid != current->uid || bprm->e_gid != current->gid || 336 !cap_issubset (new_permitted, current->cap_permitted)) { 337 set_dumpable(current->mm, suid_dumpable); 338 current->pdeath_signal = 0; 339 340 if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) { 341 if (!capable(CAP_SETUID)) { 342 bprm->e_uid = current->uid; 343 bprm->e_gid = current->gid; 344 } 345 if (!capable (CAP_SETPCAP)) { 346 new_permitted = cap_intersect (new_permitted, 347 current->cap_permitted); 348 } 349 } 350 } 351 352 current->suid = current->euid = current->fsuid = bprm->e_uid; 353 current->sgid = current->egid = current->fsgid = bprm->e_gid; 354 355 /* For init, we want to retain the capabilities set 356 * in the init_task struct. Thus we skip the usual 357 * capability rules */ 358 if (!is_global_init(current)) { 359 current->cap_permitted = new_permitted; 360 if (bprm->cap_effective) 361 current->cap_effective = new_permitted; 362 else 363 cap_clear(current->cap_effective); 364 } 365 366 /* AUD: Audit candidate if current->cap_effective is set */ 367 368 current->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); 369 } 370 371 int cap_bprm_secureexec (struct linux_binprm *bprm) 372 { 373 if (current->uid != 0) { 374 if (bprm->cap_effective) 375 return 1; 376 if (!cap_isclear(bprm->cap_permitted)) 377 return 1; 378 if (!cap_isclear(bprm->cap_inheritable)) 379 return 1; 380 } 381 382 return (current->euid != current->uid || 383 current->egid != current->gid); 384 } 385 386 int cap_inode_setxattr(struct dentry *dentry, const char *name, 387 const void *value, size_t size, int flags) 388 { 389 if (!strcmp(name, XATTR_NAME_CAPS)) { 390 if (!capable(CAP_SETFCAP)) 391 return -EPERM; 392 return 0; 393 } else if (!strncmp(name, XATTR_SECURITY_PREFIX, 394 sizeof(XATTR_SECURITY_PREFIX) - 1) && 395 !capable(CAP_SYS_ADMIN)) 396 return -EPERM; 397 return 0; 398 } 399 400 int cap_inode_removexattr(struct dentry *dentry, const char *name) 401 { 402 if (!strcmp(name, XATTR_NAME_CAPS)) { 403 if (!capable(CAP_SETFCAP)) 404 return -EPERM; 405 return 0; 406 } else if (!strncmp(name, XATTR_SECURITY_PREFIX, 407 sizeof(XATTR_SECURITY_PREFIX) - 1) && 408 !capable(CAP_SYS_ADMIN)) 409 return -EPERM; 410 return 0; 411 } 412 413 /* moved from kernel/sys.c. */ 414 /* 415 * cap_emulate_setxuid() fixes the effective / permitted capabilities of 416 * a process after a call to setuid, setreuid, or setresuid. 417 * 418 * 1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of 419 * {r,e,s}uid != 0, the permitted and effective capabilities are 420 * cleared. 421 * 422 * 2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective 423 * capabilities of the process are cleared. 424 * 425 * 3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective 426 * capabilities are set to the permitted capabilities. 427 * 428 * fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should 429 * never happen. 430 * 431 * -astor 432 * 433 * cevans - New behaviour, Oct '99 434 * A process may, via prctl(), elect to keep its capabilities when it 435 * calls setuid() and switches away from uid==0. Both permitted and 436 * effective sets will be retained. 437 * Without this change, it was impossible for a daemon to drop only some 438 * of its privilege. The call to setuid(!=0) would drop all privileges! 439 * Keeping uid 0 is not an option because uid 0 owns too many vital 440 * files.. 441 * Thanks to Olaf Kirch and Peter Benie for spotting this. 442 */ 443 static inline void cap_emulate_setxuid (int old_ruid, int old_euid, 444 int old_suid) 445 { 446 if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) && 447 (current->uid != 0 && current->euid != 0 && current->suid != 0) && 448 !issecure(SECURE_KEEP_CAPS)) { 449 cap_clear (current->cap_permitted); 450 cap_clear (current->cap_effective); 451 } 452 if (old_euid == 0 && current->euid != 0) { 453 cap_clear (current->cap_effective); 454 } 455 if (old_euid != 0 && current->euid == 0) { 456 current->cap_effective = current->cap_permitted; 457 } 458 } 459 460 int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, 461 int flags) 462 { 463 switch (flags) { 464 case LSM_SETID_RE: 465 case LSM_SETID_ID: 466 case LSM_SETID_RES: 467 /* Copied from kernel/sys.c:setreuid/setuid/setresuid. */ 468 if (!issecure (SECURE_NO_SETUID_FIXUP)) { 469 cap_emulate_setxuid (old_ruid, old_euid, old_suid); 470 } 471 break; 472 case LSM_SETID_FS: 473 { 474 uid_t old_fsuid = old_ruid; 475 476 /* Copied from kernel/sys.c:setfsuid. */ 477 478 /* 479 * FIXME - is fsuser used for all CAP_FS_MASK capabilities? 480 * if not, we might be a bit too harsh here. 481 */ 482 483 if (!issecure (SECURE_NO_SETUID_FIXUP)) { 484 if (old_fsuid == 0 && current->fsuid != 0) { 485 current->cap_effective = 486 cap_drop_fs_set( 487 current->cap_effective); 488 } 489 if (old_fsuid != 0 && current->fsuid == 0) { 490 current->cap_effective = 491 cap_raise_fs_set( 492 current->cap_effective, 493 current->cap_permitted); 494 } 495 } 496 break; 497 } 498 default: 499 return -EINVAL; 500 } 501 502 return 0; 503 } 504 505 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES 506 /* 507 * Rationale: code calling task_setscheduler, task_setioprio, and 508 * task_setnice, assumes that 509 * . if capable(cap_sys_nice), then those actions should be allowed 510 * . if not capable(cap_sys_nice), but acting on your own processes, 511 * then those actions should be allowed 512 * This is insufficient now since you can call code without suid, but 513 * yet with increased caps. 514 * So we check for increased caps on the target process. 515 */ 516 static inline int cap_safe_nice(struct task_struct *p) 517 { 518 if (!cap_issubset(p->cap_permitted, current->cap_permitted) && 519 !__capable(current, CAP_SYS_NICE)) 520 return -EPERM; 521 return 0; 522 } 523 524 int cap_task_setscheduler (struct task_struct *p, int policy, 525 struct sched_param *lp) 526 { 527 return cap_safe_nice(p); 528 } 529 530 int cap_task_setioprio (struct task_struct *p, int ioprio) 531 { 532 return cap_safe_nice(p); 533 } 534 535 int cap_task_setnice (struct task_struct *p, int nice) 536 { 537 return cap_safe_nice(p); 538 } 539 540 /* 541 * called from kernel/sys.c for prctl(PR_CABSET_DROP) 542 * done without task_capability_lock() because it introduces 543 * no new races - i.e. only another task doing capget() on 544 * this task could get inconsistent info. There can be no 545 * racing writer bc a task can only change its own caps. 546 */ 547 static long cap_prctl_drop(unsigned long cap) 548 { 549 if (!capable(CAP_SETPCAP)) 550 return -EPERM; 551 if (!cap_valid(cap)) 552 return -EINVAL; 553 cap_lower(current->cap_bset, cap); 554 return 0; 555 } 556 557 #else 558 int cap_task_setscheduler (struct task_struct *p, int policy, 559 struct sched_param *lp) 560 { 561 return 0; 562 } 563 int cap_task_setioprio (struct task_struct *p, int ioprio) 564 { 565 return 0; 566 } 567 int cap_task_setnice (struct task_struct *p, int nice) 568 { 569 return 0; 570 } 571 #endif 572 573 int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, 574 unsigned long arg4, unsigned long arg5, long *rc_p) 575 { 576 long error = 0; 577 578 switch (option) { 579 case PR_CAPBSET_READ: 580 if (!cap_valid(arg2)) 581 error = -EINVAL; 582 else 583 error = !!cap_raised(current->cap_bset, arg2); 584 break; 585 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES 586 case PR_CAPBSET_DROP: 587 error = cap_prctl_drop(arg2); 588 break; 589 590 /* 591 * The next four prctl's remain to assist with transitioning a 592 * system from legacy UID=0 based privilege (when filesystem 593 * capabilities are not in use) to a system using filesystem 594 * capabilities only - as the POSIX.1e draft intended. 595 * 596 * Note: 597 * 598 * PR_SET_SECUREBITS = 599 * issecure_mask(SECURE_KEEP_CAPS_LOCKED) 600 * | issecure_mask(SECURE_NOROOT) 601 * | issecure_mask(SECURE_NOROOT_LOCKED) 602 * | issecure_mask(SECURE_NO_SETUID_FIXUP) 603 * | issecure_mask(SECURE_NO_SETUID_FIXUP_LOCKED) 604 * 605 * will ensure that the current process and all of its 606 * children will be locked into a pure 607 * capability-based-privilege environment. 608 */ 609 case PR_SET_SECUREBITS: 610 if ((((current->securebits & SECURE_ALL_LOCKS) >> 1) 611 & (current->securebits ^ arg2)) /*[1]*/ 612 || ((current->securebits & SECURE_ALL_LOCKS 613 & ~arg2)) /*[2]*/ 614 || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/ 615 || (cap_capable(current, CAP_SETPCAP) != 0)) { /*[4]*/ 616 /* 617 * [1] no changing of bits that are locked 618 * [2] no unlocking of locks 619 * [3] no setting of unsupported bits 620 * [4] doing anything requires privilege (go read about 621 * the "sendmail capabilities bug") 622 */ 623 error = -EPERM; /* cannot change a locked bit */ 624 } else { 625 current->securebits = arg2; 626 } 627 break; 628 case PR_GET_SECUREBITS: 629 error = current->securebits; 630 break; 631 632 #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ 633 634 case PR_GET_KEEPCAPS: 635 if (issecure(SECURE_KEEP_CAPS)) 636 error = 1; 637 break; 638 case PR_SET_KEEPCAPS: 639 if (arg2 > 1) /* Note, we rely on arg2 being unsigned here */ 640 error = -EINVAL; 641 else if (issecure(SECURE_KEEP_CAPS_LOCKED)) 642 error = -EPERM; 643 else if (arg2) 644 current->securebits |= issecure_mask(SECURE_KEEP_CAPS); 645 else 646 current->securebits &= 647 ~issecure_mask(SECURE_KEEP_CAPS); 648 break; 649 650 default: 651 /* No functionality available - continue with default */ 652 return 0; 653 } 654 655 /* Functionality provided */ 656 *rc_p = error; 657 return 1; 658 } 659 660 void cap_task_reparent_to_init (struct task_struct *p) 661 { 662 cap_set_init_eff(p->cap_effective); 663 cap_clear(p->cap_inheritable); 664 cap_set_full(p->cap_permitted); 665 p->securebits = SECUREBITS_DEFAULT; 666 return; 667 } 668 669 int cap_syslog (int type) 670 { 671 if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN)) 672 return -EPERM; 673 return 0; 674 } 675 676 int cap_vm_enough_memory(struct mm_struct *mm, long pages) 677 { 678 int cap_sys_admin = 0; 679 680 if (cap_capable(current, CAP_SYS_ADMIN) == 0) 681 cap_sys_admin = 1; 682 return __vm_enough_memory(mm, pages, cap_sys_admin); 683 } 684 685