1 /* Common capabilities, needed by capability.o and root_plug.o 2 * 3 * This program is free software; you can redistribute it and/or modify 4 * it under the terms of the GNU General Public License as published by 5 * the Free Software Foundation; either version 2 of the License, or 6 * (at your option) any later version. 7 * 8 */ 9 10 #include <linux/capability.h> 11 #include <linux/module.h> 12 #include <linux/init.h> 13 #include <linux/kernel.h> 14 #include <linux/security.h> 15 #include <linux/file.h> 16 #include <linux/mm.h> 17 #include <linux/mman.h> 18 #include <linux/pagemap.h> 19 #include <linux/swap.h> 20 #include <linux/skbuff.h> 21 #include <linux/netlink.h> 22 #include <linux/ptrace.h> 23 #include <linux/xattr.h> 24 #include <linux/hugetlb.h> 25 #include <linux/mount.h> 26 #include <linux/sched.h> 27 28 /* Global security state */ 29 30 unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */ 31 EXPORT_SYMBOL(securebits); 32 33 int cap_netlink_send(struct sock *sk, struct sk_buff *skb) 34 { 35 NETLINK_CB(skb).eff_cap = current->cap_effective; 36 return 0; 37 } 38 39 int cap_netlink_recv(struct sk_buff *skb, int cap) 40 { 41 if (!cap_raised(NETLINK_CB(skb).eff_cap, cap)) 42 return -EPERM; 43 return 0; 44 } 45 46 EXPORT_SYMBOL(cap_netlink_recv); 47 48 /* 49 * NOTE WELL: cap_capable() cannot be used like the kernel's capable() 50 * function. That is, it has the reverse semantics: cap_capable() 51 * returns 0 when a task has a capability, but the kernel's capable() 52 * returns 1 for this case. 53 */ 54 int cap_capable (struct task_struct *tsk, int cap) 55 { 56 /* Derived from include/linux/sched.h:capable. */ 57 if (cap_raised(tsk->cap_effective, cap)) 58 return 0; 59 return -EPERM; 60 } 61 62 int cap_settime(struct timespec *ts, struct timezone *tz) 63 { 64 if (!capable(CAP_SYS_TIME)) 65 return -EPERM; 66 return 0; 67 } 68 69 int cap_ptrace (struct task_struct *parent, struct task_struct *child) 70 { 71 /* Derived from arch/i386/kernel/ptrace.c:sys_ptrace. */ 72 if (!cap_issubset(child->cap_permitted, parent->cap_permitted) && 73 !__capable(parent, CAP_SYS_PTRACE)) 74 return -EPERM; 75 return 0; 76 } 77 78 int cap_capget (struct task_struct *target, kernel_cap_t *effective, 79 kernel_cap_t *inheritable, kernel_cap_t *permitted) 80 { 81 /* Derived from kernel/capability.c:sys_capget. */ 82 *effective = target->cap_effective; 83 *inheritable = target->cap_inheritable; 84 *permitted = target->cap_permitted; 85 return 0; 86 } 87 88 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES 89 90 static inline int cap_block_setpcap(struct task_struct *target) 91 { 92 /* 93 * No support for remote process capability manipulation with 94 * filesystem capability support. 95 */ 96 return (target != current); 97 } 98 99 static inline int cap_inh_is_capped(void) 100 { 101 /* 102 * Return 1 if changes to the inheritable set are limited 103 * to the old permitted set. That is, if the current task 104 * does *not* possess the CAP_SETPCAP capability. 105 */ 106 return (cap_capable(current, CAP_SETPCAP) != 0); 107 } 108 109 #else /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */ 110 111 static inline int cap_block_setpcap(struct task_struct *t) { return 0; } 112 static inline int cap_inh_is_capped(void) { return 1; } 113 114 #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ 115 116 int cap_capset_check (struct task_struct *target, kernel_cap_t *effective, 117 kernel_cap_t *inheritable, kernel_cap_t *permitted) 118 { 119 if (cap_block_setpcap(target)) { 120 return -EPERM; 121 } 122 if (cap_inh_is_capped() 123 && !cap_issubset(*inheritable, 124 cap_combine(target->cap_inheritable, 125 current->cap_permitted))) { 126 /* incapable of using this inheritable set */ 127 return -EPERM; 128 } 129 if (!cap_issubset(*inheritable, 130 cap_combine(target->cap_inheritable, 131 current->cap_bset))) { 132 /* no new pI capabilities outside bounding set */ 133 return -EPERM; 134 } 135 136 /* verify restrictions on target's new Permitted set */ 137 if (!cap_issubset (*permitted, 138 cap_combine (target->cap_permitted, 139 current->cap_permitted))) { 140 return -EPERM; 141 } 142 143 /* verify the _new_Effective_ is a subset of the _new_Permitted_ */ 144 if (!cap_issubset (*effective, *permitted)) { 145 return -EPERM; 146 } 147 148 return 0; 149 } 150 151 void cap_capset_set (struct task_struct *target, kernel_cap_t *effective, 152 kernel_cap_t *inheritable, kernel_cap_t *permitted) 153 { 154 target->cap_effective = *effective; 155 target->cap_inheritable = *inheritable; 156 target->cap_permitted = *permitted; 157 } 158 159 static inline void bprm_clear_caps(struct linux_binprm *bprm) 160 { 161 cap_clear(bprm->cap_inheritable); 162 cap_clear(bprm->cap_permitted); 163 bprm->cap_effective = false; 164 } 165 166 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES 167 168 int cap_inode_need_killpriv(struct dentry *dentry) 169 { 170 struct inode *inode = dentry->d_inode; 171 int error; 172 173 if (!inode->i_op || !inode->i_op->getxattr) 174 return 0; 175 176 error = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, NULL, 0); 177 if (error <= 0) 178 return 0; 179 return 1; 180 } 181 182 int cap_inode_killpriv(struct dentry *dentry) 183 { 184 struct inode *inode = dentry->d_inode; 185 186 if (!inode->i_op || !inode->i_op->removexattr) 187 return 0; 188 189 return inode->i_op->removexattr(dentry, XATTR_NAME_CAPS); 190 } 191 192 static inline int cap_from_disk(struct vfs_cap_data *caps, 193 struct linux_binprm *bprm, unsigned size) 194 { 195 __u32 magic_etc; 196 unsigned tocopy, i; 197 198 if (size < sizeof(magic_etc)) 199 return -EINVAL; 200 201 magic_etc = le32_to_cpu(caps->magic_etc); 202 203 switch ((magic_etc & VFS_CAP_REVISION_MASK)) { 204 case VFS_CAP_REVISION_1: 205 if (size != XATTR_CAPS_SZ_1) 206 return -EINVAL; 207 tocopy = VFS_CAP_U32_1; 208 break; 209 case VFS_CAP_REVISION_2: 210 if (size != XATTR_CAPS_SZ_2) 211 return -EINVAL; 212 tocopy = VFS_CAP_U32_2; 213 break; 214 default: 215 return -EINVAL; 216 } 217 218 if (magic_etc & VFS_CAP_FLAGS_EFFECTIVE) { 219 bprm->cap_effective = true; 220 } else { 221 bprm->cap_effective = false; 222 } 223 224 for (i = 0; i < tocopy; ++i) { 225 bprm->cap_permitted.cap[i] = 226 le32_to_cpu(caps->data[i].permitted); 227 bprm->cap_inheritable.cap[i] = 228 le32_to_cpu(caps->data[i].inheritable); 229 } 230 while (i < VFS_CAP_U32) { 231 bprm->cap_permitted.cap[i] = 0; 232 bprm->cap_inheritable.cap[i] = 0; 233 i++; 234 } 235 236 return 0; 237 } 238 239 /* Locate any VFS capabilities: */ 240 static int get_file_caps(struct linux_binprm *bprm) 241 { 242 struct dentry *dentry; 243 int rc = 0; 244 struct vfs_cap_data vcaps; 245 struct inode *inode; 246 247 if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID) { 248 bprm_clear_caps(bprm); 249 return 0; 250 } 251 252 dentry = dget(bprm->file->f_dentry); 253 inode = dentry->d_inode; 254 if (!inode->i_op || !inode->i_op->getxattr) 255 goto out; 256 257 rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, &vcaps, 258 XATTR_CAPS_SZ); 259 if (rc == -ENODATA || rc == -EOPNOTSUPP) { 260 /* no data, that's ok */ 261 rc = 0; 262 goto out; 263 } 264 if (rc < 0) 265 goto out; 266 267 rc = cap_from_disk(&vcaps, bprm, rc); 268 if (rc) 269 printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n", 270 __FUNCTION__, rc, bprm->filename); 271 272 out: 273 dput(dentry); 274 if (rc) 275 bprm_clear_caps(bprm); 276 277 return rc; 278 } 279 280 #else 281 int cap_inode_need_killpriv(struct dentry *dentry) 282 { 283 return 0; 284 } 285 286 int cap_inode_killpriv(struct dentry *dentry) 287 { 288 return 0; 289 } 290 291 static inline int get_file_caps(struct linux_binprm *bprm) 292 { 293 bprm_clear_caps(bprm); 294 return 0; 295 } 296 #endif 297 298 int cap_bprm_set_security (struct linux_binprm *bprm) 299 { 300 int ret; 301 302 ret = get_file_caps(bprm); 303 if (ret) 304 printk(KERN_NOTICE "%s: get_file_caps returned %d for %s\n", 305 __FUNCTION__, ret, bprm->filename); 306 307 /* To support inheritance of root-permissions and suid-root 308 * executables under compatibility mode, we raise all three 309 * capability sets for the file. 310 * 311 * If only the real uid is 0, we only raise the inheritable 312 * and permitted sets of the executable file. 313 */ 314 315 if (!issecure (SECURE_NOROOT)) { 316 if (bprm->e_uid == 0 || current->uid == 0) { 317 cap_set_full (bprm->cap_inheritable); 318 cap_set_full (bprm->cap_permitted); 319 } 320 if (bprm->e_uid == 0) 321 bprm->cap_effective = true; 322 } 323 324 return ret; 325 } 326 327 void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) 328 { 329 /* Derived from fs/exec.c:compute_creds. */ 330 kernel_cap_t new_permitted, working; 331 332 new_permitted = cap_intersect(bprm->cap_permitted, 333 current->cap_bset); 334 working = cap_intersect(bprm->cap_inheritable, 335 current->cap_inheritable); 336 new_permitted = cap_combine(new_permitted, working); 337 338 if (bprm->e_uid != current->uid || bprm->e_gid != current->gid || 339 !cap_issubset (new_permitted, current->cap_permitted)) { 340 set_dumpable(current->mm, suid_dumpable); 341 current->pdeath_signal = 0; 342 343 if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) { 344 if (!capable(CAP_SETUID)) { 345 bprm->e_uid = current->uid; 346 bprm->e_gid = current->gid; 347 } 348 if (!capable (CAP_SETPCAP)) { 349 new_permitted = cap_intersect (new_permitted, 350 current->cap_permitted); 351 } 352 } 353 } 354 355 current->suid = current->euid = current->fsuid = bprm->e_uid; 356 current->sgid = current->egid = current->fsgid = bprm->e_gid; 357 358 /* For init, we want to retain the capabilities set 359 * in the init_task struct. Thus we skip the usual 360 * capability rules */ 361 if (!is_global_init(current)) { 362 current->cap_permitted = new_permitted; 363 if (bprm->cap_effective) 364 current->cap_effective = new_permitted; 365 else 366 cap_clear(current->cap_effective); 367 } 368 369 /* AUD: Audit candidate if current->cap_effective is set */ 370 371 current->keep_capabilities = 0; 372 } 373 374 int cap_bprm_secureexec (struct linux_binprm *bprm) 375 { 376 if (current->uid != 0) { 377 if (bprm->cap_effective) 378 return 1; 379 if (!cap_isclear(bprm->cap_permitted)) 380 return 1; 381 if (!cap_isclear(bprm->cap_inheritable)) 382 return 1; 383 } 384 385 return (current->euid != current->uid || 386 current->egid != current->gid); 387 } 388 389 int cap_inode_setxattr(struct dentry *dentry, char *name, void *value, 390 size_t size, int flags) 391 { 392 if (!strcmp(name, XATTR_NAME_CAPS)) { 393 if (!capable(CAP_SETFCAP)) 394 return -EPERM; 395 return 0; 396 } else if (!strncmp(name, XATTR_SECURITY_PREFIX, 397 sizeof(XATTR_SECURITY_PREFIX) - 1) && 398 !capable(CAP_SYS_ADMIN)) 399 return -EPERM; 400 return 0; 401 } 402 403 int cap_inode_removexattr(struct dentry *dentry, char *name) 404 { 405 if (!strcmp(name, XATTR_NAME_CAPS)) { 406 if (!capable(CAP_SETFCAP)) 407 return -EPERM; 408 return 0; 409 } else if (!strncmp(name, XATTR_SECURITY_PREFIX, 410 sizeof(XATTR_SECURITY_PREFIX) - 1) && 411 !capable(CAP_SYS_ADMIN)) 412 return -EPERM; 413 return 0; 414 } 415 416 /* moved from kernel/sys.c. */ 417 /* 418 * cap_emulate_setxuid() fixes the effective / permitted capabilities of 419 * a process after a call to setuid, setreuid, or setresuid. 420 * 421 * 1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of 422 * {r,e,s}uid != 0, the permitted and effective capabilities are 423 * cleared. 424 * 425 * 2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective 426 * capabilities of the process are cleared. 427 * 428 * 3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective 429 * capabilities are set to the permitted capabilities. 430 * 431 * fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should 432 * never happen. 433 * 434 * -astor 435 * 436 * cevans - New behaviour, Oct '99 437 * A process may, via prctl(), elect to keep its capabilities when it 438 * calls setuid() and switches away from uid==0. Both permitted and 439 * effective sets will be retained. 440 * Without this change, it was impossible for a daemon to drop only some 441 * of its privilege. The call to setuid(!=0) would drop all privileges! 442 * Keeping uid 0 is not an option because uid 0 owns too many vital 443 * files.. 444 * Thanks to Olaf Kirch and Peter Benie for spotting this. 445 */ 446 static inline void cap_emulate_setxuid (int old_ruid, int old_euid, 447 int old_suid) 448 { 449 if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) && 450 (current->uid != 0 && current->euid != 0 && current->suid != 0) && 451 !current->keep_capabilities) { 452 cap_clear (current->cap_permitted); 453 cap_clear (current->cap_effective); 454 } 455 if (old_euid == 0 && current->euid != 0) { 456 cap_clear (current->cap_effective); 457 } 458 if (old_euid != 0 && current->euid == 0) { 459 current->cap_effective = current->cap_permitted; 460 } 461 } 462 463 int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, 464 int flags) 465 { 466 switch (flags) { 467 case LSM_SETID_RE: 468 case LSM_SETID_ID: 469 case LSM_SETID_RES: 470 /* Copied from kernel/sys.c:setreuid/setuid/setresuid. */ 471 if (!issecure (SECURE_NO_SETUID_FIXUP)) { 472 cap_emulate_setxuid (old_ruid, old_euid, old_suid); 473 } 474 break; 475 case LSM_SETID_FS: 476 { 477 uid_t old_fsuid = old_ruid; 478 479 /* Copied from kernel/sys.c:setfsuid. */ 480 481 /* 482 * FIXME - is fsuser used for all CAP_FS_MASK capabilities? 483 * if not, we might be a bit too harsh here. 484 */ 485 486 if (!issecure (SECURE_NO_SETUID_FIXUP)) { 487 if (old_fsuid == 0 && current->fsuid != 0) { 488 current->cap_effective = 489 cap_drop_fs_set( 490 current->cap_effective); 491 } 492 if (old_fsuid != 0 && current->fsuid == 0) { 493 current->cap_effective = 494 cap_raise_fs_set( 495 current->cap_effective, 496 current->cap_permitted); 497 } 498 } 499 break; 500 } 501 default: 502 return -EINVAL; 503 } 504 505 return 0; 506 } 507 508 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES 509 /* 510 * Rationale: code calling task_setscheduler, task_setioprio, and 511 * task_setnice, assumes that 512 * . if capable(cap_sys_nice), then those actions should be allowed 513 * . if not capable(cap_sys_nice), but acting on your own processes, 514 * then those actions should be allowed 515 * This is insufficient now since you can call code without suid, but 516 * yet with increased caps. 517 * So we check for increased caps on the target process. 518 */ 519 static inline int cap_safe_nice(struct task_struct *p) 520 { 521 if (!cap_issubset(p->cap_permitted, current->cap_permitted) && 522 !__capable(current, CAP_SYS_NICE)) 523 return -EPERM; 524 return 0; 525 } 526 527 int cap_task_setscheduler (struct task_struct *p, int policy, 528 struct sched_param *lp) 529 { 530 return cap_safe_nice(p); 531 } 532 533 int cap_task_setioprio (struct task_struct *p, int ioprio) 534 { 535 return cap_safe_nice(p); 536 } 537 538 int cap_task_setnice (struct task_struct *p, int nice) 539 { 540 return cap_safe_nice(p); 541 } 542 543 /* 544 * called from kernel/sys.c for prctl(PR_CABSET_DROP) 545 * done without task_capability_lock() because it introduces 546 * no new races - i.e. only another task doing capget() on 547 * this task could get inconsistent info. There can be no 548 * racing writer bc a task can only change its own caps. 549 */ 550 long cap_prctl_drop(unsigned long cap) 551 { 552 if (!capable(CAP_SETPCAP)) 553 return -EPERM; 554 if (!cap_valid(cap)) 555 return -EINVAL; 556 cap_lower(current->cap_bset, cap); 557 return 0; 558 } 559 #else 560 int cap_task_setscheduler (struct task_struct *p, int policy, 561 struct sched_param *lp) 562 { 563 return 0; 564 } 565 int cap_task_setioprio (struct task_struct *p, int ioprio) 566 { 567 return 0; 568 } 569 int cap_task_setnice (struct task_struct *p, int nice) 570 { 571 return 0; 572 } 573 #endif 574 575 void cap_task_reparent_to_init (struct task_struct *p) 576 { 577 cap_set_init_eff(p->cap_effective); 578 cap_clear(p->cap_inheritable); 579 cap_set_full(p->cap_permitted); 580 p->keep_capabilities = 0; 581 return; 582 } 583 584 int cap_syslog (int type) 585 { 586 if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN)) 587 return -EPERM; 588 return 0; 589 } 590 591 int cap_vm_enough_memory(struct mm_struct *mm, long pages) 592 { 593 int cap_sys_admin = 0; 594 595 if (cap_capable(current, CAP_SYS_ADMIN) == 0) 596 cap_sys_admin = 1; 597 return __vm_enough_memory(mm, pages, cap_sys_admin); 598 } 599 600