1 /* Common capabilities, needed by capability.o and root_plug.o 2 * 3 * This program is free software; you can redistribute it and/or modify 4 * it under the terms of the GNU General Public License as published by 5 * the Free Software Foundation; either version 2 of the License, or 6 * (at your option) any later version. 7 * 8 */ 9 10 #include <linux/capability.h> 11 #include <linux/module.h> 12 #include <linux/init.h> 13 #include <linux/kernel.h> 14 #include <linux/security.h> 15 #include <linux/file.h> 16 #include <linux/mm.h> 17 #include <linux/mman.h> 18 #include <linux/pagemap.h> 19 #include <linux/swap.h> 20 #include <linux/skbuff.h> 21 #include <linux/netlink.h> 22 #include <linux/ptrace.h> 23 #include <linux/xattr.h> 24 #include <linux/hugetlb.h> 25 #include <linux/mount.h> 26 #include <linux/sched.h> 27 28 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES 29 /* 30 * Because of the reduced scope of CAP_SETPCAP when filesystem 31 * capabilities are in effect, it is safe to allow this capability to 32 * be available in the default configuration. 33 */ 34 # define CAP_INIT_BSET CAP_FULL_SET 35 #else /* ie. ndef CONFIG_SECURITY_FILE_CAPABILITIES */ 36 # define CAP_INIT_BSET CAP_INIT_EFF_SET 37 #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ 38 39 kernel_cap_t cap_bset = CAP_INIT_BSET; /* systemwide capability bound */ 40 EXPORT_SYMBOL(cap_bset); 41 42 /* Global security state */ 43 44 unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */ 45 EXPORT_SYMBOL(securebits); 46 47 int cap_netlink_send(struct sock *sk, struct sk_buff *skb) 48 { 49 NETLINK_CB(skb).eff_cap = current->cap_effective; 50 return 0; 51 } 52 53 int cap_netlink_recv(struct sk_buff *skb, int cap) 54 { 55 if (!cap_raised(NETLINK_CB(skb).eff_cap, cap)) 56 return -EPERM; 57 return 0; 58 } 59 60 EXPORT_SYMBOL(cap_netlink_recv); 61 62 /* 63 * NOTE WELL: cap_capable() cannot be used like the kernel's capable() 64 * function. That is, it has the reverse semantics: cap_capable() 65 * returns 0 when a task has a capability, but the kernel's capable() 66 * returns 1 for this case. 67 */ 68 int cap_capable (struct task_struct *tsk, int cap) 69 { 70 /* Derived from include/linux/sched.h:capable. */ 71 if (cap_raised(tsk->cap_effective, cap)) 72 return 0; 73 return -EPERM; 74 } 75 76 int cap_settime(struct timespec *ts, struct timezone *tz) 77 { 78 if (!capable(CAP_SYS_TIME)) 79 return -EPERM; 80 return 0; 81 } 82 83 int cap_ptrace (struct task_struct *parent, struct task_struct *child) 84 { 85 /* Derived from arch/i386/kernel/ptrace.c:sys_ptrace. */ 86 if (!cap_issubset(child->cap_permitted, parent->cap_permitted) && 87 !__capable(parent, CAP_SYS_PTRACE)) 88 return -EPERM; 89 return 0; 90 } 91 92 int cap_capget (struct task_struct *target, kernel_cap_t *effective, 93 kernel_cap_t *inheritable, kernel_cap_t *permitted) 94 { 95 /* Derived from kernel/capability.c:sys_capget. */ 96 *effective = cap_t (target->cap_effective); 97 *inheritable = cap_t (target->cap_inheritable); 98 *permitted = cap_t (target->cap_permitted); 99 return 0; 100 } 101 102 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES 103 104 static inline int cap_block_setpcap(struct task_struct *target) 105 { 106 /* 107 * No support for remote process capability manipulation with 108 * filesystem capability support. 109 */ 110 return (target != current); 111 } 112 113 static inline int cap_inh_is_capped(void) 114 { 115 /* 116 * Return 1 if changes to the inheritable set are limited 117 * to the old permitted set. That is, if the current task 118 * does *not* possess the CAP_SETPCAP capability. 119 */ 120 return (cap_capable(current, CAP_SETPCAP) != 0); 121 } 122 123 #else /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */ 124 125 static inline int cap_block_setpcap(struct task_struct *t) { return 0; } 126 static inline int cap_inh_is_capped(void) { return 1; } 127 128 #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ 129 130 int cap_capset_check (struct task_struct *target, kernel_cap_t *effective, 131 kernel_cap_t *inheritable, kernel_cap_t *permitted) 132 { 133 if (cap_block_setpcap(target)) { 134 return -EPERM; 135 } 136 if (cap_inh_is_capped() 137 && !cap_issubset(*inheritable, 138 cap_combine(target->cap_inheritable, 139 current->cap_permitted))) { 140 /* incapable of using this inheritable set */ 141 return -EPERM; 142 } 143 144 /* verify restrictions on target's new Permitted set */ 145 if (!cap_issubset (*permitted, 146 cap_combine (target->cap_permitted, 147 current->cap_permitted))) { 148 return -EPERM; 149 } 150 151 /* verify the _new_Effective_ is a subset of the _new_Permitted_ */ 152 if (!cap_issubset (*effective, *permitted)) { 153 return -EPERM; 154 } 155 156 return 0; 157 } 158 159 void cap_capset_set (struct task_struct *target, kernel_cap_t *effective, 160 kernel_cap_t *inheritable, kernel_cap_t *permitted) 161 { 162 target->cap_effective = *effective; 163 target->cap_inheritable = *inheritable; 164 target->cap_permitted = *permitted; 165 } 166 167 static inline void bprm_clear_caps(struct linux_binprm *bprm) 168 { 169 cap_clear(bprm->cap_inheritable); 170 cap_clear(bprm->cap_permitted); 171 bprm->cap_effective = false; 172 } 173 174 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES 175 176 int cap_inode_need_killpriv(struct dentry *dentry) 177 { 178 struct inode *inode = dentry->d_inode; 179 int error; 180 181 if (!inode->i_op || !inode->i_op->getxattr) 182 return 0; 183 184 error = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, NULL, 0); 185 if (error <= 0) 186 return 0; 187 return 1; 188 } 189 190 int cap_inode_killpriv(struct dentry *dentry) 191 { 192 struct inode *inode = dentry->d_inode; 193 194 if (!inode->i_op || !inode->i_op->removexattr) 195 return 0; 196 197 return inode->i_op->removexattr(dentry, XATTR_NAME_CAPS); 198 } 199 200 static inline int cap_from_disk(struct vfs_cap_data *caps, 201 struct linux_binprm *bprm, 202 int size) 203 { 204 __u32 magic_etc; 205 206 if (size != XATTR_CAPS_SZ) 207 return -EINVAL; 208 209 magic_etc = le32_to_cpu(caps->magic_etc); 210 211 switch ((magic_etc & VFS_CAP_REVISION_MASK)) { 212 case VFS_CAP_REVISION: 213 if (magic_etc & VFS_CAP_FLAGS_EFFECTIVE) 214 bprm->cap_effective = true; 215 else 216 bprm->cap_effective = false; 217 bprm->cap_permitted = to_cap_t(le32_to_cpu(caps->permitted)); 218 bprm->cap_inheritable = to_cap_t(le32_to_cpu(caps->inheritable)); 219 return 0; 220 default: 221 return -EINVAL; 222 } 223 } 224 225 /* Locate any VFS capabilities: */ 226 static int get_file_caps(struct linux_binprm *bprm) 227 { 228 struct dentry *dentry; 229 int rc = 0; 230 struct vfs_cap_data incaps; 231 struct inode *inode; 232 233 if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID) { 234 bprm_clear_caps(bprm); 235 return 0; 236 } 237 238 dentry = dget(bprm->file->f_dentry); 239 inode = dentry->d_inode; 240 if (!inode->i_op || !inode->i_op->getxattr) 241 goto out; 242 243 rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, NULL, 0); 244 if (rc > 0) { 245 if (rc == XATTR_CAPS_SZ) 246 rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, 247 &incaps, XATTR_CAPS_SZ); 248 else 249 rc = -EINVAL; 250 } 251 if (rc == -ENODATA || rc == -EOPNOTSUPP) { 252 /* no data, that's ok */ 253 rc = 0; 254 goto out; 255 } 256 if (rc < 0) 257 goto out; 258 259 rc = cap_from_disk(&incaps, bprm, rc); 260 if (rc) 261 printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n", 262 __FUNCTION__, rc, bprm->filename); 263 264 out: 265 dput(dentry); 266 if (rc) 267 bprm_clear_caps(bprm); 268 269 return rc; 270 } 271 272 #else 273 int cap_inode_need_killpriv(struct dentry *dentry) 274 { 275 return 0; 276 } 277 278 int cap_inode_killpriv(struct dentry *dentry) 279 { 280 return 0; 281 } 282 283 static inline int get_file_caps(struct linux_binprm *bprm) 284 { 285 bprm_clear_caps(bprm); 286 return 0; 287 } 288 #endif 289 290 int cap_bprm_set_security (struct linux_binprm *bprm) 291 { 292 int ret; 293 294 ret = get_file_caps(bprm); 295 if (ret) 296 printk(KERN_NOTICE "%s: get_file_caps returned %d for %s\n", 297 __FUNCTION__, ret, bprm->filename); 298 299 /* To support inheritance of root-permissions and suid-root 300 * executables under compatibility mode, we raise all three 301 * capability sets for the file. 302 * 303 * If only the real uid is 0, we only raise the inheritable 304 * and permitted sets of the executable file. 305 */ 306 307 if (!issecure (SECURE_NOROOT)) { 308 if (bprm->e_uid == 0 || current->uid == 0) { 309 cap_set_full (bprm->cap_inheritable); 310 cap_set_full (bprm->cap_permitted); 311 } 312 if (bprm->e_uid == 0) 313 bprm->cap_effective = true; 314 } 315 316 return ret; 317 } 318 319 void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) 320 { 321 /* Derived from fs/exec.c:compute_creds. */ 322 kernel_cap_t new_permitted, working; 323 324 new_permitted = cap_intersect (bprm->cap_permitted, cap_bset); 325 working = cap_intersect (bprm->cap_inheritable, 326 current->cap_inheritable); 327 new_permitted = cap_combine (new_permitted, working); 328 329 if (bprm->e_uid != current->uid || bprm->e_gid != current->gid || 330 !cap_issubset (new_permitted, current->cap_permitted)) { 331 set_dumpable(current->mm, suid_dumpable); 332 current->pdeath_signal = 0; 333 334 if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) { 335 if (!capable(CAP_SETUID)) { 336 bprm->e_uid = current->uid; 337 bprm->e_gid = current->gid; 338 } 339 if (!capable (CAP_SETPCAP)) { 340 new_permitted = cap_intersect (new_permitted, 341 current->cap_permitted); 342 } 343 } 344 } 345 346 current->suid = current->euid = current->fsuid = bprm->e_uid; 347 current->sgid = current->egid = current->fsgid = bprm->e_gid; 348 349 /* For init, we want to retain the capabilities set 350 * in the init_task struct. Thus we skip the usual 351 * capability rules */ 352 if (!is_global_init(current)) { 353 current->cap_permitted = new_permitted; 354 current->cap_effective = bprm->cap_effective ? 355 new_permitted : 0; 356 } 357 358 /* AUD: Audit candidate if current->cap_effective is set */ 359 360 current->keep_capabilities = 0; 361 } 362 363 int cap_bprm_secureexec (struct linux_binprm *bprm) 364 { 365 if (current->uid != 0) { 366 if (bprm->cap_effective) 367 return 1; 368 if (!cap_isclear(bprm->cap_permitted)) 369 return 1; 370 if (!cap_isclear(bprm->cap_inheritable)) 371 return 1; 372 } 373 374 return (current->euid != current->uid || 375 current->egid != current->gid); 376 } 377 378 int cap_inode_setxattr(struct dentry *dentry, char *name, void *value, 379 size_t size, int flags) 380 { 381 if (!strcmp(name, XATTR_NAME_CAPS)) { 382 if (!capable(CAP_SETFCAP)) 383 return -EPERM; 384 return 0; 385 } else if (!strncmp(name, XATTR_SECURITY_PREFIX, 386 sizeof(XATTR_SECURITY_PREFIX) - 1) && 387 !capable(CAP_SYS_ADMIN)) 388 return -EPERM; 389 return 0; 390 } 391 392 int cap_inode_removexattr(struct dentry *dentry, char *name) 393 { 394 if (!strcmp(name, XATTR_NAME_CAPS)) { 395 if (!capable(CAP_SETFCAP)) 396 return -EPERM; 397 return 0; 398 } else if (!strncmp(name, XATTR_SECURITY_PREFIX, 399 sizeof(XATTR_SECURITY_PREFIX) - 1) && 400 !capable(CAP_SYS_ADMIN)) 401 return -EPERM; 402 return 0; 403 } 404 405 /* moved from kernel/sys.c. */ 406 /* 407 * cap_emulate_setxuid() fixes the effective / permitted capabilities of 408 * a process after a call to setuid, setreuid, or setresuid. 409 * 410 * 1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of 411 * {r,e,s}uid != 0, the permitted and effective capabilities are 412 * cleared. 413 * 414 * 2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective 415 * capabilities of the process are cleared. 416 * 417 * 3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective 418 * capabilities are set to the permitted capabilities. 419 * 420 * fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should 421 * never happen. 422 * 423 * -astor 424 * 425 * cevans - New behaviour, Oct '99 426 * A process may, via prctl(), elect to keep its capabilities when it 427 * calls setuid() and switches away from uid==0. Both permitted and 428 * effective sets will be retained. 429 * Without this change, it was impossible for a daemon to drop only some 430 * of its privilege. The call to setuid(!=0) would drop all privileges! 431 * Keeping uid 0 is not an option because uid 0 owns too many vital 432 * files.. 433 * Thanks to Olaf Kirch and Peter Benie for spotting this. 434 */ 435 static inline void cap_emulate_setxuid (int old_ruid, int old_euid, 436 int old_suid) 437 { 438 if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) && 439 (current->uid != 0 && current->euid != 0 && current->suid != 0) && 440 !current->keep_capabilities) { 441 cap_clear (current->cap_permitted); 442 cap_clear (current->cap_effective); 443 } 444 if (old_euid == 0 && current->euid != 0) { 445 cap_clear (current->cap_effective); 446 } 447 if (old_euid != 0 && current->euid == 0) { 448 current->cap_effective = current->cap_permitted; 449 } 450 } 451 452 int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, 453 int flags) 454 { 455 switch (flags) { 456 case LSM_SETID_RE: 457 case LSM_SETID_ID: 458 case LSM_SETID_RES: 459 /* Copied from kernel/sys.c:setreuid/setuid/setresuid. */ 460 if (!issecure (SECURE_NO_SETUID_FIXUP)) { 461 cap_emulate_setxuid (old_ruid, old_euid, old_suid); 462 } 463 break; 464 case LSM_SETID_FS: 465 { 466 uid_t old_fsuid = old_ruid; 467 468 /* Copied from kernel/sys.c:setfsuid. */ 469 470 /* 471 * FIXME - is fsuser used for all CAP_FS_MASK capabilities? 472 * if not, we might be a bit too harsh here. 473 */ 474 475 if (!issecure (SECURE_NO_SETUID_FIXUP)) { 476 if (old_fsuid == 0 && current->fsuid != 0) { 477 cap_t (current->cap_effective) &= 478 ~CAP_FS_MASK; 479 } 480 if (old_fsuid != 0 && current->fsuid == 0) { 481 cap_t (current->cap_effective) |= 482 (cap_t (current->cap_permitted) & 483 CAP_FS_MASK); 484 } 485 } 486 break; 487 } 488 default: 489 return -EINVAL; 490 } 491 492 return 0; 493 } 494 495 #ifdef CONFIG_SECURITY_FILE_CAPABILITIES 496 /* 497 * Rationale: code calling task_setscheduler, task_setioprio, and 498 * task_setnice, assumes that 499 * . if capable(cap_sys_nice), then those actions should be allowed 500 * . if not capable(cap_sys_nice), but acting on your own processes, 501 * then those actions should be allowed 502 * This is insufficient now since you can call code without suid, but 503 * yet with increased caps. 504 * So we check for increased caps on the target process. 505 */ 506 static inline int cap_safe_nice(struct task_struct *p) 507 { 508 if (!cap_issubset(p->cap_permitted, current->cap_permitted) && 509 !__capable(current, CAP_SYS_NICE)) 510 return -EPERM; 511 return 0; 512 } 513 514 int cap_task_setscheduler (struct task_struct *p, int policy, 515 struct sched_param *lp) 516 { 517 return cap_safe_nice(p); 518 } 519 520 int cap_task_setioprio (struct task_struct *p, int ioprio) 521 { 522 return cap_safe_nice(p); 523 } 524 525 int cap_task_setnice (struct task_struct *p, int nice) 526 { 527 return cap_safe_nice(p); 528 } 529 530 int cap_task_kill(struct task_struct *p, struct siginfo *info, 531 int sig, u32 secid) 532 { 533 if (info != SEND_SIG_NOINFO && (is_si_special(info) || SI_FROMKERNEL(info))) 534 return 0; 535 536 /* 537 * Running a setuid root program raises your capabilities. 538 * Killing your own setuid root processes was previously 539 * allowed. 540 * We must preserve legacy signal behavior in this case. 541 */ 542 if (p->euid == 0 && p->uid == current->uid) 543 return 0; 544 545 /* sigcont is permitted within same session */ 546 if (sig == SIGCONT && (task_session_nr(current) == task_session_nr(p))) 547 return 0; 548 549 if (secid) 550 /* 551 * Signal sent as a particular user. 552 * Capabilities are ignored. May be wrong, but it's the 553 * only thing we can do at the moment. 554 * Used only by usb drivers? 555 */ 556 return 0; 557 if (cap_issubset(p->cap_permitted, current->cap_permitted)) 558 return 0; 559 if (capable(CAP_KILL)) 560 return 0; 561 562 return -EPERM; 563 } 564 #else 565 int cap_task_setscheduler (struct task_struct *p, int policy, 566 struct sched_param *lp) 567 { 568 return 0; 569 } 570 int cap_task_setioprio (struct task_struct *p, int ioprio) 571 { 572 return 0; 573 } 574 int cap_task_setnice (struct task_struct *p, int nice) 575 { 576 return 0; 577 } 578 int cap_task_kill(struct task_struct *p, struct siginfo *info, 579 int sig, u32 secid) 580 { 581 return 0; 582 } 583 #endif 584 585 void cap_task_reparent_to_init (struct task_struct *p) 586 { 587 p->cap_effective = CAP_INIT_EFF_SET; 588 p->cap_inheritable = CAP_INIT_INH_SET; 589 p->cap_permitted = CAP_FULL_SET; 590 p->keep_capabilities = 0; 591 return; 592 } 593 594 int cap_syslog (int type) 595 { 596 if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN)) 597 return -EPERM; 598 return 0; 599 } 600 601 int cap_vm_enough_memory(struct mm_struct *mm, long pages) 602 { 603 int cap_sys_admin = 0; 604 605 if (cap_capable(current, CAP_SYS_ADMIN) == 0) 606 cap_sys_admin = 1; 607 return __vm_enough_memory(mm, pages, cap_sys_admin); 608 } 609 610