1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Landlock LSM - Filesystem management and hooks 4 * 5 * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net> 6 * Copyright © 2018-2020 ANSSI 7 */ 8 9 #include <linux/atomic.h> 10 #include <linux/bitops.h> 11 #include <linux/bits.h> 12 #include <linux/compiler_types.h> 13 #include <linux/dcache.h> 14 #include <linux/err.h> 15 #include <linux/fs.h> 16 #include <linux/init.h> 17 #include <linux/kernel.h> 18 #include <linux/limits.h> 19 #include <linux/list.h> 20 #include <linux/lsm_hooks.h> 21 #include <linux/mount.h> 22 #include <linux/namei.h> 23 #include <linux/path.h> 24 #include <linux/rcupdate.h> 25 #include <linux/spinlock.h> 26 #include <linux/stat.h> 27 #include <linux/types.h> 28 #include <linux/wait_bit.h> 29 #include <linux/workqueue.h> 30 #include <uapi/linux/landlock.h> 31 32 #include "common.h" 33 #include "cred.h" 34 #include "fs.h" 35 #include "limits.h" 36 #include "object.h" 37 #include "ruleset.h" 38 #include "setup.h" 39 40 /* Underlying object management */ 41 42 static void release_inode(struct landlock_object *const object) 43 __releases(object->lock) 44 { 45 struct inode *const inode = object->underobj; 46 struct super_block *sb; 47 48 if (!inode) { 49 spin_unlock(&object->lock); 50 return; 51 } 52 53 /* 54 * Protects against concurrent use by hook_sb_delete() of the reference 55 * to the underlying inode. 56 */ 57 object->underobj = NULL; 58 /* 59 * Makes sure that if the filesystem is concurrently unmounted, 60 * hook_sb_delete() will wait for us to finish iput(). 61 */ 62 sb = inode->i_sb; 63 atomic_long_inc(&landlock_superblock(sb)->inode_refs); 64 spin_unlock(&object->lock); 65 /* 66 * Because object->underobj was not NULL, hook_sb_delete() and 67 * get_inode_object() guarantee that it is safe to reset 68 * landlock_inode(inode)->object while it is not NULL. It is therefore 69 * not necessary to lock inode->i_lock. 70 */ 71 rcu_assign_pointer(landlock_inode(inode)->object, NULL); 72 /* 73 * Now, new rules can safely be tied to @inode with get_inode_object(). 74 */ 75 76 iput(inode); 77 if (atomic_long_dec_and_test(&landlock_superblock(sb)->inode_refs)) 78 wake_up_var(&landlock_superblock(sb)->inode_refs); 79 } 80 81 static const struct landlock_object_underops landlock_fs_underops = { 82 .release = release_inode 83 }; 84 85 /* Ruleset management */ 86 87 static struct landlock_object *get_inode_object(struct inode *const inode) 88 { 89 struct landlock_object *object, *new_object; 90 struct landlock_inode_security *inode_sec = landlock_inode(inode); 91 92 rcu_read_lock(); 93 retry: 94 object = rcu_dereference(inode_sec->object); 95 if (object) { 96 if (likely(refcount_inc_not_zero(&object->usage))) { 97 rcu_read_unlock(); 98 return object; 99 } 100 /* 101 * We are racing with release_inode(), the object is going 102 * away. Wait for release_inode(), then retry. 103 */ 104 spin_lock(&object->lock); 105 spin_unlock(&object->lock); 106 goto retry; 107 } 108 rcu_read_unlock(); 109 110 /* 111 * If there is no object tied to @inode, then create a new one (without 112 * holding any locks). 113 */ 114 new_object = landlock_create_object(&landlock_fs_underops, inode); 115 if (IS_ERR(new_object)) 116 return new_object; 117 118 /* 119 * Protects against concurrent calls to get_inode_object() or 120 * hook_sb_delete(). 121 */ 122 spin_lock(&inode->i_lock); 123 if (unlikely(rcu_access_pointer(inode_sec->object))) { 124 /* Someone else just created the object, bail out and retry. */ 125 spin_unlock(&inode->i_lock); 126 kfree(new_object); 127 128 rcu_read_lock(); 129 goto retry; 130 } 131 132 /* 133 * @inode will be released by hook_sb_delete() on its superblock 134 * shutdown, or by release_inode() when no more ruleset references the 135 * related object. 136 */ 137 ihold(inode); 138 rcu_assign_pointer(inode_sec->object, new_object); 139 spin_unlock(&inode->i_lock); 140 return new_object; 141 } 142 143 /* All access rights that can be tied to files. */ 144 /* clang-format off */ 145 #define ACCESS_FILE ( \ 146 LANDLOCK_ACCESS_FS_EXECUTE | \ 147 LANDLOCK_ACCESS_FS_WRITE_FILE | \ 148 LANDLOCK_ACCESS_FS_READ_FILE) 149 /* clang-format on */ 150 151 /* 152 * @path: Should have been checked by get_path_from_fd(). 153 */ 154 int landlock_append_fs_rule(struct landlock_ruleset *const ruleset, 155 const struct path *const path, 156 access_mask_t access_rights) 157 { 158 int err; 159 struct landlock_object *object; 160 161 /* Files only get access rights that make sense. */ 162 if (!d_is_dir(path->dentry) && 163 (access_rights | ACCESS_FILE) != ACCESS_FILE) 164 return -EINVAL; 165 if (WARN_ON_ONCE(ruleset->num_layers != 1)) 166 return -EINVAL; 167 168 /* Transforms relative access rights to absolute ones. */ 169 access_rights |= LANDLOCK_MASK_ACCESS_FS & ~ruleset->fs_access_masks[0]; 170 object = get_inode_object(d_backing_inode(path->dentry)); 171 if (IS_ERR(object)) 172 return PTR_ERR(object); 173 mutex_lock(&ruleset->lock); 174 err = landlock_insert_rule(ruleset, object, access_rights); 175 mutex_unlock(&ruleset->lock); 176 /* 177 * No need to check for an error because landlock_insert_rule() 178 * increments the refcount for the new object if needed. 179 */ 180 landlock_put_object(object); 181 return err; 182 } 183 184 /* Access-control management */ 185 186 /* 187 * The lifetime of the returned rule is tied to @domain. 188 * 189 * Returns NULL if no rule is found or if @dentry is negative. 190 */ 191 static inline const struct landlock_rule * 192 find_rule(const struct landlock_ruleset *const domain, 193 const struct dentry *const dentry) 194 { 195 const struct landlock_rule *rule; 196 const struct inode *inode; 197 198 /* Ignores nonexistent leafs. */ 199 if (d_is_negative(dentry)) 200 return NULL; 201 202 inode = d_backing_inode(dentry); 203 rcu_read_lock(); 204 rule = landlock_find_rule( 205 domain, rcu_dereference(landlock_inode(inode)->object)); 206 rcu_read_unlock(); 207 return rule; 208 } 209 210 static inline layer_mask_t unmask_layers(const struct landlock_rule *const rule, 211 const access_mask_t access_request, 212 layer_mask_t layer_mask) 213 { 214 size_t layer_level; 215 216 if (!rule) 217 return layer_mask; 218 219 /* 220 * An access is granted if, for each policy layer, at least one rule 221 * encountered on the pathwalk grants the requested accesses, 222 * regardless of their position in the layer stack. We must then check 223 * the remaining layers for each inode, from the first added layer to 224 * the last one. 225 */ 226 for (layer_level = 0; layer_level < rule->num_layers; layer_level++) { 227 const struct landlock_layer *const layer = 228 &rule->layers[layer_level]; 229 const layer_mask_t layer_bit = BIT_ULL(layer->level - 1); 230 231 /* Checks that the layer grants access to the full request. */ 232 if ((layer->access & access_request) == access_request) { 233 layer_mask &= ~layer_bit; 234 235 if (layer_mask == 0) 236 return layer_mask; 237 } 238 } 239 return layer_mask; 240 } 241 242 static int check_access_path(const struct landlock_ruleset *const domain, 243 const struct path *const path, 244 const access_mask_t access_request) 245 { 246 bool allowed = false; 247 struct path walker_path; 248 layer_mask_t layer_mask; 249 size_t i; 250 251 if (!access_request) 252 return 0; 253 if (WARN_ON_ONCE(!domain || !path)) 254 return 0; 255 /* 256 * Allows access to pseudo filesystems that will never be mountable 257 * (e.g. sockfs, pipefs), but can still be reachable through 258 * /proc/<pid>/fd/<file-descriptor> . 259 */ 260 if ((path->dentry->d_sb->s_flags & SB_NOUSER) || 261 (d_is_positive(path->dentry) && 262 unlikely(IS_PRIVATE(d_backing_inode(path->dentry))))) 263 return 0; 264 if (WARN_ON_ONCE(domain->num_layers < 1)) 265 return -EACCES; 266 267 /* Saves all layers handling a subset of requested accesses. */ 268 layer_mask = 0; 269 for (i = 0; i < domain->num_layers; i++) { 270 if (domain->fs_access_masks[i] & access_request) 271 layer_mask |= BIT_ULL(i); 272 } 273 /* An access request not handled by the domain is allowed. */ 274 if (layer_mask == 0) 275 return 0; 276 277 walker_path = *path; 278 path_get(&walker_path); 279 /* 280 * We need to walk through all the hierarchy to not miss any relevant 281 * restriction. 282 */ 283 while (true) { 284 struct dentry *parent_dentry; 285 286 layer_mask = 287 unmask_layers(find_rule(domain, walker_path.dentry), 288 access_request, layer_mask); 289 if (layer_mask == 0) { 290 /* Stops when a rule from each layer grants access. */ 291 allowed = true; 292 break; 293 } 294 295 jump_up: 296 if (walker_path.dentry == walker_path.mnt->mnt_root) { 297 if (follow_up(&walker_path)) { 298 /* Ignores hidden mount points. */ 299 goto jump_up; 300 } else { 301 /* 302 * Stops at the real root. Denies access 303 * because not all layers have granted access. 304 */ 305 allowed = false; 306 break; 307 } 308 } 309 if (unlikely(IS_ROOT(walker_path.dentry))) { 310 /* 311 * Stops at disconnected root directories. Only allows 312 * access to internal filesystems (e.g. nsfs, which is 313 * reachable through /proc/<pid>/ns/<namespace>). 314 */ 315 allowed = !!(walker_path.mnt->mnt_flags & MNT_INTERNAL); 316 break; 317 } 318 parent_dentry = dget_parent(walker_path.dentry); 319 dput(walker_path.dentry); 320 walker_path.dentry = parent_dentry; 321 } 322 path_put(&walker_path); 323 return allowed ? 0 : -EACCES; 324 } 325 326 static inline int current_check_access_path(const struct path *const path, 327 const access_mask_t access_request) 328 { 329 const struct landlock_ruleset *const dom = 330 landlock_get_current_domain(); 331 332 if (!dom) 333 return 0; 334 return check_access_path(dom, path, access_request); 335 } 336 337 /* Inode hooks */ 338 339 static void hook_inode_free_security(struct inode *const inode) 340 { 341 /* 342 * All inodes must already have been untied from their object by 343 * release_inode() or hook_sb_delete(). 344 */ 345 WARN_ON_ONCE(landlock_inode(inode)->object); 346 } 347 348 /* Super-block hooks */ 349 350 /* 351 * Release the inodes used in a security policy. 352 * 353 * Cf. fsnotify_unmount_inodes() and invalidate_inodes() 354 */ 355 static void hook_sb_delete(struct super_block *const sb) 356 { 357 struct inode *inode, *prev_inode = NULL; 358 359 if (!landlock_initialized) 360 return; 361 362 spin_lock(&sb->s_inode_list_lock); 363 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 364 struct landlock_object *object; 365 366 /* Only handles referenced inodes. */ 367 if (!atomic_read(&inode->i_count)) 368 continue; 369 370 /* 371 * Protects against concurrent modification of inode (e.g. 372 * from get_inode_object()). 373 */ 374 spin_lock(&inode->i_lock); 375 /* 376 * Checks I_FREEING and I_WILL_FREE to protect against a race 377 * condition when release_inode() just called iput(), which 378 * could lead to a NULL dereference of inode->security or a 379 * second call to iput() for the same Landlock object. Also 380 * checks I_NEW because such inode cannot be tied to an object. 381 */ 382 if (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) { 383 spin_unlock(&inode->i_lock); 384 continue; 385 } 386 387 rcu_read_lock(); 388 object = rcu_dereference(landlock_inode(inode)->object); 389 if (!object) { 390 rcu_read_unlock(); 391 spin_unlock(&inode->i_lock); 392 continue; 393 } 394 /* Keeps a reference to this inode until the next loop walk. */ 395 __iget(inode); 396 spin_unlock(&inode->i_lock); 397 398 /* 399 * If there is no concurrent release_inode() ongoing, then we 400 * are in charge of calling iput() on this inode, otherwise we 401 * will just wait for it to finish. 402 */ 403 spin_lock(&object->lock); 404 if (object->underobj == inode) { 405 object->underobj = NULL; 406 spin_unlock(&object->lock); 407 rcu_read_unlock(); 408 409 /* 410 * Because object->underobj was not NULL, 411 * release_inode() and get_inode_object() guarantee 412 * that it is safe to reset 413 * landlock_inode(inode)->object while it is not NULL. 414 * It is therefore not necessary to lock inode->i_lock. 415 */ 416 rcu_assign_pointer(landlock_inode(inode)->object, NULL); 417 /* 418 * At this point, we own the ihold() reference that was 419 * originally set up by get_inode_object() and the 420 * __iget() reference that we just set in this loop 421 * walk. Therefore the following call to iput() will 422 * not sleep nor drop the inode because there is now at 423 * least two references to it. 424 */ 425 iput(inode); 426 } else { 427 spin_unlock(&object->lock); 428 rcu_read_unlock(); 429 } 430 431 if (prev_inode) { 432 /* 433 * At this point, we still own the __iget() reference 434 * that we just set in this loop walk. Therefore we 435 * can drop the list lock and know that the inode won't 436 * disappear from under us until the next loop walk. 437 */ 438 spin_unlock(&sb->s_inode_list_lock); 439 /* 440 * We can now actually put the inode reference from the 441 * previous loop walk, which is not needed anymore. 442 */ 443 iput(prev_inode); 444 cond_resched(); 445 spin_lock(&sb->s_inode_list_lock); 446 } 447 prev_inode = inode; 448 } 449 spin_unlock(&sb->s_inode_list_lock); 450 451 /* Puts the inode reference from the last loop walk, if any. */ 452 if (prev_inode) 453 iput(prev_inode); 454 /* Waits for pending iput() in release_inode(). */ 455 wait_var_event(&landlock_superblock(sb)->inode_refs, 456 !atomic_long_read(&landlock_superblock(sb)->inode_refs)); 457 } 458 459 /* 460 * Because a Landlock security policy is defined according to the filesystem 461 * topology (i.e. the mount namespace), changing it may grant access to files 462 * not previously allowed. 463 * 464 * To make it simple, deny any filesystem topology modification by landlocked 465 * processes. Non-landlocked processes may still change the namespace of a 466 * landlocked process, but this kind of threat must be handled by a system-wide 467 * access-control security policy. 468 * 469 * This could be lifted in the future if Landlock can safely handle mount 470 * namespace updates requested by a landlocked process. Indeed, we could 471 * update the current domain (which is currently read-only) by taking into 472 * account the accesses of the source and the destination of a new mount point. 473 * However, it would also require to make all the child domains dynamically 474 * inherit these new constraints. Anyway, for backward compatibility reasons, 475 * a dedicated user space option would be required (e.g. as a ruleset flag). 476 */ 477 static int hook_sb_mount(const char *const dev_name, 478 const struct path *const path, const char *const type, 479 const unsigned long flags, void *const data) 480 { 481 if (!landlock_get_current_domain()) 482 return 0; 483 return -EPERM; 484 } 485 486 static int hook_move_mount(const struct path *const from_path, 487 const struct path *const to_path) 488 { 489 if (!landlock_get_current_domain()) 490 return 0; 491 return -EPERM; 492 } 493 494 /* 495 * Removing a mount point may reveal a previously hidden file hierarchy, which 496 * may then grant access to files, which may have previously been forbidden. 497 */ 498 static int hook_sb_umount(struct vfsmount *const mnt, const int flags) 499 { 500 if (!landlock_get_current_domain()) 501 return 0; 502 return -EPERM; 503 } 504 505 static int hook_sb_remount(struct super_block *const sb, void *const mnt_opts) 506 { 507 if (!landlock_get_current_domain()) 508 return 0; 509 return -EPERM; 510 } 511 512 /* 513 * pivot_root(2), like mount(2), changes the current mount namespace. It must 514 * then be forbidden for a landlocked process. 515 * 516 * However, chroot(2) may be allowed because it only changes the relative root 517 * directory of the current process. Moreover, it can be used to restrict the 518 * view of the filesystem. 519 */ 520 static int hook_sb_pivotroot(const struct path *const old_path, 521 const struct path *const new_path) 522 { 523 if (!landlock_get_current_domain()) 524 return 0; 525 return -EPERM; 526 } 527 528 /* Path hooks */ 529 530 static inline access_mask_t get_mode_access(const umode_t mode) 531 { 532 switch (mode & S_IFMT) { 533 case S_IFLNK: 534 return LANDLOCK_ACCESS_FS_MAKE_SYM; 535 case 0: 536 /* A zero mode translates to S_IFREG. */ 537 case S_IFREG: 538 return LANDLOCK_ACCESS_FS_MAKE_REG; 539 case S_IFDIR: 540 return LANDLOCK_ACCESS_FS_MAKE_DIR; 541 case S_IFCHR: 542 return LANDLOCK_ACCESS_FS_MAKE_CHAR; 543 case S_IFBLK: 544 return LANDLOCK_ACCESS_FS_MAKE_BLOCK; 545 case S_IFIFO: 546 return LANDLOCK_ACCESS_FS_MAKE_FIFO; 547 case S_IFSOCK: 548 return LANDLOCK_ACCESS_FS_MAKE_SOCK; 549 default: 550 WARN_ON_ONCE(1); 551 return 0; 552 } 553 } 554 555 /* 556 * Creating multiple links or renaming may lead to privilege escalations if not 557 * handled properly. Indeed, we must be sure that the source doesn't gain more 558 * privileges by being accessible from the destination. This is getting more 559 * complex when dealing with multiple layers. The whole picture can be seen as 560 * a multilayer partial ordering problem. A future version of Landlock will 561 * deal with that. 562 */ 563 static int hook_path_link(struct dentry *const old_dentry, 564 const struct path *const new_dir, 565 struct dentry *const new_dentry) 566 { 567 const struct landlock_ruleset *const dom = 568 landlock_get_current_domain(); 569 570 if (!dom) 571 return 0; 572 /* The mount points are the same for old and new paths, cf. EXDEV. */ 573 if (old_dentry->d_parent != new_dir->dentry) 574 /* Gracefully forbids reparenting. */ 575 return -EXDEV; 576 if (unlikely(d_is_negative(old_dentry))) 577 return -ENOENT; 578 return check_access_path( 579 dom, new_dir, 580 get_mode_access(d_backing_inode(old_dentry)->i_mode)); 581 } 582 583 static inline access_mask_t maybe_remove(const struct dentry *const dentry) 584 { 585 if (d_is_negative(dentry)) 586 return 0; 587 return d_is_dir(dentry) ? LANDLOCK_ACCESS_FS_REMOVE_DIR : 588 LANDLOCK_ACCESS_FS_REMOVE_FILE; 589 } 590 591 static int hook_path_rename(const struct path *const old_dir, 592 struct dentry *const old_dentry, 593 const struct path *const new_dir, 594 struct dentry *const new_dentry) 595 { 596 const struct landlock_ruleset *const dom = 597 landlock_get_current_domain(); 598 599 if (!dom) 600 return 0; 601 /* The mount points are the same for old and new paths, cf. EXDEV. */ 602 if (old_dir->dentry != new_dir->dentry) 603 /* Gracefully forbids reparenting. */ 604 return -EXDEV; 605 if (unlikely(d_is_negative(old_dentry))) 606 return -ENOENT; 607 /* RENAME_EXCHANGE is handled because directories are the same. */ 608 return check_access_path( 609 dom, old_dir, 610 maybe_remove(old_dentry) | maybe_remove(new_dentry) | 611 get_mode_access(d_backing_inode(old_dentry)->i_mode)); 612 } 613 614 static int hook_path_mkdir(const struct path *const dir, 615 struct dentry *const dentry, const umode_t mode) 616 { 617 return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_DIR); 618 } 619 620 static int hook_path_mknod(const struct path *const dir, 621 struct dentry *const dentry, const umode_t mode, 622 const unsigned int dev) 623 { 624 const struct landlock_ruleset *const dom = 625 landlock_get_current_domain(); 626 627 if (!dom) 628 return 0; 629 return check_access_path(dom, dir, get_mode_access(mode)); 630 } 631 632 static int hook_path_symlink(const struct path *const dir, 633 struct dentry *const dentry, 634 const char *const old_name) 635 { 636 return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_SYM); 637 } 638 639 static int hook_path_unlink(const struct path *const dir, 640 struct dentry *const dentry) 641 { 642 return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_FILE); 643 } 644 645 static int hook_path_rmdir(const struct path *const dir, 646 struct dentry *const dentry) 647 { 648 return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_DIR); 649 } 650 651 /* File hooks */ 652 653 static inline access_mask_t get_file_access(const struct file *const file) 654 { 655 access_mask_t access = 0; 656 657 if (file->f_mode & FMODE_READ) { 658 /* A directory can only be opened in read mode. */ 659 if (S_ISDIR(file_inode(file)->i_mode)) 660 return LANDLOCK_ACCESS_FS_READ_DIR; 661 access = LANDLOCK_ACCESS_FS_READ_FILE; 662 } 663 if (file->f_mode & FMODE_WRITE) 664 access |= LANDLOCK_ACCESS_FS_WRITE_FILE; 665 /* __FMODE_EXEC is indeed part of f_flags, not f_mode. */ 666 if (file->f_flags & __FMODE_EXEC) 667 access |= LANDLOCK_ACCESS_FS_EXECUTE; 668 return access; 669 } 670 671 static int hook_file_open(struct file *const file) 672 { 673 const struct landlock_ruleset *const dom = 674 landlock_get_current_domain(); 675 676 if (!dom) 677 return 0; 678 /* 679 * Because a file may be opened with O_PATH, get_file_access() may 680 * return 0. This case will be handled with a future Landlock 681 * evolution. 682 */ 683 return check_access_path(dom, &file->f_path, get_file_access(file)); 684 } 685 686 static struct security_hook_list landlock_hooks[] __lsm_ro_after_init = { 687 LSM_HOOK_INIT(inode_free_security, hook_inode_free_security), 688 689 LSM_HOOK_INIT(sb_delete, hook_sb_delete), 690 LSM_HOOK_INIT(sb_mount, hook_sb_mount), 691 LSM_HOOK_INIT(move_mount, hook_move_mount), 692 LSM_HOOK_INIT(sb_umount, hook_sb_umount), 693 LSM_HOOK_INIT(sb_remount, hook_sb_remount), 694 LSM_HOOK_INIT(sb_pivotroot, hook_sb_pivotroot), 695 696 LSM_HOOK_INIT(path_link, hook_path_link), 697 LSM_HOOK_INIT(path_rename, hook_path_rename), 698 LSM_HOOK_INIT(path_mkdir, hook_path_mkdir), 699 LSM_HOOK_INIT(path_mknod, hook_path_mknod), 700 LSM_HOOK_INIT(path_symlink, hook_path_symlink), 701 LSM_HOOK_INIT(path_unlink, hook_path_unlink), 702 LSM_HOOK_INIT(path_rmdir, hook_path_rmdir), 703 704 LSM_HOOK_INIT(file_open, hook_file_open), 705 }; 706 707 __init void landlock_add_fs_hooks(void) 708 { 709 security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks), 710 LANDLOCK_NAME); 711 } 712