1 /* 2 * 3 * Copyright (C) 2011 Novell Inc. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 as published by 7 * the Free Software Foundation. 8 */ 9 10 #include <uapi/linux/magic.h> 11 #include <linux/fs.h> 12 #include <linux/namei.h> 13 #include <linux/xattr.h> 14 #include <linux/mount.h> 15 #include <linux/parser.h> 16 #include <linux/module.h> 17 #include <linux/statfs.h> 18 #include <linux/seq_file.h> 19 #include <linux/posix_acl_xattr.h> 20 #include "overlayfs.h" 21 22 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 23 MODULE_DESCRIPTION("Overlay filesystem"); 24 MODULE_LICENSE("GPL"); 25 26 27 struct ovl_dir_cache; 28 29 #define OVL_MAX_STACK 500 30 31 static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR); 32 module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644); 33 MODULE_PARM_DESC(ovl_redirect_dir_def, 34 "Default to on or off for the redirect_dir feature"); 35 36 static bool ovl_redirect_always_follow = 37 IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW); 38 module_param_named(redirect_always_follow, ovl_redirect_always_follow, 39 bool, 0644); 40 MODULE_PARM_DESC(ovl_redirect_always_follow, 41 "Follow redirects even if redirect_dir feature is turned off"); 42 43 static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX); 44 module_param_named(index, ovl_index_def, bool, 0644); 45 MODULE_PARM_DESC(ovl_index_def, 46 "Default to on or off for the inodes index feature"); 47 48 static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT); 49 module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644); 50 MODULE_PARM_DESC(ovl_nfs_export_def, 51 "Default to on or off for the NFS export feature"); 52 53 static void ovl_entry_stack_free(struct ovl_entry *oe) 54 { 55 unsigned int i; 56 57 for (i = 0; i < oe->numlower; i++) 58 dput(oe->lowerstack[i].dentry); 59 } 60 61 static void ovl_dentry_release(struct 
dentry *dentry) 62 { 63 struct ovl_entry *oe = dentry->d_fsdata; 64 65 if (oe) { 66 ovl_entry_stack_free(oe); 67 kfree_rcu(oe, rcu); 68 } 69 } 70 71 static int ovl_check_append_only(struct inode *inode, int flag) 72 { 73 /* 74 * This test was moot in vfs may_open() because overlay inode does 75 * not have the S_APPEND flag, so re-check on real upper inode 76 */ 77 if (IS_APPEND(inode)) { 78 if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND)) 79 return -EPERM; 80 if (flag & O_TRUNC) 81 return -EPERM; 82 } 83 84 return 0; 85 } 86 87 static struct dentry *ovl_d_real(struct dentry *dentry, 88 const struct inode *inode, 89 unsigned int open_flags, unsigned int flags) 90 { 91 struct dentry *real; 92 int err; 93 94 if (flags & D_REAL_UPPER) 95 return ovl_dentry_upper(dentry); 96 97 if (!d_is_reg(dentry)) { 98 if (!inode || inode == d_inode(dentry)) 99 return dentry; 100 goto bug; 101 } 102 103 if (open_flags) { 104 err = ovl_open_maybe_copy_up(dentry, open_flags); 105 if (err) 106 return ERR_PTR(err); 107 } 108 109 real = ovl_dentry_upper(dentry); 110 if (real && (!inode || inode == d_inode(real))) { 111 if (!inode) { 112 err = ovl_check_append_only(d_inode(real), open_flags); 113 if (err) 114 return ERR_PTR(err); 115 } 116 return real; 117 } 118 119 real = ovl_dentry_lower(dentry); 120 if (!real) 121 goto bug; 122 123 /* Handle recursion */ 124 real = d_real(real, inode, open_flags, 0); 125 126 if (!inode || inode == d_inode(real)) 127 return real; 128 bug: 129 WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry, 130 inode ? inode->i_sb->s_id : "NULL", inode ? 
inode->i_ino : 0); 131 return dentry; 132 } 133 134 static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) 135 { 136 struct ovl_entry *oe = dentry->d_fsdata; 137 unsigned int i; 138 int ret = 1; 139 140 for (i = 0; i < oe->numlower; i++) { 141 struct dentry *d = oe->lowerstack[i].dentry; 142 143 if (d->d_flags & DCACHE_OP_REVALIDATE) { 144 ret = d->d_op->d_revalidate(d, flags); 145 if (ret < 0) 146 return ret; 147 if (!ret) { 148 if (!(flags & LOOKUP_RCU)) 149 d_invalidate(d); 150 return -ESTALE; 151 } 152 } 153 } 154 return 1; 155 } 156 157 static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) 158 { 159 struct ovl_entry *oe = dentry->d_fsdata; 160 unsigned int i; 161 int ret = 1; 162 163 for (i = 0; i < oe->numlower; i++) { 164 struct dentry *d = oe->lowerstack[i].dentry; 165 166 if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) { 167 ret = d->d_op->d_weak_revalidate(d, flags); 168 if (ret <= 0) 169 break; 170 } 171 } 172 return ret; 173 } 174 175 static const struct dentry_operations ovl_dentry_operations = { 176 .d_release = ovl_dentry_release, 177 .d_real = ovl_d_real, 178 }; 179 180 static const struct dentry_operations ovl_reval_dentry_operations = { 181 .d_release = ovl_dentry_release, 182 .d_real = ovl_d_real, 183 .d_revalidate = ovl_dentry_revalidate, 184 .d_weak_revalidate = ovl_dentry_weak_revalidate, 185 }; 186 187 static struct kmem_cache *ovl_inode_cachep; 188 189 static struct inode *ovl_alloc_inode(struct super_block *sb) 190 { 191 struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL); 192 193 if (!oi) 194 return NULL; 195 196 oi->cache = NULL; 197 oi->redirect = NULL; 198 oi->version = 0; 199 oi->flags = 0; 200 oi->__upperdentry = NULL; 201 oi->lower = NULL; 202 mutex_init(&oi->lock); 203 204 return &oi->vfs_inode; 205 } 206 207 static void ovl_i_callback(struct rcu_head *head) 208 { 209 struct inode *inode = container_of(head, struct inode, i_rcu); 210 211 kmem_cache_free(ovl_inode_cachep, 
OVL_I(inode)); 212 } 213 214 static void ovl_destroy_inode(struct inode *inode) 215 { 216 struct ovl_inode *oi = OVL_I(inode); 217 218 dput(oi->__upperdentry); 219 iput(oi->lower); 220 kfree(oi->redirect); 221 ovl_dir_cache_free(inode); 222 mutex_destroy(&oi->lock); 223 224 call_rcu(&inode->i_rcu, ovl_i_callback); 225 } 226 227 static void ovl_free_fs(struct ovl_fs *ofs) 228 { 229 unsigned i; 230 231 dput(ofs->indexdir); 232 dput(ofs->workdir); 233 if (ofs->workdir_locked) 234 ovl_inuse_unlock(ofs->workbasedir); 235 dput(ofs->workbasedir); 236 if (ofs->upperdir_locked) 237 ovl_inuse_unlock(ofs->upper_mnt->mnt_root); 238 mntput(ofs->upper_mnt); 239 for (i = 0; i < ofs->numlower; i++) { 240 mntput(ofs->lower_layers[i].mnt); 241 free_anon_bdev(ofs->lower_layers[i].pseudo_dev); 242 } 243 kfree(ofs->lower_layers); 244 245 kfree(ofs->config.lowerdir); 246 kfree(ofs->config.upperdir); 247 kfree(ofs->config.workdir); 248 kfree(ofs->config.redirect_mode); 249 if (ofs->creator_cred) 250 put_cred(ofs->creator_cred); 251 kfree(ofs); 252 } 253 254 static void ovl_put_super(struct super_block *sb) 255 { 256 struct ovl_fs *ofs = sb->s_fs_info; 257 258 ovl_free_fs(ofs); 259 } 260 261 /* Sync real dirty inodes in upper filesystem (if it exists) */ 262 static int ovl_sync_fs(struct super_block *sb, int wait) 263 { 264 struct ovl_fs *ofs = sb->s_fs_info; 265 struct super_block *upper_sb; 266 int ret; 267 268 if (!ofs->upper_mnt) 269 return 0; 270 271 /* 272 * If this is a sync(2) call or an emergency sync, all the super blocks 273 * will be iterated, including upper_sb, so no need to do anything. 274 * 275 * If this is a syncfs(2) call, then we do need to call 276 * sync_filesystem() on upper_sb, but enough if we do it when being 277 * called with wait == 1. 
/**
 * ovl_statfs
 * @dentry: A dentry on the overlayfs super block; only its d_sb is used
 * @buf: The struct kstatfs to fill in with stats
 *
 * Get the filesystem statistics.  As writes always target the upper layer
 * filesystem pass the statfs to the upper filesystem (if it exists)
 *
 * Returns the result of vfs_statfs() on the real path of the overlay root.
 * On success, f_namelen and f_type in @buf are replaced with overlayfs' own
 * values.
 */
static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
	struct dentry *root_dentry = dentry->d_sb->s_root;
	struct path path;
	int err;

	/*
	 * Always stat via the overlay root, whatever @dentry was passed in.
	 * NOTE(review): presumably ovl_path_real() picks the upper path when
	 * there is one and a lower path otherwise -- confirm in util.c.
	 */
	ovl_path_real(root_dentry, &path);

	err = vfs_statfs(&path, buf);
	if (!err) {
		/* Report the name length limit recorded in ofs->namelen */
		buf->f_namelen = ofs->namelen;
		/* Identify as overlayfs, not as the underlying filesystem */
		buf->f_type = OVERLAYFS_SUPER_MAGIC;
	}

	return err;
}
333 */ 334 static int ovl_show_options(struct seq_file *m, struct dentry *dentry) 335 { 336 struct super_block *sb = dentry->d_sb; 337 struct ovl_fs *ofs = sb->s_fs_info; 338 339 seq_show_option(m, "lowerdir", ofs->config.lowerdir); 340 if (ofs->config.upperdir) { 341 seq_show_option(m, "upperdir", ofs->config.upperdir); 342 seq_show_option(m, "workdir", ofs->config.workdir); 343 } 344 if (ofs->config.default_permissions) 345 seq_puts(m, ",default_permissions"); 346 if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0) 347 seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode); 348 if (ofs->config.index != ovl_index_def) 349 seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off"); 350 if (ofs->config.nfs_export != ovl_nfs_export_def) 351 seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ? 352 "on" : "off"); 353 return 0; 354 } 355 356 static int ovl_remount(struct super_block *sb, int *flags, char *data) 357 { 358 struct ovl_fs *ofs = sb->s_fs_info; 359 360 if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs)) 361 return -EROFS; 362 363 return 0; 364 } 365 366 static const struct super_operations ovl_super_operations = { 367 .alloc_inode = ovl_alloc_inode, 368 .destroy_inode = ovl_destroy_inode, 369 .drop_inode = generic_delete_inode, 370 .put_super = ovl_put_super, 371 .sync_fs = ovl_sync_fs, 372 .statfs = ovl_statfs, 373 .show_options = ovl_show_options, 374 .remount_fs = ovl_remount, 375 }; 376 377 enum { 378 OPT_LOWERDIR, 379 OPT_UPPERDIR, 380 OPT_WORKDIR, 381 OPT_DEFAULT_PERMISSIONS, 382 OPT_REDIRECT_DIR, 383 OPT_INDEX_ON, 384 OPT_INDEX_OFF, 385 OPT_NFS_EXPORT_ON, 386 OPT_NFS_EXPORT_OFF, 387 OPT_ERR, 388 }; 389 390 static const match_table_t ovl_tokens = { 391 {OPT_LOWERDIR, "lowerdir=%s"}, 392 {OPT_UPPERDIR, "upperdir=%s"}, 393 {OPT_WORKDIR, "workdir=%s"}, 394 {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, 395 {OPT_REDIRECT_DIR, "redirect_dir=%s"}, 396 {OPT_INDEX_ON, "index=on"}, 397 {OPT_INDEX_OFF, "index=off"}, 398 
/*
 * Cut the next comma separated option off the front of *@s.
 *
 * A backslash escapes the character that follows it, so "\," does not end
 * the option.  Escapes are left in place; the string is only split.
 *
 * The terminating comma is overwritten with NUL and *@s is advanced past it.
 * When the last option is returned (no further unescaped comma), *@s is set
 * to NULL.  Returns the start of the option, or NULL if *@s was already
 * NULL.
 */
static char *ovl_next_opt(char **s)
{
	char *start = *s;
	char *cur = start;

	if (!start)
		return NULL;

	while (*cur) {
		if (*cur == ',') {
			*cur = '\0';
			*s = cur + 1;
			return start;
		}
		/* Step onto the escaped character; stop on a dangling '\' */
		if (*cur == '\\' && !*++cur)
			break;
		cur++;
	}

	*s = NULL;
	return start;
}
484 return -ENOMEM; 485 break; 486 487 case OPT_DEFAULT_PERMISSIONS: 488 config->default_permissions = true; 489 break; 490 491 case OPT_REDIRECT_DIR: 492 kfree(config->redirect_mode); 493 config->redirect_mode = match_strdup(&args[0]); 494 if (!config->redirect_mode) 495 return -ENOMEM; 496 break; 497 498 case OPT_INDEX_ON: 499 config->index = true; 500 break; 501 502 case OPT_INDEX_OFF: 503 config->index = false; 504 break; 505 506 case OPT_NFS_EXPORT_ON: 507 config->nfs_export = true; 508 break; 509 510 case OPT_NFS_EXPORT_OFF: 511 config->nfs_export = false; 512 break; 513 514 default: 515 pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); 516 return -EINVAL; 517 } 518 } 519 520 /* Workdir is useless in non-upper mount */ 521 if (!config->upperdir && config->workdir) { 522 pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n", 523 config->workdir); 524 kfree(config->workdir); 525 config->workdir = NULL; 526 } 527 528 return ovl_parse_redirect_mode(config, config->redirect_mode); 529 } 530 531 #define OVL_WORKDIR_NAME "work" 532 #define OVL_INDEXDIR_NAME "index" 533 534 static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, 535 const char *name, bool persist) 536 { 537 struct inode *dir = ofs->workbasedir->d_inode; 538 struct vfsmount *mnt = ofs->upper_mnt; 539 struct dentry *work; 540 int err; 541 bool retried = false; 542 bool locked = false; 543 544 inode_lock_nested(dir, I_MUTEX_PARENT); 545 locked = true; 546 547 retry: 548 work = lookup_one_len(name, ofs->workbasedir, strlen(name)); 549 550 if (!IS_ERR(work)) { 551 struct iattr attr = { 552 .ia_valid = ATTR_MODE, 553 .ia_mode = S_IFDIR | 0, 554 }; 555 556 if (work->d_inode) { 557 err = -EEXIST; 558 if (retried) 559 goto out_dput; 560 561 if (persist) 562 goto out_unlock; 563 564 retried = true; 565 ovl_workdir_cleanup(dir, mnt, work, 0); 566 dput(work); 567 goto retry; 568 } 569 570 err = ovl_create_real(dir, work, 571 &(struct cattr){.mode = 
/*
 * Remove backslash escapes from @s in place.
 *
 * Each backslash is dropped and the character after it is kept verbatim
 * (so "\\" yields a single backslash).  A backslash that is the very last
 * character is simply dropped.
 */
static void ovl_unescape(char *s)
{
	char *out = s;
	char ch;

	do {
		if (*s == '\\')
			s++;
		ch = *s++;
		*out++ = ch;
	} while (ch);
}
(ovl_dentry_weird(path->dentry)) { 650 pr_err("overlayfs: filesystem on '%s' not supported\n", name); 651 goto out_put; 652 } 653 if (!d_is_dir(path->dentry)) { 654 pr_err("overlayfs: '%s' not a directory\n", name); 655 goto out_put; 656 } 657 return 0; 658 659 out_put: 660 path_put_init(path); 661 out: 662 return err; 663 } 664 665 static int ovl_mount_dir(const char *name, struct path *path) 666 { 667 int err = -ENOMEM; 668 char *tmp = kstrdup(name, GFP_KERNEL); 669 670 if (tmp) { 671 ovl_unescape(tmp); 672 err = ovl_mount_dir_noesc(tmp, path); 673 674 if (!err) 675 if (ovl_dentry_remote(path->dentry)) { 676 pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n", 677 tmp); 678 path_put_init(path); 679 err = -EINVAL; 680 } 681 kfree(tmp); 682 } 683 return err; 684 } 685 686 static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs, 687 const char *name) 688 { 689 struct kstatfs statfs; 690 int err = vfs_statfs(path, &statfs); 691 692 if (err) 693 pr_err("overlayfs: statfs failed on '%s'\n", name); 694 else 695 ofs->namelen = max(ofs->namelen, statfs.f_namelen); 696 697 return err; 698 } 699 700 static int ovl_lower_dir(const char *name, struct path *path, 701 struct ovl_fs *ofs, int *stack_depth, bool *remote) 702 { 703 int err; 704 705 err = ovl_mount_dir_noesc(name, path); 706 if (err) 707 goto out; 708 709 err = ovl_check_namelen(path, ofs, name); 710 if (err) 711 goto out_put; 712 713 *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth); 714 715 if (ovl_dentry_remote(path->dentry)) 716 *remote = true; 717 718 /* 719 * The inodes index feature and NFS export need to encode and decode 720 * file handles, so they require that all layers support them. 
/*
 * Split a colon separated lowerdir list in place.
 *
 * Every unescaped ':' becomes a NUL terminator, and backslash escapes are
 * removed at the same time, so @str ends up as a sequence of back-to-back
 * NUL terminated strings.
 *
 * Returns the number of resulting strings, which is always at least 1.
 */
static unsigned int ovl_split_lowerdirs(char *str)
{
	unsigned int count = 1;
	char *src = str;
	char *dst = str;

	for (;;) {
		char ch = *src++;

		if (ch == ':') {
			/* Unescaped separator: terminate this entry */
			*dst++ = '\0';
			count++;
			continue;
		}

		/* Escaped character: drop the backslash, keep what follows */
		if (ch == '\\')
			ch = *src++;

		*dst++ = ch;
		if (ch == '\0')
			return count;
	}
}
/*
 * ->get callback of ovl_own_xattr_handler (prefix OVL_XATTR_PREFIX).
 *
 * Unconditionally refuses the request with -EOPNOTSUPP; none of the
 * arguments are looked at.
 */
static int ovl_own_xattr_get(const struct xattr_handler *handler,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, void *buffer, size_t size)
{
	return -EOPNOTSUPP;
}

/*
 * ->set callback of ovl_own_xattr_handler (prefix OVL_XATTR_PREFIX).
 *
 * Unconditionally refuses the request with -EOPNOTSUPP; none of the
 * arguments are looked at.
 */
static int ovl_own_xattr_set(const struct xattr_handler *handler,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, const void *value,
			     size_t size, int flags)
{
	return -EOPNOTSUPP;
}

/*
 * ->get callback of the catch-all handler (empty prefix).
 *
 * Forwards to ovl_xattr_get() with @name passed through unchanged and
 * returns its result.  @handler is not used.
 */
static int ovl_other_xattr_get(const struct xattr_handler *handler,
			       struct dentry *dentry, struct inode *inode,
			       const char *name, void *buffer, size_t size)
{
	return ovl_xattr_get(dentry, inode, name, buffer, size);
}

/*
 * ->set callback of the catch-all handler (empty prefix).
 *
 * Forwards to ovl_xattr_set() with @name, @value, @size and @flags passed
 * through unchanged and returns its result.  @handler is not used.
 */
static int ovl_other_xattr_set(const struct xattr_handler *handler,
			       struct dentry *dentry, struct inode *inode,
			       const char *name, const void *value,
			       size_t size, int flags)
{
	return ovl_xattr_set(dentry, inode, name, value, size, flags);
}
/*
 * NULL-terminated table of the xattr handlers defined in this file.
 *
 * The two POSIX ACL handlers are only present when CONFIG_FS_POSIX_ACL is
 * set.  The handler for overlayfs' own prefix comes before the catch-all
 * handler with the empty prefix.
 *
 * NOTE(review): presumably the VFS walks this table in order and uses the
 * first match, which would be why the catch-all entry is last -- confirm
 * before reordering.
 */
static const struct xattr_handler *ovl_xattr_handlers[] = {
#ifdef CONFIG_FS_POSIX_ACL
	&ovl_posix_acl_access_xattr_handler,
	&ovl_posix_acl_default_xattr_handler,
#endif
	&ovl_own_xattr_handler,
	&ovl_other_xattr_handler,
	NULL
};
/*
 * Create the "work" directory under ofs->workbasedir and probe what the
 * upper/work filesystem supports.
 *
 * Holds write access to the upper mount for the duration of the call.
 * Results are recorded in @ofs: ->workdir, ->tmpfile, ->noxattr, and the
 * index/nfs_export config flags are switched off when a prerequisite is
 * missing.
 *
 * Returns 0 on success, which includes the case where the work directory
 * could not be created (ofs->workdir is then NULL and the caller has to
 * check it).  Returns a negative errno if write access could not be
 * obtained or the d_type check itself failed.
 */
static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
{
	struct vfsmount *mnt = ofs->upper_mnt;
	struct dentry *temp;
	int err;

	err = mnt_want_write(mnt);
	if (err)
		return err;

	/* err is 0 here, so a failed creation is not reported as an error */
	ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
	if (!ofs->workdir)
		goto out;

	/*
	 * Upper should support d_type, else whiteouts are visible.  Given
	 * workdir and upper are on same fs, we can do iterate_dir() on
	 * workdir. This check requires successful creation of workdir in
	 * previous step.
	 */
	err = ovl_check_d_type_supported(workpath);
	if (err < 0)
		goto out;

	/*
	 * We allowed this configuration and don't want to break users over
	 * kernel upgrade. So warn instead of erroring out.
	 */
	if (!err)
		pr_warn("overlayfs: upper fs needs to support d_type.\n");

	/* Check if upper/work fs supports O_TMPFILE */
	temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0);
	ofs->tmpfile = !IS_ERR(temp);
	if (ofs->tmpfile)
		dput(temp);
	else
		pr_warn("overlayfs: upper fs does not support tmpfile.\n");

	/*
	 * Check if upper/work fs supports trusted.overlay.* xattr
	 *
	 * From here on err no longer holds the d_type result.  A failure to
	 * set the probe xattr is downgraded to "no xattr support" and err is
	 * reset to 0; on success the probe xattr is removed again.
	 */
	err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0);
	if (err) {
		ofs->noxattr = true;
		ofs->config.index = false;
		pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off.\n");
		err = 0;
	} else {
		vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
	}

	/* Check if upper/work fs supports file handles */
	if (ofs->config.index &&
	    !ovl_can_decode_fh(ofs->workdir->d_sb)) {
		ofs->config.index = false;
		pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
	}

	/* NFS export of r/w mount depends on index */
	if (ofs->config.nfs_export && !ofs->config.index) {
		pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n");
		ofs->config.nfs_export = false;
	}

out:
	/* Pairs with mnt_want_write() above on every path that took it */
	mnt_drop_write(mnt);
	return err;
}
/*
 * Set up the "index" directory under ofs->workbasedir and verify that it
 * belongs to this upper/lower combination.
 *
 * @oe supplies the lower root (lowerstack[0]); @upperpath is the upper root.
 * Holds write access to the upper mount for the duration of the call and
 * stores the directory in ofs->indexdir.
 *
 * Returns 0 on success, which includes the case where the index directory
 * could not be created (ofs->indexdir is then NULL), or a negative errno
 * from mnt_want_write(), the origin/upper verification or the index
 * cleanup.
 */
static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe,
			    struct path *upperpath)
{
	struct vfsmount *mnt = ofs->upper_mnt;
	int err;

	err = mnt_want_write(mnt);
	if (err)
		return err;

	/* Verify lower root is upper root origin */
	err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry,
				true);
	if (err) {
		pr_err("overlayfs: failed to verify upper root origin\n");
		goto out;
	}

	/* persist == true: an already existing index dir is reused as is */
	ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
	if (ofs->indexdir) {
		/*
		 * Verify upper root is exclusively associated with index dir.
		 * Older kernels stored upper fh in "trusted.overlay.origin"
		 * xattr.  If that xattr exists, verify that it is a match to
		 * upper dir file handle. In any case, verify or set xattr
		 * "trusted.overlay.upper" to indicate that index may have
		 * directory entries.
		 */
		if (ovl_check_origin_xattr(ofs->indexdir)) {
			err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN,
						upperpath->dentry, true, false);
			if (err)
				pr_err("overlayfs: failed to verify index dir 'origin' xattr\n");
		}
		/*
		 * NOTE(review): this assignment overwrites any error from the
		 * 'origin' check above, so that failure is only logged and
		 * does not by itself fail the call -- confirm this is
		 * intended.
		 */
		err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true);
		if (err)
			pr_err("overlayfs: failed to verify index dir 'upper' xattr\n");

		/* Cleanup bad/stale/orphan index entries */
		if (!err)
			err = ovl_indexdir_cleanup(ofs);
	}
	/* Printed both on verification failure and when there is no index dir */
	if (err || !ofs->indexdir)
		pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");

out:
	mnt_drop_write(mnt);
	return err;
}
/*
 * Resolve the "lowerdir" option into the lower layers of @ofs and build the
 * ovl_entry for the overlay root.
 *
 * Side effects on success: ofs->lower_layers is populated through
 * ovl_get_lower_layers(), sb->s_stack_depth is set to one more than the
 * deepest lower filesystem, and sb->s_d_op is chosen depending on whether
 * any lower directory is remote.  nfs_export may be switched off for a
 * non-upper mount that follows redirects.
 *
 * Returns the new ovl_entry, or an ERR_PTR() on failure.  The temporary
 * path stack and the working copy of the option string are always released
 * before returning.
 */
static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
					    struct ovl_fs *ofs)
{
	int err;
	char *lowertmp, *lower;
	struct path *stack = NULL;
	/* numlower counts the entries of stack[] that hold a reference */
	unsigned int stacklen, numlower = 0, i;
	bool remote = false;
	struct ovl_entry *oe;

	err = -ENOMEM;
	/* Work on a copy: ovl_split_lowerdirs() modifies the string in place */
	lowertmp = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
	if (!lowertmp)
		goto out_err;

	err = -EINVAL;
	stacklen = ovl_split_lowerdirs(lowertmp);
	if (stacklen > OVL_MAX_STACK) {
		pr_err("overlayfs: too many lower directories, limit is %d\n",
		       OVL_MAX_STACK);
		goto out_err;
	} else if (!ofs->config.upperdir && stacklen == 1) {
		pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
		goto out_err;
	} else if (!ofs->config.upperdir && ofs->config.nfs_export &&
		   ofs->config.redirect_follow) {
		/* Not fatal: only the nfs_export feature is dropped */
		pr_warn("overlayfs: NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
		ofs->config.nfs_export = false;
	}

	err = -ENOMEM;
	stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
	if (!stack)
		goto out_err;

	err = -EINVAL;
	lower = lowertmp;
	for (numlower = 0; numlower < stacklen; numlower++) {
		/*
		 * On failure stack[numlower] holds no reference, so the
		 * cleanup loop below, bounded by numlower, puts exactly the
		 * paths resolved so far.
		 */
		err = ovl_lower_dir(lower, &stack[numlower], ofs,
				    &sb->s_stack_depth, &remote);
		if (err)
			goto out_err;

		/* Step to the next NUL separated entry */
		lower = strchr(lower, '\0') + 1;
	}

	err = -EINVAL;
	/* The overlay itself adds one level on top of the deepest lower fs */
	sb->s_stack_depth++;
	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
		pr_err("overlayfs: maximum fs stacking depth exceeded\n");
		goto out_err;
	}

	err = ovl_get_lower_layers(ofs, stack, numlower);
	if (err)
		goto out_err;

	err = -ENOMEM;
	oe = ovl_alloc_entry(numlower);
	if (!oe)
		goto out_err;

	/* The entry takes its own dentry references; stack[] is put below */
	for (i = 0; i < numlower; i++) {
		oe->lowerstack[i].dentry = dget(stack[i].dentry);
		oe->lowerstack[i].layer = &ofs->lower_layers[i];
	}

	/* Revalidation callbacks are only installed if a lower dir is remote */
	if (remote)
		sb->s_d_op = &ovl_reval_dentry_operations;
	else
		sb->s_d_op = &ovl_dentry_operations;

out:
	/* Shared by success and error paths; numlower is 0 if stack is NULL */
	for (i = 0; i < numlower; i++)
		path_put(&stack[i]);
	kfree(stack);
	kfree(lowertmp);

	return oe;

out_err:
	oe = ERR_PTR(err);
	goto out;
}
1294 sb->s_flags |= SB_RDONLY; 1295 1296 sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth; 1297 sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran; 1298 1299 } 1300 oe = ovl_get_lowerstack(sb, ofs); 1301 err = PTR_ERR(oe); 1302 if (IS_ERR(oe)) 1303 goto out_err; 1304 1305 /* If the upper fs is nonexistent, we mark overlayfs r/o too */ 1306 if (!ofs->upper_mnt) 1307 sb->s_flags |= SB_RDONLY; 1308 else if (ofs->upper_mnt->mnt_sb != ofs->same_sb) 1309 ofs->same_sb = NULL; 1310 1311 if (!(ovl_force_readonly(ofs)) && ofs->config.index) { 1312 err = ovl_get_indexdir(ofs, oe, &upperpath); 1313 if (err) 1314 goto out_free_oe; 1315 1316 /* Force r/o mount with no index dir */ 1317 if (!ofs->indexdir) { 1318 dput(ofs->workdir); 1319 ofs->workdir = NULL; 1320 sb->s_flags |= SB_RDONLY; 1321 } 1322 1323 } 1324 1325 /* Show index=off in /proc/mounts for forced r/o mount */ 1326 if (!ofs->indexdir) { 1327 ofs->config.index = false; 1328 if (ofs->upper_mnt && ofs->config.nfs_export) { 1329 pr_warn("overlayfs: NFS export requires an index dir, falling back to nfs_export=off.\n"); 1330 ofs->config.nfs_export = false; 1331 } 1332 } 1333 1334 if (ofs->config.nfs_export) 1335 sb->s_export_op = &ovl_export_operations; 1336 1337 /* Never override disk quota limits or use reserved space */ 1338 cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); 1339 1340 sb->s_magic = OVERLAYFS_SUPER_MAGIC; 1341 sb->s_op = &ovl_super_operations; 1342 sb->s_xattr = ovl_xattr_handlers; 1343 sb->s_fs_info = ofs; 1344 sb->s_flags |= SB_POSIXACL | SB_NOREMOTELOCK; 1345 1346 err = -ENOMEM; 1347 root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); 1348 if (!root_dentry) 1349 goto out_free_oe; 1350 1351 root_dentry->d_fsdata = oe; 1352 1353 mntput(upperpath.mnt); 1354 if (upperpath.dentry) { 1355 ovl_dentry_set_upper_alias(root_dentry); 1356 if (ovl_is_impuredir(upperpath.dentry)) 1357 ovl_set_flag(OVL_IMPURE, d_inode(root_dentry)); 1358 } 1359 1360 /* Root is always merge -> can have whiteouts */ 
1361 ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry)); 1362 ovl_inode_init(d_inode(root_dentry), upperpath.dentry, 1363 ovl_dentry_lower(root_dentry)); 1364 1365 sb->s_root = root_dentry; 1366 1367 return 0; 1368 1369 out_free_oe: 1370 ovl_entry_stack_free(oe); 1371 kfree(oe); 1372 out_err: 1373 path_put(&upperpath); 1374 ovl_free_fs(ofs); 1375 out: 1376 return err; 1377 } 1378 1379 static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, 1380 const char *dev_name, void *raw_data) 1381 { 1382 return mount_nodev(fs_type, flags, raw_data, ovl_fill_super); 1383 } 1384 1385 static struct file_system_type ovl_fs_type = { 1386 .owner = THIS_MODULE, 1387 .name = "overlay", 1388 .mount = ovl_mount, 1389 .kill_sb = kill_anon_super, 1390 }; 1391 MODULE_ALIAS_FS("overlay"); 1392 1393 static void ovl_inode_init_once(void *foo) 1394 { 1395 struct ovl_inode *oi = foo; 1396 1397 inode_init_once(&oi->vfs_inode); 1398 } 1399 1400 static int __init ovl_init(void) 1401 { 1402 int err; 1403 1404 ovl_inode_cachep = kmem_cache_create("ovl_inode", 1405 sizeof(struct ovl_inode), 0, 1406 (SLAB_RECLAIM_ACCOUNT| 1407 SLAB_MEM_SPREAD|SLAB_ACCOUNT), 1408 ovl_inode_init_once); 1409 if (ovl_inode_cachep == NULL) 1410 return -ENOMEM; 1411 1412 err = register_filesystem(&ovl_fs_type); 1413 if (err) 1414 kmem_cache_destroy(ovl_inode_cachep); 1415 1416 return err; 1417 } 1418 1419 static void __exit ovl_exit(void) 1420 { 1421 unregister_filesystem(&ovl_fs_type); 1422 1423 /* 1424 * Make sure all delayed rcu free inodes are flushed before we 1425 * destroy cache. 1426 */ 1427 rcu_barrier(); 1428 kmem_cache_destroy(ovl_inode_cachep); 1429 1430 } 1431 1432 module_init(ovl_init); 1433 module_exit(ovl_exit); 1434