1 /* 2 * 3 * Copyright (C) 2011 Novell Inc. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 as published by 7 * the Free Software Foundation. 8 */ 9 10 #include <uapi/linux/magic.h> 11 #include <linux/fs.h> 12 #include <linux/namei.h> 13 #include <linux/xattr.h> 14 #include <linux/mount.h> 15 #include <linux/parser.h> 16 #include <linux/module.h> 17 #include <linux/statfs.h> 18 #include <linux/seq_file.h> 19 #include <linux/posix_acl_xattr.h> 20 #include "overlayfs.h" 21 #include "ovl_entry.h" 22 23 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 24 MODULE_DESCRIPTION("Overlay filesystem"); 25 MODULE_LICENSE("GPL"); 26 27 28 struct ovl_dir_cache; 29 30 #define OVL_MAX_STACK 500 31 32 static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR); 33 module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644); 34 MODULE_PARM_DESC(ovl_redirect_dir_def, 35 "Default to on or off for the redirect_dir feature"); 36 37 static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX); 38 module_param_named(index, ovl_index_def, bool, 0644); 39 MODULE_PARM_DESC(ovl_index_def, 40 "Default to on or off for the inodes index feature"); 41 42 static void ovl_dentry_release(struct dentry *dentry) 43 { 44 struct ovl_entry *oe = dentry->d_fsdata; 45 46 if (oe) { 47 unsigned int i; 48 49 for (i = 0; i < oe->numlower; i++) 50 dput(oe->lowerstack[i].dentry); 51 kfree_rcu(oe, rcu); 52 } 53 } 54 55 static int ovl_check_append_only(struct inode *inode, int flag) 56 { 57 /* 58 * This test was moot in vfs may_open() because overlay inode does 59 * not have the S_APPEND flag, so re-check on real upper inode 60 */ 61 if (IS_APPEND(inode)) { 62 if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND)) 63 return -EPERM; 64 if (flag & O_TRUNC) 65 return -EPERM; 66 } 67 68 return 0; 69 } 70 71 static struct dentry *ovl_d_real(struct dentry *dentry, 72 const struct inode *inode, 73 unsigned int open_flags, unsigned int flags) 74 { 75 struct dentry *real; 76 int err; 77 78 if (flags & D_REAL_UPPER) 79 return ovl_dentry_upper(dentry); 80 81 if (!d_is_reg(dentry)) { 82 if (!inode || inode == d_inode(dentry)) 83 return dentry; 84 goto bug; 85 } 86 87 if (open_flags) { 88 err = ovl_open_maybe_copy_up(dentry, open_flags); 89 if (err) 90 return ERR_PTR(err); 91 } 92 93 real = ovl_dentry_upper(dentry); 94 if (real && (!inode || inode == d_inode(real))) { 95 if (!inode) { 96 err = ovl_check_append_only(d_inode(real), open_flags); 97 if (err) 98 return ERR_PTR(err); 99 } 100 return real; 101 } 102 103 real = ovl_dentry_lower(dentry); 104 if (!real) 105 goto bug; 106 107 /* Handle recursion */ 108 real = d_real(real, inode, open_flags, 0); 109 110 if (!inode || inode == d_inode(real)) 111 return real; 112 bug: 113 WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry, 114 inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0); 115 return dentry; 116 } 117 118 static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) 119 { 120 struct ovl_entry *oe = dentry->d_fsdata; 121 unsigned int i; 122 int ret = 1; 123 124 for (i = 0; i < oe->numlower; i++) { 125 struct dentry *d = oe->lowerstack[i].dentry; 126 127 if (d->d_flags & DCACHE_OP_REVALIDATE) { 128 ret = d->d_op->d_revalidate(d, flags); 129 if (ret < 0) 130 return ret; 131 if (!ret) { 132 if (!(flags & LOOKUP_RCU)) 133 d_invalidate(d); 134 return -ESTALE; 135 } 136 } 137 } 138 return 1; 139 } 140 141 static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) 142 { 143 struct ovl_entry *oe = dentry->d_fsdata; 144 unsigned int i; 145 int ret = 1; 146 147 for (i = 0; i < oe->numlower; i++) { 148 struct dentry *d = oe->lowerstack[i].dentry; 149 150 if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) { 151 ret = d->d_op->d_weak_revalidate(d, flags); 152 if (ret <= 0) 153 break; 154 } 155 } 156 return ret; 157 } 158 159 static const struct dentry_operations ovl_dentry_operations = { 160 .d_release = ovl_dentry_release, 161 .d_real = ovl_d_real, 162 }; 163 164 static const struct dentry_operations ovl_reval_dentry_operations = { 165 .d_release = ovl_dentry_release, 166 .d_real = ovl_d_real, 167 .d_revalidate = ovl_dentry_revalidate, 168 .d_weak_revalidate = ovl_dentry_weak_revalidate, 169 }; 170 171 static struct kmem_cache *ovl_inode_cachep; 172 173 static struct inode *ovl_alloc_inode(struct super_block *sb) 174 { 175 struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL); 176 177 oi->cache = NULL; 178 oi->redirect = NULL; 179 oi->version = 0; 180 oi->flags = 0; 181 oi->__upperdentry = NULL; 182 oi->lower = NULL; 183 mutex_init(&oi->lock); 184 185 return &oi->vfs_inode; 186 } 187 188 static void ovl_i_callback(struct rcu_head *head) 189 { 190 struct inode *inode = container_of(head, struct inode, i_rcu); 191 192 kmem_cache_free(ovl_inode_cachep, OVL_I(inode)); 193 } 194 195 static void ovl_destroy_inode(struct inode *inode) 196 { 197 struct ovl_inode *oi = OVL_I(inode); 198 199 dput(oi->__upperdentry); 200 kfree(oi->redirect); 201 ovl_dir_cache_free(inode); 202 mutex_destroy(&oi->lock); 203 204 call_rcu(&inode->i_rcu, ovl_i_callback); 205 } 206 207 static void ovl_put_super(struct super_block *sb) 208 { 209 struct ovl_fs *ufs = sb->s_fs_info; 210 unsigned i; 211 212 dput(ufs->indexdir); 213 dput(ufs->workdir); 214 if (ufs->workdir_locked) 215 ovl_inuse_unlock(ufs->workbasedir); 216 dput(ufs->workbasedir); 217 if (ufs->upper_mnt && ufs->upperdir_locked) 218 ovl_inuse_unlock(ufs->upper_mnt->mnt_root); 219 mntput(ufs->upper_mnt); 220 for (i = 0; i < ufs->numlower; i++) 221 mntput(ufs->lower_mnt[i]); 222 kfree(ufs->lower_mnt); 223 224 kfree(ufs->config.lowerdir); 225 kfree(ufs->config.upperdir); 226 kfree(ufs->config.workdir); 227 put_cred(ufs->creator_cred); 228 kfree(ufs); 229 } 230 231 static int ovl_sync_fs(struct super_block *sb, int wait) 232 { 233 struct ovl_fs *ufs = sb->s_fs_info; 234 struct super_block *upper_sb; 235 int ret; 236 237 if (!ufs->upper_mnt) 238 return 0; 239 upper_sb = ufs->upper_mnt->mnt_sb; 240 if (!upper_sb->s_op->sync_fs) 241 return 0; 242 243 /* real inodes have already been synced by sync_filesystem(ovl_sb) */ 244 down_read(&upper_sb->s_umount); 245 ret = upper_sb->s_op->sync_fs(upper_sb, wait); 246 up_read(&upper_sb->s_umount); 247 return ret; 248 } 249 250 /** 251 * ovl_statfs 252 * @sb: The overlayfs super block 253 * @buf: The struct kstatfs to fill in with stats 254 * 255 * Get the filesystem statistics. As writes always target the upper layer 256 * filesystem pass the statfs to the upper filesystem (if it exists) 257 */ 258 static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) 259 { 260 struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 261 struct dentry *root_dentry = dentry->d_sb->s_root; 262 struct path path; 263 int err; 264 265 ovl_path_real(root_dentry, &path); 266 267 err = vfs_statfs(&path, buf); 268 if (!err) { 269 buf->f_namelen = ofs->namelen; 270 buf->f_type = OVERLAYFS_SUPER_MAGIC; 271 } 272 273 return err; 274 } 275 276 /* Will this overlay be forced to mount/remount ro? */ 277 static bool ovl_force_readonly(struct ovl_fs *ufs) 278 { 279 return (!ufs->upper_mnt || !ufs->workdir); 280 } 281 282 /** 283 * ovl_show_options 284 * 285 * Prints the mount options for a given superblock. 286 * Returns zero; does not fail. 287 */ 288 static int ovl_show_options(struct seq_file *m, struct dentry *dentry) 289 { 290 struct super_block *sb = dentry->d_sb; 291 struct ovl_fs *ufs = sb->s_fs_info; 292 293 seq_show_option(m, "lowerdir", ufs->config.lowerdir); 294 if (ufs->config.upperdir) { 295 seq_show_option(m, "upperdir", ufs->config.upperdir); 296 seq_show_option(m, "workdir", ufs->config.workdir); 297 } 298 if (ufs->config.default_permissions) 299 seq_puts(m, ",default_permissions"); 300 if (ufs->config.redirect_dir != ovl_redirect_dir_def) 301 seq_printf(m, ",redirect_dir=%s", 302 ufs->config.redirect_dir ? "on" : "off"); 303 if (ufs->config.index != ovl_index_def) 304 seq_printf(m, ",index=%s", 305 ufs->config.index ? "on" : "off"); 306 return 0; 307 } 308 309 static int ovl_remount(struct super_block *sb, int *flags, char *data) 310 { 311 struct ovl_fs *ufs = sb->s_fs_info; 312 313 if (!(*flags & MS_RDONLY) && ovl_force_readonly(ufs)) 314 return -EROFS; 315 316 return 0; 317 } 318 319 static const struct super_operations ovl_super_operations = { 320 .alloc_inode = ovl_alloc_inode, 321 .destroy_inode = ovl_destroy_inode, 322 .drop_inode = generic_delete_inode, 323 .put_super = ovl_put_super, 324 .sync_fs = ovl_sync_fs, 325 .statfs = ovl_statfs, 326 .show_options = ovl_show_options, 327 .remount_fs = ovl_remount, 328 }; 329 330 enum { 331 OPT_LOWERDIR, 332 OPT_UPPERDIR, 333 OPT_WORKDIR, 334 OPT_DEFAULT_PERMISSIONS, 335 OPT_REDIRECT_DIR_ON, 336 OPT_REDIRECT_DIR_OFF, 337 OPT_INDEX_ON, 338 OPT_INDEX_OFF, 339 OPT_ERR, 340 }; 341 342 static const match_table_t ovl_tokens = { 343 {OPT_LOWERDIR, "lowerdir=%s"}, 344 {OPT_UPPERDIR, "upperdir=%s"}, 345 {OPT_WORKDIR, "workdir=%s"}, 346 {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, 347 {OPT_REDIRECT_DIR_ON, "redirect_dir=on"}, 348 {OPT_REDIRECT_DIR_OFF, "redirect_dir=off"}, 349 {OPT_INDEX_ON, "index=on"}, 350 {OPT_INDEX_OFF, "index=off"}, 351 {OPT_ERR, NULL} 352 }; 353 354 static char *ovl_next_opt(char **s) 355 { 356 char *sbegin = *s; 357 char *p; 358 359 if (sbegin == NULL) 360 return NULL; 361 362 for (p = sbegin; *p; p++) { 363 if (*p == '\\') { 364 p++; 365 if (!*p) 366 break; 367 } else if (*p == ',') { 368 *p = '\0'; 369 *s = p + 1; 370 return sbegin; 371 } 372 } 373 *s = NULL; 374 return sbegin; 375 } 376 377 static int ovl_parse_opt(char *opt, struct ovl_config *config) 378 { 379 char *p; 380 381 while ((p = ovl_next_opt(&opt)) != NULL) { 382 int token; 383 substring_t args[MAX_OPT_ARGS]; 384 385 if (!*p) 386 continue; 387 388 token = match_token(p, ovl_tokens, args); 389 switch (token) { 390 case OPT_UPPERDIR: 391 kfree(config->upperdir); 392 config->upperdir = match_strdup(&args[0]); 393 if (!config->upperdir) 394 return -ENOMEM; 395 break; 396 397 case OPT_LOWERDIR: 398 kfree(config->lowerdir); 399 config->lowerdir = match_strdup(&args[0]); 400 if (!config->lowerdir) 401 return -ENOMEM; 402 break; 403 404 case OPT_WORKDIR: 405 kfree(config->workdir); 406 config->workdir = match_strdup(&args[0]); 407 if (!config->workdir) 408 return -ENOMEM; 409 break; 410 411 case OPT_DEFAULT_PERMISSIONS: 412 config->default_permissions = true; 413 break; 414 415 case OPT_REDIRECT_DIR_ON: 416 config->redirect_dir = true; 417 break; 418 419 case OPT_REDIRECT_DIR_OFF: 420 config->redirect_dir = false; 421 break; 422 423 case OPT_INDEX_ON: 424 config->index = true; 425 break; 426 427 case OPT_INDEX_OFF: 428 config->index = false; 429 break; 430 431 default: 432 pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); 433 return -EINVAL; 434 } 435 } 436 437 /* Workdir is useless in non-upper mount */ 438 if (!config->upperdir && config->workdir) { 439 pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n", 440 config->workdir); 441 kfree(config->workdir); 442 config->workdir = NULL; 443 } 444 445 return 0; 446 } 447 448 #define OVL_WORKDIR_NAME "work" 449 #define OVL_INDEXDIR_NAME "index" 450 451 static struct dentry *ovl_workdir_create(struct super_block *sb, 452 struct ovl_fs *ufs, 453 struct dentry *dentry, 454 const char *name, bool persist) 455 { 456 struct inode *dir = dentry->d_inode; 457 struct vfsmount *mnt = ufs->upper_mnt; 458 struct dentry *work; 459 int err; 460 bool retried = false; 461 bool locked = false; 462 463 err = mnt_want_write(mnt); 464 if (err) 465 goto out_err; 466 467 inode_lock_nested(dir, I_MUTEX_PARENT); 468 locked = true; 469 470 retry: 471 work = lookup_one_len(name, dentry, strlen(name)); 472 473 if (!IS_ERR(work)) { 474 struct iattr attr = { 475 .ia_valid = ATTR_MODE, 476 .ia_mode = S_IFDIR | 0, 477 }; 478 479 if (work->d_inode) { 480 err = -EEXIST; 481 if (retried) 482 goto out_dput; 483 484 if (persist) 485 goto out_unlock; 486 487 retried = true; 488 ovl_workdir_cleanup(dir, mnt, work, 0); 489 dput(work); 490 goto retry; 491 } 492 493 err = ovl_create_real(dir, work, 494 &(struct cattr){.mode = S_IFDIR | 0}, 495 NULL, true); 496 if (err) 497 goto out_dput; 498 499 /* 500 * Try to remove POSIX ACL xattrs from workdir. We are good if: 501 * 502 * a) success (there was a POSIX ACL xattr and was removed) 503 * b) -ENODATA (there was no POSIX ACL xattr) 504 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported) 505 * 506 * There are various other error values that could effectively 507 * mean that the xattr doesn't exist (e.g. -ERANGE is returned 508 * if the xattr name is too long), but the set of filesystems 509 * allowed as upper are limited to "normal" ones, where checking 510 * for the above two errors is sufficient. 511 */ 512 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT); 513 if (err && err != -ENODATA && err != -EOPNOTSUPP) 514 goto out_dput; 515 516 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS); 517 if (err && err != -ENODATA && err != -EOPNOTSUPP) 518 goto out_dput; 519 520 /* Clear any inherited mode bits */ 521 inode_lock(work->d_inode); 522 err = notify_change(work, &attr, NULL); 523 inode_unlock(work->d_inode); 524 if (err) 525 goto out_dput; 526 } else { 527 err = PTR_ERR(work); 528 goto out_err; 529 } 530 out_unlock: 531 mnt_drop_write(mnt); 532 if (locked) 533 inode_unlock(dir); 534 535 return work; 536 537 out_dput: 538 dput(work); 539 out_err: 540 pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n", 541 ufs->config.workdir, name, -err); 542 sb->s_flags |= MS_RDONLY; 543 work = NULL; 544 goto out_unlock; 545 } 546 547 static void ovl_unescape(char *s) 548 { 549 char *d = s; 550 551 for (;; s++, d++) { 552 if (*s == '\\') 553 s++; 554 *d = *s; 555 if (!*s) 556 break; 557 } 558 } 559 560 static int ovl_mount_dir_noesc(const char *name, struct path *path) 561 { 562 int err = -EINVAL; 563 564 if (!*name) { 565 pr_err("overlayfs: empty lowerdir\n"); 566 goto out; 567 } 568 err = kern_path(name, LOOKUP_FOLLOW, path); 569 if (err) { 570 pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); 571 goto out; 572 } 573 err = -EINVAL; 574 if (ovl_dentry_weird(path->dentry)) { 575 pr_err("overlayfs: filesystem on '%s' not supported\n", name); 576 goto out_put; 577 } 578 if (!d_is_dir(path->dentry)) { 579 pr_err("overlayfs: '%s' not a directory\n", name); 580 goto out_put; 581 } 582 return 0; 583 584 out_put: 585 path_put(path); 586 out: 587 return err; 588 } 589 590 static int ovl_mount_dir(const char *name, struct path *path) 591 { 592 int err = -ENOMEM; 593 char *tmp = kstrdup(name, GFP_KERNEL); 594 595 if (tmp) { 596 ovl_unescape(tmp); 597 err = ovl_mount_dir_noesc(tmp, path); 598 599 if (!err) 600 if (ovl_dentry_remote(path->dentry)) { 601 pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n", 602 tmp); 603 path_put(path); 604 err = -EINVAL; 605 } 606 kfree(tmp); 607 } 608 return err; 609 } 610 611 static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs, 612 const char *name) 613 { 614 struct kstatfs statfs; 615 int err = vfs_statfs(path, &statfs); 616 617 if (err) 618 pr_err("overlayfs: statfs failed on '%s'\n", name); 619 else 620 ofs->namelen = max(ofs->namelen, statfs.f_namelen); 621 622 return err; 623 } 624 625 static int ovl_lower_dir(const char *name, struct path *path, 626 struct ovl_fs *ofs, int *stack_depth, bool *remote) 627 { 628 int err; 629 630 err = ovl_mount_dir_noesc(name, path); 631 if (err) 632 goto out; 633 634 err = ovl_check_namelen(path, ofs, name); 635 if (err) 636 goto out_put; 637 638 *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth); 639 640 if (ovl_dentry_remote(path->dentry)) 641 *remote = true; 642 643 /* 644 * The inodes index feature needs to encode and decode file 645 * handles, so it requires that all layers support them. 646 */ 647 if (ofs->config.index && !ovl_can_decode_fh(path->dentry->d_sb)) { 648 ofs->config.index = false; 649 pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off.\n", name); 650 } 651 652 return 0; 653 654 out_put: 655 path_put(path); 656 out: 657 return err; 658 } 659 660 /* Workdir should not be subdir of upperdir and vice versa */ 661 static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) 662 { 663 bool ok = false; 664 665 if (workdir != upperdir) { 666 ok = (lock_rename(workdir, upperdir) == NULL); 667 unlock_rename(workdir, upperdir); 668 } 669 return ok; 670 } 671 672 static unsigned int ovl_split_lowerdirs(char *str) 673 { 674 unsigned int ctr = 1; 675 char *s, *d; 676 677 for (s = d = str;; s++, d++) { 678 if (*s == '\\') { 679 s++; 680 } else if (*s == ':') { 681 *d = '\0'; 682 ctr++; 683 continue; 684 } 685 *d = *s; 686 if (!*s) 687 break; 688 } 689 return ctr; 690 } 691 692 static int __maybe_unused 693 ovl_posix_acl_xattr_get(const struct xattr_handler *handler, 694 struct dentry *dentry, struct inode *inode, 695 const char *name, void *buffer, size_t size) 696 { 697 return ovl_xattr_get(dentry, inode, handler->name, buffer, size); 698 } 699 700 static int __maybe_unused 701 ovl_posix_acl_xattr_set(const struct xattr_handler *handler, 702 struct dentry *dentry, struct inode *inode, 703 const char *name, const void *value, 704 size_t size, int flags) 705 { 706 struct dentry *workdir = ovl_workdir(dentry); 707 struct inode *realinode = ovl_inode_real(inode); 708 struct posix_acl *acl = NULL; 709 int err; 710 711 /* Check that everything is OK before copy-up */ 712 if (value) { 713 acl = posix_acl_from_xattr(&init_user_ns, value, size); 714 if (IS_ERR(acl)) 715 return PTR_ERR(acl); 716 } 717 err = -EOPNOTSUPP; 718 if (!IS_POSIXACL(d_inode(workdir))) 719 goto out_acl_release; 720 if (!realinode->i_op->set_acl) 721 goto out_acl_release; 722 if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) { 723 err = acl ? -EACCES : 0; 724 goto out_acl_release; 725 } 726 err = -EPERM; 727 if (!inode_owner_or_capable(inode)) 728 goto out_acl_release; 729 730 posix_acl_release(acl); 731 732 /* 733 * Check if sgid bit needs to be cleared (actual setacl operation will 734 * be done with mounter's capabilities and so that won't do it for us). 735 */ 736 if (unlikely(inode->i_mode & S_ISGID) && 737 handler->flags == ACL_TYPE_ACCESS && 738 !in_group_p(inode->i_gid) && 739 !capable_wrt_inode_uidgid(inode, CAP_FSETID)) { 740 struct iattr iattr = { .ia_valid = ATTR_KILL_SGID }; 741 742 err = ovl_setattr(dentry, &iattr); 743 if (err) 744 return err; 745 } 746 747 err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags); 748 if (!err) 749 ovl_copyattr(ovl_inode_real(inode), inode); 750 751 return err; 752 753 out_acl_release: 754 posix_acl_release(acl); 755 return err; 756 } 757 758 static int ovl_own_xattr_get(const struct xattr_handler *handler, 759 struct dentry *dentry, struct inode *inode, 760 const char *name, void *buffer, size_t size) 761 { 762 return -EOPNOTSUPP; 763 } 764 765 static int ovl_own_xattr_set(const struct xattr_handler *handler, 766 struct dentry *dentry, struct inode *inode, 767 const char *name, const void *value, 768 size_t size, int flags) 769 { 770 return -EOPNOTSUPP; 771 } 772 773 static int ovl_other_xattr_get(const struct xattr_handler *handler, 774 struct dentry *dentry, struct inode *inode, 775 const char *name, void *buffer, size_t size) 776 { 777 return ovl_xattr_get(dentry, inode, name, buffer, size); 778 } 779 780 static int ovl_other_xattr_set(const struct xattr_handler *handler, 781 struct dentry *dentry, struct inode *inode, 782 const char *name, const void *value, 783 size_t size, int flags) 784 { 785 return ovl_xattr_set(dentry, inode, name, value, size, flags); 786 } 787 788 static const struct xattr_handler __maybe_unused 789 ovl_posix_acl_access_xattr_handler = { 790 .name = XATTR_NAME_POSIX_ACL_ACCESS, 791 .flags = ACL_TYPE_ACCESS, 792 .get = ovl_posix_acl_xattr_get, 793 .set = ovl_posix_acl_xattr_set, 794 }; 795 796 static const struct xattr_handler __maybe_unused 797 ovl_posix_acl_default_xattr_handler = { 798 .name = XATTR_NAME_POSIX_ACL_DEFAULT, 799 .flags = ACL_TYPE_DEFAULT, 800 .get = ovl_posix_acl_xattr_get, 801 .set = ovl_posix_acl_xattr_set, 802 }; 803 804 static const struct xattr_handler ovl_own_xattr_handler = { 805 .prefix = OVL_XATTR_PREFIX, 806 .get = ovl_own_xattr_get, 807 .set = ovl_own_xattr_set, 808 }; 809 810 static const struct xattr_handler ovl_other_xattr_handler = { 811 .prefix = "", /* catch all */ 812 .get = ovl_other_xattr_get, 813 .set = ovl_other_xattr_set, 814 }; 815 816 static const struct xattr_handler *ovl_xattr_handlers[] = { 817 #ifdef CONFIG_FS_POSIX_ACL 818 &ovl_posix_acl_access_xattr_handler, 819 &ovl_posix_acl_default_xattr_handler, 820 #endif 821 &ovl_own_xattr_handler, 822 &ovl_other_xattr_handler, 823 NULL 824 }; 825 826 static int ovl_fill_super(struct super_block *sb, void *data, int silent) 827 { 828 struct path upperpath = { }; 829 struct path workpath = { }; 830 struct dentry *root_dentry; 831 struct ovl_entry *oe; 832 struct ovl_fs *ufs; 833 struct path *stack = NULL; 834 char *lowertmp; 835 char *lower; 836 unsigned int numlower; 837 unsigned int stacklen = 0; 838 unsigned int i; 839 bool remote = false; 840 struct cred *cred; 841 int err; 842 843 err = -ENOMEM; 844 ufs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); 845 if (!ufs) 846 goto out; 847 848 ufs->config.redirect_dir = ovl_redirect_dir_def; 849 ufs->config.index = ovl_index_def; 850 err = ovl_parse_opt((char *) data, &ufs->config); 851 if (err) 852 goto out_free_config; 853 854 err = -EINVAL; 855 if (!ufs->config.lowerdir) { 856 if (!silent) 857 pr_err("overlayfs: missing 'lowerdir'\n"); 858 goto out_free_config; 859 } 860 861 sb->s_stack_depth = 0; 862 sb->s_maxbytes = MAX_LFS_FILESIZE; 863 if (ufs->config.upperdir) { 864 if (!ufs->config.workdir) { 865 pr_err("overlayfs: missing 'workdir'\n"); 866 goto out_free_config; 867 } 868 869 err = ovl_mount_dir(ufs->config.upperdir, &upperpath); 870 if (err) 871 goto out_free_config; 872 873 /* Upper fs should not be r/o */ 874 if (sb_rdonly(upperpath.mnt->mnt_sb)) { 875 pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n"); 876 err = -EINVAL; 877 goto out_put_upperpath; 878 } 879 880 err = ovl_check_namelen(&upperpath, ufs, ufs->config.upperdir); 881 if (err) 882 goto out_put_upperpath; 883 884 err = -EBUSY; 885 if (ovl_inuse_trylock(upperpath.dentry)) { 886 ufs->upperdir_locked = true; 887 } else if (ufs->config.index) { 888 pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n"); 889 goto out_put_upperpath; 890 } else { 891 pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n"); 892 } 893 894 err = ovl_mount_dir(ufs->config.workdir, &workpath); 895 if (err) 896 goto out_unlock_upperdentry; 897 898 err = -EINVAL; 899 if (upperpath.mnt != workpath.mnt) { 900 pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); 901 goto out_put_workpath; 902 } 903 if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) { 904 pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); 905 goto out_put_workpath; 906 } 907 908 err = -EBUSY; 909 if (ovl_inuse_trylock(workpath.dentry)) { 910 ufs->workdir_locked = true; 911 } else if (ufs->config.index) { 912 pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n"); 913 goto out_put_workpath; 914 } else { 915 pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n"); 916 } 917 918 ufs->workbasedir = workpath.dentry; 919 sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth; 920 } 921 err = -ENOMEM; 922 lowertmp = kstrdup(ufs->config.lowerdir, GFP_KERNEL); 923 if (!lowertmp) 924 goto out_unlock_workdentry; 925 926 err = -EINVAL; 927 stacklen = ovl_split_lowerdirs(lowertmp); 928 if (stacklen > OVL_MAX_STACK) { 929 pr_err("overlayfs: too many lower directories, limit is %d\n", 930 OVL_MAX_STACK); 931 goto out_free_lowertmp; 932 } else if (!ufs->config.upperdir && stacklen == 1) { 933 pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n"); 934 goto out_free_lowertmp; 935 } 936 937 err = -ENOMEM; 938 stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); 939 if (!stack) 940 goto out_free_lowertmp; 941 942 err = -EINVAL; 943 lower = lowertmp; 944 for (numlower = 0; numlower < stacklen; numlower++) { 945 err = ovl_lower_dir(lower, &stack[numlower], ufs, 946 &sb->s_stack_depth, &remote); 947 if (err) 948 goto out_put_lowerpath; 949 950 lower = strchr(lower, '\0') + 1; 951 } 952 953 err = -EINVAL; 954 sb->s_stack_depth++; 955 if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { 956 pr_err("overlayfs: maximum fs stacking depth exceeded\n"); 957 goto out_put_lowerpath; 958 } 959 960 if (ufs->config.upperdir) { 961 ufs->upper_mnt = clone_private_mount(&upperpath); 962 err = PTR_ERR(ufs->upper_mnt); 963 if (IS_ERR(ufs->upper_mnt)) { 964 pr_err("overlayfs: failed to clone upperpath\n"); 965 goto out_put_lowerpath; 966 } 967 968 /* Don't inherit atime flags */ 969 ufs->upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME); 970 971 sb->s_time_gran = ufs->upper_mnt->mnt_sb->s_time_gran; 972 973 ufs->workdir = ovl_workdir_create(sb, ufs, workpath.dentry, 974 OVL_WORKDIR_NAME, false); 975 /* 976 * Upper should support d_type, else whiteouts are visible. 977 * Given workdir and upper are on same fs, we can do 978 * iterate_dir() on workdir. This check requires successful 979 * creation of workdir in previous step. 980 */ 981 if (ufs->workdir) { 982 struct dentry *temp; 983 984 err = ovl_check_d_type_supported(&workpath); 985 if (err < 0) 986 goto out_put_workdir; 987 988 /* 989 * We allowed this configuration and don't want to 990 * break users over kernel upgrade. So warn instead 991 * of erroring out. 992 */ 993 if (!err) 994 pr_warn("overlayfs: upper fs needs to support d_type.\n"); 995 996 /* Check if upper/work fs supports O_TMPFILE */ 997 temp = ovl_do_tmpfile(ufs->workdir, S_IFREG | 0); 998 ufs->tmpfile = !IS_ERR(temp); 999 if (ufs->tmpfile) 1000 dput(temp); 1001 else 1002 pr_warn("overlayfs: upper fs does not support tmpfile.\n"); 1003 1004 /* 1005 * Check if upper/work fs supports trusted.overlay.* 1006 * xattr 1007 */ 1008 err = ovl_do_setxattr(ufs->workdir, OVL_XATTR_OPAQUE, 1009 "0", 1, 0); 1010 if (err) { 1011 ufs->noxattr = true; 1012 pr_warn("overlayfs: upper fs does not support xattr.\n"); 1013 } else { 1014 vfs_removexattr(ufs->workdir, OVL_XATTR_OPAQUE); 1015 } 1016 1017 /* Check if upper/work fs supports file handles */ 1018 if (ufs->config.index && 1019 !ovl_can_decode_fh(ufs->workdir->d_sb)) { 1020 ufs->config.index = false; 1021 pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n"); 1022 } 1023 } 1024 } 1025 1026 err = -ENOMEM; 1027 ufs->lower_mnt = kcalloc(numlower, sizeof(struct vfsmount *), GFP_KERNEL); 1028 if (ufs->lower_mnt == NULL) 1029 goto out_put_workdir; 1030 for (i = 0; i < numlower; i++) { 1031 struct vfsmount *mnt = clone_private_mount(&stack[i]); 1032 1033 err = PTR_ERR(mnt); 1034 if (IS_ERR(mnt)) { 1035 pr_err("overlayfs: failed to clone lowerpath\n"); 1036 goto out_put_lower_mnt; 1037 } 1038 /* 1039 * Make lower_mnt R/O. That way fchmod/fchown on lower file 1040 * will fail instead of modifying lower fs. 1041 */ 1042 mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME; 1043 1044 ufs->lower_mnt[ufs->numlower] = mnt; 1045 ufs->numlower++; 1046 1047 /* Check if all lower layers are on same sb */ 1048 if (i == 0) 1049 ufs->same_sb = mnt->mnt_sb; 1050 else if (ufs->same_sb != mnt->mnt_sb) 1051 ufs->same_sb = NULL; 1052 } 1053 1054 /* If the upper fs is nonexistent, we mark overlayfs r/o too */ 1055 if (!ufs->upper_mnt) 1056 sb->s_flags |= MS_RDONLY; 1057 else if (ufs->upper_mnt->mnt_sb != ufs->same_sb) 1058 ufs->same_sb = NULL; 1059 1060 if (!(ovl_force_readonly(ufs)) && ufs->config.index) { 1061 /* Verify lower root is upper root origin */ 1062 err = ovl_verify_origin(upperpath.dentry, ufs->lower_mnt[0], 1063 stack[0].dentry, false, true); 1064 if (err) { 1065 pr_err("overlayfs: failed to verify upper root origin\n"); 1066 goto out_put_lower_mnt; 1067 } 1068 1069 ufs->indexdir = ovl_workdir_create(sb, ufs, workpath.dentry, 1070 OVL_INDEXDIR_NAME, true); 1071 if (ufs->indexdir) { 1072 /* Verify upper root is index dir origin */ 1073 err = ovl_verify_origin(ufs->indexdir, ufs->upper_mnt, 1074 upperpath.dentry, true, true); 1075 if (err) 1076 pr_err("overlayfs: failed to verify index dir origin\n"); 1077 1078 /* Cleanup bad/stale/orphan index entries */ 1079 if (!err) 1080 err = ovl_indexdir_cleanup(ufs->indexdir, 1081 ufs->upper_mnt, 1082 stack, numlower); 1083 } 1084 if (err || !ufs->indexdir) 1085 pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); 1086 if (err) 1087 goto out_put_indexdir; 1088 } 1089 1090 /* Show index=off/on in /proc/mounts for any of the reasons above */ 1091 if (!ufs->indexdir) 1092 ufs->config.index = false; 1093 1094 if (remote) 1095 sb->s_d_op = &ovl_reval_dentry_operations; 1096 else 1097 sb->s_d_op = &ovl_dentry_operations; 1098 1099 err = -ENOMEM; 1100 ufs->creator_cred = cred = prepare_creds(); 1101 if (!cred) 1102 goto out_put_indexdir; 1103 1104 /* Never override disk quota limits or use reserved space */ 1105 cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); 1106 1107 err = -ENOMEM; 1108 oe = ovl_alloc_entry(numlower); 1109 if (!oe) 1110 goto out_put_cred; 1111 1112 sb->s_magic = OVERLAYFS_SUPER_MAGIC; 1113 sb->s_op = &ovl_super_operations; 1114 sb->s_xattr = ovl_xattr_handlers; 1115 sb->s_fs_info = ufs; 1116 sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK; 1117 1118 root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); 1119 if (!root_dentry) 1120 goto out_free_oe; 1121 1122 mntput(upperpath.mnt); 1123 for (i = 0; i < numlower; i++) 1124 mntput(stack[i].mnt); 1125 mntput(workpath.mnt); 1126 kfree(lowertmp); 1127 1128 if (upperpath.dentry) { 1129 oe->has_upper = true; 1130 if (ovl_is_impuredir(upperpath.dentry)) 1131 ovl_set_flag(OVL_IMPURE, d_inode(root_dentry)); 1132 } 1133 for (i = 0; i < numlower; i++) { 1134 oe->lowerstack[i].dentry = stack[i].dentry; 1135 oe->lowerstack[i].mnt = ufs->lower_mnt[i]; 1136 } 1137 kfree(stack); 1138 1139 root_dentry->d_fsdata = oe; 1140 1141 ovl_inode_init(d_inode(root_dentry), upperpath.dentry, 1142 ovl_dentry_lower(root_dentry)); 1143 1144 sb->s_root = root_dentry; 1145 1146 return 0; 1147 1148 out_free_oe: 1149 kfree(oe); 1150 out_put_cred: 1151 put_cred(ufs->creator_cred); 1152 out_put_indexdir: 1153 dput(ufs->indexdir); 1154 out_put_lower_mnt: 1155 for (i = 0; i < ufs->numlower; i++) 1156 mntput(ufs->lower_mnt[i]); 1157 kfree(ufs->lower_mnt); 1158 out_put_workdir: 1159 dput(ufs->workdir); 1160 mntput(ufs->upper_mnt); 1161 out_put_lowerpath: 1162 for (i = 0; i < numlower; i++) 1163 path_put(&stack[i]); 1164 kfree(stack); 1165 out_free_lowertmp: 1166 kfree(lowertmp); 1167 out_unlock_workdentry: 1168 if (ufs->workdir_locked) 1169 ovl_inuse_unlock(workpath.dentry); 1170 out_put_workpath: 1171 path_put(&workpath); 1172 out_unlock_upperdentry: 1173 if (ufs->upperdir_locked) 1174 ovl_inuse_unlock(upperpath.dentry); 1175 out_put_upperpath: 1176 path_put(&upperpath); 1177 out_free_config: 1178 kfree(ufs->config.lowerdir); 1179 kfree(ufs->config.upperdir); 1180 kfree(ufs->config.workdir); 1181 kfree(ufs); 1182 out: 1183 return err; 1184 } 1185 1186 static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, 1187 const char *dev_name, void *raw_data) 1188 { 1189 return mount_nodev(fs_type, flags, raw_data, ovl_fill_super); 1190 } 1191 1192 static struct file_system_type ovl_fs_type = { 1193 .owner = THIS_MODULE, 1194 .name = "overlay", 1195 .mount = ovl_mount, 1196 .kill_sb = kill_anon_super, 1197 }; 1198 MODULE_ALIAS_FS("overlay"); 1199 1200 static void ovl_inode_init_once(void *foo) 1201 { 1202 struct ovl_inode *oi = foo; 1203 1204 inode_init_once(&oi->vfs_inode); 1205 } 1206 1207 static int __init ovl_init(void) 1208 { 1209 int err; 1210 1211 ovl_inode_cachep = kmem_cache_create("ovl_inode", 1212 sizeof(struct ovl_inode), 0, 1213 (SLAB_RECLAIM_ACCOUNT| 1214 SLAB_MEM_SPREAD|SLAB_ACCOUNT), 1215 ovl_inode_init_once); 1216 if (ovl_inode_cachep == NULL) 1217 return -ENOMEM; 1218 1219 err = register_filesystem(&ovl_fs_type); 1220 if (err) 1221 kmem_cache_destroy(ovl_inode_cachep); 1222 1223 return err; 1224 } 1225 1226 static void __exit ovl_exit(void) 1227 { 1228 unregister_filesystem(&ovl_fs_type); 1229 1230 /* 1231 * Make sure all delayed rcu free inodes are flushed before we 1232 * destroy cache. 1233 */ 1234 rcu_barrier(); 1235 kmem_cache_destroy(ovl_inode_cachep); 1236 1237 } 1238 1239 module_init(ovl_init); 1240 module_exit(ovl_exit); 1241