1 /* 2 * 3 * Copyright (C) 2011 Novell Inc. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 as published by 7 * the Free Software Foundation. 8 */ 9 10 #include <uapi/linux/magic.h> 11 #include <linux/fs.h> 12 #include <linux/namei.h> 13 #include <linux/xattr.h> 14 #include <linux/mount.h> 15 #include <linux/parser.h> 16 #include <linux/module.h> 17 #include <linux/statfs.h> 18 #include <linux/seq_file.h> 19 #include <linux/posix_acl_xattr.h> 20 #include "overlayfs.h" 21 #include "ovl_entry.h" 22 23 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 24 MODULE_DESCRIPTION("Overlay filesystem"); 25 MODULE_LICENSE("GPL"); 26 27 28 struct ovl_dir_cache; 29 30 #define OVL_MAX_STACK 500 31 32 static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR); 33 module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644); 34 MODULE_PARM_DESC(ovl_redirect_dir_def, 35 "Default to on or off for the redirect_dir feature"); 36 37 static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX); 38 module_param_named(index, ovl_index_def, bool, 0644); 39 MODULE_PARM_DESC(ovl_index_def, 40 "Default to on or off for the inodes index feature"); 41 42 static void ovl_dentry_release(struct dentry *dentry) 43 { 44 struct ovl_entry *oe = dentry->d_fsdata; 45 46 if (oe) { 47 unsigned int i; 48 49 for (i = 0; i < oe->numlower; i++) 50 dput(oe->lowerstack[i].dentry); 51 kfree_rcu(oe, rcu); 52 } 53 } 54 55 static int ovl_check_append_only(struct inode *inode, int flag) 56 { 57 /* 58 * This test was moot in vfs may_open() because overlay inode does 59 * not have the S_APPEND flag, so re-check on real upper inode 60 */ 61 if (IS_APPEND(inode)) { 62 if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND)) 63 return -EPERM; 64 if (flag & O_TRUNC) 65 return -EPERM; 66 } 67 68 return 0; 69 } 70 71 static struct dentry *ovl_d_real(struct dentry *dentry, 72 const struct inode *inode, 73 unsigned int open_flags) 74 { 75 struct dentry *real; 76 int err; 77 78 if (!d_is_reg(dentry)) { 79 if (!inode || inode == d_inode(dentry)) 80 return dentry; 81 goto bug; 82 } 83 84 if (d_is_negative(dentry)) 85 return dentry; 86 87 if (open_flags) { 88 err = ovl_open_maybe_copy_up(dentry, open_flags); 89 if (err) 90 return ERR_PTR(err); 91 } 92 93 real = ovl_dentry_upper(dentry); 94 if (real && (!inode || inode == d_inode(real))) { 95 if (!inode) { 96 err = ovl_check_append_only(d_inode(real), open_flags); 97 if (err) 98 return ERR_PTR(err); 99 } 100 return real; 101 } 102 103 real = ovl_dentry_lower(dentry); 104 if (!real) 105 goto bug; 106 107 /* Handle recursion */ 108 real = d_real(real, inode, open_flags); 109 110 if (!inode || inode == d_inode(real)) 111 return real; 112 bug: 113 WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry, 114 inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0); 115 return dentry; 116 } 117 118 static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) 119 { 120 struct ovl_entry *oe = dentry->d_fsdata; 121 unsigned int i; 122 int ret = 1; 123 124 for (i = 0; i < oe->numlower; i++) { 125 struct dentry *d = oe->lowerstack[i].dentry; 126 127 if (d->d_flags & DCACHE_OP_REVALIDATE) { 128 ret = d->d_op->d_revalidate(d, flags); 129 if (ret < 0) 130 return ret; 131 if (!ret) { 132 if (!(flags & LOOKUP_RCU)) 133 d_invalidate(d); 134 return -ESTALE; 135 } 136 } 137 } 138 return 1; 139 } 140 141 static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) 142 { 143 struct ovl_entry *oe = dentry->d_fsdata; 144 unsigned int i; 145 int ret = 1; 146 147 for (i = 0; i < oe->numlower; i++) { 148 struct dentry *d = oe->lowerstack[i].dentry; 149 150 if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) { 151 ret = d->d_op->d_weak_revalidate(d, flags); 152 if (ret <= 0) 153 break; 154 } 155 } 156 return ret; 157 } 158 159 static const struct dentry_operations ovl_dentry_operations = { 160 .d_release = ovl_dentry_release, 161 .d_real = ovl_d_real, 162 }; 163 164 static const struct dentry_operations ovl_reval_dentry_operations = { 165 .d_release = ovl_dentry_release, 166 .d_real = ovl_d_real, 167 .d_revalidate = ovl_dentry_revalidate, 168 .d_weak_revalidate = ovl_dentry_weak_revalidate, 169 }; 170 171 static struct kmem_cache *ovl_inode_cachep; 172 173 static struct inode *ovl_alloc_inode(struct super_block *sb) 174 { 175 struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL); 176 177 oi->cache = NULL; 178 oi->redirect = NULL; 179 oi->version = 0; 180 oi->flags = 0; 181 oi->__upperdentry = NULL; 182 oi->lower = NULL; 183 mutex_init(&oi->lock); 184 185 return &oi->vfs_inode; 186 } 187 188 static void ovl_i_callback(struct rcu_head *head) 189 { 190 struct inode *inode = container_of(head, struct inode, i_rcu); 191 192 kmem_cache_free(ovl_inode_cachep, OVL_I(inode)); 193 } 194 195 static void ovl_destroy_inode(struct inode *inode) 196 { 197 struct ovl_inode *oi = OVL_I(inode); 198 199 dput(oi->__upperdentry); 200 kfree(oi->redirect); 201 mutex_destroy(&oi->lock); 202 203 call_rcu(&inode->i_rcu, ovl_i_callback); 204 } 205 206 static void ovl_put_super(struct super_block *sb) 207 { 208 struct ovl_fs *ufs = sb->s_fs_info; 209 unsigned i; 210 211 dput(ufs->indexdir); 212 dput(ufs->workdir); 213 ovl_inuse_unlock(ufs->workbasedir); 214 dput(ufs->workbasedir); 215 if (ufs->upper_mnt) 216 ovl_inuse_unlock(ufs->upper_mnt->mnt_root); 217 mntput(ufs->upper_mnt); 218 for (i = 0; i < ufs->numlower; i++) 219 mntput(ufs->lower_mnt[i]); 220 kfree(ufs->lower_mnt); 221 222 kfree(ufs->config.lowerdir); 223 kfree(ufs->config.upperdir); 224 kfree(ufs->config.workdir); 225 put_cred(ufs->creator_cred); 226 kfree(ufs); 227 } 228 229 static int ovl_sync_fs(struct super_block *sb, int wait) 230 { 231 struct ovl_fs *ufs = sb->s_fs_info; 232 struct super_block *upper_sb; 233 int ret; 234 235 if (!ufs->upper_mnt) 236 return 0; 237 upper_sb = ufs->upper_mnt->mnt_sb; 238 if (!upper_sb->s_op->sync_fs) 239 return 0; 240 241 /* real inodes have already been synced by sync_filesystem(ovl_sb) */ 242 down_read(&upper_sb->s_umount); 243 ret = upper_sb->s_op->sync_fs(upper_sb, wait); 244 up_read(&upper_sb->s_umount); 245 return ret; 246 } 247 248 /** 249 * ovl_statfs 250 * @sb: The overlayfs super block 251 * @buf: The struct kstatfs to fill in with stats 252 * 253 * Get the filesystem statistics. As writes always target the upper layer 254 * filesystem pass the statfs to the upper filesystem (if it exists) 255 */ 256 static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) 257 { 258 struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 259 struct dentry *root_dentry = dentry->d_sb->s_root; 260 struct path path; 261 int err; 262 263 ovl_path_real(root_dentry, &path); 264 265 err = vfs_statfs(&path, buf); 266 if (!err) { 267 buf->f_namelen = ofs->namelen; 268 buf->f_type = OVERLAYFS_SUPER_MAGIC; 269 } 270 271 return err; 272 } 273 274 /* Will this overlay be forced to mount/remount ro? */ 275 static bool ovl_force_readonly(struct ovl_fs *ufs) 276 { 277 return (!ufs->upper_mnt || !ufs->workdir); 278 } 279 280 /** 281 * ovl_show_options 282 * 283 * Prints the mount options for a given superblock. 284 * Returns zero; does not fail. 285 */ 286 static int ovl_show_options(struct seq_file *m, struct dentry *dentry) 287 { 288 struct super_block *sb = dentry->d_sb; 289 struct ovl_fs *ufs = sb->s_fs_info; 290 291 seq_show_option(m, "lowerdir", ufs->config.lowerdir); 292 if (ufs->config.upperdir) { 293 seq_show_option(m, "upperdir", ufs->config.upperdir); 294 seq_show_option(m, "workdir", ufs->config.workdir); 295 } 296 if (ufs->config.default_permissions) 297 seq_puts(m, ",default_permissions"); 298 if (ufs->config.redirect_dir != ovl_redirect_dir_def) 299 seq_printf(m, ",redirect_dir=%s", 300 ufs->config.redirect_dir ? "on" : "off"); 301 if (ufs->config.index != ovl_index_def) 302 seq_printf(m, ",index=%s", 303 ufs->config.index ? "on" : "off"); 304 return 0; 305 } 306 307 static int ovl_remount(struct super_block *sb, int *flags, char *data) 308 { 309 struct ovl_fs *ufs = sb->s_fs_info; 310 311 if (!(*flags & MS_RDONLY) && ovl_force_readonly(ufs)) 312 return -EROFS; 313 314 return 0; 315 } 316 317 static const struct super_operations ovl_super_operations = { 318 .alloc_inode = ovl_alloc_inode, 319 .destroy_inode = ovl_destroy_inode, 320 .drop_inode = generic_delete_inode, 321 .put_super = ovl_put_super, 322 .sync_fs = ovl_sync_fs, 323 .statfs = ovl_statfs, 324 .show_options = ovl_show_options, 325 .remount_fs = ovl_remount, 326 }; 327 328 enum { 329 OPT_LOWERDIR, 330 OPT_UPPERDIR, 331 OPT_WORKDIR, 332 OPT_DEFAULT_PERMISSIONS, 333 OPT_REDIRECT_DIR_ON, 334 OPT_REDIRECT_DIR_OFF, 335 OPT_INDEX_ON, 336 OPT_INDEX_OFF, 337 OPT_ERR, 338 }; 339 340 static const match_table_t ovl_tokens = { 341 {OPT_LOWERDIR, "lowerdir=%s"}, 342 {OPT_UPPERDIR, "upperdir=%s"}, 343 {OPT_WORKDIR, "workdir=%s"}, 344 {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, 345 {OPT_REDIRECT_DIR_ON, "redirect_dir=on"}, 346 {OPT_REDIRECT_DIR_OFF, "redirect_dir=off"}, 347 {OPT_INDEX_ON, "index=on"}, 348 {OPT_INDEX_OFF, "index=off"}, 349 {OPT_ERR, NULL} 350 }; 351 352 static char *ovl_next_opt(char **s) 353 { 354 char *sbegin = *s; 355 char *p; 356 357 if (sbegin == NULL) 358 return NULL; 359 360 for (p = sbegin; *p; p++) { 361 if (*p == '\\') { 362 p++; 363 if (!*p) 364 break; 365 } else if (*p == ',') { 366 *p = '\0'; 367 *s = p + 1; 368 return sbegin; 369 } 370 } 371 *s = NULL; 372 return sbegin; 373 } 374 375 static int ovl_parse_opt(char *opt, struct ovl_config *config) 376 { 377 char *p; 378 379 while ((p = ovl_next_opt(&opt)) != NULL) { 380 int token; 381 substring_t args[MAX_OPT_ARGS]; 382 383 if (!*p) 384 continue; 385 386 token = match_token(p, ovl_tokens, args); 387 switch (token) { 388 case OPT_UPPERDIR: 389 kfree(config->upperdir); 390 config->upperdir = match_strdup(&args[0]); 391 if (!config->upperdir) 392 return -ENOMEM; 393 break; 394 395 case OPT_LOWERDIR: 396 kfree(config->lowerdir); 397 config->lowerdir = match_strdup(&args[0]); 398 if (!config->lowerdir) 399 return -ENOMEM; 400 break; 401 402 case OPT_WORKDIR: 403 kfree(config->workdir); 404 config->workdir = match_strdup(&args[0]); 405 if (!config->workdir) 406 return -ENOMEM; 407 break; 408 409 case OPT_DEFAULT_PERMISSIONS: 410 config->default_permissions = true; 411 break; 412 413 case OPT_REDIRECT_DIR_ON: 414 config->redirect_dir = true; 415 break; 416 417 case OPT_REDIRECT_DIR_OFF: 418 config->redirect_dir = false; 419 break; 420 421 case OPT_INDEX_ON: 422 config->index = true; 423 break; 424 425 case OPT_INDEX_OFF: 426 config->index = false; 427 break; 428 429 default: 430 pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); 431 return -EINVAL; 432 } 433 } 434 435 /* Workdir is useless in non-upper mount */ 436 if (!config->upperdir && config->workdir) { 437 pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n", 438 config->workdir); 439 kfree(config->workdir); 440 config->workdir = NULL; 441 } 442 443 return 0; 444 } 445 446 #define OVL_WORKDIR_NAME "work" 447 #define OVL_INDEXDIR_NAME "index" 448 449 static struct dentry *ovl_workdir_create(struct super_block *sb, 450 struct ovl_fs *ufs, 451 struct dentry *dentry, 452 const char *name, bool persist) 453 { 454 struct inode *dir = dentry->d_inode; 455 struct vfsmount *mnt = ufs->upper_mnt; 456 struct dentry *work; 457 int err; 458 bool retried = false; 459 bool locked = false; 460 461 err = mnt_want_write(mnt); 462 if (err) 463 goto out_err; 464 465 inode_lock_nested(dir, I_MUTEX_PARENT); 466 locked = true; 467 468 retry: 469 work = lookup_one_len(name, dentry, strlen(name)); 470 471 if (!IS_ERR(work)) { 472 struct iattr attr = { 473 .ia_valid = ATTR_MODE, 474 .ia_mode = S_IFDIR | 0, 475 }; 476 477 if (work->d_inode) { 478 err = -EEXIST; 479 if (retried) 480 goto out_dput; 481 482 if (persist) 483 goto out_unlock; 484 485 retried = true; 486 ovl_workdir_cleanup(dir, mnt, work, 0); 487 dput(work); 488 goto retry; 489 } 490 491 err = ovl_create_real(dir, work, 492 &(struct cattr){.mode = S_IFDIR | 0}, 493 NULL, true); 494 if (err) 495 goto out_dput; 496 497 /* 498 * Try to remove POSIX ACL xattrs from workdir. We are good if: 499 * 500 * a) success (there was a POSIX ACL xattr and was removed) 501 * b) -ENODATA (there was no POSIX ACL xattr) 502 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported) 503 * 504 * There are various other error values that could effectively 505 * mean that the xattr doesn't exist (e.g. -ERANGE is returned 506 * if the xattr name is too long), but the set of filesystems 507 * allowed as upper are limited to "normal" ones, where checking 508 * for the above two errors is sufficient. 509 */ 510 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT); 511 if (err && err != -ENODATA && err != -EOPNOTSUPP) 512 goto out_dput; 513 514 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS); 515 if (err && err != -ENODATA && err != -EOPNOTSUPP) 516 goto out_dput; 517 518 /* Clear any inherited mode bits */ 519 inode_lock(work->d_inode); 520 err = notify_change(work, &attr, NULL); 521 inode_unlock(work->d_inode); 522 if (err) 523 goto out_dput; 524 } else { 525 err = PTR_ERR(work); 526 goto out_err; 527 } 528 out_unlock: 529 mnt_drop_write(mnt); 530 if (locked) 531 inode_unlock(dir); 532 533 return work; 534 535 out_dput: 536 dput(work); 537 out_err: 538 pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n", 539 ufs->config.workdir, name, -err); 540 sb->s_flags |= MS_RDONLY; 541 work = NULL; 542 goto out_unlock; 543 } 544 545 static void ovl_unescape(char *s) 546 { 547 char *d = s; 548 549 for (;; s++, d++) { 550 if (*s == '\\') 551 s++; 552 *d = *s; 553 if (!*s) 554 break; 555 } 556 } 557 558 static int ovl_mount_dir_noesc(const char *name, struct path *path) 559 { 560 int err = -EINVAL; 561 562 if (!*name) { 563 pr_err("overlayfs: empty lowerdir\n"); 564 goto out; 565 } 566 err = kern_path(name, LOOKUP_FOLLOW, path); 567 if (err) { 568 pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); 569 goto out; 570 } 571 err = -EINVAL; 572 if (ovl_dentry_weird(path->dentry)) { 573 pr_err("overlayfs: filesystem on '%s' not supported\n", name); 574 goto out_put; 575 } 576 if (!d_is_dir(path->dentry)) { 577 pr_err("overlayfs: '%s' not a directory\n", name); 578 goto out_put; 579 } 580 return 0; 581 582 out_put: 583 path_put(path); 584 out: 585 return err; 586 } 587 588 static int ovl_mount_dir(const char *name, struct path *path) 589 { 590 int err = -ENOMEM; 591 char *tmp = kstrdup(name, GFP_KERNEL); 592 593 if (tmp) { 594 ovl_unescape(tmp); 595 err = ovl_mount_dir_noesc(tmp, path); 596 597 if (!err) 598 if (ovl_dentry_remote(path->dentry)) { 599 pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n", 600 tmp); 601 path_put(path); 602 err = -EINVAL; 603 } 604 kfree(tmp); 605 } 606 return err; 607 } 608 609 static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs, 610 const char *name) 611 { 612 struct kstatfs statfs; 613 int err = vfs_statfs(path, &statfs); 614 615 if (err) 616 pr_err("overlayfs: statfs failed on '%s'\n", name); 617 else 618 ofs->namelen = max(ofs->namelen, statfs.f_namelen); 619 620 return err; 621 } 622 623 static int ovl_lower_dir(const char *name, struct path *path, 624 struct ovl_fs *ofs, int *stack_depth, bool *remote) 625 { 626 int err; 627 628 err = ovl_mount_dir_noesc(name, path); 629 if (err) 630 goto out; 631 632 err = ovl_check_namelen(path, ofs, name); 633 if (err) 634 goto out_put; 635 636 *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth); 637 638 if (ovl_dentry_remote(path->dentry)) 639 *remote = true; 640 641 /* 642 * The inodes index feature needs to encode and decode file 643 * handles, so it requires that all layers support them. 644 */ 645 if (ofs->config.index && !ovl_can_decode_fh(path->dentry->d_sb)) { 646 ofs->config.index = false; 647 pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off.\n", name); 648 } 649 650 return 0; 651 652 out_put: 653 path_put(path); 654 out: 655 return err; 656 } 657 658 /* Workdir should not be subdir of upperdir and vice versa */ 659 static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) 660 { 661 bool ok = false; 662 663 if (workdir != upperdir) { 664 ok = (lock_rename(workdir, upperdir) == NULL); 665 unlock_rename(workdir, upperdir); 666 } 667 return ok; 668 } 669 670 static unsigned int ovl_split_lowerdirs(char *str) 671 { 672 unsigned int ctr = 1; 673 char *s, *d; 674 675 for (s = d = str;; s++, d++) { 676 if (*s == '\\') { 677 s++; 678 } else if (*s == ':') { 679 *d = '\0'; 680 ctr++; 681 continue; 682 } 683 *d = *s; 684 if (!*s) 685 break; 686 } 687 return ctr; 688 } 689 690 static int __maybe_unused 691 ovl_posix_acl_xattr_get(const struct xattr_handler *handler, 692 struct dentry *dentry, struct inode *inode, 693 const char *name, void *buffer, size_t size) 694 { 695 return ovl_xattr_get(dentry, handler->name, buffer, size); 696 } 697 698 static int __maybe_unused 699 ovl_posix_acl_xattr_set(const struct xattr_handler *handler, 700 struct dentry *dentry, struct inode *inode, 701 const char *name, const void *value, 702 size_t size, int flags) 703 { 704 struct dentry *workdir = ovl_workdir(dentry); 705 struct inode *realinode = ovl_inode_real(inode); 706 struct posix_acl *acl = NULL; 707 int err; 708 709 /* Check that everything is OK before copy-up */ 710 if (value) { 711 acl = posix_acl_from_xattr(&init_user_ns, value, size); 712 if (IS_ERR(acl)) 713 return PTR_ERR(acl); 714 } 715 err = -EOPNOTSUPP; 716 if (!IS_POSIXACL(d_inode(workdir))) 717 goto out_acl_release; 718 if (!realinode->i_op->set_acl) 719 goto out_acl_release; 720 if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) { 721 err = acl ? -EACCES : 0; 722 goto out_acl_release; 723 } 724 err = -EPERM; 725 if (!inode_owner_or_capable(inode)) 726 goto out_acl_release; 727 728 posix_acl_release(acl); 729 730 /* 731 * Check if sgid bit needs to be cleared (actual setacl operation will 732 * be done with mounter's capabilities and so that won't do it for us). 733 */ 734 if (unlikely(inode->i_mode & S_ISGID) && 735 handler->flags == ACL_TYPE_ACCESS && 736 !in_group_p(inode->i_gid) && 737 !capable_wrt_inode_uidgid(inode, CAP_FSETID)) { 738 struct iattr iattr = { .ia_valid = ATTR_KILL_SGID }; 739 740 err = ovl_setattr(dentry, &iattr); 741 if (err) 742 return err; 743 } 744 745 err = ovl_xattr_set(dentry, handler->name, value, size, flags); 746 if (!err) 747 ovl_copyattr(ovl_inode_real(inode), inode); 748 749 return err; 750 751 out_acl_release: 752 posix_acl_release(acl); 753 return err; 754 } 755 756 static int ovl_own_xattr_get(const struct xattr_handler *handler, 757 struct dentry *dentry, struct inode *inode, 758 const char *name, void *buffer, size_t size) 759 { 760 return -EOPNOTSUPP; 761 } 762 763 static int ovl_own_xattr_set(const struct xattr_handler *handler, 764 struct dentry *dentry, struct inode *inode, 765 const char *name, const void *value, 766 size_t size, int flags) 767 { 768 return -EOPNOTSUPP; 769 } 770 771 static int ovl_other_xattr_get(const struct xattr_handler *handler, 772 struct dentry *dentry, struct inode *inode, 773 const char *name, void *buffer, size_t size) 774 { 775 return ovl_xattr_get(dentry, name, buffer, size); 776 } 777 778 static int ovl_other_xattr_set(const struct xattr_handler *handler, 779 struct dentry *dentry, struct inode *inode, 780 const char *name, const void *value, 781 size_t size, int flags) 782 { 783 return ovl_xattr_set(dentry, name, value, size, flags); 784 } 785 786 static const struct xattr_handler __maybe_unused 787 ovl_posix_acl_access_xattr_handler = { 788 .name = XATTR_NAME_POSIX_ACL_ACCESS, 789 .flags = ACL_TYPE_ACCESS, 790 .get = ovl_posix_acl_xattr_get, 791 .set = ovl_posix_acl_xattr_set, 792 }; 793 794 static const struct xattr_handler __maybe_unused 795 ovl_posix_acl_default_xattr_handler = { 796 .name = XATTR_NAME_POSIX_ACL_DEFAULT, 797 .flags = ACL_TYPE_DEFAULT, 798 .get = ovl_posix_acl_xattr_get, 799 .set = ovl_posix_acl_xattr_set, 800 }; 801 802 static const struct xattr_handler ovl_own_xattr_handler = { 803 .prefix = OVL_XATTR_PREFIX, 804 .get = ovl_own_xattr_get, 805 .set = ovl_own_xattr_set, 806 }; 807 808 static const struct xattr_handler ovl_other_xattr_handler = { 809 .prefix = "", /* catch all */ 810 .get = ovl_other_xattr_get, 811 .set = ovl_other_xattr_set, 812 }; 813 814 static const struct xattr_handler *ovl_xattr_handlers[] = { 815 #ifdef CONFIG_FS_POSIX_ACL 816 &ovl_posix_acl_access_xattr_handler, 817 &ovl_posix_acl_default_xattr_handler, 818 #endif 819 &ovl_own_xattr_handler, 820 &ovl_other_xattr_handler, 821 NULL 822 }; 823 824 static int ovl_fill_super(struct super_block *sb, void *data, int silent) 825 { 826 struct path upperpath = { }; 827 struct path workpath = { }; 828 struct dentry *root_dentry; 829 struct ovl_entry *oe; 830 struct ovl_fs *ufs; 831 struct path *stack = NULL; 832 char *lowertmp; 833 char *lower; 834 unsigned int numlower; 835 unsigned int stacklen = 0; 836 unsigned int i; 837 bool remote = false; 838 struct cred *cred; 839 int err; 840 841 err = -ENOMEM; 842 ufs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); 843 if (!ufs) 844 goto out; 845 846 ufs->config.redirect_dir = ovl_redirect_dir_def; 847 ufs->config.index = ovl_index_def; 848 err = ovl_parse_opt((char *) data, &ufs->config); 849 if (err) 850 goto out_free_config; 851 852 err = -EINVAL; 853 if (!ufs->config.lowerdir) { 854 if (!silent) 855 pr_err("overlayfs: missing 'lowerdir'\n"); 856 goto out_free_config; 857 } 858 859 sb->s_stack_depth = 0; 860 sb->s_maxbytes = MAX_LFS_FILESIZE; 861 if (ufs->config.upperdir) { 862 if (!ufs->config.workdir) { 863 pr_err("overlayfs: missing 'workdir'\n"); 864 goto out_free_config; 865 } 866 867 err = ovl_mount_dir(ufs->config.upperdir, &upperpath); 868 if (err) 869 goto out_free_config; 870 871 /* Upper fs should not be r/o */ 872 if (upperpath.mnt->mnt_sb->s_flags & MS_RDONLY) { 873 pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n"); 874 err = -EINVAL; 875 goto out_put_upperpath; 876 } 877 878 err = ovl_check_namelen(&upperpath, ufs, ufs->config.upperdir); 879 if (err) 880 goto out_put_upperpath; 881 882 err = -EBUSY; 883 if (!ovl_inuse_trylock(upperpath.dentry)) { 884 pr_err("overlayfs: upperdir is in-use by another mount\n"); 885 goto out_put_upperpath; 886 } 887 888 err = ovl_mount_dir(ufs->config.workdir, &workpath); 889 if (err) 890 goto out_unlock_upperdentry; 891 892 err = -EINVAL; 893 if (upperpath.mnt != workpath.mnt) { 894 pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); 895 goto out_put_workpath; 896 } 897 if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) { 898 pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); 899 goto out_put_workpath; 900 } 901 902 err = -EBUSY; 903 if (!ovl_inuse_trylock(workpath.dentry)) { 904 pr_err("overlayfs: workdir is in-use by another mount\n"); 905 goto out_put_workpath; 906 } 907 908 ufs->workbasedir = workpath.dentry; 909 sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth; 910 } 911 err = -ENOMEM; 912 lowertmp = kstrdup(ufs->config.lowerdir, GFP_KERNEL); 913 if (!lowertmp) 914 goto out_unlock_workdentry; 915 916 err = -EINVAL; 917 stacklen = ovl_split_lowerdirs(lowertmp); 918 if (stacklen > OVL_MAX_STACK) { 919 pr_err("overlayfs: too many lower directories, limit is %d\n", 920 OVL_MAX_STACK); 921 goto out_free_lowertmp; 922 } else if (!ufs->config.upperdir && stacklen == 1) { 923 pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n"); 924 goto out_free_lowertmp; 925 } 926 927 err = -ENOMEM; 928 stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); 929 if (!stack) 930 goto out_free_lowertmp; 931 932 err = -EINVAL; 933 lower = lowertmp; 934 for (numlower = 0; numlower < stacklen; numlower++) { 935 err = ovl_lower_dir(lower, &stack[numlower], ufs, 936 &sb->s_stack_depth, &remote); 937 if (err) 938 goto out_put_lowerpath; 939 940 lower = strchr(lower, '\0') + 1; 941 } 942 943 err = -EINVAL; 944 sb->s_stack_depth++; 945 if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { 946 pr_err("overlayfs: maximum fs stacking depth exceeded\n"); 947 goto out_put_lowerpath; 948 } 949 950 if (ufs->config.upperdir) { 951 ufs->upper_mnt = clone_private_mount(&upperpath); 952 err = PTR_ERR(ufs->upper_mnt); 953 if (IS_ERR(ufs->upper_mnt)) { 954 pr_err("overlayfs: failed to clone upperpath\n"); 955 goto out_put_lowerpath; 956 } 957 958 /* Don't inherit atime flags */ 959 ufs->upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME); 960 961 sb->s_time_gran = ufs->upper_mnt->mnt_sb->s_time_gran; 962 963 ufs->workdir = ovl_workdir_create(sb, ufs, workpath.dentry, 964 OVL_WORKDIR_NAME, false); 965 /* 966 * Upper should support d_type, else whiteouts are visible. 967 * Given workdir and upper are on same fs, we can do 968 * iterate_dir() on workdir. This check requires successful 969 * creation of workdir in previous step. 970 */ 971 if (ufs->workdir) { 972 struct dentry *temp; 973 974 err = ovl_check_d_type_supported(&workpath); 975 if (err < 0) 976 goto out_put_workdir; 977 978 /* 979 * We allowed this configuration and don't want to 980 * break users over kernel upgrade. So warn instead 981 * of erroring out. 982 */ 983 if (!err) 984 pr_warn("overlayfs: upper fs needs to support d_type.\n"); 985 986 /* Check if upper/work fs supports O_TMPFILE */ 987 temp = ovl_do_tmpfile(ufs->workdir, S_IFREG | 0); 988 ufs->tmpfile = !IS_ERR(temp); 989 if (ufs->tmpfile) 990 dput(temp); 991 else 992 pr_warn("overlayfs: upper fs does not support tmpfile.\n"); 993 994 /* 995 * Check if upper/work fs supports trusted.overlay.* 996 * xattr 997 */ 998 err = ovl_do_setxattr(ufs->workdir, OVL_XATTR_OPAQUE, 999 "0", 1, 0); 1000 if (err) { 1001 ufs->noxattr = true; 1002 pr_warn("overlayfs: upper fs does not support xattr.\n"); 1003 } else { 1004 vfs_removexattr(ufs->workdir, OVL_XATTR_OPAQUE); 1005 } 1006 1007 /* Check if upper/work fs supports file handles */ 1008 if (ufs->config.index && 1009 !ovl_can_decode_fh(ufs->workdir->d_sb)) { 1010 ufs->config.index = false; 1011 pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n"); 1012 } 1013 } 1014 } 1015 1016 err = -ENOMEM; 1017 ufs->lower_mnt = kcalloc(numlower, sizeof(struct vfsmount *), GFP_KERNEL); 1018 if (ufs->lower_mnt == NULL) 1019 goto out_put_workdir; 1020 for (i = 0; i < numlower; i++) { 1021 struct vfsmount *mnt = clone_private_mount(&stack[i]); 1022 1023 err = PTR_ERR(mnt); 1024 if (IS_ERR(mnt)) { 1025 pr_err("overlayfs: failed to clone lowerpath\n"); 1026 goto out_put_lower_mnt; 1027 } 1028 /* 1029 * Make lower_mnt R/O. That way fchmod/fchown on lower file 1030 * will fail instead of modifying lower fs. 1031 */ 1032 mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME; 1033 1034 ufs->lower_mnt[ufs->numlower] = mnt; 1035 ufs->numlower++; 1036 1037 /* Check if all lower layers are on same sb */ 1038 if (i == 0) 1039 ufs->same_sb = mnt->mnt_sb; 1040 else if (ufs->same_sb != mnt->mnt_sb) 1041 ufs->same_sb = NULL; 1042 } 1043 1044 /* If the upper fs is nonexistent, we mark overlayfs r/o too */ 1045 if (!ufs->upper_mnt) 1046 sb->s_flags |= MS_RDONLY; 1047 else if (ufs->upper_mnt->mnt_sb != ufs->same_sb) 1048 ufs->same_sb = NULL; 1049 1050 if (!(ovl_force_readonly(ufs)) && ufs->config.index) { 1051 /* Verify lower root is upper root origin */ 1052 err = ovl_verify_origin(upperpath.dentry, ufs->lower_mnt[0], 1053 stack[0].dentry, false, true); 1054 if (err) { 1055 pr_err("overlayfs: failed to verify upper root origin\n"); 1056 goto out_put_lower_mnt; 1057 } 1058 1059 ufs->indexdir = ovl_workdir_create(sb, ufs, workpath.dentry, 1060 OVL_INDEXDIR_NAME, true); 1061 err = PTR_ERR(ufs->indexdir); 1062 if (IS_ERR(ufs->indexdir)) 1063 goto out_put_lower_mnt; 1064 1065 if (ufs->indexdir) { 1066 /* Verify upper root is index dir origin */ 1067 err = ovl_verify_origin(ufs->indexdir, ufs->upper_mnt, 1068 upperpath.dentry, true, true); 1069 if (err) 1070 pr_err("overlayfs: failed to verify index dir origin\n"); 1071 1072 /* Cleanup bad/stale/orphan index entries */ 1073 if (!err) 1074 err = ovl_indexdir_cleanup(ufs->indexdir, 1075 ufs->upper_mnt, 1076 stack, numlower); 1077 } 1078 if (err || !ufs->indexdir) 1079 pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); 1080 if (err) 1081 goto out_put_indexdir; 1082 } 1083 1084 /* Show index=off/on in /proc/mounts for any of the reasons above */ 1085 if (!ufs->indexdir) 1086 ufs->config.index = false; 1087 1088 if (remote) 1089 sb->s_d_op = &ovl_reval_dentry_operations; 1090 else 1091 sb->s_d_op = &ovl_dentry_operations; 1092 1093 ufs->creator_cred = cred = prepare_creds(); 1094 if (!cred) 1095 goto out_put_indexdir; 1096 1097 /* Never override disk quota limits or use reserved space */ 1098 cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); 1099 1100 err = -ENOMEM; 1101 oe = ovl_alloc_entry(numlower); 1102 if (!oe) 1103 goto out_put_cred; 1104 1105 sb->s_magic = OVERLAYFS_SUPER_MAGIC; 1106 sb->s_op = &ovl_super_operations; 1107 sb->s_xattr = ovl_xattr_handlers; 1108 sb->s_fs_info = ufs; 1109 sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK; 1110 1111 root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); 1112 if (!root_dentry) 1113 goto out_free_oe; 1114 1115 mntput(upperpath.mnt); 1116 for (i = 0; i < numlower; i++) 1117 mntput(stack[i].mnt); 1118 mntput(workpath.mnt); 1119 kfree(lowertmp); 1120 1121 if (upperpath.dentry) { 1122 oe->has_upper = true; 1123 if (ovl_is_impuredir(upperpath.dentry)) 1124 ovl_set_flag(OVL_IMPURE, d_inode(root_dentry)); 1125 } 1126 for (i = 0; i < numlower; i++) { 1127 oe->lowerstack[i].dentry = stack[i].dentry; 1128 oe->lowerstack[i].mnt = ufs->lower_mnt[i]; 1129 } 1130 kfree(stack); 1131 1132 root_dentry->d_fsdata = oe; 1133 1134 ovl_inode_init(d_inode(root_dentry), upperpath.dentry, 1135 ovl_dentry_lower(root_dentry)); 1136 1137 sb->s_root = root_dentry; 1138 1139 return 0; 1140 1141 out_free_oe: 1142 kfree(oe); 1143 out_put_cred: 1144 put_cred(ufs->creator_cred); 1145 out_put_indexdir: 1146 dput(ufs->indexdir); 1147 out_put_lower_mnt: 1148 for (i = 0; i < ufs->numlower; i++) 1149 mntput(ufs->lower_mnt[i]); 1150 kfree(ufs->lower_mnt); 1151 out_put_workdir: 1152 dput(ufs->workdir); 1153 mntput(ufs->upper_mnt); 1154 out_put_lowerpath: 1155 for (i = 0; i < numlower; i++) 1156 path_put(&stack[i]); 1157 kfree(stack); 1158 out_free_lowertmp: 1159 kfree(lowertmp); 1160 out_unlock_workdentry: 1161 ovl_inuse_unlock(workpath.dentry); 1162 out_put_workpath: 1163 path_put(&workpath); 1164 out_unlock_upperdentry: 1165 ovl_inuse_unlock(upperpath.dentry); 1166 out_put_upperpath: 1167 path_put(&upperpath); 1168 out_free_config: 1169 kfree(ufs->config.lowerdir); 1170 kfree(ufs->config.upperdir); 1171 kfree(ufs->config.workdir); 1172 kfree(ufs); 1173 out: 1174 return err; 1175 } 1176 1177 static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, 1178 const char *dev_name, void *raw_data) 1179 { 1180 return mount_nodev(fs_type, flags, raw_data, ovl_fill_super); 1181 } 1182 1183 static struct file_system_type ovl_fs_type = { 1184 .owner = THIS_MODULE, 1185 .name = "overlay", 1186 .mount = ovl_mount, 1187 .kill_sb = kill_anon_super, 1188 }; 1189 MODULE_ALIAS_FS("overlay"); 1190 1191 static void ovl_inode_init_once(void *foo) 1192 { 1193 struct ovl_inode *oi = foo; 1194 1195 inode_init_once(&oi->vfs_inode); 1196 } 1197 1198 static int __init ovl_init(void) 1199 { 1200 int err; 1201 1202 ovl_inode_cachep = kmem_cache_create("ovl_inode", 1203 sizeof(struct ovl_inode), 0, 1204 (SLAB_RECLAIM_ACCOUNT| 1205 SLAB_MEM_SPREAD|SLAB_ACCOUNT), 1206 ovl_inode_init_once); 1207 if (ovl_inode_cachep == NULL) 1208 return -ENOMEM; 1209 1210 err = register_filesystem(&ovl_fs_type); 1211 if (err) 1212 kmem_cache_destroy(ovl_inode_cachep); 1213 1214 return err; 1215 } 1216 1217 static void __exit ovl_exit(void) 1218 { 1219 unregister_filesystem(&ovl_fs_type); 1220 1221 /* 1222 * Make sure all delayed rcu free inodes are flushed before we 1223 * destroy cache. 1224 */ 1225 rcu_barrier(); 1226 kmem_cache_destroy(ovl_inode_cachep); 1227 1228 } 1229 1230 module_init(ovl_init); 1231 module_exit(ovl_exit); 1232