1 /* 2 * 3 * Copyright (C) 2011 Novell Inc. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 as published by 7 * the Free Software Foundation. 8 */ 9 10 #include <uapi/linux/magic.h> 11 #include <linux/fs.h> 12 #include <linux/namei.h> 13 #include <linux/xattr.h> 14 #include <linux/mount.h> 15 #include <linux/parser.h> 16 #include <linux/module.h> 17 #include <linux/statfs.h> 18 #include <linux/seq_file.h> 19 #include <linux/posix_acl_xattr.h> 20 #include "overlayfs.h" 21 #include "ovl_entry.h" 22 23 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 24 MODULE_DESCRIPTION("Overlay filesystem"); 25 MODULE_LICENSE("GPL"); 26 27 28 struct ovl_dir_cache; 29 30 #define OVL_MAX_STACK 500 31 32 static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR); 33 module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644); 34 MODULE_PARM_DESC(ovl_redirect_dir_def, 35 "Default to on or off for the redirect_dir feature"); 36 37 static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX); 38 module_param_named(index, ovl_index_def, bool, 0644); 39 MODULE_PARM_DESC(ovl_index_def, 40 "Default to on or off for the inodes index feature"); 41 42 static void ovl_dentry_release(struct dentry *dentry) 43 { 44 struct ovl_entry *oe = dentry->d_fsdata; 45 46 if (oe) { 47 unsigned int i; 48 49 for (i = 0; i < oe->numlower; i++) 50 dput(oe->lowerstack[i].dentry); 51 kfree_rcu(oe, rcu); 52 } 53 } 54 55 static int ovl_check_append_only(struct inode *inode, int flag) 56 { 57 /* 58 * This test was moot in vfs may_open() because overlay inode does 59 * not have the S_APPEND flag, so re-check on real upper inode 60 */ 61 if (IS_APPEND(inode)) { 62 if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND)) 63 return -EPERM; 64 if (flag & O_TRUNC) 65 return -EPERM; 66 } 67 68 return 0; 69 } 70 71 static struct dentry *ovl_d_real(struct dentry *dentry, 72 const struct inode *inode, 73 unsigned int open_flags, unsigned int flags) 74 { 75 struct dentry *real; 76 int err; 77 78 if (flags & D_REAL_UPPER) 79 return ovl_dentry_upper(dentry); 80 81 if (!d_is_reg(dentry)) { 82 if (!inode || inode == d_inode(dentry)) 83 return dentry; 84 goto bug; 85 } 86 87 if (open_flags) { 88 err = ovl_open_maybe_copy_up(dentry, open_flags); 89 if (err) 90 return ERR_PTR(err); 91 } 92 93 real = ovl_dentry_upper(dentry); 94 if (real && (!inode || inode == d_inode(real))) { 95 if (!inode) { 96 err = ovl_check_append_only(d_inode(real), open_flags); 97 if (err) 98 return ERR_PTR(err); 99 } 100 return real; 101 } 102 103 real = ovl_dentry_lower(dentry); 104 if (!real) 105 goto bug; 106 107 /* Handle recursion */ 108 real = d_real(real, inode, open_flags, 0); 109 110 if (!inode || inode == d_inode(real)) 111 return real; 112 bug: 113 WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry, 114 inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0); 115 return dentry; 116 } 117 118 static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) 119 { 120 struct ovl_entry *oe = dentry->d_fsdata; 121 unsigned int i; 122 int ret = 1; 123 124 for (i = 0; i < oe->numlower; i++) { 125 struct dentry *d = oe->lowerstack[i].dentry; 126 127 if (d->d_flags & DCACHE_OP_REVALIDATE) { 128 ret = d->d_op->d_revalidate(d, flags); 129 if (ret < 0) 130 return ret; 131 if (!ret) { 132 if (!(flags & LOOKUP_RCU)) 133 d_invalidate(d); 134 return -ESTALE; 135 } 136 } 137 } 138 return 1; 139 } 140 141 static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) 142 { 143 struct ovl_entry *oe = dentry->d_fsdata; 144 unsigned int i; 145 int ret = 1; 146 147 for (i = 0; i < oe->numlower; i++) { 148 struct dentry *d = oe->lowerstack[i].dentry; 149 150 if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) { 151 ret = d->d_op->d_weak_revalidate(d, flags); 152 if (ret <= 0) 153 break; 154 } 155 } 156 return ret; 157 } 158 159 static const struct dentry_operations ovl_dentry_operations = { 160 .d_release = ovl_dentry_release, 161 .d_real = ovl_d_real, 162 }; 163 164 static const struct dentry_operations ovl_reval_dentry_operations = { 165 .d_release = ovl_dentry_release, 166 .d_real = ovl_d_real, 167 .d_revalidate = ovl_dentry_revalidate, 168 .d_weak_revalidate = ovl_dentry_weak_revalidate, 169 }; 170 171 static struct kmem_cache *ovl_inode_cachep; 172 173 static struct inode *ovl_alloc_inode(struct super_block *sb) 174 { 175 struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL); 176 177 oi->cache = NULL; 178 oi->redirect = NULL; 179 oi->version = 0; 180 oi->flags = 0; 181 oi->__upperdentry = NULL; 182 oi->lower = NULL; 183 mutex_init(&oi->lock); 184 185 return &oi->vfs_inode; 186 } 187 188 static void ovl_i_callback(struct rcu_head *head) 189 { 190 struct inode *inode = container_of(head, struct inode, i_rcu); 191 192 kmem_cache_free(ovl_inode_cachep, OVL_I(inode)); 193 } 194 195 static void ovl_destroy_inode(struct inode *inode) 196 { 197 struct ovl_inode *oi = OVL_I(inode); 198 199 dput(oi->__upperdentry); 200 kfree(oi->redirect); 201 ovl_dir_cache_free(inode); 202 mutex_destroy(&oi->lock); 203 204 call_rcu(&inode->i_rcu, ovl_i_callback); 205 } 206 207 static void ovl_put_super(struct super_block *sb) 208 { 209 struct ovl_fs *ufs = sb->s_fs_info; 210 unsigned i; 211 212 dput(ufs->indexdir); 213 dput(ufs->workdir); 214 ovl_inuse_unlock(ufs->workbasedir); 215 dput(ufs->workbasedir); 216 if (ufs->upper_mnt) 217 ovl_inuse_unlock(ufs->upper_mnt->mnt_root); 218 mntput(ufs->upper_mnt); 219 for (i = 0; i < ufs->numlower; i++) 220 mntput(ufs->lower_mnt[i]); 221 kfree(ufs->lower_mnt); 222 223 kfree(ufs->config.lowerdir); 224 kfree(ufs->config.upperdir); 225 kfree(ufs->config.workdir); 226 put_cred(ufs->creator_cred); 227 kfree(ufs); 228 } 229 230 static int ovl_sync_fs(struct super_block *sb, int wait) 231 { 232 struct ovl_fs *ufs = sb->s_fs_info; 233 struct super_block *upper_sb; 234 int ret; 235 236 if (!ufs->upper_mnt) 237 return 0; 238 upper_sb = ufs->upper_mnt->mnt_sb; 239 if (!upper_sb->s_op->sync_fs) 240 return 0; 241 242 /* real inodes have already been synced by sync_filesystem(ovl_sb) */ 243 down_read(&upper_sb->s_umount); 244 ret = upper_sb->s_op->sync_fs(upper_sb, wait); 245 up_read(&upper_sb->s_umount); 246 return ret; 247 } 248 249 /** 250 * ovl_statfs 251 * @sb: The overlayfs super block 252 * @buf: The struct kstatfs to fill in with stats 253 * 254 * Get the filesystem statistics. As writes always target the upper layer 255 * filesystem pass the statfs to the upper filesystem (if it exists) 256 */ 257 static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) 258 { 259 struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 260 struct dentry *root_dentry = dentry->d_sb->s_root; 261 struct path path; 262 int err; 263 264 ovl_path_real(root_dentry, &path); 265 266 err = vfs_statfs(&path, buf); 267 if (!err) { 268 buf->f_namelen = ofs->namelen; 269 buf->f_type = OVERLAYFS_SUPER_MAGIC; 270 } 271 272 return err; 273 } 274 275 /* Will this overlay be forced to mount/remount ro? */ 276 static bool ovl_force_readonly(struct ovl_fs *ufs) 277 { 278 return (!ufs->upper_mnt || !ufs->workdir); 279 } 280 281 /** 282 * ovl_show_options 283 * 284 * Prints the mount options for a given superblock. 285 * Returns zero; does not fail. 286 */ 287 static int ovl_show_options(struct seq_file *m, struct dentry *dentry) 288 { 289 struct super_block *sb = dentry->d_sb; 290 struct ovl_fs *ufs = sb->s_fs_info; 291 292 seq_show_option(m, "lowerdir", ufs->config.lowerdir); 293 if (ufs->config.upperdir) { 294 seq_show_option(m, "upperdir", ufs->config.upperdir); 295 seq_show_option(m, "workdir", ufs->config.workdir); 296 } 297 if (ufs->config.default_permissions) 298 seq_puts(m, ",default_permissions"); 299 if (ufs->config.redirect_dir != ovl_redirect_dir_def) 300 seq_printf(m, ",redirect_dir=%s", 301 ufs->config.redirect_dir ? "on" : "off"); 302 if (ufs->config.index != ovl_index_def) 303 seq_printf(m, ",index=%s", 304 ufs->config.index ? "on" : "off"); 305 return 0; 306 } 307 308 static int ovl_remount(struct super_block *sb, int *flags, char *data) 309 { 310 struct ovl_fs *ufs = sb->s_fs_info; 311 312 if (!(*flags & MS_RDONLY) && ovl_force_readonly(ufs)) 313 return -EROFS; 314 315 return 0; 316 } 317 318 static const struct super_operations ovl_super_operations = { 319 .alloc_inode = ovl_alloc_inode, 320 .destroy_inode = ovl_destroy_inode, 321 .drop_inode = generic_delete_inode, 322 .put_super = ovl_put_super, 323 .sync_fs = ovl_sync_fs, 324 .statfs = ovl_statfs, 325 .show_options = ovl_show_options, 326 .remount_fs = ovl_remount, 327 }; 328 329 enum { 330 OPT_LOWERDIR, 331 OPT_UPPERDIR, 332 OPT_WORKDIR, 333 OPT_DEFAULT_PERMISSIONS, 334 OPT_REDIRECT_DIR_ON, 335 OPT_REDIRECT_DIR_OFF, 336 OPT_INDEX_ON, 337 OPT_INDEX_OFF, 338 OPT_ERR, 339 }; 340 341 static const match_table_t ovl_tokens = { 342 {OPT_LOWERDIR, "lowerdir=%s"}, 343 {OPT_UPPERDIR, "upperdir=%s"}, 344 {OPT_WORKDIR, "workdir=%s"}, 345 {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, 346 {OPT_REDIRECT_DIR_ON, "redirect_dir=on"}, 347 {OPT_REDIRECT_DIR_OFF, "redirect_dir=off"}, 348 {OPT_INDEX_ON, "index=on"}, 349 {OPT_INDEX_OFF, "index=off"}, 350 {OPT_ERR, NULL} 351 }; 352 353 static char *ovl_next_opt(char **s) 354 { 355 char *sbegin = *s; 356 char *p; 357 358 if (sbegin == NULL) 359 return NULL; 360 361 for (p = sbegin; *p; p++) { 362 if (*p == '\\') { 363 p++; 364 if (!*p) 365 break; 366 } else if (*p == ',') { 367 *p = '\0'; 368 *s = p + 1; 369 return sbegin; 370 } 371 } 372 *s = NULL; 373 return sbegin; 374 } 375 376 static int ovl_parse_opt(char *opt, struct ovl_config *config) 377 { 378 char *p; 379 380 while ((p = ovl_next_opt(&opt)) != NULL) { 381 int token; 382 substring_t args[MAX_OPT_ARGS]; 383 384 if (!*p) 385 continue; 386 387 token = match_token(p, ovl_tokens, args); 388 switch (token) { 389 case OPT_UPPERDIR: 390 kfree(config->upperdir); 391 config->upperdir = match_strdup(&args[0]); 392 if (!config->upperdir) 393 return -ENOMEM; 394 break; 395 396 case OPT_LOWERDIR: 397 kfree(config->lowerdir); 398 config->lowerdir = match_strdup(&args[0]); 399 if (!config->lowerdir) 400 return -ENOMEM; 401 break; 402 403 case OPT_WORKDIR: 404 kfree(config->workdir); 405 config->workdir = match_strdup(&args[0]); 406 if (!config->workdir) 407 return -ENOMEM; 408 break; 409 410 case OPT_DEFAULT_PERMISSIONS: 411 config->default_permissions = true; 412 break; 413 414 case OPT_REDIRECT_DIR_ON: 415 config->redirect_dir = true; 416 break; 417 418 case OPT_REDIRECT_DIR_OFF: 419 config->redirect_dir = false; 420 break; 421 422 case OPT_INDEX_ON: 423 config->index = true; 424 break; 425 426 case OPT_INDEX_OFF: 427 config->index = false; 428 break; 429 430 default: 431 pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); 432 return -EINVAL; 433 } 434 } 435 436 /* Workdir is useless in non-upper mount */ 437 if (!config->upperdir && config->workdir) { 438 pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n", 439 config->workdir); 440 kfree(config->workdir); 441 config->workdir = NULL; 442 } 443 444 return 0; 445 } 446 447 #define OVL_WORKDIR_NAME "work" 448 #define OVL_INDEXDIR_NAME "index" 449 450 static struct dentry *ovl_workdir_create(struct super_block *sb, 451 struct ovl_fs *ufs, 452 struct dentry *dentry, 453 const char *name, bool persist) 454 { 455 struct inode *dir = dentry->d_inode; 456 struct vfsmount *mnt = ufs->upper_mnt; 457 struct dentry *work; 458 int err; 459 bool retried = false; 460 bool locked = false; 461 462 err = mnt_want_write(mnt); 463 if (err) 464 goto out_err; 465 466 inode_lock_nested(dir, I_MUTEX_PARENT); 467 locked = true; 468 469 retry: 470 work = lookup_one_len(name, dentry, strlen(name)); 471 472 if (!IS_ERR(work)) { 473 struct iattr attr = { 474 .ia_valid = ATTR_MODE, 475 .ia_mode = S_IFDIR | 0, 476 }; 477 478 if (work->d_inode) { 479 err = -EEXIST; 480 if (retried) 481 goto out_dput; 482 483 if (persist) 484 goto out_unlock; 485 486 retried = true; 487 ovl_workdir_cleanup(dir, mnt, work, 0); 488 dput(work); 489 goto retry; 490 } 491 492 err = ovl_create_real(dir, work, 493 &(struct cattr){.mode = S_IFDIR | 0}, 494 NULL, true); 495 if (err) 496 goto out_dput; 497 498 /* 499 * Try to remove POSIX ACL xattrs from workdir. We are good if: 500 * 501 * a) success (there was a POSIX ACL xattr and was removed) 502 * b) -ENODATA (there was no POSIX ACL xattr) 503 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported) 504 * 505 * There are various other error values that could effectively 506 * mean that the xattr doesn't exist (e.g. -ERANGE is returned 507 * if the xattr name is too long), but the set of filesystems 508 * allowed as upper are limited to "normal" ones, where checking 509 * for the above two errors is sufficient. 510 */ 511 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT); 512 if (err && err != -ENODATA && err != -EOPNOTSUPP) 513 goto out_dput; 514 515 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS); 516 if (err && err != -ENODATA && err != -EOPNOTSUPP) 517 goto out_dput; 518 519 /* Clear any inherited mode bits */ 520 inode_lock(work->d_inode); 521 err = notify_change(work, &attr, NULL); 522 inode_unlock(work->d_inode); 523 if (err) 524 goto out_dput; 525 } else { 526 err = PTR_ERR(work); 527 goto out_err; 528 } 529 out_unlock: 530 mnt_drop_write(mnt); 531 if (locked) 532 inode_unlock(dir); 533 534 return work; 535 536 out_dput: 537 dput(work); 538 out_err: 539 pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n", 540 ufs->config.workdir, name, -err); 541 sb->s_flags |= MS_RDONLY; 542 work = NULL; 543 goto out_unlock; 544 } 545 546 static void ovl_unescape(char *s) 547 { 548 char *d = s; 549 550 for (;; s++, d++) { 551 if (*s == '\\') 552 s++; 553 *d = *s; 554 if (!*s) 555 break; 556 } 557 } 558 559 static int ovl_mount_dir_noesc(const char *name, struct path *path) 560 { 561 int err = -EINVAL; 562 563 if (!*name) { 564 pr_err("overlayfs: empty lowerdir\n"); 565 goto out; 566 } 567 err = kern_path(name, LOOKUP_FOLLOW, path); 568 if (err) { 569 pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); 570 goto out; 571 } 572 err = -EINVAL; 573 if (ovl_dentry_weird(path->dentry)) { 574 pr_err("overlayfs: filesystem on '%s' not supported\n", name); 575 goto out_put; 576 } 577 if (!d_is_dir(path->dentry)) { 578 pr_err("overlayfs: '%s' not a directory\n", name); 579 goto out_put; 580 } 581 return 0; 582 583 out_put: 584 path_put(path); 585 out: 586 return err; 587 } 588 589 static int ovl_mount_dir(const char *name, struct path *path) 590 { 591 int err = -ENOMEM; 592 char *tmp = kstrdup(name, GFP_KERNEL); 593 594 if (tmp) { 595 ovl_unescape(tmp); 596 err = ovl_mount_dir_noesc(tmp, path); 597 598 if (!err) 599 if (ovl_dentry_remote(path->dentry)) { 600 pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n", 601 tmp); 602 path_put(path); 603 err = -EINVAL; 604 } 605 kfree(tmp); 606 } 607 return err; 608 } 609 610 static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs, 611 const char *name) 612 { 613 struct kstatfs statfs; 614 int err = vfs_statfs(path, &statfs); 615 616 if (err) 617 pr_err("overlayfs: statfs failed on '%s'\n", name); 618 else 619 ofs->namelen = max(ofs->namelen, statfs.f_namelen); 620 621 return err; 622 } 623 624 static int ovl_lower_dir(const char *name, struct path *path, 625 struct ovl_fs *ofs, int *stack_depth, bool *remote) 626 { 627 int err; 628 629 err = ovl_mount_dir_noesc(name, path); 630 if (err) 631 goto out; 632 633 err = ovl_check_namelen(path, ofs, name); 634 if (err) 635 goto out_put; 636 637 *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth); 638 639 if (ovl_dentry_remote(path->dentry)) 640 *remote = true; 641 642 /* 643 * The inodes index feature needs to encode and decode file 644 * handles, so it requires that all layers support them. 645 */ 646 if (ofs->config.index && !ovl_can_decode_fh(path->dentry->d_sb)) { 647 ofs->config.index = false; 648 pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off.\n", name); 649 } 650 651 return 0; 652 653 out_put: 654 path_put(path); 655 out: 656 return err; 657 } 658 659 /* Workdir should not be subdir of upperdir and vice versa */ 660 static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) 661 { 662 bool ok = false; 663 664 if (workdir != upperdir) { 665 ok = (lock_rename(workdir, upperdir) == NULL); 666 unlock_rename(workdir, upperdir); 667 } 668 return ok; 669 } 670 671 static unsigned int ovl_split_lowerdirs(char *str) 672 { 673 unsigned int ctr = 1; 674 char *s, *d; 675 676 for (s = d = str;; s++, d++) { 677 if (*s == '\\') { 678 s++; 679 } else if (*s == ':') { 680 *d = '\0'; 681 ctr++; 682 continue; 683 } 684 *d = *s; 685 if (!*s) 686 break; 687 } 688 return ctr; 689 } 690 691 static int __maybe_unused 692 ovl_posix_acl_xattr_get(const struct xattr_handler *handler, 693 struct dentry *dentry, struct inode *inode, 694 const char *name, void *buffer, size_t size) 695 { 696 return ovl_xattr_get(dentry, inode, handler->name, buffer, size); 697 } 698 699 static int __maybe_unused 700 ovl_posix_acl_xattr_set(const struct xattr_handler *handler, 701 struct dentry *dentry, struct inode *inode, 702 const char *name, const void *value, 703 size_t size, int flags) 704 { 705 struct dentry *workdir = ovl_workdir(dentry); 706 struct inode *realinode = ovl_inode_real(inode); 707 struct posix_acl *acl = NULL; 708 int err; 709 710 /* Check that everything is OK before copy-up */ 711 if (value) { 712 acl = posix_acl_from_xattr(&init_user_ns, value, size); 713 if (IS_ERR(acl)) 714 return PTR_ERR(acl); 715 } 716 err = -EOPNOTSUPP; 717 if (!IS_POSIXACL(d_inode(workdir))) 718 goto out_acl_release; 719 if (!realinode->i_op->set_acl) 720 goto out_acl_release; 721 if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) { 722 err = acl ? -EACCES : 0; 723 goto out_acl_release; 724 } 725 err = -EPERM; 726 if (!inode_owner_or_capable(inode)) 727 goto out_acl_release; 728 729 posix_acl_release(acl); 730 731 /* 732 * Check if sgid bit needs to be cleared (actual setacl operation will 733 * be done with mounter's capabilities and so that won't do it for us). 734 */ 735 if (unlikely(inode->i_mode & S_ISGID) && 736 handler->flags == ACL_TYPE_ACCESS && 737 !in_group_p(inode->i_gid) && 738 !capable_wrt_inode_uidgid(inode, CAP_FSETID)) { 739 struct iattr iattr = { .ia_valid = ATTR_KILL_SGID }; 740 741 err = ovl_setattr(dentry, &iattr); 742 if (err) 743 return err; 744 } 745 746 err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags); 747 if (!err) 748 ovl_copyattr(ovl_inode_real(inode), inode); 749 750 return err; 751 752 out_acl_release: 753 posix_acl_release(acl); 754 return err; 755 } 756 757 static int ovl_own_xattr_get(const struct xattr_handler *handler, 758 struct dentry *dentry, struct inode *inode, 759 const char *name, void *buffer, size_t size) 760 { 761 return -EOPNOTSUPP; 762 } 763 764 static int ovl_own_xattr_set(const struct xattr_handler *handler, 765 struct dentry *dentry, struct inode *inode, 766 const char *name, const void *value, 767 size_t size, int flags) 768 { 769 return -EOPNOTSUPP; 770 } 771 772 static int ovl_other_xattr_get(const struct xattr_handler *handler, 773 struct dentry *dentry, struct inode *inode, 774 const char *name, void *buffer, size_t size) 775 { 776 return ovl_xattr_get(dentry, inode, name, buffer, size); 777 } 778 779 static int ovl_other_xattr_set(const struct xattr_handler *handler, 780 struct dentry *dentry, struct inode *inode, 781 const char *name, const void *value, 782 size_t size, int flags) 783 { 784 return ovl_xattr_set(dentry, inode, name, value, size, flags); 785 } 786 787 static const struct xattr_handler __maybe_unused 788 ovl_posix_acl_access_xattr_handler = { 789 .name = XATTR_NAME_POSIX_ACL_ACCESS, 790 .flags = ACL_TYPE_ACCESS, 791 .get = ovl_posix_acl_xattr_get, 792 .set = ovl_posix_acl_xattr_set, 793 }; 794 795 static const struct xattr_handler __maybe_unused 796 ovl_posix_acl_default_xattr_handler = { 797 .name = XATTR_NAME_POSIX_ACL_DEFAULT, 798 .flags = ACL_TYPE_DEFAULT, 799 .get = ovl_posix_acl_xattr_get, 800 .set = ovl_posix_acl_xattr_set, 801 }; 802 803 static const struct xattr_handler ovl_own_xattr_handler = { 804 .prefix = OVL_XATTR_PREFIX, 805 .get = ovl_own_xattr_get, 806 .set = ovl_own_xattr_set, 807 }; 808 809 static const struct xattr_handler ovl_other_xattr_handler = { 810 .prefix = "", /* catch all */ 811 .get = ovl_other_xattr_get, 812 .set = ovl_other_xattr_set, 813 }; 814 815 static const struct xattr_handler *ovl_xattr_handlers[] = { 816 #ifdef CONFIG_FS_POSIX_ACL 817 &ovl_posix_acl_access_xattr_handler, 818 &ovl_posix_acl_default_xattr_handler, 819 #endif 820 &ovl_own_xattr_handler, 821 &ovl_other_xattr_handler, 822 NULL 823 }; 824 825 static int ovl_fill_super(struct super_block *sb, void *data, int silent) 826 { 827 struct path upperpath = { }; 828 struct path workpath = { }; 829 struct dentry *root_dentry; 830 struct ovl_entry *oe; 831 struct ovl_fs *ufs; 832 struct path *stack = NULL; 833 char *lowertmp; 834 char *lower; 835 unsigned int numlower; 836 unsigned int stacklen = 0; 837 unsigned int i; 838 bool remote = false; 839 struct cred *cred; 840 int err; 841 842 err = -ENOMEM; 843 ufs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); 844 if (!ufs) 845 goto out; 846 847 ufs->config.redirect_dir = ovl_redirect_dir_def; 848 ufs->config.index = ovl_index_def; 849 err = ovl_parse_opt((char *) data, &ufs->config); 850 if (err) 851 goto out_free_config; 852 853 err = -EINVAL; 854 if (!ufs->config.lowerdir) { 855 if (!silent) 856 pr_err("overlayfs: missing 'lowerdir'\n"); 857 goto out_free_config; 858 } 859 860 sb->s_stack_depth = 0; 861 sb->s_maxbytes = MAX_LFS_FILESIZE; 862 if (ufs->config.upperdir) { 863 if (!ufs->config.workdir) { 864 pr_err("overlayfs: missing 'workdir'\n"); 865 goto out_free_config; 866 } 867 868 err = ovl_mount_dir(ufs->config.upperdir, &upperpath); 869 if (err) 870 goto out_free_config; 871 872 /* Upper fs should not be r/o */ 873 if (sb_rdonly(upperpath.mnt->mnt_sb)) { 874 pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n"); 875 err = -EINVAL; 876 goto out_put_upperpath; 877 } 878 879 err = ovl_check_namelen(&upperpath, ufs, ufs->config.upperdir); 880 if (err) 881 goto out_put_upperpath; 882 883 err = -EBUSY; 884 if (!ovl_inuse_trylock(upperpath.dentry)) { 885 pr_err("overlayfs: upperdir is in-use by another mount\n"); 886 goto out_put_upperpath; 887 } 888 889 err = ovl_mount_dir(ufs->config.workdir, &workpath); 890 if (err) 891 goto out_unlock_upperdentry; 892 893 err = -EINVAL; 894 if (upperpath.mnt != workpath.mnt) { 895 pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); 896 goto out_put_workpath; 897 } 898 if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) { 899 pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); 900 goto out_put_workpath; 901 } 902 903 err = -EBUSY; 904 if (!ovl_inuse_trylock(workpath.dentry)) { 905 pr_err("overlayfs: workdir is in-use by another mount\n"); 906 goto out_put_workpath; 907 } 908 909 ufs->workbasedir = workpath.dentry; 910 sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth; 911 } 912 err = -ENOMEM; 913 lowertmp = kstrdup(ufs->config.lowerdir, GFP_KERNEL); 914 if (!lowertmp) 915 goto out_unlock_workdentry; 916 917 err = -EINVAL; 918 stacklen = ovl_split_lowerdirs(lowertmp); 919 if (stacklen > OVL_MAX_STACK) { 920 pr_err("overlayfs: too many lower directories, limit is %d\n", 921 OVL_MAX_STACK); 922 goto out_free_lowertmp; 923 } else if (!ufs->config.upperdir && stacklen == 1) { 924 pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n"); 925 goto out_free_lowertmp; 926 } 927 928 err = -ENOMEM; 929 stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); 930 if (!stack) 931 goto out_free_lowertmp; 932 933 err = -EINVAL; 934 lower = lowertmp; 935 for (numlower = 0; numlower < stacklen; numlower++) { 936 err = ovl_lower_dir(lower, &stack[numlower], ufs, 937 &sb->s_stack_depth, &remote); 938 if (err) 939 goto out_put_lowerpath; 940 941 lower = strchr(lower, '\0') + 1; 942 } 943 944 err = -EINVAL; 945 sb->s_stack_depth++; 946 if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { 947 pr_err("overlayfs: maximum fs stacking depth exceeded\n"); 948 goto out_put_lowerpath; 949 } 950 951 if (ufs->config.upperdir) { 952 ufs->upper_mnt = clone_private_mount(&upperpath); 953 err = PTR_ERR(ufs->upper_mnt); 954 if (IS_ERR(ufs->upper_mnt)) { 955 pr_err("overlayfs: failed to clone upperpath\n"); 956 goto out_put_lowerpath; 957 } 958 959 /* Don't inherit atime flags */ 960 ufs->upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME); 961 962 sb->s_time_gran = ufs->upper_mnt->mnt_sb->s_time_gran; 963 964 ufs->workdir = ovl_workdir_create(sb, ufs, workpath.dentry, 965 OVL_WORKDIR_NAME, false); 966 /* 967 * Upper should support d_type, else whiteouts are visible. 968 * Given workdir and upper are on same fs, we can do 969 * iterate_dir() on workdir. This check requires successful 970 * creation of workdir in previous step. 971 */ 972 if (ufs->workdir) { 973 struct dentry *temp; 974 975 err = ovl_check_d_type_supported(&workpath); 976 if (err < 0) 977 goto out_put_workdir; 978 979 /* 980 * We allowed this configuration and don't want to 981 * break users over kernel upgrade. So warn instead 982 * of erroring out. 983 */ 984 if (!err) 985 pr_warn("overlayfs: upper fs needs to support d_type.\n"); 986 987 /* Check if upper/work fs supports O_TMPFILE */ 988 temp = ovl_do_tmpfile(ufs->workdir, S_IFREG | 0); 989 ufs->tmpfile = !IS_ERR(temp); 990 if (ufs->tmpfile) 991 dput(temp); 992 else 993 pr_warn("overlayfs: upper fs does not support tmpfile.\n"); 994 995 /* 996 * Check if upper/work fs supports trusted.overlay.* 997 * xattr 998 */ 999 err = ovl_do_setxattr(ufs->workdir, OVL_XATTR_OPAQUE, 1000 "0", 1, 0); 1001 if (err) { 1002 ufs->noxattr = true; 1003 pr_warn("overlayfs: upper fs does not support xattr.\n"); 1004 } else { 1005 vfs_removexattr(ufs->workdir, OVL_XATTR_OPAQUE); 1006 } 1007 1008 /* Check if upper/work fs supports file handles */ 1009 if (ufs->config.index && 1010 !ovl_can_decode_fh(ufs->workdir->d_sb)) { 1011 ufs->config.index = false; 1012 pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n"); 1013 } 1014 } 1015 } 1016 1017 err = -ENOMEM; 1018 ufs->lower_mnt = kcalloc(numlower, sizeof(struct vfsmount *), GFP_KERNEL); 1019 if (ufs->lower_mnt == NULL) 1020 goto out_put_workdir; 1021 for (i = 0; i < numlower; i++) { 1022 struct vfsmount *mnt = clone_private_mount(&stack[i]); 1023 1024 err = PTR_ERR(mnt); 1025 if (IS_ERR(mnt)) { 1026 pr_err("overlayfs: failed to clone lowerpath\n"); 1027 goto out_put_lower_mnt; 1028 } 1029 /* 1030 * Make lower_mnt R/O. That way fchmod/fchown on lower file 1031 * will fail instead of modifying lower fs. 1032 */ 1033 mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME; 1034 1035 ufs->lower_mnt[ufs->numlower] = mnt; 1036 ufs->numlower++; 1037 1038 /* Check if all lower layers are on same sb */ 1039 if (i == 0) 1040 ufs->same_sb = mnt->mnt_sb; 1041 else if (ufs->same_sb != mnt->mnt_sb) 1042 ufs->same_sb = NULL; 1043 } 1044 1045 /* If the upper fs is nonexistent, we mark overlayfs r/o too */ 1046 if (!ufs->upper_mnt) 1047 sb->s_flags |= MS_RDONLY; 1048 else if (ufs->upper_mnt->mnt_sb != ufs->same_sb) 1049 ufs->same_sb = NULL; 1050 1051 if (!(ovl_force_readonly(ufs)) && ufs->config.index) { 1052 /* Verify lower root is upper root origin */ 1053 err = ovl_verify_origin(upperpath.dentry, ufs->lower_mnt[0], 1054 stack[0].dentry, false, true); 1055 if (err) { 1056 pr_err("overlayfs: failed to verify upper root origin\n"); 1057 goto out_put_lower_mnt; 1058 } 1059 1060 ufs->indexdir = ovl_workdir_create(sb, ufs, workpath.dentry, 1061 OVL_INDEXDIR_NAME, true); 1062 if (ufs->indexdir) { 1063 /* Verify upper root is index dir origin */ 1064 err = ovl_verify_origin(ufs->indexdir, ufs->upper_mnt, 1065 upperpath.dentry, true, true); 1066 if (err) 1067 pr_err("overlayfs: failed to verify index dir origin\n"); 1068 1069 /* Cleanup bad/stale/orphan index entries */ 1070 if (!err) 1071 err = ovl_indexdir_cleanup(ufs->indexdir, 1072 ufs->upper_mnt, 1073 stack, numlower); 1074 } 1075 if (err || !ufs->indexdir) 1076 pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); 1077 if (err) 1078 goto out_put_indexdir; 1079 } 1080 1081 /* Show index=off/on in /proc/mounts for any of the reasons above */ 1082 if (!ufs->indexdir) 1083 ufs->config.index = false; 1084 1085 if (remote) 1086 sb->s_d_op = &ovl_reval_dentry_operations; 1087 else 1088 sb->s_d_op = &ovl_dentry_operations; 1089 1090 err = -ENOMEM; 1091 ufs->creator_cred = cred = prepare_creds(); 1092 if (!cred) 1093 goto out_put_indexdir; 1094 1095 /* Never override disk quota limits or use reserved space */ 1096 cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); 1097 1098 err = -ENOMEM; 1099 oe = ovl_alloc_entry(numlower); 1100 if (!oe) 1101 goto out_put_cred; 1102 1103 sb->s_magic = OVERLAYFS_SUPER_MAGIC; 1104 sb->s_op = &ovl_super_operations; 1105 sb->s_xattr = ovl_xattr_handlers; 1106 sb->s_fs_info = ufs; 1107 sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK; 1108 1109 root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); 1110 if (!root_dentry) 1111 goto out_free_oe; 1112 1113 mntput(upperpath.mnt); 1114 for (i = 0; i < numlower; i++) 1115 mntput(stack[i].mnt); 1116 mntput(workpath.mnt); 1117 kfree(lowertmp); 1118 1119 if (upperpath.dentry) { 1120 oe->has_upper = true; 1121 if (ovl_is_impuredir(upperpath.dentry)) 1122 ovl_set_flag(OVL_IMPURE, d_inode(root_dentry)); 1123 } 1124 for (i = 0; i < numlower; i++) { 1125 oe->lowerstack[i].dentry = stack[i].dentry; 1126 oe->lowerstack[i].mnt = ufs->lower_mnt[i]; 1127 } 1128 kfree(stack); 1129 1130 root_dentry->d_fsdata = oe; 1131 1132 ovl_inode_init(d_inode(root_dentry), upperpath.dentry, 1133 ovl_dentry_lower(root_dentry)); 1134 1135 sb->s_root = root_dentry; 1136 1137 return 0; 1138 1139 out_free_oe: 1140 kfree(oe); 1141 out_put_cred: 1142 put_cred(ufs->creator_cred); 1143 out_put_indexdir: 1144 dput(ufs->indexdir); 1145 out_put_lower_mnt: 1146 for (i = 0; i < ufs->numlower; i++) 1147 mntput(ufs->lower_mnt[i]); 1148 kfree(ufs->lower_mnt); 1149 out_put_workdir: 1150 dput(ufs->workdir); 1151 mntput(ufs->upper_mnt); 1152 out_put_lowerpath: 1153 for (i = 0; i < numlower; i++) 1154 path_put(&stack[i]); 1155 kfree(stack); 1156 out_free_lowertmp: 1157 kfree(lowertmp); 1158 out_unlock_workdentry: 1159 ovl_inuse_unlock(workpath.dentry); 1160 out_put_workpath: 1161 path_put(&workpath); 1162 out_unlock_upperdentry: 1163 ovl_inuse_unlock(upperpath.dentry); 1164 out_put_upperpath: 1165 path_put(&upperpath); 1166 out_free_config: 1167 kfree(ufs->config.lowerdir); 1168 kfree(ufs->config.upperdir); 1169 kfree(ufs->config.workdir); 1170 kfree(ufs); 1171 out: 1172 return err; 1173 } 1174 1175 static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, 1176 const char *dev_name, void *raw_data) 1177 { 1178 return mount_nodev(fs_type, flags, raw_data, ovl_fill_super); 1179 } 1180 1181 static struct file_system_type ovl_fs_type = { 1182 .owner = THIS_MODULE, 1183 .name = "overlay", 1184 .mount = ovl_mount, 1185 .kill_sb = kill_anon_super, 1186 }; 1187 MODULE_ALIAS_FS("overlay"); 1188 1189 static void ovl_inode_init_once(void *foo) 1190 { 1191 struct ovl_inode *oi = foo; 1192 1193 inode_init_once(&oi->vfs_inode); 1194 } 1195 1196 static int __init ovl_init(void) 1197 { 1198 int err; 1199 1200 ovl_inode_cachep = kmem_cache_create("ovl_inode", 1201 sizeof(struct ovl_inode), 0, 1202 (SLAB_RECLAIM_ACCOUNT| 1203 SLAB_MEM_SPREAD|SLAB_ACCOUNT), 1204 ovl_inode_init_once); 1205 if (ovl_inode_cachep == NULL) 1206 return -ENOMEM; 1207 1208 err = register_filesystem(&ovl_fs_type); 1209 if (err) 1210 kmem_cache_destroy(ovl_inode_cachep); 1211 1212 return err; 1213 } 1214 1215 static void __exit ovl_exit(void) 1216 { 1217 unregister_filesystem(&ovl_fs_type); 1218 1219 /* 1220 * Make sure all delayed rcu free inodes are flushed before we 1221 * destroy cache. 1222 */ 1223 rcu_barrier(); 1224 kmem_cache_destroy(ovl_inode_cachep); 1225 1226 } 1227 1228 module_init(ovl_init); 1229 module_exit(ovl_exit); 1230