1 /* 2 * 3 * Copyright (C) 2011 Novell Inc. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 as published by 7 * the Free Software Foundation. 8 */ 9 10 #include <uapi/linux/magic.h> 11 #include <linux/fs.h> 12 #include <linux/namei.h> 13 #include <linux/xattr.h> 14 #include <linux/mount.h> 15 #include <linux/parser.h> 16 #include <linux/module.h> 17 #include <linux/statfs.h> 18 #include <linux/seq_file.h> 19 #include <linux/posix_acl_xattr.h> 20 #include "overlayfs.h" 21 #include "ovl_entry.h" 22 23 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 24 MODULE_DESCRIPTION("Overlay filesystem"); 25 MODULE_LICENSE("GPL"); 26 27 28 struct ovl_dir_cache; 29 30 #define OVL_MAX_STACK 500 31 32 static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR); 33 module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644); 34 MODULE_PARM_DESC(ovl_redirect_dir_def, 35 "Default to on or off for the redirect_dir feature"); 36 37 static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX); 38 module_param_named(index, ovl_index_def, bool, 0644); 39 MODULE_PARM_DESC(ovl_index_def, 40 "Default to on or off for the inodes index feature"); 41 42 static void ovl_dentry_release(struct dentry *dentry) 43 { 44 struct ovl_entry *oe = dentry->d_fsdata; 45 46 if (oe) { 47 unsigned int i; 48 49 for (i = 0; i < oe->numlower; i++) 50 dput(oe->lowerstack[i].dentry); 51 kfree_rcu(oe, rcu); 52 } 53 } 54 55 static int ovl_check_append_only(struct inode *inode, int flag) 56 { 57 /* 58 * This test was moot in vfs may_open() because overlay inode does 59 * not have the S_APPEND flag, so re-check on real upper inode 60 */ 61 if (IS_APPEND(inode)) { 62 if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND)) 63 return -EPERM; 64 if (flag & O_TRUNC) 65 return -EPERM; 66 } 67 68 return 0; 69 } 70 71 static struct dentry *ovl_d_real(struct dentry *dentry, 72 const struct inode *inode, 73 unsigned int open_flags, unsigned int flags) 74 { 75 struct dentry *real; 76 int err; 77 78 if (flags & D_REAL_UPPER) 79 return ovl_dentry_upper(dentry); 80 81 if (!d_is_reg(dentry)) { 82 if (!inode || inode == d_inode(dentry)) 83 return dentry; 84 goto bug; 85 } 86 87 if (open_flags) { 88 err = ovl_open_maybe_copy_up(dentry, open_flags); 89 if (err) 90 return ERR_PTR(err); 91 } 92 93 real = ovl_dentry_upper(dentry); 94 if (real && (!inode || inode == d_inode(real))) { 95 if (!inode) { 96 err = ovl_check_append_only(d_inode(real), open_flags); 97 if (err) 98 return ERR_PTR(err); 99 } 100 return real; 101 } 102 103 real = ovl_dentry_lower(dentry); 104 if (!real) 105 goto bug; 106 107 /* Handle recursion */ 108 real = d_real(real, inode, open_flags, 0); 109 110 if (!inode || inode == d_inode(real)) 111 return real; 112 bug: 113 WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry, 114 inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0); 115 return dentry; 116 } 117 118 static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) 119 { 120 struct ovl_entry *oe = dentry->d_fsdata; 121 unsigned int i; 122 int ret = 1; 123 124 for (i = 0; i < oe->numlower; i++) { 125 struct dentry *d = oe->lowerstack[i].dentry; 126 127 if (d->d_flags & DCACHE_OP_REVALIDATE) { 128 ret = d->d_op->d_revalidate(d, flags); 129 if (ret < 0) 130 return ret; 131 if (!ret) { 132 if (!(flags & LOOKUP_RCU)) 133 d_invalidate(d); 134 return -ESTALE; 135 } 136 } 137 } 138 return 1; 139 } 140 141 static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) 142 { 143 struct ovl_entry *oe = dentry->d_fsdata; 144 unsigned int i; 145 int ret = 1; 146 147 for (i = 0; i < oe->numlower; i++) { 148 struct dentry *d = oe->lowerstack[i].dentry; 149 150 if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) { 151 ret = d->d_op->d_weak_revalidate(d, flags); 152 if (ret <= 0) 153 break; 154 } 155 } 156 return ret; 157 } 158 159 static const struct dentry_operations ovl_dentry_operations = { 160 .d_release = ovl_dentry_release, 161 .d_real = ovl_d_real, 162 }; 163 164 static const struct dentry_operations ovl_reval_dentry_operations = { 165 .d_release = ovl_dentry_release, 166 .d_real = ovl_d_real, 167 .d_revalidate = ovl_dentry_revalidate, 168 .d_weak_revalidate = ovl_dentry_weak_revalidate, 169 }; 170 171 static struct kmem_cache *ovl_inode_cachep; 172 173 static struct inode *ovl_alloc_inode(struct super_block *sb) 174 { 175 struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL); 176 177 if (!oi) 178 return NULL; 179 180 oi->cache = NULL; 181 oi->redirect = NULL; 182 oi->version = 0; 183 oi->flags = 0; 184 oi->__upperdentry = NULL; 185 oi->lower = NULL; 186 mutex_init(&oi->lock); 187 188 return &oi->vfs_inode; 189 } 190 191 static void ovl_i_callback(struct rcu_head *head) 192 { 193 struct inode *inode = container_of(head, struct inode, i_rcu); 194 195 kmem_cache_free(ovl_inode_cachep, OVL_I(inode)); 196 } 197 198 static void ovl_destroy_inode(struct inode *inode) 199 { 200 struct ovl_inode *oi = OVL_I(inode); 201 202 dput(oi->__upperdentry); 203 kfree(oi->redirect); 204 ovl_dir_cache_free(inode); 205 mutex_destroy(&oi->lock); 206 207 call_rcu(&inode->i_rcu, ovl_i_callback); 208 } 209 210 static void ovl_put_super(struct super_block *sb) 211 { 212 struct ovl_fs *ufs = sb->s_fs_info; 213 unsigned i; 214 215 dput(ufs->indexdir); 216 dput(ufs->workdir); 217 if (ufs->workdir_locked) 218 ovl_inuse_unlock(ufs->workbasedir); 219 dput(ufs->workbasedir); 220 if (ufs->upper_mnt && ufs->upperdir_locked) 221 ovl_inuse_unlock(ufs->upper_mnt->mnt_root); 222 mntput(ufs->upper_mnt); 223 for (i = 0; i < ufs->numlower; i++) 224 mntput(ufs->lower_mnt[i]); 225 kfree(ufs->lower_mnt); 226 227 kfree(ufs->config.lowerdir); 228 kfree(ufs->config.upperdir); 229 kfree(ufs->config.workdir); 230 put_cred(ufs->creator_cred); 231 kfree(ufs); 232 } 233 234 static int ovl_sync_fs(struct super_block *sb, int wait) 235 { 236 struct ovl_fs *ufs = sb->s_fs_info; 237 struct super_block *upper_sb; 238 int ret; 239 240 if (!ufs->upper_mnt) 241 return 0; 242 upper_sb = ufs->upper_mnt->mnt_sb; 243 if (!upper_sb->s_op->sync_fs) 244 return 0; 245 246 /* real inodes have already been synced by sync_filesystem(ovl_sb) */ 247 down_read(&upper_sb->s_umount); 248 ret = upper_sb->s_op->sync_fs(upper_sb, wait); 249 up_read(&upper_sb->s_umount); 250 return ret; 251 } 252 253 /** 254 * ovl_statfs 255 * @sb: The overlayfs super block 256 * @buf: The struct kstatfs to fill in with stats 257 * 258 * Get the filesystem statistics. As writes always target the upper layer 259 * filesystem pass the statfs to the upper filesystem (if it exists) 260 */ 261 static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) 262 { 263 struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 264 struct dentry *root_dentry = dentry->d_sb->s_root; 265 struct path path; 266 int err; 267 268 ovl_path_real(root_dentry, &path); 269 270 err = vfs_statfs(&path, buf); 271 if (!err) { 272 buf->f_namelen = ofs->namelen; 273 buf->f_type = OVERLAYFS_SUPER_MAGIC; 274 } 275 276 return err; 277 } 278 279 /* Will this overlay be forced to mount/remount ro? */ 280 static bool ovl_force_readonly(struct ovl_fs *ufs) 281 { 282 return (!ufs->upper_mnt || !ufs->workdir); 283 } 284 285 /** 286 * ovl_show_options 287 * 288 * Prints the mount options for a given superblock. 289 * Returns zero; does not fail. 290 */ 291 static int ovl_show_options(struct seq_file *m, struct dentry *dentry) 292 { 293 struct super_block *sb = dentry->d_sb; 294 struct ovl_fs *ufs = sb->s_fs_info; 295 296 seq_show_option(m, "lowerdir", ufs->config.lowerdir); 297 if (ufs->config.upperdir) { 298 seq_show_option(m, "upperdir", ufs->config.upperdir); 299 seq_show_option(m, "workdir", ufs->config.workdir); 300 } 301 if (ufs->config.default_permissions) 302 seq_puts(m, ",default_permissions"); 303 if (ufs->config.redirect_dir != ovl_redirect_dir_def) 304 seq_printf(m, ",redirect_dir=%s", 305 ufs->config.redirect_dir ? "on" : "off"); 306 if (ufs->config.index != ovl_index_def) 307 seq_printf(m, ",index=%s", 308 ufs->config.index ? "on" : "off"); 309 return 0; 310 } 311 312 static int ovl_remount(struct super_block *sb, int *flags, char *data) 313 { 314 struct ovl_fs *ufs = sb->s_fs_info; 315 316 if (!(*flags & MS_RDONLY) && ovl_force_readonly(ufs)) 317 return -EROFS; 318 319 return 0; 320 } 321 322 static const struct super_operations ovl_super_operations = { 323 .alloc_inode = ovl_alloc_inode, 324 .destroy_inode = ovl_destroy_inode, 325 .drop_inode = generic_delete_inode, 326 .put_super = ovl_put_super, 327 .sync_fs = ovl_sync_fs, 328 .statfs = ovl_statfs, 329 .show_options = ovl_show_options, 330 .remount_fs = ovl_remount, 331 }; 332 333 enum { 334 OPT_LOWERDIR, 335 OPT_UPPERDIR, 336 OPT_WORKDIR, 337 OPT_DEFAULT_PERMISSIONS, 338 OPT_REDIRECT_DIR_ON, 339 OPT_REDIRECT_DIR_OFF, 340 OPT_INDEX_ON, 341 OPT_INDEX_OFF, 342 OPT_ERR, 343 }; 344 345 static const match_table_t ovl_tokens = { 346 {OPT_LOWERDIR, "lowerdir=%s"}, 347 {OPT_UPPERDIR, "upperdir=%s"}, 348 {OPT_WORKDIR, "workdir=%s"}, 349 {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, 350 {OPT_REDIRECT_DIR_ON, "redirect_dir=on"}, 351 {OPT_REDIRECT_DIR_OFF, "redirect_dir=off"}, 352 {OPT_INDEX_ON, "index=on"}, 353 {OPT_INDEX_OFF, "index=off"}, 354 {OPT_ERR, NULL} 355 }; 356 357 static char *ovl_next_opt(char **s) 358 { 359 char *sbegin = *s; 360 char *p; 361 362 if (sbegin == NULL) 363 return NULL; 364 365 for (p = sbegin; *p; p++) { 366 if (*p == '\\') { 367 p++; 368 if (!*p) 369 break; 370 } else if (*p == ',') { 371 *p = '\0'; 372 *s = p + 1; 373 return sbegin; 374 } 375 } 376 *s = NULL; 377 return sbegin; 378 } 379 380 static int ovl_parse_opt(char *opt, struct ovl_config *config) 381 { 382 char *p; 383 384 while ((p = ovl_next_opt(&opt)) != NULL) { 385 int token; 386 substring_t args[MAX_OPT_ARGS]; 387 388 if (!*p) 389 continue; 390 391 token = match_token(p, ovl_tokens, args); 392 switch (token) { 393 case OPT_UPPERDIR: 394 kfree(config->upperdir); 395 config->upperdir = match_strdup(&args[0]); 396 if (!config->upperdir) 397 return -ENOMEM; 398 break; 399 400 case OPT_LOWERDIR: 401 kfree(config->lowerdir); 402 config->lowerdir = match_strdup(&args[0]); 403 if (!config->lowerdir) 404 return -ENOMEM; 405 break; 406 407 case OPT_WORKDIR: 408 kfree(config->workdir); 409 config->workdir = match_strdup(&args[0]); 410 if (!config->workdir) 411 return -ENOMEM; 412 break; 413 414 case OPT_DEFAULT_PERMISSIONS: 415 config->default_permissions = true; 416 break; 417 418 case OPT_REDIRECT_DIR_ON: 419 config->redirect_dir = true; 420 break; 421 422 case OPT_REDIRECT_DIR_OFF: 423 config->redirect_dir = false; 424 break; 425 426 case OPT_INDEX_ON: 427 config->index = true; 428 break; 429 430 case OPT_INDEX_OFF: 431 config->index = false; 432 break; 433 434 default: 435 pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); 436 return -EINVAL; 437 } 438 } 439 440 /* Workdir is useless in non-upper mount */ 441 if (!config->upperdir && config->workdir) { 442 pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n", 443 config->workdir); 444 kfree(config->workdir); 445 config->workdir = NULL; 446 } 447 448 return 0; 449 } 450 451 #define OVL_WORKDIR_NAME "work" 452 #define OVL_INDEXDIR_NAME "index" 453 454 static struct dentry *ovl_workdir_create(struct super_block *sb, 455 struct ovl_fs *ufs, 456 struct dentry *dentry, 457 const char *name, bool persist) 458 { 459 struct inode *dir = dentry->d_inode; 460 struct vfsmount *mnt = ufs->upper_mnt; 461 struct dentry *work; 462 int err; 463 bool retried = false; 464 bool locked = false; 465 466 err = mnt_want_write(mnt); 467 if (err) 468 goto out_err; 469 470 inode_lock_nested(dir, I_MUTEX_PARENT); 471 locked = true; 472 473 retry: 474 work = lookup_one_len(name, dentry, strlen(name)); 475 476 if (!IS_ERR(work)) { 477 struct iattr attr = { 478 .ia_valid = ATTR_MODE, 479 .ia_mode = S_IFDIR | 0, 480 }; 481 482 if (work->d_inode) { 483 err = -EEXIST; 484 if (retried) 485 goto out_dput; 486 487 if (persist) 488 goto out_unlock; 489 490 retried = true; 491 ovl_workdir_cleanup(dir, mnt, work, 0); 492 dput(work); 493 goto retry; 494 } 495 496 err = ovl_create_real(dir, work, 497 &(struct cattr){.mode = S_IFDIR | 0}, 498 NULL, true); 499 if (err) 500 goto out_dput; 501 502 /* 503 * Try to remove POSIX ACL xattrs from workdir. We are good if: 504 * 505 * a) success (there was a POSIX ACL xattr and was removed) 506 * b) -ENODATA (there was no POSIX ACL xattr) 507 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported) 508 * 509 * There are various other error values that could effectively 510 * mean that the xattr doesn't exist (e.g. -ERANGE is returned 511 * if the xattr name is too long), but the set of filesystems 512 * allowed as upper are limited to "normal" ones, where checking 513 * for the above two errors is sufficient. 514 */ 515 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT); 516 if (err && err != -ENODATA && err != -EOPNOTSUPP) 517 goto out_dput; 518 519 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS); 520 if (err && err != -ENODATA && err != -EOPNOTSUPP) 521 goto out_dput; 522 523 /* Clear any inherited mode bits */ 524 inode_lock(work->d_inode); 525 err = notify_change(work, &attr, NULL); 526 inode_unlock(work->d_inode); 527 if (err) 528 goto out_dput; 529 } else { 530 err = PTR_ERR(work); 531 goto out_err; 532 } 533 out_unlock: 534 mnt_drop_write(mnt); 535 if (locked) 536 inode_unlock(dir); 537 538 return work; 539 540 out_dput: 541 dput(work); 542 out_err: 543 pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n", 544 ufs->config.workdir, name, -err); 545 sb->s_flags |= MS_RDONLY; 546 work = NULL; 547 goto out_unlock; 548 } 549 550 static void ovl_unescape(char *s) 551 { 552 char *d = s; 553 554 for (;; s++, d++) { 555 if (*s == '\\') 556 s++; 557 *d = *s; 558 if (!*s) 559 break; 560 } 561 } 562 563 static int ovl_mount_dir_noesc(const char *name, struct path *path) 564 { 565 int err = -EINVAL; 566 567 if (!*name) { 568 pr_err("overlayfs: empty lowerdir\n"); 569 goto out; 570 } 571 err = kern_path(name, LOOKUP_FOLLOW, path); 572 if (err) { 573 pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); 574 goto out; 575 } 576 err = -EINVAL; 577 if (ovl_dentry_weird(path->dentry)) { 578 pr_err("overlayfs: filesystem on '%s' not supported\n", name); 579 goto out_put; 580 } 581 if (!d_is_dir(path->dentry)) { 582 pr_err("overlayfs: '%s' not a directory\n", name); 583 goto out_put; 584 } 585 return 0; 586 587 out_put: 588 path_put(path); 589 out: 590 return err; 591 } 592 593 static int ovl_mount_dir(const char *name, struct path *path) 594 { 595 int err = -ENOMEM; 596 char *tmp = kstrdup(name, GFP_KERNEL); 597 598 if (tmp) { 599 ovl_unescape(tmp); 600 err = ovl_mount_dir_noesc(tmp, path); 601 602 if (!err) 603 if (ovl_dentry_remote(path->dentry)) { 604 pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n", 605 tmp); 606 path_put(path); 607 err = -EINVAL; 608 } 609 kfree(tmp); 610 } 611 return err; 612 } 613 614 static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs, 615 const char *name) 616 { 617 struct kstatfs statfs; 618 int err = vfs_statfs(path, &statfs); 619 620 if (err) 621 pr_err("overlayfs: statfs failed on '%s'\n", name); 622 else 623 ofs->namelen = max(ofs->namelen, statfs.f_namelen); 624 625 return err; 626 } 627 628 static int ovl_lower_dir(const char *name, struct path *path, 629 struct ovl_fs *ofs, int *stack_depth, bool *remote) 630 { 631 int err; 632 633 err = ovl_mount_dir_noesc(name, path); 634 if (err) 635 goto out; 636 637 err = ovl_check_namelen(path, ofs, name); 638 if (err) 639 goto out_put; 640 641 *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth); 642 643 if (ovl_dentry_remote(path->dentry)) 644 *remote = true; 645 646 /* 647 * The inodes index feature needs to encode and decode file 648 * handles, so it requires that all layers support them. 649 */ 650 if (ofs->config.index && !ovl_can_decode_fh(path->dentry->d_sb)) { 651 ofs->config.index = false; 652 pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off.\n", name); 653 } 654 655 return 0; 656 657 out_put: 658 path_put(path); 659 out: 660 return err; 661 } 662 663 /* Workdir should not be subdir of upperdir and vice versa */ 664 static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) 665 { 666 bool ok = false; 667 668 if (workdir != upperdir) { 669 ok = (lock_rename(workdir, upperdir) == NULL); 670 unlock_rename(workdir, upperdir); 671 } 672 return ok; 673 } 674 675 static unsigned int ovl_split_lowerdirs(char *str) 676 { 677 unsigned int ctr = 1; 678 char *s, *d; 679 680 for (s = d = str;; s++, d++) { 681 if (*s == '\\') { 682 s++; 683 } else if (*s == ':') { 684 *d = '\0'; 685 ctr++; 686 continue; 687 } 688 *d = *s; 689 if (!*s) 690 break; 691 } 692 return ctr; 693 } 694 695 static int __maybe_unused 696 ovl_posix_acl_xattr_get(const struct xattr_handler *handler, 697 struct dentry *dentry, struct inode *inode, 698 const char *name, void *buffer, size_t size) 699 { 700 return ovl_xattr_get(dentry, inode, handler->name, buffer, size); 701 } 702 703 static int __maybe_unused 704 ovl_posix_acl_xattr_set(const struct xattr_handler *handler, 705 struct dentry *dentry, struct inode *inode, 706 const char *name, const void *value, 707 size_t size, int flags) 708 { 709 struct dentry *workdir = ovl_workdir(dentry); 710 struct inode *realinode = ovl_inode_real(inode); 711 struct posix_acl *acl = NULL; 712 int err; 713 714 /* Check that everything is OK before copy-up */ 715 if (value) { 716 acl = posix_acl_from_xattr(&init_user_ns, value, size); 717 if (IS_ERR(acl)) 718 return PTR_ERR(acl); 719 } 720 err = -EOPNOTSUPP; 721 if (!IS_POSIXACL(d_inode(workdir))) 722 goto out_acl_release; 723 if (!realinode->i_op->set_acl) 724 goto out_acl_release; 725 if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) { 726 err = acl ? -EACCES : 0; 727 goto out_acl_release; 728 } 729 err = -EPERM; 730 if (!inode_owner_or_capable(inode)) 731 goto out_acl_release; 732 733 posix_acl_release(acl); 734 735 /* 736 * Check if sgid bit needs to be cleared (actual setacl operation will 737 * be done with mounter's capabilities and so that won't do it for us). 738 */ 739 if (unlikely(inode->i_mode & S_ISGID) && 740 handler->flags == ACL_TYPE_ACCESS && 741 !in_group_p(inode->i_gid) && 742 !capable_wrt_inode_uidgid(inode, CAP_FSETID)) { 743 struct iattr iattr = { .ia_valid = ATTR_KILL_SGID }; 744 745 err = ovl_setattr(dentry, &iattr); 746 if (err) 747 return err; 748 } 749 750 err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags); 751 if (!err) 752 ovl_copyattr(ovl_inode_real(inode), inode); 753 754 return err; 755 756 out_acl_release: 757 posix_acl_release(acl); 758 return err; 759 } 760 761 static int ovl_own_xattr_get(const struct xattr_handler *handler, 762 struct dentry *dentry, struct inode *inode, 763 const char *name, void *buffer, size_t size) 764 { 765 return -EOPNOTSUPP; 766 } 767 768 static int ovl_own_xattr_set(const struct xattr_handler *handler, 769 struct dentry *dentry, struct inode *inode, 770 const char *name, const void *value, 771 size_t size, int flags) 772 { 773 return -EOPNOTSUPP; 774 } 775 776 static int ovl_other_xattr_get(const struct xattr_handler *handler, 777 struct dentry *dentry, struct inode *inode, 778 const char *name, void *buffer, size_t size) 779 { 780 return ovl_xattr_get(dentry, inode, name, buffer, size); 781 } 782 783 static int ovl_other_xattr_set(const struct xattr_handler *handler, 784 struct dentry *dentry, struct inode *inode, 785 const char *name, const void *value, 786 size_t size, int flags) 787 { 788 return ovl_xattr_set(dentry, inode, name, value, size, flags); 789 } 790 791 static const struct xattr_handler __maybe_unused 792 ovl_posix_acl_access_xattr_handler = { 793 .name = XATTR_NAME_POSIX_ACL_ACCESS, 794 .flags = ACL_TYPE_ACCESS, 795 .get = ovl_posix_acl_xattr_get, 796 .set = ovl_posix_acl_xattr_set, 797 }; 798 799 static const struct xattr_handler __maybe_unused 800 ovl_posix_acl_default_xattr_handler = { 801 .name = XATTR_NAME_POSIX_ACL_DEFAULT, 802 .flags = ACL_TYPE_DEFAULT, 803 .get = ovl_posix_acl_xattr_get, 804 .set = ovl_posix_acl_xattr_set, 805 }; 806 807 static const struct xattr_handler ovl_own_xattr_handler = { 808 .prefix = OVL_XATTR_PREFIX, 809 .get = ovl_own_xattr_get, 810 .set = ovl_own_xattr_set, 811 }; 812 813 static const struct xattr_handler ovl_other_xattr_handler = { 814 .prefix = "", /* catch all */ 815 .get = ovl_other_xattr_get, 816 .set = ovl_other_xattr_set, 817 }; 818 819 static const struct xattr_handler *ovl_xattr_handlers[] = { 820 #ifdef CONFIG_FS_POSIX_ACL 821 &ovl_posix_acl_access_xattr_handler, 822 &ovl_posix_acl_default_xattr_handler, 823 #endif 824 &ovl_own_xattr_handler, 825 &ovl_other_xattr_handler, 826 NULL 827 }; 828 829 static int ovl_fill_super(struct super_block *sb, void *data, int silent) 830 { 831 struct path upperpath = { }; 832 struct path workpath = { }; 833 struct dentry *root_dentry; 834 struct ovl_entry *oe; 835 struct ovl_fs *ufs; 836 struct path *stack = NULL; 837 char *lowertmp; 838 char *lower; 839 unsigned int numlower; 840 unsigned int stacklen = 0; 841 unsigned int i; 842 bool remote = false; 843 struct cred *cred; 844 int err; 845 846 err = -ENOMEM; 847 ufs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); 848 if (!ufs) 849 goto out; 850 851 ufs->config.redirect_dir = ovl_redirect_dir_def; 852 ufs->config.index = ovl_index_def; 853 err = ovl_parse_opt((char *) data, &ufs->config); 854 if (err) 855 goto out_free_config; 856 857 err = -EINVAL; 858 if (!ufs->config.lowerdir) { 859 if (!silent) 860 pr_err("overlayfs: missing 'lowerdir'\n"); 861 goto out_free_config; 862 } 863 864 sb->s_stack_depth = 0; 865 sb->s_maxbytes = MAX_LFS_FILESIZE; 866 if (ufs->config.upperdir) { 867 if (!ufs->config.workdir) { 868 pr_err("overlayfs: missing 'workdir'\n"); 869 goto out_free_config; 870 } 871 872 err = ovl_mount_dir(ufs->config.upperdir, &upperpath); 873 if (err) 874 goto out_free_config; 875 876 /* Upper fs should not be r/o */ 877 if (sb_rdonly(upperpath.mnt->mnt_sb)) { 878 pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n"); 879 err = -EINVAL; 880 goto out_put_upperpath; 881 } 882 883 err = ovl_check_namelen(&upperpath, ufs, ufs->config.upperdir); 884 if (err) 885 goto out_put_upperpath; 886 887 err = -EBUSY; 888 if (ovl_inuse_trylock(upperpath.dentry)) { 889 ufs->upperdir_locked = true; 890 } else if (ufs->config.index) { 891 pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n"); 892 goto out_put_upperpath; 893 } else { 894 pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n"); 895 } 896 897 err = ovl_mount_dir(ufs->config.workdir, &workpath); 898 if (err) 899 goto out_unlock_upperdentry; 900 901 err = -EINVAL; 902 if (upperpath.mnt != workpath.mnt) { 903 pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); 904 goto out_put_workpath; 905 } 906 if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) { 907 pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); 908 goto out_put_workpath; 909 } 910 911 err = -EBUSY; 912 if (ovl_inuse_trylock(workpath.dentry)) { 913 ufs->workdir_locked = true; 914 } else if (ufs->config.index) { 915 pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n"); 916 goto out_put_workpath; 917 } else { 918 pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n"); 919 } 920 921 ufs->workbasedir = workpath.dentry; 922 sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth; 923 } 924 err = -ENOMEM; 925 lowertmp = kstrdup(ufs->config.lowerdir, GFP_KERNEL); 926 if (!lowertmp) 927 goto out_unlock_workdentry; 928 929 err = -EINVAL; 930 stacklen = ovl_split_lowerdirs(lowertmp); 931 if (stacklen > OVL_MAX_STACK) { 932 pr_err("overlayfs: too many lower directories, limit is %d\n", 933 OVL_MAX_STACK); 934 goto out_free_lowertmp; 935 } else if (!ufs->config.upperdir && stacklen == 1) { 936 pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n"); 937 goto out_free_lowertmp; 938 } 939 940 err = -ENOMEM; 941 stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); 942 if (!stack) 943 goto out_free_lowertmp; 944 945 err = -EINVAL; 946 lower = lowertmp; 947 for (numlower = 0; numlower < stacklen; numlower++) { 948 err = ovl_lower_dir(lower, &stack[numlower], ufs, 949 &sb->s_stack_depth, &remote); 950 if (err) 951 goto out_put_lowerpath; 952 953 lower = strchr(lower, '\0') + 1; 954 } 955 956 err = -EINVAL; 957 sb->s_stack_depth++; 958 if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { 959 pr_err("overlayfs: maximum fs stacking depth exceeded\n"); 960 goto out_put_lowerpath; 961 } 962 963 if (ufs->config.upperdir) { 964 ufs->upper_mnt = clone_private_mount(&upperpath); 965 err = PTR_ERR(ufs->upper_mnt); 966 if (IS_ERR(ufs->upper_mnt)) { 967 pr_err("overlayfs: failed to clone upperpath\n"); 968 goto out_put_lowerpath; 969 } 970 971 /* Don't inherit atime flags */ 972 ufs->upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME); 973 974 sb->s_time_gran = ufs->upper_mnt->mnt_sb->s_time_gran; 975 976 ufs->workdir = ovl_workdir_create(sb, ufs, workpath.dentry, 977 OVL_WORKDIR_NAME, false); 978 /* 979 * Upper should support d_type, else whiteouts are visible. 980 * Given workdir and upper are on same fs, we can do 981 * iterate_dir() on workdir. This check requires successful 982 * creation of workdir in previous step. 983 */ 984 if (ufs->workdir) { 985 struct dentry *temp; 986 987 err = ovl_check_d_type_supported(&workpath); 988 if (err < 0) 989 goto out_put_workdir; 990 991 /* 992 * We allowed this configuration and don't want to 993 * break users over kernel upgrade. So warn instead 994 * of erroring out. 995 */ 996 if (!err) 997 pr_warn("overlayfs: upper fs needs to support d_type.\n"); 998 999 /* Check if upper/work fs supports O_TMPFILE */ 1000 temp = ovl_do_tmpfile(ufs->workdir, S_IFREG | 0); 1001 ufs->tmpfile = !IS_ERR(temp); 1002 if (ufs->tmpfile) 1003 dput(temp); 1004 else 1005 pr_warn("overlayfs: upper fs does not support tmpfile.\n"); 1006 1007 /* 1008 * Check if upper/work fs supports trusted.overlay.* 1009 * xattr 1010 */ 1011 err = ovl_do_setxattr(ufs->workdir, OVL_XATTR_OPAQUE, 1012 "0", 1, 0); 1013 if (err) { 1014 ufs->noxattr = true; 1015 pr_warn("overlayfs: upper fs does not support xattr.\n"); 1016 } else { 1017 vfs_removexattr(ufs->workdir, OVL_XATTR_OPAQUE); 1018 } 1019 1020 /* Check if upper/work fs supports file handles */ 1021 if (ufs->config.index && 1022 !ovl_can_decode_fh(ufs->workdir->d_sb)) { 1023 ufs->config.index = false; 1024 pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n"); 1025 } 1026 } 1027 } 1028 1029 err = -ENOMEM; 1030 ufs->lower_mnt = kcalloc(numlower, sizeof(struct vfsmount *), GFP_KERNEL); 1031 if (ufs->lower_mnt == NULL) 1032 goto out_put_workdir; 1033 for (i = 0; i < numlower; i++) { 1034 struct vfsmount *mnt = clone_private_mount(&stack[i]); 1035 1036 err = PTR_ERR(mnt); 1037 if (IS_ERR(mnt)) { 1038 pr_err("overlayfs: failed to clone lowerpath\n"); 1039 goto out_put_lower_mnt; 1040 } 1041 /* 1042 * Make lower_mnt R/O. That way fchmod/fchown on lower file 1043 * will fail instead of modifying lower fs. 1044 */ 1045 mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME; 1046 1047 ufs->lower_mnt[ufs->numlower] = mnt; 1048 ufs->numlower++; 1049 1050 /* Check if all lower layers are on same sb */ 1051 if (i == 0) 1052 ufs->same_sb = mnt->mnt_sb; 1053 else if (ufs->same_sb != mnt->mnt_sb) 1054 ufs->same_sb = NULL; 1055 } 1056 1057 /* If the upper fs is nonexistent, we mark overlayfs r/o too */ 1058 if (!ufs->upper_mnt) 1059 sb->s_flags |= MS_RDONLY; 1060 else if (ufs->upper_mnt->mnt_sb != ufs->same_sb) 1061 ufs->same_sb = NULL; 1062 1063 if (!(ovl_force_readonly(ufs)) && ufs->config.index) { 1064 /* Verify lower root is upper root origin */ 1065 err = ovl_verify_origin(upperpath.dentry, ufs->lower_mnt[0], 1066 stack[0].dentry, false, true); 1067 if (err) { 1068 pr_err("overlayfs: failed to verify upper root origin\n"); 1069 goto out_put_lower_mnt; 1070 } 1071 1072 ufs->indexdir = ovl_workdir_create(sb, ufs, workpath.dentry, 1073 OVL_INDEXDIR_NAME, true); 1074 if (ufs->indexdir) { 1075 /* Verify upper root is index dir origin */ 1076 err = ovl_verify_origin(ufs->indexdir, ufs->upper_mnt, 1077 upperpath.dentry, true, true); 1078 if (err) 1079 pr_err("overlayfs: failed to verify index dir origin\n"); 1080 1081 /* Cleanup bad/stale/orphan index entries */ 1082 if (!err) 1083 err = ovl_indexdir_cleanup(ufs->indexdir, 1084 ufs->upper_mnt, 1085 stack, numlower); 1086 } 1087 if (err || !ufs->indexdir) 1088 pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); 1089 if (err) 1090 goto out_put_indexdir; 1091 } 1092 1093 /* Show index=off/on in /proc/mounts for any of the reasons above */ 1094 if (!ufs->indexdir) 1095 ufs->config.index = false; 1096 1097 if (remote) 1098 sb->s_d_op = &ovl_reval_dentry_operations; 1099 else 1100 sb->s_d_op = &ovl_dentry_operations; 1101 1102 err = -ENOMEM; 1103 ufs->creator_cred = cred = prepare_creds(); 1104 if (!cred) 1105 goto out_put_indexdir; 1106 1107 /* Never override disk quota limits or use reserved space */ 1108 cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); 1109 1110 err = -ENOMEM; 1111 oe = ovl_alloc_entry(numlower); 1112 if (!oe) 1113 goto out_put_cred; 1114 1115 sb->s_magic = OVERLAYFS_SUPER_MAGIC; 1116 sb->s_op = &ovl_super_operations; 1117 sb->s_xattr = ovl_xattr_handlers; 1118 sb->s_fs_info = ufs; 1119 sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK; 1120 1121 root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); 1122 if (!root_dentry) 1123 goto out_free_oe; 1124 1125 mntput(upperpath.mnt); 1126 for (i = 0; i < numlower; i++) 1127 mntput(stack[i].mnt); 1128 mntput(workpath.mnt); 1129 kfree(lowertmp); 1130 1131 if (upperpath.dentry) { 1132 oe->has_upper = true; 1133 if (ovl_is_impuredir(upperpath.dentry)) 1134 ovl_set_flag(OVL_IMPURE, d_inode(root_dentry)); 1135 } 1136 for (i = 0; i < numlower; i++) { 1137 oe->lowerstack[i].dentry = stack[i].dentry; 1138 oe->lowerstack[i].mnt = ufs->lower_mnt[i]; 1139 } 1140 kfree(stack); 1141 1142 root_dentry->d_fsdata = oe; 1143 1144 ovl_inode_init(d_inode(root_dentry), upperpath.dentry, 1145 ovl_dentry_lower(root_dentry)); 1146 1147 sb->s_root = root_dentry; 1148 1149 return 0; 1150 1151 out_free_oe: 1152 kfree(oe); 1153 out_put_cred: 1154 put_cred(ufs->creator_cred); 1155 out_put_indexdir: 1156 dput(ufs->indexdir); 1157 out_put_lower_mnt: 1158 for (i = 0; i < ufs->numlower; i++) 1159 mntput(ufs->lower_mnt[i]); 1160 kfree(ufs->lower_mnt); 1161 out_put_workdir: 1162 dput(ufs->workdir); 1163 mntput(ufs->upper_mnt); 1164 out_put_lowerpath: 1165 for (i = 0; i < numlower; i++) 1166 path_put(&stack[i]); 1167 kfree(stack); 1168 out_free_lowertmp: 1169 kfree(lowertmp); 1170 out_unlock_workdentry: 1171 if (ufs->workdir_locked) 1172 ovl_inuse_unlock(workpath.dentry); 1173 out_put_workpath: 1174 path_put(&workpath); 1175 out_unlock_upperdentry: 1176 if (ufs->upperdir_locked) 1177 ovl_inuse_unlock(upperpath.dentry); 1178 out_put_upperpath: 1179 path_put(&upperpath); 1180 out_free_config: 1181 kfree(ufs->config.lowerdir); 1182 kfree(ufs->config.upperdir); 1183 kfree(ufs->config.workdir); 1184 kfree(ufs); 1185 out: 1186 return err; 1187 } 1188 1189 static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, 1190 const char *dev_name, void *raw_data) 1191 { 1192 return mount_nodev(fs_type, flags, raw_data, ovl_fill_super); 1193 } 1194 1195 static struct file_system_type ovl_fs_type = { 1196 .owner = THIS_MODULE, 1197 .name = "overlay", 1198 .mount = ovl_mount, 1199 .kill_sb = kill_anon_super, 1200 }; 1201 MODULE_ALIAS_FS("overlay"); 1202 1203 static void ovl_inode_init_once(void *foo) 1204 { 1205 struct ovl_inode *oi = foo; 1206 1207 inode_init_once(&oi->vfs_inode); 1208 } 1209 1210 static int __init ovl_init(void) 1211 { 1212 int err; 1213 1214 ovl_inode_cachep = kmem_cache_create("ovl_inode", 1215 sizeof(struct ovl_inode), 0, 1216 (SLAB_RECLAIM_ACCOUNT| 1217 SLAB_MEM_SPREAD|SLAB_ACCOUNT), 1218 ovl_inode_init_once); 1219 if (ovl_inode_cachep == NULL) 1220 return -ENOMEM; 1221 1222 err = register_filesystem(&ovl_fs_type); 1223 if (err) 1224 kmem_cache_destroy(ovl_inode_cachep); 1225 1226 return err; 1227 } 1228 1229 static void __exit ovl_exit(void) 1230 { 1231 unregister_filesystem(&ovl_fs_type); 1232 1233 /* 1234 * Make sure all delayed rcu free inodes are flushed before we 1235 * destroy cache. 1236 */ 1237 rcu_barrier(); 1238 kmem_cache_destroy(ovl_inode_cachep); 1239 1240 } 1241 1242 module_init(ovl_init); 1243 module_exit(ovl_exit); 1244