1 /* 2 * 3 * Copyright (C) 2011 Novell Inc. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 as published by 7 * the Free Software Foundation. 8 */ 9 10 #include <uapi/linux/magic.h> 11 #include <linux/fs.h> 12 #include <linux/namei.h> 13 #include <linux/xattr.h> 14 #include <linux/mount.h> 15 #include <linux/parser.h> 16 #include <linux/module.h> 17 #include <linux/statfs.h> 18 #include <linux/seq_file.h> 19 #include <linux/posix_acl_xattr.h> 20 #include "overlayfs.h" 21 22 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 23 MODULE_DESCRIPTION("Overlay filesystem"); 24 MODULE_LICENSE("GPL"); 25 26 27 struct ovl_dir_cache; 28 29 #define OVL_MAX_STACK 500 30 31 static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR); 32 module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644); 33 MODULE_PARM_DESC(ovl_redirect_dir_def, 34 "Default to on or off for the redirect_dir feature"); 35 36 static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX); 37 module_param_named(index, ovl_index_def, bool, 0644); 38 MODULE_PARM_DESC(ovl_index_def, 39 "Default to on or off for the inodes index feature"); 40 41 static void ovl_entry_stack_free(struct ovl_entry *oe) 42 { 43 unsigned int i; 44 45 for (i = 0; i < oe->numlower; i++) 46 dput(oe->lowerstack[i].dentry); 47 } 48 49 static void ovl_dentry_release(struct dentry *dentry) 50 { 51 struct ovl_entry *oe = dentry->d_fsdata; 52 53 if (oe) { 54 ovl_entry_stack_free(oe); 55 kfree_rcu(oe, rcu); 56 } 57 } 58 59 static int ovl_check_append_only(struct inode *inode, int flag) 60 { 61 /* 62 * This test was moot in vfs may_open() because overlay inode does 63 * not have the S_APPEND flag, so re-check on real upper inode 64 */ 65 if (IS_APPEND(inode)) { 66 if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND)) 67 return -EPERM; 68 if (flag & O_TRUNC) 69 return -EPERM; 70 } 71 72 return 0; 73 } 74 75 static struct dentry *ovl_d_real(struct dentry *dentry, 76 const struct inode *inode, 77 unsigned int open_flags, unsigned int flags) 78 { 79 struct dentry *real; 80 int err; 81 82 if (flags & D_REAL_UPPER) 83 return ovl_dentry_upper(dentry); 84 85 if (!d_is_reg(dentry)) { 86 if (!inode || inode == d_inode(dentry)) 87 return dentry; 88 goto bug; 89 } 90 91 if (open_flags) { 92 err = ovl_open_maybe_copy_up(dentry, open_flags); 93 if (err) 94 return ERR_PTR(err); 95 } 96 97 real = ovl_dentry_upper(dentry); 98 if (real && (!inode || inode == d_inode(real))) { 99 if (!inode) { 100 err = ovl_check_append_only(d_inode(real), open_flags); 101 if (err) 102 return ERR_PTR(err); 103 } 104 return real; 105 } 106 107 real = ovl_dentry_lower(dentry); 108 if (!real) 109 goto bug; 110 111 /* Handle recursion */ 112 real = d_real(real, inode, open_flags, 0); 113 114 if (!inode || inode == d_inode(real)) 115 return real; 116 bug: 117 WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry, 118 inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0); 119 return dentry; 120 } 121 122 static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) 123 { 124 struct ovl_entry *oe = dentry->d_fsdata; 125 unsigned int i; 126 int ret = 1; 127 128 for (i = 0; i < oe->numlower; i++) { 129 struct dentry *d = oe->lowerstack[i].dentry; 130 131 if (d->d_flags & DCACHE_OP_REVALIDATE) { 132 ret = d->d_op->d_revalidate(d, flags); 133 if (ret < 0) 134 return ret; 135 if (!ret) { 136 if (!(flags & LOOKUP_RCU)) 137 d_invalidate(d); 138 return -ESTALE; 139 } 140 } 141 } 142 return 1; 143 } 144 145 static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) 146 { 147 struct ovl_entry *oe = dentry->d_fsdata; 148 unsigned int i; 149 int ret = 1; 150 151 for (i = 0; i < oe->numlower; i++) { 152 struct dentry *d = oe->lowerstack[i].dentry; 153 154 if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) { 155 ret = d->d_op->d_weak_revalidate(d, flags); 156 if (ret <= 0) 157 break; 158 } 159 } 160 return ret; 161 } 162 163 static const struct dentry_operations ovl_dentry_operations = { 164 .d_release = ovl_dentry_release, 165 .d_real = ovl_d_real, 166 }; 167 168 static const struct dentry_operations ovl_reval_dentry_operations = { 169 .d_release = ovl_dentry_release, 170 .d_real = ovl_d_real, 171 .d_revalidate = ovl_dentry_revalidate, 172 .d_weak_revalidate = ovl_dentry_weak_revalidate, 173 }; 174 175 static struct kmem_cache *ovl_inode_cachep; 176 177 static struct inode *ovl_alloc_inode(struct super_block *sb) 178 { 179 struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL); 180 181 if (!oi) 182 return NULL; 183 184 oi->cache = NULL; 185 oi->redirect = NULL; 186 oi->version = 0; 187 oi->flags = 0; 188 oi->__upperdentry = NULL; 189 oi->lower = NULL; 190 mutex_init(&oi->lock); 191 192 return &oi->vfs_inode; 193 } 194 195 static void ovl_i_callback(struct rcu_head *head) 196 { 197 struct inode *inode = container_of(head, struct inode, i_rcu); 198 199 kmem_cache_free(ovl_inode_cachep, OVL_I(inode)); 200 } 201 202 static void ovl_destroy_inode(struct inode *inode) 203 { 204 struct ovl_inode *oi = OVL_I(inode); 205 206 dput(oi->__upperdentry); 207 kfree(oi->redirect); 208 ovl_dir_cache_free(inode); 209 mutex_destroy(&oi->lock); 210 211 call_rcu(&inode->i_rcu, ovl_i_callback); 212 } 213 214 static void ovl_free_fs(struct ovl_fs *ofs) 215 { 216 unsigned i; 217 218 dput(ofs->indexdir); 219 dput(ofs->workdir); 220 if (ofs->workdir_locked) 221 ovl_inuse_unlock(ofs->workbasedir); 222 dput(ofs->workbasedir); 223 if (ofs->upperdir_locked) 224 ovl_inuse_unlock(ofs->upper_mnt->mnt_root); 225 mntput(ofs->upper_mnt); 226 for (i = 0; i < ofs->numlower; i++) { 227 mntput(ofs->lower_layers[i].mnt); 228 free_anon_bdev(ofs->lower_layers[i].pseudo_dev); 229 } 230 kfree(ofs->lower_layers); 231 232 kfree(ofs->config.lowerdir); 233 kfree(ofs->config.upperdir); 234 kfree(ofs->config.workdir); 235 if (ofs->creator_cred) 236 put_cred(ofs->creator_cred); 237 kfree(ofs); 238 } 239 240 static void ovl_put_super(struct super_block *sb) 241 { 242 struct ovl_fs *ofs = sb->s_fs_info; 243 244 ovl_free_fs(ofs); 245 } 246 247 static int ovl_sync_fs(struct super_block *sb, int wait) 248 { 249 struct ovl_fs *ofs = sb->s_fs_info; 250 struct super_block *upper_sb; 251 int ret; 252 253 if (!ofs->upper_mnt) 254 return 0; 255 upper_sb = ofs->upper_mnt->mnt_sb; 256 if (!upper_sb->s_op->sync_fs) 257 return 0; 258 259 /* real inodes have already been synced by sync_filesystem(ovl_sb) */ 260 down_read(&upper_sb->s_umount); 261 ret = upper_sb->s_op->sync_fs(upper_sb, wait); 262 up_read(&upper_sb->s_umount); 263 return ret; 264 } 265 266 /** 267 * ovl_statfs 268 * @sb: The overlayfs super block 269 * @buf: The struct kstatfs to fill in with stats 270 * 271 * Get the filesystem statistics. As writes always target the upper layer 272 * filesystem pass the statfs to the upper filesystem (if it exists) 273 */ 274 static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) 275 { 276 struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 277 struct dentry *root_dentry = dentry->d_sb->s_root; 278 struct path path; 279 int err; 280 281 ovl_path_real(root_dentry, &path); 282 283 err = vfs_statfs(&path, buf); 284 if (!err) { 285 buf->f_namelen = ofs->namelen; 286 buf->f_type = OVERLAYFS_SUPER_MAGIC; 287 } 288 289 return err; 290 } 291 292 /* Will this overlay be forced to mount/remount ro? */ 293 static bool ovl_force_readonly(struct ovl_fs *ofs) 294 { 295 return (!ofs->upper_mnt || !ofs->workdir); 296 } 297 298 /** 299 * ovl_show_options 300 * 301 * Prints the mount options for a given superblock. 302 * Returns zero; does not fail. 303 */ 304 static int ovl_show_options(struct seq_file *m, struct dentry *dentry) 305 { 306 struct super_block *sb = dentry->d_sb; 307 struct ovl_fs *ofs = sb->s_fs_info; 308 309 seq_show_option(m, "lowerdir", ofs->config.lowerdir); 310 if (ofs->config.upperdir) { 311 seq_show_option(m, "upperdir", ofs->config.upperdir); 312 seq_show_option(m, "workdir", ofs->config.workdir); 313 } 314 if (ofs->config.default_permissions) 315 seq_puts(m, ",default_permissions"); 316 if (ofs->config.redirect_dir != ovl_redirect_dir_def) 317 seq_printf(m, ",redirect_dir=%s", 318 ofs->config.redirect_dir ? "on" : "off"); 319 if (ofs->config.index != ovl_index_def) 320 seq_printf(m, ",index=%s", 321 ofs->config.index ? "on" : "off"); 322 return 0; 323 } 324 325 static int ovl_remount(struct super_block *sb, int *flags, char *data) 326 { 327 struct ovl_fs *ofs = sb->s_fs_info; 328 329 if (!(*flags & MS_RDONLY) && ovl_force_readonly(ofs)) 330 return -EROFS; 331 332 return 0; 333 } 334 335 static const struct super_operations ovl_super_operations = { 336 .alloc_inode = ovl_alloc_inode, 337 .destroy_inode = ovl_destroy_inode, 338 .drop_inode = generic_delete_inode, 339 .put_super = ovl_put_super, 340 .sync_fs = ovl_sync_fs, 341 .statfs = ovl_statfs, 342 .show_options = ovl_show_options, 343 .remount_fs = ovl_remount, 344 }; 345 346 enum { 347 OPT_LOWERDIR, 348 OPT_UPPERDIR, 349 OPT_WORKDIR, 350 OPT_DEFAULT_PERMISSIONS, 351 OPT_REDIRECT_DIR_ON, 352 OPT_REDIRECT_DIR_OFF, 353 OPT_INDEX_ON, 354 OPT_INDEX_OFF, 355 OPT_ERR, 356 }; 357 358 static const match_table_t ovl_tokens = { 359 {OPT_LOWERDIR, "lowerdir=%s"}, 360 {OPT_UPPERDIR, "upperdir=%s"}, 361 {OPT_WORKDIR, "workdir=%s"}, 362 {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, 363 {OPT_REDIRECT_DIR_ON, "redirect_dir=on"}, 364 {OPT_REDIRECT_DIR_OFF, "redirect_dir=off"}, 365 {OPT_INDEX_ON, "index=on"}, 366 {OPT_INDEX_OFF, "index=off"}, 367 {OPT_ERR, NULL} 368 }; 369 370 static char *ovl_next_opt(char **s) 371 { 372 char *sbegin = *s; 373 char *p; 374 375 if (sbegin == NULL) 376 return NULL; 377 378 for (p = sbegin; *p; p++) { 379 if (*p == '\\') { 380 p++; 381 if (!*p) 382 break; 383 } else if (*p == ',') { 384 *p = '\0'; 385 *s = p + 1; 386 return sbegin; 387 } 388 } 389 *s = NULL; 390 return sbegin; 391 } 392 393 static int ovl_parse_opt(char *opt, struct ovl_config *config) 394 { 395 char *p; 396 397 while ((p = ovl_next_opt(&opt)) != NULL) { 398 int token; 399 substring_t args[MAX_OPT_ARGS]; 400 401 if (!*p) 402 continue; 403 404 token = match_token(p, ovl_tokens, args); 405 switch (token) { 406 case OPT_UPPERDIR: 407 kfree(config->upperdir); 408 config->upperdir = match_strdup(&args[0]); 409 if (!config->upperdir) 410 return -ENOMEM; 411 break; 412 413 case OPT_LOWERDIR: 414 kfree(config->lowerdir); 415 config->lowerdir = match_strdup(&args[0]); 416 if (!config->lowerdir) 417 return -ENOMEM; 418 break; 419 420 case OPT_WORKDIR: 421 kfree(config->workdir); 422 config->workdir = match_strdup(&args[0]); 423 if (!config->workdir) 424 return -ENOMEM; 425 break; 426 427 case OPT_DEFAULT_PERMISSIONS: 428 config->default_permissions = true; 429 break; 430 431 case OPT_REDIRECT_DIR_ON: 432 config->redirect_dir = true; 433 break; 434 435 case OPT_REDIRECT_DIR_OFF: 436 config->redirect_dir = false; 437 break; 438 439 case OPT_INDEX_ON: 440 config->index = true; 441 break; 442 443 case OPT_INDEX_OFF: 444 config->index = false; 445 break; 446 447 default: 448 pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); 449 return -EINVAL; 450 } 451 } 452 453 /* Workdir is useless in non-upper mount */ 454 if (!config->upperdir && config->workdir) { 455 pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n", 456 config->workdir); 457 kfree(config->workdir); 458 config->workdir = NULL; 459 } 460 461 return 0; 462 } 463 464 #define OVL_WORKDIR_NAME "work" 465 #define OVL_INDEXDIR_NAME "index" 466 467 static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, 468 const char *name, bool persist) 469 { 470 struct inode *dir = ofs->workbasedir->d_inode; 471 struct vfsmount *mnt = ofs->upper_mnt; 472 struct dentry *work; 473 int err; 474 bool retried = false; 475 bool locked = false; 476 477 err = mnt_want_write(mnt); 478 if (err) 479 goto out_err; 480 481 inode_lock_nested(dir, I_MUTEX_PARENT); 482 locked = true; 483 484 retry: 485 work = lookup_one_len(name, ofs->workbasedir, strlen(name)); 486 487 if (!IS_ERR(work)) { 488 struct iattr attr = { 489 .ia_valid = ATTR_MODE, 490 .ia_mode = S_IFDIR | 0, 491 }; 492 493 if (work->d_inode) { 494 err = -EEXIST; 495 if (retried) 496 goto out_dput; 497 498 if (persist) 499 goto out_unlock; 500 501 retried = true; 502 ovl_workdir_cleanup(dir, mnt, work, 0); 503 dput(work); 504 goto retry; 505 } 506 507 err = ovl_create_real(dir, work, 508 &(struct cattr){.mode = S_IFDIR | 0}, 509 NULL, true); 510 if (err) 511 goto out_dput; 512 513 /* 514 * Try to remove POSIX ACL xattrs from workdir. We are good if: 515 * 516 * a) success (there was a POSIX ACL xattr and was removed) 517 * b) -ENODATA (there was no POSIX ACL xattr) 518 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported) 519 * 520 * There are various other error values that could effectively 521 * mean that the xattr doesn't exist (e.g. -ERANGE is returned 522 * if the xattr name is too long), but the set of filesystems 523 * allowed as upper are limited to "normal" ones, where checking 524 * for the above two errors is sufficient. 525 */ 526 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT); 527 if (err && err != -ENODATA && err != -EOPNOTSUPP) 528 goto out_dput; 529 530 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS); 531 if (err && err != -ENODATA && err != -EOPNOTSUPP) 532 goto out_dput; 533 534 /* Clear any inherited mode bits */ 535 inode_lock(work->d_inode); 536 err = notify_change(work, &attr, NULL); 537 inode_unlock(work->d_inode); 538 if (err) 539 goto out_dput; 540 } else { 541 err = PTR_ERR(work); 542 goto out_err; 543 } 544 out_unlock: 545 mnt_drop_write(mnt); 546 if (locked) 547 inode_unlock(dir); 548 549 return work; 550 551 out_dput: 552 dput(work); 553 out_err: 554 pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n", 555 ofs->config.workdir, name, -err); 556 work = NULL; 557 goto out_unlock; 558 } 559 560 static void ovl_unescape(char *s) 561 { 562 char *d = s; 563 564 for (;; s++, d++) { 565 if (*s == '\\') 566 s++; 567 *d = *s; 568 if (!*s) 569 break; 570 } 571 } 572 573 static int ovl_mount_dir_noesc(const char *name, struct path *path) 574 { 575 int err = -EINVAL; 576 577 if (!*name) { 578 pr_err("overlayfs: empty lowerdir\n"); 579 goto out; 580 } 581 err = kern_path(name, LOOKUP_FOLLOW, path); 582 if (err) { 583 pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); 584 goto out; 585 } 586 err = -EINVAL; 587 if (ovl_dentry_weird(path->dentry)) { 588 pr_err("overlayfs: filesystem on '%s' not supported\n", name); 589 goto out_put; 590 } 591 if (!d_is_dir(path->dentry)) { 592 pr_err("overlayfs: '%s' not a directory\n", name); 593 goto out_put; 594 } 595 return 0; 596 597 out_put: 598 path_put_init(path); 599 out: 600 return err; 601 } 602 603 static int ovl_mount_dir(const char *name, struct path *path) 604 { 605 int err = -ENOMEM; 606 char *tmp = kstrdup(name, GFP_KERNEL); 607 608 if (tmp) { 609 ovl_unescape(tmp); 610 err = ovl_mount_dir_noesc(tmp, path); 611 612 if (!err) 613 if (ovl_dentry_remote(path->dentry)) { 614 pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n", 615 tmp); 616 path_put_init(path); 617 err = -EINVAL; 618 } 619 kfree(tmp); 620 } 621 return err; 622 } 623 624 static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs, 625 const char *name) 626 { 627 struct kstatfs statfs; 628 int err = vfs_statfs(path, &statfs); 629 630 if (err) 631 pr_err("overlayfs: statfs failed on '%s'\n", name); 632 else 633 ofs->namelen = max(ofs->namelen, statfs.f_namelen); 634 635 return err; 636 } 637 638 static int ovl_lower_dir(const char *name, struct path *path, 639 struct ovl_fs *ofs, int *stack_depth, bool *remote) 640 { 641 int err; 642 643 err = ovl_mount_dir_noesc(name, path); 644 if (err) 645 goto out; 646 647 err = ovl_check_namelen(path, ofs, name); 648 if (err) 649 goto out_put; 650 651 *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth); 652 653 if (ovl_dentry_remote(path->dentry)) 654 *remote = true; 655 656 /* 657 * The inodes index feature needs to encode and decode file 658 * handles, so it requires that all layers support them. 659 */ 660 if (ofs->config.index && !ovl_can_decode_fh(path->dentry->d_sb)) { 661 ofs->config.index = false; 662 pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off.\n", name); 663 } 664 665 return 0; 666 667 out_put: 668 path_put_init(path); 669 out: 670 return err; 671 } 672 673 /* Workdir should not be subdir of upperdir and vice versa */ 674 static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) 675 { 676 bool ok = false; 677 678 if (workdir != upperdir) { 679 ok = (lock_rename(workdir, upperdir) == NULL); 680 unlock_rename(workdir, upperdir); 681 } 682 return ok; 683 } 684 685 static unsigned int ovl_split_lowerdirs(char *str) 686 { 687 unsigned int ctr = 1; 688 char *s, *d; 689 690 for (s = d = str;; s++, d++) { 691 if (*s == '\\') { 692 s++; 693 } else if (*s == ':') { 694 *d = '\0'; 695 ctr++; 696 continue; 697 } 698 *d = *s; 699 if (!*s) 700 break; 701 } 702 return ctr; 703 } 704 705 static int __maybe_unused 706 ovl_posix_acl_xattr_get(const struct xattr_handler *handler, 707 struct dentry *dentry, struct inode *inode, 708 const char *name, void *buffer, size_t size) 709 { 710 return ovl_xattr_get(dentry, inode, handler->name, buffer, size); 711 } 712 713 static int __maybe_unused 714 ovl_posix_acl_xattr_set(const struct xattr_handler *handler, 715 struct dentry *dentry, struct inode *inode, 716 const char *name, const void *value, 717 size_t size, int flags) 718 { 719 struct dentry *workdir = ovl_workdir(dentry); 720 struct inode *realinode = ovl_inode_real(inode); 721 struct posix_acl *acl = NULL; 722 int err; 723 724 /* Check that everything is OK before copy-up */ 725 if (value) { 726 acl = posix_acl_from_xattr(&init_user_ns, value, size); 727 if (IS_ERR(acl)) 728 return PTR_ERR(acl); 729 } 730 err = -EOPNOTSUPP; 731 if (!IS_POSIXACL(d_inode(workdir))) 732 goto out_acl_release; 733 if (!realinode->i_op->set_acl) 734 goto out_acl_release; 735 if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) { 736 err = acl ? -EACCES : 0; 737 goto out_acl_release; 738 } 739 err = -EPERM; 740 if (!inode_owner_or_capable(inode)) 741 goto out_acl_release; 742 743 posix_acl_release(acl); 744 745 /* 746 * Check if sgid bit needs to be cleared (actual setacl operation will 747 * be done with mounter's capabilities and so that won't do it for us). 748 */ 749 if (unlikely(inode->i_mode & S_ISGID) && 750 handler->flags == ACL_TYPE_ACCESS && 751 !in_group_p(inode->i_gid) && 752 !capable_wrt_inode_uidgid(inode, CAP_FSETID)) { 753 struct iattr iattr = { .ia_valid = ATTR_KILL_SGID }; 754 755 err = ovl_setattr(dentry, &iattr); 756 if (err) 757 return err; 758 } 759 760 err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags); 761 if (!err) 762 ovl_copyattr(ovl_inode_real(inode), inode); 763 764 return err; 765 766 out_acl_release: 767 posix_acl_release(acl); 768 return err; 769 } 770 771 static int ovl_own_xattr_get(const struct xattr_handler *handler, 772 struct dentry *dentry, struct inode *inode, 773 const char *name, void *buffer, size_t size) 774 { 775 return -EOPNOTSUPP; 776 } 777 778 static int ovl_own_xattr_set(const struct xattr_handler *handler, 779 struct dentry *dentry, struct inode *inode, 780 const char *name, const void *value, 781 size_t size, int flags) 782 { 783 return -EOPNOTSUPP; 784 } 785 786 static int ovl_other_xattr_get(const struct xattr_handler *handler, 787 struct dentry *dentry, struct inode *inode, 788 const char *name, void *buffer, size_t size) 789 { 790 return ovl_xattr_get(dentry, inode, name, buffer, size); 791 } 792 793 static int ovl_other_xattr_set(const struct xattr_handler *handler, 794 struct dentry *dentry, struct inode *inode, 795 const char *name, const void *value, 796 size_t size, int flags) 797 { 798 return ovl_xattr_set(dentry, inode, name, value, size, flags); 799 } 800 801 static const struct xattr_handler __maybe_unused 802 ovl_posix_acl_access_xattr_handler = { 803 .name = XATTR_NAME_POSIX_ACL_ACCESS, 804 .flags = ACL_TYPE_ACCESS, 805 .get = ovl_posix_acl_xattr_get, 806 .set = ovl_posix_acl_xattr_set, 807 }; 808 809 static const struct xattr_handler __maybe_unused 810 ovl_posix_acl_default_xattr_handler = { 811 .name = XATTR_NAME_POSIX_ACL_DEFAULT, 812 .flags = ACL_TYPE_DEFAULT, 813 .get = ovl_posix_acl_xattr_get, 814 .set = ovl_posix_acl_xattr_set, 815 }; 816 817 static const struct xattr_handler ovl_own_xattr_handler = { 818 .prefix = OVL_XATTR_PREFIX, 819 .get = ovl_own_xattr_get, 820 .set = ovl_own_xattr_set, 821 }; 822 823 static const struct xattr_handler ovl_other_xattr_handler = { 824 .prefix = "", /* catch all */ 825 .get = ovl_other_xattr_get, 826 .set = ovl_other_xattr_set, 827 }; 828 829 static const struct xattr_handler *ovl_xattr_handlers[] = { 830 #ifdef CONFIG_FS_POSIX_ACL 831 &ovl_posix_acl_access_xattr_handler, 832 &ovl_posix_acl_default_xattr_handler, 833 #endif 834 &ovl_own_xattr_handler, 835 &ovl_other_xattr_handler, 836 NULL 837 }; 838 839 static int ovl_get_upper(struct ovl_fs *ofs, struct path *upperpath) 840 { 841 struct vfsmount *upper_mnt; 842 int err; 843 844 err = ovl_mount_dir(ofs->config.upperdir, upperpath); 845 if (err) 846 goto out; 847 848 /* Upper fs should not be r/o */ 849 if (sb_rdonly(upperpath->mnt->mnt_sb)) { 850 pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n"); 851 err = -EINVAL; 852 goto out; 853 } 854 855 err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir); 856 if (err) 857 goto out; 858 859 err = -EBUSY; 860 if (ovl_inuse_trylock(upperpath->dentry)) { 861 ofs->upperdir_locked = true; 862 } else if (ofs->config.index) { 863 pr_err("overlayfs: upperdir is in-use by another mount, mount with '-o index=off' to override exclusive upperdir protection.\n"); 864 goto out; 865 } else { 866 pr_warn("overlayfs: upperdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n"); 867 } 868 869 upper_mnt = clone_private_mount(upperpath); 870 err = PTR_ERR(upper_mnt); 871 if (IS_ERR(upper_mnt)) { 872 pr_err("overlayfs: failed to clone upperpath\n"); 873 goto out; 874 } 875 876 /* Don't inherit atime flags */ 877 upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME); 878 ofs->upper_mnt = upper_mnt; 879 err = 0; 880 out: 881 return err; 882 } 883 884 static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath) 885 { 886 struct dentry *temp; 887 int err; 888 889 ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false); 890 if (!ofs->workdir) 891 return 0; 892 893 /* 894 * Upper should support d_type, else whiteouts are visible. Given 895 * workdir and upper are on same fs, we can do iterate_dir() on 896 * workdir. This check requires successful creation of workdir in 897 * previous step. 898 */ 899 err = ovl_check_d_type_supported(workpath); 900 if (err < 0) 901 return err; 902 903 /* 904 * We allowed this configuration and don't want to break users over 905 * kernel upgrade. So warn instead of erroring out. 906 */ 907 if (!err) 908 pr_warn("overlayfs: upper fs needs to support d_type.\n"); 909 910 /* Check if upper/work fs supports O_TMPFILE */ 911 temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0); 912 ofs->tmpfile = !IS_ERR(temp); 913 if (ofs->tmpfile) 914 dput(temp); 915 else 916 pr_warn("overlayfs: upper fs does not support tmpfile.\n"); 917 918 /* 919 * Check if upper/work fs supports trusted.overlay.* xattr 920 */ 921 err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0); 922 if (err) { 923 ofs->noxattr = true; 924 pr_warn("overlayfs: upper fs does not support xattr.\n"); 925 } else { 926 vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE); 927 } 928 929 /* Check if upper/work fs supports file handles */ 930 if (ofs->config.index && 931 !ovl_can_decode_fh(ofs->workdir->d_sb)) { 932 ofs->config.index = false; 933 pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n"); 934 } 935 936 return 0; 937 } 938 939 static int ovl_get_workdir(struct ovl_fs *ofs, struct path *upperpath) 940 { 941 int err; 942 struct path workpath = { }; 943 944 err = ovl_mount_dir(ofs->config.workdir, &workpath); 945 if (err) 946 goto out; 947 948 err = -EINVAL; 949 if (upperpath->mnt != workpath.mnt) { 950 pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); 951 goto out; 952 } 953 if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) { 954 pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); 955 goto out; 956 } 957 958 err = -EBUSY; 959 if (ovl_inuse_trylock(workpath.dentry)) { 960 ofs->workdir_locked = true; 961 } else if (ofs->config.index) { 962 pr_err("overlayfs: workdir is in-use by another mount, mount with '-o index=off' to override exclusive workdir protection.\n"); 963 goto out; 964 } else { 965 pr_warn("overlayfs: workdir is in-use by another mount, accessing files from both mounts will result in undefined behavior.\n"); 966 } 967 968 ofs->workbasedir = dget(workpath.dentry); 969 err = ovl_make_workdir(ofs, &workpath); 970 if (err) 971 goto out; 972 973 err = 0; 974 out: 975 path_put(&workpath); 976 977 return err; 978 } 979 980 static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe, 981 struct path *upperpath) 982 { 983 int err; 984 985 /* Verify lower root is upper root origin */ 986 err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry, 987 false, true); 988 if (err) { 989 pr_err("overlayfs: failed to verify upper root origin\n"); 990 goto out; 991 } 992 993 ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true); 994 if (ofs->indexdir) { 995 /* Verify upper root is index dir origin */ 996 err = ovl_verify_origin(ofs->indexdir, upperpath->dentry, 997 true, true); 998 if (err) 999 pr_err("overlayfs: failed to verify index dir origin\n"); 1000 1001 /* Cleanup bad/stale/orphan index entries */ 1002 if (!err) 1003 err = ovl_indexdir_cleanup(ofs->indexdir, 1004 ofs->upper_mnt, 1005 oe->lowerstack, 1006 oe->numlower); 1007 } 1008 if (err || !ofs->indexdir) 1009 pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); 1010 1011 out: 1012 return err; 1013 } 1014 1015 static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack, 1016 unsigned int numlower) 1017 { 1018 int err; 1019 unsigned int i; 1020 1021 err = -ENOMEM; 1022 ofs->lower_layers = kcalloc(numlower, sizeof(struct ovl_layer), 1023 GFP_KERNEL); 1024 if (ofs->lower_layers == NULL) 1025 goto out; 1026 for (i = 0; i < numlower; i++) { 1027 struct vfsmount *mnt; 1028 dev_t dev; 1029 1030 err = get_anon_bdev(&dev); 1031 if (err) { 1032 pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n"); 1033 goto out; 1034 } 1035 1036 mnt = clone_private_mount(&stack[i]); 1037 err = PTR_ERR(mnt); 1038 if (IS_ERR(mnt)) { 1039 pr_err("overlayfs: failed to clone lowerpath\n"); 1040 free_anon_bdev(dev); 1041 goto out; 1042 } 1043 /* 1044 * Make lower layers R/O. That way fchmod/fchown on lower file 1045 * will fail instead of modifying lower fs. 1046 */ 1047 mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME; 1048 1049 ofs->lower_layers[ofs->numlower].mnt = mnt; 1050 ofs->lower_layers[ofs->numlower].pseudo_dev = dev; 1051 ofs->numlower++; 1052 1053 /* Check if all lower layers are on same sb */ 1054 if (i == 0) 1055 ofs->same_sb = mnt->mnt_sb; 1056 else if (ofs->same_sb != mnt->mnt_sb) 1057 ofs->same_sb = NULL; 1058 } 1059 err = 0; 1060 out: 1061 return err; 1062 } 1063 1064 static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, 1065 struct ovl_fs *ofs) 1066 { 1067 int err; 1068 char *lowertmp, *lower; 1069 struct path *stack = NULL; 1070 unsigned int stacklen, numlower = 0, i; 1071 bool remote = false; 1072 struct ovl_entry *oe; 1073 1074 err = -ENOMEM; 1075 lowertmp = kstrdup(ofs->config.lowerdir, GFP_KERNEL); 1076 if (!lowertmp) 1077 goto out_err; 1078 1079 err = -EINVAL; 1080 stacklen = ovl_split_lowerdirs(lowertmp); 1081 if (stacklen > OVL_MAX_STACK) { 1082 pr_err("overlayfs: too many lower directories, limit is %d\n", 1083 OVL_MAX_STACK); 1084 goto out_err; 1085 } else if (!ofs->config.upperdir && stacklen == 1) { 1086 pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n"); 1087 goto out_err; 1088 } 1089 1090 err = -ENOMEM; 1091 stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); 1092 if (!stack) 1093 goto out_err; 1094 1095 err = -EINVAL; 1096 lower = lowertmp; 1097 for (numlower = 0; numlower < stacklen; numlower++) { 1098 err = ovl_lower_dir(lower, &stack[numlower], ofs, 1099 &sb->s_stack_depth, &remote); 1100 if (err) 1101 goto out_err; 1102 1103 lower = strchr(lower, '\0') + 1; 1104 } 1105 1106 err = -EINVAL; 1107 sb->s_stack_depth++; 1108 if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { 1109 pr_err("overlayfs: maximum fs stacking depth exceeded\n"); 1110 goto out_err; 1111 } 1112 1113 err = ovl_get_lower_layers(ofs, stack, numlower); 1114 if (err) 1115 goto out_err; 1116 1117 err = -ENOMEM; 1118 oe = ovl_alloc_entry(numlower); 1119 if (!oe) 1120 goto out_err; 1121 1122 for (i = 0; i < numlower; i++) { 1123 oe->lowerstack[i].dentry = dget(stack[i].dentry); 1124 oe->lowerstack[i].layer = &ofs->lower_layers[i]; 1125 } 1126 1127 if (remote) 1128 sb->s_d_op = &ovl_reval_dentry_operations; 1129 else 1130 sb->s_d_op = &ovl_dentry_operations; 1131 1132 out: 1133 for (i = 0; i < numlower; i++) 1134 path_put(&stack[i]); 1135 kfree(stack); 1136 kfree(lowertmp); 1137 1138 return oe; 1139 1140 out_err: 1141 oe = ERR_PTR(err); 1142 goto out; 1143 } 1144 1145 static int ovl_fill_super(struct super_block *sb, void *data, int silent) 1146 { 1147 struct path upperpath = { }; 1148 struct dentry *root_dentry; 1149 struct ovl_entry *oe; 1150 struct ovl_fs *ofs; 1151 struct cred *cred; 1152 int err; 1153 1154 err = -ENOMEM; 1155 ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); 1156 if (!ofs) 1157 goto out; 1158 1159 ofs->creator_cred = cred = prepare_creds(); 1160 if (!cred) 1161 goto out_err; 1162 1163 ofs->config.redirect_dir = ovl_redirect_dir_def; 1164 ofs->config.index = ovl_index_def; 1165 err = ovl_parse_opt((char *) data, &ofs->config); 1166 if (err) 1167 goto out_err; 1168 1169 err = -EINVAL; 1170 if (!ofs->config.lowerdir) { 1171 if (!silent) 1172 pr_err("overlayfs: missing 'lowerdir'\n"); 1173 goto out_err; 1174 } 1175 1176 sb->s_stack_depth = 0; 1177 sb->s_maxbytes = MAX_LFS_FILESIZE; 1178 if (ofs->config.upperdir) { 1179 if (!ofs->config.workdir) { 1180 pr_err("overlayfs: missing 'workdir'\n"); 1181 goto out_err; 1182 } 1183 1184 err = ovl_get_upper(ofs, &upperpath); 1185 if (err) 1186 goto out_err; 1187 1188 err = ovl_get_workdir(ofs, &upperpath); 1189 if (err) 1190 goto out_err; 1191 1192 if (!ofs->workdir) 1193 sb->s_flags |= MS_RDONLY; 1194 1195 sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth; 1196 sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran; 1197 1198 } 1199 oe = ovl_get_lowerstack(sb, ofs); 1200 err = PTR_ERR(oe); 1201 if (IS_ERR(oe)) 1202 goto out_err; 1203 1204 /* If the upper fs is nonexistent, we mark overlayfs r/o too */ 1205 if (!ofs->upper_mnt) 1206 sb->s_flags |= MS_RDONLY; 1207 else if (ofs->upper_mnt->mnt_sb != ofs->same_sb) 1208 ofs->same_sb = NULL; 1209 1210 if (!(ovl_force_readonly(ofs)) && ofs->config.index) { 1211 err = ovl_get_indexdir(ofs, oe, &upperpath); 1212 if (err) 1213 goto out_free_oe; 1214 1215 if (!ofs->indexdir) 1216 sb->s_flags |= MS_RDONLY; 1217 } 1218 1219 /* Show index=off/on in /proc/mounts for any of the reasons above */ 1220 if (!ofs->indexdir) 1221 ofs->config.index = false; 1222 1223 /* Never override disk quota limits or use reserved space */ 1224 cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); 1225 1226 sb->s_magic = OVERLAYFS_SUPER_MAGIC; 1227 sb->s_op = &ovl_super_operations; 1228 sb->s_xattr = ovl_xattr_handlers; 1229 sb->s_fs_info = ofs; 1230 sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK; 1231 1232 err = -ENOMEM; 1233 root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); 1234 if (!root_dentry) 1235 goto out_free_oe; 1236 1237 mntput(upperpath.mnt); 1238 if (upperpath.dentry) { 1239 oe->has_upper = true; 1240 if (ovl_is_impuredir(upperpath.dentry)) 1241 ovl_set_flag(OVL_IMPURE, d_inode(root_dentry)); 1242 } 1243 1244 root_dentry->d_fsdata = oe; 1245 1246 /* Root is always merge -> can have whiteouts */ 1247 ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry)); 1248 ovl_inode_init(d_inode(root_dentry), upperpath.dentry, 1249 ovl_dentry_lower(root_dentry)); 1250 1251 sb->s_root = root_dentry; 1252 1253 return 0; 1254 1255 out_free_oe: 1256 ovl_entry_stack_free(oe); 1257 kfree(oe); 1258 out_err: 1259 path_put(&upperpath); 1260 ovl_free_fs(ofs); 1261 out: 1262 return err; 1263 } 1264 1265 static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, 1266 const char *dev_name, void *raw_data) 1267 { 1268 return mount_nodev(fs_type, flags, raw_data, ovl_fill_super); 1269 } 1270 1271 static struct file_system_type ovl_fs_type = { 1272 .owner = THIS_MODULE, 1273 .name = "overlay", 1274 .mount = ovl_mount, 1275 .kill_sb = kill_anon_super, 1276 }; 1277 MODULE_ALIAS_FS("overlay"); 1278 1279 static void ovl_inode_init_once(void *foo) 1280 { 1281 struct ovl_inode *oi = foo; 1282 1283 inode_init_once(&oi->vfs_inode); 1284 } 1285 1286 static int __init ovl_init(void) 1287 { 1288 int err; 1289 1290 ovl_inode_cachep = kmem_cache_create("ovl_inode", 1291 sizeof(struct ovl_inode), 0, 1292 (SLAB_RECLAIM_ACCOUNT| 1293 SLAB_MEM_SPREAD|SLAB_ACCOUNT), 1294 ovl_inode_init_once); 1295 if (ovl_inode_cachep == NULL) 1296 return -ENOMEM; 1297 1298 err = register_filesystem(&ovl_fs_type); 1299 if (err) 1300 kmem_cache_destroy(ovl_inode_cachep); 1301 1302 return err; 1303 } 1304 1305 static void __exit ovl_exit(void) 1306 { 1307 unregister_filesystem(&ovl_fs_type); 1308 1309 /* 1310 * Make sure all delayed rcu free inodes are flushed before we 1311 * destroy cache. 1312 */ 1313 rcu_barrier(); 1314 kmem_cache_destroy(ovl_inode_cachep); 1315 1316 } 1317 1318 module_init(ovl_init); 1319 module_exit(ovl_exit); 1320