1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * 4 * Copyright (C) 2011 Novell Inc. 5 */ 6 7 #include <uapi/linux/magic.h> 8 #include <linux/fs.h> 9 #include <linux/namei.h> 10 #include <linux/xattr.h> 11 #include <linux/mount.h> 12 #include <linux/parser.h> 13 #include <linux/module.h> 14 #include <linux/statfs.h> 15 #include <linux/seq_file.h> 16 #include <linux/posix_acl_xattr.h> 17 #include <linux/exportfs.h> 18 #include "overlayfs.h" 19 20 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 21 MODULE_DESCRIPTION("Overlay filesystem"); 22 MODULE_LICENSE("GPL"); 23 24 25 struct ovl_dir_cache; 26 27 #define OVL_MAX_STACK 500 28 29 static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR); 30 module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644); 31 MODULE_PARM_DESC(redirect_dir, 32 "Default to on or off for the redirect_dir feature"); 33 34 static bool ovl_redirect_always_follow = 35 IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW); 36 module_param_named(redirect_always_follow, ovl_redirect_always_follow, 37 bool, 0644); 38 MODULE_PARM_DESC(redirect_always_follow, 39 "Follow redirects even if redirect_dir feature is turned off"); 40 41 static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX); 42 module_param_named(index, ovl_index_def, bool, 0644); 43 MODULE_PARM_DESC(index, 44 "Default to on or off for the inodes index feature"); 45 46 static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT); 47 module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644); 48 MODULE_PARM_DESC(nfs_export, 49 "Default to on or off for the NFS export feature"); 50 51 static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO); 52 module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644); 53 MODULE_PARM_DESC(xino_auto, 54 "Auto enable xino feature"); 55 56 static void ovl_entry_stack_free(struct ovl_entry *oe) 57 { 58 unsigned int i; 59 60 for (i = 0; i < oe->numlower; i++) 61 dput(oe->lowerstack[i].dentry); 62 } 63 64 static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY); 65 module_param_named(metacopy, ovl_metacopy_def, bool, 0644); 66 MODULE_PARM_DESC(metacopy, 67 "Default to on or off for the metadata only copy up feature"); 68 69 static void ovl_dentry_release(struct dentry *dentry) 70 { 71 struct ovl_entry *oe = dentry->d_fsdata; 72 73 if (oe) { 74 ovl_entry_stack_free(oe); 75 kfree_rcu(oe, rcu); 76 } 77 } 78 79 static struct dentry *ovl_d_real(struct dentry *dentry, 80 const struct inode *inode) 81 { 82 struct dentry *real; 83 84 /* It's an overlay file */ 85 if (inode && d_inode(dentry) == inode) 86 return dentry; 87 88 if (!d_is_reg(dentry)) { 89 if (!inode || inode == d_inode(dentry)) 90 return dentry; 91 goto bug; 92 } 93 94 real = ovl_dentry_upper(dentry); 95 if (real && (inode == d_inode(real))) 96 return real; 97 98 if (real && !inode && ovl_has_upperdata(d_inode(dentry))) 99 return real; 100 101 real = ovl_dentry_lowerdata(dentry); 102 if (!real) 103 goto bug; 104 105 /* Handle recursion */ 106 real = d_real(real, inode); 107 108 if (!inode || inode == d_inode(real)) 109 return real; 110 bug: 111 WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry, 112 inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0); 113 return dentry; 114 } 115 116 static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) 117 { 118 struct ovl_entry *oe = dentry->d_fsdata; 119 unsigned int i; 120 int ret = 1; 121 122 for (i = 0; i < oe->numlower; i++) { 123 struct dentry *d = oe->lowerstack[i].dentry; 124 125 if (d->d_flags & DCACHE_OP_REVALIDATE) { 126 ret = d->d_op->d_revalidate(d, flags); 127 if (ret < 0) 128 return ret; 129 if (!ret) { 130 if (!(flags & LOOKUP_RCU)) 131 d_invalidate(d); 132 return -ESTALE; 133 } 134 } 135 } 136 return 1; 137 } 138 139 static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) 140 { 141 struct ovl_entry *oe = dentry->d_fsdata; 142 unsigned int i; 143 int ret = 1; 144 145 for (i = 0; i < oe->numlower; i++) { 146 struct dentry *d = oe->lowerstack[i].dentry; 147 148 if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) { 149 ret = d->d_op->d_weak_revalidate(d, flags); 150 if (ret <= 0) 151 break; 152 } 153 } 154 return ret; 155 } 156 157 static const struct dentry_operations ovl_dentry_operations = { 158 .d_release = ovl_dentry_release, 159 .d_real = ovl_d_real, 160 }; 161 162 static const struct dentry_operations ovl_reval_dentry_operations = { 163 .d_release = ovl_dentry_release, 164 .d_real = ovl_d_real, 165 .d_revalidate = ovl_dentry_revalidate, 166 .d_weak_revalidate = ovl_dentry_weak_revalidate, 167 }; 168 169 static struct kmem_cache *ovl_inode_cachep; 170 171 static struct inode *ovl_alloc_inode(struct super_block *sb) 172 { 173 struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL); 174 175 if (!oi) 176 return NULL; 177 178 oi->cache = NULL; 179 oi->redirect = NULL; 180 oi->version = 0; 181 oi->flags = 0; 182 oi->__upperdentry = NULL; 183 oi->lower = NULL; 184 oi->lowerdata = NULL; 185 mutex_init(&oi->lock); 186 187 return &oi->vfs_inode; 188 } 189 190 static void ovl_free_inode(struct inode *inode) 191 { 192 struct ovl_inode *oi = OVL_I(inode); 193 194 kfree(oi->redirect); 195 mutex_destroy(&oi->lock); 196 kmem_cache_free(ovl_inode_cachep, oi); 197 } 198 199 static void ovl_destroy_inode(struct inode *inode) 200 { 201 struct ovl_inode *oi = OVL_I(inode); 202 203 dput(oi->__upperdentry); 204 iput(oi->lower); 205 if (S_ISDIR(inode->i_mode)) 206 ovl_dir_cache_free(inode); 207 else 208 iput(oi->lowerdata); 209 } 210 211 static void ovl_free_fs(struct ovl_fs *ofs) 212 { 213 unsigned i; 214 215 iput(ofs->workbasedir_trap); 216 iput(ofs->indexdir_trap); 217 iput(ofs->workdir_trap); 218 iput(ofs->upperdir_trap); 219 dput(ofs->indexdir); 220 dput(ofs->workdir); 221 if (ofs->workdir_locked) 222 ovl_inuse_unlock(ofs->workbasedir); 223 dput(ofs->workbasedir); 224 if (ofs->upperdir_locked) 225 ovl_inuse_unlock(ofs->upper_mnt->mnt_root); 226 mntput(ofs->upper_mnt); 227 for (i = 0; i < ofs->numlower; i++) { 228 iput(ofs->lower_layers[i].trap); 229 mntput(ofs->lower_layers[i].mnt); 230 } 231 for (i = 0; i < ofs->numlowerfs; i++) 232 free_anon_bdev(ofs->lower_fs[i].pseudo_dev); 233 kfree(ofs->lower_layers); 234 kfree(ofs->lower_fs); 235 236 kfree(ofs->config.lowerdir); 237 kfree(ofs->config.upperdir); 238 kfree(ofs->config.workdir); 239 kfree(ofs->config.redirect_mode); 240 if (ofs->creator_cred) 241 put_cred(ofs->creator_cred); 242 kfree(ofs); 243 } 244 245 static void ovl_put_super(struct super_block *sb) 246 { 247 struct ovl_fs *ofs = sb->s_fs_info; 248 249 ovl_free_fs(ofs); 250 } 251 252 /* Sync real dirty inodes in upper filesystem (if it exists) */ 253 static int ovl_sync_fs(struct super_block *sb, int wait) 254 { 255 struct ovl_fs *ofs = sb->s_fs_info; 256 struct super_block *upper_sb; 257 int ret; 258 259 if (!ofs->upper_mnt) 260 return 0; 261 262 /* 263 * If this is a sync(2) call or an emergency sync, all the super blocks 264 * will be iterated, including upper_sb, so no need to do anything. 265 * 266 * If this is a syncfs(2) call, then we do need to call 267 * sync_filesystem() on upper_sb, but enough if we do it when being 268 * called with wait == 1. 269 */ 270 if (!wait) 271 return 0; 272 273 upper_sb = ofs->upper_mnt->mnt_sb; 274 275 down_read(&upper_sb->s_umount); 276 ret = sync_filesystem(upper_sb); 277 up_read(&upper_sb->s_umount); 278 279 return ret; 280 } 281 282 /** 283 * ovl_statfs 284 * @sb: The overlayfs super block 285 * @buf: The struct kstatfs to fill in with stats 286 * 287 * Get the filesystem statistics. As writes always target the upper layer 288 * filesystem pass the statfs to the upper filesystem (if it exists) 289 */ 290 static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) 291 { 292 struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 293 struct dentry *root_dentry = dentry->d_sb->s_root; 294 struct path path; 295 int err; 296 297 ovl_path_real(root_dentry, &path); 298 299 err = vfs_statfs(&path, buf); 300 if (!err) { 301 buf->f_namelen = ofs->namelen; 302 buf->f_type = OVERLAYFS_SUPER_MAGIC; 303 } 304 305 return err; 306 } 307 308 /* Will this overlay be forced to mount/remount ro? */ 309 static bool ovl_force_readonly(struct ovl_fs *ofs) 310 { 311 return (!ofs->upper_mnt || !ofs->workdir); 312 } 313 314 static const char *ovl_redirect_mode_def(void) 315 { 316 return ovl_redirect_dir_def ? "on" : "off"; 317 } 318 319 enum { 320 OVL_XINO_OFF, 321 OVL_XINO_AUTO, 322 OVL_XINO_ON, 323 }; 324 325 static const char * const ovl_xino_str[] = { 326 "off", 327 "auto", 328 "on", 329 }; 330 331 static inline int ovl_xino_def(void) 332 { 333 return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF; 334 } 335 336 /** 337 * ovl_show_options 338 * 339 * Prints the mount options for a given superblock. 340 * Returns zero; does not fail. 341 */ 342 static int ovl_show_options(struct seq_file *m, struct dentry *dentry) 343 { 344 struct super_block *sb = dentry->d_sb; 345 struct ovl_fs *ofs = sb->s_fs_info; 346 347 seq_show_option(m, "lowerdir", ofs->config.lowerdir); 348 if (ofs->config.upperdir) { 349 seq_show_option(m, "upperdir", ofs->config.upperdir); 350 seq_show_option(m, "workdir", ofs->config.workdir); 351 } 352 if (ofs->config.default_permissions) 353 seq_puts(m, ",default_permissions"); 354 if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0) 355 seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode); 356 if (ofs->config.index != ovl_index_def) 357 seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off"); 358 if (ofs->config.nfs_export != ovl_nfs_export_def) 359 seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ? 360 "on" : "off"); 361 if (ofs->config.xino != ovl_xino_def()) 362 seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]); 363 if (ofs->config.metacopy != ovl_metacopy_def) 364 seq_printf(m, ",metacopy=%s", 365 ofs->config.metacopy ? "on" : "off"); 366 return 0; 367 } 368 369 static int ovl_remount(struct super_block *sb, int *flags, char *data) 370 { 371 struct ovl_fs *ofs = sb->s_fs_info; 372 373 if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs)) 374 return -EROFS; 375 376 return 0; 377 } 378 379 static const struct super_operations ovl_super_operations = { 380 .alloc_inode = ovl_alloc_inode, 381 .free_inode = ovl_free_inode, 382 .destroy_inode = ovl_destroy_inode, 383 .drop_inode = generic_delete_inode, 384 .put_super = ovl_put_super, 385 .sync_fs = ovl_sync_fs, 386 .statfs = ovl_statfs, 387 .show_options = ovl_show_options, 388 .remount_fs = ovl_remount, 389 }; 390 391 enum { 392 OPT_LOWERDIR, 393 OPT_UPPERDIR, 394 OPT_WORKDIR, 395 OPT_DEFAULT_PERMISSIONS, 396 OPT_REDIRECT_DIR, 397 OPT_INDEX_ON, 398 OPT_INDEX_OFF, 399 OPT_NFS_EXPORT_ON, 400 OPT_NFS_EXPORT_OFF, 401 OPT_XINO_ON, 402 OPT_XINO_OFF, 403 OPT_XINO_AUTO, 404 OPT_METACOPY_ON, 405 OPT_METACOPY_OFF, 406 OPT_ERR, 407 }; 408 409 static const match_table_t ovl_tokens = { 410 {OPT_LOWERDIR, "lowerdir=%s"}, 411 {OPT_UPPERDIR, "upperdir=%s"}, 412 {OPT_WORKDIR, "workdir=%s"}, 413 {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, 414 {OPT_REDIRECT_DIR, "redirect_dir=%s"}, 415 {OPT_INDEX_ON, "index=on"}, 416 {OPT_INDEX_OFF, "index=off"}, 417 {OPT_NFS_EXPORT_ON, "nfs_export=on"}, 418 {OPT_NFS_EXPORT_OFF, "nfs_export=off"}, 419 {OPT_XINO_ON, "xino=on"}, 420 {OPT_XINO_OFF, "xino=off"}, 421 {OPT_XINO_AUTO, "xino=auto"}, 422 {OPT_METACOPY_ON, "metacopy=on"}, 423 {OPT_METACOPY_OFF, "metacopy=off"}, 424 {OPT_ERR, NULL} 425 }; 426 427 static char *ovl_next_opt(char **s) 428 { 429 char *sbegin = *s; 430 char *p; 431 432 if (sbegin == NULL) 433 return NULL; 434 435 for (p = sbegin; *p; p++) { 436 if (*p == '\\') { 437 p++; 438 if (!*p) 439 break; 440 } else if (*p == ',') { 441 *p = '\0'; 442 *s = p + 1; 443 return sbegin; 444 } 445 } 446 *s = NULL; 447 return sbegin; 448 } 449 450 static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode) 451 { 452 if (strcmp(mode, "on") == 0) { 453 config->redirect_dir = true; 454 /* 455 * Does not make sense to have redirect creation without 456 * redirect following. 457 */ 458 config->redirect_follow = true; 459 } else if (strcmp(mode, "follow") == 0) { 460 config->redirect_follow = true; 461 } else if (strcmp(mode, "off") == 0) { 462 if (ovl_redirect_always_follow) 463 config->redirect_follow = true; 464 } else if (strcmp(mode, "nofollow") != 0) { 465 pr_err("overlayfs: bad mount option \"redirect_dir=%s\"\n", 466 mode); 467 return -EINVAL; 468 } 469 470 return 0; 471 } 472 473 static int ovl_parse_opt(char *opt, struct ovl_config *config) 474 { 475 char *p; 476 int err; 477 bool metacopy_opt = false, redirect_opt = false; 478 479 config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL); 480 if (!config->redirect_mode) 481 return -ENOMEM; 482 483 while ((p = ovl_next_opt(&opt)) != NULL) { 484 int token; 485 substring_t args[MAX_OPT_ARGS]; 486 487 if (!*p) 488 continue; 489 490 token = match_token(p, ovl_tokens, args); 491 switch (token) { 492 case OPT_UPPERDIR: 493 kfree(config->upperdir); 494 config->upperdir = match_strdup(&args[0]); 495 if (!config->upperdir) 496 return -ENOMEM; 497 break; 498 499 case OPT_LOWERDIR: 500 kfree(config->lowerdir); 501 config->lowerdir = match_strdup(&args[0]); 502 if (!config->lowerdir) 503 return -ENOMEM; 504 break; 505 506 case OPT_WORKDIR: 507 kfree(config->workdir); 508 config->workdir = match_strdup(&args[0]); 509 if (!config->workdir) 510 return -ENOMEM; 511 break; 512 513 case OPT_DEFAULT_PERMISSIONS: 514 config->default_permissions = true; 515 break; 516 517 case OPT_REDIRECT_DIR: 518 kfree(config->redirect_mode); 519 config->redirect_mode = match_strdup(&args[0]); 520 if (!config->redirect_mode) 521 return -ENOMEM; 522 redirect_opt = true; 523 break; 524 525 case OPT_INDEX_ON: 526 config->index = true; 527 break; 528 529 case OPT_INDEX_OFF: 530 config->index = false; 531 break; 532 533 case OPT_NFS_EXPORT_ON: 534 config->nfs_export = true; 535 break; 536 537 case OPT_NFS_EXPORT_OFF: 538 config->nfs_export = false; 539 break; 540 541 case OPT_XINO_ON: 542 config->xino = OVL_XINO_ON; 543 break; 544 545 case OPT_XINO_OFF: 546 config->xino = OVL_XINO_OFF; 547 break; 548 549 case OPT_XINO_AUTO: 550 config->xino = OVL_XINO_AUTO; 551 break; 552 553 case OPT_METACOPY_ON: 554 config->metacopy = true; 555 metacopy_opt = true; 556 break; 557 558 case OPT_METACOPY_OFF: 559 config->metacopy = false; 560 break; 561 562 default: 563 pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); 564 return -EINVAL; 565 } 566 } 567 568 /* Workdir is useless in non-upper mount */ 569 if (!config->upperdir && config->workdir) { 570 pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n", 571 config->workdir); 572 kfree(config->workdir); 573 config->workdir = NULL; 574 } 575 576 err = ovl_parse_redirect_mode(config, config->redirect_mode); 577 if (err) 578 return err; 579 580 /* 581 * This is to make the logic below simpler. It doesn't make any other 582 * difference, since config->redirect_dir is only used for upper. 583 */ 584 if (!config->upperdir && config->redirect_follow) 585 config->redirect_dir = true; 586 587 /* Resolve metacopy -> redirect_dir dependency */ 588 if (config->metacopy && !config->redirect_dir) { 589 if (metacopy_opt && redirect_opt) { 590 pr_err("overlayfs: conflicting options: metacopy=on,redirect_dir=%s\n", 591 config->redirect_mode); 592 return -EINVAL; 593 } 594 if (redirect_opt) { 595 /* 596 * There was an explicit redirect_dir=... that resulted 597 * in this conflict. 598 */ 599 pr_info("overlayfs: disabling metacopy due to redirect_dir=%s\n", 600 config->redirect_mode); 601 config->metacopy = false; 602 } else { 603 /* Automatically enable redirect otherwise. */ 604 config->redirect_follow = config->redirect_dir = true; 605 } 606 } 607 608 return 0; 609 } 610 611 #define OVL_WORKDIR_NAME "work" 612 #define OVL_INDEXDIR_NAME "index" 613 614 static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, 615 const char *name, bool persist) 616 { 617 struct inode *dir = ofs->workbasedir->d_inode; 618 struct vfsmount *mnt = ofs->upper_mnt; 619 struct dentry *work; 620 int err; 621 bool retried = false; 622 bool locked = false; 623 624 inode_lock_nested(dir, I_MUTEX_PARENT); 625 locked = true; 626 627 retry: 628 work = lookup_one_len(name, ofs->workbasedir, strlen(name)); 629 630 if (!IS_ERR(work)) { 631 struct iattr attr = { 632 .ia_valid = ATTR_MODE, 633 .ia_mode = S_IFDIR | 0, 634 }; 635 636 if (work->d_inode) { 637 err = -EEXIST; 638 if (retried) 639 goto out_dput; 640 641 if (persist) 642 goto out_unlock; 643 644 retried = true; 645 ovl_workdir_cleanup(dir, mnt, work, 0); 646 dput(work); 647 goto retry; 648 } 649 650 work = ovl_create_real(dir, work, OVL_CATTR(attr.ia_mode)); 651 err = PTR_ERR(work); 652 if (IS_ERR(work)) 653 goto out_err; 654 655 /* 656 * Try to remove POSIX ACL xattrs from workdir. We are good if: 657 * 658 * a) success (there was a POSIX ACL xattr and was removed) 659 * b) -ENODATA (there was no POSIX ACL xattr) 660 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported) 661 * 662 * There are various other error values that could effectively 663 * mean that the xattr doesn't exist (e.g. -ERANGE is returned 664 * if the xattr name is too long), but the set of filesystems 665 * allowed as upper are limited to "normal" ones, where checking 666 * for the above two errors is sufficient. 667 */ 668 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT); 669 if (err && err != -ENODATA && err != -EOPNOTSUPP) 670 goto out_dput; 671 672 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS); 673 if (err && err != -ENODATA && err != -EOPNOTSUPP) 674 goto out_dput; 675 676 /* Clear any inherited mode bits */ 677 inode_lock(work->d_inode); 678 err = notify_change(work, &attr, NULL); 679 inode_unlock(work->d_inode); 680 if (err) 681 goto out_dput; 682 } else { 683 err = PTR_ERR(work); 684 goto out_err; 685 } 686 out_unlock: 687 if (locked) 688 inode_unlock(dir); 689 690 return work; 691 692 out_dput: 693 dput(work); 694 out_err: 695 pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n", 696 ofs->config.workdir, name, -err); 697 work = NULL; 698 goto out_unlock; 699 } 700 701 static void ovl_unescape(char *s) 702 { 703 char *d = s; 704 705 for (;; s++, d++) { 706 if (*s == '\\') 707 s++; 708 *d = *s; 709 if (!*s) 710 break; 711 } 712 } 713 714 static int ovl_mount_dir_noesc(const char *name, struct path *path) 715 { 716 int err = -EINVAL; 717 718 if (!*name) { 719 pr_err("overlayfs: empty lowerdir\n"); 720 goto out; 721 } 722 err = kern_path(name, LOOKUP_FOLLOW, path); 723 if (err) { 724 pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); 725 goto out; 726 } 727 err = -EINVAL; 728 if (ovl_dentry_weird(path->dentry)) { 729 pr_err("overlayfs: filesystem on '%s' not supported\n", name); 730 goto out_put; 731 } 732 if (!d_is_dir(path->dentry)) { 733 pr_err("overlayfs: '%s' not a directory\n", name); 734 goto out_put; 735 } 736 return 0; 737 738 out_put: 739 path_put_init(path); 740 out: 741 return err; 742 } 743 744 static int ovl_mount_dir(const char *name, struct path *path) 745 { 746 int err = -ENOMEM; 747 char *tmp = kstrdup(name, GFP_KERNEL); 748 749 if (tmp) { 750 ovl_unescape(tmp); 751 err = ovl_mount_dir_noesc(tmp, path); 752 753 if (!err) 754 if (ovl_dentry_remote(path->dentry)) { 755 pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n", 756 tmp); 757 path_put_init(path); 758 err = -EINVAL; 759 } 760 kfree(tmp); 761 } 762 return err; 763 } 764 765 static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs, 766 const char *name) 767 { 768 struct kstatfs statfs; 769 int err = vfs_statfs(path, &statfs); 770 771 if (err) 772 pr_err("overlayfs: statfs failed on '%s'\n", name); 773 else 774 ofs->namelen = max(ofs->namelen, statfs.f_namelen); 775 776 return err; 777 } 778 779 static int ovl_lower_dir(const char *name, struct path *path, 780 struct ovl_fs *ofs, int *stack_depth, bool *remote) 781 { 782 int fh_type; 783 int err; 784 785 err = ovl_mount_dir_noesc(name, path); 786 if (err) 787 goto out; 788 789 err = ovl_check_namelen(path, ofs, name); 790 if (err) 791 goto out_put; 792 793 *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth); 794 795 if (ovl_dentry_remote(path->dentry)) 796 *remote = true; 797 798 /* 799 * The inodes index feature and NFS export need to encode and decode 800 * file handles, so they require that all layers support them. 801 */ 802 fh_type = ovl_can_decode_fh(path->dentry->d_sb); 803 if ((ofs->config.nfs_export || 804 (ofs->config.index && ofs->config.upperdir)) && !fh_type) { 805 ofs->config.index = false; 806 ofs->config.nfs_export = false; 807 pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n", 808 name); 809 } 810 811 /* Check if lower fs has 32bit inode numbers */ 812 if (fh_type != FILEID_INO32_GEN) 813 ofs->xino_bits = 0; 814 815 return 0; 816 817 out_put: 818 path_put_init(path); 819 out: 820 return err; 821 } 822 823 /* Workdir should not be subdir of upperdir and vice versa */ 824 static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) 825 { 826 bool ok = false; 827 828 if (workdir != upperdir) { 829 ok = (lock_rename(workdir, upperdir) == NULL); 830 unlock_rename(workdir, upperdir); 831 } 832 return ok; 833 } 834 835 static unsigned int ovl_split_lowerdirs(char *str) 836 { 837 unsigned int ctr = 1; 838 char *s, *d; 839 840 for (s = d = str;; s++, d++) { 841 if (*s == '\\') { 842 s++; 843 } else if (*s == ':') { 844 *d = '\0'; 845 ctr++; 846 continue; 847 } 848 *d = *s; 849 if (!*s) 850 break; 851 } 852 return ctr; 853 } 854 855 static int __maybe_unused 856 ovl_posix_acl_xattr_get(const struct xattr_handler *handler, 857 struct dentry *dentry, struct inode *inode, 858 const char *name, void *buffer, size_t size) 859 { 860 return ovl_xattr_get(dentry, inode, handler->name, buffer, size); 861 } 862 863 static int __maybe_unused 864 ovl_posix_acl_xattr_set(const struct xattr_handler *handler, 865 struct dentry *dentry, struct inode *inode, 866 const char *name, const void *value, 867 size_t size, int flags) 868 { 869 struct dentry *workdir = ovl_workdir(dentry); 870 struct inode *realinode = ovl_inode_real(inode); 871 struct posix_acl *acl = NULL; 872 int err; 873 874 /* Check that everything is OK before copy-up */ 875 if (value) { 876 acl = posix_acl_from_xattr(&init_user_ns, value, size); 877 if (IS_ERR(acl)) 878 return PTR_ERR(acl); 879 } 880 err = -EOPNOTSUPP; 881 if (!IS_POSIXACL(d_inode(workdir))) 882 goto out_acl_release; 883 if (!realinode->i_op->set_acl) 884 goto out_acl_release; 885 if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) { 886 err = acl ? -EACCES : 0; 887 goto out_acl_release; 888 } 889 err = -EPERM; 890 if (!inode_owner_or_capable(inode)) 891 goto out_acl_release; 892 893 posix_acl_release(acl); 894 895 /* 896 * Check if sgid bit needs to be cleared (actual setacl operation will 897 * be done with mounter's capabilities and so that won't do it for us). 898 */ 899 if (unlikely(inode->i_mode & S_ISGID) && 900 handler->flags == ACL_TYPE_ACCESS && 901 !in_group_p(inode->i_gid) && 902 !capable_wrt_inode_uidgid(inode, CAP_FSETID)) { 903 struct iattr iattr = { .ia_valid = ATTR_KILL_SGID }; 904 905 err = ovl_setattr(dentry, &iattr); 906 if (err) 907 return err; 908 } 909 910 err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags); 911 if (!err) 912 ovl_copyattr(ovl_inode_real(inode), inode); 913 914 return err; 915 916 out_acl_release: 917 posix_acl_release(acl); 918 return err; 919 } 920 921 static int ovl_own_xattr_get(const struct xattr_handler *handler, 922 struct dentry *dentry, struct inode *inode, 923 const char *name, void *buffer, size_t size) 924 { 925 return -EOPNOTSUPP; 926 } 927 928 static int ovl_own_xattr_set(const struct xattr_handler *handler, 929 struct dentry *dentry, struct inode *inode, 930 const char *name, const void *value, 931 size_t size, int flags) 932 { 933 return -EOPNOTSUPP; 934 } 935 936 static int ovl_other_xattr_get(const struct xattr_handler *handler, 937 struct dentry *dentry, struct inode *inode, 938 const char *name, void *buffer, size_t size) 939 { 940 return ovl_xattr_get(dentry, inode, name, buffer, size); 941 } 942 943 static int ovl_other_xattr_set(const struct xattr_handler *handler, 944 struct dentry *dentry, struct inode *inode, 945 const char *name, const void *value, 946 size_t size, int flags) 947 { 948 return ovl_xattr_set(dentry, inode, name, value, size, flags); 949 } 950 951 static const struct xattr_handler __maybe_unused 952 ovl_posix_acl_access_xattr_handler = { 953 .name = XATTR_NAME_POSIX_ACL_ACCESS, 954 .flags = ACL_TYPE_ACCESS, 955 .get = ovl_posix_acl_xattr_get, 956 .set = ovl_posix_acl_xattr_set, 957 }; 958 959 static const struct xattr_handler __maybe_unused 960 ovl_posix_acl_default_xattr_handler = { 961 .name = XATTR_NAME_POSIX_ACL_DEFAULT, 962 .flags = ACL_TYPE_DEFAULT, 963 .get = ovl_posix_acl_xattr_get, 964 .set = ovl_posix_acl_xattr_set, 965 }; 966 967 static const struct xattr_handler ovl_own_xattr_handler = { 968 .prefix = OVL_XATTR_PREFIX, 969 .get = ovl_own_xattr_get, 970 .set = ovl_own_xattr_set, 971 }; 972 973 static const struct xattr_handler ovl_other_xattr_handler = { 974 .prefix = "", /* catch all */ 975 .get = ovl_other_xattr_get, 976 .set = ovl_other_xattr_set, 977 }; 978 979 static const struct xattr_handler *ovl_xattr_handlers[] = { 980 #ifdef CONFIG_FS_POSIX_ACL 981 &ovl_posix_acl_access_xattr_handler, 982 &ovl_posix_acl_default_xattr_handler, 983 #endif 984 &ovl_own_xattr_handler, 985 &ovl_other_xattr_handler, 986 NULL 987 }; 988 989 static int ovl_setup_trap(struct super_block *sb, struct dentry *dir, 990 struct inode **ptrap, const char *name) 991 { 992 struct inode *trap; 993 int err; 994 995 trap = ovl_get_trap_inode(sb, dir); 996 err = PTR_ERR_OR_ZERO(trap); 997 if (err) { 998 if (err == -ELOOP) 999 pr_err("overlayfs: conflicting %s path\n", name); 1000 return err; 1001 } 1002 1003 *ptrap = trap; 1004 return 0; 1005 } 1006 1007 /* 1008 * Determine how we treat concurrent use of upperdir/workdir based on the 1009 * index feature. This is papering over mount leaks of container runtimes, 1010 * for example, an old overlay mount is leaked and now its upperdir is 1011 * attempted to be used as a lower layer in a new overlay mount. 1012 */ 1013 static int ovl_report_in_use(struct ovl_fs *ofs, const char *name) 1014 { 1015 if (ofs->config.index) { 1016 pr_err("overlayfs: %s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n", 1017 name); 1018 return -EBUSY; 1019 } else { 1020 pr_warn("overlayfs: %s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n", 1021 name); 1022 return 0; 1023 } 1024 } 1025 1026 static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs, 1027 struct path *upperpath) 1028 { 1029 struct vfsmount *upper_mnt; 1030 int err; 1031 1032 err = ovl_mount_dir(ofs->config.upperdir, upperpath); 1033 if (err) 1034 goto out; 1035 1036 /* Upper fs should not be r/o */ 1037 if (sb_rdonly(upperpath->mnt->mnt_sb)) { 1038 pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n"); 1039 err = -EINVAL; 1040 goto out; 1041 } 1042 1043 err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir); 1044 if (err) 1045 goto out; 1046 1047 err = ovl_setup_trap(sb, upperpath->dentry, &ofs->upperdir_trap, 1048 "upperdir"); 1049 if (err) 1050 goto out; 1051 1052 upper_mnt = clone_private_mount(upperpath); 1053 err = PTR_ERR(upper_mnt); 1054 if (IS_ERR(upper_mnt)) { 1055 pr_err("overlayfs: failed to clone upperpath\n"); 1056 goto out; 1057 } 1058 1059 /* Don't inherit atime flags */ 1060 upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME); 1061 ofs->upper_mnt = upper_mnt; 1062 1063 if (ovl_inuse_trylock(ofs->upper_mnt->mnt_root)) { 1064 ofs->upperdir_locked = true; 1065 } else { 1066 err = ovl_report_in_use(ofs, "upperdir"); 1067 if (err) 1068 goto out; 1069 } 1070 1071 err = 0; 1072 out: 1073 return err; 1074 } 1075 1076 static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, 1077 struct path *workpath) 1078 { 1079 struct vfsmount *mnt = ofs->upper_mnt; 1080 struct dentry *temp; 1081 int fh_type; 1082 int err; 1083 1084 err = mnt_want_write(mnt); 1085 if (err) 1086 return err; 1087 1088 ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false); 1089 if (!ofs->workdir) 1090 goto out; 1091 1092 err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir"); 1093 if (err) 1094 goto out; 1095 1096 /* 1097 * Upper should support d_type, else whiteouts are visible. Given 1098 * workdir and upper are on same fs, we can do iterate_dir() on 1099 * workdir. This check requires successful creation of workdir in 1100 * previous step. 1101 */ 1102 err = ovl_check_d_type_supported(workpath); 1103 if (err < 0) 1104 goto out; 1105 1106 /* 1107 * We allowed this configuration and don't want to break users over 1108 * kernel upgrade. So warn instead of erroring out. 1109 */ 1110 if (!err) 1111 pr_warn("overlayfs: upper fs needs to support d_type.\n"); 1112 1113 /* Check if upper/work fs supports O_TMPFILE */ 1114 temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0); 1115 ofs->tmpfile = !IS_ERR(temp); 1116 if (ofs->tmpfile) 1117 dput(temp); 1118 else 1119 pr_warn("overlayfs: upper fs does not support tmpfile.\n"); 1120 1121 /* 1122 * Check if upper/work fs supports trusted.overlay.* xattr 1123 */ 1124 err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0); 1125 if (err) { 1126 ofs->noxattr = true; 1127 ofs->config.index = false; 1128 ofs->config.metacopy = false; 1129 pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off and metacopy=off.\n"); 1130 err = 0; 1131 } else { 1132 vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE); 1133 } 1134 1135 /* Check if upper/work fs supports file handles */ 1136 fh_type = ovl_can_decode_fh(ofs->workdir->d_sb); 1137 if (ofs->config.index && !fh_type) { 1138 ofs->config.index = false; 1139 pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n"); 1140 } 1141 1142 /* Check if upper fs has 32bit inode numbers */ 1143 if (fh_type != FILEID_INO32_GEN) 1144 ofs->xino_bits = 0; 1145 1146 /* NFS export of r/w mount depends on index */ 1147 if (ofs->config.nfs_export && !ofs->config.index) { 1148 pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n"); 1149 ofs->config.nfs_export = false; 1150 } 1151 out: 1152 mnt_drop_write(mnt); 1153 return err; 1154 } 1155 1156 static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs, 1157 struct path *upperpath) 1158 { 1159 int err; 1160 struct path workpath = { }; 1161 1162 err = ovl_mount_dir(ofs->config.workdir, &workpath); 1163 if (err) 1164 goto out; 1165 1166 err = -EINVAL; 1167 if (upperpath->mnt != workpath.mnt) { 1168 pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); 1169 goto out; 1170 } 1171 if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) { 1172 pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); 1173 goto out; 1174 } 1175 1176 ofs->workbasedir = dget(workpath.dentry); 1177 1178 if (ovl_inuse_trylock(ofs->workbasedir)) { 1179 ofs->workdir_locked = true; 1180 } else { 1181 err = ovl_report_in_use(ofs, "workdir"); 1182 if (err) 1183 goto out; 1184 } 1185 1186 err = ovl_setup_trap(sb, ofs->workbasedir, &ofs->workbasedir_trap, 1187 "workdir"); 1188 if (err) 1189 goto out; 1190 1191 err = ovl_make_workdir(sb, ofs, &workpath); 1192 1193 out: 1194 path_put(&workpath); 1195 1196 return err; 1197 } 1198 1199 static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, 1200 struct ovl_entry *oe, struct path *upperpath) 1201 { 1202 struct vfsmount *mnt = ofs->upper_mnt; 1203 int err; 1204 1205 err = mnt_want_write(mnt); 1206 if (err) 1207 return err; 1208 1209 /* Verify lower root is upper root origin */ 1210 err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry, 1211 true); 1212 if (err) { 1213 pr_err("overlayfs: failed to verify upper root origin\n"); 1214 goto out; 1215 } 1216 1217 ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true); 1218 if (ofs->indexdir) { 1219 err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap, 1220 "indexdir"); 1221 if (err) 1222 goto out; 1223 1224 /* 1225 * Verify upper root is exclusively associated with index dir. 1226 * Older kernels stored upper fh in "trusted.overlay.origin" 1227 * xattr. If that xattr exists, verify that it is a match to 1228 * upper dir file handle. In any case, verify or set xattr 1229 * "trusted.overlay.upper" to indicate that index may have 1230 * directory entries. 1231 */ 1232 if (ovl_check_origin_xattr(ofs->indexdir)) { 1233 err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN, 1234 upperpath->dentry, true, false); 1235 if (err) 1236 pr_err("overlayfs: failed to verify index dir 'origin' xattr\n"); 1237 } 1238 err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true); 1239 if (err) 1240 pr_err("overlayfs: failed to verify index dir 'upper' xattr\n"); 1241 1242 /* Cleanup bad/stale/orphan index entries */ 1243 if (!err) 1244 err = ovl_indexdir_cleanup(ofs); 1245 } 1246 if (err || !ofs->indexdir) 1247 pr_warn("overlayfs: try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); 1248 1249 out: 1250 mnt_drop_write(mnt); 1251 return err; 1252 } 1253 1254 static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid) 1255 { 1256 unsigned int i; 1257 1258 if (!ofs->config.nfs_export && !ofs->upper_mnt) 1259 return true; 1260 1261 for (i = 0; i < ofs->numlowerfs; i++) { 1262 /* 1263 * We use uuid to associate an overlay lower file handle with a 1264 * lower layer, so we can accept lower fs with null uuid as long 1265 * as all lower layers with null uuid are on the same fs. 1266 * if we detect multiple lower fs with the same uuid, we 1267 * disable lower file handle decoding on all of them. 1268 */ 1269 if (uuid_equal(&ofs->lower_fs[i].sb->s_uuid, uuid)) { 1270 ofs->lower_fs[i].bad_uuid = true; 1271 return false; 1272 } 1273 } 1274 return true; 1275 } 1276 1277 /* Get a unique fsid for the layer */ 1278 static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path) 1279 { 1280 struct super_block *sb = path->mnt->mnt_sb; 1281 unsigned int i; 1282 dev_t dev; 1283 int err; 1284 bool bad_uuid = false; 1285 1286 /* fsid 0 is reserved for upper fs even with non upper overlay */ 1287 if (ofs->upper_mnt && ofs->upper_mnt->mnt_sb == sb) 1288 return 0; 1289 1290 for (i = 0; i < ofs->numlowerfs; i++) { 1291 if (ofs->lower_fs[i].sb == sb) 1292 return i + 1; 1293 } 1294 1295 if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) { 1296 bad_uuid = true; 1297 if (ofs->config.index || ofs->config.nfs_export) { 1298 ofs->config.index = false; 1299 ofs->config.nfs_export = false; 1300 pr_warn("overlayfs: %s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n", 1301 uuid_is_null(&sb->s_uuid) ? "null" : 1302 "conflicting", 1303 path->dentry); 1304 } 1305 } 1306 1307 err = get_anon_bdev(&dev); 1308 if (err) { 1309 pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n"); 1310 return err; 1311 } 1312 1313 ofs->lower_fs[ofs->numlowerfs].sb = sb; 1314 ofs->lower_fs[ofs->numlowerfs].pseudo_dev = dev; 1315 ofs->lower_fs[ofs->numlowerfs].bad_uuid = bad_uuid; 1316 ofs->numlowerfs++; 1317 1318 return ofs->numlowerfs; 1319 } 1320 1321 static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs, 1322 struct path *stack, unsigned int numlower) 1323 { 1324 int err; 1325 unsigned int i; 1326 1327 err = -ENOMEM; 1328 ofs->lower_layers = kcalloc(numlower, sizeof(struct ovl_layer), 1329 GFP_KERNEL); 1330 if (ofs->lower_layers == NULL) 1331 goto out; 1332 1333 ofs->lower_fs = kcalloc(numlower, sizeof(struct ovl_sb), 1334 GFP_KERNEL); 1335 if (ofs->lower_fs == NULL) 1336 goto out; 1337 1338 for (i = 0; i < numlower; i++) { 1339 struct vfsmount *mnt; 1340 struct inode *trap; 1341 int fsid; 1342 1343 err = fsid = ovl_get_fsid(ofs, &stack[i]); 1344 if (err < 0) 1345 goto out; 1346 1347 err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir"); 1348 if (err) 1349 goto out; 1350 1351 if (ovl_is_inuse(stack[i].dentry)) { 1352 err = ovl_report_in_use(ofs, "lowerdir"); 1353 if (err) 1354 goto out; 1355 } 1356 1357 mnt = clone_private_mount(&stack[i]); 1358 err = PTR_ERR(mnt); 1359 if (IS_ERR(mnt)) { 1360 pr_err("overlayfs: failed to clone lowerpath\n"); 1361 iput(trap); 1362 goto out; 1363 } 1364 1365 /* 1366 * Make lower layers R/O. That way fchmod/fchown on lower file 1367 * will fail instead of modifying lower fs. 1368 */ 1369 mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME; 1370 1371 ofs->lower_layers[ofs->numlower].trap = trap; 1372 ofs->lower_layers[ofs->numlower].mnt = mnt; 1373 ofs->lower_layers[ofs->numlower].idx = i + 1; 1374 ofs->lower_layers[ofs->numlower].fsid = fsid; 1375 if (fsid) { 1376 ofs->lower_layers[ofs->numlower].fs = 1377 &ofs->lower_fs[fsid - 1]; 1378 } 1379 ofs->numlower++; 1380 } 1381 1382 /* 1383 * When all layers on same fs, overlay can use real inode numbers. 1384 * With mount option "xino=on", mounter declares that there are enough 1385 * free high bits in underlying fs to hold the unique fsid. 1386 * If overlayfs does encounter underlying inodes using the high xino 1387 * bits reserved for fsid, it emits a warning and uses the original 1388 * inode number. 1389 */ 1390 if (!ofs->numlowerfs || (ofs->numlowerfs == 1 && !ofs->upper_mnt)) { 1391 ofs->xino_bits = 0; 1392 ofs->config.xino = OVL_XINO_OFF; 1393 } else if (ofs->config.xino == OVL_XINO_ON && !ofs->xino_bits) { 1394 /* 1395 * This is a roundup of number of bits needed for numlowerfs+1 1396 * (i.e. ilog2(numlowerfs+1 - 1) + 1). fsid 0 is reserved for 1397 * upper fs even with non upper overlay. 1398 */ 1399 BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31); 1400 ofs->xino_bits = ilog2(ofs->numlowerfs) + 1; 1401 } 1402 1403 if (ofs->xino_bits) { 1404 pr_info("overlayfs: \"xino\" feature enabled using %d upper inode bits.\n", 1405 ofs->xino_bits); 1406 } 1407 1408 err = 0; 1409 out: 1410 return err; 1411 } 1412 1413 static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, 1414 struct ovl_fs *ofs) 1415 { 1416 int err; 1417 char *lowertmp, *lower; 1418 struct path *stack = NULL; 1419 unsigned int stacklen, numlower = 0, i; 1420 bool remote = false; 1421 struct ovl_entry *oe; 1422 1423 err = -ENOMEM; 1424 lowertmp = kstrdup(ofs->config.lowerdir, GFP_KERNEL); 1425 if (!lowertmp) 1426 goto out_err; 1427 1428 err = -EINVAL; 1429 stacklen = ovl_split_lowerdirs(lowertmp); 1430 if (stacklen > OVL_MAX_STACK) { 1431 pr_err("overlayfs: too many lower directories, limit is %d\n", 1432 OVL_MAX_STACK); 1433 goto out_err; 1434 } else if (!ofs->config.upperdir && stacklen == 1) { 1435 pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n"); 1436 goto out_err; 1437 } else if (!ofs->config.upperdir && ofs->config.nfs_export && 1438 ofs->config.redirect_follow) { 1439 pr_warn("overlayfs: NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n"); 1440 ofs->config.nfs_export = false; 1441 } 1442 1443 err = -ENOMEM; 1444 stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); 1445 if (!stack) 1446 goto out_err; 1447 1448 err = -EINVAL; 1449 lower = lowertmp; 1450 for (numlower = 0; numlower < stacklen; numlower++) { 1451 err = ovl_lower_dir(lower, &stack[numlower], ofs, 1452 &sb->s_stack_depth, &remote); 1453 if (err) 1454 goto out_err; 1455 1456 lower = strchr(lower, '\0') + 1; 1457 } 1458 1459 err = -EINVAL; 1460 sb->s_stack_depth++; 1461 if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { 1462 pr_err("overlayfs: maximum fs stacking depth exceeded\n"); 1463 goto out_err; 1464 } 1465 1466 err = ovl_get_lower_layers(sb, ofs, stack, numlower); 1467 if (err) 1468 goto out_err; 1469 1470 err = -ENOMEM; 1471 oe = ovl_alloc_entry(numlower); 1472 if (!oe) 1473 goto out_err; 1474 1475 for (i = 0; i < numlower; i++) { 1476 oe->lowerstack[i].dentry = dget(stack[i].dentry); 1477 oe->lowerstack[i].layer = &ofs->lower_layers[i]; 1478 } 1479 1480 if (remote) 1481 sb->s_d_op = &ovl_reval_dentry_operations; 1482 else 1483 sb->s_d_op = &ovl_dentry_operations; 1484 1485 out: 1486 for (i = 0; i < numlower; i++) 1487 path_put(&stack[i]); 1488 kfree(stack); 1489 kfree(lowertmp); 1490 1491 return oe; 1492 1493 out_err: 1494 oe = ERR_PTR(err); 1495 goto out; 1496 } 1497 1498 /* 1499 * Check if this layer root is a descendant of: 1500 * - another layer of this overlayfs instance 1501 * - upper/work dir of any overlayfs instance 1502 */ 1503 static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs, 1504 struct dentry *dentry, const char *name) 1505 { 1506 struct dentry *next = dentry, *parent; 1507 int err = 0; 1508 1509 if (!dentry) 1510 return 0; 1511 1512 parent = dget_parent(next); 1513 1514 /* Walk back ancestors to root (inclusive) looking for traps */ 1515 while (!err && parent != next) { 1516 if (ovl_lookup_trap_inode(sb, parent)) { 1517 err = -ELOOP; 1518 pr_err("overlayfs: overlapping %s path\n", name); 1519 } else if (ovl_is_inuse(parent)) { 1520 err = ovl_report_in_use(ofs, name); 1521 } 1522 next = parent; 1523 parent = dget_parent(next); 1524 dput(next); 1525 } 1526 1527 dput(parent); 1528 1529 return err; 1530 } 1531 1532 /* 1533 * Check if any of the layers or work dirs overlap. 1534 */ 1535 static int ovl_check_overlapping_layers(struct super_block *sb, 1536 struct ovl_fs *ofs) 1537 { 1538 int i, err; 1539 1540 if (ofs->upper_mnt) { 1541 err = ovl_check_layer(sb, ofs, ofs->upper_mnt->mnt_root, 1542 "upperdir"); 1543 if (err) 1544 return err; 1545 1546 /* 1547 * Checking workbasedir avoids hitting ovl_is_inuse(parent) of 1548 * this instance and covers overlapping work and index dirs, 1549 * unless work or index dir have been moved since created inside 1550 * workbasedir. In that case, we already have their traps in 1551 * inode cache and we will catch that case on lookup. 1552 */ 1553 err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir"); 1554 if (err) 1555 return err; 1556 } 1557 1558 for (i = 0; i < ofs->numlower; i++) { 1559 err = ovl_check_layer(sb, ofs, 1560 ofs->lower_layers[i].mnt->mnt_root, 1561 "lowerdir"); 1562 if (err) 1563 return err; 1564 } 1565 1566 return 0; 1567 } 1568 1569 static int ovl_fill_super(struct super_block *sb, void *data, int silent) 1570 { 1571 struct path upperpath = { }; 1572 struct dentry *root_dentry; 1573 struct ovl_entry *oe; 1574 struct ovl_fs *ofs; 1575 struct cred *cred; 1576 int err; 1577 1578 err = -ENOMEM; 1579 ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); 1580 if (!ofs) 1581 goto out; 1582 1583 ofs->creator_cred = cred = prepare_creds(); 1584 if (!cred) 1585 goto out_err; 1586 1587 ofs->config.index = ovl_index_def; 1588 ofs->config.nfs_export = ovl_nfs_export_def; 1589 ofs->config.xino = ovl_xino_def(); 1590 ofs->config.metacopy = ovl_metacopy_def; 1591 err = ovl_parse_opt((char *) data, &ofs->config); 1592 if (err) 1593 goto out_err; 1594 1595 err = -EINVAL; 1596 if (!ofs->config.lowerdir) { 1597 if (!silent) 1598 pr_err("overlayfs: missing 'lowerdir'\n"); 1599 goto out_err; 1600 } 1601 1602 sb->s_stack_depth = 0; 1603 sb->s_maxbytes = MAX_LFS_FILESIZE; 1604 /* Assume underlaying fs uses 32bit inodes unless proven otherwise */ 1605 if (ofs->config.xino != OVL_XINO_OFF) 1606 ofs->xino_bits = BITS_PER_LONG - 32; 1607 1608 /* alloc/destroy_inode needed for setting up traps in inode cache */ 1609 sb->s_op = &ovl_super_operations; 1610 1611 if (ofs->config.upperdir) { 1612 if (!ofs->config.workdir) { 1613 pr_err("overlayfs: missing 'workdir'\n"); 1614 goto out_err; 1615 } 1616 1617 err = ovl_get_upper(sb, ofs, &upperpath); 1618 if (err) 1619 goto out_err; 1620 1621 err = ovl_get_workdir(sb, ofs, &upperpath); 1622 if (err) 1623 goto out_err; 1624 1625 if (!ofs->workdir) 1626 sb->s_flags |= SB_RDONLY; 1627 1628 sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth; 1629 sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran; 1630 1631 } 1632 oe = ovl_get_lowerstack(sb, ofs); 1633 err = PTR_ERR(oe); 1634 if (IS_ERR(oe)) 1635 goto out_err; 1636 1637 /* If the upper fs is nonexistent, we mark overlayfs r/o too */ 1638 if (!ofs->upper_mnt) 1639 sb->s_flags |= SB_RDONLY; 1640 1641 if (!(ovl_force_readonly(ofs)) && ofs->config.index) { 1642 err = ovl_get_indexdir(sb, ofs, oe, &upperpath); 1643 if (err) 1644 goto out_free_oe; 1645 1646 /* Force r/o mount with no index dir */ 1647 if (!ofs->indexdir) { 1648 dput(ofs->workdir); 1649 ofs->workdir = NULL; 1650 sb->s_flags |= SB_RDONLY; 1651 } 1652 1653 } 1654 1655 err = ovl_check_overlapping_layers(sb, ofs); 1656 if (err) 1657 goto out_free_oe; 1658 1659 /* Show index=off in /proc/mounts for forced r/o mount */ 1660 if (!ofs->indexdir) { 1661 ofs->config.index = false; 1662 if (ofs->upper_mnt && ofs->config.nfs_export) { 1663 pr_warn("overlayfs: NFS export requires an index dir, falling back to nfs_export=off.\n"); 1664 ofs->config.nfs_export = false; 1665 } 1666 } 1667 1668 if (ofs->config.metacopy && ofs->config.nfs_export) { 1669 pr_warn("overlayfs: NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n"); 1670 ofs->config.nfs_export = false; 1671 } 1672 1673 if (ofs->config.nfs_export) 1674 sb->s_export_op = &ovl_export_operations; 1675 1676 /* Never override disk quota limits or use reserved space */ 1677 cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); 1678 1679 sb->s_magic = OVERLAYFS_SUPER_MAGIC; 1680 sb->s_xattr = ovl_xattr_handlers; 1681 sb->s_fs_info = ofs; 1682 sb->s_flags |= SB_POSIXACL; 1683 1684 err = -ENOMEM; 1685 root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); 1686 if (!root_dentry) 1687 goto out_free_oe; 1688 1689 root_dentry->d_fsdata = oe; 1690 1691 mntput(upperpath.mnt); 1692 if (upperpath.dentry) { 1693 ovl_dentry_set_upper_alias(root_dentry); 1694 if (ovl_is_impuredir(upperpath.dentry)) 1695 ovl_set_flag(OVL_IMPURE, d_inode(root_dentry)); 1696 } 1697 1698 /* Root is always merge -> can have whiteouts */ 1699 ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry)); 1700 ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry); 1701 ovl_set_upperdata(d_inode(root_dentry)); 1702 ovl_inode_init(d_inode(root_dentry), upperpath.dentry, 1703 ovl_dentry_lower(root_dentry), NULL); 1704 1705 sb->s_root = root_dentry; 1706 1707 return 0; 1708 1709 out_free_oe: 1710 ovl_entry_stack_free(oe); 1711 kfree(oe); 1712 out_err: 1713 path_put(&upperpath); 1714 ovl_free_fs(ofs); 1715 out: 1716 return err; 1717 } 1718 1719 static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, 1720 const char *dev_name, void *raw_data) 1721 { 1722 return mount_nodev(fs_type, flags, raw_data, ovl_fill_super); 1723 } 1724 1725 static struct file_system_type ovl_fs_type = { 1726 .owner = THIS_MODULE, 1727 .name = "overlay", 1728 .mount = ovl_mount, 1729 .kill_sb = kill_anon_super, 1730 }; 1731 MODULE_ALIAS_FS("overlay"); 1732 1733 static void ovl_inode_init_once(void *foo) 1734 { 1735 struct ovl_inode *oi = foo; 1736 1737 inode_init_once(&oi->vfs_inode); 1738 } 1739 1740 static int __init ovl_init(void) 1741 { 1742 int err; 1743 1744 ovl_inode_cachep = kmem_cache_create("ovl_inode", 1745 sizeof(struct ovl_inode), 0, 1746 (SLAB_RECLAIM_ACCOUNT| 1747 SLAB_MEM_SPREAD|SLAB_ACCOUNT), 1748 ovl_inode_init_once); 1749 if (ovl_inode_cachep == NULL) 1750 return -ENOMEM; 1751 1752 err = register_filesystem(&ovl_fs_type); 1753 if (err) 1754 kmem_cache_destroy(ovl_inode_cachep); 1755 1756 return err; 1757 } 1758 1759 static void __exit ovl_exit(void) 1760 { 1761 unregister_filesystem(&ovl_fs_type); 1762 1763 /* 1764 * Make sure all delayed rcu free inodes are flushed before we 1765 * destroy cache. 1766 */ 1767 rcu_barrier(); 1768 kmem_cache_destroy(ovl_inode_cachep); 1769 1770 } 1771 1772 module_init(ovl_init); 1773 module_exit(ovl_exit); 1774