1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * 4 * Copyright (C) 2011 Novell Inc. 5 */ 6 7 #include <uapi/linux/magic.h> 8 #include <linux/fs.h> 9 #include <linux/namei.h> 10 #include <linux/xattr.h> 11 #include <linux/mount.h> 12 #include <linux/parser.h> 13 #include <linux/module.h> 14 #include <linux/statfs.h> 15 #include <linux/seq_file.h> 16 #include <linux/posix_acl_xattr.h> 17 #include <linux/exportfs.h> 18 #include "overlayfs.h" 19 20 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 21 MODULE_DESCRIPTION("Overlay filesystem"); 22 MODULE_LICENSE("GPL"); 23 24 25 struct ovl_dir_cache; 26 27 #define OVL_MAX_STACK 500 28 29 static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR); 30 module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644); 31 MODULE_PARM_DESC(redirect_dir, 32 "Default to on or off for the redirect_dir feature"); 33 34 static bool ovl_redirect_always_follow = 35 IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW); 36 module_param_named(redirect_always_follow, ovl_redirect_always_follow, 37 bool, 0644); 38 MODULE_PARM_DESC(redirect_always_follow, 39 "Follow redirects even if redirect_dir feature is turned off"); 40 41 static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX); 42 module_param_named(index, ovl_index_def, bool, 0644); 43 MODULE_PARM_DESC(index, 44 "Default to on or off for the inodes index feature"); 45 46 static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT); 47 module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644); 48 MODULE_PARM_DESC(nfs_export, 49 "Default to on or off for the NFS export feature"); 50 51 static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO); 52 module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644); 53 MODULE_PARM_DESC(xino_auto, 54 "Auto enable xino feature"); 55 56 static void ovl_entry_stack_free(struct ovl_entry *oe) 57 { 58 unsigned int i; 59 60 for (i = 0; i < oe->numlower; i++) 61 dput(oe->lowerstack[i].dentry); 62 } 63 64 static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY); 65 module_param_named(metacopy, ovl_metacopy_def, bool, 0644); 66 MODULE_PARM_DESC(metacopy, 67 "Default to on or off for the metadata only copy up feature"); 68 69 static void ovl_dentry_release(struct dentry *dentry) 70 { 71 struct ovl_entry *oe = dentry->d_fsdata; 72 73 if (oe) { 74 ovl_entry_stack_free(oe); 75 kfree_rcu(oe, rcu); 76 } 77 } 78 79 static struct dentry *ovl_d_real(struct dentry *dentry, 80 const struct inode *inode) 81 { 82 struct dentry *real; 83 84 /* It's an overlay file */ 85 if (inode && d_inode(dentry) == inode) 86 return dentry; 87 88 if (!d_is_reg(dentry)) { 89 if (!inode || inode == d_inode(dentry)) 90 return dentry; 91 goto bug; 92 } 93 94 real = ovl_dentry_upper(dentry); 95 if (real && (inode == d_inode(real))) 96 return real; 97 98 if (real && !inode && ovl_has_upperdata(d_inode(dentry))) 99 return real; 100 101 real = ovl_dentry_lowerdata(dentry); 102 if (!real) 103 goto bug; 104 105 /* Handle recursion */ 106 real = d_real(real, inode); 107 108 if (!inode || inode == d_inode(real)) 109 return real; 110 bug: 111 WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry, 112 inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0); 113 return dentry; 114 } 115 116 static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) 117 { 118 struct ovl_entry *oe = dentry->d_fsdata; 119 unsigned int i; 120 int ret = 1; 121 122 for (i = 0; i < oe->numlower; i++) { 123 struct dentry *d = oe->lowerstack[i].dentry; 124 125 if (d->d_flags & DCACHE_OP_REVALIDATE) { 126 ret = d->d_op->d_revalidate(d, flags); 127 if (ret < 0) 128 return ret; 129 if (!ret) { 130 if (!(flags & LOOKUP_RCU)) 131 d_invalidate(d); 132 return -ESTALE; 133 } 134 } 135 } 136 return 1; 137 } 138 139 static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) 140 { 141 struct ovl_entry *oe = dentry->d_fsdata; 142 unsigned int i; 143 int ret = 1; 144 145 for (i = 0; i < oe->numlower; i++) { 146 struct dentry *d = oe->lowerstack[i].dentry; 147 148 if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) { 149 ret = d->d_op->d_weak_revalidate(d, flags); 150 if (ret <= 0) 151 break; 152 } 153 } 154 return ret; 155 } 156 157 static const struct dentry_operations ovl_dentry_operations = { 158 .d_release = ovl_dentry_release, 159 .d_real = ovl_d_real, 160 }; 161 162 static const struct dentry_operations ovl_reval_dentry_operations = { 163 .d_release = ovl_dentry_release, 164 .d_real = ovl_d_real, 165 .d_revalidate = ovl_dentry_revalidate, 166 .d_weak_revalidate = ovl_dentry_weak_revalidate, 167 }; 168 169 static struct kmem_cache *ovl_inode_cachep; 170 171 static struct inode *ovl_alloc_inode(struct super_block *sb) 172 { 173 struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL); 174 175 if (!oi) 176 return NULL; 177 178 oi->cache = NULL; 179 oi->redirect = NULL; 180 oi->version = 0; 181 oi->flags = 0; 182 oi->__upperdentry = NULL; 183 oi->lower = NULL; 184 oi->lowerdata = NULL; 185 mutex_init(&oi->lock); 186 187 return &oi->vfs_inode; 188 } 189 190 static void ovl_free_inode(struct inode *inode) 191 { 192 struct ovl_inode *oi = OVL_I(inode); 193 194 kfree(oi->redirect); 195 mutex_destroy(&oi->lock); 196 kmem_cache_free(ovl_inode_cachep, oi); 197 } 198 199 static void ovl_destroy_inode(struct inode *inode) 200 { 201 struct ovl_inode *oi = OVL_I(inode); 202 203 dput(oi->__upperdentry); 204 iput(oi->lower); 205 if (S_ISDIR(inode->i_mode)) 206 ovl_dir_cache_free(inode); 207 else 208 iput(oi->lowerdata); 209 } 210 211 static void ovl_free_fs(struct ovl_fs *ofs) 212 { 213 unsigned i; 214 215 iput(ofs->workbasedir_trap); 216 iput(ofs->indexdir_trap); 217 iput(ofs->workdir_trap); 218 iput(ofs->upperdir_trap); 219 dput(ofs->indexdir); 220 dput(ofs->workdir); 221 if (ofs->workdir_locked) 222 ovl_inuse_unlock(ofs->workbasedir); 223 dput(ofs->workbasedir); 224 if (ofs->upperdir_locked) 225 ovl_inuse_unlock(ofs->upper_mnt->mnt_root); 226 mntput(ofs->upper_mnt); 227 for (i = 1; i < ofs->numlayer; i++) { 228 iput(ofs->layers[i].trap); 229 mntput(ofs->layers[i].mnt); 230 } 231 kfree(ofs->layers); 232 for (i = 0; i < ofs->numfs; i++) 233 free_anon_bdev(ofs->fs[i].pseudo_dev); 234 kfree(ofs->fs); 235 236 kfree(ofs->config.lowerdir); 237 kfree(ofs->config.upperdir); 238 kfree(ofs->config.workdir); 239 kfree(ofs->config.redirect_mode); 240 if (ofs->creator_cred) 241 put_cred(ofs->creator_cred); 242 kfree(ofs); 243 } 244 245 static void ovl_put_super(struct super_block *sb) 246 { 247 struct ovl_fs *ofs = sb->s_fs_info; 248 249 ovl_free_fs(ofs); 250 } 251 252 /* Sync real dirty inodes in upper filesystem (if it exists) */ 253 static int ovl_sync_fs(struct super_block *sb, int wait) 254 { 255 struct ovl_fs *ofs = sb->s_fs_info; 256 struct super_block *upper_sb; 257 int ret; 258 259 if (!ofs->upper_mnt) 260 return 0; 261 262 /* 263 * If this is a sync(2) call or an emergency sync, all the super blocks 264 * will be iterated, including upper_sb, so no need to do anything. 265 * 266 * If this is a syncfs(2) call, then we do need to call 267 * sync_filesystem() on upper_sb, but enough if we do it when being 268 * called with wait == 1. 269 */ 270 if (!wait) 271 return 0; 272 273 upper_sb = ofs->upper_mnt->mnt_sb; 274 275 down_read(&upper_sb->s_umount); 276 ret = sync_filesystem(upper_sb); 277 up_read(&upper_sb->s_umount); 278 279 return ret; 280 } 281 282 /** 283 * ovl_statfs 284 * @sb: The overlayfs super block 285 * @buf: The struct kstatfs to fill in with stats 286 * 287 * Get the filesystem statistics. As writes always target the upper layer 288 * filesystem pass the statfs to the upper filesystem (if it exists) 289 */ 290 static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) 291 { 292 struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 293 struct dentry *root_dentry = dentry->d_sb->s_root; 294 struct path path; 295 int err; 296 297 ovl_path_real(root_dentry, &path); 298 299 err = vfs_statfs(&path, buf); 300 if (!err) { 301 buf->f_namelen = ofs->namelen; 302 buf->f_type = OVERLAYFS_SUPER_MAGIC; 303 } 304 305 return err; 306 } 307 308 /* Will this overlay be forced to mount/remount ro? */ 309 static bool ovl_force_readonly(struct ovl_fs *ofs) 310 { 311 return (!ofs->upper_mnt || !ofs->workdir); 312 } 313 314 static const char *ovl_redirect_mode_def(void) 315 { 316 return ovl_redirect_dir_def ? "on" : "off"; 317 } 318 319 enum { 320 OVL_XINO_OFF, 321 OVL_XINO_AUTO, 322 OVL_XINO_ON, 323 }; 324 325 static const char * const ovl_xino_str[] = { 326 "off", 327 "auto", 328 "on", 329 }; 330 331 static inline int ovl_xino_def(void) 332 { 333 return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF; 334 } 335 336 /** 337 * ovl_show_options 338 * 339 * Prints the mount options for a given superblock. 340 * Returns zero; does not fail. 341 */ 342 static int ovl_show_options(struct seq_file *m, struct dentry *dentry) 343 { 344 struct super_block *sb = dentry->d_sb; 345 struct ovl_fs *ofs = sb->s_fs_info; 346 347 seq_show_option(m, "lowerdir", ofs->config.lowerdir); 348 if (ofs->config.upperdir) { 349 seq_show_option(m, "upperdir", ofs->config.upperdir); 350 seq_show_option(m, "workdir", ofs->config.workdir); 351 } 352 if (ofs->config.default_permissions) 353 seq_puts(m, ",default_permissions"); 354 if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0) 355 seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode); 356 if (ofs->config.index != ovl_index_def) 357 seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off"); 358 if (ofs->config.nfs_export != ovl_nfs_export_def) 359 seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ? 360 "on" : "off"); 361 if (ofs->config.xino != ovl_xino_def() && !ovl_same_fs(sb)) 362 seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]); 363 if (ofs->config.metacopy != ovl_metacopy_def) 364 seq_printf(m, ",metacopy=%s", 365 ofs->config.metacopy ? "on" : "off"); 366 return 0; 367 } 368 369 static int ovl_remount(struct super_block *sb, int *flags, char *data) 370 { 371 struct ovl_fs *ofs = sb->s_fs_info; 372 373 if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs)) 374 return -EROFS; 375 376 return 0; 377 } 378 379 static const struct super_operations ovl_super_operations = { 380 .alloc_inode = ovl_alloc_inode, 381 .free_inode = ovl_free_inode, 382 .destroy_inode = ovl_destroy_inode, 383 .drop_inode = generic_delete_inode, 384 .put_super = ovl_put_super, 385 .sync_fs = ovl_sync_fs, 386 .statfs = ovl_statfs, 387 .show_options = ovl_show_options, 388 .remount_fs = ovl_remount, 389 }; 390 391 enum { 392 OPT_LOWERDIR, 393 OPT_UPPERDIR, 394 OPT_WORKDIR, 395 OPT_DEFAULT_PERMISSIONS, 396 OPT_REDIRECT_DIR, 397 OPT_INDEX_ON, 398 OPT_INDEX_OFF, 399 OPT_NFS_EXPORT_ON, 400 OPT_NFS_EXPORT_OFF, 401 OPT_XINO_ON, 402 OPT_XINO_OFF, 403 OPT_XINO_AUTO, 404 OPT_METACOPY_ON, 405 OPT_METACOPY_OFF, 406 OPT_ERR, 407 }; 408 409 static const match_table_t ovl_tokens = { 410 {OPT_LOWERDIR, "lowerdir=%s"}, 411 {OPT_UPPERDIR, "upperdir=%s"}, 412 {OPT_WORKDIR, "workdir=%s"}, 413 {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, 414 {OPT_REDIRECT_DIR, "redirect_dir=%s"}, 415 {OPT_INDEX_ON, "index=on"}, 416 {OPT_INDEX_OFF, "index=off"}, 417 {OPT_NFS_EXPORT_ON, "nfs_export=on"}, 418 {OPT_NFS_EXPORT_OFF, "nfs_export=off"}, 419 {OPT_XINO_ON, "xino=on"}, 420 {OPT_XINO_OFF, "xino=off"}, 421 {OPT_XINO_AUTO, "xino=auto"}, 422 {OPT_METACOPY_ON, "metacopy=on"}, 423 {OPT_METACOPY_OFF, "metacopy=off"}, 424 {OPT_ERR, NULL} 425 }; 426 427 static char *ovl_next_opt(char **s) 428 { 429 char *sbegin = *s; 430 char *p; 431 432 if (sbegin == NULL) 433 return NULL; 434 435 for (p = sbegin; *p; p++) { 436 if (*p == '\\') { 437 p++; 438 if (!*p) 439 break; 440 } else if (*p == ',') { 441 *p = '\0'; 442 *s = p + 1; 443 return sbegin; 444 } 445 } 446 *s = NULL; 447 return sbegin; 448 } 449 450 static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode) 451 { 452 if (strcmp(mode, "on") == 0) { 453 config->redirect_dir = true; 454 /* 455 * Does not make sense to have redirect creation without 456 * redirect following. 457 */ 458 config->redirect_follow = true; 459 } else if (strcmp(mode, "follow") == 0) { 460 config->redirect_follow = true; 461 } else if (strcmp(mode, "off") == 0) { 462 if (ovl_redirect_always_follow) 463 config->redirect_follow = true; 464 } else if (strcmp(mode, "nofollow") != 0) { 465 pr_err("bad mount option \"redirect_dir=%s\"\n", 466 mode); 467 return -EINVAL; 468 } 469 470 return 0; 471 } 472 473 static int ovl_parse_opt(char *opt, struct ovl_config *config) 474 { 475 char *p; 476 int err; 477 bool metacopy_opt = false, redirect_opt = false; 478 479 config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL); 480 if (!config->redirect_mode) 481 return -ENOMEM; 482 483 while ((p = ovl_next_opt(&opt)) != NULL) { 484 int token; 485 substring_t args[MAX_OPT_ARGS]; 486 487 if (!*p) 488 continue; 489 490 token = match_token(p, ovl_tokens, args); 491 switch (token) { 492 case OPT_UPPERDIR: 493 kfree(config->upperdir); 494 config->upperdir = match_strdup(&args[0]); 495 if (!config->upperdir) 496 return -ENOMEM; 497 break; 498 499 case OPT_LOWERDIR: 500 kfree(config->lowerdir); 501 config->lowerdir = match_strdup(&args[0]); 502 if (!config->lowerdir) 503 return -ENOMEM; 504 break; 505 506 case OPT_WORKDIR: 507 kfree(config->workdir); 508 config->workdir = match_strdup(&args[0]); 509 if (!config->workdir) 510 return -ENOMEM; 511 break; 512 513 case OPT_DEFAULT_PERMISSIONS: 514 config->default_permissions = true; 515 break; 516 517 case OPT_REDIRECT_DIR: 518 kfree(config->redirect_mode); 519 config->redirect_mode = match_strdup(&args[0]); 520 if (!config->redirect_mode) 521 return -ENOMEM; 522 redirect_opt = true; 523 break; 524 525 case OPT_INDEX_ON: 526 config->index = true; 527 break; 528 529 case OPT_INDEX_OFF: 530 config->index = false; 531 break; 532 533 case OPT_NFS_EXPORT_ON: 534 config->nfs_export = true; 535 break; 536 537 case OPT_NFS_EXPORT_OFF: 538 config->nfs_export = false; 539 break; 540 541 case OPT_XINO_ON: 542 config->xino = OVL_XINO_ON; 543 break; 544 545 case OPT_XINO_OFF: 546 config->xino = OVL_XINO_OFF; 547 break; 548 549 case OPT_XINO_AUTO: 550 config->xino = OVL_XINO_AUTO; 551 break; 552 553 case OPT_METACOPY_ON: 554 config->metacopy = true; 555 metacopy_opt = true; 556 break; 557 558 case OPT_METACOPY_OFF: 559 config->metacopy = false; 560 break; 561 562 default: 563 pr_err("unrecognized mount option \"%s\" or missing value\n", 564 p); 565 return -EINVAL; 566 } 567 } 568 569 /* Workdir is useless in non-upper mount */ 570 if (!config->upperdir && config->workdir) { 571 pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n", 572 config->workdir); 573 kfree(config->workdir); 574 config->workdir = NULL; 575 } 576 577 err = ovl_parse_redirect_mode(config, config->redirect_mode); 578 if (err) 579 return err; 580 581 /* 582 * This is to make the logic below simpler. It doesn't make any other 583 * difference, since config->redirect_dir is only used for upper. 584 */ 585 if (!config->upperdir && config->redirect_follow) 586 config->redirect_dir = true; 587 588 /* Resolve metacopy -> redirect_dir dependency */ 589 if (config->metacopy && !config->redirect_dir) { 590 if (metacopy_opt && redirect_opt) { 591 pr_err("conflicting options: metacopy=on,redirect_dir=%s\n", 592 config->redirect_mode); 593 return -EINVAL; 594 } 595 if (redirect_opt) { 596 /* 597 * There was an explicit redirect_dir=... that resulted 598 * in this conflict. 599 */ 600 pr_info("disabling metacopy due to redirect_dir=%s\n", 601 config->redirect_mode); 602 config->metacopy = false; 603 } else { 604 /* Automatically enable redirect otherwise. */ 605 config->redirect_follow = config->redirect_dir = true; 606 } 607 } 608 609 return 0; 610 } 611 612 #define OVL_WORKDIR_NAME "work" 613 #define OVL_INDEXDIR_NAME "index" 614 615 static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, 616 const char *name, bool persist) 617 { 618 struct inode *dir = ofs->workbasedir->d_inode; 619 struct vfsmount *mnt = ofs->upper_mnt; 620 struct dentry *work; 621 int err; 622 bool retried = false; 623 bool locked = false; 624 625 inode_lock_nested(dir, I_MUTEX_PARENT); 626 locked = true; 627 628 retry: 629 work = lookup_one_len(name, ofs->workbasedir, strlen(name)); 630 631 if (!IS_ERR(work)) { 632 struct iattr attr = { 633 .ia_valid = ATTR_MODE, 634 .ia_mode = S_IFDIR | 0, 635 }; 636 637 if (work->d_inode) { 638 err = -EEXIST; 639 if (retried) 640 goto out_dput; 641 642 if (persist) 643 goto out_unlock; 644 645 retried = true; 646 ovl_workdir_cleanup(dir, mnt, work, 0); 647 dput(work); 648 goto retry; 649 } 650 651 work = ovl_create_real(dir, work, OVL_CATTR(attr.ia_mode)); 652 err = PTR_ERR(work); 653 if (IS_ERR(work)) 654 goto out_err; 655 656 /* 657 * Try to remove POSIX ACL xattrs from workdir. We are good if: 658 * 659 * a) success (there was a POSIX ACL xattr and was removed) 660 * b) -ENODATA (there was no POSIX ACL xattr) 661 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported) 662 * 663 * There are various other error values that could effectively 664 * mean that the xattr doesn't exist (e.g. -ERANGE is returned 665 * if the xattr name is too long), but the set of filesystems 666 * allowed as upper are limited to "normal" ones, where checking 667 * for the above two errors is sufficient. 668 */ 669 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT); 670 if (err && err != -ENODATA && err != -EOPNOTSUPP) 671 goto out_dput; 672 673 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS); 674 if (err && err != -ENODATA && err != -EOPNOTSUPP) 675 goto out_dput; 676 677 /* Clear any inherited mode bits */ 678 inode_lock(work->d_inode); 679 err = notify_change(work, &attr, NULL); 680 inode_unlock(work->d_inode); 681 if (err) 682 goto out_dput; 683 } else { 684 err = PTR_ERR(work); 685 goto out_err; 686 } 687 out_unlock: 688 if (locked) 689 inode_unlock(dir); 690 691 return work; 692 693 out_dput: 694 dput(work); 695 out_err: 696 pr_warn("failed to create directory %s/%s (errno: %i); mounting read-only\n", 697 ofs->config.workdir, name, -err); 698 work = NULL; 699 goto out_unlock; 700 } 701 702 static void ovl_unescape(char *s) 703 { 704 char *d = s; 705 706 for (;; s++, d++) { 707 if (*s == '\\') 708 s++; 709 *d = *s; 710 if (!*s) 711 break; 712 } 713 } 714 715 static int ovl_mount_dir_noesc(const char *name, struct path *path) 716 { 717 int err = -EINVAL; 718 719 if (!*name) { 720 pr_err("empty lowerdir\n"); 721 goto out; 722 } 723 err = kern_path(name, LOOKUP_FOLLOW, path); 724 if (err) { 725 pr_err("failed to resolve '%s': %i\n", name, err); 726 goto out; 727 } 728 err = -EINVAL; 729 if (ovl_dentry_weird(path->dentry)) { 730 pr_err("filesystem on '%s' not supported\n", name); 731 goto out_put; 732 } 733 if (!d_is_dir(path->dentry)) { 734 pr_err("'%s' not a directory\n", name); 735 goto out_put; 736 } 737 return 0; 738 739 out_put: 740 path_put_init(path); 741 out: 742 return err; 743 } 744 745 static int ovl_mount_dir(const char *name, struct path *path) 746 { 747 int err = -ENOMEM; 748 char *tmp = kstrdup(name, GFP_KERNEL); 749 750 if (tmp) { 751 ovl_unescape(tmp); 752 err = ovl_mount_dir_noesc(tmp, path); 753 754 if (!err) 755 if (ovl_dentry_remote(path->dentry)) { 756 pr_err("filesystem on '%s' not supported as upperdir\n", 757 tmp); 758 path_put_init(path); 759 err = -EINVAL; 760 } 761 kfree(tmp); 762 } 763 return err; 764 } 765 766 static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs, 767 const char *name) 768 { 769 struct kstatfs statfs; 770 int err = vfs_statfs(path, &statfs); 771 772 if (err) 773 pr_err("statfs failed on '%s'\n", name); 774 else 775 ofs->namelen = max(ofs->namelen, statfs.f_namelen); 776 777 return err; 778 } 779 780 static int ovl_lower_dir(const char *name, struct path *path, 781 struct ovl_fs *ofs, int *stack_depth, bool *remote) 782 { 783 int fh_type; 784 int err; 785 786 err = ovl_mount_dir_noesc(name, path); 787 if (err) 788 goto out; 789 790 err = ovl_check_namelen(path, ofs, name); 791 if (err) 792 goto out_put; 793 794 *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth); 795 796 if (ovl_dentry_remote(path->dentry)) 797 *remote = true; 798 799 /* 800 * The inodes index feature and NFS export need to encode and decode 801 * file handles, so they require that all layers support them. 802 */ 803 fh_type = ovl_can_decode_fh(path->dentry->d_sb); 804 if ((ofs->config.nfs_export || 805 (ofs->config.index && ofs->config.upperdir)) && !fh_type) { 806 ofs->config.index = false; 807 ofs->config.nfs_export = false; 808 pr_warn("fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n", 809 name); 810 } 811 812 /* Check if lower fs has 32bit inode numbers */ 813 if (fh_type != FILEID_INO32_GEN) 814 ofs->xino_mode = -1; 815 816 return 0; 817 818 out_put: 819 path_put_init(path); 820 out: 821 return err; 822 } 823 824 /* Workdir should not be subdir of upperdir and vice versa */ 825 static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) 826 { 827 bool ok = false; 828 829 if (workdir != upperdir) { 830 ok = (lock_rename(workdir, upperdir) == NULL); 831 unlock_rename(workdir, upperdir); 832 } 833 return ok; 834 } 835 836 static unsigned int ovl_split_lowerdirs(char *str) 837 { 838 unsigned int ctr = 1; 839 char *s, *d; 840 841 for (s = d = str;; s++, d++) { 842 if (*s == '\\') { 843 s++; 844 } else if (*s == ':') { 845 *d = '\0'; 846 ctr++; 847 continue; 848 } 849 *d = *s; 850 if (!*s) 851 break; 852 } 853 return ctr; 854 } 855 856 static int __maybe_unused 857 ovl_posix_acl_xattr_get(const struct xattr_handler *handler, 858 struct dentry *dentry, struct inode *inode, 859 const char *name, void *buffer, size_t size) 860 { 861 return ovl_xattr_get(dentry, inode, handler->name, buffer, size); 862 } 863 864 static int __maybe_unused 865 ovl_posix_acl_xattr_set(const struct xattr_handler *handler, 866 struct dentry *dentry, struct inode *inode, 867 const char *name, const void *value, 868 size_t size, int flags) 869 { 870 struct dentry *workdir = ovl_workdir(dentry); 871 struct inode *realinode = ovl_inode_real(inode); 872 struct posix_acl *acl = NULL; 873 int err; 874 875 /* Check that everything is OK before copy-up */ 876 if (value) { 877 acl = posix_acl_from_xattr(&init_user_ns, value, size); 878 if (IS_ERR(acl)) 879 return PTR_ERR(acl); 880 } 881 err = -EOPNOTSUPP; 882 if (!IS_POSIXACL(d_inode(workdir))) 883 goto out_acl_release; 884 if (!realinode->i_op->set_acl) 885 goto out_acl_release; 886 if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) { 887 err = acl ? -EACCES : 0; 888 goto out_acl_release; 889 } 890 err = -EPERM; 891 if (!inode_owner_or_capable(inode)) 892 goto out_acl_release; 893 894 posix_acl_release(acl); 895 896 /* 897 * Check if sgid bit needs to be cleared (actual setacl operation will 898 * be done with mounter's capabilities and so that won't do it for us). 899 */ 900 if (unlikely(inode->i_mode & S_ISGID) && 901 handler->flags == ACL_TYPE_ACCESS && 902 !in_group_p(inode->i_gid) && 903 !capable_wrt_inode_uidgid(inode, CAP_FSETID)) { 904 struct iattr iattr = { .ia_valid = ATTR_KILL_SGID }; 905 906 err = ovl_setattr(dentry, &iattr); 907 if (err) 908 return err; 909 } 910 911 err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags); 912 if (!err) 913 ovl_copyattr(ovl_inode_real(inode), inode); 914 915 return err; 916 917 out_acl_release: 918 posix_acl_release(acl); 919 return err; 920 } 921 922 static int ovl_own_xattr_get(const struct xattr_handler *handler, 923 struct dentry *dentry, struct inode *inode, 924 const char *name, void *buffer, size_t size) 925 { 926 return -EOPNOTSUPP; 927 } 928 929 static int ovl_own_xattr_set(const struct xattr_handler *handler, 930 struct dentry *dentry, struct inode *inode, 931 const char *name, const void *value, 932 size_t size, int flags) 933 { 934 return -EOPNOTSUPP; 935 } 936 937 static int ovl_other_xattr_get(const struct xattr_handler *handler, 938 struct dentry *dentry, struct inode *inode, 939 const char *name, void *buffer, size_t size) 940 { 941 return ovl_xattr_get(dentry, inode, name, buffer, size); 942 } 943 944 static int ovl_other_xattr_set(const struct xattr_handler *handler, 945 struct dentry *dentry, struct inode *inode, 946 const char *name, const void *value, 947 size_t size, int flags) 948 { 949 return ovl_xattr_set(dentry, inode, name, value, size, flags); 950 } 951 952 static const struct xattr_handler __maybe_unused 953 ovl_posix_acl_access_xattr_handler = { 954 .name = XATTR_NAME_POSIX_ACL_ACCESS, 955 .flags = ACL_TYPE_ACCESS, 956 .get = ovl_posix_acl_xattr_get, 957 .set = ovl_posix_acl_xattr_set, 958 }; 959 960 static const struct xattr_handler __maybe_unused 961 ovl_posix_acl_default_xattr_handler = { 962 .name = XATTR_NAME_POSIX_ACL_DEFAULT, 963 .flags = ACL_TYPE_DEFAULT, 964 .get = ovl_posix_acl_xattr_get, 965 .set = ovl_posix_acl_xattr_set, 966 }; 967 968 static const struct xattr_handler ovl_own_xattr_handler = { 969 .prefix = OVL_XATTR_PREFIX, 970 .get = ovl_own_xattr_get, 971 .set = ovl_own_xattr_set, 972 }; 973 974 static const struct xattr_handler ovl_other_xattr_handler = { 975 .prefix = "", /* catch all */ 976 .get = ovl_other_xattr_get, 977 .set = ovl_other_xattr_set, 978 }; 979 980 static const struct xattr_handler *ovl_xattr_handlers[] = { 981 #ifdef CONFIG_FS_POSIX_ACL 982 &ovl_posix_acl_access_xattr_handler, 983 &ovl_posix_acl_default_xattr_handler, 984 #endif 985 &ovl_own_xattr_handler, 986 &ovl_other_xattr_handler, 987 NULL 988 }; 989 990 static int ovl_setup_trap(struct super_block *sb, struct dentry *dir, 991 struct inode **ptrap, const char *name) 992 { 993 struct inode *trap; 994 int err; 995 996 trap = ovl_get_trap_inode(sb, dir); 997 err = PTR_ERR_OR_ZERO(trap); 998 if (err) { 999 if (err == -ELOOP) 1000 pr_err("conflicting %s path\n", name); 1001 return err; 1002 } 1003 1004 *ptrap = trap; 1005 return 0; 1006 } 1007 1008 /* 1009 * Determine how we treat concurrent use of upperdir/workdir based on the 1010 * index feature. This is papering over mount leaks of container runtimes, 1011 * for example, an old overlay mount is leaked and now its upperdir is 1012 * attempted to be used as a lower layer in a new overlay mount. 1013 */ 1014 static int ovl_report_in_use(struct ovl_fs *ofs, const char *name) 1015 { 1016 if (ofs->config.index) { 1017 pr_err("%s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n", 1018 name); 1019 return -EBUSY; 1020 } else { 1021 pr_warn("%s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n", 1022 name); 1023 return 0; 1024 } 1025 } 1026 1027 static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs, 1028 struct path *upperpath) 1029 { 1030 struct vfsmount *upper_mnt; 1031 int err; 1032 1033 err = ovl_mount_dir(ofs->config.upperdir, upperpath); 1034 if (err) 1035 goto out; 1036 1037 /* Upper fs should not be r/o */ 1038 if (sb_rdonly(upperpath->mnt->mnt_sb)) { 1039 pr_err("upper fs is r/o, try multi-lower layers mount\n"); 1040 err = -EINVAL; 1041 goto out; 1042 } 1043 1044 err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir); 1045 if (err) 1046 goto out; 1047 1048 err = ovl_setup_trap(sb, upperpath->dentry, &ofs->upperdir_trap, 1049 "upperdir"); 1050 if (err) 1051 goto out; 1052 1053 upper_mnt = clone_private_mount(upperpath); 1054 err = PTR_ERR(upper_mnt); 1055 if (IS_ERR(upper_mnt)) { 1056 pr_err("failed to clone upperpath\n"); 1057 goto out; 1058 } 1059 1060 /* Don't inherit atime flags */ 1061 upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME); 1062 ofs->upper_mnt = upper_mnt; 1063 1064 if (ovl_inuse_trylock(ofs->upper_mnt->mnt_root)) { 1065 ofs->upperdir_locked = true; 1066 } else { 1067 err = ovl_report_in_use(ofs, "upperdir"); 1068 if (err) 1069 goto out; 1070 } 1071 1072 err = 0; 1073 out: 1074 return err; 1075 } 1076 1077 static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, 1078 struct path *workpath) 1079 { 1080 struct vfsmount *mnt = ofs->upper_mnt; 1081 struct dentry *temp; 1082 int fh_type; 1083 int err; 1084 1085 err = mnt_want_write(mnt); 1086 if (err) 1087 return err; 1088 1089 ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false); 1090 if (!ofs->workdir) 1091 goto out; 1092 1093 err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir"); 1094 if (err) 1095 goto out; 1096 1097 /* 1098 * Upper should support d_type, else whiteouts are visible. Given 1099 * workdir and upper are on same fs, we can do iterate_dir() on 1100 * workdir. This check requires successful creation of workdir in 1101 * previous step. 1102 */ 1103 err = ovl_check_d_type_supported(workpath); 1104 if (err < 0) 1105 goto out; 1106 1107 /* 1108 * We allowed this configuration and don't want to break users over 1109 * kernel upgrade. So warn instead of erroring out. 1110 */ 1111 if (!err) 1112 pr_warn("upper fs needs to support d_type.\n"); 1113 1114 /* Check if upper/work fs supports O_TMPFILE */ 1115 temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0); 1116 ofs->tmpfile = !IS_ERR(temp); 1117 if (ofs->tmpfile) 1118 dput(temp); 1119 else 1120 pr_warn("upper fs does not support tmpfile.\n"); 1121 1122 /* 1123 * Check if upper/work fs supports trusted.overlay.* xattr 1124 */ 1125 err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0); 1126 if (err) { 1127 ofs->noxattr = true; 1128 ofs->config.index = false; 1129 ofs->config.metacopy = false; 1130 pr_warn("upper fs does not support xattr, falling back to index=off and metacopy=off.\n"); 1131 err = 0; 1132 } else { 1133 vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE); 1134 } 1135 1136 /* Check if upper/work fs supports file handles */ 1137 fh_type = ovl_can_decode_fh(ofs->workdir->d_sb); 1138 if (ofs->config.index && !fh_type) { 1139 ofs->config.index = false; 1140 pr_warn("upper fs does not support file handles, falling back to index=off.\n"); 1141 } 1142 1143 /* Check if upper fs has 32bit inode numbers */ 1144 if (fh_type != FILEID_INO32_GEN) 1145 ofs->xino_mode = -1; 1146 1147 /* NFS export of r/w mount depends on index */ 1148 if (ofs->config.nfs_export && !ofs->config.index) { 1149 pr_warn("NFS export requires \"index=on\", falling back to nfs_export=off.\n"); 1150 ofs->config.nfs_export = false; 1151 } 1152 out: 1153 mnt_drop_write(mnt); 1154 return err; 1155 } 1156 1157 static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs, 1158 struct path *upperpath) 1159 { 1160 int err; 1161 struct path workpath = { }; 1162 1163 err = ovl_mount_dir(ofs->config.workdir, &workpath); 1164 if (err) 1165 goto out; 1166 1167 err = -EINVAL; 1168 if (upperpath->mnt != workpath.mnt) { 1169 pr_err("workdir and upperdir must reside under the same mount\n"); 1170 goto out; 1171 } 1172 if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) { 1173 pr_err("workdir and upperdir must be separate subtrees\n"); 1174 goto out; 1175 } 1176 1177 ofs->workbasedir = dget(workpath.dentry); 1178 1179 if (ovl_inuse_trylock(ofs->workbasedir)) { 1180 ofs->workdir_locked = true; 1181 } else { 1182 err = ovl_report_in_use(ofs, "workdir"); 1183 if (err) 1184 goto out; 1185 } 1186 1187 err = ovl_setup_trap(sb, ofs->workbasedir, &ofs->workbasedir_trap, 1188 "workdir"); 1189 if (err) 1190 goto out; 1191 1192 err = ovl_make_workdir(sb, ofs, &workpath); 1193 1194 out: 1195 path_put(&workpath); 1196 1197 return err; 1198 } 1199 1200 static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, 1201 struct ovl_entry *oe, struct path *upperpath) 1202 { 1203 struct vfsmount *mnt = ofs->upper_mnt; 1204 int err; 1205 1206 err = mnt_want_write(mnt); 1207 if (err) 1208 return err; 1209 1210 /* Verify lower root is upper root origin */ 1211 err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry, 1212 true); 1213 if (err) { 1214 pr_err("failed to verify upper root origin\n"); 1215 goto out; 1216 } 1217 1218 ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true); 1219 if (ofs->indexdir) { 1220 err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap, 1221 "indexdir"); 1222 if (err) 1223 goto out; 1224 1225 /* 1226 * Verify upper root is exclusively associated with index dir. 1227 * Older kernels stored upper fh in "trusted.overlay.origin" 1228 * xattr. If that xattr exists, verify that it is a match to 1229 * upper dir file handle. In any case, verify or set xattr 1230 * "trusted.overlay.upper" to indicate that index may have 1231 * directory entries. 1232 */ 1233 if (ovl_check_origin_xattr(ofs->indexdir)) { 1234 err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN, 1235 upperpath->dentry, true, false); 1236 if (err) 1237 pr_err("failed to verify index dir 'origin' xattr\n"); 1238 } 1239 err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true); 1240 if (err) 1241 pr_err("failed to verify index dir 'upper' xattr\n"); 1242 1243 /* Cleanup bad/stale/orphan index entries */ 1244 if (!err) 1245 err = ovl_indexdir_cleanup(ofs); 1246 } 1247 if (err || !ofs->indexdir) 1248 pr_warn("try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); 1249 1250 out: 1251 mnt_drop_write(mnt); 1252 return err; 1253 } 1254 1255 static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid) 1256 { 1257 unsigned int i; 1258 1259 if (!ofs->config.nfs_export && !ofs->upper_mnt) 1260 return true; 1261 1262 for (i = 0; i < ofs->numfs; i++) { 1263 /* 1264 * We use uuid to associate an overlay lower file handle with a 1265 * lower layer, so we can accept lower fs with null uuid as long 1266 * as all lower layers with null uuid are on the same fs. 1267 * if we detect multiple lower fs with the same uuid, we 1268 * disable lower file handle decoding on all of them. 1269 */ 1270 if (ofs->fs[i].is_lower && 1271 uuid_equal(&ofs->fs[i].sb->s_uuid, uuid)) { 1272 ofs->fs[i].bad_uuid = true; 1273 return false; 1274 } 1275 } 1276 return true; 1277 } 1278 1279 /* Get a unique fsid for the layer */ 1280 static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path) 1281 { 1282 struct super_block *sb = path->mnt->mnt_sb; 1283 unsigned int i; 1284 dev_t dev; 1285 int err; 1286 bool bad_uuid = false; 1287 1288 for (i = 0; i < ofs->numfs; i++) { 1289 if (ofs->fs[i].sb == sb) 1290 return i; 1291 } 1292 1293 if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) { 1294 bad_uuid = true; 1295 if (ofs->config.index || ofs->config.nfs_export) { 1296 ofs->config.index = false; 1297 ofs->config.nfs_export = false; 1298 pr_warn("%s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n", 1299 uuid_is_null(&sb->s_uuid) ? "null" : 1300 "conflicting", 1301 path->dentry); 1302 } 1303 } 1304 1305 err = get_anon_bdev(&dev); 1306 if (err) { 1307 pr_err("failed to get anonymous bdev for lowerpath\n"); 1308 return err; 1309 } 1310 1311 ofs->fs[ofs->numfs].sb = sb; 1312 ofs->fs[ofs->numfs].pseudo_dev = dev; 1313 ofs->fs[ofs->numfs].bad_uuid = bad_uuid; 1314 1315 return ofs->numfs++; 1316 } 1317 1318 static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, 1319 struct path *stack, unsigned int numlower) 1320 { 1321 int err; 1322 unsigned int i; 1323 struct ovl_layer *layers; 1324 1325 err = -ENOMEM; 1326 layers = kcalloc(numlower + 1, sizeof(struct ovl_layer), GFP_KERNEL); 1327 if (!layers) 1328 goto out; 1329 ofs->layers = layers; 1330 1331 ofs->fs = kcalloc(numlower + 1, sizeof(struct ovl_sb), GFP_KERNEL); 1332 if (ofs->fs == NULL) 1333 goto out; 1334 1335 /* idx/fsid 0 are reserved for upper fs even with lower only overlay */ 1336 ofs->numfs++; 1337 1338 layers[0].mnt = ofs->upper_mnt; 1339 layers[0].idx = 0; 1340 layers[0].fsid = 0; 1341 ofs->numlayer = 1; 1342 1343 /* 1344 * All lower layers that share the same fs as upper layer, use the same 1345 * pseudo_dev as upper layer. Allocate fs[0].pseudo_dev even for lower 1346 * only overlay to simplify ovl_fs_free(). 1347 * is_lower will be set if upper fs is shared with a lower layer. 1348 */ 1349 err = get_anon_bdev(&ofs->fs[0].pseudo_dev); 1350 if (err) { 1351 pr_err("failed to get anonymous bdev for upper fs\n"); 1352 goto out; 1353 } 1354 1355 if (ofs->upper_mnt) { 1356 ofs->fs[0].sb = ofs->upper_mnt->mnt_sb; 1357 ofs->fs[0].is_lower = false; 1358 } 1359 1360 for (i = 0; i < numlower; i++) { 1361 struct vfsmount *mnt; 1362 struct inode *trap; 1363 int fsid; 1364 1365 err = fsid = ovl_get_fsid(ofs, &stack[i]); 1366 if (err < 0) 1367 goto out; 1368 1369 err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir"); 1370 if (err) 1371 goto out; 1372 1373 if (ovl_is_inuse(stack[i].dentry)) { 1374 err = ovl_report_in_use(ofs, "lowerdir"); 1375 if (err) 1376 goto out; 1377 } 1378 1379 mnt = clone_private_mount(&stack[i]); 1380 err = PTR_ERR(mnt); 1381 if (IS_ERR(mnt)) { 1382 pr_err("failed to clone lowerpath\n"); 1383 iput(trap); 1384 goto out; 1385 } 1386 1387 /* 1388 * Make lower layers R/O. That way fchmod/fchown on lower file 1389 * will fail instead of modifying lower fs. 1390 */ 1391 mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME; 1392 1393 layers[ofs->numlayer].trap = trap; 1394 layers[ofs->numlayer].mnt = mnt; 1395 layers[ofs->numlayer].idx = ofs->numlayer; 1396 layers[ofs->numlayer].fsid = fsid; 1397 layers[ofs->numlayer].fs = &ofs->fs[fsid]; 1398 ofs->numlayer++; 1399 ofs->fs[fsid].is_lower = true; 1400 } 1401 1402 /* 1403 * When all layers on same fs, overlay can use real inode numbers. 1404 * With mount option "xino=on", mounter declares that there are enough 1405 * free high bits in underlying fs to hold the unique fsid. 1406 * If overlayfs does encounter underlying inodes using the high xino 1407 * bits reserved for fsid, it emits a warning and uses the original 1408 * inode number. 1409 */ 1410 if (ofs->numfs - !ofs->upper_mnt == 1) { 1411 if (ofs->config.xino == OVL_XINO_ON) 1412 pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n"); 1413 ofs->xino_mode = 0; 1414 } else if (ofs->config.xino == OVL_XINO_ON && ofs->xino_mode < 0) { 1415 /* 1416 * This is a roundup of number of bits needed for encoding 1417 * fsid, where fsid 0 is reserved for upper fs even with 1418 * lower only overlay. 1419 */ 1420 BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31); 1421 ofs->xino_mode = ilog2(ofs->numfs - 1) + 1; 1422 } 1423 1424 if (ofs->xino_mode > 0) { 1425 pr_info("\"xino\" feature enabled using %d upper inode bits.\n", 1426 ofs->xino_mode); 1427 } 1428 1429 err = 0; 1430 out: 1431 return err; 1432 } 1433 1434 static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, 1435 struct ovl_fs *ofs) 1436 { 1437 int err; 1438 char *lowertmp, *lower; 1439 struct path *stack = NULL; 1440 unsigned int stacklen, numlower = 0, i; 1441 bool remote = false; 1442 struct ovl_entry *oe; 1443 1444 err = -ENOMEM; 1445 lowertmp = kstrdup(ofs->config.lowerdir, GFP_KERNEL); 1446 if (!lowertmp) 1447 goto out_err; 1448 1449 err = -EINVAL; 1450 stacklen = ovl_split_lowerdirs(lowertmp); 1451 if (stacklen > OVL_MAX_STACK) { 1452 pr_err("too many lower directories, limit is %d\n", 1453 OVL_MAX_STACK); 1454 goto out_err; 1455 } else if (!ofs->config.upperdir && stacklen == 1) { 1456 pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n"); 1457 goto out_err; 1458 } else if (!ofs->config.upperdir && ofs->config.nfs_export && 1459 ofs->config.redirect_follow) { 1460 pr_warn("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n"); 1461 ofs->config.nfs_export = false; 1462 } 1463 1464 err = -ENOMEM; 1465 stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); 1466 if (!stack) 1467 goto out_err; 1468 1469 err = -EINVAL; 1470 lower = lowertmp; 1471 for (numlower = 0; numlower < stacklen; numlower++) { 1472 err = ovl_lower_dir(lower, &stack[numlower], ofs, 1473 &sb->s_stack_depth, &remote); 1474 if (err) 1475 goto out_err; 1476 1477 lower = strchr(lower, '\0') + 1; 1478 } 1479 1480 err = -EINVAL; 1481 sb->s_stack_depth++; 1482 if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { 1483 pr_err("maximum fs stacking depth exceeded\n"); 1484 goto out_err; 1485 } 1486 1487 err = ovl_get_layers(sb, ofs, stack, numlower); 1488 if (err) 1489 goto out_err; 1490 1491 err = -ENOMEM; 1492 oe = ovl_alloc_entry(numlower); 1493 if (!oe) 1494 goto out_err; 1495 1496 for (i = 0; i < numlower; i++) { 1497 oe->lowerstack[i].dentry = dget(stack[i].dentry); 1498 oe->lowerstack[i].layer = &ofs->layers[i+1]; 1499 } 1500 1501 if (remote) 1502 sb->s_d_op = &ovl_reval_dentry_operations; 1503 else 1504 sb->s_d_op = &ovl_dentry_operations; 1505 1506 out: 1507 for (i = 0; i < numlower; i++) 1508 path_put(&stack[i]); 1509 kfree(stack); 1510 kfree(lowertmp); 1511 1512 return oe; 1513 1514 out_err: 1515 oe = ERR_PTR(err); 1516 goto out; 1517 } 1518 1519 /* 1520 * Check if this layer root is a descendant of: 1521 * - another layer of this overlayfs instance 1522 * - upper/work dir of any overlayfs instance 1523 */ 1524 static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs, 1525 struct dentry *dentry, const char *name) 1526 { 1527 struct dentry *next = dentry, *parent; 1528 int err = 0; 1529 1530 if (!dentry) 1531 return 0; 1532 1533 parent = dget_parent(next); 1534 1535 /* Walk back ancestors to root (inclusive) looking for traps */ 1536 while (!err && parent != next) { 1537 if (ovl_lookup_trap_inode(sb, parent)) { 1538 err = -ELOOP; 1539 pr_err("overlapping %s path\n", name); 1540 } else if (ovl_is_inuse(parent)) { 1541 err = ovl_report_in_use(ofs, name); 1542 } 1543 next = parent; 1544 parent = dget_parent(next); 1545 dput(next); 1546 } 1547 1548 dput(parent); 1549 1550 return err; 1551 } 1552 1553 /* 1554 * Check if any of the layers or work dirs overlap. 1555 */ 1556 static int ovl_check_overlapping_layers(struct super_block *sb, 1557 struct ovl_fs *ofs) 1558 { 1559 int i, err; 1560 1561 if (ofs->upper_mnt) { 1562 err = ovl_check_layer(sb, ofs, ofs->upper_mnt->mnt_root, 1563 "upperdir"); 1564 if (err) 1565 return err; 1566 1567 /* 1568 * Checking workbasedir avoids hitting ovl_is_inuse(parent) of 1569 * this instance and covers overlapping work and index dirs, 1570 * unless work or index dir have been moved since created inside 1571 * workbasedir. In that case, we already have their traps in 1572 * inode cache and we will catch that case on lookup. 1573 */ 1574 err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir"); 1575 if (err) 1576 return err; 1577 } 1578 1579 for (i = 1; i < ofs->numlayer; i++) { 1580 err = ovl_check_layer(sb, ofs, 1581 ofs->layers[i].mnt->mnt_root, 1582 "lowerdir"); 1583 if (err) 1584 return err; 1585 } 1586 1587 return 0; 1588 } 1589 1590 static int ovl_fill_super(struct super_block *sb, void *data, int silent) 1591 { 1592 struct path upperpath = { }; 1593 struct dentry *root_dentry; 1594 struct ovl_entry *oe; 1595 struct ovl_fs *ofs; 1596 struct cred *cred; 1597 int err; 1598 1599 err = -ENOMEM; 1600 ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); 1601 if (!ofs) 1602 goto out; 1603 1604 ofs->creator_cred = cred = prepare_creds(); 1605 if (!cred) 1606 goto out_err; 1607 1608 ofs->config.index = ovl_index_def; 1609 ofs->config.nfs_export = ovl_nfs_export_def; 1610 ofs->config.xino = ovl_xino_def(); 1611 ofs->config.metacopy = ovl_metacopy_def; 1612 err = ovl_parse_opt((char *) data, &ofs->config); 1613 if (err) 1614 goto out_err; 1615 1616 err = -EINVAL; 1617 if (!ofs->config.lowerdir) { 1618 if (!silent) 1619 pr_err("missing 'lowerdir'\n"); 1620 goto out_err; 1621 } 1622 1623 sb->s_stack_depth = 0; 1624 sb->s_maxbytes = MAX_LFS_FILESIZE; 1625 /* Assume underlaying fs uses 32bit inodes unless proven otherwise */ 1626 if (ofs->config.xino != OVL_XINO_OFF) 1627 ofs->xino_mode = BITS_PER_LONG - 32; 1628 1629 /* alloc/destroy_inode needed for setting up traps in inode cache */ 1630 sb->s_op = &ovl_super_operations; 1631 1632 if (ofs->config.upperdir) { 1633 if (!ofs->config.workdir) { 1634 pr_err("missing 'workdir'\n"); 1635 goto out_err; 1636 } 1637 1638 err = ovl_get_upper(sb, ofs, &upperpath); 1639 if (err) 1640 goto out_err; 1641 1642 err = ovl_get_workdir(sb, ofs, &upperpath); 1643 if (err) 1644 goto out_err; 1645 1646 if (!ofs->workdir) 1647 sb->s_flags |= SB_RDONLY; 1648 1649 sb->s_stack_depth = ofs->upper_mnt->mnt_sb->s_stack_depth; 1650 sb->s_time_gran = ofs->upper_mnt->mnt_sb->s_time_gran; 1651 1652 } 1653 oe = ovl_get_lowerstack(sb, ofs); 1654 err = PTR_ERR(oe); 1655 if (IS_ERR(oe)) 1656 goto out_err; 1657 1658 /* If the upper fs is nonexistent, we mark overlayfs r/o too */ 1659 if (!ofs->upper_mnt) 1660 sb->s_flags |= SB_RDONLY; 1661 1662 if (!(ovl_force_readonly(ofs)) && ofs->config.index) { 1663 err = ovl_get_indexdir(sb, ofs, oe, &upperpath); 1664 if (err) 1665 goto out_free_oe; 1666 1667 /* Force r/o mount with no index dir */ 1668 if (!ofs->indexdir) { 1669 dput(ofs->workdir); 1670 ofs->workdir = NULL; 1671 sb->s_flags |= SB_RDONLY; 1672 } 1673 1674 } 1675 1676 err = ovl_check_overlapping_layers(sb, ofs); 1677 if (err) 1678 goto out_free_oe; 1679 1680 /* Show index=off in /proc/mounts for forced r/o mount */ 1681 if (!ofs->indexdir) { 1682 ofs->config.index = false; 1683 if (ofs->upper_mnt && ofs->config.nfs_export) { 1684 pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n"); 1685 ofs->config.nfs_export = false; 1686 } 1687 } 1688 1689 if (ofs->config.metacopy && ofs->config.nfs_export) { 1690 pr_warn("NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n"); 1691 ofs->config.nfs_export = false; 1692 } 1693 1694 if (ofs->config.nfs_export) 1695 sb->s_export_op = &ovl_export_operations; 1696 1697 /* Never override disk quota limits or use reserved space */ 1698 cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); 1699 1700 sb->s_magic = OVERLAYFS_SUPER_MAGIC; 1701 sb->s_xattr = ovl_xattr_handlers; 1702 sb->s_fs_info = ofs; 1703 sb->s_flags |= SB_POSIXACL; 1704 1705 err = -ENOMEM; 1706 root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); 1707 if (!root_dentry) 1708 goto out_free_oe; 1709 1710 root_dentry->d_fsdata = oe; 1711 1712 mntput(upperpath.mnt); 1713 if (upperpath.dentry) { 1714 ovl_dentry_set_upper_alias(root_dentry); 1715 if (ovl_is_impuredir(upperpath.dentry)) 1716 ovl_set_flag(OVL_IMPURE, d_inode(root_dentry)); 1717 } 1718 1719 /* Root is always merge -> can have whiteouts */ 1720 ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry)); 1721 ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry); 1722 ovl_set_upperdata(d_inode(root_dentry)); 1723 ovl_inode_init(d_inode(root_dentry), upperpath.dentry, 1724 ovl_dentry_lower(root_dentry), NULL); 1725 1726 sb->s_root = root_dentry; 1727 1728 return 0; 1729 1730 out_free_oe: 1731 ovl_entry_stack_free(oe); 1732 kfree(oe); 1733 out_err: 1734 path_put(&upperpath); 1735 ovl_free_fs(ofs); 1736 out: 1737 return err; 1738 } 1739 1740 static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, 1741 const char *dev_name, void *raw_data) 1742 { 1743 return mount_nodev(fs_type, flags, raw_data, ovl_fill_super); 1744 } 1745 1746 static struct file_system_type ovl_fs_type = { 1747 .owner = THIS_MODULE, 1748 .name = "overlay", 1749 .mount = ovl_mount, 1750 .kill_sb = kill_anon_super, 1751 }; 1752 MODULE_ALIAS_FS("overlay"); 1753 1754 static void ovl_inode_init_once(void *foo) 1755 { 1756 struct ovl_inode *oi = foo; 1757 1758 inode_init_once(&oi->vfs_inode); 1759 } 1760 1761 static int __init ovl_init(void) 1762 { 1763 int err; 1764 1765 ovl_inode_cachep = kmem_cache_create("ovl_inode", 1766 sizeof(struct ovl_inode), 0, 1767 (SLAB_RECLAIM_ACCOUNT| 1768 SLAB_MEM_SPREAD|SLAB_ACCOUNT), 1769 ovl_inode_init_once); 1770 if (ovl_inode_cachep == NULL) 1771 return -ENOMEM; 1772 1773 err = ovl_aio_request_cache_init(); 1774 if (!err) { 1775 err = register_filesystem(&ovl_fs_type); 1776 if (!err) 1777 return 0; 1778 1779 ovl_aio_request_cache_destroy(); 1780 } 1781 kmem_cache_destroy(ovl_inode_cachep); 1782 1783 return err; 1784 } 1785 1786 static void __exit ovl_exit(void) 1787 { 1788 unregister_filesystem(&ovl_fs_type); 1789 1790 /* 1791 * Make sure all delayed rcu free inodes are flushed before we 1792 * destroy cache. 1793 */ 1794 rcu_barrier(); 1795 kmem_cache_destroy(ovl_inode_cachep); 1796 ovl_aio_request_cache_destroy(); 1797 } 1798 1799 module_init(ovl_init); 1800 module_exit(ovl_exit); 1801