// SPDX-License-Identifier: GPL-2.0-only
/*
 *
 * Copyright (C) 2011 Novell Inc.
 */

#include <uapi/linux/magic.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/xattr.h>
#include <linux/mount.h>
#include <linux/parser.h>
#include <linux/module.h>
#include <linux/statfs.h>
#include <linux/seq_file.h>
#include <linux/posix_acl_xattr.h>
#include <linux/exportfs.h>
#include <linux/file.h>
#include "overlayfs.h"

MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Overlay filesystem");
MODULE_LICENSE("GPL");


struct ovl_dir_cache;

#define OVL_MAX_STACK 500

/*
 * Module parameters.  Each one is initialised from the matching
 * CONFIG_OVERLAY_FS_* option and registered with mode 0644.
 */
static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
MODULE_PARM_DESC(redirect_dir,
		 "Default to on or off for the redirect_dir feature");

static bool ovl_redirect_always_follow =
	IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
module_param_named(redirect_always_follow, ovl_redirect_always_follow,
		   bool, 0644);
MODULE_PARM_DESC(redirect_always_follow,
		 "Follow redirects even if redirect_dir feature is turned off");

static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
module_param_named(index, ovl_index_def, bool, 0644);
MODULE_PARM_DESC(index,
		 "Default to on or off for the inodes index feature");

static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
MODULE_PARM_DESC(nfs_export,
		 "Default to on or off for the NFS export feature");

static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO);
module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
MODULE_PARM_DESC(xino_auto,
		 "Auto enable xino feature");

/* Drop the reference on each of the @oe->numlower dentries in lowerstack. */
static void ovl_entry_stack_free(struct ovl_entry *oe)
{
	unsigned int i;

	for (i = 0; i < oe->numlower; i++)
		dput(oe->lowerstack[i].dentry);
}

static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY);
module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
MODULE_PARM_DESC(metacopy,
		 "Default to on or off for the metadata only copy up feature");

/*
 * ->d_release: if the dentry carries an ovl_entry in d_fsdata, put its
 * lower stack dentries and free the entry with kfree_rcu().
 */
static void ovl_dentry_release(struct dentry *dentry)
{
	struct ovl_entry *oe = dentry->d_fsdata;

	if (oe) {
		ovl_entry_stack_free(oe);
		kfree_rcu(oe, rcu);
	}
}

/*
 * ->d_real: choose the dentry to hand back for @dentry, optionally
 * constrained by @inode (may be NULL).
 *
 * Returns @dentry itself when @inode is the overlay inode, or when @dentry
 * is not a regular file and @inode is NULL.  For regular files the upper
 * dentry is returned if @inode is its inode, or if @inode is NULL and
 * ovl_has_upperdata() is true; otherwise d_real() is applied to the
 * lowerdata dentry.  If nothing matches @inode, a WARN is emitted and
 * @dentry is returned.
 */
static struct dentry *ovl_d_real(struct dentry *dentry,
				 const struct inode *inode)
{
	struct dentry *real = NULL, *lower;

	/* It's an overlay file */
	if (inode && d_inode(dentry) == inode)
		return dentry;

	if (!d_is_reg(dentry)) {
		if (!inode || inode == d_inode(dentry))
			return dentry;
		goto bug;
	}

	real = ovl_dentry_upper(dentry);
	if (real && (inode == d_inode(real)))
		return real;

	if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
		return real;

	lower = ovl_dentry_lowerdata(dentry);
	if (!lower)
		goto bug;
	real = lower;

	/* Handle recursion */
	real = d_real(real, inode);

	if (!inode || inode == d_inode(real))
		return real;
bug:
	WARN(1, "%s(%pd4, %s:%lu): real dentry (%p/%lu) not found\n",
	     __func__, dentry, inode ? inode->i_sb->s_id : "NULL",
	     inode ? inode->i_ino : 0, real,
	     real && d_inode(real) ? d_inode(real)->i_ino : 0);
	return dentry;
}

/*
 * Call the real dentry's ->d_weak_revalidate (@weak) or ->d_revalidate
 * (!@weak) if its d_flags advertise the op; otherwise return 1.
 *
 * A zero result from ->d_revalidate is turned into -ESTALE, and the dentry
 * is invalidated first unless LOOKUP_RCU is set in @flags.  The result of
 * ->d_weak_revalidate is passed through unchanged.
 */
static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak)
{
	int ret = 1;

	if (weak) {
		if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE)
			ret = d->d_op->d_weak_revalidate(d, flags);
	} else if (d->d_flags & DCACHE_OP_REVALIDATE) {
		ret = d->d_op->d_revalidate(d, flags);
		if (!ret) {
			if (!(flags & LOOKUP_RCU))
				d_invalidate(d);
			ret = -ESTALE;
		}
	}
	return ret;
}

/*
 * Revalidate the upper dentry (if any) and then each lower stack dentry,
 * stopping at the first result that is not > 0.  Returns that result, or 1
 * if every real dentry revalidated.
 */
static int ovl_dentry_revalidate_common(struct dentry *dentry,
					unsigned int flags, bool weak)
{
	struct ovl_entry *oe = dentry->d_fsdata;
	struct dentry *upper;
	unsigned int i;
	int ret = 1;

	upper = ovl_dentry_upper(dentry);
	if (upper)
		ret = ovl_revalidate_real(upper, flags, weak);

	for (i = 0; ret > 0 && i < oe->numlower; i++) {
		ret = ovl_revalidate_real(oe->lowerstack[i].dentry, flags,
					  weak);
	}
	return ret;
}

/* ->d_revalidate: non-weak variant of ovl_dentry_revalidate_common(). */
static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
{
	return ovl_dentry_revalidate_common(dentry, flags, false);
}

/* ->d_weak_revalidate: weak variant of ovl_dentry_revalidate_common(). */
static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
{
	return ovl_dentry_revalidate_common(dentry, flags, true);
}

/* Dentry operations implemented by the functions above. */
static const struct dentry_operations ovl_dentry_operations = {
	.d_release = ovl_dentry_release,
	.d_real = ovl_d_real,
	.d_revalidate = ovl_dentry_revalidate,
	.d_weak_revalidate = ovl_dentry_weak_revalidate,
};

/* Slab cache that ovl_alloc_inode()/ovl_free_inode() use for ovl_inode. */
static struct kmem_cache *ovl_inode_cachep;

/*
 * ->alloc_inode: allocate an ovl_inode from ovl_inode_cachep, reset its
 * overlay-specific fields and initialise its lock.  Returns the embedded
 * VFS inode, or NULL if the allocation failed.
 */
static struct inode *ovl_alloc_inode(struct super_block *sb)
{
	struct ovl_inode *oi = alloc_inode_sb(sb, ovl_inode_cachep, GFP_KERNEL);

	if (!oi)
		return NULL;

	oi->cache = NULL;
	oi->redirect = NULL;
	oi->version = 0;
	oi->flags = 0;
	oi->__upperdentry = NULL;
	oi->lowerpath.dentry = NULL;
	oi->lowerpath.layer = NULL;
	oi->lowerdata = NULL;
	mutex_init(&oi->lock);

	return &oi->vfs_inode;
}

/*
 * ->free_inode: free the redirect string, destroy the lock and return the
 * ovl_inode to ovl_inode_cachep.
 */
static void ovl_free_inode(struct inode *inode)
{
	struct ovl_inode *oi = OVL_I(inode);

	kfree(oi->redirect);
	mutex_destroy(&oi->lock);
	kmem_cache_free(ovl_inode_cachep, oi);
}

/*
 * ->destroy_inode: put the upper and lower dentries, then either free the
 * directory cache (directories) or put the lowerdata inode (everything
 * else).
 */
static void ovl_destroy_inode(struct inode *inode)
{
	struct ovl_inode *oi = OVL_I(inode);

	dput(oi->__upperdentry);
	dput(oi->lowerpath.dentry);
	if (S_ISDIR(inode->i_mode))
		ovl_dir_cache_free(inode);
	else
		iput(oi->lowerdata);
}

/*
 * Release everything hanging off @ofs and then @ofs itself: trap inodes,
 * whiteout/index/work dentries, the in-use marks (only where the *_locked
 * flags say one is held), layer mounts, per-fs anonymous devices, config
 * strings and the creator credentials.
 */
static void ovl_free_fs(struct ovl_fs *ofs)
{
	struct vfsmount **mounts;
	unsigned i;

	iput(ofs->workbasedir_trap);
	iput(ofs->indexdir_trap);
	iput(ofs->workdir_trap);
	dput(ofs->whiteout);
	dput(ofs->indexdir);
	dput(ofs->workdir);
	if (ofs->workdir_locked)
		ovl_inuse_unlock(ofs->workbasedir);
	dput(ofs->workbasedir);
	if (ofs->upperdir_locked)
		ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root);

	/* Hack!  Reuse ofs->layers as a vfsmount array before freeing it */
	mounts = (struct vfsmount **) ofs->layers;
	for (i = 0; i < ofs->numlayer; i++) {
		iput(ofs->layers[i].trap);
		mounts[i] = ofs->layers[i].mnt;
	}
	kern_unmount_array(mounts, ofs->numlayer);
	kfree(ofs->layers);
	for (i = 0; i < ofs->numfs; i++)
		free_anon_bdev(ofs->fs[i].pseudo_dev);
	kfree(ofs->fs);

	kfree(ofs->config.lowerdir);
	kfree(ofs->config.upperdir);
	kfree(ofs->config.workdir);
	kfree(ofs->config.redirect_mode);
	if (ofs->creator_cred)
		put_cred(ofs->creator_cred);
	kfree(ofs);
}

/* ->put_super: free the ovl_fs stored in sb->s_fs_info. */
static void ovl_put_super(struct super_block *sb)
{
	struct ovl_fs *ofs = sb->s_fs_info;

	ovl_free_fs(ofs);
}

/* Sync real dirty inodes in upper filesystem (if it exists) */
static int ovl_sync_fs(struct super_block *sb, int wait)
{
	struct ovl_fs *ofs = sb->s_fs_info;
	struct super_block *upper_sb;
	int ret;

	ret = ovl_sync_status(ofs);
	/*
	 * We have to always set the err, because the return value isn't
	 * checked in syncfs, and instead indirectly return an error via
	 * the sb's writeback errseq, which VFS inspects after this call.
	 */
	if (ret < 0) {
		errseq_set(&sb->s_wb_err, -EIO);
		return -EIO;
	}

	/* A zero status means there is nothing to sync here. */
	if (!ret)
		return ret;

	/*
	 * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC).
	 * All the super blocks will be iterated, including upper_sb.
	 *
	 * If this is a syncfs(2) call, then we do need to call
	 * sync_filesystem() on upper_sb, but enough if we do it when being
	 * called with wait == 1.
	 */
	if (!wait)
		return 0;

	upper_sb = ovl_upper_mnt(ofs)->mnt_sb;

	down_read(&upper_sb->s_umount);
	ret = sync_filesystem(upper_sb);
	up_read(&upper_sb->s_umount);

	return ret;
}

/**
 * ovl_statfs
 * @dentry: The dentry to query
 * @buf: The struct kstatfs to fill in with stats
 *
 * Get the filesystem statistics.  As writes always target the upper layer
 * filesystem pass the statfs to the upper filesystem (if it exists)
 */
static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
	struct dentry *root_dentry = dentry->d_sb->s_root;
	struct path path;
	int err;

	ovl_path_real(root_dentry, &path);

	err = vfs_statfs(&path, buf);
	if (!err) {
		/* Report overlay's own name length limit and magic number. */
		buf->f_namelen = ofs->namelen;
		buf->f_type = OVERLAYFS_SUPER_MAGIC;
	}

	return err;
}

/* Will this overlay be forced to mount/remount ro? */
static bool ovl_force_readonly(struct ovl_fs *ofs)
{
	return (!ovl_upper_mnt(ofs) || !ofs->workdir);
}

/* Default redirect_dir mode string, derived from the module parameter. */
static const char *ovl_redirect_mode_def(void)
{
	return ovl_redirect_dir_def ? "on" : "off";
}

/* Option strings for xino; ovl_show_options() indexes this by config.xino. */
static const char * const ovl_xino_str[] = {
	"off",
	"auto",
	"on",
};

/* Default xino mode, derived from the xino_auto module parameter. */
static inline int ovl_xino_def(void)
{
	return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF;
}

/**
 * ovl_show_options
 * @m: the seq_file handle
 * @dentry: The dentry to query
 *
 * Prints the mount options for a given superblock.
 * Returns zero; does not fail.
 */
static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
{
	struct super_block *sb = dentry->d_sb;
	struct ovl_fs *ofs = sb->s_fs_info;

	seq_show_option(m, "lowerdir", ofs->config.lowerdir);
	if (ofs->config.upperdir) {
		seq_show_option(m, "upperdir", ofs->config.upperdir);
		seq_show_option(m, "workdir", ofs->config.workdir);
	}
	if (ofs->config.default_permissions)
		seq_puts(m, ",default_permissions");
	/* The options below are shown only when they differ from the default */
	if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
		seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
	if (ofs->config.index != ovl_index_def)
		seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
	if (!ofs->config.uuid)
		seq_puts(m, ",uuid=off");
	if (ofs->config.nfs_export != ovl_nfs_export_def)
		seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
						"on" : "off");
	if (ofs->config.xino != ovl_xino_def() && !ovl_same_fs(sb))
		seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
	if (ofs->config.metacopy != ovl_metacopy_def)
		seq_printf(m, ",metacopy=%s",
			   ofs->config.metacopy ? "on" : "off");
	if (ofs->config.ovl_volatile)
		seq_puts(m, ",volatile");
	if (ofs->config.userxattr)
		seq_puts(m, ",userxattr");
	return 0;
}

/*
 * ->remount_fs: refuse a read-write remount (-EROFS) when the overlay is
 * forced read-only.  On a read-write -> read-only transition, sync the
 * upper filesystem if ovl_should_sync() says so.  @data is not used.
 */
static int ovl_remount(struct super_block *sb, int *flags, char *data)
{
	struct ovl_fs *ofs = sb->s_fs_info;
	struct super_block *upper_sb;
	int ret = 0;

	if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs))
		return -EROFS;

	if (*flags & SB_RDONLY && !sb_rdonly(sb)) {
		upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
		if (ovl_should_sync(ofs)) {
			down_read(&upper_sb->s_umount);
			ret = sync_filesystem(upper_sb);
			up_read(&upper_sb->s_umount);
		}
	}

	return ret;
}

/* Superblock operations implemented in this file. */
static const struct super_operations ovl_super_operations = {
	.alloc_inode	= ovl_alloc_inode,
	.free_inode	= ovl_free_inode,
	.destroy_inode	= ovl_destroy_inode,
	.drop_inode	= generic_delete_inode,
	.put_super	= ovl_put_super,
	.sync_fs	= ovl_sync_fs,
	.statfs		= ovl_statfs,
	.show_options	= ovl_show_options,
	.remount_fs	= ovl_remount,
};

/* Mount option tokens; matched against ovl_tokens[] by ovl_parse_opt(). */
enum {
	OPT_LOWERDIR,
	OPT_UPPERDIR,
	OPT_WORKDIR,
	OPT_DEFAULT_PERMISSIONS,
	OPT_REDIRECT_DIR,
	OPT_INDEX_ON,
	OPT_INDEX_OFF,
	OPT_UUID_ON,
	OPT_UUID_OFF,
	OPT_NFS_EXPORT_ON,
	OPT_USERXATTR,
	OPT_NFS_EXPORT_OFF,
	OPT_XINO_ON,
	OPT_XINO_OFF,
	OPT_XINO_AUTO,
	OPT_METACOPY_ON,
	OPT_METACOPY_OFF,
	OPT_VOLATILE,
	OPT_ERR,
};

/* Pattern table for match_token(); terminated by the OPT_ERR/NULL entry. */
static const match_table_t ovl_tokens = {
	{OPT_LOWERDIR,			"lowerdir=%s"},
	{OPT_UPPERDIR,			"upperdir=%s"},
	{OPT_WORKDIR,			"workdir=%s"},
	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
	{OPT_REDIRECT_DIR,		"redirect_dir=%s"},
	{OPT_INDEX_ON,			"index=on"},
	{OPT_INDEX_OFF,			"index=off"},
	{OPT_USERXATTR,			"userxattr"},
	{OPT_UUID_ON,			"uuid=on"},
	{OPT_UUID_OFF,			"uuid=off"},
	{OPT_NFS_EXPORT_ON,		"nfs_export=on"},
	{OPT_NFS_EXPORT_OFF,		"nfs_export=off"},
	{OPT_XINO_ON,			"xino=on"},
	{OPT_XINO_OFF,			"xino=off"},
	{OPT_XINO_AUTO,			"xino=auto"},
	{OPT_METACOPY_ON,		"metacopy=on"},
	{OPT_METACOPY_OFF,		"metacopy=off"},
	{OPT_VOLATILE,			"volatile"},
	{OPT_ERR,			NULL}
};

/*
 * Split the next comma-separated option off the string at *@s.
 *
 * A backslash makes the scan skip the character that follows it, so an
 * escaped comma does not end the option (the backslash itself is left in
 * place).  The terminating comma is overwritten with NUL and *@s is moved
 * just past it; when the end of the string is reached *@s becomes NULL.
 *
 * Returns the start of the option, or NULL if *@s was already NULL.
 */
static char *ovl_next_opt(char **s)
{
	char *sbegin = *s;
	char *p;

	if (sbegin == NULL)
		return NULL;

	for (p = sbegin; *p; p++) {
		if (*p == '\\') {
			p++;
			if (!*p)
				break;
		} else if (*p == ',') {
			*p = '\0';
			*s = p + 1;
			return sbegin;
		}
	}
	*s = NULL;
	return sbegin;
}

/*
 * Translate a redirect_dir mode string into @config->redirect_dir and
 * @config->redirect_follow.  "nofollow" is accepted and sets neither flag.
 * Any other unknown string is logged and rejected with -EINVAL.
 */
static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
{
	if (strcmp(mode, "on") == 0) {
		config->redirect_dir = true;
		/*
		 * Does not make sense to have redirect creation without
		 * redirect following.
		 */
		config->redirect_follow = true;
	} else if (strcmp(mode, "follow") == 0) {
		config->redirect_follow = true;
	} else if (strcmp(mode, "off") == 0) {
		if (ovl_redirect_always_follow)
			config->redirect_follow = true;
	} else if (strcmp(mode, "nofollow") != 0) {
		pr_err("bad mount option \"redirect_dir=%s\"\n",
		       mode);
		return -EINVAL;
	}

	return 0;
}

/*
 * Parse the comma-separated mount option string @opt into @config, then
 * resolve the dependencies between options (metacopy/redirect_dir,
 * nfs_export/index, nfs_export/metacopy, userxattr).  @opt is modified in
 * place by ovl_next_opt().
 *
 * The *_opt locals record which options were given explicitly: two
 * explicit options that conflict are an error, while a conflict involving
 * a default is resolved by adjusting one of the settings.
 *
 * Returns 0 on success, -ENOMEM if a string copy fails, or -EINVAL for an
 * unrecognized or conflicting option.  Strings already stored in @config
 * are not freed here on error.
 */
static int ovl_parse_opt(char *opt, struct ovl_config *config)
{
	char *p;
	int err;
	bool metacopy_opt = false, redirect_opt = false;
	bool nfs_export_opt = false, index_opt = false;

	config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
	if (!config->redirect_mode)
		return -ENOMEM;

	while ((p = ovl_next_opt(&opt)) != NULL) {
		int token;
		substring_t args[MAX_OPT_ARGS];

		/* Skip empty options, e.g. from consecutive commas */
		if (!*p)
			continue;

		token = match_token(p, ovl_tokens, args);
		switch (token) {
		case OPT_UPPERDIR:
			/* A repeated option replaces the earlier value */
			kfree(config->upperdir);
			config->upperdir = match_strdup(&args[0]);
			if (!config->upperdir)
				return -ENOMEM;
			break;

		case OPT_LOWERDIR:
			kfree(config->lowerdir);
			config->lowerdir = match_strdup(&args[0]);
			if (!config->lowerdir)
				return -ENOMEM;
			break;

		case OPT_WORKDIR:
			kfree(config->workdir);
			config->workdir = match_strdup(&args[0]);
			if (!config->workdir)
				return -ENOMEM;
			break;

		case OPT_DEFAULT_PERMISSIONS:
			config->default_permissions = true;
			break;

		case OPT_REDIRECT_DIR:
			kfree(config->redirect_mode);
			config->redirect_mode = match_strdup(&args[0]);
			if (!config->redirect_mode)
				return -ENOMEM;
			redirect_opt = true;
			break;

		case OPT_INDEX_ON:
			config->index = true;
			index_opt = true;
			break;

		case OPT_INDEX_OFF:
			config->index = false;
			index_opt = true;
			break;

		case OPT_UUID_ON:
			config->uuid = true;
			break;

		case OPT_UUID_OFF:
			config->uuid = false;
			break;

		case OPT_NFS_EXPORT_ON:
			config->nfs_export = true;
			nfs_export_opt = true;
			break;

		case OPT_NFS_EXPORT_OFF:
			config->nfs_export = false;
			nfs_export_opt = true;
			break;

		case OPT_XINO_ON:
			config->xino = OVL_XINO_ON;
			break;

		case OPT_XINO_OFF:
			config->xino = OVL_XINO_OFF;
			break;

		case OPT_XINO_AUTO:
			config->xino = OVL_XINO_AUTO;
			break;

		case OPT_METACOPY_ON:
			config->metacopy = true;
			metacopy_opt = true;
			break;

		case OPT_METACOPY_OFF:
			config->metacopy = false;
			metacopy_opt = true;
			break;

		case OPT_VOLATILE:
			config->ovl_volatile = true;
			break;

		case OPT_USERXATTR:
			config->userxattr = true;
			break;

		default:
			pr_err("unrecognized mount option \"%s\" or missing value\n",
			       p);
			return -EINVAL;
		}
	}

	/* Workdir/index are useless in non-upper mount */
	if (!config->upperdir) {
		if (config->workdir) {
			pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
				config->workdir);
			kfree(config->workdir);
			config->workdir = NULL;
		}
		if (config->index && index_opt) {
			pr_info("option \"index=on\" is useless in a non-upper mount, ignore\n");
			index_opt = false;
		}
		config->index = false;
	}

	if (!config->upperdir && config->ovl_volatile) {
		pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n");
		config->ovl_volatile = false;
	}

	err = ovl_parse_redirect_mode(config, config->redirect_mode);
	if (err)
		return err;

	/*
	 * This is to make the logic below simpler.  It doesn't make any other
	 * difference, since config->redirect_dir is only used for upper.
	 */
	if (!config->upperdir && config->redirect_follow)
		config->redirect_dir = true;

	/* Resolve metacopy -> redirect_dir dependency */
	if (config->metacopy && !config->redirect_dir) {
		if (metacopy_opt && redirect_opt) {
			pr_err("conflicting options: metacopy=on,redirect_dir=%s\n",
			       config->redirect_mode);
			return -EINVAL;
		}
		if (redirect_opt) {
			/*
			 * There was an explicit redirect_dir=... that resulted
			 * in this conflict.
			 */
			pr_info("disabling metacopy due to redirect_dir=%s\n",
				config->redirect_mode);
			config->metacopy = false;
		} else {
			/* Automatically enable redirect otherwise. */
			config->redirect_follow = config->redirect_dir = true;
		}
	}

	/* Resolve nfs_export -> index dependency */
	if (config->nfs_export && !config->index) {
		if (!config->upperdir && config->redirect_follow) {
			pr_info("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
			config->nfs_export = false;
		} else if (nfs_export_opt && index_opt) {
			pr_err("conflicting options: nfs_export=on,index=off\n");
			return -EINVAL;
		} else if (index_opt) {
			/*
			 * There was an explicit index=off that resulted
			 * in this conflict.
			 */
			pr_info("disabling nfs_export due to index=off\n");
			config->nfs_export = false;
		} else {
			/* Automatically enable index otherwise. */
			config->index = true;
		}
	}

	/* Resolve nfs_export -> !metacopy dependency */
	if (config->nfs_export && config->metacopy) {
		if (nfs_export_opt && metacopy_opt) {
			pr_err("conflicting options: nfs_export=on,metacopy=on\n");
			return -EINVAL;
		}
		if (metacopy_opt) {
			/*
			 * There was an explicit metacopy=on that resulted
			 * in this conflict.
			 */
			pr_info("disabling nfs_export due to metacopy=on\n");
			config->nfs_export = false;
		} else {
			/*
			 * There was an explicit nfs_export=on that resulted
			 * in this conflict.
			 */
			pr_info("disabling metacopy due to nfs_export=on\n");
			config->metacopy = false;
		}
	}


	/* Resolve userxattr -> !redirect && !metacopy dependency */
	if (config->userxattr) {
		if (config->redirect_follow && redirect_opt) {
			pr_err("conflicting options: userxattr,redirect_dir=%s\n",
			       config->redirect_mode);
			return -EINVAL;
		}
		if (config->metacopy && metacopy_opt) {
			pr_err("conflicting options: userxattr,metacopy=on\n");
			return -EINVAL;
		}
		/*
		 * Silently disable default setting of redirect and metacopy.
		 * This shall be the default in the future as well: these
		 * options must be explicitly enabled if used together with
		 * userxattr.
		 */
		config->redirect_dir = config->redirect_follow = false;
		config->metacopy = false;
	}

	return 0;
}

#define OVL_WORKDIR_NAME "work"
#define OVL_INDEXDIR_NAME "index"

/*
 * Look up @name under ofs->workbasedir and create it as a directory if it
 * does not exist yet.
 *
 * If the entry already exists: with @persist the existing dentry is
 * returned as is; otherwise ovl_workdir_cleanup() is run on it once and the
 * lookup is retried.  A freshly created directory has its POSIX ACL xattrs
 * removed and its mode set to S_IFDIR | 0.
 *
 * Returns the work dentry on success, ERR_PTR(-EINVAL) if the cleanup
 * returned -EINVAL, or NULL (after logging a warning) on any other failure.
 */
static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
					 const char *name, bool persist)
{
	struct inode *dir = ofs->workbasedir->d_inode;
	struct vfsmount *mnt = ovl_upper_mnt(ofs);
	struct dentry *work;
	int err;
	bool retried = false;

	inode_lock_nested(dir, I_MUTEX_PARENT);
retry:
	work = ovl_lookup_upper(ofs, name, ofs->workbasedir, strlen(name));

	if (!IS_ERR(work)) {
		struct iattr attr = {
			.ia_valid = ATTR_MODE,
			.ia_mode = S_IFDIR | 0,
		};

		if (work->d_inode) {
			/* Still there after one cleanup attempt: give up */
			err = -EEXIST;
			if (retried)
				goto out_dput;

			if (persist)
				goto out_unlock;

			retried = true;
			err = ovl_workdir_cleanup(ofs, dir, mnt, work, 0);
			dput(work);
			if (err == -EINVAL) {
				work = ERR_PTR(err);
				goto out_unlock;
			}
			goto retry;
		}

		err = ovl_mkdir_real(ofs, dir, &work, attr.ia_mode);
		if (err)
			goto out_dput;

		/* Weird filesystem returning with hashed negative (kernfs)? */
		err = -EINVAL;
		if (d_really_is_negative(work))
			goto out_dput;

		/*
		 * Try to remove POSIX ACL xattrs from workdir.  We are good if:
		 *
		 * a) success (there was a POSIX ACL xattr and was removed)
		 * b) -ENODATA (there was no POSIX ACL xattr)
		 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
		 *
		 * There are various other error values that could effectively
		 * mean that the xattr doesn't exist (e.g. -ERANGE is returned
		 * if the xattr name is too long), but the set of filesystems
		 * allowed as upper are limited to "normal" ones, where checking
		 * for the above two errors is sufficient.
		 */
		err = ovl_do_removexattr(ofs, work,
					 XATTR_NAME_POSIX_ACL_DEFAULT);
		if (err && err != -ENODATA && err != -EOPNOTSUPP)
			goto out_dput;

		err = ovl_do_removexattr(ofs, work,
					 XATTR_NAME_POSIX_ACL_ACCESS);
		if (err && err != -ENODATA && err != -EOPNOTSUPP)
			goto out_dput;

		/* Clear any inherited mode bits */
		inode_lock(work->d_inode);
		err = ovl_do_notify_change(ofs, work, &attr);
		inode_unlock(work->d_inode);
		if (err)
			goto out_dput;
	} else {
		err = PTR_ERR(work);
		goto out_err;
	}
out_unlock:
	inode_unlock(dir);
	return work;

out_dput:
	dput(work);
out_err:
	pr_warn("failed to create directory %s/%s (errno: %i); mounting read-only\n",
		ofs->config.workdir, name, -err);
	work = NULL;
	goto out_unlock;
}

/*
 * Remove backslash escapes from @s in place: each backslash is dropped and
 * the character after it is copied literally.
 */
static void ovl_unescape(char *s)
{
	char *d = s;

	for (;; s++, d++) {
		if (*s == '\\')
			s++;
		*d = *s;
		if (!*s)
			break;
	}
}

/*
 * Resolve the already-unescaped @name into @path with LOOKUP_FOLLOW and
 * check that it is a directory whose dentry ovl_dentry_weird() accepts.
 *
 * Returns 0 with @path holding the result, or a negative errno.  If a
 * check fails after the lookup succeeded, @path is released with
 * path_put_init().
 */
static int ovl_mount_dir_noesc(const char *name, struct path *path)
{
	int err = -EINVAL;

	if (!*name) {
		pr_err("empty lowerdir\n");
		goto out;
	}
	err = kern_path(name, LOOKUP_FOLLOW, path);
	if (err) {
		pr_err("failed to resolve '%s': %i\n", name, err);
		goto out;
	}
	err = -EINVAL;
	if (ovl_dentry_weird(path->dentry)) {
		pr_err("filesystem on '%s' not supported\n", name);
		goto out_put;
	}
	if (!d_is_dir(path->dentry)) {
		pr_err("'%s' not a directory\n", name);
		goto out_put;
	}
	return 0;

out_put:
	path_put_init(path);
out:
	return err;
}

/*
 * Like ovl_mount_dir_noesc(), but works on an unescaped copy of @name and
 * additionally rejects (-EINVAL) a dentry with DCACHE_OP_REAL set.
 * Returns -ENOMEM if the copy cannot be allocated.
 */
static int ovl_mount_dir(const char *name, struct path *path)
{
	int err = -ENOMEM;
	char *tmp = kstrdup(name, GFP_KERNEL);

	if (tmp) {
		ovl_unescape(tmp);
		err = ovl_mount_dir_noesc(tmp, path);

		if (!err && path->dentry->d_flags & DCACHE_OP_REAL) {
			pr_err("filesystem on '%s' not supported as upperdir\n",
			       tmp);
			path_put_init(path);
			err = -EINVAL;
		}
		kfree(tmp);
	}
	return err;
}

/*
 * statfs @path and raise ofs->namelen to its f_namelen if that is larger.
 * @name is only used in the error message.  Returns the vfs_statfs()
 * result.
 */
static int ovl_check_namelen(const struct path *path, struct ovl_fs *ofs,
			     const char *name)
{
	struct kstatfs statfs;
	int err = vfs_statfs(path, &statfs);

	if (err)
		pr_err("statfs failed on '%s'\n", name);
	else
		ofs->namelen = max(ofs->namelen, statfs.f_namelen);

	return err;
}

/*
 * Resolve one lower directory into @path and fold its properties into
 * @ofs: name length limit, stacking depth (via @stack_depth) and file
 * handle support.  Features that need file handles are switched off, with
 * a warning, when this layer cannot decode them.
 *
 * Returns 0 or a negative errno.
 * NOTE(review): @path is not released here when ovl_check_namelen() fails;
 * presumably the caller puts it - confirm.
 */
static int ovl_lower_dir(const char *name, struct path *path,
			 struct ovl_fs *ofs, int *stack_depth)
{
	int fh_type;
	int err;

	err = ovl_mount_dir_noesc(name, path);
	if (err)
		return err;

	err = ovl_check_namelen(path, ofs, name);
	if (err)
		return err;

	*stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);

	/*
	 * The inodes index feature and NFS export need to encode and decode
	 * file handles, so they require that all layers support them.
	 */
	fh_type = ovl_can_decode_fh(path->dentry->d_sb);
	if ((ofs->config.nfs_export ||
	     (ofs->config.index && ofs->config.upperdir)) && !fh_type) {
		ofs->config.index = false;
		ofs->config.nfs_export = false;
		pr_warn("fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
			name);
	}
	/*
	 * Decoding origin file handle is required for persistent st_ino.
	 * Without persistent st_ino, xino=auto falls back to xino=off.
	 */
	if (ofs->config.xino == OVL_XINO_AUTO &&
	    ofs->config.upperdir && !fh_type) {
		ofs->config.xino = OVL_XINO_OFF;
		pr_warn("fs on '%s' does not support file handles, falling back to xino=off.\n",
			name);
	}

	/* Check if lower fs has 32bit inode numbers */
	if (fh_type != FILEID_INO32_GEN)
		ofs->xino_mode = -1;

	return 0;
}

/* Workdir should not be subdir of upperdir and vice versa */
static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
{
	bool ok = false;

	if (workdir != upperdir) {
		/* ok only when lock_rename() returns NULL for the pair */
		ok = (lock_rename(workdir, upperdir) == NULL);
		unlock_rename(workdir, upperdir);
	}
	return ok;
}

/*
 * Split @str in place on unescaped ':' characters: each one is replaced by
 * NUL, and backslash escapes are removed as in ovl_unescape().
 * Returns the number of resulting segments (unescaped colons + 1).
 */
static unsigned int ovl_split_lowerdirs(char *str)
{
	unsigned int ctr = 1;
	char *s, *d;

	for (s = d = str;; s++, d++) {
		if (*s == '\\') {
			s++;
		} else if (*s == ':') {
			*d = '\0';
			ctr++;
			continue;
		}
		*d = *s;
		if (!*s)
			break;
	}
	return ctr;
}

/*
 * xattr ->get for the POSIX ACL handlers: forwards to ovl_xattr_get()
 * using the handler's full name; the @name argument is not used.
 */
static int __maybe_unused
ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
			struct dentry *dentry, struct inode *inode,
			const char *name, void *buffer, size_t size)
{
	return ovl_xattr_get(dentry, inode, handler->name, buffer, size);
}

/*
 * xattr ->set for the POSIX ACL handlers.
 *
 * Validates the ACL blob and the caller's permission before handing the
 * operation to ovl_xattr_set(), and clears the sgid bit first when the
 * conditions checked below require it.  @mnt_userns and @name are not
 * used; init_user_ns is passed to all helpers.
 *
 * Returns 0 or a negative errno.
 */
static int __maybe_unused
ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
			struct user_namespace *mnt_userns,
			struct dentry *dentry, struct inode *inode,
			const char *name, const void *value,
			size_t size, int flags)
{
	struct dentry *workdir = ovl_workdir(dentry);
	struct inode *realinode = ovl_inode_real(inode);
	struct posix_acl *acl = NULL;
	int err;

	/* Check that everything is OK before copy-up */
	if (value) {
		/* The above comment can be understood in two ways:
		 *
		 * 1. We just want to check whether the basic POSIX ACL format
		 *    is ok.  For example, if the header is correct and the size
		 *    is sane.
		 * 2. We want to know whether the ACL_{GROUP,USER} entries can
		 *    be mapped according to the underlying filesystem.
		 *
		 * Currently, we only check 1.  If we wanted to check 2. we
		 * would need to pass the mnt_userns and the fs_userns of the
		 * underlying filesystem.  But frankly, I think checking 1. is
		 * enough to start the copy-up.
		 */
		acl = vfs_set_acl_prepare(&init_user_ns, &init_user_ns, value, size);
		if (IS_ERR(acl))
			return PTR_ERR(acl);
	}
	err = -EOPNOTSUPP;
	if (!IS_POSIXACL(d_inode(workdir)))
		goto out_acl_release;
	if (!realinode->i_op->set_acl)
		goto out_acl_release;
	/* A default ACL on a non-directory: removal is a no-op, set fails */
	if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) {
		err = acl ? -EACCES : 0;
		goto out_acl_release;
	}
	err = -EPERM;
	if (!inode_owner_or_capable(&init_user_ns, inode))
		goto out_acl_release;

	/* The parsed ACL was only needed for the checks above */
	posix_acl_release(acl);

	/*
	 * Check if sgid bit needs to be cleared (actual setacl operation will
	 * be done with mounter's capabilities and so that won't do it for us).
	 */
	if (unlikely(inode->i_mode & S_ISGID) &&
	    handler->flags == ACL_TYPE_ACCESS &&
	    !in_group_p(inode->i_gid) &&
	    !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID)) {
		struct iattr iattr = { .ia_valid = ATTR_KILL_SGID };

		err = ovl_setattr(&init_user_ns, dentry, &iattr);
		if (err)
			return err;
	}

	err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags);
	return err;

out_acl_release:
	posix_acl_release(acl);
	return err;
}

/* ->get for overlay's own xattr prefixes: always -EOPNOTSUPP. */
static int ovl_own_xattr_get(const struct xattr_handler *handler,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, void *buffer, size_t size)
{
	return -EOPNOTSUPP;
}

/* ->set for overlay's own xattr prefixes: always -EOPNOTSUPP. */
static int ovl_own_xattr_set(const struct xattr_handler *handler,
			     struct user_namespace *mnt_userns,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, const void *value,
			     size_t size, int flags)
{
	return -EOPNOTSUPP;
}

/* ->get for the catch-all handler: forwards @name to ovl_xattr_get(). */
static int ovl_other_xattr_get(const struct xattr_handler *handler,
			       struct dentry *dentry, struct inode *inode,
			       const char *name, void *buffer, size_t size)
{
	return ovl_xattr_get(dentry, inode, name, buffer, size);
}

/* ->set for the catch-all handler: forwards @name to ovl_xattr_set(). */
static int ovl_other_xattr_set(const struct xattr_handler *handler,
			       struct user_namespace *mnt_userns,
			       struct dentry *dentry, struct inode *inode,
			       const char *name, const void *value,
			       size_t size, int flags)
{
	return ovl_xattr_set(dentry, inode, name, value, size, flags);
}

/* Handler for the POSIX access ACL xattr. */
static const struct xattr_handler __maybe_unused
ovl_posix_acl_access_xattr_handler = {
	.name = XATTR_NAME_POSIX_ACL_ACCESS,
	.flags = ACL_TYPE_ACCESS,
	.get = ovl_posix_acl_xattr_get,
	.set = ovl_posix_acl_xattr_set,
};

/* Handler for the POSIX default ACL xattr. */
static const struct xattr_handler __maybe_unused
ovl_posix_acl_default_xattr_handler = {
	.name = XATTR_NAME_POSIX_ACL_DEFAULT,
	.flags = ACL_TYPE_DEFAULT,
	.get = ovl_posix_acl_xattr_get,
	.set = ovl_posix_acl_xattr_set,
};

/* Handler for overlay's own xattrs under OVL_XATTR_TRUSTED_PREFIX. */
static const struct xattr_handler ovl_own_trusted_xattr_handler = {
	.prefix	= OVL_XATTR_TRUSTED_PREFIX,
	.get = ovl_own_xattr_get,
	.set = ovl_own_xattr_set,
};

/* Handler for overlay's own xattrs under OVL_XATTR_USER_PREFIX. */
static const struct xattr_handler ovl_own_user_xattr_handler = {
	.prefix	= OVL_XATTR_USER_PREFIX,
	.get = ovl_own_xattr_get,
	.set = ovl_own_xattr_set,
};

static const struct xattr_handler ovl_other_xattr_handler = {
	.prefix	= "", /* catch all */
	.get = ovl_other_xattr_get,
	.set = ovl_other_xattr_set,
};

/* Handler list that uses the trusted-prefix handler for overlay's xattrs. */
static const struct xattr_handler *ovl_trusted_xattr_handlers[] = {
#ifdef CONFIG_FS_POSIX_ACL
	&ovl_posix_acl_access_xattr_handler,
	&ovl_posix_acl_default_xattr_handler,
#endif
	&ovl_own_trusted_xattr_handler,
	&ovl_other_xattr_handler,
	NULL
};

/* Handler list that uses the user-prefix handler for overlay's xattrs. */
static const struct xattr_handler *ovl_user_xattr_handlers[] = {
#ifdef CONFIG_FS_POSIX_ACL
	&ovl_posix_acl_access_xattr_handler,
	&ovl_posix_acl_default_xattr_handler,
#endif
	&ovl_own_user_xattr_handler,
	&ovl_other_xattr_handler,
	NULL
};

/*
 * Obtain a trap inode for @dir with ovl_get_trap_inode() and store it in
 * *@ptrap.  On failure the error is returned and *@ptrap is left
 * untouched.  -ELOOP is additionally reported as a conflicting @name path.
 */
static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
			  struct inode **ptrap, const char *name)
{
	struct inode *trap;
	int err;

	trap = ovl_get_trap_inode(sb, dir);
	err = PTR_ERR_OR_ZERO(trap);
	if (err) {
		if (err == -ELOOP)
			pr_err("conflicting %s path\n", name);
		return err;
	}

	*ptrap = trap;
	return 0;
}

/*
 * Determine how we treat concurrent use of upperdir/workdir based on the
 * index feature. This is papering over mount leaks of container runtimes,
 * for example, an old overlay mount is leaked and now its upperdir is
 * attempted to be used as a lower layer in a new overlay mount.
*/
static int ovl_report_in_use(struct ovl_fs *ofs, const char *name)
{
	/*
	 * index=on:  log an error and hand -EBUSY back to the caller.
	 * index=off: log a warning only and report success (0).
	 * @name is only used to label the message.
	 */
	if (ofs->config.index) {
		pr_err("%s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n",
		       name);
		return -EBUSY;
	} else {
		pr_warn("%s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n",
			name);
		return 0;
	}
}

/*
 * Resolve config.upperdir into @upperpath and initialise @upper_layer.
 *
 * The upper mount must not be read-only and must pass ovl_check_namelen().
 * A trap inode for the upper root is stored in upper_layer->trap and a
 * private clone of the mount in upper_layer->mnt; idx and fsid are set to 0.
 * SB_NOSEC is propagated from the upper super block to @sb, and finally the
 * upper root is marked in-use. If it is already in use,
 * ovl_report_in_use() decides whether that is an error.
 *
 * Returns 0 on success or a negative errno. Nothing is released here on
 * failure; @upperpath and @upper_layer are left for the caller to clean up.
 */
static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
			 struct ovl_layer *upper_layer, struct path *upperpath)
{
	struct vfsmount *upper_mnt;
	int err;

	err = ovl_mount_dir(ofs->config.upperdir, upperpath);
	if (err)
		goto out;

	/* Upperdir path should not be r/o */
	if (__mnt_is_readonly(upperpath->mnt)) {
		pr_err("upper fs is r/o, try multi-lower layers mount\n");
		err = -EINVAL;
		goto out;
	}

	err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir);
	if (err)
		goto out;

	err = ovl_setup_trap(sb, upperpath->dentry, &upper_layer->trap,
			     "upperdir");
	if (err)
		goto out;

	upper_mnt = clone_private_mount(upperpath);
	/* err is only meaningful if the IS_ERR() check below fires */
	err = PTR_ERR(upper_mnt);
	if (IS_ERR(upper_mnt)) {
		pr_err("failed to clone upperpath\n");
		goto out;
	}

	/* Don't inherit atime flags */
	upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
	upper_layer->mnt = upper_mnt;
	upper_layer->idx = 0;
	upper_layer->fsid = 0;

	/*
	 * Inherit SB_NOSEC flag from upperdir.
	 *
	 * This optimization changes behavior when a security related attribute
	 * (suid/sgid/security.*) is changed on an underlying layer.  This is
	 * okay because we don't yet have guarantees in that case, but it will
	 * need careful treatment once we want to honour changes to underlying
	 * filesystems.
	 */
	if (upper_mnt->mnt_sb->s_flags & SB_NOSEC)
		sb->s_flags |= SB_NOSEC;

	if (ovl_inuse_trylock(ovl_upper_mnt(ofs)->mnt_root)) {
		ofs->upperdir_locked = true;
	} else {
		err = ovl_report_in_use(ofs, "upperdir");
		if (err)
			goto out;
	}

	err = 0;
out:
	return err;
}

/*
 * Returns 1 if RENAME_WHITEOUT is supported, 0 if not supported and
 * negative values if error is encountered.
 *
 * The probe runs inside ofs->workdir with the directory inode locked: a
 * temp file is created and renamed with RENAME_WHITEOUT onto a second temp
 * name, then the old name is looked up again and tested with
 * ovl_is_whiteout(). -EINVAL from the rename is reported as "not supported"
 * (0) rather than as an error.
 */
static int ovl_check_rename_whiteout(struct ovl_fs *ofs)
{
	struct dentry *workdir = ofs->workdir;
	struct inode *dir = d_inode(workdir);
	struct dentry *temp;
	struct dentry *dest;
	struct dentry *whiteout;
	struct name_snapshot name;
	int err;

	inode_lock_nested(dir, I_MUTEX_PARENT);

	temp = ovl_create_temp(ofs, workdir, OVL_CATTR(S_IFREG | 0));
	err = PTR_ERR(temp);
	if (IS_ERR(temp))
		goto out_unlock;

	dest = ovl_lookup_temp(ofs, workdir);
	err = PTR_ERR(dest);
	if (IS_ERR(dest)) {
		/* only the reference is dropped here; no ovl_cleanup() of temp */
		dput(temp);
		goto out_unlock;
	}

	/* Name is inline and stable - using snapshot as a copy helper */
	take_dentry_name_snapshot(&name, temp);
	err = ovl_do_rename(ofs, dir, temp, dir, dest, RENAME_WHITEOUT);
	if (err) {
		if (err == -EINVAL)
			err = 0;
		goto cleanup_temp;
	}

	/* Look up the name temp had before the rename */
	whiteout = ovl_lookup_upper(ofs, name.name.name, workdir, name.name.len);
	err = PTR_ERR(whiteout);
	if (IS_ERR(whiteout))
		goto cleanup_temp;

	/* From here on err carries the probe result that is returned */
	err = ovl_is_whiteout(whiteout);

	/* Best effort cleanup of whiteout and temp file */
	if (err)
		ovl_cleanup(ofs, dir, whiteout);
	dput(whiteout);

cleanup_temp:
	ovl_cleanup(ofs, dir, temp);
	release_dentry_name_snapshot(&name);
	dput(temp);
	dput(dest);

out_unlock:
	inode_unlock(dir);

	return err;
}

/*
 * Look up @name under @parent and create it with @mode if the lookup
 * produced a negative dentry. Both steps run with the parent inode locked.
 *
 * The caller's reference on @parent is consumed (dput) in all cases.
 * Returns whatever the lookup or the create call returned, i.e. the child
 * dentry or an ERR_PTR().
 */
static struct dentry *ovl_lookup_or_create(struct ovl_fs *ofs,
					   struct dentry *parent,
					   const char *name, umode_t mode)
{
	size_t len = strlen(name);
	struct dentry *child;

	inode_lock_nested(parent->d_inode, I_MUTEX_PARENT);
	child = ovl_lookup_upper(ofs, name, parent, len);
	if (!IS_ERR(child) && !child->d_inode)
		child = ovl_create_real(ofs, parent->d_inode, child,
					OVL_CATTR(mode));
	inode_unlock(parent->d_inode);
	dput(parent);

	return child;
}

/*
 * Creates $workdir/work/incompat/volatile/dirty file if it is not already
 * present.
 *
 * The walk starts at ofs->workbasedir. Every step hands the current dentry
 * to ovl_lookup_or_create(), which consumes it, and continues with the
 * child. Returns 0 on success or the negative errno of the failing step.
 */
static int ovl_create_volatile_dirty(struct ovl_fs *ofs)
{
	unsigned int ctr;
	struct dentry *d = dget(ofs->workbasedir);
	static const char *const volatile_path[] = {
		OVL_WORKDIR_NAME, "incompat", "volatile", "dirty"
	};
	const char *const *name = volatile_path;

	/* ctr counts down: only the last component (ctr == 1) is a regular file */
	for (ctr = ARRAY_SIZE(volatile_path); ctr; ctr--, name++) {
		d = ovl_lookup_or_create(ofs, d, *name, ctr > 1 ? S_IFDIR : S_IFREG);
		if (IS_ERR(d))
			return PTR_ERR(d);
	}
	dput(d);
	return 0;
}

/*
 * Create the work directory and probe the upper fs for the features overlayfs
 * relies on.
 *
 * Runs with write access held on the upper mount. After creating
 * OVL_WORKDIR_NAME and its trap inode, the probes are, in order: d_type,
 * O_TMPFILE, RENAME_WHITEOUT, overlay xattrs and file handles. Missing
 * features are logged and downgrade the matching config options; they are
 * fatal (-EINVAL) only when the work dir is on a remote fs. For volatile
 * mounts the incompat/volatile/dirty marker is created as well.
 *
 * Returns 0 on success or a negative errno. If ovl_workdir_create() returns
 * NULL this function returns 0 with ofs->workdir left unset.
 */
static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
			    const struct path *workpath)
{
	struct vfsmount *mnt = ovl_upper_mnt(ofs);
	struct dentry *workdir;
	struct file *tmpfile;
	bool rename_whiteout;
	bool d_type;
	int fh_type;
	int err;

	err = mnt_want_write(mnt);
	if (err)
		return err;

	workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
	/* PTR_ERR(NULL) is 0, so the NULL case leaves with err == 0 */
	err = PTR_ERR(workdir);
	if (IS_ERR_OR_NULL(workdir))
		goto out;

	ofs->workdir = workdir;

	err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir");
	if (err)
		goto out;

	/*
	 * Upper should support d_type, else whiteouts are visible.  Given
	 * workdir and upper are on same fs, we can do iterate_dir() on
	 * workdir. This check requires successful creation of workdir in
	 * previous step.
	 */
	err = ovl_check_d_type_supported(workpath);
	if (err < 0)
		goto out;

	d_type = err;
	if (!d_type)
		pr_warn("upper fs needs to support d_type.\n");

	/* Check if upper/work fs supports O_TMPFILE */
	tmpfile = ovl_do_tmpfile(ofs, ofs->workdir, S_IFREG | 0);
	ofs->tmpfile = !IS_ERR(tmpfile);
	if (ofs->tmpfile)
		fput(tmpfile);
	else
		pr_warn("upper fs does not support tmpfile.\n");


	/* Check if upper/work fs supports RENAME_WHITEOUT */
	err = ovl_check_rename_whiteout(ofs);
	if (err < 0)
		goto out;

	rename_whiteout = err;
	if (!rename_whiteout)
		pr_warn("upper fs does not support RENAME_WHITEOUT.\n");

	/*
	 * Check if upper/work fs supports (trusted|user).overlay.* xattr
	 */
	err = ovl_setxattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE, "0", 1);
	if (err) {
		pr_warn("failed to set xattr on upper\n");
		ofs->noxattr = true;
		if (ofs->config.index || ofs->config.metacopy) {
			ofs->config.index = false;
			ofs->config.metacopy = false;
			pr_warn("...falling back to index=off,metacopy=off.\n");
		}
		/*
		 * xattr support is required for persistent st_ino.
		 * Without persistent st_ino, xino=auto falls back to xino=off.
		 */
		if (ofs->config.xino == OVL_XINO_AUTO) {
			ofs->config.xino = OVL_XINO_OFF;
			pr_warn("...falling back to xino=off.\n");
		}
		if (err == -EPERM && !ofs->config.userxattr)
			pr_info("try mounting with 'userxattr' option\n");
		/* Missing xattr support has been recorded; not an error here */
		err = 0;
	} else {
		/* Probe succeeded: remove the test xattr again */
		ovl_removexattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE);
	}

	/*
	 * We allowed sub-optimal upper fs configuration and don't want to break
	 * users over kernel upgrade, but we never allowed remote upper fs, so
	 * we can enforce strict requirements for remote upper fs.
	 */
	if (ovl_dentry_remote(ofs->workdir) &&
	    (!d_type || !rename_whiteout || ofs->noxattr)) {
		pr_err("upper fs missing required features.\n");
		err = -EINVAL;
		goto out;
	}

	/*
	 * For volatile mount, create an incompat/volatile/dirty file to keep
	 * track of it.
	 */
	if (ofs->config.ovl_volatile) {
		err = ovl_create_volatile_dirty(ofs);
		if (err < 0) {
			pr_err("Failed to create volatile/dirty file.\n");
			goto out;
		}
	}

	/* Check if upper/work fs supports file handles */
	fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
	if (ofs->config.index && !fh_type) {
		ofs->config.index = false;
		pr_warn("upper fs does not support file handles, falling back to index=off.\n");
	}

	/* Check if upper fs has 32bit inode numbers */
	if (fh_type != FILEID_INO32_GEN)
		ofs->xino_mode = -1;

	/* NFS export of r/w mount depends on index */
	if (ofs->config.nfs_export && !ofs->config.index) {
		pr_warn("NFS export requires \"index=on\", falling back to nfs_export=off.\n");
		ofs->config.nfs_export = false;
	}
out:
	mnt_drop_write(mnt);
	return err;
}

/*
 * Resolve config.workdir and prepare the work directory.
 *
 * The work path must be on the same vfsmount as @upperpath and must pass
 * ovl_workdir_ok() against the upper root. A reference to the resolved
 * dentry is kept in ofs->workbasedir. It is then marked in-use (if it is
 * already in use, ovl_report_in_use() decides whether that is an error),
 * gets a trap inode in ofs->workbasedir_trap, and ovl_make_workdir() does
 * the rest.
 *
 * The local workpath is put on every exit path. Returns 0 on success or a
 * negative errno.
 */
static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs,
			   const struct path *upperpath)
{
	int err;
	struct path workpath = { };

	err = ovl_mount_dir(ofs->config.workdir, &workpath);
	if (err)
		goto out;

	err = -EINVAL;
	if (upperpath->mnt != workpath.mnt) {
		pr_err("workdir and upperdir must reside under the same mount\n");
		goto out;
	}
	if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
		pr_err("workdir and upperdir must be separate subtrees\n");
		goto out;
	}

	ofs->workbasedir = dget(workpath.dentry);

	if (ovl_inuse_trylock(ofs->workbasedir)) {
		ofs->workdir_locked = true;
	} else {
		err = ovl_report_in_use(ofs, "workdir");
		if (err)
			goto out;
	}

	err = ovl_setup_trap(sb, ofs->workbasedir, &ofs->workbasedir_trap,
			     "workdir");
	if (err)
		goto out;

	err = ovl_make_workdir(sb, ofs, &workpath);

out:
	path_put(&workpath);

	return err;
}

/*
 * Set up the index directory, which then also serves as the work directory.
 *
 * With write access held on the upper mount: verify through
 * ovl_verify_origin() that the first lower root is the origin of the upper
 * root, drop the current ofs->workdir and its trap, and create
 * OVL_INDEXDIR_NAME through ovl_workdir_create(). When a directory comes
 * back it is stored in ofs->indexdir (with an extra reference in
 * ofs->workdir), gets its own trap inode, has its 'origin'/'upper' xattrs
 * verified and is cleaned with ovl_indexdir_cleanup().
 *
 * Returns 0 or a negative errno. A return of 0 with ofs->indexdir still NULL
 * happens when ovl_workdir_create() returns NULL.
 */
static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
			    struct ovl_entry *oe, const struct path *upperpath)
{
	struct vfsmount *mnt = ovl_upper_mnt(ofs);
	struct dentry *indexdir;
	int err;

	err = mnt_want_write(mnt);
	if (err)
		return err;

	/* Verify lower root is upper root origin */
	err = ovl_verify_origin(ofs, upperpath->dentry,
				oe->lowerstack[0].dentry, true);
	if (err) {
		pr_err("failed to verify upper root origin\n");
		goto out;
	}

	/* index dir will act also as workdir */
	iput(ofs->workdir_trap);
	ofs->workdir_trap = NULL;
	dput(ofs->workdir);
	ofs->workdir = NULL;
	indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
	if (IS_ERR(indexdir)) {
		err = PTR_ERR(indexdir);
	} else if (indexdir) {
		ofs->indexdir = indexdir;
		ofs->workdir = dget(indexdir);

		err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap,
				     "indexdir");
		if (err)
			goto out;

		/*
		 * Verify upper root is exclusively associated with index dir.
		 * Older kernels stored upper fh in ".overlay.origin"
		 * xattr.  If that xattr exists, verify that it is a match to
		 * upper dir file handle. In any case, verify or set xattr
		 * ".overlay.upper" to indicate that index may have
		 * directory entries.
		 */
		if (ovl_check_origin_xattr(ofs, ofs->indexdir)) {
			err = ovl_verify_set_fh(ofs, ofs->indexdir,
						OVL_XATTR_ORIGIN,
						upperpath->dentry, true, false);
			if (err)
				pr_err("failed to verify index dir 'origin' xattr\n");
		}
		/*
		 * NOTE(review): this assignment overwrites any err from the
		 * 'origin' check above, so that failure is only logged -
		 * confirm this is intended.
		 */
		err = ovl_verify_upper(ofs, ofs->indexdir, upperpath->dentry,
				       true);
		if (err)
			pr_err("failed to verify index dir 'upper' xattr\n");

		/* Cleanup bad/stale/orphan index entries */
		if (!err)
			err = ovl_indexdir_cleanup(ofs);
	}
	if (err || !ofs->indexdir)
		pr_warn("try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");

out:
	mnt_drop_write(mnt);
	return err;
}

/*
 * Decide whether a lower fs with @uuid may be used for decoding lower file
 * handles.
 *
 * Returns true right away when nfs_export is off and there is no upper
 * mount. Returns false for a null uuid when ovl_allow_offline_changes() is
 * true, and false when an already registered lower fs has the same uuid; in
 * that case the existing entry is also flagged bad_uuid.
 */
static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
{
	unsigned int i;

	if (!ofs->config.nfs_export && !ovl_upper_mnt(ofs))
		return true;

	/*
	 * We allow using single lower with null uuid for index and nfs_export
	 * for example to support those features with single lower squashfs.
	 * To avoid regressions in setups of overlay with re-formatted lower
	 * squashfs, do not allow decoding origin with lower null uuid unless
	 * user opted-in to one of the new features that require following the
	 * lower inode of non-dir upper.
	 */
	if (ovl_allow_offline_changes(ofs) && uuid_is_null(uuid))
		return false;

	for (i = 0; i < ofs->numfs; i++) {
		/*
		 * We use uuid to associate an overlay lower file handle with a
		 * lower layer, so we can accept lower fs with null uuid as long
		 * as all lower layers with null uuid are on the same fs.
		 * if we detect multiple lower fs with the same uuid, we
		 * disable lower file handle decoding on all of them.
		 */
		if (ofs->fs[i].is_lower &&
		    uuid_equal(&ofs->fs[i].sb->s_uuid, uuid)) {
			ofs->fs[i].bad_uuid = true;
			return false;
		}
	}
	return true;
}

/*
 * Get a unique fsid for the layer
 *
 * If the super block of @path is already present in ofs->fs[], its index is
 * returned. Otherwise a new entry is appended with its own anonymous bdev
 * and the new index is returned. A lower fs rejected by ovl_lower_uuid_ok()
 * is recorded as bad_uuid and switches xino=auto to xino=off and
 * index/nfs_export off, with a warning if anything was changed.
 *
 * Returns the fsid (>= 0) or the negative errno from get_anon_bdev().
 */
static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
{
	struct super_block *sb = path->mnt->mnt_sb;
	unsigned int i;
	dev_t dev;
	int err;
	bool bad_uuid = false;
	bool warn = false;

	for (i = 0; i < ofs->numfs; i++) {
		if (ofs->fs[i].sb == sb)
			return i;
	}

	if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) {
		bad_uuid = true;
		if (ofs->config.xino == OVL_XINO_AUTO) {
			ofs->config.xino = OVL_XINO_OFF;
			warn = true;
		}
		if (ofs->config.index || ofs->config.nfs_export) {
			ofs->config.index = false;
			ofs->config.nfs_export = false;
			warn = true;
		}
		if (warn) {
			pr_warn("%s uuid detected in lower fs '%pd2', falling back to xino=%s,index=off,nfs_export=off.\n",
				uuid_is_null(&sb->s_uuid) ? "null" :
							    "conflicting",
				path->dentry, ovl_xino_str[ofs->config.xino]);
		}
	}

	err = get_anon_bdev(&dev);
	if (err) {
		pr_err("failed to get anonymous bdev for lowerpath\n");
		return err;
	}

	ofs->fs[ofs->numfs].sb = sb;
	ofs->fs[ofs->numfs].pseudo_dev = dev;
	ofs->fs[ofs->numfs].bad_uuid = bad_uuid;

	/* Post-increment: the value returned is the index just filled in */
	return ofs->numfs++;
}

/*
 * Fill ofs->fs[] and the lower entries of @layers from the @numlower paths
 * in @stack.
 *
 * ofs->fs is allocated with numlower + 1 slots; slot 0 is reserved for the
 * upper fs and always gets an anonymous bdev. For each lower path: obtain
 * its fsid, set up a trap inode on its root, check whether the root is in
 * use, and take a private clone of the mount, flagged MNT_READONLY |
 * MNT_NOATIME. The layer is appended at layers[ofs->numlayer]. Finally
 * ofs->xino_mode is derived from the number of filesystems and the xino
 * option.
 *
 * Returns 0 on success or a negative errno. Apart from the trap inode of the
 * failing iteration, nothing already stored in @ofs or @layers is undone
 * here.
 */
static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
			  struct path *stack, unsigned int numlower,
			  struct ovl_layer *layers)
{
	int err;
	unsigned int i;

	err = -ENOMEM;
	ofs->fs = kcalloc(numlower + 1, sizeof(struct ovl_sb), GFP_KERNEL);
	if (ofs->fs == NULL)
		goto out;

	/* idx/fsid 0 are reserved for upper fs even with lower only overlay */
	ofs->numfs++;

	/*
	 * All lower layers that share the same fs as upper layer, use the same
	 * pseudo_dev as upper layer.  Allocate fs[0].pseudo_dev even for lower
	 * only overlay to simplify ovl_fs_free().
	 * is_lower will be set if upper fs is shared with a lower layer.
	 */
	err = get_anon_bdev(&ofs->fs[0].pseudo_dev);
	if (err) {
		pr_err("failed to get anonymous bdev for upper fs\n");
		goto out;
	}

	if (ovl_upper_mnt(ofs)) {
		ofs->fs[0].sb = ovl_upper_mnt(ofs)->mnt_sb;
		ofs->fs[0].is_lower = false;
	}

	for (i = 0; i < numlower; i++) {
		struct vfsmount *mnt;
		struct inode *trap;
		int fsid;

		err = fsid = ovl_get_fsid(ofs, &stack[i]);
		if (err < 0)
			goto out;

		/*
		 * Check if lower root conflicts with this overlay's layers
		 * before checking if it is in-use as upperdir/workdir of
		 * "another" mount, because we do not bother to check in
		 * ovl_is_inuse() if the upperdir/workdir is in fact in-use by
		 * our upperdir/workdir.
		 */
		err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir");
		if (err)
			goto out;

		if (ovl_is_inuse(stack[i].dentry)) {
			err = ovl_report_in_use(ofs, "lowerdir");
			if (err) {
				iput(trap);
				goto out;
			}
		}

		mnt = clone_private_mount(&stack[i]);
		err = PTR_ERR(mnt);
		if (IS_ERR(mnt)) {
			pr_err("failed to clone lowerpath\n");
			iput(trap);
			goto out;
		}

		/*
		 * Make lower layers R/O.  That way fchmod/fchown on lower file
		 * will fail instead of modifying lower fs.
		 */
		mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;

		layers[ofs->numlayer].trap = trap;
		layers[ofs->numlayer].mnt = mnt;
		layers[ofs->numlayer].idx = ofs->numlayer;
		layers[ofs->numlayer].fsid = fsid;
		layers[ofs->numlayer].fs = &ofs->fs[fsid];
		ofs->numlayer++;
		ofs->fs[fsid].is_lower = true;
	}

	/*
	 * When all layers on same fs, overlay can use real inode numbers.
	 * With mount option "xino=<on|auto>", mounter declares that there are
	 * enough free high bits in underlying fs to hold the unique fsid.
	 * If overlayfs does encounter underlying inodes using the high xino
	 * bits reserved for fsid, it emits a warning and uses the original
	 * inode number or a non persistent inode number allocated from a
	 * dedicated range.
	 */
	if (ofs->numfs - !ovl_upper_mnt(ofs) == 1) {
		if (ofs->config.xino == OVL_XINO_ON)
			pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n");
		ofs->xino_mode = 0;
	} else if (ofs->config.xino == OVL_XINO_OFF) {
		ofs->xino_mode = -1;
	} else if (ofs->xino_mode < 0) {
		/*
		 * This is a roundup of number of bits needed for encoding
		 * fsid, where fsid 0 is reserved for upper fs (even with
		 * lower only overlay) +1 extra bit is reserved for the non
		 * persistent inode number range that is used for resolving
		 * xino lower bits overflow.
		 */
		BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 30);
		ofs->xino_mode = ilog2(ofs->numfs - 1) + 2;
	}

	if (ofs->xino_mode > 0) {
		pr_info("\"xino\" feature enabled using %d upper inode bits.\n",
			ofs->xino_mode);
	}

	err = 0;
out:
	return err;
}

/*
 * Build the root ovl_entry from the list of lower directories.
 *
 * @lower holds @numlower NUL-separated paths, each one followed directly by
 * the next. Every path is resolved with ovl_lower_dir(), the stacking depth
 * is bumped and checked against FILESYSTEM_MAX_STACK_DEPTH, the layers are
 * registered with ovl_get_layers(), and an entry with @numlower lower paths
 * is allocated that pairs stack[i] with ofs->layers[i + 1].
 *
 * A single lowerdir without an upperdir is rejected with -EINVAL. The
 * temporary path array is released on every exit taken after it has been
 * allocated. Returns the new entry or an ERR_PTR().
 */
static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
				const char *lower, unsigned int numlower,
				struct ovl_fs *ofs, struct ovl_layer *layers)
{
	int err;
	struct path *stack = NULL;
	unsigned int i;
	struct ovl_entry *oe;

	if (!ofs->config.upperdir && numlower == 1) {
		pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n");
		return ERR_PTR(-EINVAL);
	}

	stack = kcalloc(numlower, sizeof(struct path), GFP_KERNEL);
	if (!stack)
		return ERR_PTR(-ENOMEM);

	err = -EINVAL;
	for (i = 0; i < numlower; i++) {
		err = ovl_lower_dir(lower, &stack[i], ofs, &sb->s_stack_depth);
		if (err)
			goto out_err;

		/* Step past the terminating NUL to the next path */
		lower = strchr(lower, '\0') + 1;
	}

	err = -EINVAL;
	sb->s_stack_depth++;
	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
		pr_err("maximum fs stacking depth exceeded\n");
		goto out_err;
	}

	err = ovl_get_layers(sb, ofs, stack, numlower, layers);
	if (err)
		goto out_err;

	err = -ENOMEM;
	oe = ovl_alloc_entry(numlower);
	if (!oe)
		goto out_err;

	for (i = 0; i < numlower; i++) {
		oe->lowerstack[i].dentry = dget(stack[i].dentry);
		/* layers[0] is the upper layer, lower layers start at 1 */
		oe->lowerstack[i].layer = &ofs->layers[i+1];
	}

out:
	/* Shared by the success and the error path */
	for (i = 0; i < numlower; i++)
		path_put(&stack[i]);
	kfree(stack);

	return oe;

out_err:
	oe = ERR_PTR(err);
	goto out;
}

/*
 * Check if this layer root is a descendant of:
 * - another layer of this overlayfs instance
 * - upper/work dir of any overlayfs instance
 *
 * @name labels the layer in log messages. The trap inode test is applied
 * only when @is_lower is set. A NULL @dentry is accepted and yields 0.
 * Returns 0 when no conflict is found, -ELOOP for an overlapping path, or
 * the result of ovl_report_in_use() for an ancestor that is in use.
 */
static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs,
			   struct dentry *dentry, const char *name,
			   bool is_lower)
{
	struct dentry *next = dentry, *parent;
	int err = 0;

	if (!dentry)
		return 0;

	parent = dget_parent(next);

	/* Walk back ancestors to root (inclusive) looking for traps */
	while (!err && parent != next) {
		if (is_lower && ovl_lookup_trap_inode(sb, parent)) {
			err = -ELOOP;
			pr_err("overlapping %s path\n", name);
		} else if (ovl_is_inuse(parent)) {
			err = ovl_report_in_use(ofs, name);
		}
		/*
		 * Step up one level: take the reference on the new parent
		 * first, then drop the one held on the ancestor just checked.
		 */
		next = parent;
		parent = dget_parent(next);
		dput(next);
	}

	dput(parent);

	return err;
}

/*
 * Check if any of the layers or work dirs overlap.
 *
 * The upper root and workbasedir are checked when there is an upper mount,
 * then the root of every lower layer. Returns 0 or the first error reported
 * by ovl_check_layer().
 */
static int ovl_check_overlapping_layers(struct super_block *sb,
					struct ovl_fs *ofs)
{
	int i, err;

	if (ovl_upper_mnt(ofs)) {
		err = ovl_check_layer(sb, ofs, ovl_upper_mnt(ofs)->mnt_root,
				      "upperdir", false);
		if (err)
			return err;

		/*
		 * Checking workbasedir avoids hitting ovl_is_inuse(parent) of
		 * this instance and covers overlapping work and index dirs,
		 * unless work or index dir have been moved since created inside
		 * workbasedir.  In that case, we already have their traps in
		 * inode cache and we will catch that case on lookup.
		 */
		err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir",
				      false);
		if (err)
			return err;
	}

	/* Layer 0 is the upper layer (handled above); lower layers start at 1 */
	for (i = 1; i < ofs->numlayer; i++) {
		err = ovl_check_layer(sb, ofs,
				      ofs->layers[i].mnt->mnt_root,
				      "lowerdir", true);
		if (err)
			return err;
	}

	return 0;
}

/*
 * Allocate and initialise the root dentry of the overlay.
 *
 * @oe is attached as d_fsdata of the new root. Inode number and fsid are
 * taken from the first lower path unless @upperdentry is set, in which case
 * the upper inode number and fsid 0 are used.
 *
 * Returns the root dentry, or NULL if d_make_root() fails. @oe is not freed
 * here in that case.
 */
static struct dentry *ovl_get_root(struct super_block *sb,
				   struct dentry *upperdentry,
				   struct ovl_entry *oe)
{
	struct dentry *root;
	struct ovl_path *lowerpath = &oe->lowerstack[0];
	unsigned long ino = d_inode(lowerpath->dentry)->i_ino;
	int fsid = lowerpath->layer->fsid;
	struct ovl_inode_params oip = {
		.upperdentry = upperdentry,
		.lowerpath = lowerpath,
	};

	root = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
	if (!root)
		return NULL;

	root->d_fsdata = oe;

	if (upperdentry) {
		/* Root inode uses upper st_ino/i_ino */
		ino = d_inode(upperdentry)->i_ino;
		fsid = 0;
		ovl_dentry_set_upper_alias(root);
		if (ovl_is_impuredir(sb, upperdentry))
			ovl_set_flag(OVL_IMPURE, d_inode(root));
	}

	/* Root is always merge -> can have whiteouts */
	ovl_set_flag(OVL_WHITEOUTS, d_inode(root));
	ovl_dentry_set_flag(OVL_E_CONNECTED, root);
	ovl_set_upperdata(d_inode(root));
	ovl_inode_init(d_inode(root), &oip, ino, fsid);
	ovl_dentry_update_reval(root, upperdentry, DCACHE_OP_WEAK_REVALIDATE);

	return root;
}

/*
 * Fill in the super block for a new overlay mount.
 *
 * @data is the option string handed to ovl_parse_opt(); @silent suppresses
 * the "missing 'lowerdir'" message.
 *
 * Order of operations: allocate the ovl_fs and the creator credentials,
 * apply the default config and parse the options, split lowerdir, allocate
 * the layer array, set up upper and work dir (when upperdir is given), build
 * the lower stack, set up the index dir (when enabled and the mount is not
 * forced read-only), check for overlapping layers, settle the final
 * index/nfs_export config, fill in the remaining sb fields and create the
 * root dentry.
 *
 * Returns 0 on success or a negative errno. Once ofs has been allocated,
 * every failure path hands it to ovl_free_fs().
 */
static int ovl_fill_super(struct super_block *sb, void *data, int silent)
{
	struct path upperpath = { };
	struct dentry *root_dentry;
	struct ovl_entry *oe;
	struct ovl_fs *ofs;
	struct ovl_layer *layers;
	struct cred *cred;
	char *splitlower = NULL;
	unsigned int numlower;
	int err;

	err = -EIO;
	if (WARN_ON(sb->s_user_ns != current_user_ns()))
		goto out;

	sb->s_d_op = &ovl_dentry_operations;

	err = -ENOMEM;
	ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
	if (!ofs)
		goto out;

	err = -ENOMEM;
	ofs->creator_cred = cred = prepare_creds();
	if (!cred)
		goto out_err;

	/* Is there a reason anyone would want not to share whiteouts? */
	ofs->share_whiteout = true;

	/* Defaults; ovl_parse_opt() may override them from the option string */
	ofs->config.index = ovl_index_def;
	ofs->config.uuid = true;
	ofs->config.nfs_export = ovl_nfs_export_def;
	ofs->config.xino = ovl_xino_def();
	ofs->config.metacopy = ovl_metacopy_def;
	err = ovl_parse_opt((char *) data, &ofs->config);
	if (err)
		goto out_err;

	err = -EINVAL;
	if (!ofs->config.lowerdir) {
		if (!silent)
			pr_err("missing 'lowerdir'\n");
		goto out_err;
	}

	/* Work on a private copy; config.lowerdir itself is left untouched */
	err = -ENOMEM;
	splitlower = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
	if (!splitlower)
		goto out_err;

	err = -EINVAL;
	numlower = ovl_split_lowerdirs(splitlower);
	if (numlower > OVL_MAX_STACK) {
		pr_err("too many lower directories, limit is %d\n",
		       OVL_MAX_STACK);
		goto out_err;
	}

	err = -ENOMEM;
	layers = kcalloc(numlower + 1, sizeof(struct ovl_layer), GFP_KERNEL);
	if (!layers)
		goto out_err;

	ofs->layers = layers;
	/* Layer 0 is reserved for upper even if there's no upper */
	ofs->numlayer = 1;

	sb->s_stack_depth = 0;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	atomic_long_set(&ofs->last_ino, 1);
	/* Assume underlying fs uses 32bit inodes unless proven otherwise */
	if (ofs->config.xino != OVL_XINO_OFF) {
		ofs->xino_mode = BITS_PER_LONG - 32;
		if (!ofs->xino_mode) {
			pr_warn("xino not supported on 32bit kernel, falling back to xino=off.\n");
			ofs->config.xino = OVL_XINO_OFF;
		}
	}

	/* alloc/destroy_inode needed for setting up traps in inode cache */
	sb->s_op = &ovl_super_operations;

	if (ofs->config.upperdir) {
		struct super_block *upper_sb;

		err = -EINVAL;
		if (!ofs->config.workdir) {
			pr_err("missing 'workdir'\n");
			goto out_err;
		}

		err = ovl_get_upper(sb, ofs, &layers[0], &upperpath);
		if (err)
			goto out_err;

		upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
		if (!ovl_should_sync(ofs)) {
			ofs->errseq = errseq_sample(&upper_sb->s_wb_err);
			if (errseq_check(&upper_sb->s_wb_err, ofs->errseq)) {
				err = -EIO;
				pr_err("Cannot mount volatile when upperdir has an unseen error. Sync upperdir fs to clear state.\n");
				goto out_err;
			}
		}

		err = ovl_get_workdir(sb, ofs, &upperpath);
		if (err)
			goto out_err;

		/* ovl_get_workdir() can succeed without producing a workdir */
		if (!ofs->workdir)
			sb->s_flags |= SB_RDONLY;

		sb->s_stack_depth = upper_sb->s_stack_depth;
		sb->s_time_gran = upper_sb->s_time_gran;
	}
	oe = ovl_get_lowerstack(sb, splitlower, numlower, ofs, layers);
	err = PTR_ERR(oe);
	if (IS_ERR(oe))
		goto out_err;

	/* If the upper fs is nonexistent, we mark overlayfs r/o too */
	if (!ovl_upper_mnt(ofs))
		sb->s_flags |= SB_RDONLY;

	if (!ofs->config.uuid && ofs->numfs > 1) {
		pr_warn("The uuid=off requires a single fs for lower and upper, falling back to uuid=on.\n");
		ofs->config.uuid = true;
	}

	if (!ovl_force_readonly(ofs) && ofs->config.index) {
		err = ovl_get_indexdir(sb, ofs, oe, &upperpath);
		if (err)
			goto out_free_oe;

		/* Force r/o mount with no index dir */
		if (!ofs->indexdir)
			sb->s_flags |= SB_RDONLY;
	}

	err = ovl_check_overlapping_layers(sb, ofs);
	if (err)
		goto out_free_oe;

	/* Show index=off in /proc/mounts for forced r/o mount */
	if (!ofs->indexdir) {
		ofs->config.index = false;
		if (ovl_upper_mnt(ofs) && ofs->config.nfs_export) {
			pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n");
			ofs->config.nfs_export = false;
		}
	}

	if (ofs->config.metacopy && ofs->config.nfs_export) {
		pr_warn("NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
		ofs->config.nfs_export = false;
	}

	if (ofs->config.nfs_export)
		sb->s_export_op = &ovl_export_operations;

	/* Never override disk quota limits or use reserved space */
	cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);

	sb->s_magic = OVERLAYFS_SUPER_MAGIC;
	sb->s_xattr = ofs->config.userxattr ? ovl_user_xattr_handlers :
		ovl_trusted_xattr_handlers;
	sb->s_fs_info = ofs;
	sb->s_flags |= SB_POSIXACL;
	sb->s_iflags |= SB_I_SKIP_SYNC;

	err = -ENOMEM;
	root_dentry = ovl_get_root(sb, upperpath.dentry, oe);
	if (!root_dentry)
		goto out_free_oe;

	/*
	 * Only the mount half of upperpath is put here; the dentry was passed
	 * to ovl_get_root() above.
	 * NOTE(review): presumably the root inode now owns that dentry
	 * reference - confirm in ovl_inode_init().
	 */
	mntput(upperpath.mnt);
	kfree(splitlower);

	sb->s_root = root_dentry;

	return 0;

out_free_oe:
	ovl_entry_stack_free(oe);
	kfree(oe);
out_err:
	kfree(splitlower);
	path_put(&upperpath);
	ovl_free_fs(ofs);
out:
	return err;
}

/*
 * ->mount callback of ovl_fs_type: overlay has no backing device, so
 * @dev_name is ignored and the super block is set up by mount_nodev() with
 * ovl_fill_super().
 */
static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
				const char *dev_name, void *raw_data)
{
	return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
}

/* Filesystem type "overlay", registered in ovl_init() */
static struct file_system_type ovl_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "overlay",
	.fs_flags	= FS_USERNS_MOUNT,
	.mount		= ovl_mount,
	.kill_sb	= kill_anon_super,
};
MODULE_ALIAS_FS("overlay");

/*
 * Constructor passed to kmem_cache_create() for the ovl_inode cache: runs
 * inode_init_once() on the VFS inode embedded in each object.
 */
static void ovl_inode_init_once(void *foo)
{
	struct ovl_inode *oi = foo;

	inode_init_once(&oi->vfs_inode);
}

/*
 * Module init: create the ovl_inode cache, initialise the aio request cache
 * and register the filesystem. Whatever was set up is torn down again, in
 * reverse order, when a later step fails. Returns 0 or a negative errno.
 */
static int __init ovl_init(void)
{
	int err;

	ovl_inode_cachep = kmem_cache_create("ovl_inode",
					     sizeof(struct ovl_inode), 0,
					     (SLAB_RECLAIM_ACCOUNT|
					      SLAB_MEM_SPREAD|SLAB_ACCOUNT),
					     ovl_inode_init_once);
	if (ovl_inode_cachep == NULL)
		return -ENOMEM;

	err = ovl_aio_request_cache_init();
	if (!err) {
		err = register_filesystem(&ovl_fs_type);
		if (!err)
			return 0;

		ovl_aio_request_cache_destroy();
	}
	kmem_cache_destroy(ovl_inode_cachep);

	return err;
}

/*
 * Module exit: unregister the filesystem, then release both caches.
 */
static void __exit ovl_exit(void)
{
	unregister_filesystem(&ovl_fs_type);

	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();
	kmem_cache_destroy(ovl_inode_cachep);
	ovl_aio_request_cache_destroy();
}

module_init(ovl_init);
module_exit(ovl_exit);