1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * 4 * Copyright (C) 2011 Novell Inc. 5 */ 6 7 #include <uapi/linux/magic.h> 8 #include <linux/fs.h> 9 #include <linux/namei.h> 10 #include <linux/xattr.h> 11 #include <linux/mount.h> 12 #include <linux/parser.h> 13 #include <linux/module.h> 14 #include <linux/statfs.h> 15 #include <linux/seq_file.h> 16 #include <linux/posix_acl_xattr.h> 17 #include <linux/exportfs.h> 18 #include "overlayfs.h" 19 20 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 21 MODULE_DESCRIPTION("Overlay filesystem"); 22 MODULE_LICENSE("GPL"); 23 24 25 struct ovl_dir_cache; 26 27 #define OVL_MAX_STACK 500 28 29 static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR); 30 module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644); 31 MODULE_PARM_DESC(redirect_dir, 32 "Default to on or off for the redirect_dir feature"); 33 34 static bool ovl_redirect_always_follow = 35 IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW); 36 module_param_named(redirect_always_follow, ovl_redirect_always_follow, 37 bool, 0644); 38 MODULE_PARM_DESC(redirect_always_follow, 39 "Follow redirects even if redirect_dir feature is turned off"); 40 41 static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX); 42 module_param_named(index, ovl_index_def, bool, 0644); 43 MODULE_PARM_DESC(index, 44 "Default to on or off for the inodes index feature"); 45 46 static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT); 47 module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644); 48 MODULE_PARM_DESC(nfs_export, 49 "Default to on or off for the NFS export feature"); 50 51 static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO); 52 module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644); 53 MODULE_PARM_DESC(xino_auto, 54 "Auto enable xino feature"); 55 56 static void ovl_entry_stack_free(struct ovl_entry *oe) 57 { 58 unsigned int i; 59 60 for (i = 0; i < oe->numlower; i++) 61 dput(oe->lowerstack[i].dentry); 62 } 63 64 static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY); 65 module_param_named(metacopy, ovl_metacopy_def, bool, 0644); 66 MODULE_PARM_DESC(metacopy, 67 "Default to on or off for the metadata only copy up feature"); 68 69 static void ovl_dentry_release(struct dentry *dentry) 70 { 71 struct ovl_entry *oe = dentry->d_fsdata; 72 73 if (oe) { 74 ovl_entry_stack_free(oe); 75 kfree_rcu(oe, rcu); 76 } 77 } 78 79 static struct dentry *ovl_d_real(struct dentry *dentry, 80 const struct inode *inode) 81 { 82 struct dentry *real; 83 84 /* It's an overlay file */ 85 if (inode && d_inode(dentry) == inode) 86 return dentry; 87 88 if (!d_is_reg(dentry)) { 89 if (!inode || inode == d_inode(dentry)) 90 return dentry; 91 goto bug; 92 } 93 94 real = ovl_dentry_upper(dentry); 95 if (real && (inode == d_inode(real))) 96 return real; 97 98 if (real && !inode && ovl_has_upperdata(d_inode(dentry))) 99 return real; 100 101 real = ovl_dentry_lowerdata(dentry); 102 if (!real) 103 goto bug; 104 105 /* Handle recursion */ 106 real = d_real(real, inode); 107 108 if (!inode || inode == d_inode(real)) 109 return real; 110 bug: 111 WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry, 112 inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0); 113 return dentry; 114 } 115 116 static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak) 117 { 118 int ret = 1; 119 120 if (weak) { 121 if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) 122 ret = d->d_op->d_weak_revalidate(d, flags); 123 } else if (d->d_flags & DCACHE_OP_REVALIDATE) { 124 ret = d->d_op->d_revalidate(d, flags); 125 if (!ret) { 126 if (!(flags & LOOKUP_RCU)) 127 d_invalidate(d); 128 ret = -ESTALE; 129 } 130 } 131 return ret; 132 } 133 134 static int ovl_dentry_revalidate_common(struct dentry *dentry, 135 unsigned int flags, bool weak) 136 { 137 struct ovl_entry *oe = dentry->d_fsdata; 138 struct dentry *upper; 139 unsigned int i; 140 int ret = 1; 141 142 upper = ovl_dentry_upper(dentry); 143 if (upper) 144 ret = ovl_revalidate_real(upper, flags, weak); 145 146 for (i = 0; ret > 0 && i < oe->numlower; i++) { 147 ret = ovl_revalidate_real(oe->lowerstack[i].dentry, flags, 148 weak); 149 } 150 return ret; 151 } 152 153 static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) 154 { 155 return ovl_dentry_revalidate_common(dentry, flags, false); 156 } 157 158 static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) 159 { 160 return ovl_dentry_revalidate_common(dentry, flags, true); 161 } 162 163 static const struct dentry_operations ovl_dentry_operations = { 164 .d_release = ovl_dentry_release, 165 .d_real = ovl_d_real, 166 .d_revalidate = ovl_dentry_revalidate, 167 .d_weak_revalidate = ovl_dentry_weak_revalidate, 168 }; 169 170 static struct kmem_cache *ovl_inode_cachep; 171 172 static struct inode *ovl_alloc_inode(struct super_block *sb) 173 { 174 struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL); 175 176 if (!oi) 177 return NULL; 178 179 oi->cache = NULL; 180 oi->redirect = NULL; 181 oi->version = 0; 182 oi->flags = 0; 183 oi->__upperdentry = NULL; 184 oi->lower = NULL; 185 oi->lowerdata = NULL; 186 mutex_init(&oi->lock); 187 188 return &oi->vfs_inode; 189 } 190 191 static void ovl_free_inode(struct inode *inode) 192 { 193 struct ovl_inode *oi = OVL_I(inode); 194 195 kfree(oi->redirect); 196 mutex_destroy(&oi->lock); 197 kmem_cache_free(ovl_inode_cachep, oi); 198 } 199 200 static void ovl_destroy_inode(struct inode *inode) 201 { 202 struct ovl_inode *oi = OVL_I(inode); 203 204 dput(oi->__upperdentry); 205 iput(oi->lower); 206 if (S_ISDIR(inode->i_mode)) 207 ovl_dir_cache_free(inode); 208 else 209 iput(oi->lowerdata); 210 } 211 212 static void ovl_free_fs(struct ovl_fs *ofs) 213 { 214 struct vfsmount **mounts; 215 unsigned i; 216 217 iput(ofs->workbasedir_trap); 218 iput(ofs->indexdir_trap); 219 iput(ofs->workdir_trap); 220 dput(ofs->whiteout); 221 dput(ofs->indexdir); 222 dput(ofs->workdir); 223 if (ofs->workdir_locked) 224 ovl_inuse_unlock(ofs->workbasedir); 225 dput(ofs->workbasedir); 226 if (ofs->upperdir_locked) 227 ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root); 228 229 /* Hack! Reuse ofs->layers as a vfsmount array before freeing it */ 230 mounts = (struct vfsmount **) ofs->layers; 231 for (i = 0; i < ofs->numlayer; i++) { 232 iput(ofs->layers[i].trap); 233 mounts[i] = ofs->layers[i].mnt; 234 } 235 kern_unmount_array(mounts, ofs->numlayer); 236 kfree(ofs->layers); 237 for (i = 0; i < ofs->numfs; i++) 238 free_anon_bdev(ofs->fs[i].pseudo_dev); 239 kfree(ofs->fs); 240 241 kfree(ofs->config.lowerdir); 242 kfree(ofs->config.upperdir); 243 kfree(ofs->config.workdir); 244 kfree(ofs->config.redirect_mode); 245 if (ofs->creator_cred) 246 put_cred(ofs->creator_cred); 247 kfree(ofs); 248 } 249 250 static void ovl_put_super(struct super_block *sb) 251 { 252 struct ovl_fs *ofs = sb->s_fs_info; 253 254 ovl_free_fs(ofs); 255 } 256 257 /* Sync real dirty inodes in upper filesystem (if it exists) */ 258 static int ovl_sync_fs(struct super_block *sb, int wait) 259 { 260 struct ovl_fs *ofs = sb->s_fs_info; 261 struct super_block *upper_sb; 262 int ret; 263 264 if (!ovl_upper_mnt(ofs)) 265 return 0; 266 267 /* 268 * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC). 269 * All the super blocks will be iterated, including upper_sb. 270 * 271 * If this is a syncfs(2) call, then we do need to call 272 * sync_filesystem() on upper_sb, but enough if we do it when being 273 * called with wait == 1. 274 */ 275 if (!wait) 276 return 0; 277 278 upper_sb = ovl_upper_mnt(ofs)->mnt_sb; 279 280 down_read(&upper_sb->s_umount); 281 ret = sync_filesystem(upper_sb); 282 up_read(&upper_sb->s_umount); 283 284 return ret; 285 } 286 287 /** 288 * ovl_statfs 289 * @sb: The overlayfs super block 290 * @buf: The struct kstatfs to fill in with stats 291 * 292 * Get the filesystem statistics. As writes always target the upper layer 293 * filesystem pass the statfs to the upper filesystem (if it exists) 294 */ 295 static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) 296 { 297 struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 298 struct dentry *root_dentry = dentry->d_sb->s_root; 299 struct path path; 300 int err; 301 302 ovl_path_real(root_dentry, &path); 303 304 err = vfs_statfs(&path, buf); 305 if (!err) { 306 buf->f_namelen = ofs->namelen; 307 buf->f_type = OVERLAYFS_SUPER_MAGIC; 308 } 309 310 return err; 311 } 312 313 /* Will this overlay be forced to mount/remount ro? */ 314 static bool ovl_force_readonly(struct ovl_fs *ofs) 315 { 316 return (!ovl_upper_mnt(ofs) || !ofs->workdir); 317 } 318 319 static const char *ovl_redirect_mode_def(void) 320 { 321 return ovl_redirect_dir_def ? "on" : "off"; 322 } 323 324 static const char * const ovl_xino_str[] = { 325 "off", 326 "auto", 327 "on", 328 }; 329 330 static inline int ovl_xino_def(void) 331 { 332 return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF; 333 } 334 335 /** 336 * ovl_show_options 337 * 338 * Prints the mount options for a given superblock. 339 * Returns zero; does not fail. 340 */ 341 static int ovl_show_options(struct seq_file *m, struct dentry *dentry) 342 { 343 struct super_block *sb = dentry->d_sb; 344 struct ovl_fs *ofs = sb->s_fs_info; 345 346 seq_show_option(m, "lowerdir", ofs->config.lowerdir); 347 if (ofs->config.upperdir) { 348 seq_show_option(m, "upperdir", ofs->config.upperdir); 349 seq_show_option(m, "workdir", ofs->config.workdir); 350 } 351 if (ofs->config.default_permissions) 352 seq_puts(m, ",default_permissions"); 353 if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0) 354 seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode); 355 if (ofs->config.index != ovl_index_def) 356 seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off"); 357 if (ofs->config.nfs_export != ovl_nfs_export_def) 358 seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ? 359 "on" : "off"); 360 if (ofs->config.xino != ovl_xino_def() && !ovl_same_fs(sb)) 361 seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]); 362 if (ofs->config.metacopy != ovl_metacopy_def) 363 seq_printf(m, ",metacopy=%s", 364 ofs->config.metacopy ? "on" : "off"); 365 return 0; 366 } 367 368 static int ovl_remount(struct super_block *sb, int *flags, char *data) 369 { 370 struct ovl_fs *ofs = sb->s_fs_info; 371 struct super_block *upper_sb; 372 int ret = 0; 373 374 if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs)) 375 return -EROFS; 376 377 if (*flags & SB_RDONLY && !sb_rdonly(sb)) { 378 upper_sb = ovl_upper_mnt(ofs)->mnt_sb; 379 down_read(&upper_sb->s_umount); 380 ret = sync_filesystem(upper_sb); 381 up_read(&upper_sb->s_umount); 382 } 383 384 return ret; 385 } 386 387 static const struct super_operations ovl_super_operations = { 388 .alloc_inode = ovl_alloc_inode, 389 .free_inode = ovl_free_inode, 390 .destroy_inode = ovl_destroy_inode, 391 .drop_inode = generic_delete_inode, 392 .put_super = ovl_put_super, 393 .sync_fs = ovl_sync_fs, 394 .statfs = ovl_statfs, 395 .show_options = ovl_show_options, 396 .remount_fs = ovl_remount, 397 }; 398 399 enum { 400 OPT_LOWERDIR, 401 OPT_UPPERDIR, 402 OPT_WORKDIR, 403 OPT_DEFAULT_PERMISSIONS, 404 OPT_REDIRECT_DIR, 405 OPT_INDEX_ON, 406 OPT_INDEX_OFF, 407 OPT_NFS_EXPORT_ON, 408 OPT_NFS_EXPORT_OFF, 409 OPT_XINO_ON, 410 OPT_XINO_OFF, 411 OPT_XINO_AUTO, 412 OPT_METACOPY_ON, 413 OPT_METACOPY_OFF, 414 OPT_ERR, 415 }; 416 417 static const match_table_t ovl_tokens = { 418 {OPT_LOWERDIR, "lowerdir=%s"}, 419 {OPT_UPPERDIR, "upperdir=%s"}, 420 {OPT_WORKDIR, "workdir=%s"}, 421 {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, 422 {OPT_REDIRECT_DIR, "redirect_dir=%s"}, 423 {OPT_INDEX_ON, "index=on"}, 424 {OPT_INDEX_OFF, "index=off"}, 425 {OPT_NFS_EXPORT_ON, "nfs_export=on"}, 426 {OPT_NFS_EXPORT_OFF, "nfs_export=off"}, 427 {OPT_XINO_ON, "xino=on"}, 428 {OPT_XINO_OFF, "xino=off"}, 429 {OPT_XINO_AUTO, "xino=auto"}, 430 {OPT_METACOPY_ON, "metacopy=on"}, 431 {OPT_METACOPY_OFF, "metacopy=off"}, 432 {OPT_ERR, NULL} 433 }; 434 435 static char *ovl_next_opt(char **s) 436 { 437 char *sbegin = *s; 438 char *p; 439 440 if (sbegin == NULL) 441 return NULL; 442 443 for (p = sbegin; *p; p++) { 444 if (*p == '\\') { 445 p++; 446 if (!*p) 447 break; 448 } else if (*p == ',') { 449 *p = '\0'; 450 *s = p + 1; 451 return sbegin; 452 } 453 } 454 *s = NULL; 455 return sbegin; 456 } 457 458 static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode) 459 { 460 if (strcmp(mode, "on") == 0) { 461 config->redirect_dir = true; 462 /* 463 * Does not make sense to have redirect creation without 464 * redirect following. 465 */ 466 config->redirect_follow = true; 467 } else if (strcmp(mode, "follow") == 0) { 468 config->redirect_follow = true; 469 } else if (strcmp(mode, "off") == 0) { 470 if (ovl_redirect_always_follow) 471 config->redirect_follow = true; 472 } else if (strcmp(mode, "nofollow") != 0) { 473 pr_err("bad mount option \"redirect_dir=%s\"\n", 474 mode); 475 return -EINVAL; 476 } 477 478 return 0; 479 } 480 481 static int ovl_parse_opt(char *opt, struct ovl_config *config) 482 { 483 char *p; 484 int err; 485 bool metacopy_opt = false, redirect_opt = false; 486 bool nfs_export_opt = false, index_opt = false; 487 488 config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL); 489 if (!config->redirect_mode) 490 return -ENOMEM; 491 492 while ((p = ovl_next_opt(&opt)) != NULL) { 493 int token; 494 substring_t args[MAX_OPT_ARGS]; 495 496 if (!*p) 497 continue; 498 499 token = match_token(p, ovl_tokens, args); 500 switch (token) { 501 case OPT_UPPERDIR: 502 kfree(config->upperdir); 503 config->upperdir = match_strdup(&args[0]); 504 if (!config->upperdir) 505 return -ENOMEM; 506 break; 507 508 case OPT_LOWERDIR: 509 kfree(config->lowerdir); 510 config->lowerdir = match_strdup(&args[0]); 511 if (!config->lowerdir) 512 return -ENOMEM; 513 break; 514 515 case OPT_WORKDIR: 516 kfree(config->workdir); 517 config->workdir = match_strdup(&args[0]); 518 if (!config->workdir) 519 return -ENOMEM; 520 break; 521 522 case OPT_DEFAULT_PERMISSIONS: 523 config->default_permissions = true; 524 break; 525 526 case OPT_REDIRECT_DIR: 527 kfree(config->redirect_mode); 528 config->redirect_mode = match_strdup(&args[0]); 529 if (!config->redirect_mode) 530 return -ENOMEM; 531 redirect_opt = true; 532 break; 533 534 case OPT_INDEX_ON: 535 config->index = true; 536 index_opt = true; 537 break; 538 539 case OPT_INDEX_OFF: 540 config->index = false; 541 index_opt = true; 542 break; 543 544 case OPT_NFS_EXPORT_ON: 545 config->nfs_export = true; 546 nfs_export_opt = true; 547 break; 548 549 case OPT_NFS_EXPORT_OFF: 550 config->nfs_export = false; 551 nfs_export_opt = true; 552 break; 553 554 case OPT_XINO_ON: 555 config->xino = OVL_XINO_ON; 556 break; 557 558 case OPT_XINO_OFF: 559 config->xino = OVL_XINO_OFF; 560 break; 561 562 case OPT_XINO_AUTO: 563 config->xino = OVL_XINO_AUTO; 564 break; 565 566 case OPT_METACOPY_ON: 567 config->metacopy = true; 568 metacopy_opt = true; 569 break; 570 571 case OPT_METACOPY_OFF: 572 config->metacopy = false; 573 metacopy_opt = true; 574 break; 575 576 default: 577 pr_err("unrecognized mount option \"%s\" or missing value\n", 578 p); 579 return -EINVAL; 580 } 581 } 582 583 /* Workdir/index are useless in non-upper mount */ 584 if (!config->upperdir) { 585 if (config->workdir) { 586 pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n", 587 config->workdir); 588 kfree(config->workdir); 589 config->workdir = NULL; 590 } 591 if (config->index && index_opt) { 592 pr_info("option \"index=on\" is useless in a non-upper mount, ignore\n"); 593 index_opt = false; 594 } 595 config->index = false; 596 } 597 598 err = ovl_parse_redirect_mode(config, config->redirect_mode); 599 if (err) 600 return err; 601 602 /* 603 * This is to make the logic below simpler. It doesn't make any other 604 * difference, since config->redirect_dir is only used for upper. 605 */ 606 if (!config->upperdir && config->redirect_follow) 607 config->redirect_dir = true; 608 609 /* Resolve metacopy -> redirect_dir dependency */ 610 if (config->metacopy && !config->redirect_dir) { 611 if (metacopy_opt && redirect_opt) { 612 pr_err("conflicting options: metacopy=on,redirect_dir=%s\n", 613 config->redirect_mode); 614 return -EINVAL; 615 } 616 if (redirect_opt) { 617 /* 618 * There was an explicit redirect_dir=... that resulted 619 * in this conflict. 620 */ 621 pr_info("disabling metacopy due to redirect_dir=%s\n", 622 config->redirect_mode); 623 config->metacopy = false; 624 } else { 625 /* Automatically enable redirect otherwise. */ 626 config->redirect_follow = config->redirect_dir = true; 627 } 628 } 629 630 /* Resolve nfs_export -> index dependency */ 631 if (config->nfs_export && !config->index) { 632 if (!config->upperdir && config->redirect_follow) { 633 pr_info("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n"); 634 config->nfs_export = false; 635 } else if (nfs_export_opt && index_opt) { 636 pr_err("conflicting options: nfs_export=on,index=off\n"); 637 return -EINVAL; 638 } else if (index_opt) { 639 /* 640 * There was an explicit index=off that resulted 641 * in this conflict. 642 */ 643 pr_info("disabling nfs_export due to index=off\n"); 644 config->nfs_export = false; 645 } else { 646 /* Automatically enable index otherwise. */ 647 config->index = true; 648 } 649 } 650 651 /* Resolve nfs_export -> !metacopy dependency */ 652 if (config->nfs_export && config->metacopy) { 653 if (nfs_export_opt && metacopy_opt) { 654 pr_err("conflicting options: nfs_export=on,metacopy=on\n"); 655 return -EINVAL; 656 } 657 if (metacopy_opt) { 658 /* 659 * There was an explicit metacopy=on that resulted 660 * in this conflict. 661 */ 662 pr_info("disabling nfs_export due to metacopy=on\n"); 663 config->nfs_export = false; 664 } else { 665 /* 666 * There was an explicit nfs_export=on that resulted 667 * in this conflict. 668 */ 669 pr_info("disabling metacopy due to nfs_export=on\n"); 670 config->metacopy = false; 671 } 672 } 673 674 return 0; 675 } 676 677 #define OVL_WORKDIR_NAME "work" 678 #define OVL_INDEXDIR_NAME "index" 679 680 static struct dentry *ovl_workdir_create(struct ovl_fs *ofs, 681 const char *name, bool persist) 682 { 683 struct inode *dir = ofs->workbasedir->d_inode; 684 struct vfsmount *mnt = ovl_upper_mnt(ofs); 685 struct dentry *work; 686 int err; 687 bool retried = false; 688 689 inode_lock_nested(dir, I_MUTEX_PARENT); 690 retry: 691 work = lookup_one_len(name, ofs->workbasedir, strlen(name)); 692 693 if (!IS_ERR(work)) { 694 struct iattr attr = { 695 .ia_valid = ATTR_MODE, 696 .ia_mode = S_IFDIR | 0, 697 }; 698 699 if (work->d_inode) { 700 err = -EEXIST; 701 if (retried) 702 goto out_dput; 703 704 if (persist) 705 goto out_unlock; 706 707 retried = true; 708 ovl_workdir_cleanup(dir, mnt, work, 0); 709 dput(work); 710 goto retry; 711 } 712 713 work = ovl_create_real(dir, work, OVL_CATTR(attr.ia_mode)); 714 err = PTR_ERR(work); 715 if (IS_ERR(work)) 716 goto out_err; 717 718 /* 719 * Try to remove POSIX ACL xattrs from workdir. We are good if: 720 * 721 * a) success (there was a POSIX ACL xattr and was removed) 722 * b) -ENODATA (there was no POSIX ACL xattr) 723 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported) 724 * 725 * There are various other error values that could effectively 726 * mean that the xattr doesn't exist (e.g. -ERANGE is returned 727 * if the xattr name is too long), but the set of filesystems 728 * allowed as upper are limited to "normal" ones, where checking 729 * for the above two errors is sufficient. 730 */ 731 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT); 732 if (err && err != -ENODATA && err != -EOPNOTSUPP) 733 goto out_dput; 734 735 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS); 736 if (err && err != -ENODATA && err != -EOPNOTSUPP) 737 goto out_dput; 738 739 /* Clear any inherited mode bits */ 740 inode_lock(work->d_inode); 741 err = notify_change(work, &attr, NULL); 742 inode_unlock(work->d_inode); 743 if (err) 744 goto out_dput; 745 } else { 746 err = PTR_ERR(work); 747 goto out_err; 748 } 749 out_unlock: 750 inode_unlock(dir); 751 return work; 752 753 out_dput: 754 dput(work); 755 out_err: 756 pr_warn("failed to create directory %s/%s (errno: %i); mounting read-only\n", 757 ofs->config.workdir, name, -err); 758 work = NULL; 759 goto out_unlock; 760 } 761 762 static void ovl_unescape(char *s) 763 { 764 char *d = s; 765 766 for (;; s++, d++) { 767 if (*s == '\\') 768 s++; 769 *d = *s; 770 if (!*s) 771 break; 772 } 773 } 774 775 static int ovl_mount_dir_noesc(const char *name, struct path *path) 776 { 777 int err = -EINVAL; 778 779 if (!*name) { 780 pr_err("empty lowerdir\n"); 781 goto out; 782 } 783 err = kern_path(name, LOOKUP_FOLLOW, path); 784 if (err) { 785 pr_err("failed to resolve '%s': %i\n", name, err); 786 goto out; 787 } 788 err = -EINVAL; 789 if (ovl_dentry_weird(path->dentry)) { 790 pr_err("filesystem on '%s' not supported\n", name); 791 goto out_put; 792 } 793 if (!d_is_dir(path->dentry)) { 794 pr_err("'%s' not a directory\n", name); 795 goto out_put; 796 } 797 return 0; 798 799 out_put: 800 path_put_init(path); 801 out: 802 return err; 803 } 804 805 static int ovl_mount_dir(const char *name, struct path *path) 806 { 807 int err = -ENOMEM; 808 char *tmp = kstrdup(name, GFP_KERNEL); 809 810 if (tmp) { 811 ovl_unescape(tmp); 812 err = ovl_mount_dir_noesc(tmp, path); 813 814 if (!err && path->dentry->d_flags & DCACHE_OP_REAL) { 815 pr_err("filesystem on '%s' not supported as upperdir\n", 816 tmp); 817 path_put_init(path); 818 err = -EINVAL; 819 } 820 kfree(tmp); 821 } 822 return err; 823 } 824 825 static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs, 826 const char *name) 827 { 828 struct kstatfs statfs; 829 int err = vfs_statfs(path, &statfs); 830 831 if (err) 832 pr_err("statfs failed on '%s'\n", name); 833 else 834 ofs->namelen = max(ofs->namelen, statfs.f_namelen); 835 836 return err; 837 } 838 839 static int ovl_lower_dir(const char *name, struct path *path, 840 struct ovl_fs *ofs, int *stack_depth) 841 { 842 int fh_type; 843 int err; 844 845 err = ovl_mount_dir_noesc(name, path); 846 if (err) 847 return err; 848 849 err = ovl_check_namelen(path, ofs, name); 850 if (err) 851 return err; 852 853 *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth); 854 855 /* 856 * The inodes index feature and NFS export need to encode and decode 857 * file handles, so they require that all layers support them. 858 */ 859 fh_type = ovl_can_decode_fh(path->dentry->d_sb); 860 if ((ofs->config.nfs_export || 861 (ofs->config.index && ofs->config.upperdir)) && !fh_type) { 862 ofs->config.index = false; 863 ofs->config.nfs_export = false; 864 pr_warn("fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n", 865 name); 866 } 867 868 /* Check if lower fs has 32bit inode numbers */ 869 if (fh_type != FILEID_INO32_GEN) 870 ofs->xino_mode = -1; 871 872 return 0; 873 } 874 875 /* Workdir should not be subdir of upperdir and vice versa */ 876 static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) 877 { 878 bool ok = false; 879 880 if (workdir != upperdir) { 881 ok = (lock_rename(workdir, upperdir) == NULL); 882 unlock_rename(workdir, upperdir); 883 } 884 return ok; 885 } 886 887 static unsigned int ovl_split_lowerdirs(char *str) 888 { 889 unsigned int ctr = 1; 890 char *s, *d; 891 892 for (s = d = str;; s++, d++) { 893 if (*s == '\\') { 894 s++; 895 } else if (*s == ':') { 896 *d = '\0'; 897 ctr++; 898 continue; 899 } 900 *d = *s; 901 if (!*s) 902 break; 903 } 904 return ctr; 905 } 906 907 static int __maybe_unused 908 ovl_posix_acl_xattr_get(const struct xattr_handler *handler, 909 struct dentry *dentry, struct inode *inode, 910 const char *name, void *buffer, size_t size) 911 { 912 return ovl_xattr_get(dentry, inode, handler->name, buffer, size); 913 } 914 915 static int __maybe_unused 916 ovl_posix_acl_xattr_set(const struct xattr_handler *handler, 917 struct dentry *dentry, struct inode *inode, 918 const char *name, const void *value, 919 size_t size, int flags) 920 { 921 struct dentry *workdir = ovl_workdir(dentry); 922 struct inode *realinode = ovl_inode_real(inode); 923 struct posix_acl *acl = NULL; 924 int err; 925 926 /* Check that everything is OK before copy-up */ 927 if (value) { 928 acl = posix_acl_from_xattr(&init_user_ns, value, size); 929 if (IS_ERR(acl)) 930 return PTR_ERR(acl); 931 } 932 err = -EOPNOTSUPP; 933 if (!IS_POSIXACL(d_inode(workdir))) 934 goto out_acl_release; 935 if (!realinode->i_op->set_acl) 936 goto out_acl_release; 937 if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) { 938 err = acl ? -EACCES : 0; 939 goto out_acl_release; 940 } 941 err = -EPERM; 942 if (!inode_owner_or_capable(inode)) 943 goto out_acl_release; 944 945 posix_acl_release(acl); 946 947 /* 948 * Check if sgid bit needs to be cleared (actual setacl operation will 949 * be done with mounter's capabilities and so that won't do it for us). 950 */ 951 if (unlikely(inode->i_mode & S_ISGID) && 952 handler->flags == ACL_TYPE_ACCESS && 953 !in_group_p(inode->i_gid) && 954 !capable_wrt_inode_uidgid(inode, CAP_FSETID)) { 955 struct iattr iattr = { .ia_valid = ATTR_KILL_SGID }; 956 957 err = ovl_setattr(dentry, &iattr); 958 if (err) 959 return err; 960 } 961 962 err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags); 963 if (!err) 964 ovl_copyattr(ovl_inode_real(inode), inode); 965 966 return err; 967 968 out_acl_release: 969 posix_acl_release(acl); 970 return err; 971 } 972 973 static int ovl_own_xattr_get(const struct xattr_handler *handler, 974 struct dentry *dentry, struct inode *inode, 975 const char *name, void *buffer, size_t size) 976 { 977 return -EOPNOTSUPP; 978 } 979 980 static int ovl_own_xattr_set(const struct xattr_handler *handler, 981 struct dentry *dentry, struct inode *inode, 982 const char *name, const void *value, 983 size_t size, int flags) 984 { 985 return -EOPNOTSUPP; 986 } 987 988 static int ovl_other_xattr_get(const struct xattr_handler *handler, 989 struct dentry *dentry, struct inode *inode, 990 const char *name, void *buffer, size_t size) 991 { 992 return ovl_xattr_get(dentry, inode, name, buffer, size); 993 } 994 995 static int ovl_other_xattr_set(const struct xattr_handler *handler, 996 struct dentry *dentry, struct inode *inode, 997 const char *name, const void *value, 998 size_t size, int flags) 999 { 1000 return ovl_xattr_set(dentry, inode, name, value, size, flags); 1001 } 1002 1003 static const struct xattr_handler __maybe_unused 1004 ovl_posix_acl_access_xattr_handler = { 1005 .name = XATTR_NAME_POSIX_ACL_ACCESS, 1006 .flags = ACL_TYPE_ACCESS, 1007 .get = ovl_posix_acl_xattr_get, 1008 .set = ovl_posix_acl_xattr_set, 1009 }; 1010 1011 static const struct xattr_handler __maybe_unused 1012 ovl_posix_acl_default_xattr_handler = { 1013 .name = XATTR_NAME_POSIX_ACL_DEFAULT, 1014 .flags = ACL_TYPE_DEFAULT, 1015 .get = ovl_posix_acl_xattr_get, 1016 .set = ovl_posix_acl_xattr_set, 1017 }; 1018 1019 static const struct xattr_handler ovl_own_xattr_handler = { 1020 .prefix = OVL_XATTR_PREFIX, 1021 .get = ovl_own_xattr_get, 1022 .set = ovl_own_xattr_set, 1023 }; 1024 1025 static const struct xattr_handler ovl_other_xattr_handler = { 1026 .prefix = "", /* catch all */ 1027 .get = ovl_other_xattr_get, 1028 .set = ovl_other_xattr_set, 1029 }; 1030 1031 static const struct xattr_handler *ovl_xattr_handlers[] = { 1032 #ifdef CONFIG_FS_POSIX_ACL 1033 &ovl_posix_acl_access_xattr_handler, 1034 &ovl_posix_acl_default_xattr_handler, 1035 #endif 1036 &ovl_own_xattr_handler, 1037 &ovl_other_xattr_handler, 1038 NULL 1039 }; 1040 1041 static int ovl_setup_trap(struct super_block *sb, struct dentry *dir, 1042 struct inode **ptrap, const char *name) 1043 { 1044 struct inode *trap; 1045 int err; 1046 1047 trap = ovl_get_trap_inode(sb, dir); 1048 err = PTR_ERR_OR_ZERO(trap); 1049 if (err) { 1050 if (err == -ELOOP) 1051 pr_err("conflicting %s path\n", name); 1052 return err; 1053 } 1054 1055 *ptrap = trap; 1056 return 0; 1057 } 1058 1059 /* 1060 * Determine how we treat concurrent use of upperdir/workdir based on the 1061 * index feature. This is papering over mount leaks of container runtimes, 1062 * for example, an old overlay mount is leaked and now its upperdir is 1063 * attempted to be used as a lower layer in a new overlay mount. 1064 */ 1065 static int ovl_report_in_use(struct ovl_fs *ofs, const char *name) 1066 { 1067 if (ofs->config.index) { 1068 pr_err("%s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n", 1069 name); 1070 return -EBUSY; 1071 } else { 1072 pr_warn("%s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n", 1073 name); 1074 return 0; 1075 } 1076 } 1077 1078 static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs, 1079 struct ovl_layer *upper_layer, struct path *upperpath) 1080 { 1081 struct vfsmount *upper_mnt; 1082 int err; 1083 1084 err = ovl_mount_dir(ofs->config.upperdir, upperpath); 1085 if (err) 1086 goto out; 1087 1088 /* Upper fs should not be r/o */ 1089 if (sb_rdonly(upperpath->mnt->mnt_sb)) { 1090 pr_err("upper fs is r/o, try multi-lower layers mount\n"); 1091 err = -EINVAL; 1092 goto out; 1093 } 1094 1095 err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir); 1096 if (err) 1097 goto out; 1098 1099 err = ovl_setup_trap(sb, upperpath->dentry, &upper_layer->trap, 1100 "upperdir"); 1101 if (err) 1102 goto out; 1103 1104 upper_mnt = clone_private_mount(upperpath); 1105 err = PTR_ERR(upper_mnt); 1106 if (IS_ERR(upper_mnt)) { 1107 pr_err("failed to clone upperpath\n"); 1108 goto out; 1109 } 1110 1111 /* Don't inherit atime flags */ 1112 upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME); 1113 upper_layer->mnt = upper_mnt; 1114 upper_layer->idx = 0; 1115 upper_layer->fsid = 0; 1116 1117 /* 1118 * Inherit SB_NOSEC flag from upperdir. 1119 * 1120 * This optimization changes behavior when a security related attribute 1121 * (suid/sgid/security.*) is changed on an underlying layer. This is 1122 * okay because we don't yet have guarantees in that case, but it will 1123 * need careful treatment once we want to honour changes to underlying 1124 * filesystems. 1125 */ 1126 if (upper_mnt->mnt_sb->s_flags & SB_NOSEC) 1127 sb->s_flags |= SB_NOSEC; 1128 1129 if (ovl_inuse_trylock(ovl_upper_mnt(ofs)->mnt_root)) { 1130 ofs->upperdir_locked = true; 1131 } else { 1132 err = ovl_report_in_use(ofs, "upperdir"); 1133 if (err) 1134 goto out; 1135 } 1136 1137 err = 0; 1138 out: 1139 return err; 1140 } 1141 1142 /* 1143 * Returns 1 if RENAME_WHITEOUT is supported, 0 if not supported and 1144 * negative values if error is encountered. 1145 */ 1146 static int ovl_check_rename_whiteout(struct dentry *workdir) 1147 { 1148 struct inode *dir = d_inode(workdir); 1149 struct dentry *temp; 1150 struct dentry *dest; 1151 struct dentry *whiteout; 1152 struct name_snapshot name; 1153 int err; 1154 1155 inode_lock_nested(dir, I_MUTEX_PARENT); 1156 1157 temp = ovl_create_temp(workdir, OVL_CATTR(S_IFREG | 0)); 1158 err = PTR_ERR(temp); 1159 if (IS_ERR(temp)) 1160 goto out_unlock; 1161 1162 dest = ovl_lookup_temp(workdir); 1163 err = PTR_ERR(dest); 1164 if (IS_ERR(dest)) { 1165 dput(temp); 1166 goto out_unlock; 1167 } 1168 1169 /* Name is inline and stable - using snapshot as a copy helper */ 1170 take_dentry_name_snapshot(&name, temp); 1171 err = ovl_do_rename(dir, temp, dir, dest, RENAME_WHITEOUT); 1172 if (err) { 1173 if (err == -EINVAL) 1174 err = 0; 1175 goto cleanup_temp; 1176 } 1177 1178 whiteout = lookup_one_len(name.name.name, workdir, name.name.len); 1179 err = PTR_ERR(whiteout); 1180 if (IS_ERR(whiteout)) 1181 goto cleanup_temp; 1182 1183 err = ovl_is_whiteout(whiteout); 1184 1185 /* Best effort cleanup of whiteout and temp file */ 1186 if (err) 1187 ovl_cleanup(dir, whiteout); 1188 dput(whiteout); 1189 1190 cleanup_temp: 1191 ovl_cleanup(dir, temp); 1192 release_dentry_name_snapshot(&name); 1193 dput(temp); 1194 dput(dest); 1195 1196 out_unlock: 1197 inode_unlock(dir); 1198 1199 return err; 1200 } 1201 1202 static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs, 1203 struct path *workpath) 1204 { 1205 struct vfsmount *mnt = ovl_upper_mnt(ofs); 1206 struct dentry *temp; 1207 bool rename_whiteout; 1208 bool d_type; 1209 int fh_type; 1210 int err; 1211 1212 err = mnt_want_write(mnt); 1213 if (err) 1214 return err; 1215 1216 ofs->workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false); 1217 if (!ofs->workdir) 1218 goto out; 1219 1220 err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir"); 1221 if (err) 1222 goto out; 1223 1224 /* 1225 * Upper should support d_type, else whiteouts are visible. Given 1226 * workdir and upper are on same fs, we can do iterate_dir() on 1227 * workdir. This check requires successful creation of workdir in 1228 * previous step. 1229 */ 1230 err = ovl_check_d_type_supported(workpath); 1231 if (err < 0) 1232 goto out; 1233 1234 d_type = err; 1235 if (!d_type) 1236 pr_warn("upper fs needs to support d_type.\n"); 1237 1238 /* Check if upper/work fs supports O_TMPFILE */ 1239 temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0); 1240 ofs->tmpfile = !IS_ERR(temp); 1241 if (ofs->tmpfile) 1242 dput(temp); 1243 else 1244 pr_warn("upper fs does not support tmpfile.\n"); 1245 1246 1247 /* Check if upper/work fs supports RENAME_WHITEOUT */ 1248 err = ovl_check_rename_whiteout(ofs->workdir); 1249 if (err < 0) 1250 goto out; 1251 1252 rename_whiteout = err; 1253 if (!rename_whiteout) 1254 pr_warn("upper fs does not support RENAME_WHITEOUT.\n"); 1255 1256 /* 1257 * Check if upper/work fs supports trusted.overlay.* xattr 1258 */ 1259 err = ovl_do_setxattr(ofs->workdir, OVL_XATTR_OPAQUE, "0", 1, 0); 1260 if (err) { 1261 ofs->noxattr = true; 1262 ofs->config.index = false; 1263 ofs->config.metacopy = false; 1264 pr_warn("upper fs does not support xattr, falling back to index=off and metacopy=off.\n"); 1265 err = 0; 1266 } else { 1267 vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE); 1268 } 1269 1270 /* 1271 * We allowed sub-optimal upper fs configuration and don't want to break 1272 * users over kernel upgrade, but we never allowed remote upper fs, so 1273 * we can enforce strict requirements for remote upper fs. 1274 */ 1275 if (ovl_dentry_remote(ofs->workdir) && 1276 (!d_type || !rename_whiteout || ofs->noxattr)) { 1277 pr_err("upper fs missing required features.\n"); 1278 err = -EINVAL; 1279 goto out; 1280 } 1281 1282 /* Check if upper/work fs supports file handles */ 1283 fh_type = ovl_can_decode_fh(ofs->workdir->d_sb); 1284 if (ofs->config.index && !fh_type) { 1285 ofs->config.index = false; 1286 pr_warn("upper fs does not support file handles, falling back to index=off.\n"); 1287 } 1288 1289 /* Check if upper fs has 32bit inode numbers */ 1290 if (fh_type != FILEID_INO32_GEN) 1291 ofs->xino_mode = -1; 1292 1293 /* NFS export of r/w mount depends on index */ 1294 if (ofs->config.nfs_export && !ofs->config.index) { 1295 pr_warn("NFS export requires \"index=on\", falling back to nfs_export=off.\n"); 1296 ofs->config.nfs_export = false; 1297 } 1298 out: 1299 mnt_drop_write(mnt); 1300 return err; 1301 } 1302 1303 static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs, 1304 struct path *upperpath) 1305 { 1306 int err; 1307 struct path workpath = { }; 1308 1309 err = ovl_mount_dir(ofs->config.workdir, &workpath); 1310 if (err) 1311 goto out; 1312 1313 err = -EINVAL; 1314 if (upperpath->mnt != workpath.mnt) { 1315 pr_err("workdir and upperdir must reside under the same mount\n"); 1316 goto out; 1317 } 1318 if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) { 1319 pr_err("workdir and upperdir must be separate subtrees\n"); 1320 goto out; 1321 } 1322 1323 ofs->workbasedir = dget(workpath.dentry); 1324 1325 if (ovl_inuse_trylock(ofs->workbasedir)) { 1326 ofs->workdir_locked = true; 1327 } else { 1328 err = ovl_report_in_use(ofs, "workdir"); 1329 if (err) 1330 goto out; 1331 } 1332 1333 err = ovl_setup_trap(sb, ofs->workbasedir, &ofs->workbasedir_trap, 1334 "workdir"); 1335 if (err) 1336 goto out; 1337 1338 err = ovl_make_workdir(sb, ofs, &workpath); 1339 1340 out: 1341 path_put(&workpath); 1342 1343 return err; 1344 } 1345 1346 static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, 1347 struct ovl_entry *oe, struct path *upperpath) 1348 { 1349 struct vfsmount *mnt = ovl_upper_mnt(ofs); 1350 int err; 1351 1352 err = mnt_want_write(mnt); 1353 if (err) 1354 return err; 1355 1356 /* Verify lower root is upper root origin */ 1357 err = ovl_verify_origin(upperpath->dentry, oe->lowerstack[0].dentry, 1358 true); 1359 if (err) { 1360 pr_err("failed to verify upper root origin\n"); 1361 goto out; 1362 } 1363 1364 /* index dir will act also as workdir */ 1365 iput(ofs->workdir_trap); 1366 ofs->workdir_trap = NULL; 1367 dput(ofs->workdir); 1368 ofs->workdir = NULL; 1369 ofs->indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true); 1370 if (ofs->indexdir) { 1371 ofs->workdir = dget(ofs->indexdir); 1372 1373 err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap, 1374 "indexdir"); 1375 if (err) 1376 goto out; 1377 1378 /* 1379 * Verify upper root is exclusively associated with index dir. 1380 * Older kernels stored upper fh in "trusted.overlay.origin" 1381 * xattr. If that xattr exists, verify that it is a match to 1382 * upper dir file handle. In any case, verify or set xattr 1383 * "trusted.overlay.upper" to indicate that index may have 1384 * directory entries. 1385 */ 1386 if (ovl_check_origin_xattr(ofs->indexdir)) { 1387 err = ovl_verify_set_fh(ofs->indexdir, OVL_XATTR_ORIGIN, 1388 upperpath->dentry, true, false); 1389 if (err) 1390 pr_err("failed to verify index dir 'origin' xattr\n"); 1391 } 1392 err = ovl_verify_upper(ofs->indexdir, upperpath->dentry, true); 1393 if (err) 1394 pr_err("failed to verify index dir 'upper' xattr\n"); 1395 1396 /* Cleanup bad/stale/orphan index entries */ 1397 if (!err) 1398 err = ovl_indexdir_cleanup(ofs); 1399 } 1400 if (err || !ofs->indexdir) 1401 pr_warn("try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); 1402 1403 out: 1404 mnt_drop_write(mnt); 1405 return err; 1406 } 1407 1408 static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid) 1409 { 1410 unsigned int i; 1411 1412 if (!ofs->config.nfs_export && !ovl_upper_mnt(ofs)) 1413 return true; 1414 1415 /* 1416 * We allow using single lower with null uuid for index and nfs_export 1417 * for example to support those features with single lower squashfs. 1418 * To avoid regressions in setups of overlay with re-formatted lower 1419 * squashfs, do not allow decoding origin with lower null uuid unless 1420 * user opted-in to one of the new features that require following the 1421 * lower inode of non-dir upper. 1422 */ 1423 if (!ofs->config.index && !ofs->config.metacopy && !ofs->config.xino && 1424 uuid_is_null(uuid)) 1425 return false; 1426 1427 for (i = 0; i < ofs->numfs; i++) { 1428 /* 1429 * We use uuid to associate an overlay lower file handle with a 1430 * lower layer, so we can accept lower fs with null uuid as long 1431 * as all lower layers with null uuid are on the same fs. 1432 * if we detect multiple lower fs with the same uuid, we 1433 * disable lower file handle decoding on all of them. 1434 */ 1435 if (ofs->fs[i].is_lower && 1436 uuid_equal(&ofs->fs[i].sb->s_uuid, uuid)) { 1437 ofs->fs[i].bad_uuid = true; 1438 return false; 1439 } 1440 } 1441 return true; 1442 } 1443 1444 /* Get a unique fsid for the layer */ 1445 static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path) 1446 { 1447 struct super_block *sb = path->mnt->mnt_sb; 1448 unsigned int i; 1449 dev_t dev; 1450 int err; 1451 bool bad_uuid = false; 1452 1453 for (i = 0; i < ofs->numfs; i++) { 1454 if (ofs->fs[i].sb == sb) 1455 return i; 1456 } 1457 1458 if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) { 1459 bad_uuid = true; 1460 if (ofs->config.index || ofs->config.nfs_export) { 1461 ofs->config.index = false; 1462 ofs->config.nfs_export = false; 1463 pr_warn("%s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n", 1464 uuid_is_null(&sb->s_uuid) ? "null" : 1465 "conflicting", 1466 path->dentry); 1467 } 1468 } 1469 1470 err = get_anon_bdev(&dev); 1471 if (err) { 1472 pr_err("failed to get anonymous bdev for lowerpath\n"); 1473 return err; 1474 } 1475 1476 ofs->fs[ofs->numfs].sb = sb; 1477 ofs->fs[ofs->numfs].pseudo_dev = dev; 1478 ofs->fs[ofs->numfs].bad_uuid = bad_uuid; 1479 1480 return ofs->numfs++; 1481 } 1482 1483 static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, 1484 struct path *stack, unsigned int numlower, 1485 struct ovl_layer *layers) 1486 { 1487 int err; 1488 unsigned int i; 1489 1490 err = -ENOMEM; 1491 ofs->fs = kcalloc(numlower + 1, sizeof(struct ovl_sb), GFP_KERNEL); 1492 if (ofs->fs == NULL) 1493 goto out; 1494 1495 /* idx/fsid 0 are reserved for upper fs even with lower only overlay */ 1496 ofs->numfs++; 1497 1498 /* 1499 * All lower layers that share the same fs as upper layer, use the same 1500 * pseudo_dev as upper layer. Allocate fs[0].pseudo_dev even for lower 1501 * only overlay to simplify ovl_fs_free(). 1502 * is_lower will be set if upper fs is shared with a lower layer. 1503 */ 1504 err = get_anon_bdev(&ofs->fs[0].pseudo_dev); 1505 if (err) { 1506 pr_err("failed to get anonymous bdev for upper fs\n"); 1507 goto out; 1508 } 1509 1510 if (ovl_upper_mnt(ofs)) { 1511 ofs->fs[0].sb = ovl_upper_mnt(ofs)->mnt_sb; 1512 ofs->fs[0].is_lower = false; 1513 } 1514 1515 for (i = 0; i < numlower; i++) { 1516 struct vfsmount *mnt; 1517 struct inode *trap; 1518 int fsid; 1519 1520 err = fsid = ovl_get_fsid(ofs, &stack[i]); 1521 if (err < 0) 1522 goto out; 1523 1524 /* 1525 * Check if lower root conflicts with this overlay layers before 1526 * checking if it is in-use as upperdir/workdir of "another" 1527 * mount, because we do not bother to check in ovl_is_inuse() if 1528 * the upperdir/workdir is in fact in-use by our 1529 * upperdir/workdir. 1530 */ 1531 err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir"); 1532 if (err) 1533 goto out; 1534 1535 if (ovl_is_inuse(stack[i].dentry)) { 1536 err = ovl_report_in_use(ofs, "lowerdir"); 1537 if (err) { 1538 iput(trap); 1539 goto out; 1540 } 1541 } 1542 1543 mnt = clone_private_mount(&stack[i]); 1544 err = PTR_ERR(mnt); 1545 if (IS_ERR(mnt)) { 1546 pr_err("failed to clone lowerpath\n"); 1547 iput(trap); 1548 goto out; 1549 } 1550 1551 /* 1552 * Make lower layers R/O. That way fchmod/fchown on lower file 1553 * will fail instead of modifying lower fs. 1554 */ 1555 mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME; 1556 1557 layers[ofs->numlayer].trap = trap; 1558 layers[ofs->numlayer].mnt = mnt; 1559 layers[ofs->numlayer].idx = ofs->numlayer; 1560 layers[ofs->numlayer].fsid = fsid; 1561 layers[ofs->numlayer].fs = &ofs->fs[fsid]; 1562 ofs->numlayer++; 1563 ofs->fs[fsid].is_lower = true; 1564 } 1565 1566 /* 1567 * When all layers on same fs, overlay can use real inode numbers. 1568 * With mount option "xino=<on|auto>", mounter declares that there are 1569 * enough free high bits in underlying fs to hold the unique fsid. 1570 * If overlayfs does encounter underlying inodes using the high xino 1571 * bits reserved for fsid, it emits a warning and uses the original 1572 * inode number or a non persistent inode number allocated from a 1573 * dedicated range. 1574 */ 1575 if (ofs->numfs - !ovl_upper_mnt(ofs) == 1) { 1576 if (ofs->config.xino == OVL_XINO_ON) 1577 pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n"); 1578 ofs->xino_mode = 0; 1579 } else if (ofs->config.xino == OVL_XINO_OFF) { 1580 ofs->xino_mode = -1; 1581 } else if (ofs->xino_mode < 0) { 1582 /* 1583 * This is a roundup of number of bits needed for encoding 1584 * fsid, where fsid 0 is reserved for upper fs (even with 1585 * lower only overlay) +1 extra bit is reserved for the non 1586 * persistent inode number range that is used for resolving 1587 * xino lower bits overflow. 1588 */ 1589 BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 30); 1590 ofs->xino_mode = ilog2(ofs->numfs - 1) + 2; 1591 } 1592 1593 if (ofs->xino_mode > 0) { 1594 pr_info("\"xino\" feature enabled using %d upper inode bits.\n", 1595 ofs->xino_mode); 1596 } 1597 1598 err = 0; 1599 out: 1600 return err; 1601 } 1602 1603 static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, 1604 const char *lower, unsigned int numlower, 1605 struct ovl_fs *ofs, struct ovl_layer *layers) 1606 { 1607 int err; 1608 struct path *stack = NULL; 1609 unsigned int i; 1610 struct ovl_entry *oe; 1611 1612 if (!ofs->config.upperdir && numlower == 1) { 1613 pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n"); 1614 return ERR_PTR(-EINVAL); 1615 } 1616 1617 stack = kcalloc(numlower, sizeof(struct path), GFP_KERNEL); 1618 if (!stack) 1619 return ERR_PTR(-ENOMEM); 1620 1621 err = -EINVAL; 1622 for (i = 0; i < numlower; i++) { 1623 err = ovl_lower_dir(lower, &stack[i], ofs, &sb->s_stack_depth); 1624 if (err) 1625 goto out_err; 1626 1627 lower = strchr(lower, '\0') + 1; 1628 } 1629 1630 err = -EINVAL; 1631 sb->s_stack_depth++; 1632 if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { 1633 pr_err("maximum fs stacking depth exceeded\n"); 1634 goto out_err; 1635 } 1636 1637 err = ovl_get_layers(sb, ofs, stack, numlower, layers); 1638 if (err) 1639 goto out_err; 1640 1641 err = -ENOMEM; 1642 oe = ovl_alloc_entry(numlower); 1643 if (!oe) 1644 goto out_err; 1645 1646 for (i = 0; i < numlower; i++) { 1647 oe->lowerstack[i].dentry = dget(stack[i].dentry); 1648 oe->lowerstack[i].layer = &ofs->layers[i+1]; 1649 } 1650 1651 out: 1652 for (i = 0; i < numlower; i++) 1653 path_put(&stack[i]); 1654 kfree(stack); 1655 1656 return oe; 1657 1658 out_err: 1659 oe = ERR_PTR(err); 1660 goto out; 1661 } 1662 1663 /* 1664 * Check if this layer root is a descendant of: 1665 * - another layer of this overlayfs instance 1666 * - upper/work dir of any overlayfs instance 1667 */ 1668 static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs, 1669 struct dentry *dentry, const char *name) 1670 { 1671 struct dentry *next = dentry, *parent; 1672 int err = 0; 1673 1674 if (!dentry) 1675 return 0; 1676 1677 parent = dget_parent(next); 1678 1679 /* Walk back ancestors to root (inclusive) looking for traps */ 1680 while (!err && parent != next) { 1681 if (ovl_lookup_trap_inode(sb, parent)) { 1682 err = -ELOOP; 1683 pr_err("overlapping %s path\n", name); 1684 } else if (ovl_is_inuse(parent)) { 1685 err = ovl_report_in_use(ofs, name); 1686 } 1687 next = parent; 1688 parent = dget_parent(next); 1689 dput(next); 1690 } 1691 1692 dput(parent); 1693 1694 return err; 1695 } 1696 1697 /* 1698 * Check if any of the layers or work dirs overlap. 1699 */ 1700 static int ovl_check_overlapping_layers(struct super_block *sb, 1701 struct ovl_fs *ofs) 1702 { 1703 int i, err; 1704 1705 if (ovl_upper_mnt(ofs)) { 1706 err = ovl_check_layer(sb, ofs, ovl_upper_mnt(ofs)->mnt_root, 1707 "upperdir"); 1708 if (err) 1709 return err; 1710 1711 /* 1712 * Checking workbasedir avoids hitting ovl_is_inuse(parent) of 1713 * this instance and covers overlapping work and index dirs, 1714 * unless work or index dir have been moved since created inside 1715 * workbasedir. In that case, we already have their traps in 1716 * inode cache and we will catch that case on lookup. 1717 */ 1718 err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir"); 1719 if (err) 1720 return err; 1721 } 1722 1723 for (i = 1; i < ofs->numlayer; i++) { 1724 err = ovl_check_layer(sb, ofs, 1725 ofs->layers[i].mnt->mnt_root, 1726 "lowerdir"); 1727 if (err) 1728 return err; 1729 } 1730 1731 return 0; 1732 } 1733 1734 static struct dentry *ovl_get_root(struct super_block *sb, 1735 struct dentry *upperdentry, 1736 struct ovl_entry *oe) 1737 { 1738 struct dentry *root; 1739 struct ovl_path *lowerpath = &oe->lowerstack[0]; 1740 unsigned long ino = d_inode(lowerpath->dentry)->i_ino; 1741 int fsid = lowerpath->layer->fsid; 1742 struct ovl_inode_params oip = { 1743 .upperdentry = upperdentry, 1744 .lowerpath = lowerpath, 1745 }; 1746 1747 root = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); 1748 if (!root) 1749 return NULL; 1750 1751 root->d_fsdata = oe; 1752 1753 if (upperdentry) { 1754 /* Root inode uses upper st_ino/i_ino */ 1755 ino = d_inode(upperdentry)->i_ino; 1756 fsid = 0; 1757 ovl_dentry_set_upper_alias(root); 1758 if (ovl_is_impuredir(upperdentry)) 1759 ovl_set_flag(OVL_IMPURE, d_inode(root)); 1760 } 1761 1762 /* Root is always merge -> can have whiteouts */ 1763 ovl_set_flag(OVL_WHITEOUTS, d_inode(root)); 1764 ovl_dentry_set_flag(OVL_E_CONNECTED, root); 1765 ovl_set_upperdata(d_inode(root)); 1766 ovl_inode_init(d_inode(root), &oip, ino, fsid); 1767 ovl_dentry_update_reval(root, upperdentry, DCACHE_OP_WEAK_REVALIDATE); 1768 1769 return root; 1770 } 1771 1772 static int ovl_fill_super(struct super_block *sb, void *data, int silent) 1773 { 1774 struct path upperpath = { }; 1775 struct dentry *root_dentry; 1776 struct ovl_entry *oe; 1777 struct ovl_fs *ofs; 1778 struct ovl_layer *layers; 1779 struct cred *cred; 1780 char *splitlower = NULL; 1781 unsigned int numlower; 1782 int err; 1783 1784 sb->s_d_op = &ovl_dentry_operations; 1785 1786 err = -ENOMEM; 1787 ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); 1788 if (!ofs) 1789 goto out; 1790 1791 ofs->creator_cred = cred = prepare_creds(); 1792 if (!cred) 1793 goto out_err; 1794 1795 /* Is there a reason anyone would want not to share whiteouts? */ 1796 ofs->share_whiteout = true; 1797 1798 ofs->config.index = ovl_index_def; 1799 ofs->config.nfs_export = ovl_nfs_export_def; 1800 ofs->config.xino = ovl_xino_def(); 1801 ofs->config.metacopy = ovl_metacopy_def; 1802 err = ovl_parse_opt((char *) data, &ofs->config); 1803 if (err) 1804 goto out_err; 1805 1806 err = -EINVAL; 1807 if (!ofs->config.lowerdir) { 1808 if (!silent) 1809 pr_err("missing 'lowerdir'\n"); 1810 goto out_err; 1811 } 1812 1813 err = -ENOMEM; 1814 splitlower = kstrdup(ofs->config.lowerdir, GFP_KERNEL); 1815 if (!splitlower) 1816 goto out_err; 1817 1818 numlower = ovl_split_lowerdirs(splitlower); 1819 if (numlower > OVL_MAX_STACK) { 1820 pr_err("too many lower directories, limit is %d\n", 1821 OVL_MAX_STACK); 1822 goto out_err; 1823 } 1824 1825 layers = kcalloc(numlower + 1, sizeof(struct ovl_layer), GFP_KERNEL); 1826 if (!layers) 1827 goto out_err; 1828 1829 ofs->layers = layers; 1830 /* Layer 0 is reserved for upper even if there's no upper */ 1831 ofs->numlayer = 1; 1832 1833 sb->s_stack_depth = 0; 1834 sb->s_maxbytes = MAX_LFS_FILESIZE; 1835 atomic_long_set(&ofs->last_ino, 1); 1836 /* Assume underlaying fs uses 32bit inodes unless proven otherwise */ 1837 if (ofs->config.xino != OVL_XINO_OFF) { 1838 ofs->xino_mode = BITS_PER_LONG - 32; 1839 if (!ofs->xino_mode) { 1840 pr_warn("xino not supported on 32bit kernel, falling back to xino=off.\n"); 1841 ofs->config.xino = OVL_XINO_OFF; 1842 } 1843 } 1844 1845 /* alloc/destroy_inode needed for setting up traps in inode cache */ 1846 sb->s_op = &ovl_super_operations; 1847 1848 if (ofs->config.upperdir) { 1849 if (!ofs->config.workdir) { 1850 pr_err("missing 'workdir'\n"); 1851 goto out_err; 1852 } 1853 1854 err = ovl_get_upper(sb, ofs, &layers[0], &upperpath); 1855 if (err) 1856 goto out_err; 1857 1858 err = ovl_get_workdir(sb, ofs, &upperpath); 1859 if (err) 1860 goto out_err; 1861 1862 if (!ofs->workdir) 1863 sb->s_flags |= SB_RDONLY; 1864 1865 sb->s_stack_depth = ovl_upper_mnt(ofs)->mnt_sb->s_stack_depth; 1866 sb->s_time_gran = ovl_upper_mnt(ofs)->mnt_sb->s_time_gran; 1867 1868 } 1869 oe = ovl_get_lowerstack(sb, splitlower, numlower, ofs, layers); 1870 err = PTR_ERR(oe); 1871 if (IS_ERR(oe)) 1872 goto out_err; 1873 1874 /* If the upper fs is nonexistent, we mark overlayfs r/o too */ 1875 if (!ovl_upper_mnt(ofs)) 1876 sb->s_flags |= SB_RDONLY; 1877 1878 if (!ovl_force_readonly(ofs) && ofs->config.index) { 1879 err = ovl_get_indexdir(sb, ofs, oe, &upperpath); 1880 if (err) 1881 goto out_free_oe; 1882 1883 /* Force r/o mount with no index dir */ 1884 if (!ofs->indexdir) 1885 sb->s_flags |= SB_RDONLY; 1886 } 1887 1888 err = ovl_check_overlapping_layers(sb, ofs); 1889 if (err) 1890 goto out_free_oe; 1891 1892 /* Show index=off in /proc/mounts for forced r/o mount */ 1893 if (!ofs->indexdir) { 1894 ofs->config.index = false; 1895 if (ovl_upper_mnt(ofs) && ofs->config.nfs_export) { 1896 pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n"); 1897 ofs->config.nfs_export = false; 1898 } 1899 } 1900 1901 if (ofs->config.metacopy && ofs->config.nfs_export) { 1902 pr_warn("NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n"); 1903 ofs->config.nfs_export = false; 1904 } 1905 1906 if (ofs->config.nfs_export) 1907 sb->s_export_op = &ovl_export_operations; 1908 1909 /* Never override disk quota limits or use reserved space */ 1910 cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); 1911 1912 sb->s_magic = OVERLAYFS_SUPER_MAGIC; 1913 sb->s_xattr = ovl_xattr_handlers; 1914 sb->s_fs_info = ofs; 1915 sb->s_flags |= SB_POSIXACL; 1916 sb->s_iflags |= SB_I_SKIP_SYNC; 1917 1918 err = -ENOMEM; 1919 root_dentry = ovl_get_root(sb, upperpath.dentry, oe); 1920 if (!root_dentry) 1921 goto out_free_oe; 1922 1923 mntput(upperpath.mnt); 1924 kfree(splitlower); 1925 1926 sb->s_root = root_dentry; 1927 1928 return 0; 1929 1930 out_free_oe: 1931 ovl_entry_stack_free(oe); 1932 kfree(oe); 1933 out_err: 1934 kfree(splitlower); 1935 path_put(&upperpath); 1936 ovl_free_fs(ofs); 1937 out: 1938 return err; 1939 } 1940 1941 static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, 1942 const char *dev_name, void *raw_data) 1943 { 1944 return mount_nodev(fs_type, flags, raw_data, ovl_fill_super); 1945 } 1946 1947 static struct file_system_type ovl_fs_type = { 1948 .owner = THIS_MODULE, 1949 .name = "overlay", 1950 .mount = ovl_mount, 1951 .kill_sb = kill_anon_super, 1952 }; 1953 MODULE_ALIAS_FS("overlay"); 1954 1955 static void ovl_inode_init_once(void *foo) 1956 { 1957 struct ovl_inode *oi = foo; 1958 1959 inode_init_once(&oi->vfs_inode); 1960 } 1961 1962 static int __init ovl_init(void) 1963 { 1964 int err; 1965 1966 ovl_inode_cachep = kmem_cache_create("ovl_inode", 1967 sizeof(struct ovl_inode), 0, 1968 (SLAB_RECLAIM_ACCOUNT| 1969 SLAB_MEM_SPREAD|SLAB_ACCOUNT), 1970 ovl_inode_init_once); 1971 if (ovl_inode_cachep == NULL) 1972 return -ENOMEM; 1973 1974 err = ovl_aio_request_cache_init(); 1975 if (!err) { 1976 err = register_filesystem(&ovl_fs_type); 1977 if (!err) 1978 return 0; 1979 1980 ovl_aio_request_cache_destroy(); 1981 } 1982 kmem_cache_destroy(ovl_inode_cachep); 1983 1984 return err; 1985 } 1986 1987 static void __exit ovl_exit(void) 1988 { 1989 unregister_filesystem(&ovl_fs_type); 1990 1991 /* 1992 * Make sure all delayed rcu free inodes are flushed before we 1993 * destroy cache. 1994 */ 1995 rcu_barrier(); 1996 kmem_cache_destroy(ovl_inode_cachep); 1997 ovl_aio_request_cache_destroy(); 1998 } 1999 2000 module_init(ovl_init); 2001 module_exit(ovl_exit); 2002