// SPDX-License-Identifier: GPL-2.0-only
/*
 *
 * Copyright (C) 2011 Novell Inc.
 */

#include <uapi/linux/magic.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/xattr.h>
#include <linux/mount.h>
#include <linux/parser.h>
#include <linux/module.h>
#include <linux/statfs.h>
#include <linux/seq_file.h>
#include <linux/posix_acl_xattr.h>
#include <linux/exportfs.h>
#include <linux/file.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include "overlayfs.h"
#include "params.h"

MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Overlay filesystem");
MODULE_LICENSE("GPL");


struct ovl_dir_cache;

/*
 * ->d_real() implementation: map an overlay dentry to a real dentry.
 *
 * @dentry itself is returned when @inode is the overlay inode, and for
 * non-regular files when @inode is NULL or the overlay inode.  For regular
 * files the upper dentry is returned when @inode is its inode, or when
 * @inode is NULL and ovl_has_upperdata() holds; otherwise the lowerdata
 * dentry is resolved (recursively, through d_real()).
 *
 * If no real dentry matching @inode is found, a WARN is emitted and @dentry
 * is returned.
 */
static struct dentry *ovl_d_real(struct dentry *dentry,
				 const struct inode *inode)
{
	struct dentry *real = NULL, *lower;
	int err;

	/* It's an overlay file */
	if (inode && d_inode(dentry) == inode)
		return dentry;

	if (!d_is_reg(dentry)) {
		if (!inode || inode == d_inode(dentry))
			return dentry;
		goto bug;
	}

	real = ovl_dentry_upper(dentry);
	if (real && (inode == d_inode(real)))
		return real;

	if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
		return real;

	/*
	 * Best effort lazy lookup of lowerdata for !inode case to return
	 * the real lowerdata dentry.  The only current caller of d_real() with
	 * NULL inode is d_real_inode() from trace_uprobe and this caller is
	 * likely going to be followed reading from the file, before placing
	 * uprobes on offset within the file, so lowerdata should be available
	 * when setting the uprobe.
	 */
	err = ovl_verify_lowerdata(dentry);
	if (err)
		goto bug;
	lower = ovl_dentry_lowerdata(dentry);
	if (!lower)
		goto bug;
	real = lower;

	/* Handle recursion */
	real = d_real(real, inode);

	if (!inode || inode == d_inode(real))
		return real;
bug:
	/* @real may be NULL or negative here, hence the guarded i_ino access */
	WARN(1, "%s(%pd4, %s:%lu): real dentry (%p/%lu) not found\n",
	     __func__, dentry, inode ? inode->i_sb->s_id : "NULL",
	     inode ? inode->i_ino : 0, real,
	     real && d_inode(real) ? d_inode(real)->i_ino : 0);
	return dentry;
}

/*
 * Revalidate one real dentry @d through its own dentry operations.
 *
 * A NULL @d counts as valid.  With @weak set, ->d_weak_revalidate() is used
 * if the dentry has DCACHE_OP_WEAK_REVALIDATE; otherwise ->d_revalidate() is
 * used if it has DCACHE_OP_REVALIDATE.  A zero result from ->d_revalidate()
 * is converted to -ESTALE, and @d is invalidated first unless LOOKUP_RCU is
 * set in @flags.
 *
 * Returns 1 when there is nothing to call, else the (possibly converted)
 * result of the real operation.
 */
static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak)
{
	int ret = 1;

	if (!d)
		return 1;

	if (weak) {
		if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE)
			ret = d->d_op->d_weak_revalidate(d, flags);
	} else if (d->d_flags & DCACHE_OP_REVALIDATE) {
		ret = d->d_op->d_revalidate(d, flags);
		if (!ret) {
			if (!(flags & LOOKUP_RCU))
				d_invalidate(d);
			ret = -ESTALE;
		}
	}
	return ret;
}

/*
 * Common body of ->d_revalidate() and ->d_weak_revalidate().
 *
 * Revalidates the upper dentry (if any) and then each entry of the lower
 * stack, stopping at the first result that is not positive.  Returns -ECHILD
 * when the dentry has no inode, otherwise the last revalidation result
 * (1 if nothing needed revalidating).
 */
static int ovl_dentry_revalidate_common(struct dentry *dentry,
					unsigned int flags, bool weak)
{
	struct ovl_entry *oe;
	struct ovl_path *lowerstack;
	struct inode *inode = d_inode_rcu(dentry);
	struct dentry *upper;
	unsigned int i;
	int ret = 1;

	/* Careful in RCU mode */
	if (!inode)
		return -ECHILD;

	oe = OVL_I_E(inode);
	lowerstack = ovl_lowerstack(oe);
	upper = ovl_i_dentry_upper(inode);
	if (upper)
		ret = ovl_revalidate_real(upper, flags, weak);

	/* "ret > 0" in the condition ends the walk on the first failure */
	for (i = 0; ret > 0 && i < ovl_numlower(oe); i++)
		ret = ovl_revalidate_real(lowerstack[i].dentry, flags, weak);

	return ret;
}

/* ->d_revalidate(): strong revalidation of all real dentries */
static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
{
	return ovl_dentry_revalidate_common(dentry, flags, false);
}

/* ->d_weak_revalidate(): weak revalidation of all real dentries */
static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
{
	return ovl_dentry_revalidate_common(dentry, flags, true);
}

/* Dentry operations table built from the functions above */
static const struct dentry_operations ovl_dentry_operations = {
	.d_real = ovl_d_real,
	.d_revalidate = ovl_dentry_revalidate,
	.d_weak_revalidate = ovl_dentry_weak_revalidate,
};

/* Slab cache that struct ovl_inode objects are allocated from */
static struct kmem_cache *ovl_inode_cachep;

/*
 * ->alloc_inode(): allocate a struct ovl_inode from ovl_inode_cachep, reset
 * the overlay specific fields and initialize its mutex.
 *
 * Returns the embedded VFS inode, or NULL if the allocation failed.
 */
static struct inode *ovl_alloc_inode(struct super_block *sb)
{
	struct ovl_inode *oi = alloc_inode_sb(sb, ovl_inode_cachep, GFP_KERNEL);

	if (!oi)
		return NULL;

	oi->cache = NULL;
	oi->redirect = NULL;
	oi->version = 0;
	oi->flags = 0;
	oi->__upperdentry = NULL;
	oi->lowerdata_redirect = NULL;
	oi->oe = NULL;
	mutex_init(&oi->lock);

	return &oi->vfs_inode;
}

/*
 * ->free_inode(): free the redirect string and the ovl_entry, destroy the
 * mutex and give the ovl_inode back to ovl_inode_cachep.
 */
static void ovl_free_inode(struct inode *inode)
{
	struct ovl_inode *oi = OVL_I(inode);

	kfree(oi->redirect);
	kfree(oi->oe);
	mutex_destroy(&oi->lock);
	kmem_cache_free(ovl_inode_cachep, oi);
}

/*
 * ->destroy_inode(): drop the references held on the upper dentry and on the
 * lower stack, then free the directory cache (directories) or the lowerdata
 * redirect string (everything else).
 */
static void ovl_destroy_inode(struct inode *inode)
{
	struct ovl_inode *oi = OVL_I(inode);

	dput(oi->__upperdentry);
	ovl_stack_put(ovl_lowerstack(oi->oe), ovl_numlower(oi->oe));
	if (S_ISDIR(inode->i_mode))
		ovl_dir_cache_free(inode);
	else
		kfree(oi->lowerdata_redirect);
}

/* ->put_super(): free the overlay private data, if there is any */
static void ovl_put_super(struct super_block *sb)
{
	struct ovl_fs *ofs = OVL_FS(sb);

	if (ofs)
		ovl_free_fs(ofs);
}

/*
 * Sync real dirty inodes in upper filesystem (if it exists)
 *
 * Returns -EIO when ovl_sync_status() reports an error, 0 when its status is
 * zero or when @wait is not set, otherwise the result of sync_filesystem()
 * on the upper super block.
 */
static int ovl_sync_fs(struct super_block *sb, int wait)
{
	struct ovl_fs *ofs = OVL_FS(sb);
	struct super_block *upper_sb;
	int ret;

	ret = ovl_sync_status(ofs);
	/*
	 * We have to always set the err, because the return value isn't
	 * checked in syncfs, and instead indirectly return an error via
	 * the sb's writeback errseq, which VFS inspects after this call.
	 */
	if (ret < 0) {
		errseq_set(&sb->s_wb_err, -EIO);
		return -EIO;
	}

	/* A zero status means there is nothing to sync here */
	if (!ret)
		return ret;

	/*
	 * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC).
	 * All the super blocks will be iterated, including upper_sb.
	 *
	 * If this is a syncfs(2) call, then we do need to call
	 * sync_filesystem() on upper_sb, but enough if we do it when being
	 * called with wait == 1.
	 */
	if (!wait)
		return 0;

	upper_sb = ovl_upper_mnt(ofs)->mnt_sb;

	/* sync_filesystem() is called with upper_sb->s_umount held for read */
	down_read(&upper_sb->s_umount);
	ret = sync_filesystem(upper_sb);
	up_read(&upper_sb->s_umount);

	return ret;
}

/**
 * ovl_statfs - get filesystem statistics for an overlay mount
 * @dentry: The dentry to query
 * @buf: The struct kstatfs to fill in with stats
 *
 * Get the filesystem statistics.  As writes always target the upper layer
 * filesystem pass the statfs to the upper filesystem (if it exists).  The
 * statistics are taken from the real path of the overlay root; on success
 * f_namelen and f_type are replaced by the overlay's own values, and f_fsid
 * is derived from the overlay sb uuid when ovl_has_fsid() holds.
 *
 * Return: 0 on success, otherwise the error returned by vfs_statfs().
 */
static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;
	struct ovl_fs *ofs = OVL_FS(sb);
	struct dentry *root_dentry = sb->s_root;
	struct path path;
	int err;

	ovl_path_real(root_dentry, &path);

	err = vfs_statfs(&path, buf);
	if (!err) {
		buf->f_namelen = ofs->namelen;
		buf->f_type = OVERLAYFS_SUPER_MAGIC;
		if (ovl_has_fsid(ofs))
			buf->f_fsid = uuid_to_fsid(sb->s_uuid.b);
	}

	return err;
}

/* Super block operations table of an overlay mount */
static const struct super_operations ovl_super_operations = {
	.alloc_inode = ovl_alloc_inode,
	.free_inode = ovl_free_inode,
	.destroy_inode = ovl_destroy_inode,
	.drop_inode = generic_delete_inode,
	.put_super = ovl_put_super,
	.sync_fs = ovl_sync_fs,
	.statfs = ovl_statfs,
	.show_options = ovl_show_options,
};

/* Names of the directories created below the user supplied workdir */
#define OVL_WORKDIR_NAME "work"
#define OVL_INDEXDIR_NAME "index"

/*
 * Look up directory @name below ofs->workbasedir and create it if it does not
 * exist yet.
 *
 * If the directory already exists, then with @persist set it is returned as
 * is; otherwise it is cleaned up with ovl_workdir_cleanup() and the lookup is
 * retried exactly once.  A freshly created directory gets its POSIX ACL
 * xattrs removed and its mode reset to the bare S_IFDIR.
 *
 * Returns the directory dentry on success, ERR_PTR(-EINVAL) if the cleanup
 * failed with -EINVAL, and NULL (after a "mounting read-only" warning) on
 * any other failure.
 */
static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
					 const char *name, bool persist)
{
	struct inode *dir = ofs->workbasedir->d_inode;
	struct vfsmount *mnt = ovl_upper_mnt(ofs);
	struct dentry *work;
	int err;
	bool retried = false;

	inode_lock_nested(dir, I_MUTEX_PARENT);
retry:
	work = ovl_lookup_upper(ofs, name, ofs->workbasedir, strlen(name));

	if (!IS_ERR(work)) {
		struct iattr attr = {
			.ia_valid = ATTR_MODE,
			.ia_mode = S_IFDIR | 0,
		};

		if (work->d_inode) {
			/* Still there after one cleanup attempt: give up */
			err = -EEXIST;
			if (retried)
				goto out_dput;

			/* A persistent directory is reused as found */
			if (persist)
				goto out_unlock;

			retried = true;
			err = ovl_workdir_cleanup(ofs, dir, mnt, work, 0);
			dput(work);
			if (err == -EINVAL) {
				work = ERR_PTR(err);
				goto out_unlock;
			}
			/* Other cleanup errors surface as -EEXIST on retry */
			goto retry;
		}

		err = ovl_mkdir_real(ofs, dir, &work, attr.ia_mode);
		if (err)
			goto out_dput;

		/* Weird filesystem returning with hashed negative (kernfs)? */
		err = -EINVAL;
		if (d_really_is_negative(work))
			goto out_dput;

		/*
		 * Try to remove POSIX ACL xattrs from workdir.  We are good if:
		 *
		 * a) success (there was a POSIX ACL xattr and was removed)
		 * b) -ENODATA (there was no POSIX ACL xattr)
		 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
		 *
		 * There are various other error values that could effectively
		 * mean that the xattr doesn't exist (e.g. -ERANGE is returned
		 * if the xattr name is too long), but the set of filesystems
		 * allowed as upper are limited to "normal" ones, where checking
		 * for the above two errors is sufficient.
		 */
		err = ovl_do_remove_acl(ofs, work, XATTR_NAME_POSIX_ACL_DEFAULT);
		if (err && err != -ENODATA && err != -EOPNOTSUPP)
			goto out_dput;

		err = ovl_do_remove_acl(ofs, work, XATTR_NAME_POSIX_ACL_ACCESS);
		if (err && err != -ENODATA && err != -EOPNOTSUPP)
			goto out_dput;

		/* Clear any inherited mode bits */
		inode_lock(work->d_inode);
		err = ovl_do_notify_change(ofs, work, &attr);
		inode_unlock(work->d_inode);
		if (err)
			goto out_dput;
	} else {
		err = PTR_ERR(work);
		goto out_err;
	}
out_unlock:
	inode_unlock(dir);
	return work;

out_dput:
	dput(work);
out_err:
	pr_warn("failed to create directory %s/%s (errno: %i); mounting read-only\n",
		ofs->config.workdir, name, -err);
	work = NULL;
	goto out_unlock;
}

/*
 * Raise ofs->namelen to the f_namelen reported by statfs on @path if that is
 * larger.  @name is only used in the error message.
 *
 * Returns 0 on success or the error from vfs_statfs().
 */
static int ovl_check_namelen(const struct path *path, struct ovl_fs *ofs,
			     const char *name)
{
	struct kstatfs statfs;
	int err = vfs_statfs(path, &statfs);

	if (err)
		pr_err("statfs failed on '%s'\n", name);
	else
		ofs->namelen = max(ofs->namelen, statfs.f_namelen);

	return err;
}

/*
 * Account for one lower directory: update the name length limit and
 * *@stack_depth, and turn off features that the lower filesystem cannot
 * support (index, nfs_export, xino=auto, xino bits).
 *
 * Returns 0 on success or the error from ovl_check_namelen().
 */
static int ovl_lower_dir(const char *name, struct path *path,
			 struct ovl_fs *ofs, int *stack_depth)
{
	int fh_type;
	int err;

	err = ovl_check_namelen(path, ofs, name);
	if (err)
		return err;

	*stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);

	/*
	 * The inodes index feature and NFS export need to encode and decode
	 * file handles, so they require that all layers support them.
	 */
	fh_type = ovl_can_decode_fh(path->dentry->d_sb);
	if ((ofs->config.nfs_export ||
	     (ofs->config.index && ofs->config.upperdir)) && !fh_type) {
		ofs->config.index = false;
		ofs->config.nfs_export = false;
		pr_warn("fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
			name);
	}
	ofs->nofh |= !fh_type;
	/*
	 * Decoding origin file handle is required for persistent st_ino.
	 * Without persistent st_ino, xino=auto falls back to xino=off.
	 */
	if (ofs->config.xino == OVL_XINO_AUTO &&
	    ofs->config.upperdir && !fh_type) {
		ofs->config.xino = OVL_XINO_OFF;
		pr_warn("fs on '%s' does not support file handles, falling back to xino=off.\n",
			name);
	}

	/* Check if lower fs has 32bit inode numbers */
	if (fh_type != FILEID_INO32_GEN)
		ofs->xino_mode = -1;

	return 0;
}

/*
 * Workdir should not be subdir of upperdir and vice versa
 *
 * The check is done by taking and immediately dropping the rename lock on
 * the pair; only a NULL result from lock_rename() is accepted.  Identical
 * dentries are rejected without locking.
 */
static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
{
	bool ok = false;

	if (workdir != upperdir) {
		ok = (lock_rename(workdir, upperdir) == NULL);
		unlock_rename(workdir, upperdir);
	}
	return ok;
}

/* Reading an xattr in overlayfs' own namespace always fails with -EOPNOTSUPP */
static int ovl_own_xattr_get(const struct xattr_handler *handler,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, void *buffer, size_t size)
{
	return -EOPNOTSUPP;
}

/* Writing an xattr in overlayfs' own namespace always fails with -EOPNOTSUPP */
static int ovl_own_xattr_set(const struct xattr_handler *handler,
			     struct mnt_idmap *idmap,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, const void *value,
			     size_t size, int flags)
{
	return -EOPNOTSUPP;
}

/* Any other xattr read is passed on to ovl_xattr_get() */
static int ovl_other_xattr_get(const struct xattr_handler *handler,
			       struct dentry *dentry, struct inode *inode,
			       const char *name, void *buffer, size_t size)
{
	return ovl_xattr_get(dentry, inode, name, buffer, size);
}

/* Any other xattr write is passed on to ovl_xattr_set(); @idmap is unused */
static int ovl_other_xattr_set(const struct xattr_handler *handler,
			       struct mnt_idmap *idmap,
			       struct dentry *dentry, struct inode *inode,
			       const char *name, const void *value,
			       size_t size, int flags)
{
	return ovl_xattr_set(dentry, inode, name, value, size, flags);
}

/* Handler for names starting with OVL_XATTR_TRUSTED_PREFIX */
static const struct xattr_handler ovl_own_trusted_xattr_handler = {
	.prefix	= OVL_XATTR_TRUSTED_PREFIX,
	.get = ovl_own_xattr_get,
	.set = ovl_own_xattr_set,
};

/* Handler for names starting with OVL_XATTR_USER_PREFIX */
static const struct xattr_handler ovl_own_user_xattr_handler = {
	.prefix	= OVL_XATTR_USER_PREFIX,
	.get = ovl_own_xattr_get,
	.set = ovl_own_xattr_set,
};

static const struct xattr_handler ovl_other_xattr_handler = {
	.prefix	= "", /* catch all */
	.get = ovl_other_xattr_get,
	.set = ovl_other_xattr_set,
};

/*
 * Handler tables.  In both, the "own" handler is listed ahead of the catch
 * all one.
 */
static const struct xattr_handler *ovl_trusted_xattr_handlers[] = {
	&ovl_own_trusted_xattr_handler,
	&ovl_other_xattr_handler,
	NULL
};

static const struct xattr_handler *ovl_user_xattr_handlers[] = {
	&ovl_own_user_xattr_handler,
	&ovl_other_xattr_handler,
	NULL
};

/*
 * Get a trap inode for @dir and store it in *@ptrap.
 *
 * On failure *@ptrap is left untouched and the error is returned; -ELOOP is
 * additionally reported as a conflicting @name path.
 */
static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
			  struct inode **ptrap, const char *name)
{
	struct inode *trap;
	int err;

	trap = ovl_get_trap_inode(sb, dir);
	err = PTR_ERR_OR_ZERO(trap);
	if (err) {
		if (err == -ELOOP)
			pr_err("conflicting %s path\n", name);
		return err;
	}

	*ptrap = trap;
	return 0;
}

/*
 * Determine how we treat concurrent use of upperdir/workdir based on the
 * index feature. This is papering over mount leaks of container runtimes,
 * for example, an old overlay mount is leaked and now its upperdir is
 * attempted to be used as a lower layer in a new overlay mount.
 *
 * Returns -EBUSY when the index feature is enabled, otherwise only warns and
 * returns 0.
 */
static int ovl_report_in_use(struct ovl_fs *ofs, const char *name)
{
	if (ofs->config.index) {
		pr_err("%s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n",
		       name);
		return -EBUSY;
	} else {
		pr_warn("%s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n",
			name);
		return 0;
	}
}

/*
 * Set up the upper layer from @upperpath.
 *
 * Rejects a read-only upper mount, accounts for the upper fs name length,
 * installs the upperdir trap inode, clones a private mount of @upperpath and
 * fills in @upper_layer.  Finally the upper root is marked in-use; failing to
 * do so is fatal only if ovl_report_in_use() says so.
 *
 * Returns 0 on success or a negative error code.
 */
static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
			 struct ovl_layer *upper_layer,
			 const struct path *upperpath)
{
	struct vfsmount *upper_mnt;
	int err;

	/* Upperdir path should not be r/o */
	if (__mnt_is_readonly(upperpath->mnt)) {
		pr_err("upper fs is r/o, try multi-lower layers mount\n");
		err = -EINVAL;
		goto out;
	}

	err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir);
	if (err)
		goto out;

	err = ovl_setup_trap(sb, upperpath->dentry, &upper_layer->trap,
			     "upperdir");
	if (err)
		goto out;

	upper_mnt = clone_private_mount(upperpath);
	err = PTR_ERR(upper_mnt);
	if (IS_ERR(upper_mnt)) {
		pr_err("failed to clone upperpath\n");
		goto out;
	}

	/* Don't inherit atime flags */
	upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
	upper_layer->mnt = upper_mnt;
	upper_layer->idx = 0;
	upper_layer->fsid = 0;

	/*
	 * Inherit SB_NOSEC flag from upperdir.
	 *
	 * This optimization changes behavior when a security related attribute
	 * (suid/sgid/security.*) is changed on an underlying layer.  This is
	 * okay because we don't yet have guarantees in that case, but it will
	 * need careful treatment once we want to honour changes to underlying
	 * filesystems.
	 */
	if (upper_mnt->mnt_sb->s_flags & SB_NOSEC)
		sb->s_flags |= SB_NOSEC;

	if (ovl_inuse_trylock(ovl_upper_mnt(ofs)->mnt_root)) {
		ofs->upperdir_locked = true;
	} else {
		err = ovl_report_in_use(ofs, "upperdir");
		if (err)
			goto out;
	}

	err = 0;
out:
	return err;
}

/*
 * Returns 1 if RENAME_WHITEOUT is supported, 0 if not supported and
 * negative values if error is encountered.
 *
 * The probe creates a temp file in workdir, renames it with RENAME_WHITEOUT
 * and checks whether a whiteout was left behind under the old name.
 */
static int ovl_check_rename_whiteout(struct ovl_fs *ofs)
{
	struct dentry *workdir = ofs->workdir;
	struct inode *dir = d_inode(workdir);
	struct dentry *temp;
	struct dentry *dest;
	struct dentry *whiteout;
	struct name_snapshot name;
	int err;

	inode_lock_nested(dir, I_MUTEX_PARENT);

	temp = ovl_create_temp(ofs, workdir, OVL_CATTR(S_IFREG | 0));
	err = PTR_ERR(temp);
	if (IS_ERR(temp))
		goto out_unlock;

	dest = ovl_lookup_temp(ofs, workdir);
	err = PTR_ERR(dest);
	if (IS_ERR(dest)) {
		dput(temp);
		goto out_unlock;
	}

	/* Name is inline and stable - using snapshot as a copy helper */
	take_dentry_name_snapshot(&name, temp);
	err = ovl_do_rename(ofs, dir, temp, dir, dest, RENAME_WHITEOUT);
	if (err) {
		/* -EINVAL is reported as "not supported", not as an error */
		if (err == -EINVAL)
			err = 0;
		goto cleanup_temp;
	}

	/* Look up the old name of the temp file */
	whiteout = ovl_lookup_upper(ofs, name.name.name, workdir, name.name.len);
	err = PTR_ERR(whiteout);
	if (IS_ERR(whiteout))
		goto cleanup_temp;

	/* This 0/1 answer is what gets returned on the success path */
	err = ovl_is_whiteout(whiteout);

	/* Best effort cleanup of whiteout and temp file */
	if (err)
		ovl_cleanup(ofs, dir, whiteout);
	dput(whiteout);

cleanup_temp:
	ovl_cleanup(ofs, dir, temp);
	release_dentry_name_snapshot(&name);
	dput(temp);
	dput(dest);

out_unlock:
	inode_unlock(dir);

	return err;
}

/*
 * Look up @name below @parent and, if the lookup succeeds with a negative
 * dentry, create it with @mode.
 *
 * The caller's reference on @parent is always dropped.  Returns the child as
 * handed back by ovl_lookup_upper() or ovl_create_real(); callers test the
 * result with IS_ERR().
 */
static struct dentry *ovl_lookup_or_create(struct ovl_fs *ofs,
					   struct dentry *parent,
					   const char *name, umode_t mode)
{
	size_t len = strlen(name);
	struct dentry *child;

	inode_lock_nested(parent->d_inode, I_MUTEX_PARENT);
	child = ovl_lookup_upper(ofs, name, parent, len);
	if (!IS_ERR(child) && !child->d_inode)
		child = ovl_create_real(ofs, parent->d_inode, child,
					OVL_CATTR(mode));
	inode_unlock(parent->d_inode);
	dput(parent);

	return child;
}

/*
 * Creates $workdir/work/incompat/volatile/dirty file if it is not already
 * present.
 *
 * The path is walked one component at a time starting at ofs->workbasedir;
 * the last component is a regular file, all others are directories.
 * Returns 0 on success or the error of the failing component.
 */
static int ovl_create_volatile_dirty(struct ovl_fs *ofs)
{
	unsigned int ctr;
	struct dentry *d = dget(ofs->workbasedir);
	static const char *const volatile_path[] = {
		OVL_WORKDIR_NAME, "incompat", "volatile", "dirty"
	};
	const char *const *name = volatile_path;

	/* ctr counts down, so "ctr > 1" is true for all but the last name */
	for (ctr = ARRAY_SIZE(volatile_path); ctr; ctr--, name++) {
		/* The reference on the previous dentry is dropped by the callee */
		d = ovl_lookup_or_create(ofs, d, *name, ctr > 1 ? S_IFDIR : S_IFREG);
		if (IS_ERR(d))
			return PTR_ERR(d);
	}
	dput(d);
	return 0;
}

/*
 * Create the work directory and probe the upper/work filesystem for the
 * features overlayfs relies on: d_type, O_TMPFILE, RENAME_WHITEOUT, overlay
 * xattrs and file handles.  Missing features are mostly handled by warning
 * and turning off the options that depend on them.
 *
 * Write access to the upper mount is held for the whole function.
 *
 * Returns 0 on success (also when no work directory could be created, in
 * which case ofs->workdir stays unset) or a negative error code.
 */
static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
			    const struct path *workpath)
{
	struct vfsmount *mnt = ovl_upper_mnt(ofs);
	struct dentry *workdir;
	struct file *tmpfile;
	bool rename_whiteout;
	bool d_type;
	int fh_type;
	int err;

	err = mnt_want_write(mnt);
	if (err)
		return err;

	workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
	/* For a NULL workdir err is 0 here, so this returns success */
	err = PTR_ERR(workdir);
	if (IS_ERR_OR_NULL(workdir))
		goto out;

	ofs->workdir = workdir;

	err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir");
	if (err)
		goto out;

	/*
	 * Upper should support d_type, else whiteouts are visible.  Given
	 * workdir and upper are on same fs, we can do iterate_dir() on
	 * workdir. This check requires successful creation of workdir in
	 * previous step.
	 */
	err = ovl_check_d_type_supported(workpath);
	if (err < 0)
		goto out;

	d_type = err;
	if (!d_type)
		pr_warn("upper fs needs to support d_type.\n");

	/* Check if upper/work fs supports O_TMPFILE */
	tmpfile = ovl_do_tmpfile(ofs, ofs->workdir, S_IFREG | 0);
	ofs->tmpfile = !IS_ERR(tmpfile);
	if (ofs->tmpfile)
		fput(tmpfile);
	else
		pr_warn("upper fs does not support tmpfile.\n");


	/* Check if upper/work fs supports RENAME_WHITEOUT */
	err = ovl_check_rename_whiteout(ofs);
	if (err < 0)
		goto out;

	rename_whiteout = err;
	if (!rename_whiteout)
		pr_warn("upper fs does not support RENAME_WHITEOUT.\n");

	/*
	 * Check if upper/work fs supports (trusted|user).overlay.* xattr
	 */
	err = ovl_setxattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE, "0", 1);
	if (err) {
		pr_warn("failed to set xattr on upper\n");
		ofs->noxattr = true;
		if (ovl_redirect_follow(ofs)) {
			ofs->config.redirect_mode = OVL_REDIRECT_NOFOLLOW;
			pr_warn("...falling back to redirect_dir=nofollow.\n");
		}
		if (ofs->config.metacopy) {
			ofs->config.metacopy = false;
			pr_warn("...falling back to metacopy=off.\n");
		}
		if (ofs->config.index) {
			ofs->config.index = false;
			pr_warn("...falling back to index=off.\n");
		}
		if (ovl_has_fsid(ofs)) {
			ofs->config.uuid = OVL_UUID_NULL;
			pr_warn("...falling back to uuid=null.\n");
		}
		/*
		 * xattr support is required for persistent st_ino.
		 * Without persistent st_ino, xino=auto falls back to xino=off.
		 */
		if (ofs->config.xino == OVL_XINO_AUTO) {
			ofs->config.xino = OVL_XINO_OFF;
			pr_warn("...falling back to xino=off.\n");
		}
		if (err == -EPERM && !ofs->config.userxattr)
			pr_info("try mounting with 'userxattr' option\n");
		/* Missing xattr support is not fatal by itself */
		err = 0;
	} else {
		/* The probe xattr is removed again */
		ovl_removexattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE);
	}

	/*
	 * We allowed sub-optimal upper fs configuration and don't want to break
	 * users over kernel upgrade, but we never allowed remote upper fs, so
	 * we can enforce strict requirements for remote upper fs.
	 */
	if (ovl_dentry_remote(ofs->workdir) &&
	    (!d_type || !rename_whiteout || ofs->noxattr)) {
		pr_err("upper fs missing required features.\n");
		err = -EINVAL;
		goto out;
	}

	/*
	 * For volatile mount, create an incompat/volatile/dirty file to keep
	 * track of it.
	 */
	if (ofs->config.ovl_volatile) {
		err = ovl_create_volatile_dirty(ofs);
		if (err < 0) {
			pr_err("Failed to create volatile/dirty file.\n");
			goto out;
		}
	}

	/* Check if upper/work fs supports file handles */
	fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
	if (ofs->config.index && !fh_type) {
		ofs->config.index = false;
		pr_warn("upper fs does not support file handles, falling back to index=off.\n");
	}
	ofs->nofh |= !fh_type;

	/* Check if upper fs has 32bit inode numbers */
	if (fh_type != FILEID_INO32_GEN)
		ofs->xino_mode = -1;

	/* NFS export of r/w mount depends on index */
	if (ofs->config.nfs_export && !ofs->config.index) {
		pr_warn("NFS export requires \"index=on\", falling back to nfs_export=off.\n");
		ofs->config.nfs_export = false;
	}
out:
	mnt_drop_write(mnt);
	return err;
}

/*
 * Validate @workpath against @upperpath and set up the work directory.
 *
 * Both paths must be on the same mount and must pass ovl_workdir_ok().  The
 * work base directory is then referenced, marked in-use and given a trap
 * inode before ovl_make_workdir() is called.
 *
 * Returns 0 on success or a negative error code.
 */
static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs,
			   const struct path *upperpath,
			   const struct path *workpath)
{
	int err;

	err = -EINVAL;
	if (upperpath->mnt != workpath->mnt) {
		pr_err("workdir and upperdir must reside under the same mount\n");
		return err;
	}
	if (!ovl_workdir_ok(workpath->dentry, upperpath->dentry)) {
		pr_err("workdir and upperdir must be separate subtrees\n");
		return err;
	}

	ofs->workbasedir = dget(workpath->dentry);

	if (ovl_inuse_trylock(ofs->workbasedir)) {
		ofs->workdir_locked = true;
	} else {
		err = ovl_report_in_use(ofs, "workdir");
		if (err)
			return err;
	}

	err = ovl_setup_trap(sb, ofs->workbasedir, &ofs->workbasedir_trap,
			     "workdir");
	if (err)
		return err;

	return ovl_make_workdir(sb, ofs, workpath);
}

/*
 * Set up the index directory, which then also takes over as work directory.
 *
 * After verifying that the lower root is the origin of the upper root, the
 * current workdir and its trap are dropped, the index directory is created
 * (or reused) and verified against the upper root, and stale index entries
 * are cleaned up.
 *
 * Returns 0 on success or a negative error code.  If the index directory
 * could not be created, 0 is returned with ofs->indexdir unset, after a
 * warning.
 */
static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
			    struct ovl_entry *oe, const struct path *upperpath)
{
	struct vfsmount *mnt = ovl_upper_mnt(ofs);
	struct dentry *indexdir;
	int err;

	err = mnt_want_write(mnt);
	if (err)
		return err;

	/* Verify lower root is upper root origin */
	err = ovl_verify_origin(ofs, upperpath->dentry,
				ovl_lowerstack(oe)->dentry, true);
	if (err) {
		pr_err("failed to verify upper root origin\n");
		goto out;
	}

	/* index dir will act also as workdir */
	iput(ofs->workdir_trap);
	ofs->workdir_trap = NULL;
	dput(ofs->workdir);
	ofs->workdir = NULL;
	indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
	if (IS_ERR(indexdir)) {
		err = PTR_ERR(indexdir);
	} else if (indexdir) {
		ofs->indexdir = indexdir;
		/* workdir holds a reference of its own on the same dentry */
		ofs->workdir = dget(indexdir);

		err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap,
				     "indexdir");
		if (err)
			goto out;

		/*
		 * Verify upper root is exclusively associated with index dir.
		 * Older kernels stored upper fh in ".overlay.origin"
		 * xattr.  If that xattr exists, verify that it is a match to
		 * upper dir file handle. In any case, verify or set xattr
		 * ".overlay.upper" to indicate that index may have
		 * directory entries.
		 */
		if (ovl_check_origin_xattr(ofs, ofs->indexdir)) {
			err = ovl_verify_set_fh(ofs, ofs->indexdir,
						OVL_XATTR_ORIGIN,
						upperpath->dentry, true, false);
			if (err)
				pr_err("failed to verify index dir 'origin' xattr\n");
		}
		/* NOTE: this assignment replaces any error from the check above */
		err = ovl_verify_upper(ofs, ofs->indexdir, upperpath->dentry,
				       true);
		if (err)
			pr_err("failed to verify index dir 'upper' xattr\n");

		/* Cleanup bad/stale/orphan index entries */
		if (!err)
			err = ovl_indexdir_cleanup(ofs);
	}
	if (err || !ofs->indexdir)
		pr_warn("try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");

out:
	mnt_drop_write(mnt);
	return err;
}

/*
 * Check whether @uuid is acceptable for a new lower filesystem.
 *
 * Always true when neither nfs_export nor an upper layer is in use.
 * Otherwise false for a null uuid when ovl_allow_offline_changes() holds,
 * and false when a lower fs that is already registered has the same uuid; in
 * the latter case that fs is marked bad_uuid as well.
 */
static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
{
	unsigned int i;

	if (!ofs->config.nfs_export && !ovl_upper_mnt(ofs))
		return true;

	/*
	 * We allow using single lower with null uuid for index and nfs_export
	 * for example to support those features with single lower squashfs.
	 * To avoid regressions in setups of overlay with re-formatted lower
	 * squashfs, do not allow decoding origin with lower null uuid unless
	 * user opted-in to one of the new features that require following the
	 * lower inode of non-dir upper.
	 */
	if (ovl_allow_offline_changes(ofs) && uuid_is_null(uuid))
		return false;

	for (i = 0; i < ofs->numfs; i++) {
		/*
		 * We use uuid to associate an overlay lower file handle with a
		 * lower layer, so we can accept lower fs with null uuid as long
		 * as all lower layers with null uuid are on the same fs.
		 * If we detect multiple lower fs with the same uuid, we
		 * disable lower file handle decoding on all of them.
		 */
		if (ofs->fs[i].is_lower &&
		    uuid_equal(&ofs->fs[i].sb->s_uuid, uuid)) {
			ofs->fs[i].bad_uuid = true;
			return false;
		}
	}
	return true;
}

/*
 * Get a unique fsid for the layer
 *
 * If the super block of @path is already present in ofs->fs[], its index is
 * returned.  Otherwise the uuid is checked (turning off xino=auto, index and
 * nfs_export when it is not usable), an anonymous bdev is allocated and a
 * new ofs->fs[] entry is appended.
 *
 * Returns the fsid or a negative error code.
 */
static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
{
	struct super_block *sb = path->mnt->mnt_sb;
	unsigned int i;
	dev_t dev;
	int err;
	bool bad_uuid = false;
	bool warn = false;

	for (i = 0; i < ofs->numfs; i++) {
		if (ofs->fs[i].sb == sb)
			return i;
	}

	if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) {
		bad_uuid = true;
		if (ofs->config.xino == OVL_XINO_AUTO) {
			ofs->config.xino = OVL_XINO_OFF;
			warn = true;
		}
		if (ofs->config.index || ofs->config.nfs_export) {
			ofs->config.index = false;
			ofs->config.nfs_export = false;
			warn = true;
		}
		/* Warn only if an option was actually changed above */
		if (warn) {
			pr_warn("%s uuid detected in lower fs '%pd2', falling back to xino=%s,index=off,nfs_export=off.\n",
				uuid_is_null(&sb->s_uuid) ? "null" :
							    "conflicting",
				path->dentry, ovl_xino_mode(&ofs->config));
		}
	}

	err = get_anon_bdev(&dev);
	if (err) {
		pr_err("failed to get anonymous bdev for lowerpath\n");
		return err;
	}

	ofs->fs[ofs->numfs].sb = sb;
	ofs->fs[ofs->numfs].pseudo_dev = dev;
	ofs->fs[ofs->numfs].bad_uuid = bad_uuid;

	/* Post-increment: the index of the entry just filled in is returned */
	return ofs->numfs++;
}

/*
 * The fsid after the last lower fsid is used for the data layers.
 * It is a "null fs" with a null sb, null uuid, and no pseudo dev.
 */
static int ovl_get_data_fsid(struct ovl_fs *ofs)
{
	return ofs->numfs;
}


/*
 * Set up the lower layers described by @ctx in @layers and ofs->fs[].
 *
 * ofs->fs[] is allocated with room for the upper fs (index 0) and the
 * "null fs" of the data layers in addition to one entry per lower layer.
 * Each lower layer gets an fsid, a trap inode and a private clone of its
 * mount that is made read-only and noatime.  At the end ofs->xino_mode is
 * derived from the number of filesystems and the xino mount option.
 *
 * Returns 0 on success or a negative error code.  Nothing that was set up
 * before a failure is undone here.
 */
static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
			  struct ovl_fs_context *ctx, struct ovl_layer *layers)
{
	int err;
	unsigned int i;
	size_t nr_merged_lower;

	ofs->fs = kcalloc(ctx->nr + 2, sizeof(struct ovl_sb), GFP_KERNEL);
	if (ofs->fs == NULL)
		return -ENOMEM;

	/*
	 * idx/fsid 0 are reserved for upper fs even with lower only overlay
	 * and the last fsid is reserved for "null fs" of the data layers.
	 */
	ofs->numfs++;

	/*
	 * All lower layers that share the same fs as upper layer, use the same
	 * pseudo_dev as upper layer.  Allocate fs[0].pseudo_dev even for lower
	 * only overlay to simplify ovl_free_fs().
	 * is_lower will be set if upper fs is shared with a lower layer.
	 */
	err = get_anon_bdev(&ofs->fs[0].pseudo_dev);
	if (err) {
		pr_err("failed to get anonymous bdev for upper fs\n");
		return err;
	}

	if (ovl_upper_mnt(ofs)) {
		ofs->fs[0].sb = ovl_upper_mnt(ofs)->mnt_sb;
		ofs->fs[0].is_lower = false;
	}

	/* The first nr_merged_lower layers are regular ones, the rest data */
	nr_merged_lower = ctx->nr - ctx->nr_data;
	for (i = 0; i < ctx->nr; i++) {
		struct ovl_fs_context_layer *l = &ctx->lower[i];
		struct vfsmount *mnt;
		struct inode *trap;
		int fsid;

		if (i < nr_merged_lower)
			fsid = ovl_get_fsid(ofs, &l->path);
		else
			fsid = ovl_get_data_fsid(ofs);
		if (fsid < 0)
			return fsid;

		/*
		 * Check if lower root conflicts with this overlay layers before
		 * checking if it is in-use as upperdir/workdir of "another"
		 * mount, because we do not bother to check in ovl_is_inuse() if
		 * the upperdir/workdir is in fact in-use by our
		 * upperdir/workdir.
		 */
		err = ovl_setup_trap(sb, l->path.dentry, &trap, "lowerdir");
		if (err)
			return err;

		if (ovl_is_inuse(l->path.dentry)) {
			err = ovl_report_in_use(ofs, "lowerdir");
			if (err) {
				iput(trap);
				return err;
			}
		}

		mnt = clone_private_mount(&l->path);
		err = PTR_ERR(mnt);
		if (IS_ERR(mnt)) {
			pr_err("failed to clone lowerpath\n");
			iput(trap);
			return err;
		}

		/*
		 * Make lower layers R/O.  That way fchmod/fchown on lower file
		 * will fail instead of modifying lower fs.
		 */
		mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;

		layers[ofs->numlayer].trap = trap;
		layers[ofs->numlayer].mnt = mnt;
		layers[ofs->numlayer].idx = ofs->numlayer;
		layers[ofs->numlayer].fsid = fsid;
		layers[ofs->numlayer].fs = &ofs->fs[fsid];
		/* Store for printing lowerdir=... in ovl_show_options() */
		ofs->config.lowerdirs[ofs->numlayer] = l->name;
		/* The name string now belongs to ofs->config.lowerdirs[] */
		l->name = NULL;
		ofs->numlayer++;
		ofs->fs[fsid].is_lower = true;
	}

	/*
	 * When all layers on same fs, overlay can use real inode numbers.
	 * With mount option "xino=<on|auto>", mounter declares that there are
	 * enough free high bits in underlying fs to hold the unique fsid.
	 * If overlayfs does encounter underlying inodes using the high xino
	 * bits reserved for fsid, it emits a warning and uses the original
	 * inode number or a non persistent inode number allocated from a
	 * dedicated range.
	 */
	if (ofs->numfs - !ovl_upper_mnt(ofs) == 1) {
		if (ofs->config.xino == OVL_XINO_ON)
			pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n");
		ofs->xino_mode = 0;
	} else if (ofs->config.xino == OVL_XINO_OFF) {
		ofs->xino_mode = -1;
	} else if (ofs->xino_mode < 0) {
		/*
		 * This is a roundup of number of bits needed for encoding
		 * fsid, where fsid 0 is reserved for upper fs (even with
		 * lower only overlay) +1 extra bit is reserved for the non
		 * persistent inode number range that is used for resolving
		 * xino lower bits overflow.
		 */
		BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 30);
		ofs->xino_mode = ilog2(ofs->numfs - 1) + 2;
	}

	if (ofs->xino_mode > 0) {
		pr_info("\"xino\" feature enabled using %d upper inode bits.\n",
			ofs->xino_mode);
	}

	return 0;
}

static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
					    struct ovl_fs_context *ctx,
					    struct ovl_fs *ofs,
					    struct ovl_layer *layers)
{
	int err;
	unsigned int i;
	size_t nr_merged_lower;
	struct ovl_entry *oe;
	struct ovl_path *lowerstack;

	struct ovl_fs_context_layer *l;

	if (!ofs->config.upperdir && ctx->nr == 1) {
		pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n");
		return ERR_PTR(-EINVAL);
	}

	err = -EINVAL;
	for (i = 0; i < ctx->nr; i++) {
		l = &ctx->lower[i];

		err = ovl_lower_dir(l->name, &l->path, ofs, &sb->s_stack_depth);
		if (err)
			return ERR_PTR(err);
	}

	err = -EINVAL;
	sb->s_stack_depth++;
	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
		pr_err("maximum fs stacking depth exceeded\n");
		return ERR_PTR(err);
	}

	err = ovl_get_layers(sb, ofs, ctx, layers);
	if (err)
		return ERR_PTR(err);

	err = -ENOMEM;
	/* Data-only layers are not merged in root directory */
	nr_merged_lower
= ctx->nr - ctx->nr_data; 1206 oe = ovl_alloc_entry(nr_merged_lower); 1207 if (!oe) 1208 return ERR_PTR(err); 1209 1210 lowerstack = ovl_lowerstack(oe); 1211 for (i = 0; i < nr_merged_lower; i++) { 1212 l = &ctx->lower[i]; 1213 lowerstack[i].dentry = dget(l->path.dentry); 1214 lowerstack[i].layer = &ofs->layers[i + 1]; 1215 } 1216 ofs->numdatalayer = ctx->nr_data; 1217 1218 return oe; 1219 } 1220 1221 /* 1222 * Check if this layer root is a descendant of: 1223 * - another layer of this overlayfs instance 1224 * - upper/work dir of any overlayfs instance 1225 */ 1226 static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs, 1227 struct dentry *dentry, const char *name, 1228 bool is_lower) 1229 { 1230 struct dentry *next = dentry, *parent; 1231 int err = 0; 1232 1233 if (!dentry) 1234 return 0; 1235 1236 parent = dget_parent(next); 1237 1238 /* Walk back ancestors to root (inclusive) looking for traps */ 1239 while (!err && parent != next) { 1240 if (is_lower && ovl_lookup_trap_inode(sb, parent)) { 1241 err = -ELOOP; 1242 pr_err("overlapping %s path\n", name); 1243 } else if (ovl_is_inuse(parent)) { 1244 err = ovl_report_in_use(ofs, name); 1245 } 1246 next = parent; 1247 parent = dget_parent(next); 1248 dput(next); 1249 } 1250 1251 dput(parent); 1252 1253 return err; 1254 } 1255 1256 /* 1257 * Check if any of the layers or work dirs overlap. 1258 */ 1259 static int ovl_check_overlapping_layers(struct super_block *sb, 1260 struct ovl_fs *ofs) 1261 { 1262 int i, err; 1263 1264 if (ovl_upper_mnt(ofs)) { 1265 err = ovl_check_layer(sb, ofs, ovl_upper_mnt(ofs)->mnt_root, 1266 "upperdir", false); 1267 if (err) 1268 return err; 1269 1270 /* 1271 * Checking workbasedir avoids hitting ovl_is_inuse(parent) of 1272 * this instance and covers overlapping work and index dirs, 1273 * unless work or index dir have been moved since created inside 1274 * workbasedir. In that case, we already have their traps in 1275 * inode cache and we will catch that case on lookup. 
1276 */ 1277 err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir", 1278 false); 1279 if (err) 1280 return err; 1281 } 1282 1283 for (i = 1; i < ofs->numlayer; i++) { 1284 err = ovl_check_layer(sb, ofs, 1285 ofs->layers[i].mnt->mnt_root, 1286 "lowerdir", true); 1287 if (err) 1288 return err; 1289 } 1290 1291 return 0; 1292 } 1293 1294 static struct dentry *ovl_get_root(struct super_block *sb, 1295 struct dentry *upperdentry, 1296 struct ovl_entry *oe) 1297 { 1298 struct dentry *root; 1299 struct ovl_path *lowerpath = ovl_lowerstack(oe); 1300 unsigned long ino = d_inode(lowerpath->dentry)->i_ino; 1301 int fsid = lowerpath->layer->fsid; 1302 struct ovl_inode_params oip = { 1303 .upperdentry = upperdentry, 1304 .oe = oe, 1305 }; 1306 1307 root = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); 1308 if (!root) 1309 return NULL; 1310 1311 if (upperdentry) { 1312 /* Root inode uses upper st_ino/i_ino */ 1313 ino = d_inode(upperdentry)->i_ino; 1314 fsid = 0; 1315 ovl_dentry_set_upper_alias(root); 1316 if (ovl_is_impuredir(sb, upperdentry)) 1317 ovl_set_flag(OVL_IMPURE, d_inode(root)); 1318 } 1319 1320 /* Root is always merge -> can have whiteouts */ 1321 ovl_set_flag(OVL_WHITEOUTS, d_inode(root)); 1322 ovl_dentry_set_flag(OVL_E_CONNECTED, root); 1323 ovl_set_upperdata(d_inode(root)); 1324 ovl_inode_init(d_inode(root), &oip, ino, fsid); 1325 ovl_dentry_init_flags(root, upperdentry, oe, DCACHE_OP_WEAK_REVALIDATE); 1326 /* root keeps a reference of upperdentry */ 1327 dget(upperdentry); 1328 1329 return root; 1330 } 1331 1332 int ovl_fill_super(struct super_block *sb, struct fs_context *fc) 1333 { 1334 struct ovl_fs *ofs = sb->s_fs_info; 1335 struct ovl_fs_context *ctx = fc->fs_private; 1336 struct dentry *root_dentry; 1337 struct ovl_entry *oe; 1338 struct ovl_layer *layers; 1339 struct cred *cred; 1340 int err; 1341 1342 err = -EIO; 1343 if (WARN_ON(fc->user_ns != current_user_ns())) 1344 goto out_err; 1345 1346 sb->s_d_op = &ovl_dentry_operations; 1347 1348 err = -ENOMEM; 
1349 ofs->creator_cred = cred = prepare_creds(); 1350 if (!cred) 1351 goto out_err; 1352 1353 err = ovl_fs_params_verify(ctx, &ofs->config); 1354 if (err) 1355 goto out_err; 1356 1357 err = -EINVAL; 1358 if (ctx->nr == 0) { 1359 if (!(fc->sb_flags & SB_SILENT)) 1360 pr_err("missing 'lowerdir'\n"); 1361 goto out_err; 1362 } 1363 1364 err = -ENOMEM; 1365 layers = kcalloc(ctx->nr + 1, sizeof(struct ovl_layer), GFP_KERNEL); 1366 if (!layers) 1367 goto out_err; 1368 1369 ofs->config.lowerdirs = kcalloc(ctx->nr + 1, sizeof(char *), GFP_KERNEL); 1370 if (!ofs->config.lowerdirs) { 1371 kfree(layers); 1372 goto out_err; 1373 } 1374 ofs->layers = layers; 1375 /* 1376 * Layer 0 is reserved for upper even if there's no upper. 1377 * For consistency, config.lowerdirs[0] is NULL. 1378 */ 1379 ofs->numlayer = 1; 1380 1381 sb->s_stack_depth = 0; 1382 sb->s_maxbytes = MAX_LFS_FILESIZE; 1383 atomic_long_set(&ofs->last_ino, 1); 1384 /* Assume underlying fs uses 32bit inodes unless proven otherwise */ 1385 if (ofs->config.xino != OVL_XINO_OFF) { 1386 ofs->xino_mode = BITS_PER_LONG - 32; 1387 if (!ofs->xino_mode) { 1388 pr_warn("xino not supported on 32bit kernel, falling back to xino=off.\n"); 1389 ofs->config.xino = OVL_XINO_OFF; 1390 } 1391 } 1392 1393 /* alloc/destroy_inode needed for setting up traps in inode cache */ 1394 sb->s_op = &ovl_super_operations; 1395 1396 if (ofs->config.upperdir) { 1397 struct super_block *upper_sb; 1398 1399 err = -EINVAL; 1400 if (!ofs->config.workdir) { 1401 pr_err("missing 'workdir'\n"); 1402 goto out_err; 1403 } 1404 1405 err = ovl_get_upper(sb, ofs, &layers[0], &ctx->upper); 1406 if (err) 1407 goto out_err; 1408 1409 upper_sb = ovl_upper_mnt(ofs)->mnt_sb; 1410 if (!ovl_should_sync(ofs)) { 1411 ofs->errseq = errseq_sample(&upper_sb->s_wb_err); 1412 if (errseq_check(&upper_sb->s_wb_err, ofs->errseq)) { 1413 err = -EIO; 1414 pr_err("Cannot mount volatile when upperdir has an unseen error. 
Sync upperdir fs to clear state.\n"); 1415 goto out_err; 1416 } 1417 } 1418 1419 err = ovl_get_workdir(sb, ofs, &ctx->upper, &ctx->work); 1420 if (err) 1421 goto out_err; 1422 1423 if (!ofs->workdir) 1424 sb->s_flags |= SB_RDONLY; 1425 1426 sb->s_stack_depth = upper_sb->s_stack_depth; 1427 sb->s_time_gran = upper_sb->s_time_gran; 1428 } 1429 oe = ovl_get_lowerstack(sb, ctx, ofs, layers); 1430 err = PTR_ERR(oe); 1431 if (IS_ERR(oe)) 1432 goto out_err; 1433 1434 /* If the upper fs is nonexistent, we mark overlayfs r/o too */ 1435 if (!ovl_upper_mnt(ofs)) 1436 sb->s_flags |= SB_RDONLY; 1437 1438 if (!ovl_origin_uuid(ofs) && ofs->numfs > 1) { 1439 pr_warn("The uuid=off requires a single fs for lower and upper, falling back to uuid=null.\n"); 1440 ofs->config.uuid = OVL_UUID_NULL; 1441 } else if (ovl_has_fsid(ofs) && ovl_upper_mnt(ofs)) { 1442 /* Use per instance persistent uuid/fsid */ 1443 ovl_init_uuid_xattr(sb, ofs, &ctx->upper); 1444 } 1445 1446 if (!ovl_force_readonly(ofs) && ofs->config.index) { 1447 err = ovl_get_indexdir(sb, ofs, oe, &ctx->upper); 1448 if (err) 1449 goto out_free_oe; 1450 1451 /* Force r/o mount with no index dir */ 1452 if (!ofs->indexdir) 1453 sb->s_flags |= SB_RDONLY; 1454 } 1455 1456 err = ovl_check_overlapping_layers(sb, ofs); 1457 if (err) 1458 goto out_free_oe; 1459 1460 /* Show index=off in /proc/mounts for forced r/o mount */ 1461 if (!ofs->indexdir) { 1462 ofs->config.index = false; 1463 if (ovl_upper_mnt(ofs) && ofs->config.nfs_export) { 1464 pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n"); 1465 ofs->config.nfs_export = false; 1466 } 1467 } 1468 1469 if (ofs->config.metacopy && ofs->config.nfs_export) { 1470 pr_warn("NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n"); 1471 ofs->config.nfs_export = false; 1472 } 1473 1474 /* 1475 * Support encoding decodable file handles with nfs_export=on 1476 * and encoding non-decodable file handles with nfs_export=off 1477 * 
if all layers support file handles. 1478 */ 1479 if (ofs->config.nfs_export) 1480 sb->s_export_op = &ovl_export_operations; 1481 else if (!ofs->nofh) 1482 sb->s_export_op = &ovl_export_fid_operations; 1483 1484 /* Never override disk quota limits or use reserved space */ 1485 cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); 1486 1487 sb->s_magic = OVERLAYFS_SUPER_MAGIC; 1488 sb->s_xattr = ofs->config.userxattr ? ovl_user_xattr_handlers : 1489 ovl_trusted_xattr_handlers; 1490 sb->s_fs_info = ofs; 1491 sb->s_flags |= SB_POSIXACL; 1492 sb->s_iflags |= SB_I_SKIP_SYNC | SB_I_IMA_UNVERIFIABLE_SIGNATURE; 1493 1494 err = -ENOMEM; 1495 root_dentry = ovl_get_root(sb, ctx->upper.dentry, oe); 1496 if (!root_dentry) 1497 goto out_free_oe; 1498 1499 sb->s_root = root_dentry; 1500 1501 return 0; 1502 1503 out_free_oe: 1504 ovl_free_entry(oe); 1505 out_err: 1506 ovl_free_fs(ofs); 1507 sb->s_fs_info = NULL; 1508 return err; 1509 } 1510 1511 struct file_system_type ovl_fs_type = { 1512 .owner = THIS_MODULE, 1513 .name = "overlay", 1514 .init_fs_context = ovl_init_fs_context, 1515 .parameters = ovl_parameter_spec, 1516 .fs_flags = FS_USERNS_MOUNT, 1517 .kill_sb = kill_anon_super, 1518 }; 1519 MODULE_ALIAS_FS("overlay"); 1520 1521 static void ovl_inode_init_once(void *foo) 1522 { 1523 struct ovl_inode *oi = foo; 1524 1525 inode_init_once(&oi->vfs_inode); 1526 } 1527 1528 static int __init ovl_init(void) 1529 { 1530 int err; 1531 1532 ovl_inode_cachep = kmem_cache_create("ovl_inode", 1533 sizeof(struct ovl_inode), 0, 1534 (SLAB_RECLAIM_ACCOUNT| 1535 SLAB_MEM_SPREAD|SLAB_ACCOUNT), 1536 ovl_inode_init_once); 1537 if (ovl_inode_cachep == NULL) 1538 return -ENOMEM; 1539 1540 err = ovl_aio_request_cache_init(); 1541 if (!err) { 1542 err = register_filesystem(&ovl_fs_type); 1543 if (!err) 1544 return 0; 1545 1546 ovl_aio_request_cache_destroy(); 1547 } 1548 kmem_cache_destroy(ovl_inode_cachep); 1549 1550 return err; 1551 } 1552 1553 static void __exit ovl_exit(void) 1554 { 1555 
unregister_filesystem(&ovl_fs_type); 1556 1557 /* 1558 * Make sure all delayed rcu free inodes are flushed before we 1559 * destroy cache. 1560 */ 1561 rcu_barrier(); 1562 kmem_cache_destroy(ovl_inode_cachep); 1563 ovl_aio_request_cache_destroy(); 1564 } 1565 1566 module_init(ovl_init); 1567 module_exit(ovl_exit); 1568