// SPDX-License-Identifier: GPL-2.0-only
/*
 *
 * Copyright (C) 2011 Novell Inc.
 */

#include <uapi/linux/magic.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/xattr.h>
#include <linux/mount.h>
#include <linux/parser.h>
#include <linux/module.h>
#include <linux/statfs.h>
#include <linux/seq_file.h>
#include <linux/posix_acl_xattr.h>
#include <linux/exportfs.h>
#include <linux/file.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include "overlayfs.h"

MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Overlay filesystem");
MODULE_LICENSE("GPL");


struct ovl_dir_cache;

/*
 * Module parameters: these provide the system-wide defaults for the
 * corresponding mount options; each can still be overridden per mount.
 */
static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
MODULE_PARM_DESC(redirect_dir,
		 "Default to on or off for the redirect_dir feature");

static bool ovl_redirect_always_follow =
	IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
module_param_named(redirect_always_follow, ovl_redirect_always_follow,
		   bool, 0644);
MODULE_PARM_DESC(redirect_always_follow,
		 "Follow redirects even if redirect_dir feature is turned off");

static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
module_param_named(index, ovl_index_def, bool, 0644);
MODULE_PARM_DESC(index,
		 "Default to on or off for the inodes index feature");

static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
MODULE_PARM_DESC(nfs_export,
		 "Default to on or off for the NFS export feature");

static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO);
module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
MODULE_PARM_DESC(xino_auto,
		 "Auto enable xino feature");

static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY);
module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
MODULE_PARM_DESC(metacopy,
		 "Default to on or off for the metadata only copy up feature");

/*
 * d_op->d_real(): map an overlay dentry to the real (upper or lower) dentry
 * whose inode is @inode, or to the default data dentry when @inode is NULL.
 * Falls back to returning @dentry itself (with a WARN) if no matching real
 * dentry is found.
 */
static struct dentry *ovl_d_real(struct dentry *dentry,
				 const struct inode *inode)
{
	struct dentry *real = NULL, *lower;

	/* It's an overlay file */
	if (inode && d_inode(dentry) == inode)
		return dentry;

	if (!d_is_reg(dentry)) {
		if (!inode || inode == d_inode(dentry))
			return dentry;
		goto bug;
	}

	real = ovl_dentry_upper(dentry);
	if (real && (inode == d_inode(real)))
		return real;

	if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
		return real;

	/*
	 * Best effort lazy lookup of lowerdata for !inode case to return
	 * the real lowerdata dentry. The only current caller of d_real() with
	 * NULL inode is d_real_inode() from trace_uprobe and this caller is
	 * likely going to be followed reading from the file, before placing
	 * uprobes on offset within the file, so lowerdata should be available
	 * when setting the uprobe.
	 */
	ovl_maybe_lookup_lowerdata(dentry);
	lower = ovl_dentry_lowerdata(dentry);
	if (!lower)
		goto bug;
	real = lower;

	/* Handle recursion */
	real = d_real(real, inode);

	if (!inode || inode == d_inode(real))
		return real;
bug:
	WARN(1, "%s(%pd4, %s:%lu): real dentry (%p/%lu) not found\n",
	     __func__, dentry, inode ? inode->i_sb->s_id : "NULL",
	     inode ? inode->i_ino : 0, real,
	     real && d_inode(real) ? d_inode(real)->i_ino : 0);
	return dentry;
}

/*
 * Revalidate a single real (underlying) dentry on behalf of the overlay.
 * A NULL dentry trivially revalidates.  For the strong variant, a real
 * dentry that fails ->d_revalidate() is invalidated (outside RCU walk) and
 * -ESTALE is returned so the overlay dentry gets looked up again.
 */
static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak)
{
	int ret = 1;

	if (!d)
		return 1;

	if (weak) {
		if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE)
			ret = d->d_op->d_weak_revalidate(d, flags);
	} else if (d->d_flags & DCACHE_OP_REVALIDATE) {
		ret = d->d_op->d_revalidate(d, flags);
		if (!ret) {
			if (!(flags & LOOKUP_RCU))
				d_invalidate(d);
			ret = -ESTALE;
		}
	}
	return ret;
}

/*
 * Revalidate all real dentries backing an overlay dentry: the upper dentry
 * first, then each lower layer, stopping at the first failure.
 */
static int ovl_dentry_revalidate_common(struct dentry *dentry,
					unsigned int flags, bool weak)
{
	struct ovl_entry *oe = OVL_E(dentry);
	struct ovl_path *lowerstack = ovl_lowerstack(oe);
	struct inode *inode = d_inode_rcu(dentry);
	struct dentry *upper;
	unsigned int i;
	int ret = 1;

	/* Careful in RCU mode */
	if (!inode)
		return -ECHILD;

	upper = ovl_i_dentry_upper(inode);
	if (upper)
		ret = ovl_revalidate_real(upper, flags, weak);

	for (i = 0; ret > 0 && i < ovl_numlower(oe); i++)
		ret = ovl_revalidate_real(lowerstack[i].dentry, flags, weak);

	return ret;
}

static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
{
	return ovl_dentry_revalidate_common(dentry, flags, false);
}

static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
{
	return ovl_dentry_revalidate_common(dentry, flags, true);
}

static const struct dentry_operations ovl_dentry_operations = {
	.d_real = ovl_d_real,
	.d_revalidate = ovl_dentry_revalidate,
	.d_weak_revalidate = ovl_dentry_weak_revalidate,
};

static struct kmem_cache *ovl_inode_cachep;

/* s_op->alloc_inode(): allocate and minimally initialize an overlay inode. */
static struct inode *ovl_alloc_inode(struct super_block *sb)
{
	struct ovl_inode *oi = alloc_inode_sb(sb, ovl_inode_cachep, GFP_KERNEL);

	if (!oi)
		return NULL;

	oi->cache = NULL;
	oi->redirect = NULL;
	oi->version = 0;
	oi->flags = 0;
	oi->__upperdentry = NULL;
	oi->lowerdata_redirect = NULL;
	oi->oe = NULL;
	mutex_init(&oi->lock);

	return &oi->vfs_inode;
}

/* s_op->free_inode(): release memory owned by the overlay inode itself. */
static void ovl_free_inode(struct inode *inode)
{
	struct ovl_inode *oi = OVL_I(inode);

	kfree(oi->redirect);
	mutex_destroy(&oi->lock);
	kmem_cache_free(ovl_inode_cachep, oi);
}

/*
 * s_op->destroy_inode(): drop references held by the overlay inode.
 * Directories own a readdir cache instead of a lowerdata redirect.
 */
static void ovl_destroy_inode(struct inode *inode)
{
	struct ovl_inode *oi = OVL_I(inode);

	dput(oi->__upperdentry);
	ovl_free_entry(oi->oe);
	if (S_ISDIR(inode->i_mode))
		ovl_dir_cache_free(inode);
	else
		kfree(oi->lowerdata_redirect);
}

/*
 * Tear down everything attached to an ovl_fs: trap inodes, work/index
 * dentries, per-layer mounts and traps, per-fs pseudo devices, config
 * strings and the creator credentials.
 */
static void ovl_free_fs(struct ovl_fs *ofs)
{
	struct vfsmount **mounts;
	unsigned i;

	iput(ofs->workbasedir_trap);
	iput(ofs->indexdir_trap);
	iput(ofs->workdir_trap);
	dput(ofs->whiteout);
	dput(ofs->indexdir);
	dput(ofs->workdir);
	if (ofs->workdir_locked)
		ovl_inuse_unlock(ofs->workbasedir);
	dput(ofs->workbasedir);
	if (ofs->upperdir_locked)
		ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root);

	/* Hack!  Reuse ofs->layers as a vfsmount array before freeing it */
	mounts = (struct vfsmount **) ofs->layers;
	for (i = 0; i < ofs->numlayer; i++) {
		iput(ofs->layers[i].trap);
		mounts[i] = ofs->layers[i].mnt;
		kfree(ofs->layers[i].name);
	}
	kern_unmount_array(mounts, ofs->numlayer);
	kfree(ofs->layers);
	for (i = 0; i < ofs->numfs; i++)
		free_anon_bdev(ofs->fs[i].pseudo_dev);
	kfree(ofs->fs);

	kfree(ofs->config.upperdir);
	kfree(ofs->config.workdir);
	if (ofs->creator_cred)
		put_cred(ofs->creator_cred);
	kfree(ofs);
}

static void ovl_put_super(struct super_block *sb)
{
	struct ovl_fs *ofs = sb->s_fs_info;

	if (ofs)
		ovl_free_fs(ofs);
}

/* Sync real dirty inodes in upper filesystem (if it exists) */
static int ovl_sync_fs(struct super_block *sb, int wait)
{
	struct ovl_fs *ofs = sb->s_fs_info;
	struct super_block *upper_sb;
	int ret;

	ret = ovl_sync_status(ofs);
	/*
	 * We have to always set the err, because the return value isn't
	 * checked in syncfs, and instead indirectly return an error via
	 * the sb's writeback errseq, which VFS inspects after this call.
	 */
	if (ret < 0) {
		errseq_set(&sb->s_wb_err, -EIO);
		return -EIO;
	}

	if (!ret)
		return ret;

	/*
	 * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC).
	 * All the super blocks will be iterated, including upper_sb.
	 *
	 * If this is a syncfs(2) call, then we do need to call
	 * sync_filesystem() on upper_sb, but enough if we do it when being
	 * called with wait == 1.
	 */
	if (!wait)
		return 0;

	upper_sb = ovl_upper_mnt(ofs)->mnt_sb;

	down_read(&upper_sb->s_umount);
	ret = sync_filesystem(upper_sb);
	up_read(&upper_sb->s_umount);

	return ret;
}

/**
 * ovl_statfs
 * @dentry: The dentry to query
 * @buf: The struct kstatfs to fill in with stats
 *
 * Get the filesystem statistics. As writes always target the upper layer
 * filesystem pass the statfs to the upper filesystem (if it exists)
 */
static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
	struct dentry *root_dentry = dentry->d_sb->s_root;
	struct path path;
	int err;

	ovl_path_real(root_dentry, &path);

	err = vfs_statfs(&path, buf);
	if (!err) {
		buf->f_namelen = ofs->namelen;
		buf->f_type = OVERLAYFS_SUPER_MAGIC;
	}

	return err;
}

/* Will this overlay be forced to mount/remount ro? */
static bool ovl_force_readonly(struct ovl_fs *ofs)
{
	return (!ovl_upper_mnt(ofs) || !ofs->workdir);
}

static const struct constant_table ovl_parameter_redirect_dir[] = {
	{ "off",	OVL_REDIRECT_OFF      },
	{ "follow",	OVL_REDIRECT_FOLLOW   },
	{ "nofollow",	OVL_REDIRECT_NOFOLLOW },
	{ "on",		OVL_REDIRECT_ON       },
	{}
};

/* Name of the currently configured redirect_dir mode, for show_options */
static const char *ovl_redirect_mode(struct ovl_config *config)
{
	return ovl_parameter_redirect_dir[config->redirect_mode].name;
}

/* Default redirect_dir mode derived from the module parameters */
static int ovl_redirect_mode_def(void)
{
	return ovl_redirect_dir_def	  ? OVL_REDIRECT_ON :
	       ovl_redirect_always_follow ? OVL_REDIRECT_FOLLOW :
					    OVL_REDIRECT_NOFOLLOW;
}

static const struct constant_table ovl_parameter_xino[] = {
	{ "off",	OVL_XINO_OFF  },
	{ "auto",	OVL_XINO_AUTO },
	{ "on",		OVL_XINO_ON   },
	{}
};

/* Name of the currently configured xino mode, for show_options */
static const char *ovl_xino_mode(struct ovl_config *config)
{
	return ovl_parameter_xino[config->xino].name;
}

/* Default xino mode derived from the xino_auto module parameter */
static inline int ovl_xino_def(void)
{
	return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF;
}

/**
 * ovl_show_options
 * @m: the seq_file handle
 * @dentry: The dentry to query
 *
 * Prints the mount options for a given superblock.
 * Returns zero; does not fail.
 */
static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
{
	struct super_block *sb = dentry->d_sb;
	struct ovl_fs *ofs = sb->s_fs_info;
	size_t nr, nr_merged_lower = ofs->numlayer - ofs->numdatalayer;
	const struct ovl_layer *data_layers = &ofs->layers[nr_merged_lower];

	/* ofs->layers[0] is the upper layer */
	seq_printf(m, ",lowerdir=%s", ofs->layers[1].name);
	/* dump regular lower layers */
	for (nr = 2; nr < nr_merged_lower; nr++)
		seq_printf(m, ":%s", ofs->layers[nr].name);
	/* dump data lower layers */
	for (nr = 0; nr < ofs->numdatalayer; nr++)
		seq_printf(m, "::%s", data_layers[nr].name);
	if (ofs->config.upperdir) {
		seq_show_option(m, "upperdir", ofs->config.upperdir);
		seq_show_option(m, "workdir", ofs->config.workdir);
	}
	if (ofs->config.default_permissions)
		seq_puts(m, ",default_permissions");
	if (ofs->config.redirect_mode != ovl_redirect_mode_def())
		seq_printf(m, ",redirect_dir=%s",
			   ovl_redirect_mode(&ofs->config));
	if (ofs->config.index != ovl_index_def)
		seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
	if (!ofs->config.uuid)
		seq_puts(m, ",uuid=off");
	if (ofs->config.nfs_export != ovl_nfs_export_def)
		seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
						"on" : "off");
	if (ofs->config.xino != ovl_xino_def() && !ovl_same_fs(ofs))
		seq_printf(m, ",xino=%s", ovl_xino_mode(&ofs->config));
	if (ofs->config.metacopy != ovl_metacopy_def)
		seq_printf(m, ",metacopy=%s",
			   ofs->config.metacopy ? "on" : "off");
	if (ofs->config.ovl_volatile)
		seq_puts(m, ",volatile");
	if (ofs->config.userxattr)
		seq_puts(m, ",userxattr");
	return 0;
}

/*
 * Remount handling: refuse switching to read-write when the overlay is
 * forced read-only, and sync the upper filesystem when transitioning from
 * read-write to read-only (unless the mount is volatile/unsyncable).
 */
static int ovl_reconfigure(struct fs_context *fc)
{
	struct super_block *sb = fc->root->d_sb;
	struct ovl_fs *ofs = sb->s_fs_info;
	struct super_block *upper_sb;
	int ret = 0;

	if (!(fc->sb_flags & SB_RDONLY) && ovl_force_readonly(ofs))
		return -EROFS;

	if (fc->sb_flags & SB_RDONLY && !sb_rdonly(sb)) {
		upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
		if (ovl_should_sync(ofs)) {
			down_read(&upper_sb->s_umount);
			ret = sync_filesystem(upper_sb);
			up_read(&upper_sb->s_umount);
		}
	}

	return ret;
}

static const struct super_operations ovl_super_operations = {
	.alloc_inode	= ovl_alloc_inode,
	.free_inode	= ovl_free_inode,
	.destroy_inode	= ovl_destroy_inode,
	.drop_inode	= generic_delete_inode,
	.put_super	= ovl_put_super,
	.sync_fs	= ovl_sync_fs,
	.statfs		= ovl_statfs,
	.show_options	= ovl_show_options,
};

enum {
	Opt_lowerdir,
	Opt_upperdir,
	Opt_workdir,
	Opt_default_permissions,
	Opt_redirect_dir,
	Opt_index,
	Opt_uuid,
	Opt_nfs_export,
	Opt_userxattr,
	Opt_xino,
	Opt_metacopy,
	Opt_volatile,
};

static const struct constant_table ovl_parameter_bool[] = {
	{ "on",		true  },
	{ "off",	false },
	{}
};

#define fsparam_string_empty(NAME, OPT) \
	__fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL)

static const struct fs_parameter_spec ovl_parameter_spec[] = {
	fsparam_string_empty("lowerdir",    Opt_lowerdir),
	fsparam_string("upperdir",          Opt_upperdir),
	fsparam_string("workdir",           Opt_workdir),
	fsparam_flag("default_permissions", Opt_default_permissions),
	fsparam_enum("redirect_dir",        Opt_redirect_dir, ovl_parameter_redirect_dir),
	fsparam_enum("index",               Opt_index, ovl_parameter_bool),
	fsparam_enum("uuid",                Opt_uuid, ovl_parameter_bool),
	fsparam_enum("nfs_export",          Opt_nfs_export, ovl_parameter_bool),
	fsparam_flag("userxattr",           Opt_userxattr),
	fsparam_enum("xino",                Opt_xino, ovl_parameter_xino),
	fsparam_enum("metacopy",            Opt_metacopy, ovl_parameter_bool),
	fsparam_flag("volatile",            Opt_volatile),
	{}
};

/*
 * Parse one mount option into the in-progress ovl_config, recording in
 * ctx->set which options were given explicitly (used later by
 * ovl_fs_params_verify() to tell explicit settings from defaults).
 */
static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
	int err = 0;
	struct fs_parse_result result;
	struct ovl_fs *ofs = fc->s_fs_info;
	struct ovl_config *config = &ofs->config;
	struct ovl_fs_context *ctx = fc->fs_private;
	int opt;

	if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
		/*
		 * On remount overlayfs has always ignored all mount
		 * options no matter if malformed or not so for
		 * backwards compatibility we do the same here.
		 */
		if (fc->oldapi)
			return 0;

		/*
		 * Give us the freedom to allow changing mount options
		 * with the new mount api in the future. So instead of
		 * silently ignoring everything we report a proper
		 * error. This is only visible for users of the new
		 * mount api.
		 */
		return invalfc(fc, "No changes allowed in reconfigure");
	}

	opt = fs_parse(fc, ovl_parameter_spec, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case Opt_lowerdir:
		err = ovl_parse_param_lowerdir(param->string, fc);
		break;
	case Opt_upperdir:
		fallthrough;
	case Opt_workdir:
		err = ovl_parse_param_upperdir(param->string, fc,
					       (Opt_workdir == opt));
		break;
	case Opt_default_permissions:
		config->default_permissions = true;
		break;
	case Opt_redirect_dir:
		config->redirect_mode = result.uint_32;
		if (config->redirect_mode == OVL_REDIRECT_OFF) {
			config->redirect_mode = ovl_redirect_always_follow ?
						OVL_REDIRECT_FOLLOW :
						OVL_REDIRECT_NOFOLLOW;
		}
		ctx->set.redirect = true;
		break;
	case Opt_index:
		config->index = result.uint_32;
		ctx->set.index = true;
		break;
	case Opt_uuid:
		config->uuid = result.uint_32;
		break;
	case Opt_nfs_export:
		config->nfs_export = result.uint_32;
		ctx->set.nfs_export = true;
		break;
	case Opt_xino:
		config->xino = result.uint_32;
		break;
	case Opt_metacopy:
		config->metacopy = result.uint_32;
		ctx->set.metacopy = true;
		break;
	case Opt_volatile:
		config->ovl_volatile = true;
		break;
	case Opt_userxattr:
		config->userxattr = true;
		break;
	default:
		pr_err("unrecognized mount option \"%s\" or missing value\n",
		       param->key);
		return -EINVAL;
	}

	return err;
}

/*
 * Resolve dependencies and conflicts between the parsed mount options.
 * Explicitly conflicting options (per ctx->set) are an error; conflicts
 * with defaults are resolved by adjusting the default and logging.
 */
static int ovl_fs_params_verify(const struct ovl_fs_context *ctx,
				struct ovl_config *config)
{
	struct ovl_opt_set set = ctx->set;

	if (ctx->nr_data > 0 && !config->metacopy) {
		pr_err("lower data-only dirs require metacopy support.\n");
		return -EINVAL;
	}

	/* Workdir/index are useless in non-upper mount */
	if (!config->upperdir) {
		if (config->workdir) {
			pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
				config->workdir);
			kfree(config->workdir);
			config->workdir = NULL;
		}
		if (config->index && set.index) {
			pr_info("option \"index=on\" is useless in a non-upper mount, ignore\n");
			set.index = false;
		}
		config->index = false;
	}

	if (!config->upperdir && config->ovl_volatile) {
		pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n");
		config->ovl_volatile = false;
	}

	/*
	 * This is to make the logic below simpler.  It doesn't make any other
	 * difference, since redirect_dir=on is only used for upper.
	 */
	if (!config->upperdir && config->redirect_mode == OVL_REDIRECT_FOLLOW)
		config->redirect_mode = OVL_REDIRECT_ON;

	/* Resolve metacopy -> redirect_dir dependency */
	if (config->metacopy && config->redirect_mode != OVL_REDIRECT_ON) {
		if (set.metacopy && set.redirect) {
			pr_err("conflicting options: metacopy=on,redirect_dir=%s\n",
			       ovl_redirect_mode(config));
			return -EINVAL;
		}
		if (set.redirect) {
			/*
			 * There was an explicit redirect_dir=... that resulted
			 * in this conflict.
			 */
			pr_info("disabling metacopy due to redirect_dir=%s\n",
				ovl_redirect_mode(config));
			config->metacopy = false;
		} else {
			/* Automatically enable redirect otherwise. */
			config->redirect_mode = OVL_REDIRECT_ON;
		}
	}

	/* Resolve nfs_export -> index dependency */
	if (config->nfs_export && !config->index) {
		if (!config->upperdir &&
		    config->redirect_mode != OVL_REDIRECT_NOFOLLOW) {
			pr_info("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
			config->nfs_export = false;
		} else if (set.nfs_export && set.index) {
			pr_err("conflicting options: nfs_export=on,index=off\n");
			return -EINVAL;
		} else if (set.index) {
			/*
			 * There was an explicit index=off that resulted
			 * in this conflict.
			 */
			pr_info("disabling nfs_export due to index=off\n");
			config->nfs_export = false;
		} else {
			/* Automatically enable index otherwise. */
			config->index = true;
		}
	}

	/* Resolve nfs_export -> !metacopy dependency */
	if (config->nfs_export && config->metacopy) {
		if (set.nfs_export && set.metacopy) {
			pr_err("conflicting options: nfs_export=on,metacopy=on\n");
			return -EINVAL;
		}
		if (set.metacopy) {
			/*
			 * There was an explicit metacopy=on that resulted
			 * in this conflict.
			 */
			pr_info("disabling nfs_export due to metacopy=on\n");
			config->nfs_export = false;
		} else {
			/*
			 * There was an explicit nfs_export=on that resulted
			 * in this conflict.
			 */
			pr_info("disabling metacopy due to nfs_export=on\n");
			config->metacopy = false;
		}
	}


	/* Resolve userxattr -> !redirect && !metacopy dependency */
	if (config->userxattr) {
		if (set.redirect &&
		    config->redirect_mode != OVL_REDIRECT_NOFOLLOW) {
			pr_err("conflicting options: userxattr,redirect_dir=%s\n",
			       ovl_redirect_mode(config));
			return -EINVAL;
		}
		if (config->metacopy && set.metacopy) {
			pr_err("conflicting options: userxattr,metacopy=on\n");
			return -EINVAL;
		}
		/*
		 * Silently disable default setting of redirect and metacopy.
		 * This shall be the default in the future as well: these
		 * options must be explicitly enabled if used together with
		 * userxattr.
		 */
		config->redirect_mode = OVL_REDIRECT_NOFOLLOW;
		config->metacopy = false;
	}

	return 0;
}

#define OVL_WORKDIR_NAME "work"
#define OVL_INDEXDIR_NAME "index"

/*
 * Create (or, for a non-persistent dir, clean up and re-create) the
 * directory @name under workbasedir and strip any inherited POSIX ACLs
 * and mode bits from it.  Returns the new dentry, NULL on failure (with a
 * warning; the mount continues read-only), or ERR_PTR(-EINVAL) if cleanup
 * of a stale workdir found something unexpected.
 */
static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
					 const char *name, bool persist)
{
	struct inode *dir =  ofs->workbasedir->d_inode;
	struct vfsmount *mnt = ovl_upper_mnt(ofs);
	struct dentry *work;
	int err;
	bool retried = false;

	inode_lock_nested(dir, I_MUTEX_PARENT);
retry:
	work = ovl_lookup_upper(ofs, name, ofs->workbasedir, strlen(name));

	if (!IS_ERR(work)) {
		struct iattr attr = {
			.ia_valid = ATTR_MODE,
			.ia_mode = S_IFDIR | 0,
		};

		if (work->d_inode) {
			err = -EEXIST;
			if (retried)
				goto out_dput;

			if (persist)
				goto out_unlock;

			retried = true;
			err = ovl_workdir_cleanup(ofs, dir, mnt, work, 0);
			dput(work);
			if (err == -EINVAL) {
				work = ERR_PTR(err);
				goto out_unlock;
			}
			goto retry;
		}

		err = ovl_mkdir_real(ofs, dir, &work, attr.ia_mode);
		if (err)
			goto out_dput;

		/* Weird filesystem returning with hashed negative (kernfs)? */
		err = -EINVAL;
		if (d_really_is_negative(work))
			goto out_dput;

		/*
		 * Try to remove POSIX ACL xattrs from workdir.  We are good if:
		 *
		 * a) success (there was a POSIX ACL xattr and was removed)
		 * b) -ENODATA (there was no POSIX ACL xattr)
		 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
		 *
		 * There are various other error values that could effectively
		 * mean that the xattr doesn't exist (e.g. -ERANGE is returned
		 * if the xattr name is too long), but the set of filesystems
		 * allowed as upper are limited to "normal" ones, where checking
		 * for the above two errors is sufficient.
		 */
		err = ovl_do_remove_acl(ofs, work, XATTR_NAME_POSIX_ACL_DEFAULT);
		if (err && err != -ENODATA && err != -EOPNOTSUPP)
			goto out_dput;

		err = ovl_do_remove_acl(ofs, work, XATTR_NAME_POSIX_ACL_ACCESS);
		if (err && err != -ENODATA && err != -EOPNOTSUPP)
			goto out_dput;

		/* Clear any inherited mode bits */
		inode_lock(work->d_inode);
		err = ovl_do_notify_change(ofs, work, &attr);
		inode_unlock(work->d_inode);
		if (err)
			goto out_dput;
	} else {
		err = PTR_ERR(work);
		goto out_err;
	}
out_unlock:
	inode_unlock(dir);
	return work;

out_dput:
	dput(work);
out_err:
	pr_warn("failed to create directory %s/%s (errno: %i); mounting read-only\n",
		ofs->config.workdir, name, -err);
	work = NULL;
	goto out_unlock;
}

/* Track the maximum f_namelen over all layers so statfs can report it. */
static int ovl_check_namelen(const struct path *path, struct ovl_fs *ofs,
			     const char *name)
{
	struct kstatfs statfs;
	int err = vfs_statfs(path, &statfs);

	if (err)
		pr_err("statfs failed on '%s'\n", name);
	else
		ofs->namelen = max(ofs->namelen, statfs.f_namelen);

	return err;
}

/*
 * Validate one lower layer: account its name length and stacking depth,
 * and degrade index/nfs_export/xino config if the layer's filesystem
 * cannot decode file handles.
 */
static int ovl_lower_dir(const char *name, struct path *path,
			 struct ovl_fs *ofs, int *stack_depth)
{
	int fh_type;
	int err;

	err = ovl_check_namelen(path, ofs, name);
	if (err)
		return err;

	*stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);

	/*
	 * The inodes index feature and NFS export need to encode and decode
	 * file handles, so they require that all layers support them.
	 */
	fh_type = ovl_can_decode_fh(path->dentry->d_sb);
	if ((ofs->config.nfs_export ||
	     (ofs->config.index && ofs->config.upperdir)) && !fh_type) {
		ofs->config.index = false;
		ofs->config.nfs_export = false;
		pr_warn("fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
			name);
	}
	/*
	 * Decoding origin file handle is required for persistent st_ino.
	 * Without persistent st_ino, xino=auto falls back to xino=off.
	 */
	if (ofs->config.xino == OVL_XINO_AUTO &&
	    ofs->config.upperdir && !fh_type) {
		ofs->config.xino = OVL_XINO_OFF;
		pr_warn("fs on '%s' does not support file handles, falling back to xino=off.\n",
			name);
	}

	/* Check if lower fs has 32bit inode numbers */
	if (fh_type != FILEID_INO32_GEN)
		ofs->xino_mode = -1;

	return 0;
}

/* Workdir should not be subdir of upperdir and vice versa */
static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
{
	bool ok = false;

	if (workdir != upperdir) {
		/* lock_rename() returns non-NULL if one is an ancestor */
		ok = (lock_rename(workdir, upperdir) == NULL);
		unlock_rename(workdir, upperdir);
	}
	return ok;
}

/* Overlay private xattrs are never exposed to users: reads/writes fail. */
static int ovl_own_xattr_get(const struct xattr_handler *handler,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, void *buffer, size_t size)
{
	return -EOPNOTSUPP;
}

static int ovl_own_xattr_set(const struct xattr_handler *handler,
			     struct mnt_idmap *idmap,
			     struct dentry *dentry, struct inode *inode,
			     const char *name, const void *value,
			     size_t size, int flags)
{
	return -EOPNOTSUPP;
}

/* All other xattrs are forwarded to the real underlying inode. */
static int ovl_other_xattr_get(const struct xattr_handler *handler,
			       struct dentry *dentry, struct inode *inode,
			       const char *name, void *buffer, size_t size)
{
	return ovl_xattr_get(dentry, inode, name, buffer, size);
}

static int ovl_other_xattr_set(const struct xattr_handler *handler,
			       struct mnt_idmap *idmap,
			       struct dentry *dentry, struct inode *inode,
			       const char *name, const void *value,
			       size_t size, int flags)
{
	return ovl_xattr_set(dentry, inode, name, value, size, flags);
}

static const struct xattr_handler ovl_own_trusted_xattr_handler = {
	.prefix	= OVL_XATTR_TRUSTED_PREFIX,
	.get = ovl_own_xattr_get,
	.set = ovl_own_xattr_set,
};

static const struct xattr_handler ovl_own_user_xattr_handler = {
	.prefix	= OVL_XATTR_USER_PREFIX,
	.get = ovl_own_xattr_get,
	.set = ovl_own_xattr_set,
};

static const struct xattr_handler ovl_other_xattr_handler = {
	.prefix	= "", /* catch all */
	.get = ovl_other_xattr_get,
	.set = ovl_other_xattr_set,
};

static const struct xattr_handler *ovl_trusted_xattr_handlers[] = {
	&ovl_own_trusted_xattr_handler,
	&ovl_other_xattr_handler,
	NULL
};

static const struct xattr_handler *ovl_user_xattr_handlers[] = {
	&ovl_own_user_xattr_handler,
	&ovl_other_xattr_handler,
	NULL
};

/*
 * Install a trap inode for @dir on @sb; -ELOOP means @dir conflicts with
 * (is already trapped by) another layer of this overlay.
 */
static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
			  struct inode **ptrap, const char *name)
{
	struct inode *trap;
	int err;

	trap = ovl_get_trap_inode(sb, dir);
	err = PTR_ERR_OR_ZERO(trap);
	if (err) {
		if (err == -ELOOP)
			pr_err("conflicting %s path\n", name);
		return err;
	}

	*ptrap = trap;
	return 0;
}

/*
 * Determine how we treat concurrent use of upperdir/workdir based on the
 * index feature. This is papering over mount leaks of container runtimes,
 * for example, an old overlay mount is leaked and now its upperdir is
 * attempted to be used as a lower layer in a new overlay mount.
 */
static int ovl_report_in_use(struct ovl_fs *ofs, const char *name)
{
	if (ofs->config.index) {
		pr_err("%s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n",
		       name);
		return -EBUSY;
	} else {
		pr_warn("%s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n",
			name);
		return 0;
	}
}

/*
 * Set up the upper layer: verify it is writable, install its trap inode,
 * clone a private mount of upperpath and try to take the in-use lock.
 */
static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
			 struct ovl_layer *upper_layer,
			 const struct path *upperpath)
{
	struct vfsmount *upper_mnt;
	int err;

	/* Upperdir path should not be r/o */
	if (__mnt_is_readonly(upperpath->mnt)) {
		pr_err("upper fs is r/o, try multi-lower layers mount\n");
		err = -EINVAL;
		goto out;
	}

	err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir);
	if (err)
		goto out;

	err = ovl_setup_trap(sb, upperpath->dentry, &upper_layer->trap,
			     "upperdir");
	if (err)
		goto out;

	upper_mnt = clone_private_mount(upperpath);
	err = PTR_ERR(upper_mnt);
	if (IS_ERR(upper_mnt)) {
		pr_err("failed to clone upperpath\n");
		goto out;
	}

	/* Don't inherit atime flags */
	upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
	upper_layer->mnt = upper_mnt;
	upper_layer->idx = 0;
	upper_layer->fsid = 0;

	err = -ENOMEM;
	upper_layer->name = kstrdup(ofs->config.upperdir, GFP_KERNEL);
	if (!upper_layer->name)
		goto out;

	/*
	 * Inherit SB_NOSEC flag from upperdir.
	 *
	 * This optimization changes behavior when a security related attribute
	 * (suid/sgid/security.*) is changed on an underlying layer.  This is
	 * okay because we don't yet have guarantees in that case, but it will
	 * need careful treatment once we want to honour changes to underlying
	 * filesystems.
	 */
	if (upper_mnt->mnt_sb->s_flags & SB_NOSEC)
		sb->s_flags |= SB_NOSEC;

	if (ovl_inuse_trylock(ovl_upper_mnt(ofs)->mnt_root)) {
		ofs->upperdir_locked = true;
	} else {
		err = ovl_report_in_use(ofs, "upperdir");
		if (err)
			goto out;
	}

	err = 0;
out:
	return err;
}

/*
 * Returns 1 if RENAME_WHITEOUT is supported, 0 if not supported and
 * negative values if error is encountered.
 */
static int ovl_check_rename_whiteout(struct ovl_fs *ofs)
{
	struct dentry *workdir = ofs->workdir;
	struct inode *dir = d_inode(workdir);
	struct dentry *temp;
	struct dentry *dest;
	struct dentry *whiteout;
	struct name_snapshot name;
	int err;

	inode_lock_nested(dir, I_MUTEX_PARENT);

	temp = ovl_create_temp(ofs, workdir, OVL_CATTR(S_IFREG | 0));
	err = PTR_ERR(temp);
	if (IS_ERR(temp))
		goto out_unlock;

	dest = ovl_lookup_temp(ofs, workdir);
	err = PTR_ERR(dest);
	if (IS_ERR(dest)) {
		dput(temp);
		goto out_unlock;
	}

	/* Name is inline and stable - using snapshot as a copy helper */
	take_dentry_name_snapshot(&name, temp);
	err = ovl_do_rename(ofs, dir, temp, dir, dest, RENAME_WHITEOUT);
	if (err) {
		if (err == -EINVAL)
			err = 0;
		goto cleanup_temp;
	}

	whiteout = ovl_lookup_upper(ofs, name.name.name, workdir, name.name.len);
	err = PTR_ERR(whiteout);
	if (IS_ERR(whiteout))
		goto cleanup_temp;

	err = ovl_is_whiteout(whiteout);

	/* Best effort cleanup of whiteout and temp file */
	if (err)
		ovl_cleanup(ofs, dir, whiteout);
	dput(whiteout);

cleanup_temp:
	ovl_cleanup(ofs, dir, temp);
	release_dentry_name_snapshot(&name);
	dput(temp);
	dput(dest);

out_unlock:
	inode_unlock(dir);

	return err;
}

/*
 * Look up @name under @parent, creating it with @mode if it does not
 * exist.  Consumes the reference on @parent in all cases.
 */
static struct dentry *ovl_lookup_or_create(struct ovl_fs *ofs,
					   struct dentry *parent,
					   const char *name, umode_t mode)
{
	size_t len = strlen(name);
	struct dentry *child;

	inode_lock_nested(parent->d_inode, I_MUTEX_PARENT);
	child = ovl_lookup_upper(ofs, name, parent, len);
	if (!IS_ERR(child) && !child->d_inode)
		child = ovl_create_real(ofs, parent->d_inode, child,
					OVL_CATTR(mode));
	inode_unlock(parent->d_inode);
	dput(parent);

	return child;
}

/*
 * Creates $workdir/work/incompat/volatile/dirty file if it is not already
 * present.
 */
static int ovl_create_volatile_dirty(struct ovl_fs *ofs)
{
	unsigned int ctr;
	struct dentry *d = dget(ofs->workbasedir);
	static const char *const volatile_path[] = {
		OVL_WORKDIR_NAME, "incompat", "volatile", "dirty"
	};
	const char *const *name = volatile_path;

	for (ctr = ARRAY_SIZE(volatile_path); ctr; ctr--, name++) {
		d = ovl_lookup_or_create(ofs, d, *name, ctr > 1 ? S_IFDIR : S_IFREG);
		if (IS_ERR(d))
			return PTR_ERR(d);
	}
	dput(d);
	return 0;
}

/*
 * Create the workdir and probe upper/work fs features (d_type, tmpfile,
 * RENAME_WHITEOUT, overlay xattrs, file handles), degrading the mount
 * configuration when a feature is missing.  A remote upper fs must have
 * all required features; a local one is allowed to be sub-optimal for
 * backward compatibility.
 */
static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
			    const struct path *workpath)
{
	struct vfsmount *mnt = ovl_upper_mnt(ofs);
	struct dentry *workdir;
	struct file *tmpfile;
	bool rename_whiteout;
	bool d_type;
	int fh_type;
	int err;

	err = mnt_want_write(mnt);
	if (err)
		return err;

	workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
	err = PTR_ERR(workdir);
	if (IS_ERR_OR_NULL(workdir))
		goto out;

	ofs->workdir = workdir;

	err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir");
	if (err)
		goto out;

	/*
	 * Upper should support d_type, else whiteouts are visible.  Given
	 * workdir and upper are on same fs, we can do iterate_dir() on
	 * workdir.  This check requires successful creation of workdir in
	 * previous step.
	 */
	err = ovl_check_d_type_supported(workpath);
	if (err < 0)
		goto out;

	d_type = err;
	if (!d_type)
		pr_warn("upper fs needs to support d_type.\n");

	/* Check if upper/work fs supports O_TMPFILE */
	tmpfile = ovl_do_tmpfile(ofs, ofs->workdir, S_IFREG | 0);
	ofs->tmpfile = !IS_ERR(tmpfile);
	if (ofs->tmpfile)
		fput(tmpfile);
	else
		pr_warn("upper fs does not support tmpfile.\n");


	/* Check if upper/work fs supports RENAME_WHITEOUT */
	err = ovl_check_rename_whiteout(ofs);
	if (err < 0)
		goto out;

	rename_whiteout = err;
	if (!rename_whiteout)
		pr_warn("upper fs does not support RENAME_WHITEOUT.\n");

	/*
	 * Check if upper/work fs supports (trusted|user).overlay.* xattr
	 */
	err = ovl_setxattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE, "0", 1);
	if (err) {
		pr_warn("failed to set xattr on upper\n");
		ofs->noxattr = true;
		if (ovl_redirect_follow(ofs)) {
			ofs->config.redirect_mode = OVL_REDIRECT_NOFOLLOW;
			pr_warn("...falling back to redirect_dir=nofollow.\n");
		}
		if (ofs->config.metacopy) {
			ofs->config.metacopy = false;
			pr_warn("...falling back to metacopy=off.\n");
		}
		if (ofs->config.index) {
			ofs->config.index = false;
			pr_warn("...falling back to index=off.\n");
		}
		/*
		 * xattr support is required for persistent st_ino.
		 * Without persistent st_ino, xino=auto falls back to xino=off.
		 */
		if (ofs->config.xino == OVL_XINO_AUTO) {
			ofs->config.xino = OVL_XINO_OFF;
			pr_warn("...falling back to xino=off.\n");
		}
		if (err == -EPERM && !ofs->config.userxattr)
			pr_info("try mounting with 'userxattr' option\n");
		err = 0;
	} else {
		ovl_removexattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE);
	}

	/*
	 * We allowed sub-optimal upper fs configuration and don't want to break
	 * users over kernel upgrade, but we never allowed remote upper fs, so
	 * we can enforce strict requirements for remote upper fs.
	 */
	if (ovl_dentry_remote(ofs->workdir) &&
	    (!d_type || !rename_whiteout || ofs->noxattr)) {
		pr_err("upper fs missing required features.\n");
		err = -EINVAL;
		goto out;
	}

	/*
	 * For volatile mount, create a incompat/volatile/dirty file to keep
	 * track of it.
	 */
	if (ofs->config.ovl_volatile) {
		err = ovl_create_volatile_dirty(ofs);
		if (err < 0) {
			pr_err("Failed to create volatile/dirty file.\n");
			goto out;
		}
	}

	/* Check if upper/work fs supports file handles */
	fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
	if (ofs->config.index && !fh_type) {
		ofs->config.index = false;
		pr_warn("upper fs does not support file handles, falling back to index=off.\n");
	}

	/* Check if upper fs has 32bit inode numbers */
	if (fh_type != FILEID_INO32_GEN)
		ofs->xino_mode = -1;

	/* NFS export of r/w mount depends on index */
	if (ofs->config.nfs_export && !ofs->config.index) {
		pr_warn("NFS export requires \"index=on\", falling back to nfs_export=off.\n");
		ofs->config.nfs_export = false;
	}
out:
	mnt_drop_write(mnt);
	return err;
}

static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs,
			   const struct path *upperpath,
			   const struct path *workpath)
{
	int err;

	err = -EINVAL;
	if
(upperpath->mnt != workpath->mnt) { 1281 pr_err("workdir and upperdir must reside under the same mount\n"); 1282 return err; 1283 } 1284 if (!ovl_workdir_ok(workpath->dentry, upperpath->dentry)) { 1285 pr_err("workdir and upperdir must be separate subtrees\n"); 1286 return err; 1287 } 1288 1289 ofs->workbasedir = dget(workpath->dentry); 1290 1291 if (ovl_inuse_trylock(ofs->workbasedir)) { 1292 ofs->workdir_locked = true; 1293 } else { 1294 err = ovl_report_in_use(ofs, "workdir"); 1295 if (err) 1296 return err; 1297 } 1298 1299 err = ovl_setup_trap(sb, ofs->workbasedir, &ofs->workbasedir_trap, 1300 "workdir"); 1301 if (err) 1302 return err; 1303 1304 return ovl_make_workdir(sb, ofs, workpath); 1305 } 1306 1307 static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, 1308 struct ovl_entry *oe, const struct path *upperpath) 1309 { 1310 struct vfsmount *mnt = ovl_upper_mnt(ofs); 1311 struct dentry *indexdir; 1312 int err; 1313 1314 err = mnt_want_write(mnt); 1315 if (err) 1316 return err; 1317 1318 /* Verify lower root is upper root origin */ 1319 err = ovl_verify_origin(ofs, upperpath->dentry, 1320 ovl_lowerstack(oe)->dentry, true); 1321 if (err) { 1322 pr_err("failed to verify upper root origin\n"); 1323 goto out; 1324 } 1325 1326 /* index dir will act also as workdir */ 1327 iput(ofs->workdir_trap); 1328 ofs->workdir_trap = NULL; 1329 dput(ofs->workdir); 1330 ofs->workdir = NULL; 1331 indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true); 1332 if (IS_ERR(indexdir)) { 1333 err = PTR_ERR(indexdir); 1334 } else if (indexdir) { 1335 ofs->indexdir = indexdir; 1336 ofs->workdir = dget(indexdir); 1337 1338 err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap, 1339 "indexdir"); 1340 if (err) 1341 goto out; 1342 1343 /* 1344 * Verify upper root is exclusively associated with index dir. 1345 * Older kernels stored upper fh in ".overlay.origin" 1346 * xattr. If that xattr exists, verify that it is a match to 1347 * upper dir file handle. 
In any case, verify or set xattr 1348 * ".overlay.upper" to indicate that index may have 1349 * directory entries. 1350 */ 1351 if (ovl_check_origin_xattr(ofs, ofs->indexdir)) { 1352 err = ovl_verify_set_fh(ofs, ofs->indexdir, 1353 OVL_XATTR_ORIGIN, 1354 upperpath->dentry, true, false); 1355 if (err) 1356 pr_err("failed to verify index dir 'origin' xattr\n"); 1357 } 1358 err = ovl_verify_upper(ofs, ofs->indexdir, upperpath->dentry, 1359 true); 1360 if (err) 1361 pr_err("failed to verify index dir 'upper' xattr\n"); 1362 1363 /* Cleanup bad/stale/orphan index entries */ 1364 if (!err) 1365 err = ovl_indexdir_cleanup(ofs); 1366 } 1367 if (err || !ofs->indexdir) 1368 pr_warn("try deleting index dir or mounting with '-o index=off' to disable inodes index.\n"); 1369 1370 out: 1371 mnt_drop_write(mnt); 1372 return err; 1373 } 1374 1375 static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid) 1376 { 1377 unsigned int i; 1378 1379 if (!ofs->config.nfs_export && !ovl_upper_mnt(ofs)) 1380 return true; 1381 1382 /* 1383 * We allow using single lower with null uuid for index and nfs_export 1384 * for example to support those features with single lower squashfs. 1385 * To avoid regressions in setups of overlay with re-formatted lower 1386 * squashfs, do not allow decoding origin with lower null uuid unless 1387 * user opted-in to one of the new features that require following the 1388 * lower inode of non-dir upper. 1389 */ 1390 if (ovl_allow_offline_changes(ofs) && uuid_is_null(uuid)) 1391 return false; 1392 1393 for (i = 0; i < ofs->numfs; i++) { 1394 /* 1395 * We use uuid to associate an overlay lower file handle with a 1396 * lower layer, so we can accept lower fs with null uuid as long 1397 * as all lower layers with null uuid are on the same fs. 1398 * if we detect multiple lower fs with the same uuid, we 1399 * disable lower file handle decoding on all of them. 
1400 */ 1401 if (ofs->fs[i].is_lower && 1402 uuid_equal(&ofs->fs[i].sb->s_uuid, uuid)) { 1403 ofs->fs[i].bad_uuid = true; 1404 return false; 1405 } 1406 } 1407 return true; 1408 } 1409 1410 /* Get a unique fsid for the layer */ 1411 static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path) 1412 { 1413 struct super_block *sb = path->mnt->mnt_sb; 1414 unsigned int i; 1415 dev_t dev; 1416 int err; 1417 bool bad_uuid = false; 1418 bool warn = false; 1419 1420 for (i = 0; i < ofs->numfs; i++) { 1421 if (ofs->fs[i].sb == sb) 1422 return i; 1423 } 1424 1425 if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) { 1426 bad_uuid = true; 1427 if (ofs->config.xino == OVL_XINO_AUTO) { 1428 ofs->config.xino = OVL_XINO_OFF; 1429 warn = true; 1430 } 1431 if (ofs->config.index || ofs->config.nfs_export) { 1432 ofs->config.index = false; 1433 ofs->config.nfs_export = false; 1434 warn = true; 1435 } 1436 if (warn) { 1437 pr_warn("%s uuid detected in lower fs '%pd2', falling back to xino=%s,index=off,nfs_export=off.\n", 1438 uuid_is_null(&sb->s_uuid) ? "null" : 1439 "conflicting", 1440 path->dentry, ovl_xino_mode(&ofs->config)); 1441 } 1442 } 1443 1444 err = get_anon_bdev(&dev); 1445 if (err) { 1446 pr_err("failed to get anonymous bdev for lowerpath\n"); 1447 return err; 1448 } 1449 1450 ofs->fs[ofs->numfs].sb = sb; 1451 ofs->fs[ofs->numfs].pseudo_dev = dev; 1452 ofs->fs[ofs->numfs].bad_uuid = bad_uuid; 1453 1454 return ofs->numfs++; 1455 } 1456 1457 /* 1458 * The fsid after the last lower fsid is used for the data layers. 1459 * It is a "null fs" with a null sb, null uuid, and no pseudo dev. 
1460 */ 1461 static int ovl_get_data_fsid(struct ovl_fs *ofs) 1462 { 1463 return ofs->numfs; 1464 } 1465 1466 1467 static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, 1468 struct ovl_fs_context *ctx, struct ovl_layer *layers) 1469 { 1470 int err; 1471 unsigned int i; 1472 size_t nr_merged_lower; 1473 1474 ofs->fs = kcalloc(ctx->nr + 2, sizeof(struct ovl_sb), GFP_KERNEL); 1475 if (ofs->fs == NULL) 1476 return -ENOMEM; 1477 1478 /* 1479 * idx/fsid 0 are reserved for upper fs even with lower only overlay 1480 * and the last fsid is reserved for "null fs" of the data layers. 1481 */ 1482 ofs->numfs++; 1483 1484 /* 1485 * All lower layers that share the same fs as upper layer, use the same 1486 * pseudo_dev as upper layer. Allocate fs[0].pseudo_dev even for lower 1487 * only overlay to simplify ovl_fs_free(). 1488 * is_lower will be set if upper fs is shared with a lower layer. 1489 */ 1490 err = get_anon_bdev(&ofs->fs[0].pseudo_dev); 1491 if (err) { 1492 pr_err("failed to get anonymous bdev for upper fs\n"); 1493 return err; 1494 } 1495 1496 if (ovl_upper_mnt(ofs)) { 1497 ofs->fs[0].sb = ovl_upper_mnt(ofs)->mnt_sb; 1498 ofs->fs[0].is_lower = false; 1499 } 1500 1501 nr_merged_lower = ctx->nr - ctx->nr_data; 1502 for (i = 0; i < ctx->nr; i++) { 1503 struct ovl_fs_context_layer *l = &ctx->lower[i]; 1504 struct vfsmount *mnt; 1505 struct inode *trap; 1506 int fsid; 1507 1508 if (i < nr_merged_lower) 1509 fsid = ovl_get_fsid(ofs, &l->path); 1510 else 1511 fsid = ovl_get_data_fsid(ofs); 1512 if (fsid < 0) 1513 return fsid; 1514 1515 /* 1516 * Check if lower root conflicts with this overlay layers before 1517 * checking if it is in-use as upperdir/workdir of "another" 1518 * mount, because we do not bother to check in ovl_is_inuse() if 1519 * the upperdir/workdir is in fact in-use by our 1520 * upperdir/workdir. 
1521 */ 1522 err = ovl_setup_trap(sb, l->path.dentry, &trap, "lowerdir"); 1523 if (err) 1524 return err; 1525 1526 if (ovl_is_inuse(l->path.dentry)) { 1527 err = ovl_report_in_use(ofs, "lowerdir"); 1528 if (err) { 1529 iput(trap); 1530 return err; 1531 } 1532 } 1533 1534 mnt = clone_private_mount(&l->path); 1535 err = PTR_ERR(mnt); 1536 if (IS_ERR(mnt)) { 1537 pr_err("failed to clone lowerpath\n"); 1538 iput(trap); 1539 return err; 1540 } 1541 1542 /* 1543 * Make lower layers R/O. That way fchmod/fchown on lower file 1544 * will fail instead of modifying lower fs. 1545 */ 1546 mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME; 1547 1548 layers[ofs->numlayer].trap = trap; 1549 layers[ofs->numlayer].mnt = mnt; 1550 layers[ofs->numlayer].idx = ofs->numlayer; 1551 layers[ofs->numlayer].fsid = fsid; 1552 layers[ofs->numlayer].fs = &ofs->fs[fsid]; 1553 layers[ofs->numlayer].name = l->name; 1554 l->name = NULL; 1555 ofs->numlayer++; 1556 ofs->fs[fsid].is_lower = true; 1557 } 1558 1559 /* 1560 * When all layers on same fs, overlay can use real inode numbers. 1561 * With mount option "xino=<on|auto>", mounter declares that there are 1562 * enough free high bits in underlying fs to hold the unique fsid. 1563 * If overlayfs does encounter underlying inodes using the high xino 1564 * bits reserved for fsid, it emits a warning and uses the original 1565 * inode number or a non persistent inode number allocated from a 1566 * dedicated range. 
1567 */ 1568 if (ofs->numfs - !ovl_upper_mnt(ofs) == 1) { 1569 if (ofs->config.xino == OVL_XINO_ON) 1570 pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n"); 1571 ofs->xino_mode = 0; 1572 } else if (ofs->config.xino == OVL_XINO_OFF) { 1573 ofs->xino_mode = -1; 1574 } else if (ofs->xino_mode < 0) { 1575 /* 1576 * This is a roundup of number of bits needed for encoding 1577 * fsid, where fsid 0 is reserved for upper fs (even with 1578 * lower only overlay) +1 extra bit is reserved for the non 1579 * persistent inode number range that is used for resolving 1580 * xino lower bits overflow. 1581 */ 1582 BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 30); 1583 ofs->xino_mode = ilog2(ofs->numfs - 1) + 2; 1584 } 1585 1586 if (ofs->xino_mode > 0) { 1587 pr_info("\"xino\" feature enabled using %d upper inode bits.\n", 1588 ofs->xino_mode); 1589 } 1590 1591 return 0; 1592 } 1593 1594 static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, 1595 struct ovl_fs_context *ctx, 1596 struct ovl_fs *ofs, 1597 struct ovl_layer *layers) 1598 { 1599 int err; 1600 unsigned int i; 1601 size_t nr_merged_lower; 1602 struct ovl_entry *oe; 1603 struct ovl_path *lowerstack; 1604 1605 struct ovl_fs_context_layer *l; 1606 1607 if (!ofs->config.upperdir && ctx->nr == 1) { 1608 pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n"); 1609 return ERR_PTR(-EINVAL); 1610 } 1611 1612 err = -EINVAL; 1613 for (i = 0; i < ctx->nr; i++) { 1614 l = &ctx->lower[i]; 1615 1616 err = ovl_lower_dir(l->name, &l->path, ofs, &sb->s_stack_depth); 1617 if (err) 1618 return ERR_PTR(err); 1619 } 1620 1621 err = -EINVAL; 1622 sb->s_stack_depth++; 1623 if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { 1624 pr_err("maximum fs stacking depth exceeded\n"); 1625 return ERR_PTR(err); 1626 } 1627 1628 err = ovl_get_layers(sb, ofs, ctx, layers); 1629 if (err) 1630 return ERR_PTR(err); 1631 1632 err = -ENOMEM; 1633 /* Data-only layers are not merged in root directory */ 1634 nr_merged_lower 
= ctx->nr - ctx->nr_data; 1635 oe = ovl_alloc_entry(nr_merged_lower); 1636 if (!oe) 1637 return ERR_PTR(err); 1638 1639 lowerstack = ovl_lowerstack(oe); 1640 for (i = 0; i < nr_merged_lower; i++) { 1641 l = &ctx->lower[i]; 1642 lowerstack[i].dentry = dget(l->path.dentry); 1643 lowerstack[i].layer = &ofs->layers[i + 1]; 1644 } 1645 ofs->numdatalayer = ctx->nr_data; 1646 1647 return oe; 1648 } 1649 1650 /* 1651 * Check if this layer root is a descendant of: 1652 * - another layer of this overlayfs instance 1653 * - upper/work dir of any overlayfs instance 1654 */ 1655 static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs, 1656 struct dentry *dentry, const char *name, 1657 bool is_lower) 1658 { 1659 struct dentry *next = dentry, *parent; 1660 int err = 0; 1661 1662 if (!dentry) 1663 return 0; 1664 1665 parent = dget_parent(next); 1666 1667 /* Walk back ancestors to root (inclusive) looking for traps */ 1668 while (!err && parent != next) { 1669 if (is_lower && ovl_lookup_trap_inode(sb, parent)) { 1670 err = -ELOOP; 1671 pr_err("overlapping %s path\n", name); 1672 } else if (ovl_is_inuse(parent)) { 1673 err = ovl_report_in_use(ofs, name); 1674 } 1675 next = parent; 1676 parent = dget_parent(next); 1677 dput(next); 1678 } 1679 1680 dput(parent); 1681 1682 return err; 1683 } 1684 1685 /* 1686 * Check if any of the layers or work dirs overlap. 1687 */ 1688 static int ovl_check_overlapping_layers(struct super_block *sb, 1689 struct ovl_fs *ofs) 1690 { 1691 int i, err; 1692 1693 if (ovl_upper_mnt(ofs)) { 1694 err = ovl_check_layer(sb, ofs, ovl_upper_mnt(ofs)->mnt_root, 1695 "upperdir", false); 1696 if (err) 1697 return err; 1698 1699 /* 1700 * Checking workbasedir avoids hitting ovl_is_inuse(parent) of 1701 * this instance and covers overlapping work and index dirs, 1702 * unless work or index dir have been moved since created inside 1703 * workbasedir. In that case, we already have their traps in 1704 * inode cache and we will catch that case on lookup. 
1705 */ 1706 err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir", 1707 false); 1708 if (err) 1709 return err; 1710 } 1711 1712 for (i = 1; i < ofs->numlayer; i++) { 1713 err = ovl_check_layer(sb, ofs, 1714 ofs->layers[i].mnt->mnt_root, 1715 "lowerdir", true); 1716 if (err) 1717 return err; 1718 } 1719 1720 return 0; 1721 } 1722 1723 static struct dentry *ovl_get_root(struct super_block *sb, 1724 struct dentry *upperdentry, 1725 struct ovl_entry *oe) 1726 { 1727 struct dentry *root; 1728 struct ovl_path *lowerpath = ovl_lowerstack(oe); 1729 unsigned long ino = d_inode(lowerpath->dentry)->i_ino; 1730 int fsid = lowerpath->layer->fsid; 1731 struct ovl_inode_params oip = { 1732 .upperdentry = upperdentry, 1733 .oe = oe, 1734 }; 1735 1736 root = d_make_root(ovl_new_inode(sb, S_IFDIR, 0)); 1737 if (!root) 1738 return NULL; 1739 1740 if (upperdentry) { 1741 /* Root inode uses upper st_ino/i_ino */ 1742 ino = d_inode(upperdentry)->i_ino; 1743 fsid = 0; 1744 ovl_dentry_set_upper_alias(root); 1745 if (ovl_is_impuredir(sb, upperdentry)) 1746 ovl_set_flag(OVL_IMPURE, d_inode(root)); 1747 } 1748 1749 /* Root is always merge -> can have whiteouts */ 1750 ovl_set_flag(OVL_WHITEOUTS, d_inode(root)); 1751 ovl_dentry_set_flag(OVL_E_CONNECTED, root); 1752 ovl_set_upperdata(d_inode(root)); 1753 ovl_inode_init(d_inode(root), &oip, ino, fsid); 1754 ovl_dentry_init_flags(root, upperdentry, oe, DCACHE_OP_WEAK_REVALIDATE); 1755 /* root keeps a reference of upperdentry */ 1756 dget(upperdentry); 1757 1758 return root; 1759 } 1760 1761 static int ovl_fill_super(struct super_block *sb, struct fs_context *fc) 1762 { 1763 struct ovl_fs *ofs = sb->s_fs_info; 1764 struct ovl_fs_context *ctx = fc->fs_private; 1765 struct dentry *root_dentry; 1766 struct ovl_entry *oe; 1767 struct ovl_layer *layers; 1768 struct cred *cred; 1769 int err; 1770 1771 err = -EIO; 1772 if (WARN_ON(fc->user_ns != current_user_ns())) 1773 goto out_err; 1774 1775 sb->s_d_op = &ovl_dentry_operations; 1776 1777 err = 
-ENOMEM; 1778 ofs->creator_cred = cred = prepare_creds(); 1779 if (!cred) 1780 goto out_err; 1781 1782 err = ovl_fs_params_verify(ctx, &ofs->config); 1783 if (err) 1784 goto out_err; 1785 1786 err = -EINVAL; 1787 if (ctx->nr == 0) { 1788 if (!(fc->sb_flags & SB_SILENT)) 1789 pr_err("missing 'lowerdir'\n"); 1790 goto out_err; 1791 } 1792 1793 err = -ENOMEM; 1794 layers = kcalloc(ctx->nr + 1, sizeof(struct ovl_layer), GFP_KERNEL); 1795 if (!layers) 1796 goto out_err; 1797 1798 ofs->layers = layers; 1799 /* Layer 0 is reserved for upper even if there's no upper */ 1800 ofs->numlayer = 1; 1801 1802 sb->s_stack_depth = 0; 1803 sb->s_maxbytes = MAX_LFS_FILESIZE; 1804 atomic_long_set(&ofs->last_ino, 1); 1805 /* Assume underlying fs uses 32bit inodes unless proven otherwise */ 1806 if (ofs->config.xino != OVL_XINO_OFF) { 1807 ofs->xino_mode = BITS_PER_LONG - 32; 1808 if (!ofs->xino_mode) { 1809 pr_warn("xino not supported on 32bit kernel, falling back to xino=off.\n"); 1810 ofs->config.xino = OVL_XINO_OFF; 1811 } 1812 } 1813 1814 /* alloc/destroy_inode needed for setting up traps in inode cache */ 1815 sb->s_op = &ovl_super_operations; 1816 1817 if (ofs->config.upperdir) { 1818 struct super_block *upper_sb; 1819 1820 err = -EINVAL; 1821 if (!ofs->config.workdir) { 1822 pr_err("missing 'workdir'\n"); 1823 goto out_err; 1824 } 1825 1826 err = ovl_get_upper(sb, ofs, &layers[0], &ctx->upper); 1827 if (err) 1828 goto out_err; 1829 1830 upper_sb = ovl_upper_mnt(ofs)->mnt_sb; 1831 if (!ovl_should_sync(ofs)) { 1832 ofs->errseq = errseq_sample(&upper_sb->s_wb_err); 1833 if (errseq_check(&upper_sb->s_wb_err, ofs->errseq)) { 1834 err = -EIO; 1835 pr_err("Cannot mount volatile when upperdir has an unseen error. 
Sync upperdir fs to clear state.\n"); 1836 goto out_err; 1837 } 1838 } 1839 1840 err = ovl_get_workdir(sb, ofs, &ctx->upper, &ctx->work); 1841 if (err) 1842 goto out_err; 1843 1844 if (!ofs->workdir) 1845 sb->s_flags |= SB_RDONLY; 1846 1847 sb->s_stack_depth = upper_sb->s_stack_depth; 1848 sb->s_time_gran = upper_sb->s_time_gran; 1849 } 1850 oe = ovl_get_lowerstack(sb, ctx, ofs, layers); 1851 err = PTR_ERR(oe); 1852 if (IS_ERR(oe)) 1853 goto out_err; 1854 1855 /* If the upper fs is nonexistent, we mark overlayfs r/o too */ 1856 if (!ovl_upper_mnt(ofs)) 1857 sb->s_flags |= SB_RDONLY; 1858 1859 if (!ofs->config.uuid && ofs->numfs > 1) { 1860 pr_warn("The uuid=off requires a single fs for lower and upper, falling back to uuid=on.\n"); 1861 ofs->config.uuid = true; 1862 } 1863 1864 if (!ovl_force_readonly(ofs) && ofs->config.index) { 1865 err = ovl_get_indexdir(sb, ofs, oe, &ctx->upper); 1866 if (err) 1867 goto out_free_oe; 1868 1869 /* Force r/o mount with no index dir */ 1870 if (!ofs->indexdir) 1871 sb->s_flags |= SB_RDONLY; 1872 } 1873 1874 err = ovl_check_overlapping_layers(sb, ofs); 1875 if (err) 1876 goto out_free_oe; 1877 1878 /* Show index=off in /proc/mounts for forced r/o mount */ 1879 if (!ofs->indexdir) { 1880 ofs->config.index = false; 1881 if (ovl_upper_mnt(ofs) && ofs->config.nfs_export) { 1882 pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n"); 1883 ofs->config.nfs_export = false; 1884 } 1885 } 1886 1887 if (ofs->config.metacopy && ofs->config.nfs_export) { 1888 pr_warn("NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n"); 1889 ofs->config.nfs_export = false; 1890 } 1891 1892 if (ofs->config.nfs_export) 1893 sb->s_export_op = &ovl_export_operations; 1894 1895 /* Never override disk quota limits or use reserved space */ 1896 cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); 1897 1898 sb->s_magic = OVERLAYFS_SUPER_MAGIC; 1899 sb->s_xattr = ofs->config.userxattr ? 
ovl_user_xattr_handlers : 1900 ovl_trusted_xattr_handlers; 1901 sb->s_fs_info = ofs; 1902 sb->s_flags |= SB_POSIXACL; 1903 sb->s_iflags |= SB_I_SKIP_SYNC; 1904 1905 err = -ENOMEM; 1906 root_dentry = ovl_get_root(sb, ctx->upper.dentry, oe); 1907 if (!root_dentry) 1908 goto out_free_oe; 1909 1910 sb->s_root = root_dentry; 1911 1912 return 0; 1913 1914 out_free_oe: 1915 ovl_free_entry(oe); 1916 out_err: 1917 ovl_free_fs(ofs); 1918 sb->s_fs_info = NULL; 1919 return err; 1920 } 1921 1922 static int ovl_get_tree(struct fs_context *fc) 1923 { 1924 return get_tree_nodev(fc, ovl_fill_super); 1925 } 1926 1927 static inline void ovl_fs_context_free(struct ovl_fs_context *ctx) 1928 { 1929 ovl_parse_param_drop_lowerdir(ctx); 1930 path_put(&ctx->upper); 1931 path_put(&ctx->work); 1932 kfree(ctx->lower); 1933 kfree(ctx); 1934 } 1935 1936 static void ovl_free(struct fs_context *fc) 1937 { 1938 struct ovl_fs *ofs = fc->s_fs_info; 1939 struct ovl_fs_context *ctx = fc->fs_private; 1940 1941 /* 1942 * ofs is stored in the fs_context when it is initialized. 1943 * ofs is transferred to the superblock on a successful mount, 1944 * but if an error occurs before the transfer we have to free 1945 * it here. 1946 */ 1947 if (ofs) 1948 ovl_free_fs(ofs); 1949 1950 if (ctx) 1951 ovl_fs_context_free(ctx); 1952 } 1953 1954 static const struct fs_context_operations ovl_context_ops = { 1955 .parse_param = ovl_parse_param, 1956 .get_tree = ovl_get_tree, 1957 .reconfigure = ovl_reconfigure, 1958 .free = ovl_free, 1959 }; 1960 1961 /* 1962 * This is called during fsopen() and will record the user namespace of 1963 * the caller in fc->user_ns since we've raised FS_USERNS_MOUNT. We'll 1964 * need it when we actually create the superblock to verify that the 1965 * process creating the superblock is in the same user namespace as 1966 * process that called fsopen(). 
1967 */ 1968 static int ovl_init_fs_context(struct fs_context *fc) 1969 { 1970 struct ovl_fs_context *ctx; 1971 struct ovl_fs *ofs; 1972 1973 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT); 1974 if (!ctx) 1975 return -ENOMEM; 1976 1977 /* 1978 * By default we allocate for three lower layers. It's likely 1979 * that it'll cover most users. 1980 */ 1981 ctx->lower = kmalloc_array(3, sizeof(*ctx->lower), GFP_KERNEL_ACCOUNT); 1982 if (!ctx->lower) 1983 goto out_err; 1984 ctx->capacity = 3; 1985 1986 ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); 1987 if (!ofs) 1988 goto out_err; 1989 1990 ofs->config.redirect_mode = ovl_redirect_mode_def(); 1991 ofs->config.index = ovl_index_def; 1992 ofs->config.uuid = true; 1993 ofs->config.nfs_export = ovl_nfs_export_def; 1994 ofs->config.xino = ovl_xino_def(); 1995 ofs->config.metacopy = ovl_metacopy_def; 1996 1997 fc->s_fs_info = ofs; 1998 fc->fs_private = ctx; 1999 fc->ops = &ovl_context_ops; 2000 return 0; 2001 2002 out_err: 2003 ovl_fs_context_free(ctx); 2004 return -ENOMEM; 2005 2006 } 2007 2008 static struct file_system_type ovl_fs_type = { 2009 .owner = THIS_MODULE, 2010 .name = "overlay", 2011 .init_fs_context = ovl_init_fs_context, 2012 .parameters = ovl_parameter_spec, 2013 .fs_flags = FS_USERNS_MOUNT, 2014 .kill_sb = kill_anon_super, 2015 }; 2016 MODULE_ALIAS_FS("overlay"); 2017 2018 static void ovl_inode_init_once(void *foo) 2019 { 2020 struct ovl_inode *oi = foo; 2021 2022 inode_init_once(&oi->vfs_inode); 2023 } 2024 2025 static int __init ovl_init(void) 2026 { 2027 int err; 2028 2029 ovl_inode_cachep = kmem_cache_create("ovl_inode", 2030 sizeof(struct ovl_inode), 0, 2031 (SLAB_RECLAIM_ACCOUNT| 2032 SLAB_MEM_SPREAD|SLAB_ACCOUNT), 2033 ovl_inode_init_once); 2034 if (ovl_inode_cachep == NULL) 2035 return -ENOMEM; 2036 2037 err = ovl_aio_request_cache_init(); 2038 if (!err) { 2039 err = register_filesystem(&ovl_fs_type); 2040 if (!err) 2041 return 0; 2042 2043 ovl_aio_request_cache_destroy(); 2044 } 2045 
kmem_cache_destroy(ovl_inode_cachep); 2046 2047 return err; 2048 } 2049 2050 static void __exit ovl_exit(void) 2051 { 2052 unregister_filesystem(&ovl_fs_type); 2053 2054 /* 2055 * Make sure all delayed rcu free inodes are flushed before we 2056 * destroy cache. 2057 */ 2058 rcu_barrier(); 2059 kmem_cache_destroy(ovl_inode_cachep); 2060 ovl_aio_request_cache_destroy(); 2061 } 2062 2063 module_init(ovl_init); 2064 module_exit(ovl_exit); 2065