1 /* 2 * 3 * Copyright (C) 2011 Novell Inc. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 as published by 7 * the Free Software Foundation. 8 */ 9 10 #include <linux/fs.h> 11 #include <linux/namei.h> 12 #include <linux/pagemap.h> 13 #include <linux/xattr.h> 14 #include <linux/security.h> 15 #include <linux/mount.h> 16 #include <linux/slab.h> 17 #include <linux/parser.h> 18 #include <linux/module.h> 19 #include <linux/sched.h> 20 #include <linux/statfs.h> 21 #include <linux/seq_file.h> 22 #include <linux/posix_acl_xattr.h> 23 #include "overlayfs.h" 24 25 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 26 MODULE_DESCRIPTION("Overlay filesystem"); 27 MODULE_LICENSE("GPL"); 28 29 struct ovl_config { 30 char *lowerdir; 31 char *upperdir; 32 char *workdir; 33 bool default_permissions; 34 }; 35 36 /* private information held for overlayfs's superblock */ 37 struct ovl_fs { 38 struct vfsmount *upper_mnt; 39 unsigned numlower; 40 struct vfsmount **lower_mnt; 41 struct dentry *workdir; 42 long lower_namelen; 43 /* pathnames of lower and upper dirs, for show_options */ 44 struct ovl_config config; 45 /* creds of process who forced instantiation of super block */ 46 const struct cred *creator_cred; 47 }; 48 49 struct ovl_dir_cache; 50 51 /* private information held for every overlayfs dentry */ 52 struct ovl_entry { 53 struct dentry *__upperdentry; 54 struct ovl_dir_cache *cache; 55 union { 56 struct { 57 u64 version; 58 bool opaque; 59 }; 60 struct rcu_head rcu; 61 }; 62 unsigned numlower; 63 struct path lowerstack[]; 64 }; 65 66 #define OVL_MAX_STACK 500 67 68 static struct dentry *__ovl_dentry_lower(struct ovl_entry *oe) 69 { 70 return oe->numlower ? oe->lowerstack[0].dentry : NULL; 71 } 72 73 enum ovl_path_type ovl_path_type(struct dentry *dentry) 74 { 75 struct ovl_entry *oe = dentry->d_fsdata; 76 enum ovl_path_type type = 0; 77 78 if (oe->__upperdentry) { 79 type = __OVL_PATH_UPPER; 80 81 /* 82 * Non-dir dentry can hold lower dentry from previous 83 * location. Its purity depends only on opaque flag. 84 */ 85 if (oe->numlower && S_ISDIR(dentry->d_inode->i_mode)) 86 type |= __OVL_PATH_MERGE; 87 else if (!oe->opaque) 88 type |= __OVL_PATH_PURE; 89 } else { 90 if (oe->numlower > 1) 91 type |= __OVL_PATH_MERGE; 92 } 93 return type; 94 } 95 96 static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe) 97 { 98 return lockless_dereference(oe->__upperdentry); 99 } 100 101 void ovl_path_upper(struct dentry *dentry, struct path *path) 102 { 103 struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 104 struct ovl_entry *oe = dentry->d_fsdata; 105 106 path->mnt = ofs->upper_mnt; 107 path->dentry = ovl_upperdentry_dereference(oe); 108 } 109 110 enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) 111 { 112 enum ovl_path_type type = ovl_path_type(dentry); 113 114 if (!OVL_TYPE_UPPER(type)) 115 ovl_path_lower(dentry, path); 116 else 117 ovl_path_upper(dentry, path); 118 119 return type; 120 } 121 122 struct dentry *ovl_dentry_upper(struct dentry *dentry) 123 { 124 struct ovl_entry *oe = dentry->d_fsdata; 125 126 return ovl_upperdentry_dereference(oe); 127 } 128 129 struct dentry *ovl_dentry_lower(struct dentry *dentry) 130 { 131 struct ovl_entry *oe = dentry->d_fsdata; 132 133 return __ovl_dentry_lower(oe); 134 } 135 136 struct dentry *ovl_dentry_real(struct dentry *dentry) 137 { 138 struct ovl_entry *oe = dentry->d_fsdata; 139 struct dentry *realdentry; 140 141 realdentry = ovl_upperdentry_dereference(oe); 142 if (!realdentry) 143 realdentry = __ovl_dentry_lower(oe); 144 145 return realdentry; 146 } 147 148 static void ovl_inode_init(struct inode *inode, struct inode *realinode, 149 bool is_upper) 150 { 151 WRITE_ONCE(inode->i_private, (unsigned long) realinode | 152 (is_upper ? OVL_ISUPPER_MASK : 0)); 153 } 154 155 struct vfsmount *ovl_entry_mnt_real(struct ovl_entry *oe, struct inode *inode, 156 bool is_upper) 157 { 158 if (is_upper) { 159 struct ovl_fs *ofs = inode->i_sb->s_fs_info; 160 161 return ofs->upper_mnt; 162 } else { 163 return oe->numlower ? oe->lowerstack[0].mnt : NULL; 164 } 165 } 166 167 struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry) 168 { 169 struct ovl_entry *oe = dentry->d_fsdata; 170 171 return oe->cache; 172 } 173 174 void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache) 175 { 176 struct ovl_entry *oe = dentry->d_fsdata; 177 178 oe->cache = cache; 179 } 180 181 void ovl_path_lower(struct dentry *dentry, struct path *path) 182 { 183 struct ovl_entry *oe = dentry->d_fsdata; 184 185 *path = oe->numlower ? oe->lowerstack[0] : (struct path) { NULL, NULL }; 186 } 187 188 int ovl_want_write(struct dentry *dentry) 189 { 190 struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 191 return mnt_want_write(ofs->upper_mnt); 192 } 193 194 void ovl_drop_write(struct dentry *dentry) 195 { 196 struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 197 mnt_drop_write(ofs->upper_mnt); 198 } 199 200 struct dentry *ovl_workdir(struct dentry *dentry) 201 { 202 struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 203 return ofs->workdir; 204 } 205 206 bool ovl_dentry_is_opaque(struct dentry *dentry) 207 { 208 struct ovl_entry *oe = dentry->d_fsdata; 209 return oe->opaque; 210 } 211 212 void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque) 213 { 214 struct ovl_entry *oe = dentry->d_fsdata; 215 oe->opaque = opaque; 216 } 217 218 void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry) 219 { 220 struct ovl_entry *oe = dentry->d_fsdata; 221 222 WARN_ON(!inode_is_locked(upperdentry->d_parent->d_inode)); 223 WARN_ON(oe->__upperdentry); 224 /* 225 * Make sure upperdentry is consistent before making it visible to 226 * ovl_upperdentry_dereference(). 227 */ 228 smp_wmb(); 229 oe->__upperdentry = upperdentry; 230 } 231 232 void ovl_inode_update(struct inode *inode, struct inode *upperinode) 233 { 234 WARN_ON(!upperinode); 235 WARN_ON(!inode_unhashed(inode)); 236 WRITE_ONCE(inode->i_private, 237 (unsigned long) upperinode | OVL_ISUPPER_MASK); 238 if (!S_ISDIR(upperinode->i_mode)) 239 __insert_inode_hash(inode, (unsigned long) upperinode); 240 } 241 242 void ovl_dentry_version_inc(struct dentry *dentry) 243 { 244 struct ovl_entry *oe = dentry->d_fsdata; 245 246 WARN_ON(!inode_is_locked(dentry->d_inode)); 247 oe->version++; 248 } 249 250 u64 ovl_dentry_version_get(struct dentry *dentry) 251 { 252 struct ovl_entry *oe = dentry->d_fsdata; 253 254 WARN_ON(!inode_is_locked(dentry->d_inode)); 255 return oe->version; 256 } 257 258 bool ovl_is_whiteout(struct dentry *dentry) 259 { 260 struct inode *inode = dentry->d_inode; 261 262 return inode && IS_WHITEOUT(inode); 263 } 264 265 const struct cred *ovl_override_creds(struct super_block *sb) 266 { 267 struct ovl_fs *ofs = sb->s_fs_info; 268 269 return override_creds(ofs->creator_cred); 270 } 271 272 static bool ovl_is_opaquedir(struct dentry *dentry) 273 { 274 int res; 275 char val; 276 struct inode *inode = dentry->d_inode; 277 278 if (!S_ISDIR(inode->i_mode) || !inode->i_op->getxattr) 279 return false; 280 281 res = inode->i_op->getxattr(dentry, inode, OVL_XATTR_OPAQUE, &val, 1); 282 if (res == 1 && val == 'y') 283 return true; 284 285 return false; 286 } 287 288 static void ovl_dentry_release(struct dentry *dentry) 289 { 290 struct ovl_entry *oe = dentry->d_fsdata; 291 292 if (oe) { 293 unsigned int i; 294 295 dput(oe->__upperdentry); 296 for (i = 0; i < oe->numlower; i++) 297 dput(oe->lowerstack[i].dentry); 298 kfree_rcu(oe, rcu); 299 } 300 } 301 302 static struct dentry *ovl_d_real(struct dentry *dentry, 303 const struct inode *inode, 304 unsigned int open_flags) 305 { 306 struct dentry *real; 307 308 if (d_is_dir(dentry)) { 309 if (!inode || inode == d_inode(dentry)) 310 return dentry; 311 goto bug; 312 } 313 314 if (d_is_negative(dentry)) 315 return dentry; 316 317 if (open_flags) { 318 int err = ovl_open_maybe_copy_up(dentry, open_flags); 319 320 if (err) 321 return ERR_PTR(err); 322 } 323 324 real = ovl_dentry_upper(dentry); 325 if (real && (!inode || inode == d_inode(real))) 326 return real; 327 328 real = ovl_dentry_lower(dentry); 329 if (!real) 330 goto bug; 331 332 if (!inode || inode == d_inode(real)) 333 return real; 334 335 /* Handle recursion */ 336 return d_real(real, inode, open_flags); 337 bug: 338 WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry, 339 inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0); 340 return dentry; 341 } 342 343 static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) 344 { 345 struct ovl_entry *oe = dentry->d_fsdata; 346 unsigned int i; 347 int ret = 1; 348 349 for (i = 0; i < oe->numlower; i++) { 350 struct dentry *d = oe->lowerstack[i].dentry; 351 352 if (d->d_flags & DCACHE_OP_REVALIDATE) { 353 ret = d->d_op->d_revalidate(d, flags); 354 if (ret < 0) 355 return ret; 356 if (!ret) { 357 if (!(flags & LOOKUP_RCU)) 358 d_invalidate(d); 359 return -ESTALE; 360 } 361 } 362 } 363 return 1; 364 } 365 366 static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) 367 { 368 struct ovl_entry *oe = dentry->d_fsdata; 369 unsigned int i; 370 int ret = 1; 371 372 for (i = 0; i < oe->numlower; i++) { 373 struct dentry *d = oe->lowerstack[i].dentry; 374 375 if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) { 376 ret = d->d_op->d_weak_revalidate(d, flags); 377 if (ret <= 0) 378 break; 379 } 380 } 381 return ret; 382 } 383 384 static const struct dentry_operations ovl_dentry_operations = { 385 .d_release = ovl_dentry_release, 386 .d_real = ovl_d_real, 387 }; 388 389 static const struct dentry_operations ovl_reval_dentry_operations = { 390 .d_release = ovl_dentry_release, 391 .d_real = ovl_d_real, 392 .d_revalidate = ovl_dentry_revalidate, 393 .d_weak_revalidate = ovl_dentry_weak_revalidate, 394 }; 395 396 static struct ovl_entry *ovl_alloc_entry(unsigned int numlower) 397 { 398 size_t size = offsetof(struct ovl_entry, lowerstack[numlower]); 399 struct ovl_entry *oe = kzalloc(size, GFP_KERNEL); 400 401 if (oe) 402 oe->numlower = numlower; 403 404 return oe; 405 } 406 407 static bool ovl_dentry_remote(struct dentry *dentry) 408 { 409 return dentry->d_flags & 410 (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE | 411 DCACHE_OP_REAL); 412 } 413 414 static bool ovl_dentry_weird(struct dentry *dentry) 415 { 416 return dentry->d_flags & (DCACHE_NEED_AUTOMOUNT | 417 DCACHE_MANAGE_TRANSIT | 418 DCACHE_OP_HASH | 419 DCACHE_OP_COMPARE); 420 } 421 422 static inline struct dentry *ovl_lookup_real(struct super_block *ovl_sb, 423 struct dentry *dir, 424 const struct qstr *name) 425 { 426 const struct cred *old_cred; 427 struct dentry *dentry; 428 429 old_cred = ovl_override_creds(ovl_sb); 430 dentry = lookup_one_len_unlocked(name->name, dir, name->len); 431 revert_creds(old_cred); 432 433 if (IS_ERR(dentry)) { 434 if (PTR_ERR(dentry) == -ENOENT) 435 dentry = NULL; 436 } else if (!dentry->d_inode) { 437 dput(dentry); 438 dentry = NULL; 439 } else if (ovl_dentry_weird(dentry)) { 440 dput(dentry); 441 /* Don't support traversing automounts and other weirdness */ 442 dentry = ERR_PTR(-EREMOTE); 443 } 444 return dentry; 445 } 446 447 /* 448 * Returns next layer in stack starting from top. 449 * Returns -1 if this is the last layer. 450 */ 451 int ovl_path_next(int idx, struct dentry *dentry, struct path *path) 452 { 453 struct ovl_entry *oe = dentry->d_fsdata; 454 455 BUG_ON(idx < 0); 456 if (idx == 0) { 457 ovl_path_upper(dentry, path); 458 if (path->dentry) 459 return oe->numlower ? 1 : -1; 460 idx++; 461 } 462 BUG_ON(idx > oe->numlower); 463 *path = oe->lowerstack[idx - 1]; 464 465 return (idx < oe->numlower) ? idx + 1 : -1; 466 } 467 468 struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, 469 unsigned int flags) 470 { 471 struct ovl_entry *oe; 472 struct ovl_entry *poe = dentry->d_parent->d_fsdata; 473 struct path *stack = NULL; 474 struct dentry *upperdir, *upperdentry = NULL; 475 unsigned int ctr = 0; 476 struct inode *inode = NULL; 477 bool upperopaque = false; 478 struct dentry *this, *prev = NULL; 479 unsigned int i; 480 int err; 481 482 upperdir = ovl_upperdentry_dereference(poe); 483 if (upperdir) { 484 this = ovl_lookup_real(dentry->d_sb, upperdir, &dentry->d_name); 485 err = PTR_ERR(this); 486 if (IS_ERR(this)) 487 goto out; 488 489 if (this) { 490 if (unlikely(ovl_dentry_remote(this))) { 491 dput(this); 492 err = -EREMOTE; 493 goto out; 494 } 495 if (ovl_is_whiteout(this)) { 496 dput(this); 497 this = NULL; 498 upperopaque = true; 499 } else if (poe->numlower && ovl_is_opaquedir(this)) { 500 upperopaque = true; 501 } 502 } 503 upperdentry = prev = this; 504 } 505 506 if (!upperopaque && poe->numlower) { 507 err = -ENOMEM; 508 stack = kcalloc(poe->numlower, sizeof(struct path), GFP_KERNEL); 509 if (!stack) 510 goto out_put_upper; 511 } 512 513 for (i = 0; !upperopaque && i < poe->numlower; i++) { 514 bool opaque = false; 515 struct path lowerpath = poe->lowerstack[i]; 516 517 this = ovl_lookup_real(dentry->d_sb, 518 lowerpath.dentry, &dentry->d_name); 519 err = PTR_ERR(this); 520 if (IS_ERR(this)) { 521 /* 522 * If it's positive, then treat ENAMETOOLONG as ENOENT. 523 */ 524 if (err == -ENAMETOOLONG && (upperdentry || ctr)) 525 continue; 526 goto out_put; 527 } 528 if (!this) 529 continue; 530 if (ovl_is_whiteout(this)) { 531 dput(this); 532 break; 533 } 534 /* 535 * Only makes sense to check opaque dir if this is not the 536 * lowermost layer. 537 */ 538 if (i < poe->numlower - 1 && ovl_is_opaquedir(this)) 539 opaque = true; 540 541 if (prev && (!S_ISDIR(prev->d_inode->i_mode) || 542 !S_ISDIR(this->d_inode->i_mode))) { 543 /* 544 * FIXME: check for upper-opaqueness maybe better done 545 * in remove code. 546 */ 547 if (prev == upperdentry) 548 upperopaque = true; 549 dput(this); 550 break; 551 } 552 /* 553 * If this is a non-directory then stop here. 554 */ 555 if (!S_ISDIR(this->d_inode->i_mode)) 556 opaque = true; 557 558 stack[ctr].dentry = this; 559 stack[ctr].mnt = lowerpath.mnt; 560 ctr++; 561 prev = this; 562 if (opaque) 563 break; 564 } 565 566 oe = ovl_alloc_entry(ctr); 567 err = -ENOMEM; 568 if (!oe) 569 goto out_put; 570 571 if (upperdentry || ctr) { 572 struct dentry *realdentry; 573 struct inode *realinode; 574 575 realdentry = upperdentry ? upperdentry : stack[0].dentry; 576 realinode = d_inode(realdentry); 577 578 err = -ENOMEM; 579 if (upperdentry && !d_is_dir(upperdentry)) { 580 inode = ovl_get_inode(dentry->d_sb, realinode); 581 } else { 582 inode = ovl_new_inode(dentry->d_sb, realinode->i_mode); 583 if (inode) 584 ovl_inode_init(inode, realinode, !!upperdentry); 585 } 586 if (!inode) 587 goto out_free_oe; 588 ovl_copyattr(realdentry->d_inode, inode); 589 } 590 591 oe->opaque = upperopaque; 592 oe->__upperdentry = upperdentry; 593 memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr); 594 kfree(stack); 595 dentry->d_fsdata = oe; 596 d_add(dentry, inode); 597 598 return NULL; 599 600 out_free_oe: 601 kfree(oe); 602 out_put: 603 for (i = 0; i < ctr; i++) 604 dput(stack[i].dentry); 605 kfree(stack); 606 out_put_upper: 607 dput(upperdentry); 608 out: 609 return ERR_PTR(err); 610 } 611 612 struct file *ovl_path_open(struct path *path, int flags) 613 { 614 return dentry_open(path, flags | O_NOATIME, current_cred()); 615 } 616 617 static void ovl_put_super(struct super_block *sb) 618 { 619 struct ovl_fs *ufs = sb->s_fs_info; 620 unsigned i; 621 622 dput(ufs->workdir); 623 mntput(ufs->upper_mnt); 624 for (i = 0; i < ufs->numlower; i++) 625 mntput(ufs->lower_mnt[i]); 626 kfree(ufs->lower_mnt); 627 628 kfree(ufs->config.lowerdir); 629 kfree(ufs->config.upperdir); 630 kfree(ufs->config.workdir); 631 put_cred(ufs->creator_cred); 632 kfree(ufs); 633 } 634 635 /** 636 * ovl_statfs 637 * @sb: The overlayfs super block 638 * @buf: The struct kstatfs to fill in with stats 639 * 640 * Get the filesystem statistics. As writes always target the upper layer 641 * filesystem pass the statfs to the upper filesystem (if it exists) 642 */ 643 static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) 644 { 645 struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 646 struct dentry *root_dentry = dentry->d_sb->s_root; 647 struct path path; 648 int err; 649 650 ovl_path_real(root_dentry, &path); 651 652 err = vfs_statfs(&path, buf); 653 if (!err) { 654 buf->f_namelen = max(buf->f_namelen, ofs->lower_namelen); 655 buf->f_type = OVERLAYFS_SUPER_MAGIC; 656 } 657 658 return err; 659 } 660 661 /** 662 * ovl_show_options 663 * 664 * Prints the mount options for a given superblock. 665 * Returns zero; does not fail. 666 */ 667 static int ovl_show_options(struct seq_file *m, struct dentry *dentry) 668 { 669 struct super_block *sb = dentry->d_sb; 670 struct ovl_fs *ufs = sb->s_fs_info; 671 672 seq_show_option(m, "lowerdir", ufs->config.lowerdir); 673 if (ufs->config.upperdir) { 674 seq_show_option(m, "upperdir", ufs->config.upperdir); 675 seq_show_option(m, "workdir", ufs->config.workdir); 676 } 677 if (ufs->config.default_permissions) 678 seq_puts(m, ",default_permissions"); 679 return 0; 680 } 681 682 static int ovl_remount(struct super_block *sb, int *flags, char *data) 683 { 684 struct ovl_fs *ufs = sb->s_fs_info; 685 686 if (!(*flags & MS_RDONLY) && (!ufs->upper_mnt || !ufs->workdir)) 687 return -EROFS; 688 689 return 0; 690 } 691 692 static const struct super_operations ovl_super_operations = { 693 .put_super = ovl_put_super, 694 .statfs = ovl_statfs, 695 .show_options = ovl_show_options, 696 .remount_fs = ovl_remount, 697 .drop_inode = generic_delete_inode, 698 }; 699 700 enum { 701 OPT_LOWERDIR, 702 OPT_UPPERDIR, 703 OPT_WORKDIR, 704 OPT_DEFAULT_PERMISSIONS, 705 OPT_ERR, 706 }; 707 708 static const match_table_t ovl_tokens = { 709 {OPT_LOWERDIR, "lowerdir=%s"}, 710 {OPT_UPPERDIR, "upperdir=%s"}, 711 {OPT_WORKDIR, "workdir=%s"}, 712 {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, 713 {OPT_ERR, NULL} 714 }; 715 716 static char *ovl_next_opt(char **s) 717 { 718 char *sbegin = *s; 719 char *p; 720 721 if (sbegin == NULL) 722 return NULL; 723 724 for (p = sbegin; *p; p++) { 725 if (*p == '\\') { 726 p++; 727 if (!*p) 728 break; 729 } else if (*p == ',') { 730 *p = '\0'; 731 *s = p + 1; 732 return sbegin; 733 } 734 } 735 *s = NULL; 736 return sbegin; 737 } 738 739 static int ovl_parse_opt(char *opt, struct ovl_config *config) 740 { 741 char *p; 742 743 while ((p = ovl_next_opt(&opt)) != NULL) { 744 int token; 745 substring_t args[MAX_OPT_ARGS]; 746 747 if (!*p) 748 continue; 749 750 token = match_token(p, ovl_tokens, args); 751 switch (token) { 752 case OPT_UPPERDIR: 753 kfree(config->upperdir); 754 config->upperdir = match_strdup(&args[0]); 755 if (!config->upperdir) 756 return -ENOMEM; 757 break; 758 759 case OPT_LOWERDIR: 760 kfree(config->lowerdir); 761 config->lowerdir = match_strdup(&args[0]); 762 if (!config->lowerdir) 763 return -ENOMEM; 764 break; 765 766 case OPT_WORKDIR: 767 kfree(config->workdir); 768 config->workdir = match_strdup(&args[0]); 769 if (!config->workdir) 770 return -ENOMEM; 771 break; 772 773 case OPT_DEFAULT_PERMISSIONS: 774 config->default_permissions = true; 775 break; 776 777 default: 778 pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); 779 return -EINVAL; 780 } 781 } 782 783 /* Workdir is useless in non-upper mount */ 784 if (!config->upperdir && config->workdir) { 785 pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n", 786 config->workdir); 787 kfree(config->workdir); 788 config->workdir = NULL; 789 } 790 791 return 0; 792 } 793 794 #define OVL_WORKDIR_NAME "work" 795 796 static struct dentry *ovl_workdir_create(struct vfsmount *mnt, 797 struct dentry *dentry) 798 { 799 struct inode *dir = dentry->d_inode; 800 struct dentry *work; 801 int err; 802 bool retried = false; 803 804 err = mnt_want_write(mnt); 805 if (err) 806 return ERR_PTR(err); 807 808 inode_lock_nested(dir, I_MUTEX_PARENT); 809 retry: 810 work = lookup_one_len(OVL_WORKDIR_NAME, dentry, 811 strlen(OVL_WORKDIR_NAME)); 812 813 if (!IS_ERR(work)) { 814 struct kstat stat = { 815 .mode = S_IFDIR | 0, 816 }; 817 struct iattr attr = { 818 .ia_valid = ATTR_MODE, 819 .ia_mode = stat.mode, 820 }; 821 822 if (work->d_inode) { 823 err = -EEXIST; 824 if (retried) 825 goto out_dput; 826 827 retried = true; 828 ovl_workdir_cleanup(dir, mnt, work, 0); 829 dput(work); 830 goto retry; 831 } 832 833 err = ovl_create_real(dir, work, &stat, NULL, NULL, true); 834 if (err) 835 goto out_dput; 836 837 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT); 838 if (err && err != -ENODATA && err != -EOPNOTSUPP) 839 goto out_dput; 840 841 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS); 842 if (err && err != -ENODATA && err != -EOPNOTSUPP) 843 goto out_dput; 844 845 /* Clear any inherited mode bits */ 846 inode_lock(work->d_inode); 847 err = notify_change(work, &attr, NULL); 848 inode_unlock(work->d_inode); 849 if (err) 850 goto out_dput; 851 } 852 out_unlock: 853 inode_unlock(dir); 854 mnt_drop_write(mnt); 855 856 return work; 857 858 out_dput: 859 dput(work); 860 work = ERR_PTR(err); 861 goto out_unlock; 862 } 863 864 static void ovl_unescape(char *s) 865 { 866 char *d = s; 867 868 for (;; s++, d++) { 869 if (*s == '\\') 870 s++; 871 *d = *s; 872 if (!*s) 873 break; 874 } 875 } 876 877 static int ovl_mount_dir_noesc(const char *name, struct path *path) 878 { 879 int err = -EINVAL; 880 881 if (!*name) { 882 pr_err("overlayfs: empty lowerdir\n"); 883 goto out; 884 } 885 err = kern_path(name, LOOKUP_FOLLOW, path); 886 if (err) { 887 pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); 888 goto out; 889 } 890 err = -EINVAL; 891 if (ovl_dentry_weird(path->dentry)) { 892 pr_err("overlayfs: filesystem on '%s' not supported\n", name); 893 goto out_put; 894 } 895 if (!S_ISDIR(path->dentry->d_inode->i_mode)) { 896 pr_err("overlayfs: '%s' not a directory\n", name); 897 goto out_put; 898 } 899 return 0; 900 901 out_put: 902 path_put(path); 903 out: 904 return err; 905 } 906 907 static int ovl_mount_dir(const char *name, struct path *path) 908 { 909 int err = -ENOMEM; 910 char *tmp = kstrdup(name, GFP_KERNEL); 911 912 if (tmp) { 913 ovl_unescape(tmp); 914 err = ovl_mount_dir_noesc(tmp, path); 915 916 if (!err) 917 if (ovl_dentry_remote(path->dentry)) { 918 pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n", 919 tmp); 920 path_put(path); 921 err = -EINVAL; 922 } 923 kfree(tmp); 924 } 925 return err; 926 } 927 928 static int ovl_lower_dir(const char *name, struct path *path, long *namelen, 929 int *stack_depth, bool *remote) 930 { 931 int err; 932 struct kstatfs statfs; 933 934 err = ovl_mount_dir_noesc(name, path); 935 if (err) 936 goto out; 937 938 err = vfs_statfs(path, &statfs); 939 if (err) { 940 pr_err("overlayfs: statfs failed on '%s'\n", name); 941 goto out_put; 942 } 943 *namelen = max(*namelen, statfs.f_namelen); 944 *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth); 945 946 if (ovl_dentry_remote(path->dentry)) 947 *remote = true; 948 949 return 0; 950 951 out_put: 952 path_put(path); 953 out: 954 return err; 955 } 956 957 /* Workdir should not be subdir of upperdir and vice versa */ 958 static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) 959 { 960 bool ok = false; 961 962 if (workdir != upperdir) { 963 ok = (lock_rename(workdir, upperdir) == NULL); 964 unlock_rename(workdir, upperdir); 965 } 966 return ok; 967 } 968 969 static unsigned int ovl_split_lowerdirs(char *str) 970 { 971 unsigned int ctr = 1; 972 char *s, *d; 973 974 for (s = d = str;; s++, d++) { 975 if (*s == '\\') { 976 s++; 977 } else if (*s == ':') { 978 *d = '\0'; 979 ctr++; 980 continue; 981 } 982 *d = *s; 983 if (!*s) 984 break; 985 } 986 return ctr; 987 } 988 989 static int __maybe_unused 990 ovl_posix_acl_xattr_get(const struct xattr_handler *handler, 991 struct dentry *dentry, struct inode *inode, 992 const char *name, void *buffer, size_t size) 993 { 994 return ovl_xattr_get(dentry, handler->name, buffer, size); 995 } 996 997 static int __maybe_unused 998 ovl_posix_acl_xattr_set(const struct xattr_handler *handler, 999 struct dentry *dentry, struct inode *inode, 1000 const char *name, const void *value, 1001 size_t size, int flags) 1002 { 1003 struct dentry *workdir = ovl_workdir(dentry); 1004 struct inode *realinode = ovl_inode_real(inode, NULL); 1005 struct posix_acl *acl = NULL; 1006 int err; 1007 1008 /* Check that everything is OK before copy-up */ 1009 if (value) { 1010 acl = posix_acl_from_xattr(&init_user_ns, value, size); 1011 if (IS_ERR(acl)) 1012 return PTR_ERR(acl); 1013 } 1014 err = -EOPNOTSUPP; 1015 if (!IS_POSIXACL(d_inode(workdir))) 1016 goto out_acl_release; 1017 if (!realinode->i_op->set_acl) 1018 goto out_acl_release; 1019 if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) { 1020 err = acl ? -EACCES : 0; 1021 goto out_acl_release; 1022 } 1023 err = -EPERM; 1024 if (!inode_owner_or_capable(inode)) 1025 goto out_acl_release; 1026 1027 posix_acl_release(acl); 1028 1029 err = ovl_xattr_set(dentry, handler->name, value, size, flags); 1030 if (!err) 1031 ovl_copyattr(ovl_inode_real(inode, NULL), inode); 1032 1033 return err; 1034 1035 out_acl_release: 1036 posix_acl_release(acl); 1037 return err; 1038 } 1039 1040 static int ovl_own_xattr_get(const struct xattr_handler *handler, 1041 struct dentry *dentry, struct inode *inode, 1042 const char *name, void *buffer, size_t size) 1043 { 1044 return -EPERM; 1045 } 1046 1047 static int ovl_own_xattr_set(const struct xattr_handler *handler, 1048 struct dentry *dentry, struct inode *inode, 1049 const char *name, const void *value, 1050 size_t size, int flags) 1051 { 1052 return -EPERM; 1053 } 1054 1055 static int ovl_other_xattr_get(const struct xattr_handler *handler, 1056 struct dentry *dentry, struct inode *inode, 1057 const char *name, void *buffer, size_t size) 1058 { 1059 return ovl_xattr_get(dentry, name, buffer, size); 1060 } 1061 1062 static int ovl_other_xattr_set(const struct xattr_handler *handler, 1063 struct dentry *dentry, struct inode *inode, 1064 const char *name, const void *value, 1065 size_t size, int flags) 1066 { 1067 return ovl_xattr_set(dentry, name, value, size, flags); 1068 } 1069 1070 static const struct xattr_handler __maybe_unused 1071 ovl_posix_acl_access_xattr_handler = { 1072 .name = XATTR_NAME_POSIX_ACL_ACCESS, 1073 .flags = ACL_TYPE_ACCESS, 1074 .get = ovl_posix_acl_xattr_get, 1075 .set = ovl_posix_acl_xattr_set, 1076 }; 1077 1078 static const struct xattr_handler __maybe_unused 1079 ovl_posix_acl_default_xattr_handler = { 1080 .name = XATTR_NAME_POSIX_ACL_DEFAULT, 1081 .flags = ACL_TYPE_DEFAULT, 1082 .get = ovl_posix_acl_xattr_get, 1083 .set = ovl_posix_acl_xattr_set, 1084 }; 1085 1086 static const struct xattr_handler ovl_own_xattr_handler = { 1087 .prefix = OVL_XATTR_PREFIX, 1088 .get = ovl_own_xattr_get, 1089 .set = ovl_own_xattr_set, 1090 }; 1091 1092 static const struct xattr_handler ovl_other_xattr_handler = { 1093 .prefix = "", /* catch all */ 1094 .get = ovl_other_xattr_get, 1095 .set = ovl_other_xattr_set, 1096 }; 1097 1098 static const struct xattr_handler *ovl_xattr_handlers[] = { 1099 #ifdef CONFIG_FS_POSIX_ACL 1100 &ovl_posix_acl_access_xattr_handler, 1101 &ovl_posix_acl_default_xattr_handler, 1102 #endif 1103 &ovl_own_xattr_handler, 1104 &ovl_other_xattr_handler, 1105 NULL 1106 }; 1107 1108 static int ovl_fill_super(struct super_block *sb, void *data, int silent) 1109 { 1110 struct path upperpath = { NULL, NULL }; 1111 struct path workpath = { NULL, NULL }; 1112 struct dentry *root_dentry; 1113 struct inode *realinode; 1114 struct ovl_entry *oe; 1115 struct ovl_fs *ufs; 1116 struct path *stack = NULL; 1117 char *lowertmp; 1118 char *lower; 1119 unsigned int numlower; 1120 unsigned int stacklen = 0; 1121 unsigned int i; 1122 bool remote = false; 1123 int err; 1124 1125 err = -ENOMEM; 1126 ufs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); 1127 if (!ufs) 1128 goto out; 1129 1130 err = ovl_parse_opt((char *) data, &ufs->config); 1131 if (err) 1132 goto out_free_config; 1133 1134 err = -EINVAL; 1135 if (!ufs->config.lowerdir) { 1136 if (!silent) 1137 pr_err("overlayfs: missing 'lowerdir'\n"); 1138 goto out_free_config; 1139 } 1140 1141 sb->s_stack_depth = 0; 1142 sb->s_maxbytes = MAX_LFS_FILESIZE; 1143 if (ufs->config.upperdir) { 1144 if (!ufs->config.workdir) { 1145 pr_err("overlayfs: missing 'workdir'\n"); 1146 goto out_free_config; 1147 } 1148 1149 err = ovl_mount_dir(ufs->config.upperdir, &upperpath); 1150 if (err) 1151 goto out_free_config; 1152 1153 /* Upper fs should not be r/o */ 1154 if (upperpath.mnt->mnt_sb->s_flags & MS_RDONLY) { 1155 pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n"); 1156 err = -EINVAL; 1157 goto out_put_upperpath; 1158 } 1159 1160 err = ovl_mount_dir(ufs->config.workdir, &workpath); 1161 if (err) 1162 goto out_put_upperpath; 1163 1164 err = -EINVAL; 1165 if (upperpath.mnt != workpath.mnt) { 1166 pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); 1167 goto out_put_workpath; 1168 } 1169 if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) { 1170 pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); 1171 goto out_put_workpath; 1172 } 1173 sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth; 1174 } 1175 err = -ENOMEM; 1176 lowertmp = kstrdup(ufs->config.lowerdir, GFP_KERNEL); 1177 if (!lowertmp) 1178 goto out_put_workpath; 1179 1180 err = -EINVAL; 1181 stacklen = ovl_split_lowerdirs(lowertmp); 1182 if (stacklen > OVL_MAX_STACK) { 1183 pr_err("overlayfs: too many lower directories, limit is %d\n", 1184 OVL_MAX_STACK); 1185 goto out_free_lowertmp; 1186 } else if (!ufs->config.upperdir && stacklen == 1) { 1187 pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n"); 1188 goto out_free_lowertmp; 1189 } 1190 1191 stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); 1192 if (!stack) 1193 goto out_free_lowertmp; 1194 1195 lower = lowertmp; 1196 for (numlower = 0; numlower < stacklen; numlower++) { 1197 err = ovl_lower_dir(lower, &stack[numlower], 1198 &ufs->lower_namelen, &sb->s_stack_depth, 1199 &remote); 1200 if (err) 1201 goto out_put_lowerpath; 1202 1203 lower = strchr(lower, '\0') + 1; 1204 } 1205 1206 err = -EINVAL; 1207 sb->s_stack_depth++; 1208 if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { 1209 pr_err("overlayfs: maximum fs stacking depth exceeded\n"); 1210 goto out_put_lowerpath; 1211 } 1212 1213 if (ufs->config.upperdir) { 1214 ufs->upper_mnt = clone_private_mount(&upperpath); 1215 err = PTR_ERR(ufs->upper_mnt); 1216 if (IS_ERR(ufs->upper_mnt)) { 1217 pr_err("overlayfs: failed to clone upperpath\n"); 1218 goto out_put_lowerpath; 1219 } 1220 /* Don't inherit atime flags */ 1221 ufs->upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME); 1222 1223 sb->s_time_gran = ufs->upper_mnt->mnt_sb->s_time_gran; 1224 1225 ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry); 1226 err = PTR_ERR(ufs->workdir); 1227 if (IS_ERR(ufs->workdir)) { 1228 pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n", 1229 ufs->config.workdir, OVL_WORKDIR_NAME, -err); 1230 sb->s_flags |= MS_RDONLY; 1231 ufs->workdir = NULL; 1232 } 1233 1234 /* 1235 * Upper should support d_type, else whiteouts are visible. 1236 * Given workdir and upper are on same fs, we can do 1237 * iterate_dir() on workdir. This check requires successful 1238 * creation of workdir in previous step. 1239 */ 1240 if (ufs->workdir) { 1241 err = ovl_check_d_type_supported(&workpath); 1242 if (err < 0) 1243 goto out_put_workdir; 1244 1245 /* 1246 * We allowed this configuration and don't want to 1247 * break users over kernel upgrade. So warn instead 1248 * of erroring out. 1249 */ 1250 if (!err) 1251 pr_warn("overlayfs: upper fs needs to support d_type.\n"); 1252 } 1253 } 1254 1255 err = -ENOMEM; 1256 ufs->lower_mnt = kcalloc(numlower, sizeof(struct vfsmount *), GFP_KERNEL); 1257 if (ufs->lower_mnt == NULL) 1258 goto out_put_workdir; 1259 for (i = 0; i < numlower; i++) { 1260 struct vfsmount *mnt = clone_private_mount(&stack[i]); 1261 1262 err = PTR_ERR(mnt); 1263 if (IS_ERR(mnt)) { 1264 pr_err("overlayfs: failed to clone lowerpath\n"); 1265 goto out_put_lower_mnt; 1266 } 1267 /* 1268 * Make lower_mnt R/O. That way fchmod/fchown on lower file 1269 * will fail instead of modifying lower fs. 1270 */ 1271 mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME; 1272 1273 ufs->lower_mnt[ufs->numlower] = mnt; 1274 ufs->numlower++; 1275 } 1276 1277 /* If the upper fs is nonexistent, we mark overlayfs r/o too */ 1278 if (!ufs->upper_mnt) 1279 sb->s_flags |= MS_RDONLY; 1280 1281 if (remote) 1282 sb->s_d_op = &ovl_reval_dentry_operations; 1283 else 1284 sb->s_d_op = &ovl_dentry_operations; 1285 1286 ufs->creator_cred = prepare_creds(); 1287 if (!ufs->creator_cred) 1288 goto out_put_lower_mnt; 1289 1290 err = -ENOMEM; 1291 oe = ovl_alloc_entry(numlower); 1292 if (!oe) 1293 goto out_put_cred; 1294 1295 root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR)); 1296 if (!root_dentry) 1297 goto out_free_oe; 1298 1299 mntput(upperpath.mnt); 1300 for (i = 0; i < numlower; i++) 1301 mntput(stack[i].mnt); 1302 path_put(&workpath); 1303 kfree(lowertmp); 1304 1305 oe->__upperdentry = upperpath.dentry; 1306 for (i = 0; i < numlower; i++) { 1307 oe->lowerstack[i].dentry = stack[i].dentry; 1308 oe->lowerstack[i].mnt = ufs->lower_mnt[i]; 1309 } 1310 kfree(stack); 1311 1312 root_dentry->d_fsdata = oe; 1313 1314 realinode = d_inode(ovl_dentry_real(root_dentry)); 1315 ovl_inode_init(d_inode(root_dentry), realinode, !!upperpath.dentry); 1316 ovl_copyattr(realinode, d_inode(root_dentry)); 1317 1318 sb->s_magic = OVERLAYFS_SUPER_MAGIC; 1319 sb->s_op = &ovl_super_operations; 1320 sb->s_xattr = ovl_xattr_handlers; 1321 sb->s_root = root_dentry; 1322 sb->s_fs_info = ufs; 1323 sb->s_flags |= MS_POSIXACL; 1324 1325 return 0; 1326 1327 out_free_oe: 1328 kfree(oe); 1329 out_put_cred: 1330 put_cred(ufs->creator_cred); 1331 out_put_lower_mnt: 1332 for (i = 0; i < ufs->numlower; i++) 1333 mntput(ufs->lower_mnt[i]); 1334 kfree(ufs->lower_mnt); 1335 out_put_workdir: 1336 dput(ufs->workdir); 1337 mntput(ufs->upper_mnt); 1338 out_put_lowerpath: 1339 for (i = 0; i < numlower; i++) 1340 path_put(&stack[i]); 1341 kfree(stack); 1342 out_free_lowertmp: 1343 kfree(lowertmp); 1344 out_put_workpath: 1345 path_put(&workpath); 1346 out_put_upperpath: 1347 path_put(&upperpath); 1348 out_free_config: 1349 kfree(ufs->config.lowerdir); 1350 kfree(ufs->config.upperdir); 1351 kfree(ufs->config.workdir); 1352 kfree(ufs); 1353 out: 1354 return err; 1355 } 1356 1357 static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, 1358 const char *dev_name, void *raw_data) 1359 { 1360 return mount_nodev(fs_type, flags, raw_data, ovl_fill_super); 1361 } 1362 1363 static struct file_system_type ovl_fs_type = { 1364 .owner = THIS_MODULE, 1365 .name = "overlay", 1366 .mount = ovl_mount, 1367 .kill_sb = kill_anon_super, 1368 }; 1369 MODULE_ALIAS_FS("overlay"); 1370 1371 static int __init ovl_init(void) 1372 { 1373 return register_filesystem(&ovl_fs_type); 1374 } 1375 1376 static void __exit ovl_exit(void) 1377 { 1378 unregister_filesystem(&ovl_fs_type); 1379 } 1380 1381 module_init(ovl_init); 1382 module_exit(ovl_exit); 1383