1 /* 2 * 3 * Copyright (C) 2011 Novell Inc. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 as published by 7 * the Free Software Foundation. 8 */ 9 10 #include <linux/fs.h> 11 #include <linux/namei.h> 12 #include <linux/pagemap.h> 13 #include <linux/xattr.h> 14 #include <linux/security.h> 15 #include <linux/mount.h> 16 #include <linux/slab.h> 17 #include <linux/parser.h> 18 #include <linux/module.h> 19 #include <linux/sched.h> 20 #include <linux/statfs.h> 21 #include <linux/seq_file.h> 22 #include <linux/posix_acl_xattr.h> 23 #include "overlayfs.h" 24 25 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 26 MODULE_DESCRIPTION("Overlay filesystem"); 27 MODULE_LICENSE("GPL"); 28 29 struct ovl_config { 30 char *lowerdir; 31 char *upperdir; 32 char *workdir; 33 bool default_permissions; 34 }; 35 36 /* private information held for overlayfs's superblock */ 37 struct ovl_fs { 38 struct vfsmount *upper_mnt; 39 unsigned numlower; 40 struct vfsmount **lower_mnt; 41 struct dentry *workdir; 42 long lower_namelen; 43 /* pathnames of lower and upper dirs, for show_options */ 44 struct ovl_config config; 45 /* creds of process who forced instantiation of super block */ 46 const struct cred *creator_cred; 47 }; 48 49 struct ovl_dir_cache; 50 51 /* private information held for every overlayfs dentry */ 52 struct ovl_entry { 53 struct dentry *__upperdentry; 54 struct ovl_dir_cache *cache; 55 union { 56 struct { 57 u64 version; 58 bool opaque; 59 }; 60 struct rcu_head rcu; 61 }; 62 unsigned numlower; 63 struct path lowerstack[]; 64 }; 65 66 #define OVL_MAX_STACK 500 67 68 static struct dentry *__ovl_dentry_lower(struct ovl_entry *oe) 69 { 70 return oe->numlower ? oe->lowerstack[0].dentry : NULL; 71 } 72 73 enum ovl_path_type ovl_path_type(struct dentry *dentry) 74 { 75 struct ovl_entry *oe = dentry->d_fsdata; 76 enum ovl_path_type type = 0; 77 78 if (oe->__upperdentry) { 79 type = __OVL_PATH_UPPER; 80 81 /* 82 * Non-dir dentry can hold lower dentry from previous 83 * location. Its purity depends only on opaque flag. 84 */ 85 if (oe->numlower && S_ISDIR(dentry->d_inode->i_mode)) 86 type |= __OVL_PATH_MERGE; 87 else if (!oe->opaque) 88 type |= __OVL_PATH_PURE; 89 } else { 90 if (oe->numlower > 1) 91 type |= __OVL_PATH_MERGE; 92 } 93 return type; 94 } 95 96 static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe) 97 { 98 return lockless_dereference(oe->__upperdentry); 99 } 100 101 void ovl_path_upper(struct dentry *dentry, struct path *path) 102 { 103 struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 104 struct ovl_entry *oe = dentry->d_fsdata; 105 106 path->mnt = ofs->upper_mnt; 107 path->dentry = ovl_upperdentry_dereference(oe); 108 } 109 110 enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) 111 { 112 enum ovl_path_type type = ovl_path_type(dentry); 113 114 if (!OVL_TYPE_UPPER(type)) 115 ovl_path_lower(dentry, path); 116 else 117 ovl_path_upper(dentry, path); 118 119 return type; 120 } 121 122 struct dentry *ovl_dentry_upper(struct dentry *dentry) 123 { 124 struct ovl_entry *oe = dentry->d_fsdata; 125 126 return ovl_upperdentry_dereference(oe); 127 } 128 129 struct dentry *ovl_dentry_lower(struct dentry *dentry) 130 { 131 struct ovl_entry *oe = dentry->d_fsdata; 132 133 return __ovl_dentry_lower(oe); 134 } 135 136 struct dentry *ovl_dentry_real(struct dentry *dentry) 137 { 138 struct ovl_entry *oe = dentry->d_fsdata; 139 struct dentry *realdentry; 140 141 realdentry = ovl_upperdentry_dereference(oe); 142 if (!realdentry) 143 realdentry = __ovl_dentry_lower(oe); 144 145 return realdentry; 146 } 147 148 static void ovl_inode_init(struct inode *inode, struct inode *realinode, 149 bool is_upper) 150 { 151 WRITE_ONCE(inode->i_private, (unsigned long) realinode | 152 (is_upper ? OVL_ISUPPER_MASK : 0)); 153 } 154 155 struct vfsmount *ovl_entry_mnt_real(struct ovl_entry *oe, struct inode *inode, 156 bool is_upper) 157 { 158 if (is_upper) { 159 struct ovl_fs *ofs = inode->i_sb->s_fs_info; 160 161 return ofs->upper_mnt; 162 } else { 163 return oe->numlower ? oe->lowerstack[0].mnt : NULL; 164 } 165 } 166 167 struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry) 168 { 169 struct ovl_entry *oe = dentry->d_fsdata; 170 171 return oe->cache; 172 } 173 174 void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache) 175 { 176 struct ovl_entry *oe = dentry->d_fsdata; 177 178 oe->cache = cache; 179 } 180 181 void ovl_path_lower(struct dentry *dentry, struct path *path) 182 { 183 struct ovl_entry *oe = dentry->d_fsdata; 184 185 *path = oe->numlower ? oe->lowerstack[0] : (struct path) { NULL, NULL }; 186 } 187 188 int ovl_want_write(struct dentry *dentry) 189 { 190 struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 191 return mnt_want_write(ofs->upper_mnt); 192 } 193 194 void ovl_drop_write(struct dentry *dentry) 195 { 196 struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 197 mnt_drop_write(ofs->upper_mnt); 198 } 199 200 struct dentry *ovl_workdir(struct dentry *dentry) 201 { 202 struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 203 return ofs->workdir; 204 } 205 206 bool ovl_dentry_is_opaque(struct dentry *dentry) 207 { 208 struct ovl_entry *oe = dentry->d_fsdata; 209 return oe->opaque; 210 } 211 212 void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque) 213 { 214 struct ovl_entry *oe = dentry->d_fsdata; 215 oe->opaque = opaque; 216 } 217 218 void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry) 219 { 220 struct ovl_entry *oe = dentry->d_fsdata; 221 222 WARN_ON(!inode_is_locked(upperdentry->d_parent->d_inode)); 223 WARN_ON(oe->__upperdentry); 224 /* 225 * Make sure upperdentry is consistent before making it visible to 226 * ovl_upperdentry_dereference(). 227 */ 228 smp_wmb(); 229 oe->__upperdentry = upperdentry; 230 } 231 232 void ovl_inode_update(struct inode *inode, struct inode *upperinode) 233 { 234 WARN_ON(!upperinode); 235 WARN_ON(!inode_unhashed(inode)); 236 WRITE_ONCE(inode->i_private, 237 (unsigned long) upperinode | OVL_ISUPPER_MASK); 238 if (!S_ISDIR(upperinode->i_mode)) 239 __insert_inode_hash(inode, (unsigned long) upperinode); 240 } 241 242 void ovl_dentry_version_inc(struct dentry *dentry) 243 { 244 struct ovl_entry *oe = dentry->d_fsdata; 245 246 WARN_ON(!inode_is_locked(dentry->d_inode)); 247 oe->version++; 248 } 249 250 u64 ovl_dentry_version_get(struct dentry *dentry) 251 { 252 struct ovl_entry *oe = dentry->d_fsdata; 253 254 WARN_ON(!inode_is_locked(dentry->d_inode)); 255 return oe->version; 256 } 257 258 bool ovl_is_whiteout(struct dentry *dentry) 259 { 260 struct inode *inode = dentry->d_inode; 261 262 return inode && IS_WHITEOUT(inode); 263 } 264 265 const struct cred *ovl_override_creds(struct super_block *sb) 266 { 267 struct ovl_fs *ofs = sb->s_fs_info; 268 269 return override_creds(ofs->creator_cred); 270 } 271 272 static bool ovl_is_opaquedir(struct dentry *dentry) 273 { 274 int res; 275 char val; 276 277 if (!d_is_dir(dentry)) 278 return false; 279 280 res = vfs_getxattr(dentry, OVL_XATTR_OPAQUE, &val, 1); 281 if (res == 1 && val == 'y') 282 return true; 283 284 return false; 285 } 286 287 static void ovl_dentry_release(struct dentry *dentry) 288 { 289 struct ovl_entry *oe = dentry->d_fsdata; 290 291 if (oe) { 292 unsigned int i; 293 294 dput(oe->__upperdentry); 295 for (i = 0; i < oe->numlower; i++) 296 dput(oe->lowerstack[i].dentry); 297 kfree_rcu(oe, rcu); 298 } 299 } 300 301 static struct dentry *ovl_d_real(struct dentry *dentry, 302 const struct inode *inode, 303 unsigned int open_flags) 304 { 305 struct dentry *real; 306 307 if (d_is_dir(dentry)) { 308 if (!inode || inode == d_inode(dentry)) 309 return dentry; 310 goto bug; 311 } 312 313 if (d_is_negative(dentry)) 314 return dentry; 315 316 if (open_flags) { 317 int err = ovl_open_maybe_copy_up(dentry, open_flags); 318 319 if (err) 320 return ERR_PTR(err); 321 } 322 323 real = ovl_dentry_upper(dentry); 324 if (real && (!inode || inode == d_inode(real))) 325 return real; 326 327 real = ovl_dentry_lower(dentry); 328 if (!real) 329 goto bug; 330 331 if (!inode || inode == d_inode(real)) 332 return real; 333 334 /* Handle recursion */ 335 return d_real(real, inode, open_flags); 336 bug: 337 WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry, 338 inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0); 339 return dentry; 340 } 341 342 static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) 343 { 344 struct ovl_entry *oe = dentry->d_fsdata; 345 unsigned int i; 346 int ret = 1; 347 348 for (i = 0; i < oe->numlower; i++) { 349 struct dentry *d = oe->lowerstack[i].dentry; 350 351 if (d->d_flags & DCACHE_OP_REVALIDATE) { 352 ret = d->d_op->d_revalidate(d, flags); 353 if (ret < 0) 354 return ret; 355 if (!ret) { 356 if (!(flags & LOOKUP_RCU)) 357 d_invalidate(d); 358 return -ESTALE; 359 } 360 } 361 } 362 return 1; 363 } 364 365 static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags) 366 { 367 struct ovl_entry *oe = dentry->d_fsdata; 368 unsigned int i; 369 int ret = 1; 370 371 for (i = 0; i < oe->numlower; i++) { 372 struct dentry *d = oe->lowerstack[i].dentry; 373 374 if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) { 375 ret = d->d_op->d_weak_revalidate(d, flags); 376 if (ret <= 0) 377 break; 378 } 379 } 380 return ret; 381 } 382 383 static const struct dentry_operations ovl_dentry_operations = { 384 .d_release = ovl_dentry_release, 385 .d_real = ovl_d_real, 386 }; 387 388 static const struct dentry_operations ovl_reval_dentry_operations = { 389 .d_release = ovl_dentry_release, 390 .d_real = ovl_d_real, 391 .d_revalidate = ovl_dentry_revalidate, 392 .d_weak_revalidate = ovl_dentry_weak_revalidate, 393 }; 394 395 static struct ovl_entry *ovl_alloc_entry(unsigned int numlower) 396 { 397 size_t size = offsetof(struct ovl_entry, lowerstack[numlower]); 398 struct ovl_entry *oe = kzalloc(size, GFP_KERNEL); 399 400 if (oe) 401 oe->numlower = numlower; 402 403 return oe; 404 } 405 406 static bool ovl_dentry_remote(struct dentry *dentry) 407 { 408 return dentry->d_flags & 409 (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE | 410 DCACHE_OP_REAL); 411 } 412 413 static bool ovl_dentry_weird(struct dentry *dentry) 414 { 415 return dentry->d_flags & (DCACHE_NEED_AUTOMOUNT | 416 DCACHE_MANAGE_TRANSIT | 417 DCACHE_OP_HASH | 418 DCACHE_OP_COMPARE); 419 } 420 421 static inline struct dentry *ovl_lookup_real(struct dentry *dir, 422 const struct qstr *name) 423 { 424 struct dentry *dentry; 425 426 dentry = lookup_one_len_unlocked(name->name, dir, name->len); 427 428 if (IS_ERR(dentry)) { 429 if (PTR_ERR(dentry) == -ENOENT) 430 dentry = NULL; 431 } else if (!dentry->d_inode) { 432 dput(dentry); 433 dentry = NULL; 434 } else if (ovl_dentry_weird(dentry)) { 435 dput(dentry); 436 /* Don't support traversing automounts and other weirdness */ 437 dentry = ERR_PTR(-EREMOTE); 438 } 439 return dentry; 440 } 441 442 /* 443 * Returns next layer in stack starting from top. 444 * Returns -1 if this is the last layer. 445 */ 446 int ovl_path_next(int idx, struct dentry *dentry, struct path *path) 447 { 448 struct ovl_entry *oe = dentry->d_fsdata; 449 450 BUG_ON(idx < 0); 451 if (idx == 0) { 452 ovl_path_upper(dentry, path); 453 if (path->dentry) 454 return oe->numlower ? 1 : -1; 455 idx++; 456 } 457 BUG_ON(idx > oe->numlower); 458 *path = oe->lowerstack[idx - 1]; 459 460 return (idx < oe->numlower) ? idx + 1 : -1; 461 } 462 463 struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, 464 unsigned int flags) 465 { 466 struct ovl_entry *oe; 467 const struct cred *old_cred; 468 struct ovl_entry *poe = dentry->d_parent->d_fsdata; 469 struct path *stack = NULL; 470 struct dentry *upperdir, *upperdentry = NULL; 471 unsigned int ctr = 0; 472 struct inode *inode = NULL; 473 bool upperopaque = false; 474 struct dentry *this, *prev = NULL; 475 unsigned int i; 476 int err; 477 478 old_cred = ovl_override_creds(dentry->d_sb); 479 upperdir = ovl_upperdentry_dereference(poe); 480 if (upperdir) { 481 this = ovl_lookup_real(upperdir, &dentry->d_name); 482 err = PTR_ERR(this); 483 if (IS_ERR(this)) 484 goto out; 485 486 if (this) { 487 if (unlikely(ovl_dentry_remote(this))) { 488 dput(this); 489 err = -EREMOTE; 490 goto out; 491 } 492 if (ovl_is_whiteout(this)) { 493 dput(this); 494 this = NULL; 495 upperopaque = true; 496 } else if (poe->numlower && ovl_is_opaquedir(this)) { 497 upperopaque = true; 498 } 499 } 500 upperdentry = prev = this; 501 } 502 503 if (!upperopaque && poe->numlower) { 504 err = -ENOMEM; 505 stack = kcalloc(poe->numlower, sizeof(struct path), GFP_KERNEL); 506 if (!stack) 507 goto out_put_upper; 508 } 509 510 for (i = 0; !upperopaque && i < poe->numlower; i++) { 511 bool opaque = false; 512 struct path lowerpath = poe->lowerstack[i]; 513 514 this = ovl_lookup_real(lowerpath.dentry, &dentry->d_name); 515 err = PTR_ERR(this); 516 if (IS_ERR(this)) { 517 /* 518 * If it's positive, then treat ENAMETOOLONG as ENOENT. 519 */ 520 if (err == -ENAMETOOLONG && (upperdentry || ctr)) 521 continue; 522 goto out_put; 523 } 524 if (!this) 525 continue; 526 if (ovl_is_whiteout(this)) { 527 dput(this); 528 break; 529 } 530 /* 531 * Only makes sense to check opaque dir if this is not the 532 * lowermost layer. 533 */ 534 if (i < poe->numlower - 1 && ovl_is_opaquedir(this)) 535 opaque = true; 536 537 if (prev && (!S_ISDIR(prev->d_inode->i_mode) || 538 !S_ISDIR(this->d_inode->i_mode))) { 539 /* 540 * FIXME: check for upper-opaqueness maybe better done 541 * in remove code. 542 */ 543 if (prev == upperdentry) 544 upperopaque = true; 545 dput(this); 546 break; 547 } 548 /* 549 * If this is a non-directory then stop here. 550 */ 551 if (!S_ISDIR(this->d_inode->i_mode)) 552 opaque = true; 553 554 stack[ctr].dentry = this; 555 stack[ctr].mnt = lowerpath.mnt; 556 ctr++; 557 prev = this; 558 if (opaque) 559 break; 560 } 561 562 oe = ovl_alloc_entry(ctr); 563 err = -ENOMEM; 564 if (!oe) 565 goto out_put; 566 567 if (upperdentry || ctr) { 568 struct dentry *realdentry; 569 struct inode *realinode; 570 571 realdentry = upperdentry ? upperdentry : stack[0].dentry; 572 realinode = d_inode(realdentry); 573 574 err = -ENOMEM; 575 if (upperdentry && !d_is_dir(upperdentry)) { 576 inode = ovl_get_inode(dentry->d_sb, realinode); 577 } else { 578 inode = ovl_new_inode(dentry->d_sb, realinode->i_mode); 579 if (inode) 580 ovl_inode_init(inode, realinode, !!upperdentry); 581 } 582 if (!inode) 583 goto out_free_oe; 584 ovl_copyattr(realdentry->d_inode, inode); 585 } 586 587 revert_creds(old_cred); 588 oe->opaque = upperopaque; 589 oe->__upperdentry = upperdentry; 590 memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr); 591 kfree(stack); 592 dentry->d_fsdata = oe; 593 d_add(dentry, inode); 594 595 return NULL; 596 597 out_free_oe: 598 kfree(oe); 599 out_put: 600 for (i = 0; i < ctr; i++) 601 dput(stack[i].dentry); 602 kfree(stack); 603 out_put_upper: 604 dput(upperdentry); 605 out: 606 revert_creds(old_cred); 607 return ERR_PTR(err); 608 } 609 610 struct file *ovl_path_open(struct path *path, int flags) 611 { 612 return dentry_open(path, flags | O_NOATIME, current_cred()); 613 } 614 615 static void ovl_put_super(struct super_block *sb) 616 { 617 struct ovl_fs *ufs = sb->s_fs_info; 618 unsigned i; 619 620 dput(ufs->workdir); 621 mntput(ufs->upper_mnt); 622 for (i = 0; i < ufs->numlower; i++) 623 mntput(ufs->lower_mnt[i]); 624 kfree(ufs->lower_mnt); 625 626 kfree(ufs->config.lowerdir); 627 kfree(ufs->config.upperdir); 628 kfree(ufs->config.workdir); 629 put_cred(ufs->creator_cred); 630 kfree(ufs); 631 } 632 633 /** 634 * ovl_statfs 635 * @sb: The overlayfs super block 636 * @buf: The struct kstatfs to fill in with stats 637 * 638 * Get the filesystem statistics. As writes always target the upper layer 639 * filesystem pass the statfs to the upper filesystem (if it exists) 640 */ 641 static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) 642 { 643 struct ovl_fs *ofs = dentry->d_sb->s_fs_info; 644 struct dentry *root_dentry = dentry->d_sb->s_root; 645 struct path path; 646 int err; 647 648 ovl_path_real(root_dentry, &path); 649 650 err = vfs_statfs(&path, buf); 651 if (!err) { 652 buf->f_namelen = max(buf->f_namelen, ofs->lower_namelen); 653 buf->f_type = OVERLAYFS_SUPER_MAGIC; 654 } 655 656 return err; 657 } 658 659 /** 660 * ovl_show_options 661 * 662 * Prints the mount options for a given superblock. 663 * Returns zero; does not fail. 664 */ 665 static int ovl_show_options(struct seq_file *m, struct dentry *dentry) 666 { 667 struct super_block *sb = dentry->d_sb; 668 struct ovl_fs *ufs = sb->s_fs_info; 669 670 seq_show_option(m, "lowerdir", ufs->config.lowerdir); 671 if (ufs->config.upperdir) { 672 seq_show_option(m, "upperdir", ufs->config.upperdir); 673 seq_show_option(m, "workdir", ufs->config.workdir); 674 } 675 if (ufs->config.default_permissions) 676 seq_puts(m, ",default_permissions"); 677 return 0; 678 } 679 680 static int ovl_remount(struct super_block *sb, int *flags, char *data) 681 { 682 struct ovl_fs *ufs = sb->s_fs_info; 683 684 if (!(*flags & MS_RDONLY) && (!ufs->upper_mnt || !ufs->workdir)) 685 return -EROFS; 686 687 return 0; 688 } 689 690 static const struct super_operations ovl_super_operations = { 691 .put_super = ovl_put_super, 692 .statfs = ovl_statfs, 693 .show_options = ovl_show_options, 694 .remount_fs = ovl_remount, 695 .drop_inode = generic_delete_inode, 696 }; 697 698 enum { 699 OPT_LOWERDIR, 700 OPT_UPPERDIR, 701 OPT_WORKDIR, 702 OPT_DEFAULT_PERMISSIONS, 703 OPT_ERR, 704 }; 705 706 static const match_table_t ovl_tokens = { 707 {OPT_LOWERDIR, "lowerdir=%s"}, 708 {OPT_UPPERDIR, "upperdir=%s"}, 709 {OPT_WORKDIR, "workdir=%s"}, 710 {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, 711 {OPT_ERR, NULL} 712 }; 713 714 static char *ovl_next_opt(char **s) 715 { 716 char *sbegin = *s; 717 char *p; 718 719 if (sbegin == NULL) 720 return NULL; 721 722 for (p = sbegin; *p; p++) { 723 if (*p == '\\') { 724 p++; 725 if (!*p) 726 break; 727 } else if (*p == ',') { 728 *p = '\0'; 729 *s = p + 1; 730 return sbegin; 731 } 732 } 733 *s = NULL; 734 return sbegin; 735 } 736 737 static int ovl_parse_opt(char *opt, struct ovl_config *config) 738 { 739 char *p; 740 741 while ((p = ovl_next_opt(&opt)) != NULL) { 742 int token; 743 substring_t args[MAX_OPT_ARGS]; 744 745 if (!*p) 746 continue; 747 748 token = match_token(p, ovl_tokens, args); 749 switch (token) { 750 case OPT_UPPERDIR: 751 kfree(config->upperdir); 752 config->upperdir = match_strdup(&args[0]); 753 if (!config->upperdir) 754 return -ENOMEM; 755 break; 756 757 case OPT_LOWERDIR: 758 kfree(config->lowerdir); 759 config->lowerdir = match_strdup(&args[0]); 760 if (!config->lowerdir) 761 return -ENOMEM; 762 break; 763 764 case OPT_WORKDIR: 765 kfree(config->workdir); 766 config->workdir = match_strdup(&args[0]); 767 if (!config->workdir) 768 return -ENOMEM; 769 break; 770 771 case OPT_DEFAULT_PERMISSIONS: 772 config->default_permissions = true; 773 break; 774 775 default: 776 pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); 777 return -EINVAL; 778 } 779 } 780 781 /* Workdir is useless in non-upper mount */ 782 if (!config->upperdir && config->workdir) { 783 pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n", 784 config->workdir); 785 kfree(config->workdir); 786 config->workdir = NULL; 787 } 788 789 return 0; 790 } 791 792 #define OVL_WORKDIR_NAME "work" 793 794 static struct dentry *ovl_workdir_create(struct vfsmount *mnt, 795 struct dentry *dentry) 796 { 797 struct inode *dir = dentry->d_inode; 798 struct dentry *work; 799 int err; 800 bool retried = false; 801 802 err = mnt_want_write(mnt); 803 if (err) 804 return ERR_PTR(err); 805 806 inode_lock_nested(dir, I_MUTEX_PARENT); 807 retry: 808 work = lookup_one_len(OVL_WORKDIR_NAME, dentry, 809 strlen(OVL_WORKDIR_NAME)); 810 811 if (!IS_ERR(work)) { 812 struct kstat stat = { 813 .mode = S_IFDIR | 0, 814 }; 815 struct iattr attr = { 816 .ia_valid = ATTR_MODE, 817 .ia_mode = stat.mode, 818 }; 819 820 if (work->d_inode) { 821 err = -EEXIST; 822 if (retried) 823 goto out_dput; 824 825 retried = true; 826 ovl_workdir_cleanup(dir, mnt, work, 0); 827 dput(work); 828 goto retry; 829 } 830 831 err = ovl_create_real(dir, work, &stat, NULL, NULL, true); 832 if (err) 833 goto out_dput; 834 835 /* 836 * Try to remove POSIX ACL xattrs from workdir. We are good if: 837 * 838 * a) success (there was a POSIX ACL xattr and was removed) 839 * b) -ENODATA (there was no POSIX ACL xattr) 840 * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported) 841 * 842 * There are various other error values that could effectively 843 * mean that the xattr doesn't exist (e.g. -ERANGE is returned 844 * if the xattr name is too long), but the set of filesystems 845 * allowed as upper are limited to "normal" ones, where checking 846 * for the above two errors is sufficient. 847 */ 848 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT); 849 if (err && err != -ENODATA && err != -EOPNOTSUPP) 850 goto out_dput; 851 852 err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS); 853 if (err && err != -ENODATA && err != -EOPNOTSUPP) 854 goto out_dput; 855 856 /* Clear any inherited mode bits */ 857 inode_lock(work->d_inode); 858 err = notify_change(work, &attr, NULL); 859 inode_unlock(work->d_inode); 860 if (err) 861 goto out_dput; 862 } 863 out_unlock: 864 inode_unlock(dir); 865 mnt_drop_write(mnt); 866 867 return work; 868 869 out_dput: 870 dput(work); 871 work = ERR_PTR(err); 872 goto out_unlock; 873 } 874 875 static void ovl_unescape(char *s) 876 { 877 char *d = s; 878 879 for (;; s++, d++) { 880 if (*s == '\\') 881 s++; 882 *d = *s; 883 if (!*s) 884 break; 885 } 886 } 887 888 static int ovl_mount_dir_noesc(const char *name, struct path *path) 889 { 890 int err = -EINVAL; 891 892 if (!*name) { 893 pr_err("overlayfs: empty lowerdir\n"); 894 goto out; 895 } 896 err = kern_path(name, LOOKUP_FOLLOW, path); 897 if (err) { 898 pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); 899 goto out; 900 } 901 err = -EINVAL; 902 if (ovl_dentry_weird(path->dentry)) { 903 pr_err("overlayfs: filesystem on '%s' not supported\n", name); 904 goto out_put; 905 } 906 if (!S_ISDIR(path->dentry->d_inode->i_mode)) { 907 pr_err("overlayfs: '%s' not a directory\n", name); 908 goto out_put; 909 } 910 return 0; 911 912 out_put: 913 path_put(path); 914 out: 915 return err; 916 } 917 918 static int ovl_mount_dir(const char *name, struct path *path) 919 { 920 int err = -ENOMEM; 921 char *tmp = kstrdup(name, GFP_KERNEL); 922 923 if (tmp) { 924 ovl_unescape(tmp); 925 err = ovl_mount_dir_noesc(tmp, path); 926 927 if (!err) 928 if (ovl_dentry_remote(path->dentry)) { 929 pr_err("overlayfs: filesystem on '%s' not supported as upperdir\n", 930 tmp); 931 path_put(path); 932 err = -EINVAL; 933 } 934 kfree(tmp); 935 } 936 return err; 937 } 938 939 static int ovl_lower_dir(const char *name, struct path *path, long *namelen, 940 int *stack_depth, bool *remote) 941 { 942 int err; 943 struct kstatfs statfs; 944 945 err = ovl_mount_dir_noesc(name, path); 946 if (err) 947 goto out; 948 949 err = vfs_statfs(path, &statfs); 950 if (err) { 951 pr_err("overlayfs: statfs failed on '%s'\n", name); 952 goto out_put; 953 } 954 *namelen = max(*namelen, statfs.f_namelen); 955 *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth); 956 957 if (ovl_dentry_remote(path->dentry)) 958 *remote = true; 959 960 return 0; 961 962 out_put: 963 path_put(path); 964 out: 965 return err; 966 } 967 968 /* Workdir should not be subdir of upperdir and vice versa */ 969 static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) 970 { 971 bool ok = false; 972 973 if (workdir != upperdir) { 974 ok = (lock_rename(workdir, upperdir) == NULL); 975 unlock_rename(workdir, upperdir); 976 } 977 return ok; 978 } 979 980 static unsigned int ovl_split_lowerdirs(char *str) 981 { 982 unsigned int ctr = 1; 983 char *s, *d; 984 985 for (s = d = str;; s++, d++) { 986 if (*s == '\\') { 987 s++; 988 } else if (*s == ':') { 989 *d = '\0'; 990 ctr++; 991 continue; 992 } 993 *d = *s; 994 if (!*s) 995 break; 996 } 997 return ctr; 998 } 999 1000 static int __maybe_unused 1001 ovl_posix_acl_xattr_get(const struct xattr_handler *handler, 1002 struct dentry *dentry, struct inode *inode, 1003 const char *name, void *buffer, size_t size) 1004 { 1005 return ovl_xattr_get(dentry, handler->name, buffer, size); 1006 } 1007 1008 static int __maybe_unused 1009 ovl_posix_acl_xattr_set(const struct xattr_handler *handler, 1010 struct dentry *dentry, struct inode *inode, 1011 const char *name, const void *value, 1012 size_t size, int flags) 1013 { 1014 struct dentry *workdir = ovl_workdir(dentry); 1015 struct inode *realinode = ovl_inode_real(inode, NULL); 1016 struct posix_acl *acl = NULL; 1017 int err; 1018 1019 /* Check that everything is OK before copy-up */ 1020 if (value) { 1021 acl = posix_acl_from_xattr(&init_user_ns, value, size); 1022 if (IS_ERR(acl)) 1023 return PTR_ERR(acl); 1024 } 1025 err = -EOPNOTSUPP; 1026 if (!IS_POSIXACL(d_inode(workdir))) 1027 goto out_acl_release; 1028 if (!realinode->i_op->set_acl) 1029 goto out_acl_release; 1030 if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) { 1031 err = acl ? -EACCES : 0; 1032 goto out_acl_release; 1033 } 1034 err = -EPERM; 1035 if (!inode_owner_or_capable(inode)) 1036 goto out_acl_release; 1037 1038 posix_acl_release(acl); 1039 1040 err = ovl_xattr_set(dentry, handler->name, value, size, flags); 1041 if (!err) 1042 ovl_copyattr(ovl_inode_real(inode, NULL), inode); 1043 1044 return err; 1045 1046 out_acl_release: 1047 posix_acl_release(acl); 1048 return err; 1049 } 1050 1051 static int ovl_own_xattr_get(const struct xattr_handler *handler, 1052 struct dentry *dentry, struct inode *inode, 1053 const char *name, void *buffer, size_t size) 1054 { 1055 return -EPERM; 1056 } 1057 1058 static int ovl_own_xattr_set(const struct xattr_handler *handler, 1059 struct dentry *dentry, struct inode *inode, 1060 const char *name, const void *value, 1061 size_t size, int flags) 1062 { 1063 return -EPERM; 1064 } 1065 1066 static int ovl_other_xattr_get(const struct xattr_handler *handler, 1067 struct dentry *dentry, struct inode *inode, 1068 const char *name, void *buffer, size_t size) 1069 { 1070 return ovl_xattr_get(dentry, name, buffer, size); 1071 } 1072 1073 static int ovl_other_xattr_set(const struct xattr_handler *handler, 1074 struct dentry *dentry, struct inode *inode, 1075 const char *name, const void *value, 1076 size_t size, int flags) 1077 { 1078 return ovl_xattr_set(dentry, name, value, size, flags); 1079 } 1080 1081 static const struct xattr_handler __maybe_unused 1082 ovl_posix_acl_access_xattr_handler = { 1083 .name = XATTR_NAME_POSIX_ACL_ACCESS, 1084 .flags = ACL_TYPE_ACCESS, 1085 .get = ovl_posix_acl_xattr_get, 1086 .set = ovl_posix_acl_xattr_set, 1087 }; 1088 1089 static const struct xattr_handler __maybe_unused 1090 ovl_posix_acl_default_xattr_handler = { 1091 .name = XATTR_NAME_POSIX_ACL_DEFAULT, 1092 .flags = ACL_TYPE_DEFAULT, 1093 .get = ovl_posix_acl_xattr_get, 1094 .set = ovl_posix_acl_xattr_set, 1095 }; 1096 1097 static const struct xattr_handler ovl_own_xattr_handler = { 1098 .prefix = OVL_XATTR_PREFIX, 1099 .get = ovl_own_xattr_get, 1100 .set = ovl_own_xattr_set, 1101 }; 1102 1103 static const struct xattr_handler ovl_other_xattr_handler = { 1104 .prefix = "", /* catch all */ 1105 .get = ovl_other_xattr_get, 1106 .set = ovl_other_xattr_set, 1107 }; 1108 1109 static const struct xattr_handler *ovl_xattr_handlers[] = { 1110 #ifdef CONFIG_FS_POSIX_ACL 1111 &ovl_posix_acl_access_xattr_handler, 1112 &ovl_posix_acl_default_xattr_handler, 1113 #endif 1114 &ovl_own_xattr_handler, 1115 &ovl_other_xattr_handler, 1116 NULL 1117 }; 1118 1119 static int ovl_fill_super(struct super_block *sb, void *data, int silent) 1120 { 1121 struct path upperpath = { NULL, NULL }; 1122 struct path workpath = { NULL, NULL }; 1123 struct dentry *root_dentry; 1124 struct inode *realinode; 1125 struct ovl_entry *oe; 1126 struct ovl_fs *ufs; 1127 struct path *stack = NULL; 1128 char *lowertmp; 1129 char *lower; 1130 unsigned int numlower; 1131 unsigned int stacklen = 0; 1132 unsigned int i; 1133 bool remote = false; 1134 int err; 1135 1136 err = -ENOMEM; 1137 ufs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); 1138 if (!ufs) 1139 goto out; 1140 1141 err = ovl_parse_opt((char *) data, &ufs->config); 1142 if (err) 1143 goto out_free_config; 1144 1145 err = -EINVAL; 1146 if (!ufs->config.lowerdir) { 1147 if (!silent) 1148 pr_err("overlayfs: missing 'lowerdir'\n"); 1149 goto out_free_config; 1150 } 1151 1152 sb->s_stack_depth = 0; 1153 sb->s_maxbytes = MAX_LFS_FILESIZE; 1154 if (ufs->config.upperdir) { 1155 if (!ufs->config.workdir) { 1156 pr_err("overlayfs: missing 'workdir'\n"); 1157 goto out_free_config; 1158 } 1159 1160 err = ovl_mount_dir(ufs->config.upperdir, &upperpath); 1161 if (err) 1162 goto out_free_config; 1163 1164 /* Upper fs should not be r/o */ 1165 if (upperpath.mnt->mnt_sb->s_flags & MS_RDONLY) { 1166 pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n"); 1167 err = -EINVAL; 1168 goto out_put_upperpath; 1169 } 1170 1171 err = ovl_mount_dir(ufs->config.workdir, &workpath); 1172 if (err) 1173 goto out_put_upperpath; 1174 1175 err = -EINVAL; 1176 if (upperpath.mnt != workpath.mnt) { 1177 pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); 1178 goto out_put_workpath; 1179 } 1180 if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) { 1181 pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); 1182 goto out_put_workpath; 1183 } 1184 sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth; 1185 } 1186 err = -ENOMEM; 1187 lowertmp = kstrdup(ufs->config.lowerdir, GFP_KERNEL); 1188 if (!lowertmp) 1189 goto out_put_workpath; 1190 1191 err = -EINVAL; 1192 stacklen = ovl_split_lowerdirs(lowertmp); 1193 if (stacklen > OVL_MAX_STACK) { 1194 pr_err("overlayfs: too many lower directories, limit is %d\n", 1195 OVL_MAX_STACK); 1196 goto out_free_lowertmp; 1197 } else if (!ufs->config.upperdir && stacklen == 1) { 1198 pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n"); 1199 goto out_free_lowertmp; 1200 } 1201 1202 stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); 1203 if (!stack) 1204 goto out_free_lowertmp; 1205 1206 lower = lowertmp; 1207 for (numlower = 0; numlower < stacklen; numlower++) { 1208 err = ovl_lower_dir(lower, &stack[numlower], 1209 &ufs->lower_namelen, &sb->s_stack_depth, 1210 &remote); 1211 if (err) 1212 goto out_put_lowerpath; 1213 1214 lower = strchr(lower, '\0') + 1; 1215 } 1216 1217 err = -EINVAL; 1218 sb->s_stack_depth++; 1219 if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { 1220 pr_err("overlayfs: maximum fs stacking depth exceeded\n"); 1221 goto out_put_lowerpath; 1222 } 1223 1224 if (ufs->config.upperdir) { 1225 ufs->upper_mnt = clone_private_mount(&upperpath); 1226 err = PTR_ERR(ufs->upper_mnt); 1227 if (IS_ERR(ufs->upper_mnt)) { 1228 pr_err("overlayfs: failed to clone upperpath\n"); 1229 goto out_put_lowerpath; 1230 } 1231 /* Don't inherit atime flags */ 1232 ufs->upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME); 1233 1234 sb->s_time_gran = ufs->upper_mnt->mnt_sb->s_time_gran; 1235 1236 ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry); 1237 err = PTR_ERR(ufs->workdir); 1238 if (IS_ERR(ufs->workdir)) { 1239 pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n", 1240 ufs->config.workdir, OVL_WORKDIR_NAME, -err); 1241 sb->s_flags |= MS_RDONLY; 1242 ufs->workdir = NULL; 1243 } 1244 1245 /* 1246 * Upper should support d_type, else whiteouts are visible. 1247 * Given workdir and upper are on same fs, we can do 1248 * iterate_dir() on workdir. This check requires successful 1249 * creation of workdir in previous step. 1250 */ 1251 if (ufs->workdir) { 1252 err = ovl_check_d_type_supported(&workpath); 1253 if (err < 0) 1254 goto out_put_workdir; 1255 1256 /* 1257 * We allowed this configuration and don't want to 1258 * break users over kernel upgrade. So warn instead 1259 * of erroring out. 1260 */ 1261 if (!err) 1262 pr_warn("overlayfs: upper fs needs to support d_type.\n"); 1263 } 1264 } 1265 1266 err = -ENOMEM; 1267 ufs->lower_mnt = kcalloc(numlower, sizeof(struct vfsmount *), GFP_KERNEL); 1268 if (ufs->lower_mnt == NULL) 1269 goto out_put_workdir; 1270 for (i = 0; i < numlower; i++) { 1271 struct vfsmount *mnt = clone_private_mount(&stack[i]); 1272 1273 err = PTR_ERR(mnt); 1274 if (IS_ERR(mnt)) { 1275 pr_err("overlayfs: failed to clone lowerpath\n"); 1276 goto out_put_lower_mnt; 1277 } 1278 /* 1279 * Make lower_mnt R/O. That way fchmod/fchown on lower file 1280 * will fail instead of modifying lower fs. 1281 */ 1282 mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME; 1283 1284 ufs->lower_mnt[ufs->numlower] = mnt; 1285 ufs->numlower++; 1286 } 1287 1288 /* If the upper fs is nonexistent, we mark overlayfs r/o too */ 1289 if (!ufs->upper_mnt) 1290 sb->s_flags |= MS_RDONLY; 1291 1292 if (remote) 1293 sb->s_d_op = &ovl_reval_dentry_operations; 1294 else 1295 sb->s_d_op = &ovl_dentry_operations; 1296 1297 ufs->creator_cred = prepare_creds(); 1298 if (!ufs->creator_cred) 1299 goto out_put_lower_mnt; 1300 1301 err = -ENOMEM; 1302 oe = ovl_alloc_entry(numlower); 1303 if (!oe) 1304 goto out_put_cred; 1305 1306 sb->s_magic = OVERLAYFS_SUPER_MAGIC; 1307 sb->s_op = &ovl_super_operations; 1308 sb->s_xattr = ovl_xattr_handlers; 1309 sb->s_fs_info = ufs; 1310 sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK; 1311 1312 root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR)); 1313 if (!root_dentry) 1314 goto out_free_oe; 1315 1316 mntput(upperpath.mnt); 1317 for (i = 0; i < numlower; i++) 1318 mntput(stack[i].mnt); 1319 path_put(&workpath); 1320 kfree(lowertmp); 1321 1322 oe->__upperdentry = upperpath.dentry; 1323 for (i = 0; i < numlower; i++) { 1324 oe->lowerstack[i].dentry = stack[i].dentry; 1325 oe->lowerstack[i].mnt = ufs->lower_mnt[i]; 1326 } 1327 kfree(stack); 1328 1329 root_dentry->d_fsdata = oe; 1330 1331 realinode = d_inode(ovl_dentry_real(root_dentry)); 1332 ovl_inode_init(d_inode(root_dentry), realinode, !!upperpath.dentry); 1333 ovl_copyattr(realinode, d_inode(root_dentry)); 1334 1335 sb->s_root = root_dentry; 1336 1337 return 0; 1338 1339 out_free_oe: 1340 kfree(oe); 1341 out_put_cred: 1342 put_cred(ufs->creator_cred); 1343 out_put_lower_mnt: 1344 for (i = 0; i < ufs->numlower; i++) 1345 mntput(ufs->lower_mnt[i]); 1346 kfree(ufs->lower_mnt); 1347 out_put_workdir: 1348 dput(ufs->workdir); 1349 mntput(ufs->upper_mnt); 1350 out_put_lowerpath: 1351 for (i = 0; i < numlower; i++) 1352 path_put(&stack[i]); 1353 kfree(stack); 1354 out_free_lowertmp: 1355 kfree(lowertmp); 1356 out_put_workpath: 1357 path_put(&workpath); 1358 out_put_upperpath: 1359 path_put(&upperpath); 1360 out_free_config: 1361 kfree(ufs->config.lowerdir); 1362 kfree(ufs->config.upperdir); 1363 kfree(ufs->config.workdir); 1364 kfree(ufs); 1365 out: 1366 return err; 1367 } 1368 1369 static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, 1370 const char *dev_name, void *raw_data) 1371 { 1372 return mount_nodev(fs_type, flags, raw_data, ovl_fill_super); 1373 } 1374 1375 static struct file_system_type ovl_fs_type = { 1376 .owner = THIS_MODULE, 1377 .name = "overlay", 1378 .mount = ovl_mount, 1379 .kill_sb = kill_anon_super, 1380 }; 1381 MODULE_ALIAS_FS("overlay"); 1382 1383 static int __init ovl_init(void) 1384 { 1385 return register_filesystem(&ovl_fs_type); 1386 } 1387 1388 static void __exit ovl_exit(void) 1389 { 1390 unregister_filesystem(&ovl_fs_type); 1391 } 1392 1393 module_init(ovl_init); 1394 module_exit(ovl_exit); 1395