// SPDX-License-Identifier: GPL-2.0-only
/*
 *
 * Copyright (C) 2011 Novell Inc.
 */

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/cred.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
#include <linux/ratelimit.h>
#include "overlayfs.h"


/*
 * Apply an attribute change to an overlay dentry.
 *
 * The request is validated with setattr_prepare(), the file is copied up
 * (including data when ATTR_SIZE is set) and the change is then applied to
 * the upper dentry with notify_change() while the creds installed by
 * ovl_override_creds() are in effect.  On success the attributes are
 * propagated from the upper inode to the overlay inode with ovl_copyattr().
 *
 * Returns 0 on success or a negative errno; -ETXTBSY is returned for a size
 * change when the real inode's i_writecount is negative.
 */
int ovl_setattr(struct dentry *dentry, struct iattr *attr)
{
	int err;
	bool full_copy_up = false;
	struct dentry *upperdentry;
	const struct cred *old_cred;

	err = setattr_prepare(dentry, attr);
	if (err)
		return err;

	err = ovl_want_write(dentry);
	if (err)
		goto out;

	if (attr->ia_valid & ATTR_SIZE) {
		struct inode *realinode = d_inode(ovl_dentry_real(dentry));

		/* Checked before copy up, so a refused truncate copies nothing */
		err = -ETXTBSY;
		if (atomic_read(&realinode->i_writecount) < 0)
			goto out_drop_write;

		/* Truncate should trigger data copy up as well */
		full_copy_up = true;
	}

	if (!full_copy_up)
		err = ovl_copy_up(dentry);
	else
		err = ovl_copy_up_with_data(dentry);
	if (!err) {
		/* Non-NULL only while we hold write access for a size change */
		struct inode *winode = NULL;

		upperdentry = ovl_dentry_upper(dentry);

		if (attr->ia_valid & ATTR_SIZE) {
			winode = d_inode(upperdentry);
			err = get_write_access(winode);
			if (err)
				goto out_drop_write;
		}

		/*
		 * Do not pass ATTR_MODE together with a kill suid/sgid
		 * request.
		 * NOTE(review): presumably notify_change() derives the new
		 * mode itself for kill requests - confirm.
		 */
		if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
			attr->ia_valid &= ~ATTR_MODE;

		inode_lock(upperdentry->d_inode);
		old_cred = ovl_override_creds(dentry->d_sb);
		err = notify_change(upperdentry, attr, NULL);
		revert_creds(old_cred);
		if (!err)
			ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
		inode_unlock(upperdentry->d_inode);

		if (winode)
			put_write_access(winode);
	}
out_drop_write:
	ovl_drop_write(dentry);
out:
	return err;
}

/*
 * Choose the st_dev (and possibly adjust the st_ino) reported for @dentry.
 *
 * @stat:        already filled in from the real inode; ->dev and ->ino may
 *               be rewritten here.
 * @lower_layer: lower layer whose fsid is encoded into st_ino / whose pseudo
 *               device is used for st_dev, or NULL.
 *
 * Always returns 0.
 */
static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat,
			   struct ovl_layer *lower_layer)
{
	bool samefs = ovl_same_sb(dentry->d_sb);
	unsigned int xinobits = ovl_xino_bits(dentry->d_sb);

	if (samefs) {
		/*
		 * When all layers are on the same fs, all real inode
		 * numbers are unique, so we use the overlay st_dev,
		 * which is friendly to du -x.
		 */
		stat->dev = dentry->d_sb->s_dev;
		return 0;
	} else if (xinobits) {
		unsigned int shift = 64 - xinobits;
		/*
		 * All inode numbers of underlying fs should not be using the
		 * high xinobits, so we use high xinobits to partition the
		 * overlay st_ino address space. The high bits hold the fsid
		 * (upper fsid is 0). This way overlay inode numbers are unique
		 * and all inodes use overlay st_dev. Inode numbers are also
		 * persistent for a given layer configuration.
		 */
		if (stat->ino >> shift) {
			/* High bits already in use: warn and fall through */
			pr_warn_ratelimited("overlayfs: inode number too big (%pd2, ino=%llu, xinobits=%d)\n",
					    dentry, stat->ino, xinobits);
		} else {
			if (lower_layer)
				stat->ino |= ((u64)lower_layer->fsid) << shift;

			stat->dev = dentry->d_sb->s_dev;
			return 0;
		}
	}

	/* The inode could not be mapped to a unified st_ino address space */
	if (S_ISDIR(dentry->d_inode->i_mode)) {
		/*
		 * Always use the overlay st_dev for directories, so 'find
		 * -xdev' will scan the entire overlay mount and won't cross the
		 * overlay mount boundaries.
		 *
		 * If not all layers are on the same fs the pair {real st_ino;
		 * overlay st_dev} is not unique, so use the non persistent
		 * overlay st_ino for directories.
		 */
		stat->dev = dentry->d_sb->s_dev;
		stat->ino = dentry->d_inode->i_ino;
	} else if (lower_layer && lower_layer->fsid) {
		/*
		 * For non-samefs setup, if we cannot map all layers st_ino
		 * to a unified address space, we need to make sure that st_dev
		 * is unique per lower fs. Upper layer uses real st_dev and
		 * lower layers use the unique anonymous bdev assigned to the
		 * lower fs.
		 */
		stat->dev = lower_layer->fs->pseudo_dev;
	}

	return 0;
}

/*
 * getattr for overlay inodes.
 *
 * Queries the real path with vfs_getattr() while the creds installed by
 * ovl_override_creds() are in effect, then fixes up st_ino, st_dev,
 * st_blocks and st_nlink as described by the comments below.
 *
 * Returns 0 on success or the error from vfs_getattr().
 */
int ovl_getattr(const struct path *path, struct kstat *stat,
		u32 request_mask, unsigned int flags)
{
	struct dentry *dentry = path->dentry;
	enum ovl_path_type type;
	struct path realpath;
	const struct cred *old_cred;
	bool is_dir = S_ISDIR(dentry->d_inode->i_mode);
	bool samefs = ovl_same_sb(dentry->d_sb);
	struct ovl_layer *lower_layer = NULL;
	int err;
	/* True while st_blocks still has to be taken from a lower dentry */
	bool metacopy_blocks = false;

	metacopy_blocks = ovl_is_metacopy_dentry(dentry);

	type = ovl_path_real(dentry, &realpath);
	old_cred = ovl_override_creds(dentry->d_sb);
	err = vfs_getattr(&realpath, stat, request_mask, flags);
	if (err)
		goto out;

	/*
	 * For non-dir or same fs, we use st_ino of the copy up origin.
	 * This guarantees constant st_dev/st_ino across copy up.
	 * With xino feature and non-samefs, we use st_ino of the copy up
	 * origin masked with high bits that represent the layer id.
	 *
	 * If lower filesystem supports NFS file handles, this also guarantees
	 * persistent st_ino across mount cycle.
	 */
	if (!is_dir || samefs || ovl_xino_bits(dentry->d_sb)) {
		if (!OVL_TYPE_UPPER(type)) {
			lower_layer = ovl_layer_lower(dentry);
		} else if (OVL_TYPE_ORIGIN(type)) {
			struct kstat lowerstat;
			u32 lowermask = STATX_INO | STATX_BLOCKS |
					(!is_dir ? STATX_NLINK : 0);

			/* realpath is reused: it now refers to the lower path */
			ovl_path_lower(dentry, &realpath);
			err = vfs_getattr(&realpath, &lowerstat,
					  lowermask, flags);
			if (err)
				goto out;

			/*
			 * Lower hardlinks may be broken on copy up to different
			 * upper files, so we cannot use the lower origin st_ino
			 * for those different files, even for the same fs case.
			 *
			 * Similarly, several redirected dirs can point to the
			 * same dir on a lower layer. With the "verify_lower"
			 * feature, we do not use the lower origin st_ino, if
			 * we haven't verified that this redirect is unique.
			 *
			 * With inodes index enabled, it is safe to use st_ino
			 * of an indexed origin. The index validates that the
			 * upper hardlink is not broken and that a redirected
			 * dir is the only redirect to that origin.
			 */
			if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) ||
			    (!ovl_verify_lower(dentry->d_sb) &&
			     (is_dir || lowerstat.nlink == 1))) {
				stat->ino = lowerstat.ino;
				lower_layer = ovl_layer_lower(dentry);
			}

			/*
			 * If we are querying a metacopy dentry and lower
			 * dentry is data dentry, then use the blocks we
			 * queried just now. We don't have to do additional
			 * vfs_getattr(). If lower itself is metacopy, then
			 * additional vfs_getattr() is unavoidable.
			 */
			if (metacopy_blocks &&
			    realpath.dentry == ovl_dentry_lowerdata(dentry)) {
				stat->blocks = lowerstat.blocks;
				metacopy_blocks = false;
			}
		}

		if (metacopy_blocks) {
			/*
			 * If lower is not same as lowerdata or if there was
			 * no origin on upper, we can end up here.
			 */
			struct kstat lowerdatastat;
			u32 lowermask = STATX_BLOCKS;

			ovl_path_lowerdata(dentry, &realpath);
			err = vfs_getattr(&realpath, &lowerdatastat,
					  lowermask, flags);
			if (err)
				goto out;
			stat->blocks = lowerdatastat.blocks;
		}
	}

	err = ovl_map_dev_ino(dentry, stat, lower_layer);
	if (err)
		goto out;

	/*
	 * It's probably not worth it to count subdirs to get the
	 * correct link count.  nlink=1 seems to pacify 'find' and
	 * other utilities.
	 */
	if (is_dir && OVL_TYPE_MERGE(type))
		stat->nlink = 1;

	/*
	 * Return the overlay inode nlinks for indexed upper inodes.
	 * Overlay inode nlink counts the union of the upper hardlinks
	 * and non-covered lower hardlinks. It does not include the upper
	 * index hardlink.
	 */
	if (!is_dir && ovl_test_flag(OVL_INDEX, d_inode(dentry)))
		stat->nlink = dentry->d_inode->i_nlink;

out:
	revert_creds(old_cred);

	return err;
}

/*
 * Permission check for overlay inodes.
 *
 * The overlay inode is checked with generic_permission() first, then the
 * real (upper, else lower) inode is checked with inode_permission() while
 * the creds installed by ovl_override_creds() are in effect.
 *
 * Returns 0 if access is granted, -ECHILD if no real inode is available,
 * or the error from either check.
 */
int ovl_permission(struct inode *inode, int mask)
{
	struct inode *upperinode = ovl_inode_upper(inode);
	struct inode *realinode = upperinode ?: ovl_inode_lower(inode);
	const struct cred *old_cred;
	int err;

	/* Careful in RCU walk mode */
	if (!realinode) {
		WARN_ON(!(mask & MAY_NOT_BLOCK));
		return -ECHILD;
	}

	/*
	 * Check overlay inode with the creds of task and underlying inode
	 * with creds of mounter
	 */
	err = generic_permission(inode, mask);
	if (err)
		return err;

	old_cred = ovl_override_creds(inode->i_sb);
	/*
	 * Write to a non-special file that has no upper inode yet: do not
	 * ask the lower inode for write/append permission, ask for read
	 * permission instead.
	 */
	if (!upperinode &&
	    !special_file(realinode->i_mode) && mask & MAY_WRITE) {
		mask &= ~(MAY_WRITE | MAY_APPEND);
		/* Make sure mounter can read file for copy up later */
		mask |= MAY_READ;
	}
	err = inode_permission(realinode, mask);
	revert_creds(old_cred);

	return err;
}

/*
 * get_link for overlay symlinks: forwards to vfs_get_link() on the real
 * dentry while the creds installed by ovl_override_creds() are in effect.
 *
 * Returns ERR_PTR(-ECHILD) when called without a dentry
 * (NOTE(review): presumably the RCU-walk case - confirm).
 */
static const char *ovl_get_link(struct dentry *dentry,
				struct inode *inode,
				struct delayed_call *done)
{
	const struct cred *old_cred;
	const char *p;

	if (!dentry)
		return ERR_PTR(-ECHILD);

	old_cred = ovl_override_creds(dentry->d_sb);
	p = vfs_get_link(ovl_dentry_real(dentry), done);
	revert_creds(old_cred);
	return p;
}

/* Return true if @name starts with OVL_XATTR_PREFIX. */
bool ovl_is_private_xattr(const char *name)
{
	return strncmp(name, OVL_XATTR_PREFIX,
		       sizeof(OVL_XATTR_PREFIX) - 1) == 0;
}

/*
 * Set (@value != NULL) or remove (@value == NULL) xattr @name on the real
 * upper dentry, copying the file up first if it has no upper dentry yet.
 *
 * Returns 0 on success or a negative errno.
 */
int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
		  const void *value, size_t size, int flags)
{
	int err;
	struct dentry *upperdentry = ovl_i_dentry_upper(inode);
	struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry);
	const struct cred *old_cred;

	err = ovl_want_write(dentry);
	if (err)
		goto out;

	/*
	 * Removal from a file that is still lower-only: probe the lower
	 * dentry first, so that a failing lookup of the xattr is returned
	 * without copying the file up.
	 */
	if (!value && !upperdentry) {
		err = vfs_getxattr(realdentry, name, NULL, 0);
		if (err < 0)
			goto out_drop_write;
	}

	if (!upperdentry) {
		err = ovl_copy_up(dentry);
		if (err)
			goto out_drop_write;

		realdentry = ovl_dentry_upper(dentry);
	}

	old_cred = ovl_override_creds(dentry->d_sb);
	if (value)
		err = vfs_setxattr(realdentry, name, value, size, flags);
	else {
		WARN_ON(flags != XATTR_REPLACE);
		err = vfs_removexattr(realdentry, name);
	}
	revert_creds(old_cred);

	/* copy c/mtime */
	ovl_copyattr(d_inode(realdentry), inode);

out_drop_write:
	ovl_drop_write(dentry);
out:
	return err;
}

/*
 * Read xattr @name from the real (upper, else lower) dentry while the creds
 * installed by ovl_override_creds() are in effect.
 *
 * Returns the result of vfs_getxattr().
 */
int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
		  void *value, size_t size)
{
	ssize_t res;
	const struct cred *old_cred;
	struct dentry *realdentry =
		ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry);

	old_cred = ovl_override_creds(dentry->d_sb);
	res = vfs_getxattr(realdentry, name, value, size);
	revert_creds(old_cred);
	return res;
}

/* Decide whether xattr name @s may be reported by ovl_listxattr(). */
static bool ovl_can_list(const char *s)
{
	/* List all non-trusted xattrs */
	if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
		return true;

	/* Never list trusted.overlay, list other trusted for superuser only */
	return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN);
}

/*
 * List the xattrs of the real dentry, with the names rejected by
 * ovl_can_list() removed from the returned buffer.
 *
 * When @size is 0 or vfs_listxattr() returns <= 0, that result is returned
 * unfiltered.  Returns the filtered length, or -EIO if the underlying list
 * is malformed.
 */
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
{
	struct dentry *realdentry = ovl_dentry_real(dentry);
	ssize_t res;
	size_t len;
	char *s;
	const struct cred *old_cred;

	old_cred = ovl_override_creds(dentry->d_sb);
	res = vfs_listxattr(realdentry, list, size);
	revert_creds(old_cred);
	if (res <= 0 || size == 0)
		return res;

	/* filter out private xattrs */
	for (s = list, len = res; len;) {
		/* Length of the current name including its terminating NUL */
		size_t slen = strnlen(s, len) + 1;

		/* underlying fs providing us with a broken xattr list? */
		if (WARN_ON(slen > len))
			return -EIO;

		len -= slen;
		if (!ovl_can_list(s)) {
			/* Drop this name: slide the remaining names over it */
			res -= slen;
			memmove(s, s + slen, len);
		} else {
			s += slen;
		}
	}

	return res;
}

/*
 * Fetch the ACL of @type from the real inode while the creds installed by
 * ovl_override_creds() are in effect.
 *
 * Returns NULL when CONFIG_FS_POSIX_ACL is disabled or the real inode is
 * not IS_POSIXACL(); otherwise returns the result of get_acl().
 */
struct posix_acl *ovl_get_acl(struct inode *inode, int type)
{
	struct inode *realinode = ovl_inode_real(inode);
	const struct cred *old_cred;
	struct posix_acl *acl;

	if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode))
		return NULL;

	old_cred = ovl_override_creds(inode->i_sb);
	acl = get_acl(realinode, type);
	revert_creds(old_cred);

	return acl;
}

/*
 * update_time for overlay inodes.
 *
 * Only S_ATIME is acted upon: if the inode has an upper dentry, its atime is
 * touched and then copied to the overlay inode.  Always returns 0.
 */
int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags)
{
	if (flags & S_ATIME) {
		struct ovl_fs *ofs = inode->i_sb->s_fs_info;
		struct path upperpath = {
			.mnt = ofs->upper_mnt,
			.dentry = ovl_upperdentry_dereference(OVL_I(inode)),
		};

		if (upperpath.dentry) {
			touch_atime(&upperpath);
			inode->i_atime = d_inode(upperpath.dentry)->i_atime;
		}
	}
	return 0;
}

/*
 * fiemap for overlay regular files: forwards to the real inode's ->fiemap()
 * while the creds installed by ovl_override_creds() are in effect.
 *
 * Returns -EOPNOTSUPP if the real inode has no ->fiemap(), otherwise the
 * result of the real ->fiemap().
 */
static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		      u64 start, u64 len)
{
	int err;
	struct inode *realinode = ovl_inode_real(inode);
	const struct cred *old_cred;

	if (!realinode->i_op->fiemap)
		return -EOPNOTSUPP;

	old_cred = ovl_override_creds(inode->i_sb);

	/* The result of the flush is not checked here */
	if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC)
		filemap_write_and_wait(realinode->i_mapping);

	err = realinode->i_op->fiemap(realinode, fieinfo, start, len);
	revert_creds(old_cred);

	return err;
}

/* Inode operations installed by ovl_fill_inode() for S_IFREG inodes */
static const struct inode_operations ovl_file_inode_operations = {
	.setattr	= ovl_setattr,
	.permission	= ovl_permission,
	.getattr	= ovl_getattr,
	.listxattr	= ovl_listxattr,
	.get_acl	= ovl_get_acl,
	.update_time	= ovl_update_time,
	.fiemap		= ovl_fiemap,
};

/* Inode operations installed by ovl_fill_inode() for S_IFLNK inodes */
static const struct inode_operations ovl_symlink_inode_operations = {
	.setattr	= ovl_setattr,
	.get_link	= ovl_get_link,
	.getattr	= ovl_getattr,
	.listxattr	= ovl_listxattr,
	.update_time	= ovl_update_time,
};

/*
 * Inode operations installed by ovl_fill_inode() for every type other than
 * regular file, directory and symlink
 */
static const struct inode_operations ovl_special_inode_operations = {
	.setattr	= ovl_setattr,
	.permission	= ovl_permission,
	.getattr	= ovl_getattr,
	.listxattr	= ovl_listxattr,
	.get_acl	= ovl_get_acl,
	.update_time	= ovl_update_time,
};

static const struct address_space_operations ovl_aops = {
	/* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
	.direct_IO		= noop_direct_IO,
};

/*
 * It is possible to stack overlayfs instance on top of another
 * overlayfs instance as lower layer. We need to annotate the
 * stackable i_mutex locks according to stack level of the super
 * block instance. An overlayfs instance can never be in stack
 * depth 0 (there is always a real fs below it).  An overlayfs
 * inode lock will use the lockdep annotation ovl_i_mutex_key[depth].
 *
 * For example, here is a snip from /proc/lockdep_chains after
 * dir_iterate of nested overlayfs:
 *
 * [...] &ovl_i_mutex_dir_key[depth]   (stack_depth=2)
 * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1)
 * [...] &type->i_mutex_dir_key        (stack_depth=0)
 */
#define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH

/*
 * Assign per-stack-depth lockdep classes to the inode's i_rwsem and to the
 * overlay inode lock.  Compiles to nothing without CONFIG_LOCKDEP.
 */
static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode)
{
#ifdef CONFIG_LOCKDEP
	static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING];
	static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING];
	static struct lock_class_key ovl_i_lock_key[OVL_MAX_NESTING];

	int depth = inode->i_sb->s_stack_depth - 1;

	/* Out of range depth: warn once and fall back to the first class */
	if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING))
		depth = 0;

	if (S_ISDIR(inode->i_mode))
		lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]);
	else
		lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]);

	lockdep_set_class(&OVL_I(inode)->lock, &ovl_i_lock_key[depth]);
#endif
}

/*
 * Initialize the generic fields of an overlay inode: i_ino, i_mode, flags,
 * lockdep classes and the operation tables matching the file type in @mode.
 *
 * @ino:  inode number to expose (see the comment below), 0 if not yet known.
 * @fsid: layer fsid to encode in the high bits of i_ino when xino is in use.
 */
static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
			   unsigned long ino, int fsid)
{
	int xinobits = ovl_xino_bits(inode->i_sb);

	/*
	 * When d_ino is consistent with st_ino (samefs or i_ino has enough
	 * bits to encode layer), set the same value used for st_ino to i_ino,
	 * so inode number exposed via /proc/locks and the like will be
	 * consistent with d_ino and st_ino values. An i_ino value inconsistent
	 * with d_ino also causes nfsd readdirplus to fail.  When called from
	 * ovl_new_inode(), ino arg is 0, so i_ino will be updated to real
	 * upper inode i_ino on ovl_inode_init() or ovl_inode_update().
	 */
	if (ovl_same_sb(inode->i_sb) || xinobits) {
		inode->i_ino = ino;
		/* Encode fsid only if the high bits of ino are still free */
		if (xinobits && fsid && !(ino >> (64 - xinobits)))
			inode->i_ino |= (unsigned long)fsid << (64 - xinobits);
	} else {
		inode->i_ino = get_next_ino();
	}
	inode->i_mode = mode;
	inode->i_flags |= S_NOCMTIME;
#ifdef CONFIG_FS_POSIX_ACL
	inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE;
#endif

	ovl_lockdep_annotate_inode_mutex_key(inode);

	switch (mode & S_IFMT) {
	case S_IFREG:
		inode->i_op = &ovl_file_inode_operations;
		inode->i_fop = &ovl_file_operations;
		inode->i_mapping->a_ops = &ovl_aops;
		break;

	case S_IFDIR:
		inode->i_op = &ovl_dir_inode_operations;
		inode->i_fop = &ovl_dir_operations;
		break;

	case S_IFLNK:
		inode->i_op = &ovl_symlink_inode_operations;
		break;

	default:
		inode->i_op = &ovl_special_inode_operations;
		init_special_inode(inode, mode, rdev);
		break;
	}
}

/*
 * With inodes index enabled, an overlay inode nlink counts the union of upper
 * hardlinks and non-covered lower hardlinks. During the lifetime of a non-pure
 * upper inode, the following nlink modifying operations can happen:
 *
 * 1. Lower hardlink copy up
 * 2. Upper hardlink created, unlinked or renamed over
 * 3. Lower hardlink whiteout or renamed over
 *
 * For the first, copy up case, the union nlink does not change, whether the
 * operation succeeds or fails, but the upper inode nlink may change.
 * Therefore, before copy up, we store the union nlink value relative to the
 * lower inode nlink in the index inode xattr trusted.overlay.nlink.
 *
 * For the second, upper hardlink case, the union nlink should be incremented
 * or decremented IFF the operation succeeds, aligned with nlink change of the
 * upper inode. Therefore, before link/unlink/rename, we store the union nlink
 * value relative to the upper inode nlink in the index inode.
 *
 * For the last, lower cover up case, we simplify things by preceding the
 * whiteout or cover up with copy up. This makes sure that there is an index
 * upper inode where the nlink xattr can be stored before the copied up upper
 * entry is unlinked.
 */
#define OVL_NLINK_ADD_UPPER	(1 << 0)

/*
 * On-disk format for indexed nlink:
 *
 * nlink relative to the upper inode - "U[+-]NUM"
 * nlink relative to the lower inode - "L[+-]NUM"
 */

/*
 * Store the difference between the overlay inode nlink and the nlink of
 * @realdentry's inode in the OVL_XATTR_NLINK xattr of the upper dentry,
 * formatted with @format ("U%+i" or "L%+i").
 *
 * Returns the result of ovl_do_setxattr(), or -EIO if the formatted value
 * does not fit the buffer.
 */
static int ovl_set_nlink_common(struct dentry *dentry,
				struct dentry *realdentry, const char *format)
{
	struct inode *inode = d_inode(dentry);
	struct inode *realinode = d_inode(realdentry);
	/* 'U'/'L' + sign + up to 10 digits + NUL */
	char buf[13];
	int len;

	len = snprintf(buf, sizeof(buf), format,
		       (int) (inode->i_nlink - realinode->i_nlink));

	if (WARN_ON(len >= sizeof(buf)))
		return -EIO;

	/* The xattr value is stored without the terminating NUL */
	return ovl_do_setxattr(ovl_dentry_upper(dentry),
			       OVL_XATTR_NLINK, buf, len, 0);
}

/* Store the overlay nlink relative to the upper inode nlink ("U[+-]NUM"). */
int ovl_set_nlink_upper(struct dentry *dentry)
{
	return ovl_set_nlink_common(dentry, ovl_dentry_upper(dentry), "U%+i");
}

/* Store the overlay nlink relative to the lower inode nlink ("L[+-]NUM"). */
int ovl_set_nlink_lower(struct dentry *dentry)
{
	return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i");
}

/*
 * Compute the overlay nlink from the OVL_XATTR_NLINK xattr stored on
 * @upperdentry.
 *
 * Returns @fallback when either dentry is NULL, when the lower inode has
 * nlink == 1, or when the xattr cannot be read or parsed or yields a
 * non-positive result (the latter cases also log a ratelimited warning).
 */
unsigned int ovl_get_nlink(struct dentry *lowerdentry,
			   struct dentry *upperdentry,
			   unsigned int fallback)
{
	int nlink_diff;
	int nlink;
	char buf[13];
	int err;

	if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1)
		return fallback;

	/* Leave room for the NUL terminator added below */
	err = vfs_getxattr(upperdentry, OVL_XATTR_NLINK, &buf, sizeof(buf) - 1);
	if (err < 0)
		goto fail;

	buf[err] = '\0';
	if ((buf[0] != 'L' && buf[0] != 'U') ||
	    (buf[1] != '+' && buf[1] != '-'))
		goto fail;

	/* Skip the 'L'/'U' prefix; the sign is parsed by kstrtoint() */
	err = kstrtoint(buf + 1, 10, &nlink_diff);
	if (err < 0)
		goto fail;

	nlink = d_inode(buf[0] == 'L' ? lowerdentry : upperdentry)->i_nlink;
	nlink += nlink_diff;

	if (nlink <= 0)
		goto fail;

	return nlink;

fail:
	pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n",
			    upperdentry, err);
	return fallback;
}

/*
 * Allocate a new overlay inode with new_inode() and initialize it with
 * ovl_fill_inode() using ino 0 and fsid 0.
 *
 * Returns the inode, or NULL if new_inode() failed.
 */
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev)
{
	struct inode *inode;

	inode = new_inode(sb);
	if (inode)
		ovl_fill_inode(inode, mode, rdev, 0, 0);

	return inode;
}

/* Inode cache test callback: match on the key stored in i_private. */
static int ovl_inode_test(struct inode *inode, void *data)
{
	return inode->i_private == data;
}

/* Inode cache set callback: store the key in i_private. */
static int ovl_inode_set(struct inode *inode, void *data)
{
	inode->i_private = data;
	return 0;
}

/*
 * Check that the real inodes recorded in overlay @inode match the given
 * lower and upper dentries.  Returns true if they are consistent.
 */
static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry,
			     struct dentry *upperdentry, bool strict)
{
	/*
	 * For directories, @strict verify from lookup path performs consistency
	 * checks, so NULL lower/upper in dentry must match NULL lower/upper in
	 * inode. Non @strict verify from NFS handle decode path passes NULL for
	 * 'unknown' lower/upper.
	 */
	if (S_ISDIR(inode->i_mode) && strict) {
		/* Real lower dir moved to upper layer under us? */
		if (!lowerdentry && ovl_inode_lower(inode))
			return false;

		/* Lookup of an uncovered redirect origin? */
		if (!upperdentry && ovl_inode_upper(inode))
			return false;
	}

	/*
	 * Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL.
	 * This happens when finding a copied up overlay inode for a renamed
	 * or hardlinked overlay dentry and lower dentry cannot be followed
	 * by origin because lower fs does not support file handles.
	 */
	if (lowerdentry && ovl_inode_lower(inode) != d_inode(lowerdentry))
		return false;

	/*
	 * Allow non-NULL __upperdentry in inode even if upperdentry is NULL.
	 * This happens when finding a lower alias for a copied up hard link.
	 */
	if (upperdentry && ovl_inode_upper(inode) != d_inode(upperdentry))
		return false;

	return true;
}

/*
 * Look up a cached overlay inode keyed by the real inode of @real.
 *
 * Returns the inode, NULL if none is cached, or ERR_PTR(-ESTALE) if a cached
 * inode exists but fails the non-strict ovl_verify_inode() check.
 */
struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
			       bool is_upper)
{
	struct inode *inode, *key = d_inode(real);

	inode = ilookup5(sb, (unsigned long) key, ovl_inode_test, key);
	if (!inode)
		return NULL;

	/* Only the side that @real belongs to is known; pass NULL for the other */
	if (!ovl_verify_inode(inode, is_upper ? NULL : real,
			      is_upper ? real : NULL, false)) {
		iput(inode);
		return ERR_PTR(-ESTALE);
	}

	return inode;
}

/*
 * Return true if the inode cached for the real inode of @dir is a dead
 * directory with neither an upper nor a lower inode, which is the state
 * that ovl_get_trap_inode() sets up.
 */
bool ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir)
{
	struct inode *key = d_inode(dir);
	struct inode *trap;
	bool res;

	trap = ilookup5(sb, (unsigned long) key, ovl_inode_test, key);
	if (!trap)
		return false;

	res = IS_DEADDIR(trap) && !ovl_inode_upper(trap) &&
				  !ovl_inode_lower(trap);

	iput(trap);
	return res;
}

/*
 * Create an inode cache entry for layer root dir, that will intentionally
 * fail ovl_verify_inode(), so any lookup that will find some layer root
 * will fail.
 *
 * Returns the trap inode, or ERR_PTR(-ENOTDIR) if @dir is not a directory,
 * ERR_PTR(-ENOMEM) on allocation failure, or ERR_PTR(-ELOOP) if an inode is
 * already cached for @dir.
 */
struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir)
{
	struct inode *key = d_inode(dir);
	struct inode *trap;

	if (!d_is_dir(dir))
		return ERR_PTR(-ENOTDIR);

	trap = iget5_locked(sb, (unsigned long) key, ovl_inode_test,
			    ovl_inode_set, key);
	if (!trap)
		return ERR_PTR(-ENOMEM);

	if (!(trap->i_state & I_NEW)) {
		/* Conflicting layer roots? */
		iput(trap);
		return ERR_PTR(-ELOOP);
	}

	trap->i_mode = S_IFDIR;
	trap->i_flags = S_DEAD;
	unlock_new_inode(trap);

	return trap;
}

/*
 * Does overlay inode need to be hashed by lower inode?
 */
static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper,
			     struct dentry *lower, struct dentry *index)
{
	struct ovl_fs *ofs = sb->s_fs_info;

	/* No, if pure upper */
	if (!lower)
		return false;

	/* Yes, if already indexed */
	if (index)
		return true;

	/* Yes, if won't be copied up */
	if (!ofs->upper_mnt)
		return true;

	/* No, if lower hardlink is or will be broken on copy up */
	if ((upper || !ovl_indexdir(sb)) &&
	    !d_is_dir(lower) && d_inode(lower)->i_nlink > 1)
		return false;

	/* No, if non-indexed upper with NFS export */
	if (sb->s_export_op && upper)
		return false;

	/* Otherwise, hash by lower inode for fsnotify */
	return true;
}

/*
 * Get the hashed overlay inode for @key: insert the preallocated @newinode
 * with inode_insert5() if one is given, otherwise use iget5_locked().
 */
static struct inode *ovl_iget5(struct super_block *sb, struct inode *newinode,
			       struct inode *key)
{
	return newinode ? inode_insert5(newinode, (unsigned long) key,
					 ovl_inode_test, ovl_inode_set, key) :
			  iget5_locked(sb, (unsigned long) key,
				       ovl_inode_test, ovl_inode_set, key);
}

/*
 * Find or create the overlay inode described by @oip.
 *
 * The inode is hashed by the lower inode when ovl_hash_bylower() says so,
 * otherwise by the upper inode; when there is no upper dentry and it is not
 * hashed by lower, an unhashed inode is allocated with new_inode().
 *
 * When an already initialized inode is found, it is verified against the
 * dentries in @oip, and on success oip->upperdentry is dput and
 * oip->redirect is freed before the inode is returned.
 *
 * Returns the inode or an ERR_PTR() (-ENOMEM, -EEXIST, -ESTALE or the error
 * from ovl_check_metacopy_xattr()).
 */
struct inode *ovl_get_inode(struct super_block *sb,
			    struct ovl_inode_params *oip)
{
	struct dentry *upperdentry = oip->upperdentry;
	struct ovl_path *lowerpath = oip->lowerpath;
	struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
	struct inode *inode;
	struct dentry *lowerdentry = lowerpath ? lowerpath->dentry : NULL;
	bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry,
					oip->index);
	int fsid = bylower ? oip->lowerpath->layer->fsid : 0;
	bool is_dir, metacopy = false;
	unsigned long ino = 0;
	/* Error reported if ovl_iget5() below returns NULL */
	int err = oip->newinode ? -EEXIST : -ENOMEM;

	if (!realinode)
		realinode = d_inode(lowerdentry);

	/*
	 * Copy up origin (lower) may exist for non-indexed upper, but we must
	 * not use lower as hash key if this is a broken hardlink.
	 */
	is_dir = S_ISDIR(realinode->i_mode);
	if (upperdentry || bylower) {
		struct inode *key = d_inode(bylower ? lowerdentry :
						      upperdentry);
		unsigned int nlink = is_dir ? 1 : realinode->i_nlink;

		inode = ovl_iget5(sb, oip->newinode, key);
		if (!inode)
			goto out_err;
		if (!(inode->i_state & I_NEW)) {
			/*
			 * Verify that the underlying files stored in the inode
			 * match those in the dentry.
			 */
			if (!ovl_verify_inode(inode, lowerdentry, upperdentry,
					      true)) {
				iput(inode);
				err = -ESTALE;
				goto out_err;
			}

			/* Existing inode is reused: release what we were given */
			dput(upperdentry);
			kfree(oip->redirect);
			goto out;
		}

		/* Recalculate nlink for non-dir due to indexing */
		if (!is_dir)
			nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink);
		set_nlink(inode, nlink);
		ino = key->i_ino;
	} else {
		/* Lower hardlink that will be broken on copy up */
		inode = new_inode(sb);
		if (!inode) {
			err = -ENOMEM;
			goto out_err;
		}
	}
	ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid);
	ovl_inode_init(inode, upperdentry, lowerdentry, oip->lowerdata);

	if (upperdentry && ovl_is_impuredir(upperdentry))
		ovl_set_flag(OVL_IMPURE, inode);

	if (oip->index)
		ovl_set_flag(OVL_INDEX, inode);

	if (upperdentry) {
		err = ovl_check_metacopy_xattr(upperdentry);
		/*
		 * NOTE(review): out_err only logs and returns ERR_PTR(err);
		 * the inode obtained above is neither unlocked nor released
		 * on this path - confirm whether this leaks the inode.
		 */
		if (err < 0)
			goto out_err;
		metacopy = err;
		if (!metacopy)
			ovl_set_flag(OVL_UPPERDATA, inode);
	}

	OVL_I(inode)->redirect = oip->redirect;

	if (bylower)
		ovl_set_flag(OVL_CONST_INO, inode);

	/* Check for non-merge dir that may have whiteouts */
	if (is_dir) {
		if (((upperdentry && lowerdentry) || oip->numlower > 1) ||
		    ovl_check_origin_xattr(upperdentry ?: lowerdentry)) {
			ovl_set_flag(OVL_WHITEOUTS, inode);
		}
	}

	if (inode->i_state & I_NEW)
		unlock_new_inode(inode);
out:
	return inode;

out_err:
	pr_warn_ratelimited("overlayfs: failed to get inode (%i)\n", err);
	inode = ERR_PTR(err);
	goto out;
}