1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * 4 * Copyright (C) 2011 Novell Inc. 5 */ 6 7 #include <linux/fs.h> 8 #include <linux/slab.h> 9 #include <linux/cred.h> 10 #include <linux/xattr.h> 11 #include <linux/posix_acl.h> 12 #include <linux/ratelimit.h> 13 #include "overlayfs.h" 14 15 16 int ovl_setattr(struct dentry *dentry, struct iattr *attr) 17 { 18 int err; 19 bool full_copy_up = false; 20 struct dentry *upperdentry; 21 const struct cred *old_cred; 22 23 err = setattr_prepare(dentry, attr); 24 if (err) 25 return err; 26 27 err = ovl_want_write(dentry); 28 if (err) 29 goto out; 30 31 if (attr->ia_valid & ATTR_SIZE) { 32 struct inode *realinode = d_inode(ovl_dentry_real(dentry)); 33 34 err = -ETXTBSY; 35 if (atomic_read(&realinode->i_writecount) < 0) 36 goto out_drop_write; 37 38 /* Truncate should trigger data copy up as well */ 39 full_copy_up = true; 40 } 41 42 if (!full_copy_up) 43 err = ovl_copy_up(dentry); 44 else 45 err = ovl_copy_up_with_data(dentry); 46 if (!err) { 47 struct inode *winode = NULL; 48 49 upperdentry = ovl_dentry_upper(dentry); 50 51 if (attr->ia_valid & ATTR_SIZE) { 52 winode = d_inode(upperdentry); 53 err = get_write_access(winode); 54 if (err) 55 goto out_drop_write; 56 } 57 58 if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) 59 attr->ia_valid &= ~ATTR_MODE; 60 61 inode_lock(upperdentry->d_inode); 62 old_cred = ovl_override_creds(dentry->d_sb); 63 err = notify_change(upperdentry, attr, NULL); 64 revert_creds(old_cred); 65 if (!err) 66 ovl_copyattr(upperdentry->d_inode, dentry->d_inode); 67 inode_unlock(upperdentry->d_inode); 68 69 if (winode) 70 put_write_access(winode); 71 } 72 out_drop_write: 73 ovl_drop_write(dentry); 74 out: 75 return err; 76 } 77 78 static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, 79 struct ovl_layer *lower_layer) 80 { 81 bool samefs = ovl_same_sb(dentry->d_sb); 82 unsigned int xinobits = ovl_xino_bits(dentry->d_sb); 83 84 if (samefs) { 85 /* 86 * When all layers are on the same fs, all real inode 87 * number are unique, so we use the overlay st_dev, 88 * which is friendly to du -x. 89 */ 90 stat->dev = dentry->d_sb->s_dev; 91 return 0; 92 } else if (xinobits) { 93 unsigned int shift = 64 - xinobits; 94 /* 95 * All inode numbers of underlying fs should not be using the 96 * high xinobits, so we use high xinobits to partition the 97 * overlay st_ino address space. The high bits holds the fsid 98 * (upper fsid is 0). This way overlay inode numbers are unique 99 * and all inodes use overlay st_dev. Inode numbers are also 100 * persistent for a given layer configuration. 101 */ 102 if (stat->ino >> shift) { 103 pr_warn_ratelimited("overlayfs: inode number too big (%pd2, ino=%llu, xinobits=%d)\n", 104 dentry, stat->ino, xinobits); 105 } else { 106 if (lower_layer) 107 stat->ino |= ((u64)lower_layer->fsid) << shift; 108 109 stat->dev = dentry->d_sb->s_dev; 110 return 0; 111 } 112 } 113 114 /* The inode could not be mapped to a unified st_ino address space */ 115 if (S_ISDIR(dentry->d_inode->i_mode)) { 116 /* 117 * Always use the overlay st_dev for directories, so 'find 118 * -xdev' will scan the entire overlay mount and won't cross the 119 * overlay mount boundaries. 120 * 121 * If not all layers are on the same fs the pair {real st_ino; 122 * overlay st_dev} is not unique, so use the non persistent 123 * overlay st_ino for directories. 124 */ 125 stat->dev = dentry->d_sb->s_dev; 126 stat->ino = dentry->d_inode->i_ino; 127 } else if (lower_layer && lower_layer->fsid) { 128 /* 129 * For non-samefs setup, if we cannot map all layers st_ino 130 * to a unified address space, we need to make sure that st_dev 131 * is unique per lower fs. Upper layer uses real st_dev and 132 * lower layers use the unique anonymous bdev assigned to the 133 * lower fs. 134 */ 135 stat->dev = lower_layer->fs->pseudo_dev; 136 } 137 138 return 0; 139 } 140 141 int ovl_getattr(const struct path *path, struct kstat *stat, 142 u32 request_mask, unsigned int flags) 143 { 144 struct dentry *dentry = path->dentry; 145 enum ovl_path_type type; 146 struct path realpath; 147 const struct cred *old_cred; 148 bool is_dir = S_ISDIR(dentry->d_inode->i_mode); 149 bool samefs = ovl_same_sb(dentry->d_sb); 150 struct ovl_layer *lower_layer = NULL; 151 int err; 152 bool metacopy_blocks = false; 153 154 metacopy_blocks = ovl_is_metacopy_dentry(dentry); 155 156 type = ovl_path_real(dentry, &realpath); 157 old_cred = ovl_override_creds(dentry->d_sb); 158 err = vfs_getattr(&realpath, stat, request_mask, flags); 159 if (err) 160 goto out; 161 162 /* 163 * For non-dir or same fs, we use st_ino of the copy up origin. 164 * This guaranties constant st_dev/st_ino across copy up. 165 * With xino feature and non-samefs, we use st_ino of the copy up 166 * origin masked with high bits that represent the layer id. 167 * 168 * If lower filesystem supports NFS file handles, this also guaranties 169 * persistent st_ino across mount cycle. 170 */ 171 if (!is_dir || samefs || ovl_xino_bits(dentry->d_sb)) { 172 if (!OVL_TYPE_UPPER(type)) { 173 lower_layer = ovl_layer_lower(dentry); 174 } else if (OVL_TYPE_ORIGIN(type)) { 175 struct kstat lowerstat; 176 u32 lowermask = STATX_INO | STATX_BLOCKS | 177 (!is_dir ? STATX_NLINK : 0); 178 179 ovl_path_lower(dentry, &realpath); 180 err = vfs_getattr(&realpath, &lowerstat, 181 lowermask, flags); 182 if (err) 183 goto out; 184 185 /* 186 * Lower hardlinks may be broken on copy up to different 187 * upper files, so we cannot use the lower origin st_ino 188 * for those different files, even for the same fs case. 189 * 190 * Similarly, several redirected dirs can point to the 191 * same dir on a lower layer. With the "verify_lower" 192 * feature, we do not use the lower origin st_ino, if 193 * we haven't verified that this redirect is unique. 194 * 195 * With inodes index enabled, it is safe to use st_ino 196 * of an indexed origin. The index validates that the 197 * upper hardlink is not broken and that a redirected 198 * dir is the only redirect to that origin. 199 */ 200 if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) || 201 (!ovl_verify_lower(dentry->d_sb) && 202 (is_dir || lowerstat.nlink == 1))) { 203 stat->ino = lowerstat.ino; 204 lower_layer = ovl_layer_lower(dentry); 205 } 206 207 /* 208 * If we are querying a metacopy dentry and lower 209 * dentry is data dentry, then use the blocks we 210 * queried just now. We don't have to do additional 211 * vfs_getattr(). If lower itself is metacopy, then 212 * additional vfs_getattr() is unavoidable. 213 */ 214 if (metacopy_blocks && 215 realpath.dentry == ovl_dentry_lowerdata(dentry)) { 216 stat->blocks = lowerstat.blocks; 217 metacopy_blocks = false; 218 } 219 } 220 221 if (metacopy_blocks) { 222 /* 223 * If lower is not same as lowerdata or if there was 224 * no origin on upper, we can end up here. 225 */ 226 struct kstat lowerdatastat; 227 u32 lowermask = STATX_BLOCKS; 228 229 ovl_path_lowerdata(dentry, &realpath); 230 err = vfs_getattr(&realpath, &lowerdatastat, 231 lowermask, flags); 232 if (err) 233 goto out; 234 stat->blocks = lowerdatastat.blocks; 235 } 236 } 237 238 err = ovl_map_dev_ino(dentry, stat, lower_layer); 239 if (err) 240 goto out; 241 242 /* 243 * It's probably not worth it to count subdirs to get the 244 * correct link count. nlink=1 seems to pacify 'find' and 245 * other utilities. 246 */ 247 if (is_dir && OVL_TYPE_MERGE(type)) 248 stat->nlink = 1; 249 250 /* 251 * Return the overlay inode nlinks for indexed upper inodes. 252 * Overlay inode nlink counts the union of the upper hardlinks 253 * and non-covered lower hardlinks. It does not include the upper 254 * index hardlink. 255 */ 256 if (!is_dir && ovl_test_flag(OVL_INDEX, d_inode(dentry))) 257 stat->nlink = dentry->d_inode->i_nlink; 258 259 out: 260 revert_creds(old_cred); 261 262 return err; 263 } 264 265 int ovl_permission(struct inode *inode, int mask) 266 { 267 struct inode *upperinode = ovl_inode_upper(inode); 268 struct inode *realinode = upperinode ?: ovl_inode_lower(inode); 269 const struct cred *old_cred; 270 int err; 271 272 /* Careful in RCU walk mode */ 273 if (!realinode) { 274 WARN_ON(!(mask & MAY_NOT_BLOCK)); 275 return -ECHILD; 276 } 277 278 /* 279 * Check overlay inode with the creds of task and underlying inode 280 * with creds of mounter 281 */ 282 err = generic_permission(inode, mask); 283 if (err) 284 return err; 285 286 old_cred = ovl_override_creds(inode->i_sb); 287 if (!upperinode && 288 !special_file(realinode->i_mode) && mask & MAY_WRITE) { 289 mask &= ~(MAY_WRITE | MAY_APPEND); 290 /* Make sure mounter can read file for copy up later */ 291 mask |= MAY_READ; 292 } 293 err = inode_permission(realinode, mask); 294 revert_creds(old_cred); 295 296 return err; 297 } 298 299 static const char *ovl_get_link(struct dentry *dentry, 300 struct inode *inode, 301 struct delayed_call *done) 302 { 303 const struct cred *old_cred; 304 const char *p; 305 306 if (!dentry) 307 return ERR_PTR(-ECHILD); 308 309 old_cred = ovl_override_creds(dentry->d_sb); 310 p = vfs_get_link(ovl_dentry_real(dentry), done); 311 revert_creds(old_cred); 312 return p; 313 } 314 315 bool ovl_is_private_xattr(const char *name) 316 { 317 return strncmp(name, OVL_XATTR_PREFIX, 318 sizeof(OVL_XATTR_PREFIX) - 1) == 0; 319 } 320 321 int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, 322 const void *value, size_t size, int flags) 323 { 324 int err; 325 struct dentry *upperdentry = ovl_i_dentry_upper(inode); 326 struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); 327 const struct cred *old_cred; 328 329 err = ovl_want_write(dentry); 330 if (err) 331 goto out; 332 333 if (!value && !upperdentry) { 334 err = vfs_getxattr(realdentry, name, NULL, 0); 335 if (err < 0) 336 goto out_drop_write; 337 } 338 339 if (!upperdentry) { 340 err = ovl_copy_up(dentry); 341 if (err) 342 goto out_drop_write; 343 344 realdentry = ovl_dentry_upper(dentry); 345 } 346 347 old_cred = ovl_override_creds(dentry->d_sb); 348 if (value) 349 err = vfs_setxattr(realdentry, name, value, size, flags); 350 else { 351 WARN_ON(flags != XATTR_REPLACE); 352 err = vfs_removexattr(realdentry, name); 353 } 354 revert_creds(old_cred); 355 356 /* copy c/mtime */ 357 ovl_copyattr(d_inode(realdentry), inode); 358 359 out_drop_write: 360 ovl_drop_write(dentry); 361 out: 362 return err; 363 } 364 365 int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, 366 void *value, size_t size) 367 { 368 ssize_t res; 369 const struct cred *old_cred; 370 struct dentry *realdentry = 371 ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry); 372 373 old_cred = ovl_override_creds(dentry->d_sb); 374 res = vfs_getxattr(realdentry, name, value, size); 375 revert_creds(old_cred); 376 return res; 377 } 378 379 static bool ovl_can_list(const char *s) 380 { 381 /* List all non-trusted xatts */ 382 if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) 383 return true; 384 385 /* Never list trusted.overlay, list other trusted for superuser only */ 386 return !ovl_is_private_xattr(s) && 387 ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN); 388 } 389 390 ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) 391 { 392 struct dentry *realdentry = ovl_dentry_real(dentry); 393 ssize_t res; 394 size_t len; 395 char *s; 396 const struct cred *old_cred; 397 398 old_cred = ovl_override_creds(dentry->d_sb); 399 res = vfs_listxattr(realdentry, list, size); 400 revert_creds(old_cred); 401 if (res <= 0 || size == 0) 402 return res; 403 404 /* filter out private xattrs */ 405 for (s = list, len = res; len;) { 406 size_t slen = strnlen(s, len) + 1; 407 408 /* underlying fs providing us with an broken xattr list? */ 409 if (WARN_ON(slen > len)) 410 return -EIO; 411 412 len -= slen; 413 if (!ovl_can_list(s)) { 414 res -= slen; 415 memmove(s, s + slen, len); 416 } else { 417 s += slen; 418 } 419 } 420 421 return res; 422 } 423 424 struct posix_acl *ovl_get_acl(struct inode *inode, int type) 425 { 426 struct inode *realinode = ovl_inode_real(inode); 427 const struct cred *old_cred; 428 struct posix_acl *acl; 429 430 if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode)) 431 return NULL; 432 433 old_cred = ovl_override_creds(inode->i_sb); 434 acl = get_acl(realinode, type); 435 revert_creds(old_cred); 436 437 return acl; 438 } 439 440 int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags) 441 { 442 if (flags & S_ATIME) { 443 struct ovl_fs *ofs = inode->i_sb->s_fs_info; 444 struct path upperpath = { 445 .mnt = ofs->upper_mnt, 446 .dentry = ovl_upperdentry_dereference(OVL_I(inode)), 447 }; 448 449 if (upperpath.dentry) { 450 touch_atime(&upperpath); 451 inode->i_atime = d_inode(upperpath.dentry)->i_atime; 452 } 453 } 454 return 0; 455 } 456 457 static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 458 u64 start, u64 len) 459 { 460 int err; 461 struct inode *realinode = ovl_inode_real(inode); 462 const struct cred *old_cred; 463 464 if (!realinode->i_op->fiemap) 465 return -EOPNOTSUPP; 466 467 old_cred = ovl_override_creds(inode->i_sb); 468 469 if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) 470 filemap_write_and_wait(realinode->i_mapping); 471 472 err = realinode->i_op->fiemap(realinode, fieinfo, start, len); 473 revert_creds(old_cred); 474 475 return err; 476 } 477 478 static const struct inode_operations ovl_file_inode_operations = { 479 .setattr = ovl_setattr, 480 .permission = ovl_permission, 481 .getattr = ovl_getattr, 482 .listxattr = ovl_listxattr, 483 .get_acl = ovl_get_acl, 484 .update_time = ovl_update_time, 485 .fiemap = ovl_fiemap, 486 }; 487 488 static const struct inode_operations ovl_symlink_inode_operations = { 489 .setattr = ovl_setattr, 490 .get_link = ovl_get_link, 491 .getattr = ovl_getattr, 492 .listxattr = ovl_listxattr, 493 .update_time = ovl_update_time, 494 }; 495 496 static const struct inode_operations ovl_special_inode_operations = { 497 .setattr = ovl_setattr, 498 .permission = ovl_permission, 499 .getattr = ovl_getattr, 500 .listxattr = ovl_listxattr, 501 .get_acl = ovl_get_acl, 502 .update_time = ovl_update_time, 503 }; 504 505 static const struct address_space_operations ovl_aops = { 506 /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */ 507 .direct_IO = noop_direct_IO, 508 }; 509 510 /* 511 * It is possible to stack overlayfs instance on top of another 512 * overlayfs instance as lower layer. We need to annonate the 513 * stackable i_mutex locks according to stack level of the super 514 * block instance. An overlayfs instance can never be in stack 515 * depth 0 (there is always a real fs below it). An overlayfs 516 * inode lock will use the lockdep annotaion ovl_i_mutex_key[depth]. 517 * 518 * For example, here is a snip from /proc/lockdep_chains after 519 * dir_iterate of nested overlayfs: 520 * 521 * [...] &ovl_i_mutex_dir_key[depth] (stack_depth=2) 522 * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1) 523 * [...] &type->i_mutex_dir_key (stack_depth=0) 524 */ 525 #define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH 526 527 static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode) 528 { 529 #ifdef CONFIG_LOCKDEP 530 static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING]; 531 static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING]; 532 static struct lock_class_key ovl_i_lock_key[OVL_MAX_NESTING]; 533 534 int depth = inode->i_sb->s_stack_depth - 1; 535 536 if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING)) 537 depth = 0; 538 539 if (S_ISDIR(inode->i_mode)) 540 lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]); 541 else 542 lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]); 543 544 lockdep_set_class(&OVL_I(inode)->lock, &ovl_i_lock_key[depth]); 545 #endif 546 } 547 548 static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev, 549 unsigned long ino, int fsid) 550 { 551 int xinobits = ovl_xino_bits(inode->i_sb); 552 553 /* 554 * When d_ino is consistent with st_ino (samefs or i_ino has enough 555 * bits to encode layer), set the same value used for st_ino to i_ino, 556 * so inode number exposed via /proc/locks and a like will be 557 * consistent with d_ino and st_ino values. An i_ino value inconsistent 558 * with d_ino also causes nfsd readdirplus to fail. When called from 559 * ovl_new_inode(), ino arg is 0, so i_ino will be updated to real 560 * upper inode i_ino on ovl_inode_init() or ovl_inode_update(). 561 */ 562 if (ovl_same_sb(inode->i_sb) || xinobits) { 563 inode->i_ino = ino; 564 if (xinobits && fsid && !(ino >> (64 - xinobits))) 565 inode->i_ino |= (unsigned long)fsid << (64 - xinobits); 566 } else { 567 inode->i_ino = get_next_ino(); 568 } 569 inode->i_mode = mode; 570 inode->i_flags |= S_NOCMTIME; 571 #ifdef CONFIG_FS_POSIX_ACL 572 inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE; 573 #endif 574 575 ovl_lockdep_annotate_inode_mutex_key(inode); 576 577 switch (mode & S_IFMT) { 578 case S_IFREG: 579 inode->i_op = &ovl_file_inode_operations; 580 inode->i_fop = &ovl_file_operations; 581 inode->i_mapping->a_ops = &ovl_aops; 582 break; 583 584 case S_IFDIR: 585 inode->i_op = &ovl_dir_inode_operations; 586 inode->i_fop = &ovl_dir_operations; 587 break; 588 589 case S_IFLNK: 590 inode->i_op = &ovl_symlink_inode_operations; 591 break; 592 593 default: 594 inode->i_op = &ovl_special_inode_operations; 595 init_special_inode(inode, mode, rdev); 596 break; 597 } 598 } 599 600 /* 601 * With inodes index enabled, an overlay inode nlink counts the union of upper 602 * hardlinks and non-covered lower hardlinks. During the lifetime of a non-pure 603 * upper inode, the following nlink modifying operations can happen: 604 * 605 * 1. Lower hardlink copy up 606 * 2. Upper hardlink created, unlinked or renamed over 607 * 3. Lower hardlink whiteout or renamed over 608 * 609 * For the first, copy up case, the union nlink does not change, whether the 610 * operation succeeds or fails, but the upper inode nlink may change. 611 * Therefore, before copy up, we store the union nlink value relative to the 612 * lower inode nlink in the index inode xattr trusted.overlay.nlink. 613 * 614 * For the second, upper hardlink case, the union nlink should be incremented 615 * or decremented IFF the operation succeeds, aligned with nlink change of the 616 * upper inode. Therefore, before link/unlink/rename, we store the union nlink 617 * value relative to the upper inode nlink in the index inode. 618 * 619 * For the last, lower cover up case, we simplify things by preceding the 620 * whiteout or cover up with copy up. This makes sure that there is an index 621 * upper inode where the nlink xattr can be stored before the copied up upper 622 * entry is unlink. 623 */ 624 #define OVL_NLINK_ADD_UPPER (1 << 0) 625 626 /* 627 * On-disk format for indexed nlink: 628 * 629 * nlink relative to the upper inode - "U[+-]NUM" 630 * nlink relative to the lower inode - "L[+-]NUM" 631 */ 632 633 static int ovl_set_nlink_common(struct dentry *dentry, 634 struct dentry *realdentry, const char *format) 635 { 636 struct inode *inode = d_inode(dentry); 637 struct inode *realinode = d_inode(realdentry); 638 char buf[13]; 639 int len; 640 641 len = snprintf(buf, sizeof(buf), format, 642 (int) (inode->i_nlink - realinode->i_nlink)); 643 644 if (WARN_ON(len >= sizeof(buf))) 645 return -EIO; 646 647 return ovl_do_setxattr(ovl_dentry_upper(dentry), 648 OVL_XATTR_NLINK, buf, len, 0); 649 } 650 651 int ovl_set_nlink_upper(struct dentry *dentry) 652 { 653 return ovl_set_nlink_common(dentry, ovl_dentry_upper(dentry), "U%+i"); 654 } 655 656 int ovl_set_nlink_lower(struct dentry *dentry) 657 { 658 return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i"); 659 } 660 661 unsigned int ovl_get_nlink(struct dentry *lowerdentry, 662 struct dentry *upperdentry, 663 unsigned int fallback) 664 { 665 int nlink_diff; 666 int nlink; 667 char buf[13]; 668 int err; 669 670 if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1) 671 return fallback; 672 673 err = vfs_getxattr(upperdentry, OVL_XATTR_NLINK, &buf, sizeof(buf) - 1); 674 if (err < 0) 675 goto fail; 676 677 buf[err] = '\0'; 678 if ((buf[0] != 'L' && buf[0] != 'U') || 679 (buf[1] != '+' && buf[1] != '-')) 680 goto fail; 681 682 err = kstrtoint(buf + 1, 10, &nlink_diff); 683 if (err < 0) 684 goto fail; 685 686 nlink = d_inode(buf[0] == 'L' ? lowerdentry : upperdentry)->i_nlink; 687 nlink += nlink_diff; 688 689 if (nlink <= 0) 690 goto fail; 691 692 return nlink; 693 694 fail: 695 pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n", 696 upperdentry, err); 697 return fallback; 698 } 699 700 struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev) 701 { 702 struct inode *inode; 703 704 inode = new_inode(sb); 705 if (inode) 706 ovl_fill_inode(inode, mode, rdev, 0, 0); 707 708 return inode; 709 } 710 711 static int ovl_inode_test(struct inode *inode, void *data) 712 { 713 return inode->i_private == data; 714 } 715 716 static int ovl_inode_set(struct inode *inode, void *data) 717 { 718 inode->i_private = data; 719 return 0; 720 } 721 722 static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, 723 struct dentry *upperdentry, bool strict) 724 { 725 /* 726 * For directories, @strict verify from lookup path performs consistency 727 * checks, so NULL lower/upper in dentry must match NULL lower/upper in 728 * inode. Non @strict verify from NFS handle decode path passes NULL for 729 * 'unknown' lower/upper. 730 */ 731 if (S_ISDIR(inode->i_mode) && strict) { 732 /* Real lower dir moved to upper layer under us? */ 733 if (!lowerdentry && ovl_inode_lower(inode)) 734 return false; 735 736 /* Lookup of an uncovered redirect origin? */ 737 if (!upperdentry && ovl_inode_upper(inode)) 738 return false; 739 } 740 741 /* 742 * Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL. 743 * This happens when finding a copied up overlay inode for a renamed 744 * or hardlinked overlay dentry and lower dentry cannot be followed 745 * by origin because lower fs does not support file handles. 746 */ 747 if (lowerdentry && ovl_inode_lower(inode) != d_inode(lowerdentry)) 748 return false; 749 750 /* 751 * Allow non-NULL __upperdentry in inode even if upperdentry is NULL. 752 * This happens when finding a lower alias for a copied up hard link. 753 */ 754 if (upperdentry && ovl_inode_upper(inode) != d_inode(upperdentry)) 755 return false; 756 757 return true; 758 } 759 760 struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, 761 bool is_upper) 762 { 763 struct inode *inode, *key = d_inode(real); 764 765 inode = ilookup5(sb, (unsigned long) key, ovl_inode_test, key); 766 if (!inode) 767 return NULL; 768 769 if (!ovl_verify_inode(inode, is_upper ? NULL : real, 770 is_upper ? real : NULL, false)) { 771 iput(inode); 772 return ERR_PTR(-ESTALE); 773 } 774 775 return inode; 776 } 777 778 bool ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir) 779 { 780 struct inode *key = d_inode(dir); 781 struct inode *trap; 782 bool res; 783 784 trap = ilookup5(sb, (unsigned long) key, ovl_inode_test, key); 785 if (!trap) 786 return false; 787 788 res = IS_DEADDIR(trap) && !ovl_inode_upper(trap) && 789 !ovl_inode_lower(trap); 790 791 iput(trap); 792 return res; 793 } 794 795 /* 796 * Create an inode cache entry for layer root dir, that will intentionally 797 * fail ovl_verify_inode(), so any lookup that will find some layer root 798 * will fail. 799 */ 800 struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir) 801 { 802 struct inode *key = d_inode(dir); 803 struct inode *trap; 804 805 if (!d_is_dir(dir)) 806 return ERR_PTR(-ENOTDIR); 807 808 trap = iget5_locked(sb, (unsigned long) key, ovl_inode_test, 809 ovl_inode_set, key); 810 if (!trap) 811 return ERR_PTR(-ENOMEM); 812 813 if (!(trap->i_state & I_NEW)) { 814 /* Conflicting layer roots? */ 815 iput(trap); 816 return ERR_PTR(-ELOOP); 817 } 818 819 trap->i_mode = S_IFDIR; 820 trap->i_flags = S_DEAD; 821 unlock_new_inode(trap); 822 823 return trap; 824 } 825 826 /* 827 * Does overlay inode need to be hashed by lower inode? 828 */ 829 static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper, 830 struct dentry *lower, struct dentry *index) 831 { 832 struct ovl_fs *ofs = sb->s_fs_info; 833 834 /* No, if pure upper */ 835 if (!lower) 836 return false; 837 838 /* Yes, if already indexed */ 839 if (index) 840 return true; 841 842 /* Yes, if won't be copied up */ 843 if (!ofs->upper_mnt) 844 return true; 845 846 /* No, if lower hardlink is or will be broken on copy up */ 847 if ((upper || !ovl_indexdir(sb)) && 848 !d_is_dir(lower) && d_inode(lower)->i_nlink > 1) 849 return false; 850 851 /* No, if non-indexed upper with NFS export */ 852 if (sb->s_export_op && upper) 853 return false; 854 855 /* Otherwise, hash by lower inode for fsnotify */ 856 return true; 857 } 858 859 static struct inode *ovl_iget5(struct super_block *sb, struct inode *newinode, 860 struct inode *key) 861 { 862 return newinode ? inode_insert5(newinode, (unsigned long) key, 863 ovl_inode_test, ovl_inode_set, key) : 864 iget5_locked(sb, (unsigned long) key, 865 ovl_inode_test, ovl_inode_set, key); 866 } 867 868 struct inode *ovl_get_inode(struct super_block *sb, 869 struct ovl_inode_params *oip) 870 { 871 struct dentry *upperdentry = oip->upperdentry; 872 struct ovl_path *lowerpath = oip->lowerpath; 873 struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; 874 struct inode *inode; 875 struct dentry *lowerdentry = lowerpath ? lowerpath->dentry : NULL; 876 bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, 877 oip->index); 878 int fsid = bylower ? oip->lowerpath->layer->fsid : 0; 879 bool is_dir, metacopy = false; 880 unsigned long ino = 0; 881 int err = oip->newinode ? -EEXIST : -ENOMEM; 882 883 if (!realinode) 884 realinode = d_inode(lowerdentry); 885 886 /* 887 * Copy up origin (lower) may exist for non-indexed upper, but we must 888 * not use lower as hash key if this is a broken hardlink. 889 */ 890 is_dir = S_ISDIR(realinode->i_mode); 891 if (upperdentry || bylower) { 892 struct inode *key = d_inode(bylower ? lowerdentry : 893 upperdentry); 894 unsigned int nlink = is_dir ? 1 : realinode->i_nlink; 895 896 inode = ovl_iget5(sb, oip->newinode, key); 897 if (!inode) 898 goto out_err; 899 if (!(inode->i_state & I_NEW)) { 900 /* 901 * Verify that the underlying files stored in the inode 902 * match those in the dentry. 903 */ 904 if (!ovl_verify_inode(inode, lowerdentry, upperdentry, 905 true)) { 906 iput(inode); 907 err = -ESTALE; 908 goto out_err; 909 } 910 911 dput(upperdentry); 912 kfree(oip->redirect); 913 goto out; 914 } 915 916 /* Recalculate nlink for non-dir due to indexing */ 917 if (!is_dir) 918 nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink); 919 set_nlink(inode, nlink); 920 ino = key->i_ino; 921 } else { 922 /* Lower hardlink that will be broken on copy up */ 923 inode = new_inode(sb); 924 if (!inode) { 925 err = -ENOMEM; 926 goto out_err; 927 } 928 } 929 ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid); 930 ovl_inode_init(inode, upperdentry, lowerdentry, oip->lowerdata); 931 932 if (upperdentry && ovl_is_impuredir(upperdentry)) 933 ovl_set_flag(OVL_IMPURE, inode); 934 935 if (oip->index) 936 ovl_set_flag(OVL_INDEX, inode); 937 938 if (upperdentry) { 939 err = ovl_check_metacopy_xattr(upperdentry); 940 if (err < 0) 941 goto out_err; 942 metacopy = err; 943 if (!metacopy) 944 ovl_set_flag(OVL_UPPERDATA, inode); 945 } 946 947 OVL_I(inode)->redirect = oip->redirect; 948 949 if (bylower) 950 ovl_set_flag(OVL_CONST_INO, inode); 951 952 /* Check for non-merge dir that may have whiteouts */ 953 if (is_dir) { 954 if (((upperdentry && lowerdentry) || oip->numlower > 1) || 955 ovl_check_origin_xattr(upperdentry ?: lowerdentry)) { 956 ovl_set_flag(OVL_WHITEOUTS, inode); 957 } 958 } 959 960 if (inode->i_state & I_NEW) 961 unlock_new_inode(inode); 962 out: 963 return inode; 964 965 out_err: 966 pr_warn_ratelimited("overlayfs: failed to get inode (%i)\n", err); 967 inode = ERR_PTR(err); 968 goto out; 969 } 970