1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * 4 * Copyright (C) 2011 Novell Inc. 5 */ 6 7 #include <linux/fs.h> 8 #include <linux/slab.h> 9 #include <linux/cred.h> 10 #include <linux/xattr.h> 11 #include <linux/posix_acl.h> 12 #include <linux/ratelimit.h> 13 #include "overlayfs.h" 14 15 16 int ovl_setattr(struct dentry *dentry, struct iattr *attr) 17 { 18 int err; 19 bool full_copy_up = false; 20 struct dentry *upperdentry; 21 const struct cred *old_cred; 22 23 err = setattr_prepare(dentry, attr); 24 if (err) 25 return err; 26 27 err = ovl_want_write(dentry); 28 if (err) 29 goto out; 30 31 if (attr->ia_valid & ATTR_SIZE) { 32 struct inode *realinode = d_inode(ovl_dentry_real(dentry)); 33 34 err = -ETXTBSY; 35 if (atomic_read(&realinode->i_writecount) < 0) 36 goto out_drop_write; 37 38 /* Truncate should trigger data copy up as well */ 39 full_copy_up = true; 40 } 41 42 if (!full_copy_up) 43 err = ovl_copy_up(dentry); 44 else 45 err = ovl_copy_up_with_data(dentry); 46 if (!err) { 47 struct inode *winode = NULL; 48 49 upperdentry = ovl_dentry_upper(dentry); 50 51 if (attr->ia_valid & ATTR_SIZE) { 52 winode = d_inode(upperdentry); 53 err = get_write_access(winode); 54 if (err) 55 goto out_drop_write; 56 } 57 58 if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) 59 attr->ia_valid &= ~ATTR_MODE; 60 61 inode_lock(upperdentry->d_inode); 62 old_cred = ovl_override_creds(dentry->d_sb); 63 err = notify_change(upperdentry, attr, NULL); 64 revert_creds(old_cred); 65 if (!err) 66 ovl_copyattr(upperdentry->d_inode, dentry->d_inode); 67 inode_unlock(upperdentry->d_inode); 68 69 if (winode) 70 put_write_access(winode); 71 } 72 out_drop_write: 73 ovl_drop_write(dentry); 74 out: 75 return err; 76 } 77 78 static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, 79 struct ovl_layer *lower_layer) 80 { 81 bool samefs = ovl_same_sb(dentry->d_sb); 82 unsigned int xinobits = ovl_xino_bits(dentry->d_sb); 83 84 if (samefs) { 85 /* 86 * When all layers are on the same fs, all real inode 87 * number are unique, so we use the overlay st_dev, 88 * which is friendly to du -x. 89 */ 90 stat->dev = dentry->d_sb->s_dev; 91 return 0; 92 } else if (xinobits) { 93 unsigned int shift = 64 - xinobits; 94 /* 95 * All inode numbers of underlying fs should not be using the 96 * high xinobits, so we use high xinobits to partition the 97 * overlay st_ino address space. The high bits holds the fsid 98 * (upper fsid is 0). This way overlay inode numbers are unique 99 * and all inodes use overlay st_dev. Inode numbers are also 100 * persistent for a given layer configuration. 101 */ 102 if (stat->ino >> shift) { 103 pr_warn_ratelimited("overlayfs: inode number too big (%pd2, ino=%llu, xinobits=%d)\n", 104 dentry, stat->ino, xinobits); 105 } else { 106 if (lower_layer) 107 stat->ino |= ((u64)lower_layer->fsid) << shift; 108 109 stat->dev = dentry->d_sb->s_dev; 110 return 0; 111 } 112 } 113 114 /* The inode could not be mapped to a unified st_ino address space */ 115 if (S_ISDIR(dentry->d_inode->i_mode)) { 116 /* 117 * Always use the overlay st_dev for directories, so 'find 118 * -xdev' will scan the entire overlay mount and won't cross the 119 * overlay mount boundaries. 120 * 121 * If not all layers are on the same fs the pair {real st_ino; 122 * overlay st_dev} is not unique, so use the non persistent 123 * overlay st_ino for directories. 124 */ 125 stat->dev = dentry->d_sb->s_dev; 126 stat->ino = dentry->d_inode->i_ino; 127 } else if (lower_layer && lower_layer->fsid) { 128 /* 129 * For non-samefs setup, if we cannot map all layers st_ino 130 * to a unified address space, we need to make sure that st_dev 131 * is unique per lower fs. Upper layer uses real st_dev and 132 * lower layers use the unique anonymous bdev assigned to the 133 * lower fs. 134 */ 135 stat->dev = lower_layer->fs->pseudo_dev; 136 } 137 138 return 0; 139 } 140 141 int ovl_getattr(const struct path *path, struct kstat *stat, 142 u32 request_mask, unsigned int flags) 143 { 144 struct dentry *dentry = path->dentry; 145 enum ovl_path_type type; 146 struct path realpath; 147 const struct cred *old_cred; 148 bool is_dir = S_ISDIR(dentry->d_inode->i_mode); 149 bool samefs = ovl_same_sb(dentry->d_sb); 150 struct ovl_layer *lower_layer = NULL; 151 int err; 152 bool metacopy_blocks = false; 153 154 metacopy_blocks = ovl_is_metacopy_dentry(dentry); 155 156 type = ovl_path_real(dentry, &realpath); 157 old_cred = ovl_override_creds(dentry->d_sb); 158 err = vfs_getattr(&realpath, stat, request_mask, flags); 159 if (err) 160 goto out; 161 162 /* 163 * For non-dir or same fs, we use st_ino of the copy up origin. 164 * This guaranties constant st_dev/st_ino across copy up. 165 * With xino feature and non-samefs, we use st_ino of the copy up 166 * origin masked with high bits that represent the layer id. 167 * 168 * If lower filesystem supports NFS file handles, this also guaranties 169 * persistent st_ino across mount cycle. 170 */ 171 if (!is_dir || samefs || ovl_xino_bits(dentry->d_sb)) { 172 if (!OVL_TYPE_UPPER(type)) { 173 lower_layer = ovl_layer_lower(dentry); 174 } else if (OVL_TYPE_ORIGIN(type)) { 175 struct kstat lowerstat; 176 u32 lowermask = STATX_INO | STATX_BLOCKS | 177 (!is_dir ? STATX_NLINK : 0); 178 179 ovl_path_lower(dentry, &realpath); 180 err = vfs_getattr(&realpath, &lowerstat, 181 lowermask, flags); 182 if (err) 183 goto out; 184 185 /* 186 * Lower hardlinks may be broken on copy up to different 187 * upper files, so we cannot use the lower origin st_ino 188 * for those different files, even for the same fs case. 189 * 190 * Similarly, several redirected dirs can point to the 191 * same dir on a lower layer. With the "verify_lower" 192 * feature, we do not use the lower origin st_ino, if 193 * we haven't verified that this redirect is unique. 194 * 195 * With inodes index enabled, it is safe to use st_ino 196 * of an indexed origin. The index validates that the 197 * upper hardlink is not broken and that a redirected 198 * dir is the only redirect to that origin. 199 */ 200 if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) || 201 (!ovl_verify_lower(dentry->d_sb) && 202 (is_dir || lowerstat.nlink == 1))) { 203 lower_layer = ovl_layer_lower(dentry); 204 /* 205 * Cannot use origin st_dev;st_ino because 206 * origin inode content may differ from overlay 207 * inode content. 208 */ 209 if (samefs || lower_layer->fsid) 210 stat->ino = lowerstat.ino; 211 } 212 213 /* 214 * If we are querying a metacopy dentry and lower 215 * dentry is data dentry, then use the blocks we 216 * queried just now. We don't have to do additional 217 * vfs_getattr(). If lower itself is metacopy, then 218 * additional vfs_getattr() is unavoidable. 219 */ 220 if (metacopy_blocks && 221 realpath.dentry == ovl_dentry_lowerdata(dentry)) { 222 stat->blocks = lowerstat.blocks; 223 metacopy_blocks = false; 224 } 225 } 226 227 if (metacopy_blocks) { 228 /* 229 * If lower is not same as lowerdata or if there was 230 * no origin on upper, we can end up here. 231 */ 232 struct kstat lowerdatastat; 233 u32 lowermask = STATX_BLOCKS; 234 235 ovl_path_lowerdata(dentry, &realpath); 236 err = vfs_getattr(&realpath, &lowerdatastat, 237 lowermask, flags); 238 if (err) 239 goto out; 240 stat->blocks = lowerdatastat.blocks; 241 } 242 } 243 244 err = ovl_map_dev_ino(dentry, stat, lower_layer); 245 if (err) 246 goto out; 247 248 /* 249 * It's probably not worth it to count subdirs to get the 250 * correct link count. nlink=1 seems to pacify 'find' and 251 * other utilities. 252 */ 253 if (is_dir && OVL_TYPE_MERGE(type)) 254 stat->nlink = 1; 255 256 /* 257 * Return the overlay inode nlinks for indexed upper inodes. 258 * Overlay inode nlink counts the union of the upper hardlinks 259 * and non-covered lower hardlinks. It does not include the upper 260 * index hardlink. 261 */ 262 if (!is_dir && ovl_test_flag(OVL_INDEX, d_inode(dentry))) 263 stat->nlink = dentry->d_inode->i_nlink; 264 265 out: 266 revert_creds(old_cred); 267 268 return err; 269 } 270 271 int ovl_permission(struct inode *inode, int mask) 272 { 273 struct inode *upperinode = ovl_inode_upper(inode); 274 struct inode *realinode = upperinode ?: ovl_inode_lower(inode); 275 const struct cred *old_cred; 276 int err; 277 278 /* Careful in RCU walk mode */ 279 if (!realinode) { 280 WARN_ON(!(mask & MAY_NOT_BLOCK)); 281 return -ECHILD; 282 } 283 284 /* 285 * Check overlay inode with the creds of task and underlying inode 286 * with creds of mounter 287 */ 288 err = generic_permission(inode, mask); 289 if (err) 290 return err; 291 292 old_cred = ovl_override_creds(inode->i_sb); 293 if (!upperinode && 294 !special_file(realinode->i_mode) && mask & MAY_WRITE) { 295 mask &= ~(MAY_WRITE | MAY_APPEND); 296 /* Make sure mounter can read file for copy up later */ 297 mask |= MAY_READ; 298 } 299 err = inode_permission(realinode, mask); 300 revert_creds(old_cred); 301 302 return err; 303 } 304 305 static const char *ovl_get_link(struct dentry *dentry, 306 struct inode *inode, 307 struct delayed_call *done) 308 { 309 const struct cred *old_cred; 310 const char *p; 311 312 if (!dentry) 313 return ERR_PTR(-ECHILD); 314 315 old_cred = ovl_override_creds(dentry->d_sb); 316 p = vfs_get_link(ovl_dentry_real(dentry), done); 317 revert_creds(old_cred); 318 return p; 319 } 320 321 bool ovl_is_private_xattr(const char *name) 322 { 323 return strncmp(name, OVL_XATTR_PREFIX, 324 sizeof(OVL_XATTR_PREFIX) - 1) == 0; 325 } 326 327 int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, 328 const void *value, size_t size, int flags) 329 { 330 int err; 331 struct dentry *upperdentry = ovl_i_dentry_upper(inode); 332 struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); 333 const struct cred *old_cred; 334 335 err = ovl_want_write(dentry); 336 if (err) 337 goto out; 338 339 if (!value && !upperdentry) { 340 err = vfs_getxattr(realdentry, name, NULL, 0); 341 if (err < 0) 342 goto out_drop_write; 343 } 344 345 if (!upperdentry) { 346 err = ovl_copy_up(dentry); 347 if (err) 348 goto out_drop_write; 349 350 realdentry = ovl_dentry_upper(dentry); 351 } 352 353 old_cred = ovl_override_creds(dentry->d_sb); 354 if (value) 355 err = vfs_setxattr(realdentry, name, value, size, flags); 356 else { 357 WARN_ON(flags != XATTR_REPLACE); 358 err = vfs_removexattr(realdentry, name); 359 } 360 revert_creds(old_cred); 361 362 /* copy c/mtime */ 363 ovl_copyattr(d_inode(realdentry), inode); 364 365 out_drop_write: 366 ovl_drop_write(dentry); 367 out: 368 return err; 369 } 370 371 int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, 372 void *value, size_t size) 373 { 374 ssize_t res; 375 const struct cred *old_cred; 376 struct dentry *realdentry = 377 ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry); 378 379 old_cred = ovl_override_creds(dentry->d_sb); 380 res = vfs_getxattr(realdentry, name, value, size); 381 revert_creds(old_cred); 382 return res; 383 } 384 385 static bool ovl_can_list(const char *s) 386 { 387 /* List all non-trusted xatts */ 388 if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) 389 return true; 390 391 /* Never list trusted.overlay, list other trusted for superuser only */ 392 return !ovl_is_private_xattr(s) && 393 ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN); 394 } 395 396 ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) 397 { 398 struct dentry *realdentry = ovl_dentry_real(dentry); 399 ssize_t res; 400 size_t len; 401 char *s; 402 const struct cred *old_cred; 403 404 old_cred = ovl_override_creds(dentry->d_sb); 405 res = vfs_listxattr(realdentry, list, size); 406 revert_creds(old_cred); 407 if (res <= 0 || size == 0) 408 return res; 409 410 /* filter out private xattrs */ 411 for (s = list, len = res; len;) { 412 size_t slen = strnlen(s, len) + 1; 413 414 /* underlying fs providing us with an broken xattr list? */ 415 if (WARN_ON(slen > len)) 416 return -EIO; 417 418 len -= slen; 419 if (!ovl_can_list(s)) { 420 res -= slen; 421 memmove(s, s + slen, len); 422 } else { 423 s += slen; 424 } 425 } 426 427 return res; 428 } 429 430 struct posix_acl *ovl_get_acl(struct inode *inode, int type) 431 { 432 struct inode *realinode = ovl_inode_real(inode); 433 const struct cred *old_cred; 434 struct posix_acl *acl; 435 436 if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode)) 437 return NULL; 438 439 old_cred = ovl_override_creds(inode->i_sb); 440 acl = get_acl(realinode, type); 441 revert_creds(old_cred); 442 443 return acl; 444 } 445 446 int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags) 447 { 448 if (flags & S_ATIME) { 449 struct ovl_fs *ofs = inode->i_sb->s_fs_info; 450 struct path upperpath = { 451 .mnt = ofs->upper_mnt, 452 .dentry = ovl_upperdentry_dereference(OVL_I(inode)), 453 }; 454 455 if (upperpath.dentry) { 456 touch_atime(&upperpath); 457 inode->i_atime = d_inode(upperpath.dentry)->i_atime; 458 } 459 } 460 return 0; 461 } 462 463 static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 464 u64 start, u64 len) 465 { 466 int err; 467 struct inode *realinode = ovl_inode_real(inode); 468 const struct cred *old_cred; 469 470 if (!realinode->i_op->fiemap) 471 return -EOPNOTSUPP; 472 473 old_cred = ovl_override_creds(inode->i_sb); 474 475 if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) 476 filemap_write_and_wait(realinode->i_mapping); 477 478 err = realinode->i_op->fiemap(realinode, fieinfo, start, len); 479 revert_creds(old_cred); 480 481 return err; 482 } 483 484 static const struct inode_operations ovl_file_inode_operations = { 485 .setattr = ovl_setattr, 486 .permission = ovl_permission, 487 .getattr = ovl_getattr, 488 .listxattr = ovl_listxattr, 489 .get_acl = ovl_get_acl, 490 .update_time = ovl_update_time, 491 .fiemap = ovl_fiemap, 492 }; 493 494 static const struct inode_operations ovl_symlink_inode_operations = { 495 .setattr = ovl_setattr, 496 .get_link = ovl_get_link, 497 .getattr = ovl_getattr, 498 .listxattr = ovl_listxattr, 499 .update_time = ovl_update_time, 500 }; 501 502 static const struct inode_operations ovl_special_inode_operations = { 503 .setattr = ovl_setattr, 504 .permission = ovl_permission, 505 .getattr = ovl_getattr, 506 .listxattr = ovl_listxattr, 507 .get_acl = ovl_get_acl, 508 .update_time = ovl_update_time, 509 }; 510 511 static const struct address_space_operations ovl_aops = { 512 /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */ 513 .direct_IO = noop_direct_IO, 514 }; 515 516 /* 517 * It is possible to stack overlayfs instance on top of another 518 * overlayfs instance as lower layer. We need to annonate the 519 * stackable i_mutex locks according to stack level of the super 520 * block instance. An overlayfs instance can never be in stack 521 * depth 0 (there is always a real fs below it). An overlayfs 522 * inode lock will use the lockdep annotaion ovl_i_mutex_key[depth]. 523 * 524 * For example, here is a snip from /proc/lockdep_chains after 525 * dir_iterate of nested overlayfs: 526 * 527 * [...] &ovl_i_mutex_dir_key[depth] (stack_depth=2) 528 * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1) 529 * [...] &type->i_mutex_dir_key (stack_depth=0) 530 */ 531 #define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH 532 533 static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode) 534 { 535 #ifdef CONFIG_LOCKDEP 536 static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING]; 537 static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING]; 538 static struct lock_class_key ovl_i_lock_key[OVL_MAX_NESTING]; 539 540 int depth = inode->i_sb->s_stack_depth - 1; 541 542 if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING)) 543 depth = 0; 544 545 if (S_ISDIR(inode->i_mode)) 546 lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]); 547 else 548 lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]); 549 550 lockdep_set_class(&OVL_I(inode)->lock, &ovl_i_lock_key[depth]); 551 #endif 552 } 553 554 static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev, 555 unsigned long ino, int fsid) 556 { 557 int xinobits = ovl_xino_bits(inode->i_sb); 558 559 /* 560 * When d_ino is consistent with st_ino (samefs or i_ino has enough 561 * bits to encode layer), set the same value used for st_ino to i_ino, 562 * so inode number exposed via /proc/locks and a like will be 563 * consistent with d_ino and st_ino values. An i_ino value inconsistent 564 * with d_ino also causes nfsd readdirplus to fail. When called from 565 * ovl_new_inode(), ino arg is 0, so i_ino will be updated to real 566 * upper inode i_ino on ovl_inode_init() or ovl_inode_update(). 567 */ 568 if (ovl_same_sb(inode->i_sb) || xinobits) { 569 inode->i_ino = ino; 570 if (xinobits && fsid && !(ino >> (64 - xinobits))) 571 inode->i_ino |= (unsigned long)fsid << (64 - xinobits); 572 } else { 573 inode->i_ino = get_next_ino(); 574 } 575 inode->i_mode = mode; 576 inode->i_flags |= S_NOCMTIME; 577 #ifdef CONFIG_FS_POSIX_ACL 578 inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE; 579 #endif 580 581 ovl_lockdep_annotate_inode_mutex_key(inode); 582 583 switch (mode & S_IFMT) { 584 case S_IFREG: 585 inode->i_op = &ovl_file_inode_operations; 586 inode->i_fop = &ovl_file_operations; 587 inode->i_mapping->a_ops = &ovl_aops; 588 break; 589 590 case S_IFDIR: 591 inode->i_op = &ovl_dir_inode_operations; 592 inode->i_fop = &ovl_dir_operations; 593 break; 594 595 case S_IFLNK: 596 inode->i_op = &ovl_symlink_inode_operations; 597 break; 598 599 default: 600 inode->i_op = &ovl_special_inode_operations; 601 init_special_inode(inode, mode, rdev); 602 break; 603 } 604 } 605 606 /* 607 * With inodes index enabled, an overlay inode nlink counts the union of upper 608 * hardlinks and non-covered lower hardlinks. During the lifetime of a non-pure 609 * upper inode, the following nlink modifying operations can happen: 610 * 611 * 1. Lower hardlink copy up 612 * 2. Upper hardlink created, unlinked or renamed over 613 * 3. Lower hardlink whiteout or renamed over 614 * 615 * For the first, copy up case, the union nlink does not change, whether the 616 * operation succeeds or fails, but the upper inode nlink may change. 617 * Therefore, before copy up, we store the union nlink value relative to the 618 * lower inode nlink in the index inode xattr trusted.overlay.nlink. 619 * 620 * For the second, upper hardlink case, the union nlink should be incremented 621 * or decremented IFF the operation succeeds, aligned with nlink change of the 622 * upper inode. Therefore, before link/unlink/rename, we store the union nlink 623 * value relative to the upper inode nlink in the index inode. 624 * 625 * For the last, lower cover up case, we simplify things by preceding the 626 * whiteout or cover up with copy up. This makes sure that there is an index 627 * upper inode where the nlink xattr can be stored before the copied up upper 628 * entry is unlink. 629 */ 630 #define OVL_NLINK_ADD_UPPER (1 << 0) 631 632 /* 633 * On-disk format for indexed nlink: 634 * 635 * nlink relative to the upper inode - "U[+-]NUM" 636 * nlink relative to the lower inode - "L[+-]NUM" 637 */ 638 639 static int ovl_set_nlink_common(struct dentry *dentry, 640 struct dentry *realdentry, const char *format) 641 { 642 struct inode *inode = d_inode(dentry); 643 struct inode *realinode = d_inode(realdentry); 644 char buf[13]; 645 int len; 646 647 len = snprintf(buf, sizeof(buf), format, 648 (int) (inode->i_nlink - realinode->i_nlink)); 649 650 if (WARN_ON(len >= sizeof(buf))) 651 return -EIO; 652 653 return ovl_do_setxattr(ovl_dentry_upper(dentry), 654 OVL_XATTR_NLINK, buf, len, 0); 655 } 656 657 int ovl_set_nlink_upper(struct dentry *dentry) 658 { 659 return ovl_set_nlink_common(dentry, ovl_dentry_upper(dentry), "U%+i"); 660 } 661 662 int ovl_set_nlink_lower(struct dentry *dentry) 663 { 664 return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i"); 665 } 666 667 unsigned int ovl_get_nlink(struct dentry *lowerdentry, 668 struct dentry *upperdentry, 669 unsigned int fallback) 670 { 671 int nlink_diff; 672 int nlink; 673 char buf[13]; 674 int err; 675 676 if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1) 677 return fallback; 678 679 err = vfs_getxattr(upperdentry, OVL_XATTR_NLINK, &buf, sizeof(buf) - 1); 680 if (err < 0) 681 goto fail; 682 683 buf[err] = '\0'; 684 if ((buf[0] != 'L' && buf[0] != 'U') || 685 (buf[1] != '+' && buf[1] != '-')) 686 goto fail; 687 688 err = kstrtoint(buf + 1, 10, &nlink_diff); 689 if (err < 0) 690 goto fail; 691 692 nlink = d_inode(buf[0] == 'L' ? lowerdentry : upperdentry)->i_nlink; 693 nlink += nlink_diff; 694 695 if (nlink <= 0) 696 goto fail; 697 698 return nlink; 699 700 fail: 701 pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n", 702 upperdentry, err); 703 return fallback; 704 } 705 706 struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev) 707 { 708 struct inode *inode; 709 710 inode = new_inode(sb); 711 if (inode) 712 ovl_fill_inode(inode, mode, rdev, 0, 0); 713 714 return inode; 715 } 716 717 static int ovl_inode_test(struct inode *inode, void *data) 718 { 719 return inode->i_private == data; 720 } 721 722 static int ovl_inode_set(struct inode *inode, void *data) 723 { 724 inode->i_private = data; 725 return 0; 726 } 727 728 static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, 729 struct dentry *upperdentry, bool strict) 730 { 731 /* 732 * For directories, @strict verify from lookup path performs consistency 733 * checks, so NULL lower/upper in dentry must match NULL lower/upper in 734 * inode. Non @strict verify from NFS handle decode path passes NULL for 735 * 'unknown' lower/upper. 736 */ 737 if (S_ISDIR(inode->i_mode) && strict) { 738 /* Real lower dir moved to upper layer under us? */ 739 if (!lowerdentry && ovl_inode_lower(inode)) 740 return false; 741 742 /* Lookup of an uncovered redirect origin? */ 743 if (!upperdentry && ovl_inode_upper(inode)) 744 return false; 745 } 746 747 /* 748 * Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL. 749 * This happens when finding a copied up overlay inode for a renamed 750 * or hardlinked overlay dentry and lower dentry cannot be followed 751 * by origin because lower fs does not support file handles. 752 */ 753 if (lowerdentry && ovl_inode_lower(inode) != d_inode(lowerdentry)) 754 return false; 755 756 /* 757 * Allow non-NULL __upperdentry in inode even if upperdentry is NULL. 758 * This happens when finding a lower alias for a copied up hard link. 759 */ 760 if (upperdentry && ovl_inode_upper(inode) != d_inode(upperdentry)) 761 return false; 762 763 return true; 764 } 765 766 struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, 767 bool is_upper) 768 { 769 struct inode *inode, *key = d_inode(real); 770 771 inode = ilookup5(sb, (unsigned long) key, ovl_inode_test, key); 772 if (!inode) 773 return NULL; 774 775 if (!ovl_verify_inode(inode, is_upper ? NULL : real, 776 is_upper ? real : NULL, false)) { 777 iput(inode); 778 return ERR_PTR(-ESTALE); 779 } 780 781 return inode; 782 } 783 784 bool ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir) 785 { 786 struct inode *key = d_inode(dir); 787 struct inode *trap; 788 bool res; 789 790 trap = ilookup5(sb, (unsigned long) key, ovl_inode_test, key); 791 if (!trap) 792 return false; 793 794 res = IS_DEADDIR(trap) && !ovl_inode_upper(trap) && 795 !ovl_inode_lower(trap); 796 797 iput(trap); 798 return res; 799 } 800 801 /* 802 * Create an inode cache entry for layer root dir, that will intentionally 803 * fail ovl_verify_inode(), so any lookup that will find some layer root 804 * will fail. 805 */ 806 struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir) 807 { 808 struct inode *key = d_inode(dir); 809 struct inode *trap; 810 811 if (!d_is_dir(dir)) 812 return ERR_PTR(-ENOTDIR); 813 814 trap = iget5_locked(sb, (unsigned long) key, ovl_inode_test, 815 ovl_inode_set, key); 816 if (!trap) 817 return ERR_PTR(-ENOMEM); 818 819 if (!(trap->i_state & I_NEW)) { 820 /* Conflicting layer roots? */ 821 iput(trap); 822 return ERR_PTR(-ELOOP); 823 } 824 825 trap->i_mode = S_IFDIR; 826 trap->i_flags = S_DEAD; 827 unlock_new_inode(trap); 828 829 return trap; 830 } 831 832 /* 833 * Does overlay inode need to be hashed by lower inode? 834 */ 835 static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper, 836 struct dentry *lower, struct dentry *index) 837 { 838 struct ovl_fs *ofs = sb->s_fs_info; 839 840 /* No, if pure upper */ 841 if (!lower) 842 return false; 843 844 /* Yes, if already indexed */ 845 if (index) 846 return true; 847 848 /* Yes, if won't be copied up */ 849 if (!ofs->upper_mnt) 850 return true; 851 852 /* No, if lower hardlink is or will be broken on copy up */ 853 if ((upper || !ovl_indexdir(sb)) && 854 !d_is_dir(lower) && d_inode(lower)->i_nlink > 1) 855 return false; 856 857 /* No, if non-indexed upper with NFS export */ 858 if (sb->s_export_op && upper) 859 return false; 860 861 /* Otherwise, hash by lower inode for fsnotify */ 862 return true; 863 } 864 865 static struct inode *ovl_iget5(struct super_block *sb, struct inode *newinode, 866 struct inode *key) 867 { 868 return newinode ? inode_insert5(newinode, (unsigned long) key, 869 ovl_inode_test, ovl_inode_set, key) : 870 iget5_locked(sb, (unsigned long) key, 871 ovl_inode_test, ovl_inode_set, key); 872 } 873 874 struct inode *ovl_get_inode(struct super_block *sb, 875 struct ovl_inode_params *oip) 876 { 877 struct dentry *upperdentry = oip->upperdentry; 878 struct ovl_path *lowerpath = oip->lowerpath; 879 struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; 880 struct inode *inode; 881 struct dentry *lowerdentry = lowerpath ? lowerpath->dentry : NULL; 882 bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, 883 oip->index); 884 int fsid = bylower ? oip->lowerpath->layer->fsid : 0; 885 bool is_dir, metacopy = false; 886 unsigned long ino = 0; 887 int err = oip->newinode ? -EEXIST : -ENOMEM; 888 889 if (!realinode) 890 realinode = d_inode(lowerdentry); 891 892 /* 893 * Copy up origin (lower) may exist for non-indexed upper, but we must 894 * not use lower as hash key if this is a broken hardlink. 895 */ 896 is_dir = S_ISDIR(realinode->i_mode); 897 if (upperdentry || bylower) { 898 struct inode *key = d_inode(bylower ? lowerdentry : 899 upperdentry); 900 unsigned int nlink = is_dir ? 1 : realinode->i_nlink; 901 902 inode = ovl_iget5(sb, oip->newinode, key); 903 if (!inode) 904 goto out_err; 905 if (!(inode->i_state & I_NEW)) { 906 /* 907 * Verify that the underlying files stored in the inode 908 * match those in the dentry. 909 */ 910 if (!ovl_verify_inode(inode, lowerdentry, upperdentry, 911 true)) { 912 iput(inode); 913 err = -ESTALE; 914 goto out_err; 915 } 916 917 dput(upperdentry); 918 kfree(oip->redirect); 919 goto out; 920 } 921 922 /* Recalculate nlink for non-dir due to indexing */ 923 if (!is_dir) 924 nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink); 925 set_nlink(inode, nlink); 926 ino = key->i_ino; 927 } else { 928 /* Lower hardlink that will be broken on copy up */ 929 inode = new_inode(sb); 930 if (!inode) { 931 err = -ENOMEM; 932 goto out_err; 933 } 934 } 935 ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid); 936 ovl_inode_init(inode, upperdentry, lowerdentry, oip->lowerdata); 937 938 if (upperdentry && ovl_is_impuredir(upperdentry)) 939 ovl_set_flag(OVL_IMPURE, inode); 940 941 if (oip->index) 942 ovl_set_flag(OVL_INDEX, inode); 943 944 if (upperdentry) { 945 err = ovl_check_metacopy_xattr(upperdentry); 946 if (err < 0) 947 goto out_err; 948 metacopy = err; 949 if (!metacopy) 950 ovl_set_flag(OVL_UPPERDATA, inode); 951 } 952 953 OVL_I(inode)->redirect = oip->redirect; 954 955 if (bylower) 956 ovl_set_flag(OVL_CONST_INO, inode); 957 958 /* Check for non-merge dir that may have whiteouts */ 959 if (is_dir) { 960 if (((upperdentry && lowerdentry) || oip->numlower > 1) || 961 ovl_check_origin_xattr(upperdentry ?: lowerdentry)) { 962 ovl_set_flag(OVL_WHITEOUTS, inode); 963 } 964 } 965 966 if (inode->i_state & I_NEW) 967 unlock_new_inode(inode); 968 out: 969 return inode; 970 971 out_err: 972 pr_warn_ratelimited("overlayfs: failed to get inode (%i)\n", err); 973 inode = ERR_PTR(err); 974 goto out; 975 } 976