1 /* 2 * 3 * Copyright (C) 2011 Novell Inc. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 as published by 7 * the Free Software Foundation. 8 */ 9 10 #include <linux/fs.h> 11 #include <linux/slab.h> 12 #include <linux/cred.h> 13 #include <linux/xattr.h> 14 #include <linux/posix_acl.h> 15 #include <linux/ratelimit.h> 16 #include "overlayfs.h" 17 18 19 int ovl_setattr(struct dentry *dentry, struct iattr *attr) 20 { 21 int err; 22 bool full_copy_up = false; 23 struct dentry *upperdentry; 24 const struct cred *old_cred; 25 26 err = setattr_prepare(dentry, attr); 27 if (err) 28 return err; 29 30 err = ovl_want_write(dentry); 31 if (err) 32 goto out; 33 34 if (attr->ia_valid & ATTR_SIZE) { 35 struct inode *realinode = d_inode(ovl_dentry_real(dentry)); 36 37 err = -ETXTBSY; 38 if (atomic_read(&realinode->i_writecount) < 0) 39 goto out_drop_write; 40 41 /* Truncate should trigger data copy up as well */ 42 full_copy_up = true; 43 } 44 45 if (!full_copy_up) 46 err = ovl_copy_up(dentry); 47 else 48 err = ovl_copy_up_with_data(dentry); 49 if (!err) { 50 struct inode *winode = NULL; 51 52 upperdentry = ovl_dentry_upper(dentry); 53 54 if (attr->ia_valid & ATTR_SIZE) { 55 winode = d_inode(upperdentry); 56 err = get_write_access(winode); 57 if (err) 58 goto out_drop_write; 59 } 60 61 if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) 62 attr->ia_valid &= ~ATTR_MODE; 63 64 inode_lock(upperdentry->d_inode); 65 old_cred = ovl_override_creds(dentry->d_sb); 66 err = notify_change(upperdentry, attr, NULL); 67 revert_creds(old_cred); 68 if (!err) 69 ovl_copyattr(upperdentry->d_inode, dentry->d_inode); 70 inode_unlock(upperdentry->d_inode); 71 72 if (winode) 73 put_write_access(winode); 74 } 75 out_drop_write: 76 ovl_drop_write(dentry); 77 out: 78 return err; 79 } 80 81 static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, 82 struct ovl_layer *lower_layer) 83 { 84 bool samefs = ovl_same_sb(dentry->d_sb); 85 unsigned int xinobits = ovl_xino_bits(dentry->d_sb); 86 87 if (samefs) { 88 /* 89 * When all layers are on the same fs, all real inode 90 * number are unique, so we use the overlay st_dev, 91 * which is friendly to du -x. 92 */ 93 stat->dev = dentry->d_sb->s_dev; 94 return 0; 95 } else if (xinobits) { 96 unsigned int shift = 64 - xinobits; 97 /* 98 * All inode numbers of underlying fs should not be using the 99 * high xinobits, so we use high xinobits to partition the 100 * overlay st_ino address space. The high bits holds the fsid 101 * (upper fsid is 0). This way overlay inode numbers are unique 102 * and all inodes use overlay st_dev. Inode numbers are also 103 * persistent for a given layer configuration. 104 */ 105 if (stat->ino >> shift) { 106 pr_warn_ratelimited("overlayfs: inode number too big (%pd2, ino=%llu, xinobits=%d)\n", 107 dentry, stat->ino, xinobits); 108 } else { 109 if (lower_layer) 110 stat->ino |= ((u64)lower_layer->fsid) << shift; 111 112 stat->dev = dentry->d_sb->s_dev; 113 return 0; 114 } 115 } 116 117 /* The inode could not be mapped to a unified st_ino address space */ 118 if (S_ISDIR(dentry->d_inode->i_mode)) { 119 /* 120 * Always use the overlay st_dev for directories, so 'find 121 * -xdev' will scan the entire overlay mount and won't cross the 122 * overlay mount boundaries. 123 * 124 * If not all layers are on the same fs the pair {real st_ino; 125 * overlay st_dev} is not unique, so use the non persistent 126 * overlay st_ino for directories. 127 */ 128 stat->dev = dentry->d_sb->s_dev; 129 stat->ino = dentry->d_inode->i_ino; 130 } else if (lower_layer && lower_layer->fsid) { 131 /* 132 * For non-samefs setup, if we cannot map all layers st_ino 133 * to a unified address space, we need to make sure that st_dev 134 * is unique per lower fs. Upper layer uses real st_dev and 135 * lower layers use the unique anonymous bdev assigned to the 136 * lower fs. 137 */ 138 stat->dev = lower_layer->fs->pseudo_dev; 139 } 140 141 return 0; 142 } 143 144 int ovl_getattr(const struct path *path, struct kstat *stat, 145 u32 request_mask, unsigned int flags) 146 { 147 struct dentry *dentry = path->dentry; 148 enum ovl_path_type type; 149 struct path realpath; 150 const struct cred *old_cred; 151 bool is_dir = S_ISDIR(dentry->d_inode->i_mode); 152 bool samefs = ovl_same_sb(dentry->d_sb); 153 struct ovl_layer *lower_layer = NULL; 154 int err; 155 bool metacopy_blocks = false; 156 157 metacopy_blocks = ovl_is_metacopy_dentry(dentry); 158 159 type = ovl_path_real(dentry, &realpath); 160 old_cred = ovl_override_creds(dentry->d_sb); 161 err = vfs_getattr(&realpath, stat, request_mask, flags); 162 if (err) 163 goto out; 164 165 /* 166 * For non-dir or same fs, we use st_ino of the copy up origin. 167 * This guaranties constant st_dev/st_ino across copy up. 168 * With xino feature and non-samefs, we use st_ino of the copy up 169 * origin masked with high bits that represent the layer id. 170 * 171 * If lower filesystem supports NFS file handles, this also guaranties 172 * persistent st_ino across mount cycle. 173 */ 174 if (!is_dir || samefs || ovl_xino_bits(dentry->d_sb)) { 175 if (!OVL_TYPE_UPPER(type)) { 176 lower_layer = ovl_layer_lower(dentry); 177 } else if (OVL_TYPE_ORIGIN(type)) { 178 struct kstat lowerstat; 179 u32 lowermask = STATX_INO | STATX_BLOCKS | 180 (!is_dir ? STATX_NLINK : 0); 181 182 ovl_path_lower(dentry, &realpath); 183 err = vfs_getattr(&realpath, &lowerstat, 184 lowermask, flags); 185 if (err) 186 goto out; 187 188 /* 189 * Lower hardlinks may be broken on copy up to different 190 * upper files, so we cannot use the lower origin st_ino 191 * for those different files, even for the same fs case. 192 * 193 * Similarly, several redirected dirs can point to the 194 * same dir on a lower layer. With the "verify_lower" 195 * feature, we do not use the lower origin st_ino, if 196 * we haven't verified that this redirect is unique. 197 * 198 * With inodes index enabled, it is safe to use st_ino 199 * of an indexed origin. The index validates that the 200 * upper hardlink is not broken and that a redirected 201 * dir is the only redirect to that origin. 202 */ 203 if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) || 204 (!ovl_verify_lower(dentry->d_sb) && 205 (is_dir || lowerstat.nlink == 1))) { 206 stat->ino = lowerstat.ino; 207 lower_layer = ovl_layer_lower(dentry); 208 } 209 210 /* 211 * If we are querying a metacopy dentry and lower 212 * dentry is data dentry, then use the blocks we 213 * queried just now. We don't have to do additional 214 * vfs_getattr(). If lower itself is metacopy, then 215 * additional vfs_getattr() is unavoidable. 216 */ 217 if (metacopy_blocks && 218 realpath.dentry == ovl_dentry_lowerdata(dentry)) { 219 stat->blocks = lowerstat.blocks; 220 metacopy_blocks = false; 221 } 222 } 223 224 if (metacopy_blocks) { 225 /* 226 * If lower is not same as lowerdata or if there was 227 * no origin on upper, we can end up here. 228 */ 229 struct kstat lowerdatastat; 230 u32 lowermask = STATX_BLOCKS; 231 232 ovl_path_lowerdata(dentry, &realpath); 233 err = vfs_getattr(&realpath, &lowerdatastat, 234 lowermask, flags); 235 if (err) 236 goto out; 237 stat->blocks = lowerdatastat.blocks; 238 } 239 } 240 241 err = ovl_map_dev_ino(dentry, stat, lower_layer); 242 if (err) 243 goto out; 244 245 /* 246 * It's probably not worth it to count subdirs to get the 247 * correct link count. nlink=1 seems to pacify 'find' and 248 * other utilities. 249 */ 250 if (is_dir && OVL_TYPE_MERGE(type)) 251 stat->nlink = 1; 252 253 /* 254 * Return the overlay inode nlinks for indexed upper inodes. 255 * Overlay inode nlink counts the union of the upper hardlinks 256 * and non-covered lower hardlinks. It does not include the upper 257 * index hardlink. 258 */ 259 if (!is_dir && ovl_test_flag(OVL_INDEX, d_inode(dentry))) 260 stat->nlink = dentry->d_inode->i_nlink; 261 262 out: 263 revert_creds(old_cred); 264 265 return err; 266 } 267 268 int ovl_permission(struct inode *inode, int mask) 269 { 270 struct inode *upperinode = ovl_inode_upper(inode); 271 struct inode *realinode = upperinode ?: ovl_inode_lower(inode); 272 const struct cred *old_cred; 273 int err; 274 275 /* Careful in RCU walk mode */ 276 if (!realinode) { 277 WARN_ON(!(mask & MAY_NOT_BLOCK)); 278 return -ECHILD; 279 } 280 281 /* 282 * Check overlay inode with the creds of task and underlying inode 283 * with creds of mounter 284 */ 285 err = generic_permission(inode, mask); 286 if (err) 287 return err; 288 289 /* No need to do any access on underlying for special files */ 290 if (special_file(realinode->i_mode)) 291 return 0; 292 293 /* No need to access underlying for execute */ 294 mask &= ~MAY_EXEC; 295 if ((mask & (MAY_READ | MAY_WRITE)) == 0) 296 return 0; 297 298 /* Lower files get copied up, so turn write access into read */ 299 if (!upperinode && mask & MAY_WRITE) { 300 mask &= ~(MAY_WRITE | MAY_APPEND); 301 mask |= MAY_READ; 302 } 303 304 old_cred = ovl_override_creds(inode->i_sb); 305 err = inode_permission(realinode, mask); 306 revert_creds(old_cred); 307 308 return err; 309 } 310 311 static const char *ovl_get_link(struct dentry *dentry, 312 struct inode *inode, 313 struct delayed_call *done) 314 { 315 const struct cred *old_cred; 316 const char *p; 317 318 if (!dentry) 319 return ERR_PTR(-ECHILD); 320 321 old_cred = ovl_override_creds(dentry->d_sb); 322 p = vfs_get_link(ovl_dentry_real(dentry), done); 323 revert_creds(old_cred); 324 return p; 325 } 326 327 bool ovl_is_private_xattr(const char *name) 328 { 329 return strncmp(name, OVL_XATTR_PREFIX, 330 sizeof(OVL_XATTR_PREFIX) - 1) == 0; 331 } 332 333 int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, 334 const void *value, size_t size, int flags) 335 { 336 int err; 337 struct dentry *upperdentry = ovl_i_dentry_upper(inode); 338 struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); 339 const struct cred *old_cred; 340 341 err = ovl_want_write(dentry); 342 if (err) 343 goto out; 344 345 if (!value && !upperdentry) { 346 err = vfs_getxattr(realdentry, name, NULL, 0); 347 if (err < 0) 348 goto out_drop_write; 349 } 350 351 if (!upperdentry) { 352 err = ovl_copy_up(dentry); 353 if (err) 354 goto out_drop_write; 355 356 realdentry = ovl_dentry_upper(dentry); 357 } 358 359 old_cred = ovl_override_creds(dentry->d_sb); 360 if (value) 361 err = vfs_setxattr(realdentry, name, value, size, flags); 362 else { 363 WARN_ON(flags != XATTR_REPLACE); 364 err = vfs_removexattr(realdentry, name); 365 } 366 revert_creds(old_cred); 367 368 /* copy c/mtime */ 369 ovl_copyattr(d_inode(realdentry), inode); 370 371 out_drop_write: 372 ovl_drop_write(dentry); 373 out: 374 return err; 375 } 376 377 int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, 378 void *value, size_t size) 379 { 380 ssize_t res; 381 const struct cred *old_cred; 382 struct dentry *realdentry = 383 ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry); 384 385 old_cred = ovl_override_creds(dentry->d_sb); 386 res = vfs_getxattr(realdentry, name, value, size); 387 revert_creds(old_cred); 388 return res; 389 } 390 391 static bool ovl_can_list(const char *s) 392 { 393 /* List all non-trusted xatts */ 394 if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) 395 return true; 396 397 /* Never list trusted.overlay, list other trusted for superuser only */ 398 return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN); 399 } 400 401 ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) 402 { 403 struct dentry *realdentry = ovl_dentry_real(dentry); 404 ssize_t res; 405 size_t len; 406 char *s; 407 const struct cred *old_cred; 408 409 old_cred = ovl_override_creds(dentry->d_sb); 410 res = vfs_listxattr(realdentry, list, size); 411 revert_creds(old_cred); 412 if (res <= 0 || size == 0) 413 return res; 414 415 /* filter out private xattrs */ 416 for (s = list, len = res; len;) { 417 size_t slen = strnlen(s, len) + 1; 418 419 /* underlying fs providing us with an broken xattr list? */ 420 if (WARN_ON(slen > len)) 421 return -EIO; 422 423 len -= slen; 424 if (!ovl_can_list(s)) { 425 res -= slen; 426 memmove(s, s + slen, len); 427 } else { 428 s += slen; 429 } 430 } 431 432 return res; 433 } 434 435 struct posix_acl *ovl_get_acl(struct inode *inode, int type) 436 { 437 struct inode *realinode = ovl_inode_real(inode); 438 const struct cred *old_cred; 439 struct posix_acl *acl; 440 441 if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode)) 442 return NULL; 443 444 old_cred = ovl_override_creds(inode->i_sb); 445 acl = get_acl(realinode, type); 446 revert_creds(old_cred); 447 448 return acl; 449 } 450 451 int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags) 452 { 453 if (flags & S_ATIME) { 454 struct ovl_fs *ofs = inode->i_sb->s_fs_info; 455 struct path upperpath = { 456 .mnt = ofs->upper_mnt, 457 .dentry = ovl_upperdentry_dereference(OVL_I(inode)), 458 }; 459 460 if (upperpath.dentry) { 461 touch_atime(&upperpath); 462 inode->i_atime = d_inode(upperpath.dentry)->i_atime; 463 } 464 } 465 return 0; 466 } 467 468 static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 469 u64 start, u64 len) 470 { 471 int err; 472 struct inode *realinode = ovl_inode_real(inode); 473 const struct cred *old_cred; 474 475 if (!realinode->i_op->fiemap) 476 return -EOPNOTSUPP; 477 478 old_cred = ovl_override_creds(inode->i_sb); 479 480 if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) 481 filemap_write_and_wait(realinode->i_mapping); 482 483 err = realinode->i_op->fiemap(realinode, fieinfo, start, len); 484 revert_creds(old_cred); 485 486 return err; 487 } 488 489 static const struct inode_operations ovl_file_inode_operations = { 490 .setattr = ovl_setattr, 491 .permission = ovl_permission, 492 .getattr = ovl_getattr, 493 .listxattr = ovl_listxattr, 494 .get_acl = ovl_get_acl, 495 .update_time = ovl_update_time, 496 .fiemap = ovl_fiemap, 497 }; 498 499 static const struct inode_operations ovl_symlink_inode_operations = { 500 .setattr = ovl_setattr, 501 .get_link = ovl_get_link, 502 .getattr = ovl_getattr, 503 .listxattr = ovl_listxattr, 504 .update_time = ovl_update_time, 505 }; 506 507 static const struct inode_operations ovl_special_inode_operations = { 508 .setattr = ovl_setattr, 509 .permission = ovl_permission, 510 .getattr = ovl_getattr, 511 .listxattr = ovl_listxattr, 512 .get_acl = ovl_get_acl, 513 .update_time = ovl_update_time, 514 }; 515 516 static const struct address_space_operations ovl_aops = { 517 /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */ 518 .direct_IO = noop_direct_IO, 519 }; 520 521 /* 522 * It is possible to stack overlayfs instance on top of another 523 * overlayfs instance as lower layer. We need to annonate the 524 * stackable i_mutex locks according to stack level of the super 525 * block instance. An overlayfs instance can never be in stack 526 * depth 0 (there is always a real fs below it). An overlayfs 527 * inode lock will use the lockdep annotaion ovl_i_mutex_key[depth]. 528 * 529 * For example, here is a snip from /proc/lockdep_chains after 530 * dir_iterate of nested overlayfs: 531 * 532 * [...] &ovl_i_mutex_dir_key[depth] (stack_depth=2) 533 * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1) 534 * [...] &type->i_mutex_dir_key (stack_depth=0) 535 */ 536 #define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH 537 538 static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode) 539 { 540 #ifdef CONFIG_LOCKDEP 541 static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING]; 542 static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING]; 543 static struct lock_class_key ovl_i_lock_key[OVL_MAX_NESTING]; 544 545 int depth = inode->i_sb->s_stack_depth - 1; 546 547 if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING)) 548 depth = 0; 549 550 if (S_ISDIR(inode->i_mode)) 551 lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]); 552 else 553 lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]); 554 555 lockdep_set_class(&OVL_I(inode)->lock, &ovl_i_lock_key[depth]); 556 #endif 557 } 558 559 static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev, 560 unsigned long ino, int fsid) 561 { 562 int xinobits = ovl_xino_bits(inode->i_sb); 563 564 /* 565 * When NFS export is enabled and d_ino is consistent with st_ino 566 * (samefs or i_ino has enough bits to encode layer), set the same 567 * value used for d_ino to i_ino, because nfsd readdirplus compares 568 * d_ino values to i_ino values of child entries. When called from 569 * ovl_new_inode(), ino arg is 0, so i_ino will be updated to real 570 * upper inode i_ino on ovl_inode_init() or ovl_inode_update(). 571 */ 572 if (inode->i_sb->s_export_op && 573 (ovl_same_sb(inode->i_sb) || xinobits)) { 574 inode->i_ino = ino; 575 if (xinobits && fsid && !(ino >> (64 - xinobits))) 576 inode->i_ino |= (unsigned long)fsid << (64 - xinobits); 577 } else { 578 inode->i_ino = get_next_ino(); 579 } 580 inode->i_mode = mode; 581 inode->i_flags |= S_NOCMTIME; 582 #ifdef CONFIG_FS_POSIX_ACL 583 inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE; 584 #endif 585 586 ovl_lockdep_annotate_inode_mutex_key(inode); 587 588 switch (mode & S_IFMT) { 589 case S_IFREG: 590 inode->i_op = &ovl_file_inode_operations; 591 inode->i_fop = &ovl_file_operations; 592 inode->i_mapping->a_ops = &ovl_aops; 593 break; 594 595 case S_IFDIR: 596 inode->i_op = &ovl_dir_inode_operations; 597 inode->i_fop = &ovl_dir_operations; 598 break; 599 600 case S_IFLNK: 601 inode->i_op = &ovl_symlink_inode_operations; 602 break; 603 604 default: 605 inode->i_op = &ovl_special_inode_operations; 606 init_special_inode(inode, mode, rdev); 607 break; 608 } 609 } 610 611 /* 612 * With inodes index enabled, an overlay inode nlink counts the union of upper 613 * hardlinks and non-covered lower hardlinks. During the lifetime of a non-pure 614 * upper inode, the following nlink modifying operations can happen: 615 * 616 * 1. Lower hardlink copy up 617 * 2. Upper hardlink created, unlinked or renamed over 618 * 3. Lower hardlink whiteout or renamed over 619 * 620 * For the first, copy up case, the union nlink does not change, whether the 621 * operation succeeds or fails, but the upper inode nlink may change. 622 * Therefore, before copy up, we store the union nlink value relative to the 623 * lower inode nlink in the index inode xattr trusted.overlay.nlink. 624 * 625 * For the second, upper hardlink case, the union nlink should be incremented 626 * or decremented IFF the operation succeeds, aligned with nlink change of the 627 * upper inode. Therefore, before link/unlink/rename, we store the union nlink 628 * value relative to the upper inode nlink in the index inode. 629 * 630 * For the last, lower cover up case, we simplify things by preceding the 631 * whiteout or cover up with copy up. This makes sure that there is an index 632 * upper inode where the nlink xattr can be stored before the copied up upper 633 * entry is unlink. 634 */ 635 #define OVL_NLINK_ADD_UPPER (1 << 0) 636 637 /* 638 * On-disk format for indexed nlink: 639 * 640 * nlink relative to the upper inode - "U[+-]NUM" 641 * nlink relative to the lower inode - "L[+-]NUM" 642 */ 643 644 static int ovl_set_nlink_common(struct dentry *dentry, 645 struct dentry *realdentry, const char *format) 646 { 647 struct inode *inode = d_inode(dentry); 648 struct inode *realinode = d_inode(realdentry); 649 char buf[13]; 650 int len; 651 652 len = snprintf(buf, sizeof(buf), format, 653 (int) (inode->i_nlink - realinode->i_nlink)); 654 655 if (WARN_ON(len >= sizeof(buf))) 656 return -EIO; 657 658 return ovl_do_setxattr(ovl_dentry_upper(dentry), 659 OVL_XATTR_NLINK, buf, len, 0); 660 } 661 662 int ovl_set_nlink_upper(struct dentry *dentry) 663 { 664 return ovl_set_nlink_common(dentry, ovl_dentry_upper(dentry), "U%+i"); 665 } 666 667 int ovl_set_nlink_lower(struct dentry *dentry) 668 { 669 return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i"); 670 } 671 672 unsigned int ovl_get_nlink(struct dentry *lowerdentry, 673 struct dentry *upperdentry, 674 unsigned int fallback) 675 { 676 int nlink_diff; 677 int nlink; 678 char buf[13]; 679 int err; 680 681 if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1) 682 return fallback; 683 684 err = vfs_getxattr(upperdentry, OVL_XATTR_NLINK, &buf, sizeof(buf) - 1); 685 if (err < 0) 686 goto fail; 687 688 buf[err] = '\0'; 689 if ((buf[0] != 'L' && buf[0] != 'U') || 690 (buf[1] != '+' && buf[1] != '-')) 691 goto fail; 692 693 err = kstrtoint(buf + 1, 10, &nlink_diff); 694 if (err < 0) 695 goto fail; 696 697 nlink = d_inode(buf[0] == 'L' ? lowerdentry : upperdentry)->i_nlink; 698 nlink += nlink_diff; 699 700 if (nlink <= 0) 701 goto fail; 702 703 return nlink; 704 705 fail: 706 pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n", 707 upperdentry, err); 708 return fallback; 709 } 710 711 struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev) 712 { 713 struct inode *inode; 714 715 inode = new_inode(sb); 716 if (inode) 717 ovl_fill_inode(inode, mode, rdev, 0, 0); 718 719 return inode; 720 } 721 722 static int ovl_inode_test(struct inode *inode, void *data) 723 { 724 return inode->i_private == data; 725 } 726 727 static int ovl_inode_set(struct inode *inode, void *data) 728 { 729 inode->i_private = data; 730 return 0; 731 } 732 733 static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, 734 struct dentry *upperdentry, bool strict) 735 { 736 /* 737 * For directories, @strict verify from lookup path performs consistency 738 * checks, so NULL lower/upper in dentry must match NULL lower/upper in 739 * inode. Non @strict verify from NFS handle decode path passes NULL for 740 * 'unknown' lower/upper. 741 */ 742 if (S_ISDIR(inode->i_mode) && strict) { 743 /* Real lower dir moved to upper layer under us? */ 744 if (!lowerdentry && ovl_inode_lower(inode)) 745 return false; 746 747 /* Lookup of an uncovered redirect origin? */ 748 if (!upperdentry && ovl_inode_upper(inode)) 749 return false; 750 } 751 752 /* 753 * Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL. 754 * This happens when finding a copied up overlay inode for a renamed 755 * or hardlinked overlay dentry and lower dentry cannot be followed 756 * by origin because lower fs does not support file handles. 757 */ 758 if (lowerdentry && ovl_inode_lower(inode) != d_inode(lowerdentry)) 759 return false; 760 761 /* 762 * Allow non-NULL __upperdentry in inode even if upperdentry is NULL. 763 * This happens when finding a lower alias for a copied up hard link. 764 */ 765 if (upperdentry && ovl_inode_upper(inode) != d_inode(upperdentry)) 766 return false; 767 768 return true; 769 } 770 771 struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, 772 bool is_upper) 773 { 774 struct inode *inode, *key = d_inode(real); 775 776 inode = ilookup5(sb, (unsigned long) key, ovl_inode_test, key); 777 if (!inode) 778 return NULL; 779 780 if (!ovl_verify_inode(inode, is_upper ? NULL : real, 781 is_upper ? real : NULL, false)) { 782 iput(inode); 783 return ERR_PTR(-ESTALE); 784 } 785 786 return inode; 787 } 788 789 /* 790 * Does overlay inode need to be hashed by lower inode? 791 */ 792 static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper, 793 struct dentry *lower, struct dentry *index) 794 { 795 struct ovl_fs *ofs = sb->s_fs_info; 796 797 /* No, if pure upper */ 798 if (!lower) 799 return false; 800 801 /* Yes, if already indexed */ 802 if (index) 803 return true; 804 805 /* Yes, if won't be copied up */ 806 if (!ofs->upper_mnt) 807 return true; 808 809 /* No, if lower hardlink is or will be broken on copy up */ 810 if ((upper || !ovl_indexdir(sb)) && 811 !d_is_dir(lower) && d_inode(lower)->i_nlink > 1) 812 return false; 813 814 /* No, if non-indexed upper with NFS export */ 815 if (sb->s_export_op && upper) 816 return false; 817 818 /* Otherwise, hash by lower inode for fsnotify */ 819 return true; 820 } 821 822 static struct inode *ovl_iget5(struct super_block *sb, struct inode *newinode, 823 struct inode *key) 824 { 825 return newinode ? inode_insert5(newinode, (unsigned long) key, 826 ovl_inode_test, ovl_inode_set, key) : 827 iget5_locked(sb, (unsigned long) key, 828 ovl_inode_test, ovl_inode_set, key); 829 } 830 831 struct inode *ovl_get_inode(struct super_block *sb, 832 struct ovl_inode_params *oip) 833 { 834 struct dentry *upperdentry = oip->upperdentry; 835 struct ovl_path *lowerpath = oip->lowerpath; 836 struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; 837 struct inode *inode; 838 struct dentry *lowerdentry = lowerpath ? lowerpath->dentry : NULL; 839 bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, 840 oip->index); 841 int fsid = bylower ? oip->lowerpath->layer->fsid : 0; 842 bool is_dir, metacopy = false; 843 unsigned long ino = 0; 844 int err = -ENOMEM; 845 846 if (!realinode) 847 realinode = d_inode(lowerdentry); 848 849 /* 850 * Copy up origin (lower) may exist for non-indexed upper, but we must 851 * not use lower as hash key if this is a broken hardlink. 852 */ 853 is_dir = S_ISDIR(realinode->i_mode); 854 if (upperdentry || bylower) { 855 struct inode *key = d_inode(bylower ? lowerdentry : 856 upperdentry); 857 unsigned int nlink = is_dir ? 1 : realinode->i_nlink; 858 859 inode = ovl_iget5(sb, oip->newinode, key); 860 if (!inode) 861 goto out_err; 862 if (!(inode->i_state & I_NEW)) { 863 /* 864 * Verify that the underlying files stored in the inode 865 * match those in the dentry. 866 */ 867 if (!ovl_verify_inode(inode, lowerdentry, upperdentry, 868 true)) { 869 iput(inode); 870 err = -ESTALE; 871 goto out_err; 872 } 873 874 dput(upperdentry); 875 kfree(oip->redirect); 876 goto out; 877 } 878 879 /* Recalculate nlink for non-dir due to indexing */ 880 if (!is_dir) 881 nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink); 882 set_nlink(inode, nlink); 883 ino = key->i_ino; 884 } else { 885 /* Lower hardlink that will be broken on copy up */ 886 inode = new_inode(sb); 887 if (!inode) { 888 err = -ENOMEM; 889 goto out_err; 890 } 891 } 892 ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid); 893 ovl_inode_init(inode, upperdentry, lowerdentry, oip->lowerdata); 894 895 if (upperdentry && ovl_is_impuredir(upperdentry)) 896 ovl_set_flag(OVL_IMPURE, inode); 897 898 if (oip->index) 899 ovl_set_flag(OVL_INDEX, inode); 900 901 if (upperdentry) { 902 err = ovl_check_metacopy_xattr(upperdentry); 903 if (err < 0) 904 goto out_err; 905 metacopy = err; 906 if (!metacopy) 907 ovl_set_flag(OVL_UPPERDATA, inode); 908 } 909 910 OVL_I(inode)->redirect = oip->redirect; 911 912 if (bylower) 913 ovl_set_flag(OVL_CONST_INO, inode); 914 915 /* Check for non-merge dir that may have whiteouts */ 916 if (is_dir) { 917 if (((upperdentry && lowerdentry) || oip->numlower > 1) || 918 ovl_check_origin_xattr(upperdentry ?: lowerdentry)) { 919 ovl_set_flag(OVL_WHITEOUTS, inode); 920 } 921 } 922 923 if (inode->i_state & I_NEW) 924 unlock_new_inode(inode); 925 out: 926 return inode; 927 928 out_err: 929 inode = ERR_PTR(err); 930 goto out; 931 } 932