// SPDX-License-Identifier: GPL-2.0-only
/*
 *
 * Copyright (C) 2011 Novell Inc.
 */

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/cred.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
#include <linux/ratelimit.h>
#include <linux/fiemap.h>
#include <linux/fileattr.h>
#include <linux/security.h>
#include <linux/namei.h>
#include <linux/posix_acl.h>
#include <linux/posix_acl_xattr.h>
#include "overlayfs.h"


/*
 * Change the attributes of an overlay dentry.
 *
 * The request is validated with setattr_prepare(), the dentry is copied up
 * (including data when ATTR_SIZE is set) and the change is then applied to
 * the upper dentry between ovl_override_creds() and revert_creds(). On
 * success the overlay inode is refreshed with ovl_copyattr().
 *
 * @idmap is not consulted here; &nop_mnt_idmap is passed to setattr_prepare().
 *
 * Returns 0 on success or the error of the first step that failed.
 */
int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
		struct iattr *attr)
{
	int err;
	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
	bool full_copy_up = false;
	struct dentry *upperdentry;
	const struct cred *old_cred;

	err = setattr_prepare(&nop_mnt_idmap, dentry, attr);
	if (err)
		return err;

	/* Paired with ovl_drop_write() at out_drop_write */
	err = ovl_want_write(dentry);
	if (err)
		goto out;

	if (attr->ia_valid & ATTR_SIZE) {
		/* Truncate should trigger data copy up as well */
		full_copy_up = true;
	}

	if (!full_copy_up)
		err = ovl_copy_up(dentry);
	else
		err = ovl_copy_up_with_data(dentry);
	if (!err) {
		/* Non-NULL only while write access is held for a size change */
		struct inode *winode = NULL;

		upperdentry = ovl_dentry_upper(dentry);

		if (attr->ia_valid & ATTR_SIZE) {
			winode = d_inode(upperdentry);
			err = get_write_access(winode);
			if (err)
				goto out_drop_write;
		}

		/*
		 * When suid/sgid killing was requested, do not pass an
		 * explicit mode change down together with it.
		 */
		if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
			attr->ia_valid &= ~ATTR_MODE;

		/*
		 * We might have to translate ovl file into real file object
		 * once use cases emerge.  For now, simply don't let underlying
		 * filesystem rely on attr->ia_file
		 */
		attr->ia_valid &= ~ATTR_FILE;

		/*
		 * If open(O_TRUNC) is done, VFS calls ->setattr with ATTR_OPEN
		 * set.  Overlayfs does not pass O_TRUNC flag to underlying
		 * filesystem during open -> do not pass ATTR_OPEN.  This
		 * disables optimization in fuse which assumes open(O_TRUNC)
		 * already set file size to 0.  But we never passed O_TRUNC to
		 * fuse.  So by clearing ATTR_OPEN, fuse will be forced to send
		 * setattr request to server.
		 */
		attr->ia_valid &= ~ATTR_OPEN;

		/*
		 * The upper inode is locked around the change; the creds are
		 * overridden only for the ovl_do_notify_change() call itself.
		 */
		inode_lock(upperdentry->d_inode);
		old_cred = ovl_override_creds(dentry->d_sb);
		err = ovl_do_notify_change(ofs, upperdentry, attr);
		revert_creds(old_cred);
		if (!err)
			ovl_copyattr(dentry->d_inode);
		inode_unlock(upperdentry->d_inode);

		if (winode)
			put_write_access(winode);
	}
out_drop_write:
	ovl_drop_write(dentry);
out:
	return err;
}

/*
 * Choose the st_dev (and, where needed, adjust the st_ino) reported for
 * @dentry.
 *
 * @stat must already hold the st_ino of the real inode. @fsid is encoded into
 * the high bits of st_ino in the xino case and indexes OVL_FS(sb)->fs[] when
 * the per-fs pseudo_dev has to be used.
 */
static void ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid)
{
	bool samefs = ovl_same_fs(dentry->d_sb);
	unsigned int xinobits = ovl_xino_bits(dentry->d_sb);
	/* Only used as a shift count when xinobits is non-zero */
	unsigned int xinoshift = 64 - xinobits;

	if (samefs) {
		/*
		 * When all layers are on the same fs, all real inode
		 * numbers are unique, so we use the overlay st_dev,
		 * which is friendly to du -x.
		 */
		stat->dev = dentry->d_sb->s_dev;
		return;
	} else if (xinobits) {
		/*
		 * All inode numbers of underlying fs should not be using the
		 * high xinobits, so we use high xinobits to partition the
		 * overlay st_ino address space. The high bits hold the fsid
		 * (upper fsid is 0). The lowest xinobit is reserved for mapping
		 * the non-persistent inode numbers range in case of overflow.
		 * This way all overlay inode numbers are unique and use the
		 * overlay st_dev.
		 */
		if (likely(!(stat->ino >> xinoshift))) {
			stat->ino |= ((u64)fsid) << (xinoshift + 1);
			stat->dev = dentry->d_sb->s_dev;
			return;
		} else if (ovl_xino_warn(dentry->d_sb)) {
			pr_warn_ratelimited("inode number too big (%pd2, ino=%llu, xinobits=%d)\n",
					    dentry, stat->ino, xinobits);
		}
	}

	/* The inode could not be mapped to a unified st_ino address space */
	if (S_ISDIR(dentry->d_inode->i_mode)) {
		/*
		 * Always use the overlay st_dev for directories, so 'find
		 * -xdev' will scan the entire overlay mount and won't cross the
		 * overlay mount boundaries.
		 *
		 * If not all layers are on the same fs the pair {real st_ino;
		 * overlay st_dev} is not unique, so use the non persistent
		 * overlay st_ino for directories.
		 */
		stat->dev = dentry->d_sb->s_dev;
		stat->ino = dentry->d_inode->i_ino;
	} else {
		/*
		 * For non-samefs setup, if we cannot map all layers st_ino
		 * to a unified address space, we need to make sure that st_dev
		 * is unique per underlying fs, so we use the unique anonymous
		 * bdev assigned to the underlying fs.
		 */
		stat->dev = OVL_FS(dentry->d_sb)->fs[fsid].pseudo_dev;
	}
}

/*
 * Fill @stat for the overlay dentry in @path.
 *
 * The attributes are read from the real path with vfs_getattr() while the
 * creds are overridden; st_ino, st_dev, st_blocks and st_nlink are then
 * adjusted as described by the comments in the body.
 *
 * @idmap is not referenced in this function.
 *
 * Returns 0 on success or the error of a failing vfs_getattr() call.
 */
int ovl_getattr(struct mnt_idmap *idmap, const struct path *path,
		struct kstat *stat, u32 request_mask, unsigned int flags)
{
	struct dentry *dentry = path->dentry;
	enum ovl_path_type type;
	struct path realpath;
	const struct cred *old_cred;
	struct inode *inode = d_inode(dentry);
	bool is_dir = S_ISDIR(inode->i_mode);
	int fsid = 0;
	int err;
	/* True while st_blocks still has to be taken from the lower data */
	bool metacopy_blocks = false;

	metacopy_blocks = ovl_is_metacopy_dentry(dentry);

	type = ovl_path_real(dentry, &realpath);
	/* Creds stay overridden until the 'out' label */
	old_cred = ovl_override_creds(dentry->d_sb);
	err = vfs_getattr(&realpath, stat, request_mask, flags);
	if (err)
		goto out;

	/* Report the effective immutable/append-only STATX flags */
	generic_fill_statx_attr(inode, stat);

	/*
	 * For non-dir or same fs, we use st_ino of the copy up origin.
	 * This guarantees constant st_dev/st_ino across copy up.
	 * With xino feature and non-samefs, we use st_ino of the copy up
	 * origin masked with high bits that represent the layer id.
	 *
	 * If lower filesystem supports NFS file handles, this also guarantees
	 * persistent st_ino across mount cycle.
	 */
	if (!is_dir || ovl_same_dev(dentry->d_sb)) {
		if (!OVL_TYPE_UPPER(type)) {
			fsid = ovl_layer_lower(dentry)->fsid;
		} else if (OVL_TYPE_ORIGIN(type)) {
			struct kstat lowerstat;
			u32 lowermask = STATX_INO | STATX_BLOCKS |
					(!is_dir ? STATX_NLINK : 0);

			/* realpath is reused and now refers to the lower path */
			ovl_path_lower(dentry, &realpath);
			err = vfs_getattr(&realpath, &lowerstat,
					  lowermask, flags);
			if (err)
				goto out;

			/*
			 * Lower hardlinks may be broken on copy up to different
			 * upper files, so we cannot use the lower origin st_ino
			 * for those different files, even for the same fs case.
			 *
			 * Similarly, several redirected dirs can point to the
			 * same dir on a lower layer. With the "verify_lower"
			 * feature, we do not use the lower origin st_ino, if
			 * we haven't verified that this redirect is unique.
			 *
			 * With inodes index enabled, it is safe to use st_ino
			 * of an indexed origin. The index validates that the
			 * upper hardlink is not broken and that a redirected
			 * dir is the only redirect to that origin.
			 */
			if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) ||
			    (!ovl_verify_lower(dentry->d_sb) &&
			     (is_dir || lowerstat.nlink == 1))) {
				fsid = ovl_layer_lower(dentry)->fsid;
				stat->ino = lowerstat.ino;
			}

			/*
			 * If we are querying a metacopy dentry and lower
			 * dentry is data dentry, then use the blocks we
			 * queried just now. We don't have to do additional
			 * vfs_getattr(). If lower itself is metacopy, then
			 * additional vfs_getattr() is unavoidable.
			 */
			if (metacopy_blocks &&
			    realpath.dentry == ovl_dentry_lowerdata(dentry)) {
				stat->blocks = lowerstat.blocks;
				metacopy_blocks = false;
			}
		}

		if (metacopy_blocks) {
			/*
			 * If lower is not same as lowerdata or if there was
			 * no origin on upper, we can end up here.
			 */
			struct kstat lowerdatastat;
			u32 lowermask = STATX_BLOCKS;

			ovl_path_lowerdata(dentry, &realpath);
			err = vfs_getattr(&realpath, &lowerdatastat,
					  lowermask, flags);
			if (err)
				goto out;
			stat->blocks = lowerdatastat.blocks;
		}
	}

	ovl_map_dev_ino(dentry, stat, fsid);

	/*
	 * It's probably not worth it to count subdirs to get the
	 * correct link count.  nlink=1 seems to pacify 'find' and
	 * other utilities.
	 */
	if (is_dir && OVL_TYPE_MERGE(type))
		stat->nlink = 1;

	/*
	 * Return the overlay inode nlinks for indexed upper inodes.
	 * Overlay inode nlink counts the union of the upper hardlinks
	 * and non-covered lower hardlinks. It does not include the upper
	 * index hardlink.
	 */
	if (!is_dir && ovl_test_flag(OVL_INDEX, d_inode(dentry)))
		stat->nlink = dentry->d_inode->i_nlink;

out:
	revert_creds(old_cred);

	return err;
}

/*
 * Permission check for an overlay inode.
 *
 * The overlay inode is checked first with generic_permission() using the
 * caller's creds; the real inode is then checked with inode_permission()
 * while the creds are overridden.
 *
 * @idmap is not used as a variable here: &nop_mnt_idmap is passed for the
 * overlay inode and the idmap of the real mount for the real inode.
 *
 * Returns 0 if access is granted, -ECHILD if the real inode could not be
 * obtained, or the error of the failing permission check.
 */
int ovl_permission(struct mnt_idmap *idmap,
		   struct inode *inode, int mask)
{
	struct inode *upperinode = ovl_inode_upper(inode);
	struct inode *realinode;
	struct path realpath;
	const struct cred *old_cred;
	int err;

	/* Careful in RCU walk mode */
	realinode = ovl_i_path_real(inode, &realpath);
	if (!realinode) {
		/* Only expected when the caller asked not to block */
		WARN_ON(!(mask & MAY_NOT_BLOCK));
		return -ECHILD;
	}

	/*
	 * Check overlay inode with the creds of task and underlying inode
	 * with creds of mounter
	 */
	err = generic_permission(&nop_mnt_idmap, inode, mask);
	if (err)
		return err;

	old_cred = ovl_override_creds(inode->i_sb);
	/*
	 * Write request on a non-special file that has no upper inode: the
	 * real inode is checked for read access instead of write access.
	 */
	if (!upperinode &&
	    !special_file(realinode->i_mode) && mask & MAY_WRITE) {
		mask &= ~(MAY_WRITE | MAY_APPEND);
		/* Make sure mounter can read file for copy up later */
		mask |= MAY_READ;
	}
	err = inode_permission(mnt_idmap(realpath.mnt), realinode, mask);
	revert_creds(old_cred);

	return err;
}

/*
 * ->get_link() for overlay symlinks: read the link of the real dentry with
 * vfs_get_link() while the creds are overridden.
 *
 * Returns ERR_PTR(-ECHILD) when called without a dentry.
 */
static const char *ovl_get_link(struct dentry *dentry,
				struct inode *inode,
				struct delayed_call *done)
{
	const struct cred *old_cred;
	const char *p;

	if (!dentry)
		return ERR_PTR(-ECHILD);

	old_cred = ovl_override_creds(dentry->d_sb);
	p = vfs_get_link(ovl_dentry_real(dentry), done);
	revert_creds(old_cred);
	return p;
}

/*
 * Return true if @name starts with the overlay private xattr prefix in use
 * for @sb: OVL_XATTR_USER_PREFIX when config.userxattr is set, otherwise
 * OVL_XATTR_TRUSTED_PREFIX.
 */
bool ovl_is_private_xattr(struct super_block *sb, const char *name)
{
	struct ovl_fs *ofs = sb->s_fs_info;

	if (ofs->config.userxattr)
		return strncmp(name, OVL_XATTR_USER_PREFIX,
			       sizeof(OVL_XATTR_USER_PREFIX) - 1) == 0;
	else
		return strncmp(name, OVL_XATTR_TRUSTED_PREFIX,
			       sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1) == 0;
}

/*
 * Set (@value != NULL) or remove (@value == NULL) the xattr @name.
 *
 * The operation is done on the upper dentry, copying up first if needed.
 * A removal from a dentry without an upper first queries the xattr on the
 * lower dentry, so that a failing query returns before any copy up happens.
 *
 * Returns 0 on success or a negative error.
 */
int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
		  const void *value, size_t size, int flags)
{
	int err;
	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
	struct dentry *upperdentry = ovl_i_dentry_upper(inode);
	struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry);
	struct path realpath;
	const struct cred *old_cred;

	err = ovl_want_write(dentry);
	if (err)
		goto out;

	if (!value && !upperdentry) {
		ovl_path_lower(dentry, &realpath);
		old_cred = ovl_override_creds(dentry->d_sb);
		/* NULL buffer with size 0: only the result code is of interest */
		err = vfs_getxattr(mnt_idmap(realpath.mnt), realdentry, name, NULL, 0);
		revert_creds(old_cred);
		if (err < 0)
			goto out_drop_write;
	}

	if (!upperdentry) {
		err = ovl_copy_up(dentry);
		if (err)
			goto out_drop_write;

		realdentry = ovl_dentry_upper(dentry);
	}

	old_cred = ovl_override_creds(dentry->d_sb);
	if (value) {
		err = ovl_do_setxattr(ofs, realdentry, name, value, size,
				      flags);
	} else {
		WARN_ON(flags != XATTR_REPLACE);
		err = ovl_do_removexattr(ofs, realdentry, name);
	}
	revert_creds(old_cred);

	/* copy c/mtime */
	ovl_copyattr(inode);

out_drop_write:
	ovl_drop_write(dentry);
out:
	return err;
}

/*
 * Read the xattr @name from the real dentry of @inode with vfs_getxattr()
 * while the creds are overridden, and return its result unchanged.
 */
int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
		  void *value, size_t size)
{
	ssize_t res;
	const struct cred *old_cred;
	struct path realpath;

	ovl_i_path_real(inode, &realpath);
	old_cred = ovl_override_creds(dentry->d_sb);
	res = vfs_getxattr(mnt_idmap(realpath.mnt), realpath.dentry, name, value, size);
	revert_creds(old_cred);
	return res;
}

/* Decide whether the xattr name @s may appear in the listxattr output */
static bool ovl_can_list(struct super_block *sb, const char *s)
{
	/* Never list private (.overlay) */
	if (ovl_is_private_xattr(sb, s))
		return false;

	/* List all non-trusted xattrs */
	if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
		return true;

	/* list other trusted for superuser only */
	return ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
}

/*
 * List the xattrs of the real dentry and remove from @list the names that
 * ovl_can_list() rejects.
 *
 * When vfs_listxattr() returns an error or 0, or when @size is 0, its result
 * is returned without filtering. Otherwise the filtered length is returned,
 * or -EIO if the list is not made of NUL terminated names.
 */
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
{
	struct dentry *realdentry = ovl_dentry_real(dentry);
	ssize_t res;
	size_t len;
	char *s;
	const struct cred *old_cred;

	old_cred = ovl_override_creds(dentry->d_sb);
	res = vfs_listxattr(realdentry, list, size);
	revert_creds(old_cred);
	if (res <= 0 || size == 0)
		return res;

	/* filter out private xattrs */
	for (s = list, len = res; len;) {
		/* Length of the current name including its terminating NUL */
		size_t slen = strnlen(s, len) + 1;

		/* underlying fs providing us with a broken xattr list? */
		if (WARN_ON(slen > len))
			return -EIO;

		len -= slen;
		if (!ovl_can_list(dentry->d_sb, s)) {
			/* Drop this name by moving the remainder over it */
			res -= slen;
			memmove(s, s + slen, len);
		} else {
			s += slen;
		}
	}

	return res;
}

#ifdef CONFIG_FS_POSIX_ACL
/*
 * Apply the idmapping of the layer to POSIX ACLs. The caller must pass a clone
 * of the POSIX ACLs retrieved from the lower layer to this function to not
 * alter the POSIX ACLs for the underlying filesystem.
 */
static void ovl_idmap_posix_acl(const struct inode *realinode,
				struct mnt_idmap *idmap,
				struct posix_acl *acl)
{
	struct user_namespace *fs_userns = i_user_ns(realinode);

	for (unsigned int i = 0; i < acl->a_count; i++) {
		vfsuid_t vfsuid;
		vfsgid_t vfsgid;

		struct posix_acl_entry *e = &acl->a_entries[i];
		/* Only ACL_USER and ACL_GROUP entries are rewritten */
		switch (e->e_tag) {
		case ACL_USER:
			vfsuid = make_vfsuid(idmap, fs_userns, e->e_uid);
			e->e_uid = vfsuid_into_kuid(vfsuid);
			break;
		case ACL_GROUP:
			vfsgid = make_vfsgid(idmap, fs_userns, e->e_gid);
			e->e_gid = vfsgid_into_kgid(vfsgid);
			break;
		}
	}
}

/*
 * The @noperm argument is used to skip permission checking and is a temporary
 * measure.
* Quoting Miklos from an earlier discussion:
 *
 * > So there are two paths to getting an acl:
 * > 1) permission checking and 2) retrieving the value via getxattr(2).
 * > This is a similar situation as reading a symlink vs. following it.
 * > When following a symlink overlayfs always reads the link on the
 * > underlying fs just as if it was a readlink(2) call, calling
 * > security_inode_readlink() instead of security_inode_follow_link().
 * > This is logical: we are reading the link from the underlying storage,
 * > and following it on overlayfs.
 * >
 * > Applying the same logic to acl: we do need to call the
 * > security_inode_getxattr() on the underlying fs, even if just want to
 * > check permissions on overlay. This is currently not done, which is an
 * > inconsistency.
 * >
 * > Maybe adding the check to ovl_get_acl() is the right way to go, but
 * > I'm a little afraid of a performance regression.  Will look into that.
 *
 * Until we have made a decision allow this helper to take the @noperm
 * argument. We should hopefully be able to remove it soon.
 *
 * Returns the ACL of @path (a translated clone when the mount is idmapped),
 * NULL or an ERR_PTR() as returned by the lookup, or ERR_PTR(-ENOMEM) if the
 * clone cannot be allocated.
 */
struct posix_acl *ovl_get_acl_path(const struct path *path,
				   const char *acl_name, bool noperm)
{
	struct posix_acl *real_acl, *clone;
	struct mnt_idmap *idmap;
	struct inode *realinode = d_inode(path->dentry);

	idmap = mnt_idmap(path->mnt);

	if (noperm)
		real_acl = get_inode_acl(realinode, posix_acl_type(acl_name));
	else
		real_acl = vfs_get_acl(idmap, path->dentry, acl_name);
	if (IS_ERR_OR_NULL(real_acl))
		return real_acl;

	/* Without an idmapped mount the ACL is returned as is */
	if (!is_idmapped_mnt(path->mnt))
		return real_acl;

	/*
	 * We cannot alter the ACLs returned from the relevant layer as that
	 * would alter the cached values filesystem wide for the lower
	 * filesystem. Instead we can clone the ACLs and then apply the
	 * relevant idmapping of the layer.
	 */
	clone = posix_acl_clone(real_acl, GFP_KERNEL);
	posix_acl_release(real_acl); /* release original acl */
	if (!clone)
		return ERR_PTR(-ENOMEM);

	ovl_idmap_posix_acl(realinode, idmap, clone);
	return clone;
}

/*
 * When the relevant layer is an idmapped mount we need to take the idmapping
 * of the layer into account and translate any ACL_{GROUP,USER} values
 * according to the idmapped mount.
 *
 * We cannot alter the ACLs returned from the relevant layer as that would
 * alter the cached values filesystem wide for the lower filesystem. Instead we
 * can clone the ACLs and then apply the relevant idmapping of the layer.
 *
 * This is obviously only relevant when idmapped layers are used.
 *
 * @idmap is not referenced in this function; the idmap of the real path is
 * picked up inside ovl_get_acl_path().
 *
 * Returns the ACL, NULL when the real inode does not have POSIX ACLs enabled
 * (or has no such ACL), or an ERR_PTR(); ERR_PTR(-ECHILD) is returned when the
 * real inode is unavailable or when @rcu is set and the layer is idmapped.
 */
struct posix_acl *do_ovl_get_acl(struct mnt_idmap *idmap,
				 struct inode *inode, int type,
				 bool rcu, bool noperm)
{
	struct inode *realinode;
	struct posix_acl *acl;
	struct path realpath;

	/* Careful in RCU walk mode */
	realinode = ovl_i_path_real(inode, &realpath);
	if (!realinode) {
		WARN_ON(!rcu);
		return ERR_PTR(-ECHILD);
	}

	if (!IS_POSIXACL(realinode))
		return NULL;

	if (rcu) {
		/*
		 * If the layer is idmapped drop out of RCU path walk
		 * so we can clone the ACLs.
		 */
		if (is_idmapped_mnt(realpath.mnt))
			return ERR_PTR(-ECHILD);

		acl = get_cached_acl_rcu(realinode, type);
	} else {
		const struct cred *old_cred;

		old_cred = ovl_override_creds(inode->i_sb);
		acl = ovl_get_acl_path(&realpath, posix_acl_xattr_name(type), noperm);
		revert_creds(old_cred);
	}

	return acl;
}

/*
 * Set (@acl != NULL) or remove (@acl == NULL) the ACL of the given @type on
 * the upper dentry, copying up first if needed.
 *
 * Returns 0 on success or a negative error.
 */
static int ovl_set_or_remove_acl(struct dentry *dentry, struct inode *inode,
				 struct posix_acl *acl, int type)
{
	int err;
	struct path realpath;
	const char *acl_name;
	const struct cred *old_cred;
	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
	struct dentry *upperdentry = ovl_dentry_upper(dentry);
	struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry);

	err = ovl_want_write(dentry);
	if (err)
		return err;

	/*
	 * If ACL is to be removed from a lower file, check if it exists in
	 * the first place before copying it up.
	 */
	acl_name = posix_acl_xattr_name(type);
	if (!acl && !upperdentry) {
		struct posix_acl *real_acl;

		ovl_path_lower(dentry, &realpath);
		old_cred = ovl_override_creds(dentry->d_sb);
		real_acl = vfs_get_acl(mnt_idmap(realpath.mnt), realdentry,
				       acl_name);
		revert_creds(old_cred);
		if (IS_ERR(real_acl)) {
			err = PTR_ERR(real_acl);
			goto out_drop_write;
		}
		/* Only the lookup result mattered; drop the reference */
		posix_acl_release(real_acl);
	}

	if (!upperdentry) {
		err = ovl_copy_up(dentry);
		if (err)
			goto out_drop_write;

		realdentry = ovl_dentry_upper(dentry);
	}

	old_cred = ovl_override_creds(dentry->d_sb);
	if (acl)
		err = ovl_do_set_acl(ofs, realdentry, acl_name, acl);
	else
		err = ovl_do_remove_acl(ofs, realdentry, acl_name);
	revert_creds(old_cred);

	/* copy c/mtime */
	ovl_copyattr(inode);

out_drop_write:
	ovl_drop_write(dentry);
	return err;
}

/*
 * ->set_acl() for overlay inodes.
 *
 * Performs the checks below with the caller's creds, clears the sgid bit via
 * ovl_setattr() when required, and then hands over to
 * ovl_set_or_remove_acl().
 *
 * @idmap is not consulted; &nop_mnt_idmap is used for the checks.
 *
 * Returns 0 on success, -EOPNOTSUPP if the workdir inode has no POSIX ACL
 * support or the real inode has no ->set_acl, -EACCES when a default ACL is
 * set on a non-directory (0 when it is removed), -EPERM if the caller is
 * neither owner nor capable, or the error of a later step.
 */
int ovl_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
		struct posix_acl *acl, int type)
{
	int err;
	struct inode *inode = d_inode(dentry);
	struct dentry *workdir = ovl_workdir(dentry);
	struct inode *realinode = ovl_inode_real(inode);

	if (!IS_POSIXACL(d_inode(workdir)))
		return -EOPNOTSUPP;
	if (!realinode->i_op->set_acl)
		return -EOPNOTSUPP;
	if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
		return acl ? -EACCES : 0;
	if (!inode_owner_or_capable(&nop_mnt_idmap, inode))
		return -EPERM;

	/*
	 * Check if sgid bit needs to be cleared (actual setacl operation will
	 * be done with mounter's capabilities and so that won't do it for us).
	 */
	if (unlikely(inode->i_mode & S_ISGID) && type == ACL_TYPE_ACCESS &&
	    !in_group_p(inode->i_gid) &&
	    !capable_wrt_inode_uidgid(&nop_mnt_idmap, inode, CAP_FSETID)) {
		struct iattr iattr = { .ia_valid = ATTR_KILL_SGID };

		err = ovl_setattr(&nop_mnt_idmap, dentry, &iattr);
		if (err)
			return err;
	}

	return ovl_set_or_remove_acl(dentry, inode, acl, type);
}
#endif

/*
 * ->update_time() for overlay inodes.
 *
 * Only S_ATIME is acted upon: if the inode has an upper dentry, its atime is
 * touched and then copied to the overlay inode. @ts is not used.
 *
 * Always returns 0.
 */
int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags)
{
	if (flags & S_ATIME) {
		struct ovl_fs *ofs = inode->i_sb->s_fs_info;
		struct path upperpath = {
			.mnt = ovl_upper_mnt(ofs),
			.dentry = ovl_upperdentry_dereference(OVL_I(inode)),
		};

		if (upperpath.dentry) {
			touch_atime(&upperpath);
			inode->i_atime = d_inode(upperpath.dentry)->i_atime;
		}
	}
	return 0;
}

/*
 * ->fiemap() for overlay files: forward the request to the ->fiemap() of the
 * inode returned by ovl_inode_realdata() while the creds are overridden.
 *
 * Returns -EOPNOTSUPP if that inode has no ->fiemap(), otherwise its result.
 */
static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		      u64 start, u64 len)
{
	int err;
	struct inode *realinode = ovl_inode_realdata(inode);
	const struct cred *old_cred;

	if (!realinode->i_op->fiemap)
		return -EOPNOTSUPP;

	old_cred = ovl_override_creds(inode->i_sb);
	err = realinode->i_op->fiemap(realinode, fieinfo, start, len);
	revert_creds(old_cred);

	return err;
}

/*
 * Work around the fact that security_file_ioctl() takes a
* file argument.
 * Introducing security_inode_fileattr_get/set() hooks would solve this issue
 * properly.
 *
 * A read-only file is opened on @realpath just to have a struct file to pass
 * to security_file_ioctl(); the ioctl command is chosen from @set and
 * fa->fsx_valid.
 *
 * Returns the result of security_file_ioctl(), or the error from
 * dentry_open().
 */
static int ovl_security_fileattr(const struct path *realpath, struct fileattr *fa,
				 bool set)
{
	struct file *file;
	unsigned int cmd;
	int err;

	file = dentry_open(realpath, O_RDONLY, current_cred());
	if (IS_ERR(file))
		return PTR_ERR(file);

	if (set)
		cmd = fa->fsx_valid ? FS_IOC_FSSETXATTR : FS_IOC_SETFLAGS;
	else
		cmd = fa->fsx_valid ? FS_IOC_FSGETXATTR : FS_IOC_GETFLAGS;

	err = security_file_ioctl(file, cmd, 0);
	fput(file);

	return err;
}

/*
 * Run the security check for a fileattr "set" on @realpath and, if it passes,
 * apply @fa with vfs_fileattr_set().
 */
int ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa)
{
	int err;

	err = ovl_security_fileattr(realpath, fa, true);
	if (err)
		return err;

	return vfs_fileattr_set(mnt_idmap(realpath->mnt), realpath->dentry, fa);
}

/*
 * ->fileattr_set() for overlay inodes.
 *
 * Copies the dentry up, stores the protection flags with ovl_set_protattr(),
 * applies @fa to the real path and then recomputes the overlay inode flags.
 *
 * @idmap is not referenced in this function.
 *
 * Returns 0 on success or a negative error.
 */
int ovl_fileattr_set(struct mnt_idmap *idmap,
		     struct dentry *dentry, struct fileattr *fa)
{
	struct inode *inode = d_inode(dentry);
	struct path upperpath;
	const struct cred *old_cred;
	unsigned int flags;
	int err;

	err = ovl_want_write(dentry);
	if (err)
		goto out;

	err = ovl_copy_up(dentry);
	if (!err) {
		/* Taken after the copy up above, hence named upperpath */
		ovl_path_real(dentry, &upperpath);

		old_cred = ovl_override_creds(inode->i_sb);
		/*
		 * Store immutable/append-only flags in xattr and clear them
		 * in upper fileattr (in case they were set by older kernel)
		 * so children of "ovl-immutable" directories and lower aliases
		 * of "ovl-immutable" hardlinks could be copied up.
		 * Clear xattr when flags are cleared.
		 */
		err = ovl_set_protattr(inode, upperpath.dentry, fa);
		if (!err)
			err = ovl_real_fileattr_set(&upperpath, fa);
		revert_creds(old_cred);

		/*
		 * Merge real inode flags with inode flags read from
		 * overlay.protattr xattr
		 *
		 * Note: this merge and the ovl_copyattr() below run whether
		 * or not the two calls above succeeded.
		 */
		flags = ovl_inode_real(inode)->i_flags & OVL_COPY_I_FLAGS_MASK;

		BUILD_BUG_ON(OVL_PROT_I_FLAGS_MASK & ~OVL_COPY_I_FLAGS_MASK);
		flags |= inode->i_flags & OVL_PROT_I_FLAGS_MASK;
		inode_set_flags(inode, flags, OVL_COPY_I_FLAGS_MASK);

		/* Update ctime */
		ovl_copyattr(inode);
	}
	ovl_drop_write(dentry);
out:
	return err;
}

/* Convert inode protection flags to fileattr flags */
static void ovl_fileattr_prot_flags(struct inode *inode, struct fileattr *fa)
{
	BUILD_BUG_ON(OVL_PROT_FS_FLAGS_MASK & ~FS_COMMON_FL);
	BUILD_BUG_ON(OVL_PROT_FSX_FLAGS_MASK & ~FS_XFLAG_COMMON);

	/* Flags are only ever added to @fa here, never cleared */
	if (inode->i_flags & S_APPEND) {
		fa->flags |= FS_APPEND_FL;
		fa->fsx_xflags |= FS_XFLAG_APPEND;
	}
	if (inode->i_flags & S_IMMUTABLE) {
		fa->flags |= FS_IMMUTABLE_FL;
		fa->fsx_xflags |= FS_XFLAG_IMMUTABLE;
	}
}

/*
 * Run the security check for a fileattr "get" on @realpath and, if it passes,
 * read the attributes with vfs_fileattr_get().
 *
 * -ENOIOCTLCMD from vfs_fileattr_get() is reported as -ENOTTY.
 */
int ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa)
{
	int err;

	err = ovl_security_fileattr(realpath, fa, false);
	if (err)
		return err;

	err = vfs_fileattr_get(realpath->dentry, fa);
	if (err == -ENOIOCTLCMD)
		err = -ENOTTY;
	return err;
}

/*
 * ->fileattr_get() for overlay inodes: read the attributes of the real path
 * while the creds are overridden and add the protection flags kept in the
 * overlay inode.
 *
 * ovl_fileattr_prot_flags() is applied regardless of the value of err.
 */
int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa)
{
	struct inode *inode = d_inode(dentry);
	struct path realpath;
	const struct cred *old_cred;
	int err;

	ovl_path_real(dentry, &realpath);

	old_cred = ovl_override_creds(inode->i_sb);
	err = ovl_real_fileattr_get(&realpath, fa);
	ovl_fileattr_prot_flags(inode, fa);
	revert_creds(old_cred);

	return err;
}

/* Inode operations installed by ovl_fill_inode() for S_IFREG inodes */
static const struct inode_operations ovl_file_inode_operations = {
	.setattr	= ovl_setattr,
	.permission	= ovl_permission,
	.getattr	= ovl_getattr,
	.listxattr	= ovl_listxattr,
	.get_inode_acl	= ovl_get_inode_acl,
	.get_acl	= ovl_get_acl,
	.set_acl	= ovl_set_acl,
	.update_time	= ovl_update_time,
	.fiemap		= ovl_fiemap,
	.fileattr_get	= ovl_fileattr_get,
	.fileattr_set	= ovl_fileattr_set,
};

/* Inode operations installed by ovl_fill_inode() for S_IFLNK inodes */
static const struct inode_operations ovl_symlink_inode_operations = {
	.setattr	= ovl_setattr,
	.get_link	= ovl_get_link,
	.getattr	= ovl_getattr,
	.listxattr	= ovl_listxattr,
	.update_time	= ovl_update_time,
};

/*
 * Inode operations installed by ovl_fill_inode() for every type that is not
 * a regular file, directory or symlink
 */
static const struct inode_operations ovl_special_inode_operations = {
	.setattr	= ovl_setattr,
	.permission	= ovl_permission,
	.getattr	= ovl_getattr,
	.listxattr	= ovl_listxattr,
	.get_inode_acl	= ovl_get_inode_acl,
	.get_acl	= ovl_get_acl,
	.set_acl	= ovl_set_acl,
	.update_time	= ovl_update_time,
};

static const struct address_space_operations ovl_aops = {
	/* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
	.direct_IO		= noop_direct_IO,
};

/*
 * It is possible to stack overlayfs instance on top of another
 * overlayfs instance as lower layer. We need to annotate the
 * stackable i_mutex locks according to stack level of the super
 * block instance. An overlayfs instance can never be in stack
 * depth 0 (there is always a real fs below it).  An overlayfs
 * inode lock will use the lockdep annotation ovl_i_mutex_key[depth].
 *
 * For example, here is a snip from /proc/lockdep_chains after
 * dir_iterate of nested overlayfs:
 *
 * [...] &ovl_i_mutex_dir_key[depth]   (stack_depth=2)
 * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1)
 * [...] &type->i_mutex_dir_key        (stack_depth=0)
 *
 * Locking order w.r.t ovl_want_write() is important for nested overlayfs.
909 * 910 * This chain is valid: 911 * - inode->i_rwsem (inode_lock[2]) 912 * - upper_mnt->mnt_sb->s_writers (ovl_want_write[0]) 913 * - OVL_I(inode)->lock (ovl_inode_lock[2]) 914 * - OVL_I(lowerinode)->lock (ovl_inode_lock[1]) 915 * 916 * And this chain is valid: 917 * - inode->i_rwsem (inode_lock[2]) 918 * - OVL_I(inode)->lock (ovl_inode_lock[2]) 919 * - lowerinode->i_rwsem (inode_lock[1]) 920 * - OVL_I(lowerinode)->lock (ovl_inode_lock[1]) 921 * 922 * But lowerinode->i_rwsem SHOULD NOT be acquired while ovl_want_write() is 923 * held, because it is in reverse order of the non-nested case using the same 924 * upper fs: 925 * - inode->i_rwsem (inode_lock[1]) 926 * - upper_mnt->mnt_sb->s_writers (ovl_want_write[0]) 927 * - OVL_I(inode)->lock (ovl_inode_lock[1]) 928 */ 929 #define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH 930 931 static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode) 932 { 933 #ifdef CONFIG_LOCKDEP 934 static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING]; 935 static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING]; 936 static struct lock_class_key ovl_i_lock_key[OVL_MAX_NESTING]; 937 938 int depth = inode->i_sb->s_stack_depth - 1; 939 940 if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING)) 941 depth = 0; 942 943 if (S_ISDIR(inode->i_mode)) 944 lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]); 945 else 946 lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]); 947 948 lockdep_set_class(&OVL_I(inode)->lock, &ovl_i_lock_key[depth]); 949 #endif 950 } 951 952 static void ovl_next_ino(struct inode *inode) 953 { 954 struct ovl_fs *ofs = inode->i_sb->s_fs_info; 955 956 inode->i_ino = atomic_long_inc_return(&ofs->last_ino); 957 if (unlikely(!inode->i_ino)) 958 inode->i_ino = atomic_long_inc_return(&ofs->last_ino); 959 } 960 961 static void ovl_map_ino(struct inode *inode, unsigned long ino, int fsid) 962 { 963 int xinobits = ovl_xino_bits(inode->i_sb); 964 unsigned int xinoshift = 64 - 
xinobits; 965 966 /* 967 * When d_ino is consistent with st_ino (samefs or i_ino has enough 968 * bits to encode layer), set the same value used for st_ino to i_ino, 969 * so inode number exposed via /proc/locks and a like will be 970 * consistent with d_ino and st_ino values. An i_ino value inconsistent 971 * with d_ino also causes nfsd readdirplus to fail. 972 */ 973 inode->i_ino = ino; 974 if (ovl_same_fs(inode->i_sb)) { 975 return; 976 } else if (xinobits && likely(!(ino >> xinoshift))) { 977 inode->i_ino |= (unsigned long)fsid << (xinoshift + 1); 978 return; 979 } 980 981 /* 982 * For directory inodes on non-samefs with xino disabled or xino 983 * overflow, we allocate a non-persistent inode number, to be used for 984 * resolving st_ino collisions in ovl_map_dev_ino(). 985 * 986 * To avoid ino collision with legitimate xino values from upper 987 * layer (fsid 0), use the lowest xinobit to map the non 988 * persistent inode numbers to the unified st_ino address space. 989 */ 990 if (S_ISDIR(inode->i_mode)) { 991 ovl_next_ino(inode); 992 if (xinobits) { 993 inode->i_ino &= ~0UL >> xinobits; 994 inode->i_ino |= 1UL << xinoshift; 995 } 996 } 997 } 998 999 void ovl_inode_init(struct inode *inode, struct ovl_inode_params *oip, 1000 unsigned long ino, int fsid) 1001 { 1002 struct inode *realinode; 1003 struct ovl_inode *oi = OVL_I(inode); 1004 1005 oi->__upperdentry = oip->upperdentry; 1006 oi->oe = oip->oe; 1007 oi->redirect = oip->redirect; 1008 1009 realinode = ovl_inode_real(inode); 1010 ovl_copyattr(inode); 1011 ovl_copyflags(realinode, inode); 1012 ovl_map_ino(inode, ino, fsid); 1013 } 1014 1015 static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev) 1016 { 1017 inode->i_mode = mode; 1018 inode->i_flags |= S_NOCMTIME; 1019 #ifdef CONFIG_FS_POSIX_ACL 1020 inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE; 1021 #endif 1022 1023 ovl_lockdep_annotate_inode_mutex_key(inode); 1024 1025 switch (mode & S_IFMT) { 1026 case S_IFREG: 1027 inode->i_op 
= &ovl_file_inode_operations; 1028 inode->i_fop = &ovl_file_operations; 1029 inode->i_mapping->a_ops = &ovl_aops; 1030 break; 1031 1032 case S_IFDIR: 1033 inode->i_op = &ovl_dir_inode_operations; 1034 inode->i_fop = &ovl_dir_operations; 1035 break; 1036 1037 case S_IFLNK: 1038 inode->i_op = &ovl_symlink_inode_operations; 1039 break; 1040 1041 default: 1042 inode->i_op = &ovl_special_inode_operations; 1043 init_special_inode(inode, mode, rdev); 1044 break; 1045 } 1046 } 1047 1048 /* 1049 * With inodes index enabled, an overlay inode nlink counts the union of upper 1050 * hardlinks and non-covered lower hardlinks. During the lifetime of a non-pure 1051 * upper inode, the following nlink modifying operations can happen: 1052 * 1053 * 1. Lower hardlink copy up 1054 * 2. Upper hardlink created, unlinked or renamed over 1055 * 3. Lower hardlink whiteout or renamed over 1056 * 1057 * For the first, copy up case, the union nlink does not change, whether the 1058 * operation succeeds or fails, but the upper inode nlink may change. 1059 * Therefore, before copy up, we store the union nlink value relative to the 1060 * lower inode nlink in the index inode xattr .overlay.nlink. 1061 * 1062 * For the second, upper hardlink case, the union nlink should be incremented 1063 * or decremented IFF the operation succeeds, aligned with nlink change of the 1064 * upper inode. Therefore, before link/unlink/rename, we store the union nlink 1065 * value relative to the upper inode nlink in the index inode. 1066 * 1067 * For the last, lower cover up case, we simplify things by preceding the 1068 * whiteout or cover up with copy up. This makes sure that there is an index 1069 * upper inode where the nlink xattr can be stored before the copied up upper 1070 * entry is unlink. 
1071 */ 1072 #define OVL_NLINK_ADD_UPPER (1 << 0) 1073 1074 /* 1075 * On-disk format for indexed nlink: 1076 * 1077 * nlink relative to the upper inode - "U[+-]NUM" 1078 * nlink relative to the lower inode - "L[+-]NUM" 1079 */ 1080 1081 static int ovl_set_nlink_common(struct dentry *dentry, 1082 struct dentry *realdentry, const char *format) 1083 { 1084 struct inode *inode = d_inode(dentry); 1085 struct inode *realinode = d_inode(realdentry); 1086 char buf[13]; 1087 int len; 1088 1089 len = snprintf(buf, sizeof(buf), format, 1090 (int) (inode->i_nlink - realinode->i_nlink)); 1091 1092 if (WARN_ON(len >= sizeof(buf))) 1093 return -EIO; 1094 1095 return ovl_setxattr(OVL_FS(inode->i_sb), ovl_dentry_upper(dentry), 1096 OVL_XATTR_NLINK, buf, len); 1097 } 1098 1099 int ovl_set_nlink_upper(struct dentry *dentry) 1100 { 1101 return ovl_set_nlink_common(dentry, ovl_dentry_upper(dentry), "U%+i"); 1102 } 1103 1104 int ovl_set_nlink_lower(struct dentry *dentry) 1105 { 1106 return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i"); 1107 } 1108 1109 unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry, 1110 struct dentry *upperdentry, 1111 unsigned int fallback) 1112 { 1113 int nlink_diff; 1114 int nlink; 1115 char buf[13]; 1116 int err; 1117 1118 if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1) 1119 return fallback; 1120 1121 err = ovl_getxattr_upper(ofs, upperdentry, OVL_XATTR_NLINK, 1122 &buf, sizeof(buf) - 1); 1123 if (err < 0) 1124 goto fail; 1125 1126 buf[err] = '\0'; 1127 if ((buf[0] != 'L' && buf[0] != 'U') || 1128 (buf[1] != '+' && buf[1] != '-')) 1129 goto fail; 1130 1131 err = kstrtoint(buf + 1, 10, &nlink_diff); 1132 if (err < 0) 1133 goto fail; 1134 1135 nlink = d_inode(buf[0] == 'L' ? 
lowerdentry : upperdentry)->i_nlink; 1136 nlink += nlink_diff; 1137 1138 if (nlink <= 0) 1139 goto fail; 1140 1141 return nlink; 1142 1143 fail: 1144 pr_warn_ratelimited("failed to get index nlink (%pd2, err=%i)\n", 1145 upperdentry, err); 1146 return fallback; 1147 } 1148 1149 struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev) 1150 { 1151 struct inode *inode; 1152 1153 inode = new_inode(sb); 1154 if (inode) 1155 ovl_fill_inode(inode, mode, rdev); 1156 1157 return inode; 1158 } 1159 1160 static int ovl_inode_test(struct inode *inode, void *data) 1161 { 1162 return inode->i_private == data; 1163 } 1164 1165 static int ovl_inode_set(struct inode *inode, void *data) 1166 { 1167 inode->i_private = data; 1168 return 0; 1169 } 1170 1171 static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, 1172 struct dentry *upperdentry, bool strict) 1173 { 1174 /* 1175 * For directories, @strict verify from lookup path performs consistency 1176 * checks, so NULL lower/upper in dentry must match NULL lower/upper in 1177 * inode. Non @strict verify from NFS handle decode path passes NULL for 1178 * 'unknown' lower/upper. 1179 */ 1180 if (S_ISDIR(inode->i_mode) && strict) { 1181 /* Real lower dir moved to upper layer under us? */ 1182 if (!lowerdentry && ovl_inode_lower(inode)) 1183 return false; 1184 1185 /* Lookup of an uncovered redirect origin? */ 1186 if (!upperdentry && ovl_inode_upper(inode)) 1187 return false; 1188 } 1189 1190 /* 1191 * Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL. 1192 * This happens when finding a copied up overlay inode for a renamed 1193 * or hardlinked overlay dentry and lower dentry cannot be followed 1194 * by origin because lower fs does not support file handles. 1195 */ 1196 if (lowerdentry && ovl_inode_lower(inode) != d_inode(lowerdentry)) 1197 return false; 1198 1199 /* 1200 * Allow non-NULL __upperdentry in inode even if upperdentry is NULL. 
1201 * This happens when finding a lower alias for a copied up hard link. 1202 */ 1203 if (upperdentry && ovl_inode_upper(inode) != d_inode(upperdentry)) 1204 return false; 1205 1206 return true; 1207 } 1208 1209 struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, 1210 bool is_upper) 1211 { 1212 struct inode *inode, *key = d_inode(real); 1213 1214 inode = ilookup5(sb, (unsigned long) key, ovl_inode_test, key); 1215 if (!inode) 1216 return NULL; 1217 1218 if (!ovl_verify_inode(inode, is_upper ? NULL : real, 1219 is_upper ? real : NULL, false)) { 1220 iput(inode); 1221 return ERR_PTR(-ESTALE); 1222 } 1223 1224 return inode; 1225 } 1226 1227 bool ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir) 1228 { 1229 struct inode *key = d_inode(dir); 1230 struct inode *trap; 1231 bool res; 1232 1233 trap = ilookup5(sb, (unsigned long) key, ovl_inode_test, key); 1234 if (!trap) 1235 return false; 1236 1237 res = IS_DEADDIR(trap) && !ovl_inode_upper(trap) && 1238 !ovl_inode_lower(trap); 1239 1240 iput(trap); 1241 return res; 1242 } 1243 1244 /* 1245 * Create an inode cache entry for layer root dir, that will intentionally 1246 * fail ovl_verify_inode(), so any lookup that will find some layer root 1247 * will fail. 1248 */ 1249 struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir) 1250 { 1251 struct inode *key = d_inode(dir); 1252 struct inode *trap; 1253 1254 if (!d_is_dir(dir)) 1255 return ERR_PTR(-ENOTDIR); 1256 1257 trap = iget5_locked(sb, (unsigned long) key, ovl_inode_test, 1258 ovl_inode_set, key); 1259 if (!trap) 1260 return ERR_PTR(-ENOMEM); 1261 1262 if (!(trap->i_state & I_NEW)) { 1263 /* Conflicting layer roots? */ 1264 iput(trap); 1265 return ERR_PTR(-ELOOP); 1266 } 1267 1268 trap->i_mode = S_IFDIR; 1269 trap->i_flags = S_DEAD; 1270 unlock_new_inode(trap); 1271 1272 return trap; 1273 } 1274 1275 /* 1276 * Does overlay inode need to be hashed by lower inode? 
1277 */ 1278 static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper, 1279 struct dentry *lower, bool index) 1280 { 1281 struct ovl_fs *ofs = sb->s_fs_info; 1282 1283 /* No, if pure upper */ 1284 if (!lower) 1285 return false; 1286 1287 /* Yes, if already indexed */ 1288 if (index) 1289 return true; 1290 1291 /* Yes, if won't be copied up */ 1292 if (!ovl_upper_mnt(ofs)) 1293 return true; 1294 1295 /* No, if lower hardlink is or will be broken on copy up */ 1296 if ((upper || !ovl_indexdir(sb)) && 1297 !d_is_dir(lower) && d_inode(lower)->i_nlink > 1) 1298 return false; 1299 1300 /* No, if non-indexed upper with NFS export */ 1301 if (sb->s_export_op && upper) 1302 return false; 1303 1304 /* Otherwise, hash by lower inode for fsnotify */ 1305 return true; 1306 } 1307 1308 static struct inode *ovl_iget5(struct super_block *sb, struct inode *newinode, 1309 struct inode *key) 1310 { 1311 return newinode ? inode_insert5(newinode, (unsigned long) key, 1312 ovl_inode_test, ovl_inode_set, key) : 1313 iget5_locked(sb, (unsigned long) key, 1314 ovl_inode_test, ovl_inode_set, key); 1315 } 1316 1317 struct inode *ovl_get_inode(struct super_block *sb, 1318 struct ovl_inode_params *oip) 1319 { 1320 struct ovl_fs *ofs = OVL_FS(sb); 1321 struct dentry *upperdentry = oip->upperdentry; 1322 struct ovl_path *lowerpath = ovl_lowerpath(oip->oe); 1323 struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; 1324 struct inode *inode; 1325 struct dentry *lowerdentry = lowerpath ? lowerpath->dentry : NULL; 1326 struct path realpath = { 1327 .dentry = upperdentry ?: lowerdentry, 1328 .mnt = upperdentry ? ovl_upper_mnt(ofs) : lowerpath->layer->mnt, 1329 }; 1330 bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, 1331 oip->index); 1332 int fsid = bylower ? lowerpath->layer->fsid : 0; 1333 bool is_dir; 1334 unsigned long ino = 0; 1335 int err = oip->newinode ? 
-EEXIST : -ENOMEM; 1336 1337 if (!realinode) 1338 realinode = d_inode(lowerdentry); 1339 1340 /* 1341 * Copy up origin (lower) may exist for non-indexed upper, but we must 1342 * not use lower as hash key if this is a broken hardlink. 1343 */ 1344 is_dir = S_ISDIR(realinode->i_mode); 1345 if (upperdentry || bylower) { 1346 struct inode *key = d_inode(bylower ? lowerdentry : 1347 upperdentry); 1348 unsigned int nlink = is_dir ? 1 : realinode->i_nlink; 1349 1350 inode = ovl_iget5(sb, oip->newinode, key); 1351 if (!inode) 1352 goto out_err; 1353 if (!(inode->i_state & I_NEW)) { 1354 /* 1355 * Verify that the underlying files stored in the inode 1356 * match those in the dentry. 1357 */ 1358 if (!ovl_verify_inode(inode, lowerdentry, upperdentry, 1359 true)) { 1360 iput(inode); 1361 err = -ESTALE; 1362 goto out_err; 1363 } 1364 1365 dput(upperdentry); 1366 ovl_free_entry(oip->oe); 1367 kfree(oip->redirect); 1368 goto out; 1369 } 1370 1371 /* Recalculate nlink for non-dir due to indexing */ 1372 if (!is_dir) 1373 nlink = ovl_get_nlink(ofs, lowerdentry, upperdentry, 1374 nlink); 1375 set_nlink(inode, nlink); 1376 ino = key->i_ino; 1377 } else { 1378 /* Lower hardlink that will be broken on copy up */ 1379 inode = new_inode(sb); 1380 if (!inode) { 1381 err = -ENOMEM; 1382 goto out_err; 1383 } 1384 ino = realinode->i_ino; 1385 fsid = lowerpath->layer->fsid; 1386 } 1387 ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev); 1388 ovl_inode_init(inode, oip, ino, fsid); 1389 1390 if (upperdentry && ovl_is_impuredir(sb, upperdentry)) 1391 ovl_set_flag(OVL_IMPURE, inode); 1392 1393 if (oip->index) 1394 ovl_set_flag(OVL_INDEX, inode); 1395 1396 if (bylower) 1397 ovl_set_flag(OVL_CONST_INO, inode); 1398 1399 /* Check for non-merge dir that may have whiteouts */ 1400 if (is_dir) { 1401 if (((upperdentry && lowerdentry) || ovl_numlower(oip->oe) > 1) || 1402 ovl_path_check_origin_xattr(ofs, &realpath)) { 1403 ovl_set_flag(OVL_WHITEOUTS, inode); 1404 } 1405 } 1406 1407 /* Check 
for immutable/append-only inode flags in xattr */ 1408 if (upperdentry) 1409 ovl_check_protattr(inode, upperdentry); 1410 1411 if (inode->i_state & I_NEW) 1412 unlock_new_inode(inode); 1413 out: 1414 return inode; 1415 1416 out_err: 1417 pr_warn_ratelimited("failed to get inode (%i)\n", err); 1418 inode = ERR_PTR(err); 1419 goto out; 1420 } 1421