1 /* 2 * 3 * Copyright (C) 2011 Novell Inc. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 as published by 7 * the Free Software Foundation. 8 */ 9 10 #include <linux/fs.h> 11 #include <linux/slab.h> 12 #include <linux/cred.h> 13 #include <linux/xattr.h> 14 #include <linux/posix_acl.h> 15 #include <linux/ratelimit.h> 16 #include "overlayfs.h" 17 18 19 int ovl_setattr(struct dentry *dentry, struct iattr *attr) 20 { 21 int err; 22 bool full_copy_up = false; 23 struct dentry *upperdentry; 24 const struct cred *old_cred; 25 26 err = setattr_prepare(dentry, attr); 27 if (err) 28 return err; 29 30 err = ovl_want_write(dentry); 31 if (err) 32 goto out; 33 34 if (attr->ia_valid & ATTR_SIZE) { 35 struct inode *realinode = d_inode(ovl_dentry_real(dentry)); 36 37 err = -ETXTBSY; 38 if (atomic_read(&realinode->i_writecount) < 0) 39 goto out_drop_write; 40 41 /* Truncate should trigger data copy up as well */ 42 full_copy_up = true; 43 } 44 45 if (!full_copy_up) 46 err = ovl_copy_up(dentry); 47 else 48 err = ovl_copy_up_with_data(dentry); 49 if (!err) { 50 struct inode *winode = NULL; 51 52 upperdentry = ovl_dentry_upper(dentry); 53 54 if (attr->ia_valid & ATTR_SIZE) { 55 winode = d_inode(upperdentry); 56 err = get_write_access(winode); 57 if (err) 58 goto out_drop_write; 59 } 60 61 if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) 62 attr->ia_valid &= ~ATTR_MODE; 63 64 inode_lock(upperdentry->d_inode); 65 old_cred = ovl_override_creds(dentry->d_sb); 66 err = notify_change(upperdentry, attr, NULL); 67 revert_creds(old_cred); 68 if (!err) 69 ovl_copyattr(upperdentry->d_inode, dentry->d_inode); 70 inode_unlock(upperdentry->d_inode); 71 72 if (winode) 73 put_write_access(winode); 74 } 75 out_drop_write: 76 ovl_drop_write(dentry); 77 out: 78 return err; 79 } 80 81 static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, 82 struct ovl_layer *lower_layer) 83 { 84 bool samefs = ovl_same_sb(dentry->d_sb); 85 unsigned int xinobits = ovl_xino_bits(dentry->d_sb); 86 87 if (samefs) { 88 /* 89 * When all layers are on the same fs, all real inode 90 * number are unique, so we use the overlay st_dev, 91 * which is friendly to du -x. 92 */ 93 stat->dev = dentry->d_sb->s_dev; 94 return 0; 95 } else if (xinobits) { 96 unsigned int shift = 64 - xinobits; 97 /* 98 * All inode numbers of underlying fs should not be using the 99 * high xinobits, so we use high xinobits to partition the 100 * overlay st_ino address space. The high bits holds the fsid 101 * (upper fsid is 0). This way overlay inode numbers are unique 102 * and all inodes use overlay st_dev. Inode numbers are also 103 * persistent for a given layer configuration. 104 */ 105 if (stat->ino >> shift) { 106 pr_warn_ratelimited("overlayfs: inode number too big (%pd2, ino=%llu, xinobits=%d)\n", 107 dentry, stat->ino, xinobits); 108 } else { 109 if (lower_layer) 110 stat->ino |= ((u64)lower_layer->fsid) << shift; 111 112 stat->dev = dentry->d_sb->s_dev; 113 return 0; 114 } 115 } 116 117 /* The inode could not be mapped to a unified st_ino address space */ 118 if (S_ISDIR(dentry->d_inode->i_mode)) { 119 /* 120 * Always use the overlay st_dev for directories, so 'find 121 * -xdev' will scan the entire overlay mount and won't cross the 122 * overlay mount boundaries. 123 * 124 * If not all layers are on the same fs the pair {real st_ino; 125 * overlay st_dev} is not unique, so use the non persistent 126 * overlay st_ino for directories. 127 */ 128 stat->dev = dentry->d_sb->s_dev; 129 stat->ino = dentry->d_inode->i_ino; 130 } else if (lower_layer && lower_layer->fsid) { 131 /* 132 * For non-samefs setup, if we cannot map all layers st_ino 133 * to a unified address space, we need to make sure that st_dev 134 * is unique per lower fs. Upper layer uses real st_dev and 135 * lower layers use the unique anonymous bdev assigned to the 136 * lower fs. 137 */ 138 stat->dev = lower_layer->fs->pseudo_dev; 139 } 140 141 return 0; 142 } 143 144 int ovl_getattr(const struct path *path, struct kstat *stat, 145 u32 request_mask, unsigned int flags) 146 { 147 struct dentry *dentry = path->dentry; 148 enum ovl_path_type type; 149 struct path realpath; 150 const struct cred *old_cred; 151 bool is_dir = S_ISDIR(dentry->d_inode->i_mode); 152 bool samefs = ovl_same_sb(dentry->d_sb); 153 struct ovl_layer *lower_layer = NULL; 154 int err; 155 bool metacopy_blocks = false; 156 157 metacopy_blocks = ovl_is_metacopy_dentry(dentry); 158 159 type = ovl_path_real(dentry, &realpath); 160 old_cred = ovl_override_creds(dentry->d_sb); 161 err = vfs_getattr(&realpath, stat, request_mask, flags); 162 if (err) 163 goto out; 164 165 /* 166 * For non-dir or same fs, we use st_ino of the copy up origin. 167 * This guaranties constant st_dev/st_ino across copy up. 168 * With xino feature and non-samefs, we use st_ino of the copy up 169 * origin masked with high bits that represent the layer id. 170 * 171 * If lower filesystem supports NFS file handles, this also guaranties 172 * persistent st_ino across mount cycle. 173 */ 174 if (!is_dir || samefs || ovl_xino_bits(dentry->d_sb)) { 175 if (!OVL_TYPE_UPPER(type)) { 176 lower_layer = ovl_layer_lower(dentry); 177 } else if (OVL_TYPE_ORIGIN(type)) { 178 struct kstat lowerstat; 179 u32 lowermask = STATX_INO | STATX_BLOCKS | 180 (!is_dir ? STATX_NLINK : 0); 181 182 ovl_path_lower(dentry, &realpath); 183 err = vfs_getattr(&realpath, &lowerstat, 184 lowermask, flags); 185 if (err) 186 goto out; 187 188 /* 189 * Lower hardlinks may be broken on copy up to different 190 * upper files, so we cannot use the lower origin st_ino 191 * for those different files, even for the same fs case. 192 * 193 * Similarly, several redirected dirs can point to the 194 * same dir on a lower layer. With the "verify_lower" 195 * feature, we do not use the lower origin st_ino, if 196 * we haven't verified that this redirect is unique. 197 * 198 * With inodes index enabled, it is safe to use st_ino 199 * of an indexed origin. The index validates that the 200 * upper hardlink is not broken and that a redirected 201 * dir is the only redirect to that origin. 202 */ 203 if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) || 204 (!ovl_verify_lower(dentry->d_sb) && 205 (is_dir || lowerstat.nlink == 1))) { 206 stat->ino = lowerstat.ino; 207 lower_layer = ovl_layer_lower(dentry); 208 } 209 210 /* 211 * If we are querying a metacopy dentry and lower 212 * dentry is data dentry, then use the blocks we 213 * queried just now. We don't have to do additional 214 * vfs_getattr(). If lower itself is metacopy, then 215 * additional vfs_getattr() is unavoidable. 216 */ 217 if (metacopy_blocks && 218 realpath.dentry == ovl_dentry_lowerdata(dentry)) { 219 stat->blocks = lowerstat.blocks; 220 metacopy_blocks = false; 221 } 222 } 223 224 if (metacopy_blocks) { 225 /* 226 * If lower is not same as lowerdata or if there was 227 * no origin on upper, we can end up here. 228 */ 229 struct kstat lowerdatastat; 230 u32 lowermask = STATX_BLOCKS; 231 232 ovl_path_lowerdata(dentry, &realpath); 233 err = vfs_getattr(&realpath, &lowerdatastat, 234 lowermask, flags); 235 if (err) 236 goto out; 237 stat->blocks = lowerdatastat.blocks; 238 } 239 } 240 241 err = ovl_map_dev_ino(dentry, stat, lower_layer); 242 if (err) 243 goto out; 244 245 /* 246 * It's probably not worth it to count subdirs to get the 247 * correct link count. nlink=1 seems to pacify 'find' and 248 * other utilities. 249 */ 250 if (is_dir && OVL_TYPE_MERGE(type)) 251 stat->nlink = 1; 252 253 /* 254 * Return the overlay inode nlinks for indexed upper inodes. 255 * Overlay inode nlink counts the union of the upper hardlinks 256 * and non-covered lower hardlinks. It does not include the upper 257 * index hardlink. 258 */ 259 if (!is_dir && ovl_test_flag(OVL_INDEX, d_inode(dentry))) 260 stat->nlink = dentry->d_inode->i_nlink; 261 262 out: 263 revert_creds(old_cred); 264 265 return err; 266 } 267 268 int ovl_permission(struct inode *inode, int mask) 269 { 270 struct inode *upperinode = ovl_inode_upper(inode); 271 struct inode *realinode = upperinode ?: ovl_inode_lower(inode); 272 const struct cred *old_cred; 273 int err; 274 275 /* Careful in RCU walk mode */ 276 if (!realinode) { 277 WARN_ON(!(mask & MAY_NOT_BLOCK)); 278 return -ECHILD; 279 } 280 281 /* 282 * Check overlay inode with the creds of task and underlying inode 283 * with creds of mounter 284 */ 285 err = generic_permission(inode, mask); 286 if (err) 287 return err; 288 289 old_cred = ovl_override_creds(inode->i_sb); 290 if (!upperinode && 291 !special_file(realinode->i_mode) && mask & MAY_WRITE) { 292 mask &= ~(MAY_WRITE | MAY_APPEND); 293 /* Make sure mounter can read file for copy up later */ 294 mask |= MAY_READ; 295 } 296 err = inode_permission(realinode, mask); 297 revert_creds(old_cred); 298 299 return err; 300 } 301 302 static const char *ovl_get_link(struct dentry *dentry, 303 struct inode *inode, 304 struct delayed_call *done) 305 { 306 const struct cred *old_cred; 307 const char *p; 308 309 if (!dentry) 310 return ERR_PTR(-ECHILD); 311 312 old_cred = ovl_override_creds(dentry->d_sb); 313 p = vfs_get_link(ovl_dentry_real(dentry), done); 314 revert_creds(old_cred); 315 return p; 316 } 317 318 bool ovl_is_private_xattr(const char *name) 319 { 320 return strncmp(name, OVL_XATTR_PREFIX, 321 sizeof(OVL_XATTR_PREFIX) - 1) == 0; 322 } 323 324 int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, 325 const void *value, size_t size, int flags) 326 { 327 int err; 328 struct dentry *upperdentry = ovl_i_dentry_upper(inode); 329 struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); 330 const struct cred *old_cred; 331 332 err = ovl_want_write(dentry); 333 if (err) 334 goto out; 335 336 if (!value && !upperdentry) { 337 err = vfs_getxattr(realdentry, name, NULL, 0); 338 if (err < 0) 339 goto out_drop_write; 340 } 341 342 if (!upperdentry) { 343 err = ovl_copy_up(dentry); 344 if (err) 345 goto out_drop_write; 346 347 realdentry = ovl_dentry_upper(dentry); 348 } 349 350 old_cred = ovl_override_creds(dentry->d_sb); 351 if (value) 352 err = vfs_setxattr(realdentry, name, value, size, flags); 353 else { 354 WARN_ON(flags != XATTR_REPLACE); 355 err = vfs_removexattr(realdentry, name); 356 } 357 revert_creds(old_cred); 358 359 /* copy c/mtime */ 360 ovl_copyattr(d_inode(realdentry), inode); 361 362 out_drop_write: 363 ovl_drop_write(dentry); 364 out: 365 return err; 366 } 367 368 int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, 369 void *value, size_t size) 370 { 371 ssize_t res; 372 const struct cred *old_cred; 373 struct dentry *realdentry = 374 ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry); 375 376 old_cred = ovl_override_creds(dentry->d_sb); 377 res = vfs_getxattr(realdentry, name, value, size); 378 revert_creds(old_cred); 379 return res; 380 } 381 382 static bool ovl_can_list(const char *s) 383 { 384 /* List all non-trusted xatts */ 385 if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) 386 return true; 387 388 /* Never list trusted.overlay, list other trusted for superuser only */ 389 return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN); 390 } 391 392 ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) 393 { 394 struct dentry *realdentry = ovl_dentry_real(dentry); 395 ssize_t res; 396 size_t len; 397 char *s; 398 const struct cred *old_cred; 399 400 old_cred = ovl_override_creds(dentry->d_sb); 401 res = vfs_listxattr(realdentry, list, size); 402 revert_creds(old_cred); 403 if (res <= 0 || size == 0) 404 return res; 405 406 /* filter out private xattrs */ 407 for (s = list, len = res; len;) { 408 size_t slen = strnlen(s, len) + 1; 409 410 /* underlying fs providing us with an broken xattr list? */ 411 if (WARN_ON(slen > len)) 412 return -EIO; 413 414 len -= slen; 415 if (!ovl_can_list(s)) { 416 res -= slen; 417 memmove(s, s + slen, len); 418 } else { 419 s += slen; 420 } 421 } 422 423 return res; 424 } 425 426 struct posix_acl *ovl_get_acl(struct inode *inode, int type) 427 { 428 struct inode *realinode = ovl_inode_real(inode); 429 const struct cred *old_cred; 430 struct posix_acl *acl; 431 432 if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode)) 433 return NULL; 434 435 old_cred = ovl_override_creds(inode->i_sb); 436 acl = get_acl(realinode, type); 437 revert_creds(old_cred); 438 439 return acl; 440 } 441 442 int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags) 443 { 444 if (flags & S_ATIME) { 445 struct ovl_fs *ofs = inode->i_sb->s_fs_info; 446 struct path upperpath = { 447 .mnt = ofs->upper_mnt, 448 .dentry = ovl_upperdentry_dereference(OVL_I(inode)), 449 }; 450 451 if (upperpath.dentry) { 452 touch_atime(&upperpath); 453 inode->i_atime = d_inode(upperpath.dentry)->i_atime; 454 } 455 } 456 return 0; 457 } 458 459 static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 460 u64 start, u64 len) 461 { 462 int err; 463 struct inode *realinode = ovl_inode_real(inode); 464 const struct cred *old_cred; 465 466 if (!realinode->i_op->fiemap) 467 return -EOPNOTSUPP; 468 469 old_cred = ovl_override_creds(inode->i_sb); 470 err = realinode->i_op->fiemap(realinode, fieinfo, start, len); 471 revert_creds(old_cred); 472 473 return err; 474 } 475 476 static const struct inode_operations ovl_file_inode_operations = { 477 .setattr = ovl_setattr, 478 .permission = ovl_permission, 479 .getattr = ovl_getattr, 480 .listxattr = ovl_listxattr, 481 .get_acl = ovl_get_acl, 482 .update_time = ovl_update_time, 483 .fiemap = ovl_fiemap, 484 }; 485 486 static const struct inode_operations ovl_symlink_inode_operations = { 487 .setattr = ovl_setattr, 488 .get_link = ovl_get_link, 489 .getattr = ovl_getattr, 490 .listxattr = ovl_listxattr, 491 .update_time = ovl_update_time, 492 }; 493 494 static const struct inode_operations ovl_special_inode_operations = { 495 .setattr = ovl_setattr, 496 .permission = ovl_permission, 497 .getattr = ovl_getattr, 498 .listxattr = ovl_listxattr, 499 .get_acl = ovl_get_acl, 500 .update_time = ovl_update_time, 501 }; 502 503 /* 504 * It is possible to stack overlayfs instance on top of another 505 * overlayfs instance as lower layer. We need to annonate the 506 * stackable i_mutex locks according to stack level of the super 507 * block instance. An overlayfs instance can never be in stack 508 * depth 0 (there is always a real fs below it). An overlayfs 509 * inode lock will use the lockdep annotaion ovl_i_mutex_key[depth]. 510 * 511 * For example, here is a snip from /proc/lockdep_chains after 512 * dir_iterate of nested overlayfs: 513 * 514 * [...] &ovl_i_mutex_dir_key[depth] (stack_depth=2) 515 * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1) 516 * [...] &type->i_mutex_dir_key (stack_depth=0) 517 */ 518 #define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH 519 520 static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode) 521 { 522 #ifdef CONFIG_LOCKDEP 523 static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING]; 524 static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING]; 525 static struct lock_class_key ovl_i_lock_key[OVL_MAX_NESTING]; 526 527 int depth = inode->i_sb->s_stack_depth - 1; 528 529 if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING)) 530 depth = 0; 531 532 if (S_ISDIR(inode->i_mode)) 533 lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]); 534 else 535 lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]); 536 537 lockdep_set_class(&OVL_I(inode)->lock, &ovl_i_lock_key[depth]); 538 #endif 539 } 540 541 static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev, 542 unsigned long ino, int fsid) 543 { 544 int xinobits = ovl_xino_bits(inode->i_sb); 545 546 /* 547 * When NFS export is enabled and d_ino is consistent with st_ino 548 * (samefs or i_ino has enough bits to encode layer), set the same 549 * value used for d_ino to i_ino, because nfsd readdirplus compares 550 * d_ino values to i_ino values of child entries. When called from 551 * ovl_new_inode(), ino arg is 0, so i_ino will be updated to real 552 * upper inode i_ino on ovl_inode_init() or ovl_inode_update(). 553 */ 554 if (inode->i_sb->s_export_op && 555 (ovl_same_sb(inode->i_sb) || xinobits)) { 556 inode->i_ino = ino; 557 if (xinobits && fsid && !(ino >> (64 - xinobits))) 558 inode->i_ino |= (unsigned long)fsid << (64 - xinobits); 559 } else { 560 inode->i_ino = get_next_ino(); 561 } 562 inode->i_mode = mode; 563 inode->i_flags |= S_NOCMTIME; 564 #ifdef CONFIG_FS_POSIX_ACL 565 inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE; 566 #endif 567 568 ovl_lockdep_annotate_inode_mutex_key(inode); 569 570 switch (mode & S_IFMT) { 571 case S_IFREG: 572 inode->i_op = &ovl_file_inode_operations; 573 inode->i_fop = &ovl_file_operations; 574 break; 575 576 case S_IFDIR: 577 inode->i_op = &ovl_dir_inode_operations; 578 inode->i_fop = &ovl_dir_operations; 579 break; 580 581 case S_IFLNK: 582 inode->i_op = &ovl_symlink_inode_operations; 583 break; 584 585 default: 586 inode->i_op = &ovl_special_inode_operations; 587 init_special_inode(inode, mode, rdev); 588 break; 589 } 590 } 591 592 /* 593 * With inodes index enabled, an overlay inode nlink counts the union of upper 594 * hardlinks and non-covered lower hardlinks. During the lifetime of a non-pure 595 * upper inode, the following nlink modifying operations can happen: 596 * 597 * 1. Lower hardlink copy up 598 * 2. Upper hardlink created, unlinked or renamed over 599 * 3. Lower hardlink whiteout or renamed over 600 * 601 * For the first, copy up case, the union nlink does not change, whether the 602 * operation succeeds or fails, but the upper inode nlink may change. 603 * Therefore, before copy up, we store the union nlink value relative to the 604 * lower inode nlink in the index inode xattr trusted.overlay.nlink. 605 * 606 * For the second, upper hardlink case, the union nlink should be incremented 607 * or decremented IFF the operation succeeds, aligned with nlink change of the 608 * upper inode. Therefore, before link/unlink/rename, we store the union nlink 609 * value relative to the upper inode nlink in the index inode. 610 * 611 * For the last, lower cover up case, we simplify things by preceding the 612 * whiteout or cover up with copy up. This makes sure that there is an index 613 * upper inode where the nlink xattr can be stored before the copied up upper 614 * entry is unlink. 615 */ 616 #define OVL_NLINK_ADD_UPPER (1 << 0) 617 618 /* 619 * On-disk format for indexed nlink: 620 * 621 * nlink relative to the upper inode - "U[+-]NUM" 622 * nlink relative to the lower inode - "L[+-]NUM" 623 */ 624 625 static int ovl_set_nlink_common(struct dentry *dentry, 626 struct dentry *realdentry, const char *format) 627 { 628 struct inode *inode = d_inode(dentry); 629 struct inode *realinode = d_inode(realdentry); 630 char buf[13]; 631 int len; 632 633 len = snprintf(buf, sizeof(buf), format, 634 (int) (inode->i_nlink - realinode->i_nlink)); 635 636 if (WARN_ON(len >= sizeof(buf))) 637 return -EIO; 638 639 return ovl_do_setxattr(ovl_dentry_upper(dentry), 640 OVL_XATTR_NLINK, buf, len, 0); 641 } 642 643 int ovl_set_nlink_upper(struct dentry *dentry) 644 { 645 return ovl_set_nlink_common(dentry, ovl_dentry_upper(dentry), "U%+i"); 646 } 647 648 int ovl_set_nlink_lower(struct dentry *dentry) 649 { 650 return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i"); 651 } 652 653 unsigned int ovl_get_nlink(struct dentry *lowerdentry, 654 struct dentry *upperdentry, 655 unsigned int fallback) 656 { 657 int nlink_diff; 658 int nlink; 659 char buf[13]; 660 int err; 661 662 if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1) 663 return fallback; 664 665 err = vfs_getxattr(upperdentry, OVL_XATTR_NLINK, &buf, sizeof(buf) - 1); 666 if (err < 0) 667 goto fail; 668 669 buf[err] = '\0'; 670 if ((buf[0] != 'L' && buf[0] != 'U') || 671 (buf[1] != '+' && buf[1] != '-')) 672 goto fail; 673 674 err = kstrtoint(buf + 1, 10, &nlink_diff); 675 if (err < 0) 676 goto fail; 677 678 nlink = d_inode(buf[0] == 'L' ? lowerdentry : upperdentry)->i_nlink; 679 nlink += nlink_diff; 680 681 if (nlink <= 0) 682 goto fail; 683 684 return nlink; 685 686 fail: 687 pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n", 688 upperdentry, err); 689 return fallback; 690 } 691 692 struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev) 693 { 694 struct inode *inode; 695 696 inode = new_inode(sb); 697 if (inode) 698 ovl_fill_inode(inode, mode, rdev, 0, 0); 699 700 return inode; 701 } 702 703 static int ovl_inode_test(struct inode *inode, void *data) 704 { 705 return inode->i_private == data; 706 } 707 708 static int ovl_inode_set(struct inode *inode, void *data) 709 { 710 inode->i_private = data; 711 return 0; 712 } 713 714 static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, 715 struct dentry *upperdentry, bool strict) 716 { 717 /* 718 * For directories, @strict verify from lookup path performs consistency 719 * checks, so NULL lower/upper in dentry must match NULL lower/upper in 720 * inode. Non @strict verify from NFS handle decode path passes NULL for 721 * 'unknown' lower/upper. 722 */ 723 if (S_ISDIR(inode->i_mode) && strict) { 724 /* Real lower dir moved to upper layer under us? */ 725 if (!lowerdentry && ovl_inode_lower(inode)) 726 return false; 727 728 /* Lookup of an uncovered redirect origin? */ 729 if (!upperdentry && ovl_inode_upper(inode)) 730 return false; 731 } 732 733 /* 734 * Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL. 735 * This happens when finding a copied up overlay inode for a renamed 736 * or hardlinked overlay dentry and lower dentry cannot be followed 737 * by origin because lower fs does not support file handles. 738 */ 739 if (lowerdentry && ovl_inode_lower(inode) != d_inode(lowerdentry)) 740 return false; 741 742 /* 743 * Allow non-NULL __upperdentry in inode even if upperdentry is NULL. 744 * This happens when finding a lower alias for a copied up hard link. 745 */ 746 if (upperdentry && ovl_inode_upper(inode) != d_inode(upperdentry)) 747 return false; 748 749 return true; 750 } 751 752 struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, 753 bool is_upper) 754 { 755 struct inode *inode, *key = d_inode(real); 756 757 inode = ilookup5(sb, (unsigned long) key, ovl_inode_test, key); 758 if (!inode) 759 return NULL; 760 761 if (!ovl_verify_inode(inode, is_upper ? NULL : real, 762 is_upper ? real : NULL, false)) { 763 iput(inode); 764 return ERR_PTR(-ESTALE); 765 } 766 767 return inode; 768 } 769 770 /* 771 * Does overlay inode need to be hashed by lower inode? 772 */ 773 static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper, 774 struct dentry *lower, struct dentry *index) 775 { 776 struct ovl_fs *ofs = sb->s_fs_info; 777 778 /* No, if pure upper */ 779 if (!lower) 780 return false; 781 782 /* Yes, if already indexed */ 783 if (index) 784 return true; 785 786 /* Yes, if won't be copied up */ 787 if (!ofs->upper_mnt) 788 return true; 789 790 /* No, if lower hardlink is or will be broken on copy up */ 791 if ((upper || !ovl_indexdir(sb)) && 792 !d_is_dir(lower) && d_inode(lower)->i_nlink > 1) 793 return false; 794 795 /* No, if non-indexed upper with NFS export */ 796 if (sb->s_export_op && upper) 797 return false; 798 799 /* Otherwise, hash by lower inode for fsnotify */ 800 return true; 801 } 802 803 static struct inode *ovl_iget5(struct super_block *sb, struct inode *newinode, 804 struct inode *key) 805 { 806 return newinode ? inode_insert5(newinode, (unsigned long) key, 807 ovl_inode_test, ovl_inode_set, key) : 808 iget5_locked(sb, (unsigned long) key, 809 ovl_inode_test, ovl_inode_set, key); 810 } 811 812 struct inode *ovl_get_inode(struct super_block *sb, 813 struct ovl_inode_params *oip) 814 { 815 struct dentry *upperdentry = oip->upperdentry; 816 struct ovl_path *lowerpath = oip->lowerpath; 817 struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; 818 struct inode *inode; 819 struct dentry *lowerdentry = lowerpath ? lowerpath->dentry : NULL; 820 bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, 821 oip->index); 822 int fsid = bylower ? oip->lowerpath->layer->fsid : 0; 823 bool is_dir, metacopy = false; 824 unsigned long ino = 0; 825 int err = -ENOMEM; 826 827 if (!realinode) 828 realinode = d_inode(lowerdentry); 829 830 /* 831 * Copy up origin (lower) may exist for non-indexed upper, but we must 832 * not use lower as hash key if this is a broken hardlink. 833 */ 834 is_dir = S_ISDIR(realinode->i_mode); 835 if (upperdentry || bylower) { 836 struct inode *key = d_inode(bylower ? lowerdentry : 837 upperdentry); 838 unsigned int nlink = is_dir ? 1 : realinode->i_nlink; 839 840 inode = ovl_iget5(sb, oip->newinode, key); 841 if (!inode) 842 goto out_err; 843 if (!(inode->i_state & I_NEW)) { 844 /* 845 * Verify that the underlying files stored in the inode 846 * match those in the dentry. 847 */ 848 if (!ovl_verify_inode(inode, lowerdentry, upperdentry, 849 true)) { 850 iput(inode); 851 err = -ESTALE; 852 goto out_err; 853 } 854 855 dput(upperdentry); 856 kfree(oip->redirect); 857 goto out; 858 } 859 860 /* Recalculate nlink for non-dir due to indexing */ 861 if (!is_dir) 862 nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink); 863 set_nlink(inode, nlink); 864 ino = key->i_ino; 865 } else { 866 /* Lower hardlink that will be broken on copy up */ 867 inode = new_inode(sb); 868 if (!inode) { 869 err = -ENOMEM; 870 goto out_err; 871 } 872 } 873 ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid); 874 ovl_inode_init(inode, upperdentry, lowerdentry, oip->lowerdata); 875 876 if (upperdentry && ovl_is_impuredir(upperdentry)) 877 ovl_set_flag(OVL_IMPURE, inode); 878 879 if (oip->index) 880 ovl_set_flag(OVL_INDEX, inode); 881 882 if (upperdentry) { 883 err = ovl_check_metacopy_xattr(upperdentry); 884 if (err < 0) 885 goto out_err; 886 metacopy = err; 887 if (!metacopy) 888 ovl_set_flag(OVL_UPPERDATA, inode); 889 } 890 891 OVL_I(inode)->redirect = oip->redirect; 892 893 if (bylower) 894 ovl_set_flag(OVL_CONST_INO, inode); 895 896 /* Check for non-merge dir that may have whiteouts */ 897 if (is_dir) { 898 if (((upperdentry && lowerdentry) || oip->numlower > 1) || 899 ovl_check_origin_xattr(upperdentry ?: lowerdentry)) { 900 ovl_set_flag(OVL_WHITEOUTS, inode); 901 } 902 } 903 904 if (inode->i_state & I_NEW) 905 unlock_new_inode(inode); 906 out: 907 return inode; 908 909 out_err: 910 inode = ERR_PTR(err); 911 goto out; 912 } 913