1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * fs/libfs.c 4 * Library for filesystems writers. 5 */ 6 7 #include <linux/blkdev.h> 8 #include <linux/export.h> 9 #include <linux/pagemap.h> 10 #include <linux/slab.h> 11 #include <linux/cred.h> 12 #include <linux/mount.h> 13 #include <linux/vfs.h> 14 #include <linux/quotaops.h> 15 #include <linux/mutex.h> 16 #include <linux/namei.h> 17 #include <linux/exportfs.h> 18 #include <linux/iversion.h> 19 #include <linux/writeback.h> 20 #include <linux/buffer_head.h> /* sync_mapping_buffers */ 21 #include <linux/fs_context.h> 22 #include <linux/pseudo_fs.h> 23 #include <linux/fsnotify.h> 24 #include <linux/unicode.h> 25 #include <linux/fscrypt.h> 26 27 #include <linux/uaccess.h> 28 29 #include "internal.h" 30 31 int simple_getattr(struct user_namespace *mnt_userns, const struct path *path, 32 struct kstat *stat, u32 request_mask, 33 unsigned int query_flags) 34 { 35 struct inode *inode = d_inode(path->dentry); 36 generic_fillattr(&init_user_ns, inode, stat); 37 stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9); 38 return 0; 39 } 40 EXPORT_SYMBOL(simple_getattr); 41 42 int simple_statfs(struct dentry *dentry, struct kstatfs *buf) 43 { 44 buf->f_type = dentry->d_sb->s_magic; 45 buf->f_bsize = PAGE_SIZE; 46 buf->f_namelen = NAME_MAX; 47 return 0; 48 } 49 EXPORT_SYMBOL(simple_statfs); 50 51 /* 52 * Retaining negative dentries for an in-memory filesystem just wastes 53 * memory and lookup time: arrange for them to be deleted immediately. 54 */ 55 int always_delete_dentry(const struct dentry *dentry) 56 { 57 return 1; 58 } 59 EXPORT_SYMBOL(always_delete_dentry); 60 61 const struct dentry_operations simple_dentry_operations = { 62 .d_delete = always_delete_dentry, 63 }; 64 EXPORT_SYMBOL(simple_dentry_operations); 65 66 /* 67 * Lookup the data. This is trivial - if the dentry didn't already 68 * exist, we know it is negative. Set d_op to delete negative dentries. 
 */
struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
	/* names longer than NAME_MAX are refused outright */
	if (dentry->d_name.len > NAME_MAX)
		return ERR_PTR(-ENAMETOOLONG);
	/* install the default d_op only when the superblock supplies none */
	if (!dentry->d_sb->s_d_op)
		d_set_d_op(dentry, &simple_dentry_operations);
	/* add the dentry with no inode attached, i.e. as a negative entry */
	d_add(dentry, NULL);
	return NULL;
}
EXPORT_SYMBOL(simple_lookup);

/*
 * ->open for dcache-backed directories: allocate a cursor dentry under the
 * directory being opened and keep it in file->private_data.
 * Returns 0 on success, -ENOMEM if the cursor could not be allocated.
 */
int dcache_dir_open(struct inode *inode, struct file *file)
{
	file->private_data = d_alloc_cursor(file->f_path.dentry);

	return file->private_data ? 0 : -ENOMEM;
}
EXPORT_SYMBOL(dcache_dir_open);

/*
 * ->release counterpart of dcache_dir_open(): drop the reference on the
 * cursor stored in file->private_data.  Always returns 0.
 */
int dcache_dir_close(struct inode *inode, struct file *file)
{
	dput(file->private_data);
	return 0;
}
EXPORT_SYMBOL(dcache_dir_close);

/* parent is locked at least shared */
/*
 * Returns an element of siblings' list.
 * We are looking for <count>th positive after <p>; if
 * found, dentry is grabbed and returned to caller.
 * If no such element exists, NULL is returned.
 *
 * The reference passed in as <last> (may be NULL) is dropped before
 * returning, whether or not anything was found.
 */
static struct dentry *scan_positives(struct dentry *cursor,
					struct list_head *p,
					loff_t count,
					struct dentry *last)
{
	struct dentry *dentry = cursor->d_parent, *found = NULL;

	spin_lock(&dentry->d_lock);
	while ((p = p->next) != &dentry->d_subdirs) {
		struct dentry *d = list_entry(p, struct dentry, d_child);
		// we must at least skip cursors, to avoid livelocks
		if (d->d_flags & DCACHE_DENTRY_CURSOR)
			continue;
		if (simple_positive(d) && !--count) {
			/*
			 * The first simple_positive() test ran without
			 * d->d_lock; repeat it under the lock before taking
			 * a reference.
			 */
			spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
			if (simple_positive(d))
				found = dget_dlock(d);
			spin_unlock(&d->d_lock);
			if (likely(found))
				break;
			/* recheck failed: keep looking for one more positive */
			count = 1;
		}
		if (need_resched()) {
			/*
			 * Park the cursor at the current position so the scan
			 * can resume from it after the parent lock has been
			 * dropped for rescheduling.
			 */
			list_move(&cursor->d_child, p);
			p = &cursor->d_child;
			spin_unlock(&dentry->d_lock);
			cond_resched();
			spin_lock(&dentry->d_lock);
		}
	}
	spin_unlock(&dentry->d_lock);
	dput(last);
	return found;
}

/*
 * ->llseek for dcache-backed directories.
 *
 * @whence 0 treats @offset as absolute, @whence 1 as relative to the
 * current file position; any other @whence, or a negative resulting
 * offset, yields -EINVAL.  When the position actually changes, the cursor
 * in file->private_data is moved right after the (offset - 2)th positive
 * child, or taken off the list if there is no such child or the new
 * offset is 2 or less.  Returns the new offset on success.
 */
loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
{
	struct dentry *dentry = file->f_path.dentry;
	switch (whence) {
		case 1:
			offset += file->f_pos;
			fallthrough;
		case 0:
			if (offset >= 0)
				break;
			fallthrough;
		default:
			return -EINVAL;
	}
	if (offset != file->f_pos) {
		struct dentry *cursor = file->private_data;
		struct dentry *to = NULL;

		inode_lock_shared(dentry->d_inode);

		/*
		 * NOTE(review): offsets 0 and 1 presumably stand for the
		 * "." and ".." entries, hence the "- 2" - confirm against
		 * the readdir implementation.
		 */
		if (offset > 2)
			to = scan_positives(cursor, &dentry->d_subdirs,
					    offset - 2, NULL);
		spin_lock(&dentry->d_lock);
		if (to)
			list_move(&cursor->d_child, &to->d_child);
		else
			list_del_init(&cursor->d_child);
		spin_unlock(&dentry->d_lock);
		/* drop the reference scan_positives() took on the target */
		dput(to);

		file->f_pos = offset;

		inode_unlock_shared(dentry->d_inode);
	}
	return offset;
}
EXPORT_SYMBOL(dcache_dir_lseek);

/* Relationship between i_mode and the DT_xxx types */
static inline unsigned char dt_type(struct inode *inode)
{
	/* bits 12..15 of i_mode, as a 4-bit value */
	return (inode->i_mode >> 12) & 15;
}

/*
 * Directory is locked and all positive dentries in it are safe, since
 * for ramfs-type trees they can't go away without unlink() or rmdir(),
 * both impossible due to the lock on directory.
 */

int dcache_readdir(struct file *file, struct dir_context *ctx)
{
	struct dentry *dentry = file->f_path.dentry;
	struct dentry *cursor = file->private_data;
	struct list_head *anchor = &dentry->d_subdirs;
	struct dentry *next = NULL;
	struct list_head *p;

	if (!dir_emit_dots(file, ctx))
		return 0;

	/*
	 * Position 2 starts from the head of the child list; any later
	 * position resumes from the cursor, provided it is still linked
	 * into the list.  An unlinked cursor means there is nothing to emit.
	 */
	if (ctx->pos == 2)
		p = anchor;
	else if (!list_empty(&cursor->d_child))
		p = &cursor->d_child;
	else
		return 0;

	/* each call also drops the reference on the previously returned entry */
	while ((next = scan_positives(cursor, p, 1, next)) != NULL) {
		if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
			      d_inode(next)->i_ino, dt_type(d_inode(next))))
			break;
		ctx->pos++;
		p = &next->d_child;
	}
	/*
	 * If emission stopped early, leave the cursor just before the entry
	 * that was not emitted; otherwise unlink it from the list.
	 */
	spin_lock(&dentry->d_lock);
	if (next)
		list_move_tail(&cursor->d_child, &next->d_child);
	else
		list_del_init(&cursor->d_child);
	spin_unlock(&dentry->d_lock);
	dput(next);

	return 0;
}
EXPORT_SYMBOL(dcache_readdir);

/* ->read for directories: always fails with -EISDIR. */
ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
{
	return -EISDIR;
}
EXPORT_SYMBOL(generic_read_dir);

/* File operations for directories served straight from the dcache. */
const struct file_operations simple_dir_operations = {
	.open		= dcache_dir_open,
	.release	= dcache_dir_close,
	.llseek		= dcache_dir_lseek,
	.read		= generic_read_dir,
	.iterate_shared	= dcache_readdir,
	.fsync		= noop_fsync,
};
EXPORT_SYMBOL(simple_dir_operations);

/* Inode operations for such directories: lookup only. */
const struct inode_operations simple_dir_inode_operations = {
	.lookup		= simple_lookup,
};
EXPORT_SYMBOL(simple_dir_inode_operations);

/*
 * Return, with a reference held, the first positive child of @parent that
 * follows @prev in the child list (or the first positive child at all when
 * @prev is NULL); NULL if there is none.  The reference on @prev is
 * dropped before returning.
 */
static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev)
{
	struct dentry *child = NULL;
	struct list_head *p = prev ? &prev->d_child : &parent->d_subdirs;

	spin_lock(&parent->d_lock);
	while ((p = p->next) != &parent->d_subdirs) {
		struct dentry *d = container_of(p, struct dentry, d_child);
		if (simple_positive(d)) {
			/* recheck under the child's own lock before grabbing it */
			spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
			if (simple_positive(d))
				child = dget_dlock(d);
			spin_unlock(&d->d_lock);
			if (likely(child))
				break;
		}
	}
	spin_unlock(&parent->d_lock);
	dput(prev);
	return child;
}

/*
 * Remove @dentry together with everything below it.
 *
 * The tree is walked depth-first without recursion: descend into the next
 * positive child while one exists; once a node has no children left, clear
 * its link count, step up to its parent and - if the node is still
 * positive - invalidate it, send the fsnotify event, invoke @callback
 * (when non-NULL) on it and drop a reference.  The walk ends once @dentry
 * itself has been processed, at which point the parent's timestamps (and,
 * for a directory, its link count) are updated.
 */
void simple_recursive_removal(struct dentry *dentry,
                              void (*callback)(struct dentry *))
{
	struct dentry *this = dget(dentry);
	while (true) {
		struct dentry *victim = NULL, *child;
		struct inode *inode = this->d_inode;

		inode_lock(inode);
		if (d_is_dir(this))
			inode->i_flags |= S_DEAD;
		while ((child = find_next_child(this, victim)) == NULL) {
			// kill and ascend
			// update metadata while it's still locked
			inode->i_ctime = current_time(inode);
			clear_nlink(inode);
			inode_unlock(inode);
			victim = this;
			this = this->d_parent;
			inode = this->d_inode;
			inode_lock(inode);
			if (simple_positive(victim)) {
				d_invalidate(victim);	// avoid lost mounts
				if (d_is_dir(victim))
					fsnotify_rmdir(inode, victim);
				else
					fsnotify_unlink(inode, victim);
				if (callback)
					callback(victim);
				dput(victim);		// unpin it
			}
			if (victim == dentry) {
				/* back at the starting point: fix up its parent and stop */
				inode->i_ctime = inode->i_mtime =
					current_time(inode);
				if (d_is_dir(dentry))
					drop_nlink(inode);
				inode_unlock(inode);
				/* pairs with the dget() at the top */
				dput(dentry);
				return;
			}
		}
		inode_unlock(inode);
		/* descend into the child that was found */
		this = child;
	}
}
EXPORT_SYMBOL(simple_recursive_removal);

/* Default super operations: statfs only. */
static const struct super_operations simple_super_operations = {
	.statfs		= simple_statfs,
};

/*
 * fill_super callback for pseudo filesystems: configure the superblock
 * from the pseudo_fs_context in fc->fs_private and create a root directory
 * inode with inode number 1.  Returns 0 or -ENOMEM.
 */
static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc)
{
	struct pseudo_fs_context *ctx = fc->fs_private;
	struct inode *root;

	s->s_maxbytes = MAX_LFS_FILESIZE;
	s->s_blocksize = PAGE_SIZE;
	s->s_blocksize_bits = PAGE_SHIFT;
	s->s_magic = ctx->magic;
	/* fall back to the default super operations when none were supplied */
	s->s_op = ctx->ops ?: &simple_super_operations;
	s->s_xattr = ctx->xattr;
	s->s_time_gran = 1;
	root = new_inode(s);
	if (!root)
		return -ENOMEM;

	/*
	 * since this is the first inode, make it number 1. New inodes created
	 * after this must take care not to collide with it (by passing
	 * max_reserved of 1 to iunique).
	 */
	root->i_ino = 1;
	root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
	root->i_atime = root->i_mtime = root->i_ctime = current_time(root);
	s->s_root = d_make_root(root);
	if (!s->s_root)
		return -ENOMEM;
	s->s_d_op = ctx->dops;
	return 0;
}

/* ->get_tree: build the superblock through get_tree_nodev(). */
static int pseudo_fs_get_tree(struct fs_context *fc)
{
	return get_tree_nodev(fc, pseudo_fs_fill_super);
}

/* ->free: release the pseudo_fs_context allocated by init_pseudo(). */
static void pseudo_fs_free(struct fs_context *fc)
{
	kfree(fc->fs_private);
}

static const struct fs_context_operations pseudo_fs_context_ops = {
	.free		= pseudo_fs_free,
	.get_tree	= pseudo_fs_get_tree,
};

/*
 * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
 * will never be mountable)
 *
 * Allocates a zeroed pseudo_fs_context carrying @magic and attaches it to
 * @fc.  Returns the context, or NULL if the allocation failed (in which
 * case @fc is left untouched).
 */
struct pseudo_fs_context *init_pseudo(struct fs_context *fc,
					unsigned long magic)
{
	struct pseudo_fs_context *ctx;

	ctx = kzalloc(sizeof(struct pseudo_fs_context), GFP_KERNEL);
	if (likely(ctx)) {
		ctx->magic = magic;
		fc->fs_private = ctx;
		fc->ops = &pseudo_fs_context_ops;
		fc->sb_flags |= SB_NOUSER;
		fc->global = true;
	}
	return ctx;
}
EXPORT_SYMBOL(init_pseudo);

/*
 * ->open helper: copy inode->i_private into file->private_data when it is
 * non-NULL; file->private_data is left alone otherwise.  Always returns 0.
 */
int simple_open(struct inode *inode, struct file *file)
{
	if (inode->i_private)
		file->private_data = inode->i_private;
	return 0;
}
EXPORT_SYMBOL(simple_open);

/*
 * ->link helper: make @dentry in @dir another name for the inode behind
 * @old_dentry.  Updates timestamps, bumps the inode's link count and takes
 * an extra reference on both the inode and @dentry.  Always returns 0.
 */
int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = d_inode(old_dentry);

	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
	inc_nlink(inode);
	ihold(inode);
	/* NOTE(review): presumably the reference simple_unlink() later drops - confirm */
	dget(dentry);
	d_instantiate(dentry, inode);
	return 0;
}
EXPORT_SYMBOL(simple_link);

/*
 * Returns 1 if @dentry has no positive children, 0 otherwise.  Each child
 * is tested under its own d_lock, nested inside the parent's d_lock.
 */
int simple_empty(struct dentry *dentry)
{
	struct dentry *child;
	int ret = 0;

	spin_lock(&dentry->d_lock);
	list_for_each_entry(child, &dentry->d_subdirs, d_child) {
		spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
		if (simple_positive(child)) {
			spin_unlock(&child->d_lock);
			goto out;
		}
		spin_unlock(&child->d_lock);
	}
	ret = 1;
out:
	spin_unlock(&dentry->d_lock);
	return ret;
}
EXPORT_SYMBOL(simple_empty);

/*
 * ->unlink helper: update timestamps, drop one link from the inode behind
 * @dentry and drop one reference on @dentry.  Always returns 0.
 */
int simple_unlink(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);

	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
	drop_nlink(inode);
	dput(dentry);
	return 0;
}
EXPORT_SYMBOL(simple_unlink);

/*
 * ->rmdir helper: refuse with -ENOTEMPTY if @dentry still has positive
 * children; otherwise the directory's link count is dropped twice (once
 * here, once inside simple_unlink()) and @dir loses one link as well.
 */
int simple_rmdir(struct inode *dir, struct dentry *dentry)
{
	if (!simple_empty(dentry))
		return -ENOTEMPTY;

	drop_nlink(d_inode(dentry));
	simple_unlink(dir, dentry);
	drop_nlink(dir);
	return 0;
}
EXPORT_SYMBOL(simple_rmdir);

/*
 * Bookkeeping for a RENAME_EXCHANGE of @old_dentry and @new_dentry: link
 * counts of the two parents only change when the parents differ and
 * exactly one of the exchanged objects is a directory.  Timestamps of both
 * parents and both inodes are refreshed.  Always returns 0.
 */
int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
			   struct inode *new_dir, struct dentry *new_dentry)
{
	bool old_is_dir = d_is_dir(old_dentry);
	bool new_is_dir = d_is_dir(new_dentry);

	if (old_dir != new_dir && old_is_dir != new_is_dir) {
		if (old_is_dir) {
			drop_nlink(old_dir);
			inc_nlink(new_dir);
		} else {
			drop_nlink(new_dir);
			inc_nlink(old_dir);
		}
	}
	old_dir->i_ctime = old_dir->i_mtime =
	new_dir->i_ctime = new_dir->i_mtime =
	d_inode(old_dentry)->i_ctime =
	d_inode(new_dentry)->i_ctime = current_time(old_dir);

	return 0;
}
EXPORT_SYMBOL_GPL(simple_rename_exchange);

/*
 * ->rename helper.  Only RENAME_NOREPLACE and RENAME_EXCHANGE are accepted
 * in @flags (-EINVAL otherwise); RENAME_EXCHANGE is delegated to
 * simple_rename_exchange().  A target with positive children fails with
 * -ENOTEMPTY.  An existing target is unlinked first; link counts are
 * adjusted when the renamed object is a directory.  Returns 0 on success.
 */
int simple_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
		  struct dentry *old_dentry, struct inode *new_dir,
		  struct dentry *new_dentry, unsigned int flags)
{
	struct inode *inode = d_inode(old_dentry);
	int they_are_dirs = d_is_dir(old_dentry);

	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
		return -EINVAL;

	if (flags & RENAME_EXCHANGE)
		return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);

	if (!simple_empty(new_dentry))
		return -ENOTEMPTY;

	if (d_really_is_positive(new_dentry)) {
		/* replacing an existing target: get rid of it first */
		simple_unlink(new_dir, new_dentry);
		if (they_are_dirs) {
			drop_nlink(d_inode(new_dentry));
			drop_nlink(old_dir);
		}
	} else if (they_are_dirs) {
		/* a directory moving to a fresh name: shift one link between parents */
		drop_nlink(old_dir);
		inc_nlink(new_dir);
	}

	old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime =
		new_dir->i_mtime = inode->i_ctime = current_time(old_dir);

	return 0;
}
EXPORT_SYMBOL(simple_rename);

/**
 * simple_setattr - setattr for simple filesystem
 * @mnt_userns:	user namespace of the target mount
 * @dentry:	dentry
 * @iattr:	iattr structure
 *
 * Returns 0 on success, -error on failure.
 *
 * simple_setattr is a simple ->setattr implementation without a proper
 * implementation of size changes.
 *
 * It can either be used for in-memory filesystems or special files
 * on simple regular filesystems.  Anything that needs to change on-disk
 * or wire state on size changes needs its own setattr method.
524 */ 525 int simple_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, 526 struct iattr *iattr) 527 { 528 struct inode *inode = d_inode(dentry); 529 int error; 530 531 error = setattr_prepare(mnt_userns, dentry, iattr); 532 if (error) 533 return error; 534 535 if (iattr->ia_valid & ATTR_SIZE) 536 truncate_setsize(inode, iattr->ia_size); 537 setattr_copy(mnt_userns, inode, iattr); 538 mark_inode_dirty(inode); 539 return 0; 540 } 541 EXPORT_SYMBOL(simple_setattr); 542 543 static int simple_read_folio(struct file *file, struct folio *folio) 544 { 545 folio_zero_range(folio, 0, folio_size(folio)); 546 flush_dcache_folio(folio); 547 folio_mark_uptodate(folio); 548 folio_unlock(folio); 549 return 0; 550 } 551 552 int simple_write_begin(struct file *file, struct address_space *mapping, 553 loff_t pos, unsigned len, 554 struct page **pagep, void **fsdata) 555 { 556 struct page *page; 557 pgoff_t index; 558 559 index = pos >> PAGE_SHIFT; 560 561 page = grab_cache_page_write_begin(mapping, index); 562 if (!page) 563 return -ENOMEM; 564 565 *pagep = page; 566 567 if (!PageUptodate(page) && (len != PAGE_SIZE)) { 568 unsigned from = pos & (PAGE_SIZE - 1); 569 570 zero_user_segments(page, 0, from, from + len, PAGE_SIZE); 571 } 572 return 0; 573 } 574 EXPORT_SYMBOL(simple_write_begin); 575 576 /** 577 * simple_write_end - .write_end helper for non-block-device FSes 578 * @file: See .write_end of address_space_operations 579 * @mapping: " 580 * @pos: " 581 * @len: " 582 * @copied: " 583 * @page: " 584 * @fsdata: " 585 * 586 * simple_write_end does the minimum needed for updating a page after writing is 587 * done. It has the same API signature as the .write_end of 588 * address_space_operations vector. So it can just be set onto .write_end for 589 * FSes that don't need any other processing. i_mutex is assumed to be held. 590 * Block based filesystems should use generic_write_end(). 
591 * NOTE: Even though i_size might get updated by this function, mark_inode_dirty 592 * is not called, so a filesystem that actually does store data in .write_inode 593 * should extend on what's done here with a call to mark_inode_dirty() in the 594 * case that i_size has changed. 595 * 596 * Use *ONLY* with simple_read_folio() 597 */ 598 static int simple_write_end(struct file *file, struct address_space *mapping, 599 loff_t pos, unsigned len, unsigned copied, 600 struct page *page, void *fsdata) 601 { 602 struct inode *inode = page->mapping->host; 603 loff_t last_pos = pos + copied; 604 605 /* zero the stale part of the page if we did a short copy */ 606 if (!PageUptodate(page)) { 607 if (copied < len) { 608 unsigned from = pos & (PAGE_SIZE - 1); 609 610 zero_user(page, from + copied, len - copied); 611 } 612 SetPageUptodate(page); 613 } 614 /* 615 * No need to use i_size_read() here, the i_size 616 * cannot change under us because we hold the i_mutex. 617 */ 618 if (last_pos > inode->i_size) 619 i_size_write(inode, last_pos); 620 621 set_page_dirty(page); 622 unlock_page(page); 623 put_page(page); 624 625 return copied; 626 } 627 628 /* 629 * Provides ramfs-style behavior: data in the pagecache, but no writeback. 630 */ 631 const struct address_space_operations ram_aops = { 632 .read_folio = simple_read_folio, 633 .write_begin = simple_write_begin, 634 .write_end = simple_write_end, 635 .dirty_folio = noop_dirty_folio, 636 }; 637 EXPORT_SYMBOL(ram_aops); 638 639 /* 640 * the inodes created here are not hashed. If you use iunique to generate 641 * unique inode values later for this filesystem, then you must take care 642 * to pass it an appropriate max_reserved value to avoid collisions. 
643 */ 644 int simple_fill_super(struct super_block *s, unsigned long magic, 645 const struct tree_descr *files) 646 { 647 struct inode *inode; 648 struct dentry *root; 649 struct dentry *dentry; 650 int i; 651 652 s->s_blocksize = PAGE_SIZE; 653 s->s_blocksize_bits = PAGE_SHIFT; 654 s->s_magic = magic; 655 s->s_op = &simple_super_operations; 656 s->s_time_gran = 1; 657 658 inode = new_inode(s); 659 if (!inode) 660 return -ENOMEM; 661 /* 662 * because the root inode is 1, the files array must not contain an 663 * entry at index 1 664 */ 665 inode->i_ino = 1; 666 inode->i_mode = S_IFDIR | 0755; 667 inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); 668 inode->i_op = &simple_dir_inode_operations; 669 inode->i_fop = &simple_dir_operations; 670 set_nlink(inode, 2); 671 root = d_make_root(inode); 672 if (!root) 673 return -ENOMEM; 674 for (i = 0; !files->name || files->name[0]; i++, files++) { 675 if (!files->name) 676 continue; 677 678 /* warn if it tries to conflict with the root inode */ 679 if (unlikely(i == 1)) 680 printk(KERN_WARNING "%s: %s passed in a files array" 681 "with an index of 1!\n", __func__, 682 s->s_type->name); 683 684 dentry = d_alloc_name(root, files->name); 685 if (!dentry) 686 goto out; 687 inode = new_inode(s); 688 if (!inode) { 689 dput(dentry); 690 goto out; 691 } 692 inode->i_mode = S_IFREG | files->mode; 693 inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); 694 inode->i_fop = files->ops; 695 inode->i_ino = i; 696 d_add(dentry, inode); 697 } 698 s->s_root = root; 699 return 0; 700 out: 701 d_genocide(root); 702 shrink_dcache_parent(root); 703 dput(root); 704 return -ENOMEM; 705 } 706 EXPORT_SYMBOL(simple_fill_super); 707 708 static DEFINE_SPINLOCK(pin_fs_lock); 709 710 int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *count) 711 { 712 struct vfsmount *mnt = NULL; 713 spin_lock(&pin_fs_lock); 714 if (unlikely(!*mount)) { 715 spin_unlock(&pin_fs_lock); 716 mnt = 
vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL); 717 if (IS_ERR(mnt)) 718 return PTR_ERR(mnt); 719 spin_lock(&pin_fs_lock); 720 if (!*mount) 721 *mount = mnt; 722 } 723 mntget(*mount); 724 ++*count; 725 spin_unlock(&pin_fs_lock); 726 mntput(mnt); 727 return 0; 728 } 729 EXPORT_SYMBOL(simple_pin_fs); 730 731 void simple_release_fs(struct vfsmount **mount, int *count) 732 { 733 struct vfsmount *mnt; 734 spin_lock(&pin_fs_lock); 735 mnt = *mount; 736 if (!--*count) 737 *mount = NULL; 738 spin_unlock(&pin_fs_lock); 739 mntput(mnt); 740 } 741 EXPORT_SYMBOL(simple_release_fs); 742 743 /** 744 * simple_read_from_buffer - copy data from the buffer to user space 745 * @to: the user space buffer to read to 746 * @count: the maximum number of bytes to read 747 * @ppos: the current position in the buffer 748 * @from: the buffer to read from 749 * @available: the size of the buffer 750 * 751 * The simple_read_from_buffer() function reads up to @count bytes from the 752 * buffer @from at offset @ppos into the user space address starting at @to. 753 * 754 * On success, the number of bytes read is returned and the offset @ppos is 755 * advanced by this number, or negative value is returned on error. 
756 **/ 757 ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, 758 const void *from, size_t available) 759 { 760 loff_t pos = *ppos; 761 size_t ret; 762 763 if (pos < 0) 764 return -EINVAL; 765 if (pos >= available || !count) 766 return 0; 767 if (count > available - pos) 768 count = available - pos; 769 ret = copy_to_user(to, from + pos, count); 770 if (ret == count) 771 return -EFAULT; 772 count -= ret; 773 *ppos = pos + count; 774 return count; 775 } 776 EXPORT_SYMBOL(simple_read_from_buffer); 777 778 /** 779 * simple_write_to_buffer - copy data from user space to the buffer 780 * @to: the buffer to write to 781 * @available: the size of the buffer 782 * @ppos: the current position in the buffer 783 * @from: the user space buffer to read from 784 * @count: the maximum number of bytes to read 785 * 786 * The simple_write_to_buffer() function reads up to @count bytes from the user 787 * space address starting at @from into the buffer @to at offset @ppos. 788 * 789 * On success, the number of bytes written is returned and the offset @ppos is 790 * advanced by this number, or negative value is returned on error. 
791 **/ 792 ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, 793 const void __user *from, size_t count) 794 { 795 loff_t pos = *ppos; 796 size_t res; 797 798 if (pos < 0) 799 return -EINVAL; 800 if (pos >= available || !count) 801 return 0; 802 if (count > available - pos) 803 count = available - pos; 804 res = copy_from_user(to + pos, from, count); 805 if (res == count) 806 return -EFAULT; 807 count -= res; 808 *ppos = pos + count; 809 return count; 810 } 811 EXPORT_SYMBOL(simple_write_to_buffer); 812 813 /** 814 * memory_read_from_buffer - copy data from the buffer 815 * @to: the kernel space buffer to read to 816 * @count: the maximum number of bytes to read 817 * @ppos: the current position in the buffer 818 * @from: the buffer to read from 819 * @available: the size of the buffer 820 * 821 * The memory_read_from_buffer() function reads up to @count bytes from the 822 * buffer @from at offset @ppos into the kernel space address starting at @to. 823 * 824 * On success, the number of bytes read is returned and the offset @ppos is 825 * advanced by this number, or negative value is returned on error. 826 **/ 827 ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, 828 const void *from, size_t available) 829 { 830 loff_t pos = *ppos; 831 832 if (pos < 0) 833 return -EINVAL; 834 if (pos >= available) 835 return 0; 836 if (count > available - pos) 837 count = available - pos; 838 memcpy(to, from + pos, count); 839 *ppos = pos + count; 840 841 return count; 842 } 843 EXPORT_SYMBOL(memory_read_from_buffer); 844 845 /* 846 * Transaction based IO. 847 * The file expects a single write which triggers the transaction, and then 848 * possibly a read which collects the result - which is stored in a 849 * file-local buffer. 
 */

/*
 * Publish the size of the response held in the transaction buffer attached
 * to @file.  @n must not exceed SIMPLE_TRANSACTION_LIMIT (BUG otherwise).
 */
void simple_transaction_set(struct file *file, size_t n)
{
	struct simple_transaction_argresp *ar = file->private_data;

	BUG_ON(n > SIMPLE_TRANSACTION_LIMIT);

	/*
	 * The barrier ensures that ar->size will really remain zero until
	 * ar->data is ready for reading.
	 */
	smp_mb();
	ar->size = n;
}
EXPORT_SYMBOL(simple_transaction_set);

/*
 * Start a transaction: allocate a zeroed page, attach it to @file as the
 * transaction buffer and copy @size bytes of the request from @buf into it.
 *
 * Returns a pointer to the copied request data, or an ERR_PTR:
 *   -EFBIG   @size is larger than SIMPLE_TRANSACTION_LIMIT - 1
 *   -ENOMEM  no page could be allocated
 *   -EBUSY   the file already has a buffer attached
 *   -EFAULT  copying from user space failed (the buffer stays attached to
 *            the file in that case)
 */
char *simple_transaction_get(struct file *file, const char __user *buf, size_t size)
{
	struct simple_transaction_argresp *ar;
	static DEFINE_SPINLOCK(simple_transaction_lock);

	if (size > SIMPLE_TRANSACTION_LIMIT - 1)
		return ERR_PTR(-EFBIG);

	ar = (struct simple_transaction_argresp *)get_zeroed_page(GFP_KERNEL);
	if (!ar)
		return ERR_PTR(-ENOMEM);

	/* the lock makes the check-and-set of file->private_data atomic */
	spin_lock(&simple_transaction_lock);

	/* only one write allowed per open */
	if (file->private_data) {
		spin_unlock(&simple_transaction_lock);
		free_page((unsigned long)ar);
		return ERR_PTR(-EBUSY);
	}

	file->private_data = ar;

	spin_unlock(&simple_transaction_lock);

	if (copy_from_user(ar->data, buf, size))
		return ERR_PTR(-EFAULT);

	return ar->data;
}
EXPORT_SYMBOL(simple_transaction_get);

/*
 * ->read for transaction files: hand out the response stored in the
 * transaction buffer, limited to the size recorded in it.  Returns 0 when
 * no buffer is attached to the file.
 */
ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
{
	struct simple_transaction_argresp *ar = file->private_data;

	if (!ar)
		return 0;
	return simple_read_from_buffer(buf, size, pos, ar->data, ar->size);
}
EXPORT_SYMBOL(simple_transaction_read);

/* ->release for transaction files: free the buffer page.  Always returns 0. */
int simple_transaction_release(struct inode *inode, struct file *file)
{
	free_page((unsigned long)file->private_data);
	return 0;
}
EXPORT_SYMBOL(simple_transaction_release);

/* Simple attribute files */

/*
 * Per-open state of a simple attribute file; allocated by
 * simple_attr_open() and freed by simple_attr_release().
 */
struct simple_attr {
	int (*get)(void *, u64 *);	/* read callback; reads fail with -EACCES when NULL */
	int (*set)(void *, u64);	/* write callback; writes fail with -EACCES when NULL */
	char get_buf[24];	/* enough to store a u64 and "\n\0" */
	char set_buf[24];	/* NUL-terminated copy of the text written by the user */
	void *data;		/* inode->i_private at open time; passed to get/set */
	const char *fmt;	/* format for read operation */
	struct mutex mutex;	/* protects access to these buffers */
};

/* simple_attr_open is called by an actual attribute open file operation
 * to set the attribute specific access operations. */
int simple_attr_open(struct inode *inode, struct file *file,
		     int (*get)(void *, u64 *), int (*set)(void *, u64),
		     const char *fmt)
{
	struct simple_attr *attr;

	attr = kzalloc(sizeof(*attr), GFP_KERNEL);
	if (!attr)
		return -ENOMEM;

	attr->get = get;
	attr->set = set;
	attr->data = inode->i_private;
	attr->fmt = fmt;
	mutex_init(&attr->mutex);

	file->private_data = attr;

	return nonseekable_open(inode, file);
}
EXPORT_SYMBOL_GPL(simple_attr_open);

/* Free the state allocated by simple_attr_open().  Always returns 0. */
int simple_attr_release(struct inode *inode, struct file *file)
{
	kfree(file->private_data);
	return 0;
}
EXPORT_SYMBOL_GPL(simple_attr_release);	/* GPL-only?  This?  Really? */

/* read from the buffer that is filled with the get function */
ssize_t simple_attr_read(struct file *file, char __user *buf,
			 size_t len, loff_t *ppos)
{
	struct simple_attr *attr;
	size_t size;
	ssize_t ret;

	attr = file->private_data;

	if (!attr->get)
		return -EACCES;

	ret = mutex_lock_interruptible(&attr->mutex);
	if (ret)
		return ret;

	/*
	 * A non-zero position with text already in get_buf continues an
	 * earlier read and reuses that text; otherwise the value is fetched
	 * and formatted afresh.
	 */
	if (*ppos && attr->get_buf[0]) {
		/* continued read */
		size = strlen(attr->get_buf);
	} else {
		/* first read */
		u64 val;
		ret = attr->get(attr->data, &val);
		if (ret)
			goto out;

		size = scnprintf(attr->get_buf, sizeof(attr->get_buf),
				 attr->fmt, (unsigned long long)val);
	}

	ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size);
out:
	mutex_unlock(&attr->mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(simple_attr_read);

/* interpret the buffer as a number to call the set function with */
ssize_t simple_attr_write(struct file *file, const char __user *buf,
			  size_t len, loff_t *ppos)
{
	struct simple_attr *attr;
	unsigned long long val;
	size_t size;
	ssize_t ret;

	attr = file->private_data;
	if (!attr->set)
		return -EACCES;

	ret = mutex_lock_interruptible(&attr->mutex);
	if (ret)
		return ret;

	ret = -EFAULT;
	/* input is cut to fit set_buf, leaving room for the terminating NUL */
	size = min(sizeof(attr->set_buf) - 1, len);
	if (copy_from_user(attr->set_buf, buf, size))
		goto out;

	attr->set_buf[size] = '\0';
	/* base 0: the base is derived from the text itself by kstrtoull() */
	ret = kstrtoull(attr->set_buf, 0, &val);
	if (ret)
		goto out;
	ret = attr->set(attr->data, val);
	if (ret == 0)
		ret = len; /* on success, claim we got the whole input */
out:
	mutex_unlock(&attr->mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(simple_attr_write);

/**
 * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation
 * @sb:		filesystem to do the file handle conversion on
 * @fid:	file handle to convert
 * @fh_len:	length of the file handle in bytes
 * @fh_type:	type of file handle
 * @get_inode:	filesystem callback to retrieve inode
 *
 * This function decodes @fid as long as it has one of the well-known
 * Linux filehandle types and calls @get_inode on it to retrieve the
 * inode for the object specified in the file handle.
1043 */ 1044 struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid, 1045 int fh_len, int fh_type, struct inode *(*get_inode) 1046 (struct super_block *sb, u64 ino, u32 gen)) 1047 { 1048 struct inode *inode = NULL; 1049 1050 if (fh_len < 2) 1051 return NULL; 1052 1053 switch (fh_type) { 1054 case FILEID_INO32_GEN: 1055 case FILEID_INO32_GEN_PARENT: 1056 inode = get_inode(sb, fid->i32.ino, fid->i32.gen); 1057 break; 1058 } 1059 1060 return d_obtain_alias(inode); 1061 } 1062 EXPORT_SYMBOL_GPL(generic_fh_to_dentry); 1063 1064 /** 1065 * generic_fh_to_parent - generic helper for the fh_to_parent export operation 1066 * @sb: filesystem to do the file handle conversion on 1067 * @fid: file handle to convert 1068 * @fh_len: length of the file handle in bytes 1069 * @fh_type: type of file handle 1070 * @get_inode: filesystem callback to retrieve inode 1071 * 1072 * This function decodes @fid as long as it has one of the well-known 1073 * Linux filehandle types and calls @get_inode on it to retrieve the 1074 * inode for the _parent_ object specified in the file handle if it 1075 * is specified in the file handle, or NULL otherwise. 1076 */ 1077 struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, 1078 int fh_len, int fh_type, struct inode *(*get_inode) 1079 (struct super_block *sb, u64 ino, u32 gen)) 1080 { 1081 struct inode *inode = NULL; 1082 1083 if (fh_len <= 2) 1084 return NULL; 1085 1086 switch (fh_type) { 1087 case FILEID_INO32_GEN_PARENT: 1088 inode = get_inode(sb, fid->i32.parent_ino, 1089 (fh_len > 3 ? 
fid->i32.parent_gen : 0)); 1090 break; 1091 } 1092 1093 return d_obtain_alias(inode); 1094 } 1095 EXPORT_SYMBOL_GPL(generic_fh_to_parent); 1096 1097 /** 1098 * __generic_file_fsync - generic fsync implementation for simple filesystems 1099 * 1100 * @file: file to synchronize 1101 * @start: start offset in bytes 1102 * @end: end offset in bytes (inclusive) 1103 * @datasync: only synchronize essential metadata if true 1104 * 1105 * This is a generic implementation of the fsync method for simple 1106 * filesystems which track all non-inode metadata in the buffers list 1107 * hanging off the address_space structure. 1108 */ 1109 int __generic_file_fsync(struct file *file, loff_t start, loff_t end, 1110 int datasync) 1111 { 1112 struct inode *inode = file->f_mapping->host; 1113 int err; 1114 int ret; 1115 1116 err = file_write_and_wait_range(file, start, end); 1117 if (err) 1118 return err; 1119 1120 inode_lock(inode); 1121 ret = sync_mapping_buffers(inode->i_mapping); 1122 if (!(inode->i_state & I_DIRTY_ALL)) 1123 goto out; 1124 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 1125 goto out; 1126 1127 err = sync_inode_metadata(inode, 1); 1128 if (ret == 0) 1129 ret = err; 1130 1131 out: 1132 inode_unlock(inode); 1133 /* check and advance again to catch errors after syncing out buffers */ 1134 err = file_check_and_advance_wb_err(file); 1135 if (ret == 0) 1136 ret = err; 1137 return ret; 1138 } 1139 EXPORT_SYMBOL(__generic_file_fsync); 1140 1141 /** 1142 * generic_file_fsync - generic fsync implementation for simple filesystems 1143 * with flush 1144 * @file: file to synchronize 1145 * @start: start offset in bytes 1146 * @end: end offset in bytes (inclusive) 1147 * @datasync: only synchronize essential metadata if true 1148 * 1149 */ 1150 1151 int generic_file_fsync(struct file *file, loff_t start, loff_t end, 1152 int datasync) 1153 { 1154 struct inode *inode = file->f_mapping->host; 1155 int err; 1156 1157 err = __generic_file_fsync(file, start, end, datasync); 
1158 if (err) 1159 return err; 1160 return blkdev_issue_flush(inode->i_sb->s_bdev); 1161 } 1162 EXPORT_SYMBOL(generic_file_fsync); 1163 1164 /** 1165 * generic_check_addressable - Check addressability of file system 1166 * @blocksize_bits: log of file system block size 1167 * @num_blocks: number of blocks in file system 1168 * 1169 * Determine whether a file system with @num_blocks blocks (and a 1170 * block size of 2**@blocksize_bits) is addressable by the sector_t 1171 * and page cache of the system. Return 0 if so and -EFBIG otherwise. 1172 */ 1173 int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks) 1174 { 1175 u64 last_fs_block = num_blocks - 1; 1176 u64 last_fs_page = 1177 last_fs_block >> (PAGE_SHIFT - blocksize_bits); 1178 1179 if (unlikely(num_blocks == 0)) 1180 return 0; 1181 1182 if ((blocksize_bits < 9) || (blocksize_bits > PAGE_SHIFT)) 1183 return -EINVAL; 1184 1185 if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) || 1186 (last_fs_page > (pgoff_t)(~0ULL))) { 1187 return -EFBIG; 1188 } 1189 return 0; 1190 } 1191 EXPORT_SYMBOL(generic_check_addressable); 1192 1193 /* 1194 * No-op implementation of ->fsync for in-memory filesystems. 1195 */ 1196 int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync) 1197 { 1198 return 0; 1199 } 1200 EXPORT_SYMBOL(noop_fsync); 1201 1202 ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter) 1203 { 1204 /* 1205 * iomap based filesystems support direct I/O without need for 1206 * this callback. However, it still needs to be set in 1207 * inode->a_ops so that open/fcntl know that direct I/O is 1208 * generally supported. 
1209 */ 1210 return -EINVAL; 1211 } 1212 EXPORT_SYMBOL_GPL(noop_direct_IO); 1213 1214 /* Because kfree isn't assignment-compatible with void(void*) ;-/ */ 1215 void kfree_link(void *p) 1216 { 1217 kfree(p); 1218 } 1219 EXPORT_SYMBOL(kfree_link); 1220 1221 struct inode *alloc_anon_inode(struct super_block *s) 1222 { 1223 static const struct address_space_operations anon_aops = { 1224 .dirty_folio = noop_dirty_folio, 1225 }; 1226 struct inode *inode = new_inode_pseudo(s); 1227 1228 if (!inode) 1229 return ERR_PTR(-ENOMEM); 1230 1231 inode->i_ino = get_next_ino(); 1232 inode->i_mapping->a_ops = &anon_aops; 1233 1234 /* 1235 * Mark the inode dirty from the very beginning, 1236 * that way it will never be moved to the dirty 1237 * list because mark_inode_dirty() will think 1238 * that it already _is_ on the dirty list. 1239 */ 1240 inode->i_state = I_DIRTY; 1241 inode->i_mode = S_IRUSR | S_IWUSR; 1242 inode->i_uid = current_fsuid(); 1243 inode->i_gid = current_fsgid(); 1244 inode->i_flags |= S_PRIVATE; 1245 inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); 1246 return inode; 1247 } 1248 EXPORT_SYMBOL(alloc_anon_inode); 1249 1250 /** 1251 * simple_nosetlease - generic helper for prohibiting leases 1252 * @filp: file pointer 1253 * @arg: type of lease to obtain 1254 * @flp: new lease supplied for insertion 1255 * @priv: private data for lm_setup operation 1256 * 1257 * Generic helper for filesystems that do not wish to allow leases to be set. 1258 * All arguments are ignored and it just returns -EINVAL. 
1259 */ 1260 int 1261 simple_nosetlease(struct file *filp, long arg, struct file_lock **flp, 1262 void **priv) 1263 { 1264 return -EINVAL; 1265 } 1266 EXPORT_SYMBOL(simple_nosetlease); 1267 1268 /** 1269 * simple_get_link - generic helper to get the target of "fast" symlinks 1270 * @dentry: not used here 1271 * @inode: the symlink inode 1272 * @done: not used here 1273 * 1274 * Generic helper for filesystems to use for symlink inodes where a pointer to 1275 * the symlink target is stored in ->i_link. NOTE: this isn't normally called, 1276 * since as an optimization the path lookup code uses any non-NULL ->i_link 1277 * directly, without calling ->get_link(). But ->get_link() still must be set, 1278 * to mark the inode_operations as being for a symlink. 1279 * 1280 * Return: the symlink target 1281 */ 1282 const char *simple_get_link(struct dentry *dentry, struct inode *inode, 1283 struct delayed_call *done) 1284 { 1285 return inode->i_link; 1286 } 1287 EXPORT_SYMBOL(simple_get_link); 1288 1289 const struct inode_operations simple_symlink_inode_operations = { 1290 .get_link = simple_get_link, 1291 }; 1292 EXPORT_SYMBOL(simple_symlink_inode_operations); 1293 1294 /* 1295 * Operations for a permanently empty directory. 
1296 */ 1297 static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) 1298 { 1299 return ERR_PTR(-ENOENT); 1300 } 1301 1302 static int empty_dir_getattr(struct user_namespace *mnt_userns, 1303 const struct path *path, struct kstat *stat, 1304 u32 request_mask, unsigned int query_flags) 1305 { 1306 struct inode *inode = d_inode(path->dentry); 1307 generic_fillattr(&init_user_ns, inode, stat); 1308 return 0; 1309 } 1310 1311 static int empty_dir_setattr(struct user_namespace *mnt_userns, 1312 struct dentry *dentry, struct iattr *attr) 1313 { 1314 return -EPERM; 1315 } 1316 1317 static ssize_t empty_dir_listxattr(struct dentry *dentry, char *list, size_t size) 1318 { 1319 return -EOPNOTSUPP; 1320 } 1321 1322 static const struct inode_operations empty_dir_inode_operations = { 1323 .lookup = empty_dir_lookup, 1324 .permission = generic_permission, 1325 .setattr = empty_dir_setattr, 1326 .getattr = empty_dir_getattr, 1327 .listxattr = empty_dir_listxattr, 1328 }; 1329 1330 static loff_t empty_dir_llseek(struct file *file, loff_t offset, int whence) 1331 { 1332 /* An empty directory has two entries . and .. 
at offsets 0 and 1 */ 1333 return generic_file_llseek_size(file, offset, whence, 2, 2); 1334 } 1335 1336 static int empty_dir_readdir(struct file *file, struct dir_context *ctx) 1337 { 1338 dir_emit_dots(file, ctx); 1339 return 0; 1340 } 1341 1342 static const struct file_operations empty_dir_operations = { 1343 .llseek = empty_dir_llseek, 1344 .read = generic_read_dir, 1345 .iterate_shared = empty_dir_readdir, 1346 .fsync = noop_fsync, 1347 }; 1348 1349 1350 void make_empty_dir_inode(struct inode *inode) 1351 { 1352 set_nlink(inode, 2); 1353 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; 1354 inode->i_uid = GLOBAL_ROOT_UID; 1355 inode->i_gid = GLOBAL_ROOT_GID; 1356 inode->i_rdev = 0; 1357 inode->i_size = 0; 1358 inode->i_blkbits = PAGE_SHIFT; 1359 inode->i_blocks = 0; 1360 1361 inode->i_op = &empty_dir_inode_operations; 1362 inode->i_opflags &= ~IOP_XATTR; 1363 inode->i_fop = &empty_dir_operations; 1364 } 1365 1366 bool is_empty_dir_inode(struct inode *inode) 1367 { 1368 return (inode->i_fop == &empty_dir_operations) && 1369 (inode->i_op == &empty_dir_inode_operations); 1370 } 1371 1372 #if IS_ENABLED(CONFIG_UNICODE) 1373 /* 1374 * Determine if the name of a dentry should be casefolded. 
1375 * 1376 * Return: if names will need casefolding 1377 */ 1378 static bool needs_casefold(const struct inode *dir) 1379 { 1380 return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding; 1381 } 1382 1383 /** 1384 * generic_ci_d_compare - generic d_compare implementation for casefolding filesystems 1385 * @dentry: dentry whose name we are checking against 1386 * @len: len of name of dentry 1387 * @str: str pointer to name of dentry 1388 * @name: Name to compare against 1389 * 1390 * Return: 0 if names match, 1 if mismatch, or -ERRNO 1391 */ 1392 static int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, 1393 const char *str, const struct qstr *name) 1394 { 1395 const struct dentry *parent = READ_ONCE(dentry->d_parent); 1396 const struct inode *dir = READ_ONCE(parent->d_inode); 1397 const struct super_block *sb = dentry->d_sb; 1398 const struct unicode_map *um = sb->s_encoding; 1399 struct qstr qstr = QSTR_INIT(str, len); 1400 char strbuf[DNAME_INLINE_LEN]; 1401 int ret; 1402 1403 if (!dir || !needs_casefold(dir)) 1404 goto fallback; 1405 /* 1406 * If the dentry name is stored in-line, then it may be concurrently 1407 * modified by a rename. If this happens, the VFS will eventually retry 1408 * the lookup, so it doesn't matter what ->d_compare() returns. 1409 * However, it's unsafe to call utf8_strncasecmp() with an unstable 1410 * string. Therefore, we have to copy the name into a temporary buffer. 
1411 */ 1412 if (len <= DNAME_INLINE_LEN - 1) { 1413 memcpy(strbuf, str, len); 1414 strbuf[len] = 0; 1415 qstr.name = strbuf; 1416 /* prevent compiler from optimizing out the temporary buffer */ 1417 barrier(); 1418 } 1419 ret = utf8_strncasecmp(um, name, &qstr); 1420 if (ret >= 0) 1421 return ret; 1422 1423 if (sb_has_strict_encoding(sb)) 1424 return -EINVAL; 1425 fallback: 1426 if (len != name->len) 1427 return 1; 1428 return !!memcmp(str, name->name, len); 1429 } 1430 1431 /** 1432 * generic_ci_d_hash - generic d_hash implementation for casefolding filesystems 1433 * @dentry: dentry of the parent directory 1434 * @str: qstr of name whose hash we should fill in 1435 * 1436 * Return: 0 if hash was successful or unchanged, and -EINVAL on error 1437 */ 1438 static int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str) 1439 { 1440 const struct inode *dir = READ_ONCE(dentry->d_inode); 1441 struct super_block *sb = dentry->d_sb; 1442 const struct unicode_map *um = sb->s_encoding; 1443 int ret = 0; 1444 1445 if (!dir || !needs_casefold(dir)) 1446 return 0; 1447 1448 ret = utf8_casefold_hash(um, dentry, str); 1449 if (ret < 0 && sb_has_strict_encoding(sb)) 1450 return -EINVAL; 1451 return 0; 1452 } 1453 1454 static const struct dentry_operations generic_ci_dentry_ops = { 1455 .d_hash = generic_ci_d_hash, 1456 .d_compare = generic_ci_d_compare, 1457 }; 1458 #endif 1459 1460 #ifdef CONFIG_FS_ENCRYPTION 1461 static const struct dentry_operations generic_encrypted_dentry_ops = { 1462 .d_revalidate = fscrypt_d_revalidate, 1463 }; 1464 #endif 1465 1466 #if defined(CONFIG_FS_ENCRYPTION) && IS_ENABLED(CONFIG_UNICODE) 1467 static const struct dentry_operations generic_encrypted_ci_dentry_ops = { 1468 .d_hash = generic_ci_d_hash, 1469 .d_compare = generic_ci_d_compare, 1470 .d_revalidate = fscrypt_d_revalidate, 1471 }; 1472 #endif 1473 1474 /** 1475 * generic_set_encrypted_ci_d_ops - helper for setting d_ops for given dentry 1476 * @dentry: dentry to set ops on 1477 
* 1478 * Casefolded directories need d_hash and d_compare set, so that the dentries 1479 * contained in them are handled case-insensitively. Note that these operations 1480 * are needed on the parent directory rather than on the dentries in it, and 1481 * while the casefolding flag can be toggled on and off on an empty directory, 1482 * dentry_operations can't be changed later. As a result, if the filesystem has 1483 * casefolding support enabled at all, we have to give all dentries the 1484 * casefolding operations even if their inode doesn't have the casefolding flag 1485 * currently (and thus the casefolding ops would be no-ops for now). 1486 * 1487 * Encryption works differently in that the only dentry operation it needs is 1488 * d_revalidate, which it only needs on dentries that have the no-key name flag. 1489 * The no-key flag can't be set "later", so we don't have to worry about that. 1490 * 1491 * Finally, to maximize compatibility with overlayfs (which isn't compatible 1492 * with certain dentry operations) and to avoid taking an unnecessary 1493 * performance hit, we use custom dentry_operations for each possible 1494 * combination rather than always installing all operations. 
1495 */ 1496 void generic_set_encrypted_ci_d_ops(struct dentry *dentry) 1497 { 1498 #ifdef CONFIG_FS_ENCRYPTION 1499 bool needs_encrypt_ops = dentry->d_flags & DCACHE_NOKEY_NAME; 1500 #endif 1501 #if IS_ENABLED(CONFIG_UNICODE) 1502 bool needs_ci_ops = dentry->d_sb->s_encoding; 1503 #endif 1504 #if defined(CONFIG_FS_ENCRYPTION) && IS_ENABLED(CONFIG_UNICODE) 1505 if (needs_encrypt_ops && needs_ci_ops) { 1506 d_set_d_op(dentry, &generic_encrypted_ci_dentry_ops); 1507 return; 1508 } 1509 #endif 1510 #ifdef CONFIG_FS_ENCRYPTION 1511 if (needs_encrypt_ops) { 1512 d_set_d_op(dentry, &generic_encrypted_dentry_ops); 1513 return; 1514 } 1515 #endif 1516 #if IS_ENABLED(CONFIG_UNICODE) 1517 if (needs_ci_ops) { 1518 d_set_d_op(dentry, &generic_ci_dentry_ops); 1519 return; 1520 } 1521 #endif 1522 } 1523 EXPORT_SYMBOL(generic_set_encrypted_ci_d_ops); 1524 1525 /** 1526 * inode_maybe_inc_iversion - increments i_version 1527 * @inode: inode with the i_version that should be updated 1528 * @force: increment the counter even if it's not necessary? 1529 * 1530 * Every time the inode is modified, the i_version field must be seen to have 1531 * changed by any observer. 1532 * 1533 * If "force" is set or the QUERIED flag is set, then ensure that we increment 1534 * the value, and clear the queried flag. 1535 * 1536 * In the common case where neither is set, then we can return "false" without 1537 * updating i_version. 1538 * 1539 * If this function returns false, and no other metadata has changed, then we 1540 * can avoid logging the metadata. 1541 */ 1542 bool inode_maybe_inc_iversion(struct inode *inode, bool force) 1543 { 1544 u64 cur, new; 1545 1546 /* 1547 * The i_version field is not strictly ordered with any other inode 1548 * information, but the legacy inode_inc_iversion code used a spinlock 1549 * to serialize increments. 1550 * 1551 * Here, we add full memory barriers to ensure that any de-facto 1552 * ordering with other info is preserved. 
1553 * 1554 * This barrier pairs with the barrier in inode_query_iversion() 1555 */ 1556 smp_mb(); 1557 cur = inode_peek_iversion_raw(inode); 1558 do { 1559 /* If flag is clear then we needn't do anything */ 1560 if (!force && !(cur & I_VERSION_QUERIED)) 1561 return false; 1562 1563 /* Since lowest bit is flag, add 2 to avoid it */ 1564 new = (cur & ~I_VERSION_QUERIED) + I_VERSION_INCREMENT; 1565 } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new)); 1566 return true; 1567 } 1568 EXPORT_SYMBOL(inode_maybe_inc_iversion); 1569