1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * fs/libfs.c 4 * Library for filesystems writers. 5 */ 6 7 #include <linux/blkdev.h> 8 #include <linux/export.h> 9 #include <linux/pagemap.h> 10 #include <linux/slab.h> 11 #include <linux/cred.h> 12 #include <linux/mount.h> 13 #include <linux/vfs.h> 14 #include <linux/quotaops.h> 15 #include <linux/mutex.h> 16 #include <linux/namei.h> 17 #include <linux/exportfs.h> 18 #include <linux/writeback.h> 19 #include <linux/buffer_head.h> /* sync_mapping_buffers */ 20 #include <linux/fs_context.h> 21 #include <linux/pseudo_fs.h> 22 23 #include <linux/uaccess.h> 24 25 #include "internal.h" 26 27 int simple_getattr(const struct path *path, struct kstat *stat, 28 u32 request_mask, unsigned int query_flags) 29 { 30 struct inode *inode = d_inode(path->dentry); 31 generic_fillattr(inode, stat); 32 stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9); 33 return 0; 34 } 35 EXPORT_SYMBOL(simple_getattr); 36 37 int simple_statfs(struct dentry *dentry, struct kstatfs *buf) 38 { 39 buf->f_type = dentry->d_sb->s_magic; 40 buf->f_bsize = PAGE_SIZE; 41 buf->f_namelen = NAME_MAX; 42 return 0; 43 } 44 EXPORT_SYMBOL(simple_statfs); 45 46 /* 47 * Retaining negative dentries for an in-memory filesystem just wastes 48 * memory and lookup time: arrange for them to be deleted immediately. 49 */ 50 int always_delete_dentry(const struct dentry *dentry) 51 { 52 return 1; 53 } 54 EXPORT_SYMBOL(always_delete_dentry); 55 56 const struct dentry_operations simple_dentry_operations = { 57 .d_delete = always_delete_dentry, 58 }; 59 EXPORT_SYMBOL(simple_dentry_operations); 60 61 /* 62 * Lookup the data. This is trivial - if the dentry didn't already 63 * exist, we know it is negative. Set d_op to delete negative dentries. 64 */ 65 struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) 66 { 67 if (dentry->d_name.len > NAME_MAX) 68 return ERR_PTR(-ENAMETOOLONG); 69 if (!dentry->d_sb->s_d_op) 70 d_set_d_op(dentry, &simple_dentry_operations); 71 d_add(dentry, NULL); 72 return NULL; 73 } 74 EXPORT_SYMBOL(simple_lookup); 75 76 int dcache_dir_open(struct inode *inode, struct file *file) 77 { 78 file->private_data = d_alloc_cursor(file->f_path.dentry); 79 80 return file->private_data ? 0 : -ENOMEM; 81 } 82 EXPORT_SYMBOL(dcache_dir_open); 83 84 int dcache_dir_close(struct inode *inode, struct file *file) 85 { 86 dput(file->private_data); 87 return 0; 88 } 89 EXPORT_SYMBOL(dcache_dir_close); 90 91 /* parent is locked at least shared */ 92 static struct dentry *next_positive(struct dentry *parent, 93 struct list_head *from, 94 int count) 95 { 96 unsigned *seq = &parent->d_inode->i_dir_seq, n; 97 struct dentry *res; 98 struct list_head *p; 99 bool skipped; 100 int i; 101 102 retry: 103 i = count; 104 skipped = false; 105 n = smp_load_acquire(seq) & ~1; 106 res = NULL; 107 rcu_read_lock(); 108 for (p = from->next; p != &parent->d_subdirs; p = p->next) { 109 struct dentry *d = list_entry(p, struct dentry, d_child); 110 if (!simple_positive(d)) { 111 skipped = true; 112 } else if (!--i) { 113 res = d; 114 break; 115 } 116 } 117 rcu_read_unlock(); 118 if (skipped) { 119 smp_rmb(); 120 if (unlikely(*seq != n)) 121 goto retry; 122 } 123 return res; 124 } 125 126 static void move_cursor(struct dentry *cursor, struct list_head *after) 127 { 128 struct dentry *parent = cursor->d_parent; 129 unsigned n, *seq = &parent->d_inode->i_dir_seq; 130 spin_lock(&parent->d_lock); 131 for (;;) { 132 n = *seq; 133 if (!(n & 1) && cmpxchg(seq, n, n + 1) == n) 134 break; 135 cpu_relax(); 136 } 137 __list_del(cursor->d_child.prev, cursor->d_child.next); 138 if (after) 139 list_add(&cursor->d_child, after); 140 else 141 list_add_tail(&cursor->d_child, &parent->d_subdirs); 142 smp_store_release(seq, n + 2); 143 spin_unlock(&parent->d_lock); 144 } 145 146 loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) 147 { 148 struct dentry *dentry = file->f_path.dentry; 149 switch (whence) { 150 case 1: 151 offset += file->f_pos; 152 /* fall through */ 153 case 0: 154 if (offset >= 0) 155 break; 156 /* fall through */ 157 default: 158 return -EINVAL; 159 } 160 if (offset != file->f_pos) { 161 file->f_pos = offset; 162 if (file->f_pos >= 2) { 163 struct dentry *cursor = file->private_data; 164 struct dentry *to; 165 loff_t n = file->f_pos - 2; 166 167 inode_lock_shared(dentry->d_inode); 168 to = next_positive(dentry, &dentry->d_subdirs, n); 169 move_cursor(cursor, to ? &to->d_child : NULL); 170 inode_unlock_shared(dentry->d_inode); 171 } 172 } 173 return offset; 174 } 175 EXPORT_SYMBOL(dcache_dir_lseek); 176 177 /* Relationship between i_mode and the DT_xxx types */ 178 static inline unsigned char dt_type(struct inode *inode) 179 { 180 return (inode->i_mode >> 12) & 15; 181 } 182 183 /* 184 * Directory is locked and all positive dentries in it are safe, since 185 * for ramfs-type trees they can't go away without unlink() or rmdir(), 186 * both impossible due to the lock on directory. 187 */ 188 189 int dcache_readdir(struct file *file, struct dir_context *ctx) 190 { 191 struct dentry *dentry = file->f_path.dentry; 192 struct dentry *cursor = file->private_data; 193 struct list_head *p = &cursor->d_child; 194 struct dentry *next; 195 bool moved = false; 196 197 if (!dir_emit_dots(file, ctx)) 198 return 0; 199 200 if (ctx->pos == 2) 201 p = &dentry->d_subdirs; 202 while ((next = next_positive(dentry, p, 1)) != NULL) { 203 if (!dir_emit(ctx, next->d_name.name, next->d_name.len, 204 d_inode(next)->i_ino, dt_type(d_inode(next)))) 205 break; 206 moved = true; 207 p = &next->d_child; 208 ctx->pos++; 209 } 210 if (moved) 211 move_cursor(cursor, p); 212 return 0; 213 } 214 EXPORT_SYMBOL(dcache_readdir); 215 216 ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos) 217 { 218 return -EISDIR; 219 } 220 EXPORT_SYMBOL(generic_read_dir); 221 222 const struct file_operations simple_dir_operations = { 223 .open = dcache_dir_open, 224 .release = dcache_dir_close, 225 .llseek = dcache_dir_lseek, 226 .read = generic_read_dir, 227 .iterate_shared = dcache_readdir, 228 .fsync = noop_fsync, 229 }; 230 EXPORT_SYMBOL(simple_dir_operations); 231 232 const struct inode_operations simple_dir_inode_operations = { 233 .lookup = simple_lookup, 234 }; 235 EXPORT_SYMBOL(simple_dir_inode_operations); 236 237 static const struct super_operations simple_super_operations = { 238 .statfs = simple_statfs, 239 }; 240 241 static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc) 242 { 243 struct pseudo_fs_context *ctx = fc->fs_private; 244 struct inode *root; 245 246 s->s_maxbytes = MAX_LFS_FILESIZE; 247 s->s_blocksize = PAGE_SIZE; 248 s->s_blocksize_bits = PAGE_SHIFT; 249 s->s_magic = ctx->magic; 250 s->s_op = ctx->ops ?: &simple_super_operations; 251 s->s_xattr = ctx->xattr; 252 s->s_time_gran = 1; 253 root = new_inode(s); 254 if (!root) 255 return -ENOMEM; 256 257 /* 258 * since this is the first inode, make it number 1. New inodes created 259 * after this must take care not to collide with it (by passing 260 * max_reserved of 1 to iunique). 261 */ 262 root->i_ino = 1; 263 root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; 264 root->i_atime = root->i_mtime = root->i_ctime = current_time(root); 265 s->s_root = d_make_root(root); 266 if (!s->s_root) 267 return -ENOMEM; 268 s->s_d_op = ctx->dops; 269 return 0; 270 } 271 272 static int pseudo_fs_get_tree(struct fs_context *fc) 273 { 274 return get_tree_nodev(fc, pseudo_fs_fill_super); 275 } 276 277 static void pseudo_fs_free(struct fs_context *fc) 278 { 279 kfree(fc->fs_private); 280 } 281 282 static const struct fs_context_operations pseudo_fs_context_ops = { 283 .free = pseudo_fs_free, 284 .get_tree = pseudo_fs_get_tree, 285 }; 286 287 /* 288 * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that 289 * will never be mountable) 290 */ 291 struct pseudo_fs_context *init_pseudo(struct fs_context *fc, 292 unsigned long magic) 293 { 294 struct pseudo_fs_context *ctx; 295 296 ctx = kzalloc(sizeof(struct pseudo_fs_context), GFP_KERNEL); 297 if (likely(ctx)) { 298 ctx->magic = magic; 299 fc->fs_private = ctx; 300 fc->ops = &pseudo_fs_context_ops; 301 fc->sb_flags |= SB_NOUSER; 302 fc->global = true; 303 } 304 return ctx; 305 } 306 EXPORT_SYMBOL(init_pseudo); 307 308 int simple_open(struct inode *inode, struct file *file) 309 { 310 if (inode->i_private) 311 file->private_data = inode->i_private; 312 return 0; 313 } 314 EXPORT_SYMBOL(simple_open); 315 316 int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) 317 { 318 struct inode *inode = d_inode(old_dentry); 319 320 inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); 321 inc_nlink(inode); 322 ihold(inode); 323 dget(dentry); 324 d_instantiate(dentry, inode); 325 return 0; 326 } 327 EXPORT_SYMBOL(simple_link); 328 329 int simple_empty(struct dentry *dentry) 330 { 331 struct dentry *child; 332 int ret = 0; 333 334 spin_lock(&dentry->d_lock); 335 list_for_each_entry(child, &dentry->d_subdirs, d_child) { 336 spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); 337 if (simple_positive(child)) { 338 spin_unlock(&child->d_lock); 339 goto out; 340 } 341 spin_unlock(&child->d_lock); 342 } 343 ret = 1; 344 out: 345 spin_unlock(&dentry->d_lock); 346 return ret; 347 } 348 EXPORT_SYMBOL(simple_empty); 349 350 int simple_unlink(struct inode *dir, struct dentry *dentry) 351 { 352 struct inode *inode = d_inode(dentry); 353 354 inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); 355 drop_nlink(inode); 356 dput(dentry); 357 return 0; 358 } 359 EXPORT_SYMBOL(simple_unlink); 360 361 int simple_rmdir(struct inode *dir, struct dentry *dentry) 362 { 363 if (!simple_empty(dentry)) 364 return -ENOTEMPTY; 365 366 drop_nlink(d_inode(dentry)); 367 simple_unlink(dir, dentry); 368 drop_nlink(dir); 369 return 0; 370 } 371 EXPORT_SYMBOL(simple_rmdir); 372 373 int simple_rename(struct inode *old_dir, struct dentry *old_dentry, 374 struct inode *new_dir, struct dentry *new_dentry, 375 unsigned int flags) 376 { 377 struct inode *inode = d_inode(old_dentry); 378 int they_are_dirs = d_is_dir(old_dentry); 379 380 if (flags & ~RENAME_NOREPLACE) 381 return -EINVAL; 382 383 if (!simple_empty(new_dentry)) 384 return -ENOTEMPTY; 385 386 if (d_really_is_positive(new_dentry)) { 387 simple_unlink(new_dir, new_dentry); 388 if (they_are_dirs) { 389 drop_nlink(d_inode(new_dentry)); 390 drop_nlink(old_dir); 391 } 392 } else if (they_are_dirs) { 393 drop_nlink(old_dir); 394 inc_nlink(new_dir); 395 } 396 397 old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime = 398 new_dir->i_mtime = inode->i_ctime = current_time(old_dir); 399 400 return 0; 401 } 402 EXPORT_SYMBOL(simple_rename); 403 404 /** 405 * simple_setattr - setattr for simple filesystem 406 * @dentry: dentry 407 * @iattr: iattr structure 408 * 409 * Returns 0 on success, -error on failure. 410 * 411 * simple_setattr is a simple ->setattr implementation without a proper 412 * implementation of size changes. 413 * 414 * It can either be used for in-memory filesystems or special files 415 * on simple regular filesystems. Anything that needs to change on-disk 416 * or wire state on size changes needs its own setattr method. 417 */ 418 int simple_setattr(struct dentry *dentry, struct iattr *iattr) 419 { 420 struct inode *inode = d_inode(dentry); 421 int error; 422 423 error = setattr_prepare(dentry, iattr); 424 if (error) 425 return error; 426 427 if (iattr->ia_valid & ATTR_SIZE) 428 truncate_setsize(inode, iattr->ia_size); 429 setattr_copy(inode, iattr); 430 mark_inode_dirty(inode); 431 return 0; 432 } 433 EXPORT_SYMBOL(simple_setattr); 434 435 int simple_readpage(struct file *file, struct page *page) 436 { 437 clear_highpage(page); 438 flush_dcache_page(page); 439 SetPageUptodate(page); 440 unlock_page(page); 441 return 0; 442 } 443 EXPORT_SYMBOL(simple_readpage); 444 445 int simple_write_begin(struct file *file, struct address_space *mapping, 446 loff_t pos, unsigned len, unsigned flags, 447 struct page **pagep, void **fsdata) 448 { 449 struct page *page; 450 pgoff_t index; 451 452 index = pos >> PAGE_SHIFT; 453 454 page = grab_cache_page_write_begin(mapping, index, flags); 455 if (!page) 456 return -ENOMEM; 457 458 *pagep = page; 459 460 if (!PageUptodate(page) && (len != PAGE_SIZE)) { 461 unsigned from = pos & (PAGE_SIZE - 1); 462 463 zero_user_segments(page, 0, from, from + len, PAGE_SIZE); 464 } 465 return 0; 466 } 467 EXPORT_SYMBOL(simple_write_begin); 468 469 /** 470 * simple_write_end - .write_end helper for non-block-device FSes 471 * @available: See .write_end of address_space_operations 472 * @file: " 473 * @mapping: " 474 * @pos: " 475 * @len: " 476 * @copied: " 477 * @page: " 478 * @fsdata: " 479 * 480 * simple_write_end does the minimum needed for updating a page after writing is 481 * done. It has the same API signature as the .write_end of 482 * address_space_operations vector. So it can just be set onto .write_end for 483 * FSes that don't need any other processing. i_mutex is assumed to be held. 484 * Block based filesystems should use generic_write_end(). 485 * NOTE: Even though i_size might get updated by this function, mark_inode_dirty 486 * is not called, so a filesystem that actually does store data in .write_inode 487 * should extend on what's done here with a call to mark_inode_dirty() in the 488 * case that i_size has changed. 489 * 490 * Use *ONLY* with simple_readpage() 491 */ 492 int simple_write_end(struct file *file, struct address_space *mapping, 493 loff_t pos, unsigned len, unsigned copied, 494 struct page *page, void *fsdata) 495 { 496 struct inode *inode = page->mapping->host; 497 loff_t last_pos = pos + copied; 498 499 /* zero the stale part of the page if we did a short copy */ 500 if (!PageUptodate(page)) { 501 if (copied < len) { 502 unsigned from = pos & (PAGE_SIZE - 1); 503 504 zero_user(page, from + copied, len - copied); 505 } 506 SetPageUptodate(page); 507 } 508 /* 509 * No need to use i_size_read() here, the i_size 510 * cannot change under us because we hold the i_mutex. 511 */ 512 if (last_pos > inode->i_size) 513 i_size_write(inode, last_pos); 514 515 set_page_dirty(page); 516 unlock_page(page); 517 put_page(page); 518 519 return copied; 520 } 521 EXPORT_SYMBOL(simple_write_end); 522 523 /* 524 * the inodes created here are not hashed. If you use iunique to generate 525 * unique inode values later for this filesystem, then you must take care 526 * to pass it an appropriate max_reserved value to avoid collisions. 527 */ 528 int simple_fill_super(struct super_block *s, unsigned long magic, 529 const struct tree_descr *files) 530 { 531 struct inode *inode; 532 struct dentry *root; 533 struct dentry *dentry; 534 int i; 535 536 s->s_blocksize = PAGE_SIZE; 537 s->s_blocksize_bits = PAGE_SHIFT; 538 s->s_magic = magic; 539 s->s_op = &simple_super_operations; 540 s->s_time_gran = 1; 541 542 inode = new_inode(s); 543 if (!inode) 544 return -ENOMEM; 545 /* 546 * because the root inode is 1, the files array must not contain an 547 * entry at index 1 548 */ 549 inode->i_ino = 1; 550 inode->i_mode = S_IFDIR | 0755; 551 inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); 552 inode->i_op = &simple_dir_inode_operations; 553 inode->i_fop = &simple_dir_operations; 554 set_nlink(inode, 2); 555 root = d_make_root(inode); 556 if (!root) 557 return -ENOMEM; 558 for (i = 0; !files->name || files->name[0]; i++, files++) { 559 if (!files->name) 560 continue; 561 562 /* warn if it tries to conflict with the root inode */ 563 if (unlikely(i == 1)) 564 printk(KERN_WARNING "%s: %s passed in a files array" 565 "with an index of 1!\n", __func__, 566 s->s_type->name); 567 568 dentry = d_alloc_name(root, files->name); 569 if (!dentry) 570 goto out; 571 inode = new_inode(s); 572 if (!inode) { 573 dput(dentry); 574 goto out; 575 } 576 inode->i_mode = S_IFREG | files->mode; 577 inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); 578 inode->i_fop = files->ops; 579 inode->i_ino = i; 580 d_add(dentry, inode); 581 } 582 s->s_root = root; 583 return 0; 584 out: 585 d_genocide(root); 586 shrink_dcache_parent(root); 587 dput(root); 588 return -ENOMEM; 589 } 590 EXPORT_SYMBOL(simple_fill_super); 591 592 static DEFINE_SPINLOCK(pin_fs_lock); 593 594 int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *count) 595 { 596 struct vfsmount *mnt = NULL; 597 spin_lock(&pin_fs_lock); 598 if (unlikely(!*mount)) { 599 spin_unlock(&pin_fs_lock); 600 mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL); 601 if (IS_ERR(mnt)) 602 return PTR_ERR(mnt); 603 spin_lock(&pin_fs_lock); 604 if (!*mount) 605 *mount = mnt; 606 } 607 mntget(*mount); 608 ++*count; 609 spin_unlock(&pin_fs_lock); 610 mntput(mnt); 611 return 0; 612 } 613 EXPORT_SYMBOL(simple_pin_fs); 614 615 void simple_release_fs(struct vfsmount **mount, int *count) 616 { 617 struct vfsmount *mnt; 618 spin_lock(&pin_fs_lock); 619 mnt = *mount; 620 if (!--*count) 621 *mount = NULL; 622 spin_unlock(&pin_fs_lock); 623 mntput(mnt); 624 } 625 EXPORT_SYMBOL(simple_release_fs); 626 627 /** 628 * simple_read_from_buffer - copy data from the buffer to user space 629 * @to: the user space buffer to read to 630 * @count: the maximum number of bytes to read 631 * @ppos: the current position in the buffer 632 * @from: the buffer to read from 633 * @available: the size of the buffer 634 * 635 * The simple_read_from_buffer() function reads up to @count bytes from the 636 * buffer @from at offset @ppos into the user space address starting at @to. 637 * 638 * On success, the number of bytes read is returned and the offset @ppos is 639 * advanced by this number, or negative value is returned on error. 640 **/ 641 ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, 642 const void *from, size_t available) 643 { 644 loff_t pos = *ppos; 645 size_t ret; 646 647 if (pos < 0) 648 return -EINVAL; 649 if (pos >= available || !count) 650 return 0; 651 if (count > available - pos) 652 count = available - pos; 653 ret = copy_to_user(to, from + pos, count); 654 if (ret == count) 655 return -EFAULT; 656 count -= ret; 657 *ppos = pos + count; 658 return count; 659 } 660 EXPORT_SYMBOL(simple_read_from_buffer); 661 662 /** 663 * simple_write_to_buffer - copy data from user space to the buffer 664 * @to: the buffer to write to 665 * @available: the size of the buffer 666 * @ppos: the current position in the buffer 667 * @from: the user space buffer to read from 668 * @count: the maximum number of bytes to read 669 * 670 * The simple_write_to_buffer() function reads up to @count bytes from the user 671 * space address starting at @from into the buffer @to at offset @ppos. 672 * 673 * On success, the number of bytes written is returned and the offset @ppos is 674 * advanced by this number, or negative value is returned on error. 675 **/ 676 ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, 677 const void __user *from, size_t count) 678 { 679 loff_t pos = *ppos; 680 size_t res; 681 682 if (pos < 0) 683 return -EINVAL; 684 if (pos >= available || !count) 685 return 0; 686 if (count > available - pos) 687 count = available - pos; 688 res = copy_from_user(to + pos, from, count); 689 if (res == count) 690 return -EFAULT; 691 count -= res; 692 *ppos = pos + count; 693 return count; 694 } 695 EXPORT_SYMBOL(simple_write_to_buffer); 696 697 /** 698 * memory_read_from_buffer - copy data from the buffer 699 * @to: the kernel space buffer to read to 700 * @count: the maximum number of bytes to read 701 * @ppos: the current position in the buffer 702 * @from: the buffer to read from 703 * @available: the size of the buffer 704 * 705 * The memory_read_from_buffer() function reads up to @count bytes from the 706 * buffer @from at offset @ppos into the kernel space address starting at @to. 707 * 708 * On success, the number of bytes read is returned and the offset @ppos is 709 * advanced by this number, or negative value is returned on error. 710 **/ 711 ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, 712 const void *from, size_t available) 713 { 714 loff_t pos = *ppos; 715 716 if (pos < 0) 717 return -EINVAL; 718 if (pos >= available) 719 return 0; 720 if (count > available - pos) 721 count = available - pos; 722 memcpy(to, from + pos, count); 723 *ppos = pos + count; 724 725 return count; 726 } 727 EXPORT_SYMBOL(memory_read_from_buffer); 728 729 /* 730 * Transaction based IO. 731 * The file expects a single write which triggers the transaction, and then 732 * possibly a read which collects the result - which is stored in a 733 * file-local buffer. 734 */ 735 736 void simple_transaction_set(struct file *file, size_t n) 737 { 738 struct simple_transaction_argresp *ar = file->private_data; 739 740 BUG_ON(n > SIMPLE_TRANSACTION_LIMIT); 741 742 /* 743 * The barrier ensures that ar->size will really remain zero until 744 * ar->data is ready for reading. 745 */ 746 smp_mb(); 747 ar->size = n; 748 } 749 EXPORT_SYMBOL(simple_transaction_set); 750 751 char *simple_transaction_get(struct file *file, const char __user *buf, size_t size) 752 { 753 struct simple_transaction_argresp *ar; 754 static DEFINE_SPINLOCK(simple_transaction_lock); 755 756 if (size > SIMPLE_TRANSACTION_LIMIT - 1) 757 return ERR_PTR(-EFBIG); 758 759 ar = (struct simple_transaction_argresp *)get_zeroed_page(GFP_KERNEL); 760 if (!ar) 761 return ERR_PTR(-ENOMEM); 762 763 spin_lock(&simple_transaction_lock); 764 765 /* only one write allowed per open */ 766 if (file->private_data) { 767 spin_unlock(&simple_transaction_lock); 768 free_page((unsigned long)ar); 769 return ERR_PTR(-EBUSY); 770 } 771 772 file->private_data = ar; 773 774 spin_unlock(&simple_transaction_lock); 775 776 if (copy_from_user(ar->data, buf, size)) 777 return ERR_PTR(-EFAULT); 778 779 return ar->data; 780 } 781 EXPORT_SYMBOL(simple_transaction_get); 782 783 ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) 784 { 785 struct simple_transaction_argresp *ar = file->private_data; 786 787 if (!ar) 788 return 0; 789 return simple_read_from_buffer(buf, size, pos, ar->data, ar->size); 790 } 791 EXPORT_SYMBOL(simple_transaction_read); 792 793 int simple_transaction_release(struct inode *inode, struct file *file) 794 { 795 free_page((unsigned long)file->private_data); 796 return 0; 797 } 798 EXPORT_SYMBOL(simple_transaction_release); 799 800 /* Simple attribute files */ 801 802 struct simple_attr { 803 int (*get)(void *, u64 *); 804 int (*set)(void *, u64); 805 char get_buf[24]; /* enough to store a u64 and "\n\0" */ 806 char set_buf[24]; 807 void *data; 808 const char *fmt; /* format for read operation */ 809 struct mutex mutex; /* protects access to these buffers */ 810 }; 811 812 /* simple_attr_open is called by an actual attribute open file operation 813 * to set the attribute specific access operations. */ 814 int simple_attr_open(struct inode *inode, struct file *file, 815 int (*get)(void *, u64 *), int (*set)(void *, u64), 816 const char *fmt) 817 { 818 struct simple_attr *attr; 819 820 attr = kmalloc(sizeof(*attr), GFP_KERNEL); 821 if (!attr) 822 return -ENOMEM; 823 824 attr->get = get; 825 attr->set = set; 826 attr->data = inode->i_private; 827 attr->fmt = fmt; 828 mutex_init(&attr->mutex); 829 830 file->private_data = attr; 831 832 return nonseekable_open(inode, file); 833 } 834 EXPORT_SYMBOL_GPL(simple_attr_open); 835 836 int simple_attr_release(struct inode *inode, struct file *file) 837 { 838 kfree(file->private_data); 839 return 0; 840 } 841 EXPORT_SYMBOL_GPL(simple_attr_release); /* GPL-only? This? Really? */ 842 843 /* read from the buffer that is filled with the get function */ 844 ssize_t simple_attr_read(struct file *file, char __user *buf, 845 size_t len, loff_t *ppos) 846 { 847 struct simple_attr *attr; 848 size_t size; 849 ssize_t ret; 850 851 attr = file->private_data; 852 853 if (!attr->get) 854 return -EACCES; 855 856 ret = mutex_lock_interruptible(&attr->mutex); 857 if (ret) 858 return ret; 859 860 if (*ppos) { /* continued read */ 861 size = strlen(attr->get_buf); 862 } else { /* first read */ 863 u64 val; 864 ret = attr->get(attr->data, &val); 865 if (ret) 866 goto out; 867 868 size = scnprintf(attr->get_buf, sizeof(attr->get_buf), 869 attr->fmt, (unsigned long long)val); 870 } 871 872 ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size); 873 out: 874 mutex_unlock(&attr->mutex); 875 return ret; 876 } 877 EXPORT_SYMBOL_GPL(simple_attr_read); 878 879 /* interpret the buffer as a number to call the set function with */ 880 ssize_t simple_attr_write(struct file *file, const char __user *buf, 881 size_t len, loff_t *ppos) 882 { 883 struct simple_attr *attr; 884 u64 val; 885 size_t size; 886 ssize_t ret; 887 888 attr = file->private_data; 889 if (!attr->set) 890 return -EACCES; 891 892 ret = mutex_lock_interruptible(&attr->mutex); 893 if (ret) 894 return ret; 895 896 ret = -EFAULT; 897 size = min(sizeof(attr->set_buf) - 1, len); 898 if (copy_from_user(attr->set_buf, buf, size)) 899 goto out; 900 901 attr->set_buf[size] = '\0'; 902 val = simple_strtoll(attr->set_buf, NULL, 0); 903 ret = attr->set(attr->data, val); 904 if (ret == 0) 905 ret = len; /* on success, claim we got the whole input */ 906 out: 907 mutex_unlock(&attr->mutex); 908 return ret; 909 } 910 EXPORT_SYMBOL_GPL(simple_attr_write); 911 912 /** 913 * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation 914 * @sb: filesystem to do the file handle conversion on 915 * @fid: file handle to convert 916 * @fh_len: length of the file handle in bytes 917 * @fh_type: type of file handle 918 * @get_inode: filesystem callback to retrieve inode 919 * 920 * This function decodes @fid as long as it has one of the well-known 921 * Linux filehandle types and calls @get_inode on it to retrieve the 922 * inode for the object specified in the file handle. 923 */ 924 struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid, 925 int fh_len, int fh_type, struct inode *(*get_inode) 926 (struct super_block *sb, u64 ino, u32 gen)) 927 { 928 struct inode *inode = NULL; 929 930 if (fh_len < 2) 931 return NULL; 932 933 switch (fh_type) { 934 case FILEID_INO32_GEN: 935 case FILEID_INO32_GEN_PARENT: 936 inode = get_inode(sb, fid->i32.ino, fid->i32.gen); 937 break; 938 } 939 940 return d_obtain_alias(inode); 941 } 942 EXPORT_SYMBOL_GPL(generic_fh_to_dentry); 943 944 /** 945 * generic_fh_to_parent - generic helper for the fh_to_parent export operation 946 * @sb: filesystem to do the file handle conversion on 947 * @fid: file handle to convert 948 * @fh_len: length of the file handle in bytes 949 * @fh_type: type of file handle 950 * @get_inode: filesystem callback to retrieve inode 951 * 952 * This function decodes @fid as long as it has one of the well-known 953 * Linux filehandle types and calls @get_inode on it to retrieve the 954 * inode for the _parent_ object specified in the file handle if it 955 * is specified in the file handle, or NULL otherwise. 956 */ 957 struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, 958 int fh_len, int fh_type, struct inode *(*get_inode) 959 (struct super_block *sb, u64 ino, u32 gen)) 960 { 961 struct inode *inode = NULL; 962 963 if (fh_len <= 2) 964 return NULL; 965 966 switch (fh_type) { 967 case FILEID_INO32_GEN_PARENT: 968 inode = get_inode(sb, fid->i32.parent_ino, 969 (fh_len > 3 ? fid->i32.parent_gen : 0)); 970 break; 971 } 972 973 return d_obtain_alias(inode); 974 } 975 EXPORT_SYMBOL_GPL(generic_fh_to_parent); 976 977 /** 978 * __generic_file_fsync - generic fsync implementation for simple filesystems 979 * 980 * @file: file to synchronize 981 * @start: start offset in bytes 982 * @end: end offset in bytes (inclusive) 983 * @datasync: only synchronize essential metadata if true 984 * 985 * This is a generic implementation of the fsync method for simple 986 * filesystems which track all non-inode metadata in the buffers list 987 * hanging off the address_space structure. 988 */ 989 int __generic_file_fsync(struct file *file, loff_t start, loff_t end, 990 int datasync) 991 { 992 struct inode *inode = file->f_mapping->host; 993 int err; 994 int ret; 995 996 err = file_write_and_wait_range(file, start, end); 997 if (err) 998 return err; 999 1000 inode_lock(inode); 1001 ret = sync_mapping_buffers(inode->i_mapping); 1002 if (!(inode->i_state & I_DIRTY_ALL)) 1003 goto out; 1004 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 1005 goto out; 1006 1007 err = sync_inode_metadata(inode, 1); 1008 if (ret == 0) 1009 ret = err; 1010 1011 out: 1012 inode_unlock(inode); 1013 /* check and advance again to catch errors after syncing out buffers */ 1014 err = file_check_and_advance_wb_err(file); 1015 if (ret == 0) 1016 ret = err; 1017 return ret; 1018 } 1019 EXPORT_SYMBOL(__generic_file_fsync); 1020 1021 /** 1022 * generic_file_fsync - generic fsync implementation for simple filesystems 1023 * with flush 1024 * @file: file to synchronize 1025 * @start: start offset in bytes 1026 * @end: end offset in bytes (inclusive) 1027 * @datasync: only synchronize essential metadata if true 1028 * 1029 */ 1030 1031 int generic_file_fsync(struct file *file, loff_t start, loff_t end, 1032 int datasync) 1033 { 1034 struct inode *inode = file->f_mapping->host; 1035 int err; 1036 1037 err = __generic_file_fsync(file, start, end, datasync); 1038 if (err) 1039 return err; 1040 return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 1041 } 1042 EXPORT_SYMBOL(generic_file_fsync); 1043 1044 /** 1045 * generic_check_addressable - Check addressability of file system 1046 * @blocksize_bits: log of file system block size 1047 * @num_blocks: number of blocks in file system 1048 * 1049 * Determine whether a file system with @num_blocks blocks (and a 1050 * block size of 2**@blocksize_bits) is addressable by the sector_t 1051 * and page cache of the system. Return 0 if so and -EFBIG otherwise. 1052 */ 1053 int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks) 1054 { 1055 u64 last_fs_block = num_blocks - 1; 1056 u64 last_fs_page = 1057 last_fs_block >> (PAGE_SHIFT - blocksize_bits); 1058 1059 if (unlikely(num_blocks == 0)) 1060 return 0; 1061 1062 if ((blocksize_bits < 9) || (blocksize_bits > PAGE_SHIFT)) 1063 return -EINVAL; 1064 1065 if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) || 1066 (last_fs_page > (pgoff_t)(~0ULL))) { 1067 return -EFBIG; 1068 } 1069 return 0; 1070 } 1071 EXPORT_SYMBOL(generic_check_addressable); 1072 1073 /* 1074 * No-op implementation of ->fsync for in-memory filesystems. 1075 */ 1076 int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync) 1077 { 1078 return 0; 1079 } 1080 EXPORT_SYMBOL(noop_fsync); 1081 1082 int noop_set_page_dirty(struct page *page) 1083 { 1084 /* 1085 * Unlike __set_page_dirty_no_writeback that handles dirty page 1086 * tracking in the page object, dax does all dirty tracking in 1087 * the inode address_space in response to mkwrite faults. In the 1088 * dax case we only need to worry about potentially dirty CPU 1089 * caches, not dirty page cache pages to write back. 1090 * 1091 * This callback is defined to prevent fallback to 1092 * __set_page_dirty_buffers() in set_page_dirty(). 1093 */ 1094 return 0; 1095 } 1096 EXPORT_SYMBOL_GPL(noop_set_page_dirty); 1097 1098 void noop_invalidatepage(struct page *page, unsigned int offset, 1099 unsigned int length) 1100 { 1101 /* 1102 * There is no page cache to invalidate in the dax case, however 1103 * we need this callback defined to prevent falling back to 1104 * block_invalidatepage() in do_invalidatepage(). 1105 */ 1106 } 1107 EXPORT_SYMBOL_GPL(noop_invalidatepage); 1108 1109 ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter) 1110 { 1111 /* 1112 * iomap based filesystems support direct I/O without need for 1113 * this callback. However, it still needs to be set in 1114 * inode->a_ops so that open/fcntl know that direct I/O is 1115 * generally supported. 1116 */ 1117 return -EINVAL; 1118 } 1119 EXPORT_SYMBOL_GPL(noop_direct_IO); 1120 1121 /* Because kfree isn't assignment-compatible with void(void*) ;-/ */ 1122 void kfree_link(void *p) 1123 { 1124 kfree(p); 1125 } 1126 EXPORT_SYMBOL(kfree_link); 1127 1128 /* 1129 * nop .set_page_dirty method so that people can use .page_mkwrite on 1130 * anon inodes. 1131 */ 1132 static int anon_set_page_dirty(struct page *page) 1133 { 1134 return 0; 1135 }; 1136 1137 /* 1138 * A single inode exists for all anon_inode files. Contrary to pipes, 1139 * anon_inode inodes have no associated per-instance data, so we need 1140 * only allocate one of them. 1141 */ 1142 struct inode *alloc_anon_inode(struct super_block *s) 1143 { 1144 static const struct address_space_operations anon_aops = { 1145 .set_page_dirty = anon_set_page_dirty, 1146 }; 1147 struct inode *inode = new_inode_pseudo(s); 1148 1149 if (!inode) 1150 return ERR_PTR(-ENOMEM); 1151 1152 inode->i_ino = get_next_ino(); 1153 inode->i_mapping->a_ops = &anon_aops; 1154 1155 /* 1156 * Mark the inode dirty from the very beginning, 1157 * that way it will never be moved to the dirty 1158 * list because mark_inode_dirty() will think 1159 * that it already _is_ on the dirty list. 1160 */ 1161 inode->i_state = I_DIRTY; 1162 inode->i_mode = S_IRUSR | S_IWUSR; 1163 inode->i_uid = current_fsuid(); 1164 inode->i_gid = current_fsgid(); 1165 inode->i_flags |= S_PRIVATE; 1166 inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); 1167 return inode; 1168 } 1169 EXPORT_SYMBOL(alloc_anon_inode); 1170 1171 /** 1172 * simple_nosetlease - generic helper for prohibiting leases 1173 * @filp: file pointer 1174 * @arg: type of lease to obtain 1175 * @flp: new lease supplied for insertion 1176 * @priv: private data for lm_setup operation 1177 * 1178 * Generic helper for filesystems that do not wish to allow leases to be set. 1179 * All arguments are ignored and it just returns -EINVAL. 1180 */ 1181 int 1182 simple_nosetlease(struct file *filp, long arg, struct file_lock **flp, 1183 void **priv) 1184 { 1185 return -EINVAL; 1186 } 1187 EXPORT_SYMBOL(simple_nosetlease); 1188 1189 /** 1190 * simple_get_link - generic helper to get the target of "fast" symlinks 1191 * @dentry: not used here 1192 * @inode: the symlink inode 1193 * @done: not used here 1194 * 1195 * Generic helper for filesystems to use for symlink inodes where a pointer to 1196 * the symlink target is stored in ->i_link. NOTE: this isn't normally called, 1197 * since as an optimization the path lookup code uses any non-NULL ->i_link 1198 * directly, without calling ->get_link(). But ->get_link() still must be set, 1199 * to mark the inode_operations as being for a symlink. 1200 * 1201 * Return: the symlink target 1202 */ 1203 const char *simple_get_link(struct dentry *dentry, struct inode *inode, 1204 struct delayed_call *done) 1205 { 1206 return inode->i_link; 1207 } 1208 EXPORT_SYMBOL(simple_get_link); 1209 1210 const struct inode_operations simple_symlink_inode_operations = { 1211 .get_link = simple_get_link, 1212 }; 1213 EXPORT_SYMBOL(simple_symlink_inode_operations); 1214 1215 /* 1216 * Operations for a permanently empty directory. 1217 */ 1218 static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) 1219 { 1220 return ERR_PTR(-ENOENT); 1221 } 1222 1223 static int empty_dir_getattr(const struct path *path, struct kstat *stat, 1224 u32 request_mask, unsigned int query_flags) 1225 { 1226 struct inode *inode = d_inode(path->dentry); 1227 generic_fillattr(inode, stat); 1228 return 0; 1229 } 1230 1231 static int empty_dir_setattr(struct dentry *dentry, struct iattr *attr) 1232 { 1233 return -EPERM; 1234 } 1235 1236 static ssize_t empty_dir_listxattr(struct dentry *dentry, char *list, size_t size) 1237 { 1238 return -EOPNOTSUPP; 1239 } 1240 1241 static const struct inode_operations empty_dir_inode_operations = { 1242 .lookup = empty_dir_lookup, 1243 .permission = generic_permission, 1244 .setattr = empty_dir_setattr, 1245 .getattr = empty_dir_getattr, 1246 .listxattr = empty_dir_listxattr, 1247 }; 1248 1249 static loff_t empty_dir_llseek(struct file *file, loff_t offset, int whence) 1250 { 1251 /* An empty directory has two entries . and .. at offsets 0 and 1 */ 1252 return generic_file_llseek_size(file, offset, whence, 2, 2); 1253 } 1254 1255 static int empty_dir_readdir(struct file *file, struct dir_context *ctx) 1256 { 1257 dir_emit_dots(file, ctx); 1258 return 0; 1259 } 1260 1261 static const struct file_operations empty_dir_operations = { 1262 .llseek = empty_dir_llseek, 1263 .read = generic_read_dir, 1264 .iterate_shared = empty_dir_readdir, 1265 .fsync = noop_fsync, 1266 }; 1267 1268 1269 void make_empty_dir_inode(struct inode *inode) 1270 { 1271 set_nlink(inode, 2); 1272 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; 1273 inode->i_uid = GLOBAL_ROOT_UID; 1274 inode->i_gid = GLOBAL_ROOT_GID; 1275 inode->i_rdev = 0; 1276 inode->i_size = 0; 1277 inode->i_blkbits = PAGE_SHIFT; 1278 inode->i_blocks = 0; 1279 1280 inode->i_op = &empty_dir_inode_operations; 1281 inode->i_opflags &= ~IOP_XATTR; 1282 inode->i_fop = &empty_dir_operations; 1283 } 1284 1285 bool is_empty_dir_inode(struct inode *inode) 1286 { 1287 return (inode->i_fop == &empty_dir_operations) && 1288 (inode->i_op == &empty_dir_inode_operations); 1289 } 1290