1 /* 2 * fs/libfs.c 3 * Library for filesystems writers. 4 */ 5 6 #include <linux/blkdev.h> 7 #include <linux/export.h> 8 #include <linux/pagemap.h> 9 #include <linux/slab.h> 10 #include <linux/mount.h> 11 #include <linux/vfs.h> 12 #include <linux/quotaops.h> 13 #include <linux/mutex.h> 14 #include <linux/namei.h> 15 #include <linux/exportfs.h> 16 #include <linux/writeback.h> 17 #include <linux/buffer_head.h> /* sync_mapping_buffers */ 18 19 #include <linux/uaccess.h> 20 21 #include "internal.h" 22 23 int simple_getattr(struct vfsmount *mnt, struct dentry *dentry, 24 struct kstat *stat) 25 { 26 struct inode *inode = d_inode(dentry); 27 generic_fillattr(inode, stat); 28 stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9); 29 return 0; 30 } 31 EXPORT_SYMBOL(simple_getattr); 32 33 int simple_statfs(struct dentry *dentry, struct kstatfs *buf) 34 { 35 buf->f_type = dentry->d_sb->s_magic; 36 buf->f_bsize = PAGE_SIZE; 37 buf->f_namelen = NAME_MAX; 38 return 0; 39 } 40 EXPORT_SYMBOL(simple_statfs); 41 42 /* 43 * Retaining negative dentries for an in-memory filesystem just wastes 44 * memory and lookup time: arrange for them to be deleted immediately. 45 */ 46 int always_delete_dentry(const struct dentry *dentry) 47 { 48 return 1; 49 } 50 EXPORT_SYMBOL(always_delete_dentry); 51 52 const struct dentry_operations simple_dentry_operations = { 53 .d_delete = always_delete_dentry, 54 }; 55 EXPORT_SYMBOL(simple_dentry_operations); 56 57 /* 58 * Lookup the data. This is trivial - if the dentry didn't already 59 * exist, we know it is negative. Set d_op to delete negative dentries. 60 */ 61 struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) 62 { 63 if (dentry->d_name.len > NAME_MAX) 64 return ERR_PTR(-ENAMETOOLONG); 65 if (!dentry->d_sb->s_d_op) 66 d_set_d_op(dentry, &simple_dentry_operations); 67 d_add(dentry, NULL); 68 return NULL; 69 } 70 EXPORT_SYMBOL(simple_lookup); 71 72 int dcache_dir_open(struct inode *inode, struct file *file) 73 { 74 file->private_data = d_alloc_cursor(file->f_path.dentry); 75 76 return file->private_data ? 0 : -ENOMEM; 77 } 78 EXPORT_SYMBOL(dcache_dir_open); 79 80 int dcache_dir_close(struct inode *inode, struct file *file) 81 { 82 dput(file->private_data); 83 return 0; 84 } 85 EXPORT_SYMBOL(dcache_dir_close); 86 87 /* parent is locked at least shared */ 88 static struct dentry *next_positive(struct dentry *parent, 89 struct list_head *from, 90 int count) 91 { 92 unsigned *seq = &parent->d_inode->i_dir_seq, n; 93 struct dentry *res; 94 struct list_head *p; 95 bool skipped; 96 int i; 97 98 retry: 99 i = count; 100 skipped = false; 101 n = smp_load_acquire(seq) & ~1; 102 res = NULL; 103 rcu_read_lock(); 104 for (p = from->next; p != &parent->d_subdirs; p = p->next) { 105 struct dentry *d = list_entry(p, struct dentry, d_child); 106 if (!simple_positive(d)) { 107 skipped = true; 108 } else if (!--i) { 109 res = d; 110 break; 111 } 112 } 113 rcu_read_unlock(); 114 if (skipped) { 115 smp_rmb(); 116 if (unlikely(*seq != n)) 117 goto retry; 118 } 119 return res; 120 } 121 122 static void move_cursor(struct dentry *cursor, struct list_head *after) 123 { 124 struct dentry *parent = cursor->d_parent; 125 unsigned n, *seq = &parent->d_inode->i_dir_seq; 126 spin_lock(&parent->d_lock); 127 for (;;) { 128 n = *seq; 129 if (!(n & 1) && cmpxchg(seq, n, n + 1) == n) 130 break; 131 cpu_relax(); 132 } 133 __list_del(cursor->d_child.prev, cursor->d_child.next); 134 if (after) 135 list_add(&cursor->d_child, after); 136 else 137 list_add_tail(&cursor->d_child, &parent->d_subdirs); 138 smp_store_release(seq, n + 2); 139 spin_unlock(&parent->d_lock); 140 } 141 142 loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) 143 { 144 struct dentry *dentry = file->f_path.dentry; 145 switch (whence) { 146 case 1: 147 offset += file->f_pos; 148 case 0: 149 if (offset >= 0) 150 break; 151 default: 152 return -EINVAL; 153 } 154 if (offset != file->f_pos) { 155 file->f_pos = offset; 156 if (file->f_pos >= 2) { 157 struct dentry *cursor = file->private_data; 158 struct dentry *to; 159 loff_t n = file->f_pos - 2; 160 161 inode_lock_shared(dentry->d_inode); 162 to = next_positive(dentry, &dentry->d_subdirs, n); 163 move_cursor(cursor, to ? &to->d_child : NULL); 164 inode_unlock_shared(dentry->d_inode); 165 } 166 } 167 return offset; 168 } 169 EXPORT_SYMBOL(dcache_dir_lseek); 170 171 /* Relationship between i_mode and the DT_xxx types */ 172 static inline unsigned char dt_type(struct inode *inode) 173 { 174 return (inode->i_mode >> 12) & 15; 175 } 176 177 /* 178 * Directory is locked and all positive dentries in it are safe, since 179 * for ramfs-type trees they can't go away without unlink() or rmdir(), 180 * both impossible due to the lock on directory. 181 */ 182 183 int dcache_readdir(struct file *file, struct dir_context *ctx) 184 { 185 struct dentry *dentry = file->f_path.dentry; 186 struct dentry *cursor = file->private_data; 187 struct list_head *p = &cursor->d_child; 188 struct dentry *next; 189 bool moved = false; 190 191 if (!dir_emit_dots(file, ctx)) 192 return 0; 193 194 if (ctx->pos == 2) 195 p = &dentry->d_subdirs; 196 while ((next = next_positive(dentry, p, 1)) != NULL) { 197 if (!dir_emit(ctx, next->d_name.name, next->d_name.len, 198 d_inode(next)->i_ino, dt_type(d_inode(next)))) 199 break; 200 moved = true; 201 p = &next->d_child; 202 ctx->pos++; 203 } 204 if (moved) 205 move_cursor(cursor, p); 206 return 0; 207 } 208 EXPORT_SYMBOL(dcache_readdir); 209 210 ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos) 211 { 212 return -EISDIR; 213 } 214 EXPORT_SYMBOL(generic_read_dir); 215 216 const struct file_operations simple_dir_operations = { 217 .open = dcache_dir_open, 218 .release = dcache_dir_close, 219 .llseek = dcache_dir_lseek, 220 .read = generic_read_dir, 221 .iterate_shared = dcache_readdir, 222 .fsync = noop_fsync, 223 }; 224 EXPORT_SYMBOL(simple_dir_operations); 225 226 const struct inode_operations simple_dir_inode_operations = { 227 .lookup = simple_lookup, 228 }; 229 EXPORT_SYMBOL(simple_dir_inode_operations); 230 231 static const struct super_operations simple_super_operations = { 232 .statfs = simple_statfs, 233 }; 234 235 /* 236 * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that 237 * will never be mountable) 238 */ 239 struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name, 240 const struct super_operations *ops, const struct xattr_handler **xattr, 241 const struct dentry_operations *dops, unsigned long magic) 242 { 243 struct super_block *s; 244 struct dentry *dentry; 245 struct inode *root; 246 struct qstr d_name = QSTR_INIT(name, strlen(name)); 247 248 s = sget_userns(fs_type, NULL, set_anon_super, MS_KERNMOUNT|MS_NOUSER, 249 &init_user_ns, NULL); 250 if (IS_ERR(s)) 251 return ERR_CAST(s); 252 253 s->s_maxbytes = MAX_LFS_FILESIZE; 254 s->s_blocksize = PAGE_SIZE; 255 s->s_blocksize_bits = PAGE_SHIFT; 256 s->s_magic = magic; 257 s->s_op = ops ? ops : &simple_super_operations; 258 s->s_xattr = xattr; 259 s->s_time_gran = 1; 260 root = new_inode(s); 261 if (!root) 262 goto Enomem; 263 /* 264 * since this is the first inode, make it number 1. New inodes created 265 * after this must take care not to collide with it (by passing 266 * max_reserved of 1 to iunique). 267 */ 268 root->i_ino = 1; 269 root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; 270 root->i_atime = root->i_mtime = root->i_ctime = current_time(root); 271 dentry = __d_alloc(s, &d_name); 272 if (!dentry) { 273 iput(root); 274 goto Enomem; 275 } 276 d_instantiate(dentry, root); 277 s->s_root = dentry; 278 s->s_d_op = dops; 279 s->s_flags |= MS_ACTIVE; 280 return dget(s->s_root); 281 282 Enomem: 283 deactivate_locked_super(s); 284 return ERR_PTR(-ENOMEM); 285 } 286 EXPORT_SYMBOL(mount_pseudo_xattr); 287 288 int simple_open(struct inode *inode, struct file *file) 289 { 290 if (inode->i_private) 291 file->private_data = inode->i_private; 292 return 0; 293 } 294 EXPORT_SYMBOL(simple_open); 295 296 int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) 297 { 298 struct inode *inode = d_inode(old_dentry); 299 300 inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); 301 inc_nlink(inode); 302 ihold(inode); 303 dget(dentry); 304 d_instantiate(dentry, inode); 305 return 0; 306 } 307 EXPORT_SYMBOL(simple_link); 308 309 int simple_empty(struct dentry *dentry) 310 { 311 struct dentry *child; 312 int ret = 0; 313 314 spin_lock(&dentry->d_lock); 315 list_for_each_entry(child, &dentry->d_subdirs, d_child) { 316 spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); 317 if (simple_positive(child)) { 318 spin_unlock(&child->d_lock); 319 goto out; 320 } 321 spin_unlock(&child->d_lock); 322 } 323 ret = 1; 324 out: 325 spin_unlock(&dentry->d_lock); 326 return ret; 327 } 328 EXPORT_SYMBOL(simple_empty); 329 330 int simple_unlink(struct inode *dir, struct dentry *dentry) 331 { 332 struct inode *inode = d_inode(dentry); 333 334 inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); 335 drop_nlink(inode); 336 dput(dentry); 337 return 0; 338 } 339 EXPORT_SYMBOL(simple_unlink); 340 341 int simple_rmdir(struct inode *dir, struct dentry *dentry) 342 { 343 if (!simple_empty(dentry)) 344 return -ENOTEMPTY; 345 346 drop_nlink(d_inode(dentry)); 347 simple_unlink(dir, dentry); 348 drop_nlink(dir); 349 return 0; 350 } 351 EXPORT_SYMBOL(simple_rmdir); 352 353 int simple_rename(struct inode *old_dir, struct dentry *old_dentry, 354 struct inode *new_dir, struct dentry *new_dentry, 355 unsigned int flags) 356 { 357 struct inode *inode = d_inode(old_dentry); 358 int they_are_dirs = d_is_dir(old_dentry); 359 360 if (flags & ~RENAME_NOREPLACE) 361 return -EINVAL; 362 363 if (!simple_empty(new_dentry)) 364 return -ENOTEMPTY; 365 366 if (d_really_is_positive(new_dentry)) { 367 simple_unlink(new_dir, new_dentry); 368 if (they_are_dirs) { 369 drop_nlink(d_inode(new_dentry)); 370 drop_nlink(old_dir); 371 } 372 } else if (they_are_dirs) { 373 drop_nlink(old_dir); 374 inc_nlink(new_dir); 375 } 376 377 old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime = 378 new_dir->i_mtime = inode->i_ctime = current_time(old_dir); 379 380 return 0; 381 } 382 EXPORT_SYMBOL(simple_rename); 383 384 /** 385 * simple_setattr - setattr for simple filesystem 386 * @dentry: dentry 387 * @iattr: iattr structure 388 * 389 * Returns 0 on success, -error on failure. 390 * 391 * simple_setattr is a simple ->setattr implementation without a proper 392 * implementation of size changes. 393 * 394 * It can either be used for in-memory filesystems or special files 395 * on simple regular filesystems. Anything that needs to change on-disk 396 * or wire state on size changes needs its own setattr method. 397 */ 398 int simple_setattr(struct dentry *dentry, struct iattr *iattr) 399 { 400 struct inode *inode = d_inode(dentry); 401 int error; 402 403 error = setattr_prepare(dentry, iattr); 404 if (error) 405 return error; 406 407 if (iattr->ia_valid & ATTR_SIZE) 408 truncate_setsize(inode, iattr->ia_size); 409 setattr_copy(inode, iattr); 410 mark_inode_dirty(inode); 411 return 0; 412 } 413 EXPORT_SYMBOL(simple_setattr); 414 415 int simple_readpage(struct file *file, struct page *page) 416 { 417 clear_highpage(page); 418 flush_dcache_page(page); 419 SetPageUptodate(page); 420 unlock_page(page); 421 return 0; 422 } 423 EXPORT_SYMBOL(simple_readpage); 424 425 int simple_write_begin(struct file *file, struct address_space *mapping, 426 loff_t pos, unsigned len, unsigned flags, 427 struct page **pagep, void **fsdata) 428 { 429 struct page *page; 430 pgoff_t index; 431 432 index = pos >> PAGE_SHIFT; 433 434 page = grab_cache_page_write_begin(mapping, index, flags); 435 if (!page) 436 return -ENOMEM; 437 438 *pagep = page; 439 440 if (!PageUptodate(page) && (len != PAGE_SIZE)) { 441 unsigned from = pos & (PAGE_SIZE - 1); 442 443 zero_user_segments(page, 0, from, from + len, PAGE_SIZE); 444 } 445 return 0; 446 } 447 EXPORT_SYMBOL(simple_write_begin); 448 449 /** 450 * simple_write_end - .write_end helper for non-block-device FSes 451 * @available: See .write_end of address_space_operations 452 * @file: " 453 * @mapping: " 454 * @pos: " 455 * @len: " 456 * @copied: " 457 * @page: " 458 * @fsdata: " 459 * 460 * simple_write_end does the minimum needed for updating a page after writing is 461 * done. It has the same API signature as the .write_end of 462 * address_space_operations vector. So it can just be set onto .write_end for 463 * FSes that don't need any other processing. i_mutex is assumed to be held. 464 * Block based filesystems should use generic_write_end(). 465 * NOTE: Even though i_size might get updated by this function, mark_inode_dirty 466 * is not called, so a filesystem that actually does store data in .write_inode 467 * should extend on what's done here with a call to mark_inode_dirty() in the 468 * case that i_size has changed. 469 * 470 * Use *ONLY* with simple_readpage() 471 */ 472 int simple_write_end(struct file *file, struct address_space *mapping, 473 loff_t pos, unsigned len, unsigned copied, 474 struct page *page, void *fsdata) 475 { 476 struct inode *inode = page->mapping->host; 477 loff_t last_pos = pos + copied; 478 479 /* zero the stale part of the page if we did a short copy */ 480 if (!PageUptodate(page)) { 481 if (copied < len) { 482 unsigned from = pos & (PAGE_SIZE - 1); 483 484 zero_user(page, from + copied, len - copied); 485 } 486 SetPageUptodate(page); 487 } 488 /* 489 * No need to use i_size_read() here, the i_size 490 * cannot change under us because we hold the i_mutex. 491 */ 492 if (last_pos > inode->i_size) 493 i_size_write(inode, last_pos); 494 495 set_page_dirty(page); 496 unlock_page(page); 497 put_page(page); 498 499 return copied; 500 } 501 EXPORT_SYMBOL(simple_write_end); 502 503 /* 504 * the inodes created here are not hashed. If you use iunique to generate 505 * unique inode values later for this filesystem, then you must take care 506 * to pass it an appropriate max_reserved value to avoid collisions. 507 */ 508 int simple_fill_super(struct super_block *s, unsigned long magic, 509 struct tree_descr *files) 510 { 511 struct inode *inode; 512 struct dentry *root; 513 struct dentry *dentry; 514 int i; 515 516 s->s_blocksize = PAGE_SIZE; 517 s->s_blocksize_bits = PAGE_SHIFT; 518 s->s_magic = magic; 519 s->s_op = &simple_super_operations; 520 s->s_time_gran = 1; 521 522 inode = new_inode(s); 523 if (!inode) 524 return -ENOMEM; 525 /* 526 * because the root inode is 1, the files array must not contain an 527 * entry at index 1 528 */ 529 inode->i_ino = 1; 530 inode->i_mode = S_IFDIR | 0755; 531 inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); 532 inode->i_op = &simple_dir_inode_operations; 533 inode->i_fop = &simple_dir_operations; 534 set_nlink(inode, 2); 535 root = d_make_root(inode); 536 if (!root) 537 return -ENOMEM; 538 for (i = 0; !files->name || files->name[0]; i++, files++) { 539 if (!files->name) 540 continue; 541 542 /* warn if it tries to conflict with the root inode */ 543 if (unlikely(i == 1)) 544 printk(KERN_WARNING "%s: %s passed in a files array" 545 "with an index of 1!\n", __func__, 546 s->s_type->name); 547 548 dentry = d_alloc_name(root, files->name); 549 if (!dentry) 550 goto out; 551 inode = new_inode(s); 552 if (!inode) { 553 dput(dentry); 554 goto out; 555 } 556 inode->i_mode = S_IFREG | files->mode; 557 inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); 558 inode->i_fop = files->ops; 559 inode->i_ino = i; 560 d_add(dentry, inode); 561 } 562 s->s_root = root; 563 return 0; 564 out: 565 d_genocide(root); 566 shrink_dcache_parent(root); 567 dput(root); 568 return -ENOMEM; 569 } 570 EXPORT_SYMBOL(simple_fill_super); 571 572 static DEFINE_SPINLOCK(pin_fs_lock); 573 574 int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *count) 575 { 576 struct vfsmount *mnt = NULL; 577 spin_lock(&pin_fs_lock); 578 if (unlikely(!*mount)) { 579 spin_unlock(&pin_fs_lock); 580 mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, NULL); 581 if (IS_ERR(mnt)) 582 return PTR_ERR(mnt); 583 spin_lock(&pin_fs_lock); 584 if (!*mount) 585 *mount = mnt; 586 } 587 mntget(*mount); 588 ++*count; 589 spin_unlock(&pin_fs_lock); 590 mntput(mnt); 591 return 0; 592 } 593 EXPORT_SYMBOL(simple_pin_fs); 594 595 void simple_release_fs(struct vfsmount **mount, int *count) 596 { 597 struct vfsmount *mnt; 598 spin_lock(&pin_fs_lock); 599 mnt = *mount; 600 if (!--*count) 601 *mount = NULL; 602 spin_unlock(&pin_fs_lock); 603 mntput(mnt); 604 } 605 EXPORT_SYMBOL(simple_release_fs); 606 607 /** 608 * simple_read_from_buffer - copy data from the buffer to user space 609 * @to: the user space buffer to read to 610 * @count: the maximum number of bytes to read 611 * @ppos: the current position in the buffer 612 * @from: the buffer to read from 613 * @available: the size of the buffer 614 * 615 * The simple_read_from_buffer() function reads up to @count bytes from the 616 * buffer @from at offset @ppos into the user space address starting at @to. 617 * 618 * On success, the number of bytes read is returned and the offset @ppos is 619 * advanced by this number, or negative value is returned on error. 620 **/ 621 ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, 622 const void *from, size_t available) 623 { 624 loff_t pos = *ppos; 625 size_t ret; 626 627 if (pos < 0) 628 return -EINVAL; 629 if (pos >= available || !count) 630 return 0; 631 if (count > available - pos) 632 count = available - pos; 633 ret = copy_to_user(to, from + pos, count); 634 if (ret == count) 635 return -EFAULT; 636 count -= ret; 637 *ppos = pos + count; 638 return count; 639 } 640 EXPORT_SYMBOL(simple_read_from_buffer); 641 642 /** 643 * simple_write_to_buffer - copy data from user space to the buffer 644 * @to: the buffer to write to 645 * @available: the size of the buffer 646 * @ppos: the current position in the buffer 647 * @from: the user space buffer to read from 648 * @count: the maximum number of bytes to read 649 * 650 * The simple_write_to_buffer() function reads up to @count bytes from the user 651 * space address starting at @from into the buffer @to at offset @ppos. 652 * 653 * On success, the number of bytes written is returned and the offset @ppos is 654 * advanced by this number, or negative value is returned on error. 655 **/ 656 ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, 657 const void __user *from, size_t count) 658 { 659 loff_t pos = *ppos; 660 size_t res; 661 662 if (pos < 0) 663 return -EINVAL; 664 if (pos >= available || !count) 665 return 0; 666 if (count > available - pos) 667 count = available - pos; 668 res = copy_from_user(to + pos, from, count); 669 if (res == count) 670 return -EFAULT; 671 count -= res; 672 *ppos = pos + count; 673 return count; 674 } 675 EXPORT_SYMBOL(simple_write_to_buffer); 676 677 /** 678 * memory_read_from_buffer - copy data from the buffer 679 * @to: the kernel space buffer to read to 680 * @count: the maximum number of bytes to read 681 * @ppos: the current position in the buffer 682 * @from: the buffer to read from 683 * @available: the size of the buffer 684 * 685 * The memory_read_from_buffer() function reads up to @count bytes from the 686 * buffer @from at offset @ppos into the kernel space address starting at @to. 687 * 688 * On success, the number of bytes read is returned and the offset @ppos is 689 * advanced by this number, or negative value is returned on error. 690 **/ 691 ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, 692 const void *from, size_t available) 693 { 694 loff_t pos = *ppos; 695 696 if (pos < 0) 697 return -EINVAL; 698 if (pos >= available) 699 return 0; 700 if (count > available - pos) 701 count = available - pos; 702 memcpy(to, from + pos, count); 703 *ppos = pos + count; 704 705 return count; 706 } 707 EXPORT_SYMBOL(memory_read_from_buffer); 708 709 /* 710 * Transaction based IO. 711 * The file expects a single write which triggers the transaction, and then 712 * possibly a read which collects the result - which is stored in a 713 * file-local buffer. 714 */ 715 716 void simple_transaction_set(struct file *file, size_t n) 717 { 718 struct simple_transaction_argresp *ar = file->private_data; 719 720 BUG_ON(n > SIMPLE_TRANSACTION_LIMIT); 721 722 /* 723 * The barrier ensures that ar->size will really remain zero until 724 * ar->data is ready for reading. 725 */ 726 smp_mb(); 727 ar->size = n; 728 } 729 EXPORT_SYMBOL(simple_transaction_set); 730 731 char *simple_transaction_get(struct file *file, const char __user *buf, size_t size) 732 { 733 struct simple_transaction_argresp *ar; 734 static DEFINE_SPINLOCK(simple_transaction_lock); 735 736 if (size > SIMPLE_TRANSACTION_LIMIT - 1) 737 return ERR_PTR(-EFBIG); 738 739 ar = (struct simple_transaction_argresp *)get_zeroed_page(GFP_KERNEL); 740 if (!ar) 741 return ERR_PTR(-ENOMEM); 742 743 spin_lock(&simple_transaction_lock); 744 745 /* only one write allowed per open */ 746 if (file->private_data) { 747 spin_unlock(&simple_transaction_lock); 748 free_page((unsigned long)ar); 749 return ERR_PTR(-EBUSY); 750 } 751 752 file->private_data = ar; 753 754 spin_unlock(&simple_transaction_lock); 755 756 if (copy_from_user(ar->data, buf, size)) 757 return ERR_PTR(-EFAULT); 758 759 return ar->data; 760 } 761 EXPORT_SYMBOL(simple_transaction_get); 762 763 ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) 764 { 765 struct simple_transaction_argresp *ar = file->private_data; 766 767 if (!ar) 768 return 0; 769 return simple_read_from_buffer(buf, size, pos, ar->data, ar->size); 770 } 771 EXPORT_SYMBOL(simple_transaction_read); 772 773 int simple_transaction_release(struct inode *inode, struct file *file) 774 { 775 free_page((unsigned long)file->private_data); 776 return 0; 777 } 778 EXPORT_SYMBOL(simple_transaction_release); 779 780 /* Simple attribute files */ 781 782 struct simple_attr { 783 int (*get)(void *, u64 *); 784 int (*set)(void *, u64); 785 char get_buf[24]; /* enough to store a u64 and "\n\0" */ 786 char set_buf[24]; 787 void *data; 788 const char *fmt; /* format for read operation */ 789 struct mutex mutex; /* protects access to these buffers */ 790 }; 791 792 /* simple_attr_open is called by an actual attribute open file operation 793 * to set the attribute specific access operations. */ 794 int simple_attr_open(struct inode *inode, struct file *file, 795 int (*get)(void *, u64 *), int (*set)(void *, u64), 796 const char *fmt) 797 { 798 struct simple_attr *attr; 799 800 attr = kmalloc(sizeof(*attr), GFP_KERNEL); 801 if (!attr) 802 return -ENOMEM; 803 804 attr->get = get; 805 attr->set = set; 806 attr->data = inode->i_private; 807 attr->fmt = fmt; 808 mutex_init(&attr->mutex); 809 810 file->private_data = attr; 811 812 return nonseekable_open(inode, file); 813 } 814 EXPORT_SYMBOL_GPL(simple_attr_open); 815 816 int simple_attr_release(struct inode *inode, struct file *file) 817 { 818 kfree(file->private_data); 819 return 0; 820 } 821 EXPORT_SYMBOL_GPL(simple_attr_release); /* GPL-only? This? Really? */ 822 823 /* read from the buffer that is filled with the get function */ 824 ssize_t simple_attr_read(struct file *file, char __user *buf, 825 size_t len, loff_t *ppos) 826 { 827 struct simple_attr *attr; 828 size_t size; 829 ssize_t ret; 830 831 attr = file->private_data; 832 833 if (!attr->get) 834 return -EACCES; 835 836 ret = mutex_lock_interruptible(&attr->mutex); 837 if (ret) 838 return ret; 839 840 if (*ppos) { /* continued read */ 841 size = strlen(attr->get_buf); 842 } else { /* first read */ 843 u64 val; 844 ret = attr->get(attr->data, &val); 845 if (ret) 846 goto out; 847 848 size = scnprintf(attr->get_buf, sizeof(attr->get_buf), 849 attr->fmt, (unsigned long long)val); 850 } 851 852 ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size); 853 out: 854 mutex_unlock(&attr->mutex); 855 return ret; 856 } 857 EXPORT_SYMBOL_GPL(simple_attr_read); 858 859 /* interpret the buffer as a number to call the set function with */ 860 ssize_t simple_attr_write(struct file *file, const char __user *buf, 861 size_t len, loff_t *ppos) 862 { 863 struct simple_attr *attr; 864 u64 val; 865 size_t size; 866 ssize_t ret; 867 868 attr = file->private_data; 869 if (!attr->set) 870 return -EACCES; 871 872 ret = mutex_lock_interruptible(&attr->mutex); 873 if (ret) 874 return ret; 875 876 ret = -EFAULT; 877 size = min(sizeof(attr->set_buf) - 1, len); 878 if (copy_from_user(attr->set_buf, buf, size)) 879 goto out; 880 881 attr->set_buf[size] = '\0'; 882 val = simple_strtoll(attr->set_buf, NULL, 0); 883 ret = attr->set(attr->data, val); 884 if (ret == 0) 885 ret = len; /* on success, claim we got the whole input */ 886 out: 887 mutex_unlock(&attr->mutex); 888 return ret; 889 } 890 EXPORT_SYMBOL_GPL(simple_attr_write); 891 892 /** 893 * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation 894 * @sb: filesystem to do the file handle conversion on 895 * @fid: file handle to convert 896 * @fh_len: length of the file handle in bytes 897 * @fh_type: type of file handle 898 * @get_inode: filesystem callback to retrieve inode 899 * 900 * This function decodes @fid as long as it has one of the well-known 901 * Linux filehandle types and calls @get_inode on it to retrieve the 902 * inode for the object specified in the file handle. 903 */ 904 struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid, 905 int fh_len, int fh_type, struct inode *(*get_inode) 906 (struct super_block *sb, u64 ino, u32 gen)) 907 { 908 struct inode *inode = NULL; 909 910 if (fh_len < 2) 911 return NULL; 912 913 switch (fh_type) { 914 case FILEID_INO32_GEN: 915 case FILEID_INO32_GEN_PARENT: 916 inode = get_inode(sb, fid->i32.ino, fid->i32.gen); 917 break; 918 } 919 920 return d_obtain_alias(inode); 921 } 922 EXPORT_SYMBOL_GPL(generic_fh_to_dentry); 923 924 /** 925 * generic_fh_to_parent - generic helper for the fh_to_parent export operation 926 * @sb: filesystem to do the file handle conversion on 927 * @fid: file handle to convert 928 * @fh_len: length of the file handle in bytes 929 * @fh_type: type of file handle 930 * @get_inode: filesystem callback to retrieve inode 931 * 932 * This function decodes @fid as long as it has one of the well-known 933 * Linux filehandle types and calls @get_inode on it to retrieve the 934 * inode for the _parent_ object specified in the file handle if it 935 * is specified in the file handle, or NULL otherwise. 936 */ 937 struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, 938 int fh_len, int fh_type, struct inode *(*get_inode) 939 (struct super_block *sb, u64 ino, u32 gen)) 940 { 941 struct inode *inode = NULL; 942 943 if (fh_len <= 2) 944 return NULL; 945 946 switch (fh_type) { 947 case FILEID_INO32_GEN_PARENT: 948 inode = get_inode(sb, fid->i32.parent_ino, 949 (fh_len > 3 ? fid->i32.parent_gen : 0)); 950 break; 951 } 952 953 return d_obtain_alias(inode); 954 } 955 EXPORT_SYMBOL_GPL(generic_fh_to_parent); 956 957 /** 958 * __generic_file_fsync - generic fsync implementation for simple filesystems 959 * 960 * @file: file to synchronize 961 * @start: start offset in bytes 962 * @end: end offset in bytes (inclusive) 963 * @datasync: only synchronize essential metadata if true 964 * 965 * This is a generic implementation of the fsync method for simple 966 * filesystems which track all non-inode metadata in the buffers list 967 * hanging off the address_space structure. 968 */ 969 int __generic_file_fsync(struct file *file, loff_t start, loff_t end, 970 int datasync) 971 { 972 struct inode *inode = file->f_mapping->host; 973 int err; 974 int ret; 975 976 err = filemap_write_and_wait_range(inode->i_mapping, start, end); 977 if (err) 978 return err; 979 980 inode_lock(inode); 981 ret = sync_mapping_buffers(inode->i_mapping); 982 if (!(inode->i_state & I_DIRTY_ALL)) 983 goto out; 984 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 985 goto out; 986 987 err = sync_inode_metadata(inode, 1); 988 if (ret == 0) 989 ret = err; 990 991 out: 992 inode_unlock(inode); 993 return ret; 994 } 995 EXPORT_SYMBOL(__generic_file_fsync); 996 997 /** 998 * generic_file_fsync - generic fsync implementation for simple filesystems 999 * with flush 1000 * @file: file to synchronize 1001 * @start: start offset in bytes 1002 * @end: end offset in bytes (inclusive) 1003 * @datasync: only synchronize essential metadata if true 1004 * 1005 */ 1006 1007 int generic_file_fsync(struct file *file, loff_t start, loff_t end, 1008 int datasync) 1009 { 1010 struct inode *inode = file->f_mapping->host; 1011 int err; 1012 1013 err = __generic_file_fsync(file, start, end, datasync); 1014 if (err) 1015 return err; 1016 return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 1017 } 1018 EXPORT_SYMBOL(generic_file_fsync); 1019 1020 /** 1021 * generic_check_addressable - Check addressability of file system 1022 * @blocksize_bits: log of file system block size 1023 * @num_blocks: number of blocks in file system 1024 * 1025 * Determine whether a file system with @num_blocks blocks (and a 1026 * block size of 2**@blocksize_bits) is addressable by the sector_t 1027 * and page cache of the system. Return 0 if so and -EFBIG otherwise. 1028 */ 1029 int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks) 1030 { 1031 u64 last_fs_block = num_blocks - 1; 1032 u64 last_fs_page = 1033 last_fs_block >> (PAGE_SHIFT - blocksize_bits); 1034 1035 if (unlikely(num_blocks == 0)) 1036 return 0; 1037 1038 if ((blocksize_bits < 9) || (blocksize_bits > PAGE_SHIFT)) 1039 return -EINVAL; 1040 1041 if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) || 1042 (last_fs_page > (pgoff_t)(~0ULL))) { 1043 return -EFBIG; 1044 } 1045 return 0; 1046 } 1047 EXPORT_SYMBOL(generic_check_addressable); 1048 1049 /* 1050 * No-op implementation of ->fsync for in-memory filesystems. 1051 */ 1052 int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync) 1053 { 1054 return 0; 1055 } 1056 EXPORT_SYMBOL(noop_fsync); 1057 1058 /* Because kfree isn't assignment-compatible with void(void*) ;-/ */ 1059 void kfree_link(void *p) 1060 { 1061 kfree(p); 1062 } 1063 EXPORT_SYMBOL(kfree_link); 1064 1065 /* 1066 * nop .set_page_dirty method so that people can use .page_mkwrite on 1067 * anon inodes. 1068 */ 1069 static int anon_set_page_dirty(struct page *page) 1070 { 1071 return 0; 1072 }; 1073 1074 /* 1075 * A single inode exists for all anon_inode files. Contrary to pipes, 1076 * anon_inode inodes have no associated per-instance data, so we need 1077 * only allocate one of them. 1078 */ 1079 struct inode *alloc_anon_inode(struct super_block *s) 1080 { 1081 static const struct address_space_operations anon_aops = { 1082 .set_page_dirty = anon_set_page_dirty, 1083 }; 1084 struct inode *inode = new_inode_pseudo(s); 1085 1086 if (!inode) 1087 return ERR_PTR(-ENOMEM); 1088 1089 inode->i_ino = get_next_ino(); 1090 inode->i_mapping->a_ops = &anon_aops; 1091 1092 /* 1093 * Mark the inode dirty from the very beginning, 1094 * that way it will never be moved to the dirty 1095 * list because mark_inode_dirty() will think 1096 * that it already _is_ on the dirty list. 1097 */ 1098 inode->i_state = I_DIRTY; 1099 inode->i_mode = S_IRUSR | S_IWUSR; 1100 inode->i_uid = current_fsuid(); 1101 inode->i_gid = current_fsgid(); 1102 inode->i_flags |= S_PRIVATE; 1103 inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); 1104 return inode; 1105 } 1106 EXPORT_SYMBOL(alloc_anon_inode); 1107 1108 /** 1109 * simple_nosetlease - generic helper for prohibiting leases 1110 * @filp: file pointer 1111 * @arg: type of lease to obtain 1112 * @flp: new lease supplied for insertion 1113 * @priv: private data for lm_setup operation 1114 * 1115 * Generic helper for filesystems that do not wish to allow leases to be set. 1116 * All arguments are ignored and it just returns -EINVAL. 1117 */ 1118 int 1119 simple_nosetlease(struct file *filp, long arg, struct file_lock **flp, 1120 void **priv) 1121 { 1122 return -EINVAL; 1123 } 1124 EXPORT_SYMBOL(simple_nosetlease); 1125 1126 const char *simple_get_link(struct dentry *dentry, struct inode *inode, 1127 struct delayed_call *done) 1128 { 1129 return inode->i_link; 1130 } 1131 EXPORT_SYMBOL(simple_get_link); 1132 1133 const struct inode_operations simple_symlink_inode_operations = { 1134 .get_link = simple_get_link, 1135 }; 1136 EXPORT_SYMBOL(simple_symlink_inode_operations); 1137 1138 /* 1139 * Operations for a permanently empty directory. 1140 */ 1141 static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) 1142 { 1143 return ERR_PTR(-ENOENT); 1144 } 1145 1146 static int empty_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, 1147 struct kstat *stat) 1148 { 1149 struct inode *inode = d_inode(dentry); 1150 generic_fillattr(inode, stat); 1151 return 0; 1152 } 1153 1154 static int empty_dir_setattr(struct dentry *dentry, struct iattr *attr) 1155 { 1156 return -EPERM; 1157 } 1158 1159 static ssize_t empty_dir_listxattr(struct dentry *dentry, char *list, size_t size) 1160 { 1161 return -EOPNOTSUPP; 1162 } 1163 1164 static const struct inode_operations empty_dir_inode_operations = { 1165 .lookup = empty_dir_lookup, 1166 .permission = generic_permission, 1167 .setattr = empty_dir_setattr, 1168 .getattr = empty_dir_getattr, 1169 .listxattr = empty_dir_listxattr, 1170 }; 1171 1172 static loff_t empty_dir_llseek(struct file *file, loff_t offset, int whence) 1173 { 1174 /* An empty directory has two entries . and .. at offsets 0 and 1 */ 1175 return generic_file_llseek_size(file, offset, whence, 2, 2); 1176 } 1177 1178 static int empty_dir_readdir(struct file *file, struct dir_context *ctx) 1179 { 1180 dir_emit_dots(file, ctx); 1181 return 0; 1182 } 1183 1184 static const struct file_operations empty_dir_operations = { 1185 .llseek = empty_dir_llseek, 1186 .read = generic_read_dir, 1187 .iterate_shared = empty_dir_readdir, 1188 .fsync = noop_fsync, 1189 }; 1190 1191 1192 void make_empty_dir_inode(struct inode *inode) 1193 { 1194 set_nlink(inode, 2); 1195 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; 1196 inode->i_uid = GLOBAL_ROOT_UID; 1197 inode->i_gid = GLOBAL_ROOT_GID; 1198 inode->i_rdev = 0; 1199 inode->i_size = 0; 1200 inode->i_blkbits = PAGE_SHIFT; 1201 inode->i_blocks = 0; 1202 1203 inode->i_op = &empty_dir_inode_operations; 1204 inode->i_opflags &= ~IOP_XATTR; 1205 inode->i_fop = &empty_dir_operations; 1206 } 1207 1208 bool is_empty_dir_inode(struct inode *inode) 1209 { 1210 return (inode->i_fop == &empty_dir_operations) && 1211 (inode->i_op == &empty_dir_inode_operations); 1212 } 1213