/*
 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
 */

#include <linux/time.h>
#include <linux/reiserfs_fs.h>
#include <linux/reiserfs_acl.h>
#include <linux/reiserfs_xattr.h>
#include <asm/uaccess.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/quotaops.h>

/*
** We pack the tails of files on file close, not at the time they are written.
** This implies an unnecessary copy of the tail and an unnecessary indirect item
** insertion/balancing, for files that are written in one write.
** It avoids unnecessary tail packings (balances) for files that are written in
** multiple writes and are small enough to have tails.
**
** file_release is called by the VFS layer when the file is closed.  If
** this is the last open file descriptor, and the file is
** small enough to have a tail, and the tail is currently in an
** unformatted node, the tail is converted back into a direct item.
**
** We use reiserfs_truncate_file to pack the tail, since it already has
** all the conditions coded.
311da177e4SLinus Torvalds */ 321da177e4SLinus Torvalds static int reiserfs_file_release(struct inode *inode, struct file *filp) 331da177e4SLinus Torvalds { 341da177e4SLinus Torvalds 351da177e4SLinus Torvalds struct reiserfs_transaction_handle th; 361da177e4SLinus Torvalds int err; 371da177e4SLinus Torvalds int jbegin_failure = 0; 381da177e4SLinus Torvalds 3914a61442SEric Sesterhenn BUG_ON(!S_ISREG(inode->i_mode)); 401da177e4SLinus Torvalds 411da177e4SLinus Torvalds /* fast out for when nothing needs to be done */ 421da177e4SLinus Torvalds if ((atomic_read(&inode->i_count) > 1 || 431da177e4SLinus Torvalds !(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) || 441da177e4SLinus Torvalds !tail_has_to_be_packed(inode)) && 451da177e4SLinus Torvalds REISERFS_I(inode)->i_prealloc_count <= 0) { 461da177e4SLinus Torvalds return 0; 471da177e4SLinus Torvalds } 481da177e4SLinus Torvalds 491b1dcc1bSJes Sorensen mutex_lock(&inode->i_mutex); 50de14569fSVladimir Saveliev 51de14569fSVladimir Saveliev mutex_lock(&(REISERFS_I(inode)->i_mmap)); 52de14569fSVladimir Saveliev if (REISERFS_I(inode)->i_flags & i_ever_mapped) 53de14569fSVladimir Saveliev REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask; 54de14569fSVladimir Saveliev 55b5f3953cSChris Mason reiserfs_write_lock(inode->i_sb); 561da177e4SLinus Torvalds /* freeing preallocation only involves relogging blocks that 571da177e4SLinus Torvalds * are already in the current transaction. preallocation gets 581da177e4SLinus Torvalds * freed at the end of each transaction, so it is impossible for 591da177e4SLinus Torvalds * us to log any additional blocks (including quota blocks) 601da177e4SLinus Torvalds */ 611da177e4SLinus Torvalds err = journal_begin(&th, inode->i_sb, 1); 621da177e4SLinus Torvalds if (err) { 631da177e4SLinus Torvalds /* uh oh, we can't allow the inode to go away while there 641da177e4SLinus Torvalds * is still preallocation blocks pending. 
Try to join the 651da177e4SLinus Torvalds * aborted transaction 661da177e4SLinus Torvalds */ 671da177e4SLinus Torvalds jbegin_failure = err; 681da177e4SLinus Torvalds err = journal_join_abort(&th, inode->i_sb, 1); 691da177e4SLinus Torvalds 701da177e4SLinus Torvalds if (err) { 711da177e4SLinus Torvalds /* hmpf, our choices here aren't good. We can pin the inode 721da177e4SLinus Torvalds * which will disallow unmount from every happening, we can 731da177e4SLinus Torvalds * do nothing, which will corrupt random memory on unmount, 741da177e4SLinus Torvalds * or we can forcibly remove the file from the preallocation 751da177e4SLinus Torvalds * list, which will leak blocks on disk. Lets pin the inode 761da177e4SLinus Torvalds * and let the admin know what is going on. 771da177e4SLinus Torvalds */ 781da177e4SLinus Torvalds igrab(inode); 7945b03d5eSJeff Mahoney reiserfs_warning(inode->i_sb, "clm-9001", 80bd4c625cSLinus Torvalds "pinning inode %lu because the " 81533221fbSAlexey Dobriyan "preallocation can't be freed", 82533221fbSAlexey Dobriyan inode->i_ino); 831da177e4SLinus Torvalds goto out; 841da177e4SLinus Torvalds } 851da177e4SLinus Torvalds } 861da177e4SLinus Torvalds reiserfs_update_inode_transaction(inode); 871da177e4SLinus Torvalds 881da177e4SLinus Torvalds #ifdef REISERFS_PREALLOCATE 891da177e4SLinus Torvalds reiserfs_discard_prealloc(&th, inode); 901da177e4SLinus Torvalds #endif 911da177e4SLinus Torvalds err = journal_end(&th, inode->i_sb, 1); 921da177e4SLinus Torvalds 931da177e4SLinus Torvalds /* copy back the error code from journal_begin */ 941da177e4SLinus Torvalds if (!err) 951da177e4SLinus Torvalds err = jbegin_failure; 961da177e4SLinus Torvalds 971da177e4SLinus Torvalds if (!err && atomic_read(&inode->i_count) <= 1 && 981da177e4SLinus Torvalds (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) && 991da177e4SLinus Torvalds tail_has_to_be_packed(inode)) { 1001da177e4SLinus Torvalds /* if regular file is released by last holder and it has been 
1011da177e4SLinus Torvalds appended (we append by unformatted node only) or its direct 1021da177e4SLinus Torvalds item(s) had to be converted, then it may have to be 1031da177e4SLinus Torvalds indirect2direct converted */ 1041da177e4SLinus Torvalds err = reiserfs_truncate_file(inode, 0); 1051da177e4SLinus Torvalds } 1061da177e4SLinus Torvalds out: 107de14569fSVladimir Saveliev mutex_unlock(&(REISERFS_I(inode)->i_mmap)); 1081b1dcc1bSJes Sorensen mutex_unlock(&inode->i_mutex); 1091da177e4SLinus Torvalds reiserfs_write_unlock(inode->i_sb); 1101da177e4SLinus Torvalds return err; 1111da177e4SLinus Torvalds } 1121da177e4SLinus Torvalds 113de14569fSVladimir Saveliev static int reiserfs_file_mmap(struct file *file, struct vm_area_struct *vma) 114de14569fSVladimir Saveliev { 115de14569fSVladimir Saveliev struct inode *inode; 116de14569fSVladimir Saveliev 117de14569fSVladimir Saveliev inode = file->f_path.dentry->d_inode; 118de14569fSVladimir Saveliev mutex_lock(&(REISERFS_I(inode)->i_mmap)); 119de14569fSVladimir Saveliev REISERFS_I(inode)->i_flags |= i_ever_mapped; 120de14569fSVladimir Saveliev mutex_unlock(&(REISERFS_I(inode)->i_mmap)); 121de14569fSVladimir Saveliev 122de14569fSVladimir Saveliev return generic_file_mmap(file, vma); 123de14569fSVladimir Saveliev } 124de14569fSVladimir Saveliev 125bd4c625cSLinus Torvalds static void reiserfs_vfs_truncate_file(struct inode *inode) 126bd4c625cSLinus Torvalds { 1271da177e4SLinus Torvalds reiserfs_truncate_file(inode, 1); 1281da177e4SLinus Torvalds } 1291da177e4SLinus Torvalds 1301da177e4SLinus Torvalds /* Sync a reiserfs file. */ 1311da177e4SLinus Torvalds 1321da177e4SLinus Torvalds /* 1331da177e4SLinus Torvalds * FIXME: sync_mapping_buffers() never has anything to sync. Can 1341da177e4SLinus Torvalds * be removed... 
1351da177e4SLinus Torvalds */ 1361da177e4SLinus Torvalds 137d68caa95SJeff Mahoney static int reiserfs_sync_file(struct file *filp, 138d68caa95SJeff Mahoney struct dentry *dentry, int datasync) 139bd4c625cSLinus Torvalds { 140d68caa95SJeff Mahoney struct inode *inode = dentry->d_inode; 141ee93961bSJeff Mahoney int err; 1421da177e4SLinus Torvalds int barrier_done; 1431da177e4SLinus Torvalds 144995c762eSJeff Mahoney BUG_ON(!S_ISREG(inode->i_mode)); 145ee93961bSJeff Mahoney err = sync_mapping_buffers(inode->i_mapping); 146995c762eSJeff Mahoney reiserfs_write_lock(inode->i_sb); 147995c762eSJeff Mahoney barrier_done = reiserfs_commit_for_inode(inode); 148995c762eSJeff Mahoney reiserfs_write_unlock(inode->i_sb); 149995c762eSJeff Mahoney if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb)) 150995c762eSJeff Mahoney blkdev_issue_flush(inode->i_sb->s_bdev, NULL); 1511da177e4SLinus Torvalds if (barrier_done < 0) 1521da177e4SLinus Torvalds return barrier_done; 153ee93961bSJeff Mahoney return (err < 0) ? 
-EIO : 0; 1541da177e4SLinus Torvalds } 1551da177e4SLinus Torvalds 1561da177e4SLinus Torvalds /* taken fs/buffer.c:__block_commit_write */ 1571da177e4SLinus Torvalds int reiserfs_commit_page(struct inode *inode, struct page *page, 1581da177e4SLinus Torvalds unsigned from, unsigned to) 1591da177e4SLinus Torvalds { 1601da177e4SLinus Torvalds unsigned block_start, block_end; 1611da177e4SLinus Torvalds int partial = 0; 1621da177e4SLinus Torvalds unsigned blocksize; 1631da177e4SLinus Torvalds struct buffer_head *bh, *head; 1641da177e4SLinus Torvalds unsigned long i_size_index = inode->i_size >> PAGE_CACHE_SHIFT; 1651da177e4SLinus Torvalds int new; 1661da177e4SLinus Torvalds int logit = reiserfs_file_data_log(inode); 1671da177e4SLinus Torvalds struct super_block *s = inode->i_sb; 1681da177e4SLinus Torvalds int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize; 1691da177e4SLinus Torvalds struct reiserfs_transaction_handle th; 1701da177e4SLinus Torvalds int ret = 0; 1711da177e4SLinus Torvalds 1721da177e4SLinus Torvalds th.t_trans_id = 0; 1731da177e4SLinus Torvalds blocksize = 1 << inode->i_blkbits; 1741da177e4SLinus Torvalds 1751da177e4SLinus Torvalds if (logit) { 1761da177e4SLinus Torvalds reiserfs_write_lock(s); 1771da177e4SLinus Torvalds ret = journal_begin(&th, s, bh_per_page + 1); 1781da177e4SLinus Torvalds if (ret) 1791da177e4SLinus Torvalds goto drop_write_lock; 1801da177e4SLinus Torvalds reiserfs_update_inode_transaction(inode); 1811da177e4SLinus Torvalds } 1821da177e4SLinus Torvalds for (bh = head = page_buffers(page), block_start = 0; 1831da177e4SLinus Torvalds bh != head || !block_start; 184bd4c625cSLinus Torvalds block_start = block_end, bh = bh->b_this_page) { 1851da177e4SLinus Torvalds 1861da177e4SLinus Torvalds new = buffer_new(bh); 1871da177e4SLinus Torvalds clear_buffer_new(bh); 1881da177e4SLinus Torvalds block_end = block_start + blocksize; 1891da177e4SLinus Torvalds if (block_end <= from || block_start >= to) { 1901da177e4SLinus Torvalds if 
(!buffer_uptodate(bh)) 1911da177e4SLinus Torvalds partial = 1; 1921da177e4SLinus Torvalds } else { 1931da177e4SLinus Torvalds set_buffer_uptodate(bh); 1941da177e4SLinus Torvalds if (logit) { 1951da177e4SLinus Torvalds reiserfs_prepare_for_journal(s, bh, 1); 1961da177e4SLinus Torvalds journal_mark_dirty(&th, s, bh); 1971da177e4SLinus Torvalds } else if (!buffer_dirty(bh)) { 1981da177e4SLinus Torvalds mark_buffer_dirty(bh); 1991da177e4SLinus Torvalds /* do data=ordered on any page past the end 2001da177e4SLinus Torvalds * of file and any buffer marked BH_New. 2011da177e4SLinus Torvalds */ 2021da177e4SLinus Torvalds if (reiserfs_data_ordered(inode->i_sb) && 2031da177e4SLinus Torvalds (new || page->index >= i_size_index)) { 2041da177e4SLinus Torvalds reiserfs_add_ordered_list(inode, bh); 2051da177e4SLinus Torvalds } 2061da177e4SLinus Torvalds } 2071da177e4SLinus Torvalds } 2081da177e4SLinus Torvalds } 2091da177e4SLinus Torvalds if (logit) { 2101da177e4SLinus Torvalds ret = journal_end(&th, s, bh_per_page + 1); 2111da177e4SLinus Torvalds drop_write_lock: 2121da177e4SLinus Torvalds reiserfs_write_unlock(s); 2131da177e4SLinus Torvalds } 2141da177e4SLinus Torvalds /* 2151da177e4SLinus Torvalds * If this is a partial write which happened to make all buffers 2161da177e4SLinus Torvalds * uptodate then we can optimize away a bogus readpage() for 2171da177e4SLinus Torvalds * the next read(). Here we 'discover' whether the page went 2181da177e4SLinus Torvalds * uptodate as a result of this (potentially partial) write. 2191da177e4SLinus Torvalds */ 2201da177e4SLinus Torvalds if (!partial) 2211da177e4SLinus Torvalds SetPageUptodate(page); 2221da177e4SLinus Torvalds return ret; 2231da177e4SLinus Torvalds } 2241da177e4SLinus Torvalds 2251da177e4SLinus Torvalds /* Write @count bytes at position @ppos in a file indicated by @file 2261da177e4SLinus Torvalds from the buffer @buf. 

   generic_file_write() is only appropriate for filesystems that are not seeking to optimize performance and want
   something simple that works.  It is not for serious use by general purpose filesystems, excepting the one that it was
   written for (ext2/3).  This is for several reasons:

   * It has no understanding of any filesystem specific optimizations.

   * It enters the filesystem repeatedly for each page that is written.

   * It depends on the reiserfs_get_block() function which, if implemented by reiserfs, performs a costly search_by_key
     operation for each page it is supplied with.  By contrast reiserfs_file_write() feeds as much as possible at a time
     to reiserfs, which allows for fewer tree traversals.

   * Each indirect pointer insertion takes a lot of cpu, because it involves memory moves inside of blocks.

   * Asking the block allocation code for blocks one at a time is slightly less efficient.

   All of these reasons for not using only generic file write were understood back when reiserfs was first miscoded to
   use it, but we were in a hurry to make code freeze, and so it couldn't be revised then.  This new code should make
   things right finally.

   Future Features: providing search_by_key with hints.
2491da177e4SLinus Torvalds 2501da177e4SLinus Torvalds */ 2511da177e4SLinus Torvalds static ssize_t reiserfs_file_write(struct file *file, /* the file we are going to write into */ 2521da177e4SLinus Torvalds const char __user * buf, /* pointer to user supplied data 2531da177e4SLinus Torvalds (in userspace) */ 2541da177e4SLinus Torvalds size_t count, /* amount of bytes to write */ 2551da177e4SLinus Torvalds loff_t * ppos /* pointer to position in file that we start writing at. Should be updated to 256bd4c625cSLinus Torvalds * new current position before returning. */ 257bd4c625cSLinus Torvalds ) 2581da177e4SLinus Torvalds { 2591fc5adbdSJosef Sipek struct inode *inode = file->f_path.dentry->d_inode; // Inode of the file that we are writing to. 2601da177e4SLinus Torvalds /* To simplify coding at this time, we store 2611da177e4SLinus Torvalds locked pages in array for now */ 2621da177e4SLinus Torvalds struct reiserfs_transaction_handle th; 2631da177e4SLinus Torvalds th.t_trans_id = 0; 2641da177e4SLinus Torvalds 265fa385befSJeff Mahoney /* If a filesystem is converted from 3.5 to 3.6, we'll have v3.5 items 266fa385befSJeff Mahoney * lying around (most of the disk, in fact). Despite the filesystem 267fa385befSJeff Mahoney * now being a v3.6 format, the old items still can't support large 268fa385befSJeff Mahoney * file sizes. Catch this case here, as the rest of the VFS layer is 269fa385befSJeff Mahoney * oblivious to the different limitations between old and new items. 270fa385befSJeff Mahoney * reiserfs_setattr catches this for truncates. This chunk is lifted 271fa385befSJeff Mahoney * from generic_write_checks. 
*/ 272fa385befSJeff Mahoney if (get_inode_item_key_version (inode) == KEY_FORMAT_3_5 && 273fa385befSJeff Mahoney *ppos + count > MAX_NON_LFS) { 274fa385befSJeff Mahoney if (*ppos >= MAX_NON_LFS) { 275fa385befSJeff Mahoney return -EFBIG; 276fa385befSJeff Mahoney } 277fa385befSJeff Mahoney if (count > MAX_NON_LFS - (unsigned long)*ppos) 278fa385befSJeff Mahoney count = MAX_NON_LFS - (unsigned long)*ppos; 279fa385befSJeff Mahoney } 280fa385befSJeff Mahoney 281c5574768SVladimir V. Saveliev return do_sync_write(file, buf, count, ppos); 2821da177e4SLinus Torvalds } 2831da177e4SLinus Torvalds 2844b6f5d20SArjan van de Ven const struct file_operations reiserfs_file_operations = { 285027445c3SBadari Pulavarty .read = do_sync_read, 2861da177e4SLinus Torvalds .write = reiserfs_file_write, 287*205cb37bSFrederic Weisbecker .unlocked_ioctl = reiserfs_ioctl, 28852b499c4SDavid Howells #ifdef CONFIG_COMPAT 28952b499c4SDavid Howells .compat_ioctl = reiserfs_compat_ioctl, 29052b499c4SDavid Howells #endif 291de14569fSVladimir Saveliev .mmap = reiserfs_file_mmap, 2925a2618e6SJeff Mahoney .open = generic_file_open, 2931da177e4SLinus Torvalds .release = reiserfs_file_release, 2941da177e4SLinus Torvalds .fsync = reiserfs_sync_file, 2951da177e4SLinus Torvalds .aio_read = generic_file_aio_read, 2969637f28fSAlexey Dobriyan .aio_write = generic_file_aio_write, 2975274f052SJens Axboe .splice_read = generic_file_splice_read, 2985274f052SJens Axboe .splice_write = generic_file_splice_write, 29991efc167SChristoph Hellwig .llseek = generic_file_llseek, 3001da177e4SLinus Torvalds }; 3011da177e4SLinus Torvalds 302c5ef1c42SArjan van de Ven const struct inode_operations reiserfs_file_inode_operations = { 3031da177e4SLinus Torvalds .truncate = reiserfs_vfs_truncate_file, 3041da177e4SLinus Torvalds .setattr = reiserfs_setattr, 3051da177e4SLinus Torvalds .setxattr = reiserfs_setxattr, 3061da177e4SLinus Torvalds .getxattr = reiserfs_getxattr, 3071da177e4SLinus Torvalds .listxattr = reiserfs_listxattr, 
3081da177e4SLinus Torvalds .removexattr = reiserfs_removexattr, 3091da177e4SLinus Torvalds .permission = reiserfs_permission, 3101da177e4SLinus Torvalds }; 311