xref: /openbmc/linux/fs/reiserfs/file.c (revision c45ac8887e778c4fa2b572c51a94a681a0955d4d)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
31da177e4SLinus Torvalds  */
41da177e4SLinus Torvalds 
51da177e4SLinus Torvalds #include <linux/time.h>
61da177e4SLinus Torvalds #include <linux/reiserfs_fs.h>
71da177e4SLinus Torvalds #include <linux/reiserfs_acl.h>
8*c45ac888SAl Viro #include "xattr.h"
91da177e4SLinus Torvalds #include <asm/uaccess.h>
101da177e4SLinus Torvalds #include <linux/pagemap.h>
111da177e4SLinus Torvalds #include <linux/swap.h>
121da177e4SLinus Torvalds #include <linux/writeback.h>
131da177e4SLinus Torvalds #include <linux/blkdev.h>
141da177e4SLinus Torvalds #include <linux/buffer_head.h>
151da177e4SLinus Torvalds #include <linux/quotaops.h>
161da177e4SLinus Torvalds 
171da177e4SLinus Torvalds /*
181da177e4SLinus Torvalds ** We pack the tails of files on file close, not at the time they are written.
191da177e4SLinus Torvalds ** This implies an unnecessary copy of the tail and an unnecessary indirect item
201da177e4SLinus Torvalds ** insertion/balancing, for files that are written in one write.
211da177e4SLinus Torvalds ** It avoids unnecessary tail packings (balances) for files that are written in
221da177e4SLinus Torvalds ** multiple writes and are small enough to have tails.
231da177e4SLinus Torvalds **
241da177e4SLinus Torvalds ** file_release is called by the VFS layer when the file is closed.  If
251da177e4SLinus Torvalds ** this is the last open file descriptor, and the file
261da177e4SLinus Torvalds ** small enough to have a tail, and the tail is currently in an
271da177e4SLinus Torvalds ** unformatted node, the tail is converted back into a direct item.
281da177e4SLinus Torvalds **
291da177e4SLinus Torvalds ** We use reiserfs_truncate_file to pack the tail, since it already has
301da177e4SLinus Torvalds ** all the conditions coded.
311da177e4SLinus Torvalds */
321da177e4SLinus Torvalds static int reiserfs_file_release(struct inode *inode, struct file *filp)
331da177e4SLinus Torvalds {
341da177e4SLinus Torvalds 
351da177e4SLinus Torvalds 	struct reiserfs_transaction_handle th;
361da177e4SLinus Torvalds 	int err;
371da177e4SLinus Torvalds 	int jbegin_failure = 0;
381da177e4SLinus Torvalds 
3914a61442SEric Sesterhenn 	BUG_ON(!S_ISREG(inode->i_mode));
401da177e4SLinus Torvalds 
410e4f6a79SAl Viro         if (atomic_add_unless(&REISERFS_I(inode)->openers, -1, 1))
420e4f6a79SAl Viro 		return 0;
430e4f6a79SAl Viro 
440e4f6a79SAl Viro 	mutex_lock(&(REISERFS_I(inode)->tailpack));
450e4f6a79SAl Viro 
460e4f6a79SAl Viro         if (!atomic_dec_and_test(&REISERFS_I(inode)->openers)) {
470e4f6a79SAl Viro 		mutex_unlock(&(REISERFS_I(inode)->tailpack));
481da177e4SLinus Torvalds 		return 0;
491da177e4SLinus Torvalds 	}
501da177e4SLinus Torvalds 
510e4f6a79SAl Viro 	/* fast out for when nothing needs to be done */
520e4f6a79SAl Viro 	if ((!(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) ||
530e4f6a79SAl Viro 	     !tail_has_to_be_packed(inode)) &&
540e4f6a79SAl Viro 	    REISERFS_I(inode)->i_prealloc_count <= 0) {
550e4f6a79SAl Viro 		mutex_unlock(&(REISERFS_I(inode)->tailpack));
560e4f6a79SAl Viro 		return 0;
570e4f6a79SAl Viro 	}
58de14569fSVladimir Saveliev 
59b5f3953cSChris Mason 	reiserfs_write_lock(inode->i_sb);
601da177e4SLinus Torvalds 	/* freeing preallocation only involves relogging blocks that
611da177e4SLinus Torvalds 	 * are already in the current transaction.  preallocation gets
621da177e4SLinus Torvalds 	 * freed at the end of each transaction, so it is impossible for
631da177e4SLinus Torvalds 	 * us to log any additional blocks (including quota blocks)
641da177e4SLinus Torvalds 	 */
651da177e4SLinus Torvalds 	err = journal_begin(&th, inode->i_sb, 1);
661da177e4SLinus Torvalds 	if (err) {
671da177e4SLinus Torvalds 		/* uh oh, we can't allow the inode to go away while there
681da177e4SLinus Torvalds 		 * is still preallocation blocks pending.  Try to join the
691da177e4SLinus Torvalds 		 * aborted transaction
701da177e4SLinus Torvalds 		 */
711da177e4SLinus Torvalds 		jbegin_failure = err;
721da177e4SLinus Torvalds 		err = journal_join_abort(&th, inode->i_sb, 1);
731da177e4SLinus Torvalds 
741da177e4SLinus Torvalds 		if (err) {
751da177e4SLinus Torvalds 			/* hmpf, our choices here aren't good.  We can pin the inode
761da177e4SLinus Torvalds 			 * which will disallow unmount from every happening, we can
771da177e4SLinus Torvalds 			 * do nothing, which will corrupt random memory on unmount,
781da177e4SLinus Torvalds 			 * or we can forcibly remove the file from the preallocation
791da177e4SLinus Torvalds 			 * list, which will leak blocks on disk.  Lets pin the inode
801da177e4SLinus Torvalds 			 * and let the admin know what is going on.
811da177e4SLinus Torvalds 			 */
821da177e4SLinus Torvalds 			igrab(inode);
8345b03d5eSJeff Mahoney 			reiserfs_warning(inode->i_sb, "clm-9001",
84bd4c625cSLinus Torvalds 					 "pinning inode %lu because the "
85533221fbSAlexey Dobriyan 					 "preallocation can't be freed",
86533221fbSAlexey Dobriyan 					 inode->i_ino);
871da177e4SLinus Torvalds 			goto out;
881da177e4SLinus Torvalds 		}
891da177e4SLinus Torvalds 	}
901da177e4SLinus Torvalds 	reiserfs_update_inode_transaction(inode);
911da177e4SLinus Torvalds 
921da177e4SLinus Torvalds #ifdef REISERFS_PREALLOCATE
931da177e4SLinus Torvalds 	reiserfs_discard_prealloc(&th, inode);
941da177e4SLinus Torvalds #endif
951da177e4SLinus Torvalds 	err = journal_end(&th, inode->i_sb, 1);
961da177e4SLinus Torvalds 
971da177e4SLinus Torvalds 	/* copy back the error code from journal_begin */
981da177e4SLinus Torvalds 	if (!err)
991da177e4SLinus Torvalds 		err = jbegin_failure;
1001da177e4SLinus Torvalds 
1010e4f6a79SAl Viro 	if (!err &&
1021da177e4SLinus Torvalds 	    (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) &&
1031da177e4SLinus Torvalds 	    tail_has_to_be_packed(inode)) {
1040e4f6a79SAl Viro 
1051da177e4SLinus Torvalds 		/* if regular file is released by last holder and it has been
1061da177e4SLinus Torvalds 		   appended (we append by unformatted node only) or its direct
1071da177e4SLinus Torvalds 		   item(s) had to be converted, then it may have to be
1081da177e4SLinus Torvalds 		   indirect2direct converted */
1091da177e4SLinus Torvalds 		err = reiserfs_truncate_file(inode, 0);
1101da177e4SLinus Torvalds 	}
1111da177e4SLinus Torvalds       out:
1121da177e4SLinus Torvalds 	reiserfs_write_unlock(inode->i_sb);
1130e4f6a79SAl Viro 	mutex_unlock(&(REISERFS_I(inode)->tailpack));
1141da177e4SLinus Torvalds 	return err;
1151da177e4SLinus Torvalds }
1161da177e4SLinus Torvalds 
1170e4f6a79SAl Viro static int reiserfs_file_open(struct inode *inode, struct file *file)
118de14569fSVladimir Saveliev {
1190e4f6a79SAl Viro 	int err = dquot_file_open(inode, file);
1200e4f6a79SAl Viro         if (!atomic_inc_not_zero(&REISERFS_I(inode)->openers)) {
1210e4f6a79SAl Viro 		/* somebody might be tailpacking on final close; wait for it */
1220e4f6a79SAl Viro 		mutex_lock(&(REISERFS_I(inode)->tailpack));
1230e4f6a79SAl Viro 		atomic_inc(&REISERFS_I(inode)->openers);
1240e4f6a79SAl Viro 		mutex_unlock(&(REISERFS_I(inode)->tailpack));
1250e4f6a79SAl Viro 	}
1260e4f6a79SAl Viro 	return err;
127de14569fSVladimir Saveliev }
128de14569fSVladimir Saveliev 
129bd4c625cSLinus Torvalds static void reiserfs_vfs_truncate_file(struct inode *inode)
130bd4c625cSLinus Torvalds {
1310e4f6a79SAl Viro 	mutex_lock(&(REISERFS_I(inode)->tailpack));
1321da177e4SLinus Torvalds 	reiserfs_truncate_file(inode, 1);
1330e4f6a79SAl Viro 	mutex_unlock(&(REISERFS_I(inode)->tailpack));
1341da177e4SLinus Torvalds }
1351da177e4SLinus Torvalds 
1361da177e4SLinus Torvalds /* Sync a reiserfs file. */
1371da177e4SLinus Torvalds 
1381da177e4SLinus Torvalds /*
1391da177e4SLinus Torvalds  * FIXME: sync_mapping_buffers() never has anything to sync.  Can
1401da177e4SLinus Torvalds  * be removed...
1411da177e4SLinus Torvalds  */
1421da177e4SLinus Torvalds 
14302c24a82SJosef Bacik static int reiserfs_sync_file(struct file *filp, loff_t start, loff_t end,
14402c24a82SJosef Bacik 			      int datasync)
145bd4c625cSLinus Torvalds {
1467ea80859SChristoph Hellwig 	struct inode *inode = filp->f_mapping->host;
147ee93961bSJeff Mahoney 	int err;
1481da177e4SLinus Torvalds 	int barrier_done;
1491da177e4SLinus Torvalds 
15002c24a82SJosef Bacik 	err = filemap_write_and_wait_range(inode->i_mapping, start, end);
15102c24a82SJosef Bacik 	if (err)
15202c24a82SJosef Bacik 		return err;
15302c24a82SJosef Bacik 
15402c24a82SJosef Bacik 	mutex_lock(&inode->i_mutex);
155995c762eSJeff Mahoney 	BUG_ON(!S_ISREG(inode->i_mode));
156ee93961bSJeff Mahoney 	err = sync_mapping_buffers(inode->i_mapping);
157995c762eSJeff Mahoney 	reiserfs_write_lock(inode->i_sb);
158995c762eSJeff Mahoney 	barrier_done = reiserfs_commit_for_inode(inode);
159995c762eSJeff Mahoney 	reiserfs_write_unlock(inode->i_sb);
160995c762eSJeff Mahoney 	if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb))
161dd3932edSChristoph Hellwig 		blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
16202c24a82SJosef Bacik 	mutex_unlock(&inode->i_mutex);
1631da177e4SLinus Torvalds 	if (barrier_done < 0)
1641da177e4SLinus Torvalds 		return barrier_done;
165ee93961bSJeff Mahoney 	return (err < 0) ? -EIO : 0;
1661da177e4SLinus Torvalds }
1671da177e4SLinus Torvalds 
1681da177e4SLinus Torvalds /* taken fs/buffer.c:__block_commit_write */
1691da177e4SLinus Torvalds int reiserfs_commit_page(struct inode *inode, struct page *page,
1701da177e4SLinus Torvalds 			 unsigned from, unsigned to)
1711da177e4SLinus Torvalds {
1721da177e4SLinus Torvalds 	unsigned block_start, block_end;
1731da177e4SLinus Torvalds 	int partial = 0;
1741da177e4SLinus Torvalds 	unsigned blocksize;
1751da177e4SLinus Torvalds 	struct buffer_head *bh, *head;
1761da177e4SLinus Torvalds 	unsigned long i_size_index = inode->i_size >> PAGE_CACHE_SHIFT;
1771da177e4SLinus Torvalds 	int new;
1781da177e4SLinus Torvalds 	int logit = reiserfs_file_data_log(inode);
1791da177e4SLinus Torvalds 	struct super_block *s = inode->i_sb;
1801da177e4SLinus Torvalds 	int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
1811da177e4SLinus Torvalds 	struct reiserfs_transaction_handle th;
1821da177e4SLinus Torvalds 	int ret = 0;
1831da177e4SLinus Torvalds 
1841da177e4SLinus Torvalds 	th.t_trans_id = 0;
1851da177e4SLinus Torvalds 	blocksize = 1 << inode->i_blkbits;
1861da177e4SLinus Torvalds 
1871da177e4SLinus Torvalds 	if (logit) {
1881da177e4SLinus Torvalds 		reiserfs_write_lock(s);
1891da177e4SLinus Torvalds 		ret = journal_begin(&th, s, bh_per_page + 1);
1901da177e4SLinus Torvalds 		if (ret)
1911da177e4SLinus Torvalds 			goto drop_write_lock;
1921da177e4SLinus Torvalds 		reiserfs_update_inode_transaction(inode);
1931da177e4SLinus Torvalds 	}
1941da177e4SLinus Torvalds 	for (bh = head = page_buffers(page), block_start = 0;
1951da177e4SLinus Torvalds 	     bh != head || !block_start;
196bd4c625cSLinus Torvalds 	     block_start = block_end, bh = bh->b_this_page) {
1971da177e4SLinus Torvalds 
1981da177e4SLinus Torvalds 		new = buffer_new(bh);
1991da177e4SLinus Torvalds 		clear_buffer_new(bh);
2001da177e4SLinus Torvalds 		block_end = block_start + blocksize;
2011da177e4SLinus Torvalds 		if (block_end <= from || block_start >= to) {
2021da177e4SLinus Torvalds 			if (!buffer_uptodate(bh))
2031da177e4SLinus Torvalds 				partial = 1;
2041da177e4SLinus Torvalds 		} else {
2051da177e4SLinus Torvalds 			set_buffer_uptodate(bh);
2061da177e4SLinus Torvalds 			if (logit) {
2071da177e4SLinus Torvalds 				reiserfs_prepare_for_journal(s, bh, 1);
2081da177e4SLinus Torvalds 				journal_mark_dirty(&th, s, bh);
2091da177e4SLinus Torvalds 			} else if (!buffer_dirty(bh)) {
2101da177e4SLinus Torvalds 				mark_buffer_dirty(bh);
2111da177e4SLinus Torvalds 				/* do data=ordered on any page past the end
2121da177e4SLinus Torvalds 				 * of file and any buffer marked BH_New.
2131da177e4SLinus Torvalds 				 */
2141da177e4SLinus Torvalds 				if (reiserfs_data_ordered(inode->i_sb) &&
2151da177e4SLinus Torvalds 				    (new || page->index >= i_size_index)) {
2161da177e4SLinus Torvalds 					reiserfs_add_ordered_list(inode, bh);
2171da177e4SLinus Torvalds 				}
2181da177e4SLinus Torvalds 			}
2191da177e4SLinus Torvalds 		}
2201da177e4SLinus Torvalds 	}
2211da177e4SLinus Torvalds 	if (logit) {
2221da177e4SLinus Torvalds 		ret = journal_end(&th, s, bh_per_page + 1);
2231da177e4SLinus Torvalds 	      drop_write_lock:
2241da177e4SLinus Torvalds 		reiserfs_write_unlock(s);
2251da177e4SLinus Torvalds 	}
2261da177e4SLinus Torvalds 	/*
2271da177e4SLinus Torvalds 	 * If this is a partial write which happened to make all buffers
2281da177e4SLinus Torvalds 	 * uptodate then we can optimize away a bogus readpage() for
2291da177e4SLinus Torvalds 	 * the next read(). Here we 'discover' whether the page went
2301da177e4SLinus Torvalds 	 * uptodate as a result of this (potentially partial) write.
2311da177e4SLinus Torvalds 	 */
2321da177e4SLinus Torvalds 	if (!partial)
2331da177e4SLinus Torvalds 		SetPageUptodate(page);
2341da177e4SLinus Torvalds 	return ret;
2351da177e4SLinus Torvalds }
2361da177e4SLinus Torvalds 
2371da177e4SLinus Torvalds /* Write @count bytes at position @ppos in a file indicated by @file
2381da177e4SLinus Torvalds    from the buffer @buf.
2391da177e4SLinus Torvalds 
2401da177e4SLinus Torvalds    generic_file_write() is only appropriate for filesystems that are not seeking to optimize performance and want
2411da177e4SLinus Torvalds    something simple that works.  It is not for serious use by general purpose filesystems, excepting the one that it was
2421da177e4SLinus Torvalds    written for (ext2/3).  This is for several reasons:
2431da177e4SLinus Torvalds 
2441da177e4SLinus Torvalds    * It has no understanding of any filesystem specific optimizations.
2451da177e4SLinus Torvalds 
2461da177e4SLinus Torvalds    * It enters the filesystem repeatedly for each page that is written.
2471da177e4SLinus Torvalds 
2481da177e4SLinus Torvalds    * It depends on reiserfs_get_block() function which if implemented by reiserfs performs costly search_by_key
2491da177e4SLinus Torvalds    * operation for each page it is supplied with. By contrast reiserfs_file_write() feeds as much as possible at a time
2501da177e4SLinus Torvalds    * to reiserfs which allows for fewer tree traversals.
2511da177e4SLinus Torvalds 
2521da177e4SLinus Torvalds    * Each indirect pointer insertion takes a lot of cpu, because it involves memory moves inside of blocks.
2531da177e4SLinus Torvalds 
2541da177e4SLinus Torvalds    * Asking the block allocation code for blocks one at a time is slightly less efficient.
2551da177e4SLinus Torvalds 
2561da177e4SLinus Torvalds    All of these reasons for not using only generic file write were understood back when reiserfs was first miscoded to
2571da177e4SLinus Torvalds    use it, but we were in a hurry to make code freeze, and so it couldn't be revised then.  This new code should make
2581da177e4SLinus Torvalds    things right finally.
2591da177e4SLinus Torvalds 
2601da177e4SLinus Torvalds    Future Features: providing search_by_key with hints.
2611da177e4SLinus Torvalds 
2621da177e4SLinus Torvalds */
2631da177e4SLinus Torvalds static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going to write into */
2641da177e4SLinus Torvalds 				   const char __user * buf,	/*  pointer to user supplied data
2651da177e4SLinus Torvalds 								   (in userspace) */
2661da177e4SLinus Torvalds 				   size_t count,	/* amount of bytes to write */
2671da177e4SLinus Torvalds 				   loff_t * ppos	/* pointer to position in file that we start writing at. Should be updated to
268bd4c625cSLinus Torvalds 							 * new current position before returning. */
269bd4c625cSLinus Torvalds 				   )
2701da177e4SLinus Torvalds {
2711fc5adbdSJosef Sipek 	struct inode *inode = file->f_path.dentry->d_inode;	// Inode of the file that we are writing to.
2721da177e4SLinus Torvalds 	/* To simplify coding at this time, we store
2731da177e4SLinus Torvalds 	   locked pages in array for now */
2741da177e4SLinus Torvalds 	struct reiserfs_transaction_handle th;
2751da177e4SLinus Torvalds 	th.t_trans_id = 0;
2761da177e4SLinus Torvalds 
277fa385befSJeff Mahoney 	/* If a filesystem is converted from 3.5 to 3.6, we'll have v3.5 items
278fa385befSJeff Mahoney 	* lying around (most of the disk, in fact). Despite the filesystem
279fa385befSJeff Mahoney 	* now being a v3.6 format, the old items still can't support large
280fa385befSJeff Mahoney 	* file sizes. Catch this case here, as the rest of the VFS layer is
281fa385befSJeff Mahoney 	* oblivious to the different limitations between old and new items.
282fa385befSJeff Mahoney 	* reiserfs_setattr catches this for truncates. This chunk is lifted
283fa385befSJeff Mahoney 	* from generic_write_checks. */
284fa385befSJeff Mahoney 	if (get_inode_item_key_version (inode) == KEY_FORMAT_3_5 &&
285fa385befSJeff Mahoney 	    *ppos + count > MAX_NON_LFS) {
286fa385befSJeff Mahoney 		if (*ppos >= MAX_NON_LFS) {
287fa385befSJeff Mahoney 			return -EFBIG;
288fa385befSJeff Mahoney 		}
289fa385befSJeff Mahoney 		if (count > MAX_NON_LFS - (unsigned long)*ppos)
290fa385befSJeff Mahoney 			count = MAX_NON_LFS - (unsigned long)*ppos;
291fa385befSJeff Mahoney 	}
292fa385befSJeff Mahoney 
293c5574768SVladimir V. Saveliev 	return do_sync_write(file, buf, count, ppos);
2941da177e4SLinus Torvalds }
2951da177e4SLinus Torvalds 
2964b6f5d20SArjan van de Ven const struct file_operations reiserfs_file_operations = {
297027445c3SBadari Pulavarty 	.read = do_sync_read,
2981da177e4SLinus Torvalds 	.write = reiserfs_file_write,
299205cb37bSFrederic Weisbecker 	.unlocked_ioctl = reiserfs_ioctl,
30052b499c4SDavid Howells #ifdef CONFIG_COMPAT
30152b499c4SDavid Howells 	.compat_ioctl = reiserfs_compat_ioctl,
30252b499c4SDavid Howells #endif
3030e4f6a79SAl Viro 	.mmap = generic_file_mmap,
3040e4f6a79SAl Viro 	.open = reiserfs_file_open,
3051da177e4SLinus Torvalds 	.release = reiserfs_file_release,
3061da177e4SLinus Torvalds 	.fsync = reiserfs_sync_file,
3071da177e4SLinus Torvalds 	.aio_read = generic_file_aio_read,
3089637f28fSAlexey Dobriyan 	.aio_write = generic_file_aio_write,
3095274f052SJens Axboe 	.splice_read = generic_file_splice_read,
3105274f052SJens Axboe 	.splice_write = generic_file_splice_write,
31191efc167SChristoph Hellwig 	.llseek = generic_file_llseek,
3121da177e4SLinus Torvalds };
3131da177e4SLinus Torvalds 
314c5ef1c42SArjan van de Ven const struct inode_operations reiserfs_file_inode_operations = {
3151da177e4SLinus Torvalds 	.truncate = reiserfs_vfs_truncate_file,
3161da177e4SLinus Torvalds 	.setattr = reiserfs_setattr,
3171da177e4SLinus Torvalds 	.setxattr = reiserfs_setxattr,
3181da177e4SLinus Torvalds 	.getxattr = reiserfs_getxattr,
3191da177e4SLinus Torvalds 	.listxattr = reiserfs_listxattr,
3201da177e4SLinus Torvalds 	.removexattr = reiserfs_removexattr,
3211da177e4SLinus Torvalds 	.permission = reiserfs_permission,
3224e34e719SChristoph Hellwig 	.get_acl = reiserfs_get_acl,
3231da177e4SLinus Torvalds };
324