xref: /openbmc/linux/fs/reiserfs/file.c (revision de21c57b90b3716f6f951e88e039d00ab6729ce9)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
31da177e4SLinus Torvalds  */
41da177e4SLinus Torvalds 
51da177e4SLinus Torvalds #include <linux/time.h>
61da177e4SLinus Torvalds #include <linux/reiserfs_fs.h>
71da177e4SLinus Torvalds #include <linux/reiserfs_acl.h>
81da177e4SLinus Torvalds #include <linux/reiserfs_xattr.h>
91da177e4SLinus Torvalds #include <linux/smp_lock.h>
101da177e4SLinus Torvalds #include <asm/uaccess.h>
111da177e4SLinus Torvalds #include <linux/pagemap.h>
121da177e4SLinus Torvalds #include <linux/swap.h>
131da177e4SLinus Torvalds #include <linux/writeback.h>
141da177e4SLinus Torvalds #include <linux/blkdev.h>
151da177e4SLinus Torvalds #include <linux/buffer_head.h>
161da177e4SLinus Torvalds #include <linux/quotaops.h>
171da177e4SLinus Torvalds 
181da177e4SLinus Torvalds /*
191da177e4SLinus Torvalds ** We pack the tails of files on file close, not at the time they are written.
201da177e4SLinus Torvalds ** This implies an unnecessary copy of the tail and an unnecessary indirect item
211da177e4SLinus Torvalds ** insertion/balancing, for files that are written in one write.
221da177e4SLinus Torvalds ** It avoids unnecessary tail packings (balances) for files that are written in
231da177e4SLinus Torvalds ** multiple writes and are small enough to have tails.
241da177e4SLinus Torvalds **
251da177e4SLinus Torvalds ** file_release is called by the VFS layer when the file is closed.  If
261da177e4SLinus Torvalds ** this is the last open file descriptor, and the file
271da177e4SLinus Torvalds ** small enough to have a tail, and the tail is currently in an
281da177e4SLinus Torvalds ** unformatted node, the tail is converted back into a direct item.
291da177e4SLinus Torvalds **
301da177e4SLinus Torvalds ** We use reiserfs_truncate_file to pack the tail, since it already has
311da177e4SLinus Torvalds ** all the conditions coded.
321da177e4SLinus Torvalds */
331da177e4SLinus Torvalds static int reiserfs_file_release(struct inode *inode, struct file *filp)
341da177e4SLinus Torvalds {
351da177e4SLinus Torvalds 
361da177e4SLinus Torvalds 	struct reiserfs_transaction_handle th;
371da177e4SLinus Torvalds 	int err;
381da177e4SLinus Torvalds 	int jbegin_failure = 0;
391da177e4SLinus Torvalds 
4014a61442SEric Sesterhenn 	BUG_ON(!S_ISREG(inode->i_mode));
411da177e4SLinus Torvalds 
421da177e4SLinus Torvalds 	/* fast out for when nothing needs to be done */
431da177e4SLinus Torvalds 	if ((atomic_read(&inode->i_count) > 1 ||
441da177e4SLinus Torvalds 	     !(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) ||
451da177e4SLinus Torvalds 	     !tail_has_to_be_packed(inode)) &&
461da177e4SLinus Torvalds 	    REISERFS_I(inode)->i_prealloc_count <= 0) {
471da177e4SLinus Torvalds 		return 0;
481da177e4SLinus Torvalds 	}
491da177e4SLinus Torvalds 
501b1dcc1bSJes Sorensen 	mutex_lock(&inode->i_mutex);
51b5f3953cSChris Mason 	reiserfs_write_lock(inode->i_sb);
521da177e4SLinus Torvalds 	/* freeing preallocation only involves relogging blocks that
531da177e4SLinus Torvalds 	 * are already in the current transaction.  preallocation gets
541da177e4SLinus Torvalds 	 * freed at the end of each transaction, so it is impossible for
551da177e4SLinus Torvalds 	 * us to log any additional blocks (including quota blocks)
561da177e4SLinus Torvalds 	 */
571da177e4SLinus Torvalds 	err = journal_begin(&th, inode->i_sb, 1);
581da177e4SLinus Torvalds 	if (err) {
591da177e4SLinus Torvalds 		/* uh oh, we can't allow the inode to go away while there
601da177e4SLinus Torvalds 		 * is still preallocation blocks pending.  Try to join the
611da177e4SLinus Torvalds 		 * aborted transaction
621da177e4SLinus Torvalds 		 */
631da177e4SLinus Torvalds 		jbegin_failure = err;
641da177e4SLinus Torvalds 		err = journal_join_abort(&th, inode->i_sb, 1);
651da177e4SLinus Torvalds 
661da177e4SLinus Torvalds 		if (err) {
671da177e4SLinus Torvalds 			/* hmpf, our choices here aren't good.  We can pin the inode
681da177e4SLinus Torvalds 			 * which will disallow unmount from every happening, we can
691da177e4SLinus Torvalds 			 * do nothing, which will corrupt random memory on unmount,
701da177e4SLinus Torvalds 			 * or we can forcibly remove the file from the preallocation
711da177e4SLinus Torvalds 			 * list, which will leak blocks on disk.  Lets pin the inode
721da177e4SLinus Torvalds 			 * and let the admin know what is going on.
731da177e4SLinus Torvalds 			 */
741da177e4SLinus Torvalds 			igrab(inode);
75bd4c625cSLinus Torvalds 			reiserfs_warning(inode->i_sb,
76bd4c625cSLinus Torvalds 					 "pinning inode %lu because the "
77533221fbSAlexey Dobriyan 					 "preallocation can't be freed",
78533221fbSAlexey Dobriyan 					 inode->i_ino);
791da177e4SLinus Torvalds 			goto out;
801da177e4SLinus Torvalds 		}
811da177e4SLinus Torvalds 	}
821da177e4SLinus Torvalds 	reiserfs_update_inode_transaction(inode);
831da177e4SLinus Torvalds 
841da177e4SLinus Torvalds #ifdef REISERFS_PREALLOCATE
851da177e4SLinus Torvalds 	reiserfs_discard_prealloc(&th, inode);
861da177e4SLinus Torvalds #endif
871da177e4SLinus Torvalds 	err = journal_end(&th, inode->i_sb, 1);
881da177e4SLinus Torvalds 
891da177e4SLinus Torvalds 	/* copy back the error code from journal_begin */
901da177e4SLinus Torvalds 	if (!err)
911da177e4SLinus Torvalds 		err = jbegin_failure;
921da177e4SLinus Torvalds 
931da177e4SLinus Torvalds 	if (!err && atomic_read(&inode->i_count) <= 1 &&
941da177e4SLinus Torvalds 	    (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) &&
951da177e4SLinus Torvalds 	    tail_has_to_be_packed(inode)) {
961da177e4SLinus Torvalds 		/* if regular file is released by last holder and it has been
971da177e4SLinus Torvalds 		   appended (we append by unformatted node only) or its direct
981da177e4SLinus Torvalds 		   item(s) had to be converted, then it may have to be
991da177e4SLinus Torvalds 		   indirect2direct converted */
1001da177e4SLinus Torvalds 		err = reiserfs_truncate_file(inode, 0);
1011da177e4SLinus Torvalds 	}
1021da177e4SLinus Torvalds       out:
1031b1dcc1bSJes Sorensen 	mutex_unlock(&inode->i_mutex);
1041da177e4SLinus Torvalds 	reiserfs_write_unlock(inode->i_sb);
1051da177e4SLinus Torvalds 	return err;
1061da177e4SLinus Torvalds }
1071da177e4SLinus Torvalds 
/*
 * Void-returning wrapper around reiserfs_truncate_file(inode, 1).  The
 * status returned by reiserfs_truncate_file is dropped because this
 * signature has no way to report it.
 * NOTE(review): presumably installed as the inode truncate callback; the
 * meaning of the second argument (1 here, 0 in reiserfs_file_release)
 * should be confirmed against reiserfs_truncate_file.
 */
static void reiserfs_vfs_truncate_file(struct inode *inode)
{
	(void)reiserfs_truncate_file(inode, 1);
}
1121da177e4SLinus Torvalds 
1131da177e4SLinus Torvalds /* Sync a reiserfs file. */
1141da177e4SLinus Torvalds 
1151da177e4SLinus Torvalds /*
1161da177e4SLinus Torvalds  * FIXME: sync_mapping_buffers() never has anything to sync.  Can
1171da177e4SLinus Torvalds  * be removed...
1181da177e4SLinus Torvalds  */
1191da177e4SLinus Torvalds 
120bd4c625cSLinus Torvalds static int reiserfs_sync_file(struct file *p_s_filp,
121bd4c625cSLinus Torvalds 			      struct dentry *p_s_dentry, int datasync)
122bd4c625cSLinus Torvalds {
1231da177e4SLinus Torvalds 	struct inode *p_s_inode = p_s_dentry->d_inode;
1241da177e4SLinus Torvalds 	int n_err;
1251da177e4SLinus Torvalds 	int barrier_done;
1261da177e4SLinus Torvalds 
12714a61442SEric Sesterhenn 	BUG_ON(!S_ISREG(p_s_inode->i_mode));
1281da177e4SLinus Torvalds 	n_err = sync_mapping_buffers(p_s_inode->i_mapping);
1291da177e4SLinus Torvalds 	reiserfs_write_lock(p_s_inode->i_sb);
1301da177e4SLinus Torvalds 	barrier_done = reiserfs_commit_for_inode(p_s_inode);
1311da177e4SLinus Torvalds 	reiserfs_write_unlock(p_s_inode->i_sb);
13225736b1cSChris Mason 	if (barrier_done != 1 && reiserfs_barrier_flush(p_s_inode->i_sb))
1331da177e4SLinus Torvalds 		blkdev_issue_flush(p_s_inode->i_sb->s_bdev, NULL);
1341da177e4SLinus Torvalds 	if (barrier_done < 0)
1351da177e4SLinus Torvalds 		return barrier_done;
1361da177e4SLinus Torvalds 	return (n_err < 0) ? -EIO : 0;
1371da177e4SLinus Torvalds }
1381da177e4SLinus Torvalds 
/*
 * I really do not want to play with memory shortage right now, so to
 * simplify the code we are not going to write more than this many pages at
 * a time.  This should still considerably improve performance compared to
 * the 4k-at-a-time case.  The limit is 128k worth of pages, i.e. 32 pages
 * when pages are 4k.
 *
 * The expansion is fully parenthesized so the macro acts as a single value
 * inside larger expressions (e.g. x / REISERFS_WRITE_PAGES_AT_A_TIME).
 */
#define REISERFS_WRITE_PAGES_AT_A_TIME ((128 * 1024) / PAGE_CACHE_SIZE)
1441da177e4SLinus Torvalds 
1451da177e4SLinus Torvalds /* Allocates blocks for a file to fulfil write request.
1461da177e4SLinus Torvalds    Maps all unmapped but prepared pages from the list.
1471da177e4SLinus Torvalds    Updates metadata with newly allocated blocknumbers as needed */
148bd4c625cSLinus Torvalds static int reiserfs_allocate_blocks_for_region(struct reiserfs_transaction_handle *th, struct inode *inode,	/* Inode we work with */
1491da177e4SLinus Torvalds 					       loff_t pos,	/* Writing position */
1501da177e4SLinus Torvalds 					       int num_pages,	/* number of pages write going
1511da177e4SLinus Torvalds 								   to touch */
1521da177e4SLinus Torvalds 					       int write_bytes,	/* amount of bytes to write */
1531da177e4SLinus Torvalds 					       struct page **prepared_pages,	/* array of
1541da177e4SLinus Torvalds 										   prepared pages
1551da177e4SLinus Torvalds 										 */
1561da177e4SLinus Torvalds 					       int blocks_to_allocate	/* Amount of blocks we
1571da177e4SLinus Torvalds 									   need to allocate to
1581da177e4SLinus Torvalds 									   fit the data into file
1591da177e4SLinus Torvalds 									 */
1601da177e4SLinus Torvalds     )
1611da177e4SLinus Torvalds {
1621da177e4SLinus Torvalds 	struct cpu_key key;	// cpu key of item that we are going to deal with
1631da177e4SLinus Torvalds 	struct item_head *ih;	// pointer to item head that we are going to deal with
1641da177e4SLinus Torvalds 	struct buffer_head *bh;	// Buffer head that contains items that we are going to deal with
1653e8962beSAl Viro 	__le32 *item;		// pointer to item we are going to deal with
1661da177e4SLinus Torvalds 	INITIALIZE_PATH(path);	// path to item, that we are going to deal with.
1671da177e4SLinus Torvalds 	b_blocknr_t *allocated_blocks;	// Pointer to a place where allocated blocknumbers would be stored.
1681da177e4SLinus Torvalds 	reiserfs_blocknr_hint_t hint;	// hint structure for block allocator.
1691da177e4SLinus Torvalds 	size_t res;		// return value of various functions that we call.
1701da177e4SLinus Torvalds 	int curr_block;		// current block used to keep track of unmapped blocks.
1711da177e4SLinus Torvalds 	int i;			// loop counter
1721da177e4SLinus Torvalds 	int itempos;		// position in item
1731da177e4SLinus Torvalds 	unsigned int from = (pos & (PAGE_CACHE_SIZE - 1));	// writing position in
1741da177e4SLinus Torvalds 	// first page
1751da177e4SLinus Torvalds 	unsigned int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1;	/* last modified byte offset in last page */
1761da177e4SLinus Torvalds 	__u64 hole_size;	// amount of blocks for a file hole, if it needed to be created.
1771da177e4SLinus Torvalds 	int modifying_this_item = 0;	// Flag for items traversal code to keep track
1781da177e4SLinus Torvalds 	// of the fact that we already prepared
1791da177e4SLinus Torvalds 	// current block for journal
1801da177e4SLinus Torvalds 	int will_prealloc = 0;
181bd4c625cSLinus Torvalds 	RFALSE(!blocks_to_allocate,
182bd4c625cSLinus Torvalds 	       "green-9004: tried to allocate zero blocks?");
1831da177e4SLinus Torvalds 
1841da177e4SLinus Torvalds 	/* only preallocate if this is a small write */
1851da177e4SLinus Torvalds 	if (REISERFS_I(inode)->i_prealloc_count ||
1861da177e4SLinus Torvalds 	    (!(write_bytes & (inode->i_sb->s_blocksize - 1)) &&
1871da177e4SLinus Torvalds 	     blocks_to_allocate <
1881da177e4SLinus Torvalds 	     REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize))
189bd4c625cSLinus Torvalds 		will_prealloc =
190bd4c625cSLinus Torvalds 		    REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize;
1911da177e4SLinus Torvalds 
1921da177e4SLinus Torvalds 	allocated_blocks = kmalloc((blocks_to_allocate + will_prealloc) *
1931da177e4SLinus Torvalds 				   sizeof(b_blocknr_t), GFP_NOFS);
194e5dd259fSDiego Calleja 	if (!allocated_blocks)
195e5dd259fSDiego Calleja 		return -ENOMEM;
1961da177e4SLinus Torvalds 
1971da177e4SLinus Torvalds 	/* First we compose a key to point at the writing position, we want to do
1981da177e4SLinus Torvalds 	   that outside of any locking region. */
1991da177e4SLinus Torvalds 	make_cpu_key(&key, inode, pos + 1, TYPE_ANY, 3 /*key length */ );
2001da177e4SLinus Torvalds 
2011da177e4SLinus Torvalds 	/* If we came here, it means we absolutely need to open a transaction,
2021da177e4SLinus Torvalds 	   since we need to allocate some blocks */
2031da177e4SLinus Torvalds 	reiserfs_write_lock(inode->i_sb);	// Journaling stuff and we need that.
204556a2a45SJan Kara 	res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb));	// Wish I know if this number enough
2051da177e4SLinus Torvalds 	if (res)
2061da177e4SLinus Torvalds 		goto error_exit;
2071da177e4SLinus Torvalds 	reiserfs_update_inode_transaction(inode);
2081da177e4SLinus Torvalds 
2091da177e4SLinus Torvalds 	/* Look for the in-tree position of our write, need path for block allocator */
2101da177e4SLinus Torvalds 	res = search_for_position_by_key(inode->i_sb, &key, &path);
2111da177e4SLinus Torvalds 	if (res == IO_ERROR) {
2121da177e4SLinus Torvalds 		res = -EIO;
2131da177e4SLinus Torvalds 		goto error_exit;
2141da177e4SLinus Torvalds 	}
2151da177e4SLinus Torvalds 
2161da177e4SLinus Torvalds 	/* Allocate blocks */
2171da177e4SLinus Torvalds 	/* First fill in "hint" structure for block allocator */
2181da177e4SLinus Torvalds 	hint.th = th;		// transaction handle.
2191da177e4SLinus Torvalds 	hint.path = &path;	// Path, so that block allocator can determine packing locality or whatever it needs to determine.
2201da177e4SLinus Torvalds 	hint.inode = inode;	// Inode is needed by block allocator too.
2211da177e4SLinus Torvalds 	hint.search_start = 0;	// We have no hint on where to search free blocks for block allocator.
2221da177e4SLinus Torvalds 	hint.key = key.on_disk_key;	// on disk key of file.
2231da177e4SLinus Torvalds 	hint.block = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9);	// Number of disk blocks this file occupies already.
2241da177e4SLinus Torvalds 	hint.formatted_node = 0;	// We are allocating blocks for unformatted node.
2251da177e4SLinus Torvalds 	hint.preallocate = will_prealloc;
2261da177e4SLinus Torvalds 
2271da177e4SLinus Torvalds 	/* Call block allocator to allocate blocks */
228bd4c625cSLinus Torvalds 	res =
229bd4c625cSLinus Torvalds 	    reiserfs_allocate_blocknrs(&hint, allocated_blocks,
230bd4c625cSLinus Torvalds 				       blocks_to_allocate, blocks_to_allocate);
2311da177e4SLinus Torvalds 	if (res != CARRY_ON) {
2321da177e4SLinus Torvalds 		if (res == NO_DISK_SPACE) {
2331da177e4SLinus Torvalds 			/* We flush the transaction in case of no space. This way some
2341da177e4SLinus Torvalds 			   blocks might become free */
2351da177e4SLinus Torvalds 			SB_JOURNAL(inode->i_sb)->j_must_wait = 1;
2361da177e4SLinus Torvalds 			res = restart_transaction(th, inode, &path);
2371da177e4SLinus Torvalds 			if (res)
2381da177e4SLinus Torvalds 				goto error_exit;
2391da177e4SLinus Torvalds 
2401da177e4SLinus Torvalds 			/* We might have scheduled, so search again */
241bd4c625cSLinus Torvalds 			res =
242bd4c625cSLinus Torvalds 			    search_for_position_by_key(inode->i_sb, &key,
243bd4c625cSLinus Torvalds 						       &path);
2441da177e4SLinus Torvalds 			if (res == IO_ERROR) {
2451da177e4SLinus Torvalds 				res = -EIO;
2461da177e4SLinus Torvalds 				goto error_exit;
2471da177e4SLinus Torvalds 			}
2481da177e4SLinus Torvalds 
2491da177e4SLinus Torvalds 			/* update changed info for hint structure. */
250bd4c625cSLinus Torvalds 			res =
251bd4c625cSLinus Torvalds 			    reiserfs_allocate_blocknrs(&hint, allocated_blocks,
252bd4c625cSLinus Torvalds 						       blocks_to_allocate,
253bd4c625cSLinus Torvalds 						       blocks_to_allocate);
2541da177e4SLinus Torvalds 			if (res != CARRY_ON) {
2550ad74ffaSJan Kara 				res = res == QUOTA_EXCEEDED ? -EDQUOT : -ENOSPC;
2561da177e4SLinus Torvalds 				pathrelse(&path);
2571da177e4SLinus Torvalds 				goto error_exit;
2581da177e4SLinus Torvalds 			}
2591da177e4SLinus Torvalds 		} else {
2600ad74ffaSJan Kara 			res = res == QUOTA_EXCEEDED ? -EDQUOT : -ENOSPC;
2611da177e4SLinus Torvalds 			pathrelse(&path);
2621da177e4SLinus Torvalds 			goto error_exit;
2631da177e4SLinus Torvalds 		}
2641da177e4SLinus Torvalds 	}
2651da177e4SLinus Torvalds #ifdef __BIG_ENDIAN
2661da177e4SLinus Torvalds 	// Too bad, I have not found any way to convert a given region from
2671da177e4SLinus Torvalds 	// cpu format to little endian format
2681da177e4SLinus Torvalds 	{
2691da177e4SLinus Torvalds 		int i;
2701da177e4SLinus Torvalds 		for (i = 0; i < blocks_to_allocate; i++)
2711da177e4SLinus Torvalds 			allocated_blocks[i] = cpu_to_le32(allocated_blocks[i]);
2721da177e4SLinus Torvalds 	}
2731da177e4SLinus Torvalds #endif
2741da177e4SLinus Torvalds 
2751da177e4SLinus Torvalds 	/* Blocks allocating well might have scheduled and tree might have changed,
2761da177e4SLinus Torvalds 	   let's search the tree again */
2771da177e4SLinus Torvalds 	/* find where in the tree our write should go */
2781da177e4SLinus Torvalds 	res = search_for_position_by_key(inode->i_sb, &key, &path);
2791da177e4SLinus Torvalds 	if (res == IO_ERROR) {
2801da177e4SLinus Torvalds 		res = -EIO;
2811da177e4SLinus Torvalds 		goto error_exit_free_blocks;
2821da177e4SLinus Torvalds 	}
2831da177e4SLinus Torvalds 
2841da177e4SLinus Torvalds 	bh = get_last_bh(&path);	// Get a bufferhead for last element in path.
2851da177e4SLinus Torvalds 	ih = get_ih(&path);	// Get a pointer to last item head in path.
2861da177e4SLinus Torvalds 	item = get_item(&path);	// Get a pointer to last item in path
2871da177e4SLinus Torvalds 
2881da177e4SLinus Torvalds 	/* Let's see what we have found */
2891da177e4SLinus Torvalds 	if (res != POSITION_FOUND) {	/* position not found, this means that we
2901da177e4SLinus Torvalds 					   might need to append file with holes
2911da177e4SLinus Torvalds 					   first */
2921da177e4SLinus Torvalds 		// Since we are writing past the file's end, we need to find out if
2931da177e4SLinus Torvalds 		// there is a hole that needs to be inserted before our writing
2941da177e4SLinus Torvalds 		// position, and how many blocks it is going to cover (we need to
2951da177e4SLinus Torvalds 		//  populate pointers to file blocks representing the hole with zeros)
2961da177e4SLinus Torvalds 
2971da177e4SLinus Torvalds 		{
2981da177e4SLinus Torvalds 			int item_offset = 1;
2991da177e4SLinus Torvalds 			/*
3001da177e4SLinus Torvalds 			 * if ih is stat data, its offset is 0 and we don't want to
3011da177e4SLinus Torvalds 			 * add 1 to pos in the hole_size calculation
3021da177e4SLinus Torvalds 			 */
3031da177e4SLinus Torvalds 			if (is_statdata_le_ih(ih))
3041da177e4SLinus Torvalds 				item_offset = 0;
3051da177e4SLinus Torvalds 			hole_size = (pos + item_offset -
306bd4c625cSLinus Torvalds 				     (le_key_k_offset
307bd4c625cSLinus Torvalds 				      (get_inode_item_key_version(inode),
308bd4c625cSLinus Torvalds 				       &(ih->ih_key)) + op_bytes_number(ih,
309bd4c625cSLinus Torvalds 									inode->
310bd4c625cSLinus Torvalds 									i_sb->
311bd4c625cSLinus Torvalds 									s_blocksize)))
312bd4c625cSLinus Torvalds 			    >> inode->i_sb->s_blocksize_bits;
3131da177e4SLinus Torvalds 		}
3141da177e4SLinus Torvalds 
3151da177e4SLinus Torvalds 		if (hole_size > 0) {
3161da177e4SLinus Torvalds 			int to_paste = min_t(__u64, hole_size, MAX_ITEM_LEN(inode->i_sb->s_blocksize) / UNFM_P_SIZE);	// How much data to insert first time.
3171da177e4SLinus Torvalds 			/* area filled with zeroes, to supply as list of zero blocknumbers
3181da177e4SLinus Torvalds 			   We allocate it outside of loop just in case loop would spin for
3191da177e4SLinus Torvalds 			   several iterations. */
3201da177e4SLinus Torvalds 			char *zeros = kmalloc(to_paste * UNFM_P_SIZE, GFP_ATOMIC);	// We cannot insert more than MAX_ITEM_LEN bytes anyway.
3211da177e4SLinus Torvalds 			if (!zeros) {
3221da177e4SLinus Torvalds 				res = -ENOMEM;
3231da177e4SLinus Torvalds 				goto error_exit_free_blocks;
3241da177e4SLinus Torvalds 			}
3251da177e4SLinus Torvalds 			memset(zeros, 0, to_paste * UNFM_P_SIZE);
3261da177e4SLinus Torvalds 			do {
327bd4c625cSLinus Torvalds 				to_paste =
328bd4c625cSLinus Torvalds 				    min_t(__u64, hole_size,
329bd4c625cSLinus Torvalds 					  MAX_ITEM_LEN(inode->i_sb->
330bd4c625cSLinus Torvalds 						       s_blocksize) /
331bd4c625cSLinus Torvalds 					  UNFM_P_SIZE);
3321da177e4SLinus Torvalds 				if (is_indirect_le_ih(ih)) {
3331da177e4SLinus Torvalds 					/* Ok, there is existing indirect item already. Need to append it */
3341da177e4SLinus Torvalds 					/* Calculate position past inserted item */
335bd4c625cSLinus Torvalds 					make_cpu_key(&key, inode,
336bd4c625cSLinus Torvalds 						     le_key_k_offset
337bd4c625cSLinus Torvalds 						     (get_inode_item_key_version
338bd4c625cSLinus Torvalds 						      (inode),
339bd4c625cSLinus Torvalds 						      &(ih->ih_key)) +
340bd4c625cSLinus Torvalds 						     op_bytes_number(ih,
341bd4c625cSLinus Torvalds 								     inode->
342bd4c625cSLinus Torvalds 								     i_sb->
343bd4c625cSLinus Torvalds 								     s_blocksize),
344bd4c625cSLinus Torvalds 						     TYPE_INDIRECT, 3);
345bd4c625cSLinus Torvalds 					res =
346bd4c625cSLinus Torvalds 					    reiserfs_paste_into_item(th, &path,
347bd4c625cSLinus Torvalds 								     &key,
348bd4c625cSLinus Torvalds 								     inode,
349bd4c625cSLinus Torvalds 								     (char *)
350bd4c625cSLinus Torvalds 								     zeros,
351bd4c625cSLinus Torvalds 								     UNFM_P_SIZE
352bd4c625cSLinus Torvalds 								     *
353bd4c625cSLinus Torvalds 								     to_paste);
3541da177e4SLinus Torvalds 					if (res) {
3551da177e4SLinus Torvalds 						kfree(zeros);
3561da177e4SLinus Torvalds 						goto error_exit_free_blocks;
3571da177e4SLinus Torvalds 					}
3581da177e4SLinus Torvalds 				} else if (is_statdata_le_ih(ih)) {
3591da177e4SLinus Torvalds 					/* No existing item, create it */
3601da177e4SLinus Torvalds 					/* item head for new item */
3611da177e4SLinus Torvalds 					struct item_head ins_ih;
3621da177e4SLinus Torvalds 
3631da177e4SLinus Torvalds 					/* create a key for our new item */
364bd4c625cSLinus Torvalds 					make_cpu_key(&key, inode, 1,
365bd4c625cSLinus Torvalds 						     TYPE_INDIRECT, 3);
3661da177e4SLinus Torvalds 
3671da177e4SLinus Torvalds 					/* Create new item head for our new item */
368bd4c625cSLinus Torvalds 					make_le_item_head(&ins_ih, &key,
369bd4c625cSLinus Torvalds 							  key.version, 1,
370bd4c625cSLinus Torvalds 							  TYPE_INDIRECT,
371bd4c625cSLinus Torvalds 							  to_paste *
372bd4c625cSLinus Torvalds 							  UNFM_P_SIZE,
3731da177e4SLinus Torvalds 							  0 /* free space */ );
3741da177e4SLinus Torvalds 
3751da177e4SLinus Torvalds 					/* Find where such item should live in the tree */
376bd4c625cSLinus Torvalds 					res =
377bd4c625cSLinus Torvalds 					    search_item(inode->i_sb, &key,
378bd4c625cSLinus Torvalds 							&path);
3791da177e4SLinus Torvalds 					if (res != ITEM_NOT_FOUND) {
3801da177e4SLinus Torvalds 						/* item should not exist, otherwise we have error */
3811da177e4SLinus Torvalds 						if (res != -ENOSPC) {
382bd4c625cSLinus Torvalds 							reiserfs_warning(inode->
383bd4c625cSLinus Torvalds 									 i_sb,
3841da177e4SLinus Torvalds 									 "green-9008: search_by_key (%K) returned %d",
385bd4c625cSLinus Torvalds 									 &key,
386bd4c625cSLinus Torvalds 									 res);
3871da177e4SLinus Torvalds 						}
3881da177e4SLinus Torvalds 						res = -EIO;
3891da177e4SLinus Torvalds 						kfree(zeros);
3901da177e4SLinus Torvalds 						goto error_exit_free_blocks;
3911da177e4SLinus Torvalds 					}
392bd4c625cSLinus Torvalds 					res =
393bd4c625cSLinus Torvalds 					    reiserfs_insert_item(th, &path,
394bd4c625cSLinus Torvalds 								 &key, &ins_ih,
395bd4c625cSLinus Torvalds 								 inode,
396bd4c625cSLinus Torvalds 								 (char *)zeros);
3971da177e4SLinus Torvalds 				} else {
398bd4c625cSLinus Torvalds 					reiserfs_panic(inode->i_sb,
399bd4c625cSLinus Torvalds 						       "green-9011: Unexpected key type %K\n",
400bd4c625cSLinus Torvalds 						       &key);
4011da177e4SLinus Torvalds 				}
4021da177e4SLinus Torvalds 				if (res) {
4031da177e4SLinus Torvalds 					kfree(zeros);
4041da177e4SLinus Torvalds 					goto error_exit_free_blocks;
4051da177e4SLinus Torvalds 				}
4061da177e4SLinus Torvalds 				/* Now we want to check if transaction is too full, and if it is
4071da177e4SLinus Torvalds 				   we restart it. This will also free the path. */
408bd4c625cSLinus Torvalds 				if (journal_transaction_should_end
409bd4c625cSLinus Torvalds 				    (th, th->t_blocks_allocated)) {
410bd4c625cSLinus Torvalds 					res =
411bd4c625cSLinus Torvalds 					    restart_transaction(th, inode,
412bd4c625cSLinus Torvalds 								&path);
4131da177e4SLinus Torvalds 					if (res) {
4141da177e4SLinus Torvalds 						pathrelse(&path);
4151da177e4SLinus Torvalds 						kfree(zeros);
4161da177e4SLinus Torvalds 						goto error_exit;
4171da177e4SLinus Torvalds 					}
4181da177e4SLinus Torvalds 				}
4191da177e4SLinus Torvalds 
4201da177e4SLinus Torvalds 				/* Well, need to recalculate path and stuff */
421bd4c625cSLinus Torvalds 				set_cpu_key_k_offset(&key,
422bd4c625cSLinus Torvalds 						     cpu_key_k_offset(&key) +
423bd4c625cSLinus Torvalds 						     (to_paste << inode->
424bd4c625cSLinus Torvalds 						      i_blkbits));
425bd4c625cSLinus Torvalds 				res =
426bd4c625cSLinus Torvalds 				    search_for_position_by_key(inode->i_sb,
427bd4c625cSLinus Torvalds 							       &key, &path);
4281da177e4SLinus Torvalds 				if (res == IO_ERROR) {
4291da177e4SLinus Torvalds 					res = -EIO;
4301da177e4SLinus Torvalds 					kfree(zeros);
4311da177e4SLinus Torvalds 					goto error_exit_free_blocks;
4321da177e4SLinus Torvalds 				}
4331da177e4SLinus Torvalds 				bh = get_last_bh(&path);
4341da177e4SLinus Torvalds 				ih = get_ih(&path);
4351da177e4SLinus Torvalds 				item = get_item(&path);
4361da177e4SLinus Torvalds 				hole_size -= to_paste;
4371da177e4SLinus Torvalds 			} while (hole_size);
4381da177e4SLinus Torvalds 			kfree(zeros);
4391da177e4SLinus Torvalds 		}
4401da177e4SLinus Torvalds 	}
4411da177e4SLinus Torvalds 	// Go through existing indirect items first
4421da177e4SLinus Torvalds 	// replace all zeroes with blocknumbers from list
4431da177e4SLinus Torvalds 	// Note that if no corresponding item was found, by previous search,
4441da177e4SLinus Torvalds 	// it means there are no existing in-tree representation for file area
4451da177e4SLinus Torvalds 	// we are going to overwrite, so there is nothing to scan through for holes.
446bd4c625cSLinus Torvalds 	for (curr_block = 0, itempos = path.pos_in_item;
447bd4c625cSLinus Torvalds 	     curr_block < blocks_to_allocate && res == POSITION_FOUND;) {
4481da177e4SLinus Torvalds 	      retry:
4491da177e4SLinus Torvalds 
4501da177e4SLinus Torvalds 		if (itempos >= ih_item_len(ih) / UNFM_P_SIZE) {
4511da177e4SLinus Torvalds 			/* We run out of data in this indirect item, let's look for another
4521da177e4SLinus Torvalds 			   one. */
4531da177e4SLinus Torvalds 			/* First if we are already modifying current item, log it */
4541da177e4SLinus Torvalds 			if (modifying_this_item) {
4551da177e4SLinus Torvalds 				journal_mark_dirty(th, inode->i_sb, bh);
4561da177e4SLinus Torvalds 				modifying_this_item = 0;
4571da177e4SLinus Torvalds 			}
4581da177e4SLinus Torvalds 			/* Then set the key to look for a new indirect item (offset of old
4591da177e4SLinus Torvalds 			   item is added to old item length */
460bd4c625cSLinus Torvalds 			set_cpu_key_k_offset(&key,
461bd4c625cSLinus Torvalds 					     le_key_k_offset
462bd4c625cSLinus Torvalds 					     (get_inode_item_key_version(inode),
463bd4c625cSLinus Torvalds 					      &(ih->ih_key)) +
464bd4c625cSLinus Torvalds 					     op_bytes_number(ih,
465bd4c625cSLinus Torvalds 							     inode->i_sb->
466bd4c625cSLinus Torvalds 							     s_blocksize));
4671da177e4SLinus Torvalds 			/* Search ofor position of new key in the tree. */
468bd4c625cSLinus Torvalds 			res =
469bd4c625cSLinus Torvalds 			    search_for_position_by_key(inode->i_sb, &key,
470bd4c625cSLinus Torvalds 						       &path);
4711da177e4SLinus Torvalds 			if (res == IO_ERROR) {
4721da177e4SLinus Torvalds 				res = -EIO;
4731da177e4SLinus Torvalds 				goto error_exit_free_blocks;
4741da177e4SLinus Torvalds 			}
4751da177e4SLinus Torvalds 			bh = get_last_bh(&path);
4761da177e4SLinus Torvalds 			ih = get_ih(&path);
4771da177e4SLinus Torvalds 			item = get_item(&path);
4781da177e4SLinus Torvalds 			itempos = path.pos_in_item;
4791da177e4SLinus Torvalds 			continue;	// loop to check all kinds of conditions and so on.
4801da177e4SLinus Torvalds 		}
4811da177e4SLinus Torvalds 		/* Ok, we have correct position in item now, so let's see if it is
4821da177e4SLinus Torvalds 		   representing file hole (blocknumber is zero) and fill it if needed */
4831da177e4SLinus Torvalds 		if (!item[itempos]) {
4841da177e4SLinus Torvalds 			/* Ok, a hole. Now we need to check if we already prepared this
4851da177e4SLinus Torvalds 			   block to be journaled */
4861da177e4SLinus Torvalds 			while (!modifying_this_item) {	// loop until succeed
4871da177e4SLinus Torvalds 				/* Well, this item is not journaled yet, so we must prepare
4881da177e4SLinus Torvalds 				   it for journal first, before we can change it */
4891da177e4SLinus Torvalds 				struct item_head tmp_ih;	// We copy item head of found item,
4901da177e4SLinus Torvalds 				// here to detect if fs changed under
4911da177e4SLinus Torvalds 				// us while we were preparing for
4921da177e4SLinus Torvalds 				// journal.
4931da177e4SLinus Torvalds 				int fs_gen;	// We store fs generation here to find if someone
4941da177e4SLinus Torvalds 				// changes fs under our feet
4951da177e4SLinus Torvalds 
4961da177e4SLinus Torvalds 				copy_item_head(&tmp_ih, ih);	// Remember itemhead
4971da177e4SLinus Torvalds 				fs_gen = get_generation(inode->i_sb);	// remember fs generation
4981da177e4SLinus Torvalds 				reiserfs_prepare_for_journal(inode->i_sb, bh, 1);	// Prepare a buffer within which indirect item is stored for changing.
499bd4c625cSLinus Torvalds 				if (fs_changed(fs_gen, inode->i_sb)
500bd4c625cSLinus Torvalds 				    && item_moved(&tmp_ih, &path)) {
5011da177e4SLinus Torvalds 					// Sigh, fs was changed under us, we need to look for new
5021da177e4SLinus Torvalds 					// location of item we are working with
5031da177e4SLinus Torvalds 
5041da177e4SLinus Torvalds 					/* unmark prepaerd area as journaled and search for it's
5051da177e4SLinus Torvalds 					   new position */
506bd4c625cSLinus Torvalds 					reiserfs_restore_prepared_buffer(inode->
507bd4c625cSLinus Torvalds 									 i_sb,
508bd4c625cSLinus Torvalds 									 bh);
509bd4c625cSLinus Torvalds 					res =
510bd4c625cSLinus Torvalds 					    search_for_position_by_key(inode->
511bd4c625cSLinus Torvalds 								       i_sb,
512bd4c625cSLinus Torvalds 								       &key,
513bd4c625cSLinus Torvalds 								       &path);
5141da177e4SLinus Torvalds 					if (res == IO_ERROR) {
5151da177e4SLinus Torvalds 						res = -EIO;
5161da177e4SLinus Torvalds 						goto error_exit_free_blocks;
5171da177e4SLinus Torvalds 					}
5181da177e4SLinus Torvalds 					bh = get_last_bh(&path);
5191da177e4SLinus Torvalds 					ih = get_ih(&path);
5201da177e4SLinus Torvalds 					item = get_item(&path);
5211da177e4SLinus Torvalds 					itempos = path.pos_in_item;
5221da177e4SLinus Torvalds 					goto retry;
5231da177e4SLinus Torvalds 				}
5241da177e4SLinus Torvalds 				modifying_this_item = 1;
5251da177e4SLinus Torvalds 			}
5261da177e4SLinus Torvalds 			item[itempos] = allocated_blocks[curr_block];	// Assign new block
5271da177e4SLinus Torvalds 			curr_block++;
5281da177e4SLinus Torvalds 		}
5291da177e4SLinus Torvalds 		itempos++;
5301da177e4SLinus Torvalds 	}
5311da177e4SLinus Torvalds 
5321da177e4SLinus Torvalds 	if (modifying_this_item) {	// We need to log last-accessed block, if it
5331da177e4SLinus Torvalds 		// was modified, but not logged yet.
5341da177e4SLinus Torvalds 		journal_mark_dirty(th, inode->i_sb, bh);
5351da177e4SLinus Torvalds 	}
5361da177e4SLinus Torvalds 
5371da177e4SLinus Torvalds 	if (curr_block < blocks_to_allocate) {
5381da177e4SLinus Torvalds 		// Oh, well need to append to indirect item, or to create indirect item
5391da177e4SLinus Torvalds 		// if there weren't any
5401da177e4SLinus Torvalds 		if (is_indirect_le_ih(ih)) {
5411da177e4SLinus Torvalds 			// Existing indirect item - append. First calculate key for append
5421da177e4SLinus Torvalds 			// position. We do not need to recalculate path as it should
5431da177e4SLinus Torvalds 			// already point to correct place.
544bd4c625cSLinus Torvalds 			make_cpu_key(&key, inode,
545bd4c625cSLinus Torvalds 				     le_key_k_offset(get_inode_item_key_version
546bd4c625cSLinus Torvalds 						     (inode),
547bd4c625cSLinus Torvalds 						     &(ih->ih_key)) +
548bd4c625cSLinus Torvalds 				     op_bytes_number(ih,
549bd4c625cSLinus Torvalds 						     inode->i_sb->s_blocksize),
550bd4c625cSLinus Torvalds 				     TYPE_INDIRECT, 3);
551bd4c625cSLinus Torvalds 			res =
552bd4c625cSLinus Torvalds 			    reiserfs_paste_into_item(th, &path, &key, inode,
553bd4c625cSLinus Torvalds 						     (char *)(allocated_blocks +
554bd4c625cSLinus Torvalds 							      curr_block),
555bd4c625cSLinus Torvalds 						     UNFM_P_SIZE *
556bd4c625cSLinus Torvalds 						     (blocks_to_allocate -
557bd4c625cSLinus Torvalds 						      curr_block));
5581da177e4SLinus Torvalds 			if (res) {
5591da177e4SLinus Torvalds 				goto error_exit_free_blocks;
5601da177e4SLinus Torvalds 			}
5611da177e4SLinus Torvalds 		} else if (is_statdata_le_ih(ih)) {
5621da177e4SLinus Torvalds 			// Last found item was statdata. That means we need to create indirect item.
5631da177e4SLinus Torvalds 			struct item_head ins_ih;	/* itemhead for new item */
5641da177e4SLinus Torvalds 
5651da177e4SLinus Torvalds 			/* create a key for our new item */
5661da177e4SLinus Torvalds 			make_cpu_key(&key, inode, 1, TYPE_INDIRECT, 3);	// Position one,
5671da177e4SLinus Torvalds 			// because that's
5681da177e4SLinus Torvalds 			// where first
5691da177e4SLinus Torvalds 			// indirect item
5701da177e4SLinus Torvalds 			// begins
5711da177e4SLinus Torvalds 			/* Create new item head for our new item */
572bd4c625cSLinus Torvalds 			make_le_item_head(&ins_ih, &key, key.version, 1,
573bd4c625cSLinus Torvalds 					  TYPE_INDIRECT,
574bd4c625cSLinus Torvalds 					  (blocks_to_allocate -
575bd4c625cSLinus Torvalds 					   curr_block) * UNFM_P_SIZE,
5761da177e4SLinus Torvalds 					  0 /* free space */ );
5771da177e4SLinus Torvalds 			/* Find where such item should live in the tree */
5781da177e4SLinus Torvalds 			res = search_item(inode->i_sb, &key, &path);
5791da177e4SLinus Torvalds 			if (res != ITEM_NOT_FOUND) {
5801da177e4SLinus Torvalds 				/* Well, if we have found such item already, or some error
5811da177e4SLinus Torvalds 				   occured, we need to warn user and return error */
5821da177e4SLinus Torvalds 				if (res != -ENOSPC) {
5831da177e4SLinus Torvalds 					reiserfs_warning(inode->i_sb,
5841da177e4SLinus Torvalds 							 "green-9009: search_by_key (%K) "
585bd4c625cSLinus Torvalds 							 "returned %d", &key,
586bd4c625cSLinus Torvalds 							 res);
5871da177e4SLinus Torvalds 				}
5881da177e4SLinus Torvalds 				res = -EIO;
5891da177e4SLinus Torvalds 				goto error_exit_free_blocks;
5901da177e4SLinus Torvalds 			}
5911da177e4SLinus Torvalds 			/* Insert item into the tree with the data as its body */
592bd4c625cSLinus Torvalds 			res =
593bd4c625cSLinus Torvalds 			    reiserfs_insert_item(th, &path, &key, &ins_ih,
594bd4c625cSLinus Torvalds 						 inode,
595bd4c625cSLinus Torvalds 						 (char *)(allocated_blocks +
596bd4c625cSLinus Torvalds 							  curr_block));
5971da177e4SLinus Torvalds 		} else {
598bd4c625cSLinus Torvalds 			reiserfs_panic(inode->i_sb,
599bd4c625cSLinus Torvalds 				       "green-9010: unexpected item type for key %K\n",
600bd4c625cSLinus Torvalds 				       &key);
6011da177e4SLinus Torvalds 		}
6021da177e4SLinus Torvalds 	}
6031da177e4SLinus Torvalds 	// the caller is responsible for closing the transaction
6041da177e4SLinus Torvalds 	// unless we return an error, they are also responsible for logging
6051da177e4SLinus Torvalds 	// the inode.
6061da177e4SLinus Torvalds 	//
6071da177e4SLinus Torvalds 	pathrelse(&path);
6081da177e4SLinus Torvalds 	/*
6091da177e4SLinus Torvalds 	 * cleanup prellocation from previous writes
6101da177e4SLinus Torvalds 	 * if this is a partial block write
6111da177e4SLinus Torvalds 	 */
6121da177e4SLinus Torvalds 	if (write_bytes & (inode->i_sb->s_blocksize - 1))
6131da177e4SLinus Torvalds 		reiserfs_discard_prealloc(th, inode);
6141da177e4SLinus Torvalds 	reiserfs_write_unlock(inode->i_sb);
6151da177e4SLinus Torvalds 
6161da177e4SLinus Torvalds 	// go through all the pages/buffers and map the buffers to newly allocated
6171da177e4SLinus Torvalds 	// blocks (so that system knows where to write these pages later).
6181da177e4SLinus Torvalds 	curr_block = 0;
6191da177e4SLinus Torvalds 	for (i = 0; i < num_pages; i++) {
6201da177e4SLinus Torvalds 		struct page *page = prepared_pages[i];	//current page
6211da177e4SLinus Torvalds 		struct buffer_head *head = page_buffers(page);	// first buffer for a page
6221da177e4SLinus Torvalds 		int block_start, block_end;	// in-page offsets for buffers.
6231da177e4SLinus Torvalds 
6241da177e4SLinus Torvalds 		if (!page_buffers(page))
625bd4c625cSLinus Torvalds 			reiserfs_panic(inode->i_sb,
626bd4c625cSLinus Torvalds 				       "green-9005: No buffers for prepared page???");
6271da177e4SLinus Torvalds 
6281da177e4SLinus Torvalds 		/* For each buffer in page */
6291da177e4SLinus Torvalds 		for (bh = head, block_start = 0; bh != head || !block_start;
6301da177e4SLinus Torvalds 		     block_start = block_end, bh = bh->b_this_page) {
6311da177e4SLinus Torvalds 			if (!bh)
632bd4c625cSLinus Torvalds 				reiserfs_panic(inode->i_sb,
633bd4c625cSLinus Torvalds 					       "green-9006: Allocated but absent buffer for a page?");
6341da177e4SLinus Torvalds 			block_end = block_start + inode->i_sb->s_blocksize;
6351da177e4SLinus Torvalds 			if (i == 0 && block_end <= from)
6361da177e4SLinus Torvalds 				/* if this buffer is before requested data to map, skip it */
6371da177e4SLinus Torvalds 				continue;
6381da177e4SLinus Torvalds 			if (i == num_pages - 1 && block_start >= to)
6391da177e4SLinus Torvalds 				/* If this buffer is after requested data to map, abort
6401da177e4SLinus Torvalds 				   processing of current page */
6411da177e4SLinus Torvalds 				break;
6421da177e4SLinus Torvalds 
6431da177e4SLinus Torvalds 			if (!buffer_mapped(bh)) {	// Ok, unmapped buffer, need to map it
644bd4c625cSLinus Torvalds 				map_bh(bh, inode->i_sb,
645bd4c625cSLinus Torvalds 				       le32_to_cpu(allocated_blocks
646bd4c625cSLinus Torvalds 						   [curr_block]));
6471da177e4SLinus Torvalds 				curr_block++;
6481da177e4SLinus Torvalds 				set_buffer_new(bh);
6491da177e4SLinus Torvalds 			}
6501da177e4SLinus Torvalds 		}
6511da177e4SLinus Torvalds 	}
6521da177e4SLinus Torvalds 
653bd4c625cSLinus Torvalds 	RFALSE(curr_block > blocks_to_allocate,
654bd4c625cSLinus Torvalds 	       "green-9007: Used too many blocks? weird");
6551da177e4SLinus Torvalds 
6561da177e4SLinus Torvalds 	kfree(allocated_blocks);
6571da177e4SLinus Torvalds 	return 0;
6581da177e4SLinus Torvalds 
6591da177e4SLinus Torvalds // Need to deal with transaction here.
6601da177e4SLinus Torvalds       error_exit_free_blocks:
6611da177e4SLinus Torvalds 	pathrelse(&path);
6621da177e4SLinus Torvalds 	// free blocks
6631da177e4SLinus Torvalds 	for (i = 0; i < blocks_to_allocate; i++)
664bd4c625cSLinus Torvalds 		reiserfs_free_block(th, inode, le32_to_cpu(allocated_blocks[i]),
665bd4c625cSLinus Torvalds 				    1);
6661da177e4SLinus Torvalds 
6671da177e4SLinus Torvalds       error_exit:
6681da177e4SLinus Torvalds 	if (th->t_trans_id) {
6691da177e4SLinus Torvalds 		int err;
6701da177e4SLinus Torvalds 		// update any changes we made to blk count
6719f03783cSChris Mason 		mark_inode_dirty(inode);
672bd4c625cSLinus Torvalds 		err =
673bd4c625cSLinus Torvalds 		    journal_end(th, inode->i_sb,
674bd4c625cSLinus Torvalds 				JOURNAL_PER_BALANCE_CNT * 3 + 1 +
675bd4c625cSLinus Torvalds 				2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb));
6761da177e4SLinus Torvalds 		if (err)
6771da177e4SLinus Torvalds 			res = err;
6781da177e4SLinus Torvalds 	}
6791da177e4SLinus Torvalds 	reiserfs_write_unlock(inode->i_sb);
6801da177e4SLinus Torvalds 	kfree(allocated_blocks);
6811da177e4SLinus Torvalds 
6821da177e4SLinus Torvalds 	return res;
6831da177e4SLinus Torvalds }
6841da177e4SLinus Torvalds 
/*
 * Unlock pages prepared by reiserfs_prepare_file_region_for_write.
 *
 * For each of the first @num_pages entries of @prepared_pages, in order:
 * try_to_free_buffers() is attempted on the page (its result is ignored),
 * then the page is unlocked and released with page_cache_release().
 */
static void reiserfs_unprepare_pages(struct page **prepared_pages,	/* list of locked pages */
				     size_t num_pages /* amount of pages */ )
{
	size_t i;		/* same type as num_pages, so the loop bound
				   is not a signed/unsigned comparison */

	for (i = 0; i < num_pages; i++) {
		struct page *page = prepared_pages[i];

		try_to_free_buffers(page);
		unlock_page(page);
		page_cache_release(page);
	}
}
6991da177e4SLinus Torvalds 
7001da177e4SLinus Torvalds /* This function will copy data from userspace to specified pages within
7011da177e4SLinus Torvalds    supplied byte range */
702bd4c625cSLinus Torvalds static int reiserfs_copy_from_user_to_file_region(loff_t pos,	/* In-file position */
7031da177e4SLinus Torvalds 						  int num_pages,	/* Number of pages affected */
7041da177e4SLinus Torvalds 						  int write_bytes,	/* Amount of bytes to write */
7051da177e4SLinus Torvalds 						  struct page **prepared_pages,	/* pointer to
7061da177e4SLinus Torvalds 										   array to
7071da177e4SLinus Torvalds 										   prepared pages
7081da177e4SLinus Torvalds 										 */
7091da177e4SLinus Torvalds 						  const char __user * buf	/* Pointer to user-supplied
7101da177e4SLinus Torvalds 										   data */
7111da177e4SLinus Torvalds     )
7121da177e4SLinus Torvalds {
7131da177e4SLinus Torvalds 	long page_fault = 0;	// status of copy_from_user.
7141da177e4SLinus Torvalds 	int i;			// loop counter.
7151da177e4SLinus Torvalds 	int offset;		// offset in page
7161da177e4SLinus Torvalds 
717bd4c625cSLinus Torvalds 	for (i = 0, offset = (pos & (PAGE_CACHE_SIZE - 1)); i < num_pages;
718bd4c625cSLinus Torvalds 	     i++, offset = 0) {
7191da177e4SLinus Torvalds 		size_t count = min_t(size_t, PAGE_CACHE_SIZE - offset, write_bytes);	// How much of bytes to write to this page
7201da177e4SLinus Torvalds 		struct page *page = prepared_pages[i];	// Current page we process.
7211da177e4SLinus Torvalds 
7221da177e4SLinus Torvalds 		fault_in_pages_readable(buf, count);
7231da177e4SLinus Torvalds 
7241da177e4SLinus Torvalds 		/* Copy data from userspace to the current page */
7251da177e4SLinus Torvalds 		kmap(page);
7261da177e4SLinus Torvalds 		page_fault = __copy_from_user(page_address(page) + offset, buf, count);	// Copy the data.
7271da177e4SLinus Torvalds 		/* Flush processor's dcache for this page */
7281da177e4SLinus Torvalds 		flush_dcache_page(page);
7291da177e4SLinus Torvalds 		kunmap(page);
7301da177e4SLinus Torvalds 		buf += count;
7311da177e4SLinus Torvalds 		write_bytes -= count;
7321da177e4SLinus Torvalds 
7331da177e4SLinus Torvalds 		if (page_fault)
7341da177e4SLinus Torvalds 			break;	// Was there a fault? abort.
7351da177e4SLinus Torvalds 	}
7361da177e4SLinus Torvalds 
7371da177e4SLinus Torvalds 	return page_fault ? -EFAULT : 0;
7381da177e4SLinus Torvalds }
7391da177e4SLinus Torvalds 
7401da177e4SLinus Torvalds /* taken fs/buffer.c:__block_commit_write */
7411da177e4SLinus Torvalds int reiserfs_commit_page(struct inode *inode, struct page *page,
7421da177e4SLinus Torvalds 			 unsigned from, unsigned to)
7431da177e4SLinus Torvalds {
7441da177e4SLinus Torvalds 	unsigned block_start, block_end;
7451da177e4SLinus Torvalds 	int partial = 0;
7461da177e4SLinus Torvalds 	unsigned blocksize;
7471da177e4SLinus Torvalds 	struct buffer_head *bh, *head;
7481da177e4SLinus Torvalds 	unsigned long i_size_index = inode->i_size >> PAGE_CACHE_SHIFT;
7491da177e4SLinus Torvalds 	int new;
7501da177e4SLinus Torvalds 	int logit = reiserfs_file_data_log(inode);
7511da177e4SLinus Torvalds 	struct super_block *s = inode->i_sb;
7521da177e4SLinus Torvalds 	int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
7531da177e4SLinus Torvalds 	struct reiserfs_transaction_handle th;
7541da177e4SLinus Torvalds 	int ret = 0;
7551da177e4SLinus Torvalds 
7561da177e4SLinus Torvalds 	th.t_trans_id = 0;
7571da177e4SLinus Torvalds 	blocksize = 1 << inode->i_blkbits;
7581da177e4SLinus Torvalds 
7591da177e4SLinus Torvalds 	if (logit) {
7601da177e4SLinus Torvalds 		reiserfs_write_lock(s);
7611da177e4SLinus Torvalds 		ret = journal_begin(&th, s, bh_per_page + 1);
7621da177e4SLinus Torvalds 		if (ret)
7631da177e4SLinus Torvalds 			goto drop_write_lock;
7641da177e4SLinus Torvalds 		reiserfs_update_inode_transaction(inode);
7651da177e4SLinus Torvalds 	}
7661da177e4SLinus Torvalds 	for (bh = head = page_buffers(page), block_start = 0;
7671da177e4SLinus Torvalds 	     bh != head || !block_start;
768bd4c625cSLinus Torvalds 	     block_start = block_end, bh = bh->b_this_page) {
7691da177e4SLinus Torvalds 
7701da177e4SLinus Torvalds 		new = buffer_new(bh);
7711da177e4SLinus Torvalds 		clear_buffer_new(bh);
7721da177e4SLinus Torvalds 		block_end = block_start + blocksize;
7731da177e4SLinus Torvalds 		if (block_end <= from || block_start >= to) {
7741da177e4SLinus Torvalds 			if (!buffer_uptodate(bh))
7751da177e4SLinus Torvalds 				partial = 1;
7761da177e4SLinus Torvalds 		} else {
7771da177e4SLinus Torvalds 			set_buffer_uptodate(bh);
7781da177e4SLinus Torvalds 			if (logit) {
7791da177e4SLinus Torvalds 				reiserfs_prepare_for_journal(s, bh, 1);
7801da177e4SLinus Torvalds 				journal_mark_dirty(&th, s, bh);
7811da177e4SLinus Torvalds 			} else if (!buffer_dirty(bh)) {
7821da177e4SLinus Torvalds 				mark_buffer_dirty(bh);
7831da177e4SLinus Torvalds 				/* do data=ordered on any page past the end
7841da177e4SLinus Torvalds 				 * of file and any buffer marked BH_New.
7851da177e4SLinus Torvalds 				 */
7861da177e4SLinus Torvalds 				if (reiserfs_data_ordered(inode->i_sb) &&
7871da177e4SLinus Torvalds 				    (new || page->index >= i_size_index)) {
7881da177e4SLinus Torvalds 					reiserfs_add_ordered_list(inode, bh);
7891da177e4SLinus Torvalds 				}
7901da177e4SLinus Torvalds 			}
7911da177e4SLinus Torvalds 		}
7921da177e4SLinus Torvalds 	}
7931da177e4SLinus Torvalds 	if (logit) {
7941da177e4SLinus Torvalds 		ret = journal_end(&th, s, bh_per_page + 1);
7951da177e4SLinus Torvalds 	      drop_write_lock:
7961da177e4SLinus Torvalds 		reiserfs_write_unlock(s);
7971da177e4SLinus Torvalds 	}
7981da177e4SLinus Torvalds 	/*
7991da177e4SLinus Torvalds 	 * If this is a partial write which happened to make all buffers
8001da177e4SLinus Torvalds 	 * uptodate then we can optimize away a bogus readpage() for
8011da177e4SLinus Torvalds 	 * the next read(). Here we 'discover' whether the page went
8021da177e4SLinus Torvalds 	 * uptodate as a result of this (potentially partial) write.
8031da177e4SLinus Torvalds 	 */
8041da177e4SLinus Torvalds 	if (!partial)
8051da177e4SLinus Torvalds 		SetPageUptodate(page);
8061da177e4SLinus Torvalds 	return ret;
8071da177e4SLinus Torvalds }
8081da177e4SLinus Torvalds 
8091da177e4SLinus Torvalds /* Submit pages for write. This was separated from actual file copying
8101da177e4SLinus Torvalds    because we might want to allocate block numbers in-between.
8111da177e4SLinus Torvalds    This function assumes that caller will adjust file size to correct value. */
812bd4c625cSLinus Torvalds static int reiserfs_submit_file_region_for_write(struct reiserfs_transaction_handle *th, struct inode *inode, loff_t pos,	/* Writing position offset */
8131da177e4SLinus Torvalds 						 size_t num_pages,	/* Number of pages to write */
8141da177e4SLinus Torvalds 						 size_t write_bytes,	/* number of bytes to write */
8151da177e4SLinus Torvalds 						 struct page **prepared_pages	/* list of pages */
8161da177e4SLinus Torvalds     )
8171da177e4SLinus Torvalds {
8181da177e4SLinus Torvalds 	int status;		// return status of block_commit_write.
8191da177e4SLinus Torvalds 	int retval = 0;		// Return value we are going to return.
8201da177e4SLinus Torvalds 	int i;			// loop counter
8211da177e4SLinus Torvalds 	int offset;		// Writing offset in page.
8221da177e4SLinus Torvalds 	int orig_write_bytes = write_bytes;
8231da177e4SLinus Torvalds 	int sd_update = 0;
8241da177e4SLinus Torvalds 
825bd4c625cSLinus Torvalds 	for (i = 0, offset = (pos & (PAGE_CACHE_SIZE - 1)); i < num_pages;
826bd4c625cSLinus Torvalds 	     i++, offset = 0) {
8271da177e4SLinus Torvalds 		int count = min_t(int, PAGE_CACHE_SIZE - offset, write_bytes);	// How much of bytes to write to this page
8281da177e4SLinus Torvalds 		struct page *page = prepared_pages[i];	// Current page we process.
8291da177e4SLinus Torvalds 
830bd4c625cSLinus Torvalds 		status =
831bd4c625cSLinus Torvalds 		    reiserfs_commit_page(inode, page, offset, offset + count);
8321da177e4SLinus Torvalds 		if (status)
8331da177e4SLinus Torvalds 			retval = status;	// To not overcomplicate matters We are going to
8341da177e4SLinus Torvalds 		// submit all the pages even if there was error.
8351da177e4SLinus Torvalds 		// we only remember error status to report it on
8361da177e4SLinus Torvalds 		// exit.
8371da177e4SLinus Torvalds 		write_bytes -= count;
8381da177e4SLinus Torvalds 	}
8391da177e4SLinus Torvalds 	/* now that we've gotten all the ordered buffers marked dirty,
8401da177e4SLinus Torvalds 	 * we can safely update i_size and close any running transaction
8411da177e4SLinus Torvalds 	 */
8421da177e4SLinus Torvalds 	if (pos + orig_write_bytes > inode->i_size) {
8431da177e4SLinus Torvalds 		inode->i_size = pos + orig_write_bytes;	// Set new size
8441da177e4SLinus Torvalds 		/* If the file have grown so much that tail packing is no
8451da177e4SLinus Torvalds 		 * longer possible, reset "need to pack" flag */
8461da177e4SLinus Torvalds 		if ((have_large_tails(inode->i_sb) &&
8471da177e4SLinus Torvalds 		     inode->i_size > i_block_size(inode) * 4) ||
8481da177e4SLinus Torvalds 		    (have_small_tails(inode->i_sb) &&
8491da177e4SLinus Torvalds 		     inode->i_size > i_block_size(inode)))
8501da177e4SLinus Torvalds 			REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
8511da177e4SLinus Torvalds 		else if ((have_large_tails(inode->i_sb) &&
8521da177e4SLinus Torvalds 			  inode->i_size < i_block_size(inode) * 4) ||
8531da177e4SLinus Torvalds 			 (have_small_tails(inode->i_sb) &&
8541da177e4SLinus Torvalds 			  inode->i_size < i_block_size(inode)))
8551da177e4SLinus Torvalds 			REISERFS_I(inode)->i_flags |= i_pack_on_close_mask;
8561da177e4SLinus Torvalds 
8571da177e4SLinus Torvalds 		if (th->t_trans_id) {
8581da177e4SLinus Torvalds 			reiserfs_write_lock(inode->i_sb);
8599f03783cSChris Mason 			// this sets the proper flags for O_SYNC to trigger a commit
8609f03783cSChris Mason 			mark_inode_dirty(inode);
8611da177e4SLinus Torvalds 			reiserfs_write_unlock(inode->i_sb);
86273ce5934SHisashi Hifumi 		} else {
86373ce5934SHisashi Hifumi 			reiserfs_write_lock(inode->i_sb);
86473ce5934SHisashi Hifumi 			reiserfs_update_inode_transaction(inode);
8659f03783cSChris Mason 			mark_inode_dirty(inode);
86673ce5934SHisashi Hifumi 			reiserfs_write_unlock(inode->i_sb);
86773ce5934SHisashi Hifumi 		}
8681da177e4SLinus Torvalds 
8691da177e4SLinus Torvalds 		sd_update = 1;
8701da177e4SLinus Torvalds 	}
8711da177e4SLinus Torvalds 	if (th->t_trans_id) {
8721da177e4SLinus Torvalds 		reiserfs_write_lock(inode->i_sb);
8731da177e4SLinus Torvalds 		if (!sd_update)
8749f03783cSChris Mason 			mark_inode_dirty(inode);
8751da177e4SLinus Torvalds 		status = journal_end(th, th->t_super, th->t_blocks_allocated);
8761da177e4SLinus Torvalds 		if (status)
8771da177e4SLinus Torvalds 			retval = status;
8781da177e4SLinus Torvalds 		reiserfs_write_unlock(inode->i_sb);
8791da177e4SLinus Torvalds 	}
8801da177e4SLinus Torvalds 	th->t_trans_id = 0;
8811da177e4SLinus Torvalds 
8821da177e4SLinus Torvalds 	/*
8831da177e4SLinus Torvalds 	 * we have to unlock the pages after updating i_size, otherwise
8841da177e4SLinus Torvalds 	 * we race with writepage
8851da177e4SLinus Torvalds 	 */
8861da177e4SLinus Torvalds 	for (i = 0; i < num_pages; i++) {
8871da177e4SLinus Torvalds 		struct page *page = prepared_pages[i];
8881da177e4SLinus Torvalds 		unlock_page(page);
8891da177e4SLinus Torvalds 		mark_page_accessed(page);
8901da177e4SLinus Torvalds 		page_cache_release(page);
8911da177e4SLinus Torvalds 	}
8921da177e4SLinus Torvalds 	return retval;
8931da177e4SLinus Torvalds }
8941da177e4SLinus Torvalds 
8951da177e4SLinus Torvalds /* Look if passed writing region is going to touch file's tail
8961da177e4SLinus Torvalds    (if it is present). And if it is, convert the tail to unformatted node */
8971da177e4SLinus Torvalds static int reiserfs_check_for_tail_and_convert(struct inode *inode,	/* inode to deal with */
8981da177e4SLinus Torvalds 					       loff_t pos,	/* Writing position */
8991da177e4SLinus Torvalds 					       int write_bytes	/* amount of bytes to write */
9001da177e4SLinus Torvalds     )
9011da177e4SLinus Torvalds {
9021da177e4SLinus Torvalds 	INITIALIZE_PATH(path);	// needed for search_for_position
9031da177e4SLinus Torvalds 	struct cpu_key key;	// Key that would represent last touched writing byte.
9041da177e4SLinus Torvalds 	struct item_head *ih;	// item header of found block;
9051da177e4SLinus Torvalds 	int res;		// Return value of various functions we call.
9061da177e4SLinus Torvalds 	int cont_expand_offset;	// We will put offset for generic_cont_expand here
9071da177e4SLinus Torvalds 	// This can be int just because tails are created
9081da177e4SLinus Torvalds 	// only for small files.
9091da177e4SLinus Torvalds 
9101da177e4SLinus Torvalds /* this embodies a dependency on a particular tail policy */
9111da177e4SLinus Torvalds 	if (inode->i_size >= inode->i_sb->s_blocksize * 4) {
9121da177e4SLinus Torvalds 		/* such a big files do not have tails, so we won't bother ourselves
9131da177e4SLinus Torvalds 		   to look for tails, simply return */
9141da177e4SLinus Torvalds 		return 0;
9151da177e4SLinus Torvalds 	}
9161da177e4SLinus Torvalds 
9171da177e4SLinus Torvalds 	reiserfs_write_lock(inode->i_sb);
9181da177e4SLinus Torvalds 	/* find the item containing the last byte to be written, or if
9191da177e4SLinus Torvalds 	 * writing past the end of the file then the last item of the
9201da177e4SLinus Torvalds 	 * file (and then we check its type). */
921bd4c625cSLinus Torvalds 	make_cpu_key(&key, inode, pos + write_bytes + 1, TYPE_ANY,
922bd4c625cSLinus Torvalds 		     3 /*key length */ );
9231da177e4SLinus Torvalds 	res = search_for_position_by_key(inode->i_sb, &key, &path);
9241da177e4SLinus Torvalds 	if (res == IO_ERROR) {
9251da177e4SLinus Torvalds 		reiserfs_write_unlock(inode->i_sb);
9261da177e4SLinus Torvalds 		return -EIO;
9271da177e4SLinus Torvalds 	}
9281da177e4SLinus Torvalds 	ih = get_ih(&path);
9291da177e4SLinus Torvalds 	res = 0;
9301da177e4SLinus Torvalds 	if (is_direct_le_ih(ih)) {
9311da177e4SLinus Torvalds 		/* Ok, closest item is file tail (tails are stored in "direct"
9321da177e4SLinus Torvalds 		 * items), so we need to unpack it. */
9331da177e4SLinus Torvalds 		/* To not overcomplicate matters, we just call generic_cont_expand
9341da177e4SLinus Torvalds 		   which will in turn call other stuff and finally will boil down to
9351da177e4SLinus Torvalds 		   reiserfs_get_block() that would do necessary conversion. */
936bd4c625cSLinus Torvalds 		cont_expand_offset =
937bd4c625cSLinus Torvalds 		    le_key_k_offset(get_inode_item_key_version(inode),
938bd4c625cSLinus Torvalds 				    &(ih->ih_key));
9391da177e4SLinus Torvalds 		pathrelse(&path);
9401da177e4SLinus Torvalds 		res = generic_cont_expand(inode, cont_expand_offset);
9411da177e4SLinus Torvalds 	} else
9421da177e4SLinus Torvalds 		pathrelse(&path);
9431da177e4SLinus Torvalds 
9441da177e4SLinus Torvalds 	reiserfs_write_unlock(inode->i_sb);
9451da177e4SLinus Torvalds 	return res;
9461da177e4SLinus Torvalds }
9471da177e4SLinus Torvalds 
9481da177e4SLinus Torvalds /* This function locks pages starting from @pos for @inode.
9491da177e4SLinus Torvalds    @num_pages pages are locked and stored in
9501da177e4SLinus Torvalds    @prepared_pages array. Also buffers are allocated for these pages.
9511da177e4SLinus Torvalds    First and last page of the region is read if it is overwritten only
9521da177e4SLinus Torvalds    partially. If last page did not exist before write (file hole or file
9531da177e4SLinus Torvalds    append), it is zeroed, then.
9541da177e4SLinus Torvalds    Returns number of unallocated blocks that should be allocated to cover
9551da177e4SLinus Torvalds    new file data.*/
956bd4c625cSLinus Torvalds static int reiserfs_prepare_file_region_for_write(struct inode *inode
957bd4c625cSLinus Torvalds 						  /* Inode of the file */ ,
9581da177e4SLinus Torvalds 						  loff_t pos,	/* position in the file */
9591da177e4SLinus Torvalds 						  size_t num_pages,	/* number of pages to
9601da177e4SLinus Torvalds 									   prepare */
9611da177e4SLinus Torvalds 						  size_t write_bytes,	/* Amount of bytes to be
9621da177e4SLinus Torvalds 									   overwritten from
9631da177e4SLinus Torvalds 									   @pos */
9641da177e4SLinus Torvalds 						  struct page **prepared_pages	/* pointer to array
9651da177e4SLinus Torvalds 										   where to store
9661da177e4SLinus Torvalds 										   prepared pages */
9671da177e4SLinus Torvalds     )
9681da177e4SLinus Torvalds {
9691da177e4SLinus Torvalds 	int res = 0;		// Return values of different functions we call.
9701da177e4SLinus Torvalds 	unsigned long index = pos >> PAGE_CACHE_SHIFT;	// Offset in file in pages.
9711da177e4SLinus Torvalds 	int from = (pos & (PAGE_CACHE_SIZE - 1));	// Writing offset in first page
9721da177e4SLinus Torvalds 	int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1;
9731da177e4SLinus Torvalds 	/* offset of last modified byte in last
9741da177e4SLinus Torvalds 	   page */
9751da177e4SLinus Torvalds 	struct address_space *mapping = inode->i_mapping;	// Pages are mapped here.
9761da177e4SLinus Torvalds 	int i;			// Simple counter
9771da177e4SLinus Torvalds 	int blocks = 0;		/* Return value (blocks that should be allocated) */
9781da177e4SLinus Torvalds 	struct buffer_head *bh, *head;	// Current bufferhead and first bufferhead
9791da177e4SLinus Torvalds 	// of a page.
9801da177e4SLinus Torvalds 	unsigned block_start, block_end;	// Starting and ending offsets of current
9811da177e4SLinus Torvalds 	// buffer in the page.
9821da177e4SLinus Torvalds 	struct buffer_head *wait[2], **wait_bh = wait;	// Buffers for page, if
9831da177e4SLinus Torvalds 	// Page appeared to be not up
9841da177e4SLinus Torvalds 	// to date. Note how we have
9851da177e4SLinus Torvalds 	// at most 2 buffers, this is
9861da177e4SLinus Torvalds 	// because we at most may
9871da177e4SLinus Torvalds 	// partially overwrite two
9881da177e4SLinus Torvalds 	// buffers for one page. One at                                                 // the beginning of write area
9891da177e4SLinus Torvalds 	// and one at the end.
9901da177e4SLinus Torvalds 	// Everything inthe middle gets                                                 // overwritten totally.
9911da177e4SLinus Torvalds 
9921da177e4SLinus Torvalds 	struct cpu_key key;	// cpu key of item that we are going to deal with
9931da177e4SLinus Torvalds 	struct item_head *ih = NULL;	// pointer to item head that we are going to deal with
9941da177e4SLinus Torvalds 	struct buffer_head *itembuf = NULL;	// Buffer head that contains items that we are going to deal with
9951da177e4SLinus Torvalds 	INITIALIZE_PATH(path);	// path to item, that we are going to deal with.
9963e8962beSAl Viro 	__le32 *item = NULL;	// pointer to item we are going to deal with
9971da177e4SLinus Torvalds 	int item_pos = -1;	/* Position in indirect item */
9981da177e4SLinus Torvalds 
9991da177e4SLinus Torvalds 	if (num_pages < 1) {
10001da177e4SLinus Torvalds 		reiserfs_warning(inode->i_sb,
10011da177e4SLinus Torvalds 				 "green-9001: reiserfs_prepare_file_region_for_write "
10021da177e4SLinus Torvalds 				 "called with zero number of pages to process");
10031da177e4SLinus Torvalds 		return -EFAULT;
10041da177e4SLinus Torvalds 	}
10051da177e4SLinus Torvalds 
10061da177e4SLinus Torvalds 	/* We have 2 loops for pages. In first loop we grab and lock the pages, so
10071da177e4SLinus Torvalds 	   that nobody would touch these until we release the pages. Then
10081da177e4SLinus Torvalds 	   we'd start to deal with mapping buffers to blocks. */
10091da177e4SLinus Torvalds 	for (i = 0; i < num_pages; i++) {
10101da177e4SLinus Torvalds 		prepared_pages[i] = grab_cache_page(mapping, index + i);	// locks the page
10111da177e4SLinus Torvalds 		if (!prepared_pages[i]) {
10121da177e4SLinus Torvalds 			res = -ENOMEM;
10131da177e4SLinus Torvalds 			goto failed_page_grabbing;
10141da177e4SLinus Torvalds 		}
10151da177e4SLinus Torvalds 		if (!page_has_buffers(prepared_pages[i]))
1016bd4c625cSLinus Torvalds 			create_empty_buffers(prepared_pages[i],
1017bd4c625cSLinus Torvalds 					     inode->i_sb->s_blocksize, 0);
10181da177e4SLinus Torvalds 	}
10191da177e4SLinus Torvalds 
10201da177e4SLinus Torvalds 	/* Let's count amount of blocks for a case where all the blocks
10211da177e4SLinus Torvalds 	   overwritten are new (we will substract already allocated blocks later) */
10221da177e4SLinus Torvalds 	if (num_pages > 2)
10231da177e4SLinus Torvalds 		/* These are full-overwritten pages so we count all the blocks in
10241da177e4SLinus Torvalds 		   these pages are counted as needed to be allocated */
1025bd4c625cSLinus Torvalds 		blocks =
1026bd4c625cSLinus Torvalds 		    (num_pages - 2) << (PAGE_CACHE_SHIFT - inode->i_blkbits);
10271da177e4SLinus Torvalds 
10281da177e4SLinus Torvalds 	/* count blocks needed for first page (possibly partially written) */
1029bd4c625cSLinus Torvalds 	blocks += ((PAGE_CACHE_SIZE - from) >> inode->i_blkbits) + !!(from & (inode->i_sb->s_blocksize - 1));	/* roundup */
10301da177e4SLinus Torvalds 
10311da177e4SLinus Torvalds 	/* Now we account for last page. If last page == first page (we
10321da177e4SLinus Torvalds 	   overwrite only one page), we substract all the blocks past the
10331da177e4SLinus Torvalds 	   last writing position in a page out of already calculated number
10341da177e4SLinus Torvalds 	   of blocks */
10351da177e4SLinus Torvalds 	blocks += ((num_pages > 1) << (PAGE_CACHE_SHIFT - inode->i_blkbits)) -
10361da177e4SLinus Torvalds 	    ((PAGE_CACHE_SIZE - to) >> inode->i_blkbits);
10371da177e4SLinus Torvalds 	/* Note how we do not roundup here since partial blocks still
10381da177e4SLinus Torvalds 	   should be allocated */
10391da177e4SLinus Torvalds 
10401da177e4SLinus Torvalds 	/* Now if all the write area lies past the file end, no point in
10411da177e4SLinus Torvalds 	   maping blocks, since there is none, so we just zero out remaining
10421da177e4SLinus Torvalds 	   parts of first and last pages in write area (if needed) */
10431da177e4SLinus Torvalds 	if ((pos & ~((loff_t) PAGE_CACHE_SIZE - 1)) > inode->i_size) {
10441da177e4SLinus Torvalds 		if (from != 0) {	/* First page needs to be partially zeroed */
10451da177e4SLinus Torvalds 			char *kaddr = kmap_atomic(prepared_pages[0], KM_USER0);
10461da177e4SLinus Torvalds 			memset(kaddr, 0, from);
10471da177e4SLinus Torvalds 			kunmap_atomic(kaddr, KM_USER0);
1048*de21c57bSAlexey Dobriyan 			flush_dcache_page(prepared_pages[0]);
10491da177e4SLinus Torvalds 		}
10501da177e4SLinus Torvalds 		if (to != PAGE_CACHE_SIZE) {	/* Last page needs to be partially zeroed */
1051bd4c625cSLinus Torvalds 			char *kaddr =
1052bd4c625cSLinus Torvalds 			    kmap_atomic(prepared_pages[num_pages - 1],
1053bd4c625cSLinus Torvalds 					KM_USER0);
10541da177e4SLinus Torvalds 			memset(kaddr + to, 0, PAGE_CACHE_SIZE - to);
10551da177e4SLinus Torvalds 			kunmap_atomic(kaddr, KM_USER0);
1056*de21c57bSAlexey Dobriyan 			flush_dcache_page(prepared_pages[num_pages - 1]);
10571da177e4SLinus Torvalds 		}
10581da177e4SLinus Torvalds 
10591da177e4SLinus Torvalds 		/* Since all blocks are new - use already calculated value */
10601da177e4SLinus Torvalds 		return blocks;
10611da177e4SLinus Torvalds 	}
10621da177e4SLinus Torvalds 
10631da177e4SLinus Torvalds 	/* Well, since we write somewhere into the middle of a file, there is
10641da177e4SLinus Torvalds 	   possibility we are writing over some already allocated blocks, so
10651da177e4SLinus Torvalds 	   let's map these blocks and substract number of such blocks out of blocks
10661da177e4SLinus Torvalds 	   we need to allocate (calculated above) */
10671da177e4SLinus Torvalds 	/* Mask write position to start on blocksize, we do it out of the
10681da177e4SLinus Torvalds 	   loop for performance reasons */
10691da177e4SLinus Torvalds 	pos &= ~((loff_t) inode->i_sb->s_blocksize - 1);
10701da177e4SLinus Torvalds 	/* Set cpu key to the starting position in a file (on left block boundary) */
1071bd4c625cSLinus Torvalds 	make_cpu_key(&key, inode,
1072bd4c625cSLinus Torvalds 		     1 + ((pos) & ~((loff_t) inode->i_sb->s_blocksize - 1)),
1073bd4c625cSLinus Torvalds 		     TYPE_ANY, 3 /*key length */ );
10741da177e4SLinus Torvalds 
10751da177e4SLinus Torvalds 	reiserfs_write_lock(inode->i_sb);	// We need that for at least search_by_key()
10761da177e4SLinus Torvalds 	for (i = 0; i < num_pages; i++) {
10771da177e4SLinus Torvalds 
10781da177e4SLinus Torvalds 		head = page_buffers(prepared_pages[i]);
10791da177e4SLinus Torvalds 		/* For each buffer in the page */
10801da177e4SLinus Torvalds 		for (bh = head, block_start = 0; bh != head || !block_start;
10811da177e4SLinus Torvalds 		     block_start = block_end, bh = bh->b_this_page) {
10821da177e4SLinus Torvalds 			if (!bh)
1083bd4c625cSLinus Torvalds 				reiserfs_panic(inode->i_sb,
1084bd4c625cSLinus Torvalds 					       "green-9002: Allocated but absent buffer for a page?");
10851da177e4SLinus Torvalds 			/* Find where this buffer ends */
10861da177e4SLinus Torvalds 			block_end = block_start + inode->i_sb->s_blocksize;
10871da177e4SLinus Torvalds 			if (i == 0 && block_end <= from)
10881da177e4SLinus Torvalds 				/* if this buffer is before requested data to map, skip it */
10891da177e4SLinus Torvalds 				continue;
10901da177e4SLinus Torvalds 
10911da177e4SLinus Torvalds 			if (i == num_pages - 1 && block_start >= to) {
10921da177e4SLinus Torvalds 				/* If this buffer is after requested data to map, abort
10931da177e4SLinus Torvalds 				   processing of current page */
10941da177e4SLinus Torvalds 				break;
10951da177e4SLinus Torvalds 			}
10961da177e4SLinus Torvalds 
10971da177e4SLinus Torvalds 			if (buffer_mapped(bh) && bh->b_blocknr != 0) {
10981da177e4SLinus Torvalds 				/* This is optimisation for a case where buffer is mapped
10991da177e4SLinus Torvalds 				   and have blocknumber assigned. In case significant amount
11001da177e4SLinus Torvalds 				   of such buffers are present, we may avoid some amount
11011da177e4SLinus Torvalds 				   of search_by_key calls.
11021da177e4SLinus Torvalds 				   Probably it would be possible to move parts of this code
11031da177e4SLinus Torvalds 				   out of BKL, but I afraid that would overcomplicate code
11041da177e4SLinus Torvalds 				   without any noticeable benefit.
11051da177e4SLinus Torvalds 				 */
11061da177e4SLinus Torvalds 				item_pos++;
11071da177e4SLinus Torvalds 				/* Update the key */
1108bd4c625cSLinus Torvalds 				set_cpu_key_k_offset(&key,
1109bd4c625cSLinus Torvalds 						     cpu_key_k_offset(&key) +
1110bd4c625cSLinus Torvalds 						     inode->i_sb->s_blocksize);
11111da177e4SLinus Torvalds 				blocks--;	// Decrease the amount of blocks that need to be
11121da177e4SLinus Torvalds 				// allocated
11131da177e4SLinus Torvalds 				continue;	// Go to the next buffer
11141da177e4SLinus Torvalds 			}
11151da177e4SLinus Torvalds 
11161da177e4SLinus Torvalds 			if (!itembuf ||	/* if first iteration */
1117bd4c625cSLinus Torvalds 			    item_pos >= ih_item_len(ih) / UNFM_P_SIZE) {	/* or if we progressed past the
11181da177e4SLinus Torvalds 										   current unformatted_item */
11191da177e4SLinus Torvalds 				/* Try to find next item */
1120bd4c625cSLinus Torvalds 				res =
1121bd4c625cSLinus Torvalds 				    search_for_position_by_key(inode->i_sb,
1122bd4c625cSLinus Torvalds 							       &key, &path);
11231da177e4SLinus Torvalds 				/* Abort if no more items */
11241da177e4SLinus Torvalds 				if (res != POSITION_FOUND) {
11251da177e4SLinus Torvalds 					/* make sure later loops don't use this item */
11261da177e4SLinus Torvalds 					itembuf = NULL;
11271da177e4SLinus Torvalds 					item = NULL;
11281da177e4SLinus Torvalds 					break;
11291da177e4SLinus Torvalds 				}
11301da177e4SLinus Torvalds 
11311da177e4SLinus Torvalds 				/* Update information about current indirect item */
11321da177e4SLinus Torvalds 				itembuf = get_last_bh(&path);
11331da177e4SLinus Torvalds 				ih = get_ih(&path);
11341da177e4SLinus Torvalds 				item = get_item(&path);
11351da177e4SLinus Torvalds 				item_pos = path.pos_in_item;
11361da177e4SLinus Torvalds 
1137bd4c625cSLinus Torvalds 				RFALSE(!is_indirect_le_ih(ih),
1138bd4c625cSLinus Torvalds 				       "green-9003: indirect item expected");
11391da177e4SLinus Torvalds 			}
11401da177e4SLinus Torvalds 
11411da177e4SLinus Torvalds 			/* See if there is some block associated with the file
11421da177e4SLinus Torvalds 			   at that position, map the buffer to this block */
11431da177e4SLinus Torvalds 			if (get_block_num(item, item_pos)) {
1144bd4c625cSLinus Torvalds 				map_bh(bh, inode->i_sb,
1145bd4c625cSLinus Torvalds 				       get_block_num(item, item_pos));
11461da177e4SLinus Torvalds 				blocks--;	// Decrease the amount of blocks that need to be
11471da177e4SLinus Torvalds 				// allocated
11481da177e4SLinus Torvalds 			}
11491da177e4SLinus Torvalds 			item_pos++;
11501da177e4SLinus Torvalds 			/* Update the key */
1151bd4c625cSLinus Torvalds 			set_cpu_key_k_offset(&key,
1152bd4c625cSLinus Torvalds 					     cpu_key_k_offset(&key) +
1153bd4c625cSLinus Torvalds 					     inode->i_sb->s_blocksize);
11541da177e4SLinus Torvalds 		}
11551da177e4SLinus Torvalds 	}
11561da177e4SLinus Torvalds 	pathrelse(&path);	// Free the path
11571da177e4SLinus Torvalds 	reiserfs_write_unlock(inode->i_sb);
11581da177e4SLinus Torvalds 
11591da177e4SLinus Torvalds 	/* Now zero out unmappend buffers for the first and last pages of
11601da177e4SLinus Torvalds 	   write area or issue read requests if page is mapped. */
11611da177e4SLinus Torvalds 	/* First page, see if it is not uptodate */
11621da177e4SLinus Torvalds 	if (!PageUptodate(prepared_pages[0])) {
11631da177e4SLinus Torvalds 		head = page_buffers(prepared_pages[0]);
11641da177e4SLinus Torvalds 
11651da177e4SLinus Torvalds 		/* For each buffer in page */
11661da177e4SLinus Torvalds 		for (bh = head, block_start = 0; bh != head || !block_start;
11671da177e4SLinus Torvalds 		     block_start = block_end, bh = bh->b_this_page) {
11681da177e4SLinus Torvalds 
11691da177e4SLinus Torvalds 			if (!bh)
1170bd4c625cSLinus Torvalds 				reiserfs_panic(inode->i_sb,
1171bd4c625cSLinus Torvalds 					       "green-9002: Allocated but absent buffer for a page?");
11721da177e4SLinus Torvalds 			/* Find where this buffer ends */
11731da177e4SLinus Torvalds 			block_end = block_start + inode->i_sb->s_blocksize;
11741da177e4SLinus Torvalds 			if (block_end <= from)
11751da177e4SLinus Torvalds 				/* if this buffer is before requested data to map, skip it */
11761da177e4SLinus Torvalds 				continue;
11771da177e4SLinus Torvalds 			if (block_start < from) {	/* Aha, our partial buffer */
11781da177e4SLinus Torvalds 				if (buffer_mapped(bh)) {	/* If it is mapped, we need to
11791da177e4SLinus Torvalds 								   issue READ request for it to
11801da177e4SLinus Torvalds 								   not loose data */
11811da177e4SLinus Torvalds 					ll_rw_block(READ, 1, &bh);
11821da177e4SLinus Torvalds 					*wait_bh++ = bh;
11831da177e4SLinus Torvalds 				} else {	/* Not mapped, zero it */
1184bd4c625cSLinus Torvalds 					char *kaddr =
1185bd4c625cSLinus Torvalds 					    kmap_atomic(prepared_pages[0],
1186bd4c625cSLinus Torvalds 							KM_USER0);
1187bd4c625cSLinus Torvalds 					memset(kaddr + block_start, 0,
1188bd4c625cSLinus Torvalds 					       from - block_start);
11891da177e4SLinus Torvalds 					kunmap_atomic(kaddr, KM_USER0);
1190*de21c57bSAlexey Dobriyan 					flush_dcache_page(prepared_pages[0]);
11911da177e4SLinus Torvalds 					set_buffer_uptodate(bh);
11921da177e4SLinus Torvalds 				}
11931da177e4SLinus Torvalds 			}
11941da177e4SLinus Torvalds 		}
11951da177e4SLinus Torvalds 	}
11961da177e4SLinus Torvalds 
11971da177e4SLinus Torvalds 	/* Last page, see if it is not uptodate, or if the last page is past the end of the file. */
11981da177e4SLinus Torvalds 	if (!PageUptodate(prepared_pages[num_pages - 1]) ||
1199bd4c625cSLinus Torvalds 	    ((pos + write_bytes) >> PAGE_CACHE_SHIFT) >
1200bd4c625cSLinus Torvalds 	    (inode->i_size >> PAGE_CACHE_SHIFT)) {
12011da177e4SLinus Torvalds 		head = page_buffers(prepared_pages[num_pages - 1]);
12021da177e4SLinus Torvalds 
12031da177e4SLinus Torvalds 		/* for each buffer in page */
12041da177e4SLinus Torvalds 		for (bh = head, block_start = 0; bh != head || !block_start;
12051da177e4SLinus Torvalds 		     block_start = block_end, bh = bh->b_this_page) {
12061da177e4SLinus Torvalds 
12071da177e4SLinus Torvalds 			if (!bh)
1208bd4c625cSLinus Torvalds 				reiserfs_panic(inode->i_sb,
1209bd4c625cSLinus Torvalds 					       "green-9002: Allocated but absent buffer for a page?");
12101da177e4SLinus Torvalds 			/* Find where this buffer ends */
12111da177e4SLinus Torvalds 			block_end = block_start + inode->i_sb->s_blocksize;
12121da177e4SLinus Torvalds 			if (block_start >= to)
12131da177e4SLinus Torvalds 				/* if this buffer is after requested data to map, skip it */
12141da177e4SLinus Torvalds 				break;
12151da177e4SLinus Torvalds 			if (block_end > to) {	/* Aha, our partial buffer */
12161da177e4SLinus Torvalds 				if (buffer_mapped(bh)) {	/* If it is mapped, we need to
12171da177e4SLinus Torvalds 								   issue READ request for it to
12181da177e4SLinus Torvalds 								   not loose data */
12191da177e4SLinus Torvalds 					ll_rw_block(READ, 1, &bh);
12201da177e4SLinus Torvalds 					*wait_bh++ = bh;
12211da177e4SLinus Torvalds 				} else {	/* Not mapped, zero it */
1222bd4c625cSLinus Torvalds 					char *kaddr =
1223bd4c625cSLinus Torvalds 					    kmap_atomic(prepared_pages
1224bd4c625cSLinus Torvalds 							[num_pages - 1],
1225bd4c625cSLinus Torvalds 							KM_USER0);
12261da177e4SLinus Torvalds 					memset(kaddr + to, 0, block_end - to);
12271da177e4SLinus Torvalds 					kunmap_atomic(kaddr, KM_USER0);
1228*de21c57bSAlexey Dobriyan 					flush_dcache_page(prepared_pages[num_pages - 1]);
12291da177e4SLinus Torvalds 					set_buffer_uptodate(bh);
12301da177e4SLinus Torvalds 				}
12311da177e4SLinus Torvalds 			}
12321da177e4SLinus Torvalds 		}
12331da177e4SLinus Torvalds 	}
12341da177e4SLinus Torvalds 
12351da177e4SLinus Torvalds 	/* Wait for read requests we made to happen, if necessary */
12361da177e4SLinus Torvalds 	while (wait_bh > wait) {
12371da177e4SLinus Torvalds 		wait_on_buffer(*--wait_bh);
12381da177e4SLinus Torvalds 		if (!buffer_uptodate(*wait_bh)) {
12391da177e4SLinus Torvalds 			res = -EIO;
12401da177e4SLinus Torvalds 			goto failed_read;
12411da177e4SLinus Torvalds 		}
12421da177e4SLinus Torvalds 	}
12431da177e4SLinus Torvalds 
12441da177e4SLinus Torvalds 	return blocks;
12451da177e4SLinus Torvalds       failed_page_grabbing:
12461da177e4SLinus Torvalds 	num_pages = i;
12471da177e4SLinus Torvalds       failed_read:
12481da177e4SLinus Torvalds 	reiserfs_unprepare_pages(prepared_pages, num_pages);
12491da177e4SLinus Torvalds 	return res;
12501da177e4SLinus Torvalds }
12511da177e4SLinus Torvalds 
12521da177e4SLinus Torvalds /* Write @count bytes at position @ppos in a file indicated by @file
12531da177e4SLinus Torvalds    from the buffer @buf.
12541da177e4SLinus Torvalds 
12551da177e4SLinus Torvalds    generic_file_write() is only appropriate for filesystems that are not seeking to optimize performance and want
12561da177e4SLinus Torvalds    something simple that works.  It is not for serious use by general purpose filesystems, excepting the one that it was
12571da177e4SLinus Torvalds    written for (ext2/3).  This is for several reasons:
12581da177e4SLinus Torvalds 
12591da177e4SLinus Torvalds    * It has no understanding of any filesystem specific optimizations.
12601da177e4SLinus Torvalds 
12611da177e4SLinus Torvalds    * It enters the filesystem repeatedly for each page that is written.
12621da177e4SLinus Torvalds 
12631da177e4SLinus Torvalds    * It depends on reiserfs_get_block() function which if implemented by reiserfs performs costly search_by_key
12641da177e4SLinus Torvalds    * operation for each page it is supplied with. By contrast reiserfs_file_write() feeds as much as possible at a time
12651da177e4SLinus Torvalds    * to reiserfs which allows for fewer tree traversals.
12661da177e4SLinus Torvalds 
12671da177e4SLinus Torvalds    * Each indirect pointer insertion takes a lot of cpu, because it involves memory moves inside of blocks.
12681da177e4SLinus Torvalds 
12691da177e4SLinus Torvalds    * Asking the block allocation code for blocks one at a time is slightly less efficient.
12701da177e4SLinus Torvalds 
12711da177e4SLinus Torvalds    All of these reasons for not using only generic file write were understood back when reiserfs was first miscoded to
12721da177e4SLinus Torvalds    use it, but we were in a hurry to make code freeze, and so it couldn't be revised then.  This new code should make
12731da177e4SLinus Torvalds    things right finally.
12741da177e4SLinus Torvalds 
12751da177e4SLinus Torvalds    Future Features: providing search_by_key with hints.
12761da177e4SLinus Torvalds 
12771da177e4SLinus Torvalds */
12781da177e4SLinus Torvalds static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going to write into */
12791da177e4SLinus Torvalds 				   const char __user * buf,	/*  pointer to user supplied data
12801da177e4SLinus Torvalds 								   (in userspace) */
12811da177e4SLinus Torvalds 				   size_t count,	/* amount of bytes to write */
12821da177e4SLinus Torvalds 				   loff_t * ppos	/* pointer to position in file that we start writing at. Should be updated to
1283bd4c625cSLinus Torvalds 							 * new current position before returning. */
1284bd4c625cSLinus Torvalds 				   )
12851da177e4SLinus Torvalds {
12861da177e4SLinus Torvalds 	size_t already_written = 0;	// Number of bytes already written to the file.
12871da177e4SLinus Torvalds 	loff_t pos;		// Current position in the file.
12881da177e4SLinus Torvalds 	ssize_t res;		// return value of various functions that we call.
12891da177e4SLinus Torvalds 	int err = 0;
12901da177e4SLinus Torvalds 	struct inode *inode = file->f_dentry->d_inode;	// Inode of the file that we are writing to.
12911da177e4SLinus Torvalds 	/* To simplify coding at this time, we store
12921da177e4SLinus Torvalds 	   locked pages in array for now */
12931da177e4SLinus Torvalds 	struct page *prepared_pages[REISERFS_WRITE_PAGES_AT_A_TIME];
12941da177e4SLinus Torvalds 	struct reiserfs_transaction_handle th;
12951da177e4SLinus Torvalds 	th.t_trans_id = 0;
12961da177e4SLinus Torvalds 
1297fa385befSJeff Mahoney 	/* If a filesystem is converted from 3.5 to 3.6, we'll have v3.5 items
1298fa385befSJeff Mahoney 	* lying around (most of the disk, in fact). Despite the filesystem
1299fa385befSJeff Mahoney 	* now being a v3.6 format, the old items still can't support large
1300fa385befSJeff Mahoney 	* file sizes. Catch this case here, as the rest of the VFS layer is
1301fa385befSJeff Mahoney 	* oblivious to the different limitations between old and new items.
1302fa385befSJeff Mahoney 	* reiserfs_setattr catches this for truncates. This chunk is lifted
1303fa385befSJeff Mahoney 	* from generic_write_checks. */
1304fa385befSJeff Mahoney 	if (get_inode_item_key_version (inode) == KEY_FORMAT_3_5 &&
1305fa385befSJeff Mahoney 	    *ppos + count > MAX_NON_LFS) {
1306fa385befSJeff Mahoney 		if (*ppos >= MAX_NON_LFS) {
1307fa385befSJeff Mahoney 			send_sig(SIGXFSZ, current, 0);
1308fa385befSJeff Mahoney 			return -EFBIG;
1309fa385befSJeff Mahoney 		}
1310fa385befSJeff Mahoney 		if (count > MAX_NON_LFS - (unsigned long)*ppos)
1311fa385befSJeff Mahoney 			count = MAX_NON_LFS - (unsigned long)*ppos;
1312fa385befSJeff Mahoney 	}
1313fa385befSJeff Mahoney 
13141da177e4SLinus Torvalds 	if (file->f_flags & O_DIRECT) {	// Direct IO needs treatment
13151da177e4SLinus Torvalds 		ssize_t result, after_file_end = 0;
1316bd4c625cSLinus Torvalds 		if ((*ppos + count >= inode->i_size)
1317bd4c625cSLinus Torvalds 		    || (file->f_flags & O_APPEND)) {
13181da177e4SLinus Torvalds 			/* If we are appending a file, we need to put this savelink in here.
13191da177e4SLinus Torvalds 			   If we will crash while doing direct io, finish_unfinished will
13201da177e4SLinus Torvalds 			   cut the garbage from the file end. */
13211da177e4SLinus Torvalds 			reiserfs_write_lock(inode->i_sb);
1322bd4c625cSLinus Torvalds 			err =
1323bd4c625cSLinus Torvalds 			    journal_begin(&th, inode->i_sb,
1324bd4c625cSLinus Torvalds 					  JOURNAL_PER_BALANCE_CNT);
13251da177e4SLinus Torvalds 			if (err) {
13261da177e4SLinus Torvalds 				reiserfs_write_unlock(inode->i_sb);
13271da177e4SLinus Torvalds 				return err;
13281da177e4SLinus Torvalds 			}
13291da177e4SLinus Torvalds 			reiserfs_update_inode_transaction(inode);
13301da177e4SLinus Torvalds 			add_save_link(&th, inode, 1 /* Truncate */ );
13311da177e4SLinus Torvalds 			after_file_end = 1;
1332bd4c625cSLinus Torvalds 			err =
1333bd4c625cSLinus Torvalds 			    journal_end(&th, inode->i_sb,
1334bd4c625cSLinus Torvalds 					JOURNAL_PER_BALANCE_CNT);
13351da177e4SLinus Torvalds 			reiserfs_write_unlock(inode->i_sb);
13361da177e4SLinus Torvalds 			if (err)
13371da177e4SLinus Torvalds 				return err;
13381da177e4SLinus Torvalds 		}
1339027445c3SBadari Pulavarty 		result = do_sync_write(file, buf, count, ppos);
13401da177e4SLinus Torvalds 
13411da177e4SLinus Torvalds 		if (after_file_end) {	/* Now update i_size and remove the savelink */
13421da177e4SLinus Torvalds 			struct reiserfs_transaction_handle th;
13431da177e4SLinus Torvalds 			reiserfs_write_lock(inode->i_sb);
13441da177e4SLinus Torvalds 			err = journal_begin(&th, inode->i_sb, 1);
13451da177e4SLinus Torvalds 			if (err) {
13461da177e4SLinus Torvalds 				reiserfs_write_unlock(inode->i_sb);
13471da177e4SLinus Torvalds 				return err;
13481da177e4SLinus Torvalds 			}
13491da177e4SLinus Torvalds 			reiserfs_update_inode_transaction(inode);
13509f03783cSChris Mason 			mark_inode_dirty(inode);
13511da177e4SLinus Torvalds 			err = journal_end(&th, inode->i_sb, 1);
13521da177e4SLinus Torvalds 			if (err) {
13531da177e4SLinus Torvalds 				reiserfs_write_unlock(inode->i_sb);
13541da177e4SLinus Torvalds 				return err;
13551da177e4SLinus Torvalds 			}
13561da177e4SLinus Torvalds 			err = remove_save_link(inode, 1 /* truncate */ );
13571da177e4SLinus Torvalds 			reiserfs_write_unlock(inode->i_sb);
13581da177e4SLinus Torvalds 			if (err)
13591da177e4SLinus Torvalds 				return err;
13601da177e4SLinus Torvalds 		}
13611da177e4SLinus Torvalds 
13621da177e4SLinus Torvalds 		return result;
13631da177e4SLinus Torvalds 	}
13641da177e4SLinus Torvalds 
13651da177e4SLinus Torvalds 	if (unlikely((ssize_t) count < 0))
13661da177e4SLinus Torvalds 		return -EINVAL;
13671da177e4SLinus Torvalds 
13681da177e4SLinus Torvalds 	if (unlikely(!access_ok(VERIFY_READ, buf, count)))
13691da177e4SLinus Torvalds 		return -EFAULT;
13701da177e4SLinus Torvalds 
13711b1dcc1bSJes Sorensen 	mutex_lock(&inode->i_mutex);	// locks the entire file for just us
13721da177e4SLinus Torvalds 
13731da177e4SLinus Torvalds 	pos = *ppos;
13741da177e4SLinus Torvalds 
13751da177e4SLinus Torvalds 	/* Check if we can write to specified region of file, file
13761da177e4SLinus Torvalds 	   is not overly big and this kind of stuff. Adjust pos and
13771da177e4SLinus Torvalds 	   count, if needed */
13781da177e4SLinus Torvalds 	res = generic_write_checks(file, &pos, &count, 0);
13791da177e4SLinus Torvalds 	if (res)
13801da177e4SLinus Torvalds 		goto out;
13811da177e4SLinus Torvalds 
13821da177e4SLinus Torvalds 	if (count == 0)
13831da177e4SLinus Torvalds 		goto out;
13841da177e4SLinus Torvalds 
13851da177e4SLinus Torvalds 	res = remove_suid(file->f_dentry);
13861da177e4SLinus Torvalds 	if (res)
13871da177e4SLinus Torvalds 		goto out;
13881da177e4SLinus Torvalds 
1389870f4817SChristoph Hellwig 	file_update_time(file);
13901da177e4SLinus Torvalds 
13911da177e4SLinus Torvalds 	// Ok, we are done with all the checks.
13921da177e4SLinus Torvalds 
13931da177e4SLinus Torvalds 	// Now we should start real work
13941da177e4SLinus Torvalds 
13951da177e4SLinus Torvalds 	/* If we are going to write past the file's packed tail or if we are going
13961da177e4SLinus Torvalds 	   to overwrite part of the tail, we need that tail to be converted into
13971da177e4SLinus Torvalds 	   unformatted node */
13981da177e4SLinus Torvalds 	res = reiserfs_check_for_tail_and_convert(inode, pos, count);
13991da177e4SLinus Torvalds 	if (res)
14001da177e4SLinus Torvalds 		goto out;
14011da177e4SLinus Torvalds 
14021da177e4SLinus Torvalds 	while (count > 0) {
14031da177e4SLinus Torvalds 		/* This is the main loop in which we running until some error occures
14041da177e4SLinus Torvalds 		   or until we write all of the data. */
14051da177e4SLinus Torvalds 		size_t num_pages;	/* amount of pages we are going to write this iteration */
14061da177e4SLinus Torvalds 		size_t write_bytes;	/* amount of bytes to write during this iteration */
14071da177e4SLinus Torvalds 		size_t blocks_to_allocate;	/* how much blocks we need to allocate for this iteration */
14081da177e4SLinus Torvalds 
14091da177e4SLinus Torvalds 		/*  (pos & (PAGE_CACHE_SIZE-1)) is an idiom for offset into a page of pos */
14101da177e4SLinus Torvalds 		num_pages = !!((pos + count) & (PAGE_CACHE_SIZE - 1)) +	/* round up partial
14111da177e4SLinus Torvalds 									   pages */
1412bd4c625cSLinus Torvalds 		    ((count +
1413bd4c625cSLinus Torvalds 		      (pos & (PAGE_CACHE_SIZE - 1))) >> PAGE_CACHE_SHIFT);
14141da177e4SLinus Torvalds 		/* convert size to amount of
14151da177e4SLinus Torvalds 		   pages */
14161da177e4SLinus Torvalds 		reiserfs_write_lock(inode->i_sb);
14171da177e4SLinus Torvalds 		if (num_pages > REISERFS_WRITE_PAGES_AT_A_TIME
14181da177e4SLinus Torvalds 		    || num_pages > reiserfs_can_fit_pages(inode->i_sb)) {
14191da177e4SLinus Torvalds 			/* If we were asked to write more data than we want to or if there
14201da177e4SLinus Torvalds 			   is not that much space, then we shorten amount of data to write
14211da177e4SLinus Torvalds 			   for this iteration. */
1422bd4c625cSLinus Torvalds 			num_pages =
1423bd4c625cSLinus Torvalds 			    min_t(size_t, REISERFS_WRITE_PAGES_AT_A_TIME,
1424bd4c625cSLinus Torvalds 				  reiserfs_can_fit_pages(inode->i_sb));
14251da177e4SLinus Torvalds 			/* Also we should not forget to set size in bytes accordingly */
14261da177e4SLinus Torvalds 			write_bytes = (num_pages << PAGE_CACHE_SHIFT) -
14271da177e4SLinus Torvalds 			    (pos & (PAGE_CACHE_SIZE - 1));
14281da177e4SLinus Torvalds 			/* If position is not on the
14291da177e4SLinus Torvalds 			   start of the page, we need
14301da177e4SLinus Torvalds 			   to substract the offset
14311da177e4SLinus Torvalds 			   within page */
14321da177e4SLinus Torvalds 		} else
14331da177e4SLinus Torvalds 			write_bytes = count;
14341da177e4SLinus Torvalds 
14351da177e4SLinus Torvalds 		/* reserve the blocks to be allocated later, so that later on
14361da177e4SLinus Torvalds 		   we still have the space to write the blocks to */
1437bd4c625cSLinus Torvalds 		reiserfs_claim_blocks_to_be_allocated(inode->i_sb,
1438bd4c625cSLinus Torvalds 						      num_pages <<
1439bd4c625cSLinus Torvalds 						      (PAGE_CACHE_SHIFT -
1440bd4c625cSLinus Torvalds 						       inode->i_blkbits));
14411da177e4SLinus Torvalds 		reiserfs_write_unlock(inode->i_sb);
14421da177e4SLinus Torvalds 
1443127144dfSJan Kara 		if (!num_pages) {	/* If we do not have enough space even for a single page... */
1444bd4c625cSLinus Torvalds 			if (pos >
1445bd4c625cSLinus Torvalds 			    inode->i_size + inode->i_sb->s_blocksize -
1446bd4c625cSLinus Torvalds 			    (pos & (inode->i_sb->s_blocksize - 1))) {
1447127144dfSJan Kara 				res = -ENOSPC;
1448127144dfSJan Kara 				break;	// In case we are writing past the end of the last file block, break.
1449127144dfSJan Kara 			}
14501da177e4SLinus Torvalds 			// Otherwise we are possibly overwriting the file, so
14511da177e4SLinus Torvalds 			// let's set write size to be equal or less than blocksize.
14521da177e4SLinus Torvalds 			// This way we get it correctly for file holes.
14531da177e4SLinus Torvalds 			// But overwriting files on absolutelly full volumes would not
14541da177e4SLinus Torvalds 			// be very efficient. Well, people are not supposed to fill
14551da177e4SLinus Torvalds 			// 100% of disk space anyway.
1456bd4c625cSLinus Torvalds 			write_bytes =
1457bd4c625cSLinus Torvalds 			    min_t(size_t, count,
1458bd4c625cSLinus Torvalds 				  inode->i_sb->s_blocksize -
1459bd4c625cSLinus Torvalds 				  (pos & (inode->i_sb->s_blocksize - 1)));
14601da177e4SLinus Torvalds 			num_pages = 1;
14611da177e4SLinus Torvalds 			// No blocks were claimed before, so do it now.
1462bd4c625cSLinus Torvalds 			reiserfs_claim_blocks_to_be_allocated(inode->i_sb,
1463bd4c625cSLinus Torvalds 							      1 <<
1464bd4c625cSLinus Torvalds 							      (PAGE_CACHE_SHIFT
1465bd4c625cSLinus Torvalds 							       -
1466bd4c625cSLinus Torvalds 							       inode->
1467bd4c625cSLinus Torvalds 							       i_blkbits));
14681da177e4SLinus Torvalds 		}
14691da177e4SLinus Torvalds 
14701da177e4SLinus Torvalds 		/* Prepare for writing into the region, read in all the
14711da177e4SLinus Torvalds 		   partially overwritten pages, if needed. And lock the pages,
14721da177e4SLinus Torvalds 		   so that nobody else can access these until we are done.
14731da177e4SLinus Torvalds 		   We get number of actual blocks needed as a result. */
1474c499ec24SVladimir V. Saveliev 		res = reiserfs_prepare_file_region_for_write(inode, pos,
1475bd4c625cSLinus Torvalds 							     num_pages,
1476bd4c625cSLinus Torvalds 							     write_bytes,
1477bd4c625cSLinus Torvalds 							     prepared_pages);
1478c499ec24SVladimir V. Saveliev 		if (res < 0) {
1479bd4c625cSLinus Torvalds 			reiserfs_release_claimed_blocks(inode->i_sb,
1480bd4c625cSLinus Torvalds 							num_pages <<
1481bd4c625cSLinus Torvalds 							(PAGE_CACHE_SHIFT -
1482bd4c625cSLinus Torvalds 							 inode->i_blkbits));
14831da177e4SLinus Torvalds 			break;
14841da177e4SLinus Torvalds 		}
14851da177e4SLinus Torvalds 
1486c499ec24SVladimir V. Saveliev 		blocks_to_allocate = res;
1487c499ec24SVladimir V. Saveliev 
14881da177e4SLinus Torvalds 		/* First we correct our estimate of how many blocks we need */
1489bd4c625cSLinus Torvalds 		reiserfs_release_claimed_blocks(inode->i_sb,
1490bd4c625cSLinus Torvalds 						(num_pages <<
1491bd4c625cSLinus Torvalds 						 (PAGE_CACHE_SHIFT -
1492bd4c625cSLinus Torvalds 						  inode->i_sb->
1493bd4c625cSLinus Torvalds 						  s_blocksize_bits)) -
1494bd4c625cSLinus Torvalds 						blocks_to_allocate);
14951da177e4SLinus Torvalds 
14961da177e4SLinus Torvalds 		if (blocks_to_allocate > 0) {	/*We only allocate blocks if we need to */
14971da177e4SLinus Torvalds 			/* Fill in all the possible holes and append the file if needed */
1498bd4c625cSLinus Torvalds 			res =
1499bd4c625cSLinus Torvalds 			    reiserfs_allocate_blocks_for_region(&th, inode, pos,
1500bd4c625cSLinus Torvalds 								num_pages,
1501bd4c625cSLinus Torvalds 								write_bytes,
1502bd4c625cSLinus Torvalds 								prepared_pages,
1503bd4c625cSLinus Torvalds 								blocks_to_allocate);
15041da177e4SLinus Torvalds 		}
15051da177e4SLinus Torvalds 
15061da177e4SLinus Torvalds 		/* well, we have allocated the blocks, so it is time to free
15071da177e4SLinus Torvalds 		   the reservation we made earlier. */
1508bd4c625cSLinus Torvalds 		reiserfs_release_claimed_blocks(inode->i_sb,
1509bd4c625cSLinus Torvalds 						blocks_to_allocate);
15101da177e4SLinus Torvalds 		if (res) {
15111da177e4SLinus Torvalds 			reiserfs_unprepare_pages(prepared_pages, num_pages);
15121da177e4SLinus Torvalds 			break;
15131da177e4SLinus Torvalds 		}
15141da177e4SLinus Torvalds 
15151da177e4SLinus Torvalds /* NOTE that allocating blocks and filling blocks can be done in reverse order
15161da177e4SLinus Torvalds    and probably we would do that just to get rid of garbage in files after a
15171da177e4SLinus Torvalds    crash */
15181da177e4SLinus Torvalds 
15191da177e4SLinus Torvalds 		/* Copy data from user-supplied buffer to file's pages */
1520bd4c625cSLinus Torvalds 		res =
1521bd4c625cSLinus Torvalds 		    reiserfs_copy_from_user_to_file_region(pos, num_pages,
1522bd4c625cSLinus Torvalds 							   write_bytes,
1523bd4c625cSLinus Torvalds 							   prepared_pages, buf);
15241da177e4SLinus Torvalds 		if (res) {
15251da177e4SLinus Torvalds 			reiserfs_unprepare_pages(prepared_pages, num_pages);
15261da177e4SLinus Torvalds 			break;
15271da177e4SLinus Torvalds 		}
15281da177e4SLinus Torvalds 
15291da177e4SLinus Torvalds 		/* Send the pages to disk and unlock them. */
1530bd4c625cSLinus Torvalds 		res =
1531bd4c625cSLinus Torvalds 		    reiserfs_submit_file_region_for_write(&th, inode, pos,
1532bd4c625cSLinus Torvalds 							  num_pages,
1533bd4c625cSLinus Torvalds 							  write_bytes,
1534bd4c625cSLinus Torvalds 							  prepared_pages);
15351da177e4SLinus Torvalds 		if (res)
15361da177e4SLinus Torvalds 			break;
15371da177e4SLinus Torvalds 
15381da177e4SLinus Torvalds 		already_written += write_bytes;
15391da177e4SLinus Torvalds 		buf += write_bytes;
15401da177e4SLinus Torvalds 		*ppos = pos += write_bytes;
15411da177e4SLinus Torvalds 		count -= write_bytes;
154259308602SAlexander Zarochentsev 		balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages);
15431da177e4SLinus Torvalds 	}
15441da177e4SLinus Torvalds 
15451da177e4SLinus Torvalds 	/* this is only true on error */
15461da177e4SLinus Torvalds 	if (th.t_trans_id) {
15471da177e4SLinus Torvalds 		reiserfs_write_lock(inode->i_sb);
15481da177e4SLinus Torvalds 		err = journal_end(&th, th.t_super, th.t_blocks_allocated);
15491da177e4SLinus Torvalds 		reiserfs_write_unlock(inode->i_sb);
15501da177e4SLinus Torvalds 		if (err) {
15511da177e4SLinus Torvalds 			res = err;
15521da177e4SLinus Torvalds 			goto out;
15531da177e4SLinus Torvalds 		}
15541da177e4SLinus Torvalds 	}
15551da177e4SLinus Torvalds 
1556619d5d8aSJeff Mahoney 	if (likely(res >= 0) &&
1557619d5d8aSJeff Mahoney 	    (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))))
1558619d5d8aSJeff Mahoney 		res = generic_osync_inode(inode, file->f_mapping,
1559bd4c625cSLinus Torvalds 		                          OSYNC_METADATA | OSYNC_DATA);
15601da177e4SLinus Torvalds 
15611b1dcc1bSJes Sorensen 	mutex_unlock(&inode->i_mutex);
15621da177e4SLinus Torvalds 	reiserfs_async_progress_wait(inode->i_sb);
15631da177e4SLinus Torvalds 	return (already_written != 0) ? already_written : res;
15641da177e4SLinus Torvalds 
15651da177e4SLinus Torvalds       out:
15661b1dcc1bSJes Sorensen 	mutex_unlock(&inode->i_mutex);	// unlock the file on exit.
15671da177e4SLinus Torvalds 	return res;
15681da177e4SLinus Torvalds }
15691da177e4SLinus Torvalds 
15704b6f5d20SArjan van de Ven const struct file_operations reiserfs_file_operations = {
1571027445c3SBadari Pulavarty 	.read = do_sync_read,
15721da177e4SLinus Torvalds 	.write = reiserfs_file_write,
15731da177e4SLinus Torvalds 	.ioctl = reiserfs_ioctl,
157452b499c4SDavid Howells #ifdef CONFIG_COMPAT
157552b499c4SDavid Howells 	.compat_ioctl = reiserfs_compat_ioctl,
157652b499c4SDavid Howells #endif
15771da177e4SLinus Torvalds 	.mmap = generic_file_mmap,
15785a2618e6SJeff Mahoney 	.open = generic_file_open,
15791da177e4SLinus Torvalds 	.release = reiserfs_file_release,
15801da177e4SLinus Torvalds 	.fsync = reiserfs_sync_file,
15811da177e4SLinus Torvalds 	.sendfile = generic_file_sendfile,
15821da177e4SLinus Torvalds 	.aio_read = generic_file_aio_read,
15839637f28fSAlexey Dobriyan 	.aio_write = generic_file_aio_write,
15845274f052SJens Axboe 	.splice_read = generic_file_splice_read,
15855274f052SJens Axboe 	.splice_write = generic_file_splice_write,
15861da177e4SLinus Torvalds };
15871da177e4SLinus Torvalds 
15881da177e4SLinus Torvalds struct inode_operations reiserfs_file_inode_operations = {
15891da177e4SLinus Torvalds 	.truncate = reiserfs_vfs_truncate_file,
15901da177e4SLinus Torvalds 	.setattr = reiserfs_setattr,
15911da177e4SLinus Torvalds 	.setxattr = reiserfs_setxattr,
15921da177e4SLinus Torvalds 	.getxattr = reiserfs_getxattr,
15931da177e4SLinus Torvalds 	.listxattr = reiserfs_listxattr,
15941da177e4SLinus Torvalds 	.removexattr = reiserfs_removexattr,
15951da177e4SLinus Torvalds 	.permission = reiserfs_permission,
15961da177e4SLinus Torvalds };
1597