/*
 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/reiserfs_fs.h>
#include <linux/reiserfs_acl.h>
#include <linux/reiserfs_xattr.h>
#include <linux/exportfs.h>
#include <linux/smp_lock.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/quotaops.h>
#include <linux/swap.h>

int reiserfs_commit_write(struct file *f, struct page *page,
			  unsigned from, unsigned to);
int reiserfs_prepare_write(struct file *f, struct page *page,
			   unsigned from, unsigned to);

void reiserfs_delete_inode(struct inode *inode)
{
	/* We need blocks for the transaction + (user+group) quota update (possibly delete) */
	int jbegin_count =
	    JOURNAL_PER_BALANCE_CNT * 2 +
	    2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
	struct reiserfs_transaction_handle th;
	int err;

	truncate_inode_pages(&inode->i_data, 0);

	reiserfs_write_lock(inode->i_sb);

	/* The == 0 happens when we abort creating a new inode for some reason like lack of space... */
	if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) {	/* also handles bad_inode case */
		reiserfs_delete_xattrs(inode);

		if (journal_begin(&th, inode->i_sb, jbegin_count))
			goto out;
		reiserfs_update_inode_transaction(inode);

		reiserfs_discard_prealloc(&th, inode);

		err = reiserfs_delete_object(&th, inode);

		/* Do the quota update inside a transaction for journaled quotas. We must do that
		 * after delete_object so that quota updates go into the same transaction as
		 * stat data deletion */
		if (!err)
			vfs_dq_free_inode(inode);

		if (journal_end(&th, inode->i_sb, jbegin_count))
			goto out;

		/* check return value from reiserfs_delete_object after
		 * ending the transaction
		 */
		if (err)
			goto out;

		/* all items of the file are deleted, so we can remove the "save" link */
		remove_save_link(inode, 0 /* not truncate */ );	/* we can't do anything
								 * about an error here */
	} else {
		/* no object items are in the tree */
		;
	}
      out:
	clear_inode(inode);	/* note this must go after the journal_end to prevent deadlock */
	inode->i_blocks = 0;
	reiserfs_write_unlock(inode->i_sb);
}

static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid,
			  __u32 objectid, loff_t offset, int type, int length)
{
	key->version = version;

	key->on_disk_key.k_dir_id = dirid;
	key->on_disk_key.k_objectid = objectid;
	set_cpu_key_k_offset(key, offset);
	set_cpu_key_k_type(key, type);
	key->key_length = length;
}

/* take the base (dirid, objectid) of inode_key (it always comes from the
   inode) and the version from the inode, then set the offset and type of
   the key */
void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset,
		  int type, int length)
{
	_make_cpu_key(key, get_inode_item_key_version(inode),
		      le32_to_cpu(INODE_PKEY(inode)->k_dir_id),
		      le32_to_cpu(INODE_PKEY(inode)->k_objectid), offset, type,
		      length);
}
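
/*
 * Editor's illustration (not part of the original file; guarded by #if 0):
 * a typical caller builds the key for the first byte of the 'block'-th
 * logical block of a file exactly the way _get_block_create_0() below does.
 * Reiserfs key offsets are 1-based, hence the "+ 1".
 */
#if 0
	struct cpu_key key;

	/* key length 3; TYPE_ANY because we do not yet know whether the
	 * block lives in an indirect or a direct item */
	make_cpu_key(&key, inode,
		     (loff_t) block * inode->i_sb->s_blocksize + 1,
		     TYPE_ANY, 3);
#endif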

//
// when key is NULL, do not set the version and short key
//
inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
			      int version,
			      loff_t offset, int type, int length,
			      int entry_count /*or ih_free_space */ )
{
	if (key) {
		ih->ih_key.k_dir_id = cpu_to_le32(key->on_disk_key.k_dir_id);
		ih->ih_key.k_objectid =
		    cpu_to_le32(key->on_disk_key.k_objectid);
	}
	put_ih_version(ih, version);
	set_le_ih_k_offset(ih, offset);
	set_le_ih_k_type(ih, type);
	put_ih_item_len(ih, length);
	/*    set_ih_free_space (ih, 0); */
	// for directory items this is the entry count; for direct and stat
	// data items it is 0xffff; for indirect items it is 0
	put_ih_entry_count(ih, entry_count);
}
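
/*
 * Editor's illustration (not part of the original file; guarded by #if 0):
 * reiserfs_get_block() below uses this helper to build the head of a new
 * indirect item holding a single unformatted node pointer: offset 1, item
 * length UNFM_P_SIZE, and free space / entry count 0.
 */
#if 0
	struct item_head tmp_ih;

	make_le_item_head(&tmp_ih, &key, version, 1, TYPE_INDIRECT,
			  UNFM_P_SIZE, 0 /* free_space */ );
#endif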

//
// FIXME: we might cache a recently accessed indirect item

// Ugh.  Not too eager for that....
//  I cut the code until such time as I see a convincing argument (benchmark).
// I don't want a bloated inode struct..., and I don't like code complexity....

/* cutting the code is fine, since it really isn't in use yet and is easy
** to add back in.  But, Vladimir has a really good idea here.  Think
** about what happens when reading a file.  For each page,
** the VFS layer calls reiserfs_readpage, which searches the tree to find
** an indirect item.  This indirect item has X number of pointers, where
** X is a big number if we've done the block allocation right.  But,
** we only use one or two of these pointers during each call to readpage,
** needlessly re-searching the tree again later on.
**
** The size of the cache could be dynamic based on the size of the file.
**
** I'd also like to see us cache the location of the stat data item, since
** we are needlessly re-searching for that frequently.
**
** --chris
*/

/* If this page has a file tail in it, and
** it was read in by get_block_create_0, the page data is valid,
** but the tail is still sitting in a direct item, and we can't write to
** it.  So, look through this page, and check all the mapped buffers
** to make sure they have valid block numbers.  Any that don't must
** be unmapped, so that block_prepare_write will correctly call
** reiserfs_get_block to convert the tail into an unformatted node
*/
static inline void fix_tail_page_for_writing(struct page *page)
{
	struct buffer_head *head, *next, *bh;

	if (page && page_has_buffers(page)) {
		head = page_buffers(page);
		bh = head;
		do {
			next = bh->b_this_page;
			if (buffer_mapped(bh) && bh->b_blocknr == 0) {
				reiserfs_unmap_buffer(bh);
			}
			bh = next;
		} while (bh != head);
	}
}

/* reiserfs_get_block does not need to allocate a block only if allocation has
   been done already or a non-hole position has been found in the indirect item */
static inline int allocation_needed(int retval, b_blocknr_t allocated,
				    struct item_head *ih,
				    __le32 * item, int pos_in_item)
{
	if (allocated)
		return 0;
	if (retval == POSITION_FOUND && is_indirect_le_ih(ih) &&
	    get_block_num(item, pos_in_item))
		return 0;
	return 1;
}

static inline int indirect_item_found(int retval, struct item_head *ih)
{
	return (retval == POSITION_FOUND) && is_indirect_le_ih(ih);
}

static inline void set_block_dev_mapped(struct buffer_head *bh,
					b_blocknr_t block, struct inode *inode)
{
	map_bh(bh, inode->i_sb, block);
}

//
// files which were created by the earlier (3.5) format cannot be larger
// than 2 GB
//
static int file_capable(struct inode *inode, sector_t block)
{
	if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 ||	// it is a new-format file
	    block < (1 << (31 - inode->i_sb->s_blocksize_bits)))	// old file, but 'block' is inside of 2 GB
		return 1;

	return 0;
}
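
/*
 * Editor's note (illustrative arithmetic, not from the original): with a
 * 4KiB block size, s_blocksize_bits is 12, so an old-format file may address
 * blocks 0 .. (1 << (31 - 12)) - 1 = 524287.  524288 blocks * 4096 bytes is
 * exactly 2 GB, which is the limit file_capable() enforces above.
 */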

static int restart_transaction(struct reiserfs_transaction_handle *th,
			       struct inode *inode, struct treepath *path)
{
	struct super_block *s = th->t_super;
	int len = th->t_blocks_allocated;
	int err;

	BUG_ON(!th->t_trans_id);
	BUG_ON(!th->t_refcount);

	pathrelse(path);

	/* we cannot restart while nested */
	if (th->t_refcount > 1) {
		return 0;
	}
	reiserfs_update_sd(th, inode);
	err = journal_end(th, s, len);
	if (!err) {
		err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6);
		if (!err)
			reiserfs_update_inode_transaction(inode);
	}
	return err;
}

// Called by reiserfs_get_block when create == 0. Returns the block number
// for the 'block'-th logical block of the file. When the search lands on
// a direct item, it either returns 0 (when called from bmap, which cannot
// map tail data) or reads the direct item into the relevant piece of the
// page (bh_result).

static int _get_block_create_0(struct inode *inode, sector_t block,
			       struct buffer_head *bh_result, int args)
{
	INITIALIZE_PATH(path);
	struct cpu_key key;
	struct buffer_head *bh;
	struct item_head *ih, tmp_ih;
	int fs_gen;
	b_blocknr_t blocknr;
	char *p = NULL;
	int chars;
	int ret;
	int result;
	int done = 0;
	unsigned long offset;

	// prepare the key to look for the 'block'-th block of the file
	make_cpu_key(&key, inode,
		     (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY,
		     3);

      research:
	result = search_for_position_by_key(inode->i_sb, &key, &path);
	if (result != POSITION_FOUND) {
		pathrelse(&path);
		if (p)
			kunmap(bh_result->b_page);
		if (result == IO_ERROR)
			return -EIO;
		// We do not return -ENOENT if there is a hole but the page is
		// uptodate, because it means that there is some mmapped data
		// associated with it that is yet to be written to disk.
		if ((args & GET_BLOCK_NO_HOLE)
		    && !PageUptodate(bh_result->b_page)) {
			return -ENOENT;
		}
		return 0;
	}
	//
	bh = get_last_bh(&path);
	ih = get_ih(&path);
	if (is_indirect_le_ih(ih)) {
		__le32 *ind_item = (__le32 *) B_I_PITEM(bh, ih);

		/* FIXME: here we could cache the indirect item or part of it in
		   the inode to avoid search_by_key in case of subsequent
		   access to the file */
		blocknr = get_block_num(ind_item, path.pos_in_item);
		ret = 0;
		if (blocknr) {
			map_bh(bh_result, inode->i_sb, blocknr);
			if (path.pos_in_item ==
			    ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) {
				set_buffer_boundary(bh_result);
			}
		} else
			// We do not return -ENOENT if there is a hole but the page is
			// uptodate, because it means that there is some mmapped data
			// associated with it that is yet to be written to disk.
		if ((args & GET_BLOCK_NO_HOLE)
			    && !PageUptodate(bh_result->b_page)) {
			ret = -ENOENT;
		}

		pathrelse(&path);
		if (p)
			kunmap(bh_result->b_page);
		return ret;
	}
	// requested data are in direct item(s)
	if (!(args & GET_BLOCK_READ_DIRECT)) {
		// we are called by bmap. FIXME: we can not map a block of the
		// file when it is stored in direct item(s)
		pathrelse(&path);
		if (p)
			kunmap(bh_result->b_page);
		return -ENOENT;
	}

	/* if we've got a direct item, and the buffer or page was uptodate,
	 ** we don't want to pull data off disk again.  skip to the
	 ** end, where we map the buffer and return
	 */
	if (buffer_uptodate(bh_result)) {
		goto finished;
	} else
		/*
		 ** grab_tail_page can trigger calls to reiserfs_get_block on up to date
		 ** pages without any buffers.  If the page is up to date, we don't want
		 ** to read old data off disk.  Set the up to date bit on the buffer instead
		 ** and jump to the end
		 */
	if (!bh_result->b_page || PageUptodate(bh_result->b_page)) {
		set_buffer_uptodate(bh_result);
		goto finished;
	}
	// read the file tail into part of the page
	offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1);
	fs_gen = get_generation(inode->i_sb);
	copy_item_head(&tmp_ih, ih);

	/* we only want to kmap if we are reading the tail into the page.
	 ** this is not the common case, so we don't kmap until we are
	 ** sure we need to.  But, this means the item might move if
	 ** kmap schedules
	 */
	if (!p) {
		p = (char *)kmap(bh_result->b_page);
		if (fs_changed(fs_gen, inode->i_sb)
		    && item_moved(&tmp_ih, &path)) {
			goto research;
		}
	}
	p += offset;
	memset(p, 0, inode->i_sb->s_blocksize);
	do {
		if (!is_direct_le_ih(ih)) {
			BUG();
		}
		/* make sure we don't read more bytes than actually exist in
		 ** the file.  This can happen in odd cases where i_size isn't
		 ** correct, and when direct item padding results in a few
		 ** extra bytes at the end of the direct item
		 */
		if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
			break;
		if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) {
			chars =
			    inode->i_size - (le_ih_k_offset(ih) - 1) -
			    path.pos_in_item;
			done = 1;
		} else {
			chars = ih_item_len(ih) - path.pos_in_item;
		}
		memcpy(p, B_I_PITEM(bh, ih) + path.pos_in_item, chars);

		if (done)
			break;

		p += chars;

		if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1))
			// we are done if the direct item just read is not the
			// last item of the node.  FIXME: we could try to check
			// the right delimiting key to see whether the direct
			// item continues in the right neighbor, or rely on i_size
			break;

		// update the key to look for the next piece
		set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars);
		result = search_for_position_by_key(inode->i_sb, &key, &path);
		if (result != POSITION_FOUND)
			// i/o error most likely
			break;
		bh = get_last_bh(&path);
		ih = get_ih(&path);
	} while (1);

	flush_dcache_page(bh_result->b_page);
	kunmap(bh_result->b_page);

      finished:
	pathrelse(&path);

	if (result == IO_ERROR)
		return -EIO;

	/* this buffer has valid data, but isn't valid for io.  mapping it to
	 * block #0 tells the rest of reiserfs it just has a tail in it
	 */
	map_bh(bh_result, inode->i_sb, 0);
	set_buffer_uptodate(bh_result);
	return 0;
}

// this is called to create the file's block map, so _get_block_create_0 will
// not read a direct item
static int reiserfs_bmap(struct inode *inode, sector_t block,
			 struct buffer_head *bh_result, int create)
{
	if (!file_capable(inode, block))
		return -EFBIG;

	reiserfs_write_lock(inode->i_sb);
	/* do not read the direct item */
	_get_block_create_0(inode, block, bh_result, 0);
	reiserfs_write_unlock(inode->i_sb);
	return 0;
}
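
/*
 * Editor's sketch (an assumption about the wiring, not shown in this
 * section): the address_space ->bmap entry point typically reaches the
 * helper above through generic_block_bmap(), roughly:
 */
#if 0
static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block)
{
	return generic_block_bmap(as, block, reiserfs_bmap);
}
#endif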

/* special version of get_block that is only used by grab_tail_page right
** now.  It is sent to block_prepare_write, and when you try to get a
** block past the end of the file (or a block from a hole) it returns
** -ENOENT instead of a valid buffer.  block_prepare_write expects to
** be able to do i/o on the buffers returned, unless an error value
** is also returned.
**
** So, this allows block_prepare_write to be used for reading a single block
** in a page, without producing a valid buffer for holes or for blocks past
** the end of the file.  This turns out to be exactly what we need for
** reading tails for conversion.
**
** The point of the wrapper is forcing a certain value for create, even
** though the VFS layer is calling this function with create==1.  If you
** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block,
** don't use this function.
*/
static int reiserfs_get_block_create_0(struct inode *inode, sector_t block,
				       struct buffer_head *bh_result,
				       int create)
{
	return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE);
}
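
/*
 * Editor's sketch (hypothetical call site mirroring what the comment above
 * describes for grab_tail_page; 'page', 'start' and 'blocksize' are made-up
 * names): reading a single block of a page through block_prepare_write()
 * while getting -ENOENT, instead of an io-able buffer, for holes and for
 * blocks past the end of the file.
 */
#if 0
	error = block_prepare_write(page, start, start + blocksize,
				    reiserfs_get_block_create_0);
#endif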

/* This is a special helper for reiserfs_get_block in case we are executing
   a direct_IO request. */
static int reiserfs_get_blocks_direct_io(struct inode *inode,
					 sector_t iblock,
					 struct buffer_head *bh_result,
					 int create)
{
	int ret;

	bh_result->b_page = NULL;

	/* We set the b_size before the reiserfs_get_block call since it is
	   referenced in convert_tail_for_hole() that may be called from
	   reiserfs_get_block() */
	bh_result->b_size = (1 << inode->i_blkbits);

	ret = reiserfs_get_block(inode, iblock, bh_result,
				 create | GET_BLOCK_NO_DANGLE);
	if (ret)
		goto out;

	/* don't allow direct io onto tail pages */
	if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
		/* make sure future calls to the direct io funcs for this offset
		 ** in the file fail by unmapping the buffer
		 */
		clear_buffer_mapped(bh_result);
		ret = -EINVAL;
	}
	/* Possibly an unpacked tail. Flush the data before pages have
	   disappeared */
	if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
		int err;
		lock_kernel();
		err = reiserfs_commit_for_inode(inode);
		REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
		unlock_kernel();
		if (err < 0)
			ret = err;
	}
      out:
	return ret;
}

/*
** helper function for when reiserfs_get_block is called for a hole
** but the file tail is still in a direct item.
** bh_result is the buffer head for the hole;
** tail_offset is the offset of the start of the tail in the file.
**
** This calls prepare_write, which will start a new transaction;
** you should not be in a transaction, or have any paths held when you
** call this.
*/
static int convert_tail_for_hole(struct inode *inode,
				 struct buffer_head *bh_result,
				 loff_t tail_offset)
{
	unsigned long index;
	unsigned long tail_end;
	unsigned long tail_start;
	struct page *tail_page;
	struct page *hole_page = bh_result->b_page;
	int retval = 0;

	if ((tail_offset & (bh_result->b_size - 1)) != 1)
		return -EIO;

	/* always try to read until the end of the block */
	tail_start = tail_offset & (PAGE_CACHE_SIZE - 1);
	tail_end = (tail_start | (bh_result->b_size - 1)) + 1;

	index = tail_offset >> PAGE_CACHE_SHIFT;
	/* hole_page can be NULL in the direct_io case; we are sure
	   that we cannot get here if we write with O_DIRECT into
	   a tail page */
	if (!hole_page || index != hole_page->index) {
		tail_page = grab_cache_page(inode->i_mapping, index);
		retval = -ENOMEM;
		if (!tail_page) {
			goto out;
		}
	} else {
		tail_page = hole_page;
	}

	/* we don't have to make sure the conversion did not happen while
	 ** we were locking the page because anyone that could convert
	 ** must first take i_mutex.
	 **
	 ** We must fix the tail page for writing because it might have buffers
	 ** that are mapped, but have a block number of 0.  This indicates tail
	 ** data that has been read directly into the page, and block_prepare_write
	 ** won't trigger a get_block in this case.
	 */
	fix_tail_page_for_writing(tail_page);
	retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_end);
	if (retval)
		goto unlock;

	/* tail conversion might change the data in the page */
	flush_dcache_page(tail_page);

	retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end);

      unlock:
	if (tail_page != hole_page) {
		unlock_page(tail_page);
		page_cache_release(tail_page);
	}
      out:
	return retval;
}
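
/*
 * Editor's worked example (not in the original): with 4KiB blocks and 4KiB
 * pages, a tail whose block starts at byte 8192 has tail_offset 8193
 * (reiserfs offsets are 1-based), which passes the alignment check above
 * since 8193 & 4095 == 1.  Then tail_start = 8193 & (PAGE_CACHE_SIZE - 1)
 * = 1, tail_end = (1 | 4095) + 1 = 4096, and index = 8193 >> PAGE_CACHE_SHIFT
 * = 2, i.e. the whole block within the third page is prepared and committed.
 */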

static inline int _allocate_block(struct reiserfs_transaction_handle *th,
				  sector_t block,
				  struct inode *inode,
				  b_blocknr_t * allocated_block_nr,
				  struct treepath *path, int flags)
{
	BUG_ON(!th->t_trans_id);

#ifdef REISERFS_PREALLOCATE
	if (!(flags & GET_BLOCK_NO_IMUX)) {
		return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr,
						  path, block);
	}
#endif
	return reiserfs_new_unf_blocknrs(th, inode, allocated_block_nr, path,
					 block);
}

int reiserfs_get_block(struct inode *inode, sector_t block,
		       struct buffer_head *bh_result, int create)
{
	int repeat, retval = 0;
	b_blocknr_t allocated_block_nr = 0;	// b_blocknr_t is (unsigned) 32 bit int
	INITIALIZE_PATH(path);
	int pos_in_item;
	struct cpu_key key;
	struct buffer_head *bh, *unbh = NULL;
	struct item_head *ih, tmp_ih;
	__le32 *item;
	int done;
	int fs_gen;
	struct reiserfs_transaction_handle *th = NULL;
	/* space reserved in transaction batch:
	   . 3 balancings in direct->indirect conversion
	   . 1 block involved in reiserfs_update_sd()
	   XXX in the practically impossible worst case direct2indirect()
	   can incur (much) more than 3 balancings.
	   quota update for user, group */
	int jbegin_count =
	    JOURNAL_PER_BALANCE_CNT * 3 + 1 +
	    2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
	int version;
	int dangle = 1;
	loff_t new_offset =
	    (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1;

	/* bad.... */
	reiserfs_write_lock(inode->i_sb);
	version = get_inode_item_key_version(inode);

	if (!file_capable(inode, block)) {
		reiserfs_write_unlock(inode->i_sb);
		return -EFBIG;
	}

	/* if !create, we aren't changing the FS, so we don't need to
	 ** log anything, so we don't need to start a transaction
	 */
	if (!(create & GET_BLOCK_CREATE)) {
		int ret;
		/* find the block number of the 'block'-th logical block of the file */
		ret = _get_block_create_0(inode, block, bh_result,
					  create | GET_BLOCK_READ_DIRECT);
		reiserfs_write_unlock(inode->i_sb);
		return ret;
	}
	/*
	 * if we're already in a transaction, make sure to close
	 * any new transactions we start in this func
	 */
	if ((create & GET_BLOCK_NO_DANGLE) ||
	    reiserfs_transaction_running(inode->i_sb))
		dangle = 0;

	/* If the file is of such a size that it might have a tail, and tails
	 ** are enabled, we should mark it as possibly needing tail packing
	 ** on close
	 */
	if ((have_large_tails(inode->i_sb)
	     && inode->i_size < i_block_size(inode) * 4)
	    || (have_small_tails(inode->i_sb)
		&& inode->i_size < i_block_size(inode)))
		REISERFS_I(inode)->i_flags |= i_pack_on_close_mask;

	/* set the key of the first byte in the 'block'-th block of the file */
	make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ );
	if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
	      start_trans:
		th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count);
		if (!th) {
			retval = -ENOMEM;
			goto failure;
		}
		reiserfs_update_inode_transaction(inode);
	}
      research:

	retval = search_for_position_by_key(inode->i_sb, &key, &path);
	if (retval == IO_ERROR) {
		retval = -EIO;
		goto failure;
	}

	bh = get_last_bh(&path);
	ih = get_ih(&path);
	item = get_item(&path);
	pos_in_item = path.pos_in_item;

	fs_gen = get_generation(inode->i_sb);
	copy_item_head(&tmp_ih, ih);

	if (allocation_needed
	    (retval, allocated_block_nr, ih, item, pos_in_item)) {
		/* we have to allocate a block for the unformatted node */
		if (!th) {
			pathrelse(&path);
			goto start_trans;
		}

		repeat =
		    _allocate_block(th, block, inode, &allocated_block_nr,
				    &path, create);

		if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) {
			/* restart the transaction to give the journal a chance to free
			 ** some blocks.  releases the path, so we have to go back to
			 ** research if we succeed on the second try
			 */
			SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1;
			retval = restart_transaction(th, inode, &path);
			if (retval)
				goto failure;
			repeat =
			    _allocate_block(th, block, inode,
					    &allocated_block_nr, NULL, create);

			if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) {
				goto research;
			}
			if (repeat == QUOTA_EXCEEDED)
				retval = -EDQUOT;
			else
				retval = -ENOSPC;
			goto failure;
		}

		if (fs_changed(fs_gen, inode->i_sb)
		    && item_moved(&tmp_ih, &path)) {
			goto research;
		}
	}

	if (indirect_item_found(retval, ih)) {
		b_blocknr_t unfm_ptr;
		/* The 'block'-th block is in the file already (there is a
		   corresponding cell in some indirect item). But it may be a
		   zero unformatted node pointer (hole) */
		unfm_ptr = get_block_num(item, pos_in_item);
		if (unfm_ptr == 0) {
			/* use the allocated block to plug the hole */
			reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
			if (fs_changed(fs_gen, inode->i_sb)
			    && item_moved(&tmp_ih, &path)) {
				reiserfs_restore_prepared_buffer(inode->i_sb,
								 bh);
				goto research;
			}
			set_buffer_new(bh_result);
			if (buffer_dirty(bh_result)
			    && reiserfs_data_ordered(inode->i_sb))
				reiserfs_add_ordered_list(inode, bh_result);
			put_block_num(item, pos_in_item, allocated_block_nr);
			unfm_ptr = allocated_block_nr;
			journal_mark_dirty(th, inode->i_sb, bh);
			reiserfs_update_sd(th, inode);
		}
		set_block_dev_mapped(bh_result, unfm_ptr, inode);
		pathrelse(&path);
		retval = 0;
		if (!dangle && th)
			retval = reiserfs_end_persistent_transaction(th);

		reiserfs_write_unlock(inode->i_sb);

		/* the item was found, so new blocks were not added to the file;
		 ** there is no need to make sure the inode is updated with this
		 ** transaction
		 */
		return retval;
	}

	if (!th) {
		pathrelse(&path);
		goto start_trans;
	}

	/* the desired position is not found or is in the direct item. We have
	   to append the file with holes up to the 'block'-th block, converting
	   direct items to indirect ones if necessary */
	done = 0;
	do {
		if (is_statdata_le_ih(ih)) {
			__le32 unp = 0;
			struct cpu_key tmp_key;

			/* indirect item has to be inserted */
			make_le_item_head(&tmp_ih, &key, version, 1,
					  TYPE_INDIRECT, UNFM_P_SIZE,
					  0 /* free_space */ );

			if (cpu_key_k_offset(&key) == 1) {
				/* we are going to add the 'block'-th block to the
				   file. Use the allocated block for that */
				unp = cpu_to_le32(allocated_block_nr);
				set_block_dev_mapped(bh_result,
						     allocated_block_nr, inode);
				set_buffer_new(bh_result);
				done = 1;
			}
			tmp_key = key;	// ;)
			set_cpu_key_k_offset(&tmp_key, 1);
			PATH_LAST_POSITION(&path)++;

			retval =
			    reiserfs_insert_item(th, &path, &tmp_key, &tmp_ih,
						 inode, (char *)&unp);
			if (retval) {
				reiserfs_free_block(th, inode,
						    allocated_block_nr, 1);
				goto failure;	// retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST
			}
			//mark_tail_converted (inode);
		} else if (is_direct_le_ih(ih)) {
			/* direct item has to be converted */
			loff_t tail_offset;

			tail_offset =
			    ((le_ih_k_offset(ih) -
			      1) & ~(inode->i_sb->s_blocksize - 1)) + 1;
			if (tail_offset == cpu_key_k_offset(&key)) {
				/* the direct item we just found fits into the block we
				   have to map. Convert it into an unformatted node: use
				   bh_result for the conversion */
				set_block_dev_mapped(bh_result,
						     allocated_block_nr, inode);
				unbh = bh_result;
				done = 1;
			} else {
				/* we have to pad the file tail stored in direct item(s)
				   up to the block size and convert it to an unformatted
				   node. FIXME: this should also get into the page cache */

				pathrelse(&path);
				/*
				 * ugly, but we can only end the transaction if
				 * we aren't nested
				 */
				BUG_ON(!th->t_refcount);
				if (th->t_refcount == 1) {
					retval =
					    reiserfs_end_persistent_transaction
					    (th);
					th = NULL;
					if (retval)
						goto failure;
				}

				retval =
				    convert_tail_for_hole(inode, bh_result,
							  tail_offset);
				if (retval) {
					if (retval != -ENOSPC)
						reiserfs_error(inode->i_sb,
							"clm-6004",
							"convert tail failed "
							"inode %lu, error %d",
							inode->i_ino,
							retval);
					if (allocated_block_nr) {
						/* the bitmap, the super, and the stat data == 3 */
						if (!th)
							th = reiserfs_persistent_transaction(inode->i_sb, 3);
						if (th)
							reiserfs_free_block(th,
									    inode,
									    allocated_block_nr,
									    1);
					}
					goto failure;
				}
				goto research;
			}
			retval =
			    direct2indirect(th, inode, &path, unbh,
					    tail_offset);
			if (retval) {
				reiserfs_unmap_buffer(unbh);
				reiserfs_free_block(th, inode,
						    allocated_block_nr, 1);
				goto failure;
			}
			/* it is important that the set_buffer_uptodate is done after
			 ** the direct2indirect.  The buffer might contain valid
			 ** data newer than the data on disk (read by readpage, changed,
			 ** and then sent here by writepage).  direct2indirect needs
			 ** to know if unbh was already up to date, so it can decide
			 ** if the data in unbh needs to be replaced with data from
			 ** the disk
			 */
			set_buffer_uptodate(unbh);

			/* unbh->b_page == NULL in case of a DIRECT_IO request; this means
			   the buffer will disappear shortly, so it should not be added to
			   the tail list
			 */
			if (unbh->b_page) {
				/* we've converted the tail, so we must
				 ** flush unbh before the transaction commits
				 */
				reiserfs_add_tail_list(inode, unbh);

				/* mark it dirty now to prevent commit_write from adding
				 ** this buffer to the inode's dirty buffer list
				 */
				/*
				 * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty().
				 * It's still atomic, but it sets the page dirty too,
				 * which makes it eligible for writeback at any time by the
				 * VM (which was also the case with __mark_buffer_dirty())
				 */
				mark_buffer_dirty(unbh);
			}
		} else {
			/* append the indirect item with holes if needed; when
			   appending the pointer to the 'block'-th block, use the
			   block that is already allocated */
			struct cpu_key tmp_key;
			unp_t unf_single = 0;	// We use this in case we need to allocate
			// only one block, which is a fast path
			unp_t *un;
			__u64 max_to_insert =
			    MAX_ITEM_LEN(inode->i_sb->s_blocksize) /
			    UNFM_P_SIZE;
			__u64 blocks_needed;

			RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE,
			       "vs-804: invalid position for append");
			/* indirect item has to be appended, set up the key of that position */
			make_cpu_key(&tmp_key, inode,
				     le_key_k_offset(version,
						     &(ih->ih_key)) +
				     op_bytes_number(ih,
						     inode->i_sb->s_blocksize),
				     //pos_in_item * inode->i_sb->s_blocksize,
				     TYPE_INDIRECT, 3);	// key type is unimportant

			RFALSE(cpu_key_k_offset(&tmp_key) > cpu_key_k_offset(&key),
			       "green-805: invalid offset");
			blocks_needed =
			    1 +
			    ((cpu_key_k_offset(&key) -
			      cpu_key_k_offset(&tmp_key)) >> inode->i_sb->
			     s_blocksize_bits);

			if (blocks_needed == 1) {
				un = &unf_single;
			} else {
				un = kzalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_ATOMIC);	// We need to avoid scheduling.
				if (!un) {
					un = &unf_single;
					blocks_needed = 1;
					max_to_insert = 0;
				}
			}
			if (blocks_needed <= max_to_insert) {
				/* we are going to add the target block to the file. Use
				   the allocated block for that */
				un[blocks_needed - 1] =
				    cpu_to_le32(allocated_block_nr);
				set_block_dev_mapped(bh_result,
						     allocated_block_nr, inode);
				set_buffer_new(bh_result);
				done = 1;
			} else {
				/* paste the hole into the indirect item */
				/* If kzalloc failed, max_to_insert becomes zero and it means we
				   only have space for one block */
				blocks_needed =
				    max_to_insert ? max_to_insert : 1;
			}
			retval =
			    reiserfs_paste_into_item(th, &path, &tmp_key, inode,
						     (char *)un,
						     UNFM_P_SIZE *
						     blocks_needed);

			if (blocks_needed != 1)
				kfree(un);

			if (retval) {
				reiserfs_free_block(th, inode,
						    allocated_block_nr, 1);
				goto failure;
			}
			if (!done) {
				/* We need to mark the new file size in case this function
				   is interrupted/aborted later on. And we may do this only
				   for holes. */
				inode->i_size +=
				    inode->i_sb->s_blocksize * blocks_needed;
			}
		}

		if (done == 1)
			break;

		/* this loop could log more blocks than we had originally asked
		 ** for.  So, we have to allow the transaction to end if it is
		 ** too big or too full.  Update the inode so things are
		 ** consistent if we crash before the function returns
		 **
		 ** release the path so that anybody waiting on the path before
		 ** ending their transaction will be able to continue.
		 */
		if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
			retval = restart_transaction(th, inode, &path);
			if (retval)
				goto failure;
		}
		/* inserting indirect pointers for a hole can take a
		 ** long time.  reschedule if needed
		 */
		cond_resched();

		retval = search_for_position_by_key(inode->i_sb, &key, &path);
		if (retval == IO_ERROR) {
			retval = -EIO;
			goto failure;
		}
		if (retval == POSITION_FOUND) {
			reiserfs_warning(inode->i_sb, "vs-825",
					 "%K should not be found", &key);
			retval = -EEXIST;
			if (allocated_block_nr)
				reiserfs_free_block(th, inode,
						    allocated_block_nr, 1);
			pathrelse(&path);
			goto failure;
		}
		bh = get_last_bh(&path);
		ih = get_ih(&path);
		item = get_item(&path);
		pos_in_item = path.pos_in_item;
	} while (1);

	retval = 0;

      failure:
	if (th && (!dangle || (retval && !th->t_trans_id))) {
		int err;
		if (th->t_trans_id)
			reiserfs_update_sd(th, inode);
		err = reiserfs_end_persistent_transaction(th);
		if (err)
			retval = err;
	}

	reiserfs_write_unlock(inode->i_sb);
	reiserfs_check_path(&path);
	return retval;
}

static int
reiserfs_readpages(struct file *file, struct address_space *mapping,
		   struct list_head *pages, unsigned nr_pages)
{
	return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block);
}

/* Compute the real number of bytes used by a file.
 * The following three functions can go away once there is enough space in
 * the stat item.
 */
static int real_space_diff(struct inode *inode, int sd_size)
{
	int bytes;
	loff_t blocksize = inode->i_sb->s_blocksize;

	if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode))
		return sd_size;

	/* End of file is also in a full block with an indirect reference, so
	 ** round up to the next block.
	 **
	 ** there is just no way to know if the tail is actually packed
	 ** on the file, so we have to assume it isn't.  When we pack the
	 ** tail, we add 4 bytes to pretend there really is an unformatted
	 ** node pointer
	 */
	bytes =
	    ((inode->i_size +
	      (blocksize - 1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE +
	    sd_size;
	return bytes;
}

static inline loff_t to_real_used_space(struct inode *inode, ulong blocks,
					int sd_size)
{
	if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) {
		return inode->i_size +
		    (loff_t) (real_space_diff(inode, sd_size));
	}
	return ((loff_t) real_space_diff(inode, sd_size)) +
	    (((loff_t) blocks) << 9);
}

/* Compute the number of blocks used by a file in ReiserFS counting */
static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size)
{
	loff_t bytes = inode_get_bytes(inode);
	loff_t real_space = real_space_diff(inode, sd_size);

	/* keeps fsck and non-quota versions of reiserfs happy */
	if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) {
		bytes += (loff_t) 511;
	}

	/* files from before the quota patch might have i_blocks such that
	 ** bytes < real_space.  Deal with that here to prevent it from
	 ** going negative.
	 */
	if (bytes < real_space)
		return 0;
	return (bytes - real_space) >> 9;
}
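
/*
 * Editor's worked example (assumes SD_V2_SIZE == 44 and UNFM_P_SIZE == 4): a
 * 1000-byte regular file on a 4KiB-blocksize fs has real_space_diff() =
 * ((1000 + 4095) >> 12) * 4 + 44 = 48.  If the file owns one 4KiB block
 * (8 sectors), to_real_used_space() stores 48 + (8 << 9) = 4144 bytes, and
 * to_fake_used_blocks() recovers (4144 - 48) >> 9 = 8 sectors: the two
 * conversions are inverses of each other.
 */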

//
// BAD: new directories have a stat data of the new type and all other items
// of the old type. The version stored in the inode describes the body items,
// so in update_stat_data we cannot rely on the inode, but have to check the
// item version directly
//

// called by read_locked_inode
static void init_inode(struct inode *inode, struct treepath *path)
{
	struct buffer_head *bh;
	struct item_head *ih;
	__u32 rdev;
	//int version = ITEM_VERSION_1;

	bh = PATH_PLAST_BUFFER(path);
	ih = PATH_PITEM_HEAD(path);

	copy_key(INODE_PKEY(inode), &(ih->ih_key));

	INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
	REISERFS_I(inode)->i_flags = 0;
	REISERFS_I(inode)->i_prealloc_block = 0;
	REISERFS_I(inode)->i_prealloc_count = 0;
	REISERFS_I(inode)->i_trans_id = 0;
	REISERFS_I(inode)->i_jl = NULL;
	mutex_init(&(REISERFS_I(inode)->i_mmap));
	reiserfs_init_acl_access(inode);
	reiserfs_init_acl_default(inode);
	reiserfs_init_xattr_rwsem(inode);

	if (stat_data_v1(ih)) {
		struct stat_data_v1 *sd =
		    (struct stat_data_v1 *)B_I_PITEM(bh, ih);
		unsigned long blocks;

		set_inode_item_key_version(inode, KEY_FORMAT_3_5);
		set_inode_sd_version(inode, STAT_DATA_V1);
		inode->i_mode = sd_v1_mode(sd);
		inode->i_nlink = sd_v1_nlink(sd);
		inode->i_uid = sd_v1_uid(sd);
		inode->i_gid = sd_v1_gid(sd);
		inode->i_size = sd_v1_size(sd);
		inode->i_atime.tv_sec = sd_v1_atime(sd);
		inode->i_mtime.tv_sec = sd_v1_mtime(sd);
		inode->i_ctime.tv_sec = sd_v1_ctime(sd);
		inode->i_atime.tv_nsec = 0;
		inode->i_ctime.tv_nsec = 0;
		inode->i_mtime.tv_nsec = 0;

		inode->i_blocks = sd_v1_blocks(sd);
		inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
		blocks = (inode->i_size + 511) >> 9;
		blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9);
		if (inode->i_blocks > blocks) {
			// there was a bug in <=3.5.23 when i_blocks could take negative
			// values. Starting from 3.5.17 this value could even be stored in
			// stat data. For such files we set i_blocks based on the file
			// size. Two notes: this can be wrong for sparse files, and the
			// on-disk value will only be updated if the file's inode ever
			// changes
			inode->i_blocks = blocks;
		}

		rdev = sd_v1_rdev(sd);
		REISERFS_I(inode)->i_first_direct_byte =
		    sd_v1_first_direct_byte(sd);
		/* an early bug in the quota code can give us an odd number for the
		 ** block count.  This is incorrect, fix it here.
		 */
		if (inode->i_blocks & 1) {
			inode->i_blocks++;
		}
		inode_set_bytes(inode,
				to_real_used_space(inode, inode->i_blocks,
						   SD_V1_SIZE));
		/* nopack is initially zero for v1 objects. For v2 objects,
		   nopack is initialised from sd_attrs */
		REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
	} else {
		// new stat data found, but the object may have old items
		// (directories and symlinks)
		struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih);

		inode->i_mode = sd_v2_mode(sd);
		inode->i_nlink = sd_v2_nlink(sd);
		inode->i_uid = sd_v2_uid(sd);
		inode->i_size = sd_v2_size(sd);
		inode->i_gid = sd_v2_gid(sd);
		inode->i_mtime.tv_sec = sd_v2_mtime(sd);
		inode->i_atime.tv_sec = sd_v2_atime(sd);
		inode->i_ctime.tv_sec = sd_v2_ctime(sd);
		inode->i_ctime.tv_nsec = 0;
		inode->i_mtime.tv_nsec = 0;
		inode->i_atime.tv_nsec = 0;
		inode->i_blocks = sd_v2_blocks(sd);
		rdev = sd_v2_rdev(sd);
		if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
			inode->i_generation =
			    le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
		else
			inode->i_generation = sd_v2_generation(sd);

		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
			set_inode_item_key_version(inode, KEY_FORMAT_3_5);
		else
			set_inode_item_key_version(inode, KEY_FORMAT_3_6);
		REISERFS_I(inode)->i_first_direct_byte = 0;
		set_inode_sd_version(inode, STAT_DATA_V2);
		inode_set_bytes(inode,
				to_real_used_space(inode, inode->i_blocks,
						   SD_V2_SIZE));
		/* read persistent inode attributes from sd and initialise
		   generic inode flags from them */
		REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd);
		sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode);
	}

	pathrelse(path);
	if (S_ISREG(inode->i_mode)) {
		inode->i_op = &reiserfs_file_inode_operations;
		inode->i_fop = &reiserfs_file_operations;
		inode->i_mapping->a_ops = &reiserfs_address_space_operations;
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &reiserfs_dir_inode_operations;
		inode->i_fop = &reiserfs_dir_operations;
	} else if (S_ISLNK(inode->i_mode)) {
		inode->i_op = &reiserfs_symlink_inode_operations;
		inode->i_mapping->a_ops = &reiserfs_address_space_operations;
	} else {
		inode->i_blocks = 0;
		inode->i_op = &reiserfs_special_inode_operations;
		init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
	}
}
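
/*
 * Editor's worked example (not in the original) for the v1 i_blocks fixup
 * in init_inode() above: a 10000-byte file on a 4KiB-blocksize fs gives
 * blocks = (10000 + 511) >> 9 = 20 sectors, rounded up by
 * _ROUND_UP(20, 4096 >> 9) = _ROUND_UP(20, 8) = 24; any larger (corrupt)
 * on-disk i_blocks value is replaced by 24.
 */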

// update new stat data with inode fields
static void inode2sd(void *sd, struct inode *inode, loff_t size)
{
	struct stat_data *sd_v2 = (struct stat_data *)sd;
	__u16 flags;

	set_sd_v2_mode(sd_v2, inode->i_mode);
	set_sd_v2_nlink(sd_v2, inode->i_nlink);
	set_sd_v2_uid(sd_v2, inode->i_uid);
	set_sd_v2_size(sd_v2, size);
	set_sd_v2_gid(sd_v2, inode->i_gid);
	set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec);
	set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec);
	set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec);
	set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE));
	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
		set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev));
	else
		set_sd_v2_generation(sd_v2, inode->i_generation);
	flags = REISERFS_I(inode)->i_attrs;
	i_attrs_to_sd_attrs(inode, &flags);
	set_sd_v2_attrs(sd_v2, flags);
}

// used to copy the inode's fields to old stat data
static void inode2sd_v1(void *sd, struct inode *inode, loff_t size)
{
	struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd;

	set_sd_v1_mode(sd_v1, inode->i_mode);
	set_sd_v1_uid(sd_v1, inode->i_uid);
	set_sd_v1_gid(sd_v1, inode->i_gid);
	set_sd_v1_nlink(sd_v1, inode->i_nlink);
	set_sd_v1_size(sd_v1, size);
	set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec);
	set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec);
	set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec);

	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
		set_sd_v1_rdev(sd_v1, new_encode_dev(inode->i_rdev));
	else
		set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE));

	// Sigh. i_first_direct_byte is back
	set_sd_v1_first_direct_byte(sd_v1,
				    REISERFS_I(inode)->i_first_direct_byte);
}

/* NOTE, you must prepare the buffer head before sending it here,
** and then log it after the call
*/
static void update_stat_data(struct treepath *path, struct inode *inode,
			     loff_t size)
{
	struct buffer_head *bh;
	struct item_head *ih;

	bh = PATH_PLAST_BUFFER(path);
	ih = PATH_PITEM_HEAD(path);

	if (!is_statdata_le_ih(ih))
		reiserfs_panic(inode->i_sb, "vs-13065", "key %k, found item %h",
			       INODE_PKEY(inode), ih);

	if (stat_data_v1(ih)) {
		// path points to old stat data
		inode2sd_v1(B_I_PITEM(bh, ih), inode, size);
	} else {
		inode2sd(B_I_PITEM(bh, ih), inode, size);
	}

	return;
}

void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
			     struct inode *inode, loff_t size)
{
	struct cpu_key key;
	INITIALIZE_PATH(path);
	struct buffer_head *bh;
	int fs_gen;
	struct item_head *ih, tmp_ih;
	int retval;

	BUG_ON(!th->t_trans_id);

	make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);	//key type is unimportant

	for (;;) {
		int pos;
		/* look for the object's stat data */
		retval = search_item(inode->i_sb, &key, &path);
		if (retval == IO_ERROR) {
			reiserfs_error(inode->i_sb, "vs-13050",
				       "i/o failure occurred trying to "
				       "update %K stat data", &key);
			return;
		}
		if (retval == ITEM_NOT_FOUND) {
			pos = PATH_LAST_POSITION(&path);
			pathrelse(&path);
			if (inode->i_nlink == 0) {
				/*reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found"); */
				return;
			}
			reiserfs_warning(inode->i_sb, "vs-13060",
					 "stat data of object %k (nlink == %d) "
					 "not found (pos %d)",
					 INODE_PKEY(inode), inode->i_nlink,
					 pos);
			reiserfs_check_path(&path);
			return;
		}

		/* sigh, prepare_for_journal might schedule.  When it schedules the
		 ** FS might change.  We have to detect that, and loop back to the
		 ** search if the stat data item has moved
		 */
		bh = get_last_bh(&path);
		ih = get_ih(&path);
		copy_item_head(&tmp_ih, ih);
		fs_gen = get_generation(inode->i_sb);
		reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
		if (fs_changed(fs_gen, inode->i_sb)
		    && item_moved(&tmp_ih, &path)) {
			reiserfs_restore_prepared_buffer(inode->i_sb, bh);
			continue;	/* Stat_data item has been moved after scheduling. */
		}
		break;
	}
	update_stat_data(&path, inode, size);
	journal_mark_dirty(th, th->t_super, bh);
	pathrelse(&path);
	return;
}

/* reiserfs_read_locked_inode is called to read the inode off disk, and it
** does a make_bad_inode when things go wrong.  But, we need to make sure
** to clear the key in the private portion of the inode, otherwise a
** corresponding iput might try to delete whatever object the inode last
** represented.
*/
static void reiserfs_make_bad_inode(struct inode *inode)
{
	memset(INODE_PKEY(inode), 0, KEY_SIZE);
	make_bad_inode(inode);
}

//
// initially this function was derived from minix or ext2's analog and
// evolved as the prototype did
//

int reiserfs_init_locked_inode(struct inode *inode, void *p)
{
	struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p;
	inode->i_ino = args->objectid;
	INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid);
	return 0;
}

/* looks for the stat data in the tree, and fills in the fields of the
   in-core inode from the stat data found */
void reiserfs_read_locked_inode(struct inode *inode,
				struct reiserfs_iget_args *args)
{
	INITIALIZE_PATH(path_to_sd);
	struct cpu_key key;
	unsigned long dirino;
	int retval;

	dirino = args->dirid;

	/* set version 1; version 2 could be used too, because the stat data
	   key is the same in both versions */
	key.version = KEY_FORMAT_3_5;
	key.on_disk_key.k_dir_id = dirino;
	key.on_disk_key.k_objectid = inode->i_ino;
	key.on_disk_key.k_offset = 0;
	key.on_disk_key.k_type = 0;

	/* look for the object's stat data */
	retval = search_item(inode->i_sb, &key, &path_to_sd);
	if (retval == IO_ERROR) {
		reiserfs_error(inode->i_sb, "vs-13070",
			       "i/o failure occurred trying to find "
			       "stat data of %K", &key);
		reiserfs_make_bad_inode(inode);
		return;
	}
	if (retval != ITEM_FOUND) {
		/* a stale NFS handle can trigger this without it being an error */
		pathrelse(&path_to_sd);
		reiserfs_make_bad_inode(inode);
		inode->i_nlink = 0;
		return;
	}

	init_inode(inode, &path_to_sd);

	/* It is possible that knfsd is trying to access the inode of a file
	   that is being removed from the disk by some other thread. As we
	   update the sd on unlink, all that is required is to check for nlink
	   here. This bug was first found by Sizif when debugging
	   SquidNG/Butterfly, forgotten, and found again after Philippe
	   Gramoulle <philippe.gramoulle@mmania.com> reproduced it.

	   A more logical fix would require changes in fs/inode.c:iput() to
	   remove the inode from the hash-table _after_ the fs has cleaned disk
	   stuff up, and in iget() to return NULL if an I_FREEING inode is
	   found in the hash-table. */
	/* Currently there is one place where it's ok to meet an inode with
	   nlink==0: processing of open-unlinked and half-truncated files
	   during mount (fs/reiserfs/super.c:finish_unfinished()). */
	if ((inode->i_nlink == 0) &&
	    !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) {
		reiserfs_warning(inode->i_sb, "vs-13075",
				 "dead inode read from disk %K. "
				 "This is likely to be race with knfsd. Ignore",
				 &key);
		reiserfs_make_bad_inode(inode);
	}

	reiserfs_check_path(&path_to_sd);	/* init_inode should have released the path */

}

/**
 * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked().
 *
 * @inode:    inode from hash table to check
 * @opaque:   "cookie" passed to iget5_locked(). This is &reiserfs_iget_args.
 *
 * This function is called by iget5_locked() to distinguish reiserfs inodes
 * having the same inode numbers. Such inodes can only exist due to some
 * error condition. One of them should be bad. Inodes with identical
 * inode numbers (objectids) are distinguished by parent directory ids.
 *
 */
int reiserfs_find_actor(struct inode *inode, void *opaque)
{
	struct reiserfs_iget_args *args;

	args = opaque;
	/* args is already in CPU order */
	return (inode->i_ino == args->objectid) &&
	    (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid);
}

struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key)
{
	struct inode *inode;
	struct reiserfs_iget_args args;

	args.objectid = key->on_disk_key.k_objectid;
	args.dirid = key->on_disk_key.k_dir_id;
	inode = iget5_locked(s, key->on_disk_key.k_objectid,
			     reiserfs_find_actor, reiserfs_init_locked_inode,
			     (void *)(&args));
	if (!inode)
		return ERR_PTR(-ENOMEM);

	if (inode->i_state & I_NEW) {
		reiserfs_read_locked_inode(inode, &args);
		unlock_new_inode(inode);
	}

	if (comp_short_keys(INODE_PKEY(inode), key) || is_bad_inode(inode)) {
		/* either due to i/o error or a stale NFS handle */
		iput(inode);
		inode = NULL;
	}
	return inode;
}

static struct dentry *reiserfs_get_dentry(struct super_block *sb,
	u32 objectid, u32 dir_id, u32 generation)

{
	struct cpu_key key;
	struct inode *inode;

	key.on_disk_key.k_objectid = objectid;
	key.on_disk_key.k_dir_id = dir_id;
	reiserfs_write_lock(sb);
	inode = reiserfs_iget(sb, &key);
	if (inode && !IS_ERR(inode) && generation != 0 &&
	    generation != inode->i_generation) {
		iput(inode);
		inode = NULL;
	}
	reiserfs_write_unlock(sb);

	return d_obtain_alias(inode);
}

struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
		int fh_len, int fh_type)
{
	/* fhtype happens to reflect the number of u32s encoded.
	 * due to a bug in earlier code, fhtype might indicate there
	 * are more u32s than were actually written.
	 * so if fhtype seems to be larger than len, reduce fhtype.
	 * Valid types are:
	 *   2 - objectid + dir_id - legacy support
	 *   3 - objectid + dir_id + generation
	 *   4 - objectid + dir_id + objectid and dirid of parent - legacy
	 *   5 - objectid + dir_id + generation + objectid and dirid of parent
	 *   6 - as above plus generation of directory
	 * 6 does not fit in NFSv2 handles
	 */
	if (fh_type > fh_len) {
		if (fh_type != 6 || fh_len != 5)
			reiserfs_warning(sb, "reiserfs-13077",
				"nfsd/reiserfs, fhtype=%d, len=%d - odd",
				fh_type, fh_len);
		fh_type = 5;
	}

	return reiserfs_get_dentry(sb, fid->raw[0], fid->raw[1],
		(fh_type == 3 || fh_type >= 5) ? fid->raw[2] : 0);
}

struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid,
		int fh_len, int fh_type)
{
	if (fh_type < 4)
		return NULL;

	return reiserfs_get_dentry(sb,
		(fh_type >= 5) ? fid->raw[3] : fid->raw[2],
		(fh_type >= 5) ? fid->raw[4] : fid->raw[3],
		(fh_type == 6) ? fid->raw[5] : 0);
}

int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
		       int need_parent)
{
	struct inode *inode = dentry->d_inode;
	int maxlen = *lenp;

	if (maxlen < 3)
		return 255;

	data[0] = inode->i_ino;
	data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
	data[2] = inode->i_generation;
	*lenp = 3;
	/* no room for directory info? return what we've stored so far */
	if (maxlen < 5 || !need_parent)
		return 3;

	spin_lock(&dentry->d_lock);
	inode = dentry->d_parent->d_inode;
	data[3] = inode->i_ino;
	data[4] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
	*lenp = 5;
	if (maxlen >= 6) {
		data[5] = inode->i_generation;
		*lenp = 6;
	}
	spin_unlock(&dentry->d_lock);
	return *lenp;
}
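
/*
 * Editor's summary (derived from the code above, not in the original): the
 * resulting file handle layout, 3, 5 or 6 u32s long, is
 *
 *   data[0] = objectid        data[3] = parent objectid
 *   data[1] = dir_id          data[4] = parent dir_id
 *   data[2] = generation      data[5] = parent generation
 *
 * which is exactly what reiserfs_fh_to_dentry() and reiserfs_fh_to_parent()
 * decode.
 */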

/* looks for the stat data, then copies the inode's fields into it, and marks
   the buffer containing the stat data as dirty */
/* reiserfs inodes are never really dirty, since the dirty inode call
** always logs them.  This call allows the VFS inode marking routines
** to properly mark inodes for datasync and such, but only actually
** does something when called for a synchronous update.
*/
int reiserfs_write_inode(struct inode *inode, int do_sync)
{
	struct reiserfs_transaction_handle th;
	int jbegin_count = 1;

	if (inode->i_sb->s_flags & MS_RDONLY)
		return -EROFS;
	/* memory pressure can sometimes initiate write_inode calls with sync == 1;
	 ** these cases are just when the system needs ram, not when the
	 ** inode needs to reach disk for safety, and they can safely be
	 ** ignored because the altered inode has already been logged.
	 */
	if (do_sync && !(current->flags & PF_MEMALLOC)) {
		reiserfs_write_lock(inode->i_sb);
		if (!journal_begin(&th, inode->i_sb, jbegin_count)) {
			reiserfs_update_sd(&th, inode);
			journal_end_sync(&th, inode->i_sb, jbegin_count);
		}
		reiserfs_write_unlock(inode->i_sb);
	}
	return 0;
}
1636 
1637 /* stat data of new object is inserted already, this inserts the item
1638    containing "." and ".." entries */
1639 static int reiserfs_new_directory(struct reiserfs_transaction_handle *th,
1640 				  struct inode *inode,
1641 				  struct item_head *ih, struct treepath *path,
1642 				  struct inode *dir)
1643 {
1644 	struct super_block *sb = th->t_super;
1645 	char empty_dir[EMPTY_DIR_SIZE];
1646 	char *body = empty_dir;
1647 	struct cpu_key key;
1648 	int retval;
1649 
1650 	BUG_ON(!th->t_trans_id);
1651 
1652 	_make_cpu_key(&key, KEY_FORMAT_3_5, le32_to_cpu(ih->ih_key.k_dir_id),
1653 		      le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET,
1654 		      TYPE_DIRENTRY, 3 /*key length */ );
1655 
1656 	/* compose item head for new item. Directories consist of items of
1657 	   old type (ITEM_VERSION_1). Do not set the key (second arg is NULL);
1658 	   that is done by reiserfs_new_inode */
1659 	if (old_format_only(sb)) {
1660 		make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET,
1661 				  TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2);
1662 
1663 		make_empty_dir_item_v1(body, ih->ih_key.k_dir_id,
1664 				       ih->ih_key.k_objectid,
1665 				       INODE_PKEY(dir)->k_dir_id,
1666 				       INODE_PKEY(dir)->k_objectid);
1667 	} else {
1668 		make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET,
1669 				  TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2);
1670 
1671 		make_empty_dir_item(body, ih->ih_key.k_dir_id,
1672 				    ih->ih_key.k_objectid,
1673 				    INODE_PKEY(dir)->k_dir_id,
1674 				    INODE_PKEY(dir)->k_objectid);
1675 	}
1676 
1677 	/* look for place in the tree for new item */
1678 	retval = search_item(sb, &key, path);
1679 	if (retval == IO_ERROR) {
1680 		reiserfs_error(sb, "vs-13080",
1681 			       "i/o failure occurred creating new directory");
1682 		return -EIO;
1683 	}
1684 	if (retval == ITEM_FOUND) {
1685 		pathrelse(path);
1686 		reiserfs_warning(sb, "vs-13070",
1687 				 "object with this key exists (%k)",
1688 				 &(ih->ih_key));
1689 		return -EEXIST;
1690 	}
1691 
1692 	/* insert item, that is empty directory item */
1693 	return reiserfs_insert_item(th, path, &key, ih, inode, body);
1694 }
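
/*
 * Illustrative sketch (not a byte-accurate dump) of the item inserted
 * above: the empty directory item holds exactly two entries,
 *
 *	"."  -> (ih->ih_key.k_dir_id, ih->ih_key.k_objectid)
 *	".." -> (INODE_PKEY(dir)->k_dir_id, INODE_PKEY(dir)->k_objectid)
 *
 * which is why make_empty_dir_item() and make_empty_dir_item_v1() take
 * the new directory's key pair first and the parent's key pair second.
 */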
1695 
1696 /* stat data of object has been inserted, this inserts the item
1697    containing the body of symlink */
1698 static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct inode *inode,	/* Inode of symlink */
1699 				struct item_head *ih,
1700 				struct treepath *path, const char *symname,
1701 				int item_len)
1702 {
1703 	struct super_block *sb = th->t_super;
1704 	struct cpu_key key;
1705 	int retval;
1706 
1707 	BUG_ON(!th->t_trans_id);
1708 
1709 	_make_cpu_key(&key, KEY_FORMAT_3_5,
1710 		      le32_to_cpu(ih->ih_key.k_dir_id),
1711 		      le32_to_cpu(ih->ih_key.k_objectid),
1712 		      1, TYPE_DIRECT, 3 /*key length */ );
1713 
1714 	make_le_item_head(ih, NULL, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len,
1715 			  0 /*free_space */ );
1716 
1717 	/* look for place in the tree for new item */
1718 	retval = search_item(sb, &key, path);
1719 	if (retval == IO_ERROR) {
1720 		reiserfs_error(sb, "vs-13080",
1721 			       "i/o failure occurred creating new symlink");
1722 		return -EIO;
1723 	}
1724 	if (retval == ITEM_FOUND) {
1725 		pathrelse(path);
1726 		reiserfs_warning(sb, "vs-13080",
1727 				 "object with this key exists (%k)",
1728 				 &(ih->ih_key));
1729 		return -EEXIST;
1730 	}
1731 
1732 	/* insert item, that is body of symlink */
1733 	return reiserfs_insert_item(th, path, &key, ih, inode, symname);
1734 }
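
/*
 * Key geometry sketch of what the code above produces: the symlink body
 * becomes a single TYPE_DIRECT item at byte offset 1 of the new object
 * (body offsets are 1-based), so a fresh symlink is exactly two items
 * in the tree:
 *
 *	(dirid, objectid, SD_OFFSET, TYPE_STAT_DATA)	stat data
 *	(dirid, objectid, 1,         TYPE_DIRECT)	symname bytes
 */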
1735 
1736 /* inserts the stat data into the tree, and then calls
1737    reiserfs_new_directory (to insert ".", ".." item if new object is
1738    directory) or reiserfs_new_symlink (to insert symlink body if new
1739    object is symlink) or nothing (if new object is regular file)
1740 
1741    NOTE! uid and gid must already be set in the inode.  If we return
1742    non-zero due to an error, we have to drop the quota previously allocated
1743    for the fresh inode.  This can only be done outside a transaction, so
1744    if we return non-zero, we also end the transaction.  */
1745 int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1746 		       struct inode *dir, int mode, const char *symname,
1747 		       /* 0 for regular files, EMPTY_DIR_SIZE for dirs,
1748 		          strlen(symname) for symlinks */
1749 		       loff_t i_size, struct dentry *dentry,
1750 		       struct inode *inode,
1751 		       struct reiserfs_security_handle *security)
1752 {
1753 	struct super_block *sb;
1754 	struct reiserfs_iget_args args;
1755 	INITIALIZE_PATH(path_to_key);
1756 	struct cpu_key key;
1757 	struct item_head ih;
1758 	struct stat_data sd;
1759 	int retval;
1760 	int err;
1761 
1762 	BUG_ON(!th->t_trans_id);
1763 
1764 	if (vfs_dq_alloc_inode(inode)) {
1765 		err = -EDQUOT;
1766 		goto out_end_trans;
1767 	}
1768 	if (!dir->i_nlink) {
1769 		err = -EPERM;
1770 		goto out_bad_inode;
1771 	}
1772 
1773 	sb = dir->i_sb;
1774 
1775 	/* item head of new item */
1776 	ih.ih_key.k_dir_id = reiserfs_choose_packing(dir);
1777 	ih.ih_key.k_objectid = cpu_to_le32(reiserfs_get_unused_objectid(th));
1778 	if (!ih.ih_key.k_objectid) {
1779 		err = -ENOMEM;
1780 		goto out_bad_inode;
1781 	}
1782 	args.objectid = inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid);
1783 	if (old_format_only(sb))
1784 		make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET,
1785 				  TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
1786 	else
1787 		make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET,
1788 				  TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
1789 	memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
1790 	args.dirid = le32_to_cpu(ih.ih_key.k_dir_id);
1791 	if (insert_inode_locked4(inode, args.objectid,
1792 			     reiserfs_find_actor, &args) < 0) {
1793 		err = -EINVAL;
1794 		goto out_bad_inode;
1795 	}
1796 	if (old_format_only(sb))
1797 		/* not a perfect generation count, as object ids can be reused, but
1798 		 ** this is as good as reiserfs can do right now.
1799 		 ** note that the private part of the inode isn't filled in yet, so we have
1800 		 ** to use the directory.
1801 		 */
1802 		inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid);
1803 	else
1804 #if defined( USE_INODE_GENERATION_COUNTER )
1805 		inode->i_generation =
1806 		    le32_to_cpu(REISERFS_SB(sb)->s_rs->s_inode_generation);
1807 #else
1808 		inode->i_generation = ++event;
1809 #endif
1810 
1811 	/* fill stat data */
1812 	inode->i_nlink = (S_ISDIR(mode) ? 2 : 1);
1813 
1814 	/* uid and gid must already be set by the caller for quota init */
1815 
1816 	/* a symlink cannot be immutable or append-only */
1817 	if (S_ISLNK(inode->i_mode))
1818 		inode->i_flags &= ~(S_IMMUTABLE | S_APPEND);
1819 
1820 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
1821 	inode->i_size = i_size;
1822 	inode->i_blocks = 0;
1823 	inode->i_bytes = 0;
1824 	REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 1 :
1825 	    U32_MAX /*NO_BYTES_IN_DIRECT_ITEM */ ;
1826 
1827 	INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
1828 	REISERFS_I(inode)->i_flags = 0;
1829 	REISERFS_I(inode)->i_prealloc_block = 0;
1830 	REISERFS_I(inode)->i_prealloc_count = 0;
1831 	REISERFS_I(inode)->i_trans_id = 0;
1832 	REISERFS_I(inode)->i_jl = NULL;
1833 	REISERFS_I(inode)->i_attrs =
1834 	    REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
1835 	sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
1836 	mutex_init(&(REISERFS_I(inode)->i_mmap));
1837 	reiserfs_init_acl_access(inode);
1838 	reiserfs_init_acl_default(inode);
1839 	reiserfs_init_xattr_rwsem(inode);
1840 
1841 	/* key to search for correct place for new stat data */
1842 	_make_cpu_key(&key, KEY_FORMAT_3_6, le32_to_cpu(ih.ih_key.k_dir_id),
1843 		      le32_to_cpu(ih.ih_key.k_objectid), SD_OFFSET,
1844 		      TYPE_STAT_DATA, 3 /*key length */ );
1845 
1846 	/* find proper place for inserting of stat data */
1847 	retval = search_item(sb, &key, &path_to_key);
1848 	if (retval == IO_ERROR) {
1849 		err = -EIO;
1850 		goto out_bad_inode;
1851 	}
1852 	if (retval == ITEM_FOUND) {
1853 		pathrelse(&path_to_key);
1854 		err = -EEXIST;
1855 		goto out_bad_inode;
1856 	}
1857 	if (old_format_only(sb)) {
1858 		if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) {
1859 			pathrelse(&path_to_key);
1860 			/* i_uid or i_gid is too big to be stored in stat data v3.5 */
1861 			err = -EINVAL;
1862 			goto out_bad_inode;
1863 		}
1864 		inode2sd_v1(&sd, inode, inode->i_size);
1865 	} else {
1866 		inode2sd(&sd, inode, inode->i_size);
1867 	}
1868 	// store in the in-core inode the key of the stat data and the version
1869 	// all object items will have (directory items keep the old offset
1870 	// format; other new objects consist of new items)
1871 	if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode))
1872 		set_inode_item_key_version(inode, KEY_FORMAT_3_5);
1873 	else
1874 		set_inode_item_key_version(inode, KEY_FORMAT_3_6);
1875 	if (old_format_only(sb))
1876 		set_inode_sd_version(inode, STAT_DATA_V1);
1877 	else
1878 		set_inode_sd_version(inode, STAT_DATA_V2);
1879 
1880 	/* insert the stat data into the tree */
1881 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
1882 	if (REISERFS_I(dir)->new_packing_locality)
1883 		th->displace_new_blocks = 1;
1884 #endif
1885 	retval =
1886 	    reiserfs_insert_item(th, &path_to_key, &key, &ih, inode,
1887 				 (char *)(&sd));
1888 	if (retval) {
1889 		err = retval;
1890 		reiserfs_check_path(&path_to_key);
1891 		goto out_bad_inode;
1892 	}
1893 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
1894 	if (!th->displace_new_blocks)
1895 		REISERFS_I(dir)->new_packing_locality = 0;
1896 #endif
1897 	if (S_ISDIR(mode)) {
1898 		/* insert item with "." and ".." */
1899 		retval =
1900 		    reiserfs_new_directory(th, inode, &ih, &path_to_key, dir);
1901 	}
1902 
1903 	if (S_ISLNK(mode)) {
1904 		/* insert body of symlink */
1905 		if (!old_format_only(sb))
1906 			i_size = ROUND_UP(i_size);
1907 		retval =
1908 		    reiserfs_new_symlink(th, inode, &ih, &path_to_key, symname,
1909 					 i_size);
1910 	}
1911 	if (retval) {
1912 		err = retval;
1913 		reiserfs_check_path(&path_to_key);
1914 		journal_end(th, th->t_super, th->t_blocks_allocated);
1915 		goto out_inserted_sd;
1916 	}
1917 
1918 	if (reiserfs_posixacl(inode->i_sb)) {
1919 		retval = reiserfs_inherit_default_acl(th, dir, dentry, inode);
1920 		if (retval) {
1921 			err = retval;
1922 			reiserfs_check_path(&path_to_key);
1923 			journal_end(th, th->t_super, th->t_blocks_allocated);
1924 			goto out_inserted_sd;
1925 		}
1926 	} else if (inode->i_sb->s_flags & MS_POSIXACL) {
1927 		reiserfs_warning(inode->i_sb, "jdm-13090",
1928 				 "ACLs aren't enabled in the fs, "
1929 				 "but vfs thinks they are!");
1930 	} else if (IS_PRIVATE(dir))
1931 		inode->i_flags |= S_PRIVATE;
1932 
1933 	if (security->name) {
1934 		retval = reiserfs_security_write(th, inode, security);
1935 		if (retval) {
1936 			err = retval;
1937 			reiserfs_check_path(&path_to_key);
1938 			retval = journal_end(th, th->t_super,
1939 					     th->t_blocks_allocated);
1940 			if (retval)
1941 				err = retval;
1942 			goto out_inserted_sd;
1943 		}
1944 	}
1945 
1946 	reiserfs_update_sd(th, inode);
1947 	reiserfs_check_path(&path_to_key);
1948 
1949 	return 0;
1950 
1951 /* it looks like you can easily compress these two goto targets into
1952  * one.  Keeping it like this doesn't actually hurt anything, and they
1953  * are placeholders for what the quota code actually needs.
1954  */
1955       out_bad_inode:
1956 	/* Invalidate the object, nothing was inserted yet */
1957 	INODE_PKEY(inode)->k_objectid = 0;
1958 
1959 	/* Quota change must be inside a transaction for journaling */
1960 	vfs_dq_free_inode(inode);
1961 
1962       out_end_trans:
1963 	journal_end(th, th->t_super, th->t_blocks_allocated);
1964 	/* The drop can be done outside the transaction and needs more credits, so it is better kept outside */
1965 	vfs_dq_drop(inode);
1966 	inode->i_flags |= S_NOQUOTA;
1967 	make_bad_inode(inode);
1968 
1969       out_inserted_sd:
1970 	inode->i_nlink = 0;
1971 	th->t_trans_id = 0;	/* so the caller can't use this handle later */
1972 	unlock_new_inode(inode); /* OK to do even if we hadn't locked it */
1973 	iput(inode);
1974 	return err;
1975 }
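
/*
 * Caller-side sketch of the contract spelled out above.  This is a
 * hypothetical fragment for illustration only (the real callers are
 * the create/mkdir/symlink paths in namei.c):
 *
 *	inode->i_uid = current_fsuid();	// uid/gid before the call:
 *	inode->i_gid = dir->i_gid;	// quota init depends on them
 *	err = reiserfs_new_inode(&th, dir, mode, NULL, 0, dentry,
 *				 inode, &security);
 *	if (err)
 *		return err;	// th was already ended; don't reuse it
 */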
1976 
1977 /*
1978 ** finds the tail page in the page cache,
1979 ** reads the last block in.
1980 **
1981 ** On success, page_result is set to a locked, pinned page, and bh_result
1982 ** is set to an up to date buffer for the last block in the file.  returns 0.
1983 **
1984 ** tail conversion is not done, so bh_result might not be valid for writing;
1985 ** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before
1986 ** trying to write the block.
1987 **
1988 ** on failure, nonzero is returned, page_result and bh_result are untouched.
1989 */
1990 static int grab_tail_page(struct inode *inode,
1991 			  struct page **page_result,
1992 			  struct buffer_head **bh_result)
1993 {
1994 
1995 	/* we want the page with the last byte in the file,
1996 	 ** not the page that will hold the next byte for appending
1997 	 */
1998 	unsigned long index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
1999 	unsigned long pos = 0;
2000 	unsigned long start = 0;
2001 	unsigned long blocksize = inode->i_sb->s_blocksize;
2002 	unsigned long offset = (inode->i_size) & (PAGE_CACHE_SIZE - 1);
2003 	struct buffer_head *bh;
2004 	struct buffer_head *head;
2005 	struct page *page;
2006 	int error;
2007 
2008 	/* we know that we are only called with inode->i_size > 0.
2009 	 ** we also know that a file tail can never be as big as a block
2010 	 ** If i_size % blocksize == 0, our file is currently block aligned
2011 	 ** and it won't need converting or zeroing after a truncate.
2012 	 */
2013 	if ((offset & (blocksize - 1)) == 0) {
2014 		return -ENOENT;
2015 	}
2016 	page = grab_cache_page(inode->i_mapping, index);
2017 	error = -ENOMEM;
2018 	if (!page) {
2019 		goto out;
2020 	}
2021 	/* start within the page of the last block in the file */
2022 	start = (offset / blocksize) * blocksize;
2023 
2024 	error = block_prepare_write(page, start, offset,
2025 				    reiserfs_get_block_create_0);
2026 	if (error)
2027 		goto unlock;
2028 
2029 	head = page_buffers(page);
2030 	bh = head;
2031 	do {
2032 		if (pos >= start) {
2033 			break;
2034 		}
2035 		bh = bh->b_this_page;
2036 		pos += blocksize;
2037 	} while (bh != head);
2038 
2039 	if (!buffer_uptodate(bh)) {
2040 		/* note: this should never happen; prepare_write should
2041 		 ** be taking care of this for us.  If the buffer isn't up to date,
2042 		 ** I've screwed up the code to find the buffer, or the code to
2043 		 ** call prepare_write.
2044 		 */
2045 		reiserfs_error(inode->i_sb, "clm-6000",
2046 			       "error reading block %lu", bh->b_blocknr);
2047 		error = -EIO;
2048 		goto unlock;
2049 	}
2050 	*bh_result = bh;
2051 	*page_result = page;
2052 
2053       out:
2054 	return error;
2055 
2056       unlock:
2057 	unlock_page(page);
2058 	page_cache_release(page);
2059 	return error;
2060 }
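
/*
 * Usage sketch of the rule from the header comment; this mirrors what
 * reiserfs_truncate_file() below actually does:
 *
 *	struct page *page;
 *	struct buffer_head *bh;
 *
 *	if (!grab_tail_page(inode, &page, &bh)) {
 *		if (buffer_mapped(bh) && bh->b_blocknr != 0)
 *			mark_buffer_dirty(bh);	// unformatted node: safe
 *		// else the tail is in a direct item, nothing on disk to dirty
 *		unlock_page(page);
 *		page_cache_release(page);
 *	}
 */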
2061 
2062 /*
2063 ** vfs version of truncate file.  Must NOT be called with
2064 ** a transaction already started.
2065 **
2066 ** some code taken from block_truncate_page
2067 */
2068 int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
2069 {
2070 	struct reiserfs_transaction_handle th;
2071 	/* we want the offset for the first byte after the end of the file */
2072 	unsigned long offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
2073 	unsigned blocksize = inode->i_sb->s_blocksize;
2074 	unsigned length;
2075 	struct page *page = NULL;
2076 	int error;
2077 	struct buffer_head *bh = NULL;
2078 	int err2;
2079 
2080 	reiserfs_write_lock(inode->i_sb);
2081 
2082 	if (inode->i_size > 0) {
2083 		error = grab_tail_page(inode, &page, &bh);
2084 		if (error) {
2085 			// -ENOENT means we truncated past the end of the file,
2086 			// and get_block_create_0 could not find a block to read in,
2087 			// which is ok.
2088 			if (error != -ENOENT)
2089 				reiserfs_error(inode->i_sb, "clm-6001",
2090 					       "grab_tail_page failed %d",
2091 					       error);
2092 			page = NULL;
2093 			bh = NULL;
2094 		}
2095 	}
2096 
2097 	/* so, if page != NULL, we have a buffer head for the offset at
2098 	 ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
2099 	 ** then we have an unformatted node.  Otherwise, we have a direct item,
2100 	 ** and no zeroing is required on disk.  We zero after the truncate,
2101 	 ** because the truncate might pack the item anyway
2102 	 ** (it will unmap bh if it packs).
2103 	 */
2104 	/* it is enough to reserve space in transaction for 2 balancings:
2105 	   one for "save" link adding and another for the first
2106 	   cut_from_item. 1 is for update_sd */
2107 	error = journal_begin(&th, inode->i_sb,
2108 			      JOURNAL_PER_BALANCE_CNT * 2 + 1);
2109 	if (error)
2110 		goto out;
2111 	reiserfs_update_inode_transaction(inode);
2112 	if (update_timestamps)
2113 		/* we are doing a real truncate: if the system crashes before the
2114 		   last transaction of the truncate gets committed, then on reboot
2115 		   the file either appears properly truncated or not truncated at all */
2116 		add_save_link(&th, inode, 1);
2117 	err2 = reiserfs_do_truncate(&th, inode, page, update_timestamps);
2118 	error =
2119 	    journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1);
2120 	if (error)
2121 		goto out;
2122 
2123 	/* check reiserfs_do_truncate after ending the transaction */
2124 	if (err2) {
2125 		error = err2;
2126 		goto out;
2127 	}
2128 
2129 	if (update_timestamps) {
2130 		error = remove_save_link(inode, 1 /* truncate */);
2131 		if (error)
2132 			goto out;
2133 	}
2134 
2135 	if (page) {
2136 		length = offset & (blocksize - 1);
2137 		/* if we are not on a block boundary */
2138 		if (length) {
2139 			length = blocksize - length;
2140 			zero_user(page, offset, length);
2141 			if (buffer_mapped(bh) && bh->b_blocknr != 0) {
2142 				mark_buffer_dirty(bh);
2143 			}
2144 		}
2145 		unlock_page(page);
2146 		page_cache_release(page);
2147 	}
2148 
2149 	reiserfs_write_unlock(inode->i_sb);
2150 	return 0;
2151       out:
2152 	if (page) {
2153 		unlock_page(page);
2154 		page_cache_release(page);
2155 	}
2156 	reiserfs_write_unlock(inode->i_sb);
2157 	return error;
2158 }
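
/*
 * The crash-safety ordering used above, condensed (a restatement for
 * clarity, not extra behavior):
 *
 *	journal_begin(&th, ...);
 *	add_save_link(&th, inode, 1);	// logged with the truncate itself
 *	reiserfs_do_truncate(&th, ...);
 *	journal_end(&th, ...);
 *	remove_save_link(inode, 1);	// only after a clean commit
 *
 * If we crash between journal_end() and remove_save_link(), mount-time
 * replay finds the save link and finishes the truncate.
 */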
2159 
2160 static int map_block_for_writepage(struct inode *inode,
2161 				   struct buffer_head *bh_result,
2162 				   unsigned long block)
2163 {
2164 	struct reiserfs_transaction_handle th;
2165 	int fs_gen;
2166 	struct item_head tmp_ih;
2167 	struct item_head *ih;
2168 	struct buffer_head *bh;
2169 	__le32 *item;
2170 	struct cpu_key key;
2171 	INITIALIZE_PATH(path);
2172 	int pos_in_item;
2173 	int jbegin_count = JOURNAL_PER_BALANCE_CNT;
2174 	loff_t byte_offset = ((loff_t)block << inode->i_sb->s_blocksize_bits)+1;
2175 	int retval;
2176 	int use_get_block = 0;
2177 	int bytes_copied = 0;
2178 	int copy_size;
2179 	int trans_running = 0;
2180 
2181 	/* catch places below that try to log something without starting a trans */
2182 	th.t_trans_id = 0;
2183 
2184 	if (!buffer_uptodate(bh_result)) {
2185 		return -EIO;
2186 	}
2187 
2188 	kmap(bh_result->b_page);
2189       start_over:
2190 	reiserfs_write_lock(inode->i_sb);
2191 	make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3);
2192 
2193       research:
2194 	retval = search_for_position_by_key(inode->i_sb, &key, &path);
2195 	if (retval != POSITION_FOUND) {
2196 		use_get_block = 1;
2197 		goto out;
2198 	}
2199 
2200 	bh = get_last_bh(&path);
2201 	ih = get_ih(&path);
2202 	item = get_item(&path);
2203 	pos_in_item = path.pos_in_item;
2204 
2205 	/* we've found an unformatted node */
2206 	if (indirect_item_found(retval, ih)) {
2207 		if (bytes_copied > 0) {
2208 			reiserfs_warning(inode->i_sb, "clm-6002",
2209 					 "bytes_copied %d", bytes_copied);
2210 		}
2211 		if (!get_block_num(item, pos_in_item)) {
2212 			/* crap, we are writing to a hole */
2213 			use_get_block = 1;
2214 			goto out;
2215 		}
2216 		set_block_dev_mapped(bh_result,
2217 				     get_block_num(item, pos_in_item), inode);
2218 	} else if (is_direct_le_ih(ih)) {
2219 		char *p;
2220 		p = page_address(bh_result->b_page);
2221 		p += (byte_offset - 1) & (PAGE_CACHE_SIZE - 1);
2222 		copy_size = ih_item_len(ih) - pos_in_item;
2223 
2224 		fs_gen = get_generation(inode->i_sb);
2225 		copy_item_head(&tmp_ih, ih);
2226 
2227 		if (!trans_running) {
2228 			/* vs-3050 is gone, no need to drop the path */
2229 			retval = journal_begin(&th, inode->i_sb, jbegin_count);
2230 			if (retval)
2231 				goto out;
2232 			reiserfs_update_inode_transaction(inode);
2233 			trans_running = 1;
2234 			if (fs_changed(fs_gen, inode->i_sb)
2235 			    && item_moved(&tmp_ih, &path)) {
2236 				reiserfs_restore_prepared_buffer(inode->i_sb,
2237 								 bh);
2238 				goto research;
2239 			}
2240 		}
2241 
2242 		reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
2243 
2244 		if (fs_changed(fs_gen, inode->i_sb)
2245 		    && item_moved(&tmp_ih, &path)) {
2246 			reiserfs_restore_prepared_buffer(inode->i_sb, bh);
2247 			goto research;
2248 		}
2249 
2250 		memcpy(B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied,
2251 		       copy_size);
2252 
2253 		journal_mark_dirty(&th, inode->i_sb, bh);
2254 		bytes_copied += copy_size;
2255 		set_block_dev_mapped(bh_result, 0, inode);
2256 
2257 		/* are there still bytes left? */
2258 		if (bytes_copied < bh_result->b_size &&
2259 		    (byte_offset + bytes_copied) < inode->i_size) {
2260 			set_cpu_key_k_offset(&key,
2261 					     cpu_key_k_offset(&key) +
2262 					     copy_size);
2263 			goto research;
2264 		}
2265 	} else {
2266 		reiserfs_warning(inode->i_sb, "clm-6003",
2267 				 "bad item inode %lu", inode->i_ino);
2268 		retval = -EIO;
2269 		goto out;
2270 	}
2271 	retval = 0;
2272 
2273       out:
2274 	pathrelse(&path);
2275 	if (trans_running) {
2276 		int err = journal_end(&th, inode->i_sb, jbegin_count);
2277 		if (err)
2278 			retval = err;
2279 		trans_running = 0;
2280 	}
2281 	reiserfs_write_unlock(inode->i_sb);
2282 
2283 	/* this is where we fill in holes in the file. */
2284 	if (use_get_block) {
2285 		retval = reiserfs_get_block(inode, block, bh_result,
2286 					    GET_BLOCK_CREATE | GET_BLOCK_NO_IMUX
2287 					    | GET_BLOCK_NO_DANGLE);
2288 		if (!retval) {
2289 			if (!buffer_mapped(bh_result)
2290 			    || bh_result->b_blocknr == 0) {
2291 				/* get_block failed to find a mapped unformatted node. */
2292 				use_get_block = 0;
2293 				goto start_over;
2294 			}
2295 		}
2296 	}
2297 	kunmap(bh_result->b_page);
2298 
2299 	if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
2300 		/* we've copied data from the page into the direct item, so the
2301 		 * buffer in the page is now clean, mark it to reflect that.
2302 		 */
2303 		lock_buffer(bh_result);
2304 		clear_buffer_dirty(bh_result);
2305 		unlock_buffer(bh_result);
2306 	}
2307 	return retval;
2308 }
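
/*
 * Summary of the three outcomes above, for orientation:
 *
 *	indirect item, block != 0: set_block_dev_mapped(bh, block, inode)
 *	indirect item, block == 0: a hole; fall through to
 *				   reiserfs_get_block(GET_BLOCK_CREATE)
 *	direct item:		   copy the page bytes into the item, log
 *				   it, and leave bh mapped with
 *				   b_blocknr == 0 so the writepage loop
 *				   knows no block io is needed
 */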
2309 
2310 /*
2311  * mason@suse.com: updated in 2.5.54 to follow the same general io
2312  * start/recovery path as __block_write_full_page, along with special
2313  * code to handle reiserfs tails.
2314  */
2315 static int reiserfs_write_full_page(struct page *page,
2316 				    struct writeback_control *wbc)
2317 {
2318 	struct inode *inode = page->mapping->host;
2319 	unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
2320 	int error = 0;
2321 	unsigned long block;
2322 	sector_t last_block;
2323 	struct buffer_head *head, *bh;
2324 	int partial = 0;
2325 	int nr = 0;
2326 	int checked = PageChecked(page);
2327 	struct reiserfs_transaction_handle th;
2328 	struct super_block *s = inode->i_sb;
2329 	int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
2330 	th.t_trans_id = 0;
2331 
2332 	/* no logging allowed when nonblocking or from PF_MEMALLOC */
2333 	if (checked && (current->flags & PF_MEMALLOC)) {
2334 		redirty_page_for_writepage(wbc, page);
2335 		unlock_page(page);
2336 		return 0;
2337 	}
2338 
2339 	/* The page dirty bit is cleared before writepage is called, which
2340 	 * means we have to tell create_empty_buffers to make dirty buffers.
2341 	 * The page really should be up to date at this point, so tossing
2342 	 * in the BH_Uptodate is just a sanity check.
2343 	 */
2344 	if (!page_has_buffers(page)) {
2345 		create_empty_buffers(page, s->s_blocksize,
2346 				     (1 << BH_Dirty) | (1 << BH_Uptodate));
2347 	}
2348 	head = page_buffers(page);
2349 
2350 	/* last page in the file, zero out any contents past the
2351 	 ** last byte in the file
2352 	 */
2353 	if (page->index >= end_index) {
2354 		unsigned last_offset;
2355 
2356 		last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
2357 		/* no file contents in this page */
2358 		if (page->index >= end_index + 1 || !last_offset) {
2359 			unlock_page(page);
2360 			return 0;
2361 		}
2362 		zero_user_segment(page, last_offset, PAGE_CACHE_SIZE);
2363 	}
2364 	bh = head;
2365 	block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits);
2366 	last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
2367 	/* first map all the buffers, logging any direct items we find */
2368 	do {
2369 		if (block > last_block) {
2370 			/*
2371 			 * This can happen when the block size is less than
2372 			 * the page size.  The corresponding bytes in the page
2373 			 * were zero filled above
2374 			 */
2375 			clear_buffer_dirty(bh);
2376 			set_buffer_uptodate(bh);
2377 		} else if ((checked || buffer_dirty(bh)) &&
2378 		           (!buffer_mapped(bh) || (buffer_mapped(bh)
2379 						       && bh->b_blocknr ==
2380 						       0))) {
2381 			/* not mapped yet, or it points to a direct item, search
2382 			 * the btree for the mapping info, and log any direct
2383 			 * items found
2384 			 */
2385 			if ((error = map_block_for_writepage(inode, bh, block))) {
2386 				goto fail;
2387 			}
2388 		}
2389 		bh = bh->b_this_page;
2390 		block++;
2391 	} while (bh != head);
2392 
2393 	/*
2394 	 * we start the transaction after map_block_for_writepage,
2395 	 * because it can create holes in the file (an unbounded operation).
2396 	 * starting it here, we can make a reliable estimate for how many
2397 	 * blocks we're going to log
2398 	 */
2399 	if (checked) {
2400 		ClearPageChecked(page);
2401 		reiserfs_write_lock(s);
2402 		error = journal_begin(&th, s, bh_per_page + 1);
2403 		if (error) {
2404 			reiserfs_write_unlock(s);
2405 			goto fail;
2406 		}
2407 		reiserfs_update_inode_transaction(inode);
2408 	}
2409 	/* now go through and lock any dirty buffers on the page */
2410 	do {
2411 		get_bh(bh);
2412 		if (!buffer_mapped(bh))
2413 			continue;
2414 		if (buffer_mapped(bh) && bh->b_blocknr == 0)
2415 			continue;
2416 
2417 		if (checked) {
2418 			reiserfs_prepare_for_journal(s, bh, 1);
2419 			journal_mark_dirty(&th, s, bh);
2420 			continue;
2421 		}
2422 		/* from this point on, we know the buffer is mapped to a
2423 		 * real block and not a direct item
2424 		 */
2425 		if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
2426 			lock_buffer(bh);
2427 		} else {
2428 			if (!trylock_buffer(bh)) {
2429 				redirty_page_for_writepage(wbc, page);
2430 				continue;
2431 			}
2432 		}
2433 		if (test_clear_buffer_dirty(bh)) {
2434 			mark_buffer_async_write(bh);
2435 		} else {
2436 			unlock_buffer(bh);
2437 		}
2438 	} while ((bh = bh->b_this_page) != head);
2439 
2440 	if (checked) {
2441 		error = journal_end(&th, s, bh_per_page + 1);
2442 		reiserfs_write_unlock(s);
2443 		if (error)
2444 			goto fail;
2445 	}
2446 	BUG_ON(PageWriteback(page));
2447 	set_page_writeback(page);
2448 	unlock_page(page);
2449 
2450 	/*
2451 	 * since any buffer might be the only dirty buffer on the page,
2452 	 * the first submit_bh can bring the page out of writeback.
2453 	 * be careful with the buffers.
2454 	 */
2455 	do {
2456 		struct buffer_head *next = bh->b_this_page;
2457 		if (buffer_async_write(bh)) {
2458 			submit_bh(WRITE, bh);
2459 			nr++;
2460 		}
2461 		put_bh(bh);
2462 		bh = next;
2463 	} while (bh != head);
2464 
2465 	error = 0;
2466       done:
2467 	if (nr == 0) {
2468 		/*
2469 		 * if this page only had a direct item, it is very possible for
2470 		 * no io to be required without there being an error.  Or,
2471 		 * someone else could have locked the buffers and sent them
2472 		 * down the pipe without locking the page.
2473 		 */
2474 		bh = head;
2475 		do {
2476 			if (!buffer_uptodate(bh)) {
2477 				partial = 1;
2478 				break;
2479 			}
2480 			bh = bh->b_this_page;
2481 		} while (bh != head);
2482 		if (!partial)
2483 			SetPageUptodate(page);
2484 		end_page_writeback(page);
2485 	}
2486 	return error;
2487 
2488       fail:
2489 	/* catches various errors; we need to make sure any valid dirty blocks
2490 	 * get to the media.  The page is currently locked and not marked for
2491 	 * writeback.
2492 	 */
2493 	ClearPageUptodate(page);
2494 	bh = head;
2495 	do {
2496 		get_bh(bh);
2497 		if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) {
2498 			lock_buffer(bh);
2499 			mark_buffer_async_write(bh);
2500 		} else {
2501 			/*
2502 			 * clear any dirty bits that might have come from getting
2503 			 * attached to a dirty page
2504 			 */
2505 			clear_buffer_dirty(bh);
2506 		}
2507 		bh = bh->b_this_page;
2508 	} while (bh != head);
2509 	SetPageError(page);
2510 	BUG_ON(PageWriteback(page));
2511 	set_page_writeback(page);
2512 	unlock_page(page);
2513 	do {
2514 		struct buffer_head *next = bh->b_this_page;
2515 		if (buffer_async_write(bh)) {
2516 			clear_buffer_dirty(bh);
2517 			submit_bh(WRITE, bh);
2518 			nr++;
2519 		}
2520 		put_bh(bh);
2521 		bh = next;
2522 	} while (bh != head);
2523 	goto done;
2524 }
2525 
2526 static int reiserfs_readpage(struct file *f, struct page *page)
2527 {
2528 	return block_read_full_page(page, reiserfs_get_block);
2529 }
2530 
2531 static int reiserfs_writepage(struct page *page, struct writeback_control *wbc)
2532 {
2533 	struct inode *inode = page->mapping->host;
2534 	reiserfs_wait_on_write_block(inode->i_sb);
2535 	return reiserfs_write_full_page(page, wbc);
2536 }
2537 
2538 static int reiserfs_write_begin(struct file *file,
2539 				struct address_space *mapping,
2540 				loff_t pos, unsigned len, unsigned flags,
2541 				struct page **pagep, void **fsdata)
2542 {
2543 	struct inode *inode;
2544 	struct page *page;
2545 	pgoff_t index;
2546 	int ret;
2547 	int old_ref = 0;
2548 
2549 	inode = mapping->host;
2550 	*fsdata = 0;
2551 	if (flags & AOP_FLAG_CONT_EXPAND &&
2552 	    (pos & (inode->i_sb->s_blocksize - 1)) == 0) {
2553 		pos++;
2554 		*fsdata = (void *)(unsigned long)flags;
2555 	}
2556 
2557 	index = pos >> PAGE_CACHE_SHIFT;
2558 	page = grab_cache_page_write_begin(mapping, index, flags);
2559 	if (!page)
2560 		return -ENOMEM;
2561 	*pagep = page;
2562 
2563 	reiserfs_wait_on_write_block(inode->i_sb);
2564 	fix_tail_page_for_writing(page);
2565 	if (reiserfs_transaction_running(inode->i_sb)) {
2566 		struct reiserfs_transaction_handle *th;
2567 		th = (struct reiserfs_transaction_handle *)current->
2568 		    journal_info;
2569 		BUG_ON(!th->t_refcount);
2570 		BUG_ON(!th->t_trans_id);
2571 		old_ref = th->t_refcount;
2572 		th->t_refcount++;
2573 	}
2574 	ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
2575 				reiserfs_get_block);
2576 	if (ret && reiserfs_transaction_running(inode->i_sb)) {
2577 		struct reiserfs_transaction_handle *th = current->journal_info;
2578 		/* this gets a little ugly.  If reiserfs_get_block returned an
2579 		 * error and left a transaction running, we've got to close it,
2580 		 * and we've got to free the handle if it was a persistent transaction.
2581 		 *
2582 		 * But, if we had nested into an existing transaction, we need
2583 		 * to just drop the ref count on the handle.
2584 		 *
2585 		 * If old_ref == 0, the transaction is from reiserfs_get_block,
2586 		 * and it was a persistent trans.  Otherwise, it was nested above.
2587 		 */
2588 		if (th->t_refcount > old_ref) {
2589 			if (old_ref)
2590 				th->t_refcount--;
2591 			else {
2592 				int err;
2593 				reiserfs_write_lock(inode->i_sb);
2594 				err = reiserfs_end_persistent_transaction(th);
2595 				reiserfs_write_unlock(inode->i_sb);
2596 				if (err)
2597 					ret = err;
2598 			}
2599 		}
2600 	}
2601 	if (ret) {
2602 		unlock_page(page);
2603 		page_cache_release(page);
2604 	}
2605 	return ret;
2606 }
2607 
2608 int reiserfs_prepare_write(struct file *f, struct page *page,
2609 			   unsigned from, unsigned to)
2610 {
2611 	struct inode *inode = page->mapping->host;
2612 	int ret;
2613 	int old_ref = 0;
2614 
2615 	reiserfs_wait_on_write_block(inode->i_sb);
2616 	fix_tail_page_for_writing(page);
2617 	if (reiserfs_transaction_running(inode->i_sb)) {
2618 		struct reiserfs_transaction_handle *th;
2619 		th = (struct reiserfs_transaction_handle *)current->
2620 		    journal_info;
2621 		BUG_ON(!th->t_refcount);
2622 		BUG_ON(!th->t_trans_id);
2623 		old_ref = th->t_refcount;
2624 		th->t_refcount++;
2625 	}
2626 
2627 	ret = block_prepare_write(page, from, to, reiserfs_get_block);
2628 	if (ret && reiserfs_transaction_running(inode->i_sb)) {
2629 		struct reiserfs_transaction_handle *th = current->journal_info;
2630 		/* this gets a little ugly.  If reiserfs_get_block returned an
2631 		 * error and left a transaction running, we've got to close it,
2632 		 * and we've got to free the handle if it was a persistent transaction.
2633 		 *
2634 		 * But, if we had nested into an existing transaction, we need
2635 		 * to just drop the ref count on the handle.
2636 		 *
2637 		 * If old_ref == 0, the transaction is from reiserfs_get_block,
2638 		 * and it was a persistent trans.  Otherwise, it was nested above.
2639 		 */
2640 		if (th->t_refcount > old_ref) {
2641 			if (old_ref)
2642 				th->t_refcount--;
2643 			else {
2644 				int err;
2645 				reiserfs_write_lock(inode->i_sb);
2646 				err = reiserfs_end_persistent_transaction(th);
2647 				reiserfs_write_unlock(inode->i_sb);
2648 				if (err)
2649 					ret = err;
2650 			}
2651 		}
2652 	}
2653 	return ret;
2654 
2655 }
2656 
2657 static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block)
2658 {
2659 	return generic_block_bmap(as, block, reiserfs_bmap);
2660 }
2661 
2662 static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2663 			      loff_t pos, unsigned len, unsigned copied,
2664 			      struct page *page, void *fsdata)
2665 {
2666 	struct inode *inode = page->mapping->host;
2667 	int ret = 0;
2668 	int update_sd = 0;
2669 	struct reiserfs_transaction_handle *th;
2670 	unsigned start;
2671 
2672 	if ((unsigned long)fsdata & AOP_FLAG_CONT_EXPAND)
2673 		pos++;
2674 
2675 	reiserfs_wait_on_write_block(inode->i_sb);
2676 	if (reiserfs_transaction_running(inode->i_sb))
2677 		th = current->journal_info;
2678 	else
2679 		th = NULL;
2680 
2681 	start = pos & (PAGE_CACHE_SIZE - 1);
2682 	if (unlikely(copied < len)) {
2683 		if (!PageUptodate(page))
2684 			copied = 0;
2685 
2686 		page_zero_new_buffers(page, start + copied, start + len);
2687 	}
2688 	flush_dcache_page(page);
2689 
2690 	reiserfs_commit_page(inode, page, start, start + copied);
2691 
2692 	/* generic_commit_write does this for us, but does not update the
2693 	 ** transaction tracking stuff when the size changes.  So, we have
2694 	 ** to do the i_size updates here.
2695 	 */
2696 	pos += copied;
2697 	if (pos > inode->i_size) {
2698 		struct reiserfs_transaction_handle myth;
2699 		reiserfs_write_lock(inode->i_sb);
2700 		/* If the file has grown beyond the boundary where it
2701 		   can have a tail, unmark it as needing tail
2702 		   packing */
2703 		if ((have_large_tails(inode->i_sb)
2704 		     && inode->i_size > i_block_size(inode) * 4)
2705 		    || (have_small_tails(inode->i_sb)
2706 			&& inode->i_size > i_block_size(inode)))
2707 			REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
2708 
2709 		ret = journal_begin(&myth, inode->i_sb, 1);
2710 		if (ret) {
2711 			reiserfs_write_unlock(inode->i_sb);
2712 			goto journal_error;
2713 		}
2714 		reiserfs_update_inode_transaction(inode);
2715 		inode->i_size = pos;
2716 		/*
2717 		 * this will just nest into our transaction.  It's important
2718 		 * to use mark_inode_dirty so the inode gets pushed around on the
2719 		 * dirty lists, and so that O_SYNC works as expected
2720 		 */
2721 		mark_inode_dirty(inode);
2722 		reiserfs_update_sd(&myth, inode);
2723 		update_sd = 1;
2724 		ret = journal_end(&myth, inode->i_sb, 1);
2725 		reiserfs_write_unlock(inode->i_sb);
2726 		if (ret)
2727 			goto journal_error;
2728 	}
2729 	if (th) {
2730 		reiserfs_write_lock(inode->i_sb);
2731 		if (!update_sd)
2732 			mark_inode_dirty(inode);
2733 		ret = reiserfs_end_persistent_transaction(th);
2734 		reiserfs_write_unlock(inode->i_sb);
2735 		if (ret)
2736 			goto out;
2737 	}
2738 
2739       out:
2740 	unlock_page(page);
2741 	page_cache_release(page);
2742 	return ret == 0 ? copied : ret;
2743 
2744       journal_error:
2745 	if (th) {
2746 		reiserfs_write_lock(inode->i_sb);
2747 		if (!update_sd)
2748 			reiserfs_update_sd(th, inode);
2749 		ret = reiserfs_end_persistent_transaction(th);
2750 		reiserfs_write_unlock(inode->i_sb);
2751 	}
2752 
2753 	goto out;
2754 }
2755 
2756 int reiserfs_commit_write(struct file *f, struct page *page,
2757 			  unsigned from, unsigned to)
2758 {
2759 	struct inode *inode = page->mapping->host;
2760 	loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to;
2761 	int ret = 0;
2762 	int update_sd = 0;
2763 	struct reiserfs_transaction_handle *th = NULL;
2764 
2765 	reiserfs_wait_on_write_block(inode->i_sb);
2766 	if (reiserfs_transaction_running(inode->i_sb)) {
2767 		th = current->journal_info;
2768 	}
2769 	reiserfs_commit_page(inode, page, from, to);
2770 
2771 	/* generic_commit_write does this for us, but does not update the
2772 	 ** transaction tracking stuff when the size changes.  So, we have
2773 	 ** to do the i_size updates here.
2774 	 */
2775 	if (pos > inode->i_size) {
2776 		struct reiserfs_transaction_handle myth;
2777 		reiserfs_write_lock(inode->i_sb);
2778 		/* If the file has grown beyond the boundary where it
2779 		   can have a tail, unmark it as needing tail
2780 		   packing */
2781 		if ((have_large_tails(inode->i_sb)
2782 		     && inode->i_size > i_block_size(inode) * 4)
2783 		    || (have_small_tails(inode->i_sb)
2784 			&& inode->i_size > i_block_size(inode)))
2785 			REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
2786 
2787 		ret = journal_begin(&myth, inode->i_sb, 1);
2788 		if (ret) {
2789 			reiserfs_write_unlock(inode->i_sb);
2790 			goto journal_error;
2791 		}
2792 		reiserfs_update_inode_transaction(inode);
2793 		inode->i_size = pos;
2794 		/*
2795 		 * this will just nest into our transaction.  It's important
2796 		 * to use mark_inode_dirty so the inode gets pushed around on the
2797 		 * dirty lists, and so that O_SYNC works as expected
2798 		 */
2799 		mark_inode_dirty(inode);
2800 		reiserfs_update_sd(&myth, inode);
2801 		update_sd = 1;
2802 		ret = journal_end(&myth, inode->i_sb, 1);
2803 		reiserfs_write_unlock(inode->i_sb);
2804 		if (ret)
2805 			goto journal_error;
2806 	}
2807 	if (th) {
2808 		reiserfs_write_lock(inode->i_sb);
2809 		if (!update_sd)
2810 			mark_inode_dirty(inode);
2811 		ret = reiserfs_end_persistent_transaction(th);
2812 		reiserfs_write_unlock(inode->i_sb);
2813 		if (ret)
2814 			goto out;
2815 	}
2816 
2817       out:
2818 	return ret;
2819 
2820       journal_error:
2821 	if (th) {
2822 		reiserfs_write_lock(inode->i_sb);
2823 		if (!update_sd)
2824 			reiserfs_update_sd(th, inode);
2825 		ret = reiserfs_end_persistent_transaction(th);
2826 		reiserfs_write_unlock(inode->i_sb);
2827 	}
2828 
2829 	return ret;
2830 }
2831 
2832 void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode)
2833 {
2834 	if (reiserfs_attrs(inode->i_sb)) {
2835 		if (sd_attrs & REISERFS_SYNC_FL)
2836 			inode->i_flags |= S_SYNC;
2837 		else
2838 			inode->i_flags &= ~S_SYNC;
2839 		if (sd_attrs & REISERFS_IMMUTABLE_FL)
2840 			inode->i_flags |= S_IMMUTABLE;
2841 		else
2842 			inode->i_flags &= ~S_IMMUTABLE;
2843 		if (sd_attrs & REISERFS_APPEND_FL)
2844 			inode->i_flags |= S_APPEND;
2845 		else
2846 			inode->i_flags &= ~S_APPEND;
2847 		if (sd_attrs & REISERFS_NOATIME_FL)
2848 			inode->i_flags |= S_NOATIME;
2849 		else
2850 			inode->i_flags &= ~S_NOATIME;
2851 		if (sd_attrs & REISERFS_NOTAIL_FL)
2852 			REISERFS_I(inode)->i_flags |= i_nopack_mask;
2853 		else
2854 			REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
2855 	}
2856 }
2857 
2858 void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs)
2859 {
2860 	if (reiserfs_attrs(inode->i_sb)) {
2861 		if (inode->i_flags & S_IMMUTABLE)
2862 			*sd_attrs |= REISERFS_IMMUTABLE_FL;
2863 		else
2864 			*sd_attrs &= ~REISERFS_IMMUTABLE_FL;
2865 		if (inode->i_flags & S_SYNC)
2866 			*sd_attrs |= REISERFS_SYNC_FL;
2867 		else
2868 			*sd_attrs &= ~REISERFS_SYNC_FL;
2869 		if (inode->i_flags & S_NOATIME)
2870 			*sd_attrs |= REISERFS_NOATIME_FL;
2871 		else
2872 			*sd_attrs &= ~REISERFS_NOATIME_FL;
2873 		if (REISERFS_I(inode)->i_flags & i_nopack_mask)
2874 			*sd_attrs |= REISERFS_NOTAIL_FL;
2875 		else
2876 			*sd_attrs &= ~REISERFS_NOTAIL_FL;
2877 	}
2878 }
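
/*
 * Round-trip sketch (illustrative only): over the flags both helpers
 * cover, the two conversions above are inverses, so
 *
 *	__u16 sd = 0;
 *	i_attrs_to_sd_attrs(inode, &sd);	// S_* flags -> REISERFS_*_FL
 *	sd_attrs_to_i_attrs(sd, inode);		// ... and back, a no-op
 *
 * The one asymmetry is REISERFS_APPEND_FL: sd_attrs_to_i_attrs() maps
 * it to S_APPEND, but the reverse conversion does not preserve it.
 */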
2879 
2880 /* decide if this buffer needs to stay around for data logging or ordered
2881 ** write purposes
2882 */
2883 static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
2884 {
2885 	int ret = 1;
2886 	struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
2887 
2888 	lock_buffer(bh);
2889 	spin_lock(&j->j_dirty_buffers_lock);
2890 	if (!buffer_mapped(bh)) {
2891 		goto free_jh;
2892 	}
2893 	/* the page is locked, and the only places that log a data buffer
2894 	 * also lock the page.
2895 	 */
2896 	if (reiserfs_file_data_log(inode)) {
2897 		/*
2898 		 * very conservative, leave the buffer pinned if
2899 		 * anyone might need it.
2900 		 */
2901 		if (buffer_journaled(bh) || buffer_journal_dirty(bh)) {
2902 			ret = 0;
2903 		}
2904 	} else if (buffer_dirty(bh)) {
2905 		struct reiserfs_journal_list *jl;
2906 		struct reiserfs_jh *jh = bh->b_private;
2907 
2908 		/* why is this safe?
2909 		 * reiserfs_setattr updates i_size in the on disk
2910 		 * stat data before allowing vmtruncate to be called.
2911 		 *
2912 		 * If buffer was put onto the ordered list for this
2913 		 * transaction, we know for sure either this transaction
2914 		 * or an older one already has updated i_size on disk,
2915 		 * and this ordered data won't be referenced in the file
2916 		 * if we crash.
2917 		 *
2918 		 * if the buffer was put onto the ordered list for an older
2919 		 * transaction, we need to leave it around
2920 		 */
2921 		if (jh && (jl = jh->jl)
2922 		    && jl != SB_JOURNAL(inode->i_sb)->j_current_jl)
2923 			ret = 0;
2924 	}
2925       free_jh:
2926 	if (ret && bh->b_private) {
2927 		reiserfs_free_jh(bh);
2928 	}
2929 	spin_unlock(&j->j_dirty_buffers_lock);
2930 	unlock_buffer(bh);
2931 	return ret;
2932 }
2933 
2934 /* clm -- taken from fs/buffer.c:block_invalidate_page */
2935 static void reiserfs_invalidatepage(struct page *page, unsigned long offset)
2936 {
2937 	struct buffer_head *head, *bh, *next;
2938 	struct inode *inode = page->mapping->host;
2939 	unsigned int curr_off = 0;
2940 	int ret = 1;
2941 
2942 	BUG_ON(!PageLocked(page));
2943 
2944 	if (offset == 0)
2945 		ClearPageChecked(page);
2946 
2947 	if (!page_has_buffers(page))
2948 		goto out;
2949 
2950 	head = page_buffers(page);
2951 	bh = head;
2952 	do {
2953 		unsigned int next_off = curr_off + bh->b_size;
2954 		next = bh->b_this_page;
2955 
2956 		/*
2957 		 * is this block fully invalidated?
2958 		 */
2959 		if (offset <= curr_off) {
2960 			if (invalidatepage_can_drop(inode, bh))
2961 				reiserfs_unmap_buffer(bh);
2962 			else
2963 				ret = 0;
2964 		}
2965 		curr_off = next_off;
2966 		bh = next;
2967 	} while (bh != head);
2968 
2969 	/*
2970 	 * We release buffers only if the entire page is being invalidated.
2971 	 * The get_block cached value has been unconditionally invalidated,
2972 	 * so real IO is not possible anymore.
2973 	 */
2974 	if (!offset && ret) {
2975 		ret = try_to_release_page(page, 0);
2976 		/* maybe should BUG_ON(!ret); - neilb */
2977 	}
2978       out:
2979 	return;
2980 }
2981 
2982 static int reiserfs_set_page_dirty(struct page *page)
2983 {
2984 	struct inode *inode = page->mapping->host;
2985 	if (reiserfs_file_data_log(inode)) {
2986 		SetPageChecked(page);
2987 		return __set_page_dirty_nobuffers(page);
2988 	}
2989 	return __set_page_dirty_buffers(page);
2990 }
2991 
2992 /*
2993  * Returns 1 if the page's buffers were dropped.  The page is locked.
2994  *
2995  * Takes j_dirty_buffers_lock to protect the b_assoc_buffers list_heads
2996  * in the buffers at page_buffers(page).
2997  *
2998  * even in -o notail mode, we can't be sure an old mount without -o notail
2999  * didn't create files with tails.
3000  */
3001 static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
3002 {
3003 	struct inode *inode = page->mapping->host;
3004 	struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
3005 	struct buffer_head *head;
3006 	struct buffer_head *bh;
3007 	int ret = 1;
3008 
3009 	WARN_ON(PageChecked(page));
3010 	spin_lock(&j->j_dirty_buffers_lock);
3011 	head = page_buffers(page);
3012 	bh = head;
3013 	do {
3014 		if (bh->b_private) {
3015 			if (!buffer_dirty(bh) && !buffer_locked(bh)) {
3016 				reiserfs_free_jh(bh);
3017 			} else {
3018 				ret = 0;
3019 				break;
3020 			}
3021 		}
3022 		bh = bh->b_this_page;
3023 	} while (bh != head);
3024 	if (ret)
3025 		ret = try_to_free_buffers(page);
3026 	spin_unlock(&j->j_dirty_buffers_lock);
3027 	return ret;
3028 }
3029 
3030 /* We thank Mingming Cao for helping us understand in great detail what
3031    to do in this section of the code. */
3032 static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
3033 				  const struct iovec *iov, loff_t offset,
3034 				  unsigned long nr_segs)
3035 {
3036 	struct file *file = iocb->ki_filp;
3037 	struct inode *inode = file->f_mapping->host;
3038 
3039 	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
3040 				  offset, nr_segs,
3041 				  reiserfs_get_blocks_direct_io, NULL);
3042 }
3043 
3044 int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3045 {
3046 	struct inode *inode = dentry->d_inode;
3047 	int error;
3048 	unsigned int ia_valid;
3049 
3050 	/* must be turned off for recursive notify_change calls */
3051 	ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
3052 
3053 	reiserfs_write_lock(inode->i_sb);
3054 	if (attr->ia_valid & ATTR_SIZE) {
3055 		/* version 2 items will be caught by the s_maxbytes check
3056 		 ** done for us in vmtruncate
3057 		 */
3058 		if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 &&
3059 		    attr->ia_size > MAX_NON_LFS) {
3060 			error = -EFBIG;
3061 			goto out;
3062 		}
3063 		/* fill in hole pointers in the expanding truncate case. */
3064 		if (attr->ia_size > inode->i_size) {
3065 			error = generic_cont_expand_simple(inode, attr->ia_size);
3066 			if (REISERFS_I(inode)->i_prealloc_count > 0) {
3067 				int err;
3068 				struct reiserfs_transaction_handle th;
3069 				/* we're changing at most 2 bitmaps, inode + super */
3070 				err = journal_begin(&th, inode->i_sb, 4);
3071 				if (!err) {
3072 					reiserfs_discard_prealloc(&th, inode);
3073 					err = journal_end(&th, inode->i_sb, 4);
3074 				}
3075 				if (err)
3076 					error = err;
3077 			}
3078 			if (error)
3079 				goto out;
3080 			/*
3081 			 * the file size has changed, so ctime and mtime
3082 			 * must be updated
3083 			 */
3084 			attr->ia_valid |= (ATTR_MTIME | ATTR_CTIME);
3085 		}
3086 	}
3087 
3088 	if ((((attr->ia_valid & ATTR_UID) && (attr->ia_uid & ~0xffff)) ||
3089 	     ((attr->ia_valid & ATTR_GID) && (attr->ia_gid & ~0xffff))) &&
3090 	    (get_inode_sd_version(inode) == STAT_DATA_V1)) {
3091 		/* stat data of format v3.5 has 16 bit uid and gid */
3092 		error = -EINVAL;
3093 		goto out;
3094 	}
3095 
3096 	error = inode_change_ok(inode, attr);
3097 	if (!error) {
3098 		if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
3099 		    (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
3100 			error = reiserfs_chown_xattrs(inode, attr);
3101 
3102 			if (!error) {
3103 				struct reiserfs_transaction_handle th;
3104 				int jbegin_count =
3105 				    2 *
3106 				    (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) +
3107 				     REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) +
3108 				    2;
3109 
3110 				/* (user+group)*(old+new) structure - we count quota info and inode write (sb, inode) */
3111 				error =
3112 				    journal_begin(&th, inode->i_sb,
3113 						  jbegin_count);
3114 				if (error)
3115 					goto out;
3116 				error =
3117 				    vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
3118 				if (error) {
3119 					journal_end(&th, inode->i_sb,
3120 						    jbegin_count);
3121 					goto out;
3122 				}
3123 				/* Update corresponding info in inode so that everything is in
3124 				 * one transaction */
3125 				if (attr->ia_valid & ATTR_UID)
3126 					inode->i_uid = attr->ia_uid;
3127 				if (attr->ia_valid & ATTR_GID)
3128 					inode->i_gid = attr->ia_gid;
3129 				mark_inode_dirty(inode);
3130 				error =
3131 				    journal_end(&th, inode->i_sb, jbegin_count);
3132 			}
3133 		}
3134 		if (!error)
3135 			error = inode_setattr(inode, attr);
3136 	}
3137 
3138 	if (!error && reiserfs_posixacl(inode->i_sb)) {
3139 		if (attr->ia_valid & ATTR_MODE)
3140 			error = reiserfs_acl_chmod(inode);
3141 	}
3142 
3143       out:
3144 	reiserfs_write_unlock(inode->i_sb);
3145 	return error;
3146 }
3147 
3148 const struct address_space_operations reiserfs_address_space_operations = {
3149 	.writepage = reiserfs_writepage,
3150 	.readpage = reiserfs_readpage,
3151 	.readpages = reiserfs_readpages,
3152 	.releasepage = reiserfs_releasepage,
3153 	.invalidatepage = reiserfs_invalidatepage,
3154 	.sync_page = block_sync_page,
3155 	.write_begin = reiserfs_write_begin,
3156 	.write_end = reiserfs_write_end,
3157 	.bmap = reiserfs_aop_bmap,
3158 	.direct_IO = reiserfs_direct_IO,
3159 	.set_page_dirty = reiserfs_set_page_dirty,
3160 };
3161