xref: /openbmc/linux/fs/reiserfs/inode.c (revision 02bf6cc7)
/*
 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/reiserfs_fs.h>
#include <linux/reiserfs_acl.h>
#include <linux/reiserfs_xattr.h>
#include <linux/exportfs.h>
#include <linux/smp_lock.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/quotaops.h>
#include <linux/swap.h>

int reiserfs_commit_write(struct file *f, struct page *page,
			  unsigned from, unsigned to);
int reiserfs_prepare_write(struct file *f, struct page *page,
			   unsigned from, unsigned to);

void reiserfs_delete_inode(struct inode *inode)
{
	/* We need blocks for the transaction + (user+group) quota update (possibly delete) */
	int jbegin_count =
	    JOURNAL_PER_BALANCE_CNT * 2 +
	    2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
	struct reiserfs_transaction_handle th;
	int depth;
	int err;

	truncate_inode_pages(&inode->i_data, 0);

	depth = reiserfs_write_lock_once(inode->i_sb);

	/* The objectid == 0 case happens when we abort creating a new inode
	 * for some reason, such as lack of space. */
	if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) {	/* also handles bad_inode case */
		reiserfs_delete_xattrs(inode);

		if (journal_begin(&th, inode->i_sb, jbegin_count))
			goto out;
		reiserfs_update_inode_transaction(inode);

		reiserfs_discard_prealloc(&th, inode);

		err = reiserfs_delete_object(&th, inode);

		/* Do the quota update inside a transaction for journaled quotas.
		 * We must do that after delete_object so that quota updates go
		 * into the same transaction as the stat data deletion */
		if (!err)
			vfs_dq_free_inode(inode);

		if (journal_end(&th, inode->i_sb, jbegin_count))
			goto out;

		/* check the return value from reiserfs_delete_object after
		 * ending the transaction
		 */
		if (err)
			goto out;

		/* all items of the file are deleted, so we can remove the "save" link */
		remove_save_link(inode, 0 /* not truncate */ );	/* we can't do anything
								 * about an error here */
	} else {
		/* no object items are in the tree */
		;
	}
      out:
	clear_inode(inode);	/* note this must go after the journal_end to prevent deadlock */
	inode->i_blocks = 0;
	reiserfs_write_unlock_once(inode->i_sb, depth);
}

static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid,
			  __u32 objectid, loff_t offset, int type, int length)
{
	key->version = version;

	key->on_disk_key.k_dir_id = dirid;
	key->on_disk_key.k_objectid = objectid;
	set_cpu_key_k_offset(key, offset);
	set_cpu_key_k_type(key, type);
	key->key_length = length;
}

/* take the base of the inode key (dirid, objectid; these always come from
   the inode) and the version from the inode, then set the offset and type
   of the key */
void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset,
		  int type, int length)
{
	_make_cpu_key(key, get_inode_item_key_version(inode),
		      le32_to_cpu(INODE_PKEY(inode)->k_dir_id),
		      le32_to_cpu(INODE_PKEY(inode)->k_objectid), offset, type,
		      length);
}

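/*
 * Illustrative sketch (not part of the original file): how callers in this
 * file typically build a key.  For example, looking up the first byte of
 * the 'block'-th block of a file, as _get_block_create_0() does below:
 *
 *	struct cpu_key key;
 *
 *	make_cpu_key(&key, inode,
 *		     (loff_t) block * inode->i_sb->s_blocksize + 1,
 *		     TYPE_ANY, 3);
 *
 * Offsets in reiserfs keys are 1-based, hence the "+ 1".
 */
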
//
// when key is NULL, the short key (dirid, objectid) is left untouched
//
inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
			      int version,
			      loff_t offset, int type, int length,
			      int entry_count /*or ih_free_space */ )
{
	if (key) {
		ih->ih_key.k_dir_id = cpu_to_le32(key->on_disk_key.k_dir_id);
		ih->ih_key.k_objectid =
		    cpu_to_le32(key->on_disk_key.k_objectid);
	}
	put_ih_version(ih, version);
	set_le_ih_k_offset(ih, offset);
	set_le_ih_k_type(ih, type);
	put_ih_item_len(ih, length);
	/*    set_ih_free_space (ih, 0); */
	// for directory items it is the entry count; for direct and stat
	// data items, 0xffff; for indirect items, 0
	put_ih_entry_count(ih, entry_count);
}

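/*
 * Illustrative sketch (not part of the original file): the entry_count
 * argument follows the conventions described above.  Two examples taken
 * from later in this file:
 *
 *	// indirect item: entry_count (ih_free_space) is 0
 *	make_le_item_head(&tmp_ih, &key, version, 1, TYPE_INDIRECT,
 *			  UNFM_P_SIZE, 0);
 *
 *	// directory item: entry_count is the number of entries ("." and "..")
 *	make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET,
 *			  TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2);
 */
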
//
// FIXME: we might cache the most recently accessed indirect item

// Ugh.  Not too eager for that....
//  I cut the code until such time as I see a convincing argument (benchmark).
// I don't want a bloated inode struct..., and I don't like code complexity....

/* cutting the code is fine, since it really isn't in use yet and is easy
** to add back in.  But, Vladimir has a really good idea here.  Think
** about what happens when reading a file.  For each page,
** the VFS layer calls reiserfs_readpage, which searches the tree to find
** an indirect item.  This indirect item has X number of pointers, where
** X is a big number if we've done the block allocation right.  But,
** we only use one or two of these pointers during each call to readpage,
** needlessly re-searching the tree again later on.
**
** The size of the cache could be dynamic based on the size of the file.
**
** I'd also like to see us cache the location of the stat data item, since
** we are needlessly re-searching for that frequently.
**
** --chris
*/

/* If this page has a file tail in it, and
** it was read in by get_block_create_0, the page data is valid,
** but the tail is still sitting in a direct item, and we can't write to
** it.  So, look through this page, and check all the mapped buffers
** to make sure they have valid block numbers.  Any buffer that doesn't
** needs to be unmapped, so that block_prepare_write will correctly call
** reiserfs_get_block to convert the tail into an unformatted node
*/
static inline void fix_tail_page_for_writing(struct page *page)
{
	struct buffer_head *head, *next, *bh;

	if (page && page_has_buffers(page)) {
		head = page_buffers(page);
		bh = head;
		do {
			next = bh->b_this_page;
			if (buffer_mapped(bh) && bh->b_blocknr == 0) {
				reiserfs_unmap_buffer(bh);
			}
			bh = next;
		} while (bh != head);
	}
}

/* reiserfs_get_block does not need to allocate a block if one has already
   been allocated, or if a non-hole position has been found in the indirect
   item */
static inline int allocation_needed(int retval, b_blocknr_t allocated,
				    struct item_head *ih,
				    __le32 * item, int pos_in_item)
{
	if (allocated)
		return 0;
	if (retval == POSITION_FOUND && is_indirect_le_ih(ih) &&
	    get_block_num(item, pos_in_item))
		return 0;
	return 1;
}

static inline int indirect_item_found(int retval, struct item_head *ih)
{
	return (retval == POSITION_FOUND) && is_indirect_le_ih(ih);
}

static inline void set_block_dev_mapped(struct buffer_head *bh,
					b_blocknr_t block, struct inode *inode)
{
	map_bh(bh, inode->i_sb, block);
}

//
// files which were created in the earlier (3.5) format cannot be larger
// than 2 GB
//
static int file_capable(struct inode *inode, sector_t block)
{
	if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 ||	// it is a new-format file
	    block < (1 << (31 - inode->i_sb->s_blocksize_bits)))	// old file, but 'block' is inside the 2 GB limit
		return 1;

	return 0;
}

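/*
 * Worked example (illustrative): with a 4K block size, s_blocksize_bits
 * is 12, so the old-format limit is 1 << (31 - 12) = 524288 blocks, i.e.
 * 524288 * 4096 bytes = 2 GB.  A KEY_FORMAT_3_5 file may therefore only
 * map logical blocks 0 .. 524287.
 */
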
static int restart_transaction(struct reiserfs_transaction_handle *th,
			       struct inode *inode, struct treepath *path)
{
	struct super_block *s = th->t_super;
	int len = th->t_blocks_allocated;
	int err;

	BUG_ON(!th->t_trans_id);
	BUG_ON(!th->t_refcount);

	pathrelse(path);

	/* we cannot restart while nested */
	if (th->t_refcount > 1) {
		return 0;
	}
	reiserfs_update_sd(th, inode);
	err = journal_end(th, s, len);
	if (!err) {
		err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6);
		if (!err)
			reiserfs_update_inode_transaction(inode);
	}
	return err;
}

// Called by reiserfs_get_block when create == 0.  Returns the block number
// of the 'block'-th logical block of the file.  When it hits a direct item
// it either returns 0 (when called from bmap) or reads the direct item into
// the relevant piece of the page (bh_result).

static int _get_block_create_0(struct inode *inode, sector_t block,
			       struct buffer_head *bh_result, int args)
{
	INITIALIZE_PATH(path);
	struct cpu_key key;
	struct buffer_head *bh;
	struct item_head *ih, tmp_ih;
	b_blocknr_t blocknr;
	char *p = NULL;
	int chars;
	int ret;
	int result;
	int done = 0;
	unsigned long offset;

	// prepare the key to look for the 'block'-th block of the file
	make_cpu_key(&key, inode,
		     (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY,
		     3);

	result = search_for_position_by_key(inode->i_sb, &key, &path);
	if (result != POSITION_FOUND) {
		pathrelse(&path);
		if (p)
			kunmap(bh_result->b_page);
		if (result == IO_ERROR)
			return -EIO;
		// We do not return -ENOENT if there is a hole but the page is
		// uptodate, because it means that there is some mmapped data
		// associated with it that is yet to be written to disk.
		if ((args & GET_BLOCK_NO_HOLE)
		    && !PageUptodate(bh_result->b_page)) {
			return -ENOENT;
		}
		return 0;
	}

	bh = get_last_bh(&path);
	ih = get_ih(&path);
	if (is_indirect_le_ih(ih)) {
		__le32 *ind_item = (__le32 *) B_I_PITEM(bh, ih);

		/* FIXME: here we could cache the indirect item or part of it
		   in the inode to avoid search_by_key in case of subsequent
		   access to the file */
		blocknr = get_block_num(ind_item, path.pos_in_item);
		ret = 0;
		if (blocknr) {
			map_bh(bh_result, inode->i_sb, blocknr);
			if (path.pos_in_item ==
			    ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) {
				set_buffer_boundary(bh_result);
			}
		} else
			// We do not return -ENOENT if there is a hole but the
			// page is uptodate, because it means that there is
			// some mmapped data associated with it that is yet to
			// be written to disk.
		if ((args & GET_BLOCK_NO_HOLE)
			    && !PageUptodate(bh_result->b_page)) {
			ret = -ENOENT;
		}

		pathrelse(&path);
		if (p)
			kunmap(bh_result->b_page);
		return ret;
	}
	// the requested data is in direct item(s)
	if (!(args & GET_BLOCK_READ_DIRECT)) {
		// we are called by bmap. FIXME: we cannot map a block of the
		// file when it is stored in direct item(s)
		pathrelse(&path);
		if (p)
			kunmap(bh_result->b_page);
		return -ENOENT;
	}

	/* if we've got a direct item, and the buffer or page was uptodate,
	 ** we don't want to pull data off disk again.  skip to the
	 ** end, where we map the buffer and return
	 */
	if (buffer_uptodate(bh_result)) {
		goto finished;
	} else
		/*
		 ** grab_tail_page can trigger calls to reiserfs_get_block on
		 ** up to date pages without any buffers.  If the page is up
		 ** to date, we don't want to read old data off disk.  Set the
		 ** up to date bit on the buffer instead and jump to the end
		 */
	if (!bh_result->b_page || PageUptodate(bh_result->b_page)) {
		set_buffer_uptodate(bh_result);
		goto finished;
	}
	// read the file tail into part of the page
	offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1);
	copy_item_head(&tmp_ih, ih);

	/* we only want to kmap if we are reading the tail into the page.
	 ** this is not the common case, so we don't kmap until we are
	 ** sure we need to.  But, this means the item might move if
	 ** kmap schedules
	 */
	if (!p)
		p = (char *)kmap(bh_result->b_page);

	p += offset;
	memset(p, 0, inode->i_sb->s_blocksize);
	do {
		if (!is_direct_le_ih(ih)) {
			BUG();
		}
		/* make sure we don't read more bytes than actually exist in
		 ** the file.  This can happen in odd cases where i_size isn't
		 ** correct, and when direct item padding results in a few
		 ** extra bytes at the end of the direct item
		 */
		if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
			break;
		if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) {
			chars =
			    inode->i_size - (le_ih_k_offset(ih) - 1) -
			    path.pos_in_item;
			done = 1;
		} else {
			chars = ih_item_len(ih) - path.pos_in_item;
		}
		memcpy(p, B_I_PITEM(bh, ih) + path.pos_in_item, chars);

		if (done)
			break;

		p += chars;

		if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1))
			// we are done if the direct item just read is not the
			// last item of the node.  FIXME: we could try to check
			// the right delimiting key to see whether the direct
			// item continues in the right neighbor, or rely on
			// i_size
			break;

		// update the key to look for the next piece
		set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars);
		result = search_for_position_by_key(inode->i_sb, &key, &path);
		if (result != POSITION_FOUND)
			// i/o error most likely
			break;
		bh = get_last_bh(&path);
		ih = get_ih(&path);
	} while (1);

	flush_dcache_page(bh_result->b_page);
	kunmap(bh_result->b_page);

      finished:
	pathrelse(&path);

	if (result == IO_ERROR)
		return -EIO;

	/* this buffer has valid data, but isn't valid for io.  mapping it to
	 * block #0 tells the rest of reiserfs it just has a tail in it
	 */
	map_bh(bh_result, inode->i_sb, 0);
	set_buffer_uptodate(bh_result);
	return 0;
}

// this is called to create the file's block map, so _get_block_create_0
// will not read a direct item
static int reiserfs_bmap(struct inode *inode, sector_t block,
			 struct buffer_head *bh_result, int create)
{
	if (!file_capable(inode, block))
		return -EFBIG;

	reiserfs_write_lock(inode->i_sb);
	/* do not read the direct item */
	_get_block_create_0(inode, block, bh_result, 0);
	reiserfs_write_unlock(inode->i_sb);
	return 0;
}

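/*
 * Illustrative note (an assumption, not from the original text): this
 * helper has the get_block_t signature, so the address_space bmap
 * operation can be implemented in terms of it via generic_block_bmap(),
 * roughly:
 *
 *	static sector_t reiserfs_aop_bmap(struct address_space *as,
 *					  sector_t block)
 *	{
 *		return generic_block_bmap(as, block, reiserfs_bmap);
 *	}
 */
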
/* special version of get_block that is only used by grab_tail_page right
** now.  It is sent to block_prepare_write, and when you try to get a
** block past the end of the file (or a block from a hole) it returns
** -ENOENT instead of a valid buffer.  block_prepare_write expects to
** be able to do i/o on the buffers returned, unless an error value
** is also returned.
**
** So, this allows block_prepare_write to be used for reading a single block
** in a page, where it does not produce a valid buffer for holes or for
** blocks past the end of the file.  This turns out to be exactly what we
** need for reading tails for conversion.
**
** The point of the wrapper is forcing a certain value for create, even
** though the VFS layer is calling this function with create==1.  If you
** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block,
** don't use this function.
*/
static int reiserfs_get_block_create_0(struct inode *inode, sector_t block,
				       struct buffer_head *bh_result,
				       int create)
{
	return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE);
}

/* This is a special helper for reiserfs_get_block when we are executing a
   direct_IO request. */
static int reiserfs_get_blocks_direct_io(struct inode *inode,
					 sector_t iblock,
					 struct buffer_head *bh_result,
					 int create)
{
	int ret;

	bh_result->b_page = NULL;

	/* We set b_size before the reiserfs_get_block call since it is
	   referenced in convert_tail_for_hole() that may be called from
	   reiserfs_get_block() */
	bh_result->b_size = (1 << inode->i_blkbits);

	ret = reiserfs_get_block(inode, iblock, bh_result,
				 create | GET_BLOCK_NO_DANGLE);
	if (ret)
		goto out;

	/* don't allow direct io onto tail pages */
	if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
		/* make sure future calls to the direct io funcs for this
		 ** offset in the file fail by unmapping the buffer
		 */
		clear_buffer_mapped(bh_result);
		ret = -EINVAL;
	}
	/* Possibly an unpacked tail.  Flush the data before the pages
	   disappear */
	if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
		int err;

		reiserfs_write_lock(inode->i_sb);

		err = reiserfs_commit_for_inode(inode);
		REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;

		reiserfs_write_unlock(inode->i_sb);

		if (err < 0)
			ret = err;
	}
      out:
	return ret;
}

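/*
 * Illustrative sketch (an assumption about the caller, not shown in this
 * excerpt): a get_blocks helper like this is normally passed to
 * blockdev_direct_IO() from the address_space direct_IO method, roughly:
 *
 *	static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
 *					  const struct iovec *iov,
 *					  loff_t offset, unsigned long nr_segs)
 *	{
 *		struct file *file = iocb->ki_filp;
 *		struct inode *inode = file->f_mapping->host;
 *
 *		return blockdev_direct_IO(rw, iocb, inode,
 *					  inode->i_sb->s_bdev, iov, offset,
 *					  nr_segs,
 *					  reiserfs_get_blocks_direct_io, NULL);
 *	}
 */
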
/*
** helper function for when reiserfs_get_block is called for a hole
** but the file tail is still in a direct item.
** bh_result is the buffer head for the hole;
** tail_offset is the offset of the start of the tail in the file.
**
** This calls prepare_write, which will start a new transaction;
** you must not be in a transaction, or hold any paths, when you
** call this.
*/
static int convert_tail_for_hole(struct inode *inode,
				 struct buffer_head *bh_result,
				 loff_t tail_offset)
{
	unsigned long index;
	unsigned long tail_end;
	unsigned long tail_start;
	struct page *tail_page;
	struct page *hole_page = bh_result->b_page;
	int retval = 0;

	if ((tail_offset & (bh_result->b_size - 1)) != 1)
		return -EIO;

	/* always try to read until the end of the block */
	tail_start = tail_offset & (PAGE_CACHE_SIZE - 1);
	tail_end = (tail_start | (bh_result->b_size - 1)) + 1;

	index = tail_offset >> PAGE_CACHE_SHIFT;
	/* hole_page can be NULL in the direct_io case; we are sure
	   that we cannot get here when writing with O_DIRECT into a
	   tail page */
	if (!hole_page || index != hole_page->index) {
		tail_page = grab_cache_page(inode->i_mapping, index);
		retval = -ENOMEM;
		if (!tail_page) {
			goto out;
		}
	} else {
		tail_page = hole_page;
	}

	/* we don't have to make sure the conversion did not happen while
	 ** we were locking the page, because anyone that could convert
	 ** must first take i_mutex.
	 **
	 ** We must fix the tail page for writing because it might have
	 ** buffers that are mapped, but have a block number of 0.  This
	 ** indicates tail data that has been read directly into the page,
	 ** and block_prepare_write won't trigger a get_block in this case.
	 */
	fix_tail_page_for_writing(tail_page);
	retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_end);
	if (retval)
		goto unlock;

	/* tail conversion might change the data in the page */
	flush_dcache_page(tail_page);

	retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end);

      unlock:
	if (tail_page != hole_page) {
		unlock_page(tail_page);
		page_cache_release(tail_page);
	}
      out:
	return retval;
}

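/*
 * Worked example (illustrative, assuming 4K blocks and 4K pages): a tail
 * whose block-aligned start is at file position 8192 has
 * tail_offset == 8193, since reiserfs offsets are 1-based.  The sanity
 * check passes (8193 & 4095 == 1), and:
 *
 *	tail_start = 8193 & (PAGE_CACHE_SIZE - 1)	= 1
 *	tail_end   = (1 | (4096 - 1)) + 1		= 4096
 *	index      = 8193 >> PAGE_CACHE_SHIFT		= 2
 *
 * so the tail page is page 2, and prepare/commit_write are called with
 * from == 1, to == 4096; at block (buffer) granularity this covers the
 * whole first and only block of that page.
 */
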
static inline int _allocate_block(struct reiserfs_transaction_handle *th,
				  sector_t block,
				  struct inode *inode,
				  b_blocknr_t * allocated_block_nr,
				  struct treepath *path, int flags)
{
	BUG_ON(!th->t_trans_id);

#ifdef REISERFS_PREALLOCATE
	if (!(flags & GET_BLOCK_NO_IMUX)) {
		return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr,
						  path, block);
	}
#endif
	return reiserfs_new_unf_blocknrs(th, inode, allocated_block_nr, path,
					 block);
}

int reiserfs_get_block(struct inode *inode, sector_t block,
		       struct buffer_head *bh_result, int create)
{
	int repeat, retval = 0;
	b_blocknr_t allocated_block_nr = 0;	// b_blocknr_t is (unsigned) 32 bit int
	INITIALIZE_PATH(path);
	int pos_in_item;
	struct cpu_key key;
	struct buffer_head *bh, *unbh = NULL;
	struct item_head *ih, tmp_ih;
	__le32 *item;
	int done;
	int fs_gen;
	int lock_depth;
	struct reiserfs_transaction_handle *th = NULL;
	/* space reserved in the transaction batch:
	   . 3 balancings in direct->indirect conversion
	   . 1 block involved in reiserfs_update_sd()
	   XXX in the practically impossible worst case direct2indirect()
	   can incur (much) more than 3 balancings.
	   quota update for user, group */
	int jbegin_count =
	    JOURNAL_PER_BALANCE_CNT * 3 + 1 +
	    2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
	int version;
	int dangle = 1;
	loff_t new_offset =
	    (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1;

	lock_depth = reiserfs_write_lock_once(inode->i_sb);
	version = get_inode_item_key_version(inode);

	if (!file_capable(inode, block)) {
		reiserfs_write_unlock_once(inode->i_sb, lock_depth);
		return -EFBIG;
	}

	/* if !create, we aren't changing the FS, so we don't need to
	 ** log anything, so we don't need to start a transaction
	 */
	if (!(create & GET_BLOCK_CREATE)) {
		int ret;
		/* find the block number of the 'block'-th logical block of
		   the file */
		ret = _get_block_create_0(inode, block, bh_result,
					  create | GET_BLOCK_READ_DIRECT);
		reiserfs_write_unlock_once(inode->i_sb, lock_depth);
		return ret;
	}
	/*
	 * if we're already in a transaction, make sure to close
	 * any new transactions we start in this func
	 */
	if ((create & GET_BLOCK_NO_DANGLE) ||
	    reiserfs_transaction_running(inode->i_sb))
		dangle = 0;

	/* If the file is of such a size that it might have a tail, and tails
	 ** are enabled, mark it as possibly needing tail packing on close
	 */
	if ((have_large_tails(inode->i_sb)
	     && inode->i_size < i_block_size(inode) * 4)
	    || (have_small_tails(inode->i_sb)
		&& inode->i_size < i_block_size(inode)))
		REISERFS_I(inode)->i_flags |= i_pack_on_close_mask;

	/* set the key of the first byte in the 'block'-th block of the file */
	make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ );
	if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
	      start_trans:
		th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count);
		if (!th) {
			retval = -ENOMEM;
			goto failure;
		}
		reiserfs_update_inode_transaction(inode);
	}
      research:

	retval = search_for_position_by_key(inode->i_sb, &key, &path);
	if (retval == IO_ERROR) {
		retval = -EIO;
		goto failure;
	}

	bh = get_last_bh(&path);
	ih = get_ih(&path);
	item = get_item(&path);
	pos_in_item = path.pos_in_item;

	fs_gen = get_generation(inode->i_sb);
	copy_item_head(&tmp_ih, ih);

	if (allocation_needed
	    (retval, allocated_block_nr, ih, item, pos_in_item)) {
		/* we have to allocate a block for the unformatted node */
		if (!th) {
			pathrelse(&path);
			goto start_trans;
		}

		repeat =
		    _allocate_block(th, block, inode, &allocated_block_nr,
				    &path, create);

		if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) {
			/* restart the transaction to give the journal a
			 ** chance to free some blocks.  This releases the
			 ** path, so we have to go back to research if we
			 ** succeed on the second try
			 */
			SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1;
			retval = restart_transaction(th, inode, &path);
			if (retval)
				goto failure;
			repeat =
			    _allocate_block(th, block, inode,
					    &allocated_block_nr, NULL, create);

			if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) {
				goto research;
			}
			if (repeat == QUOTA_EXCEEDED)
				retval = -EDQUOT;
			else
				retval = -ENOSPC;
			goto failure;
		}

		if (fs_changed(fs_gen, inode->i_sb)
		    && item_moved(&tmp_ih, &path)) {
			goto research;
		}
	}

	if (indirect_item_found(retval, ih)) {
		b_blocknr_t unfm_ptr;
		/* the 'block'-th block is in the file already (there is a
		   corresponding cell in some indirect item), but it may be
		   a zero unformatted node pointer (a hole) */
		unfm_ptr = get_block_num(item, pos_in_item);
		if (unfm_ptr == 0) {
			/* use the allocated block to plug the hole */
			reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
			if (fs_changed(fs_gen, inode->i_sb)
			    && item_moved(&tmp_ih, &path)) {
				reiserfs_restore_prepared_buffer(inode->i_sb,
								 bh);
				goto research;
			}
			set_buffer_new(bh_result);
			if (buffer_dirty(bh_result)
			    && reiserfs_data_ordered(inode->i_sb))
				reiserfs_add_ordered_list(inode, bh_result);
			put_block_num(item, pos_in_item, allocated_block_nr);
			unfm_ptr = allocated_block_nr;
			journal_mark_dirty(th, inode->i_sb, bh);
			reiserfs_update_sd(th, inode);
		}
		set_block_dev_mapped(bh_result, unfm_ptr, inode);
		pathrelse(&path);
		retval = 0;
		if (!dangle && th)
			retval = reiserfs_end_persistent_transaction(th);

		reiserfs_write_unlock_once(inode->i_sb, lock_depth);

		/* the item was found, so no new blocks were added to the
		 ** file; there is no need to make sure the inode is updated
		 ** with this transaction
		 */
		return retval;
	}

	if (!th) {
		pathrelse(&path);
		goto start_trans;
	}

	/* The desired position was not found, or is in a direct item.  We
	   have to append the file with holes up to the 'block'-th block,
	   converting direct items to indirect ones if necessary */
	done = 0;
	do {
		if (is_statdata_le_ih(ih)) {
			__le32 unp = 0;
			struct cpu_key tmp_key;

			/* an indirect item has to be inserted */
			make_le_item_head(&tmp_ih, &key, version, 1,
					  TYPE_INDIRECT, UNFM_P_SIZE,
					  0 /* free_space */ );

			if (cpu_key_k_offset(&key) == 1) {
				/* we are going to add the 'block'-th block to
				   the file.  Use the allocated block for that */
				unp = cpu_to_le32(allocated_block_nr);
				set_block_dev_mapped(bh_result,
						     allocated_block_nr, inode);
				set_buffer_new(bh_result);
				done = 1;
			}
			tmp_key = key;	/* structure copy */
			set_cpu_key_k_offset(&tmp_key, 1);
			PATH_LAST_POSITION(&path)++;

			retval =
			    reiserfs_insert_item(th, &path, &tmp_key, &tmp_ih,
						 inode, (char *)&unp);
			if (retval) {
				reiserfs_free_block(th, inode,
						    allocated_block_nr, 1);
				goto failure;	// retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST
			}
			//mark_tail_converted (inode);
		} else if (is_direct_le_ih(ih)) {
			/* the direct item has to be converted */
			loff_t tail_offset;

			tail_offset =
			    ((le_ih_k_offset(ih) -
			      1) & ~(inode->i_sb->s_blocksize - 1)) + 1;
			if (tail_offset == cpu_key_k_offset(&key)) {
				/* the direct item we just found fits into the
				   block we have to map.  Convert it into an
				   unformatted node: use bh_result for the
				   conversion */
				set_block_dev_mapped(bh_result,
						     allocated_block_nr, inode);
				unbh = bh_result;
				done = 1;
			} else {
				/* we have to pad the file tail stored in
				   direct item(s) up to the block size and
				   convert it to an unformatted node.  FIXME:
				   this should also get into the page cache */

				pathrelse(&path);
				/*
				 * ugly, but we can only end the transaction if
				 * we aren't nested
				 */
				BUG_ON(!th->t_refcount);
				if (th->t_refcount == 1) {
					retval =
					    reiserfs_end_persistent_transaction
					    (th);
					th = NULL;
					if (retval)
						goto failure;
				}

				retval =
				    convert_tail_for_hole(inode, bh_result,
							  tail_offset);
				if (retval) {
					if (retval != -ENOSPC)
						reiserfs_error(inode->i_sb,
							"clm-6004",
							"convert tail failed "
							"inode %lu, error %d",
							inode->i_ino,
							retval);
					if (allocated_block_nr) {
						/* the bitmap, the super, and the stat data == 3 */
						if (!th)
							th = reiserfs_persistent_transaction(inode->i_sb, 3);
						if (th)
							reiserfs_free_block(th,
									    inode,
									    allocated_block_nr,
									    1);
					}
					goto failure;
				}
				goto research;
			}
			retval =
			    direct2indirect(th, inode, &path, unbh,
					    tail_offset);
			if (retval) {
				reiserfs_unmap_buffer(unbh);
				reiserfs_free_block(th, inode,
						    allocated_block_nr, 1);
				goto failure;
			}
			/* it is important that set_buffer_uptodate is done
			 ** after direct2indirect.  The buffer might contain
			 ** valid data newer than the data on disk (read by
			 ** readpage, changed, and then sent here by
			 ** writepage).  direct2indirect needs to know if unbh
			 ** was already up to date, so it can decide if the
			 ** data in unbh needs to be replaced with data from
			 ** the disk
			 */
			set_buffer_uptodate(unbh);

			/* unbh->b_page == NULL in case of a DIRECT_IO request:
			   the buffer will disappear shortly, so it should not
			   be added to the tail list
			 */
			if (unbh->b_page) {
				/* we've converted the tail, so we must
				 ** flush unbh before the transaction commits
				 */
				reiserfs_add_tail_list(inode, unbh);

				/* mark it dirty now to prevent commit_write
				 ** from adding this buffer to the inode's
				 ** dirty buffer list
				 */
				/*
				 * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty().
				 * It's still atomic, but it sets the page dirty too,
				 * which makes it eligible for writeback at any time by the
				 * VM (which was also the case with __mark_buffer_dirty())
				 */
				mark_buffer_dirty(unbh);
			}
		} else {
			/* append the indirect item with holes if needed; when
			   appending the pointer to the 'block'-th block, use
			   the block which is already allocated */
			struct cpu_key tmp_key;
			unp_t unf_single = 0;	// We use this in case we need to allocate
			// only one block, which is a fastpath
			unp_t *un;
			__u64 max_to_insert =
			    MAX_ITEM_LEN(inode->i_sb->s_blocksize) /
			    UNFM_P_SIZE;
			__u64 blocks_needed;

			RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE,
			       "vs-804: invalid position for append");
			/* the indirect item has to be appended; set up the key
			   of that position */
			make_cpu_key(&tmp_key, inode,
				     le_key_k_offset(version,
						     &(ih->ih_key)) +
				     op_bytes_number(ih,
						     inode->i_sb->s_blocksize),
				     //pos_in_item * inode->i_sb->s_blocksize,
				     TYPE_INDIRECT, 3);	// key type is unimportant

			RFALSE(cpu_key_k_offset(&tmp_key) > cpu_key_k_offset(&key),
			       "green-805: invalid offset");
			blocks_needed =
			    1 +
			    ((cpu_key_k_offset(&key) -
			      cpu_key_k_offset(&tmp_key)) >> inode->i_sb->
			     s_blocksize_bits);

			if (blocks_needed == 1) {
				un = &unf_single;
			} else {
				un = kzalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_NOFS);
				if (!un) {
					un = &unf_single;
					blocks_needed = 1;
					max_to_insert = 0;
				}
			}
			if (blocks_needed <= max_to_insert) {
				/* we are going to add the target block to the
				   file.  Use the allocated block for that */
				un[blocks_needed - 1] =
				    cpu_to_le32(allocated_block_nr);
				set_block_dev_mapped(bh_result,
						     allocated_block_nr, inode);
				set_buffer_new(bh_result);
				done = 1;
			} else {
				/* paste a hole into the indirect item */
				/* If the allocation failed, max_to_insert
				   becomes zero, which means we only have
				   space for one block */
				blocks_needed =
				    max_to_insert ? max_to_insert : 1;
			}
			retval =
			    reiserfs_paste_into_item(th, &path, &tmp_key, inode,
						     (char *)un,
						     UNFM_P_SIZE *
						     blocks_needed);

			if (blocks_needed != 1)
				kfree(un);

			if (retval) {
				reiserfs_free_block(th, inode,
						    allocated_block_nr, 1);
				goto failure;
			}
			if (!done) {
				/* We need to mark the new file size in case
				   this function is interrupted/aborted later
				   on.  We may do this only for holes. */
				inode->i_size +=
				    inode->i_sb->s_blocksize * blocks_needed;
			}
		}

		if (done == 1)
			break;

		/* this loop could log more blocks than we had originally
		 ** asked for.  So, we have to allow the transaction to end
		 ** if it is too big or too full.  Update the inode so things
		 ** are consistent if we crash before the function returns
		 **
		 ** release the path so that anybody waiting on the path
		 ** before ending their transaction will be able to continue.
		 */
		if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
			retval = restart_transaction(th, inode, &path);
			if (retval)
				goto failure;
		}
		/*
		 * inserting indirect pointers for a hole can take a
		 * long time.  reschedule if needed and also release the write
		 * lock for others.
		 */
		if (need_resched()) {
			reiserfs_write_unlock_once(inode->i_sb, lock_depth);
			schedule();
			lock_depth = reiserfs_write_lock_once(inode->i_sb);
		}

		retval = search_for_position_by_key(inode->i_sb, &key, &path);
		if (retval == IO_ERROR) {
			retval = -EIO;
			goto failure;
		}
		if (retval == POSITION_FOUND) {
			reiserfs_warning(inode->i_sb, "vs-825",
					 "%K should not be found", &key);
			retval = -EEXIST;
			if (allocated_block_nr)
				reiserfs_free_block(th, inode,
						    allocated_block_nr, 1);
			pathrelse(&path);
			goto failure;
		}
		bh = get_last_bh(&path);
		ih = get_ih(&path);
		item = get_item(&path);
		pos_in_item = path.pos_in_item;
	} while (1);

	retval = 0;

      failure:
	if (th && (!dangle || (retval && !th->t_trans_id))) {
		int err;
		if (th->t_trans_id)
			reiserfs_update_sd(th, inode);
		err = reiserfs_end_persistent_transaction(th);
		if (err)
			retval = err;
	}

	reiserfs_write_unlock_once(inode->i_sb, lock_depth);
	reiserfs_check_path(&path);
	return retval;
}

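/*
 * Illustrative summary (not from the original text): the 'create' argument
 * is a bitmask rather than a boolean.  Flags seen in this file:
 *
 *	GET_BLOCK_CREATE       - allocate missing blocks (plain get_block)
 *	GET_BLOCK_NO_HOLE      - return -ENOENT for holes unless the page
 *				 is uptodate (grab_tail_page path)
 *	GET_BLOCK_READ_DIRECT  - read tail data stored in direct items
 *	GET_BLOCK_NO_DANGLE    - close any transaction this call starts
 *	GET_BLOCK_NO_IMUX      - skip preallocation in _allocate_block
 *
 * A hypothetical caller mapping one block for write would do roughly:
 *
 *	struct buffer_head bh = { .b_size = inode->i_sb->s_blocksize };
 *	int err = reiserfs_get_block(inode, block, &bh, GET_BLOCK_CREATE);
 */
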
static int
reiserfs_readpages(struct file *file, struct address_space *mapping,
		   struct list_head *pages, unsigned nr_pages)
{
	return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block);
}

/* Compute the real number of bytes used by a file.
 * The following three functions can go away when we have enough space in
 * the stat item
 */
static int real_space_diff(struct inode *inode, int sd_size)
{
	int bytes;
	loff_t blocksize = inode->i_sb->s_blocksize;

	if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode))
		return sd_size;

	/* End of file is also in a full block with an indirect reference,
	 ** so round up to the next block.
	 **
	 ** There is just no way to know if the tail is actually packed
	 ** on the file, so we have to assume it isn't.  When we pack the
	 ** tail, we add 4 bytes to pretend there really is an unformatted
	 ** node pointer
	 */
	bytes =
	    ((inode->i_size +
	      (blocksize - 1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE +
	    sd_size;
	return bytes;
}

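/*
 * Worked example (illustrative, assuming SD_V2_SIZE is 44, the size of
 * struct stat_data, and UNFM_P_SIZE is 4): a 10000-byte regular file on a
 * 4K filesystem spans roundup(10000 / 4096) = 3 blocks, so
 *
 *	bytes = 3 * UNFM_P_SIZE + SD_V2_SIZE = 3 * 4 + 44 = 56
 *
 * i.e. 56 bytes of metadata (three unformatted node pointers plus the
 * stat data) are charged to the file beyond its data blocks.
 */
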
static inline loff_t to_real_used_space(struct inode *inode, ulong blocks,
					int sd_size)
{
	if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) {
		return inode->i_size +
		    (loff_t) (real_space_diff(inode, sd_size));
	}
	return ((loff_t) real_space_diff(inode, sd_size)) +
	    (((loff_t) blocks) << 9);
}

/* Compute the number of blocks used by a file in ReiserFS counting */
static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size)
{
	loff_t bytes = inode_get_bytes(inode);
	loff_t real_space = real_space_diff(inode, sd_size);

	/* keeps fsck and non-quota versions of reiserfs happy */
	if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) {
		bytes += (loff_t) 511;
	}

	/* files from before the quota patch might have i_blocks such that
	 ** bytes < real_space.  Deal with that here to prevent it from
	 ** going negative.
	 */
	if (bytes < real_space)
		return 0;
	return (bytes - real_space) >> 9;
}

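/*
 * Worked example (illustrative): continuing the example above, the
 * 10000-byte file with 3 data blocks (24 sectors) has
 * inode_get_bytes() == (24 << 9) + 56 = 12344 once the metadata diff is
 * included, so
 *
 *	(12344 - 56) >> 9 = 24
 *
 * 512-byte sectors are reported, exactly the 3 * 8 sectors of its data
 * blocks.
 */
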
//
// BAD: new directories have stat data of the new type, and all other items
// of the old type.  The version stored in the inode describes the body
// items, so in update_stat_data we cannot rely on the inode, but have to
// check the item version directly
//

// called by read_locked_inode
static void init_inode(struct inode *inode, struct treepath *path)
{
	struct buffer_head *bh;
	struct item_head *ih;
	__u32 rdev;
	//int version = ITEM_VERSION_1;

	bh = PATH_PLAST_BUFFER(path);
	ih = PATH_PITEM_HEAD(path);

	copy_key(INODE_PKEY(inode), &(ih->ih_key));

	INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
	REISERFS_I(inode)->i_flags = 0;
	REISERFS_I(inode)->i_prealloc_block = 0;
	REISERFS_I(inode)->i_prealloc_count = 0;
	REISERFS_I(inode)->i_trans_id = 0;
	REISERFS_I(inode)->i_jl = NULL;
	mutex_init(&(REISERFS_I(inode)->i_mmap));
	reiserfs_init_xattr_rwsem(inode);

	if (stat_data_v1(ih)) {
		struct stat_data_v1 *sd =
		    (struct stat_data_v1 *)B_I_PITEM(bh, ih);
		unsigned long blocks;

		set_inode_item_key_version(inode, KEY_FORMAT_3_5);
		set_inode_sd_version(inode, STAT_DATA_V1);
		inode->i_mode = sd_v1_mode(sd);
		inode->i_nlink = sd_v1_nlink(sd);
		inode->i_uid = sd_v1_uid(sd);
		inode->i_gid = sd_v1_gid(sd);
		inode->i_size = sd_v1_size(sd);
		inode->i_atime.tv_sec = sd_v1_atime(sd);
		inode->i_mtime.tv_sec = sd_v1_mtime(sd);
		inode->i_ctime.tv_sec = sd_v1_ctime(sd);
		inode->i_atime.tv_nsec = 0;
		inode->i_ctime.tv_nsec = 0;
		inode->i_mtime.tv_nsec = 0;

		inode->i_blocks = sd_v1_blocks(sd);
		inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
		blocks = (inode->i_size + 511) >> 9;
		blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9);
		if (inode->i_blocks > blocks) {
			// there was a bug in <=3.5.23 when i_blocks could
			// take negative values.  Starting from 3.5.17 this
			// value could even be stored in the stat data.  For
			// such files we set i_blocks based on the file size.
			// Two notes: this can be wrong for sparse files, and
			// the on-disk value will only be updated if the
			// file's inode ever changes
			inode->i_blocks = blocks;
		}

		rdev = sd_v1_rdev(sd);
		REISERFS_I(inode)->i_first_direct_byte =
		    sd_v1_first_direct_byte(sd);
		/* an early bug in the quota code can give us an odd number
		 ** for the block count.  This is incorrect; fix it here.
		 */
		if (inode->i_blocks & 1) {
			inode->i_blocks++;
		}
		inode_set_bytes(inode,
				to_real_used_space(inode, inode->i_blocks,
						   SD_V1_SIZE));
		/* nopack is initially zero for v1 objects.  For v2 objects,
		   nopack is initialised from sd_attrs */
		REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
	} else {
		// new stat data found, but the object may have old items
		// (directories and symlinks)
		struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih);

		inode->i_mode = sd_v2_mode(sd);
		inode->i_nlink = sd_v2_nlink(sd);
		inode->i_uid = sd_v2_uid(sd);
		inode->i_size = sd_v2_size(sd);
		inode->i_gid = sd_v2_gid(sd);
		inode->i_mtime.tv_sec = sd_v2_mtime(sd);
		inode->i_atime.tv_sec = sd_v2_atime(sd);
		inode->i_ctime.tv_sec = sd_v2_ctime(sd);
		inode->i_ctime.tv_nsec = 0;
		inode->i_mtime.tv_nsec = 0;
		inode->i_atime.tv_nsec = 0;
		inode->i_blocks = sd_v2_blocks(sd);
		rdev = sd_v2_rdev(sd);
		if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
			inode->i_generation =
			    le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
		else
			inode->i_generation = sd_v2_generation(sd);

		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
			set_inode_item_key_version(inode, KEY_FORMAT_3_5);
		else
			set_inode_item_key_version(inode, KEY_FORMAT_3_6);
		REISERFS_I(inode)->i_first_direct_byte = 0;
		set_inode_sd_version(inode, STAT_DATA_V2);
		inode_set_bytes(inode,
				to_real_used_space(inode, inode->i_blocks,
						   SD_V2_SIZE));
		/* read persistent inode attributes from sd and initialise
		   generic inode flags from them */
		REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd);
		sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode);
	}

	pathrelse(path);
	if (S_ISREG(inode->i_mode)) {
		inode->i_op = &reiserfs_file_inode_operations;
		inode->i_fop = &reiserfs_file_operations;
		inode->i_mapping->a_ops = &reiserfs_address_space_operations;
	} else if (S_ISDIR(inode->i_mode)) {
		inode->i_op = &reiserfs_dir_inode_operations;
		inode->i_fop = &reiserfs_dir_operations;
	} else if (S_ISLNK(inode->i_mode)) {
		inode->i_op = &reiserfs_symlink_inode_operations;
		inode->i_mapping->a_ops = &reiserfs_address_space_operations;
	} else {
		inode->i_blocks = 0;
		inode->i_op = &reiserfs_special_inode_operations;
		init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
	}
}

// update the new stat data with the inode fields
static void inode2sd(void *sd, struct inode *inode, loff_t size)
{
	struct stat_data *sd_v2 = (struct stat_data *)sd;
	__u16 flags;

	set_sd_v2_mode(sd_v2, inode->i_mode);
	set_sd_v2_nlink(sd_v2, inode->i_nlink);
	set_sd_v2_uid(sd_v2, inode->i_uid);
	set_sd_v2_size(sd_v2, size);
	set_sd_v2_gid(sd_v2, inode->i_gid);
	set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec);
	set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec);
	set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec);
	set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE));
	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
		set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev));
	else
		set_sd_v2_generation(sd_v2, inode->i_generation);
	flags = REISERFS_I(inode)->i_attrs;
	i_attrs_to_sd_attrs(inode, &flags);
	set_sd_v2_attrs(sd_v2, flags);
}

// used to copy the inode's fields to the old stat data
static void inode2sd_v1(void *sd, struct inode *inode, loff_t size)
{
	struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd;

	set_sd_v1_mode(sd_v1, inode->i_mode);
	set_sd_v1_uid(sd_v1, inode->i_uid);
	set_sd_v1_gid(sd_v1, inode->i_gid);
	set_sd_v1_nlink(sd_v1, inode->i_nlink);
	set_sd_v1_size(sd_v1, size);
	set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec);
	set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec);
	set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec);

	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
		set_sd_v1_rdev(sd_v1, new_encode_dev(inode->i_rdev));
	else
		set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE));

	// Sigh. i_first_direct_byte is back
	set_sd_v1_first_direct_byte(sd_v1,
				    REISERFS_I(inode)->i_first_direct_byte);
}

/* NOTE: you must prepare the buffer head before sending it here,
** and then log it after the call
*/
static void update_stat_data(struct treepath *path, struct inode *inode,
			     loff_t size)
{
	struct buffer_head *bh;
	struct item_head *ih;

	bh = PATH_PLAST_BUFFER(path);
	ih = PATH_PITEM_HEAD(path);

	if (!is_statdata_le_ih(ih))
		reiserfs_panic(inode->i_sb, "vs-13065", "key %k, found item %h",
			       INODE_PKEY(inode), ih);

	if (stat_data_v1(ih)) {
		// the path points to the old stat data
		inode2sd_v1(B_I_PITEM(bh, ih), inode, size);
	} else {
		inode2sd(B_I_PITEM(bh, ih), inode, size);
	}

	return;
}

void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
			     struct inode *inode, loff_t size)
{
	struct cpu_key key;
	INITIALIZE_PATH(path);
	struct buffer_head *bh;
	int fs_gen;
	struct item_head *ih, tmp_ih;
	int retval;

	BUG_ON(!th->t_trans_id);

	make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);	//key type is unimportant

	for (;;) {
		int pos;
		/* look for the object's stat data */
		retval = search_item(inode->i_sb, &key, &path);
		if (retval == IO_ERROR) {
			reiserfs_error(inode->i_sb, "vs-13050",
				       "i/o failure occurred trying to "
				       "update %K stat data", &key);
			return;
		}
		if (retval == ITEM_NOT_FOUND) {
			pos = PATH_LAST_POSITION(&path);
			pathrelse(&path);
			if (inode->i_nlink == 0) {
				/*reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found"); */
				return;
			}
			reiserfs_warning(inode->i_sb, "vs-13060",
					 "stat data of object %k (nlink == %d) "
					 "not found (pos %d)",
					 INODE_PKEY(inode), inode->i_nlink,
					 pos);
			reiserfs_check_path(&path);
			return;
		}

		/* sigh, prepare_for_journal might schedule.  When it
		 ** schedules the FS might change.  We have to detect that,
		 ** and loop back to the search if the stat data item has
		 ** moved
		 */
		bh = get_last_bh(&path);
		ih = get_ih(&path);
		copy_item_head(&tmp_ih, ih);
		fs_gen = get_generation(inode->i_sb);
		reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
		if (fs_changed(fs_gen, inode->i_sb)
		    && item_moved(&tmp_ih, &path)) {
			reiserfs_restore_prepared_buffer(inode->i_sb, bh);
			continue;	/* Stat_data item has been moved after scheduling. */
		}
		break;
	}
	update_stat_data(&path, inode, size);
	journal_mark_dirty(th, th->t_super, bh);
	pathrelse(&path);
	return;
}

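/*
 * Note (an assumption about a helper defined elsewhere, in
 * include/linux/reiserfs_fs.h in this era): reiserfs_update_sd(), used
 * throughout this file, is simply this function applied to the inode's
 * current size, roughly:
 *
 *	#define reiserfs_update_sd(th, inode) \
 *		reiserfs_update_sd_size(th, inode, (inode)->i_size)
 */
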
/* reiserfs_read_locked_inode is called to read the inode off disk, and it
** does a make_bad_inode when things go wrong.  But, we need to make sure
** we clear the key in the private portion of the inode, otherwise a
** corresponding iput might try to delete whatever object the inode last
** represented.
*/
static void reiserfs_make_bad_inode(struct inode *inode)
{
	memset(INODE_PKEY(inode), 0, KEY_SIZE);
	make_bad_inode(inode);
}

//
// initially this function was derived from minix or ext2's analog and
// evolved as the prototype did
//

int reiserfs_init_locked_inode(struct inode *inode, void *p)
{
	struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p;
	inode->i_ino = args->objectid;
	INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid);
	return 0;
}

/* looks for the stat data in the tree, and fills in the fields of the
   in-core inode from the on-disk stat data */
void reiserfs_read_locked_inode(struct inode *inode,
				struct reiserfs_iget_args *args)
{
	INITIALIZE_PATH(path_to_sd);
	struct cpu_key key;
	unsigned long dirino;
	int retval;

	dirino = args->dirid;

	/* set version 1; version 2 could be used too, because the stat data
	   key is the same in both versions */
	key.version = KEY_FORMAT_3_5;
	key.on_disk_key.k_dir_id = dirino;
	key.on_disk_key.k_objectid = inode->i_ino;
	key.on_disk_key.k_offset = 0;
	key.on_disk_key.k_type = 0;

	/* look for the object's stat data */
	retval = search_item(inode->i_sb, &key, &path_to_sd);
	if (retval == IO_ERROR) {
		reiserfs_error(inode->i_sb, "vs-13070",
			       "i/o failure occurred trying to find "
			       "stat data of %K", &key);
		reiserfs_make_bad_inode(inode);
		return;
	}
	if (retval != ITEM_FOUND) {
		/* a stale NFS handle can trigger this without it being an error */
		pathrelse(&path_to_sd);
		reiserfs_make_bad_inode(inode);
		inode->i_nlink = 0;
		return;
	}

	init_inode(inode, &path_to_sd);

	/* It is possible that knfsd is trying to access the inode of a file
	   that is being removed from the disk by some other thread.  As we
	   update the sd on unlink, all that is required is to check for
	   nlink here.  This bug was first found by Sizif when debugging
	   SquidNG/Butterfly, forgotten, and found again after Philippe
	   Gramoulle <philippe.gramoulle@mmania.com> reproduced it.

	   A more logical fix would require changes in fs/inode.c:iput() to
	   remove the inode from the hash-table _after_ the fs cleaned the
	   disk stuff up, and in iget() to return NULL if an I_FREEING inode
	   is found in the hash-table. */
	/* Currently there is one place where it's ok to meet an inode with
	   nlink==0: processing of open-unlinked and half-truncated files
	   during mount (fs/reiserfs/super.c:finish_unfinished()). */
	if ((inode->i_nlink == 0) &&
	    !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) {
		reiserfs_warning(inode->i_sb, "vs-13075",
				 "dead inode read from disk %K. "
				 "This is likely to be a race with knfsd. Ignore",
				 &key);
		reiserfs_make_bad_inode(inode);
	}

	reiserfs_check_path(&path_to_sd);	/* init_inode should be releasing the path */
}

/**
 * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked().
 *
 * @inode:    inode from hash table to check
 * @opaque:   "cookie" passed to iget5_locked(). This is &reiserfs_iget_args.
 *
 * This function is called by iget5_locked() to distinguish reiserfs inodes
 * having the same inode numbers. Such inodes can only exist due to some
 * error condition. One of them should be bad. Inodes with identical
 * inode numbers (objectids) are distinguished by parent directory ids.
 *
 */
int reiserfs_find_actor(struct inode *inode, void *opaque)
{
	struct reiserfs_iget_args *args;

	args = opaque;
	/* args is already in CPU order */
	return (inode->i_ino == args->objectid) &&
	    (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid);
}

struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key)
{
	struct inode *inode;
	struct reiserfs_iget_args args;

	args.objectid = key->on_disk_key.k_objectid;
	args.dirid = key->on_disk_key.k_dir_id;
	inode = iget5_locked(s, key->on_disk_key.k_objectid,
			     reiserfs_find_actor, reiserfs_init_locked_inode,
			     (void *)(&args));
	if (!inode)
		return ERR_PTR(-ENOMEM);

	if (inode->i_state & I_NEW) {
		reiserfs_read_locked_inode(inode, &args);
		unlock_new_inode(inode);
	}

	if (comp_short_keys(INODE_PKEY(inode), key) || is_bad_inode(inode)) {
		/* either due to i/o error or a stale NFS handle */
		iput(inode);
		inode = NULL;
	}
	return inode;
}

static struct dentry *reiserfs_get_dentry(struct super_block *sb,
	u32 objectid, u32 dir_id, u32 generation)
{
	struct cpu_key key;
	struct inode *inode;

	key.on_disk_key.k_objectid = objectid;
	key.on_disk_key.k_dir_id = dir_id;
	reiserfs_write_lock(sb);
	inode = reiserfs_iget(sb, &key);
	if (inode && !IS_ERR(inode) && generation != 0 &&
	    generation != inode->i_generation) {
		iput(inode);
		inode = NULL;
	}
	reiserfs_write_unlock(sb);

	return d_obtain_alias(inode);
}

struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
		int fh_len, int fh_type)
{
	/* fhtype happens to reflect the number of u32s encoded.
	 * due to a bug in earlier code, fhtype might indicate there
	 * are more u32s than actually fit.
	 * so if fhtype seems to be more than len, reduce fhtype.
	 * Valid types are:
	 *   2 - objectid + dir_id - legacy support
	 *   3 - objectid + dir_id + generation
	 *   4 - objectid + dir_id + objectid and dirid of parent - legacy
	 *   5 - objectid + dir_id + generation + objectid and dirid of parent
	 *   6 - as above plus generation of directory
	 * 6 does not fit in NFSv2 handles
	 */
	if (fh_type > fh_len) {
		if (fh_type != 6 || fh_len != 5)
			reiserfs_warning(sb, "reiserfs-13077",
				"nfsd/reiserfs, fhtype=%d, len=%d - odd",
				fh_type, fh_len);
		fh_type = 5;
	}

	return reiserfs_get_dentry(sb, fid->raw[0], fid->raw[1],
		(fh_type == 3 || fh_type >= 5) ? fid->raw[2] : 0);
}

struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid,
		int fh_len, int fh_type)
{
	if (fh_type < 4)
		return NULL;

	return reiserfs_get_dentry(sb,
		(fh_type >= 5) ? fid->raw[3] : fid->raw[2],
		(fh_type >= 5) ? fid->raw[4] : fid->raw[3],
		(fh_type == 6) ? fid->raw[5] : 0);
}

int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
		       int need_parent)
{
	struct inode *inode = dentry->d_inode;
	int maxlen = *lenp;

	if (maxlen < 3)
		return 255;

	data[0] = inode->i_ino;
	data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
	data[2] = inode->i_generation;
	*lenp = 3;
	/* no room for directory info? return what we've stored so far */
	if (maxlen < 5 || !need_parent)
		return 3;

	spin_lock(&dentry->d_lock);
	inode = dentry->d_parent->d_inode;
	data[3] = inode->i_ino;
	data[4] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
	*lenp = 5;
	if (maxlen >= 6) {
		data[5] = inode->i_generation;
		*lenp = 6;
	}
	spin_unlock(&dentry->d_lock);
	return *lenp;
}

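/*
 * Illustrative sketch (hypothetical values): encoding a file with objectid
 * 1234, parent dir_id 56, generation 7 into a 6-word buffer yields
 *
 *	data[] = { 1234, 56, 7, parent_ino, parent_dir_id, parent_gen }
 *
 * with *lenp == 6 (fh_type 6).  reiserfs_fh_to_dentry() then reads
 * raw[0]/raw[1]/raw[2] back as objectid/dir_id/generation, and
 * reiserfs_fh_to_parent() reads raw[3]/raw[4]/raw[5].
 */
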
/* looks for the stat data, then copies fields to it, and marks the buffer
   containing the stat data as dirty */
/* reiserfs inodes are never really dirty, since the dirty inode call
** always logs them.  This call allows the VFS inode marking routines
** to properly mark inodes for datasync and such, but only actually
** does something when called for a synchronous update.
*/
int reiserfs_write_inode(struct inode *inode, int do_sync)
{
	struct reiserfs_transaction_handle th;
	int jbegin_count = 1;

	if (inode->i_sb->s_flags & MS_RDONLY)
		return -EROFS;
	/* memory pressure can sometimes initiate write_inode calls with
	 ** sync == 1; these cases are just when the system needs ram, not
	 ** when the inode needs to reach disk for safety, and they can
	 ** safely be ignored because the altered inode has already been
	 ** logged.
	 */
	if (do_sync && !(current->flags & PF_MEMALLOC)) {
		reiserfs_write_lock(inode->i_sb);
		if (!journal_begin(&th, inode->i_sb, jbegin_count)) {
			reiserfs_update_sd(&th, inode);
			journal_end_sync(&th, inode->i_sb, jbegin_count);
		}
		reiserfs_write_unlock(inode->i_sb);
	}
	return 0;
}

1639 /* stat data of new object is inserted already, this inserts the item
1640    containing "." and ".." entries */
1641 static int reiserfs_new_directory(struct reiserfs_transaction_handle *th,
1642 				  struct inode *inode,
1643 				  struct item_head *ih, struct treepath *path,
1644 				  struct inode *dir)
1645 {
1646 	struct super_block *sb = th->t_super;
1647 	char empty_dir[EMPTY_DIR_SIZE];
1648 	char *body = empty_dir;
1649 	struct cpu_key key;
1650 	int retval;
1651 
1652 	BUG_ON(!th->t_trans_id);
1653 
1654 	_make_cpu_key(&key, KEY_FORMAT_3_5, le32_to_cpu(ih->ih_key.k_dir_id),
1655 		      le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET,
1656 		      TYPE_DIRENTRY, 3 /*key length */ );
1657 
1658 	/* compose item head for new item. Directories consist of items of
1659 	   old type (ITEM_VERSION_1). Do not set the key here (second arg
1660 	   is NULL); that is done by reiserfs_new_inode */
1661 	if (old_format_only(sb)) {
1662 		make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET,
1663 				  TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2);
1664 
1665 		make_empty_dir_item_v1(body, ih->ih_key.k_dir_id,
1666 				       ih->ih_key.k_objectid,
1667 				       INODE_PKEY(dir)->k_dir_id,
1668 				       INODE_PKEY(dir)->k_objectid);
1669 	} else {
1670 		make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET,
1671 				  TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2);
1672 
1673 		make_empty_dir_item(body, ih->ih_key.k_dir_id,
1674 				    ih->ih_key.k_objectid,
1675 				    INODE_PKEY(dir)->k_dir_id,
1676 				    INODE_PKEY(dir)->k_objectid);
1677 	}
1678 
1679 	/* look for place in the tree for new item */
1680 	retval = search_item(sb, &key, path);
1681 	if (retval == IO_ERROR) {
1682 		reiserfs_error(sb, "vs-13080",
1683 			       "i/o failure occurred creating new directory");
1684 		return -EIO;
1685 	}
1686 	if (retval == ITEM_FOUND) {
1687 		pathrelse(path);
1688 		reiserfs_warning(sb, "vs-13070",
1689 				 "object with this key exists (%k)",
1690 				 &(ih->ih_key));
1691 		return -EEXIST;
1692 	}
1693 
1694 	/* insert the item, i.e. the empty directory item */
1695 	return reiserfs_insert_item(th, path, &key, ih, inode, body);
1696 }
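/* A sketch of what ends up in the item body above (from the
 * make_empty_dir_item* helpers): exactly two entries, "." keyed by the new
 * directory's own (dir_id, objectid) and ".." keyed by the parent's
 * (dir_id, objectid) taken from INODE_PKEY(dir).
 */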
1697 
1698 /* stat data of object has been inserted, this inserts the item
1699    containing the body of symlink */
1700 static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct inode *inode,	/* Inode of symlink */
1701 				struct item_head *ih,
1702 				struct treepath *path, const char *symname,
1703 				int item_len)
1704 {
1705 	struct super_block *sb = th->t_super;
1706 	struct cpu_key key;
1707 	int retval;
1708 
1709 	BUG_ON(!th->t_trans_id);
1710 
1711 	_make_cpu_key(&key, KEY_FORMAT_3_5,
1712 		      le32_to_cpu(ih->ih_key.k_dir_id),
1713 		      le32_to_cpu(ih->ih_key.k_objectid),
1714 		      1, TYPE_DIRECT, 3 /*key length */ );
1715 
1716 	make_le_item_head(ih, NULL, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len,
1717 			  0 /*free_space */ );
1718 
1719 	/* look for place in the tree for new item */
1720 	retval = search_item(sb, &key, path);
1721 	if (retval == IO_ERROR) {
1722 		reiserfs_error(sb, "vs-13080",
1723 			       "i/o failure occurred creating new symlink");
1724 		return -EIO;
1725 	}
1726 	if (retval == ITEM_FOUND) {
1727 		pathrelse(path);
1728 		reiserfs_warning(sb, "vs-13080",
1729 				 "object with this key exists (%k)",
1730 				 &(ih->ih_key));
1731 		return -EEXIST;
1732 	}
1733 
1734 	/* insert the item, i.e. the body of the symlink */
1735 	return reiserfs_insert_item(th, path, &key, ih, inode, symname);
1736 }
1737 
1738 /* inserts the stat data into the tree, and then calls
1739    reiserfs_new_directory (to insert ".", ".." item if new object is
1740    directory) or reiserfs_new_symlink (to insert symlink body if new
1741    object is symlink) or nothing (if new object is regular file)
1742 
1743    NOTE! uid and gid must already be set in the inode.  If we return
1744    non-zero due to an error, we have to drop the quota previously allocated
1745    for the fresh inode.  This can only be done outside a transaction, so
1746    if we return non-zero, we also end the transaction.  */
1747 int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1748 		       struct inode *dir, int mode, const char *symname,
1749 		       /* 0 for regular, EMPTY_DIR_SIZE for dirs,
1750 		          strlen(symname) for symlinks */
1751 		       loff_t i_size, struct dentry *dentry,
1752 		       struct inode *inode,
1753 		       struct reiserfs_security_handle *security)
1754 {
1755 	struct super_block *sb;
1756 	struct reiserfs_iget_args args;
1757 	INITIALIZE_PATH(path_to_key);
1758 	struct cpu_key key;
1759 	struct item_head ih;
1760 	struct stat_data sd;
1761 	int retval;
1762 	int err;
1763 
1764 	BUG_ON(!th->t_trans_id);
1765 
1766 	if (vfs_dq_alloc_inode(inode)) {
1767 		err = -EDQUOT;
1768 		goto out_end_trans;
1769 	}
1770 	if (!dir->i_nlink) {
1771 		err = -EPERM;
1772 		goto out_bad_inode;
1773 	}
1774 
1775 	sb = dir->i_sb;
1776 
1777 	/* item head of new item */
1778 	ih.ih_key.k_dir_id = reiserfs_choose_packing(dir);
1779 	ih.ih_key.k_objectid = cpu_to_le32(reiserfs_get_unused_objectid(th));
1780 	if (!ih.ih_key.k_objectid) {
1781 		err = -ENOMEM;
1782 		goto out_bad_inode;
1783 	}
1784 	args.objectid = inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid);
1785 	if (old_format_only(sb))
1786 		make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET,
1787 				  TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
1788 	else
1789 		make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET,
1790 				  TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
1791 	memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
1792 	args.dirid = le32_to_cpu(ih.ih_key.k_dir_id);
1793 	if (insert_inode_locked4(inode, args.objectid,
1794 			     reiserfs_find_actor, &args) < 0) {
1795 		err = -EINVAL;
1796 		goto out_bad_inode;
1797 	}
1798 	if (old_format_only(sb))
1799 		/* not a perfect generation count, as object ids can be reused, but
1800 		 ** this is as good as reiserfs can do right now.
1801 		 ** note that the private part of the inode isn't filled in yet,
1802 		 ** so we have to use the directory.
1803 		 */
1804 		inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid);
1805 	else
1806 #if defined( USE_INODE_GENERATION_COUNTER )
1807 		inode->i_generation =
1808 		    le32_to_cpu(REISERFS_SB(sb)->s_rs->s_inode_generation);
1809 #else
1810 		inode->i_generation = ++event;
1811 #endif
1812 
1813 	/* fill stat data */
1814 	inode->i_nlink = (S_ISDIR(mode) ? 2 : 1);
1815 
1816 	/* uid and gid must already be set by the caller for quota init */
1817 
1818 	/* symlink cannot be immutable or append only, right? */
1819 	if (S_ISLNK(inode->i_mode))
1820 		inode->i_flags &= ~(S_IMMUTABLE | S_APPEND);
1821 
1822 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
1823 	inode->i_size = i_size;
1824 	inode->i_blocks = 0;
1825 	inode->i_bytes = 0;
1826 	REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 1 :
1827 	    U32_MAX /* NO_BYTES_IN_DIRECT_ITEM */;
1828 
1829 	INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
1830 	REISERFS_I(inode)->i_flags = 0;
1831 	REISERFS_I(inode)->i_prealloc_block = 0;
1832 	REISERFS_I(inode)->i_prealloc_count = 0;
1833 	REISERFS_I(inode)->i_trans_id = 0;
1834 	REISERFS_I(inode)->i_jl = NULL;
1835 	REISERFS_I(inode)->i_attrs =
1836 	    REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
1837 	sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
1838 	mutex_init(&(REISERFS_I(inode)->i_mmap));
1839 	reiserfs_init_xattr_rwsem(inode);
1840 
1841 	/* key to search for correct place for new stat data */
1842 	_make_cpu_key(&key, KEY_FORMAT_3_6, le32_to_cpu(ih.ih_key.k_dir_id),
1843 		      le32_to_cpu(ih.ih_key.k_objectid), SD_OFFSET,
1844 		      TYPE_STAT_DATA, 3 /*key length */ );
1845 
1846 	/* find proper place for inserting of stat data */
1847 	retval = search_item(sb, &key, &path_to_key);
1848 	if (retval == IO_ERROR) {
1849 		err = -EIO;
1850 		goto out_bad_inode;
1851 	}
1852 	if (retval == ITEM_FOUND) {
1853 		pathrelse(&path_to_key);
1854 		err = -EEXIST;
1855 		goto out_bad_inode;
1856 	}
1857 	if (old_format_only(sb)) {
1858 		if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) {
1859 			pathrelse(&path_to_key);
1860 			/* i_uid or i_gid is too big to be stored in stat data v3.5 */
1861 			err = -EINVAL;
1862 			goto out_bad_inode;
1863 		}
1864 		inode2sd_v1(&sd, inode, inode->i_size);
1865 	} else {
1866 		inode2sd(&sd, inode, inode->i_size);
1867 	}
1868 	// store in the in-core inode the key of the stat data and the version
1869 	// all object items will have (directory items keep the old offset
1870 	// format; other new objects consist of new-format items)
1871 	if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode))
1872 		set_inode_item_key_version(inode, KEY_FORMAT_3_5);
1873 	else
1874 		set_inode_item_key_version(inode, KEY_FORMAT_3_6);
1875 	if (old_format_only(sb))
1876 		set_inode_sd_version(inode, STAT_DATA_V1);
1877 	else
1878 		set_inode_sd_version(inode, STAT_DATA_V2);
1879 
1880 	/* insert the stat data into the tree */
1881 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
1882 	if (REISERFS_I(dir)->new_packing_locality)
1883 		th->displace_new_blocks = 1;
1884 #endif
1885 	retval =
1886 	    reiserfs_insert_item(th, &path_to_key, &key, &ih, inode,
1887 				 (char *)(&sd));
1888 	if (retval) {
1889 		err = retval;
1890 		reiserfs_check_path(&path_to_key);
1891 		goto out_bad_inode;
1892 	}
1893 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
1894 	if (!th->displace_new_blocks)
1895 		REISERFS_I(dir)->new_packing_locality = 0;
1896 #endif
1897 	if (S_ISDIR(mode)) {
1898 		/* insert item with "." and ".." */
1899 		retval =
1900 		    reiserfs_new_directory(th, inode, &ih, &path_to_key, dir);
1901 	}
1902 
1903 	if (S_ISLNK(mode)) {
1904 		/* insert body of symlink */
1905 		if (!old_format_only(sb))
1906 			i_size = ROUND_UP(i_size);
1907 		retval =
1908 		    reiserfs_new_symlink(th, inode, &ih, &path_to_key, symname,
1909 					 i_size);
1910 	}
1911 	if (retval) {
1912 		err = retval;
1913 		reiserfs_check_path(&path_to_key);
1914 		journal_end(th, th->t_super, th->t_blocks_allocated);
1915 		goto out_inserted_sd;
1916 	}
1917 
1918 	if (reiserfs_posixacl(inode->i_sb)) {
1919 		retval = reiserfs_inherit_default_acl(th, dir, dentry, inode);
1920 		if (retval) {
1921 			err = retval;
1922 			reiserfs_check_path(&path_to_key);
1923 			journal_end(th, th->t_super, th->t_blocks_allocated);
1924 			goto out_inserted_sd;
1925 		}
1926 	} else if (inode->i_sb->s_flags & MS_POSIXACL) {
1927 		reiserfs_warning(inode->i_sb, "jdm-13090",
1928 				 "ACLs aren't enabled in the fs, "
1929 				 "but vfs thinks they are!");
1930 	} else if (IS_PRIVATE(dir))
1931 		inode->i_flags |= S_PRIVATE;
1932 
1933 	if (security->name) {
1934 		retval = reiserfs_security_write(th, inode, security);
1935 		if (retval) {
1936 			err = retval;
1937 			reiserfs_check_path(&path_to_key);
1938 			retval = journal_end(th, th->t_super,
1939 					     th->t_blocks_allocated);
1940 			if (retval)
1941 				err = retval;
1942 			goto out_inserted_sd;
1943 		}
1944 	}
1945 
1946 	reiserfs_update_sd(th, inode);
1947 	reiserfs_check_path(&path_to_key);
1948 
1949 	return 0;
1950 
1951 /* it looks like you can easily compress these two goto targets into
1952  * one.  Keeping it like this doesn't actually hurt anything, and they
1953  * are placeholders for what the quota code actually needs.
1954  */
1955       out_bad_inode:
1956 	/* Invalidate the object, nothing was inserted yet */
1957 	INODE_PKEY(inode)->k_objectid = 0;
1958 
1959 	/* Quota change must be inside a transaction for journaling */
1960 	vfs_dq_free_inode(inode);
1961 
1962       out_end_trans:
1963 	journal_end(th, th->t_super, th->t_blocks_allocated);
1964 	/* Drop can be outside and it needs more credits so it's better to have it outside */
1965 	vfs_dq_drop(inode);
1966 	inode->i_flags |= S_NOQUOTA;
1967 	make_bad_inode(inode);
1968 
1969       out_inserted_sd:
1970 	inode->i_nlink = 0;
1971 	th->t_trans_id = 0;	/* so the caller can't use this handle later */
1972 	unlock_new_inode(inode); /* OK to do even if we hadn't locked it */
1973 	iput(inode);
1974 	return err;
1975 }
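/* Reading aid for the unwind paths above (no new behavior implied):
 * out_bad_inode zeroes the object id and returns the quota, then falls
 * through to out_end_trans, which ends the transaction and drops the quota
 * reference; out_inserted_sd (also entered directly once the stat data is in
 * the tree and the transaction has already been ended) invalidates the
 * handle, unlocks and iputs the half-built inode.
 */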
1976 
1977 /*
1978 ** finds the tail page in the page cache,
1979 ** reads the last block in.
1980 **
1981 ** On success, page_result is set to a locked, pinned page, and bh_result
1982 ** is set to an up-to-date buffer for the last block in the file; returns 0.
1983 **
1984 ** tail conversion is not done, so bh_result might not be valid for writing;
1985 ** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before
1986 ** trying to write the block.
1987 **
1988 ** on failure, nonzero is returned and page_result and bh_result are untouched.
1989 */
1990 static int grab_tail_page(struct inode *inode,
1991 			  struct page **page_result,
1992 			  struct buffer_head **bh_result)
1993 {
1994 
1995 	/* we want the page with the last byte in the file,
1996 	 ** not the page that will hold the next byte for appending
1997 	 */
1998 	unsigned long index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
1999 	unsigned long pos = 0;
2000 	unsigned long start = 0;
2001 	unsigned long blocksize = inode->i_sb->s_blocksize;
2002 	unsigned long offset = (inode->i_size) & (PAGE_CACHE_SIZE - 1);
2003 	struct buffer_head *bh;
2004 	struct buffer_head *head;
2005 	struct page *page;
2006 	int error;
2007 
2008 	/* we know that we are only called with inode->i_size > 0.
2009 	 ** we also know that a file tail can never be as big as a block.
2010 	 ** If i_size % blocksize == 0, our file is currently block aligned
2011 	 ** and it won't need converting or zeroing after a truncate.
2012 	 */
2013 	if ((offset & (blocksize - 1)) == 0) {
2014 		return -ENOENT;
2015 	}
2016 	page = grab_cache_page(inode->i_mapping, index);
2017 	error = -ENOMEM;
2018 	if (!page) {
2019 		goto out;
2020 	}
2021 	/* start within the page of the last block in the file */
2022 	start = (offset / blocksize) * blocksize;
2023 
2024 	error = block_prepare_write(page, start, offset,
2025 				    reiserfs_get_block_create_0);
2026 	if (error)
2027 		goto unlock;
2028 
2029 	head = page_buffers(page);
2030 	bh = head;
2031 	do {
2032 		if (pos >= start) {
2033 			break;
2034 		}
2035 		bh = bh->b_this_page;
2036 		pos += blocksize;
2037 	} while (bh != head);
2038 
2039 	if (!buffer_uptodate(bh)) {
2040 		/* note: this should never happen; prepare_write should
2041 		 ** be taking care of this for us.  If the buffer isn't up to date,
2042 		 ** I've screwed up the code to find the buffer, or the code to
2043 		 ** call prepare_write.
2044 		 */
2045 		reiserfs_error(inode->i_sb, "clm-6000",
2046 			       "error reading block %lu", bh->b_blocknr);
2047 		error = -EIO;
2048 		goto unlock;
2049 	}
2050 	*bh_result = bh;
2051 	*page_result = page;
2052 
2053       out:
2054 	return error;
2055 
2056       unlock:
2057 	unlock_page(page);
2058 	page_cache_release(page);
2059 	return error;
2060 }
2061 
2062 /*
2063 ** vfs version of truncate file.  Must NOT be called with
2064 ** a transaction already started.
2065 **
2066 ** some code taken from block_truncate_page
2067 */
2068 int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
2069 {
2070 	struct reiserfs_transaction_handle th;
2071 	/* we want the offset for the first byte after the end of the file */
2072 	unsigned long offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
2073 	unsigned blocksize = inode->i_sb->s_blocksize;
2074 	unsigned length;
2075 	struct page *page = NULL;
2076 	int error;
2077 	struct buffer_head *bh = NULL;
2078 	int err2;
2079 	int lock_depth;
2080 
2081 	lock_depth = reiserfs_write_lock_once(inode->i_sb);
2082 
2083 	if (inode->i_size > 0) {
2084 		error = grab_tail_page(inode, &page, &bh);
2085 		if (error) {
2086 			// -ENOENT means we truncated past the end of the file,
2087 			// and get_block_create_0 could not find a block to read in,
2088 			// which is ok.
2089 			if (error != -ENOENT)
2090 				reiserfs_error(inode->i_sb, "clm-6001",
2091 					       "grab_tail_page failed %d",
2092 					       error);
2093 			page = NULL;
2094 			bh = NULL;
2095 		}
2096 	}
2097 
2098 	/* so, if page != NULL, we have a buffer head for the offset at
2099 	 ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
2100 	 ** then we have an unformatted node.  Otherwise, we have a direct item,
2101 	 ** and no zeroing is required on disk.  We zero after the truncate,
2102 	 ** because the truncate might pack the item anyway
2103 	 ** (it will unmap bh if it packs).
2104 	 */
2105 	/* it is enough to reserve space in the transaction for 2 balancings:
2106 	   one for adding the "save" link and another for the first
2107 	   cut_from_item, plus 1 block for update_sd */
2108 	error = journal_begin(&th, inode->i_sb,
2109 			      JOURNAL_PER_BALANCE_CNT * 2 + 1);
2110 	if (error)
2111 		goto out;
2112 	reiserfs_update_inode_transaction(inode);
2113 	if (update_timestamps)
2114 		/* we are doing a real truncate: if the system crashes before the
2115 		   last transaction of the truncate gets committed, then on reboot
2116 		   the file appears either properly truncated or not truncated at all */
2117 		add_save_link(&th, inode, 1);
2118 	err2 = reiserfs_do_truncate(&th, inode, page, update_timestamps);
2119 	error =
2120 	    journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1);
2121 	if (error)
2122 		goto out;
2123 
2124 	/* check reiserfs_do_truncate after ending the transaction */
2125 	if (err2) {
2126 		error = err2;
2127 		goto out;
2128 	}
2129 
2130 	if (update_timestamps) {
2131 		error = remove_save_link(inode, 1 /* truncate */);
2132 		if (error)
2133 			goto out;
2134 	}
2135 
2136 	if (page) {
2137 		length = offset & (blocksize - 1);
2138 		/* if we are not on a block boundary */
2139 		if (length) {
2140 			length = blocksize - length;
2141 			zero_user(page, offset, length);
2142 			if (buffer_mapped(bh) && bh->b_blocknr != 0) {
2143 				mark_buffer_dirty(bh);
2144 			}
2145 		}
2146 		unlock_page(page);
2147 		page_cache_release(page);
2148 	}
2149 
2150 	reiserfs_write_unlock_once(inode->i_sb, lock_depth);
2151 
2152 	return 0;
2153       out:
2154 	if (page) {
2155 		unlock_page(page);
2156 		page_cache_release(page);
2157 	}
2158 
2159 	reiserfs_write_unlock_once(inode->i_sb, lock_depth);
2160 
2161 	return error;
2162 }
2163 
2164 static int map_block_for_writepage(struct inode *inode,
2165 				   struct buffer_head *bh_result,
2166 				   unsigned long block)
2167 {
2168 	struct reiserfs_transaction_handle th;
2169 	int fs_gen;
2170 	struct item_head tmp_ih;
2171 	struct item_head *ih;
2172 	struct buffer_head *bh;
2173 	__le32 *item;
2174 	struct cpu_key key;
2175 	INITIALIZE_PATH(path);
2176 	int pos_in_item;
2177 	int jbegin_count = JOURNAL_PER_BALANCE_CNT;
2178 	loff_t byte_offset = ((loff_t)block << inode->i_sb->s_blocksize_bits)+1;
2179 	int retval;
2180 	int use_get_block = 0;
2181 	int bytes_copied = 0;
2182 	int copy_size;
2183 	int trans_running = 0;
2184 
2185 	/* catch places below that try to log something without starting a trans */
2186 	th.t_trans_id = 0;
2187 
2188 	if (!buffer_uptodate(bh_result)) {
2189 		return -EIO;
2190 	}
2191 
2192 	kmap(bh_result->b_page);
2193       start_over:
2194 	reiserfs_write_lock(inode->i_sb);
2195 	make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3);
2196 
2197       research:
2198 	retval = search_for_position_by_key(inode->i_sb, &key, &path);
2199 	if (retval != POSITION_FOUND) {
2200 		use_get_block = 1;
2201 		goto out;
2202 	}
2203 
2204 	bh = get_last_bh(&path);
2205 	ih = get_ih(&path);
2206 	item = get_item(&path);
2207 	pos_in_item = path.pos_in_item;
2208 
2209 	/* we've found an unformatted node */
2210 	if (indirect_item_found(retval, ih)) {
2211 		if (bytes_copied > 0) {
2212 			reiserfs_warning(inode->i_sb, "clm-6002",
2213 					 "bytes_copied %d", bytes_copied);
2214 		}
2215 		if (!get_block_num(item, pos_in_item)) {
2216 			/* crap, we are writing to a hole */
2217 			use_get_block = 1;
2218 			goto out;
2219 		}
2220 		set_block_dev_mapped(bh_result,
2221 				     get_block_num(item, pos_in_item), inode);
2222 	} else if (is_direct_le_ih(ih)) {
2223 		char *p;
2224 		p = page_address(bh_result->b_page);
2225 		p += (byte_offset - 1) & (PAGE_CACHE_SIZE - 1);
2226 		copy_size = ih_item_len(ih) - pos_in_item;
2227 
2228 		fs_gen = get_generation(inode->i_sb);
2229 		copy_item_head(&tmp_ih, ih);
2230 
2231 		if (!trans_running) {
2232 			/* vs-3050 is gone, no need to drop the path */
2233 			retval = journal_begin(&th, inode->i_sb, jbegin_count);
2234 			if (retval)
2235 				goto out;
2236 			reiserfs_update_inode_transaction(inode);
2237 			trans_running = 1;
2238 			if (fs_changed(fs_gen, inode->i_sb)
2239 			    && item_moved(&tmp_ih, &path)) {
2240 				reiserfs_restore_prepared_buffer(inode->i_sb,
2241 								 bh);
2242 				goto research;
2243 			}
2244 		}
2245 
2246 		reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
2247 
2248 		if (fs_changed(fs_gen, inode->i_sb)
2249 		    && item_moved(&tmp_ih, &path)) {
2250 			reiserfs_restore_prepared_buffer(inode->i_sb, bh);
2251 			goto research;
2252 		}
2253 
2254 		memcpy(B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied,
2255 		       copy_size);
2256 
2257 		journal_mark_dirty(&th, inode->i_sb, bh);
2258 		bytes_copied += copy_size;
2259 		set_block_dev_mapped(bh_result, 0, inode);
2260 
2261 		/* are there still bytes left? */
2262 		if (bytes_copied < bh_result->b_size &&
2263 		    (byte_offset + bytes_copied) < inode->i_size) {
2264 			set_cpu_key_k_offset(&key,
2265 					     cpu_key_k_offset(&key) +
2266 					     copy_size);
2267 			goto research;
2268 		}
2269 	} else {
2270 		reiserfs_warning(inode->i_sb, "clm-6003",
2271 				 "bad item inode %lu", inode->i_ino);
2272 		retval = -EIO;
2273 		goto out;
2274 	}
2275 	retval = 0;
2276 
2277       out:
2278 	pathrelse(&path);
2279 	if (trans_running) {
2280 		int err = journal_end(&th, inode->i_sb, jbegin_count);
2281 		if (err)
2282 			retval = err;
2283 		trans_running = 0;
2284 	}
2285 	reiserfs_write_unlock(inode->i_sb);
2286 
2287 	/* this is where we fill in holes in the file. */
2288 	if (use_get_block) {
2289 		retval = reiserfs_get_block(inode, block, bh_result,
2290 					    GET_BLOCK_CREATE | GET_BLOCK_NO_IMUX
2291 					    | GET_BLOCK_NO_DANGLE);
2292 		if (!retval) {
2293 			if (!buffer_mapped(bh_result)
2294 			    || bh_result->b_blocknr == 0) {
2295 				/* get_block failed to find a mapped unformatted node. */
2296 				use_get_block = 0;
2297 				goto start_over;
2298 			}
2299 		}
2300 	}
2301 	kunmap(bh_result->b_page);
2302 
2303 	if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
2304 		/* we've copied data from the page into the direct item, so the
2305 		 * buffer in the page is now clean; mark it to reflect that.
2306 		 */
2307 		lock_buffer(bh_result);
2308 		clear_buffer_dirty(bh_result);
2309 		unlock_buffer(bh_result);
2310 	}
2311 	return retval;
2312 }
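/* To recap the cases handled above: an indirect item with a nonzero block
 * number maps bh_result directly; a direct item gets the page's bytes copied
 * into the tree under the journal, leaving bh_result mapped with
 * b_blocknr == 0 so callers know no block I/O is needed; a missing position
 * or a hole falls back to reiserfs_get_block with GET_BLOCK_CREATE.
 */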
2313 
2314 /*
2315  * mason@suse.com: updated in 2.5.54 to follow the same general io
2316  * start/recovery path as __block_write_full_page, along with special
2317  * code to handle reiserfs tails.
2318  */
2319 static int reiserfs_write_full_page(struct page *page,
2320 				    struct writeback_control *wbc)
2321 {
2322 	struct inode *inode = page->mapping->host;
2323 	unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
2324 	int error = 0;
2325 	unsigned long block;
2326 	sector_t last_block;
2327 	struct buffer_head *head, *bh;
2328 	int partial = 0;
2329 	int nr = 0;
2330 	int checked = PageChecked(page);
2331 	struct reiserfs_transaction_handle th;
2332 	struct super_block *s = inode->i_sb;
2333 	int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
2334 	th.t_trans_id = 0;
2335 
2336 	/* no logging allowed when nonblocking or from PF_MEMALLOC */
2337 	if (checked && (current->flags & PF_MEMALLOC)) {
2338 		redirty_page_for_writepage(wbc, page);
2339 		unlock_page(page);
2340 		return 0;
2341 	}
2342 
2343 	/* The page dirty bit is cleared before writepage is called, which
2344 	 * means we have to tell create_empty_buffers to make dirty buffers.
2345 	 * The page really should be up to date at this point, so tossing
2346 	 * in the BH_Uptodate is just a sanity check.
2347 	 */
2348 	if (!page_has_buffers(page)) {
2349 		create_empty_buffers(page, s->s_blocksize,
2350 				     (1 << BH_Dirty) | (1 << BH_Uptodate));
2351 	}
2352 	head = page_buffers(page);
2353 
2354 	/* last page in the file, zero out any contents past the
2355 	 ** last byte in the file
2356 	 */
2357 	if (page->index >= end_index) {
2358 		unsigned last_offset;
2359 
2360 		last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
2361 		/* no file contents in this page */
2362 		if (page->index >= end_index + 1 || !last_offset) {
2363 			unlock_page(page);
2364 			return 0;
2365 		}
2366 		zero_user_segment(page, last_offset, PAGE_CACHE_SIZE);
2367 	}
2368 	bh = head;
2369 	block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits);
2370 	last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
2371 	/* first map all the buffers, logging any direct items we find */
2372 	do {
2373 		if (block > last_block) {
2374 			/*
2375 			 * This can happen when the block size is less than
2376 			 * the page size.  The corresponding bytes in the page
2377 			 * were zero filled above
2378 			 */
2379 			clear_buffer_dirty(bh);
2380 			set_buffer_uptodate(bh);
2381 		} else if ((checked || buffer_dirty(bh)) &&
2382 		           (!buffer_mapped(bh) ||
2383 		            (buffer_mapped(bh) &&
2384 		             bh->b_blocknr == 0))) {
2385 			/* not mapped yet, or it points to a direct item, search
2386 			 * the btree for the mapping info, and log any direct
2387 			 * items found
2388 			 */
2389 			if ((error = map_block_for_writepage(inode, bh, block))) {
2390 				goto fail;
2391 			}
2392 		}
2393 		bh = bh->b_this_page;
2394 		block++;
2395 	} while (bh != head);
2396 
2397 	/*
2398 	 * we start the transaction after map_block_for_writepage,
2399 	 * because it can create holes in the file (an unbounded operation).
2400 	 * starting it here, we can make a reliable estimate for how many
2401 	 * blocks we're going to log
2402 	 */
2403 	if (checked) {
2404 		ClearPageChecked(page);
2405 		reiserfs_write_lock(s);
2406 		error = journal_begin(&th, s, bh_per_page + 1);
2407 		if (error) {
2408 			reiserfs_write_unlock(s);
2409 			goto fail;
2410 		}
2411 		reiserfs_update_inode_transaction(inode);
2412 	}
2413 	/* now go through and lock any dirty buffers on the page */
2414 	do {
2415 		get_bh(bh);
2416 		if (!buffer_mapped(bh))
2417 			continue;
2418 		if (buffer_mapped(bh) && bh->b_blocknr == 0)
2419 			continue;
2420 
2421 		if (checked) {
2422 			reiserfs_prepare_for_journal(s, bh, 1);
2423 			journal_mark_dirty(&th, s, bh);
2424 			continue;
2425 		}
2426 		/* from this point on, we know the buffer is mapped to a
2427 		 * real block and not a direct item
2428 		 */
2429 		if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
2430 			lock_buffer(bh);
2431 		} else {
2432 			if (!trylock_buffer(bh)) {
2433 				redirty_page_for_writepage(wbc, page);
2434 				continue;
2435 			}
2436 		}
2437 		if (test_clear_buffer_dirty(bh)) {
2438 			mark_buffer_async_write(bh);
2439 		} else {
2440 			unlock_buffer(bh);
2441 		}
2442 	} while ((bh = bh->b_this_page) != head);
2443 
2444 	if (checked) {
2445 		error = journal_end(&th, s, bh_per_page + 1);
2446 		reiserfs_write_unlock(s);
2447 		if (error)
2448 			goto fail;
2449 	}
2450 	BUG_ON(PageWriteback(page));
2451 	set_page_writeback(page);
2452 	unlock_page(page);
2453 
2454 	/*
2455 	 * since any buffer might be the only dirty buffer on the page,
2456 	 * the first submit_bh can bring the page out of writeback.
2457 	 * be careful with the buffers.
2458 	 */
2459 	do {
2460 		struct buffer_head *next = bh->b_this_page;
2461 		if (buffer_async_write(bh)) {
2462 			submit_bh(WRITE, bh);
2463 			nr++;
2464 		}
2465 		put_bh(bh);
2466 		bh = next;
2467 	} while (bh != head);
2468 
2469 	error = 0;
2470       done:
2471 	if (nr == 0) {
2472 		/*
2473 		 * if this page only had a direct item, it is very possible for
2474 		 * no io to be required without there being an error.  Or,
2475 		 * someone else could have locked them and sent them down the
2476 		 * pipe without locking the page
2477 		 */
2478 		bh = head;
2479 		do {
2480 			if (!buffer_uptodate(bh)) {
2481 				partial = 1;
2482 				break;
2483 			}
2484 			bh = bh->b_this_page;
2485 		} while (bh != head);
2486 		if (!partial)
2487 			SetPageUptodate(page);
2488 		end_page_writeback(page);
2489 	}
2490 	return error;
2491 
2492       fail:
2493 	/* catches various errors; we need to make sure any valid dirty blocks
2494 	 * get to the media.  The page is currently locked and not marked for
2495 	 * writeback
2496 	 */
2497 	ClearPageUptodate(page);
2498 	bh = head;
2499 	do {
2500 		get_bh(bh);
2501 		if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) {
2502 			lock_buffer(bh);
2503 			mark_buffer_async_write(bh);
2504 		} else {
2505 			/*
2506 			 * clear any dirty bits that might have come from getting
2507 			 * attached to a dirty page
2508 			 */
2509 			clear_buffer_dirty(bh);
2510 		}
2511 		bh = bh->b_this_page;
2512 	} while (bh != head);
2513 	SetPageError(page);
2514 	BUG_ON(PageWriteback(page));
2515 	set_page_writeback(page);
2516 	unlock_page(page);
2517 	do {
2518 		struct buffer_head *next = bh->b_this_page;
2519 		if (buffer_async_write(bh)) {
2520 			clear_buffer_dirty(bh);
2521 			submit_bh(WRITE, bh);
2522 			nr++;
2523 		}
2524 		put_bh(bh);
2525 		bh = next;
2526 	} while (bh != head);
2527 	goto done;
2528 }
2529 
2530 static int reiserfs_readpage(struct file *f, struct page *page)
2531 {
2532 	return block_read_full_page(page, reiserfs_get_block);
2533 }
2534 
2535 static int reiserfs_writepage(struct page *page, struct writeback_control *wbc)
2536 {
2537 	struct inode *inode = page->mapping->host;
2538 	reiserfs_wait_on_write_block(inode->i_sb);
2539 	return reiserfs_write_full_page(page, wbc);
2540 }
2541 
2542 static void reiserfs_truncate_failed_write(struct inode *inode)
2543 {
2544 	truncate_inode_pages(inode->i_mapping, inode->i_size);
2545 	reiserfs_truncate_file(inode, 0);
2546 }
2547 
2548 static int reiserfs_write_begin(struct file *file,
2549 				struct address_space *mapping,
2550 				loff_t pos, unsigned len, unsigned flags,
2551 				struct page **pagep, void **fsdata)
2552 {
2553 	struct inode *inode;
2554 	struct page *page;
2555 	pgoff_t index;
2556 	int ret;
2557 	int old_ref = 0;
2558 
2559 	inode = mapping->host;
2560 	*fsdata = NULL;
2561 	if (flags & AOP_FLAG_CONT_EXPAND &&
2562 	    (pos & (inode->i_sb->s_blocksize - 1)) == 0) {
2563 		pos++;
2564 		*fsdata = (void *)(unsigned long)flags;
2565 	}
2566 
2567 	index = pos >> PAGE_CACHE_SHIFT;
2568 	page = grab_cache_page_write_begin(mapping, index, flags);
2569 	if (!page)
2570 		return -ENOMEM;
2571 	*pagep = page;
2572 
2573 	reiserfs_wait_on_write_block(inode->i_sb);
2574 	fix_tail_page_for_writing(page);
2575 	if (reiserfs_transaction_running(inode->i_sb)) {
2576 		struct reiserfs_transaction_handle *th;
2577 		th = (struct reiserfs_transaction_handle *)current->
2578 		    journal_info;
2579 		BUG_ON(!th->t_refcount);
2580 		BUG_ON(!th->t_trans_id);
2581 		old_ref = th->t_refcount;
2582 		th->t_refcount++;
2583 	}
2584 	ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
2585 				reiserfs_get_block);
2586 	if (ret && reiserfs_transaction_running(inode->i_sb)) {
2587 		struct reiserfs_transaction_handle *th = current->journal_info;
2588 		/* this gets a little ugly.  If reiserfs_get_block returned an
2589 		 * error and left a transaction running, we've got to close it,
2590 		 * and we've got to free the handle if it was a persistent transaction.
2591 		 *
2592 		 * But, if we had nested into an existing transaction, we need
2593 		 * to just drop the ref count on the handle.
2594 		 *
2595 		 * If old_ref == 0, the transaction is from reiserfs_get_block,
2596 		 * and it was a persistent trans.  Otherwise, it was nested above.
2597 		 */
2598 		if (th->t_refcount > old_ref) {
2599 			if (old_ref)
2600 				th->t_refcount--;
2601 			else {
2602 				int err;
2603 				reiserfs_write_lock(inode->i_sb);
2604 				err = reiserfs_end_persistent_transaction(th);
2605 				reiserfs_write_unlock(inode->i_sb);
2606 				if (err)
2607 					ret = err;
2608 			}
2609 		}
2610 	}
2611 	if (ret) {
2612 		unlock_page(page);
2613 		page_cache_release(page);
2614 		/* Truncate allocated blocks */
2615 		reiserfs_truncate_failed_write(inode);
2616 	}
2617 	return ret;
2618 }
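/* Spelling out the refcount logic above (an illustration, not a contract):
 * if no transaction was running, old_ref stays 0, so t_refcount > old_ref
 * after an error means reiserfs_get_block started a persistent transaction
 * that must be ended here; if one was already running, we bumped t_refcount
 * ourselves and only need to drop that single reference.
 */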
2619 
2620 int reiserfs_prepare_write(struct file *f, struct page *page,
2621 			   unsigned from, unsigned to)
2622 {
2623 	struct inode *inode = page->mapping->host;
2624 	int ret;
2625 	int old_ref = 0;
2626 
2627 	reiserfs_write_unlock(inode->i_sb);
2628 	reiserfs_wait_on_write_block(inode->i_sb);
2629 	reiserfs_write_lock(inode->i_sb);
2630 
2631 	fix_tail_page_for_writing(page);
2632 	if (reiserfs_transaction_running(inode->i_sb)) {
2633 		struct reiserfs_transaction_handle *th;
2634 		th = (struct reiserfs_transaction_handle *)current->
2635 		    journal_info;
2636 		BUG_ON(!th->t_refcount);
2637 		BUG_ON(!th->t_trans_id);
2638 		old_ref = th->t_refcount;
2639 		th->t_refcount++;
2640 	}
2641 
2642 	ret = block_prepare_write(page, from, to, reiserfs_get_block);
2643 	if (ret && reiserfs_transaction_running(inode->i_sb)) {
2644 		struct reiserfs_transaction_handle *th = current->journal_info;
2645 		/* this gets a little ugly.  If reiserfs_get_block returned an
2646 		 * error and left a transaction running, we've got to close it,
2647 		 * and we've got to free the handle if it was a persistent transaction.
2648 		 *
2649 		 * But, if we had nested into an existing transaction, we need
2650 		 * to just drop the ref count on the handle.
2651 		 *
2652 		 * If old_ref == 0, the transaction is from reiserfs_get_block,
2653 		 * and it was a persistent trans.  Otherwise, it was nested above.
2654 		 */
2655 		if (th->t_refcount > old_ref) {
2656 			if (old_ref)
2657 				th->t_refcount--;
2658 			else {
2659 				int err;
2660 				reiserfs_write_lock(inode->i_sb);
2661 				err = reiserfs_end_persistent_transaction(th);
2662 				reiserfs_write_unlock(inode->i_sb);
2663 				if (err)
2664 					ret = err;
2665 			}
2666 		}
2667 	}
2668 	return ret;
2669 
2670 }
2671 
2672 static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block)
2673 {
2674 	return generic_block_bmap(as, block, reiserfs_bmap);
2675 }
2676 
2677 static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2678 			      loff_t pos, unsigned len, unsigned copied,
2679 			      struct page *page, void *fsdata)
2680 {
2681 	struct inode *inode = page->mapping->host;
2682 	int ret = 0;
2683 	int update_sd = 0;
2684 	struct reiserfs_transaction_handle *th;
2685 	unsigned start;
2686 	int lock_depth = 0;
2687 	bool locked = false;
2688 
2689 	if ((unsigned long)fsdata & AOP_FLAG_CONT_EXPAND)
2690 		pos++;
2691 
2692 	reiserfs_wait_on_write_block(inode->i_sb);
2693 	if (reiserfs_transaction_running(inode->i_sb))
2694 		th = current->journal_info;
2695 	else
2696 		th = NULL;
2697 
2698 	start = pos & (PAGE_CACHE_SIZE - 1);
2699 	if (unlikely(copied < len)) {
2700 		if (!PageUptodate(page))
2701 			copied = 0;
2702 
2703 		page_zero_new_buffers(page, start + copied, start + len);
2704 	}
2705 	flush_dcache_page(page);
2706 
2707 	reiserfs_commit_page(inode, page, start, start + copied);
2708 
2709 	/* generic_commit_write does this for us, but does not update the
2710 	 ** transaction tracking stuff when the size changes.  So, we have
2711 	 ** to do the i_size updates here.
2712 	 */
2713 	if (pos + copied > inode->i_size) {
2714 		struct reiserfs_transaction_handle myth;
2715 		lock_depth = reiserfs_write_lock_once(inode->i_sb);
2716 		locked = true;
2717 		/* If the file has grown beyond the point where it
2718 		   can have a tail, unmark it as needing tail
2719 		   packing */
2720 		if ((have_large_tails(inode->i_sb)
2721 		     && inode->i_size > i_block_size(inode) * 4)
2722 		    || (have_small_tails(inode->i_sb)
2723 			&& inode->i_size > i_block_size(inode)))
2724 			REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
2725 
2726 		ret = journal_begin(&myth, inode->i_sb, 1);
2727 		if (ret)
2728 			goto journal_error;
2729 
2730 		reiserfs_update_inode_transaction(inode);
2731 		inode->i_size = pos + copied;
2732 		/*
2733 		 * this will just nest into our transaction.  It's important
2734 		 * to use mark_inode_dirty so the inode gets pushed around on the
2735 		 * dirty lists, and so that O_SYNC works as expected
2736 		 */
2737 		mark_inode_dirty(inode);
2738 		reiserfs_update_sd(&myth, inode);
2739 		update_sd = 1;
2740 		ret = journal_end(&myth, inode->i_sb, 1);
2741 		if (ret)
2742 			goto journal_error;
2743 	}
2744 	if (th) {
2745 		if (!locked) {
2746 			lock_depth = reiserfs_write_lock_once(inode->i_sb);
2747 			locked = true;
2748 		}
2749 		if (!update_sd)
2750 			mark_inode_dirty(inode);
2751 		ret = reiserfs_end_persistent_transaction(th);
2752 		if (ret)
2753 			goto out;
2754 	}
2755 
2756       out:
2757 	if (locked)
2758 		reiserfs_write_unlock_once(inode->i_sb, lock_depth);
2759 	unlock_page(page);
2760 	page_cache_release(page);
2761 
2762 	if (pos + len > inode->i_size)
2763 		reiserfs_truncate_failed_write(inode);
2764 
2765 	return ret == 0 ? copied : ret;
2766 
2767       journal_error:
2768 	reiserfs_write_unlock_once(inode->i_sb, lock_depth);
2769 	locked = false;
2770 	if (th) {
2771 		if (!update_sd)
2772 			reiserfs_update_sd(th, inode);
2773 		ret = reiserfs_end_persistent_transaction(th);
2774 	}
2775 	goto out;
2776 }
2777 
2778 int reiserfs_commit_write(struct file *f, struct page *page,
2779 			  unsigned from, unsigned to)
2780 {
2781 	struct inode *inode = page->mapping->host;
2782 	loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to;
2783 	int ret = 0;
2784 	int update_sd = 0;
2785 	struct reiserfs_transaction_handle *th = NULL;
2786 
2787 	reiserfs_write_unlock(inode->i_sb);
2788 	reiserfs_wait_on_write_block(inode->i_sb);
2789 	reiserfs_write_lock(inode->i_sb);
2790 
2791 	if (reiserfs_transaction_running(inode->i_sb)) {
2792 		th = current->journal_info;
2793 	}
2794 	reiserfs_commit_page(inode, page, from, to);
2795 
2796 	/* generic_commit_write does this for us, but does not update the
2797 	 ** transaction tracking stuff when the size changes.  So, we have
2798 	 ** to do the i_size updates here.
2799 	 */
2800 	if (pos > inode->i_size) {
2801 		struct reiserfs_transaction_handle myth;
2802 		/* If the file has grown beyond the point where it
2803 		   can have a tail, unmark it as needing tail
2804 		   packing */
2805 		if ((have_large_tails(inode->i_sb)
2806 		     && inode->i_size > i_block_size(inode) * 4)
2807 		    || (have_small_tails(inode->i_sb)
2808 			&& inode->i_size > i_block_size(inode)))
2809 			REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
2810 
2811 		ret = journal_begin(&myth, inode->i_sb, 1);
2812 		if (ret)
2813 			goto journal_error;
2814 
2815 		reiserfs_update_inode_transaction(inode);
2816 		inode->i_size = pos;
2817 		/*
2818 		 * this will just nest into our transaction.  It's important
2819 		 * to use mark_inode_dirty so the inode gets pushed around on the
2820 		 * dirty lists, and so that O_SYNC works as expected
2821 		 */
2822 		mark_inode_dirty(inode);
2823 		reiserfs_update_sd(&myth, inode);
2824 		update_sd = 1;
2825 		ret = journal_end(&myth, inode->i_sb, 1);
2826 		if (ret)
2827 			goto journal_error;
2828 	}
2829 	if (th) {
2830 		if (!update_sd)
2831 			mark_inode_dirty(inode);
2832 		ret = reiserfs_end_persistent_transaction(th);
2833 		if (ret)
2834 			goto out;
2835 	}
2836 
2837       out:
2838 	return ret;
2839 
2840       journal_error:
2841 	if (th) {
2842 		if (!update_sd)
2843 			reiserfs_update_sd(th, inode);
2844 		ret = reiserfs_end_persistent_transaction(th);
2845 	}
2846 
2847 	return ret;
2848 }
2849 
2850 void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode)
2851 {
2852 	if (reiserfs_attrs(inode->i_sb)) {
2853 		if (sd_attrs & REISERFS_SYNC_FL)
2854 			inode->i_flags |= S_SYNC;
2855 		else
2856 			inode->i_flags &= ~S_SYNC;
2857 		if (sd_attrs & REISERFS_IMMUTABLE_FL)
2858 			inode->i_flags |= S_IMMUTABLE;
2859 		else
2860 			inode->i_flags &= ~S_IMMUTABLE;
2861 		if (sd_attrs & REISERFS_APPEND_FL)
2862 			inode->i_flags |= S_APPEND;
2863 		else
2864 			inode->i_flags &= ~S_APPEND;
2865 		if (sd_attrs & REISERFS_NOATIME_FL)
2866 			inode->i_flags |= S_NOATIME;
2867 		else
2868 			inode->i_flags &= ~S_NOATIME;
2869 		if (sd_attrs & REISERFS_NOTAIL_FL)
2870 			REISERFS_I(inode)->i_flags |= i_nopack_mask;
2871 		else
2872 			REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
2873 	}
2874 }
2875 
2876 void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs)
2877 {
2878 	if (reiserfs_attrs(inode->i_sb)) {
2879 		if (inode->i_flags & S_IMMUTABLE)
2880 			*sd_attrs |= REISERFS_IMMUTABLE_FL;
2881 		else
2882 			*sd_attrs &= ~REISERFS_IMMUTABLE_FL;
2883 		if (inode->i_flags & S_SYNC)
2884 			*sd_attrs |= REISERFS_SYNC_FL;
2885 		else
2886 			*sd_attrs &= ~REISERFS_SYNC_FL;
2887 		if (inode->i_flags & S_NOATIME)
2888 			*sd_attrs |= REISERFS_NOATIME_FL;
2889 		else
2890 			*sd_attrs &= ~REISERFS_NOATIME_FL;
2891 		if (REISERFS_I(inode)->i_flags & i_nopack_mask)
2892 			*sd_attrs |= REISERFS_NOTAIL_FL;
2893 		else
2894 			*sd_attrs &= ~REISERFS_NOTAIL_FL;
2895 	}
2896 }
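/* The two helpers above mirror each other.  Flag pairs translated:
 *
 *   REISERFS_SYNC_FL      <-> S_SYNC
 *   REISERFS_IMMUTABLE_FL <-> S_IMMUTABLE
 *   REISERFS_APPEND_FL     -> S_APPEND    (sd_attrs_to_i_attrs only)
 *   REISERFS_NOATIME_FL   <-> S_NOATIME
 *   REISERFS_NOTAIL_FL    <-> i_nopack_mask in REISERFS_I(inode)->i_flags
 */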
2897 
2898 /* decide if this buffer needs to stay around for data logging or ordered
2899 ** write purposes
2900 */
2901 static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
2902 {
2903 	int ret = 1;
2904 	struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
2905 
2906 	lock_buffer(bh);
2907 	spin_lock(&j->j_dirty_buffers_lock);
2908 	if (!buffer_mapped(bh)) {
2909 		goto free_jh;
2910 	}
2911 	/* the page is locked, and the only places that log a data buffer
2912 	 * also lock the page.
2913 	 */
2914 	if (reiserfs_file_data_log(inode)) {
2915 		/*
2916 		 * very conservative, leave the buffer pinned if
2917 		 * anyone might need it.
2918 		 */
2919 		if (buffer_journaled(bh) || buffer_journal_dirty(bh)) {
2920 			ret = 0;
2921 		}
2922 	} else  if (buffer_dirty(bh)) {
2923 		struct reiserfs_journal_list *jl;
2924 		struct reiserfs_jh *jh = bh->b_private;
2925 
2926 		/* why is this safe?
2927 		 * reiserfs_setattr updates i_size in the on-disk
2928 		 * stat data before allowing vmtruncate to be called.
2929 		 *
2930 		 * If buffer was put onto the ordered list for this
2931 		 * transaction, we know for sure either this transaction
2932 		 * or an older one already has updated i_size on disk,
2933 		 * and this ordered data won't be referenced in the file
2934 		 * if we crash.
2935 		 *
2936 		 * if the buffer was put onto the ordered list for an older
2937 		 * transaction, we need to leave it around
2938 		 */
2939 		if (jh && (jl = jh->jl)
2940 		    && jl != SB_JOURNAL(inode->i_sb)->j_current_jl)
2941 			ret = 0;
2942 	}
2943       free_jh:
2944 	if (ret && bh->b_private) {
2945 		reiserfs_free_jh(bh);
2946 	}
2947 	spin_unlock(&j->j_dirty_buffers_lock);
2948 	unlock_buffer(bh);
2949 	return ret;
2950 }
2951 
2952 /* clm -- taken from fs/buffer.c:block_invalidate_page */
2953 static void reiserfs_invalidatepage(struct page *page, unsigned long offset)
2954 {
2955 	struct buffer_head *head, *bh, *next;
2956 	struct inode *inode = page->mapping->host;
2957 	unsigned int curr_off = 0;
2958 	int ret = 1;
2959 
2960 	BUG_ON(!PageLocked(page));
2961 
2962 	if (offset == 0)
2963 		ClearPageChecked(page);
2964 
2965 	if (!page_has_buffers(page))
2966 		goto out;
2967 
2968 	head = page_buffers(page);
2969 	bh = head;
2970 	do {
2971 		unsigned int next_off = curr_off + bh->b_size;
2972 		next = bh->b_this_page;
2973 
2974 		/*
2975 		 * is this block fully invalidated?
2976 		 */
2977 		if (offset <= curr_off) {
2978 			if (invalidatepage_can_drop(inode, bh))
2979 				reiserfs_unmap_buffer(bh);
2980 			else
2981 				ret = 0;
2982 		}
2983 		curr_off = next_off;
2984 		bh = next;
2985 	} while (bh != head);
2986 
2987 	/*
2988 	 * We release buffers only if the entire page is being invalidated.
2989 	 * The get_block cached value has been unconditionally invalidated,
2990 	 * so real IO is not possible anymore.
2991 	 */
2992 	if (!offset && ret) {
2993 		ret = try_to_release_page(page, 0);
2994 		/* maybe should BUG_ON(!ret); - neilb */
2995 	}
2996       out:
2997 	return;
2998 }
2999 
3000 static int reiserfs_set_page_dirty(struct page *page)
3001 {
3002 	struct inode *inode = page->mapping->host;
3003 	if (reiserfs_file_data_log(inode)) {
3004 		SetPageChecked(page);
3005 		return __set_page_dirty_nobuffers(page);
3006 	}
3007 	return __set_page_dirty_buffers(page);
3008 }
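/* Note the tie-in with reiserfs_write_full_page above (as read from this
 * file): for data-logged files, the PageChecked set here is what later makes
 * writepage log every buffer on the page, and __set_page_dirty_nobuffers
 * leaves the buffers clean so the journal, not the flusher, decides when
 * they hit disk.
 */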
3009 
3010 /*
3011  * Returns 1 if the page's buffers were dropped.  The page is locked.
3012  *
3013  * Takes j_dirty_buffers_lock to protect the b_assoc_buffers list_heads
3014  * in the buffers at page_buffers(page).
3015  *
3016  * even in -o notail mode, we can't be sure an old mount without -o notail
3017  * didn't create files with tails.
3018  */
3019 static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
3020 {
3021 	struct inode *inode = page->mapping->host;
3022 	struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
3023 	struct buffer_head *head;
3024 	struct buffer_head *bh;
3025 	int ret = 1;
3026 
3027 	WARN_ON(PageChecked(page));
3028 	spin_lock(&j->j_dirty_buffers_lock);
3029 	head = page_buffers(page);
3030 	bh = head;
3031 	do {
3032 		if (bh->b_private) {
3033 			if (!buffer_dirty(bh) && !buffer_locked(bh)) {
3034 				reiserfs_free_jh(bh);
3035 			} else {
3036 				ret = 0;
3037 				break;
3038 			}
3039 		}
3040 		bh = bh->b_this_page;
3041 	} while (bh != head);
3042 	if (ret)
3043 		ret = try_to_free_buffers(page);
3044 	spin_unlock(&j->j_dirty_buffers_lock);
3045 	return ret;
3046 }
3047 
3048 /* We thank Mingming Cao for helping us understand in great detail what
3049    to do in this section of the code. */
3050 static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
3051 				  const struct iovec *iov, loff_t offset,
3052 				  unsigned long nr_segs)
3053 {
3054 	struct file *file = iocb->ki_filp;
3055 	struct inode *inode = file->f_mapping->host;
3056 
3057 	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
3058 				  offset, nr_segs,
3059 				  reiserfs_get_blocks_direct_io, NULL);
3060 }
3061 
3062 int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3063 {
3064 	struct inode *inode = dentry->d_inode;
3065 	unsigned int ia_valid;
3066 	int depth;
3067 	int error;
3068 
3069 	/* must be turned off for recursive notify_change calls */
3070 	ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
3071 
3072 	depth = reiserfs_write_lock_once(inode->i_sb);
3073 	if (attr->ia_valid & ATTR_SIZE) {
3074 		/* version 2 items will be caught by the s_maxbytes check
3075 		 ** done for us in vmtruncate
3076 		 */
3077 		if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 &&
3078 		    attr->ia_size > MAX_NON_LFS) {
3079 			error = -EFBIG;
3080 			goto out;
3081 		}
3082 		/* fill in hole pointers in the expanding truncate case. */
3083 		if (attr->ia_size > inode->i_size) {
3084 			error = generic_cont_expand_simple(inode, attr->ia_size);
3085 			if (REISERFS_I(inode)->i_prealloc_count > 0) {
3086 				int err;
3087 				struct reiserfs_transaction_handle th;
3088 				/* we're changing at most 2 bitmaps, inode + super */
3089 				err = journal_begin(&th, inode->i_sb, 4);
3090 				if (!err) {
3091 					reiserfs_discard_prealloc(&th, inode);
3092 					err = journal_end(&th, inode->i_sb, 4);
3093 				}
3094 				if (err)
3095 					error = err;
3096 			}
3097 			if (error)
3098 				goto out;
3099 			/*
3100 			 * file size is changed, ctime and mtime are
3101 			 * to be updated
3102 			 */
3103 			attr->ia_valid |= (ATTR_MTIME | ATTR_CTIME);
3104 		}
3105 	}
3106 
3107 	if ((((attr->ia_valid & ATTR_UID) && (attr->ia_uid & ~0xffff)) ||
3108 	     ((attr->ia_valid & ATTR_GID) && (attr->ia_gid & ~0xffff))) &&
3109 	    (get_inode_sd_version(inode) == STAT_DATA_V1)) {
3110 		/* stat data of format v3.5 has 16 bit uid and gid */
3111 		error = -EINVAL;
3112 		goto out;
3113 	}
3114 
3115 	error = inode_change_ok(inode, attr);
3116 	if (!error) {
3117 		if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
3118 		    (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
3119 			error = reiserfs_chown_xattrs(inode, attr);
3120 
3121 			if (!error) {
3122 				struct reiserfs_transaction_handle th;
3123 				int jbegin_count =
3124 				    2 *
3125 				    (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) +
3126 				     REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) +
3127 				    2;
3128 
3129 				/* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */
3130 				/* (user+group)*(old+new) quota structures - we count the quota info updates and the inode write (sb, inode) */
3131 				    journal_begin(&th, inode->i_sb,
3132 						  jbegin_count);
3133 				if (error)
3134 					goto out;
3135 				error =
3136 				    vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
3137 				if (error) {
3138 					journal_end(&th, inode->i_sb,
3139 						    jbegin_count);
3140 					goto out;
3141 				}
3142 				/* Update corresponding info in inode so that everything is in
3143 				 * one transaction */
3144 				if (attr->ia_valid & ATTR_UID)
3145 					inode->i_uid = attr->ia_uid;
3146 				if (attr->ia_valid & ATTR_GID)
3147 					inode->i_gid = attr->ia_gid;
3148 				mark_inode_dirty(inode);
3149 				error =
3150 				    journal_end(&th, inode->i_sb, jbegin_count);
3151 			}
3152 		}
3153 		if (!error) {
3154 			/*
3155 			 * Relax the lock here, as it might truncate the
3156 			 * inode pages and wait for inode pages locks.
3157 			 * To release such page lock, the owner needs the
3158 			 * reiserfs lock
3159 			 */
3160 			reiserfs_write_unlock_once(inode->i_sb, depth);
3161 			error = inode_setattr(inode, attr);
3162 			depth = reiserfs_write_lock_once(inode->i_sb);
3163 		}
3164 	}
3165 
3166 	if (!error && reiserfs_posixacl(inode->i_sb)) {
3167 		if (attr->ia_valid & ATTR_MODE)
3168 			error = reiserfs_acl_chmod(inode);
3169 	}
3170 
3171       out:
3172 	reiserfs_write_unlock_once(inode->i_sb, depth);
3173 
3174 	return error;
3175 }
3176 
3177 const struct address_space_operations reiserfs_address_space_operations = {
3178 	.writepage = reiserfs_writepage,
3179 	.readpage = reiserfs_readpage,
3180 	.readpages = reiserfs_readpages,
3181 	.releasepage = reiserfs_releasepage,
3182 	.invalidatepage = reiserfs_invalidatepage,
3183 	.sync_page = block_sync_page,
3184 	.write_begin = reiserfs_write_begin,
3185 	.write_end = reiserfs_write_end,
3186 	.bmap = reiserfs_aop_bmap,
3187 	.direct_IO = reiserfs_direct_IO,
3188 	.set_page_dirty = reiserfs_set_page_dirty,
3189 };
3190