1 /*
2  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3  */
4 
5 #include <linux/time.h>
6 #include <linux/fs.h>
7 #include <linux/reiserfs_fs.h>
8 #include <linux/reiserfs_acl.h>
9 #include <linux/reiserfs_xattr.h>
10 #include <linux/exportfs.h>
11 #include <linux/smp_lock.h>
12 #include <linux/pagemap.h>
13 #include <linux/highmem.h>
14 #include <asm/uaccess.h>
15 #include <asm/unaligned.h>
16 #include <linux/buffer_head.h>
17 #include <linux/mpage.h>
18 #include <linux/writeback.h>
19 #include <linux/quotaops.h>
20 #include <linux/swap.h>
21 
22 int reiserfs_commit_write(struct file *f, struct page *page,
23 			  unsigned from, unsigned to);
24 int reiserfs_prepare_write(struct file *f, struct page *page,
25 			   unsigned from, unsigned to);
26 
27 void reiserfs_delete_inode(struct inode *inode)
28 {
29 	/* We need blocks for transaction + (user+group) quota update (possibly delete) */
30 	int jbegin_count =
31 	    JOURNAL_PER_BALANCE_CNT * 2 +
32 	    2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
33 	struct reiserfs_transaction_handle th;
34 	int depth;
35 	int err;
36 
37 	truncate_inode_pages(&inode->i_data, 0);
38 
39 	depth = reiserfs_write_lock_once(inode->i_sb);
40 
41 	/* The objectid == 0 case happens when we abort creating a new inode for some reason, like lack of space. */
42 	if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) {	/* also handles bad_inode case */
43 		reiserfs_delete_xattrs(inode);
44 
45 		if (journal_begin(&th, inode->i_sb, jbegin_count))
46 			goto out;
47 		reiserfs_update_inode_transaction(inode);
48 
49 		reiserfs_discard_prealloc(&th, inode);
50 
51 		err = reiserfs_delete_object(&th, inode);
52 
53 		/* Do quota update inside a transaction for journaled quotas. We must do that
54 		 * after delete_object so that quota updates go into the same transaction as
55 		 * stat data deletion */
56 		if (!err)
57 			vfs_dq_free_inode(inode);
58 
59 		if (journal_end(&th, inode->i_sb, jbegin_count))
60 			goto out;
61 
62 		/* check return value from reiserfs_delete_object after
63 		 * ending the transaction
64 		 */
65 		if (err)
66 		    goto out;
67 
68 		/* all items of file are deleted, so we can remove "save" link */
69 		remove_save_link(inode, 0 /* not truncate */ );	/* we can't do anything
70 								 * about an error here */
71 	} else {
72 		/* no object items are in the tree */
73 		;
74 	}
75       out:
76 	clear_inode(inode);	/* note this must go after the journal_end to prevent deadlock */
77 	inode->i_blocks = 0;
78 	reiserfs_write_unlock_once(inode->i_sb, depth);
79 }
80 
81 static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid,
82 			  __u32 objectid, loff_t offset, int type, int length)
83 {
84 	key->version = version;
85 
86 	key->on_disk_key.k_dir_id = dirid;
87 	key->on_disk_key.k_objectid = objectid;
88 	set_cpu_key_k_offset(key, offset);
89 	set_cpu_key_k_type(key, type);
90 	key->key_length = length;
91 }
92 
93 /* take the base of the key (dirid, objectid) and the version from the inode
94    (they always come from the inode), then set the offset and type of the key */
95 void make_cpu_key(struct cpu_key *key, struct inode *inode, loff_t offset,
96 		  int type, int length)
97 {
98 	_make_cpu_key(key, get_inode_item_key_version(inode),
99 		      le32_to_cpu(INODE_PKEY(inode)->k_dir_id),
100 		      le32_to_cpu(INODE_PKEY(inode)->k_objectid), offset, type,
101 		      length);
102 }
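/*
 * Usage sketch (this mirrors the call in _get_block_create_0 below):
 * build the key of the first byte of the 'block'-th logical block of a
 * file, remembering that reiserfs file body offsets are 1-based:
 *
 *	struct cpu_key key;
 *	make_cpu_key(&key, inode,
 *		     (loff_t) block * inode->i_sb->s_blocksize + 1,
 *		     TYPE_ANY, 3);
 */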
103 
104 //
105 // when key is NULL, do not set the version or the short key (dirid, objectid)
106 //
107 inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
108 			      int version,
109 			      loff_t offset, int type, int length,
110 			      int entry_count /*or ih_free_space */ )
111 {
112 	if (key) {
113 		ih->ih_key.k_dir_id = cpu_to_le32(key->on_disk_key.k_dir_id);
114 		ih->ih_key.k_objectid =
115 		    cpu_to_le32(key->on_disk_key.k_objectid);
116 	}
117 	put_ih_version(ih, version);
118 	set_le_ih_k_offset(ih, offset);
119 	set_le_ih_k_type(ih, type);
120 	put_ih_item_len(ih, length);
121 	/*    set_ih_free_space (ih, 0); */
122 	// for directory items it is the entry count; for direct items and
123 	// stat data - 0xffff; for indirect items - 0
124 	put_ih_entry_count(ih, entry_count);
125 }
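/*
 * Usage sketch (this mirrors a call in reiserfs_get_block below):
 * compose the head of a new indirect item that will hold a single
 * unformatted node pointer starting at offset 1 (the final 0 is the
 * free_space/entry_count argument):
 *
 *	make_le_item_head(&tmp_ih, &key, version, 1, TYPE_INDIRECT,
 *			  UNFM_P_SIZE, 0);
 */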
126 
127 //
128 // FIXME: we might cache recently accessed indirect item
129 
130 // Ugh.  Not too eager for that....
131 //  I cut the code until such time as I see a convincing argument (benchmark).
132 // I don't want a bloated inode struct..., and I don't like code complexity....
133 
134 /* cutting the code is fine, since it really isn't in use yet and is easy
135 ** to add back in.  But, Vladimir has a really good idea here.  Think
136 ** about what happens for reading a file.  For each page,
137 ** the VFS layer calls reiserfs_readpage, which searches the tree to find
138 ** an indirect item.  This indirect item has X number of pointers, where
139 ** X is a big number if we've done the block allocation right.  But,
140 ** we only use one or two of these pointers during each call to readpage,
141 ** needlessly re-searching the tree again later on.
142 **
143 ** The size of the cache could be dynamic based on the size of the file.
144 **
145 ** I'd also like to see us cache the location of the stat data item, since
146 ** we are needlessly re-searching for that frequently.
147 **
148 ** --chris
149 */
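/*
 * A rough, purely illustrative sketch of the caching idea discussed
 * above; the i_cached_* field and the helpers named here are
 * hypothetical and do not exist in struct reiserfs_inode_info:
 *
 *	if (REISERFS_I(inode)->i_cached_ih_valid &&
 *	    offset_in_cached_ih(inode, pos))
 *		blocknr = cached_block_num(inode, pos);
 *	else
 *		result = search_for_position_by_key(inode->i_sb, &key, &path);
 */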
150 
151 /* If this page has a file tail in it, and
152 ** it was read in by get_block_create_0, the page data is valid,
153 ** but the tail is still sitting in a direct item, and we can't write to
154 ** it.  So, look through this page, and check all the mapped buffers
155 ** to make sure they have valid block numbers.  Any that don't must
156 ** be unmapped, so that block_prepare_write will correctly call
157 ** reiserfs_get_block to convert the tail into an unformatted node
158 */
159 static inline void fix_tail_page_for_writing(struct page *page)
160 {
161 	struct buffer_head *head, *next, *bh;
162 
163 	if (page && page_has_buffers(page)) {
164 		head = page_buffers(page);
165 		bh = head;
166 		do {
167 			next = bh->b_this_page;
168 			if (buffer_mapped(bh) && bh->b_blocknr == 0) {
169 				reiserfs_unmap_buffer(bh);
170 			}
171 			bh = next;
172 		} while (bh != head);
173 	}
174 }
175 
176 /* reiserfs_get_block does not need to allocate a block if one has already
177    been allocated or a non-hole position has been found in the indirect item */
178 static inline int allocation_needed(int retval, b_blocknr_t allocated,
179 				    struct item_head *ih,
180 				    __le32 * item, int pos_in_item)
181 {
182 	if (allocated)
183 		return 0;
184 	if (retval == POSITION_FOUND && is_indirect_le_ih(ih) &&
185 	    get_block_num(item, pos_in_item))
186 		return 0;
187 	return 1;
188 }
189 
190 static inline int indirect_item_found(int retval, struct item_head *ih)
191 {
192 	return (retval == POSITION_FOUND) && is_indirect_le_ih(ih);
193 }
194 
195 static inline void set_block_dev_mapped(struct buffer_head *bh,
196 					b_blocknr_t block, struct inode *inode)
197 {
198 	map_bh(bh, inode->i_sb, block);
199 }
200 
201 //
202 // files which were created by the earlier (3.5) format cannot be larger
203 // than 2 GB
204 //
205 static int file_capable(struct inode *inode, sector_t block)
206 {
207 	if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 ||	// it is a new-format file
208 	    block < (1 << (31 - inode->i_sb->s_blocksize_bits)))	// old file, but 'block' is within 2 GB
209 		return 1;
210 
211 	return 0;
212 }
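/*
 * Worked example: with 4KB blocks, s_blocksize_bits == 12, so the cutoff
 * is 1 << (31 - 12) == 524288 blocks; 524288 * 4096 == 2^31 bytes, i.e.
 * exactly the 2 GB limit of old-format (KEY_FORMAT_3_5) files.
 */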
213 
214 static int restart_transaction(struct reiserfs_transaction_handle *th,
215 			       struct inode *inode, struct treepath *path)
216 {
217 	struct super_block *s = th->t_super;
218 	int len = th->t_blocks_allocated;
219 	int err;
220 
221 	BUG_ON(!th->t_trans_id);
222 	BUG_ON(!th->t_refcount);
223 
224 	pathrelse(path);
225 
226 	/* we cannot restart while nested */
227 	if (th->t_refcount > 1) {
228 		return 0;
229 	}
230 	reiserfs_update_sd(th, inode);
231 	err = journal_end(th, s, len);
232 	if (!err) {
233 		err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6);
234 		if (!err)
235 			reiserfs_update_inode_transaction(inode);
236 	}
237 	return err;
238 }
239 
240 // Called by reiserfs_get_block when create == 0. Returns the block number
241 // of the 'block'-th logical block of the file. When that block is stored
242 // in a direct item, it either returns 0 (when called from bmap, which
243 // cannot map tails) or reads the direct item into the relevant part of
244 // the page (bh_result), depending on the GET_BLOCK_READ_DIRECT flag.
247 
248 static int _get_block_create_0(struct inode *inode, sector_t block,
249 			       struct buffer_head *bh_result, int args)
250 {
251 	INITIALIZE_PATH(path);
252 	struct cpu_key key;
253 	struct buffer_head *bh;
254 	struct item_head *ih, tmp_ih;
255 	b_blocknr_t blocknr;
256 	char *p = NULL;
257 	int chars;
258 	int ret;
259 	int result;
260 	int done = 0;
261 	unsigned long offset;
262 
263 	// prepare the key to look for the 'block'-th block of file
264 	make_cpu_key(&key, inode,
265 		     (loff_t) block * inode->i_sb->s_blocksize + 1, TYPE_ANY,
266 		     3);
267 
268 	result = search_for_position_by_key(inode->i_sb, &key, &path);
269 	if (result != POSITION_FOUND) {
270 		pathrelse(&path);
271 		if (p)
272 			kunmap(bh_result->b_page);
273 		if (result == IO_ERROR)
274 			return -EIO;
275 		// We do not return -ENOENT if there is a hole but the page is uptodate,
276 		// because that means there is mmapped data associated with it that is yet to be written to disk.
277 		if ((args & GET_BLOCK_NO_HOLE)
278 		    && !PageUptodate(bh_result->b_page)) {
279 			return -ENOENT;
280 		}
281 		return 0;
282 	}
283 	//
284 	bh = get_last_bh(&path);
285 	ih = get_ih(&path);
286 	if (is_indirect_le_ih(ih)) {
287 		__le32 *ind_item = (__le32 *) B_I_PITEM(bh, ih);
288 
289 		/* FIXME: here we could cache indirect item or part of it in
290 		   the inode to avoid search_by_key in case of subsequent
291 		   access to file */
292 		blocknr = get_block_num(ind_item, path.pos_in_item);
293 		ret = 0;
294 		if (blocknr) {
295 			map_bh(bh_result, inode->i_sb, blocknr);
296 			if (path.pos_in_item ==
297 			    ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) {
298 				set_buffer_boundary(bh_result);
299 			}
300 		} else
301 			// We do not return -ENOENT if there is a hole but the page is uptodate,
302 			// because that means there is mmapped data associated with it that is yet to be written to disk.
303 		if ((args & GET_BLOCK_NO_HOLE)
304 			    && !PageUptodate(bh_result->b_page)) {
305 			ret = -ENOENT;
306 		}
307 
308 		pathrelse(&path);
309 		if (p)
310 			kunmap(bh_result->b_page);
311 		return ret;
312 	}
313 	// requested data are in direct item(s)
314 	if (!(args & GET_BLOCK_READ_DIRECT)) {
315 		// we are called by bmap. FIXME: we cannot map a block of the file
316 		// when it is stored in direct item(s)
317 		pathrelse(&path);
318 		if (p)
319 			kunmap(bh_result->b_page);
320 		return -ENOENT;
321 	}
322 
323 	/* if we've got a direct item, and the buffer or page was uptodate,
324 	 ** we don't want to pull data off disk again.  skip to the
325 	 ** end, where we map the buffer and return
326 	 */
327 	if (buffer_uptodate(bh_result)) {
328 		goto finished;
329 	} else
330 		/*
331 		 ** grab_tail_page can trigger calls to reiserfs_get_block on up to date
332 		 ** pages without any buffers.  If the page is up to date, we don't want
333 		 ** to read old data off disk.  Set the up to date bit on the buffer instead
334 		 ** and jump to the end
335 		 */
336 	if (!bh_result->b_page || PageUptodate(bh_result->b_page)) {
337 		set_buffer_uptodate(bh_result);
338 		goto finished;
339 	}
340 	// read file tail into part of page
341 	offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1);
342 	copy_item_head(&tmp_ih, ih);
343 
344 	/* we only want to kmap if we are reading the tail into the page.
345 	 ** this is not the common case, so we don't kmap until we are
346 	 ** sure we need to.  But, this means the item might move if
347 	 ** kmap schedules
348 	 */
349 	if (!p)
350 		p = (char *)kmap(bh_result->b_page);
351 
352 	p += offset;
353 	memset(p, 0, inode->i_sb->s_blocksize);
354 	do {
355 		if (!is_direct_le_ih(ih)) {
356 			BUG();
357 		}
358 		/* make sure we don't read more bytes than actually exist in
359 		 ** the file.  This can happen in odd cases where i_size isn't
360 		 ** correct, and when direct item padding results in a few
361 		 ** extra bytes at the end of the direct item
362 		 */
363 		if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
364 			break;
365 		if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) {
366 			chars =
367 			    inode->i_size - (le_ih_k_offset(ih) - 1) -
368 			    path.pos_in_item;
369 			done = 1;
370 		} else {
371 			chars = ih_item_len(ih) - path.pos_in_item;
372 		}
373 		memcpy(p, B_I_PITEM(bh, ih) + path.pos_in_item, chars);
374 
375 		if (done)
376 			break;
377 
378 		p += chars;
379 
380 		if (PATH_LAST_POSITION(&path) != (B_NR_ITEMS(bh) - 1))
381 			// we are done if the direct item just read is not the last
382 			// item of the node. FIXME: we could try to check the right
383 			// delimiting key to see whether the direct item continues
384 			// in the right neighbor, or rely on i_size
385 			break;
386 
387 		// update key to look for the next piece
388 		set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + chars);
389 		result = search_for_position_by_key(inode->i_sb, &key, &path);
390 		if (result != POSITION_FOUND)
391 			// i/o error most likely
392 			break;
393 		bh = get_last_bh(&path);
394 		ih = get_ih(&path);
395 	} while (1);
396 
397 	flush_dcache_page(bh_result->b_page);
398 	kunmap(bh_result->b_page);
399 
400       finished:
401 	pathrelse(&path);
402 
403 	if (result == IO_ERROR)
404 		return -EIO;
405 
406 	/* this buffer has valid data, but isn't valid for io.  mapping it to
407 	 * block #0 tells the rest of reiserfs it just has a tail in it
408 	 */
409 	map_bh(bh_result, inode->i_sb, 0);
410 	set_buffer_uptodate(bh_result);
411 	return 0;
412 }
413 
414 // this is called to create the file's block map, so _get_block_create_0
415 // will not read direct items
416 static int reiserfs_bmap(struct inode *inode, sector_t block,
417 			 struct buffer_head *bh_result, int create)
418 {
419 	if (!file_capable(inode, block))
420 		return -EFBIG;
421 
422 	reiserfs_write_lock(inode->i_sb);
423 	/* do not read the direct item */
424 	_get_block_create_0(inode, block, bh_result, 0);
425 	reiserfs_write_unlock(inode->i_sb);
426 	return 0;
427 }
428 
429 /* special version of get_block that is only used by grab_tail_page right
430 ** now.  It is sent to block_prepare_write, and when you try to get a
431 ** block past the end of the file (or a block from a hole) it returns
432 ** -ENOENT instead of a valid buffer.  block_prepare_write expects to
433 ** be able to do i/o on the buffers returned, unless an error value
434 ** is also returned.
435 **
436 ** So, this allows block_prepare_write to be used for reading a single block
437 ** in a page, where it does not produce a valid buffer for holes or past the
438 ** end of the file.  This turns out to be exactly what we need for reading
439 ** tails for conversion.
440 **
441 ** The point of the wrapper is forcing a certain value for create, even
442 ** though the VFS layer is calling this function with create==1.  If you
443 ** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block,
444 ** don't use this function.
445 */
446 static int reiserfs_get_block_create_0(struct inode *inode, sector_t block,
447 				       struct buffer_head *bh_result,
448 				       int create)
449 {
450 	return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE);
451 }
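/*
 * Caller sketch, per the comment above: grab_tail_page() sends this
 * wrapper to block_prepare_write(), roughly (the exact arguments may
 * differ):
 *
 *	error = block_prepare_write(page, start, end,
 *				    reiserfs_get_block_create_0);
 */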
452 
453 /* This is a special helper for reiserfs_get_block in case we are executing
454    a direct I/O request. */
455 static int reiserfs_get_blocks_direct_io(struct inode *inode,
456 					 sector_t iblock,
457 					 struct buffer_head *bh_result,
458 					 int create)
459 {
460 	int ret;
461 
462 	bh_result->b_page = NULL;
463 
464 	/* We set b_size before the reiserfs_get_block call since it is
465 	   referenced in convert_tail_for_hole(), which may be called from
466 	   reiserfs_get_block() */
467 	bh_result->b_size = (1 << inode->i_blkbits);
468 
469 	ret = reiserfs_get_block(inode, iblock, bh_result,
470 				 create | GET_BLOCK_NO_DANGLE);
471 	if (ret)
472 		goto out;
473 
474 	/* don't allow direct io onto tail pages */
475 	if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
476 		/* make sure future calls to the direct io funcs for this offset
477 		 ** in the file fail by unmapping the buffer
478 		 */
479 		clear_buffer_mapped(bh_result);
480 		ret = -EINVAL;
481 	}
482 	/* Possibly an unpacked tail. Flush the data before the pages
483 	   disappear */
484 	if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
485 		int err;
486 
487 		reiserfs_write_lock(inode->i_sb);
488 
489 		err = reiserfs_commit_for_inode(inode);
490 		REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
491 
492 		reiserfs_write_unlock(inode->i_sb);
493 
494 		if (err < 0)
495 			ret = err;
496 	}
497       out:
498 	return ret;
499 }
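/*
 * Caller sketch: the direct I/O entry point plugs this helper into the
 * generic block layer path, roughly (exact arguments may differ):
 *
 *	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
 *				 iov, offset, nr_segs,
 *				 reiserfs_get_blocks_direct_io, NULL);
 */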
500 
501 /*
502 ** helper function for when reiserfs_get_block is called for a hole
503 ** but the file tail is still in a direct item
504 ** bh_result is the buffer head for the hole
505 ** tail_offset is the offset of the start of the tail in the file
506 **
507 ** This calls prepare_write, which will start a new transaction;
508 ** you should not be in a transaction, or have any paths held, when you
509 ** call this.
510 */
511 static int convert_tail_for_hole(struct inode *inode,
512 				 struct buffer_head *bh_result,
513 				 loff_t tail_offset)
514 {
515 	unsigned long index;
516 	unsigned long tail_end;
517 	unsigned long tail_start;
518 	struct page *tail_page;
519 	struct page *hole_page = bh_result->b_page;
520 	int retval = 0;
521 
522 	if ((tail_offset & (bh_result->b_size - 1)) != 1)
523 		return -EIO;
524 
525 	/* always try to read until the end of the block */
526 	tail_start = tail_offset & (PAGE_CACHE_SIZE - 1);
527 	tail_end = (tail_start | (bh_result->b_size - 1)) + 1;
528 
529 	index = tail_offset >> PAGE_CACHE_SHIFT;
530 	/* hole_page can be NULL in the direct_io case; we are sure
531 	   that we cannot get here if we write with O_DIRECT into a
532 	   tail page */
533 	if (!hole_page || index != hole_page->index) {
534 		tail_page = grab_cache_page(inode->i_mapping, index);
535 		retval = -ENOMEM;
536 		if (!tail_page) {
537 			goto out;
538 		}
539 	} else {
540 		tail_page = hole_page;
541 	}
542 
543 	/* we don't have to make sure the conversion did not happen while
544 	 ** we were locking the page because anyone that could convert
545 	 ** must first take i_mutex.
546 	 **
547 	 ** We must fix the tail page for writing because it might have buffers
548 	 ** that are mapped, but have a block number of 0.  This indicates tail
549 	 ** data that has been read directly into the page, and block_prepare_write
550 	 ** won't trigger a get_block in this case.
551 	 */
552 	fix_tail_page_for_writing(tail_page);
553 	retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_end);
554 	if (retval)
555 		goto unlock;
556 
557 	/* tail conversion might change the data in the page */
558 	flush_dcache_page(tail_page);
559 
560 	retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end);
561 
562       unlock:
563 	if (tail_page != hole_page) {
564 		unlock_page(tail_page);
565 		page_cache_release(tail_page);
566 	}
567       out:
568 	return retval;
569 }
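/*
 * Worked example of the offset arithmetic above, assuming
 * PAGE_CACHE_SIZE == 4096 and b_size == 1024: a tail at
 * tail_offset == 5121 (offsets are 1-based, so byte index 5120) gives
 * index == 1 (the second page), tail_start == 5121 & 4095 == 1025, and
 * tail_end == (1025 | 1023) + 1 == 2048, i.e. we prepare and commit the
 * second 1KB block of that page.
 */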
570 
571 static inline int _allocate_block(struct reiserfs_transaction_handle *th,
572 				  sector_t block,
573 				  struct inode *inode,
574 				  b_blocknr_t * allocated_block_nr,
575 				  struct treepath *path, int flags)
576 {
577 	BUG_ON(!th->t_trans_id);
578 
579 #ifdef REISERFS_PREALLOCATE
580 	if (!(flags & GET_BLOCK_NO_IMUX)) {
581 		return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr,
582 						  path, block);
583 	}
584 #endif
585 	return reiserfs_new_unf_blocknrs(th, inode, allocated_block_nr, path,
586 					 block);
587 }
588 
589 int reiserfs_get_block(struct inode *inode, sector_t block,
590 		       struct buffer_head *bh_result, int create)
591 {
592 	int repeat, retval = 0;
593 	b_blocknr_t allocated_block_nr = 0;	// b_blocknr_t is (unsigned) 32 bit int
594 	INITIALIZE_PATH(path);
595 	int pos_in_item;
596 	struct cpu_key key;
597 	struct buffer_head *bh, *unbh = NULL;
598 	struct item_head *ih, tmp_ih;
599 	__le32 *item;
600 	int done;
601 	int fs_gen;
602 	int lock_depth;
603 	struct reiserfs_transaction_handle *th = NULL;
604 	/* space reserved in transaction batch:
605 	   . 3 balancings in direct->indirect conversion
606 	   . 1 block involved in reiserfs_update_sd()
607 	   . quota update for user and group
608 	   XXX in the practically impossible worst case direct2indirect()
609 	   can incur (much) more than 3 balancings. */
610 	int jbegin_count =
611 	    JOURNAL_PER_BALANCE_CNT * 3 + 1 +
612 	    2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
613 	int version;
614 	int dangle = 1;
615 	loff_t new_offset =
616 	    (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1;
617 
618 	lock_depth = reiserfs_write_lock_once(inode->i_sb);
619 	version = get_inode_item_key_version(inode);
620 
621 	if (!file_capable(inode, block)) {
622 		reiserfs_write_unlock_once(inode->i_sb, lock_depth);
623 		return -EFBIG;
624 	}
625 
626 	/* if !create, we aren't changing the FS, so we don't need to
627 	 ** log anything, so we don't need to start a transaction
628 	 */
629 	if (!(create & GET_BLOCK_CREATE)) {
630 		int ret;
631 		/* find the block number of the 'block'-th logical block of the file */
632 		ret = _get_block_create_0(inode, block, bh_result,
633 					  create | GET_BLOCK_READ_DIRECT);
634 		reiserfs_write_unlock_once(inode->i_sb, lock_depth);
635 		return ret;
636 	}
637 	/*
638 	 * if we're already in a transaction, make sure to close
639 	 * any new transactions we start in this func
640 	 */
641 	if ((create & GET_BLOCK_NO_DANGLE) ||
642 	    reiserfs_transaction_running(inode->i_sb))
643 		dangle = 0;
644 
645 	/* If the file is of such a size that it might have a tail, and tails are enabled,
646 	 ** we should mark it as possibly needing tail packing on close
647 	 */
648 	if ((have_large_tails(inode->i_sb)
649 	     && inode->i_size < i_block_size(inode) * 4)
650 	    || (have_small_tails(inode->i_sb)
651 		&& inode->i_size < i_block_size(inode)))
652 		REISERFS_I(inode)->i_flags |= i_pack_on_close_mask;
653 
654 	/* set the key of the first byte in the 'block'-th block of file */
655 	make_cpu_key(&key, inode, new_offset, TYPE_ANY, 3 /*key length */ );
656 	if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
657 	      start_trans:
658 		th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count);
659 		if (!th) {
660 			retval = -ENOMEM;
661 			goto failure;
662 		}
663 		reiserfs_update_inode_transaction(inode);
664 	}
665       research:
666 
667 	retval = search_for_position_by_key(inode->i_sb, &key, &path);
668 	if (retval == IO_ERROR) {
669 		retval = -EIO;
670 		goto failure;
671 	}
672 
673 	bh = get_last_bh(&path);
674 	ih = get_ih(&path);
675 	item = get_item(&path);
676 	pos_in_item = path.pos_in_item;
677 
678 	fs_gen = get_generation(inode->i_sb);
679 	copy_item_head(&tmp_ih, ih);
680 
681 	if (allocation_needed
682 	    (retval, allocated_block_nr, ih, item, pos_in_item)) {
683 		/* we have to allocate block for the unformatted node */
684 		if (!th) {
685 			pathrelse(&path);
686 			goto start_trans;
687 		}
688 
689 		repeat =
690 		    _allocate_block(th, block, inode, &allocated_block_nr,
691 				    &path, create);
692 
693 		if (repeat == NO_DISK_SPACE || repeat == QUOTA_EXCEEDED) {
694 			/* restart the transaction to give the journal a chance to free
695 			 ** some blocks.  releases the path, so we have to go back to
696 			 ** research if we succeed on the second try
697 			 */
698 			SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1;
699 			retval = restart_transaction(th, inode, &path);
700 			if (retval)
701 				goto failure;
702 			repeat =
703 			    _allocate_block(th, block, inode,
704 					    &allocated_block_nr, NULL, create);
705 
706 			if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) {
707 				goto research;
708 			}
709 			if (repeat == QUOTA_EXCEEDED)
710 				retval = -EDQUOT;
711 			else
712 				retval = -ENOSPC;
713 			goto failure;
714 		}
715 
716 		if (fs_changed(fs_gen, inode->i_sb)
717 		    && item_moved(&tmp_ih, &path)) {
718 			goto research;
719 		}
720 	}
721 
722 	if (indirect_item_found(retval, ih)) {
723 		b_blocknr_t unfm_ptr;
724 		/* the 'block'-th block is in the file already (there is a
725 		   corresponding slot in some indirect item), but it may be a
726 		   zero unformatted node pointer (a hole) */
727 		unfm_ptr = get_block_num(item, pos_in_item);
728 		if (unfm_ptr == 0) {
729 			/* use allocated block to plug the hole */
730 			reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
731 			if (fs_changed(fs_gen, inode->i_sb)
732 			    && item_moved(&tmp_ih, &path)) {
733 				reiserfs_restore_prepared_buffer(inode->i_sb,
734 								 bh);
735 				goto research;
736 			}
737 			set_buffer_new(bh_result);
738 			if (buffer_dirty(bh_result)
739 			    && reiserfs_data_ordered(inode->i_sb))
740 				reiserfs_add_ordered_list(inode, bh_result);
741 			put_block_num(item, pos_in_item, allocated_block_nr);
742 			unfm_ptr = allocated_block_nr;
743 			journal_mark_dirty(th, inode->i_sb, bh);
744 			reiserfs_update_sd(th, inode);
745 		}
746 		set_block_dev_mapped(bh_result, unfm_ptr, inode);
747 		pathrelse(&path);
748 		retval = 0;
749 		if (!dangle && th)
750 			retval = reiserfs_end_persistent_transaction(th);
751 
752 		reiserfs_write_unlock_once(inode->i_sb, lock_depth);
753 
754 		/* the item was found, so new blocks were not added to the file;
755 		 ** there is no need to make sure the inode is updated with this
756 		 ** transaction
757 		 */
758 		return retval;
759 	}
760 
761 	if (!th) {
762 		pathrelse(&path);
763 		goto start_trans;
764 	}
765 
766 	/* the desired position was not found, or it is in a direct item. We
767 	   have to append the file with holes up to the 'block'-th block,
768 	   converting direct items to indirect ones if necessary */
769 	done = 0;
770 	do {
771 		if (is_statdata_le_ih(ih)) {
772 			__le32 unp = 0;
773 			struct cpu_key tmp_key;
774 
775 			/* indirect item has to be inserted */
776 			make_le_item_head(&tmp_ih, &key, version, 1,
777 					  TYPE_INDIRECT, UNFM_P_SIZE,
778 					  0 /* free_space */ );
779 
780 			if (cpu_key_k_offset(&key) == 1) {
781 				/* we are going to add 'block'-th block to the file. Use
782 				   allocated block for that */
783 				unp = cpu_to_le32(allocated_block_nr);
784 				set_block_dev_mapped(bh_result,
785 						     allocated_block_nr, inode);
786 				set_buffer_new(bh_result);
787 				done = 1;
788 			}
789 			tmp_key = key;	// ;)
790 			set_cpu_key_k_offset(&tmp_key, 1);
791 			PATH_LAST_POSITION(&path)++;
792 
793 			retval =
794 			    reiserfs_insert_item(th, &path, &tmp_key, &tmp_ih,
795 						 inode, (char *)&unp);
796 			if (retval) {
797 				reiserfs_free_block(th, inode,
798 						    allocated_block_nr, 1);
799 				goto failure;	// retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST
800 			}
801 			//mark_tail_converted (inode);
802 		} else if (is_direct_le_ih(ih)) {
803 			/* direct item has to be converted */
804 			loff_t tail_offset;
805 
806 			tail_offset =
807 			    ((le_ih_k_offset(ih) -
808 			      1) & ~(inode->i_sb->s_blocksize - 1)) + 1;
809 			if (tail_offset == cpu_key_k_offset(&key)) {
810 				/* the direct item we just found fits into the block we
811 				   have to map. Convert it into an unformatted node: use
812 				   bh_result for the conversion */
813 				set_block_dev_mapped(bh_result,
814 						     allocated_block_nr, inode);
815 				unbh = bh_result;
816 				done = 1;
817 			} else {
818 				/* we have to pad the file tail stored in direct item(s)
819 				   up to the block size and convert it to an unformatted
820 				   node. FIXME: this should also get into the page cache */
821 
822 				pathrelse(&path);
823 				/*
824 				 * ugly, but we can only end the transaction if
825 				 * we aren't nested
826 				 */
827 				BUG_ON(!th->t_refcount);
828 				if (th->t_refcount == 1) {
829 					retval =
830 					    reiserfs_end_persistent_transaction
831 					    (th);
832 					th = NULL;
833 					if (retval)
834 						goto failure;
835 				}
836 
837 				retval =
838 				    convert_tail_for_hole(inode, bh_result,
839 							  tail_offset);
840 				if (retval) {
841 					if (retval != -ENOSPC)
842 						reiserfs_error(inode->i_sb,
843 							"clm-6004",
844 							"convert tail failed "
845 							"inode %lu, error %d",
846 							inode->i_ino,
847 							retval);
848 					if (allocated_block_nr) {
849 						/* the bitmap, the super, and the stat data == 3 */
850 						if (!th)
851 							th = reiserfs_persistent_transaction(inode->i_sb, 3);
852 						if (th)
853 							reiserfs_free_block(th,
854 									    inode,
855 									    allocated_block_nr,
856 									    1);
857 					}
858 					goto failure;
859 				}
860 				goto research;
861 			}
862 			retval =
863 			    direct2indirect(th, inode, &path, unbh,
864 					    tail_offset);
865 			if (retval) {
866 				reiserfs_unmap_buffer(unbh);
867 				reiserfs_free_block(th, inode,
868 						    allocated_block_nr, 1);
869 				goto failure;
870 			}
871 			/* it is important that set_buffer_uptodate is done after
872 			 ** the direct2indirect.  The buffer might contain valid
873 			 ** data newer than the data on disk (read by readpage, changed,
874 			 ** and then sent here by writepage).  direct2indirect needs
875 			 ** to know if unbh was already up to date, so it can decide
876 			 ** if the data in unbh needs to be replaced with data from
877 			 ** the disk
878 			 */
879 			set_buffer_uptodate(unbh);
880 
881 			/* unbh->b_page == NULL in the case of a DIRECT_IO request; this
882 			   means the buffer will disappear shortly, so it should not be
883 			   added to the tail list */
884 			if (unbh->b_page) {
885 				/* we've converted the tail, so we must
886 				 ** flush unbh before the transaction commits
887 				 */
888 				reiserfs_add_tail_list(inode, unbh);
889 
890 				/* mark it dirty now to prevent commit_write from adding
891 				 ** this buffer to the inode's dirty buffer list
892 				 */
893 				/*
894 				 * AKPM: changed __mark_buffer_dirty to mark_buffer_dirty().
895 				 * It's still atomic, but it sets the page dirty too,
896 				 * which makes it eligible for writeback at any time by the
897 				 * VM (which was also the case with __mark_buffer_dirty())
898 				 */
899 				mark_buffer_dirty(unbh);
900 			}
901 		} else {
902 			/* append the indirect item with holes if needed; when
903 			   appending the pointer to the 'block'-th block, use the
904 			   block which is already allocated */
905 			struct cpu_key tmp_key;
906 			unp_t unf_single = 0;	// We use this in case we need to insert
907 			// only one block pointer, which is the fast path
908 			unp_t *un;
909 			__u64 max_to_insert =
910 			    MAX_ITEM_LEN(inode->i_sb->s_blocksize) /
911 			    UNFM_P_SIZE;
912 			__u64 blocks_needed;
913 
914 			RFALSE(pos_in_item != ih_item_len(ih) / UNFM_P_SIZE,
915 			       "vs-804: invalid position for append");
916 			/* indirect item has to be appended, set up key of that position */
917 			make_cpu_key(&tmp_key, inode,
918 				     le_key_k_offset(version,
919 						     &(ih->ih_key)) +
920 				     op_bytes_number(ih,
921 						     inode->i_sb->s_blocksize),
922 				     //pos_in_item * inode->i_sb->s_blocksize,
923 				     TYPE_INDIRECT, 3);	// key type is unimportant
924 
925 			RFALSE(cpu_key_k_offset(&tmp_key) > cpu_key_k_offset(&key),
926 			       "green-805: invalid offset");
927 			blocks_needed =
928 			    1 +
929 			    ((cpu_key_k_offset(&key) -
930 			      cpu_key_k_offset(&tmp_key)) >> inode->i_sb->
931 			     s_blocksize_bits);
932 
933 			if (blocks_needed == 1) {
934 				un = &unf_single;
935 			} else {
936 				un = kzalloc(min(blocks_needed, max_to_insert) * UNFM_P_SIZE, GFP_NOFS);
937 				if (!un) {
938 					un = &unf_single;
939 					blocks_needed = 1;
940 					max_to_insert = 0;
941 				}
942 			}
943 			if (blocks_needed <= max_to_insert) {
944 				/* we are going to add target block to the file. Use allocated
945 				   block for that */
946 				un[blocks_needed - 1] =
947 				    cpu_to_le32(allocated_block_nr);
948 				set_block_dev_mapped(bh_result,
949 						     allocated_block_nr, inode);
950 				set_buffer_new(bh_result);
951 				done = 1;
952 			} else {
953 				/* paste hole to the indirect item */
954 				/* If kzalloc failed, max_to_insert becomes zero and it means we
955 				   only have space for one block */
956 				blocks_needed =
957 				    max_to_insert ? max_to_insert : 1;
958 			}
959 			retval =
960 			    reiserfs_paste_into_item(th, &path, &tmp_key, inode,
961 						     (char *)un,
962 						     UNFM_P_SIZE *
963 						     blocks_needed);
964 
965 			if (blocks_needed != 1)
966 				kfree(un);
967 
968 			if (retval) {
969 				reiserfs_free_block(th, inode,
970 						    allocated_block_nr, 1);
971 				goto failure;
972 			}
973 			if (!done) {
974 				/* We need to mark the new file size in case this function is
975 				   interrupted/aborted later on. We may do this only for
976 				   holes. */
977 				inode->i_size +=
978 				    inode->i_sb->s_blocksize * blocks_needed;
979 			}
980 		}
981 
982 		if (done == 1)
983 			break;
984 
985 		/* this loop could log more blocks than we had originally asked
986 		 ** for.  So, we have to allow the transaction to end if it is
987 		 ** too big or too full.  Update the inode so things are
988 		 ** consistent if we crash before the function returns
989 		 **
990 		 ** release the path so that anybody waiting on the path before
991 		 ** ending their transaction will be able to continue.
992 		 */
993 		if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
994 			retval = restart_transaction(th, inode, &path);
995 			if (retval)
996 				goto failure;
997 		}
998 		/*
999 		 * inserting indirect pointers for a hole can take a
1000 		 * long time.  reschedule if needed and also release the write
1001 		 * lock for others.
1002 		 */
1003 		if (need_resched()) {
1004 			reiserfs_write_unlock_once(inode->i_sb, lock_depth);
1005 			schedule();
1006 			lock_depth = reiserfs_write_lock_once(inode->i_sb);
1007 		}
1008 
1009 		retval = search_for_position_by_key(inode->i_sb, &key, &path);
1010 		if (retval == IO_ERROR) {
1011 			retval = -EIO;
1012 			goto failure;
1013 		}
1014 		if (retval == POSITION_FOUND) {
1015 			reiserfs_warning(inode->i_sb, "vs-825",
1016 					 "%K should not be found", &key);
1017 			retval = -EEXIST;
1018 			if (allocated_block_nr)
1019 				reiserfs_free_block(th, inode,
1020 						    allocated_block_nr, 1);
1021 			pathrelse(&path);
1022 			goto failure;
1023 		}
1024 		bh = get_last_bh(&path);
1025 		ih = get_ih(&path);
1026 		item = get_item(&path);
1027 		pos_in_item = path.pos_in_item;
1028 	} while (1);
1029 
1030 	retval = 0;
1031 
1032       failure:
1033 	if (th && (!dangle || (retval && !th->t_trans_id))) {
1034 		int err;
1035 		if (th->t_trans_id)
1036 			reiserfs_update_sd(th, inode);
1037 		err = reiserfs_end_persistent_transaction(th);
1038 		if (err)
1039 			retval = err;
1040 	}
1041 
1042 	reiserfs_write_unlock_once(inode->i_sb, lock_depth);
1043 	reiserfs_check_path(&path);
1044 	return retval;
1045 }
1046 
1047 static int
1048 reiserfs_readpages(struct file *file, struct address_space *mapping,
1049 		   struct list_head *pages, unsigned nr_pages)
1050 {
1051 	return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block);
1052 }
1053 
1054 /* Compute the real number of bytes used by a file.
1055  * The following three functions can go away when we have enough space in the stat item
1056  */
1057 static int real_space_diff(struct inode *inode, int sd_size)
1058 {
1059 	int bytes;
1060 	loff_t blocksize = inode->i_sb->s_blocksize;
1061 
1062 	if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode))
1063 		return sd_size;
1064 
1065 	/* The end of the file is also in a full block with an indirect
1066 	 ** reference, so round up to the next block.
1067 	 **
1068 	 ** There is just no way to know if the tail is actually packed
1069 	 ** in the file, so we have to assume it isn't.  When we pack the
1070 	 ** tail, we add 4 bytes to pretend there really is an unformatted
1071 	 ** node pointer
1072 	 */
1073 	bytes =
1074 	    ((inode->i_size +
1075 	      (blocksize - 1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE +
1076 	    sd_size;
1077 	return bytes;
1078 }
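/*
 * Worked example (regular file, 4KB blocks, new stat data; this assumes
 * UNFM_P_SIZE == 4 and SD_V2_SIZE == 44): for i_size == 10000,
 * (10000 + 4095) >> 12 == 3 blocks, so bytes == 3 * 4 + 44 == 56.
 */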
1079 
1080 static inline loff_t to_real_used_space(struct inode *inode, ulong blocks,
1081 					int sd_size)
1082 {
1083 	if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) {
1084 		return inode->i_size +
1085 		    (loff_t) (real_space_diff(inode, sd_size));
1086 	}
1087 	return ((loff_t) real_space_diff(inode, sd_size)) +
1088 	    (((loff_t) blocks) << 9);
1089 }
1090 
1091 /* Compute number of blocks used by file in ReiserFS counting */
1092 static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size)
1093 {
1094 	loff_t bytes = inode_get_bytes(inode);
1095 	loff_t real_space = real_space_diff(inode, sd_size);
1096 
1097 	/* keeps fsck and non-quota versions of reiserfs happy */
1098 	if (S_ISLNK(inode->i_mode) || S_ISDIR(inode->i_mode)) {
1099 		bytes += (loff_t) 511;
1100 	}
1101 
1102 	/* files from before the quota patch might have i_blocks such that
1103 	 ** bytes < real_space.  Deal with that here to prevent it from
1104 	 ** going negative.
1105 	 */
1106 	if (bytes < real_space)
1107 		return 0;
1108 	return (bytes - real_space) >> 9;
1109 }
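/*
 * Continuing the example above: if inode_get_bytes() returns
 * 3 * 4096 + 56 == 12344 for that file, the fake block count is
 * (12344 - 56) >> 9 == 24 512-byte sectors.
 */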
1110 
1111 //
1112 // BAD: new directories have stat data of new type and all other items
1113 // of old type. The version stored in the inode describes the body items,
1114 // so in update_stat_data we cannot rely on the inode, but have to check
1115 // the item version directly
1116 //
1117 
1118 // called by read_locked_inode
1119 static void init_inode(struct inode *inode, struct treepath *path)
1120 {
1121 	struct buffer_head *bh;
1122 	struct item_head *ih;
1123 	__u32 rdev;
1124 	//int version = ITEM_VERSION_1;
1125 
1126 	bh = PATH_PLAST_BUFFER(path);
1127 	ih = PATH_PITEM_HEAD(path);
1128 
1129 	copy_key(INODE_PKEY(inode), &(ih->ih_key));
1130 
1131 	INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
1132 	REISERFS_I(inode)->i_flags = 0;
1133 	REISERFS_I(inode)->i_prealloc_block = 0;
1134 	REISERFS_I(inode)->i_prealloc_count = 0;
1135 	REISERFS_I(inode)->i_trans_id = 0;
1136 	REISERFS_I(inode)->i_jl = NULL;
1137 	mutex_init(&(REISERFS_I(inode)->i_mmap));
1138 	reiserfs_init_xattr_rwsem(inode);
1139 
1140 	if (stat_data_v1(ih)) {
1141 		struct stat_data_v1 *sd =
1142 		    (struct stat_data_v1 *)B_I_PITEM(bh, ih);
1143 		unsigned long blocks;
1144 
1145 		set_inode_item_key_version(inode, KEY_FORMAT_3_5);
1146 		set_inode_sd_version(inode, STAT_DATA_V1);
1147 		inode->i_mode = sd_v1_mode(sd);
1148 		inode->i_nlink = sd_v1_nlink(sd);
1149 		inode->i_uid = sd_v1_uid(sd);
1150 		inode->i_gid = sd_v1_gid(sd);
1151 		inode->i_size = sd_v1_size(sd);
1152 		inode->i_atime.tv_sec = sd_v1_atime(sd);
1153 		inode->i_mtime.tv_sec = sd_v1_mtime(sd);
1154 		inode->i_ctime.tv_sec = sd_v1_ctime(sd);
1155 		inode->i_atime.tv_nsec = 0;
1156 		inode->i_ctime.tv_nsec = 0;
1157 		inode->i_mtime.tv_nsec = 0;
1158 
1159 		inode->i_blocks = sd_v1_blocks(sd);
1160 		inode->i_generation = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
1161 		blocks = (inode->i_size + 511) >> 9;
1162 		blocks = _ROUND_UP(blocks, inode->i_sb->s_blocksize >> 9);
1163 		if (inode->i_blocks > blocks) {
1164 			// there was a bug in <=3.5.23 when i_blocks could take negative
1165 			// values. Starting from 3.5.17 this value could even be stored in
1166 			// size. Two notes: this can be wrong for sparse files, and the
1167 			// on-disk value will only be updated if the file's inode ever changes
1168 			// only updated if file's inode will ever change
1169 			inode->i_blocks = blocks;
1170 		}
1171 
1172 		rdev = sd_v1_rdev(sd);
1173 		REISERFS_I(inode)->i_first_direct_byte =
1174 		    sd_v1_first_direct_byte(sd);
1175 		/* an early bug in the quota code can give us an odd number for the
1176 		 ** block count.  This is incorrect, fix it here.
1177 		 */
1178 		if (inode->i_blocks & 1) {
1179 			inode->i_blocks++;
1180 		}
1181 		inode_set_bytes(inode,
1182 				to_real_used_space(inode, inode->i_blocks,
1183 						   SD_V1_SIZE));
1184 		/* nopack is initially zero for v1 objects. For v2 objects,
1185 		   nopack is initialised from sd_attrs */
1186 		REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
1187 	} else {
1188 		// new stat data found, but object may have old items
1189 		// (directories and symlinks)
1190 		struct stat_data *sd = (struct stat_data *)B_I_PITEM(bh, ih);
1191 
1192 		inode->i_mode = sd_v2_mode(sd);
1193 		inode->i_nlink = sd_v2_nlink(sd);
1194 		inode->i_uid = sd_v2_uid(sd);
1195 		inode->i_size = sd_v2_size(sd);
1196 		inode->i_gid = sd_v2_gid(sd);
1197 		inode->i_mtime.tv_sec = sd_v2_mtime(sd);
1198 		inode->i_atime.tv_sec = sd_v2_atime(sd);
1199 		inode->i_ctime.tv_sec = sd_v2_ctime(sd);
1200 		inode->i_ctime.tv_nsec = 0;
1201 		inode->i_mtime.tv_nsec = 0;
1202 		inode->i_atime.tv_nsec = 0;
1203 		inode->i_blocks = sd_v2_blocks(sd);
1204 		rdev = sd_v2_rdev(sd);
1205 		if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
1206 			inode->i_generation =
1207 			    le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
1208 		else
1209 			inode->i_generation = sd_v2_generation(sd);
1210 
1211 		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
1212 			set_inode_item_key_version(inode, KEY_FORMAT_3_5);
1213 		else
1214 			set_inode_item_key_version(inode, KEY_FORMAT_3_6);
1215 		REISERFS_I(inode)->i_first_direct_byte = 0;
1216 		set_inode_sd_version(inode, STAT_DATA_V2);
1217 		inode_set_bytes(inode,
1218 				to_real_used_space(inode, inode->i_blocks,
1219 						   SD_V2_SIZE));
1220 		/* read persistent inode attributes from sd and initialise
1221 		   generic inode flags from them */
1222 		REISERFS_I(inode)->i_attrs = sd_v2_attrs(sd);
1223 		sd_attrs_to_i_attrs(sd_v2_attrs(sd), inode);
1224 	}
1225 
1226 	pathrelse(path);
1227 	if (S_ISREG(inode->i_mode)) {
1228 		inode->i_op = &reiserfs_file_inode_operations;
1229 		inode->i_fop = &reiserfs_file_operations;
1230 		inode->i_mapping->a_ops = &reiserfs_address_space_operations;
1231 	} else if (S_ISDIR(inode->i_mode)) {
1232 		inode->i_op = &reiserfs_dir_inode_operations;
1233 		inode->i_fop = &reiserfs_dir_operations;
1234 	} else if (S_ISLNK(inode->i_mode)) {
1235 		inode->i_op = &reiserfs_symlink_inode_operations;
1236 		inode->i_mapping->a_ops = &reiserfs_address_space_operations;
1237 	} else {
1238 		inode->i_blocks = 0;
1239 		inode->i_op = &reiserfs_special_inode_operations;
1240 		init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
1241 	}
1242 }
1243 
1244 // update new stat data with inode fields
1245 static void inode2sd(void *sd, struct inode *inode, loff_t size)
1246 {
1247 	struct stat_data *sd_v2 = (struct stat_data *)sd;
1248 	__u16 flags;
1249 
1250 	set_sd_v2_mode(sd_v2, inode->i_mode);
1251 	set_sd_v2_nlink(sd_v2, inode->i_nlink);
1252 	set_sd_v2_uid(sd_v2, inode->i_uid);
1253 	set_sd_v2_size(sd_v2, size);
1254 	set_sd_v2_gid(sd_v2, inode->i_gid);
1255 	set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec);
1256 	set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec);
1257 	set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec);
1258 	set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE));
1259 	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
1260 		set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev));
1261 	else
1262 		set_sd_v2_generation(sd_v2, inode->i_generation);
1263 	flags = REISERFS_I(inode)->i_attrs;
1264 	i_attrs_to_sd_attrs(inode, &flags);
1265 	set_sd_v2_attrs(sd_v2, flags);
1266 }
1267 
1268 // used to copy inode's fields to old stat data
1269 static void inode2sd_v1(void *sd, struct inode *inode, loff_t size)
1270 {
1271 	struct stat_data_v1 *sd_v1 = (struct stat_data_v1 *)sd;
1272 
1273 	set_sd_v1_mode(sd_v1, inode->i_mode);
1274 	set_sd_v1_uid(sd_v1, inode->i_uid);
1275 	set_sd_v1_gid(sd_v1, inode->i_gid);
1276 	set_sd_v1_nlink(sd_v1, inode->i_nlink);
1277 	set_sd_v1_size(sd_v1, size);
1278 	set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec);
1279 	set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec);
1280 	set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec);
1281 
1282 	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
1283 		set_sd_v1_rdev(sd_v1, new_encode_dev(inode->i_rdev));
1284 	else
1285 		set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE));
1286 
1287 	// Sigh. i_first_direct_byte is back
1288 	set_sd_v1_first_direct_byte(sd_v1,
1289 				    REISERFS_I(inode)->i_first_direct_byte);
1290 }
1291 
1292 /* NOTE, you must prepare the buffer head before sending it here,
1293 ** and then log it after the call
1294 */
1295 static void update_stat_data(struct treepath *path, struct inode *inode,
1296 			     loff_t size)
1297 {
1298 	struct buffer_head *bh;
1299 	struct item_head *ih;
1300 
1301 	bh = PATH_PLAST_BUFFER(path);
1302 	ih = PATH_PITEM_HEAD(path);
1303 
1304 	if (!is_statdata_le_ih(ih))
1305 		reiserfs_panic(inode->i_sb, "vs-13065", "key %k, found item %h",
1306 			       INODE_PKEY(inode), ih);
1307 
1308 	if (stat_data_v1(ih)) {
1309 		// path points to old stat data
1310 		inode2sd_v1(B_I_PITEM(bh, ih), inode, size);
1311 	} else {
1312 		inode2sd(B_I_PITEM(bh, ih), inode, size);
1313 	}
1314 
1315 	return;
1316 }
1317 
1318 void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th,
1319 			     struct inode *inode, loff_t size)
1320 {
1321 	struct cpu_key key;
1322 	INITIALIZE_PATH(path);
1323 	struct buffer_head *bh;
1324 	int fs_gen;
1325 	struct item_head *ih, tmp_ih;
1326 	int retval;
1327 
1328 	BUG_ON(!th->t_trans_id);
1329 
1330 	make_cpu_key(&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);	//key type is unimportant
1331 
1332 	for (;;) {
1333 		int pos;
1334 		/* look for the object's stat data */
1335 		retval = search_item(inode->i_sb, &key, &path);
1336 		if (retval == IO_ERROR) {
1337 			reiserfs_error(inode->i_sb, "vs-13050",
1338 				       "i/o failure occurred trying to "
1339 				       "update %K stat data", &key);
1340 			return;
1341 		}
1342 		if (retval == ITEM_NOT_FOUND) {
1343 			pos = PATH_LAST_POSITION(&path);
1344 			pathrelse(&path);
1345 			if (inode->i_nlink == 0) {
1346 				/*reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found"); */
1347 				return;
1348 			}
1349 			reiserfs_warning(inode->i_sb, "vs-13060",
1350 					 "stat data of object %k (nlink == %d) "
1351 					 "not found (pos %d)",
1352 					 INODE_PKEY(inode), inode->i_nlink,
1353 					 pos);
1354 			reiserfs_check_path(&path);
1355 			return;
1356 		}
1357 
1358 		/* sigh, prepare_for_journal might schedule.  When it schedules the
1359 		 ** FS might change.  We have to detect that, and loop back to the
1360 		 ** search if the stat data item has moved
1361 		 */
1362 		bh = get_last_bh(&path);
1363 		ih = get_ih(&path);
1364 		copy_item_head(&tmp_ih, ih);
1365 		fs_gen = get_generation(inode->i_sb);
1366 		reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
1367 		if (fs_changed(fs_gen, inode->i_sb)
1368 		    && item_moved(&tmp_ih, &path)) {
1369 			reiserfs_restore_prepared_buffer(inode->i_sb, bh);
1370 			continue;	/* Stat_data item has been moved after scheduling. */
1371 		}
1372 		break;
1373 	}
1374 	update_stat_data(&path, inode, size);
1375 	journal_mark_dirty(th, th->t_super, bh);
1376 	pathrelse(&path);
1377 	return;
1378 }
1379 
1380 /* reiserfs_read_locked_inode is called to read the inode off disk, and it
1381 ** does a make_bad_inode when things go wrong.  But, we need to make sure
1382 ** to clear the key in the private portion of the inode, otherwise a
1383 ** corresponding iput might try to delete whatever object the inode last
1384 ** represented.
1385 */
1386 static void reiserfs_make_bad_inode(struct inode *inode)
1387 {
1388 	memset(INODE_PKEY(inode), 0, KEY_SIZE);
1389 	make_bad_inode(inode);
1390 }
1391 
1392 //
1393 // initially this function was derived from minix or ext2's analog and
1394 // evolved as the prototype did
1395 //
1396 
1397 int reiserfs_init_locked_inode(struct inode *inode, void *p)
1398 {
1399 	struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p;
1400 	inode->i_ino = args->objectid;
1401 	INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid);
1402 	return 0;
1403 }
1404 
1405 /* looks for the stat data in the tree, and fills in the fields of the
1406    in-core inode from it */
1407 void reiserfs_read_locked_inode(struct inode *inode,
1408 				struct reiserfs_iget_args *args)
1409 {
1410 	INITIALIZE_PATH(path_to_sd);
1411 	struct cpu_key key;
1412 	unsigned long dirino;
1413 	int retval;
1414 
1415 	dirino = args->dirid;
1416 
1417 	/* set version 1, version 2 could be used too, because stat data
1418 	   key is the same in both versions */
1419 	key.version = KEY_FORMAT_3_5;
1420 	key.on_disk_key.k_dir_id = dirino;
1421 	key.on_disk_key.k_objectid = inode->i_ino;
1422 	key.on_disk_key.k_offset = 0;
1423 	key.on_disk_key.k_type = 0;
1424 
1425 	/* look for the object's stat data */
1426 	retval = search_item(inode->i_sb, &key, &path_to_sd);
1427 	if (retval == IO_ERROR) {
1428 		reiserfs_error(inode->i_sb, "vs-13070",
1429 			       "i/o failure occurred trying to find "
1430 			       "stat data of %K", &key);
1431 		reiserfs_make_bad_inode(inode);
1432 		return;
1433 	}
1434 	if (retval != ITEM_FOUND) {
1435 		/* a stale NFS handle can trigger this without it being an error */
1436 		pathrelse(&path_to_sd);
1437 		reiserfs_make_bad_inode(inode);
1438 		inode->i_nlink = 0;
1439 		return;
1440 	}
1441 
1442 	init_inode(inode, &path_to_sd);
1443 
1444 	/* It is possible that knfsd is trying to access the inode of a file
1445 	   that is being removed from the disk by some other thread. As we
1446 	   update the sd on unlink, all that is required is to check for nlink
1447 	   here. This bug was first found by Sizif when debugging
1448 	   SquidNG/Butterfly, forgotten, and found again after Philippe
1449 	   Gramoulle <philippe.gramoulle@mmania.com> reproduced it.
1450 
1451 	   A more logical fix would require changes in fs/inode.c:iput() to
1452 	   remove the inode from the hash-table _after_ the fs has cleaned disk
1453 	   stuff up, and in iget() to return NULL if an I_FREEING inode is found
1454 	   in the hash-table. */
1455 	/* Currently there is one place where it's ok to meet an inode with
1456 	   nlink==0: processing of open-unlinked and half-truncated files
1457 	   during mount (fs/reiserfs/super.c:finish_unfinished()). */
1458 	if ((inode->i_nlink == 0) &&
1459 	    !REISERFS_SB(inode->i_sb)->s_is_unlinked_ok) {
1460 		reiserfs_warning(inode->i_sb, "vs-13075",
1461 				 "dead inode read from disk %K. "
1462 				 "This is likely to be a race with knfsd. Ignore",
1463 				 &key);
1464 		reiserfs_make_bad_inode(inode);
1465 	}
1466 
1467 	reiserfs_check_path(&path_to_sd);	/* init_inode should be releasing the path */
1468 
1469 }
1470 
1471 /**
1472  * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked().
1473  *
1474  * @inode:    inode from hash table to check
1475  * @opaque:   "cookie" passed to iget5_locked(). This is &reiserfs_iget_args.
1476  *
1477  * This function is called by iget5_locked() to distinguish reiserfs inodes
1478  * having the same inode numbers. Such inodes can only exist due to some
1479  * error condition. One of them should be bad. Inodes with identical
1480  * inode numbers (objectids) are distinguished by parent directory ids.
1481  *
1482  */
1483 int reiserfs_find_actor(struct inode *inode, void *opaque)
1484 {
1485 	struct reiserfs_iget_args *args;
1486 
1487 	args = opaque;
1488 	/* args is already in CPU order */
1489 	return (inode->i_ino == args->objectid) &&
1490 	    (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid);
1491 }
1492 
1493 struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key)
1494 {
1495 	struct inode *inode;
1496 	struct reiserfs_iget_args args;
1497 
1498 	args.objectid = key->on_disk_key.k_objectid;
1499 	args.dirid = key->on_disk_key.k_dir_id;
1500 	reiserfs_write_unlock(s);
1501 	inode = iget5_locked(s, key->on_disk_key.k_objectid,
1502 			     reiserfs_find_actor, reiserfs_init_locked_inode,
1503 			     (void *)(&args));
1504 	reiserfs_write_lock(s);
1505 	if (!inode)
1506 		return ERR_PTR(-ENOMEM);
1507 
1508 	if (inode->i_state & I_NEW) {
1509 		reiserfs_read_locked_inode(inode, &args);
1510 		unlock_new_inode(inode);
1511 	}
1512 
1513 	if (comp_short_keys(INODE_PKEY(inode), key) || is_bad_inode(inode)) {
1514 		/* either due to i/o error or a stale NFS handle */
1515 		iput(inode);
1516 		inode = NULL;
1517 	}
1518 	return inode;
1519 }
1520 
1521 static struct dentry *reiserfs_get_dentry(struct super_block *sb,
1522 	u32 objectid, u32 dir_id, u32 generation)
1523 
1524 {
1525 	struct cpu_key key;
1526 	struct inode *inode;
1527 
1528 	key.on_disk_key.k_objectid = objectid;
1529 	key.on_disk_key.k_dir_id = dir_id;
1530 	reiserfs_write_lock(sb);
1531 	inode = reiserfs_iget(sb, &key);
1532 	if (inode && !IS_ERR(inode) && generation != 0 &&
1533 	    generation != inode->i_generation) {
1534 		iput(inode);
1535 		inode = NULL;
1536 	}
1537 	reiserfs_write_unlock(sb);
1538 
1539 	return d_obtain_alias(inode);
1540 }
1541 
1542 struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
1543 		int fh_len, int fh_type)
1544 {
1545 	/* fhtype happens to reflect the number of u32s encoded.
1546 	 * Due to a bug in earlier code, fhtype might indicate there
1547 	 * are more u32s than actually fit.
1548 	 * So if fhtype seems to be more than len, reduce fhtype.
1549 	 * Valid types are:
1550 	 *   2 - objectid + dir_id - legacy support
1551 	 *   3 - objectid + dir_id + generation
1552 	 *   4 - objectid + dir_id + objectid and dirid of parent - legacy
1553 	 *   5 - objectid + dir_id + generation + objectid and dirid of parent
1554 	 *   6 - as above plus generation of directory
1555 	 * 6 does not fit in NFSv2 handles
1556 	 */
1557 	if (fh_type > fh_len) {
1558 		if (fh_type != 6 || fh_len != 5)
1559 			reiserfs_warning(sb, "reiserfs-13077",
1560 				"nfsd/reiserfs, fhtype=%d, len=%d - odd",
1561 				fh_type, fh_len);
1562 		fh_type = 5;
1563 	}
1564 
1565 	return reiserfs_get_dentry(sb, fid->raw[0], fid->raw[1],
1566 		(fh_type == 3 || fh_type >= 5) ? fid->raw[2] : 0);
1567 }
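/*
 * Example: a type-3 handle arrives as fid->raw[0] == objectid,
 * fid->raw[1] == dir_id and fid->raw[2] == generation, so the call
 * above passes raw[2] as the generation to verify; a type-2 (legacy)
 * handle carries no generation, and passing 0 disables the check.
 */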
1568 
1569 struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid,
1570 		int fh_len, int fh_type)
1571 {
1572 	if (fh_type < 4)
1573 		return NULL;
1574 
1575 	return reiserfs_get_dentry(sb,
1576 		(fh_type >= 5) ? fid->raw[3] : fid->raw[2],
1577 		(fh_type >= 5) ? fid->raw[4] : fid->raw[3],
1578 		(fh_type == 6) ? fid->raw[5] : 0);
1579 }
1580 
1581 int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
1582 		       int need_parent)
1583 {
1584 	struct inode *inode = dentry->d_inode;
1585 	int maxlen = *lenp;
1586 
1587 	if (maxlen < 3)
1588 		return 255;
1589 
1590 	data[0] = inode->i_ino;
1591 	data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
1592 	data[2] = inode->i_generation;
1593 	*lenp = 3;
1594 	/* no room for directory info? return what we've stored so far */
1595 	if (maxlen < 5 || !need_parent)
1596 		return 3;
1597 
1598 	spin_lock(&dentry->d_lock);
1599 	inode = dentry->d_parent->d_inode;
1600 	data[3] = inode->i_ino;
1601 	data[4] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
1602 	*lenp = 5;
1603 	if (maxlen >= 6) {
1604 		data[5] = inode->i_generation;
1605 		*lenp = 6;
1606 	}
1607 	spin_unlock(&dentry->d_lock);
1608 	return *lenp;
1609 }
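/*
 * Example: with need_parent set and maxlen >= 6, the code above encodes
 * { objectid, dir_id, generation, parent objectid, parent dir_id,
 * parent generation } and returns 6, matching the type table in
 * reiserfs_fh_to_dentry() above.
 */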
1610 
1611 /* looks for stat data, then copies fields to it, marks the buffer
1612    containing stat data as dirty */
1613 /* reiserfs inodes are never really dirty, since the dirty inode call
1614 ** always logs them.  This call allows the VFS inode marking routines
1615 ** to properly mark inodes for datasync and such, but only actually
1616 ** does something when called for a synchronous update.
1617 */
1618 int reiserfs_write_inode(struct inode *inode, int do_sync)
1619 {
1620 	struct reiserfs_transaction_handle th;
1621 	int jbegin_count = 1;
1622 
1623 	if (inode->i_sb->s_flags & MS_RDONLY)
1624 		return -EROFS;
1625 	/* memory pressure can sometimes initiate write_inode calls with sync == 1;
1626 	 ** these cases occur just when the system needs RAM, not when the
1627 	 ** inode needs to reach disk for safety, and they can safely be
1628 	 ** ignored because the altered inode has already been logged.
1629 	 */
1630 	if (do_sync && !(current->flags & PF_MEMALLOC)) {
1631 		reiserfs_write_lock(inode->i_sb);
1632 		if (!journal_begin(&th, inode->i_sb, jbegin_count)) {
1633 			reiserfs_update_sd(&th, inode);
1634 			journal_end_sync(&th, inode->i_sb, jbegin_count);
1635 		}
1636 		reiserfs_write_unlock(inode->i_sb);
1637 	}
1638 	return 0;
1639 }
1640 
1641 /* stat data of new object is inserted already, this inserts the item
1642    containing "." and ".." entries */
1643 static int reiserfs_new_directory(struct reiserfs_transaction_handle *th,
1644 				  struct inode *inode,
1645 				  struct item_head *ih, struct treepath *path,
1646 				  struct inode *dir)
1647 {
1648 	struct super_block *sb = th->t_super;
1649 	char empty_dir[EMPTY_DIR_SIZE];
1650 	char *body = empty_dir;
1651 	struct cpu_key key;
1652 	int retval;
1653 
1654 	BUG_ON(!th->t_trans_id);
1655 
1656 	_make_cpu_key(&key, KEY_FORMAT_3_5, le32_to_cpu(ih->ih_key.k_dir_id),
1657 		      le32_to_cpu(ih->ih_key.k_objectid), DOT_OFFSET,
1658 		      TYPE_DIRENTRY, 3 /*key length */ );
1659 
1660 	/* compose item head for the new item. Directories consist of items of
1661 	   the old type (ITEM_VERSION_1). Do not set the key (second arg is NULL);
1662 	   that is done by reiserfs_new_inode */
1663 	if (old_format_only(sb)) {
1664 		make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET,
1665 				  TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2);
1666 
1667 		make_empty_dir_item_v1(body, ih->ih_key.k_dir_id,
1668 				       ih->ih_key.k_objectid,
1669 				       INODE_PKEY(dir)->k_dir_id,
1670 				       INODE_PKEY(dir)->k_objectid);
1671 	} else {
1672 		make_le_item_head(ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET,
1673 				  TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2);
1674 
1675 		make_empty_dir_item(body, ih->ih_key.k_dir_id,
1676 				    ih->ih_key.k_objectid,
1677 				    INODE_PKEY(dir)->k_dir_id,
1678 				    INODE_PKEY(dir)->k_objectid);
1679 	}
1680 
1681 	/* look for place in the tree for new item */
1682 	retval = search_item(sb, &key, path);
1683 	if (retval == IO_ERROR) {
1684 		reiserfs_error(sb, "vs-13080",
1685 			       "i/o failure occurred creating new directory");
1686 		return -EIO;
1687 	}
1688 	if (retval == ITEM_FOUND) {
1689 		pathrelse(path);
1690 		reiserfs_warning(sb, "vs-13070",
1691 				 "object with this key exists (%k)",
1692 				 &(ih->ih_key));
1693 		return -EEXIST;
1694 	}
1695 
1696 	/* insert the item, that is, the empty directory item */
1697 	return reiserfs_insert_item(th, path, &key, ih, inode, body);
1698 }
1699 
1700 /* stat data of object has been inserted, this inserts the item
1701    containing the body of symlink */
1702 static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct inode *inode,	/* Inode of symlink */
1703 				struct item_head *ih,
1704 				struct treepath *path, const char *symname,
1705 				int item_len)
1706 {
1707 	struct super_block *sb = th->t_super;
1708 	struct cpu_key key;
1709 	int retval;
1710 
1711 	BUG_ON(!th->t_trans_id);
1712 
1713 	_make_cpu_key(&key, KEY_FORMAT_3_5,
1714 		      le32_to_cpu(ih->ih_key.k_dir_id),
1715 		      le32_to_cpu(ih->ih_key.k_objectid),
1716 		      1, TYPE_DIRECT, 3 /*key length */ );
1717 
1718 	make_le_item_head(ih, NULL, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len,
1719 			  0 /*free_space */ );
1720 
1721 	/* look for place in the tree for new item */
1722 	retval = search_item(sb, &key, path);
1723 	if (retval == IO_ERROR) {
1724 		reiserfs_error(sb, "vs-13080",
1725 			       "i/o failure occurred creating new symlink");
1726 		return -EIO;
1727 	}
1728 	if (retval == ITEM_FOUND) {
1729 		pathrelse(path);
1730 		reiserfs_warning(sb, "vs-13080",
1731 				 "object with this key exists (%k)",
1732 				 &(ih->ih_key));
1733 		return -EEXIST;
1734 	}
1735 
1736 	/* insert the item, that is, the body of the symlink */
1737 	return reiserfs_insert_item(th, path, &key, ih, inode, symname);
1738 }
1739 
1740 /* inserts the stat data into the tree, and then calls
1741    reiserfs_new_directory (to insert ".", ".." item if new object is
1742    directory) or reiserfs_new_symlink (to insert symlink body if new
1743    object is symlink) or nothing (if new object is regular file)
1744 
1745    NOTE! uid and gid must already be set in the inode.  If we return
1746    non-zero due to an error, we have to drop the quota previously allocated
1747    for the fresh inode.  This can only be done outside a transaction, so
1748    if we return non-zero, we also end the transaction.  */
1749 int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1750 		       struct inode *dir, int mode, const char *symname,
1751 		       /* 0 for regular, EMPTY_DIR_SIZE for dirs,
1752 		          strlen(symname) for symlinks */
1753 		       loff_t i_size, struct dentry *dentry,
1754 		       struct inode *inode,
1755 		       struct reiserfs_security_handle *security)
1756 {
1757 	struct super_block *sb;
1758 	struct reiserfs_iget_args args;
1759 	INITIALIZE_PATH(path_to_key);
1760 	struct cpu_key key;
1761 	struct item_head ih;
1762 	struct stat_data sd;
1763 	int retval;
1764 	int err;
1765 
1766 	BUG_ON(!th->t_trans_id);
1767 
1768 	if (vfs_dq_alloc_inode(inode)) {
1769 		err = -EDQUOT;
1770 		goto out_end_trans;
1771 	}
1772 	if (!dir->i_nlink) {
1773 		err = -EPERM;
1774 		goto out_bad_inode;
1775 	}
1776 
1777 	sb = dir->i_sb;
1778 
1779 	/* item head of new item */
1780 	ih.ih_key.k_dir_id = reiserfs_choose_packing(dir);
1781 	ih.ih_key.k_objectid = cpu_to_le32(reiserfs_get_unused_objectid(th));
1782 	if (!ih.ih_key.k_objectid) {
1783 		err = -ENOMEM;
1784 		goto out_bad_inode;
1785 	}
1786 	args.objectid = inode->i_ino = le32_to_cpu(ih.ih_key.k_objectid);
1787 	if (old_format_only(sb))
1788 		make_le_item_head(&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET,
1789 				  TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
1790 	else
1791 		make_le_item_head(&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET,
1792 				  TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
1793 	memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
1794 	args.dirid = le32_to_cpu(ih.ih_key.k_dir_id);
1795 	if (insert_inode_locked4(inode, args.objectid,
1796 			     reiserfs_find_actor, &args) < 0) {
1797 		err = -EINVAL;
1798 		goto out_bad_inode;
1799 	}
1800 	if (old_format_only(sb))
1801 		/* not a perfect generation count, as object ids can be reused,
1802 		 ** but this is as good as reiserfs can do right now.
1803 		 ** Note that the private part of the inode isn't filled in yet;
1804 		 ** we have to use the directory.
1805 		 */
1806 		inode->i_generation = le32_to_cpu(INODE_PKEY(dir)->k_objectid);
1807 	else
1808 #if defined( USE_INODE_GENERATION_COUNTER )
1809 		inode->i_generation =
1810 		    le32_to_cpu(REISERFS_SB(sb)->s_rs->s_inode_generation);
1811 #else
1812 		inode->i_generation = ++event;
1813 #endif
1814 
1815 	/* fill stat data */
1816 	inode->i_nlink = (S_ISDIR(mode) ? 2 : 1);
1817 
1818 	/* uid and gid must already be set by the caller for quota init */
1819 
1820 	/* symlink cannot be immutable or append only, right? */
1821 	if (S_ISLNK(inode->i_mode))
1822 		inode->i_flags &= ~(S_IMMUTABLE | S_APPEND);
1823 
1824 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
1825 	inode->i_size = i_size;
1826 	inode->i_blocks = 0;
1827 	inode->i_bytes = 0;
1828 	REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 1 :
1829 	    U32_MAX /*NO_BYTES_IN_DIRECT_ITEM */ ;
1830 
1831 	INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list));
1832 	REISERFS_I(inode)->i_flags = 0;
1833 	REISERFS_I(inode)->i_prealloc_block = 0;
1834 	REISERFS_I(inode)->i_prealloc_count = 0;
1835 	REISERFS_I(inode)->i_trans_id = 0;
1836 	REISERFS_I(inode)->i_jl = NULL;
1837 	REISERFS_I(inode)->i_attrs =
1838 	    REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
1839 	sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
1840 	mutex_init(&(REISERFS_I(inode)->i_mmap));
1841 	reiserfs_init_xattr_rwsem(inode);
1842 
1843 	/* key to search for correct place for new stat data */
1844 	_make_cpu_key(&key, KEY_FORMAT_3_6, le32_to_cpu(ih.ih_key.k_dir_id),
1845 		      le32_to_cpu(ih.ih_key.k_objectid), SD_OFFSET,
1846 		      TYPE_STAT_DATA, 3 /*key length */ );
1847 
1848 	/* find proper place for inserting of stat data */
1849 	retval = search_item(sb, &key, &path_to_key);
1850 	if (retval == IO_ERROR) {
1851 		err = -EIO;
1852 		goto out_bad_inode;
1853 	}
1854 	if (retval == ITEM_FOUND) {
1855 		pathrelse(&path_to_key);
1856 		err = -EEXIST;
1857 		goto out_bad_inode;
1858 	}
1859 	if (old_format_only(sb)) {
1860 		if (inode->i_uid & ~0xffff || inode->i_gid & ~0xffff) {
1861 			pathrelse(&path_to_key);
1862 			/* i_uid or i_gid is too big to be stored in stat data v3.5 */
1863 			err = -EINVAL;
1864 			goto out_bad_inode;
1865 		}
1866 		inode2sd_v1(&sd, inode, inode->i_size);
1867 	} else {
1868 		inode2sd(&sd, inode, inode->i_size);
1869 	}
1870 	// store in the in-core inode the key of the stat data and the version
1871 	// all object items will have (directory items will have the old offset
1872 	// format; other new objects will consist of new items)
1873 	if (old_format_only(sb) || S_ISDIR(mode) || S_ISLNK(mode))
1874 		set_inode_item_key_version(inode, KEY_FORMAT_3_5);
1875 	else
1876 		set_inode_item_key_version(inode, KEY_FORMAT_3_6);
1877 	if (old_format_only(sb))
1878 		set_inode_sd_version(inode, STAT_DATA_V1);
1879 	else
1880 		set_inode_sd_version(inode, STAT_DATA_V2);
1881 
1882 	/* insert the stat data into the tree */
1883 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
1884 	if (REISERFS_I(dir)->new_packing_locality)
1885 		th->displace_new_blocks = 1;
1886 #endif
1887 	retval =
1888 	    reiserfs_insert_item(th, &path_to_key, &key, &ih, inode,
1889 				 (char *)(&sd));
1890 	if (retval) {
1891 		err = retval;
1892 		reiserfs_check_path(&path_to_key);
1893 		goto out_bad_inode;
1894 	}
1895 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
1896 	if (!th->displace_new_blocks)
1897 		REISERFS_I(dir)->new_packing_locality = 0;
1898 #endif
1899 	if (S_ISDIR(mode)) {
1900 		/* insert item with "." and ".." */
1901 		retval =
1902 		    reiserfs_new_directory(th, inode, &ih, &path_to_key, dir);
1903 	}
1904 
1905 	if (S_ISLNK(mode)) {
1906 		/* insert body of symlink */
1907 		if (!old_format_only(sb))
1908 			i_size = ROUND_UP(i_size);
1909 		retval =
1910 		    reiserfs_new_symlink(th, inode, &ih, &path_to_key, symname,
1911 					 i_size);
1912 	}
1913 	if (retval) {
1914 		err = retval;
1915 		reiserfs_check_path(&path_to_key);
1916 		journal_end(th, th->t_super, th->t_blocks_allocated);
1917 		goto out_inserted_sd;
1918 	}
1919 
1920 	if (reiserfs_posixacl(inode->i_sb)) {
1921 		retval = reiserfs_inherit_default_acl(th, dir, dentry, inode);
1922 		if (retval) {
1923 			err = retval;
1924 			reiserfs_check_path(&path_to_key);
1925 			journal_end(th, th->t_super, th->t_blocks_allocated);
1926 			goto out_inserted_sd;
1927 		}
1928 	} else if (inode->i_sb->s_flags & MS_POSIXACL) {
1929 		reiserfs_warning(inode->i_sb, "jdm-13090",
1930 				 "ACLs aren't enabled in the fs, "
1931 				 "but vfs thinks they are!");
1932 	} else if (IS_PRIVATE(dir))
1933 		inode->i_flags |= S_PRIVATE;
1934 
1935 	if (security->name) {
1936 		retval = reiserfs_security_write(th, inode, security);
1937 		if (retval) {
1938 			err = retval;
1939 			reiserfs_check_path(&path_to_key);
1940 			retval = journal_end(th, th->t_super,
1941 					     th->t_blocks_allocated);
1942 			if (retval)
1943 				err = retval;
1944 			goto out_inserted_sd;
1945 		}
1946 	}
1947 
1948 	reiserfs_update_sd(th, inode);
1949 	reiserfs_check_path(&path_to_key);
1950 
1951 	return 0;
1952 
1953 /* it looks like you could easily compress these two goto targets into
1954  * one.  Keeping them separate doesn't actually hurt anything, and they
1955  * are placeholders for what the quota code actually needs.
1956  */
1957       out_bad_inode:
1958 	/* Invalidate the object, nothing was inserted yet */
1959 	INODE_PKEY(inode)->k_objectid = 0;
1960 
1961 	/* Quota change must be inside a transaction for journaling */
1962 	vfs_dq_free_inode(inode);
1963 
1964       out_end_trans:
1965 	journal_end(th, th->t_super, th->t_blocks_allocated);
1966 	/* Drop can be outside and it needs more credits so it's better to have it outside */
1967 	vfs_dq_drop(inode);
1968 	inode->i_flags |= S_NOQUOTA;
1969 	make_bad_inode(inode);
1970 
1971       out_inserted_sd:
1972 	inode->i_nlink = 0;
1973 	th->t_trans_id = 0;	/* so the caller can't use this handle later */
1974 	unlock_new_inode(inode); /* OK to do even if we hadn't locked it */
1975 	iput(inode);
1976 	return err;
1977 }
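
/*
 * Illustrative caller pattern (editorial sketch, condensed from the
 * create/mkdir/symlink paths in namei.c): because a non-zero return means
 * the transaction has already been ended, the caller must not keep using
 * the handle:
 *
 *	retval = reiserfs_new_inode(&th, dir, mode, NULL, 0, dentry,
 *				    inode, &security);
 *	if (retval)
 *		goto out_failed;	(th is dead here, t_trans_id == 0)
 */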
1978 
1979 /*
1980 ** finds the tail page in the page cache and
1981 ** reads the last block in.
1982 **
1983 ** On success, page_result is set to a locked, pinned page, and bh_result
1984 ** is set to an up-to-date buffer for the last block in the file.  Returns 0.
1985 **
1986 ** tail conversion is not done, so bh_result might not be valid for writing;
1987 ** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before
1988 ** trying to write the block.
1989 **
1990 ** on failure, nonzero is returned and page_result and bh_result are untouched.
1991 */
1992 static int grab_tail_page(struct inode *inode,
1993 			  struct page **page_result,
1994 			  struct buffer_head **bh_result)
1995 {
1996 
1997 	/* we want the page with the last byte in the file,
1998 	 ** not the page that will hold the next byte for appending
1999 	 */
2000 	unsigned long index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
2001 	unsigned long pos = 0;
2002 	unsigned long start = 0;
2003 	unsigned long blocksize = inode->i_sb->s_blocksize;
2004 	unsigned long offset = (inode->i_size) & (PAGE_CACHE_SIZE - 1);
2005 	struct buffer_head *bh;
2006 	struct buffer_head *head;
2007 	struct page *page;
2008 	int error;
2009 
2010 	/* we know that we are only called with inode->i_size > 0.
2011 	 ** we also know that a file tail can never be as big as a block.
2012 	 ** If i_size % blocksize == 0, our file is currently block aligned
2013 	 ** and it won't need converting or zeroing after a truncate.
2014 	 */
2015 	if ((offset & (blocksize - 1)) == 0) {
2016 		return -ENOENT;
2017 	}
2018 	page = grab_cache_page(inode->i_mapping, index);
2019 	error = -ENOMEM;
2020 	if (!page) {
2021 		goto out;
2022 	}
2023 	/* start within the page of the last block in the file */
2024 	start = (offset / blocksize) * blocksize;
2025 
2026 	error = block_prepare_write(page, start, offset,
2027 				    reiserfs_get_block_create_0);
2028 	if (error)
2029 		goto unlock;
2030 
2031 	head = page_buffers(page);
2032 	bh = head;
2033 	do {
2034 		if (pos >= start) {
2035 			break;
2036 		}
2037 		bh = bh->b_this_page;
2038 		pos += blocksize;
2039 	} while (bh != head);
2040 
2041 	if (!buffer_uptodate(bh)) {
2042 		/* note, this should never happen; prepare_write should
2043 		 ** be taking care of this for us.  If the buffer isn't up to date,
2044 		 ** I've screwed up either the code that finds the buffer or the
2045 		 ** code that calls prepare_write.
2046 		 */
2047 		reiserfs_error(inode->i_sb, "clm-6000",
2048 			       "error reading block %lu", bh->b_blocknr);
2049 		error = -EIO;
2050 		goto unlock;
2051 	}
2052 	*bh_result = bh;
2053 	*page_result = page;
2054 
2055       out:
2056 	return error;
2057 
2058       unlock:
2059 	unlock_page(page);
2060 	page_cache_release(page);
2061 	return error;
2062 }
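
/*
 * Editorial worked example of the index/offset math above, assuming 4K
 * pages and a 1K blocksize: for i_size == 5000,
 *
 *	index  = (5000 - 1) >> PAGE_CACHE_SHIFT	= 1
 *	offset = 5000 & (PAGE_CACHE_SIZE - 1)	= 904
 *	start  = (904 / 1024) * 1024		= 0
 *
 * offset & (blocksize - 1) is 904, so the tail is not block aligned and
 * the page is grabbed; with i_size == 5120 the same test yields 0 and
 * grab_tail_page() returns -ENOENT instead.
 */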
2063 
2064 /*
2065 ** vfs version of truncate file.  Must NOT be called with
2066 ** a transaction already started.
2067 **
2068 ** some code taken from block_truncate_page
2069 */
2070 int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
2071 {
2072 	struct reiserfs_transaction_handle th;
2073 	/* we want the offset for the first byte after the end of the file */
2074 	unsigned long offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
2075 	unsigned blocksize = inode->i_sb->s_blocksize;
2076 	unsigned length;
2077 	struct page *page = NULL;
2078 	int error;
2079 	struct buffer_head *bh = NULL;
2080 	int err2;
2081 	int lock_depth;
2082 
2083 	lock_depth = reiserfs_write_lock_once(inode->i_sb);
2084 
2085 	if (inode->i_size > 0) {
2086 		error = grab_tail_page(inode, &page, &bh);
2087 		if (error) {
2088 			// -ENOENT means we truncated past the end of the file,
2089 			// and get_block_create_0 could not find a block to read in,
2090 			// which is ok.
2091 			if (error != -ENOENT)
2092 				reiserfs_error(inode->i_sb, "clm-6001",
2093 					       "grab_tail_page failed %d",
2094 					       error);
2095 			page = NULL;
2096 			bh = NULL;
2097 		}
2098 	}
2099 
2100 	/* so, if page != NULL, we have a buffer head for the offset at
2101 	 ** the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
2102 	 ** then we have an unformatted node.  Otherwise, we have a direct item,
2103 	 ** and no zeroing is required on disk.  We zero after the truncate,
2104 	 ** because the truncate might pack the item anyway
2105 	 ** (it will unmap bh if it packs).
2106 	 */
2107 	/* it is enough to reserve space in transaction for 2 balancings:
2108 	   one for "save" link adding and another for the first
2109 	   cut_from_item. 1 is for update_sd */
2110 	error = journal_begin(&th, inode->i_sb,
2111 			      JOURNAL_PER_BALANCE_CNT * 2 + 1);
2112 	if (error)
2113 		goto out;
2114 	reiserfs_update_inode_transaction(inode);
2115 	if (update_timestamps)
2116 		/* we are doing a real truncate: if the system crashes before the last
2117 		   transaction of the truncate is committed, then on reboot the file
2118 		   either appears properly truncated or not truncated at all */
2119 		add_save_link(&th, inode, 1);
2120 	err2 = reiserfs_do_truncate(&th, inode, page, update_timestamps);
2121 	error =
2122 	    journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1);
2123 	if (error)
2124 		goto out;
2125 
2126 	/* check reiserfs_do_truncate after ending the transaction */
2127 	if (err2) {
2128 		error = err2;
2129 		goto out;
2130 	}
2131 
2132 	if (update_timestamps) {
2133 		error = remove_save_link(inode, 1 /* truncate */);
2134 		if (error)
2135 			goto out;
2136 	}
2137 
2138 	if (page) {
2139 		length = offset & (blocksize - 1);
2140 		/* if we are not on a block boundary */
2141 		if (length) {
2142 			length = blocksize - length;
2143 			zero_user(page, offset, length);
2144 			if (buffer_mapped(bh) && bh->b_blocknr != 0) {
2145 				mark_buffer_dirty(bh);
2146 			}
2147 		}
2148 		unlock_page(page);
2149 		page_cache_release(page);
2150 	}
2151 
2152 	reiserfs_write_unlock_once(inode->i_sb, lock_depth);
2153 
2154 	return 0;
2155       out:
2156 	if (page) {
2157 		unlock_page(page);
2158 		page_cache_release(page);
2159 	}
2160 
2161 	reiserfs_write_unlock_once(inode->i_sb, lock_depth);
2162 
2163 	return error;
2164 }
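
/*
 * Editorial worked example of the zeroing above, assuming 4K pages and a
 * 4K blocksize: for i_size == 10000, offset == 10000 & (PAGE_CACHE_SIZE - 1)
 * == 1808, so length == 1808 and then blocksize - length == 2288, and
 * zero_user() clears bytes 1808..4095 of the tail page before the buffer
 * is (possibly) marked dirty.
 */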
2165 
2166 static int map_block_for_writepage(struct inode *inode,
2167 				   struct buffer_head *bh_result,
2168 				   unsigned long block)
2169 {
2170 	struct reiserfs_transaction_handle th;
2171 	int fs_gen;
2172 	struct item_head tmp_ih;
2173 	struct item_head *ih;
2174 	struct buffer_head *bh;
2175 	__le32 *item;
2176 	struct cpu_key key;
2177 	INITIALIZE_PATH(path);
2178 	int pos_in_item;
2179 	int jbegin_count = JOURNAL_PER_BALANCE_CNT;
2180 	loff_t byte_offset = ((loff_t)block << inode->i_sb->s_blocksize_bits)+1;
2181 	int retval;
2182 	int use_get_block = 0;
2183 	int bytes_copied = 0;
2184 	int copy_size;
2185 	int trans_running = 0;
2186 
2187 	/* catch places below that try to log something without starting a trans */
2188 	th.t_trans_id = 0;
2189 
2190 	if (!buffer_uptodate(bh_result)) {
2191 		return -EIO;
2192 	}
2193 
2194 	kmap(bh_result->b_page);
2195       start_over:
2196 	reiserfs_write_lock(inode->i_sb);
2197 	make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3);
2198 
2199       research:
2200 	retval = search_for_position_by_key(inode->i_sb, &key, &path);
2201 	if (retval != POSITION_FOUND) {
2202 		use_get_block = 1;
2203 		goto out;
2204 	}
2205 
2206 	bh = get_last_bh(&path);
2207 	ih = get_ih(&path);
2208 	item = get_item(&path);
2209 	pos_in_item = path.pos_in_item;
2210 
2211 	/* we've found an unformatted node */
2212 	if (indirect_item_found(retval, ih)) {
2213 		if (bytes_copied > 0) {
2214 			reiserfs_warning(inode->i_sb, "clm-6002",
2215 					 "bytes_copied %d", bytes_copied);
2216 		}
2217 		if (!get_block_num(item, pos_in_item)) {
2218 			/* crap, we are writing to a hole */
2219 			use_get_block = 1;
2220 			goto out;
2221 		}
2222 		set_block_dev_mapped(bh_result,
2223 				     get_block_num(item, pos_in_item), inode);
2224 	} else if (is_direct_le_ih(ih)) {
2225 		char *p;
2226 		p = page_address(bh_result->b_page);
2227 		p += (byte_offset - 1) & (PAGE_CACHE_SIZE - 1);
2228 		copy_size = ih_item_len(ih) - pos_in_item;
2229 
2230 		fs_gen = get_generation(inode->i_sb);
2231 		copy_item_head(&tmp_ih, ih);
2232 
2233 		if (!trans_running) {
2234 			/* vs-3050 is gone, no need to drop the path */
2235 			retval = journal_begin(&th, inode->i_sb, jbegin_count);
2236 			if (retval)
2237 				goto out;
2238 			reiserfs_update_inode_transaction(inode);
2239 			trans_running = 1;
2240 			if (fs_changed(fs_gen, inode->i_sb)
2241 			    && item_moved(&tmp_ih, &path)) {
2242 				reiserfs_restore_prepared_buffer(inode->i_sb,
2243 								 bh);
2244 				goto research;
2245 			}
2246 		}
2247 
2248 		reiserfs_prepare_for_journal(inode->i_sb, bh, 1);
2249 
2250 		if (fs_changed(fs_gen, inode->i_sb)
2251 		    && item_moved(&tmp_ih, &path)) {
2252 			reiserfs_restore_prepared_buffer(inode->i_sb, bh);
2253 			goto research;
2254 		}
2255 
2256 		memcpy(B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied,
2257 		       copy_size);
2258 
2259 		journal_mark_dirty(&th, inode->i_sb, bh);
2260 		bytes_copied += copy_size;
2261 		set_block_dev_mapped(bh_result, 0, inode);
2262 
2263 		/* are there still bytes left? */
2264 		if (bytes_copied < bh_result->b_size &&
2265 		    (byte_offset + bytes_copied) < inode->i_size) {
2266 			set_cpu_key_k_offset(&key,
2267 					     cpu_key_k_offset(&key) +
2268 					     copy_size);
2269 			goto research;
2270 		}
2271 	} else {
2272 		reiserfs_warning(inode->i_sb, "clm-6003",
2273 				 "bad item inode %lu", inode->i_ino);
2274 		retval = -EIO;
2275 		goto out;
2276 	}
2277 	retval = 0;
2278 
2279       out:
2280 	pathrelse(&path);
2281 	if (trans_running) {
2282 		int err = journal_end(&th, inode->i_sb, jbegin_count);
2283 		if (err)
2284 			retval = err;
2285 		trans_running = 0;
2286 	}
2287 	reiserfs_write_unlock(inode->i_sb);
2288 
2289 	/* this is where we fill in holes in the file. */
2290 	if (use_get_block) {
2291 		retval = reiserfs_get_block(inode, block, bh_result,
2292 					    GET_BLOCK_CREATE | GET_BLOCK_NO_IMUX
2293 					    | GET_BLOCK_NO_DANGLE);
2294 		if (!retval) {
2295 			if (!buffer_mapped(bh_result)
2296 			    || bh_result->b_blocknr == 0) {
2297 				/* get_block failed to find a mapped unformatted node. */
2298 				use_get_block = 0;
2299 				goto start_over;
2300 			}
2301 		}
2302 	}
2303 	kunmap(bh_result->b_page);
2304 
2305 	if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
2306 		/* we've copied data from the page into the direct item, so the
2307 		 * buffer in the page is now clean, mark it to reflect that.
2308 		 */
2309 		lock_buffer(bh_result);
2310 		clear_buffer_dirty(bh_result);
2311 		unlock_buffer(bh_result);
2312 	}
2313 	return retval;
2314 }
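
/*
 * Editorial note on the key math above: reiserfs item keys use 1-based
 * byte offsets, hence the "+ 1" in byte_offset.  With a 4K blocksize,
 * block 3 gives byte_offset == (3 << 12) + 1 == 12289, the key offset of
 * the first byte of that block.
 */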
2315 
2316 /*
2317  * mason@suse.com: updated in 2.5.54 to follow the same general io
2318  * start/recovery path as __block_write_full_page, along with special
2319  * code to handle reiserfs tails.
2320  */
2321 static int reiserfs_write_full_page(struct page *page,
2322 				    struct writeback_control *wbc)
2323 {
2324 	struct inode *inode = page->mapping->host;
2325 	unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
2326 	int error = 0;
2327 	unsigned long block;
2328 	sector_t last_block;
2329 	struct buffer_head *head, *bh;
2330 	int partial = 0;
2331 	int nr = 0;
2332 	int checked = PageChecked(page);
2333 	struct reiserfs_transaction_handle th;
2334 	struct super_block *s = inode->i_sb;
2335 	int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
2336 	th.t_trans_id = 0;
2337 
2338 	/* no logging allowed when nonblocking or from PF_MEMALLOC */
2339 	if (checked && (current->flags & PF_MEMALLOC)) {
2340 		redirty_page_for_writepage(wbc, page);
2341 		unlock_page(page);
2342 		return 0;
2343 	}
2344 
2345 	/* The page dirty bit is cleared before writepage is called, which
2346 	 * means we have to tell create_empty_buffers to make dirty buffers.
2347 	 * The page really should be up to date at this point, so tossing
2348 	 * in BH_Uptodate is just a sanity check.
2349 	 */
2350 	if (!page_has_buffers(page)) {
2351 		create_empty_buffers(page, s->s_blocksize,
2352 				     (1 << BH_Dirty) | (1 << BH_Uptodate));
2353 	}
2354 	head = page_buffers(page);
2355 
2356 	/* last page in the file, zero out any contents past the
2357 	 ** last byte in the file
2358 	 */
2359 	if (page->index >= end_index) {
2360 		unsigned last_offset;
2361 
2362 		last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
2363 		/* no file contents in this page */
2364 		if (page->index >= end_index + 1 || !last_offset) {
2365 			unlock_page(page);
2366 			return 0;
2367 		}
2368 		zero_user_segment(page, last_offset, PAGE_CACHE_SIZE);
2369 	}
2370 	bh = head;
2371 	block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits);
2372 	last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
2373 	/* first map all the buffers, logging any direct items we find */
2374 	do {
2375 		if (block > last_block) {
2376 			/*
2377 			 * This can happen when the block size is less than
2378 			 * the page size.  The corresponding bytes in the page
2379 			 * were zero filled above
2380 			 */
2381 			clear_buffer_dirty(bh);
2382 			set_buffer_uptodate(bh);
2383 		} else if ((checked || buffer_dirty(bh)) &&
2384 		           (!buffer_mapped(bh) || (buffer_mapped(bh)
2385 						       && bh->b_blocknr ==
2386 						       0))) {
2387 			/* not mapped yet, or it points to a direct item, search
2388 			 * the btree for the mapping info, and log any direct
2389 			 * items found
2390 			 */
2391 			if ((error = map_block_for_writepage(inode, bh, block))) {
2392 				goto fail;
2393 			}
2394 		}
2395 		bh = bh->b_this_page;
2396 		block++;
2397 	} while (bh != head);
2398 
2399 	/*
2400 	 * we start the transaction after map_block_for_writepage,
2401 	 * because it can create holes in the file (an unbounded operation).
2402 	 * By starting it here, we can make a reliable estimate of how many
2403 	 * blocks we're going to log.
2404 	 */
2405 	if (checked) {
2406 		ClearPageChecked(page);
2407 		reiserfs_write_lock(s);
2408 		error = journal_begin(&th, s, bh_per_page + 1);
2409 		if (error) {
2410 			reiserfs_write_unlock(s);
2411 			goto fail;
2412 		}
2413 		reiserfs_update_inode_transaction(inode);
2414 	}
2415 	/* now go through and lock any dirty buffers on the page */
2416 	do {
2417 		get_bh(bh);
2418 		if (!buffer_mapped(bh))
2419 			continue;
2420 		if (buffer_mapped(bh) && bh->b_blocknr == 0)
2421 			continue;
2422 
2423 		if (checked) {
2424 			reiserfs_prepare_for_journal(s, bh, 1);
2425 			journal_mark_dirty(&th, s, bh);
2426 			continue;
2427 		}
2428 		/* from this point on, we know the buffer is mapped to a
2429 		 * real block and not a direct item
2430 		 */
2431 		if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
2432 			lock_buffer(bh);
2433 		} else {
2434 			if (!trylock_buffer(bh)) {
2435 				redirty_page_for_writepage(wbc, page);
2436 				continue;
2437 			}
2438 		}
2439 		if (test_clear_buffer_dirty(bh)) {
2440 			mark_buffer_async_write(bh);
2441 		} else {
2442 			unlock_buffer(bh);
2443 		}
2444 	} while ((bh = bh->b_this_page) != head);
2445 
2446 	if (checked) {
2447 		error = journal_end(&th, s, bh_per_page + 1);
2448 		reiserfs_write_unlock(s);
2449 		if (error)
2450 			goto fail;
2451 	}
2452 	BUG_ON(PageWriteback(page));
2453 	set_page_writeback(page);
2454 	unlock_page(page);
2455 
2456 	/*
2457 	 * since any buffer might be the only dirty buffer on the page,
2458 	 * the first submit_bh can bring the page out of writeback.
2459 	 * be careful with the buffers.
2460 	 */
2461 	do {
2462 		struct buffer_head *next = bh->b_this_page;
2463 		if (buffer_async_write(bh)) {
2464 			submit_bh(WRITE, bh);
2465 			nr++;
2466 		}
2467 		put_bh(bh);
2468 		bh = next;
2469 	} while (bh != head);
2470 
2471 	error = 0;
2472       done:
2473 	if (nr == 0) {
2474 		/*
2475 		 * if this page only had a direct item, it is very possible for
2476 		 * no io to be required without there being an error.  Or,
2477 		 * someone else could have locked the buffers and sent them down
2478 		 * the pipe without locking the page.
2479 		 */
2480 		bh = head;
2481 		do {
2482 			if (!buffer_uptodate(bh)) {
2483 				partial = 1;
2484 				break;
2485 			}
2486 			bh = bh->b_this_page;
2487 		} while (bh != head);
2488 		if (!partial)
2489 			SetPageUptodate(page);
2490 		end_page_writeback(page);
2491 	}
2492 	return error;
2493 
2494       fail:
2495 	/* catches various errors, we need to make sure any valid dirty blocks
2496 	 * get to the media.  The page is currently locked and not marked for
2497 	 * writeback
2498 	 */
2499 	ClearPageUptodate(page);
2500 	bh = head;
2501 	do {
2502 		get_bh(bh);
2503 		if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) {
2504 			lock_buffer(bh);
2505 			mark_buffer_async_write(bh);
2506 		} else {
2507 			/*
2508 			 * clear any dirty bits that might have come from getting
2509 			 * attached to a dirty page
2510 			 */
2511 			clear_buffer_dirty(bh);
2512 		}
2513 		bh = bh->b_this_page;
2514 	} while (bh != head);
2515 	SetPageError(page);
2516 	BUG_ON(PageWriteback(page));
2517 	set_page_writeback(page);
2518 	unlock_page(page);
2519 	do {
2520 		struct buffer_head *next = bh->b_this_page;
2521 		if (buffer_async_write(bh)) {
2522 			clear_buffer_dirty(bh);
2523 			submit_bh(WRITE, bh);
2524 			nr++;
2525 		}
2526 		put_bh(bh);
2527 		bh = next;
2528 	} while (bh != head);
2529 	goto done;
2530 }
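
/*
 * Editorial worked example of the buffer walk above, assuming 4K pages
 * and a 1K blocksize: for i_size == 5000, end_index == 1 and
 * last_block == (5000 - 1) >> 10 == 4.  Page index 1 covers blocks 4..7,
 * so block 4 gets mapped and written while blocks 5..7, which lie past
 * EOF and were zero filled above, just have their dirty bits cleared.
 */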
2531 
2532 static int reiserfs_readpage(struct file *f, struct page *page)
2533 {
2534 	return block_read_full_page(page, reiserfs_get_block);
2535 }
2536 
2537 static int reiserfs_writepage(struct page *page, struct writeback_control *wbc)
2538 {
2539 	struct inode *inode = page->mapping->host;
2540 	reiserfs_wait_on_write_block(inode->i_sb);
2541 	return reiserfs_write_full_page(page, wbc);
2542 }
2543 
2544 static void reiserfs_truncate_failed_write(struct inode *inode)
2545 {
2546 	truncate_inode_pages(inode->i_mapping, inode->i_size);
2547 	reiserfs_truncate_file(inode, 0);
2548 }
2549 
2550 static int reiserfs_write_begin(struct file *file,
2551 				struct address_space *mapping,
2552 				loff_t pos, unsigned len, unsigned flags,
2553 				struct page **pagep, void **fsdata)
2554 {
2555 	struct inode *inode;
2556 	struct page *page;
2557 	pgoff_t index;
2558 	int ret;
2559 	int old_ref = 0;
2560 
2561 	inode = mapping->host;
2562 	*fsdata = NULL;
2563 	if (flags & AOP_FLAG_CONT_EXPAND &&
2564 	    (pos & (inode->i_sb->s_blocksize - 1)) == 0) {
2565 		pos++;
2566 		*fsdata = (void *)(unsigned long)flags;
2567 	}
2568 
2569 	index = pos >> PAGE_CACHE_SHIFT;
2570 	page = grab_cache_page_write_begin(mapping, index, flags);
2571 	if (!page)
2572 		return -ENOMEM;
2573 	*pagep = page;
2574 
2575 	reiserfs_wait_on_write_block(inode->i_sb);
2576 	fix_tail_page_for_writing(page);
2577 	if (reiserfs_transaction_running(inode->i_sb)) {
2578 		struct reiserfs_transaction_handle *th;
2579 		th = (struct reiserfs_transaction_handle *)current->
2580 		    journal_info;
2581 		BUG_ON(!th->t_refcount);
2582 		BUG_ON(!th->t_trans_id);
2583 		old_ref = th->t_refcount;
2584 		th->t_refcount++;
2585 	}
2586 	ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
2587 				reiserfs_get_block);
2588 	if (ret && reiserfs_transaction_running(inode->i_sb)) {
2589 		struct reiserfs_transaction_handle *th = current->journal_info;
2590 		/* this gets a little ugly.  If reiserfs_get_block returned an
2591 		 * error and left a transaction running, we've got to close it,
2592 		 * and we've got to free the handle if it was a persistent transaction.
2593 		 *
2594 		 * But, if we had nested into an existing transaction, we need
2595 		 * to just drop the ref count on the handle.
2596 		 *
2597 		 * If old_ref == 0, the transaction is from reiserfs_get_block,
2598 		 * and it was a persistent trans.  Otherwise, it was nested above.
2599 		 */
2600 		if (th->t_refcount > old_ref) {
2601 			if (old_ref)
2602 				th->t_refcount--;
2603 			else {
2604 				int err;
2605 				reiserfs_write_lock(inode->i_sb);
2606 				err = reiserfs_end_persistent_transaction(th);
2607 				reiserfs_write_unlock(inode->i_sb);
2608 				if (err)
2609 					ret = err;
2610 			}
2611 		}
2612 	}
2613 	if (ret) {
2614 		unlock_page(page);
2615 		page_cache_release(page);
2616 		/* Truncate allocated blocks */
2617 		reiserfs_truncate_failed_write(inode);
2618 	}
2619 	return ret;
2620 }
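
/*
 * Editorial reading of the AOP_FLAG_CONT_EXPAND handling above (an
 * interpretation, not original commentary): when an expanding truncate
 * lands exactly on a block boundary, a zero-length prepare at pos would
 * touch nothing, so pos is bumped by one to force the block holding the
 * new EOF to be prepared, and the flag stashed in *fsdata tells
 * reiserfs_write_end() below to apply the same off-by-one to pos before
 * computing the i_size update.
 */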
2621 
2622 int reiserfs_prepare_write(struct file *f, struct page *page,
2623 			   unsigned from, unsigned to)
2624 {
2625 	struct inode *inode = page->mapping->host;
2626 	int ret;
2627 	int old_ref = 0;
2628 
2629 	reiserfs_write_unlock(inode->i_sb);
2630 	reiserfs_wait_on_write_block(inode->i_sb);
2631 	reiserfs_write_lock(inode->i_sb);
2632 
2633 	fix_tail_page_for_writing(page);
2634 	if (reiserfs_transaction_running(inode->i_sb)) {
2635 		struct reiserfs_transaction_handle *th;
2636 		th = (struct reiserfs_transaction_handle *)current->
2637 		    journal_info;
2638 		BUG_ON(!th->t_refcount);
2639 		BUG_ON(!th->t_trans_id);
2640 		old_ref = th->t_refcount;
2641 		th->t_refcount++;
2642 	}
2643 
2644 	ret = block_prepare_write(page, from, to, reiserfs_get_block);
2645 	if (ret && reiserfs_transaction_running(inode->i_sb)) {
2646 		struct reiserfs_transaction_handle *th = current->journal_info;
2647 		/* this gets a little ugly.  If reiserfs_get_block returned an
2648 		 * error and left a transaction running, we've got to close it,
2649 		 * and we've got to free the handle if it was a persistent transaction.
2650 		 *
2651 		 * But, if we had nested into an existing transaction, we need
2652 		 * to just drop the ref count on the handle.
2653 		 *
2654 		 * If old_ref == 0, the transaction is from reiserfs_get_block,
2655 		 * and it was a persistent trans.  Otherwise, it was nested above.
2656 		 */
2657 		if (th->t_refcount > old_ref) {
2658 			if (old_ref)
2659 				th->t_refcount--;
2660 			else {
2661 				int err;
2662 				reiserfs_write_lock(inode->i_sb);
2663 				err = reiserfs_end_persistent_transaction(th);
2664 				reiserfs_write_unlock(inode->i_sb);
2665 				if (err)
2666 					ret = err;
2667 			}
2668 		}
2669 	}
2670 	return ret;
2671 
2672 }
2673 
2674 static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block)
2675 {
2676 	return generic_block_bmap(as, block, reiserfs_bmap);
2677 }
2678 
2679 static int reiserfs_write_end(struct file *file, struct address_space *mapping,
2680 			      loff_t pos, unsigned len, unsigned copied,
2681 			      struct page *page, void *fsdata)
2682 {
2683 	struct inode *inode = page->mapping->host;
2684 	int ret = 0;
2685 	int update_sd = 0;
2686 	struct reiserfs_transaction_handle *th;
2687 	unsigned start;
2688 	int lock_depth = 0;
2689 	bool locked = false;
2690 
2691 	if ((unsigned long)fsdata & AOP_FLAG_CONT_EXPAND)
2692 		pos++;
2693 
2694 	reiserfs_wait_on_write_block(inode->i_sb);
2695 	if (reiserfs_transaction_running(inode->i_sb))
2696 		th = current->journal_info;
2697 	else
2698 		th = NULL;
2699 
2700 	start = pos & (PAGE_CACHE_SIZE - 1);
2701 	if (unlikely(copied < len)) {
2702 		if (!PageUptodate(page))
2703 			copied = 0;
2704 
2705 		page_zero_new_buffers(page, start + copied, start + len);
2706 	}
2707 	flush_dcache_page(page);
2708 
2709 	reiserfs_commit_page(inode, page, start, start + copied);
2710 
2711 	/* generic_commit_write does this for us, but does not update the
2712 	 ** transaction tracking stuff when the size changes.  So, we have
2713 	 ** to do the i_size updates here.
2714 	 */
2715 	if (pos + copied > inode->i_size) {
2716 		struct reiserfs_transaction_handle myth;
2717 		lock_depth = reiserfs_write_lock_once(inode->i_sb);
2718 		locked = true;
2719 		/* If the file has grown beyond the border where it
2720 		   can have a tail, unmark it as needing tail
2721 		   packing */
2722 		if ((have_large_tails(inode->i_sb)
2723 		     && inode->i_size > i_block_size(inode) * 4)
2724 		    || (have_small_tails(inode->i_sb)
2725 			&& inode->i_size > i_block_size(inode)))
2726 			REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
2727 
2728 		ret = journal_begin(&myth, inode->i_sb, 1);
2729 		if (ret)
2730 			goto journal_error;
2731 
2732 		reiserfs_update_inode_transaction(inode);
2733 		inode->i_size = pos + copied;
2734 		/*
2735 		 * this will just nest into our transaction.  It's important
2736 		 * to use mark_inode_dirty so the inode gets pushed around on the
2737 		 * dirty lists, and so that O_SYNC works as expected
2738 		 */
2739 		mark_inode_dirty(inode);
2740 		reiserfs_update_sd(&myth, inode);
2741 		update_sd = 1;
2742 		ret = journal_end(&myth, inode->i_sb, 1);
2743 		if (ret)
2744 			goto journal_error;
2745 	}
2746 	if (th) {
2747 		if (!locked) {
2748 			lock_depth = reiserfs_write_lock_once(inode->i_sb);
2749 			locked = true;
2750 		}
2751 		if (!update_sd)
2752 			mark_inode_dirty(inode);
2753 		ret = reiserfs_end_persistent_transaction(th);
2754 		if (ret)
2755 			goto out;
2756 	}
2757 
2758       out:
2759 	if (locked)
2760 		reiserfs_write_unlock_once(inode->i_sb, lock_depth);
2761 	unlock_page(page);
2762 	page_cache_release(page);
2763 
2764 	if (pos + len > inode->i_size)
2765 		reiserfs_truncate_failed_write(inode);
2766 
2767 	return ret == 0 ? copied : ret;
2768 
2769       journal_error:
2770 	reiserfs_write_unlock_once(inode->i_sb, lock_depth);
2771 	locked = false;
2772 	if (th) {
2773 		if (!update_sd)
2774 			reiserfs_update_sd(th, inode);
2775 		ret = reiserfs_end_persistent_transaction(th);
2776 	}
2777 	goto out;
2778 }
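
/*
 * Editorial worked example of the tail-unpacking test above, assuming a
 * 4K blocksize: with large tails a file stops being a packing candidate
 * once i_size exceeds 4 * 4096 == 16384 bytes; with small tails the
 * cutoff is a single block (4096).  The same test recurs in
 * reiserfs_commit_write() below.
 */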
2779 
2780 int reiserfs_commit_write(struct file *f, struct page *page,
2781 			  unsigned from, unsigned to)
2782 {
2783 	struct inode *inode = page->mapping->host;
2784 	loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to;
2785 	int ret = 0;
2786 	int update_sd = 0;
2787 	struct reiserfs_transaction_handle *th = NULL;
2788 
2789 	reiserfs_write_unlock(inode->i_sb);
2790 	reiserfs_wait_on_write_block(inode->i_sb);
2791 	reiserfs_write_lock(inode->i_sb);
2792 
2793 	if (reiserfs_transaction_running(inode->i_sb)) {
2794 		th = current->journal_info;
2795 	}
2796 	reiserfs_commit_page(inode, page, from, to);
2797 
2798 	/* generic_commit_write does this for us, but does not update the
2799 	 ** transaction tracking stuff when the size changes.  So, we have
2800 	 ** to do the i_size updates here.
2801 	 */
2802 	if (pos > inode->i_size) {
2803 		struct reiserfs_transaction_handle myth;
2804 		/* If the file has grown beyond the border where it
2805 		   can have a tail, unmark it as needing tail
2806 		   packing */
2807 		if ((have_large_tails(inode->i_sb)
2808 		     && inode->i_size > i_block_size(inode) * 4)
2809 		    || (have_small_tails(inode->i_sb)
2810 			&& inode->i_size > i_block_size(inode)))
2811 			REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
2812 
2813 		ret = journal_begin(&myth, inode->i_sb, 1);
2814 		if (ret)
2815 			goto journal_error;
2816 
2817 		reiserfs_update_inode_transaction(inode);
2818 		inode->i_size = pos;
2819 		/*
2820 		 * this will just nest into our transaction.  It's important
2821 		 * to use mark_inode_dirty so the inode gets pushed around on the
2822 		 * dirty lists, and so that O_SYNC works as expected
2823 		 */
2824 		mark_inode_dirty(inode);
2825 		reiserfs_update_sd(&myth, inode);
2826 		update_sd = 1;
2827 		ret = journal_end(&myth, inode->i_sb, 1);
2828 		if (ret)
2829 			goto journal_error;
2830 	}
2831 	if (th) {
2832 		if (!update_sd)
2833 			mark_inode_dirty(inode);
2834 		ret = reiserfs_end_persistent_transaction(th);
2835 		if (ret)
2836 			goto out;
2837 	}
2838 
2839       out:
2840 	return ret;
2841 
2842       journal_error:
2843 	if (th) {
2844 		if (!update_sd)
2845 			reiserfs_update_sd(th, inode);
2846 		ret = reiserfs_end_persistent_transaction(th);
2847 	}
2848 
2849 	return ret;
2850 }
2851 
2852 void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode)
2853 {
2854 	if (reiserfs_attrs(inode->i_sb)) {
2855 		if (sd_attrs & REISERFS_SYNC_FL)
2856 			inode->i_flags |= S_SYNC;
2857 		else
2858 			inode->i_flags &= ~S_SYNC;
2859 		if (sd_attrs & REISERFS_IMMUTABLE_FL)
2860 			inode->i_flags |= S_IMMUTABLE;
2861 		else
2862 			inode->i_flags &= ~S_IMMUTABLE;
2863 		if (sd_attrs & REISERFS_APPEND_FL)
2864 			inode->i_flags |= S_APPEND;
2865 		else
2866 			inode->i_flags &= ~S_APPEND;
2867 		if (sd_attrs & REISERFS_NOATIME_FL)
2868 			inode->i_flags |= S_NOATIME;
2869 		else
2870 			inode->i_flags &= ~S_NOATIME;
2871 		if (sd_attrs & REISERFS_NOTAIL_FL)
2872 			REISERFS_I(inode)->i_flags |= i_nopack_mask;
2873 		else
2874 			REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
2875 	}
2876 }
2877 
2878 void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs)
2879 {
2880 	if (reiserfs_attrs(inode->i_sb)) {
2881 		if (inode->i_flags & S_IMMUTABLE)
2882 			*sd_attrs |= REISERFS_IMMUTABLE_FL;
2883 		else
2884 			*sd_attrs &= ~REISERFS_IMMUTABLE_FL;
2885 		if (inode->i_flags & S_SYNC)
2886 			*sd_attrs |= REISERFS_SYNC_FL;
2887 		else
2888 			*sd_attrs &= ~REISERFS_SYNC_FL;
2889 		if (inode->i_flags & S_NOATIME)
2890 			*sd_attrs |= REISERFS_NOATIME_FL;
2891 		else
2892 			*sd_attrs &= ~REISERFS_NOATIME_FL;
2893 		if (REISERFS_I(inode)->i_flags & i_nopack_mask)
2894 			*sd_attrs |= REISERFS_NOTAIL_FL;
2895 		else
2896 			*sd_attrs &= ~REISERFS_NOTAIL_FL;
2897 	}
2898 }
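
/*
 * Editorial round-trip example for the two converters above:
 * REISERFS_NOTAIL_FL in the stat data sets i_nopack_mask in the in-core
 * flags, and i_attrs_to_sd_attrs() regenerates REISERFS_NOTAIL_FL from
 * that bit when the stat data is written back, so the notail attribute
 * survives the in-core/on-disk round trip.
 */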
2899 
2900 /* decide if this buffer needs to stay around for data logging or ordered
2901 ** write purposes
2902 */
2903 static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh)
2904 {
2905 	int ret = 1;
2906 	struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
2907 
2908 	lock_buffer(bh);
2909 	spin_lock(&j->j_dirty_buffers_lock);
2910 	if (!buffer_mapped(bh)) {
2911 		goto free_jh;
2912 	}
2913 	/* the page is locked, and the only places that log a data buffer
2914 	 * also lock the page.
2915 	 */
2916 	if (reiserfs_file_data_log(inode)) {
2917 		/*
2918 		 * very conservative, leave the buffer pinned if
2919 		 * anyone might need it.
2920 		 */
2921 		if (buffer_journaled(bh) || buffer_journal_dirty(bh)) {
2922 			ret = 0;
2923 		}
2924 	} else if (buffer_dirty(bh)) {
2925 		struct reiserfs_journal_list *jl;
2926 		struct reiserfs_jh *jh = bh->b_private;
2927 
2928 		/* why is this safe?
2929 		 * reiserfs_setattr updates i_size in the on-disk
2930 		 * stat data before allowing vmtruncate to be called.
2931 		 *
2932 		 * If the buffer was put onto the ordered list for this
2933 		 * transaction, we know for sure that either this transaction
2934 		 * or an older one has already updated i_size on disk,
2935 		 * and this ordered data won't be referenced in the file
2936 		 * if we crash.
2937 		 *
2938 		 * If the buffer was put onto the ordered list for an older
2939 		 * transaction, we need to leave it around.
2940 		 */
2941 		if (jh && (jl = jh->jl)
2942 		    && jl != SB_JOURNAL(inode->i_sb)->j_current_jl)
2943 			ret = 0;
2944 	}
2945       free_jh:
2946 	if (ret && bh->b_private) {
2947 		reiserfs_free_jh(bh);
2948 	}
2949 	spin_unlock(&j->j_dirty_buffers_lock);
2950 	unlock_buffer(bh);
2951 	return ret;
2952 }
2953 
2954 /* clm -- taken from fs/buffer.c:block_invalidate_page */
2955 static void reiserfs_invalidatepage(struct page *page, unsigned long offset)
2956 {
2957 	struct buffer_head *head, *bh, *next;
2958 	struct inode *inode = page->mapping->host;
2959 	unsigned int curr_off = 0;
2960 	int ret = 1;
2961 
2962 	BUG_ON(!PageLocked(page));
2963 
2964 	if (offset == 0)
2965 		ClearPageChecked(page);
2966 
2967 	if (!page_has_buffers(page))
2968 		goto out;
2969 
2970 	head = page_buffers(page);
2971 	bh = head;
2972 	do {
2973 		unsigned int next_off = curr_off + bh->b_size;
2974 		next = bh->b_this_page;
2975 
2976 		/*
2977 		 * is this block fully invalidated?
2978 		 */
2979 		if (offset <= curr_off) {
2980 			if (invalidatepage_can_drop(inode, bh))
2981 				reiserfs_unmap_buffer(bh);
2982 			else
2983 				ret = 0;
2984 		}
2985 		curr_off = next_off;
2986 		bh = next;
2987 	} while (bh != head);
2988 
2989 	/*
2990 	 * We release buffers only if the entire page is being invalidated.
2991 	 * The get_block cached value has been unconditionally invalidated,
2992 	 * so real IO is not possible anymore.
2993 	 */
2994 	if (!offset && ret) {
2995 		ret = try_to_release_page(page, 0);
2996 		/* maybe should BUG_ON(!ret); - neilb */
2997 	}
2998       out:
2999 	return;
3000 }
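
/*
 * Editorial worked example of the offset test above, assuming 1K buffers
 * on a 4K page: for a partial invalidate with offset == 2048, the buffers
 * at curr_off 2048 and 3072 satisfy offset <= curr_off and may be
 * unmapped, while the buffers at 0 and 1024 survive; only a full
 * invalidate (offset == 0) can release the whole page.
 */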
3001 
3002 static int reiserfs_set_page_dirty(struct page *page)
3003 {
3004 	struct inode *inode = page->mapping->host;
3005 	if (reiserfs_file_data_log(inode)) {
3006 		SetPageChecked(page);
3007 		return __set_page_dirty_nobuffers(page);
3008 	}
3009 	return __set_page_dirty_buffers(page);
3010 }
3011 
3012 /*
3013  * Returns 1 if the page's buffers were dropped.  The page is locked.
3014  *
3015  * Takes j_dirty_buffers_lock to protect the b_assoc_buffers list_heads
3016  * in the buffers at page_buffers(page).
3017  *
3018  * even in -o notail mode, we can't be sure an old mount without -o notail
3019  * didn't create files with tails.
3020  */
3021 static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
3022 {
3023 	struct inode *inode = page->mapping->host;
3024 	struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
3025 	struct buffer_head *head;
3026 	struct buffer_head *bh;
3027 	int ret = 1;
3028 
3029 	WARN_ON(PageChecked(page));
3030 	spin_lock(&j->j_dirty_buffers_lock);
3031 	head = page_buffers(page);
3032 	bh = head;
3033 	do {
3034 		if (bh->b_private) {
3035 			if (!buffer_dirty(bh) && !buffer_locked(bh)) {
3036 				reiserfs_free_jh(bh);
3037 			} else {
3038 				ret = 0;
3039 				break;
3040 			}
3041 		}
3042 		bh = bh->b_this_page;
3043 	} while (bh != head);
3044 	if (ret)
3045 		ret = try_to_free_buffers(page);
3046 	spin_unlock(&j->j_dirty_buffers_lock);
3047 	return ret;
3048 }
3049 
3050 /* We thank Mingming Cao for helping us understand in great detail what
3051    to do in this section of the code. */
3052 static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
3053 				  const struct iovec *iov, loff_t offset,
3054 				  unsigned long nr_segs)
3055 {
3056 	struct file *file = iocb->ki_filp;
3057 	struct inode *inode = file->f_mapping->host;
3058 
3059 	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
3060 				  offset, nr_segs,
3061 				  reiserfs_get_blocks_direct_io, NULL);
3062 }
3063 
3064 int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3065 {
3066 	struct inode *inode = dentry->d_inode;
3067 	unsigned int ia_valid;
3068 	int depth;
3069 	int error;
3070 
3071 	/* must be turned off for recursive notify_change calls */
3072 	ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
3073 
3074 	depth = reiserfs_write_lock_once(inode->i_sb);
3075 	if (attr->ia_valid & ATTR_SIZE) {
3076 		/* version 2 items will be caught by the s_maxbytes check
3077 		 ** done for us in vmtruncate
3078 		 */
3079 		if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 &&
3080 		    attr->ia_size > MAX_NON_LFS) {
3081 			error = -EFBIG;
3082 			goto out;
3083 		}
3084 		/* fill in hole pointers in the expanding truncate case. */
3085 		if (attr->ia_size > inode->i_size) {
3086 			error = generic_cont_expand_simple(inode, attr->ia_size);
3087 			if (REISERFS_I(inode)->i_prealloc_count > 0) {
3088 				int err;
3089 				struct reiserfs_transaction_handle th;
3090 				/* we're changing at most 2 bitmaps, inode + super */
3091 				err = journal_begin(&th, inode->i_sb, 4);
3092 				if (!err) {
3093 					reiserfs_discard_prealloc(&th, inode);
3094 					err = journal_end(&th, inode->i_sb, 4);
3095 				}
3096 				if (err)
3097 					error = err;
3098 			}
3099 			if (error)
3100 				goto out;
3101 			/*
3102 			 * the file size has changed, so ctime and mtime
3103 			 * must be updated
3104 			 */
3105 			attr->ia_valid |= (ATTR_MTIME | ATTR_CTIME);
3106 		}
3107 	}
3108 
3109 	if ((((attr->ia_valid & ATTR_UID) && (attr->ia_uid & ~0xffff)) ||
3110 	     ((attr->ia_valid & ATTR_GID) && (attr->ia_gid & ~0xffff))) &&
3111 	    (get_inode_sd_version(inode) == STAT_DATA_V1)) {
3112 		/* stat data of format v3.5 has 16 bit uid and gid */
3113 		error = -EINVAL;
3114 		goto out;
3115 	}
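	/*
	 * Editorial example of the mask above: ia_uid == 70000 is 0x11170,
	 * and 0x11170 & ~0xffff == 0x10000 != 0, so a v3.5 stat data with
	 * its 16-bit uid/gid fields cannot represent it and -EINVAL is
	 * returned.
	 */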
3116 
3117 	error = inode_change_ok(inode, attr);
3118 	if (!error) {
3119 		if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
3120 		    (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
3121 			error = reiserfs_chown_xattrs(inode, attr);
3122 
3123 			if (!error) {
3124 				struct reiserfs_transaction_handle th;
3125 				int jbegin_count =
3126 				    2 *
3127 				    (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) +
3128 				     REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) +
3129 				    2;
3130 
3131 				/* (user+group)*(old+new) quota structures plus the inode write (sb, inode) */
3132 				error =
3133 				    journal_begin(&th, inode->i_sb,
3134 						  jbegin_count);
3135 				if (error)
3136 					goto out;
3137 				error =
3138 				    vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
3139 				if (error) {
3140 					journal_end(&th, inode->i_sb,
3141 						    jbegin_count);
3142 					goto out;
3143 				}
3144 				/* Update corresponding info in inode so that everything is in
3145 				 * one transaction */
3146 				if (attr->ia_valid & ATTR_UID)
3147 					inode->i_uid = attr->ia_uid;
3148 				if (attr->ia_valid & ATTR_GID)
3149 					inode->i_gid = attr->ia_gid;
3150 				mark_inode_dirty(inode);
3151 				error =
3152 				    journal_end(&th, inode->i_sb, jbegin_count);
3153 			}
3154 		}
3155 		if (!error) {
3156 			/*
3157 			 * Relax the lock here, as inode_setattr() might
3158 			 * truncate the inode pages and wait on page locks.
3159 			 * To release such a page lock, its owner needs the
3160 			 * reiserfs lock.
3161 			 */
3162 			reiserfs_write_unlock_once(inode->i_sb, depth);
3163 			error = inode_setattr(inode, attr);
3164 			depth = reiserfs_write_lock_once(inode->i_sb);
3165 		}
3166 	}
3167 
3168 	if (!error && reiserfs_posixacl(inode->i_sb)) {
3169 		if (attr->ia_valid & ATTR_MODE)
3170 			error = reiserfs_acl_chmod(inode);
3171 	}
3172 
3173       out:
3174 	reiserfs_write_unlock_once(inode->i_sb, depth);
3175 
3176 	return error;
3177 }
3178 
3179 const struct address_space_operations reiserfs_address_space_operations = {
3180 	.writepage = reiserfs_writepage,
3181 	.readpage = reiserfs_readpage,
3182 	.readpages = reiserfs_readpages,
3183 	.releasepage = reiserfs_releasepage,
3184 	.invalidatepage = reiserfs_invalidatepage,
3185 	.sync_page = block_sync_page,
3186 	.write_begin = reiserfs_write_begin,
3187 	.write_end = reiserfs_write_end,
3188 	.bmap = reiserfs_aop_bmap,
3189 	.direct_IO = reiserfs_direct_IO,
3190 	.set_page_dirty = reiserfs_set_page_dirty,
3191 };
3192