xref: /openbmc/linux/fs/reiserfs/stree.c (revision 36ca1195)
1 /*
2  *  Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3  */
4 
5 /*
6  *  Written by Anatoly P. Pinchuk pap@namesys.botik.ru
7  *  Programm System Institute
8  *  Pereslavl-Zalessky Russia
9  */
10 
11 /*
12  *  This file contains functions dealing with S+tree
13  *
14  * B_IS_IN_TREE
15  * copy_item_head
16  * comp_short_keys
17  * comp_keys
18  * comp_short_le_keys
19  * le_key2cpu_key
20  * comp_le_keys
21  * bin_search
22  * get_lkey
23  * get_rkey
24  * key_in_buffer
25  * decrement_bcount
26  * decrement_counters_in_path
27  * reiserfs_check_path
28  * pathrelse_and_restore
29  * pathrelse
30  * search_by_key_reada
31  * search_by_key
32  * search_for_position_by_key
33  * comp_items
34  * prepare_for_direct_item
35  * prepare_for_direntry_item
36  * prepare_for_delete_or_cut
37  * calc_deleted_bytes_number
38  * init_tb_struct
39  * padd_item
40  * reiserfs_delete_item
41  * reiserfs_delete_solid_item
42  * reiserfs_delete_object
43  * maybe_indirect_to_direct
44  * indirect_to_direct_roll_back
45  * reiserfs_cut_from_item
46  * truncate_directory
47  * reiserfs_do_truncate
48  * reiserfs_paste_into_item
49  * reiserfs_insert_item
50  */
51 
52 #include <linux/config.h>
53 #include <linux/time.h>
54 #include <linux/string.h>
55 #include <linux/pagemap.h>
56 #include <linux/reiserfs_fs.h>
57 #include <linux/smp_lock.h>
58 #include <linux/buffer_head.h>
59 #include <linux/quotaops.h>
60 
61 /* Does the buffer contain a disk block which is in the tree. */
62 inline int B_IS_IN_TREE (const struct buffer_head * p_s_bh)
63 {
64 
65   RFALSE( B_LEVEL (p_s_bh) > MAX_HEIGHT,
66 	  "PAP-1010: block (%b) has too big level (%z)", p_s_bh, p_s_bh);
67 
68   return ( B_LEVEL (p_s_bh) != FREE_LEVEL );
69 }
70 
71 //
72 // to gets item head in le form
73 //
74 inline void copy_item_head(struct item_head * p_v_to,
75 			   const struct item_head * p_v_from)
76 {
77   memcpy (p_v_to, p_v_from, IH_SIZE);
78 }
79 
80 
81 /* k1 is pointer to on-disk structure which is stored in little-endian
82    form. k2 is pointer to cpu variable. For key of items of the same
83    object this returns 0.
84    Returns: -1 if key1 < key2
85    0 if key1 == key2
86    1 if key1 > key2 */
87 inline int  comp_short_keys (const struct reiserfs_key * le_key,
88 			     const struct cpu_key * cpu_key)
89 {
90   __u32 n;
91   n = le32_to_cpu(le_key->k_dir_id);
92   if (n < cpu_key->on_disk_key.k_dir_id)
93       return -1;
94   if (n > cpu_key->on_disk_key.k_dir_id)
95       return 1;
96   n = le32_to_cpu(le_key->k_objectid);
97   if (n < cpu_key->on_disk_key.k_objectid)
98       return -1;
99   if (n > cpu_key->on_disk_key.k_objectid)
100       return 1;
101   return 0;
102 }
103 
104 /* k1 is pointer to on-disk structure which is stored in little-endian
105    form. k2 is pointer to cpu variable.
106    Compare keys using all 4 key fields.
107    Returns: -1 if key1 < key2 0
108    if key1 = key2 1 if key1 > key2 */
109 static inline int  comp_keys (const struct reiserfs_key * le_key, const struct cpu_key * cpu_key)
110 {
111   int retval;
112 
113   retval = comp_short_keys (le_key, cpu_key);
114   if (retval)
115       return retval;
116   if (le_key_k_offset (le_key_version(le_key), le_key) < cpu_key_k_offset (cpu_key))
117       return -1;
118   if (le_key_k_offset (le_key_version(le_key), le_key) > cpu_key_k_offset (cpu_key))
119       return 1;
120 
121   if (cpu_key->key_length == 3)
122       return 0;
123 
124   /* this part is needed only when tail conversion is in progress */
125   if (le_key_k_type (le_key_version(le_key), le_key) < cpu_key_k_type (cpu_key))
126     return -1;
127 
128   if (le_key_k_type (le_key_version(le_key), le_key) > cpu_key_k_type (cpu_key))
129     return 1;
130 
131   return 0;
132 }
133 
134 
135 inline int comp_short_le_keys (const struct reiserfs_key * key1, const struct reiserfs_key * key2)
136 {
137   __u32 * p_s_1_u32, * p_s_2_u32;
138   int n_key_length = REISERFS_SHORT_KEY_LEN;
139 
140   p_s_1_u32 = (__u32 *)key1;
141   p_s_2_u32 = (__u32 *)key2;
142   for( ; n_key_length--; ++p_s_1_u32, ++p_s_2_u32 ) {
143     if ( le32_to_cpu (*p_s_1_u32) < le32_to_cpu (*p_s_2_u32) )
144       return -1;
145     if ( le32_to_cpu (*p_s_1_u32) > le32_to_cpu (*p_s_2_u32) )
146       return 1;
147   }
148   return 0;
149 }
150 
151 inline void le_key2cpu_key (struct cpu_key * to, const struct reiserfs_key * from)
152 {
153     int version;
154     to->on_disk_key.k_dir_id = le32_to_cpu (from->k_dir_id);
155     to->on_disk_key.k_objectid = le32_to_cpu (from->k_objectid);
156 
157     // find out version of the key
158     version = le_key_version (from);
159     to->version = version;
160     to->on_disk_key.k_offset = le_key_k_offset(version, from);
161     to->on_disk_key.k_type = le_key_k_type(version, from);
162 }
163 
164 
165 
166 // this does not say which one is bigger, it only returns 1 if keys
167 // are not equal, 0 otherwise
168 inline int comp_le_keys (const struct reiserfs_key * k1, const struct reiserfs_key * k2)
169 {
170     return memcmp (k1, k2, sizeof (struct reiserfs_key));
171 }
172 
173 /**************************************************************************
174  *  Binary search toolkit function                                        *
175  *  Search for an item in the array by the item key                       *
176  *  Returns:    1 if found,  0 if not found;                              *
177  *        *p_n_pos = number of the searched element if found, else the    *
178  *        number of the first element that is larger than p_v_key.        *
179  **************************************************************************/
180 /* For those not familiar with binary search: n_lbound is the leftmost item that it
181  could be, n_rbound the rightmost item that it could be.  We examine the item
182  halfway between n_lbound and n_rbound, and that tells us either that we can increase
183  n_lbound, or decrease n_rbound, or that we have found it, or if n_lbound <= n_rbound that
184  there are no possible items, and we have not found it. With each examination we
185  cut the number of possible items it could be by one more than half rounded down,
186  or we find it. */
187 static inline	int bin_search (
188               const void * p_v_key, /* Key to search for.                   */
189 	      const void * p_v_base,/* First item in the array.             */
190 	      int       p_n_num,    /* Number of items in the array.        */
191 	      int       p_n_width,  /* Item size in the array.
192 				       searched. Lest the reader be
193 				       confused, note that this is crafted
194 				       as a general function, and when it
195 				       is applied specifically to the array
196 				       of item headers in a node, p_n_width
197 				       is actually the item header size not
198 				       the item size.                      */
199 	      int     * p_n_pos     /* Number of the searched for element. */
200             ) {
201     int   n_rbound, n_lbound, n_j;
202 
203    for ( n_j = ((n_rbound = p_n_num - 1) + (n_lbound = 0))/2; n_lbound <= n_rbound; n_j = (n_rbound + n_lbound)/2 )
204      switch( comp_keys((struct reiserfs_key *)((char * )p_v_base + n_j * p_n_width), (struct cpu_key *)p_v_key) )  {
205      case -1: n_lbound = n_j + 1; continue;
206      case  1: n_rbound = n_j - 1; continue;
207      case  0: *p_n_pos = n_j;     return ITEM_FOUND; /* Key found in the array.  */
208         }
209 
210     /* bin_search did not find given key, it returns position of key,
211         that is minimal and greater than the given one. */
212     *p_n_pos = n_lbound;
213     return ITEM_NOT_FOUND;
214 }
215 
216 #ifdef CONFIG_REISERFS_CHECK
217 extern struct tree_balance * cur_tb;
218 #endif
219 
220 
221 
222 /* Minimal possible key. It is never in the tree. */
223 const struct reiserfs_key  MIN_KEY = {0, 0, {{0, 0},}};
224 
225 /* Maximal possible key. It is never in the tree. */
226 const struct reiserfs_key  MAX_KEY = {
227 	__constant_cpu_to_le32(0xffffffff),
228 	__constant_cpu_to_le32(0xffffffff),
229 	{{__constant_cpu_to_le32(0xffffffff),
230 	__constant_cpu_to_le32(0xffffffff)},}
231 };
232 
233 
234 /* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom
235    of the path, and going upwards.  We must check the path's validity at each step.  If the key is not in
236    the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this
237    case we return a special key, either MIN_KEY or MAX_KEY. */
238 static inline	const struct  reiserfs_key * get_lkey  (
239 	                const struct path         * p_s_chk_path,
240                         const struct super_block  * p_s_sb
241                       ) {
242   int                   n_position, n_path_offset = p_s_chk_path->path_length;
243   struct buffer_head  * p_s_parent;
244 
245   RFALSE( n_path_offset < FIRST_PATH_ELEMENT_OFFSET,
246 	  "PAP-5010: invalid offset in the path");
247 
248   /* While not higher in path than first element. */
249   while ( n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET ) {
250 
251     RFALSE( ! buffer_uptodate(PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)),
252 	    "PAP-5020: parent is not uptodate");
253 
254     /* Parent at the path is not in the tree now. */
255     if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)) )
256       return &MAX_KEY;
257     /* Check whether position in the parent is correct. */
258     if ( (n_position = PATH_OFFSET_POSITION(p_s_chk_path, n_path_offset)) > B_NR_ITEMS(p_s_parent) )
259        return &MAX_KEY;
260     /* Check whether parent at the path really points to the child. */
261     if ( B_N_CHILD_NUM(p_s_parent, n_position) !=
262 	 PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset + 1)->b_blocknr )
263       return &MAX_KEY;
264     /* Return delimiting key if position in the parent is not equal to zero. */
265     if ( n_position )
266       return B_N_PDELIM_KEY(p_s_parent, n_position - 1);
267   }
268   /* Return MIN_KEY if we are in the root of the buffer tree. */
269   if ( PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
270        SB_ROOT_BLOCK (p_s_sb) )
271     return &MIN_KEY;
272   return  &MAX_KEY;
273 }
274 
275 
276 /* Get delimiting key of the buffer at the path and its right neighbor. */
277 inline	const struct  reiserfs_key * get_rkey  (
278 	                const struct path         * p_s_chk_path,
279                         const struct super_block  * p_s_sb
280                       ) {
281   int                   n_position,
282     			n_path_offset = p_s_chk_path->path_length;
283   struct buffer_head  * p_s_parent;
284 
285   RFALSE( n_path_offset < FIRST_PATH_ELEMENT_OFFSET,
286 	  "PAP-5030: invalid offset in the path");
287 
288   while ( n_path_offset-- > FIRST_PATH_ELEMENT_OFFSET ) {
289 
290     RFALSE( ! buffer_uptodate(PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)),
291 	    "PAP-5040: parent is not uptodate");
292 
293     /* Parent at the path is not in the tree now. */
294     if ( ! B_IS_IN_TREE(p_s_parent = PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset)) )
295       return &MIN_KEY;
296     /* Check whether position in the parent is correct. */
297     if ( (n_position = PATH_OFFSET_POSITION(p_s_chk_path, n_path_offset)) > B_NR_ITEMS(p_s_parent) )
298       return &MIN_KEY;
299     /* Check whether parent at the path really points to the child. */
300     if ( B_N_CHILD_NUM(p_s_parent, n_position) !=
301                                         PATH_OFFSET_PBUFFER(p_s_chk_path, n_path_offset + 1)->b_blocknr )
302       return &MIN_KEY;
303     /* Return delimiting key if position in the parent is not the last one. */
304     if ( n_position != B_NR_ITEMS(p_s_parent) )
305       return B_N_PDELIM_KEY(p_s_parent, n_position);
306   }
307   /* Return MAX_KEY if we are in the root of the buffer tree. */
308   if ( PATH_OFFSET_PBUFFER(p_s_chk_path, FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
309        SB_ROOT_BLOCK (p_s_sb) )
310     return &MAX_KEY;
311   return  &MIN_KEY;
312 }
313 
314 
315 /* Check whether a key is contained in the tree rooted from a buffer at a path. */
316 /* This works by looking at the left and right delimiting keys for the buffer in the last path_element in
317    the path.  These delimiting keys are stored at least one level above that buffer in the tree. If the
318    buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in
319    this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */
320 static  inline  int key_in_buffer (
321                       struct path         * p_s_chk_path, /* Path which should be checked.  */
322                       const struct cpu_key      * p_s_key,      /* Key which should be checked.   */
323                       struct super_block  * p_s_sb        /* Super block pointer.           */
324 		      ) {
325 
326   RFALSE( ! p_s_key || p_s_chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET ||
327 	  p_s_chk_path->path_length > MAX_HEIGHT,
328 	  "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
329 	  p_s_key, p_s_chk_path->path_length);
330   RFALSE( !PATH_PLAST_BUFFER(p_s_chk_path)->b_bdev,
331 	  "PAP-5060: device must not be NODEV");
332 
333   if ( comp_keys(get_lkey(p_s_chk_path, p_s_sb), p_s_key) == 1 )
334     /* left delimiting key is bigger, that the key we look for */
335     return 0;
336   //  if ( comp_keys(p_s_key, get_rkey(p_s_chk_path, p_s_sb)) != -1 )
337   if ( comp_keys(get_rkey(p_s_chk_path, p_s_sb), p_s_key) != 1 )
338     /* p_s_key must be less than right delimitiing key */
339     return 0;
340   return 1;
341 }
342 
343 
344 inline void decrement_bcount(
345               struct buffer_head  * p_s_bh
346             ) {
347   if ( p_s_bh ) {
348     if ( atomic_read (&(p_s_bh->b_count)) ) {
349       put_bh(p_s_bh) ;
350       return;
351     }
352     reiserfs_panic(NULL, "PAP-5070: decrement_bcount: trying to free free buffer %b", p_s_bh);
353   }
354 }
355 
356 
357 /* Decrement b_count field of the all buffers in the path. */
358 void decrement_counters_in_path (
359               struct path * p_s_search_path
360             ) {
361   int n_path_offset = p_s_search_path->path_length;
362 
363   RFALSE( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET ||
364 	  n_path_offset > EXTENDED_MAX_HEIGHT - 1,
365 	  "PAP-5080: invalid path offset of %d", n_path_offset);
366 
367   while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET ) {
368     struct buffer_head * bh;
369 
370     bh = PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--);
371     decrement_bcount (bh);
372   }
373   p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
374 }
375 
376 
377 int reiserfs_check_path(struct path *p) {
378   RFALSE( p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
379 	  "path not properly relsed") ;
380   return 0 ;
381 }
382 
383 
384 /* Release all buffers in the path. Restore dirty bits clean
385 ** when preparing the buffer for the log
386 **
387 ** only called from fix_nodes()
388 */
389 void  pathrelse_and_restore (
390 	struct super_block *s,
391         struct path * p_s_search_path
392       ) {
393   int n_path_offset = p_s_search_path->path_length;
394 
395   RFALSE( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
396 	  "clm-4000: invalid path offset");
397 
398   while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET )  {
399     reiserfs_restore_prepared_buffer(s, PATH_OFFSET_PBUFFER(p_s_search_path,
400                                      n_path_offset));
401     brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--));
402   }
403   p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
404 }
405 
406 /* Release all buffers in the path. */
407 void  pathrelse (
408         struct path * p_s_search_path
409       ) {
410   int n_path_offset = p_s_search_path->path_length;
411 
412   RFALSE( n_path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
413 	  "PAP-5090: invalid path offset");
414 
415   while ( n_path_offset > ILLEGAL_PATH_ELEMENT_OFFSET )
416     brelse(PATH_OFFSET_PBUFFER(p_s_search_path, n_path_offset--));
417 
418   p_s_search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
419 }
420 
421 
422 
423 static int is_leaf (char * buf, int blocksize, struct buffer_head * bh)
424 {
425     struct block_head * blkh;
426     struct item_head * ih;
427     int used_space;
428     int prev_location;
429     int i;
430     int nr;
431 
432     blkh = (struct block_head *)buf;
433     if ( blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
434 	reiserfs_warning (NULL, "is_leaf: this should be caught earlier");
435 	return 0;
436     }
437 
438     nr = blkh_nr_item(blkh);
439     if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
440 	/* item number is too big or too small */
441 	reiserfs_warning (NULL, "is_leaf: nr_item seems wrong: %z", bh);
442 	return 0;
443     }
444     ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
445     used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location (ih));
446     if (used_space != blocksize - blkh_free_space(blkh)) {
447 	/* free space does not match to calculated amount of use space */
448 	reiserfs_warning (NULL, "is_leaf: free space seems wrong: %z", bh);
449 	return 0;
450     }
451 
452     // FIXME: it is_leaf will hit performance too much - we may have
453     // return 1 here
454 
455     /* check tables of item heads */
456     ih = (struct item_head *)(buf + BLKH_SIZE);
457     prev_location = blocksize;
458     for (i = 0; i < nr; i ++, ih ++) {
459 	if ( le_ih_k_type(ih) == TYPE_ANY) {
460 	    reiserfs_warning (NULL, "is_leaf: wrong item type for item %h",ih);
461 	    return 0;
462 	}
463 	if (ih_location (ih) >= blocksize || ih_location (ih) < IH_SIZE * nr) {
464 	    reiserfs_warning (NULL, "is_leaf: item location seems wrong: %h", ih);
465 	    return 0;
466 	}
467 	if (ih_item_len (ih) < 1 || ih_item_len (ih) > MAX_ITEM_LEN (blocksize)) {
468 	    reiserfs_warning (NULL, "is_leaf: item length seems wrong: %h", ih);
469 	    return 0;
470 	}
471 	if (prev_location - ih_location (ih) != ih_item_len (ih)) {
472 	    reiserfs_warning (NULL, "is_leaf: item location seems wrong (second one): %h", ih);
473 	    return 0;
474 	}
475 	prev_location = ih_location (ih);
476     }
477 
478     // one may imagine much more checks
479     return 1;
480 }
481 
482 
483 /* returns 1 if buf looks like an internal node, 0 otherwise */
484 static int is_internal (char * buf, int blocksize, struct buffer_head * bh)
485 {
486     struct block_head * blkh;
487     int nr;
488     int used_space;
489 
490     blkh = (struct block_head *)buf;
491     nr = blkh_level(blkh);
492     if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
493 	/* this level is not possible for internal nodes */
494 	reiserfs_warning (NULL, "is_internal: this should be caught earlier");
495 	return 0;
496     }
497 
498     nr = blkh_nr_item(blkh);
499     if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
500 	/* for internal which is not root we might check min number of keys */
501 	reiserfs_warning (NULL, "is_internal: number of key seems wrong: %z", bh);
502 	return 0;
503     }
504 
505     used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
506     if (used_space != blocksize - blkh_free_space(blkh)) {
507 	reiserfs_warning (NULL, "is_internal: free space seems wrong: %z", bh);
508 	return 0;
509     }
510 
511     // one may imagine much more checks
512     return 1;
513 }
514 
515 
516 // make sure that bh contains formatted node of reiserfs tree of
517 // 'level'-th level
518 static int is_tree_node (struct buffer_head * bh, int level)
519 {
520     if (B_LEVEL (bh) != level) {
521 	reiserfs_warning (NULL, "is_tree_node: node level %d does not match to the expected one %d",
522 		B_LEVEL (bh), level);
523 	return 0;
524     }
525     if (level == DISK_LEAF_NODE_LEVEL)
526 	return is_leaf (bh->b_data, bh->b_size, bh);
527 
528     return is_internal (bh->b_data, bh->b_size, bh);
529 }
530 
531 
532 
533 #define SEARCH_BY_KEY_READA 16
534 
535 /* The function is NOT SCHEDULE-SAFE! */
536 static void search_by_key_reada (struct super_block * s,
537                                  struct buffer_head **bh,
538 				 unsigned long *b, int num)
539 {
540     int i,j;
541 
542     for (i = 0 ; i < num ; i++) {
543 	bh[i] = sb_getblk (s, b[i]);
544     }
545     for (j = 0 ; j < i ; j++) {
546 	/*
547 	 * note, this needs attention if we are getting rid of the BKL
548 	 * you have to make sure the prepared bit isn't set on this buffer
549 	 */
550 	if (!buffer_uptodate(bh[j]))
551 	    ll_rw_block(READA, 1, bh + j);
552     	brelse(bh[j]);
553     }
554 }
555 
556 /**************************************************************************
557  * Algorithm   SearchByKey                                                *
558  *             look for item in the Disk S+Tree by its key                *
559  * Input:  p_s_sb   -  super block                                        *
560  *         p_s_key  - pointer to the key to search                        *
561  * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR                         *
562  *         p_s_search_path - path from the root to the needed leaf        *
563  **************************************************************************/
564 
565 /* This function fills up the path from the root to the leaf as it
566    descends the tree looking for the key.  It uses reiserfs_bread to
567    try to find buffers in the cache given their block number.  If it
568    does not find them in the cache it reads them from disk.  For each
569    node search_by_key finds using reiserfs_bread it then uses
570    bin_search to look through that node.  bin_search will find the
571    position of the block_number of the next node if it is looking
572    through an internal node.  If it is looking through a leaf node
573    bin_search will find the position of the item which has key either
574    equal to given key, or which is the maximal key less than the given
575    key.  search_by_key returns a path that must be checked for the
576    correctness of the top of the path but need not be checked for the
577    correctness of the bottom of the path */
578 /* The function is NOT SCHEDULE-SAFE! */
579 int search_by_key (struct super_block * p_s_sb,
580 		   const struct cpu_key * p_s_key, /* Key to search. */
581 		   struct path * p_s_search_path, /* This structure was
582 						     allocated and initialized
583 						     by the calling
584 						     function. It is filled up
585 						     by this function.  */
586 		   int n_stop_level /* How far down the tree to search. To
587                                        stop at leaf level - set to
588                                        DISK_LEAF_NODE_LEVEL */
589     ) {
590     int  n_block_number;
591     int  expected_level;
592     struct buffer_head  *       p_s_bh;
593     struct path_element *       p_s_last_element;
594     int				n_node_level, n_retval;
595     int 			right_neighbor_of_leaf_node;
596     int				fs_gen;
597     struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
598     unsigned long      reada_blocks[SEARCH_BY_KEY_READA];
599     int reada_count = 0;
600 
601 #ifdef CONFIG_REISERFS_CHECK
602     int n_repeat_counter = 0;
603 #endif
604 
605     PROC_INFO_INC( p_s_sb, search_by_key );
606 
607     /* As we add each node to a path we increase its count.  This means that
608        we must be careful to release all nodes in a path before we either
609        discard the path struct or re-use the path struct, as we do here. */
610 
611     decrement_counters_in_path(p_s_search_path);
612 
613     right_neighbor_of_leaf_node = 0;
614 
615     /* With each iteration of this loop we search through the items in the
616        current node, and calculate the next current node(next path element)
617        for the next iteration of this loop.. */
618     n_block_number = SB_ROOT_BLOCK (p_s_sb);
619     expected_level = -1;
620     while ( 1 ) {
621 
622 #ifdef CONFIG_REISERFS_CHECK
623 	if ( !(++n_repeat_counter % 50000) )
624 	    reiserfs_warning (p_s_sb, "PAP-5100: search_by_key: %s:"
625 			      "there were %d iterations of while loop "
626 			      "looking for key %K",
627 			      current->comm, n_repeat_counter, p_s_key);
628 #endif
629 
630 	/* prep path to have another element added to it. */
631 	p_s_last_element = PATH_OFFSET_PELEMENT(p_s_search_path, ++p_s_search_path->path_length);
632 	fs_gen = get_generation (p_s_sb);
633 
634 	/* Read the next tree node, and set the last element in the path to
635            have a pointer to it. */
636 	if ((p_s_bh = p_s_last_element->pe_buffer =
637 	     sb_getblk(p_s_sb, n_block_number)) ) {
638 	    if (!buffer_uptodate(p_s_bh) && reada_count > 1) {
639 		search_by_key_reada (p_s_sb, reada_bh,
640 		                     reada_blocks, reada_count);
641 	    }
642 	    ll_rw_block(READ, 1, &p_s_bh);
643 	    wait_on_buffer(p_s_bh);
644 	    if (!buffer_uptodate(p_s_bh))
645 	        goto io_error;
646 	} else {
647 io_error:
648 	    p_s_search_path->path_length --;
649 	    pathrelse(p_s_search_path);
650 	    return IO_ERROR;
651 	}
652 	reada_count = 0;
653 	if (expected_level == -1)
654 		expected_level = SB_TREE_HEIGHT (p_s_sb);
655 	expected_level --;
656 
657 	/* It is possible that schedule occurred. We must check whether the key
658 	   to search is still in the tree rooted from the current buffer. If
659 	   not then repeat search from the root. */
660 	if ( fs_changed (fs_gen, p_s_sb) &&
661 	    (!B_IS_IN_TREE (p_s_bh) ||
662 	     B_LEVEL(p_s_bh) != expected_level ||
663 	     !key_in_buffer(p_s_search_path, p_s_key, p_s_sb))) {
664 	    PROC_INFO_INC( p_s_sb, search_by_key_fs_changed );
665 	    PROC_INFO_INC( p_s_sb, search_by_key_restarted );
666 	    PROC_INFO_INC( p_s_sb, sbk_restarted[ expected_level - 1 ] );
667 	    decrement_counters_in_path(p_s_search_path);
668 
669 	    /* Get the root block number so that we can repeat the search
670 	       starting from the root. */
671 	    n_block_number = SB_ROOT_BLOCK (p_s_sb);
672 	    expected_level = -1;
673 	    right_neighbor_of_leaf_node = 0;
674 
675 	    /* repeat search from the root */
676 	    continue;
677 	}
678 
679         /* only check that the key is in the buffer if p_s_key is not
680            equal to the MAX_KEY. Latter case is only possible in
681            "finish_unfinished()" processing during mount. */
682         RFALSE( comp_keys( &MAX_KEY, p_s_key ) &&
683                 ! key_in_buffer(p_s_search_path, p_s_key, p_s_sb),
684 		"PAP-5130: key is not in the buffer");
685 #ifdef CONFIG_REISERFS_CHECK
686 	if ( cur_tb ) {
687 	    print_cur_tb ("5140");
688 	    reiserfs_panic(p_s_sb, "PAP-5140: search_by_key: schedule occurred in do_balance!");
689 	}
690 #endif
691 
692 	// make sure, that the node contents look like a node of
693 	// certain level
694 	if (!is_tree_node (p_s_bh, expected_level)) {
695 	    reiserfs_warning (p_s_sb, "vs-5150: search_by_key: "
696 			      "invalid format found in block %ld. Fsck?",
697 			      p_s_bh->b_blocknr);
698 	    pathrelse (p_s_search_path);
699 	    return IO_ERROR;
700 	}
701 
702 	/* ok, we have acquired next formatted node in the tree */
703 	n_node_level = B_LEVEL (p_s_bh);
704 
705 	PROC_INFO_BH_STAT( p_s_sb, p_s_bh, n_node_level - 1 );
706 
707 	RFALSE( n_node_level < n_stop_level,
708 		"vs-5152: tree level (%d) is less than stop level (%d)",
709 		n_node_level, n_stop_level);
710 
711 	n_retval = bin_search( p_s_key, B_N_PITEM_HEAD(p_s_bh, 0),
712                 B_NR_ITEMS(p_s_bh),
713                 ( n_node_level == DISK_LEAF_NODE_LEVEL ) ? IH_SIZE : KEY_SIZE,
714                 &(p_s_last_element->pe_position));
715 	if (n_node_level == n_stop_level) {
716 	    return n_retval;
717 	}
718 
719 	/* we are not in the stop level */
720 	if (n_retval == ITEM_FOUND)
721 	    /* item has been found, so we choose the pointer which is to the right of the found one */
722 	    p_s_last_element->pe_position++;
723 
724 	/* if item was not found we choose the position which is to
725 	   the left of the found item. This requires no code,
726 	   bin_search did it already.*/
727 
728 	/* So we have chosen a position in the current node which is
729 	   an internal node.  Now we calculate child block number by
730 	   position in the node. */
731 	n_block_number = B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position);
732 
733 	/* if we are going to read leaf nodes, try for read ahead as well */
734 	if ((p_s_search_path->reada & PATH_READA) &&
735 	    n_node_level == DISK_LEAF_NODE_LEVEL + 1)
736 	{
737 	    int pos = p_s_last_element->pe_position;
738 	    int limit = B_NR_ITEMS(p_s_bh);
739 	    struct reiserfs_key *le_key;
740 
741 	    if (p_s_search_path->reada & PATH_READA_BACK)
742 		limit = 0;
743 	    while(reada_count < SEARCH_BY_KEY_READA) {
744 		if (pos == limit)
745 		    break;
746 	        reada_blocks[reada_count++] = B_N_CHILD_NUM(p_s_bh, pos);
747 		if (p_s_search_path->reada & PATH_READA_BACK)
748 		    pos--;
749 		else
750 		    pos++;
751 
752 		/*
753 		 * check to make sure we're in the same object
754 		 */
755 		le_key = B_N_PDELIM_KEY(p_s_bh, pos);
756 		if (le32_to_cpu(le_key->k_objectid) !=
757 		    p_s_key->on_disk_key.k_objectid)
758 		{
759 		    break;
760 		}
761 	    }
762         }
763     }
764 }
765 
766 
767 /* Form the path to an item and position in this item which contains
768    file byte defined by p_s_key. If there is no such item
769    corresponding to the key, we point the path to the item with
770    maximal key less than p_s_key, and *p_n_pos_in_item is set to one
771    past the last entry/byte in the item.  If searching for entry in a
772    directory item, and it is not found, *p_n_pos_in_item is set to one
773    entry more than the entry with maximal key which is less than the
774    sought key.
775 
776    Note that if there is no entry in this same node which is one more,
777    then we point to an imaginary entry.  for direct items, the
778    position is in units of bytes, for indirect items the position is
779    in units of blocknr entries, for directory items the position is in
780    units of directory entries.  */
781 
782 /* The function is NOT SCHEDULE-SAFE! */
783 int search_for_position_by_key (struct super_block  * p_s_sb,         /* Pointer to the super block.          */
784 				const struct cpu_key  * p_cpu_key,      /* Key to search (cpu variable)         */
785 				struct path         * p_s_search_path /* Filled up by this function.          */
786     ) {
787     struct item_head    * p_le_ih; /* pointer to on-disk structure */
788     int                   n_blk_size;
789     loff_t item_offset, offset;
790     struct reiserfs_dir_entry de;
791     int retval;
792 
793     /* If searching for directory entry. */
794     if ( is_direntry_cpu_key (p_cpu_key) )
795 	return  search_by_entry_key (p_s_sb, p_cpu_key, p_s_search_path, &de);
796 
797     /* If not searching for directory entry. */
798 
799     /* If item is found. */
800     retval = search_item (p_s_sb, p_cpu_key, p_s_search_path);
801     if (retval == IO_ERROR)
802 	return retval;
803     if ( retval == ITEM_FOUND )  {
804 
805 	RFALSE( ! ih_item_len(
806                 B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path),
807 			       PATH_LAST_POSITION(p_s_search_path))),
808 	        "PAP-5165: item length equals zero");
809 
810 	pos_in_item(p_s_search_path) = 0;
811 	return POSITION_FOUND;
812     }
813 
814     RFALSE( ! PATH_LAST_POSITION(p_s_search_path),
815 	    "PAP-5170: position equals zero");
816 
817     /* Item is not found. Set path to the previous item. */
818     p_le_ih = B_N_PITEM_HEAD(PATH_PLAST_BUFFER(p_s_search_path), --PATH_LAST_POSITION(p_s_search_path));
819     n_blk_size = p_s_sb->s_blocksize;
820 
821     if (comp_short_keys (&(p_le_ih->ih_key), p_cpu_key)) {
822 	return FILE_NOT_FOUND;
823     }
824 
825     // FIXME: quite ugly this far
826 
827     item_offset = le_ih_k_offset (p_le_ih);
828     offset = cpu_key_k_offset (p_cpu_key);
829 
830     /* Needed byte is contained in the item pointed to by the path.*/
831     if (item_offset <= offset &&
832 	item_offset + op_bytes_number (p_le_ih, n_blk_size) > offset) {
833 	pos_in_item (p_s_search_path) = offset - item_offset;
834 	if ( is_indirect_le_ih(p_le_ih) ) {
835 	    pos_in_item (p_s_search_path) /= n_blk_size;
836 	}
837 	return POSITION_FOUND;
838     }
839 
840     /* Needed byte is not contained in the item pointed to by the
841      path. Set pos_in_item out of the item. */
842     if ( is_indirect_le_ih (p_le_ih) )
843 	pos_in_item (p_s_search_path) = ih_item_len(p_le_ih) / UNFM_P_SIZE;
844     else
845         pos_in_item (p_s_search_path) = ih_item_len( p_le_ih );
846 
847     return POSITION_NOT_FOUND;
848 }
849 
850 
851 /* Compare given item and item pointed to by the path. */
852 int comp_items (const struct item_head * stored_ih, const struct path * p_s_path)
853 {
854     struct buffer_head  * p_s_bh;
855     struct item_head    * ih;
856 
857     /* Last buffer at the path is not in the tree. */
858     if ( ! B_IS_IN_TREE(p_s_bh = PATH_PLAST_BUFFER(p_s_path)) )
859 	return 1;
860 
861     /* Last path position is invalid. */
862     if ( PATH_LAST_POSITION(p_s_path) >= B_NR_ITEMS(p_s_bh) )
863 	return 1;
864 
865     /* we need only to know, whether it is the same item */
866     ih = get_ih (p_s_path);
867     return memcmp (stored_ih, ih, IH_SIZE);
868 }
869 
870 
871 /* unformatted nodes are not logged anymore, ever.  This is safe
872 ** now
873 */
874 #define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1)
875 
876 // block can not be forgotten as it is in I/O or held by someone
877 #define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh)))
878 
879 
880 
881 // prepare for delete or cut of direct item
882 static inline int prepare_for_direct_item (struct path * path,
883 					   struct item_head * le_ih,
884 					   struct inode * inode,
885 					   loff_t new_file_length,
886 					   int * cut_size)
887 {
888     loff_t round_len;
889 
890 
891     if ( new_file_length == max_reiserfs_offset (inode) ) {
892 	/* item has to be deleted */
893 	*cut_size = -(IH_SIZE + ih_item_len(le_ih));
894 	return M_DELETE;
895     }
896 
897     // new file gets truncated
898     if (get_inode_item_key_version (inode) == KEY_FORMAT_3_6) {
899 	//
900 	round_len = ROUND_UP (new_file_length);
901 	/* this was n_new_file_length < le_ih ... */
902 	if ( round_len < le_ih_k_offset (le_ih) )  {
903 	    *cut_size = -(IH_SIZE + ih_item_len(le_ih));
904 	    return M_DELETE; /* Delete this item. */
905 	}
906 	/* Calculate first position and size for cutting from item. */
907 	pos_in_item (path) = round_len - (le_ih_k_offset (le_ih) - 1);
908 	*cut_size = -(ih_item_len(le_ih) - pos_in_item(path));
909 
910 	return M_CUT; /* Cut from this item. */
911     }
912 
913 
914     // old file: items may have any length
915 
916     if ( new_file_length < le_ih_k_offset (le_ih) )  {
917 	*cut_size = -(IH_SIZE + ih_item_len(le_ih));
918 	return M_DELETE; /* Delete this item. */
919     }
920     /* Calculate first position and size for cutting from item. */
921     *cut_size = -(ih_item_len(le_ih) -
922 		      (pos_in_item (path) = new_file_length + 1 - le_ih_k_offset (le_ih)));
923     return M_CUT; /* Cut from this item. */
924 }
925 
926 
927 static inline int prepare_for_direntry_item (struct path * path,
928 					     struct item_head * le_ih,
929 					     struct inode * inode,
930 					     loff_t new_file_length,
931 					     int * cut_size)
932 {
933     if (le_ih_k_offset (le_ih) == DOT_OFFSET &&
934 	new_file_length == max_reiserfs_offset (inode)) {
935 	RFALSE( ih_entry_count (le_ih) != 2,
936 	        "PAP-5220: incorrect empty directory item (%h)", le_ih);
937 	*cut_size = -(IH_SIZE + ih_item_len(le_ih));
938 	return M_DELETE; /* Delete the directory item containing "." and ".." entry. */
939     }
940 
941     if ( ih_entry_count (le_ih) == 1 )  {
942 	/* Delete the directory item such as there is one record only
943 	   in this item*/
944 	*cut_size = -(IH_SIZE + ih_item_len(le_ih));
945 	return M_DELETE;
946     }
947 
948     /* Cut one record from the directory item. */
949     *cut_size = -(DEH_SIZE + entry_length (get_last_bh (path), le_ih, pos_in_item (path)));
950     return M_CUT;
951 }
952 
953 
954 /*  If the path points to a directory or direct item, calculate mode and the size cut, for balance.
955     If the path points to an indirect item, remove some number of its unformatted nodes.
956     In case of file truncate calculate whether this item must be deleted/truncated or last
957     unformatted node of this item will be converted to a direct item.
958     This function returns a determination of what balance mode the calling function should employ. */
959 static char  prepare_for_delete_or_cut(
960 				       struct reiserfs_transaction_handle *th,
961 				       struct inode * inode,
962 				       struct path         * p_s_path,
963 				       const struct cpu_key      * p_s_item_key,
964 				       int                 * p_n_removed,      /* Number of unformatted nodes which were removed
965 										  from end of the file. */
966 				       int                 * p_n_cut_size,
967 				       unsigned long long    n_new_file_length /* MAX_KEY_OFFSET in case of delete. */
968     ) {
969     struct super_block  * p_s_sb = inode->i_sb;
970     struct item_head    * p_le_ih = PATH_PITEM_HEAD(p_s_path);
971     struct buffer_head  * p_s_bh = PATH_PLAST_BUFFER(p_s_path);
972 
973     BUG_ON (!th->t_trans_id);
974 
975     /* Stat_data item. */
976     if ( is_statdata_le_ih (p_le_ih) ) {
977 
978 	RFALSE( n_new_file_length != max_reiserfs_offset (inode),
979 		"PAP-5210: mode must be M_DELETE");
980 
981 	*p_n_cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
982 	return M_DELETE;
983     }
984 
985 
986     /* Directory item. */
987     if ( is_direntry_le_ih (p_le_ih) )
988 	return prepare_for_direntry_item (p_s_path, p_le_ih, inode, n_new_file_length, p_n_cut_size);
989 
990     /* Direct item. */
991     if ( is_direct_le_ih (p_le_ih) )
992 	return prepare_for_direct_item (p_s_path, p_le_ih, inode, n_new_file_length, p_n_cut_size);
993 
994 
995     /* Case of an indirect item. */
996     {
997 	int                   n_unfm_number,    /* Number of the item unformatted nodes. */
998 	    n_counter,
999 	    n_blk_size;
1000 	__le32               * p_n_unfm_pointer; /* Pointer to the unformatted node number. */
1001 	__u32 tmp;
1002 	struct item_head      s_ih;           /* Item header. */
1003 	char                  c_mode;           /* Returned mode of the balance. */
1004 	int need_research;
1005 
1006 
1007 	n_blk_size = p_s_sb->s_blocksize;
1008 
1009 	/* Search for the needed object indirect item until there are no unformatted nodes to be removed. */
1010 	do  {
1011 	    need_research = 0;
1012             p_s_bh = PATH_PLAST_BUFFER(p_s_path);
1013 	    /* Copy indirect item header to a temp variable. */
1014 	    copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
1015 	    /* Calculate number of unformatted nodes in this item. */
1016 	    n_unfm_number = I_UNFM_NUM(&s_ih);
1017 
1018 	    RFALSE( ! is_indirect_le_ih(&s_ih) || ! n_unfm_number ||
1019 		    pos_in_item (p_s_path) + 1 !=  n_unfm_number,
1020 		    "PAP-5240: invalid item %h "
1021 		    "n_unfm_number = %d *p_n_pos_in_item = %d",
1022 		    &s_ih, n_unfm_number, pos_in_item (p_s_path));
1023 
1024 	    /* Calculate balance mode and position in the item to remove unformatted nodes. */
1025 	    if ( n_new_file_length == max_reiserfs_offset (inode) ) {/* Case of delete. */
1026 		pos_in_item (p_s_path) = 0;
1027 		*p_n_cut_size = -(IH_SIZE + ih_item_len(&s_ih));
1028 		c_mode = M_DELETE;
1029 	    }
1030 	    else  { /* Case of truncate. */
1031 		if ( n_new_file_length < le_ih_k_offset (&s_ih) )  {
1032 		    pos_in_item (p_s_path) = 0;
1033 		    *p_n_cut_size = -(IH_SIZE + ih_item_len(&s_ih));
1034 		    c_mode = M_DELETE; /* Delete this item. */
1035 		}
1036 		else  {
1037 		    /* indirect item must be truncated starting from *p_n_pos_in_item-th position */
1038 		    pos_in_item (p_s_path) = (n_new_file_length + n_blk_size - le_ih_k_offset (&s_ih) ) >> p_s_sb->s_blocksize_bits;
1039 
1040 		    RFALSE( pos_in_item (p_s_path) > n_unfm_number,
1041 			    "PAP-5250: invalid position in the item");
1042 
1043 		    /* Either convert last unformatted node of indirect item to direct item or increase
1044 		       its free space.  */
1045 		    if ( pos_in_item (p_s_path) == n_unfm_number )  {
1046 			*p_n_cut_size = 0; /* Nothing to cut. */
1047 			return M_CONVERT; /* Maybe convert last unformatted node to the direct item. */
1048 		    }
1049 		    /* Calculate size to cut. */
1050 		    *p_n_cut_size = -(ih_item_len(&s_ih) - pos_in_item(p_s_path) * UNFM_P_SIZE);
1051 
1052 		    c_mode = M_CUT;     /* Cut from this indirect item. */
1053 		}
1054 	    }
1055 
1056 	    RFALSE( n_unfm_number <= pos_in_item (p_s_path),
1057 		    "PAP-5260: invalid position in the indirect item");
1058 
1059 	    /* pointers to be cut */
1060 	    n_unfm_number -= pos_in_item (p_s_path);
1061 	    /* Set pointer to the last unformatted node pointer that is to be cut. */
1062 	    p_n_unfm_pointer = (__le32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1 - *p_n_removed;
1063 
1064 
1065 	    /* We go through the unformatted nodes pointers of the indirect
1066 	       item and look for the unformatted nodes in the cache. If we
1067 	       found some of them we free it, zero corresponding indirect item
1068 	       entry and log buffer containing that indirect item. For this we
1069 	       need to prepare last path element for logging. If some
1070 	       unformatted node has b_count > 1 we must not free this
1071 	       unformatted node since it is in use. */
1072 	    reiserfs_prepare_for_journal(p_s_sb, p_s_bh, 1);
1073 	    // note: path could be changed, first line in for loop takes care
1074 	    // of it
1075 
1076 	    for (n_counter = *p_n_removed;
1077 		 n_counter < n_unfm_number; n_counter++, p_n_unfm_pointer-- ) {
1078 
1079 		cond_resched();
1080 		if (item_moved (&s_ih, p_s_path)) {
1081 		    need_research = 1 ;
1082 		    break;
1083 		}
1084 		RFALSE( p_n_unfm_pointer < (__le32 *)B_I_PITEM(p_s_bh, &s_ih) ||
1085 			p_n_unfm_pointer > (__le32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1,
1086 			"vs-5265: pointer out of range");
1087 
1088 		/* Hole, nothing to remove. */
1089 		if ( ! get_block_num(p_n_unfm_pointer,0) )  {
1090 			(*p_n_removed)++;
1091 			continue;
1092 		}
1093 
1094 		(*p_n_removed)++;
1095 
1096 		tmp = get_block_num(p_n_unfm_pointer,0);
1097 		put_block_num(p_n_unfm_pointer, 0, 0);
1098 		journal_mark_dirty (th, p_s_sb, p_s_bh);
1099 		reiserfs_free_block(th, inode, tmp, 1);
1100 		if ( item_moved (&s_ih, p_s_path) )  {
1101 			need_research = 1;
1102 			break ;
1103 		}
1104 	    }
1105 
1106 	    /* a trick.  If the buffer has been logged, this
1107 	    ** will do nothing.  If we've broken the loop without
1108 	    ** logging it, it will restore the buffer
1109 	    **
1110 	    */
1111 	    reiserfs_restore_prepared_buffer(p_s_sb, p_s_bh);
1112 
1113 	    /* This loop can be optimized. */
1114 	} while ( (*p_n_removed < n_unfm_number || need_research) &&
1115 		  search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_FOUND );
1116 
1117 	RFALSE( *p_n_removed < n_unfm_number,
1118 		"PAP-5310: indirect item is not found");
1119 	RFALSE( item_moved (&s_ih, p_s_path),
1120 		"after while, comp failed, retry") ;
1121 
1122 	if (c_mode == M_CUT)
1123 	    pos_in_item (p_s_path) *= UNFM_P_SIZE;
1124 	return c_mode;
1125     }
1126 }
1127 
1128 /* Calculate number of bytes which will be deleted or cut during balance */
1129 static int calc_deleted_bytes_number(
1130     struct  tree_balance  * p_s_tb,
1131     char                    c_mode
1132     ) {
1133     int                     n_del_size;
1134     struct  item_head     * p_le_ih = PATH_PITEM_HEAD(p_s_tb->tb_path);
1135 
1136     if ( is_statdata_le_ih (p_le_ih) )
1137 	return 0;
1138 
1139     n_del_size = ( c_mode == M_DELETE ) ? ih_item_len(p_le_ih) : -p_s_tb->insert_size[0];
1140     if ( is_direntry_le_ih (p_le_ih) ) {
1141 	// return EMPTY_DIR_SIZE; /* We delete emty directoris only. */
1142 	// we can't use EMPTY_DIR_SIZE, as old format dirs have a different
1143 	// empty size.  ick. FIXME, is this right?
1144 	//
1145 	return n_del_size ;
1146     }
1147 
1148     if ( is_indirect_le_ih (p_le_ih) )
1149 	n_del_size = (n_del_size/UNFM_P_SIZE)*
1150 	  (PATH_PLAST_BUFFER(p_s_tb->tb_path)->b_size);// - get_ih_free_space (p_le_ih);
1151     return n_del_size;
1152 }
1153 
1154 static void init_tb_struct(
1155     struct reiserfs_transaction_handle *th,
1156     struct tree_balance * p_s_tb,
1157     struct super_block  * p_s_sb,
1158     struct path         * p_s_path,
1159     int                   n_size
1160     ) {
1161 
1162     BUG_ON (!th->t_trans_id);
1163 
1164     memset (p_s_tb,'\0',sizeof(struct tree_balance));
1165     p_s_tb->transaction_handle = th ;
1166     p_s_tb->tb_sb = p_s_sb;
1167     p_s_tb->tb_path = p_s_path;
1168     PATH_OFFSET_PBUFFER(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
1169     PATH_OFFSET_POSITION(p_s_path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
1170     p_s_tb->insert_size[0] = n_size;
1171 }
1172 
1173 
1174 
1175 void padd_item (char * item, int total_length, int length)
1176 {
1177     int i;
1178 
1179     for (i = total_length; i > length; )
1180 	item [--i] = 0;
1181 }
1182 
1183 #ifdef REISERQUOTA_DEBUG
1184 char key2type(struct reiserfs_key *ih)
1185 {
1186   if (is_direntry_le_key(2, ih))
1187     return 'd';
1188   if (is_direct_le_key(2, ih))
1189     return 'D';
1190   if (is_indirect_le_key(2, ih))
1191     return 'i';
1192   if (is_statdata_le_key(2, ih))
1193     return 's';
1194   return 'u';
1195 }
1196 
1197 char head2type(struct item_head *ih)
1198 {
1199   if (is_direntry_le_ih(ih))
1200     return 'd';
1201   if (is_direct_le_ih(ih))
1202     return 'D';
1203   if (is_indirect_le_ih(ih))
1204     return 'i';
1205   if (is_statdata_le_ih(ih))
1206     return 's';
1207   return 'u';
1208 }
1209 #endif
1210 
1211 /* Delete object item. */
1212 int reiserfs_delete_item (struct reiserfs_transaction_handle *th,
1213 			  struct path * p_s_path, /* Path to the deleted item. */
1214 			  const struct cpu_key * p_s_item_key, /* Key to search for the deleted item.  */
1215 			  struct inode * p_s_inode,/* inode is here just to update i_blocks and quotas */
1216 			  struct buffer_head  * p_s_un_bh)    /* NULL or unformatted node pointer.    */
1217 {
1218     struct super_block * p_s_sb = p_s_inode->i_sb;
1219     struct tree_balance   s_del_balance;
1220     struct item_head      s_ih;
1221     struct item_head      *q_ih;
1222     int			  quota_cut_bytes;
1223     int                   n_ret_value,
1224 	n_del_size,
1225 	n_removed;
1226 
1227 #ifdef CONFIG_REISERFS_CHECK
1228     char                  c_mode;
1229     int			n_iter = 0;
1230 #endif
1231 
1232     BUG_ON (!th->t_trans_id);
1233 
1234     init_tb_struct(th, &s_del_balance, p_s_sb, p_s_path, 0/*size is unknown*/);
1235 
1236     while ( 1 ) {
1237 	n_removed = 0;
1238 
1239 #ifdef CONFIG_REISERFS_CHECK
1240 	n_iter++;
1241 	c_mode =
1242 #endif
1243 	    prepare_for_delete_or_cut(th, p_s_inode, p_s_path, p_s_item_key, &n_removed, &n_del_size, max_reiserfs_offset (p_s_inode));
1244 
1245 	RFALSE( c_mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
1246 
1247 	copy_item_head(&s_ih, PATH_PITEM_HEAD(p_s_path));
1248 	s_del_balance.insert_size[0] = n_del_size;
1249 
1250 	n_ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
1251 	if ( n_ret_value != REPEAT_SEARCH )
1252 	    break;
1253 
1254 	PROC_INFO_INC( p_s_sb, delete_item_restarted );
1255 
1256 	// file system changed, repeat search
1257 	n_ret_value = search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path);
1258 	if (n_ret_value == IO_ERROR)
1259 	    break;
1260 	if (n_ret_value == FILE_NOT_FOUND) {
1261 	    reiserfs_warning (p_s_sb, "vs-5340: reiserfs_delete_item: "
1262 			      "no items of the file %K found", p_s_item_key);
1263 	    break;
1264 	}
1265     } /* while (1) */
1266 
1267     if ( n_ret_value != CARRY_ON ) {
1268 	unfix_nodes(&s_del_balance);
1269 	return 0;
1270     }
1271 
1272     // reiserfs_delete_item returns item length when success
1273     n_ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
1274     q_ih = get_ih(p_s_path) ;
1275     quota_cut_bytes = ih_item_len(q_ih) ;
1276 
1277     /* hack so the quota code doesn't have to guess if the file
1278     ** has a tail.  On tail insert, we allocate quota for 1 unformatted node.
1279     ** We test the offset because the tail might have been
1280     ** split into multiple items, and we only want to decrement for
1281     ** the unfm node once
1282     */
1283     if (!S_ISLNK (p_s_inode->i_mode) && is_direct_le_ih(q_ih)) {
1284         if ((le_ih_k_offset(q_ih) & (p_s_sb->s_blocksize - 1)) == 1) {
1285             quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE;
1286         } else {
1287 	    quota_cut_bytes = 0 ;
1288 	}
1289     }
1290 
1291     if ( p_s_un_bh )  {
1292 	int off;
1293         char *data ;
1294 
1295 	/* We are in direct2indirect conversion, so move tail contents
1296            to the unformatted node */
1297 	/* note, we do the copy before preparing the buffer because we
1298 	** don't care about the contents of the unformatted node yet.
1299 	** the only thing we really care about is the direct item's data
1300 	** is in the unformatted node.
1301 	**
1302 	** Otherwise, we would have to call reiserfs_prepare_for_journal on
1303 	** the unformatted node, which might schedule, meaning we'd have to
1304 	** loop all the way back up to the start of the while loop.
1305 	**
1306 	** The unformatted node must be dirtied later on.  We can't be
1307 	** sure here if the entire tail has been deleted yet.
1308         **
1309         ** p_s_un_bh is from the page cache (all unformatted nodes are
1310         ** from the page cache) and might be a highmem page.  So, we
1311         ** can't use p_s_un_bh->b_data.
1312 	** -clm
1313 	*/
1314 
1315         data = kmap_atomic(p_s_un_bh->b_page, KM_USER0);
1316 	off = ((le_ih_k_offset (&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
1317 	memcpy(data + off,
1318 	       B_I_PITEM(PATH_PLAST_BUFFER(p_s_path), &s_ih), n_ret_value);
1319 	kunmap_atomic(data, KM_USER0);
1320     }
1321     /* Perform balancing after all resources have been collected at once. */
1322     do_balance(&s_del_balance, NULL, NULL, M_DELETE);
1323 
1324 #ifdef REISERQUOTA_DEBUG
1325     reiserfs_debug (p_s_sb, REISERFS_DEBUG_CODE, "reiserquota delete_item(): freeing %u, id=%u type=%c", quota_cut_bytes, p_s_inode->i_uid, head2type(&s_ih));
1326 #endif
1327     DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
1328 
1329     /* Return deleted body length */
1330     return n_ret_value;
1331 }
1332 
1333 
1334 /* Summary Of Mechanisms For Handling Collisions Between Processes:
1335 
1336  deletion of the body of the object is performed by iput(), with the
1337  result that if multiple processes are operating on a file, the
1338  deletion of the body of the file is deferred until the last process
1339  that has an open inode performs its iput().
1340 
1341  writes and truncates are protected from collisions by use of
1342  semaphores.
1343 
1344  creates, linking, and mknod are protected from collisions with other
1345  processes by making the reiserfs_add_entry() the last step in the
1346  creation, and then rolling back all changes if there was a collision.
1347  - Hans
1348 */
1349 
1350 
1351 /* this deletes item which never gets split */
1352 void reiserfs_delete_solid_item (struct reiserfs_transaction_handle *th,
1353 				 struct inode *inode,
1354 				 struct reiserfs_key * key)
1355 {
1356     struct tree_balance tb;
1357     INITIALIZE_PATH (path);
1358     int item_len = 0;
1359     int tb_init = 0 ;
1360     struct cpu_key cpu_key;
1361     int retval;
1362     int quota_cut_bytes = 0;
1363 
1364     BUG_ON (!th->t_trans_id);
1365 
1366     le_key2cpu_key (&cpu_key, key);
1367 
1368     while (1) {
1369 	retval = search_item (th->t_super, &cpu_key, &path);
1370 	if (retval == IO_ERROR) {
1371 	    reiserfs_warning (th->t_super,
1372 			      "vs-5350: reiserfs_delete_solid_item: "
1373 			      "i/o failure occurred trying to delete %K",
1374 			      &cpu_key);
1375 	    break;
1376 	}
1377 	if (retval != ITEM_FOUND) {
1378 	    pathrelse (&path);
1379 	    // No need for a warning, if there is just no free space to insert '..' item into the newly-created subdir
1380 	    if ( !( (unsigned long long) GET_HASH_VALUE (le_key_k_offset (le_key_version (key), key)) == 0 && \
1381 		 (unsigned long long) GET_GENERATION_NUMBER (le_key_k_offset (le_key_version (key), key)) == 1 ) )
1382 		reiserfs_warning (th->t_super, "vs-5355: reiserfs_delete_solid_item: %k not found", key);
1383 	    break;
1384 	}
1385 	if (!tb_init) {
1386 	    tb_init = 1 ;
1387 	    item_len = ih_item_len( PATH_PITEM_HEAD(&path) );
1388 	    init_tb_struct (th, &tb, th->t_super, &path, - (IH_SIZE + item_len));
1389 	}
1390 	quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path)) ;
1391 
1392 	retval = fix_nodes (M_DELETE, &tb, NULL, NULL);
1393 	if (retval == REPEAT_SEARCH) {
1394 	    PROC_INFO_INC( th -> t_super, delete_solid_item_restarted );
1395 	    continue;
1396 	}
1397 
1398 	if (retval == CARRY_ON) {
1399 	    do_balance (&tb, NULL, NULL, M_DELETE);
1400 	    if (inode) {	/* Should we count quota for item? (we don't count quotas for save-links) */
1401 #ifdef REISERQUOTA_DEBUG
1402 		reiserfs_debug (th->t_super, REISERFS_DEBUG_CODE, "reiserquota delete_solid_item(): freeing %u id=%u type=%c", quota_cut_bytes, inode->i_uid, key2type(key));
1403 #endif
1404 		DQUOT_FREE_SPACE_NODIRTY(inode, quota_cut_bytes);
1405 	    }
1406 	    break;
1407 	}
1408 
1409 	// IO_ERROR, NO_DISK_SPACE, etc
1410 	reiserfs_warning (th->t_super, "vs-5360: reiserfs_delete_solid_item: "
1411 			  "could not delete %K due to fix_nodes failure", &cpu_key);
1412 	unfix_nodes (&tb);
1413 	break;
1414     }
1415 
1416     reiserfs_check_path(&path) ;
1417 }
1418 
1419 
1420 int reiserfs_delete_object (struct reiserfs_transaction_handle *th, struct inode * inode)
1421 {
1422     int err;
1423     inode->i_size = 0;
1424     BUG_ON (!th->t_trans_id);
1425 
1426     /* for directory this deletes item containing "." and ".." */
1427     err = reiserfs_do_truncate (th, inode, NULL, 0/*no timestamp updates*/);
1428     if (err)
1429         return err;
1430 
1431 #if defined( USE_INODE_GENERATION_COUNTER )
1432     if( !old_format_only ( th -> t_super ) )
1433       {
1434        __le32 *inode_generation;
1435 
1436        inode_generation =
1437          &REISERFS_SB(th -> t_super) -> s_rs -> s_inode_generation;
1438        *inode_generation = cpu_to_le32( le32_to_cpu( *inode_generation ) + 1 );
1439       }
1440 /* USE_INODE_GENERATION_COUNTER */
1441 #endif
1442     reiserfs_delete_solid_item (th, inode, INODE_PKEY (inode));
1443 
1444     return err;
1445 }
1446 
1447 static void
1448 unmap_buffers(struct page *page, loff_t pos) {
1449     struct buffer_head *bh ;
1450     struct buffer_head *head ;
1451     struct buffer_head *next ;
1452     unsigned long tail_index ;
1453     unsigned long cur_index ;
1454 
1455     if (page) {
1456 	if (page_has_buffers(page)) {
1457 	    tail_index = pos & (PAGE_CACHE_SIZE - 1) ;
1458 	    cur_index = 0 ;
1459 	    head = page_buffers(page) ;
1460 	    bh = head ;
1461 	    do {
1462 		next = bh->b_this_page ;
1463 
1464 		/* we want to unmap the buffers that contain the tail, and
1465 		** all the buffers after it (since the tail must be at the
1466 		** end of the file).  We don't want to unmap file data
1467 		** before the tail, since it might be dirty and waiting to
1468 		** reach disk
1469 		*/
1470 		cur_index += bh->b_size ;
1471 		if (cur_index > tail_index) {
1472 		    reiserfs_unmap_buffer(bh) ;
1473 		}
1474 		bh = next ;
1475 	    } while (bh != head) ;
1476 	    if ( PAGE_SIZE == bh->b_size ) {
1477 		clear_page_dirty(page);
1478 	    }
1479 	}
1480     }
1481 }
1482 
1483 static int maybe_indirect_to_direct (struct reiserfs_transaction_handle *th,
1484 			      struct inode * p_s_inode,
1485 			      struct page *page,
1486 			      struct path         * p_s_path,
1487 			      const struct cpu_key      * p_s_item_key,
1488 			      loff_t         n_new_file_size,
1489 			      char                * p_c_mode
1490 			      ) {
1491     struct super_block * p_s_sb = p_s_inode->i_sb;
1492     int n_block_size = p_s_sb->s_blocksize;
1493     int cut_bytes;
1494     BUG_ON (!th->t_trans_id);
1495 
1496     if (n_new_file_size != p_s_inode->i_size)
1497 	BUG ();
1498 
1499     /* the page being sent in could be NULL if there was an i/o error
1500     ** reading in the last block.  The user will hit problems trying to
1501     ** read the file, but for now we just skip the indirect2direct
1502     */
1503     if (atomic_read(&p_s_inode->i_count) > 1 ||
1504         !tail_has_to_be_packed (p_s_inode) ||
1505 	!page || (REISERFS_I(p_s_inode)->i_flags & i_nopack_mask)) {
1506 	// leave tail in an unformatted node
1507 	*p_c_mode = M_SKIP_BALANCING;
1508 	cut_bytes = n_block_size - (n_new_file_size & (n_block_size - 1));
1509 	pathrelse(p_s_path);
1510 	return cut_bytes;
1511     }
1512     /* Permorm the conversion to a direct_item. */
1513     /*return indirect_to_direct (p_s_inode, p_s_path, p_s_item_key, n_new_file_size, p_c_mode);*/
1514     return indirect2direct (th, p_s_inode, page, p_s_path, p_s_item_key, n_new_file_size, p_c_mode);
1515 }
1516 
1517 
1518 /* we did indirect_to_direct conversion. And we have inserted direct
1519    item successesfully, but there were no disk space to cut unfm
1520    pointer being converted. Therefore we have to delete inserted
1521    direct item(s) */
1522 static void indirect_to_direct_roll_back (struct reiserfs_transaction_handle *th, struct inode * inode, struct path * path)
1523 {
1524     struct cpu_key tail_key;
1525     int tail_len;
1526     int removed;
1527     BUG_ON (!th->t_trans_id);
1528 
1529     make_cpu_key (&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);// !!!!
1530     tail_key.key_length = 4;
1531 
1532     tail_len = (cpu_key_k_offset (&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1;
1533     while (tail_len) {
1534 	/* look for the last byte of the tail */
1535 	if (search_for_position_by_key (inode->i_sb, &tail_key, path) == POSITION_NOT_FOUND)
1536 	    reiserfs_panic (inode->i_sb, "vs-5615: indirect_to_direct_roll_back: found invalid item");
1537 	RFALSE( path->pos_in_item != ih_item_len(PATH_PITEM_HEAD (path)) - 1,
1538 	        "vs-5616: appended bytes found");
1539 	PATH_LAST_POSITION (path) --;
1540 
1541 	removed = reiserfs_delete_item (th, path, &tail_key, inode, NULL/*unbh not needed*/);
1542 	RFALSE( removed <= 0 || removed > tail_len,
1543 	        "vs-5617: there was tail %d bytes, removed item length %d bytes",
1544                 tail_len, removed);
1545 	tail_len -= removed;
1546 	set_cpu_key_k_offset (&tail_key, cpu_key_k_offset (&tail_key) - removed);
1547     }
1548     reiserfs_warning (inode->i_sb, "indirect_to_direct_roll_back: indirect_to_direct conversion has been rolled back due to lack of disk space");
1549     //mark_file_without_tail (inode);
1550     mark_inode_dirty (inode);
1551 }
1552 
1553 
1554 /* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
1555 int reiserfs_cut_from_item (struct reiserfs_transaction_handle *th,
1556 			    struct path * p_s_path,
1557 			    struct cpu_key * p_s_item_key,
1558 			    struct inode * p_s_inode,
1559 			    struct page *page,
1560 			    loff_t n_new_file_size)
1561 {
1562     struct super_block * p_s_sb = p_s_inode->i_sb;
1563     /* Every function which is going to call do_balance must first
1564        create a tree_balance structure.  Then it must fill up this
1565        structure by using the init_tb_struct and fix_nodes functions.
1566        After that we can make tree balancing. */
1567     struct tree_balance s_cut_balance;
1568     struct item_head *p_le_ih;
1569     int n_cut_size = 0,        /* Amount to be cut. */
1570 	n_ret_value = CARRY_ON,
1571 	n_removed = 0,     /* Number of the removed unformatted nodes. */
1572 	n_is_inode_locked = 0;
1573     char                c_mode;            /* Mode of the balance. */
1574     int retval2 = -1;
1575     int quota_cut_bytes;
1576     loff_t tail_pos = 0;
1577 
1578     BUG_ON (!th->t_trans_id);
1579 
1580     init_tb_struct(th, &s_cut_balance, p_s_inode->i_sb, p_s_path, n_cut_size);
1581 
1582 
1583     /* Repeat this loop until we either cut the item without needing
1584        to balance, or we fix_nodes without schedule occurring */
1585     while ( 1 ) {
1586 	/* Determine the balance mode, position of the first byte to
1587 	   be cut, and size to be cut.  In case of the indirect item
1588 	   free unformatted nodes which are pointed to by the cut
1589 	   pointers. */
1590 
1591 	c_mode = prepare_for_delete_or_cut(th, p_s_inode, p_s_path, p_s_item_key, &n_removed,
1592 					   &n_cut_size, n_new_file_size);
1593 	if ( c_mode == M_CONVERT )  {
1594 	    /* convert last unformatted node to direct item or leave
1595                tail in the unformatted node */
1596 	    RFALSE( n_ret_value != CARRY_ON, "PAP-5570: can not convert twice");
1597 
1598 	    n_ret_value = maybe_indirect_to_direct (th, p_s_inode, page, p_s_path, p_s_item_key,
1599 						    n_new_file_size, &c_mode);
1600 	    if ( c_mode == M_SKIP_BALANCING )
1601 		/* tail has been left in the unformatted node */
1602 		return n_ret_value;
1603 
1604 	    n_is_inode_locked = 1;
1605 
1606 	    /* removing of last unformatted node will change value we
1607                have to return to truncate. Save it */
1608 	    retval2 = n_ret_value;
1609 	    /*retval2 = p_s_sb->s_blocksize - (n_new_file_size & (p_s_sb->s_blocksize - 1));*/
1610 
1611 	    /* So, we have performed the first part of the conversion:
1612 	       inserting the new direct item.  Now we are removing the
1613 	       last unformatted node pointer. Set key to search for
1614 	       it. */
1615       	    set_cpu_key_k_type (p_s_item_key, TYPE_INDIRECT);
1616 	    p_s_item_key->key_length = 4;
1617 	    n_new_file_size -= (n_new_file_size & (p_s_sb->s_blocksize - 1));
1618 	    tail_pos = n_new_file_size;
1619 	    set_cpu_key_k_offset (p_s_item_key, n_new_file_size + 1);
1620 	    if ( search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path) == POSITION_NOT_FOUND ){
1621 		print_block (PATH_PLAST_BUFFER (p_s_path), 3, PATH_LAST_POSITION (p_s_path) - 1, PATH_LAST_POSITION (p_s_path) + 1);
1622 		reiserfs_panic(p_s_sb, "PAP-5580: reiserfs_cut_from_item: item to convert does not exist (%K)", p_s_item_key);
1623 	    }
1624 	    continue;
1625 	}
1626 	if (n_cut_size == 0) {
1627 	    pathrelse (p_s_path);
1628 	    return 0;
1629 	}
1630 
1631 	s_cut_balance.insert_size[0] = n_cut_size;
1632 
1633 	n_ret_value = fix_nodes(c_mode, &s_cut_balance, NULL, NULL);
1634       	if ( n_ret_value != REPEAT_SEARCH )
1635 	    break;
1636 
1637 	PROC_INFO_INC( p_s_sb, cut_from_item_restarted );
1638 
1639 	n_ret_value = search_for_position_by_key(p_s_sb, p_s_item_key, p_s_path);
1640 	if (n_ret_value == POSITION_FOUND)
1641 	    continue;
1642 
1643 	reiserfs_warning (p_s_sb, "PAP-5610: reiserfs_cut_from_item: item %K not found", p_s_item_key);
1644 	unfix_nodes (&s_cut_balance);
1645 	return (n_ret_value == IO_ERROR) ? -EIO : -ENOENT;
1646     } /* while */
1647 
1648     // check fix_nodes results (IO_ERROR or NO_DISK_SPACE)
1649     if ( n_ret_value != CARRY_ON ) {
1650 	if ( n_is_inode_locked ) {
1651 	    // FIXME: this seems to be not needed: we are always able
1652 	    // to cut item
1653 	    indirect_to_direct_roll_back (th, p_s_inode, p_s_path);
1654 	}
1655 	if (n_ret_value == NO_DISK_SPACE)
1656 	    reiserfs_warning (p_s_sb, "NO_DISK_SPACE");
1657 	unfix_nodes (&s_cut_balance);
1658 	return -EIO;
1659     }
1660 
1661     /* go ahead and perform balancing */
1662 
1663     RFALSE( c_mode == M_PASTE || c_mode == M_INSERT, "invalid mode");
1664 
1665     /* Calculate number of bytes that need to be cut from the item. */
1666     quota_cut_bytes = ( c_mode == M_DELETE ) ? ih_item_len(get_ih(p_s_path)) : -s_cut_balance.insert_size[0];
1667     if (retval2 == -1)
1668 	n_ret_value = calc_deleted_bytes_number(&s_cut_balance, c_mode);
1669     else
1670 	n_ret_value = retval2;
1671 
1672 
1673     /* For direct items, we only change the quota when deleting the last
1674     ** item.
1675     */
1676     p_le_ih = PATH_PITEM_HEAD (s_cut_balance.tb_path);
1677     if (!S_ISLNK (p_s_inode->i_mode) && is_direct_le_ih(p_le_ih)) {
1678         if (c_mode == M_DELETE &&
1679 	   (le_ih_k_offset (p_le_ih) & (p_s_sb->s_blocksize - 1)) == 1 ) {
1680 	    // FIXME: this is to keep 3.5 happy
1681 	    REISERFS_I(p_s_inode)->i_first_direct_byte = U32_MAX;
1682 	    quota_cut_bytes = p_s_sb->s_blocksize + UNFM_P_SIZE ;
1683         } else {
1684 	    quota_cut_bytes = 0 ;
1685 	}
1686     }
1687 #ifdef CONFIG_REISERFS_CHECK
1688     if (n_is_inode_locked) {
1689 	struct item_head * le_ih = PATH_PITEM_HEAD (s_cut_balance.tb_path);
1690 	/* we are going to complete indirect2direct conversion. Make
1691            sure, that we exactly remove last unformatted node pointer
1692            of the item */
1693 	if (!is_indirect_le_ih (le_ih))
1694 	    reiserfs_panic (p_s_sb, "vs-5652: reiserfs_cut_from_item: "
1695 			    "item must be indirect %h", le_ih);
1696 
1697 	if (c_mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
1698 	    reiserfs_panic (p_s_sb, "vs-5653: reiserfs_cut_from_item: "
1699 			    "completing indirect2direct conversion indirect item %h "
1700 			    "being deleted must be of 4 byte long", le_ih);
1701 
1702 	if (c_mode == M_CUT && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
1703 	    reiserfs_panic (p_s_sb, "vs-5654: reiserfs_cut_from_item: "
1704 			    "can not complete indirect2direct conversion of %h (CUT, insert_size==%d)",
1705 			    le_ih, s_cut_balance.insert_size[0]);
1706 	}
1707 	/* it would be useful to make sure, that right neighboring
1708            item is direct item of this file */
1709     }
1710 #endif
1711 
1712     do_balance(&s_cut_balance, NULL, NULL, c_mode);
1713     if ( n_is_inode_locked ) {
1714 	/* we've done an indirect->direct conversion.  when the data block
1715 	** was freed, it was removed from the list of blocks that must
1716 	** be flushed before the transaction commits, make sure to
1717 	** unmap and invalidate it
1718 	*/
1719 	unmap_buffers(page, tail_pos);
1720 	REISERFS_I(p_s_inode)->i_flags &= ~i_pack_on_close_mask ;
1721     }
1722 #ifdef REISERQUOTA_DEBUG
1723     reiserfs_debug (p_s_inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota cut_from_item(): freeing %u id=%u type=%c", quota_cut_bytes, p_s_inode->i_uid, '?');
1724 #endif
1725     DQUOT_FREE_SPACE_NODIRTY(p_s_inode, quota_cut_bytes);
1726     return n_ret_value;
1727 }
1728 
1729 static void truncate_directory (struct reiserfs_transaction_handle *th, struct inode * inode)
1730 {
1731     BUG_ON (!th->t_trans_id);
1732     if (inode->i_nlink)
1733 	reiserfs_warning (inode->i_sb,
1734 			  "vs-5655: truncate_directory: link count != 0");
1735 
1736     set_le_key_k_offset (KEY_FORMAT_3_5, INODE_PKEY (inode), DOT_OFFSET);
1737     set_le_key_k_type (KEY_FORMAT_3_5, INODE_PKEY (inode), TYPE_DIRENTRY);
1738     reiserfs_delete_solid_item (th, inode, INODE_PKEY (inode));
1739     reiserfs_update_sd(th, inode) ;
1740     set_le_key_k_offset (KEY_FORMAT_3_5, INODE_PKEY (inode), SD_OFFSET);
1741     set_le_key_k_type (KEY_FORMAT_3_5, INODE_PKEY (inode), TYPE_STAT_DATA);
1742 }
1743 
1744 
1745 
1746 
1747 /* Truncate file to the new size. Note, this must be called with a transaction
1748    already started */
1749 int reiserfs_do_truncate (struct reiserfs_transaction_handle *th,
1750 			   struct  inode * p_s_inode, /* ->i_size contains new
1751                                                          size */
1752 			   struct page *page, /* up to date for last block */
1753 			   int update_timestamps  /* when it is called by
1754 						     file_release to convert
1755 						     the tail - no timestamps
1756 						     should be updated */
1757     ) {
1758     INITIALIZE_PATH (s_search_path);       /* Path to the current object item. */
1759     struct item_head    * p_le_ih;         /* Pointer to an item header. */
1760     struct cpu_key      s_item_key;     /* Key to search for a previous file item. */
1761     loff_t         n_file_size,    /* Old file size. */
1762 	n_new_file_size;/* New file size. */
1763     int                   n_deleted;      /* Number of deleted or truncated bytes. */
1764     int retval;
1765     int err = 0;
1766 
1767     BUG_ON (!th->t_trans_id);
1768     if ( ! (S_ISREG(p_s_inode->i_mode) || S_ISDIR(p_s_inode->i_mode) || S_ISLNK(p_s_inode->i_mode)) )
1769 	return 0;
1770 
1771     if (S_ISDIR(p_s_inode->i_mode)) {
1772 	// deletion of directory - no need to update timestamps
1773 	truncate_directory (th, p_s_inode);
1774 	return 0;
1775     }
1776 
1777     /* Get new file size. */
1778     n_new_file_size = p_s_inode->i_size;
1779 
1780     // FIXME: note, that key type is unimportant here
1781     make_cpu_key (&s_item_key, p_s_inode, max_reiserfs_offset (p_s_inode), TYPE_DIRECT, 3);
1782 
1783     retval = search_for_position_by_key(p_s_inode->i_sb, &s_item_key, &s_search_path);
1784     if (retval == IO_ERROR) {
1785 	reiserfs_warning (p_s_inode->i_sb, "vs-5657: reiserfs_do_truncate: "
1786 			  "i/o failure occurred trying to truncate %K", &s_item_key);
1787         err = -EIO;
1788         goto out;
1789     }
1790     if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
1791 	reiserfs_warning (p_s_inode->i_sb, "PAP-5660: reiserfs_do_truncate: "
1792 			  "wrong result %d of search for %K", retval, &s_item_key);
1793 
1794         err = -EIO;
1795         goto out;
1796     }
1797 
1798     s_search_path.pos_in_item --;
1799 
1800     /* Get real file size (total length of all file items) */
1801     p_le_ih = PATH_PITEM_HEAD(&s_search_path);
1802     if ( is_statdata_le_ih (p_le_ih) )
1803 	n_file_size = 0;
1804     else {
1805 	loff_t offset = le_ih_k_offset (p_le_ih);
1806 	int bytes = op_bytes_number (p_le_ih,p_s_inode->i_sb->s_blocksize);
1807 
1808 	/* this may mismatch with real file size: if last direct item
1809            had no padding zeros and last unformatted node had no free
1810            space, this file would have this file size */
1811 	n_file_size = offset + bytes - 1;
1812     }
1813     /*
1814      * are we doing a full truncate or delete, if so
1815      * kick in the reada code
1816      */
1817     if (n_new_file_size == 0)
1818         s_search_path.reada = PATH_READA | PATH_READA_BACK;
1819 
1820     if ( n_file_size == 0 || n_file_size < n_new_file_size ) {
1821 	goto update_and_out ;
1822     }
1823 
1824     /* Update key to search for the last file item. */
1825     set_cpu_key_k_offset (&s_item_key, n_file_size);
1826 
1827     do  {
1828 	/* Cut or delete file item. */
1829 	n_deleted = reiserfs_cut_from_item(th, &s_search_path, &s_item_key, p_s_inode,  page, n_new_file_size);
1830 	if (n_deleted < 0) {
1831 	    reiserfs_warning (p_s_inode->i_sb, "vs-5665: reiserfs_do_truncate: reiserfs_cut_from_item failed");
1832 	    reiserfs_check_path(&s_search_path) ;
1833 	    return 0;
1834 	}
1835 
1836 	RFALSE( n_deleted > n_file_size,
1837 		"PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
1838 		n_deleted, n_file_size, &s_item_key);
1839 
1840 	/* Change key to search the last file item. */
1841 	n_file_size -= n_deleted;
1842 
1843 	set_cpu_key_k_offset (&s_item_key, n_file_size);
1844 
1845 	/* While there are bytes to truncate and previous file item is presented in the tree. */
1846 
1847 	/*
1848 	** This loop could take a really long time, and could log
1849 	** many more blocks than a transaction can hold.  So, we do a polite
1850 	** journal end here, and if the transaction needs ending, we make
1851 	** sure the file is consistent before ending the current trans
1852 	** and starting a new one
1853 	*/
1854         if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
1855 	  int orig_len_alloc = th->t_blocks_allocated ;
1856 	  decrement_counters_in_path(&s_search_path) ;
1857 
1858 	  if (update_timestamps) {
1859 	      p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME_SEC;
1860 	  }
1861 	  reiserfs_update_sd(th, p_s_inode) ;
1862 
1863 	  err = journal_end(th, p_s_inode->i_sb, orig_len_alloc) ;
1864 	  if (err)
1865 	    goto out;
1866 	  err = journal_begin (th, p_s_inode->i_sb,
1867                                JOURNAL_PER_BALANCE_CNT * 6);
1868 	  if (err)
1869 	    goto out;
1870 	  reiserfs_update_inode_transaction(p_s_inode) ;
1871 	}
1872     } while ( n_file_size > ROUND_UP (n_new_file_size) &&
1873 	      search_for_position_by_key(p_s_inode->i_sb, &s_item_key, &s_search_path) == POSITION_FOUND )  ;
1874 
1875     RFALSE( n_file_size > ROUND_UP (n_new_file_size),
1876 	    "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
1877 	    n_new_file_size, n_file_size, s_item_key.on_disk_key.k_objectid);
1878 
1879 update_and_out:
1880     if (update_timestamps) {
1881 	// this is truncate, not file closing
1882 	    p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME_SEC;
1883     }
1884     reiserfs_update_sd (th, p_s_inode);
1885 
1886 out:
1887     pathrelse(&s_search_path) ;
1888     return err;
1889 }
1890 
1891 
1892 #ifdef CONFIG_REISERFS_CHECK
1893 // this makes sure, that we __append__, not overwrite or add holes
1894 static void check_research_for_paste (struct path * path,
1895 				      const struct cpu_key * p_s_key)
1896 {
1897     struct item_head * found_ih = get_ih (path);
1898 
1899     if (is_direct_le_ih (found_ih)) {
1900 	if (le_ih_k_offset (found_ih) + op_bytes_number (found_ih, get_last_bh (path)->b_size) !=
1901 	    cpu_key_k_offset (p_s_key) ||
1902 	    op_bytes_number (found_ih, get_last_bh (path)->b_size) != pos_in_item (path))
1903 	    reiserfs_panic (NULL, "PAP-5720: check_research_for_paste: "
1904 			    "found direct item %h or position (%d) does not match to key %K",
1905 			    found_ih, pos_in_item (path), p_s_key);
1906     }
1907     if (is_indirect_le_ih (found_ih)) {
1908 	if (le_ih_k_offset (found_ih) + op_bytes_number (found_ih, get_last_bh (path)->b_size) != cpu_key_k_offset (p_s_key) ||
1909 	    I_UNFM_NUM (found_ih) != pos_in_item (path) ||
1910 	    get_ih_free_space (found_ih) != 0)
1911 	    reiserfs_panic (NULL, "PAP-5730: check_research_for_paste: "
1912 			    "found indirect item (%h) or position (%d) does not match to key (%K)",
1913 			    found_ih, pos_in_item (path), p_s_key);
1914     }
1915 }
1916 #endif /* config reiserfs check */
1917 
1918 
1919 /* Paste bytes to the existing item. Returns bytes number pasted into the item. */
1920 int reiserfs_paste_into_item (struct reiserfs_transaction_handle *th,
1921 			      struct path         * p_s_search_path,	/* Path to the pasted item.          */
1922 			      const struct cpu_key      * p_s_key,        	/* Key to search for the needed item.*/
1923 			      struct inode	  * inode,		/* Inode item belongs to */
1924 			      const char          * p_c_body,       	/* Pointer to the bytes to paste.    */
1925 			      int                   n_pasted_size)  	/* Size of pasted bytes.             */
1926 {
1927     struct tree_balance s_paste_balance;
1928     int                 retval;
1929     int			fs_gen;
1930 
1931     BUG_ON (!th->t_trans_id);
1932 
1933     fs_gen = get_generation(inode->i_sb) ;
1934 
1935 #ifdef REISERQUOTA_DEBUG
1936     reiserfs_debug (inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota paste_into_item(): allocating %u id=%u type=%c", n_pasted_size, inode->i_uid, key2type(&(p_s_key->on_disk_key)));
1937 #endif
1938 
1939     if (DQUOT_ALLOC_SPACE_NODIRTY(inode, n_pasted_size)) {
1940 	pathrelse(p_s_search_path);
1941 	return -EDQUOT;
1942     }
1943     init_tb_struct(th, &s_paste_balance, th->t_super, p_s_search_path, n_pasted_size);
1944 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
1945     s_paste_balance.key = p_s_key->on_disk_key;
1946 #endif
1947 
1948     /* DQUOT_* can schedule, must check before the fix_nodes */
1949     if (fs_changed(fs_gen, inode->i_sb)) {
1950 	goto search_again;
1951     }
1952 
1953     while ((retval = fix_nodes(M_PASTE, &s_paste_balance, NULL, p_c_body)) ==
1954 REPEAT_SEARCH ) {
1955 search_again:
1956 	/* file system changed while we were in the fix_nodes */
1957 	PROC_INFO_INC( th -> t_super, paste_into_item_restarted );
1958 	retval = search_for_position_by_key (th->t_super, p_s_key, p_s_search_path);
1959 	if (retval == IO_ERROR) {
1960 	    retval = -EIO ;
1961 	    goto error_out ;
1962 	}
1963 	if (retval == POSITION_FOUND) {
1964 	    reiserfs_warning (inode->i_sb, "PAP-5710: reiserfs_paste_into_item: entry or pasted byte (%K) exists", p_s_key);
1965 	    retval = -EEXIST ;
1966 	    goto error_out ;
1967 	}
1968 
1969 #ifdef CONFIG_REISERFS_CHECK
1970 	check_research_for_paste (p_s_search_path, p_s_key);
1971 #endif
1972     }
1973 
1974     /* Perform balancing after all resources are collected by fix_nodes, and
1975        accessing them will not risk triggering schedule. */
1976     if ( retval == CARRY_ON ) {
1977 	do_balance(&s_paste_balance, NULL/*ih*/, p_c_body, M_PASTE);
1978 	return 0;
1979     }
1980     retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
1981 error_out:
1982     /* this also releases the path */
1983     unfix_nodes(&s_paste_balance);
1984 #ifdef REISERQUOTA_DEBUG
1985     reiserfs_debug (inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota paste_into_item(): freeing %u id=%u type=%c", n_pasted_size, inode->i_uid, key2type(&(p_s_key->on_disk_key)));
1986 #endif
1987     DQUOT_FREE_SPACE_NODIRTY(inode, n_pasted_size);
1988     return retval ;
1989 }
1990 
1991 
1992 /* Insert new item into the buffer at the path. */
1993 int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
1994 			 struct path         * 	p_s_path,         /* Path to the inserteded item.         */
1995 			 const struct cpu_key      * key,
1996 			 struct item_head    * 	p_s_ih,           /* Pointer to the item header to insert.*/
1997 			 struct inode        * inode,
1998 			 const char          * 	p_c_body)         /* Pointer to the bytes to insert.      */
1999 {
2000     struct tree_balance s_ins_balance;
2001     int                 retval;
2002     int fs_gen = 0 ;
2003     int quota_bytes = 0 ;
2004 
2005     BUG_ON (!th->t_trans_id);
2006 
2007     if (inode) {      /* Do we count quotas for item? */
2008 	fs_gen = get_generation(inode->i_sb);
2009 	quota_bytes = ih_item_len(p_s_ih);
2010 
2011 	/* hack so the quota code doesn't have to guess if the file has
2012 	 ** a tail, links are always tails, so there's no guessing needed
2013 	 */
2014 	if (!S_ISLNK (inode->i_mode) && is_direct_le_ih(p_s_ih)) {
2015 	    quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE ;
2016 	}
2017 #ifdef REISERQUOTA_DEBUG
2018 	reiserfs_debug (inode->i_sb, REISERFS_DEBUG_CODE, "reiserquota insert_item(): allocating %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(p_s_ih));
2019 #endif
2020 	/* We can't dirty inode here. It would be immediately written but
2021 	 * appropriate stat item isn't inserted yet... */
2022 	if (DQUOT_ALLOC_SPACE_NODIRTY(inode, quota_bytes)) {
2023 	    pathrelse(p_s_path);
2024 	    return -EDQUOT;
2025 	}
2026     }
2027     init_tb_struct(th, &s_ins_balance, th->t_super, p_s_path, IH_SIZE + ih_item_len(p_s_ih));
2028 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
2029     s_ins_balance.key = key->on_disk_key;
2030 #endif
2031     /* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */
2032     if (inode && fs_changed(fs_gen, inode->i_sb)) {
2033 	goto search_again;
2034     }
2035 
2036     while ( (retval = fix_nodes(M_INSERT, &s_ins_balance, p_s_ih, p_c_body)) == REPEAT_SEARCH) {
2037 search_again:
2038 	/* file system changed while we were in the fix_nodes */
2039 	PROC_INFO_INC( th -> t_super, insert_item_restarted );
2040 	retval = search_item (th->t_super, key, p_s_path);
2041 	if (retval == IO_ERROR) {
2042 	    retval = -EIO;
2043 	    goto error_out ;
2044 	}
2045 	if (retval == ITEM_FOUND) {
2046 	    reiserfs_warning (th->t_super, "PAP-5760: reiserfs_insert_item: "
2047 			      "key %K already exists in the tree", key);
2048 	    retval = -EEXIST ;
2049 	    goto error_out;
2050 	}
2051     }
2052 
2053     /* make balancing after all resources will be collected at a time */
2054     if ( retval == CARRY_ON ) {
2055 	do_balance (&s_ins_balance, p_s_ih, p_c_body, M_INSERT);
2056 	return 0;
2057     }
2058 
2059     retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2060 error_out:
2061     /* also releases the path */
2062     unfix_nodes(&s_ins_balance);
2063 #ifdef REISERQUOTA_DEBUG
2064     reiserfs_debug (th->t_super, REISERFS_DEBUG_CODE, "reiserquota insert_item(): freeing %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(p_s_ih));
2065 #endif
2066     if (inode)
2067 	DQUOT_FREE_SPACE_NODIRTY(inode, quota_bytes) ;
2068     return retval;
2069 }
2070 
2071 
2072 
2073 
2074