xref: /openbmc/linux/fs/reiserfs/fix_node.c (revision 87c2ce3b)
1 /*
2  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3  */
4 
5 /**
6  ** old_item_num
7  ** old_entry_num
8  ** set_entry_sizes
9  ** create_virtual_node
10  ** check_left
11  ** check_right
12  ** directory_part_size
13  ** get_num_ver
14  ** set_parameters
15  ** is_leaf_removable
16  ** are_leaves_removable
17  ** get_empty_nodes
18  ** get_lfree
19  ** get_rfree
20  ** is_left_neighbor_in_cache
21  ** decrement_key
22  ** get_far_parent
23  ** get_parents
24  ** can_node_be_removed
25  ** ip_check_balance
26  ** dc_check_balance_internal
27  ** dc_check_balance_leaf
28  ** dc_check_balance
29  ** check_balance
30  ** get_direct_parent
31  ** get_neighbors
32  ** fix_nodes
33  **
34  **
35  **/
36 
37 #include <linux/config.h>
38 #include <linux/time.h>
39 #include <linux/string.h>
40 #include <linux/reiserfs_fs.h>
41 #include <linux/buffer_head.h>
42 
43 /* To make any changes in the tree we find a node, that contains item
44    to be changed/deleted or position in the node we insert a new item
45    to. We call this node S. To do balancing we need to decide what we
46    will shift to left/right neighbor, or to a new node, where new item
47    will be etc. To make this analysis simpler we build virtual
48    node. Virtual node is an array of items, that will replace items of
49    node S. (For instance if we are going to delete an item, virtual
50    node does not contain it). Virtual node keeps information about
51    item sizes and types, mergeability of first and last items, sizes
52    of all entries in directory item. We use this array of items when
53    calculating what we can shift to neighbors and how many nodes we
   have to have if we do not do any shifting, if we shift to the left/right
55    neighbor or to both. */
56 
57 /* taking item number in virtual node, returns number of item, that it has in source buffer */
58 static inline int old_item_num(int new_num, int affected_item_num, int mode)
59 {
60 	if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num)
61 		return new_num;
62 
63 	if (mode == M_INSERT) {
64 
65 		RFALSE(new_num == 0,
66 		       "vs-8005: for INSERT mode and item number of inserted item");
67 
68 		return new_num - 1;
69 	}
70 
71 	RFALSE(mode != M_DELETE,
72 	       "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\'",
73 	       mode);
74 	/* delete mode */
75 	return new_num + 1;
76 }
77 
/*
 * Build the "virtual node" description of S[h] as it will look after the
 * pending operation (tb->tb_vn->vn_mode) has been applied.  For internal
 * nodes (h > 0) only the resulting item count is computed.  For the leaf
 * level an array of virtual_item descriptors is laid out in tb->vn_buf,
 * one per item that will remain in (or be added to) S[0], and the
 * left/right mergeability flags of the boundary items are set.
 */
static void create_virtual_node(struct tree_balance *tb, int h)
{
	struct item_head *ih;
	struct virtual_node *vn = tb->tb_vn;
	int new_num;
	struct buffer_head *Sh;	/* this comes from tb->S[h] */

	Sh = PATH_H_PBUFFER(tb->tb_path, h);

	/* size of changed node: current used space plus the pending
	   insert/paste size (negative for cut/delete) */
	vn->vn_size =
	    MAX_CHILD_SIZE(Sh) - B_FREE_SPACE(Sh) + tb->insert_size[h];

	/* for internal nodes the array of virtual items is not created;
	   only the number of (key, child-pointer) pairs matters */
	if (h) {
		vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE);
		return;
	}

	/* number of items in virtual node: one more after an insert,
	   one fewer after a delete */
	vn->vn_nr_item =
	    B_NR_ITEMS(Sh) + ((vn->vn_mode == M_INSERT) ? 1 : 0) -
	    ((vn->vn_mode == M_DELETE) ? 1 : 0);

	/* first virtual item: the descriptor array lives right behind the
	   virtual_node header in tb->vn_buf */
	vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1);
	memset(vn->vn_vi, 0, vn->vn_nr_item * sizeof(struct virtual_item));
	vn->vn_free_ptr += vn->vn_nr_item * sizeof(struct virtual_item);

	/* first item in the node */
	ih = B_N_PITEM_HEAD(Sh, 0);

	/* define the mergeability for 0-th item (if it is not being deleted) */
	if (op_is_left_mergeable(&(ih->ih_key), Sh->b_size)
	    && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num))
		vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE;

	/* go through all items those remain in the virtual node (except for the new (inserted) one) */
	for (new_num = 0; new_num < vn->vn_nr_item; new_num++) {
		int j;
		struct virtual_item *vi = vn->vn_vi + new_num;
		int is_affected =
		    ((new_num != vn->vn_affected_item_num) ? 0 : 1);

		/* the inserted item does not exist in the source node yet;
		   it is filled in after this loop */
		if (is_affected && vn->vn_mode == M_INSERT)
			continue;

		/* get item number in source node */
		j = old_item_num(new_num, vn->vn_affected_item_num,
				 vn->vn_mode);

		vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE;
		vi->vi_ih = ih + j;
		vi->vi_item = B_I_PITEM(Sh, ih + j);
		vi->vi_uarea = vn->vn_free_ptr;

		// FIXME: there is no check, that item operation did not
		// consume too much memory
		vn->vn_free_ptr +=
		    op_create_vi(vn, vi, is_affected, tb->insert_size[0]);
		if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr)
			reiserfs_panic(tb->tb_sb,
				       "vs-8030: create_virtual_node: "
				       "virtual node space consumed");

		if (!is_affected)
			/* this is not being changed */
			continue;

		/* paste/cut grow the affected item by insert_size[0]
		   (negative for cut) */
		if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) {
			vn->vn_vi[new_num].vi_item_len += tb->insert_size[0];
			vi->vi_new_data = vn->vn_data;	// pointer to data which is going to be pasted
		}
	}

	/* virtual inserted item is not defined yet */
	if (vn->vn_mode == M_INSERT) {
		struct virtual_item *vi = vn->vn_vi + vn->vn_affected_item_num;

		RFALSE(vn->vn_ins_ih == 0,
		       "vs-8040: item header of inserted item is not specified");
		vi->vi_item_len = tb->insert_size[0];
		vi->vi_ih = vn->vn_ins_ih;
		vi->vi_item = vn->vn_data;
		vi->vi_uarea = vn->vn_free_ptr;

		op_create_vi(vn, vi, 0 /*not pasted or cut */ ,
			     tb->insert_size[0]);
	}

	/* set right merge flag we take right delimiting key and check whether it is a mergeable item */
	if (tb->CFR[0]) {
		struct reiserfs_key *key;

		key = B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]);
		if (op_is_left_mergeable(key, Sh->b_size)
		    && (vn->vn_mode != M_DELETE
			|| vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1))
			vn->vn_vi[vn->vn_nr_item - 1].vi_type |=
			    VI_TYPE_RIGHT_MERGEABLE;

#ifdef CONFIG_REISERFS_CHECK
		if (op_is_left_mergeable(key, Sh->b_size) &&
		    !(vn->vn_mode != M_DELETE
		      || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) {
			/* we delete last item and it could be merged with right neighbor's first item */
			if (!
			    (B_NR_ITEMS(Sh) == 1
			     && is_direntry_le_ih(B_N_PITEM_HEAD(Sh, 0))
			     && I_ENTRY_COUNT(B_N_PITEM_HEAD(Sh, 0)) == 1)) {
				/* node contains more than 1 item, or item is not directory item, or this item contains more than 1 entry */
				print_block(Sh, 0, -1, -1);
				reiserfs_panic(tb->tb_sb,
					       "vs-8045: create_virtual_node: rdkey %k, affected item==%d (mode==%c) Must be %c",
					       key, vn->vn_affected_item_num,
					       vn->vn_mode, M_DELETE);
			} else
				/* we can delete directory item, that has only one directory entry in it */
				;
		}
#endif

	}
}
202 
203 /* using virtual node check, how many items can be shifted to left
204    neighbor */
/*
 * Using the virtual node, compute how many items of S[h] fit into the
 * left neighbor given cur_free bytes of free space there.
 * Sets tb->lnum[h] (items shifted, including a partially shifted one)
 * and, at the leaf level, tb->lbytes (units of the boundary item that
 * fit, or -1 when no item is split).
 */
static void check_left(struct tree_balance *tb, int h, int cur_free)
{
	int i;
	struct virtual_node *vn = tb->tb_vn;
	struct virtual_item *vi;
	int d_size, ih_size;

	RFALSE(cur_free < 0, "vs-8050: cur_free (%d) < 0", cur_free);

	/* internal level: every shifted entry costs a key plus a child
	   pointer; entries are never split */
	if (h > 0) {
		tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
		return;
	}

	/* leaf level */

	if (!cur_free || !vn->vn_nr_item) {
		/* no free space or nothing to move */
		tb->lnum[h] = 0;
		tb->lbytes = -1;
		return;
	}

	RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
	       "vs-8055: parent does not exist or invalid");

	/* if the first item merges with L[0]'s last item, its header is
	   not copied, so the whole node needs IH_SIZE less space */
	vi = vn->vn_vi;
	if ((unsigned int)cur_free >=
	    (vn->vn_size -
	     ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? IH_SIZE : 0))) {
		/* all contents of S[0] fits into L[0] */

		RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
		       "vs-8055: invalid mode or balance condition failed");

		tb->lnum[0] = vn->vn_nr_item;
		tb->lbytes = -1;
		return;
	}

	d_size = 0, ih_size = IH_SIZE;

	/* first item may be merge with last item in left neighbor */
	if (vi->vi_type & VI_TYPE_LEFT_MERGEABLE)
		d_size = -((int)IH_SIZE), ih_size = 0;

	/* accumulate whole items while they fit; d_size/ih_size are reset
	   to "non-mergeable" defaults after the first iteration */
	tb->lnum[0] = 0;
	for (i = 0; i < vn->vn_nr_item;
	     i++, ih_size = IH_SIZE, d_size = 0, vi++) {
		d_size += vi->vi_item_len;
		if (cur_free >= d_size) {
			/* the item can be shifted entirely */
			cur_free -= d_size;
			tb->lnum[0]++;
			continue;
		}

		/* the item cannot be shifted entirely, try to split it */
		/* check whether L[0] can hold ih and at least one byte of the item body */
		if (cur_free <= ih_size) {
			/* cannot shift even a part of the current item */
			tb->lbytes = -1;
			return;
		}
		cur_free -= ih_size;

		tb->lbytes = op_check_left(vi, cur_free, 0, 0);
		if (tb->lbytes != -1)
			/* count partially shifted item */
			tb->lnum[0]++;

		break;
	}

	return;
}
282 
283 /* using virtual node check, how many items can be shifted to right
284    neighbor */
/*
 * Mirror of check_left(): compute how many items of S[h], taken from its
 * right end, fit into the right neighbor given cur_free bytes there.
 * Sets tb->rnum[h] and, at the leaf level, tb->rbytes (units of the
 * boundary item that fit, or -1 when no item is split).
 */
static void check_right(struct tree_balance *tb, int h, int cur_free)
{
	int i;
	struct virtual_node *vn = tb->tb_vn;
	struct virtual_item *vi;
	int d_size, ih_size;

	RFALSE(cur_free < 0, "vs-8070: cur_free < 0");

	/* internal level: entries are never split */
	if (h > 0) {
		tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE);
		return;
	}

	/* leaf level */

	if (!cur_free || !vn->vn_nr_item) {
		/* no free space  */
		tb->rnum[h] = 0;
		tb->rbytes = -1;
		return;
	}

	RFALSE(!PATH_H_PPARENT(tb->tb_path, 0),
	       "vs-8075: parent does not exist or invalid");

	/* if the last item merges with R[0]'s first item, its header is
	   not copied, so the whole node needs IH_SIZE less space */
	vi = vn->vn_vi + vn->vn_nr_item - 1;
	if ((unsigned int)cur_free >=
	    (vn->vn_size -
	     ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? IH_SIZE : 0))) {
		/* all contents of S[0] fits into R[0] */

		RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE,
		       "vs-8080: invalid mode or balance condition failed");

		tb->rnum[h] = vn->vn_nr_item;
		tb->rbytes = -1;
		return;
	}

	d_size = 0, ih_size = IH_SIZE;

	/* last item may be merge with first item in right neighbor */
	if (vi->vi_type & VI_TYPE_RIGHT_MERGEABLE)
		d_size = -(int)IH_SIZE, ih_size = 0;

	/* walk items from the right end while they fit entirely */
	tb->rnum[0] = 0;
	for (i = vn->vn_nr_item - 1; i >= 0;
	     i--, d_size = 0, ih_size = IH_SIZE, vi--) {
		d_size += vi->vi_item_len;
		if (cur_free >= d_size) {
			/* the item can be shifted entirely */
			cur_free -= d_size;
			tb->rnum[0]++;
			continue;
		}

		/* check whether R[0] can hold ih and at least one byte of the item body */
		if (cur_free <= ih_size) {	/* cannot shift even a part of the current item */
			tb->rbytes = -1;
			return;
		}

		/* R[0] can hold the header of the item and at least one byte of its body */
		cur_free -= ih_size;	/* cur_free is still > 0 */

		tb->rbytes = op_check_right(vi, cur_free);
		if (tb->rbytes != -1)
			/* count partially shifted item */
			tb->rnum[0]++;

		break;
	}

	return;
}
362 
363 /*
364  * from - number of items, which are shifted to left neighbor entirely
365  * to - number of item, which are shifted to right neighbor entirely
366  * from_bytes - number of bytes of boundary item (or directory entries) which are shifted to left neighbor
367  * to_bytes - number of bytes of boundary item (or directory entries) which are shifted to right neighbor */
/*
 * Determine how many nodes are needed to hold the items of the virtual
 * node that are NOT shifted to a neighbor, and how those items are
 * distributed among S[0] and up to two new nodes.
 *
 * Results go into snum012: [0..2] = item counts for S[0], S1new, S2new;
 * [3]/[4] = s1bytes/s2bytes, the unit counts of items split across the
 * node boundaries (-1 when nothing is split).  Returns the number of
 * nodes needed.  'flow' != 0 allows items to be split between nodes.
 */
static int get_num_ver(int mode, struct tree_balance *tb, int h,
		       int from, int from_bytes,
		       int to, int to_bytes, short *snum012, int flow)
{
	int i;
	int cur_free;
	//    int bytes;
	int units;
	struct virtual_node *vn = tb->tb_vn;
	//    struct virtual_item * vi;

	int total_node_size, max_node_size, current_item_size;
	int needed_nodes;
	int start_item,		/* position of item we start filling node from */
	 end_item,		/* position of item we finish filling node by */
	 start_bytes,		/* number of first bytes (entries for directory) of start_item-th item
				   we do not include into node that is being filled */
	 end_bytes;		/* number of last bytes (entries for directory) of end_item-th item
				   we do node include into node that is being filled */
	int split_item_positions[2];	/* these are positions in virtual item of
					   items, that are split between S[0] and
					   S1new and S1new and S2new */

	split_item_positions[0] = -1;
	split_item_positions[1] = -1;

	/* We only create additional nodes if we are in insert or paste mode
	   or we are in replace mode at the internal level. If h is 0 and
	   the mode is M_REPLACE then in fix_nodes we change the mode to
	   paste or insert before we get here in the code.  */
	RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE),
	       "vs-8100: insert_size < 0 in overflow");

	max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h));

	/* snum012 [0-2] - number of items, that lay
	   to S[0], first new node and second new node */
	snum012[3] = -1;	/* s1bytes */
	snum012[4] = -1;	/* s2bytes */

	/* internal level: a fixed cost per remaining entry, no splitting */
	if (h > 0) {
		i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE);
		if (i == max_node_size)
			return 1;
		return (i / max_node_size + 1);
	}

	/* leaf level */
	needed_nodes = 1;
	total_node_size = 0;
	cur_free = max_node_size;

	// start from 'from'-th item
	start_item = from;
	// skip its first 'start_bytes' units
	start_bytes = ((from_bytes != -1) ? from_bytes : 0);

	// last included item is the 'end_item'-th one
	end_item = vn->vn_nr_item - to - 1;
	// do not count last 'end_bytes' units of 'end_item'-th item
	end_bytes = (to_bytes != -1) ? to_bytes : 0;

	/* go through all item beginning from the start_item-th item and ending by
	   the end_item-th item. Do not count first 'start_bytes' units of
	   'start_item'-th item and last 'end_bytes' of 'end_item'-th item */

	for (i = start_item; i <= end_item; i++) {
		struct virtual_item *vi = vn->vn_vi + i;
		int skip_from_end = ((i == end_item) ? end_bytes : 0);

		RFALSE(needed_nodes > 3, "vs-8105: too many nodes are needed");

		/* get size of current item */
		current_item_size = vi->vi_item_len;

		/* do not take in calculation head part (from_bytes) of from-th item */
		current_item_size -=
		    op_part_size(vi, 0 /*from start */ , start_bytes);

		/* do not take in calculation tail part of last item */
		current_item_size -=
		    op_part_size(vi, 1 /*from end */ , skip_from_end);

		/* if item fits into current node entierly */
		if (total_node_size + current_item_size <= max_node_size) {
			snum012[needed_nodes - 1]++;
			total_node_size += current_item_size;
			start_bytes = 0;
			continue;
		}

		if (current_item_size > max_node_size) {
			/* virtual item length is longer, than max size of item in
			   a node. It is impossible for direct item */
			RFALSE(is_direct_le_ih(vi->vi_ih),
			       "vs-8110: "
			       "direct item length is %d. It can not be longer than %d",
			       current_item_size, max_node_size);
			/* we will try to split it */
			flow = 1;
		}

		if (!flow) {
			/* as we do not split items, take new node and continue */
			needed_nodes++;
			i--;	/* re-examine the same item in the new node */
			total_node_size = 0;
			continue;
		}
		// calculate number of item units which fit into node being
		// filled
		{
			int free_space;

			free_space = max_node_size - total_node_size - IH_SIZE;
			units =
			    op_check_left(vi, free_space, start_bytes,
					  skip_from_end);
			if (units == -1) {
				/* nothing fits into current node, take new node and continue */
				needed_nodes++, i--, total_node_size = 0;
				continue;
			}
		}

		/* something fits into the current node */
		//if (snum012[3] != -1 || needed_nodes != 1)
		//  reiserfs_panic (tb->tb_sb, "vs-8115: get_num_ver: too many nodes required");
		//snum012[needed_nodes - 1 + 3] = op_unit_num (vi) - start_bytes - units;
		start_bytes += units;
		snum012[needed_nodes - 1 + 3] = units;

		if (needed_nodes > 2)
			reiserfs_warning(tb->tb_sb, "vs-8111: get_num_ver: "
					 "split_item_position is out of boundary");
		snum012[needed_nodes - 1]++;
		split_item_positions[needed_nodes - 1] = i;
		needed_nodes++;
		/* continue from the same item with start_bytes != -1 */
		start_item = i;
		i--;
		total_node_size = 0;
	}

	// sum012[4] (if it is not -1) contains number of units of which
	// are to be in S1new, snum012[3] - to be in S0. They are supposed
	// to be S1bytes and S2bytes correspondingly, so recalculate
	if (snum012[4] > 0) {
		int split_item_num;
		int bytes_to_r, bytes_to_l;
		int bytes_to_S1new;

		split_item_num = split_item_positions[1];
		/* units of the split item already consumed by the left
		   neighbor, the right neighbor, and S1new respectively */
		bytes_to_l =
		    ((from == split_item_num
		      && from_bytes != -1) ? from_bytes : 0);
		bytes_to_r =
		    ((end_item == split_item_num
		      && end_bytes != -1) ? end_bytes : 0);
		bytes_to_S1new =
		    ((split_item_positions[0] ==
		      split_item_positions[1]) ? snum012[3] : 0);

		// s2bytes
		snum012[4] =
		    op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] -
		    bytes_to_r - bytes_to_l - bytes_to_S1new;

		if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY &&
		    vn->vn_vi[split_item_num].vi_index != TYPE_INDIRECT)
			reiserfs_warning(tb->tb_sb, "vs-8115: get_num_ver: not "
					 "directory or indirect item");
	}

	/* now we know S2bytes, calculate S1bytes */
	if (snum012[3] > 0) {
		int split_item_num;
		int bytes_to_r, bytes_to_l;
		int bytes_to_S2new;

		split_item_num = split_item_positions[0];
		bytes_to_l =
		    ((from == split_item_num
		      && from_bytes != -1) ? from_bytes : 0);
		bytes_to_r =
		    ((end_item == split_item_num
		      && end_bytes != -1) ? end_bytes : 0);
		bytes_to_S2new =
		    ((split_item_positions[0] == split_item_positions[1]
		      && snum012[4] != -1) ? snum012[4] : 0);

		// s1bytes
		snum012[3] =
		    op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] -
		    bytes_to_r - bytes_to_l - bytes_to_S2new;
	}

	return needed_nodes;
}
568 
569 #ifdef CONFIG_REISERFS_CHECK
570 extern struct tree_balance *cur_tb;
571 #endif
572 
573 /* Set parameters for balancing.
574  * Performs write of results of analysis of balancing into structure tb,
575  * where it will later be used by the functions that actually do the balancing.
576  * Parameters:
577  *	tb	tree_balance structure;
578  *	h	current level of the node;
579  *	lnum	number of items from S[h] that must be shifted to L[h];
580  *	rnum	number of items from S[h] that must be shifted to R[h];
581  *	blk_num	number of blocks that S[h] will be splitted into;
582  *	s012	number of items that fall into splitted nodes.
583  *	lbytes	number of bytes which flow to the left neighbor from the item that is not
584  *		not shifted entirely
585  *	rbytes	number of bytes which flow to the right neighbor from the item that is not
586  *		not shifted entirely
587  *	s1bytes	number of bytes which flow to the first  new node when S[0] splits (this number is contained in s012 array)
588  */
589 
590 static void set_parameters(struct tree_balance *tb, int h, int lnum,
591 			   int rnum, int blk_num, short *s012, int lb, int rb)
592 {
593 
594 	tb->lnum[h] = lnum;
595 	tb->rnum[h] = rnum;
596 	tb->blknum[h] = blk_num;
597 
598 	if (h == 0) {		/* only for leaf level */
599 		if (s012 != NULL) {
600 			tb->s0num = *s012++,
601 			    tb->s1num = *s012++, tb->s2num = *s012++;
602 			tb->s1bytes = *s012++;
603 			tb->s2bytes = *s012;
604 		}
605 		tb->lbytes = lb;
606 		tb->rbytes = rb;
607 	}
608 	PROC_INFO_ADD(tb->tb_sb, lnum[h], lnum);
609 	PROC_INFO_ADD(tb->tb_sb, rnum[h], rnum);
610 
611 	PROC_INFO_ADD(tb->tb_sb, lbytes[h], lb);
612 	PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb);
613 }
614 
615 /* check, does node disappear if we shift tb->lnum[0] items to left
616    neighbor and tb->rnum[0] to the right one. */
617 static int is_leaf_removable(struct tree_balance *tb)
618 {
619 	struct virtual_node *vn = tb->tb_vn;
620 	int to_left, to_right;
621 	int size;
622 	int remain_items;
623 
624 	/* number of items, that will be shifted to left (right) neighbor
625 	   entirely */
626 	to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0);
627 	to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0);
628 	remain_items = vn->vn_nr_item;
629 
630 	/* how many items remain in S[0] after shiftings to neighbors */
631 	remain_items -= (to_left + to_right);
632 
633 	if (remain_items < 1) {
634 		/* all content of node can be shifted to neighbors */
635 		set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0,
636 			       NULL, -1, -1);
637 		return 1;
638 	}
639 
640 	if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1)
641 		/* S[0] is not removable */
642 		return 0;
643 
644 	/* check, whether we can divide 1 remaining item between neighbors */
645 
646 	/* get size of remaining item (in item units) */
647 	size = op_unit_num(&(vn->vn_vi[to_left]));
648 
649 	if (tb->lbytes + tb->rbytes >= size) {
650 		set_parameters(tb, 0, to_left + 1, to_right + 1, 0, NULL,
651 			       tb->lbytes, -1);
652 		return 1;
653 	}
654 
655 	return 0;
656 }
657 
658 /* check whether L, S, R can be joined in one node */
/* check whether L, S, R can be joined in one node */
static int are_leaves_removable(struct tree_balance *tb, int lfree, int rfree)
{
	struct virtual_node *vn = tb->tb_vn;
	int ih_size;		/* header bytes saved by merging boundary items */
	struct buffer_head *S0;

	S0 = PATH_H_PBUFFER(tb->tb_path, 0);

	ih_size = 0;
	if (vn->vn_nr_item) {
		/* each mergeable boundary item loses its item header when
		   it is joined with the neighbor's item */
		if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE)
			ih_size += IH_SIZE;

		if (vn->vn_vi[vn->vn_nr_item - 1].
		    vi_type & VI_TYPE_RIGHT_MERGEABLE)
			ih_size += IH_SIZE;
	} else {
		/* there was only one item and it will be deleted */
		struct item_head *ih;

		RFALSE(B_NR_ITEMS(S0) != 1,
		       "vs-8125: item number must be 1: it is %d",
		       B_NR_ITEMS(S0));

		/* compare the deleted item's key with the right delimiting
		   key: equal short keys mean the right neighbor continues
		   the same object */
		ih = B_N_PITEM_HEAD(S0, 0);
		if (tb->CFR[0]
		    && !comp_short_le_keys(&(ih->ih_key),
					   B_N_PDELIM_KEY(tb->CFR[0],
							  tb->rkey[0])))
			if (is_direntry_le_ih(ih)) {
				/* Directory must be in correct state here: that is
				   somewhere at the left side should exist first directory
				   item. But the item being deleted can not be that first
				   one because its right neighbor is item of the same
				   directory. (But first item always gets deleted in last
				   turn). So, neighbors of deleted item can be merged, so
				   we can save ih_size */
				ih_size = IH_SIZE;

				/* we might check that left neighbor exists and is of the
				   same directory */
				RFALSE(le_ih_k_offset(ih) == DOT_OFFSET,
				       "vs-8130: first directory item can not be removed until directory is not empty");
			}

	}

	/* everything fits into L[0] and R[0] together (accounting for the
	   headers saved by merging): S[0] can be freed */
	if (MAX_CHILD_SIZE(S0) + vn->vn_size <= rfree + lfree + ih_size) {
		set_parameters(tb, 0, -1, -1, -1, NULL, -1, -1);
		PROC_INFO_INC(tb->tb_sb, leaves_removable);
		return 1;
	}
	return 0;

}
714 
/* when we do not split item, lnum and rnum are numbers of entire items */

/*
 * Record the decision "shift to the left neighbor".  At internal levels
 * (h != 0) the number of entries moved is chosen so that S[h] and L[h]
 * end up roughly evenly filled; at the leaf level the precomputed lpar
 * value is used, with tb->lbytes carried through only for the
 * LEFT_SHIFT_FLOW case (boundary item split into the neighbor).
 * NOTE(review): relies on Sh, vn, lpar, lset, lnver and snum012 being in
 * scope at the expansion site.
 */
#define SET_PAR_SHIFT_LEFT \
if (h)\
{\
   int to_l;\
   \
   to_l = (MAX_NR_KEY(Sh)+1 - lpar + vn->vn_nr_item + 1) / 2 -\
	      (MAX_NR_KEY(Sh) + 1 - lpar);\
	      \
	      set_parameters (tb, h, to_l, 0, lnver, NULL, -1, -1);\
}\
else \
{\
   if (lset==LEFT_SHIFT_FLOW)\
     set_parameters (tb, h, lpar, 0, lnver, snum012+lset,\
		     tb->lbytes, -1);\
   else\
     set_parameters (tb, h, lpar - (tb->lbytes!=-1), 0, lnver, snum012+lset,\
		     -1, -1);\
}

/*
 * Mirror of SET_PAR_SHIFT_LEFT for the right neighbor, driven by rpar,
 * rset, rnver and tb->rbytes.
 */
#define SET_PAR_SHIFT_RIGHT \
if (h)\
{\
   int to_r;\
   \
   to_r = (MAX_NR_KEY(Sh)+1 - rpar + vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - rpar);\
   \
   set_parameters (tb, h, 0, to_r, rnver, NULL, -1, -1);\
}\
else \
{\
   if (rset==RIGHT_SHIFT_FLOW)\
     set_parameters (tb, h, 0, rpar, rnver, snum012+rset,\
		  -1, tb->rbytes);\
   else\
     set_parameters (tb, h, 0, rpar - (tb->rbytes!=-1), rnver, snum012+rset,\
		  -1, -1);\
}
754 
755 static void free_buffers_in_tb(struct tree_balance *p_s_tb)
756 {
757 	int n_counter;
758 
759 	decrement_counters_in_path(p_s_tb->tb_path);
760 
761 	for (n_counter = 0; n_counter < MAX_HEIGHT; n_counter++) {
762 		decrement_bcount(p_s_tb->L[n_counter]);
763 		p_s_tb->L[n_counter] = NULL;
764 		decrement_bcount(p_s_tb->R[n_counter]);
765 		p_s_tb->R[n_counter] = NULL;
766 		decrement_bcount(p_s_tb->FL[n_counter]);
767 		p_s_tb->FL[n_counter] = NULL;
768 		decrement_bcount(p_s_tb->FR[n_counter]);
769 		p_s_tb->FR[n_counter] = NULL;
770 		decrement_bcount(p_s_tb->CFL[n_counter]);
771 		p_s_tb->CFL[n_counter] = NULL;
772 		decrement_bcount(p_s_tb->CFR[n_counter]);
773 		p_s_tb->CFR[n_counter] = NULL;
774 	}
775 }
776 
777 /* Get new buffers for storing new nodes that are created while balancing.
778  * Returns:	SCHEDULE_OCCURRED - schedule occurred while the function worked;
779  *	        CARRY_ON - schedule didn't occur while the function worked;
780  *	        NO_DISK_SPACE - no disk space.
781  */
782 /* The function is NOT SCHEDULE-SAFE! */
static int get_empty_nodes(struct tree_balance *p_s_tb, int n_h)
{
	struct buffer_head *p_s_new_bh,
	    *p_s_Sh = PATH_H_PBUFFER(p_s_tb->tb_path, n_h);
	b_blocknr_t *p_n_blocknr, a_n_blocknrs[MAX_AMOUNT_NEEDED] = { 0, };
	int n_counter, n_number_of_freeblk, n_amount_needed,	/* number of needed empty blocks */
	 n_retval = CARRY_ON;
	struct super_block *p_s_sb = p_s_tb->tb_sb;

	/* number_of_freeblk is the number of empty blocks which have been
	   acquired for use by the balancing algorithm minus the number of
	   empty blocks used in the previous levels of the analysis,
	   number_of_freeblk = tb->cur_blknum can be non-zero if a schedule occurs
	   after empty blocks are acquired, and the balancing analysis is
	   then restarted, amount_needed is the number needed by this level
	   (n_h) of the balancing analysis.

	   Note that for systems with many processes writing, it would be
	   more layout optimal to calculate the total number needed by all
	   levels and then to run reiserfs_new_blocks to get all of them at once.  */

	/* Initiate number_of_freeblk to the amount acquired prior to the restart of
	   the analysis or 0 if not restarted, then subtract the amount needed
	   by all of the levels of the tree below n_h. */
	/* blknum includes S[n_h], so we subtract 1 in this calculation */
	for (n_counter = 0, n_number_of_freeblk = p_s_tb->cur_blknum;
	     n_counter < n_h; n_counter++)
		n_number_of_freeblk -=
		    (p_s_tb->blknum[n_counter]) ? (p_s_tb->blknum[n_counter] -
						   1) : 0;

	/* Allocate missing empty blocks. */
	/* if p_s_Sh == 0  then we are getting a new root */
	n_amount_needed = (p_s_Sh) ? (p_s_tb->blknum[n_h] - 1) : 1;
	/*  Amount_needed = the amount that we need more than the amount that we have. */
	if (n_amount_needed > n_number_of_freeblk)
		n_amount_needed -= n_number_of_freeblk;
	else			/* If we have enough already then there is nothing to do. */
		return CARRY_ON;

	/* No need to check quota - is not allocated for blocks used for formatted nodes */
	if (reiserfs_new_form_blocknrs(p_s_tb, a_n_blocknrs,
				       n_amount_needed) == NO_DISK_SPACE)
		return NO_DISK_SPACE;

	/* for each blocknumber we just got, get a buffer and stick it on FEB */
	for (p_n_blocknr = a_n_blocknrs, n_counter = 0;
	     n_counter < n_amount_needed; p_n_blocknr++, n_counter++) {

		RFALSE(!*p_n_blocknr,
		       "PAP-8135: reiserfs_new_blocknrs failed when got new blocks");

		/* a freshly allocated block must not be dirty or journaled */
		p_s_new_bh = sb_getblk(p_s_sb, *p_n_blocknr);
		RFALSE(buffer_dirty(p_s_new_bh) ||
		       buffer_journaled(p_s_new_bh) ||
		       buffer_journal_dirty(p_s_new_bh),
		       "PAP-8140: journlaled or dirty buffer %b for the new block",
		       p_s_new_bh);

		/* Put empty buffers into the array. */
		RFALSE(p_s_tb->FEB[p_s_tb->cur_blknum],
		       "PAP-8141: busy slot for new buffer");

		set_buffer_journal_new(p_s_new_bh);
		p_s_tb->FEB[p_s_tb->cur_blknum++] = p_s_new_bh;
	}

	/* sb_getblk may have blocked: if the tree changed meanwhile the
	   whole analysis has to be redone */
	if (n_retval == CARRY_ON && FILESYSTEM_CHANGED_TB(p_s_tb))
		n_retval = REPEAT_SEARCH;

	return n_retval;
}
855 
856 /* Get free space of the left neighbor, which is stored in the parent
857  * node of the left neighbor.  */
858 static int get_lfree(struct tree_balance *tb, int h)
859 {
860 	struct buffer_head *l, *f;
861 	int order;
862 
863 	if ((f = PATH_H_PPARENT(tb->tb_path, h)) == 0 || (l = tb->FL[h]) == 0)
864 		return 0;
865 
866 	if (f == l)
867 		order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) - 1;
868 	else {
869 		order = B_NR_ITEMS(l);
870 		f = l;
871 	}
872 
873 	return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
874 }
875 
876 /* Get free space of the right neighbor,
877  * which is stored in the parent node of the right neighbor.
878  */
879 static int get_rfree(struct tree_balance *tb, int h)
880 {
881 	struct buffer_head *r, *f;
882 	int order;
883 
884 	if ((f = PATH_H_PPARENT(tb->tb_path, h)) == 0 || (r = tb->FR[h]) == 0)
885 		return 0;
886 
887 	if (f == r)
888 		order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) + 1;
889 	else {
890 		order = 0;
891 		f = r;
892 	}
893 
894 	return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order)));
895 
896 }
897 
898 /* Check whether left neighbor is in memory. */
/* Check whether left neighbor is in memory.  Returns 1 if the left
 * neighbor's block is present in the buffer cache, 0 otherwise (or when
 * the neighbor's parent FL[n_h] is unknown). */
static int is_left_neighbor_in_cache(struct tree_balance *p_s_tb, int n_h)
{
	struct buffer_head *p_s_father, *left;
	struct super_block *p_s_sb = p_s_tb->tb_sb;
	b_blocknr_t n_left_neighbor_blocknr;
	int n_left_neighbor_position;

	if (!p_s_tb->FL[n_h])	/* Father of the left neighbor does not exist. */
		return 0;

	/* Calculate father of the node to be balanced. */
	p_s_father = PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1);

	RFALSE(!p_s_father ||
	       !B_IS_IN_TREE(p_s_father) ||
	       !B_IS_IN_TREE(p_s_tb->FL[n_h]) ||
	       !buffer_uptodate(p_s_father) ||
	       !buffer_uptodate(p_s_tb->FL[n_h]),
	       "vs-8165: F[h] (%b) or FL[h] (%b) is invalid",
	       p_s_father, p_s_tb->FL[n_h]);

	/* Get position of the pointer to the left neighbor into the left father.
	   If the neighbor shares S[h]'s father, it sits at lkey[n_h];
	   otherwise it is the last child of FL[n_h]. */
	n_left_neighbor_position = (p_s_father == p_s_tb->FL[n_h]) ?
	    p_s_tb->lkey[n_h] : B_NR_ITEMS(p_s_tb->FL[n_h]);
	/* Get left neighbor block number. */
	n_left_neighbor_blocknr =
	    B_N_CHILD_NUM(p_s_tb->FL[n_h], n_left_neighbor_position);
	/* Look for the left neighbor in the cache (does not read it in). */
	if ((left = sb_find_get_block(p_s_sb, n_left_neighbor_blocknr))) {

		RFALSE(buffer_uptodate(left) && !B_IS_IN_TREE(left),
		       "vs-8170: left neighbor (%b %z) is not in the tree",
		       left, left);
		put_bh(left);	/* drop the reference taken by the lookup */
		return 1;
	}

	return 0;
}
938 
939 #define LEFT_PARENTS  'l'
940 #define RIGHT_PARENTS 'r'
941 
942 static void decrement_key(struct cpu_key *p_s_key)
943 {
944 	// call item specific function for this key
945 	item_ops[cpu_key_k_type(p_s_key)]->decrement_key(p_s_key);
946 }
947 
/* Calculate far left/right parent of the left/right neighbor of the current node, that
 * is calculate the left/right (FL[h]/FR[h]) neighbor of the parent F[h].
 * Calculate left/right common parent of the current node and L[h]/R[h].
 * Calculate left/right delimiting key position.
 * Returns:	REPEAT_SEARCH    - path in the tree is not correct, or schedule
 *	                           occurred while the function worked;
 *		IO_ERROR         - I/O error while reading the neighbor's parent;
 *	        CARRY_ON         - schedule didn't occur while the function worked;
 */
static int get_far_parent(struct tree_balance *p_s_tb,
			  int n_h,
			  struct buffer_head **pp_s_father,
			  struct buffer_head **pp_s_com_father, char c_lr_par)
{
	struct buffer_head *p_s_parent;
	INITIALIZE_PATH(s_path_to_neighbor_father);
	struct path *p_s_path = p_s_tb->tb_path;
	struct cpu_key s_lr_father_key;
	int n_counter,
	    n_position = INT_MAX,
	    n_first_last_position = 0,
	    n_path_offset = PATH_H_PATH_OFFSET(p_s_path, n_h);

	/* Starting from F[n_h] go upwards in the tree, and look for the common
	   ancestor of F[n_h], and its neighbor l/r, that should be obtained. */

	n_counter = n_path_offset;

	RFALSE(n_counter < FIRST_PATH_ELEMENT_OFFSET,
	       "PAP-8180: invalid path length");

	for (; n_counter > FIRST_PATH_ELEMENT_OFFSET; n_counter--) {
		/* Check whether parent of the current buffer in the path is really parent in the tree. */
		if (!B_IS_IN_TREE
		    (p_s_parent = PATH_OFFSET_PBUFFER(p_s_path, n_counter - 1)))
			return REPEAT_SEARCH;
		/* Check whether position in the parent is correct. */
		if ((n_position =
		     PATH_OFFSET_POSITION(p_s_path,
					  n_counter - 1)) >
		    B_NR_ITEMS(p_s_parent))
			return REPEAT_SEARCH;
		/* Check whether parent at the path really points to the child. */
		if (B_N_CHILD_NUM(p_s_parent, n_position) !=
		    PATH_OFFSET_PBUFFER(p_s_path, n_counter)->b_blocknr)
			return REPEAT_SEARCH;
		/* Return delimiting key if position in the parent is not equal to first/last one.
		   For LEFT_PARENTS the boundary position is 0 (the initial value of
		   n_first_last_position); for RIGHT_PARENTS it is the last child pointer. */
		if (c_lr_par == RIGHT_PARENTS)
			n_first_last_position = B_NR_ITEMS(p_s_parent);
		if (n_position != n_first_last_position) {
			/* p_s_parent is the common ancestor; pin it with a reference. */
			*pp_s_com_father = p_s_parent;
			get_bh(*pp_s_com_father);
			/*(*pp_s_com_father = p_s_parent)->b_count++; */
			break;
		}
	}

	/* if we are in the root of the tree, then there is no common father */
	if (n_counter == FIRST_PATH_ELEMENT_OFFSET) {
		/* Check whether first buffer in the path is the root of the tree. */
		if (PATH_OFFSET_PBUFFER
		    (p_s_tb->tb_path,
		     FIRST_PATH_ELEMENT_OFFSET)->b_blocknr ==
		    SB_ROOT_BLOCK(p_s_tb->tb_sb)) {
			/* The root has no neighbor on either side. */
			*pp_s_father = *pp_s_com_father = NULL;
			return CARRY_ON;
		}
		/* Path no longer starts at the root: the tree changed under us. */
		return REPEAT_SEARCH;
	}

	RFALSE(B_LEVEL(*pp_s_com_father) <= DISK_LEAF_NODE_LEVEL,
	       "PAP-8185: (%b %z) level too small",
	       *pp_s_com_father, *pp_s_com_father);

	/* Check whether the common parent is locked. */

	if (buffer_locked(*pp_s_com_father)) {
		__wait_on_buffer(*pp_s_com_father);
		if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
			decrement_bcount(*pp_s_com_father);
			return REPEAT_SEARCH;
		}
	}

	/* So, we got common parent of the current node and its left/right neighbor.
	   Now we are geting the parent of the left/right neighbor. */

	/* Form key to get parent of the left/right neighbor.  As a side effect
	   cache the delimiting key position in lkey[n_h - 1] / rkey[n_h - 1]. */
	le_key2cpu_key(&s_lr_father_key,
		       B_N_PDELIM_KEY(*pp_s_com_father,
				      (c_lr_par ==
				       LEFT_PARENTS) ? (p_s_tb->lkey[n_h - 1] =
							n_position -
							1) : (p_s_tb->rkey[n_h -
									   1] =
							      n_position)));

	if (c_lr_par == LEFT_PARENTS)
		decrement_key(&s_lr_father_key);

	if (search_by_key
	    (p_s_tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father,
	     n_h + 1) == IO_ERROR)
		// path is released
		return IO_ERROR;

	if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
		decrement_counters_in_path(&s_path_to_neighbor_father);
		decrement_bcount(*pp_s_com_father);
		return REPEAT_SEARCH;
	}

	*pp_s_father = PATH_PLAST_BUFFER(&s_path_to_neighbor_father);

	RFALSE(B_LEVEL(*pp_s_father) != n_h + 1,
	       "PAP-8190: (%b %z) level too small", *pp_s_father, *pp_s_father);
	RFALSE(s_path_to_neighbor_father.path_length <
	       FIRST_PATH_ELEMENT_OFFSET, "PAP-8192: path length is too small");

	/* Shorten the path before releasing it so that the reference on its
	   last buffer (returned in *pp_s_father) is kept for the caller. */
	s_path_to_neighbor_father.path_length--;
	decrement_counters_in_path(&s_path_to_neighbor_father);
	return CARRY_ON;
}
1070 
/* Get parents of neighbors of node in the path(S[n_path_offset]) and common parents of
 * S[n_path_offset] and L[n_path_offset]/R[n_path_offset]: F[n_path_offset], FL[n_path_offset],
 * FR[n_path_offset], CFL[n_path_offset], CFR[n_path_offset].
 * Calculate numbers of left and right delimiting keys position: lkey[n_path_offset], rkey[n_path_offset].
 * Returns:	REPEAT_SEARCH or IO_ERROR - propagated from get_far_parent;
 *	        CARRY_ON - schedule didn't occur while the function worked;
 */
static int get_parents(struct tree_balance *p_s_tb, int n_h)
{
	struct path *p_s_path = p_s_tb->tb_path;
	int n_position,
	    n_ret_value,
	    n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h);
	struct buffer_head *p_s_curf, *p_s_curcf;

	/* Current node is the root of the tree or will be root of the tree */
	if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) {
		/* The root can not have parents.
		   Release nodes which previously were obtained as parents of the current node neighbors. */
		decrement_bcount(p_s_tb->FL[n_h]);
		decrement_bcount(p_s_tb->CFL[n_h]);
		decrement_bcount(p_s_tb->FR[n_h]);
		decrement_bcount(p_s_tb->CFR[n_h]);
		p_s_tb->FL[n_h] = p_s_tb->CFL[n_h] = p_s_tb->FR[n_h] =
		    p_s_tb->CFR[n_h] = NULL;
		return CARRY_ON;
	}

	/* Get parent FL[n_path_offset] of L[n_path_offset]. */
	if ((n_position = PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1))) {
		/* Current node is not the first child of its parent, so the
		   parent serves as both FL[n_h] and CFL[n_h]; take two
		   references, one for each role. */
		/*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2; */
		p_s_curf = p_s_curcf =
		    PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1);
		get_bh(p_s_curf);
		get_bh(p_s_curf);
		p_s_tb->lkey[n_h] = n_position - 1;
	} else {
		/* Calculate current parent of L[n_path_offset], which is the left neighbor of the current node.
		   Calculate current common parent of L[n_path_offset] and the current node. Note that
		   CFL[n_path_offset] not equal FL[n_path_offset] and CFL[n_path_offset] not equal F[n_path_offset].
		   Calculate lkey[n_path_offset]. */
		if ((n_ret_value = get_far_parent(p_s_tb, n_h + 1, &p_s_curf,
						  &p_s_curcf,
						  LEFT_PARENTS)) != CARRY_ON)
			return n_ret_value;
	}

	/* Swap in the freshly obtained parents, releasing the old references. */
	decrement_bcount(p_s_tb->FL[n_h]);
	p_s_tb->FL[n_h] = p_s_curf;	/* New initialization of FL[n_h]. */
	decrement_bcount(p_s_tb->CFL[n_h]);
	p_s_tb->CFL[n_h] = p_s_curcf;	/* New initialization of CFL[n_h]. */

	RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) ||
	       (p_s_curcf && !B_IS_IN_TREE(p_s_curcf)),
	       "PAP-8195: FL (%b) or CFL (%b) is invalid", p_s_curf, p_s_curcf);

/* Get parent FR[n_h] of R[n_h]. */

/* Current node is the last child of F[n_h]. FR[n_h] != F[n_h]. */
	if (n_position == B_NR_ITEMS(PATH_H_PBUFFER(p_s_path, n_h + 1))) {
/* Calculate current parent of R[n_h], which is the right neighbor of F[n_h].
   Calculate current common parent of R[n_h] and current node. Note that CFR[n_h]
   not equal FR[n_path_offset] and CFR[n_h] not equal F[n_h]. */
		if ((n_ret_value =
		     get_far_parent(p_s_tb, n_h + 1, &p_s_curf, &p_s_curcf,
				    RIGHT_PARENTS)) != CARRY_ON)
			return n_ret_value;
	} else {
/* Current node is not the last child of its parent F[n_h]. */
		/* As above, the parent doubles as FR[n_h] and CFR[n_h]. */
		/*(p_s_curf = p_s_curcf = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1))->b_count += 2; */
		p_s_curf = p_s_curcf =
		    PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1);
		get_bh(p_s_curf);
		get_bh(p_s_curf);
		p_s_tb->rkey[n_h] = n_position;
	}

	decrement_bcount(p_s_tb->FR[n_h]);
	p_s_tb->FR[n_h] = p_s_curf;	/* New initialization of FR[n_path_offset]. */

	decrement_bcount(p_s_tb->CFR[n_h]);
	p_s_tb->CFR[n_h] = p_s_curcf;	/* New initialization of CFR[n_path_offset]. */

	RFALSE((p_s_curf && !B_IS_IN_TREE(p_s_curf)) ||
	       (p_s_curcf && !B_IS_IN_TREE(p_s_curcf)),
	       "PAP-8205: FR (%b) or CFR (%b) is invalid", p_s_curf, p_s_curcf);

	return CARRY_ON;
}
1161 
/* it is possible to remove node as result of shiftings to
   neighbors even when we insert or paste item. */
static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree,
				      struct tree_balance *tb, int h)
{
	struct buffer_head *Sh = PATH_H_PBUFFER(tb->tb_path, h);
	int levbytes = tb->insert_size[h];
	struct item_head *ih;
	struct reiserfs_key *r_key = NULL;

	ih = B_N_PITEM_HEAD(Sh, 0);
	if (tb->CFR[h])
		r_key = B_N_PDELIM_KEY(tb->CFR[h], tb->rkey[h]);

	/* S[h] can be emptied into its neighbors only if everything it
	   holds, plus the levbytes being added, fits into lfree + sfree +
	   rfree.  On the leaf level a first item mergeable with L (and an
	   item mergeable at the right delimiting key) saves an IH_SIZE
	   header each; on internal levels an extra KEY_SIZE must also fit. */
	if (lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes
	    /* shifting may merge items which might save space */
	    -
	    ((!h
	      && op_is_left_mergeable(&(ih->ih_key), Sh->b_size)) ? IH_SIZE : 0)
	    -
	    ((!h && r_key
	      && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0)
	    + ((h) ? KEY_SIZE : 0)) {
		/* node can not be removed */
		if (sfree >= levbytes) {	/* new item fits into node S[h] without any shifting */
			if (!h)
				tb->s0num =
				    B_NR_ITEMS(Sh) +
				    ((mode == M_INSERT) ? 1 : 0);
			set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
			return NO_BALANCING_NEEDED;
		}
	}
	PROC_INFO_INC(tb->tb_sb, can_node_be_removed[h]);
	return !NO_BALANCING_NEEDED;
}
1198 
1199 /* Check whether current node S[h] is balanced when increasing its size by
1200  * Inserting or Pasting.
1201  * Calculate parameters for balancing for current level h.
1202  * Parameters:
1203  *	tb	tree_balance structure;
1204  *	h	current level of the node;
1205  *	inum	item number in S[h];
1206  *	mode	i - insert, p - paste;
1207  * Returns:	1 - schedule occurred;
1208  *	        0 - balancing for higher levels needed;
1209  *	       -1 - no balancing for higher levels needed;
1210  *	       -2 - no disk space.
1211  */
1212 /* ip means Inserting or Pasting */
static int ip_check_balance(struct tree_balance *tb, int h)
{
	struct virtual_node *vn = tb->tb_vn;
	int levbytes,		/* Number of bytes that must be inserted into (value
				   is negative if bytes are deleted) buffer which
				   contains node being balanced.  The mnemonic is
				   that the attempted change in node space used level
				   is levbytes bytes. */
	 n_ret_value;

	int lfree, sfree, rfree /* free space in L, S and R */ ;

	/* nver is short for number of vertixes, and lnver is the number if
	   we shift to the left, rnver is the number if we shift to the
	   right, and lrnver is the number if we shift in both directions.
	   The goal is to minimize first the number of vertixes, and second,
	   the number of vertixes whose contents are changed by shifting,
	   and third the number of uncached vertixes whose contents are
	   changed by shifting and must be read from disk.  */
	int nver, lnver, rnver, lrnver;

	/* used at leaf level only, S0 = S[0] is the node being balanced,
	   sInum [ I = 0,1,2 ] is the number of items that will
	   remain in node SI after balancing.  S1 and S2 are new
	   nodes that might be created. */

	/* we perform 8 calls to get_num_ver().  For each call we calculate five parameters.
	   where 4th parameter is s1bytes and 5th - s2bytes
	 */
	short snum012[40] = { 0, };	/* s0num, s1num, s2num for 8 cases
					   0,1 - do not shift and do not shift but bottle
					   2 - shift only whole item to left
					   3 - shift to left and bottle as much as possible
					   4,5 - shift to right (whole items and as much as possible
					   6,7 - shift to both directions (whole items and as much as possible)
					 */

	/* Sh is the node whose balance is currently being checked */
	struct buffer_head *Sh;

	Sh = PATH_H_PBUFFER(tb->tb_path, h);
	levbytes = tb->insert_size[h];

	/* Calculate balance parameters for creating new root. */
	if (!Sh) {
		if (!h)
			reiserfs_panic(tb->tb_sb,
				       "vs-8210: ip_check_balance: S[0] can not be 0");
		switch (n_ret_value = get_empty_nodes(tb, h)) {
		case CARRY_ON:
			set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
			return NO_BALANCING_NEEDED;	/* no balancing for higher levels needed */

		case NO_DISK_SPACE:
		case REPEAT_SEARCH:
			return n_ret_value;
		default:
			reiserfs_panic(tb->tb_sb,
				       "vs-8215: ip_check_balance: incorrect return value of get_empty_nodes");
		}
	}

	if ((n_ret_value = get_parents(tb, h)) != CARRY_ON)	/* get parents of S[h] neighbors. */
		return n_ret_value;

	sfree = B_FREE_SPACE(Sh);

	/* get free space of neighbors */
	rfree = get_rfree(tb, h);
	lfree = get_lfree(tb, h);

	if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) ==
	    NO_BALANCING_NEEDED)
		/* and new item fits into node S[h] without any shifting */
		return NO_BALANCING_NEEDED;

	create_virtual_node(tb, h);

	/*
	   determine maximal number of items we can shift to the left neighbor (in tb structure)
	   and the maximal number of bytes that can flow to the left neighbor
	   from the left most liquid item that cannot be shifted from S[0] entirely (returned value)
	 */
	check_left(tb, h, lfree);

	/*
	   determine maximal number of items we can shift to the right neighbor (in tb structure)
	   and the maximal number of bytes that can flow to the right neighbor
	   from the right most liquid item that cannot be shifted from S[0] entirely (returned value)
	 */
	check_right(tb, h, rfree);

	/* all contents of internal node S[h] can be moved into its
	   neighbors, S[h] will be removed after balancing */
	if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) {
		int to_r;

		/* Since we are working on internal nodes, and our internal
		   nodes have fixed size entries, then we can balance by the
		   number of items rather than the space they consume.  In this
		   routine we set the left node equal to the right node,
		   allowing a difference of less than or equal to 1 child
		   pointer. */
		to_r =
		    ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
		     vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
						tb->rnum[h]);
		set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL,
			       -1, -1);
		return CARRY_ON;
	}

	/* this checks balance condition, that any two neighboring nodes can not fit in one node */
	RFALSE(h &&
	       (tb->lnum[h] >= vn->vn_nr_item + 1 ||
		tb->rnum[h] >= vn->vn_nr_item + 1),
	       "vs-8220: tree is not balanced on internal level");
	RFALSE(!h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) ||
		      (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))),
	       "vs-8225: tree is not balanced on leaf level");

	/* all contents of S[0] can be moved into its neighbors
	   S[0] will be removed after balancing. */
	if (!h && is_leaf_removable(tb))
		return CARRY_ON;

	/* why do we perform this check here rather than earlier??
	   Answer: we can win 1 node in some cases above. Moreover we
	   checked it above, when we checked, that S[0] is not removable
	   in principle */
	if (sfree >= levbytes) {	/* new item fits into node S[h] without any shifting */
		if (!h)
			tb->s0num = vn->vn_nr_item;
		set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
		return NO_BALANCING_NEEDED;
	}

	{
		int lpar, rpar, nset, lset, rset, lrset;
		/*
		 * regular overflowing of the node
		 */

		/* get_num_ver works in 2 modes (FLOW & NO_FLOW)
		   lpar, rpar - number of items we can shift to left/right neighbor (including splitting item)
		   nset, lset, rset, lrset - shows, whether flowing items give better packing
		 */
#define FLOW 1
#define NO_FLOW 0		/* do not do any splitting */

		/* we choose one of the following */
#define NOTHING_SHIFT_NO_FLOW	0
#define NOTHING_SHIFT_FLOW	5
#define LEFT_SHIFT_NO_FLOW	10
#define LEFT_SHIFT_FLOW		15
#define RIGHT_SHIFT_NO_FLOW	20
#define RIGHT_SHIFT_FLOW	25
#define LR_SHIFT_NO_FLOW	30
#define LR_SHIFT_FLOW		35

		lpar = tb->lnum[h];
		rpar = tb->rnum[h];

		/* calculate number of blocks S[h] must be split into when
		   nothing is shifted to the neighbors,
		   as well as number of items in each part of the split node (s012 numbers),
		   and number of bytes (s1bytes) of the shared drop which flow to S1 if any */
		nset = NOTHING_SHIFT_NO_FLOW;
		nver = get_num_ver(vn->vn_mode, tb, h,
				   0, -1, h ? vn->vn_nr_item : 0, -1,
				   snum012, NO_FLOW);

		if (!h) {
			int nver1;

			/* note, that in this case we try to bottle between S[0] and S1 (S1 - the first new node) */
			nver1 = get_num_ver(vn->vn_mode, tb, h,
					    0, -1, 0, -1,
					    snum012 + NOTHING_SHIFT_FLOW, FLOW);
			if (nver > nver1)
				nset = NOTHING_SHIFT_FLOW, nver = nver1;
		}

		/* calculate number of blocks S[h] must be split into when
		   l_shift_num first items and l_shift_bytes of the right most
		   liquid item to be shifted are shifted to the left neighbor,
		   as well as number of items in each part of the splitted node (s012 numbers),
		   and number of bytes (s1bytes) of the shared drop which flow to S1 if any
		 */
		lset = LEFT_SHIFT_NO_FLOW;
		lnver = get_num_ver(vn->vn_mode, tb, h,
				    lpar - ((h || tb->lbytes == -1) ? 0 : 1),
				    -1, h ? vn->vn_nr_item : 0, -1,
				    snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW);
		if (!h) {
			int lnver1;

			lnver1 = get_num_ver(vn->vn_mode, tb, h,
					     lpar -
					     ((tb->lbytes != -1) ? 1 : 0),
					     tb->lbytes, 0, -1,
					     snum012 + LEFT_SHIFT_FLOW, FLOW);
			if (lnver > lnver1)
				lset = LEFT_SHIFT_FLOW, lnver = lnver1;
		}

		/* calculate number of blocks S[h] must be split into when
		   r_shift_num first items and r_shift_bytes of the left most
		   liquid item to be shifted are shifted to the right neighbor,
		   as well as number of items in each part of the splitted node (s012 numbers),
		   and number of bytes (s1bytes) of the shared drop which flow to S1 if any
		 */
		rset = RIGHT_SHIFT_NO_FLOW;
		rnver = get_num_ver(vn->vn_mode, tb, h,
				    0, -1,
				    h ? (vn->vn_nr_item - rpar) : (rpar -
								   ((tb->
								     rbytes !=
								     -1) ? 1 :
								    0)), -1,
				    snum012 + RIGHT_SHIFT_NO_FLOW, NO_FLOW);
		if (!h) {
			int rnver1;

			rnver1 = get_num_ver(vn->vn_mode, tb, h,
					     0, -1,
					     (rpar -
					      ((tb->rbytes != -1) ? 1 : 0)),
					     tb->rbytes,
					     snum012 + RIGHT_SHIFT_FLOW, FLOW);

			if (rnver > rnver1)
				rset = RIGHT_SHIFT_FLOW, rnver = rnver1;
		}

		/* calculate number of blocks S[h] must be split into when
		   items are shifted in both directions,
		   as well as number of items in each part of the splitted node (s012 numbers),
		   and number of bytes (s1bytes) of the shared drop which flow to S1 if any
		 */
		lrset = LR_SHIFT_NO_FLOW;
		lrnver = get_num_ver(vn->vn_mode, tb, h,
				     lpar - ((h || tb->lbytes == -1) ? 0 : 1),
				     -1,
				     h ? (vn->vn_nr_item - rpar) : (rpar -
								    ((tb->
								      rbytes !=
								      -1) ? 1 :
								     0)), -1,
				     snum012 + LR_SHIFT_NO_FLOW, NO_FLOW);
		if (!h) {
			int lrnver1;

			lrnver1 = get_num_ver(vn->vn_mode, tb, h,
					      lpar -
					      ((tb->lbytes != -1) ? 1 : 0),
					      tb->lbytes,
					      (rpar -
					       ((tb->rbytes != -1) ? 1 : 0)),
					      tb->rbytes,
					      snum012 + LR_SHIFT_FLOW, FLOW);
			if (lrnver > lrnver1)
				lrset = LR_SHIFT_FLOW, lrnver = lrnver1;
		}

		/* Our general shifting strategy is:
		   1) to minimized number of new nodes;
		   2) to minimized number of neighbors involved in shifting;
		   3) to minimized number of disk reads; */

		/* we can win TWO or ONE nodes by shifting in both directions */
		if (lrnver < lnver && lrnver < rnver) {
			RFALSE(h &&
			       (tb->lnum[h] != 1 ||
				tb->rnum[h] != 1 ||
				lrnver != 1 || rnver != 2 || lnver != 2
				|| h != 1), "vs-8230: bad h");
			if (lrset == LR_SHIFT_FLOW)
				set_parameters(tb, h, tb->lnum[h], tb->rnum[h],
					       lrnver, snum012 + lrset,
					       tb->lbytes, tb->rbytes);
			else
				set_parameters(tb, h,
					       tb->lnum[h] -
					       ((tb->lbytes == -1) ? 0 : 1),
					       tb->rnum[h] -
					       ((tb->rbytes == -1) ? 0 : 1),
					       lrnver, snum012 + lrset, -1, -1);

			return CARRY_ON;
		}

		/* if shifting doesn't lead to better packing then don't shift */
		if (nver == lrnver) {
			set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1,
				       -1);
			return CARRY_ON;
		}

		/* now we know that for better packing shifting in only one
		   direction either to the left or to the right is required */

		/*  if shifting to the left is better than shifting to the right */
		if (lnver < rnver) {
			SET_PAR_SHIFT_LEFT;
			return CARRY_ON;
		}

		/* if shifting to the right is better than shifting to the left */
		if (lnver > rnver) {
			SET_PAR_SHIFT_RIGHT;
			return CARRY_ON;
		}

		/* now shifting in either direction gives the same number
		   of nodes and we can make use of the cached neighbors */
		if (is_left_neighbor_in_cache(tb, h)) {
			SET_PAR_SHIFT_LEFT;
			return CARRY_ON;
		}

		/* shift to the right independently on whether the right neighbor in cache or not */
		SET_PAR_SHIFT_RIGHT;
		return CARRY_ON;
	}
}
1539 
1540 /* Check whether current node S[h] is balanced when Decreasing its size by
1541  * Deleting or Cutting for INTERNAL node of S+tree.
1542  * Calculate parameters for balancing for current level h.
1543  * Parameters:
1544  *	tb	tree_balance structure;
1545  *	h	current level of the node;
1546  *	inum	item number in S[h];
1547  *	mode	i - insert, p - paste;
1548  * Returns:	1 - schedule occurred;
1549  *	        0 - balancing for higher levels needed;
1550  *	       -1 - no balancing for higher levels needed;
1551  *	       -2 - no disk space.
1552  *
1553  * Note: Items of internal nodes have fixed size, so the balance condition for
1554  * the internal part of S+tree is as for the B-trees.
1555  */
1556 static int dc_check_balance_internal(struct tree_balance *tb, int h)
1557 {
1558 	struct virtual_node *vn = tb->tb_vn;
1559 
1560 	/* Sh is the node whose balance is currently being checked,
1561 	   and Fh is its father.  */
1562 	struct buffer_head *Sh, *Fh;
1563 	int maxsize, n_ret_value;
1564 	int lfree, rfree /* free space in L and R */ ;
1565 
1566 	Sh = PATH_H_PBUFFER(tb->tb_path, h);
1567 	Fh = PATH_H_PPARENT(tb->tb_path, h);
1568 
1569 	maxsize = MAX_CHILD_SIZE(Sh);
1570 
1571 /*   using tb->insert_size[h], which is negative in this case, create_virtual_node calculates: */
1572 /*   new_nr_item = number of items node would have if operation is */
1573 /* 	performed without balancing (new_nr_item); */
1574 	create_virtual_node(tb, h);
1575 
1576 	if (!Fh) {		/* S[h] is the root. */
1577 		if (vn->vn_nr_item > 0) {
1578 			set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1579 			return NO_BALANCING_NEEDED;	/* no balancing for higher levels needed */
1580 		}
1581 		/* new_nr_item == 0.
1582 		 * Current root will be deleted resulting in
1583 		 * decrementing the tree height. */
1584 		set_parameters(tb, h, 0, 0, 0, NULL, -1, -1);
1585 		return CARRY_ON;
1586 	}
1587 
1588 	if ((n_ret_value = get_parents(tb, h)) != CARRY_ON)
1589 		return n_ret_value;
1590 
1591 	/* get free space of neighbors */
1592 	rfree = get_rfree(tb, h);
1593 	lfree = get_lfree(tb, h);
1594 
1595 	/* determine maximal number of items we can fit into neighbors */
1596 	check_left(tb, h, lfree);
1597 	check_right(tb, h, rfree);
1598 
1599 	if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) {	/* Balance condition for the internal node is valid.
1600 						 * In this case we balance only if it leads to better packing. */
1601 		if (vn->vn_nr_item == MIN_NR_KEY(Sh)) {	/* Here we join S[h] with one of its neighbors,
1602 							 * which is impossible with greater values of new_nr_item. */
1603 			if (tb->lnum[h] >= vn->vn_nr_item + 1) {
1604 				/* All contents of S[h] can be moved to L[h]. */
1605 				int n;
1606 				int order_L;
1607 
1608 				order_L =
1609 				    ((n =
1610 				      PATH_H_B_ITEM_ORDER(tb->tb_path,
1611 							  h)) ==
1612 				     0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
1613 				n = dc_size(B_N_CHILD(tb->FL[h], order_L)) /
1614 				    (DC_SIZE + KEY_SIZE);
1615 				set_parameters(tb, h, -n - 1, 0, 0, NULL, -1,
1616 					       -1);
1617 				return CARRY_ON;
1618 			}
1619 
1620 			if (tb->rnum[h] >= vn->vn_nr_item + 1) {
1621 				/* All contents of S[h] can be moved to R[h]. */
1622 				int n;
1623 				int order_R;
1624 
1625 				order_R =
1626 				    ((n =
1627 				      PATH_H_B_ITEM_ORDER(tb->tb_path,
1628 							  h)) ==
1629 				     B_NR_ITEMS(Fh)) ? 0 : n + 1;
1630 				n = dc_size(B_N_CHILD(tb->FR[h], order_R)) /
1631 				    (DC_SIZE + KEY_SIZE);
1632 				set_parameters(tb, h, 0, -n - 1, 0, NULL, -1,
1633 					       -1);
1634 				return CARRY_ON;
1635 			}
1636 		}
1637 
1638 		if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
1639 			/* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
1640 			int to_r;
1641 
1642 			to_r =
1643 			    ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] -
1644 			     tb->rnum[h] + vn->vn_nr_item + 1) / 2 -
1645 			    (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]);
1646 			set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r,
1647 				       0, NULL, -1, -1);
1648 			return CARRY_ON;
1649 		}
1650 
1651 		/* Balancing does not lead to better packing. */
1652 		set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
1653 		return NO_BALANCING_NEEDED;
1654 	}
1655 
1656 	/* Current node contain insufficient number of items. Balancing is required. */
1657 	/* Check whether we can merge S[h] with left neighbor. */
1658 	if (tb->lnum[h] >= vn->vn_nr_item + 1)
1659 		if (is_left_neighbor_in_cache(tb, h)
1660 		    || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h]) {
1661 			int n;
1662 			int order_L;
1663 
1664 			order_L =
1665 			    ((n =
1666 			      PATH_H_B_ITEM_ORDER(tb->tb_path,
1667 						  h)) ==
1668 			     0) ? B_NR_ITEMS(tb->FL[h]) : n - 1;
1669 			n = dc_size(B_N_CHILD(tb->FL[h], order_L)) / (DC_SIZE +
1670 								      KEY_SIZE);
1671 			set_parameters(tb, h, -n - 1, 0, 0, NULL, -1, -1);
1672 			return CARRY_ON;
1673 		}
1674 
1675 	/* Check whether we can merge S[h] with right neighbor. */
1676 	if (tb->rnum[h] >= vn->vn_nr_item + 1) {
1677 		int n;
1678 		int order_R;
1679 
1680 		order_R =
1681 		    ((n =
1682 		      PATH_H_B_ITEM_ORDER(tb->tb_path,
1683 					  h)) == B_NR_ITEMS(Fh)) ? 0 : (n + 1);
1684 		n = dc_size(B_N_CHILD(tb->FR[h], order_R)) / (DC_SIZE +
1685 							      KEY_SIZE);
1686 		set_parameters(tb, h, 0, -n - 1, 0, NULL, -1, -1);
1687 		return CARRY_ON;
1688 	}
1689 
1690 	/* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */
1691 	if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) {
1692 		int to_r;
1693 
1694 		to_r =
1695 		    ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] +
1696 		     vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 -
1697 						tb->rnum[h]);
1698 		set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL,
1699 			       -1, -1);
1700 		return CARRY_ON;
1701 	}
1702 
1703 	/* For internal nodes try to borrow item from a neighbor */
1704 	RFALSE(!tb->FL[h] && !tb->FR[h], "vs-8235: trying to borrow for root");
1705 
1706 	/* Borrow one or two items from caching neighbor */
1707 	if (is_left_neighbor_in_cache(tb, h) || !tb->FR[h]) {
1708 		int from_l;
1709 
1710 		from_l =
1711 		    (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item +
1712 		     1) / 2 - (vn->vn_nr_item + 1);
1713 		set_parameters(tb, h, -from_l, 0, 1, NULL, -1, -1);
1714 		return CARRY_ON;
1715 	}
1716 
1717 	set_parameters(tb, h, 0,
1718 		       -((MAX_NR_KEY(Sh) + 1 - tb->rnum[h] + vn->vn_nr_item +
1719 			  1) / 2 - (vn->vn_nr_item + 1)), 1, NULL, -1, -1);
1720 	return CARRY_ON;
1721 }
1722 
1723 /* Check whether current node S[h] is balanced when Decreasing its size by
1724  * Deleting or Truncating for LEAF node of S+tree.
1725  * Calculate parameters for balancing for current level h.
1726  * Parameters:
1727  *	tb	tree_balance structure;
1728  *	h	current level of the node;
1729  *	inum	item number in S[h];
1730  *	mode	i - insert, p - paste;
1731  * Returns:	1 - schedule occurred;
1732  *	        0 - balancing for higher levels needed;
1733  *	       -1 - no balancing for higher levels needed;
1734  *	       -2 - no disk space.
1735  */
static int dc_check_balance_leaf(struct tree_balance *tb, int h)
{
	struct virtual_node *vn = tb->tb_vn;

	/* Number of bytes that must be deleted from
	   (value is negative if bytes are deleted) buffer which
	   contains node being balanced.  The mnemonic is that the
	   attempted change in node space used level is levbytes bytes. */
	int levbytes;
	/* the maximal item size */
	int maxsize, n_ret_value;
	/* S0 is the node whose balance is currently being checked,
	   and F0 is its father.  */
	struct buffer_head *S0, *F0;
	int lfree, rfree /* free space in L and R */ ;

	S0 = PATH_H_PBUFFER(tb->tb_path, 0);
	F0 = PATH_H_PPARENT(tb->tb_path, 0);

	levbytes = tb->insert_size[h];

	maxsize = MAX_CHILD_SIZE(S0);	/* maximal possible size of an item */

	if (!F0) {		/* S[0] is the root now. */

		/* The delete/cut must not leave the root completely empty. */
		RFALSE(-levbytes >= maxsize - B_FREE_SPACE(S0),
		       "vs-8240: attempt to create empty buffer tree");

		set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
		return NO_BALANCING_NEEDED;
	}

	/* Pin the parents of the neighbors; a schedule, I/O error or
	   disk-space problem here aborts the analysis. */
	if ((n_ret_value = get_parents(tb, h)) != CARRY_ON)
		return n_ret_value;

	/* get free space of neighbors */
	rfree = get_rfree(tb, h);
	lfree = get_lfree(tb, h);

	create_virtual_node(tb, h);

	/* if all 3 leaves can be merged into one, set parameters and return */
	if (are_leaves_removable(tb, lfree, rfree))
		return CARRY_ON;

	/* determine maximal number of items we can shift to the left/right  neighbor
	   and the maximal number of bytes that can flow to the left/right neighbor
	   from the left/right most liquid item that cannot be shifted from S[0] entirely
	 */
	check_left(tb, h, lfree);
	check_right(tb, h, rfree);

	/* check whether we can merge S with left neighbor. */
	if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1)
		/* Prefer the left neighbor when it is already cached, or when
		   a merge with the right neighbor is not possible anyway. */
		if (is_left_neighbor_in_cache(tb, h) || ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) < vn->vn_nr_item) ||	/* S can not be merged with R */
		    !tb->FR[h]) {

			RFALSE(!tb->FL[h],
			       "vs-8245: dc_check_balance_leaf: FL[h] must exist");

			/* set parameter to merge S[0] with its left neighbor */
			set_parameters(tb, h, -1, 0, 0, NULL, -1, -1);
			return CARRY_ON;
		}

	/* check whether we can merge S[0] with right neighbor. */
	if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) {
		set_parameters(tb, h, 0, -1, 0, NULL, -1, -1);
		return CARRY_ON;
	}

	/* All contents of S[0] can be moved to the neighbors (L[0] & R[0]). Set parameters and return */
	if (is_leaf_removable(tb))
		return CARRY_ON;

	/* Balancing is not required. */
	tb->s0num = vn->vn_nr_item;
	set_parameters(tb, h, 0, 0, 1, NULL, -1, -1);
	return NO_BALANCING_NEEDED;
}
1816 
1817 /* Check whether current node S[h] is balanced when Decreasing its size by
1818  * Deleting or Cutting.
1819  * Calculate parameters for balancing for current level h.
1820  * Parameters:
1821  *	tb	tree_balance structure;
1822  *	h	current level of the node;
1823  *	inum	item number in S[h];
1824  *	mode	d - delete, c - cut.
1825  * Returns:	1 - schedule occurred;
1826  *	        0 - balancing for higher levels needed;
1827  *	       -1 - no balancing for higher levels needed;
1828  *	       -2 - no disk space.
1829  */
1830 static int dc_check_balance(struct tree_balance *tb, int h)
1831 {
1832 	RFALSE(!(PATH_H_PBUFFER(tb->tb_path, h)),
1833 	       "vs-8250: S is not initialized");
1834 
1835 	if (h)
1836 		return dc_check_balance_internal(tb, h);
1837 	else
1838 		return dc_check_balance_leaf(tb, h);
1839 }
1840 
1841 /* Check whether current node S[h] is balanced.
1842  * Calculate parameters for balancing for current level h.
1843  * Parameters:
1844  *
1845  *	tb	tree_balance structure:
1846  *
1847  *              tb is a large structure that must be read about in the header file
1848  *              at the same time as this procedure if the reader is to successfully
1849  *              understand this procedure
1850  *
1851  *	h	current level of the node;
1852  *	inum	item number in S[h];
1853  *	mode	i - insert, p - paste, d - delete, c - cut.
1854  * Returns:	1 - schedule occurred;
1855  *	        0 - balancing for higher levels needed;
1856  *	       -1 - no balancing for higher levels needed;
1857  *	       -2 - no disk space.
1858  */
1859 static int check_balance(int mode,
1860 			 struct tree_balance *tb,
1861 			 int h,
1862 			 int inum,
1863 			 int pos_in_item,
1864 			 struct item_head *ins_ih, const void *data)
1865 {
1866 	struct virtual_node *vn;
1867 
1868 	vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf);
1869 	vn->vn_free_ptr = (char *)(tb->tb_vn + 1);
1870 	vn->vn_mode = mode;
1871 	vn->vn_affected_item_num = inum;
1872 	vn->vn_pos_in_item = pos_in_item;
1873 	vn->vn_ins_ih = ins_ih;
1874 	vn->vn_data = data;
1875 
1876 	RFALSE(mode == M_INSERT && !vn->vn_ins_ih,
1877 	       "vs-8255: ins_ih can not be 0 in insert mode");
1878 
1879 	if (tb->insert_size[h] > 0)
1880 		/* Calculate balance parameters when size of node is increasing. */
1881 		return ip_check_balance(tb, h);
1882 
1883 	/* Calculate balance parameters when  size of node is decreasing. */
1884 	return dc_check_balance(tb, h);
1885 }
1886 
1887 /* Check whether parent at the path is the really parent of the current node.*/
static int get_direct_parent(struct tree_balance *p_s_tb, int n_h)
{
	struct buffer_head *p_s_bh;
	struct path *p_s_path = p_s_tb->tb_path;
	int n_position,
	    n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h);

	/* We are in the root or in the new root. */
	if (n_path_offset <= FIRST_PATH_ELEMENT_OFFSET) {

		RFALSE(n_path_offset < FIRST_PATH_ELEMENT_OFFSET - 1,
		       "PAP-8260: invalid offset in the path");

		if (PATH_OFFSET_PBUFFER(p_s_path, FIRST_PATH_ELEMENT_OFFSET)->
		    b_blocknr == SB_ROOT_BLOCK(p_s_tb->tb_sb)) {
			/* Root is not changed.  There is no parent to speak
			   of, so record an empty slot above the root. */
			PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1) = NULL;
			PATH_OFFSET_POSITION(p_s_path, n_path_offset - 1) = 0;
			return CARRY_ON;
		}
		return REPEAT_SEARCH;	/* Root is changed and we must recalculate the path. */
	}

	if (!B_IS_IN_TREE
	    (p_s_bh = PATH_OFFSET_PBUFFER(p_s_path, n_path_offset - 1)))
		return REPEAT_SEARCH;	/* Parent in the path is not in the tree. */

	/* The recorded child position may also have been invalidated by a
	   concurrent tree change. */
	if ((n_position =
	     PATH_OFFSET_POSITION(p_s_path,
				  n_path_offset - 1)) > B_NR_ITEMS(p_s_bh))
		return REPEAT_SEARCH;

	if (B_N_CHILD_NUM(p_s_bh, n_position) !=
	    PATH_OFFSET_PBUFFER(p_s_path, n_path_offset)->b_blocknr)
		/* Parent in the path is not parent of the current node in the tree. */
		return REPEAT_SEARCH;

	/* The parent may be under I/O: wait for it, and if the tree changed
	   while we slept the whole search must be redone. */
	if (buffer_locked(p_s_bh)) {
		__wait_on_buffer(p_s_bh);
		if (FILESYSTEM_CHANGED_TB(p_s_tb))
			return REPEAT_SEARCH;
	}

	return CARRY_ON;	/* Parent in the path is unlocked and really parent of the current node.  */
}
1933 
1934 /* Using lnum[n_h] and rnum[n_h] we should determine what neighbors
1935  * of S[n_h] we
1936  * need in order to balance S[n_h], and get them if necessary.
1937  * Returns:	SCHEDULE_OCCURRED - schedule occurred while the function worked;
1938  *	        CARRY_ON - schedule didn't occur while the function worked;
1939  */
static int get_neighbors(struct tree_balance *p_s_tb, int n_h)
{
	int n_child_position,
	    n_path_offset = PATH_H_PATH_OFFSET(p_s_tb->tb_path, n_h + 1);
	unsigned long n_son_number;
	struct super_block *p_s_sb = p_s_tb->tb_sb;
	struct buffer_head *p_s_bh;

	PROC_INFO_INC(p_s_sb, get_neighbors[n_h]);

	if (p_s_tb->lnum[n_h]) {
		/* We need left neighbor to balance S[n_h]. */
		PROC_INFO_INC(p_s_sb, need_l_neighbor[n_h]);
		p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset);

		RFALSE(p_s_bh == p_s_tb->FL[n_h] &&
		       !PATH_OFFSET_POSITION(p_s_tb->tb_path, n_path_offset),
		       "PAP-8270: invalid position in the parent");

		/* If FL[n_h] is S[n_h]'s own parent the left neighbor sits at
		   lkey[n_h]; otherwise it is the last child of FL[n_h]. */
		n_child_position =
		    (p_s_bh ==
		     p_s_tb->FL[n_h]) ? p_s_tb->lkey[n_h] : B_NR_ITEMS(p_s_tb->
								       FL[n_h]);
		n_son_number = B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position);
		p_s_bh = sb_bread(p_s_sb, n_son_number);
		if (!p_s_bh)
			return IO_ERROR;
		/* sb_bread may have slept; if the tree changed, the neighbor
		   just read may no longer be valid. */
		if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
			decrement_bcount(p_s_bh);
			PROC_INFO_INC(p_s_sb, get_neighbors_restart[n_h]);
			return REPEAT_SEARCH;
		}

		RFALSE(!B_IS_IN_TREE(p_s_tb->FL[n_h]) ||
		       n_child_position > B_NR_ITEMS(p_s_tb->FL[n_h]) ||
		       B_N_CHILD_NUM(p_s_tb->FL[n_h], n_child_position) !=
		       p_s_bh->b_blocknr, "PAP-8275: invalid parent");
		RFALSE(!B_IS_IN_TREE(p_s_bh), "PAP-8280: invalid child");
		RFALSE(!n_h &&
		       B_FREE_SPACE(p_s_bh) !=
		       MAX_CHILD_SIZE(p_s_bh) -
		       dc_size(B_N_CHILD(p_s_tb->FL[0], n_child_position)),
		       "PAP-8290: invalid child size of left neighbor");

		/* Drop the previously cached left neighbor (if any) and
		   keep the freshly read one. */
		decrement_bcount(p_s_tb->L[n_h]);
		p_s_tb->L[n_h] = p_s_bh;
	}

	if (p_s_tb->rnum[n_h]) {	/* We need right neighbor to balance S[n_path_offset]. */
		PROC_INFO_INC(p_s_sb, need_r_neighbor[n_h]);
		p_s_bh = PATH_OFFSET_PBUFFER(p_s_tb->tb_path, n_path_offset);

		RFALSE(p_s_bh == p_s_tb->FR[n_h] &&
		       PATH_OFFSET_POSITION(p_s_tb->tb_path,
					    n_path_offset) >=
		       B_NR_ITEMS(p_s_bh),
		       "PAP-8295: invalid position in the parent");

		/* Mirror image of the left case: right neighbor is either at
		   rkey[n_h]+1 in the shared parent or the first child of FR. */
		n_child_position =
		    (p_s_bh == p_s_tb->FR[n_h]) ? p_s_tb->rkey[n_h] + 1 : 0;
		n_son_number = B_N_CHILD_NUM(p_s_tb->FR[n_h], n_child_position);
		p_s_bh = sb_bread(p_s_sb, n_son_number);
		if (!p_s_bh)
			return IO_ERROR;
		if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
			decrement_bcount(p_s_bh);
			PROC_INFO_INC(p_s_sb, get_neighbors_restart[n_h]);
			return REPEAT_SEARCH;
		}
		decrement_bcount(p_s_tb->R[n_h]);
		p_s_tb->R[n_h] = p_s_bh;

		RFALSE(!n_h
		       && B_FREE_SPACE(p_s_bh) !=
		       MAX_CHILD_SIZE(p_s_bh) -
		       dc_size(B_N_CHILD(p_s_tb->FR[0], n_child_position)),
		       "PAP-8300: invalid child size of right neighbor (%d != %d - %d)",
		       B_FREE_SPACE(p_s_bh), MAX_CHILD_SIZE(p_s_bh),
		       dc_size(B_N_CHILD(p_s_tb->FR[0], n_child_position)));

	}
	return CARRY_ON;
}
2023 
2024 #ifdef CONFIG_REISERFS_CHECK
2025 void *reiserfs_kmalloc(size_t size, gfp_t flags, struct super_block *s)
2026 {
2027 	void *vp;
2028 	static size_t malloced;
2029 
2030 	vp = kmalloc(size, flags);
2031 	if (vp) {
2032 		REISERFS_SB(s)->s_kmallocs += size;
2033 		if (REISERFS_SB(s)->s_kmallocs > malloced + 200000) {
2034 			reiserfs_warning(s,
2035 					 "vs-8301: reiserfs_kmalloc: allocated memory %d",
2036 					 REISERFS_SB(s)->s_kmallocs);
2037 			malloced = REISERFS_SB(s)->s_kmallocs;
2038 		}
2039 	}
2040 	return vp;
2041 }
2042 
2043 void reiserfs_kfree(const void *vp, size_t size, struct super_block *s)
2044 {
2045 	kfree(vp);
2046 
2047 	REISERFS_SB(s)->s_kmallocs -= size;
2048 	if (REISERFS_SB(s)->s_kmallocs < 0)
2049 		reiserfs_warning(s,
2050 				 "vs-8302: reiserfs_kfree: allocated memory %d",
2051 				 REISERFS_SB(s)->s_kmallocs);
2052 
2053 }
2054 #endif
2055 
2056 static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh)
2057 {
2058 	int max_num_of_items;
2059 	int max_num_of_entries;
2060 	unsigned long blocksize = sb->s_blocksize;
2061 
2062 #define MIN_NAME_LEN 1
2063 
2064 	max_num_of_items = (blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN);
2065 	max_num_of_entries = (blocksize - BLKH_SIZE - IH_SIZE) /
2066 	    (DEH_SIZE + MIN_NAME_LEN);
2067 
2068 	return sizeof(struct virtual_node) +
2069 	    max(max_num_of_items * sizeof(struct virtual_item),
2070 		sizeof(struct virtual_item) + sizeof(struct direntry_uarea) +
2071 		(max_num_of_entries - 1) * sizeof(__u16));
2072 }
2073 
2074 /* maybe we should fail balancing we are going to perform when kmalloc
2075    fails several times. But now it will loop until kmalloc gets
2076    required memory */
static int get_mem_for_virtual_node(struct tree_balance *tb)
{
	int check_fs = 0;
	int size;
	char *buf;

	/* Size needed to describe the leaf at the end of the current path. */
	size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path));

	if (size > tb->vn_buf_size) {
		/* we have to allocate more memory for virtual node */
		if (tb->vn_buf) {
			/* free memory allocated before */
			reiserfs_kfree(tb->vn_buf, tb->vn_buf_size, tb->tb_sb);
			/* this is not needed if kfree is atomic */
			check_fs = 1;
		}

		/* virtual node requires now more memory */
		tb->vn_buf_size = size;

		/* get memory for virtual item; try atomically first so we
		   do not have to give up the path and resources we hold */
		buf =
		    reiserfs_kmalloc(size, GFP_ATOMIC | __GFP_NOWARN,
				     tb->tb_sb);
		if (!buf) {
			/* getting memory with GFP_KERNEL priority may involve
			   balancing now (due to indirect_to_direct conversion on
			   dcache shrinking). So, release path and collected
			   resources here */
			free_buffers_in_tb(tb);
			buf = reiserfs_kmalloc(size, GFP_NOFS, tb->tb_sb);
			if (!buf) {
#ifdef CONFIG_REISERFS_CHECK
				reiserfs_warning(tb->tb_sb,
						 "vs-8345: get_mem_for_virtual_node: "
						 "kmalloc failed. reiserfs kmalloced %d bytes",
						 REISERFS_SB(tb->tb_sb)->
						 s_kmallocs);
#endif
				/* mark the buffer unusable so the next pass
				   re-attempts the allocation from scratch */
				tb->vn_buf_size = 0;
			}
			tb->vn_buf = buf;
			/* resources were released above, so the search must
			   be repeated even when this retry succeeded */
			schedule();
			return REPEAT_SEARCH;
		}

		tb->vn_buf = buf;
	}

	/* if we may have slept in kfree above, recheck the tree generation */
	if (check_fs && FILESYSTEM_CHANGED_TB(tb))
		return REPEAT_SEARCH;

	return CARRY_ON;
}
2131 
2132 #ifdef CONFIG_REISERFS_CHECK
2133 static void tb_buffer_sanity_check(struct super_block *p_s_sb,
2134 				   struct buffer_head *p_s_bh,
2135 				   const char *descr, int level)
2136 {
2137 	if (p_s_bh) {
2138 		if (atomic_read(&(p_s_bh->b_count)) <= 0) {
2139 
2140 			reiserfs_panic(p_s_sb,
2141 				       "jmacd-1: tb_buffer_sanity_check(): negative or zero reference counter for buffer %s[%d] (%b)\n",
2142 				       descr, level, p_s_bh);
2143 		}
2144 
2145 		if (!buffer_uptodate(p_s_bh)) {
2146 			reiserfs_panic(p_s_sb,
2147 				       "jmacd-2: tb_buffer_sanity_check(): buffer is not up to date %s[%d] (%b)\n",
2148 				       descr, level, p_s_bh);
2149 		}
2150 
2151 		if (!B_IS_IN_TREE(p_s_bh)) {
2152 			reiserfs_panic(p_s_sb,
2153 				       "jmacd-3: tb_buffer_sanity_check(): buffer is not in tree %s[%d] (%b)\n",
2154 				       descr, level, p_s_bh);
2155 		}
2156 
2157 		if (p_s_bh->b_bdev != p_s_sb->s_bdev) {
2158 			reiserfs_panic(p_s_sb,
2159 				       "jmacd-4: tb_buffer_sanity_check(): buffer has wrong device %s[%d] (%b)\n",
2160 				       descr, level, p_s_bh);
2161 		}
2162 
2163 		if (p_s_bh->b_size != p_s_sb->s_blocksize) {
2164 			reiserfs_panic(p_s_sb,
2165 				       "jmacd-5: tb_buffer_sanity_check(): buffer has wrong blocksize %s[%d] (%b)\n",
2166 				       descr, level, p_s_bh);
2167 		}
2168 
2169 		if (p_s_bh->b_blocknr > SB_BLOCK_COUNT(p_s_sb)) {
2170 			reiserfs_panic(p_s_sb,
2171 				       "jmacd-6: tb_buffer_sanity_check(): buffer block number too high %s[%d] (%b)\n",
2172 				       descr, level, p_s_bh);
2173 		}
2174 	}
2175 }
2176 #else
/* No-op stand-in when CONFIG_REISERFS_CHECK is disabled. */
static void tb_buffer_sanity_check(struct super_block *p_s_sb,
				   struct buffer_head *p_s_bh,
				   const char *descr, int level)
{
}
2182 #endif
2183 
/* Try to journal-prepare bh without waiting (third argument 0).  Callers
 * in this file treat a zero return as "buffer still locked/busy" and wait
 * on the buffer before rescanning. */
static int clear_all_dirty_bits(struct super_block *s, struct buffer_head *bh)
{
	return reiserfs_prepare_for_journal(s, bh, 0);
}
2188 
/* Scan every buffer gathered for balancing (path buffers, neighbors and
 * their parents/common parents, and the preallocated FEB nodes) and try to
 * journal-prepare each without waiting.  If any buffer is busy, wait on it
 * and rescan from the top.  Returns CARRY_ON when everything is ready, or
 * REPEAT_SEARCH if the filesystem changed while we waited. */
static int wait_tb_buffers_until_unlocked(struct tree_balance *p_s_tb)
{
	struct buffer_head *locked;
#ifdef CONFIG_REISERFS_CHECK
	int repeat_counter = 0;
#endif
	int i;

	do {

		locked = NULL;

		/* First pass: the buffers on the search path itself. */
		for (i = p_s_tb->tb_path->path_length;
		     !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) {
			if (PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) {
				/* if I understand correctly, we can only be sure the last buffer
				 ** in the path is in the tree --clm
				 */
#ifdef CONFIG_REISERFS_CHECK
				if (PATH_PLAST_BUFFER(p_s_tb->tb_path) ==
				    PATH_OFFSET_PBUFFER(p_s_tb->tb_path, i)) {
					tb_buffer_sanity_check(p_s_tb->tb_sb,
							       PATH_OFFSET_PBUFFER
							       (p_s_tb->tb_path,
								i), "S",
							       p_s_tb->tb_path->
							       path_length - i);
				}
#endif
				if (!clear_all_dirty_bits(p_s_tb->tb_sb,
							  PATH_OFFSET_PBUFFER
							  (p_s_tb->tb_path,
							   i))) {
					locked =
					    PATH_OFFSET_PBUFFER(p_s_tb->tb_path,
								i);
				}
			}
		}

		/* Second pass: neighbors (L/R), their fathers (FL/FR) and
		   common fathers (CFL/CFR) for every level being balanced. */
		for (i = 0; !locked && i < MAX_HEIGHT && p_s_tb->insert_size[i];
		     i++) {

			if (p_s_tb->lnum[i]) {

				if (p_s_tb->L[i]) {
					tb_buffer_sanity_check(p_s_tb->tb_sb,
							       p_s_tb->L[i],
							       "L", i);
					if (!clear_all_dirty_bits
					    (p_s_tb->tb_sb, p_s_tb->L[i]))
						locked = p_s_tb->L[i];
				}

				if (!locked && p_s_tb->FL[i]) {
					tb_buffer_sanity_check(p_s_tb->tb_sb,
							       p_s_tb->FL[i],
							       "FL", i);
					if (!clear_all_dirty_bits
					    (p_s_tb->tb_sb, p_s_tb->FL[i]))
						locked = p_s_tb->FL[i];
				}

				if (!locked && p_s_tb->CFL[i]) {
					tb_buffer_sanity_check(p_s_tb->tb_sb,
							       p_s_tb->CFL[i],
							       "CFL", i);
					if (!clear_all_dirty_bits
					    (p_s_tb->tb_sb, p_s_tb->CFL[i]))
						locked = p_s_tb->CFL[i];
				}

			}

			if (!locked && (p_s_tb->rnum[i])) {

				if (p_s_tb->R[i]) {
					tb_buffer_sanity_check(p_s_tb->tb_sb,
							       p_s_tb->R[i],
							       "R", i);
					if (!clear_all_dirty_bits
					    (p_s_tb->tb_sb, p_s_tb->R[i]))
						locked = p_s_tb->R[i];
				}

				if (!locked && p_s_tb->FR[i]) {
					tb_buffer_sanity_check(p_s_tb->tb_sb,
							       p_s_tb->FR[i],
							       "FR", i);
					if (!clear_all_dirty_bits
					    (p_s_tb->tb_sb, p_s_tb->FR[i]))
						locked = p_s_tb->FR[i];
				}

				if (!locked && p_s_tb->CFR[i]) {
					tb_buffer_sanity_check(p_s_tb->tb_sb,
							       p_s_tb->CFR[i],
							       "CFR", i);
					if (!clear_all_dirty_bits
					    (p_s_tb->tb_sb, p_s_tb->CFR[i]))
						locked = p_s_tb->CFR[i];
				}
			}
		}
		/* as far as I can tell, this is not required.  The FEB list seems
		 ** to be full of newly allocated nodes, which will never be locked,
		 ** dirty, or anything else.
		 ** To be safe, I'm putting in the checks and waits in.  For the moment,
		 ** they are needed to keep the code in journal.c from complaining
		 ** about the buffer.  That code is inside CONFIG_REISERFS_CHECK as well.
		 ** --clm
		 */
		for (i = 0; !locked && i < MAX_FEB_SIZE; i++) {
			if (p_s_tb->FEB[i]) {
				if (!clear_all_dirty_bits
				    (p_s_tb->tb_sb, p_s_tb->FEB[i]))
					locked = p_s_tb->FEB[i];
			}
		}

		if (locked) {
#ifdef CONFIG_REISERFS_CHECK
			repeat_counter++;
			if ((repeat_counter % 10000) == 0) {
				reiserfs_warning(p_s_tb->tb_sb,
						 "wait_tb_buffers_until_released(): too many "
						 "iterations waiting for buffer to unlock "
						 "(%b)", locked);

				/* Don't loop forever.  Try to recover from possible error. */

				return (FILESYSTEM_CHANGED_TB(p_s_tb)) ?
				    REPEAT_SEARCH : CARRY_ON;
			}
#endif
			/* Block on the busy buffer, then recheck whether the
			   analysis is still valid before rescanning. */
			__wait_on_buffer(locked);
			if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
				return REPEAT_SEARCH;
			}
		}

	} while (locked);

	return CARRY_ON;
}
2334 
2335 /* Prepare for balancing, that is
2336  *	get all necessary parents, and neighbors;
2337  *	analyze what and where should be moved;
2338  *	get sufficient number of new nodes;
2339  * Balancing will start only after all resources will be collected at a time.
2340  *
2341  * When ported to SMP kernels, only at the last moment after all needed nodes
2342  * are collected in cache, will the resources be locked using the usual
2343  * textbook ordered lock acquisition algorithms.  Note that ensuring that
2344  * this code neither write locks what it does not need to write lock nor locks out of order
2345  * will be a pain in the butt that could have been avoided.  Grumble grumble. -Hans
2346  *
2347  * fix is meant in the sense of render unchanging
2348  *
2349  * Latency might be improved by first gathering a list of what buffers are needed
2350  * and then getting as many of them in parallel as possible? -Hans
2351  *
2352  * Parameters:
2353  *	op_mode	i - insert, d - delete, c - cut (truncate), p - paste (append)
2354  *	tb	tree_balance structure;
2355  *	inum	item number in S[h];
2356  *      pos_in_item - comment this if you can
2357  *      ins_ih & ins_sd are used when inserting
2358  * Returns:	1 - schedule occurred while the function worked;
2359  *	        0 - schedule didn't occur while the function worked;
2360  *             -1 - if no_disk_space
2361  */
2362 
int fix_nodes(int n_op_mode, struct tree_balance *p_s_tb, struct item_head *p_s_ins_ih,	// item head of item being inserted
	      const void *data	// inserted item or data to be pasted
    )
{
	int n_ret_value, n_h, n_item_num = PATH_LAST_POSITION(p_s_tb->tb_path);
	int n_pos_in_item;

	/* we set wait_tb_buffers_run when we have to restore any dirty bits cleared
	 ** during wait_tb_buffers_run
	 */
	int wait_tb_buffers_run = 0;
	struct buffer_head *p_s_tbS0 = PATH_PLAST_BUFFER(p_s_tb->tb_path);

	++REISERFS_SB(p_s_tb->tb_sb)->s_fix_nodes;

	n_pos_in_item = p_s_tb->tb_path->pos_in_item;

	/* Snapshot the filesystem generation; FILESYSTEM_CHANGED_TB compares
	   against this to detect concurrent tree modifications. */
	p_s_tb->fs_gen = get_generation(p_s_tb->tb_sb);

	/* we prepare and log the super here so it will already be in the
	 ** transaction when do_balance needs to change it.
	 ** This way do_balance won't have to schedule when trying to prepare
	 ** the super for logging
	 */
	reiserfs_prepare_for_journal(p_s_tb->tb_sb,
				     SB_BUFFER_WITH_SB(p_s_tb->tb_sb), 1);
	journal_mark_dirty(p_s_tb->transaction_handle, p_s_tb->tb_sb,
			   SB_BUFFER_WITH_SB(p_s_tb->tb_sb));
	if (FILESYSTEM_CHANGED_TB(p_s_tb))
		return REPEAT_SEARCH;

	/* S[0] may be locked; this is possible e.g. during an
	   indirect_to_direct conversion, so wait for it */
	if (buffer_locked(p_s_tbS0)) {
		__wait_on_buffer(p_s_tbS0);
		if (FILESYSTEM_CHANGED_TB(p_s_tb))
			return REPEAT_SEARCH;
	}
#ifdef CONFIG_REISERFS_CHECK
	if (cur_tb) {
		print_cur_tb("fix_nodes");
		reiserfs_panic(p_s_tb->tb_sb,
			       "PAP-8305: fix_nodes:  there is pending do_balance");
	}

	if (!buffer_uptodate(p_s_tbS0) || !B_IS_IN_TREE(p_s_tbS0)) {
		reiserfs_panic(p_s_tb->tb_sb,
			       "PAP-8320: fix_nodes: S[0] (%b %z) is not uptodate "
			       "at the beginning of fix_nodes or not in tree (mode %c)",
			       p_s_tbS0, p_s_tbS0, n_op_mode);
	}

	/* Check parameters. */
	switch (n_op_mode) {
	case M_INSERT:
		if (n_item_num <= 0 || n_item_num > B_NR_ITEMS(p_s_tbS0))
			reiserfs_panic(p_s_tb->tb_sb,
				       "PAP-8330: fix_nodes: Incorrect item number %d (in S0 - %d) in case of insert",
				       n_item_num, B_NR_ITEMS(p_s_tbS0));
		break;
	case M_PASTE:
	case M_DELETE:
	case M_CUT:
		if (n_item_num < 0 || n_item_num >= B_NR_ITEMS(p_s_tbS0)) {
			print_block(p_s_tbS0, 0, -1, -1);
			reiserfs_panic(p_s_tb->tb_sb,
				       "PAP-8335: fix_nodes: Incorrect item number(%d); mode = %c insert_size = %d\n",
				       n_item_num, n_op_mode,
				       p_s_tb->insert_size[0]);
		}
		break;
	default:
		reiserfs_panic(p_s_tb->tb_sb,
			       "PAP-8340: fix_nodes: Incorrect mode of operation");
	}
#endif

	if (get_mem_for_virtual_node(p_s_tb) == REPEAT_SEARCH)
		// FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat
		return REPEAT_SEARCH;

	/* Starting from the leaf level; for all levels n_h of the tree. */
	for (n_h = 0; n_h < MAX_HEIGHT && p_s_tb->insert_size[n_h]; n_h++) {
		if ((n_ret_value = get_direct_parent(p_s_tb, n_h)) != CARRY_ON) {
			goto repeat;
		}

		if ((n_ret_value =
		     check_balance(n_op_mode, p_s_tb, n_h, n_item_num,
				   n_pos_in_item, p_s_ins_ih,
				   data)) != CARRY_ON) {
			if (n_ret_value == NO_BALANCING_NEEDED) {
				/* No balancing for higher levels needed. */
				if ((n_ret_value =
				     get_neighbors(p_s_tb, n_h)) != CARRY_ON) {
					goto repeat;
				}
				if (n_h != MAX_HEIGHT - 1)
					p_s_tb->insert_size[n_h + 1] = 0;
				/* ok, analysis and resource gathering are complete */
				break;
			}
			goto repeat;
		}

		if ((n_ret_value = get_neighbors(p_s_tb, n_h)) != CARRY_ON) {
			goto repeat;
		}

		if ((n_ret_value = get_empty_nodes(p_s_tb, n_h)) != CARRY_ON) {
			goto repeat;	/* No disk space, or schedule occurred and
					   analysis may be invalid and needs to be redone. */
		}

		if (!PATH_H_PBUFFER(p_s_tb->tb_path, n_h)) {
			/* We have a positive insert size but no nodes exist on this
			   level, this means that we are creating a new root. */

			RFALSE(p_s_tb->blknum[n_h] != 1,
			       "PAP-8350: creating new empty root");

			if (n_h < MAX_HEIGHT - 1)
				p_s_tb->insert_size[n_h + 1] = 0;
		} else if (!PATH_H_PBUFFER(p_s_tb->tb_path, n_h + 1)) {
			if (p_s_tb->blknum[n_h] > 1) {
				/* The tree needs to be grown, so this node S[n_h]
				   which is the root node is split into two nodes,
				   and a new node (S[n_h+1]) will be created to
				   become the root node.  */

				RFALSE(n_h == MAX_HEIGHT - 1,
				       "PAP-8355: attempt to create too high of a tree");

				p_s_tb->insert_size[n_h + 1] =
				    (DC_SIZE +
				     KEY_SIZE) * (p_s_tb->blknum[n_h] - 1) +
				    DC_SIZE;
			} else if (n_h < MAX_HEIGHT - 1)
				p_s_tb->insert_size[n_h + 1] = 0;
		} else
			p_s_tb->insert_size[n_h + 1] =
			    (DC_SIZE + KEY_SIZE) * (p_s_tb->blknum[n_h] - 1);
	}

	/* Analysis done for all levels: make every collected buffer
	   journal-prepared and unlocked before balancing proceeds. */
	if ((n_ret_value = wait_tb_buffers_until_unlocked(p_s_tb)) == CARRY_ON) {
		if (FILESYSTEM_CHANGED_TB(p_s_tb)) {
			wait_tb_buffers_run = 1;
			n_ret_value = REPEAT_SEARCH;
			goto repeat;
		} else {
			return CARRY_ON;
		}
	} else {
		wait_tb_buffers_run = 1;
		goto repeat;
	}

      repeat:
	// fix_nodes was unable to perform its calculation due to
	// filesystem got changed under us, lack of free disk space or i/o
	// failure. If the first is the case - the search will be
	// repeated. For now - free all resources acquired so far except
	// for the new allocated nodes
	{
		int i;

		/* Release path buffers. */
		if (wait_tb_buffers_run) {
			pathrelse_and_restore(p_s_tb->tb_sb, p_s_tb->tb_path);
		} else {
			pathrelse(p_s_tb->tb_path);
		}
		/* brelse all resources collected for balancing */
		for (i = 0; i < MAX_HEIGHT; i++) {
			if (wait_tb_buffers_run) {
				reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
								 p_s_tb->L[i]);
				reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
								 p_s_tb->R[i]);
				reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
								 p_s_tb->FL[i]);
				reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
								 p_s_tb->FR[i]);
				reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
								 p_s_tb->
								 CFL[i]);
				reiserfs_restore_prepared_buffer(p_s_tb->tb_sb,
								 p_s_tb->
								 CFR[i]);
			}

			brelse(p_s_tb->L[i]);
			p_s_tb->L[i] = NULL;
			brelse(p_s_tb->R[i]);
			p_s_tb->R[i] = NULL;
			brelse(p_s_tb->FL[i]);
			p_s_tb->FL[i] = NULL;
			brelse(p_s_tb->FR[i]);
			p_s_tb->FR[i] = NULL;
			brelse(p_s_tb->CFL[i]);
			p_s_tb->CFL[i] = NULL;
			brelse(p_s_tb->CFR[i]);
			p_s_tb->CFR[i] = NULL;
		}

		if (wait_tb_buffers_run) {
			for (i = 0; i < MAX_FEB_SIZE; i++) {
				if (p_s_tb->FEB[i]) {
					reiserfs_restore_prepared_buffer
					    (p_s_tb->tb_sb, p_s_tb->FEB[i]);
				}
			}
		}
		return n_ret_value;
	}

}
2579 
2580 /* Anatoly will probably forgive me renaming p_s_tb to tb. I just
2581    wanted to make lines shorter */
2582 void unfix_nodes(struct tree_balance *tb)
2583 {
2584 	int i;
2585 
2586 	/* Release path buffers. */
2587 	pathrelse_and_restore(tb->tb_sb, tb->tb_path);
2588 
2589 	/* brelse all resources collected for balancing */
2590 	for (i = 0; i < MAX_HEIGHT; i++) {
2591 		reiserfs_restore_prepared_buffer(tb->tb_sb, tb->L[i]);
2592 		reiserfs_restore_prepared_buffer(tb->tb_sb, tb->R[i]);
2593 		reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FL[i]);
2594 		reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FR[i]);
2595 		reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFL[i]);
2596 		reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFR[i]);
2597 
2598 		brelse(tb->L[i]);
2599 		brelse(tb->R[i]);
2600 		brelse(tb->FL[i]);
2601 		brelse(tb->FR[i]);
2602 		brelse(tb->CFL[i]);
2603 		brelse(tb->CFR[i]);
2604 	}
2605 
2606 	/* deal with list of allocated (used and unused) nodes */
2607 	for (i = 0; i < MAX_FEB_SIZE; i++) {
2608 		if (tb->FEB[i]) {
2609 			b_blocknr_t blocknr = tb->FEB[i]->b_blocknr;
2610 			/* de-allocated block which was not used by balancing and
2611 			   bforget about buffer for it */
2612 			brelse(tb->FEB[i]);
2613 			reiserfs_free_block(tb->transaction_handle, NULL,
2614 					    blocknr, 0);
2615 		}
2616 		if (tb->used[i]) {
2617 			/* release used as new nodes including a new root */
2618 			brelse(tb->used[i]);
2619 		}
2620 	}
2621 
2622 	if (tb->vn_buf)
2623 		reiserfs_kfree(tb->vn_buf, tb->vn_buf_size, tb->tb_sb);
2624 
2625 }
2626