xref: /openbmc/linux/fs/reiserfs/do_balan.c (revision afb46f79)
1 /*
2  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3  */
4 
5 /* Now we have all buffers that must be used in balancing of the tree 	*/
6 /* Further calculations can not cause schedule(), and thus the buffer 	*/
7 /* tree will be stable until the balancing will be finished 		*/
8 /* balance the tree according to the analysis made before,		*/
9 /* and using buffers obtained after all above.				*/
10 
11 /**
12  ** balance_leaf_when_delete
13  ** balance_leaf
14  ** do_balance
15  **
16  **/
17 
18 #include <asm/uaccess.h>
19 #include <linux/time.h>
20 #include "reiserfs.h"
21 #include <linux/buffer_head.h>
22 #include <linux/kernel.h>
23 
24 static inline void buffer_info_init_left(struct tree_balance *tb,
25                                          struct buffer_info *bi)
26 {
27 	bi->tb          = tb;
28 	bi->bi_bh       = tb->L[0];
29 	bi->bi_parent   = tb->FL[0];
30 	bi->bi_position = get_left_neighbor_position(tb, 0);
31 }
32 
33 static inline void buffer_info_init_right(struct tree_balance *tb,
34                                           struct buffer_info *bi)
35 {
36 	bi->tb          = tb;
37 	bi->bi_bh       = tb->R[0];
38 	bi->bi_parent   = tb->FR[0];
39 	bi->bi_position = get_right_neighbor_position(tb, 0);
40 }
41 
42 static inline void buffer_info_init_tbS0(struct tree_balance *tb,
43                                          struct buffer_info *bi)
44 {
45 	bi->tb          = tb;
46 	bi->bi_bh        = PATH_PLAST_BUFFER(tb->tb_path);
47 	bi->bi_parent   = PATH_H_PPARENT(tb->tb_path, 0);
48 	bi->bi_position = PATH_H_POSITION(tb->tb_path, 1);
49 }
50 
51 static inline void buffer_info_init_bh(struct tree_balance *tb,
52                                        struct buffer_info *bi,
53                                        struct buffer_head *bh)
54 {
55 	bi->tb          = tb;
56 	bi->bi_bh       = bh;
57 	bi->bi_parent   = NULL;
58 	bi->bi_position = 0;
59 }
60 
61 inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
62 				       struct buffer_head *bh, int flag)
63 {
64 	journal_mark_dirty(tb->transaction_handle,
65 			   tb->transaction_handle->t_super, bh);
66 }
67 
68 #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
69 #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
70 
71 /* summary:
72  if deleting something ( tb->insert_size[0] < 0 )
73    return(balance_leaf_when_delete()); (flag d handled here)
74  else
75    if lnum is larger than 0 we put items into the left node
76    if rnum is larger than 0 we put items into the right node
77    if snum1 is larger than 0 we put items into the new node s1
78    if snum2 is larger than 0 we put items into the new node s2
79 Note that all *num* count new items being created.
80 
81 It would be easier to read balance_leaf() if each of these summary
82 lines was a separate procedure rather than being inlined.  I think
83 that there are many passages here and in balance_leaf_when_delete() in
84 which two calls to one procedure can replace two passages, and it
85 might save cache space and improve software maintenance costs to do so.
86 
87 Vladimir made the perceptive comment that we should offload most of
88 the decision making in this function into fix_nodes/check_balance, and
89 then create some sort of structure in tb that says what actions should
90 be performed by do_balance.
91 
92 -Hans */
93 
94 /* Balance leaf node in case of delete or cut: insert_size[0] < 0
95  *
96  * lnum, rnum can have values >= -1
97  *	-1 means that the neighbor must be joined with S
98  *	 0 means that nothing should be done with the neighbor
99  *	>0 means to shift entirely or partly the specified number of items to the neighbor
100  */
101 static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
102 {
103 	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
104 	int item_pos = PATH_LAST_POSITION(tb->tb_path);
105 	int pos_in_item = tb->tb_path->pos_in_item;
106 	struct buffer_info bi;
107 	int n;
108 	struct item_head *ih;
109 
110 	RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
111 	       "vs- 12000: level: wrong FR %z", tb->FR[0]);
112 	RFALSE(tb->blknum[0] > 1,
113 	       "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
114 	RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
115 	       "PAP-12010: tree can not be empty");
116 
117 	ih = B_N_PITEM_HEAD(tbS0, item_pos);
118 	buffer_info_init_tbS0(tb, &bi);
119 
120 	/* Delete or truncate the item */
121 
122 	switch (flag) {
123 	case M_DELETE:		/* delete item in S[0] */
124 
125 		RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
126 		       "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
127 		       -tb->insert_size[0], ih);
128 
129 		leaf_delete_items(&bi, 0, item_pos, 1, -1);
130 
131 		if (!item_pos && tb->CFL[0]) {
132 			if (B_NR_ITEMS(tbS0)) {
133 				replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0,
134 					    0);
135 			} else {
136 				if (!PATH_H_POSITION(tb->tb_path, 1))
137 					replace_key(tb, tb->CFL[0], tb->lkey[0],
138 						    PATH_H_PPARENT(tb->tb_path,
139 								   0), 0);
140 			}
141 		}
142 
143 		RFALSE(!item_pos && !tb->CFL[0],
144 		       "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
145 		       tb->L[0]);
146 
147 		break;
148 
149 	case M_CUT:{		/* cut item in S[0] */
150 			if (is_direntry_le_ih(ih)) {
151 
152 				/* UFS unlink semantics are such that you can only delete one directory entry at a time. */
153 				/* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */
154 				tb->insert_size[0] = -1;
155 				leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
156 						     -tb->insert_size[0]);
157 
158 				RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
159 				       "PAP-12030: can not change delimiting key. CFL[0]=%p",
160 				       tb->CFL[0]);
161 
162 				if (!item_pos && !pos_in_item && tb->CFL[0]) {
163 					replace_key(tb, tb->CFL[0], tb->lkey[0],
164 						    tbS0, 0);
165 				}
166 			} else {
167 				leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
168 						     -tb->insert_size[0]);
169 
170 				RFALSE(!ih_item_len(ih),
171 				       "PAP-12035: cut must leave non-zero dynamic length of item");
172 			}
173 			break;
174 		}
175 
176 	default:
177 		print_cur_tb("12040");
178 		reiserfs_panic(tb->tb_sb, "PAP-12040",
179 			       "unexpected mode: %s(%d)",
180 			       (flag ==
181 				M_PASTE) ? "PASTE" : ((flag ==
182 						       M_INSERT) ? "INSERT" :
183 						      "UNKNOWN"), flag);
184 	}
185 
186 	/* the rule is that no shifting occurs unless by shifting a node can be freed */
187 	n = B_NR_ITEMS(tbS0);
188 	if (tb->lnum[0]) {	/* L[0] takes part in balancing */
189 		if (tb->lnum[0] == -1) {	/* L[0] must be joined with S[0] */
190 			if (tb->rnum[0] == -1) {	/* R[0] must be also joined with S[0] */
191 				if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
192 					/* all contents of all the 3 buffers will be in L[0] */
193 					if (PATH_H_POSITION(tb->tb_path, 1) == 0
194 					    && 1 < B_NR_ITEMS(tb->FR[0]))
195 						replace_key(tb, tb->CFL[0],
196 							    tb->lkey[0],
197 							    tb->FR[0], 1);
198 
199 					leaf_move_items(LEAF_FROM_S_TO_L, tb, n,
200 							-1, NULL);
201 					leaf_move_items(LEAF_FROM_R_TO_L, tb,
202 							B_NR_ITEMS(tb->R[0]),
203 							-1, NULL);
204 
205 					reiserfs_invalidate_buffer(tb, tbS0);
206 					reiserfs_invalidate_buffer(tb,
207 								   tb->R[0]);
208 
209 					return 0;
210 				}
211 				/* all contents of all the 3 buffers will be in R[0] */
212 				leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1,
213 						NULL);
214 				leaf_move_items(LEAF_FROM_L_TO_R, tb,
215 						B_NR_ITEMS(tb->L[0]), -1, NULL);
216 
217 				/* right_delimiting_key is correct in R[0] */
218 				replace_key(tb, tb->CFR[0], tb->rkey[0],
219 					    tb->R[0], 0);
220 
221 				reiserfs_invalidate_buffer(tb, tbS0);
222 				reiserfs_invalidate_buffer(tb, tb->L[0]);
223 
224 				return -1;
225 			}
226 
227 			RFALSE(tb->rnum[0] != 0,
228 			       "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
229 			/* all contents of L[0] and S[0] will be in L[0] */
230 			leaf_shift_left(tb, n, -1);
231 
232 			reiserfs_invalidate_buffer(tb, tbS0);
233 
234 			return 0;
235 		}
236 		/* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */
237 
238 		RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
239 		       (tb->lnum[0] + tb->rnum[0] > n + 1),
240 		       "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
241 		       tb->rnum[0], tb->lnum[0], n);
242 		RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
243 		       (tb->lbytes != -1 || tb->rbytes != -1),
244 		       "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
245 		       tb->rbytes, tb->lbytes);
246 		RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
247 		       (tb->lbytes < 1 || tb->rbytes != -1),
248 		       "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
249 		       tb->rbytes, tb->lbytes);
250 
251 		leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
252 		leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
253 
254 		reiserfs_invalidate_buffer(tb, tbS0);
255 
256 		return 0;
257 	}
258 
259 	if (tb->rnum[0] == -1) {
260 		/* all contents of R[0] and S[0] will be in R[0] */
261 		leaf_shift_right(tb, n, -1);
262 		reiserfs_invalidate_buffer(tb, tbS0);
263 		return 0;
264 	}
265 
266 	RFALSE(tb->rnum[0],
267 	       "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
268 	return 0;
269 }
270 
271 static int balance_leaf(struct tree_balance *tb, struct item_head *ih,	/* item header of inserted item (this is on little endian) */
272 			const char *body,	/* body  of inserted item or bytes to paste */
273 			int flag,	/* i - insert, d - delete, c - cut, p - paste
274 					   (see comment to do_balance) */
275 			struct item_head *insert_key,	/* in our processing of one level we sometimes determine what
276 							   must be inserted into the next higher level.  This insertion
277 							   consists of a key or two keys and their corresponding
278 							   pointers */
279 			struct buffer_head **insert_ptr	/* inserted node-ptrs for the next level */
280     )
281 {
282 	struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
283 	int item_pos = PATH_LAST_POSITION(tb->tb_path);	/*  index into the array of item headers in S[0]
284 							   of the affected item */
285 	struct buffer_info bi;
286 	struct buffer_head *S_new[2];	/* new nodes allocated to hold what could not fit into S */
287 	int snum[2];		/* number of items that will be placed
288 				   into S_new (includes partially shifted
289 				   items) */
290 	int sbytes[2];		/* if an item is partially shifted into S_new then
291 				   if it is a directory item
292 				   it is the number of entries from the item that are shifted into S_new
293 				   else
294 				   it is the number of bytes from the item that are shifted into S_new
295 				 */
296 	int n, i;
297 	int ret_val;
298 	int pos_in_item;
299 	int zeros_num;
300 
301 	PROC_INFO_INC(tb->tb_sb, balance_at[0]);
302 
303 	/* Make balance in case insert_size[0] < 0 */
304 	if (tb->insert_size[0] < 0)
305 		return balance_leaf_when_delete(tb, flag);
306 
307 	zeros_num = 0;
308 	if (flag == M_INSERT && !body)
309 		zeros_num = ih_item_len(ih);
310 
311 	pos_in_item = tb->tb_path->pos_in_item;
312 	/* for indirect item pos_in_item is measured in unformatted node
313 	   pointers. Recalculate to bytes */
314 	if (flag != M_INSERT
315 	    && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos)))
316 		pos_in_item *= UNFM_P_SIZE;
317 
318 	if (tb->lnum[0] > 0) {
319 		/* Shift lnum[0] items from S[0] to the left neighbor L[0] */
320 		if (item_pos < tb->lnum[0]) {
321 			/* new item or it part falls to L[0], shift it too */
322 			n = B_NR_ITEMS(tb->L[0]);
323 
324 			switch (flag) {
325 			case M_INSERT:	/* insert item into L[0] */
326 
327 				if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
328 					/* part of new item falls into L[0] */
329 					int new_item_len;
330 					int version;
331 
332 					ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, -1);
333 
334 					/* Calculate item length to insert to S[0] */
335 					new_item_len = ih_item_len(ih) - tb->lbytes;
336 					/* Calculate and check item length to insert to L[0] */
337 					put_ih_item_len(ih, ih_item_len(ih) - new_item_len);
338 
339 					RFALSE(ih_item_len(ih) <= 0,
340 					       "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
341 					       ih_item_len(ih));
342 
343 					/* Insert new item into L[0] */
344 					buffer_info_init_left(tb, &bi);
345 					leaf_insert_into_buf(&bi,
346 							n + item_pos - ret_val, ih, body,
347 							zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num);
348 
349 					version = ih_version(ih);
350 
351 					/* Calculate key component, item length and body to insert into S[0] */
352 					set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
353 							(tb-> lbytes << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));
354 
355 					put_ih_item_len(ih, new_item_len);
356 					if (tb->lbytes > zeros_num) {
357 						body += (tb->lbytes - zeros_num);
358 						zeros_num = 0;
359 					} else
360 						zeros_num -= tb->lbytes;
361 
362 					RFALSE(ih_item_len(ih) <= 0,
363 					       "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
364 					       ih_item_len(ih));
365 				} else {
366 					/* new item in whole falls into L[0] */
367 					/* Shift lnum[0]-1 items to L[0] */
368 					ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, tb->lbytes);
369 					/* Insert new item into L[0] */
370 					buffer_info_init_left(tb, &bi);
371 					leaf_insert_into_buf(&bi, n + item_pos - ret_val, ih, body, zeros_num);
372 					tb->insert_size[0] = 0;
373 					zeros_num = 0;
374 				}
375 				break;
376 
377 			case M_PASTE:	/* append item in L[0] */
378 
379 				if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
380 					/* we must shift the part of the appended item */
381 					if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) {
382 
383 						RFALSE(zeros_num,
384 						       "PAP-12090: invalid parameter in case of a directory");
385 						/* directory item */
386 						if (tb->lbytes > pos_in_item) {
387 							/* new directory entry falls into L[0] */
388 							struct item_head *pasted;
389 							int l_pos_in_item = pos_in_item;
390 
391 							/* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
392 							ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes-1);
393 							if (ret_val && !item_pos) {
394 								pasted = B_N_PITEM_HEAD(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1);
395 								l_pos_in_item += I_ENTRY_COUNT(pasted) - (tb->lbytes -1);
396 							}
397 
398 							/* Append given directory entry to directory item */
399 							buffer_info_init_left(tb, &bi);
400 							leaf_paste_in_buffer(&bi, n + item_pos - ret_val, l_pos_in_item, tb->insert_size[0], body, zeros_num);
401 
402 							/* previous string prepared space for pasting new entry, following string pastes this entry */
403 
404 							/* when we have merge directory item, pos_in_item has been changed too */
405 
406 							/* paste new directory entry. 1 is entry number */
407 							leaf_paste_entries(&bi, n + item_pos - ret_val, l_pos_in_item,
408 									   1, (struct reiserfs_de_head *) body,
409 									   body + DEH_SIZE, tb->insert_size[0]);
410 							tb->insert_size[0] = 0;
411 						} else {
412 							/* new directory item doesn't fall into L[0] */
413 							/* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
414 							leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
415 						}
416 						/* Calculate new position to append in item body */
417 						pos_in_item -= tb->lbytes;
418 					} else {
419 						/* regular object */
420 						RFALSE(tb->lbytes <= 0, "PAP-12095: there is nothing to shift to L[0]. lbytes=%d", tb->lbytes);
421 						RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),
422 						       "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
423 						       ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),pos_in_item);
424 
425 						if (tb->lbytes >= pos_in_item) {
426 							/* appended item will be in L[0] in whole */
427 							int l_n;
428 
429 							/* this bytes number must be appended to the last item of L[h] */
430 							l_n = tb->lbytes - pos_in_item;
431 
432 							/* Calculate new insert_size[0] */
433 							tb->insert_size[0] -= l_n;
434 
435 							RFALSE(tb->insert_size[0] <= 0,
436 							       "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
437 							       tb->insert_size[0]);
438 							ret_val = leaf_shift_left(tb, tb->lnum[0], ih_item_len
439 									    (B_N_PITEM_HEAD(tbS0, item_pos)));
440 							/* Append to body of item in L[0] */
441 							buffer_info_init_left(tb, &bi);
442 							leaf_paste_in_buffer
443 							    (&bi, n + item_pos - ret_val, ih_item_len
444 							     (B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val)),
445 							     l_n, body,
446 							     zeros_num > l_n ? l_n : zeros_num);
447 							/* 0-th item in S0 can be only of DIRECT type when l_n != 0 */
448 							{
449 								int version;
450 								int temp_l = l_n;
451 
452 								RFALSE(ih_item_len(B_N_PITEM_HEAD(tbS0, 0)),
453 								     "PAP-12106: item length must be 0");
454 								RFALSE(comp_short_le_keys(B_N_PKEY(tbS0, 0), B_N_PKEY
455 								      (tb->L[0], n + item_pos - ret_val)),
456 								     "PAP-12107: items must be of the same file");
457 								if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) {
458 									temp_l = l_n << (tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT);
459 								}
460 								/* update key of first item in S0 */
461 								version = ih_version(B_N_PITEM_HEAD(tbS0, 0));
462 								set_le_key_k_offset(version, B_N_PKEY(tbS0, 0),
463 								     le_key_k_offset(version,B_N_PKEY(tbS0, 0)) + temp_l);
464 								/* update left delimiting key */
465 								set_le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
466 								     le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0])) + temp_l);
467 							}
468 
469 							/* Calculate new body, position in item and insert_size[0] */
470 							if (l_n > zeros_num) {
471 								body += (l_n - zeros_num);
472 								zeros_num = 0;
473 							} else
474 								zeros_num -= l_n;
475 							pos_in_item = 0;
476 
477 							RFALSE(comp_short_le_keys(B_N_PKEY(tbS0, 0), B_N_PKEY(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1))
478 							     || !op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)
479 							     || !op_is_left_mergeable(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]), tbS0->b_size),
480 							     "PAP-12120: item must be merge-able with left neighboring item");
481 						} else {	/* only part of the appended item will be in L[0] */
482 
483 							/* Calculate position in item for append in S[0] */
484 							pos_in_item -= tb->lbytes;
485 
486 							RFALSE(pos_in_item <= 0, "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item);
487 
488 							/* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
489 							leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
490 						}
491 					}
492 				} else {	/* appended item will be in L[0] in whole */
493 
494 					struct item_head *pasted;
495 
496 					if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) {	/* if we paste into first item of S[0] and it is left mergable */
497 						/* then increment pos_in_item by the size of the last item in L[0] */
498 						pasted = B_N_PITEM_HEAD(tb->L[0], n - 1);
499 						if (is_direntry_le_ih(pasted))
500 							pos_in_item += ih_entry_count(pasted);
501 						else
502 							pos_in_item += ih_item_len(pasted);
503 					}
504 
505 					/* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
506 					ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
507 					/* Append to body of item in L[0] */
508 					buffer_info_init_left(tb, &bi);
509 					leaf_paste_in_buffer(&bi, n + item_pos - ret_val,
510 							     pos_in_item,
511 							     tb->insert_size[0],
512 							     body, zeros_num);
513 
514 					/* if appended item is directory, paste entry */
515 					pasted = B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val);
516 					if (is_direntry_le_ih(pasted))
517 						leaf_paste_entries(&bi, n + item_pos - ret_val,
518 								   pos_in_item, 1,
519 								   (struct reiserfs_de_head *) body,
520 								   body + DEH_SIZE,
521 								   tb->insert_size[0]);
522 					/* if appended item is indirect item, put unformatted node into un list */
523 					if (is_indirect_le_ih(pasted))
524 						set_ih_free_space(pasted, 0);
525 					tb->insert_size[0] = 0;
526 					zeros_num = 0;
527 				}
528 				break;
529 			default:	/* cases d and t */
530 				reiserfs_panic(tb->tb_sb, "PAP-12130",
531 					       "lnum > 0: unexpected mode: "
532 					       " %s(%d)",
533 					       (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
534 			}
535 		} else {
536 			/* new item doesn't fall into L[0] */
537 			leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
538 		}
539 	}
540 
541 	/* tb->lnum[0] > 0 */
542 	/* Calculate new item position */
543 	item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0));
544 
545 	if (tb->rnum[0] > 0) {
546 		/* shift rnum[0] items from S[0] to the right neighbor R[0] */
547 		n = B_NR_ITEMS(tbS0);
548 		switch (flag) {
549 
550 		case M_INSERT:	/* insert item */
551 			if (n - tb->rnum[0] < item_pos) {	/* new item or its part falls to R[0] */
552 				if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) {	/* part of new item falls into R[0] */
553 					loff_t old_key_comp, old_len, r_zeros_number;
554 					const char *r_body;
555 					int version;
556 					loff_t offset;
557 
558 					leaf_shift_right(tb, tb->rnum[0] - 1, -1);
559 
560 					version = ih_version(ih);
561 					/* Remember key component and item length */
562 					old_key_comp = le_ih_k_offset(ih);
563 					old_len = ih_item_len(ih);
564 
565 					/* Calculate key component and item length to insert into R[0] */
566 					offset = le_ih_k_offset(ih) + ((old_len - tb->rbytes) << (is_indirect_le_ih(ih) ? tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT : 0));
567 					set_le_ih_k_offset(ih, offset);
568 					put_ih_item_len(ih, tb->rbytes);
569 					/* Insert part of the item into R[0] */
570 					buffer_info_init_right(tb, &bi);
571 					if ((old_len - tb->rbytes) > zeros_num) {
572 						r_zeros_number = 0;
573 						r_body = body + (old_len - tb->rbytes) - zeros_num;
574 					} else {
575 						r_body = body;
576 						r_zeros_number = zeros_num - (old_len - tb->rbytes);
577 						zeros_num -= r_zeros_number;
578 					}
579 
580 					leaf_insert_into_buf(&bi, 0, ih, r_body,
581 							     r_zeros_number);
582 
583 					/* Replace right delimiting key by first key in R[0] */
584 					replace_key(tb, tb->CFR[0], tb->rkey[0],
585 						    tb->R[0], 0);
586 
587 					/* Calculate key component and item length to insert into S[0] */
588 					set_le_ih_k_offset(ih, old_key_comp);
589 					put_ih_item_len(ih, old_len - tb->rbytes);
590 
591 					tb->insert_size[0] -= tb->rbytes;
592 
593 				} else {	/* whole new item falls into R[0] */
594 
595 					/* Shift rnum[0]-1 items to R[0] */
596 					ret_val = leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes);
597 					/* Insert new item into R[0] */
598 					buffer_info_init_right(tb, &bi);
599 					leaf_insert_into_buf(&bi, item_pos - n + tb->rnum[0] - 1,
600 							     ih, body, zeros_num);
601 
602 					if (item_pos - n + tb->rnum[0] - 1 == 0) {
603 						replace_key(tb, tb->CFR[0],
604 							    tb->rkey[0],
605 							    tb->R[0], 0);
606 
607 					}
608 					zeros_num = tb->insert_size[0] = 0;
609 				}
610 			} else {	/* new item or part of it doesn't fall into R[0] */
611 
612 				leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
613 			}
614 			break;
615 
616 		case M_PASTE:	/* append item */
617 
618 			if (n - tb->rnum[0] <= item_pos) {	/* pasted item or part of it falls to R[0] */
619 				if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) {	/* we must shift the part of the appended item */
620 					if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) {	/* we append to directory item */
621 						int entry_count;
622 
623 						RFALSE(zeros_num,
624 						       "PAP-12145: invalid parameter in case of a directory");
625 						entry_count = I_ENTRY_COUNT(B_N_PITEM_HEAD
626 								  (tbS0, item_pos));
627 						if (entry_count - tb->rbytes <
628 						    pos_in_item)
629 							/* new directory entry falls into R[0] */
630 						{
631 							int paste_entry_position;
632 
633 							RFALSE(tb->rbytes - 1 >= entry_count || !tb-> insert_size[0],
634 							       "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
635 							       tb->rbytes, entry_count);
636 							/* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
637 							leaf_shift_right(tb, tb->rnum[0], tb->rbytes - 1);
638 							/* Paste given directory entry to directory item */
639 							paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1;
640 							buffer_info_init_right(tb, &bi);
641 							leaf_paste_in_buffer(&bi, 0, paste_entry_position, tb->insert_size[0], body, zeros_num);
642 							/* paste entry */
643 							leaf_paste_entries(&bi, 0, paste_entry_position, 1,
644 									   (struct reiserfs_de_head *) body,
645 									   body + DEH_SIZE, tb->insert_size[0]);
646 
647 							if (paste_entry_position == 0) {
648 								/* change delimiting keys */
649 								replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0],0);
650 							}
651 
652 							tb->insert_size[0] = 0;
653 							pos_in_item++;
654 						} else {	/* new directory entry doesn't fall into R[0] */
655 
656 							leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
657 						}
658 					} else {	/* regular object */
659 
660 						int n_shift, n_rem, r_zeros_number;
661 						const char *r_body;
662 
663 						/* Calculate number of bytes which must be shifted from appended item */
664 						if ((n_shift = tb->rbytes - tb->insert_size[0]) < 0)
665 							n_shift = 0;
666 
667 						RFALSE(pos_in_item != ih_item_len
668 						       (B_N_PITEM_HEAD(tbS0, item_pos)),
669 						       "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
670 						       pos_in_item, ih_item_len
671 						       (B_N_PITEM_HEAD(tbS0, item_pos)));
672 
673 						leaf_shift_right(tb, tb->rnum[0], n_shift);
674 						/* Calculate number of bytes which must remain in body after appending to R[0] */
675 						if ((n_rem = tb->insert_size[0] - tb->rbytes) < 0)
676 							n_rem = 0;
677 
678 						{
679 							int version;
680 							unsigned long temp_rem = n_rem;
681 
682 							version = ih_version(B_N_PITEM_HEAD(tb->R[0], 0));
683 							if (is_indirect_le_key(version, B_N_PKEY(tb->R[0], 0))) {
684 								temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT);
685 							}
686 							set_le_key_k_offset(version, B_N_PKEY(tb->R[0], 0),
687 							     le_key_k_offset(version, B_N_PKEY(tb->R[0], 0)) + temp_rem);
688 							set_le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]),
689 							     le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0])) + temp_rem);
690 						}
691 /*		  k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
692 		  k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
693 						do_balance_mark_internal_dirty(tb, tb->CFR[0], 0);
694 
695 						/* Append part of body into R[0] */
696 						buffer_info_init_right(tb, &bi);
697 						if (n_rem > zeros_num) {
698 							r_zeros_number = 0;
699 							r_body = body + n_rem - zeros_num;
700 						} else {
701 							r_body = body;
702 							r_zeros_number = zeros_num - n_rem;
703 							zeros_num -= r_zeros_number;
704 						}
705 
706 						leaf_paste_in_buffer(&bi, 0, n_shift,
707 								     tb->insert_size[0] - n_rem,
708 								     r_body, r_zeros_number);
709 
710 						if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->R[0], 0))) {
711 #if 0
712 							RFALSE(n_rem,
713 							       "PAP-12160: paste more than one unformatted node pointer");
714 #endif
715 							set_ih_free_space(B_N_PITEM_HEAD(tb->R[0], 0), 0);
716 						}
717 						tb->insert_size[0] = n_rem;
718 						if (!n_rem)
719 							pos_in_item++;
720 					}
721 				} else {	/* pasted item in whole falls into R[0] */
722 
723 					struct item_head *pasted;
724 
725 					ret_val = leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
726 					/* append item in R[0] */
727 					if (pos_in_item >= 0) {
728 						buffer_info_init_right(tb, &bi);
729 						leaf_paste_in_buffer(&bi, item_pos - n + tb->rnum[0], pos_in_item,
730 								     tb->insert_size[0], body, zeros_num);
731 					}
732 
733 					/* paste new entry, if item is directory item */
734 					pasted = B_N_PITEM_HEAD(tb->R[0], item_pos - n + tb->rnum[0]);
735 					if (is_direntry_le_ih(pasted) && pos_in_item >= 0) {
736 						leaf_paste_entries(&bi, item_pos - n + tb->rnum[0],
737 								   pos_in_item, 1,
738 								   (struct reiserfs_de_head *) body,
739 								   body + DEH_SIZE, tb->insert_size[0]);
740 						if (!pos_in_item) {
741 
742 							RFALSE(item_pos - n + tb->rnum[0],
743 							       "PAP-12165: directory item must be first item of node when pasting is in 0th position");
744 
745 							/* update delimiting keys */
746 							replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
747 						}
748 					}
749 
750 					if (is_indirect_le_ih(pasted))
751 						set_ih_free_space(pasted, 0);
752 					zeros_num = tb->insert_size[0] = 0;
753 				}
754 			} else {	/* new item doesn't fall into R[0] */
755 
756 				leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
757 			}
758 			break;
759 		default:	/* cases d and t */
760 			reiserfs_panic(tb->tb_sb, "PAP-12175",
761 				       "rnum > 0: unexpected mode: %s(%d)",
762 				       (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
763 		}
764 
765 	}
766 
767 	/* tb->rnum[0] > 0 */
768 	RFALSE(tb->blknum[0] > 3,
769 	       "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]);
770 	RFALSE(tb->blknum[0] < 0,
771 	       "PAP-12185: blknum can not be %d. It must be >= 0", tb->blknum[0]);
772 
773 	/* if while adding to a node we discover that it is possible to split
774 	   it in two, and merge the left part into the left neighbor and the
775 	   right part into the right neighbor, eliminating the node */
776 	if (tb->blknum[0] == 0) {	/* node S[0] is empty now */
777 
778 		RFALSE(!tb->lnum[0] || !tb->rnum[0],
779 		       "PAP-12190: lnum and rnum must not be zero");
780 		/* if insertion was done before 0-th position in R[0], right
781 		   delimiting key of the tb->L[0]'s and left delimiting key are
782 		   not set correctly */
783 		if (tb->CFL[0]) {
784 			if (!tb->CFR[0])
785 				reiserfs_panic(tb->tb_sb, "vs-12195",
786 					       "CFR not initialized");
787 			copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
788 				 B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]));
789 			do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
790 		}
791 
792 		reiserfs_invalidate_buffer(tb, tbS0);
793 		return 0;
794 	}
795 
796 	/* Fill new nodes that appear in place of S[0] */
797 
798 	/* I am told that this copying is because we need an array to enable
799 	   the looping code. -Hans */
800 	snum[0] = tb->s1num, snum[1] = tb->s2num;
801 	sbytes[0] = tb->s1bytes;
802 	sbytes[1] = tb->s2bytes;
803 	for (i = tb->blknum[0] - 2; i >= 0; i--) {
804 
805 		RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i,
806 		       snum[i]);
807 
808 		/* here we shift from S to S_new nodes */
809 
810 		S_new[i] = get_FEB(tb);
811 
812 		/* initialized block type and tree level */
813 		set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL);
814 
815 		n = B_NR_ITEMS(tbS0);
816 
817 		switch (flag) {
818 		case M_INSERT:	/* insert item */
819 
820 			if (n - snum[i] < item_pos) {	/* new item or it's part falls to first new node S_new[i] */
821 				if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) {	/* part of new item falls into S_new[i] */
822 					int old_key_comp, old_len, r_zeros_number;
823 					const char *r_body;
824 					int version;
825 
826 					/* Move snum[i]-1 items from S[0] to S_new[i] */
827 					leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
828 							snum[i] - 1, -1,
829 							S_new[i]);
830 					/* Remember key component and item length */
831 					version = ih_version(ih);
832 					old_key_comp = le_ih_k_offset(ih);
833 					old_len = ih_item_len(ih);
834 
835 					/* Calculate key component and item length to insert into S_new[i] */
836 					set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
837 							   ((old_len - sbytes[i]) << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));
838 
839 					put_ih_item_len(ih, sbytes[i]);
840 
841 					/* Insert part of the item into S_new[i] before 0-th item */
842 					buffer_info_init_bh(tb, &bi, S_new[i]);
843 
844 					if ((old_len - sbytes[i]) > zeros_num) {
845 						r_zeros_number = 0;
846 						r_body = body + (old_len - sbytes[i]) - zeros_num;
847 					} else {
848 						r_body = body;
849 						r_zeros_number = zeros_num - (old_len - sbytes[i]);
850 						zeros_num -= r_zeros_number;
851 					}
852 
853 					leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeros_number);
854 
855 					/* Calculate key component and item length to insert into S[i] */
856 					set_le_ih_k_offset(ih, old_key_comp);
857 					put_ih_item_len(ih, old_len - sbytes[i]);
858 					tb->insert_size[0] -= sbytes[i];
859 				} else {	/* whole new item falls into S_new[i] */
860 
861 					/* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
862 					leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
863 							snum[i] - 1, sbytes[i], S_new[i]);
864 
865 					/* Insert new item into S_new[i] */
866 					buffer_info_init_bh(tb, &bi, S_new[i]);
867 					leaf_insert_into_buf(&bi, item_pos - n + snum[i] - 1,
868 							     ih, body, zeros_num);
869 
870 					zeros_num = tb->insert_size[0] = 0;
871 				}
872 			}
873 
874 			else {	/* new item or it part don't falls into S_new[i] */
875 
876 				leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
877 						snum[i], sbytes[i], S_new[i]);
878 			}
879 			break;
880 
881 		case M_PASTE:	/* append item */
882 
883 			if (n - snum[i] <= item_pos) {	/* pasted item or part if it falls to S_new[i] */
884 				if (item_pos == n - snum[i] && sbytes[i] != -1) {	/* we must shift part of the appended item */
885 					struct item_head *aux_ih;
886 
887 					RFALSE(ih, "PAP-12210: ih must be 0");
888 
889 					aux_ih = B_N_PITEM_HEAD(tbS0, item_pos);
890 					if (is_direntry_le_ih(aux_ih)) {
891 						/* we append to directory item */
892 
893 						int entry_count;
894 
895 						entry_count = ih_entry_count(aux_ih);
896 
897 						if (entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count) {
898 							/* new directory entry falls into S_new[i] */
899 
900 							RFALSE(!tb->insert_size[0], "PAP-12215: insert_size is already 0");
901 							RFALSE(sbytes[i] - 1 >= entry_count,
902 							       "PAP-12220: there are no so much entries (%d), only %d",
903 							       sbytes[i] - 1, entry_count);
904 
905 							/* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
906 							leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i] - 1, S_new[i]);
907 							/* Paste given directory entry to directory item */
908 							buffer_info_init_bh(tb, &bi, S_new[i]);
909 							leaf_paste_in_buffer(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1,
910 							     tb->insert_size[0], body, zeros_num);
911 							/* paste new directory entry */
912 							leaf_paste_entries(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1, 1,
913 									   (struct reiserfs_de_head *) body,
914 									   body + DEH_SIZE, tb->insert_size[0]);
915 							tb->insert_size[0] = 0;
916 							pos_in_item++;
917 						} else {	/* new directory entry doesn't fall into S_new[i] */
918 							leaf_move_items(LEAF_FROM_S_TO_SNEW,tb, snum[i], sbytes[i], S_new[i]);
919 						}
920 					} else {	/* regular object */
921 
922 						int n_shift, n_rem, r_zeros_number;
923 						const char *r_body;
924 
925 						RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)) || tb->insert_size[0] <= 0,
926 						       "PAP-12225: item too short or insert_size <= 0");
927 
928 						/* Calculate number of bytes which must be shifted from appended item */
929 						n_shift = sbytes[i] - tb->insert_size[0];
930 						if (n_shift < 0)
931 							n_shift = 0;
932 						leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]);
933 
934 						/* Calculate number of bytes which must remain in body after append to S_new[i] */
935 						n_rem = tb->insert_size[0] - sbytes[i];
936 						if (n_rem < 0)
937 							n_rem = 0;
938 						/* Append part of body into S_new[0] */
939 						buffer_info_init_bh(tb, &bi, S_new[i]);
940 						if (n_rem > zeros_num) {
941 							r_zeros_number = 0;
942 							r_body = body + n_rem - zeros_num;
943 						} else {
944 							r_body = body;
945 							r_zeros_number = zeros_num - n_rem;
946 							zeros_num -= r_zeros_number;
947 						}
948 
949 						leaf_paste_in_buffer(&bi, 0, n_shift,
950 								     tb->insert_size[0] - n_rem,
951 								     r_body, r_zeros_number);
952 						{
953 							struct item_head *tmp;
954 
955 							tmp = B_N_PITEM_HEAD(S_new[i], 0);
956 							if (is_indirect_le_ih
957 							    (tmp)) {
958 								set_ih_free_space(tmp, 0);
959 								set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT)));
960 							} else {
961 								set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + n_rem);
962 							}
963 						}
964 
965 						tb->insert_size[0] = n_rem;
966 						if (!n_rem)
967 							pos_in_item++;
968 					}
969 				} else
970 					/* item falls wholly into S_new[i] */
971 				{
972 					int leaf_mi;
973 					struct item_head *pasted;
974 
975 #ifdef CONFIG_REISERFS_CHECK
976 					struct item_head *ih_check = B_N_PITEM_HEAD(tbS0, item_pos);
977 
978 					if (!is_direntry_le_ih(ih_check)
979 					    && (pos_in_item != ih_item_len(ih_check)
980 						|| tb->insert_size[0] <= 0))
981 						reiserfs_panic(tb->tb_sb,
982 							     "PAP-12235",
983 							     "pos_in_item "
984 							     "must be equal "
985 							     "to ih_item_len");
986 #endif				/* CONFIG_REISERFS_CHECK */
987 
988 					leaf_mi = leaf_move_items(LEAF_FROM_S_TO_SNEW,
989 							    tb, snum[i],
990 							    sbytes[i],
991 							    S_new[i]);
992 
993 					RFALSE(leaf_mi,
994 					       "PAP-12240: unexpected value returned by leaf_move_items (%d)",
995 					       leaf_mi);
996 
997 					/* paste into item */
998 					buffer_info_init_bh(tb, &bi, S_new[i]);
999 					leaf_paste_in_buffer(&bi,
1000 							     item_pos - n + snum[i],
1001 							     pos_in_item,
1002 							     tb->insert_size[0],
1003 							     body, zeros_num);
1004 
1005 					pasted = B_N_PITEM_HEAD(S_new[i], item_pos - n + snum[i]);
1006 					if (is_direntry_le_ih(pasted)) {
1007 						leaf_paste_entries(&bi,
1008 								   item_pos - n + snum[i],
1009 								   pos_in_item, 1,
1010 								   (struct reiserfs_de_head *)body,
1011 								   body + DEH_SIZE,
1012 								   tb->insert_size[0]
1013 						    );
1014 					}
1015 
1016 					/* if we paste to indirect item update ih_free_space */
1017 					if (is_indirect_le_ih(pasted))
1018 						set_ih_free_space(pasted, 0);
1019 					zeros_num = tb->insert_size[0] = 0;
1020 				}
1021 			}
1022 
1023 			else {	/* pasted item doesn't fall into S_new[i] */
1024 
1025 				leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1026 						snum[i], sbytes[i], S_new[i]);
1027 			}
1028 			break;
1029 		default:	/* cases d and t */
1030 			reiserfs_panic(tb->tb_sb, "PAP-12245",
1031 				       "blknum > 2: unexpected mode: %s(%d)",
1032 				       (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
1033 		}
1034 
1035 		memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE);
1036 		insert_ptr[i] = S_new[i];
1037 
1038 		RFALSE(!buffer_journaled(S_new[i])
1039 		       || buffer_journal_dirty(S_new[i])
1040 		       || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)",
1041 		       i, S_new[i]);
1042 	}
1043 
1044 	/* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1045 	   affected item which remains in S */
1046 	if (0 <= item_pos && item_pos < tb->s0num) {	/* if we must insert or append into buffer S[0] */
1047 
1048 		switch (flag) {
1049 		case M_INSERT:	/* insert item into S[0] */
1050 			buffer_info_init_tbS0(tb, &bi);
1051 			leaf_insert_into_buf(&bi, item_pos, ih, body,
1052 					     zeros_num);
1053 
1054 			/* If we insert the first key change the delimiting key */
1055 			if (item_pos == 0) {
1056 				if (tb->CFL[0])	/* can be 0 in reiserfsck */
1057 					replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
1058 			}
1059 			break;
1060 
1061 		case M_PASTE:{	/* append item in S[0] */
1062 				struct item_head *pasted;
1063 
1064 				pasted = B_N_PITEM_HEAD(tbS0, item_pos);
1065 				/* when directory, may be new entry already pasted */
1066 				if (is_direntry_le_ih(pasted)) {
1067 					if (pos_in_item >= 0 && pos_in_item <= ih_entry_count(pasted)) {
1068 
1069 						RFALSE(!tb->insert_size[0],
1070 						       "PAP-12260: insert_size is 0 already");
1071 
1072 						/* prepare space */
1073 						buffer_info_init_tbS0(tb, &bi);
1074 						leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
1075 								     tb->insert_size[0], body,
1076 								     zeros_num);
1077 
1078 						/* paste entry */
1079 						leaf_paste_entries(&bi, item_pos, pos_in_item, 1,
1080 								   (struct reiserfs_de_head *)body,
1081 								   body + DEH_SIZE,
1082 								   tb->insert_size[0]);
1083 						if (!item_pos && !pos_in_item) {
1084 							RFALSE(!tb->CFL[0] || !tb->L[0],
1085 							       "PAP-12270: CFL[0]/L[0] must be specified");
1086 							if (tb->CFL[0])
1087 								replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
1088 						}
1089 						tb->insert_size[0] = 0;
1090 					}
1091 				} else {	/* regular object */
1092 					if (pos_in_item == ih_item_len(pasted)) {
1093 
1094 						RFALSE(tb->insert_size[0] <= 0,
1095 						       "PAP-12275: insert size must not be %d",
1096 						       tb->insert_size[0]);
1097 						buffer_info_init_tbS0(tb, &bi);
1098 						leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
1099 								     tb->insert_size[0], body, zeros_num);
1100 
1101 						if (is_indirect_le_ih(pasted)) {
1102 #if 0
1103 							RFALSE(tb->
1104 							       insert_size[0] !=
1105 							       UNFM_P_SIZE,
1106 							       "PAP-12280: insert_size for indirect item must be %d, not %d",
1107 							       UNFM_P_SIZE,
1108 							       tb->
1109 							       insert_size[0]);
1110 #endif
1111 							set_ih_free_space(pasted, 0);
1112 						}
1113 						tb->insert_size[0] = 0;
1114 					}
1115 #ifdef CONFIG_REISERFS_CHECK
1116 					else {
1117 						if (tb->insert_size[0]) {
1118 							print_cur_tb("12285");
1119 							reiserfs_panic(tb->tb_sb,
1120 							    "PAP-12285",
1121 							    "insert_size "
1122 							    "must be 0 "
1123 							    "(%d)",
1124 							    tb->insert_size[0]);
1125 						}
1126 					}
1127 #endif				/* CONFIG_REISERFS_CHECK */
1128 
1129 				}
1130 			}	/* case M_PASTE: */
1131 		}
1132 	}
1133 #ifdef CONFIG_REISERFS_CHECK
1134 	if (flag == M_PASTE && tb->insert_size[0]) {
1135 		print_cur_tb("12290");
1136 		reiserfs_panic(tb->tb_sb,
1137 			       "PAP-12290", "insert_size is still not 0 (%d)",
1138 			       tb->insert_size[0]);
1139 	}
1140 #endif				/* CONFIG_REISERFS_CHECK */
1141 	return 0;
1142 }				/* Leaf level of the tree is balanced (end of balance_leaf) */
1143 
1144 /* Make empty node */
1145 void make_empty_node(struct buffer_info *bi)
1146 {
1147 	struct block_head *blkh;
1148 
1149 	RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
1150 
1151 	blkh = B_BLK_HEAD(bi->bi_bh);
1152 	set_blkh_nr_item(blkh, 0);
1153 	set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh));
1154 
1155 	if (bi->bi_parent)
1156 		B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0;	/* Endian safe if 0 */
1157 }
1158 
1159 /* Get first empty buffer */
1160 struct buffer_head *get_FEB(struct tree_balance *tb)
1161 {
1162 	int i;
1163 	struct buffer_info bi;
1164 
1165 	for (i = 0; i < MAX_FEB_SIZE; i++)
1166 		if (tb->FEB[i] != NULL)
1167 			break;
1168 
1169 	if (i == MAX_FEB_SIZE)
1170 		reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty");
1171 
1172 	buffer_info_init_bh(tb, &bi, tb->FEB[i]);
1173 	make_empty_node(&bi);
1174 	set_buffer_uptodate(tb->FEB[i]);
1175 	tb->used[i] = tb->FEB[i];
1176 	tb->FEB[i] = NULL;
1177 
1178 	return tb->used[i];
1179 }
1180 
1181 /* This is now used because reiserfs_free_block has to be able to
1182 ** schedule.
1183 */
1184 static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
1185 {
1186 	int i;
1187 
1188 	if (buffer_dirty(bh))
1189 		reiserfs_warning(tb->tb_sb, "reiserfs-12320",
1190 				 "called with dirty buffer");
1191 	for (i = 0; i < ARRAY_SIZE(tb->thrown); i++)
1192 		if (!tb->thrown[i]) {
1193 			tb->thrown[i] = bh;
1194 			get_bh(bh);	/* free_thrown puts this */
1195 			return;
1196 		}
1197 	reiserfs_warning(tb->tb_sb, "reiserfs-12321",
1198 			 "too many thrown buffers");
1199 }
1200 
1201 static void free_thrown(struct tree_balance *tb)
1202 {
1203 	int i;
1204 	b_blocknr_t blocknr;
1205 	for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) {
1206 		if (tb->thrown[i]) {
1207 			blocknr = tb->thrown[i]->b_blocknr;
1208 			if (buffer_dirty(tb->thrown[i]))
1209 				reiserfs_warning(tb->tb_sb, "reiserfs-12322",
1210 						 "called with dirty buffer %d",
1211 						 blocknr);
1212 			brelse(tb->thrown[i]);	/* incremented in store_thrown */
1213 			reiserfs_free_block(tb->transaction_handle, NULL,
1214 					    blocknr, 0);
1215 		}
1216 	}
1217 }
1218 
1219 void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh)
1220 {
1221 	struct block_head *blkh;
1222 	blkh = B_BLK_HEAD(bh);
1223 	set_blkh_level(blkh, FREE_LEVEL);
1224 	set_blkh_nr_item(blkh, 0);
1225 
1226 	clear_buffer_dirty(bh);
1227 	store_thrown(tb, bh);
1228 }
1229 
1230 /* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
1231 void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest,
1232 		 struct buffer_head *src, int n_src)
1233 {
1234 
1235 	RFALSE(dest == NULL || src == NULL,
1236 	       "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
1237 	       src, dest);
1238 	RFALSE(!B_IS_KEYS_LEVEL(dest),
1239 	       "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
1240 	       dest);
1241 	RFALSE(n_dest < 0 || n_src < 0,
1242 	       "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
1243 	RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
1244 	       "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
1245 	       n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
1246 
1247 	if (B_IS_ITEMS_LEVEL(src))
1248 		/* source buffer contains leaf node */
1249 		memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PITEM_HEAD(src, n_src),
1250 		       KEY_SIZE);
1251 	else
1252 		memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PDELIM_KEY(src, n_src),
1253 		       KEY_SIZE);
1254 
1255 	do_balance_mark_internal_dirty(tb, dest, 0);
1256 }
1257 
1258 int get_left_neighbor_position(struct tree_balance *tb, int h)
1259 {
1260 	int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1261 
1262 	RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FL[h] == NULL,
1263 	       "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1264 	       h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h));
1265 
1266 	if (Sh_position == 0)
1267 		return B_NR_ITEMS(tb->FL[h]);
1268 	else
1269 		return Sh_position - 1;
1270 }
1271 
1272 int get_right_neighbor_position(struct tree_balance *tb, int h)
1273 {
1274 	int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1275 
1276 	RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FR[h] == NULL,
1277 	       "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1278 	       h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]);
1279 
1280 	if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h)))
1281 		return 0;
1282 	else
1283 		return Sh_position + 1;
1284 }
1285 
1286 #ifdef CONFIG_REISERFS_CHECK
1287 
1288 int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
1289 static void check_internal_node(struct super_block *s, struct buffer_head *bh,
1290 				char *mes)
1291 {
1292 	struct disk_child *dc;
1293 	int i;
1294 
1295 	RFALSE(!bh, "PAP-12336: bh == 0");
1296 
1297 	if (!bh || !B_IS_IN_TREE(bh))
1298 		return;
1299 
1300 	RFALSE(!buffer_dirty(bh) &&
1301 	       !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
1302 	       "PAP-12337: buffer (%b) must be dirty", bh);
1303 	dc = B_N_CHILD(bh, 0);
1304 
1305 	for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
1306 		if (!is_reusable(s, dc_block_number(dc), 1)) {
1307 			print_cur_tb(mes);
1308 			reiserfs_panic(s, "PAP-12338",
1309 				       "invalid child pointer %y in %b",
1310 				       dc, bh);
1311 		}
1312 	}
1313 }
1314 
1315 static int locked_or_not_in_tree(struct tree_balance *tb,
1316 				  struct buffer_head *bh, char *which)
1317 {
1318 	if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
1319 	    !B_IS_IN_TREE(bh)) {
1320 		reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh);
1321 		return 1;
1322 	}
1323 	return 0;
1324 }
1325 
1326 static int check_before_balancing(struct tree_balance *tb)
1327 {
1328 	int retval = 0;
1329 
1330 	if (REISERFS_SB(tb->tb_sb)->cur_tb) {
1331 		reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule "
1332 			       "occurred based on cur_tb not being null at "
1333 			       "this point in code. do_balance cannot properly "
1334 			       "handle concurrent tree accesses on a same "
1335 			       "mount point.");
1336 	}
1337 
1338 	/* double check that buffers that we will modify are unlocked. (fix_nodes should already have
1339 	   prepped all of these for us). */
1340 	if (tb->lnum[0]) {
1341 		retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]");
1342 		retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]");
1343 		retval |= locked_or_not_in_tree(tb, tb->CFL[0], "CFL[0]");
1344 		check_leaf(tb->L[0]);
1345 	}
1346 	if (tb->rnum[0]) {
1347 		retval |= locked_or_not_in_tree(tb, tb->R[0], "R[0]");
1348 		retval |= locked_or_not_in_tree(tb, tb->FR[0], "FR[0]");
1349 		retval |= locked_or_not_in_tree(tb, tb->CFR[0], "CFR[0]");
1350 		check_leaf(tb->R[0]);
1351 	}
1352 	retval |= locked_or_not_in_tree(tb, PATH_PLAST_BUFFER(tb->tb_path),
1353 					"S[0]");
1354 	check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1355 
1356 	return retval;
1357 }
1358 
1359 static void check_after_balance_leaf(struct tree_balance *tb)
1360 {
1361 	if (tb->lnum[0]) {
1362 		if (B_FREE_SPACE(tb->L[0]) !=
1363 		    MAX_CHILD_SIZE(tb->L[0]) -
1364 		    dc_size(B_N_CHILD
1365 			    (tb->FL[0], get_left_neighbor_position(tb, 0)))) {
1366 			print_cur_tb("12221");
1367 			reiserfs_panic(tb->tb_sb, "PAP-12355",
1368 				       "shift to left was incorrect");
1369 		}
1370 	}
1371 	if (tb->rnum[0]) {
1372 		if (B_FREE_SPACE(tb->R[0]) !=
1373 		    MAX_CHILD_SIZE(tb->R[0]) -
1374 		    dc_size(B_N_CHILD
1375 			    (tb->FR[0], get_right_neighbor_position(tb, 0)))) {
1376 			print_cur_tb("12222");
1377 			reiserfs_panic(tb->tb_sb, "PAP-12360",
1378 				       "shift to right was incorrect");
1379 		}
1380 	}
1381 	if (PATH_H_PBUFFER(tb->tb_path, 1) &&
1382 	    (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) !=
1383 	     (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1384 	      dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1385 				PATH_H_POSITION(tb->tb_path, 1)))))) {
1386 		int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0));
1387 		int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1388 			     dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1389 					       PATH_H_POSITION(tb->tb_path,
1390 							       1))));
1391 		print_cur_tb("12223");
1392 		reiserfs_warning(tb->tb_sb, "reiserfs-12363",
1393 				 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
1394 				 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
1395 				 left,
1396 				 MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)),
1397 				 PATH_H_PBUFFER(tb->tb_path, 1),
1398 				 PATH_H_POSITION(tb->tb_path, 1),
1399 				 dc_size(B_N_CHILD
1400 					 (PATH_H_PBUFFER(tb->tb_path, 1),
1401 					  PATH_H_POSITION(tb->tb_path, 1))),
1402 				 right);
1403 		reiserfs_panic(tb->tb_sb, "PAP-12365", "S is incorrect");
1404 	}
1405 }
1406 
1407 static void check_leaf_level(struct tree_balance *tb)
1408 {
1409 	check_leaf(tb->L[0]);
1410 	check_leaf(tb->R[0]);
1411 	check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1412 }
1413 
1414 static void check_internal_levels(struct tree_balance *tb)
1415 {
1416 	int h;
1417 
1418 	/* check all internal nodes */
1419 	for (h = 1; tb->insert_size[h]; h++) {
1420 		check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h),
1421 				    "BAD BUFFER ON PATH");
1422 		if (tb->lnum[h])
1423 			check_internal_node(tb->tb_sb, tb->L[h], "BAD L");
1424 		if (tb->rnum[h])
1425 			check_internal_node(tb->tb_sb, tb->R[h], "BAD R");
1426 	}
1427 
1428 }
1429 
1430 #endif
1431 
1432 /* Now we have all of the buffers that must be used in balancing of
1433    the tree.  We rely on the assumption that schedule() will not occur
1434    while do_balance works. ( Only interrupt handlers are acceptable.)
1435    We balance the tree according to the analysis made before this,
1436    using buffers already obtained.  For SMP support it will someday be
1437    necessary to add ordered locking of tb. */
1438 
1439 /* Some interesting rules of balancing:
1440 
1441    we delete a maximum of two nodes per level per balancing: we never
1442    delete R, when we delete two of three nodes L, S, R then we move
1443    them into R.
1444 
1445    we only delete L if we are deleting two nodes, if we delete only
1446    one node we delete S
1447 
1448    if we shift leaves then we shift as much as we can: this is a
1449    deliberate policy of extremism in node packing which results in
1450    higher average utilization after repeated random balance operations
1451    at the cost of more memory copies and more balancing as a result of
1452    small insertions to full nodes.
1453 
1454    if we shift internal nodes we try to evenly balance the node
1455    utilization, with consequent less balancing at the cost of lower
1456    utilization.
1457 
1458    one could argue that the policy for directories in leaves should be
1459    that of internal nodes, but we will wait until another day to
1460    evaluate this....  It would be nice to someday measure and prove
1461    these assumptions as to what is optimal....
1462 
1463 */
1464 
1465 static inline void do_balance_starts(struct tree_balance *tb)
1466 {
1467 	/* use print_cur_tb() to see initial state of struct
1468 	   tree_balance */
1469 
1470 	/* store_print_tb (tb); */
1471 
1472 	/* do not delete, just comment it out */
1473 /*    print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
1474 	     "check");*/
1475 	RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
1476 #ifdef CONFIG_REISERFS_CHECK
1477 	REISERFS_SB(tb->tb_sb)->cur_tb = tb;
1478 #endif
1479 }
1480 
1481 static inline void do_balance_completed(struct tree_balance *tb)
1482 {
1483 
1484 #ifdef CONFIG_REISERFS_CHECK
1485 	check_leaf_level(tb);
1486 	check_internal_levels(tb);
1487 	REISERFS_SB(tb->tb_sb)->cur_tb = NULL;
1488 #endif
1489 
1490 	/* reiserfs_free_block is no longer schedule safe.  So, we need to
1491 	 ** put the buffers we want freed on the thrown list during do_balance,
1492 	 ** and then free them now
1493 	 */
1494 
1495 	REISERFS_SB(tb->tb_sb)->s_do_balance++;
1496 
1497 	/* release all nodes hold to perform the balancing */
1498 	unfix_nodes(tb);
1499 
1500 	free_thrown(tb);
1501 }
1502 
1503 void do_balance(struct tree_balance *tb,	/* tree_balance structure */
1504 		struct item_head *ih,	/* item header of inserted item */
1505 		const char *body,	/* body  of inserted item or bytes to paste */
1506 		int flag)
1507 {				/* i - insert, d - delete
1508 				   c - cut, p - paste
1509 
1510 				   Cut means delete part of an item
1511 				   (includes removing an entry from a
1512 				   directory).
1513 
1514 				   Delete means delete whole item.
1515 
1516 				   Insert means add a new item into the
1517 				   tree.
1518 
1519 				   Paste means to append to the end of an
1520 				   existing file or to insert a directory
1521 				   entry.  */
1522 	int child_pos,		/* position of a child node in its parent */
1523 	 h;			/* level of the tree being processed */
1524 	struct item_head insert_key[2];	/* in our processing of one level
1525 					   we sometimes determine what
1526 					   must be inserted into the next
1527 					   higher level.  This insertion
1528 					   consists of a key or two keys
1529 					   and their corresponding
1530 					   pointers */
1531 	struct buffer_head *insert_ptr[2];	/* inserted node-ptrs for the next
1532 						   level */
1533 
1534 	tb->tb_mode = flag;
1535 	tb->need_balance_dirty = 0;
1536 
1537 	if (FILESYSTEM_CHANGED_TB(tb)) {
1538 		reiserfs_panic(tb->tb_sb, "clm-6000", "fs generation has "
1539 			       "changed");
1540 	}
1541 	/* if we have no real work to do  */
1542 	if (!tb->insert_size[0]) {
1543 		reiserfs_warning(tb->tb_sb, "PAP-12350",
1544 				 "insert_size == 0, mode == %c", flag);
1545 		unfix_nodes(tb);
1546 		return;
1547 	}
1548 
1549 	atomic_inc(&(fs_generation(tb->tb_sb)));
1550 	do_balance_starts(tb);
1551 
1552 	/* balance leaf returns 0 except if combining L R and S into
1553 	   one node.  see balance_internal() for explanation of this
1554 	   line of code. */
1555 	child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
1556 	    balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);
1557 
1558 #ifdef CONFIG_REISERFS_CHECK
1559 	check_after_balance_leaf(tb);
1560 #endif
1561 
1562 	/* Balance internal level of the tree. */
1563 	for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
1564 		child_pos =
1565 		    balance_internal(tb, h, child_pos, insert_key, insert_ptr);
1566 
1567 	do_balance_completed(tb);
1568 
1569 }
1570