1 /* 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 3 */ 4 5 /* Now we have all buffers that must be used in balancing of the tree */ 6 /* Further calculations can not cause schedule(), and thus the buffer */ 7 /* tree will be stable until the balancing will be finished */ 8 /* balance the tree according to the analysis made before, */ 9 /* and using buffers obtained after all above. */ 10 11 /** 12 ** balance_leaf_when_delete 13 ** balance_leaf 14 ** do_balance 15 ** 16 **/ 17 18 #include <asm/uaccess.h> 19 #include <linux/time.h> 20 #include "reiserfs.h" 21 #include <linux/buffer_head.h> 22 #include <linux/kernel.h> 23 24 static inline void buffer_info_init_left(struct tree_balance *tb, 25 struct buffer_info *bi) 26 { 27 bi->tb = tb; 28 bi->bi_bh = tb->L[0]; 29 bi->bi_parent = tb->FL[0]; 30 bi->bi_position = get_left_neighbor_position(tb, 0); 31 } 32 33 static inline void buffer_info_init_right(struct tree_balance *tb, 34 struct buffer_info *bi) 35 { 36 bi->tb = tb; 37 bi->bi_bh = tb->R[0]; 38 bi->bi_parent = tb->FR[0]; 39 bi->bi_position = get_right_neighbor_position(tb, 0); 40 } 41 42 static inline void buffer_info_init_tbS0(struct tree_balance *tb, 43 struct buffer_info *bi) 44 { 45 bi->tb = tb; 46 bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path); 47 bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0); 48 bi->bi_position = PATH_H_POSITION(tb->tb_path, 1); 49 } 50 51 static inline void buffer_info_init_bh(struct tree_balance *tb, 52 struct buffer_info *bi, 53 struct buffer_head *bh) 54 { 55 bi->tb = tb; 56 bi->bi_bh = bh; 57 bi->bi_parent = NULL; 58 bi->bi_position = 0; 59 } 60 61 inline void do_balance_mark_leaf_dirty(struct tree_balance *tb, 62 struct buffer_head *bh, int flag) 63 { 64 journal_mark_dirty(tb->transaction_handle, 65 tb->transaction_handle->t_super, bh); 66 } 67 68 #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty 69 #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty 70 71 /* summary: 72 if deleting 
something ( tb->insert_size[0] < 0 ) 73 return(balance_leaf_when_delete()); (flag d handled here) 74 else 75 if lnum is larger than 0 we put items into the left node 76 if rnum is larger than 0 we put items into the right node 77 if snum1 is larger than 0 we put items into the new node s1 78 if snum2 is larger than 0 we put items into the new node s2 79 Note that all *num* count new items being created. 80 81 It would be easier to read balance_leaf() if each of these summary 82 lines was a separate procedure rather than being inlined. I think 83 that there are many passages here and in balance_leaf_when_delete() in 84 which two calls to one procedure can replace two passages, and it 85 might save cache space and improve software maintenance costs to do so. 86 87 Vladimir made the perceptive comment that we should offload most of 88 the decision making in this function into fix_nodes/check_balance, and 89 then create some sort of structure in tb that says what actions should 90 be performed by do_balance. 
91 92 -Hans */ 93 94 /* Balance leaf node in case of delete or cut: insert_size[0] < 0 95 * 96 * lnum, rnum can have values >= -1 97 * -1 means that the neighbor must be joined with S 98 * 0 means that nothing should be done with the neighbor 99 * >0 means to shift entirely or partly the specified number of items to the neighbor 100 */ 101 static int balance_leaf_when_delete(struct tree_balance *tb, int flag) 102 { 103 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 104 int item_pos = PATH_LAST_POSITION(tb->tb_path); 105 int pos_in_item = tb->tb_path->pos_in_item; 106 struct buffer_info bi; 107 int n; 108 struct item_head *ih; 109 110 RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1, 111 "vs- 12000: level: wrong FR %z", tb->FR[0]); 112 RFALSE(tb->blknum[0] > 1, 113 "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]); 114 RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0), 115 "PAP-12010: tree can not be empty"); 116 117 ih = B_N_PITEM_HEAD(tbS0, item_pos); 118 buffer_info_init_tbS0(tb, &bi); 119 120 /* Delete or truncate the item */ 121 122 switch (flag) { 123 case M_DELETE: /* delete item in S[0] */ 124 125 RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0], 126 "vs-12013: mode Delete, insert size %d, ih to be deleted %h", 127 -tb->insert_size[0], ih); 128 129 leaf_delete_items(&bi, 0, item_pos, 1, -1); 130 131 if (!item_pos && tb->CFL[0]) { 132 if (B_NR_ITEMS(tbS0)) { 133 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 134 0); 135 } else { 136 if (!PATH_H_POSITION(tb->tb_path, 1)) 137 replace_key(tb, tb->CFL[0], tb->lkey[0], 138 PATH_H_PPARENT(tb->tb_path, 139 0), 0); 140 } 141 } 142 143 RFALSE(!item_pos && !tb->CFL[0], 144 "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0], 145 tb->L[0]); 146 147 break; 148 149 case M_CUT:{ /* cut item in S[0] */ 150 if (is_direntry_le_ih(ih)) { 151 152 /* UFS unlink semantics are such that you can only delete one directory entry at a time. 
*/ 153 /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */ 154 tb->insert_size[0] = -1; 155 leaf_cut_from_buffer(&bi, item_pos, pos_in_item, 156 -tb->insert_size[0]); 157 158 RFALSE(!item_pos && !pos_in_item && !tb->CFL[0], 159 "PAP-12030: can not change delimiting key. CFL[0]=%p", 160 tb->CFL[0]); 161 162 if (!item_pos && !pos_in_item && tb->CFL[0]) { 163 replace_key(tb, tb->CFL[0], tb->lkey[0], 164 tbS0, 0); 165 } 166 } else { 167 leaf_cut_from_buffer(&bi, item_pos, pos_in_item, 168 -tb->insert_size[0]); 169 170 RFALSE(!ih_item_len(ih), 171 "PAP-12035: cut must leave non-zero dynamic length of item"); 172 } 173 break; 174 } 175 176 default: 177 print_cur_tb("12040"); 178 reiserfs_panic(tb->tb_sb, "PAP-12040", 179 "unexpected mode: %s(%d)", 180 (flag == 181 M_PASTE) ? "PASTE" : ((flag == 182 M_INSERT) ? "INSERT" : 183 "UNKNOWN"), flag); 184 } 185 186 /* the rule is that no shifting occurs unless by shifting a node can be freed */ 187 n = B_NR_ITEMS(tbS0); 188 if (tb->lnum[0]) { /* L[0] takes part in balancing */ 189 if (tb->lnum[0] == -1) { /* L[0] must be joined with S[0] */ 190 if (tb->rnum[0] == -1) { /* R[0] must be also joined with S[0] */ 191 if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) { 192 /* all contents of all the 3 buffers will be in L[0] */ 193 if (PATH_H_POSITION(tb->tb_path, 1) == 0 194 && 1 < B_NR_ITEMS(tb->FR[0])) 195 replace_key(tb, tb->CFL[0], 196 tb->lkey[0], 197 tb->FR[0], 1); 198 199 leaf_move_items(LEAF_FROM_S_TO_L, tb, n, 200 -1, NULL); 201 leaf_move_items(LEAF_FROM_R_TO_L, tb, 202 B_NR_ITEMS(tb->R[0]), 203 -1, NULL); 204 205 reiserfs_invalidate_buffer(tb, tbS0); 206 reiserfs_invalidate_buffer(tb, 207 tb->R[0]); 208 209 return 0; 210 } 211 /* all contents of all the 3 buffers will be in R[0] */ 212 leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1, 213 NULL); 214 leaf_move_items(LEAF_FROM_L_TO_R, tb, 215 B_NR_ITEMS(tb->L[0]), -1, NULL); 216 217 /* right_delimiting_key is correct in R[0] */ 218 
replace_key(tb, tb->CFR[0], tb->rkey[0], 219 tb->R[0], 0); 220 221 reiserfs_invalidate_buffer(tb, tbS0); 222 reiserfs_invalidate_buffer(tb, tb->L[0]); 223 224 return -1; 225 } 226 227 RFALSE(tb->rnum[0] != 0, 228 "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]); 229 /* all contents of L[0] and S[0] will be in L[0] */ 230 leaf_shift_left(tb, n, -1); 231 232 reiserfs_invalidate_buffer(tb, tbS0); 233 234 return 0; 235 } 236 /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */ 237 238 RFALSE((tb->lnum[0] + tb->rnum[0] < n) || 239 (tb->lnum[0] + tb->rnum[0] > n + 1), 240 "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent", 241 tb->rnum[0], tb->lnum[0], n); 242 RFALSE((tb->lnum[0] + tb->rnum[0] == n) && 243 (tb->lbytes != -1 || tb->rbytes != -1), 244 "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split", 245 tb->rbytes, tb->lbytes); 246 RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) && 247 (tb->lbytes < 1 || tb->rbytes != -1), 248 "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split", 249 tb->rbytes, tb->lbytes); 250 251 leaf_shift_left(tb, tb->lnum[0], tb->lbytes); 252 leaf_shift_right(tb, tb->rnum[0], tb->rbytes); 253 254 reiserfs_invalidate_buffer(tb, tbS0); 255 256 return 0; 257 } 258 259 if (tb->rnum[0] == -1) { 260 /* all contents of R[0] and S[0] will be in R[0] */ 261 leaf_shift_right(tb, n, -1); 262 reiserfs_invalidate_buffer(tb, tbS0); 263 return 0; 264 } 265 266 RFALSE(tb->rnum[0], 267 "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]); 268 return 0; 269 } 270 271 static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item header of inserted item (this is on little endian) */ 272 const char *body, /* body of inserted item or bytes to paste */ 273 int flag, /* i - insert, d - delete, c - cut, p - paste 274 (see comment to do_balance) */ 275 struct item_head *insert_key, /* in our processing of one level we sometimes determine 
what 276 must be inserted into the next higher level. This insertion 277 consists of a key or two keys and their corresponding 278 pointers */ 279 struct buffer_head **insert_ptr /* inserted node-ptrs for the next level */ 280 ) 281 { 282 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 283 int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0] 284 of the affected item */ 285 struct buffer_info bi; 286 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */ 287 int snum[2]; /* number of items that will be placed 288 into S_new (includes partially shifted 289 items) */ 290 int sbytes[2]; /* if an item is partially shifted into S_new then 291 if it is a directory item 292 it is the number of entries from the item that are shifted into S_new 293 else 294 it is the number of bytes from the item that are shifted into S_new 295 */ 296 int n, i; 297 int ret_val; 298 int pos_in_item; 299 int zeros_num; 300 301 PROC_INFO_INC(tb->tb_sb, balance_at[0]); 302 303 /* Make balance in case insert_size[0] < 0 */ 304 if (tb->insert_size[0] < 0) 305 return balance_leaf_when_delete(tb, flag); 306 307 zeros_num = 0; 308 if (flag == M_INSERT && !body) 309 zeros_num = ih_item_len(ih); 310 311 pos_in_item = tb->tb_path->pos_in_item; 312 /* for indirect item pos_in_item is measured in unformatted node 313 pointers. 
Recalculate to bytes */ 314 if (flag != M_INSERT 315 && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) 316 pos_in_item *= UNFM_P_SIZE; 317 318 if (tb->lnum[0] > 0) { 319 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */ 320 if (item_pos < tb->lnum[0]) { 321 /* new item or it part falls to L[0], shift it too */ 322 n = B_NR_ITEMS(tb->L[0]); 323 324 switch (flag) { 325 case M_INSERT: /* insert item into L[0] */ 326 327 if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) { 328 /* part of new item falls into L[0] */ 329 int new_item_len; 330 int version; 331 332 ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, -1); 333 334 /* Calculate item length to insert to S[0] */ 335 new_item_len = ih_item_len(ih) - tb->lbytes; 336 /* Calculate and check item length to insert to L[0] */ 337 put_ih_item_len(ih, ih_item_len(ih) - new_item_len); 338 339 RFALSE(ih_item_len(ih) <= 0, 340 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d", 341 ih_item_len(ih)); 342 343 /* Insert new item into L[0] */ 344 buffer_info_init_left(tb, &bi); 345 leaf_insert_into_buf(&bi, 346 n + item_pos - ret_val, ih, body, 347 zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num); 348 349 version = ih_version(ih); 350 351 /* Calculate key component, item length and body to insert into S[0] */ 352 set_le_ih_k_offset(ih, le_ih_k_offset(ih) + 353 (tb-> lbytes << (is_indirect_le_ih(ih) ? 
tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0))); 354 355 put_ih_item_len(ih, new_item_len); 356 if (tb->lbytes > zeros_num) { 357 body += (tb->lbytes - zeros_num); 358 zeros_num = 0; 359 } else 360 zeros_num -= tb->lbytes; 361 362 RFALSE(ih_item_len(ih) <= 0, 363 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d", 364 ih_item_len(ih)); 365 } else { 366 /* new item in whole falls into L[0] */ 367 /* Shift lnum[0]-1 items to L[0] */ 368 ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, tb->lbytes); 369 /* Insert new item into L[0] */ 370 buffer_info_init_left(tb, &bi); 371 leaf_insert_into_buf(&bi, n + item_pos - ret_val, ih, body, zeros_num); 372 tb->insert_size[0] = 0; 373 zeros_num = 0; 374 } 375 break; 376 377 case M_PASTE: /* append item in L[0] */ 378 379 if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) { 380 /* we must shift the part of the appended item */ 381 if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) { 382 383 RFALSE(zeros_num, 384 "PAP-12090: invalid parameter in case of a directory"); 385 /* directory item */ 386 if (tb->lbytes > pos_in_item) { 387 /* new directory entry falls into L[0] */ 388 struct item_head *pasted; 389 int l_pos_in_item = pos_in_item; 390 391 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */ 392 ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes-1); 393 if (ret_val && !item_pos) { 394 pasted = B_N_PITEM_HEAD(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1); 395 l_pos_in_item += I_ENTRY_COUNT(pasted) - (tb->lbytes -1); 396 } 397 398 /* Append given directory entry to directory item */ 399 buffer_info_init_left(tb, &bi); 400 leaf_paste_in_buffer(&bi, n + item_pos - ret_val, l_pos_in_item, tb->insert_size[0], body, zeros_num); 401 402 /* previous string prepared space for pasting new entry, following string pastes this entry */ 403 404 /* when we have merge directory item, pos_in_item has been changed too */ 405 406 /* paste new directory entry. 
1 is entry number */ 407 leaf_paste_entries(&bi, n + item_pos - ret_val, l_pos_in_item, 408 1, (struct reiserfs_de_head *) body, 409 body + DEH_SIZE, tb->insert_size[0]); 410 tb->insert_size[0] = 0; 411 } else { 412 /* new directory item doesn't fall into L[0] */ 413 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */ 414 leaf_shift_left(tb, tb->lnum[0], tb->lbytes); 415 } 416 /* Calculate new position to append in item body */ 417 pos_in_item -= tb->lbytes; 418 } else { 419 /* regular object */ 420 RFALSE(tb->lbytes <= 0, "PAP-12095: there is nothing to shift to L[0]. lbytes=%d", tb->lbytes); 421 RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)), 422 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d", 423 ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),pos_in_item); 424 425 if (tb->lbytes >= pos_in_item) { 426 /* appended item will be in L[0] in whole */ 427 int l_n; 428 429 /* this bytes number must be appended to the last item of L[h] */ 430 l_n = tb->lbytes - pos_in_item; 431 432 /* Calculate new insert_size[0] */ 433 tb->insert_size[0] -= l_n; 434 435 RFALSE(tb->insert_size[0] <= 0, 436 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d", 437 tb->insert_size[0]); 438 ret_val = leaf_shift_left(tb, tb->lnum[0], ih_item_len 439 (B_N_PITEM_HEAD(tbS0, item_pos))); 440 /* Append to body of item in L[0] */ 441 buffer_info_init_left(tb, &bi); 442 leaf_paste_in_buffer 443 (&bi, n + item_pos - ret_val, ih_item_len 444 (B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val)), 445 l_n, body, 446 zeros_num > l_n ? 
l_n : zeros_num); 447 /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */ 448 { 449 int version; 450 int temp_l = l_n; 451 452 RFALSE(ih_item_len(B_N_PITEM_HEAD(tbS0, 0)), 453 "PAP-12106: item length must be 0"); 454 RFALSE(comp_short_le_keys(B_N_PKEY(tbS0, 0), B_N_PKEY 455 (tb->L[0], n + item_pos - ret_val)), 456 "PAP-12107: items must be of the same file"); 457 if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) { 458 temp_l = l_n << (tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT); 459 } 460 /* update key of first item in S0 */ 461 version = ih_version(B_N_PITEM_HEAD(tbS0, 0)); 462 set_le_key_k_offset(version, B_N_PKEY(tbS0, 0), 463 le_key_k_offset(version,B_N_PKEY(tbS0, 0)) + temp_l); 464 /* update left delimiting key */ 465 set_le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]), 466 le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0])) + temp_l); 467 } 468 469 /* Calculate new body, position in item and insert_size[0] */ 470 if (l_n > zeros_num) { 471 body += (l_n - zeros_num); 472 zeros_num = 0; 473 } else 474 zeros_num -= l_n; 475 pos_in_item = 0; 476 477 RFALSE(comp_short_le_keys(B_N_PKEY(tbS0, 0), B_N_PKEY(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1)) 478 || !op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size) 479 || !op_is_left_mergeable(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]), tbS0->b_size), 480 "PAP-12120: item must be merge-able with left neighboring item"); 481 } else { /* only part of the appended item will be in L[0] */ 482 483 /* Calculate position in item for append in S[0] */ 484 pos_in_item -= tb->lbytes; 485 486 RFALSE(pos_in_item <= 0, "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item); 487 488 /* Shift lnum[0] - 1 items in whole. 
Shift lbytes - 1 byte from item number lnum[0] */ 489 leaf_shift_left(tb, tb->lnum[0], tb->lbytes); 490 } 491 } 492 } else { /* appended item will be in L[0] in whole */ 493 494 struct item_head *pasted; 495 496 if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) { /* if we paste into first item of S[0] and it is left mergable */ 497 /* then increment pos_in_item by the size of the last item in L[0] */ 498 pasted = B_N_PITEM_HEAD(tb->L[0], n - 1); 499 if (is_direntry_le_ih(pasted)) 500 pos_in_item += ih_entry_count(pasted); 501 else 502 pos_in_item += ih_item_len(pasted); 503 } 504 505 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */ 506 ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes); 507 /* Append to body of item in L[0] */ 508 buffer_info_init_left(tb, &bi); 509 leaf_paste_in_buffer(&bi, n + item_pos - ret_val, 510 pos_in_item, 511 tb->insert_size[0], 512 body, zeros_num); 513 514 /* if appended item is directory, paste entry */ 515 pasted = B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val); 516 if (is_direntry_le_ih(pasted)) 517 leaf_paste_entries(&bi, n + item_pos - ret_val, 518 pos_in_item, 1, 519 (struct reiserfs_de_head *) body, 520 body + DEH_SIZE, 521 tb->insert_size[0]); 522 /* if appended item is indirect item, put unformatted node into un list */ 523 if (is_indirect_le_ih(pasted)) 524 set_ih_free_space(pasted, 0); 525 tb->insert_size[0] = 0; 526 zeros_num = 0; 527 } 528 break; 529 default: /* cases d and t */ 530 reiserfs_panic(tb->tb_sb, "PAP-12130", 531 "lnum > 0: unexpected mode: " 532 " %s(%d)", 533 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag); 534 } 535 } else { 536 /* new item doesn't fall into L[0] */ 537 leaf_shift_left(tb, tb->lnum[0], tb->lbytes); 538 } 539 } 540 541 /* tb->lnum[0] > 0 */ 542 /* Calculate new item position */ 543 item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 
1 : 0)); 544 545 if (tb->rnum[0] > 0) { 546 /* shift rnum[0] items from S[0] to the right neighbor R[0] */ 547 n = B_NR_ITEMS(tbS0); 548 switch (flag) { 549 550 case M_INSERT: /* insert item */ 551 if (n - tb->rnum[0] < item_pos) { /* new item or its part falls to R[0] */ 552 if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { /* part of new item falls into R[0] */ 553 loff_t old_key_comp, old_len, r_zeros_number; 554 const char *r_body; 555 int version; 556 loff_t offset; 557 558 leaf_shift_right(tb, tb->rnum[0] - 1, -1); 559 560 version = ih_version(ih); 561 /* Remember key component and item length */ 562 old_key_comp = le_ih_k_offset(ih); 563 old_len = ih_item_len(ih); 564 565 /* Calculate key component and item length to insert into R[0] */ 566 offset = le_ih_k_offset(ih) + ((old_len - tb->rbytes) << (is_indirect_le_ih(ih) ? tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT : 0)); 567 set_le_ih_k_offset(ih, offset); 568 put_ih_item_len(ih, tb->rbytes); 569 /* Insert part of the item into R[0] */ 570 buffer_info_init_right(tb, &bi); 571 if ((old_len - tb->rbytes) > zeros_num) { 572 r_zeros_number = 0; 573 r_body = body + (old_len - tb->rbytes) - zeros_num; 574 } else { 575 r_body = body; 576 r_zeros_number = zeros_num - (old_len - tb->rbytes); 577 zeros_num -= r_zeros_number; 578 } 579 580 leaf_insert_into_buf(&bi, 0, ih, r_body, 581 r_zeros_number); 582 583 /* Replace right delimiting key by first key in R[0] */ 584 replace_key(tb, tb->CFR[0], tb->rkey[0], 585 tb->R[0], 0); 586 587 /* Calculate key component and item length to insert into S[0] */ 588 set_le_ih_k_offset(ih, old_key_comp); 589 put_ih_item_len(ih, old_len - tb->rbytes); 590 591 tb->insert_size[0] -= tb->rbytes; 592 593 } else { /* whole new item falls into R[0] */ 594 595 /* Shift rnum[0]-1 items to R[0] */ 596 ret_val = leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes); 597 /* Insert new item into R[0] */ 598 buffer_info_init_right(tb, &bi); 599 leaf_insert_into_buf(&bi, item_pos - n + 
tb->rnum[0] - 1, 600 ih, body, zeros_num); 601 602 if (item_pos - n + tb->rnum[0] - 1 == 0) { 603 replace_key(tb, tb->CFR[0], 604 tb->rkey[0], 605 tb->R[0], 0); 606 607 } 608 zeros_num = tb->insert_size[0] = 0; 609 } 610 } else { /* new item or part of it doesn't fall into R[0] */ 611 612 leaf_shift_right(tb, tb->rnum[0], tb->rbytes); 613 } 614 break; 615 616 case M_PASTE: /* append item */ 617 618 if (n - tb->rnum[0] <= item_pos) { /* pasted item or part of it falls to R[0] */ 619 if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) { /* we must shift the part of the appended item */ 620 if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) { /* we append to directory item */ 621 int entry_count; 622 623 RFALSE(zeros_num, 624 "PAP-12145: invalid parameter in case of a directory"); 625 entry_count = I_ENTRY_COUNT(B_N_PITEM_HEAD 626 (tbS0, item_pos)); 627 if (entry_count - tb->rbytes < 628 pos_in_item) 629 /* new directory entry falls into R[0] */ 630 { 631 int paste_entry_position; 632 633 RFALSE(tb->rbytes - 1 >= entry_count || !tb-> insert_size[0], 634 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d", 635 tb->rbytes, entry_count); 636 /* Shift rnum[0]-1 items in whole. 
Shift rbytes-1 directory entries from directory item number rnum[0] */ 637 leaf_shift_right(tb, tb->rnum[0], tb->rbytes - 1); 638 /* Paste given directory entry to directory item */ 639 paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1; 640 buffer_info_init_right(tb, &bi); 641 leaf_paste_in_buffer(&bi, 0, paste_entry_position, tb->insert_size[0], body, zeros_num); 642 /* paste entry */ 643 leaf_paste_entries(&bi, 0, paste_entry_position, 1, 644 (struct reiserfs_de_head *) body, 645 body + DEH_SIZE, tb->insert_size[0]); 646 647 if (paste_entry_position == 0) { 648 /* change delimiting keys */ 649 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0],0); 650 } 651 652 tb->insert_size[0] = 0; 653 pos_in_item++; 654 } else { /* new directory entry doesn't fall into R[0] */ 655 656 leaf_shift_right(tb, tb->rnum[0], tb->rbytes); 657 } 658 } else { /* regular object */ 659 660 int n_shift, n_rem, r_zeros_number; 661 const char *r_body; 662 663 /* Calculate number of bytes which must be shifted from appended item */ 664 if ((n_shift = tb->rbytes - tb->insert_size[0]) < 0) 665 n_shift = 0; 666 667 RFALSE(pos_in_item != ih_item_len 668 (B_N_PITEM_HEAD(tbS0, item_pos)), 669 "PAP-12155: invalid position to paste. 
ih_item_len=%d, pos_in_item=%d", 670 pos_in_item, ih_item_len 671 (B_N_PITEM_HEAD(tbS0, item_pos))); 672 673 leaf_shift_right(tb, tb->rnum[0], n_shift); 674 /* Calculate number of bytes which must remain in body after appending to R[0] */ 675 if ((n_rem = tb->insert_size[0] - tb->rbytes) < 0) 676 n_rem = 0; 677 678 { 679 int version; 680 unsigned long temp_rem = n_rem; 681 682 version = ih_version(B_N_PITEM_HEAD(tb->R[0], 0)); 683 if (is_indirect_le_key(version, B_N_PKEY(tb->R[0], 0))) { 684 temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT); 685 } 686 set_le_key_k_offset(version, B_N_PKEY(tb->R[0], 0), 687 le_key_k_offset(version, B_N_PKEY(tb->R[0], 0)) + temp_rem); 688 set_le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]), 689 le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0])) + temp_rem); 690 } 691 /* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem; 692 k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/ 693 do_balance_mark_internal_dirty(tb, tb->CFR[0], 0); 694 695 /* Append part of body into R[0] */ 696 buffer_info_init_right(tb, &bi); 697 if (n_rem > zeros_num) { 698 r_zeros_number = 0; 699 r_body = body + n_rem - zeros_num; 700 } else { 701 r_body = body; 702 r_zeros_number = zeros_num - n_rem; 703 zeros_num -= r_zeros_number; 704 } 705 706 leaf_paste_in_buffer(&bi, 0, n_shift, 707 tb->insert_size[0] - n_rem, 708 r_body, r_zeros_number); 709 710 if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->R[0], 0))) { 711 #if 0 712 RFALSE(n_rem, 713 "PAP-12160: paste more than one unformatted node pointer"); 714 #endif 715 set_ih_free_space(B_N_PITEM_HEAD(tb->R[0], 0), 0); 716 } 717 tb->insert_size[0] = n_rem; 718 if (!n_rem) 719 pos_in_item++; 720 } 721 } else { /* pasted item in whole falls into R[0] */ 722 723 struct item_head *pasted; 724 725 ret_val = leaf_shift_right(tb, tb->rnum[0], tb->rbytes); 726 /* append item in R[0] */ 727 if (pos_in_item >= 0) { 728 buffer_info_init_right(tb, &bi); 729 leaf_paste_in_buffer(&bi, 
item_pos - n + tb->rnum[0], pos_in_item, 730 tb->insert_size[0], body, zeros_num); 731 } 732 733 /* paste new entry, if item is directory item */ 734 pasted = B_N_PITEM_HEAD(tb->R[0], item_pos - n + tb->rnum[0]); 735 if (is_direntry_le_ih(pasted) && pos_in_item >= 0) { 736 leaf_paste_entries(&bi, item_pos - n + tb->rnum[0], 737 pos_in_item, 1, 738 (struct reiserfs_de_head *) body, 739 body + DEH_SIZE, tb->insert_size[0]); 740 if (!pos_in_item) { 741 742 RFALSE(item_pos - n + tb->rnum[0], 743 "PAP-12165: directory item must be first item of node when pasting is in 0th position"); 744 745 /* update delimiting keys */ 746 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0); 747 } 748 } 749 750 if (is_indirect_le_ih(pasted)) 751 set_ih_free_space(pasted, 0); 752 zeros_num = tb->insert_size[0] = 0; 753 } 754 } else { /* new item doesn't fall into R[0] */ 755 756 leaf_shift_right(tb, tb->rnum[0], tb->rbytes); 757 } 758 break; 759 default: /* cases d and t */ 760 reiserfs_panic(tb->tb_sb, "PAP-12175", 761 "rnum > 0: unexpected mode: %s(%d)", 762 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag); 763 } 764 765 } 766 767 /* tb->rnum[0] > 0 */ 768 RFALSE(tb->blknum[0] > 3, 769 "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]); 770 RFALSE(tb->blknum[0] < 0, 771 "PAP-12185: blknum can not be %d. 
It must be >= 0", tb->blknum[0]); 772 773 /* if while adding to a node we discover that it is possible to split 774 it in two, and merge the left part into the left neighbor and the 775 right part into the right neighbor, eliminating the node */ 776 if (tb->blknum[0] == 0) { /* node S[0] is empty now */ 777 778 RFALSE(!tb->lnum[0] || !tb->rnum[0], 779 "PAP-12190: lnum and rnum must not be zero"); 780 /* if insertion was done before 0-th position in R[0], right 781 delimiting key of the tb->L[0]'s and left delimiting key are 782 not set correctly */ 783 if (tb->CFL[0]) { 784 if (!tb->CFR[0]) 785 reiserfs_panic(tb->tb_sb, "vs-12195", 786 "CFR not initialized"); 787 copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]), 788 B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0])); 789 do_balance_mark_internal_dirty(tb, tb->CFL[0], 0); 790 } 791 792 reiserfs_invalidate_buffer(tb, tbS0); 793 return 0; 794 } 795 796 /* Fill new nodes that appear in place of S[0] */ 797 798 /* I am told that this copying is because we need an array to enable 799 the looping code. -Hans */ 800 snum[0] = tb->s1num, snum[1] = tb->s2num; 801 sbytes[0] = tb->s1bytes; 802 sbytes[1] = tb->s2bytes; 803 for (i = tb->blknum[0] - 2; i >= 0; i--) { 804 805 RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. 
Must be > 0", i, 806 snum[i]); 807 808 /* here we shift from S to S_new nodes */ 809 810 S_new[i] = get_FEB(tb); 811 812 /* initialized block type and tree level */ 813 set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL); 814 815 n = B_NR_ITEMS(tbS0); 816 817 switch (flag) { 818 case M_INSERT: /* insert item */ 819 820 if (n - snum[i] < item_pos) { /* new item or it's part falls to first new node S_new[i] */ 821 if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) { /* part of new item falls into S_new[i] */ 822 int old_key_comp, old_len, r_zeros_number; 823 const char *r_body; 824 int version; 825 826 /* Move snum[i]-1 items from S[0] to S_new[i] */ 827 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, 828 snum[i] - 1, -1, 829 S_new[i]); 830 /* Remember key component and item length */ 831 version = ih_version(ih); 832 old_key_comp = le_ih_k_offset(ih); 833 old_len = ih_item_len(ih); 834 835 /* Calculate key component and item length to insert into S_new[i] */ 836 set_le_ih_k_offset(ih, le_ih_k_offset(ih) + 837 ((old_len - sbytes[i]) << (is_indirect_le_ih(ih) ? 
tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));

				put_ih_item_len(ih, sbytes[i]);

				/* Insert part of the item into S_new[i] before 0-th item */
				buffer_info_init_bh(tb, &bi, S_new[i]);

				/* Choose how much of the caller-supplied body vs. zero
				   fill lands in S_new[i]; the remainder stays behind
				   for the part of the item kept in S[0]. */
				if ((old_len - sbytes[i]) > zeros_num) {
					r_zeros_number = 0;
					r_body = body + (old_len - sbytes[i]) - zeros_num;
				} else {
					r_body = body;
					r_zeros_number = zeros_num - (old_len - sbytes[i]);
					zeros_num -= r_zeros_number;
				}

				leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeros_number);

				/* Calculate key component and item length to insert into S[i] */
				set_le_ih_k_offset(ih, old_key_comp);
				put_ih_item_len(ih, old_len - sbytes[i]);
				tb->insert_size[0] -= sbytes[i];
			} else {	/* whole new item falls into S_new[i] */

				/* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
				leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
						snum[i] - 1, sbytes[i], S_new[i]);

				/* Insert new item into S_new[i] */
				buffer_info_init_bh(tb, &bi, S_new[i]);
				leaf_insert_into_buf(&bi, item_pos - n + snum[i] - 1,
						     ih, body, zeros_num);

				zeros_num = tb->insert_size[0] = 0;
			}
		}

		else {	/* new item or it part don't falls into S_new[i] */

			leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
					snum[i], sbytes[i], S_new[i]);
		}
		break;

	case M_PASTE:	/* append item */

		if (n - snum[i] <= item_pos) {	/* pasted item or part of it falls to S_new[i] */
			if (item_pos == n - snum[i] && sbytes[i] != -1) {	/* we must shift part of the appended item */
				struct item_head *aux_ih;

				RFALSE(ih, "PAP-12210: ih must be 0");

				aux_ih = B_N_PITEM_HEAD(tbS0, item_pos);
				if (is_direntry_le_ih(aux_ih)) {
					/* we append to directory item */

					int entry_count;

					entry_count = ih_entry_count(aux_ih);

					/* sbytes[i] counts directory entries here,
					   not bytes (direntry items are split at
					   entry granularity). */
					if (entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count) {
						/* new directory entry falls into S_new[i] */

						RFALSE(!tb->insert_size[0], "PAP-12215: insert_size is already 0");
						RFALSE(sbytes[i] - 1 >= entry_count,
						       "PAP-12220: there are no so much entries (%d), only %d",
						       sbytes[i] - 1, entry_count);

						/* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
						leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i] - 1, S_new[i]);
						/* Paste given directory entry to directory item */
						buffer_info_init_bh(tb, &bi, S_new[i]);
						leaf_paste_in_buffer(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1,
								     tb->insert_size[0], body, zeros_num);
						/* paste new directory entry */
						leaf_paste_entries(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1, 1,
								   (struct reiserfs_de_head *) body,
								   body + DEH_SIZE, tb->insert_size[0]);
						tb->insert_size[0] = 0;
						pos_in_item++;
					} else {	/* new directory entry doesn't fall into S_new[i] */
						leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
					}
				} else {	/* regular object */

					int n_shift, n_rem, r_zeros_number;
					const char *r_body;

					RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)) || tb->insert_size[0] <= 0,
					       "PAP-12225: item too short or insert_size <= 0");

					/* Calculate number of bytes which must be shifted from appended item */
					n_shift = sbytes[i] - tb->insert_size[0];
					if (n_shift < 0)
						n_shift = 0;
					leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]);

					/* Calculate number of bytes which must remain in body after append to S_new[i] */
					n_rem = tb->insert_size[0] - sbytes[i];
					if (n_rem < 0)
						n_rem = 0;
					/* Append part of body into S_new[0] */
					buffer_info_init_bh(tb, &bi, S_new[i]);
					if (n_rem > zeros_num) {
						r_zeros_number = 0;
						r_body = body + n_rem - zeros_num;
					} else {
						r_body = body;
						r_zeros_number = zeros_num - n_rem;
						zeros_num -= r_zeros_number;
					}

					leaf_paste_in_buffer(&bi, 0, n_shift,
							     tb->insert_size[0] - n_rem,
							     r_body, r_zeros_number);
					/* The shifted tail became item 0 of S_new[i];
					   advance its key by the bytes that stayed in
					   S[0].  Indirect items count in block units,
					   hence the blocksize/UNFM_P_SHIFT scaling. */
					{
						struct item_head *tmp;

						tmp = B_N_PITEM_HEAD(S_new[i], 0);
						if (is_indirect_le_ih(tmp)) {
							set_ih_free_space(tmp, 0);
							set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT)));
						} else {
							set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + n_rem);
						}
					}

					tb->insert_size[0] = n_rem;
					if (!n_rem)
						pos_in_item++;
				}
			} else
				/* item falls wholly into S_new[i] */
			{
				int leaf_mi;
				struct item_head *pasted;

#ifdef CONFIG_REISERFS_CHECK
				struct item_head *ih_check = B_N_PITEM_HEAD(tbS0, item_pos);

				if (!is_direntry_le_ih(ih_check)
				    && (pos_in_item != ih_item_len(ih_check)
					|| tb->insert_size[0] <= 0))
					reiserfs_panic(tb->tb_sb,
						       "PAP-12235",
						       "pos_in_item "
						       "must be equal "
						       "to ih_item_len");
#endif				/* CONFIG_REISERFS_CHECK */

				leaf_mi = leaf_move_items(LEAF_FROM_S_TO_SNEW,
							  tb, snum[i],
							  sbytes[i],
							  S_new[i]);

				RFALSE(leaf_mi,
				       "PAP-12240: unexpected value returned by leaf_move_items (%d)",
				       leaf_mi);

				/* paste into item */
				buffer_info_init_bh(tb, &bi, S_new[i]);
				leaf_paste_in_buffer(&bi,
						     item_pos - n + snum[i],
						     pos_in_item,
						     tb->insert_size[0],
						     body, zeros_num);

				pasted = B_N_PITEM_HEAD(S_new[i], item_pos - n + snum[i]);
				if (is_direntry_le_ih(pasted)) {
					leaf_paste_entries(&bi,
							   item_pos - n + snum[i],
							   pos_in_item, 1,
							   (struct reiserfs_de_head *)body,
							   body + DEH_SIZE,
							   tb->insert_size[0]
					    );
				}

				/* if we paste to indirect item update ih_free_space */
				if (is_indirect_le_ih(pasted))
					set_ih_free_space(pasted, 0);
				zeros_num = tb->insert_size[0] = 0;
			}
		}

		else {	/* pasted item doesn't fall into S_new[i] */

			leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
					snum[i], sbytes[i], S_new[i]);
		}
		break;
	default:	/* cases d and t */
		reiserfs_panic(tb->tb_sb, "PAP-12245",
			       "blknum > 2: unexpected mode: %s(%d)",
			       (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
	}

	/* Remember the left delimiting key of the new node and the node
	   itself; they are inserted into the parent by balance_internal(). */
	memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE);
	insert_ptr[i] = S_new[i];

	RFALSE(!buffer_journaled(S_new[i])
	       || buffer_journal_dirty(S_new[i])
	       || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)",
	       i, S_new[i]);
}

/* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
   affected item which remains in S */
if (0 <= item_pos && item_pos < tb->s0num) {	/* if we must insert or append into buffer S[0] */

	switch (flag) {
	case M_INSERT:	/* insert item into S[0] */
		buffer_info_init_tbS0(tb, &bi);
		leaf_insert_into_buf(&bi, item_pos, ih, body,
				     zeros_num);

		/* If we insert the first key change the delimiting key */
		if (item_pos == 0) {
			if (tb->CFL[0])	/* can be 0 in reiserfsck */
				replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
		}
		break;

	case M_PASTE:{	/* append item in S[0] */
			struct item_head *pasted;

			pasted = B_N_PITEM_HEAD(tbS0, item_pos);
			/* when directory, may be new entry already pasted */
			if (is_direntry_le_ih(pasted)) {
				if (pos_in_item >= 0 && pos_in_item <= ih_entry_count(pasted)) {

					RFALSE(!tb->insert_size[0],
					       "PAP-12260: insert_size is 0 already");

					/* prepare space */
					buffer_info_init_tbS0(tb, &bi);
					leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
							     tb->insert_size[0], body,
							     zeros_num);

					/* paste entry */
					leaf_paste_entries(&bi, item_pos, pos_in_item, 1,
							   (struct reiserfs_de_head *)body,
							   body + DEH_SIZE,
							   tb->insert_size[0]);
					/* Pasting the very first entry of the leaf
					   changes the left delimiting key. */
					if (!item_pos && !pos_in_item) {
						RFALSE(!tb->CFL[0] || !tb->L[0],
						       "PAP-12270: CFL[0]/L[0] must be 
specified");
						if (tb->CFL[0])
							replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
					}
					tb->insert_size[0] = 0;
				}
			} else {	/* regular object */
				if (pos_in_item == ih_item_len(pasted)) {

					RFALSE(tb->insert_size[0] <= 0,
					       "PAP-12275: insert size must not be %d",
					       tb->insert_size[0]);
					buffer_info_init_tbS0(tb, &bi);
					leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
							     tb->insert_size[0], body, zeros_num);

					/* if we paste to indirect item update ih_free_space */
					if (is_indirect_le_ih(pasted)) {
#if 0
						RFALSE(tb->
						       insert_size[0] !=
						       UNFM_P_SIZE,
						       "PAP-12280: insert_size for indirect item must be %d, not %d",
						       UNFM_P_SIZE,
						       tb->
						       insert_size[0]);
#endif
						set_ih_free_space(pasted, 0);
					}
					tb->insert_size[0] = 0;
				}
#ifdef CONFIG_REISERFS_CHECK
				else {
					if (tb->insert_size[0]) {
						print_cur_tb("12285");
						reiserfs_panic(tb->tb_sb,
							       "PAP-12285",
							       "insert_size "
							       "must be 0 "
							       "(%d)",
							       tb->insert_size[0]);
					}
				}
#endif				/* CONFIG_REISERFS_CHECK */

			}
		}		/* case M_PASTE: */
	}
}
#ifdef CONFIG_REISERFS_CHECK
if (flag == M_PASTE && tb->insert_size[0]) {
	print_cur_tb("12290");
	reiserfs_panic(tb->tb_sb,
		       "PAP-12290", "insert_size is still not 0 (%d)",
		       tb->insert_size[0]);
}
#endif				/* CONFIG_REISERFS_CHECK */
return 0;
}				/* Leaf level of the tree is balanced (end of balance_leaf) */

/* Make empty node: zero items, whole block counted as free space.
   If the node has a parent, the parent's dc_size for this child is
   reset as well. */
void make_empty_node(struct buffer_info *bi)
{
	struct block_head *blkh;

	RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");

	blkh = B_BLK_HEAD(bi->bi_bh);
	set_blkh_nr_item(blkh, 0);
	set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh));

	if (bi->bi_parent)
		B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0;	/* Endian safe if 0 */
}

/* Get first empty buffer: take the first free buffer prepared by
   fix_nodes out of tb->FEB[], initialize it as an empty node and move
   it to tb->used[].  Panics if fix_nodes left no free buffers. */
struct buffer_head *get_FEB(struct tree_balance *tb)
{
	int i;
	struct buffer_info bi;

	for (i = 0; i < MAX_FEB_SIZE; i++)
		if (tb->FEB[i] != NULL)
			break;

	if (i == MAX_FEB_SIZE)
		reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty");

	/* stand-alone node: no parent recorded in the buffer_info */
	buffer_info_init_bh(tb, &bi, tb->FEB[i]);
	make_empty_node(&bi);
	set_buffer_uptodate(tb->FEB[i]);
	tb->used[i] = tb->FEB[i];
	tb->FEB[i] = NULL;

	return tb->used[i];
}

/* This is now used because reiserfs_free_block has to be able to
** schedule.  Buffers to be freed are parked on tb->thrown[] during
** do_balance and released later by free_thrown().
*/
static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
{
	int i;

	if (buffer_dirty(bh))
		reiserfs_warning(tb->tb_sb, "reiserfs-12320",
				 "called with dirty buffer");
	for (i = 0; i < ARRAY_SIZE(tb->thrown); i++)
		if (!tb->thrown[i]) {
			tb->thrown[i] = bh;
			get_bh(bh);	/* free_thrown puts this */
			return;
		}
	/* no free slot: the block is simply not freed (leaked on disk) */
	reiserfs_warning(tb->tb_sb, "reiserfs-12321",
			 "too many thrown buffers");
}

/* Release every buffer parked by store_thrown() and return its block
   to the free list.  Called after balancing, when reiserfs_free_block
   may safely schedule. */
static void free_thrown(struct tree_balance *tb)
{
	int i;
	b_blocknr_t blocknr;
	for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) {
		if (tb->thrown[i]) {
			blocknr = tb->thrown[i]->b_blocknr;
			if (buffer_dirty(tb->thrown[i]))
				/* NOTE(review): "%d" is used for a
				   b_blocknr_t here; confirm the typedef is
				   a plain int-sized type or switch to the
				   matching specifier. */
				reiserfs_warning(tb->tb_sb, "reiserfs-12322",
						 "called with dirty buffer %d",
						 blocknr);
			brelse(tb->thrown[i]);	/* incremented in store_thrown */
			reiserfs_free_block(tb->transaction_handle, NULL,
					    blocknr, 0);
		}
	}
}

/* Mark a node as removed from the tree (level FREE_LEVEL, no items)
   and queue its block for freeing after balancing completes. */
void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh)
{
	struct block_head *blkh;
	blkh = B_BLK_HEAD(bh);
	set_blkh_level(blkh, FREE_LEVEL);
	set_blkh_nr_item(blkh, 0);

	clear_buffer_dirty(bh);
	store_thrown(tb, bh);
}

/* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.
   dest must be an internal node; src may be a leaf (the key is then
   taken from the item head) or internal (taken from a delimiting key). */
void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest,
		 struct buffer_head *src, int n_src)
{

	RFALSE(dest == NULL || src == NULL,
	       "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
	       src, dest);
	/* NOTE(review): the check requires dest to be an internal (keys
	   level) node, but the message text says "dest must be leaf" —
	   the message looks inverted. */
	RFALSE(!B_IS_KEYS_LEVEL(dest),
	       "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
	       dest);
	RFALSE(n_dest < 0 || n_src < 0,
	       "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
	RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
	       "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
	       n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));

	if (B_IS_ITEMS_LEVEL(src))
		/* source buffer contains leaf node */
		memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PITEM_HEAD(src, n_src),
		       KEY_SIZE);
	else
		memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PDELIM_KEY(src, n_src),
		       KEY_SIZE);

	do_balance_mark_internal_dirty(tb, dest, 0);
}

/* Position of the left neighbor of S[h] within their common parent
   FL[h].  When S[h] is the leftmost child of its own parent, the left
   neighbor is the last child of FL[h]. */
int get_left_neighbor_position(struct tree_balance *tb, int h)
{
	int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);

	RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FL[h] == NULL,
	       "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
	       h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h));

	if (Sh_position == 0)
		return B_NR_ITEMS(tb->FL[h]);
	else
		return Sh_position - 1;
}

/* Position of the right neighbor of S[h] within their common parent
   FR[h]; mirror image of get_left_neighbor_position(). */
int get_right_neighbor_position(struct tree_balance *tb, int h)
{
	int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);

	RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FR[h] == NULL,
	       "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
	       h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]);

	if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h)))
		return 0;
	else
		return Sh_position + 1;
}

#ifdef CONFIG_REISERFS_CHECK

int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);

/* Debug-only: verify that every child pointer of an internal node
   refers to a block that is legitimately in use; panics otherwise. */
static void check_internal_node(struct super_block *s, struct buffer_head *bh,
				char *mes)
{
	struct disk_child *dc;
	int i;

	/* RFALSE compiles away without CONFIG_REISERFS_CHECK, so the
	   NULL test below is repeated as real code on purpose. */
	RFALSE(!bh, "PAP-12336: bh == 0");

	if (!bh || !B_IS_IN_TREE(bh))
		return;

	RFALSE(!buffer_dirty(bh) &&
	       !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
	       "PAP-12337: buffer (%b) must be dirty", bh);
	dc = B_N_CHILD(bh, 0);

	/* an internal node with n keys has n+1 children, hence <= */
	for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
		if (!is_reusable(s, dc_block_number(dc), 1)) {
			print_cur_tb(mes);
			reiserfs_panic(s, "PAP-12338",
				       "invalid child pointer %y in %b",
				       dc, bh);
		}
	}
}

/* Debug-only: warn and return 1 if a buffer we are about to modify is
   locked (without being journal-prepared) or no longer in the tree. */
static int locked_or_not_in_tree(struct tree_balance *tb,
				 struct buffer_head *bh, char *which)
{
	if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
	    !B_IS_IN_TREE(bh)) {
		reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh);
		return 1;
	}
	return 0;
}

/* Debug-only: sanity checks run before balancing starts; returns
   non-zero if any buffer that will be modified is in a bad state. */
static int check_before_balancing(struct tree_balance *tb)
{
	int retval = 0;

	if (REISERFS_SB(tb->tb_sb)->cur_tb) {
		reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule "
			       "occurred based on cur_tb not being null at "
			       "this point in code. do_balance cannot properly "
			       "handle concurrent tree accesses on a same "
			       "mount point.");
	}

	/* double check that the buffers we will modify are unlocked
	   (fix_nodes should already have prepped all of these for us) */
	if (tb->lnum[0]) {
		retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]");
		retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]");
		retval |= locked_or_not_in_tree(tb, tb->CFL[0], "CFL[0]");
		check_leaf(tb->L[0]);
	}
	if (tb->rnum[0]) {
		retval |= locked_or_not_in_tree(tb, tb->R[0], "R[0]");
		retval |= locked_or_not_in_tree(tb, tb->FR[0], "FR[0]");
		retval |= locked_or_not_in_tree(tb, tb->CFR[0], "CFR[0]");
		check_leaf(tb->R[0]);
	}
	retval |= locked_or_not_in_tree(tb, PATH_PLAST_BUFFER(tb->tb_path),
					"S[0]");
	check_leaf(PATH_PLAST_BUFFER(tb->tb_path));

	return retval;
}

/* Debug-only: after leaf balancing, verify that the free space of
   L[0], R[0] and S[0] agrees with the dc_size bookkeeping kept in
   their parents; panics on mismatch. */
static void check_after_balance_leaf(struct tree_balance *tb)
{
	if (tb->lnum[0]) {
		if (B_FREE_SPACE(tb->L[0]) !=
		    MAX_CHILD_SIZE(tb->L[0]) -
		    dc_size(B_N_CHILD
			    (tb->FL[0], get_left_neighbor_position(tb, 0)))) {
			print_cur_tb("12221");
			reiserfs_panic(tb->tb_sb, "PAP-12355",
				       "shift to left was incorrect");
		}
	}
	if (tb->rnum[0]) {
		if (B_FREE_SPACE(tb->R[0]) !=
		    MAX_CHILD_SIZE(tb->R[0]) -
		    dc_size(B_N_CHILD
			    (tb->FR[0], get_right_neighbor_position(tb, 0)))) {
			print_cur_tb("12222");
			reiserfs_panic(tb->tb_sb, "PAP-12360",
				       "shift to right was incorrect");
		}
	}
	if (PATH_H_PBUFFER(tb->tb_path, 1) &&
	    (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) !=
	     (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
	      dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
				PATH_H_POSITION(tb->tb_path, 1)))))) {
		int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0));
		int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
			     dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
					       PATH_H_POSITION(tb->tb_path,
							       1))));
		print_cur_tb("12223");
		reiserfs_warning(tb->tb_sb, "reiserfs-12363",
				 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
				 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
				 left,
				 MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)),
				 PATH_H_PBUFFER(tb->tb_path, 1),
				 PATH_H_POSITION(tb->tb_path, 1),
				 dc_size(B_N_CHILD
					 (PATH_H_PBUFFER(tb->tb_path, 1),
					  PATH_H_POSITION(tb->tb_path, 1))),
				 right);
		reiserfs_panic(tb->tb_sb, "PAP-12365", "S is incorrect");
	}
}

/* Debug-only: run leaf consistency checks on L[0], R[0] and S[0]. */
static void check_leaf_level(struct tree_balance *tb)
{
	check_leaf(tb->L[0]);
	check_leaf(tb->R[0]);
	check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
}

/* Debug-only: run internal-node checks on every level that was
   touched by this balance (insert_size[h] != 0 marks touched levels). */
static void check_internal_levels(struct tree_balance *tb)
{
	int h;

	/* check all internal nodes */
	for (h = 1; tb->insert_size[h]; h++) {
		check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h),
				    "BAD BUFFER ON PATH");
		if (tb->lnum[h])
			check_internal_node(tb->tb_sb, tb->L[h], "BAD L");
		if (tb->rnum[h])
			check_internal_node(tb->tb_sb, tb->R[h], "BAD R");
	}

}

#endif

/* Now we have all of the buffers that must be used in balancing of
   the tree.  We rely on the assumption that schedule() will not occur
   while do_balance works.  (Only interrupt handlers are acceptable.)
   We balance the tree according to the analysis made before this,
   using buffers already obtained.  For SMP support it will someday be
   necessary to add ordered locking of tb. */

/* Some interesting rules of balancing:

   we delete a maximum of two nodes per level per balancing: we never
   delete R; when we delete two of the three nodes L, S, R then we move
   them into R.

   we only delete L if we are deleting two nodes; if we delete only
   one node we delete S

   if we shift leaves then we shift as much as we can: this is a
   deliberate policy of extremism in node packing which results in
   higher average utilization after repeated random balance operations
   at the cost of more memory copies and more balancing as a result of
   small insertions to full nodes.

   if we shift internal nodes we try to evenly balance the node
   utilization, with consequent less balancing at the cost of lower
   utilization.

   one could argue that the policy for directories in leaves should be
   that of internal nodes, but we will wait until another day to
   evaluate this....  It would be nice to someday measure and prove
   these assumptions as to what is optimal....

*/

/* Pre-balance bookkeeping: assert all buffers are modifiable and,
   under CONFIG_REISERFS_CHECK, publish tb as the currently running
   balance so concurrent balances can be detected. */
static inline void do_balance_starts(struct tree_balance *tb)
{
	/* use print_cur_tb() to see initial state of struct
	   tree_balance */

	/* store_print_tb (tb); */

	/* do not delete, just comment it out */
	/* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
	   "check");*/
	RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
#ifdef CONFIG_REISERFS_CHECK
	REISERFS_SB(tb->tb_sb)->cur_tb = tb;
#endif
}

/* Post-balance bookkeeping: run debug checks, release all nodes held
   for the balance, and free the blocks queued on the thrown list. */
static inline void do_balance_completed(struct tree_balance *tb)
{

#ifdef CONFIG_REISERFS_CHECK
	check_leaf_level(tb);
	check_internal_levels(tb);
	REISERFS_SB(tb->tb_sb)->cur_tb = NULL;
#endif

	/* reiserfs_free_block is no longer schedule safe.  So, we need to
	 ** put the buffers we want freed on the thrown list during do_balance,
	 ** and then free them now
	 */

	REISERFS_SB(tb->tb_sb)->s_do_balance++;

	/* release all nodes held to perform the balancing */
	unfix_nodes(tb);

	free_thrown(tb);
}

/* Balance the tree as planned by fix_nodes(): first the leaf level,
   then each internal level that still has pending work, propagating
   new keys/node pointers upward via insert_key[]/insert_ptr[]. */
void do_balance(struct tree_balance *tb,	/* tree_balance structure */
		struct item_head *ih,	/* item header of inserted item */
		const char *body,	/* body of inserted item or bytes to paste */
		int flag)
{				/* i - insert, d - delete
				   c - cut, p - paste

				   Cut means delete part of an item
				   (includes removing an entry from a
				   directory).

				   Delete means delete whole item.

				   Insert means add a new item into the
				   tree.

				   Paste means to append to the end of an
				   existing file or to insert a directory
				   entry. */
	int child_pos,		/* position of a child node in its parent */
	 h;			/* level of the tree being processed */
	struct item_head insert_key[2];	/* in our processing of one level
					   we sometimes determine what
					   must be inserted into the next
					   higher level.  This insertion
					   consists of a key or two keys
					   and their corresponding
					   pointers */
	struct buffer_head *insert_ptr[2];	/* inserted node-ptrs for the next
						   level */

	tb->tb_mode = flag;
	tb->need_balance_dirty = 0;

	if (FILESYSTEM_CHANGED_TB(tb)) {
		reiserfs_panic(tb->tb_sb, "clm-6000", "fs generation has "
			       "changed");
	}
	/* if we have no real work to do */
	if (!tb->insert_size[0]) {
		reiserfs_warning(tb->tb_sb, "PAP-12350",
				 "insert_size == 0, mode == %c", flag);
		unfix_nodes(tb);
		return;
	}

	atomic_inc(&(fs_generation(tb->tb_sb)));
	do_balance_starts(tb);

	/* balance_leaf returns 0 except if combining L, R and S into
	   one node.  See balance_internal() for an explanation of this
	   line of code. */
	child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
	    balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);

#ifdef CONFIG_REISERFS_CHECK
	check_after_balance_leaf(tb);
#endif

	/* Balance internal levels of the tree. */
	for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
		child_pos =
		    balance_internal(tb, h, child_pos, insert_key, insert_ptr);

	do_balance_completed(tb);

}