1 /* 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 3 */ 4 5 /* Now we have all buffers that must be used in balancing of the tree */ 6 /* Further calculations can not cause schedule(), and thus the buffer */ 7 /* tree will be stable until the balancing will be finished */ 8 /* balance the tree according to the analysis made before, */ 9 /* and using buffers obtained after all above. */ 10 11 /** 12 ** balance_leaf_when_delete 13 ** balance_leaf 14 ** do_balance 15 ** 16 **/ 17 18 #include <asm/uaccess.h> 19 #include <linux/time.h> 20 #include <linux/reiserfs_fs.h> 21 #include <linux/buffer_head.h> 22 #include <linux/kernel.h> 23 24 #ifdef CONFIG_REISERFS_CHECK 25 26 struct tree_balance *cur_tb = NULL; /* detects whether more than one 27 copy of tb exists as a means 28 of checking whether schedule 29 is interrupting do_balance */ 30 #endif 31 32 static inline void buffer_info_init_left(struct tree_balance *tb, 33 struct buffer_info *bi) 34 { 35 bi->tb = tb; 36 bi->bi_bh = tb->L[0]; 37 bi->bi_parent = tb->FL[0]; 38 bi->bi_position = get_left_neighbor_position(tb, 0); 39 } 40 41 static inline void buffer_info_init_right(struct tree_balance *tb, 42 struct buffer_info *bi) 43 { 44 bi->tb = tb; 45 bi->bi_bh = tb->R[0]; 46 bi->bi_parent = tb->FR[0]; 47 bi->bi_position = get_right_neighbor_position(tb, 0); 48 } 49 50 static inline void buffer_info_init_tbS0(struct tree_balance *tb, 51 struct buffer_info *bi) 52 { 53 bi->tb = tb; 54 bi->bi_bh = PATH_PLAST_BUFFER(tb->tb_path); 55 bi->bi_parent = PATH_H_PPARENT(tb->tb_path, 0); 56 bi->bi_position = PATH_H_POSITION(tb->tb_path, 1); 57 } 58 59 static inline void buffer_info_init_bh(struct tree_balance *tb, 60 struct buffer_info *bi, 61 struct buffer_head *bh) 62 { 63 bi->tb = tb; 64 bi->bi_bh = bh; 65 bi->bi_parent = NULL; 66 bi->bi_position = 0; 67 } 68 69 inline void do_balance_mark_leaf_dirty(struct tree_balance *tb, 70 struct buffer_head *bh, int flag) 71 { 72 
journal_mark_dirty(tb->transaction_handle, 73 tb->transaction_handle->t_super, bh); 74 } 75 76 #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty 77 #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty 78 79 /* summary: 80 if deleting something ( tb->insert_size[0] < 0 ) 81 return(balance_leaf_when_delete()); (flag d handled here) 82 else 83 if lnum is larger than 0 we put items into the left node 84 if rnum is larger than 0 we put items into the right node 85 if snum1 is larger than 0 we put items into the new node s1 86 if snum2 is larger than 0 we put items into the new node s2 87 Note that all *num* count new items being created. 88 89 It would be easier to read balance_leaf() if each of these summary 90 lines was a separate procedure rather than being inlined. I think 91 that there are many passages here and in balance_leaf_when_delete() in 92 which two calls to one procedure can replace two passages, and it 93 might save cache space and improve software maintenance costs to do so. 94 95 Vladimir made the perceptive comment that we should offload most of 96 the decision making in this function into fix_nodes/check_balance, and 97 then create some sort of structure in tb that says what actions should 98 be performed by do_balance. 
99 100 -Hans */ 101 102 /* Balance leaf node in case of delete or cut: insert_size[0] < 0 103 * 104 * lnum, rnum can have values >= -1 105 * -1 means that the neighbor must be joined with S 106 * 0 means that nothing should be done with the neighbor 107 * >0 means to shift entirely or partly the specified number of items to the neighbor 108 */ 109 static int balance_leaf_when_delete(struct tree_balance *tb, int flag) 110 { 111 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 112 int item_pos = PATH_LAST_POSITION(tb->tb_path); 113 int pos_in_item = tb->tb_path->pos_in_item; 114 struct buffer_info bi; 115 int n; 116 struct item_head *ih; 117 118 RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1, 119 "vs- 12000: level: wrong FR %z", tb->FR[0]); 120 RFALSE(tb->blknum[0] > 1, 121 "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]); 122 RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0), 123 "PAP-12010: tree can not be empty"); 124 125 ih = B_N_PITEM_HEAD(tbS0, item_pos); 126 buffer_info_init_tbS0(tb, &bi); 127 128 /* Delete or truncate the item */ 129 130 switch (flag) { 131 case M_DELETE: /* delete item in S[0] */ 132 133 RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0], 134 "vs-12013: mode Delete, insert size %d, ih to be deleted %h", 135 -tb->insert_size[0], ih); 136 137 leaf_delete_items(&bi, 0, item_pos, 1, -1); 138 139 if (!item_pos && tb->CFL[0]) { 140 if (B_NR_ITEMS(tbS0)) { 141 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 142 0); 143 } else { 144 if (!PATH_H_POSITION(tb->tb_path, 1)) 145 replace_key(tb, tb->CFL[0], tb->lkey[0], 146 PATH_H_PPARENT(tb->tb_path, 147 0), 0); 148 } 149 } 150 151 RFALSE(!item_pos && !tb->CFL[0], 152 "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0], 153 tb->L[0]); 154 155 break; 156 157 case M_CUT:{ /* cut item in S[0] */ 158 if (is_direntry_le_ih(ih)) { 159 160 /* UFS unlink semantics are such that you can only delete one directory entry at a time. 
*/ 161 /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */ 162 tb->insert_size[0] = -1; 163 leaf_cut_from_buffer(&bi, item_pos, pos_in_item, 164 -tb->insert_size[0]); 165 166 RFALSE(!item_pos && !pos_in_item && !tb->CFL[0], 167 "PAP-12030: can not change delimiting key. CFL[0]=%p", 168 tb->CFL[0]); 169 170 if (!item_pos && !pos_in_item && tb->CFL[0]) { 171 replace_key(tb, tb->CFL[0], tb->lkey[0], 172 tbS0, 0); 173 } 174 } else { 175 leaf_cut_from_buffer(&bi, item_pos, pos_in_item, 176 -tb->insert_size[0]); 177 178 RFALSE(!ih_item_len(ih), 179 "PAP-12035: cut must leave non-zero dynamic length of item"); 180 } 181 break; 182 } 183 184 default: 185 print_cur_tb("12040"); 186 reiserfs_panic(tb->tb_sb, "PAP-12040", 187 "unexpected mode: %s(%d)", 188 (flag == 189 M_PASTE) ? "PASTE" : ((flag == 190 M_INSERT) ? "INSERT" : 191 "UNKNOWN"), flag); 192 } 193 194 /* the rule is that no shifting occurs unless by shifting a node can be freed */ 195 n = B_NR_ITEMS(tbS0); 196 if (tb->lnum[0]) { /* L[0] takes part in balancing */ 197 if (tb->lnum[0] == -1) { /* L[0] must be joined with S[0] */ 198 if (tb->rnum[0] == -1) { /* R[0] must be also joined with S[0] */ 199 if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) { 200 /* all contents of all the 3 buffers will be in L[0] */ 201 if (PATH_H_POSITION(tb->tb_path, 1) == 0 202 && 1 < B_NR_ITEMS(tb->FR[0])) 203 replace_key(tb, tb->CFL[0], 204 tb->lkey[0], 205 tb->FR[0], 1); 206 207 leaf_move_items(LEAF_FROM_S_TO_L, tb, n, 208 -1, NULL); 209 leaf_move_items(LEAF_FROM_R_TO_L, tb, 210 B_NR_ITEMS(tb->R[0]), 211 -1, NULL); 212 213 reiserfs_invalidate_buffer(tb, tbS0); 214 reiserfs_invalidate_buffer(tb, 215 tb->R[0]); 216 217 return 0; 218 } 219 /* all contents of all the 3 buffers will be in R[0] */ 220 leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1, 221 NULL); 222 leaf_move_items(LEAF_FROM_L_TO_R, tb, 223 B_NR_ITEMS(tb->L[0]), -1, NULL); 224 225 /* right_delimiting_key is correct in R[0] */ 226 
replace_key(tb, tb->CFR[0], tb->rkey[0], 227 tb->R[0], 0); 228 229 reiserfs_invalidate_buffer(tb, tbS0); 230 reiserfs_invalidate_buffer(tb, tb->L[0]); 231 232 return -1; 233 } 234 235 RFALSE(tb->rnum[0] != 0, 236 "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]); 237 /* all contents of L[0] and S[0] will be in L[0] */ 238 leaf_shift_left(tb, n, -1); 239 240 reiserfs_invalidate_buffer(tb, tbS0); 241 242 return 0; 243 } 244 /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */ 245 246 RFALSE((tb->lnum[0] + tb->rnum[0] < n) || 247 (tb->lnum[0] + tb->rnum[0] > n + 1), 248 "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent", 249 tb->rnum[0], tb->lnum[0], n); 250 RFALSE((tb->lnum[0] + tb->rnum[0] == n) && 251 (tb->lbytes != -1 || tb->rbytes != -1), 252 "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split", 253 tb->rbytes, tb->lbytes); 254 RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) && 255 (tb->lbytes < 1 || tb->rbytes != -1), 256 "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split", 257 tb->rbytes, tb->lbytes); 258 259 leaf_shift_left(tb, tb->lnum[0], tb->lbytes); 260 leaf_shift_right(tb, tb->rnum[0], tb->rbytes); 261 262 reiserfs_invalidate_buffer(tb, tbS0); 263 264 return 0; 265 } 266 267 if (tb->rnum[0] == -1) { 268 /* all contents of R[0] and S[0] will be in R[0] */ 269 leaf_shift_right(tb, n, -1); 270 reiserfs_invalidate_buffer(tb, tbS0); 271 return 0; 272 } 273 274 RFALSE(tb->rnum[0], 275 "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]); 276 return 0; 277 } 278 279 static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item header of inserted item (this is on little endian) */ 280 const char *body, /* body of inserted item or bytes to paste */ 281 int flag, /* i - insert, d - delete, c - cut, p - paste 282 (see comment to do_balance) */ 283 struct item_head *insert_key, /* in our processing of one level we sometimes determine 
what 284 must be inserted into the next higher level. This insertion 285 consists of a key or two keys and their corresponding 286 pointers */ 287 struct buffer_head **insert_ptr /* inserted node-ptrs for the next level */ 288 ) 289 { 290 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 291 int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0] 292 of the affected item */ 293 struct buffer_info bi; 294 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */ 295 int snum[2]; /* number of items that will be placed 296 into S_new (includes partially shifted 297 items) */ 298 int sbytes[2]; /* if an item is partially shifted into S_new then 299 if it is a directory item 300 it is the number of entries from the item that are shifted into S_new 301 else 302 it is the number of bytes from the item that are shifted into S_new 303 */ 304 int n, i; 305 int ret_val; 306 int pos_in_item; 307 int zeros_num; 308 309 PROC_INFO_INC(tb->tb_sb, balance_at[0]); 310 311 /* Make balance in case insert_size[0] < 0 */ 312 if (tb->insert_size[0] < 0) 313 return balance_leaf_when_delete(tb, flag); 314 315 zeros_num = 0; 316 if (flag == M_INSERT && !body) 317 zeros_num = ih_item_len(ih); 318 319 pos_in_item = tb->tb_path->pos_in_item; 320 /* for indirect item pos_in_item is measured in unformatted node 321 pointers. 
Recalculate to bytes */ 322 if (flag != M_INSERT 323 && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) 324 pos_in_item *= UNFM_P_SIZE; 325 326 if (tb->lnum[0] > 0) { 327 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */ 328 if (item_pos < tb->lnum[0]) { 329 /* new item or it part falls to L[0], shift it too */ 330 n = B_NR_ITEMS(tb->L[0]); 331 332 switch (flag) { 333 case M_INSERT: /* insert item into L[0] */ 334 335 if (item_pos == tb->lnum[0] - 1 336 && tb->lbytes != -1) { 337 /* part of new item falls into L[0] */ 338 int new_item_len; 339 int version; 340 341 ret_val = 342 leaf_shift_left(tb, tb->lnum[0] - 1, 343 -1); 344 345 /* Calculate item length to insert to S[0] */ 346 new_item_len = 347 ih_item_len(ih) - tb->lbytes; 348 /* Calculate and check item length to insert to L[0] */ 349 put_ih_item_len(ih, 350 ih_item_len(ih) - 351 new_item_len); 352 353 RFALSE(ih_item_len(ih) <= 0, 354 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d", 355 ih_item_len(ih)); 356 357 /* Insert new item into L[0] */ 358 buffer_info_init_left(tb, &bi); 359 leaf_insert_into_buf(&bi, 360 n + item_pos - 361 ret_val, ih, body, 362 zeros_num > 363 ih_item_len(ih) ? 364 ih_item_len(ih) : 365 zeros_num); 366 367 version = ih_version(ih); 368 369 /* Calculate key component, item length and body to insert into S[0] */ 370 set_le_ih_k_offset(ih, 371 le_ih_k_offset(ih) + 372 (tb-> 373 lbytes << 374 (is_indirect_le_ih 375 (ih) ? 
tb->tb_sb-> 376 s_blocksize_bits - 377 UNFM_P_SHIFT : 378 0))); 379 380 put_ih_item_len(ih, new_item_len); 381 if (tb->lbytes > zeros_num) { 382 body += 383 (tb->lbytes - zeros_num); 384 zeros_num = 0; 385 } else 386 zeros_num -= tb->lbytes; 387 388 RFALSE(ih_item_len(ih) <= 0, 389 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d", 390 ih_item_len(ih)); 391 } else { 392 /* new item in whole falls into L[0] */ 393 /* Shift lnum[0]-1 items to L[0] */ 394 ret_val = 395 leaf_shift_left(tb, tb->lnum[0] - 1, 396 tb->lbytes); 397 /* Insert new item into L[0] */ 398 buffer_info_init_left(tb, &bi); 399 leaf_insert_into_buf(&bi, 400 n + item_pos - 401 ret_val, ih, body, 402 zeros_num); 403 tb->insert_size[0] = 0; 404 zeros_num = 0; 405 } 406 break; 407 408 case M_PASTE: /* append item in L[0] */ 409 410 if (item_pos == tb->lnum[0] - 1 411 && tb->lbytes != -1) { 412 /* we must shift the part of the appended item */ 413 if (is_direntry_le_ih 414 (B_N_PITEM_HEAD(tbS0, item_pos))) { 415 416 RFALSE(zeros_num, 417 "PAP-12090: invalid parameter in case of a directory"); 418 /* directory item */ 419 if (tb->lbytes > pos_in_item) { 420 /* new directory entry falls into L[0] */ 421 struct item_head 422 *pasted; 423 int l_pos_in_item = 424 pos_in_item; 425 426 /* Shift lnum[0] - 1 items in whole. 
Shift lbytes - 1 entries from given directory item */ 427 ret_val = 428 leaf_shift_left(tb, 429 tb-> 430 lnum 431 [0], 432 tb-> 433 lbytes 434 - 435 1); 436 if (ret_val 437 && !item_pos) { 438 pasted = 439 B_N_PITEM_HEAD 440 (tb->L[0], 441 B_NR_ITEMS 442 (tb-> 443 L[0]) - 444 1); 445 l_pos_in_item += 446 I_ENTRY_COUNT 447 (pasted) - 448 (tb-> 449 lbytes - 450 1); 451 } 452 453 /* Append given directory entry to directory item */ 454 buffer_info_init_left(tb, &bi); 455 leaf_paste_in_buffer 456 (&bi, 457 n + item_pos - 458 ret_val, 459 l_pos_in_item, 460 tb->insert_size[0], 461 body, zeros_num); 462 463 /* previous string prepared space for pasting new entry, following string pastes this entry */ 464 465 /* when we have merge directory item, pos_in_item has been changed too */ 466 467 /* paste new directory entry. 1 is entry number */ 468 leaf_paste_entries(&bi, 469 n + 470 item_pos 471 - 472 ret_val, 473 l_pos_in_item, 474 1, 475 (struct 476 reiserfs_de_head 477 *) 478 body, 479 body 480 + 481 DEH_SIZE, 482 tb-> 483 insert_size 484 [0] 485 ); 486 tb->insert_size[0] = 0; 487 } else { 488 /* new directory item doesn't fall into L[0] */ 489 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */ 490 leaf_shift_left(tb, 491 tb-> 492 lnum[0], 493 tb-> 494 lbytes); 495 } 496 /* Calculate new position to append in item body */ 497 pos_in_item -= tb->lbytes; 498 } else { 499 /* regular object */ 500 RFALSE(tb->lbytes <= 0, 501 "PAP-12095: there is nothing to shift to L[0]. 
lbytes=%d", 502 tb->lbytes); 503 RFALSE(pos_in_item != 504 ih_item_len 505 (B_N_PITEM_HEAD 506 (tbS0, item_pos)), 507 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d", 508 ih_item_len 509 (B_N_PITEM_HEAD 510 (tbS0, item_pos)), 511 pos_in_item); 512 513 if (tb->lbytes >= pos_in_item) { 514 /* appended item will be in L[0] in whole */ 515 int l_n; 516 517 /* this bytes number must be appended to the last item of L[h] */ 518 l_n = 519 tb->lbytes - 520 pos_in_item; 521 522 /* Calculate new insert_size[0] */ 523 tb->insert_size[0] -= 524 l_n; 525 526 RFALSE(tb-> 527 insert_size[0] <= 528 0, 529 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d", 530 tb-> 531 insert_size[0]); 532 ret_val = 533 leaf_shift_left(tb, 534 tb-> 535 lnum 536 [0], 537 ih_item_len 538 (B_N_PITEM_HEAD 539 (tbS0, 540 item_pos))); 541 /* Append to body of item in L[0] */ 542 buffer_info_init_left(tb, &bi); 543 leaf_paste_in_buffer 544 (&bi, 545 n + item_pos - 546 ret_val, 547 ih_item_len 548 (B_N_PITEM_HEAD 549 (tb->L[0], 550 n + item_pos - 551 ret_val)), l_n, 552 body, 553 zeros_num > 554 l_n ? 
l_n : 555 zeros_num); 556 /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */ 557 { 558 int version; 559 int temp_l = 560 l_n; 561 562 RFALSE 563 (ih_item_len 564 (B_N_PITEM_HEAD 565 (tbS0, 566 0)), 567 "PAP-12106: item length must be 0"); 568 RFALSE 569 (comp_short_le_keys 570 (B_N_PKEY 571 (tbS0, 0), 572 B_N_PKEY 573 (tb->L[0], 574 n + 575 item_pos 576 - 577 ret_val)), 578 "PAP-12107: items must be of the same file"); 579 if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) { 580 temp_l = 581 l_n 582 << 583 (tb-> 584 tb_sb-> 585 s_blocksize_bits 586 - 587 UNFM_P_SHIFT); 588 } 589 /* update key of first item in S0 */ 590 version = 591 ih_version 592 (B_N_PITEM_HEAD 593 (tbS0, 0)); 594 set_le_key_k_offset 595 (version, 596 B_N_PKEY 597 (tbS0, 0), 598 le_key_k_offset 599 (version, 600 B_N_PKEY 601 (tbS0, 602 0)) + 603 temp_l); 604 /* update left delimiting key */ 605 set_le_key_k_offset 606 (version, 607 B_N_PDELIM_KEY 608 (tb-> 609 CFL[0], 610 tb-> 611 lkey[0]), 612 le_key_k_offset 613 (version, 614 B_N_PDELIM_KEY 615 (tb-> 616 CFL[0], 617 tb-> 618 lkey[0])) 619 + temp_l); 620 } 621 622 /* Calculate new body, position in item and insert_size[0] */ 623 if (l_n > zeros_num) { 624 body += 625 (l_n - 626 zeros_num); 627 zeros_num = 0; 628 } else 629 zeros_num -= 630 l_n; 631 pos_in_item = 0; 632 633 RFALSE 634 (comp_short_le_keys 635 (B_N_PKEY(tbS0, 0), 636 B_N_PKEY(tb->L[0], 637 B_NR_ITEMS 638 (tb-> 639 L[0]) - 640 1)) 641 || 642 !op_is_left_mergeable 643 (B_N_PKEY(tbS0, 0), 644 tbS0->b_size) 645 || 646 !op_is_left_mergeable 647 (B_N_PDELIM_KEY 648 (tb->CFL[0], 649 tb->lkey[0]), 650 tbS0->b_size), 651 "PAP-12120: item must be merge-able with left neighboring item"); 652 } else { /* only part of the appended item will be in L[0] */ 653 654 /* Calculate position in item for append in S[0] */ 655 pos_in_item -= 656 tb->lbytes; 657 658 RFALSE(pos_in_item <= 0, 659 "PAP-12125: no place for paste. 
pos_in_item=%d", 660 pos_in_item); 661 662 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */ 663 leaf_shift_left(tb, 664 tb-> 665 lnum[0], 666 tb-> 667 lbytes); 668 } 669 } 670 } else { /* appended item will be in L[0] in whole */ 671 672 struct item_head *pasted; 673 674 if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) { /* if we paste into first item of S[0] and it is left mergable */ 675 /* then increment pos_in_item by the size of the last item in L[0] */ 676 pasted = 677 B_N_PITEM_HEAD(tb->L[0], 678 n - 1); 679 if (is_direntry_le_ih(pasted)) 680 pos_in_item += 681 ih_entry_count 682 (pasted); 683 else 684 pos_in_item += 685 ih_item_len(pasted); 686 } 687 688 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */ 689 ret_val = 690 leaf_shift_left(tb, tb->lnum[0], 691 tb->lbytes); 692 /* Append to body of item in L[0] */ 693 buffer_info_init_left(tb, &bi); 694 leaf_paste_in_buffer(&bi, 695 n + item_pos - 696 ret_val, 697 pos_in_item, 698 tb->insert_size[0], 699 body, zeros_num); 700 701 /* if appended item is directory, paste entry */ 702 pasted = 703 B_N_PITEM_HEAD(tb->L[0], 704 n + item_pos - 705 ret_val); 706 if (is_direntry_le_ih(pasted)) 707 leaf_paste_entries(&bi, 708 n + 709 item_pos - 710 ret_val, 711 pos_in_item, 712 1, 713 (struct 714 reiserfs_de_head 715 *)body, 716 body + 717 DEH_SIZE, 718 tb-> 719 insert_size 720 [0] 721 ); 722 /* if appended item is indirect item, put unformatted node into un list */ 723 if (is_indirect_le_ih(pasted)) 724 set_ih_free_space(pasted, 0); 725 tb->insert_size[0] = 0; 726 zeros_num = 0; 727 } 728 break; 729 default: /* cases d and t */ 730 reiserfs_panic(tb->tb_sb, "PAP-12130", 731 "lnum > 0: unexpected mode: " 732 " %s(%d)", 733 (flag == 734 M_DELETE) ? "DELETE" : ((flag == 735 M_CUT) 736 ? 
"CUT" 737 : 738 "UNKNOWN"), 739 flag); 740 } 741 } else { 742 /* new item doesn't fall into L[0] */ 743 leaf_shift_left(tb, tb->lnum[0], tb->lbytes); 744 } 745 } 746 747 /* tb->lnum[0] > 0 */ 748 /* Calculate new item position */ 749 item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0)); 750 751 if (tb->rnum[0] > 0) { 752 /* shift rnum[0] items from S[0] to the right neighbor R[0] */ 753 n = B_NR_ITEMS(tbS0); 754 switch (flag) { 755 756 case M_INSERT: /* insert item */ 757 if (n - tb->rnum[0] < item_pos) { /* new item or its part falls to R[0] */ 758 if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { /* part of new item falls into R[0] */ 759 loff_t old_key_comp, old_len, 760 r_zeros_number; 761 const char *r_body; 762 int version; 763 loff_t offset; 764 765 leaf_shift_right(tb, tb->rnum[0] - 1, 766 -1); 767 768 version = ih_version(ih); 769 /* Remember key component and item length */ 770 old_key_comp = le_ih_k_offset(ih); 771 old_len = ih_item_len(ih); 772 773 /* Calculate key component and item length to insert into R[0] */ 774 offset = 775 le_ih_k_offset(ih) + 776 ((old_len - 777 tb-> 778 rbytes) << (is_indirect_le_ih(ih) 779 ? 
tb->tb_sb-> 780 s_blocksize_bits - 781 UNFM_P_SHIFT : 0)); 782 set_le_ih_k_offset(ih, offset); 783 put_ih_item_len(ih, tb->rbytes); 784 /* Insert part of the item into R[0] */ 785 buffer_info_init_right(tb, &bi); 786 if ((old_len - tb->rbytes) > zeros_num) { 787 r_zeros_number = 0; 788 r_body = 789 body + (old_len - 790 tb->rbytes) - 791 zeros_num; 792 } else { 793 r_body = body; 794 r_zeros_number = 795 zeros_num - (old_len - 796 tb->rbytes); 797 zeros_num -= r_zeros_number; 798 } 799 800 leaf_insert_into_buf(&bi, 0, ih, r_body, 801 r_zeros_number); 802 803 /* Replace right delimiting key by first key in R[0] */ 804 replace_key(tb, tb->CFR[0], tb->rkey[0], 805 tb->R[0], 0); 806 807 /* Calculate key component and item length to insert into S[0] */ 808 set_le_ih_k_offset(ih, old_key_comp); 809 put_ih_item_len(ih, 810 old_len - tb->rbytes); 811 812 tb->insert_size[0] -= tb->rbytes; 813 814 } else { /* whole new item falls into R[0] */ 815 816 /* Shift rnum[0]-1 items to R[0] */ 817 ret_val = 818 leaf_shift_right(tb, 819 tb->rnum[0] - 1, 820 tb->rbytes); 821 /* Insert new item into R[0] */ 822 buffer_info_init_right(tb, &bi); 823 leaf_insert_into_buf(&bi, 824 item_pos - n + 825 tb->rnum[0] - 1, 826 ih, body, 827 zeros_num); 828 829 if (item_pos - n + tb->rnum[0] - 1 == 0) { 830 replace_key(tb, tb->CFR[0], 831 tb->rkey[0], 832 tb->R[0], 0); 833 834 } 835 zeros_num = tb->insert_size[0] = 0; 836 } 837 } else { /* new item or part of it doesn't fall into R[0] */ 838 839 leaf_shift_right(tb, tb->rnum[0], tb->rbytes); 840 } 841 break; 842 843 case M_PASTE: /* append item */ 844 845 if (n - tb->rnum[0] <= item_pos) { /* pasted item or part of it falls to R[0] */ 846 if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) { /* we must shift the part of the appended item */ 847 if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) { /* we append to directory item */ 848 int entry_count; 849 850 RFALSE(zeros_num, 851 "PAP-12145: invalid parameter in case of a directory"); 852 
entry_count = 853 I_ENTRY_COUNT(B_N_PITEM_HEAD 854 (tbS0, 855 item_pos)); 856 if (entry_count - tb->rbytes < 857 pos_in_item) 858 /* new directory entry falls into R[0] */ 859 { 860 int paste_entry_position; 861 862 RFALSE(tb->rbytes - 1 >= 863 entry_count 864 || !tb-> 865 insert_size[0], 866 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d", 867 tb->rbytes, 868 entry_count); 869 /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */ 870 leaf_shift_right(tb, 871 tb-> 872 rnum 873 [0], 874 tb-> 875 rbytes 876 - 1); 877 /* Paste given directory entry to directory item */ 878 paste_entry_position = 879 pos_in_item - 880 entry_count + 881 tb->rbytes - 1; 882 buffer_info_init_right(tb, &bi); 883 leaf_paste_in_buffer 884 (&bi, 0, 885 paste_entry_position, 886 tb->insert_size[0], 887 body, zeros_num); 888 /* paste entry */ 889 leaf_paste_entries(&bi, 890 0, 891 paste_entry_position, 892 1, 893 (struct 894 reiserfs_de_head 895 *) 896 body, 897 body 898 + 899 DEH_SIZE, 900 tb-> 901 insert_size 902 [0] 903 ); 904 905 if (paste_entry_position 906 == 0) { 907 /* change delimiting keys */ 908 replace_key(tb, 909 tb-> 910 CFR 911 [0], 912 tb-> 913 rkey 914 [0], 915 tb-> 916 R 917 [0], 918 0); 919 } 920 921 tb->insert_size[0] = 0; 922 pos_in_item++; 923 } else { /* new directory entry doesn't fall into R[0] */ 924 925 leaf_shift_right(tb, 926 tb-> 927 rnum 928 [0], 929 tb-> 930 rbytes); 931 } 932 } else { /* regular object */ 933 934 int n_shift, n_rem, 935 r_zeros_number; 936 const char *r_body; 937 938 /* Calculate number of bytes which must be shifted from appended item */ 939 if ((n_shift = 940 tb->rbytes - 941 tb->insert_size[0]) < 0) 942 n_shift = 0; 943 944 RFALSE(pos_in_item != 945 ih_item_len 946 (B_N_PITEM_HEAD 947 (tbS0, item_pos)), 948 "PAP-12155: invalid position to paste. 
ih_item_len=%d, pos_in_item=%d", 949 pos_in_item, 950 ih_item_len 951 (B_N_PITEM_HEAD 952 (tbS0, item_pos))); 953 954 leaf_shift_right(tb, 955 tb->rnum[0], 956 n_shift); 957 /* Calculate number of bytes which must remain in body after appending to R[0] */ 958 if ((n_rem = 959 tb->insert_size[0] - 960 tb->rbytes) < 0) 961 n_rem = 0; 962 963 { 964 int version; 965 unsigned long temp_rem = 966 n_rem; 967 968 version = 969 ih_version 970 (B_N_PITEM_HEAD 971 (tb->R[0], 0)); 972 if (is_indirect_le_key 973 (version, 974 B_N_PKEY(tb->R[0], 975 0))) { 976 temp_rem = 977 n_rem << 978 (tb->tb_sb-> 979 s_blocksize_bits 980 - 981 UNFM_P_SHIFT); 982 } 983 set_le_key_k_offset 984 (version, 985 B_N_PKEY(tb->R[0], 986 0), 987 le_key_k_offset 988 (version, 989 B_N_PKEY(tb->R[0], 990 0)) + 991 temp_rem); 992 set_le_key_k_offset 993 (version, 994 B_N_PDELIM_KEY(tb-> 995 CFR 996 [0], 997 tb-> 998 rkey 999 [0]), 1000 le_key_k_offset 1001 (version, 1002 B_N_PDELIM_KEY 1003 (tb->CFR[0], 1004 tb->rkey[0])) + 1005 temp_rem); 1006 } 1007 /* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem; 1008 k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/ 1009 do_balance_mark_internal_dirty 1010 (tb, tb->CFR[0], 0); 1011 1012 /* Append part of body into R[0] */ 1013 buffer_info_init_right(tb, &bi); 1014 if (n_rem > zeros_num) { 1015 r_zeros_number = 0; 1016 r_body = 1017 body + n_rem - 1018 zeros_num; 1019 } else { 1020 r_body = body; 1021 r_zeros_number = 1022 zeros_num - n_rem; 1023 zeros_num -= 1024 r_zeros_number; 1025 } 1026 1027 leaf_paste_in_buffer(&bi, 0, 1028 n_shift, 1029 tb-> 1030 insert_size 1031 [0] - 1032 n_rem, 1033 r_body, 1034 r_zeros_number); 1035 1036 if (is_indirect_le_ih 1037 (B_N_PITEM_HEAD 1038 (tb->R[0], 0))) { 1039 #if 0 1040 RFALSE(n_rem, 1041 "PAP-12160: paste more than one unformatted node pointer"); 1042 #endif 1043 set_ih_free_space 1044 (B_N_PITEM_HEAD 1045 (tb->R[0], 0), 0); 1046 } 1047 tb->insert_size[0] = n_rem; 1048 if (!n_rem) 1049 pos_in_item++; 1050 } 1051 } 
else { /* pasted item in whole falls into R[0] */ 1052 1053 struct item_head *pasted; 1054 1055 ret_val = 1056 leaf_shift_right(tb, tb->rnum[0], 1057 tb->rbytes); 1058 /* append item in R[0] */ 1059 if (pos_in_item >= 0) { 1060 buffer_info_init_right(tb, &bi); 1061 leaf_paste_in_buffer(&bi, 1062 item_pos - 1063 n + 1064 tb-> 1065 rnum[0], 1066 pos_in_item, 1067 tb-> 1068 insert_size 1069 [0], body, 1070 zeros_num); 1071 } 1072 1073 /* paste new entry, if item is directory item */ 1074 pasted = 1075 B_N_PITEM_HEAD(tb->R[0], 1076 item_pos - n + 1077 tb->rnum[0]); 1078 if (is_direntry_le_ih(pasted) 1079 && pos_in_item >= 0) { 1080 leaf_paste_entries(&bi, 1081 item_pos - 1082 n + 1083 tb->rnum[0], 1084 pos_in_item, 1085 1, 1086 (struct 1087 reiserfs_de_head 1088 *)body, 1089 body + 1090 DEH_SIZE, 1091 tb-> 1092 insert_size 1093 [0] 1094 ); 1095 if (!pos_in_item) { 1096 1097 RFALSE(item_pos - n + 1098 tb->rnum[0], 1099 "PAP-12165: directory item must be first item of node when pasting is in 0th position"); 1100 1101 /* update delimiting keys */ 1102 replace_key(tb, 1103 tb->CFR[0], 1104 tb->rkey[0], 1105 tb->R[0], 1106 0); 1107 } 1108 } 1109 1110 if (is_indirect_le_ih(pasted)) 1111 set_ih_free_space(pasted, 0); 1112 zeros_num = tb->insert_size[0] = 0; 1113 } 1114 } else { /* new item doesn't fall into R[0] */ 1115 1116 leaf_shift_right(tb, tb->rnum[0], tb->rbytes); 1117 } 1118 break; 1119 default: /* cases d and t */ 1120 reiserfs_panic(tb->tb_sb, "PAP-12175", 1121 "rnum > 0: unexpected mode: %s(%d)", 1122 (flag == 1123 M_DELETE) ? "DELETE" : ((flag == 1124 M_CUT) ? "CUT" 1125 : "UNKNOWN"), 1126 flag); 1127 } 1128 1129 } 1130 1131 /* tb->rnum[0] > 0 */ 1132 RFALSE(tb->blknum[0] > 3, 1133 "PAP-12180: blknum can not be %d. It must be <= 3", 1134 tb->blknum[0]); 1135 RFALSE(tb->blknum[0] < 0, 1136 "PAP-12185: blknum can not be %d. 
It must be >= 0", 1137 tb->blknum[0]); 1138 1139 /* if while adding to a node we discover that it is possible to split 1140 it in two, and merge the left part into the left neighbor and the 1141 right part into the right neighbor, eliminating the node */ 1142 if (tb->blknum[0] == 0) { /* node S[0] is empty now */ 1143 1144 RFALSE(!tb->lnum[0] || !tb->rnum[0], 1145 "PAP-12190: lnum and rnum must not be zero"); 1146 /* if insertion was done before 0-th position in R[0], right 1147 delimiting key of the tb->L[0]'s and left delimiting key are 1148 not set correctly */ 1149 if (tb->CFL[0]) { 1150 if (!tb->CFR[0]) 1151 reiserfs_panic(tb->tb_sb, "vs-12195", 1152 "CFR not initialized"); 1153 copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]), 1154 B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0])); 1155 do_balance_mark_internal_dirty(tb, tb->CFL[0], 0); 1156 } 1157 1158 reiserfs_invalidate_buffer(tb, tbS0); 1159 return 0; 1160 } 1161 1162 /* Fill new nodes that appear in place of S[0] */ 1163 1164 /* I am told that this copying is because we need an array to enable 1165 the looping code. -Hans */ 1166 snum[0] = tb->s1num, snum[1] = tb->s2num; 1167 sbytes[0] = tb->s1bytes; 1168 sbytes[1] = tb->s2bytes; 1169 for (i = tb->blknum[0] - 2; i >= 0; i--) { 1170 1171 RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. 
Must be > 0", i, 1172 snum[i]); 1173 1174 /* here we shift from S to S_new nodes */ 1175 1176 S_new[i] = get_FEB(tb); 1177 1178 /* initialized block type and tree level */ 1179 set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL); 1180 1181 n = B_NR_ITEMS(tbS0); 1182 1183 switch (flag) { 1184 case M_INSERT: /* insert item */ 1185 1186 if (n - snum[i] < item_pos) { /* new item or it's part falls to first new node S_new[i] */ 1187 if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) { /* part of new item falls into S_new[i] */ 1188 int old_key_comp, old_len, 1189 r_zeros_number; 1190 const char *r_body; 1191 int version; 1192 1193 /* Move snum[i]-1 items from S[0] to S_new[i] */ 1194 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, 1195 snum[i] - 1, -1, 1196 S_new[i]); 1197 /* Remember key component and item length */ 1198 version = ih_version(ih); 1199 old_key_comp = le_ih_k_offset(ih); 1200 old_len = ih_item_len(ih); 1201 1202 /* Calculate key component and item length to insert into S_new[i] */ 1203 set_le_ih_k_offset(ih, 1204 le_ih_k_offset(ih) + 1205 ((old_len - 1206 sbytes[i]) << 1207 (is_indirect_le_ih 1208 (ih) ? 
tb->tb_sb-> 1209 s_blocksize_bits - 1210 UNFM_P_SHIFT : 1211 0))); 1212 1213 put_ih_item_len(ih, sbytes[i]); 1214 1215 /* Insert part of the item into S_new[i] before 0-th item */ 1216 buffer_info_init_bh(tb, &bi, S_new[i]); 1217 1218 if ((old_len - sbytes[i]) > zeros_num) { 1219 r_zeros_number = 0; 1220 r_body = 1221 body + (old_len - 1222 sbytes[i]) - 1223 zeros_num; 1224 } else { 1225 r_body = body; 1226 r_zeros_number = 1227 zeros_num - (old_len - 1228 sbytes[i]); 1229 zeros_num -= r_zeros_number; 1230 } 1231 1232 leaf_insert_into_buf(&bi, 0, ih, r_body, 1233 r_zeros_number); 1234 1235 /* Calculate key component and item length to insert into S[i] */ 1236 set_le_ih_k_offset(ih, old_key_comp); 1237 put_ih_item_len(ih, 1238 old_len - sbytes[i]); 1239 tb->insert_size[0] -= sbytes[i]; 1240 } else { /* whole new item falls into S_new[i] */ 1241 1242 /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */ 1243 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, 1244 snum[i] - 1, sbytes[i], 1245 S_new[i]); 1246 1247 /* Insert new item into S_new[i] */ 1248 buffer_info_init_bh(tb, &bi, S_new[i]); 1249 leaf_insert_into_buf(&bi, 1250 item_pos - n + 1251 snum[i] - 1, ih, 1252 body, zeros_num); 1253 1254 zeros_num = tb->insert_size[0] = 0; 1255 } 1256 } 1257 1258 else { /* new item or it part don't falls into S_new[i] */ 1259 1260 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, 1261 snum[i], sbytes[i], S_new[i]); 1262 } 1263 break; 1264 1265 case M_PASTE: /* append item */ 1266 1267 if (n - snum[i] <= item_pos) { /* pasted item or part if it falls to S_new[i] */ 1268 if (item_pos == n - snum[i] && sbytes[i] != -1) { /* we must shift part of the appended item */ 1269 struct item_head *aux_ih; 1270 1271 RFALSE(ih, "PAP-12210: ih must be 0"); 1272 1273 aux_ih = B_N_PITEM_HEAD(tbS0, item_pos); 1274 if (is_direntry_le_ih(aux_ih)) { 1275 /* we append to directory item */ 1276 1277 int entry_count; 1278 1279 entry_count = 1280 ih_entry_count(aux_ih); 1281 1282 if (entry_count - 
sbytes[i] < 1283 pos_in_item 1284 && pos_in_item <= 1285 entry_count) { 1286 /* new directory entry falls into S_new[i] */ 1287 1288 RFALSE(!tb-> 1289 insert_size[0], 1290 "PAP-12215: insert_size is already 0"); 1291 RFALSE(sbytes[i] - 1 >= 1292 entry_count, 1293 "PAP-12220: there are no so much entries (%d), only %d", 1294 sbytes[i] - 1, 1295 entry_count); 1296 1297 /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */ 1298 leaf_move_items 1299 (LEAF_FROM_S_TO_SNEW, 1300 tb, snum[i], 1301 sbytes[i] - 1, 1302 S_new[i]); 1303 /* Paste given directory entry to directory item */ 1304 buffer_info_init_bh(tb, &bi, S_new[i]); 1305 leaf_paste_in_buffer 1306 (&bi, 0, 1307 pos_in_item - 1308 entry_count + 1309 sbytes[i] - 1, 1310 tb->insert_size[0], 1311 body, zeros_num); 1312 /* paste new directory entry */ 1313 leaf_paste_entries(&bi, 1314 0, 1315 pos_in_item 1316 - 1317 entry_count 1318 + 1319 sbytes 1320 [i] - 1321 1, 1, 1322 (struct 1323 reiserfs_de_head 1324 *) 1325 body, 1326 body 1327 + 1328 DEH_SIZE, 1329 tb-> 1330 insert_size 1331 [0] 1332 ); 1333 tb->insert_size[0] = 0; 1334 pos_in_item++; 1335 } else { /* new directory entry doesn't fall into S_new[i] */ 1336 leaf_move_items 1337 (LEAF_FROM_S_TO_SNEW, 1338 tb, snum[i], 1339 sbytes[i], 1340 S_new[i]); 1341 } 1342 } else { /* regular object */ 1343 1344 int n_shift, n_rem, 1345 r_zeros_number; 1346 const char *r_body; 1347 1348 RFALSE(pos_in_item != 1349 ih_item_len 1350 (B_N_PITEM_HEAD 1351 (tbS0, item_pos)) 1352 || tb->insert_size[0] <= 1353 0, 1354 "PAP-12225: item too short or insert_size <= 0"); 1355 1356 /* Calculate number of bytes which must be shifted from appended item */ 1357 n_shift = 1358 sbytes[i] - 1359 tb->insert_size[0]; 1360 if (n_shift < 0) 1361 n_shift = 0; 1362 leaf_move_items 1363 (LEAF_FROM_S_TO_SNEW, tb, 1364 snum[i], n_shift, 1365 S_new[i]); 1366 1367 /* Calculate number of bytes which must remain in body after append to S_new[i] */ 1368 
n_rem = 1369 tb->insert_size[0] - 1370 sbytes[i]; 1371 if (n_rem < 0) 1372 n_rem = 0; 1373 /* Append part of body into S_new[0] */ 1374 buffer_info_init_bh(tb, &bi, S_new[i]); 1375 if (n_rem > zeros_num) { 1376 r_zeros_number = 0; 1377 r_body = 1378 body + n_rem - 1379 zeros_num; 1380 } else { 1381 r_body = body; 1382 r_zeros_number = 1383 zeros_num - n_rem; 1384 zeros_num -= 1385 r_zeros_number; 1386 } 1387 1388 leaf_paste_in_buffer(&bi, 0, 1389 n_shift, 1390 tb-> 1391 insert_size 1392 [0] - 1393 n_rem, 1394 r_body, 1395 r_zeros_number); 1396 { 1397 struct item_head *tmp; 1398 1399 tmp = 1400 B_N_PITEM_HEAD(S_new 1401 [i], 1402 0); 1403 if (is_indirect_le_ih 1404 (tmp)) { 1405 set_ih_free_space 1406 (tmp, 0); 1407 set_le_ih_k_offset 1408 (tmp, 1409 le_ih_k_offset 1410 (tmp) + 1411 (n_rem << 1412 (tb-> 1413 tb_sb-> 1414 s_blocksize_bits 1415 - 1416 UNFM_P_SHIFT))); 1417 } else { 1418 set_le_ih_k_offset 1419 (tmp, 1420 le_ih_k_offset 1421 (tmp) + 1422 n_rem); 1423 } 1424 } 1425 1426 tb->insert_size[0] = n_rem; 1427 if (!n_rem) 1428 pos_in_item++; 1429 } 1430 } else 1431 /* item falls wholly into S_new[i] */ 1432 { 1433 int leaf_mi; 1434 struct item_head *pasted; 1435 1436 #ifdef CONFIG_REISERFS_CHECK 1437 struct item_head *ih_check = 1438 B_N_PITEM_HEAD(tbS0, item_pos); 1439 1440 if (!is_direntry_le_ih(ih_check) 1441 && (pos_in_item != ih_item_len(ih_check) 1442 || tb->insert_size[0] <= 0)) 1443 reiserfs_panic(tb->tb_sb, 1444 "PAP-12235", 1445 "pos_in_item " 1446 "must be equal " 1447 "to ih_item_len"); 1448 #endif /* CONFIG_REISERFS_CHECK */ 1449 1450 leaf_mi = 1451 leaf_move_items(LEAF_FROM_S_TO_SNEW, 1452 tb, snum[i], 1453 sbytes[i], 1454 S_new[i]); 1455 1456 RFALSE(leaf_mi, 1457 "PAP-12240: unexpected value returned by leaf_move_items (%d)", 1458 leaf_mi); 1459 1460 /* paste into item */ 1461 buffer_info_init_bh(tb, &bi, S_new[i]); 1462 leaf_paste_in_buffer(&bi, 1463 item_pos - n + 1464 snum[i], 1465 pos_in_item, 1466 tb->insert_size[0], 1467 body, zeros_num); 
1468 1469 pasted = 1470 B_N_PITEM_HEAD(S_new[i], 1471 item_pos - n + 1472 snum[i]); 1473 if (is_direntry_le_ih(pasted)) { 1474 leaf_paste_entries(&bi, 1475 item_pos - 1476 n + snum[i], 1477 pos_in_item, 1478 1, 1479 (struct 1480 reiserfs_de_head 1481 *)body, 1482 body + 1483 DEH_SIZE, 1484 tb-> 1485 insert_size 1486 [0] 1487 ); 1488 } 1489 1490 /* if we paste to indirect item update ih_free_space */ 1491 if (is_indirect_le_ih(pasted)) 1492 set_ih_free_space(pasted, 0); 1493 zeros_num = tb->insert_size[0] = 0; 1494 } 1495 } 1496 1497 else { /* pasted item doesn't fall into S_new[i] */ 1498 1499 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, 1500 snum[i], sbytes[i], S_new[i]); 1501 } 1502 break; 1503 default: /* cases d and t */ 1504 reiserfs_panic(tb->tb_sb, "PAP-12245", 1505 "blknum > 2: unexpected mode: %s(%d)", 1506 (flag == 1507 M_DELETE) ? "DELETE" : ((flag == 1508 M_CUT) ? "CUT" 1509 : "UNKNOWN"), 1510 flag); 1511 } 1512 1513 memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE); 1514 insert_ptr[i] = S_new[i]; 1515 1516 RFALSE(!buffer_journaled(S_new[i]) 1517 || buffer_journal_dirty(S_new[i]) 1518 || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)", 1519 i, S_new[i]); 1520 } 1521 1522 /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the 1523 affected item which remains in S */ 1524 if (0 <= item_pos && item_pos < tb->s0num) { /* if we must insert or append into buffer S[0] */ 1525 1526 switch (flag) { 1527 case M_INSERT: /* insert item into S[0] */ 1528 buffer_info_init_tbS0(tb, &bi); 1529 leaf_insert_into_buf(&bi, item_pos, ih, body, 1530 zeros_num); 1531 1532 /* If we insert the first key change the delimiting key */ 1533 if (item_pos == 0) { 1534 if (tb->CFL[0]) /* can be 0 in reiserfsck */ 1535 replace_key(tb, tb->CFL[0], tb->lkey[0], 1536 tbS0, 0); 1537 1538 } 1539 break; 1540 1541 case M_PASTE:{ /* append item in S[0] */ 1542 struct item_head *pasted; 1543 1544 pasted = B_N_PITEM_HEAD(tbS0, 
item_pos); 1545 /* when directory, may be new entry already pasted */ 1546 if (is_direntry_le_ih(pasted)) { 1547 if (pos_in_item >= 0 && 1548 pos_in_item <= 1549 ih_entry_count(pasted)) { 1550 1551 RFALSE(!tb->insert_size[0], 1552 "PAP-12260: insert_size is 0 already"); 1553 1554 /* prepare space */ 1555 buffer_info_init_tbS0(tb, &bi); 1556 leaf_paste_in_buffer(&bi, 1557 item_pos, 1558 pos_in_item, 1559 tb-> 1560 insert_size 1561 [0], body, 1562 zeros_num); 1563 1564 /* paste entry */ 1565 leaf_paste_entries(&bi, 1566 item_pos, 1567 pos_in_item, 1568 1, 1569 (struct 1570 reiserfs_de_head 1571 *)body, 1572 body + 1573 DEH_SIZE, 1574 tb-> 1575 insert_size 1576 [0] 1577 ); 1578 if (!item_pos && !pos_in_item) { 1579 RFALSE(!tb->CFL[0] 1580 || !tb->L[0], 1581 "PAP-12270: CFL[0]/L[0] must be specified"); 1582 if (tb->CFL[0]) { 1583 replace_key(tb, 1584 tb-> 1585 CFL 1586 [0], 1587 tb-> 1588 lkey 1589 [0], 1590 tbS0, 1591 0); 1592 1593 } 1594 } 1595 tb->insert_size[0] = 0; 1596 } 1597 } else { /* regular object */ 1598 if (pos_in_item == ih_item_len(pasted)) { 1599 1600 RFALSE(tb->insert_size[0] <= 0, 1601 "PAP-12275: insert size must not be %d", 1602 tb->insert_size[0]); 1603 buffer_info_init_tbS0(tb, &bi); 1604 leaf_paste_in_buffer(&bi, 1605 item_pos, 1606 pos_in_item, 1607 tb-> 1608 insert_size 1609 [0], body, 1610 zeros_num); 1611 1612 if (is_indirect_le_ih(pasted)) { 1613 #if 0 1614 RFALSE(tb-> 1615 insert_size[0] != 1616 UNFM_P_SIZE, 1617 "PAP-12280: insert_size for indirect item must be %d, not %d", 1618 UNFM_P_SIZE, 1619 tb-> 1620 insert_size[0]); 1621 #endif 1622 set_ih_free_space 1623 (pasted, 0); 1624 } 1625 tb->insert_size[0] = 0; 1626 } 1627 #ifdef CONFIG_REISERFS_CHECK 1628 else { 1629 if (tb->insert_size[0]) { 1630 print_cur_tb("12285"); 1631 reiserfs_panic(tb-> 1632 tb_sb, 1633 "PAP-12285", 1634 "insert_size " 1635 "must be 0 " 1636 "(%d)", 1637 tb->insert_size[0]); 1638 } 1639 } 1640 #endif /* CONFIG_REISERFS_CHECK */ 1641 1642 } 1643 } /* case M_PASTE: 
 */
		}
	}
#ifdef CONFIG_REISERFS_CHECK
	/* after a paste completes, the whole insert_size must have been
	   consumed by one of the shift/paste passes above */
	if (flag == M_PASTE && tb->insert_size[0]) {
		print_cur_tb("12290");
		reiserfs_panic(tb->tb_sb,
			       "PAP-12290", "insert_size is still not 0 (%d)",
			       tb->insert_size[0]);
	}
#endif				/* CONFIG_REISERFS_CHECK */
	return 0;
}				/* Leaf level of the tree is balanced (end of balance_leaf) */

/* Make empty node: reset the node's item count to 0 and its free space
   to the full usable size, and zero the child size recorded in the
   parent (if one is attached to this buffer_info). */
void make_empty_node(struct buffer_info *bi)
{
	struct block_head *blkh;

	RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");

	blkh = B_BLK_HEAD(bi->bi_bh);
	set_blkh_nr_item(blkh, 0);
	set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh));

	if (bi->bi_parent)
		B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0;	/* Endian safe if 0 */
}

/* Get first empty buffer: take the first preallocated buffer from
   tb->FEB[], format it as an empty node, mark it uptodate, and move it
   to tb->used[] so unfix_nodes can account for it later.  Panics if no
   free buffer remains (fix_nodes should have reserved enough). */
struct buffer_head *get_FEB(struct tree_balance *tb)
{
	int i;
	struct buffer_info bi;

	for (i = 0; i < MAX_FEB_SIZE; i++)
		if (tb->FEB[i] != NULL)
			break;

	if (i == MAX_FEB_SIZE)
		reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty");

	buffer_info_init_bh(tb, &bi, tb->FEB[i]);
	make_empty_node(&bi);
	set_buffer_uptodate(tb->FEB[i]);
	tb->used[i] = tb->FEB[i];
	tb->FEB[i] = NULL;

	return tb->used[i];
}

/* This is now used because reiserfs_free_block has to be able to
** schedule.
1696 */ 1697 static void store_thrown(struct tree_balance *tb, struct buffer_head *bh) 1698 { 1699 int i; 1700 1701 if (buffer_dirty(bh)) 1702 reiserfs_warning(tb->tb_sb, "reiserfs-12320", 1703 "called with dirty buffer"); 1704 for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) 1705 if (!tb->thrown[i]) { 1706 tb->thrown[i] = bh; 1707 get_bh(bh); /* free_thrown puts this */ 1708 return; 1709 } 1710 reiserfs_warning(tb->tb_sb, "reiserfs-12321", 1711 "too many thrown buffers"); 1712 } 1713 1714 static void free_thrown(struct tree_balance *tb) 1715 { 1716 int i; 1717 b_blocknr_t blocknr; 1718 for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) { 1719 if (tb->thrown[i]) { 1720 blocknr = tb->thrown[i]->b_blocknr; 1721 if (buffer_dirty(tb->thrown[i])) 1722 reiserfs_warning(tb->tb_sb, "reiserfs-12322", 1723 "called with dirty buffer %d", 1724 blocknr); 1725 brelse(tb->thrown[i]); /* incremented in store_thrown */ 1726 reiserfs_free_block(tb->transaction_handle, NULL, 1727 blocknr, 0); 1728 } 1729 } 1730 } 1731 1732 void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh) 1733 { 1734 struct block_head *blkh; 1735 blkh = B_BLK_HEAD(bh); 1736 set_blkh_level(blkh, FREE_LEVEL); 1737 set_blkh_nr_item(blkh, 0); 1738 1739 clear_buffer_dirty(bh); 1740 store_thrown(tb, bh); 1741 } 1742 1743 /* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/ 1744 void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest, 1745 struct buffer_head *src, int n_src) 1746 { 1747 1748 RFALSE(dest == NULL || src == NULL, 1749 "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)", 1750 src, dest); 1751 RFALSE(!B_IS_KEYS_LEVEL(dest), 1752 "vs-12310: invalid level (%z) for destination buffer. 
dest must be leaf", 1753 dest); 1754 RFALSE(n_dest < 0 || n_src < 0, 1755 "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest); 1756 RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src), 1757 "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big", 1758 n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest)); 1759 1760 if (B_IS_ITEMS_LEVEL(src)) 1761 /* source buffer contains leaf node */ 1762 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PITEM_HEAD(src, n_src), 1763 KEY_SIZE); 1764 else 1765 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PDELIM_KEY(src, n_src), 1766 KEY_SIZE); 1767 1768 do_balance_mark_internal_dirty(tb, dest, 0); 1769 } 1770 1771 int get_left_neighbor_position(struct tree_balance *tb, int h) 1772 { 1773 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1); 1774 1775 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FL[h] == NULL, 1776 "vs-12325: FL[%d](%p) or F[%d](%p) does not exist", 1777 h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h)); 1778 1779 if (Sh_position == 0) 1780 return B_NR_ITEMS(tb->FL[h]); 1781 else 1782 return Sh_position - 1; 1783 } 1784 1785 int get_right_neighbor_position(struct tree_balance *tb, int h) 1786 { 1787 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1); 1788 1789 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FR[h] == NULL, 1790 "vs-12330: F[%d](%p) or FR[%d](%p) does not exist", 1791 h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]); 1792 1793 if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h))) 1794 return 0; 1795 else 1796 return Sh_position + 1; 1797 } 1798 1799 #ifdef CONFIG_REISERFS_CHECK 1800 1801 int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value); 1802 static void check_internal_node(struct super_block *s, struct buffer_head *bh, 1803 char *mes) 1804 { 1805 struct disk_child *dc; 1806 int i; 1807 1808 RFALSE(!bh, "PAP-12336: bh == 0"); 1809 1810 if (!bh || !B_IS_IN_TREE(bh)) 1811 return; 1812 1813 RFALSE(!buffer_dirty(bh) && 1814 !(buffer_journaled(bh) || 
buffer_journal_dirty(bh)), 1815 "PAP-12337: buffer (%b) must be dirty", bh); 1816 dc = B_N_CHILD(bh, 0); 1817 1818 for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) { 1819 if (!is_reusable(s, dc_block_number(dc), 1)) { 1820 print_cur_tb(mes); 1821 reiserfs_panic(s, "PAP-12338", 1822 "invalid child pointer %y in %b", 1823 dc, bh); 1824 } 1825 } 1826 } 1827 1828 static int locked_or_not_in_tree(struct tree_balance *tb, 1829 struct buffer_head *bh, char *which) 1830 { 1831 if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) || 1832 !B_IS_IN_TREE(bh)) { 1833 reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh); 1834 return 1; 1835 } 1836 return 0; 1837 } 1838 1839 static int check_before_balancing(struct tree_balance *tb) 1840 { 1841 int retval = 0; 1842 1843 if (cur_tb) { 1844 reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule " 1845 "occurred based on cur_tb not being null at " 1846 "this point in code. do_balance cannot properly " 1847 "handle schedule occurring while it runs."); 1848 } 1849 1850 /* double check that buffers that we will modify are unlocked. (fix_nodes should already have 1851 prepped all of these for us). 
*/ 1852 if (tb->lnum[0]) { 1853 retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]"); 1854 retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]"); 1855 retval |= locked_or_not_in_tree(tb, tb->CFL[0], "CFL[0]"); 1856 check_leaf(tb->L[0]); 1857 } 1858 if (tb->rnum[0]) { 1859 retval |= locked_or_not_in_tree(tb, tb->R[0], "R[0]"); 1860 retval |= locked_or_not_in_tree(tb, tb->FR[0], "FR[0]"); 1861 retval |= locked_or_not_in_tree(tb, tb->CFR[0], "CFR[0]"); 1862 check_leaf(tb->R[0]); 1863 } 1864 retval |= locked_or_not_in_tree(tb, PATH_PLAST_BUFFER(tb->tb_path), 1865 "S[0]"); 1866 check_leaf(PATH_PLAST_BUFFER(tb->tb_path)); 1867 1868 return retval; 1869 } 1870 1871 static void check_after_balance_leaf(struct tree_balance *tb) 1872 { 1873 if (tb->lnum[0]) { 1874 if (B_FREE_SPACE(tb->L[0]) != 1875 MAX_CHILD_SIZE(tb->L[0]) - 1876 dc_size(B_N_CHILD 1877 (tb->FL[0], get_left_neighbor_position(tb, 0)))) { 1878 print_cur_tb("12221"); 1879 reiserfs_panic(tb->tb_sb, "PAP-12355", 1880 "shift to left was incorrect"); 1881 } 1882 } 1883 if (tb->rnum[0]) { 1884 if (B_FREE_SPACE(tb->R[0]) != 1885 MAX_CHILD_SIZE(tb->R[0]) - 1886 dc_size(B_N_CHILD 1887 (tb->FR[0], get_right_neighbor_position(tb, 0)))) { 1888 print_cur_tb("12222"); 1889 reiserfs_panic(tb->tb_sb, "PAP-12360", 1890 "shift to right was incorrect"); 1891 } 1892 } 1893 if (PATH_H_PBUFFER(tb->tb_path, 1) && 1894 (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) != 1895 (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) - 1896 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1), 1897 PATH_H_POSITION(tb->tb_path, 1)))))) { 1898 int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)); 1899 int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) - 1900 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1), 1901 PATH_H_POSITION(tb->tb_path, 1902 1)))); 1903 print_cur_tb("12223"); 1904 reiserfs_warning(tb->tb_sb, "reiserfs-12363", 1905 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; " 1906 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = 
%d", 1907 left, 1908 MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)), 1909 PATH_H_PBUFFER(tb->tb_path, 1), 1910 PATH_H_POSITION(tb->tb_path, 1), 1911 dc_size(B_N_CHILD 1912 (PATH_H_PBUFFER(tb->tb_path, 1), 1913 PATH_H_POSITION(tb->tb_path, 1))), 1914 right); 1915 reiserfs_panic(tb->tb_sb, "PAP-12365", "S is incorrect"); 1916 } 1917 } 1918 1919 static void check_leaf_level(struct tree_balance *tb) 1920 { 1921 check_leaf(tb->L[0]); 1922 check_leaf(tb->R[0]); 1923 check_leaf(PATH_PLAST_BUFFER(tb->tb_path)); 1924 } 1925 1926 static void check_internal_levels(struct tree_balance *tb) 1927 { 1928 int h; 1929 1930 /* check all internal nodes */ 1931 for (h = 1; tb->insert_size[h]; h++) { 1932 check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h), 1933 "BAD BUFFER ON PATH"); 1934 if (tb->lnum[h]) 1935 check_internal_node(tb->tb_sb, tb->L[h], "BAD L"); 1936 if (tb->rnum[h]) 1937 check_internal_node(tb->tb_sb, tb->R[h], "BAD R"); 1938 } 1939 1940 } 1941 1942 #endif 1943 1944 /* Now we have all of the buffers that must be used in balancing of 1945 the tree. We rely on the assumption that schedule() will not occur 1946 while do_balance works. ( Only interrupt handlers are acceptable.) 1947 We balance the tree according to the analysis made before this, 1948 using buffers already obtained. For SMP support it will someday be 1949 necessary to add ordered locking of tb. */ 1950 1951 /* Some interesting rules of balancing: 1952 1953 we delete a maximum of two nodes per level per balancing: we never 1954 delete R, when we delete two of three nodes L, S, R then we move 1955 them into R. 

   we only delete L if we are deleting two nodes, if we delete only
   one node we delete S

   if we shift leaves then we shift as much as we can: this is a
   deliberate policy of extremism in node packing which results in
   higher average utilization after repeated random balance operations
   at the cost of more memory copies and more balancing as a result of
   small insertions to full nodes.

   if we shift internal nodes we try to evenly balance the node
   utilization, with consequent less balancing at the cost of lower
   utilization.

   one could argue that the policy for directories in leaves should be
   that of internal nodes, but we will wait until another day to
   evaluate this....  It would be nice to someday measure and prove
   these assumptions as to what is optimal....

*/

/* Sanity-check the tree_balance before any node is modified and, under
   CONFIG_REISERFS_CHECK, publish it in cur_tb so a schedule occurring
   during do_balance can be detected. */
static inline void do_balance_starts(struct tree_balance *tb)
{
	/* use print_cur_tb() to see initial state of struct
	   tree_balance */

	/* store_print_tb (tb); */

	/* do not delete, just comment it out */
	/* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
	   "check");*/
	RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
#ifdef CONFIG_REISERFS_CHECK
	cur_tb = tb;
#endif
}

/* Post-balance bookkeeping: debug checks, statistics bump, release of
   the fixed nodes, and freeing of the blocks queued by store_thrown(). */
static inline void do_balance_completed(struct tree_balance *tb)
{

#ifdef CONFIG_REISERFS_CHECK
	check_leaf_level(tb);
	check_internal_levels(tb);
	cur_tb = NULL;
#endif

	/* reiserfs_free_block is no longer schedule safe.  So, we need to
	   put the buffers we want freed on the thrown list during do_balance,
	   and then free them now */

	REISERFS_SB(tb->tb_sb)->s_do_balance++;

	/* release all nodes hold to perform the balancing */
	unfix_nodes(tb);

	free_thrown(tb);
}

/* Balance the tree according to the analysis already recorded in tb by
   fix_nodes()/check_balance().  All buffers needed were acquired up
   front, so no blocking is expected while the tree is inconsistent:
   the leaf level is balanced first, then each internal level that has
   pending work (insert_size[h] != 0). */
void do_balance(struct tree_balance *tb,	/* tree_balance structure */
		struct item_head *ih,	/* item header of inserted item */
		const char *body,	/* body of inserted item or bytes to paste */
		int flag)
{				/* i - insert, d - delete
				   c - cut, p - paste

				   Cut means delete part of an item
				   (includes removing an entry from a
				   directory).

				   Delete means delete whole item.

				   Insert means add a new item into the
				   tree.

				   Paste means to append to the end of an
				   existing file or to insert a directory
				   entry.  */
	int child_pos,		/* position of a child node in its parent */
	 h;			/* level of the tree being processed */
	struct item_head insert_key[2];	/* in our processing of one level
					   we sometimes determine what
					   must be inserted into the next
					   higher level.  This insertion
					   consists of a key or two keys
					   and their corresponding
					   pointers */
	struct buffer_head *insert_ptr[2];	/* inserted node-ptrs for the next
						   level */

	tb->tb_mode = flag;
	tb->need_balance_dirty = 0;

	if (FILESYSTEM_CHANGED_TB(tb)) {
		reiserfs_panic(tb->tb_sb, "clm-6000", "fs generation has "
			       "changed");
	}
	/* if we have no real work to do  */
	if (!tb->insert_size[0]) {
		reiserfs_warning(tb->tb_sb, "PAP-12350",
				 "insert_size == 0, mode == %c", flag);
		unfix_nodes(tb);
		return;
	}

	atomic_inc(&(fs_generation(tb->tb_sb)));
	do_balance_starts(tb);

	/* balance leaf returns 0 except if combining L R and S into
	   one node.  see balance_internal() for explanation of this
	   line of code. */
	child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
	    balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);

#ifdef CONFIG_REISERFS_CHECK
	check_after_balance_leaf(tb);
#endif

	/* Balance internal level of the tree. */
	for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
		child_pos =
		    balance_internal(tb, h, child_pos, insert_key, insert_ptr);

	do_balance_completed(tb);

}