1 /* 2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README 3 */ 4 5 #include <linux/time.h> 6 #include <linux/slab.h> 7 #include <linux/string.h> 8 #include "reiserfs.h" 9 #include <linux/buffer_head.h> 10 11 /* 12 * To make any changes in the tree we find a node that contains item 13 * to be changed/deleted or position in the node we insert a new item 14 * to. We call this node S. To do balancing we need to decide what we 15 * will shift to left/right neighbor, or to a new node, where new item 16 * will be etc. To make this analysis simpler we build virtual 17 * node. Virtual node is an array of items, that will replace items of 18 * node S. (For instance if we are going to delete an item, virtual 19 * node does not contain it). Virtual node keeps information about 20 * item sizes and types, mergeability of first and last items, sizes 21 * of all entries in directory item. We use this array of items when 22 * calculating what we can shift to neighbors and how many nodes we 23 * have to have if we do not any shiftings, if we shift to left/right 24 * neighbor or to both. 25 */ 26 27 /* 28 * Takes item number in virtual node, returns number of item 29 * that it has in source buffer 30 */ 31 static inline int old_item_num(int new_num, int affected_item_num, int mode) 32 { 33 if (mode == M_PASTE || mode == M_CUT || new_num < affected_item_num) 34 return new_num; 35 36 if (mode == M_INSERT) { 37 38 RFALSE(new_num == 0, 39 "vs-8005: for INSERT mode and item number of inserted item"); 40 41 return new_num - 1; 42 } 43 44 RFALSE(mode != M_DELETE, 45 "vs-8010: old_item_num: mode must be M_DELETE (mode = \'%c\'", 46 mode); 47 /* delete mode */ 48 return new_num + 1; 49 } 50 51 static void create_virtual_node(struct tree_balance *tb, int h) 52 { 53 struct item_head *ih; 54 struct virtual_node *vn = tb->tb_vn; 55 int new_num; 56 struct buffer_head *Sh; /* this comes from tb->S[h] */ 57 58 Sh = PATH_H_PBUFFER(tb->tb_path, h); 59 60 /* size of changed node */ 61 vn->vn_size = 62 MAX_CHILD_SIZE(Sh) - B_FREE_SPACE(Sh) + tb->insert_size[h]; 63 64 /* for internal nodes array if virtual items is not created */ 65 if (h) { 66 vn->vn_nr_item = (vn->vn_size - DC_SIZE) / (DC_SIZE + KEY_SIZE); 67 return; 68 } 69 70 /* number of items in virtual node */ 71 vn->vn_nr_item = 72 B_NR_ITEMS(Sh) + ((vn->vn_mode == M_INSERT) ? 1 : 0) - 73 ((vn->vn_mode == M_DELETE) ? 1 : 0); 74 75 /* first virtual item */ 76 vn->vn_vi = (struct virtual_item *)(tb->tb_vn + 1); 77 memset(vn->vn_vi, 0, vn->vn_nr_item * sizeof(struct virtual_item)); 78 vn->vn_free_ptr += vn->vn_nr_item * sizeof(struct virtual_item); 79 80 /* first item in the node */ 81 ih = item_head(Sh, 0); 82 83 /* define the mergeability for 0-th item (if it is not being deleted) */ 84 if (op_is_left_mergeable(&ih->ih_key, Sh->b_size) 85 && (vn->vn_mode != M_DELETE || vn->vn_affected_item_num)) 86 vn->vn_vi[0].vi_type |= VI_TYPE_LEFT_MERGEABLE; 87 88 /* 89 * go through all items that remain in the virtual 90 * node (except for the new (inserted) one) 91 */ 92 for (new_num = 0; new_num < vn->vn_nr_item; new_num++) { 93 int j; 94 struct virtual_item *vi = vn->vn_vi + new_num; 95 int is_affected = 96 ((new_num != vn->vn_affected_item_num) ? 0 : 1); 97 98 if (is_affected && vn->vn_mode == M_INSERT) 99 continue; 100 101 /* get item number in source node */ 102 j = old_item_num(new_num, vn->vn_affected_item_num, 103 vn->vn_mode); 104 105 vi->vi_item_len += ih_item_len(ih + j) + IH_SIZE; 106 vi->vi_ih = ih + j; 107 vi->vi_item = ih_item_body(Sh, ih + j); 108 vi->vi_uarea = vn->vn_free_ptr; 109 110 /* 111 * FIXME: there is no check that item operation did not 112 * consume too much memory 113 */ 114 vn->vn_free_ptr += 115 op_create_vi(vn, vi, is_affected, tb->insert_size[0]); 116 if (tb->vn_buf + tb->vn_buf_size < vn->vn_free_ptr) 117 reiserfs_panic(tb->tb_sb, "vs-8030", 118 "virtual node space consumed"); 119 120 if (!is_affected) 121 /* this is not being changed */ 122 continue; 123 124 if (vn->vn_mode == M_PASTE || vn->vn_mode == M_CUT) { 125 vn->vn_vi[new_num].vi_item_len += tb->insert_size[0]; 126 /* pointer to data which is going to be pasted */ 127 vi->vi_new_data = vn->vn_data; 128 } 129 } 130 131 /* virtual inserted item is not defined yet */ 132 if (vn->vn_mode == M_INSERT) { 133 struct virtual_item *vi = vn->vn_vi + vn->vn_affected_item_num; 134 135 RFALSE(vn->vn_ins_ih == NULL, 136 "vs-8040: item header of inserted item is not specified"); 137 vi->vi_item_len = tb->insert_size[0]; 138 vi->vi_ih = vn->vn_ins_ih; 139 vi->vi_item = vn->vn_data; 140 vi->vi_uarea = vn->vn_free_ptr; 141 142 op_create_vi(vn, vi, 0 /*not pasted or cut */ , 143 tb->insert_size[0]); 144 } 145 146 /* 147 * set right merge flag we take right delimiting key and 148 * check whether it is a mergeable item 149 */ 150 if (tb->CFR[0]) { 151 struct reiserfs_key *key; 152 153 key = internal_key(tb->CFR[0], tb->rkey[0]); 154 if (op_is_left_mergeable(key, Sh->b_size) 155 && (vn->vn_mode != M_DELETE 156 || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) 157 vn->vn_vi[vn->vn_nr_item - 1].vi_type |= 158 VI_TYPE_RIGHT_MERGEABLE; 159 160 #ifdef CONFIG_REISERFS_CHECK 161 if (op_is_left_mergeable(key, Sh->b_size) && 162 !(vn->vn_mode != M_DELETE 163 || vn->vn_affected_item_num != B_NR_ITEMS(Sh) - 1)) { 164 /* 165 * we delete last item and it could be merged 166 * with right neighbor's first item 167 */ 168 if (! 169 (B_NR_ITEMS(Sh) == 1 170 && is_direntry_le_ih(item_head(Sh, 0)) 171 && ih_entry_count(item_head(Sh, 0)) == 1)) { 172 /* 173 * node contains more than 1 item, or item 174 * is not directory item, or this item 175 * contains more than 1 entry 176 */ 177 print_block(Sh, 0, -1, -1); 178 reiserfs_panic(tb->tb_sb, "vs-8045", 179 "rdkey %k, affected item==%d " 180 "(mode==%c) Must be %c", 181 key, vn->vn_affected_item_num, 182 vn->vn_mode, M_DELETE); 183 } 184 } 185 #endif 186 187 } 188 } 189 190 /* 191 * Using virtual node check, how many items can be 192 * shifted to left neighbor 193 */ 194 static void check_left(struct tree_balance *tb, int h, int cur_free) 195 { 196 int i; 197 struct virtual_node *vn = tb->tb_vn; 198 struct virtual_item *vi; 199 int d_size, ih_size; 200 201 RFALSE(cur_free < 0, "vs-8050: cur_free (%d) < 0", cur_free); 202 203 /* internal level */ 204 if (h > 0) { 205 tb->lnum[h] = cur_free / (DC_SIZE + KEY_SIZE); 206 return; 207 } 208 209 /* leaf level */ 210 211 if (!cur_free || !vn->vn_nr_item) { 212 /* no free space or nothing to move */ 213 tb->lnum[h] = 0; 214 tb->lbytes = -1; 215 return; 216 } 217 218 RFALSE(!PATH_H_PPARENT(tb->tb_path, 0), 219 "vs-8055: parent does not exist or invalid"); 220 221 vi = vn->vn_vi; 222 if ((unsigned int)cur_free >= 223 (vn->vn_size - 224 ((vi->vi_type & VI_TYPE_LEFT_MERGEABLE) ? IH_SIZE : 0))) { 225 /* all contents of S[0] fits into L[0] */ 226 227 RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE, 228 "vs-8055: invalid mode or balance condition failed"); 229 230 tb->lnum[0] = vn->vn_nr_item; 231 tb->lbytes = -1; 232 return; 233 } 234 235 d_size = 0, ih_size = IH_SIZE; 236 237 /* first item may be merge with last item in left neighbor */ 238 if (vi->vi_type & VI_TYPE_LEFT_MERGEABLE) 239 d_size = -((int)IH_SIZE), ih_size = 0; 240 241 tb->lnum[0] = 0; 242 for (i = 0; i < vn->vn_nr_item; 243 i++, ih_size = IH_SIZE, d_size = 0, vi++) { 244 d_size += vi->vi_item_len; 245 if (cur_free >= d_size) { 246 /* the item can be shifted entirely */ 247 cur_free -= d_size; 248 tb->lnum[0]++; 249 continue; 250 } 251 252 /* the item cannot be shifted entirely, try to split it */ 253 /* 254 * check whether L[0] can hold ih and at least one byte 255 * of the item body 256 */ 257 258 /* cannot shift even a part of the current item */ 259 if (cur_free <= ih_size) { 260 tb->lbytes = -1; 261 return; 262 } 263 cur_free -= ih_size; 264 265 tb->lbytes = op_check_left(vi, cur_free, 0, 0); 266 if (tb->lbytes != -1) 267 /* count partially shifted item */ 268 tb->lnum[0]++; 269 270 break; 271 } 272 273 return; 274 } 275 276 /* 277 * Using virtual node check, how many items can be 278 * shifted to right neighbor 279 */ 280 static void check_right(struct tree_balance *tb, int h, int cur_free) 281 { 282 int i; 283 struct virtual_node *vn = tb->tb_vn; 284 struct virtual_item *vi; 285 int d_size, ih_size; 286 287 RFALSE(cur_free < 0, "vs-8070: cur_free < 0"); 288 289 /* internal level */ 290 if (h > 0) { 291 tb->rnum[h] = cur_free / (DC_SIZE + KEY_SIZE); 292 return; 293 } 294 295 /* leaf level */ 296 297 if (!cur_free || !vn->vn_nr_item) { 298 /* no free space */ 299 tb->rnum[h] = 0; 300 tb->rbytes = -1; 301 return; 302 } 303 304 RFALSE(!PATH_H_PPARENT(tb->tb_path, 0), 305 "vs-8075: parent does not exist or invalid"); 306 307 vi = vn->vn_vi + vn->vn_nr_item - 1; 308 if ((unsigned int)cur_free >= 309 (vn->vn_size - 310 ((vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) ? IH_SIZE : 0))) { 311 /* all contents of S[0] fits into R[0] */ 312 313 RFALSE(vn->vn_mode == M_INSERT || vn->vn_mode == M_PASTE, 314 "vs-8080: invalid mode or balance condition failed"); 315 316 tb->rnum[h] = vn->vn_nr_item; 317 tb->rbytes = -1; 318 return; 319 } 320 321 d_size = 0, ih_size = IH_SIZE; 322 323 /* last item may be merge with first item in right neighbor */ 324 if (vi->vi_type & VI_TYPE_RIGHT_MERGEABLE) 325 d_size = -(int)IH_SIZE, ih_size = 0; 326 327 tb->rnum[0] = 0; 328 for (i = vn->vn_nr_item - 1; i >= 0; 329 i--, d_size = 0, ih_size = IH_SIZE, vi--) { 330 d_size += vi->vi_item_len; 331 if (cur_free >= d_size) { 332 /* the item can be shifted entirely */ 333 cur_free -= d_size; 334 tb->rnum[0]++; 335 continue; 336 } 337 338 /* 339 * check whether R[0] can hold ih and at least one 340 * byte of the item body 341 */ 342 343 /* cannot shift even a part of the current item */ 344 if (cur_free <= ih_size) { 345 tb->rbytes = -1; 346 return; 347 } 348 349 /* 350 * R[0] can hold the header of the item and at least 351 * one byte of its body 352 */ 353 cur_free -= ih_size; /* cur_free is still > 0 */ 354 355 tb->rbytes = op_check_right(vi, cur_free); 356 if (tb->rbytes != -1) 357 /* count partially shifted item */ 358 tb->rnum[0]++; 359 360 break; 361 } 362 363 return; 364 } 365 366 /* 367 * from - number of items, which are shifted to left neighbor entirely 368 * to - number of item, which are shifted to right neighbor entirely 369 * from_bytes - number of bytes of boundary item (or directory entries) 370 * which are shifted to left neighbor 371 * to_bytes - number of bytes of boundary item (or directory entries) 372 * which are shifted to right neighbor 373 */ 374 static int get_num_ver(int mode, struct tree_balance *tb, int h, 375 int from, int from_bytes, 376 int to, int to_bytes, short *snum012, int flow) 377 { 378 int i; 379 int units; 380 struct virtual_node *vn = tb->tb_vn; 381 int total_node_size, max_node_size, current_item_size; 382 int needed_nodes; 383 384 /* position of item we start filling node from */ 385 int start_item; 386 387 /* position of item we finish filling node by */ 388 int end_item; 389 390 /* 391 * number of first bytes (entries for directory) of start_item-th item 392 * we do not include into node that is being filled 393 */ 394 int start_bytes; 395 396 /* 397 * number of last bytes (entries for directory) of end_item-th item 398 * we do node include into node that is being filled 399 */ 400 int end_bytes; 401 402 /* 403 * these are positions in virtual item of items, that are split 404 * between S[0] and S1new and S1new and S2new 405 */ 406 int split_item_positions[2]; 407 408 split_item_positions[0] = -1; 409 split_item_positions[1] = -1; 410 411 /* 412 * We only create additional nodes if we are in insert or paste mode 413 * or we are in replace mode at the internal level. If h is 0 and 414 * the mode is M_REPLACE then in fix_nodes we change the mode to 415 * paste or insert before we get here in the code. 416 */ 417 RFALSE(tb->insert_size[h] < 0 || (mode != M_INSERT && mode != M_PASTE), 418 "vs-8100: insert_size < 0 in overflow"); 419 420 max_node_size = MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, h)); 421 422 /* 423 * snum012 [0-2] - number of items, that lay 424 * to S[0], first new node and second new node 425 */ 426 snum012[3] = -1; /* s1bytes */ 427 snum012[4] = -1; /* s2bytes */ 428 429 /* internal level */ 430 if (h > 0) { 431 i = ((to - from) * (KEY_SIZE + DC_SIZE) + DC_SIZE); 432 if (i == max_node_size) 433 return 1; 434 return (i / max_node_size + 1); 435 } 436 437 /* leaf level */ 438 needed_nodes = 1; 439 total_node_size = 0; 440 441 /* start from 'from'-th item */ 442 start_item = from; 443 /* skip its first 'start_bytes' units */ 444 start_bytes = ((from_bytes != -1) ? from_bytes : 0); 445 446 /* last included item is the 'end_item'-th one */ 447 end_item = vn->vn_nr_item - to - 1; 448 /* do not count last 'end_bytes' units of 'end_item'-th item */ 449 end_bytes = (to_bytes != -1) ? to_bytes : 0; 450 451 /* 452 * go through all item beginning from the start_item-th item 453 * and ending by the end_item-th item. Do not count first 454 * 'start_bytes' units of 'start_item'-th item and last 455 * 'end_bytes' of 'end_item'-th item 456 */ 457 for (i = start_item; i <= end_item; i++) { 458 struct virtual_item *vi = vn->vn_vi + i; 459 int skip_from_end = ((i == end_item) ? end_bytes : 0); 460 461 RFALSE(needed_nodes > 3, "vs-8105: too many nodes are needed"); 462 463 /* get size of current item */ 464 current_item_size = vi->vi_item_len; 465 466 /* 467 * do not take in calculation head part (from_bytes) 468 * of from-th item 469 */ 470 current_item_size -= 471 op_part_size(vi, 0 /*from start */ , start_bytes); 472 473 /* do not take in calculation tail part of last item */ 474 current_item_size -= 475 op_part_size(vi, 1 /*from end */ , skip_from_end); 476 477 /* if item fits into current node entierly */ 478 if (total_node_size + current_item_size <= max_node_size) { 479 snum012[needed_nodes - 1]++; 480 total_node_size += current_item_size; 481 start_bytes = 0; 482 continue; 483 } 484 485 /* 486 * virtual item length is longer, than max size of item in 487 * a node. It is impossible for direct item 488 */ 489 if (current_item_size > max_node_size) { 490 RFALSE(is_direct_le_ih(vi->vi_ih), 491 "vs-8110: " 492 "direct item length is %d. It can not be longer than %d", 493 current_item_size, max_node_size); 494 /* we will try to split it */ 495 flow = 1; 496 } 497 498 /* as we do not split items, take new node and continue */ 499 if (!flow) { 500 needed_nodes++; 501 i--; 502 total_node_size = 0; 503 continue; 504 } 505 506 /* 507 * calculate number of item units which fit into node being 508 * filled 509 */ 510 { 511 int free_space; 512 513 free_space = max_node_size - total_node_size - IH_SIZE; 514 units = 515 op_check_left(vi, free_space, start_bytes, 516 skip_from_end); 517 /* 518 * nothing fits into current node, take new 519 * node and continue 520 */ 521 if (units == -1) { 522 needed_nodes++, i--, total_node_size = 0; 523 continue; 524 } 525 } 526 527 /* something fits into the current node */ 528 start_bytes += units; 529 snum012[needed_nodes - 1 + 3] = units; 530 531 if (needed_nodes > 2) 532 reiserfs_warning(tb->tb_sb, "vs-8111", 533 "split_item_position is out of range"); 534 snum012[needed_nodes - 1]++; 535 split_item_positions[needed_nodes - 1] = i; 536 needed_nodes++; 537 /* continue from the same item with start_bytes != -1 */ 538 start_item = i; 539 i--; 540 total_node_size = 0; 541 } 542 543 /* 544 * sum012[4] (if it is not -1) contains number of units of which 545 * are to be in S1new, snum012[3] - to be in S0. They are supposed 546 * to be S1bytes and S2bytes correspondingly, so recalculate 547 */ 548 if (snum012[4] > 0) { 549 int split_item_num; 550 int bytes_to_r, bytes_to_l; 551 int bytes_to_S1new; 552 553 split_item_num = split_item_positions[1]; 554 bytes_to_l = 555 ((from == split_item_num 556 && from_bytes != -1) ? from_bytes : 0); 557 bytes_to_r = 558 ((end_item == split_item_num 559 && end_bytes != -1) ? end_bytes : 0); 560 bytes_to_S1new = 561 ((split_item_positions[0] == 562 split_item_positions[1]) ? snum012[3] : 0); 563 564 /* s2bytes */ 565 snum012[4] = 566 op_unit_num(&vn->vn_vi[split_item_num]) - snum012[4] - 567 bytes_to_r - bytes_to_l - bytes_to_S1new; 568 569 if (vn->vn_vi[split_item_num].vi_index != TYPE_DIRENTRY && 570 vn->vn_vi[split_item_num].vi_index != TYPE_INDIRECT) 571 reiserfs_warning(tb->tb_sb, "vs-8115", 572 "not directory or indirect item"); 573 } 574 575 /* now we know S2bytes, calculate S1bytes */ 576 if (snum012[3] > 0) { 577 int split_item_num; 578 int bytes_to_r, bytes_to_l; 579 int bytes_to_S2new; 580 581 split_item_num = split_item_positions[0]; 582 bytes_to_l = 583 ((from == split_item_num 584 && from_bytes != -1) ? from_bytes : 0); 585 bytes_to_r = 586 ((end_item == split_item_num 587 && end_bytes != -1) ? end_bytes : 0); 588 bytes_to_S2new = 589 ((split_item_positions[0] == split_item_positions[1] 590 && snum012[4] != -1) ? snum012[4] : 0); 591 592 /* s1bytes */ 593 snum012[3] = 594 op_unit_num(&vn->vn_vi[split_item_num]) - snum012[3] - 595 bytes_to_r - bytes_to_l - bytes_to_S2new; 596 } 597 598 return needed_nodes; 599 } 600 601 602 /* 603 * Set parameters for balancing. 604 * Performs write of results of analysis of balancing into structure tb, 605 * where it will later be used by the functions that actually do the balancing. 606 * Parameters: 607 * tb tree_balance structure; 608 * h current level of the node; 609 * lnum number of items from S[h] that must be shifted to L[h]; 610 * rnum number of items from S[h] that must be shifted to R[h]; 611 * blk_num number of blocks that S[h] will be splitted into; 612 * s012 number of items that fall into splitted nodes. 613 * lbytes number of bytes which flow to the left neighbor from the 614 * item that is not shifted entirely 615 * rbytes number of bytes which flow to the right neighbor from the 616 * item that is not shifted entirely 617 * s1bytes number of bytes which flow to the first new node when 618 * S[0] splits (this number is contained in s012 array) 619 */ 620 621 static void set_parameters(struct tree_balance *tb, int h, int lnum, 622 int rnum, int blk_num, short *s012, int lb, int rb) 623 { 624 625 tb->lnum[h] = lnum; 626 tb->rnum[h] = rnum; 627 tb->blknum[h] = blk_num; 628 629 /* only for leaf level */ 630 if (h == 0) { 631 if (s012 != NULL) { 632 tb->s0num = *s012++; 633 tb->snum[0] = *s012++; 634 tb->snum[1] = *s012++; 635 tb->sbytes[0] = *s012++; 636 tb->sbytes[1] = *s012; 637 } 638 tb->lbytes = lb; 639 tb->rbytes = rb; 640 } 641 PROC_INFO_ADD(tb->tb_sb, lnum[h], lnum); 642 PROC_INFO_ADD(tb->tb_sb, rnum[h], rnum); 643 644 PROC_INFO_ADD(tb->tb_sb, lbytes[h], lb); 645 PROC_INFO_ADD(tb->tb_sb, rbytes[h], rb); 646 } 647 648 /* 649 * check if node disappears if we shift tb->lnum[0] items to left 650 * neighbor and tb->rnum[0] to the right one. 651 */ 652 static int is_leaf_removable(struct tree_balance *tb) 653 { 654 struct virtual_node *vn = tb->tb_vn; 655 int to_left, to_right; 656 int size; 657 int remain_items; 658 659 /* 660 * number of items that will be shifted to left (right) neighbor 661 * entirely 662 */ 663 to_left = tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0); 664 to_right = tb->rnum[0] - ((tb->rbytes != -1) ? 1 : 0); 665 remain_items = vn->vn_nr_item; 666 667 /* how many items remain in S[0] after shiftings to neighbors */ 668 remain_items -= (to_left + to_right); 669 670 /* all content of node can be shifted to neighbors */ 671 if (remain_items < 1) { 672 set_parameters(tb, 0, to_left, vn->vn_nr_item - to_left, 0, 673 NULL, -1, -1); 674 return 1; 675 } 676 677 /* S[0] is not removable */ 678 if (remain_items > 1 || tb->lbytes == -1 || tb->rbytes == -1) 679 return 0; 680 681 /* check whether we can divide 1 remaining item between neighbors */ 682 683 /* get size of remaining item (in item units) */ 684 size = op_unit_num(&vn->vn_vi[to_left]); 685 686 if (tb->lbytes + tb->rbytes >= size) { 687 set_parameters(tb, 0, to_left + 1, to_right + 1, 0, NULL, 688 tb->lbytes, -1); 689 return 1; 690 } 691 692 return 0; 693 } 694 695 /* check whether L, S, R can be joined in one node */ 696 static int are_leaves_removable(struct tree_balance *tb, int lfree, int rfree) 697 { 698 struct virtual_node *vn = tb->tb_vn; 699 int ih_size; 700 struct buffer_head *S0; 701 702 S0 = PATH_H_PBUFFER(tb->tb_path, 0); 703 704 ih_size = 0; 705 if (vn->vn_nr_item) { 706 if (vn->vn_vi[0].vi_type & VI_TYPE_LEFT_MERGEABLE) 707 ih_size += IH_SIZE; 708 709 if (vn->vn_vi[vn->vn_nr_item - 1]. 710 vi_type & VI_TYPE_RIGHT_MERGEABLE) 711 ih_size += IH_SIZE; 712 } else { 713 /* there was only one item and it will be deleted */ 714 struct item_head *ih; 715 716 RFALSE(B_NR_ITEMS(S0) != 1, 717 "vs-8125: item number must be 1: it is %d", 718 B_NR_ITEMS(S0)); 719 720 ih = item_head(S0, 0); 721 if (tb->CFR[0] 722 && !comp_short_le_keys(&ih->ih_key, 723 internal_key(tb->CFR[0], 724 tb->rkey[0]))) 725 /* 726 * Directory must be in correct state here: that is 727 * somewhere at the left side should exist first 728 * directory item. But the item being deleted can 729 * not be that first one because its right neighbor 730 * is item of the same directory. (But first item 731 * always gets deleted in last turn). So, neighbors 732 * of deleted item can be merged, so we can save 733 * ih_size 734 */ 735 if (is_direntry_le_ih(ih)) { 736 ih_size = IH_SIZE; 737 738 /* 739 * we might check that left neighbor exists 740 * and is of the same directory 741 */ 742 RFALSE(le_ih_k_offset(ih) == DOT_OFFSET, 743 "vs-8130: first directory item can not be removed until directory is not empty"); 744 } 745 746 } 747 748 if (MAX_CHILD_SIZE(S0) + vn->vn_size <= rfree + lfree + ih_size) { 749 set_parameters(tb, 0, -1, -1, -1, NULL, -1, -1); 750 PROC_INFO_INC(tb->tb_sb, leaves_removable); 751 return 1; 752 } 753 return 0; 754 755 } 756 757 /* when we do not split item, lnum and rnum are numbers of entire items */ 758 #define SET_PAR_SHIFT_LEFT \ 759 if (h)\ 760 {\ 761 int to_l;\ 762 \ 763 to_l = (MAX_NR_KEY(Sh)+1 - lpar + vn->vn_nr_item + 1) / 2 -\ 764 (MAX_NR_KEY(Sh) + 1 - lpar);\ 765 \ 766 set_parameters (tb, h, to_l, 0, lnver, NULL, -1, -1);\ 767 }\ 768 else \ 769 {\ 770 if (lset==LEFT_SHIFT_FLOW)\ 771 set_parameters (tb, h, lpar, 0, lnver, snum012+lset,\ 772 tb->lbytes, -1);\ 773 else\ 774 set_parameters (tb, h, lpar - (tb->lbytes!=-1), 0, lnver, snum012+lset,\ 775 -1, -1);\ 776 } 777 778 #define SET_PAR_SHIFT_RIGHT \ 779 if (h)\ 780 {\ 781 int to_r;\ 782 \ 783 to_r = (MAX_NR_KEY(Sh)+1 - rpar + vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - rpar);\ 784 \ 785 set_parameters (tb, h, 0, to_r, rnver, NULL, -1, -1);\ 786 }\ 787 else \ 788 {\ 789 if (rset==RIGHT_SHIFT_FLOW)\ 790 set_parameters (tb, h, 0, rpar, rnver, snum012+rset,\ 791 -1, tb->rbytes);\ 792 else\ 793 set_parameters (tb, h, 0, rpar - (tb->rbytes!=-1), rnver, snum012+rset,\ 794 -1, -1);\ 795 } 796 797 static void free_buffers_in_tb(struct tree_balance *tb) 798 { 799 int i; 800 801 pathrelse(tb->tb_path); 802 803 for (i = 0; i < MAX_HEIGHT; i++) { 804 brelse(tb->L[i]); 805 brelse(tb->R[i]); 806 brelse(tb->FL[i]); 807 brelse(tb->FR[i]); 808 brelse(tb->CFL[i]); 809 brelse(tb->CFR[i]); 810 811 tb->L[i] = NULL; 812 tb->R[i] = NULL; 813 tb->FL[i] = NULL; 814 tb->FR[i] = NULL; 815 tb->CFL[i] = NULL; 816 tb->CFR[i] = NULL; 817 } 818 } 819 820 /* 821 * Get new buffers for storing new nodes that are created while balancing. 822 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; 823 * CARRY_ON - schedule didn't occur while the function worked; 824 * NO_DISK_SPACE - no disk space. 825 */ 826 /* The function is NOT SCHEDULE-SAFE! */ 827 static int get_empty_nodes(struct tree_balance *tb, int h) 828 { 829 struct buffer_head *new_bh, *Sh = PATH_H_PBUFFER(tb->tb_path, h); 830 b_blocknr_t *blocknr, blocknrs[MAX_AMOUNT_NEEDED] = { 0, }; 831 int counter, number_of_freeblk; 832 int amount_needed; /* number of needed empty blocks */ 833 int retval = CARRY_ON; 834 struct super_block *sb = tb->tb_sb; 835 836 /* 837 * number_of_freeblk is the number of empty blocks which have been 838 * acquired for use by the balancing algorithm minus the number of 839 * empty blocks used in the previous levels of the analysis, 840 * number_of_freeblk = tb->cur_blknum can be non-zero if a schedule 841 * occurs after empty blocks are acquired, and the balancing analysis 842 * is then restarted, amount_needed is the number needed by this 843 * level (h) of the balancing analysis. 844 * 845 * Note that for systems with many processes writing, it would be 846 * more layout optimal to calculate the total number needed by all 847 * levels and then to run reiserfs_new_blocks to get all of them at 848 * once. 849 */ 850 851 /* 852 * Initiate number_of_freeblk to the amount acquired prior to the 853 * restart of the analysis or 0 if not restarted, then subtract the 854 * amount needed by all of the levels of the tree below h. 855 */ 856 /* blknum includes S[h], so we subtract 1 in this calculation */ 857 for (counter = 0, number_of_freeblk = tb->cur_blknum; 858 counter < h; counter++) 859 number_of_freeblk -= 860 (tb->blknum[counter]) ? (tb->blknum[counter] - 861 1) : 0; 862 863 /* Allocate missing empty blocks. */ 864 /* if Sh == 0 then we are getting a new root */ 865 amount_needed = (Sh) ? (tb->blknum[h] - 1) : 1; 866 /* 867 * Amount_needed = the amount that we need more than the 868 * amount that we have. 869 */ 870 if (amount_needed > number_of_freeblk) 871 amount_needed -= number_of_freeblk; 872 else /* If we have enough already then there is nothing to do. */ 873 return CARRY_ON; 874 875 /* 876 * No need to check quota - is not allocated for blocks used 877 * for formatted nodes 878 */ 879 if (reiserfs_new_form_blocknrs(tb, blocknrs, 880 amount_needed) == NO_DISK_SPACE) 881 return NO_DISK_SPACE; 882 883 /* for each blocknumber we just got, get a buffer and stick it on FEB */ 884 for (blocknr = blocknrs, counter = 0; 885 counter < amount_needed; blocknr++, counter++) { 886 887 RFALSE(!*blocknr, 888 "PAP-8135: reiserfs_new_blocknrs failed when got new blocks"); 889 890 new_bh = sb_getblk(sb, *blocknr); 891 RFALSE(buffer_dirty(new_bh) || 892 buffer_journaled(new_bh) || 893 buffer_journal_dirty(new_bh), 894 "PAP-8140: journaled or dirty buffer %b for the new block", 895 new_bh); 896 897 /* Put empty buffers into the array. */ 898 RFALSE(tb->FEB[tb->cur_blknum], 899 "PAP-8141: busy slot for new buffer"); 900 901 set_buffer_journal_new(new_bh); 902 tb->FEB[tb->cur_blknum++] = new_bh; 903 } 904 905 if (retval == CARRY_ON && FILESYSTEM_CHANGED_TB(tb)) 906 retval = REPEAT_SEARCH; 907 908 return retval; 909 } 910 911 /* 912 * Get free space of the left neighbor, which is stored in the parent 913 * node of the left neighbor. 914 */ 915 static int get_lfree(struct tree_balance *tb, int h) 916 { 917 struct buffer_head *l, *f; 918 int order; 919 920 if ((f = PATH_H_PPARENT(tb->tb_path, h)) == NULL || 921 (l = tb->FL[h]) == NULL) 922 return 0; 923 924 if (f == l) 925 order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) - 1; 926 else { 927 order = B_NR_ITEMS(l); 928 f = l; 929 } 930 931 return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order))); 932 } 933 934 /* 935 * Get free space of the right neighbor, 936 * which is stored in the parent node of the right neighbor. 937 */ 938 static int get_rfree(struct tree_balance *tb, int h) 939 { 940 struct buffer_head *r, *f; 941 int order; 942 943 if ((f = PATH_H_PPARENT(tb->tb_path, h)) == NULL || 944 (r = tb->FR[h]) == NULL) 945 return 0; 946 947 if (f == r) 948 order = PATH_H_B_ITEM_ORDER(tb->tb_path, h) + 1; 949 else { 950 order = 0; 951 f = r; 952 } 953 954 return (MAX_CHILD_SIZE(f) - dc_size(B_N_CHILD(f, order))); 955 956 } 957 958 /* Check whether left neighbor is in memory. */ 959 static int is_left_neighbor_in_cache(struct tree_balance *tb, int h) 960 { 961 struct buffer_head *father, *left; 962 struct super_block *sb = tb->tb_sb; 963 b_blocknr_t left_neighbor_blocknr; 964 int left_neighbor_position; 965 966 /* Father of the left neighbor does not exist. */ 967 if (!tb->FL[h]) 968 return 0; 969 970 /* Calculate father of the node to be balanced. */ 971 father = PATH_H_PBUFFER(tb->tb_path, h + 1); 972 973 RFALSE(!father || 974 !B_IS_IN_TREE(father) || 975 !B_IS_IN_TREE(tb->FL[h]) || 976 !buffer_uptodate(father) || 977 !buffer_uptodate(tb->FL[h]), 978 "vs-8165: F[h] (%b) or FL[h] (%b) is invalid", 979 father, tb->FL[h]); 980 981 /* 982 * Get position of the pointer to the left neighbor 983 * into the left father. 984 */ 985 left_neighbor_position = (father == tb->FL[h]) ? 986 tb->lkey[h] : B_NR_ITEMS(tb->FL[h]); 987 /* Get left neighbor block number. */ 988 left_neighbor_blocknr = 989 B_N_CHILD_NUM(tb->FL[h], left_neighbor_position); 990 /* Look for the left neighbor in the cache. */ 991 if ((left = sb_find_get_block(sb, left_neighbor_blocknr))) { 992 993 RFALSE(buffer_uptodate(left) && !B_IS_IN_TREE(left), 994 "vs-8170: left neighbor (%b %z) is not in the tree", 995 left, left); 996 put_bh(left); 997 return 1; 998 } 999 1000 return 0; 1001 } 1002 1003 #define LEFT_PARENTS 'l' 1004 #define RIGHT_PARENTS 'r' 1005 1006 static void decrement_key(struct cpu_key *key) 1007 { 1008 /* call item specific function for this key */ 1009 item_ops[cpu_key_k_type(key)]->decrement_key(key); 1010 } 1011 1012 /* 1013 * Calculate far left/right parent of the left/right neighbor of the 1014 * current node, that is calculate the left/right (FL[h]/FR[h]) neighbor 1015 * of the parent F[h]. 1016 * Calculate left/right common parent of the current node and L[h]/R[h]. 1017 * Calculate left/right delimiting key position. 1018 * Returns: PATH_INCORRECT - path in the tree is not correct 1019 * SCHEDULE_OCCURRED - schedule occurred while the function worked 1020 * CARRY_ON - schedule didn't occur while the function 1021 * worked 1022 */ 1023 static int get_far_parent(struct tree_balance *tb, 1024 int h, 1025 struct buffer_head **pfather, 1026 struct buffer_head **pcom_father, char c_lr_par) 1027 { 1028 struct buffer_head *parent; 1029 INITIALIZE_PATH(s_path_to_neighbor_father); 1030 struct treepath *path = tb->tb_path; 1031 struct cpu_key s_lr_father_key; 1032 int counter, 1033 position = INT_MAX, 1034 first_last_position = 0, 1035 path_offset = PATH_H_PATH_OFFSET(path, h); 1036 1037 /* 1038 * Starting from F[h] go upwards in the tree, and look for the common 1039 * ancestor of F[h], and its neighbor l/r, that should be obtained. 1040 */ 1041 1042 counter = path_offset; 1043 1044 RFALSE(counter < FIRST_PATH_ELEMENT_OFFSET, 1045 "PAP-8180: invalid path length"); 1046 1047 for (; counter > FIRST_PATH_ELEMENT_OFFSET; counter--) { 1048 /* 1049 * Check whether parent of the current buffer in the path 1050 * is really parent in the tree. 1051 */ 1052 if (!B_IS_IN_TREE 1053 (parent = PATH_OFFSET_PBUFFER(path, counter - 1))) 1054 return REPEAT_SEARCH; 1055 1056 /* Check whether position in the parent is correct. */ 1057 if ((position = 1058 PATH_OFFSET_POSITION(path, 1059 counter - 1)) > 1060 B_NR_ITEMS(parent)) 1061 return REPEAT_SEARCH; 1062 1063 /* 1064 * Check whether parent at the path really points 1065 * to the child. 1066 */ 1067 if (B_N_CHILD_NUM(parent, position) != 1068 PATH_OFFSET_PBUFFER(path, counter)->b_blocknr) 1069 return REPEAT_SEARCH; 1070 1071 /* 1072 * Return delimiting key if position in the parent is not 1073 * equal to first/last one. 1074 */ 1075 if (c_lr_par == RIGHT_PARENTS) 1076 first_last_position = B_NR_ITEMS(parent); 1077 if (position != first_last_position) { 1078 *pcom_father = parent; 1079 get_bh(*pcom_father); 1080 /*(*pcom_father = parent)->b_count++; */ 1081 break; 1082 } 1083 } 1084 1085 /* if we are in the root of the tree, then there is no common father */ 1086 if (counter == FIRST_PATH_ELEMENT_OFFSET) { 1087 /* 1088 * Check whether first buffer in the path is the 1089 * root of the tree. 1090 */ 1091 if (PATH_OFFSET_PBUFFER 1092 (tb->tb_path, 1093 FIRST_PATH_ELEMENT_OFFSET)->b_blocknr == 1094 SB_ROOT_BLOCK(tb->tb_sb)) { 1095 *pfather = *pcom_father = NULL; 1096 return CARRY_ON; 1097 } 1098 return REPEAT_SEARCH; 1099 } 1100 1101 RFALSE(B_LEVEL(*pcom_father) <= DISK_LEAF_NODE_LEVEL, 1102 "PAP-8185: (%b %z) level too small", 1103 *pcom_father, *pcom_father); 1104 1105 /* Check whether the common parent is locked. */ 1106 1107 if (buffer_locked(*pcom_father)) { 1108 1109 /* Release the write lock while the buffer is busy */ 1110 int depth = reiserfs_write_unlock_nested(tb->tb_sb); 1111 __wait_on_buffer(*pcom_father); 1112 reiserfs_write_lock_nested(tb->tb_sb, depth); 1113 if (FILESYSTEM_CHANGED_TB(tb)) { 1114 brelse(*pcom_father); 1115 return REPEAT_SEARCH; 1116 } 1117 } 1118 1119 /* 1120 * So, we got common parent of the current node and its 1121 * left/right neighbor. Now we are getting the parent of the 1122 * left/right neighbor. 1123 */ 1124 1125 /* Form key to get parent of the left/right neighbor. */ 1126 le_key2cpu_key(&s_lr_father_key, 1127 internal_key(*pcom_father, 1128 (c_lr_par == 1129 LEFT_PARENTS) ? (tb->lkey[h - 1] = 1130 position - 1131 1) : (tb->rkey[h - 1132 1] = 1133 position))); 1134 1135 if (c_lr_par == LEFT_PARENTS) 1136 decrement_key(&s_lr_father_key); 1137 1138 if (search_by_key 1139 (tb->tb_sb, &s_lr_father_key, &s_path_to_neighbor_father, 1140 h + 1) == IO_ERROR) 1141 /* path is released */ 1142 return IO_ERROR; 1143 1144 if (FILESYSTEM_CHANGED_TB(tb)) { 1145 pathrelse(&s_path_to_neighbor_father); 1146 brelse(*pcom_father); 1147 return REPEAT_SEARCH; 1148 } 1149 1150 *pfather = PATH_PLAST_BUFFER(&s_path_to_neighbor_father); 1151 1152 RFALSE(B_LEVEL(*pfather) != h + 1, 1153 "PAP-8190: (%b %z) level too small", *pfather, *pfather); 1154 RFALSE(s_path_to_neighbor_father.path_length < 1155 FIRST_PATH_ELEMENT_OFFSET, "PAP-8192: path length is too small"); 1156 1157 s_path_to_neighbor_father.path_length--; 1158 pathrelse(&s_path_to_neighbor_father); 1159 return CARRY_ON; 1160 } 1161 1162 /* 1163 * Get parents of neighbors of node in the path(S[path_offset]) and 1164 * common parents of S[path_offset] and L[path_offset]/R[path_offset]: 1165 * F[path_offset], FL[path_offset], FR[path_offset], CFL[path_offset], 1166 * CFR[path_offset]. 1167 * Calculate numbers of left and right delimiting keys position: 1168 * lkey[path_offset], rkey[path_offset]. 1169 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked 1170 * CARRY_ON - schedule didn't occur while the function worked 1171 */ 1172 static int get_parents(struct tree_balance *tb, int h) 1173 { 1174 struct treepath *path = tb->tb_path; 1175 int position, 1176 ret, 1177 path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h); 1178 struct buffer_head *curf, *curcf; 1179 1180 /* Current node is the root of the tree or will be root of the tree */ 1181 if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) { 1182 /* 1183 * The root can not have parents. 1184 * Release nodes which previously were obtained as 1185 * parents of the current node neighbors. 1186 */ 1187 brelse(tb->FL[h]); 1188 brelse(tb->CFL[h]); 1189 brelse(tb->FR[h]); 1190 brelse(tb->CFR[h]); 1191 tb->FL[h] = NULL; 1192 tb->CFL[h] = NULL; 1193 tb->FR[h] = NULL; 1194 tb->CFR[h] = NULL; 1195 return CARRY_ON; 1196 } 1197 1198 /* Get parent FL[path_offset] of L[path_offset]. */ 1199 position = PATH_OFFSET_POSITION(path, path_offset - 1); 1200 if (position) { 1201 /* Current node is not the first child of its parent. */ 1202 curf = PATH_OFFSET_PBUFFER(path, path_offset - 1); 1203 curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1); 1204 get_bh(curf); 1205 get_bh(curf); 1206 tb->lkey[h] = position - 1; 1207 } else { 1208 /* 1209 * Calculate current parent of L[path_offset], which is the 1210 * left neighbor of the current node. Calculate current 1211 * common parent of L[path_offset] and the current node. 1212 * Note that CFL[path_offset] not equal FL[path_offset] and 1213 * CFL[path_offset] not equal F[path_offset]. 1214 * Calculate lkey[path_offset]. 1215 */ 1216 if ((ret = get_far_parent(tb, h + 1, &curf, 1217 &curcf, 1218 LEFT_PARENTS)) != CARRY_ON) 1219 return ret; 1220 } 1221 1222 brelse(tb->FL[h]); 1223 tb->FL[h] = curf; /* New initialization of FL[h]. */ 1224 brelse(tb->CFL[h]); 1225 tb->CFL[h] = curcf; /* New initialization of CFL[h]. */ 1226 1227 RFALSE((curf && !B_IS_IN_TREE(curf)) || 1228 (curcf && !B_IS_IN_TREE(curcf)), 1229 "PAP-8195: FL (%b) or CFL (%b) is invalid", curf, curcf); 1230 1231 /* Get parent FR[h] of R[h]. */ 1232 1233 /* Current node is the last child of F[h]. FR[h] != F[h]. */ 1234 if (position == B_NR_ITEMS(PATH_H_PBUFFER(path, h + 1))) { 1235 /* 1236 * Calculate current parent of R[h], which is the right 1237 * neighbor of F[h]. Calculate current common parent of 1238 * R[h] and current node. Note that CFR[h] not equal 1239 * FR[path_offset] and CFR[h] not equal F[h]. 1240 */ 1241 if ((ret = 1242 get_far_parent(tb, h + 1, &curf, &curcf, 1243 RIGHT_PARENTS)) != CARRY_ON) 1244 return ret; 1245 } else { 1246 /* Current node is not the last child of its parent F[h]. */ 1247 curf = PATH_OFFSET_PBUFFER(path, path_offset - 1); 1248 curcf = PATH_OFFSET_PBUFFER(path, path_offset - 1); 1249 get_bh(curf); 1250 get_bh(curf); 1251 tb->rkey[h] = position; 1252 } 1253 1254 brelse(tb->FR[h]); 1255 /* New initialization of FR[path_offset]. */ 1256 tb->FR[h] = curf; 1257 1258 brelse(tb->CFR[h]); 1259 /* New initialization of CFR[path_offset]. */ 1260 tb->CFR[h] = curcf; 1261 1262 RFALSE((curf && !B_IS_IN_TREE(curf)) || 1263 (curcf && !B_IS_IN_TREE(curcf)), 1264 "PAP-8205: FR (%b) or CFR (%b) is invalid", curf, curcf); 1265 1266 return CARRY_ON; 1267 } 1268 1269 /* 1270 * it is possible to remove node as result of shiftings to 1271 * neighbors even when we insert or paste item. 1272 */ 1273 static inline int can_node_be_removed(int mode, int lfree, int sfree, int rfree, 1274 struct tree_balance *tb, int h) 1275 { 1276 struct buffer_head *Sh = PATH_H_PBUFFER(tb->tb_path, h); 1277 int levbytes = tb->insert_size[h]; 1278 struct item_head *ih; 1279 struct reiserfs_key *r_key = NULL; 1280 1281 ih = item_head(Sh, 0); 1282 if (tb->CFR[h]) 1283 r_key = internal_key(tb->CFR[h], tb->rkey[h]); 1284 1285 if (lfree + rfree + sfree < MAX_CHILD_SIZE(Sh) + levbytes 1286 /* shifting may merge items which might save space */ 1287 - 1288 ((!h 1289 && op_is_left_mergeable(&ih->ih_key, Sh->b_size)) ? IH_SIZE : 0) 1290 - 1291 ((!h && r_key 1292 && op_is_left_mergeable(r_key, Sh->b_size)) ? IH_SIZE : 0) 1293 + ((h) ? KEY_SIZE : 0)) { 1294 /* node can not be removed */ 1295 if (sfree >= levbytes) { 1296 /* new item fits into node S[h] without any shifting */ 1297 if (!h) 1298 tb->s0num = 1299 B_NR_ITEMS(Sh) + 1300 ((mode == M_INSERT) ? 1 : 0); 1301 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 1302 return NO_BALANCING_NEEDED; 1303 } 1304 } 1305 PROC_INFO_INC(tb->tb_sb, can_node_be_removed[h]); 1306 return !NO_BALANCING_NEEDED; 1307 } 1308 1309 /* 1310 * Check whether current node S[h] is balanced when increasing its size by 1311 * Inserting or Pasting. 1312 * Calculate parameters for balancing for current level h. 1313 * Parameters: 1314 * tb tree_balance structure; 1315 * h current level of the node; 1316 * inum item number in S[h]; 1317 * mode i - insert, p - paste; 1318 * Returns: 1 - schedule occurred; 1319 * 0 - balancing for higher levels needed; 1320 * -1 - no balancing for higher levels needed; 1321 * -2 - no disk space. 1322 */ 1323 /* ip means Inserting or Pasting */ 1324 static int ip_check_balance(struct tree_balance *tb, int h) 1325 { 1326 struct virtual_node *vn = tb->tb_vn; 1327 /* 1328 * Number of bytes that must be inserted into (value is negative 1329 * if bytes are deleted) buffer which contains node being balanced. 1330 * The mnemonic is that the attempted change in node space used 1331 * level is levbytes bytes. 1332 */ 1333 int levbytes; 1334 int ret; 1335 1336 int lfree, sfree, rfree /* free space in L, S and R */ ; 1337 1338 /* 1339 * nver is short for number of vertixes, and lnver is the number if 1340 * we shift to the left, rnver is the number if we shift to the 1341 * right, and lrnver is the number if we shift in both directions. 1342 * The goal is to minimize first the number of vertixes, and second, 1343 * the number of vertixes whose contents are changed by shifting, 1344 * and third the number of uncached vertixes whose contents are 1345 * changed by shifting and must be read from disk. 1346 */ 1347 int nver, lnver, rnver, lrnver; 1348 1349 /* 1350 * used at leaf level only, S0 = S[0] is the node being balanced, 1351 * sInum [ I = 0,1,2 ] is the number of items that will 1352 * remain in node SI after balancing. S1 and S2 are new 1353 * nodes that might be created. 1354 */ 1355 1356 /* 1357 * we perform 8 calls to get_num_ver(). For each call we 1358 * calculate five parameters. where 4th parameter is s1bytes 1359 * and 5th - s2bytes 1360 * 1361 * s0num, s1num, s2num for 8 cases 1362 * 0,1 - do not shift and do not shift but bottle 1363 * 2 - shift only whole item to left 1364 * 3 - shift to left and bottle as much as possible 1365 * 4,5 - shift to right (whole items and as much as possible 1366 * 6,7 - shift to both directions (whole items and as much as possible) 1367 */ 1368 short snum012[40] = { 0, }; 1369 1370 /* Sh is the node whose balance is currently being checked */ 1371 struct buffer_head *Sh; 1372 1373 Sh = PATH_H_PBUFFER(tb->tb_path, h); 1374 levbytes = tb->insert_size[h]; 1375 1376 /* Calculate balance parameters for creating new root. */ 1377 if (!Sh) { 1378 if (!h) 1379 reiserfs_panic(tb->tb_sb, "vs-8210", 1380 "S[0] can not be 0"); 1381 switch (ret = get_empty_nodes(tb, h)) { 1382 /* no balancing for higher levels needed */ 1383 case CARRY_ON: 1384 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 1385 return NO_BALANCING_NEEDED; 1386 1387 case NO_DISK_SPACE: 1388 case REPEAT_SEARCH: 1389 return ret; 1390 default: 1391 reiserfs_panic(tb->tb_sb, "vs-8215", "incorrect " 1392 "return value of get_empty_nodes"); 1393 } 1394 } 1395 1396 /* get parents of S[h] neighbors. */ 1397 ret = get_parents(tb, h); 1398 if (ret != CARRY_ON) 1399 return ret; 1400 1401 sfree = B_FREE_SPACE(Sh); 1402 1403 /* get free space of neighbors */ 1404 rfree = get_rfree(tb, h); 1405 lfree = get_lfree(tb, h); 1406 1407 /* and new item fits into node S[h] without any shifting */ 1408 if (can_node_be_removed(vn->vn_mode, lfree, sfree, rfree, tb, h) == 1409 NO_BALANCING_NEEDED) 1410 return NO_BALANCING_NEEDED; 1411 1412 create_virtual_node(tb, h); 1413 1414 /* 1415 * determine maximal number of items we can shift to the left 1416 * neighbor (in tb structure) and the maximal number of bytes 1417 * that can flow to the left neighbor from the left most liquid 1418 * item that cannot be shifted from S[0] entirely (returned value) 1419 */ 1420 check_left(tb, h, lfree); 1421 1422 /* 1423 * determine maximal number of items we can shift to the right 1424 * neighbor (in tb structure) and the maximal number of bytes 1425 * that can flow to the right neighbor from the right most liquid 1426 * item that cannot be shifted from S[0] entirely (returned value) 1427 */ 1428 check_right(tb, h, rfree); 1429 1430 /* 1431 * all contents of internal node S[h] can be moved into its 1432 * neighbors, S[h] will be removed after balancing 1433 */ 1434 if (h && (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1)) { 1435 int to_r; 1436 1437 /* 1438 * Since we are working on internal nodes, and our internal 1439 * nodes have fixed size entries, then we can balance by the 1440 * number of items rather than the space they consume. In this 1441 * routine we set the left node equal to the right node, 1442 * allowing a difference of less than or equal to 1 child 1443 * pointer. 1444 */ 1445 to_r = 1446 ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] + 1447 vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - 1448 tb->rnum[h]); 1449 set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, 1450 -1, -1); 1451 return CARRY_ON; 1452 } 1453 1454 /* 1455 * this checks balance condition, that any two neighboring nodes 1456 * can not fit in one node 1457 */ 1458 RFALSE(h && 1459 (tb->lnum[h] >= vn->vn_nr_item + 1 || 1460 tb->rnum[h] >= vn->vn_nr_item + 1), 1461 "vs-8220: tree is not balanced on internal level"); 1462 RFALSE(!h && ((tb->lnum[h] >= vn->vn_nr_item && (tb->lbytes == -1)) || 1463 (tb->rnum[h] >= vn->vn_nr_item && (tb->rbytes == -1))), 1464 "vs-8225: tree is not balanced on leaf level"); 1465 1466 /* 1467 * all contents of S[0] can be moved into its neighbors 1468 * S[0] will be removed after balancing. 1469 */ 1470 if (!h && is_leaf_removable(tb)) 1471 return CARRY_ON; 1472 1473 /* 1474 * why do we perform this check here rather than earlier?? 1475 * Answer: we can win 1 node in some cases above. Moreover we 1476 * checked it above, when we checked, that S[0] is not removable 1477 * in principle 1478 */ 1479 1480 /* new item fits into node S[h] without any shifting */ 1481 if (sfree >= levbytes) { 1482 if (!h) 1483 tb->s0num = vn->vn_nr_item; 1484 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 1485 return NO_BALANCING_NEEDED; 1486 } 1487 1488 { 1489 int lpar, rpar, nset, lset, rset, lrset; 1490 /* regular overflowing of the node */ 1491 1492 /* 1493 * get_num_ver works in 2 modes (FLOW & NO_FLOW) 1494 * lpar, rpar - number of items we can shift to left/right 1495 * neighbor (including splitting item) 1496 * nset, lset, rset, lrset - shows, whether flowing items 1497 * give better packing 1498 */ 1499 #define FLOW 1 1500 #define NO_FLOW 0 /* do not any splitting */ 1501 1502 /* we choose one of the following */ 1503 #define NOTHING_SHIFT_NO_FLOW 0 1504 #define NOTHING_SHIFT_FLOW 5 1505 #define LEFT_SHIFT_NO_FLOW 10 1506 #define LEFT_SHIFT_FLOW 15 1507 #define RIGHT_SHIFT_NO_FLOW 20 1508 #define RIGHT_SHIFT_FLOW 25 1509 #define LR_SHIFT_NO_FLOW 30 1510 #define LR_SHIFT_FLOW 35 1511 1512 lpar = tb->lnum[h]; 1513 rpar = tb->rnum[h]; 1514 1515 /* 1516 * calculate number of blocks S[h] must be split into when 1517 * nothing is shifted to the neighbors, as well as number of 1518 * items in each part of the split node (s012 numbers), 1519 * and number of bytes (s1bytes) of the shared drop which 1520 * flow to S1 if any 1521 */ 1522 nset = NOTHING_SHIFT_NO_FLOW; 1523 nver = get_num_ver(vn->vn_mode, tb, h, 1524 0, -1, h ? vn->vn_nr_item : 0, -1, 1525 snum012, NO_FLOW); 1526 1527 if (!h) { 1528 int nver1; 1529 1530 /* 1531 * note, that in this case we try to bottle 1532 * between S[0] and S1 (S1 - the first new node) 1533 */ 1534 nver1 = get_num_ver(vn->vn_mode, tb, h, 1535 0, -1, 0, -1, 1536 snum012 + NOTHING_SHIFT_FLOW, FLOW); 1537 if (nver > nver1) 1538 nset = NOTHING_SHIFT_FLOW, nver = nver1; 1539 } 1540 1541 /* 1542 * calculate number of blocks S[h] must be split into when 1543 * l_shift_num first items and l_shift_bytes of the right 1544 * most liquid item to be shifted are shifted to the left 1545 * neighbor, as well as number of items in each part of the 1546 * splitted node (s012 numbers), and number of bytes 1547 * (s1bytes) of the shared drop which flow to S1 if any 1548 */ 1549 lset = LEFT_SHIFT_NO_FLOW; 1550 lnver = get_num_ver(vn->vn_mode, tb, h, 1551 lpar - ((h || tb->lbytes == -1) ? 0 : 1), 1552 -1, h ? vn->vn_nr_item : 0, -1, 1553 snum012 + LEFT_SHIFT_NO_FLOW, NO_FLOW); 1554 if (!h) { 1555 int lnver1; 1556 1557 lnver1 = get_num_ver(vn->vn_mode, tb, h, 1558 lpar - 1559 ((tb->lbytes != -1) ? 1 : 0), 1560 tb->lbytes, 0, -1, 1561 snum012 + LEFT_SHIFT_FLOW, FLOW); 1562 if (lnver > lnver1) 1563 lset = LEFT_SHIFT_FLOW, lnver = lnver1; 1564 } 1565 1566 /* 1567 * calculate number of blocks S[h] must be split into when 1568 * r_shift_num first items and r_shift_bytes of the left most 1569 * liquid item to be shifted are shifted to the right neighbor, 1570 * as well as number of items in each part of the splitted 1571 * node (s012 numbers), and number of bytes (s1bytes) of the 1572 * shared drop which flow to S1 if any 1573 */ 1574 rset = RIGHT_SHIFT_NO_FLOW; 1575 rnver = get_num_ver(vn->vn_mode, tb, h, 1576 0, -1, 1577 h ? (vn->vn_nr_item - rpar) : (rpar - 1578 ((tb-> 1579 rbytes != 1580 -1) ? 1 : 1581 0)), -1, 1582 snum012 + RIGHT_SHIFT_NO_FLOW, NO_FLOW); 1583 if (!h) { 1584 int rnver1; 1585 1586 rnver1 = get_num_ver(vn->vn_mode, tb, h, 1587 0, -1, 1588 (rpar - 1589 ((tb->rbytes != -1) ? 1 : 0)), 1590 tb->rbytes, 1591 snum012 + RIGHT_SHIFT_FLOW, FLOW); 1592 1593 if (rnver > rnver1) 1594 rset = RIGHT_SHIFT_FLOW, rnver = rnver1; 1595 } 1596 1597 /* 1598 * calculate number of blocks S[h] must be split into when 1599 * items are shifted in both directions, as well as number 1600 * of items in each part of the splitted node (s012 numbers), 1601 * and number of bytes (s1bytes) of the shared drop which 1602 * flow to S1 if any 1603 */ 1604 lrset = LR_SHIFT_NO_FLOW; 1605 lrnver = get_num_ver(vn->vn_mode, tb, h, 1606 lpar - ((h || tb->lbytes == -1) ? 0 : 1), 1607 -1, 1608 h ? (vn->vn_nr_item - rpar) : (rpar - 1609 ((tb-> 1610 rbytes != 1611 -1) ? 1 : 1612 0)), -1, 1613 snum012 + LR_SHIFT_NO_FLOW, NO_FLOW); 1614 if (!h) { 1615 int lrnver1; 1616 1617 lrnver1 = get_num_ver(vn->vn_mode, tb, h, 1618 lpar - 1619 ((tb->lbytes != -1) ? 1 : 0), 1620 tb->lbytes, 1621 (rpar - 1622 ((tb->rbytes != -1) ? 1 : 0)), 1623 tb->rbytes, 1624 snum012 + LR_SHIFT_FLOW, FLOW); 1625 if (lrnver > lrnver1) 1626 lrset = LR_SHIFT_FLOW, lrnver = lrnver1; 1627 } 1628 1629 /* 1630 * Our general shifting strategy is: 1631 * 1) to minimized number of new nodes; 1632 * 2) to minimized number of neighbors involved in shifting; 1633 * 3) to minimized number of disk reads; 1634 */ 1635 1636 /* we can win TWO or ONE nodes by shifting in both directions */ 1637 if (lrnver < lnver && lrnver < rnver) { 1638 RFALSE(h && 1639 (tb->lnum[h] != 1 || 1640 tb->rnum[h] != 1 || 1641 lrnver != 1 || rnver != 2 || lnver != 2 1642 || h != 1), "vs-8230: bad h"); 1643 if (lrset == LR_SHIFT_FLOW) 1644 set_parameters(tb, h, tb->lnum[h], tb->rnum[h], 1645 lrnver, snum012 + lrset, 1646 tb->lbytes, tb->rbytes); 1647 else 1648 set_parameters(tb, h, 1649 tb->lnum[h] - 1650 ((tb->lbytes == -1) ? 0 : 1), 1651 tb->rnum[h] - 1652 ((tb->rbytes == -1) ? 0 : 1), 1653 lrnver, snum012 + lrset, -1, -1); 1654 1655 return CARRY_ON; 1656 } 1657 1658 /* 1659 * if shifting doesn't lead to better packing 1660 * then don't shift 1661 */ 1662 if (nver == lrnver) { 1663 set_parameters(tb, h, 0, 0, nver, snum012 + nset, -1, 1664 -1); 1665 return CARRY_ON; 1666 } 1667 1668 /* 1669 * now we know that for better packing shifting in only one 1670 * direction either to the left or to the right is required 1671 */ 1672 1673 /* 1674 * if shifting to the left is better than 1675 * shifting to the right 1676 */ 1677 if (lnver < rnver) { 1678 SET_PAR_SHIFT_LEFT; 1679 return CARRY_ON; 1680 } 1681 1682 /* 1683 * if shifting to the right is better than 1684 * shifting to the left 1685 */ 1686 if (lnver > rnver) { 1687 SET_PAR_SHIFT_RIGHT; 1688 return CARRY_ON; 1689 } 1690 1691 /* 1692 * now shifting in either direction gives the same number 1693 * of nodes and we can make use of the cached neighbors 1694 */ 1695 if (is_left_neighbor_in_cache(tb, h)) { 1696 SET_PAR_SHIFT_LEFT; 1697 return CARRY_ON; 1698 } 1699 1700 /* 1701 * shift to the right independently on whether the 1702 * right neighbor in cache or not 1703 */ 1704 SET_PAR_SHIFT_RIGHT; 1705 return CARRY_ON; 1706 } 1707 } 1708 1709 /* 1710 * Check whether current node S[h] is balanced when Decreasing its size by 1711 * Deleting or Cutting for INTERNAL node of S+tree. 1712 * Calculate parameters for balancing for current level h. 1713 * Parameters: 1714 * tb tree_balance structure; 1715 * h current level of the node; 1716 * inum item number in S[h]; 1717 * mode i - insert, p - paste; 1718 * Returns: 1 - schedule occurred; 1719 * 0 - balancing for higher levels needed; 1720 * -1 - no balancing for higher levels needed; 1721 * -2 - no disk space. 1722 * 1723 * Note: Items of internal nodes have fixed size, so the balance condition for 1724 * the internal part of S+tree is as for the B-trees. 1725 */ 1726 static int dc_check_balance_internal(struct tree_balance *tb, int h) 1727 { 1728 struct virtual_node *vn = tb->tb_vn; 1729 1730 /* 1731 * Sh is the node whose balance is currently being checked, 1732 * and Fh is its father. 1733 */ 1734 struct buffer_head *Sh, *Fh; 1735 int ret; 1736 int lfree, rfree /* free space in L and R */ ; 1737 1738 Sh = PATH_H_PBUFFER(tb->tb_path, h); 1739 Fh = PATH_H_PPARENT(tb->tb_path, h); 1740 1741 /* 1742 * using tb->insert_size[h], which is negative in this case, 1743 * create_virtual_node calculates: 1744 * new_nr_item = number of items node would have if operation is 1745 * performed without balancing (new_nr_item); 1746 */ 1747 create_virtual_node(tb, h); 1748 1749 if (!Fh) { /* S[h] is the root. */ 1750 /* no balancing for higher levels needed */ 1751 if (vn->vn_nr_item > 0) { 1752 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 1753 return NO_BALANCING_NEEDED; 1754 } 1755 /* 1756 * new_nr_item == 0. 1757 * Current root will be deleted resulting in 1758 * decrementing the tree height. 1759 */ 1760 set_parameters(tb, h, 0, 0, 0, NULL, -1, -1); 1761 return CARRY_ON; 1762 } 1763 1764 if ((ret = get_parents(tb, h)) != CARRY_ON) 1765 return ret; 1766 1767 /* get free space of neighbors */ 1768 rfree = get_rfree(tb, h); 1769 lfree = get_lfree(tb, h); 1770 1771 /* determine maximal number of items we can fit into neighbors */ 1772 check_left(tb, h, lfree); 1773 check_right(tb, h, rfree); 1774 1775 /* 1776 * Balance condition for the internal node is valid. 1777 * In this case we balance only if it leads to better packing. 1778 */ 1779 if (vn->vn_nr_item >= MIN_NR_KEY(Sh)) { 1780 /* 1781 * Here we join S[h] with one of its neighbors, 1782 * which is impossible with greater values of new_nr_item. 1783 */ 1784 if (vn->vn_nr_item == MIN_NR_KEY(Sh)) { 1785 /* All contents of S[h] can be moved to L[h]. */ 1786 if (tb->lnum[h] >= vn->vn_nr_item + 1) { 1787 int n; 1788 int order_L; 1789 1790 order_L = 1791 ((n = 1792 PATH_H_B_ITEM_ORDER(tb->tb_path, 1793 h)) == 1794 0) ? B_NR_ITEMS(tb->FL[h]) : n - 1; 1795 n = dc_size(B_N_CHILD(tb->FL[h], order_L)) / 1796 (DC_SIZE + KEY_SIZE); 1797 set_parameters(tb, h, -n - 1, 0, 0, NULL, -1, 1798 -1); 1799 return CARRY_ON; 1800 } 1801 1802 /* All contents of S[h] can be moved to R[h]. */ 1803 if (tb->rnum[h] >= vn->vn_nr_item + 1) { 1804 int n; 1805 int order_R; 1806 1807 order_R = 1808 ((n = 1809 PATH_H_B_ITEM_ORDER(tb->tb_path, 1810 h)) == 1811 B_NR_ITEMS(Fh)) ? 0 : n + 1; 1812 n = dc_size(B_N_CHILD(tb->FR[h], order_R)) / 1813 (DC_SIZE + KEY_SIZE); 1814 set_parameters(tb, h, 0, -n - 1, 0, NULL, -1, 1815 -1); 1816 return CARRY_ON; 1817 } 1818 } 1819 1820 /* 1821 * All contents of S[h] can be moved to the neighbors 1822 * (L[h] & R[h]). 1823 */ 1824 if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) { 1825 int to_r; 1826 1827 to_r = 1828 ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - 1829 tb->rnum[h] + vn->vn_nr_item + 1) / 2 - 1830 (MAX_NR_KEY(Sh) + 1 - tb->rnum[h]); 1831 set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 1832 0, NULL, -1, -1); 1833 return CARRY_ON; 1834 } 1835 1836 /* Balancing does not lead to better packing. */ 1837 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 1838 return NO_BALANCING_NEEDED; 1839 } 1840 1841 /* 1842 * Current node contain insufficient number of items. 1843 * Balancing is required. 1844 */ 1845 /* Check whether we can merge S[h] with left neighbor. */ 1846 if (tb->lnum[h] >= vn->vn_nr_item + 1) 1847 if (is_left_neighbor_in_cache(tb, h) 1848 || tb->rnum[h] < vn->vn_nr_item + 1 || !tb->FR[h]) { 1849 int n; 1850 int order_L; 1851 1852 order_L = 1853 ((n = 1854 PATH_H_B_ITEM_ORDER(tb->tb_path, 1855 h)) == 1856 0) ? B_NR_ITEMS(tb->FL[h]) : n - 1; 1857 n = dc_size(B_N_CHILD(tb->FL[h], order_L)) / (DC_SIZE + 1858 KEY_SIZE); 1859 set_parameters(tb, h, -n - 1, 0, 0, NULL, -1, -1); 1860 return CARRY_ON; 1861 } 1862 1863 /* Check whether we can merge S[h] with right neighbor. */ 1864 if (tb->rnum[h] >= vn->vn_nr_item + 1) { 1865 int n; 1866 int order_R; 1867 1868 order_R = 1869 ((n = 1870 PATH_H_B_ITEM_ORDER(tb->tb_path, 1871 h)) == B_NR_ITEMS(Fh)) ? 0 : (n + 1); 1872 n = dc_size(B_N_CHILD(tb->FR[h], order_R)) / (DC_SIZE + 1873 KEY_SIZE); 1874 set_parameters(tb, h, 0, -n - 1, 0, NULL, -1, -1); 1875 return CARRY_ON; 1876 } 1877 1878 /* All contents of S[h] can be moved to the neighbors (L[h] & R[h]). */ 1879 if (tb->rnum[h] + tb->lnum[h] >= vn->vn_nr_item + 1) { 1880 int to_r; 1881 1882 to_r = 1883 ((MAX_NR_KEY(Sh) << 1) + 2 - tb->lnum[h] - tb->rnum[h] + 1884 vn->vn_nr_item + 1) / 2 - (MAX_NR_KEY(Sh) + 1 - 1885 tb->rnum[h]); 1886 set_parameters(tb, h, vn->vn_nr_item + 1 - to_r, to_r, 0, NULL, 1887 -1, -1); 1888 return CARRY_ON; 1889 } 1890 1891 /* For internal nodes try to borrow item from a neighbor */ 1892 RFALSE(!tb->FL[h] && !tb->FR[h], "vs-8235: trying to borrow for root"); 1893 1894 /* Borrow one or two items from caching neighbor */ 1895 if (is_left_neighbor_in_cache(tb, h) || !tb->FR[h]) { 1896 int from_l; 1897 1898 from_l = 1899 (MAX_NR_KEY(Sh) + 1 - tb->lnum[h] + vn->vn_nr_item + 1900 1) / 2 - (vn->vn_nr_item + 1); 1901 set_parameters(tb, h, -from_l, 0, 1, NULL, -1, -1); 1902 return CARRY_ON; 1903 } 1904 1905 set_parameters(tb, h, 0, 1906 -((MAX_NR_KEY(Sh) + 1 - tb->rnum[h] + vn->vn_nr_item + 1907 1) / 2 - (vn->vn_nr_item + 1)), 1, NULL, -1, -1); 1908 return CARRY_ON; 1909 } 1910 1911 /* 1912 * Check whether current node S[h] is balanced when Decreasing its size by 1913 * Deleting or Truncating for LEAF node of S+tree. 1914 * Calculate parameters for balancing for current level h. 1915 * Parameters: 1916 * tb tree_balance structure; 1917 * h current level of the node; 1918 * inum item number in S[h]; 1919 * mode i - insert, p - paste; 1920 * Returns: 1 - schedule occurred; 1921 * 0 - balancing for higher levels needed; 1922 * -1 - no balancing for higher levels needed; 1923 * -2 - no disk space. 1924 */ 1925 static int dc_check_balance_leaf(struct tree_balance *tb, int h) 1926 { 1927 struct virtual_node *vn = tb->tb_vn; 1928 1929 /* 1930 * Number of bytes that must be deleted from 1931 * (value is negative if bytes are deleted) buffer which 1932 * contains node being balanced. The mnemonic is that the 1933 * attempted change in node space used level is levbytes bytes. 1934 */ 1935 int levbytes; 1936 1937 /* the maximal item size */ 1938 int maxsize, ret; 1939 1940 /* 1941 * S0 is the node whose balance is currently being checked, 1942 * and F0 is its father. 1943 */ 1944 struct buffer_head *S0, *F0; 1945 int lfree, rfree /* free space in L and R */ ; 1946 1947 S0 = PATH_H_PBUFFER(tb->tb_path, 0); 1948 F0 = PATH_H_PPARENT(tb->tb_path, 0); 1949 1950 levbytes = tb->insert_size[h]; 1951 1952 maxsize = MAX_CHILD_SIZE(S0); /* maximal possible size of an item */ 1953 1954 if (!F0) { /* S[0] is the root now. */ 1955 1956 RFALSE(-levbytes >= maxsize - B_FREE_SPACE(S0), 1957 "vs-8240: attempt to create empty buffer tree"); 1958 1959 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 1960 return NO_BALANCING_NEEDED; 1961 } 1962 1963 if ((ret = get_parents(tb, h)) != CARRY_ON) 1964 return ret; 1965 1966 /* get free space of neighbors */ 1967 rfree = get_rfree(tb, h); 1968 lfree = get_lfree(tb, h); 1969 1970 create_virtual_node(tb, h); 1971 1972 /* if 3 leaves can be merge to one, set parameters and return */ 1973 if (are_leaves_removable(tb, lfree, rfree)) 1974 return CARRY_ON; 1975 1976 /* 1977 * determine maximal number of items we can shift to the left/right 1978 * neighbor and the maximal number of bytes that can flow to the 1979 * left/right neighbor from the left/right most liquid item that 1980 * cannot be shifted from S[0] entirely 1981 */ 1982 check_left(tb, h, lfree); 1983 check_right(tb, h, rfree); 1984 1985 /* check whether we can merge S with left neighbor. */ 1986 if (tb->lnum[0] >= vn->vn_nr_item && tb->lbytes == -1) 1987 if (is_left_neighbor_in_cache(tb, h) || ((tb->rnum[0] - ((tb->rbytes == -1) ? 0 : 1)) < vn->vn_nr_item) || /* S can not be merged with R */ 1988 !tb->FR[h]) { 1989 1990 RFALSE(!tb->FL[h], 1991 "vs-8245: dc_check_balance_leaf: FL[h] must exist"); 1992 1993 /* set parameter to merge S[0] with its left neighbor */ 1994 set_parameters(tb, h, -1, 0, 0, NULL, -1, -1); 1995 return CARRY_ON; 1996 } 1997 1998 /* check whether we can merge S[0] with right neighbor. */ 1999 if (tb->rnum[0] >= vn->vn_nr_item && tb->rbytes == -1) { 2000 set_parameters(tb, h, 0, -1, 0, NULL, -1, -1); 2001 return CARRY_ON; 2002 } 2003 2004 /* 2005 * All contents of S[0] can be moved to the neighbors (L[0] & R[0]). 2006 * Set parameters and return 2007 */ 2008 if (is_leaf_removable(tb)) 2009 return CARRY_ON; 2010 2011 /* Balancing is not required. */ 2012 tb->s0num = vn->vn_nr_item; 2013 set_parameters(tb, h, 0, 0, 1, NULL, -1, -1); 2014 return NO_BALANCING_NEEDED; 2015 } 2016 2017 /* 2018 * Check whether current node S[h] is balanced when Decreasing its size by 2019 * Deleting or Cutting. 2020 * Calculate parameters for balancing for current level h. 2021 * Parameters: 2022 * tb tree_balance structure; 2023 * h current level of the node; 2024 * inum item number in S[h]; 2025 * mode d - delete, c - cut. 2026 * Returns: 1 - schedule occurred; 2027 * 0 - balancing for higher levels needed; 2028 * -1 - no balancing for higher levels needed; 2029 * -2 - no disk space. 2030 */ 2031 static int dc_check_balance(struct tree_balance *tb, int h) 2032 { 2033 RFALSE(!(PATH_H_PBUFFER(tb->tb_path, h)), 2034 "vs-8250: S is not initialized"); 2035 2036 if (h) 2037 return dc_check_balance_internal(tb, h); 2038 else 2039 return dc_check_balance_leaf(tb, h); 2040 } 2041 2042 /* 2043 * Check whether current node S[h] is balanced. 2044 * Calculate parameters for balancing for current level h. 2045 * Parameters: 2046 * 2047 * tb tree_balance structure: 2048 * 2049 * tb is a large structure that must be read about in the header 2050 * file at the same time as this procedure if the reader is 2051 * to successfully understand this procedure 2052 * 2053 * h current level of the node; 2054 * inum item number in S[h]; 2055 * mode i - insert, p - paste, d - delete, c - cut. 2056 * Returns: 1 - schedule occurred; 2057 * 0 - balancing for higher levels needed; 2058 * -1 - no balancing for higher levels needed; 2059 * -2 - no disk space. 2060 */ 2061 static int check_balance(int mode, 2062 struct tree_balance *tb, 2063 int h, 2064 int inum, 2065 int pos_in_item, 2066 struct item_head *ins_ih, const void *data) 2067 { 2068 struct virtual_node *vn; 2069 2070 vn = tb->tb_vn = (struct virtual_node *)(tb->vn_buf); 2071 vn->vn_free_ptr = (char *)(tb->tb_vn + 1); 2072 vn->vn_mode = mode; 2073 vn->vn_affected_item_num = inum; 2074 vn->vn_pos_in_item = pos_in_item; 2075 vn->vn_ins_ih = ins_ih; 2076 vn->vn_data = data; 2077 2078 RFALSE(mode == M_INSERT && !vn->vn_ins_ih, 2079 "vs-8255: ins_ih can not be 0 in insert mode"); 2080 2081 /* Calculate balance parameters when size of node is increasing. */ 2082 if (tb->insert_size[h] > 0) 2083 return ip_check_balance(tb, h); 2084 2085 /* Calculate balance parameters when size of node is decreasing. */ 2086 return dc_check_balance(tb, h); 2087 } 2088 2089 /* Check whether parent at the path is the really parent of the current node.*/ 2090 static int get_direct_parent(struct tree_balance *tb, int h) 2091 { 2092 struct buffer_head *bh; 2093 struct treepath *path = tb->tb_path; 2094 int position, 2095 path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h); 2096 2097 /* We are in the root or in the new root. */ 2098 if (path_offset <= FIRST_PATH_ELEMENT_OFFSET) { 2099 2100 RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET - 1, 2101 "PAP-8260: invalid offset in the path"); 2102 2103 if (PATH_OFFSET_PBUFFER(path, FIRST_PATH_ELEMENT_OFFSET)-> 2104 b_blocknr == SB_ROOT_BLOCK(tb->tb_sb)) { 2105 /* Root is not changed. */ 2106 PATH_OFFSET_PBUFFER(path, path_offset - 1) = NULL; 2107 PATH_OFFSET_POSITION(path, path_offset - 1) = 0; 2108 return CARRY_ON; 2109 } 2110 /* Root is changed and we must recalculate the path. */ 2111 return REPEAT_SEARCH; 2112 } 2113 2114 /* Parent in the path is not in the tree. */ 2115 if (!B_IS_IN_TREE 2116 (bh = PATH_OFFSET_PBUFFER(path, path_offset - 1))) 2117 return REPEAT_SEARCH; 2118 2119 if ((position = 2120 PATH_OFFSET_POSITION(path, 2121 path_offset - 1)) > B_NR_ITEMS(bh)) 2122 return REPEAT_SEARCH; 2123 2124 /* Parent in the path is not parent of the current node in the tree. */ 2125 if (B_N_CHILD_NUM(bh, position) != 2126 PATH_OFFSET_PBUFFER(path, path_offset)->b_blocknr) 2127 return REPEAT_SEARCH; 2128 2129 if (buffer_locked(bh)) { 2130 int depth = reiserfs_write_unlock_nested(tb->tb_sb); 2131 __wait_on_buffer(bh); 2132 reiserfs_write_lock_nested(tb->tb_sb, depth); 2133 if (FILESYSTEM_CHANGED_TB(tb)) 2134 return REPEAT_SEARCH; 2135 } 2136 2137 /* 2138 * Parent in the path is unlocked and really parent 2139 * of the current node. 2140 */ 2141 return CARRY_ON; 2142 } 2143 2144 /* 2145 * Using lnum[h] and rnum[h] we should determine what neighbors 2146 * of S[h] we 2147 * need in order to balance S[h], and get them if necessary. 2148 * Returns: SCHEDULE_OCCURRED - schedule occurred while the function worked; 2149 * CARRY_ON - schedule didn't occur while the function worked; 2150 */ 2151 static int get_neighbors(struct tree_balance *tb, int h) 2152 { 2153 int child_position, 2154 path_offset = PATH_H_PATH_OFFSET(tb->tb_path, h + 1); 2155 unsigned long son_number; 2156 struct super_block *sb = tb->tb_sb; 2157 struct buffer_head *bh; 2158 int depth; 2159 2160 PROC_INFO_INC(sb, get_neighbors[h]); 2161 2162 if (tb->lnum[h]) { 2163 /* We need left neighbor to balance S[h]. */ 2164 PROC_INFO_INC(sb, need_l_neighbor[h]); 2165 bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset); 2166 2167 RFALSE(bh == tb->FL[h] && 2168 !PATH_OFFSET_POSITION(tb->tb_path, path_offset), 2169 "PAP-8270: invalid position in the parent"); 2170 2171 child_position = 2172 (bh == 2173 tb->FL[h]) ? tb->lkey[h] : B_NR_ITEMS(tb-> 2174 FL[h]); 2175 son_number = B_N_CHILD_NUM(tb->FL[h], child_position); 2176 depth = reiserfs_write_unlock_nested(tb->tb_sb); 2177 bh = sb_bread(sb, son_number); 2178 reiserfs_write_lock_nested(tb->tb_sb, depth); 2179 if (!bh) 2180 return IO_ERROR; 2181 if (FILESYSTEM_CHANGED_TB(tb)) { 2182 brelse(bh); 2183 PROC_INFO_INC(sb, get_neighbors_restart[h]); 2184 return REPEAT_SEARCH; 2185 } 2186 2187 RFALSE(!B_IS_IN_TREE(tb->FL[h]) || 2188 child_position > B_NR_ITEMS(tb->FL[h]) || 2189 B_N_CHILD_NUM(tb->FL[h], child_position) != 2190 bh->b_blocknr, "PAP-8275: invalid parent"); 2191 RFALSE(!B_IS_IN_TREE(bh), "PAP-8280: invalid child"); 2192 RFALSE(!h && 2193 B_FREE_SPACE(bh) != 2194 MAX_CHILD_SIZE(bh) - 2195 dc_size(B_N_CHILD(tb->FL[0], child_position)), 2196 "PAP-8290: invalid child size of left neighbor"); 2197 2198 brelse(tb->L[h]); 2199 tb->L[h] = bh; 2200 } 2201 2202 /* We need right neighbor to balance S[path_offset]. */ 2203 if (tb->rnum[h]) { 2204 PROC_INFO_INC(sb, need_r_neighbor[h]); 2205 bh = PATH_OFFSET_PBUFFER(tb->tb_path, path_offset); 2206 2207 RFALSE(bh == tb->FR[h] && 2208 PATH_OFFSET_POSITION(tb->tb_path, 2209 path_offset) >= 2210 B_NR_ITEMS(bh), 2211 "PAP-8295: invalid position in the parent"); 2212 2213 child_position = 2214 (bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0; 2215 son_number = B_N_CHILD_NUM(tb->FR[h], child_position); 2216 depth = reiserfs_write_unlock_nested(tb->tb_sb); 2217 bh = sb_bread(sb, son_number); 2218 reiserfs_write_lock_nested(tb->tb_sb, depth); 2219 if (!bh) 2220 return IO_ERROR; 2221 if (FILESYSTEM_CHANGED_TB(tb)) { 2222 brelse(bh); 2223 PROC_INFO_INC(sb, get_neighbors_restart[h]); 2224 return REPEAT_SEARCH; 2225 } 2226 brelse(tb->R[h]); 2227 tb->R[h] = bh; 2228 2229 RFALSE(!h 2230 && B_FREE_SPACE(bh) != 2231 MAX_CHILD_SIZE(bh) - 2232 dc_size(B_N_CHILD(tb->FR[0], child_position)), 2233 "PAP-8300: invalid child size of right neighbor (%d != %d - %d)", 2234 B_FREE_SPACE(bh), MAX_CHILD_SIZE(bh), 2235 dc_size(B_N_CHILD(tb->FR[0], child_position))); 2236 2237 } 2238 return CARRY_ON; 2239 } 2240 2241 static int get_virtual_node_size(struct super_block *sb, struct buffer_head *bh) 2242 { 2243 int max_num_of_items; 2244 int max_num_of_entries; 2245 unsigned long blocksize = sb->s_blocksize; 2246 2247 #define MIN_NAME_LEN 1 2248 2249 max_num_of_items = (blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN); 2250 max_num_of_entries = (blocksize - BLKH_SIZE - IH_SIZE) / 2251 (DEH_SIZE + MIN_NAME_LEN); 2252 2253 return sizeof(struct virtual_node) + 2254 max(max_num_of_items * sizeof(struct virtual_item), 2255 sizeof(struct virtual_item) + sizeof(struct direntry_uarea) + 2256 (max_num_of_entries - 1) * sizeof(__u16)); 2257 } 2258 2259 /* 2260 * maybe we should fail balancing we are going to perform when kmalloc 2261 * fails several times. But now it will loop until kmalloc gets 2262 * required memory 2263 */ 2264 static int get_mem_for_virtual_node(struct tree_balance *tb) 2265 { 2266 int check_fs = 0; 2267 int size; 2268 char *buf; 2269 2270 size = get_virtual_node_size(tb->tb_sb, PATH_PLAST_BUFFER(tb->tb_path)); 2271 2272 /* we have to allocate more memory for virtual node */ 2273 if (size > tb->vn_buf_size) { 2274 if (tb->vn_buf) { 2275 /* free memory allocated before */ 2276 kfree(tb->vn_buf); 2277 /* this is not needed if kfree is atomic */ 2278 check_fs = 1; 2279 } 2280 2281 /* virtual node requires now more memory */ 2282 tb->vn_buf_size = size; 2283 2284 /* get memory for virtual item */ 2285 buf = kmalloc(size, GFP_ATOMIC | __GFP_NOWARN); 2286 if (!buf) { 2287 /* 2288 * getting memory with GFP_KERNEL priority may involve 2289 * balancing now (due to indirect_to_direct conversion 2290 * on dcache shrinking). So, release path and collected 2291 * resources here 2292 */ 2293 free_buffers_in_tb(tb); 2294 buf = kmalloc(size, GFP_NOFS); 2295 if (!buf) { 2296 tb->vn_buf_size = 0; 2297 } 2298 tb->vn_buf = buf; 2299 schedule(); 2300 return REPEAT_SEARCH; 2301 } 2302 2303 tb->vn_buf = buf; 2304 } 2305 2306 if (check_fs && FILESYSTEM_CHANGED_TB(tb)) 2307 return REPEAT_SEARCH; 2308 2309 return CARRY_ON; 2310 } 2311 2312 #ifdef CONFIG_REISERFS_CHECK 2313 static void tb_buffer_sanity_check(struct super_block *sb, 2314 struct buffer_head *bh, 2315 const char *descr, int level) 2316 { 2317 if (bh) { 2318 if (atomic_read(&(bh->b_count)) <= 0) 2319 2320 reiserfs_panic(sb, "jmacd-1", "negative or zero " 2321 "reference counter for buffer %s[%d] " 2322 "(%b)", descr, level, bh); 2323 2324 if (!buffer_uptodate(bh)) 2325 reiserfs_panic(sb, "jmacd-2", "buffer is not up " 2326 "to date %s[%d] (%b)", 2327 descr, level, bh); 2328 2329 if (!B_IS_IN_TREE(bh)) 2330 reiserfs_panic(sb, "jmacd-3", "buffer is not " 2331 "in tree %s[%d] (%b)", 2332 descr, level, bh); 2333 2334 if (bh->b_bdev != sb->s_bdev) 2335 reiserfs_panic(sb, "jmacd-4", "buffer has wrong " 2336 "device %s[%d] (%b)", 2337 descr, level, bh); 2338 2339 if (bh->b_size != sb->s_blocksize) 2340 reiserfs_panic(sb, "jmacd-5", "buffer has wrong " 2341 "blocksize %s[%d] (%b)", 2342 descr, level, bh); 2343 2344 if (bh->b_blocknr > SB_BLOCK_COUNT(sb)) 2345 reiserfs_panic(sb, "jmacd-6", "buffer block " 2346 "number too high %s[%d] (%b)", 2347 descr, level, bh); 2348 } 2349 } 2350 #else 2351 static void tb_buffer_sanity_check(struct super_block *sb, 2352 struct buffer_head *bh, 2353 const char *descr, int level) 2354 {; 2355 } 2356 #endif 2357 2358 static int clear_all_dirty_bits(struct super_block *s, struct buffer_head *bh) 2359 { 2360 return reiserfs_prepare_for_journal(s, bh, 0); 2361 } 2362 2363 static int wait_tb_buffers_until_unlocked(struct tree_balance *tb) 2364 { 2365 struct buffer_head *locked; 2366 #ifdef CONFIG_REISERFS_CHECK 2367 int repeat_counter = 0; 2368 #endif 2369 int i; 2370 2371 do { 2372 2373 locked = NULL; 2374 2375 for (i = tb->tb_path->path_length; 2376 !locked && i > ILLEGAL_PATH_ELEMENT_OFFSET; i--) { 2377 if (PATH_OFFSET_PBUFFER(tb->tb_path, i)) { 2378 /* 2379 * if I understand correctly, we can only 2380 * be sure the last buffer in the path is 2381 * in the tree --clm 2382 */ 2383 #ifdef CONFIG_REISERFS_CHECK 2384 if (PATH_PLAST_BUFFER(tb->tb_path) == 2385 PATH_OFFSET_PBUFFER(tb->tb_path, i)) 2386 tb_buffer_sanity_check(tb->tb_sb, 2387 PATH_OFFSET_PBUFFER 2388 (tb->tb_path, 2389 i), "S", 2390 tb->tb_path-> 2391 path_length - i); 2392 #endif 2393 if (!clear_all_dirty_bits(tb->tb_sb, 2394 PATH_OFFSET_PBUFFER 2395 (tb->tb_path, 2396 i))) { 2397 locked = 2398 PATH_OFFSET_PBUFFER(tb->tb_path, 2399 i); 2400 } 2401 } 2402 } 2403 2404 for (i = 0; !locked && i < MAX_HEIGHT && tb->insert_size[i]; 2405 i++) { 2406 2407 if (tb->lnum[i]) { 2408 2409 if (tb->L[i]) { 2410 tb_buffer_sanity_check(tb->tb_sb, 2411 tb->L[i], 2412 "L", i); 2413 if (!clear_all_dirty_bits 2414 (tb->tb_sb, tb->L[i])) 2415 locked = tb->L[i]; 2416 } 2417 2418 if (!locked && tb->FL[i]) { 2419 tb_buffer_sanity_check(tb->tb_sb, 2420 tb->FL[i], 2421 "FL", i); 2422 if (!clear_all_dirty_bits 2423 (tb->tb_sb, tb->FL[i])) 2424 locked = tb->FL[i]; 2425 } 2426 2427 if (!locked && tb->CFL[i]) { 2428 tb_buffer_sanity_check(tb->tb_sb, 2429 tb->CFL[i], 2430 "CFL", i); 2431 if (!clear_all_dirty_bits 2432 (tb->tb_sb, tb->CFL[i])) 2433 locked = tb->CFL[i]; 2434 } 2435 2436 } 2437 2438 if (!locked && (tb->rnum[i])) { 2439 2440 if (tb->R[i]) { 2441 tb_buffer_sanity_check(tb->tb_sb, 2442 tb->R[i], 2443 "R", i); 2444 if (!clear_all_dirty_bits 2445 (tb->tb_sb, tb->R[i])) 2446 locked = tb->R[i]; 2447 } 2448 2449 if (!locked && tb->FR[i]) { 2450 tb_buffer_sanity_check(tb->tb_sb, 2451 tb->FR[i], 2452 "FR", i); 2453 if (!clear_all_dirty_bits 2454 (tb->tb_sb, tb->FR[i])) 2455 locked = tb->FR[i]; 2456 } 2457 2458 if (!locked && tb->CFR[i]) { 2459 tb_buffer_sanity_check(tb->tb_sb, 2460 tb->CFR[i], 2461 "CFR", i); 2462 if (!clear_all_dirty_bits 2463 (tb->tb_sb, tb->CFR[i])) 2464 locked = tb->CFR[i]; 2465 } 2466 } 2467 } 2468 2469 /* 2470 * as far as I can tell, this is not required. The FEB list 2471 * seems to be full of newly allocated nodes, which will 2472 * never be locked, dirty, or anything else. 2473 * To be safe, I'm putting in the checks and waits in. 2474 * For the moment, they are needed to keep the code in 2475 * journal.c from complaining about the buffer. 2476 * That code is inside CONFIG_REISERFS_CHECK as well. --clm 2477 */ 2478 for (i = 0; !locked && i < MAX_FEB_SIZE; i++) { 2479 if (tb->FEB[i]) { 2480 if (!clear_all_dirty_bits 2481 (tb->tb_sb, tb->FEB[i])) 2482 locked = tb->FEB[i]; 2483 } 2484 } 2485 2486 if (locked) { 2487 int depth; 2488 #ifdef CONFIG_REISERFS_CHECK 2489 repeat_counter++; 2490 if ((repeat_counter % 10000) == 0) { 2491 reiserfs_warning(tb->tb_sb, "reiserfs-8200", 2492 "too many iterations waiting " 2493 "for buffer to unlock " 2494 "(%b)", locked); 2495 2496 /* Don't loop forever. Try to recover from possible error. */ 2497 2498 return (FILESYSTEM_CHANGED_TB(tb)) ? 2499 REPEAT_SEARCH : CARRY_ON; 2500 } 2501 #endif 2502 depth = reiserfs_write_unlock_nested(tb->tb_sb); 2503 __wait_on_buffer(locked); 2504 reiserfs_write_lock_nested(tb->tb_sb, depth); 2505 if (FILESYSTEM_CHANGED_TB(tb)) 2506 return REPEAT_SEARCH; 2507 } 2508 2509 } while (locked); 2510 2511 return CARRY_ON; 2512 } 2513 2514 /* 2515 * Prepare for balancing, that is 2516 * get all necessary parents, and neighbors; 2517 * analyze what and where should be moved; 2518 * get sufficient number of new nodes; 2519 * Balancing will start only after all resources will be collected at a time. 2520 * 2521 * When ported to SMP kernels, only at the last moment after all needed nodes 2522 * are collected in cache, will the resources be locked using the usual 2523 * textbook ordered lock acquisition algorithms. Note that ensuring that 2524 * this code neither write locks what it does not need to write lock nor locks 2525 * out of order will be a pain in the butt that could have been avoided. 2526 * Grumble grumble. -Hans 2527 * 2528 * fix is meant in the sense of render unchanging 2529 * 2530 * Latency might be improved by first gathering a list of what buffers 2531 * are needed and then getting as many of them in parallel as possible? -Hans 2532 * 2533 * Parameters: 2534 * op_mode i - insert, d - delete, c - cut (truncate), p - paste (append) 2535 * tb tree_balance structure; 2536 * inum item number in S[h]; 2537 * pos_in_item - comment this if you can 2538 * ins_ih item head of item being inserted 2539 * data inserted item or data to be pasted 2540 * Returns: 1 - schedule occurred while the function worked; 2541 * 0 - schedule didn't occur while the function worked; 2542 * -1 - if no_disk_space 2543 */ 2544 2545 int fix_nodes(int op_mode, struct tree_balance *tb, 2546 struct item_head *ins_ih, const void *data) 2547 { 2548 int ret, h, item_num = PATH_LAST_POSITION(tb->tb_path); 2549 int pos_in_item; 2550 2551 /* 2552 * we set wait_tb_buffers_run when we have to restore any dirty 2553 * bits cleared during wait_tb_buffers_run 2554 */ 2555 int wait_tb_buffers_run = 0; 2556 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path); 2557 2558 ++REISERFS_SB(tb->tb_sb)->s_fix_nodes; 2559 2560 pos_in_item = tb->tb_path->pos_in_item; 2561 2562 tb->fs_gen = get_generation(tb->tb_sb); 2563 2564 /* 2565 * we prepare and log the super here so it will already be in the 2566 * transaction when do_balance needs to change it. 2567 * This way do_balance won't have to schedule when trying to prepare 2568 * the super for logging 2569 */ 2570 reiserfs_prepare_for_journal(tb->tb_sb, 2571 SB_BUFFER_WITH_SB(tb->tb_sb), 1); 2572 journal_mark_dirty(tb->transaction_handle, 2573 SB_BUFFER_WITH_SB(tb->tb_sb)); 2574 if (FILESYSTEM_CHANGED_TB(tb)) 2575 return REPEAT_SEARCH; 2576 2577 /* if it possible in indirect_to_direct conversion */ 2578 if (buffer_locked(tbS0)) { 2579 int depth = reiserfs_write_unlock_nested(tb->tb_sb); 2580 __wait_on_buffer(tbS0); 2581 reiserfs_write_lock_nested(tb->tb_sb, depth); 2582 if (FILESYSTEM_CHANGED_TB(tb)) 2583 return REPEAT_SEARCH; 2584 } 2585 #ifdef CONFIG_REISERFS_CHECK 2586 if (REISERFS_SB(tb->tb_sb)->cur_tb) { 2587 print_cur_tb("fix_nodes"); 2588 reiserfs_panic(tb->tb_sb, "PAP-8305", 2589 "there is pending do_balance"); 2590 } 2591 2592 if (!buffer_uptodate(tbS0) || !B_IS_IN_TREE(tbS0)) 2593 reiserfs_panic(tb->tb_sb, "PAP-8320", "S[0] (%b %z) is " 2594 "not uptodate at the beginning of fix_nodes " 2595 "or not in tree (mode %c)", 2596 tbS0, tbS0, op_mode); 2597 2598 /* Check parameters. */ 2599 switch (op_mode) { 2600 case M_INSERT: 2601 if (item_num <= 0 || item_num > B_NR_ITEMS(tbS0)) 2602 reiserfs_panic(tb->tb_sb, "PAP-8330", "Incorrect " 2603 "item number %d (in S0 - %d) in case " 2604 "of insert", item_num, 2605 B_NR_ITEMS(tbS0)); 2606 break; 2607 case M_PASTE: 2608 case M_DELETE: 2609 case M_CUT: 2610 if (item_num < 0 || item_num >= B_NR_ITEMS(tbS0)) { 2611 print_block(tbS0, 0, -1, -1); 2612 reiserfs_panic(tb->tb_sb, "PAP-8335", "Incorrect " 2613 "item number(%d); mode = %c " 2614 "insert_size = %d", 2615 item_num, op_mode, 2616 tb->insert_size[0]); 2617 } 2618 break; 2619 default: 2620 reiserfs_panic(tb->tb_sb, "PAP-8340", "Incorrect mode " 2621 "of operation"); 2622 } 2623 #endif 2624 2625 if (get_mem_for_virtual_node(tb) == REPEAT_SEARCH) 2626 /* FIXME: maybe -ENOMEM when tb->vn_buf == 0? Now just repeat */ 2627 return REPEAT_SEARCH; 2628 2629 /* Starting from the leaf level; for all levels h of the tree. */ 2630 for (h = 0; h < MAX_HEIGHT && tb->insert_size[h]; h++) { 2631 ret = get_direct_parent(tb, h); 2632 if (ret != CARRY_ON) 2633 goto repeat; 2634 2635 ret = check_balance(op_mode, tb, h, item_num, 2636 pos_in_item, ins_ih, data); 2637 if (ret != CARRY_ON) { 2638 if (ret == NO_BALANCING_NEEDED) { 2639 /* No balancing for higher levels needed. */ 2640 ret = get_neighbors(tb, h); 2641 if (ret != CARRY_ON) 2642 goto repeat; 2643 if (h != MAX_HEIGHT - 1) 2644 tb->insert_size[h + 1] = 0; 2645 /* 2646 * ok, analysis and resource gathering 2647 * are complete 2648 */ 2649 break; 2650 } 2651 goto repeat; 2652 } 2653 2654 ret = get_neighbors(tb, h); 2655 if (ret != CARRY_ON) 2656 goto repeat; 2657 2658 /* 2659 * No disk space, or schedule occurred and analysis may be 2660 * invalid and needs to be redone. 2661 */ 2662 ret = get_empty_nodes(tb, h); 2663 if (ret != CARRY_ON) 2664 goto repeat; 2665 2666 /* 2667 * We have a positive insert size but no nodes exist on this 2668 * level, this means that we are creating a new root. 2669 */ 2670 if (!PATH_H_PBUFFER(tb->tb_path, h)) { 2671 2672 RFALSE(tb->blknum[h] != 1, 2673 "PAP-8350: creating new empty root"); 2674 2675 if (h < MAX_HEIGHT - 1) 2676 tb->insert_size[h + 1] = 0; 2677 } else if (!PATH_H_PBUFFER(tb->tb_path, h + 1)) { 2678 /* 2679 * The tree needs to be grown, so this node S[h] 2680 * which is the root node is split into two nodes, 2681 * and a new node (S[h+1]) will be created to 2682 * become the root node. 2683 */ 2684 if (tb->blknum[h] > 1) { 2685 2686 RFALSE(h == MAX_HEIGHT - 1, 2687 "PAP-8355: attempt to create too high of a tree"); 2688 2689 tb->insert_size[h + 1] = 2690 (DC_SIZE + 2691 KEY_SIZE) * (tb->blknum[h] - 1) + 2692 DC_SIZE; 2693 } else if (h < MAX_HEIGHT - 1) 2694 tb->insert_size[h + 1] = 0; 2695 } else 2696 tb->insert_size[h + 1] = 2697 (DC_SIZE + KEY_SIZE) * (tb->blknum[h] - 1); 2698 } 2699 2700 ret = wait_tb_buffers_until_unlocked(tb); 2701 if (ret == CARRY_ON) { 2702 if (FILESYSTEM_CHANGED_TB(tb)) { 2703 wait_tb_buffers_run = 1; 2704 ret = REPEAT_SEARCH; 2705 goto repeat; 2706 } else { 2707 return CARRY_ON; 2708 } 2709 } else { 2710 wait_tb_buffers_run = 1; 2711 goto repeat; 2712 } 2713 2714 repeat: 2715 /* 2716 * fix_nodes was unable to perform its calculation due to 2717 * filesystem got changed under us, lack of free disk space or i/o 2718 * failure. If the first is the case - the search will be 2719 * repeated. For now - free all resources acquired so far except 2720 * for the new allocated nodes 2721 */ 2722 { 2723 int i; 2724 2725 /* Release path buffers. */ 2726 if (wait_tb_buffers_run) { 2727 pathrelse_and_restore(tb->tb_sb, tb->tb_path); 2728 } else { 2729 pathrelse(tb->tb_path); 2730 } 2731 /* brelse all resources collected for balancing */ 2732 for (i = 0; i < MAX_HEIGHT; i++) { 2733 if (wait_tb_buffers_run) { 2734 reiserfs_restore_prepared_buffer(tb->tb_sb, 2735 tb->L[i]); 2736 reiserfs_restore_prepared_buffer(tb->tb_sb, 2737 tb->R[i]); 2738 reiserfs_restore_prepared_buffer(tb->tb_sb, 2739 tb->FL[i]); 2740 reiserfs_restore_prepared_buffer(tb->tb_sb, 2741 tb->FR[i]); 2742 reiserfs_restore_prepared_buffer(tb->tb_sb, 2743 tb-> 2744 CFL[i]); 2745 reiserfs_restore_prepared_buffer(tb->tb_sb, 2746 tb-> 2747 CFR[i]); 2748 } 2749 2750 brelse(tb->L[i]); 2751 brelse(tb->R[i]); 2752 brelse(tb->FL[i]); 2753 brelse(tb->FR[i]); 2754 brelse(tb->CFL[i]); 2755 brelse(tb->CFR[i]); 2756 2757 tb->L[i] = NULL; 2758 tb->R[i] = NULL; 2759 tb->FL[i] = NULL; 2760 tb->FR[i] = NULL; 2761 tb->CFL[i] = NULL; 2762 tb->CFR[i] = NULL; 2763 } 2764 2765 if (wait_tb_buffers_run) { 2766 for (i = 0; i < MAX_FEB_SIZE; i++) { 2767 if (tb->FEB[i]) 2768 reiserfs_restore_prepared_buffer 2769 (tb->tb_sb, tb->FEB[i]); 2770 } 2771 } 2772 return ret; 2773 } 2774 2775 } 2776 2777 void unfix_nodes(struct tree_balance *tb) 2778 { 2779 int i; 2780 2781 /* Release path buffers. */ 2782 pathrelse_and_restore(tb->tb_sb, tb->tb_path); 2783 2784 /* brelse all resources collected for balancing */ 2785 for (i = 0; i < MAX_HEIGHT; i++) { 2786 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->L[i]); 2787 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->R[i]); 2788 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FL[i]); 2789 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->FR[i]); 2790 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFL[i]); 2791 reiserfs_restore_prepared_buffer(tb->tb_sb, tb->CFR[i]); 2792 2793 brelse(tb->L[i]); 2794 brelse(tb->R[i]); 2795 brelse(tb->FL[i]); 2796 brelse(tb->FR[i]); 2797 brelse(tb->CFL[i]); 2798 brelse(tb->CFR[i]); 2799 } 2800 2801 /* deal with list of allocated (used and unused) nodes */ 2802 for (i = 0; i < MAX_FEB_SIZE; i++) { 2803 if (tb->FEB[i]) { 2804 b_blocknr_t blocknr = tb->FEB[i]->b_blocknr; 2805 /* 2806 * de-allocated block which was not used by 2807 * balancing and bforget about buffer for it 2808 */ 2809 brelse(tb->FEB[i]); 2810 reiserfs_free_block(tb->transaction_handle, NULL, 2811 blocknr, 0); 2812 } 2813 if (tb->used[i]) { 2814 /* release used as new nodes including a new root */ 2815 brelse(tb->used[i]); 2816 } 2817 } 2818 2819 kfree(tb->vn_buf); 2820 2821 } 2822