1 /* 2 * Copyright (C) International Business Machines Corp., 2000-2004 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * the GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 */ 18 /* 19 * jfs_xtree.c: extent allocation descriptor B+-tree manager 20 */ 21 22 #include <linux/fs.h> 23 #include <linux/quotaops.h> 24 #include "jfs_incore.h" 25 #include "jfs_filsys.h" 26 #include "jfs_metapage.h" 27 #include "jfs_dmap.h" 28 #include "jfs_dinode.h" 29 #include "jfs_superblock.h" 30 #include "jfs_debug.h" 31 32 /* 33 * xtree local flag 34 */ 35 #define XT_INSERT 0x00000001 36 37 /* 38 * xtree key/entry comparison: extent offset 39 * 40 * return: 41 * -1: k < start of extent 42 * 0: start_of_extent <= k <= end_of_extent 43 * 1: k > end_of_extent 44 */ 45 #define XT_CMP(CMP, K, X, OFFSET64)\ 46 {\ 47 OFFSET64 = offsetXAD(X);\ 48 (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\ 49 ((K) < OFFSET64) ? -1 : 0;\ 50 } 51 52 /* write a xad entry */ 53 #define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\ 54 {\ 55 (XAD)->flag = (FLAG);\ 56 XADoffset((XAD), (OFF));\ 57 XADlength((XAD), (LEN));\ 58 XADaddress((XAD), (ADDR));\ 59 } 60 61 #define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot) 62 63 /* get page buffer for specified block address */ 64 /* ToDo: Replace this ugly macro with a function */ 65 #define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ 66 {\ 67 BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\ 68 if (!(RC))\ 69 {\ 70 if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\ 71 (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\ 72 (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\ 73 {\ 74 jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\ 75 BT_PUTPAGE(MP);\ 76 MP = NULL;\ 77 RC = -EIO;\ 78 }\ 79 }\ 80 } 81 82 /* for consistency */ 83 #define XT_PUTPAGE(MP) BT_PUTPAGE(MP) 84 85 #define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ 86 BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot) 87 /* xtree entry parameter descriptor */ 88 struct xtsplit { 89 struct metapage *mp; 90 s16 index; 91 u8 flag; 92 s64 off; 93 s64 addr; 94 int len; 95 struct pxdlist *pxdlist; 96 }; 97 98 99 /* 100 * statistics 101 */ 102 #ifdef CONFIG_JFS_STATISTICS 103 static struct { 104 uint search; 105 uint fastSearch; 106 uint split; 107 } xtStat; 108 #endif 109 110 111 /* 112 * forward references 113 */ 114 static int xtSearch(struct inode *ip, 115 s64 xoff, int *cmpp, struct btstack * btstack, int flag); 116 117 static int xtSplitUp(tid_t tid, 118 struct inode *ip, 119 struct xtsplit * split, struct btstack * btstack); 120 121 static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split, 122 struct metapage ** rmpp, s64 * rbnp); 123 124 static int xtSplitRoot(tid_t tid, struct inode *ip, 125 struct xtsplit * split, struct metapage ** rmpp); 126 127 #ifdef _STILL_TO_PORT 128 static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp, 129 xtpage_t * fp, struct btstack * btstack); 130 131 static int xtSearchNode(struct inode *ip, 132 xad_t * xad, 133 int *cmpp, struct btstack * btstack, int flag); 134 135 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp); 136 #endif /* _STILL_TO_PORT */ 137 138 /* External references */ 139 140 /* 141 * debug control 142 */ 143 /* #define _JFS_DEBUG_XTREE 1 */ 144 145 146 /* 147 * xtLookup() 148 * 149 * function: map a single page into a physical extent; 150 */ 151 int xtLookup(struct inode *ip, s64 lstart, 152 s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check) 153 { 154 int rc = 0; 155 struct btstack btstack; 156 int cmp; 157 s64 bn; 158 struct metapage *mp; 159 xtpage_t *p; 160 int index; 161 xad_t *xad; 162 s64 size, xoff, xend; 163 int xlen; 164 s64 xaddr; 165 166 *plen = 0; 167 168 if (!no_check) { 169 /* is lookup offset beyond eof ? */ 170 size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >> 171 JFS_SBI(ip->i_sb)->l2bsize; 172 if (lstart >= size) { 173 jfs_err("xtLookup: lstart (0x%lx) >= size (0x%lx)", 174 (ulong) lstart, (ulong) size); 175 return 0; 176 } 177 } 178 179 /* 180 * search for the xad entry covering the logical extent 181 */ 182 //search: 183 if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0))) { 184 jfs_err("xtLookup: xtSearch returned %d", rc); 185 return rc; 186 } 187 188 /* 189 * compute the physical extent covering logical extent 190 * 191 * N.B. search may have failed (e.g., hole in sparse file), 192 * and returned the index of the next entry. 193 */ 194 /* retrieve search result */ 195 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 196 197 /* is xad found covering start of logical extent ? 198 * lstart is a page start address, 199 * i.e., lstart cannot start in a hole; 200 */ 201 if (cmp) 202 goto out; 203 204 /* 205 * lxd covered by xad 206 */ 207 xad = &p->xad[index]; 208 xoff = offsetXAD(xad); 209 xlen = lengthXAD(xad); 210 xend = xoff + xlen; 211 xaddr = addressXAD(xad); 212 213 /* initialize new pxd */ 214 *pflag = xad->flag; 215 *paddr = xaddr + (lstart - xoff); 216 /* a page must be fully covered by an xad */ 217 *plen = min(xend - lstart, llen); 218 219 out: 220 XT_PUTPAGE(mp); 221 222 return rc; 223 } 224 225 226 /* 227 * xtLookupList() 228 * 229 * function: map a single logical extent into a list of physical extent; 230 * 231 * parameter: 232 * struct inode *ip, 233 * struct lxdlist *lxdlist, lxd list (in) 234 * struct xadlist *xadlist, xad list (in/out) 235 * int flag) 236 * 237 * coverage of lxd by xad under assumption of 238 * . lxd's are ordered and disjoint. 239 * . xad's are ordered and disjoint. 240 * 241 * return: 242 * 0: success 243 * 244 * note: a page being written (even a single byte) is backed fully, 245 * except the last page which is only backed with blocks 246 * required to cover the last byte; 247 * the extent backing a page is fully contained within an xad; 248 */ 249 int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, 250 struct xadlist * xadlist, int flag) 251 { 252 int rc = 0; 253 struct btstack btstack; 254 int cmp; 255 s64 bn; 256 struct metapage *mp; 257 xtpage_t *p; 258 int index; 259 lxd_t *lxd; 260 xad_t *xad, *pxd; 261 s64 size, lstart, lend, xstart, xend, pstart; 262 s64 llen, xlen, plen; 263 s64 xaddr, paddr; 264 int nlxd, npxd, maxnpxd; 265 266 npxd = xadlist->nxad = 0; 267 maxnpxd = xadlist->maxnxad; 268 pxd = xadlist->xad; 269 270 nlxd = lxdlist->nlxd; 271 lxd = lxdlist->lxd; 272 273 lstart = offsetLXD(lxd); 274 llen = lengthLXD(lxd); 275 lend = lstart + llen; 276 277 size = (ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >> 278 JFS_SBI(ip->i_sb)->l2bsize; 279 280 /* 281 * search for the xad entry covering the logical extent 282 */ 283 search: 284 if (lstart >= size) 285 return 0; 286 287 if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0))) 288 return rc; 289 290 /* 291 * compute the physical extent covering logical extent 292 * 293 * N.B. search may have failed (e.g., hole in sparse file), 294 * and returned the index of the next entry. 295 */ 296 //map: 297 /* retrieve search result */ 298 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 299 300 /* is xad on the next sibling page ? */ 301 if (index == le16_to_cpu(p->header.nextindex)) { 302 if (p->header.flag & BT_ROOT) 303 goto mapend; 304 305 if ((bn = le64_to_cpu(p->header.next)) == 0) 306 goto mapend; 307 308 XT_PUTPAGE(mp); 309 310 /* get next sibling page */ 311 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 312 if (rc) 313 return rc; 314 315 index = XTENTRYSTART; 316 } 317 318 xad = &p->xad[index]; 319 320 /* 321 * is lxd covered by xad ? 322 */ 323 compare: 324 xstart = offsetXAD(xad); 325 xlen = lengthXAD(xad); 326 xend = xstart + xlen; 327 xaddr = addressXAD(xad); 328 329 compare1: 330 if (xstart < lstart) 331 goto compare2; 332 333 /* (lstart <= xstart) */ 334 335 /* lxd is NOT covered by xad */ 336 if (lend <= xstart) { 337 /* 338 * get next lxd 339 */ 340 if (--nlxd == 0) 341 goto mapend; 342 lxd++; 343 344 lstart = offsetLXD(lxd); 345 llen = lengthLXD(lxd); 346 lend = lstart + llen; 347 if (lstart >= size) 348 goto mapend; 349 350 /* compare with the current xad */ 351 goto compare1; 352 } 353 /* lxd is covered by xad */ 354 else { /* (xstart < lend) */ 355 356 /* initialize new pxd */ 357 pstart = xstart; 358 plen = min(lend - xstart, xlen); 359 paddr = xaddr; 360 361 goto cover; 362 } 363 364 /* (xstart < lstart) */ 365 compare2: 366 /* lxd is covered by xad */ 367 if (lstart < xend) { 368 /* initialize new pxd */ 369 pstart = lstart; 370 plen = min(xend - lstart, llen); 371 paddr = xaddr + (lstart - xstart); 372 373 goto cover; 374 } 375 /* lxd is NOT covered by xad */ 376 else { /* (xend <= lstart) */ 377 378 /* 379 * get next xad 380 * 381 * linear search next xad covering lxd on 382 * the current xad page, and then tree search 383 */ 384 if (index == le16_to_cpu(p->header.nextindex) - 1) { 385 if (p->header.flag & BT_ROOT) 386 goto mapend; 387 388 XT_PUTPAGE(mp); 389 goto search; 390 } else { 391 index++; 392 xad++; 393 394 /* compare with new xad */ 395 goto compare; 396 } 397 } 398 399 /* 400 * lxd is covered by xad and a new pxd has been initialized 401 * (lstart <= xstart < lend) or (xstart < lstart < xend) 402 */ 403 cover: 404 /* finalize pxd corresponding to current xad */ 405 XT_PUTENTRY(pxd, xad->flag, pstart, plen, paddr); 406 407 if (++npxd >= maxnpxd) 408 goto mapend; 409 pxd++; 410 411 /* 412 * lxd is fully covered by xad 413 */ 414 if (lend <= xend) { 415 /* 416 * get next lxd 417 */ 418 if (--nlxd == 0) 419 goto mapend; 420 lxd++; 421 422 lstart = offsetLXD(lxd); 423 llen = lengthLXD(lxd); 424 lend = lstart + llen; 425 if (lstart >= size) 426 goto mapend; 427 428 /* 429 * test for old xad covering new lxd 430 * (old xstart < new lstart) 431 */ 432 goto compare2; 433 } 434 /* 435 * lxd is partially covered by xad 436 */ 437 else { /* (xend < lend) */ 438 439 /* 440 * get next xad 441 * 442 * linear search next xad covering lxd on 443 * the current xad page, and then next xad page search 444 */ 445 if (index == le16_to_cpu(p->header.nextindex) - 1) { 446 if (p->header.flag & BT_ROOT) 447 goto mapend; 448 449 if ((bn = le64_to_cpu(p->header.next)) == 0) 450 goto mapend; 451 452 XT_PUTPAGE(mp); 453 454 /* get next sibling page */ 455 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 456 if (rc) 457 return rc; 458 459 index = XTENTRYSTART; 460 xad = &p->xad[index]; 461 } else { 462 index++; 463 xad++; 464 } 465 466 /* 467 * test for new xad covering old lxd 468 * (old lstart < new xstart) 469 */ 470 goto compare; 471 } 472 473 mapend: 474 xadlist->nxad = npxd; 475 476 //out: 477 XT_PUTPAGE(mp); 478 479 return rc; 480 } 481 482 483 /* 484 * xtSearch() 485 * 486 * function: search for the xad entry covering specified offset. 487 * 488 * parameters: 489 * ip - file object; 490 * xoff - extent offset; 491 * cmpp - comparison result: 492 * btstack - traverse stack; 493 * flag - search process flag (XT_INSERT); 494 * 495 * returns: 496 * btstack contains (bn, index) of search path traversed to the entry. 497 * *cmpp is set to result of comparison with the entry returned. 498 * the page containing the entry is pinned at exit. 499 */ 500 static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */ 501 int *cmpp, struct btstack * btstack, int flag) 502 { 503 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 504 int rc = 0; 505 int cmp = 1; /* init for empty page */ 506 s64 bn; /* block number */ 507 struct metapage *mp; /* page buffer */ 508 xtpage_t *p; /* page */ 509 xad_t *xad; 510 int base, index, lim, btindex; 511 struct btframe *btsp; 512 int nsplit = 0; /* number of pages to split */ 513 s64 t64; 514 515 INCREMENT(xtStat.search); 516 517 BT_CLR(btstack); 518 519 btstack->nsplit = 0; 520 521 /* 522 * search down tree from root: 523 * 524 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 525 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 526 * 527 * if entry with search key K is not found 528 * internal page search find the entry with largest key Ki 529 * less than K which point to the child page to search; 530 * leaf page search find the entry with smallest key Kj 531 * greater than K so that the returned index is the position of 532 * the entry to be shifted right for insertion of new entry. 533 * for empty tree, search key is greater than any key of the tree. 534 * 535 * by convention, root bn = 0. 536 */ 537 for (bn = 0;;) { 538 /* get/pin the page to search */ 539 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 540 if (rc) 541 return rc; 542 543 /* try sequential access heuristics with the previous 544 * access entry in target leaf page: 545 * once search narrowed down into the target leaf, 546 * key must either match an entry in the leaf or 547 * key entry does not exist in the tree; 548 */ 549 //fastSearch: 550 if ((jfs_ip->btorder & BT_SEQUENTIAL) && 551 (p->header.flag & BT_LEAF) && 552 (index = jfs_ip->btindex) < 553 le16_to_cpu(p->header.nextindex)) { 554 xad = &p->xad[index]; 555 t64 = offsetXAD(xad); 556 if (xoff < t64 + lengthXAD(xad)) { 557 if (xoff >= t64) { 558 *cmpp = 0; 559 goto out; 560 } 561 562 /* stop sequential access heuristics */ 563 goto binarySearch; 564 } else { /* (t64 + lengthXAD(xad)) <= xoff */ 565 566 /* try next sequential entry */ 567 index++; 568 if (index < 569 le16_to_cpu(p->header.nextindex)) { 570 xad++; 571 t64 = offsetXAD(xad); 572 if (xoff < t64 + lengthXAD(xad)) { 573 if (xoff >= t64) { 574 *cmpp = 0; 575 goto out; 576 } 577 578 /* miss: key falls between 579 * previous and this entry 580 */ 581 *cmpp = 1; 582 goto out; 583 } 584 585 /* (xoff >= t64 + lengthXAD(xad)); 586 * matching entry may be further out: 587 * stop heuristic search 588 */ 589 /* stop sequential access heuristics */ 590 goto binarySearch; 591 } 592 593 /* (index == p->header.nextindex); 594 * miss: key entry does not exist in 595 * the target leaf/tree 596 */ 597 *cmpp = 1; 598 goto out; 599 } 600 601 /* 602 * if hit, return index of the entry found, and 603 * if miss, where new entry with search key is 604 * to be inserted; 605 */ 606 out: 607 /* compute number of pages to split */ 608 if (flag & XT_INSERT) { 609 if (p->header.nextindex == /* little-endian */ 610 p->header.maxentry) 611 nsplit++; 612 else 613 nsplit = 0; 614 btstack->nsplit = nsplit; 615 } 616 617 /* save search result */ 618 btsp = btstack->top; 619 btsp->bn = bn; 620 btsp->index = index; 621 btsp->mp = mp; 622 623 /* update sequential access heuristics */ 624 jfs_ip->btindex = index; 625 626 INCREMENT(xtStat.fastSearch); 627 return 0; 628 } 629 630 /* well, ... full search now */ 631 binarySearch: 632 lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 633 634 /* 635 * binary search with search key K on the current page 636 */ 637 for (base = XTENTRYSTART; lim; lim >>= 1) { 638 index = base + (lim >> 1); 639 640 XT_CMP(cmp, xoff, &p->xad[index], t64); 641 if (cmp == 0) { 642 /* 643 * search hit 644 */ 645 /* search hit - leaf page: 646 * return the entry found 647 */ 648 if (p->header.flag & BT_LEAF) { 649 *cmpp = cmp; 650 651 /* compute number of pages to split */ 652 if (flag & XT_INSERT) { 653 if (p->header.nextindex == 654 p->header.maxentry) 655 nsplit++; 656 else 657 nsplit = 0; 658 btstack->nsplit = nsplit; 659 } 660 661 /* save search result */ 662 btsp = btstack->top; 663 btsp->bn = bn; 664 btsp->index = index; 665 btsp->mp = mp; 666 667 /* init sequential access heuristics */ 668 btindex = jfs_ip->btindex; 669 if (index == btindex || 670 index == btindex + 1) 671 jfs_ip->btorder = BT_SEQUENTIAL; 672 else 673 jfs_ip->btorder = BT_RANDOM; 674 jfs_ip->btindex = index; 675 676 return 0; 677 } 678 679 /* search hit - internal page: 680 * descend/search its child page 681 */ 682 goto next; 683 } 684 685 if (cmp > 0) { 686 base = index + 1; 687 --lim; 688 } 689 } 690 691 /* 692 * search miss 693 * 694 * base is the smallest index with key (Kj) greater than 695 * search key (K) and may be zero or maxentry index. 696 */ 697 /* 698 * search miss - leaf page: 699 * 700 * return location of entry (base) where new entry with 701 * search key K is to be inserted. 702 */ 703 if (p->header.flag & BT_LEAF) { 704 *cmpp = cmp; 705 706 /* compute number of pages to split */ 707 if (flag & XT_INSERT) { 708 if (p->header.nextindex == 709 p->header.maxentry) 710 nsplit++; 711 else 712 nsplit = 0; 713 btstack->nsplit = nsplit; 714 } 715 716 /* save search result */ 717 btsp = btstack->top; 718 btsp->bn = bn; 719 btsp->index = base; 720 btsp->mp = mp; 721 722 /* init sequential access heuristics */ 723 btindex = jfs_ip->btindex; 724 if (base == btindex || base == btindex + 1) 725 jfs_ip->btorder = BT_SEQUENTIAL; 726 else 727 jfs_ip->btorder = BT_RANDOM; 728 jfs_ip->btindex = base; 729 730 return 0; 731 } 732 733 /* 734 * search miss - non-leaf page: 735 * 736 * if base is non-zero, decrement base by one to get the parent 737 * entry of the child page to search. 738 */ 739 index = base ? base - 1 : base; 740 741 /* 742 * go down to child page 743 */ 744 next: 745 /* update number of pages to split */ 746 if (p->header.nextindex == p->header.maxentry) 747 nsplit++; 748 else 749 nsplit = 0; 750 751 /* push (bn, index) of the parent page/entry */ 752 BT_PUSH(btstack, bn, index); 753 754 /* get the child page block number */ 755 bn = addressXAD(&p->xad[index]); 756 757 /* unpin the parent page */ 758 XT_PUTPAGE(mp); 759 } 760 } 761 762 /* 763 * xtInsert() 764 * 765 * function: 766 * 767 * parameter: 768 * tid - transaction id; 769 * ip - file object; 770 * xflag - extent flag (XAD_NOTRECORDED): 771 * xoff - extent offset; 772 * xlen - extent length; 773 * xaddrp - extent address pointer (in/out): 774 * if (*xaddrp) 775 * caller allocated data extent at *xaddrp; 776 * else 777 * allocate data extent and return its xaddr; 778 * flag - 779 * 780 * return: 781 */ 782 int xtInsert(tid_t tid, /* transaction id */ 783 struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp, 784 int flag) 785 { 786 int rc = 0; 787 s64 xaddr, hint; 788 struct metapage *mp; /* meta-page buffer */ 789 xtpage_t *p; /* base B+-tree index page */ 790 s64 bn; 791 int index, nextindex; 792 struct btstack btstack; /* traverse stack */ 793 struct xtsplit split; /* split information */ 794 xad_t *xad; 795 int cmp; 796 struct tlock *tlck; 797 struct xtlock *xtlck; 798 799 jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); 800 801 /* 802 * search for the entry location at which to insert: 803 * 804 * xtFastSearch() and xtSearch() both returns (leaf page 805 * pinned, index at which to insert). 806 * n.b. xtSearch() may return index of maxentry of 807 * the full page. 808 */ 809 if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT))) 810 return rc; 811 812 /* retrieve search result */ 813 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 814 815 /* This test must follow XT_GETSEARCH since mp must be valid if 816 * we branch to out: */ 817 if (cmp == 0) { 818 rc = -EEXIST; 819 goto out; 820 } 821 822 /* 823 * allocate data extent requested 824 * 825 * allocation hint: last xad 826 */ 827 if ((xaddr = *xaddrp) == 0) { 828 if (index > XTENTRYSTART) { 829 xad = &p->xad[index - 1]; 830 hint = addressXAD(xad) + lengthXAD(xad) - 1; 831 } else 832 hint = 0; 833 if ((rc = DQUOT_ALLOC_BLOCK(ip, xlen))) 834 goto out; 835 if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) { 836 DQUOT_FREE_BLOCK(ip, xlen); 837 goto out; 838 } 839 } 840 841 /* 842 * insert entry for new extent 843 */ 844 xflag |= XAD_NEW; 845 846 /* 847 * if the leaf page is full, split the page and 848 * propagate up the router entry for the new page from split 849 * 850 * The xtSplitUp() will insert the entry and unpin the leaf page. 851 */ 852 nextindex = le16_to_cpu(p->header.nextindex); 853 if (nextindex == le16_to_cpu(p->header.maxentry)) { 854 split.mp = mp; 855 split.index = index; 856 split.flag = xflag; 857 split.off = xoff; 858 split.len = xlen; 859 split.addr = xaddr; 860 split.pxdlist = NULL; 861 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { 862 /* undo data extent allocation */ 863 if (*xaddrp == 0) { 864 dbFree(ip, xaddr, (s64) xlen); 865 DQUOT_FREE_BLOCK(ip, xlen); 866 } 867 return rc; 868 } 869 870 *xaddrp = xaddr; 871 return 0; 872 } 873 874 /* 875 * insert the new entry into the leaf page 876 */ 877 /* 878 * acquire a transaction lock on the leaf page; 879 * 880 * action: xad insertion/extension; 881 */ 882 BT_MARK_DIRTY(mp, ip); 883 884 /* if insert into middle, shift right remaining entries. */ 885 if (index < nextindex) 886 memmove(&p->xad[index + 1], &p->xad[index], 887 (nextindex - index) * sizeof(xad_t)); 888 889 /* insert the new entry: mark the entry NEW */ 890 xad = &p->xad[index]; 891 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 892 893 /* advance next available entry index */ 894 p->header.nextindex = 895 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 896 897 /* Don't log it if there are no links to the file */ 898 if (!test_cflag(COMMIT_Nolink, ip)) { 899 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 900 xtlck = (struct xtlock *) & tlck->lock; 901 xtlck->lwm.offset = 902 (xtlck->lwm.offset) ? min(index, 903 (int)xtlck->lwm.offset) : index; 904 xtlck->lwm.length = 905 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; 906 } 907 908 *xaddrp = xaddr; 909 910 out: 911 /* unpin the leaf page */ 912 XT_PUTPAGE(mp); 913 914 return rc; 915 } 916 917 918 /* 919 * xtSplitUp() 920 * 921 * function: 922 * split full pages as propagating insertion up the tree 923 * 924 * parameter: 925 * tid - transaction id; 926 * ip - file object; 927 * split - entry parameter descriptor; 928 * btstack - traverse stack from xtSearch() 929 * 930 * return: 931 */ 932 static int 933 xtSplitUp(tid_t tid, 934 struct inode *ip, struct xtsplit * split, struct btstack * btstack) 935 { 936 int rc = 0; 937 struct metapage *smp; 938 xtpage_t *sp; /* split page */ 939 struct metapage *rmp; 940 s64 rbn; /* new right page block number */ 941 struct metapage *rcmp; 942 xtpage_t *rcp; /* right child page */ 943 s64 rcbn; /* right child page block number */ 944 int skip; /* index of entry of insertion */ 945 int nextindex; /* next available entry index of p */ 946 struct btframe *parent; /* parent page entry on traverse stack */ 947 xad_t *xad; 948 s64 xaddr; 949 int xlen; 950 int nsplit; /* number of pages split */ 951 struct pxdlist pxdlist; 952 pxd_t *pxd; 953 struct tlock *tlck; 954 struct xtlock *xtlck; 955 956 smp = split->mp; 957 sp = XT_PAGE(ip, smp); 958 959 /* is inode xtree root extension/inline EA area free ? */ 960 if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) && 961 (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) && 962 (JFS_IP(ip)->mode2 & INLINEEA)) { 963 sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT); 964 JFS_IP(ip)->mode2 &= ~INLINEEA; 965 966 BT_MARK_DIRTY(smp, ip); 967 /* 968 * acquire a transaction lock on the leaf page; 969 * 970 * action: xad insertion/extension; 971 */ 972 973 /* if insert into middle, shift right remaining entries. */ 974 skip = split->index; 975 nextindex = le16_to_cpu(sp->header.nextindex); 976 if (skip < nextindex) 977 memmove(&sp->xad[skip + 1], &sp->xad[skip], 978 (nextindex - skip) * sizeof(xad_t)); 979 980 /* insert the new entry: mark the entry NEW */ 981 xad = &sp->xad[skip]; 982 XT_PUTENTRY(xad, split->flag, split->off, split->len, 983 split->addr); 984 985 /* advance next available entry index */ 986 sp->header.nextindex = 987 cpu_to_le16(le16_to_cpu(sp->header.nextindex) + 1); 988 989 /* Don't log it if there are no links to the file */ 990 if (!test_cflag(COMMIT_Nolink, ip)) { 991 tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW); 992 xtlck = (struct xtlock *) & tlck->lock; 993 xtlck->lwm.offset = (xtlck->lwm.offset) ? 994 min(skip, (int)xtlck->lwm.offset) : skip; 995 xtlck->lwm.length = 996 le16_to_cpu(sp->header.nextindex) - 997 xtlck->lwm.offset; 998 } 999 1000 return 0; 1001 } 1002 1003 /* 1004 * allocate new index blocks to cover index page split(s) 1005 * 1006 * allocation hint: ? 1007 */ 1008 if (split->pxdlist == NULL) { 1009 nsplit = btstack->nsplit; 1010 split->pxdlist = &pxdlist; 1011 pxdlist.maxnpxd = pxdlist.npxd = 0; 1012 pxd = &pxdlist.pxd[0]; 1013 xlen = JFS_SBI(ip->i_sb)->nbperpage; 1014 for (; nsplit > 0; nsplit--, pxd++) { 1015 if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr)) 1016 == 0) { 1017 PXDaddress(pxd, xaddr); 1018 PXDlength(pxd, xlen); 1019 1020 pxdlist.maxnpxd++; 1021 1022 continue; 1023 } 1024 1025 /* undo allocation */ 1026 1027 XT_PUTPAGE(smp); 1028 return rc; 1029 } 1030 } 1031 1032 /* 1033 * Split leaf page <sp> into <sp> and a new right page <rp>. 1034 * 1035 * The split routines insert the new entry into the leaf page, 1036 * and acquire txLock as appropriate. 1037 * return <rp> pinned and its block number <rpbn>. 1038 */ 1039 rc = (sp->header.flag & BT_ROOT) ? 1040 xtSplitRoot(tid, ip, split, &rmp) : 1041 xtSplitPage(tid, ip, split, &rmp, &rbn); 1042 1043 XT_PUTPAGE(smp); 1044 1045 if (rc) 1046 return -EIO; 1047 /* 1048 * propagate up the router entry for the leaf page just split 1049 * 1050 * insert a router entry for the new page into the parent page, 1051 * propagate the insert/split up the tree by walking back the stack 1052 * of (bn of parent page, index of child page entry in parent page) 1053 * that were traversed during the search for the page that split. 1054 * 1055 * the propagation of insert/split up the tree stops if the root 1056 * splits or the page inserted into doesn't have to split to hold 1057 * the new entry. 1058 * 1059 * the parent entry for the split page remains the same, and 1060 * a new entry is inserted at its right with the first key and 1061 * block number of the new right page. 1062 * 1063 * There are a maximum of 3 pages pinned at any time: 1064 * right child, left parent and right parent (when the parent splits) 1065 * to keep the child page pinned while working on the parent. 1066 * make sure that all pins are released at exit. 1067 */ 1068 while ((parent = BT_POP(btstack)) != NULL) { 1069 /* parent page specified by stack frame <parent> */ 1070 1071 /* keep current child pages <rcp> pinned */ 1072 rcmp = rmp; 1073 rcbn = rbn; 1074 rcp = XT_PAGE(ip, rcmp); 1075 1076 /* 1077 * insert router entry in parent for new right child page <rp> 1078 */ 1079 /* get/pin the parent page <sp> */ 1080 XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc); 1081 if (rc) { 1082 XT_PUTPAGE(rcmp); 1083 return rc; 1084 } 1085 1086 /* 1087 * The new key entry goes ONE AFTER the index of parent entry, 1088 * because the split was to the right. 1089 */ 1090 skip = parent->index + 1; 1091 1092 /* 1093 * split or shift right remaining entries of the parent page 1094 */ 1095 nextindex = le16_to_cpu(sp->header.nextindex); 1096 /* 1097 * parent page is full - split the parent page 1098 */ 1099 if (nextindex == le16_to_cpu(sp->header.maxentry)) { 1100 /* init for parent page split */ 1101 split->mp = smp; 1102 split->index = skip; /* index at insert */ 1103 split->flag = XAD_NEW; 1104 split->off = offsetXAD(&rcp->xad[XTENTRYSTART]); 1105 split->len = JFS_SBI(ip->i_sb)->nbperpage; 1106 split->addr = rcbn; 1107 1108 /* unpin previous right child page */ 1109 XT_PUTPAGE(rcmp); 1110 1111 /* The split routines insert the new entry, 1112 * and acquire txLock as appropriate. 1113 * return <rp> pinned and its block number <rpbn>. 1114 */ 1115 rc = (sp->header.flag & BT_ROOT) ? 1116 xtSplitRoot(tid, ip, split, &rmp) : 1117 xtSplitPage(tid, ip, split, &rmp, &rbn); 1118 if (rc) { 1119 XT_PUTPAGE(smp); 1120 return rc; 1121 } 1122 1123 XT_PUTPAGE(smp); 1124 /* keep new child page <rp> pinned */ 1125 } 1126 /* 1127 * parent page is not full - insert in parent page 1128 */ 1129 else { 1130 /* 1131 * insert router entry in parent for the right child 1132 * page from the first entry of the right child page: 1133 */ 1134 /* 1135 * acquire a transaction lock on the parent page; 1136 * 1137 * action: router xad insertion; 1138 */ 1139 BT_MARK_DIRTY(smp, ip); 1140 1141 /* 1142 * if insert into middle, shift right remaining entries 1143 */ 1144 if (skip < nextindex) 1145 memmove(&sp->xad[skip + 1], &sp->xad[skip], 1146 (nextindex - 1147 skip) << L2XTSLOTSIZE); 1148 1149 /* insert the router entry */ 1150 xad = &sp->xad[skip]; 1151 XT_PUTENTRY(xad, XAD_NEW, 1152 offsetXAD(&rcp->xad[XTENTRYSTART]), 1153 JFS_SBI(ip->i_sb)->nbperpage, rcbn); 1154 1155 /* advance next available entry index. */ 1156 sp->header.nextindex = 1157 cpu_to_le16(le16_to_cpu(sp->header.nextindex) + 1158 1); 1159 1160 /* Don't log it if there are no links to the file */ 1161 if (!test_cflag(COMMIT_Nolink, ip)) { 1162 tlck = txLock(tid, ip, smp, 1163 tlckXTREE | tlckGROW); 1164 xtlck = (struct xtlock *) & tlck->lock; 1165 xtlck->lwm.offset = (xtlck->lwm.offset) ? 1166 min(skip, (int)xtlck->lwm.offset) : skip; 1167 xtlck->lwm.length = 1168 le16_to_cpu(sp->header.nextindex) - 1169 xtlck->lwm.offset; 1170 } 1171 1172 /* unpin parent page */ 1173 XT_PUTPAGE(smp); 1174 1175 /* exit propagate up */ 1176 break; 1177 } 1178 } 1179 1180 /* unpin current right page */ 1181 XT_PUTPAGE(rmp); 1182 1183 return 0; 1184 } 1185 1186 1187 /* 1188 * xtSplitPage() 1189 * 1190 * function: 1191 * split a full non-root page into 1192 * original/split/left page and new right page 1193 * i.e., the original/split page remains as left page. 1194 * 1195 * parameter: 1196 * int tid, 1197 * struct inode *ip, 1198 * struct xtsplit *split, 1199 * struct metapage **rmpp, 1200 * u64 *rbnp, 1201 * 1202 * return: 1203 * Pointer to page in which to insert or NULL on error. 1204 */ 1205 static int 1206 xtSplitPage(tid_t tid, struct inode *ip, 1207 struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp) 1208 { 1209 int rc = 0; 1210 struct metapage *smp; 1211 xtpage_t *sp; 1212 struct metapage *rmp; 1213 xtpage_t *rp; /* new right page allocated */ 1214 s64 rbn; /* new right page block number */ 1215 struct metapage *mp; 1216 xtpage_t *p; 1217 s64 nextbn; 1218 int skip, maxentry, middle, righthalf, n; 1219 xad_t *xad; 1220 struct pxdlist *pxdlist; 1221 pxd_t *pxd; 1222 struct tlock *tlck; 1223 struct xtlock *sxtlck = NULL, *rxtlck = NULL; 1224 int quota_allocation = 0; 1225 1226 smp = split->mp; 1227 sp = XT_PAGE(ip, smp); 1228 1229 INCREMENT(xtStat.split); 1230 1231 pxdlist = split->pxdlist; 1232 pxd = &pxdlist->pxd[pxdlist->npxd]; 1233 pxdlist->npxd++; 1234 rbn = addressPXD(pxd); 1235 1236 /* Allocate blocks to quota. */ 1237 if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { 1238 rc = -EDQUOT; 1239 goto clean_up; 1240 } 1241 1242 quota_allocation += lengthPXD(pxd); 1243 1244 /* 1245 * allocate the new right page for the split 1246 */ 1247 rmp = get_metapage(ip, rbn, PSIZE, 1); 1248 if (rmp == NULL) { 1249 rc = -EIO; 1250 goto clean_up; 1251 } 1252 1253 jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); 1254 1255 BT_MARK_DIRTY(rmp, ip); 1256 /* 1257 * action: new page; 1258 */ 1259 1260 rp = (xtpage_t *) rmp->data; 1261 rp->header.self = *pxd; 1262 rp->header.flag = sp->header.flag & BT_TYPE; 1263 rp->header.maxentry = sp->header.maxentry; /* little-endian */ 1264 rp->header.nextindex = cpu_to_le16(XTENTRYSTART); 1265 1266 BT_MARK_DIRTY(smp, ip); 1267 /* Don't log it if there are no links to the file */ 1268 if (!test_cflag(COMMIT_Nolink, ip)) { 1269 /* 1270 * acquire a transaction lock on the new right page; 1271 */ 1272 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); 1273 rxtlck = (struct xtlock *) & tlck->lock; 1274 rxtlck->lwm.offset = XTENTRYSTART; 1275 /* 1276 * acquire a transaction lock on the split page 1277 */ 1278 tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW); 1279 sxtlck = (struct xtlock *) & tlck->lock; 1280 } 1281 1282 /* 1283 * initialize/update sibling pointers of <sp> and <rp> 1284 */ 1285 nextbn = le64_to_cpu(sp->header.next); 1286 rp->header.next = cpu_to_le64(nextbn); 1287 rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self)); 1288 sp->header.next = cpu_to_le64(rbn); 1289 1290 skip = split->index; 1291 1292 /* 1293 * sequential append at tail (after last entry of last page) 1294 * 1295 * if splitting the last page on a level because of appending 1296 * a entry to it (skip is maxentry), it's likely that the access is 1297 * sequential. adding an empty page on the side of the level is less 1298 * work and can push the fill factor much higher than normal. 1299 * if we're wrong it's no big deal - we will do the split the right 1300 * way next time. 1301 * (it may look like it's equally easy to do a similar hack for 1302 * reverse sorted data, that is, split the tree left, but it's not. 1303 * Be my guest.) 1304 */ 1305 if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) { 1306 /* 1307 * acquire a transaction lock on the new/right page; 1308 * 1309 * action: xad insertion; 1310 */ 1311 /* insert entry at the first entry of the new right page */ 1312 xad = &rp->xad[XTENTRYSTART]; 1313 XT_PUTENTRY(xad, split->flag, split->off, split->len, 1314 split->addr); 1315 1316 rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); 1317 1318 if (!test_cflag(COMMIT_Nolink, ip)) { 1319 /* rxtlck->lwm.offset = XTENTRYSTART; */ 1320 rxtlck->lwm.length = 1; 1321 } 1322 1323 *rmpp = rmp; 1324 *rbnp = rbn; 1325 1326 jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp); 1327 return 0; 1328 } 1329 1330 /* 1331 * non-sequential insert (at possibly middle page) 1332 */ 1333 1334 /* 1335 * update previous pointer of old next/right page of <sp> 1336 */ 1337 if (nextbn != 0) { 1338 XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 1339 if (rc) { 1340 XT_PUTPAGE(rmp); 1341 goto clean_up; 1342 } 1343 1344 BT_MARK_DIRTY(mp, ip); 1345 /* 1346 * acquire a transaction lock on the next page; 1347 * 1348 * action:sibling pointer update; 1349 */ 1350 if (!test_cflag(COMMIT_Nolink, ip)) 1351 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 1352 1353 p->header.prev = cpu_to_le64(rbn); 1354 1355 /* sibling page may have been updated previously, or 1356 * it may be updated later; 1357 */ 1358 1359 XT_PUTPAGE(mp); 1360 } 1361 1362 /* 1363 * split the data between the split and new/right pages 1364 */ 1365 maxentry = le16_to_cpu(sp->header.maxentry); 1366 middle = maxentry >> 1; 1367 righthalf = maxentry - middle; 1368 1369 /* 1370 * skip index in old split/left page - insert into left page: 1371 */ 1372 if (skip <= middle) { 1373 /* move right half of split page to the new right page */ 1374 memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], 1375 righthalf << L2XTSLOTSIZE); 1376 1377 /* shift right tail of left half to make room for new entry */ 1378 if (skip < middle) 1379 memmove(&sp->xad[skip + 1], &sp->xad[skip], 1380 (middle - skip) << L2XTSLOTSIZE); 1381 1382 /* insert new entry */ 1383 xad = &sp->xad[skip]; 1384 XT_PUTENTRY(xad, split->flag, split->off, split->len, 1385 split->addr); 1386 1387 /* update page header */ 1388 sp->header.nextindex = cpu_to_le16(middle + 1); 1389 if (!test_cflag(COMMIT_Nolink, ip)) { 1390 sxtlck->lwm.offset = (sxtlck->lwm.offset) ? 1391 min(skip, (int)sxtlck->lwm.offset) : skip; 1392 } 1393 1394 rp->header.nextindex = 1395 cpu_to_le16(XTENTRYSTART + righthalf); 1396 } 1397 /* 1398 * skip index in new right page - insert into right page: 1399 */ 1400 else { 1401 /* move left head of right half to right page */ 1402 n = skip - middle; 1403 memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], 1404 n << L2XTSLOTSIZE); 1405 1406 /* insert new entry */ 1407 n += XTENTRYSTART; 1408 xad = &rp->xad[n]; 1409 XT_PUTENTRY(xad, split->flag, split->off, split->len, 1410 split->addr); 1411 1412 /* move right tail of right half to right page */ 1413 if (skip < maxentry) 1414 memmove(&rp->xad[n + 1], &sp->xad[skip], 1415 (maxentry - skip) << L2XTSLOTSIZE); 1416 1417 /* update page header */ 1418 sp->header.nextindex = cpu_to_le16(middle); 1419 if (!test_cflag(COMMIT_Nolink, ip)) { 1420 sxtlck->lwm.offset = (sxtlck->lwm.offset) ? 1421 min(middle, (int)sxtlck->lwm.offset) : middle; 1422 } 1423 1424 rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1425 righthalf + 1); 1426 } 1427 1428 if (!test_cflag(COMMIT_Nolink, ip)) { 1429 sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) - 1430 sxtlck->lwm.offset; 1431 1432 /* rxtlck->lwm.offset = XTENTRYSTART; */ 1433 rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - 1434 XTENTRYSTART; 1435 } 1436 1437 *rmpp = rmp; 1438 *rbnp = rbn; 1439 1440 jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp); 1441 return rc; 1442 1443 clean_up: 1444 1445 /* Rollback quota allocation. */ 1446 if (quota_allocation) 1447 DQUOT_FREE_BLOCK(ip, quota_allocation); 1448 1449 return (rc); 1450 } 1451 1452 1453 /* 1454 * xtSplitRoot() 1455 * 1456 * function: 1457 * split the full root page into 1458 * original/root/split page and new right page 1459 * i.e., root remains fixed in tree anchor (inode) and 1460 * the root is copied to a single new right child page 1461 * since root page << non-root page, and 1462 * the split root page contains a single entry for the 1463 * new right child page. 1464 * 1465 * parameter: 1466 * int tid, 1467 * struct inode *ip, 1468 * struct xtsplit *split, 1469 * struct metapage **rmpp) 1470 * 1471 * return: 1472 * Pointer to page in which to insert or NULL on error. 1473 */ 1474 static int 1475 xtSplitRoot(tid_t tid, 1476 struct inode *ip, struct xtsplit * split, struct metapage ** rmpp) 1477 { 1478 xtpage_t *sp; 1479 struct metapage *rmp; 1480 xtpage_t *rp; 1481 s64 rbn; 1482 int skip, nextindex; 1483 xad_t *xad; 1484 pxd_t *pxd; 1485 struct pxdlist *pxdlist; 1486 struct tlock *tlck; 1487 struct xtlock *xtlck; 1488 1489 sp = &JFS_IP(ip)->i_xtroot; 1490 1491 INCREMENT(xtStat.split); 1492 1493 /* 1494 * allocate a single (right) child page 1495 */ 1496 pxdlist = split->pxdlist; 1497 pxd = &pxdlist->pxd[pxdlist->npxd]; 1498 pxdlist->npxd++; 1499 rbn = addressPXD(pxd); 1500 rmp = get_metapage(ip, rbn, PSIZE, 1); 1501 if (rmp == NULL) 1502 return -EIO; 1503 1504 /* Allocate blocks to quota. */ 1505 if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { 1506 release_metapage(rmp); 1507 return -EDQUOT; 1508 } 1509 1510 jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp); 1511 1512 /* 1513 * acquire a transaction lock on the new right page; 1514 * 1515 * action: new page; 1516 */ 1517 BT_MARK_DIRTY(rmp, ip); 1518 1519 rp = (xtpage_t *) rmp->data; 1520 rp->header.flag = 1521 (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL; 1522 rp->header.self = *pxd; 1523 rp->header.nextindex = cpu_to_le16(XTENTRYSTART); 1524 rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE); 1525 1526 /* initialize sibling pointers */ 1527 rp->header.next = 0; 1528 rp->header.prev = 0; 1529 1530 /* 1531 * copy the in-line root page into new right page extent 1532 */ 1533 nextindex = le16_to_cpu(sp->header.maxentry); 1534 memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART], 1535 (nextindex - XTENTRYSTART) << L2XTSLOTSIZE); 1536 1537 /* 1538 * insert the new entry into the new right/child page 1539 * (skip index in the new right page will not change) 1540 */ 1541 skip = split->index; 1542 /* if insert into middle, shift right remaining entries */ 1543 if (skip != nextindex) 1544 memmove(&rp->xad[skip + 1], &rp->xad[skip], 1545 (nextindex - skip) * sizeof(xad_t)); 1546 1547 xad = &rp->xad[skip]; 1548 XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr); 1549 1550 /* update page header */ 1551 rp->header.nextindex = cpu_to_le16(nextindex + 1); 1552 1553 if (!test_cflag(COMMIT_Nolink, ip)) { 1554 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); 1555 xtlck = (struct xtlock *) & tlck->lock; 1556 xtlck->lwm.offset = XTENTRYSTART; 1557 xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - 1558 XTENTRYSTART; 1559 } 1560 1561 /* 1562 * reset the root 1563 * 1564 * init root with the single entry for the new right page 1565 * set the 1st entry offset to 0, which force the left-most key 1566 * at any level of the tree to be less than any search key. 1567 */ 1568 /* 1569 * acquire a transaction lock on the root page (in-memory inode); 1570 * 1571 * action: root split; 1572 */ 1573 BT_MARK_DIRTY(split->mp, ip); 1574 1575 xad = &sp->xad[XTENTRYSTART]; 1576 XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn); 1577 1578 /* update page header of root */ 1579 sp->header.flag &= ~BT_LEAF; 1580 sp->header.flag |= BT_INTERNAL; 1581 1582 sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); 1583 1584 if (!test_cflag(COMMIT_Nolink, ip)) { 1585 tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW); 1586 xtlck = (struct xtlock *) & tlck->lock; 1587 xtlck->lwm.offset = XTENTRYSTART; 1588 xtlck->lwm.length = 1; 1589 } 1590 1591 *rmpp = rmp; 1592 1593 jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp); 1594 return 0; 1595 } 1596 1597 1598 /* 1599 * xtExtend() 1600 * 1601 * function: extend in-place; 1602 * 1603 * note: existing extent may or may not have been committed. 1604 * caller is responsible for pager buffer cache update, and 1605 * working block allocation map update; 1606 * update pmap: alloc whole extended extent; 1607 */ 1608 int xtExtend(tid_t tid, /* transaction id */ 1609 struct inode *ip, s64 xoff, /* delta extent offset */ 1610 s32 xlen, /* delta extent length */ 1611 int flag) 1612 { 1613 int rc = 0; 1614 int cmp; 1615 struct metapage *mp; /* meta-page buffer */ 1616 xtpage_t *p; /* base B+-tree index page */ 1617 s64 bn; 1618 int index, nextindex, len; 1619 struct btstack btstack; /* traverse stack */ 1620 struct xtsplit split; /* split information */ 1621 xad_t *xad; 1622 s64 xaddr; 1623 struct tlock *tlck; 1624 struct xtlock *xtlck = NULL; 1625 1626 jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); 1627 1628 /* there must exist extent to be extended */ 1629 if ((rc = xtSearch(ip, xoff - 1, &cmp, &btstack, XT_INSERT))) 1630 return rc; 1631 1632 /* retrieve search result */ 1633 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 1634 1635 if (cmp != 0) { 1636 XT_PUTPAGE(mp); 1637 jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent"); 1638 return -EIO; 1639 } 1640 1641 /* extension must be contiguous */ 1642 xad = &p->xad[index]; 1643 if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) { 1644 XT_PUTPAGE(mp); 1645 jfs_error(ip->i_sb, "xtExtend: extension is not contiguous"); 1646 return -EIO; 1647 } 1648 1649 /* 1650 * acquire a transaction lock on the leaf page; 1651 * 1652 * action: xad insertion/extension; 1653 */ 1654 BT_MARK_DIRTY(mp, ip); 1655 if (!test_cflag(COMMIT_Nolink, ip)) { 1656 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 1657 xtlck = (struct xtlock *) & tlck->lock; 1658 } 1659 1660 /* extend will overflow extent ? */ 1661 xlen = lengthXAD(xad) + xlen; 1662 if ((len = xlen - MAXXLEN) <= 0) 1663 goto extendOld; 1664 1665 /* 1666 * extent overflow: insert entry for new extent 1667 */ 1668 //insertNew: 1669 xoff = offsetXAD(xad) + MAXXLEN; 1670 xaddr = addressXAD(xad) + MAXXLEN; 1671 nextindex = le16_to_cpu(p->header.nextindex); 1672 1673 /* 1674 * if the leaf page is full, insert the new entry and 1675 * propagate up the router entry for the new page from split 1676 * 1677 * The xtSplitUp() will insert the entry and unpin the leaf page. 1678 */ 1679 if (nextindex == le16_to_cpu(p->header.maxentry)) { 1680 /* xtSpliUp() unpins leaf pages */ 1681 split.mp = mp; 1682 split.index = index + 1; 1683 split.flag = XAD_NEW; 1684 split.off = xoff; /* split offset */ 1685 split.len = len; 1686 split.addr = xaddr; 1687 split.pxdlist = NULL; 1688 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 1689 return rc; 1690 1691 /* get back old page */ 1692 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1693 if (rc) 1694 return rc; 1695 /* 1696 * if leaf root has been split, original root has been 1697 * copied to new child page, i.e., original entry now 1698 * resides on the new child page; 1699 */ 1700 if (p->header.flag & BT_INTERNAL) { 1701 ASSERT(p->header.nextindex == 1702 cpu_to_le16(XTENTRYSTART + 1)); 1703 xad = &p->xad[XTENTRYSTART]; 1704 bn = addressXAD(xad); 1705 XT_PUTPAGE(mp); 1706 1707 /* get new child page */ 1708 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1709 if (rc) 1710 return rc; 1711 1712 BT_MARK_DIRTY(mp, ip); 1713 if (!test_cflag(COMMIT_Nolink, ip)) { 1714 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 1715 xtlck = (struct xtlock *) & tlck->lock; 1716 } 1717 } 1718 } 1719 /* 1720 * insert the new entry into the leaf page 1721 */ 1722 else { 1723 /* insert the new entry: mark the entry NEW */ 1724 xad = &p->xad[index + 1]; 1725 XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr); 1726 1727 /* advance next available entry index */ 1728 p->header.nextindex = 1729 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 1730 } 1731 1732 /* get back old entry */ 1733 xad = &p->xad[index]; 1734 xlen = MAXXLEN; 1735 1736 /* 1737 * extend old extent 1738 */ 1739 extendOld: 1740 XADlength(xad, xlen); 1741 if (!(xad->flag & XAD_NEW)) 1742 xad->flag |= XAD_EXTENDED; 1743 1744 if (!test_cflag(COMMIT_Nolink, ip)) { 1745 xtlck->lwm.offset = 1746 (xtlck->lwm.offset) ? min(index, 1747 (int)xtlck->lwm.offset) : index; 1748 xtlck->lwm.length = 1749 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; 1750 } 1751 1752 /* unpin the leaf page */ 1753 XT_PUTPAGE(mp); 1754 1755 return rc; 1756 } 1757 1758 #ifdef _NOTYET 1759 /* 1760 * xtTailgate() 1761 * 1762 * function: split existing 'tail' extent 1763 * (split offset >= start offset of tail extent), and 1764 * relocate and extend the split tail half; 1765 * 1766 * note: existing extent may or may not have been committed. 1767 * caller is responsible for pager buffer cache update, and 1768 * working block allocation map update; 1769 * update pmap: free old split tail extent, alloc new extent; 1770 */ 1771 int xtTailgate(tid_t tid, /* transaction id */ 1772 struct inode *ip, s64 xoff, /* split/new extent offset */ 1773 s32 xlen, /* new extent length */ 1774 s64 xaddr, /* new extent address */ 1775 int flag) 1776 { 1777 int rc = 0; 1778 int cmp; 1779 struct metapage *mp; /* meta-page buffer */ 1780 xtpage_t *p; /* base B+-tree index page */ 1781 s64 bn; 1782 int index, nextindex, llen, rlen; 1783 struct btstack btstack; /* traverse stack */ 1784 struct xtsplit split; /* split information */ 1785 xad_t *xad; 1786 struct tlock *tlck; 1787 struct xtlock *xtlck = 0; 1788 struct tlock *mtlck; 1789 struct maplock *pxdlock; 1790 1791 /* 1792 printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", 1793 (ulong)xoff, xlen, (ulong)xaddr); 1794 */ 1795 1796 /* there must exist extent to be tailgated */ 1797 if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT))) 1798 return rc; 1799 1800 /* retrieve search result */ 1801 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 1802 1803 if (cmp != 0) { 1804 XT_PUTPAGE(mp); 1805 jfs_error(ip->i_sb, "xtTailgate: couldn't find extent"); 1806 return -EIO; 1807 } 1808 1809 /* entry found must be last entry */ 1810 nextindex = le16_to_cpu(p->header.nextindex); 1811 if (index != nextindex - 1) { 1812 XT_PUTPAGE(mp); 1813 jfs_error(ip->i_sb, 1814 "xtTailgate: the entry found is not the last entry"); 1815 return -EIO; 1816 } 1817 1818 BT_MARK_DIRTY(mp, ip); 1819 /* 1820 * acquire tlock of the leaf page containing original entry 1821 */ 1822 if (!test_cflag(COMMIT_Nolink, ip)) { 1823 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 1824 xtlck = (struct xtlock *) & tlck->lock; 1825 } 1826 1827 /* completely replace extent ? */ 1828 xad = &p->xad[index]; 1829 /* 1830 printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", 1831 (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad)); 1832 */ 1833 if ((llen = xoff - offsetXAD(xad)) == 0) 1834 goto updateOld; 1835 1836 /* 1837 * partially replace extent: insert entry for new extent 1838 */ 1839 //insertNew: 1840 /* 1841 * if the leaf page is full, insert the new entry and 1842 * propagate up the router entry for the new page from split 1843 * 1844 * The xtSplitUp() will insert the entry and unpin the leaf page. 1845 */ 1846 if (nextindex == le16_to_cpu(p->header.maxentry)) { 1847 /* xtSpliUp() unpins leaf pages */ 1848 split.mp = mp; 1849 split.index = index + 1; 1850 split.flag = XAD_NEW; 1851 split.off = xoff; /* split offset */ 1852 split.len = xlen; 1853 split.addr = xaddr; 1854 split.pxdlist = NULL; 1855 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 1856 return rc; 1857 1858 /* get back old page */ 1859 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1860 if (rc) 1861 return rc; 1862 /* 1863 * if leaf root has been split, original root has been 1864 * copied to new child page, i.e., original entry now 1865 * resides on the new child page; 1866 */ 1867 if (p->header.flag & BT_INTERNAL) { 1868 ASSERT(p->header.nextindex == 1869 cpu_to_le16(XTENTRYSTART + 1)); 1870 xad = &p->xad[XTENTRYSTART]; 1871 bn = addressXAD(xad); 1872 XT_PUTPAGE(mp); 1873 1874 /* get new child page */ 1875 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1876 if (rc) 1877 return rc; 1878 1879 BT_MARK_DIRTY(mp, ip); 1880 if (!test_cflag(COMMIT_Nolink, ip)) { 1881 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 1882 xtlck = (struct xtlock *) & tlck->lock; 1883 } 1884 } 1885 } 1886 /* 1887 * insert the new entry into the leaf page 1888 */ 1889 else { 1890 /* insert the new entry: mark the entry NEW */ 1891 xad = &p->xad[index + 1]; 1892 XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); 1893 1894 /* advance next available entry index */ 1895 p->header.nextindex = 1896 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 1897 } 1898 1899 /* get back old XAD */ 1900 xad = &p->xad[index]; 1901 1902 /* 1903 * truncate/relocate old extent at split offset 1904 */ 1905 updateOld: 1906 /* update dmap for old/committed/truncated extent */ 1907 rlen = lengthXAD(xad) - llen; 1908 if (!(xad->flag & XAD_NEW)) { 1909 /* free from PWMAP at commit */ 1910 if (!test_cflag(COMMIT_Nolink, ip)) { 1911 mtlck = txMaplock(tid, ip, tlckMAP); 1912 pxdlock = (struct maplock *) & mtlck->lock; 1913 pxdlock->flag = mlckFREEPXD; 1914 PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen); 1915 PXDlength(&pxdlock->pxd, rlen); 1916 pxdlock->index = 1; 1917 } 1918 } else 1919 /* free from WMAP */ 1920 dbFree(ip, addressXAD(xad) + llen, (s64) rlen); 1921 1922 if (llen) 1923 /* truncate */ 1924 XADlength(xad, llen); 1925 else 1926 /* replace */ 1927 XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); 1928 1929 if (!test_cflag(COMMIT_Nolink, ip)) { 1930 xtlck->lwm.offset = (xtlck->lwm.offset) ? 1931 min(index, (int)xtlck->lwm.offset) : index; 1932 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 1933 xtlck->lwm.offset; 1934 } 1935 1936 /* unpin the leaf page */ 1937 XT_PUTPAGE(mp); 1938 1939 return rc; 1940 } 1941 #endif /* _NOTYET */ 1942 1943 /* 1944 * xtUpdate() 1945 * 1946 * function: update XAD; 1947 * 1948 * update extent for allocated_but_not_recorded or 1949 * compressed extent; 1950 * 1951 * parameter: 1952 * nxad - new XAD; 1953 * logical extent of the specified XAD must be completely 1954 * contained by an existing XAD; 1955 */ 1956 int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) 1957 { /* new XAD */ 1958 int rc = 0; 1959 int cmp; 1960 struct metapage *mp; /* meta-page buffer */ 1961 xtpage_t *p; /* base B+-tree index page */ 1962 s64 bn; 1963 int index0, index, newindex, nextindex; 1964 struct btstack btstack; /* traverse stack */ 1965 struct xtsplit split; /* split information */ 1966 xad_t *xad, *lxad, *rxad; 1967 int xflag; 1968 s64 nxoff, xoff; 1969 int nxlen, xlen, lxlen, rxlen; 1970 s64 nxaddr, xaddr; 1971 struct tlock *tlck; 1972 struct xtlock *xtlck = NULL; 1973 int newpage = 0; 1974 1975 /* there must exist extent to be tailgated */ 1976 nxoff = offsetXAD(nxad); 1977 nxlen = lengthXAD(nxad); 1978 nxaddr = addressXAD(nxad); 1979 1980 if ((rc = xtSearch(ip, nxoff, &cmp, &btstack, XT_INSERT))) 1981 return rc; 1982 1983 /* retrieve search result */ 1984 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); 1985 1986 if (cmp != 0) { 1987 XT_PUTPAGE(mp); 1988 jfs_error(ip->i_sb, "xtUpdate: Could not find extent"); 1989 return -EIO; 1990 } 1991 1992 BT_MARK_DIRTY(mp, ip); 1993 /* 1994 * acquire tlock of the leaf page containing original entry 1995 */ 1996 if (!test_cflag(COMMIT_Nolink, ip)) { 1997 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 1998 xtlck = (struct xtlock *) & tlck->lock; 1999 } 2000 2001 xad = &p->xad[index0]; 2002 xflag = xad->flag; 2003 xoff = offsetXAD(xad); 2004 xlen = lengthXAD(xad); 2005 xaddr = addressXAD(xad); 2006 2007 /* nXAD must be completely contained within XAD */ 2008 if ((xoff > nxoff) || 2009 (nxoff + nxlen > xoff + xlen)) { 2010 XT_PUTPAGE(mp); 2011 jfs_error(ip->i_sb, 2012 "xtUpdate: nXAD in not completely contained within XAD"); 2013 return -EIO; 2014 } 2015 2016 index = index0; 2017 newindex = index + 1; 2018 nextindex = le16_to_cpu(p->header.nextindex); 2019 2020 #ifdef _JFS_WIP_NOCOALESCE 2021 if (xoff < nxoff) 2022 goto updateRight; 2023 2024 /* 2025 * replace XAD with nXAD 2026 */ 2027 replace: /* (nxoff == xoff) */ 2028 if (nxlen == xlen) { 2029 /* replace XAD with nXAD:recorded */ 2030 *xad = *nxad; 2031 xad->flag = xflag & ~XAD_NOTRECORDED; 2032 2033 goto out; 2034 } else /* (nxlen < xlen) */ 2035 goto updateLeft; 2036 #endif /* _JFS_WIP_NOCOALESCE */ 2037 2038 /* #ifdef _JFS_WIP_COALESCE */ 2039 if (xoff < nxoff) 2040 goto coalesceRight; 2041 2042 /* 2043 * coalesce with left XAD 2044 */ 2045 //coalesceLeft: /* (xoff == nxoff) */ 2046 /* is XAD first entry of page ? */ 2047 if (index == XTENTRYSTART) 2048 goto replace; 2049 2050 /* is nXAD logically and physically contiguous with lXAD ? */ 2051 lxad = &p->xad[index - 1]; 2052 lxlen = lengthXAD(lxad); 2053 if (!(lxad->flag & XAD_NOTRECORDED) && 2054 (nxoff == offsetXAD(lxad) + lxlen) && 2055 (nxaddr == addressXAD(lxad) + lxlen) && 2056 (lxlen + nxlen < MAXXLEN)) { 2057 /* extend right lXAD */ 2058 index0 = index - 1; 2059 XADlength(lxad, lxlen + nxlen); 2060 2061 /* If we just merged two extents together, need to make sure the 2062 * right extent gets logged. If the left one is marked XAD_NEW, 2063 * then we know it will be logged. Otherwise, mark as 2064 * XAD_EXTENDED 2065 */ 2066 if (!(lxad->flag & XAD_NEW)) 2067 lxad->flag |= XAD_EXTENDED; 2068 2069 if (xlen > nxlen) { 2070 /* truncate XAD */ 2071 XADoffset(xad, xoff + nxlen); 2072 XADlength(xad, xlen - nxlen); 2073 XADaddress(xad, xaddr + nxlen); 2074 goto out; 2075 } else { /* (xlen == nxlen) */ 2076 2077 /* remove XAD */ 2078 if (index < nextindex - 1) 2079 memmove(&p->xad[index], &p->xad[index + 1], 2080 (nextindex - index - 2081 1) << L2XTSLOTSIZE); 2082 2083 p->header.nextindex = 2084 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 2085 1); 2086 2087 index = index0; 2088 newindex = index + 1; 2089 nextindex = le16_to_cpu(p->header.nextindex); 2090 xoff = nxoff = offsetXAD(lxad); 2091 xlen = nxlen = lxlen + nxlen; 2092 xaddr = nxaddr = addressXAD(lxad); 2093 goto coalesceRight; 2094 } 2095 } 2096 2097 /* 2098 * replace XAD with nXAD 2099 */ 2100 replace: /* (nxoff == xoff) */ 2101 if (nxlen == xlen) { 2102 /* replace XAD with nXAD:recorded */ 2103 *xad = *nxad; 2104 xad->flag = xflag & ~XAD_NOTRECORDED; 2105 2106 goto coalesceRight; 2107 } else /* (nxlen < xlen) */ 2108 goto updateLeft; 2109 2110 /* 2111 * coalesce with right XAD 2112 */ 2113 coalesceRight: /* (xoff <= nxoff) */ 2114 /* is XAD last entry of page ? */ 2115 if (newindex == nextindex) { 2116 if (xoff == nxoff) 2117 goto out; 2118 goto updateRight; 2119 } 2120 2121 /* is nXAD logically and physically contiguous with rXAD ? */ 2122 rxad = &p->xad[index + 1]; 2123 rxlen = lengthXAD(rxad); 2124 if (!(rxad->flag & XAD_NOTRECORDED) && 2125 (nxoff + nxlen == offsetXAD(rxad)) && 2126 (nxaddr + nxlen == addressXAD(rxad)) && 2127 (rxlen + nxlen < MAXXLEN)) { 2128 /* extend left rXAD */ 2129 XADoffset(rxad, nxoff); 2130 XADlength(rxad, rxlen + nxlen); 2131 XADaddress(rxad, nxaddr); 2132 2133 /* If we just merged two extents together, need to make sure 2134 * the left extent gets logged. If the right one is marked 2135 * XAD_NEW, then we know it will be logged. Otherwise, mark as 2136 * XAD_EXTENDED 2137 */ 2138 if (!(rxad->flag & XAD_NEW)) 2139 rxad->flag |= XAD_EXTENDED; 2140 2141 if (xlen > nxlen) 2142 /* truncate XAD */ 2143 XADlength(xad, xlen - nxlen); 2144 else { /* (xlen == nxlen) */ 2145 2146 /* remove XAD */ 2147 memmove(&p->xad[index], &p->xad[index + 1], 2148 (nextindex - index - 1) << L2XTSLOTSIZE); 2149 2150 p->header.nextindex = 2151 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 2152 1); 2153 } 2154 2155 goto out; 2156 } else if (xoff == nxoff) 2157 goto out; 2158 2159 if (xoff >= nxoff) { 2160 XT_PUTPAGE(mp); 2161 jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff"); 2162 return -EIO; 2163 } 2164 /* #endif _JFS_WIP_COALESCE */ 2165 2166 /* 2167 * split XAD into (lXAD, nXAD): 2168 * 2169 * |---nXAD---> 2170 * --|----------XAD----------|-- 2171 * |-lXAD-| 2172 */ 2173 updateRight: /* (xoff < nxoff) */ 2174 /* truncate old XAD as lXAD:not_recorded */ 2175 xad = &p->xad[index]; 2176 XADlength(xad, nxoff - xoff); 2177 2178 /* insert nXAD:recorded */ 2179 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2180 2181 /* xtSpliUp() unpins leaf pages */ 2182 split.mp = mp; 2183 split.index = newindex; 2184 split.flag = xflag & ~XAD_NOTRECORDED; 2185 split.off = nxoff; 2186 split.len = nxlen; 2187 split.addr = nxaddr; 2188 split.pxdlist = NULL; 2189 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 2190 return rc; 2191 2192 /* get back old page */ 2193 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2194 if (rc) 2195 return rc; 2196 /* 2197 * if leaf root has been split, original root has been 2198 * copied to new child page, i.e., original entry now 2199 * resides on the new child page; 2200 */ 2201 if (p->header.flag & BT_INTERNAL) { 2202 ASSERT(p->header.nextindex == 2203 cpu_to_le16(XTENTRYSTART + 1)); 2204 xad = &p->xad[XTENTRYSTART]; 2205 bn = addressXAD(xad); 2206 XT_PUTPAGE(mp); 2207 2208 /* get new child page */ 2209 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2210 if (rc) 2211 return rc; 2212 2213 BT_MARK_DIRTY(mp, ip); 2214 if (!test_cflag(COMMIT_Nolink, ip)) { 2215 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 2216 xtlck = (struct xtlock *) & tlck->lock; 2217 } 2218 } else { 2219 /* is nXAD on new page ? */ 2220 if (newindex > 2221 (le16_to_cpu(p->header.maxentry) >> 1)) { 2222 newindex = 2223 newindex - 2224 le16_to_cpu(p->header.nextindex) + 2225 XTENTRYSTART; 2226 newpage = 1; 2227 } 2228 } 2229 } else { 2230 /* if insert into middle, shift right remaining entries */ 2231 if (newindex < nextindex) 2232 memmove(&p->xad[newindex + 1], &p->xad[newindex], 2233 (nextindex - newindex) << L2XTSLOTSIZE); 2234 2235 /* insert the entry */ 2236 xad = &p->xad[newindex]; 2237 *xad = *nxad; 2238 xad->flag = xflag & ~XAD_NOTRECORDED; 2239 2240 /* advance next available entry index. */ 2241 p->header.nextindex = 2242 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 2243 } 2244 2245 /* 2246 * does nXAD force 3-way split ? 2247 * 2248 * |---nXAD--->| 2249 * --|----------XAD-------------|-- 2250 * |-lXAD-| |-rXAD -| 2251 */ 2252 if (nxoff + nxlen == xoff + xlen) 2253 goto out; 2254 2255 /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */ 2256 if (newpage) { 2257 /* close out old page */ 2258 if (!test_cflag(COMMIT_Nolink, ip)) { 2259 xtlck->lwm.offset = (xtlck->lwm.offset) ? 2260 min(index0, (int)xtlck->lwm.offset) : index0; 2261 xtlck->lwm.length = 2262 le16_to_cpu(p->header.nextindex) - 2263 xtlck->lwm.offset; 2264 } 2265 2266 bn = le64_to_cpu(p->header.next); 2267 XT_PUTPAGE(mp); 2268 2269 /* get new right page */ 2270 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2271 if (rc) 2272 return rc; 2273 2274 BT_MARK_DIRTY(mp, ip); 2275 if (!test_cflag(COMMIT_Nolink, ip)) { 2276 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 2277 xtlck = (struct xtlock *) & tlck->lock; 2278 } 2279 2280 index0 = index = newindex; 2281 } else 2282 index++; 2283 2284 newindex = index + 1; 2285 nextindex = le16_to_cpu(p->header.nextindex); 2286 xlen = xlen - (nxoff - xoff); 2287 xoff = nxoff; 2288 xaddr = nxaddr; 2289 2290 /* recompute split pages */ 2291 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2292 XT_PUTPAGE(mp); 2293 2294 if ((rc = xtSearch(ip, nxoff, &cmp, &btstack, XT_INSERT))) 2295 return rc; 2296 2297 /* retrieve search result */ 2298 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); 2299 2300 if (cmp != 0) { 2301 XT_PUTPAGE(mp); 2302 jfs_error(ip->i_sb, "xtUpdate: xtSearch failed"); 2303 return -EIO; 2304 } 2305 2306 if (index0 != index) { 2307 XT_PUTPAGE(mp); 2308 jfs_error(ip->i_sb, 2309 "xtUpdate: unexpected value of index"); 2310 return -EIO; 2311 } 2312 } 2313 2314 /* 2315 * split XAD into (nXAD, rXAD) 2316 * 2317 * ---nXAD---| 2318 * --|----------XAD----------|-- 2319 * |-rXAD-| 2320 */ 2321 updateLeft: /* (nxoff == xoff) && (nxlen < xlen) */ 2322 /* update old XAD with nXAD:recorded */ 2323 xad = &p->xad[index]; 2324 *xad = *nxad; 2325 xad->flag = xflag & ~XAD_NOTRECORDED; 2326 2327 /* insert rXAD:not_recorded */ 2328 xoff = xoff + nxlen; 2329 xlen = xlen - nxlen; 2330 xaddr = xaddr + nxlen; 2331 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2332 /* 2333 printf("xtUpdate.updateLeft.split p:0x%p\n", p); 2334 */ 2335 /* xtSpliUp() unpins leaf pages */ 2336 split.mp = mp; 2337 split.index = newindex; 2338 split.flag = xflag; 2339 split.off = xoff; 2340 split.len = xlen; 2341 split.addr = xaddr; 2342 split.pxdlist = NULL; 2343 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 2344 return rc; 2345 2346 /* get back old page */ 2347 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2348 if (rc) 2349 return rc; 2350 2351 /* 2352 * if leaf root has been split, original root has been 2353 * copied to new child page, i.e., original entry now 2354 * resides on the new child page; 2355 */ 2356 if (p->header.flag & BT_INTERNAL) { 2357 ASSERT(p->header.nextindex == 2358 cpu_to_le16(XTENTRYSTART + 1)); 2359 xad = &p->xad[XTENTRYSTART]; 2360 bn = addressXAD(xad); 2361 XT_PUTPAGE(mp); 2362 2363 /* get new child page */ 2364 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2365 if (rc) 2366 return rc; 2367 2368 BT_MARK_DIRTY(mp, ip); 2369 if (!test_cflag(COMMIT_Nolink, ip)) { 2370 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 2371 xtlck = (struct xtlock *) & tlck->lock; 2372 } 2373 } 2374 } else { 2375 /* if insert into middle, shift right remaining entries */ 2376 if (newindex < nextindex) 2377 memmove(&p->xad[newindex + 1], &p->xad[newindex], 2378 (nextindex - newindex) << L2XTSLOTSIZE); 2379 2380 /* insert the entry */ 2381 xad = &p->xad[newindex]; 2382 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 2383 2384 /* advance next available entry index. */ 2385 p->header.nextindex = 2386 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 2387 } 2388 2389 out: 2390 if (!test_cflag(COMMIT_Nolink, ip)) { 2391 xtlck->lwm.offset = (xtlck->lwm.offset) ? 2392 min(index0, (int)xtlck->lwm.offset) : index0; 2393 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 2394 xtlck->lwm.offset; 2395 } 2396 2397 /* unpin the leaf page */ 2398 XT_PUTPAGE(mp); 2399 2400 return rc; 2401 } 2402 2403 2404 /* 2405 * xtAppend() 2406 * 2407 * function: grow in append mode from contiguous region specified ; 2408 * 2409 * parameter: 2410 * tid - transaction id; 2411 * ip - file object; 2412 * xflag - extent flag: 2413 * xoff - extent offset; 2414 * maxblocks - max extent length; 2415 * xlen - extent length (in/out); 2416 * xaddrp - extent address pointer (in/out): 2417 * flag - 2418 * 2419 * return: 2420 */ 2421 int xtAppend(tid_t tid, /* transaction id */ 2422 struct inode *ip, int xflag, s64 xoff, s32 maxblocks, 2423 s32 * xlenp, /* (in/out) */ 2424 s64 * xaddrp, /* (in/out) */ 2425 int flag) 2426 { 2427 int rc = 0; 2428 struct metapage *mp; /* meta-page buffer */ 2429 xtpage_t *p; /* base B+-tree index page */ 2430 s64 bn, xaddr; 2431 int index, nextindex; 2432 struct btstack btstack; /* traverse stack */ 2433 struct xtsplit split; /* split information */ 2434 xad_t *xad; 2435 int cmp; 2436 struct tlock *tlck; 2437 struct xtlock *xtlck; 2438 int nsplit, nblocks, xlen; 2439 struct pxdlist pxdlist; 2440 pxd_t *pxd; 2441 2442 xaddr = *xaddrp; 2443 xlen = *xlenp; 2444 jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx", 2445 (ulong) xoff, maxblocks, xlen, (ulong) xaddr); 2446 2447 /* 2448 * search for the entry location at which to insert: 2449 * 2450 * xtFastSearch() and xtSearch() both returns (leaf page 2451 * pinned, index at which to insert). 2452 * n.b. xtSearch() may return index of maxentry of 2453 * the full page. 2454 */ 2455 if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT))) 2456 return rc; 2457 2458 /* retrieve search result */ 2459 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2460 2461 if (cmp == 0) { 2462 rc = -EEXIST; 2463 goto out; 2464 } 2465 //insert: 2466 /* 2467 * insert entry for new extent 2468 */ 2469 xflag |= XAD_NEW; 2470 2471 /* 2472 * if the leaf page is full, split the page and 2473 * propagate up the router entry for the new page from split 2474 * 2475 * The xtSplitUp() will insert the entry and unpin the leaf page. 2476 */ 2477 nextindex = le16_to_cpu(p->header.nextindex); 2478 if (nextindex < le16_to_cpu(p->header.maxentry)) 2479 goto insertLeaf; 2480 2481 /* 2482 * allocate new index blocks to cover index page split(s) 2483 */ 2484 nsplit = btstack.nsplit; 2485 split.pxdlist = &pxdlist; 2486 pxdlist.maxnpxd = pxdlist.npxd = 0; 2487 pxd = &pxdlist.pxd[0]; 2488 nblocks = JFS_SBI(ip->i_sb)->nbperpage; 2489 for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) { 2490 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) { 2491 PXDaddress(pxd, xaddr); 2492 PXDlength(pxd, nblocks); 2493 2494 pxdlist.maxnpxd++; 2495 2496 continue; 2497 } 2498 2499 /* undo allocation */ 2500 2501 goto out; 2502 } 2503 2504 xlen = min(xlen, maxblocks); 2505 2506 /* 2507 * allocate data extent requested 2508 */ 2509 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) 2510 goto out; 2511 2512 split.mp = mp; 2513 split.index = index; 2514 split.flag = xflag; 2515 split.off = xoff; 2516 split.len = xlen; 2517 split.addr = xaddr; 2518 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { 2519 /* undo data extent allocation */ 2520 dbFree(ip, *xaddrp, (s64) * xlenp); 2521 2522 return rc; 2523 } 2524 2525 *xaddrp = xaddr; 2526 *xlenp = xlen; 2527 return 0; 2528 2529 /* 2530 * insert the new entry into the leaf page 2531 */ 2532 insertLeaf: 2533 /* 2534 * allocate data extent requested 2535 */ 2536 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) 2537 goto out; 2538 2539 BT_MARK_DIRTY(mp, ip); 2540 /* 2541 * acquire a transaction lock on the leaf page; 2542 * 2543 * action: xad insertion/extension; 2544 */ 2545 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 2546 xtlck = (struct xtlock *) & tlck->lock; 2547 2548 /* insert the new entry: mark the entry NEW */ 2549 xad = &p->xad[index]; 2550 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 2551 2552 /* advance next available entry index */ 2553 p->header.nextindex = 2554 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 2555 2556 xtlck->lwm.offset = 2557 (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index; 2558 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 2559 xtlck->lwm.offset; 2560 2561 *xaddrp = xaddr; 2562 *xlenp = xlen; 2563 2564 out: 2565 /* unpin the leaf page */ 2566 XT_PUTPAGE(mp); 2567 2568 return rc; 2569 } 2570 #ifdef _STILL_TO_PORT 2571 2572 /* - TBD for defragmentaion/reorganization - 2573 * 2574 * xtDelete() 2575 * 2576 * function: 2577 * delete the entry with the specified key. 2578 * 2579 * N.B.: whole extent of the entry is assumed to be deleted. 2580 * 2581 * parameter: 2582 * 2583 * return: 2584 * ENOENT: if the entry is not found. 2585 * 2586 * exception: 2587 */ 2588 int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag) 2589 { 2590 int rc = 0; 2591 struct btstack btstack; 2592 int cmp; 2593 s64 bn; 2594 struct metapage *mp; 2595 xtpage_t *p; 2596 int index, nextindex; 2597 struct tlock *tlck; 2598 struct xtlock *xtlck; 2599 2600 /* 2601 * find the matching entry; xtSearch() pins the page 2602 */ 2603 if ((rc = xtSearch(ip, xoff, &cmp, &btstack, 0))) 2604 return rc; 2605 2606 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2607 if (cmp) { 2608 /* unpin the leaf page */ 2609 XT_PUTPAGE(mp); 2610 return -ENOENT; 2611 } 2612 2613 /* 2614 * delete the entry from the leaf page 2615 */ 2616 nextindex = le16_to_cpu(p->header.nextindex); 2617 p->header.nextindex = 2618 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1); 2619 2620 /* 2621 * if the leaf page bocome empty, free the page 2622 */ 2623 if (p->header.nextindex == cpu_to_le16(XTENTRYSTART)) 2624 return (xtDeleteUp(tid, ip, mp, p, &btstack)); 2625 2626 BT_MARK_DIRTY(mp, ip); 2627 /* 2628 * acquire a transaction lock on the leaf page; 2629 * 2630 * action:xad deletion; 2631 */ 2632 tlck = txLock(tid, ip, mp, tlckXTREE); 2633 xtlck = (struct xtlock *) & tlck->lock; 2634 xtlck->lwm.offset = 2635 (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index; 2636 2637 /* if delete from middle, shift left/compact the remaining entries */ 2638 if (index < nextindex - 1) 2639 memmove(&p->xad[index], &p->xad[index + 1], 2640 (nextindex - index - 1) * sizeof(xad_t)); 2641 2642 XT_PUTPAGE(mp); 2643 2644 return 0; 2645 } 2646 2647 2648 /* - TBD for defragmentaion/reorganization - 2649 * 2650 * xtDeleteUp() 2651 * 2652 * function: 2653 * free empty pages as propagating deletion up the tree 2654 * 2655 * parameter: 2656 * 2657 * return: 2658 */ 2659 static int 2660 xtDeleteUp(tid_t tid, struct inode *ip, 2661 struct metapage * fmp, xtpage_t * fp, struct btstack * btstack) 2662 { 2663 int rc = 0; 2664 struct metapage *mp; 2665 xtpage_t *p; 2666 int index, nextindex; 2667 s64 xaddr; 2668 int xlen; 2669 struct btframe *parent; 2670 struct tlock *tlck; 2671 struct xtlock *xtlck; 2672 2673 /* 2674 * keep root leaf page which has become empty 2675 */ 2676 if (fp->header.flag & BT_ROOT) { 2677 /* keep the root page */ 2678 fp->header.flag &= ~BT_INTERNAL; 2679 fp->header.flag |= BT_LEAF; 2680 fp->header.nextindex = cpu_to_le16(XTENTRYSTART); 2681 2682 /* XT_PUTPAGE(fmp); */ 2683 2684 return 0; 2685 } 2686 2687 /* 2688 * free non-root leaf page 2689 */ 2690 if ((rc = xtRelink(tid, ip, fp))) { 2691 XT_PUTPAGE(fmp); 2692 return rc; 2693 } 2694 2695 xaddr = addressPXD(&fp->header.self); 2696 xlen = lengthPXD(&fp->header.self); 2697 /* free the page extent */ 2698 dbFree(ip, xaddr, (s64) xlen); 2699 2700 /* free the buffer page */ 2701 discard_metapage(fmp); 2702 2703 /* 2704 * propagate page deletion up the index tree 2705 * 2706 * If the delete from the parent page makes it empty, 2707 * continue all the way up the tree. 2708 * stop if the root page is reached (which is never deleted) or 2709 * if the entry deletion does not empty the page. 2710 */ 2711 while ((parent = BT_POP(btstack)) != NULL) { 2712 /* get/pin the parent page <sp> */ 2713 XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); 2714 if (rc) 2715 return rc; 2716 2717 index = parent->index; 2718 2719 /* delete the entry for the freed child page from parent. 2720 */ 2721 nextindex = le16_to_cpu(p->header.nextindex); 2722 2723 /* 2724 * the parent has the single entry being deleted: 2725 * free the parent page which has become empty. 2726 */ 2727 if (nextindex == 1) { 2728 if (p->header.flag & BT_ROOT) { 2729 /* keep the root page */ 2730 p->header.flag &= ~BT_INTERNAL; 2731 p->header.flag |= BT_LEAF; 2732 p->header.nextindex = 2733 cpu_to_le16(XTENTRYSTART); 2734 2735 /* XT_PUTPAGE(mp); */ 2736 2737 break; 2738 } else { 2739 /* free the parent page */ 2740 if ((rc = xtRelink(tid, ip, p))) 2741 return rc; 2742 2743 xaddr = addressPXD(&p->header.self); 2744 /* free the page extent */ 2745 dbFree(ip, xaddr, 2746 (s64) JFS_SBI(ip->i_sb)->nbperpage); 2747 2748 /* unpin/free the buffer page */ 2749 discard_metapage(mp); 2750 2751 /* propagate up */ 2752 continue; 2753 } 2754 } 2755 /* 2756 * the parent has other entries remaining: 2757 * delete the router entry from the parent page. 2758 */ 2759 else { 2760 BT_MARK_DIRTY(mp, ip); 2761 /* 2762 * acquire a transaction lock on the leaf page; 2763 * 2764 * action:xad deletion; 2765 */ 2766 tlck = txLock(tid, ip, mp, tlckXTREE); 2767 xtlck = (struct xtlock *) & tlck->lock; 2768 xtlck->lwm.offset = 2769 (xtlck->lwm.offset) ? min(index, 2770 xtlck->lwm. 2771 offset) : index; 2772 2773 /* if delete from middle, 2774 * shift left/compact the remaining entries in the page 2775 */ 2776 if (index < nextindex - 1) 2777 memmove(&p->xad[index], &p->xad[index + 1], 2778 (nextindex - index - 2779 1) << L2XTSLOTSIZE); 2780 2781 p->header.nextindex = 2782 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 2783 1); 2784 jfs_info("xtDeleteUp(entry): 0x%lx[%d]", 2785 (ulong) parent->bn, index); 2786 } 2787 2788 /* unpin the parent page */ 2789 XT_PUTPAGE(mp); 2790 2791 /* exit propagation up */ 2792 break; 2793 } 2794 2795 return 0; 2796 } 2797 2798 2799 /* 2800 * NAME: xtRelocate() 2801 * 2802 * FUNCTION: relocate xtpage or data extent of regular file; 2803 * This function is mainly used by defragfs utility. 2804 * 2805 * NOTE: This routine does not have the logic to handle 2806 * uncommitted allocated extent. The caller should call 2807 * txCommit() to commit all the allocation before call 2808 * this routine. 2809 */ 2810 int 2811 xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ 2812 s64 nxaddr, /* new xaddr */ 2813 int xtype) 2814 { /* extent type: XTPAGE or DATAEXT */ 2815 int rc = 0; 2816 struct tblock *tblk; 2817 struct tlock *tlck; 2818 struct xtlock *xtlck; 2819 struct metapage *mp, *pmp, *lmp, *rmp; /* meta-page buffer */ 2820 xtpage_t *p, *pp, *rp, *lp; /* base B+-tree index page */ 2821 xad_t *xad; 2822 pxd_t *pxd; 2823 s64 xoff, xsize; 2824 int xlen; 2825 s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn; 2826 cbuf_t *cp; 2827 s64 offset, nbytes, nbrd, pno; 2828 int nb, npages, nblks; 2829 s64 bn; 2830 int cmp; 2831 int index; 2832 struct pxd_lock *pxdlock; 2833 struct btstack btstack; /* traverse stack */ 2834 2835 xtype = xtype & EXTENT_TYPE; 2836 2837 xoff = offsetXAD(oxad); 2838 oxaddr = addressXAD(oxad); 2839 xlen = lengthXAD(oxad); 2840 2841 /* validate extent offset */ 2842 offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; 2843 if (offset >= ip->i_size) 2844 return -ESTALE; /* stale extent */ 2845 2846 jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx", 2847 xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr); 2848 2849 /* 2850 * 1. get and validate the parent xtpage/xad entry 2851 * covering the source extent to be relocated; 2852 */ 2853 if (xtype == DATAEXT) { 2854 /* search in leaf entry */ 2855 rc = xtSearch(ip, xoff, &cmp, &btstack, 0); 2856 if (rc) 2857 return rc; 2858 2859 /* retrieve search result */ 2860 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2861 2862 if (cmp) { 2863 XT_PUTPAGE(pmp); 2864 return -ESTALE; 2865 } 2866 2867 /* validate for exact match with a single entry */ 2868 xad = &pp->xad[index]; 2869 if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) { 2870 XT_PUTPAGE(pmp); 2871 return -ESTALE; 2872 } 2873 } else { /* (xtype == XTPAGE) */ 2874 2875 /* search in internal entry */ 2876 rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0); 2877 if (rc) 2878 return rc; 2879 2880 /* retrieve search result */ 2881 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2882 2883 if (cmp) { 2884 XT_PUTPAGE(pmp); 2885 return -ESTALE; 2886 } 2887 2888 /* xtSearchNode() validated for exact match with a single entry 2889 */ 2890 xad = &pp->xad[index]; 2891 } 2892 jfs_info("xtRelocate: parent xad entry validated."); 2893 2894 /* 2895 * 2. relocate the extent 2896 */ 2897 if (xtype == DATAEXT) { 2898 /* if the extent is allocated-but-not-recorded 2899 * there is no real data to be moved in this extent, 2900 */ 2901 if (xad->flag & XAD_NOTRECORDED) 2902 goto out; 2903 else 2904 /* release xtpage for cmRead()/xtLookup() */ 2905 XT_PUTPAGE(pmp); 2906 2907 /* 2908 * cmRelocate() 2909 * 2910 * copy target data pages to be relocated; 2911 * 2912 * data extent must start at page boundary and 2913 * multiple of page size (except the last data extent); 2914 * read in each page of the source data extent into cbuf, 2915 * update the cbuf extent descriptor of the page to be 2916 * homeward bound to new dst data extent 2917 * copy the data from the old extent to new extent. 2918 * copy is essential for compressed files to avoid problems 2919 * that can arise if there was a change in compression 2920 * algorithms. 2921 * it is a good strategy because it may disrupt cache 2922 * policy to keep the pages in memory afterwards. 2923 */ 2924 offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; 2925 assert((offset & CM_OFFSET) == 0); 2926 nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize; 2927 pno = offset >> CM_L2BSIZE; 2928 npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE; 2929 /* 2930 npages = ((offset + nbytes - 1) >> CM_L2BSIZE) - 2931 (offset >> CM_L2BSIZE) + 1; 2932 */ 2933 sxaddr = oxaddr; 2934 dxaddr = nxaddr; 2935 2936 /* process the request one cache buffer at a time */ 2937 for (nbrd = 0; nbrd < nbytes; nbrd += nb, 2938 offset += nb, pno++, npages--) { 2939 /* compute page size */ 2940 nb = min(nbytes - nbrd, CM_BSIZE); 2941 2942 /* get the cache buffer of the page */ 2943 if (rc = cmRead(ip, offset, npages, &cp)) 2944 break; 2945 2946 assert(addressPXD(&cp->cm_pxd) == sxaddr); 2947 assert(!cp->cm_modified); 2948 2949 /* bind buffer with the new extent address */ 2950 nblks = nb >> JFS_IP(ip->i_sb)->l2bsize; 2951 cmSetXD(ip, cp, pno, dxaddr, nblks); 2952 2953 /* release the cbuf, mark it as modified */ 2954 cmPut(cp, TRUE); 2955 2956 dxaddr += nblks; 2957 sxaddr += nblks; 2958 } 2959 2960 /* get back parent page */ 2961 if ((rc = xtSearch(ip, xoff, &cmp, &btstack, 0))) 2962 return rc; 2963 2964 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2965 jfs_info("xtRelocate: target data extent relocated."); 2966 } else { /* (xtype == XTPAGE) */ 2967 2968 /* 2969 * read in the target xtpage from the source extent; 2970 */ 2971 XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); 2972 if (rc) { 2973 XT_PUTPAGE(pmp); 2974 return rc; 2975 } 2976 2977 /* 2978 * read in sibling pages if any to update sibling pointers; 2979 */ 2980 rmp = NULL; 2981 if (p->header.next) { 2982 nextbn = le64_to_cpu(p->header.next); 2983 XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc); 2984 if (rc) { 2985 XT_PUTPAGE(pmp); 2986 XT_PUTPAGE(mp); 2987 return (rc); 2988 } 2989 } 2990 2991 lmp = NULL; 2992 if (p->header.prev) { 2993 prevbn = le64_to_cpu(p->header.prev); 2994 XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc); 2995 if (rc) { 2996 XT_PUTPAGE(pmp); 2997 XT_PUTPAGE(mp); 2998 if (rmp) 2999 XT_PUTPAGE(rmp); 3000 return (rc); 3001 } 3002 } 3003 3004 /* at this point, all xtpages to be updated are in memory */ 3005 3006 /* 3007 * update sibling pointers of sibling xtpages if any; 3008 */ 3009 if (lmp) { 3010 BT_MARK_DIRTY(lmp, ip); 3011 tlck = 3012 txLock(tid, ip, lmp, tlckXTREE | tlckRELINK); 3013 lp->header.next = cpu_to_le64(nxaddr); 3014 XT_PUTPAGE(lmp); 3015 } 3016 3017 if (rmp) { 3018 BT_MARK_DIRTY(rmp, ip); 3019 tlck = 3020 txLock(tid, ip, rmp, tlckXTREE | tlckRELINK); 3021 rp->header.prev = cpu_to_le64(nxaddr); 3022 XT_PUTPAGE(rmp); 3023 } 3024 3025 /* 3026 * update the target xtpage to be relocated 3027 * 3028 * update the self address of the target page 3029 * and write to destination extent; 3030 * redo image covers the whole xtpage since it is new page 3031 * to the destination extent; 3032 * update of bmap for the free of source extent 3033 * of the target xtpage itself: 3034 * update of bmap for the allocation of destination extent 3035 * of the target xtpage itself: 3036 * update of bmap for the extents covered by xad entries in 3037 * the target xtpage is not necessary since they are not 3038 * updated; 3039 * if not committed before this relocation, 3040 * target page may contain XAD_NEW entries which must 3041 * be scanned for bmap update (logredo() always 3042 * scan xtpage REDOPAGE image for bmap update); 3043 * if committed before this relocation (tlckRELOCATE), 3044 * scan may be skipped by commit() and logredo(); 3045 */ 3046 BT_MARK_DIRTY(mp, ip); 3047 /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */ 3048 tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW); 3049 xtlck = (struct xtlock *) & tlck->lock; 3050 3051 /* update the self address in the xtpage header */ 3052 pxd = &p->header.self; 3053 PXDaddress(pxd, nxaddr); 3054 3055 /* linelock for the after image of the whole page */ 3056 xtlck->lwm.length = 3057 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; 3058 3059 /* update the buffer extent descriptor of target xtpage */ 3060 xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; 3061 bmSetXD(mp, nxaddr, xsize); 3062 3063 /* unpin the target page to new homeward bound */ 3064 XT_PUTPAGE(mp); 3065 jfs_info("xtRelocate: target xtpage relocated."); 3066 } 3067 3068 /* 3069 * 3. acquire maplock for the source extent to be freed; 3070 * 3071 * acquire a maplock saving the src relocated extent address; 3072 * to free of the extent at commit time; 3073 */ 3074 out: 3075 /* if DATAEXT relocation, write a LOG_UPDATEMAP record for 3076 * free PXD of the source data extent (logredo() will update 3077 * bmap for free of source data extent), and update bmap for 3078 * free of the source data extent; 3079 */ 3080 if (xtype == DATAEXT) 3081 tlck = txMaplock(tid, ip, tlckMAP); 3082 /* if XTPAGE relocation, write a LOG_NOREDOPAGE record 3083 * for the source xtpage (logredo() will init NoRedoPage 3084 * filter and will also update bmap for free of the source 3085 * xtpage), and update bmap for free of the source xtpage; 3086 * N.B. We use tlckMAP instead of tlkcXTREE because there 3087 * is no buffer associated with this lock since the buffer 3088 * has been redirected to the target location. 3089 */ 3090 else /* (xtype == XTPAGE) */ 3091 tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE); 3092 3093 pxdlock = (struct pxd_lock *) & tlck->lock; 3094 pxdlock->flag = mlckFREEPXD; 3095 PXDaddress(&pxdlock->pxd, oxaddr); 3096 PXDlength(&pxdlock->pxd, xlen); 3097 pxdlock->index = 1; 3098 3099 /* 3100 * 4. update the parent xad entry for relocation; 3101 * 3102 * acquire tlck for the parent entry with XAD_NEW as entry 3103 * update which will write LOG_REDOPAGE and update bmap for 3104 * allocation of XAD_NEW destination extent; 3105 */ 3106 jfs_info("xtRelocate: update parent xad entry."); 3107 BT_MARK_DIRTY(pmp, ip); 3108 tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW); 3109 xtlck = (struct xtlock *) & tlck->lock; 3110 3111 /* update the XAD with the new destination extent; */ 3112 xad = &pp->xad[index]; 3113 xad->flag |= XAD_NEW; 3114 XADaddress(xad, nxaddr); 3115 3116 xtlck->lwm.offset = min(index, xtlck->lwm.offset); 3117 xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) - 3118 xtlck->lwm.offset; 3119 3120 /* unpin the parent xtpage */ 3121 XT_PUTPAGE(pmp); 3122 3123 return rc; 3124 } 3125 3126 3127 /* 3128 * xtSearchNode() 3129 * 3130 * function: search for the internal xad entry covering specified extent. 3131 * This function is mainly used by defragfs utility. 3132 * 3133 * parameters: 3134 * ip - file object; 3135 * xad - extent to find; 3136 * cmpp - comparison result: 3137 * btstack - traverse stack; 3138 * flag - search process flag; 3139 * 3140 * returns: 3141 * btstack contains (bn, index) of search path traversed to the entry. 3142 * *cmpp is set to result of comparison with the entry returned. 3143 * the page containing the entry is pinned at exit. 3144 */ 3145 static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ 3146 int *cmpp, struct btstack * btstack, int flag) 3147 { 3148 int rc = 0; 3149 s64 xoff, xaddr; 3150 int xlen; 3151 int cmp = 1; /* init for empty page */ 3152 s64 bn; /* block number */ 3153 struct metapage *mp; /* meta-page buffer */ 3154 xtpage_t *p; /* page */ 3155 int base, index, lim; 3156 struct btframe *btsp; 3157 s64 t64; 3158 3159 BT_CLR(btstack); 3160 3161 xoff = offsetXAD(xad); 3162 xlen = lengthXAD(xad); 3163 xaddr = addressXAD(xad); 3164 3165 /* 3166 * search down tree from root: 3167 * 3168 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 3169 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 3170 * 3171 * if entry with search key K is not found 3172 * internal page search find the entry with largest key Ki 3173 * less than K which point to the child page to search; 3174 * leaf page search find the entry with smallest key Kj 3175 * greater than K so that the returned index is the position of 3176 * the entry to be shifted right for insertion of new entry. 3177 * for empty tree, search key is greater than any key of the tree. 3178 * 3179 * by convention, root bn = 0. 3180 */ 3181 for (bn = 0;;) { 3182 /* get/pin the page to search */ 3183 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3184 if (rc) 3185 return rc; 3186 if (p->header.flag & BT_LEAF) { 3187 XT_PUTPAGE(mp); 3188 return -ESTALE; 3189 } 3190 3191 lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 3192 3193 /* 3194 * binary search with search key K on the current page 3195 */ 3196 for (base = XTENTRYSTART; lim; lim >>= 1) { 3197 index = base + (lim >> 1); 3198 3199 XT_CMP(cmp, xoff, &p->xad[index], t64); 3200 if (cmp == 0) { 3201 /* 3202 * search hit 3203 * 3204 * verify for exact match; 3205 */ 3206 if (xaddr == addressXAD(&p->xad[index]) && 3207 xoff == offsetXAD(&p->xad[index])) { 3208 *cmpp = cmp; 3209 3210 /* save search result */ 3211 btsp = btstack->top; 3212 btsp->bn = bn; 3213 btsp->index = index; 3214 btsp->mp = mp; 3215 3216 return 0; 3217 } 3218 3219 /* descend/search its child page */ 3220 goto next; 3221 } 3222 3223 if (cmp > 0) { 3224 base = index + 1; 3225 --lim; 3226 } 3227 } 3228 3229 /* 3230 * search miss - non-leaf page: 3231 * 3232 * base is the smallest index with key (Kj) greater than 3233 * search key (K) and may be zero or maxentry index. 3234 * if base is non-zero, decrement base by one to get the parent 3235 * entry of the child page to search. 3236 */ 3237 index = base ? base - 1 : base; 3238 3239 /* 3240 * go down to child page 3241 */ 3242 next: 3243 /* get the child page block number */ 3244 bn = addressXAD(&p->xad[index]); 3245 3246 /* unpin the parent page */ 3247 XT_PUTPAGE(mp); 3248 } 3249 } 3250 3251 3252 /* 3253 * xtRelink() 3254 * 3255 * function: 3256 * link around a freed page. 3257 * 3258 * Parameter: 3259 * int tid, 3260 * struct inode *ip, 3261 * xtpage_t *p) 3262 * 3263 * returns: 3264 */ 3265 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p) 3266 { 3267 int rc = 0; 3268 struct metapage *mp; 3269 s64 nextbn, prevbn; 3270 struct tlock *tlck; 3271 3272 nextbn = le64_to_cpu(p->header.next); 3273 prevbn = le64_to_cpu(p->header.prev); 3274 3275 /* update prev pointer of the next page */ 3276 if (nextbn != 0) { 3277 XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 3278 if (rc) 3279 return rc; 3280 3281 /* 3282 * acquire a transaction lock on the page; 3283 * 3284 * action: update prev pointer; 3285 */ 3286 BT_MARK_DIRTY(mp, ip); 3287 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 3288 3289 /* the page may already have been tlock'd */ 3290 3291 p->header.prev = cpu_to_le64(prevbn); 3292 3293 XT_PUTPAGE(mp); 3294 } 3295 3296 /* update next pointer of the previous page */ 3297 if (prevbn != 0) { 3298 XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc); 3299 if (rc) 3300 return rc; 3301 3302 /* 3303 * acquire a transaction lock on the page; 3304 * 3305 * action: update next pointer; 3306 */ 3307 BT_MARK_DIRTY(mp, ip); 3308 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 3309 3310 /* the page may already have been tlock'd */ 3311 3312 p->header.next = le64_to_cpu(nextbn); 3313 3314 XT_PUTPAGE(mp); 3315 } 3316 3317 return 0; 3318 } 3319 #endif /* _STILL_TO_PORT */ 3320 3321 3322 /* 3323 * xtInitRoot() 3324 * 3325 * initialize file root (inline in inode) 3326 */ 3327 void xtInitRoot(tid_t tid, struct inode *ip) 3328 { 3329 xtpage_t *p; 3330 3331 /* 3332 * acquire a transaction lock on the root 3333 * 3334 * action: 3335 */ 3336 txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag, 3337 tlckXTREE | tlckNEW); 3338 p = &JFS_IP(ip)->i_xtroot; 3339 3340 p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; 3341 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3342 3343 if (S_ISDIR(ip->i_mode)) 3344 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR); 3345 else { 3346 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT); 3347 ip->i_size = 0; 3348 } 3349 3350 3351 return; 3352 } 3353 3354 3355 /* 3356 * We can run into a deadlock truncating a file with a large number of 3357 * xtree pages (large fragmented file). A robust fix would entail a 3358 * reservation system where we would reserve a number of metadata pages 3359 * and tlocks which we would be guaranteed without a deadlock. Without 3360 * this, a partial fix is to limit number of metadata pages we will lock 3361 * in a single transaction. Currently we will truncate the file so that 3362 * no more than 50 leaf pages will be locked. The caller of xtTruncate 3363 * will be responsible for ensuring that the current transaction gets 3364 * committed, and that subsequent transactions are created to truncate 3365 * the file further if needed. 3366 */ 3367 #define MAX_TRUNCATE_LEAVES 50 3368 3369 /* 3370 * xtTruncate() 3371 * 3372 * function: 3373 * traverse for truncation logging backward bottom up; 3374 * terminate at the last extent entry at the current subtree 3375 * root page covering new down size. 3376 * truncation may occur within the last extent entry. 3377 * 3378 * parameter: 3379 * int tid, 3380 * struct inode *ip, 3381 * s64 newsize, 3382 * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} 3383 * 3384 * return: 3385 * 3386 * note: 3387 * PWMAP: 3388 * 1. truncate (non-COMMIT_NOLINK file) 3389 * by jfs_truncate() or jfs_open(O_TRUNC): 3390 * xtree is updated; 3391 * 2. truncate index table of directory when last entry removed 3392 * map update via tlock at commit time; 3393 * PMAP: 3394 * Call xtTruncate_pmap instead 3395 * WMAP: 3396 * 1. remove (free zero link count) on last reference release 3397 * (pmap has been freed at commit zero link count); 3398 * 2. truncate (COMMIT_NOLINK file, i.e., tmp file): 3399 * xtree is updated; 3400 * map update directly at truncation time; 3401 * 3402 * if (DELETE) 3403 * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); 3404 * else if (TRUNCATE) 3405 * must write LOG_NOREDOPAGE for deleted index page; 3406 * 3407 * pages may already have been tlocked by anonymous transactions 3408 * during file growth (i.e., write) before truncation; 3409 * 3410 * except last truncated entry, deleted entries remains as is 3411 * in the page (nextindex is updated) for other use 3412 * (e.g., log/update allocation map): this avoid copying the page 3413 * info but delay free of pages; 3414 * 3415 */ 3416 s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) 3417 { 3418 int rc = 0; 3419 s64 teof; 3420 struct metapage *mp; 3421 xtpage_t *p; 3422 s64 bn; 3423 int index, nextindex; 3424 xad_t *xad; 3425 s64 xoff, xaddr; 3426 int xlen, len, freexlen; 3427 struct btstack btstack; 3428 struct btframe *parent; 3429 struct tblock *tblk = NULL; 3430 struct tlock *tlck = NULL; 3431 struct xtlock *xtlck = NULL; 3432 struct xdlistlock xadlock; /* maplock for COMMIT_WMAP */ 3433 struct pxd_lock *pxdlock; /* maplock for COMMIT_WMAP */ 3434 s64 nfreed; 3435 int freed, log; 3436 int locked_leaves = 0; 3437 3438 /* save object truncation type */ 3439 if (tid) { 3440 tblk = tid_to_tblock(tid); 3441 tblk->xflag |= flag; 3442 } 3443 3444 nfreed = 0; 3445 3446 flag &= COMMIT_MAP; 3447 assert(flag != COMMIT_PMAP); 3448 3449 if (flag == COMMIT_PWMAP) 3450 log = 1; 3451 else { 3452 log = 0; 3453 xadlock.flag = mlckFREEXADLIST; 3454 xadlock.index = 1; 3455 } 3456 3457 /* 3458 * if the newsize is not an integral number of pages, 3459 * the file between newsize and next page boundary will 3460 * be cleared. 3461 * if truncating into a file hole, it will cause 3462 * a full block to be allocated for the logical block. 3463 */ 3464 3465 /* 3466 * release page blocks of truncated region <teof, eof> 3467 * 3468 * free the data blocks from the leaf index blocks. 3469 * delete the parent index entries corresponding to 3470 * the freed child data/index blocks. 3471 * free the index blocks themselves which aren't needed 3472 * in new sized file. 3473 * 3474 * index blocks are updated only if the blocks are to be 3475 * retained in the new sized file. 3476 * if type is PMAP, the data and index pages are NOT 3477 * freed, and the data and index blocks are NOT freed 3478 * from working map. 3479 * (this will allow continued access of data/index of 3480 * temporary file (zerolink count file truncated to zero-length)). 3481 */ 3482 teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >> 3483 JFS_SBI(ip->i_sb)->l2bsize; 3484 3485 /* clear stack */ 3486 BT_CLR(&btstack); 3487 3488 /* 3489 * start with root 3490 * 3491 * root resides in the inode 3492 */ 3493 bn = 0; 3494 3495 /* 3496 * first access of each page: 3497 */ 3498 getPage: 3499 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3500 if (rc) 3501 return rc; 3502 3503 /* process entries backward from last index */ 3504 index = le16_to_cpu(p->header.nextindex) - 1; 3505 3506 if (p->header.flag & BT_INTERNAL) 3507 goto getChild; 3508 3509 /* 3510 * leaf page 3511 */ 3512 3513 /* Since this is the rightmost leaf, and we may have already freed 3514 * a page that was formerly to the right, let's make sure that the 3515 * next pointer is zero. 3516 */ 3517 if (p->header.next) { 3518 if (log) 3519 /* 3520 * Make sure this change to the header is logged. 3521 * If we really truncate this leaf, the flag 3522 * will be changed to tlckTRUNCATE 3523 */ 3524 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 3525 BT_MARK_DIRTY(mp, ip); 3526 p->header.next = 0; 3527 } 3528 3529 freed = 0; 3530 3531 /* does region covered by leaf page precede Teof ? */ 3532 xad = &p->xad[index]; 3533 xoff = offsetXAD(xad); 3534 xlen = lengthXAD(xad); 3535 if (teof >= xoff + xlen) { 3536 XT_PUTPAGE(mp); 3537 goto getParent; 3538 } 3539 3540 /* (re)acquire tlock of the leaf page */ 3541 if (log) { 3542 if (++locked_leaves > MAX_TRUNCATE_LEAVES) { 3543 /* 3544 * We need to limit the size of the transaction 3545 * to avoid exhausting pagecache & tlocks 3546 */ 3547 XT_PUTPAGE(mp); 3548 newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; 3549 goto getParent; 3550 } 3551 tlck = txLock(tid, ip, mp, tlckXTREE); 3552 tlck->type = tlckXTREE | tlckTRUNCATE; 3553 xtlck = (struct xtlock *) & tlck->lock; 3554 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; 3555 } 3556 BT_MARK_DIRTY(mp, ip); 3557 3558 /* 3559 * scan backward leaf page entries 3560 */ 3561 for (; index >= XTENTRYSTART; index--) { 3562 xad = &p->xad[index]; 3563 xoff = offsetXAD(xad); 3564 xlen = lengthXAD(xad); 3565 xaddr = addressXAD(xad); 3566 3567 /* 3568 * The "data" for a directory is indexed by the block 3569 * device's address space. This metadata must be invalidated 3570 * here 3571 */ 3572 if (S_ISDIR(ip->i_mode) && (teof == 0)) 3573 invalidate_xad_metapages(ip, *xad); 3574 /* 3575 * entry beyond eof: continue scan of current page 3576 * xad 3577 * ---|---=======-------> 3578 * eof 3579 */ 3580 if (teof < xoff) { 3581 nfreed += xlen; 3582 continue; 3583 } 3584 3585 /* 3586 * (xoff <= teof): last entry to be deleted from page; 3587 * If other entries remain in page: keep and update the page. 3588 */ 3589 3590 /* 3591 * eof == entry_start: delete the entry 3592 * xad 3593 * -------|=======-------> 3594 * eof 3595 * 3596 */ 3597 if (teof == xoff) { 3598 nfreed += xlen; 3599 3600 if (index == XTENTRYSTART) 3601 break; 3602 3603 nextindex = index; 3604 } 3605 /* 3606 * eof within the entry: truncate the entry. 3607 * xad 3608 * -------===|===-------> 3609 * eof 3610 */ 3611 else if (teof < xoff + xlen) { 3612 /* update truncated entry */ 3613 len = teof - xoff; 3614 freexlen = xlen - len; 3615 XADlength(xad, len); 3616 3617 /* save pxd of truncated extent in tlck */ 3618 xaddr += len; 3619 if (log) { /* COMMIT_PWMAP */ 3620 xtlck->lwm.offset = (xtlck->lwm.offset) ? 3621 min(index, (int)xtlck->lwm.offset) : index; 3622 xtlck->lwm.length = index + 1 - 3623 xtlck->lwm.offset; 3624 xtlck->twm.offset = index; 3625 pxdlock = (struct pxd_lock *) & xtlck->pxdlock; 3626 pxdlock->flag = mlckFREEPXD; 3627 PXDaddress(&pxdlock->pxd, xaddr); 3628 PXDlength(&pxdlock->pxd, freexlen); 3629 } 3630 /* free truncated extent */ 3631 else { /* COMMIT_WMAP */ 3632 3633 pxdlock = (struct pxd_lock *) & xadlock; 3634 pxdlock->flag = mlckFREEPXD; 3635 PXDaddress(&pxdlock->pxd, xaddr); 3636 PXDlength(&pxdlock->pxd, freexlen); 3637 txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP); 3638 3639 /* reset map lock */ 3640 xadlock.flag = mlckFREEXADLIST; 3641 } 3642 3643 /* current entry is new last entry; */ 3644 nextindex = index + 1; 3645 3646 nfreed += freexlen; 3647 } 3648 /* 3649 * eof beyond the entry: 3650 * xad 3651 * -------=======---|---> 3652 * eof 3653 */ 3654 else { /* (xoff + xlen < teof) */ 3655 3656 nextindex = index + 1; 3657 } 3658 3659 if (nextindex < le16_to_cpu(p->header.nextindex)) { 3660 if (!log) { /* COMMIT_WAMP */ 3661 xadlock.xdlist = &p->xad[nextindex]; 3662 xadlock.count = 3663 le16_to_cpu(p->header.nextindex) - 3664 nextindex; 3665 txFreeMap(ip, (struct maplock *) & xadlock, 3666 NULL, COMMIT_WMAP); 3667 } 3668 p->header.nextindex = cpu_to_le16(nextindex); 3669 } 3670 3671 XT_PUTPAGE(mp); 3672 3673 /* assert(freed == 0); */ 3674 goto getParent; 3675 } /* end scan of leaf page entries */ 3676 3677 freed = 1; 3678 3679 /* 3680 * leaf page become empty: free the page if type != PMAP 3681 */ 3682 if (log) { /* COMMIT_PWMAP */ 3683 /* txCommit() with tlckFREE: 3684 * free data extents covered by leaf [XTENTRYSTART:hwm); 3685 * invalidate leaf if COMMIT_PWMAP; 3686 * if (TRUNCATE), will write LOG_NOREDOPAGE; 3687 */ 3688 tlck->type = tlckXTREE | tlckFREE; 3689 } else { /* COMMIT_WAMP */ 3690 3691 /* free data extents covered by leaf */ 3692 xadlock.xdlist = &p->xad[XTENTRYSTART]; 3693 xadlock.count = 3694 le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 3695 txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP); 3696 } 3697 3698 if (p->header.flag & BT_ROOT) { 3699 p->header.flag &= ~BT_INTERNAL; 3700 p->header.flag |= BT_LEAF; 3701 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3702 3703 XT_PUTPAGE(mp); /* debug */ 3704 goto out; 3705 } else { 3706 if (log) { /* COMMIT_PWMAP */ 3707 /* page will be invalidated at tx completion 3708 */ 3709 XT_PUTPAGE(mp); 3710 } else { /* COMMIT_WMAP */ 3711 3712 if (mp->lid) 3713 lid_to_tlock(mp->lid)->flag |= tlckFREELOCK; 3714 3715 /* invalidate empty leaf page */ 3716 discard_metapage(mp); 3717 } 3718 } 3719 3720 /* 3721 * the leaf page become empty: delete the parent entry 3722 * for the leaf page if the parent page is to be kept 3723 * in the new sized file. 3724 */ 3725 3726 /* 3727 * go back up to the parent page 3728 */ 3729 getParent: 3730 /* pop/restore parent entry for the current child page */ 3731 if ((parent = BT_POP(&btstack)) == NULL) 3732 /* current page must have been root */ 3733 goto out; 3734 3735 /* get back the parent page */ 3736 bn = parent->bn; 3737 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3738 if (rc) 3739 return rc; 3740 3741 index = parent->index; 3742 3743 /* 3744 * child page was not empty: 3745 */ 3746 if (freed == 0) { 3747 /* has any entry deleted from parent ? */ 3748 if (index < le16_to_cpu(p->header.nextindex) - 1) { 3749 /* (re)acquire tlock on the parent page */ 3750 if (log) { /* COMMIT_PWMAP */ 3751 /* txCommit() with tlckTRUNCATE: 3752 * free child extents covered by parent [); 3753 */ 3754 tlck = txLock(tid, ip, mp, tlckXTREE); 3755 xtlck = (struct xtlock *) & tlck->lock; 3756 if (!(tlck->type & tlckTRUNCATE)) { 3757 xtlck->hwm.offset = 3758 le16_to_cpu(p->header. 3759 nextindex) - 1; 3760 tlck->type = 3761 tlckXTREE | tlckTRUNCATE; 3762 } 3763 } else { /* COMMIT_WMAP */ 3764 3765 /* free child extents covered by parent */ 3766 xadlock.xdlist = &p->xad[index + 1]; 3767 xadlock.count = 3768 le16_to_cpu(p->header.nextindex) - 3769 index - 1; 3770 txFreeMap(ip, (struct maplock *) & xadlock, 3771 NULL, COMMIT_WMAP); 3772 } 3773 BT_MARK_DIRTY(mp, ip); 3774 3775 p->header.nextindex = cpu_to_le16(index + 1); 3776 } 3777 XT_PUTPAGE(mp); 3778 goto getParent; 3779 } 3780 3781 /* 3782 * child page was empty: 3783 */ 3784 nfreed += lengthXAD(&p->xad[index]); 3785 3786 /* 3787 * During working map update, child page's tlock must be handled 3788 * before parent's. This is because the parent's tlock will cause 3789 * the child's disk space to be marked available in the wmap, so 3790 * it's important that the child page be released by that time. 3791 * 3792 * ToDo: tlocks should be on doubly-linked list, so we can 3793 * quickly remove it and add it to the end. 3794 */ 3795 3796 /* 3797 * Move parent page's tlock to the end of the tid's tlock list 3798 */ 3799 if (log && mp->lid && (tblk->last != mp->lid) && 3800 lid_to_tlock(mp->lid)->tid) { 3801 lid_t lid = mp->lid; 3802 struct tlock *prev; 3803 3804 tlck = lid_to_tlock(lid); 3805 3806 if (tblk->next == lid) 3807 tblk->next = tlck->next; 3808 else { 3809 for (prev = lid_to_tlock(tblk->next); 3810 prev->next != lid; 3811 prev = lid_to_tlock(prev->next)) { 3812 assert(prev->next); 3813 } 3814 prev->next = tlck->next; 3815 } 3816 lid_to_tlock(tblk->last)->next = lid; 3817 tlck->next = 0; 3818 tblk->last = lid; 3819 } 3820 3821 /* 3822 * parent page become empty: free the page 3823 */ 3824 if (index == XTENTRYSTART) { 3825 if (log) { /* COMMIT_PWMAP */ 3826 /* txCommit() with tlckFREE: 3827 * free child extents covered by parent; 3828 * invalidate parent if COMMIT_PWMAP; 3829 */ 3830 tlck = txLock(tid, ip, mp, tlckXTREE); 3831 xtlck = (struct xtlock *) & tlck->lock; 3832 xtlck->hwm.offset = 3833 le16_to_cpu(p->header.nextindex) - 1; 3834 tlck->type = tlckXTREE | tlckFREE; 3835 } else { /* COMMIT_WMAP */ 3836 3837 /* free child extents covered by parent */ 3838 xadlock.xdlist = &p->xad[XTENTRYSTART]; 3839 xadlock.count = 3840 le16_to_cpu(p->header.nextindex) - 3841 XTENTRYSTART; 3842 txFreeMap(ip, (struct maplock *) & xadlock, NULL, 3843 COMMIT_WMAP); 3844 } 3845 BT_MARK_DIRTY(mp, ip); 3846 3847 if (p->header.flag & BT_ROOT) { 3848 p->header.flag &= ~BT_INTERNAL; 3849 p->header.flag |= BT_LEAF; 3850 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3851 if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) { 3852 /* 3853 * Shrink root down to allow inline 3854 * EA (otherwise fsck complains) 3855 */ 3856 p->header.maxentry = 3857 cpu_to_le16(XTROOTINITSLOT); 3858 JFS_IP(ip)->mode2 |= INLINEEA; 3859 } 3860 3861 XT_PUTPAGE(mp); /* debug */ 3862 goto out; 3863 } else { 3864 if (log) { /* COMMIT_PWMAP */ 3865 /* page will be invalidated at tx completion 3866 */ 3867 XT_PUTPAGE(mp); 3868 } else { /* COMMIT_WMAP */ 3869 3870 if (mp->lid) 3871 lid_to_tlock(mp->lid)->flag |= 3872 tlckFREELOCK; 3873 3874 /* invalidate parent page */ 3875 discard_metapage(mp); 3876 } 3877 3878 /* parent has become empty and freed: 3879 * go back up to its parent page 3880 */ 3881 /* freed = 1; */ 3882 goto getParent; 3883 } 3884 } 3885 /* 3886 * parent page still has entries for front region; 3887 */ 3888 else { 3889 /* try truncate region covered by preceding entry 3890 * (process backward) 3891 */ 3892 index--; 3893 3894 /* go back down to the child page corresponding 3895 * to the entry 3896 */ 3897 goto getChild; 3898 } 3899 3900 /* 3901 * internal page: go down to child page of current entry 3902 */ 3903 getChild: 3904 /* save current parent entry for the child page */ 3905 BT_PUSH(&btstack, bn, index); 3906 3907 /* get child page */ 3908 xad = &p->xad[index]; 3909 bn = addressXAD(xad); 3910 3911 /* 3912 * first access of each internal entry: 3913 */ 3914 /* release parent page */ 3915 XT_PUTPAGE(mp); 3916 3917 /* process the child page */ 3918 goto getPage; 3919 3920 out: 3921 /* 3922 * update file resource stat 3923 */ 3924 /* set size 3925 */ 3926 if (S_ISDIR(ip->i_mode) && !newsize) 3927 ip->i_size = 1; /* fsck hates zero-length directories */ 3928 else 3929 ip->i_size = newsize; 3930 3931 /* update quota allocation to reflect freed blocks */ 3932 DQUOT_FREE_BLOCK(ip, nfreed); 3933 3934 /* 3935 * free tlock of invalidated pages 3936 */ 3937 if (flag == COMMIT_WMAP) 3938 txFreelock(ip); 3939 3940 return newsize; 3941 } 3942 3943 3944 /* 3945 * xtTruncate_pmap() 3946 * 3947 * function: 3948 * Perform truncate to zero lenghth for deleted file, leaving the 3949 * the xtree and working map untouched. This allows the file to 3950 * be accessed via open file handles, while the delete of the file 3951 * is committed to disk. 3952 * 3953 * parameter: 3954 * tid_t tid, 3955 * struct inode *ip, 3956 * s64 committed_size) 3957 * 3958 * return: new committed size 3959 * 3960 * note: 3961 * 3962 * To avoid deadlock by holding too many transaction locks, the 3963 * truncation may be broken up into multiple transactions. 3964 * The committed_size keeps track of part of the file has been 3965 * freed from the pmaps. 3966 */ 3967 s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) 3968 { 3969 s64 bn; 3970 struct btstack btstack; 3971 int cmp; 3972 int index; 3973 int locked_leaves = 0; 3974 struct metapage *mp; 3975 xtpage_t *p; 3976 struct btframe *parent; 3977 int rc; 3978 struct tblock *tblk; 3979 struct tlock *tlck = NULL; 3980 xad_t *xad; 3981 int xlen; 3982 s64 xoff; 3983 struct xtlock *xtlck = NULL; 3984 3985 /* save object truncation type */ 3986 tblk = tid_to_tblock(tid); 3987 tblk->xflag |= COMMIT_PMAP; 3988 3989 /* clear stack */ 3990 BT_CLR(&btstack); 3991 3992 if (committed_size) { 3993 xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1; 3994 rc = xtSearch(ip, xoff, &cmp, &btstack, 0); 3995 if (rc) 3996 return rc; 3997 3998 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 3999 4000 if (cmp != 0) { 4001 XT_PUTPAGE(mp); 4002 jfs_error(ip->i_sb, 4003 "xtTruncate_pmap: did not find extent"); 4004 return -EIO; 4005 } 4006 } else { 4007 /* 4008 * start with root 4009 * 4010 * root resides in the inode 4011 */ 4012 bn = 0; 4013 4014 /* 4015 * first access of each page: 4016 */ 4017 getPage: 4018 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4019 if (rc) 4020 return rc; 4021 4022 /* process entries backward from last index */ 4023 index = le16_to_cpu(p->header.nextindex) - 1; 4024 4025 if (p->header.flag & BT_INTERNAL) 4026 goto getChild; 4027 } 4028 4029 /* 4030 * leaf page 4031 */ 4032 4033 if (++locked_leaves > MAX_TRUNCATE_LEAVES) { 4034 /* 4035 * We need to limit the size of the transaction 4036 * to avoid exhausting pagecache & tlocks 4037 */ 4038 xad = &p->xad[index]; 4039 xoff = offsetXAD(xad); 4040 xlen = lengthXAD(xad); 4041 XT_PUTPAGE(mp); 4042 return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; 4043 } 4044 tlck = txLock(tid, ip, mp, tlckXTREE); 4045 tlck->type = tlckXTREE | tlckFREE; 4046 xtlck = (struct xtlock *) & tlck->lock; 4047 xtlck->hwm.offset = index; 4048 4049 4050 XT_PUTPAGE(mp); 4051 4052 /* 4053 * go back up to the parent page 4054 */ 4055 getParent: 4056 /* pop/restore parent entry for the current child page */ 4057 if ((parent = BT_POP(&btstack)) == NULL) 4058 /* current page must have been root */ 4059 goto out; 4060 4061 /* get back the parent page */ 4062 bn = parent->bn; 4063 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4064 if (rc) 4065 return rc; 4066 4067 index = parent->index; 4068 4069 /* 4070 * parent page become empty: free the page 4071 */ 4072 if (index == XTENTRYSTART) { 4073 /* txCommit() with tlckFREE: 4074 * free child extents covered by parent; 4075 * invalidate parent if COMMIT_PWMAP; 4076 */ 4077 tlck = txLock(tid, ip, mp, tlckXTREE); 4078 xtlck = (struct xtlock *) & tlck->lock; 4079 xtlck->hwm.offset = 4080 le16_to_cpu(p->header.nextindex) - 1; 4081 tlck->type = tlckXTREE | tlckFREE; 4082 4083 XT_PUTPAGE(mp); 4084 4085 if (p->header.flag & BT_ROOT) { 4086 4087 goto out; 4088 } else { 4089 goto getParent; 4090 } 4091 } 4092 /* 4093 * parent page still has entries for front region; 4094 */ 4095 else 4096 index--; 4097 /* 4098 * internal page: go down to child page of current entry 4099 */ 4100 getChild: 4101 /* save current parent entry for the child page */ 4102 BT_PUSH(&btstack, bn, index); 4103 4104 /* get child page */ 4105 xad = &p->xad[index]; 4106 bn = addressXAD(xad); 4107 4108 /* 4109 * first access of each internal entry: 4110 */ 4111 /* release parent page */ 4112 XT_PUTPAGE(mp); 4113 4114 /* process the child page */ 4115 goto getPage; 4116 4117 out: 4118 4119 return 0; 4120 } 4121 4122 4123 #ifdef _JFS_DEBUG_XTREE 4124 /* 4125 * xtDisplayTree() 4126 * 4127 * function: traverse forward 4128 */ 4129 int xtDisplayTree(struct inode *ip) 4130 { 4131 int rc = 0; 4132 struct metapage *mp; 4133 xtpage_t *p; 4134 s64 bn, pbn; 4135 int index, lastindex, v, h; 4136 xad_t *xad; 4137 struct btstack btstack; 4138 struct btframe *btsp; 4139 struct btframe *parent; 4140 4141 printk("display B+-tree.\n"); 4142 4143 /* clear stack */ 4144 btsp = btstack.stack; 4145 4146 /* 4147 * start with root 4148 * 4149 * root resides in the inode 4150 */ 4151 bn = 0; 4152 v = h = 0; 4153 4154 /* 4155 * first access of each page: 4156 */ 4157 getPage: 4158 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4159 if (rc) 4160 return rc; 4161 4162 /* process entries forward from first index */ 4163 index = XTENTRYSTART; 4164 lastindex = le16_to_cpu(p->header.nextindex) - 1; 4165 4166 if (p->header.flag & BT_INTERNAL) { 4167 /* 4168 * first access of each internal page 4169 */ 4170 goto getChild; 4171 } else { /* (p->header.flag & BT_LEAF) */ 4172 4173 /* 4174 * first access of each leaf page 4175 */ 4176 printf("leaf page "); 4177 xtDisplayPage(ip, bn, p); 4178 4179 /* unpin the leaf page */ 4180 XT_PUTPAGE(mp); 4181 } 4182 4183 /* 4184 * go back up to the parent page 4185 */ 4186 getParent: 4187 /* pop/restore parent entry for the current child page */ 4188 if ((parent = (btsp == btstack.stack ? NULL : --btsp)) == NULL) 4189 /* current page must have been root */ 4190 return; 4191 4192 /* 4193 * parent page scan completed 4194 */ 4195 if ((index = parent->index) == (lastindex = parent->lastindex)) { 4196 /* go back up to the parent page */ 4197 goto getParent; 4198 } 4199 4200 /* 4201 * parent page has entries remaining 4202 */ 4203 /* get back the parent page */ 4204 bn = parent->bn; 4205 /* v = parent->level; */ 4206 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4207 if (rc) 4208 return rc; 4209 4210 /* get next parent entry */ 4211 index++; 4212 4213 /* 4214 * internal page: go down to child page of current entry 4215 */ 4216 getChild: 4217 /* push/save current parent entry for the child page */ 4218 btsp->bn = pbn = bn; 4219 btsp->index = index; 4220 btsp->lastindex = lastindex; 4221 /* btsp->level = v; */ 4222 /* btsp->node = h; */ 4223 ++btsp; 4224 4225 /* get child page */ 4226 xad = &p->xad[index]; 4227 bn = addressXAD(xad); 4228 4229 /* 4230 * first access of each internal entry: 4231 */ 4232 /* release parent page */ 4233 XT_PUTPAGE(mp); 4234 4235 printk("traverse down 0x%lx[%d]->0x%lx\n", (ulong) pbn, index, 4236 (ulong) bn); 4237 v++; 4238 h = index; 4239 4240 /* process the child page */ 4241 goto getPage; 4242 } 4243 4244 4245 /* 4246 * xtDisplayPage() 4247 * 4248 * function: display page 4249 */ 4250 int xtDisplayPage(struct inode *ip, s64 bn, xtpage_t * p) 4251 { 4252 int rc = 0; 4253 xad_t *xad; 4254 s64 xaddr, xoff; 4255 int xlen, i, j; 4256 4257 /* display page control */ 4258 printf("bn:0x%lx flag:0x%x nextindex:%d\n", 4259 (ulong) bn, p->header.flag, 4260 le16_to_cpu(p->header.nextindex)); 4261 4262 /* display entries */ 4263 xad = &p->xad[XTENTRYSTART]; 4264 for (i = XTENTRYSTART, j = 1; i < le16_to_cpu(p->header.nextindex); 4265 i++, xad++, j++) { 4266 xoff = offsetXAD(xad); 4267 xaddr = addressXAD(xad); 4268 xlen = lengthXAD(xad); 4269 printf("\t[%d] 0x%lx:0x%lx(0x%x)", i, (ulong) xoff, 4270 (ulong) xaddr, xlen); 4271 4272 if (j == 4) { 4273 printf("\n"); 4274 j = 0; 4275 } 4276 } 4277 4278 printf("\n"); 4279 } 4280 #endif /* _JFS_DEBUG_XTREE */ 4281 4282 4283 #ifdef _JFS_WIP 4284 /* 4285 * xtGather() 4286 * 4287 * function: 4288 * traverse for allocation acquiring tlock at commit time 4289 * (vs at the time of update) logging backward top down 4290 * 4291 * note: 4292 * problem - establishing that all new allocation have been 4293 * processed both for append and random write in sparse file 4294 * at the current entry at the current subtree root page 4295 * 4296 */ 4297 int xtGather(btree_t *t) 4298 { 4299 int rc = 0; 4300 xtpage_t *p; 4301 u64 bn; 4302 int index; 4303 btentry_t *e; 4304 struct btstack btstack; 4305 struct btsf *parent; 4306 4307 /* clear stack */ 4308 BT_CLR(&btstack); 4309 4310 /* 4311 * start with root 4312 * 4313 * root resides in the inode 4314 */ 4315 bn = 0; 4316 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4317 if (rc) 4318 return rc; 4319 4320 /* new root is NOT pointed by a new entry 4321 if (p->header.flag & NEW) 4322 allocate new page lock; 4323 write a NEWPAGE log; 4324 */ 4325 4326 dopage: 4327 /* 4328 * first access of each page: 4329 */ 4330 /* process entries backward from last index */ 4331 index = le16_to_cpu(p->header.nextindex) - 1; 4332 4333 if (p->header.flag & BT_LEAF) { 4334 /* 4335 * first access of each leaf page 4336 */ 4337 /* process leaf page entries backward */ 4338 for (; index >= XTENTRYSTART; index--) { 4339 e = &p->xad[index]; 4340 /* 4341 * if newpage, log NEWPAGE. 4342 * 4343 if (e->flag & XAD_NEW) { 4344 nfound =+ entry->length; 4345 update current page lock for the entry; 4346 newpage(entry); 4347 * 4348 * if moved, log move. 4349 * 4350 } else if (e->flag & XAD_MOVED) { 4351 reset flag; 4352 update current page lock for the entry; 4353 } 4354 */ 4355 } 4356 4357 /* unpin the leaf page */ 4358 XT_PUTPAGE(mp); 4359 4360 /* 4361 * go back up to the parent page 4362 */ 4363 getParent: 4364 /* restore parent entry for the current child page */ 4365 if ((parent = BT_POP(&btstack)) == NULL) 4366 /* current page must have been root */ 4367 return 0; 4368 4369 if ((index = parent->index) == XTENTRYSTART) { 4370 /* 4371 * parent page scan completed 4372 */ 4373 /* go back up to the parent page */ 4374 goto getParent; 4375 } else { 4376 /* 4377 * parent page has entries remaining 4378 */ 4379 /* get back the parent page */ 4380 bn = parent->bn; 4381 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4382 if (rc) 4383 return -EIO; 4384 4385 /* first subroot page which 4386 * covers all new allocated blocks 4387 * itself not new/modified. 4388 * (if modified from split of descendent, 4389 * go down path of split page) 4390 4391 if (nfound == nnew && 4392 !(p->header.flag & (NEW | MOD))) 4393 exit scan; 4394 */ 4395 4396 /* process parent page entries backward */ 4397 index--; 4398 } 4399 } else { 4400 /* 4401 * first access of each internal page 4402 */ 4403 } 4404 4405 /* 4406 * internal page: go down to child page of current entry 4407 */ 4408 4409 /* save current parent entry for the child page */ 4410 BT_PUSH(&btstack, bn, index); 4411 4412 /* get current entry for the child page */ 4413 e = &p->xad[index]; 4414 4415 /* 4416 * first access of each internal entry: 4417 */ 4418 /* 4419 * if new entry, log btree_tnewentry. 4420 * 4421 if (e->flag & XAD_NEW) 4422 update parent page lock for the entry; 4423 */ 4424 4425 /* release parent page */ 4426 XT_PUTPAGE(mp); 4427 4428 /* get child page */ 4429 bn = e->bn; 4430 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4431 if (rc) 4432 return rc; 4433 4434 /* 4435 * first access of each non-root page: 4436 */ 4437 /* 4438 * if new, log btree_newpage. 4439 * 4440 if (p->header.flag & NEW) 4441 allocate new page lock; 4442 write a NEWPAGE log (next, prev); 4443 */ 4444 4445 /* process the child page */ 4446 goto dopage; 4447 4448 out: 4449 return 0; 4450 } 4451 #endif /* _JFS_WIP */ 4452 4453 4454 #ifdef CONFIG_JFS_STATISTICS 4455 int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length, 4456 int *eof, void *data) 4457 { 4458 int len = 0; 4459 off_t begin; 4460 4461 len += sprintf(buffer, 4462 "JFS Xtree statistics\n" 4463 "====================\n" 4464 "searches = %d\n" 4465 "fast searches = %d\n" 4466 "splits = %d\n", 4467 xtStat.search, 4468 xtStat.fastSearch, 4469 xtStat.split); 4470 4471 begin = offset; 4472 *start = buffer + begin; 4473 len -= begin; 4474 4475 if (len > length) 4476 len = length; 4477 else 4478 *eof = 1; 4479 4480 if (len < 0) 4481 len = 0; 4482 4483 return len; 4484 } 4485 #endif 4486