/*
 *   Copyright (C) International Business Machines Corp., 2000-2005
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
/*
 * jfs_xtree.c: extent allocation descriptor B+-tree manager
 */

#include <linux/fs.h>
#include <linux/quotaops.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_dmap.h"
#include "jfs_dinode.h"
#include "jfs_superblock.h"
#include "jfs_debug.h"

/*
 * xtree local flag
 */
#define XT_INSERT	0x00000001

/*
 * xtree key/entry comparison: extent offset
 *
 * return:
 *	-1: k < start of extent
 *	 0: start_of_extent <= k <= end_of_extent
 *	 1: k > end_of_extent
 */
#define XT_CMP(CMP, K, X, OFFSET64)\
{\
	OFFSET64 = offsetXAD(X);\
	(CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
		((K) < OFFSET64) ? -1 : 0;\
}
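
/*
 * Illustration (editorial note, not part of the original source): for an
 * xad covering offsets [100, 108), XT_CMP classifies a search key K as
 *
 *	K =  99  ->  CMP == -1	(key precedes the extent)
 *	K = 100  ->  CMP ==  0	(key inside the extent)
 *	K = 108  ->  CMP ==  1	(key past the end of the extent)
 */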

/* write a xad entry */
#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
{\
	(XAD)->flag = (FLAG);\
	XADoffset((XAD), (OFF));\
	XADlength((XAD), (LEN));\
	XADaddress((XAD), (ADDR));\
}

#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)

/* get page buffer for specified block address */
/* ToDo: Replace this ugly macro with a function */
#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\
{\
	BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\
	if (!(RC))\
	{\
		if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\
		    (le16_to_cpu((P)->header.nextindex) >\
		     le16_to_cpu((P)->header.maxentry)) ||\
		    (le16_to_cpu((P)->header.maxentry) >\
		     (((BN) == 0) ? XTROOTMAXSLOT : PSIZE >> L2XTSLOTSIZE)))\
		{\
			jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\
			BT_PUTPAGE(MP);\
			MP = NULL;\
			RC = -EIO;\
		}\
	}\
}

/* for consistency */
#define XT_PUTPAGE(MP) BT_PUTPAGE(MP)

#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
	BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)

/* xtree entry parameter descriptor */
struct xtsplit {
	struct metapage *mp;
	s16 index;
	u8 flag;
	s64 off;
	s64 addr;
	int len;
	struct pxdlist *pxdlist;
};


/*
 * statistics
 */
#ifdef CONFIG_JFS_STATISTICS
static struct {
	uint search;
	uint fastSearch;
	uint split;
} xtStat;
#endif


/*
 * forward references
 */
static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp,
		    struct btstack * btstack, int flag);

static int xtSplitUp(tid_t tid,
		     struct inode *ip,
		     struct xtsplit * split, struct btstack * btstack);

static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
		       struct metapage ** rmpp, s64 * rbnp);

static int xtSplitRoot(tid_t tid, struct inode *ip,
		       struct xtsplit * split, struct metapage ** rmpp);

#ifdef _STILL_TO_PORT
static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
		      xtpage_t * fp, struct btstack * btstack);

static int xtSearchNode(struct inode *ip,
			xad_t * xad,
			int *cmpp, struct btstack * btstack, int flag);

static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
#endif				/* _STILL_TO_PORT */

/*
 * xtLookup()
 *
 * function: map a single page into a physical extent;
 */
int xtLookup(struct inode *ip, s64 lstart,
	     s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
{
	int rc = 0;
	struct btstack btstack;
	int cmp;
	s64 bn;
	struct metapage *mp;
	xtpage_t *p;
	int index;
	xad_t *xad;
	s64 next, size, xoff, xend;
	int xlen;
	s64 xaddr;

	*paddr = 0;
	*plen = llen;

	if (!no_check) {
		/* is lookup offset beyond eof ? */
		size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
		    JFS_SBI(ip->i_sb)->l2bsize;
		if (lstart >= size) {
			jfs_err("xtLookup: lstart (0x%lx) >= size (0x%lx)",
				(ulong) lstart, (ulong) size);
			return 0;
		}
	}

	/*
	 * search for the xad entry covering the logical extent
	 */
//search:
	if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
		jfs_err("xtLookup: xtSearch returned %d", rc);
		return rc;
	}

	/*
	 * compute the physical extent covering logical extent
	 *
	 * N.B. search may have failed (e.g., hole in sparse file),
	 * and returned the index of the next entry.
	 */
	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* is xad found covering start of logical extent ?
	 * lstart is a page start address,
	 * i.e., lstart cannot start in a hole;
	 */
	if (cmp) {
		if (next)
			*plen = min(next - lstart, llen);
		goto out;
	}

	/*
	 * lxd covered by xad
	 */
	xad = &p->xad[index];
	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	xend = xoff + xlen;
	xaddr = addressXAD(xad);

	/* initialize new pxd */
	*pflag = xad->flag;
	*paddr = xaddr + (lstart - xoff);
	/* a page must be fully covered by an xad */
	*plen = min(xend - lstart, llen);

      out:
	XT_PUTPAGE(mp);

	return rc;
}
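
/*
 * Example (editorial sketch, not part of the original source): a typical
 * call reads back the physical block backing one logical block.  The
 * helper name and locking context are hypothetical; real callers (e.g.
 * the JFS get_block path) hold the appropriate inode locks.
 */
#if 0
static s64 example_lookup_paddr(struct inode *ip, s64 lblk)
{
	int xflag;
	s64 paddr;
	s32 plen;

	if (xtLookup(ip, lblk, 1, &xflag, &paddr, &plen, 0))
		return 0;
	return paddr;	/* 0 if lblk lies beyond eof or in a hole */
}
#endif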

/*
 * xtLookupList()
 *
 * function: map a single logical extent into a list of physical extents;
 *
 * parameter:
 *	struct inode	*ip,
 *	struct lxdlist	*lxdlist,	lxd list (in)
 *	struct xadlist	*xadlist,	xad list (in/out)
 *	int		flag)
 *
 * coverage of lxd by xad under assumption of
 * . lxd's are ordered and disjoint.
 * . xad's are ordered and disjoint.
 *
 * return:
 *	0:	success
 *
 * note: a page being written (even a single byte) is backed fully,
 *	except the last page which is only backed with blocks
 *	required to cover the last byte;
 *	the extent backing a page is fully contained within an xad;
 */
int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
		 struct xadlist * xadlist, int flag)
{
	int rc = 0;
	struct btstack btstack;
	int cmp;
	s64 bn;
	struct metapage *mp;
	xtpage_t *p;
	int index;
	lxd_t *lxd;
	xad_t *xad, *pxd;
	s64 size, lstart, lend, xstart, xend, pstart;
	s64 llen, xlen, plen;
	s64 xaddr, paddr;
	int nlxd, npxd, maxnpxd;

	npxd = xadlist->nxad = 0;
	maxnpxd = xadlist->maxnxad;
	pxd = xadlist->xad;

	nlxd = lxdlist->nlxd;
	lxd = lxdlist->lxd;

	lstart = offsetLXD(lxd);
	llen = lengthLXD(lxd);
	lend = lstart + llen;

	size = (ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
	    JFS_SBI(ip->i_sb)->l2bsize;

	/*
	 * search for the xad entry covering the logical extent
	 */
      search:
	if (lstart >= size)
		return 0;

	if ((rc = xtSearch(ip, lstart, NULL, &cmp, &btstack, 0)))
		return rc;

	/*
	 * compute the physical extent covering logical extent
	 *
	 * N.B. search may have failed (e.g., hole in sparse file),
	 * and returned the index of the next entry.
	 */
//map:
	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* is xad on the next sibling page ? */
	if (index == le16_to_cpu(p->header.nextindex)) {
		if (p->header.flag & BT_ROOT)
			goto mapend;

		if ((bn = le64_to_cpu(p->header.next)) == 0)
			goto mapend;

		XT_PUTPAGE(mp);

		/* get next sibling page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		index = XTENTRYSTART;
	}

	xad = &p->xad[index];

	/*
	 * is lxd covered by xad ?
	 */
      compare:
	xstart = offsetXAD(xad);
	xlen = lengthXAD(xad);
	xend = xstart + xlen;
	xaddr = addressXAD(xad);

      compare1:
	if (xstart < lstart)
		goto compare2;

	/* (lstart <= xstart) */

	/* lxd is NOT covered by xad */
	if (lend <= xstart) {
		/*
		 * get next lxd
		 */
		if (--nlxd == 0)
			goto mapend;
		lxd++;

		lstart = offsetLXD(lxd);
		llen = lengthLXD(lxd);
		lend = lstart + llen;
		if (lstart >= size)
			goto mapend;

		/* compare with the current xad */
		goto compare1;
	}
	/* lxd is covered by xad */
	else {			/* (xstart < lend) */

		/* initialize new pxd */
		pstart = xstart;
		plen = min(lend - xstart, xlen);
		paddr = xaddr;

		goto cover;
	}

	/* (xstart < lstart) */
      compare2:
	/* lxd is covered by xad */
	if (lstart < xend) {
		/* initialize new pxd */
		pstart = lstart;
		plen = min(xend - lstart, llen);
		paddr = xaddr + (lstart - xstart);

		goto cover;
	}
	/* lxd is NOT covered by xad */
	else {			/* (xend <= lstart) */

		/*
		 * get next xad
		 *
		 * linear search next xad covering lxd on
		 * the current xad page, and then tree search
		 */
		if (index == le16_to_cpu(p->header.nextindex) - 1) {
			if (p->header.flag & BT_ROOT)
				goto mapend;

			XT_PUTPAGE(mp);
			goto search;
		} else {
			index++;
			xad++;

			/* compare with new xad */
			goto compare;
		}
	}

	/*
	 * lxd is covered by xad and a new pxd has been initialized
	 * (lstart <= xstart < lend) or (xstart < lstart < xend)
	 */
      cover:
	/* finalize pxd corresponding to current xad */
	XT_PUTENTRY(pxd, xad->flag, pstart, plen, paddr);

	if (++npxd >= maxnpxd)
		goto mapend;
	pxd++;

	/*
	 * lxd is fully covered by xad
	 */
	if (lend <= xend) {
		/*
		 * get next lxd
		 */
		if (--nlxd == 0)
			goto mapend;
		lxd++;

		lstart = offsetLXD(lxd);
		llen = lengthLXD(lxd);
		lend = lstart + llen;
		if (lstart >= size)
			goto mapend;

		/*
		 * test for old xad covering new lxd
		 * (old xstart < new lstart)
		 */
		goto compare2;
	}
	/*
	 * lxd is partially covered by xad
	 */
	else {			/* (xend < lend) */

		/*
		 * get next xad
		 *
		 * linear search next xad covering lxd on
		 * the current xad page, and then next xad page search
		 */
		if (index == le16_to_cpu(p->header.nextindex) - 1) {
			if (p->header.flag & BT_ROOT)
				goto mapend;

			if ((bn = le64_to_cpu(p->header.next)) == 0)
				goto mapend;

			XT_PUTPAGE(mp);

			/* get next sibling page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			index = XTENTRYSTART;
			xad = &p->xad[index];
		} else {
			index++;
			xad++;
		}

		/*
		 * test for new xad covering old lxd
		 * (old lstart < new xstart)
		 */
		goto compare;
	}

      mapend:
	xadlist->nxad = npxd;

//out:
	XT_PUTPAGE(mp);

	return rc;
}
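
/*
 * Editorial illustration: the loop above walks two ordered, disjoint
 * interval lists in parallel, emitting one pxd per lxd/xad overlap.
 * For instance (offsets in blocks, values illustrative only):
 *
 *	lxd list:  [0,4)        [8,12)
 *	xad list:  [0,2) [2,6)  [9,16)
 *	pxd out:   [0,2) [2,4)  [9,12)
 */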

/*
 * xtSearch()
 *
 * function: search for the xad entry covering specified offset.
 *
 * parameters:
 *	ip	- file object;
 *	xoff	- extent offset;
 *	nextp	- address of next extent (if any) for search miss
 *	cmpp	- comparison result;
 *	btstack	- traverse stack;
 *	flag	- search process flag (XT_INSERT);
 *
 * returns:
 *	btstack contains (bn, index) of search path traversed to the entry.
 *	*cmpp is set to result of comparison with the entry returned.
 *	the page containing the entry is pinned at exit.
 */
static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
		    int *cmpp, struct btstack * btstack, int flag)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	int rc = 0;
	int cmp = 1;		/* init for empty page */
	s64 bn;			/* block number */
	struct metapage *mp;	/* page buffer */
	xtpage_t *p;		/* page */
	xad_t *xad;
	int base, index, lim, btindex;
	struct btframe *btsp;
	int nsplit = 0;		/* number of pages to split */
	s64 t64;
	s64 next = 0;

	INCREMENT(xtStat.search);

	BT_CLR(btstack);

	btstack->nsplit = 0;

	/*
	 * search down tree from root:
	 *
	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
	 * internal page, child page Pi contains entries with key K,
	 * Ki <= K < Kj.
	 *
	 * if entry with search key K is not found
	 * internal page search finds the entry with largest key Ki
	 * less than K which points to the child page to search;
	 * leaf page search finds the entry with smallest key Kj
	 * greater than K so that the returned index is the position of
	 * the entry to be shifted right for insertion of new entry.
	 * for empty tree, search key is greater than any key of the tree.
	 *
	 * by convention, root bn = 0.
	 */
	for (bn = 0;;) {
		/* get/pin the page to search */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		/* try sequential access heuristics with the previous
		 * access entry in target leaf page:
		 * once search narrowed down into the target leaf,
		 * key must either match an entry in the leaf or
		 * key entry does not exist in the tree;
		 */
//fastSearch:
		if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
		    (p->header.flag & BT_LEAF) &&
		    (index = jfs_ip->btindex) <
		    le16_to_cpu(p->header.nextindex)) {
			xad = &p->xad[index];
			t64 = offsetXAD(xad);
			if (xoff < t64 + lengthXAD(xad)) {
				if (xoff >= t64) {
					*cmpp = 0;
					goto out;
				}

				/* stop sequential access heuristics */
				goto binarySearch;
			} else {	/* (t64 + lengthXAD(xad)) <= xoff */

				/* try next sequential entry */
				index++;
				if (index <
				    le16_to_cpu(p->header.nextindex)) {
					xad++;
					t64 = offsetXAD(xad);
					if (xoff < t64 + lengthXAD(xad)) {
						if (xoff >= t64) {
							*cmpp = 0;
							goto out;
						}

						/* miss: key falls between
						 * previous and this entry
						 */
						*cmpp = 1;
						next = t64;
						goto out;
					}

					/* (xoff >= t64 + lengthXAD(xad));
					 * matching entry may be further out:
					 * stop heuristic search
					 */
					/* stop sequential access heuristics */
					goto binarySearch;
				}

				/* (index == p->header.nextindex);
				 * miss: key entry does not exist in
				 * the target leaf/tree
				 */
				*cmpp = 1;
				goto out;
			}

			/*
			 * if hit, return index of the entry found, and
			 * if miss, where new entry with search key is
			 * to be inserted;
			 */
		      out:
			/* compute number of pages to split */
			if (flag & XT_INSERT) {
				if (p->header.nextindex ==	/* little-endian */
				    p->header.maxentry)
					nsplit++;
				else
					nsplit = 0;
				btstack->nsplit = nsplit;
			}

			/* save search result */
			btsp = btstack->top;
			btsp->bn = bn;
			btsp->index = index;
			btsp->mp = mp;

			/* update sequential access heuristics */
			jfs_ip->btindex = index;

			if (nextp)
				*nextp = next;

			INCREMENT(xtStat.fastSearch);
			return 0;
		}

		/* well, ... full search now */
	      binarySearch:
		lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;

		/*
		 * binary search with search key K on the current page
		 */
		for (base = XTENTRYSTART; lim; lim >>= 1) {
			index = base + (lim >> 1);

			XT_CMP(cmp, xoff, &p->xad[index], t64);
			if (cmp == 0) {
				/*
				 * search hit
				 */
				/* search hit - leaf page:
				 * return the entry found
				 */
				if (p->header.flag & BT_LEAF) {
					*cmpp = cmp;

					/* compute number of pages to split */
					if (flag & XT_INSERT) {
						if (p->header.nextindex ==
						    p->header.maxentry)
							nsplit++;
						else
							nsplit = 0;
						btstack->nsplit = nsplit;
					}

					/* save search result */
					btsp = btstack->top;
					btsp->bn = bn;
					btsp->index = index;
					btsp->mp = mp;

					/* init sequential access heuristics */
					btindex = jfs_ip->btindex;
					if (index == btindex ||
					    index == btindex + 1)
						jfs_ip->btorder = BT_SEQUENTIAL;
					else
						jfs_ip->btorder = BT_RANDOM;
					jfs_ip->btindex = index;

					return 0;
				}
				/* search hit - internal page:
				 * descend/search its child page
				 */
				if (index < le16_to_cpu(p->header.nextindex)-1)
					next = offsetXAD(&p->xad[index + 1]);
				goto next;
			}

			if (cmp > 0) {
				base = index + 1;
				--lim;
			}
		}

		/*
		 * search miss
		 *
		 * base is the smallest index with key (Kj) greater than
		 * search key (K) and may be zero or maxentry index.
		 */
		if (base < le16_to_cpu(p->header.nextindex))
			next = offsetXAD(&p->xad[base]);
		/*
		 * search miss - leaf page:
		 *
		 * return location of entry (base) where new entry with
		 * search key K is to be inserted.
		 */
		if (p->header.flag & BT_LEAF) {
			*cmpp = cmp;

			/* compute number of pages to split */
			if (flag & XT_INSERT) {
				if (p->header.nextindex ==
				    p->header.maxentry)
					nsplit++;
				else
					nsplit = 0;
				btstack->nsplit = nsplit;
			}

			/* save search result */
			btsp = btstack->top;
			btsp->bn = bn;
			btsp->index = base;
			btsp->mp = mp;

			/* init sequential access heuristics */
			btindex = jfs_ip->btindex;
			if (base == btindex || base == btindex + 1)
				jfs_ip->btorder = BT_SEQUENTIAL;
			else
				jfs_ip->btorder = BT_RANDOM;
			jfs_ip->btindex = base;

			if (nextp)
				*nextp = next;

			return 0;
		}

		/*
		 * search miss - non-leaf page:
		 *
		 * if base is non-zero, decrement base by one to get the parent
		 * entry of the child page to search.
		 */
		index = base ? base - 1 : base;

		/*
		 * go down to child page
		 */
	      next:
		/* update number of pages to split */
		if (p->header.nextindex == p->header.maxentry)
			nsplit++;
		else
			nsplit = 0;

		/* push (bn, index) of the parent page/entry */
		BT_PUSH(btstack, bn, index);

		/* get the child page block number */
		bn = addressXAD(&p->xad[index]);

		/* unpin the parent page */
		XT_PUTPAGE(mp);
	}
}
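
/*
 * Editorial note: on a miss (*cmpp != 0) the btstack still points at the
 * leaf slot where an entry for xoff would be inserted, and *nextp (when
 * requested) holds the offset of the first extent beyond xoff.  Callers
 * below use it to detect or avoid overlap with the following extent:
 * xtInsert() fails with -EEXIST, xtAppend() clamps the requested length.
 */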

/*
 * xtInsert()
 *
 * function:
 *
 * parameter:
 *	tid	- transaction id;
 *	ip	- file object;
 *	xflag	- extent flag (XAD_NOTRECORDED):
 *	xoff	- extent offset;
 *	xlen	- extent length;
 *	xaddrp	- extent address pointer (in/out):
 *		if (*xaddrp)
 *			caller allocated data extent at *xaddrp;
 *		else
 *			allocate data extent and return its xaddr;
 *	flag	-
 *
 * return:
 */
int xtInsert(tid_t tid,		/* transaction id */
	     struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
	     int flag)
{
	int rc = 0;
	s64 xaddr, hint;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	int cmp;
	s64 next;
	struct tlock *tlck;
	struct xtlock *xtlck;

	jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);

	/*
	 * search for the entry location at which to insert:
	 *
	 * xtFastSearch() and xtSearch() both return (leaf page
	 * pinned, index at which to insert).
	 * n.b. xtSearch() may return index of maxentry of
	 * the full page.
	 */
	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* This test must follow XT_GETSEARCH since mp must be valid if
	 * we branch to out: */
	if ((cmp == 0) || (next && (xlen > next - xoff))) {
		rc = -EEXIST;
		goto out;
	}

	/*
	 * allocate data extent requested
	 *
	 * allocation hint: last xad
	 */
	if ((xaddr = *xaddrp) == 0) {
		if (index > XTENTRYSTART) {
			xad = &p->xad[index - 1];
			hint = addressXAD(xad) + lengthXAD(xad) - 1;
		} else
			hint = 0;
		if ((rc = DQUOT_ALLOC_BLOCK(ip, xlen)))
			goto out;
		if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
			DQUOT_FREE_BLOCK(ip, xlen);
			goto out;
		}
	}

	/*
	 * insert entry for new extent
	 */
	xflag |= XAD_NEW;

	/*
	 * if the leaf page is full, split the page and
	 * propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		split.mp = mp;
		split.index = index;
		split.flag = xflag;
		split.off = xoff;
		split.len = xlen;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
			/* undo data extent allocation */
			if (*xaddrp == 0) {
				dbFree(ip, xaddr, (s64) xlen);
				DQUOT_FREE_BLOCK(ip, xlen);
			}
			return rc;
		}

		*xaddrp = xaddr;
		return 0;
	}

	/*
	 * insert the new entry into the leaf page
	 */
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	BT_MARK_DIRTY(mp, ip);

	/* if insert into middle, shift right remaining entries. */
	if (index < nextindex)
		memmove(&p->xad[index + 1], &p->xad[index],
			(nextindex - index) * sizeof(xad_t));

	/* insert the new entry: mark the entry NEW */
	xad = &p->xad[index];
	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);

	/* advance next available entry index */
	p->header.nextindex =
	    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);

	/* Don't log it if there are no links to the file */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset =
		    (xtlck->lwm.offset) ? min(index,
					      (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length =
		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
	}

	*xaddrp = xaddr;

      out:
	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
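
/*
 * Example (editorial sketch, not part of the original source): record a
 * freshly allocated 4-block extent at logical offset xoff inside an open
 * transaction.  The helper name is hypothetical and transaction setup is
 * elided; tid must come from txBegin() as in the real JFS commit paths.
 */
#if 0
static int example_insert_extent(tid_t tid, struct inode *ip, s64 xoff)
{
	s64 xaddr = 0;	/* 0: let xtInsert() allocate the data extent */

	/* on success, xaddr holds the address of the new extent */
	return xtInsert(tid, ip, 0, xoff, 4, &xaddr, 0);
}
#endif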

/*
 * xtSplitUp()
 *
 * function:
 *	split full pages as propagating insertion up the tree
 *
 * parameter:
 *	tid	- transaction id;
 *	ip	- file object;
 *	split	- entry parameter descriptor;
 *	btstack	- traverse stack from xtSearch()
 *
 * return:
 */
static int
xtSplitUp(tid_t tid,
	  struct inode *ip, struct xtsplit * split, struct btstack * btstack)
{
	int rc = 0;
	struct metapage *smp;
	xtpage_t *sp;		/* split page */
	struct metapage *rmp;
	s64 rbn;		/* new right page block number */
	struct metapage *rcmp;
	xtpage_t *rcp;		/* right child page */
	s64 rcbn;		/* right child page block number */
	int skip;		/* index of entry of insertion */
	int nextindex;		/* next available entry index of p */
	struct btframe *parent;	/* parent page entry on traverse stack */
	xad_t *xad;
	s64 xaddr;
	int xlen;
	int nsplit;		/* number of pages split */
	struct pxdlist pxdlist;
	pxd_t *pxd;
	struct tlock *tlck;
	struct xtlock *xtlck;

	smp = split->mp;
	sp = XT_PAGE(ip, smp);

	/* is inode xtree root extension/inline EA area free ? */
	if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
	    (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
	    (JFS_IP(ip)->mode2 & INLINEEA)) {
		sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
		JFS_IP(ip)->mode2 &= ~INLINEEA;

		BT_MARK_DIRTY(smp, ip);
		/*
		 * acquire a transaction lock on the leaf page;
		 *
		 * action: xad insertion/extension;
		 */

		/* if insert into middle, shift right remaining entries. */
		skip = split->index;
		nextindex = le16_to_cpu(sp->header.nextindex);
		if (skip < nextindex)
			memmove(&sp->xad[skip + 1], &sp->xad[skip],
				(nextindex - skip) * sizeof(xad_t));

		/* insert the new entry: mark the entry NEW */
		xad = &sp->xad[skip];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* advance next available entry index */
		sp->header.nextindex =
		    cpu_to_le16(le16_to_cpu(sp->header.nextindex) + 1);

		/* Don't log it if there are no links to the file */
		if (!test_cflag(COMMIT_Nolink, ip)) {
			tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
			xtlck = (struct xtlock *) & tlck->lock;
			xtlck->lwm.offset = (xtlck->lwm.offset) ?
			    min(skip, (int)xtlck->lwm.offset) : skip;
			xtlck->lwm.length =
			    le16_to_cpu(sp->header.nextindex) -
			    xtlck->lwm.offset;
		}

		return 0;
	}

	/*
	 * allocate new index blocks to cover index page split(s)
	 *
	 * allocation hint: ?
	 */
	if (split->pxdlist == NULL) {
		nsplit = btstack->nsplit;
		split->pxdlist = &pxdlist;
		pxdlist.maxnpxd = pxdlist.npxd = 0;
		pxd = &pxdlist.pxd[0];
		xlen = JFS_SBI(ip->i_sb)->nbperpage;
		for (; nsplit > 0; nsplit--, pxd++) {
			if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
			    == 0) {
				PXDaddress(pxd, xaddr);
				PXDlength(pxd, xlen);

				pxdlist.maxnpxd++;

				continue;
			}

			/* undo allocation */

			XT_PUTPAGE(smp);
			return rc;
		}
	}

	/*
	 * Split leaf page <sp> into <sp> and a new right page <rp>.
	 *
	 * The split routines insert the new entry into the leaf page,
	 * and acquire txLock as appropriate.
	 * return <rp> pinned and its block number <rpbn>.
	 */
	rc = (sp->header.flag & BT_ROOT) ?
	    xtSplitRoot(tid, ip, split, &rmp) :
	    xtSplitPage(tid, ip, split, &rmp, &rbn);

	XT_PUTPAGE(smp);

	if (rc)
		return -EIO;
	/*
	 * propagate up the router entry for the leaf page just split
	 *
	 * insert a router entry for the new page into the parent page,
	 * propagate the insert/split up the tree by walking back the stack
	 * of (bn of parent page, index of child page entry in parent page)
	 * that were traversed during the search for the page that split.
	 *
	 * the propagation of insert/split up the tree stops if the root
	 * splits or the page inserted into doesn't have to split to hold
	 * the new entry.
	 *
	 * the parent entry for the split page remains the same, and
	 * a new entry is inserted at its right with the first key and
	 * block number of the new right page.
	 *
	 * There are a maximum of 3 pages pinned at any time:
	 * right child, left parent and right parent (when the parent splits)
	 * to keep the child page pinned while working on the parent.
	 * make sure that all pins are released at exit.
	 */
	while ((parent = BT_POP(btstack)) != NULL) {
		/* parent page specified by stack frame <parent> */

		/* keep current child pages <rcp> pinned */
		rcmp = rmp;
		rcbn = rbn;
		rcp = XT_PAGE(ip, rcmp);

		/*
		 * insert router entry in parent for new right child page <rp>
		 */
		/* get/pin the parent page <sp> */
		XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
		if (rc) {
			XT_PUTPAGE(rcmp);
			return rc;
		}

		/*
		 * The new key entry goes ONE AFTER the index of parent entry,
		 * because the split was to the right.
		 */
		skip = parent->index + 1;

		/*
		 * split or shift right remaining entries of the parent page
		 */
		nextindex = le16_to_cpu(sp->header.nextindex);
		/*
		 * parent page is full - split the parent page
		 */
		if (nextindex == le16_to_cpu(sp->header.maxentry)) {
			/* init for parent page split */
			split->mp = smp;
			split->index = skip;	/* index at insert */
			split->flag = XAD_NEW;
			split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
			split->len = JFS_SBI(ip->i_sb)->nbperpage;
			split->addr = rcbn;

			/* unpin previous right child page */
			XT_PUTPAGE(rcmp);

			/* The split routines insert the new entry,
			 * and acquire txLock as appropriate.
			 * return <rp> pinned and its block number <rpbn>.
			 */
			rc = (sp->header.flag & BT_ROOT) ?
			    xtSplitRoot(tid, ip, split, &rmp) :
			    xtSplitPage(tid, ip, split, &rmp, &rbn);
			if (rc) {
				XT_PUTPAGE(smp);
				return rc;
			}

			XT_PUTPAGE(smp);
			/* keep new child page <rp> pinned */
		}
		/*
		 * parent page is not full - insert in parent page
		 */
		else {
			/*
			 * insert router entry in parent for the right child
			 * page from the first entry of the right child page:
			 */
			/*
			 * acquire a transaction lock on the parent page;
			 *
			 * action: router xad insertion;
			 */
			BT_MARK_DIRTY(smp, ip);

			/*
			 * if insert into middle, shift right remaining entries
			 */
			if (skip < nextindex)
				memmove(&sp->xad[skip + 1], &sp->xad[skip],
					(nextindex -
					 skip) << L2XTSLOTSIZE);

			/* insert the router entry */
			xad = &sp->xad[skip];
			XT_PUTENTRY(xad, XAD_NEW,
				    offsetXAD(&rcp->xad[XTENTRYSTART]),
				    JFS_SBI(ip->i_sb)->nbperpage, rcbn);

			/* advance next available entry index. */
			sp->header.nextindex =
			    cpu_to_le16(le16_to_cpu(sp->header.nextindex) +
					1);

			/* Don't log it if there are no links to the file */
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, smp,
					      tlckXTREE | tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
				xtlck->lwm.offset = (xtlck->lwm.offset) ?
				    min(skip, (int)xtlck->lwm.offset) : skip;
				xtlck->lwm.length =
				    le16_to_cpu(sp->header.nextindex) -
				    xtlck->lwm.offset;
			}

			/* unpin parent page */
			XT_PUTPAGE(smp);

			/* exit propagate up */
			break;
		}
	}

	/* unpin current right page */
	XT_PUTPAGE(rmp);

	return 0;
}
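
/*
 * Illustration (editorial, not from the original source) of one
 * propagation step: after child L splits into L and R, the parent P
 * gains a router entry keyed by R's first offset.  If P is itself full,
 * P splits the same way and the loop continues one level up, until a
 * parent absorbs the entry or the root splits:
 *
 *	P: [ ... <K(L)> ... ]        P: [ ... <K(L)> <K(R)> ... ]
 *	           |           ==>              |      |
 *	       L (full)                         L      R
 */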

/*
 * xtSplitPage()
 *
 * function:
 *	split a full non-root page into
 *	original/split/left page and new right page
 *	i.e., the original/split page remains as left page.
 *
 * parameter:
 *	int		tid,
 *	struct inode	*ip,
 *	struct xtsplit	*split,
 *	struct metapage	**rmpp,
 *	s64		*rbnp,
 *
 * return:
 *	0 for success; on success the new right page is returned pinned
 *	in *rmpp and its block number in *rbnp; otherwise, an error code;
 */
static int
xtSplitPage(tid_t tid, struct inode *ip,
	    struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
{
	int rc = 0;
	struct metapage *smp;
	xtpage_t *sp;
	struct metapage *rmp;
	xtpage_t *rp;		/* new right page allocated */
	s64 rbn;		/* new right page block number */
	struct metapage *mp;
	xtpage_t *p;
	s64 nextbn;
	int skip, maxentry, middle, righthalf, n;
	xad_t *xad;
	struct pxdlist *pxdlist;
	pxd_t *pxd;
	struct tlock *tlck;
	struct xtlock *sxtlck = NULL, *rxtlck = NULL;
	int quota_allocation = 0;

	smp = split->mp;
	sp = XT_PAGE(ip, smp);

	INCREMENT(xtStat.split);

	pxdlist = split->pxdlist;
	pxd = &pxdlist->pxd[pxdlist->npxd];
	pxdlist->npxd++;
	rbn = addressPXD(pxd);

	/* Allocate blocks to quota. */
	if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
		rc = -EDQUOT;
		goto clean_up;
	}

	quota_allocation += lengthPXD(pxd);

	/*
	 * allocate the new right page for the split
	 */
	rmp = get_metapage(ip, rbn, PSIZE, 1);
	if (rmp == NULL) {
		rc = -EIO;
		goto clean_up;
	}

	jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);

	BT_MARK_DIRTY(rmp, ip);
	/*
	 * action: new page;
	 */

	rp = (xtpage_t *) rmp->data;
	rp->header.self = *pxd;
	rp->header.flag = sp->header.flag & BT_TYPE;
	rp->header.maxentry = sp->header.maxentry;	/* little-endian */
	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);

	BT_MARK_DIRTY(smp, ip);
	/* Don't log it if there are no links to the file */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		/*
		 * acquire a transaction lock on the new right page;
		 */
		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
		rxtlck = (struct xtlock *) & tlck->lock;
		rxtlck->lwm.offset = XTENTRYSTART;
		/*
		 * acquire a transaction lock on the split page
		 */
		tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
		sxtlck = (struct xtlock *) & tlck->lock;
	}

	/*
	 * initialize/update sibling pointers of <sp> and <rp>
	 */
	nextbn = le64_to_cpu(sp->header.next);
	rp->header.next = cpu_to_le64(nextbn);
	rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
	sp->header.next = cpu_to_le64(rbn);

	skip = split->index;

	/*
	 * sequential append at tail (after last entry of last page)
	 *
	 * if splitting the last page on a level because of appending
	 * an entry to it (skip is maxentry), it's likely that the access is
	 * sequential. adding an empty page on the side of the level is less
	 * work and can push the fill factor much higher than normal.
	 * if we're wrong it's no big deal - we will do the split the right
	 * way next time.
	 * (it may look like it's equally easy to do a similar hack for
	 * reverse sorted data, that is, split the tree left, but it's not.
	 * Be my guest.)
	 */
	if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) {
		/*
		 * acquire a transaction lock on the new/right page;
		 *
		 * action: xad insertion;
		 */
		/* insert entry at the first entry of the new right page */
		xad = &rp->xad[XTENTRYSTART];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);

		if (!test_cflag(COMMIT_Nolink, ip)) {
			/* rxtlck->lwm.offset = XTENTRYSTART; */
			rxtlck->lwm.length = 1;
		}

		*rmpp = rmp;
		*rbnp = rbn;

		jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
		return 0;
	}

	/*
	 * non-sequential insert (at possibly middle page)
	 */

	/*
	 * update previous pointer of old next/right page of <sp>
	 */
	if (nextbn != 0) {
		XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
		if (rc) {
			XT_PUTPAGE(rmp);
			goto clean_up;
		}

		BT_MARK_DIRTY(mp, ip);
		/*
		 * acquire a transaction lock on the next page;
		 *
		 * action:sibling pointer update;
		 */
		if (!test_cflag(COMMIT_Nolink, ip))
			tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);

		p->header.prev = cpu_to_le64(rbn);

		/* sibling page may have been updated previously, or
		 * it may be updated later;
		 */

		XT_PUTPAGE(mp);
	}

	/*
	 * split the data between the split and new/right pages
	 */
	maxentry = le16_to_cpu(sp->header.maxentry);
	middle = maxentry >> 1;
	righthalf = maxentry - middle;

	/*
	 * skip index in old split/left page - insert into left page:
	 */
	if (skip <= middle) {
		/* move right half of split page to the new right page */
		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
			righthalf << L2XTSLOTSIZE);

		/* shift right tail of left half to make room for new entry */
		if (skip < middle)
			memmove(&sp->xad[skip + 1], &sp->xad[skip],
				(middle - skip) << L2XTSLOTSIZE);

		/* insert new entry */
		xad = &sp->xad[skip];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* update page header */
		sp->header.nextindex = cpu_to_le16(middle + 1);
		if (!test_cflag(COMMIT_Nolink, ip)) {
			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
			    min(skip, (int)sxtlck->lwm.offset) : skip;
		}

		rp->header.nextindex =
		    cpu_to_le16(XTENTRYSTART + righthalf);
	}
	/*
	 * skip index in new right page - insert into right page:
	 */
	else {
		/* move left head of right half to right page */
		n = skip - middle;
		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
			n << L2XTSLOTSIZE);

		/* insert new entry */
		n += XTENTRYSTART;
		xad = &rp->xad[n];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* move right tail of right half to right page */
		if (skip < maxentry)
			memmove(&rp->xad[n + 1], &sp->xad[skip],
				(maxentry - skip) << L2XTSLOTSIZE);

		/* update page header */
		sp->header.nextindex = cpu_to_le16(middle);
		if (!test_cflag(COMMIT_Nolink, ip)) {
			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
			    min(middle, (int)sxtlck->lwm.offset) : middle;
		}

		rp->header.nextindex = cpu_to_le16(XTENTRYSTART +
						   righthalf + 1);
	}

	if (!test_cflag(COMMIT_Nolink, ip)) {
		sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) -
		    sxtlck->lwm.offset;

		/* rxtlck->lwm.offset = XTENTRYSTART; */
		rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
		    XTENTRYSTART;
	}

	*rmpp = rmp;
	*rbnp = rbn;

	jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
	return rc;

      clean_up:

	/* Rollback quota allocation. */
	if (quota_allocation)
		DQUOT_FREE_BLOCK(ip, quota_allocation);

	return (rc);
}
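
/*
 * Editorial note: apart from the sequential-append shortcut earlier in
 * this function, the split point is the middle slot (middle = maxentry
 * >> 1), so both pages of a formerly full page come out roughly half
 * full - the usual B+-tree post-split fill factor.
 */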

/*
 * xtSplitRoot()
 *
 * function:
 *	split the full root page into
 *	original/root/split page and new right page
 *	i.e., root remains fixed in tree anchor (inode) and
 *	the root is copied to a single new right child page
 *	since root page << non-root page, and
 *	the split root page contains a single entry for the
 *	new right child page.
 *
 * parameter:
 *	int		tid,
 *	struct inode	*ip,
 *	struct xtsplit	*split,
 *	struct metapage	**rmpp)
 *
 * return:
 *	0 for success; on success the new right child page is returned
 *	pinned in *rmpp; otherwise, an error code;
 */
static int
xtSplitRoot(tid_t tid,
	    struct inode *ip, struct xtsplit * split, struct metapage ** rmpp)
{
	xtpage_t *sp;
	struct metapage *rmp;
	xtpage_t *rp;
	s64 rbn;
	int skip, nextindex;
	xad_t *xad;
	pxd_t *pxd;
	struct pxdlist *pxdlist;
	struct tlock *tlck;
	struct xtlock *xtlck;

	sp = &JFS_IP(ip)->i_xtroot;

	INCREMENT(xtStat.split);

	/*
	 * allocate a single (right) child page
	 */
	pxdlist = split->pxdlist;
	pxd = &pxdlist->pxd[pxdlist->npxd];
	pxdlist->npxd++;
	rbn = addressPXD(pxd);
	rmp = get_metapage(ip, rbn, PSIZE, 1);
	if (rmp == NULL)
		return -EIO;

	/* Allocate blocks to quota. */
	if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
		release_metapage(rmp);
		return -EDQUOT;
	}

	jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);

	/*
	 * acquire a transaction lock on the new right page;
	 *
	 * action: new page;
	 */
	BT_MARK_DIRTY(rmp, ip);

	rp = (xtpage_t *) rmp->data;
	rp->header.flag =
	    (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
	rp->header.self = *pxd;
	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
	rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE);

	/* initialize sibling pointers */
	rp->header.next = 0;
	rp->header.prev = 0;

	/*
	 * copy the in-line root page into new right page extent
	 */
	nextindex = le16_to_cpu(sp->header.maxentry);
	memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART],
		(nextindex - XTENTRYSTART) << L2XTSLOTSIZE);

	/*
	 * insert the new entry into the new right/child page
	 * (skip index in the new right page will not change)
	 */
	skip = split->index;
	/* if insert into middle, shift right remaining entries */
	if (skip != nextindex)
		memmove(&rp->xad[skip + 1], &rp->xad[skip],
			(nextindex - skip) * sizeof(xad_t));

	xad = &rp->xad[skip];
	XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr);

	/* update page header */
	rp->header.nextindex = cpu_to_le16(nextindex + 1);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset = XTENTRYSTART;
		xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
		    XTENTRYSTART;
	}

	/*
	 * reset the root
	 *
	 * init root with the single entry for the new right page
	 * set the 1st entry offset to 0, which forces the left-most key
	 * at any level of the tree to be less than any search key.
	 */
	/*
	 * acquire a transaction lock on the root page (in-memory inode);
	 *
	 * action: root split;
	 */
	BT_MARK_DIRTY(split->mp, ip);

	xad = &sp->xad[XTENTRYSTART];
	XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn);

	/* update page header of root */
	sp->header.flag &= ~BT_LEAF;
	sp->header.flag |= BT_INTERNAL;

	sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset = XTENTRYSTART;
		xtlck->lwm.length = 1;
	}

	*rmpp = rmp;

	jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp);
	return 0;
}
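
/*
 * Illustration (editorial, not from the original source): unlike a
 * regular page split, a root split grows the tree downward because the
 * root must stay inside the inode:
 *
 *	[ root: e1 e2 ... en ]      [ root: <key 0 -> child> ]
 *	                       ==>            |
 *	                            [ child: e1 e2 ... en new ]
 */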

/*
 * xtExtend()
 *
 * function: extend in-place;
 *
 * note: existing extent may or may not have been committed.
 * caller is responsible for pager buffer cache update, and
 * working block allocation map update;
 * update pmap: alloc whole extended extent;
 */
int xtExtend(tid_t tid,		/* transaction id */
	     struct inode *ip, s64 xoff,	/* delta extent offset */
	     s32 xlen,		/* delta extent length */
	     int flag)
{
	int rc = 0;
	int cmp;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex, len;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	s64 xaddr;
	struct tlock *tlck;
	struct xtlock *xtlck = NULL;

	jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);

	/* there must exist an extent to be extended */
	if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	if (cmp != 0) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent");
		return -EIO;
	}

	/* extension must be contiguous */
	xad = &p->xad[index];
	if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtExtend: extension is not contiguous");
		return -EIO;
	}

	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	BT_MARK_DIRTY(mp, ip);
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
	}

	/* extend will overflow extent ? */
	xlen = lengthXAD(xad) + xlen;
	if ((len = xlen - MAXXLEN) <= 0)
		goto extendOld;

	/*
	 * extent overflow: insert entry for new extent
	 */
//insertNew:
	xoff = offsetXAD(xad) + MAXXLEN;
	xaddr = addressXAD(xad) + MAXXLEN;
	nextindex = le16_to_cpu(p->header.nextindex);

	/*
	 * if the leaf page is full, insert the new entry and
	 * propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		/* xtSplitUp() unpins leaf pages */
		split.mp = mp;
		split.index = index + 1;
		split.flag = XAD_NEW;
		split.off = xoff;	/* split offset */
		split.len = len;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
			return rc;

		/* get back old page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
		/*
		 * if leaf root has been split, original root has been
		 * copied to new child page, i.e., original entry now
		 * resides on the new child page;
		 */
		if (p->header.flag & BT_INTERNAL) {
			ASSERT(p->header.nextindex ==
			       cpu_to_le16(XTENTRYSTART + 1));
			xad = &p->xad[XTENTRYSTART];
			bn = addressXAD(xad);
			XT_PUTPAGE(mp);

			/* get new child page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			BT_MARK_DIRTY(mp, ip);
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
			}
		}
	}
	/*
	 * insert the new entry into the leaf page
	 */
	else {
		/* insert the new entry: mark the entry NEW */
		xad = &p->xad[index + 1];
		XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);

		/* advance next available entry index */
		p->header.nextindex =
		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
	}

	/* get back old entry */
	xad = &p->xad[index];
	xlen = MAXXLEN;

	/*
	 * extend old extent
	 */
      extendOld:
	XADlength(xad, xlen);
	if (!(xad->flag & XAD_NEW))
		xad->flag |= XAD_EXTENDED;

	if (!test_cflag(COMMIT_Nolink, ip)) {
		xtlck->lwm.offset =
		    (xtlck->lwm.offset) ? min(index,
					      (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length =
		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
	}

	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
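
/*
 * Editorial note: lengthXAD() is a fixed-width field, so an extension
 * that would push the extent past MAXXLEN blocks is handled above by
 * capping the existing xad at MAXXLEN and inserting a second xad for
 * the overflow, physically contiguous with the first (both xoff and
 * xaddr of the new entry are advanced by MAXXLEN).
 */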

#ifdef _NOTYET
/*
 * xtTailgate()
 *
 * function: split existing 'tail' extent
 *	(split offset >= start offset of tail extent), and
 *	relocate and extend the split tail half;
 *
 * note: existing extent may or may not have been committed.
 * caller is responsible for pager buffer cache update, and
 * working block allocation map update;
 * update pmap: free old split tail extent, alloc new extent;
 */
int xtTailgate(tid_t tid,	/* transaction id */
	       struct inode *ip, s64 xoff,	/* split/new extent offset */
	       s32 xlen,	/* new extent length */
	       s64 xaddr,	/* new extent address */
	       int flag)
{
	int rc = 0;
	int cmp;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex, llen, rlen;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	struct tlock *tlck;
	struct xtlock *xtlck = 0;
	struct tlock *mtlck;
	struct maplock *pxdlock;

/*
printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
	(ulong)xoff, xlen, (ulong)xaddr);
*/

	/* there must exist an extent to be tailgated */
	if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	if (cmp != 0) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtTailgate: couldn't find extent");
		return -EIO;
	}

	/* entry found must be last entry */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (index != nextindex - 1) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb,
			  "xtTailgate: the entry found is not the last entry");
		return -EIO;
	}

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire tlock of the leaf page containing original entry
	 */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
	}

	/* completely replace extent ? */
	xad = &p->xad[index];
/*
printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
	(ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
*/
	if ((llen = xoff - offsetXAD(xad)) == 0)
		goto updateOld;

	/*
	 * partially replace extent: insert entry for new extent
	 */
//insertNew:
	/*
	 * if the leaf page is full, insert the new entry and
	 * propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		/* xtSplitUp() unpins leaf pages */
		split.mp = mp;
		split.index = index + 1;
		split.flag = XAD_NEW;
		split.off = xoff;	/* split offset */
		split.len = xlen;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
			return rc;

		/* get back old page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
		/*
		 * if leaf root has been split, original root has been
		 * copied to new child page, i.e., original entry now
		 * resides on the new child page;
		 */
		if (p->header.flag & BT_INTERNAL) {
			ASSERT(p->header.nextindex ==
			       cpu_to_le16(XTENTRYSTART + 1));
			xad = &p->xad[XTENTRYSTART];
			bn = addressXAD(xad);
			XT_PUTPAGE(mp);

			/* get new child page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			BT_MARK_DIRTY(mp, ip);
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
			}
		}
	}
	/*
	 * insert the new entry into the leaf page
	 */
	else {
		/* insert the new entry: mark the entry NEW */
		xad = &p->xad[index + 1];
		XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);

		/* advance next available entry index */
		p->header.nextindex =
		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
	}

	/* get back old XAD */
	xad = &p->xad[index];

	/*
	 * truncate/relocate old extent at split offset
	 */
      updateOld:
	/* update dmap for old/committed/truncated extent */
	rlen = lengthXAD(xad) - llen;
	if (!(xad->flag & XAD_NEW)) {
		/* free from PWMAP at commit */
		if (!test_cflag(COMMIT_Nolink, ip)) {
			mtlck = txMaplock(tid, ip, tlckMAP);
			pxdlock = (struct maplock *) & mtlck->lock;
			pxdlock->flag = mlckFREEPXD;
			PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen);
			PXDlength(&pxdlock->pxd, rlen);
			pxdlock->index = 1;
		}
	} else
		/* free from WMAP */
		dbFree(ip, addressXAD(xad) + llen, (s64) rlen);

	if (llen)
		/* truncate */
		XADlength(xad, llen);
	else
		/* replace */
		XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		xtlck->lwm.offset = (xtlck->lwm.offset) ?
		    min(index, (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
		    xtlck->lwm.offset;
	}

	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
#endif /* _NOTYET */

/*
 * xtUpdate()
 *
 * function: update XAD;
 *
 *	update extent for allocated_but_not_recorded or
 *	compressed extent;
 *
 * parameter:
 *	nxad	- new XAD;
 *		logical extent of the specified XAD must be completely
 *		contained by an existing XAD;
 */
int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
{				/* new XAD */
	int rc = 0;
	int cmp;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index0, index, newindex, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad, *lxad, *rxad;
	int xflag;
	s64 nxoff, xoff;
	int nxlen, xlen, lxlen, rxlen;
	s64 nxaddr, xaddr;
	struct tlock *tlck;
	struct xtlock *xtlck = NULL;
	int newpage = 0;

	/* there must exist an extent to be updated */
	nxoff = offsetXAD(nxad);
	nxlen = lengthXAD(nxad);
	nxaddr = addressXAD(nxad);

	if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);

	if (cmp != 0) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtUpdate: Could not find extent");
		return -EIO;
	}

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire tlock of the leaf page containing original entry
	 */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
	}

	xad = &p->xad[index0];
	xflag = xad->flag;
	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	xaddr = addressXAD(xad);

	/* nXAD must be completely contained within XAD */
	if ((xoff > nxoff) ||
	    (nxoff + nxlen > xoff + xlen)) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb,
			  "xtUpdate: nXAD is not completely contained within XAD");
		return -EIO;
	}

	index = index0;
	newindex = index + 1;
	nextindex = le16_to_cpu(p->header.nextindex);

#ifdef _JFS_WIP_NOCOALESCE
	if (xoff < nxoff)
		goto updateRight;

	/*
	 * replace XAD with nXAD
	 */
      replace:			/* (nxoff == xoff) */
	if (nxlen == xlen) {
		/* replace XAD with nXAD:recorded */
		*xad = *nxad;
		xad->flag = xflag & ~XAD_NOTRECORDED;

		goto out;
	} else			/* (nxlen < xlen) */
		goto updateLeft;
#endif				/* _JFS_WIP_NOCOALESCE */

/* #ifdef _JFS_WIP_COALESCE */
	if (xoff < nxoff)
		goto coalesceRight;

	/*
	 * coalesce with left XAD
	 */
//coalesceLeft: /* (xoff == nxoff) */
	/* is XAD first entry of page ? */
	if (index == XTENTRYSTART)
		goto replace;

	/* is nXAD logically and physically contiguous with lXAD ? */
	lxad = &p->xad[index - 1];
	lxlen = lengthXAD(lxad);
	if (!(lxad->flag & XAD_NOTRECORDED) &&
	    (nxoff == offsetXAD(lxad) + lxlen) &&
	    (nxaddr == addressXAD(lxad) + lxlen) &&
	    (lxlen + nxlen < MAXXLEN)) {
		/* extend right lXAD */
		index0 = index - 1;
		XADlength(lxad, lxlen + nxlen);

		/* If we just merged two extents together, need to make sure
		 * the right extent gets logged.  If the left one is marked
		 * XAD_NEW, then we know it will be logged.  Otherwise, mark
		 * as XAD_EXTENDED
		 */
		if (!(lxad->flag & XAD_NEW))
			lxad->flag |= XAD_EXTENDED;

		if (xlen > nxlen) {
			/* truncate XAD */
			XADoffset(xad, xoff + nxlen);
			XADlength(xad, xlen - nxlen);
			XADaddress(xad, xaddr + nxlen);
			goto out;
		} else {	/* (xlen == nxlen) */

			/* remove XAD */
			if (index < nextindex - 1)
				memmove(&p->xad[index], &p->xad[index + 1],
					(nextindex - index -
					 1) << L2XTSLOTSIZE);

			p->header.nextindex =
			    cpu_to_le16(le16_to_cpu(p->header.nextindex) -
					1);

			index = index0;
			newindex = index + 1;
			nextindex = le16_to_cpu(p->header.nextindex);
			xoff = nxoff = offsetXAD(lxad);
			xlen = nxlen = lxlen + nxlen;
			xaddr = nxaddr = addressXAD(lxad);
			goto coalesceRight;
		}
	}

	/*
	 * replace XAD with nXAD
	 */
      replace:			/* (nxoff == xoff) */
	if (nxlen == xlen) {
		/* replace XAD with nXAD:recorded */
		*xad = *nxad;
		xad->flag = xflag & ~XAD_NOTRECORDED;

		goto coalesceRight;
	} else			/* (nxlen < xlen) */
		goto updateLeft;

	/*
	 * coalesce with right XAD
	 */
      coalesceRight:		/* (xoff <= nxoff) */
	/* is XAD last entry of page ? */
	if (newindex == nextindex) {
		if (xoff == nxoff)
			goto out;
		goto updateRight;
	}

	/* is nXAD logically and physically contiguous with rXAD ? */
	rxad = &p->xad[index + 1];
	rxlen = lengthXAD(rxad);
	if (!(rxad->flag & XAD_NOTRECORDED) &&
	    (nxoff + nxlen == offsetXAD(rxad)) &&
	    (nxaddr + nxlen == addressXAD(rxad)) &&
	    (rxlen + nxlen < MAXXLEN)) {
		/* extend left rXAD */
		XADoffset(rxad, nxoff);
		XADlength(rxad, rxlen + nxlen);
		XADaddress(rxad, nxaddr);

		/* If we just merged two extents together, need to make sure
		 * the left extent gets logged.  If the right one is marked
		 * XAD_NEW, then we know it will be logged.  Otherwise, mark
		 * as XAD_EXTENDED
		 */
		if (!(rxad->flag & XAD_NEW))
			rxad->flag |= XAD_EXTENDED;

		if (xlen > nxlen)
			/* truncate XAD */
			XADlength(xad, xlen - nxlen);
		else {		/* (xlen == nxlen) */

			/* remove XAD */
			memmove(&p->xad[index], &p->xad[index + 1],
				(nextindex - index - 1) << L2XTSLOTSIZE);

			p->header.nextindex =
			    cpu_to_le16(le16_to_cpu(p->header.nextindex) -
					1);
		}

		goto out;
	} else if (xoff == nxoff)
		goto out;

	if (xoff >= nxoff) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff");
		return -EIO;
	}
/* #endif _JFS_WIP_COALESCE */

	/*
	 * split XAD into (lXAD, nXAD):
	 *
	 *          |---nXAD--->
	 * --|----------XAD----------|--
	 *   |-lXAD-|
	 */
      updateRight:		/* (xoff < nxoff) */
	/* truncate old XAD as lXAD:not_recorded */
	xad = &p->xad[index];
	XADlength(xad, nxoff - xoff);

	/* insert nXAD:recorded */
	if (nextindex == le16_to_cpu(p->header.maxentry)) {

		/* xtSplitUp() unpins leaf pages */
		split.mp = mp;
		split.index = newindex;
		split.flag = xflag & ~XAD_NOTRECORDED;
		split.off = nxoff;
		split.len = nxlen;
		split.addr = nxaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
			return rc;

		/* get back old page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
		/*
		 * if leaf root has been split, original root has been
		 * copied to new child page, i.e., original entry now
		 * resides on the new child page;
		 */
		if (p->header.flag & BT_INTERNAL) {
			ASSERT(p->header.nextindex ==
			       cpu_to_le16(XTENTRYSTART + 1));
			xad = &p->xad[XTENTRYSTART];
			bn = addressXAD(xad);
			XT_PUTPAGE(mp);

			/* get new child page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			BT_MARK_DIRTY(mp, ip);
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
			}
		} else {
			/* is nXAD on new page ? */
			if (newindex >
			    (le16_to_cpu(p->header.maxentry) >> 1)) {
				newindex =
				    newindex -
				    le16_to_cpu(p->header.nextindex) +
				    XTENTRYSTART;
				newpage = 1;
			}
		}
	} else {
		/* if insert into middle, shift right remaining entries */
		if (newindex < nextindex)
			memmove(&p->xad[newindex + 1], &p->xad[newindex],
				(nextindex - newindex) << L2XTSLOTSIZE);

		/* insert the entry */
		xad = &p->xad[newindex];
		*xad = *nxad;
		xad->flag = xflag & ~XAD_NOTRECORDED;

		/* advance next available entry index. */
		p->header.nextindex =
		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
	}

	/*
	 * does nXAD force 3-way split ?
	 *
	 *          |---nXAD--->|
	 * --|----------XAD-------------|--
	 *   |-lXAD-|           |-rXAD -|
	 */
	if (nxoff + nxlen == xoff + xlen)
		goto out;

	/* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */
	if (newpage) {
		/* close out old page */
		if (!test_cflag(COMMIT_Nolink, ip)) {
			xtlck->lwm.offset = (xtlck->lwm.offset) ?
			    min(index0, (int)xtlck->lwm.offset) : index0;
2269 min(index0, (int)xtlck->lwm.offset) : index0; 2270 xtlck->lwm.length = 2271 le16_to_cpu(p->header.nextindex) - 2272 xtlck->lwm.offset; 2273 } 2274 2275 bn = le64_to_cpu(p->header.next); 2276 XT_PUTPAGE(mp); 2277 2278 /* get new right page */ 2279 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2280 if (rc) 2281 return rc; 2282 2283 BT_MARK_DIRTY(mp, ip); 2284 if (!test_cflag(COMMIT_Nolink, ip)) { 2285 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 2286 xtlck = (struct xtlock *) & tlck->lock; 2287 } 2288 2289 index0 = index = newindex; 2290 } else 2291 index++; 2292 2293 newindex = index + 1; 2294 nextindex = le16_to_cpu(p->header.nextindex); 2295 xlen = xlen - (nxoff - xoff); 2296 xoff = nxoff; 2297 xaddr = nxaddr; 2298 2299 /* recompute split pages */ 2300 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2301 XT_PUTPAGE(mp); 2302 2303 if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) 2304 return rc; 2305 2306 /* retrieve search result */ 2307 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); 2308 2309 if (cmp != 0) { 2310 XT_PUTPAGE(mp); 2311 jfs_error(ip->i_sb, "xtUpdate: xtSearch failed"); 2312 return -EIO; 2313 } 2314 2315 if (index0 != index) { 2316 XT_PUTPAGE(mp); 2317 jfs_error(ip->i_sb, 2318 "xtUpdate: unexpected value of index"); 2319 return -EIO; 2320 } 2321 } 2322 2323 /* 2324 * split XAD into (nXAD, rXAD) 2325 * 2326 * ---nXAD---| 2327 * --|----------XAD----------|-- 2328 * |-rXAD-| 2329 */ 2330 updateLeft: /* (nxoff == xoff) && (nxlen < xlen) */ 2331 /* update old XAD with nXAD:recorded */ 2332 xad = &p->xad[index]; 2333 *xad = *nxad; 2334 xad->flag = xflag & ~XAD_NOTRECORDED; 2335 2336 /* insert rXAD:not_recorded */ 2337 xoff = xoff + nxlen; 2338 xlen = xlen - nxlen; 2339 xaddr = xaddr + nxlen; 2340 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2341 /* 2342 printf("xtUpdate.updateLeft.split p:0x%p\n", p); 2343 */ 2344 /* xtSplitUp() unpins leaf pages */ 2345 split.mp = mp; 2346 split.index = newindex; 2347 split.flag = xflag; 2348 split.off = xoff; 2349 split.len = xlen; 2350 split.addr = xaddr; 2351 split.pxdlist = NULL; 2352 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 2353 return rc; 2354 2355 /* get back old page */ 2356 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2357 if (rc) 2358 return rc; 2359 2360 /* 2361 * if leaf root has been split, original root has been 2362 * copied to new child page, i.e., original entry now 2363 * resides on the new child page; 2364 */ 2365 if (p->header.flag & BT_INTERNAL) { 2366 ASSERT(p->header.nextindex == 2367 cpu_to_le16(XTENTRYSTART + 1)); 2368 xad = &p->xad[XTENTRYSTART]; 2369 bn = addressXAD(xad); 2370 XT_PUTPAGE(mp); 2371 2372 /* get new child page */ 2373 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2374 if (rc) 2375 return rc; 2376 2377 BT_MARK_DIRTY(mp, ip); 2378 if (!test_cflag(COMMIT_Nolink, ip)) { 2379 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 2380 xtlck = (struct xtlock *) & tlck->lock; 2381 } 2382 } 2383 } else { 2384 /* if insert into middle, shift right remaining entries */ 2385 if (newindex < nextindex) 2386 memmove(&p->xad[newindex + 1], &p->xad[newindex], 2387 (nextindex - newindex) << L2XTSLOTSIZE); 2388 2389 /* insert the entry */ 2390 xad = &p->xad[newindex]; 2391 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 2392 2393 /* advance next available entry index. */ 2394 p->header.nextindex = 2395 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 2396 } 2397 2398 out: 2399 if (!test_cflag(COMMIT_Nolink, ip)) { 2400 xtlck->lwm.offset = (xtlck->lwm.offset) ?
2401 min(index0, (int)xtlck->lwm.offset) : index0; 2402 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 2403 xtlck->lwm.offset; 2404 } 2405 2406 /* unpin the leaf page */ 2407 XT_PUTPAGE(mp); 2408 2409 return rc; 2410 } 2411 2412 2413 /* 2414 * xtAppend() 2415 * 2416 * function: grow in append mode from the contiguous region specified; 2417 * 2418 * parameter: 2419 * tid - transaction id; 2420 * ip - file object; 2421 * xflag - extent flag: 2422 * xoff - extent offset; 2423 * maxblocks - max extent length; 2424 * xlen - extent length (in/out); 2425 * xaddrp - extent address pointer (in/out): 2426 * flag - 2427 * 2428 * return: 2429 */ 2430 int xtAppend(tid_t tid, /* transaction id */ 2431 struct inode *ip, int xflag, s64 xoff, s32 maxblocks, 2432 s32 * xlenp, /* (in/out) */ 2433 s64 * xaddrp, /* (in/out) */ 2434 int flag) 2435 { 2436 int rc = 0; 2437 struct metapage *mp; /* meta-page buffer */ 2438 xtpage_t *p; /* base B+-tree index page */ 2439 s64 bn, xaddr; 2440 int index, nextindex; 2441 struct btstack btstack; /* traverse stack */ 2442 struct xtsplit split; /* split information */ 2443 xad_t *xad; 2444 int cmp; 2445 struct tlock *tlck; 2446 struct xtlock *xtlck; 2447 int nsplit, nblocks, xlen; 2448 struct pxdlist pxdlist; 2449 pxd_t *pxd; 2450 s64 next; 2451 2452 xaddr = *xaddrp; 2453 xlen = *xlenp; 2454 jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx", 2455 (ulong) xoff, maxblocks, xlen, (ulong) xaddr); 2456 2457 /* 2458 * search for the entry location at which to insert: 2459 * 2460 * xtFastSearch() and xtSearch() both return (leaf page 2461 * pinned, index at which to insert). 2462 * n.b. xtSearch() may return index of maxentry of 2463 * the full page. 2464 */ 2465 if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT))) 2466 return rc; 2467 2468 /* retrieve search result */ 2469 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2470 2471 if (cmp == 0) { 2472 rc = -EEXIST; 2473 goto out; 2474 } 2475 2476 if (next) 2477 xlen = min(xlen, (int)(next - xoff)); 2478 //insert: 2479 /* 2480 * insert entry for new extent 2481 */ 2482 xflag |= XAD_NEW; 2483 2484 /* 2485 * if the leaf page is full, split the page and 2486 * propagate up the router entry for the new page from split 2487 * 2488 * The xtSplitUp() will insert the entry and unpin the leaf page.
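* N.B. in the split path below, the pxd list is seeded with one xtpage-sized extent per anticipated split, carved from the head of the caller's contiguous region (xaddr advances, maxblocks shrinks); the data extent is then trimmed to whatever of the region remains.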
2489 */ 2490 nextindex = le16_to_cpu(p->header.nextindex); 2491 if (nextindex < le16_to_cpu(p->header.maxentry)) 2492 goto insertLeaf; 2493 2494 /* 2495 * allocate new index blocks to cover index page split(s) 2496 */ 2497 nsplit = btstack.nsplit; 2498 split.pxdlist = &pxdlist; 2499 pxdlist.maxnpxd = pxdlist.npxd = 0; 2500 pxd = &pxdlist.pxd[0]; 2501 nblocks = JFS_SBI(ip->i_sb)->nbperpage; 2502 for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) { 2503 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) { 2504 PXDaddress(pxd, xaddr); 2505 PXDlength(pxd, nblocks); 2506 2507 pxdlist.maxnpxd++; 2508 2509 continue; 2510 } 2511 2512 /* undo allocation */ 2513 2514 goto out; 2515 } 2516 2517 xlen = min(xlen, maxblocks); 2518 2519 /* 2520 * allocate data extent requested 2521 */ 2522 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) 2523 goto out; 2524 2525 split.mp = mp; 2526 split.index = index; 2527 split.flag = xflag; 2528 split.off = xoff; 2529 split.len = xlen; 2530 split.addr = xaddr; 2531 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { 2532 /* undo data extent allocation */ 2533 dbFree(ip, *xaddrp, (s64) * xlenp); 2534 2535 return rc; 2536 } 2537 2538 *xaddrp = xaddr; 2539 *xlenp = xlen; 2540 return 0; 2541 2542 /* 2543 * insert the new entry into the leaf page 2544 */ 2545 insertLeaf: 2546 /* 2547 * allocate data extent requested 2548 */ 2549 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) 2550 goto out; 2551 2552 BT_MARK_DIRTY(mp, ip); 2553 /* 2554 * acquire a transaction lock on the leaf page; 2555 * 2556 * action: xad insertion/extension; 2557 */ 2558 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 2559 xtlck = (struct xtlock *) & tlck->lock; 2560 2561 /* insert the new entry: mark the entry NEW */ 2562 xad = &p->xad[index]; 2563 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 2564 2565 /* advance next available entry index */ 2566 p->header.nextindex = 2567 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 2568 2569 xtlck->lwm.offset = 2570 (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index; 2571 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 2572 xtlck->lwm.offset; 2573 2574 *xaddrp = xaddr; 2575 *xlenp = xlen; 2576 2577 out: 2578 /* unpin the leaf page */ 2579 XT_PUTPAGE(mp); 2580 2581 return rc; 2582 } 2583 #ifdef _STILL_TO_PORT 2584 2585 /* - TBD for defragmentaion/reorganization - 2586 * 2587 * xtDelete() 2588 * 2589 * function: 2590 * delete the entry with the specified key. 2591 * 2592 * N.B.: whole extent of the entry is assumed to be deleted. 2593 * 2594 * parameter: 2595 * 2596 * return: 2597 * ENOENT: if the entry is not found. 
2598 * 2599 * exception: 2600 */ 2601 int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag) 2602 { 2603 int rc = 0; 2604 struct btstack btstack; 2605 int cmp; 2606 s64 bn; 2607 struct metapage *mp; 2608 xtpage_t *p; 2609 int index, nextindex; 2610 struct tlock *tlck; 2611 struct xtlock *xtlck; 2612 2613 /* 2614 * find the matching entry; xtSearch() pins the page 2615 */ 2616 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) 2617 return rc; 2618 2619 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2620 if (cmp) { 2621 /* unpin the leaf page */ 2622 XT_PUTPAGE(mp); 2623 return -ENOENT; 2624 } 2625 2626 /* 2627 * delete the entry from the leaf page 2628 */ 2629 nextindex = le16_to_cpu(p->header.nextindex); 2630 p->header.nextindex = 2631 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1); 2632 2633 /* 2634 * if the leaf page become empty, free the page 2635 */ 2636 if (p->header.nextindex == cpu_to_le16(XTENTRYSTART)) 2637 return (xtDeleteUp(tid, ip, mp, p, &btstack)); 2638 2639 BT_MARK_DIRTY(mp, ip); 2640 /* 2641 * acquire a transaction lock on the leaf page; 2642 * 2643 * action:xad deletion; 2644 */ 2645 tlck = txLock(tid, ip, mp, tlckXTREE); 2646 xtlck = (struct xtlock *) & tlck->lock; 2647 xtlck->lwm.offset = 2648 (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index; 2649 2650 /* if delete from middle, shift left/compact the remaining entries */ 2651 if (index < nextindex - 1) 2652 memmove(&p->xad[index], &p->xad[index + 1], 2653 (nextindex - index - 1) * sizeof(xad_t)); 2654 2655 XT_PUTPAGE(mp); 2656 2657 return 0; 2658 } 2659 2660 2661 /* - TBD for defragmentation/reorganization - 2662 * 2663 * xtDeleteUp() 2664 * 2665 * function: 2666 * free empty pages while propagating deletion up the tree 2667 * 2668 * parameter: 2669 * 2670 * return: 2671 */ 2672 static int 2673 xtDeleteUp(tid_t tid, struct inode *ip, 2674 struct metapage * fmp, xtpage_t * fp, struct btstack * btstack) 2675 { 2676 int rc = 0; 2677 struct metapage *mp; 2678 xtpage_t *p; 2679 int index, nextindex; 2680 s64 xaddr; 2681 int xlen; 2682 struct btframe *parent; 2683 struct tlock *tlck; 2684 struct xtlock *xtlck; 2685 2686 /* 2687 * keep root leaf page which has become empty 2688 */ 2689 if (fp->header.flag & BT_ROOT) { 2690 /* keep the root page */ 2691 fp->header.flag &= ~BT_INTERNAL; 2692 fp->header.flag |= BT_LEAF; 2693 fp->header.nextindex = cpu_to_le16(XTENTRYSTART); 2694 2695 /* XT_PUTPAGE(fmp); */ 2696 2697 return 0; 2698 } 2699 2700 /* 2701 * free non-root leaf page 2702 */ 2703 if ((rc = xtRelink(tid, ip, fp))) { 2704 XT_PUTPAGE(fmp); 2705 return rc; 2706 } 2707 2708 xaddr = addressPXD(&fp->header.self); 2709 xlen = lengthPXD(&fp->header.self); 2710 /* free the page extent */ 2711 dbFree(ip, xaddr, (s64) xlen); 2712 2713 /* free the buffer page */ 2714 discard_metapage(fmp); 2715 2716 /* 2717 * propagate page deletion up the index tree 2718 * 2719 * If the delete from the parent page makes it empty, 2720 * continue all the way up the tree. 2721 * stop if the root page is reached (which is never deleted) or 2722 * if the entry deletion does not empty the page. 2723 */ 2724 while ((parent = BT_POP(btstack)) != NULL) { 2725 /* get/pin the parent page <sp> */ 2726 XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); 2727 if (rc) 2728 return rc; 2729 2730 index = parent->index; 2731 2732 /* delete the entry for the freed child page from parent.
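* (parent->bn and parent->index were pushed onto the btstack by the xtSearch() descent, so they locate the router entry to be removed.)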
2733 */ 2734 nextindex = le16_to_cpu(p->header.nextindex); 2735 2736 /* 2737 * the parent has the single entry being deleted: 2738 * free the parent page which has become empty. 2739 */ 2740 if (nextindex == 1) { 2741 if (p->header.flag & BT_ROOT) { 2742 /* keep the root page */ 2743 p->header.flag &= ~BT_INTERNAL; 2744 p->header.flag |= BT_LEAF; 2745 p->header.nextindex = 2746 cpu_to_le16(XTENTRYSTART); 2747 2748 /* XT_PUTPAGE(mp); */ 2749 2750 break; 2751 } else { 2752 /* free the parent page */ 2753 if ((rc = xtRelink(tid, ip, p))) 2754 return rc; 2755 2756 xaddr = addressPXD(&p->header.self); 2757 /* free the page extent */ 2758 dbFree(ip, xaddr, 2759 (s64) JFS_SBI(ip->i_sb)->nbperpage); 2760 2761 /* unpin/free the buffer page */ 2762 discard_metapage(mp); 2763 2764 /* propagate up */ 2765 continue; 2766 } 2767 } 2768 /* 2769 * the parent has other entries remaining: 2770 * delete the router entry from the parent page. 2771 */ 2772 else { 2773 BT_MARK_DIRTY(mp, ip); 2774 /* 2775 * acquire a transaction lock on the leaf page; 2776 * 2777 * action:xad deletion; 2778 */ 2779 tlck = txLock(tid, ip, mp, tlckXTREE); 2780 xtlck = (struct xtlock *) & tlck->lock; 2781 xtlck->lwm.offset = 2782 (xtlck->lwm.offset) ? min(index, 2783 xtlck->lwm. 2784 offset) : index; 2785 2786 /* if delete from middle, 2787 * shift left/compact the remaining entries in the page 2788 */ 2789 if (index < nextindex - 1) 2790 memmove(&p->xad[index], &p->xad[index + 1], 2791 (nextindex - index - 2792 1) << L2XTSLOTSIZE); 2793 2794 p->header.nextindex = 2795 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 2796 1); 2797 jfs_info("xtDeleteUp(entry): 0x%lx[%d]", 2798 (ulong) parent->bn, index); 2799 } 2800 2801 /* unpin the parent page */ 2802 XT_PUTPAGE(mp); 2803 2804 /* exit propagation up */ 2805 break; 2806 } 2807 2808 return 0; 2809 } 2810 2811 2812 /* 2813 * NAME: xtRelocate() 2814 * 2815 * FUNCTION: relocate xtpage or data extent of regular file; 2816 * This function is mainly used by defragfs utility. 2817 * 2818 * NOTE: This routine does not have the logic to handle 2819 * uncommitted allocated extent. The caller should call 2820 * txCommit() to commit all the allocation before call 2821 * this routine. 2822 */ 2823 int 2824 xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ 2825 s64 nxaddr, /* new xaddr */ 2826 int xtype) 2827 { /* extent type: XTPAGE or DATAEXT */ 2828 int rc = 0; 2829 struct tblock *tblk; 2830 struct tlock *tlck; 2831 struct xtlock *xtlck; 2832 struct metapage *mp, *pmp, *lmp, *rmp; /* meta-page buffer */ 2833 xtpage_t *p, *pp, *rp, *lp; /* base B+-tree index page */ 2834 xad_t *xad; 2835 pxd_t *pxd; 2836 s64 xoff, xsize; 2837 int xlen; 2838 s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn; 2839 cbuf_t *cp; 2840 s64 offset, nbytes, nbrd, pno; 2841 int nb, npages, nblks; 2842 s64 bn; 2843 int cmp; 2844 int index; 2845 struct pxd_lock *pxdlock; 2846 struct btstack btstack; /* traverse stack */ 2847 2848 xtype = xtype & EXTENT_TYPE; 2849 2850 xoff = offsetXAD(oxad); 2851 oxaddr = addressXAD(oxad); 2852 xlen = lengthXAD(oxad); 2853 2854 /* validate extent offset */ 2855 offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; 2856 if (offset >= ip->i_size) 2857 return -ESTALE; /* stale extent */ 2858 2859 jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx", 2860 xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr); 2861 2862 /* 2863 * 1. 
get and validate the parent xtpage/xad entry 2864 * covering the source extent to be relocated; 2865 */ 2866 if (xtype == DATAEXT) { 2867 /* search in leaf entry */ 2868 rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); 2869 if (rc) 2870 return rc; 2871 2872 /* retrieve search result */ 2873 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2874 2875 if (cmp) { 2876 XT_PUTPAGE(pmp); 2877 return -ESTALE; 2878 } 2879 2880 /* validate for exact match with a single entry */ 2881 xad = &pp->xad[index]; 2882 if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) { 2883 XT_PUTPAGE(pmp); 2884 return -ESTALE; 2885 } 2886 } else { /* (xtype == XTPAGE) */ 2887 2888 /* search in internal entry */ 2889 rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0); 2890 if (rc) 2891 return rc; 2892 2893 /* retrieve search result */ 2894 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2895 2896 if (cmp) { 2897 XT_PUTPAGE(pmp); 2898 return -ESTALE; 2899 } 2900 2901 /* xtSearchNode() validated for exact match with a single entry 2902 */ 2903 xad = &pp->xad[index]; 2904 } 2905 jfs_info("xtRelocate: parent xad entry validated."); 2906 2907 /* 2908 * 2. relocate the extent 2909 */ 2910 if (xtype == DATAEXT) { 2911 /* if the extent is allocated-but-not-recorded 2912 * there is no real data to be moved in this extent, 2913 */ 2914 if (xad->flag & XAD_NOTRECORDED) 2915 goto out; 2916 else 2917 /* release xtpage for cmRead()/xtLookup() */ 2918 XT_PUTPAGE(pmp); 2919 2920 /* 2921 * cmRelocate() 2922 * 2923 * copy target data pages to be relocated; 2924 * 2925 * data extent must start at page boundary and 2926 * be a multiple of page size (except the last data extent); 2927 * read in each page of the source data extent into cbuf, 2928 * update the cbuf extent descriptor of the page to be 2929 * homeward bound to new dst data extent 2930 * copy the data from the old extent to new extent. 2931 * copy is essential for compressed files to avoid problems 2932 * that can arise if there was a change in compression 2933 * algorithms. 2934 * it is a good strategy because it may disrupt cache 2935 * policy to keep the pages in memory afterwards.
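* * concretely, each pass of the loop below reads one CM_BSIZE cache buffer with cmRead(), rebinds it to the destination address via cmSetXD(), and releases it marked modified with cmPut(), so the copied data is written back to the new extent.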
2936 */ 2937 offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; 2938 assert((offset & CM_OFFSET) == 0); 2939 nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize; 2940 pno = offset >> CM_L2BSIZE; 2941 npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE; 2942 /* 2943 npages = ((offset + nbytes - 1) >> CM_L2BSIZE) - 2944 (offset >> CM_L2BSIZE) + 1; 2945 */ 2946 sxaddr = oxaddr; 2947 dxaddr = nxaddr; 2948 2949 /* process the request one cache buffer at a time */ 2950 for (nbrd = 0; nbrd < nbytes; nbrd += nb, 2951 offset += nb, pno++, npages--) { 2952 /* compute page size */ 2953 nb = min(nbytes - nbrd, CM_BSIZE); 2954 2955 /* get the cache buffer of the page */ 2956 if (rc = cmRead(ip, offset, npages, &cp)) 2957 break; 2958 2959 assert(addressPXD(&cp->cm_pxd) == sxaddr); 2960 assert(!cp->cm_modified); 2961 2962 /* bind buffer with the new extent address */ 2963 nblks = nb >> JFS_IP(ip->i_sb)->l2bsize; 2964 cmSetXD(ip, cp, pno, dxaddr, nblks); 2965 2966 /* release the cbuf, mark it as modified */ 2967 cmPut(cp, TRUE); 2968 2969 dxaddr += nblks; 2970 sxaddr += nblks; 2971 } 2972 2973 /* get back parent page */ 2974 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) 2975 return rc; 2976 2977 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2978 jfs_info("xtRelocate: target data extent relocated."); 2979 } else { /* (xtype == XTPAGE) */ 2980 2981 /* 2982 * read in the target xtpage from the source extent; 2983 */ 2984 XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); 2985 if (rc) { 2986 XT_PUTPAGE(pmp); 2987 return rc; 2988 } 2989 2990 /* 2991 * read in sibling pages if any to update sibling pointers; 2992 */ 2993 rmp = NULL; 2994 if (p->header.next) { 2995 nextbn = le64_to_cpu(p->header.next); 2996 XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc); 2997 if (rc) { 2998 XT_PUTPAGE(pmp); 2999 XT_PUTPAGE(mp); 3000 return (rc); 3001 } 3002 } 3003 3004 lmp = NULL; 3005 if (p->header.prev) { 3006 prevbn = le64_to_cpu(p->header.prev); 3007 XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc); 3008 if (rc) { 3009 XT_PUTPAGE(pmp); 3010 XT_PUTPAGE(mp); 3011 if (rmp) 3012 XT_PUTPAGE(rmp); 3013 return (rc); 3014 } 3015 } 3016 3017 /* at this point, all xtpages to be updated are in memory */ 3018 3019 /* 3020 * update sibling pointers of sibling xtpages if any; 3021 */ 3022 if (lmp) { 3023 BT_MARK_DIRTY(lmp, ip); 3024 tlck = 3025 txLock(tid, ip, lmp, tlckXTREE | tlckRELINK); 3026 lp->header.next = cpu_to_le64(nxaddr); 3027 XT_PUTPAGE(lmp); 3028 } 3029 3030 if (rmp) { 3031 BT_MARK_DIRTY(rmp, ip); 3032 tlck = 3033 txLock(tid, ip, rmp, tlckXTREE | tlckRELINK); 3034 rp->header.prev = cpu_to_le64(nxaddr); 3035 XT_PUTPAGE(rmp); 3036 } 3037 3038 /* 3039 * update the target xtpage to be relocated 3040 * 3041 * update the self address of the target page 3042 * and write to destination extent; 3043 * redo image covers the whole xtpage since it is new page 3044 * to the destination extent; 3045 * update of bmap for the free of source extent 3046 * of the target xtpage itself: 3047 * update of bmap for the allocation of destination extent 3048 * of the target xtpage itself: 3049 * update of bmap for the extents covered by xad entries in 3050 * the target xtpage is not necessary since they are not 3051 * updated; 3052 * if not committed before this relocation, 3053 * target page may contain XAD_NEW entries which must 3054 * be scanned for bmap update (logredo() always 3055 * scan xtpage REDOPAGE image for bmap update); 3056 * if committed before this relocation (tlckRELOCATE), 3057 * scan may be skipped by commit() and logredo(); 3058 */ 3059 
BT_MARK_DIRTY(mp, ip); 3060 /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */ 3061 tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW); 3062 xtlck = (struct xtlock *) & tlck->lock; 3063 3064 /* update the self address in the xtpage header */ 3065 pxd = &p->header.self; 3066 PXDaddress(pxd, nxaddr); 3067 3068 /* linelock for the after image of the whole page */ 3069 xtlck->lwm.length = 3070 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; 3071 3072 /* update the buffer extent descriptor of target xtpage */ 3073 xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; 3074 bmSetXD(mp, nxaddr, xsize); 3075 3076 /* unpin the target page to new homeward bound */ 3077 XT_PUTPAGE(mp); 3078 jfs_info("xtRelocate: target xtpage relocated."); 3079 } 3080 3081 /* 3082 * 3. acquire maplock for the source extent to be freed; 3083 * 3084 * acquire a maplock saving the src relocated extent address; 3085 * to free the extent at commit time; 3086 */ 3087 out: 3088 /* if DATAEXT relocation, write a LOG_UPDATEMAP record for 3089 * free PXD of the source data extent (logredo() will update 3090 * bmap for free of source data extent), and update bmap for 3091 * free of the source data extent; 3092 */ 3093 if (xtype == DATAEXT) 3094 tlck = txMaplock(tid, ip, tlckMAP); 3095 /* if XTPAGE relocation, write a LOG_NOREDOPAGE record 3096 * for the source xtpage (logredo() will init NoRedoPage 3097 * filter and will also update bmap for free of the source 3098 * xtpage), and update bmap for free of the source xtpage; 3099 * N.B. We use tlckMAP instead of tlckXTREE because there 3100 * is no buffer associated with this lock since the buffer 3101 * has been redirected to the target location. 3102 */ 3103 else /* (xtype == XTPAGE) */ 3104 tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE); 3105 3106 pxdlock = (struct pxd_lock *) & tlck->lock; 3107 pxdlock->flag = mlckFREEPXD; 3108 PXDaddress(&pxdlock->pxd, oxaddr); 3109 PXDlength(&pxdlock->pxd, xlen); 3110 pxdlock->index = 1; 3111 3112 /* 3113 * 4. update the parent xad entry for relocation; 3114 * 3115 * acquire tlck for the parent entry with XAD_NEW as entry 3116 * update which will write LOG_REDOPAGE and update bmap for 3117 * allocation of XAD_NEW destination extent; 3118 */ 3119 jfs_info("xtRelocate: update parent xad entry."); 3120 BT_MARK_DIRTY(pmp, ip); 3121 tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW); 3122 xtlck = (struct xtlock *) & tlck->lock; 3123 3124 /* update the XAD with the new destination extent; */ 3125 xad = &pp->xad[index]; 3126 xad->flag |= XAD_NEW; 3127 XADaddress(xad, nxaddr); 3128 3129 xtlck->lwm.offset = min(index, xtlck->lwm.offset); 3130 xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) - 3131 xtlck->lwm.offset; 3132 3133 /* unpin the parent xtpage */ 3134 XT_PUTPAGE(pmp); 3135 3136 return rc; 3137 } 3138 3139 3140 /* 3141 * xtSearchNode() 3142 * 3143 * function: search for the internal xad entry covering specified extent. 3144 * This function is mainly used by defragfs utility. 3145 * 3146 * parameters: 3147 * ip - file object; 3148 * xad - extent to find; 3149 * cmpp - comparison result: 3150 * btstack - traverse stack; 3151 * flag - search process flag; 3152 * 3153 * returns: 3154 * btstack contains (bn, index) of search path traversed to the entry. 3155 * *cmpp is set to result of comparison with the entry returned. 3156 * the page containing the entry is pinned at exit.
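* * usage sketch (mirroring the xtRelocate() caller above): * * rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0); * if (rc == 0 && cmp == 0) { * XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); * ... pp->xad[index] is the matching router entry ... * }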
3157 */ 3158 static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ 3159 int *cmpp, struct btstack * btstack, int flag) 3160 { 3161 int rc = 0; 3162 s64 xoff, xaddr; 3163 int xlen; 3164 int cmp = 1; /* init for empty page */ 3165 s64 bn; /* block number */ 3166 struct metapage *mp; /* meta-page buffer */ 3167 xtpage_t *p; /* page */ 3168 int base, index, lim; 3169 struct btframe *btsp; 3170 s64 t64; 3171 3172 BT_CLR(btstack); 3173 3174 xoff = offsetXAD(xad); 3175 xlen = lengthXAD(xad); 3176 xaddr = addressXAD(xad); 3177 3178 /* 3179 * search down tree from root: 3180 * 3181 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 3182 * internal page, child page Pi contains entry with key K, Ki <= K < Kj. 3183 * 3184 * if entry with search key K is not found 3185 * internal page search finds the entry with largest key Ki 3186 * less than K which points to the child page to search; 3187 * leaf page search finds the entry with smallest key Kj 3188 * greater than K so that the returned index is the position of 3189 * the entry to be shifted right for insertion of new entry. 3190 * for empty tree, search key is greater than any key of the tree. 3191 * 3192 * by convention, root bn = 0. 3193 */ 3194 for (bn = 0;;) { 3195 /* get/pin the page to search */ 3196 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3197 if (rc) 3198 return rc; 3199 if (p->header.flag & BT_LEAF) { 3200 XT_PUTPAGE(mp); 3201 return -ESTALE; 3202 } 3203 3204 lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 3205 3206 /* 3207 * binary search with search key K on the current page 3208 */ 3209 for (base = XTENTRYSTART; lim; lim >>= 1) { 3210 index = base + (lim >> 1); 3211 3212 XT_CMP(cmp, xoff, &p->xad[index], t64); 3213 if (cmp == 0) { 3214 /* 3215 * search hit 3216 * 3217 * verify for exact match; 3218 */ 3219 if (xaddr == addressXAD(&p->xad[index]) && 3220 xoff == offsetXAD(&p->xad[index])) { 3221 *cmpp = cmp; 3222 3223 /* save search result */ 3224 btsp = btstack->top; 3225 btsp->bn = bn; 3226 btsp->index = index; 3227 btsp->mp = mp; 3228 3229 return 0; 3230 } 3231 3232 /* descend/search its child page */ 3233 goto next; 3234 } 3235 3236 if (cmp > 0) { 3237 base = index + 1; 3238 --lim; 3239 } 3240 } 3241 3242 /* 3243 * search miss - non-leaf page: 3244 * 3245 * base is the smallest index with key (Kj) greater than 3246 * search key (K) and may be zero or maxentry index. 3247 * if base is non-zero, decrement base by one to get the parent 3248 * entry of the child page to search. 3249 */ 3250 index = base ? base - 1 : base; 3251 3252 /* 3253 * go down to child page 3254 */ 3255 next: 3256 /* get the child page block number */ 3257 bn = addressXAD(&p->xad[index]); 3258 3259 /* unpin the parent page */ 3260 XT_PUTPAGE(mp); 3261 } 3262 } 3263 3264 3265 /* 3266 * xtRelink() 3267 * 3268 * function: 3269 * link around a freed page.
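* * before: ... <-> prev <-> page <-> next <-> ... * after: ... <-> prev <-> next <-> ... * (only the neighbors' sibling pointers are patched; freeing the page itself is left to the caller.)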
3270 * 3271 * Parameter: 3272 * int tid, 3273 * struct inode *ip, 3274 * xtpage_t *p) 3275 * 3276 * returns: 3277 */ 3278 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p) 3279 { 3280 int rc = 0; 3281 struct metapage *mp; 3282 s64 nextbn, prevbn; 3283 struct tlock *tlck; 3284 3285 nextbn = le64_to_cpu(p->header.next); 3286 prevbn = le64_to_cpu(p->header.prev); 3287 3288 /* update prev pointer of the next page */ 3289 if (nextbn != 0) { 3290 XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 3291 if (rc) 3292 return rc; 3293 3294 /* 3295 * acquire a transaction lock on the page; 3296 * 3297 * action: update prev pointer; 3298 */ 3299 BT_MARK_DIRTY(mp, ip); 3300 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 3301 3302 /* the page may already have been tlock'd */ 3303 3304 p->header.prev = cpu_to_le64(prevbn); 3305 3306 XT_PUTPAGE(mp); 3307 } 3308 3309 /* update next pointer of the previous page */ 3310 if (prevbn != 0) { 3311 XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc); 3312 if (rc) 3313 return rc; 3314 3315 /* 3316 * acquire a transaction lock on the page; 3317 * 3318 * action: update next pointer; 3319 */ 3320 BT_MARK_DIRTY(mp, ip); 3321 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 3322 3323 /* the page may already have been tlock'd */ 3324 3325 p->header.next = cpu_to_le64(nextbn); 3326 3327 XT_PUTPAGE(mp); 3328 } 3329 3330 return 0; 3331 } 3332 #endif /* _STILL_TO_PORT */ 3333 3334 3335 /* 3336 * xtInitRoot() 3337 * 3338 * initialize file root (inline in inode) 3339 */ 3340 void xtInitRoot(tid_t tid, struct inode *ip) 3341 { 3342 xtpage_t *p; 3343 3344 /* 3345 * acquire a transaction lock on the root 3346 * 3347 * action: 3348 */ 3349 txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag, 3350 tlckXTREE | tlckNEW); 3351 p = &JFS_IP(ip)->i_xtroot; 3352 3353 p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; 3354 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3355 3356 if (S_ISDIR(ip->i_mode)) 3357 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR); 3358 else { 3359 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT); 3360 ip->i_size = 0; 3361 } 3362 3363 3364 return; 3365 } 3366 3367 3368 /* 3369 * We can run into a deadlock truncating a file with a large number of 3370 * xtree pages (large fragmented file). A robust fix would entail a 3371 * reservation system where we would reserve a number of metadata pages 3372 * and tlocks which we would be guaranteed without a deadlock. Without 3373 * this, a partial fix is to limit number of metadata pages we will lock 3374 * in a single transaction. Currently we will truncate the file so that 3375 * no more than 50 leaf pages will be locked. The caller of xtTruncate 3376 * will be responsible for ensuring that the current transaction gets 3377 * committed, and that subsequent transactions are created to truncate 3378 * the file further if needed. 3379 */ 3380 #define MAX_TRUNCATE_LEAVES 50 3381 3382 /* 3383 * xtTruncate() 3384 * 3385 * function: 3386 * traverse for truncation logging backward bottom up; 3387 * terminate at the last extent entry at the current subtree 3388 * root page covering new down size. 3389 * truncation may occur within the last extent entry. 3390 * 3391 * parameter: 3392 * int tid, 3393 * struct inode *ip, 3394 * s64 newsize, 3395 * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} 3396 * 3397 * return: 3398 * 3399 * note: 3400 * PWMAP: 3401 * 1. truncate (non-COMMIT_NOLINK file) 3402 * by jfs_truncate() or jfs_open(O_TRUNC): 3403 * xtree is updated; 3404 * 2.
truncate index table of directory when last entry removed 3405 * map update via tlock at commit time; 3406 * PMAP: 3407 * Call xtTruncate_pmap instead 3408 * WMAP: 3409 * 1. remove (free zero link count) on last reference release 3410 * (pmap has been freed at commit zero link count); 3411 * 2. truncate (COMMIT_NOLINK file, i.e., tmp file): 3412 * xtree is updated; 3413 * map update directly at truncation time; 3414 * 3415 * if (DELETE) 3416 * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); 3417 * else if (TRUNCATE) 3418 * must write LOG_NOREDOPAGE for deleted index page; 3419 * 3420 * pages may already have been tlocked by anonymous transactions 3421 * during file growth (i.e., write) before truncation; 3422 * 3423 * except last truncated entry, deleted entries remain as is 3424 * in the page (nextindex is updated) for other use 3425 * (e.g., log/update allocation map): this avoids copying the page 3426 * info but delays freeing of pages; 3427 * 3428 */ 3429 s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) 3430 { 3431 int rc = 0; 3432 s64 teof; 3433 struct metapage *mp; 3434 xtpage_t *p; 3435 s64 bn; 3436 int index, nextindex; 3437 xad_t *xad; 3438 s64 xoff, xaddr; 3439 int xlen, len, freexlen; 3440 struct btstack btstack; 3441 struct btframe *parent; 3442 struct tblock *tblk = NULL; 3443 struct tlock *tlck = NULL; 3444 struct xtlock *xtlck = NULL; 3445 struct xdlistlock xadlock; /* maplock for COMMIT_WMAP */ 3446 struct pxd_lock *pxdlock; /* maplock for COMMIT_WMAP */ 3447 s64 nfreed; 3448 int freed, log; 3449 int locked_leaves = 0; 3450 3451 /* save object truncation type */ 3452 if (tid) { 3453 tblk = tid_to_tblock(tid); 3454 tblk->xflag |= flag; 3455 } 3456 3457 nfreed = 0; 3458 3459 flag &= COMMIT_MAP; 3460 assert(flag != COMMIT_PMAP); 3461 3462 if (flag == COMMIT_PWMAP) 3463 log = 1; 3464 else { 3465 log = 0; 3466 xadlock.flag = mlckFREEXADLIST; 3467 xadlock.index = 1; 3468 } 3469 3470 /* 3471 * if the newsize is not an integral number of pages, 3472 * the file between newsize and next page boundary will 3473 * be cleared. 3474 * if truncating into a file hole, it will cause 3475 * a full block to be allocated for the logical block. 3476 */ 3477 3478 /* 3479 * release page blocks of truncated region <teof, eof> 3480 * 3481 * free the data blocks from the leaf index blocks. 3482 * delete the parent index entries corresponding to 3483 * the freed child data/index blocks. 3484 * free the index blocks themselves which aren't needed 3485 * in new sized file. 3486 * 3487 * index blocks are updated only if the blocks are to be 3488 * retained in the new sized file. 3489 * if type is PMAP, the data and index pages are NOT 3490 * freed, and the data and index blocks are NOT freed 3491 * from working map. 3492 * (this will allow continued access of data/index of 3493 * temporary file (zerolink count file truncated to zero-length)).
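* * n.b. when the scan is cut short after MAX_TRUNCATE_LEAVES leaf pages, xtTruncate() returns the offset it reached rather than the requested newsize; a hypothetical caller-side sketch of the resulting loop (cf. the MAX_TRUNCATE_LEAVES note above): * * do { * tid = txBegin(ip->i_sb, 0); * newsize = xtTruncate(tid, ip, length, COMMIT_PWMAP); * ... * txCommit(tid, 1, &ip, 0); * txEnd(tid); * } while (newsize > length);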
3494 */ 3495 teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >> 3496 JFS_SBI(ip->i_sb)->l2bsize; 3497 3498 /* clear stack */ 3499 BT_CLR(&btstack); 3500 3501 /* 3502 * start with root 3503 * 3504 * root resides in the inode 3505 */ 3506 bn = 0; 3507 3508 /* 3509 * first access of each page: 3510 */ 3511 getPage: 3512 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3513 if (rc) 3514 return rc; 3515 3516 /* process entries backward from last index */ 3517 index = le16_to_cpu(p->header.nextindex) - 1; 3518 3519 3520 /* Since this is the rightmost page at this level, and we may have 3521 * already freed a page that was formerly to the right, let's make 3522 * sure that the next pointer is zero. 3523 */ 3524 if (p->header.next) { 3525 if (log) 3526 /* 3527 * Make sure this change to the header is logged. 3528 * If we really truncate this leaf, the flag 3529 * will be changed to tlckTRUNCATE 3530 */ 3531 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 3532 BT_MARK_DIRTY(mp, ip); 3533 p->header.next = 0; 3534 } 3535 3536 if (p->header.flag & BT_INTERNAL) 3537 goto getChild; 3538 3539 /* 3540 * leaf page 3541 */ 3542 freed = 0; 3543 3544 /* does region covered by leaf page precede Teof ? */ 3545 xad = &p->xad[index]; 3546 xoff = offsetXAD(xad); 3547 xlen = lengthXAD(xad); 3548 if (teof >= xoff + xlen) { 3549 XT_PUTPAGE(mp); 3550 goto getParent; 3551 } 3552 3553 /* (re)acquire tlock of the leaf page */ 3554 if (log) { 3555 if (++locked_leaves > MAX_TRUNCATE_LEAVES) { 3556 /* 3557 * We need to limit the size of the transaction 3558 * to avoid exhausting pagecache & tlocks 3559 */ 3560 XT_PUTPAGE(mp); 3561 newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; 3562 goto getParent; 3563 } 3564 tlck = txLock(tid, ip, mp, tlckXTREE); 3565 tlck->type = tlckXTREE | tlckTRUNCATE; 3566 xtlck = (struct xtlock *) & tlck->lock; 3567 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; 3568 } 3569 BT_MARK_DIRTY(mp, ip); 3570 3571 /* 3572 * scan backward leaf page entries 3573 */ 3574 for (; index >= XTENTRYSTART; index--) { 3575 xad = &p->xad[index]; 3576 xoff = offsetXAD(xad); 3577 xlen = lengthXAD(xad); 3578 xaddr = addressXAD(xad); 3579 3580 /* 3581 * The "data" for a directory is indexed by the block 3582 * device's address space. This metadata must be invalidated 3583 * here 3584 */ 3585 if (S_ISDIR(ip->i_mode) && (teof == 0)) 3586 invalidate_xad_metapages(ip, *xad); 3587 /* 3588 * entry beyond eof: continue scan of current page 3589 * xad 3590 * ---|---=======-------> 3591 * eof 3592 */ 3593 if (teof < xoff) { 3594 nfreed += xlen; 3595 continue; 3596 } 3597 3598 /* 3599 * (xoff <= teof): last entry to be deleted from page; 3600 * If other entries remain in page: keep and update the page. 3601 */ 3602 3603 /* 3604 * eof == entry_start: delete the entry 3605 * xad 3606 * -------|=======-------> 3607 * eof 3608 * 3609 */ 3610 if (teof == xoff) { 3611 nfreed += xlen; 3612 3613 if (index == XTENTRYSTART) 3614 break; 3615 3616 nextindex = index; 3617 } 3618 /* 3619 * eof within the entry: truncate the entry. 3620 * xad 3621 * -------===|===-------> 3622 * eof 3623 */ 3624 else if (teof < xoff + xlen) { 3625 /* update truncated entry */ 3626 len = teof - xoff; 3627 freexlen = xlen - len; 3628 XADlength(xad, len); 3629 3630 /* save pxd of truncated extent in tlck */ 3631 xaddr += len; 3632 if (log) { /* COMMIT_PWMAP */ 3633 xtlck->lwm.offset = (xtlck->lwm.offset) ? 
3634 min(index, (int)xtlck->lwm.offset) : index; 3635 xtlck->lwm.length = index + 1 - 3636 xtlck->lwm.offset; 3637 xtlck->twm.offset = index; 3638 pxdlock = (struct pxd_lock *) & xtlck->pxdlock; 3639 pxdlock->flag = mlckFREEPXD; 3640 PXDaddress(&pxdlock->pxd, xaddr); 3641 PXDlength(&pxdlock->pxd, freexlen); 3642 } 3643 /* free truncated extent */ 3644 else { /* COMMIT_WMAP */ 3645 3646 pxdlock = (struct pxd_lock *) & xadlock; 3647 pxdlock->flag = mlckFREEPXD; 3648 PXDaddress(&pxdlock->pxd, xaddr); 3649 PXDlength(&pxdlock->pxd, freexlen); 3650 txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP); 3651 3652 /* reset map lock */ 3653 xadlock.flag = mlckFREEXADLIST; 3654 } 3655 3656 /* current entry is new last entry; */ 3657 nextindex = index + 1; 3658 3659 nfreed += freexlen; 3660 } 3661 /* 3662 * eof beyond the entry: 3663 * xad 3664 * -------=======---|---> 3665 * eof 3666 */ 3667 else { /* (xoff + xlen < teof) */ 3668 3669 nextindex = index + 1; 3670 } 3671 3672 if (nextindex < le16_to_cpu(p->header.nextindex)) { 3673 if (!log) { /* COMMIT_WMAP */ 3674 xadlock.xdlist = &p->xad[nextindex]; 3675 xadlock.count = 3676 le16_to_cpu(p->header.nextindex) - 3677 nextindex; 3678 txFreeMap(ip, (struct maplock *) & xadlock, 3679 NULL, COMMIT_WMAP); 3680 } 3681 p->header.nextindex = cpu_to_le16(nextindex); 3682 } 3683 3684 XT_PUTPAGE(mp); 3685 3686 /* assert(freed == 0); */ 3687 goto getParent; 3688 } /* end scan of leaf page entries */ 3689 3690 freed = 1; 3691 3692 /* 3693 * leaf page become empty: free the page if type != PMAP 3694 */ 3695 if (log) { /* COMMIT_PWMAP */ 3696 /* txCommit() with tlckFREE: 3697 * free data extents covered by leaf [XTENTRYSTART:hwm); 3698 * invalidate leaf if COMMIT_PWMAP; 3699 * if (TRUNCATE), will write LOG_NOREDOPAGE; 3700 */ 3701 tlck->type = tlckXTREE | tlckFREE; 3702 } else { /* COMMIT_WMAP */ 3703 3704 /* free data extents covered by leaf */ 3705 xadlock.xdlist = &p->xad[XTENTRYSTART]; 3706 xadlock.count = 3707 le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 3708 txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP); 3709 } 3710 3711 if (p->header.flag & BT_ROOT) { 3712 p->header.flag &= ~BT_INTERNAL; 3713 p->header.flag |= BT_LEAF; 3714 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3715 3716 XT_PUTPAGE(mp); /* debug */ 3717 goto out; 3718 } else { 3719 if (log) { /* COMMIT_PWMAP */ 3720 /* page will be invalidated at tx completion 3721 */ 3722 XT_PUTPAGE(mp); 3723 } else { /* COMMIT_WMAP */ 3724 3725 if (mp->lid) 3726 lid_to_tlock(mp->lid)->flag |= tlckFREELOCK; 3727 3728 /* invalidate empty leaf page */ 3729 discard_metapage(mp); 3730 } 3731 } 3732 3733 /* 3734 * the leaf page become empty: delete the parent entry 3735 * for the leaf page if the parent page is to be kept 3736 * in the new sized file. 3737 */ 3738 3739 /* 3740 * go back up to the parent page 3741 */ 3742 getParent: 3743 /* pop/restore parent entry for the current child page */ 3744 if ((parent = BT_POP(&btstack)) == NULL) 3745 /* current page must have been root */ 3746 goto out; 3747 3748 /* get back the parent page */ 3749 bn = parent->bn; 3750 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3751 if (rc) 3752 return rc; 3753 3754 index = parent->index; 3755 3756 /* 3757 * child page was not empty: 3758 */ 3759 if (freed == 0) { 3760 /* has any entry been deleted from parent ?
*/ 3761 if (index < le16_to_cpu(p->header.nextindex) - 1) { 3762 /* (re)acquire tlock on the parent page */ 3763 if (log) { /* COMMIT_PWMAP */ 3764 /* txCommit() with tlckTRUNCATE: 3765 * free child extents covered by parent [); 3766 */ 3767 tlck = txLock(tid, ip, mp, tlckXTREE); 3768 xtlck = (struct xtlock *) & tlck->lock; 3769 if (!(tlck->type & tlckTRUNCATE)) { 3770 xtlck->hwm.offset = 3771 le16_to_cpu(p->header. 3772 nextindex) - 1; 3773 tlck->type = 3774 tlckXTREE | tlckTRUNCATE; 3775 } 3776 } else { /* COMMIT_WMAP */ 3777 3778 /* free child extents covered by parent */ 3779 xadlock.xdlist = &p->xad[index + 1]; 3780 xadlock.count = 3781 le16_to_cpu(p->header.nextindex) - 3782 index - 1; 3783 txFreeMap(ip, (struct maplock *) & xadlock, 3784 NULL, COMMIT_WMAP); 3785 } 3786 BT_MARK_DIRTY(mp, ip); 3787 3788 p->header.nextindex = cpu_to_le16(index + 1); 3789 } 3790 XT_PUTPAGE(mp); 3791 goto getParent; 3792 } 3793 3794 /* 3795 * child page was empty: 3796 */ 3797 nfreed += lengthXAD(&p->xad[index]); 3798 3799 /* 3800 * During working map update, child page's tlock must be handled 3801 * before parent's. This is because the parent's tlock will cause 3802 * the child's disk space to be marked available in the wmap, so 3803 * it's important that the child page be released by that time. 3804 * 3805 * ToDo: tlocks should be on doubly-linked list, so we can 3806 * quickly remove it and add it to the end. 3807 */ 3808 3809 /* 3810 * Move parent page's tlock to the end of the tid's tlock list 3811 */ 3812 if (log && mp->lid && (tblk->last != mp->lid) && 3813 lid_to_tlock(mp->lid)->tid) { 3814 lid_t lid = mp->lid; 3815 struct tlock *prev; 3816 3817 tlck = lid_to_tlock(lid); 3818 3819 if (tblk->next == lid) 3820 tblk->next = tlck->next; 3821 else { 3822 for (prev = lid_to_tlock(tblk->next); 3823 prev->next != lid; 3824 prev = lid_to_tlock(prev->next)) { 3825 assert(prev->next); 3826 } 3827 prev->next = tlck->next; 3828 } 3829 lid_to_tlock(tblk->last)->next = lid; 3830 tlck->next = 0; 3831 tblk->last = lid; 3832 } 3833 3834 /* 3835 * parent page become empty: free the page 3836 */ 3837 if (index == XTENTRYSTART) { 3838 if (log) { /* COMMIT_PWMAP */ 3839 /* txCommit() with tlckFREE: 3840 * free child extents covered by parent; 3841 * invalidate parent if COMMIT_PWMAP; 3842 */ 3843 tlck = txLock(tid, ip, mp, tlckXTREE); 3844 xtlck = (struct xtlock *) & tlck->lock; 3845 xtlck->hwm.offset = 3846 le16_to_cpu(p->header.nextindex) - 1; 3847 tlck->type = tlckXTREE | tlckFREE; 3848 } else { /* COMMIT_WMAP */ 3849 3850 /* free child extents covered by parent */ 3851 xadlock.xdlist = &p->xad[XTENTRYSTART]; 3852 xadlock.count = 3853 le16_to_cpu(p->header.nextindex) - 3854 XTENTRYSTART; 3855 txFreeMap(ip, (struct maplock *) & xadlock, NULL, 3856 COMMIT_WMAP); 3857 } 3858 BT_MARK_DIRTY(mp, ip); 3859 3860 if (p->header.flag & BT_ROOT) { 3861 p->header.flag &= ~BT_INTERNAL; 3862 p->header.flag |= BT_LEAF; 3863 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3864 if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) { 3865 /* 3866 * Shrink root down to allow inline 3867 * EA (otherwise fsck complains) 3868 */ 3869 p->header.maxentry = 3870 cpu_to_le16(XTROOTINITSLOT); 3871 JFS_IP(ip)->mode2 |= INLINEEA; 3872 } 3873 3874 XT_PUTPAGE(mp); /* debug */ 3875 goto out; 3876 } else { 3877 if (log) { /* COMMIT_PWMAP */ 3878 /* page will be invalidated at tx completion 3879 */ 3880 XT_PUTPAGE(mp); 3881 } else { /* COMMIT_WMAP */ 3882 3883 if (mp->lid) 3884 lid_to_tlock(mp->lid)->flag |= 3885 tlckFREELOCK; 3886 3887 /* 
invalidate parent page */ 3888 discard_metapage(mp); 3889 } 3890 3891 /* parent has become empty and freed: 3892 * go back up to its parent page 3893 */ 3894 /* freed = 1; */ 3895 goto getParent; 3896 } 3897 } 3898 /* 3899 * parent page still has entries for front region; 3900 */ 3901 else { 3902 /* try truncate region covered by preceding entry 3903 * (process backward) 3904 */ 3905 index--; 3906 3907 /* go back down to the child page corresponding 3908 * to the entry 3909 */ 3910 goto getChild; 3911 } 3912 3913 /* 3914 * internal page: go down to child page of current entry 3915 */ 3916 getChild: 3917 /* save current parent entry for the child page */ 3918 BT_PUSH(&btstack, bn, index); 3919 3920 /* get child page */ 3921 xad = &p->xad[index]; 3922 bn = addressXAD(xad); 3923 3924 /* 3925 * first access of each internal entry: 3926 */ 3927 /* release parent page */ 3928 XT_PUTPAGE(mp); 3929 3930 /* process the child page */ 3931 goto getPage; 3932 3933 out: 3934 /* 3935 * update file resource stat 3936 */ 3937 /* set size 3938 */ 3939 if (S_ISDIR(ip->i_mode) && !newsize) 3940 ip->i_size = 1; /* fsck hates zero-length directories */ 3941 else 3942 ip->i_size = newsize; 3943 3944 /* update quota allocation to reflect freed blocks */ 3945 DQUOT_FREE_BLOCK(ip, nfreed); 3946 3947 /* 3948 * free tlock of invalidated pages 3949 */ 3950 if (flag == COMMIT_WMAP) 3951 txFreelock(ip); 3952 3953 return newsize; 3954 } 3955 3956 3957 /* 3958 * xtTruncate_pmap() 3959 * 3960 * function: 3961 * Perform truncate to zero length for deleted file, leaving the 3962 * xtree and working map untouched. This allows the file to 3963 * be accessed via open file handles, while the delete of the file 3964 * is committed to disk. 3965 * 3966 * parameter: 3967 * tid_t tid, 3968 * struct inode *ip, 3969 * s64 committed_size) 3970 * 3971 * return: new committed size 3972 * 3973 * note: 3974 * 3975 * To avoid deadlock by holding too many transaction locks, the 3976 * truncation may be broken up into multiple transactions. 3977 * The committed_size keeps track of how much of the file has been 3978 * freed from the pmaps.
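* * hypothetical caller-side sketch: start with committed_size 0; a positive return is the resume point for the next transaction, a negative one an error: * * s64 left = 0; * do { * tid = txBegin(ip->i_sb, 0); * left = xtTruncate_pmap(tid, ip, left); * ... * txCommit(tid, 1, &ip, COMMIT_SYNC); * txEnd(tid); * } while (left > 0);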
3979 */ 3980 s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) 3981 { 3982 s64 bn; 3983 struct btstack btstack; 3984 int cmp; 3985 int index; 3986 int locked_leaves = 0; 3987 struct metapage *mp; 3988 xtpage_t *p; 3989 struct btframe *parent; 3990 int rc; 3991 struct tblock *tblk; 3992 struct tlock *tlck = NULL; 3993 xad_t *xad; 3994 int xlen; 3995 s64 xoff; 3996 struct xtlock *xtlck = NULL; 3997 3998 /* save object truncation type */ 3999 tblk = tid_to_tblock(tid); 4000 tblk->xflag |= COMMIT_PMAP; 4001 4002 /* clear stack */ 4003 BT_CLR(&btstack); 4004 4005 if (committed_size) { 4006 xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1; 4007 rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); 4008 if (rc) 4009 return rc; 4010 4011 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 4012 4013 if (cmp != 0) { 4014 XT_PUTPAGE(mp); 4015 jfs_error(ip->i_sb, 4016 "xtTruncate_pmap: did not find extent"); 4017 return -EIO; 4018 } 4019 } else { 4020 /* 4021 * start with root 4022 * 4023 * root resides in the inode 4024 */ 4025 bn = 0; 4026 4027 /* 4028 * first access of each page: 4029 */ 4030 getPage: 4031 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4032 if (rc) 4033 return rc; 4034 4035 /* process entries backward from last index */ 4036 index = le16_to_cpu(p->header.nextindex) - 1; 4037 4038 if (p->header.flag & BT_INTERNAL) 4039 goto getChild; 4040 } 4041 4042 /* 4043 * leaf page 4044 */ 4045 4046 if (++locked_leaves > MAX_TRUNCATE_LEAVES) { 4047 /* 4048 * We need to limit the size of the transaction 4049 * to avoid exhausting pagecache & tlocks 4050 */ 4051 xad = &p->xad[index]; 4052 xoff = offsetXAD(xad); 4053 xlen = lengthXAD(xad); 4054 XT_PUTPAGE(mp); 4055 return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; 4056 } 4057 tlck = txLock(tid, ip, mp, tlckXTREE); 4058 tlck->type = tlckXTREE | tlckFREE; 4059 xtlck = (struct xtlock *) & tlck->lock; 4060 xtlck->hwm.offset = index; 4061 4062 4063 XT_PUTPAGE(mp); 4064 4065 /* 4066 * go back up to the parent page 4067 */ 4068 getParent: 4069 /* pop/restore parent entry for the current child page */ 4070 if ((parent = BT_POP(&btstack)) == NULL) 4071 /* current page must have been root */ 4072 goto out; 4073 4074 /* get back the parent page */ 4075 bn = parent->bn; 4076 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4077 if (rc) 4078 return rc; 4079 4080 index = parent->index; 4081 4082 /* 4083 * parent page become empty: free the page 4084 */ 4085 if (index == XTENTRYSTART) { 4086 /* txCommit() with tlckFREE: 4087 * free child extents covered by parent; 4088 * invalidate parent if COMMIT_PWMAP; 4089 */ 4090 tlck = txLock(tid, ip, mp, tlckXTREE); 4091 xtlck = (struct xtlock *) & tlck->lock; 4092 xtlck->hwm.offset = 4093 le16_to_cpu(p->header.nextindex) - 1; 4094 tlck->type = tlckXTREE | tlckFREE; 4095 4096 XT_PUTPAGE(mp); 4097 4098 if (p->header.flag & BT_ROOT) { 4099 4100 goto out; 4101 } else { 4102 goto getParent; 4103 } 4104 } 4105 /* 4106 * parent page still has entries for front region; 4107 */ 4108 else 4109 index--; 4110 /* 4111 * internal page: go down to child page of current entry 4112 */ 4113 getChild: 4114 /* save current parent entry for the child page */ 4115 BT_PUSH(&btstack, bn, index); 4116 4117 /* get child page */ 4118 xad = &p->xad[index]; 4119 bn = addressXAD(xad); 4120 4121 /* 4122 * first access of each internal entry: 4123 */ 4124 /* release parent page */ 4125 XT_PUTPAGE(mp); 4126 4127 /* process the child page */ 4128 goto getPage; 4129 4130 out: 4131 4132 return 0; 4133 } 4134 4135 #ifdef CONFIG_JFS_STATISTICS 4136 int 
jfs_xtstat_read(char *buffer, char **start, off_t offset, int length, 4137 int *eof, void *data) 4138 { 4139 int len = 0; 4140 off_t begin; 4141 4142 len += sprintf(buffer, 4143 "JFS Xtree statistics\n" 4144 "====================\n" 4145 "searches = %d\n" 4146 "fast searches = %d\n" 4147 "splits = %d\n", 4148 xtStat.search, 4149 xtStat.fastSearch, 4150 xtStat.split); 4151 4152 begin = offset; 4153 *start = buffer + begin; 4154 len -= begin; 4155 4156 if (len > length) 4157 len = length; 4158 else 4159 *eof = 1; 4160 4161 if (len < 0) 4162 len = 0; 4163 4164 return len; 4165 } 4166 #endif 4167