/*
 *   Copyright (C) International Business Machines Corp., 2000-2005
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
/*
 * jfs_xtree.c: extent allocation descriptor B+-tree manager
 */

#include <linux/fs.h>
#include <linux/quotaops.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_dmap.h"
#include "jfs_dinode.h"
#include "jfs_superblock.h"
#include "jfs_debug.h"

/*
 * xtree local flag
 */
#define XT_INSERT	0x00000001

/*
 * xtree key/entry comparison: extent offset
 *
 * return:
 *	-1: k < start of extent
 *	 0: start_of_extent <= k <= end_of_extent
 *	 1: k > end_of_extent
 */
#define XT_CMP(CMP, K, X, OFFSET64)\
{\
	OFFSET64 = offsetXAD(X);\
	(CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
		((K) < OFFSET64) ? -1 : 0;\
}

/* write an xad entry */
#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
{\
	(XAD)->flag = (FLAG);\
	XADoffset((XAD), (OFF));\
	XADlength((XAD), (LEN));\
	XADaddress((XAD), (ADDR));\
}

#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)

/* get page buffer for specified block address */
/* ToDo: Replace this ugly macro with a function */
#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\
{\
	BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\
	if (!(RC))\
	{\
		if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\
		    (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\
		    (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\
		{\
			jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\
			BT_PUTPAGE(MP);\
			MP = NULL;\
			RC = -EIO;\
		}\
	}\
}

/* for consistency */
#define XT_PUTPAGE(MP) BT_PUTPAGE(MP)

#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
	BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)

/* xtree entry parameter descriptor */
struct xtsplit {
	struct metapage *mp;
	s16 index;
	u8 flag;
	s64 off;
	s64 addr;
	int len;
	struct pxdlist *pxdlist;
};


/*
 * statistics
 */
#ifdef CONFIG_JFS_STATISTICS
static struct {
	uint search;
	uint fastSearch;
	uint split;
} xtStat;
#endif


/*
 * forward references
 */
static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp,
		    struct btstack * btstack, int flag);

static int xtSplitUp(tid_t tid,
		     struct inode *ip,
		     struct xtsplit * split, struct btstack * btstack);

static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
		       struct metapage ** rmpp, s64 * rbnp);

static int xtSplitRoot(tid_t tid, struct inode *ip,
		       struct xtsplit * split, struct metapage ** rmpp);

#ifdef _STILL_TO_PORT
static int
xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
	   xtpage_t * fp, struct btstack * btstack);

static int xtSearchNode(struct inode *ip,
			xad_t * xad,
			int *cmpp, struct btstack * btstack, int flag);

static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
#endif /* _STILL_TO_PORT */

/*
 * xtLookup()
 *
 * function: map a single page into a physical extent;
 */
int xtLookup(struct inode *ip, s64 lstart,
	     s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
{
	int rc = 0;
	struct btstack btstack;
	int cmp;
	s64 bn;
	struct metapage *mp;
	xtpage_t *p;
	int index;
	xad_t *xad;
	s64 next, size, xoff, xend;
	int xlen;
	s64 xaddr;

	*paddr = 0;
	*plen = llen;

	if (!no_check) {
		/* is lookup offset beyond eof ? */
		size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
		    JFS_SBI(ip->i_sb)->l2bsize;
		if (lstart >= size) {
			jfs_err("xtLookup: lstart (0x%lx) >= size (0x%lx)",
				(ulong) lstart, (ulong) size);
			return 0;
		}
	}

	/*
	 * search for the xad entry covering the logical extent
	 */
//search:
	if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
		jfs_err("xtLookup: xtSearch returned %d", rc);
		return rc;
	}

	/*
	 * compute the physical extent covering logical extent
	 *
	 * N.B. search may have failed (e.g., hole in sparse file),
	 * and returned the index of the next entry.
	 */
	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* is xad found covering start of logical extent ?
	 * lstart is a page start address,
	 * i.e., lstart cannot start in a hole;
	 */
	if (cmp) {
		if (next)
			*plen = min(next - lstart, llen);
		goto out;
	}

	/*
	 * lxd covered by xad
	 */
	xad = &p->xad[index];
	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	xend = xoff + xlen;
	xaddr = addressXAD(xad);

	/* initialize new pxd */
	*pflag = xad->flag;
	*paddr = xaddr + (lstart - xoff);
	/* a page must be fully covered by an xad */
	*plen = min(xend - lstart, llen);

      out:
	XT_PUTPAGE(mp);

	return rc;
}


/*
 * xtLookupList()
 *
 * function: map a single logical extent into a list of physical extents;
 *
 * parameter:
 *	struct inode	*ip,
 *	struct lxdlist	*lxdlist,	lxd list (in)
 *	struct xadlist	*xadlist,	xad list (in/out)
 *	int		flag)
 *
 * coverage of lxd by xad under assumption of
 * . lxd's are ordered and disjoint.
 * . xad's are ordered and disjoint.
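 *
 * illustrative caller sketch (values and buffer sizes hypothetical;
 * field names and the LXD accessor macros assumed per jfs_types.h):
 *
 *	lxd_t lxd[1];
 *	xad_t xads[4];
 *	struct lxdlist lxdlist = { .nlxd = 1, .lxd = lxd };
 *	struct xadlist xadlist = { .maxnxad = 4, .xad = xads };
 *	LXDoffset(&lxd[0], loff);	// logical offset, in fs blocks
 *	LXDlength(&lxd[0], llen);
 *	rc = xtLookupList(ip, &lxdlist, &xadlist, 0);
 *	// on return, xadlist.nxad physical extents cover the lxd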
 *
 * return:
 *	0:	success
 *
 * note: a page being written (even a single byte) is backed fully,
 *	except the last page which is only backed with blocks
 *	required to cover the last byte;
 *	the extent backing a page is fully contained within an xad;
 */
int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
		 struct xadlist * xadlist, int flag)
{
	int rc = 0;
	struct btstack btstack;
	int cmp;
	s64 bn;
	struct metapage *mp;
	xtpage_t *p;
	int index;
	lxd_t *lxd;
	xad_t *xad, *pxd;
	s64 size, lstart, lend, xstart, xend, pstart;
	s64 llen, xlen, plen;
	s64 xaddr, paddr;
	int nlxd, npxd, maxnpxd;

	npxd = xadlist->nxad = 0;
	maxnpxd = xadlist->maxnxad;
	pxd = xadlist->xad;

	nlxd = lxdlist->nlxd;
	lxd = lxdlist->lxd;

	lstart = offsetLXD(lxd);
	llen = lengthLXD(lxd);
	lend = lstart + llen;

	size = (ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
	    JFS_SBI(ip->i_sb)->l2bsize;

	/*
	 * search for the xad entry covering the logical extent
	 */
      search:
	if (lstart >= size)
		return 0;

	if ((rc = xtSearch(ip, lstart, NULL, &cmp, &btstack, 0)))
		return rc;

	/*
	 * compute the physical extent covering logical extent
	 *
	 * N.B. search may have failed (e.g., hole in sparse file),
	 * and returned the index of the next entry.
	 */
//map:
	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* is xad on the next sibling page ? */
	if (index == le16_to_cpu(p->header.nextindex)) {
		if (p->header.flag & BT_ROOT)
			goto mapend;

		if ((bn = le64_to_cpu(p->header.next)) == 0)
			goto mapend;

		XT_PUTPAGE(mp);

		/* get next sibling page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		index = XTENTRYSTART;
	}

	xad = &p->xad[index];

	/*
	 * is lxd covered by xad ?
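	 *
	 * the three cases tested below, sketched (l = lxd, x = xad):
	 *
	 *	lend <= xstart:		|--l--|   |--x--|	no overlap
	 *	lstart <= xstart < lend:	|---l---|	head of xad
	 *					    |---x---|	covers lxd
	 *	xstart < lstart < xend:		    |---l---|	tail of lxd
	 *					|---x---|	covered by xad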
	 */
      compare:
	xstart = offsetXAD(xad);
	xlen = lengthXAD(xad);
	xend = xstart + xlen;
	xaddr = addressXAD(xad);

      compare1:
	if (xstart < lstart)
		goto compare2;

	/* (lstart <= xstart) */

	/* lxd is NOT covered by xad */
	if (lend <= xstart) {
		/*
		 * get next lxd
		 */
		if (--nlxd == 0)
			goto mapend;
		lxd++;

		lstart = offsetLXD(lxd);
		llen = lengthLXD(lxd);
		lend = lstart + llen;
		if (lstart >= size)
			goto mapend;

		/* compare with the current xad */
		goto compare1;
	}
	/* lxd is covered by xad */
	else {			/* (xstart < lend) */

		/* initialize new pxd */
		pstart = xstart;
		plen = min(lend - xstart, xlen);
		paddr = xaddr;

		goto cover;
	}

	/* (xstart < lstart) */
      compare2:
	/* lxd is covered by xad */
	if (lstart < xend) {
		/* initialize new pxd */
		pstart = lstart;
		plen = min(xend - lstart, llen);
		paddr = xaddr + (lstart - xstart);

		goto cover;
	}
	/* lxd is NOT covered by xad */
	else {			/* (xend <= lstart) */

		/*
		 * get next xad
		 *
		 * linear search next xad covering lxd on
		 * the current xad page, and then tree search
		 */
		if (index == le16_to_cpu(p->header.nextindex) - 1) {
			if (p->header.flag & BT_ROOT)
				goto mapend;

			XT_PUTPAGE(mp);
			goto search;
		} else {
			index++;
			xad++;

			/* compare with new xad */
			goto compare;
		}
	}

	/*
	 * lxd is covered by xad and a new pxd has been initialized
	 * (lstart <= xstart < lend) or (xstart < lstart < xend)
	 */
      cover:
	/* finalize pxd corresponding to current xad */
	XT_PUTENTRY(pxd, xad->flag, pstart, plen, paddr);

	if (++npxd >= maxnpxd)
		goto mapend;
	pxd++;

	/*
	 * lxd is fully covered by xad
	 */
	if (lend <= xend) {
		/*
		 * get next lxd
		 */
		if (--nlxd == 0)
			goto mapend;
		lxd++;

		lstart = offsetLXD(lxd);
		llen = lengthLXD(lxd);
		lend = lstart + llen;
		if (lstart >= size)
			goto mapend;

		/*
		 * test for old xad covering new lxd
		 * (old xstart < new lstart)
		 */
		goto compare2;
	}
	/*
	 * lxd is partially covered by xad
	 */
	else {			/* (xend < lend) */

		/*
		 * get next xad
		 *
		 * linear search next xad covering lxd on
		 * the current xad page, and then next xad page search
		 */
		if (index == le16_to_cpu(p->header.nextindex) - 1) {
			if (p->header.flag & BT_ROOT)
				goto mapend;

			if ((bn = le64_to_cpu(p->header.next)) == 0)
				goto mapend;

			XT_PUTPAGE(mp);

			/* get next sibling page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			index = XTENTRYSTART;
			xad = &p->xad[index];
		} else {
			index++;
			xad++;
		}

		/*
		 * test for new xad covering old lxd
		 * (old lstart < new xstart)
		 */
		goto compare;
	}

      mapend:
	xadlist->nxad = npxd;

//out:
	XT_PUTPAGE(mp);

	return rc;
}


/*
 * xtSearch()
 *
 * function:	search for the xad entry covering specified offset.
 *
 * parameters:
 *	ip	- file object;
 *	xoff	- extent offset;
 *	nextp	- address of next extent (if any) for search miss
 *	cmpp	- comparison result:
 *	btstack	- traverse stack;
 *	flag	- search process flag (XT_INSERT);
 *
 * returns:
 *	btstack contains (bn, index) of search path traversed to the entry.
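 *
 *	a typical caller pattern, sketched (see xtLookup() above for a
 *	real instance in this file):
 *
 *		rc = xtSearch(ip, xoff, &next, &cmp, &btstack, 0);
 *		if (rc)
 *			return rc;
 *		XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
 *		// use p->xad[index] if cmp == 0 (hit), else it is the
 *		// insertion point for a new entry
 *		XT_PUTPAGE(mp);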
 *	*cmpp is set to result of comparison with the entry returned.
 *	the page containing the entry is pinned at exit.
 */
static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
		    int *cmpp, struct btstack * btstack, int flag)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	int rc = 0;
	int cmp = 1;		/* init for empty page */
	s64 bn;			/* block number */
	struct metapage *mp;	/* page buffer */
	xtpage_t *p;		/* page */
	xad_t *xad;
	int base, index, lim, btindex;
	struct btframe *btsp;
	int nsplit = 0;		/* number of pages to split */
	s64 t64;
	s64 next = 0;

	INCREMENT(xtStat.search);

	BT_CLR(btstack);

	btstack->nsplit = 0;

	/*
	 * search down tree from root:
	 *
	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
	 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
	 *
	 * if entry with search key K is not found
	 * internal page search finds the entry with largest key Ki
	 * less than K which points to the child page to search;
	 * leaf page search finds the entry with smallest key Kj
	 * greater than K so that the returned index is the position of
	 * the entry to be shifted right for insertion of new entry.
	 * for empty tree, search key is greater than any key of the tree.
	 *
	 * by convention, root bn = 0.
	 */
	for (bn = 0;;) {
		/* get/pin the page to search */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;

		/* try sequential access heuristics with the previous
		 * access entry in target leaf page:
		 * once search narrowed down into the target leaf,
		 * key must either match an entry in the leaf or
		 * key entry does not exist in the tree;
		 */
//fastSearch:
		if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
		    (p->header.flag & BT_LEAF) &&
		    (index = jfs_ip->btindex) <
		    le16_to_cpu(p->header.nextindex)) {
			xad = &p->xad[index];
			t64 = offsetXAD(xad);
			if (xoff < t64 + lengthXAD(xad)) {
				if (xoff >= t64) {
					*cmpp = 0;
					goto out;
				}

				/* stop sequential access heuristics */
				goto binarySearch;
			} else {	/* (t64 + lengthXAD(xad)) <= xoff */

				/* try next sequential entry */
				index++;
				if (index <
				    le16_to_cpu(p->header.nextindex)) {
					xad++;
					t64 = offsetXAD(xad);
					if (xoff < t64 + lengthXAD(xad)) {
						if (xoff >= t64) {
							*cmpp = 0;
							goto out;
						}

						/* miss: key falls between
						 * previous and this entry
						 */
						*cmpp = 1;
						next = t64;
						goto out;
					}

					/* (xoff >= t64 + lengthXAD(xad));
					 * matching entry may be further out:
					 * stop heuristic search
					 */
					/* stop sequential access heuristics */
					goto binarySearch;
				}

				/* (index == p->header.nextindex);
				 * miss: key entry does not exist in
				 * the target leaf/tree
				 */
				*cmpp = 1;
				goto out;
			}

			/*
			 * if hit, return index of the entry found, and
			 * if miss, where new entry with search key is
			 * to be inserted;
			 */
		      out:
			/* compute number of pages to split */
			if (flag & XT_INSERT) {
				if (p->header.nextindex ==	/* little-endian */
				    p->header.maxentry)
					nsplit++;
				else
					nsplit = 0;
				btstack->nsplit = nsplit;
			}

			/* save search result */
			btsp = btstack->top;
			btsp->bn = bn;
			btsp->index = index;
			btsp->mp = mp;

			/* update sequential access heuristics */
			jfs_ip->btindex = index;

			if (nextp)
				*nextp = next;

			INCREMENT(xtStat.fastSearch);
			return 0;
		}

		/* well, ... full search now */
	      binarySearch:
		lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;

		/*
		 * binary search with search key K on the current page
		 */
		for (base = XTENTRYSTART; lim; lim >>= 1) {
			index = base + (lim >> 1);

			XT_CMP(cmp, xoff, &p->xad[index], t64);
			if (cmp == 0) {
				/*
				 *	search hit
				 */
				/* search hit - leaf page:
				 * return the entry found
				 */
				if (p->header.flag & BT_LEAF) {
					*cmpp = cmp;

					/* compute number of pages to split */
					if (flag & XT_INSERT) {
						if (p->header.nextindex ==
						    p->header.maxentry)
							nsplit++;
						else
							nsplit = 0;
						btstack->nsplit = nsplit;
					}

					/* save search result */
					btsp = btstack->top;
					btsp->bn = bn;
					btsp->index = index;
					btsp->mp = mp;

					/* init sequential access heuristics */
					btindex = jfs_ip->btindex;
					if (index == btindex ||
					    index == btindex + 1)
						jfs_ip->btorder = BT_SEQUENTIAL;
					else
						jfs_ip->btorder = BT_RANDOM;
					jfs_ip->btindex = index;

					return 0;
				}
				/* search hit - internal page:
				 * descend/search its child page
				 */
				if (index < le16_to_cpu(p->header.nextindex)-1)
					next = offsetXAD(&p->xad[index + 1]);
				goto next;
			}

			if (cmp > 0) {
				base = index + 1;
				--lim;
			}
		}

		/*
		 *	search miss
		 *
		 * base is the smallest index with key (Kj) greater than
		 * search key (K) and may be zero or maxentry index.
		 */
		if (base < le16_to_cpu(p->header.nextindex))
			next = offsetXAD(&p->xad[base]);
		/*
		 * search miss - leaf page:
		 *
		 * return location of entry (base) where new entry with
		 * search key K is to be inserted.
		 */
		if (p->header.flag & BT_LEAF) {
			*cmpp = cmp;

			/* compute number of pages to split */
			if (flag & XT_INSERT) {
				if (p->header.nextindex ==
				    p->header.maxentry)
					nsplit++;
				else
					nsplit = 0;
				btstack->nsplit = nsplit;
			}

			/* save search result */
			btsp = btstack->top;
			btsp->bn = bn;
			btsp->index = base;
			btsp->mp = mp;

			/* init sequential access heuristics */
			btindex = jfs_ip->btindex;
			if (base == btindex || base == btindex + 1)
				jfs_ip->btorder = BT_SEQUENTIAL;
			else
				jfs_ip->btorder = BT_RANDOM;
			jfs_ip->btindex = base;

			if (nextp)
				*nextp = next;

			return 0;
		}

		/*
		 * search miss - non-leaf page:
		 *
		 * if base is non-zero, decrement base by one to get the parent
		 * entry of the child page to search.
		 */
		index = base ? base - 1 : base;

		/*
		 * go down to child page
		 */
	      next:
		/* update number of pages to split */
		if (p->header.nextindex == p->header.maxentry)
			nsplit++;
		else
			nsplit = 0;

		/* push (bn, index) of the parent page/entry */
		if (BT_STACK_FULL(btstack)) {
			jfs_error(ip->i_sb, "stack overrun in xtSearch!");
			XT_PUTPAGE(mp);
			return -EIO;
		}
		BT_PUSH(btstack, bn, index);

		/* get the child page block number */
		bn = addressXAD(&p->xad[index]);

		/* unpin the parent page */
		XT_PUTPAGE(mp);
	}
}

/*
 * xtInsert()
 *
 * function:
 *
 * parameter:
 *	tid	- transaction id;
 *	ip	- file object;
 *	xflag	- extent flag (XAD_NOTRECORDED):
 *	xoff	- extent offset;
 *	xlen	- extent length;
 *	xaddrp	- extent address pointer (in/out):
 *		if (*xaddrp)
 *			caller allocated data extent at *xaddrp;
 *		else
 *			allocate data extent and return its xaddr;
 *	flag	-
 *
 * return:
 */
int xtInsert(tid_t tid,		/* transaction id */
	     struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
	     int flag)
{
	int rc = 0;
	s64 xaddr, hint;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	int cmp;
	s64 next;
	struct tlock *tlck;
	struct xtlock *xtlck;

	jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);

	/*
	 * search for the entry location at which to insert:
	 *
	 * xtFastSearch() and xtSearch() both return (leaf page
	 * pinned, index at which to insert).
	 * n.b. xtSearch() may return index of maxentry of
	 * the full page.
	 */
	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	/* This test must follow XT_GETSEARCH since mp must be valid if
	 * we branch to out: */
	if ((cmp == 0) || (next && (xlen > next - xoff))) {
		rc = -EEXIST;
		goto out;
	}

	/*
	 * allocate data extent requested
	 *
	 * allocation hint: last xad
	 */
	if ((xaddr = *xaddrp) == 0) {
		if (index > XTENTRYSTART) {
			xad = &p->xad[index - 1];
			hint = addressXAD(xad) + lengthXAD(xad) - 1;
		} else
			hint = 0;
		if ((rc = DQUOT_ALLOC_BLOCK(ip, xlen)))
			goto out;
		if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
			DQUOT_FREE_BLOCK(ip, xlen);
			goto out;
		}
	}

	/*
	 * insert entry for new extent
	 */
	xflag |= XAD_NEW;

	/*
	 * if the leaf page is full, split the page and
	 * propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		split.mp = mp;
		split.index = index;
		split.flag = xflag;
		split.off = xoff;
		split.len = xlen;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
			/* undo data extent allocation */
			if (*xaddrp == 0) {
				dbFree(ip, xaddr, (s64) xlen);
				DQUOT_FREE_BLOCK(ip, xlen);
			}
			return rc;
		}

		*xaddrp = xaddr;
		return 0;
	}

	/*
	 * insert the new entry into the leaf page
	 */
	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	BT_MARK_DIRTY(mp, ip);

	/* if insert into middle, shift right remaining entries. */
	if (index < nextindex)
		memmove(&p->xad[index + 1], &p->xad[index],
			(nextindex - index) * sizeof(xad_t));

	/* insert the new entry: mark the entry NEW */
	xad = &p->xad[index];
	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);

	/* advance next available entry index */
	p->header.nextindex =
	    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);

	/* Don't log it if there are no links to the file */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset =
		    (xtlck->lwm.offset) ? min(index,
					      (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length =
		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
	}

	*xaddrp = xaddr;

      out:
	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}


/*
 * xtSplitUp()
 *
 * function:
 *	split full pages while propagating the insertion up the tree
 *
 * parameter:
 *	tid	- transaction id;
 *	ip	- file object;
 *	split	- entry parameter descriptor;
 *	btstack	- traverse stack from xtSearch()
 *
 * return:
 */
static int
xtSplitUp(tid_t tid,
	  struct inode *ip, struct xtsplit * split, struct btstack * btstack)
{
	int rc = 0;
	struct metapage *smp;
	xtpage_t *sp;		/* split page */
	struct metapage *rmp;
	s64 rbn;		/* new right page block number */
	struct metapage *rcmp;
	xtpage_t *rcp;		/* right child page */
	s64 rcbn;		/* right child page block number */
	int skip;		/* index of entry of insertion */
	int nextindex;		/* next available entry index of p */
	struct btframe *parent;	/* parent page entry on traverse stack */
	xad_t *xad;
	s64 xaddr;
	int xlen;
	int nsplit;		/* number of pages split */
	struct pxdlist pxdlist;
	pxd_t *pxd;
	struct tlock *tlck;
	struct xtlock *xtlck;

	smp = split->mp;
	sp = XT_PAGE(ip, smp);

	/* is inode xtree root extension/inline EA area free ? */
	if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
	    (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
	    (JFS_IP(ip)->mode2 & INLINEEA)) {
		sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
		JFS_IP(ip)->mode2 &= ~INLINEEA;

		BT_MARK_DIRTY(smp, ip);
		/*
		 * acquire a transaction lock on the leaf page;
		 *
		 * action: xad insertion/extension;
		 */

		/* if insert into middle, shift right remaining entries.
		 */
		skip = split->index;
		nextindex = le16_to_cpu(sp->header.nextindex);
		if (skip < nextindex)
			memmove(&sp->xad[skip + 1], &sp->xad[skip],
				(nextindex - skip) * sizeof(xad_t));

		/* insert the new entry: mark the entry NEW */
		xad = &sp->xad[skip];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* advance next available entry index */
		sp->header.nextindex =
		    cpu_to_le16(le16_to_cpu(sp->header.nextindex) + 1);

		/* Don't log it if there are no links to the file */
		if (!test_cflag(COMMIT_Nolink, ip)) {
			tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
			xtlck = (struct xtlock *) & tlck->lock;
			xtlck->lwm.offset = (xtlck->lwm.offset) ?
			    min(skip, (int)xtlck->lwm.offset) : skip;
			xtlck->lwm.length =
			    le16_to_cpu(sp->header.nextindex) -
			    xtlck->lwm.offset;
		}

		return 0;
	}

	/*
	 * allocate new index blocks to cover index page split(s)
	 *
	 * allocation hint: ?
	 */
	if (split->pxdlist == NULL) {
		nsplit = btstack->nsplit;
		split->pxdlist = &pxdlist;
		pxdlist.maxnpxd = pxdlist.npxd = 0;
		pxd = &pxdlist.pxd[0];
		xlen = JFS_SBI(ip->i_sb)->nbperpage;
		for (; nsplit > 0; nsplit--, pxd++) {
			if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
			    == 0) {
				PXDaddress(pxd, xaddr);
				PXDlength(pxd, xlen);

				pxdlist.maxnpxd++;

				continue;
			}

			/* undo allocation */

			XT_PUTPAGE(smp);
			return rc;
		}
	}

	/*
	 * Split leaf page <sp> into <sp> and a new right page <rp>.
	 *
	 * The split routines insert the new entry into the leaf page,
	 * and acquire txLock as appropriate.
	 * return <rp> pinned and its block number <rpbn>.
	 */
	rc = (sp->header.flag & BT_ROOT) ?
	    xtSplitRoot(tid, ip, split, &rmp) :
	    xtSplitPage(tid, ip, split, &rmp, &rbn);

	XT_PUTPAGE(smp);

	if (rc)
		return -EIO;
	/*
	 * propagate up the router entry for the leaf page just split
	 *
	 * insert a router entry for the new page into the parent page,
	 * propagate the insert/split up the tree by walking back the stack
	 * of (bn of parent page, index of child page entry in parent page)
	 * that were traversed during the search for the page that split.
	 *
	 * the propagation of insert/split up the tree stops if the root
	 * splits or the page inserted into doesn't have to split to hold
	 * the new entry.
	 *
	 * the parent entry for the split page remains the same, and
	 * a new entry is inserted at its right with the first key and
	 * block number of the new right page.
	 *
	 * There are a maximum of 3 pages pinned at any time:
	 * right child, left parent and right parent (when the parent splits)
	 * to keep the child page pinned while working on the parent.
	 * make sure that all pins are released at exit.
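	 *
	 * rough pin flow per loop iteration, sketched:
	 *
	 *	enter with right child <rcmp> pinned (from the level below)
	 *	pin parent <smp> via XT_GETPAGE
	 *	parent full:	unpin <rcmp>, split <smp> (pins new <rmp>),
	 *			unpin <smp>, continue up with <rmp>
	 *	parent open:	insert router entry, unpin <smp>, break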
	 */
	while ((parent = BT_POP(btstack)) != NULL) {
		/* parent page specified by stack frame <parent> */

		/* keep current child pages <rcp> pinned */
		rcmp = rmp;
		rcbn = rbn;
		rcp = XT_PAGE(ip, rcmp);

		/*
		 * insert router entry in parent for new right child page <rp>
		 */
		/* get/pin the parent page <sp> */
		XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
		if (rc) {
			XT_PUTPAGE(rcmp);
			return rc;
		}

		/*
		 * The new key entry goes ONE AFTER the index of parent entry,
		 * because the split was to the right.
		 */
		skip = parent->index + 1;

		/*
		 * split or shift right remaining entries of the parent page
		 */
		nextindex = le16_to_cpu(sp->header.nextindex);
		/*
		 * parent page is full - split the parent page
		 */
		if (nextindex == le16_to_cpu(sp->header.maxentry)) {
			/* init for parent page split */
			split->mp = smp;
			split->index = skip;	/* index at insert */
			split->flag = XAD_NEW;
			split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
			split->len = JFS_SBI(ip->i_sb)->nbperpage;
			split->addr = rcbn;

			/* unpin previous right child page */
			XT_PUTPAGE(rcmp);

			/* The split routines insert the new entry,
			 * and acquire txLock as appropriate.
			 * return <rp> pinned and its block number <rpbn>.
			 */
			rc = (sp->header.flag & BT_ROOT) ?
			    xtSplitRoot(tid, ip, split, &rmp) :
			    xtSplitPage(tid, ip, split, &rmp, &rbn);
			if (rc) {
				XT_PUTPAGE(smp);
				return rc;
			}

			XT_PUTPAGE(smp);
			/* keep new child page <rp> pinned */
		}
		/*
		 * parent page is not full - insert in parent page
		 */
		else {
			/*
			 * insert router entry in parent for the right child
			 * page from the first entry of the right child page:
			 */
			/*
			 * acquire a transaction lock on the parent page;
			 *
			 * action: router xad insertion;
			 */
			BT_MARK_DIRTY(smp, ip);

			/*
			 * if insert into middle, shift right remaining entries
			 */
			if (skip < nextindex)
				memmove(&sp->xad[skip + 1], &sp->xad[skip],
					(nextindex -
					 skip) << L2XTSLOTSIZE);

			/* insert the router entry */
			xad = &sp->xad[skip];
			XT_PUTENTRY(xad, XAD_NEW,
				    offsetXAD(&rcp->xad[XTENTRYSTART]),
				    JFS_SBI(ip->i_sb)->nbperpage, rcbn);

			/* advance next available entry index. */
			sp->header.nextindex =
			    cpu_to_le16(le16_to_cpu(sp->header.nextindex) +
					1);

			/* Don't log it if there are no links to the file */
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, smp,
					      tlckXTREE | tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
				xtlck->lwm.offset = (xtlck->lwm.offset) ?
				    min(skip, (int)xtlck->lwm.offset) : skip;
				xtlck->lwm.length =
				    le16_to_cpu(sp->header.nextindex) -
				    xtlck->lwm.offset;
			}

			/* unpin parent page */
			XT_PUTPAGE(smp);

			/* exit propagate up */
			break;
		}
	}

	/* unpin current right page */
	XT_PUTPAGE(rmp);

	return 0;
}


/*
 * xtSplitPage()
 *
 * function:
 *	split a full non-root page into
 *	original/split/left page and new right page
 *	i.e., the original/split page remains as left page.
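 *
 *	before/after sketch (slots shown left to right, sizes hypothetical):
 *
 *	    sp: [e0 e1 e2 e3 e4 e5]	(full, maxentry = 6, middle = 3)
 *	=>  sp: [e0 e1 e2]		rp: [e3 e4 e5]
 *
 *	the new entry is then inserted into sp or rp according to whether
 *	split->index falls in the left or right half; a sequential append
 *	at the tail instead moves nothing and starts rp with the new
 *	entry alone (see the heuristic below).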
 *
 * parameter:
 *	int		tid,
 *	struct inode	*ip,
 *	struct xtsplit	*split,
 *	struct metapage	**rmpp,
 *	u64		*rbnp,
 *
 * return:
 *	0 - success; errno - failure;
 *	on success the new right page is returned pinned via <rmpp>
 *	and its block number via <rbnp>;
 */
static int
xtSplitPage(tid_t tid, struct inode *ip,
	    struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
{
	int rc = 0;
	struct metapage *smp;
	xtpage_t *sp;
	struct metapage *rmp;
	xtpage_t *rp;		/* new right page allocated */
	s64 rbn;		/* new right page block number */
	struct metapage *mp;
	xtpage_t *p;
	s64 nextbn;
	int skip, maxentry, middle, righthalf, n;
	xad_t *xad;
	struct pxdlist *pxdlist;
	pxd_t *pxd;
	struct tlock *tlck;
	struct xtlock *sxtlck = NULL, *rxtlck = NULL;
	int quota_allocation = 0;

	smp = split->mp;
	sp = XT_PAGE(ip, smp);

	INCREMENT(xtStat.split);

	pxdlist = split->pxdlist;
	pxd = &pxdlist->pxd[pxdlist->npxd];
	pxdlist->npxd++;
	rbn = addressPXD(pxd);

	/* Allocate blocks to quota. */
	if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
		rc = -EDQUOT;
		goto clean_up;
	}

	quota_allocation += lengthPXD(pxd);

	/*
	 * allocate the new right page for the split
	 */
	rmp = get_metapage(ip, rbn, PSIZE, 1);
	if (rmp == NULL) {
		rc = -EIO;
		goto clean_up;
	}

	jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);

	BT_MARK_DIRTY(rmp, ip);
	/*
	 * action: new page;
	 */

	rp = (xtpage_t *) rmp->data;
	rp->header.self = *pxd;
	rp->header.flag = sp->header.flag & BT_TYPE;
	rp->header.maxentry = sp->header.maxentry;	/* little-endian */
	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);

	BT_MARK_DIRTY(smp, ip);
	/* Don't log it if there are no links to the file */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		/*
		 * acquire a transaction lock on the new right page;
		 */
		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
		rxtlck = (struct xtlock *) & tlck->lock;
		rxtlck->lwm.offset = XTENTRYSTART;
		/*
		 * acquire a transaction lock on the split page
		 */
		tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
		sxtlck = (struct xtlock *) & tlck->lock;
	}

	/*
	 * initialize/update sibling pointers of <sp> and <rp>
	 */
	nextbn = le64_to_cpu(sp->header.next);
	rp->header.next = cpu_to_le64(nextbn);
	rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
	sp->header.next = cpu_to_le64(rbn);

	skip = split->index;

	/*
	 * sequential append at tail (after last entry of last page)
	 *
	 * if splitting the last page on a level because of appending
	 * an entry to it (skip is maxentry), it's likely that the access is
	 * sequential. adding an empty page on the side of the level is less
	 * work and can push the fill factor much higher than normal.
	 * if we're wrong it's no big deal - we will do the split the right
	 * way next time.
	 * (it may look like it's equally easy to do a similar hack for
	 * reverse sorted data, that is, split the tree left, but it's not.
	 * Be my guest.)
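	 *
	 * e.g., a file written strictly sequentially appends at the tail
	 * every time, so each split leaves the old last page completely
	 * full instead of half full (a 50/50 split would cap the leaf
	 * fill factor at roughly 50% for that workload).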
	 */
	if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) {
		/*
		 * acquire a transaction lock on the new/right page;
		 *
		 * action: xad insertion;
		 */
		/* insert entry at the first entry of the new right page */
		xad = &rp->xad[XTENTRYSTART];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);

		if (!test_cflag(COMMIT_Nolink, ip)) {
			/* rxtlck->lwm.offset = XTENTRYSTART; */
			rxtlck->lwm.length = 1;
		}

		*rmpp = rmp;
		*rbnp = rbn;

		jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
		return 0;
	}

	/*
	 * non-sequential insert (at possibly middle page)
	 */

	/*
	 * update previous pointer of old next/right page of <sp>
	 */
	if (nextbn != 0) {
		XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
		if (rc) {
			XT_PUTPAGE(rmp);
			goto clean_up;
		}

		BT_MARK_DIRTY(mp, ip);
		/*
		 * acquire a transaction lock on the next page;
		 *
		 * action: sibling pointer update;
		 */
		if (!test_cflag(COMMIT_Nolink, ip))
			tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);

		p->header.prev = cpu_to_le64(rbn);

		/* sibling page may have been updated previously, or
		 * it may be updated later;
		 */

		XT_PUTPAGE(mp);
	}

	/*
	 * split the data between the split and new/right pages
	 */
	maxentry = le16_to_cpu(sp->header.maxentry);
	middle = maxentry >> 1;
	righthalf = maxentry - middle;

	/*
	 * skip index in old split/left page - insert into left page:
	 */
	if (skip <= middle) {
		/* move right half of split page to the new right page */
		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
			righthalf << L2XTSLOTSIZE);

		/* shift right tail of left half to make room for new entry */
		if (skip < middle)
			memmove(&sp->xad[skip + 1], &sp->xad[skip],
				(middle - skip) << L2XTSLOTSIZE);

		/* insert new entry */
		xad = &sp->xad[skip];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* update page header */
		sp->header.nextindex = cpu_to_le16(middle + 1);
		if (!test_cflag(COMMIT_Nolink, ip)) {
			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
			    min(skip, (int)sxtlck->lwm.offset) : skip;
		}

		rp->header.nextindex =
		    cpu_to_le16(XTENTRYSTART + righthalf);
	}
	/*
	 * skip index in new right page - insert into right page:
	 */
	else {
		/* move left head of right half to right page */
		n = skip - middle;
		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
			n << L2XTSLOTSIZE);

		/* insert new entry */
		n += XTENTRYSTART;
		xad = &rp->xad[n];
		XT_PUTENTRY(xad, split->flag, split->off, split->len,
			    split->addr);

		/* move right tail of right half to right page */
		if (skip < maxentry)
			memmove(&rp->xad[n + 1], &sp->xad[skip],
				(maxentry - skip) << L2XTSLOTSIZE);

		/* update page header */
		sp->header.nextindex = cpu_to_le16(middle);
		if (!test_cflag(COMMIT_Nolink, ip)) {
			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
			    min(middle, (int)sxtlck->lwm.offset) : middle;
		}

		rp->header.nextindex = cpu_to_le16(XTENTRYSTART +
						   righthalf + 1);
	}

	if (!test_cflag(COMMIT_Nolink, ip)) {
		sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) -
		    sxtlck->lwm.offset;

		/* rxtlck->lwm.offset = XTENTRYSTART; */
		rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
		    XTENTRYSTART;
	}

	*rmpp = rmp;
	*rbnp = rbn;

	jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
	return rc;

      clean_up:

	/* Rollback quota allocation. */
	if (quota_allocation)
		DQUOT_FREE_BLOCK(ip, quota_allocation);

	return (rc);
}


/*
 * xtSplitRoot()
 *
 * function:
 *	split the full root page into original/root/split page and new
 *	right page
 *	i.e., root remains fixed in tree anchor (inode) and the root is
 *	copied to a single new right child page since root page <<
 *	non-root page, and the split root page contains a single entry
 *	for the new right child page.
 *
 * parameter:
 *	int		tid,
 *	struct inode	*ip,
 *	struct xtsplit	*split,
 *	struct metapage	**rmpp)
 *
 * return:
 *	0 - success; errno - failure;
 *	on success the new right child page is returned pinned via <rmpp>;
 */
static int
xtSplitRoot(tid_t tid,
	    struct inode *ip, struct xtsplit * split, struct metapage ** rmpp)
{
	xtpage_t *sp;
	struct metapage *rmp;
	xtpage_t *rp;
	s64 rbn;
	int skip, nextindex;
	xad_t *xad;
	pxd_t *pxd;
	struct pxdlist *pxdlist;
	struct tlock *tlck;
	struct xtlock *xtlck;

	sp = &JFS_IP(ip)->i_xtroot;

	INCREMENT(xtStat.split);

	/*
	 * allocate a single (right) child page
	 */
	pxdlist = split->pxdlist;
	pxd = &pxdlist->pxd[pxdlist->npxd];
	pxdlist->npxd++;
	rbn = addressPXD(pxd);
	rmp = get_metapage(ip, rbn, PSIZE, 1);
	if (rmp == NULL)
		return -EIO;

	/* Allocate blocks to quota. */
	if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
		release_metapage(rmp);
		return -EDQUOT;
	}

	jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);

	/*
	 * acquire a transaction lock on the new right page;
	 *
	 * action: new page;
	 */
	BT_MARK_DIRTY(rmp, ip);

	rp = (xtpage_t *) rmp->data;
	rp->header.flag =
	    (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
	rp->header.self = *pxd;
	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
	rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE);

	/* initialize sibling pointers */
	rp->header.next = 0;
	rp->header.prev = 0;

	/*
	 * copy the in-line root page into new right page extent
	 */
	nextindex = le16_to_cpu(sp->header.maxentry);
	memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART],
		(nextindex - XTENTRYSTART) << L2XTSLOTSIZE);

	/*
	 * insert the new entry into the new right/child page
	 * (skip index in the new right page will not change)
	 */
	skip = split->index;
	/* if insert into middle, shift right remaining entries */
	if (skip != nextindex)
		memmove(&rp->xad[skip + 1], &rp->xad[skip],
			(nextindex - skip) * sizeof(xad_t));

	xad = &rp->xad[skip];
	XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr);

	/* update page header */
	rp->header.nextindex = cpu_to_le16(nextindex + 1);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset = XTENTRYSTART;
		xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
		    XTENTRYSTART;
	}

	/*
	 * reset the root
	 *
	 * init root with the single entry for the new right page
	 * set the 1st entry offset to 0, which forces the left-most key
	 * at any level of the tree to be less than any search key.
	 */
	/*
	 * acquire a transaction lock on the root page (in-memory inode);
	 *
	 * action: root split;
	 */
	BT_MARK_DIRTY(split->mp, ip);

	xad = &sp->xad[XTENTRYSTART];
	XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn);

	/* update page header of root */
	sp->header.flag &= ~BT_LEAF;
	sp->header.flag |= BT_INTERNAL;

	sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
		xtlck->lwm.offset = XTENTRYSTART;
		xtlck->lwm.length = 1;
	}

	*rmpp = rmp;

	jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp);
	return 0;
}


/*
 * xtExtend()
 *
 * function: extend in-place;
 *
 * note: existing extent may or may not have been committed.
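 *
 * e.g. (hypothetical numbers): an existing last xad of
 * (xoff=0, xlen=8, xaddr=1000) is grown by
 *
 *	xtExtend(tid, ip, 8, 4, 0);
 *
 * to (xoff=0, xlen=12, xaddr=1000), provided the caller allocated
 * blocks 1008..1011 contiguously beforehand;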
 *	caller is responsible for pager buffer cache update, and
 *	working block allocation map update;
 *	update pmap: alloc whole extended extent;
 */
int xtExtend(tid_t tid,		/* transaction id */
	     struct inode *ip, s64 xoff,	/* delta extent offset */
	     s32 xlen,		/* delta extent length */
	     int flag)
{
	int rc = 0;
	int cmp;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex, len;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	s64 xaddr;
	struct tlock *tlck;
	struct xtlock *xtlck = NULL;

	jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);

	/* there must exist an extent to be extended */
	if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	if (cmp != 0) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent");
		return -EIO;
	}

	/* extension must be contiguous */
	xad = &p->xad[index];
	if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtExtend: extension is not contiguous");
		return -EIO;
	}

	/*
	 * acquire a transaction lock on the leaf page;
	 *
	 * action: xad insertion/extension;
	 */
	BT_MARK_DIRTY(mp, ip);
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
	}

	/* extend will overflow extent ? */
	xlen = lengthXAD(xad) + xlen;
	if ((len = xlen - MAXXLEN) <= 0)
		goto extendOld;

	/*
	 * extent overflow: insert entry for new extent
	 */
//insertNew:
	xoff = offsetXAD(xad) + MAXXLEN;
	xaddr = addressXAD(xad) + MAXXLEN;
	nextindex = le16_to_cpu(p->header.nextindex);

	/*
	 * if the leaf page is full, insert the new entry and
	 * propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		/* xtSplitUp() unpins leaf pages */
		split.mp = mp;
		split.index = index + 1;
		split.flag = XAD_NEW;
		split.off = xoff;	/* split offset */
		split.len = len;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
			return rc;

		/* get back old page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
		/*
		 * if leaf root has been split, original root has been
		 * copied to new child page, i.e., original entry now
		 * resides on the new child page;
		 */
		if (p->header.flag & BT_INTERNAL) {
			ASSERT(p->header.nextindex ==
			       cpu_to_le16(XTENTRYSTART + 1));
			xad = &p->xad[XTENTRYSTART];
			bn = addressXAD(xad);
			XT_PUTPAGE(mp);

			/* get new child page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			BT_MARK_DIRTY(mp, ip);
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
			}
		}
	}
	/*
	 * insert the new entry into the leaf page
	 */
	else {
		/* insert the new entry: mark the entry NEW */
		xad = &p->xad[index + 1];
		XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);

		/* advance next available entry index */
		p->header.nextindex =
		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
	}

	/* get back old entry */
	xad = &p->xad[index];
	xlen = MAXXLEN;

	/*
	 * extend old extent
	 */
      extendOld:
	XADlength(xad, xlen);
	if (!(xad->flag & XAD_NEW))
		xad->flag |= XAD_EXTENDED;

	if (!test_cflag(COMMIT_Nolink, ip)) {
		xtlck->lwm.offset =
		    (xtlck->lwm.offset) ? min(index,
					      (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length =
		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
	}

	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}

#ifdef _NOTYET
/*
 * xtTailgate()
 *
 * function: split existing 'tail' extent
 *	(split offset >= start offset of tail extent), and
 *	relocate and extend the split tail half;
 *
 * note: existing extent may or may not have been committed.
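 *
 * before/after sketch (hypothetical numbers):
 *
 *	old tail xad:	(xoff=100, xlen=10, xaddr=5000)
 *	xtTailgate(tid, ip, 104, 16, 9000, 0) yields
 *	truncated head:	(xoff=100, xlen=4,  xaddr=5000)
 *	relocated tail:	(xoff=104, xlen=16, xaddr=9000)
 *	with blocks 5004..5009 of the old split tail half freed;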
 *	caller is responsible for pager buffer cache update, and
 *	working block allocation map update;
 *	update pmap: free old split tail extent, alloc new extent;
 */
int xtTailgate(tid_t tid,	/* transaction id */
	       struct inode *ip, s64 xoff,	/* split/new extent offset */
	       s32 xlen,	/* new extent length */
	       s64 xaddr,	/* new extent address */
	       int flag)
{
	int rc = 0;
	int cmp;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index, nextindex, llen, rlen;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad;
	struct tlock *tlck;
	struct xtlock *xtlck = 0;
	struct tlock *mtlck;
	struct maplock *pxdlock;

/*
printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
	(ulong)xoff, xlen, (ulong)xaddr);
*/

	/* there must exist an extent to be tailgated */
	if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);

	if (cmp != 0) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtTailgate: couldn't find extent");
		return -EIO;
	}

	/* entry found must be last entry */
	nextindex = le16_to_cpu(p->header.nextindex);
	if (index != nextindex - 1) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb,
			  "xtTailgate: the entry found is not the last entry");
		return -EIO;
	}

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire tlock of the leaf page containing original entry
	 */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
	}

	/* completely replace extent ? */
	xad = &p->xad[index];
/*
printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
	(ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
*/
	if ((llen = xoff - offsetXAD(xad)) == 0)
		goto updateOld;

	/*
	 * partially replace extent: insert entry for new extent
	 */
//insertNew:
	/*
	 * if the leaf page is full, insert the new entry and
	 * propagate up the router entry for the new page from split
	 *
	 * The xtSplitUp() will insert the entry and unpin the leaf page.
	 */
	if (nextindex == le16_to_cpu(p->header.maxentry)) {
		/* xtSplitUp() unpins leaf pages */
		split.mp = mp;
		split.index = index + 1;
		split.flag = XAD_NEW;
		split.off = xoff;	/* split offset */
		split.len = xlen;
		split.addr = xaddr;
		split.pxdlist = NULL;
		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
			return rc;

		/* get back old page */
		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
		if (rc)
			return rc;
		/*
		 * if leaf root has been split, original root has been
		 * copied to new child page, i.e., original entry now
		 * resides on the new child page;
		 */
		if (p->header.flag & BT_INTERNAL) {
			ASSERT(p->header.nextindex ==
			       cpu_to_le16(XTENTRYSTART + 1));
			xad = &p->xad[XTENTRYSTART];
			bn = addressXAD(xad);
			XT_PUTPAGE(mp);

			/* get new child page */
			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
			if (rc)
				return rc;

			BT_MARK_DIRTY(mp, ip);
			if (!test_cflag(COMMIT_Nolink, ip)) {
				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
				xtlck = (struct xtlock *) & tlck->lock;
			}
		}
	}
	/*
	 * insert the new entry into the leaf page
	 */
	else {
		/* insert the new entry: mark the entry NEW */
		xad = &p->xad[index + 1];
		XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);

		/* advance next available entry index */
		p->header.nextindex =
		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
	}

	/* get back old XAD */
	xad = &p->xad[index];

	/*
	 * truncate/relocate old extent at split offset
	 */
      updateOld:
	/* update dmap for old/committed/truncated extent */
	rlen = lengthXAD(xad) - llen;
	if (!(xad->flag & XAD_NEW)) {
		/* free from PWMAP at commit */
		if (!test_cflag(COMMIT_Nolink, ip)) {
			mtlck = txMaplock(tid, ip, tlckMAP);
			pxdlock = (struct maplock *) & mtlck->lock;
			pxdlock->flag = mlckFREEPXD;
			PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen);
			PXDlength(&pxdlock->pxd, rlen);
			pxdlock->index = 1;
		}
	} else
		/* free from WMAP */
		dbFree(ip, addressXAD(xad) + llen, (s64) rlen);

	if (llen)
		/* truncate */
		XADlength(xad, llen);
	else
		/* replace */
		XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);

	if (!test_cflag(COMMIT_Nolink, ip)) {
		xtlck->lwm.offset = (xtlck->lwm.offset) ?
		    min(index, (int)xtlck->lwm.offset) : index;
		xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
		    xtlck->lwm.offset;
	}

	/* unpin the leaf page */
	XT_PUTPAGE(mp);

	return rc;
}
#endif /* _NOTYET */

/*
 * xtUpdate()
 *
 * function: update XAD;
 *
 *	update extent for allocated_but_not_recorded or
 *	compressed extent;
 *
 * parameter:
 *	nxad	- new XAD;
 *		logical extent of the specified XAD must be completely
 *		contained by an existing XAD;
 */
int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
{				/* new XAD */
	int rc = 0;
	int cmp;
	struct metapage *mp;	/* meta-page buffer */
	xtpage_t *p;		/* base B+-tree index page */
	s64 bn;
	int index0, index, newindex, nextindex;
	struct btstack btstack;	/* traverse stack */
	struct xtsplit split;	/* split information */
	xad_t *xad, *lxad, *rxad;
	int xflag;
	s64 nxoff, xoff;
	int nxlen, xlen, lxlen, rxlen;
	s64 nxaddr, xaddr;
	struct tlock *tlck;
	struct xtlock *xtlck = NULL;
	int newpage = 0;

	/* the extent to be updated must exist */
	nxoff = offsetXAD(nxad);
	nxlen = lengthXAD(nxad);
	nxaddr = addressXAD(nxad);

	if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
		return rc;

	/* retrieve search result */
	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);

	if (cmp != 0) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb, "xtUpdate: Could not find extent");
		return -EIO;
	}

	BT_MARK_DIRTY(mp, ip);
	/*
	 * acquire tlock of the leaf page containing original entry
	 */
	if (!test_cflag(COMMIT_Nolink, ip)) {
		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
		xtlck = (struct xtlock *) & tlck->lock;
	}

	xad = &p->xad[index0];
	xflag = xad->flag;
	xoff = offsetXAD(xad);
	xlen = lengthXAD(xad);
	xaddr = addressXAD(xad);

	/* nXAD must be completely contained within XAD */
	if ((xoff > nxoff) ||
	    (nxoff + nxlen > xoff + xlen)) {
		XT_PUTPAGE(mp);
		jfs_error(ip->i_sb,
			  "xtUpdate: nXAD is not completely contained within XAD");
		return -EIO;
	}

	index = index0;
	newindex = index + 1;
	nextindex = le16_to_cpu(p->header.nextindex);

#ifdef _JFS_WIP_NOCOALESCE
	if (xoff < nxoff)
		goto updateRight;

	/*
	 * replace XAD with nXAD
	 */
      replace:			/* (nxoff == xoff) */
	if (nxlen == xlen) {
		/* replace XAD with nXAD:recorded */
		*xad = *nxad;
		xad->flag = xflag & ~XAD_NOTRECORDED;

		goto out;
	} else			/* (nxlen < xlen) */
		goto updateLeft;
#endif /* _JFS_WIP_NOCOALESCE */

/* #ifdef _JFS_WIP_COALESCE */
	if (xoff < nxoff)
		goto coalesceRight;

	/*
	 * coalesce with left XAD
	 */
//coalesceLeft: /* (xoff == nxoff) */
	/* is XAD first entry of page ? */
	if (index == XTENTRYSTART)
		goto replace;

	/* is nXAD logically and physically contiguous with lXAD ?
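	 *
	 * i.e., sketched with hypothetical numbers:
	 *	lXAD (off=0, len=4, addr=100) and nXAD (off=4, len=2, addr=104)
	 *	are contiguous in both the logical and physical spaces, so the
	 *	two merge into (off=0, len=6, addr=100), provided lXAD is
	 *	recorded and the merged length stays under MAXXLEN;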
	 */
	lxad = &p->xad[index - 1];
	lxlen = lengthXAD(lxad);
	if (!(lxad->flag & XAD_NOTRECORDED) &&
	    (nxoff == offsetXAD(lxad) + lxlen) &&
	    (nxaddr == addressXAD(lxad) + lxlen) &&
	    (lxlen + nxlen < MAXXLEN)) {
		/* extend right lXAD */
		index0 = index - 1;
		XADlength(lxad, lxlen + nxlen);

		/* If we just merged two extents together, need to make sure
		 * the right extent gets logged.  If the left one is marked
		 * XAD_NEW, then we know it will be logged.  Otherwise, mark
		 * as XAD_EXTENDED
		 */
		if (!(lxad->flag & XAD_NEW))
			lxad->flag |= XAD_EXTENDED;

		if (xlen > nxlen) {
			/* truncate XAD */
			XADoffset(xad, xoff + nxlen);
			XADlength(xad, xlen - nxlen);
			XADaddress(xad, xaddr + nxlen);
			goto out;
		} else {	/* (xlen == nxlen) */

			/* remove XAD */
			if (index < nextindex - 1)
				memmove(&p->xad[index], &p->xad[index + 1],
					(nextindex - index -
					 1) << L2XTSLOTSIZE);

			p->header.nextindex =
			    cpu_to_le16(le16_to_cpu(p->header.nextindex) -
					1);

			index = index0;
			newindex = index + 1;
			nextindex = le16_to_cpu(p->header.nextindex);
			xoff = nxoff = offsetXAD(lxad);
			xlen = nxlen = lxlen + nxlen;
			xaddr = nxaddr = addressXAD(lxad);
			goto coalesceRight;
		}
	}

	/*
	 * replace XAD with nXAD
	 */
      replace:			/* (nxoff == xoff) */
	if (nxlen == xlen) {
		/* replace XAD with nXAD:recorded */
		*xad = *nxad;
		xad->flag = xflag & ~XAD_NOTRECORDED;

		goto coalesceRight;
	} else			/* (nxlen < xlen) */
		goto updateLeft;

	/*
	 * coalesce with right XAD
	 */
      coalesceRight:		/* (xoff <= nxoff) */
	/* is XAD last entry of page ? */
	if (newindex == nextindex) {
		if (xoff == nxoff)
			goto out;
		goto updateRight;
	}

	/* is nXAD logically and physically contiguous with rXAD ? */
	rxad = &p->xad[index + 1];
	rxlen = lengthXAD(rxad);
	if (!(rxad->flag & XAD_NOTRECORDED) &&
	    (nxoff + nxlen == offsetXAD(rxad)) &&
	    (nxaddr + nxlen == addressXAD(rxad)) &&
	    (rxlen + nxlen < MAXXLEN)) {
		/* extend left rXAD */
		XADoffset(rxad, nxoff);
		XADlength(rxad, rxlen + nxlen);
		XADaddress(rxad, nxaddr);

		/* If we just merged two extents together, need to make sure
		 * the left extent gets logged.  If the right one is marked
		 * XAD_NEW, then we know it will be logged.
Otherwise, mark as 2149 * XAD_EXTENDED 2150 */ 2151 if (!(rxad->flag & XAD_NEW)) 2152 rxad->flag |= XAD_EXTENDED; 2153 2154 if (xlen > nxlen) 2155 /* truncate XAD */ 2156 XADlength(xad, xlen - nxlen); 2157 else { /* (xlen == nxlen) */ 2158 2159 /* remove XAD */ 2160 memmove(&p->xad[index], &p->xad[index + 1], 2161 (nextindex - index - 1) << L2XTSLOTSIZE); 2162 2163 p->header.nextindex = 2164 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 2165 1); 2166 } 2167 2168 goto out; 2169 } else if (xoff == nxoff) 2170 goto out; 2171 2172 if (xoff >= nxoff) { 2173 XT_PUTPAGE(mp); 2174 jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff"); 2175 return -EIO; 2176 } 2177 /* #endif _JFS_WIP_COALESCE */ 2178 2179 /* 2180 * split XAD into (lXAD, nXAD): 2181 * 2182 * |---nXAD---> 2183 * --|----------XAD----------|-- 2184 * |-lXAD-| 2185 */ 2186 updateRight: /* (xoff < nxoff) */ 2187 /* truncate old XAD as lXAD:not_recorded */ 2188 xad = &p->xad[index]; 2189 XADlength(xad, nxoff - xoff); 2190 2191 /* insert nXAD:recorded */ 2192 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2193 2194 /* xtSplitUp() unpins leaf pages */ 2195 split.mp = mp; 2196 split.index = newindex; 2197 split.flag = xflag & ~XAD_NOTRECORDED; 2198 split.off = nxoff; 2199 split.len = nxlen; 2200 split.addr = nxaddr; 2201 split.pxdlist = NULL; 2202 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 2203 return rc; 2204 2205 /* get back old page */ 2206 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2207 if (rc) 2208 return rc; 2209 /* 2210 * if leaf root has been split, original root has been 2211 * copied to new child page, i.e., original entry now 2212 * resides on the new child page; 2213 */ 2214 if (p->header.flag & BT_INTERNAL) { 2215 ASSERT(p->header.nextindex == 2216 cpu_to_le16(XTENTRYSTART + 1)); 2217 xad = &p->xad[XTENTRYSTART]; 2218 bn = addressXAD(xad); 2219 XT_PUTPAGE(mp); 2220 2221 /* get new child page */ 2222 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2223 if (rc) 2224 return rc; 2225 2226 BT_MARK_DIRTY(mp, ip); 2227 if (!test_cflag(COMMIT_Nolink, ip)) { 2228 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 2229 xtlck = (struct xtlock *) & tlck->lock; 2230 } 2231 } else { 2232 /* is nXAD on new page ? */ 2233 if (newindex > 2234 (le16_to_cpu(p->header.maxentry) >> 1)) { 2235 newindex = 2236 newindex - 2237 le16_to_cpu(p->header.nextindex) + 2238 XTENTRYSTART; 2239 newpage = 1; 2240 } 2241 } 2242 } else { 2243 /* if insert into middle, shift right remaining entries */ 2244 if (newindex < nextindex) 2245 memmove(&p->xad[newindex + 1], &p->xad[newindex], 2246 (nextindex - newindex) << L2XTSLOTSIZE); 2247 2248 /* insert the entry */ 2249 xad = &p->xad[newindex]; 2250 *xad = *nxad; 2251 xad->flag = xflag & ~XAD_NOTRECORDED; 2252 2253 /* advance next available entry index. */ 2254 p->header.nextindex = 2255 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 2256 } 2257 2258 /* 2259 * does nXAD force 3-way split ? 2260 * 2261 * |---nXAD--->| 2262 * --|----------XAD-------------|-- 2263 * |-lXAD-| |-rXAD -| 2264 */ 2265 if (nxoff + nxlen == xoff + xlen) 2266 goto out; 2267 2268 /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */ 2269 if (newpage) { 2270 /* close out old page */ 2271 if (!test_cflag(COMMIT_Nolink, ip)) { 2272 xtlck->lwm.offset = (xtlck->lwm.offset) ? 
2273 min(index0, (int)xtlck->lwm.offset) : index0; 2274 xtlck->lwm.length = 2275 le16_to_cpu(p->header.nextindex) - 2276 xtlck->lwm.offset; 2277 } 2278 2279 bn = le64_to_cpu(p->header.next); 2280 XT_PUTPAGE(mp); 2281 2282 /* get new right page */ 2283 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2284 if (rc) 2285 return rc; 2286 2287 BT_MARK_DIRTY(mp, ip); 2288 if (!test_cflag(COMMIT_Nolink, ip)) { 2289 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 2290 xtlck = (struct xtlock *) & tlck->lock; 2291 } 2292 2293 index0 = index = newindex; 2294 } else 2295 index++; 2296 2297 newindex = index + 1; 2298 nextindex = le16_to_cpu(p->header.nextindex); 2299 xlen = xlen - (nxoff - xoff); 2300 xoff = nxoff; 2301 xaddr = nxaddr; 2302 2303 /* recompute split pages */ 2304 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2305 XT_PUTPAGE(mp); 2306 2307 if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) 2308 return rc; 2309 2310 /* retrieve search result */ 2311 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); 2312 2313 if (cmp != 0) { 2314 XT_PUTPAGE(mp); 2315 jfs_error(ip->i_sb, "xtUpdate: xtSearch failed"); 2316 return -EIO; 2317 } 2318 2319 if (index0 != index) { 2320 XT_PUTPAGE(mp); 2321 jfs_error(ip->i_sb, 2322 "xtUpdate: unexpected value of index"); 2323 return -EIO; 2324 } 2325 } 2326 2327 /* 2328 * split XAD into (nXAD, rXAD) 2329 * 2330 * ---nXAD---| 2331 * --|----------XAD----------|-- 2332 * |-rXAD-| 2333 */ 2334 updateLeft: /* (nxoff == xoff) && (nxlen < xlen) */ 2335 /* update old XAD with nXAD:recorded */ 2336 xad = &p->xad[index]; 2337 *xad = *nxad; 2338 xad->flag = xflag & ~XAD_NOTRECORDED; 2339 2340 /* insert rXAD:not_recorded */ 2341 xoff = xoff + nxlen; 2342 xlen = xlen - nxlen; 2343 xaddr = xaddr + nxlen; 2344 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2345 /* 2346 printf("xtUpdate.updateLeft.split p:0x%p\n", p); 2347 */ 2348 /* xtSplitUp() unpins leaf pages */ 2349 split.mp = mp; 2350 split.index = newindex; 2351 split.flag = xflag; 2352 split.off = xoff; 2353 split.len = xlen; 2354 split.addr = xaddr; 2355 split.pxdlist = NULL; 2356 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 2357 return rc; 2358 2359 /* get back old page */ 2360 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2361 if (rc) 2362 return rc; 2363 2364 /* 2365 * if leaf root has been split, original root has been 2366 * copied to new child page, i.e., original entry now 2367 * resides on the new child page; 2368 */ 2369 if (p->header.flag & BT_INTERNAL) { 2370 ASSERT(p->header.nextindex == 2371 cpu_to_le16(XTENTRYSTART + 1)); 2372 xad = &p->xad[XTENTRYSTART]; 2373 bn = addressXAD(xad); 2374 XT_PUTPAGE(mp); 2375 2376 /* get new child page */ 2377 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2378 if (rc) 2379 return rc; 2380 2381 BT_MARK_DIRTY(mp, ip); 2382 if (!test_cflag(COMMIT_Nolink, ip)) { 2383 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 2384 xtlck = (struct xtlock *) & tlck->lock; 2385 } 2386 } 2387 } else { 2388 /* if insert into middle, shift right remaining entries */ 2389 if (newindex < nextindex) 2390 memmove(&p->xad[newindex + 1], &p->xad[newindex], 2391 (nextindex - newindex) << L2XTSLOTSIZE); 2392 2393 /* insert the entry */ 2394 xad = &p->xad[newindex]; 2395 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 2396 2397 /* advance next available entry index. */ 2398 p->header.nextindex = 2399 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 2400 } 2401 2402 out: 2403 if (!test_cflag(COMMIT_Nolink, ip)) { 2404 xtlck->lwm.offset = (xtlck->lwm.offset) ? 
2405 min(index0, (int)xtlck->lwm.offset) : index0; 2406 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 2407 xtlck->lwm.offset; 2408 } 2409 2410 /* unpin the leaf page */ 2411 XT_PUTPAGE(mp); 2412 2413 return rc; 2414 } 2415 2416 2417 /* 2418 * xtAppend() 2419 * 2420 * function: grow in append mode from contiguous region specified; 2421 * 2422 * parameter: 2423 * tid - transaction id; 2424 * ip - file object; 2425 * xflag - extent flag: 2426 * xoff - extent offset; 2427 * maxblocks - max extent length; 2428 * xlen - extent length (in/out); 2429 * xaddrp - extent address pointer (in/out): 2430 * flag - 2431 * 2432 * return: 2433 */ 2434 int xtAppend(tid_t tid, /* transaction id */ 2435 struct inode *ip, int xflag, s64 xoff, s32 maxblocks, 2436 s32 * xlenp, /* (in/out) */ 2437 s64 * xaddrp, /* (in/out) */ 2438 int flag) 2439 { 2440 int rc = 0; 2441 struct metapage *mp; /* meta-page buffer */ 2442 xtpage_t *p; /* base B+-tree index page */ 2443 s64 bn, xaddr; 2444 int index, nextindex; 2445 struct btstack btstack; /* traverse stack */ 2446 struct xtsplit split; /* split information */ 2447 xad_t *xad; 2448 int cmp; 2449 struct tlock *tlck; 2450 struct xtlock *xtlck; 2451 int nsplit, nblocks, xlen; 2452 struct pxdlist pxdlist; 2453 pxd_t *pxd; 2454 s64 next; 2455 2456 xaddr = *xaddrp; 2457 xlen = *xlenp; 2458 jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx", 2459 (ulong) xoff, maxblocks, xlen, (ulong) xaddr); 2460 2461 /* 2462 * search for the entry location at which to insert: 2463 * 2464 * xtFastSearch() and xtSearch() both return (leaf page 2465 * pinned, index at which to insert). 2466 * n.b. xtSearch() may return index of maxentry of 2467 * the full page. 2468 */ 2469 if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT))) 2470 return rc; 2471 2472 /* retrieve search result */ 2473 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2474 2475 if (cmp == 0) { 2476 rc = -EEXIST; 2477 goto out; 2478 } 2479 2480 if (next) 2481 xlen = min(xlen, (int)(next - xoff)); 2482 //insert: 2483 /* 2484 * insert entry for new extent 2485 */ 2486 xflag |= XAD_NEW; 2487 2488 /* 2489 * if the leaf page is full, split the page and 2490 * propagate up the router entry for the new page from split 2491 * 2492 * The xtSplitUp() will insert the entry and unpin the leaf page. 
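n.b. each level of split consumes nbperpage blocks from the head of the caller's region (see the allocation loop below), so the data extent is carved from whatever remains of maxblocks. 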
2493 */ 2494 nextindex = le16_to_cpu(p->header.nextindex); 2495 if (nextindex < le16_to_cpu(p->header.maxentry)) 2496 goto insertLeaf; 2497 2498 /* 2499 * allocate new index blocks to cover index page split(s) 2500 */ 2501 nsplit = btstack.nsplit; 2502 split.pxdlist = &pxdlist; 2503 pxdlist.maxnpxd = pxdlist.npxd = 0; 2504 pxd = &pxdlist.pxd[0]; 2505 nblocks = JFS_SBI(ip->i_sb)->nbperpage; 2506 for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) { 2507 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) { 2508 PXDaddress(pxd, xaddr); 2509 PXDlength(pxd, nblocks); 2510 2511 pxdlist.maxnpxd++; 2512 2513 continue; 2514 } 2515 2516 /* undo allocation */ 2517 2518 goto out; 2519 } 2520 2521 xlen = min(xlen, maxblocks); 2522 2523 /* 2524 * allocate data extent requested 2525 */ 2526 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) 2527 goto out; 2528 2529 split.mp = mp; 2530 split.index = index; 2531 split.flag = xflag; 2532 split.off = xoff; 2533 split.len = xlen; 2534 split.addr = xaddr; 2535 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { 2536 /* undo data extent allocation */ 2537 dbFree(ip, *xaddrp, (s64) * xlenp); 2538 2539 return rc; 2540 } 2541 2542 *xaddrp = xaddr; 2543 *xlenp = xlen; 2544 return 0; 2545 2546 /* 2547 * insert the new entry into the leaf page 2548 */ 2549 insertLeaf: 2550 /* 2551 * allocate data extent requested 2552 */ 2553 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) 2554 goto out; 2555 2556 BT_MARK_DIRTY(mp, ip); 2557 /* 2558 * acquire a transaction lock on the leaf page; 2559 * 2560 * action: xad insertion/extension; 2561 */ 2562 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 2563 xtlck = (struct xtlock *) & tlck->lock; 2564 2565 /* insert the new entry: mark the entry NEW */ 2566 xad = &p->xad[index]; 2567 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 2568 2569 /* advance next available entry index */ 2570 p->header.nextindex = 2571 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 2572 2573 xtlck->lwm.offset = 2574 (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index; 2575 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 2576 xtlck->lwm.offset; 2577 2578 *xaddrp = xaddr; 2579 *xlenp = xlen; 2580 2581 out: 2582 /* unpin the leaf page */ 2583 XT_PUTPAGE(mp); 2584 2585 return rc; 2586 } 2587 #ifdef _STILL_TO_PORT 2588 2589 /* - TBD for defragmentation/reorganization - 2590 * 2591 * xtDelete() 2592 * 2593 * function: 2594 * delete the entry with the specified key. 2595 * 2596 * N.B.: whole extent of the entry is assumed to be deleted. 2597 * 2598 * parameter: 2599 * 2600 * return: 2601 * ENOENT: if the entry is not found. 
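0: if the entry was found and deleted. 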
2602 * 2603 * exception: 2604 */ 2605 int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag) 2606 { 2607 int rc = 0; 2608 struct btstack btstack; 2609 int cmp; 2610 s64 bn; 2611 struct metapage *mp; 2612 xtpage_t *p; 2613 int index, nextindex; 2614 struct tlock *tlck; 2615 struct xtlock *xtlck; 2616 2617 /* 2618 * find the matching entry; xtSearch() pins the page 2619 */ 2620 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) 2621 return rc; 2622 2623 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2624 if (cmp) { 2625 /* unpin the leaf page */ 2626 XT_PUTPAGE(mp); 2627 return -ENOENT; 2628 } 2629 2630 /* 2631 * delete the entry from the leaf page 2632 */ 2633 nextindex = le16_to_cpu(p->header.nextindex); 2634 p->header.nextindex = 2635 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1); 2636 2637 /* 2638 * if the leaf page becomes empty, free the page 2639 */ 2640 if (p->header.nextindex == cpu_to_le16(XTENTRYSTART)) 2641 return (xtDeleteUp(tid, ip, mp, p, &btstack)); 2642 2643 BT_MARK_DIRTY(mp, ip); 2644 /* 2645 * acquire a transaction lock on the leaf page; 2646 * 2647 * action: xad deletion; 2648 */ 2649 tlck = txLock(tid, ip, mp, tlckXTREE); 2650 xtlck = (struct xtlock *) & tlck->lock; 2651 xtlck->lwm.offset = 2652 (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index; 2653 2654 /* if delete from middle, shift left/compact the remaining entries */ 2655 if (index < nextindex - 1) 2656 memmove(&p->xad[index], &p->xad[index + 1], 2657 (nextindex - index - 1) * sizeof(xad_t)); 2658 2659 XT_PUTPAGE(mp); 2660 2661 return 0; 2662 } 2663 2664 2665 /* - TBD for defragmentation/reorganization - 2666 * 2667 * xtDeleteUp() 2668 * 2669 * function: 2670 * free empty pages as propagating deletion up the tree 2671 * 2672 * parameter: 2673 * 2674 * return: 2675 */ 2676 static int 2677 xtDeleteUp(tid_t tid, struct inode *ip, 2678 struct metapage * fmp, xtpage_t * fp, struct btstack * btstack) 2679 { 2680 int rc = 0; 2681 struct metapage *mp; 2682 xtpage_t *p; 2683 int index, nextindex; 2684 s64 xaddr; 2685 int xlen; 2686 struct btframe *parent; 2687 struct tlock *tlck; 2688 struct xtlock *xtlck; 2689 2690 /* 2691 * keep root leaf page which has become empty 2692 */ 2693 if (fp->header.flag & BT_ROOT) { 2694 /* keep the root page */ 2695 fp->header.flag &= ~BT_INTERNAL; 2696 fp->header.flag |= BT_LEAF; 2697 fp->header.nextindex = cpu_to_le16(XTENTRYSTART); 2698 2699 /* XT_PUTPAGE(fmp); */ 2700 2701 return 0; 2702 } 2703 2704 /* 2705 * free non-root leaf page 2706 */ 2707 if ((rc = xtRelink(tid, ip, fp))) { 2708 XT_PUTPAGE(fmp); 2709 return rc; 2710 } 2711 2712 xaddr = addressPXD(&fp->header.self); 2713 xlen = lengthPXD(&fp->header.self); 2714 /* free the page extent */ 2715 dbFree(ip, xaddr, (s64) xlen); 2716 2717 /* free the buffer page */ 2718 discard_metapage(fmp); 2719 2720 /* 2721 * propagate page deletion up the index tree 2722 * 2723 * If the delete from the parent page makes it empty, 2724 * continue all the way up the tree. 2725 * stop if the root page is reached (which is never deleted) or 2726 * if the entry deletion does not empty the page. 2727 */ 2728 while ((parent = BT_POP(btstack)) != NULL) { 2729 /* get/pin the parent page <sp> */ 2730 XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); 2731 if (rc) 2732 return rc; 2733 2734 index = parent->index; 2735 2736 /* delete the entry for the freed child page from parent. 
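n.b. parent->index, saved on the traverse stack during the search down, identifies that router entry. 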
2737 */ 2738 nextindex = le16_to_cpu(p->header.nextindex); 2739 2740 /* 2741 * the parent has the single entry being deleted: 2742 * free the parent page which has become empty. 2743 */ 2744 if (nextindex == 1) { 2745 if (p->header.flag & BT_ROOT) { 2746 /* keep the root page */ 2747 p->header.flag &= ~BT_INTERNAL; 2748 p->header.flag |= BT_LEAF; 2749 p->header.nextindex = 2750 cpu_to_le16(XTENTRYSTART); 2751 2752 /* XT_PUTPAGE(mp); */ 2753 2754 break; 2755 } else { 2756 /* free the parent page */ 2757 if ((rc = xtRelink(tid, ip, p))) 2758 return rc; 2759 2760 xaddr = addressPXD(&p->header.self); 2761 /* free the page extent */ 2762 dbFree(ip, xaddr, 2763 (s64) JFS_SBI(ip->i_sb)->nbperpage); 2764 2765 /* unpin/free the buffer page */ 2766 discard_metapage(mp); 2767 2768 /* propagate up */ 2769 continue; 2770 } 2771 } 2772 /* 2773 * the parent has other entries remaining: 2774 * delete the router entry from the parent page. 2775 */ 2776 else { 2777 BT_MARK_DIRTY(mp, ip); 2778 /* 2779 * acquire a transaction lock on the parent page; 2780 * 2781 * action: xad deletion; 2782 */ 2783 tlck = txLock(tid, ip, mp, tlckXTREE); 2784 xtlck = (struct xtlock *) & tlck->lock; 2785 xtlck->lwm.offset = 2786 (xtlck->lwm.offset) ? min(index, 2787 xtlck->lwm. 2788 offset) : index; 2789 2790 /* if delete from middle, 2791 * shift left/compact the remaining entries in the page 2792 */ 2793 if (index < nextindex - 1) 2794 memmove(&p->xad[index], &p->xad[index + 1], 2795 (nextindex - index - 2796 1) << L2XTSLOTSIZE); 2797 2798 p->header.nextindex = 2799 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 2800 1); 2801 jfs_info("xtDeleteUp(entry): 0x%lx[%d]", 2802 (ulong) parent->bn, index); 2803 } 2804 2805 /* unpin the parent page */ 2806 XT_PUTPAGE(mp); 2807 2808 /* exit propagation up */ 2809 break; 2810 } 2811 2812 return 0; 2813 } 2814 2815 2816 /* 2817 * NAME: xtRelocate() 2818 * 2819 * FUNCTION: relocate xtpage or data extent of regular file; 2820 * This function is mainly used by defragfs utility. 2821 * 2822 * NOTE: This routine does not have the logic to handle 2823 * uncommitted allocated extents. The caller should call 2824 * txCommit() to commit all the allocation before calling 2825 * this routine. 2826 */ 2827 int 2828 xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ 2829 s64 nxaddr, /* new xaddr */ 2830 int xtype) 2831 { /* extent type: XTPAGE or DATAEXT */ 2832 int rc = 0; 2833 struct tblock *tblk; 2834 struct tlock *tlck; 2835 struct xtlock *xtlck; 2836 struct metapage *mp, *pmp, *lmp, *rmp; /* meta-page buffer */ 2837 xtpage_t *p, *pp, *rp, *lp; /* base B+-tree index page */ 2838 xad_t *xad; 2839 pxd_t *pxd; 2840 s64 xoff, xsize; 2841 int xlen; 2842 s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn; 2843 cbuf_t *cp; 2844 s64 offset, nbytes, nbrd, pno; 2845 int nb, npages, nblks; 2846 s64 bn; 2847 int cmp; 2848 int index; 2849 struct pxd_lock *pxdlock; 2850 struct btstack btstack; /* traverse stack */ 2851 2852 xtype = xtype & EXTENT_TYPE; 2853 2854 xoff = offsetXAD(oxad); 2855 oxaddr = addressXAD(oxad); 2856 xlen = lengthXAD(oxad); 2857 2858 /* validate extent offset */ 2859 offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; 2860 if (offset >= ip->i_size) 2861 return -ESTALE; /* stale extent */ 2862 2863 jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx", 2864 xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr); 2865 2866 /* 2867 * 1. 
get and validate the parent xtpage/xad entry 2868 * covering the source extent to be relocated; 2869 */ 2870 if (xtype == DATAEXT) { 2871 /* search in leaf entry */ 2872 rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); 2873 if (rc) 2874 return rc; 2875 2876 /* retrieve search result */ 2877 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2878 2879 if (cmp) { 2880 XT_PUTPAGE(pmp); 2881 return -ESTALE; 2882 } 2883 2884 /* validate for exact match with a single entry */ 2885 xad = &pp->xad[index]; 2886 if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) { 2887 XT_PUTPAGE(pmp); 2888 return -ESTALE; 2889 } 2890 } else { /* (xtype == XTPAGE) */ 2891 2892 /* search in internal entry */ 2893 rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0); 2894 if (rc) 2895 return rc; 2896 2897 /* retrieve search result */ 2898 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2899 2900 if (cmp) { 2901 XT_PUTPAGE(pmp); 2902 return -ESTALE; 2903 } 2904 2905 /* xtSearchNode() validated for exact match with a single entry 2906 */ 2907 xad = &pp->xad[index]; 2908 } 2909 jfs_info("xtRelocate: parent xad entry validated."); 2910 2911 /* 2912 * 2. relocate the extent 2913 */ 2914 if (xtype == DATAEXT) { 2915 /* if the extent is allocated-but-not-recorded 2916 * there is no real data to be moved in this extent, 2917 */ 2918 if (xad->flag & XAD_NOTRECORDED) 2919 goto out; 2920 else 2921 /* release xtpage for cmRead()/xtLookup() */ 2922 XT_PUTPAGE(pmp); 2923 2924 /* 2925 * cmRelocate() 2926 * 2927 * copy target data pages to be relocated; 2928 * 2929 * data extent must start at page boundary and 2930 * multiple of page size (except the last data extent); 2931 * read in each page of the source data extent into cbuf, 2932 * update the cbuf extent descriptor of the page to be 2933 * homeward bound to new dst data extent 2934 * copy the data from the old extent to new extent. 2935 * copy is essential for compressed files to avoid problems 2936 * that can arise if there was a change in compression 2937 * algorithms. 2938 * it is a good strategy because it may disrupt cache 2939 * policy to keep the pages in memory afterwards. 
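n.b. in outline, each pass of the loop below reads one cache-buffer-sized chunk of the source extent with cmRead(), rebinds it to the destination address with cmSetXD(), and releases it marked modified with cmPut(), stepping sxaddr and dxaddr in tandem. 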
2940 */ 2941 offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; 2942 assert((offset & CM_OFFSET) == 0); 2943 nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize; 2944 pno = offset >> CM_L2BSIZE; 2945 npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE; 2946 /* 2947 npages = ((offset + nbytes - 1) >> CM_L2BSIZE) - 2948 (offset >> CM_L2BSIZE) + 1; 2949 */ 2950 sxaddr = oxaddr; 2951 dxaddr = nxaddr; 2952 2953 /* process the request one cache buffer at a time */ 2954 for (nbrd = 0; nbrd < nbytes; nbrd += nb, 2955 offset += nb, pno++, npages--) { 2956 /* compute page size */ 2957 nb = min(nbytes - nbrd, CM_BSIZE); 2958 2959 /* get the cache buffer of the page */ 2960 if ((rc = cmRead(ip, offset, npages, &cp))) 2961 break; 2962 2963 assert(addressPXD(&cp->cm_pxd) == sxaddr); 2964 assert(!cp->cm_modified); 2965 2966 /* bind buffer with the new extent address */ 2967 nblks = nb >> JFS_SBI(ip->i_sb)->l2bsize; 2968 cmSetXD(ip, cp, pno, dxaddr, nblks); 2969 2970 /* release the cbuf, mark it as modified */ 2971 cmPut(cp, true); 2972 2973 dxaddr += nblks; 2974 sxaddr += nblks; 2975 } 2976 2977 /* get back parent page */ 2978 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) 2979 return rc; 2980 2981 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2982 jfs_info("xtRelocate: target data extent relocated."); 2983 } else { /* (xtype == XTPAGE) */ 2984 2985 /* 2986 * read in the target xtpage from the source extent; 2987 */ 2988 XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); 2989 if (rc) { 2990 XT_PUTPAGE(pmp); 2991 return rc; 2992 } 2993 2994 /* 2995 * read in sibling pages if any to update sibling pointers; 2996 */ 2997 rmp = NULL; 2998 if (p->header.next) { 2999 nextbn = le64_to_cpu(p->header.next); 3000 XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc); 3001 if (rc) { 3002 XT_PUTPAGE(pmp); 3003 XT_PUTPAGE(mp); 3004 return (rc); 3005 } 3006 } 3007 3008 lmp = NULL; 3009 if (p->header.prev) { 3010 prevbn = le64_to_cpu(p->header.prev); 3011 XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc); 3012 if (rc) { 3013 XT_PUTPAGE(pmp); 3014 XT_PUTPAGE(mp); 3015 if (rmp) 3016 XT_PUTPAGE(rmp); 3017 return (rc); 3018 } 3019 } 3020 3021 /* at this point, all xtpages to be updated are in memory */ 3022 3023 /* 3024 * update sibling pointers of sibling xtpages if any; 3025 */ 3026 if (lmp) { 3027 BT_MARK_DIRTY(lmp, ip); 3028 tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK); 3029 lp->header.next = cpu_to_le64(nxaddr); 3030 XT_PUTPAGE(lmp); 3031 } 3032 3033 if (rmp) { 3034 BT_MARK_DIRTY(rmp, ip); 3035 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK); 3036 rp->header.prev = cpu_to_le64(nxaddr); 3037 XT_PUTPAGE(rmp); 3038 } 3039 3040 /* 3041 * update the target xtpage to be relocated 3042 * 3043 * update the self address of the target page 3044 * and write to destination extent; 3045 * redo image covers the whole xtpage since it is new page 3046 * to the destination extent; 3047 * update of bmap for the free of source extent 3048 * of the target xtpage itself: 3049 * update of bmap for the allocation of destination extent 3050 * of the target xtpage itself: 3051 * update of bmap for the extents covered by xad entries in 3052 * the target xtpage is not necessary since they are not 3053 * updated; 3054 * if not committed before this relocation, 3055 * target page may contain XAD_NEW entries which must 3056 * be scanned for bmap update (logredo() always 3057 * scan xtpage REDOPAGE image for bmap update); 3058 * if committed before this relocation (tlckRELOCATE), 3059 * scan may be skipped by commit() and logredo(); 3060 */ 3061 BT_MARK_DIRTY(mp, ip); 
3062 /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */ 3063 tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW); 3064 xtlck = (struct xtlock *) & tlck->lock; 3065 3066 /* update the self address in the xtpage header */ 3067 pxd = &p->header.self; 3068 PXDaddress(pxd, nxaddr); 3069 3070 /* linelock for the after image of the whole page */ 3071 xtlck->lwm.length = 3072 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; 3073 3074 /* update the buffer extent descriptor of target xtpage */ 3075 xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; 3076 bmSetXD(mp, nxaddr, xsize); 3077 3078 /* unpin the target page to new homeward bound */ 3079 XT_PUTPAGE(mp); 3080 jfs_info("xtRelocate: target xtpage relocated."); 3081 } 3082 3083 /* 3084 * 3. acquire maplock for the source extent to be freed; 3085 * 3086 * acquire a maplock saving the src relocated extent address; 3087 * to free of the extent at commit time; 3088 */ 3089 out: 3090 /* if DATAEXT relocation, write a LOG_UPDATEMAP record for 3091 * free PXD of the source data extent (logredo() will update 3092 * bmap for free of source data extent), and update bmap for 3093 * free of the source data extent; 3094 */ 3095 if (xtype == DATAEXT) 3096 tlck = txMaplock(tid, ip, tlckMAP); 3097 /* if XTPAGE relocation, write a LOG_NOREDOPAGE record 3098 * for the source xtpage (logredo() will init NoRedoPage 3099 * filter and will also update bmap for free of the source 3100 * xtpage), and update bmap for free of the source xtpage; 3101 * N.B. We use tlckMAP instead of tlkcXTREE because there 3102 * is no buffer associated with this lock since the buffer 3103 * has been redirected to the target location. 3104 */ 3105 else /* (xtype == XTPAGE) */ 3106 tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE); 3107 3108 pxdlock = (struct pxd_lock *) & tlck->lock; 3109 pxdlock->flag = mlckFREEPXD; 3110 PXDaddress(&pxdlock->pxd, oxaddr); 3111 PXDlength(&pxdlock->pxd, xlen); 3112 pxdlock->index = 1; 3113 3114 /* 3115 * 4. update the parent xad entry for relocation; 3116 * 3117 * acquire tlck for the parent entry with XAD_NEW as entry 3118 * update which will write LOG_REDOPAGE and update bmap for 3119 * allocation of XAD_NEW destination extent; 3120 */ 3121 jfs_info("xtRelocate: update parent xad entry."); 3122 BT_MARK_DIRTY(pmp, ip); 3123 tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW); 3124 xtlck = (struct xtlock *) & tlck->lock; 3125 3126 /* update the XAD with the new destination extent; */ 3127 xad = &pp->xad[index]; 3128 xad->flag |= XAD_NEW; 3129 XADaddress(xad, nxaddr); 3130 3131 xtlck->lwm.offset = min(index, xtlck->lwm.offset); 3132 xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) - 3133 xtlck->lwm.offset; 3134 3135 /* unpin the parent xtpage */ 3136 XT_PUTPAGE(pmp); 3137 3138 return rc; 3139 } 3140 3141 3142 /* 3143 * xtSearchNode() 3144 * 3145 * function: search for the internal xad entry covering specified extent. 3146 * This function is mainly used by defragfs utility. 3147 * 3148 * parameters: 3149 * ip - file object; 3150 * xad - extent to find; 3151 * cmpp - comparison result: 3152 * btstack - traverse stack; 3153 * flag - search process flag; 3154 * 3155 * returns: 3156 * btstack contains (bn, index) of search path traversed to the entry. 3157 * *cmpp is set to result of comparison with the entry returned. 3158 * the page containing the entry is pinned at exit. 
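n.b. the caller is responsible for releasing that pinned page with XT_PUTPAGE(). 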
3159 */ 3160 static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ 3161 int *cmpp, struct btstack * btstack, int flag) 3162 { 3163 int rc = 0; 3164 s64 xoff, xaddr; 3165 int xlen; 3166 int cmp = 1; /* init for empty page */ 3167 s64 bn; /* block number */ 3168 struct metapage *mp; /* meta-page buffer */ 3169 xtpage_t *p; /* page */ 3170 int base, index, lim; 3171 struct btframe *btsp; 3172 s64 t64; 3173 3174 BT_CLR(btstack); 3175 3176 xoff = offsetXAD(xad); 3177 xlen = lengthXAD(xad); 3178 xaddr = addressXAD(xad); 3179 3180 /* 3181 * search down tree from root: 3182 * 3183 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 3184 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 3185 * 3186 * if entry with search key K is not found 3187 * internal page search find the entry with largest key Ki 3188 * less than K which point to the child page to search; 3189 * leaf page search find the entry with smallest key Kj 3190 * greater than K so that the returned index is the position of 3191 * the entry to be shifted right for insertion of new entry. 3192 * for empty tree, search key is greater than any key of the tree. 3193 * 3194 * by convention, root bn = 0. 3195 */ 3196 for (bn = 0;;) { 3197 /* get/pin the page to search */ 3198 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3199 if (rc) 3200 return rc; 3201 if (p->header.flag & BT_LEAF) { 3202 XT_PUTPAGE(mp); 3203 return -ESTALE; 3204 } 3205 3206 lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 3207 3208 /* 3209 * binary search with search key K on the current page 3210 */ 3211 for (base = XTENTRYSTART; lim; lim >>= 1) { 3212 index = base + (lim >> 1); 3213 3214 XT_CMP(cmp, xoff, &p->xad[index], t64); 3215 if (cmp == 0) { 3216 /* 3217 * search hit 3218 * 3219 * verify for exact match; 3220 */ 3221 if (xaddr == addressXAD(&p->xad[index]) && 3222 xoff == offsetXAD(&p->xad[index])) { 3223 *cmpp = cmp; 3224 3225 /* save search result */ 3226 btsp = btstack->top; 3227 btsp->bn = bn; 3228 btsp->index = index; 3229 btsp->mp = mp; 3230 3231 return 0; 3232 } 3233 3234 /* descend/search its child page */ 3235 goto next; 3236 } 3237 3238 if (cmp > 0) { 3239 base = index + 1; 3240 --lim; 3241 } 3242 } 3243 3244 /* 3245 * search miss - non-leaf page: 3246 * 3247 * base is the smallest index with key (Kj) greater than 3248 * search key (K) and may be zero or maxentry index. 3249 * if base is non-zero, decrement base by one to get the parent 3250 * entry of the child page to search. 3251 */ 3252 index = base ? base - 1 : base; 3253 3254 /* 3255 * go down to child page 3256 */ 3257 next: 3258 /* get the child page block number */ 3259 bn = addressXAD(&p->xad[index]); 3260 3261 /* unpin the parent page */ 3262 XT_PUTPAGE(mp); 3263 } 3264 } 3265 3266 3267 /* 3268 * xtRelink() 3269 * 3270 * function: 3271 * link around a freed page. 
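i.e., point the freed page's prev and next siblings at each other, so the page drops out of the doubly-linked list at its level before its extent is freed. 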
* 3273 * Parameter: 3274 * int tid, 3275 * struct inode *ip, 3276 * xtpage_t *p) 3277 * 3278 * returns: 3279 */ 3280 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p) 3281 { 3282 int rc = 0; 3283 struct metapage *mp; 3284 s64 nextbn, prevbn; 3285 struct tlock *tlck; 3286 3287 nextbn = le64_to_cpu(p->header.next); 3288 prevbn = le64_to_cpu(p->header.prev); 3289 3290 /* update prev pointer of the next page */ 3291 if (nextbn != 0) { 3292 XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 3293 if (rc) 3294 return rc; 3295 3296 /* 3297 * acquire a transaction lock on the page; 3298 * 3299 * action: update prev pointer; 3300 */ 3301 BT_MARK_DIRTY(mp, ip); 3302 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 3303 3304 /* the page may already have been tlock'd */ 3305 3306 p->header.prev = cpu_to_le64(prevbn); 3307 3308 XT_PUTPAGE(mp); 3309 } 3310 3311 /* update next pointer of the previous page */ 3312 if (prevbn != 0) { 3313 XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc); 3314 if (rc) 3315 return rc; 3316 3317 /* 3318 * acquire a transaction lock on the page; 3319 * 3320 * action: update next pointer; 3321 */ 3322 BT_MARK_DIRTY(mp, ip); 3323 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 3324 3325 /* the page may already have been tlock'd */ 3326 3327 p->header.next = cpu_to_le64(nextbn); 3328 3329 XT_PUTPAGE(mp); 3330 } 3331 3332 return 0; 3333 } 3334 #endif /* _STILL_TO_PORT */ 3335 3336 3337 /* 3338 * xtInitRoot() 3339 * 3340 * initialize file root (inline in inode) 3341 */ 3342 void xtInitRoot(tid_t tid, struct inode *ip) 3343 { 3344 xtpage_t *p; 3345 3346 /* 3347 * acquire a transaction lock on the root 3348 * 3349 * action: 3350 */ 3351 txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag, 3352 tlckXTREE | tlckNEW); 3353 p = &JFS_IP(ip)->i_xtroot; 3354 3355 p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; 3356 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3357 3358 if (S_ISDIR(ip->i_mode)) 3359 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR); 3360 else { 3361 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT); 3362 ip->i_size = 0; 3363 } 3364 3365 3366 return; 3367 } 3368 3369 3370 /* 3371 * We can run into a deadlock truncating a file with a large number of 3372 * xtree pages (large fragmented file). A robust fix would entail a 3373 * reservation system where we would reserve a number of metadata pages 3374 * and tlocks which we would be guaranteed without a deadlock. Without 3375 * this, a partial fix is to limit number of metadata pages we will lock 3376 * in a single transaction. Currently we will truncate the file so that 3377 * no more than 50 leaf pages will be locked. The caller of xtTruncate 3378 * will be responsible for ensuring that the current transaction gets 3379 * committed, and that subsequent transactions are created to truncate 3380 * the file further if needed. 3381 */ 3382 #define MAX_TRUNCATE_LEAVES 50 3383 3384 /* 3385 * xtTruncate() 3386 * 3387 * function: 3388 * traverse for truncation logging backward bottom up; 3389 * terminate at the last extent entry at the current subtree 3390 * root page covering new down size. 3391 * truncation may occur within the last extent entry. 3392 * 3393 * parameter: 3394 * int tid, 3395 * struct inode *ip, 3396 * s64 newsize, 3397 * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} 3398 * 3399 * return: 3400 * 3401 * note: 3402 * PWMAP: 3403 * 1. truncate (non-COMMIT_NOLINK file) 3404 * by jfs_truncate() or jfs_open(O_TRUNC): 3405 * xtree is updated; 3406 * 2. 
truncate index table of directory when last entry removed 3407 * map update via tlock at commit time; 3408 * PMAP: 3409 * Call xtTruncate_pmap instead 3410 * WMAP: 3411 * 1. remove (free zero link count) on last reference release 3412 * (pmap has been freed at commit zero link count); 3413 * 2. truncate (COMMIT_NOLINK file, i.e., tmp file): 3414 * xtree is updated; 3415 * map update directly at truncation time; 3416 * 3417 * if (DELETE) 3418 * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); 3419 * else if (TRUNCATE) 3420 * must write LOG_NOREDOPAGE for deleted index page; 3421 * 3422 * pages may already have been tlocked by anonymous transactions 3423 * during file growth (i.e., write) before truncation; 3424 * 3425 * except for the last truncated entry, deleted entries remain as is 3426 * in the page (nextindex is updated) for other use 3427 * (e.g., log/update allocation map): this avoids copying the page 3428 * info but delays the freeing of pages; 3429 * 3430 */ 3431 s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) 3432 { 3433 int rc = 0; 3434 s64 teof; 3435 struct metapage *mp; 3436 xtpage_t *p; 3437 s64 bn; 3438 int index, nextindex; 3439 xad_t *xad; 3440 s64 xoff, xaddr; 3441 int xlen, len, freexlen; 3442 struct btstack btstack; 3443 struct btframe *parent; 3444 struct tblock *tblk = NULL; 3445 struct tlock *tlck = NULL; 3446 struct xtlock *xtlck = NULL; 3447 struct xdlistlock xadlock; /* maplock for COMMIT_WMAP */ 3448 struct pxd_lock *pxdlock; /* maplock for COMMIT_WMAP */ 3449 s64 nfreed; 3450 int freed, log; 3451 int locked_leaves = 0; 3452 3453 /* save object truncation type */ 3454 if (tid) { 3455 tblk = tid_to_tblock(tid); 3456 tblk->xflag |= flag; 3457 } 3458 3459 nfreed = 0; 3460 3461 flag &= COMMIT_MAP; 3462 assert(flag != COMMIT_PMAP); 3463 3464 if (flag == COMMIT_PWMAP) 3465 log = 1; 3466 else { 3467 log = 0; 3468 xadlock.flag = mlckFREEXADLIST; 3469 xadlock.index = 1; 3470 } 3471 3472 /* 3473 * if the newsize is not an integral number of pages, 3474 * the file between newsize and next page boundary will 3475 * be cleared. 3476 * if truncating into a file hole, it will cause 3477 * a full block to be allocated for the logical block. 3478 */ 3479 3480 /* 3481 * release page blocks of truncated region <teof, eof> 3482 * 3483 * free the data blocks from the leaf index blocks. 3484 * delete the parent index entries corresponding to 3485 * the freed child data/index blocks. 3486 * free the index blocks themselves which aren't needed 3487 * in new sized file. 3488 * 3489 * index blocks are updated only if the blocks are to be 3490 * retained in the new sized file. 3491 * if type is PMAP, the data and index pages are NOT 3492 * freed, and the data and index blocks are NOT freed 3493 * from working map. 3494 * (this will allow continued access of data/index of 3495 * temporary file (zerolink count file truncated to zero-length)). 
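For example, with a 4K block size (l2bsize == 12), newsize == 1 gives teof == (1 + 4095) >> 12 == 1 below, so block 0 is retained and all blocks at or beyond block 1 are candidates for freeing. 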
3496 */ 3497 teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >> 3498 JFS_SBI(ip->i_sb)->l2bsize; 3499 3500 /* clear stack */ 3501 BT_CLR(&btstack); 3502 3503 /* 3504 * start with root 3505 * 3506 * root resides in the inode 3507 */ 3508 bn = 0; 3509 3510 /* 3511 * first access of each page: 3512 */ 3513 getPage: 3514 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3515 if (rc) 3516 return rc; 3517 3518 /* process entries backward from last index */ 3519 index = le16_to_cpu(p->header.nextindex) - 1; 3520 3521 3522 /* Since this is the rightmost page at this level, and we may have 3523 * already freed a page that was formerly to the right, let's make 3524 * sure that the next pointer is zero. 3525 */ 3526 if (p->header.next) { 3527 if (log) 3528 /* 3529 * Make sure this change to the header is logged. 3530 * If we really truncate this leaf, the flag 3531 * will be changed to tlckTRUNCATE 3532 */ 3533 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 3534 BT_MARK_DIRTY(mp, ip); 3535 p->header.next = 0; 3536 } 3537 3538 if (p->header.flag & BT_INTERNAL) 3539 goto getChild; 3540 3541 /* 3542 * leaf page 3543 */ 3544 freed = 0; 3545 3546 /* does region covered by leaf page precede Teof ? */ 3547 xad = &p->xad[index]; 3548 xoff = offsetXAD(xad); 3549 xlen = lengthXAD(xad); 3550 if (teof >= xoff + xlen) { 3551 XT_PUTPAGE(mp); 3552 goto getParent; 3553 } 3554 3555 /* (re)acquire tlock of the leaf page */ 3556 if (log) { 3557 if (++locked_leaves > MAX_TRUNCATE_LEAVES) { 3558 /* 3559 * We need to limit the size of the transaction 3560 * to avoid exhausting pagecache & tlocks 3561 */ 3562 XT_PUTPAGE(mp); 3563 newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; 3564 goto getParent; 3565 } 3566 tlck = txLock(tid, ip, mp, tlckXTREE); 3567 tlck->type = tlckXTREE | tlckTRUNCATE; 3568 xtlck = (struct xtlock *) & tlck->lock; 3569 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; 3570 } 3571 BT_MARK_DIRTY(mp, ip); 3572 3573 /* 3574 * scan backward leaf page entries 3575 */ 3576 for (; index >= XTENTRYSTART; index--) { 3577 xad = &p->xad[index]; 3578 xoff = offsetXAD(xad); 3579 xlen = lengthXAD(xad); 3580 xaddr = addressXAD(xad); 3581 3582 /* 3583 * The "data" for a directory is indexed by the block 3584 * device's address space. This metadata must be invalidated 3585 * here 3586 */ 3587 if (S_ISDIR(ip->i_mode) && (teof == 0)) 3588 invalidate_xad_metapages(ip, *xad); 3589 /* 3590 * entry beyond eof: continue scan of current page 3591 * xad 3592 * ---|---=======-------> 3593 * eof 3594 */ 3595 if (teof < xoff) { 3596 nfreed += xlen; 3597 continue; 3598 } 3599 3600 /* 3601 * (xoff <= teof): last entry to be deleted from page; 3602 * If other entries remain in page: keep and update the page. 3603 */ 3604 3605 /* 3606 * eof == entry_start: delete the entry 3607 * xad 3608 * -------|=======-------> 3609 * eof 3610 * 3611 */ 3612 if (teof == xoff) { 3613 nfreed += xlen; 3614 3615 if (index == XTENTRYSTART) 3616 break; 3617 3618 nextindex = index; 3619 } 3620 /* 3621 * eof within the entry: truncate the entry. 3622 * xad 3623 * -------===|===-------> 3624 * eof 3625 */ 3626 else if (teof < xoff + xlen) { 3627 /* update truncated entry */ 3628 len = teof - xoff; 3629 freexlen = xlen - len; 3630 XADlength(xad, len); 3631 3632 /* save pxd of truncated extent in tlck */ 3633 xaddr += len; 3634 if (log) { /* COMMIT_PWMAP */ 3635 xtlck->lwm.offset = (xtlck->lwm.offset) ? 
3636 min(index, (int)xtlck->lwm.offset) : index; 3637 xtlck->lwm.length = index + 1 - 3638 xtlck->lwm.offset; 3639 xtlck->twm.offset = index; 3640 pxdlock = (struct pxd_lock *) & xtlck->pxdlock; 3641 pxdlock->flag = mlckFREEPXD; 3642 PXDaddress(&pxdlock->pxd, xaddr); 3643 PXDlength(&pxdlock->pxd, freexlen); 3644 } 3645 /* free truncated extent */ 3646 else { /* COMMIT_WMAP */ 3647 3648 pxdlock = (struct pxd_lock *) & xadlock; 3649 pxdlock->flag = mlckFREEPXD; 3650 PXDaddress(&pxdlock->pxd, xaddr); 3651 PXDlength(&pxdlock->pxd, freexlen); 3652 txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP); 3653 3654 /* reset map lock */ 3655 xadlock.flag = mlckFREEXADLIST; 3656 } 3657 3658 /* current entry is new last entry; */ 3659 nextindex = index + 1; 3660 3661 nfreed += freexlen; 3662 } 3663 /* 3664 * eof beyond the entry: 3665 * xad 3666 * -------=======---|---> 3667 * eof 3668 */ 3669 else { /* (xoff + xlen < teof) */ 3670 3671 nextindex = index + 1; 3672 } 3673 3674 if (nextindex < le16_to_cpu(p->header.nextindex)) { 3675 if (!log) { /* COMMIT_WMAP */ 3676 xadlock.xdlist = &p->xad[nextindex]; 3677 xadlock.count = 3678 le16_to_cpu(p->header.nextindex) - 3679 nextindex; 3680 txFreeMap(ip, (struct maplock *) & xadlock, 3681 NULL, COMMIT_WMAP); 3682 } 3683 p->header.nextindex = cpu_to_le16(nextindex); 3684 } 3685 3686 XT_PUTPAGE(mp); 3687 3688 /* assert(freed == 0); */ 3689 goto getParent; 3690 } /* end scan of leaf page entries */ 3691 3692 freed = 1; 3693 3694 /* 3695 * leaf page becomes empty: free the page if type != PMAP 3696 */ 3697 if (log) { /* COMMIT_PWMAP */ 3698 /* txCommit() with tlckFREE: 3699 * free data extents covered by leaf [XTENTRYSTART:hwm); 3700 * invalidate leaf if COMMIT_PWMAP; 3701 * if (TRUNCATE), will write LOG_NOREDOPAGE; 3702 */ 3703 tlck->type = tlckXTREE | tlckFREE; 3704 } else { /* COMMIT_WMAP */ 3705 3706 /* free data extents covered by leaf */ 3707 xadlock.xdlist = &p->xad[XTENTRYSTART]; 3708 xadlock.count = 3709 le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 3710 txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP); 3711 } 3712 3713 if (p->header.flag & BT_ROOT) { 3714 p->header.flag &= ~BT_INTERNAL; 3715 p->header.flag |= BT_LEAF; 3716 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3717 3718 XT_PUTPAGE(mp); /* debug */ 3719 goto out; 3720 } else { 3721 if (log) { /* COMMIT_PWMAP */ 3722 /* page will be invalidated at tx completion 3723 */ 3724 XT_PUTPAGE(mp); 3725 } else { /* COMMIT_WMAP */ 3726 3727 if (mp->lid) 3728 lid_to_tlock(mp->lid)->flag |= tlckFREELOCK; 3729 3730 /* invalidate empty leaf page */ 3731 discard_metapage(mp); 3732 } 3733 } 3734 3735 /* 3736 * the leaf page has become empty: delete the parent entry 3737 * for the leaf page if the parent page is to be kept 3738 * in the new sized file. 3739 */ 3740 3741 /* 3742 * go back up to the parent page 3743 */ 3744 getParent: 3745 /* pop/restore parent entry for the current child page */ 3746 if ((parent = BT_POP(&btstack)) == NULL) 3747 /* current page must have been root */ 3748 goto out; 3749 3750 /* get back the parent page */ 3751 bn = parent->bn; 3752 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3753 if (rc) 3754 return rc; 3755 3756 index = parent->index; 3757 3758 /* 3759 * child page was not empty: 3760 */ 3761 if (freed == 0) { 3762 /* has any entry been deleted from the parent ? 
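i.e., do router entries remain to the right of the current one, covering child pages already freed on the way back up ? 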
*/ 3763 if (index < le16_to_cpu(p->header.nextindex) - 1) { 3764 /* (re)acquire tlock on the parent page */ 3765 if (log) { /* COMMIT_PWMAP */ 3766 /* txCommit() with tlckTRUNCATE: 3767 * free child extents covered by parent [); 3768 */ 3769 tlck = txLock(tid, ip, mp, tlckXTREE); 3770 xtlck = (struct xtlock *) & tlck->lock; 3771 if (!(tlck->type & tlckTRUNCATE)) { 3772 xtlck->hwm.offset = 3773 le16_to_cpu(p->header. 3774 nextindex) - 1; 3775 tlck->type = 3776 tlckXTREE | tlckTRUNCATE; 3777 } 3778 } else { /* COMMIT_WMAP */ 3779 3780 /* free child extents covered by parent */ 3781 xadlock.xdlist = &p->xad[index + 1]; 3782 xadlock.count = 3783 le16_to_cpu(p->header.nextindex) - 3784 index - 1; 3785 txFreeMap(ip, (struct maplock *) & xadlock, 3786 NULL, COMMIT_WMAP); 3787 } 3788 BT_MARK_DIRTY(mp, ip); 3789 3790 p->header.nextindex = cpu_to_le16(index + 1); 3791 } 3792 XT_PUTPAGE(mp); 3793 goto getParent; 3794 } 3795 3796 /* 3797 * child page was empty: 3798 */ 3799 nfreed += lengthXAD(&p->xad[index]); 3800 3801 /* 3802 * During working map update, child page's tlock must be handled 3803 * before parent's. This is because the parent's tlock will cause 3804 * the child's disk space to be marked available in the wmap, so 3805 * it's important that the child page be released by that time. 3806 * 3807 * ToDo: tlocks should be on doubly-linked list, so we can 3808 * quickly remove it and add it to the end. 3809 */ 3810 3811 /* 3812 * Move parent page's tlock to the end of the tid's tlock list 3813 */ 3814 if (log && mp->lid && (tblk->last != mp->lid) && 3815 lid_to_tlock(mp->lid)->tid) { 3816 lid_t lid = mp->lid; 3817 struct tlock *prev; 3818 3819 tlck = lid_to_tlock(lid); 3820 3821 if (tblk->next == lid) 3822 tblk->next = tlck->next; 3823 else { 3824 for (prev = lid_to_tlock(tblk->next); 3825 prev->next != lid; 3826 prev = lid_to_tlock(prev->next)) { 3827 assert(prev->next); 3828 } 3829 prev->next = tlck->next; 3830 } 3831 lid_to_tlock(tblk->last)->next = lid; 3832 tlck->next = 0; 3833 tblk->last = lid; 3834 } 3835 3836 /* 3837 * parent page become empty: free the page 3838 */ 3839 if (index == XTENTRYSTART) { 3840 if (log) { /* COMMIT_PWMAP */ 3841 /* txCommit() with tlckFREE: 3842 * free child extents covered by parent; 3843 * invalidate parent if COMMIT_PWMAP; 3844 */ 3845 tlck = txLock(tid, ip, mp, tlckXTREE); 3846 xtlck = (struct xtlock *) & tlck->lock; 3847 xtlck->hwm.offset = 3848 le16_to_cpu(p->header.nextindex) - 1; 3849 tlck->type = tlckXTREE | tlckFREE; 3850 } else { /* COMMIT_WMAP */ 3851 3852 /* free child extents covered by parent */ 3853 xadlock.xdlist = &p->xad[XTENTRYSTART]; 3854 xadlock.count = 3855 le16_to_cpu(p->header.nextindex) - 3856 XTENTRYSTART; 3857 txFreeMap(ip, (struct maplock *) & xadlock, NULL, 3858 COMMIT_WMAP); 3859 } 3860 BT_MARK_DIRTY(mp, ip); 3861 3862 if (p->header.flag & BT_ROOT) { 3863 p->header.flag &= ~BT_INTERNAL; 3864 p->header.flag |= BT_LEAF; 3865 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3866 if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) { 3867 /* 3868 * Shrink root down to allow inline 3869 * EA (otherwise fsck complains) 3870 */ 3871 p->header.maxentry = 3872 cpu_to_le16(XTROOTINITSLOT); 3873 JFS_IP(ip)->mode2 |= INLINEEA; 3874 } 3875 3876 XT_PUTPAGE(mp); /* debug */ 3877 goto out; 3878 } else { 3879 if (log) { /* COMMIT_PWMAP */ 3880 /* page will be invalidated at tx completion 3881 */ 3882 XT_PUTPAGE(mp); 3883 } else { /* COMMIT_WMAP */ 3884 3885 if (mp->lid) 3886 lid_to_tlock(mp->lid)->flag |= 3887 tlckFREELOCK; 3888 3889 /* 
invalidate parent page */ 3890 discard_metapage(mp); 3891 } 3892 3893 /* parent has become empty and freed: 3894 * go back up to its parent page 3895 */ 3896 /* freed = 1; */ 3897 goto getParent; 3898 } 3899 } 3900 /* 3901 * parent page still has entries for front region; 3902 */ 3903 else { 3904 /* try truncate region covered by preceding entry 3905 * (process backward) 3906 */ 3907 index--; 3908 3909 /* go back down to the child page corresponding 3910 * to the entry 3911 */ 3912 goto getChild; 3913 } 3914 3915 /* 3916 * internal page: go down to child page of current entry 3917 */ 3918 getChild: 3919 /* save current parent entry for the child page */ 3920 if (BT_STACK_FULL(&btstack)) { 3921 jfs_error(ip->i_sb, "stack overrun in xtTruncate!"); 3922 XT_PUTPAGE(mp); 3923 return -EIO; 3924 } 3925 BT_PUSH(&btstack, bn, index); 3926 3927 /* get child page */ 3928 xad = &p->xad[index]; 3929 bn = addressXAD(xad); 3930 3931 /* 3932 * first access of each internal entry: 3933 */ 3934 /* release parent page */ 3935 XT_PUTPAGE(mp); 3936 3937 /* process the child page */ 3938 goto getPage; 3939 3940 out: 3941 /* 3942 * update file resource stat 3943 */ 3944 /* set size 3945 */ 3946 if (S_ISDIR(ip->i_mode) && !newsize) 3947 ip->i_size = 1; /* fsck hates zero-length directories */ 3948 else 3949 ip->i_size = newsize; 3950 3951 /* update quota allocation to reflect freed blocks */ 3952 DQUOT_FREE_BLOCK(ip, nfreed); 3953 3954 /* 3955 * free tlock of invalidated pages 3956 */ 3957 if (flag == COMMIT_WMAP) 3958 txFreelock(ip); 3959 3960 return newsize; 3961 } 3962 3963 3964 /* 3965 * xtTruncate_pmap() 3966 * 3967 * function: 3968 * Perform truncate to zero length for deleted file, leaving the 3969 * xtree and working map untouched. This allows the file to 3970 * be accessed via open file handles, while the delete of the file 3971 * is committed to disk. 3972 * 3973 * parameter: 3974 * tid_t tid, 3975 * struct inode *ip, 3976 * s64 committed_size) 3977 * 3978 * return: new committed size 3979 * 3980 * note: 3981 * 3982 * To avoid deadlock by holding too many transaction locks, the 3983 * truncation may be broken up into multiple transactions. 3984 * The committed_size keeps track of the part of the file that has 3985 * been freed from the pmaps. 
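Each call frees the pmap for up to MAX_TRUNCATE_LEAVES leaf pages and returns the new (smaller) committed size for the caller to pass back in, or 0 once the whole tree has been processed. 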
3986 */ 3987 s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) 3988 { 3989 s64 bn; 3990 struct btstack btstack; 3991 int cmp; 3992 int index; 3993 int locked_leaves = 0; 3994 struct metapage *mp; 3995 xtpage_t *p; 3996 struct btframe *parent; 3997 int rc; 3998 struct tblock *tblk; 3999 struct tlock *tlck = NULL; 4000 xad_t *xad; 4001 int xlen; 4002 s64 xoff; 4003 struct xtlock *xtlck = NULL; 4004 4005 /* save object truncation type */ 4006 tblk = tid_to_tblock(tid); 4007 tblk->xflag |= COMMIT_PMAP; 4008 4009 /* clear stack */ 4010 BT_CLR(&btstack); 4011 4012 if (committed_size) { 4013 xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1; 4014 rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); 4015 if (rc) 4016 return rc; 4017 4018 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 4019 4020 if (cmp != 0) { 4021 XT_PUTPAGE(mp); 4022 jfs_error(ip->i_sb, 4023 "xtTruncate_pmap: did not find extent"); 4024 return -EIO; 4025 } 4026 } else { 4027 /* 4028 * start with root 4029 * 4030 * root resides in the inode 4031 */ 4032 bn = 0; 4033 4034 /* 4035 * first access of each page: 4036 */ 4037 getPage: 4038 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4039 if (rc) 4040 return rc; 4041 4042 /* process entries backward from last index */ 4043 index = le16_to_cpu(p->header.nextindex) - 1; 4044 4045 if (p->header.flag & BT_INTERNAL) 4046 goto getChild; 4047 } 4048 4049 /* 4050 * leaf page 4051 */ 4052 4053 if (++locked_leaves > MAX_TRUNCATE_LEAVES) { 4054 /* 4055 * We need to limit the size of the transaction 4056 * to avoid exhausting pagecache & tlocks 4057 */ 4058 xad = &p->xad[index]; 4059 xoff = offsetXAD(xad); 4060 xlen = lengthXAD(xad); 4061 XT_PUTPAGE(mp); 4062 return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; 4063 } 4064 tlck = txLock(tid, ip, mp, tlckXTREE); 4065 tlck->type = tlckXTREE | tlckFREE; 4066 xtlck = (struct xtlock *) & tlck->lock; 4067 xtlck->hwm.offset = index; 4068 4069 4070 XT_PUTPAGE(mp); 4071 4072 /* 4073 * go back up to the parent page 4074 */ 4075 getParent: 4076 /* pop/restore parent entry for the current child page */ 4077 if ((parent = BT_POP(&btstack)) == NULL) 4078 /* current page must have been root */ 4079 goto out; 4080 4081 /* get back the parent page */ 4082 bn = parent->bn; 4083 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4084 if (rc) 4085 return rc; 4086 4087 index = parent->index; 4088 4089 /* 4090 * parent page become empty: free the page 4091 */ 4092 if (index == XTENTRYSTART) { 4093 /* txCommit() with tlckFREE: 4094 * free child extents covered by parent; 4095 * invalidate parent if COMMIT_PWMAP; 4096 */ 4097 tlck = txLock(tid, ip, mp, tlckXTREE); 4098 xtlck = (struct xtlock *) & tlck->lock; 4099 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; 4100 tlck->type = tlckXTREE | tlckFREE; 4101 4102 XT_PUTPAGE(mp); 4103 4104 if (p->header.flag & BT_ROOT) { 4105 4106 goto out; 4107 } else { 4108 goto getParent; 4109 } 4110 } 4111 /* 4112 * parent page still has entries for front region; 4113 */ 4114 else 4115 index--; 4116 /* 4117 * internal page: go down to child page of current entry 4118 */ 4119 getChild: 4120 /* save current parent entry for the child page */ 4121 if (BT_STACK_FULL(&btstack)) { 4122 jfs_error(ip->i_sb, "stack overrun in xtTruncate_pmap!"); 4123 XT_PUTPAGE(mp); 4124 return -EIO; 4125 } 4126 BT_PUSH(&btstack, bn, index); 4127 4128 /* get child page */ 4129 xad = &p->xad[index]; 4130 bn = addressXAD(xad); 4131 4132 /* 4133 * first access of each internal entry: 4134 */ 4135 /* release parent page */ 4136 XT_PUTPAGE(mp); 4137 4138 
/* process the child page */ 4139 goto getPage; 4140 4141 out: 4142 4143 return 0; 4144 } 4145 4146 #ifdef CONFIG_JFS_STATISTICS 4147 int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length, 4148 int *eof, void *data) 4149 { 4150 int len = 0; 4151 off_t begin; 4152 4153 len += sprintf(buffer, 4154 "JFS Xtree statistics\n" 4155 "====================\n" 4156 "searches = %d\n" 4157 "fast searches = %d\n" 4158 "splits = %d\n", 4159 xtStat.search, 4160 xtStat.fastSearch, 4161 xtStat.split); 4162 4163 begin = offset; 4164 *start = buffer + begin; 4165 len -= begin; 4166 4167 if (len > length) 4168 len = length; 4169 else 4170 *eof = 1; 4171 4172 if (len < 0) 4173 len = 0; 4174 4175 return len; 4176 } 4177 #endif 4178