1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) International Business Machines Corp., 2000-2005 4 */ 5 /* 6 * jfs_xtree.c: extent allocation descriptor B+-tree manager 7 */ 8 9 #include <linux/fs.h> 10 #include <linux/module.h> 11 #include <linux/quotaops.h> 12 #include <linux/seq_file.h> 13 #include "jfs_incore.h" 14 #include "jfs_filsys.h" 15 #include "jfs_metapage.h" 16 #include "jfs_dmap.h" 17 #include "jfs_dinode.h" 18 #include "jfs_superblock.h" 19 #include "jfs_debug.h" 20 21 /* 22 * xtree local flag 23 */ 24 #define XT_INSERT 0x00000001 25 26 /* 27 * xtree key/entry comparison: extent offset 28 * 29 * return: 30 * -1: k < start of extent 31 * 0: start_of_extent <= k <= end_of_extent 32 * 1: k > end_of_extent 33 */ 34 #define XT_CMP(CMP, K, X, OFFSET64)\ 35 {\ 36 OFFSET64 = offsetXAD(X);\ 37 (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\ 38 ((K) < OFFSET64) ? -1 : 0;\ 39 } 40 41 /* write a xad entry */ 42 #define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\ 43 {\ 44 (XAD)->flag = (FLAG);\ 45 XADoffset((XAD), (OFF));\ 46 XADlength((XAD), (LEN));\ 47 XADaddress((XAD), (ADDR));\ 48 } 49 50 #define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot) 51 52 /* get page buffer for specified block address */ 53 /* ToDo: Replace this ugly macro with a function */ 54 #define XT_GETPAGE(IP, BN, MP, SIZE, P, RC) \ 55 do { \ 56 BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot); \ 57 if (!(RC)) { \ 58 if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) || \ 59 (le16_to_cpu((P)->header.nextindex) > \ 60 le16_to_cpu((P)->header.maxentry)) || \ 61 (le16_to_cpu((P)->header.maxentry) > \ 62 (((BN) == 0) ? XTROOTMAXSLOT : PSIZE >> L2XTSLOTSIZE))) { \ 63 jfs_error((IP)->i_sb, \ 64 "XT_GETPAGE: xtree page corrupt\n"); \ 65 BT_PUTPAGE(MP); \ 66 MP = NULL; \ 67 RC = -EIO; \ 68 } \ 69 } \ 70 } while (0) 71 72 /* for consistency */ 73 #define XT_PUTPAGE(MP) BT_PUTPAGE(MP) 74 75 #define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ 76 BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot) 77 /* xtree entry parameter descriptor */ 78 struct xtsplit { 79 struct metapage *mp; 80 s16 index; 81 u8 flag; 82 s64 off; 83 s64 addr; 84 int len; 85 struct pxdlist *pxdlist; 86 }; 87 88 89 /* 90 * statistics 91 */ 92 #ifdef CONFIG_JFS_STATISTICS 93 static struct { 94 uint search; 95 uint fastSearch; 96 uint split; 97 } xtStat; 98 #endif 99 100 101 /* 102 * forward references 103 */ 104 static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp, 105 struct btstack * btstack, int flag); 106 107 static int xtSplitUp(tid_t tid, 108 struct inode *ip, 109 struct xtsplit * split, struct btstack * btstack); 110 111 static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split, 112 struct metapage ** rmpp, s64 * rbnp); 113 114 static int xtSplitRoot(tid_t tid, struct inode *ip, 115 struct xtsplit * split, struct metapage ** rmpp); 116 117 #ifdef _STILL_TO_PORT 118 static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp, 119 xtpage_t * fp, struct btstack * btstack); 120 121 static int xtSearchNode(struct inode *ip, 122 xad_t * xad, 123 int *cmpp, struct btstack * btstack, int flag); 124 125 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp); 126 #endif /* _STILL_TO_PORT */ 127 128 /* 129 * xtLookup() 130 * 131 * function: map a single page into a physical extent; 132 */ 133 int xtLookup(struct inode *ip, s64 lstart, 134 s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check) 135 { 136 int rc = 0; 137 struct btstack btstack; 138 int cmp; 139 s64 bn; 140 struct metapage *mp; 141 xtpage_t *p; 142 int index; 143 xad_t *xad; 144 s64 next, size, xoff, xend; 145 int xlen; 146 s64 xaddr; 147 148 *paddr = 0; 149 *plen = llen; 150 151 if (!no_check) { 152 /* is lookup offset beyond eof ? */ 153 size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >> 154 JFS_SBI(ip->i_sb)->l2bsize; 155 if (lstart >= size) 156 return 0; 157 } 158 159 /* 160 * search for the xad entry covering the logical extent 161 */ 162 //search: 163 if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) { 164 jfs_err("xtLookup: xtSearch returned %d", rc); 165 return rc; 166 } 167 168 /* 169 * compute the physical extent covering logical extent 170 * 171 * N.B. search may have failed (e.g., hole in sparse file), 172 * and returned the index of the next entry. 173 */ 174 /* retrieve search result */ 175 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 176 177 /* is xad found covering start of logical extent ? 178 * lstart is a page start address, 179 * i.e., lstart cannot start in a hole; 180 */ 181 if (cmp) { 182 if (next) 183 *plen = min(next - lstart, llen); 184 goto out; 185 } 186 187 /* 188 * lxd covered by xad 189 */ 190 xad = &p->xad[index]; 191 xoff = offsetXAD(xad); 192 xlen = lengthXAD(xad); 193 xend = xoff + xlen; 194 xaddr = addressXAD(xad); 195 196 /* initialize new pxd */ 197 *pflag = xad->flag; 198 *paddr = xaddr + (lstart - xoff); 199 /* a page must be fully covered by an xad */ 200 *plen = min(xend - lstart, llen); 201 202 out: 203 XT_PUTPAGE(mp); 204 205 return rc; 206 } 207 208 /* 209 * xtSearch() 210 * 211 * function: search for the xad entry covering specified offset. 212 * 213 * parameters: 214 * ip - file object; 215 * xoff - extent offset; 216 * nextp - address of next extent (if any) for search miss 217 * cmpp - comparison result: 218 * btstack - traverse stack; 219 * flag - search process flag (XT_INSERT); 220 * 221 * returns: 222 * btstack contains (bn, index) of search path traversed to the entry. 223 * *cmpp is set to result of comparison with the entry returned. 224 * the page containing the entry is pinned at exit. 225 */ 226 static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, 227 int *cmpp, struct btstack * btstack, int flag) 228 { 229 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 230 int rc = 0; 231 int cmp = 1; /* init for empty page */ 232 s64 bn; /* block number */ 233 struct metapage *mp; /* page buffer */ 234 xtpage_t *p; /* page */ 235 xad_t *xad; 236 int base, index, lim, btindex; 237 struct btframe *btsp; 238 int nsplit = 0; /* number of pages to split */ 239 s64 t64; 240 s64 next = 0; 241 242 INCREMENT(xtStat.search); 243 244 BT_CLR(btstack); 245 246 btstack->nsplit = 0; 247 248 /* 249 * search down tree from root: 250 * 251 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 252 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 253 * 254 * if entry with search key K is not found 255 * internal page search find the entry with largest key Ki 256 * less than K which point to the child page to search; 257 * leaf page search find the entry with smallest key Kj 258 * greater than K so that the returned index is the position of 259 * the entry to be shifted right for insertion of new entry. 260 * for empty tree, search key is greater than any key of the tree. 261 * 262 * by convention, root bn = 0. 263 */ 264 for (bn = 0;;) { 265 /* get/pin the page to search */ 266 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 267 if (rc) 268 return rc; 269 270 /* try sequential access heuristics with the previous 271 * access entry in target leaf page: 272 * once search narrowed down into the target leaf, 273 * key must either match an entry in the leaf or 274 * key entry does not exist in the tree; 275 */ 276 //fastSearch: 277 if ((jfs_ip->btorder & BT_SEQUENTIAL) && 278 (p->header.flag & BT_LEAF) && 279 (index = jfs_ip->btindex) < 280 le16_to_cpu(p->header.nextindex)) { 281 xad = &p->xad[index]; 282 t64 = offsetXAD(xad); 283 if (xoff < t64 + lengthXAD(xad)) { 284 if (xoff >= t64) { 285 *cmpp = 0; 286 goto out; 287 } 288 289 /* stop sequential access heuristics */ 290 goto binarySearch; 291 } else { /* (t64 + lengthXAD(xad)) <= xoff */ 292 293 /* try next sequential entry */ 294 index++; 295 if (index < 296 le16_to_cpu(p->header.nextindex)) { 297 xad++; 298 t64 = offsetXAD(xad); 299 if (xoff < t64 + lengthXAD(xad)) { 300 if (xoff >= t64) { 301 *cmpp = 0; 302 goto out; 303 } 304 305 /* miss: key falls between 306 * previous and this entry 307 */ 308 *cmpp = 1; 309 next = t64; 310 goto out; 311 } 312 313 /* (xoff >= t64 + lengthXAD(xad)); 314 * matching entry may be further out: 315 * stop heuristic search 316 */ 317 /* stop sequential access heuristics */ 318 goto binarySearch; 319 } 320 321 /* (index == p->header.nextindex); 322 * miss: key entry does not exist in 323 * the target leaf/tree 324 */ 325 *cmpp = 1; 326 goto out; 327 } 328 329 /* 330 * if hit, return index of the entry found, and 331 * if miss, where new entry with search key is 332 * to be inserted; 333 */ 334 out: 335 /* compute number of pages to split */ 336 if (flag & XT_INSERT) { 337 if (p->header.nextindex == /* little-endian */ 338 p->header.maxentry) 339 nsplit++; 340 else 341 nsplit = 0; 342 btstack->nsplit = nsplit; 343 } 344 345 /* save search result */ 346 btsp = btstack->top; 347 btsp->bn = bn; 348 btsp->index = index; 349 btsp->mp = mp; 350 351 /* update sequential access heuristics */ 352 jfs_ip->btindex = index; 353 354 if (nextp) 355 *nextp = next; 356 357 INCREMENT(xtStat.fastSearch); 358 return 0; 359 } 360 361 /* well, ... full search now */ 362 binarySearch: 363 lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 364 365 /* 366 * binary search with search key K on the current page 367 */ 368 for (base = XTENTRYSTART; lim; lim >>= 1) { 369 index = base + (lim >> 1); 370 371 XT_CMP(cmp, xoff, &p->xad[index], t64); 372 if (cmp == 0) { 373 /* 374 * search hit 375 */ 376 /* search hit - leaf page: 377 * return the entry found 378 */ 379 if (p->header.flag & BT_LEAF) { 380 *cmpp = cmp; 381 382 /* compute number of pages to split */ 383 if (flag & XT_INSERT) { 384 if (p->header.nextindex == 385 p->header.maxentry) 386 nsplit++; 387 else 388 nsplit = 0; 389 btstack->nsplit = nsplit; 390 } 391 392 /* save search result */ 393 btsp = btstack->top; 394 btsp->bn = bn; 395 btsp->index = index; 396 btsp->mp = mp; 397 398 /* init sequential access heuristics */ 399 btindex = jfs_ip->btindex; 400 if (index == btindex || 401 index == btindex + 1) 402 jfs_ip->btorder = BT_SEQUENTIAL; 403 else 404 jfs_ip->btorder = BT_RANDOM; 405 jfs_ip->btindex = index; 406 407 return 0; 408 } 409 /* search hit - internal page: 410 * descend/search its child page 411 */ 412 if (index < le16_to_cpu(p->header.nextindex)-1) 413 next = offsetXAD(&p->xad[index + 1]); 414 goto next; 415 } 416 417 if (cmp > 0) { 418 base = index + 1; 419 --lim; 420 } 421 } 422 423 /* 424 * search miss 425 * 426 * base is the smallest index with key (Kj) greater than 427 * search key (K) and may be zero or maxentry index. 428 */ 429 if (base < le16_to_cpu(p->header.nextindex)) 430 next = offsetXAD(&p->xad[base]); 431 /* 432 * search miss - leaf page: 433 * 434 * return location of entry (base) where new entry with 435 * search key K is to be inserted. 436 */ 437 if (p->header.flag & BT_LEAF) { 438 *cmpp = cmp; 439 440 /* compute number of pages to split */ 441 if (flag & XT_INSERT) { 442 if (p->header.nextindex == 443 p->header.maxentry) 444 nsplit++; 445 else 446 nsplit = 0; 447 btstack->nsplit = nsplit; 448 } 449 450 /* save search result */ 451 btsp = btstack->top; 452 btsp->bn = bn; 453 btsp->index = base; 454 btsp->mp = mp; 455 456 /* init sequential access heuristics */ 457 btindex = jfs_ip->btindex; 458 if (base == btindex || base == btindex + 1) 459 jfs_ip->btorder = BT_SEQUENTIAL; 460 else 461 jfs_ip->btorder = BT_RANDOM; 462 jfs_ip->btindex = base; 463 464 if (nextp) 465 *nextp = next; 466 467 return 0; 468 } 469 470 /* 471 * search miss - non-leaf page: 472 * 473 * if base is non-zero, decrement base by one to get the parent 474 * entry of the child page to search. 475 */ 476 index = base ? base - 1 : base; 477 478 /* 479 * go down to child page 480 */ 481 next: 482 /* update number of pages to split */ 483 if (p->header.nextindex == p->header.maxentry) 484 nsplit++; 485 else 486 nsplit = 0; 487 488 /* push (bn, index) of the parent page/entry */ 489 if (BT_STACK_FULL(btstack)) { 490 jfs_error(ip->i_sb, "stack overrun!\n"); 491 XT_PUTPAGE(mp); 492 return -EIO; 493 } 494 BT_PUSH(btstack, bn, index); 495 496 /* get the child page block number */ 497 bn = addressXAD(&p->xad[index]); 498 499 /* unpin the parent page */ 500 XT_PUTPAGE(mp); 501 } 502 } 503 504 /* 505 * xtInsert() 506 * 507 * function: 508 * 509 * parameter: 510 * tid - transaction id; 511 * ip - file object; 512 * xflag - extent flag (XAD_NOTRECORDED): 513 * xoff - extent offset; 514 * xlen - extent length; 515 * xaddrp - extent address pointer (in/out): 516 * if (*xaddrp) 517 * caller allocated data extent at *xaddrp; 518 * else 519 * allocate data extent and return its xaddr; 520 * flag - 521 * 522 * return: 523 */ 524 int xtInsert(tid_t tid, /* transaction id */ 525 struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp, 526 int flag) 527 { 528 int rc = 0; 529 s64 xaddr, hint; 530 struct metapage *mp; /* meta-page buffer */ 531 xtpage_t *p; /* base B+-tree index page */ 532 s64 bn; 533 int index, nextindex; 534 struct btstack btstack; /* traverse stack */ 535 struct xtsplit split; /* split information */ 536 xad_t *xad; 537 int cmp; 538 s64 next; 539 struct tlock *tlck; 540 struct xtlock *xtlck; 541 542 jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); 543 544 /* 545 * search for the entry location at which to insert: 546 * 547 * xtFastSearch() and xtSearch() both returns (leaf page 548 * pinned, index at which to insert). 549 * n.b. xtSearch() may return index of maxentry of 550 * the full page. 551 */ 552 if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT))) 553 return rc; 554 555 /* retrieve search result */ 556 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 557 558 /* This test must follow XT_GETSEARCH since mp must be valid if 559 * we branch to out: */ 560 if ((cmp == 0) || (next && (xlen > next - xoff))) { 561 rc = -EEXIST; 562 goto out; 563 } 564 565 /* 566 * allocate data extent requested 567 * 568 * allocation hint: last xad 569 */ 570 if ((xaddr = *xaddrp) == 0) { 571 if (index > XTENTRYSTART) { 572 xad = &p->xad[index - 1]; 573 hint = addressXAD(xad) + lengthXAD(xad) - 1; 574 } else 575 hint = 0; 576 if ((rc = dquot_alloc_block(ip, xlen))) 577 goto out; 578 if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) { 579 dquot_free_block(ip, xlen); 580 goto out; 581 } 582 } 583 584 /* 585 * insert entry for new extent 586 */ 587 xflag |= XAD_NEW; 588 589 /* 590 * if the leaf page is full, split the page and 591 * propagate up the router entry for the new page from split 592 * 593 * The xtSplitUp() will insert the entry and unpin the leaf page. 594 */ 595 nextindex = le16_to_cpu(p->header.nextindex); 596 if (nextindex == le16_to_cpu(p->header.maxentry)) { 597 split.mp = mp; 598 split.index = index; 599 split.flag = xflag; 600 split.off = xoff; 601 split.len = xlen; 602 split.addr = xaddr; 603 split.pxdlist = NULL; 604 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { 605 /* undo data extent allocation */ 606 if (*xaddrp == 0) { 607 dbFree(ip, xaddr, (s64) xlen); 608 dquot_free_block(ip, xlen); 609 } 610 return rc; 611 } 612 613 *xaddrp = xaddr; 614 return 0; 615 } 616 617 /* 618 * insert the new entry into the leaf page 619 */ 620 /* 621 * acquire a transaction lock on the leaf page; 622 * 623 * action: xad insertion/extension; 624 */ 625 BT_MARK_DIRTY(mp, ip); 626 627 /* if insert into middle, shift right remaining entries. */ 628 if (index < nextindex) 629 memmove(&p->xad[index + 1], &p->xad[index], 630 (nextindex - index) * sizeof(xad_t)); 631 632 /* insert the new entry: mark the entry NEW */ 633 xad = &p->xad[index]; 634 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 635 636 /* advance next available entry index */ 637 le16_add_cpu(&p->header.nextindex, 1); 638 639 /* Don't log it if there are no links to the file */ 640 if (!test_cflag(COMMIT_Nolink, ip)) { 641 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 642 xtlck = (struct xtlock *) & tlck->lock; 643 xtlck->lwm.offset = 644 (xtlck->lwm.offset) ? min(index, 645 (int)xtlck->lwm.offset) : index; 646 xtlck->lwm.length = 647 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; 648 } 649 650 *xaddrp = xaddr; 651 652 out: 653 /* unpin the leaf page */ 654 XT_PUTPAGE(mp); 655 656 return rc; 657 } 658 659 660 /* 661 * xtSplitUp() 662 * 663 * function: 664 * split full pages as propagating insertion up the tree 665 * 666 * parameter: 667 * tid - transaction id; 668 * ip - file object; 669 * split - entry parameter descriptor; 670 * btstack - traverse stack from xtSearch() 671 * 672 * return: 673 */ 674 static int 675 xtSplitUp(tid_t tid, 676 struct inode *ip, struct xtsplit * split, struct btstack * btstack) 677 { 678 int rc = 0; 679 struct metapage *smp; 680 xtpage_t *sp; /* split page */ 681 struct metapage *rmp; 682 s64 rbn; /* new right page block number */ 683 struct metapage *rcmp; 684 xtpage_t *rcp; /* right child page */ 685 s64 rcbn; /* right child page block number */ 686 int skip; /* index of entry of insertion */ 687 int nextindex; /* next available entry index of p */ 688 struct btframe *parent; /* parent page entry on traverse stack */ 689 xad_t *xad; 690 s64 xaddr; 691 int xlen; 692 int nsplit; /* number of pages split */ 693 struct pxdlist pxdlist; 694 pxd_t *pxd; 695 struct tlock *tlck; 696 struct xtlock *xtlck; 697 698 smp = split->mp; 699 sp = XT_PAGE(ip, smp); 700 701 /* is inode xtree root extension/inline EA area free ? */ 702 if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) && 703 (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) && 704 (JFS_IP(ip)->mode2 & INLINEEA)) { 705 sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT); 706 JFS_IP(ip)->mode2 &= ~INLINEEA; 707 708 BT_MARK_DIRTY(smp, ip); 709 /* 710 * acquire a transaction lock on the leaf page; 711 * 712 * action: xad insertion/extension; 713 */ 714 715 /* if insert into middle, shift right remaining entries. */ 716 skip = split->index; 717 nextindex = le16_to_cpu(sp->header.nextindex); 718 if (skip < nextindex) 719 memmove(&sp->xad[skip + 1], &sp->xad[skip], 720 (nextindex - skip) * sizeof(xad_t)); 721 722 /* insert the new entry: mark the entry NEW */ 723 xad = &sp->xad[skip]; 724 XT_PUTENTRY(xad, split->flag, split->off, split->len, 725 split->addr); 726 727 /* advance next available entry index */ 728 le16_add_cpu(&sp->header.nextindex, 1); 729 730 /* Don't log it if there are no links to the file */ 731 if (!test_cflag(COMMIT_Nolink, ip)) { 732 tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW); 733 xtlck = (struct xtlock *) & tlck->lock; 734 xtlck->lwm.offset = (xtlck->lwm.offset) ? 735 min(skip, (int)xtlck->lwm.offset) : skip; 736 xtlck->lwm.length = 737 le16_to_cpu(sp->header.nextindex) - 738 xtlck->lwm.offset; 739 } 740 741 return 0; 742 } 743 744 /* 745 * allocate new index blocks to cover index page split(s) 746 * 747 * allocation hint: ? 748 */ 749 if (split->pxdlist == NULL) { 750 nsplit = btstack->nsplit; 751 split->pxdlist = &pxdlist; 752 pxdlist.maxnpxd = pxdlist.npxd = 0; 753 pxd = &pxdlist.pxd[0]; 754 xlen = JFS_SBI(ip->i_sb)->nbperpage; 755 for (; nsplit > 0; nsplit--, pxd++) { 756 if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr)) 757 == 0) { 758 PXDaddress(pxd, xaddr); 759 PXDlength(pxd, xlen); 760 761 pxdlist.maxnpxd++; 762 763 continue; 764 } 765 766 /* undo allocation */ 767 768 XT_PUTPAGE(smp); 769 return rc; 770 } 771 } 772 773 /* 774 * Split leaf page <sp> into <sp> and a new right page <rp>. 775 * 776 * The split routines insert the new entry into the leaf page, 777 * and acquire txLock as appropriate. 778 * return <rp> pinned and its block number <rpbn>. 779 */ 780 rc = (sp->header.flag & BT_ROOT) ? 781 xtSplitRoot(tid, ip, split, &rmp) : 782 xtSplitPage(tid, ip, split, &rmp, &rbn); 783 784 XT_PUTPAGE(smp); 785 786 if (rc) 787 return -EIO; 788 /* 789 * propagate up the router entry for the leaf page just split 790 * 791 * insert a router entry for the new page into the parent page, 792 * propagate the insert/split up the tree by walking back the stack 793 * of (bn of parent page, index of child page entry in parent page) 794 * that were traversed during the search for the page that split. 795 * 796 * the propagation of insert/split up the tree stops if the root 797 * splits or the page inserted into doesn't have to split to hold 798 * the new entry. 799 * 800 * the parent entry for the split page remains the same, and 801 * a new entry is inserted at its right with the first key and 802 * block number of the new right page. 803 * 804 * There are a maximum of 3 pages pinned at any time: 805 * right child, left parent and right parent (when the parent splits) 806 * to keep the child page pinned while working on the parent. 807 * make sure that all pins are released at exit. 808 */ 809 while ((parent = BT_POP(btstack)) != NULL) { 810 /* parent page specified by stack frame <parent> */ 811 812 /* keep current child pages <rcp> pinned */ 813 rcmp = rmp; 814 rcbn = rbn; 815 rcp = XT_PAGE(ip, rcmp); 816 817 /* 818 * insert router entry in parent for new right child page <rp> 819 */ 820 /* get/pin the parent page <sp> */ 821 XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc); 822 if (rc) { 823 XT_PUTPAGE(rcmp); 824 return rc; 825 } 826 827 /* 828 * The new key entry goes ONE AFTER the index of parent entry, 829 * because the split was to the right. 830 */ 831 skip = parent->index + 1; 832 833 /* 834 * split or shift right remaining entries of the parent page 835 */ 836 nextindex = le16_to_cpu(sp->header.nextindex); 837 /* 838 * parent page is full - split the parent page 839 */ 840 if (nextindex == le16_to_cpu(sp->header.maxentry)) { 841 /* init for parent page split */ 842 split->mp = smp; 843 split->index = skip; /* index at insert */ 844 split->flag = XAD_NEW; 845 split->off = offsetXAD(&rcp->xad[XTENTRYSTART]); 846 split->len = JFS_SBI(ip->i_sb)->nbperpage; 847 split->addr = rcbn; 848 849 /* unpin previous right child page */ 850 XT_PUTPAGE(rcmp); 851 852 /* The split routines insert the new entry, 853 * and acquire txLock as appropriate. 854 * return <rp> pinned and its block number <rpbn>. 855 */ 856 rc = (sp->header.flag & BT_ROOT) ? 857 xtSplitRoot(tid, ip, split, &rmp) : 858 xtSplitPage(tid, ip, split, &rmp, &rbn); 859 if (rc) { 860 XT_PUTPAGE(smp); 861 return rc; 862 } 863 864 XT_PUTPAGE(smp); 865 /* keep new child page <rp> pinned */ 866 } 867 /* 868 * parent page is not full - insert in parent page 869 */ 870 else { 871 /* 872 * insert router entry in parent for the right child 873 * page from the first entry of the right child page: 874 */ 875 /* 876 * acquire a transaction lock on the parent page; 877 * 878 * action: router xad insertion; 879 */ 880 BT_MARK_DIRTY(smp, ip); 881 882 /* 883 * if insert into middle, shift right remaining entries 884 */ 885 if (skip < nextindex) 886 memmove(&sp->xad[skip + 1], &sp->xad[skip], 887 (nextindex - 888 skip) << L2XTSLOTSIZE); 889 890 /* insert the router entry */ 891 xad = &sp->xad[skip]; 892 XT_PUTENTRY(xad, XAD_NEW, 893 offsetXAD(&rcp->xad[XTENTRYSTART]), 894 JFS_SBI(ip->i_sb)->nbperpage, rcbn); 895 896 /* advance next available entry index. */ 897 le16_add_cpu(&sp->header.nextindex, 1); 898 899 /* Don't log it if there are no links to the file */ 900 if (!test_cflag(COMMIT_Nolink, ip)) { 901 tlck = txLock(tid, ip, smp, 902 tlckXTREE | tlckGROW); 903 xtlck = (struct xtlock *) & tlck->lock; 904 xtlck->lwm.offset = (xtlck->lwm.offset) ? 905 min(skip, (int)xtlck->lwm.offset) : skip; 906 xtlck->lwm.length = 907 le16_to_cpu(sp->header.nextindex) - 908 xtlck->lwm.offset; 909 } 910 911 /* unpin parent page */ 912 XT_PUTPAGE(smp); 913 914 /* exit propagate up */ 915 break; 916 } 917 } 918 919 /* unpin current right page */ 920 XT_PUTPAGE(rmp); 921 922 return 0; 923 } 924 925 926 /* 927 * xtSplitPage() 928 * 929 * function: 930 * split a full non-root page into 931 * original/split/left page and new right page 932 * i.e., the original/split page remains as left page. 933 * 934 * parameter: 935 * int tid, 936 * struct inode *ip, 937 * struct xtsplit *split, 938 * struct metapage **rmpp, 939 * u64 *rbnp, 940 * 941 * return: 942 * Pointer to page in which to insert or NULL on error. 943 */ 944 static int 945 xtSplitPage(tid_t tid, struct inode *ip, 946 struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp) 947 { 948 int rc = 0; 949 struct metapage *smp; 950 xtpage_t *sp; 951 struct metapage *rmp; 952 xtpage_t *rp; /* new right page allocated */ 953 s64 rbn; /* new right page block number */ 954 struct metapage *mp; 955 xtpage_t *p; 956 s64 nextbn; 957 int skip, maxentry, middle, righthalf, n; 958 xad_t *xad; 959 struct pxdlist *pxdlist; 960 pxd_t *pxd; 961 struct tlock *tlck; 962 struct xtlock *sxtlck = NULL, *rxtlck = NULL; 963 int quota_allocation = 0; 964 965 smp = split->mp; 966 sp = XT_PAGE(ip, smp); 967 968 INCREMENT(xtStat.split); 969 970 pxdlist = split->pxdlist; 971 pxd = &pxdlist->pxd[pxdlist->npxd]; 972 pxdlist->npxd++; 973 rbn = addressPXD(pxd); 974 975 /* Allocate blocks to quota. */ 976 rc = dquot_alloc_block(ip, lengthPXD(pxd)); 977 if (rc) 978 goto clean_up; 979 980 quota_allocation += lengthPXD(pxd); 981 982 /* 983 * allocate the new right page for the split 984 */ 985 rmp = get_metapage(ip, rbn, PSIZE, 1); 986 if (rmp == NULL) { 987 rc = -EIO; 988 goto clean_up; 989 } 990 991 jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); 992 993 BT_MARK_DIRTY(rmp, ip); 994 /* 995 * action: new page; 996 */ 997 998 rp = (xtpage_t *) rmp->data; 999 rp->header.self = *pxd; 1000 rp->header.flag = sp->header.flag & BT_TYPE; 1001 rp->header.maxentry = sp->header.maxentry; /* little-endian */ 1002 rp->header.nextindex = cpu_to_le16(XTENTRYSTART); 1003 1004 BT_MARK_DIRTY(smp, ip); 1005 /* Don't log it if there are no links to the file */ 1006 if (!test_cflag(COMMIT_Nolink, ip)) { 1007 /* 1008 * acquire a transaction lock on the new right page; 1009 */ 1010 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); 1011 rxtlck = (struct xtlock *) & tlck->lock; 1012 rxtlck->lwm.offset = XTENTRYSTART; 1013 /* 1014 * acquire a transaction lock on the split page 1015 */ 1016 tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW); 1017 sxtlck = (struct xtlock *) & tlck->lock; 1018 } 1019 1020 /* 1021 * initialize/update sibling pointers of <sp> and <rp> 1022 */ 1023 nextbn = le64_to_cpu(sp->header.next); 1024 rp->header.next = cpu_to_le64(nextbn); 1025 rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self)); 1026 sp->header.next = cpu_to_le64(rbn); 1027 1028 skip = split->index; 1029 1030 /* 1031 * sequential append at tail (after last entry of last page) 1032 * 1033 * if splitting the last page on a level because of appending 1034 * a entry to it (skip is maxentry), it's likely that the access is 1035 * sequential. adding an empty page on the side of the level is less 1036 * work and can push the fill factor much higher than normal. 1037 * if we're wrong it's no big deal - we will do the split the right 1038 * way next time. 1039 * (it may look like it's equally easy to do a similar hack for 1040 * reverse sorted data, that is, split the tree left, but it's not. 1041 * Be my guest.) 1042 */ 1043 if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) { 1044 /* 1045 * acquire a transaction lock on the new/right page; 1046 * 1047 * action: xad insertion; 1048 */ 1049 /* insert entry at the first entry of the new right page */ 1050 xad = &rp->xad[XTENTRYSTART]; 1051 XT_PUTENTRY(xad, split->flag, split->off, split->len, 1052 split->addr); 1053 1054 rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); 1055 1056 if (!test_cflag(COMMIT_Nolink, ip)) { 1057 /* rxtlck->lwm.offset = XTENTRYSTART; */ 1058 rxtlck->lwm.length = 1; 1059 } 1060 1061 *rmpp = rmp; 1062 *rbnp = rbn; 1063 1064 jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp); 1065 return 0; 1066 } 1067 1068 /* 1069 * non-sequential insert (at possibly middle page) 1070 */ 1071 1072 /* 1073 * update previous pointer of old next/right page of <sp> 1074 */ 1075 if (nextbn != 0) { 1076 XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 1077 if (rc) { 1078 XT_PUTPAGE(rmp); 1079 goto clean_up; 1080 } 1081 1082 BT_MARK_DIRTY(mp, ip); 1083 /* 1084 * acquire a transaction lock on the next page; 1085 * 1086 * action:sibling pointer update; 1087 */ 1088 if (!test_cflag(COMMIT_Nolink, ip)) 1089 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 1090 1091 p->header.prev = cpu_to_le64(rbn); 1092 1093 /* sibling page may have been updated previously, or 1094 * it may be updated later; 1095 */ 1096 1097 XT_PUTPAGE(mp); 1098 } 1099 1100 /* 1101 * split the data between the split and new/right pages 1102 */ 1103 maxentry = le16_to_cpu(sp->header.maxentry); 1104 middle = maxentry >> 1; 1105 righthalf = maxentry - middle; 1106 1107 /* 1108 * skip index in old split/left page - insert into left page: 1109 */ 1110 if (skip <= middle) { 1111 /* move right half of split page to the new right page */ 1112 memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], 1113 righthalf << L2XTSLOTSIZE); 1114 1115 /* shift right tail of left half to make room for new entry */ 1116 if (skip < middle) 1117 memmove(&sp->xad[skip + 1], &sp->xad[skip], 1118 (middle - skip) << L2XTSLOTSIZE); 1119 1120 /* insert new entry */ 1121 xad = &sp->xad[skip]; 1122 XT_PUTENTRY(xad, split->flag, split->off, split->len, 1123 split->addr); 1124 1125 /* update page header */ 1126 sp->header.nextindex = cpu_to_le16(middle + 1); 1127 if (!test_cflag(COMMIT_Nolink, ip)) { 1128 sxtlck->lwm.offset = (sxtlck->lwm.offset) ? 1129 min(skip, (int)sxtlck->lwm.offset) : skip; 1130 } 1131 1132 rp->header.nextindex = 1133 cpu_to_le16(XTENTRYSTART + righthalf); 1134 } 1135 /* 1136 * skip index in new right page - insert into right page: 1137 */ 1138 else { 1139 /* move left head of right half to right page */ 1140 n = skip - middle; 1141 memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], 1142 n << L2XTSLOTSIZE); 1143 1144 /* insert new entry */ 1145 n += XTENTRYSTART; 1146 xad = &rp->xad[n]; 1147 XT_PUTENTRY(xad, split->flag, split->off, split->len, 1148 split->addr); 1149 1150 /* move right tail of right half to right page */ 1151 if (skip < maxentry) 1152 memmove(&rp->xad[n + 1], &sp->xad[skip], 1153 (maxentry - skip) << L2XTSLOTSIZE); 1154 1155 /* update page header */ 1156 sp->header.nextindex = cpu_to_le16(middle); 1157 if (!test_cflag(COMMIT_Nolink, ip)) { 1158 sxtlck->lwm.offset = (sxtlck->lwm.offset) ? 1159 min(middle, (int)sxtlck->lwm.offset) : middle; 1160 } 1161 1162 rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1163 righthalf + 1); 1164 } 1165 1166 if (!test_cflag(COMMIT_Nolink, ip)) { 1167 sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) - 1168 sxtlck->lwm.offset; 1169 1170 /* rxtlck->lwm.offset = XTENTRYSTART; */ 1171 rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - 1172 XTENTRYSTART; 1173 } 1174 1175 *rmpp = rmp; 1176 *rbnp = rbn; 1177 1178 jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp); 1179 return rc; 1180 1181 clean_up: 1182 1183 /* Rollback quota allocation. */ 1184 if (quota_allocation) 1185 dquot_free_block(ip, quota_allocation); 1186 1187 return (rc); 1188 } 1189 1190 1191 /* 1192 * xtSplitRoot() 1193 * 1194 * function: 1195 * split the full root page into original/root/split page and new 1196 * right page 1197 * i.e., root remains fixed in tree anchor (inode) and the root is 1198 * copied to a single new right child page since root page << 1199 * non-root page, and the split root page contains a single entry 1200 * for the new right child page. 1201 * 1202 * parameter: 1203 * int tid, 1204 * struct inode *ip, 1205 * struct xtsplit *split, 1206 * struct metapage **rmpp) 1207 * 1208 * return: 1209 * Pointer to page in which to insert or NULL on error. 1210 */ 1211 static int 1212 xtSplitRoot(tid_t tid, 1213 struct inode *ip, struct xtsplit * split, struct metapage ** rmpp) 1214 { 1215 xtpage_t *sp; 1216 struct metapage *rmp; 1217 xtpage_t *rp; 1218 s64 rbn; 1219 int skip, nextindex; 1220 xad_t *xad; 1221 pxd_t *pxd; 1222 struct pxdlist *pxdlist; 1223 struct tlock *tlck; 1224 struct xtlock *xtlck; 1225 int rc; 1226 1227 sp = &JFS_IP(ip)->i_xtroot; 1228 1229 INCREMENT(xtStat.split); 1230 1231 /* 1232 * allocate a single (right) child page 1233 */ 1234 pxdlist = split->pxdlist; 1235 pxd = &pxdlist->pxd[pxdlist->npxd]; 1236 pxdlist->npxd++; 1237 rbn = addressPXD(pxd); 1238 rmp = get_metapage(ip, rbn, PSIZE, 1); 1239 if (rmp == NULL) 1240 return -EIO; 1241 1242 /* Allocate blocks to quota. */ 1243 rc = dquot_alloc_block(ip, lengthPXD(pxd)); 1244 if (rc) { 1245 release_metapage(rmp); 1246 return rc; 1247 } 1248 1249 jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp); 1250 1251 /* 1252 * acquire a transaction lock on the new right page; 1253 * 1254 * action: new page; 1255 */ 1256 BT_MARK_DIRTY(rmp, ip); 1257 1258 rp = (xtpage_t *) rmp->data; 1259 rp->header.flag = 1260 (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL; 1261 rp->header.self = *pxd; 1262 rp->header.nextindex = cpu_to_le16(XTENTRYSTART); 1263 rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE); 1264 1265 /* initialize sibling pointers */ 1266 rp->header.next = 0; 1267 rp->header.prev = 0; 1268 1269 /* 1270 * copy the in-line root page into new right page extent 1271 */ 1272 nextindex = le16_to_cpu(sp->header.maxentry); 1273 memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART], 1274 (nextindex - XTENTRYSTART) << L2XTSLOTSIZE); 1275 1276 /* 1277 * insert the new entry into the new right/child page 1278 * (skip index in the new right page will not change) 1279 */ 1280 skip = split->index; 1281 /* if insert into middle, shift right remaining entries */ 1282 if (skip != nextindex) 1283 memmove(&rp->xad[skip + 1], &rp->xad[skip], 1284 (nextindex - skip) * sizeof(xad_t)); 1285 1286 xad = &rp->xad[skip]; 1287 XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr); 1288 1289 /* update page header */ 1290 rp->header.nextindex = cpu_to_le16(nextindex + 1); 1291 1292 if (!test_cflag(COMMIT_Nolink, ip)) { 1293 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); 1294 xtlck = (struct xtlock *) & tlck->lock; 1295 xtlck->lwm.offset = XTENTRYSTART; 1296 xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - 1297 XTENTRYSTART; 1298 } 1299 1300 /* 1301 * reset the root 1302 * 1303 * init root with the single entry for the new right page 1304 * set the 1st entry offset to 0, which force the left-most key 1305 * at any level of the tree to be less than any search key. 1306 */ 1307 /* 1308 * acquire a transaction lock on the root page (in-memory inode); 1309 * 1310 * action: root split; 1311 */ 1312 BT_MARK_DIRTY(split->mp, ip); 1313 1314 xad = &sp->xad[XTENTRYSTART]; 1315 XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn); 1316 1317 /* update page header of root */ 1318 sp->header.flag &= ~BT_LEAF; 1319 sp->header.flag |= BT_INTERNAL; 1320 1321 sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); 1322 1323 if (!test_cflag(COMMIT_Nolink, ip)) { 1324 tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW); 1325 xtlck = (struct xtlock *) & tlck->lock; 1326 xtlck->lwm.offset = XTENTRYSTART; 1327 xtlck->lwm.length = 1; 1328 } 1329 1330 *rmpp = rmp; 1331 1332 jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp); 1333 return 0; 1334 } 1335 1336 1337 /* 1338 * xtExtend() 1339 * 1340 * function: extend in-place; 1341 * 1342 * note: existing extent may or may not have been committed. 1343 * caller is responsible for pager buffer cache update, and 1344 * working block allocation map update; 1345 * update pmap: alloc whole extended extent; 1346 */ 1347 int xtExtend(tid_t tid, /* transaction id */ 1348 struct inode *ip, s64 xoff, /* delta extent offset */ 1349 s32 xlen, /* delta extent length */ 1350 int flag) 1351 { 1352 int rc = 0; 1353 int cmp; 1354 struct metapage *mp; /* meta-page buffer */ 1355 xtpage_t *p; /* base B+-tree index page */ 1356 s64 bn; 1357 int index, nextindex, len; 1358 struct btstack btstack; /* traverse stack */ 1359 struct xtsplit split; /* split information */ 1360 xad_t *xad; 1361 s64 xaddr; 1362 struct tlock *tlck; 1363 struct xtlock *xtlck = NULL; 1364 1365 jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); 1366 1367 /* there must exist extent to be extended */ 1368 if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT))) 1369 return rc; 1370 1371 /* retrieve search result */ 1372 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 1373 1374 if (cmp != 0) { 1375 XT_PUTPAGE(mp); 1376 jfs_error(ip->i_sb, "xtSearch did not find extent\n"); 1377 return -EIO; 1378 } 1379 1380 /* extension must be contiguous */ 1381 xad = &p->xad[index]; 1382 if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) { 1383 XT_PUTPAGE(mp); 1384 jfs_error(ip->i_sb, "extension is not contiguous\n"); 1385 return -EIO; 1386 } 1387 1388 /* 1389 * acquire a transaction lock on the leaf page; 1390 * 1391 * action: xad insertion/extension; 1392 */ 1393 BT_MARK_DIRTY(mp, ip); 1394 if (!test_cflag(COMMIT_Nolink, ip)) { 1395 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 1396 xtlck = (struct xtlock *) & tlck->lock; 1397 } 1398 1399 /* extend will overflow extent ? */ 1400 xlen = lengthXAD(xad) + xlen; 1401 if ((len = xlen - MAXXLEN) <= 0) 1402 goto extendOld; 1403 1404 /* 1405 * extent overflow: insert entry for new extent 1406 */ 1407 //insertNew: 1408 xoff = offsetXAD(xad) + MAXXLEN; 1409 xaddr = addressXAD(xad) + MAXXLEN; 1410 nextindex = le16_to_cpu(p->header.nextindex); 1411 1412 /* 1413 * if the leaf page is full, insert the new entry and 1414 * propagate up the router entry for the new page from split 1415 * 1416 * The xtSplitUp() will insert the entry and unpin the leaf page. 1417 */ 1418 if (nextindex == le16_to_cpu(p->header.maxentry)) { 1419 /* xtSpliUp() unpins leaf pages */ 1420 split.mp = mp; 1421 split.index = index + 1; 1422 split.flag = XAD_NEW; 1423 split.off = xoff; /* split offset */ 1424 split.len = len; 1425 split.addr = xaddr; 1426 split.pxdlist = NULL; 1427 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 1428 return rc; 1429 1430 /* get back old page */ 1431 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1432 if (rc) 1433 return rc; 1434 /* 1435 * if leaf root has been split, original root has been 1436 * copied to new child page, i.e., original entry now 1437 * resides on the new child page; 1438 */ 1439 if (p->header.flag & BT_INTERNAL) { 1440 ASSERT(p->header.nextindex == 1441 cpu_to_le16(XTENTRYSTART + 1)); 1442 xad = &p->xad[XTENTRYSTART]; 1443 bn = addressXAD(xad); 1444 XT_PUTPAGE(mp); 1445 1446 /* get new child page */ 1447 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1448 if (rc) 1449 return rc; 1450 1451 BT_MARK_DIRTY(mp, ip); 1452 if (!test_cflag(COMMIT_Nolink, ip)) { 1453 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 1454 xtlck = (struct xtlock *) & tlck->lock; 1455 } 1456 } 1457 } 1458 /* 1459 * insert the new entry into the leaf page 1460 */ 1461 else { 1462 /* insert the new entry: mark the entry NEW */ 1463 xad = &p->xad[index + 1]; 1464 XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr); 1465 1466 /* advance next available entry index */ 1467 le16_add_cpu(&p->header.nextindex, 1); 1468 } 1469 1470 /* get back old entry */ 1471 xad = &p->xad[index]; 1472 xlen = MAXXLEN; 1473 1474 /* 1475 * extend old extent 1476 */ 1477 extendOld: 1478 XADlength(xad, xlen); 1479 if (!(xad->flag & XAD_NEW)) 1480 xad->flag |= XAD_EXTENDED; 1481 1482 if (!test_cflag(COMMIT_Nolink, ip)) { 1483 xtlck->lwm.offset = 1484 (xtlck->lwm.offset) ? min(index, 1485 (int)xtlck->lwm.offset) : index; 1486 xtlck->lwm.length = 1487 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; 1488 } 1489 1490 /* unpin the leaf page */ 1491 XT_PUTPAGE(mp); 1492 1493 return rc; 1494 } 1495 1496 #ifdef _NOTYET 1497 /* 1498 * xtTailgate() 1499 * 1500 * function: split existing 'tail' extent 1501 * (split offset >= start offset of tail extent), and 1502 * relocate and extend the split tail half; 1503 * 1504 * note: existing extent may or may not have been committed. 1505 * caller is responsible for pager buffer cache update, and 1506 * working block allocation map update; 1507 * update pmap: free old split tail extent, alloc new extent; 1508 */ 1509 int xtTailgate(tid_t tid, /* transaction id */ 1510 struct inode *ip, s64 xoff, /* split/new extent offset */ 1511 s32 xlen, /* new extent length */ 1512 s64 xaddr, /* new extent address */ 1513 int flag) 1514 { 1515 int rc = 0; 1516 int cmp; 1517 struct metapage *mp; /* meta-page buffer */ 1518 xtpage_t *p; /* base B+-tree index page */ 1519 s64 bn; 1520 int index, nextindex, llen, rlen; 1521 struct btstack btstack; /* traverse stack */ 1522 struct xtsplit split; /* split information */ 1523 xad_t *xad; 1524 struct tlock *tlck; 1525 struct xtlock *xtlck = 0; 1526 struct tlock *mtlck; 1527 struct maplock *pxdlock; 1528 1529 /* 1530 printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", 1531 (ulong)xoff, xlen, (ulong)xaddr); 1532 */ 1533 1534 /* there must exist extent to be tailgated */ 1535 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT))) 1536 return rc; 1537 1538 /* retrieve search result */ 1539 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 1540 1541 if (cmp != 0) { 1542 XT_PUTPAGE(mp); 1543 jfs_error(ip->i_sb, "couldn't find extent\n"); 1544 return -EIO; 1545 } 1546 1547 /* entry found must be last entry */ 1548 nextindex = le16_to_cpu(p->header.nextindex); 1549 if (index != nextindex - 1) { 1550 XT_PUTPAGE(mp); 1551 jfs_error(ip->i_sb, "the entry found is not the last entry\n"); 1552 return -EIO; 1553 } 1554 1555 BT_MARK_DIRTY(mp, ip); 1556 /* 1557 * acquire tlock of the leaf page containing original entry 1558 */ 1559 if (!test_cflag(COMMIT_Nolink, ip)) { 1560 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 1561 xtlck = (struct xtlock *) & tlck->lock; 1562 } 1563 1564 /* completely replace extent ? */ 1565 xad = &p->xad[index]; 1566 /* 1567 printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", 1568 (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad)); 1569 */ 1570 if ((llen = xoff - offsetXAD(xad)) == 0) 1571 goto updateOld; 1572 1573 /* 1574 * partially replace extent: insert entry for new extent 1575 */ 1576 //insertNew: 1577 /* 1578 * if the leaf page is full, insert the new entry and 1579 * propagate up the router entry for the new page from split 1580 * 1581 * The xtSplitUp() will insert the entry and unpin the leaf page. 1582 */ 1583 if (nextindex == le16_to_cpu(p->header.maxentry)) { 1584 /* xtSpliUp() unpins leaf pages */ 1585 split.mp = mp; 1586 split.index = index + 1; 1587 split.flag = XAD_NEW; 1588 split.off = xoff; /* split offset */ 1589 split.len = xlen; 1590 split.addr = xaddr; 1591 split.pxdlist = NULL; 1592 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 1593 return rc; 1594 1595 /* get back old page */ 1596 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1597 if (rc) 1598 return rc; 1599 /* 1600 * if leaf root has been split, original root has been 1601 * copied to new child page, i.e., original entry now 1602 * resides on the new child page; 1603 */ 1604 if (p->header.flag & BT_INTERNAL) { 1605 ASSERT(p->header.nextindex == 1606 cpu_to_le16(XTENTRYSTART + 1)); 1607 xad = &p->xad[XTENTRYSTART]; 1608 bn = addressXAD(xad); 1609 XT_PUTPAGE(mp); 1610 1611 /* get new child page */ 1612 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1613 if (rc) 1614 return rc; 1615 1616 BT_MARK_DIRTY(mp, ip); 1617 if (!test_cflag(COMMIT_Nolink, ip)) { 1618 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 1619 xtlck = (struct xtlock *) & tlck->lock; 1620 } 1621 } 1622 } 1623 /* 1624 * insert the new entry into the leaf page 1625 */ 1626 else { 1627 /* insert the new entry: mark the entry NEW */ 1628 xad = &p->xad[index + 1]; 1629 XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); 1630 1631 /* advance next available entry index */ 1632 le16_add_cpu(&p->header.nextindex, 1); 1633 } 1634 1635 /* get back old XAD */ 1636 xad = &p->xad[index]; 1637 1638 /* 1639 * truncate/relocate old extent at split offset 1640 */ 1641 updateOld: 1642 /* update dmap for old/committed/truncated extent */ 1643 rlen = lengthXAD(xad) - llen; 1644 if (!(xad->flag & XAD_NEW)) { 1645 /* free from PWMAP at commit */ 1646 if (!test_cflag(COMMIT_Nolink, ip)) { 1647 mtlck = txMaplock(tid, ip, tlckMAP); 1648 pxdlock = (struct maplock *) & mtlck->lock; 1649 pxdlock->flag = mlckFREEPXD; 1650 PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen); 1651 PXDlength(&pxdlock->pxd, rlen); 1652 pxdlock->index = 1; 1653 } 1654 } else 1655 /* free from WMAP */ 1656 dbFree(ip, addressXAD(xad) + llen, (s64) rlen); 1657 1658 if (llen) 1659 /* truncate */ 1660 XADlength(xad, llen); 1661 else 1662 /* replace */ 1663 XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); 1664 1665 if (!test_cflag(COMMIT_Nolink, ip)) { 1666 xtlck->lwm.offset = (xtlck->lwm.offset) ? 1667 min(index, (int)xtlck->lwm.offset) : index; 1668 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 1669 xtlck->lwm.offset; 1670 } 1671 1672 /* unpin the leaf page */ 1673 XT_PUTPAGE(mp); 1674 1675 return rc; 1676 } 1677 #endif /* _NOTYET */ 1678 1679 /* 1680 * xtUpdate() 1681 * 1682 * function: update XAD; 1683 * 1684 * update extent for allocated_but_not_recorded or 1685 * compressed extent; 1686 * 1687 * parameter: 1688 * nxad - new XAD; 1689 * logical extent of the specified XAD must be completely 1690 * contained by an existing XAD; 1691 */ 1692 int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) 1693 { /* new XAD */ 1694 int rc = 0; 1695 int cmp; 1696 struct metapage *mp; /* meta-page buffer */ 1697 xtpage_t *p; /* base B+-tree index page */ 1698 s64 bn; 1699 int index0, index, newindex, nextindex; 1700 struct btstack btstack; /* traverse stack */ 1701 struct xtsplit split; /* split information */ 1702 xad_t *xad, *lxad, *rxad; 1703 int xflag; 1704 s64 nxoff, xoff; 1705 int nxlen, xlen, lxlen, rxlen; 1706 s64 nxaddr, xaddr; 1707 struct tlock *tlck; 1708 struct xtlock *xtlck = NULL; 1709 int newpage = 0; 1710 1711 /* there must exist extent to be tailgated */ 1712 nxoff = offsetXAD(nxad); 1713 nxlen = lengthXAD(nxad); 1714 nxaddr = addressXAD(nxad); 1715 1716 if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) 1717 return rc; 1718 1719 /* retrieve search result */ 1720 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); 1721 1722 if (cmp != 0) { 1723 XT_PUTPAGE(mp); 1724 jfs_error(ip->i_sb, "Could not find extent\n"); 1725 return -EIO; 1726 } 1727 1728 BT_MARK_DIRTY(mp, ip); 1729 /* 1730 * acquire tlock of the leaf page containing original entry 1731 */ 1732 if (!test_cflag(COMMIT_Nolink, ip)) { 1733 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 1734 xtlck = (struct xtlock *) & tlck->lock; 1735 } 1736 1737 xad = &p->xad[index0]; 1738 xflag = xad->flag; 1739 xoff = offsetXAD(xad); 1740 xlen = lengthXAD(xad); 1741 xaddr = addressXAD(xad); 1742 1743 /* nXAD must be completely contained within XAD */ 1744 if ((xoff > nxoff) || 1745 (nxoff + nxlen > xoff + xlen)) { 1746 XT_PUTPAGE(mp); 1747 jfs_error(ip->i_sb, 1748 "nXAD in not completely contained within XAD\n"); 1749 return -EIO; 1750 } 1751 1752 index = index0; 1753 newindex = index + 1; 1754 nextindex = le16_to_cpu(p->header.nextindex); 1755 1756 #ifdef _JFS_WIP_NOCOALESCE 1757 if (xoff < nxoff) 1758 goto updateRight; 1759 1760 /* 1761 * replace XAD with nXAD 1762 */ 1763 replace: /* (nxoff == xoff) */ 1764 if (nxlen == xlen) { 1765 /* replace XAD with nXAD:recorded */ 1766 *xad = *nxad; 1767 xad->flag = xflag & ~XAD_NOTRECORDED; 1768 1769 goto out; 1770 } else /* (nxlen < xlen) */ 1771 goto updateLeft; 1772 #endif /* _JFS_WIP_NOCOALESCE */ 1773 1774 /* #ifdef _JFS_WIP_COALESCE */ 1775 if (xoff < nxoff) 1776 goto coalesceRight; 1777 1778 /* 1779 * coalesce with left XAD 1780 */ 1781 //coalesceLeft: /* (xoff == nxoff) */ 1782 /* is XAD first entry of page ? */ 1783 if (index == XTENTRYSTART) 1784 goto replace; 1785 1786 /* is nXAD logically and physically contiguous with lXAD ? */ 1787 lxad = &p->xad[index - 1]; 1788 lxlen = lengthXAD(lxad); 1789 if (!(lxad->flag & XAD_NOTRECORDED) && 1790 (nxoff == offsetXAD(lxad) + lxlen) && 1791 (nxaddr == addressXAD(lxad) + lxlen) && 1792 (lxlen + nxlen < MAXXLEN)) { 1793 /* extend right lXAD */ 1794 index0 = index - 1; 1795 XADlength(lxad, lxlen + nxlen); 1796 1797 /* If we just merged two extents together, need to make sure the 1798 * right extent gets logged. If the left one is marked XAD_NEW, 1799 * then we know it will be logged. Otherwise, mark as 1800 * XAD_EXTENDED 1801 */ 1802 if (!(lxad->flag & XAD_NEW)) 1803 lxad->flag |= XAD_EXTENDED; 1804 1805 if (xlen > nxlen) { 1806 /* truncate XAD */ 1807 XADoffset(xad, xoff + nxlen); 1808 XADlength(xad, xlen - nxlen); 1809 XADaddress(xad, xaddr + nxlen); 1810 goto out; 1811 } else { /* (xlen == nxlen) */ 1812 1813 /* remove XAD */ 1814 if (index < nextindex - 1) 1815 memmove(&p->xad[index], &p->xad[index + 1], 1816 (nextindex - index - 1817 1) << L2XTSLOTSIZE); 1818 1819 p->header.nextindex = 1820 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1821 1); 1822 1823 index = index0; 1824 newindex = index + 1; 1825 nextindex = le16_to_cpu(p->header.nextindex); 1826 xoff = nxoff = offsetXAD(lxad); 1827 xlen = nxlen = lxlen + nxlen; 1828 xaddr = nxaddr = addressXAD(lxad); 1829 goto coalesceRight; 1830 } 1831 } 1832 1833 /* 1834 * replace XAD with nXAD 1835 */ 1836 replace: /* (nxoff == xoff) */ 1837 if (nxlen == xlen) { 1838 /* replace XAD with nXAD:recorded */ 1839 *xad = *nxad; 1840 xad->flag = xflag & ~XAD_NOTRECORDED; 1841 1842 goto coalesceRight; 1843 } else /* (nxlen < xlen) */ 1844 goto updateLeft; 1845 1846 /* 1847 * coalesce with right XAD 1848 */ 1849 coalesceRight: /* (xoff <= nxoff) */ 1850 /* is XAD last entry of page ? */ 1851 if (newindex == nextindex) { 1852 if (xoff == nxoff) 1853 goto out; 1854 goto updateRight; 1855 } 1856 1857 /* is nXAD logically and physically contiguous with rXAD ? */ 1858 rxad = &p->xad[index + 1]; 1859 rxlen = lengthXAD(rxad); 1860 if (!(rxad->flag & XAD_NOTRECORDED) && 1861 (nxoff + nxlen == offsetXAD(rxad)) && 1862 (nxaddr + nxlen == addressXAD(rxad)) && 1863 (rxlen + nxlen < MAXXLEN)) { 1864 /* extend left rXAD */ 1865 XADoffset(rxad, nxoff); 1866 XADlength(rxad, rxlen + nxlen); 1867 XADaddress(rxad, nxaddr); 1868 1869 /* If we just merged two extents together, need to make sure 1870 * the left extent gets logged. If the right one is marked 1871 * XAD_NEW, then we know it will be logged. Otherwise, mark as 1872 * XAD_EXTENDED 1873 */ 1874 if (!(rxad->flag & XAD_NEW)) 1875 rxad->flag |= XAD_EXTENDED; 1876 1877 if (xlen > nxlen) 1878 /* truncate XAD */ 1879 XADlength(xad, xlen - nxlen); 1880 else { /* (xlen == nxlen) */ 1881 1882 /* remove XAD */ 1883 memmove(&p->xad[index], &p->xad[index + 1], 1884 (nextindex - index - 1) << L2XTSLOTSIZE); 1885 1886 p->header.nextindex = 1887 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1888 1); 1889 } 1890 1891 goto out; 1892 } else if (xoff == nxoff) 1893 goto out; 1894 1895 if (xoff >= nxoff) { 1896 XT_PUTPAGE(mp); 1897 jfs_error(ip->i_sb, "xoff >= nxoff\n"); 1898 return -EIO; 1899 } 1900 /* #endif _JFS_WIP_COALESCE */ 1901 1902 /* 1903 * split XAD into (lXAD, nXAD): 1904 * 1905 * |---nXAD---> 1906 * --|----------XAD----------|-- 1907 * |-lXAD-| 1908 */ 1909 updateRight: /* (xoff < nxoff) */ 1910 /* truncate old XAD as lXAD:not_recorded */ 1911 xad = &p->xad[index]; 1912 XADlength(xad, nxoff - xoff); 1913 1914 /* insert nXAD:recorded */ 1915 if (nextindex == le16_to_cpu(p->header.maxentry)) { 1916 1917 /* xtSpliUp() unpins leaf pages */ 1918 split.mp = mp; 1919 split.index = newindex; 1920 split.flag = xflag & ~XAD_NOTRECORDED; 1921 split.off = nxoff; 1922 split.len = nxlen; 1923 split.addr = nxaddr; 1924 split.pxdlist = NULL; 1925 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 1926 return rc; 1927 1928 /* get back old page */ 1929 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1930 if (rc) 1931 return rc; 1932 /* 1933 * if leaf root has been split, original root has been 1934 * copied to new child page, i.e., original entry now 1935 * resides on the new child page; 1936 */ 1937 if (p->header.flag & BT_INTERNAL) { 1938 ASSERT(p->header.nextindex == 1939 cpu_to_le16(XTENTRYSTART + 1)); 1940 xad = &p->xad[XTENTRYSTART]; 1941 bn = addressXAD(xad); 1942 XT_PUTPAGE(mp); 1943 1944 /* get new child page */ 1945 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1946 if (rc) 1947 return rc; 1948 1949 BT_MARK_DIRTY(mp, ip); 1950 if (!test_cflag(COMMIT_Nolink, ip)) { 1951 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 1952 xtlck = (struct xtlock *) & tlck->lock; 1953 } 1954 } else { 1955 /* is nXAD on new page ? */ 1956 if (newindex > 1957 (le16_to_cpu(p->header.maxentry) >> 1)) { 1958 newindex = 1959 newindex - 1960 le16_to_cpu(p->header.nextindex) + 1961 XTENTRYSTART; 1962 newpage = 1; 1963 } 1964 } 1965 } else { 1966 /* if insert into middle, shift right remaining entries */ 1967 if (newindex < nextindex) 1968 memmove(&p->xad[newindex + 1], &p->xad[newindex], 1969 (nextindex - newindex) << L2XTSLOTSIZE); 1970 1971 /* insert the entry */ 1972 xad = &p->xad[newindex]; 1973 *xad = *nxad; 1974 xad->flag = xflag & ~XAD_NOTRECORDED; 1975 1976 /* advance next available entry index. */ 1977 p->header.nextindex = 1978 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 1979 } 1980 1981 /* 1982 * does nXAD force 3-way split ? 1983 * 1984 * |---nXAD--->| 1985 * --|----------XAD-------------|-- 1986 * |-lXAD-| |-rXAD -| 1987 */ 1988 if (nxoff + nxlen == xoff + xlen) 1989 goto out; 1990 1991 /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */ 1992 if (newpage) { 1993 /* close out old page */ 1994 if (!test_cflag(COMMIT_Nolink, ip)) { 1995 xtlck->lwm.offset = (xtlck->lwm.offset) ? 1996 min(index0, (int)xtlck->lwm.offset) : index0; 1997 xtlck->lwm.length = 1998 le16_to_cpu(p->header.nextindex) - 1999 xtlck->lwm.offset; 2000 } 2001 2002 bn = le64_to_cpu(p->header.next); 2003 XT_PUTPAGE(mp); 2004 2005 /* get new right page */ 2006 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2007 if (rc) 2008 return rc; 2009 2010 BT_MARK_DIRTY(mp, ip); 2011 if (!test_cflag(COMMIT_Nolink, ip)) { 2012 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 2013 xtlck = (struct xtlock *) & tlck->lock; 2014 } 2015 2016 index0 = index = newindex; 2017 } else 2018 index++; 2019 2020 newindex = index + 1; 2021 nextindex = le16_to_cpu(p->header.nextindex); 2022 xlen = xlen - (nxoff - xoff); 2023 xoff = nxoff; 2024 xaddr = nxaddr; 2025 2026 /* recompute split pages */ 2027 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2028 XT_PUTPAGE(mp); 2029 2030 if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) 2031 return rc; 2032 2033 /* retrieve search result */ 2034 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); 2035 2036 if (cmp != 0) { 2037 XT_PUTPAGE(mp); 2038 jfs_error(ip->i_sb, "xtSearch failed\n"); 2039 return -EIO; 2040 } 2041 2042 if (index0 != index) { 2043 XT_PUTPAGE(mp); 2044 jfs_error(ip->i_sb, "unexpected value of index\n"); 2045 return -EIO; 2046 } 2047 } 2048 2049 /* 2050 * split XAD into (nXAD, rXAD) 2051 * 2052 * ---nXAD---| 2053 * --|----------XAD----------|-- 2054 * |-rXAD-| 2055 */ 2056 updateLeft: /* (nxoff == xoff) && (nxlen < xlen) */ 2057 /* update old XAD with nXAD:recorded */ 2058 xad = &p->xad[index]; 2059 *xad = *nxad; 2060 xad->flag = xflag & ~XAD_NOTRECORDED; 2061 2062 /* insert rXAD:not_recorded */ 2063 xoff = xoff + nxlen; 2064 xlen = xlen - nxlen; 2065 xaddr = xaddr + nxlen; 2066 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2067 /* 2068 printf("xtUpdate.updateLeft.split p:0x%p\n", p); 2069 */ 2070 /* xtSpliUp() unpins leaf pages */ 2071 split.mp = mp; 2072 split.index = newindex; 2073 split.flag = xflag; 2074 split.off = xoff; 2075 split.len = xlen; 2076 split.addr = xaddr; 2077 split.pxdlist = NULL; 2078 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 2079 return rc; 2080 2081 /* get back old page */ 2082 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2083 if (rc) 2084 return rc; 2085 2086 /* 2087 * if leaf root has been split, original root has been 2088 * copied to new child page, i.e., original entry now 2089 * resides on the new child page; 2090 */ 2091 if (p->header.flag & BT_INTERNAL) { 2092 ASSERT(p->header.nextindex == 2093 cpu_to_le16(XTENTRYSTART + 1)); 2094 xad = &p->xad[XTENTRYSTART]; 2095 bn = addressXAD(xad); 2096 XT_PUTPAGE(mp); 2097 2098 /* get new child page */ 2099 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2100 if (rc) 2101 return rc; 2102 2103 BT_MARK_DIRTY(mp, ip); 2104 if (!test_cflag(COMMIT_Nolink, ip)) { 2105 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 2106 xtlck = (struct xtlock *) & tlck->lock; 2107 } 2108 } 2109 } else { 2110 /* if insert into middle, shift right remaining entries */ 2111 if (newindex < nextindex) 2112 memmove(&p->xad[newindex + 1], &p->xad[newindex], 2113 (nextindex - newindex) << L2XTSLOTSIZE); 2114 2115 /* insert the entry */ 2116 xad = &p->xad[newindex]; 2117 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 2118 2119 /* advance next available entry index. */ 2120 p->header.nextindex = 2121 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 2122 } 2123 2124 out: 2125 if (!test_cflag(COMMIT_Nolink, ip)) { 2126 xtlck->lwm.offset = (xtlck->lwm.offset) ? 2127 min(index0, (int)xtlck->lwm.offset) : index0; 2128 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 2129 xtlck->lwm.offset; 2130 } 2131 2132 /* unpin the leaf page */ 2133 XT_PUTPAGE(mp); 2134 2135 return rc; 2136 } 2137 2138 2139 /* 2140 * xtAppend() 2141 * 2142 * function: grow in append mode from contiguous region specified ; 2143 * 2144 * parameter: 2145 * tid - transaction id; 2146 * ip - file object; 2147 * xflag - extent flag: 2148 * xoff - extent offset; 2149 * maxblocks - max extent length; 2150 * xlen - extent length (in/out); 2151 * xaddrp - extent address pointer (in/out): 2152 * flag - 2153 * 2154 * return: 2155 */ 2156 int xtAppend(tid_t tid, /* transaction id */ 2157 struct inode *ip, int xflag, s64 xoff, s32 maxblocks, 2158 s32 * xlenp, /* (in/out) */ 2159 s64 * xaddrp, /* (in/out) */ 2160 int flag) 2161 { 2162 int rc = 0; 2163 struct metapage *mp; /* meta-page buffer */ 2164 xtpage_t *p; /* base B+-tree index page */ 2165 s64 bn, xaddr; 2166 int index, nextindex; 2167 struct btstack btstack; /* traverse stack */ 2168 struct xtsplit split; /* split information */ 2169 xad_t *xad; 2170 int cmp; 2171 struct tlock *tlck; 2172 struct xtlock *xtlck; 2173 int nsplit, nblocks, xlen; 2174 struct pxdlist pxdlist; 2175 pxd_t *pxd; 2176 s64 next; 2177 2178 xaddr = *xaddrp; 2179 xlen = *xlenp; 2180 jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx", 2181 (ulong) xoff, maxblocks, xlen, (ulong) xaddr); 2182 2183 /* 2184 * search for the entry location at which to insert: 2185 * 2186 * xtFastSearch() and xtSearch() both returns (leaf page 2187 * pinned, index at which to insert). 2188 * n.b. xtSearch() may return index of maxentry of 2189 * the full page. 2190 */ 2191 if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT))) 2192 return rc; 2193 2194 /* retrieve search result */ 2195 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2196 2197 if (cmp == 0) { 2198 rc = -EEXIST; 2199 goto out; 2200 } 2201 2202 if (next) 2203 xlen = min(xlen, (int)(next - xoff)); 2204 //insert: 2205 /* 2206 * insert entry for new extent 2207 */ 2208 xflag |= XAD_NEW; 2209 2210 /* 2211 * if the leaf page is full, split the page and 2212 * propagate up the router entry for the new page from split 2213 * 2214 * The xtSplitUp() will insert the entry and unpin the leaf page. 2215 */ 2216 nextindex = le16_to_cpu(p->header.nextindex); 2217 if (nextindex < le16_to_cpu(p->header.maxentry)) 2218 goto insertLeaf; 2219 2220 /* 2221 * allocate new index blocks to cover index page split(s) 2222 */ 2223 nsplit = btstack.nsplit; 2224 split.pxdlist = &pxdlist; 2225 pxdlist.maxnpxd = pxdlist.npxd = 0; 2226 pxd = &pxdlist.pxd[0]; 2227 nblocks = JFS_SBI(ip->i_sb)->nbperpage; 2228 for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) { 2229 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) { 2230 PXDaddress(pxd, xaddr); 2231 PXDlength(pxd, nblocks); 2232 2233 pxdlist.maxnpxd++; 2234 2235 continue; 2236 } 2237 2238 /* undo allocation */ 2239 2240 goto out; 2241 } 2242 2243 xlen = min(xlen, maxblocks); 2244 2245 /* 2246 * allocate data extent requested 2247 */ 2248 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) 2249 goto out; 2250 2251 split.mp = mp; 2252 split.index = index; 2253 split.flag = xflag; 2254 split.off = xoff; 2255 split.len = xlen; 2256 split.addr = xaddr; 2257 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { 2258 /* undo data extent allocation */ 2259 dbFree(ip, *xaddrp, (s64) * xlenp); 2260 2261 return rc; 2262 } 2263 2264 *xaddrp = xaddr; 2265 *xlenp = xlen; 2266 return 0; 2267 2268 /* 2269 * insert the new entry into the leaf page 2270 */ 2271 insertLeaf: 2272 /* 2273 * allocate data extent requested 2274 */ 2275 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) 2276 goto out; 2277 2278 BT_MARK_DIRTY(mp, ip); 2279 /* 2280 * acquire a transaction lock on the leaf page; 2281 * 2282 * action: xad insertion/extension; 2283 */ 2284 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 2285 xtlck = (struct xtlock *) & tlck->lock; 2286 2287 /* insert the new entry: mark the entry NEW */ 2288 xad = &p->xad[index]; 2289 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 2290 2291 /* advance next available entry index */ 2292 le16_add_cpu(&p->header.nextindex, 1); 2293 2294 xtlck->lwm.offset = 2295 (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index; 2296 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 2297 xtlck->lwm.offset; 2298 2299 *xaddrp = xaddr; 2300 *xlenp = xlen; 2301 2302 out: 2303 /* unpin the leaf page */ 2304 XT_PUTPAGE(mp); 2305 2306 return rc; 2307 } 2308 #ifdef _STILL_TO_PORT 2309 2310 /* - TBD for defragmentaion/reorganization - 2311 * 2312 * xtDelete() 2313 * 2314 * function: 2315 * delete the entry with the specified key. 2316 * 2317 * N.B.: whole extent of the entry is assumed to be deleted. 2318 * 2319 * parameter: 2320 * 2321 * return: 2322 * ENOENT: if the entry is not found. 2323 * 2324 * exception: 2325 */ 2326 int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag) 2327 { 2328 int rc = 0; 2329 struct btstack btstack; 2330 int cmp; 2331 s64 bn; 2332 struct metapage *mp; 2333 xtpage_t *p; 2334 int index, nextindex; 2335 struct tlock *tlck; 2336 struct xtlock *xtlck; 2337 2338 /* 2339 * find the matching entry; xtSearch() pins the page 2340 */ 2341 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) 2342 return rc; 2343 2344 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2345 if (cmp) { 2346 /* unpin the leaf page */ 2347 XT_PUTPAGE(mp); 2348 return -ENOENT; 2349 } 2350 2351 /* 2352 * delete the entry from the leaf page 2353 */ 2354 nextindex = le16_to_cpu(p->header.nextindex); 2355 le16_add_cpu(&p->header.nextindex, -1); 2356 2357 /* 2358 * if the leaf page bocome empty, free the page 2359 */ 2360 if (p->header.nextindex == cpu_to_le16(XTENTRYSTART)) 2361 return (xtDeleteUp(tid, ip, mp, p, &btstack)); 2362 2363 BT_MARK_DIRTY(mp, ip); 2364 /* 2365 * acquire a transaction lock on the leaf page; 2366 * 2367 * action:xad deletion; 2368 */ 2369 tlck = txLock(tid, ip, mp, tlckXTREE); 2370 xtlck = (struct xtlock *) & tlck->lock; 2371 xtlck->lwm.offset = 2372 (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index; 2373 2374 /* if delete from middle, shift left/compact the remaining entries */ 2375 if (index < nextindex - 1) 2376 memmove(&p->xad[index], &p->xad[index + 1], 2377 (nextindex - index - 1) * sizeof(xad_t)); 2378 2379 XT_PUTPAGE(mp); 2380 2381 return 0; 2382 } 2383 2384 2385 /* - TBD for defragmentaion/reorganization - 2386 * 2387 * xtDeleteUp() 2388 * 2389 * function: 2390 * free empty pages as propagating deletion up the tree 2391 * 2392 * parameter: 2393 * 2394 * return: 2395 */ 2396 static int 2397 xtDeleteUp(tid_t tid, struct inode *ip, 2398 struct metapage * fmp, xtpage_t * fp, struct btstack * btstack) 2399 { 2400 int rc = 0; 2401 struct metapage *mp; 2402 xtpage_t *p; 2403 int index, nextindex; 2404 s64 xaddr; 2405 int xlen; 2406 struct btframe *parent; 2407 struct tlock *tlck; 2408 struct xtlock *xtlck; 2409 2410 /* 2411 * keep root leaf page which has become empty 2412 */ 2413 if (fp->header.flag & BT_ROOT) { 2414 /* keep the root page */ 2415 fp->header.flag &= ~BT_INTERNAL; 2416 fp->header.flag |= BT_LEAF; 2417 fp->header.nextindex = cpu_to_le16(XTENTRYSTART); 2418 2419 /* XT_PUTPAGE(fmp); */ 2420 2421 return 0; 2422 } 2423 2424 /* 2425 * free non-root leaf page 2426 */ 2427 if ((rc = xtRelink(tid, ip, fp))) { 2428 XT_PUTPAGE(fmp); 2429 return rc; 2430 } 2431 2432 xaddr = addressPXD(&fp->header.self); 2433 xlen = lengthPXD(&fp->header.self); 2434 /* free the page extent */ 2435 dbFree(ip, xaddr, (s64) xlen); 2436 2437 /* free the buffer page */ 2438 discard_metapage(fmp); 2439 2440 /* 2441 * propagate page deletion up the index tree 2442 * 2443 * If the delete from the parent page makes it empty, 2444 * continue all the way up the tree. 2445 * stop if the root page is reached (which is never deleted) or 2446 * if the entry deletion does not empty the page. 2447 */ 2448 while ((parent = BT_POP(btstack)) != NULL) { 2449 /* get/pin the parent page <sp> */ 2450 XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); 2451 if (rc) 2452 return rc; 2453 2454 index = parent->index; 2455 2456 /* delete the entry for the freed child page from parent. 2457 */ 2458 nextindex = le16_to_cpu(p->header.nextindex); 2459 2460 /* 2461 * the parent has the single entry being deleted: 2462 * free the parent page which has become empty. 2463 */ 2464 if (nextindex == 1) { 2465 if (p->header.flag & BT_ROOT) { 2466 /* keep the root page */ 2467 p->header.flag &= ~BT_INTERNAL; 2468 p->header.flag |= BT_LEAF; 2469 p->header.nextindex = 2470 cpu_to_le16(XTENTRYSTART); 2471 2472 /* XT_PUTPAGE(mp); */ 2473 2474 break; 2475 } else { 2476 /* free the parent page */ 2477 if ((rc = xtRelink(tid, ip, p))) 2478 return rc; 2479 2480 xaddr = addressPXD(&p->header.self); 2481 /* free the page extent */ 2482 dbFree(ip, xaddr, 2483 (s64) JFS_SBI(ip->i_sb)->nbperpage); 2484 2485 /* unpin/free the buffer page */ 2486 discard_metapage(mp); 2487 2488 /* propagate up */ 2489 continue; 2490 } 2491 } 2492 /* 2493 * the parent has other entries remaining: 2494 * delete the router entry from the parent page. 2495 */ 2496 else { 2497 BT_MARK_DIRTY(mp, ip); 2498 /* 2499 * acquire a transaction lock on the leaf page; 2500 * 2501 * action:xad deletion; 2502 */ 2503 tlck = txLock(tid, ip, mp, tlckXTREE); 2504 xtlck = (struct xtlock *) & tlck->lock; 2505 xtlck->lwm.offset = 2506 (xtlck->lwm.offset) ? min(index, 2507 xtlck->lwm. 2508 offset) : index; 2509 2510 /* if delete from middle, 2511 * shift left/compact the remaining entries in the page 2512 */ 2513 if (index < nextindex - 1) 2514 memmove(&p->xad[index], &p->xad[index + 1], 2515 (nextindex - index - 2516 1) << L2XTSLOTSIZE); 2517 2518 le16_add_cpu(&p->header.nextindex, -1); 2519 jfs_info("xtDeleteUp(entry): 0x%lx[%d]", 2520 (ulong) parent->bn, index); 2521 } 2522 2523 /* unpin the parent page */ 2524 XT_PUTPAGE(mp); 2525 2526 /* exit propagation up */ 2527 break; 2528 } 2529 2530 return 0; 2531 } 2532 2533 2534 /* 2535 * NAME: xtRelocate() 2536 * 2537 * FUNCTION: relocate xtpage or data extent of regular file; 2538 * This function is mainly used by defragfs utility. 2539 * 2540 * NOTE: This routine does not have the logic to handle 2541 * uncommitted allocated extent. The caller should call 2542 * txCommit() to commit all the allocation before call 2543 * this routine. 2544 */ 2545 int 2546 xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ 2547 s64 nxaddr, /* new xaddr */ 2548 int xtype) 2549 { /* extent type: XTPAGE or DATAEXT */ 2550 int rc = 0; 2551 struct tblock *tblk; 2552 struct tlock *tlck; 2553 struct xtlock *xtlck; 2554 struct metapage *mp, *pmp, *lmp, *rmp; /* meta-page buffer */ 2555 xtpage_t *p, *pp, *rp, *lp; /* base B+-tree index page */ 2556 xad_t *xad; 2557 pxd_t *pxd; 2558 s64 xoff, xsize; 2559 int xlen; 2560 s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn; 2561 cbuf_t *cp; 2562 s64 offset, nbytes, nbrd, pno; 2563 int nb, npages, nblks; 2564 s64 bn; 2565 int cmp; 2566 int index; 2567 struct pxd_lock *pxdlock; 2568 struct btstack btstack; /* traverse stack */ 2569 2570 xtype = xtype & EXTENT_TYPE; 2571 2572 xoff = offsetXAD(oxad); 2573 oxaddr = addressXAD(oxad); 2574 xlen = lengthXAD(oxad); 2575 2576 /* validate extent offset */ 2577 offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; 2578 if (offset >= ip->i_size) 2579 return -ESTALE; /* stale extent */ 2580 2581 jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx", 2582 xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr); 2583 2584 /* 2585 * 1. get and validate the parent xtpage/xad entry 2586 * covering the source extent to be relocated; 2587 */ 2588 if (xtype == DATAEXT) { 2589 /* search in leaf entry */ 2590 rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); 2591 if (rc) 2592 return rc; 2593 2594 /* retrieve search result */ 2595 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2596 2597 if (cmp) { 2598 XT_PUTPAGE(pmp); 2599 return -ESTALE; 2600 } 2601 2602 /* validate for exact match with a single entry */ 2603 xad = &pp->xad[index]; 2604 if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) { 2605 XT_PUTPAGE(pmp); 2606 return -ESTALE; 2607 } 2608 } else { /* (xtype == XTPAGE) */ 2609 2610 /* search in internal entry */ 2611 rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0); 2612 if (rc) 2613 return rc; 2614 2615 /* retrieve search result */ 2616 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2617 2618 if (cmp) { 2619 XT_PUTPAGE(pmp); 2620 return -ESTALE; 2621 } 2622 2623 /* xtSearchNode() validated for exact match with a single entry 2624 */ 2625 xad = &pp->xad[index]; 2626 } 2627 jfs_info("xtRelocate: parent xad entry validated."); 2628 2629 /* 2630 * 2. relocate the extent 2631 */ 2632 if (xtype == DATAEXT) { 2633 /* if the extent is allocated-but-not-recorded 2634 * there is no real data to be moved in this extent, 2635 */ 2636 if (xad->flag & XAD_NOTRECORDED) 2637 goto out; 2638 else 2639 /* release xtpage for cmRead()/xtLookup() */ 2640 XT_PUTPAGE(pmp); 2641 2642 /* 2643 * cmRelocate() 2644 * 2645 * copy target data pages to be relocated; 2646 * 2647 * data extent must start at page boundary and 2648 * multiple of page size (except the last data extent); 2649 * read in each page of the source data extent into cbuf, 2650 * update the cbuf extent descriptor of the page to be 2651 * homeward bound to new dst data extent 2652 * copy the data from the old extent to new extent. 2653 * copy is essential for compressed files to avoid problems 2654 * that can arise if there was a change in compression 2655 * algorithms. 2656 * it is a good strategy because it may disrupt cache 2657 * policy to keep the pages in memory afterwards. 2658 */ 2659 offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; 2660 assert((offset & CM_OFFSET) == 0); 2661 nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize; 2662 pno = offset >> CM_L2BSIZE; 2663 npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE; 2664 /* 2665 npages = ((offset + nbytes - 1) >> CM_L2BSIZE) - 2666 (offset >> CM_L2BSIZE) + 1; 2667 */ 2668 sxaddr = oxaddr; 2669 dxaddr = nxaddr; 2670 2671 /* process the request one cache buffer at a time */ 2672 for (nbrd = 0; nbrd < nbytes; nbrd += nb, 2673 offset += nb, pno++, npages--) { 2674 /* compute page size */ 2675 nb = min(nbytes - nbrd, CM_BSIZE); 2676 2677 /* get the cache buffer of the page */ 2678 if (rc = cmRead(ip, offset, npages, &cp)) 2679 break; 2680 2681 assert(addressPXD(&cp->cm_pxd) == sxaddr); 2682 assert(!cp->cm_modified); 2683 2684 /* bind buffer with the new extent address */ 2685 nblks = nb >> JFS_IP(ip->i_sb)->l2bsize; 2686 cmSetXD(ip, cp, pno, dxaddr, nblks); 2687 2688 /* release the cbuf, mark it as modified */ 2689 cmPut(cp, true); 2690 2691 dxaddr += nblks; 2692 sxaddr += nblks; 2693 } 2694 2695 /* get back parent page */ 2696 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) 2697 return rc; 2698 2699 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2700 jfs_info("xtRelocate: target data extent relocated."); 2701 } else { /* (xtype == XTPAGE) */ 2702 2703 /* 2704 * read in the target xtpage from the source extent; 2705 */ 2706 XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); 2707 if (rc) { 2708 XT_PUTPAGE(pmp); 2709 return rc; 2710 } 2711 2712 /* 2713 * read in sibling pages if any to update sibling pointers; 2714 */ 2715 rmp = NULL; 2716 if (p->header.next) { 2717 nextbn = le64_to_cpu(p->header.next); 2718 XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc); 2719 if (rc) { 2720 XT_PUTPAGE(pmp); 2721 XT_PUTPAGE(mp); 2722 return (rc); 2723 } 2724 } 2725 2726 lmp = NULL; 2727 if (p->header.prev) { 2728 prevbn = le64_to_cpu(p->header.prev); 2729 XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc); 2730 if (rc) { 2731 XT_PUTPAGE(pmp); 2732 XT_PUTPAGE(mp); 2733 if (rmp) 2734 XT_PUTPAGE(rmp); 2735 return (rc); 2736 } 2737 } 2738 2739 /* at this point, all xtpages to be updated are in memory */ 2740 2741 /* 2742 * update sibling pointers of sibling xtpages if any; 2743 */ 2744 if (lmp) { 2745 BT_MARK_DIRTY(lmp, ip); 2746 tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK); 2747 lp->header.next = cpu_to_le64(nxaddr); 2748 XT_PUTPAGE(lmp); 2749 } 2750 2751 if (rmp) { 2752 BT_MARK_DIRTY(rmp, ip); 2753 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK); 2754 rp->header.prev = cpu_to_le64(nxaddr); 2755 XT_PUTPAGE(rmp); 2756 } 2757 2758 /* 2759 * update the target xtpage to be relocated 2760 * 2761 * update the self address of the target page 2762 * and write to destination extent; 2763 * redo image covers the whole xtpage since it is new page 2764 * to the destination extent; 2765 * update of bmap for the free of source extent 2766 * of the target xtpage itself: 2767 * update of bmap for the allocation of destination extent 2768 * of the target xtpage itself: 2769 * update of bmap for the extents covered by xad entries in 2770 * the target xtpage is not necessary since they are not 2771 * updated; 2772 * if not committed before this relocation, 2773 * target page may contain XAD_NEW entries which must 2774 * be scanned for bmap update (logredo() always 2775 * scan xtpage REDOPAGE image for bmap update); 2776 * if committed before this relocation (tlckRELOCATE), 2777 * scan may be skipped by commit() and logredo(); 2778 */ 2779 BT_MARK_DIRTY(mp, ip); 2780 /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */ 2781 tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW); 2782 xtlck = (struct xtlock *) & tlck->lock; 2783 2784 /* update the self address in the xtpage header */ 2785 pxd = &p->header.self; 2786 PXDaddress(pxd, nxaddr); 2787 2788 /* linelock for the after image of the whole page */ 2789 xtlck->lwm.length = 2790 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; 2791 2792 /* update the buffer extent descriptor of target xtpage */ 2793 xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; 2794 bmSetXD(mp, nxaddr, xsize); 2795 2796 /* unpin the target page to new homeward bound */ 2797 XT_PUTPAGE(mp); 2798 jfs_info("xtRelocate: target xtpage relocated."); 2799 } 2800 2801 /* 2802 * 3. acquire maplock for the source extent to be freed; 2803 * 2804 * acquire a maplock saving the src relocated extent address; 2805 * to free of the extent at commit time; 2806 */ 2807 out: 2808 /* if DATAEXT relocation, write a LOG_UPDATEMAP record for 2809 * free PXD of the source data extent (logredo() will update 2810 * bmap for free of source data extent), and update bmap for 2811 * free of the source data extent; 2812 */ 2813 if (xtype == DATAEXT) 2814 tlck = txMaplock(tid, ip, tlckMAP); 2815 /* if XTPAGE relocation, write a LOG_NOREDOPAGE record 2816 * for the source xtpage (logredo() will init NoRedoPage 2817 * filter and will also update bmap for free of the source 2818 * xtpage), and update bmap for free of the source xtpage; 2819 * N.B. We use tlckMAP instead of tlkcXTREE because there 2820 * is no buffer associated with this lock since the buffer 2821 * has been redirected to the target location. 2822 */ 2823 else /* (xtype == XTPAGE) */ 2824 tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE); 2825 2826 pxdlock = (struct pxd_lock *) & tlck->lock; 2827 pxdlock->flag = mlckFREEPXD; 2828 PXDaddress(&pxdlock->pxd, oxaddr); 2829 PXDlength(&pxdlock->pxd, xlen); 2830 pxdlock->index = 1; 2831 2832 /* 2833 * 4. update the parent xad entry for relocation; 2834 * 2835 * acquire tlck for the parent entry with XAD_NEW as entry 2836 * update which will write LOG_REDOPAGE and update bmap for 2837 * allocation of XAD_NEW destination extent; 2838 */ 2839 jfs_info("xtRelocate: update parent xad entry."); 2840 BT_MARK_DIRTY(pmp, ip); 2841 tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW); 2842 xtlck = (struct xtlock *) & tlck->lock; 2843 2844 /* update the XAD with the new destination extent; */ 2845 xad = &pp->xad[index]; 2846 xad->flag |= XAD_NEW; 2847 XADaddress(xad, nxaddr); 2848 2849 xtlck->lwm.offset = min(index, xtlck->lwm.offset); 2850 xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) - 2851 xtlck->lwm.offset; 2852 2853 /* unpin the parent xtpage */ 2854 XT_PUTPAGE(pmp); 2855 2856 return rc; 2857 } 2858 2859 2860 /* 2861 * xtSearchNode() 2862 * 2863 * function: search for the internal xad entry covering specified extent. 2864 * This function is mainly used by defragfs utility. 2865 * 2866 * parameters: 2867 * ip - file object; 2868 * xad - extent to find; 2869 * cmpp - comparison result: 2870 * btstack - traverse stack; 2871 * flag - search process flag; 2872 * 2873 * returns: 2874 * btstack contains (bn, index) of search path traversed to the entry. 2875 * *cmpp is set to result of comparison with the entry returned. 2876 * the page containing the entry is pinned at exit. 2877 */ 2878 static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ 2879 int *cmpp, struct btstack * btstack, int flag) 2880 { 2881 int rc = 0; 2882 s64 xoff, xaddr; 2883 int xlen; 2884 int cmp = 1; /* init for empty page */ 2885 s64 bn; /* block number */ 2886 struct metapage *mp; /* meta-page buffer */ 2887 xtpage_t *p; /* page */ 2888 int base, index, lim; 2889 struct btframe *btsp; 2890 s64 t64; 2891 2892 BT_CLR(btstack); 2893 2894 xoff = offsetXAD(xad); 2895 xlen = lengthXAD(xad); 2896 xaddr = addressXAD(xad); 2897 2898 /* 2899 * search down tree from root: 2900 * 2901 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 2902 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 2903 * 2904 * if entry with search key K is not found 2905 * internal page search find the entry with largest key Ki 2906 * less than K which point to the child page to search; 2907 * leaf page search find the entry with smallest key Kj 2908 * greater than K so that the returned index is the position of 2909 * the entry to be shifted right for insertion of new entry. 2910 * for empty tree, search key is greater than any key of the tree. 2911 * 2912 * by convention, root bn = 0. 2913 */ 2914 for (bn = 0;;) { 2915 /* get/pin the page to search */ 2916 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2917 if (rc) 2918 return rc; 2919 if (p->header.flag & BT_LEAF) { 2920 XT_PUTPAGE(mp); 2921 return -ESTALE; 2922 } 2923 2924 lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 2925 2926 /* 2927 * binary search with search key K on the current page 2928 */ 2929 for (base = XTENTRYSTART; lim; lim >>= 1) { 2930 index = base + (lim >> 1); 2931 2932 XT_CMP(cmp, xoff, &p->xad[index], t64); 2933 if (cmp == 0) { 2934 /* 2935 * search hit 2936 * 2937 * verify for exact match; 2938 */ 2939 if (xaddr == addressXAD(&p->xad[index]) && 2940 xoff == offsetXAD(&p->xad[index])) { 2941 *cmpp = cmp; 2942 2943 /* save search result */ 2944 btsp = btstack->top; 2945 btsp->bn = bn; 2946 btsp->index = index; 2947 btsp->mp = mp; 2948 2949 return 0; 2950 } 2951 2952 /* descend/search its child page */ 2953 goto next; 2954 } 2955 2956 if (cmp > 0) { 2957 base = index + 1; 2958 --lim; 2959 } 2960 } 2961 2962 /* 2963 * search miss - non-leaf page: 2964 * 2965 * base is the smallest index with key (Kj) greater than 2966 * search key (K) and may be zero or maxentry index. 2967 * if base is non-zero, decrement base by one to get the parent 2968 * entry of the child page to search. 2969 */ 2970 index = base ? base - 1 : base; 2971 2972 /* 2973 * go down to child page 2974 */ 2975 next: 2976 /* get the child page block number */ 2977 bn = addressXAD(&p->xad[index]); 2978 2979 /* unpin the parent page */ 2980 XT_PUTPAGE(mp); 2981 } 2982 } 2983 2984 2985 /* 2986 * xtRelink() 2987 * 2988 * function: 2989 * link around a freed page. 2990 * 2991 * Parameter: 2992 * int tid, 2993 * struct inode *ip, 2994 * xtpage_t *p) 2995 * 2996 * returns: 2997 */ 2998 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p) 2999 { 3000 int rc = 0; 3001 struct metapage *mp; 3002 s64 nextbn, prevbn; 3003 struct tlock *tlck; 3004 3005 nextbn = le64_to_cpu(p->header.next); 3006 prevbn = le64_to_cpu(p->header.prev); 3007 3008 /* update prev pointer of the next page */ 3009 if (nextbn != 0) { 3010 XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 3011 if (rc) 3012 return rc; 3013 3014 /* 3015 * acquire a transaction lock on the page; 3016 * 3017 * action: update prev pointer; 3018 */ 3019 BT_MARK_DIRTY(mp, ip); 3020 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 3021 3022 /* the page may already have been tlock'd */ 3023 3024 p->header.prev = cpu_to_le64(prevbn); 3025 3026 XT_PUTPAGE(mp); 3027 } 3028 3029 /* update next pointer of the previous page */ 3030 if (prevbn != 0) { 3031 XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc); 3032 if (rc) 3033 return rc; 3034 3035 /* 3036 * acquire a transaction lock on the page; 3037 * 3038 * action: update next pointer; 3039 */ 3040 BT_MARK_DIRTY(mp, ip); 3041 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 3042 3043 /* the page may already have been tlock'd */ 3044 3045 p->header.next = le64_to_cpu(nextbn); 3046 3047 XT_PUTPAGE(mp); 3048 } 3049 3050 return 0; 3051 } 3052 #endif /* _STILL_TO_PORT */ 3053 3054 3055 /* 3056 * xtInitRoot() 3057 * 3058 * initialize file root (inline in inode) 3059 */ 3060 void xtInitRoot(tid_t tid, struct inode *ip) 3061 { 3062 xtpage_t *p; 3063 3064 /* 3065 * acquire a transaction lock on the root 3066 * 3067 * action: 3068 */ 3069 txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag, 3070 tlckXTREE | tlckNEW); 3071 p = &JFS_IP(ip)->i_xtroot; 3072 3073 p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; 3074 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3075 3076 if (S_ISDIR(ip->i_mode)) 3077 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR); 3078 else { 3079 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT); 3080 ip->i_size = 0; 3081 } 3082 3083 3084 return; 3085 } 3086 3087 3088 /* 3089 * We can run into a deadlock truncating a file with a large number of 3090 * xtree pages (large fragmented file). A robust fix would entail a 3091 * reservation system where we would reserve a number of metadata pages 3092 * and tlocks which we would be guaranteed without a deadlock. Without 3093 * this, a partial fix is to limit number of metadata pages we will lock 3094 * in a single transaction. Currently we will truncate the file so that 3095 * no more than 50 leaf pages will be locked. The caller of xtTruncate 3096 * will be responsible for ensuring that the current transaction gets 3097 * committed, and that subsequent transactions are created to truncate 3098 * the file further if needed. 3099 */ 3100 #define MAX_TRUNCATE_LEAVES 50 3101 3102 /* 3103 * xtTruncate() 3104 * 3105 * function: 3106 * traverse for truncation logging backward bottom up; 3107 * terminate at the last extent entry at the current subtree 3108 * root page covering new down size. 3109 * truncation may occur within the last extent entry. 3110 * 3111 * parameter: 3112 * int tid, 3113 * struct inode *ip, 3114 * s64 newsize, 3115 * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} 3116 * 3117 * return: 3118 * 3119 * note: 3120 * PWMAP: 3121 * 1. truncate (non-COMMIT_NOLINK file) 3122 * by jfs_truncate() or jfs_open(O_TRUNC): 3123 * xtree is updated; 3124 * 2. truncate index table of directory when last entry removed 3125 * map update via tlock at commit time; 3126 * PMAP: 3127 * Call xtTruncate_pmap instead 3128 * WMAP: 3129 * 1. remove (free zero link count) on last reference release 3130 * (pmap has been freed at commit zero link count); 3131 * 2. truncate (COMMIT_NOLINK file, i.e., tmp file): 3132 * xtree is updated; 3133 * map update directly at truncation time; 3134 * 3135 * if (DELETE) 3136 * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); 3137 * else if (TRUNCATE) 3138 * must write LOG_NOREDOPAGE for deleted index page; 3139 * 3140 * pages may already have been tlocked by anonymous transactions 3141 * during file growth (i.e., write) before truncation; 3142 * 3143 * except last truncated entry, deleted entries remains as is 3144 * in the page (nextindex is updated) for other use 3145 * (e.g., log/update allocation map): this avoid copying the page 3146 * info but delay free of pages; 3147 * 3148 */ 3149 s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) 3150 { 3151 int rc = 0; 3152 s64 teof; 3153 struct metapage *mp; 3154 xtpage_t *p; 3155 s64 bn; 3156 int index, nextindex; 3157 xad_t *xad; 3158 s64 xoff, xaddr; 3159 int xlen, len, freexlen; 3160 struct btstack btstack; 3161 struct btframe *parent; 3162 struct tblock *tblk = NULL; 3163 struct tlock *tlck = NULL; 3164 struct xtlock *xtlck = NULL; 3165 struct xdlistlock xadlock; /* maplock for COMMIT_WMAP */ 3166 struct pxd_lock *pxdlock; /* maplock for COMMIT_WMAP */ 3167 s64 nfreed; 3168 int freed, log; 3169 int locked_leaves = 0; 3170 3171 /* save object truncation type */ 3172 if (tid) { 3173 tblk = tid_to_tblock(tid); 3174 tblk->xflag |= flag; 3175 } 3176 3177 nfreed = 0; 3178 3179 flag &= COMMIT_MAP; 3180 assert(flag != COMMIT_PMAP); 3181 3182 if (flag == COMMIT_PWMAP) 3183 log = 1; 3184 else { 3185 log = 0; 3186 xadlock.flag = mlckFREEXADLIST; 3187 xadlock.index = 1; 3188 } 3189 3190 /* 3191 * if the newsize is not an integral number of pages, 3192 * the file between newsize and next page boundary will 3193 * be cleared. 3194 * if truncating into a file hole, it will cause 3195 * a full block to be allocated for the logical block. 3196 */ 3197 3198 /* 3199 * release page blocks of truncated region <teof, eof> 3200 * 3201 * free the data blocks from the leaf index blocks. 3202 * delete the parent index entries corresponding to 3203 * the freed child data/index blocks. 3204 * free the index blocks themselves which aren't needed 3205 * in new sized file. 3206 * 3207 * index blocks are updated only if the blocks are to be 3208 * retained in the new sized file. 3209 * if type is PMAP, the data and index pages are NOT 3210 * freed, and the data and index blocks are NOT freed 3211 * from working map. 3212 * (this will allow continued access of data/index of 3213 * temporary file (zerolink count file truncated to zero-length)). 3214 */ 3215 teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >> 3216 JFS_SBI(ip->i_sb)->l2bsize; 3217 3218 /* clear stack */ 3219 BT_CLR(&btstack); 3220 3221 /* 3222 * start with root 3223 * 3224 * root resides in the inode 3225 */ 3226 bn = 0; 3227 3228 /* 3229 * first access of each page: 3230 */ 3231 getPage: 3232 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3233 if (rc) 3234 return rc; 3235 3236 /* process entries backward from last index */ 3237 index = le16_to_cpu(p->header.nextindex) - 1; 3238 3239 3240 /* Since this is the rightmost page at this level, and we may have 3241 * already freed a page that was formerly to the right, let's make 3242 * sure that the next pointer is zero. 3243 */ 3244 if (p->header.next) { 3245 if (log) 3246 /* 3247 * Make sure this change to the header is logged. 3248 * If we really truncate this leaf, the flag 3249 * will be changed to tlckTRUNCATE 3250 */ 3251 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 3252 BT_MARK_DIRTY(mp, ip); 3253 p->header.next = 0; 3254 } 3255 3256 if (p->header.flag & BT_INTERNAL) 3257 goto getChild; 3258 3259 /* 3260 * leaf page 3261 */ 3262 freed = 0; 3263 3264 /* does region covered by leaf page precede Teof ? */ 3265 xad = &p->xad[index]; 3266 xoff = offsetXAD(xad); 3267 xlen = lengthXAD(xad); 3268 if (teof >= xoff + xlen) { 3269 XT_PUTPAGE(mp); 3270 goto getParent; 3271 } 3272 3273 /* (re)acquire tlock of the leaf page */ 3274 if (log) { 3275 if (++locked_leaves > MAX_TRUNCATE_LEAVES) { 3276 /* 3277 * We need to limit the size of the transaction 3278 * to avoid exhausting pagecache & tlocks 3279 */ 3280 XT_PUTPAGE(mp); 3281 newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; 3282 goto getParent; 3283 } 3284 tlck = txLock(tid, ip, mp, tlckXTREE); 3285 tlck->type = tlckXTREE | tlckTRUNCATE; 3286 xtlck = (struct xtlock *) & tlck->lock; 3287 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; 3288 } 3289 BT_MARK_DIRTY(mp, ip); 3290 3291 /* 3292 * scan backward leaf page entries 3293 */ 3294 for (; index >= XTENTRYSTART; index--) { 3295 xad = &p->xad[index]; 3296 xoff = offsetXAD(xad); 3297 xlen = lengthXAD(xad); 3298 xaddr = addressXAD(xad); 3299 3300 /* 3301 * The "data" for a directory is indexed by the block 3302 * device's address space. This metadata must be invalidated 3303 * here 3304 */ 3305 if (S_ISDIR(ip->i_mode) && (teof == 0)) 3306 invalidate_xad_metapages(ip, *xad); 3307 /* 3308 * entry beyond eof: continue scan of current page 3309 * xad 3310 * ---|---=======-------> 3311 * eof 3312 */ 3313 if (teof < xoff) { 3314 nfreed += xlen; 3315 continue; 3316 } 3317 3318 /* 3319 * (xoff <= teof): last entry to be deleted from page; 3320 * If other entries remain in page: keep and update the page. 3321 */ 3322 3323 /* 3324 * eof == entry_start: delete the entry 3325 * xad 3326 * -------|=======-------> 3327 * eof 3328 * 3329 */ 3330 if (teof == xoff) { 3331 nfreed += xlen; 3332 3333 if (index == XTENTRYSTART) 3334 break; 3335 3336 nextindex = index; 3337 } 3338 /* 3339 * eof within the entry: truncate the entry. 3340 * xad 3341 * -------===|===-------> 3342 * eof 3343 */ 3344 else if (teof < xoff + xlen) { 3345 /* update truncated entry */ 3346 len = teof - xoff; 3347 freexlen = xlen - len; 3348 XADlength(xad, len); 3349 3350 /* save pxd of truncated extent in tlck */ 3351 xaddr += len; 3352 if (log) { /* COMMIT_PWMAP */ 3353 xtlck->lwm.offset = (xtlck->lwm.offset) ? 3354 min(index, (int)xtlck->lwm.offset) : index; 3355 xtlck->lwm.length = index + 1 - 3356 xtlck->lwm.offset; 3357 xtlck->twm.offset = index; 3358 pxdlock = (struct pxd_lock *) & xtlck->pxdlock; 3359 pxdlock->flag = mlckFREEPXD; 3360 PXDaddress(&pxdlock->pxd, xaddr); 3361 PXDlength(&pxdlock->pxd, freexlen); 3362 } 3363 /* free truncated extent */ 3364 else { /* COMMIT_WMAP */ 3365 3366 pxdlock = (struct pxd_lock *) & xadlock; 3367 pxdlock->flag = mlckFREEPXD; 3368 PXDaddress(&pxdlock->pxd, xaddr); 3369 PXDlength(&pxdlock->pxd, freexlen); 3370 txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP); 3371 3372 /* reset map lock */ 3373 xadlock.flag = mlckFREEXADLIST; 3374 } 3375 3376 /* current entry is new last entry; */ 3377 nextindex = index + 1; 3378 3379 nfreed += freexlen; 3380 } 3381 /* 3382 * eof beyond the entry: 3383 * xad 3384 * -------=======---|---> 3385 * eof 3386 */ 3387 else { /* (xoff + xlen < teof) */ 3388 3389 nextindex = index + 1; 3390 } 3391 3392 if (nextindex < le16_to_cpu(p->header.nextindex)) { 3393 if (!log) { /* COMMIT_WAMP */ 3394 xadlock.xdlist = &p->xad[nextindex]; 3395 xadlock.count = 3396 le16_to_cpu(p->header.nextindex) - 3397 nextindex; 3398 txFreeMap(ip, (struct maplock *) & xadlock, 3399 NULL, COMMIT_WMAP); 3400 } 3401 p->header.nextindex = cpu_to_le16(nextindex); 3402 } 3403 3404 XT_PUTPAGE(mp); 3405 3406 /* assert(freed == 0); */ 3407 goto getParent; 3408 } /* end scan of leaf page entries */ 3409 3410 freed = 1; 3411 3412 /* 3413 * leaf page become empty: free the page if type != PMAP 3414 */ 3415 if (log) { /* COMMIT_PWMAP */ 3416 /* txCommit() with tlckFREE: 3417 * free data extents covered by leaf [XTENTRYSTART:hwm); 3418 * invalidate leaf if COMMIT_PWMAP; 3419 * if (TRUNCATE), will write LOG_NOREDOPAGE; 3420 */ 3421 tlck->type = tlckXTREE | tlckFREE; 3422 } else { /* COMMIT_WAMP */ 3423 3424 /* free data extents covered by leaf */ 3425 xadlock.xdlist = &p->xad[XTENTRYSTART]; 3426 xadlock.count = 3427 le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 3428 txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP); 3429 } 3430 3431 if (p->header.flag & BT_ROOT) { 3432 p->header.flag &= ~BT_INTERNAL; 3433 p->header.flag |= BT_LEAF; 3434 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3435 3436 XT_PUTPAGE(mp); /* debug */ 3437 goto out; 3438 } else { 3439 if (log) { /* COMMIT_PWMAP */ 3440 /* page will be invalidated at tx completion 3441 */ 3442 XT_PUTPAGE(mp); 3443 } else { /* COMMIT_WMAP */ 3444 3445 if (mp->lid) 3446 lid_to_tlock(mp->lid)->flag |= tlckFREELOCK; 3447 3448 /* invalidate empty leaf page */ 3449 discard_metapage(mp); 3450 } 3451 } 3452 3453 /* 3454 * the leaf page become empty: delete the parent entry 3455 * for the leaf page if the parent page is to be kept 3456 * in the new sized file. 3457 */ 3458 3459 /* 3460 * go back up to the parent page 3461 */ 3462 getParent: 3463 /* pop/restore parent entry for the current child page */ 3464 if ((parent = BT_POP(&btstack)) == NULL) 3465 /* current page must have been root */ 3466 goto out; 3467 3468 /* get back the parent page */ 3469 bn = parent->bn; 3470 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3471 if (rc) 3472 return rc; 3473 3474 index = parent->index; 3475 3476 /* 3477 * child page was not empty: 3478 */ 3479 if (freed == 0) { 3480 /* has any entry deleted from parent ? */ 3481 if (index < le16_to_cpu(p->header.nextindex) - 1) { 3482 /* (re)acquire tlock on the parent page */ 3483 if (log) { /* COMMIT_PWMAP */ 3484 /* txCommit() with tlckTRUNCATE: 3485 * free child extents covered by parent [); 3486 */ 3487 tlck = txLock(tid, ip, mp, tlckXTREE); 3488 xtlck = (struct xtlock *) & tlck->lock; 3489 if (!(tlck->type & tlckTRUNCATE)) { 3490 xtlck->hwm.offset = 3491 le16_to_cpu(p->header. 3492 nextindex) - 1; 3493 tlck->type = 3494 tlckXTREE | tlckTRUNCATE; 3495 } 3496 } else { /* COMMIT_WMAP */ 3497 3498 /* free child extents covered by parent */ 3499 xadlock.xdlist = &p->xad[index + 1]; 3500 xadlock.count = 3501 le16_to_cpu(p->header.nextindex) - 3502 index - 1; 3503 txFreeMap(ip, (struct maplock *) & xadlock, 3504 NULL, COMMIT_WMAP); 3505 } 3506 BT_MARK_DIRTY(mp, ip); 3507 3508 p->header.nextindex = cpu_to_le16(index + 1); 3509 } 3510 XT_PUTPAGE(mp); 3511 goto getParent; 3512 } 3513 3514 /* 3515 * child page was empty: 3516 */ 3517 nfreed += lengthXAD(&p->xad[index]); 3518 3519 /* 3520 * During working map update, child page's tlock must be handled 3521 * before parent's. This is because the parent's tlock will cause 3522 * the child's disk space to be marked available in the wmap, so 3523 * it's important that the child page be released by that time. 3524 * 3525 * ToDo: tlocks should be on doubly-linked list, so we can 3526 * quickly remove it and add it to the end. 3527 */ 3528 3529 /* 3530 * Move parent page's tlock to the end of the tid's tlock list 3531 */ 3532 if (log && mp->lid && (tblk->last != mp->lid) && 3533 lid_to_tlock(mp->lid)->tid) { 3534 lid_t lid = mp->lid; 3535 struct tlock *prev; 3536 3537 tlck = lid_to_tlock(lid); 3538 3539 if (tblk->next == lid) 3540 tblk->next = tlck->next; 3541 else { 3542 for (prev = lid_to_tlock(tblk->next); 3543 prev->next != lid; 3544 prev = lid_to_tlock(prev->next)) { 3545 assert(prev->next); 3546 } 3547 prev->next = tlck->next; 3548 } 3549 lid_to_tlock(tblk->last)->next = lid; 3550 tlck->next = 0; 3551 tblk->last = lid; 3552 } 3553 3554 /* 3555 * parent page become empty: free the page 3556 */ 3557 if (index == XTENTRYSTART) { 3558 if (log) { /* COMMIT_PWMAP */ 3559 /* txCommit() with tlckFREE: 3560 * free child extents covered by parent; 3561 * invalidate parent if COMMIT_PWMAP; 3562 */ 3563 tlck = txLock(tid, ip, mp, tlckXTREE); 3564 xtlck = (struct xtlock *) & tlck->lock; 3565 xtlck->hwm.offset = 3566 le16_to_cpu(p->header.nextindex) - 1; 3567 tlck->type = tlckXTREE | tlckFREE; 3568 } else { /* COMMIT_WMAP */ 3569 3570 /* free child extents covered by parent */ 3571 xadlock.xdlist = &p->xad[XTENTRYSTART]; 3572 xadlock.count = 3573 le16_to_cpu(p->header.nextindex) - 3574 XTENTRYSTART; 3575 txFreeMap(ip, (struct maplock *) & xadlock, NULL, 3576 COMMIT_WMAP); 3577 } 3578 BT_MARK_DIRTY(mp, ip); 3579 3580 if (p->header.flag & BT_ROOT) { 3581 p->header.flag &= ~BT_INTERNAL; 3582 p->header.flag |= BT_LEAF; 3583 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3584 if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) { 3585 /* 3586 * Shrink root down to allow inline 3587 * EA (otherwise fsck complains) 3588 */ 3589 p->header.maxentry = 3590 cpu_to_le16(XTROOTINITSLOT); 3591 JFS_IP(ip)->mode2 |= INLINEEA; 3592 } 3593 3594 XT_PUTPAGE(mp); /* debug */ 3595 goto out; 3596 } else { 3597 if (log) { /* COMMIT_PWMAP */ 3598 /* page will be invalidated at tx completion 3599 */ 3600 XT_PUTPAGE(mp); 3601 } else { /* COMMIT_WMAP */ 3602 3603 if (mp->lid) 3604 lid_to_tlock(mp->lid)->flag |= 3605 tlckFREELOCK; 3606 3607 /* invalidate parent page */ 3608 discard_metapage(mp); 3609 } 3610 3611 /* parent has become empty and freed: 3612 * go back up to its parent page 3613 */ 3614 /* freed = 1; */ 3615 goto getParent; 3616 } 3617 } 3618 /* 3619 * parent page still has entries for front region; 3620 */ 3621 else { 3622 /* try truncate region covered by preceding entry 3623 * (process backward) 3624 */ 3625 index--; 3626 3627 /* go back down to the child page corresponding 3628 * to the entry 3629 */ 3630 goto getChild; 3631 } 3632 3633 /* 3634 * internal page: go down to child page of current entry 3635 */ 3636 getChild: 3637 /* save current parent entry for the child page */ 3638 if (BT_STACK_FULL(&btstack)) { 3639 jfs_error(ip->i_sb, "stack overrun!\n"); 3640 XT_PUTPAGE(mp); 3641 return -EIO; 3642 } 3643 BT_PUSH(&btstack, bn, index); 3644 3645 /* get child page */ 3646 xad = &p->xad[index]; 3647 bn = addressXAD(xad); 3648 3649 /* 3650 * first access of each internal entry: 3651 */ 3652 /* release parent page */ 3653 XT_PUTPAGE(mp); 3654 3655 /* process the child page */ 3656 goto getPage; 3657 3658 out: 3659 /* 3660 * update file resource stat 3661 */ 3662 /* set size 3663 */ 3664 if (S_ISDIR(ip->i_mode) && !newsize) 3665 ip->i_size = 1; /* fsck hates zero-length directories */ 3666 else 3667 ip->i_size = newsize; 3668 3669 /* update quota allocation to reflect freed blocks */ 3670 dquot_free_block(ip, nfreed); 3671 3672 /* 3673 * free tlock of invalidated pages 3674 */ 3675 if (flag == COMMIT_WMAP) 3676 txFreelock(ip); 3677 3678 return newsize; 3679 } 3680 3681 3682 /* 3683 * xtTruncate_pmap() 3684 * 3685 * function: 3686 * Perform truncate to zero length for deleted file, leaving the 3687 * the xtree and working map untouched. This allows the file to 3688 * be accessed via open file handles, while the delete of the file 3689 * is committed to disk. 3690 * 3691 * parameter: 3692 * tid_t tid, 3693 * struct inode *ip, 3694 * s64 committed_size) 3695 * 3696 * return: new committed size 3697 * 3698 * note: 3699 * 3700 * To avoid deadlock by holding too many transaction locks, the 3701 * truncation may be broken up into multiple transactions. 3702 * The committed_size keeps track of part of the file has been 3703 * freed from the pmaps. 3704 */ 3705 s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) 3706 { 3707 s64 bn; 3708 struct btstack btstack; 3709 int cmp; 3710 int index; 3711 int locked_leaves = 0; 3712 struct metapage *mp; 3713 xtpage_t *p; 3714 struct btframe *parent; 3715 int rc; 3716 struct tblock *tblk; 3717 struct tlock *tlck = NULL; 3718 xad_t *xad; 3719 int xlen; 3720 s64 xoff; 3721 struct xtlock *xtlck = NULL; 3722 3723 /* save object truncation type */ 3724 tblk = tid_to_tblock(tid); 3725 tblk->xflag |= COMMIT_PMAP; 3726 3727 /* clear stack */ 3728 BT_CLR(&btstack); 3729 3730 if (committed_size) { 3731 xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1; 3732 rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); 3733 if (rc) 3734 return rc; 3735 3736 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 3737 3738 if (cmp != 0) { 3739 XT_PUTPAGE(mp); 3740 jfs_error(ip->i_sb, "did not find extent\n"); 3741 return -EIO; 3742 } 3743 } else { 3744 /* 3745 * start with root 3746 * 3747 * root resides in the inode 3748 */ 3749 bn = 0; 3750 3751 /* 3752 * first access of each page: 3753 */ 3754 getPage: 3755 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3756 if (rc) 3757 return rc; 3758 3759 /* process entries backward from last index */ 3760 index = le16_to_cpu(p->header.nextindex) - 1; 3761 3762 if (p->header.flag & BT_INTERNAL) 3763 goto getChild; 3764 } 3765 3766 /* 3767 * leaf page 3768 */ 3769 3770 if (++locked_leaves > MAX_TRUNCATE_LEAVES) { 3771 /* 3772 * We need to limit the size of the transaction 3773 * to avoid exhausting pagecache & tlocks 3774 */ 3775 xad = &p->xad[index]; 3776 xoff = offsetXAD(xad); 3777 xlen = lengthXAD(xad); 3778 XT_PUTPAGE(mp); 3779 return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; 3780 } 3781 tlck = txLock(tid, ip, mp, tlckXTREE); 3782 tlck->type = tlckXTREE | tlckFREE; 3783 xtlck = (struct xtlock *) & tlck->lock; 3784 xtlck->hwm.offset = index; 3785 3786 3787 XT_PUTPAGE(mp); 3788 3789 /* 3790 * go back up to the parent page 3791 */ 3792 getParent: 3793 /* pop/restore parent entry for the current child page */ 3794 if ((parent = BT_POP(&btstack)) == NULL) 3795 /* current page must have been root */ 3796 goto out; 3797 3798 /* get back the parent page */ 3799 bn = parent->bn; 3800 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3801 if (rc) 3802 return rc; 3803 3804 index = parent->index; 3805 3806 /* 3807 * parent page become empty: free the page 3808 */ 3809 if (index == XTENTRYSTART) { 3810 /* txCommit() with tlckFREE: 3811 * free child extents covered by parent; 3812 * invalidate parent if COMMIT_PWMAP; 3813 */ 3814 tlck = txLock(tid, ip, mp, tlckXTREE); 3815 xtlck = (struct xtlock *) & tlck->lock; 3816 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; 3817 tlck->type = tlckXTREE | tlckFREE; 3818 3819 XT_PUTPAGE(mp); 3820 3821 if (p->header.flag & BT_ROOT) { 3822 3823 goto out; 3824 } else { 3825 goto getParent; 3826 } 3827 } 3828 /* 3829 * parent page still has entries for front region; 3830 */ 3831 else 3832 index--; 3833 /* 3834 * internal page: go down to child page of current entry 3835 */ 3836 getChild: 3837 /* save current parent entry for the child page */ 3838 if (BT_STACK_FULL(&btstack)) { 3839 jfs_error(ip->i_sb, "stack overrun!\n"); 3840 XT_PUTPAGE(mp); 3841 return -EIO; 3842 } 3843 BT_PUSH(&btstack, bn, index); 3844 3845 /* get child page */ 3846 xad = &p->xad[index]; 3847 bn = addressXAD(xad); 3848 3849 /* 3850 * first access of each internal entry: 3851 */ 3852 /* release parent page */ 3853 XT_PUTPAGE(mp); 3854 3855 /* process the child page */ 3856 goto getPage; 3857 3858 out: 3859 3860 return 0; 3861 } 3862 3863 #ifdef CONFIG_JFS_STATISTICS 3864 int jfs_xtstat_proc_show(struct seq_file *m, void *v) 3865 { 3866 seq_printf(m, 3867 "JFS Xtree statistics\n" 3868 "====================\n" 3869 "searches = %d\n" 3870 "fast searches = %d\n" 3871 "splits = %d\n", 3872 xtStat.search, 3873 xtStat.fastSearch, 3874 xtStat.split); 3875 return 0; 3876 } 3877 #endif 3878