1 /* 2 * Copyright (C) International Business Machines Corp., 2000-2005 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * the GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 */ 18 /* 19 * jfs_xtree.c: extent allocation descriptor B+-tree manager 20 */ 21 22 #include <linux/fs.h> 23 #include <linux/module.h> 24 #include <linux/quotaops.h> 25 #include <linux/seq_file.h> 26 #include "jfs_incore.h" 27 #include "jfs_filsys.h" 28 #include "jfs_metapage.h" 29 #include "jfs_dmap.h" 30 #include "jfs_dinode.h" 31 #include "jfs_superblock.h" 32 #include "jfs_debug.h" 33 34 /* 35 * xtree local flag 36 */ 37 #define XT_INSERT 0x00000001 38 39 /* 40 * xtree key/entry comparison: extent offset 41 * 42 * return: 43 * -1: k < start of extent 44 * 0: start_of_extent <= k <= end_of_extent 45 * 1: k > end_of_extent 46 */ 47 #define XT_CMP(CMP, K, X, OFFSET64)\ 48 {\ 49 OFFSET64 = offsetXAD(X);\ 50 (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\ 51 ((K) < OFFSET64) ? -1 : 0;\ 52 } 53 54 /* write a xad entry */ 55 #define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\ 56 {\ 57 (XAD)->flag = (FLAG);\ 58 XADoffset((XAD), (OFF));\ 59 XADlength((XAD), (LEN));\ 60 XADaddress((XAD), (ADDR));\ 61 } 62 63 #define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot) 64 65 /* get page buffer for specified block address */ 66 /* ToDo: Replace this ugly macro with a function */ 67 #define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ 68 {\ 69 BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\ 70 if (!(RC))\ 71 {\ 72 if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\ 73 (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\ 74 (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\ 75 {\ 76 jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\ 77 BT_PUTPAGE(MP);\ 78 MP = NULL;\ 79 RC = -EIO;\ 80 }\ 81 }\ 82 } 83 84 /* for consistency */ 85 #define XT_PUTPAGE(MP) BT_PUTPAGE(MP) 86 87 #define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ 88 BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot) 89 /* xtree entry parameter descriptor */ 90 struct xtsplit { 91 struct metapage *mp; 92 s16 index; 93 u8 flag; 94 s64 off; 95 s64 addr; 96 int len; 97 struct pxdlist *pxdlist; 98 }; 99 100 101 /* 102 * statistics 103 */ 104 #ifdef CONFIG_JFS_STATISTICS 105 static struct { 106 uint search; 107 uint fastSearch; 108 uint split; 109 } xtStat; 110 #endif 111 112 113 /* 114 * forward references 115 */ 116 static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp, 117 struct btstack * btstack, int flag); 118 119 static int xtSplitUp(tid_t tid, 120 struct inode *ip, 121 struct xtsplit * split, struct btstack * btstack); 122 123 static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split, 124 struct metapage ** rmpp, s64 * rbnp); 125 126 static int xtSplitRoot(tid_t tid, struct inode *ip, 127 struct xtsplit * split, struct metapage ** rmpp); 128 129 #ifdef _STILL_TO_PORT 130 static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp, 131 xtpage_t * fp, struct btstack * btstack); 132 133 static int xtSearchNode(struct inode *ip, 134 xad_t * xad, 135 int *cmpp, struct btstack * btstack, int flag); 136 137 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp); 138 #endif /* _STILL_TO_PORT */ 139 140 /* 141 * xtLookup() 142 * 143 * function: map a single page into a physical extent; 144 */ 145 int xtLookup(struct inode *ip, s64 lstart, 146 s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check) 147 { 148 int rc = 0; 149 struct btstack btstack; 150 int cmp; 151 s64 bn; 152 struct metapage *mp; 153 xtpage_t *p; 154 int index; 155 xad_t *xad; 156 s64 next, size, xoff, xend; 157 int xlen; 158 s64 xaddr; 159 160 *paddr = 0; 161 *plen = llen; 162 163 if (!no_check) { 164 /* is lookup offset beyond eof ? */ 165 size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >> 166 JFS_SBI(ip->i_sb)->l2bsize; 167 if (lstart >= size) 168 return 0; 169 } 170 171 /* 172 * search for the xad entry covering the logical extent 173 */ 174 //search: 175 if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) { 176 jfs_err("xtLookup: xtSearch returned %d", rc); 177 return rc; 178 } 179 180 /* 181 * compute the physical extent covering logical extent 182 * 183 * N.B. search may have failed (e.g., hole in sparse file), 184 * and returned the index of the next entry. 185 */ 186 /* retrieve search result */ 187 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 188 189 /* is xad found covering start of logical extent ? 190 * lstart is a page start address, 191 * i.e., lstart cannot start in a hole; 192 */ 193 if (cmp) { 194 if (next) 195 *plen = min(next - lstart, llen); 196 goto out; 197 } 198 199 /* 200 * lxd covered by xad 201 */ 202 xad = &p->xad[index]; 203 xoff = offsetXAD(xad); 204 xlen = lengthXAD(xad); 205 xend = xoff + xlen; 206 xaddr = addressXAD(xad); 207 208 /* initialize new pxd */ 209 *pflag = xad->flag; 210 *paddr = xaddr + (lstart - xoff); 211 /* a page must be fully covered by an xad */ 212 *plen = min(xend - lstart, llen); 213 214 out: 215 XT_PUTPAGE(mp); 216 217 return rc; 218 } 219 220 /* 221 * xtSearch() 222 * 223 * function: search for the xad entry covering specified offset. 224 * 225 * parameters: 226 * ip - file object; 227 * xoff - extent offset; 228 * nextp - address of next extent (if any) for search miss 229 * cmpp - comparison result: 230 * btstack - traverse stack; 231 * flag - search process flag (XT_INSERT); 232 * 233 * returns: 234 * btstack contains (bn, index) of search path traversed to the entry. 235 * *cmpp is set to result of comparison with the entry returned. 236 * the page containing the entry is pinned at exit. 237 */ 238 static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, 239 int *cmpp, struct btstack * btstack, int flag) 240 { 241 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 242 int rc = 0; 243 int cmp = 1; /* init for empty page */ 244 s64 bn; /* block number */ 245 struct metapage *mp; /* page buffer */ 246 xtpage_t *p; /* page */ 247 xad_t *xad; 248 int base, index, lim, btindex; 249 struct btframe *btsp; 250 int nsplit = 0; /* number of pages to split */ 251 s64 t64; 252 s64 next = 0; 253 254 INCREMENT(xtStat.search); 255 256 BT_CLR(btstack); 257 258 btstack->nsplit = 0; 259 260 /* 261 * search down tree from root: 262 * 263 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 264 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 265 * 266 * if entry with search key K is not found 267 * internal page search find the entry with largest key Ki 268 * less than K which point to the child page to search; 269 * leaf page search find the entry with smallest key Kj 270 * greater than K so that the returned index is the position of 271 * the entry to be shifted right for insertion of new entry. 272 * for empty tree, search key is greater than any key of the tree. 273 * 274 * by convention, root bn = 0. 275 */ 276 for (bn = 0;;) { 277 /* get/pin the page to search */ 278 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 279 if (rc) 280 return rc; 281 282 /* try sequential access heuristics with the previous 283 * access entry in target leaf page: 284 * once search narrowed down into the target leaf, 285 * key must either match an entry in the leaf or 286 * key entry does not exist in the tree; 287 */ 288 //fastSearch: 289 if ((jfs_ip->btorder & BT_SEQUENTIAL) && 290 (p->header.flag & BT_LEAF) && 291 (index = jfs_ip->btindex) < 292 le16_to_cpu(p->header.nextindex)) { 293 xad = &p->xad[index]; 294 t64 = offsetXAD(xad); 295 if (xoff < t64 + lengthXAD(xad)) { 296 if (xoff >= t64) { 297 *cmpp = 0; 298 goto out; 299 } 300 301 /* stop sequential access heuristics */ 302 goto binarySearch; 303 } else { /* (t64 + lengthXAD(xad)) <= xoff */ 304 305 /* try next sequential entry */ 306 index++; 307 if (index < 308 le16_to_cpu(p->header.nextindex)) { 309 xad++; 310 t64 = offsetXAD(xad); 311 if (xoff < t64 + lengthXAD(xad)) { 312 if (xoff >= t64) { 313 *cmpp = 0; 314 goto out; 315 } 316 317 /* miss: key falls between 318 * previous and this entry 319 */ 320 *cmpp = 1; 321 next = t64; 322 goto out; 323 } 324 325 /* (xoff >= t64 + lengthXAD(xad)); 326 * matching entry may be further out: 327 * stop heuristic search 328 */ 329 /* stop sequential access heuristics */ 330 goto binarySearch; 331 } 332 333 /* (index == p->header.nextindex); 334 * miss: key entry does not exist in 335 * the target leaf/tree 336 */ 337 *cmpp = 1; 338 goto out; 339 } 340 341 /* 342 * if hit, return index of the entry found, and 343 * if miss, where new entry with search key is 344 * to be inserted; 345 */ 346 out: 347 /* compute number of pages to split */ 348 if (flag & XT_INSERT) { 349 if (p->header.nextindex == /* little-endian */ 350 p->header.maxentry) 351 nsplit++; 352 else 353 nsplit = 0; 354 btstack->nsplit = nsplit; 355 } 356 357 /* save search result */ 358 btsp = btstack->top; 359 btsp->bn = bn; 360 btsp->index = index; 361 btsp->mp = mp; 362 363 /* update sequential access heuristics */ 364 jfs_ip->btindex = index; 365 366 if (nextp) 367 *nextp = next; 368 369 INCREMENT(xtStat.fastSearch); 370 return 0; 371 } 372 373 /* well, ... full search now */ 374 binarySearch: 375 lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 376 377 /* 378 * binary search with search key K on the current page 379 */ 380 for (base = XTENTRYSTART; lim; lim >>= 1) { 381 index = base + (lim >> 1); 382 383 XT_CMP(cmp, xoff, &p->xad[index], t64); 384 if (cmp == 0) { 385 /* 386 * search hit 387 */ 388 /* search hit - leaf page: 389 * return the entry found 390 */ 391 if (p->header.flag & BT_LEAF) { 392 *cmpp = cmp; 393 394 /* compute number of pages to split */ 395 if (flag & XT_INSERT) { 396 if (p->header.nextindex == 397 p->header.maxentry) 398 nsplit++; 399 else 400 nsplit = 0; 401 btstack->nsplit = nsplit; 402 } 403 404 /* save search result */ 405 btsp = btstack->top; 406 btsp->bn = bn; 407 btsp->index = index; 408 btsp->mp = mp; 409 410 /* init sequential access heuristics */ 411 btindex = jfs_ip->btindex; 412 if (index == btindex || 413 index == btindex + 1) 414 jfs_ip->btorder = BT_SEQUENTIAL; 415 else 416 jfs_ip->btorder = BT_RANDOM; 417 jfs_ip->btindex = index; 418 419 return 0; 420 } 421 /* search hit - internal page: 422 * descend/search its child page 423 */ 424 if (index < le16_to_cpu(p->header.nextindex)-1) 425 next = offsetXAD(&p->xad[index + 1]); 426 goto next; 427 } 428 429 if (cmp > 0) { 430 base = index + 1; 431 --lim; 432 } 433 } 434 435 /* 436 * search miss 437 * 438 * base is the smallest index with key (Kj) greater than 439 * search key (K) and may be zero or maxentry index. 440 */ 441 if (base < le16_to_cpu(p->header.nextindex)) 442 next = offsetXAD(&p->xad[base]); 443 /* 444 * search miss - leaf page: 445 * 446 * return location of entry (base) where new entry with 447 * search key K is to be inserted. 448 */ 449 if (p->header.flag & BT_LEAF) { 450 *cmpp = cmp; 451 452 /* compute number of pages to split */ 453 if (flag & XT_INSERT) { 454 if (p->header.nextindex == 455 p->header.maxentry) 456 nsplit++; 457 else 458 nsplit = 0; 459 btstack->nsplit = nsplit; 460 } 461 462 /* save search result */ 463 btsp = btstack->top; 464 btsp->bn = bn; 465 btsp->index = base; 466 btsp->mp = mp; 467 468 /* init sequential access heuristics */ 469 btindex = jfs_ip->btindex; 470 if (base == btindex || base == btindex + 1) 471 jfs_ip->btorder = BT_SEQUENTIAL; 472 else 473 jfs_ip->btorder = BT_RANDOM; 474 jfs_ip->btindex = base; 475 476 if (nextp) 477 *nextp = next; 478 479 return 0; 480 } 481 482 /* 483 * search miss - non-leaf page: 484 * 485 * if base is non-zero, decrement base by one to get the parent 486 * entry of the child page to search. 487 */ 488 index = base ? base - 1 : base; 489 490 /* 491 * go down to child page 492 */ 493 next: 494 /* update number of pages to split */ 495 if (p->header.nextindex == p->header.maxentry) 496 nsplit++; 497 else 498 nsplit = 0; 499 500 /* push (bn, index) of the parent page/entry */ 501 if (BT_STACK_FULL(btstack)) { 502 jfs_error(ip->i_sb, "stack overrun in xtSearch!"); 503 XT_PUTPAGE(mp); 504 return -EIO; 505 } 506 BT_PUSH(btstack, bn, index); 507 508 /* get the child page block number */ 509 bn = addressXAD(&p->xad[index]); 510 511 /* unpin the parent page */ 512 XT_PUTPAGE(mp); 513 } 514 } 515 516 /* 517 * xtInsert() 518 * 519 * function: 520 * 521 * parameter: 522 * tid - transaction id; 523 * ip - file object; 524 * xflag - extent flag (XAD_NOTRECORDED): 525 * xoff - extent offset; 526 * xlen - extent length; 527 * xaddrp - extent address pointer (in/out): 528 * if (*xaddrp) 529 * caller allocated data extent at *xaddrp; 530 * else 531 * allocate data extent and return its xaddr; 532 * flag - 533 * 534 * return: 535 */ 536 int xtInsert(tid_t tid, /* transaction id */ 537 struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp, 538 int flag) 539 { 540 int rc = 0; 541 s64 xaddr, hint; 542 struct metapage *mp; /* meta-page buffer */ 543 xtpage_t *p; /* base B+-tree index page */ 544 s64 bn; 545 int index, nextindex; 546 struct btstack btstack; /* traverse stack */ 547 struct xtsplit split; /* split information */ 548 xad_t *xad; 549 int cmp; 550 s64 next; 551 struct tlock *tlck; 552 struct xtlock *xtlck; 553 554 jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); 555 556 /* 557 * search for the entry location at which to insert: 558 * 559 * xtFastSearch() and xtSearch() both returns (leaf page 560 * pinned, index at which to insert). 561 * n.b. xtSearch() may return index of maxentry of 562 * the full page. 563 */ 564 if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT))) 565 return rc; 566 567 /* retrieve search result */ 568 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 569 570 /* This test must follow XT_GETSEARCH since mp must be valid if 571 * we branch to out: */ 572 if ((cmp == 0) || (next && (xlen > next - xoff))) { 573 rc = -EEXIST; 574 goto out; 575 } 576 577 /* 578 * allocate data extent requested 579 * 580 * allocation hint: last xad 581 */ 582 if ((xaddr = *xaddrp) == 0) { 583 if (index > XTENTRYSTART) { 584 xad = &p->xad[index - 1]; 585 hint = addressXAD(xad) + lengthXAD(xad) - 1; 586 } else 587 hint = 0; 588 if ((rc = dquot_alloc_block(ip, xlen))) 589 goto out; 590 if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) { 591 dquot_free_block(ip, xlen); 592 goto out; 593 } 594 } 595 596 /* 597 * insert entry for new extent 598 */ 599 xflag |= XAD_NEW; 600 601 /* 602 * if the leaf page is full, split the page and 603 * propagate up the router entry for the new page from split 604 * 605 * The xtSplitUp() will insert the entry and unpin the leaf page. 606 */ 607 nextindex = le16_to_cpu(p->header.nextindex); 608 if (nextindex == le16_to_cpu(p->header.maxentry)) { 609 split.mp = mp; 610 split.index = index; 611 split.flag = xflag; 612 split.off = xoff; 613 split.len = xlen; 614 split.addr = xaddr; 615 split.pxdlist = NULL; 616 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { 617 /* undo data extent allocation */ 618 if (*xaddrp == 0) { 619 dbFree(ip, xaddr, (s64) xlen); 620 dquot_free_block(ip, xlen); 621 } 622 return rc; 623 } 624 625 *xaddrp = xaddr; 626 return 0; 627 } 628 629 /* 630 * insert the new entry into the leaf page 631 */ 632 /* 633 * acquire a transaction lock on the leaf page; 634 * 635 * action: xad insertion/extension; 636 */ 637 BT_MARK_DIRTY(mp, ip); 638 639 /* if insert into middle, shift right remaining entries. */ 640 if (index < nextindex) 641 memmove(&p->xad[index + 1], &p->xad[index], 642 (nextindex - index) * sizeof(xad_t)); 643 644 /* insert the new entry: mark the entry NEW */ 645 xad = &p->xad[index]; 646 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 647 648 /* advance next available entry index */ 649 le16_add_cpu(&p->header.nextindex, 1); 650 651 /* Don't log it if there are no links to the file */ 652 if (!test_cflag(COMMIT_Nolink, ip)) { 653 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 654 xtlck = (struct xtlock *) & tlck->lock; 655 xtlck->lwm.offset = 656 (xtlck->lwm.offset) ? min(index, 657 (int)xtlck->lwm.offset) : index; 658 xtlck->lwm.length = 659 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; 660 } 661 662 *xaddrp = xaddr; 663 664 out: 665 /* unpin the leaf page */ 666 XT_PUTPAGE(mp); 667 668 return rc; 669 } 670 671 672 /* 673 * xtSplitUp() 674 * 675 * function: 676 * split full pages as propagating insertion up the tree 677 * 678 * parameter: 679 * tid - transaction id; 680 * ip - file object; 681 * split - entry parameter descriptor; 682 * btstack - traverse stack from xtSearch() 683 * 684 * return: 685 */ 686 static int 687 xtSplitUp(tid_t tid, 688 struct inode *ip, struct xtsplit * split, struct btstack * btstack) 689 { 690 int rc = 0; 691 struct metapage *smp; 692 xtpage_t *sp; /* split page */ 693 struct metapage *rmp; 694 s64 rbn; /* new right page block number */ 695 struct metapage *rcmp; 696 xtpage_t *rcp; /* right child page */ 697 s64 rcbn; /* right child page block number */ 698 int skip; /* index of entry of insertion */ 699 int nextindex; /* next available entry index of p */ 700 struct btframe *parent; /* parent page entry on traverse stack */ 701 xad_t *xad; 702 s64 xaddr; 703 int xlen; 704 int nsplit; /* number of pages split */ 705 struct pxdlist pxdlist; 706 pxd_t *pxd; 707 struct tlock *tlck; 708 struct xtlock *xtlck; 709 710 smp = split->mp; 711 sp = XT_PAGE(ip, smp); 712 713 /* is inode xtree root extension/inline EA area free ? */ 714 if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) && 715 (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) && 716 (JFS_IP(ip)->mode2 & INLINEEA)) { 717 sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT); 718 JFS_IP(ip)->mode2 &= ~INLINEEA; 719 720 BT_MARK_DIRTY(smp, ip); 721 /* 722 * acquire a transaction lock on the leaf page; 723 * 724 * action: xad insertion/extension; 725 */ 726 727 /* if insert into middle, shift right remaining entries. */ 728 skip = split->index; 729 nextindex = le16_to_cpu(sp->header.nextindex); 730 if (skip < nextindex) 731 memmove(&sp->xad[skip + 1], &sp->xad[skip], 732 (nextindex - skip) * sizeof(xad_t)); 733 734 /* insert the new entry: mark the entry NEW */ 735 xad = &sp->xad[skip]; 736 XT_PUTENTRY(xad, split->flag, split->off, split->len, 737 split->addr); 738 739 /* advance next available entry index */ 740 le16_add_cpu(&sp->header.nextindex, 1); 741 742 /* Don't log it if there are no links to the file */ 743 if (!test_cflag(COMMIT_Nolink, ip)) { 744 tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW); 745 xtlck = (struct xtlock *) & tlck->lock; 746 xtlck->lwm.offset = (xtlck->lwm.offset) ? 747 min(skip, (int)xtlck->lwm.offset) : skip; 748 xtlck->lwm.length = 749 le16_to_cpu(sp->header.nextindex) - 750 xtlck->lwm.offset; 751 } 752 753 return 0; 754 } 755 756 /* 757 * allocate new index blocks to cover index page split(s) 758 * 759 * allocation hint: ? 760 */ 761 if (split->pxdlist == NULL) { 762 nsplit = btstack->nsplit; 763 split->pxdlist = &pxdlist; 764 pxdlist.maxnpxd = pxdlist.npxd = 0; 765 pxd = &pxdlist.pxd[0]; 766 xlen = JFS_SBI(ip->i_sb)->nbperpage; 767 for (; nsplit > 0; nsplit--, pxd++) { 768 if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr)) 769 == 0) { 770 PXDaddress(pxd, xaddr); 771 PXDlength(pxd, xlen); 772 773 pxdlist.maxnpxd++; 774 775 continue; 776 } 777 778 /* undo allocation */ 779 780 XT_PUTPAGE(smp); 781 return rc; 782 } 783 } 784 785 /* 786 * Split leaf page <sp> into <sp> and a new right page <rp>. 787 * 788 * The split routines insert the new entry into the leaf page, 789 * and acquire txLock as appropriate. 790 * return <rp> pinned and its block number <rpbn>. 791 */ 792 rc = (sp->header.flag & BT_ROOT) ? 793 xtSplitRoot(tid, ip, split, &rmp) : 794 xtSplitPage(tid, ip, split, &rmp, &rbn); 795 796 XT_PUTPAGE(smp); 797 798 if (rc) 799 return -EIO; 800 /* 801 * propagate up the router entry for the leaf page just split 802 * 803 * insert a router entry for the new page into the parent page, 804 * propagate the insert/split up the tree by walking back the stack 805 * of (bn of parent page, index of child page entry in parent page) 806 * that were traversed during the search for the page that split. 807 * 808 * the propagation of insert/split up the tree stops if the root 809 * splits or the page inserted into doesn't have to split to hold 810 * the new entry. 811 * 812 * the parent entry for the split page remains the same, and 813 * a new entry is inserted at its right with the first key and 814 * block number of the new right page. 815 * 816 * There are a maximum of 3 pages pinned at any time: 817 * right child, left parent and right parent (when the parent splits) 818 * to keep the child page pinned while working on the parent. 819 * make sure that all pins are released at exit. 820 */ 821 while ((parent = BT_POP(btstack)) != NULL) { 822 /* parent page specified by stack frame <parent> */ 823 824 /* keep current child pages <rcp> pinned */ 825 rcmp = rmp; 826 rcbn = rbn; 827 rcp = XT_PAGE(ip, rcmp); 828 829 /* 830 * insert router entry in parent for new right child page <rp> 831 */ 832 /* get/pin the parent page <sp> */ 833 XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc); 834 if (rc) { 835 XT_PUTPAGE(rcmp); 836 return rc; 837 } 838 839 /* 840 * The new key entry goes ONE AFTER the index of parent entry, 841 * because the split was to the right. 842 */ 843 skip = parent->index + 1; 844 845 /* 846 * split or shift right remaining entries of the parent page 847 */ 848 nextindex = le16_to_cpu(sp->header.nextindex); 849 /* 850 * parent page is full - split the parent page 851 */ 852 if (nextindex == le16_to_cpu(sp->header.maxentry)) { 853 /* init for parent page split */ 854 split->mp = smp; 855 split->index = skip; /* index at insert */ 856 split->flag = XAD_NEW; 857 split->off = offsetXAD(&rcp->xad[XTENTRYSTART]); 858 split->len = JFS_SBI(ip->i_sb)->nbperpage; 859 split->addr = rcbn; 860 861 /* unpin previous right child page */ 862 XT_PUTPAGE(rcmp); 863 864 /* The split routines insert the new entry, 865 * and acquire txLock as appropriate. 866 * return <rp> pinned and its block number <rpbn>. 867 */ 868 rc = (sp->header.flag & BT_ROOT) ? 869 xtSplitRoot(tid, ip, split, &rmp) : 870 xtSplitPage(tid, ip, split, &rmp, &rbn); 871 if (rc) { 872 XT_PUTPAGE(smp); 873 return rc; 874 } 875 876 XT_PUTPAGE(smp); 877 /* keep new child page <rp> pinned */ 878 } 879 /* 880 * parent page is not full - insert in parent page 881 */ 882 else { 883 /* 884 * insert router entry in parent for the right child 885 * page from the first entry of the right child page: 886 */ 887 /* 888 * acquire a transaction lock on the parent page; 889 * 890 * action: router xad insertion; 891 */ 892 BT_MARK_DIRTY(smp, ip); 893 894 /* 895 * if insert into middle, shift right remaining entries 896 */ 897 if (skip < nextindex) 898 memmove(&sp->xad[skip + 1], &sp->xad[skip], 899 (nextindex - 900 skip) << L2XTSLOTSIZE); 901 902 /* insert the router entry */ 903 xad = &sp->xad[skip]; 904 XT_PUTENTRY(xad, XAD_NEW, 905 offsetXAD(&rcp->xad[XTENTRYSTART]), 906 JFS_SBI(ip->i_sb)->nbperpage, rcbn); 907 908 /* advance next available entry index. */ 909 le16_add_cpu(&sp->header.nextindex, 1); 910 911 /* Don't log it if there are no links to the file */ 912 if (!test_cflag(COMMIT_Nolink, ip)) { 913 tlck = txLock(tid, ip, smp, 914 tlckXTREE | tlckGROW); 915 xtlck = (struct xtlock *) & tlck->lock; 916 xtlck->lwm.offset = (xtlck->lwm.offset) ? 917 min(skip, (int)xtlck->lwm.offset) : skip; 918 xtlck->lwm.length = 919 le16_to_cpu(sp->header.nextindex) - 920 xtlck->lwm.offset; 921 } 922 923 /* unpin parent page */ 924 XT_PUTPAGE(smp); 925 926 /* exit propagate up */ 927 break; 928 } 929 } 930 931 /* unpin current right page */ 932 XT_PUTPAGE(rmp); 933 934 return 0; 935 } 936 937 938 /* 939 * xtSplitPage() 940 * 941 * function: 942 * split a full non-root page into 943 * original/split/left page and new right page 944 * i.e., the original/split page remains as left page. 945 * 946 * parameter: 947 * int tid, 948 * struct inode *ip, 949 * struct xtsplit *split, 950 * struct metapage **rmpp, 951 * u64 *rbnp, 952 * 953 * return: 954 * Pointer to page in which to insert or NULL on error. 955 */ 956 static int 957 xtSplitPage(tid_t tid, struct inode *ip, 958 struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp) 959 { 960 int rc = 0; 961 struct metapage *smp; 962 xtpage_t *sp; 963 struct metapage *rmp; 964 xtpage_t *rp; /* new right page allocated */ 965 s64 rbn; /* new right page block number */ 966 struct metapage *mp; 967 xtpage_t *p; 968 s64 nextbn; 969 int skip, maxentry, middle, righthalf, n; 970 xad_t *xad; 971 struct pxdlist *pxdlist; 972 pxd_t *pxd; 973 struct tlock *tlck; 974 struct xtlock *sxtlck = NULL, *rxtlck = NULL; 975 int quota_allocation = 0; 976 977 smp = split->mp; 978 sp = XT_PAGE(ip, smp); 979 980 INCREMENT(xtStat.split); 981 982 pxdlist = split->pxdlist; 983 pxd = &pxdlist->pxd[pxdlist->npxd]; 984 pxdlist->npxd++; 985 rbn = addressPXD(pxd); 986 987 /* Allocate blocks to quota. */ 988 rc = dquot_alloc_block(ip, lengthPXD(pxd)); 989 if (rc) 990 goto clean_up; 991 992 quota_allocation += lengthPXD(pxd); 993 994 /* 995 * allocate the new right page for the split 996 */ 997 rmp = get_metapage(ip, rbn, PSIZE, 1); 998 if (rmp == NULL) { 999 rc = -EIO; 1000 goto clean_up; 1001 } 1002 1003 jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); 1004 1005 BT_MARK_DIRTY(rmp, ip); 1006 /* 1007 * action: new page; 1008 */ 1009 1010 rp = (xtpage_t *) rmp->data; 1011 rp->header.self = *pxd; 1012 rp->header.flag = sp->header.flag & BT_TYPE; 1013 rp->header.maxentry = sp->header.maxentry; /* little-endian */ 1014 rp->header.nextindex = cpu_to_le16(XTENTRYSTART); 1015 1016 BT_MARK_DIRTY(smp, ip); 1017 /* Don't log it if there are no links to the file */ 1018 if (!test_cflag(COMMIT_Nolink, ip)) { 1019 /* 1020 * acquire a transaction lock on the new right page; 1021 */ 1022 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); 1023 rxtlck = (struct xtlock *) & tlck->lock; 1024 rxtlck->lwm.offset = XTENTRYSTART; 1025 /* 1026 * acquire a transaction lock on the split page 1027 */ 1028 tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW); 1029 sxtlck = (struct xtlock *) & tlck->lock; 1030 } 1031 1032 /* 1033 * initialize/update sibling pointers of <sp> and <rp> 1034 */ 1035 nextbn = le64_to_cpu(sp->header.next); 1036 rp->header.next = cpu_to_le64(nextbn); 1037 rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self)); 1038 sp->header.next = cpu_to_le64(rbn); 1039 1040 skip = split->index; 1041 1042 /* 1043 * sequential append at tail (after last entry of last page) 1044 * 1045 * if splitting the last page on a level because of appending 1046 * a entry to it (skip is maxentry), it's likely that the access is 1047 * sequential. adding an empty page on the side of the level is less 1048 * work and can push the fill factor much higher than normal. 1049 * if we're wrong it's no big deal - we will do the split the right 1050 * way next time. 1051 * (it may look like it's equally easy to do a similar hack for 1052 * reverse sorted data, that is, split the tree left, but it's not. 1053 * Be my guest.) 1054 */ 1055 if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) { 1056 /* 1057 * acquire a transaction lock on the new/right page; 1058 * 1059 * action: xad insertion; 1060 */ 1061 /* insert entry at the first entry of the new right page */ 1062 xad = &rp->xad[XTENTRYSTART]; 1063 XT_PUTENTRY(xad, split->flag, split->off, split->len, 1064 split->addr); 1065 1066 rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); 1067 1068 if (!test_cflag(COMMIT_Nolink, ip)) { 1069 /* rxtlck->lwm.offset = XTENTRYSTART; */ 1070 rxtlck->lwm.length = 1; 1071 } 1072 1073 *rmpp = rmp; 1074 *rbnp = rbn; 1075 1076 jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp); 1077 return 0; 1078 } 1079 1080 /* 1081 * non-sequential insert (at possibly middle page) 1082 */ 1083 1084 /* 1085 * update previous pointer of old next/right page of <sp> 1086 */ 1087 if (nextbn != 0) { 1088 XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 1089 if (rc) { 1090 XT_PUTPAGE(rmp); 1091 goto clean_up; 1092 } 1093 1094 BT_MARK_DIRTY(mp, ip); 1095 /* 1096 * acquire a transaction lock on the next page; 1097 * 1098 * action:sibling pointer update; 1099 */ 1100 if (!test_cflag(COMMIT_Nolink, ip)) 1101 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 1102 1103 p->header.prev = cpu_to_le64(rbn); 1104 1105 /* sibling page may have been updated previously, or 1106 * it may be updated later; 1107 */ 1108 1109 XT_PUTPAGE(mp); 1110 } 1111 1112 /* 1113 * split the data between the split and new/right pages 1114 */ 1115 maxentry = le16_to_cpu(sp->header.maxentry); 1116 middle = maxentry >> 1; 1117 righthalf = maxentry - middle; 1118 1119 /* 1120 * skip index in old split/left page - insert into left page: 1121 */ 1122 if (skip <= middle) { 1123 /* move right half of split page to the new right page */ 1124 memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], 1125 righthalf << L2XTSLOTSIZE); 1126 1127 /* shift right tail of left half to make room for new entry */ 1128 if (skip < middle) 1129 memmove(&sp->xad[skip + 1], &sp->xad[skip], 1130 (middle - skip) << L2XTSLOTSIZE); 1131 1132 /* insert new entry */ 1133 xad = &sp->xad[skip]; 1134 XT_PUTENTRY(xad, split->flag, split->off, split->len, 1135 split->addr); 1136 1137 /* update page header */ 1138 sp->header.nextindex = cpu_to_le16(middle + 1); 1139 if (!test_cflag(COMMIT_Nolink, ip)) { 1140 sxtlck->lwm.offset = (sxtlck->lwm.offset) ? 1141 min(skip, (int)sxtlck->lwm.offset) : skip; 1142 } 1143 1144 rp->header.nextindex = 1145 cpu_to_le16(XTENTRYSTART + righthalf); 1146 } 1147 /* 1148 * skip index in new right page - insert into right page: 1149 */ 1150 else { 1151 /* move left head of right half to right page */ 1152 n = skip - middle; 1153 memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], 1154 n << L2XTSLOTSIZE); 1155 1156 /* insert new entry */ 1157 n += XTENTRYSTART; 1158 xad = &rp->xad[n]; 1159 XT_PUTENTRY(xad, split->flag, split->off, split->len, 1160 split->addr); 1161 1162 /* move right tail of right half to right page */ 1163 if (skip < maxentry) 1164 memmove(&rp->xad[n + 1], &sp->xad[skip], 1165 (maxentry - skip) << L2XTSLOTSIZE); 1166 1167 /* update page header */ 1168 sp->header.nextindex = cpu_to_le16(middle); 1169 if (!test_cflag(COMMIT_Nolink, ip)) { 1170 sxtlck->lwm.offset = (sxtlck->lwm.offset) ? 1171 min(middle, (int)sxtlck->lwm.offset) : middle; 1172 } 1173 1174 rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1175 righthalf + 1); 1176 } 1177 1178 if (!test_cflag(COMMIT_Nolink, ip)) { 1179 sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) - 1180 sxtlck->lwm.offset; 1181 1182 /* rxtlck->lwm.offset = XTENTRYSTART; */ 1183 rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - 1184 XTENTRYSTART; 1185 } 1186 1187 *rmpp = rmp; 1188 *rbnp = rbn; 1189 1190 jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp); 1191 return rc; 1192 1193 clean_up: 1194 1195 /* Rollback quota allocation. */ 1196 if (quota_allocation) 1197 dquot_free_block(ip, quota_allocation); 1198 1199 return (rc); 1200 } 1201 1202 1203 /* 1204 * xtSplitRoot() 1205 * 1206 * function: 1207 * split the full root page into original/root/split page and new 1208 * right page 1209 * i.e., root remains fixed in tree anchor (inode) and the root is 1210 * copied to a single new right child page since root page << 1211 * non-root page, and the split root page contains a single entry 1212 * for the new right child page. 1213 * 1214 * parameter: 1215 * int tid, 1216 * struct inode *ip, 1217 * struct xtsplit *split, 1218 * struct metapage **rmpp) 1219 * 1220 * return: 1221 * Pointer to page in which to insert or NULL on error. 1222 */ 1223 static int 1224 xtSplitRoot(tid_t tid, 1225 struct inode *ip, struct xtsplit * split, struct metapage ** rmpp) 1226 { 1227 xtpage_t *sp; 1228 struct metapage *rmp; 1229 xtpage_t *rp; 1230 s64 rbn; 1231 int skip, nextindex; 1232 xad_t *xad; 1233 pxd_t *pxd; 1234 struct pxdlist *pxdlist; 1235 struct tlock *tlck; 1236 struct xtlock *xtlck; 1237 int rc; 1238 1239 sp = &JFS_IP(ip)->i_xtroot; 1240 1241 INCREMENT(xtStat.split); 1242 1243 /* 1244 * allocate a single (right) child page 1245 */ 1246 pxdlist = split->pxdlist; 1247 pxd = &pxdlist->pxd[pxdlist->npxd]; 1248 pxdlist->npxd++; 1249 rbn = addressPXD(pxd); 1250 rmp = get_metapage(ip, rbn, PSIZE, 1); 1251 if (rmp == NULL) 1252 return -EIO; 1253 1254 /* Allocate blocks to quota. */ 1255 rc = dquot_alloc_block(ip, lengthPXD(pxd)); 1256 if (rc) { 1257 release_metapage(rmp); 1258 return rc; 1259 } 1260 1261 jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp); 1262 1263 /* 1264 * acquire a transaction lock on the new right page; 1265 * 1266 * action: new page; 1267 */ 1268 BT_MARK_DIRTY(rmp, ip); 1269 1270 rp = (xtpage_t *) rmp->data; 1271 rp->header.flag = 1272 (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL; 1273 rp->header.self = *pxd; 1274 rp->header.nextindex = cpu_to_le16(XTENTRYSTART); 1275 rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE); 1276 1277 /* initialize sibling pointers */ 1278 rp->header.next = 0; 1279 rp->header.prev = 0; 1280 1281 /* 1282 * copy the in-line root page into new right page extent 1283 */ 1284 nextindex = le16_to_cpu(sp->header.maxentry); 1285 memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART], 1286 (nextindex - XTENTRYSTART) << L2XTSLOTSIZE); 1287 1288 /* 1289 * insert the new entry into the new right/child page 1290 * (skip index in the new right page will not change) 1291 */ 1292 skip = split->index; 1293 /* if insert into middle, shift right remaining entries */ 1294 if (skip != nextindex) 1295 memmove(&rp->xad[skip + 1], &rp->xad[skip], 1296 (nextindex - skip) * sizeof(xad_t)); 1297 1298 xad = &rp->xad[skip]; 1299 XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr); 1300 1301 /* update page header */ 1302 rp->header.nextindex = cpu_to_le16(nextindex + 1); 1303 1304 if (!test_cflag(COMMIT_Nolink, ip)) { 1305 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); 1306 xtlck = (struct xtlock *) & tlck->lock; 1307 xtlck->lwm.offset = XTENTRYSTART; 1308 xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - 1309 XTENTRYSTART; 1310 } 1311 1312 /* 1313 * reset the root 1314 * 1315 * init root with the single entry for the new right page 1316 * set the 1st entry offset to 0, which force the left-most key 1317 * at any level of the tree to be less than any search key. 1318 */ 1319 /* 1320 * acquire a transaction lock on the root page (in-memory inode); 1321 * 1322 * action: root split; 1323 */ 1324 BT_MARK_DIRTY(split->mp, ip); 1325 1326 xad = &sp->xad[XTENTRYSTART]; 1327 XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn); 1328 1329 /* update page header of root */ 1330 sp->header.flag &= ~BT_LEAF; 1331 sp->header.flag |= BT_INTERNAL; 1332 1333 sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); 1334 1335 if (!test_cflag(COMMIT_Nolink, ip)) { 1336 tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW); 1337 xtlck = (struct xtlock *) & tlck->lock; 1338 xtlck->lwm.offset = XTENTRYSTART; 1339 xtlck->lwm.length = 1; 1340 } 1341 1342 *rmpp = rmp; 1343 1344 jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp); 1345 return 0; 1346 } 1347 1348 1349 /* 1350 * xtExtend() 1351 * 1352 * function: extend in-place; 1353 * 1354 * note: existing extent may or may not have been committed. 1355 * caller is responsible for pager buffer cache update, and 1356 * working block allocation map update; 1357 * update pmap: alloc whole extended extent; 1358 */ 1359 int xtExtend(tid_t tid, /* transaction id */ 1360 struct inode *ip, s64 xoff, /* delta extent offset */ 1361 s32 xlen, /* delta extent length */ 1362 int flag) 1363 { 1364 int rc = 0; 1365 int cmp; 1366 struct metapage *mp; /* meta-page buffer */ 1367 xtpage_t *p; /* base B+-tree index page */ 1368 s64 bn; 1369 int index, nextindex, len; 1370 struct btstack btstack; /* traverse stack */ 1371 struct xtsplit split; /* split information */ 1372 xad_t *xad; 1373 s64 xaddr; 1374 struct tlock *tlck; 1375 struct xtlock *xtlck = NULL; 1376 1377 jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); 1378 1379 /* there must exist extent to be extended */ 1380 if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT))) 1381 return rc; 1382 1383 /* retrieve search result */ 1384 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 1385 1386 if (cmp != 0) { 1387 XT_PUTPAGE(mp); 1388 jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent"); 1389 return -EIO; 1390 } 1391 1392 /* extension must be contiguous */ 1393 xad = &p->xad[index]; 1394 if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) { 1395 XT_PUTPAGE(mp); 1396 jfs_error(ip->i_sb, "xtExtend: extension is not contiguous"); 1397 return -EIO; 1398 } 1399 1400 /* 1401 * acquire a transaction lock on the leaf page; 1402 * 1403 * action: xad insertion/extension; 1404 */ 1405 BT_MARK_DIRTY(mp, ip); 1406 if (!test_cflag(COMMIT_Nolink, ip)) { 1407 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 1408 xtlck = (struct xtlock *) & tlck->lock; 1409 } 1410 1411 /* extend will overflow extent ? */ 1412 xlen = lengthXAD(xad) + xlen; 1413 if ((len = xlen - MAXXLEN) <= 0) 1414 goto extendOld; 1415 1416 /* 1417 * extent overflow: insert entry for new extent 1418 */ 1419 //insertNew: 1420 xoff = offsetXAD(xad) + MAXXLEN; 1421 xaddr = addressXAD(xad) + MAXXLEN; 1422 nextindex = le16_to_cpu(p->header.nextindex); 1423 1424 /* 1425 * if the leaf page is full, insert the new entry and 1426 * propagate up the router entry for the new page from split 1427 * 1428 * The xtSplitUp() will insert the entry and unpin the leaf page. 1429 */ 1430 if (nextindex == le16_to_cpu(p->header.maxentry)) { 1431 /* xtSpliUp() unpins leaf pages */ 1432 split.mp = mp; 1433 split.index = index + 1; 1434 split.flag = XAD_NEW; 1435 split.off = xoff; /* split offset */ 1436 split.len = len; 1437 split.addr = xaddr; 1438 split.pxdlist = NULL; 1439 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 1440 return rc; 1441 1442 /* get back old page */ 1443 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1444 if (rc) 1445 return rc; 1446 /* 1447 * if leaf root has been split, original root has been 1448 * copied to new child page, i.e., original entry now 1449 * resides on the new child page; 1450 */ 1451 if (p->header.flag & BT_INTERNAL) { 1452 ASSERT(p->header.nextindex == 1453 cpu_to_le16(XTENTRYSTART + 1)); 1454 xad = &p->xad[XTENTRYSTART]; 1455 bn = addressXAD(xad); 1456 XT_PUTPAGE(mp); 1457 1458 /* get new child page */ 1459 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1460 if (rc) 1461 return rc; 1462 1463 BT_MARK_DIRTY(mp, ip); 1464 if (!test_cflag(COMMIT_Nolink, ip)) { 1465 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 1466 xtlck = (struct xtlock *) & tlck->lock; 1467 } 1468 } 1469 } 1470 /* 1471 * insert the new entry into the leaf page 1472 */ 1473 else { 1474 /* insert the new entry: mark the entry NEW */ 1475 xad = &p->xad[index + 1]; 1476 XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr); 1477 1478 /* advance next available entry index */ 1479 le16_add_cpu(&p->header.nextindex, 1); 1480 } 1481 1482 /* get back old entry */ 1483 xad = &p->xad[index]; 1484 xlen = MAXXLEN; 1485 1486 /* 1487 * extend old extent 1488 */ 1489 extendOld: 1490 XADlength(xad, xlen); 1491 if (!(xad->flag & XAD_NEW)) 1492 xad->flag |= XAD_EXTENDED; 1493 1494 if (!test_cflag(COMMIT_Nolink, ip)) { 1495 xtlck->lwm.offset = 1496 (xtlck->lwm.offset) ? min(index, 1497 (int)xtlck->lwm.offset) : index; 1498 xtlck->lwm.length = 1499 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; 1500 } 1501 1502 /* unpin the leaf page */ 1503 XT_PUTPAGE(mp); 1504 1505 return rc; 1506 } 1507 1508 #ifdef _NOTYET 1509 /* 1510 * xtTailgate() 1511 * 1512 * function: split existing 'tail' extent 1513 * (split offset >= start offset of tail extent), and 1514 * relocate and extend the split tail half; 1515 * 1516 * note: existing extent may or may not have been committed. 1517 * caller is responsible for pager buffer cache update, and 1518 * working block allocation map update; 1519 * update pmap: free old split tail extent, alloc new extent; 1520 */ 1521 int xtTailgate(tid_t tid, /* transaction id */ 1522 struct inode *ip, s64 xoff, /* split/new extent offset */ 1523 s32 xlen, /* new extent length */ 1524 s64 xaddr, /* new extent address */ 1525 int flag) 1526 { 1527 int rc = 0; 1528 int cmp; 1529 struct metapage *mp; /* meta-page buffer */ 1530 xtpage_t *p; /* base B+-tree index page */ 1531 s64 bn; 1532 int index, nextindex, llen, rlen; 1533 struct btstack btstack; /* traverse stack */ 1534 struct xtsplit split; /* split information */ 1535 xad_t *xad; 1536 struct tlock *tlck; 1537 struct xtlock *xtlck = 0; 1538 struct tlock *mtlck; 1539 struct maplock *pxdlock; 1540 1541 /* 1542 printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", 1543 (ulong)xoff, xlen, (ulong)xaddr); 1544 */ 1545 1546 /* there must exist extent to be tailgated */ 1547 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT))) 1548 return rc; 1549 1550 /* retrieve search result */ 1551 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 1552 1553 if (cmp != 0) { 1554 XT_PUTPAGE(mp); 1555 jfs_error(ip->i_sb, "xtTailgate: couldn't find extent"); 1556 return -EIO; 1557 } 1558 1559 /* entry found must be last entry */ 1560 nextindex = le16_to_cpu(p->header.nextindex); 1561 if (index != nextindex - 1) { 1562 XT_PUTPAGE(mp); 1563 jfs_error(ip->i_sb, 1564 "xtTailgate: the entry found is not the last entry"); 1565 return -EIO; 1566 } 1567 1568 BT_MARK_DIRTY(mp, ip); 1569 /* 1570 * acquire tlock of the leaf page containing original entry 1571 */ 1572 if (!test_cflag(COMMIT_Nolink, ip)) { 1573 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 1574 xtlck = (struct xtlock *) & tlck->lock; 1575 } 1576 1577 /* completely replace extent ? */ 1578 xad = &p->xad[index]; 1579 /* 1580 printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", 1581 (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad)); 1582 */ 1583 if ((llen = xoff - offsetXAD(xad)) == 0) 1584 goto updateOld; 1585 1586 /* 1587 * partially replace extent: insert entry for new extent 1588 */ 1589 //insertNew: 1590 /* 1591 * if the leaf page is full, insert the new entry and 1592 * propagate up the router entry for the new page from split 1593 * 1594 * The xtSplitUp() will insert the entry and unpin the leaf page. 1595 */ 1596 if (nextindex == le16_to_cpu(p->header.maxentry)) { 1597 /* xtSpliUp() unpins leaf pages */ 1598 split.mp = mp; 1599 split.index = index + 1; 1600 split.flag = XAD_NEW; 1601 split.off = xoff; /* split offset */ 1602 split.len = xlen; 1603 split.addr = xaddr; 1604 split.pxdlist = NULL; 1605 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 1606 return rc; 1607 1608 /* get back old page */ 1609 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1610 if (rc) 1611 return rc; 1612 /* 1613 * if leaf root has been split, original root has been 1614 * copied to new child page, i.e., original entry now 1615 * resides on the new child page; 1616 */ 1617 if (p->header.flag & BT_INTERNAL) { 1618 ASSERT(p->header.nextindex == 1619 cpu_to_le16(XTENTRYSTART + 1)); 1620 xad = &p->xad[XTENTRYSTART]; 1621 bn = addressXAD(xad); 1622 XT_PUTPAGE(mp); 1623 1624 /* get new child page */ 1625 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1626 if (rc) 1627 return rc; 1628 1629 BT_MARK_DIRTY(mp, ip); 1630 if (!test_cflag(COMMIT_Nolink, ip)) { 1631 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 1632 xtlck = (struct xtlock *) & tlck->lock; 1633 } 1634 } 1635 } 1636 /* 1637 * insert the new entry into the leaf page 1638 */ 1639 else { 1640 /* insert the new entry: mark the entry NEW */ 1641 xad = &p->xad[index + 1]; 1642 XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); 1643 1644 /* advance next available entry index */ 1645 le16_add_cpu(&p->header.nextindex, 1); 1646 } 1647 1648 /* get back old XAD */ 1649 xad = &p->xad[index]; 1650 1651 /* 1652 * truncate/relocate old extent at split offset 1653 */ 1654 updateOld: 1655 /* update dmap for old/committed/truncated extent */ 1656 rlen = lengthXAD(xad) - llen; 1657 if (!(xad->flag & XAD_NEW)) { 1658 /* free from PWMAP at commit */ 1659 if (!test_cflag(COMMIT_Nolink, ip)) { 1660 mtlck = txMaplock(tid, ip, tlckMAP); 1661 pxdlock = (struct maplock *) & mtlck->lock; 1662 pxdlock->flag = mlckFREEPXD; 1663 PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen); 1664 PXDlength(&pxdlock->pxd, rlen); 1665 pxdlock->index = 1; 1666 } 1667 } else 1668 /* free from WMAP */ 1669 dbFree(ip, addressXAD(xad) + llen, (s64) rlen); 1670 1671 if (llen) 1672 /* truncate */ 1673 XADlength(xad, llen); 1674 else 1675 /* replace */ 1676 XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); 1677 1678 if (!test_cflag(COMMIT_Nolink, ip)) { 1679 xtlck->lwm.offset = (xtlck->lwm.offset) ? 1680 min(index, (int)xtlck->lwm.offset) : index; 1681 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 1682 xtlck->lwm.offset; 1683 } 1684 1685 /* unpin the leaf page */ 1686 XT_PUTPAGE(mp); 1687 1688 return rc; 1689 } 1690 #endif /* _NOTYET */ 1691 1692 /* 1693 * xtUpdate() 1694 * 1695 * function: update XAD; 1696 * 1697 * update extent for allocated_but_not_recorded or 1698 * compressed extent; 1699 * 1700 * parameter: 1701 * nxad - new XAD; 1702 * logical extent of the specified XAD must be completely 1703 * contained by an existing XAD; 1704 */ 1705 int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) 1706 { /* new XAD */ 1707 int rc = 0; 1708 int cmp; 1709 struct metapage *mp; /* meta-page buffer */ 1710 xtpage_t *p; /* base B+-tree index page */ 1711 s64 bn; 1712 int index0, index, newindex, nextindex; 1713 struct btstack btstack; /* traverse stack */ 1714 struct xtsplit split; /* split information */ 1715 xad_t *xad, *lxad, *rxad; 1716 int xflag; 1717 s64 nxoff, xoff; 1718 int nxlen, xlen, lxlen, rxlen; 1719 s64 nxaddr, xaddr; 1720 struct tlock *tlck; 1721 struct xtlock *xtlck = NULL; 1722 int newpage = 0; 1723 1724 /* there must exist extent to be tailgated */ 1725 nxoff = offsetXAD(nxad); 1726 nxlen = lengthXAD(nxad); 1727 nxaddr = addressXAD(nxad); 1728 1729 if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) 1730 return rc; 1731 1732 /* retrieve search result */ 1733 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); 1734 1735 if (cmp != 0) { 1736 XT_PUTPAGE(mp); 1737 jfs_error(ip->i_sb, "xtUpdate: Could not find extent"); 1738 return -EIO; 1739 } 1740 1741 BT_MARK_DIRTY(mp, ip); 1742 /* 1743 * acquire tlock of the leaf page containing original entry 1744 */ 1745 if (!test_cflag(COMMIT_Nolink, ip)) { 1746 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 1747 xtlck = (struct xtlock *) & tlck->lock; 1748 } 1749 1750 xad = &p->xad[index0]; 1751 xflag = xad->flag; 1752 xoff = offsetXAD(xad); 1753 xlen = lengthXAD(xad); 1754 xaddr = addressXAD(xad); 1755 1756 /* nXAD must be completely contained within XAD */ 1757 if ((xoff > nxoff) || 1758 (nxoff + nxlen > xoff + xlen)) { 1759 XT_PUTPAGE(mp); 1760 jfs_error(ip->i_sb, 1761 "xtUpdate: nXAD in not completely contained within XAD"); 1762 return -EIO; 1763 } 1764 1765 index = index0; 1766 newindex = index + 1; 1767 nextindex = le16_to_cpu(p->header.nextindex); 1768 1769 #ifdef _JFS_WIP_NOCOALESCE 1770 if (xoff < nxoff) 1771 goto updateRight; 1772 1773 /* 1774 * replace XAD with nXAD 1775 */ 1776 replace: /* (nxoff == xoff) */ 1777 if (nxlen == xlen) { 1778 /* replace XAD with nXAD:recorded */ 1779 *xad = *nxad; 1780 xad->flag = xflag & ~XAD_NOTRECORDED; 1781 1782 goto out; 1783 } else /* (nxlen < xlen) */ 1784 goto updateLeft; 1785 #endif /* _JFS_WIP_NOCOALESCE */ 1786 1787 /* #ifdef _JFS_WIP_COALESCE */ 1788 if (xoff < nxoff) 1789 goto coalesceRight; 1790 1791 /* 1792 * coalesce with left XAD 1793 */ 1794 //coalesceLeft: /* (xoff == nxoff) */ 1795 /* is XAD first entry of page ? */ 1796 if (index == XTENTRYSTART) 1797 goto replace; 1798 1799 /* is nXAD logically and physically contiguous with lXAD ? */ 1800 lxad = &p->xad[index - 1]; 1801 lxlen = lengthXAD(lxad); 1802 if (!(lxad->flag & XAD_NOTRECORDED) && 1803 (nxoff == offsetXAD(lxad) + lxlen) && 1804 (nxaddr == addressXAD(lxad) + lxlen) && 1805 (lxlen + nxlen < MAXXLEN)) { 1806 /* extend right lXAD */ 1807 index0 = index - 1; 1808 XADlength(lxad, lxlen + nxlen); 1809 1810 /* If we just merged two extents together, need to make sure the 1811 * right extent gets logged. If the left one is marked XAD_NEW, 1812 * then we know it will be logged. Otherwise, mark as 1813 * XAD_EXTENDED 1814 */ 1815 if (!(lxad->flag & XAD_NEW)) 1816 lxad->flag |= XAD_EXTENDED; 1817 1818 if (xlen > nxlen) { 1819 /* truncate XAD */ 1820 XADoffset(xad, xoff + nxlen); 1821 XADlength(xad, xlen - nxlen); 1822 XADaddress(xad, xaddr + nxlen); 1823 goto out; 1824 } else { /* (xlen == nxlen) */ 1825 1826 /* remove XAD */ 1827 if (index < nextindex - 1) 1828 memmove(&p->xad[index], &p->xad[index + 1], 1829 (nextindex - index - 1830 1) << L2XTSLOTSIZE); 1831 1832 p->header.nextindex = 1833 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1834 1); 1835 1836 index = index0; 1837 newindex = index + 1; 1838 nextindex = le16_to_cpu(p->header.nextindex); 1839 xoff = nxoff = offsetXAD(lxad); 1840 xlen = nxlen = lxlen + nxlen; 1841 xaddr = nxaddr = addressXAD(lxad); 1842 goto coalesceRight; 1843 } 1844 } 1845 1846 /* 1847 * replace XAD with nXAD 1848 */ 1849 replace: /* (nxoff == xoff) */ 1850 if (nxlen == xlen) { 1851 /* replace XAD with nXAD:recorded */ 1852 *xad = *nxad; 1853 xad->flag = xflag & ~XAD_NOTRECORDED; 1854 1855 goto coalesceRight; 1856 } else /* (nxlen < xlen) */ 1857 goto updateLeft; 1858 1859 /* 1860 * coalesce with right XAD 1861 */ 1862 coalesceRight: /* (xoff <= nxoff) */ 1863 /* is XAD last entry of page ? */ 1864 if (newindex == nextindex) { 1865 if (xoff == nxoff) 1866 goto out; 1867 goto updateRight; 1868 } 1869 1870 /* is nXAD logically and physically contiguous with rXAD ? */ 1871 rxad = &p->xad[index + 1]; 1872 rxlen = lengthXAD(rxad); 1873 if (!(rxad->flag & XAD_NOTRECORDED) && 1874 (nxoff + nxlen == offsetXAD(rxad)) && 1875 (nxaddr + nxlen == addressXAD(rxad)) && 1876 (rxlen + nxlen < MAXXLEN)) { 1877 /* extend left rXAD */ 1878 XADoffset(rxad, nxoff); 1879 XADlength(rxad, rxlen + nxlen); 1880 XADaddress(rxad, nxaddr); 1881 1882 /* If we just merged two extents together, need to make sure 1883 * the left extent gets logged. If the right one is marked 1884 * XAD_NEW, then we know it will be logged. Otherwise, mark as 1885 * XAD_EXTENDED 1886 */ 1887 if (!(rxad->flag & XAD_NEW)) 1888 rxad->flag |= XAD_EXTENDED; 1889 1890 if (xlen > nxlen) 1891 /* truncate XAD */ 1892 XADlength(xad, xlen - nxlen); 1893 else { /* (xlen == nxlen) */ 1894 1895 /* remove XAD */ 1896 memmove(&p->xad[index], &p->xad[index + 1], 1897 (nextindex - index - 1) << L2XTSLOTSIZE); 1898 1899 p->header.nextindex = 1900 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1901 1); 1902 } 1903 1904 goto out; 1905 } else if (xoff == nxoff) 1906 goto out; 1907 1908 if (xoff >= nxoff) { 1909 XT_PUTPAGE(mp); 1910 jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff"); 1911 return -EIO; 1912 } 1913 /* #endif _JFS_WIP_COALESCE */ 1914 1915 /* 1916 * split XAD into (lXAD, nXAD): 1917 * 1918 * |---nXAD---> 1919 * --|----------XAD----------|-- 1920 * |-lXAD-| 1921 */ 1922 updateRight: /* (xoff < nxoff) */ 1923 /* truncate old XAD as lXAD:not_recorded */ 1924 xad = &p->xad[index]; 1925 XADlength(xad, nxoff - xoff); 1926 1927 /* insert nXAD:recorded */ 1928 if (nextindex == le16_to_cpu(p->header.maxentry)) { 1929 1930 /* xtSpliUp() unpins leaf pages */ 1931 split.mp = mp; 1932 split.index = newindex; 1933 split.flag = xflag & ~XAD_NOTRECORDED; 1934 split.off = nxoff; 1935 split.len = nxlen; 1936 split.addr = nxaddr; 1937 split.pxdlist = NULL; 1938 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 1939 return rc; 1940 1941 /* get back old page */ 1942 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1943 if (rc) 1944 return rc; 1945 /* 1946 * if leaf root has been split, original root has been 1947 * copied to new child page, i.e., original entry now 1948 * resides on the new child page; 1949 */ 1950 if (p->header.flag & BT_INTERNAL) { 1951 ASSERT(p->header.nextindex == 1952 cpu_to_le16(XTENTRYSTART + 1)); 1953 xad = &p->xad[XTENTRYSTART]; 1954 bn = addressXAD(xad); 1955 XT_PUTPAGE(mp); 1956 1957 /* get new child page */ 1958 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1959 if (rc) 1960 return rc; 1961 1962 BT_MARK_DIRTY(mp, ip); 1963 if (!test_cflag(COMMIT_Nolink, ip)) { 1964 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 1965 xtlck = (struct xtlock *) & tlck->lock; 1966 } 1967 } else { 1968 /* is nXAD on new page ? */ 1969 if (newindex > 1970 (le16_to_cpu(p->header.maxentry) >> 1)) { 1971 newindex = 1972 newindex - 1973 le16_to_cpu(p->header.nextindex) + 1974 XTENTRYSTART; 1975 newpage = 1; 1976 } 1977 } 1978 } else { 1979 /* if insert into middle, shift right remaining entries */ 1980 if (newindex < nextindex) 1981 memmove(&p->xad[newindex + 1], &p->xad[newindex], 1982 (nextindex - newindex) << L2XTSLOTSIZE); 1983 1984 /* insert the entry */ 1985 xad = &p->xad[newindex]; 1986 *xad = *nxad; 1987 xad->flag = xflag & ~XAD_NOTRECORDED; 1988 1989 /* advance next available entry index. */ 1990 p->header.nextindex = 1991 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 1992 } 1993 1994 /* 1995 * does nXAD force 3-way split ? 1996 * 1997 * |---nXAD--->| 1998 * --|----------XAD-------------|-- 1999 * |-lXAD-| |-rXAD -| 2000 */ 2001 if (nxoff + nxlen == xoff + xlen) 2002 goto out; 2003 2004 /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */ 2005 if (newpage) { 2006 /* close out old page */ 2007 if (!test_cflag(COMMIT_Nolink, ip)) { 2008 xtlck->lwm.offset = (xtlck->lwm.offset) ? 2009 min(index0, (int)xtlck->lwm.offset) : index0; 2010 xtlck->lwm.length = 2011 le16_to_cpu(p->header.nextindex) - 2012 xtlck->lwm.offset; 2013 } 2014 2015 bn = le64_to_cpu(p->header.next); 2016 XT_PUTPAGE(mp); 2017 2018 /* get new right page */ 2019 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2020 if (rc) 2021 return rc; 2022 2023 BT_MARK_DIRTY(mp, ip); 2024 if (!test_cflag(COMMIT_Nolink, ip)) { 2025 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 2026 xtlck = (struct xtlock *) & tlck->lock; 2027 } 2028 2029 index0 = index = newindex; 2030 } else 2031 index++; 2032 2033 newindex = index + 1; 2034 nextindex = le16_to_cpu(p->header.nextindex); 2035 xlen = xlen - (nxoff - xoff); 2036 xoff = nxoff; 2037 xaddr = nxaddr; 2038 2039 /* recompute split pages */ 2040 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2041 XT_PUTPAGE(mp); 2042 2043 if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) 2044 return rc; 2045 2046 /* retrieve search result */ 2047 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); 2048 2049 if (cmp != 0) { 2050 XT_PUTPAGE(mp); 2051 jfs_error(ip->i_sb, "xtUpdate: xtSearch failed"); 2052 return -EIO; 2053 } 2054 2055 if (index0 != index) { 2056 XT_PUTPAGE(mp); 2057 jfs_error(ip->i_sb, 2058 "xtUpdate: unexpected value of index"); 2059 return -EIO; 2060 } 2061 } 2062 2063 /* 2064 * split XAD into (nXAD, rXAD) 2065 * 2066 * ---nXAD---| 2067 * --|----------XAD----------|-- 2068 * |-rXAD-| 2069 */ 2070 updateLeft: /* (nxoff == xoff) && (nxlen < xlen) */ 2071 /* update old XAD with nXAD:recorded */ 2072 xad = &p->xad[index]; 2073 *xad = *nxad; 2074 xad->flag = xflag & ~XAD_NOTRECORDED; 2075 2076 /* insert rXAD:not_recorded */ 2077 xoff = xoff + nxlen; 2078 xlen = xlen - nxlen; 2079 xaddr = xaddr + nxlen; 2080 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2081 /* 2082 printf("xtUpdate.updateLeft.split p:0x%p\n", p); 2083 */ 2084 /* xtSpliUp() unpins leaf pages */ 2085 split.mp = mp; 2086 split.index = newindex; 2087 split.flag = xflag; 2088 split.off = xoff; 2089 split.len = xlen; 2090 split.addr = xaddr; 2091 split.pxdlist = NULL; 2092 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 2093 return rc; 2094 2095 /* get back old page */ 2096 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2097 if (rc) 2098 return rc; 2099 2100 /* 2101 * if leaf root has been split, original root has been 2102 * copied to new child page, i.e., original entry now 2103 * resides on the new child page; 2104 */ 2105 if (p->header.flag & BT_INTERNAL) { 2106 ASSERT(p->header.nextindex == 2107 cpu_to_le16(XTENTRYSTART + 1)); 2108 xad = &p->xad[XTENTRYSTART]; 2109 bn = addressXAD(xad); 2110 XT_PUTPAGE(mp); 2111 2112 /* get new child page */ 2113 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2114 if (rc) 2115 return rc; 2116 2117 BT_MARK_DIRTY(mp, ip); 2118 if (!test_cflag(COMMIT_Nolink, ip)) { 2119 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 2120 xtlck = (struct xtlock *) & tlck->lock; 2121 } 2122 } 2123 } else { 2124 /* if insert into middle, shift right remaining entries */ 2125 if (newindex < nextindex) 2126 memmove(&p->xad[newindex + 1], &p->xad[newindex], 2127 (nextindex - newindex) << L2XTSLOTSIZE); 2128 2129 /* insert the entry */ 2130 xad = &p->xad[newindex]; 2131 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 2132 2133 /* advance next available entry index. */ 2134 p->header.nextindex = 2135 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 2136 } 2137 2138 out: 2139 if (!test_cflag(COMMIT_Nolink, ip)) { 2140 xtlck->lwm.offset = (xtlck->lwm.offset) ? 2141 min(index0, (int)xtlck->lwm.offset) : index0; 2142 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 2143 xtlck->lwm.offset; 2144 } 2145 2146 /* unpin the leaf page */ 2147 XT_PUTPAGE(mp); 2148 2149 return rc; 2150 } 2151 2152 2153 /* 2154 * xtAppend() 2155 * 2156 * function: grow in append mode from contiguous region specified ; 2157 * 2158 * parameter: 2159 * tid - transaction id; 2160 * ip - file object; 2161 * xflag - extent flag: 2162 * xoff - extent offset; 2163 * maxblocks - max extent length; 2164 * xlen - extent length (in/out); 2165 * xaddrp - extent address pointer (in/out): 2166 * flag - 2167 * 2168 * return: 2169 */ 2170 int xtAppend(tid_t tid, /* transaction id */ 2171 struct inode *ip, int xflag, s64 xoff, s32 maxblocks, 2172 s32 * xlenp, /* (in/out) */ 2173 s64 * xaddrp, /* (in/out) */ 2174 int flag) 2175 { 2176 int rc = 0; 2177 struct metapage *mp; /* meta-page buffer */ 2178 xtpage_t *p; /* base B+-tree index page */ 2179 s64 bn, xaddr; 2180 int index, nextindex; 2181 struct btstack btstack; /* traverse stack */ 2182 struct xtsplit split; /* split information */ 2183 xad_t *xad; 2184 int cmp; 2185 struct tlock *tlck; 2186 struct xtlock *xtlck; 2187 int nsplit, nblocks, xlen; 2188 struct pxdlist pxdlist; 2189 pxd_t *pxd; 2190 s64 next; 2191 2192 xaddr = *xaddrp; 2193 xlen = *xlenp; 2194 jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx", 2195 (ulong) xoff, maxblocks, xlen, (ulong) xaddr); 2196 2197 /* 2198 * search for the entry location at which to insert: 2199 * 2200 * xtFastSearch() and xtSearch() both returns (leaf page 2201 * pinned, index at which to insert). 2202 * n.b. xtSearch() may return index of maxentry of 2203 * the full page. 2204 */ 2205 if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT))) 2206 return rc; 2207 2208 /* retrieve search result */ 2209 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2210 2211 if (cmp == 0) { 2212 rc = -EEXIST; 2213 goto out; 2214 } 2215 2216 if (next) 2217 xlen = min(xlen, (int)(next - xoff)); 2218 //insert: 2219 /* 2220 * insert entry for new extent 2221 */ 2222 xflag |= XAD_NEW; 2223 2224 /* 2225 * if the leaf page is full, split the page and 2226 * propagate up the router entry for the new page from split 2227 * 2228 * The xtSplitUp() will insert the entry and unpin the leaf page. 2229 */ 2230 nextindex = le16_to_cpu(p->header.nextindex); 2231 if (nextindex < le16_to_cpu(p->header.maxentry)) 2232 goto insertLeaf; 2233 2234 /* 2235 * allocate new index blocks to cover index page split(s) 2236 */ 2237 nsplit = btstack.nsplit; 2238 split.pxdlist = &pxdlist; 2239 pxdlist.maxnpxd = pxdlist.npxd = 0; 2240 pxd = &pxdlist.pxd[0]; 2241 nblocks = JFS_SBI(ip->i_sb)->nbperpage; 2242 for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) { 2243 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) { 2244 PXDaddress(pxd, xaddr); 2245 PXDlength(pxd, nblocks); 2246 2247 pxdlist.maxnpxd++; 2248 2249 continue; 2250 } 2251 2252 /* undo allocation */ 2253 2254 goto out; 2255 } 2256 2257 xlen = min(xlen, maxblocks); 2258 2259 /* 2260 * allocate data extent requested 2261 */ 2262 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) 2263 goto out; 2264 2265 split.mp = mp; 2266 split.index = index; 2267 split.flag = xflag; 2268 split.off = xoff; 2269 split.len = xlen; 2270 split.addr = xaddr; 2271 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { 2272 /* undo data extent allocation */ 2273 dbFree(ip, *xaddrp, (s64) * xlenp); 2274 2275 return rc; 2276 } 2277 2278 *xaddrp = xaddr; 2279 *xlenp = xlen; 2280 return 0; 2281 2282 /* 2283 * insert the new entry into the leaf page 2284 */ 2285 insertLeaf: 2286 /* 2287 * allocate data extent requested 2288 */ 2289 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) 2290 goto out; 2291 2292 BT_MARK_DIRTY(mp, ip); 2293 /* 2294 * acquire a transaction lock on the leaf page; 2295 * 2296 * action: xad insertion/extension; 2297 */ 2298 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 2299 xtlck = (struct xtlock *) & tlck->lock; 2300 2301 /* insert the new entry: mark the entry NEW */ 2302 xad = &p->xad[index]; 2303 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 2304 2305 /* advance next available entry index */ 2306 le16_add_cpu(&p->header.nextindex, 1); 2307 2308 xtlck->lwm.offset = 2309 (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index; 2310 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 2311 xtlck->lwm.offset; 2312 2313 *xaddrp = xaddr; 2314 *xlenp = xlen; 2315 2316 out: 2317 /* unpin the leaf page */ 2318 XT_PUTPAGE(mp); 2319 2320 return rc; 2321 } 2322 #ifdef _STILL_TO_PORT 2323 2324 /* - TBD for defragmentaion/reorganization - 2325 * 2326 * xtDelete() 2327 * 2328 * function: 2329 * delete the entry with the specified key. 2330 * 2331 * N.B.: whole extent of the entry is assumed to be deleted. 2332 * 2333 * parameter: 2334 * 2335 * return: 2336 * ENOENT: if the entry is not found. 2337 * 2338 * exception: 2339 */ 2340 int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag) 2341 { 2342 int rc = 0; 2343 struct btstack btstack; 2344 int cmp; 2345 s64 bn; 2346 struct metapage *mp; 2347 xtpage_t *p; 2348 int index, nextindex; 2349 struct tlock *tlck; 2350 struct xtlock *xtlck; 2351 2352 /* 2353 * find the matching entry; xtSearch() pins the page 2354 */ 2355 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) 2356 return rc; 2357 2358 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2359 if (cmp) { 2360 /* unpin the leaf page */ 2361 XT_PUTPAGE(mp); 2362 return -ENOENT; 2363 } 2364 2365 /* 2366 * delete the entry from the leaf page 2367 */ 2368 nextindex = le16_to_cpu(p->header.nextindex); 2369 le16_add_cpu(&p->header.nextindex, -1); 2370 2371 /* 2372 * if the leaf page bocome empty, free the page 2373 */ 2374 if (p->header.nextindex == cpu_to_le16(XTENTRYSTART)) 2375 return (xtDeleteUp(tid, ip, mp, p, &btstack)); 2376 2377 BT_MARK_DIRTY(mp, ip); 2378 /* 2379 * acquire a transaction lock on the leaf page; 2380 * 2381 * action:xad deletion; 2382 */ 2383 tlck = txLock(tid, ip, mp, tlckXTREE); 2384 xtlck = (struct xtlock *) & tlck->lock; 2385 xtlck->lwm.offset = 2386 (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index; 2387 2388 /* if delete from middle, shift left/compact the remaining entries */ 2389 if (index < nextindex - 1) 2390 memmove(&p->xad[index], &p->xad[index + 1], 2391 (nextindex - index - 1) * sizeof(xad_t)); 2392 2393 XT_PUTPAGE(mp); 2394 2395 return 0; 2396 } 2397 2398 2399 /* - TBD for defragmentaion/reorganization - 2400 * 2401 * xtDeleteUp() 2402 * 2403 * function: 2404 * free empty pages as propagating deletion up the tree 2405 * 2406 * parameter: 2407 * 2408 * return: 2409 */ 2410 static int 2411 xtDeleteUp(tid_t tid, struct inode *ip, 2412 struct metapage * fmp, xtpage_t * fp, struct btstack * btstack) 2413 { 2414 int rc = 0; 2415 struct metapage *mp; 2416 xtpage_t *p; 2417 int index, nextindex; 2418 s64 xaddr; 2419 int xlen; 2420 struct btframe *parent; 2421 struct tlock *tlck; 2422 struct xtlock *xtlck; 2423 2424 /* 2425 * keep root leaf page which has become empty 2426 */ 2427 if (fp->header.flag & BT_ROOT) { 2428 /* keep the root page */ 2429 fp->header.flag &= ~BT_INTERNAL; 2430 fp->header.flag |= BT_LEAF; 2431 fp->header.nextindex = cpu_to_le16(XTENTRYSTART); 2432 2433 /* XT_PUTPAGE(fmp); */ 2434 2435 return 0; 2436 } 2437 2438 /* 2439 * free non-root leaf page 2440 */ 2441 if ((rc = xtRelink(tid, ip, fp))) { 2442 XT_PUTPAGE(fmp); 2443 return rc; 2444 } 2445 2446 xaddr = addressPXD(&fp->header.self); 2447 xlen = lengthPXD(&fp->header.self); 2448 /* free the page extent */ 2449 dbFree(ip, xaddr, (s64) xlen); 2450 2451 /* free the buffer page */ 2452 discard_metapage(fmp); 2453 2454 /* 2455 * propagate page deletion up the index tree 2456 * 2457 * If the delete from the parent page makes it empty, 2458 * continue all the way up the tree. 2459 * stop if the root page is reached (which is never deleted) or 2460 * if the entry deletion does not empty the page. 2461 */ 2462 while ((parent = BT_POP(btstack)) != NULL) { 2463 /* get/pin the parent page <sp> */ 2464 XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); 2465 if (rc) 2466 return rc; 2467 2468 index = parent->index; 2469 2470 /* delete the entry for the freed child page from parent. 2471 */ 2472 nextindex = le16_to_cpu(p->header.nextindex); 2473 2474 /* 2475 * the parent has the single entry being deleted: 2476 * free the parent page which has become empty. 2477 */ 2478 if (nextindex == 1) { 2479 if (p->header.flag & BT_ROOT) { 2480 /* keep the root page */ 2481 p->header.flag &= ~BT_INTERNAL; 2482 p->header.flag |= BT_LEAF; 2483 p->header.nextindex = 2484 cpu_to_le16(XTENTRYSTART); 2485 2486 /* XT_PUTPAGE(mp); */ 2487 2488 break; 2489 } else { 2490 /* free the parent page */ 2491 if ((rc = xtRelink(tid, ip, p))) 2492 return rc; 2493 2494 xaddr = addressPXD(&p->header.self); 2495 /* free the page extent */ 2496 dbFree(ip, xaddr, 2497 (s64) JFS_SBI(ip->i_sb)->nbperpage); 2498 2499 /* unpin/free the buffer page */ 2500 discard_metapage(mp); 2501 2502 /* propagate up */ 2503 continue; 2504 } 2505 } 2506 /* 2507 * the parent has other entries remaining: 2508 * delete the router entry from the parent page. 2509 */ 2510 else { 2511 BT_MARK_DIRTY(mp, ip); 2512 /* 2513 * acquire a transaction lock on the leaf page; 2514 * 2515 * action:xad deletion; 2516 */ 2517 tlck = txLock(tid, ip, mp, tlckXTREE); 2518 xtlck = (struct xtlock *) & tlck->lock; 2519 xtlck->lwm.offset = 2520 (xtlck->lwm.offset) ? min(index, 2521 xtlck->lwm. 2522 offset) : index; 2523 2524 /* if delete from middle, 2525 * shift left/compact the remaining entries in the page 2526 */ 2527 if (index < nextindex - 1) 2528 memmove(&p->xad[index], &p->xad[index + 1], 2529 (nextindex - index - 2530 1) << L2XTSLOTSIZE); 2531 2532 le16_add_cpu(&p->header.nextindex, -1); 2533 jfs_info("xtDeleteUp(entry): 0x%lx[%d]", 2534 (ulong) parent->bn, index); 2535 } 2536 2537 /* unpin the parent page */ 2538 XT_PUTPAGE(mp); 2539 2540 /* exit propagation up */ 2541 break; 2542 } 2543 2544 return 0; 2545 } 2546 2547 2548 /* 2549 * NAME: xtRelocate() 2550 * 2551 * FUNCTION: relocate xtpage or data extent of regular file; 2552 * This function is mainly used by defragfs utility. 2553 * 2554 * NOTE: This routine does not have the logic to handle 2555 * uncommitted allocated extent. The caller should call 2556 * txCommit() to commit all the allocation before call 2557 * this routine. 2558 */ 2559 int 2560 xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ 2561 s64 nxaddr, /* new xaddr */ 2562 int xtype) 2563 { /* extent type: XTPAGE or DATAEXT */ 2564 int rc = 0; 2565 struct tblock *tblk; 2566 struct tlock *tlck; 2567 struct xtlock *xtlck; 2568 struct metapage *mp, *pmp, *lmp, *rmp; /* meta-page buffer */ 2569 xtpage_t *p, *pp, *rp, *lp; /* base B+-tree index page */ 2570 xad_t *xad; 2571 pxd_t *pxd; 2572 s64 xoff, xsize; 2573 int xlen; 2574 s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn; 2575 cbuf_t *cp; 2576 s64 offset, nbytes, nbrd, pno; 2577 int nb, npages, nblks; 2578 s64 bn; 2579 int cmp; 2580 int index; 2581 struct pxd_lock *pxdlock; 2582 struct btstack btstack; /* traverse stack */ 2583 2584 xtype = xtype & EXTENT_TYPE; 2585 2586 xoff = offsetXAD(oxad); 2587 oxaddr = addressXAD(oxad); 2588 xlen = lengthXAD(oxad); 2589 2590 /* validate extent offset */ 2591 offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; 2592 if (offset >= ip->i_size) 2593 return -ESTALE; /* stale extent */ 2594 2595 jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx", 2596 xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr); 2597 2598 /* 2599 * 1. get and validate the parent xtpage/xad entry 2600 * covering the source extent to be relocated; 2601 */ 2602 if (xtype == DATAEXT) { 2603 /* search in leaf entry */ 2604 rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); 2605 if (rc) 2606 return rc; 2607 2608 /* retrieve search result */ 2609 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2610 2611 if (cmp) { 2612 XT_PUTPAGE(pmp); 2613 return -ESTALE; 2614 } 2615 2616 /* validate for exact match with a single entry */ 2617 xad = &pp->xad[index]; 2618 if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) { 2619 XT_PUTPAGE(pmp); 2620 return -ESTALE; 2621 } 2622 } else { /* (xtype == XTPAGE) */ 2623 2624 /* search in internal entry */ 2625 rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0); 2626 if (rc) 2627 return rc; 2628 2629 /* retrieve search result */ 2630 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2631 2632 if (cmp) { 2633 XT_PUTPAGE(pmp); 2634 return -ESTALE; 2635 } 2636 2637 /* xtSearchNode() validated for exact match with a single entry 2638 */ 2639 xad = &pp->xad[index]; 2640 } 2641 jfs_info("xtRelocate: parent xad entry validated."); 2642 2643 /* 2644 * 2. relocate the extent 2645 */ 2646 if (xtype == DATAEXT) { 2647 /* if the extent is allocated-but-not-recorded 2648 * there is no real data to be moved in this extent, 2649 */ 2650 if (xad->flag & XAD_NOTRECORDED) 2651 goto out; 2652 else 2653 /* release xtpage for cmRead()/xtLookup() */ 2654 XT_PUTPAGE(pmp); 2655 2656 /* 2657 * cmRelocate() 2658 * 2659 * copy target data pages to be relocated; 2660 * 2661 * data extent must start at page boundary and 2662 * multiple of page size (except the last data extent); 2663 * read in each page of the source data extent into cbuf, 2664 * update the cbuf extent descriptor of the page to be 2665 * homeward bound to new dst data extent 2666 * copy the data from the old extent to new extent. 2667 * copy is essential for compressed files to avoid problems 2668 * that can arise if there was a change in compression 2669 * algorithms. 2670 * it is a good strategy because it may disrupt cache 2671 * policy to keep the pages in memory afterwards. 2672 */ 2673 offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; 2674 assert((offset & CM_OFFSET) == 0); 2675 nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize; 2676 pno = offset >> CM_L2BSIZE; 2677 npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE; 2678 /* 2679 npages = ((offset + nbytes - 1) >> CM_L2BSIZE) - 2680 (offset >> CM_L2BSIZE) + 1; 2681 */ 2682 sxaddr = oxaddr; 2683 dxaddr = nxaddr; 2684 2685 /* process the request one cache buffer at a time */ 2686 for (nbrd = 0; nbrd < nbytes; nbrd += nb, 2687 offset += nb, pno++, npages--) { 2688 /* compute page size */ 2689 nb = min(nbytes - nbrd, CM_BSIZE); 2690 2691 /* get the cache buffer of the page */ 2692 if (rc = cmRead(ip, offset, npages, &cp)) 2693 break; 2694 2695 assert(addressPXD(&cp->cm_pxd) == sxaddr); 2696 assert(!cp->cm_modified); 2697 2698 /* bind buffer with the new extent address */ 2699 nblks = nb >> JFS_IP(ip->i_sb)->l2bsize; 2700 cmSetXD(ip, cp, pno, dxaddr, nblks); 2701 2702 /* release the cbuf, mark it as modified */ 2703 cmPut(cp, true); 2704 2705 dxaddr += nblks; 2706 sxaddr += nblks; 2707 } 2708 2709 /* get back parent page */ 2710 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) 2711 return rc; 2712 2713 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2714 jfs_info("xtRelocate: target data extent relocated."); 2715 } else { /* (xtype == XTPAGE) */ 2716 2717 /* 2718 * read in the target xtpage from the source extent; 2719 */ 2720 XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); 2721 if (rc) { 2722 XT_PUTPAGE(pmp); 2723 return rc; 2724 } 2725 2726 /* 2727 * read in sibling pages if any to update sibling pointers; 2728 */ 2729 rmp = NULL; 2730 if (p->header.next) { 2731 nextbn = le64_to_cpu(p->header.next); 2732 XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc); 2733 if (rc) { 2734 XT_PUTPAGE(pmp); 2735 XT_PUTPAGE(mp); 2736 return (rc); 2737 } 2738 } 2739 2740 lmp = NULL; 2741 if (p->header.prev) { 2742 prevbn = le64_to_cpu(p->header.prev); 2743 XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc); 2744 if (rc) { 2745 XT_PUTPAGE(pmp); 2746 XT_PUTPAGE(mp); 2747 if (rmp) 2748 XT_PUTPAGE(rmp); 2749 return (rc); 2750 } 2751 } 2752 2753 /* at this point, all xtpages to be updated are in memory */ 2754 2755 /* 2756 * update sibling pointers of sibling xtpages if any; 2757 */ 2758 if (lmp) { 2759 BT_MARK_DIRTY(lmp, ip); 2760 tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK); 2761 lp->header.next = cpu_to_le64(nxaddr); 2762 XT_PUTPAGE(lmp); 2763 } 2764 2765 if (rmp) { 2766 BT_MARK_DIRTY(rmp, ip); 2767 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK); 2768 rp->header.prev = cpu_to_le64(nxaddr); 2769 XT_PUTPAGE(rmp); 2770 } 2771 2772 /* 2773 * update the target xtpage to be relocated 2774 * 2775 * update the self address of the target page 2776 * and write to destination extent; 2777 * redo image covers the whole xtpage since it is new page 2778 * to the destination extent; 2779 * update of bmap for the free of source extent 2780 * of the target xtpage itself: 2781 * update of bmap for the allocation of destination extent 2782 * of the target xtpage itself: 2783 * update of bmap for the extents covered by xad entries in 2784 * the target xtpage is not necessary since they are not 2785 * updated; 2786 * if not committed before this relocation, 2787 * target page may contain XAD_NEW entries which must 2788 * be scanned for bmap update (logredo() always 2789 * scan xtpage REDOPAGE image for bmap update); 2790 * if committed before this relocation (tlckRELOCATE), 2791 * scan may be skipped by commit() and logredo(); 2792 */ 2793 BT_MARK_DIRTY(mp, ip); 2794 /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */ 2795 tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW); 2796 xtlck = (struct xtlock *) & tlck->lock; 2797 2798 /* update the self address in the xtpage header */ 2799 pxd = &p->header.self; 2800 PXDaddress(pxd, nxaddr); 2801 2802 /* linelock for the after image of the whole page */ 2803 xtlck->lwm.length = 2804 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; 2805 2806 /* update the buffer extent descriptor of target xtpage */ 2807 xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; 2808 bmSetXD(mp, nxaddr, xsize); 2809 2810 /* unpin the target page to new homeward bound */ 2811 XT_PUTPAGE(mp); 2812 jfs_info("xtRelocate: target xtpage relocated."); 2813 } 2814 2815 /* 2816 * 3. acquire maplock for the source extent to be freed; 2817 * 2818 * acquire a maplock saving the src relocated extent address; 2819 * to free of the extent at commit time; 2820 */ 2821 out: 2822 /* if DATAEXT relocation, write a LOG_UPDATEMAP record for 2823 * free PXD of the source data extent (logredo() will update 2824 * bmap for free of source data extent), and update bmap for 2825 * free of the source data extent; 2826 */ 2827 if (xtype == DATAEXT) 2828 tlck = txMaplock(tid, ip, tlckMAP); 2829 /* if XTPAGE relocation, write a LOG_NOREDOPAGE record 2830 * for the source xtpage (logredo() will init NoRedoPage 2831 * filter and will also update bmap for free of the source 2832 * xtpage), and update bmap for free of the source xtpage; 2833 * N.B. We use tlckMAP instead of tlkcXTREE because there 2834 * is no buffer associated with this lock since the buffer 2835 * has been redirected to the target location. 2836 */ 2837 else /* (xtype == XTPAGE) */ 2838 tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE); 2839 2840 pxdlock = (struct pxd_lock *) & tlck->lock; 2841 pxdlock->flag = mlckFREEPXD; 2842 PXDaddress(&pxdlock->pxd, oxaddr); 2843 PXDlength(&pxdlock->pxd, xlen); 2844 pxdlock->index = 1; 2845 2846 /* 2847 * 4. update the parent xad entry for relocation; 2848 * 2849 * acquire tlck for the parent entry with XAD_NEW as entry 2850 * update which will write LOG_REDOPAGE and update bmap for 2851 * allocation of XAD_NEW destination extent; 2852 */ 2853 jfs_info("xtRelocate: update parent xad entry."); 2854 BT_MARK_DIRTY(pmp, ip); 2855 tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW); 2856 xtlck = (struct xtlock *) & tlck->lock; 2857 2858 /* update the XAD with the new destination extent; */ 2859 xad = &pp->xad[index]; 2860 xad->flag |= XAD_NEW; 2861 XADaddress(xad, nxaddr); 2862 2863 xtlck->lwm.offset = min(index, xtlck->lwm.offset); 2864 xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) - 2865 xtlck->lwm.offset; 2866 2867 /* unpin the parent xtpage */ 2868 XT_PUTPAGE(pmp); 2869 2870 return rc; 2871 } 2872 2873 2874 /* 2875 * xtSearchNode() 2876 * 2877 * function: search for the internal xad entry covering specified extent. 2878 * This function is mainly used by defragfs utility. 2879 * 2880 * parameters: 2881 * ip - file object; 2882 * xad - extent to find; 2883 * cmpp - comparison result: 2884 * btstack - traverse stack; 2885 * flag - search process flag; 2886 * 2887 * returns: 2888 * btstack contains (bn, index) of search path traversed to the entry. 2889 * *cmpp is set to result of comparison with the entry returned. 2890 * the page containing the entry is pinned at exit. 2891 */ 2892 static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ 2893 int *cmpp, struct btstack * btstack, int flag) 2894 { 2895 int rc = 0; 2896 s64 xoff, xaddr; 2897 int xlen; 2898 int cmp = 1; /* init for empty page */ 2899 s64 bn; /* block number */ 2900 struct metapage *mp; /* meta-page buffer */ 2901 xtpage_t *p; /* page */ 2902 int base, index, lim; 2903 struct btframe *btsp; 2904 s64 t64; 2905 2906 BT_CLR(btstack); 2907 2908 xoff = offsetXAD(xad); 2909 xlen = lengthXAD(xad); 2910 xaddr = addressXAD(xad); 2911 2912 /* 2913 * search down tree from root: 2914 * 2915 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 2916 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 2917 * 2918 * if entry with search key K is not found 2919 * internal page search find the entry with largest key Ki 2920 * less than K which point to the child page to search; 2921 * leaf page search find the entry with smallest key Kj 2922 * greater than K so that the returned index is the position of 2923 * the entry to be shifted right for insertion of new entry. 2924 * for empty tree, search key is greater than any key of the tree. 2925 * 2926 * by convention, root bn = 0. 2927 */ 2928 for (bn = 0;;) { 2929 /* get/pin the page to search */ 2930 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2931 if (rc) 2932 return rc; 2933 if (p->header.flag & BT_LEAF) { 2934 XT_PUTPAGE(mp); 2935 return -ESTALE; 2936 } 2937 2938 lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 2939 2940 /* 2941 * binary search with search key K on the current page 2942 */ 2943 for (base = XTENTRYSTART; lim; lim >>= 1) { 2944 index = base + (lim >> 1); 2945 2946 XT_CMP(cmp, xoff, &p->xad[index], t64); 2947 if (cmp == 0) { 2948 /* 2949 * search hit 2950 * 2951 * verify for exact match; 2952 */ 2953 if (xaddr == addressXAD(&p->xad[index]) && 2954 xoff == offsetXAD(&p->xad[index])) { 2955 *cmpp = cmp; 2956 2957 /* save search result */ 2958 btsp = btstack->top; 2959 btsp->bn = bn; 2960 btsp->index = index; 2961 btsp->mp = mp; 2962 2963 return 0; 2964 } 2965 2966 /* descend/search its child page */ 2967 goto next; 2968 } 2969 2970 if (cmp > 0) { 2971 base = index + 1; 2972 --lim; 2973 } 2974 } 2975 2976 /* 2977 * search miss - non-leaf page: 2978 * 2979 * base is the smallest index with key (Kj) greater than 2980 * search key (K) and may be zero or maxentry index. 2981 * if base is non-zero, decrement base by one to get the parent 2982 * entry of the child page to search. 2983 */ 2984 index = base ? base - 1 : base; 2985 2986 /* 2987 * go down to child page 2988 */ 2989 next: 2990 /* get the child page block number */ 2991 bn = addressXAD(&p->xad[index]); 2992 2993 /* unpin the parent page */ 2994 XT_PUTPAGE(mp); 2995 } 2996 } 2997 2998 2999 /* 3000 * xtRelink() 3001 * 3002 * function: 3003 * link around a freed page. 3004 * 3005 * Parameter: 3006 * int tid, 3007 * struct inode *ip, 3008 * xtpage_t *p) 3009 * 3010 * returns: 3011 */ 3012 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p) 3013 { 3014 int rc = 0; 3015 struct metapage *mp; 3016 s64 nextbn, prevbn; 3017 struct tlock *tlck; 3018 3019 nextbn = le64_to_cpu(p->header.next); 3020 prevbn = le64_to_cpu(p->header.prev); 3021 3022 /* update prev pointer of the next page */ 3023 if (nextbn != 0) { 3024 XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 3025 if (rc) 3026 return rc; 3027 3028 /* 3029 * acquire a transaction lock on the page; 3030 * 3031 * action: update prev pointer; 3032 */ 3033 BT_MARK_DIRTY(mp, ip); 3034 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 3035 3036 /* the page may already have been tlock'd */ 3037 3038 p->header.prev = cpu_to_le64(prevbn); 3039 3040 XT_PUTPAGE(mp); 3041 } 3042 3043 /* update next pointer of the previous page */ 3044 if (prevbn != 0) { 3045 XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc); 3046 if (rc) 3047 return rc; 3048 3049 /* 3050 * acquire a transaction lock on the page; 3051 * 3052 * action: update next pointer; 3053 */ 3054 BT_MARK_DIRTY(mp, ip); 3055 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 3056 3057 /* the page may already have been tlock'd */ 3058 3059 p->header.next = le64_to_cpu(nextbn); 3060 3061 XT_PUTPAGE(mp); 3062 } 3063 3064 return 0; 3065 } 3066 #endif /* _STILL_TO_PORT */ 3067 3068 3069 /* 3070 * xtInitRoot() 3071 * 3072 * initialize file root (inline in inode) 3073 */ 3074 void xtInitRoot(tid_t tid, struct inode *ip) 3075 { 3076 xtpage_t *p; 3077 3078 /* 3079 * acquire a transaction lock on the root 3080 * 3081 * action: 3082 */ 3083 txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag, 3084 tlckXTREE | tlckNEW); 3085 p = &JFS_IP(ip)->i_xtroot; 3086 3087 p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; 3088 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3089 3090 if (S_ISDIR(ip->i_mode)) 3091 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR); 3092 else { 3093 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT); 3094 ip->i_size = 0; 3095 } 3096 3097 3098 return; 3099 } 3100 3101 3102 /* 3103 * We can run into a deadlock truncating a file with a large number of 3104 * xtree pages (large fragmented file). A robust fix would entail a 3105 * reservation system where we would reserve a number of metadata pages 3106 * and tlocks which we would be guaranteed without a deadlock. Without 3107 * this, a partial fix is to limit number of metadata pages we will lock 3108 * in a single transaction. Currently we will truncate the file so that 3109 * no more than 50 leaf pages will be locked. The caller of xtTruncate 3110 * will be responsible for ensuring that the current transaction gets 3111 * committed, and that subsequent transactions are created to truncate 3112 * the file further if needed. 3113 */ 3114 #define MAX_TRUNCATE_LEAVES 50 3115 3116 /* 3117 * xtTruncate() 3118 * 3119 * function: 3120 * traverse for truncation logging backward bottom up; 3121 * terminate at the last extent entry at the current subtree 3122 * root page covering new down size. 3123 * truncation may occur within the last extent entry. 3124 * 3125 * parameter: 3126 * int tid, 3127 * struct inode *ip, 3128 * s64 newsize, 3129 * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} 3130 * 3131 * return: 3132 * 3133 * note: 3134 * PWMAP: 3135 * 1. truncate (non-COMMIT_NOLINK file) 3136 * by jfs_truncate() or jfs_open(O_TRUNC): 3137 * xtree is updated; 3138 * 2. truncate index table of directory when last entry removed 3139 * map update via tlock at commit time; 3140 * PMAP: 3141 * Call xtTruncate_pmap instead 3142 * WMAP: 3143 * 1. remove (free zero link count) on last reference release 3144 * (pmap has been freed at commit zero link count); 3145 * 2. truncate (COMMIT_NOLINK file, i.e., tmp file): 3146 * xtree is updated; 3147 * map update directly at truncation time; 3148 * 3149 * if (DELETE) 3150 * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); 3151 * else if (TRUNCATE) 3152 * must write LOG_NOREDOPAGE for deleted index page; 3153 * 3154 * pages may already have been tlocked by anonymous transactions 3155 * during file growth (i.e., write) before truncation; 3156 * 3157 * except last truncated entry, deleted entries remains as is 3158 * in the page (nextindex is updated) for other use 3159 * (e.g., log/update allocation map): this avoid copying the page 3160 * info but delay free of pages; 3161 * 3162 */ 3163 s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) 3164 { 3165 int rc = 0; 3166 s64 teof; 3167 struct metapage *mp; 3168 xtpage_t *p; 3169 s64 bn; 3170 int index, nextindex; 3171 xad_t *xad; 3172 s64 xoff, xaddr; 3173 int xlen, len, freexlen; 3174 struct btstack btstack; 3175 struct btframe *parent; 3176 struct tblock *tblk = NULL; 3177 struct tlock *tlck = NULL; 3178 struct xtlock *xtlck = NULL; 3179 struct xdlistlock xadlock; /* maplock for COMMIT_WMAP */ 3180 struct pxd_lock *pxdlock; /* maplock for COMMIT_WMAP */ 3181 s64 nfreed; 3182 int freed, log; 3183 int locked_leaves = 0; 3184 3185 /* save object truncation type */ 3186 if (tid) { 3187 tblk = tid_to_tblock(tid); 3188 tblk->xflag |= flag; 3189 } 3190 3191 nfreed = 0; 3192 3193 flag &= COMMIT_MAP; 3194 assert(flag != COMMIT_PMAP); 3195 3196 if (flag == COMMIT_PWMAP) 3197 log = 1; 3198 else { 3199 log = 0; 3200 xadlock.flag = mlckFREEXADLIST; 3201 xadlock.index = 1; 3202 } 3203 3204 /* 3205 * if the newsize is not an integral number of pages, 3206 * the file between newsize and next page boundary will 3207 * be cleared. 3208 * if truncating into a file hole, it will cause 3209 * a full block to be allocated for the logical block. 3210 */ 3211 3212 /* 3213 * release page blocks of truncated region <teof, eof> 3214 * 3215 * free the data blocks from the leaf index blocks. 3216 * delete the parent index entries corresponding to 3217 * the freed child data/index blocks. 3218 * free the index blocks themselves which aren't needed 3219 * in new sized file. 3220 * 3221 * index blocks are updated only if the blocks are to be 3222 * retained in the new sized file. 3223 * if type is PMAP, the data and index pages are NOT 3224 * freed, and the data and index blocks are NOT freed 3225 * from working map. 3226 * (this will allow continued access of data/index of 3227 * temporary file (zerolink count file truncated to zero-length)). 3228 */ 3229 teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >> 3230 JFS_SBI(ip->i_sb)->l2bsize; 3231 3232 /* clear stack */ 3233 BT_CLR(&btstack); 3234 3235 /* 3236 * start with root 3237 * 3238 * root resides in the inode 3239 */ 3240 bn = 0; 3241 3242 /* 3243 * first access of each page: 3244 */ 3245 getPage: 3246 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3247 if (rc) 3248 return rc; 3249 3250 /* process entries backward from last index */ 3251 index = le16_to_cpu(p->header.nextindex) - 1; 3252 3253 3254 /* Since this is the rightmost page at this level, and we may have 3255 * already freed a page that was formerly to the right, let's make 3256 * sure that the next pointer is zero. 3257 */ 3258 if (p->header.next) { 3259 if (log) 3260 /* 3261 * Make sure this change to the header is logged. 3262 * If we really truncate this leaf, the flag 3263 * will be changed to tlckTRUNCATE 3264 */ 3265 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 3266 BT_MARK_DIRTY(mp, ip); 3267 p->header.next = 0; 3268 } 3269 3270 if (p->header.flag & BT_INTERNAL) 3271 goto getChild; 3272 3273 /* 3274 * leaf page 3275 */ 3276 freed = 0; 3277 3278 /* does region covered by leaf page precede Teof ? */ 3279 xad = &p->xad[index]; 3280 xoff = offsetXAD(xad); 3281 xlen = lengthXAD(xad); 3282 if (teof >= xoff + xlen) { 3283 XT_PUTPAGE(mp); 3284 goto getParent; 3285 } 3286 3287 /* (re)acquire tlock of the leaf page */ 3288 if (log) { 3289 if (++locked_leaves > MAX_TRUNCATE_LEAVES) { 3290 /* 3291 * We need to limit the size of the transaction 3292 * to avoid exhausting pagecache & tlocks 3293 */ 3294 XT_PUTPAGE(mp); 3295 newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; 3296 goto getParent; 3297 } 3298 tlck = txLock(tid, ip, mp, tlckXTREE); 3299 tlck->type = tlckXTREE | tlckTRUNCATE; 3300 xtlck = (struct xtlock *) & tlck->lock; 3301 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; 3302 } 3303 BT_MARK_DIRTY(mp, ip); 3304 3305 /* 3306 * scan backward leaf page entries 3307 */ 3308 for (; index >= XTENTRYSTART; index--) { 3309 xad = &p->xad[index]; 3310 xoff = offsetXAD(xad); 3311 xlen = lengthXAD(xad); 3312 xaddr = addressXAD(xad); 3313 3314 /* 3315 * The "data" for a directory is indexed by the block 3316 * device's address space. This metadata must be invalidated 3317 * here 3318 */ 3319 if (S_ISDIR(ip->i_mode) && (teof == 0)) 3320 invalidate_xad_metapages(ip, *xad); 3321 /* 3322 * entry beyond eof: continue scan of current page 3323 * xad 3324 * ---|---=======-------> 3325 * eof 3326 */ 3327 if (teof < xoff) { 3328 nfreed += xlen; 3329 continue; 3330 } 3331 3332 /* 3333 * (xoff <= teof): last entry to be deleted from page; 3334 * If other entries remain in page: keep and update the page. 3335 */ 3336 3337 /* 3338 * eof == entry_start: delete the entry 3339 * xad 3340 * -------|=======-------> 3341 * eof 3342 * 3343 */ 3344 if (teof == xoff) { 3345 nfreed += xlen; 3346 3347 if (index == XTENTRYSTART) 3348 break; 3349 3350 nextindex = index; 3351 } 3352 /* 3353 * eof within the entry: truncate the entry. 3354 * xad 3355 * -------===|===-------> 3356 * eof 3357 */ 3358 else if (teof < xoff + xlen) { 3359 /* update truncated entry */ 3360 len = teof - xoff; 3361 freexlen = xlen - len; 3362 XADlength(xad, len); 3363 3364 /* save pxd of truncated extent in tlck */ 3365 xaddr += len; 3366 if (log) { /* COMMIT_PWMAP */ 3367 xtlck->lwm.offset = (xtlck->lwm.offset) ? 3368 min(index, (int)xtlck->lwm.offset) : index; 3369 xtlck->lwm.length = index + 1 - 3370 xtlck->lwm.offset; 3371 xtlck->twm.offset = index; 3372 pxdlock = (struct pxd_lock *) & xtlck->pxdlock; 3373 pxdlock->flag = mlckFREEPXD; 3374 PXDaddress(&pxdlock->pxd, xaddr); 3375 PXDlength(&pxdlock->pxd, freexlen); 3376 } 3377 /* free truncated extent */ 3378 else { /* COMMIT_WMAP */ 3379 3380 pxdlock = (struct pxd_lock *) & xadlock; 3381 pxdlock->flag = mlckFREEPXD; 3382 PXDaddress(&pxdlock->pxd, xaddr); 3383 PXDlength(&pxdlock->pxd, freexlen); 3384 txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP); 3385 3386 /* reset map lock */ 3387 xadlock.flag = mlckFREEXADLIST; 3388 } 3389 3390 /* current entry is new last entry; */ 3391 nextindex = index + 1; 3392 3393 nfreed += freexlen; 3394 } 3395 /* 3396 * eof beyond the entry: 3397 * xad 3398 * -------=======---|---> 3399 * eof 3400 */ 3401 else { /* (xoff + xlen < teof) */ 3402 3403 nextindex = index + 1; 3404 } 3405 3406 if (nextindex < le16_to_cpu(p->header.nextindex)) { 3407 if (!log) { /* COMMIT_WAMP */ 3408 xadlock.xdlist = &p->xad[nextindex]; 3409 xadlock.count = 3410 le16_to_cpu(p->header.nextindex) - 3411 nextindex; 3412 txFreeMap(ip, (struct maplock *) & xadlock, 3413 NULL, COMMIT_WMAP); 3414 } 3415 p->header.nextindex = cpu_to_le16(nextindex); 3416 } 3417 3418 XT_PUTPAGE(mp); 3419 3420 /* assert(freed == 0); */ 3421 goto getParent; 3422 } /* end scan of leaf page entries */ 3423 3424 freed = 1; 3425 3426 /* 3427 * leaf page become empty: free the page if type != PMAP 3428 */ 3429 if (log) { /* COMMIT_PWMAP */ 3430 /* txCommit() with tlckFREE: 3431 * free data extents covered by leaf [XTENTRYSTART:hwm); 3432 * invalidate leaf if COMMIT_PWMAP; 3433 * if (TRUNCATE), will write LOG_NOREDOPAGE; 3434 */ 3435 tlck->type = tlckXTREE | tlckFREE; 3436 } else { /* COMMIT_WAMP */ 3437 3438 /* free data extents covered by leaf */ 3439 xadlock.xdlist = &p->xad[XTENTRYSTART]; 3440 xadlock.count = 3441 le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 3442 txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP); 3443 } 3444 3445 if (p->header.flag & BT_ROOT) { 3446 p->header.flag &= ~BT_INTERNAL; 3447 p->header.flag |= BT_LEAF; 3448 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3449 3450 XT_PUTPAGE(mp); /* debug */ 3451 goto out; 3452 } else { 3453 if (log) { /* COMMIT_PWMAP */ 3454 /* page will be invalidated at tx completion 3455 */ 3456 XT_PUTPAGE(mp); 3457 } else { /* COMMIT_WMAP */ 3458 3459 if (mp->lid) 3460 lid_to_tlock(mp->lid)->flag |= tlckFREELOCK; 3461 3462 /* invalidate empty leaf page */ 3463 discard_metapage(mp); 3464 } 3465 } 3466 3467 /* 3468 * the leaf page become empty: delete the parent entry 3469 * for the leaf page if the parent page is to be kept 3470 * in the new sized file. 3471 */ 3472 3473 /* 3474 * go back up to the parent page 3475 */ 3476 getParent: 3477 /* pop/restore parent entry for the current child page */ 3478 if ((parent = BT_POP(&btstack)) == NULL) 3479 /* current page must have been root */ 3480 goto out; 3481 3482 /* get back the parent page */ 3483 bn = parent->bn; 3484 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3485 if (rc) 3486 return rc; 3487 3488 index = parent->index; 3489 3490 /* 3491 * child page was not empty: 3492 */ 3493 if (freed == 0) { 3494 /* has any entry deleted from parent ? */ 3495 if (index < le16_to_cpu(p->header.nextindex) - 1) { 3496 /* (re)acquire tlock on the parent page */ 3497 if (log) { /* COMMIT_PWMAP */ 3498 /* txCommit() with tlckTRUNCATE: 3499 * free child extents covered by parent [); 3500 */ 3501 tlck = txLock(tid, ip, mp, tlckXTREE); 3502 xtlck = (struct xtlock *) & tlck->lock; 3503 if (!(tlck->type & tlckTRUNCATE)) { 3504 xtlck->hwm.offset = 3505 le16_to_cpu(p->header. 3506 nextindex) - 1; 3507 tlck->type = 3508 tlckXTREE | tlckTRUNCATE; 3509 } 3510 } else { /* COMMIT_WMAP */ 3511 3512 /* free child extents covered by parent */ 3513 xadlock.xdlist = &p->xad[index + 1]; 3514 xadlock.count = 3515 le16_to_cpu(p->header.nextindex) - 3516 index - 1; 3517 txFreeMap(ip, (struct maplock *) & xadlock, 3518 NULL, COMMIT_WMAP); 3519 } 3520 BT_MARK_DIRTY(mp, ip); 3521 3522 p->header.nextindex = cpu_to_le16(index + 1); 3523 } 3524 XT_PUTPAGE(mp); 3525 goto getParent; 3526 } 3527 3528 /* 3529 * child page was empty: 3530 */ 3531 nfreed += lengthXAD(&p->xad[index]); 3532 3533 /* 3534 * During working map update, child page's tlock must be handled 3535 * before parent's. This is because the parent's tlock will cause 3536 * the child's disk space to be marked available in the wmap, so 3537 * it's important that the child page be released by that time. 3538 * 3539 * ToDo: tlocks should be on doubly-linked list, so we can 3540 * quickly remove it and add it to the end. 3541 */ 3542 3543 /* 3544 * Move parent page's tlock to the end of the tid's tlock list 3545 */ 3546 if (log && mp->lid && (tblk->last != mp->lid) && 3547 lid_to_tlock(mp->lid)->tid) { 3548 lid_t lid = mp->lid; 3549 struct tlock *prev; 3550 3551 tlck = lid_to_tlock(lid); 3552 3553 if (tblk->next == lid) 3554 tblk->next = tlck->next; 3555 else { 3556 for (prev = lid_to_tlock(tblk->next); 3557 prev->next != lid; 3558 prev = lid_to_tlock(prev->next)) { 3559 assert(prev->next); 3560 } 3561 prev->next = tlck->next; 3562 } 3563 lid_to_tlock(tblk->last)->next = lid; 3564 tlck->next = 0; 3565 tblk->last = lid; 3566 } 3567 3568 /* 3569 * parent page become empty: free the page 3570 */ 3571 if (index == XTENTRYSTART) { 3572 if (log) { /* COMMIT_PWMAP */ 3573 /* txCommit() with tlckFREE: 3574 * free child extents covered by parent; 3575 * invalidate parent if COMMIT_PWMAP; 3576 */ 3577 tlck = txLock(tid, ip, mp, tlckXTREE); 3578 xtlck = (struct xtlock *) & tlck->lock; 3579 xtlck->hwm.offset = 3580 le16_to_cpu(p->header.nextindex) - 1; 3581 tlck->type = tlckXTREE | tlckFREE; 3582 } else { /* COMMIT_WMAP */ 3583 3584 /* free child extents covered by parent */ 3585 xadlock.xdlist = &p->xad[XTENTRYSTART]; 3586 xadlock.count = 3587 le16_to_cpu(p->header.nextindex) - 3588 XTENTRYSTART; 3589 txFreeMap(ip, (struct maplock *) & xadlock, NULL, 3590 COMMIT_WMAP); 3591 } 3592 BT_MARK_DIRTY(mp, ip); 3593 3594 if (p->header.flag & BT_ROOT) { 3595 p->header.flag &= ~BT_INTERNAL; 3596 p->header.flag |= BT_LEAF; 3597 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3598 if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) { 3599 /* 3600 * Shrink root down to allow inline 3601 * EA (otherwise fsck complains) 3602 */ 3603 p->header.maxentry = 3604 cpu_to_le16(XTROOTINITSLOT); 3605 JFS_IP(ip)->mode2 |= INLINEEA; 3606 } 3607 3608 XT_PUTPAGE(mp); /* debug */ 3609 goto out; 3610 } else { 3611 if (log) { /* COMMIT_PWMAP */ 3612 /* page will be invalidated at tx completion 3613 */ 3614 XT_PUTPAGE(mp); 3615 } else { /* COMMIT_WMAP */ 3616 3617 if (mp->lid) 3618 lid_to_tlock(mp->lid)->flag |= 3619 tlckFREELOCK; 3620 3621 /* invalidate parent page */ 3622 discard_metapage(mp); 3623 } 3624 3625 /* parent has become empty and freed: 3626 * go back up to its parent page 3627 */ 3628 /* freed = 1; */ 3629 goto getParent; 3630 } 3631 } 3632 /* 3633 * parent page still has entries for front region; 3634 */ 3635 else { 3636 /* try truncate region covered by preceding entry 3637 * (process backward) 3638 */ 3639 index--; 3640 3641 /* go back down to the child page corresponding 3642 * to the entry 3643 */ 3644 goto getChild; 3645 } 3646 3647 /* 3648 * internal page: go down to child page of current entry 3649 */ 3650 getChild: 3651 /* save current parent entry for the child page */ 3652 if (BT_STACK_FULL(&btstack)) { 3653 jfs_error(ip->i_sb, "stack overrun in xtTruncate!"); 3654 XT_PUTPAGE(mp); 3655 return -EIO; 3656 } 3657 BT_PUSH(&btstack, bn, index); 3658 3659 /* get child page */ 3660 xad = &p->xad[index]; 3661 bn = addressXAD(xad); 3662 3663 /* 3664 * first access of each internal entry: 3665 */ 3666 /* release parent page */ 3667 XT_PUTPAGE(mp); 3668 3669 /* process the child page */ 3670 goto getPage; 3671 3672 out: 3673 /* 3674 * update file resource stat 3675 */ 3676 /* set size 3677 */ 3678 if (S_ISDIR(ip->i_mode) && !newsize) 3679 ip->i_size = 1; /* fsck hates zero-length directories */ 3680 else 3681 ip->i_size = newsize; 3682 3683 /* update quota allocation to reflect freed blocks */ 3684 dquot_free_block(ip, nfreed); 3685 3686 /* 3687 * free tlock of invalidated pages 3688 */ 3689 if (flag == COMMIT_WMAP) 3690 txFreelock(ip); 3691 3692 return newsize; 3693 } 3694 3695 3696 /* 3697 * xtTruncate_pmap() 3698 * 3699 * function: 3700 * Perform truncate to zero length for deleted file, leaving the 3701 * the xtree and working map untouched. This allows the file to 3702 * be accessed via open file handles, while the delete of the file 3703 * is committed to disk. 3704 * 3705 * parameter: 3706 * tid_t tid, 3707 * struct inode *ip, 3708 * s64 committed_size) 3709 * 3710 * return: new committed size 3711 * 3712 * note: 3713 * 3714 * To avoid deadlock by holding too many transaction locks, the 3715 * truncation may be broken up into multiple transactions. 3716 * The committed_size keeps track of part of the file has been 3717 * freed from the pmaps. 3718 */ 3719 s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) 3720 { 3721 s64 bn; 3722 struct btstack btstack; 3723 int cmp; 3724 int index; 3725 int locked_leaves = 0; 3726 struct metapage *mp; 3727 xtpage_t *p; 3728 struct btframe *parent; 3729 int rc; 3730 struct tblock *tblk; 3731 struct tlock *tlck = NULL; 3732 xad_t *xad; 3733 int xlen; 3734 s64 xoff; 3735 struct xtlock *xtlck = NULL; 3736 3737 /* save object truncation type */ 3738 tblk = tid_to_tblock(tid); 3739 tblk->xflag |= COMMIT_PMAP; 3740 3741 /* clear stack */ 3742 BT_CLR(&btstack); 3743 3744 if (committed_size) { 3745 xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1; 3746 rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); 3747 if (rc) 3748 return rc; 3749 3750 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 3751 3752 if (cmp != 0) { 3753 XT_PUTPAGE(mp); 3754 jfs_error(ip->i_sb, 3755 "xtTruncate_pmap: did not find extent"); 3756 return -EIO; 3757 } 3758 } else { 3759 /* 3760 * start with root 3761 * 3762 * root resides in the inode 3763 */ 3764 bn = 0; 3765 3766 /* 3767 * first access of each page: 3768 */ 3769 getPage: 3770 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3771 if (rc) 3772 return rc; 3773 3774 /* process entries backward from last index */ 3775 index = le16_to_cpu(p->header.nextindex) - 1; 3776 3777 if (p->header.flag & BT_INTERNAL) 3778 goto getChild; 3779 } 3780 3781 /* 3782 * leaf page 3783 */ 3784 3785 if (++locked_leaves > MAX_TRUNCATE_LEAVES) { 3786 /* 3787 * We need to limit the size of the transaction 3788 * to avoid exhausting pagecache & tlocks 3789 */ 3790 xad = &p->xad[index]; 3791 xoff = offsetXAD(xad); 3792 xlen = lengthXAD(xad); 3793 XT_PUTPAGE(mp); 3794 return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; 3795 } 3796 tlck = txLock(tid, ip, mp, tlckXTREE); 3797 tlck->type = tlckXTREE | tlckFREE; 3798 xtlck = (struct xtlock *) & tlck->lock; 3799 xtlck->hwm.offset = index; 3800 3801 3802 XT_PUTPAGE(mp); 3803 3804 /* 3805 * go back up to the parent page 3806 */ 3807 getParent: 3808 /* pop/restore parent entry for the current child page */ 3809 if ((parent = BT_POP(&btstack)) == NULL) 3810 /* current page must have been root */ 3811 goto out; 3812 3813 /* get back the parent page */ 3814 bn = parent->bn; 3815 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3816 if (rc) 3817 return rc; 3818 3819 index = parent->index; 3820 3821 /* 3822 * parent page become empty: free the page 3823 */ 3824 if (index == XTENTRYSTART) { 3825 /* txCommit() with tlckFREE: 3826 * free child extents covered by parent; 3827 * invalidate parent if COMMIT_PWMAP; 3828 */ 3829 tlck = txLock(tid, ip, mp, tlckXTREE); 3830 xtlck = (struct xtlock *) & tlck->lock; 3831 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; 3832 tlck->type = tlckXTREE | tlckFREE; 3833 3834 XT_PUTPAGE(mp); 3835 3836 if (p->header.flag & BT_ROOT) { 3837 3838 goto out; 3839 } else { 3840 goto getParent; 3841 } 3842 } 3843 /* 3844 * parent page still has entries for front region; 3845 */ 3846 else 3847 index--; 3848 /* 3849 * internal page: go down to child page of current entry 3850 */ 3851 getChild: 3852 /* save current parent entry for the child page */ 3853 if (BT_STACK_FULL(&btstack)) { 3854 jfs_error(ip->i_sb, "stack overrun in xtTruncate_pmap!"); 3855 XT_PUTPAGE(mp); 3856 return -EIO; 3857 } 3858 BT_PUSH(&btstack, bn, index); 3859 3860 /* get child page */ 3861 xad = &p->xad[index]; 3862 bn = addressXAD(xad); 3863 3864 /* 3865 * first access of each internal entry: 3866 */ 3867 /* release parent page */ 3868 XT_PUTPAGE(mp); 3869 3870 /* process the child page */ 3871 goto getPage; 3872 3873 out: 3874 3875 return 0; 3876 } 3877 3878 #ifdef CONFIG_JFS_STATISTICS 3879 static int jfs_xtstat_proc_show(struct seq_file *m, void *v) 3880 { 3881 seq_printf(m, 3882 "JFS Xtree statistics\n" 3883 "====================\n" 3884 "searches = %d\n" 3885 "fast searches = %d\n" 3886 "splits = %d\n", 3887 xtStat.search, 3888 xtStat.fastSearch, 3889 xtStat.split); 3890 return 0; 3891 } 3892 3893 static int jfs_xtstat_proc_open(struct inode *inode, struct file *file) 3894 { 3895 return single_open(file, jfs_xtstat_proc_show, NULL); 3896 } 3897 3898 const struct file_operations jfs_xtstat_proc_fops = { 3899 .owner = THIS_MODULE, 3900 .open = jfs_xtstat_proc_open, 3901 .read = seq_read, 3902 .llseek = seq_lseek, 3903 .release = single_release, 3904 }; 3905 #endif 3906