1 /* 2 * Copyright (C) International Business Machines Corp., 2000-2005 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * the GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 */ 18 /* 19 * jfs_xtree.c: extent allocation descriptor B+-tree manager 20 */ 21 22 #include <linux/fs.h> 23 #include <linux/module.h> 24 #include <linux/quotaops.h> 25 #include <linux/seq_file.h> 26 #include "jfs_incore.h" 27 #include "jfs_filsys.h" 28 #include "jfs_metapage.h" 29 #include "jfs_dmap.h" 30 #include "jfs_dinode.h" 31 #include "jfs_superblock.h" 32 #include "jfs_debug.h" 33 34 /* 35 * xtree local flag 36 */ 37 #define XT_INSERT 0x00000001 38 39 /* 40 * xtree key/entry comparison: extent offset 41 * 42 * return: 43 * -1: k < start of extent 44 * 0: start_of_extent <= k <= end_of_extent 45 * 1: k > end_of_extent 46 */ 47 #define XT_CMP(CMP, K, X, OFFSET64)\ 48 {\ 49 OFFSET64 = offsetXAD(X);\ 50 (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\ 51 ((K) < OFFSET64) ? -1 : 0;\ 52 } 53 54 /* write a xad entry */ 55 #define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\ 56 {\ 57 (XAD)->flag = (FLAG);\ 58 XADoffset((XAD), (OFF));\ 59 XADlength((XAD), (LEN));\ 60 XADaddress((XAD), (ADDR));\ 61 } 62 63 #define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot) 64 65 /* get page buffer for specified block address */ 66 /* ToDo: Replace this ugly macro with a function */ 67 #define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ 68 {\ 69 BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\ 70 if (!(RC))\ 71 {\ 72 if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\ 73 (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\ 74 (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\ 75 {\ 76 jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\ 77 BT_PUTPAGE(MP);\ 78 MP = NULL;\ 79 RC = -EIO;\ 80 }\ 81 }\ 82 } 83 84 /* for consistency */ 85 #define XT_PUTPAGE(MP) BT_PUTPAGE(MP) 86 87 #define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ 88 BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot) 89 /* xtree entry parameter descriptor */ 90 struct xtsplit { 91 struct metapage *mp; 92 s16 index; 93 u8 flag; 94 s64 off; 95 s64 addr; 96 int len; 97 struct pxdlist *pxdlist; 98 }; 99 100 101 /* 102 * statistics 103 */ 104 #ifdef CONFIG_JFS_STATISTICS 105 static struct { 106 uint search; 107 uint fastSearch; 108 uint split; 109 } xtStat; 110 #endif 111 112 113 /* 114 * forward references 115 */ 116 static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp, 117 struct btstack * btstack, int flag); 118 119 static int xtSplitUp(tid_t tid, 120 struct inode *ip, 121 struct xtsplit * split, struct btstack * btstack); 122 123 static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split, 124 struct metapage ** rmpp, s64 * rbnp); 125 126 static int xtSplitRoot(tid_t tid, struct inode *ip, 127 struct xtsplit * split, struct metapage ** rmpp); 128 129 #ifdef _STILL_TO_PORT 130 static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp, 131 xtpage_t * fp, struct btstack * btstack); 132 133 static int xtSearchNode(struct inode *ip, 134 xad_t * xad, 135 int *cmpp, struct btstack * btstack, int flag); 136 137 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp); 138 #endif /* _STILL_TO_PORT */ 139 140 /* 141 * xtLookup() 142 * 143 * function: map a single page into a physical extent; 144 */ 145 int xtLookup(struct inode *ip, s64 lstart, 146 s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check) 147 { 148 int rc = 0; 149 struct btstack btstack; 150 int cmp; 151 s64 bn; 152 struct metapage *mp; 153 xtpage_t *p; 154 int index; 155 xad_t *xad; 156 s64 next, size, xoff, xend; 157 int xlen; 158 s64 xaddr; 159 160 *paddr = 0; 161 *plen = llen; 162 163 if (!no_check) { 164 /* is lookup offset beyond eof ? */ 165 size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >> 166 JFS_SBI(ip->i_sb)->l2bsize; 167 if (lstart >= size) 168 return 0; 169 } 170 171 /* 172 * search for the xad entry covering the logical extent 173 */ 174 //search: 175 if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) { 176 jfs_err("xtLookup: xtSearch returned %d", rc); 177 return rc; 178 } 179 180 /* 181 * compute the physical extent covering logical extent 182 * 183 * N.B. search may have failed (e.g., hole in sparse file), 184 * and returned the index of the next entry. 185 */ 186 /* retrieve search result */ 187 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 188 189 /* is xad found covering start of logical extent ? 190 * lstart is a page start address, 191 * i.e., lstart cannot start in a hole; 192 */ 193 if (cmp) { 194 if (next) 195 *plen = min(next - lstart, llen); 196 goto out; 197 } 198 199 /* 200 * lxd covered by xad 201 */ 202 xad = &p->xad[index]; 203 xoff = offsetXAD(xad); 204 xlen = lengthXAD(xad); 205 xend = xoff + xlen; 206 xaddr = addressXAD(xad); 207 208 /* initialize new pxd */ 209 *pflag = xad->flag; 210 *paddr = xaddr + (lstart - xoff); 211 /* a page must be fully covered by an xad */ 212 *plen = min(xend - lstart, llen); 213 214 out: 215 XT_PUTPAGE(mp); 216 217 return rc; 218 } 219 220 /* 221 * xtSearch() 222 * 223 * function: search for the xad entry covering specified offset. 224 * 225 * parameters: 226 * ip - file object; 227 * xoff - extent offset; 228 * nextp - address of next extent (if any) for search miss 229 * cmpp - comparison result: 230 * btstack - traverse stack; 231 * flag - search process flag (XT_INSERT); 232 * 233 * returns: 234 * btstack contains (bn, index) of search path traversed to the entry. 235 * *cmpp is set to result of comparison with the entry returned. 236 * the page containing the entry is pinned at exit. 237 */ 238 static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, 239 int *cmpp, struct btstack * btstack, int flag) 240 { 241 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 242 int rc = 0; 243 int cmp = 1; /* init for empty page */ 244 s64 bn; /* block number */ 245 struct metapage *mp; /* page buffer */ 246 xtpage_t *p; /* page */ 247 xad_t *xad; 248 int base, index, lim, btindex; 249 struct btframe *btsp; 250 int nsplit = 0; /* number of pages to split */ 251 s64 t64; 252 s64 next = 0; 253 254 INCREMENT(xtStat.search); 255 256 BT_CLR(btstack); 257 258 btstack->nsplit = 0; 259 260 /* 261 * search down tree from root: 262 * 263 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 264 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 265 * 266 * if entry with search key K is not found 267 * internal page search find the entry with largest key Ki 268 * less than K which point to the child page to search; 269 * leaf page search find the entry with smallest key Kj 270 * greater than K so that the returned index is the position of 271 * the entry to be shifted right for insertion of new entry. 272 * for empty tree, search key is greater than any key of the tree. 273 * 274 * by convention, root bn = 0. 275 */ 276 for (bn = 0;;) { 277 /* get/pin the page to search */ 278 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 279 if (rc) 280 return rc; 281 282 /* try sequential access heuristics with the previous 283 * access entry in target leaf page: 284 * once search narrowed down into the target leaf, 285 * key must either match an entry in the leaf or 286 * key entry does not exist in the tree; 287 */ 288 //fastSearch: 289 if ((jfs_ip->btorder & BT_SEQUENTIAL) && 290 (p->header.flag & BT_LEAF) && 291 (index = jfs_ip->btindex) < 292 le16_to_cpu(p->header.nextindex)) { 293 xad = &p->xad[index]; 294 t64 = offsetXAD(xad); 295 if (xoff < t64 + lengthXAD(xad)) { 296 if (xoff >= t64) { 297 *cmpp = 0; 298 goto out; 299 } 300 301 /* stop sequential access heuristics */ 302 goto binarySearch; 303 } else { /* (t64 + lengthXAD(xad)) <= xoff */ 304 305 /* try next sequential entry */ 306 index++; 307 if (index < 308 le16_to_cpu(p->header.nextindex)) { 309 xad++; 310 t64 = offsetXAD(xad); 311 if (xoff < t64 + lengthXAD(xad)) { 312 if (xoff >= t64) { 313 *cmpp = 0; 314 goto out; 315 } 316 317 /* miss: key falls between 318 * previous and this entry 319 */ 320 *cmpp = 1; 321 next = t64; 322 goto out; 323 } 324 325 /* (xoff >= t64 + lengthXAD(xad)); 326 * matching entry may be further out: 327 * stop heuristic search 328 */ 329 /* stop sequential access heuristics */ 330 goto binarySearch; 331 } 332 333 /* (index == p->header.nextindex); 334 * miss: key entry does not exist in 335 * the target leaf/tree 336 */ 337 *cmpp = 1; 338 goto out; 339 } 340 341 /* 342 * if hit, return index of the entry found, and 343 * if miss, where new entry with search key is 344 * to be inserted; 345 */ 346 out: 347 /* compute number of pages to split */ 348 if (flag & XT_INSERT) { 349 if (p->header.nextindex == /* little-endian */ 350 p->header.maxentry) 351 nsplit++; 352 else 353 nsplit = 0; 354 btstack->nsplit = nsplit; 355 } 356 357 /* save search result */ 358 btsp = btstack->top; 359 btsp->bn = bn; 360 btsp->index = index; 361 btsp->mp = mp; 362 363 /* update sequential access heuristics */ 364 jfs_ip->btindex = index; 365 366 if (nextp) 367 *nextp = next; 368 369 INCREMENT(xtStat.fastSearch); 370 return 0; 371 } 372 373 /* well, ... full search now */ 374 binarySearch: 375 lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 376 377 /* 378 * binary search with search key K on the current page 379 */ 380 for (base = XTENTRYSTART; lim; lim >>= 1) { 381 index = base + (lim >> 1); 382 383 XT_CMP(cmp, xoff, &p->xad[index], t64); 384 if (cmp == 0) { 385 /* 386 * search hit 387 */ 388 /* search hit - leaf page: 389 * return the entry found 390 */ 391 if (p->header.flag & BT_LEAF) { 392 *cmpp = cmp; 393 394 /* compute number of pages to split */ 395 if (flag & XT_INSERT) { 396 if (p->header.nextindex == 397 p->header.maxentry) 398 nsplit++; 399 else 400 nsplit = 0; 401 btstack->nsplit = nsplit; 402 } 403 404 /* save search result */ 405 btsp = btstack->top; 406 btsp->bn = bn; 407 btsp->index = index; 408 btsp->mp = mp; 409 410 /* init sequential access heuristics */ 411 btindex = jfs_ip->btindex; 412 if (index == btindex || 413 index == btindex + 1) 414 jfs_ip->btorder = BT_SEQUENTIAL; 415 else 416 jfs_ip->btorder = BT_RANDOM; 417 jfs_ip->btindex = index; 418 419 return 0; 420 } 421 /* search hit - internal page: 422 * descend/search its child page 423 */ 424 if (index < le16_to_cpu(p->header.nextindex)-1) 425 next = offsetXAD(&p->xad[index + 1]); 426 goto next; 427 } 428 429 if (cmp > 0) { 430 base = index + 1; 431 --lim; 432 } 433 } 434 435 /* 436 * search miss 437 * 438 * base is the smallest index with key (Kj) greater than 439 * search key (K) and may be zero or maxentry index. 440 */ 441 if (base < le16_to_cpu(p->header.nextindex)) 442 next = offsetXAD(&p->xad[base]); 443 /* 444 * search miss - leaf page: 445 * 446 * return location of entry (base) where new entry with 447 * search key K is to be inserted. 448 */ 449 if (p->header.flag & BT_LEAF) { 450 *cmpp = cmp; 451 452 /* compute number of pages to split */ 453 if (flag & XT_INSERT) { 454 if (p->header.nextindex == 455 p->header.maxentry) 456 nsplit++; 457 else 458 nsplit = 0; 459 btstack->nsplit = nsplit; 460 } 461 462 /* save search result */ 463 btsp = btstack->top; 464 btsp->bn = bn; 465 btsp->index = base; 466 btsp->mp = mp; 467 468 /* init sequential access heuristics */ 469 btindex = jfs_ip->btindex; 470 if (base == btindex || base == btindex + 1) 471 jfs_ip->btorder = BT_SEQUENTIAL; 472 else 473 jfs_ip->btorder = BT_RANDOM; 474 jfs_ip->btindex = base; 475 476 if (nextp) 477 *nextp = next; 478 479 return 0; 480 } 481 482 /* 483 * search miss - non-leaf page: 484 * 485 * if base is non-zero, decrement base by one to get the parent 486 * entry of the child page to search. 487 */ 488 index = base ? base - 1 : base; 489 490 /* 491 * go down to child page 492 */ 493 next: 494 /* update number of pages to split */ 495 if (p->header.nextindex == p->header.maxentry) 496 nsplit++; 497 else 498 nsplit = 0; 499 500 /* push (bn, index) of the parent page/entry */ 501 if (BT_STACK_FULL(btstack)) { 502 jfs_error(ip->i_sb, "stack overrun in xtSearch!"); 503 XT_PUTPAGE(mp); 504 return -EIO; 505 } 506 BT_PUSH(btstack, bn, index); 507 508 /* get the child page block number */ 509 bn = addressXAD(&p->xad[index]); 510 511 /* unpin the parent page */ 512 XT_PUTPAGE(mp); 513 } 514 } 515 516 /* 517 * xtInsert() 518 * 519 * function: 520 * 521 * parameter: 522 * tid - transaction id; 523 * ip - file object; 524 * xflag - extent flag (XAD_NOTRECORDED): 525 * xoff - extent offset; 526 * xlen - extent length; 527 * xaddrp - extent address pointer (in/out): 528 * if (*xaddrp) 529 * caller allocated data extent at *xaddrp; 530 * else 531 * allocate data extent and return its xaddr; 532 * flag - 533 * 534 * return: 535 */ 536 int xtInsert(tid_t tid, /* transaction id */ 537 struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp, 538 int flag) 539 { 540 int rc = 0; 541 s64 xaddr, hint; 542 struct metapage *mp; /* meta-page buffer */ 543 xtpage_t *p; /* base B+-tree index page */ 544 s64 bn; 545 int index, nextindex; 546 struct btstack btstack; /* traverse stack */ 547 struct xtsplit split; /* split information */ 548 xad_t *xad; 549 int cmp; 550 s64 next; 551 struct tlock *tlck; 552 struct xtlock *xtlck; 553 554 jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); 555 556 /* 557 * search for the entry location at which to insert: 558 * 559 * xtFastSearch() and xtSearch() both returns (leaf page 560 * pinned, index at which to insert). 561 * n.b. xtSearch() may return index of maxentry of 562 * the full page. 563 */ 564 if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT))) 565 return rc; 566 567 /* retrieve search result */ 568 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 569 570 /* This test must follow XT_GETSEARCH since mp must be valid if 571 * we branch to out: */ 572 if ((cmp == 0) || (next && (xlen > next - xoff))) { 573 rc = -EEXIST; 574 goto out; 575 } 576 577 /* 578 * allocate data extent requested 579 * 580 * allocation hint: last xad 581 */ 582 if ((xaddr = *xaddrp) == 0) { 583 if (index > XTENTRYSTART) { 584 xad = &p->xad[index - 1]; 585 hint = addressXAD(xad) + lengthXAD(xad) - 1; 586 } else 587 hint = 0; 588 if ((rc = vfs_dq_alloc_block(ip, xlen))) 589 goto out; 590 if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) { 591 vfs_dq_free_block(ip, xlen); 592 goto out; 593 } 594 } 595 596 /* 597 * insert entry for new extent 598 */ 599 xflag |= XAD_NEW; 600 601 /* 602 * if the leaf page is full, split the page and 603 * propagate up the router entry for the new page from split 604 * 605 * The xtSplitUp() will insert the entry and unpin the leaf page. 606 */ 607 nextindex = le16_to_cpu(p->header.nextindex); 608 if (nextindex == le16_to_cpu(p->header.maxentry)) { 609 split.mp = mp; 610 split.index = index; 611 split.flag = xflag; 612 split.off = xoff; 613 split.len = xlen; 614 split.addr = xaddr; 615 split.pxdlist = NULL; 616 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { 617 /* undo data extent allocation */ 618 if (*xaddrp == 0) { 619 dbFree(ip, xaddr, (s64) xlen); 620 vfs_dq_free_block(ip, xlen); 621 } 622 return rc; 623 } 624 625 *xaddrp = xaddr; 626 return 0; 627 } 628 629 /* 630 * insert the new entry into the leaf page 631 */ 632 /* 633 * acquire a transaction lock on the leaf page; 634 * 635 * action: xad insertion/extension; 636 */ 637 BT_MARK_DIRTY(mp, ip); 638 639 /* if insert into middle, shift right remaining entries. */ 640 if (index < nextindex) 641 memmove(&p->xad[index + 1], &p->xad[index], 642 (nextindex - index) * sizeof(xad_t)); 643 644 /* insert the new entry: mark the entry NEW */ 645 xad = &p->xad[index]; 646 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 647 648 /* advance next available entry index */ 649 le16_add_cpu(&p->header.nextindex, 1); 650 651 /* Don't log it if there are no links to the file */ 652 if (!test_cflag(COMMIT_Nolink, ip)) { 653 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 654 xtlck = (struct xtlock *) & tlck->lock; 655 xtlck->lwm.offset = 656 (xtlck->lwm.offset) ? min(index, 657 (int)xtlck->lwm.offset) : index; 658 xtlck->lwm.length = 659 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; 660 } 661 662 *xaddrp = xaddr; 663 664 out: 665 /* unpin the leaf page */ 666 XT_PUTPAGE(mp); 667 668 return rc; 669 } 670 671 672 /* 673 * xtSplitUp() 674 * 675 * function: 676 * split full pages as propagating insertion up the tree 677 * 678 * parameter: 679 * tid - transaction id; 680 * ip - file object; 681 * split - entry parameter descriptor; 682 * btstack - traverse stack from xtSearch() 683 * 684 * return: 685 */ 686 static int 687 xtSplitUp(tid_t tid, 688 struct inode *ip, struct xtsplit * split, struct btstack * btstack) 689 { 690 int rc = 0; 691 struct metapage *smp; 692 xtpage_t *sp; /* split page */ 693 struct metapage *rmp; 694 s64 rbn; /* new right page block number */ 695 struct metapage *rcmp; 696 xtpage_t *rcp; /* right child page */ 697 s64 rcbn; /* right child page block number */ 698 int skip; /* index of entry of insertion */ 699 int nextindex; /* next available entry index of p */ 700 struct btframe *parent; /* parent page entry on traverse stack */ 701 xad_t *xad; 702 s64 xaddr; 703 int xlen; 704 int nsplit; /* number of pages split */ 705 struct pxdlist pxdlist; 706 pxd_t *pxd; 707 struct tlock *tlck; 708 struct xtlock *xtlck; 709 710 smp = split->mp; 711 sp = XT_PAGE(ip, smp); 712 713 /* is inode xtree root extension/inline EA area free ? */ 714 if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) && 715 (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) && 716 (JFS_IP(ip)->mode2 & INLINEEA)) { 717 sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT); 718 JFS_IP(ip)->mode2 &= ~INLINEEA; 719 720 BT_MARK_DIRTY(smp, ip); 721 /* 722 * acquire a transaction lock on the leaf page; 723 * 724 * action: xad insertion/extension; 725 */ 726 727 /* if insert into middle, shift right remaining entries. */ 728 skip = split->index; 729 nextindex = le16_to_cpu(sp->header.nextindex); 730 if (skip < nextindex) 731 memmove(&sp->xad[skip + 1], &sp->xad[skip], 732 (nextindex - skip) * sizeof(xad_t)); 733 734 /* insert the new entry: mark the entry NEW */ 735 xad = &sp->xad[skip]; 736 XT_PUTENTRY(xad, split->flag, split->off, split->len, 737 split->addr); 738 739 /* advance next available entry index */ 740 le16_add_cpu(&sp->header.nextindex, 1); 741 742 /* Don't log it if there are no links to the file */ 743 if (!test_cflag(COMMIT_Nolink, ip)) { 744 tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW); 745 xtlck = (struct xtlock *) & tlck->lock; 746 xtlck->lwm.offset = (xtlck->lwm.offset) ? 747 min(skip, (int)xtlck->lwm.offset) : skip; 748 xtlck->lwm.length = 749 le16_to_cpu(sp->header.nextindex) - 750 xtlck->lwm.offset; 751 } 752 753 return 0; 754 } 755 756 /* 757 * allocate new index blocks to cover index page split(s) 758 * 759 * allocation hint: ? 760 */ 761 if (split->pxdlist == NULL) { 762 nsplit = btstack->nsplit; 763 split->pxdlist = &pxdlist; 764 pxdlist.maxnpxd = pxdlist.npxd = 0; 765 pxd = &pxdlist.pxd[0]; 766 xlen = JFS_SBI(ip->i_sb)->nbperpage; 767 for (; nsplit > 0; nsplit--, pxd++) { 768 if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr)) 769 == 0) { 770 PXDaddress(pxd, xaddr); 771 PXDlength(pxd, xlen); 772 773 pxdlist.maxnpxd++; 774 775 continue; 776 } 777 778 /* undo allocation */ 779 780 XT_PUTPAGE(smp); 781 return rc; 782 } 783 } 784 785 /* 786 * Split leaf page <sp> into <sp> and a new right page <rp>. 787 * 788 * The split routines insert the new entry into the leaf page, 789 * and acquire txLock as appropriate. 790 * return <rp> pinned and its block number <rpbn>. 791 */ 792 rc = (sp->header.flag & BT_ROOT) ? 793 xtSplitRoot(tid, ip, split, &rmp) : 794 xtSplitPage(tid, ip, split, &rmp, &rbn); 795 796 XT_PUTPAGE(smp); 797 798 if (rc) 799 return -EIO; 800 /* 801 * propagate up the router entry for the leaf page just split 802 * 803 * insert a router entry for the new page into the parent page, 804 * propagate the insert/split up the tree by walking back the stack 805 * of (bn of parent page, index of child page entry in parent page) 806 * that were traversed during the search for the page that split. 807 * 808 * the propagation of insert/split up the tree stops if the root 809 * splits or the page inserted into doesn't have to split to hold 810 * the new entry. 811 * 812 * the parent entry for the split page remains the same, and 813 * a new entry is inserted at its right with the first key and 814 * block number of the new right page. 815 * 816 * There are a maximum of 3 pages pinned at any time: 817 * right child, left parent and right parent (when the parent splits) 818 * to keep the child page pinned while working on the parent. 819 * make sure that all pins are released at exit. 820 */ 821 while ((parent = BT_POP(btstack)) != NULL) { 822 /* parent page specified by stack frame <parent> */ 823 824 /* keep current child pages <rcp> pinned */ 825 rcmp = rmp; 826 rcbn = rbn; 827 rcp = XT_PAGE(ip, rcmp); 828 829 /* 830 * insert router entry in parent for new right child page <rp> 831 */ 832 /* get/pin the parent page <sp> */ 833 XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc); 834 if (rc) { 835 XT_PUTPAGE(rcmp); 836 return rc; 837 } 838 839 /* 840 * The new key entry goes ONE AFTER the index of parent entry, 841 * because the split was to the right. 842 */ 843 skip = parent->index + 1; 844 845 /* 846 * split or shift right remaining entries of the parent page 847 */ 848 nextindex = le16_to_cpu(sp->header.nextindex); 849 /* 850 * parent page is full - split the parent page 851 */ 852 if (nextindex == le16_to_cpu(sp->header.maxentry)) { 853 /* init for parent page split */ 854 split->mp = smp; 855 split->index = skip; /* index at insert */ 856 split->flag = XAD_NEW; 857 split->off = offsetXAD(&rcp->xad[XTENTRYSTART]); 858 split->len = JFS_SBI(ip->i_sb)->nbperpage; 859 split->addr = rcbn; 860 861 /* unpin previous right child page */ 862 XT_PUTPAGE(rcmp); 863 864 /* The split routines insert the new entry, 865 * and acquire txLock as appropriate. 866 * return <rp> pinned and its block number <rpbn>. 867 */ 868 rc = (sp->header.flag & BT_ROOT) ? 869 xtSplitRoot(tid, ip, split, &rmp) : 870 xtSplitPage(tid, ip, split, &rmp, &rbn); 871 if (rc) { 872 XT_PUTPAGE(smp); 873 return rc; 874 } 875 876 XT_PUTPAGE(smp); 877 /* keep new child page <rp> pinned */ 878 } 879 /* 880 * parent page is not full - insert in parent page 881 */ 882 else { 883 /* 884 * insert router entry in parent for the right child 885 * page from the first entry of the right child page: 886 */ 887 /* 888 * acquire a transaction lock on the parent page; 889 * 890 * action: router xad insertion; 891 */ 892 BT_MARK_DIRTY(smp, ip); 893 894 /* 895 * if insert into middle, shift right remaining entries 896 */ 897 if (skip < nextindex) 898 memmove(&sp->xad[skip + 1], &sp->xad[skip], 899 (nextindex - 900 skip) << L2XTSLOTSIZE); 901 902 /* insert the router entry */ 903 xad = &sp->xad[skip]; 904 XT_PUTENTRY(xad, XAD_NEW, 905 offsetXAD(&rcp->xad[XTENTRYSTART]), 906 JFS_SBI(ip->i_sb)->nbperpage, rcbn); 907 908 /* advance next available entry index. */ 909 le16_add_cpu(&sp->header.nextindex, 1); 910 911 /* Don't log it if there are no links to the file */ 912 if (!test_cflag(COMMIT_Nolink, ip)) { 913 tlck = txLock(tid, ip, smp, 914 tlckXTREE | tlckGROW); 915 xtlck = (struct xtlock *) & tlck->lock; 916 xtlck->lwm.offset = (xtlck->lwm.offset) ? 917 min(skip, (int)xtlck->lwm.offset) : skip; 918 xtlck->lwm.length = 919 le16_to_cpu(sp->header.nextindex) - 920 xtlck->lwm.offset; 921 } 922 923 /* unpin parent page */ 924 XT_PUTPAGE(smp); 925 926 /* exit propagate up */ 927 break; 928 } 929 } 930 931 /* unpin current right page */ 932 XT_PUTPAGE(rmp); 933 934 return 0; 935 } 936 937 938 /* 939 * xtSplitPage() 940 * 941 * function: 942 * split a full non-root page into 943 * original/split/left page and new right page 944 * i.e., the original/split page remains as left page. 945 * 946 * parameter: 947 * int tid, 948 * struct inode *ip, 949 * struct xtsplit *split, 950 * struct metapage **rmpp, 951 * u64 *rbnp, 952 * 953 * return: 954 * Pointer to page in which to insert or NULL on error. 955 */ 956 static int 957 xtSplitPage(tid_t tid, struct inode *ip, 958 struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp) 959 { 960 int rc = 0; 961 struct metapage *smp; 962 xtpage_t *sp; 963 struct metapage *rmp; 964 xtpage_t *rp; /* new right page allocated */ 965 s64 rbn; /* new right page block number */ 966 struct metapage *mp; 967 xtpage_t *p; 968 s64 nextbn; 969 int skip, maxentry, middle, righthalf, n; 970 xad_t *xad; 971 struct pxdlist *pxdlist; 972 pxd_t *pxd; 973 struct tlock *tlck; 974 struct xtlock *sxtlck = NULL, *rxtlck = NULL; 975 int quota_allocation = 0; 976 977 smp = split->mp; 978 sp = XT_PAGE(ip, smp); 979 980 INCREMENT(xtStat.split); 981 982 pxdlist = split->pxdlist; 983 pxd = &pxdlist->pxd[pxdlist->npxd]; 984 pxdlist->npxd++; 985 rbn = addressPXD(pxd); 986 987 /* Allocate blocks to quota. */ 988 if (vfs_dq_alloc_block(ip, lengthPXD(pxd))) { 989 rc = -EDQUOT; 990 goto clean_up; 991 } 992 993 quota_allocation += lengthPXD(pxd); 994 995 /* 996 * allocate the new right page for the split 997 */ 998 rmp = get_metapage(ip, rbn, PSIZE, 1); 999 if (rmp == NULL) { 1000 rc = -EIO; 1001 goto clean_up; 1002 } 1003 1004 jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); 1005 1006 BT_MARK_DIRTY(rmp, ip); 1007 /* 1008 * action: new page; 1009 */ 1010 1011 rp = (xtpage_t *) rmp->data; 1012 rp->header.self = *pxd; 1013 rp->header.flag = sp->header.flag & BT_TYPE; 1014 rp->header.maxentry = sp->header.maxentry; /* little-endian */ 1015 rp->header.nextindex = cpu_to_le16(XTENTRYSTART); 1016 1017 BT_MARK_DIRTY(smp, ip); 1018 /* Don't log it if there are no links to the file */ 1019 if (!test_cflag(COMMIT_Nolink, ip)) { 1020 /* 1021 * acquire a transaction lock on the new right page; 1022 */ 1023 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); 1024 rxtlck = (struct xtlock *) & tlck->lock; 1025 rxtlck->lwm.offset = XTENTRYSTART; 1026 /* 1027 * acquire a transaction lock on the split page 1028 */ 1029 tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW); 1030 sxtlck = (struct xtlock *) & tlck->lock; 1031 } 1032 1033 /* 1034 * initialize/update sibling pointers of <sp> and <rp> 1035 */ 1036 nextbn = le64_to_cpu(sp->header.next); 1037 rp->header.next = cpu_to_le64(nextbn); 1038 rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self)); 1039 sp->header.next = cpu_to_le64(rbn); 1040 1041 skip = split->index; 1042 1043 /* 1044 * sequential append at tail (after last entry of last page) 1045 * 1046 * if splitting the last page on a level because of appending 1047 * a entry to it (skip is maxentry), it's likely that the access is 1048 * sequential. adding an empty page on the side of the level is less 1049 * work and can push the fill factor much higher than normal. 1050 * if we're wrong it's no big deal - we will do the split the right 1051 * way next time. 1052 * (it may look like it's equally easy to do a similar hack for 1053 * reverse sorted data, that is, split the tree left, but it's not. 1054 * Be my guest.) 1055 */ 1056 if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) { 1057 /* 1058 * acquire a transaction lock on the new/right page; 1059 * 1060 * action: xad insertion; 1061 */ 1062 /* insert entry at the first entry of the new right page */ 1063 xad = &rp->xad[XTENTRYSTART]; 1064 XT_PUTENTRY(xad, split->flag, split->off, split->len, 1065 split->addr); 1066 1067 rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); 1068 1069 if (!test_cflag(COMMIT_Nolink, ip)) { 1070 /* rxtlck->lwm.offset = XTENTRYSTART; */ 1071 rxtlck->lwm.length = 1; 1072 } 1073 1074 *rmpp = rmp; 1075 *rbnp = rbn; 1076 1077 jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp); 1078 return 0; 1079 } 1080 1081 /* 1082 * non-sequential insert (at possibly middle page) 1083 */ 1084 1085 /* 1086 * update previous pointer of old next/right page of <sp> 1087 */ 1088 if (nextbn != 0) { 1089 XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 1090 if (rc) { 1091 XT_PUTPAGE(rmp); 1092 goto clean_up; 1093 } 1094 1095 BT_MARK_DIRTY(mp, ip); 1096 /* 1097 * acquire a transaction lock on the next page; 1098 * 1099 * action:sibling pointer update; 1100 */ 1101 if (!test_cflag(COMMIT_Nolink, ip)) 1102 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 1103 1104 p->header.prev = cpu_to_le64(rbn); 1105 1106 /* sibling page may have been updated previously, or 1107 * it may be updated later; 1108 */ 1109 1110 XT_PUTPAGE(mp); 1111 } 1112 1113 /* 1114 * split the data between the split and new/right pages 1115 */ 1116 maxentry = le16_to_cpu(sp->header.maxentry); 1117 middle = maxentry >> 1; 1118 righthalf = maxentry - middle; 1119 1120 /* 1121 * skip index in old split/left page - insert into left page: 1122 */ 1123 if (skip <= middle) { 1124 /* move right half of split page to the new right page */ 1125 memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], 1126 righthalf << L2XTSLOTSIZE); 1127 1128 /* shift right tail of left half to make room for new entry */ 1129 if (skip < middle) 1130 memmove(&sp->xad[skip + 1], &sp->xad[skip], 1131 (middle - skip) << L2XTSLOTSIZE); 1132 1133 /* insert new entry */ 1134 xad = &sp->xad[skip]; 1135 XT_PUTENTRY(xad, split->flag, split->off, split->len, 1136 split->addr); 1137 1138 /* update page header */ 1139 sp->header.nextindex = cpu_to_le16(middle + 1); 1140 if (!test_cflag(COMMIT_Nolink, ip)) { 1141 sxtlck->lwm.offset = (sxtlck->lwm.offset) ? 1142 min(skip, (int)sxtlck->lwm.offset) : skip; 1143 } 1144 1145 rp->header.nextindex = 1146 cpu_to_le16(XTENTRYSTART + righthalf); 1147 } 1148 /* 1149 * skip index in new right page - insert into right page: 1150 */ 1151 else { 1152 /* move left head of right half to right page */ 1153 n = skip - middle; 1154 memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle], 1155 n << L2XTSLOTSIZE); 1156 1157 /* insert new entry */ 1158 n += XTENTRYSTART; 1159 xad = &rp->xad[n]; 1160 XT_PUTENTRY(xad, split->flag, split->off, split->len, 1161 split->addr); 1162 1163 /* move right tail of right half to right page */ 1164 if (skip < maxentry) 1165 memmove(&rp->xad[n + 1], &sp->xad[skip], 1166 (maxentry - skip) << L2XTSLOTSIZE); 1167 1168 /* update page header */ 1169 sp->header.nextindex = cpu_to_le16(middle); 1170 if (!test_cflag(COMMIT_Nolink, ip)) { 1171 sxtlck->lwm.offset = (sxtlck->lwm.offset) ? 1172 min(middle, (int)sxtlck->lwm.offset) : middle; 1173 } 1174 1175 rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1176 righthalf + 1); 1177 } 1178 1179 if (!test_cflag(COMMIT_Nolink, ip)) { 1180 sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) - 1181 sxtlck->lwm.offset; 1182 1183 /* rxtlck->lwm.offset = XTENTRYSTART; */ 1184 rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - 1185 XTENTRYSTART; 1186 } 1187 1188 *rmpp = rmp; 1189 *rbnp = rbn; 1190 1191 jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp); 1192 return rc; 1193 1194 clean_up: 1195 1196 /* Rollback quota allocation. */ 1197 if (quota_allocation) 1198 vfs_dq_free_block(ip, quota_allocation); 1199 1200 return (rc); 1201 } 1202 1203 1204 /* 1205 * xtSplitRoot() 1206 * 1207 * function: 1208 * split the full root page into original/root/split page and new 1209 * right page 1210 * i.e., root remains fixed in tree anchor (inode) and the root is 1211 * copied to a single new right child page since root page << 1212 * non-root page, and the split root page contains a single entry 1213 * for the new right child page. 1214 * 1215 * parameter: 1216 * int tid, 1217 * struct inode *ip, 1218 * struct xtsplit *split, 1219 * struct metapage **rmpp) 1220 * 1221 * return: 1222 * Pointer to page in which to insert or NULL on error. 1223 */ 1224 static int 1225 xtSplitRoot(tid_t tid, 1226 struct inode *ip, struct xtsplit * split, struct metapage ** rmpp) 1227 { 1228 xtpage_t *sp; 1229 struct metapage *rmp; 1230 xtpage_t *rp; 1231 s64 rbn; 1232 int skip, nextindex; 1233 xad_t *xad; 1234 pxd_t *pxd; 1235 struct pxdlist *pxdlist; 1236 struct tlock *tlck; 1237 struct xtlock *xtlck; 1238 1239 sp = &JFS_IP(ip)->i_xtroot; 1240 1241 INCREMENT(xtStat.split); 1242 1243 /* 1244 * allocate a single (right) child page 1245 */ 1246 pxdlist = split->pxdlist; 1247 pxd = &pxdlist->pxd[pxdlist->npxd]; 1248 pxdlist->npxd++; 1249 rbn = addressPXD(pxd); 1250 rmp = get_metapage(ip, rbn, PSIZE, 1); 1251 if (rmp == NULL) 1252 return -EIO; 1253 1254 /* Allocate blocks to quota. */ 1255 if (vfs_dq_alloc_block(ip, lengthPXD(pxd))) { 1256 release_metapage(rmp); 1257 return -EDQUOT; 1258 } 1259 1260 jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp); 1261 1262 /* 1263 * acquire a transaction lock on the new right page; 1264 * 1265 * action: new page; 1266 */ 1267 BT_MARK_DIRTY(rmp, ip); 1268 1269 rp = (xtpage_t *) rmp->data; 1270 rp->header.flag = 1271 (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL; 1272 rp->header.self = *pxd; 1273 rp->header.nextindex = cpu_to_le16(XTENTRYSTART); 1274 rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE); 1275 1276 /* initialize sibling pointers */ 1277 rp->header.next = 0; 1278 rp->header.prev = 0; 1279 1280 /* 1281 * copy the in-line root page into new right page extent 1282 */ 1283 nextindex = le16_to_cpu(sp->header.maxentry); 1284 memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART], 1285 (nextindex - XTENTRYSTART) << L2XTSLOTSIZE); 1286 1287 /* 1288 * insert the new entry into the new right/child page 1289 * (skip index in the new right page will not change) 1290 */ 1291 skip = split->index; 1292 /* if insert into middle, shift right remaining entries */ 1293 if (skip != nextindex) 1294 memmove(&rp->xad[skip + 1], &rp->xad[skip], 1295 (nextindex - skip) * sizeof(xad_t)); 1296 1297 xad = &rp->xad[skip]; 1298 XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr); 1299 1300 /* update page header */ 1301 rp->header.nextindex = cpu_to_le16(nextindex + 1); 1302 1303 if (!test_cflag(COMMIT_Nolink, ip)) { 1304 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW); 1305 xtlck = (struct xtlock *) & tlck->lock; 1306 xtlck->lwm.offset = XTENTRYSTART; 1307 xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) - 1308 XTENTRYSTART; 1309 } 1310 1311 /* 1312 * reset the root 1313 * 1314 * init root with the single entry for the new right page 1315 * set the 1st entry offset to 0, which force the left-most key 1316 * at any level of the tree to be less than any search key. 1317 */ 1318 /* 1319 * acquire a transaction lock on the root page (in-memory inode); 1320 * 1321 * action: root split; 1322 */ 1323 BT_MARK_DIRTY(split->mp, ip); 1324 1325 xad = &sp->xad[XTENTRYSTART]; 1326 XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn); 1327 1328 /* update page header of root */ 1329 sp->header.flag &= ~BT_LEAF; 1330 sp->header.flag |= BT_INTERNAL; 1331 1332 sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1); 1333 1334 if (!test_cflag(COMMIT_Nolink, ip)) { 1335 tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW); 1336 xtlck = (struct xtlock *) & tlck->lock; 1337 xtlck->lwm.offset = XTENTRYSTART; 1338 xtlck->lwm.length = 1; 1339 } 1340 1341 *rmpp = rmp; 1342 1343 jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp); 1344 return 0; 1345 } 1346 1347 1348 /* 1349 * xtExtend() 1350 * 1351 * function: extend in-place; 1352 * 1353 * note: existing extent may or may not have been committed. 1354 * caller is responsible for pager buffer cache update, and 1355 * working block allocation map update; 1356 * update pmap: alloc whole extended extent; 1357 */ 1358 int xtExtend(tid_t tid, /* transaction id */ 1359 struct inode *ip, s64 xoff, /* delta extent offset */ 1360 s32 xlen, /* delta extent length */ 1361 int flag) 1362 { 1363 int rc = 0; 1364 int cmp; 1365 struct metapage *mp; /* meta-page buffer */ 1366 xtpage_t *p; /* base B+-tree index page */ 1367 s64 bn; 1368 int index, nextindex, len; 1369 struct btstack btstack; /* traverse stack */ 1370 struct xtsplit split; /* split information */ 1371 xad_t *xad; 1372 s64 xaddr; 1373 struct tlock *tlck; 1374 struct xtlock *xtlck = NULL; 1375 1376 jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); 1377 1378 /* there must exist extent to be extended */ 1379 if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT))) 1380 return rc; 1381 1382 /* retrieve search result */ 1383 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 1384 1385 if (cmp != 0) { 1386 XT_PUTPAGE(mp); 1387 jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent"); 1388 return -EIO; 1389 } 1390 1391 /* extension must be contiguous */ 1392 xad = &p->xad[index]; 1393 if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) { 1394 XT_PUTPAGE(mp); 1395 jfs_error(ip->i_sb, "xtExtend: extension is not contiguous"); 1396 return -EIO; 1397 } 1398 1399 /* 1400 * acquire a transaction lock on the leaf page; 1401 * 1402 * action: xad insertion/extension; 1403 */ 1404 BT_MARK_DIRTY(mp, ip); 1405 if (!test_cflag(COMMIT_Nolink, ip)) { 1406 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 1407 xtlck = (struct xtlock *) & tlck->lock; 1408 } 1409 1410 /* extend will overflow extent ? */ 1411 xlen = lengthXAD(xad) + xlen; 1412 if ((len = xlen - MAXXLEN) <= 0) 1413 goto extendOld; 1414 1415 /* 1416 * extent overflow: insert entry for new extent 1417 */ 1418 //insertNew: 1419 xoff = offsetXAD(xad) + MAXXLEN; 1420 xaddr = addressXAD(xad) + MAXXLEN; 1421 nextindex = le16_to_cpu(p->header.nextindex); 1422 1423 /* 1424 * if the leaf page is full, insert the new entry and 1425 * propagate up the router entry for the new page from split 1426 * 1427 * The xtSplitUp() will insert the entry and unpin the leaf page. 1428 */ 1429 if (nextindex == le16_to_cpu(p->header.maxentry)) { 1430 /* xtSpliUp() unpins leaf pages */ 1431 split.mp = mp; 1432 split.index = index + 1; 1433 split.flag = XAD_NEW; 1434 split.off = xoff; /* split offset */ 1435 split.len = len; 1436 split.addr = xaddr; 1437 split.pxdlist = NULL; 1438 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 1439 return rc; 1440 1441 /* get back old page */ 1442 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1443 if (rc) 1444 return rc; 1445 /* 1446 * if leaf root has been split, original root has been 1447 * copied to new child page, i.e., original entry now 1448 * resides on the new child page; 1449 */ 1450 if (p->header.flag & BT_INTERNAL) { 1451 ASSERT(p->header.nextindex == 1452 cpu_to_le16(XTENTRYSTART + 1)); 1453 xad = &p->xad[XTENTRYSTART]; 1454 bn = addressXAD(xad); 1455 XT_PUTPAGE(mp); 1456 1457 /* get new child page */ 1458 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1459 if (rc) 1460 return rc; 1461 1462 BT_MARK_DIRTY(mp, ip); 1463 if (!test_cflag(COMMIT_Nolink, ip)) { 1464 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 1465 xtlck = (struct xtlock *) & tlck->lock; 1466 } 1467 } 1468 } 1469 /* 1470 * insert the new entry into the leaf page 1471 */ 1472 else { 1473 /* insert the new entry: mark the entry NEW */ 1474 xad = &p->xad[index + 1]; 1475 XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr); 1476 1477 /* advance next available entry index */ 1478 le16_add_cpu(&p->header.nextindex, 1); 1479 } 1480 1481 /* get back old entry */ 1482 xad = &p->xad[index]; 1483 xlen = MAXXLEN; 1484 1485 /* 1486 * extend old extent 1487 */ 1488 extendOld: 1489 XADlength(xad, xlen); 1490 if (!(xad->flag & XAD_NEW)) 1491 xad->flag |= XAD_EXTENDED; 1492 1493 if (!test_cflag(COMMIT_Nolink, ip)) { 1494 xtlck->lwm.offset = 1495 (xtlck->lwm.offset) ? min(index, 1496 (int)xtlck->lwm.offset) : index; 1497 xtlck->lwm.length = 1498 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; 1499 } 1500 1501 /* unpin the leaf page */ 1502 XT_PUTPAGE(mp); 1503 1504 return rc; 1505 } 1506 1507 #ifdef _NOTYET 1508 /* 1509 * xtTailgate() 1510 * 1511 * function: split existing 'tail' extent 1512 * (split offset >= start offset of tail extent), and 1513 * relocate and extend the split tail half; 1514 * 1515 * note: existing extent may or may not have been committed. 1516 * caller is responsible for pager buffer cache update, and 1517 * working block allocation map update; 1518 * update pmap: free old split tail extent, alloc new extent; 1519 */ 1520 int xtTailgate(tid_t tid, /* transaction id */ 1521 struct inode *ip, s64 xoff, /* split/new extent offset */ 1522 s32 xlen, /* new extent length */ 1523 s64 xaddr, /* new extent address */ 1524 int flag) 1525 { 1526 int rc = 0; 1527 int cmp; 1528 struct metapage *mp; /* meta-page buffer */ 1529 xtpage_t *p; /* base B+-tree index page */ 1530 s64 bn; 1531 int index, nextindex, llen, rlen; 1532 struct btstack btstack; /* traverse stack */ 1533 struct xtsplit split; /* split information */ 1534 xad_t *xad; 1535 struct tlock *tlck; 1536 struct xtlock *xtlck = 0; 1537 struct tlock *mtlck; 1538 struct maplock *pxdlock; 1539 1540 /* 1541 printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", 1542 (ulong)xoff, xlen, (ulong)xaddr); 1543 */ 1544 1545 /* there must exist extent to be tailgated */ 1546 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT))) 1547 return rc; 1548 1549 /* retrieve search result */ 1550 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 1551 1552 if (cmp != 0) { 1553 XT_PUTPAGE(mp); 1554 jfs_error(ip->i_sb, "xtTailgate: couldn't find extent"); 1555 return -EIO; 1556 } 1557 1558 /* entry found must be last entry */ 1559 nextindex = le16_to_cpu(p->header.nextindex); 1560 if (index != nextindex - 1) { 1561 XT_PUTPAGE(mp); 1562 jfs_error(ip->i_sb, 1563 "xtTailgate: the entry found is not the last entry"); 1564 return -EIO; 1565 } 1566 1567 BT_MARK_DIRTY(mp, ip); 1568 /* 1569 * acquire tlock of the leaf page containing original entry 1570 */ 1571 if (!test_cflag(COMMIT_Nolink, ip)) { 1572 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 1573 xtlck = (struct xtlock *) & tlck->lock; 1574 } 1575 1576 /* completely replace extent ? */ 1577 xad = &p->xad[index]; 1578 /* 1579 printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", 1580 (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad)); 1581 */ 1582 if ((llen = xoff - offsetXAD(xad)) == 0) 1583 goto updateOld; 1584 1585 /* 1586 * partially replace extent: insert entry for new extent 1587 */ 1588 //insertNew: 1589 /* 1590 * if the leaf page is full, insert the new entry and 1591 * propagate up the router entry for the new page from split 1592 * 1593 * The xtSplitUp() will insert the entry and unpin the leaf page. 1594 */ 1595 if (nextindex == le16_to_cpu(p->header.maxentry)) { 1596 /* xtSpliUp() unpins leaf pages */ 1597 split.mp = mp; 1598 split.index = index + 1; 1599 split.flag = XAD_NEW; 1600 split.off = xoff; /* split offset */ 1601 split.len = xlen; 1602 split.addr = xaddr; 1603 split.pxdlist = NULL; 1604 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 1605 return rc; 1606 1607 /* get back old page */ 1608 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1609 if (rc) 1610 return rc; 1611 /* 1612 * if leaf root has been split, original root has been 1613 * copied to new child page, i.e., original entry now 1614 * resides on the new child page; 1615 */ 1616 if (p->header.flag & BT_INTERNAL) { 1617 ASSERT(p->header.nextindex == 1618 cpu_to_le16(XTENTRYSTART + 1)); 1619 xad = &p->xad[XTENTRYSTART]; 1620 bn = addressXAD(xad); 1621 XT_PUTPAGE(mp); 1622 1623 /* get new child page */ 1624 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1625 if (rc) 1626 return rc; 1627 1628 BT_MARK_DIRTY(mp, ip); 1629 if (!test_cflag(COMMIT_Nolink, ip)) { 1630 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 1631 xtlck = (struct xtlock *) & tlck->lock; 1632 } 1633 } 1634 } 1635 /* 1636 * insert the new entry into the leaf page 1637 */ 1638 else { 1639 /* insert the new entry: mark the entry NEW */ 1640 xad = &p->xad[index + 1]; 1641 XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); 1642 1643 /* advance next available entry index */ 1644 le16_add_cpu(&p->header.nextindex, 1); 1645 } 1646 1647 /* get back old XAD */ 1648 xad = &p->xad[index]; 1649 1650 /* 1651 * truncate/relocate old extent at split offset 1652 */ 1653 updateOld: 1654 /* update dmap for old/committed/truncated extent */ 1655 rlen = lengthXAD(xad) - llen; 1656 if (!(xad->flag & XAD_NEW)) { 1657 /* free from PWMAP at commit */ 1658 if (!test_cflag(COMMIT_Nolink, ip)) { 1659 mtlck = txMaplock(tid, ip, tlckMAP); 1660 pxdlock = (struct maplock *) & mtlck->lock; 1661 pxdlock->flag = mlckFREEPXD; 1662 PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen); 1663 PXDlength(&pxdlock->pxd, rlen); 1664 pxdlock->index = 1; 1665 } 1666 } else 1667 /* free from WMAP */ 1668 dbFree(ip, addressXAD(xad) + llen, (s64) rlen); 1669 1670 if (llen) 1671 /* truncate */ 1672 XADlength(xad, llen); 1673 else 1674 /* replace */ 1675 XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); 1676 1677 if (!test_cflag(COMMIT_Nolink, ip)) { 1678 xtlck->lwm.offset = (xtlck->lwm.offset) ? 1679 min(index, (int)xtlck->lwm.offset) : index; 1680 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 1681 xtlck->lwm.offset; 1682 } 1683 1684 /* unpin the leaf page */ 1685 XT_PUTPAGE(mp); 1686 1687 return rc; 1688 } 1689 #endif /* _NOTYET */ 1690 1691 /* 1692 * xtUpdate() 1693 * 1694 * function: update XAD; 1695 * 1696 * update extent for allocated_but_not_recorded or 1697 * compressed extent; 1698 * 1699 * parameter: 1700 * nxad - new XAD; 1701 * logical extent of the specified XAD must be completely 1702 * contained by an existing XAD; 1703 */ 1704 int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) 1705 { /* new XAD */ 1706 int rc = 0; 1707 int cmp; 1708 struct metapage *mp; /* meta-page buffer */ 1709 xtpage_t *p; /* base B+-tree index page */ 1710 s64 bn; 1711 int index0, index, newindex, nextindex; 1712 struct btstack btstack; /* traverse stack */ 1713 struct xtsplit split; /* split information */ 1714 xad_t *xad, *lxad, *rxad; 1715 int xflag; 1716 s64 nxoff, xoff; 1717 int nxlen, xlen, lxlen, rxlen; 1718 s64 nxaddr, xaddr; 1719 struct tlock *tlck; 1720 struct xtlock *xtlck = NULL; 1721 int newpage = 0; 1722 1723 /* there must exist extent to be tailgated */ 1724 nxoff = offsetXAD(nxad); 1725 nxlen = lengthXAD(nxad); 1726 nxaddr = addressXAD(nxad); 1727 1728 if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) 1729 return rc; 1730 1731 /* retrieve search result */ 1732 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); 1733 1734 if (cmp != 0) { 1735 XT_PUTPAGE(mp); 1736 jfs_error(ip->i_sb, "xtUpdate: Could not find extent"); 1737 return -EIO; 1738 } 1739 1740 BT_MARK_DIRTY(mp, ip); 1741 /* 1742 * acquire tlock of the leaf page containing original entry 1743 */ 1744 if (!test_cflag(COMMIT_Nolink, ip)) { 1745 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 1746 xtlck = (struct xtlock *) & tlck->lock; 1747 } 1748 1749 xad = &p->xad[index0]; 1750 xflag = xad->flag; 1751 xoff = offsetXAD(xad); 1752 xlen = lengthXAD(xad); 1753 xaddr = addressXAD(xad); 1754 1755 /* nXAD must be completely contained within XAD */ 1756 if ((xoff > nxoff) || 1757 (nxoff + nxlen > xoff + xlen)) { 1758 XT_PUTPAGE(mp); 1759 jfs_error(ip->i_sb, 1760 "xtUpdate: nXAD in not completely contained within XAD"); 1761 return -EIO; 1762 } 1763 1764 index = index0; 1765 newindex = index + 1; 1766 nextindex = le16_to_cpu(p->header.nextindex); 1767 1768 #ifdef _JFS_WIP_NOCOALESCE 1769 if (xoff < nxoff) 1770 goto updateRight; 1771 1772 /* 1773 * replace XAD with nXAD 1774 */ 1775 replace: /* (nxoff == xoff) */ 1776 if (nxlen == xlen) { 1777 /* replace XAD with nXAD:recorded */ 1778 *xad = *nxad; 1779 xad->flag = xflag & ~XAD_NOTRECORDED; 1780 1781 goto out; 1782 } else /* (nxlen < xlen) */ 1783 goto updateLeft; 1784 #endif /* _JFS_WIP_NOCOALESCE */ 1785 1786 /* #ifdef _JFS_WIP_COALESCE */ 1787 if (xoff < nxoff) 1788 goto coalesceRight; 1789 1790 /* 1791 * coalesce with left XAD 1792 */ 1793 //coalesceLeft: /* (xoff == nxoff) */ 1794 /* is XAD first entry of page ? */ 1795 if (index == XTENTRYSTART) 1796 goto replace; 1797 1798 /* is nXAD logically and physically contiguous with lXAD ? */ 1799 lxad = &p->xad[index - 1]; 1800 lxlen = lengthXAD(lxad); 1801 if (!(lxad->flag & XAD_NOTRECORDED) && 1802 (nxoff == offsetXAD(lxad) + lxlen) && 1803 (nxaddr == addressXAD(lxad) + lxlen) && 1804 (lxlen + nxlen < MAXXLEN)) { 1805 /* extend right lXAD */ 1806 index0 = index - 1; 1807 XADlength(lxad, lxlen + nxlen); 1808 1809 /* If we just merged two extents together, need to make sure the 1810 * right extent gets logged. If the left one is marked XAD_NEW, 1811 * then we know it will be logged. Otherwise, mark as 1812 * XAD_EXTENDED 1813 */ 1814 if (!(lxad->flag & XAD_NEW)) 1815 lxad->flag |= XAD_EXTENDED; 1816 1817 if (xlen > nxlen) { 1818 /* truncate XAD */ 1819 XADoffset(xad, xoff + nxlen); 1820 XADlength(xad, xlen - nxlen); 1821 XADaddress(xad, xaddr + nxlen); 1822 goto out; 1823 } else { /* (xlen == nxlen) */ 1824 1825 /* remove XAD */ 1826 if (index < nextindex - 1) 1827 memmove(&p->xad[index], &p->xad[index + 1], 1828 (nextindex - index - 1829 1) << L2XTSLOTSIZE); 1830 1831 p->header.nextindex = 1832 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1833 1); 1834 1835 index = index0; 1836 newindex = index + 1; 1837 nextindex = le16_to_cpu(p->header.nextindex); 1838 xoff = nxoff = offsetXAD(lxad); 1839 xlen = nxlen = lxlen + nxlen; 1840 xaddr = nxaddr = addressXAD(lxad); 1841 goto coalesceRight; 1842 } 1843 } 1844 1845 /* 1846 * replace XAD with nXAD 1847 */ 1848 replace: /* (nxoff == xoff) */ 1849 if (nxlen == xlen) { 1850 /* replace XAD with nXAD:recorded */ 1851 *xad = *nxad; 1852 xad->flag = xflag & ~XAD_NOTRECORDED; 1853 1854 goto coalesceRight; 1855 } else /* (nxlen < xlen) */ 1856 goto updateLeft; 1857 1858 /* 1859 * coalesce with right XAD 1860 */ 1861 coalesceRight: /* (xoff <= nxoff) */ 1862 /* is XAD last entry of page ? */ 1863 if (newindex == nextindex) { 1864 if (xoff == nxoff) 1865 goto out; 1866 goto updateRight; 1867 } 1868 1869 /* is nXAD logically and physically contiguous with rXAD ? */ 1870 rxad = &p->xad[index + 1]; 1871 rxlen = lengthXAD(rxad); 1872 if (!(rxad->flag & XAD_NOTRECORDED) && 1873 (nxoff + nxlen == offsetXAD(rxad)) && 1874 (nxaddr + nxlen == addressXAD(rxad)) && 1875 (rxlen + nxlen < MAXXLEN)) { 1876 /* extend left rXAD */ 1877 XADoffset(rxad, nxoff); 1878 XADlength(rxad, rxlen + nxlen); 1879 XADaddress(rxad, nxaddr); 1880 1881 /* If we just merged two extents together, need to make sure 1882 * the left extent gets logged. If the right one is marked 1883 * XAD_NEW, then we know it will be logged. Otherwise, mark as 1884 * XAD_EXTENDED 1885 */ 1886 if (!(rxad->flag & XAD_NEW)) 1887 rxad->flag |= XAD_EXTENDED; 1888 1889 if (xlen > nxlen) 1890 /* truncate XAD */ 1891 XADlength(xad, xlen - nxlen); 1892 else { /* (xlen == nxlen) */ 1893 1894 /* remove XAD */ 1895 memmove(&p->xad[index], &p->xad[index + 1], 1896 (nextindex - index - 1) << L2XTSLOTSIZE); 1897 1898 p->header.nextindex = 1899 cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1900 1); 1901 } 1902 1903 goto out; 1904 } else if (xoff == nxoff) 1905 goto out; 1906 1907 if (xoff >= nxoff) { 1908 XT_PUTPAGE(mp); 1909 jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff"); 1910 return -EIO; 1911 } 1912 /* #endif _JFS_WIP_COALESCE */ 1913 1914 /* 1915 * split XAD into (lXAD, nXAD): 1916 * 1917 * |---nXAD---> 1918 * --|----------XAD----------|-- 1919 * |-lXAD-| 1920 */ 1921 updateRight: /* (xoff < nxoff) */ 1922 /* truncate old XAD as lXAD:not_recorded */ 1923 xad = &p->xad[index]; 1924 XADlength(xad, nxoff - xoff); 1925 1926 /* insert nXAD:recorded */ 1927 if (nextindex == le16_to_cpu(p->header.maxentry)) { 1928 1929 /* xtSpliUp() unpins leaf pages */ 1930 split.mp = mp; 1931 split.index = newindex; 1932 split.flag = xflag & ~XAD_NOTRECORDED; 1933 split.off = nxoff; 1934 split.len = nxlen; 1935 split.addr = nxaddr; 1936 split.pxdlist = NULL; 1937 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 1938 return rc; 1939 1940 /* get back old page */ 1941 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1942 if (rc) 1943 return rc; 1944 /* 1945 * if leaf root has been split, original root has been 1946 * copied to new child page, i.e., original entry now 1947 * resides on the new child page; 1948 */ 1949 if (p->header.flag & BT_INTERNAL) { 1950 ASSERT(p->header.nextindex == 1951 cpu_to_le16(XTENTRYSTART + 1)); 1952 xad = &p->xad[XTENTRYSTART]; 1953 bn = addressXAD(xad); 1954 XT_PUTPAGE(mp); 1955 1956 /* get new child page */ 1957 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 1958 if (rc) 1959 return rc; 1960 1961 BT_MARK_DIRTY(mp, ip); 1962 if (!test_cflag(COMMIT_Nolink, ip)) { 1963 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 1964 xtlck = (struct xtlock *) & tlck->lock; 1965 } 1966 } else { 1967 /* is nXAD on new page ? */ 1968 if (newindex > 1969 (le16_to_cpu(p->header.maxentry) >> 1)) { 1970 newindex = 1971 newindex - 1972 le16_to_cpu(p->header.nextindex) + 1973 XTENTRYSTART; 1974 newpage = 1; 1975 } 1976 } 1977 } else { 1978 /* if insert into middle, shift right remaining entries */ 1979 if (newindex < nextindex) 1980 memmove(&p->xad[newindex + 1], &p->xad[newindex], 1981 (nextindex - newindex) << L2XTSLOTSIZE); 1982 1983 /* insert the entry */ 1984 xad = &p->xad[newindex]; 1985 *xad = *nxad; 1986 xad->flag = xflag & ~XAD_NOTRECORDED; 1987 1988 /* advance next available entry index. */ 1989 p->header.nextindex = 1990 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 1991 } 1992 1993 /* 1994 * does nXAD force 3-way split ? 1995 * 1996 * |---nXAD--->| 1997 * --|----------XAD-------------|-- 1998 * |-lXAD-| |-rXAD -| 1999 */ 2000 if (nxoff + nxlen == xoff + xlen) 2001 goto out; 2002 2003 /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */ 2004 if (newpage) { 2005 /* close out old page */ 2006 if (!test_cflag(COMMIT_Nolink, ip)) { 2007 xtlck->lwm.offset = (xtlck->lwm.offset) ? 2008 min(index0, (int)xtlck->lwm.offset) : index0; 2009 xtlck->lwm.length = 2010 le16_to_cpu(p->header.nextindex) - 2011 xtlck->lwm.offset; 2012 } 2013 2014 bn = le64_to_cpu(p->header.next); 2015 XT_PUTPAGE(mp); 2016 2017 /* get new right page */ 2018 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2019 if (rc) 2020 return rc; 2021 2022 BT_MARK_DIRTY(mp, ip); 2023 if (!test_cflag(COMMIT_Nolink, ip)) { 2024 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 2025 xtlck = (struct xtlock *) & tlck->lock; 2026 } 2027 2028 index0 = index = newindex; 2029 } else 2030 index++; 2031 2032 newindex = index + 1; 2033 nextindex = le16_to_cpu(p->header.nextindex); 2034 xlen = xlen - (nxoff - xoff); 2035 xoff = nxoff; 2036 xaddr = nxaddr; 2037 2038 /* recompute split pages */ 2039 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2040 XT_PUTPAGE(mp); 2041 2042 if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT))) 2043 return rc; 2044 2045 /* retrieve search result */ 2046 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0); 2047 2048 if (cmp != 0) { 2049 XT_PUTPAGE(mp); 2050 jfs_error(ip->i_sb, "xtUpdate: xtSearch failed"); 2051 return -EIO; 2052 } 2053 2054 if (index0 != index) { 2055 XT_PUTPAGE(mp); 2056 jfs_error(ip->i_sb, 2057 "xtUpdate: unexpected value of index"); 2058 return -EIO; 2059 } 2060 } 2061 2062 /* 2063 * split XAD into (nXAD, rXAD) 2064 * 2065 * ---nXAD---| 2066 * --|----------XAD----------|-- 2067 * |-rXAD-| 2068 */ 2069 updateLeft: /* (nxoff == xoff) && (nxlen < xlen) */ 2070 /* update old XAD with nXAD:recorded */ 2071 xad = &p->xad[index]; 2072 *xad = *nxad; 2073 xad->flag = xflag & ~XAD_NOTRECORDED; 2074 2075 /* insert rXAD:not_recorded */ 2076 xoff = xoff + nxlen; 2077 xlen = xlen - nxlen; 2078 xaddr = xaddr + nxlen; 2079 if (nextindex == le16_to_cpu(p->header.maxentry)) { 2080 /* 2081 printf("xtUpdate.updateLeft.split p:0x%p\n", p); 2082 */ 2083 /* xtSpliUp() unpins leaf pages */ 2084 split.mp = mp; 2085 split.index = newindex; 2086 split.flag = xflag; 2087 split.off = xoff; 2088 split.len = xlen; 2089 split.addr = xaddr; 2090 split.pxdlist = NULL; 2091 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) 2092 return rc; 2093 2094 /* get back old page */ 2095 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2096 if (rc) 2097 return rc; 2098 2099 /* 2100 * if leaf root has been split, original root has been 2101 * copied to new child page, i.e., original entry now 2102 * resides on the new child page; 2103 */ 2104 if (p->header.flag & BT_INTERNAL) { 2105 ASSERT(p->header.nextindex == 2106 cpu_to_le16(XTENTRYSTART + 1)); 2107 xad = &p->xad[XTENTRYSTART]; 2108 bn = addressXAD(xad); 2109 XT_PUTPAGE(mp); 2110 2111 /* get new child page */ 2112 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2113 if (rc) 2114 return rc; 2115 2116 BT_MARK_DIRTY(mp, ip); 2117 if (!test_cflag(COMMIT_Nolink, ip)) { 2118 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 2119 xtlck = (struct xtlock *) & tlck->lock; 2120 } 2121 } 2122 } else { 2123 /* if insert into middle, shift right remaining entries */ 2124 if (newindex < nextindex) 2125 memmove(&p->xad[newindex + 1], &p->xad[newindex], 2126 (nextindex - newindex) << L2XTSLOTSIZE); 2127 2128 /* insert the entry */ 2129 xad = &p->xad[newindex]; 2130 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 2131 2132 /* advance next available entry index. */ 2133 p->header.nextindex = 2134 cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); 2135 } 2136 2137 out: 2138 if (!test_cflag(COMMIT_Nolink, ip)) { 2139 xtlck->lwm.offset = (xtlck->lwm.offset) ? 2140 min(index0, (int)xtlck->lwm.offset) : index0; 2141 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 2142 xtlck->lwm.offset; 2143 } 2144 2145 /* unpin the leaf page */ 2146 XT_PUTPAGE(mp); 2147 2148 return rc; 2149 } 2150 2151 2152 /* 2153 * xtAppend() 2154 * 2155 * function: grow in append mode from contiguous region specified ; 2156 * 2157 * parameter: 2158 * tid - transaction id; 2159 * ip - file object; 2160 * xflag - extent flag: 2161 * xoff - extent offset; 2162 * maxblocks - max extent length; 2163 * xlen - extent length (in/out); 2164 * xaddrp - extent address pointer (in/out): 2165 * flag - 2166 * 2167 * return: 2168 */ 2169 int xtAppend(tid_t tid, /* transaction id */ 2170 struct inode *ip, int xflag, s64 xoff, s32 maxblocks, 2171 s32 * xlenp, /* (in/out) */ 2172 s64 * xaddrp, /* (in/out) */ 2173 int flag) 2174 { 2175 int rc = 0; 2176 struct metapage *mp; /* meta-page buffer */ 2177 xtpage_t *p; /* base B+-tree index page */ 2178 s64 bn, xaddr; 2179 int index, nextindex; 2180 struct btstack btstack; /* traverse stack */ 2181 struct xtsplit split; /* split information */ 2182 xad_t *xad; 2183 int cmp; 2184 struct tlock *tlck; 2185 struct xtlock *xtlck; 2186 int nsplit, nblocks, xlen; 2187 struct pxdlist pxdlist; 2188 pxd_t *pxd; 2189 s64 next; 2190 2191 xaddr = *xaddrp; 2192 xlen = *xlenp; 2193 jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx", 2194 (ulong) xoff, maxblocks, xlen, (ulong) xaddr); 2195 2196 /* 2197 * search for the entry location at which to insert: 2198 * 2199 * xtFastSearch() and xtSearch() both returns (leaf page 2200 * pinned, index at which to insert). 2201 * n.b. xtSearch() may return index of maxentry of 2202 * the full page. 2203 */ 2204 if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT))) 2205 return rc; 2206 2207 /* retrieve search result */ 2208 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2209 2210 if (cmp == 0) { 2211 rc = -EEXIST; 2212 goto out; 2213 } 2214 2215 if (next) 2216 xlen = min(xlen, (int)(next - xoff)); 2217 //insert: 2218 /* 2219 * insert entry for new extent 2220 */ 2221 xflag |= XAD_NEW; 2222 2223 /* 2224 * if the leaf page is full, split the page and 2225 * propagate up the router entry for the new page from split 2226 * 2227 * The xtSplitUp() will insert the entry and unpin the leaf page. 2228 */ 2229 nextindex = le16_to_cpu(p->header.nextindex); 2230 if (nextindex < le16_to_cpu(p->header.maxentry)) 2231 goto insertLeaf; 2232 2233 /* 2234 * allocate new index blocks to cover index page split(s) 2235 */ 2236 nsplit = btstack.nsplit; 2237 split.pxdlist = &pxdlist; 2238 pxdlist.maxnpxd = pxdlist.npxd = 0; 2239 pxd = &pxdlist.pxd[0]; 2240 nblocks = JFS_SBI(ip->i_sb)->nbperpage; 2241 for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) { 2242 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) { 2243 PXDaddress(pxd, xaddr); 2244 PXDlength(pxd, nblocks); 2245 2246 pxdlist.maxnpxd++; 2247 2248 continue; 2249 } 2250 2251 /* undo allocation */ 2252 2253 goto out; 2254 } 2255 2256 xlen = min(xlen, maxblocks); 2257 2258 /* 2259 * allocate data extent requested 2260 */ 2261 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) 2262 goto out; 2263 2264 split.mp = mp; 2265 split.index = index; 2266 split.flag = xflag; 2267 split.off = xoff; 2268 split.len = xlen; 2269 split.addr = xaddr; 2270 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) { 2271 /* undo data extent allocation */ 2272 dbFree(ip, *xaddrp, (s64) * xlenp); 2273 2274 return rc; 2275 } 2276 2277 *xaddrp = xaddr; 2278 *xlenp = xlen; 2279 return 0; 2280 2281 /* 2282 * insert the new entry into the leaf page 2283 */ 2284 insertLeaf: 2285 /* 2286 * allocate data extent requested 2287 */ 2288 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen))) 2289 goto out; 2290 2291 BT_MARK_DIRTY(mp, ip); 2292 /* 2293 * acquire a transaction lock on the leaf page; 2294 * 2295 * action: xad insertion/extension; 2296 */ 2297 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW); 2298 xtlck = (struct xtlock *) & tlck->lock; 2299 2300 /* insert the new entry: mark the entry NEW */ 2301 xad = &p->xad[index]; 2302 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); 2303 2304 /* advance next available entry index */ 2305 le16_add_cpu(&p->header.nextindex, 1); 2306 2307 xtlck->lwm.offset = 2308 (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index; 2309 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) - 2310 xtlck->lwm.offset; 2311 2312 *xaddrp = xaddr; 2313 *xlenp = xlen; 2314 2315 out: 2316 /* unpin the leaf page */ 2317 XT_PUTPAGE(mp); 2318 2319 return rc; 2320 } 2321 #ifdef _STILL_TO_PORT 2322 2323 /* - TBD for defragmentaion/reorganization - 2324 * 2325 * xtDelete() 2326 * 2327 * function: 2328 * delete the entry with the specified key. 2329 * 2330 * N.B.: whole extent of the entry is assumed to be deleted. 2331 * 2332 * parameter: 2333 * 2334 * return: 2335 * ENOENT: if the entry is not found. 2336 * 2337 * exception: 2338 */ 2339 int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag) 2340 { 2341 int rc = 0; 2342 struct btstack btstack; 2343 int cmp; 2344 s64 bn; 2345 struct metapage *mp; 2346 xtpage_t *p; 2347 int index, nextindex; 2348 struct tlock *tlck; 2349 struct xtlock *xtlck; 2350 2351 /* 2352 * find the matching entry; xtSearch() pins the page 2353 */ 2354 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) 2355 return rc; 2356 2357 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2358 if (cmp) { 2359 /* unpin the leaf page */ 2360 XT_PUTPAGE(mp); 2361 return -ENOENT; 2362 } 2363 2364 /* 2365 * delete the entry from the leaf page 2366 */ 2367 nextindex = le16_to_cpu(p->header.nextindex); 2368 le16_add_cpu(&p->header.nextindex, -1); 2369 2370 /* 2371 * if the leaf page bocome empty, free the page 2372 */ 2373 if (p->header.nextindex == cpu_to_le16(XTENTRYSTART)) 2374 return (xtDeleteUp(tid, ip, mp, p, &btstack)); 2375 2376 BT_MARK_DIRTY(mp, ip); 2377 /* 2378 * acquire a transaction lock on the leaf page; 2379 * 2380 * action:xad deletion; 2381 */ 2382 tlck = txLock(tid, ip, mp, tlckXTREE); 2383 xtlck = (struct xtlock *) & tlck->lock; 2384 xtlck->lwm.offset = 2385 (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index; 2386 2387 /* if delete from middle, shift left/compact the remaining entries */ 2388 if (index < nextindex - 1) 2389 memmove(&p->xad[index], &p->xad[index + 1], 2390 (nextindex - index - 1) * sizeof(xad_t)); 2391 2392 XT_PUTPAGE(mp); 2393 2394 return 0; 2395 } 2396 2397 2398 /* - TBD for defragmentaion/reorganization - 2399 * 2400 * xtDeleteUp() 2401 * 2402 * function: 2403 * free empty pages as propagating deletion up the tree 2404 * 2405 * parameter: 2406 * 2407 * return: 2408 */ 2409 static int 2410 xtDeleteUp(tid_t tid, struct inode *ip, 2411 struct metapage * fmp, xtpage_t * fp, struct btstack * btstack) 2412 { 2413 int rc = 0; 2414 struct metapage *mp; 2415 xtpage_t *p; 2416 int index, nextindex; 2417 s64 xaddr; 2418 int xlen; 2419 struct btframe *parent; 2420 struct tlock *tlck; 2421 struct xtlock *xtlck; 2422 2423 /* 2424 * keep root leaf page which has become empty 2425 */ 2426 if (fp->header.flag & BT_ROOT) { 2427 /* keep the root page */ 2428 fp->header.flag &= ~BT_INTERNAL; 2429 fp->header.flag |= BT_LEAF; 2430 fp->header.nextindex = cpu_to_le16(XTENTRYSTART); 2431 2432 /* XT_PUTPAGE(fmp); */ 2433 2434 return 0; 2435 } 2436 2437 /* 2438 * free non-root leaf page 2439 */ 2440 if ((rc = xtRelink(tid, ip, fp))) { 2441 XT_PUTPAGE(fmp); 2442 return rc; 2443 } 2444 2445 xaddr = addressPXD(&fp->header.self); 2446 xlen = lengthPXD(&fp->header.self); 2447 /* free the page extent */ 2448 dbFree(ip, xaddr, (s64) xlen); 2449 2450 /* free the buffer page */ 2451 discard_metapage(fmp); 2452 2453 /* 2454 * propagate page deletion up the index tree 2455 * 2456 * If the delete from the parent page makes it empty, 2457 * continue all the way up the tree. 2458 * stop if the root page is reached (which is never deleted) or 2459 * if the entry deletion does not empty the page. 2460 */ 2461 while ((parent = BT_POP(btstack)) != NULL) { 2462 /* get/pin the parent page <sp> */ 2463 XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); 2464 if (rc) 2465 return rc; 2466 2467 index = parent->index; 2468 2469 /* delete the entry for the freed child page from parent. 2470 */ 2471 nextindex = le16_to_cpu(p->header.nextindex); 2472 2473 /* 2474 * the parent has the single entry being deleted: 2475 * free the parent page which has become empty. 2476 */ 2477 if (nextindex == 1) { 2478 if (p->header.flag & BT_ROOT) { 2479 /* keep the root page */ 2480 p->header.flag &= ~BT_INTERNAL; 2481 p->header.flag |= BT_LEAF; 2482 p->header.nextindex = 2483 cpu_to_le16(XTENTRYSTART); 2484 2485 /* XT_PUTPAGE(mp); */ 2486 2487 break; 2488 } else { 2489 /* free the parent page */ 2490 if ((rc = xtRelink(tid, ip, p))) 2491 return rc; 2492 2493 xaddr = addressPXD(&p->header.self); 2494 /* free the page extent */ 2495 dbFree(ip, xaddr, 2496 (s64) JFS_SBI(ip->i_sb)->nbperpage); 2497 2498 /* unpin/free the buffer page */ 2499 discard_metapage(mp); 2500 2501 /* propagate up */ 2502 continue; 2503 } 2504 } 2505 /* 2506 * the parent has other entries remaining: 2507 * delete the router entry from the parent page. 2508 */ 2509 else { 2510 BT_MARK_DIRTY(mp, ip); 2511 /* 2512 * acquire a transaction lock on the leaf page; 2513 * 2514 * action:xad deletion; 2515 */ 2516 tlck = txLock(tid, ip, mp, tlckXTREE); 2517 xtlck = (struct xtlock *) & tlck->lock; 2518 xtlck->lwm.offset = 2519 (xtlck->lwm.offset) ? min(index, 2520 xtlck->lwm. 2521 offset) : index; 2522 2523 /* if delete from middle, 2524 * shift left/compact the remaining entries in the page 2525 */ 2526 if (index < nextindex - 1) 2527 memmove(&p->xad[index], &p->xad[index + 1], 2528 (nextindex - index - 2529 1) << L2XTSLOTSIZE); 2530 2531 le16_add_cpu(&p->header.nextindex, -1); 2532 jfs_info("xtDeleteUp(entry): 0x%lx[%d]", 2533 (ulong) parent->bn, index); 2534 } 2535 2536 /* unpin the parent page */ 2537 XT_PUTPAGE(mp); 2538 2539 /* exit propagation up */ 2540 break; 2541 } 2542 2543 return 0; 2544 } 2545 2546 2547 /* 2548 * NAME: xtRelocate() 2549 * 2550 * FUNCTION: relocate xtpage or data extent of regular file; 2551 * This function is mainly used by defragfs utility. 2552 * 2553 * NOTE: This routine does not have the logic to handle 2554 * uncommitted allocated extent. The caller should call 2555 * txCommit() to commit all the allocation before call 2556 * this routine. 2557 */ 2558 int 2559 xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ 2560 s64 nxaddr, /* new xaddr */ 2561 int xtype) 2562 { /* extent type: XTPAGE or DATAEXT */ 2563 int rc = 0; 2564 struct tblock *tblk; 2565 struct tlock *tlck; 2566 struct xtlock *xtlck; 2567 struct metapage *mp, *pmp, *lmp, *rmp; /* meta-page buffer */ 2568 xtpage_t *p, *pp, *rp, *lp; /* base B+-tree index page */ 2569 xad_t *xad; 2570 pxd_t *pxd; 2571 s64 xoff, xsize; 2572 int xlen; 2573 s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn; 2574 cbuf_t *cp; 2575 s64 offset, nbytes, nbrd, pno; 2576 int nb, npages, nblks; 2577 s64 bn; 2578 int cmp; 2579 int index; 2580 struct pxd_lock *pxdlock; 2581 struct btstack btstack; /* traverse stack */ 2582 2583 xtype = xtype & EXTENT_TYPE; 2584 2585 xoff = offsetXAD(oxad); 2586 oxaddr = addressXAD(oxad); 2587 xlen = lengthXAD(oxad); 2588 2589 /* validate extent offset */ 2590 offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; 2591 if (offset >= ip->i_size) 2592 return -ESTALE; /* stale extent */ 2593 2594 jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx", 2595 xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr); 2596 2597 /* 2598 * 1. get and validate the parent xtpage/xad entry 2599 * covering the source extent to be relocated; 2600 */ 2601 if (xtype == DATAEXT) { 2602 /* search in leaf entry */ 2603 rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); 2604 if (rc) 2605 return rc; 2606 2607 /* retrieve search result */ 2608 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2609 2610 if (cmp) { 2611 XT_PUTPAGE(pmp); 2612 return -ESTALE; 2613 } 2614 2615 /* validate for exact match with a single entry */ 2616 xad = &pp->xad[index]; 2617 if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) { 2618 XT_PUTPAGE(pmp); 2619 return -ESTALE; 2620 } 2621 } else { /* (xtype == XTPAGE) */ 2622 2623 /* search in internal entry */ 2624 rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0); 2625 if (rc) 2626 return rc; 2627 2628 /* retrieve search result */ 2629 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2630 2631 if (cmp) { 2632 XT_PUTPAGE(pmp); 2633 return -ESTALE; 2634 } 2635 2636 /* xtSearchNode() validated for exact match with a single entry 2637 */ 2638 xad = &pp->xad[index]; 2639 } 2640 jfs_info("xtRelocate: parent xad entry validated."); 2641 2642 /* 2643 * 2. relocate the extent 2644 */ 2645 if (xtype == DATAEXT) { 2646 /* if the extent is allocated-but-not-recorded 2647 * there is no real data to be moved in this extent, 2648 */ 2649 if (xad->flag & XAD_NOTRECORDED) 2650 goto out; 2651 else 2652 /* release xtpage for cmRead()/xtLookup() */ 2653 XT_PUTPAGE(pmp); 2654 2655 /* 2656 * cmRelocate() 2657 * 2658 * copy target data pages to be relocated; 2659 * 2660 * data extent must start at page boundary and 2661 * multiple of page size (except the last data extent); 2662 * read in each page of the source data extent into cbuf, 2663 * update the cbuf extent descriptor of the page to be 2664 * homeward bound to new dst data extent 2665 * copy the data from the old extent to new extent. 2666 * copy is essential for compressed files to avoid problems 2667 * that can arise if there was a change in compression 2668 * algorithms. 2669 * it is a good strategy because it may disrupt cache 2670 * policy to keep the pages in memory afterwards. 2671 */ 2672 offset = xoff << JFS_SBI(ip->i_sb)->l2bsize; 2673 assert((offset & CM_OFFSET) == 0); 2674 nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize; 2675 pno = offset >> CM_L2BSIZE; 2676 npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE; 2677 /* 2678 npages = ((offset + nbytes - 1) >> CM_L2BSIZE) - 2679 (offset >> CM_L2BSIZE) + 1; 2680 */ 2681 sxaddr = oxaddr; 2682 dxaddr = nxaddr; 2683 2684 /* process the request one cache buffer at a time */ 2685 for (nbrd = 0; nbrd < nbytes; nbrd += nb, 2686 offset += nb, pno++, npages--) { 2687 /* compute page size */ 2688 nb = min(nbytes - nbrd, CM_BSIZE); 2689 2690 /* get the cache buffer of the page */ 2691 if (rc = cmRead(ip, offset, npages, &cp)) 2692 break; 2693 2694 assert(addressPXD(&cp->cm_pxd) == sxaddr); 2695 assert(!cp->cm_modified); 2696 2697 /* bind buffer with the new extent address */ 2698 nblks = nb >> JFS_IP(ip->i_sb)->l2bsize; 2699 cmSetXD(ip, cp, pno, dxaddr, nblks); 2700 2701 /* release the cbuf, mark it as modified */ 2702 cmPut(cp, true); 2703 2704 dxaddr += nblks; 2705 sxaddr += nblks; 2706 } 2707 2708 /* get back parent page */ 2709 if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0))) 2710 return rc; 2711 2712 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2713 jfs_info("xtRelocate: target data extent relocated."); 2714 } else { /* (xtype == XTPAGE) */ 2715 2716 /* 2717 * read in the target xtpage from the source extent; 2718 */ 2719 XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); 2720 if (rc) { 2721 XT_PUTPAGE(pmp); 2722 return rc; 2723 } 2724 2725 /* 2726 * read in sibling pages if any to update sibling pointers; 2727 */ 2728 rmp = NULL; 2729 if (p->header.next) { 2730 nextbn = le64_to_cpu(p->header.next); 2731 XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc); 2732 if (rc) { 2733 XT_PUTPAGE(pmp); 2734 XT_PUTPAGE(mp); 2735 return (rc); 2736 } 2737 } 2738 2739 lmp = NULL; 2740 if (p->header.prev) { 2741 prevbn = le64_to_cpu(p->header.prev); 2742 XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc); 2743 if (rc) { 2744 XT_PUTPAGE(pmp); 2745 XT_PUTPAGE(mp); 2746 if (rmp) 2747 XT_PUTPAGE(rmp); 2748 return (rc); 2749 } 2750 } 2751 2752 /* at this point, all xtpages to be updated are in memory */ 2753 2754 /* 2755 * update sibling pointers of sibling xtpages if any; 2756 */ 2757 if (lmp) { 2758 BT_MARK_DIRTY(lmp, ip); 2759 tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK); 2760 lp->header.next = cpu_to_le64(nxaddr); 2761 XT_PUTPAGE(lmp); 2762 } 2763 2764 if (rmp) { 2765 BT_MARK_DIRTY(rmp, ip); 2766 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK); 2767 rp->header.prev = cpu_to_le64(nxaddr); 2768 XT_PUTPAGE(rmp); 2769 } 2770 2771 /* 2772 * update the target xtpage to be relocated 2773 * 2774 * update the self address of the target page 2775 * and write to destination extent; 2776 * redo image covers the whole xtpage since it is new page 2777 * to the destination extent; 2778 * update of bmap for the free of source extent 2779 * of the target xtpage itself: 2780 * update of bmap for the allocation of destination extent 2781 * of the target xtpage itself: 2782 * update of bmap for the extents covered by xad entries in 2783 * the target xtpage is not necessary since they are not 2784 * updated; 2785 * if not committed before this relocation, 2786 * target page may contain XAD_NEW entries which must 2787 * be scanned for bmap update (logredo() always 2788 * scan xtpage REDOPAGE image for bmap update); 2789 * if committed before this relocation (tlckRELOCATE), 2790 * scan may be skipped by commit() and logredo(); 2791 */ 2792 BT_MARK_DIRTY(mp, ip); 2793 /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */ 2794 tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW); 2795 xtlck = (struct xtlock *) & tlck->lock; 2796 2797 /* update the self address in the xtpage header */ 2798 pxd = &p->header.self; 2799 PXDaddress(pxd, nxaddr); 2800 2801 /* linelock for the after image of the whole page */ 2802 xtlck->lwm.length = 2803 le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset; 2804 2805 /* update the buffer extent descriptor of target xtpage */ 2806 xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; 2807 bmSetXD(mp, nxaddr, xsize); 2808 2809 /* unpin the target page to new homeward bound */ 2810 XT_PUTPAGE(mp); 2811 jfs_info("xtRelocate: target xtpage relocated."); 2812 } 2813 2814 /* 2815 * 3. acquire maplock for the source extent to be freed; 2816 * 2817 * acquire a maplock saving the src relocated extent address; 2818 * to free of the extent at commit time; 2819 */ 2820 out: 2821 /* if DATAEXT relocation, write a LOG_UPDATEMAP record for 2822 * free PXD of the source data extent (logredo() will update 2823 * bmap for free of source data extent), and update bmap for 2824 * free of the source data extent; 2825 */ 2826 if (xtype == DATAEXT) 2827 tlck = txMaplock(tid, ip, tlckMAP); 2828 /* if XTPAGE relocation, write a LOG_NOREDOPAGE record 2829 * for the source xtpage (logredo() will init NoRedoPage 2830 * filter and will also update bmap for free of the source 2831 * xtpage), and update bmap for free of the source xtpage; 2832 * N.B. We use tlckMAP instead of tlkcXTREE because there 2833 * is no buffer associated with this lock since the buffer 2834 * has been redirected to the target location. 2835 */ 2836 else /* (xtype == XTPAGE) */ 2837 tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE); 2838 2839 pxdlock = (struct pxd_lock *) & tlck->lock; 2840 pxdlock->flag = mlckFREEPXD; 2841 PXDaddress(&pxdlock->pxd, oxaddr); 2842 PXDlength(&pxdlock->pxd, xlen); 2843 pxdlock->index = 1; 2844 2845 /* 2846 * 4. update the parent xad entry for relocation; 2847 * 2848 * acquire tlck for the parent entry with XAD_NEW as entry 2849 * update which will write LOG_REDOPAGE and update bmap for 2850 * allocation of XAD_NEW destination extent; 2851 */ 2852 jfs_info("xtRelocate: update parent xad entry."); 2853 BT_MARK_DIRTY(pmp, ip); 2854 tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW); 2855 xtlck = (struct xtlock *) & tlck->lock; 2856 2857 /* update the XAD with the new destination extent; */ 2858 xad = &pp->xad[index]; 2859 xad->flag |= XAD_NEW; 2860 XADaddress(xad, nxaddr); 2861 2862 xtlck->lwm.offset = min(index, xtlck->lwm.offset); 2863 xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) - 2864 xtlck->lwm.offset; 2865 2866 /* unpin the parent xtpage */ 2867 XT_PUTPAGE(pmp); 2868 2869 return rc; 2870 } 2871 2872 2873 /* 2874 * xtSearchNode() 2875 * 2876 * function: search for the internal xad entry covering specified extent. 2877 * This function is mainly used by defragfs utility. 2878 * 2879 * parameters: 2880 * ip - file object; 2881 * xad - extent to find; 2882 * cmpp - comparison result: 2883 * btstack - traverse stack; 2884 * flag - search process flag; 2885 * 2886 * returns: 2887 * btstack contains (bn, index) of search path traversed to the entry. 2888 * *cmpp is set to result of comparison with the entry returned. 2889 * the page containing the entry is pinned at exit. 2890 */ 2891 static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ 2892 int *cmpp, struct btstack * btstack, int flag) 2893 { 2894 int rc = 0; 2895 s64 xoff, xaddr; 2896 int xlen; 2897 int cmp = 1; /* init for empty page */ 2898 s64 bn; /* block number */ 2899 struct metapage *mp; /* meta-page buffer */ 2900 xtpage_t *p; /* page */ 2901 int base, index, lim; 2902 struct btframe *btsp; 2903 s64 t64; 2904 2905 BT_CLR(btstack); 2906 2907 xoff = offsetXAD(xad); 2908 xlen = lengthXAD(xad); 2909 xaddr = addressXAD(xad); 2910 2911 /* 2912 * search down tree from root: 2913 * 2914 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 2915 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 2916 * 2917 * if entry with search key K is not found 2918 * internal page search find the entry with largest key Ki 2919 * less than K which point to the child page to search; 2920 * leaf page search find the entry with smallest key Kj 2921 * greater than K so that the returned index is the position of 2922 * the entry to be shifted right for insertion of new entry. 2923 * for empty tree, search key is greater than any key of the tree. 2924 * 2925 * by convention, root bn = 0. 2926 */ 2927 for (bn = 0;;) { 2928 /* get/pin the page to search */ 2929 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2930 if (rc) 2931 return rc; 2932 if (p->header.flag & BT_LEAF) { 2933 XT_PUTPAGE(mp); 2934 return -ESTALE; 2935 } 2936 2937 lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 2938 2939 /* 2940 * binary search with search key K on the current page 2941 */ 2942 for (base = XTENTRYSTART; lim; lim >>= 1) { 2943 index = base + (lim >> 1); 2944 2945 XT_CMP(cmp, xoff, &p->xad[index], t64); 2946 if (cmp == 0) { 2947 /* 2948 * search hit 2949 * 2950 * verify for exact match; 2951 */ 2952 if (xaddr == addressXAD(&p->xad[index]) && 2953 xoff == offsetXAD(&p->xad[index])) { 2954 *cmpp = cmp; 2955 2956 /* save search result */ 2957 btsp = btstack->top; 2958 btsp->bn = bn; 2959 btsp->index = index; 2960 btsp->mp = mp; 2961 2962 return 0; 2963 } 2964 2965 /* descend/search its child page */ 2966 goto next; 2967 } 2968 2969 if (cmp > 0) { 2970 base = index + 1; 2971 --lim; 2972 } 2973 } 2974 2975 /* 2976 * search miss - non-leaf page: 2977 * 2978 * base is the smallest index with key (Kj) greater than 2979 * search key (K) and may be zero or maxentry index. 2980 * if base is non-zero, decrement base by one to get the parent 2981 * entry of the child page to search. 2982 */ 2983 index = base ? base - 1 : base; 2984 2985 /* 2986 * go down to child page 2987 */ 2988 next: 2989 /* get the child page block number */ 2990 bn = addressXAD(&p->xad[index]); 2991 2992 /* unpin the parent page */ 2993 XT_PUTPAGE(mp); 2994 } 2995 } 2996 2997 2998 /* 2999 * xtRelink() 3000 * 3001 * function: 3002 * link around a freed page. 3003 * 3004 * Parameter: 3005 * int tid, 3006 * struct inode *ip, 3007 * xtpage_t *p) 3008 * 3009 * returns: 3010 */ 3011 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p) 3012 { 3013 int rc = 0; 3014 struct metapage *mp; 3015 s64 nextbn, prevbn; 3016 struct tlock *tlck; 3017 3018 nextbn = le64_to_cpu(p->header.next); 3019 prevbn = le64_to_cpu(p->header.prev); 3020 3021 /* update prev pointer of the next page */ 3022 if (nextbn != 0) { 3023 XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 3024 if (rc) 3025 return rc; 3026 3027 /* 3028 * acquire a transaction lock on the page; 3029 * 3030 * action: update prev pointer; 3031 */ 3032 BT_MARK_DIRTY(mp, ip); 3033 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 3034 3035 /* the page may already have been tlock'd */ 3036 3037 p->header.prev = cpu_to_le64(prevbn); 3038 3039 XT_PUTPAGE(mp); 3040 } 3041 3042 /* update next pointer of the previous page */ 3043 if (prevbn != 0) { 3044 XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc); 3045 if (rc) 3046 return rc; 3047 3048 /* 3049 * acquire a transaction lock on the page; 3050 * 3051 * action: update next pointer; 3052 */ 3053 BT_MARK_DIRTY(mp, ip); 3054 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK); 3055 3056 /* the page may already have been tlock'd */ 3057 3058 p->header.next = le64_to_cpu(nextbn); 3059 3060 XT_PUTPAGE(mp); 3061 } 3062 3063 return 0; 3064 } 3065 #endif /* _STILL_TO_PORT */ 3066 3067 3068 /* 3069 * xtInitRoot() 3070 * 3071 * initialize file root (inline in inode) 3072 */ 3073 void xtInitRoot(tid_t tid, struct inode *ip) 3074 { 3075 xtpage_t *p; 3076 3077 /* 3078 * acquire a transaction lock on the root 3079 * 3080 * action: 3081 */ 3082 txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag, 3083 tlckXTREE | tlckNEW); 3084 p = &JFS_IP(ip)->i_xtroot; 3085 3086 p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; 3087 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3088 3089 if (S_ISDIR(ip->i_mode)) 3090 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR); 3091 else { 3092 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT); 3093 ip->i_size = 0; 3094 } 3095 3096 3097 return; 3098 } 3099 3100 3101 /* 3102 * We can run into a deadlock truncating a file with a large number of 3103 * xtree pages (large fragmented file). A robust fix would entail a 3104 * reservation system where we would reserve a number of metadata pages 3105 * and tlocks which we would be guaranteed without a deadlock. Without 3106 * this, a partial fix is to limit number of metadata pages we will lock 3107 * in a single transaction. Currently we will truncate the file so that 3108 * no more than 50 leaf pages will be locked. The caller of xtTruncate 3109 * will be responsible for ensuring that the current transaction gets 3110 * committed, and that subsequent transactions are created to truncate 3111 * the file further if needed. 3112 */ 3113 #define MAX_TRUNCATE_LEAVES 50 3114 3115 /* 3116 * xtTruncate() 3117 * 3118 * function: 3119 * traverse for truncation logging backward bottom up; 3120 * terminate at the last extent entry at the current subtree 3121 * root page covering new down size. 3122 * truncation may occur within the last extent entry. 3123 * 3124 * parameter: 3125 * int tid, 3126 * struct inode *ip, 3127 * s64 newsize, 3128 * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} 3129 * 3130 * return: 3131 * 3132 * note: 3133 * PWMAP: 3134 * 1. truncate (non-COMMIT_NOLINK file) 3135 * by jfs_truncate() or jfs_open(O_TRUNC): 3136 * xtree is updated; 3137 * 2. truncate index table of directory when last entry removed 3138 * map update via tlock at commit time; 3139 * PMAP: 3140 * Call xtTruncate_pmap instead 3141 * WMAP: 3142 * 1. remove (free zero link count) on last reference release 3143 * (pmap has been freed at commit zero link count); 3144 * 2. truncate (COMMIT_NOLINK file, i.e., tmp file): 3145 * xtree is updated; 3146 * map update directly at truncation time; 3147 * 3148 * if (DELETE) 3149 * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); 3150 * else if (TRUNCATE) 3151 * must write LOG_NOREDOPAGE for deleted index page; 3152 * 3153 * pages may already have been tlocked by anonymous transactions 3154 * during file growth (i.e., write) before truncation; 3155 * 3156 * except last truncated entry, deleted entries remains as is 3157 * in the page (nextindex is updated) for other use 3158 * (e.g., log/update allocation map): this avoid copying the page 3159 * info but delay free of pages; 3160 * 3161 */ 3162 s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) 3163 { 3164 int rc = 0; 3165 s64 teof; 3166 struct metapage *mp; 3167 xtpage_t *p; 3168 s64 bn; 3169 int index, nextindex; 3170 xad_t *xad; 3171 s64 xoff, xaddr; 3172 int xlen, len, freexlen; 3173 struct btstack btstack; 3174 struct btframe *parent; 3175 struct tblock *tblk = NULL; 3176 struct tlock *tlck = NULL; 3177 struct xtlock *xtlck = NULL; 3178 struct xdlistlock xadlock; /* maplock for COMMIT_WMAP */ 3179 struct pxd_lock *pxdlock; /* maplock for COMMIT_WMAP */ 3180 s64 nfreed; 3181 int freed, log; 3182 int locked_leaves = 0; 3183 3184 /* save object truncation type */ 3185 if (tid) { 3186 tblk = tid_to_tblock(tid); 3187 tblk->xflag |= flag; 3188 } 3189 3190 nfreed = 0; 3191 3192 flag &= COMMIT_MAP; 3193 assert(flag != COMMIT_PMAP); 3194 3195 if (flag == COMMIT_PWMAP) 3196 log = 1; 3197 else { 3198 log = 0; 3199 xadlock.flag = mlckFREEXADLIST; 3200 xadlock.index = 1; 3201 } 3202 3203 /* 3204 * if the newsize is not an integral number of pages, 3205 * the file between newsize and next page boundary will 3206 * be cleared. 3207 * if truncating into a file hole, it will cause 3208 * a full block to be allocated for the logical block. 3209 */ 3210 3211 /* 3212 * release page blocks of truncated region <teof, eof> 3213 * 3214 * free the data blocks from the leaf index blocks. 3215 * delete the parent index entries corresponding to 3216 * the freed child data/index blocks. 3217 * free the index blocks themselves which aren't needed 3218 * in new sized file. 3219 * 3220 * index blocks are updated only if the blocks are to be 3221 * retained in the new sized file. 3222 * if type is PMAP, the data and index pages are NOT 3223 * freed, and the data and index blocks are NOT freed 3224 * from working map. 3225 * (this will allow continued access of data/index of 3226 * temporary file (zerolink count file truncated to zero-length)). 3227 */ 3228 teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >> 3229 JFS_SBI(ip->i_sb)->l2bsize; 3230 3231 /* clear stack */ 3232 BT_CLR(&btstack); 3233 3234 /* 3235 * start with root 3236 * 3237 * root resides in the inode 3238 */ 3239 bn = 0; 3240 3241 /* 3242 * first access of each page: 3243 */ 3244 getPage: 3245 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3246 if (rc) 3247 return rc; 3248 3249 /* process entries backward from last index */ 3250 index = le16_to_cpu(p->header.nextindex) - 1; 3251 3252 3253 /* Since this is the rightmost page at this level, and we may have 3254 * already freed a page that was formerly to the right, let's make 3255 * sure that the next pointer is zero. 3256 */ 3257 if (p->header.next) { 3258 if (log) 3259 /* 3260 * Make sure this change to the header is logged. 3261 * If we really truncate this leaf, the flag 3262 * will be changed to tlckTRUNCATE 3263 */ 3264 tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW); 3265 BT_MARK_DIRTY(mp, ip); 3266 p->header.next = 0; 3267 } 3268 3269 if (p->header.flag & BT_INTERNAL) 3270 goto getChild; 3271 3272 /* 3273 * leaf page 3274 */ 3275 freed = 0; 3276 3277 /* does region covered by leaf page precede Teof ? */ 3278 xad = &p->xad[index]; 3279 xoff = offsetXAD(xad); 3280 xlen = lengthXAD(xad); 3281 if (teof >= xoff + xlen) { 3282 XT_PUTPAGE(mp); 3283 goto getParent; 3284 } 3285 3286 /* (re)acquire tlock of the leaf page */ 3287 if (log) { 3288 if (++locked_leaves > MAX_TRUNCATE_LEAVES) { 3289 /* 3290 * We need to limit the size of the transaction 3291 * to avoid exhausting pagecache & tlocks 3292 */ 3293 XT_PUTPAGE(mp); 3294 newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; 3295 goto getParent; 3296 } 3297 tlck = txLock(tid, ip, mp, tlckXTREE); 3298 tlck->type = tlckXTREE | tlckTRUNCATE; 3299 xtlck = (struct xtlock *) & tlck->lock; 3300 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; 3301 } 3302 BT_MARK_DIRTY(mp, ip); 3303 3304 /* 3305 * scan backward leaf page entries 3306 */ 3307 for (; index >= XTENTRYSTART; index--) { 3308 xad = &p->xad[index]; 3309 xoff = offsetXAD(xad); 3310 xlen = lengthXAD(xad); 3311 xaddr = addressXAD(xad); 3312 3313 /* 3314 * The "data" for a directory is indexed by the block 3315 * device's address space. This metadata must be invalidated 3316 * here 3317 */ 3318 if (S_ISDIR(ip->i_mode) && (teof == 0)) 3319 invalidate_xad_metapages(ip, *xad); 3320 /* 3321 * entry beyond eof: continue scan of current page 3322 * xad 3323 * ---|---=======-------> 3324 * eof 3325 */ 3326 if (teof < xoff) { 3327 nfreed += xlen; 3328 continue; 3329 } 3330 3331 /* 3332 * (xoff <= teof): last entry to be deleted from page; 3333 * If other entries remain in page: keep and update the page. 3334 */ 3335 3336 /* 3337 * eof == entry_start: delete the entry 3338 * xad 3339 * -------|=======-------> 3340 * eof 3341 * 3342 */ 3343 if (teof == xoff) { 3344 nfreed += xlen; 3345 3346 if (index == XTENTRYSTART) 3347 break; 3348 3349 nextindex = index; 3350 } 3351 /* 3352 * eof within the entry: truncate the entry. 3353 * xad 3354 * -------===|===-------> 3355 * eof 3356 */ 3357 else if (teof < xoff + xlen) { 3358 /* update truncated entry */ 3359 len = teof - xoff; 3360 freexlen = xlen - len; 3361 XADlength(xad, len); 3362 3363 /* save pxd of truncated extent in tlck */ 3364 xaddr += len; 3365 if (log) { /* COMMIT_PWMAP */ 3366 xtlck->lwm.offset = (xtlck->lwm.offset) ? 3367 min(index, (int)xtlck->lwm.offset) : index; 3368 xtlck->lwm.length = index + 1 - 3369 xtlck->lwm.offset; 3370 xtlck->twm.offset = index; 3371 pxdlock = (struct pxd_lock *) & xtlck->pxdlock; 3372 pxdlock->flag = mlckFREEPXD; 3373 PXDaddress(&pxdlock->pxd, xaddr); 3374 PXDlength(&pxdlock->pxd, freexlen); 3375 } 3376 /* free truncated extent */ 3377 else { /* COMMIT_WMAP */ 3378 3379 pxdlock = (struct pxd_lock *) & xadlock; 3380 pxdlock->flag = mlckFREEPXD; 3381 PXDaddress(&pxdlock->pxd, xaddr); 3382 PXDlength(&pxdlock->pxd, freexlen); 3383 txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP); 3384 3385 /* reset map lock */ 3386 xadlock.flag = mlckFREEXADLIST; 3387 } 3388 3389 /* current entry is new last entry; */ 3390 nextindex = index + 1; 3391 3392 nfreed += freexlen; 3393 } 3394 /* 3395 * eof beyond the entry: 3396 * xad 3397 * -------=======---|---> 3398 * eof 3399 */ 3400 else { /* (xoff + xlen < teof) */ 3401 3402 nextindex = index + 1; 3403 } 3404 3405 if (nextindex < le16_to_cpu(p->header.nextindex)) { 3406 if (!log) { /* COMMIT_WAMP */ 3407 xadlock.xdlist = &p->xad[nextindex]; 3408 xadlock.count = 3409 le16_to_cpu(p->header.nextindex) - 3410 nextindex; 3411 txFreeMap(ip, (struct maplock *) & xadlock, 3412 NULL, COMMIT_WMAP); 3413 } 3414 p->header.nextindex = cpu_to_le16(nextindex); 3415 } 3416 3417 XT_PUTPAGE(mp); 3418 3419 /* assert(freed == 0); */ 3420 goto getParent; 3421 } /* end scan of leaf page entries */ 3422 3423 freed = 1; 3424 3425 /* 3426 * leaf page become empty: free the page if type != PMAP 3427 */ 3428 if (log) { /* COMMIT_PWMAP */ 3429 /* txCommit() with tlckFREE: 3430 * free data extents covered by leaf [XTENTRYSTART:hwm); 3431 * invalidate leaf if COMMIT_PWMAP; 3432 * if (TRUNCATE), will write LOG_NOREDOPAGE; 3433 */ 3434 tlck->type = tlckXTREE | tlckFREE; 3435 } else { /* COMMIT_WAMP */ 3436 3437 /* free data extents covered by leaf */ 3438 xadlock.xdlist = &p->xad[XTENTRYSTART]; 3439 xadlock.count = 3440 le16_to_cpu(p->header.nextindex) - XTENTRYSTART; 3441 txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP); 3442 } 3443 3444 if (p->header.flag & BT_ROOT) { 3445 p->header.flag &= ~BT_INTERNAL; 3446 p->header.flag |= BT_LEAF; 3447 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3448 3449 XT_PUTPAGE(mp); /* debug */ 3450 goto out; 3451 } else { 3452 if (log) { /* COMMIT_PWMAP */ 3453 /* page will be invalidated at tx completion 3454 */ 3455 XT_PUTPAGE(mp); 3456 } else { /* COMMIT_WMAP */ 3457 3458 if (mp->lid) 3459 lid_to_tlock(mp->lid)->flag |= tlckFREELOCK; 3460 3461 /* invalidate empty leaf page */ 3462 discard_metapage(mp); 3463 } 3464 } 3465 3466 /* 3467 * the leaf page become empty: delete the parent entry 3468 * for the leaf page if the parent page is to be kept 3469 * in the new sized file. 3470 */ 3471 3472 /* 3473 * go back up to the parent page 3474 */ 3475 getParent: 3476 /* pop/restore parent entry for the current child page */ 3477 if ((parent = BT_POP(&btstack)) == NULL) 3478 /* current page must have been root */ 3479 goto out; 3480 3481 /* get back the parent page */ 3482 bn = parent->bn; 3483 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3484 if (rc) 3485 return rc; 3486 3487 index = parent->index; 3488 3489 /* 3490 * child page was not empty: 3491 */ 3492 if (freed == 0) { 3493 /* has any entry deleted from parent ? */ 3494 if (index < le16_to_cpu(p->header.nextindex) - 1) { 3495 /* (re)acquire tlock on the parent page */ 3496 if (log) { /* COMMIT_PWMAP */ 3497 /* txCommit() with tlckTRUNCATE: 3498 * free child extents covered by parent [); 3499 */ 3500 tlck = txLock(tid, ip, mp, tlckXTREE); 3501 xtlck = (struct xtlock *) & tlck->lock; 3502 if (!(tlck->type & tlckTRUNCATE)) { 3503 xtlck->hwm.offset = 3504 le16_to_cpu(p->header. 3505 nextindex) - 1; 3506 tlck->type = 3507 tlckXTREE | tlckTRUNCATE; 3508 } 3509 } else { /* COMMIT_WMAP */ 3510 3511 /* free child extents covered by parent */ 3512 xadlock.xdlist = &p->xad[index + 1]; 3513 xadlock.count = 3514 le16_to_cpu(p->header.nextindex) - 3515 index - 1; 3516 txFreeMap(ip, (struct maplock *) & xadlock, 3517 NULL, COMMIT_WMAP); 3518 } 3519 BT_MARK_DIRTY(mp, ip); 3520 3521 p->header.nextindex = cpu_to_le16(index + 1); 3522 } 3523 XT_PUTPAGE(mp); 3524 goto getParent; 3525 } 3526 3527 /* 3528 * child page was empty: 3529 */ 3530 nfreed += lengthXAD(&p->xad[index]); 3531 3532 /* 3533 * During working map update, child page's tlock must be handled 3534 * before parent's. This is because the parent's tlock will cause 3535 * the child's disk space to be marked available in the wmap, so 3536 * it's important that the child page be released by that time. 3537 * 3538 * ToDo: tlocks should be on doubly-linked list, so we can 3539 * quickly remove it and add it to the end. 3540 */ 3541 3542 /* 3543 * Move parent page's tlock to the end of the tid's tlock list 3544 */ 3545 if (log && mp->lid && (tblk->last != mp->lid) && 3546 lid_to_tlock(mp->lid)->tid) { 3547 lid_t lid = mp->lid; 3548 struct tlock *prev; 3549 3550 tlck = lid_to_tlock(lid); 3551 3552 if (tblk->next == lid) 3553 tblk->next = tlck->next; 3554 else { 3555 for (prev = lid_to_tlock(tblk->next); 3556 prev->next != lid; 3557 prev = lid_to_tlock(prev->next)) { 3558 assert(prev->next); 3559 } 3560 prev->next = tlck->next; 3561 } 3562 lid_to_tlock(tblk->last)->next = lid; 3563 tlck->next = 0; 3564 tblk->last = lid; 3565 } 3566 3567 /* 3568 * parent page become empty: free the page 3569 */ 3570 if (index == XTENTRYSTART) { 3571 if (log) { /* COMMIT_PWMAP */ 3572 /* txCommit() with tlckFREE: 3573 * free child extents covered by parent; 3574 * invalidate parent if COMMIT_PWMAP; 3575 */ 3576 tlck = txLock(tid, ip, mp, tlckXTREE); 3577 xtlck = (struct xtlock *) & tlck->lock; 3578 xtlck->hwm.offset = 3579 le16_to_cpu(p->header.nextindex) - 1; 3580 tlck->type = tlckXTREE | tlckFREE; 3581 } else { /* COMMIT_WMAP */ 3582 3583 /* free child extents covered by parent */ 3584 xadlock.xdlist = &p->xad[XTENTRYSTART]; 3585 xadlock.count = 3586 le16_to_cpu(p->header.nextindex) - 3587 XTENTRYSTART; 3588 txFreeMap(ip, (struct maplock *) & xadlock, NULL, 3589 COMMIT_WMAP); 3590 } 3591 BT_MARK_DIRTY(mp, ip); 3592 3593 if (p->header.flag & BT_ROOT) { 3594 p->header.flag &= ~BT_INTERNAL; 3595 p->header.flag |= BT_LEAF; 3596 p->header.nextindex = cpu_to_le16(XTENTRYSTART); 3597 if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) { 3598 /* 3599 * Shrink root down to allow inline 3600 * EA (otherwise fsck complains) 3601 */ 3602 p->header.maxentry = 3603 cpu_to_le16(XTROOTINITSLOT); 3604 JFS_IP(ip)->mode2 |= INLINEEA; 3605 } 3606 3607 XT_PUTPAGE(mp); /* debug */ 3608 goto out; 3609 } else { 3610 if (log) { /* COMMIT_PWMAP */ 3611 /* page will be invalidated at tx completion 3612 */ 3613 XT_PUTPAGE(mp); 3614 } else { /* COMMIT_WMAP */ 3615 3616 if (mp->lid) 3617 lid_to_tlock(mp->lid)->flag |= 3618 tlckFREELOCK; 3619 3620 /* invalidate parent page */ 3621 discard_metapage(mp); 3622 } 3623 3624 /* parent has become empty and freed: 3625 * go back up to its parent page 3626 */ 3627 /* freed = 1; */ 3628 goto getParent; 3629 } 3630 } 3631 /* 3632 * parent page still has entries for front region; 3633 */ 3634 else { 3635 /* try truncate region covered by preceding entry 3636 * (process backward) 3637 */ 3638 index--; 3639 3640 /* go back down to the child page corresponding 3641 * to the entry 3642 */ 3643 goto getChild; 3644 } 3645 3646 /* 3647 * internal page: go down to child page of current entry 3648 */ 3649 getChild: 3650 /* save current parent entry for the child page */ 3651 if (BT_STACK_FULL(&btstack)) { 3652 jfs_error(ip->i_sb, "stack overrun in xtTruncate!"); 3653 XT_PUTPAGE(mp); 3654 return -EIO; 3655 } 3656 BT_PUSH(&btstack, bn, index); 3657 3658 /* get child page */ 3659 xad = &p->xad[index]; 3660 bn = addressXAD(xad); 3661 3662 /* 3663 * first access of each internal entry: 3664 */ 3665 /* release parent page */ 3666 XT_PUTPAGE(mp); 3667 3668 /* process the child page */ 3669 goto getPage; 3670 3671 out: 3672 /* 3673 * update file resource stat 3674 */ 3675 /* set size 3676 */ 3677 if (S_ISDIR(ip->i_mode) && !newsize) 3678 ip->i_size = 1; /* fsck hates zero-length directories */ 3679 else 3680 ip->i_size = newsize; 3681 3682 /* update quota allocation to reflect freed blocks */ 3683 vfs_dq_free_block(ip, nfreed); 3684 3685 /* 3686 * free tlock of invalidated pages 3687 */ 3688 if (flag == COMMIT_WMAP) 3689 txFreelock(ip); 3690 3691 return newsize; 3692 } 3693 3694 3695 /* 3696 * xtTruncate_pmap() 3697 * 3698 * function: 3699 * Perform truncate to zero length for deleted file, leaving the 3700 * the xtree and working map untouched. This allows the file to 3701 * be accessed via open file handles, while the delete of the file 3702 * is committed to disk. 3703 * 3704 * parameter: 3705 * tid_t tid, 3706 * struct inode *ip, 3707 * s64 committed_size) 3708 * 3709 * return: new committed size 3710 * 3711 * note: 3712 * 3713 * To avoid deadlock by holding too many transaction locks, the 3714 * truncation may be broken up into multiple transactions. 3715 * The committed_size keeps track of part of the file has been 3716 * freed from the pmaps. 3717 */ 3718 s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) 3719 { 3720 s64 bn; 3721 struct btstack btstack; 3722 int cmp; 3723 int index; 3724 int locked_leaves = 0; 3725 struct metapage *mp; 3726 xtpage_t *p; 3727 struct btframe *parent; 3728 int rc; 3729 struct tblock *tblk; 3730 struct tlock *tlck = NULL; 3731 xad_t *xad; 3732 int xlen; 3733 s64 xoff; 3734 struct xtlock *xtlck = NULL; 3735 3736 /* save object truncation type */ 3737 tblk = tid_to_tblock(tid); 3738 tblk->xflag |= COMMIT_PMAP; 3739 3740 /* clear stack */ 3741 BT_CLR(&btstack); 3742 3743 if (committed_size) { 3744 xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1; 3745 rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0); 3746 if (rc) 3747 return rc; 3748 3749 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 3750 3751 if (cmp != 0) { 3752 XT_PUTPAGE(mp); 3753 jfs_error(ip->i_sb, 3754 "xtTruncate_pmap: did not find extent"); 3755 return -EIO; 3756 } 3757 } else { 3758 /* 3759 * start with root 3760 * 3761 * root resides in the inode 3762 */ 3763 bn = 0; 3764 3765 /* 3766 * first access of each page: 3767 */ 3768 getPage: 3769 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3770 if (rc) 3771 return rc; 3772 3773 /* process entries backward from last index */ 3774 index = le16_to_cpu(p->header.nextindex) - 1; 3775 3776 if (p->header.flag & BT_INTERNAL) 3777 goto getChild; 3778 } 3779 3780 /* 3781 * leaf page 3782 */ 3783 3784 if (++locked_leaves > MAX_TRUNCATE_LEAVES) { 3785 /* 3786 * We need to limit the size of the transaction 3787 * to avoid exhausting pagecache & tlocks 3788 */ 3789 xad = &p->xad[index]; 3790 xoff = offsetXAD(xad); 3791 xlen = lengthXAD(xad); 3792 XT_PUTPAGE(mp); 3793 return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; 3794 } 3795 tlck = txLock(tid, ip, mp, tlckXTREE); 3796 tlck->type = tlckXTREE | tlckFREE; 3797 xtlck = (struct xtlock *) & tlck->lock; 3798 xtlck->hwm.offset = index; 3799 3800 3801 XT_PUTPAGE(mp); 3802 3803 /* 3804 * go back up to the parent page 3805 */ 3806 getParent: 3807 /* pop/restore parent entry for the current child page */ 3808 if ((parent = BT_POP(&btstack)) == NULL) 3809 /* current page must have been root */ 3810 goto out; 3811 3812 /* get back the parent page */ 3813 bn = parent->bn; 3814 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3815 if (rc) 3816 return rc; 3817 3818 index = parent->index; 3819 3820 /* 3821 * parent page become empty: free the page 3822 */ 3823 if (index == XTENTRYSTART) { 3824 /* txCommit() with tlckFREE: 3825 * free child extents covered by parent; 3826 * invalidate parent if COMMIT_PWMAP; 3827 */ 3828 tlck = txLock(tid, ip, mp, tlckXTREE); 3829 xtlck = (struct xtlock *) & tlck->lock; 3830 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; 3831 tlck->type = tlckXTREE | tlckFREE; 3832 3833 XT_PUTPAGE(mp); 3834 3835 if (p->header.flag & BT_ROOT) { 3836 3837 goto out; 3838 } else { 3839 goto getParent; 3840 } 3841 } 3842 /* 3843 * parent page still has entries for front region; 3844 */ 3845 else 3846 index--; 3847 /* 3848 * internal page: go down to child page of current entry 3849 */ 3850 getChild: 3851 /* save current parent entry for the child page */ 3852 if (BT_STACK_FULL(&btstack)) { 3853 jfs_error(ip->i_sb, "stack overrun in xtTruncate_pmap!"); 3854 XT_PUTPAGE(mp); 3855 return -EIO; 3856 } 3857 BT_PUSH(&btstack, bn, index); 3858 3859 /* get child page */ 3860 xad = &p->xad[index]; 3861 bn = addressXAD(xad); 3862 3863 /* 3864 * first access of each internal entry: 3865 */ 3866 /* release parent page */ 3867 XT_PUTPAGE(mp); 3868 3869 /* process the child page */ 3870 goto getPage; 3871 3872 out: 3873 3874 return 0; 3875 } 3876 3877 #ifdef CONFIG_JFS_STATISTICS 3878 static int jfs_xtstat_proc_show(struct seq_file *m, void *v) 3879 { 3880 seq_printf(m, 3881 "JFS Xtree statistics\n" 3882 "====================\n" 3883 "searches = %d\n" 3884 "fast searches = %d\n" 3885 "splits = %d\n", 3886 xtStat.search, 3887 xtStat.fastSearch, 3888 xtStat.split); 3889 return 0; 3890 } 3891 3892 static int jfs_xtstat_proc_open(struct inode *inode, struct file *file) 3893 { 3894 return single_open(file, jfs_xtstat_proc_show, NULL); 3895 } 3896 3897 const struct file_operations jfs_xtstat_proc_fops = { 3898 .owner = THIS_MODULE, 3899 .open = jfs_xtstat_proc_open, 3900 .read = seq_read, 3901 .llseek = seq_lseek, 3902 .release = single_release, 3903 }; 3904 #endif 3905