1 /* 2 * Copyright (C) International Business Machines Corp., 2000-2004 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * the GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 */ 18 19 /* 20 * jfs_dtree.c: directory B+-tree manager 21 * 22 * B+-tree with variable length key directory: 23 * 24 * each directory page is structured as an array of 32-byte 25 * directory entry slots initialized as a freelist 26 * to avoid search/compaction of free space at insertion. 27 * when an entry is inserted, a number of slots are allocated 28 * from the freelist as required to store variable length data 29 * of the entry; when the entry is deleted, slots of the entry 30 * are returned to freelist. 31 * 32 * leaf entry stores full name as key and file serial number 33 * (aka inode number) as data. 34 * internal/router entry stores sufffix compressed name 35 * as key and simple extent descriptor as data. 36 * 37 * each directory page maintains a sorted entry index table 38 * which stores the start slot index of sorted entries 39 * to allow binary search on the table. 40 * 41 * directory starts as a root/leaf page in on-disk inode 42 * inline data area. 43 * when it becomes full, it starts a leaf of a external extent 44 * of length of 1 block. each time the first leaf becomes full, 45 * it is extended rather than split (its size is doubled), 46 * until its length becoms 4 KBytes, from then the extent is split 47 * with new 4 Kbyte extent when it becomes full 48 * to reduce external fragmentation of small directories. 49 * 50 * blah, blah, blah, for linear scan of directory in pieces by 51 * readdir(). 52 * 53 * 54 * case-insensitive directory file system 55 * 56 * names are stored in case-sensitive way in leaf entry. 57 * but stored, searched and compared in case-insensitive (uppercase) order 58 * (i.e., both search key and entry key are folded for search/compare): 59 * (note that case-sensitive order is BROKEN in storage, e.g., 60 * sensitive: Ad, aB, aC, aD -> insensitive: aB, aC, aD, Ad 61 * 62 * entries which folds to the same key makes up a equivalent class 63 * whose members are stored as contiguous cluster (may cross page boundary) 64 * but whose order is arbitrary and acts as duplicate, e.g., 65 * abc, Abc, aBc, abC) 66 * 67 * once match is found at leaf, requires scan forward/backward 68 * either for, in case-insensitive search, duplicate 69 * or for, in case-sensitive search, for exact match 70 * 71 * router entry must be created/stored in case-insensitive way 72 * in internal entry: 73 * (right most key of left page and left most key of right page 74 * are folded, and its suffix compression is propagated as router 75 * key in parent) 76 * (e.g., if split occurs <abc> and <aBd>, <ABD> trather than <aB> 77 * should be made the router key for the split) 78 * 79 * case-insensitive search: 80 * 81 * fold search key; 82 * 83 * case-insensitive search of B-tree: 84 * for internal entry, router key is already folded; 85 * for leaf entry, fold the entry key before comparison. 86 * 87 * if (leaf entry case-insensitive match found) 88 * if (next entry satisfies case-insensitive match) 89 * return EDUPLICATE; 90 * if (prev entry satisfies case-insensitive match) 91 * return EDUPLICATE; 92 * return match; 93 * else 94 * return no match; 95 * 96 * serialization: 97 * target directory inode lock is being held on entry/exit 98 * of all main directory service routines. 99 * 100 * log based recovery: 101 */ 102 103 #include <linux/fs.h> 104 #include <linux/quotaops.h> 105 #include "jfs_incore.h" 106 #include "jfs_superblock.h" 107 #include "jfs_filsys.h" 108 #include "jfs_metapage.h" 109 #include "jfs_dmap.h" 110 #include "jfs_unicode.h" 111 #include "jfs_debug.h" 112 113 /* dtree split parameter */ 114 struct dtsplit { 115 struct metapage *mp; 116 s16 index; 117 s16 nslot; 118 struct component_name *key; 119 ddata_t *data; 120 struct pxdlist *pxdlist; 121 }; 122 123 #define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot) 124 125 /* get page buffer for specified block address */ 126 #define DT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ 127 {\ 128 BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot)\ 129 if (!(RC))\ 130 {\ 131 if (((P)->header.nextindex > (((BN)==0)?DTROOTMAXSLOT:(P)->header.maxslot)) ||\ 132 ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT)))\ 133 {\ 134 BT_PUTPAGE(MP);\ 135 jfs_error((IP)->i_sb, "DT_GETPAGE: dtree page corrupt");\ 136 MP = NULL;\ 137 RC = -EIO;\ 138 }\ 139 }\ 140 } 141 142 /* for consistency */ 143 #define DT_PUTPAGE(MP) BT_PUTPAGE(MP) 144 145 #define DT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ 146 BT_GETSEARCH(IP, LEAF, BN, MP, dtpage_t, P, INDEX, i_dtroot) 147 148 /* 149 * forward references 150 */ 151 static int dtSplitUp(tid_t tid, struct inode *ip, 152 struct dtsplit * split, struct btstack * btstack); 153 154 static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, 155 struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rxdp); 156 157 static int dtExtendPage(tid_t tid, struct inode *ip, 158 struct dtsplit * split, struct btstack * btstack); 159 160 static int dtSplitRoot(tid_t tid, struct inode *ip, 161 struct dtsplit * split, struct metapage ** rmpp); 162 163 static int dtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp, 164 dtpage_t * fp, struct btstack * btstack); 165 166 static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p); 167 168 static int dtReadFirst(struct inode *ip, struct btstack * btstack); 169 170 static int dtReadNext(struct inode *ip, 171 loff_t * offset, struct btstack * btstack); 172 173 static int dtCompare(struct component_name * key, dtpage_t * p, int si); 174 175 static int ciCompare(struct component_name * key, dtpage_t * p, int si, 176 int flag); 177 178 static void dtGetKey(dtpage_t * p, int i, struct component_name * key, 179 int flag); 180 181 static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, 182 int ri, struct component_name * key, int flag); 183 184 static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key, 185 ddata_t * data, struct dt_lock **); 186 187 static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp, 188 struct dt_lock ** sdtlock, struct dt_lock ** ddtlock, 189 int do_index); 190 191 static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock); 192 193 static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock); 194 195 static void dtLinelockFreelist(dtpage_t * p, int m, struct dt_lock ** dtlock); 196 197 #define ciToUpper(c) UniStrupr((c)->name) 198 199 /* 200 * read_index_page() 201 * 202 * Reads a page of a directory's index table. 203 * Having metadata mapped into the directory inode's address space 204 * presents a multitude of problems. We avoid this by mapping to 205 * the absolute address space outside of the *_metapage routines 206 */ 207 static struct metapage *read_index_page(struct inode *inode, s64 blkno) 208 { 209 int rc; 210 s64 xaddr; 211 int xflag; 212 s32 xlen; 213 214 rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1); 215 if (rc || (xlen == 0)) 216 return NULL; 217 218 return read_metapage(inode, xaddr, PSIZE, 1); 219 } 220 221 /* 222 * get_index_page() 223 * 224 * Same as get_index_page(), but get's a new page without reading 225 */ 226 static struct metapage *get_index_page(struct inode *inode, s64 blkno) 227 { 228 int rc; 229 s64 xaddr; 230 int xflag; 231 s32 xlen; 232 233 rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1); 234 if (rc || (xlen == 0)) 235 return NULL; 236 237 return get_metapage(inode, xaddr, PSIZE, 1); 238 } 239 240 /* 241 * find_index() 242 * 243 * Returns dtree page containing directory table entry for specified 244 * index and pointer to its entry. 245 * 246 * mp must be released by caller. 247 */ 248 static struct dir_table_slot *find_index(struct inode *ip, u32 index, 249 struct metapage ** mp, s64 *lblock) 250 { 251 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 252 s64 blkno; 253 s64 offset; 254 int page_offset; 255 struct dir_table_slot *slot; 256 static int maxWarnings = 10; 257 258 if (index < 2) { 259 if (maxWarnings) { 260 jfs_warn("find_entry called with index = %d", index); 261 maxWarnings--; 262 } 263 return NULL; 264 } 265 266 if (index >= jfs_ip->next_index) { 267 jfs_warn("find_entry called with index >= next_index"); 268 return NULL; 269 } 270 271 if (jfs_dirtable_inline(ip)) { 272 /* 273 * Inline directory table 274 */ 275 *mp = NULL; 276 slot = &jfs_ip->i_dirtable[index - 2]; 277 } else { 278 offset = (index - 2) * sizeof(struct dir_table_slot); 279 page_offset = offset & (PSIZE - 1); 280 blkno = ((offset + 1) >> L2PSIZE) << 281 JFS_SBI(ip->i_sb)->l2nbperpage; 282 283 if (*mp && (*lblock != blkno)) { 284 release_metapage(*mp); 285 *mp = NULL; 286 } 287 if (*mp == 0) { 288 *lblock = blkno; 289 *mp = read_index_page(ip, blkno); 290 } 291 if (*mp == 0) { 292 jfs_err("free_index: error reading directory table"); 293 return NULL; 294 } 295 296 slot = 297 (struct dir_table_slot *) ((char *) (*mp)->data + 298 page_offset); 299 } 300 return slot; 301 } 302 303 static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp, 304 u32 index) 305 { 306 struct tlock *tlck; 307 struct linelock *llck; 308 struct lv *lv; 309 310 tlck = txLock(tid, ip, mp, tlckDATA); 311 llck = (struct linelock *) tlck->lock; 312 313 if (llck->index >= llck->maxcnt) 314 llck = txLinelock(llck); 315 lv = &llck->lv[llck->index]; 316 317 /* 318 * Linelock slot size is twice the size of directory table 319 * slot size. 512 entries per page. 320 */ 321 lv->offset = ((index - 2) & 511) >> 1; 322 lv->length = 1; 323 llck->index++; 324 } 325 326 /* 327 * add_index() 328 * 329 * Adds an entry to the directory index table. This is used to provide 330 * each directory entry with a persistent index in which to resume 331 * directory traversals 332 */ 333 static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot) 334 { 335 struct super_block *sb = ip->i_sb; 336 struct jfs_sb_info *sbi = JFS_SBI(sb); 337 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 338 u64 blkno; 339 struct dir_table_slot *dirtab_slot; 340 u32 index; 341 struct linelock *llck; 342 struct lv *lv; 343 struct metapage *mp; 344 s64 offset; 345 uint page_offset; 346 struct tlock *tlck; 347 s64 xaddr; 348 349 ASSERT(DO_INDEX(ip)); 350 351 if (jfs_ip->next_index < 2) { 352 jfs_warn("add_index: next_index = %d. Resetting!", 353 jfs_ip->next_index); 354 jfs_ip->next_index = 2; 355 } 356 357 index = jfs_ip->next_index++; 358 359 if (index <= MAX_INLINE_DIRTABLE_ENTRY) { 360 /* 361 * i_size reflects size of index table, or 8 bytes per entry. 362 */ 363 ip->i_size = (loff_t) (index - 1) << 3; 364 365 /* 366 * dir table fits inline within inode 367 */ 368 dirtab_slot = &jfs_ip->i_dirtable[index-2]; 369 dirtab_slot->flag = DIR_INDEX_VALID; 370 dirtab_slot->slot = slot; 371 DTSaddress(dirtab_slot, bn); 372 373 set_cflag(COMMIT_Dirtable, ip); 374 375 return index; 376 } 377 if (index == (MAX_INLINE_DIRTABLE_ENTRY + 1)) { 378 struct dir_table_slot temp_table[12]; 379 380 /* 381 * It's time to move the inline table to an external 382 * page and begin to build the xtree 383 */ 384 if (DQUOT_ALLOC_BLOCK(ip, sbi->nbperpage) || 385 dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) 386 goto clean_up; /* No space */ 387 388 /* 389 * Save the table, we're going to overwrite it with the 390 * xtree root 391 */ 392 memcpy(temp_table, &jfs_ip->i_dirtable, sizeof(temp_table)); 393 394 /* 395 * Initialize empty x-tree 396 */ 397 xtInitRoot(tid, ip); 398 399 /* 400 * Allocate the first block & add it to the xtree 401 */ 402 if (xtInsert(tid, ip, 0, 0, sbi->nbperpage, &xaddr, 0)) { 403 /* This really shouldn't fail */ 404 jfs_warn("add_index: xtInsert failed!"); 405 memcpy(&jfs_ip->i_dirtable, temp_table, 406 sizeof (temp_table)); 407 goto clean_up; 408 } 409 ip->i_size = PSIZE; 410 411 if ((mp = get_index_page(ip, 0)) == 0) { 412 jfs_err("add_index: get_metapage failed!"); 413 xtTruncate(tid, ip, 0, COMMIT_PWMAP); 414 memcpy(&jfs_ip->i_dirtable, temp_table, 415 sizeof (temp_table)); 416 goto clean_up; 417 } 418 tlck = txLock(tid, ip, mp, tlckDATA); 419 llck = (struct linelock *) & tlck->lock; 420 ASSERT(llck->index == 0); 421 lv = &llck->lv[0]; 422 423 lv->offset = 0; 424 lv->length = 6; /* tlckDATA slot size is 16 bytes */ 425 llck->index++; 426 427 memcpy(mp->data, temp_table, sizeof(temp_table)); 428 429 mark_metapage_dirty(mp); 430 release_metapage(mp); 431 432 /* 433 * Logging is now directed by xtree tlocks 434 */ 435 clear_cflag(COMMIT_Dirtable, ip); 436 } 437 438 offset = (index - 2) * sizeof(struct dir_table_slot); 439 page_offset = offset & (PSIZE - 1); 440 blkno = ((offset + 1) >> L2PSIZE) << sbi->l2nbperpage; 441 if (page_offset == 0) { 442 /* 443 * This will be the beginning of a new page 444 */ 445 xaddr = 0; 446 if (xtInsert(tid, ip, 0, blkno, sbi->nbperpage, &xaddr, 0)) { 447 jfs_warn("add_index: xtInsert failed!"); 448 goto clean_up; 449 } 450 ip->i_size += PSIZE; 451 452 if ((mp = get_index_page(ip, blkno))) 453 memset(mp->data, 0, PSIZE); /* Just looks better */ 454 else 455 xtTruncate(tid, ip, offset, COMMIT_PWMAP); 456 } else 457 mp = read_index_page(ip, blkno); 458 459 if (mp == 0) { 460 jfs_err("add_index: get/read_metapage failed!"); 461 goto clean_up; 462 } 463 464 lock_index(tid, ip, mp, index); 465 466 dirtab_slot = 467 (struct dir_table_slot *) ((char *) mp->data + page_offset); 468 dirtab_slot->flag = DIR_INDEX_VALID; 469 dirtab_slot->slot = slot; 470 DTSaddress(dirtab_slot, bn); 471 472 mark_metapage_dirty(mp); 473 release_metapage(mp); 474 475 return index; 476 477 clean_up: 478 479 jfs_ip->next_index--; 480 481 return 0; 482 } 483 484 /* 485 * free_index() 486 * 487 * Marks an entry to the directory index table as free. 488 */ 489 static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next) 490 { 491 struct dir_table_slot *dirtab_slot; 492 s64 lblock; 493 struct metapage *mp = NULL; 494 495 dirtab_slot = find_index(ip, index, &mp, &lblock); 496 497 if (dirtab_slot == 0) 498 return; 499 500 dirtab_slot->flag = DIR_INDEX_FREE; 501 dirtab_slot->slot = dirtab_slot->addr1 = 0; 502 dirtab_slot->addr2 = cpu_to_le32(next); 503 504 if (mp) { 505 lock_index(tid, ip, mp, index); 506 mark_metapage_dirty(mp); 507 release_metapage(mp); 508 } else 509 set_cflag(COMMIT_Dirtable, ip); 510 } 511 512 /* 513 * modify_index() 514 * 515 * Changes an entry in the directory index table 516 */ 517 static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn, 518 int slot, struct metapage ** mp, u64 *lblock) 519 { 520 struct dir_table_slot *dirtab_slot; 521 522 dirtab_slot = find_index(ip, index, mp, lblock); 523 524 if (dirtab_slot == 0) 525 return; 526 527 DTSaddress(dirtab_slot, bn); 528 dirtab_slot->slot = slot; 529 530 if (*mp) { 531 lock_index(tid, ip, *mp, index); 532 mark_metapage_dirty(*mp); 533 } else 534 set_cflag(COMMIT_Dirtable, ip); 535 } 536 537 /* 538 * read_index() 539 * 540 * reads a directory table slot 541 */ 542 static int read_index(struct inode *ip, u32 index, 543 struct dir_table_slot * dirtab_slot) 544 { 545 s64 lblock; 546 struct metapage *mp = NULL; 547 struct dir_table_slot *slot; 548 549 slot = find_index(ip, index, &mp, &lblock); 550 if (slot == 0) { 551 return -EIO; 552 } 553 554 memcpy(dirtab_slot, slot, sizeof(struct dir_table_slot)); 555 556 if (mp) 557 release_metapage(mp); 558 559 return 0; 560 } 561 562 /* 563 * dtSearch() 564 * 565 * function: 566 * Search for the entry with specified key 567 * 568 * parameter: 569 * 570 * return: 0 - search result on stack, leaf page pinned; 571 * errno - I/O error 572 */ 573 int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, 574 struct btstack * btstack, int flag) 575 { 576 int rc = 0; 577 int cmp = 1; /* init for empty page */ 578 s64 bn; 579 struct metapage *mp; 580 dtpage_t *p; 581 s8 *stbl; 582 int base, index, lim; 583 struct btframe *btsp; 584 pxd_t *pxd; 585 int psize = 288; /* initial in-line directory */ 586 ino_t inumber; 587 struct component_name ciKey; 588 struct super_block *sb = ip->i_sb; 589 590 ciKey.name = 591 (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), 592 GFP_NOFS); 593 if (ciKey.name == 0) { 594 rc = -ENOMEM; 595 goto dtSearch_Exit2; 596 } 597 598 599 /* uppercase search key for c-i directory */ 600 UniStrcpy(ciKey.name, key->name); 601 ciKey.namlen = key->namlen; 602 603 /* only uppercase if case-insensitive support is on */ 604 if ((JFS_SBI(sb)->mntflag & JFS_OS2) == JFS_OS2) { 605 ciToUpper(&ciKey); 606 } 607 BT_CLR(btstack); /* reset stack */ 608 609 /* init level count for max pages to split */ 610 btstack->nsplit = 1; 611 612 /* 613 * search down tree from root: 614 * 615 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 616 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 617 * 618 * if entry with search key K is not found 619 * internal page search find the entry with largest key Ki 620 * less than K which point to the child page to search; 621 * leaf page search find the entry with smallest key Kj 622 * greater than K so that the returned index is the position of 623 * the entry to be shifted right for insertion of new entry. 624 * for empty tree, search key is greater than any key of the tree. 625 * 626 * by convention, root bn = 0. 627 */ 628 for (bn = 0;;) { 629 /* get/pin the page to search */ 630 DT_GETPAGE(ip, bn, mp, psize, p, rc); 631 if (rc) 632 goto dtSearch_Exit1; 633 634 /* get sorted entry table of the page */ 635 stbl = DT_GETSTBL(p); 636 637 /* 638 * binary search with search key K on the current page. 639 */ 640 for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) { 641 index = base + (lim >> 1); 642 643 if (p->header.flag & BT_LEAF) { 644 /* uppercase leaf name to compare */ 645 cmp = 646 ciCompare(&ciKey, p, stbl[index], 647 JFS_SBI(sb)->mntflag); 648 } else { 649 /* router key is in uppercase */ 650 651 cmp = dtCompare(&ciKey, p, stbl[index]); 652 653 654 } 655 if (cmp == 0) { 656 /* 657 * search hit 658 */ 659 /* search hit - leaf page: 660 * return the entry found 661 */ 662 if (p->header.flag & BT_LEAF) { 663 inumber = le32_to_cpu( 664 ((struct ldtentry *) & p->slot[stbl[index]])->inumber); 665 666 /* 667 * search for JFS_LOOKUP 668 */ 669 if (flag == JFS_LOOKUP) { 670 *data = inumber; 671 rc = 0; 672 goto out; 673 } 674 675 /* 676 * search for JFS_CREATE 677 */ 678 if (flag == JFS_CREATE) { 679 *data = inumber; 680 rc = -EEXIST; 681 goto out; 682 } 683 684 /* 685 * search for JFS_REMOVE or JFS_RENAME 686 */ 687 if ((flag == JFS_REMOVE || 688 flag == JFS_RENAME) && 689 *data != inumber) { 690 rc = -ESTALE; 691 goto out; 692 } 693 694 /* 695 * JFS_REMOVE|JFS_FINDDIR|JFS_RENAME 696 */ 697 /* save search result */ 698 *data = inumber; 699 btsp = btstack->top; 700 btsp->bn = bn; 701 btsp->index = index; 702 btsp->mp = mp; 703 704 rc = 0; 705 goto dtSearch_Exit1; 706 } 707 708 /* search hit - internal page: 709 * descend/search its child page 710 */ 711 goto getChild; 712 } 713 714 if (cmp > 0) { 715 base = index + 1; 716 --lim; 717 } 718 } 719 720 /* 721 * search miss 722 * 723 * base is the smallest index with key (Kj) greater than 724 * search key (K) and may be zero or (maxindex + 1) index. 725 */ 726 /* 727 * search miss - leaf page 728 * 729 * return location of entry (base) where new entry with 730 * search key K is to be inserted. 731 */ 732 if (p->header.flag & BT_LEAF) { 733 /* 734 * search for JFS_LOOKUP, JFS_REMOVE, or JFS_RENAME 735 */ 736 if (flag == JFS_LOOKUP || flag == JFS_REMOVE || 737 flag == JFS_RENAME) { 738 rc = -ENOENT; 739 goto out; 740 } 741 742 /* 743 * search for JFS_CREATE|JFS_FINDDIR: 744 * 745 * save search result 746 */ 747 *data = 0; 748 btsp = btstack->top; 749 btsp->bn = bn; 750 btsp->index = base; 751 btsp->mp = mp; 752 753 rc = 0; 754 goto dtSearch_Exit1; 755 } 756 757 /* 758 * search miss - internal page 759 * 760 * if base is non-zero, decrement base by one to get the parent 761 * entry of the child page to search. 762 */ 763 index = base ? base - 1 : base; 764 765 /* 766 * go down to child page 767 */ 768 getChild: 769 /* update max. number of pages to split */ 770 if (BT_STACK_FULL(btstack)) { 771 /* Something's corrupted, mark filesytem dirty so 772 * chkdsk will fix it. 773 */ 774 jfs_error(sb, "stack overrun in dtSearch!"); 775 BT_STACK_DUMP(btstack); 776 rc = -EIO; 777 goto out; 778 } 779 btstack->nsplit++; 780 781 /* push (bn, index) of the parent page/entry */ 782 BT_PUSH(btstack, bn, index); 783 784 /* get the child page block number */ 785 pxd = (pxd_t *) & p->slot[stbl[index]]; 786 bn = addressPXD(pxd); 787 psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize; 788 789 /* unpin the parent page */ 790 DT_PUTPAGE(mp); 791 } 792 793 out: 794 DT_PUTPAGE(mp); 795 796 dtSearch_Exit1: 797 798 kfree(ciKey.name); 799 800 dtSearch_Exit2: 801 802 return rc; 803 } 804 805 806 /* 807 * dtInsert() 808 * 809 * function: insert an entry to directory tree 810 * 811 * parameter: 812 * 813 * return: 0 - success; 814 * errno - failure; 815 */ 816 int dtInsert(tid_t tid, struct inode *ip, 817 struct component_name * name, ino_t * fsn, struct btstack * btstack) 818 { 819 int rc = 0; 820 struct metapage *mp; /* meta-page buffer */ 821 dtpage_t *p; /* base B+-tree index page */ 822 s64 bn; 823 int index; 824 struct dtsplit split; /* split information */ 825 ddata_t data; 826 struct dt_lock *dtlck; 827 int n; 828 struct tlock *tlck; 829 struct lv *lv; 830 831 /* 832 * retrieve search result 833 * 834 * dtSearch() returns (leaf page pinned, index at which to insert). 835 * n.b. dtSearch() may return index of (maxindex + 1) of 836 * the full page. 837 */ 838 DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); 839 840 /* 841 * insert entry for new key 842 */ 843 if (DO_INDEX(ip)) { 844 if (JFS_IP(ip)->next_index == DIREND) { 845 DT_PUTPAGE(mp); 846 return -EMLINK; 847 } 848 n = NDTLEAF(name->namlen); 849 data.leaf.tid = tid; 850 data.leaf.ip = ip; 851 } else { 852 n = NDTLEAF_LEGACY(name->namlen); 853 data.leaf.ip = NULL; /* signifies legacy directory format */ 854 } 855 data.leaf.ino = *fsn; 856 857 /* 858 * leaf page does not have enough room for new entry: 859 * 860 * extend/split the leaf page; 861 * 862 * dtSplitUp() will insert the entry and unpin the leaf page. 863 */ 864 if (n > p->header.freecnt) { 865 split.mp = mp; 866 split.index = index; 867 split.nslot = n; 868 split.key = name; 869 split.data = &data; 870 rc = dtSplitUp(tid, ip, &split, btstack); 871 return rc; 872 } 873 874 /* 875 * leaf page does have enough room for new entry: 876 * 877 * insert the new data entry into the leaf page; 878 */ 879 BT_MARK_DIRTY(mp, ip); 880 /* 881 * acquire a transaction lock on the leaf page 882 */ 883 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 884 dtlck = (struct dt_lock *) & tlck->lock; 885 ASSERT(dtlck->index == 0); 886 lv = & dtlck->lv[0]; 887 888 /* linelock header */ 889 lv->offset = 0; 890 lv->length = 1; 891 dtlck->index++; 892 893 dtInsertEntry(p, index, name, &data, &dtlck); 894 895 /* linelock stbl of non-root leaf page */ 896 if (!(p->header.flag & BT_ROOT)) { 897 if (dtlck->index >= dtlck->maxcnt) 898 dtlck = (struct dt_lock *) txLinelock(dtlck); 899 lv = & dtlck->lv[dtlck->index]; 900 n = index >> L2DTSLOTSIZE; 901 lv->offset = p->header.stblindex + n; 902 lv->length = 903 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1; 904 dtlck->index++; 905 } 906 907 /* unpin the leaf page */ 908 DT_PUTPAGE(mp); 909 910 return 0; 911 } 912 913 914 /* 915 * dtSplitUp() 916 * 917 * function: propagate insertion bottom up; 918 * 919 * parameter: 920 * 921 * return: 0 - success; 922 * errno - failure; 923 * leaf page unpinned; 924 */ 925 static int dtSplitUp(tid_t tid, 926 struct inode *ip, struct dtsplit * split, struct btstack * btstack) 927 { 928 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 929 int rc = 0; 930 struct metapage *smp; 931 dtpage_t *sp; /* split page */ 932 struct metapage *rmp; 933 dtpage_t *rp; /* new right page split from sp */ 934 pxd_t rpxd; /* new right page extent descriptor */ 935 struct metapage *lmp; 936 dtpage_t *lp; /* left child page */ 937 int skip; /* index of entry of insertion */ 938 struct btframe *parent; /* parent page entry on traverse stack */ 939 s64 xaddr, nxaddr; 940 int xlen, xsize; 941 struct pxdlist pxdlist; 942 pxd_t *pxd; 943 struct component_name key = { 0, NULL }; 944 ddata_t *data = split->data; 945 int n; 946 struct dt_lock *dtlck; 947 struct tlock *tlck; 948 struct lv *lv; 949 int quota_allocation = 0; 950 951 /* get split page */ 952 smp = split->mp; 953 sp = DT_PAGE(ip, smp); 954 955 key.name = 956 (wchar_t *) kmalloc((JFS_NAME_MAX + 2) * sizeof(wchar_t), 957 GFP_NOFS); 958 if (key.name == 0) { 959 DT_PUTPAGE(smp); 960 rc = -ENOMEM; 961 goto dtSplitUp_Exit; 962 } 963 964 /* 965 * split leaf page 966 * 967 * The split routines insert the new entry, and 968 * acquire txLock as appropriate. 969 */ 970 /* 971 * split root leaf page: 972 */ 973 if (sp->header.flag & BT_ROOT) { 974 /* 975 * allocate a single extent child page 976 */ 977 xlen = 1; 978 n = sbi->bsize >> L2DTSLOTSIZE; 979 n -= (n + 31) >> L2DTSLOTSIZE; /* stbl size */ 980 n -= DTROOTMAXSLOT - sp->header.freecnt; /* header + entries */ 981 if (n <= split->nslot) 982 xlen++; 983 if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr))) { 984 DT_PUTPAGE(smp); 985 goto freeKeyName; 986 } 987 988 pxdlist.maxnpxd = 1; 989 pxdlist.npxd = 0; 990 pxd = &pxdlist.pxd[0]; 991 PXDaddress(pxd, xaddr); 992 PXDlength(pxd, xlen); 993 split->pxdlist = &pxdlist; 994 rc = dtSplitRoot(tid, ip, split, &rmp); 995 996 if (rc) 997 dbFree(ip, xaddr, xlen); 998 else 999 DT_PUTPAGE(rmp); 1000 1001 DT_PUTPAGE(smp); 1002 1003 goto freeKeyName; 1004 } 1005 1006 /* 1007 * extend first leaf page 1008 * 1009 * extend the 1st extent if less than buffer page size 1010 * (dtExtendPage() reurns leaf page unpinned) 1011 */ 1012 pxd = &sp->header.self; 1013 xlen = lengthPXD(pxd); 1014 xsize = xlen << sbi->l2bsize; 1015 if (xsize < PSIZE) { 1016 xaddr = addressPXD(pxd); 1017 n = xsize >> L2DTSLOTSIZE; 1018 n -= (n + 31) >> L2DTSLOTSIZE; /* stbl size */ 1019 if ((n + sp->header.freecnt) <= split->nslot) 1020 n = xlen + (xlen << 1); 1021 else 1022 n = xlen; 1023 1024 /* Allocate blocks to quota. */ 1025 if (DQUOT_ALLOC_BLOCK(ip, n)) { 1026 rc = -EDQUOT; 1027 goto extendOut; 1028 } 1029 quota_allocation += n; 1030 1031 if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen, 1032 (s64) n, &nxaddr))) 1033 goto extendOut; 1034 1035 pxdlist.maxnpxd = 1; 1036 pxdlist.npxd = 0; 1037 pxd = &pxdlist.pxd[0]; 1038 PXDaddress(pxd, nxaddr) 1039 PXDlength(pxd, xlen + n); 1040 split->pxdlist = &pxdlist; 1041 if ((rc = dtExtendPage(tid, ip, split, btstack))) { 1042 nxaddr = addressPXD(pxd); 1043 if (xaddr != nxaddr) { 1044 /* free relocated extent */ 1045 xlen = lengthPXD(pxd); 1046 dbFree(ip, nxaddr, (s64) xlen); 1047 } else { 1048 /* free extended delta */ 1049 xlen = lengthPXD(pxd) - n; 1050 xaddr = addressPXD(pxd) + xlen; 1051 dbFree(ip, xaddr, (s64) n); 1052 } 1053 } 1054 1055 extendOut: 1056 DT_PUTPAGE(smp); 1057 goto freeKeyName; 1058 } 1059 1060 /* 1061 * split leaf page <sp> into <sp> and a new right page <rp>. 1062 * 1063 * return <rp> pinned and its extent descriptor <rpxd> 1064 */ 1065 /* 1066 * allocate new directory page extent and 1067 * new index page(s) to cover page split(s) 1068 * 1069 * allocation hint: ? 1070 */ 1071 n = btstack->nsplit; 1072 pxdlist.maxnpxd = pxdlist.npxd = 0; 1073 xlen = sbi->nbperpage; 1074 for (pxd = pxdlist.pxd; n > 0; n--, pxd++) { 1075 if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr)) == 0) { 1076 PXDaddress(pxd, xaddr); 1077 PXDlength(pxd, xlen); 1078 pxdlist.maxnpxd++; 1079 continue; 1080 } 1081 1082 DT_PUTPAGE(smp); 1083 1084 /* undo allocation */ 1085 goto splitOut; 1086 } 1087 1088 split->pxdlist = &pxdlist; 1089 if ((rc = dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd))) { 1090 DT_PUTPAGE(smp); 1091 1092 /* undo allocation */ 1093 goto splitOut; 1094 } 1095 1096 /* 1097 * propagate up the router entry for the leaf page just split 1098 * 1099 * insert a router entry for the new page into the parent page, 1100 * propagate the insert/split up the tree by walking back the stack 1101 * of (bn of parent page, index of child page entry in parent page) 1102 * that were traversed during the search for the page that split. 1103 * 1104 * the propagation of insert/split up the tree stops if the root 1105 * splits or the page inserted into doesn't have to split to hold 1106 * the new entry. 1107 * 1108 * the parent entry for the split page remains the same, and 1109 * a new entry is inserted at its right with the first key and 1110 * block number of the new right page. 1111 * 1112 * There are a maximum of 4 pages pinned at any time: 1113 * two children, left parent and right parent (when the parent splits). 1114 * keep the child pages pinned while working on the parent. 1115 * make sure that all pins are released at exit. 1116 */ 1117 while ((parent = BT_POP(btstack)) != NULL) { 1118 /* parent page specified by stack frame <parent> */ 1119 1120 /* keep current child pages (<lp>, <rp>) pinned */ 1121 lmp = smp; 1122 lp = sp; 1123 1124 /* 1125 * insert router entry in parent for new right child page <rp> 1126 */ 1127 /* get the parent page <sp> */ 1128 DT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc); 1129 if (rc) { 1130 DT_PUTPAGE(lmp); 1131 DT_PUTPAGE(rmp); 1132 goto splitOut; 1133 } 1134 1135 /* 1136 * The new key entry goes ONE AFTER the index of parent entry, 1137 * because the split was to the right. 1138 */ 1139 skip = parent->index + 1; 1140 1141 /* 1142 * compute the key for the router entry 1143 * 1144 * key suffix compression: 1145 * for internal pages that have leaf pages as children, 1146 * retain only what's needed to distinguish between 1147 * the new entry and the entry on the page to its left. 1148 * If the keys compare equal, retain the entire key. 1149 * 1150 * note that compression is performed only at computing 1151 * router key at the lowest internal level. 1152 * further compression of the key between pairs of higher 1153 * level internal pages loses too much information and 1154 * the search may fail. 1155 * (e.g., two adjacent leaf pages of {a, ..., x} {xx, ...,} 1156 * results in two adjacent parent entries (a)(xx). 1157 * if split occurs between these two entries, and 1158 * if compression is applied, the router key of parent entry 1159 * of right page (x) will divert search for x into right 1160 * subtree and miss x in the left subtree.) 1161 * 1162 * the entire key must be retained for the next-to-leftmost 1163 * internal key at any level of the tree, or search may fail 1164 * (e.g., ?) 1165 */ 1166 switch (rp->header.flag & BT_TYPE) { 1167 case BT_LEAF: 1168 /* 1169 * compute the length of prefix for suffix compression 1170 * between last entry of left page and first entry 1171 * of right page 1172 */ 1173 if ((sp->header.flag & BT_ROOT && skip > 1) || 1174 sp->header.prev != 0 || skip > 1) { 1175 /* compute uppercase router prefix key */ 1176 rc = ciGetLeafPrefixKey(lp, 1177 lp->header.nextindex-1, 1178 rp, 0, &key, 1179 sbi->mntflag); 1180 if (rc) { 1181 DT_PUTPAGE(lmp); 1182 DT_PUTPAGE(rmp); 1183 DT_PUTPAGE(smp); 1184 goto splitOut; 1185 } 1186 } else { 1187 /* next to leftmost entry of 1188 lowest internal level */ 1189 1190 /* compute uppercase router key */ 1191 dtGetKey(rp, 0, &key, sbi->mntflag); 1192 key.name[key.namlen] = 0; 1193 1194 if ((sbi->mntflag & JFS_OS2) == JFS_OS2) 1195 ciToUpper(&key); 1196 } 1197 1198 n = NDTINTERNAL(key.namlen); 1199 break; 1200 1201 case BT_INTERNAL: 1202 dtGetKey(rp, 0, &key, sbi->mntflag); 1203 n = NDTINTERNAL(key.namlen); 1204 break; 1205 1206 default: 1207 jfs_err("dtSplitUp(): UFO!"); 1208 break; 1209 } 1210 1211 /* unpin left child page */ 1212 DT_PUTPAGE(lmp); 1213 1214 /* 1215 * compute the data for the router entry 1216 */ 1217 data->xd = rpxd; /* child page xd */ 1218 1219 /* 1220 * parent page is full - split the parent page 1221 */ 1222 if (n > sp->header.freecnt) { 1223 /* init for parent page split */ 1224 split->mp = smp; 1225 split->index = skip; /* index at insert */ 1226 split->nslot = n; 1227 split->key = &key; 1228 /* split->data = data; */ 1229 1230 /* unpin right child page */ 1231 DT_PUTPAGE(rmp); 1232 1233 /* The split routines insert the new entry, 1234 * acquire txLock as appropriate. 1235 * return <rp> pinned and its block number <rbn>. 1236 */ 1237 rc = (sp->header.flag & BT_ROOT) ? 1238 dtSplitRoot(tid, ip, split, &rmp) : 1239 dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd); 1240 if (rc) { 1241 DT_PUTPAGE(smp); 1242 goto splitOut; 1243 } 1244 1245 /* smp and rmp are pinned */ 1246 } 1247 /* 1248 * parent page is not full - insert router entry in parent page 1249 */ 1250 else { 1251 BT_MARK_DIRTY(smp, ip); 1252 /* 1253 * acquire a transaction lock on the parent page 1254 */ 1255 tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY); 1256 dtlck = (struct dt_lock *) & tlck->lock; 1257 ASSERT(dtlck->index == 0); 1258 lv = & dtlck->lv[0]; 1259 1260 /* linelock header */ 1261 lv->offset = 0; 1262 lv->length = 1; 1263 dtlck->index++; 1264 1265 /* linelock stbl of non-root parent page */ 1266 if (!(sp->header.flag & BT_ROOT)) { 1267 lv++; 1268 n = skip >> L2DTSLOTSIZE; 1269 lv->offset = sp->header.stblindex + n; 1270 lv->length = 1271 ((sp->header.nextindex - 1272 1) >> L2DTSLOTSIZE) - n + 1; 1273 dtlck->index++; 1274 } 1275 1276 dtInsertEntry(sp, skip, &key, data, &dtlck); 1277 1278 /* exit propagate up */ 1279 break; 1280 } 1281 } 1282 1283 /* unpin current split and its right page */ 1284 DT_PUTPAGE(smp); 1285 DT_PUTPAGE(rmp); 1286 1287 /* 1288 * free remaining extents allocated for split 1289 */ 1290 splitOut: 1291 n = pxdlist.npxd; 1292 pxd = &pxdlist.pxd[n]; 1293 for (; n < pxdlist.maxnpxd; n++, pxd++) 1294 dbFree(ip, addressPXD(pxd), (s64) lengthPXD(pxd)); 1295 1296 freeKeyName: 1297 kfree(key.name); 1298 1299 /* Rollback quota allocation */ 1300 if (rc && quota_allocation) 1301 DQUOT_FREE_BLOCK(ip, quota_allocation); 1302 1303 dtSplitUp_Exit: 1304 1305 return rc; 1306 } 1307 1308 1309 /* 1310 * dtSplitPage() 1311 * 1312 * function: Split a non-root page of a btree. 1313 * 1314 * parameter: 1315 * 1316 * return: 0 - success; 1317 * errno - failure; 1318 * return split and new page pinned; 1319 */ 1320 static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, 1321 struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rpxdp) 1322 { 1323 int rc = 0; 1324 struct metapage *smp; 1325 dtpage_t *sp; 1326 struct metapage *rmp; 1327 dtpage_t *rp; /* new right page allocated */ 1328 s64 rbn; /* new right page block number */ 1329 struct metapage *mp; 1330 dtpage_t *p; 1331 s64 nextbn; 1332 struct pxdlist *pxdlist; 1333 pxd_t *pxd; 1334 int skip, nextindex, half, left, nxt, off, si; 1335 struct ldtentry *ldtentry; 1336 struct idtentry *idtentry; 1337 u8 *stbl; 1338 struct dtslot *f; 1339 int fsi, stblsize; 1340 int n; 1341 struct dt_lock *sdtlck, *rdtlck; 1342 struct tlock *tlck; 1343 struct dt_lock *dtlck; 1344 struct lv *slv, *rlv, *lv; 1345 1346 /* get split page */ 1347 smp = split->mp; 1348 sp = DT_PAGE(ip, smp); 1349 1350 /* 1351 * allocate the new right page for the split 1352 */ 1353 pxdlist = split->pxdlist; 1354 pxd = &pxdlist->pxd[pxdlist->npxd]; 1355 pxdlist->npxd++; 1356 rbn = addressPXD(pxd); 1357 rmp = get_metapage(ip, rbn, PSIZE, 1); 1358 if (rmp == NULL) 1359 return -EIO; 1360 1361 /* Allocate blocks to quota. */ 1362 if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { 1363 release_metapage(rmp); 1364 return -EDQUOT; 1365 } 1366 1367 jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); 1368 1369 BT_MARK_DIRTY(rmp, ip); 1370 /* 1371 * acquire a transaction lock on the new right page 1372 */ 1373 tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW); 1374 rdtlck = (struct dt_lock *) & tlck->lock; 1375 1376 rp = (dtpage_t *) rmp->data; 1377 *rpp = rp; 1378 rp->header.self = *pxd; 1379 1380 BT_MARK_DIRTY(smp, ip); 1381 /* 1382 * acquire a transaction lock on the split page 1383 * 1384 * action: 1385 */ 1386 tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY); 1387 sdtlck = (struct dt_lock *) & tlck->lock; 1388 1389 /* linelock header of split page */ 1390 ASSERT(sdtlck->index == 0); 1391 slv = & sdtlck->lv[0]; 1392 slv->offset = 0; 1393 slv->length = 1; 1394 sdtlck->index++; 1395 1396 /* 1397 * initialize/update sibling pointers between sp and rp 1398 */ 1399 nextbn = le64_to_cpu(sp->header.next); 1400 rp->header.next = cpu_to_le64(nextbn); 1401 rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self)); 1402 sp->header.next = cpu_to_le64(rbn); 1403 1404 /* 1405 * initialize new right page 1406 */ 1407 rp->header.flag = sp->header.flag; 1408 1409 /* compute sorted entry table at start of extent data area */ 1410 rp->header.nextindex = 0; 1411 rp->header.stblindex = 1; 1412 1413 n = PSIZE >> L2DTSLOTSIZE; 1414 rp->header.maxslot = n; 1415 stblsize = (n + 31) >> L2DTSLOTSIZE; /* in unit of slot */ 1416 1417 /* init freelist */ 1418 fsi = rp->header.stblindex + stblsize; 1419 rp->header.freelist = fsi; 1420 rp->header.freecnt = rp->header.maxslot - fsi; 1421 1422 /* 1423 * sequential append at tail: append without split 1424 * 1425 * If splitting the last page on a level because of appending 1426 * a entry to it (skip is maxentry), it's likely that the access is 1427 * sequential. Adding an empty page on the side of the level is less 1428 * work and can push the fill factor much higher than normal. 1429 * If we're wrong it's no big deal, we'll just do the split the right 1430 * way next time. 1431 * (It may look like it's equally easy to do a similar hack for 1432 * reverse sorted data, that is, split the tree left, 1433 * but it's not. Be my guest.) 1434 */ 1435 if (nextbn == 0 && split->index == sp->header.nextindex) { 1436 /* linelock header + stbl (first slot) of new page */ 1437 rlv = & rdtlck->lv[rdtlck->index]; 1438 rlv->offset = 0; 1439 rlv->length = 2; 1440 rdtlck->index++; 1441 1442 /* 1443 * initialize freelist of new right page 1444 */ 1445 f = &rp->slot[fsi]; 1446 for (fsi++; fsi < rp->header.maxslot; f++, fsi++) 1447 f->next = fsi; 1448 f->next = -1; 1449 1450 /* insert entry at the first entry of the new right page */ 1451 dtInsertEntry(rp, 0, split->key, split->data, &rdtlck); 1452 1453 goto out; 1454 } 1455 1456 /* 1457 * non-sequential insert (at possibly middle page) 1458 */ 1459 1460 /* 1461 * update prev pointer of previous right sibling page; 1462 */ 1463 if (nextbn != 0) { 1464 DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 1465 if (rc) { 1466 discard_metapage(rmp); 1467 return rc; 1468 } 1469 1470 BT_MARK_DIRTY(mp, ip); 1471 /* 1472 * acquire a transaction lock on the next page 1473 */ 1474 tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); 1475 jfs_info("dtSplitPage: tlck = 0x%p, ip = 0x%p, mp=0x%p", 1476 tlck, ip, mp); 1477 dtlck = (struct dt_lock *) & tlck->lock; 1478 1479 /* linelock header of previous right sibling page */ 1480 lv = & dtlck->lv[dtlck->index]; 1481 lv->offset = 0; 1482 lv->length = 1; 1483 dtlck->index++; 1484 1485 p->header.prev = cpu_to_le64(rbn); 1486 1487 DT_PUTPAGE(mp); 1488 } 1489 1490 /* 1491 * split the data between the split and right pages. 1492 */ 1493 skip = split->index; 1494 half = (PSIZE >> L2DTSLOTSIZE) >> 1; /* swag */ 1495 left = 0; 1496 1497 /* 1498 * compute fill factor for split pages 1499 * 1500 * <nxt> traces the next entry to move to rp 1501 * <off> traces the next entry to stay in sp 1502 */ 1503 stbl = (u8 *) & sp->slot[sp->header.stblindex]; 1504 nextindex = sp->header.nextindex; 1505 for (nxt = off = 0; nxt < nextindex; ++off) { 1506 if (off == skip) 1507 /* check for fill factor with new entry size */ 1508 n = split->nslot; 1509 else { 1510 si = stbl[nxt]; 1511 switch (sp->header.flag & BT_TYPE) { 1512 case BT_LEAF: 1513 ldtentry = (struct ldtentry *) & sp->slot[si]; 1514 if (DO_INDEX(ip)) 1515 n = NDTLEAF(ldtentry->namlen); 1516 else 1517 n = NDTLEAF_LEGACY(ldtentry-> 1518 namlen); 1519 break; 1520 1521 case BT_INTERNAL: 1522 idtentry = (struct idtentry *) & sp->slot[si]; 1523 n = NDTINTERNAL(idtentry->namlen); 1524 break; 1525 1526 default: 1527 break; 1528 } 1529 1530 ++nxt; /* advance to next entry to move in sp */ 1531 } 1532 1533 left += n; 1534 if (left >= half) 1535 break; 1536 } 1537 1538 /* <nxt> poins to the 1st entry to move */ 1539 1540 /* 1541 * move entries to right page 1542 * 1543 * dtMoveEntry() initializes rp and reserves entry for insertion 1544 * 1545 * split page moved out entries are linelocked; 1546 * new/right page moved in entries are linelocked; 1547 */ 1548 /* linelock header + stbl of new right page */ 1549 rlv = & rdtlck->lv[rdtlck->index]; 1550 rlv->offset = 0; 1551 rlv->length = 5; 1552 rdtlck->index++; 1553 1554 dtMoveEntry(sp, nxt, rp, &sdtlck, &rdtlck, DO_INDEX(ip)); 1555 1556 sp->header.nextindex = nxt; 1557 1558 /* 1559 * finalize freelist of new right page 1560 */ 1561 fsi = rp->header.freelist; 1562 f = &rp->slot[fsi]; 1563 for (fsi++; fsi < rp->header.maxslot; f++, fsi++) 1564 f->next = fsi; 1565 f->next = -1; 1566 1567 /* 1568 * Update directory index table for entries now in right page 1569 */ 1570 if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) { 1571 s64 lblock; 1572 1573 mp = NULL; 1574 stbl = DT_GETSTBL(rp); 1575 for (n = 0; n < rp->header.nextindex; n++) { 1576 ldtentry = (struct ldtentry *) & rp->slot[stbl[n]]; 1577 modify_index(tid, ip, le32_to_cpu(ldtentry->index), 1578 rbn, n, &mp, &lblock); 1579 } 1580 if (mp) 1581 release_metapage(mp); 1582 } 1583 1584 /* 1585 * the skipped index was on the left page, 1586 */ 1587 if (skip <= off) { 1588 /* insert the new entry in the split page */ 1589 dtInsertEntry(sp, skip, split->key, split->data, &sdtlck); 1590 1591 /* linelock stbl of split page */ 1592 if (sdtlck->index >= sdtlck->maxcnt) 1593 sdtlck = (struct dt_lock *) txLinelock(sdtlck); 1594 slv = & sdtlck->lv[sdtlck->index]; 1595 n = skip >> L2DTSLOTSIZE; 1596 slv->offset = sp->header.stblindex + n; 1597 slv->length = 1598 ((sp->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1; 1599 sdtlck->index++; 1600 } 1601 /* 1602 * the skipped index was on the right page, 1603 */ 1604 else { 1605 /* adjust the skip index to reflect the new position */ 1606 skip -= nxt; 1607 1608 /* insert the new entry in the right page */ 1609 dtInsertEntry(rp, skip, split->key, split->data, &rdtlck); 1610 } 1611 1612 out: 1613 *rmpp = rmp; 1614 *rpxdp = *pxd; 1615 1616 return rc; 1617 } 1618 1619 1620 /* 1621 * dtExtendPage() 1622 * 1623 * function: extend 1st/only directory leaf page 1624 * 1625 * parameter: 1626 * 1627 * return: 0 - success; 1628 * errno - failure; 1629 * return extended page pinned; 1630 */ 1631 static int dtExtendPage(tid_t tid, 1632 struct inode *ip, struct dtsplit * split, struct btstack * btstack) 1633 { 1634 struct super_block *sb = ip->i_sb; 1635 int rc; 1636 struct metapage *smp, *pmp, *mp; 1637 dtpage_t *sp, *pp; 1638 struct pxdlist *pxdlist; 1639 pxd_t *pxd, *tpxd; 1640 int xlen, xsize; 1641 int newstblindex, newstblsize; 1642 int oldstblindex, oldstblsize; 1643 int fsi, last; 1644 struct dtslot *f; 1645 struct btframe *parent; 1646 int n; 1647 struct dt_lock *dtlck; 1648 s64 xaddr, txaddr; 1649 struct tlock *tlck; 1650 struct pxd_lock *pxdlock; 1651 struct lv *lv; 1652 uint type; 1653 struct ldtentry *ldtentry; 1654 u8 *stbl; 1655 1656 /* get page to extend */ 1657 smp = split->mp; 1658 sp = DT_PAGE(ip, smp); 1659 1660 /* get parent/root page */ 1661 parent = BT_POP(btstack); 1662 DT_GETPAGE(ip, parent->bn, pmp, PSIZE, pp, rc); 1663 if (rc) 1664 return (rc); 1665 1666 /* 1667 * extend the extent 1668 */ 1669 pxdlist = split->pxdlist; 1670 pxd = &pxdlist->pxd[pxdlist->npxd]; 1671 pxdlist->npxd++; 1672 1673 xaddr = addressPXD(pxd); 1674 tpxd = &sp->header.self; 1675 txaddr = addressPXD(tpxd); 1676 /* in-place extension */ 1677 if (xaddr == txaddr) { 1678 type = tlckEXTEND; 1679 } 1680 /* relocation */ 1681 else { 1682 type = tlckNEW; 1683 1684 /* save moved extent descriptor for later free */ 1685 tlck = txMaplock(tid, ip, tlckDTREE | tlckRELOCATE); 1686 pxdlock = (struct pxd_lock *) & tlck->lock; 1687 pxdlock->flag = mlckFREEPXD; 1688 pxdlock->pxd = sp->header.self; 1689 pxdlock->index = 1; 1690 1691 /* 1692 * Update directory index table to reflect new page address 1693 */ 1694 if (DO_INDEX(ip)) { 1695 s64 lblock; 1696 1697 mp = NULL; 1698 stbl = DT_GETSTBL(sp); 1699 for (n = 0; n < sp->header.nextindex; n++) { 1700 ldtentry = 1701 (struct ldtentry *) & sp->slot[stbl[n]]; 1702 modify_index(tid, ip, 1703 le32_to_cpu(ldtentry->index), 1704 xaddr, n, &mp, &lblock); 1705 } 1706 if (mp) 1707 release_metapage(mp); 1708 } 1709 } 1710 1711 /* 1712 * extend the page 1713 */ 1714 sp->header.self = *pxd; 1715 1716 jfs_info("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p", ip, smp, sp); 1717 1718 BT_MARK_DIRTY(smp, ip); 1719 /* 1720 * acquire a transaction lock on the extended/leaf page 1721 */ 1722 tlck = txLock(tid, ip, smp, tlckDTREE | type); 1723 dtlck = (struct dt_lock *) & tlck->lock; 1724 lv = & dtlck->lv[0]; 1725 1726 /* update buffer extent descriptor of extended page */ 1727 xlen = lengthPXD(pxd); 1728 xsize = xlen << JFS_SBI(sb)->l2bsize; 1729 #ifdef _STILL_TO_PORT 1730 bmSetXD(smp, xaddr, xsize); 1731 #endif /* _STILL_TO_PORT */ 1732 1733 /* 1734 * copy old stbl to new stbl at start of extended area 1735 */ 1736 oldstblindex = sp->header.stblindex; 1737 oldstblsize = (sp->header.maxslot + 31) >> L2DTSLOTSIZE; 1738 newstblindex = sp->header.maxslot; 1739 n = xsize >> L2DTSLOTSIZE; 1740 newstblsize = (n + 31) >> L2DTSLOTSIZE; 1741 memcpy(&sp->slot[newstblindex], &sp->slot[oldstblindex], 1742 sp->header.nextindex); 1743 1744 /* 1745 * in-line extension: linelock old area of extended page 1746 */ 1747 if (type == tlckEXTEND) { 1748 /* linelock header */ 1749 lv->offset = 0; 1750 lv->length = 1; 1751 dtlck->index++; 1752 lv++; 1753 1754 /* linelock new stbl of extended page */ 1755 lv->offset = newstblindex; 1756 lv->length = newstblsize; 1757 } 1758 /* 1759 * relocation: linelock whole relocated area 1760 */ 1761 else { 1762 lv->offset = 0; 1763 lv->length = sp->header.maxslot + newstblsize; 1764 } 1765 1766 dtlck->index++; 1767 1768 sp->header.maxslot = n; 1769 sp->header.stblindex = newstblindex; 1770 /* sp->header.nextindex remains the same */ 1771 1772 /* 1773 * add old stbl region at head of freelist 1774 */ 1775 fsi = oldstblindex; 1776 f = &sp->slot[fsi]; 1777 last = sp->header.freelist; 1778 for (n = 0; n < oldstblsize; n++, fsi++, f++) { 1779 f->next = last; 1780 last = fsi; 1781 } 1782 sp->header.freelist = last; 1783 sp->header.freecnt += oldstblsize; 1784 1785 /* 1786 * append free region of newly extended area at tail of freelist 1787 */ 1788 /* init free region of newly extended area */ 1789 fsi = n = newstblindex + newstblsize; 1790 f = &sp->slot[fsi]; 1791 for (fsi++; fsi < sp->header.maxslot; f++, fsi++) 1792 f->next = fsi; 1793 f->next = -1; 1794 1795 /* append new free region at tail of old freelist */ 1796 fsi = sp->header.freelist; 1797 if (fsi == -1) 1798 sp->header.freelist = n; 1799 else { 1800 do { 1801 f = &sp->slot[fsi]; 1802 fsi = f->next; 1803 } while (fsi != -1); 1804 1805 f->next = n; 1806 } 1807 1808 sp->header.freecnt += sp->header.maxslot - n; 1809 1810 /* 1811 * insert the new entry 1812 */ 1813 dtInsertEntry(sp, split->index, split->key, split->data, &dtlck); 1814 1815 BT_MARK_DIRTY(pmp, ip); 1816 /* 1817 * linelock any freeslots residing in old extent 1818 */ 1819 if (type == tlckEXTEND) { 1820 n = sp->header.maxslot >> 2; 1821 if (sp->header.freelist < n) 1822 dtLinelockFreelist(sp, n, &dtlck); 1823 } 1824 1825 /* 1826 * update parent entry on the parent/root page 1827 */ 1828 /* 1829 * acquire a transaction lock on the parent/root page 1830 */ 1831 tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY); 1832 dtlck = (struct dt_lock *) & tlck->lock; 1833 lv = & dtlck->lv[dtlck->index]; 1834 1835 /* linelock parent entry - 1st slot */ 1836 lv->offset = 1; 1837 lv->length = 1; 1838 dtlck->index++; 1839 1840 /* update the parent pxd for page extension */ 1841 tpxd = (pxd_t *) & pp->slot[1]; 1842 *tpxd = *pxd; 1843 1844 DT_PUTPAGE(pmp); 1845 return 0; 1846 } 1847 1848 1849 /* 1850 * dtSplitRoot() 1851 * 1852 * function: 1853 * split the full root page into 1854 * original/root/split page and new right page 1855 * i.e., root remains fixed in tree anchor (inode) and 1856 * the root is copied to a single new right child page 1857 * since root page << non-root page, and 1858 * the split root page contains a single entry for the 1859 * new right child page. 1860 * 1861 * parameter: 1862 * 1863 * return: 0 - success; 1864 * errno - failure; 1865 * return new page pinned; 1866 */ 1867 static int dtSplitRoot(tid_t tid, 1868 struct inode *ip, struct dtsplit * split, struct metapage ** rmpp) 1869 { 1870 struct super_block *sb = ip->i_sb; 1871 struct metapage *smp; 1872 dtroot_t *sp; 1873 struct metapage *rmp; 1874 dtpage_t *rp; 1875 s64 rbn; 1876 int xlen; 1877 int xsize; 1878 struct dtslot *f; 1879 s8 *stbl; 1880 int fsi, stblsize, n; 1881 struct idtentry *s; 1882 pxd_t *ppxd; 1883 struct pxdlist *pxdlist; 1884 pxd_t *pxd; 1885 struct dt_lock *dtlck; 1886 struct tlock *tlck; 1887 struct lv *lv; 1888 1889 /* get split root page */ 1890 smp = split->mp; 1891 sp = &JFS_IP(ip)->i_dtroot; 1892 1893 /* 1894 * allocate/initialize a single (right) child page 1895 * 1896 * N.B. at first split, a one (or two) block to fit new entry 1897 * is allocated; at subsequent split, a full page is allocated; 1898 */ 1899 pxdlist = split->pxdlist; 1900 pxd = &pxdlist->pxd[pxdlist->npxd]; 1901 pxdlist->npxd++; 1902 rbn = addressPXD(pxd); 1903 xlen = lengthPXD(pxd); 1904 xsize = xlen << JFS_SBI(sb)->l2bsize; 1905 rmp = get_metapage(ip, rbn, xsize, 1); 1906 if (!rmp) 1907 return -EIO; 1908 1909 rp = rmp->data; 1910 1911 /* Allocate blocks to quota. */ 1912 if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { 1913 release_metapage(rmp); 1914 return -EDQUOT; 1915 } 1916 1917 BT_MARK_DIRTY(rmp, ip); 1918 /* 1919 * acquire a transaction lock on the new right page 1920 */ 1921 tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW); 1922 dtlck = (struct dt_lock *) & tlck->lock; 1923 1924 rp->header.flag = 1925 (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL; 1926 rp->header.self = *pxd; 1927 1928 /* initialize sibling pointers */ 1929 rp->header.next = 0; 1930 rp->header.prev = 0; 1931 1932 /* 1933 * move in-line root page into new right page extent 1934 */ 1935 /* linelock header + copied entries + new stbl (1st slot) in new page */ 1936 ASSERT(dtlck->index == 0); 1937 lv = & dtlck->lv[0]; 1938 lv->offset = 0; 1939 lv->length = 10; /* 1 + 8 + 1 */ 1940 dtlck->index++; 1941 1942 n = xsize >> L2DTSLOTSIZE; 1943 rp->header.maxslot = n; 1944 stblsize = (n + 31) >> L2DTSLOTSIZE; 1945 1946 /* copy old stbl to new stbl at start of extended area */ 1947 rp->header.stblindex = DTROOTMAXSLOT; 1948 stbl = (s8 *) & rp->slot[DTROOTMAXSLOT]; 1949 memcpy(stbl, sp->header.stbl, sp->header.nextindex); 1950 rp->header.nextindex = sp->header.nextindex; 1951 1952 /* copy old data area to start of new data area */ 1953 memcpy(&rp->slot[1], &sp->slot[1], IDATASIZE); 1954 1955 /* 1956 * append free region of newly extended area at tail of freelist 1957 */ 1958 /* init free region of newly extended area */ 1959 fsi = n = DTROOTMAXSLOT + stblsize; 1960 f = &rp->slot[fsi]; 1961 for (fsi++; fsi < rp->header.maxslot; f++, fsi++) 1962 f->next = fsi; 1963 f->next = -1; 1964 1965 /* append new free region at tail of old freelist */ 1966 fsi = sp->header.freelist; 1967 if (fsi == -1) 1968 rp->header.freelist = n; 1969 else { 1970 rp->header.freelist = fsi; 1971 1972 do { 1973 f = &rp->slot[fsi]; 1974 fsi = f->next; 1975 } while (fsi != -1); 1976 1977 f->next = n; 1978 } 1979 1980 rp->header.freecnt = sp->header.freecnt + rp->header.maxslot - n; 1981 1982 /* 1983 * Update directory index table for entries now in right page 1984 */ 1985 if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) { 1986 s64 lblock; 1987 struct metapage *mp = NULL; 1988 struct ldtentry *ldtentry; 1989 1990 stbl = DT_GETSTBL(rp); 1991 for (n = 0; n < rp->header.nextindex; n++) { 1992 ldtentry = (struct ldtentry *) & rp->slot[stbl[n]]; 1993 modify_index(tid, ip, le32_to_cpu(ldtentry->index), 1994 rbn, n, &mp, &lblock); 1995 } 1996 if (mp) 1997 release_metapage(mp); 1998 } 1999 /* 2000 * insert the new entry into the new right/child page 2001 * (skip index in the new right page will not change) 2002 */ 2003 dtInsertEntry(rp, split->index, split->key, split->data, &dtlck); 2004 2005 /* 2006 * reset parent/root page 2007 * 2008 * set the 1st entry offset to 0, which force the left-most key 2009 * at any level of the tree to be less than any search key. 2010 * 2011 * The btree comparison code guarantees that the left-most key on any 2012 * level of the tree is never used, so it doesn't need to be filled in. 2013 */ 2014 BT_MARK_DIRTY(smp, ip); 2015 /* 2016 * acquire a transaction lock on the root page (in-memory inode) 2017 */ 2018 tlck = txLock(tid, ip, smp, tlckDTREE | tlckNEW | tlckBTROOT); 2019 dtlck = (struct dt_lock *) & tlck->lock; 2020 2021 /* linelock root */ 2022 ASSERT(dtlck->index == 0); 2023 lv = & dtlck->lv[0]; 2024 lv->offset = 0; 2025 lv->length = DTROOTMAXSLOT; 2026 dtlck->index++; 2027 2028 /* update page header of root */ 2029 if (sp->header.flag & BT_LEAF) { 2030 sp->header.flag &= ~BT_LEAF; 2031 sp->header.flag |= BT_INTERNAL; 2032 } 2033 2034 /* init the first entry */ 2035 s = (struct idtentry *) & sp->slot[DTENTRYSTART]; 2036 ppxd = (pxd_t *) s; 2037 *ppxd = *pxd; 2038 s->next = -1; 2039 s->namlen = 0; 2040 2041 stbl = sp->header.stbl; 2042 stbl[0] = DTENTRYSTART; 2043 sp->header.nextindex = 1; 2044 2045 /* init freelist */ 2046 fsi = DTENTRYSTART + 1; 2047 f = &sp->slot[fsi]; 2048 2049 /* init free region of remaining area */ 2050 for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++) 2051 f->next = fsi; 2052 f->next = -1; 2053 2054 sp->header.freelist = DTENTRYSTART + 1; 2055 sp->header.freecnt = DTROOTMAXSLOT - (DTENTRYSTART + 1); 2056 2057 *rmpp = rmp; 2058 2059 return 0; 2060 } 2061 2062 2063 /* 2064 * dtDelete() 2065 * 2066 * function: delete the entry(s) referenced by a key. 2067 * 2068 * parameter: 2069 * 2070 * return: 2071 */ 2072 int dtDelete(tid_t tid, 2073 struct inode *ip, struct component_name * key, ino_t * ino, int flag) 2074 { 2075 int rc = 0; 2076 s64 bn; 2077 struct metapage *mp, *imp; 2078 dtpage_t *p; 2079 int index; 2080 struct btstack btstack; 2081 struct dt_lock *dtlck; 2082 struct tlock *tlck; 2083 struct lv *lv; 2084 int i; 2085 struct ldtentry *ldtentry; 2086 u8 *stbl; 2087 u32 table_index, next_index; 2088 struct metapage *nmp; 2089 dtpage_t *np; 2090 2091 /* 2092 * search for the entry to delete: 2093 * 2094 * dtSearch() returns (leaf page pinned, index at which to delete). 2095 */ 2096 if ((rc = dtSearch(ip, key, ino, &btstack, flag))) 2097 return rc; 2098 2099 /* retrieve search result */ 2100 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2101 2102 /* 2103 * We need to find put the index of the next entry into the 2104 * directory index table in order to resume a readdir from this 2105 * entry. 2106 */ 2107 if (DO_INDEX(ip)) { 2108 stbl = DT_GETSTBL(p); 2109 ldtentry = (struct ldtentry *) & p->slot[stbl[index]]; 2110 table_index = le32_to_cpu(ldtentry->index); 2111 if (index == (p->header.nextindex - 1)) { 2112 /* 2113 * Last entry in this leaf page 2114 */ 2115 if ((p->header.flag & BT_ROOT) 2116 || (p->header.next == 0)) 2117 next_index = -1; 2118 else { 2119 /* Read next leaf page */ 2120 DT_GETPAGE(ip, le64_to_cpu(p->header.next), 2121 nmp, PSIZE, np, rc); 2122 if (rc) 2123 next_index = -1; 2124 else { 2125 stbl = DT_GETSTBL(np); 2126 ldtentry = 2127 (struct ldtentry *) & np-> 2128 slot[stbl[0]]; 2129 next_index = 2130 le32_to_cpu(ldtentry->index); 2131 DT_PUTPAGE(nmp); 2132 } 2133 } 2134 } else { 2135 ldtentry = 2136 (struct ldtentry *) & p->slot[stbl[index + 1]]; 2137 next_index = le32_to_cpu(ldtentry->index); 2138 } 2139 free_index(tid, ip, table_index, next_index); 2140 } 2141 /* 2142 * the leaf page becomes empty, delete the page 2143 */ 2144 if (p->header.nextindex == 1) { 2145 /* delete empty page */ 2146 rc = dtDeleteUp(tid, ip, mp, p, &btstack); 2147 } 2148 /* 2149 * the leaf page has other entries remaining: 2150 * 2151 * delete the entry from the leaf page. 2152 */ 2153 else { 2154 BT_MARK_DIRTY(mp, ip); 2155 /* 2156 * acquire a transaction lock on the leaf page 2157 */ 2158 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 2159 dtlck = (struct dt_lock *) & tlck->lock; 2160 2161 /* 2162 * Do not assume that dtlck->index will be zero. During a 2163 * rename within a directory, this transaction may have 2164 * modified this page already when adding the new entry. 2165 */ 2166 2167 /* linelock header */ 2168 if (dtlck->index >= dtlck->maxcnt) 2169 dtlck = (struct dt_lock *) txLinelock(dtlck); 2170 lv = & dtlck->lv[dtlck->index]; 2171 lv->offset = 0; 2172 lv->length = 1; 2173 dtlck->index++; 2174 2175 /* linelock stbl of non-root leaf page */ 2176 if (!(p->header.flag & BT_ROOT)) { 2177 if (dtlck->index >= dtlck->maxcnt) 2178 dtlck = (struct dt_lock *) txLinelock(dtlck); 2179 lv = & dtlck->lv[dtlck->index]; 2180 i = index >> L2DTSLOTSIZE; 2181 lv->offset = p->header.stblindex + i; 2182 lv->length = 2183 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - 2184 i + 1; 2185 dtlck->index++; 2186 } 2187 2188 /* free the leaf entry */ 2189 dtDeleteEntry(p, index, &dtlck); 2190 2191 /* 2192 * Update directory index table for entries moved in stbl 2193 */ 2194 if (DO_INDEX(ip) && index < p->header.nextindex) { 2195 s64 lblock; 2196 2197 imp = NULL; 2198 stbl = DT_GETSTBL(p); 2199 for (i = index; i < p->header.nextindex; i++) { 2200 ldtentry = 2201 (struct ldtentry *) & p->slot[stbl[i]]; 2202 modify_index(tid, ip, 2203 le32_to_cpu(ldtentry->index), 2204 bn, i, &imp, &lblock); 2205 } 2206 if (imp) 2207 release_metapage(imp); 2208 } 2209 2210 DT_PUTPAGE(mp); 2211 } 2212 2213 return rc; 2214 } 2215 2216 2217 /* 2218 * dtDeleteUp() 2219 * 2220 * function: 2221 * free empty pages as propagating deletion up the tree 2222 * 2223 * parameter: 2224 * 2225 * return: 2226 */ 2227 static int dtDeleteUp(tid_t tid, struct inode *ip, 2228 struct metapage * fmp, dtpage_t * fp, struct btstack * btstack) 2229 { 2230 int rc = 0; 2231 struct metapage *mp; 2232 dtpage_t *p; 2233 int index, nextindex; 2234 int xlen; 2235 struct btframe *parent; 2236 struct dt_lock *dtlck; 2237 struct tlock *tlck; 2238 struct lv *lv; 2239 struct pxd_lock *pxdlock; 2240 int i; 2241 2242 /* 2243 * keep the root leaf page which has become empty 2244 */ 2245 if (BT_IS_ROOT(fmp)) { 2246 /* 2247 * reset the root 2248 * 2249 * dtInitRoot() acquires txlock on the root 2250 */ 2251 dtInitRoot(tid, ip, PARENT(ip)); 2252 2253 DT_PUTPAGE(fmp); 2254 2255 return 0; 2256 } 2257 2258 /* 2259 * free the non-root leaf page 2260 */ 2261 /* 2262 * acquire a transaction lock on the page 2263 * 2264 * write FREEXTENT|NOREDOPAGE log record 2265 * N.B. linelock is overlaid as freed extent descriptor, and 2266 * the buffer page is freed; 2267 */ 2268 tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE); 2269 pxdlock = (struct pxd_lock *) & tlck->lock; 2270 pxdlock->flag = mlckFREEPXD; 2271 pxdlock->pxd = fp->header.self; 2272 pxdlock->index = 1; 2273 2274 /* update sibling pointers */ 2275 if ((rc = dtRelink(tid, ip, fp))) { 2276 BT_PUTPAGE(fmp); 2277 return rc; 2278 } 2279 2280 xlen = lengthPXD(&fp->header.self); 2281 2282 /* Free quota allocation. */ 2283 DQUOT_FREE_BLOCK(ip, xlen); 2284 2285 /* free/invalidate its buffer page */ 2286 discard_metapage(fmp); 2287 2288 /* 2289 * propagate page deletion up the directory tree 2290 * 2291 * If the delete from the parent page makes it empty, 2292 * continue all the way up the tree. 2293 * stop if the root page is reached (which is never deleted) or 2294 * if the entry deletion does not empty the page. 2295 */ 2296 while ((parent = BT_POP(btstack)) != NULL) { 2297 /* pin the parent page <sp> */ 2298 DT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); 2299 if (rc) 2300 return rc; 2301 2302 /* 2303 * free the extent of the child page deleted 2304 */ 2305 index = parent->index; 2306 2307 /* 2308 * delete the entry for the child page from parent 2309 */ 2310 nextindex = p->header.nextindex; 2311 2312 /* 2313 * the parent has the single entry being deleted: 2314 * 2315 * free the parent page which has become empty. 2316 */ 2317 if (nextindex == 1) { 2318 /* 2319 * keep the root internal page which has become empty 2320 */ 2321 if (p->header.flag & BT_ROOT) { 2322 /* 2323 * reset the root 2324 * 2325 * dtInitRoot() acquires txlock on the root 2326 */ 2327 dtInitRoot(tid, ip, PARENT(ip)); 2328 2329 DT_PUTPAGE(mp); 2330 2331 return 0; 2332 } 2333 /* 2334 * free the parent page 2335 */ 2336 else { 2337 /* 2338 * acquire a transaction lock on the page 2339 * 2340 * write FREEXTENT|NOREDOPAGE log record 2341 */ 2342 tlck = 2343 txMaplock(tid, ip, 2344 tlckDTREE | tlckFREE); 2345 pxdlock = (struct pxd_lock *) & tlck->lock; 2346 pxdlock->flag = mlckFREEPXD; 2347 pxdlock->pxd = p->header.self; 2348 pxdlock->index = 1; 2349 2350 /* update sibling pointers */ 2351 if ((rc = dtRelink(tid, ip, p))) { 2352 DT_PUTPAGE(mp); 2353 return rc; 2354 } 2355 2356 xlen = lengthPXD(&p->header.self); 2357 2358 /* Free quota allocation */ 2359 DQUOT_FREE_BLOCK(ip, xlen); 2360 2361 /* free/invalidate its buffer page */ 2362 discard_metapage(mp); 2363 2364 /* propagate up */ 2365 continue; 2366 } 2367 } 2368 2369 /* 2370 * the parent has other entries remaining: 2371 * 2372 * delete the router entry from the parent page. 2373 */ 2374 BT_MARK_DIRTY(mp, ip); 2375 /* 2376 * acquire a transaction lock on the page 2377 * 2378 * action: router entry deletion 2379 */ 2380 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 2381 dtlck = (struct dt_lock *) & tlck->lock; 2382 2383 /* linelock header */ 2384 if (dtlck->index >= dtlck->maxcnt) 2385 dtlck = (struct dt_lock *) txLinelock(dtlck); 2386 lv = & dtlck->lv[dtlck->index]; 2387 lv->offset = 0; 2388 lv->length = 1; 2389 dtlck->index++; 2390 2391 /* linelock stbl of non-root leaf page */ 2392 if (!(p->header.flag & BT_ROOT)) { 2393 if (dtlck->index < dtlck->maxcnt) 2394 lv++; 2395 else { 2396 dtlck = (struct dt_lock *) txLinelock(dtlck); 2397 lv = & dtlck->lv[0]; 2398 } 2399 i = index >> L2DTSLOTSIZE; 2400 lv->offset = p->header.stblindex + i; 2401 lv->length = 2402 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - 2403 i + 1; 2404 dtlck->index++; 2405 } 2406 2407 /* free the router entry */ 2408 dtDeleteEntry(p, index, &dtlck); 2409 2410 /* reset key of new leftmost entry of level (for consistency) */ 2411 if (index == 0 && 2412 ((p->header.flag & BT_ROOT) || p->header.prev == 0)) 2413 dtTruncateEntry(p, 0, &dtlck); 2414 2415 /* unpin the parent page */ 2416 DT_PUTPAGE(mp); 2417 2418 /* exit propagation up */ 2419 break; 2420 } 2421 2422 return 0; 2423 } 2424 2425 #ifdef _NOTYET 2426 /* 2427 * NAME: dtRelocate() 2428 * 2429 * FUNCTION: relocate dtpage (internal or leaf) of directory; 2430 * This function is mainly used by defragfs utility. 2431 */ 2432 int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, 2433 s64 nxaddr) 2434 { 2435 int rc = 0; 2436 struct metapage *mp, *pmp, *lmp, *rmp; 2437 dtpage_t *p, *pp, *rp = 0, *lp= 0; 2438 s64 bn; 2439 int index; 2440 struct btstack btstack; 2441 pxd_t *pxd; 2442 s64 oxaddr, nextbn, prevbn; 2443 int xlen, xsize; 2444 struct tlock *tlck; 2445 struct dt_lock *dtlck; 2446 struct pxd_lock *pxdlock; 2447 s8 *stbl; 2448 struct lv *lv; 2449 2450 oxaddr = addressPXD(opxd); 2451 xlen = lengthPXD(opxd); 2452 2453 jfs_info("dtRelocate: lmxaddr:%Ld xaddr:%Ld:%Ld xlen:%d", 2454 (long long)lmxaddr, (long long)oxaddr, (long long)nxaddr, 2455 xlen); 2456 2457 /* 2458 * 1. get the internal parent dtpage covering 2459 * router entry for the tartget page to be relocated; 2460 */ 2461 rc = dtSearchNode(ip, lmxaddr, opxd, &btstack); 2462 if (rc) 2463 return rc; 2464 2465 /* retrieve search result */ 2466 DT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2467 jfs_info("dtRelocate: parent router entry validated."); 2468 2469 /* 2470 * 2. relocate the target dtpage 2471 */ 2472 /* read in the target page from src extent */ 2473 DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); 2474 if (rc) { 2475 /* release the pinned parent page */ 2476 DT_PUTPAGE(pmp); 2477 return rc; 2478 } 2479 2480 /* 2481 * read in sibling pages if any to update sibling pointers; 2482 */ 2483 rmp = NULL; 2484 if (p->header.next) { 2485 nextbn = le64_to_cpu(p->header.next); 2486 DT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc); 2487 if (rc) { 2488 DT_PUTPAGE(mp); 2489 DT_PUTPAGE(pmp); 2490 return (rc); 2491 } 2492 } 2493 2494 lmp = NULL; 2495 if (p->header.prev) { 2496 prevbn = le64_to_cpu(p->header.prev); 2497 DT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc); 2498 if (rc) { 2499 DT_PUTPAGE(mp); 2500 DT_PUTPAGE(pmp); 2501 if (rmp) 2502 DT_PUTPAGE(rmp); 2503 return (rc); 2504 } 2505 } 2506 2507 /* at this point, all xtpages to be updated are in memory */ 2508 2509 /* 2510 * update sibling pointers of sibling dtpages if any; 2511 */ 2512 if (lmp) { 2513 tlck = txLock(tid, ip, lmp, tlckDTREE | tlckRELINK); 2514 dtlck = (struct dt_lock *) & tlck->lock; 2515 /* linelock header */ 2516 ASSERT(dtlck->index == 0); 2517 lv = & dtlck->lv[0]; 2518 lv->offset = 0; 2519 lv->length = 1; 2520 dtlck->index++; 2521 2522 lp->header.next = cpu_to_le64(nxaddr); 2523 DT_PUTPAGE(lmp); 2524 } 2525 2526 if (rmp) { 2527 tlck = txLock(tid, ip, rmp, tlckDTREE | tlckRELINK); 2528 dtlck = (struct dt_lock *) & tlck->lock; 2529 /* linelock header */ 2530 ASSERT(dtlck->index == 0); 2531 lv = & dtlck->lv[0]; 2532 lv->offset = 0; 2533 lv->length = 1; 2534 dtlck->index++; 2535 2536 rp->header.prev = cpu_to_le64(nxaddr); 2537 DT_PUTPAGE(rmp); 2538 } 2539 2540 /* 2541 * update the target dtpage to be relocated 2542 * 2543 * write LOG_REDOPAGE of LOG_NEW type for dst page 2544 * for the whole target page (logredo() will apply 2545 * after image and update bmap for allocation of the 2546 * dst extent), and update bmap for allocation of 2547 * the dst extent; 2548 */ 2549 tlck = txLock(tid, ip, mp, tlckDTREE | tlckNEW); 2550 dtlck = (struct dt_lock *) & tlck->lock; 2551 /* linelock header */ 2552 ASSERT(dtlck->index == 0); 2553 lv = & dtlck->lv[0]; 2554 2555 /* update the self address in the dtpage header */ 2556 pxd = &p->header.self; 2557 PXDaddress(pxd, nxaddr); 2558 2559 /* the dst page is the same as the src page, i.e., 2560 * linelock for afterimage of the whole page; 2561 */ 2562 lv->offset = 0; 2563 lv->length = p->header.maxslot; 2564 dtlck->index++; 2565 2566 /* update the buffer extent descriptor of the dtpage */ 2567 xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; 2568 #ifdef _STILL_TO_PORT 2569 bmSetXD(mp, nxaddr, xsize); 2570 #endif /* _STILL_TO_PORT */ 2571 /* unpin the relocated page */ 2572 DT_PUTPAGE(mp); 2573 jfs_info("dtRelocate: target dtpage relocated."); 2574 2575 /* the moved extent is dtpage, then a LOG_NOREDOPAGE log rec 2576 * needs to be written (in logredo(), the LOG_NOREDOPAGE log rec 2577 * will also force a bmap update ). 2578 */ 2579 2580 /* 2581 * 3. acquire maplock for the source extent to be freed; 2582 */ 2583 /* for dtpage relocation, write a LOG_NOREDOPAGE record 2584 * for the source dtpage (logredo() will init NoRedoPage 2585 * filter and will also update bmap for free of the source 2586 * dtpage), and upadte bmap for free of the source dtpage; 2587 */ 2588 tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE); 2589 pxdlock = (struct pxd_lock *) & tlck->lock; 2590 pxdlock->flag = mlckFREEPXD; 2591 PXDaddress(&pxdlock->pxd, oxaddr); 2592 PXDlength(&pxdlock->pxd, xlen); 2593 pxdlock->index = 1; 2594 2595 /* 2596 * 4. update the parent router entry for relocation; 2597 * 2598 * acquire tlck for the parent entry covering the target dtpage; 2599 * write LOG_REDOPAGE to apply after image only; 2600 */ 2601 jfs_info("dtRelocate: update parent router entry."); 2602 tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY); 2603 dtlck = (struct dt_lock *) & tlck->lock; 2604 lv = & dtlck->lv[dtlck->index]; 2605 2606 /* update the PXD with the new address */ 2607 stbl = DT_GETSTBL(pp); 2608 pxd = (pxd_t *) & pp->slot[stbl[index]]; 2609 PXDaddress(pxd, nxaddr); 2610 lv->offset = stbl[index]; 2611 lv->length = 1; 2612 dtlck->index++; 2613 2614 /* unpin the parent dtpage */ 2615 DT_PUTPAGE(pmp); 2616 2617 return rc; 2618 } 2619 2620 /* 2621 * NAME: dtSearchNode() 2622 * 2623 * FUNCTION: Search for an dtpage containing a specified address 2624 * This function is mainly used by defragfs utility. 2625 * 2626 * NOTE: Search result on stack, the found page is pinned at exit. 2627 * The result page must be an internal dtpage. 2628 * lmxaddr give the address of the left most page of the 2629 * dtree level, in which the required dtpage resides. 2630 */ 2631 static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd, 2632 struct btstack * btstack) 2633 { 2634 int rc = 0; 2635 s64 bn; 2636 struct metapage *mp; 2637 dtpage_t *p; 2638 int psize = 288; /* initial in-line directory */ 2639 s8 *stbl; 2640 int i; 2641 pxd_t *pxd; 2642 struct btframe *btsp; 2643 2644 BT_CLR(btstack); /* reset stack */ 2645 2646 /* 2647 * descend tree to the level with specified leftmost page 2648 * 2649 * by convention, root bn = 0. 2650 */ 2651 for (bn = 0;;) { 2652 /* get/pin the page to search */ 2653 DT_GETPAGE(ip, bn, mp, psize, p, rc); 2654 if (rc) 2655 return rc; 2656 2657 /* does the xaddr of leftmost page of the levevl 2658 * matches levevl search key ? 2659 */ 2660 if (p->header.flag & BT_ROOT) { 2661 if (lmxaddr == 0) 2662 break; 2663 } else if (addressPXD(&p->header.self) == lmxaddr) 2664 break; 2665 2666 /* 2667 * descend down to leftmost child page 2668 */ 2669 if (p->header.flag & BT_LEAF) { 2670 DT_PUTPAGE(mp); 2671 return -ESTALE; 2672 } 2673 2674 /* get the leftmost entry */ 2675 stbl = DT_GETSTBL(p); 2676 pxd = (pxd_t *) & p->slot[stbl[0]]; 2677 2678 /* get the child page block address */ 2679 bn = addressPXD(pxd); 2680 psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize; 2681 /* unpin the parent page */ 2682 DT_PUTPAGE(mp); 2683 } 2684 2685 /* 2686 * search each page at the current levevl 2687 */ 2688 loop: 2689 stbl = DT_GETSTBL(p); 2690 for (i = 0; i < p->header.nextindex; i++) { 2691 pxd = (pxd_t *) & p->slot[stbl[i]]; 2692 2693 /* found the specified router entry */ 2694 if (addressPXD(pxd) == addressPXD(kpxd) && 2695 lengthPXD(pxd) == lengthPXD(kpxd)) { 2696 btsp = btstack->top; 2697 btsp->bn = bn; 2698 btsp->index = i; 2699 btsp->mp = mp; 2700 2701 return 0; 2702 } 2703 } 2704 2705 /* get the right sibling page if any */ 2706 if (p->header.next) 2707 bn = le64_to_cpu(p->header.next); 2708 else { 2709 DT_PUTPAGE(mp); 2710 return -ESTALE; 2711 } 2712 2713 /* unpin current page */ 2714 DT_PUTPAGE(mp); 2715 2716 /* get the right sibling page */ 2717 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2718 if (rc) 2719 return rc; 2720 2721 goto loop; 2722 } 2723 #endif /* _NOTYET */ 2724 2725 /* 2726 * dtRelink() 2727 * 2728 * function: 2729 * link around a freed page. 2730 * 2731 * parameter: 2732 * fp: page to be freed 2733 * 2734 * return: 2735 */ 2736 static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p) 2737 { 2738 int rc; 2739 struct metapage *mp; 2740 s64 nextbn, prevbn; 2741 struct tlock *tlck; 2742 struct dt_lock *dtlck; 2743 struct lv *lv; 2744 2745 nextbn = le64_to_cpu(p->header.next); 2746 prevbn = le64_to_cpu(p->header.prev); 2747 2748 /* update prev pointer of the next page */ 2749 if (nextbn != 0) { 2750 DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 2751 if (rc) 2752 return rc; 2753 2754 BT_MARK_DIRTY(mp, ip); 2755 /* 2756 * acquire a transaction lock on the next page 2757 * 2758 * action: update prev pointer; 2759 */ 2760 tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); 2761 jfs_info("dtRelink nextbn: tlck = 0x%p, ip = 0x%p, mp=0x%p", 2762 tlck, ip, mp); 2763 dtlck = (struct dt_lock *) & tlck->lock; 2764 2765 /* linelock header */ 2766 if (dtlck->index >= dtlck->maxcnt) 2767 dtlck = (struct dt_lock *) txLinelock(dtlck); 2768 lv = & dtlck->lv[dtlck->index]; 2769 lv->offset = 0; 2770 lv->length = 1; 2771 dtlck->index++; 2772 2773 p->header.prev = cpu_to_le64(prevbn); 2774 DT_PUTPAGE(mp); 2775 } 2776 2777 /* update next pointer of the previous page */ 2778 if (prevbn != 0) { 2779 DT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc); 2780 if (rc) 2781 return rc; 2782 2783 BT_MARK_DIRTY(mp, ip); 2784 /* 2785 * acquire a transaction lock on the prev page 2786 * 2787 * action: update next pointer; 2788 */ 2789 tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); 2790 jfs_info("dtRelink prevbn: tlck = 0x%p, ip = 0x%p, mp=0x%p", 2791 tlck, ip, mp); 2792 dtlck = (struct dt_lock *) & tlck->lock; 2793 2794 /* linelock header */ 2795 if (dtlck->index >= dtlck->maxcnt) 2796 dtlck = (struct dt_lock *) txLinelock(dtlck); 2797 lv = & dtlck->lv[dtlck->index]; 2798 lv->offset = 0; 2799 lv->length = 1; 2800 dtlck->index++; 2801 2802 p->header.next = cpu_to_le64(nextbn); 2803 DT_PUTPAGE(mp); 2804 } 2805 2806 return 0; 2807 } 2808 2809 2810 /* 2811 * dtInitRoot() 2812 * 2813 * initialize directory root (inline in inode) 2814 */ 2815 void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot) 2816 { 2817 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 2818 dtroot_t *p; 2819 int fsi; 2820 struct dtslot *f; 2821 struct tlock *tlck; 2822 struct dt_lock *dtlck; 2823 struct lv *lv; 2824 u16 xflag_save; 2825 2826 /* 2827 * If this was previously an non-empty directory, we need to remove 2828 * the old directory table. 2829 */ 2830 if (DO_INDEX(ip)) { 2831 if (!jfs_dirtable_inline(ip)) { 2832 struct tblock *tblk = tid_to_tblock(tid); 2833 /* 2834 * We're playing games with the tid's xflag. If 2835 * we're removing a regular file, the file's xtree 2836 * is committed with COMMIT_PMAP, but we always 2837 * commit the directories xtree with COMMIT_PWMAP. 2838 */ 2839 xflag_save = tblk->xflag; 2840 tblk->xflag = 0; 2841 /* 2842 * xtTruncate isn't guaranteed to fully truncate 2843 * the xtree. The caller needs to check i_size 2844 * after committing the transaction to see if 2845 * additional truncation is needed. The 2846 * COMMIT_Stale flag tells caller that we 2847 * initiated the truncation. 2848 */ 2849 xtTruncate(tid, ip, 0, COMMIT_PWMAP); 2850 set_cflag(COMMIT_Stale, ip); 2851 2852 tblk->xflag = xflag_save; 2853 } else 2854 ip->i_size = 1; 2855 2856 jfs_ip->next_index = 2; 2857 } else 2858 ip->i_size = IDATASIZE; 2859 2860 /* 2861 * acquire a transaction lock on the root 2862 * 2863 * action: directory initialization; 2864 */ 2865 tlck = txLock(tid, ip, (struct metapage *) & jfs_ip->bxflag, 2866 tlckDTREE | tlckENTRY | tlckBTROOT); 2867 dtlck = (struct dt_lock *) & tlck->lock; 2868 2869 /* linelock root */ 2870 ASSERT(dtlck->index == 0); 2871 lv = & dtlck->lv[0]; 2872 lv->offset = 0; 2873 lv->length = DTROOTMAXSLOT; 2874 dtlck->index++; 2875 2876 p = &jfs_ip->i_dtroot; 2877 2878 p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; 2879 2880 p->header.nextindex = 0; 2881 2882 /* init freelist */ 2883 fsi = 1; 2884 f = &p->slot[fsi]; 2885 2886 /* init data area of root */ 2887 for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++) 2888 f->next = fsi; 2889 f->next = -1; 2890 2891 p->header.freelist = 1; 2892 p->header.freecnt = 8; 2893 2894 /* init '..' entry */ 2895 p->header.idotdot = cpu_to_le32(idotdot); 2896 2897 return; 2898 } 2899 2900 /* 2901 * add_missing_indices() 2902 * 2903 * function: Fix dtree page in which one or more entries has an invalid index. 2904 * fsck.jfs should really fix this, but it currently does not. 2905 * Called from jfs_readdir when bad index is detected. 2906 */ 2907 static void add_missing_indices(struct inode *inode, s64 bn) 2908 { 2909 struct ldtentry *d; 2910 struct dt_lock *dtlck; 2911 int i; 2912 uint index; 2913 struct lv *lv; 2914 struct metapage *mp; 2915 dtpage_t *p; 2916 int rc; 2917 s8 *stbl; 2918 tid_t tid; 2919 struct tlock *tlck; 2920 2921 tid = txBegin(inode->i_sb, 0); 2922 2923 DT_GETPAGE(inode, bn, mp, PSIZE, p, rc); 2924 2925 if (rc) { 2926 printk(KERN_ERR "DT_GETPAGE failed!\n"); 2927 goto end; 2928 } 2929 BT_MARK_DIRTY(mp, inode); 2930 2931 ASSERT(p->header.flag & BT_LEAF); 2932 2933 tlck = txLock(tid, inode, mp, tlckDTREE | tlckENTRY); 2934 dtlck = (struct dt_lock *) &tlck->lock; 2935 2936 stbl = DT_GETSTBL(p); 2937 for (i = 0; i < p->header.nextindex; i++) { 2938 d = (struct ldtentry *) &p->slot[stbl[i]]; 2939 index = le32_to_cpu(d->index); 2940 if ((index < 2) || (index >= JFS_IP(inode)->next_index)) { 2941 d->index = cpu_to_le32(add_index(tid, inode, bn, i)); 2942 if (dtlck->index >= dtlck->maxcnt) 2943 dtlck = (struct dt_lock *) txLinelock(dtlck); 2944 lv = &dtlck->lv[dtlck->index]; 2945 lv->offset = stbl[i]; 2946 lv->length = 1; 2947 dtlck->index++; 2948 } 2949 } 2950 2951 DT_PUTPAGE(mp); 2952 (void) txCommit(tid, 1, &inode, 0); 2953 end: 2954 txEnd(tid); 2955 } 2956 2957 /* 2958 * Buffer to hold directory entry info while traversing a dtree page 2959 * before being fed to the filldir function 2960 */ 2961 struct jfs_dirent { 2962 loff_t position; 2963 int ino; 2964 u16 name_len; 2965 char name[0]; 2966 }; 2967 2968 /* 2969 * function to determine next variable-sized jfs_dirent in buffer 2970 */ 2971 static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent) 2972 { 2973 return (struct jfs_dirent *) 2974 ((char *)dirent + 2975 ((sizeof (struct jfs_dirent) + dirent->name_len + 1 + 2976 sizeof (loff_t) - 1) & 2977 ~(sizeof (loff_t) - 1))); 2978 } 2979 2980 /* 2981 * jfs_readdir() 2982 * 2983 * function: read directory entries sequentially 2984 * from the specified entry offset 2985 * 2986 * parameter: 2987 * 2988 * return: offset = (pn, index) of start entry 2989 * of next jfs_readdir()/dtRead() 2990 */ 2991 int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) 2992 { 2993 struct inode *ip = filp->f_dentry->d_inode; 2994 struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab; 2995 int rc = 0; 2996 loff_t dtpos; /* legacy OS/2 style position */ 2997 struct dtoffset { 2998 s16 pn; 2999 s16 index; 3000 s32 unused; 3001 } *dtoffset = (struct dtoffset *) &dtpos; 3002 s64 bn; 3003 struct metapage *mp; 3004 dtpage_t *p; 3005 int index; 3006 s8 *stbl; 3007 struct btstack btstack; 3008 int i, next; 3009 struct ldtentry *d; 3010 struct dtslot *t; 3011 int d_namleft, len, outlen; 3012 unsigned long dirent_buf; 3013 char *name_ptr; 3014 u32 dir_index; 3015 int do_index = 0; 3016 uint loop_count = 0; 3017 struct jfs_dirent *jfs_dirent; 3018 int jfs_dirents; 3019 int overflow, fix_page, page_fixed = 0; 3020 static int unique_pos = 2; /* If we can't fix broken index */ 3021 3022 if (filp->f_pos == DIREND) 3023 return 0; 3024 3025 if (DO_INDEX(ip)) { 3026 /* 3027 * persistent index is stored in directory entries. 3028 * Special cases: 0 = . 3029 * 1 = .. 3030 * -1 = End of directory 3031 */ 3032 do_index = 1; 3033 3034 dir_index = (u32) filp->f_pos; 3035 3036 if (dir_index > 1) { 3037 struct dir_table_slot dirtab_slot; 3038 3039 if (dtEmpty(ip) || 3040 (dir_index >= JFS_IP(ip)->next_index)) { 3041 /* Stale position. Directory has shrunk */ 3042 filp->f_pos = DIREND; 3043 return 0; 3044 } 3045 repeat: 3046 rc = read_index(ip, dir_index, &dirtab_slot); 3047 if (rc) { 3048 filp->f_pos = DIREND; 3049 return rc; 3050 } 3051 if (dirtab_slot.flag == DIR_INDEX_FREE) { 3052 if (loop_count++ > JFS_IP(ip)->next_index) { 3053 jfs_err("jfs_readdir detected " 3054 "infinite loop!"); 3055 filp->f_pos = DIREND; 3056 return 0; 3057 } 3058 dir_index = le32_to_cpu(dirtab_slot.addr2); 3059 if (dir_index == -1) { 3060 filp->f_pos = DIREND; 3061 return 0; 3062 } 3063 goto repeat; 3064 } 3065 bn = addressDTS(&dirtab_slot); 3066 index = dirtab_slot.slot; 3067 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3068 if (rc) { 3069 filp->f_pos = DIREND; 3070 return 0; 3071 } 3072 if (p->header.flag & BT_INTERNAL) { 3073 jfs_err("jfs_readdir: bad index table"); 3074 DT_PUTPAGE(mp); 3075 filp->f_pos = -1; 3076 return 0; 3077 } 3078 } else { 3079 if (dir_index == 0) { 3080 /* 3081 * self "." 3082 */ 3083 filp->f_pos = 0; 3084 if (filldir(dirent, ".", 1, 0, ip->i_ino, 3085 DT_DIR)) 3086 return 0; 3087 } 3088 /* 3089 * parent ".." 3090 */ 3091 filp->f_pos = 1; 3092 if (filldir(dirent, "..", 2, 1, PARENT(ip), DT_DIR)) 3093 return 0; 3094 3095 /* 3096 * Find first entry of left-most leaf 3097 */ 3098 if (dtEmpty(ip)) { 3099 filp->f_pos = DIREND; 3100 return 0; 3101 } 3102 3103 if ((rc = dtReadFirst(ip, &btstack))) 3104 return rc; 3105 3106 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 3107 } 3108 } else { 3109 /* 3110 * Legacy filesystem - OS/2 & Linux JFS < 0.3.6 3111 * 3112 * pn = index = 0: First entry "." 3113 * pn = 0; index = 1: Second entry ".." 3114 * pn > 0: Real entries, pn=1 -> leftmost page 3115 * pn = index = -1: No more entries 3116 */ 3117 dtpos = filp->f_pos; 3118 if (dtpos == 0) { 3119 /* build "." entry */ 3120 3121 if (filldir(dirent, ".", 1, filp->f_pos, ip->i_ino, 3122 DT_DIR)) 3123 return 0; 3124 dtoffset->index = 1; 3125 filp->f_pos = dtpos; 3126 } 3127 3128 if (dtoffset->pn == 0) { 3129 if (dtoffset->index == 1) { 3130 /* build ".." entry */ 3131 3132 if (filldir(dirent, "..", 2, filp->f_pos, 3133 PARENT(ip), DT_DIR)) 3134 return 0; 3135 } else { 3136 jfs_err("jfs_readdir called with " 3137 "invalid offset!"); 3138 } 3139 dtoffset->pn = 1; 3140 dtoffset->index = 0; 3141 filp->f_pos = dtpos; 3142 } 3143 3144 if (dtEmpty(ip)) { 3145 filp->f_pos = DIREND; 3146 return 0; 3147 } 3148 3149 if ((rc = dtReadNext(ip, &filp->f_pos, &btstack))) { 3150 jfs_err("jfs_readdir: unexpected rc = %d " 3151 "from dtReadNext", rc); 3152 filp->f_pos = DIREND; 3153 return 0; 3154 } 3155 /* get start leaf page and index */ 3156 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 3157 3158 /* offset beyond directory eof ? */ 3159 if (bn < 0) { 3160 filp->f_pos = DIREND; 3161 return 0; 3162 } 3163 } 3164 3165 dirent_buf = __get_free_page(GFP_KERNEL); 3166 if (dirent_buf == 0) { 3167 DT_PUTPAGE(mp); 3168 jfs_warn("jfs_readdir: __get_free_page failed!"); 3169 filp->f_pos = DIREND; 3170 return -ENOMEM; 3171 } 3172 3173 while (1) { 3174 jfs_dirent = (struct jfs_dirent *) dirent_buf; 3175 jfs_dirents = 0; 3176 overflow = fix_page = 0; 3177 3178 stbl = DT_GETSTBL(p); 3179 3180 for (i = index; i < p->header.nextindex; i++) { 3181 d = (struct ldtentry *) & p->slot[stbl[i]]; 3182 3183 if (((long) jfs_dirent + d->namlen + 1) > 3184 (dirent_buf + PSIZE)) { 3185 /* DBCS codepages could overrun dirent_buf */ 3186 index = i; 3187 overflow = 1; 3188 break; 3189 } 3190 3191 d_namleft = d->namlen; 3192 name_ptr = jfs_dirent->name; 3193 jfs_dirent->ino = le32_to_cpu(d->inumber); 3194 3195 if (do_index) { 3196 len = min(d_namleft, DTLHDRDATALEN); 3197 jfs_dirent->position = le32_to_cpu(d->index); 3198 /* 3199 * d->index should always be valid, but it 3200 * isn't. fsck.jfs doesn't create the 3201 * directory index for the lost+found 3202 * directory. Rather than let it go, 3203 * we can try to fix it. 3204 */ 3205 if ((jfs_dirent->position < 2) || 3206 (jfs_dirent->position >= 3207 JFS_IP(ip)->next_index)) { 3208 if (!page_fixed && !isReadOnly(ip)) { 3209 fix_page = 1; 3210 /* 3211 * setting overflow and setting 3212 * index to i will cause the 3213 * same page to be processed 3214 * again starting here 3215 */ 3216 overflow = 1; 3217 index = i; 3218 break; 3219 } 3220 jfs_dirent->position = unique_pos++; 3221 } 3222 } else { 3223 jfs_dirent->position = dtpos; 3224 len = min(d_namleft, DTLHDRDATALEN_LEGACY); 3225 } 3226 3227 /* copy the name of head/only segment */ 3228 outlen = jfs_strfromUCS_le(name_ptr, d->name, len, 3229 codepage); 3230 jfs_dirent->name_len = outlen; 3231 3232 /* copy name in the additional segment(s) */ 3233 next = d->next; 3234 while (next >= 0) { 3235 t = (struct dtslot *) & p->slot[next]; 3236 name_ptr += outlen; 3237 d_namleft -= len; 3238 /* Sanity Check */ 3239 if (d_namleft == 0) { 3240 jfs_error(ip->i_sb, 3241 "JFS:Dtree error: ino = " 3242 "%ld, bn=%Ld, index = %d", 3243 (long)ip->i_ino, 3244 (long long)bn, 3245 i); 3246 goto skip_one; 3247 } 3248 len = min(d_namleft, DTSLOTDATALEN); 3249 outlen = jfs_strfromUCS_le(name_ptr, t->name, 3250 len, codepage); 3251 jfs_dirent->name_len += outlen; 3252 3253 next = t->next; 3254 } 3255 3256 jfs_dirents++; 3257 jfs_dirent = next_jfs_dirent(jfs_dirent); 3258 skip_one: 3259 if (!do_index) 3260 dtoffset->index++; 3261 } 3262 3263 if (!overflow) { 3264 /* Point to next leaf page */ 3265 if (p->header.flag & BT_ROOT) 3266 bn = 0; 3267 else { 3268 bn = le64_to_cpu(p->header.next); 3269 index = 0; 3270 /* update offset (pn:index) for new page */ 3271 if (!do_index) { 3272 dtoffset->pn++; 3273 dtoffset->index = 0; 3274 } 3275 } 3276 page_fixed = 0; 3277 } 3278 3279 /* unpin previous leaf page */ 3280 DT_PUTPAGE(mp); 3281 3282 jfs_dirent = (struct jfs_dirent *) dirent_buf; 3283 while (jfs_dirents--) { 3284 filp->f_pos = jfs_dirent->position; 3285 if (filldir(dirent, jfs_dirent->name, 3286 jfs_dirent->name_len, filp->f_pos, 3287 jfs_dirent->ino, DT_UNKNOWN)) 3288 goto out; 3289 jfs_dirent = next_jfs_dirent(jfs_dirent); 3290 } 3291 3292 if (fix_page) { 3293 add_missing_indices(ip, bn); 3294 page_fixed = 1; 3295 } 3296 3297 if (!overflow && (bn == 0)) { 3298 filp->f_pos = DIREND; 3299 break; 3300 } 3301 3302 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3303 if (rc) { 3304 free_page(dirent_buf); 3305 return rc; 3306 } 3307 } 3308 3309 out: 3310 free_page(dirent_buf); 3311 3312 return rc; 3313 } 3314 3315 3316 /* 3317 * dtReadFirst() 3318 * 3319 * function: get the leftmost page of the directory 3320 */ 3321 static int dtReadFirst(struct inode *ip, struct btstack * btstack) 3322 { 3323 int rc = 0; 3324 s64 bn; 3325 int psize = 288; /* initial in-line directory */ 3326 struct metapage *mp; 3327 dtpage_t *p; 3328 s8 *stbl; 3329 struct btframe *btsp; 3330 pxd_t *xd; 3331 3332 BT_CLR(btstack); /* reset stack */ 3333 3334 /* 3335 * descend leftmost path of the tree 3336 * 3337 * by convention, root bn = 0. 3338 */ 3339 for (bn = 0;;) { 3340 DT_GETPAGE(ip, bn, mp, psize, p, rc); 3341 if (rc) 3342 return rc; 3343 3344 /* 3345 * leftmost leaf page 3346 */ 3347 if (p->header.flag & BT_LEAF) { 3348 /* return leftmost entry */ 3349 btsp = btstack->top; 3350 btsp->bn = bn; 3351 btsp->index = 0; 3352 btsp->mp = mp; 3353 3354 return 0; 3355 } 3356 3357 /* 3358 * descend down to leftmost child page 3359 */ 3360 if (BT_STACK_FULL(btstack)) { 3361 DT_PUTPAGE(mp); 3362 jfs_error(ip->i_sb, "dtReadFirst: btstack overrun"); 3363 BT_STACK_DUMP(btstack); 3364 return -EIO; 3365 } 3366 /* push (bn, index) of the parent page/entry */ 3367 BT_PUSH(btstack, bn, 0); 3368 3369 /* get the leftmost entry */ 3370 stbl = DT_GETSTBL(p); 3371 xd = (pxd_t *) & p->slot[stbl[0]]; 3372 3373 /* get the child page block address */ 3374 bn = addressPXD(xd); 3375 psize = lengthPXD(xd) << JFS_SBI(ip->i_sb)->l2bsize; 3376 3377 /* unpin the parent page */ 3378 DT_PUTPAGE(mp); 3379 } 3380 } 3381 3382 3383 /* 3384 * dtReadNext() 3385 * 3386 * function: get the page of the specified offset (pn:index) 3387 * 3388 * return: if (offset > eof), bn = -1; 3389 * 3390 * note: if index > nextindex of the target leaf page, 3391 * start with 1st entry of next leaf page; 3392 */ 3393 static int dtReadNext(struct inode *ip, loff_t * offset, 3394 struct btstack * btstack) 3395 { 3396 int rc = 0; 3397 struct dtoffset { 3398 s16 pn; 3399 s16 index; 3400 s32 unused; 3401 } *dtoffset = (struct dtoffset *) offset; 3402 s64 bn; 3403 struct metapage *mp; 3404 dtpage_t *p; 3405 int index; 3406 int pn; 3407 s8 *stbl; 3408 struct btframe *btsp, *parent; 3409 pxd_t *xd; 3410 3411 /* 3412 * get leftmost leaf page pinned 3413 */ 3414 if ((rc = dtReadFirst(ip, btstack))) 3415 return rc; 3416 3417 /* get leaf page */ 3418 DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); 3419 3420 /* get the start offset (pn:index) */ 3421 pn = dtoffset->pn - 1; /* Now pn = 0 represents leftmost leaf */ 3422 index = dtoffset->index; 3423 3424 /* start at leftmost page ? */ 3425 if (pn == 0) { 3426 /* offset beyond eof ? */ 3427 if (index < p->header.nextindex) 3428 goto out; 3429 3430 if (p->header.flag & BT_ROOT) { 3431 bn = -1; 3432 goto out; 3433 } 3434 3435 /* start with 1st entry of next leaf page */ 3436 dtoffset->pn++; 3437 dtoffset->index = index = 0; 3438 goto a; 3439 } 3440 3441 /* start at non-leftmost page: scan parent pages for large pn */ 3442 if (p->header.flag & BT_ROOT) { 3443 bn = -1; 3444 goto out; 3445 } 3446 3447 /* start after next leaf page ? */ 3448 if (pn > 1) 3449 goto b; 3450 3451 /* get leaf page pn = 1 */ 3452 a: 3453 bn = le64_to_cpu(p->header.next); 3454 3455 /* unpin leaf page */ 3456 DT_PUTPAGE(mp); 3457 3458 /* offset beyond eof ? */ 3459 if (bn == 0) { 3460 bn = -1; 3461 goto out; 3462 } 3463 3464 goto c; 3465 3466 /* 3467 * scan last internal page level to get target leaf page 3468 */ 3469 b: 3470 /* unpin leftmost leaf page */ 3471 DT_PUTPAGE(mp); 3472 3473 /* get left most parent page */ 3474 btsp = btstack->top; 3475 parent = btsp - 1; 3476 bn = parent->bn; 3477 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3478 if (rc) 3479 return rc; 3480 3481 /* scan parent pages at last internal page level */ 3482 while (pn >= p->header.nextindex) { 3483 pn -= p->header.nextindex; 3484 3485 /* get next parent page address */ 3486 bn = le64_to_cpu(p->header.next); 3487 3488 /* unpin current parent page */ 3489 DT_PUTPAGE(mp); 3490 3491 /* offset beyond eof ? */ 3492 if (bn == 0) { 3493 bn = -1; 3494 goto out; 3495 } 3496 3497 /* get next parent page */ 3498 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3499 if (rc) 3500 return rc; 3501 3502 /* update parent page stack frame */ 3503 parent->bn = bn; 3504 } 3505 3506 /* get leaf page address */ 3507 stbl = DT_GETSTBL(p); 3508 xd = (pxd_t *) & p->slot[stbl[pn]]; 3509 bn = addressPXD(xd); 3510 3511 /* unpin parent page */ 3512 DT_PUTPAGE(mp); 3513 3514 /* 3515 * get target leaf page 3516 */ 3517 c: 3518 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3519 if (rc) 3520 return rc; 3521 3522 /* 3523 * leaf page has been completed: 3524 * start with 1st entry of next leaf page 3525 */ 3526 if (index >= p->header.nextindex) { 3527 bn = le64_to_cpu(p->header.next); 3528 3529 /* unpin leaf page */ 3530 DT_PUTPAGE(mp); 3531 3532 /* offset beyond eof ? */ 3533 if (bn == 0) { 3534 bn = -1; 3535 goto out; 3536 } 3537 3538 /* get next leaf page */ 3539 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3540 if (rc) 3541 return rc; 3542 3543 /* start with 1st entry of next leaf page */ 3544 dtoffset->pn++; 3545 dtoffset->index = 0; 3546 } 3547 3548 out: 3549 /* return target leaf page pinned */ 3550 btsp = btstack->top; 3551 btsp->bn = bn; 3552 btsp->index = dtoffset->index; 3553 btsp->mp = mp; 3554 3555 return 0; 3556 } 3557 3558 3559 /* 3560 * dtCompare() 3561 * 3562 * function: compare search key with an internal entry 3563 * 3564 * return: 3565 * < 0 if k is < record 3566 * = 0 if k is = record 3567 * > 0 if k is > record 3568 */ 3569 static int dtCompare(struct component_name * key, /* search key */ 3570 dtpage_t * p, /* directory page */ 3571 int si) 3572 { /* entry slot index */ 3573 wchar_t *kname; 3574 __le16 *name; 3575 int klen, namlen, len, rc; 3576 struct idtentry *ih; 3577 struct dtslot *t; 3578 3579 /* 3580 * force the left-most key on internal pages, at any level of 3581 * the tree, to be less than any search key. 3582 * this obviates having to update the leftmost key on an internal 3583 * page when the user inserts a new key in the tree smaller than 3584 * anything that has been stored. 3585 * 3586 * (? if/when dtSearch() narrows down to 1st entry (index = 0), 3587 * at any internal page at any level of the tree, 3588 * it descends to child of the entry anyway - 3589 * ? make the entry as min size dummy entry) 3590 * 3591 * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF)) 3592 * return (1); 3593 */ 3594 3595 kname = key->name; 3596 klen = key->namlen; 3597 3598 ih = (struct idtentry *) & p->slot[si]; 3599 si = ih->next; 3600 name = ih->name; 3601 namlen = ih->namlen; 3602 len = min(namlen, DTIHDRDATALEN); 3603 3604 /* compare with head/only segment */ 3605 len = min(klen, len); 3606 if ((rc = UniStrncmp_le(kname, name, len))) 3607 return rc; 3608 3609 klen -= len; 3610 namlen -= len; 3611 3612 /* compare with additional segment(s) */ 3613 kname += len; 3614 while (klen > 0 && namlen > 0) { 3615 /* compare with next name segment */ 3616 t = (struct dtslot *) & p->slot[si]; 3617 len = min(namlen, DTSLOTDATALEN); 3618 len = min(klen, len); 3619 name = t->name; 3620 if ((rc = UniStrncmp_le(kname, name, len))) 3621 return rc; 3622 3623 klen -= len; 3624 namlen -= len; 3625 kname += len; 3626 si = t->next; 3627 } 3628 3629 return (klen - namlen); 3630 } 3631 3632 3633 3634 3635 /* 3636 * ciCompare() 3637 * 3638 * function: compare search key with an (leaf/internal) entry 3639 * 3640 * return: 3641 * < 0 if k is < record 3642 * = 0 if k is = record 3643 * > 0 if k is > record 3644 */ 3645 static int ciCompare(struct component_name * key, /* search key */ 3646 dtpage_t * p, /* directory page */ 3647 int si, /* entry slot index */ 3648 int flag) 3649 { 3650 wchar_t *kname, x; 3651 __le16 *name; 3652 int klen, namlen, len, rc; 3653 struct ldtentry *lh; 3654 struct idtentry *ih; 3655 struct dtslot *t; 3656 int i; 3657 3658 /* 3659 * force the left-most key on internal pages, at any level of 3660 * the tree, to be less than any search key. 3661 * this obviates having to update the leftmost key on an internal 3662 * page when the user inserts a new key in the tree smaller than 3663 * anything that has been stored. 3664 * 3665 * (? if/when dtSearch() narrows down to 1st entry (index = 0), 3666 * at any internal page at any level of the tree, 3667 * it descends to child of the entry anyway - 3668 * ? make the entry as min size dummy entry) 3669 * 3670 * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF)) 3671 * return (1); 3672 */ 3673 3674 kname = key->name; 3675 klen = key->namlen; 3676 3677 /* 3678 * leaf page entry 3679 */ 3680 if (p->header.flag & BT_LEAF) { 3681 lh = (struct ldtentry *) & p->slot[si]; 3682 si = lh->next; 3683 name = lh->name; 3684 namlen = lh->namlen; 3685 if (flag & JFS_DIR_INDEX) 3686 len = min(namlen, DTLHDRDATALEN); 3687 else 3688 len = min(namlen, DTLHDRDATALEN_LEGACY); 3689 } 3690 /* 3691 * internal page entry 3692 */ 3693 else { 3694 ih = (struct idtentry *) & p->slot[si]; 3695 si = ih->next; 3696 name = ih->name; 3697 namlen = ih->namlen; 3698 len = min(namlen, DTIHDRDATALEN); 3699 } 3700 3701 /* compare with head/only segment */ 3702 len = min(klen, len); 3703 for (i = 0; i < len; i++, kname++, name++) { 3704 /* only uppercase if case-insensitive support is on */ 3705 if ((flag & JFS_OS2) == JFS_OS2) 3706 x = UniToupper(le16_to_cpu(*name)); 3707 else 3708 x = le16_to_cpu(*name); 3709 if ((rc = *kname - x)) 3710 return rc; 3711 } 3712 3713 klen -= len; 3714 namlen -= len; 3715 3716 /* compare with additional segment(s) */ 3717 while (klen > 0 && namlen > 0) { 3718 /* compare with next name segment */ 3719 t = (struct dtslot *) & p->slot[si]; 3720 len = min(namlen, DTSLOTDATALEN); 3721 len = min(klen, len); 3722 name = t->name; 3723 for (i = 0; i < len; i++, kname++, name++) { 3724 /* only uppercase if case-insensitive support is on */ 3725 if ((flag & JFS_OS2) == JFS_OS2) 3726 x = UniToupper(le16_to_cpu(*name)); 3727 else 3728 x = le16_to_cpu(*name); 3729 3730 if ((rc = *kname - x)) 3731 return rc; 3732 } 3733 3734 klen -= len; 3735 namlen -= len; 3736 si = t->next; 3737 } 3738 3739 return (klen - namlen); 3740 } 3741 3742 3743 /* 3744 * ciGetLeafPrefixKey() 3745 * 3746 * function: compute prefix of suffix compression 3747 * from two adjacent leaf entries 3748 * across page boundary 3749 * 3750 * return: non-zero on error 3751 * 3752 */ 3753 static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, 3754 int ri, struct component_name * key, int flag) 3755 { 3756 int klen, namlen; 3757 wchar_t *pl, *pr, *kname; 3758 struct component_name lkey; 3759 struct component_name rkey; 3760 3761 lkey.name = (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), 3762 GFP_KERNEL); 3763 if (lkey.name == NULL) 3764 return -ENOSPC; 3765 3766 rkey.name = (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), 3767 GFP_KERNEL); 3768 if (rkey.name == NULL) { 3769 kfree(lkey.name); 3770 return -ENOSPC; 3771 } 3772 3773 /* get left and right key */ 3774 dtGetKey(lp, li, &lkey, flag); 3775 lkey.name[lkey.namlen] = 0; 3776 3777 if ((flag & JFS_OS2) == JFS_OS2) 3778 ciToUpper(&lkey); 3779 3780 dtGetKey(rp, ri, &rkey, flag); 3781 rkey.name[rkey.namlen] = 0; 3782 3783 3784 if ((flag & JFS_OS2) == JFS_OS2) 3785 ciToUpper(&rkey); 3786 3787 /* compute prefix */ 3788 klen = 0; 3789 kname = key->name; 3790 namlen = min(lkey.namlen, rkey.namlen); 3791 for (pl = lkey.name, pr = rkey.name; 3792 namlen; pl++, pr++, namlen--, klen++, kname++) { 3793 *kname = *pr; 3794 if (*pl != *pr) { 3795 key->namlen = klen + 1; 3796 goto free_names; 3797 } 3798 } 3799 3800 /* l->namlen <= r->namlen since l <= r */ 3801 if (lkey.namlen < rkey.namlen) { 3802 *kname = *pr; 3803 key->namlen = klen + 1; 3804 } else /* l->namelen == r->namelen */ 3805 key->namlen = klen; 3806 3807 free_names: 3808 kfree(lkey.name); 3809 kfree(rkey.name); 3810 return 0; 3811 } 3812 3813 3814 3815 /* 3816 * dtGetKey() 3817 * 3818 * function: get key of the entry 3819 */ 3820 static void dtGetKey(dtpage_t * p, int i, /* entry index */ 3821 struct component_name * key, int flag) 3822 { 3823 int si; 3824 s8 *stbl; 3825 struct ldtentry *lh; 3826 struct idtentry *ih; 3827 struct dtslot *t; 3828 int namlen, len; 3829 wchar_t *kname; 3830 __le16 *name; 3831 3832 /* get entry */ 3833 stbl = DT_GETSTBL(p); 3834 si = stbl[i]; 3835 if (p->header.flag & BT_LEAF) { 3836 lh = (struct ldtentry *) & p->slot[si]; 3837 si = lh->next; 3838 namlen = lh->namlen; 3839 name = lh->name; 3840 if (flag & JFS_DIR_INDEX) 3841 len = min(namlen, DTLHDRDATALEN); 3842 else 3843 len = min(namlen, DTLHDRDATALEN_LEGACY); 3844 } else { 3845 ih = (struct idtentry *) & p->slot[si]; 3846 si = ih->next; 3847 namlen = ih->namlen; 3848 name = ih->name; 3849 len = min(namlen, DTIHDRDATALEN); 3850 } 3851 3852 key->namlen = namlen; 3853 kname = key->name; 3854 3855 /* 3856 * move head/only segment 3857 */ 3858 UniStrncpy_from_le(kname, name, len); 3859 3860 /* 3861 * move additional segment(s) 3862 */ 3863 while (si >= 0) { 3864 /* get next segment */ 3865 t = &p->slot[si]; 3866 kname += len; 3867 namlen -= len; 3868 len = min(namlen, DTSLOTDATALEN); 3869 UniStrncpy_from_le(kname, t->name, len); 3870 3871 si = t->next; 3872 } 3873 } 3874 3875 3876 /* 3877 * dtInsertEntry() 3878 * 3879 * function: allocate free slot(s) and 3880 * write a leaf/internal entry 3881 * 3882 * return: entry slot index 3883 */ 3884 static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key, 3885 ddata_t * data, struct dt_lock ** dtlock) 3886 { 3887 struct dtslot *h, *t; 3888 struct ldtentry *lh = NULL; 3889 struct idtentry *ih = NULL; 3890 int hsi, fsi, klen, len, nextindex; 3891 wchar_t *kname; 3892 __le16 *name; 3893 s8 *stbl; 3894 pxd_t *xd; 3895 struct dt_lock *dtlck = *dtlock; 3896 struct lv *lv; 3897 int xsi, n; 3898 s64 bn = 0; 3899 struct metapage *mp = NULL; 3900 3901 klen = key->namlen; 3902 kname = key->name; 3903 3904 /* allocate a free slot */ 3905 hsi = fsi = p->header.freelist; 3906 h = &p->slot[fsi]; 3907 p->header.freelist = h->next; 3908 --p->header.freecnt; 3909 3910 /* open new linelock */ 3911 if (dtlck->index >= dtlck->maxcnt) 3912 dtlck = (struct dt_lock *) txLinelock(dtlck); 3913 3914 lv = & dtlck->lv[dtlck->index]; 3915 lv->offset = hsi; 3916 3917 /* write head/only segment */ 3918 if (p->header.flag & BT_LEAF) { 3919 lh = (struct ldtentry *) h; 3920 lh->next = h->next; 3921 lh->inumber = cpu_to_le32(data->leaf.ino); 3922 lh->namlen = klen; 3923 name = lh->name; 3924 if (data->leaf.ip) { 3925 len = min(klen, DTLHDRDATALEN); 3926 if (!(p->header.flag & BT_ROOT)) 3927 bn = addressPXD(&p->header.self); 3928 lh->index = cpu_to_le32(add_index(data->leaf.tid, 3929 data->leaf.ip, 3930 bn, index)); 3931 } else 3932 len = min(klen, DTLHDRDATALEN_LEGACY); 3933 } else { 3934 ih = (struct idtentry *) h; 3935 ih->next = h->next; 3936 xd = (pxd_t *) ih; 3937 *xd = data->xd; 3938 ih->namlen = klen; 3939 name = ih->name; 3940 len = min(klen, DTIHDRDATALEN); 3941 } 3942 3943 UniStrncpy_to_le(name, kname, len); 3944 3945 n = 1; 3946 xsi = hsi; 3947 3948 /* write additional segment(s) */ 3949 t = h; 3950 klen -= len; 3951 while (klen) { 3952 /* get free slot */ 3953 fsi = p->header.freelist; 3954 t = &p->slot[fsi]; 3955 p->header.freelist = t->next; 3956 --p->header.freecnt; 3957 3958 /* is next slot contiguous ? */ 3959 if (fsi != xsi + 1) { 3960 /* close current linelock */ 3961 lv->length = n; 3962 dtlck->index++; 3963 3964 /* open new linelock */ 3965 if (dtlck->index < dtlck->maxcnt) 3966 lv++; 3967 else { 3968 dtlck = (struct dt_lock *) txLinelock(dtlck); 3969 lv = & dtlck->lv[0]; 3970 } 3971 3972 lv->offset = fsi; 3973 n = 0; 3974 } 3975 3976 kname += len; 3977 len = min(klen, DTSLOTDATALEN); 3978 UniStrncpy_to_le(t->name, kname, len); 3979 3980 n++; 3981 xsi = fsi; 3982 klen -= len; 3983 } 3984 3985 /* close current linelock */ 3986 lv->length = n; 3987 dtlck->index++; 3988 3989 *dtlock = dtlck; 3990 3991 /* terminate last/only segment */ 3992 if (h == t) { 3993 /* single segment entry */ 3994 if (p->header.flag & BT_LEAF) 3995 lh->next = -1; 3996 else 3997 ih->next = -1; 3998 } else 3999 /* multi-segment entry */ 4000 t->next = -1; 4001 4002 /* if insert into middle, shift right succeeding entries in stbl */ 4003 stbl = DT_GETSTBL(p); 4004 nextindex = p->header.nextindex; 4005 if (index < nextindex) { 4006 memmove(stbl + index + 1, stbl + index, nextindex - index); 4007 4008 if ((p->header.flag & BT_LEAF) && data->leaf.ip) { 4009 s64 lblock; 4010 4011 /* 4012 * Need to update slot number for entries that moved 4013 * in the stbl 4014 */ 4015 mp = NULL; 4016 for (n = index + 1; n <= nextindex; n++) { 4017 lh = (struct ldtentry *) & (p->slot[stbl[n]]); 4018 modify_index(data->leaf.tid, data->leaf.ip, 4019 le32_to_cpu(lh->index), bn, n, 4020 &mp, &lblock); 4021 } 4022 if (mp) 4023 release_metapage(mp); 4024 } 4025 } 4026 4027 stbl[index] = hsi; 4028 4029 /* advance next available entry index of stbl */ 4030 ++p->header.nextindex; 4031 } 4032 4033 4034 /* 4035 * dtMoveEntry() 4036 * 4037 * function: move entries from split/left page to new/right page 4038 * 4039 * nextindex of dst page and freelist/freecnt of both pages 4040 * are updated. 4041 */ 4042 static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp, 4043 struct dt_lock ** sdtlock, struct dt_lock ** ddtlock, 4044 int do_index) 4045 { 4046 int ssi, next; /* src slot index */ 4047 int di; /* dst entry index */ 4048 int dsi; /* dst slot index */ 4049 s8 *sstbl, *dstbl; /* sorted entry table */ 4050 int snamlen, len; 4051 struct ldtentry *slh, *dlh = NULL; 4052 struct idtentry *sih, *dih = NULL; 4053 struct dtslot *h, *s, *d; 4054 struct dt_lock *sdtlck = *sdtlock, *ddtlck = *ddtlock; 4055 struct lv *slv, *dlv; 4056 int xssi, ns, nd; 4057 int sfsi; 4058 4059 sstbl = (s8 *) & sp->slot[sp->header.stblindex]; 4060 dstbl = (s8 *) & dp->slot[dp->header.stblindex]; 4061 4062 dsi = dp->header.freelist; /* first (whole page) free slot */ 4063 sfsi = sp->header.freelist; 4064 4065 /* linelock destination entry slot */ 4066 dlv = & ddtlck->lv[ddtlck->index]; 4067 dlv->offset = dsi; 4068 4069 /* linelock source entry slot */ 4070 slv = & sdtlck->lv[sdtlck->index]; 4071 slv->offset = sstbl[si]; 4072 xssi = slv->offset - 1; 4073 4074 /* 4075 * move entries 4076 */ 4077 ns = nd = 0; 4078 for (di = 0; si < sp->header.nextindex; si++, di++) { 4079 ssi = sstbl[si]; 4080 dstbl[di] = dsi; 4081 4082 /* is next slot contiguous ? */ 4083 if (ssi != xssi + 1) { 4084 /* close current linelock */ 4085 slv->length = ns; 4086 sdtlck->index++; 4087 4088 /* open new linelock */ 4089 if (sdtlck->index < sdtlck->maxcnt) 4090 slv++; 4091 else { 4092 sdtlck = (struct dt_lock *) txLinelock(sdtlck); 4093 slv = & sdtlck->lv[0]; 4094 } 4095 4096 slv->offset = ssi; 4097 ns = 0; 4098 } 4099 4100 /* 4101 * move head/only segment of an entry 4102 */ 4103 /* get dst slot */ 4104 h = d = &dp->slot[dsi]; 4105 4106 /* get src slot and move */ 4107 s = &sp->slot[ssi]; 4108 if (sp->header.flag & BT_LEAF) { 4109 /* get source entry */ 4110 slh = (struct ldtentry *) s; 4111 dlh = (struct ldtentry *) h; 4112 snamlen = slh->namlen; 4113 4114 if (do_index) { 4115 len = min(snamlen, DTLHDRDATALEN); 4116 dlh->index = slh->index; /* little-endian */ 4117 } else 4118 len = min(snamlen, DTLHDRDATALEN_LEGACY); 4119 4120 memcpy(dlh, slh, 6 + len * 2); 4121 4122 next = slh->next; 4123 4124 /* update dst head/only segment next field */ 4125 dsi++; 4126 dlh->next = dsi; 4127 } else { 4128 sih = (struct idtentry *) s; 4129 snamlen = sih->namlen; 4130 4131 len = min(snamlen, DTIHDRDATALEN); 4132 dih = (struct idtentry *) h; 4133 memcpy(dih, sih, 10 + len * 2); 4134 next = sih->next; 4135 4136 dsi++; 4137 dih->next = dsi; 4138 } 4139 4140 /* free src head/only segment */ 4141 s->next = sfsi; 4142 s->cnt = 1; 4143 sfsi = ssi; 4144 4145 ns++; 4146 nd++; 4147 xssi = ssi; 4148 4149 /* 4150 * move additional segment(s) of the entry 4151 */ 4152 snamlen -= len; 4153 while ((ssi = next) >= 0) { 4154 /* is next slot contiguous ? */ 4155 if (ssi != xssi + 1) { 4156 /* close current linelock */ 4157 slv->length = ns; 4158 sdtlck->index++; 4159 4160 /* open new linelock */ 4161 if (sdtlck->index < sdtlck->maxcnt) 4162 slv++; 4163 else { 4164 sdtlck = 4165 (struct dt_lock *) 4166 txLinelock(sdtlck); 4167 slv = & sdtlck->lv[0]; 4168 } 4169 4170 slv->offset = ssi; 4171 ns = 0; 4172 } 4173 4174 /* get next source segment */ 4175 s = &sp->slot[ssi]; 4176 4177 /* get next destination free slot */ 4178 d++; 4179 4180 len = min(snamlen, DTSLOTDATALEN); 4181 UniStrncpy_le(d->name, s->name, len); 4182 4183 ns++; 4184 nd++; 4185 xssi = ssi; 4186 4187 dsi++; 4188 d->next = dsi; 4189 4190 /* free source segment */ 4191 next = s->next; 4192 s->next = sfsi; 4193 s->cnt = 1; 4194 sfsi = ssi; 4195 4196 snamlen -= len; 4197 } /* end while */ 4198 4199 /* terminate dst last/only segment */ 4200 if (h == d) { 4201 /* single segment entry */ 4202 if (dp->header.flag & BT_LEAF) 4203 dlh->next = -1; 4204 else 4205 dih->next = -1; 4206 } else 4207 /* multi-segment entry */ 4208 d->next = -1; 4209 } /* end for */ 4210 4211 /* close current linelock */ 4212 slv->length = ns; 4213 sdtlck->index++; 4214 *sdtlock = sdtlck; 4215 4216 dlv->length = nd; 4217 ddtlck->index++; 4218 *ddtlock = ddtlck; 4219 4220 /* update source header */ 4221 sp->header.freelist = sfsi; 4222 sp->header.freecnt += nd; 4223 4224 /* update destination header */ 4225 dp->header.nextindex = di; 4226 4227 dp->header.freelist = dsi; 4228 dp->header.freecnt -= nd; 4229 } 4230 4231 4232 /* 4233 * dtDeleteEntry() 4234 * 4235 * function: free a (leaf/internal) entry 4236 * 4237 * log freelist header, stbl, and each segment slot of entry 4238 * (even though last/only segment next field is modified, 4239 * physical image logging requires all segment slots of 4240 * the entry logged to avoid applying previous updates 4241 * to the same slots) 4242 */ 4243 static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock) 4244 { 4245 int fsi; /* free entry slot index */ 4246 s8 *stbl; 4247 struct dtslot *t; 4248 int si, freecnt; 4249 struct dt_lock *dtlck = *dtlock; 4250 struct lv *lv; 4251 int xsi, n; 4252 4253 /* get free entry slot index */ 4254 stbl = DT_GETSTBL(p); 4255 fsi = stbl[fi]; 4256 4257 /* open new linelock */ 4258 if (dtlck->index >= dtlck->maxcnt) 4259 dtlck = (struct dt_lock *) txLinelock(dtlck); 4260 lv = & dtlck->lv[dtlck->index]; 4261 4262 lv->offset = fsi; 4263 4264 /* get the head/only segment */ 4265 t = &p->slot[fsi]; 4266 if (p->header.flag & BT_LEAF) 4267 si = ((struct ldtentry *) t)->next; 4268 else 4269 si = ((struct idtentry *) t)->next; 4270 t->next = si; 4271 t->cnt = 1; 4272 4273 n = freecnt = 1; 4274 xsi = fsi; 4275 4276 /* find the last/only segment */ 4277 while (si >= 0) { 4278 /* is next slot contiguous ? */ 4279 if (si != xsi + 1) { 4280 /* close current linelock */ 4281 lv->length = n; 4282 dtlck->index++; 4283 4284 /* open new linelock */ 4285 if (dtlck->index < dtlck->maxcnt) 4286 lv++; 4287 else { 4288 dtlck = (struct dt_lock *) txLinelock(dtlck); 4289 lv = & dtlck->lv[0]; 4290 } 4291 4292 lv->offset = si; 4293 n = 0; 4294 } 4295 4296 n++; 4297 xsi = si; 4298 freecnt++; 4299 4300 t = &p->slot[si]; 4301 t->cnt = 1; 4302 si = t->next; 4303 } 4304 4305 /* close current linelock */ 4306 lv->length = n; 4307 dtlck->index++; 4308 4309 *dtlock = dtlck; 4310 4311 /* update freelist */ 4312 t->next = p->header.freelist; 4313 p->header.freelist = fsi; 4314 p->header.freecnt += freecnt; 4315 4316 /* if delete from middle, 4317 * shift left the succedding entries in the stbl 4318 */ 4319 si = p->header.nextindex; 4320 if (fi < si - 1) 4321 memmove(&stbl[fi], &stbl[fi + 1], si - fi - 1); 4322 4323 p->header.nextindex--; 4324 } 4325 4326 4327 /* 4328 * dtTruncateEntry() 4329 * 4330 * function: truncate a (leaf/internal) entry 4331 * 4332 * log freelist header, stbl, and each segment slot of entry 4333 * (even though last/only segment next field is modified, 4334 * physical image logging requires all segment slots of 4335 * the entry logged to avoid applying previous updates 4336 * to the same slots) 4337 */ 4338 static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock) 4339 { 4340 int tsi; /* truncate entry slot index */ 4341 s8 *stbl; 4342 struct dtslot *t; 4343 int si, freecnt; 4344 struct dt_lock *dtlck = *dtlock; 4345 struct lv *lv; 4346 int fsi, xsi, n; 4347 4348 /* get free entry slot index */ 4349 stbl = DT_GETSTBL(p); 4350 tsi = stbl[ti]; 4351 4352 /* open new linelock */ 4353 if (dtlck->index >= dtlck->maxcnt) 4354 dtlck = (struct dt_lock *) txLinelock(dtlck); 4355 lv = & dtlck->lv[dtlck->index]; 4356 4357 lv->offset = tsi; 4358 4359 /* get the head/only segment */ 4360 t = &p->slot[tsi]; 4361 ASSERT(p->header.flag & BT_INTERNAL); 4362 ((struct idtentry *) t)->namlen = 0; 4363 si = ((struct idtentry *) t)->next; 4364 ((struct idtentry *) t)->next = -1; 4365 4366 n = 1; 4367 freecnt = 0; 4368 fsi = si; 4369 xsi = tsi; 4370 4371 /* find the last/only segment */ 4372 while (si >= 0) { 4373 /* is next slot contiguous ? */ 4374 if (si != xsi + 1) { 4375 /* close current linelock */ 4376 lv->length = n; 4377 dtlck->index++; 4378 4379 /* open new linelock */ 4380 if (dtlck->index < dtlck->maxcnt) 4381 lv++; 4382 else { 4383 dtlck = (struct dt_lock *) txLinelock(dtlck); 4384 lv = & dtlck->lv[0]; 4385 } 4386 4387 lv->offset = si; 4388 n = 0; 4389 } 4390 4391 n++; 4392 xsi = si; 4393 freecnt++; 4394 4395 t = &p->slot[si]; 4396 t->cnt = 1; 4397 si = t->next; 4398 } 4399 4400 /* close current linelock */ 4401 lv->length = n; 4402 dtlck->index++; 4403 4404 *dtlock = dtlck; 4405 4406 /* update freelist */ 4407 if (freecnt == 0) 4408 return; 4409 t->next = p->header.freelist; 4410 p->header.freelist = fsi; 4411 p->header.freecnt += freecnt; 4412 } 4413 4414 4415 /* 4416 * dtLinelockFreelist() 4417 */ 4418 static void dtLinelockFreelist(dtpage_t * p, /* directory page */ 4419 int m, /* max slot index */ 4420 struct dt_lock ** dtlock) 4421 { 4422 int fsi; /* free entry slot index */ 4423 struct dtslot *t; 4424 int si; 4425 struct dt_lock *dtlck = *dtlock; 4426 struct lv *lv; 4427 int xsi, n; 4428 4429 /* get free entry slot index */ 4430 fsi = p->header.freelist; 4431 4432 /* open new linelock */ 4433 if (dtlck->index >= dtlck->maxcnt) 4434 dtlck = (struct dt_lock *) txLinelock(dtlck); 4435 lv = & dtlck->lv[dtlck->index]; 4436 4437 lv->offset = fsi; 4438 4439 n = 1; 4440 xsi = fsi; 4441 4442 t = &p->slot[fsi]; 4443 si = t->next; 4444 4445 /* find the last/only segment */ 4446 while (si < m && si >= 0) { 4447 /* is next slot contiguous ? */ 4448 if (si != xsi + 1) { 4449 /* close current linelock */ 4450 lv->length = n; 4451 dtlck->index++; 4452 4453 /* open new linelock */ 4454 if (dtlck->index < dtlck->maxcnt) 4455 lv++; 4456 else { 4457 dtlck = (struct dt_lock *) txLinelock(dtlck); 4458 lv = & dtlck->lv[0]; 4459 } 4460 4461 lv->offset = si; 4462 n = 0; 4463 } 4464 4465 n++; 4466 xsi = si; 4467 4468 t = &p->slot[si]; 4469 si = t->next; 4470 } 4471 4472 /* close current linelock */ 4473 lv->length = n; 4474 dtlck->index++; 4475 4476 *dtlock = dtlck; 4477 } 4478 4479 4480 /* 4481 * NAME: dtModify 4482 * 4483 * FUNCTION: Modify the inode number part of a directory entry 4484 * 4485 * PARAMETERS: 4486 * tid - Transaction id 4487 * ip - Inode of parent directory 4488 * key - Name of entry to be modified 4489 * orig_ino - Original inode number expected in entry 4490 * new_ino - New inode number to put into entry 4491 * flag - JFS_RENAME 4492 * 4493 * RETURNS: 4494 * -ESTALE - If entry found does not match orig_ino passed in 4495 * -ENOENT - If no entry can be found to match key 4496 * 0 - If successfully modified entry 4497 */ 4498 int dtModify(tid_t tid, struct inode *ip, 4499 struct component_name * key, ino_t * orig_ino, ino_t new_ino, int flag) 4500 { 4501 int rc; 4502 s64 bn; 4503 struct metapage *mp; 4504 dtpage_t *p; 4505 int index; 4506 struct btstack btstack; 4507 struct tlock *tlck; 4508 struct dt_lock *dtlck; 4509 struct lv *lv; 4510 s8 *stbl; 4511 int entry_si; /* entry slot index */ 4512 struct ldtentry *entry; 4513 4514 /* 4515 * search for the entry to modify: 4516 * 4517 * dtSearch() returns (leaf page pinned, index at which to modify). 4518 */ 4519 if ((rc = dtSearch(ip, key, orig_ino, &btstack, flag))) 4520 return rc; 4521 4522 /* retrieve search result */ 4523 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 4524 4525 BT_MARK_DIRTY(mp, ip); 4526 /* 4527 * acquire a transaction lock on the leaf page of named entry 4528 */ 4529 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 4530 dtlck = (struct dt_lock *) & tlck->lock; 4531 4532 /* get slot index of the entry */ 4533 stbl = DT_GETSTBL(p); 4534 entry_si = stbl[index]; 4535 4536 /* linelock entry */ 4537 ASSERT(dtlck->index == 0); 4538 lv = & dtlck->lv[0]; 4539 lv->offset = entry_si; 4540 lv->length = 1; 4541 dtlck->index++; 4542 4543 /* get the head/only segment */ 4544 entry = (struct ldtentry *) & p->slot[entry_si]; 4545 4546 /* substitute the inode number of the entry */ 4547 entry->inumber = cpu_to_le32(new_ino); 4548 4549 /* unpin the leaf page */ 4550 DT_PUTPAGE(mp); 4551 4552 return 0; 4553 } 4554 4555 #ifdef _JFS_DEBUG_DTREE 4556 /* 4557 * dtDisplayTree() 4558 * 4559 * function: traverse forward 4560 */ 4561 int dtDisplayTree(struct inode *ip) 4562 { 4563 int rc; 4564 struct metapage *mp; 4565 dtpage_t *p; 4566 s64 bn, pbn; 4567 int index, lastindex, v, h; 4568 pxd_t *xd; 4569 struct btstack btstack; 4570 struct btframe *btsp; 4571 struct btframe *parent; 4572 u8 *stbl; 4573 int psize = 256; 4574 4575 printk("display B+-tree.\n"); 4576 4577 /* clear stack */ 4578 btsp = btstack.stack; 4579 4580 /* 4581 * start with root 4582 * 4583 * root resides in the inode 4584 */ 4585 bn = 0; 4586 v = h = 0; 4587 4588 /* 4589 * first access of each page: 4590 */ 4591 newPage: 4592 DT_GETPAGE(ip, bn, mp, psize, p, rc); 4593 if (rc) 4594 return rc; 4595 4596 /* process entries forward from first index */ 4597 index = 0; 4598 lastindex = p->header.nextindex - 1; 4599 4600 if (p->header.flag & BT_INTERNAL) { 4601 /* 4602 * first access of each internal page 4603 */ 4604 printf("internal page "); 4605 dtDisplayPage(ip, bn, p); 4606 4607 goto getChild; 4608 } else { /* (p->header.flag & BT_LEAF) */ 4609 4610 /* 4611 * first access of each leaf page 4612 */ 4613 printf("leaf page "); 4614 dtDisplayPage(ip, bn, p); 4615 4616 /* 4617 * process leaf page entries 4618 * 4619 for ( ; index <= lastindex; index++) 4620 { 4621 } 4622 */ 4623 4624 /* unpin the leaf page */ 4625 DT_PUTPAGE(mp); 4626 } 4627 4628 /* 4629 * go back up to the parent page 4630 */ 4631 getParent: 4632 /* pop/restore parent entry for the current child page */ 4633 if ((parent = (btsp == btstack.stack ? NULL : --btsp)) == NULL) 4634 /* current page must have been root */ 4635 return; 4636 4637 /* 4638 * parent page scan completed 4639 */ 4640 if ((index = parent->index) == (lastindex = parent->lastindex)) { 4641 /* go back up to the parent page */ 4642 goto getParent; 4643 } 4644 4645 /* 4646 * parent page has entries remaining 4647 */ 4648 /* get back the parent page */ 4649 bn = parent->bn; 4650 /* v = parent->level; */ 4651 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4652 if (rc) 4653 return rc; 4654 4655 /* get next parent entry */ 4656 index++; 4657 4658 /* 4659 * internal page: go down to child page of current entry 4660 */ 4661 getChild: 4662 /* push/save current parent entry for the child page */ 4663 btsp->bn = pbn = bn; 4664 btsp->index = index; 4665 btsp->lastindex = lastindex; 4666 /* btsp->level = v; */ 4667 /* btsp->node = h; */ 4668 ++btsp; 4669 4670 /* get current entry for the child page */ 4671 stbl = DT_GETSTBL(p); 4672 xd = (pxd_t *) & p->slot[stbl[index]]; 4673 4674 /* 4675 * first access of each internal entry: 4676 */ 4677 4678 /* get child page */ 4679 bn = addressPXD(xd); 4680 psize = lengthPXD(xd) << ip->i_ipmnt->i_l2bsize; 4681 4682 printk("traverse down 0x%Lx[%d]->0x%Lx\n", pbn, index, bn); 4683 v++; 4684 h = index; 4685 4686 /* release parent page */ 4687 DT_PUTPAGE(mp); 4688 4689 /* process the child page */ 4690 goto newPage; 4691 } 4692 4693 4694 /* 4695 * dtDisplayPage() 4696 * 4697 * function: display page 4698 */ 4699 int dtDisplayPage(struct inode *ip, s64 bn, dtpage_t * p) 4700 { 4701 int rc; 4702 struct metapage *mp; 4703 struct ldtentry *lh; 4704 struct idtentry *ih; 4705 pxd_t *xd; 4706 int i, j; 4707 u8 *stbl; 4708 wchar_t name[JFS_NAME_MAX + 1]; 4709 struct component_name key = { 0, name }; 4710 int freepage = 0; 4711 4712 if (p == NULL) { 4713 freepage = 1; 4714 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 4715 if (rc) 4716 return rc; 4717 } 4718 4719 /* display page control */ 4720 printk("bn:0x%Lx flag:0x%08x nextindex:%d\n", 4721 bn, p->header.flag, p->header.nextindex); 4722 4723 /* display entries */ 4724 stbl = DT_GETSTBL(p); 4725 for (i = 0, j = 1; i < p->header.nextindex; i++, j++) { 4726 dtGetKey(p, i, &key, JFS_SBI(ip->i_sb)->mntflag); 4727 key.name[key.namlen] = '\0'; 4728 if (p->header.flag & BT_LEAF) { 4729 lh = (struct ldtentry *) & p->slot[stbl[i]]; 4730 printf("\t[%d] %s:%d", i, key.name, 4731 le32_to_cpu(lh->inumber)); 4732 } else { 4733 ih = (struct idtentry *) & p->slot[stbl[i]]; 4734 xd = (pxd_t *) ih; 4735 bn = addressPXD(xd); 4736 printf("\t[%d] %s:0x%Lx", i, key.name, bn); 4737 } 4738 4739 if (j == 4) { 4740 printf("\n"); 4741 j = 0; 4742 } 4743 } 4744 4745 printf("\n"); 4746 4747 if (freepage) 4748 DT_PUTPAGE(mp); 4749 4750 return 0; 4751 } 4752 #endif /* _JFS_DEBUG_DTREE */ 4753