1 /* 2 * Copyright (C) International Business Machines Corp., 2000-2004 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * the GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 */ 18 19 /* 20 * jfs_dtree.c: directory B+-tree manager 21 * 22 * B+-tree with variable length key directory: 23 * 24 * each directory page is structured as an array of 32-byte 25 * directory entry slots initialized as a freelist 26 * to avoid search/compaction of free space at insertion. 27 * when an entry is inserted, a number of slots are allocated 28 * from the freelist as required to store variable length data 29 * of the entry; when the entry is deleted, slots of the entry 30 * are returned to freelist. 31 * 32 * leaf entry stores full name as key and file serial number 33 * (aka inode number) as data. 34 * internal/router entry stores sufffix compressed name 35 * as key and simple extent descriptor as data. 36 * 37 * each directory page maintains a sorted entry index table 38 * which stores the start slot index of sorted entries 39 * to allow binary search on the table. 40 * 41 * directory starts as a root/leaf page in on-disk inode 42 * inline data area. 43 * when it becomes full, it starts a leaf of a external extent 44 * of length of 1 block. each time the first leaf becomes full, 45 * it is extended rather than split (its size is doubled), 46 * until its length becoms 4 KBytes, from then the extent is split 47 * with new 4 Kbyte extent when it becomes full 48 * to reduce external fragmentation of small directories. 49 * 50 * blah, blah, blah, for linear scan of directory in pieces by 51 * readdir(). 52 * 53 * 54 * case-insensitive directory file system 55 * 56 * names are stored in case-sensitive way in leaf entry. 57 * but stored, searched and compared in case-insensitive (uppercase) order 58 * (i.e., both search key and entry key are folded for search/compare): 59 * (note that case-sensitive order is BROKEN in storage, e.g., 60 * sensitive: Ad, aB, aC, aD -> insensitive: aB, aC, aD, Ad 61 * 62 * entries which folds to the same key makes up a equivalent class 63 * whose members are stored as contiguous cluster (may cross page boundary) 64 * but whose order is arbitrary and acts as duplicate, e.g., 65 * abc, Abc, aBc, abC) 66 * 67 * once match is found at leaf, requires scan forward/backward 68 * either for, in case-insensitive search, duplicate 69 * or for, in case-sensitive search, for exact match 70 * 71 * router entry must be created/stored in case-insensitive way 72 * in internal entry: 73 * (right most key of left page and left most key of right page 74 * are folded, and its suffix compression is propagated as router 75 * key in parent) 76 * (e.g., if split occurs <abc> and <aBd>, <ABD> trather than <aB> 77 * should be made the router key for the split) 78 * 79 * case-insensitive search: 80 * 81 * fold search key; 82 * 83 * case-insensitive search of B-tree: 84 * for internal entry, router key is already folded; 85 * for leaf entry, fold the entry key before comparison. 86 * 87 * if (leaf entry case-insensitive match found) 88 * if (next entry satisfies case-insensitive match) 89 * return EDUPLICATE; 90 * if (prev entry satisfies case-insensitive match) 91 * return EDUPLICATE; 92 * return match; 93 * else 94 * return no match; 95 * 96 * serialization: 97 * target directory inode lock is being held on entry/exit 98 * of all main directory service routines. 99 * 100 * log based recovery: 101 */ 102 103 #include <linux/fs.h> 104 #include <linux/quotaops.h> 105 #include <linux/slab.h> 106 #include "jfs_incore.h" 107 #include "jfs_superblock.h" 108 #include "jfs_filsys.h" 109 #include "jfs_metapage.h" 110 #include "jfs_dmap.h" 111 #include "jfs_unicode.h" 112 #include "jfs_debug.h" 113 114 /* dtree split parameter */ 115 struct dtsplit { 116 struct metapage *mp; 117 s16 index; 118 s16 nslot; 119 struct component_name *key; 120 ddata_t *data; 121 struct pxdlist *pxdlist; 122 }; 123 124 #define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot) 125 126 /* get page buffer for specified block address */ 127 #define DT_GETPAGE(IP, BN, MP, SIZE, P, RC) \ 128 do { \ 129 BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot); \ 130 if (!(RC)) { \ 131 if (((P)->header.nextindex > \ 132 (((BN) == 0) ? DTROOTMAXSLOT : (P)->header.maxslot)) || \ 133 ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT))) { \ 134 BT_PUTPAGE(MP); \ 135 jfs_error((IP)->i_sb, \ 136 "DT_GETPAGE: dtree page corrupt\n"); \ 137 MP = NULL; \ 138 RC = -EIO; \ 139 } \ 140 } \ 141 } while (0) 142 143 /* for consistency */ 144 #define DT_PUTPAGE(MP) BT_PUTPAGE(MP) 145 146 #define DT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ 147 BT_GETSEARCH(IP, LEAF, BN, MP, dtpage_t, P, INDEX, i_dtroot) 148 149 /* 150 * forward references 151 */ 152 static int dtSplitUp(tid_t tid, struct inode *ip, 153 struct dtsplit * split, struct btstack * btstack); 154 155 static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, 156 struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rxdp); 157 158 static int dtExtendPage(tid_t tid, struct inode *ip, 159 struct dtsplit * split, struct btstack * btstack); 160 161 static int dtSplitRoot(tid_t tid, struct inode *ip, 162 struct dtsplit * split, struct metapage ** rmpp); 163 164 static int dtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp, 165 dtpage_t * fp, struct btstack * btstack); 166 167 static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p); 168 169 static int dtReadFirst(struct inode *ip, struct btstack * btstack); 170 171 static int dtReadNext(struct inode *ip, 172 loff_t * offset, struct btstack * btstack); 173 174 static int dtCompare(struct component_name * key, dtpage_t * p, int si); 175 176 static int ciCompare(struct component_name * key, dtpage_t * p, int si, 177 int flag); 178 179 static void dtGetKey(dtpage_t * p, int i, struct component_name * key, 180 int flag); 181 182 static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, 183 int ri, struct component_name * key, int flag); 184 185 static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key, 186 ddata_t * data, struct dt_lock **); 187 188 static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp, 189 struct dt_lock ** sdtlock, struct dt_lock ** ddtlock, 190 int do_index); 191 192 static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock); 193 194 static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock); 195 196 static void dtLinelockFreelist(dtpage_t * p, int m, struct dt_lock ** dtlock); 197 198 #define ciToUpper(c) UniStrupr((c)->name) 199 200 /* 201 * read_index_page() 202 * 203 * Reads a page of a directory's index table. 204 * Having metadata mapped into the directory inode's address space 205 * presents a multitude of problems. We avoid this by mapping to 206 * the absolute address space outside of the *_metapage routines 207 */ 208 static struct metapage *read_index_page(struct inode *inode, s64 blkno) 209 { 210 int rc; 211 s64 xaddr; 212 int xflag; 213 s32 xlen; 214 215 rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1); 216 if (rc || (xaddr == 0)) 217 return NULL; 218 219 return read_metapage(inode, xaddr, PSIZE, 1); 220 } 221 222 /* 223 * get_index_page() 224 * 225 * Same as get_index_page(), but get's a new page without reading 226 */ 227 static struct metapage *get_index_page(struct inode *inode, s64 blkno) 228 { 229 int rc; 230 s64 xaddr; 231 int xflag; 232 s32 xlen; 233 234 rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1); 235 if (rc || (xaddr == 0)) 236 return NULL; 237 238 return get_metapage(inode, xaddr, PSIZE, 1); 239 } 240 241 /* 242 * find_index() 243 * 244 * Returns dtree page containing directory table entry for specified 245 * index and pointer to its entry. 246 * 247 * mp must be released by caller. 248 */ 249 static struct dir_table_slot *find_index(struct inode *ip, u32 index, 250 struct metapage ** mp, s64 *lblock) 251 { 252 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 253 s64 blkno; 254 s64 offset; 255 int page_offset; 256 struct dir_table_slot *slot; 257 static int maxWarnings = 10; 258 259 if (index < 2) { 260 if (maxWarnings) { 261 jfs_warn("find_entry called with index = %d", index); 262 maxWarnings--; 263 } 264 return NULL; 265 } 266 267 if (index >= jfs_ip->next_index) { 268 jfs_warn("find_entry called with index >= next_index"); 269 return NULL; 270 } 271 272 if (jfs_dirtable_inline(ip)) { 273 /* 274 * Inline directory table 275 */ 276 *mp = NULL; 277 slot = &jfs_ip->i_dirtable[index - 2]; 278 } else { 279 offset = (index - 2) * sizeof(struct dir_table_slot); 280 page_offset = offset & (PSIZE - 1); 281 blkno = ((offset + 1) >> L2PSIZE) << 282 JFS_SBI(ip->i_sb)->l2nbperpage; 283 284 if (*mp && (*lblock != blkno)) { 285 release_metapage(*mp); 286 *mp = NULL; 287 } 288 if (!(*mp)) { 289 *lblock = blkno; 290 *mp = read_index_page(ip, blkno); 291 } 292 if (!(*mp)) { 293 jfs_err("free_index: error reading directory table"); 294 return NULL; 295 } 296 297 slot = 298 (struct dir_table_slot *) ((char *) (*mp)->data + 299 page_offset); 300 } 301 return slot; 302 } 303 304 static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp, 305 u32 index) 306 { 307 struct tlock *tlck; 308 struct linelock *llck; 309 struct lv *lv; 310 311 tlck = txLock(tid, ip, mp, tlckDATA); 312 llck = (struct linelock *) tlck->lock; 313 314 if (llck->index >= llck->maxcnt) 315 llck = txLinelock(llck); 316 lv = &llck->lv[llck->index]; 317 318 /* 319 * Linelock slot size is twice the size of directory table 320 * slot size. 512 entries per page. 321 */ 322 lv->offset = ((index - 2) & 511) >> 1; 323 lv->length = 1; 324 llck->index++; 325 } 326 327 /* 328 * add_index() 329 * 330 * Adds an entry to the directory index table. This is used to provide 331 * each directory entry with a persistent index in which to resume 332 * directory traversals 333 */ 334 static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot) 335 { 336 struct super_block *sb = ip->i_sb; 337 struct jfs_sb_info *sbi = JFS_SBI(sb); 338 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 339 u64 blkno; 340 struct dir_table_slot *dirtab_slot; 341 u32 index; 342 struct linelock *llck; 343 struct lv *lv; 344 struct metapage *mp; 345 s64 offset; 346 uint page_offset; 347 struct tlock *tlck; 348 s64 xaddr; 349 350 ASSERT(DO_INDEX(ip)); 351 352 if (jfs_ip->next_index < 2) { 353 jfs_warn("add_index: next_index = %d. Resetting!", 354 jfs_ip->next_index); 355 jfs_ip->next_index = 2; 356 } 357 358 index = jfs_ip->next_index++; 359 360 if (index <= MAX_INLINE_DIRTABLE_ENTRY) { 361 /* 362 * i_size reflects size of index table, or 8 bytes per entry. 363 */ 364 ip->i_size = (loff_t) (index - 1) << 3; 365 366 /* 367 * dir table fits inline within inode 368 */ 369 dirtab_slot = &jfs_ip->i_dirtable[index-2]; 370 dirtab_slot->flag = DIR_INDEX_VALID; 371 dirtab_slot->slot = slot; 372 DTSaddress(dirtab_slot, bn); 373 374 set_cflag(COMMIT_Dirtable, ip); 375 376 return index; 377 } 378 if (index == (MAX_INLINE_DIRTABLE_ENTRY + 1)) { 379 struct dir_table_slot temp_table[12]; 380 381 /* 382 * It's time to move the inline table to an external 383 * page and begin to build the xtree 384 */ 385 if (dquot_alloc_block(ip, sbi->nbperpage)) 386 goto clean_up; 387 if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) { 388 dquot_free_block(ip, sbi->nbperpage); 389 goto clean_up; 390 } 391 392 /* 393 * Save the table, we're going to overwrite it with the 394 * xtree root 395 */ 396 memcpy(temp_table, &jfs_ip->i_dirtable, sizeof(temp_table)); 397 398 /* 399 * Initialize empty x-tree 400 */ 401 xtInitRoot(tid, ip); 402 403 /* 404 * Add the first block to the xtree 405 */ 406 if (xtInsert(tid, ip, 0, 0, sbi->nbperpage, &xaddr, 0)) { 407 /* This really shouldn't fail */ 408 jfs_warn("add_index: xtInsert failed!"); 409 memcpy(&jfs_ip->i_dirtable, temp_table, 410 sizeof (temp_table)); 411 dbFree(ip, xaddr, sbi->nbperpage); 412 dquot_free_block(ip, sbi->nbperpage); 413 goto clean_up; 414 } 415 ip->i_size = PSIZE; 416 417 mp = get_index_page(ip, 0); 418 if (!mp) { 419 jfs_err("add_index: get_metapage failed!"); 420 xtTruncate(tid, ip, 0, COMMIT_PWMAP); 421 memcpy(&jfs_ip->i_dirtable, temp_table, 422 sizeof (temp_table)); 423 goto clean_up; 424 } 425 tlck = txLock(tid, ip, mp, tlckDATA); 426 llck = (struct linelock *) & tlck->lock; 427 ASSERT(llck->index == 0); 428 lv = &llck->lv[0]; 429 430 lv->offset = 0; 431 lv->length = 6; /* tlckDATA slot size is 16 bytes */ 432 llck->index++; 433 434 memcpy(mp->data, temp_table, sizeof(temp_table)); 435 436 mark_metapage_dirty(mp); 437 release_metapage(mp); 438 439 /* 440 * Logging is now directed by xtree tlocks 441 */ 442 clear_cflag(COMMIT_Dirtable, ip); 443 } 444 445 offset = (index - 2) * sizeof(struct dir_table_slot); 446 page_offset = offset & (PSIZE - 1); 447 blkno = ((offset + 1) >> L2PSIZE) << sbi->l2nbperpage; 448 if (page_offset == 0) { 449 /* 450 * This will be the beginning of a new page 451 */ 452 xaddr = 0; 453 if (xtInsert(tid, ip, 0, blkno, sbi->nbperpage, &xaddr, 0)) { 454 jfs_warn("add_index: xtInsert failed!"); 455 goto clean_up; 456 } 457 ip->i_size += PSIZE; 458 459 if ((mp = get_index_page(ip, blkno))) 460 memset(mp->data, 0, PSIZE); /* Just looks better */ 461 else 462 xtTruncate(tid, ip, offset, COMMIT_PWMAP); 463 } else 464 mp = read_index_page(ip, blkno); 465 466 if (!mp) { 467 jfs_err("add_index: get/read_metapage failed!"); 468 goto clean_up; 469 } 470 471 lock_index(tid, ip, mp, index); 472 473 dirtab_slot = 474 (struct dir_table_slot *) ((char *) mp->data + page_offset); 475 dirtab_slot->flag = DIR_INDEX_VALID; 476 dirtab_slot->slot = slot; 477 DTSaddress(dirtab_slot, bn); 478 479 mark_metapage_dirty(mp); 480 release_metapage(mp); 481 482 return index; 483 484 clean_up: 485 486 jfs_ip->next_index--; 487 488 return 0; 489 } 490 491 /* 492 * free_index() 493 * 494 * Marks an entry to the directory index table as free. 495 */ 496 static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next) 497 { 498 struct dir_table_slot *dirtab_slot; 499 s64 lblock; 500 struct metapage *mp = NULL; 501 502 dirtab_slot = find_index(ip, index, &mp, &lblock); 503 504 if (!dirtab_slot) 505 return; 506 507 dirtab_slot->flag = DIR_INDEX_FREE; 508 dirtab_slot->slot = dirtab_slot->addr1 = 0; 509 dirtab_slot->addr2 = cpu_to_le32(next); 510 511 if (mp) { 512 lock_index(tid, ip, mp, index); 513 mark_metapage_dirty(mp); 514 release_metapage(mp); 515 } else 516 set_cflag(COMMIT_Dirtable, ip); 517 } 518 519 /* 520 * modify_index() 521 * 522 * Changes an entry in the directory index table 523 */ 524 static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn, 525 int slot, struct metapage ** mp, s64 *lblock) 526 { 527 struct dir_table_slot *dirtab_slot; 528 529 dirtab_slot = find_index(ip, index, mp, lblock); 530 531 if (!dirtab_slot) 532 return; 533 534 DTSaddress(dirtab_slot, bn); 535 dirtab_slot->slot = slot; 536 537 if (*mp) { 538 lock_index(tid, ip, *mp, index); 539 mark_metapage_dirty(*mp); 540 } else 541 set_cflag(COMMIT_Dirtable, ip); 542 } 543 544 /* 545 * read_index() 546 * 547 * reads a directory table slot 548 */ 549 static int read_index(struct inode *ip, u32 index, 550 struct dir_table_slot * dirtab_slot) 551 { 552 s64 lblock; 553 struct metapage *mp = NULL; 554 struct dir_table_slot *slot; 555 556 slot = find_index(ip, index, &mp, &lblock); 557 if (!slot) { 558 return -EIO; 559 } 560 561 memcpy(dirtab_slot, slot, sizeof(struct dir_table_slot)); 562 563 if (mp) 564 release_metapage(mp); 565 566 return 0; 567 } 568 569 /* 570 * dtSearch() 571 * 572 * function: 573 * Search for the entry with specified key 574 * 575 * parameter: 576 * 577 * return: 0 - search result on stack, leaf page pinned; 578 * errno - I/O error 579 */ 580 int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, 581 struct btstack * btstack, int flag) 582 { 583 int rc = 0; 584 int cmp = 1; /* init for empty page */ 585 s64 bn; 586 struct metapage *mp; 587 dtpage_t *p; 588 s8 *stbl; 589 int base, index, lim; 590 struct btframe *btsp; 591 pxd_t *pxd; 592 int psize = 288; /* initial in-line directory */ 593 ino_t inumber; 594 struct component_name ciKey; 595 struct super_block *sb = ip->i_sb; 596 597 ciKey.name = kmalloc_array(JFS_NAME_MAX + 1, sizeof(wchar_t), 598 GFP_NOFS); 599 if (!ciKey.name) { 600 rc = -ENOMEM; 601 goto dtSearch_Exit2; 602 } 603 604 605 /* uppercase search key for c-i directory */ 606 UniStrcpy(ciKey.name, key->name); 607 ciKey.namlen = key->namlen; 608 609 /* only uppercase if case-insensitive support is on */ 610 if ((JFS_SBI(sb)->mntflag & JFS_OS2) == JFS_OS2) { 611 ciToUpper(&ciKey); 612 } 613 BT_CLR(btstack); /* reset stack */ 614 615 /* init level count for max pages to split */ 616 btstack->nsplit = 1; 617 618 /* 619 * search down tree from root: 620 * 621 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of 622 * internal page, child page Pi contains entry with k, Ki <= K < Kj. 623 * 624 * if entry with search key K is not found 625 * internal page search find the entry with largest key Ki 626 * less than K which point to the child page to search; 627 * leaf page search find the entry with smallest key Kj 628 * greater than K so that the returned index is the position of 629 * the entry to be shifted right for insertion of new entry. 630 * for empty tree, search key is greater than any key of the tree. 631 * 632 * by convention, root bn = 0. 633 */ 634 for (bn = 0;;) { 635 /* get/pin the page to search */ 636 DT_GETPAGE(ip, bn, mp, psize, p, rc); 637 if (rc) 638 goto dtSearch_Exit1; 639 640 /* get sorted entry table of the page */ 641 stbl = DT_GETSTBL(p); 642 643 /* 644 * binary search with search key K on the current page. 645 */ 646 for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) { 647 index = base + (lim >> 1); 648 649 if (p->header.flag & BT_LEAF) { 650 /* uppercase leaf name to compare */ 651 cmp = 652 ciCompare(&ciKey, p, stbl[index], 653 JFS_SBI(sb)->mntflag); 654 } else { 655 /* router key is in uppercase */ 656 657 cmp = dtCompare(&ciKey, p, stbl[index]); 658 659 660 } 661 if (cmp == 0) { 662 /* 663 * search hit 664 */ 665 /* search hit - leaf page: 666 * return the entry found 667 */ 668 if (p->header.flag & BT_LEAF) { 669 inumber = le32_to_cpu( 670 ((struct ldtentry *) & p->slot[stbl[index]])->inumber); 671 672 /* 673 * search for JFS_LOOKUP 674 */ 675 if (flag == JFS_LOOKUP) { 676 *data = inumber; 677 rc = 0; 678 goto out; 679 } 680 681 /* 682 * search for JFS_CREATE 683 */ 684 if (flag == JFS_CREATE) { 685 *data = inumber; 686 rc = -EEXIST; 687 goto out; 688 } 689 690 /* 691 * search for JFS_REMOVE or JFS_RENAME 692 */ 693 if ((flag == JFS_REMOVE || 694 flag == JFS_RENAME) && 695 *data != inumber) { 696 rc = -ESTALE; 697 goto out; 698 } 699 700 /* 701 * JFS_REMOVE|JFS_FINDDIR|JFS_RENAME 702 */ 703 /* save search result */ 704 *data = inumber; 705 btsp = btstack->top; 706 btsp->bn = bn; 707 btsp->index = index; 708 btsp->mp = mp; 709 710 rc = 0; 711 goto dtSearch_Exit1; 712 } 713 714 /* search hit - internal page: 715 * descend/search its child page 716 */ 717 goto getChild; 718 } 719 720 if (cmp > 0) { 721 base = index + 1; 722 --lim; 723 } 724 } 725 726 /* 727 * search miss 728 * 729 * base is the smallest index with key (Kj) greater than 730 * search key (K) and may be zero or (maxindex + 1) index. 731 */ 732 /* 733 * search miss - leaf page 734 * 735 * return location of entry (base) where new entry with 736 * search key K is to be inserted. 737 */ 738 if (p->header.flag & BT_LEAF) { 739 /* 740 * search for JFS_LOOKUP, JFS_REMOVE, or JFS_RENAME 741 */ 742 if (flag == JFS_LOOKUP || flag == JFS_REMOVE || 743 flag == JFS_RENAME) { 744 rc = -ENOENT; 745 goto out; 746 } 747 748 /* 749 * search for JFS_CREATE|JFS_FINDDIR: 750 * 751 * save search result 752 */ 753 *data = 0; 754 btsp = btstack->top; 755 btsp->bn = bn; 756 btsp->index = base; 757 btsp->mp = mp; 758 759 rc = 0; 760 goto dtSearch_Exit1; 761 } 762 763 /* 764 * search miss - internal page 765 * 766 * if base is non-zero, decrement base by one to get the parent 767 * entry of the child page to search. 768 */ 769 index = base ? base - 1 : base; 770 771 /* 772 * go down to child page 773 */ 774 getChild: 775 /* update max. number of pages to split */ 776 if (BT_STACK_FULL(btstack)) { 777 /* Something's corrupted, mark filesystem dirty so 778 * chkdsk will fix it. 779 */ 780 jfs_error(sb, "stack overrun!\n"); 781 BT_STACK_DUMP(btstack); 782 rc = -EIO; 783 goto out; 784 } 785 btstack->nsplit++; 786 787 /* push (bn, index) of the parent page/entry */ 788 BT_PUSH(btstack, bn, index); 789 790 /* get the child page block number */ 791 pxd = (pxd_t *) & p->slot[stbl[index]]; 792 bn = addressPXD(pxd); 793 psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize; 794 795 /* unpin the parent page */ 796 DT_PUTPAGE(mp); 797 } 798 799 out: 800 DT_PUTPAGE(mp); 801 802 dtSearch_Exit1: 803 804 kfree(ciKey.name); 805 806 dtSearch_Exit2: 807 808 return rc; 809 } 810 811 812 /* 813 * dtInsert() 814 * 815 * function: insert an entry to directory tree 816 * 817 * parameter: 818 * 819 * return: 0 - success; 820 * errno - failure; 821 */ 822 int dtInsert(tid_t tid, struct inode *ip, 823 struct component_name * name, ino_t * fsn, struct btstack * btstack) 824 { 825 int rc = 0; 826 struct metapage *mp; /* meta-page buffer */ 827 dtpage_t *p; /* base B+-tree index page */ 828 s64 bn; 829 int index; 830 struct dtsplit split; /* split information */ 831 ddata_t data; 832 struct dt_lock *dtlck; 833 int n; 834 struct tlock *tlck; 835 struct lv *lv; 836 837 /* 838 * retrieve search result 839 * 840 * dtSearch() returns (leaf page pinned, index at which to insert). 841 * n.b. dtSearch() may return index of (maxindex + 1) of 842 * the full page. 843 */ 844 DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); 845 846 /* 847 * insert entry for new key 848 */ 849 if (DO_INDEX(ip)) { 850 if (JFS_IP(ip)->next_index == DIREND) { 851 DT_PUTPAGE(mp); 852 return -EMLINK; 853 } 854 n = NDTLEAF(name->namlen); 855 data.leaf.tid = tid; 856 data.leaf.ip = ip; 857 } else { 858 n = NDTLEAF_LEGACY(name->namlen); 859 data.leaf.ip = NULL; /* signifies legacy directory format */ 860 } 861 data.leaf.ino = *fsn; 862 863 /* 864 * leaf page does not have enough room for new entry: 865 * 866 * extend/split the leaf page; 867 * 868 * dtSplitUp() will insert the entry and unpin the leaf page. 869 */ 870 if (n > p->header.freecnt) { 871 split.mp = mp; 872 split.index = index; 873 split.nslot = n; 874 split.key = name; 875 split.data = &data; 876 rc = dtSplitUp(tid, ip, &split, btstack); 877 return rc; 878 } 879 880 /* 881 * leaf page does have enough room for new entry: 882 * 883 * insert the new data entry into the leaf page; 884 */ 885 BT_MARK_DIRTY(mp, ip); 886 /* 887 * acquire a transaction lock on the leaf page 888 */ 889 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 890 dtlck = (struct dt_lock *) & tlck->lock; 891 ASSERT(dtlck->index == 0); 892 lv = & dtlck->lv[0]; 893 894 /* linelock header */ 895 lv->offset = 0; 896 lv->length = 1; 897 dtlck->index++; 898 899 dtInsertEntry(p, index, name, &data, &dtlck); 900 901 /* linelock stbl of non-root leaf page */ 902 if (!(p->header.flag & BT_ROOT)) { 903 if (dtlck->index >= dtlck->maxcnt) 904 dtlck = (struct dt_lock *) txLinelock(dtlck); 905 lv = & dtlck->lv[dtlck->index]; 906 n = index >> L2DTSLOTSIZE; 907 lv->offset = p->header.stblindex + n; 908 lv->length = 909 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1; 910 dtlck->index++; 911 } 912 913 /* unpin the leaf page */ 914 DT_PUTPAGE(mp); 915 916 return 0; 917 } 918 919 920 /* 921 * dtSplitUp() 922 * 923 * function: propagate insertion bottom up; 924 * 925 * parameter: 926 * 927 * return: 0 - success; 928 * errno - failure; 929 * leaf page unpinned; 930 */ 931 static int dtSplitUp(tid_t tid, 932 struct inode *ip, struct dtsplit * split, struct btstack * btstack) 933 { 934 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 935 int rc = 0; 936 struct metapage *smp; 937 dtpage_t *sp; /* split page */ 938 struct metapage *rmp; 939 dtpage_t *rp; /* new right page split from sp */ 940 pxd_t rpxd; /* new right page extent descriptor */ 941 struct metapage *lmp; 942 dtpage_t *lp; /* left child page */ 943 int skip; /* index of entry of insertion */ 944 struct btframe *parent; /* parent page entry on traverse stack */ 945 s64 xaddr, nxaddr; 946 int xlen, xsize; 947 struct pxdlist pxdlist; 948 pxd_t *pxd; 949 struct component_name key = { 0, NULL }; 950 ddata_t *data = split->data; 951 int n; 952 struct dt_lock *dtlck; 953 struct tlock *tlck; 954 struct lv *lv; 955 int quota_allocation = 0; 956 957 /* get split page */ 958 smp = split->mp; 959 sp = DT_PAGE(ip, smp); 960 961 key.name = kmalloc_array(JFS_NAME_MAX + 2, sizeof(wchar_t), GFP_NOFS); 962 if (!key.name) { 963 DT_PUTPAGE(smp); 964 rc = -ENOMEM; 965 goto dtSplitUp_Exit; 966 } 967 968 /* 969 * split leaf page 970 * 971 * The split routines insert the new entry, and 972 * acquire txLock as appropriate. 973 */ 974 /* 975 * split root leaf page: 976 */ 977 if (sp->header.flag & BT_ROOT) { 978 /* 979 * allocate a single extent child page 980 */ 981 xlen = 1; 982 n = sbi->bsize >> L2DTSLOTSIZE; 983 n -= (n + 31) >> L2DTSLOTSIZE; /* stbl size */ 984 n -= DTROOTMAXSLOT - sp->header.freecnt; /* header + entries */ 985 if (n <= split->nslot) 986 xlen++; 987 if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr))) { 988 DT_PUTPAGE(smp); 989 goto freeKeyName; 990 } 991 992 pxdlist.maxnpxd = 1; 993 pxdlist.npxd = 0; 994 pxd = &pxdlist.pxd[0]; 995 PXDaddress(pxd, xaddr); 996 PXDlength(pxd, xlen); 997 split->pxdlist = &pxdlist; 998 rc = dtSplitRoot(tid, ip, split, &rmp); 999 1000 if (rc) 1001 dbFree(ip, xaddr, xlen); 1002 else 1003 DT_PUTPAGE(rmp); 1004 1005 DT_PUTPAGE(smp); 1006 1007 if (!DO_INDEX(ip)) 1008 ip->i_size = xlen << sbi->l2bsize; 1009 1010 goto freeKeyName; 1011 } 1012 1013 /* 1014 * extend first leaf page 1015 * 1016 * extend the 1st extent if less than buffer page size 1017 * (dtExtendPage() reurns leaf page unpinned) 1018 */ 1019 pxd = &sp->header.self; 1020 xlen = lengthPXD(pxd); 1021 xsize = xlen << sbi->l2bsize; 1022 if (xsize < PSIZE) { 1023 xaddr = addressPXD(pxd); 1024 n = xsize >> L2DTSLOTSIZE; 1025 n -= (n + 31) >> L2DTSLOTSIZE; /* stbl size */ 1026 if ((n + sp->header.freecnt) <= split->nslot) 1027 n = xlen + (xlen << 1); 1028 else 1029 n = xlen; 1030 1031 /* Allocate blocks to quota. */ 1032 rc = dquot_alloc_block(ip, n); 1033 if (rc) 1034 goto extendOut; 1035 quota_allocation += n; 1036 1037 if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen, 1038 (s64) n, &nxaddr))) 1039 goto extendOut; 1040 1041 pxdlist.maxnpxd = 1; 1042 pxdlist.npxd = 0; 1043 pxd = &pxdlist.pxd[0]; 1044 PXDaddress(pxd, nxaddr); 1045 PXDlength(pxd, xlen + n); 1046 split->pxdlist = &pxdlist; 1047 if ((rc = dtExtendPage(tid, ip, split, btstack))) { 1048 nxaddr = addressPXD(pxd); 1049 if (xaddr != nxaddr) { 1050 /* free relocated extent */ 1051 xlen = lengthPXD(pxd); 1052 dbFree(ip, nxaddr, (s64) xlen); 1053 } else { 1054 /* free extended delta */ 1055 xlen = lengthPXD(pxd) - n; 1056 xaddr = addressPXD(pxd) + xlen; 1057 dbFree(ip, xaddr, (s64) n); 1058 } 1059 } else if (!DO_INDEX(ip)) 1060 ip->i_size = lengthPXD(pxd) << sbi->l2bsize; 1061 1062 1063 extendOut: 1064 DT_PUTPAGE(smp); 1065 goto freeKeyName; 1066 } 1067 1068 /* 1069 * split leaf page <sp> into <sp> and a new right page <rp>. 1070 * 1071 * return <rp> pinned and its extent descriptor <rpxd> 1072 */ 1073 /* 1074 * allocate new directory page extent and 1075 * new index page(s) to cover page split(s) 1076 * 1077 * allocation hint: ? 1078 */ 1079 n = btstack->nsplit; 1080 pxdlist.maxnpxd = pxdlist.npxd = 0; 1081 xlen = sbi->nbperpage; 1082 for (pxd = pxdlist.pxd; n > 0; n--, pxd++) { 1083 if ((rc = dbAlloc(ip, 0, (s64) xlen, &xaddr)) == 0) { 1084 PXDaddress(pxd, xaddr); 1085 PXDlength(pxd, xlen); 1086 pxdlist.maxnpxd++; 1087 continue; 1088 } 1089 1090 DT_PUTPAGE(smp); 1091 1092 /* undo allocation */ 1093 goto splitOut; 1094 } 1095 1096 split->pxdlist = &pxdlist; 1097 if ((rc = dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd))) { 1098 DT_PUTPAGE(smp); 1099 1100 /* undo allocation */ 1101 goto splitOut; 1102 } 1103 1104 if (!DO_INDEX(ip)) 1105 ip->i_size += PSIZE; 1106 1107 /* 1108 * propagate up the router entry for the leaf page just split 1109 * 1110 * insert a router entry for the new page into the parent page, 1111 * propagate the insert/split up the tree by walking back the stack 1112 * of (bn of parent page, index of child page entry in parent page) 1113 * that were traversed during the search for the page that split. 1114 * 1115 * the propagation of insert/split up the tree stops if the root 1116 * splits or the page inserted into doesn't have to split to hold 1117 * the new entry. 1118 * 1119 * the parent entry for the split page remains the same, and 1120 * a new entry is inserted at its right with the first key and 1121 * block number of the new right page. 1122 * 1123 * There are a maximum of 4 pages pinned at any time: 1124 * two children, left parent and right parent (when the parent splits). 1125 * keep the child pages pinned while working on the parent. 1126 * make sure that all pins are released at exit. 1127 */ 1128 while ((parent = BT_POP(btstack)) != NULL) { 1129 /* parent page specified by stack frame <parent> */ 1130 1131 /* keep current child pages (<lp>, <rp>) pinned */ 1132 lmp = smp; 1133 lp = sp; 1134 1135 /* 1136 * insert router entry in parent for new right child page <rp> 1137 */ 1138 /* get the parent page <sp> */ 1139 DT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc); 1140 if (rc) { 1141 DT_PUTPAGE(lmp); 1142 DT_PUTPAGE(rmp); 1143 goto splitOut; 1144 } 1145 1146 /* 1147 * The new key entry goes ONE AFTER the index of parent entry, 1148 * because the split was to the right. 1149 */ 1150 skip = parent->index + 1; 1151 1152 /* 1153 * compute the key for the router entry 1154 * 1155 * key suffix compression: 1156 * for internal pages that have leaf pages as children, 1157 * retain only what's needed to distinguish between 1158 * the new entry and the entry on the page to its left. 1159 * If the keys compare equal, retain the entire key. 1160 * 1161 * note that compression is performed only at computing 1162 * router key at the lowest internal level. 1163 * further compression of the key between pairs of higher 1164 * level internal pages loses too much information and 1165 * the search may fail. 1166 * (e.g., two adjacent leaf pages of {a, ..., x} {xx, ...,} 1167 * results in two adjacent parent entries (a)(xx). 1168 * if split occurs between these two entries, and 1169 * if compression is applied, the router key of parent entry 1170 * of right page (x) will divert search for x into right 1171 * subtree and miss x in the left subtree.) 1172 * 1173 * the entire key must be retained for the next-to-leftmost 1174 * internal key at any level of the tree, or search may fail 1175 * (e.g., ?) 1176 */ 1177 switch (rp->header.flag & BT_TYPE) { 1178 case BT_LEAF: 1179 /* 1180 * compute the length of prefix for suffix compression 1181 * between last entry of left page and first entry 1182 * of right page 1183 */ 1184 if ((sp->header.flag & BT_ROOT && skip > 1) || 1185 sp->header.prev != 0 || skip > 1) { 1186 /* compute uppercase router prefix key */ 1187 rc = ciGetLeafPrefixKey(lp, 1188 lp->header.nextindex-1, 1189 rp, 0, &key, 1190 sbi->mntflag); 1191 if (rc) { 1192 DT_PUTPAGE(lmp); 1193 DT_PUTPAGE(rmp); 1194 DT_PUTPAGE(smp); 1195 goto splitOut; 1196 } 1197 } else { 1198 /* next to leftmost entry of 1199 lowest internal level */ 1200 1201 /* compute uppercase router key */ 1202 dtGetKey(rp, 0, &key, sbi->mntflag); 1203 key.name[key.namlen] = 0; 1204 1205 if ((sbi->mntflag & JFS_OS2) == JFS_OS2) 1206 ciToUpper(&key); 1207 } 1208 1209 n = NDTINTERNAL(key.namlen); 1210 break; 1211 1212 case BT_INTERNAL: 1213 dtGetKey(rp, 0, &key, sbi->mntflag); 1214 n = NDTINTERNAL(key.namlen); 1215 break; 1216 1217 default: 1218 jfs_err("dtSplitUp(): UFO!"); 1219 break; 1220 } 1221 1222 /* unpin left child page */ 1223 DT_PUTPAGE(lmp); 1224 1225 /* 1226 * compute the data for the router entry 1227 */ 1228 data->xd = rpxd; /* child page xd */ 1229 1230 /* 1231 * parent page is full - split the parent page 1232 */ 1233 if (n > sp->header.freecnt) { 1234 /* init for parent page split */ 1235 split->mp = smp; 1236 split->index = skip; /* index at insert */ 1237 split->nslot = n; 1238 split->key = &key; 1239 /* split->data = data; */ 1240 1241 /* unpin right child page */ 1242 DT_PUTPAGE(rmp); 1243 1244 /* The split routines insert the new entry, 1245 * acquire txLock as appropriate. 1246 * return <rp> pinned and its block number <rbn>. 1247 */ 1248 rc = (sp->header.flag & BT_ROOT) ? 1249 dtSplitRoot(tid, ip, split, &rmp) : 1250 dtSplitPage(tid, ip, split, &rmp, &rp, &rpxd); 1251 if (rc) { 1252 DT_PUTPAGE(smp); 1253 goto splitOut; 1254 } 1255 1256 /* smp and rmp are pinned */ 1257 } 1258 /* 1259 * parent page is not full - insert router entry in parent page 1260 */ 1261 else { 1262 BT_MARK_DIRTY(smp, ip); 1263 /* 1264 * acquire a transaction lock on the parent page 1265 */ 1266 tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY); 1267 dtlck = (struct dt_lock *) & tlck->lock; 1268 ASSERT(dtlck->index == 0); 1269 lv = & dtlck->lv[0]; 1270 1271 /* linelock header */ 1272 lv->offset = 0; 1273 lv->length = 1; 1274 dtlck->index++; 1275 1276 /* linelock stbl of non-root parent page */ 1277 if (!(sp->header.flag & BT_ROOT)) { 1278 lv++; 1279 n = skip >> L2DTSLOTSIZE; 1280 lv->offset = sp->header.stblindex + n; 1281 lv->length = 1282 ((sp->header.nextindex - 1283 1) >> L2DTSLOTSIZE) - n + 1; 1284 dtlck->index++; 1285 } 1286 1287 dtInsertEntry(sp, skip, &key, data, &dtlck); 1288 1289 /* exit propagate up */ 1290 break; 1291 } 1292 } 1293 1294 /* unpin current split and its right page */ 1295 DT_PUTPAGE(smp); 1296 DT_PUTPAGE(rmp); 1297 1298 /* 1299 * free remaining extents allocated for split 1300 */ 1301 splitOut: 1302 n = pxdlist.npxd; 1303 pxd = &pxdlist.pxd[n]; 1304 for (; n < pxdlist.maxnpxd; n++, pxd++) 1305 dbFree(ip, addressPXD(pxd), (s64) lengthPXD(pxd)); 1306 1307 freeKeyName: 1308 kfree(key.name); 1309 1310 /* Rollback quota allocation */ 1311 if (rc && quota_allocation) 1312 dquot_free_block(ip, quota_allocation); 1313 1314 dtSplitUp_Exit: 1315 1316 return rc; 1317 } 1318 1319 1320 /* 1321 * dtSplitPage() 1322 * 1323 * function: Split a non-root page of a btree. 1324 * 1325 * parameter: 1326 * 1327 * return: 0 - success; 1328 * errno - failure; 1329 * return split and new page pinned; 1330 */ 1331 static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, 1332 struct metapage ** rmpp, dtpage_t ** rpp, pxd_t * rpxdp) 1333 { 1334 int rc = 0; 1335 struct metapage *smp; 1336 dtpage_t *sp; 1337 struct metapage *rmp; 1338 dtpage_t *rp; /* new right page allocated */ 1339 s64 rbn; /* new right page block number */ 1340 struct metapage *mp; 1341 dtpage_t *p; 1342 s64 nextbn; 1343 struct pxdlist *pxdlist; 1344 pxd_t *pxd; 1345 int skip, nextindex, half, left, nxt, off, si; 1346 struct ldtentry *ldtentry; 1347 struct idtentry *idtentry; 1348 u8 *stbl; 1349 struct dtslot *f; 1350 int fsi, stblsize; 1351 int n; 1352 struct dt_lock *sdtlck, *rdtlck; 1353 struct tlock *tlck; 1354 struct dt_lock *dtlck; 1355 struct lv *slv, *rlv, *lv; 1356 1357 /* get split page */ 1358 smp = split->mp; 1359 sp = DT_PAGE(ip, smp); 1360 1361 /* 1362 * allocate the new right page for the split 1363 */ 1364 pxdlist = split->pxdlist; 1365 pxd = &pxdlist->pxd[pxdlist->npxd]; 1366 pxdlist->npxd++; 1367 rbn = addressPXD(pxd); 1368 rmp = get_metapage(ip, rbn, PSIZE, 1); 1369 if (rmp == NULL) 1370 return -EIO; 1371 1372 /* Allocate blocks to quota. */ 1373 rc = dquot_alloc_block(ip, lengthPXD(pxd)); 1374 if (rc) { 1375 release_metapage(rmp); 1376 return rc; 1377 } 1378 1379 jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); 1380 1381 BT_MARK_DIRTY(rmp, ip); 1382 /* 1383 * acquire a transaction lock on the new right page 1384 */ 1385 tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW); 1386 rdtlck = (struct dt_lock *) & tlck->lock; 1387 1388 rp = (dtpage_t *) rmp->data; 1389 *rpp = rp; 1390 rp->header.self = *pxd; 1391 1392 BT_MARK_DIRTY(smp, ip); 1393 /* 1394 * acquire a transaction lock on the split page 1395 * 1396 * action: 1397 */ 1398 tlck = txLock(tid, ip, smp, tlckDTREE | tlckENTRY); 1399 sdtlck = (struct dt_lock *) & tlck->lock; 1400 1401 /* linelock header of split page */ 1402 ASSERT(sdtlck->index == 0); 1403 slv = & sdtlck->lv[0]; 1404 slv->offset = 0; 1405 slv->length = 1; 1406 sdtlck->index++; 1407 1408 /* 1409 * initialize/update sibling pointers between sp and rp 1410 */ 1411 nextbn = le64_to_cpu(sp->header.next); 1412 rp->header.next = cpu_to_le64(nextbn); 1413 rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self)); 1414 sp->header.next = cpu_to_le64(rbn); 1415 1416 /* 1417 * initialize new right page 1418 */ 1419 rp->header.flag = sp->header.flag; 1420 1421 /* compute sorted entry table at start of extent data area */ 1422 rp->header.nextindex = 0; 1423 rp->header.stblindex = 1; 1424 1425 n = PSIZE >> L2DTSLOTSIZE; 1426 rp->header.maxslot = n; 1427 stblsize = (n + 31) >> L2DTSLOTSIZE; /* in unit of slot */ 1428 1429 /* init freelist */ 1430 fsi = rp->header.stblindex + stblsize; 1431 rp->header.freelist = fsi; 1432 rp->header.freecnt = rp->header.maxslot - fsi; 1433 1434 /* 1435 * sequential append at tail: append without split 1436 * 1437 * If splitting the last page on a level because of appending 1438 * a entry to it (skip is maxentry), it's likely that the access is 1439 * sequential. Adding an empty page on the side of the level is less 1440 * work and can push the fill factor much higher than normal. 1441 * If we're wrong it's no big deal, we'll just do the split the right 1442 * way next time. 1443 * (It may look like it's equally easy to do a similar hack for 1444 * reverse sorted data, that is, split the tree left, 1445 * but it's not. Be my guest.) 1446 */ 1447 if (nextbn == 0 && split->index == sp->header.nextindex) { 1448 /* linelock header + stbl (first slot) of new page */ 1449 rlv = & rdtlck->lv[rdtlck->index]; 1450 rlv->offset = 0; 1451 rlv->length = 2; 1452 rdtlck->index++; 1453 1454 /* 1455 * initialize freelist of new right page 1456 */ 1457 f = &rp->slot[fsi]; 1458 for (fsi++; fsi < rp->header.maxslot; f++, fsi++) 1459 f->next = fsi; 1460 f->next = -1; 1461 1462 /* insert entry at the first entry of the new right page */ 1463 dtInsertEntry(rp, 0, split->key, split->data, &rdtlck); 1464 1465 goto out; 1466 } 1467 1468 /* 1469 * non-sequential insert (at possibly middle page) 1470 */ 1471 1472 /* 1473 * update prev pointer of previous right sibling page; 1474 */ 1475 if (nextbn != 0) { 1476 DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 1477 if (rc) { 1478 discard_metapage(rmp); 1479 return rc; 1480 } 1481 1482 BT_MARK_DIRTY(mp, ip); 1483 /* 1484 * acquire a transaction lock on the next page 1485 */ 1486 tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); 1487 jfs_info("dtSplitPage: tlck = 0x%p, ip = 0x%p, mp=0x%p", 1488 tlck, ip, mp); 1489 dtlck = (struct dt_lock *) & tlck->lock; 1490 1491 /* linelock header of previous right sibling page */ 1492 lv = & dtlck->lv[dtlck->index]; 1493 lv->offset = 0; 1494 lv->length = 1; 1495 dtlck->index++; 1496 1497 p->header.prev = cpu_to_le64(rbn); 1498 1499 DT_PUTPAGE(mp); 1500 } 1501 1502 /* 1503 * split the data between the split and right pages. 1504 */ 1505 skip = split->index; 1506 half = (PSIZE >> L2DTSLOTSIZE) >> 1; /* swag */ 1507 left = 0; 1508 1509 /* 1510 * compute fill factor for split pages 1511 * 1512 * <nxt> traces the next entry to move to rp 1513 * <off> traces the next entry to stay in sp 1514 */ 1515 stbl = (u8 *) & sp->slot[sp->header.stblindex]; 1516 nextindex = sp->header.nextindex; 1517 for (nxt = off = 0; nxt < nextindex; ++off) { 1518 if (off == skip) 1519 /* check for fill factor with new entry size */ 1520 n = split->nslot; 1521 else { 1522 si = stbl[nxt]; 1523 switch (sp->header.flag & BT_TYPE) { 1524 case BT_LEAF: 1525 ldtentry = (struct ldtentry *) & sp->slot[si]; 1526 if (DO_INDEX(ip)) 1527 n = NDTLEAF(ldtentry->namlen); 1528 else 1529 n = NDTLEAF_LEGACY(ldtentry-> 1530 namlen); 1531 break; 1532 1533 case BT_INTERNAL: 1534 idtentry = (struct idtentry *) & sp->slot[si]; 1535 n = NDTINTERNAL(idtentry->namlen); 1536 break; 1537 1538 default: 1539 break; 1540 } 1541 1542 ++nxt; /* advance to next entry to move in sp */ 1543 } 1544 1545 left += n; 1546 if (left >= half) 1547 break; 1548 } 1549 1550 /* <nxt> poins to the 1st entry to move */ 1551 1552 /* 1553 * move entries to right page 1554 * 1555 * dtMoveEntry() initializes rp and reserves entry for insertion 1556 * 1557 * split page moved out entries are linelocked; 1558 * new/right page moved in entries are linelocked; 1559 */ 1560 /* linelock header + stbl of new right page */ 1561 rlv = & rdtlck->lv[rdtlck->index]; 1562 rlv->offset = 0; 1563 rlv->length = 5; 1564 rdtlck->index++; 1565 1566 dtMoveEntry(sp, nxt, rp, &sdtlck, &rdtlck, DO_INDEX(ip)); 1567 1568 sp->header.nextindex = nxt; 1569 1570 /* 1571 * finalize freelist of new right page 1572 */ 1573 fsi = rp->header.freelist; 1574 f = &rp->slot[fsi]; 1575 for (fsi++; fsi < rp->header.maxslot; f++, fsi++) 1576 f->next = fsi; 1577 f->next = -1; 1578 1579 /* 1580 * Update directory index table for entries now in right page 1581 */ 1582 if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) { 1583 s64 lblock; 1584 1585 mp = NULL; 1586 stbl = DT_GETSTBL(rp); 1587 for (n = 0; n < rp->header.nextindex; n++) { 1588 ldtentry = (struct ldtentry *) & rp->slot[stbl[n]]; 1589 modify_index(tid, ip, le32_to_cpu(ldtentry->index), 1590 rbn, n, &mp, &lblock); 1591 } 1592 if (mp) 1593 release_metapage(mp); 1594 } 1595 1596 /* 1597 * the skipped index was on the left page, 1598 */ 1599 if (skip <= off) { 1600 /* insert the new entry in the split page */ 1601 dtInsertEntry(sp, skip, split->key, split->data, &sdtlck); 1602 1603 /* linelock stbl of split page */ 1604 if (sdtlck->index >= sdtlck->maxcnt) 1605 sdtlck = (struct dt_lock *) txLinelock(sdtlck); 1606 slv = & sdtlck->lv[sdtlck->index]; 1607 n = skip >> L2DTSLOTSIZE; 1608 slv->offset = sp->header.stblindex + n; 1609 slv->length = 1610 ((sp->header.nextindex - 1) >> L2DTSLOTSIZE) - n + 1; 1611 sdtlck->index++; 1612 } 1613 /* 1614 * the skipped index was on the right page, 1615 */ 1616 else { 1617 /* adjust the skip index to reflect the new position */ 1618 skip -= nxt; 1619 1620 /* insert the new entry in the right page */ 1621 dtInsertEntry(rp, skip, split->key, split->data, &rdtlck); 1622 } 1623 1624 out: 1625 *rmpp = rmp; 1626 *rpxdp = *pxd; 1627 1628 return rc; 1629 } 1630 1631 1632 /* 1633 * dtExtendPage() 1634 * 1635 * function: extend 1st/only directory leaf page 1636 * 1637 * parameter: 1638 * 1639 * return: 0 - success; 1640 * errno - failure; 1641 * return extended page pinned; 1642 */ 1643 static int dtExtendPage(tid_t tid, 1644 struct inode *ip, struct dtsplit * split, struct btstack * btstack) 1645 { 1646 struct super_block *sb = ip->i_sb; 1647 int rc; 1648 struct metapage *smp, *pmp, *mp; 1649 dtpage_t *sp, *pp; 1650 struct pxdlist *pxdlist; 1651 pxd_t *pxd, *tpxd; 1652 int xlen, xsize; 1653 int newstblindex, newstblsize; 1654 int oldstblindex, oldstblsize; 1655 int fsi, last; 1656 struct dtslot *f; 1657 struct btframe *parent; 1658 int n; 1659 struct dt_lock *dtlck; 1660 s64 xaddr, txaddr; 1661 struct tlock *tlck; 1662 struct pxd_lock *pxdlock; 1663 struct lv *lv; 1664 uint type; 1665 struct ldtentry *ldtentry; 1666 u8 *stbl; 1667 1668 /* get page to extend */ 1669 smp = split->mp; 1670 sp = DT_PAGE(ip, smp); 1671 1672 /* get parent/root page */ 1673 parent = BT_POP(btstack); 1674 DT_GETPAGE(ip, parent->bn, pmp, PSIZE, pp, rc); 1675 if (rc) 1676 return (rc); 1677 1678 /* 1679 * extend the extent 1680 */ 1681 pxdlist = split->pxdlist; 1682 pxd = &pxdlist->pxd[pxdlist->npxd]; 1683 pxdlist->npxd++; 1684 1685 xaddr = addressPXD(pxd); 1686 tpxd = &sp->header.self; 1687 txaddr = addressPXD(tpxd); 1688 /* in-place extension */ 1689 if (xaddr == txaddr) { 1690 type = tlckEXTEND; 1691 } 1692 /* relocation */ 1693 else { 1694 type = tlckNEW; 1695 1696 /* save moved extent descriptor for later free */ 1697 tlck = txMaplock(tid, ip, tlckDTREE | tlckRELOCATE); 1698 pxdlock = (struct pxd_lock *) & tlck->lock; 1699 pxdlock->flag = mlckFREEPXD; 1700 pxdlock->pxd = sp->header.self; 1701 pxdlock->index = 1; 1702 1703 /* 1704 * Update directory index table to reflect new page address 1705 */ 1706 if (DO_INDEX(ip)) { 1707 s64 lblock; 1708 1709 mp = NULL; 1710 stbl = DT_GETSTBL(sp); 1711 for (n = 0; n < sp->header.nextindex; n++) { 1712 ldtentry = 1713 (struct ldtentry *) & sp->slot[stbl[n]]; 1714 modify_index(tid, ip, 1715 le32_to_cpu(ldtentry->index), 1716 xaddr, n, &mp, &lblock); 1717 } 1718 if (mp) 1719 release_metapage(mp); 1720 } 1721 } 1722 1723 /* 1724 * extend the page 1725 */ 1726 sp->header.self = *pxd; 1727 1728 jfs_info("dtExtendPage: ip:0x%p smp:0x%p sp:0x%p", ip, smp, sp); 1729 1730 BT_MARK_DIRTY(smp, ip); 1731 /* 1732 * acquire a transaction lock on the extended/leaf page 1733 */ 1734 tlck = txLock(tid, ip, smp, tlckDTREE | type); 1735 dtlck = (struct dt_lock *) & tlck->lock; 1736 lv = & dtlck->lv[0]; 1737 1738 /* update buffer extent descriptor of extended page */ 1739 xlen = lengthPXD(pxd); 1740 xsize = xlen << JFS_SBI(sb)->l2bsize; 1741 1742 /* 1743 * copy old stbl to new stbl at start of extended area 1744 */ 1745 oldstblindex = sp->header.stblindex; 1746 oldstblsize = (sp->header.maxslot + 31) >> L2DTSLOTSIZE; 1747 newstblindex = sp->header.maxslot; 1748 n = xsize >> L2DTSLOTSIZE; 1749 newstblsize = (n + 31) >> L2DTSLOTSIZE; 1750 memcpy(&sp->slot[newstblindex], &sp->slot[oldstblindex], 1751 sp->header.nextindex); 1752 1753 /* 1754 * in-line extension: linelock old area of extended page 1755 */ 1756 if (type == tlckEXTEND) { 1757 /* linelock header */ 1758 lv->offset = 0; 1759 lv->length = 1; 1760 dtlck->index++; 1761 lv++; 1762 1763 /* linelock new stbl of extended page */ 1764 lv->offset = newstblindex; 1765 lv->length = newstblsize; 1766 } 1767 /* 1768 * relocation: linelock whole relocated area 1769 */ 1770 else { 1771 lv->offset = 0; 1772 lv->length = sp->header.maxslot + newstblsize; 1773 } 1774 1775 dtlck->index++; 1776 1777 sp->header.maxslot = n; 1778 sp->header.stblindex = newstblindex; 1779 /* sp->header.nextindex remains the same */ 1780 1781 /* 1782 * add old stbl region at head of freelist 1783 */ 1784 fsi = oldstblindex; 1785 f = &sp->slot[fsi]; 1786 last = sp->header.freelist; 1787 for (n = 0; n < oldstblsize; n++, fsi++, f++) { 1788 f->next = last; 1789 last = fsi; 1790 } 1791 sp->header.freelist = last; 1792 sp->header.freecnt += oldstblsize; 1793 1794 /* 1795 * append free region of newly extended area at tail of freelist 1796 */ 1797 /* init free region of newly extended area */ 1798 fsi = n = newstblindex + newstblsize; 1799 f = &sp->slot[fsi]; 1800 for (fsi++; fsi < sp->header.maxslot; f++, fsi++) 1801 f->next = fsi; 1802 f->next = -1; 1803 1804 /* append new free region at tail of old freelist */ 1805 fsi = sp->header.freelist; 1806 if (fsi == -1) 1807 sp->header.freelist = n; 1808 else { 1809 do { 1810 f = &sp->slot[fsi]; 1811 fsi = f->next; 1812 } while (fsi != -1); 1813 1814 f->next = n; 1815 } 1816 1817 sp->header.freecnt += sp->header.maxslot - n; 1818 1819 /* 1820 * insert the new entry 1821 */ 1822 dtInsertEntry(sp, split->index, split->key, split->data, &dtlck); 1823 1824 BT_MARK_DIRTY(pmp, ip); 1825 /* 1826 * linelock any freeslots residing in old extent 1827 */ 1828 if (type == tlckEXTEND) { 1829 n = sp->header.maxslot >> 2; 1830 if (sp->header.freelist < n) 1831 dtLinelockFreelist(sp, n, &dtlck); 1832 } 1833 1834 /* 1835 * update parent entry on the parent/root page 1836 */ 1837 /* 1838 * acquire a transaction lock on the parent/root page 1839 */ 1840 tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY); 1841 dtlck = (struct dt_lock *) & tlck->lock; 1842 lv = & dtlck->lv[dtlck->index]; 1843 1844 /* linelock parent entry - 1st slot */ 1845 lv->offset = 1; 1846 lv->length = 1; 1847 dtlck->index++; 1848 1849 /* update the parent pxd for page extension */ 1850 tpxd = (pxd_t *) & pp->slot[1]; 1851 *tpxd = *pxd; 1852 1853 DT_PUTPAGE(pmp); 1854 return 0; 1855 } 1856 1857 1858 /* 1859 * dtSplitRoot() 1860 * 1861 * function: 1862 * split the full root page into 1863 * original/root/split page and new right page 1864 * i.e., root remains fixed in tree anchor (inode) and 1865 * the root is copied to a single new right child page 1866 * since root page << non-root page, and 1867 * the split root page contains a single entry for the 1868 * new right child page. 1869 * 1870 * parameter: 1871 * 1872 * return: 0 - success; 1873 * errno - failure; 1874 * return new page pinned; 1875 */ 1876 static int dtSplitRoot(tid_t tid, 1877 struct inode *ip, struct dtsplit * split, struct metapage ** rmpp) 1878 { 1879 struct super_block *sb = ip->i_sb; 1880 struct metapage *smp; 1881 dtroot_t *sp; 1882 struct metapage *rmp; 1883 dtpage_t *rp; 1884 s64 rbn; 1885 int xlen; 1886 int xsize; 1887 struct dtslot *f; 1888 s8 *stbl; 1889 int fsi, stblsize, n; 1890 struct idtentry *s; 1891 pxd_t *ppxd; 1892 struct pxdlist *pxdlist; 1893 pxd_t *pxd; 1894 struct dt_lock *dtlck; 1895 struct tlock *tlck; 1896 struct lv *lv; 1897 int rc; 1898 1899 /* get split root page */ 1900 smp = split->mp; 1901 sp = &JFS_IP(ip)->i_dtroot; 1902 1903 /* 1904 * allocate/initialize a single (right) child page 1905 * 1906 * N.B. at first split, a one (or two) block to fit new entry 1907 * is allocated; at subsequent split, a full page is allocated; 1908 */ 1909 pxdlist = split->pxdlist; 1910 pxd = &pxdlist->pxd[pxdlist->npxd]; 1911 pxdlist->npxd++; 1912 rbn = addressPXD(pxd); 1913 xlen = lengthPXD(pxd); 1914 xsize = xlen << JFS_SBI(sb)->l2bsize; 1915 rmp = get_metapage(ip, rbn, xsize, 1); 1916 if (!rmp) 1917 return -EIO; 1918 1919 rp = rmp->data; 1920 1921 /* Allocate blocks to quota. */ 1922 rc = dquot_alloc_block(ip, lengthPXD(pxd)); 1923 if (rc) { 1924 release_metapage(rmp); 1925 return rc; 1926 } 1927 1928 BT_MARK_DIRTY(rmp, ip); 1929 /* 1930 * acquire a transaction lock on the new right page 1931 */ 1932 tlck = txLock(tid, ip, rmp, tlckDTREE | tlckNEW); 1933 dtlck = (struct dt_lock *) & tlck->lock; 1934 1935 rp->header.flag = 1936 (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL; 1937 rp->header.self = *pxd; 1938 1939 /* initialize sibling pointers */ 1940 rp->header.next = 0; 1941 rp->header.prev = 0; 1942 1943 /* 1944 * move in-line root page into new right page extent 1945 */ 1946 /* linelock header + copied entries + new stbl (1st slot) in new page */ 1947 ASSERT(dtlck->index == 0); 1948 lv = & dtlck->lv[0]; 1949 lv->offset = 0; 1950 lv->length = 10; /* 1 + 8 + 1 */ 1951 dtlck->index++; 1952 1953 n = xsize >> L2DTSLOTSIZE; 1954 rp->header.maxslot = n; 1955 stblsize = (n + 31) >> L2DTSLOTSIZE; 1956 1957 /* copy old stbl to new stbl at start of extended area */ 1958 rp->header.stblindex = DTROOTMAXSLOT; 1959 stbl = (s8 *) & rp->slot[DTROOTMAXSLOT]; 1960 memcpy(stbl, sp->header.stbl, sp->header.nextindex); 1961 rp->header.nextindex = sp->header.nextindex; 1962 1963 /* copy old data area to start of new data area */ 1964 memcpy(&rp->slot[1], &sp->slot[1], IDATASIZE); 1965 1966 /* 1967 * append free region of newly extended area at tail of freelist 1968 */ 1969 /* init free region of newly extended area */ 1970 fsi = n = DTROOTMAXSLOT + stblsize; 1971 f = &rp->slot[fsi]; 1972 for (fsi++; fsi < rp->header.maxslot; f++, fsi++) 1973 f->next = fsi; 1974 f->next = -1; 1975 1976 /* append new free region at tail of old freelist */ 1977 fsi = sp->header.freelist; 1978 if (fsi == -1) 1979 rp->header.freelist = n; 1980 else { 1981 rp->header.freelist = fsi; 1982 1983 do { 1984 f = &rp->slot[fsi]; 1985 fsi = f->next; 1986 } while (fsi != -1); 1987 1988 f->next = n; 1989 } 1990 1991 rp->header.freecnt = sp->header.freecnt + rp->header.maxslot - n; 1992 1993 /* 1994 * Update directory index table for entries now in right page 1995 */ 1996 if ((rp->header.flag & BT_LEAF) && DO_INDEX(ip)) { 1997 s64 lblock; 1998 struct metapage *mp = NULL; 1999 struct ldtentry *ldtentry; 2000 2001 stbl = DT_GETSTBL(rp); 2002 for (n = 0; n < rp->header.nextindex; n++) { 2003 ldtentry = (struct ldtentry *) & rp->slot[stbl[n]]; 2004 modify_index(tid, ip, le32_to_cpu(ldtentry->index), 2005 rbn, n, &mp, &lblock); 2006 } 2007 if (mp) 2008 release_metapage(mp); 2009 } 2010 /* 2011 * insert the new entry into the new right/child page 2012 * (skip index in the new right page will not change) 2013 */ 2014 dtInsertEntry(rp, split->index, split->key, split->data, &dtlck); 2015 2016 /* 2017 * reset parent/root page 2018 * 2019 * set the 1st entry offset to 0, which force the left-most key 2020 * at any level of the tree to be less than any search key. 2021 * 2022 * The btree comparison code guarantees that the left-most key on any 2023 * level of the tree is never used, so it doesn't need to be filled in. 2024 */ 2025 BT_MARK_DIRTY(smp, ip); 2026 /* 2027 * acquire a transaction lock on the root page (in-memory inode) 2028 */ 2029 tlck = txLock(tid, ip, smp, tlckDTREE | tlckNEW | tlckBTROOT); 2030 dtlck = (struct dt_lock *) & tlck->lock; 2031 2032 /* linelock root */ 2033 ASSERT(dtlck->index == 0); 2034 lv = & dtlck->lv[0]; 2035 lv->offset = 0; 2036 lv->length = DTROOTMAXSLOT; 2037 dtlck->index++; 2038 2039 /* update page header of root */ 2040 if (sp->header.flag & BT_LEAF) { 2041 sp->header.flag &= ~BT_LEAF; 2042 sp->header.flag |= BT_INTERNAL; 2043 } 2044 2045 /* init the first entry */ 2046 s = (struct idtentry *) & sp->slot[DTENTRYSTART]; 2047 ppxd = (pxd_t *) s; 2048 *ppxd = *pxd; 2049 s->next = -1; 2050 s->namlen = 0; 2051 2052 stbl = sp->header.stbl; 2053 stbl[0] = DTENTRYSTART; 2054 sp->header.nextindex = 1; 2055 2056 /* init freelist */ 2057 fsi = DTENTRYSTART + 1; 2058 f = &sp->slot[fsi]; 2059 2060 /* init free region of remaining area */ 2061 for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++) 2062 f->next = fsi; 2063 f->next = -1; 2064 2065 sp->header.freelist = DTENTRYSTART + 1; 2066 sp->header.freecnt = DTROOTMAXSLOT - (DTENTRYSTART + 1); 2067 2068 *rmpp = rmp; 2069 2070 return 0; 2071 } 2072 2073 2074 /* 2075 * dtDelete() 2076 * 2077 * function: delete the entry(s) referenced by a key. 2078 * 2079 * parameter: 2080 * 2081 * return: 2082 */ 2083 int dtDelete(tid_t tid, 2084 struct inode *ip, struct component_name * key, ino_t * ino, int flag) 2085 { 2086 int rc = 0; 2087 s64 bn; 2088 struct metapage *mp, *imp; 2089 dtpage_t *p; 2090 int index; 2091 struct btstack btstack; 2092 struct dt_lock *dtlck; 2093 struct tlock *tlck; 2094 struct lv *lv; 2095 int i; 2096 struct ldtentry *ldtentry; 2097 u8 *stbl; 2098 u32 table_index, next_index; 2099 struct metapage *nmp; 2100 dtpage_t *np; 2101 2102 /* 2103 * search for the entry to delete: 2104 * 2105 * dtSearch() returns (leaf page pinned, index at which to delete). 2106 */ 2107 if ((rc = dtSearch(ip, key, ino, &btstack, flag))) 2108 return rc; 2109 2110 /* retrieve search result */ 2111 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 2112 2113 /* 2114 * We need to find put the index of the next entry into the 2115 * directory index table in order to resume a readdir from this 2116 * entry. 2117 */ 2118 if (DO_INDEX(ip)) { 2119 stbl = DT_GETSTBL(p); 2120 ldtentry = (struct ldtentry *) & p->slot[stbl[index]]; 2121 table_index = le32_to_cpu(ldtentry->index); 2122 if (index == (p->header.nextindex - 1)) { 2123 /* 2124 * Last entry in this leaf page 2125 */ 2126 if ((p->header.flag & BT_ROOT) 2127 || (p->header.next == 0)) 2128 next_index = -1; 2129 else { 2130 /* Read next leaf page */ 2131 DT_GETPAGE(ip, le64_to_cpu(p->header.next), 2132 nmp, PSIZE, np, rc); 2133 if (rc) 2134 next_index = -1; 2135 else { 2136 stbl = DT_GETSTBL(np); 2137 ldtentry = 2138 (struct ldtentry *) & np-> 2139 slot[stbl[0]]; 2140 next_index = 2141 le32_to_cpu(ldtentry->index); 2142 DT_PUTPAGE(nmp); 2143 } 2144 } 2145 } else { 2146 ldtentry = 2147 (struct ldtentry *) & p->slot[stbl[index + 1]]; 2148 next_index = le32_to_cpu(ldtentry->index); 2149 } 2150 free_index(tid, ip, table_index, next_index); 2151 } 2152 /* 2153 * the leaf page becomes empty, delete the page 2154 */ 2155 if (p->header.nextindex == 1) { 2156 /* delete empty page */ 2157 rc = dtDeleteUp(tid, ip, mp, p, &btstack); 2158 } 2159 /* 2160 * the leaf page has other entries remaining: 2161 * 2162 * delete the entry from the leaf page. 2163 */ 2164 else { 2165 BT_MARK_DIRTY(mp, ip); 2166 /* 2167 * acquire a transaction lock on the leaf page 2168 */ 2169 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 2170 dtlck = (struct dt_lock *) & tlck->lock; 2171 2172 /* 2173 * Do not assume that dtlck->index will be zero. During a 2174 * rename within a directory, this transaction may have 2175 * modified this page already when adding the new entry. 2176 */ 2177 2178 /* linelock header */ 2179 if (dtlck->index >= dtlck->maxcnt) 2180 dtlck = (struct dt_lock *) txLinelock(dtlck); 2181 lv = & dtlck->lv[dtlck->index]; 2182 lv->offset = 0; 2183 lv->length = 1; 2184 dtlck->index++; 2185 2186 /* linelock stbl of non-root leaf page */ 2187 if (!(p->header.flag & BT_ROOT)) { 2188 if (dtlck->index >= dtlck->maxcnt) 2189 dtlck = (struct dt_lock *) txLinelock(dtlck); 2190 lv = & dtlck->lv[dtlck->index]; 2191 i = index >> L2DTSLOTSIZE; 2192 lv->offset = p->header.stblindex + i; 2193 lv->length = 2194 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - 2195 i + 1; 2196 dtlck->index++; 2197 } 2198 2199 /* free the leaf entry */ 2200 dtDeleteEntry(p, index, &dtlck); 2201 2202 /* 2203 * Update directory index table for entries moved in stbl 2204 */ 2205 if (DO_INDEX(ip) && index < p->header.nextindex) { 2206 s64 lblock; 2207 2208 imp = NULL; 2209 stbl = DT_GETSTBL(p); 2210 for (i = index; i < p->header.nextindex; i++) { 2211 ldtentry = 2212 (struct ldtentry *) & p->slot[stbl[i]]; 2213 modify_index(tid, ip, 2214 le32_to_cpu(ldtentry->index), 2215 bn, i, &imp, &lblock); 2216 } 2217 if (imp) 2218 release_metapage(imp); 2219 } 2220 2221 DT_PUTPAGE(mp); 2222 } 2223 2224 return rc; 2225 } 2226 2227 2228 /* 2229 * dtDeleteUp() 2230 * 2231 * function: 2232 * free empty pages as propagating deletion up the tree 2233 * 2234 * parameter: 2235 * 2236 * return: 2237 */ 2238 static int dtDeleteUp(tid_t tid, struct inode *ip, 2239 struct metapage * fmp, dtpage_t * fp, struct btstack * btstack) 2240 { 2241 int rc = 0; 2242 struct metapage *mp; 2243 dtpage_t *p; 2244 int index, nextindex; 2245 int xlen; 2246 struct btframe *parent; 2247 struct dt_lock *dtlck; 2248 struct tlock *tlck; 2249 struct lv *lv; 2250 struct pxd_lock *pxdlock; 2251 int i; 2252 2253 /* 2254 * keep the root leaf page which has become empty 2255 */ 2256 if (BT_IS_ROOT(fmp)) { 2257 /* 2258 * reset the root 2259 * 2260 * dtInitRoot() acquires txlock on the root 2261 */ 2262 dtInitRoot(tid, ip, PARENT(ip)); 2263 2264 DT_PUTPAGE(fmp); 2265 2266 return 0; 2267 } 2268 2269 /* 2270 * free the non-root leaf page 2271 */ 2272 /* 2273 * acquire a transaction lock on the page 2274 * 2275 * write FREEXTENT|NOREDOPAGE log record 2276 * N.B. linelock is overlaid as freed extent descriptor, and 2277 * the buffer page is freed; 2278 */ 2279 tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE); 2280 pxdlock = (struct pxd_lock *) & tlck->lock; 2281 pxdlock->flag = mlckFREEPXD; 2282 pxdlock->pxd = fp->header.self; 2283 pxdlock->index = 1; 2284 2285 /* update sibling pointers */ 2286 if ((rc = dtRelink(tid, ip, fp))) { 2287 BT_PUTPAGE(fmp); 2288 return rc; 2289 } 2290 2291 xlen = lengthPXD(&fp->header.self); 2292 2293 /* Free quota allocation. */ 2294 dquot_free_block(ip, xlen); 2295 2296 /* free/invalidate its buffer page */ 2297 discard_metapage(fmp); 2298 2299 /* 2300 * propagate page deletion up the directory tree 2301 * 2302 * If the delete from the parent page makes it empty, 2303 * continue all the way up the tree. 2304 * stop if the root page is reached (which is never deleted) or 2305 * if the entry deletion does not empty the page. 2306 */ 2307 while ((parent = BT_POP(btstack)) != NULL) { 2308 /* pin the parent page <sp> */ 2309 DT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc); 2310 if (rc) 2311 return rc; 2312 2313 /* 2314 * free the extent of the child page deleted 2315 */ 2316 index = parent->index; 2317 2318 /* 2319 * delete the entry for the child page from parent 2320 */ 2321 nextindex = p->header.nextindex; 2322 2323 /* 2324 * the parent has the single entry being deleted: 2325 * 2326 * free the parent page which has become empty. 2327 */ 2328 if (nextindex == 1) { 2329 /* 2330 * keep the root internal page which has become empty 2331 */ 2332 if (p->header.flag & BT_ROOT) { 2333 /* 2334 * reset the root 2335 * 2336 * dtInitRoot() acquires txlock on the root 2337 */ 2338 dtInitRoot(tid, ip, PARENT(ip)); 2339 2340 DT_PUTPAGE(mp); 2341 2342 return 0; 2343 } 2344 /* 2345 * free the parent page 2346 */ 2347 else { 2348 /* 2349 * acquire a transaction lock on the page 2350 * 2351 * write FREEXTENT|NOREDOPAGE log record 2352 */ 2353 tlck = 2354 txMaplock(tid, ip, 2355 tlckDTREE | tlckFREE); 2356 pxdlock = (struct pxd_lock *) & tlck->lock; 2357 pxdlock->flag = mlckFREEPXD; 2358 pxdlock->pxd = p->header.self; 2359 pxdlock->index = 1; 2360 2361 /* update sibling pointers */ 2362 if ((rc = dtRelink(tid, ip, p))) { 2363 DT_PUTPAGE(mp); 2364 return rc; 2365 } 2366 2367 xlen = lengthPXD(&p->header.self); 2368 2369 /* Free quota allocation */ 2370 dquot_free_block(ip, xlen); 2371 2372 /* free/invalidate its buffer page */ 2373 discard_metapage(mp); 2374 2375 /* propagate up */ 2376 continue; 2377 } 2378 } 2379 2380 /* 2381 * the parent has other entries remaining: 2382 * 2383 * delete the router entry from the parent page. 2384 */ 2385 BT_MARK_DIRTY(mp, ip); 2386 /* 2387 * acquire a transaction lock on the page 2388 * 2389 * action: router entry deletion 2390 */ 2391 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 2392 dtlck = (struct dt_lock *) & tlck->lock; 2393 2394 /* linelock header */ 2395 if (dtlck->index >= dtlck->maxcnt) 2396 dtlck = (struct dt_lock *) txLinelock(dtlck); 2397 lv = & dtlck->lv[dtlck->index]; 2398 lv->offset = 0; 2399 lv->length = 1; 2400 dtlck->index++; 2401 2402 /* linelock stbl of non-root leaf page */ 2403 if (!(p->header.flag & BT_ROOT)) { 2404 if (dtlck->index < dtlck->maxcnt) 2405 lv++; 2406 else { 2407 dtlck = (struct dt_lock *) txLinelock(dtlck); 2408 lv = & dtlck->lv[0]; 2409 } 2410 i = index >> L2DTSLOTSIZE; 2411 lv->offset = p->header.stblindex + i; 2412 lv->length = 2413 ((p->header.nextindex - 1) >> L2DTSLOTSIZE) - 2414 i + 1; 2415 dtlck->index++; 2416 } 2417 2418 /* free the router entry */ 2419 dtDeleteEntry(p, index, &dtlck); 2420 2421 /* reset key of new leftmost entry of level (for consistency) */ 2422 if (index == 0 && 2423 ((p->header.flag & BT_ROOT) || p->header.prev == 0)) 2424 dtTruncateEntry(p, 0, &dtlck); 2425 2426 /* unpin the parent page */ 2427 DT_PUTPAGE(mp); 2428 2429 /* exit propagation up */ 2430 break; 2431 } 2432 2433 if (!DO_INDEX(ip)) 2434 ip->i_size -= PSIZE; 2435 2436 return 0; 2437 } 2438 2439 #ifdef _NOTYET 2440 /* 2441 * NAME: dtRelocate() 2442 * 2443 * FUNCTION: relocate dtpage (internal or leaf) of directory; 2444 * This function is mainly used by defragfs utility. 2445 */ 2446 int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, 2447 s64 nxaddr) 2448 { 2449 int rc = 0; 2450 struct metapage *mp, *pmp, *lmp, *rmp; 2451 dtpage_t *p, *pp, *rp = 0, *lp= 0; 2452 s64 bn; 2453 int index; 2454 struct btstack btstack; 2455 pxd_t *pxd; 2456 s64 oxaddr, nextbn, prevbn; 2457 int xlen, xsize; 2458 struct tlock *tlck; 2459 struct dt_lock *dtlck; 2460 struct pxd_lock *pxdlock; 2461 s8 *stbl; 2462 struct lv *lv; 2463 2464 oxaddr = addressPXD(opxd); 2465 xlen = lengthPXD(opxd); 2466 2467 jfs_info("dtRelocate: lmxaddr:%Ld xaddr:%Ld:%Ld xlen:%d", 2468 (long long)lmxaddr, (long long)oxaddr, (long long)nxaddr, 2469 xlen); 2470 2471 /* 2472 * 1. get the internal parent dtpage covering 2473 * router entry for the tartget page to be relocated; 2474 */ 2475 rc = dtSearchNode(ip, lmxaddr, opxd, &btstack); 2476 if (rc) 2477 return rc; 2478 2479 /* retrieve search result */ 2480 DT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); 2481 jfs_info("dtRelocate: parent router entry validated."); 2482 2483 /* 2484 * 2. relocate the target dtpage 2485 */ 2486 /* read in the target page from src extent */ 2487 DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); 2488 if (rc) { 2489 /* release the pinned parent page */ 2490 DT_PUTPAGE(pmp); 2491 return rc; 2492 } 2493 2494 /* 2495 * read in sibling pages if any to update sibling pointers; 2496 */ 2497 rmp = NULL; 2498 if (p->header.next) { 2499 nextbn = le64_to_cpu(p->header.next); 2500 DT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc); 2501 if (rc) { 2502 DT_PUTPAGE(mp); 2503 DT_PUTPAGE(pmp); 2504 return (rc); 2505 } 2506 } 2507 2508 lmp = NULL; 2509 if (p->header.prev) { 2510 prevbn = le64_to_cpu(p->header.prev); 2511 DT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc); 2512 if (rc) { 2513 DT_PUTPAGE(mp); 2514 DT_PUTPAGE(pmp); 2515 if (rmp) 2516 DT_PUTPAGE(rmp); 2517 return (rc); 2518 } 2519 } 2520 2521 /* at this point, all xtpages to be updated are in memory */ 2522 2523 /* 2524 * update sibling pointers of sibling dtpages if any; 2525 */ 2526 if (lmp) { 2527 tlck = txLock(tid, ip, lmp, tlckDTREE | tlckRELINK); 2528 dtlck = (struct dt_lock *) & tlck->lock; 2529 /* linelock header */ 2530 ASSERT(dtlck->index == 0); 2531 lv = & dtlck->lv[0]; 2532 lv->offset = 0; 2533 lv->length = 1; 2534 dtlck->index++; 2535 2536 lp->header.next = cpu_to_le64(nxaddr); 2537 DT_PUTPAGE(lmp); 2538 } 2539 2540 if (rmp) { 2541 tlck = txLock(tid, ip, rmp, tlckDTREE | tlckRELINK); 2542 dtlck = (struct dt_lock *) & tlck->lock; 2543 /* linelock header */ 2544 ASSERT(dtlck->index == 0); 2545 lv = & dtlck->lv[0]; 2546 lv->offset = 0; 2547 lv->length = 1; 2548 dtlck->index++; 2549 2550 rp->header.prev = cpu_to_le64(nxaddr); 2551 DT_PUTPAGE(rmp); 2552 } 2553 2554 /* 2555 * update the target dtpage to be relocated 2556 * 2557 * write LOG_REDOPAGE of LOG_NEW type for dst page 2558 * for the whole target page (logredo() will apply 2559 * after image and update bmap for allocation of the 2560 * dst extent), and update bmap for allocation of 2561 * the dst extent; 2562 */ 2563 tlck = txLock(tid, ip, mp, tlckDTREE | tlckNEW); 2564 dtlck = (struct dt_lock *) & tlck->lock; 2565 /* linelock header */ 2566 ASSERT(dtlck->index == 0); 2567 lv = & dtlck->lv[0]; 2568 2569 /* update the self address in the dtpage header */ 2570 pxd = &p->header.self; 2571 PXDaddress(pxd, nxaddr); 2572 2573 /* the dst page is the same as the src page, i.e., 2574 * linelock for afterimage of the whole page; 2575 */ 2576 lv->offset = 0; 2577 lv->length = p->header.maxslot; 2578 dtlck->index++; 2579 2580 /* update the buffer extent descriptor of the dtpage */ 2581 xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; 2582 2583 /* unpin the relocated page */ 2584 DT_PUTPAGE(mp); 2585 jfs_info("dtRelocate: target dtpage relocated."); 2586 2587 /* the moved extent is dtpage, then a LOG_NOREDOPAGE log rec 2588 * needs to be written (in logredo(), the LOG_NOREDOPAGE log rec 2589 * will also force a bmap update ). 2590 */ 2591 2592 /* 2593 * 3. acquire maplock for the source extent to be freed; 2594 */ 2595 /* for dtpage relocation, write a LOG_NOREDOPAGE record 2596 * for the source dtpage (logredo() will init NoRedoPage 2597 * filter and will also update bmap for free of the source 2598 * dtpage), and upadte bmap for free of the source dtpage; 2599 */ 2600 tlck = txMaplock(tid, ip, tlckDTREE | tlckFREE); 2601 pxdlock = (struct pxd_lock *) & tlck->lock; 2602 pxdlock->flag = mlckFREEPXD; 2603 PXDaddress(&pxdlock->pxd, oxaddr); 2604 PXDlength(&pxdlock->pxd, xlen); 2605 pxdlock->index = 1; 2606 2607 /* 2608 * 4. update the parent router entry for relocation; 2609 * 2610 * acquire tlck for the parent entry covering the target dtpage; 2611 * write LOG_REDOPAGE to apply after image only; 2612 */ 2613 jfs_info("dtRelocate: update parent router entry."); 2614 tlck = txLock(tid, ip, pmp, tlckDTREE | tlckENTRY); 2615 dtlck = (struct dt_lock *) & tlck->lock; 2616 lv = & dtlck->lv[dtlck->index]; 2617 2618 /* update the PXD with the new address */ 2619 stbl = DT_GETSTBL(pp); 2620 pxd = (pxd_t *) & pp->slot[stbl[index]]; 2621 PXDaddress(pxd, nxaddr); 2622 lv->offset = stbl[index]; 2623 lv->length = 1; 2624 dtlck->index++; 2625 2626 /* unpin the parent dtpage */ 2627 DT_PUTPAGE(pmp); 2628 2629 return rc; 2630 } 2631 2632 /* 2633 * NAME: dtSearchNode() 2634 * 2635 * FUNCTION: Search for an dtpage containing a specified address 2636 * This function is mainly used by defragfs utility. 2637 * 2638 * NOTE: Search result on stack, the found page is pinned at exit. 2639 * The result page must be an internal dtpage. 2640 * lmxaddr give the address of the left most page of the 2641 * dtree level, in which the required dtpage resides. 2642 */ 2643 static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd, 2644 struct btstack * btstack) 2645 { 2646 int rc = 0; 2647 s64 bn; 2648 struct metapage *mp; 2649 dtpage_t *p; 2650 int psize = 288; /* initial in-line directory */ 2651 s8 *stbl; 2652 int i; 2653 pxd_t *pxd; 2654 struct btframe *btsp; 2655 2656 BT_CLR(btstack); /* reset stack */ 2657 2658 /* 2659 * descend tree to the level with specified leftmost page 2660 * 2661 * by convention, root bn = 0. 2662 */ 2663 for (bn = 0;;) { 2664 /* get/pin the page to search */ 2665 DT_GETPAGE(ip, bn, mp, psize, p, rc); 2666 if (rc) 2667 return rc; 2668 2669 /* does the xaddr of leftmost page of the levevl 2670 * matches levevl search key ? 2671 */ 2672 if (p->header.flag & BT_ROOT) { 2673 if (lmxaddr == 0) 2674 break; 2675 } else if (addressPXD(&p->header.self) == lmxaddr) 2676 break; 2677 2678 /* 2679 * descend down to leftmost child page 2680 */ 2681 if (p->header.flag & BT_LEAF) { 2682 DT_PUTPAGE(mp); 2683 return -ESTALE; 2684 } 2685 2686 /* get the leftmost entry */ 2687 stbl = DT_GETSTBL(p); 2688 pxd = (pxd_t *) & p->slot[stbl[0]]; 2689 2690 /* get the child page block address */ 2691 bn = addressPXD(pxd); 2692 psize = lengthPXD(pxd) << JFS_SBI(ip->i_sb)->l2bsize; 2693 /* unpin the parent page */ 2694 DT_PUTPAGE(mp); 2695 } 2696 2697 /* 2698 * search each page at the current levevl 2699 */ 2700 loop: 2701 stbl = DT_GETSTBL(p); 2702 for (i = 0; i < p->header.nextindex; i++) { 2703 pxd = (pxd_t *) & p->slot[stbl[i]]; 2704 2705 /* found the specified router entry */ 2706 if (addressPXD(pxd) == addressPXD(kpxd) && 2707 lengthPXD(pxd) == lengthPXD(kpxd)) { 2708 btsp = btstack->top; 2709 btsp->bn = bn; 2710 btsp->index = i; 2711 btsp->mp = mp; 2712 2713 return 0; 2714 } 2715 } 2716 2717 /* get the right sibling page if any */ 2718 if (p->header.next) 2719 bn = le64_to_cpu(p->header.next); 2720 else { 2721 DT_PUTPAGE(mp); 2722 return -ESTALE; 2723 } 2724 2725 /* unpin current page */ 2726 DT_PUTPAGE(mp); 2727 2728 /* get the right sibling page */ 2729 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 2730 if (rc) 2731 return rc; 2732 2733 goto loop; 2734 } 2735 #endif /* _NOTYET */ 2736 2737 /* 2738 * dtRelink() 2739 * 2740 * function: 2741 * link around a freed page. 2742 * 2743 * parameter: 2744 * fp: page to be freed 2745 * 2746 * return: 2747 */ 2748 static int dtRelink(tid_t tid, struct inode *ip, dtpage_t * p) 2749 { 2750 int rc; 2751 struct metapage *mp; 2752 s64 nextbn, prevbn; 2753 struct tlock *tlck; 2754 struct dt_lock *dtlck; 2755 struct lv *lv; 2756 2757 nextbn = le64_to_cpu(p->header.next); 2758 prevbn = le64_to_cpu(p->header.prev); 2759 2760 /* update prev pointer of the next page */ 2761 if (nextbn != 0) { 2762 DT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc); 2763 if (rc) 2764 return rc; 2765 2766 BT_MARK_DIRTY(mp, ip); 2767 /* 2768 * acquire a transaction lock on the next page 2769 * 2770 * action: update prev pointer; 2771 */ 2772 tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); 2773 jfs_info("dtRelink nextbn: tlck = 0x%p, ip = 0x%p, mp=0x%p", 2774 tlck, ip, mp); 2775 dtlck = (struct dt_lock *) & tlck->lock; 2776 2777 /* linelock header */ 2778 if (dtlck->index >= dtlck->maxcnt) 2779 dtlck = (struct dt_lock *) txLinelock(dtlck); 2780 lv = & dtlck->lv[dtlck->index]; 2781 lv->offset = 0; 2782 lv->length = 1; 2783 dtlck->index++; 2784 2785 p->header.prev = cpu_to_le64(prevbn); 2786 DT_PUTPAGE(mp); 2787 } 2788 2789 /* update next pointer of the previous page */ 2790 if (prevbn != 0) { 2791 DT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc); 2792 if (rc) 2793 return rc; 2794 2795 BT_MARK_DIRTY(mp, ip); 2796 /* 2797 * acquire a transaction lock on the prev page 2798 * 2799 * action: update next pointer; 2800 */ 2801 tlck = txLock(tid, ip, mp, tlckDTREE | tlckRELINK); 2802 jfs_info("dtRelink prevbn: tlck = 0x%p, ip = 0x%p, mp=0x%p", 2803 tlck, ip, mp); 2804 dtlck = (struct dt_lock *) & tlck->lock; 2805 2806 /* linelock header */ 2807 if (dtlck->index >= dtlck->maxcnt) 2808 dtlck = (struct dt_lock *) txLinelock(dtlck); 2809 lv = & dtlck->lv[dtlck->index]; 2810 lv->offset = 0; 2811 lv->length = 1; 2812 dtlck->index++; 2813 2814 p->header.next = cpu_to_le64(nextbn); 2815 DT_PUTPAGE(mp); 2816 } 2817 2818 return 0; 2819 } 2820 2821 2822 /* 2823 * dtInitRoot() 2824 * 2825 * initialize directory root (inline in inode) 2826 */ 2827 void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot) 2828 { 2829 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 2830 dtroot_t *p; 2831 int fsi; 2832 struct dtslot *f; 2833 struct tlock *tlck; 2834 struct dt_lock *dtlck; 2835 struct lv *lv; 2836 u16 xflag_save; 2837 2838 /* 2839 * If this was previously an non-empty directory, we need to remove 2840 * the old directory table. 2841 */ 2842 if (DO_INDEX(ip)) { 2843 if (!jfs_dirtable_inline(ip)) { 2844 struct tblock *tblk = tid_to_tblock(tid); 2845 /* 2846 * We're playing games with the tid's xflag. If 2847 * we're removing a regular file, the file's xtree 2848 * is committed with COMMIT_PMAP, but we always 2849 * commit the directories xtree with COMMIT_PWMAP. 2850 */ 2851 xflag_save = tblk->xflag; 2852 tblk->xflag = 0; 2853 /* 2854 * xtTruncate isn't guaranteed to fully truncate 2855 * the xtree. The caller needs to check i_size 2856 * after committing the transaction to see if 2857 * additional truncation is needed. The 2858 * COMMIT_Stale flag tells caller that we 2859 * initiated the truncation. 2860 */ 2861 xtTruncate(tid, ip, 0, COMMIT_PWMAP); 2862 set_cflag(COMMIT_Stale, ip); 2863 2864 tblk->xflag = xflag_save; 2865 } else 2866 ip->i_size = 1; 2867 2868 jfs_ip->next_index = 2; 2869 } else 2870 ip->i_size = IDATASIZE; 2871 2872 /* 2873 * acquire a transaction lock on the root 2874 * 2875 * action: directory initialization; 2876 */ 2877 tlck = txLock(tid, ip, (struct metapage *) & jfs_ip->bxflag, 2878 tlckDTREE | tlckENTRY | tlckBTROOT); 2879 dtlck = (struct dt_lock *) & tlck->lock; 2880 2881 /* linelock root */ 2882 ASSERT(dtlck->index == 0); 2883 lv = & dtlck->lv[0]; 2884 lv->offset = 0; 2885 lv->length = DTROOTMAXSLOT; 2886 dtlck->index++; 2887 2888 p = &jfs_ip->i_dtroot; 2889 2890 p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF; 2891 2892 p->header.nextindex = 0; 2893 2894 /* init freelist */ 2895 fsi = 1; 2896 f = &p->slot[fsi]; 2897 2898 /* init data area of root */ 2899 for (fsi++; fsi < DTROOTMAXSLOT; f++, fsi++) 2900 f->next = fsi; 2901 f->next = -1; 2902 2903 p->header.freelist = 1; 2904 p->header.freecnt = 8; 2905 2906 /* init '..' entry */ 2907 p->header.idotdot = cpu_to_le32(idotdot); 2908 2909 return; 2910 } 2911 2912 /* 2913 * add_missing_indices() 2914 * 2915 * function: Fix dtree page in which one or more entries has an invalid index. 2916 * fsck.jfs should really fix this, but it currently does not. 2917 * Called from jfs_readdir when bad index is detected. 2918 */ 2919 static void add_missing_indices(struct inode *inode, s64 bn) 2920 { 2921 struct ldtentry *d; 2922 struct dt_lock *dtlck; 2923 int i; 2924 uint index; 2925 struct lv *lv; 2926 struct metapage *mp; 2927 dtpage_t *p; 2928 int rc; 2929 s8 *stbl; 2930 tid_t tid; 2931 struct tlock *tlck; 2932 2933 tid = txBegin(inode->i_sb, 0); 2934 2935 DT_GETPAGE(inode, bn, mp, PSIZE, p, rc); 2936 2937 if (rc) { 2938 printk(KERN_ERR "DT_GETPAGE failed!\n"); 2939 goto end; 2940 } 2941 BT_MARK_DIRTY(mp, inode); 2942 2943 ASSERT(p->header.flag & BT_LEAF); 2944 2945 tlck = txLock(tid, inode, mp, tlckDTREE | tlckENTRY); 2946 if (BT_IS_ROOT(mp)) 2947 tlck->type |= tlckBTROOT; 2948 2949 dtlck = (struct dt_lock *) &tlck->lock; 2950 2951 stbl = DT_GETSTBL(p); 2952 for (i = 0; i < p->header.nextindex; i++) { 2953 d = (struct ldtentry *) &p->slot[stbl[i]]; 2954 index = le32_to_cpu(d->index); 2955 if ((index < 2) || (index >= JFS_IP(inode)->next_index)) { 2956 d->index = cpu_to_le32(add_index(tid, inode, bn, i)); 2957 if (dtlck->index >= dtlck->maxcnt) 2958 dtlck = (struct dt_lock *) txLinelock(dtlck); 2959 lv = &dtlck->lv[dtlck->index]; 2960 lv->offset = stbl[i]; 2961 lv->length = 1; 2962 dtlck->index++; 2963 } 2964 } 2965 2966 DT_PUTPAGE(mp); 2967 (void) txCommit(tid, 1, &inode, 0); 2968 end: 2969 txEnd(tid); 2970 } 2971 2972 /* 2973 * Buffer to hold directory entry info while traversing a dtree page 2974 * before being fed to the filldir function 2975 */ 2976 struct jfs_dirent { 2977 loff_t position; 2978 int ino; 2979 u16 name_len; 2980 char name[0]; 2981 }; 2982 2983 /* 2984 * function to determine next variable-sized jfs_dirent in buffer 2985 */ 2986 static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent) 2987 { 2988 return (struct jfs_dirent *) 2989 ((char *)dirent + 2990 ((sizeof (struct jfs_dirent) + dirent->name_len + 1 + 2991 sizeof (loff_t) - 1) & 2992 ~(sizeof (loff_t) - 1))); 2993 } 2994 2995 /* 2996 * jfs_readdir() 2997 * 2998 * function: read directory entries sequentially 2999 * from the specified entry offset 3000 * 3001 * parameter: 3002 * 3003 * return: offset = (pn, index) of start entry 3004 * of next jfs_readdir()/dtRead() 3005 */ 3006 int jfs_readdir(struct file *file, struct dir_context *ctx) 3007 { 3008 struct inode *ip = file_inode(file); 3009 struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab; 3010 int rc = 0; 3011 loff_t dtpos; /* legacy OS/2 style position */ 3012 struct dtoffset { 3013 s16 pn; 3014 s16 index; 3015 s32 unused; 3016 } *dtoffset = (struct dtoffset *) &dtpos; 3017 s64 bn; 3018 struct metapage *mp; 3019 dtpage_t *p; 3020 int index; 3021 s8 *stbl; 3022 struct btstack btstack; 3023 int i, next; 3024 struct ldtentry *d; 3025 struct dtslot *t; 3026 int d_namleft, len, outlen; 3027 unsigned long dirent_buf; 3028 char *name_ptr; 3029 u32 dir_index; 3030 int do_index = 0; 3031 uint loop_count = 0; 3032 struct jfs_dirent *jfs_dirent; 3033 int jfs_dirents; 3034 int overflow, fix_page, page_fixed = 0; 3035 static int unique_pos = 2; /* If we can't fix broken index */ 3036 3037 if (ctx->pos == DIREND) 3038 return 0; 3039 3040 if (DO_INDEX(ip)) { 3041 /* 3042 * persistent index is stored in directory entries. 3043 * Special cases: 0 = . 3044 * 1 = .. 3045 * -1 = End of directory 3046 */ 3047 do_index = 1; 3048 3049 dir_index = (u32) ctx->pos; 3050 3051 /* 3052 * NFSv4 reserves cookies 1 and 2 for . and .. so the value 3053 * we return to the vfs is one greater than the one we use 3054 * internally. 3055 */ 3056 if (dir_index) 3057 dir_index--; 3058 3059 if (dir_index > 1) { 3060 struct dir_table_slot dirtab_slot; 3061 3062 if (dtEmpty(ip) || 3063 (dir_index >= JFS_IP(ip)->next_index)) { 3064 /* Stale position. Directory has shrunk */ 3065 ctx->pos = DIREND; 3066 return 0; 3067 } 3068 repeat: 3069 rc = read_index(ip, dir_index, &dirtab_slot); 3070 if (rc) { 3071 ctx->pos = DIREND; 3072 return rc; 3073 } 3074 if (dirtab_slot.flag == DIR_INDEX_FREE) { 3075 if (loop_count++ > JFS_IP(ip)->next_index) { 3076 jfs_err("jfs_readdir detected infinite loop!"); 3077 ctx->pos = DIREND; 3078 return 0; 3079 } 3080 dir_index = le32_to_cpu(dirtab_slot.addr2); 3081 if (dir_index == -1) { 3082 ctx->pos = DIREND; 3083 return 0; 3084 } 3085 goto repeat; 3086 } 3087 bn = addressDTS(&dirtab_slot); 3088 index = dirtab_slot.slot; 3089 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3090 if (rc) { 3091 ctx->pos = DIREND; 3092 return 0; 3093 } 3094 if (p->header.flag & BT_INTERNAL) { 3095 jfs_err("jfs_readdir: bad index table"); 3096 DT_PUTPAGE(mp); 3097 ctx->pos = DIREND; 3098 return 0; 3099 } 3100 } else { 3101 if (dir_index == 0) { 3102 /* 3103 * self "." 3104 */ 3105 ctx->pos = 1; 3106 if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR)) 3107 return 0; 3108 } 3109 /* 3110 * parent ".." 3111 */ 3112 ctx->pos = 2; 3113 if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR)) 3114 return 0; 3115 3116 /* 3117 * Find first entry of left-most leaf 3118 */ 3119 if (dtEmpty(ip)) { 3120 ctx->pos = DIREND; 3121 return 0; 3122 } 3123 3124 if ((rc = dtReadFirst(ip, &btstack))) 3125 return rc; 3126 3127 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 3128 } 3129 } else { 3130 /* 3131 * Legacy filesystem - OS/2 & Linux JFS < 0.3.6 3132 * 3133 * pn = 0; index = 1: First entry "." 3134 * pn = 0; index = 2: Second entry ".." 3135 * pn > 0: Real entries, pn=1 -> leftmost page 3136 * pn = index = -1: No more entries 3137 */ 3138 dtpos = ctx->pos; 3139 if (dtpos < 2) { 3140 /* build "." entry */ 3141 ctx->pos = 1; 3142 if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR)) 3143 return 0; 3144 dtoffset->index = 2; 3145 ctx->pos = dtpos; 3146 } 3147 3148 if (dtoffset->pn == 0) { 3149 if (dtoffset->index == 2) { 3150 /* build ".." entry */ 3151 if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR)) 3152 return 0; 3153 } else { 3154 jfs_err("jfs_readdir called with invalid offset!"); 3155 } 3156 dtoffset->pn = 1; 3157 dtoffset->index = 0; 3158 ctx->pos = dtpos; 3159 } 3160 3161 if (dtEmpty(ip)) { 3162 ctx->pos = DIREND; 3163 return 0; 3164 } 3165 3166 if ((rc = dtReadNext(ip, &ctx->pos, &btstack))) { 3167 jfs_err("jfs_readdir: unexpected rc = %d from dtReadNext", 3168 rc); 3169 ctx->pos = DIREND; 3170 return 0; 3171 } 3172 /* get start leaf page and index */ 3173 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 3174 3175 /* offset beyond directory eof ? */ 3176 if (bn < 0) { 3177 ctx->pos = DIREND; 3178 return 0; 3179 } 3180 } 3181 3182 dirent_buf = __get_free_page(GFP_KERNEL); 3183 if (dirent_buf == 0) { 3184 DT_PUTPAGE(mp); 3185 jfs_warn("jfs_readdir: __get_free_page failed!"); 3186 ctx->pos = DIREND; 3187 return -ENOMEM; 3188 } 3189 3190 while (1) { 3191 jfs_dirent = (struct jfs_dirent *) dirent_buf; 3192 jfs_dirents = 0; 3193 overflow = fix_page = 0; 3194 3195 stbl = DT_GETSTBL(p); 3196 3197 for (i = index; i < p->header.nextindex; i++) { 3198 d = (struct ldtentry *) & p->slot[stbl[i]]; 3199 3200 if (((long) jfs_dirent + d->namlen + 1) > 3201 (dirent_buf + PAGE_SIZE)) { 3202 /* DBCS codepages could overrun dirent_buf */ 3203 index = i; 3204 overflow = 1; 3205 break; 3206 } 3207 3208 d_namleft = d->namlen; 3209 name_ptr = jfs_dirent->name; 3210 jfs_dirent->ino = le32_to_cpu(d->inumber); 3211 3212 if (do_index) { 3213 len = min(d_namleft, DTLHDRDATALEN); 3214 jfs_dirent->position = le32_to_cpu(d->index); 3215 /* 3216 * d->index should always be valid, but it 3217 * isn't. fsck.jfs doesn't create the 3218 * directory index for the lost+found 3219 * directory. Rather than let it go, 3220 * we can try to fix it. 3221 */ 3222 if ((jfs_dirent->position < 2) || 3223 (jfs_dirent->position >= 3224 JFS_IP(ip)->next_index)) { 3225 if (!page_fixed && !isReadOnly(ip)) { 3226 fix_page = 1; 3227 /* 3228 * setting overflow and setting 3229 * index to i will cause the 3230 * same page to be processed 3231 * again starting here 3232 */ 3233 overflow = 1; 3234 index = i; 3235 break; 3236 } 3237 jfs_dirent->position = unique_pos++; 3238 } 3239 /* 3240 * We add 1 to the index because we may 3241 * use a value of 2 internally, and NFSv4 3242 * doesn't like that. 3243 */ 3244 jfs_dirent->position++; 3245 } else { 3246 jfs_dirent->position = dtpos; 3247 len = min(d_namleft, DTLHDRDATALEN_LEGACY); 3248 } 3249 3250 /* copy the name of head/only segment */ 3251 outlen = jfs_strfromUCS_le(name_ptr, d->name, len, 3252 codepage); 3253 jfs_dirent->name_len = outlen; 3254 3255 /* copy name in the additional segment(s) */ 3256 next = d->next; 3257 while (next >= 0) { 3258 t = (struct dtslot *) & p->slot[next]; 3259 name_ptr += outlen; 3260 d_namleft -= len; 3261 /* Sanity Check */ 3262 if (d_namleft == 0) { 3263 jfs_error(ip->i_sb, 3264 "JFS:Dtree error: ino = %ld, bn=%lld, index = %d\n", 3265 (long)ip->i_ino, 3266 (long long)bn, 3267 i); 3268 goto skip_one; 3269 } 3270 len = min(d_namleft, DTSLOTDATALEN); 3271 outlen = jfs_strfromUCS_le(name_ptr, t->name, 3272 len, codepage); 3273 jfs_dirent->name_len += outlen; 3274 3275 next = t->next; 3276 } 3277 3278 jfs_dirents++; 3279 jfs_dirent = next_jfs_dirent(jfs_dirent); 3280 skip_one: 3281 if (!do_index) 3282 dtoffset->index++; 3283 } 3284 3285 if (!overflow) { 3286 /* Point to next leaf page */ 3287 if (p->header.flag & BT_ROOT) 3288 bn = 0; 3289 else { 3290 bn = le64_to_cpu(p->header.next); 3291 index = 0; 3292 /* update offset (pn:index) for new page */ 3293 if (!do_index) { 3294 dtoffset->pn++; 3295 dtoffset->index = 0; 3296 } 3297 } 3298 page_fixed = 0; 3299 } 3300 3301 /* unpin previous leaf page */ 3302 DT_PUTPAGE(mp); 3303 3304 jfs_dirent = (struct jfs_dirent *) dirent_buf; 3305 while (jfs_dirents--) { 3306 ctx->pos = jfs_dirent->position; 3307 if (!dir_emit(ctx, jfs_dirent->name, 3308 jfs_dirent->name_len, 3309 jfs_dirent->ino, DT_UNKNOWN)) 3310 goto out; 3311 jfs_dirent = next_jfs_dirent(jfs_dirent); 3312 } 3313 3314 if (fix_page) { 3315 add_missing_indices(ip, bn); 3316 page_fixed = 1; 3317 } 3318 3319 if (!overflow && (bn == 0)) { 3320 ctx->pos = DIREND; 3321 break; 3322 } 3323 3324 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3325 if (rc) { 3326 free_page(dirent_buf); 3327 return rc; 3328 } 3329 } 3330 3331 out: 3332 free_page(dirent_buf); 3333 3334 return rc; 3335 } 3336 3337 3338 /* 3339 * dtReadFirst() 3340 * 3341 * function: get the leftmost page of the directory 3342 */ 3343 static int dtReadFirst(struct inode *ip, struct btstack * btstack) 3344 { 3345 int rc = 0; 3346 s64 bn; 3347 int psize = 288; /* initial in-line directory */ 3348 struct metapage *mp; 3349 dtpage_t *p; 3350 s8 *stbl; 3351 struct btframe *btsp; 3352 pxd_t *xd; 3353 3354 BT_CLR(btstack); /* reset stack */ 3355 3356 /* 3357 * descend leftmost path of the tree 3358 * 3359 * by convention, root bn = 0. 3360 */ 3361 for (bn = 0;;) { 3362 DT_GETPAGE(ip, bn, mp, psize, p, rc); 3363 if (rc) 3364 return rc; 3365 3366 /* 3367 * leftmost leaf page 3368 */ 3369 if (p->header.flag & BT_LEAF) { 3370 /* return leftmost entry */ 3371 btsp = btstack->top; 3372 btsp->bn = bn; 3373 btsp->index = 0; 3374 btsp->mp = mp; 3375 3376 return 0; 3377 } 3378 3379 /* 3380 * descend down to leftmost child page 3381 */ 3382 if (BT_STACK_FULL(btstack)) { 3383 DT_PUTPAGE(mp); 3384 jfs_error(ip->i_sb, "btstack overrun\n"); 3385 BT_STACK_DUMP(btstack); 3386 return -EIO; 3387 } 3388 /* push (bn, index) of the parent page/entry */ 3389 BT_PUSH(btstack, bn, 0); 3390 3391 /* get the leftmost entry */ 3392 stbl = DT_GETSTBL(p); 3393 xd = (pxd_t *) & p->slot[stbl[0]]; 3394 3395 /* get the child page block address */ 3396 bn = addressPXD(xd); 3397 psize = lengthPXD(xd) << JFS_SBI(ip->i_sb)->l2bsize; 3398 3399 /* unpin the parent page */ 3400 DT_PUTPAGE(mp); 3401 } 3402 } 3403 3404 3405 /* 3406 * dtReadNext() 3407 * 3408 * function: get the page of the specified offset (pn:index) 3409 * 3410 * return: if (offset > eof), bn = -1; 3411 * 3412 * note: if index > nextindex of the target leaf page, 3413 * start with 1st entry of next leaf page; 3414 */ 3415 static int dtReadNext(struct inode *ip, loff_t * offset, 3416 struct btstack * btstack) 3417 { 3418 int rc = 0; 3419 struct dtoffset { 3420 s16 pn; 3421 s16 index; 3422 s32 unused; 3423 } *dtoffset = (struct dtoffset *) offset; 3424 s64 bn; 3425 struct metapage *mp; 3426 dtpage_t *p; 3427 int index; 3428 int pn; 3429 s8 *stbl; 3430 struct btframe *btsp, *parent; 3431 pxd_t *xd; 3432 3433 /* 3434 * get leftmost leaf page pinned 3435 */ 3436 if ((rc = dtReadFirst(ip, btstack))) 3437 return rc; 3438 3439 /* get leaf page */ 3440 DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); 3441 3442 /* get the start offset (pn:index) */ 3443 pn = dtoffset->pn - 1; /* Now pn = 0 represents leftmost leaf */ 3444 index = dtoffset->index; 3445 3446 /* start at leftmost page ? */ 3447 if (pn == 0) { 3448 /* offset beyond eof ? */ 3449 if (index < p->header.nextindex) 3450 goto out; 3451 3452 if (p->header.flag & BT_ROOT) { 3453 bn = -1; 3454 goto out; 3455 } 3456 3457 /* start with 1st entry of next leaf page */ 3458 dtoffset->pn++; 3459 dtoffset->index = index = 0; 3460 goto a; 3461 } 3462 3463 /* start at non-leftmost page: scan parent pages for large pn */ 3464 if (p->header.flag & BT_ROOT) { 3465 bn = -1; 3466 goto out; 3467 } 3468 3469 /* start after next leaf page ? */ 3470 if (pn > 1) 3471 goto b; 3472 3473 /* get leaf page pn = 1 */ 3474 a: 3475 bn = le64_to_cpu(p->header.next); 3476 3477 /* unpin leaf page */ 3478 DT_PUTPAGE(mp); 3479 3480 /* offset beyond eof ? */ 3481 if (bn == 0) { 3482 bn = -1; 3483 goto out; 3484 } 3485 3486 goto c; 3487 3488 /* 3489 * scan last internal page level to get target leaf page 3490 */ 3491 b: 3492 /* unpin leftmost leaf page */ 3493 DT_PUTPAGE(mp); 3494 3495 /* get left most parent page */ 3496 btsp = btstack->top; 3497 parent = btsp - 1; 3498 bn = parent->bn; 3499 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3500 if (rc) 3501 return rc; 3502 3503 /* scan parent pages at last internal page level */ 3504 while (pn >= p->header.nextindex) { 3505 pn -= p->header.nextindex; 3506 3507 /* get next parent page address */ 3508 bn = le64_to_cpu(p->header.next); 3509 3510 /* unpin current parent page */ 3511 DT_PUTPAGE(mp); 3512 3513 /* offset beyond eof ? */ 3514 if (bn == 0) { 3515 bn = -1; 3516 goto out; 3517 } 3518 3519 /* get next parent page */ 3520 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3521 if (rc) 3522 return rc; 3523 3524 /* update parent page stack frame */ 3525 parent->bn = bn; 3526 } 3527 3528 /* get leaf page address */ 3529 stbl = DT_GETSTBL(p); 3530 xd = (pxd_t *) & p->slot[stbl[pn]]; 3531 bn = addressPXD(xd); 3532 3533 /* unpin parent page */ 3534 DT_PUTPAGE(mp); 3535 3536 /* 3537 * get target leaf page 3538 */ 3539 c: 3540 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3541 if (rc) 3542 return rc; 3543 3544 /* 3545 * leaf page has been completed: 3546 * start with 1st entry of next leaf page 3547 */ 3548 if (index >= p->header.nextindex) { 3549 bn = le64_to_cpu(p->header.next); 3550 3551 /* unpin leaf page */ 3552 DT_PUTPAGE(mp); 3553 3554 /* offset beyond eof ? */ 3555 if (bn == 0) { 3556 bn = -1; 3557 goto out; 3558 } 3559 3560 /* get next leaf page */ 3561 DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); 3562 if (rc) 3563 return rc; 3564 3565 /* start with 1st entry of next leaf page */ 3566 dtoffset->pn++; 3567 dtoffset->index = 0; 3568 } 3569 3570 out: 3571 /* return target leaf page pinned */ 3572 btsp = btstack->top; 3573 btsp->bn = bn; 3574 btsp->index = dtoffset->index; 3575 btsp->mp = mp; 3576 3577 return 0; 3578 } 3579 3580 3581 /* 3582 * dtCompare() 3583 * 3584 * function: compare search key with an internal entry 3585 * 3586 * return: 3587 * < 0 if k is < record 3588 * = 0 if k is = record 3589 * > 0 if k is > record 3590 */ 3591 static int dtCompare(struct component_name * key, /* search key */ 3592 dtpage_t * p, /* directory page */ 3593 int si) 3594 { /* entry slot index */ 3595 wchar_t *kname; 3596 __le16 *name; 3597 int klen, namlen, len, rc; 3598 struct idtentry *ih; 3599 struct dtslot *t; 3600 3601 /* 3602 * force the left-most key on internal pages, at any level of 3603 * the tree, to be less than any search key. 3604 * this obviates having to update the leftmost key on an internal 3605 * page when the user inserts a new key in the tree smaller than 3606 * anything that has been stored. 3607 * 3608 * (? if/when dtSearch() narrows down to 1st entry (index = 0), 3609 * at any internal page at any level of the tree, 3610 * it descends to child of the entry anyway - 3611 * ? make the entry as min size dummy entry) 3612 * 3613 * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF)) 3614 * return (1); 3615 */ 3616 3617 kname = key->name; 3618 klen = key->namlen; 3619 3620 ih = (struct idtentry *) & p->slot[si]; 3621 si = ih->next; 3622 name = ih->name; 3623 namlen = ih->namlen; 3624 len = min(namlen, DTIHDRDATALEN); 3625 3626 /* compare with head/only segment */ 3627 len = min(klen, len); 3628 if ((rc = UniStrncmp_le(kname, name, len))) 3629 return rc; 3630 3631 klen -= len; 3632 namlen -= len; 3633 3634 /* compare with additional segment(s) */ 3635 kname += len; 3636 while (klen > 0 && namlen > 0) { 3637 /* compare with next name segment */ 3638 t = (struct dtslot *) & p->slot[si]; 3639 len = min(namlen, DTSLOTDATALEN); 3640 len = min(klen, len); 3641 name = t->name; 3642 if ((rc = UniStrncmp_le(kname, name, len))) 3643 return rc; 3644 3645 klen -= len; 3646 namlen -= len; 3647 kname += len; 3648 si = t->next; 3649 } 3650 3651 return (klen - namlen); 3652 } 3653 3654 3655 3656 3657 /* 3658 * ciCompare() 3659 * 3660 * function: compare search key with an (leaf/internal) entry 3661 * 3662 * return: 3663 * < 0 if k is < record 3664 * = 0 if k is = record 3665 * > 0 if k is > record 3666 */ 3667 static int ciCompare(struct component_name * key, /* search key */ 3668 dtpage_t * p, /* directory page */ 3669 int si, /* entry slot index */ 3670 int flag) 3671 { 3672 wchar_t *kname, x; 3673 __le16 *name; 3674 int klen, namlen, len, rc; 3675 struct ldtentry *lh; 3676 struct idtentry *ih; 3677 struct dtslot *t; 3678 int i; 3679 3680 /* 3681 * force the left-most key on internal pages, at any level of 3682 * the tree, to be less than any search key. 3683 * this obviates having to update the leftmost key on an internal 3684 * page when the user inserts a new key in the tree smaller than 3685 * anything that has been stored. 3686 * 3687 * (? if/when dtSearch() narrows down to 1st entry (index = 0), 3688 * at any internal page at any level of the tree, 3689 * it descends to child of the entry anyway - 3690 * ? make the entry as min size dummy entry) 3691 * 3692 * if (e->index == 0 && h->prevpg == P_INVALID && !(h->flags & BT_LEAF)) 3693 * return (1); 3694 */ 3695 3696 kname = key->name; 3697 klen = key->namlen; 3698 3699 /* 3700 * leaf page entry 3701 */ 3702 if (p->header.flag & BT_LEAF) { 3703 lh = (struct ldtentry *) & p->slot[si]; 3704 si = lh->next; 3705 name = lh->name; 3706 namlen = lh->namlen; 3707 if (flag & JFS_DIR_INDEX) 3708 len = min(namlen, DTLHDRDATALEN); 3709 else 3710 len = min(namlen, DTLHDRDATALEN_LEGACY); 3711 } 3712 /* 3713 * internal page entry 3714 */ 3715 else { 3716 ih = (struct idtentry *) & p->slot[si]; 3717 si = ih->next; 3718 name = ih->name; 3719 namlen = ih->namlen; 3720 len = min(namlen, DTIHDRDATALEN); 3721 } 3722 3723 /* compare with head/only segment */ 3724 len = min(klen, len); 3725 for (i = 0; i < len; i++, kname++, name++) { 3726 /* only uppercase if case-insensitive support is on */ 3727 if ((flag & JFS_OS2) == JFS_OS2) 3728 x = UniToupper(le16_to_cpu(*name)); 3729 else 3730 x = le16_to_cpu(*name); 3731 if ((rc = *kname - x)) 3732 return rc; 3733 } 3734 3735 klen -= len; 3736 namlen -= len; 3737 3738 /* compare with additional segment(s) */ 3739 while (klen > 0 && namlen > 0) { 3740 /* compare with next name segment */ 3741 t = (struct dtslot *) & p->slot[si]; 3742 len = min(namlen, DTSLOTDATALEN); 3743 len = min(klen, len); 3744 name = t->name; 3745 for (i = 0; i < len; i++, kname++, name++) { 3746 /* only uppercase if case-insensitive support is on */ 3747 if ((flag & JFS_OS2) == JFS_OS2) 3748 x = UniToupper(le16_to_cpu(*name)); 3749 else 3750 x = le16_to_cpu(*name); 3751 3752 if ((rc = *kname - x)) 3753 return rc; 3754 } 3755 3756 klen -= len; 3757 namlen -= len; 3758 si = t->next; 3759 } 3760 3761 return (klen - namlen); 3762 } 3763 3764 3765 /* 3766 * ciGetLeafPrefixKey() 3767 * 3768 * function: compute prefix of suffix compression 3769 * from two adjacent leaf entries 3770 * across page boundary 3771 * 3772 * return: non-zero on error 3773 * 3774 */ 3775 static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, 3776 int ri, struct component_name * key, int flag) 3777 { 3778 int klen, namlen; 3779 wchar_t *pl, *pr, *kname; 3780 struct component_name lkey; 3781 struct component_name rkey; 3782 3783 lkey.name = kmalloc_array(JFS_NAME_MAX + 1, sizeof(wchar_t), 3784 GFP_KERNEL); 3785 if (lkey.name == NULL) 3786 return -ENOMEM; 3787 3788 rkey.name = kmalloc_array(JFS_NAME_MAX + 1, sizeof(wchar_t), 3789 GFP_KERNEL); 3790 if (rkey.name == NULL) { 3791 kfree(lkey.name); 3792 return -ENOMEM; 3793 } 3794 3795 /* get left and right key */ 3796 dtGetKey(lp, li, &lkey, flag); 3797 lkey.name[lkey.namlen] = 0; 3798 3799 if ((flag & JFS_OS2) == JFS_OS2) 3800 ciToUpper(&lkey); 3801 3802 dtGetKey(rp, ri, &rkey, flag); 3803 rkey.name[rkey.namlen] = 0; 3804 3805 3806 if ((flag & JFS_OS2) == JFS_OS2) 3807 ciToUpper(&rkey); 3808 3809 /* compute prefix */ 3810 klen = 0; 3811 kname = key->name; 3812 namlen = min(lkey.namlen, rkey.namlen); 3813 for (pl = lkey.name, pr = rkey.name; 3814 namlen; pl++, pr++, namlen--, klen++, kname++) { 3815 *kname = *pr; 3816 if (*pl != *pr) { 3817 key->namlen = klen + 1; 3818 goto free_names; 3819 } 3820 } 3821 3822 /* l->namlen <= r->namlen since l <= r */ 3823 if (lkey.namlen < rkey.namlen) { 3824 *kname = *pr; 3825 key->namlen = klen + 1; 3826 } else /* l->namelen == r->namelen */ 3827 key->namlen = klen; 3828 3829 free_names: 3830 kfree(lkey.name); 3831 kfree(rkey.name); 3832 return 0; 3833 } 3834 3835 3836 3837 /* 3838 * dtGetKey() 3839 * 3840 * function: get key of the entry 3841 */ 3842 static void dtGetKey(dtpage_t * p, int i, /* entry index */ 3843 struct component_name * key, int flag) 3844 { 3845 int si; 3846 s8 *stbl; 3847 struct ldtentry *lh; 3848 struct idtentry *ih; 3849 struct dtslot *t; 3850 int namlen, len; 3851 wchar_t *kname; 3852 __le16 *name; 3853 3854 /* get entry */ 3855 stbl = DT_GETSTBL(p); 3856 si = stbl[i]; 3857 if (p->header.flag & BT_LEAF) { 3858 lh = (struct ldtentry *) & p->slot[si]; 3859 si = lh->next; 3860 namlen = lh->namlen; 3861 name = lh->name; 3862 if (flag & JFS_DIR_INDEX) 3863 len = min(namlen, DTLHDRDATALEN); 3864 else 3865 len = min(namlen, DTLHDRDATALEN_LEGACY); 3866 } else { 3867 ih = (struct idtentry *) & p->slot[si]; 3868 si = ih->next; 3869 namlen = ih->namlen; 3870 name = ih->name; 3871 len = min(namlen, DTIHDRDATALEN); 3872 } 3873 3874 key->namlen = namlen; 3875 kname = key->name; 3876 3877 /* 3878 * move head/only segment 3879 */ 3880 UniStrncpy_from_le(kname, name, len); 3881 3882 /* 3883 * move additional segment(s) 3884 */ 3885 while (si >= 0) { 3886 /* get next segment */ 3887 t = &p->slot[si]; 3888 kname += len; 3889 namlen -= len; 3890 len = min(namlen, DTSLOTDATALEN); 3891 UniStrncpy_from_le(kname, t->name, len); 3892 3893 si = t->next; 3894 } 3895 } 3896 3897 3898 /* 3899 * dtInsertEntry() 3900 * 3901 * function: allocate free slot(s) and 3902 * write a leaf/internal entry 3903 * 3904 * return: entry slot index 3905 */ 3906 static void dtInsertEntry(dtpage_t * p, int index, struct component_name * key, 3907 ddata_t * data, struct dt_lock ** dtlock) 3908 { 3909 struct dtslot *h, *t; 3910 struct ldtentry *lh = NULL; 3911 struct idtentry *ih = NULL; 3912 int hsi, fsi, klen, len, nextindex; 3913 wchar_t *kname; 3914 __le16 *name; 3915 s8 *stbl; 3916 pxd_t *xd; 3917 struct dt_lock *dtlck = *dtlock; 3918 struct lv *lv; 3919 int xsi, n; 3920 s64 bn = 0; 3921 struct metapage *mp = NULL; 3922 3923 klen = key->namlen; 3924 kname = key->name; 3925 3926 /* allocate a free slot */ 3927 hsi = fsi = p->header.freelist; 3928 h = &p->slot[fsi]; 3929 p->header.freelist = h->next; 3930 --p->header.freecnt; 3931 3932 /* open new linelock */ 3933 if (dtlck->index >= dtlck->maxcnt) 3934 dtlck = (struct dt_lock *) txLinelock(dtlck); 3935 3936 lv = & dtlck->lv[dtlck->index]; 3937 lv->offset = hsi; 3938 3939 /* write head/only segment */ 3940 if (p->header.flag & BT_LEAF) { 3941 lh = (struct ldtentry *) h; 3942 lh->next = h->next; 3943 lh->inumber = cpu_to_le32(data->leaf.ino); 3944 lh->namlen = klen; 3945 name = lh->name; 3946 if (data->leaf.ip) { 3947 len = min(klen, DTLHDRDATALEN); 3948 if (!(p->header.flag & BT_ROOT)) 3949 bn = addressPXD(&p->header.self); 3950 lh->index = cpu_to_le32(add_index(data->leaf.tid, 3951 data->leaf.ip, 3952 bn, index)); 3953 } else 3954 len = min(klen, DTLHDRDATALEN_LEGACY); 3955 } else { 3956 ih = (struct idtentry *) h; 3957 ih->next = h->next; 3958 xd = (pxd_t *) ih; 3959 *xd = data->xd; 3960 ih->namlen = klen; 3961 name = ih->name; 3962 len = min(klen, DTIHDRDATALEN); 3963 } 3964 3965 UniStrncpy_to_le(name, kname, len); 3966 3967 n = 1; 3968 xsi = hsi; 3969 3970 /* write additional segment(s) */ 3971 t = h; 3972 klen -= len; 3973 while (klen) { 3974 /* get free slot */ 3975 fsi = p->header.freelist; 3976 t = &p->slot[fsi]; 3977 p->header.freelist = t->next; 3978 --p->header.freecnt; 3979 3980 /* is next slot contiguous ? */ 3981 if (fsi != xsi + 1) { 3982 /* close current linelock */ 3983 lv->length = n; 3984 dtlck->index++; 3985 3986 /* open new linelock */ 3987 if (dtlck->index < dtlck->maxcnt) 3988 lv++; 3989 else { 3990 dtlck = (struct dt_lock *) txLinelock(dtlck); 3991 lv = & dtlck->lv[0]; 3992 } 3993 3994 lv->offset = fsi; 3995 n = 0; 3996 } 3997 3998 kname += len; 3999 len = min(klen, DTSLOTDATALEN); 4000 UniStrncpy_to_le(t->name, kname, len); 4001 4002 n++; 4003 xsi = fsi; 4004 klen -= len; 4005 } 4006 4007 /* close current linelock */ 4008 lv->length = n; 4009 dtlck->index++; 4010 4011 *dtlock = dtlck; 4012 4013 /* terminate last/only segment */ 4014 if (h == t) { 4015 /* single segment entry */ 4016 if (p->header.flag & BT_LEAF) 4017 lh->next = -1; 4018 else 4019 ih->next = -1; 4020 } else 4021 /* multi-segment entry */ 4022 t->next = -1; 4023 4024 /* if insert into middle, shift right succeeding entries in stbl */ 4025 stbl = DT_GETSTBL(p); 4026 nextindex = p->header.nextindex; 4027 if (index < nextindex) { 4028 memmove(stbl + index + 1, stbl + index, nextindex - index); 4029 4030 if ((p->header.flag & BT_LEAF) && data->leaf.ip) { 4031 s64 lblock; 4032 4033 /* 4034 * Need to update slot number for entries that moved 4035 * in the stbl 4036 */ 4037 mp = NULL; 4038 for (n = index + 1; n <= nextindex; n++) { 4039 lh = (struct ldtentry *) & (p->slot[stbl[n]]); 4040 modify_index(data->leaf.tid, data->leaf.ip, 4041 le32_to_cpu(lh->index), bn, n, 4042 &mp, &lblock); 4043 } 4044 if (mp) 4045 release_metapage(mp); 4046 } 4047 } 4048 4049 stbl[index] = hsi; 4050 4051 /* advance next available entry index of stbl */ 4052 ++p->header.nextindex; 4053 } 4054 4055 4056 /* 4057 * dtMoveEntry() 4058 * 4059 * function: move entries from split/left page to new/right page 4060 * 4061 * nextindex of dst page and freelist/freecnt of both pages 4062 * are updated. 4063 */ 4064 static void dtMoveEntry(dtpage_t * sp, int si, dtpage_t * dp, 4065 struct dt_lock ** sdtlock, struct dt_lock ** ddtlock, 4066 int do_index) 4067 { 4068 int ssi, next; /* src slot index */ 4069 int di; /* dst entry index */ 4070 int dsi; /* dst slot index */ 4071 s8 *sstbl, *dstbl; /* sorted entry table */ 4072 int snamlen, len; 4073 struct ldtentry *slh, *dlh = NULL; 4074 struct idtentry *sih, *dih = NULL; 4075 struct dtslot *h, *s, *d; 4076 struct dt_lock *sdtlck = *sdtlock, *ddtlck = *ddtlock; 4077 struct lv *slv, *dlv; 4078 int xssi, ns, nd; 4079 int sfsi; 4080 4081 sstbl = (s8 *) & sp->slot[sp->header.stblindex]; 4082 dstbl = (s8 *) & dp->slot[dp->header.stblindex]; 4083 4084 dsi = dp->header.freelist; /* first (whole page) free slot */ 4085 sfsi = sp->header.freelist; 4086 4087 /* linelock destination entry slot */ 4088 dlv = & ddtlck->lv[ddtlck->index]; 4089 dlv->offset = dsi; 4090 4091 /* linelock source entry slot */ 4092 slv = & sdtlck->lv[sdtlck->index]; 4093 slv->offset = sstbl[si]; 4094 xssi = slv->offset - 1; 4095 4096 /* 4097 * move entries 4098 */ 4099 ns = nd = 0; 4100 for (di = 0; si < sp->header.nextindex; si++, di++) { 4101 ssi = sstbl[si]; 4102 dstbl[di] = dsi; 4103 4104 /* is next slot contiguous ? */ 4105 if (ssi != xssi + 1) { 4106 /* close current linelock */ 4107 slv->length = ns; 4108 sdtlck->index++; 4109 4110 /* open new linelock */ 4111 if (sdtlck->index < sdtlck->maxcnt) 4112 slv++; 4113 else { 4114 sdtlck = (struct dt_lock *) txLinelock(sdtlck); 4115 slv = & sdtlck->lv[0]; 4116 } 4117 4118 slv->offset = ssi; 4119 ns = 0; 4120 } 4121 4122 /* 4123 * move head/only segment of an entry 4124 */ 4125 /* get dst slot */ 4126 h = d = &dp->slot[dsi]; 4127 4128 /* get src slot and move */ 4129 s = &sp->slot[ssi]; 4130 if (sp->header.flag & BT_LEAF) { 4131 /* get source entry */ 4132 slh = (struct ldtentry *) s; 4133 dlh = (struct ldtentry *) h; 4134 snamlen = slh->namlen; 4135 4136 if (do_index) { 4137 len = min(snamlen, DTLHDRDATALEN); 4138 dlh->index = slh->index; /* little-endian */ 4139 } else 4140 len = min(snamlen, DTLHDRDATALEN_LEGACY); 4141 4142 memcpy(dlh, slh, 6 + len * 2); 4143 4144 next = slh->next; 4145 4146 /* update dst head/only segment next field */ 4147 dsi++; 4148 dlh->next = dsi; 4149 } else { 4150 sih = (struct idtentry *) s; 4151 snamlen = sih->namlen; 4152 4153 len = min(snamlen, DTIHDRDATALEN); 4154 dih = (struct idtentry *) h; 4155 memcpy(dih, sih, 10 + len * 2); 4156 next = sih->next; 4157 4158 dsi++; 4159 dih->next = dsi; 4160 } 4161 4162 /* free src head/only segment */ 4163 s->next = sfsi; 4164 s->cnt = 1; 4165 sfsi = ssi; 4166 4167 ns++; 4168 nd++; 4169 xssi = ssi; 4170 4171 /* 4172 * move additional segment(s) of the entry 4173 */ 4174 snamlen -= len; 4175 while ((ssi = next) >= 0) { 4176 /* is next slot contiguous ? */ 4177 if (ssi != xssi + 1) { 4178 /* close current linelock */ 4179 slv->length = ns; 4180 sdtlck->index++; 4181 4182 /* open new linelock */ 4183 if (sdtlck->index < sdtlck->maxcnt) 4184 slv++; 4185 else { 4186 sdtlck = 4187 (struct dt_lock *) 4188 txLinelock(sdtlck); 4189 slv = & sdtlck->lv[0]; 4190 } 4191 4192 slv->offset = ssi; 4193 ns = 0; 4194 } 4195 4196 /* get next source segment */ 4197 s = &sp->slot[ssi]; 4198 4199 /* get next destination free slot */ 4200 d++; 4201 4202 len = min(snamlen, DTSLOTDATALEN); 4203 UniStrncpy_le(d->name, s->name, len); 4204 4205 ns++; 4206 nd++; 4207 xssi = ssi; 4208 4209 dsi++; 4210 d->next = dsi; 4211 4212 /* free source segment */ 4213 next = s->next; 4214 s->next = sfsi; 4215 s->cnt = 1; 4216 sfsi = ssi; 4217 4218 snamlen -= len; 4219 } /* end while */ 4220 4221 /* terminate dst last/only segment */ 4222 if (h == d) { 4223 /* single segment entry */ 4224 if (dp->header.flag & BT_LEAF) 4225 dlh->next = -1; 4226 else 4227 dih->next = -1; 4228 } else 4229 /* multi-segment entry */ 4230 d->next = -1; 4231 } /* end for */ 4232 4233 /* close current linelock */ 4234 slv->length = ns; 4235 sdtlck->index++; 4236 *sdtlock = sdtlck; 4237 4238 dlv->length = nd; 4239 ddtlck->index++; 4240 *ddtlock = ddtlck; 4241 4242 /* update source header */ 4243 sp->header.freelist = sfsi; 4244 sp->header.freecnt += nd; 4245 4246 /* update destination header */ 4247 dp->header.nextindex = di; 4248 4249 dp->header.freelist = dsi; 4250 dp->header.freecnt -= nd; 4251 } 4252 4253 4254 /* 4255 * dtDeleteEntry() 4256 * 4257 * function: free a (leaf/internal) entry 4258 * 4259 * log freelist header, stbl, and each segment slot of entry 4260 * (even though last/only segment next field is modified, 4261 * physical image logging requires all segment slots of 4262 * the entry logged to avoid applying previous updates 4263 * to the same slots) 4264 */ 4265 static void dtDeleteEntry(dtpage_t * p, int fi, struct dt_lock ** dtlock) 4266 { 4267 int fsi; /* free entry slot index */ 4268 s8 *stbl; 4269 struct dtslot *t; 4270 int si, freecnt; 4271 struct dt_lock *dtlck = *dtlock; 4272 struct lv *lv; 4273 int xsi, n; 4274 4275 /* get free entry slot index */ 4276 stbl = DT_GETSTBL(p); 4277 fsi = stbl[fi]; 4278 4279 /* open new linelock */ 4280 if (dtlck->index >= dtlck->maxcnt) 4281 dtlck = (struct dt_lock *) txLinelock(dtlck); 4282 lv = & dtlck->lv[dtlck->index]; 4283 4284 lv->offset = fsi; 4285 4286 /* get the head/only segment */ 4287 t = &p->slot[fsi]; 4288 if (p->header.flag & BT_LEAF) 4289 si = ((struct ldtentry *) t)->next; 4290 else 4291 si = ((struct idtentry *) t)->next; 4292 t->next = si; 4293 t->cnt = 1; 4294 4295 n = freecnt = 1; 4296 xsi = fsi; 4297 4298 /* find the last/only segment */ 4299 while (si >= 0) { 4300 /* is next slot contiguous ? */ 4301 if (si != xsi + 1) { 4302 /* close current linelock */ 4303 lv->length = n; 4304 dtlck->index++; 4305 4306 /* open new linelock */ 4307 if (dtlck->index < dtlck->maxcnt) 4308 lv++; 4309 else { 4310 dtlck = (struct dt_lock *) txLinelock(dtlck); 4311 lv = & dtlck->lv[0]; 4312 } 4313 4314 lv->offset = si; 4315 n = 0; 4316 } 4317 4318 n++; 4319 xsi = si; 4320 freecnt++; 4321 4322 t = &p->slot[si]; 4323 t->cnt = 1; 4324 si = t->next; 4325 } 4326 4327 /* close current linelock */ 4328 lv->length = n; 4329 dtlck->index++; 4330 4331 *dtlock = dtlck; 4332 4333 /* update freelist */ 4334 t->next = p->header.freelist; 4335 p->header.freelist = fsi; 4336 p->header.freecnt += freecnt; 4337 4338 /* if delete from middle, 4339 * shift left the succedding entries in the stbl 4340 */ 4341 si = p->header.nextindex; 4342 if (fi < si - 1) 4343 memmove(&stbl[fi], &stbl[fi + 1], si - fi - 1); 4344 4345 p->header.nextindex--; 4346 } 4347 4348 4349 /* 4350 * dtTruncateEntry() 4351 * 4352 * function: truncate a (leaf/internal) entry 4353 * 4354 * log freelist header, stbl, and each segment slot of entry 4355 * (even though last/only segment next field is modified, 4356 * physical image logging requires all segment slots of 4357 * the entry logged to avoid applying previous updates 4358 * to the same slots) 4359 */ 4360 static void dtTruncateEntry(dtpage_t * p, int ti, struct dt_lock ** dtlock) 4361 { 4362 int tsi; /* truncate entry slot index */ 4363 s8 *stbl; 4364 struct dtslot *t; 4365 int si, freecnt; 4366 struct dt_lock *dtlck = *dtlock; 4367 struct lv *lv; 4368 int fsi, xsi, n; 4369 4370 /* get free entry slot index */ 4371 stbl = DT_GETSTBL(p); 4372 tsi = stbl[ti]; 4373 4374 /* open new linelock */ 4375 if (dtlck->index >= dtlck->maxcnt) 4376 dtlck = (struct dt_lock *) txLinelock(dtlck); 4377 lv = & dtlck->lv[dtlck->index]; 4378 4379 lv->offset = tsi; 4380 4381 /* get the head/only segment */ 4382 t = &p->slot[tsi]; 4383 ASSERT(p->header.flag & BT_INTERNAL); 4384 ((struct idtentry *) t)->namlen = 0; 4385 si = ((struct idtentry *) t)->next; 4386 ((struct idtentry *) t)->next = -1; 4387 4388 n = 1; 4389 freecnt = 0; 4390 fsi = si; 4391 xsi = tsi; 4392 4393 /* find the last/only segment */ 4394 while (si >= 0) { 4395 /* is next slot contiguous ? */ 4396 if (si != xsi + 1) { 4397 /* close current linelock */ 4398 lv->length = n; 4399 dtlck->index++; 4400 4401 /* open new linelock */ 4402 if (dtlck->index < dtlck->maxcnt) 4403 lv++; 4404 else { 4405 dtlck = (struct dt_lock *) txLinelock(dtlck); 4406 lv = & dtlck->lv[0]; 4407 } 4408 4409 lv->offset = si; 4410 n = 0; 4411 } 4412 4413 n++; 4414 xsi = si; 4415 freecnt++; 4416 4417 t = &p->slot[si]; 4418 t->cnt = 1; 4419 si = t->next; 4420 } 4421 4422 /* close current linelock */ 4423 lv->length = n; 4424 dtlck->index++; 4425 4426 *dtlock = dtlck; 4427 4428 /* update freelist */ 4429 if (freecnt == 0) 4430 return; 4431 t->next = p->header.freelist; 4432 p->header.freelist = fsi; 4433 p->header.freecnt += freecnt; 4434 } 4435 4436 4437 /* 4438 * dtLinelockFreelist() 4439 */ 4440 static void dtLinelockFreelist(dtpage_t * p, /* directory page */ 4441 int m, /* max slot index */ 4442 struct dt_lock ** dtlock) 4443 { 4444 int fsi; /* free entry slot index */ 4445 struct dtslot *t; 4446 int si; 4447 struct dt_lock *dtlck = *dtlock; 4448 struct lv *lv; 4449 int xsi, n; 4450 4451 /* get free entry slot index */ 4452 fsi = p->header.freelist; 4453 4454 /* open new linelock */ 4455 if (dtlck->index >= dtlck->maxcnt) 4456 dtlck = (struct dt_lock *) txLinelock(dtlck); 4457 lv = & dtlck->lv[dtlck->index]; 4458 4459 lv->offset = fsi; 4460 4461 n = 1; 4462 xsi = fsi; 4463 4464 t = &p->slot[fsi]; 4465 si = t->next; 4466 4467 /* find the last/only segment */ 4468 while (si < m && si >= 0) { 4469 /* is next slot contiguous ? */ 4470 if (si != xsi + 1) { 4471 /* close current linelock */ 4472 lv->length = n; 4473 dtlck->index++; 4474 4475 /* open new linelock */ 4476 if (dtlck->index < dtlck->maxcnt) 4477 lv++; 4478 else { 4479 dtlck = (struct dt_lock *) txLinelock(dtlck); 4480 lv = & dtlck->lv[0]; 4481 } 4482 4483 lv->offset = si; 4484 n = 0; 4485 } 4486 4487 n++; 4488 xsi = si; 4489 4490 t = &p->slot[si]; 4491 si = t->next; 4492 } 4493 4494 /* close current linelock */ 4495 lv->length = n; 4496 dtlck->index++; 4497 4498 *dtlock = dtlck; 4499 } 4500 4501 4502 /* 4503 * NAME: dtModify 4504 * 4505 * FUNCTION: Modify the inode number part of a directory entry 4506 * 4507 * PARAMETERS: 4508 * tid - Transaction id 4509 * ip - Inode of parent directory 4510 * key - Name of entry to be modified 4511 * orig_ino - Original inode number expected in entry 4512 * new_ino - New inode number to put into entry 4513 * flag - JFS_RENAME 4514 * 4515 * RETURNS: 4516 * -ESTALE - If entry found does not match orig_ino passed in 4517 * -ENOENT - If no entry can be found to match key 4518 * 0 - If successfully modified entry 4519 */ 4520 int dtModify(tid_t tid, struct inode *ip, 4521 struct component_name * key, ino_t * orig_ino, ino_t new_ino, int flag) 4522 { 4523 int rc; 4524 s64 bn; 4525 struct metapage *mp; 4526 dtpage_t *p; 4527 int index; 4528 struct btstack btstack; 4529 struct tlock *tlck; 4530 struct dt_lock *dtlck; 4531 struct lv *lv; 4532 s8 *stbl; 4533 int entry_si; /* entry slot index */ 4534 struct ldtentry *entry; 4535 4536 /* 4537 * search for the entry to modify: 4538 * 4539 * dtSearch() returns (leaf page pinned, index at which to modify). 4540 */ 4541 if ((rc = dtSearch(ip, key, orig_ino, &btstack, flag))) 4542 return rc; 4543 4544 /* retrieve search result */ 4545 DT_GETSEARCH(ip, btstack.top, bn, mp, p, index); 4546 4547 BT_MARK_DIRTY(mp, ip); 4548 /* 4549 * acquire a transaction lock on the leaf page of named entry 4550 */ 4551 tlck = txLock(tid, ip, mp, tlckDTREE | tlckENTRY); 4552 dtlck = (struct dt_lock *) & tlck->lock; 4553 4554 /* get slot index of the entry */ 4555 stbl = DT_GETSTBL(p); 4556 entry_si = stbl[index]; 4557 4558 /* linelock entry */ 4559 ASSERT(dtlck->index == 0); 4560 lv = & dtlck->lv[0]; 4561 lv->offset = entry_si; 4562 lv->length = 1; 4563 dtlck->index++; 4564 4565 /* get the head/only segment */ 4566 entry = (struct ldtentry *) & p->slot[entry_si]; 4567 4568 /* substitute the inode number of the entry */ 4569 entry->inumber = cpu_to_le32(new_ino); 4570 4571 /* unpin the leaf page */ 4572 DT_PUTPAGE(mp); 4573 4574 return 0; 4575 } 4576