/*
 *   Copyright (C) International Business Machines Corp., 2000-2004
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 *	jfs_imap.c: inode allocation map manager
 *
 * Serialization:
 *   Each AG has a simple lock which is used to control the serialization of
 *	the AG level lists.  This lock should be taken first whenever an AG
 *	level list will be modified or accessed.
 *
 *   Each IAG is locked by obtaining the buffer for the IAG page.
 *
 *   There is also an inode lock for the inode map inode.  A read lock needs to
 *	be taken whenever an IAG is read from the map or the global level
 *	information is read.  A write lock needs to be taken whenever the global
 *	level information is modified or an atomic operation needs to be used.
 *
 *	If more than one IAG is read at one time, the read lock may not
 *	be given up until all of the IAG's are read.  Otherwise, a deadlock
 *	may occur when trying to obtain the read lock while another thread
 *	holding the read lock is waiting on the IAG already being held.
 *
 *   The control page of the inode map is read into memory by diMount().
 *	Thereafter it should only be modified in memory and then it will be
 *	written out when the filesystem is unmounted by diUnmount().
42 */ 43 44 #include <linux/fs.h> 45 #include <linux/buffer_head.h> 46 #include <linux/pagemap.h> 47 #include <linux/quotaops.h> 48 #include <linux/slab.h> 49 50 #include "jfs_incore.h" 51 #include "jfs_inode.h" 52 #include "jfs_filsys.h" 53 #include "jfs_dinode.h" 54 #include "jfs_dmap.h" 55 #include "jfs_imap.h" 56 #include "jfs_metapage.h" 57 #include "jfs_superblock.h" 58 #include "jfs_debug.h" 59 60 /* 61 * imap locks 62 */ 63 /* iag free list lock */ 64 #define IAGFREE_LOCK_INIT(imap) mutex_init(&imap->im_freelock) 65 #define IAGFREE_LOCK(imap) mutex_lock(&imap->im_freelock) 66 #define IAGFREE_UNLOCK(imap) mutex_unlock(&imap->im_freelock) 67 68 /* per ag iag list locks */ 69 #define AG_LOCK_INIT(imap,index) mutex_init(&(imap->im_aglock[index])) 70 #define AG_LOCK(imap,agno) mutex_lock(&imap->im_aglock[agno]) 71 #define AG_UNLOCK(imap,agno) mutex_unlock(&imap->im_aglock[agno]) 72 73 /* 74 * forward references 75 */ 76 static int diAllocAG(struct inomap *, int, bool, struct inode *); 77 static int diAllocAny(struct inomap *, int, bool, struct inode *); 78 static int diAllocBit(struct inomap *, struct iag *, int); 79 static int diAllocExt(struct inomap *, int, struct inode *); 80 static int diAllocIno(struct inomap *, int, struct inode *); 81 static int diFindFree(u32, int); 82 static int diNewExt(struct inomap *, struct iag *, int); 83 static int diNewIAG(struct inomap *, int *, int, struct metapage **); 84 static void duplicateIXtree(struct super_block *, s64, int, s64 *); 85 86 static int diIAGRead(struct inomap * imap, int, struct metapage **); 87 static int copy_from_dinode(struct dinode *, struct inode *); 88 static void copy_to_dinode(struct dinode *, struct inode *); 89 90 /* 91 * NAME: diMount() 92 * 93 * FUNCTION: initialize the incore inode map control structures for 94 * a fileset or aggregate init time. 95 * 96 * the inode map's control structure (dinomap) is 97 * brought in from disk and placed in virtual memory. 
98 * 99 * PARAMETERS: 100 * ipimap - pointer to inode map inode for the aggregate or fileset. 101 * 102 * RETURN VALUES: 103 * 0 - success 104 * -ENOMEM - insufficient free virtual memory. 105 * -EIO - i/o error. 106 */ 107 int diMount(struct inode *ipimap) 108 { 109 struct inomap *imap; 110 struct metapage *mp; 111 int index; 112 struct dinomap_disk *dinom_le; 113 114 /* 115 * allocate/initialize the in-memory inode map control structure 116 */ 117 /* allocate the in-memory inode map control structure. */ 118 imap = kmalloc(sizeof(struct inomap), GFP_KERNEL); 119 if (imap == NULL) { 120 jfs_err("diMount: kmalloc returned NULL!"); 121 return -ENOMEM; 122 } 123 124 /* read the on-disk inode map control structure. */ 125 126 mp = read_metapage(ipimap, 127 IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage, 128 PSIZE, 0); 129 if (mp == NULL) { 130 kfree(imap); 131 return -EIO; 132 } 133 134 /* copy the on-disk version to the in-memory version. */ 135 dinom_le = (struct dinomap_disk *) mp->data; 136 imap->im_freeiag = le32_to_cpu(dinom_le->in_freeiag); 137 imap->im_nextiag = le32_to_cpu(dinom_le->in_nextiag); 138 atomic_set(&imap->im_numinos, le32_to_cpu(dinom_le->in_numinos)); 139 atomic_set(&imap->im_numfree, le32_to_cpu(dinom_le->in_numfree)); 140 imap->im_nbperiext = le32_to_cpu(dinom_le->in_nbperiext); 141 imap->im_l2nbperiext = le32_to_cpu(dinom_le->in_l2nbperiext); 142 for (index = 0; index < MAXAG; index++) { 143 imap->im_agctl[index].inofree = 144 le32_to_cpu(dinom_le->in_agctl[index].inofree); 145 imap->im_agctl[index].extfree = 146 le32_to_cpu(dinom_le->in_agctl[index].extfree); 147 imap->im_agctl[index].numinos = 148 le32_to_cpu(dinom_le->in_agctl[index].numinos); 149 imap->im_agctl[index].numfree = 150 le32_to_cpu(dinom_le->in_agctl[index].numfree); 151 } 152 153 /* release the buffer. 
*/ 154 release_metapage(mp); 155 156 /* 157 * allocate/initialize inode allocation map locks 158 */ 159 /* allocate and init iag free list lock */ 160 IAGFREE_LOCK_INIT(imap); 161 162 /* allocate and init ag list locks */ 163 for (index = 0; index < MAXAG; index++) { 164 AG_LOCK_INIT(imap, index); 165 } 166 167 /* bind the inode map inode and inode map control structure 168 * to each other. 169 */ 170 imap->im_ipimap = ipimap; 171 JFS_IP(ipimap)->i_imap = imap; 172 173 return (0); 174 } 175 176 177 /* 178 * NAME: diUnmount() 179 * 180 * FUNCTION: write to disk the incore inode map control structures for 181 * a fileset or aggregate at unmount time. 182 * 183 * PARAMETERS: 184 * ipimap - pointer to inode map inode for the aggregate or fileset. 185 * 186 * RETURN VALUES: 187 * 0 - success 188 * -ENOMEM - insufficient free virtual memory. 189 * -EIO - i/o error. 190 */ 191 int diUnmount(struct inode *ipimap, int mounterror) 192 { 193 struct inomap *imap = JFS_IP(ipimap)->i_imap; 194 195 /* 196 * update the on-disk inode map control structure 197 */ 198 199 if (!(mounterror || isReadOnly(ipimap))) 200 diSync(ipimap); 201 202 /* 203 * Invalidate the page cache buffers 204 */ 205 truncate_inode_pages(ipimap->i_mapping, 0); 206 207 /* 208 * free in-memory control structure 209 */ 210 kfree(imap); 211 212 return (0); 213 } 214 215 216 /* 217 * diSync() 218 */ 219 int diSync(struct inode *ipimap) 220 { 221 struct dinomap_disk *dinom_le; 222 struct inomap *imp = JFS_IP(ipimap)->i_imap; 223 struct metapage *mp; 224 int index; 225 226 /* 227 * write imap global conrol page 228 */ 229 /* read the on-disk inode map control structure */ 230 mp = get_metapage(ipimap, 231 IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage, 232 PSIZE, 0); 233 if (mp == NULL) { 234 jfs_err("diSync: get_metapage failed!"); 235 return -EIO; 236 } 237 238 /* copy the in-memory version to the on-disk version */ 239 dinom_le = (struct dinomap_disk *) mp->data; 240 dinom_le->in_freeiag = 
cpu_to_le32(imp->im_freeiag); 241 dinom_le->in_nextiag = cpu_to_le32(imp->im_nextiag); 242 dinom_le->in_numinos = cpu_to_le32(atomic_read(&imp->im_numinos)); 243 dinom_le->in_numfree = cpu_to_le32(atomic_read(&imp->im_numfree)); 244 dinom_le->in_nbperiext = cpu_to_le32(imp->im_nbperiext); 245 dinom_le->in_l2nbperiext = cpu_to_le32(imp->im_l2nbperiext); 246 for (index = 0; index < MAXAG; index++) { 247 dinom_le->in_agctl[index].inofree = 248 cpu_to_le32(imp->im_agctl[index].inofree); 249 dinom_le->in_agctl[index].extfree = 250 cpu_to_le32(imp->im_agctl[index].extfree); 251 dinom_le->in_agctl[index].numinos = 252 cpu_to_le32(imp->im_agctl[index].numinos); 253 dinom_le->in_agctl[index].numfree = 254 cpu_to_le32(imp->im_agctl[index].numfree); 255 } 256 257 /* write out the control structure */ 258 write_metapage(mp); 259 260 /* 261 * write out dirty pages of imap 262 */ 263 filemap_write_and_wait(ipimap->i_mapping); 264 265 diWriteSpecial(ipimap, 0); 266 267 return (0); 268 } 269 270 271 /* 272 * NAME: diRead() 273 * 274 * FUNCTION: initialize an incore inode from disk. 275 * 276 * on entry, the specifed incore inode should itself 277 * specify the disk inode number corresponding to the 278 * incore inode (i.e. i_number should be initialized). 279 * 280 * this routine handles incore inode initialization for 281 * both "special" and "regular" inodes. special inodes 282 * are those required early in the mount process and 283 * require special handling since much of the file system 284 * is not yet initialized. these "special" inodes are 285 * identified by a NULL inode map inode pointer and are 286 * actually initialized by a call to diReadSpecial(). 287 * 288 * for regular inodes, the iag describing the disk inode 289 * is read from disk to determine the inode extent address 290 * for the disk inode. with the inode extent address in 291 * hand, the page of the extent that contains the disk 292 * inode is read and the disk inode is copied to the 293 * incore inode. 
294 * 295 * PARAMETERS: 296 * ip - pointer to incore inode to be initialized from disk. 297 * 298 * RETURN VALUES: 299 * 0 - success 300 * -EIO - i/o error. 301 * -ENOMEM - insufficient memory 302 * 303 */ 304 int diRead(struct inode *ip) 305 { 306 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 307 int iagno, ino, extno, rc; 308 struct inode *ipimap; 309 struct dinode *dp; 310 struct iag *iagp; 311 struct metapage *mp; 312 s64 blkno, agstart; 313 struct inomap *imap; 314 int block_offset; 315 int inodes_left; 316 unsigned long pageno; 317 int rel_inode; 318 319 jfs_info("diRead: ino = %ld", ip->i_ino); 320 321 ipimap = sbi->ipimap; 322 JFS_IP(ip)->ipimap = ipimap; 323 324 /* determine the iag number for this inode (number) */ 325 iagno = INOTOIAG(ip->i_ino); 326 327 /* read the iag */ 328 imap = JFS_IP(ipimap)->i_imap; 329 IREAD_LOCK(ipimap, RDWRLOCK_IMAP); 330 rc = diIAGRead(imap, iagno, &mp); 331 IREAD_UNLOCK(ipimap); 332 if (rc) { 333 jfs_err("diRead: diIAGRead returned %d", rc); 334 return (rc); 335 } 336 337 iagp = (struct iag *) mp->data; 338 339 /* determine inode extent that holds the disk inode */ 340 ino = ip->i_ino & (INOSPERIAG - 1); 341 extno = ino >> L2INOSPEREXT; 342 343 if ((lengthPXD(&iagp->inoext[extno]) != imap->im_nbperiext) || 344 (addressPXD(&iagp->inoext[extno]) == 0)) { 345 release_metapage(mp); 346 return -ESTALE; 347 } 348 349 /* get disk block number of the page within the inode extent 350 * that holds the disk inode. 
351 */ 352 blkno = INOPBLK(&iagp->inoext[extno], ino, sbi->l2nbperpage); 353 354 /* get the ag for the iag */ 355 agstart = le64_to_cpu(iagp->agstart); 356 357 release_metapage(mp); 358 359 rel_inode = (ino & (INOSPERPAGE - 1)); 360 pageno = blkno >> sbi->l2nbperpage; 361 362 if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) { 363 /* 364 * OS/2 didn't always align inode extents on page boundaries 365 */ 366 inodes_left = 367 (sbi->nbperpage - block_offset) << sbi->l2niperblk; 368 369 if (rel_inode < inodes_left) 370 rel_inode += block_offset << sbi->l2niperblk; 371 else { 372 pageno += 1; 373 rel_inode -= inodes_left; 374 } 375 } 376 377 /* read the page of disk inode */ 378 mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1); 379 if (!mp) { 380 jfs_err("diRead: read_metapage failed"); 381 return -EIO; 382 } 383 384 /* locate the disk inode requested */ 385 dp = (struct dinode *) mp->data; 386 dp += rel_inode; 387 388 if (ip->i_ino != le32_to_cpu(dp->di_number)) { 389 jfs_error(ip->i_sb, "i_ino != di_number\n"); 390 rc = -EIO; 391 } else if (le32_to_cpu(dp->di_nlink) == 0) 392 rc = -ESTALE; 393 else 394 /* copy the disk inode to the in-memory inode */ 395 rc = copy_from_dinode(dp, ip); 396 397 release_metapage(mp); 398 399 /* set the ag for the inode */ 400 JFS_IP(ip)->agstart = agstart; 401 JFS_IP(ip)->active_ag = -1; 402 403 return (rc); 404 } 405 406 407 /* 408 * NAME: diReadSpecial() 409 * 410 * FUNCTION: initialize a 'special' inode from disk. 411 * 412 * this routines handles aggregate level inodes. The 413 * inode cache cannot differentiate between the 414 * aggregate inodes and the filesystem inodes, so we 415 * handle these here. We don't actually use the aggregate 416 * inode map, since these inodes are at a fixed location 417 * and in some cases the aggregate inode map isn't initialized 418 * yet. 
 *
 * PARAMETERS:
 *	sb - filesystem superblock
 *	inum - aggregate inode number
 *	secondary - 1 if secondary aggregate inode table
 *
 * RETURN VALUES:
 *	new inode	- success
 *	NULL		- new_inode(), read_metapage() or copy_from_dinode()
 *			  failed.
 */
struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
{
	struct jfs_sb_info *sbi = JFS_SBI(sb);
	uint address;
	struct dinode *dp;
	struct inode *ip;
	struct metapage *mp;

	ip = new_inode(sb);
	if (ip == NULL) {
		jfs_err("diReadSpecial: new_inode returned NULL!");
		return ip;
	}

	/* pick the page address of the inode table and the matching map inode */
	if (secondary) {
		address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
		JFS_IP(ip)->ipimap = sbi->ipaimap2;
	} else {
		address = AITBL_OFF >> L2PSIZE;
		JFS_IP(ip)->ipimap = sbi->ipaimap;
	}

	ASSERT(inum < INOSPEREXT);

	ip->i_ino = inum;

	address += inum >> 3;	/* 8 inodes per 4K page */

	/* read the page of fixed disk inode (AIT) in raw mode */
	mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1);
	if (mp == NULL) {
		set_nlink(ip, 1);	/* Don't want iput() deleting it */
		iput(ip);
		return (NULL);
	}

	/* get the pointer to the disk inode of interest */
	dp = (struct dinode *) (mp->data);
	dp += inum % 8;		/* 8 inodes per 4K page */

	/* copy on-disk inode to in-memory inode */
	if ((copy_from_dinode(dp, ip)) != 0) {
		/* handle bad return by returning NULL for ip */
		set_nlink(ip, 1);	/* Don't want iput() deleting it */
		iput(ip);
		/* release the page */
		release_metapage(mp);
		return (NULL);

	}

	ip->i_mapping->a_ops = &jfs_metapage_aops;
	mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS);

	/* Allocations to metadata inodes should not affect quotas */
	ip->i_flags |= S_NOQUOTA;

	/* only FILESYSTEM_I read from the primary table seeds gengen/inostamp */
	if ((inum == FILESYSTEM_I) && (JFS_IP(ip)->ipimap == sbi->ipaimap)) {
		sbi->gengen = le32_to_cpu(dp->di_gengen);
		sbi->inostamp = le32_to_cpu(dp->di_inostamp);
	}

	/* release the page */
	release_metapage(mp);

	/*
	 * __mark_inode_dirty expects inodes to be hashed.  Since we don't
	 * want special inodes in the fileset inode space, we make them
	 * appear hashed, but do not put on any lists.  hlist_del()
	 * will work fine and require no locking.
	 */
	hlist_add_fake(&ip->i_hash);

	return (ip);
}

/*
 * NAME:	diWriteSpecial()
 *
 * FUNCTION:	Write the special inode to disk
 *
 * PARAMETERS:
 *	ip - special inode
 *	secondary - 1 if secondary aggregate inode table
 *
 * RETURN VALUES: none (a failed page read is only logged)
 */

void diWriteSpecial(struct inode *ip, int secondary)
{
	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
	uint address;
	struct dinode *dp;
	ino_t inum = ip->i_ino;
	struct metapage *mp;

	if (secondary)
		address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
	else
		address = AITBL_OFF >> L2PSIZE;

	ASSERT(inum < INOSPEREXT);

	address += inum >> 3;	/* 8 inodes per 4K page */

	/* read the page of fixed disk inode (AIT) in raw mode */
	mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1);
	if (mp == NULL) {
		jfs_err("diWriteSpecial: failed to read aggregate inode "
			"extent!");
		return;
	}

	/* get the pointer to the disk inode of interest */
	dp = (struct dinode *) (mp->data);
	dp += inum % 8;		/* 8 inodes per 4K page */

	/*
	 * copy in-memory inode to on-disk inode, followed by 288 bytes
	 * of the in-memory xtree root
	 */
	copy_to_dinode(dp, ip);
	memcpy(&dp->di_xtroot, &JFS_IP(ip)->i_xtroot, 288);

	if (inum == FILESYSTEM_I)
		dp->di_gengen = cpu_to_le32(sbi->gengen);

	/* write the page */
	write_metapage(mp);
}

/*
 * NAME:	diFreeSpecial()
 *
 * FUNCTION:	Free allocated space for special inode: write back and
 *		truncate its page cache, then drop the inode reference.
 *		A NULL ip is logged and otherwise ignored.
 */
void diFreeSpecial(struct inode *ip)
{
	if (ip == NULL) {
		jfs_err("diFreeSpecial called with NULL ip!");
		return;
	}
	filemap_write_and_wait(ip->i_mapping);
	truncate_inode_pages(ip->i_mapping, 0);
	iput(ip);
}



/*
 * NAME:	diWrite()
 *
 * FUNCTION:	write the on-disk inode portion of the in-memory inode
 *		to its corresponding on-disk inode.
 *
 *		on entry, the specified incore inode should itself
 *		specify the disk inode number corresponding to the
 *		incore inode (i.e. i_number should be initialized).
 *
 *		the inode contains the inode extent address for the disk
 *		inode.  with the inode extent address in hand, the
 *		page of the extent that contains the disk inode is
 *		read and the disk inode portion of the incore inode
 *		is copied to the disk inode.
 *
 * PARAMETERS:
 *	tid -  transaction id
 *	ip  -  pointer to incore inode to be written to the inode extent.
 *
 * RETURN VALUES:
 *	0	- success
 *	-EIO	- the inode's extent descriptor (ixpxd) is invalid, or
 *		  read_metapage() failed.
 */
int diWrite(tid_t tid, struct inode *ip)
{
	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	int rc = 0;	/* never reassigned below: the final return yields 0 */
	s32 ino;
	struct dinode *dp;
	s64 blkno;
	int block_offset;
	int inodes_left;
	struct metapage *mp;
	unsigned long pageno;
	int rel_inode;
	int dioffset;
	struct inode *ipimap;
	uint type;
	lid_t lid;
	struct tlock *ditlck, *tlck;
	struct linelock *dilinelock, *ilinelock;
	struct lv *lv;
	int n;

	ipimap = jfs_ip->ipimap;

	ino = ip->i_ino & (INOSPERIAG - 1);

	/* the extent must have a non-zero address and the expected length */
	if (!addressPXD(&(jfs_ip->ixpxd)) ||
	    (lengthPXD(&(jfs_ip->ixpxd)) !=
	     JFS_IP(ipimap)->i_imap->im_nbperiext)) {
		jfs_error(ip->i_sb, "ixpxd invalid\n");
		return -EIO;
	}

	/*
	 * read the page of disk inode containing the specified inode:
	 */
	/* compute the block address of the page */
	blkno = INOPBLK(&(jfs_ip->ixpxd), ino, sbi->l2nbperpage);

	rel_inode = (ino & (INOSPERPAGE - 1));
	pageno = blkno >> sbi->l2nbperpage;

	if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) {
		/*
		 * OS/2 didn't always align inode extents on page boundaries
		 */
		inodes_left =
		    (sbi->nbperpage - block_offset) << sbi->l2niperblk;

		if (rel_inode < inodes_left)
			rel_inode += block_offset << sbi->l2niperblk;
		else {
			pageno += 1;
			rel_inode -= inodes_left;
		}
	}
	/* read the page of disk inode */
      retry:
	mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
	if (!mp)
		return -EIO;

	/* get the pointer to the disk inode */
	dp = (struct dinode *) mp->data;
	dp += rel_inode;

	dioffset = (ino & (INOSPERPAGE - 1)) << L2DISIZE;

	/*
	 * acquire transaction lock on the on-disk inode;
	 * N.B. tlock is acquired on ipimap not ip;
	 * a NULL return restarts from the page read above.
	 */
	if ((ditlck =
	     txLock(tid, ipimap, mp, tlckINODE | tlckENTRY)) == NULL)
		goto retry;
	dilinelock = (struct linelock *) & ditlck->lock;

	/*
	 * copy btree root from in-memory inode to on-disk inode
	 *
	 * (tlock is taken from inline B+-tree root in in-memory
	 * inode when the B+-tree root is updated, which is pointed
	 * by jfs_ip->blid as well as being on tx tlock list)
	 *
	 * further processing of btree root is based on the copy
	 * in in-memory inode, where txLog() will log from, and,
	 * for xtree root, txUpdateMap() will update map and reset
	 * XAD_NEW bit;
	 */

	if (S_ISDIR(ip->i_mode) && (lid = jfs_ip->xtlid)) {
		/*
		 * This is the special xtree inside the directory for storing
		 * the directory table
		 */
		xtpage_t *p, *xp;
		xad_t *xad;

		jfs_ip->xtlid = 0;
		tlck = lid_to_tlock(lid);
		assert(tlck->type & tlckXTREE);
		tlck->type |= tlckBTROOT;
		tlck->mp = mp;
		ilinelock = (struct linelock *) & tlck->lock;

		/*
		 * copy xtree root from inode to dinode:
		 * one memcpy per line-lock vector recorded in the tlock
		 */
		p = &jfs_ip->i_xtroot;
		xp = (xtpage_t *) &dp->di_dirtable;
		lv = ilinelock->lv;
		for (n = 0; n < ilinelock->index; n++, lv++) {
			memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
			       lv->length << L2XTSLOTSIZE);
		}

		/* reset on-disk (metadata page) xtree XAD_NEW bit */
		xad = &xp->xad[XTENTRYSTART];
		for (n = XTENTRYSTART;
		     n < le16_to_cpu(xp->header.nextindex); n++, xad++)
			if (xad->flag & (XAD_NEW | XAD_EXTENDED))
				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
	}

	/* no btree-root tlock recorded: skip straight to the inline data */
	if ((lid = jfs_ip->blid) == 0)
		goto inlineData;
	jfs_ip->blid = 0;

	tlck = lid_to_tlock(lid);
	type = tlck->type;	/* sampled before tlckBTROOT is or'ed in */
	tlck->type |= tlckBTROOT;
	tlck->mp = mp;
	ilinelock = (struct linelock *) & tlck->lock;

	/*
	 *	regular file: 16 byte (XAD slot) granularity
	 */
	if (type & tlckXTREE) {
		xtpage_t *p, *xp;
		xad_t *xad;

		/*
		 * copy xtree root from inode to dinode:
		 */
		p = &jfs_ip->i_xtroot;
		xp = &dp->di_xtroot;
		lv = ilinelock->lv;
		for (n = 0; n < ilinelock->index; n++, lv++) {
			memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
			       lv->length << L2XTSLOTSIZE);
		}

		/* reset on-disk (metadata page) xtree XAD_NEW bit */
		xad = &xp->xad[XTENTRYSTART];
		for (n = XTENTRYSTART;
		     n < le16_to_cpu(xp->header.nextindex); n++, xad++)
			if (xad->flag & (XAD_NEW | XAD_EXTENDED))
				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
	}
	/*
	 *	directory: 32 byte (directory entry slot) granularity
	 */
	else if (type & tlckDTREE) {
		dtpage_t *p, *xp;

		/*
		 * copy dtree root from inode to dinode:
		 */
		p = (dtpage_t *) &jfs_ip->i_dtroot;
		xp = (dtpage_t *) & dp->di_dtroot;
		lv = ilinelock->lv;
		for (n = 0; n < ilinelock->index; n++, lv++) {
			memcpy(&xp->slot[lv->offset], &p->slot[lv->offset],
			       lv->length << L2DTSLOTSIZE);
		}
	} else {
		/* neither xtree nor dtree: logged only, processing continues */
		jfs_err("diWrite: UFO tlock");
	}

      inlineData:
	/*
	 * copy inline symlink from in-memory inode to on-disk inode
	 */
	if (S_ISLNK(ip->i_mode) && ip->i_size < IDATASIZE) {
		lv = & dilinelock->lv[dilinelock->index];
		lv->offset = (dioffset + 2 * 128) >> L2INODESLOTSIZE;
		lv->length = 2;
		memcpy(&dp->di_fastsymlink, jfs_ip->i_inline, IDATASIZE);
		dilinelock->index++;
	}
	/*
	 * copy inline data from in-memory inode to on-disk inode:
	 * 128 byte slot granularity
	 */
	if (test_cflag(COMMIT_Inlineea, ip)) {
		lv = & dilinelock->lv[dilinelock->index];
		lv->offset = (dioffset + 3 * 128) >> L2INODESLOTSIZE;
		lv->length = 1;
		memcpy(&dp->di_inlineea, jfs_ip->i_inline_ea, INODESLOTSIZE);
		dilinelock->index++;

		clear_cflag(COMMIT_Inlineea, ip);
	}

	/*
	 *	lock/copy inode base: 128 byte slot granularity
	 *	(two slots when the directory table is also copied)
	 */
	lv = & dilinelock->lv[dilinelock->index];
	lv->offset = dioffset >> L2INODESLOTSIZE;
	copy_to_dinode(dp, ip);
	if (test_and_clear_cflag(COMMIT_Dirtable, ip)) {
		lv->length = 2;
		memcpy(&dp->di_dirtable, &jfs_ip->i_dirtable, 96);
	} else
		lv->length = 1;
	dilinelock->index++;

	/* release the buffer holding the updated on-disk inode.
	 * the buffer will be later written by commit processing.
	 */
	write_metapage(mp);

	return (rc);
}


/*
 * NAME:	diFree(ip)
 *
 * FUNCTION:	free a specified inode from the inode working map
 *		for a fileset or aggregate.
 *
 *		if the inode to be freed represents the first (only)
 *		free inode within the iag, the iag will be placed on
 *		the ag free inode list.
 *
 *		freeing the inode will cause the inode extent to be
 *		freed if the inode is the only allocated inode within
 *		the extent.  in this case all the disk resource backing
 *		up the inode extent will be freed. in addition, the iag
 *		will be placed on the ag extent free list if the extent
 *		is the first free extent in the iag.  if freeing the
 *		extent also means that no free inodes will exist for
 *		the iag, the iag will also be removed from the ag free
 *		inode list.
844 * 845 * the iag describing the inode will be freed if the extent 846 * is to be freed and it is the only backed extent within 847 * the iag. in this case, the iag will be removed from the 848 * ag free extent list and ag free inode list and placed on 849 * the inode map's free iag list. 850 * 851 * a careful update approach is used to provide consistency 852 * in the face of updates to multiple buffers. under this 853 * approach, all required buffers are obtained before making 854 * any updates and are held until all updates are complete. 855 * 856 * PARAMETERS: 857 * ip - inode to be freed. 858 * 859 * RETURN VALUES: 860 * 0 - success 861 * -EIO - i/o error. 862 */ 863 int diFree(struct inode *ip) 864 { 865 int rc; 866 ino_t inum = ip->i_ino; 867 struct iag *iagp, *aiagp, *biagp, *ciagp, *diagp; 868 struct metapage *mp, *amp, *bmp, *cmp, *dmp; 869 int iagno, ino, extno, bitno, sword, agno; 870 int back, fwd; 871 u32 bitmap, mask; 872 struct inode *ipimap = JFS_SBI(ip->i_sb)->ipimap; 873 struct inomap *imap = JFS_IP(ipimap)->i_imap; 874 pxd_t freepxd; 875 tid_t tid; 876 struct inode *iplist[3]; 877 struct tlock *tlck; 878 struct pxd_lock *pxdlock; 879 880 /* 881 * This is just to suppress compiler warnings. The same logic that 882 * references these variables is used to initialize them. 883 */ 884 aiagp = biagp = ciagp = diagp = NULL; 885 886 /* get the iag number containing the inode. 887 */ 888 iagno = INOTOIAG(inum); 889 890 /* make sure that the iag is contained within 891 * the map. 892 */ 893 if (iagno >= imap->im_nextiag) { 894 print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4, 895 imap, 32, 0); 896 jfs_error(ip->i_sb, "inum = %d, iagno = %d, nextiag = %d\n", 897 (uint) inum, iagno, imap->im_nextiag); 898 return -EIO; 899 } 900 901 /* get the allocation group for this ino. 
902 */ 903 agno = BLKTOAG(JFS_IP(ip)->agstart, JFS_SBI(ip->i_sb)); 904 905 /* Lock the AG specific inode map information 906 */ 907 AG_LOCK(imap, agno); 908 909 /* Obtain read lock in imap inode. Don't release it until we have 910 * read all of the IAG's that we are going to. 911 */ 912 IREAD_LOCK(ipimap, RDWRLOCK_IMAP); 913 914 /* read the iag. 915 */ 916 if ((rc = diIAGRead(imap, iagno, &mp))) { 917 IREAD_UNLOCK(ipimap); 918 AG_UNLOCK(imap, agno); 919 return (rc); 920 } 921 iagp = (struct iag *) mp->data; 922 923 /* get the inode number and extent number of the inode within 924 * the iag and the inode number within the extent. 925 */ 926 ino = inum & (INOSPERIAG - 1); 927 extno = ino >> L2INOSPEREXT; 928 bitno = ino & (INOSPEREXT - 1); 929 mask = HIGHORDER >> bitno; 930 931 if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { 932 jfs_error(ip->i_sb, "wmap shows inode already free\n"); 933 } 934 935 if (!addressPXD(&iagp->inoext[extno])) { 936 release_metapage(mp); 937 IREAD_UNLOCK(ipimap); 938 AG_UNLOCK(imap, agno); 939 jfs_error(ip->i_sb, "invalid inoext\n"); 940 return -EIO; 941 } 942 943 /* compute the bitmap for the extent reflecting the freed inode. 944 */ 945 bitmap = le32_to_cpu(iagp->wmap[extno]) & ~mask; 946 947 if (imap->im_agctl[agno].numfree > imap->im_agctl[agno].numinos) { 948 release_metapage(mp); 949 IREAD_UNLOCK(ipimap); 950 AG_UNLOCK(imap, agno); 951 jfs_error(ip->i_sb, "numfree > numinos\n"); 952 return -EIO; 953 } 954 /* 955 * inode extent still has some inodes or below low water mark: 956 * keep the inode extent; 957 */ 958 if (bitmap || 959 imap->im_agctl[agno].numfree < 96 || 960 (imap->im_agctl[agno].numfree < 288 && 961 (((imap->im_agctl[agno].numfree * 100) / 962 imap->im_agctl[agno].numinos) <= 25))) { 963 /* if the iag currently has no free inodes (i.e., 964 * the inode being freed is the first free inode of iag), 965 * insert the iag at head of the inode free list for the ag. 
966 */ 967 if (iagp->nfreeinos == 0) { 968 /* check if there are any iags on the ag inode 969 * free list. if so, read the first one so that 970 * we can link the current iag onto the list at 971 * the head. 972 */ 973 if ((fwd = imap->im_agctl[agno].inofree) >= 0) { 974 /* read the iag that currently is the head 975 * of the list. 976 */ 977 if ((rc = diIAGRead(imap, fwd, &))) { 978 IREAD_UNLOCK(ipimap); 979 AG_UNLOCK(imap, agno); 980 release_metapage(mp); 981 return (rc); 982 } 983 aiagp = (struct iag *) amp->data; 984 985 /* make current head point back to the iag. 986 */ 987 aiagp->inofreeback = cpu_to_le32(iagno); 988 989 write_metapage(amp); 990 } 991 992 /* iag points forward to current head and iag 993 * becomes the new head of the list. 994 */ 995 iagp->inofreefwd = 996 cpu_to_le32(imap->im_agctl[agno].inofree); 997 iagp->inofreeback = cpu_to_le32(-1); 998 imap->im_agctl[agno].inofree = iagno; 999 } 1000 IREAD_UNLOCK(ipimap); 1001 1002 /* update the free inode summary map for the extent if 1003 * freeing the inode means the extent will now have free 1004 * inodes (i.e., the inode being freed is the first free 1005 * inode of extent), 1006 */ 1007 if (iagp->wmap[extno] == cpu_to_le32(ONES)) { 1008 sword = extno >> L2EXTSPERSUM; 1009 bitno = extno & (EXTSPERSUM - 1); 1010 iagp->inosmap[sword] &= 1011 cpu_to_le32(~(HIGHORDER >> bitno)); 1012 } 1013 1014 /* update the bitmap. 1015 */ 1016 iagp->wmap[extno] = cpu_to_le32(bitmap); 1017 1018 /* update the free inode counts at the iag, ag and 1019 * map level. 
1020 */ 1021 le32_add_cpu(&iagp->nfreeinos, 1); 1022 imap->im_agctl[agno].numfree += 1; 1023 atomic_inc(&imap->im_numfree); 1024 1025 /* release the AG inode map lock 1026 */ 1027 AG_UNLOCK(imap, agno); 1028 1029 /* write the iag */ 1030 write_metapage(mp); 1031 1032 return (0); 1033 } 1034 1035 1036 /* 1037 * inode extent has become free and above low water mark: 1038 * free the inode extent; 1039 */ 1040 1041 /* 1042 * prepare to update iag list(s) (careful update step 1) 1043 */ 1044 amp = bmp = cmp = dmp = NULL; 1045 fwd = back = -1; 1046 1047 /* check if the iag currently has no free extents. if so, 1048 * it will be placed on the head of the ag extent free list. 1049 */ 1050 if (iagp->nfreeexts == 0) { 1051 /* check if the ag extent free list has any iags. 1052 * if so, read the iag at the head of the list now. 1053 * this (head) iag will be updated later to reflect 1054 * the addition of the current iag at the head of 1055 * the list. 1056 */ 1057 if ((fwd = imap->im_agctl[agno].extfree) >= 0) { 1058 if ((rc = diIAGRead(imap, fwd, &))) 1059 goto error_out; 1060 aiagp = (struct iag *) amp->data; 1061 } 1062 } else { 1063 /* iag has free extents. check if the addition of a free 1064 * extent will cause all extents to be free within this 1065 * iag. if so, the iag will be removed from the ag extent 1066 * free list and placed on the inode map's free iag list. 1067 */ 1068 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) { 1069 /* in preparation for removing the iag from the 1070 * ag extent free list, read the iags preceding 1071 * and following the iag on the ag extent free 1072 * list. 
1073 */ 1074 if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) { 1075 if ((rc = diIAGRead(imap, fwd, &))) 1076 goto error_out; 1077 aiagp = (struct iag *) amp->data; 1078 } 1079 1080 if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) { 1081 if ((rc = diIAGRead(imap, back, &bmp))) 1082 goto error_out; 1083 biagp = (struct iag *) bmp->data; 1084 } 1085 } 1086 } 1087 1088 /* remove the iag from the ag inode free list if freeing 1089 * this extent cause the iag to have no free inodes. 1090 */ 1091 if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) { 1092 int inofreeback = le32_to_cpu(iagp->inofreeback); 1093 int inofreefwd = le32_to_cpu(iagp->inofreefwd); 1094 1095 /* in preparation for removing the iag from the 1096 * ag inode free list, read the iags preceding 1097 * and following the iag on the ag inode free 1098 * list. before reading these iags, we must make 1099 * sure that we already don't have them in hand 1100 * from up above, since re-reading an iag (buffer) 1101 * we are currently holding would cause a deadlock. 
1102 */ 1103 if (inofreefwd >= 0) { 1104 1105 if (inofreefwd == fwd) 1106 ciagp = (struct iag *) amp->data; 1107 else if (inofreefwd == back) 1108 ciagp = (struct iag *) bmp->data; 1109 else { 1110 if ((rc = 1111 diIAGRead(imap, inofreefwd, &cmp))) 1112 goto error_out; 1113 ciagp = (struct iag *) cmp->data; 1114 } 1115 assert(ciagp != NULL); 1116 } 1117 1118 if (inofreeback >= 0) { 1119 if (inofreeback == fwd) 1120 diagp = (struct iag *) amp->data; 1121 else if (inofreeback == back) 1122 diagp = (struct iag *) bmp->data; 1123 else { 1124 if ((rc = 1125 diIAGRead(imap, inofreeback, &dmp))) 1126 goto error_out; 1127 diagp = (struct iag *) dmp->data; 1128 } 1129 assert(diagp != NULL); 1130 } 1131 } 1132 1133 IREAD_UNLOCK(ipimap); 1134 1135 /* 1136 * invalidate any page of the inode extent freed from buffer cache; 1137 */ 1138 freepxd = iagp->inoext[extno]; 1139 invalidate_pxd_metapages(ip, freepxd); 1140 1141 /* 1142 * update iag list(s) (careful update step 2) 1143 */ 1144 /* add the iag to the ag extent free list if this is the 1145 * first free extent for the iag. 1146 */ 1147 if (iagp->nfreeexts == 0) { 1148 if (fwd >= 0) 1149 aiagp->extfreeback = cpu_to_le32(iagno); 1150 1151 iagp->extfreefwd = 1152 cpu_to_le32(imap->im_agctl[agno].extfree); 1153 iagp->extfreeback = cpu_to_le32(-1); 1154 imap->im_agctl[agno].extfree = iagno; 1155 } else { 1156 /* remove the iag from the ag extent list if all extents 1157 * are now free and place it on the inode map iag free list. 
1158 */ 1159 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) { 1160 if (fwd >= 0) 1161 aiagp->extfreeback = iagp->extfreeback; 1162 1163 if (back >= 0) 1164 biagp->extfreefwd = iagp->extfreefwd; 1165 else 1166 imap->im_agctl[agno].extfree = 1167 le32_to_cpu(iagp->extfreefwd); 1168 1169 iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); 1170 1171 IAGFREE_LOCK(imap); 1172 iagp->iagfree = cpu_to_le32(imap->im_freeiag); 1173 imap->im_freeiag = iagno; 1174 IAGFREE_UNLOCK(imap); 1175 } 1176 } 1177 1178 /* remove the iag from the ag inode free list if freeing 1179 * this extent causes the iag to have no free inodes. 1180 */ 1181 if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) { 1182 if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) 1183 ciagp->inofreeback = iagp->inofreeback; 1184 1185 if ((int) le32_to_cpu(iagp->inofreeback) >= 0) 1186 diagp->inofreefwd = iagp->inofreefwd; 1187 else 1188 imap->im_agctl[agno].inofree = 1189 le32_to_cpu(iagp->inofreefwd); 1190 1191 iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); 1192 } 1193 1194 /* update the inode extent address and working map 1195 * to reflect the free extent. 1196 * the permanent map should have been updated already 1197 * for the inode being freed. 1198 */ 1199 if (iagp->pmap[extno] != 0) { 1200 jfs_error(ip->i_sb, "the pmap does not show inode free\n"); 1201 } 1202 iagp->wmap[extno] = 0; 1203 PXDlength(&iagp->inoext[extno], 0); 1204 PXDaddress(&iagp->inoext[extno], 0); 1205 1206 /* update the free extent and free inode summary maps 1207 * to reflect the freed extent. 1208 * the inode summary map is marked to indicate no inodes 1209 * available for the freed extent. 1210 */ 1211 sword = extno >> L2EXTSPERSUM; 1212 bitno = extno & (EXTSPERSUM - 1); 1213 mask = HIGHORDER >> bitno; 1214 iagp->inosmap[sword] |= cpu_to_le32(mask); 1215 iagp->extsmap[sword] &= cpu_to_le32(~mask); 1216 1217 /* update the number of free inodes and number of free extents 1218 * for the iag. 
1219 */ 1220 le32_add_cpu(&iagp->nfreeinos, -(INOSPEREXT - 1)); 1221 le32_add_cpu(&iagp->nfreeexts, 1); 1222 1223 /* update the number of free inodes and backed inodes 1224 * at the ag and inode map level. 1225 */ 1226 imap->im_agctl[agno].numfree -= (INOSPEREXT - 1); 1227 imap->im_agctl[agno].numinos -= INOSPEREXT; 1228 atomic_sub(INOSPEREXT - 1, &imap->im_numfree); 1229 atomic_sub(INOSPEREXT, &imap->im_numinos); 1230 1231 if (amp) 1232 write_metapage(amp); 1233 if (bmp) 1234 write_metapage(bmp); 1235 if (cmp) 1236 write_metapage(cmp); 1237 if (dmp) 1238 write_metapage(dmp); 1239 1240 /* 1241 * start transaction to update block allocation map 1242 * for the inode extent freed; 1243 * 1244 * N.B. AG_LOCK is released and iag will be released below, and 1245 * other thread may allocate inode from/reusing the ixad freed 1246 * BUT with new/different backing inode extent from the extent 1247 * to be freed by the transaction; 1248 */ 1249 tid = txBegin(ipimap->i_sb, COMMIT_FORCE); 1250 mutex_lock(&JFS_IP(ipimap)->commit_mutex); 1251 1252 /* acquire tlock of the iag page of the freed ixad 1253 * to force the page NOHOMEOK (even though no data is 1254 * logged from the iag page) until NOREDOPAGE|FREEXTENT log 1255 * for the free of the extent is committed; 1256 * write FREEXTENT|NOREDOPAGE log record 1257 * N.B. linelock is overlaid as freed extent descriptor; 1258 */ 1259 tlck = txLock(tid, ipimap, mp, tlckINODE | tlckFREE); 1260 pxdlock = (struct pxd_lock *) & tlck->lock; 1261 pxdlock->flag = mlckFREEPXD; 1262 pxdlock->pxd = freepxd; 1263 pxdlock->index = 1; 1264 1265 write_metapage(mp); 1266 1267 iplist[0] = ipimap; 1268 1269 /* 1270 * logredo needs the IAG number and IAG extent index in order 1271 * to ensure that the IMap is consistent. The least disruptive 1272 * way to pass these values through to the transaction manager 1273 * is in the iplist array. 1274 * 1275 * It's not pretty, but it works. 
1276 */ 1277 iplist[1] = (struct inode *) (size_t)iagno; 1278 iplist[2] = (struct inode *) (size_t)extno; 1279 1280 rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); 1281 1282 txEnd(tid); 1283 mutex_unlock(&JFS_IP(ipimap)->commit_mutex); 1284 1285 /* unlock the AG inode map information */ 1286 AG_UNLOCK(imap, agno); 1287 1288 return (0); 1289 1290 error_out: 1291 IREAD_UNLOCK(ipimap); 1292 1293 if (amp) 1294 release_metapage(amp); 1295 if (bmp) 1296 release_metapage(bmp); 1297 if (cmp) 1298 release_metapage(cmp); 1299 if (dmp) 1300 release_metapage(dmp); 1301 1302 AG_UNLOCK(imap, agno); 1303 1304 release_metapage(mp); 1305 1306 return (rc); 1307 } 1308 1309 /* 1310 * There are several places in the diAlloc* routines where we initialize 1311 * the inode. 1312 */ 1313 static inline void 1314 diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp) 1315 { 1316 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 1317 1318 ip->i_ino = (iagno << L2INOSPERIAG) + ino; 1319 jfs_ip->ixpxd = iagp->inoext[extno]; 1320 jfs_ip->agstart = le64_to_cpu(iagp->agstart); 1321 jfs_ip->active_ag = -1; 1322 } 1323 1324 1325 /* 1326 * NAME: diAlloc(pip,dir,ip) 1327 * 1328 * FUNCTION: allocate a disk inode from the inode working map 1329 * for a fileset or aggregate. 1330 * 1331 * PARAMETERS: 1332 * pip - pointer to incore inode for the parent inode. 1333 * dir - 'true' if the new disk inode is for a directory. 1334 * ip - pointer to a new inode 1335 * 1336 * RETURN VALUES: 1337 * 0 - success. 1338 * -ENOSPC - insufficient disk resources. 1339 * -EIO - i/o error. 
1340 */ 1341 int diAlloc(struct inode *pip, bool dir, struct inode *ip) 1342 { 1343 int rc, ino, iagno, addext, extno, bitno, sword; 1344 int nwords, rem, i, agno; 1345 u32 mask, inosmap, extsmap; 1346 struct inode *ipimap; 1347 struct metapage *mp; 1348 ino_t inum; 1349 struct iag *iagp; 1350 struct inomap *imap; 1351 1352 /* get the pointers to the inode map inode and the 1353 * corresponding imap control structure. 1354 */ 1355 ipimap = JFS_SBI(pip->i_sb)->ipimap; 1356 imap = JFS_IP(ipimap)->i_imap; 1357 JFS_IP(ip)->ipimap = ipimap; 1358 JFS_IP(ip)->fileset = FILESYSTEM_I; 1359 1360 /* for a directory, the allocation policy is to start 1361 * at the ag level using the preferred ag. 1362 */ 1363 if (dir) { 1364 agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap); 1365 AG_LOCK(imap, agno); 1366 goto tryag; 1367 } 1368 1369 /* for files, the policy starts off by trying to allocate from 1370 * the same iag containing the parent disk inode: 1371 * try to allocate the new disk inode close to the parent disk 1372 * inode, using parent disk inode number + 1 as the allocation 1373 * hint. (we use a left-to-right policy to attempt to avoid 1374 * moving backward on the disk.) compute the hint within the 1375 * file system and the iag. 1376 */ 1377 1378 /* get the ag number of this iag */ 1379 agno = BLKTOAG(JFS_IP(pip)->agstart, JFS_SBI(pip->i_sb)); 1380 1381 if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) { 1382 /* 1383 * There is an open file actively growing. We want to 1384 * allocate new inodes from a different ag to avoid 1385 * fragmentation problems. 
1386 */ 1387 agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap); 1388 AG_LOCK(imap, agno); 1389 goto tryag; 1390 } 1391 1392 inum = pip->i_ino + 1; 1393 ino = inum & (INOSPERIAG - 1); 1394 1395 /* back off the hint if it is outside of the iag */ 1396 if (ino == 0) 1397 inum = pip->i_ino; 1398 1399 /* lock the AG inode map information */ 1400 AG_LOCK(imap, agno); 1401 1402 /* Get read lock on imap inode */ 1403 IREAD_LOCK(ipimap, RDWRLOCK_IMAP); 1404 1405 /* get the iag number and read the iag */ 1406 iagno = INOTOIAG(inum); 1407 if ((rc = diIAGRead(imap, iagno, &mp))) { 1408 IREAD_UNLOCK(ipimap); 1409 AG_UNLOCK(imap, agno); 1410 return (rc); 1411 } 1412 iagp = (struct iag *) mp->data; 1413 1414 /* determine if new inode extent is allowed to be added to the iag. 1415 * new inode extent can be added to the iag if the ag 1416 * has less than 32 free disk inodes and the iag has free extents. 1417 */ 1418 addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts); 1419 1420 /* 1421 * try to allocate from the IAG 1422 */ 1423 /* check if the inode may be allocated from the iag 1424 * (i.e. the inode has free inodes or new extent can be added). 1425 */ 1426 if (iagp->nfreeinos || addext) { 1427 /* determine the extent number of the hint. 1428 */ 1429 extno = ino >> L2INOSPEREXT; 1430 1431 /* check if the extent containing the hint has backed 1432 * inodes. if so, try to allocate within this extent. 1433 */ 1434 if (addressPXD(&iagp->inoext[extno])) { 1435 bitno = ino & (INOSPEREXT - 1); 1436 if ((bitno = 1437 diFindFree(le32_to_cpu(iagp->wmap[extno]), 1438 bitno)) 1439 < INOSPEREXT) { 1440 ino = (extno << L2INOSPEREXT) + bitno; 1441 1442 /* a free inode (bit) was found within this 1443 * extent, so allocate it. 1444 */ 1445 rc = diAllocBit(imap, iagp, ino); 1446 IREAD_UNLOCK(ipimap); 1447 if (rc) { 1448 assert(rc == -EIO); 1449 } else { 1450 /* set the results of the allocation 1451 * and write the iag. 
1452 */ 1453 diInitInode(ip, iagno, ino, extno, 1454 iagp); 1455 mark_metapage_dirty(mp); 1456 } 1457 release_metapage(mp); 1458 1459 /* free the AG lock and return. 1460 */ 1461 AG_UNLOCK(imap, agno); 1462 return (rc); 1463 } 1464 1465 if (!addext) 1466 extno = 1467 (extno == 1468 EXTSPERIAG - 1) ? 0 : extno + 1; 1469 } 1470 1471 /* 1472 * no free inodes within the extent containing the hint. 1473 * 1474 * try to allocate from the backed extents following 1475 * hint or, if appropriate (i.e. addext is true), allocate 1476 * an extent of free inodes at or following the extent 1477 * containing the hint. 1478 * 1479 * the free inode and free extent summary maps are used 1480 * here, so determine the starting summary map position 1481 * and the number of words we'll have to examine. again, 1482 * the approach is to allocate following the hint, so we 1483 * might have to initially ignore prior bits of the summary 1484 * map that represent extents prior to the extent containing 1485 * the hint and later revisit these bits. 1486 */ 1487 bitno = extno & (EXTSPERSUM - 1); 1488 nwords = (bitno == 0) ? SMAPSZ : SMAPSZ + 1; 1489 sword = extno >> L2EXTSPERSUM; 1490 1491 /* mask any prior bits for the starting words of the 1492 * summary map. 1493 */ 1494 mask = (bitno == 0) ? 0 : (ONES << (EXTSPERSUM - bitno)); 1495 inosmap = le32_to_cpu(iagp->inosmap[sword]) | mask; 1496 extsmap = le32_to_cpu(iagp->extsmap[sword]) | mask; 1497 1498 /* scan the free inode and free extent summary maps for 1499 * free resources. 1500 */ 1501 for (i = 0; i < nwords; i++) { 1502 /* check if this word of the free inode summary 1503 * map describes an extent with free inodes. 1504 */ 1505 if (~inosmap) { 1506 /* an extent with free inodes has been 1507 * found. determine the extent number 1508 * and the inode number within the extent. 
1509 */ 1510 rem = diFindFree(inosmap, 0); 1511 extno = (sword << L2EXTSPERSUM) + rem; 1512 rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 1513 0); 1514 if (rem >= INOSPEREXT) { 1515 IREAD_UNLOCK(ipimap); 1516 release_metapage(mp); 1517 AG_UNLOCK(imap, agno); 1518 jfs_error(ip->i_sb, 1519 "can't find free bit in wmap\n"); 1520 return -EIO; 1521 } 1522 1523 /* determine the inode number within the 1524 * iag and allocate the inode from the 1525 * map. 1526 */ 1527 ino = (extno << L2INOSPEREXT) + rem; 1528 rc = diAllocBit(imap, iagp, ino); 1529 IREAD_UNLOCK(ipimap); 1530 if (rc) 1531 assert(rc == -EIO); 1532 else { 1533 /* set the results of the allocation 1534 * and write the iag. 1535 */ 1536 diInitInode(ip, iagno, ino, extno, 1537 iagp); 1538 mark_metapage_dirty(mp); 1539 } 1540 release_metapage(mp); 1541 1542 /* free the AG lock and return. 1543 */ 1544 AG_UNLOCK(imap, agno); 1545 return (rc); 1546 1547 } 1548 1549 /* check if we may allocate an extent of free 1550 * inodes and whether this word of the free 1551 * extents summary map describes a free extent. 1552 */ 1553 if (addext && ~extsmap) { 1554 /* a free extent has been found. determine 1555 * the extent number. 1556 */ 1557 rem = diFindFree(extsmap, 0); 1558 extno = (sword << L2EXTSPERSUM) + rem; 1559 1560 /* allocate an extent of free inodes. 1561 */ 1562 if ((rc = diNewExt(imap, iagp, extno))) { 1563 /* if there is no disk space for a 1564 * new extent, try to allocate the 1565 * disk inode from somewhere else. 1566 */ 1567 if (rc == -ENOSPC) 1568 break; 1569 1570 assert(rc == -EIO); 1571 } else { 1572 /* set the results of the allocation 1573 * and write the iag. 1574 */ 1575 diInitInode(ip, iagno, 1576 extno << L2INOSPEREXT, 1577 extno, iagp); 1578 mark_metapage_dirty(mp); 1579 } 1580 release_metapage(mp); 1581 /* free the imap inode & the AG lock & return. 
1582 */ 1583 IREAD_UNLOCK(ipimap); 1584 AG_UNLOCK(imap, agno); 1585 return (rc); 1586 } 1587 1588 /* move on to the next set of summary map words. 1589 */ 1590 sword = (sword == SMAPSZ - 1) ? 0 : sword + 1; 1591 inosmap = le32_to_cpu(iagp->inosmap[sword]); 1592 extsmap = le32_to_cpu(iagp->extsmap[sword]); 1593 } 1594 } 1595 /* unlock imap inode */ 1596 IREAD_UNLOCK(ipimap); 1597 1598 /* nothing doing in this iag, so release it. */ 1599 release_metapage(mp); 1600 1601 tryag: 1602 /* 1603 * try to allocate anywhere within the same AG as the parent inode. 1604 */ 1605 rc = diAllocAG(imap, agno, dir, ip); 1606 1607 AG_UNLOCK(imap, agno); 1608 1609 if (rc != -ENOSPC) 1610 return (rc); 1611 1612 /* 1613 * try to allocate in any AG. 1614 */ 1615 return (diAllocAny(imap, agno, dir, ip)); 1616 } 1617 1618 1619 /* 1620 * NAME: diAllocAG(imap,agno,dir,ip) 1621 * 1622 * FUNCTION: allocate a disk inode from the allocation group. 1623 * 1624 * this routine first determines if a new extent of free 1625 * inodes should be added for the allocation group, with 1626 * the current request satisfied from this extent. if this 1627 * is the case, an attempt will be made to do just that. if 1628 * this attempt fails or it has been determined that a new 1629 * extent should not be added, an attempt is made to satisfy 1630 * the request by allocating an existing (backed) free inode 1631 * from the allocation group. 1632 * 1633 * PRE CONDITION: Already have the AG lock for this AG. 1634 * 1635 * PARAMETERS: 1636 * imap - pointer to inode map control structure. 1637 * agno - allocation group to allocate from. 1638 * dir - 'true' if the new disk inode is for a directory. 1639 * ip - pointer to the new inode to be filled in on successful return 1640 * with the disk inode number allocated, its extent address 1641 * and the start of the ag. 1642 * 1643 * RETURN VALUES: 1644 * 0 - success. 1645 * -ENOSPC - insufficient disk resources. 1646 * -EIO - i/o error. 
1647 */ 1648 static int 1649 diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) 1650 { 1651 int rc, addext, numfree, numinos; 1652 1653 /* get the number of free and the number of backed disk 1654 * inodes currently within the ag. 1655 */ 1656 numfree = imap->im_agctl[agno].numfree; 1657 numinos = imap->im_agctl[agno].numinos; 1658 1659 if (numfree > numinos) { 1660 jfs_error(ip->i_sb, "numfree > numinos\n"); 1661 return -EIO; 1662 } 1663 1664 /* determine if we should allocate a new extent of free inodes 1665 * within the ag: for directory inodes, add a new extent 1666 * if there are a small number of free inodes or number of free 1667 * inodes is a small percentage of the number of backed inodes. 1668 */ 1669 if (dir) 1670 addext = (numfree < 64 || 1671 (numfree < 256 1672 && ((numfree * 100) / numinos) <= 20)); 1673 else 1674 addext = (numfree == 0); 1675 1676 /* 1677 * try to allocate a new extent of free inodes. 1678 */ 1679 if (addext) { 1680 /* if free space is not available for this new extent, try 1681 * below to allocate a free and existing (already backed) 1682 * inode from the ag. 1683 */ 1684 if ((rc = diAllocExt(imap, agno, ip)) != -ENOSPC) 1685 return (rc); 1686 } 1687 1688 /* 1689 * try to allocate an existing free inode from the ag. 1690 */ 1691 return (diAllocIno(imap, agno, ip)); 1692 } 1693 1694 1695 /* 1696 * NAME: diAllocAny(imap,agno,dir,iap) 1697 * 1698 * FUNCTION: allocate a disk inode from any other allocation group. 1699 * 1700 * this routine is called when an allocation attempt within 1701 * the primary allocation group has failed. if attempts to 1702 * allocate an inode from any allocation group other than the 1703 * specified primary group. 1704 * 1705 * PARAMETERS: 1706 * imap - pointer to inode map control structure. 1707 * agno - primary allocation group (to avoid). 1708 * dir - 'true' if the new disk inode is for a directory. 
1709 * ip - pointer to a new inode to be filled in on successful return 1710 * with the disk inode number allocated, its extent address 1711 * and the start of the ag. 1712 * 1713 * RETURN VALUES: 1714 * 0 - success. 1715 * -ENOSPC - insufficient disk resources. 1716 * -EIO - i/o error. 1717 */ 1718 static int 1719 diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip) 1720 { 1721 int ag, rc; 1722 int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag; 1723 1724 1725 /* try to allocate from the ags following agno up to 1726 * the maximum ag number. 1727 */ 1728 for (ag = agno + 1; ag <= maxag; ag++) { 1729 AG_LOCK(imap, ag); 1730 1731 rc = diAllocAG(imap, ag, dir, ip); 1732 1733 AG_UNLOCK(imap, ag); 1734 1735 if (rc != -ENOSPC) 1736 return (rc); 1737 } 1738 1739 /* try to allocate from the ags in front of agno. 1740 */ 1741 for (ag = 0; ag < agno; ag++) { 1742 AG_LOCK(imap, ag); 1743 1744 rc = diAllocAG(imap, ag, dir, ip); 1745 1746 AG_UNLOCK(imap, ag); 1747 1748 if (rc != -ENOSPC) 1749 return (rc); 1750 } 1751 1752 /* no free disk inodes. 1753 */ 1754 return -ENOSPC; 1755 } 1756 1757 1758 /* 1759 * NAME: diAllocIno(imap,agno,ip) 1760 * 1761 * FUNCTION: allocate a disk inode from the allocation group's free 1762 * inode list, returning an error if this free list is 1763 * empty (i.e. no iags on the list). 1764 * 1765 * allocation occurs from the first iag on the list using 1766 * the iag's free inode summary map to find the leftmost 1767 * free inode in the iag. 1768 * 1769 * PRE CONDITION: Already have AG lock for this AG. 1770 * 1771 * PARAMETERS: 1772 * imap - pointer to inode map control structure. 1773 * agno - allocation group. 1774 * ip - pointer to new inode to be filled in on successful return 1775 * with the disk inode number allocated, its extent address 1776 * and the start of the ag. 1777 * 1778 * RETURN VALUES: 1779 * 0 - success. 1780 * -ENOSPC - insufficient disk resources. 1781 * -EIO - i/o error. 
1782 */ 1783 static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) 1784 { 1785 int iagno, ino, rc, rem, extno, sword; 1786 struct metapage *mp; 1787 struct iag *iagp; 1788 1789 /* check if there are iags on the ag's free inode list. 1790 */ 1791 if ((iagno = imap->im_agctl[agno].inofree) < 0) 1792 return -ENOSPC; 1793 1794 /* obtain read lock on imap inode */ 1795 IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); 1796 1797 /* read the iag at the head of the list. 1798 */ 1799 if ((rc = diIAGRead(imap, iagno, &mp))) { 1800 IREAD_UNLOCK(imap->im_ipimap); 1801 return (rc); 1802 } 1803 iagp = (struct iag *) mp->data; 1804 1805 /* better be free inodes in this iag if it is on the 1806 * list. 1807 */ 1808 if (!iagp->nfreeinos) { 1809 IREAD_UNLOCK(imap->im_ipimap); 1810 release_metapage(mp); 1811 jfs_error(ip->i_sb, "nfreeinos = 0, but iag on freelist\n"); 1812 return -EIO; 1813 } 1814 1815 /* scan the free inode summary map to find an extent 1816 * with free inodes. 1817 */ 1818 for (sword = 0;; sword++) { 1819 if (sword >= SMAPSZ) { 1820 IREAD_UNLOCK(imap->im_ipimap); 1821 release_metapage(mp); 1822 jfs_error(ip->i_sb, 1823 "free inode not found in summary map\n"); 1824 return -EIO; 1825 } 1826 1827 if (~iagp->inosmap[sword]) 1828 break; 1829 } 1830 1831 /* found a extent with free inodes. determine 1832 * the extent number. 1833 */ 1834 rem = diFindFree(le32_to_cpu(iagp->inosmap[sword]), 0); 1835 if (rem >= EXTSPERSUM) { 1836 IREAD_UNLOCK(imap->im_ipimap); 1837 release_metapage(mp); 1838 jfs_error(ip->i_sb, "no free extent found\n"); 1839 return -EIO; 1840 } 1841 extno = (sword << L2EXTSPERSUM) + rem; 1842 1843 /* find the first free inode in the extent. 1844 */ 1845 rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 0); 1846 if (rem >= INOSPEREXT) { 1847 IREAD_UNLOCK(imap->im_ipimap); 1848 release_metapage(mp); 1849 jfs_error(ip->i_sb, "free inode not found\n"); 1850 return -EIO; 1851 } 1852 1853 /* compute the inode number within the iag. 
1854 */ 1855 ino = (extno << L2INOSPEREXT) + rem; 1856 1857 /* allocate the inode. 1858 */ 1859 rc = diAllocBit(imap, iagp, ino); 1860 IREAD_UNLOCK(imap->im_ipimap); 1861 if (rc) { 1862 release_metapage(mp); 1863 return (rc); 1864 } 1865 1866 /* set the results of the allocation and write the iag. 1867 */ 1868 diInitInode(ip, iagno, ino, extno, iagp); 1869 write_metapage(mp); 1870 1871 return (0); 1872 } 1873 1874 1875 /* 1876 * NAME: diAllocExt(imap,agno,ip) 1877 * 1878 * FUNCTION: add a new extent of free inodes to an iag, allocating 1879 * an inode from this extent to satisfy the current allocation 1880 * request. 1881 * 1882 * this routine first tries to find an existing iag with free 1883 * extents through the ag free extent list. if list is not 1884 * empty, the head of the list will be selected as the home 1885 * of the new extent of free inodes. otherwise (the list is 1886 * empty), a new iag will be allocated for the ag to contain 1887 * the extent. 1888 * 1889 * once an iag has been selected, the free extent summary map 1890 * is used to locate a free extent within the iag and diNewExt() 1891 * is called to initialize the extent, with initialization 1892 * including the allocation of the first inode of the extent 1893 * for the purpose of satisfying this request. 1894 * 1895 * PARAMETERS: 1896 * imap - pointer to inode map control structure. 1897 * agno - allocation group number. 1898 * ip - pointer to new inode to be filled in on successful return 1899 * with the disk inode number allocated, its extent address 1900 * and the start of the ag. 1901 * 1902 * RETURN VALUES: 1903 * 0 - success. 1904 * -ENOSPC - insufficient disk resources. 1905 * -EIO - i/o error. 1906 */ 1907 static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) 1908 { 1909 int rem, iagno, sword, extno, rc; 1910 struct metapage *mp; 1911 struct iag *iagp; 1912 1913 /* check if the ag has any iags with free extents. if not, 1914 * allocate a new iag for the ag. 
1915 */ 1916 if ((iagno = imap->im_agctl[agno].extfree) < 0) { 1917 /* If successful, diNewIAG will obtain the read lock on the 1918 * imap inode. 1919 */ 1920 if ((rc = diNewIAG(imap, &iagno, agno, &mp))) { 1921 return (rc); 1922 } 1923 iagp = (struct iag *) mp->data; 1924 1925 /* set the ag number if this a brand new iag 1926 */ 1927 iagp->agstart = 1928 cpu_to_le64(AGTOBLK(agno, imap->im_ipimap)); 1929 } else { 1930 /* read the iag. 1931 */ 1932 IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); 1933 if ((rc = diIAGRead(imap, iagno, &mp))) { 1934 IREAD_UNLOCK(imap->im_ipimap); 1935 jfs_error(ip->i_sb, "error reading iag\n"); 1936 return rc; 1937 } 1938 iagp = (struct iag *) mp->data; 1939 } 1940 1941 /* using the free extent summary map, find a free extent. 1942 */ 1943 for (sword = 0;; sword++) { 1944 if (sword >= SMAPSZ) { 1945 release_metapage(mp); 1946 IREAD_UNLOCK(imap->im_ipimap); 1947 jfs_error(ip->i_sb, "free ext summary map not found\n"); 1948 return -EIO; 1949 } 1950 if (~iagp->extsmap[sword]) 1951 break; 1952 } 1953 1954 /* determine the extent number of the free extent. 1955 */ 1956 rem = diFindFree(le32_to_cpu(iagp->extsmap[sword]), 0); 1957 if (rem >= EXTSPERSUM) { 1958 release_metapage(mp); 1959 IREAD_UNLOCK(imap->im_ipimap); 1960 jfs_error(ip->i_sb, "free extent not found\n"); 1961 return -EIO; 1962 } 1963 extno = (sword << L2EXTSPERSUM) + rem; 1964 1965 /* initialize the new extent. 1966 */ 1967 rc = diNewExt(imap, iagp, extno); 1968 IREAD_UNLOCK(imap->im_ipimap); 1969 if (rc) { 1970 /* something bad happened. if a new iag was allocated, 1971 * place it back on the inode map's iag free list, and 1972 * clear the ag number information. 1973 */ 1974 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { 1975 IAGFREE_LOCK(imap); 1976 iagp->iagfree = cpu_to_le32(imap->im_freeiag); 1977 imap->im_freeiag = iagno; 1978 IAGFREE_UNLOCK(imap); 1979 } 1980 write_metapage(mp); 1981 return (rc); 1982 } 1983 1984 /* set the results of the allocation and write the iag. 
1985 */ 1986 diInitInode(ip, iagno, extno << L2INOSPEREXT, extno, iagp); 1987 1988 write_metapage(mp); 1989 1990 return (0); 1991 } 1992 1993 1994 /* 1995 * NAME: diAllocBit(imap,iagp,ino) 1996 * 1997 * FUNCTION: allocate a backed inode from an iag. 1998 * 1999 * this routine performs the mechanics of allocating a 2000 * specified inode from a backed extent. 2001 * 2002 * if the inode to be allocated represents the last free 2003 * inode within the iag, the iag will be removed from the 2004 * ag free inode list. 2005 * 2006 * a careful update approach is used to provide consistency 2007 * in the face of updates to multiple buffers. under this 2008 * approach, all required buffers are obtained before making 2009 * any updates and are held all are updates are complete. 2010 * 2011 * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on 2012 * this AG. Must have read lock on imap inode. 2013 * 2014 * PARAMETERS: 2015 * imap - pointer to inode map control structure. 2016 * iagp - pointer to iag. 2017 * ino - inode number to be allocated within the iag. 2018 * 2019 * RETURN VALUES: 2020 * 0 - success. 2021 * -ENOSPC - insufficient disk resources. 2022 * -EIO - i/o error. 2023 */ 2024 static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) 2025 { 2026 int extno, bitno, agno, sword, rc; 2027 struct metapage *amp = NULL, *bmp = NULL; 2028 struct iag *aiagp = NULL, *biagp = NULL; 2029 u32 mask; 2030 2031 /* check if this is the last free inode within the iag. 2032 * if so, it will have to be removed from the ag free 2033 * inode list, so get the iags preceding and following 2034 * it on the list. 
2035 */ 2036 if (iagp->nfreeinos == cpu_to_le32(1)) { 2037 if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) { 2038 if ((rc = 2039 diIAGRead(imap, le32_to_cpu(iagp->inofreefwd), 2040 &))) 2041 return (rc); 2042 aiagp = (struct iag *) amp->data; 2043 } 2044 2045 if ((int) le32_to_cpu(iagp->inofreeback) >= 0) { 2046 if ((rc = 2047 diIAGRead(imap, 2048 le32_to_cpu(iagp->inofreeback), 2049 &bmp))) { 2050 if (amp) 2051 release_metapage(amp); 2052 return (rc); 2053 } 2054 biagp = (struct iag *) bmp->data; 2055 } 2056 } 2057 2058 /* get the ag number, extent number, inode number within 2059 * the extent. 2060 */ 2061 agno = BLKTOAG(le64_to_cpu(iagp->agstart), JFS_SBI(imap->im_ipimap->i_sb)); 2062 extno = ino >> L2INOSPEREXT; 2063 bitno = ino & (INOSPEREXT - 1); 2064 2065 /* compute the mask for setting the map. 2066 */ 2067 mask = HIGHORDER >> bitno; 2068 2069 /* the inode should be free and backed. 2070 */ 2071 if (((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) || 2072 ((le32_to_cpu(iagp->wmap[extno]) & mask) != 0) || 2073 (addressPXD(&iagp->inoext[extno]) == 0)) { 2074 if (amp) 2075 release_metapage(amp); 2076 if (bmp) 2077 release_metapage(bmp); 2078 2079 jfs_error(imap->im_ipimap->i_sb, "iag inconsistent\n"); 2080 return -EIO; 2081 } 2082 2083 /* mark the inode as allocated in the working map. 2084 */ 2085 iagp->wmap[extno] |= cpu_to_le32(mask); 2086 2087 /* check if all inodes within the extent are now 2088 * allocated. if so, update the free inode summary 2089 * map to reflect this. 2090 */ 2091 if (iagp->wmap[extno] == cpu_to_le32(ONES)) { 2092 sword = extno >> L2EXTSPERSUM; 2093 bitno = extno & (EXTSPERSUM - 1); 2094 iagp->inosmap[sword] |= cpu_to_le32(HIGHORDER >> bitno); 2095 } 2096 2097 /* if this was the last free inode in the iag, remove the 2098 * iag from the ag free inode list. 
2099 */ 2100 if (iagp->nfreeinos == cpu_to_le32(1)) { 2101 if (amp) { 2102 aiagp->inofreeback = iagp->inofreeback; 2103 write_metapage(amp); 2104 } 2105 2106 if (bmp) { 2107 biagp->inofreefwd = iagp->inofreefwd; 2108 write_metapage(bmp); 2109 } else { 2110 imap->im_agctl[agno].inofree = 2111 le32_to_cpu(iagp->inofreefwd); 2112 } 2113 iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); 2114 } 2115 2116 /* update the free inode count at the iag, ag, inode 2117 * map levels. 2118 */ 2119 le32_add_cpu(&iagp->nfreeinos, -1); 2120 imap->im_agctl[agno].numfree -= 1; 2121 atomic_dec(&imap->im_numfree); 2122 2123 return (0); 2124 } 2125 2126 2127 /* 2128 * NAME: diNewExt(imap,iagp,extno) 2129 * 2130 * FUNCTION: initialize a new extent of inodes for an iag, allocating 2131 * the first inode of the extent for use for the current 2132 * allocation request. 2133 * 2134 * disk resources are allocated for the new extent of inodes 2135 * and the inodes themselves are initialized to reflect their 2136 * existence within the extent (i.e. their inode numbers and 2137 * inode extent addresses are set) and their initial state 2138 * (mode and link count are set to zero). 2139 * 2140 * if the iag is new, it is not yet on an ag extent free list 2141 * but will now be placed on this list. 2142 * 2143 * if the allocation of the new extent causes the iag to 2144 * have no free extent, the iag will be removed from the 2145 * ag extent free list. 2146 * 2147 * if the iag has no free backed inodes, it will be placed 2148 * on the ag free inode list, since the addition of the new 2149 * extent will now cause it to have free inodes. 2150 * 2151 * a careful update approach is used to provide consistency 2152 * (i.e. list consistency) in the face of updates to multiple 2153 * buffers. under this approach, all required buffers are 2154 * obtained before making any updates and are held until all 2155 * updates are complete. 2156 * 2157 * PRE CONDITION: Already have buffer lock on iagp. 
Already have AG lock on
 *		   this AG.  Must have read lock on imap inode.
 *
 * PARAMETERS:
 *	imap	- pointer to inode map control structure.
 *	iagp	- pointer to iag.
 *	extno	- extent number.
 *
 * RETURN VALUES:
 *	0	- success.
 *	-ENOSPC	- insufficient disk resources.
 *	-EIO	- i/o error.
 */
static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
{
	int agno, iagno, fwd, back, freei = 0, sword, rc;
	struct iag *aiagp = NULL, *biagp = NULL, *ciagp = NULL;
	struct metapage *amp, *bmp, *cmp, *dmp;
	struct inode *ipimap;
	s64 blkno, hint;
	int i, j;
	u32 mask;
	ino_t ino;
	struct dinode *dp;
	struct jfs_sb_info *sbi;

	/* better have free extents.
	 */
	if (!iagp->nfreeexts) {
		jfs_error(imap->im_ipimap->i_sb, "no free extents\n");
		return -EIO;
	}

	/* get the inode map inode.
	 */
	ipimap = imap->im_ipimap;
	sbi = JFS_SBI(ipimap->i_sb);

	/* amp/bmp/cmp hold the neighbouring iag buffers; NULL means
	 * "not read", which is what the exit paths below test for.
	 */
	amp = bmp = cmp = NULL;

	/* get the ag and iag numbers for this iag.
	 */
	agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
	iagno = le32_to_cpu(iagp->iagnum);

	/* check if this is the last free extent within the
	 * iag.  if so, the iag must be removed from the ag
	 * free extent list, so get the iags preceding and
	 * following the iag on this list.
	 */
	if (iagp->nfreeexts == cpu_to_le32(1)) {
		if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) {
			/* read the forward neighbour into amp; no buffer
			 * is held yet, so a plain return is sufficient.
			 */
			if ((rc = diIAGRead(imap, fwd, &amp)))
				return (rc);
			aiagp = (struct iag *) amp->data;
		}

		if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) {
			if ((rc = diIAGRead(imap, back, &bmp)))
				goto error_out;
			biagp = (struct iag *) bmp->data;
		}
	} else {
		/* the iag has free extents.  if all extents are free
		 * (as is the case for a newly allocated iag), the iag
		 * must be added to the ag free extent list, so get
		 * the iag at the head of the list in preparation for
		 * adding this iag to this list.
		 */
		fwd = back = -1;
		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
			if ((fwd = imap->im_agctl[agno].extfree) >= 0) {
				/* read the current list head into amp */
				if ((rc = diIAGRead(imap, fwd, &amp)))
					goto error_out;
				aiagp = (struct iag *) amp->data;
			}
		}
	}

	/* check if the iag has no free inodes.  if so, the iag
	 * will have to be added to the ag free inode list, so get
	 * the iag at the head of the list in preparation for
	 * adding this iag to this list.  in doing this, we must
	 * check if we already have the iag at the head of
	 * the list in hand.
	 */
	if (iagp->nfreeinos == 0) {
		freei = imap->im_agctl[agno].inofree;

		if (freei >= 0) {
			if (freei == fwd) {
				ciagp = aiagp;
			} else if (freei == back) {
				ciagp = biagp;
			} else {
				if ((rc = diIAGRead(imap, freei, &cmp)))
					goto error_out;
				ciagp = (struct iag *) cmp->data;
			}
			if (ciagp == NULL) {
				jfs_error(imap->im_ipimap->i_sb,
					  "ciagp == NULL\n");
				rc = -EIO;
				goto error_out;
			}
		}
	}

	/* allocate disk space for the inode extent.
	 * the hint is the block just before the start of the ag when
	 * there is no preceding backed extent, otherwise the last block
	 * of the preceding extent.
	 */
	if ((extno == 0) || (addressPXD(&iagp->inoext[extno - 1]) == 0))
		hint = ((s64) agno << sbi->bmap->db_agl2size) - 1;
	else
		hint = addressPXD(&iagp->inoext[extno - 1]) +
		    lengthPXD(&iagp->inoext[extno - 1]) - 1;

	if ((rc = dbAlloc(ipimap, hint, (s64) imap->im_nbperiext, &blkno)))
		goto error_out;

	/* compute the inode number of the first inode within the
	 * extent.
	 */
	ino = (iagno << L2INOSPERIAG) + (extno << L2INOSPEREXT);

	/* initialize the inodes within the newly allocated extent a
	 * page at a time.
	 */
	for (i = 0; i < imap->im_nbperiext; i += sbi->nbperpage) {
		/* get a buffer for this page of disk inodes.
		 */
		dmp = get_metapage(ipimap, blkno + i, PSIZE, 1);
		if (dmp == NULL) {
			/* NOTE(review): the extent obtained from dbAlloc()
			 * above is not given back on this path -- confirm
			 * whether that is intentional.
			 */
			rc = -EIO;
			goto error_out;
		}
		dp = (struct dinode *) dmp->data;

		/* initialize the inode number, mode, link count and
		 * inode extent address.
		 */
		for (j = 0; j < INOSPERPAGE; j++, dp++, ino++) {
			dp->di_inostamp = cpu_to_le32(sbi->inostamp);
			dp->di_number = cpu_to_le32(ino);
			dp->di_fileset = cpu_to_le32(FILESYSTEM_I);
			dp->di_mode = 0;
			dp->di_nlink = 0;
			PXDaddress(&(dp->di_ixpxd), blkno);
			PXDlength(&(dp->di_ixpxd), imap->im_nbperiext);
		}
		write_metapage(dmp);
	}

	/* if this is the last free extent within the iag, remove the
	 * iag from the ag free extent list.
	 */
	if (iagp->nfreeexts == cpu_to_le32(1)) {
		if (fwd >= 0)
			aiagp->extfreeback = iagp->extfreeback;

		if (back >= 0)
			biagp->extfreefwd = iagp->extfreefwd;
		else
			imap->im_agctl[agno].extfree =
			    le32_to_cpu(iagp->extfreefwd);

		iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
	} else {
		/* if the iag has all free extents (newly allocated iag),
		 * add the iag to the ag free extent list.
		 */
		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
			if (fwd >= 0)
				aiagp->extfreeback = cpu_to_le32(iagno);

			iagp->extfreefwd = cpu_to_le32(fwd);
			iagp->extfreeback = cpu_to_le32(-1);
			imap->im_agctl[agno].extfree = iagno;
		}
	}

	/* if the iag has no free inodes, add the iag to the
	 * ag free inode list.
	 */
	if (iagp->nfreeinos == 0) {
		if (freei >= 0)
			ciagp->inofreeback = cpu_to_le32(iagno);

		iagp->inofreefwd =
		    cpu_to_le32(imap->im_agctl[agno].inofree);
		iagp->inofreeback = cpu_to_le32(-1);
		imap->im_agctl[agno].inofree = iagno;
	}

	/* initialize the extent descriptor of the extent. */
	PXDlength(&iagp->inoext[extno], imap->im_nbperiext);
	PXDaddress(&iagp->inoext[extno], blkno);

	/* initialize the working and persistent map of the extent.
	 * the working map will be initialized such that
	 * it indicates the first inode of the extent is allocated.
	 */
	iagp->wmap[extno] = cpu_to_le32(HIGHORDER);
	iagp->pmap[extno] = 0;

	/* update the free inode and free extent summary maps
	 * for the extent to indicate the extent has free inodes
	 * and no longer represents a free extent.
	 */
	sword = extno >> L2EXTSPERSUM;
	mask = HIGHORDER >> (extno & (EXTSPERSUM - 1));
	iagp->extsmap[sword] |= cpu_to_le32(mask);
	iagp->inosmap[sword] &= cpu_to_le32(~mask);

	/* update the free inode and free extent counts for the
	 * iag.  one inode of the new extent is consumed by the
	 * current allocation, hence INOSPEREXT - 1 free inodes.
	 */
	le32_add_cpu(&iagp->nfreeinos, (INOSPEREXT - 1));
	le32_add_cpu(&iagp->nfreeexts, -1);

	/* update the free and backed inode counts for the ag.
	 */
	imap->im_agctl[agno].numfree += (INOSPEREXT - 1);
	imap->im_agctl[agno].numinos += INOSPEREXT;

	/* update the free and backed inode counts for the inode map.
	 */
	atomic_add(INOSPEREXT - 1, &imap->im_numfree);
	atomic_add(INOSPEREXT, &imap->im_numinos);

	/* write the iags.
	 */
	if (amp)
		write_metapage(amp);
	if (bmp)
		write_metapage(bmp);
	if (cmp)
		write_metapage(cmp);

	return (0);

      error_out:

	/* release the iags.
	 */
	if (amp)
		release_metapage(amp);
	if (bmp)
		release_metapage(bmp);
	if (cmp)
		release_metapage(cmp);

	return (rc);
}


/*
 * NAME:	diNewIAG(imap,iagnop,agno,mpp)
 *
 * FUNCTION:	allocate a new iag for an allocation group.
 *
 *		first tries to allocate the iag from the inode map
 *		iagfree list:
 *		if the list has free iags, the head of the list is removed
 *		and returned to satisfy the request.
 *		if the inode map's iag free list is empty, the inode map
 *		is extended to hold a new iag. this new iag is initialized
 *		and returned to satisfy the request.
 *
 * PARAMETERS:
 *	imap	- pointer to inode map control structure.
 *	iagnop	- pointer to an iag number set with the number of the
 *		  newly allocated iag upon successful return.
 *	agno	- allocation group number.
 *	mpp	- Buffer pointer to be filled in with new IAG's buffer
 *
 * RETURN VALUES:
 *	0	- success.
 *	-ENOSPC	- insufficient disk resources.
 *	-EIO	- i/o error.
 *
 * serialization:
 *	AG lock held on entry/exit;
 *	write lock on the map is held inside;
 *	read lock on the map is held on successful completion;
 *
 * note: new iag transaction:
 * . synchronously write iag;
 * . write log of xtree and inode of imap;
 * . commit;
 * . synchronous write of xtree (right to left, bottom to top);
 * . at start of logredo(): init in-memory imap with one additional iag page;
 * .
at end of logredo(): re-read imap inode to determine
 *	  new imap size;
 */
/*
 * Implementation notes (from the code below):
 *  - the iag free list lock (IAGFREE_LOCK) is taken first and is dropped
 *    at the "out" label, or explicitly on the one path that returns
 *    without going through "out";
 *  - the imap write lock is only taken when the map has to be extended and
 *    is released before the read lock is taken;
 *  - on success the function returns with the imap read lock still held,
 *    *iagnop set to the iag number and *mpp set to the iag's metapage.
 */
static int
diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
{
	int rc;
	int iagno, i, xlen;
	struct inode *ipimap;
	struct super_block *sb;
	struct jfs_sb_info *sbi;
	struct metapage *mp;
	struct iag *iagp;
	s64 xaddr = 0;
	s64 blkno;
	tid_t tid;
	struct inode *iplist[1];

	/* pick up pointers to the inode map and mount inodes */
	ipimap = imap->im_ipimap;
	sb = ipimap->i_sb;
	sbi = JFS_SBI(sb);

	/* acquire the free iag lock */
	IAGFREE_LOCK(imap);

	/* if there are any iags on the inode map free iag list,
	 * allocate the iag from the head of the list.
	 */
	if (imap->im_freeiag >= 0) {
		/* pick up the iag number at the head of the list */
		iagno = imap->im_freeiag;

		/* determine the logical block number of the iag
		 * (not used again on this path; the iag is read by
		 * number further down)
		 */
		blkno = IAGTOLBLK(iagno, sbi->l2nbperpage);
	} else {
		/* no free iags.  the inode map will have to be extended
		 * to include a new iag.
		 */

		/* acquire inode map lock */
		IWRITE_LOCK(ipimap, RDWRLOCK_IMAP);

		/* sanity check: the map's size in pages must be the
		 * number of iags plus one before it is extended.
		 * this path unlocks everything itself and returns
		 * directly instead of jumping to "out".
		 */
		if (ipimap->i_size >> L2PSIZE != imap->im_nextiag + 1) {
			IWRITE_UNLOCK(ipimap);
			IAGFREE_UNLOCK(imap);
			jfs_error(imap->im_ipimap->i_sb,
				  "ipimap->i_size is wrong\n");
			return -EIO;
		}


		/* get the next available iag number */
		iagno = imap->im_nextiag;

		/* make sure that we have not exceeded the maximum inode
		 * number limit.
		 */
		if (iagno > (MAXIAGS - 1)) {
			/* release the inode map lock */
			IWRITE_UNLOCK(ipimap);

			rc = -ENOSPC;
			goto out;
		}

		/*
		 * synchronously append new iag page.
		 */
		/* determine the logical address of iag page to append */
		blkno = IAGTOLBLK(iagno, sbi->l2nbperpage);

		/* Allocate extent for new iag page */
		xlen = sbi->nbperpage;
		if ((rc = dbAlloc(ipimap, 0, (s64) xlen, &xaddr))) {
			/* release the inode map lock */
			IWRITE_UNLOCK(ipimap);

			goto out;
		}

		/*
		 * start transaction of update of the inode map
		 * addressing structure pointing to the new iag page;
		 */
		tid = txBegin(sb, COMMIT_FORCE);
		mutex_lock(&JFS_IP(ipimap)->commit_mutex);

		/* update the inode map addressing structure to point to it */
		if ((rc =
		     xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) {
			txEnd(tid);
			mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
			/* Free the blocks allocated for the iag since it was
			 * not successfully added to the inode map
			 */
			dbFree(ipimap, xaddr, (s64) xlen);

			/* release the inode map lock */
			IWRITE_UNLOCK(ipimap);

			goto out;
		}

		/* update the inode map's inode to reflect the extension */
		ipimap->i_size += PSIZE;
		inode_add_bytes(ipimap, PSIZE);

		/* assign a buffer for the page */
		mp = get_metapage(ipimap, blkno, PSIZE, 0);
		if (!mp) {
			/*
			 * This is very unlikely since we just created the
			 * extent, but let's try to handle it correctly
			 */
			xtTruncate(tid, ipimap, ipimap->i_size - PSIZE,
				   COMMIT_PWMAP);

			txAbort(tid, 0);
			txEnd(tid);
			mutex_unlock(&JFS_IP(ipimap)->commit_mutex);

			/* release the inode map lock */
			IWRITE_UNLOCK(ipimap);

			rc = -EIO;
			goto out;
		}
		iagp = (struct iag *) mp->data;

		/* init the iag: zero it, then mark it as being on no
		 * list (-1 links), with no free inodes and all extents
		 * free.
		 */
		memset(iagp, 0, sizeof(struct iag));
		iagp->iagnum = cpu_to_le32(iagno);
		iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
		iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
		iagp->iagfree = cpu_to_le32(-1);
		iagp->nfreeinos = 0;
		iagp->nfreeexts = cpu_to_le32(EXTSPERIAG);

		/* initialize the free inode summary map (free extent
		 * summary map initialization handled by the memset above).
		 */
		for (i = 0; i < SMAPSZ; i++)
			iagp->inosmap[i] = cpu_to_le32(ONES);

		/*
		 * Write and sync the metapage
		 */
		flush_metapage(mp);

		/*
		 * txCommit(COMMIT_FORCE) will synchronously write address
		 * index pages and inode after commit in careful update order
		 * of address index pages (right to left, bottom up);
		 *
		 * NOTE(review): rc from txCommit() is not examined; it is
		 * overwritten by the diIAGRead() call further down.
		 */
		iplist[0] = ipimap;
		rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);

		txEnd(tid);
		mutex_unlock(&JFS_IP(ipimap)->commit_mutex);

		duplicateIXtree(sb, blkno, xlen, &xaddr);

		/* update the next available iag number */
		imap->im_nextiag += 1;

		/* Add the iag to the iag free list so we don't lose the iag
		 * if a failure happens now.
		 */
		imap->im_freeiag = iagno;

		/* Until we have logredo working, we want the imap inode &
		 * control page to be up to date.
		 */
		diSync(ipimap);

		/* release the inode map lock */
		IWRITE_UNLOCK(ipimap);
	}

	/* obtain read lock on map */
	IREAD_LOCK(ipimap, RDWRLOCK_IMAP);

	/* read the iag; any failure is reported as -EIO and the read
	 * lock is dropped again.
	 */
	if ((rc = diIAGRead(imap, iagno, &mp))) {
		IREAD_UNLOCK(ipimap);
		rc = -EIO;
		goto out;
	}
	iagp = (struct iag *) mp->data;

	/* remove the iag from the iag free list */
	imap->im_freeiag = le32_to_cpu(iagp->iagfree);
	iagp->iagfree = cpu_to_le32(-1);

	/* set the return iag number and buffer pointer
	 * (rc is 0 here; the read lock stays held for the caller)
	 */
	*iagnop = iagno;
	*mpp = mp;

      out:
	/* release the iag free lock */
	IAGFREE_UNLOCK(imap);

	return (rc);
}

/*
 * NAME:	diIAGRead()
 *
 * FUNCTION:	get the buffer for the specified iag within a fileset
 *		or aggregate inode map.
2659 * 2660 * PARAMETERS: 2661 * imap - pointer to inode map control structure. 2662 * iagno - iag number. 2663 * bpp - point to buffer pointer to be filled in on successful 2664 * exit. 2665 * 2666 * SERIALIZATION: 2667 * must have read lock on imap inode 2668 * (When called by diExtendFS, the filesystem is quiesced, therefore 2669 * the read lock is unnecessary.) 2670 * 2671 * RETURN VALUES: 2672 * 0 - success. 2673 * -EIO - i/o error. 2674 */ 2675 static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp) 2676 { 2677 struct inode *ipimap = imap->im_ipimap; 2678 s64 blkno; 2679 2680 /* compute the logical block number of the iag. */ 2681 blkno = IAGTOLBLK(iagno, JFS_SBI(ipimap->i_sb)->l2nbperpage); 2682 2683 /* read the iag. */ 2684 *mpp = read_metapage(ipimap, blkno, PSIZE, 0); 2685 if (*mpp == NULL) { 2686 return -EIO; 2687 } 2688 2689 return (0); 2690 } 2691 2692 /* 2693 * NAME: diFindFree() 2694 * 2695 * FUNCTION: find the first free bit in a word starting at 2696 * the specified bit position. 2697 * 2698 * PARAMETERS: 2699 * word - word to be examined. 2700 * start - starting bit position. 2701 * 2702 * RETURN VALUES: 2703 * bit position of first free bit in the word or 32 if 2704 * no free bits were found. 2705 */ 2706 static int diFindFree(u32 word, int start) 2707 { 2708 int bitno; 2709 assert(start < 32); 2710 /* scan the word for the first free bit. */ 2711 for (word <<= start, bitno = start; bitno < 32; 2712 bitno++, word <<= 1) { 2713 if ((word & HIGHORDER) == 0) 2714 break; 2715 } 2716 return (bitno); 2717 } 2718 2719 /* 2720 * NAME: diUpdatePMap() 2721 * 2722 * FUNCTION: Update the persistent map in an IAG for the allocation or 2723 * freeing of the specified inode. 2724 * 2725 * PRE CONDITIONS: Working map has already been updated for allocate. 
*
 * PARAMETERS:
 *	ipimap	- Incore inode map inode
 *	inum	- Number of inode to mark in permanent map
 *	is_free	- If 'true' indicates inode should be marked freed, otherwise
 *		  indicates inode should be marked allocated.
 *
 * RETURN VALUES:
 *		0 for success
 */
int
diUpdatePMap(struct inode *ipimap,
	     unsigned long inum, bool is_free, struct tblock * tblk)
{
	int rc;
	struct iag *iagp;
	struct metapage *mp;
	int iagno, ino, extno, bitno;
	struct inomap *imap;
	u32 mask;
	struct jfs_log *log;
	int lsn, difft, diffp;
	unsigned long flags;

	imap = JFS_IP(ipimap)->i_imap;
	/* get the iag number containing the inode */
	iagno = INOTOIAG(inum);
	/* make sure that the iag is contained within the map */
	if (iagno >= imap->im_nextiag) {
		jfs_error(ipimap->i_sb, "the iag is outside the map\n");
		return -EIO;
	}
	/* read the iag; the imap read lock is only held across the read */
	IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
	rc = diIAGRead(imap, iagno, &mp);
	IREAD_UNLOCK(ipimap);
	if (rc)
		return (rc);
	metapage_wait_for_io(mp);
	iagp = (struct iag *) mp->data;
	/* get the inode number and extent number of the inode within
	 * the iag and the inode number within the extent.
	 */
	ino = inum & (INOSPERIAG - 1);
	extno = ino >> L2INOSPEREXT;
	bitno = ino & (INOSPEREXT - 1);
	/* bit 0 of an extent's map word is the high-order bit */
	mask = HIGHORDER >> bitno;
	/*
	 * mark the inode free in persistent map:
	 */
	if (is_free) {
		/* The inode should have been allocated both in working
		 * map and in persistent map;
		 * the inode will be freed from working map at the release
		 * of last reference release;
		 *
		 * Both checks below only report the inconsistency via
		 * jfs_error(); the pmap bit is cleared regardless.
		 * NOTE(review): inum is unsigned long but is printed
		 * with %ld.
		 */
		if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
			jfs_error(ipimap->i_sb,
				  "inode %ld not marked as allocated in wmap!\n",
				  inum);
		}
		if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) {
			jfs_error(ipimap->i_sb,
				  "inode %ld not marked as allocated in pmap!\n",
				  inum);
		}
		/* update the bitmap for the extent of the freed inode */
		iagp->pmap[extno] &= cpu_to_le32(~mask);
	}
	/*
	 * mark the inode allocated in persistent map:
	 */
	else {
		/* The inode should be already allocated in the working map
		 * and should be free in persistent map;
		 * unlike the free case, a mismatch here releases the iag
		 * buffer and fails with -EIO.
		 */
		if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
			release_metapage(mp);
			jfs_error(ipimap->i_sb,
				  "the inode is not allocated in the working map\n");
			return -EIO;
		}
		if ((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) {
			release_metapage(mp);
			jfs_error(ipimap->i_sb,
				  "the inode is not free in the persistent map\n");
			return -EIO;
		}
		/* update the bitmap for the extent of the allocated inode */
		iagp->pmap[extno] |= cpu_to_le32(mask);
	}
	/*
	 * update iag lsn
	 *
	 * tblk supplies the lsn/clsn and the log-sync list position that
	 * the iag metapage is tied to; all of it is done under
	 * LOGSYNC_LOCK.
	 */
	lsn = tblk->lsn;
	log = JFS_SBI(tblk->sb)->log;
	LOGSYNC_LOCK(log, flags);
	if (mp->lsn != 0) {
		/* inherit older/smaller lsn */
		logdiff(difft, lsn, log);
		logdiff(diffp, mp->lsn, log);
		if (difft < diffp) {
			mp->lsn = lsn;
			/* move mp after tblock in logsync list */
			list_move(&mp->synclist, &tblk->synclist);
		}
		/* inherit younger/larger clsn */
		assert(mp->clsn);
		logdiff(difft, tblk->clsn, log);
		logdiff(diffp, mp->clsn, log);
		if (difft > diffp)
			mp->clsn = tblk->clsn;
	} else {
		/* metapage was not yet on the logsync list */
		mp->log = log;
		mp->lsn = lsn;
		/* insert mp after tblock in logsync list */
		log->count++;
		list_add(&mp->synclist, &tblk->synclist);
		mp->clsn = tblk->clsn;
	}
	LOGSYNC_UNLOCK(log, flags);
	write_metapage(mp);
	return (0);
}

/*
 *	diExtendFS()
 *
 * function: update imap for extendfs();
 *
 * note: AG size has been increased s.t. each k old contiguous AGs are
 * coalesced into a new AG;
 *
 * The per-AG control information is reset and then rebuilt by walking
 * every iag of the map; each non-empty iag is re-homed to the AG derived
 * from its agstart and pushed onto the head of that AG's free inode
 * and/or free extent list.
 *
 * returns 0 on success, -EIO when an iag carries an unexpected iagnum or
 * the recomputed totals disagree with im_numinos/im_numfree, otherwise
 * the error of the last failed iag read (processing continues past read
 * failures).
 */
int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
{
	int rc, rcx = 0;
	struct inomap *imap = JFS_IP(ipimap)->i_imap;
	struct iag *iagp = NULL, *hiagp = NULL;
	struct bmap *mp = JFS_SBI(ipbmap->i_sb)->bmap;
	struct metapage *bp, *hbp;
	int i, n, head;
	int numinos, xnuminos = 0, xnumfree = 0;
	s64 agstart;

	jfs_info("diExtendFS: nextiag:%d numinos:%d numfree:%d",
		   imap->im_nextiag, atomic_read(&imap->im_numinos),
		   atomic_read(&imap->im_numfree));

	/*
	 *	reconstruct imap
	 *
	 * coalesce contiguous k (newAGSize/oldAGSize) AGs;
	 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn;
	 * note: new AG size = old AG size * (2**x).
	 */

	/* init per AG control information im_agctl[] */
	for (i = 0; i < MAXAG; i++) {
		imap->im_agctl[i].inofree = -1;
		imap->im_agctl[i].extfree = -1;
		imap->im_agctl[i].numinos = 0;	/* number of backed inodes */
		imap->im_agctl[i].numfree = 0;	/* number of free backed inodes */
	}

	/*
	 *	process each iag page of the map.
	 *
	 * rebuild AG Free Inode List, AG Free Inode Extent List;
	 */
	for (i = 0; i < imap->im_nextiag; i++) {
		/* remember the failure but keep going with the next iag */
		if ((rc = diIAGRead(imap, i, &bp))) {
			rcx = rc;
			continue;
		}
		iagp = (struct iag *) bp->data;
		if (le32_to_cpu(iagp->iagnum) != i) {
			release_metapage(bp);
			jfs_error(ipimap->i_sb, "unexpected value of iagnum\n");
			return -EIO;
		}

		/* leave free iag in the free iag list */
		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
			release_metapage(bp);
			continue;
		}

		/* n is the iag's AG number under the new AG size; agstart
		 * is rewritten to the start of that AG.
		 */
		agstart = le64_to_cpu(iagp->agstart);
		n = agstart >> mp->db_agl2size;
		iagp->agstart = cpu_to_le64((s64)n << mp->db_agl2size);

		/* compute backed inodes */
		numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts))
		    << L2INOSPEREXT;
		if (numinos > 0) {
			/* merge AG backed inodes */
			imap->im_agctl[n].numinos += numinos;
			xnuminos += numinos;
		}

		/* if any backed free inodes, insert at AG free inode list */
		if ((int) le32_to_cpu(iagp->nfreeinos) > 0) {
			if ((head = imap->im_agctl[n].inofree) == -1) {
				iagp->inofreefwd = cpu_to_le32(-1);
				iagp->inofreeback = cpu_to_le32(-1);
			} else {
				/* on read failure the iag is written back
				 * as-is and not linked into the list
				 */
				if ((rc = diIAGRead(imap, head, &hbp))) {
					rcx = rc;
					goto nextiag;
				}
				hiagp = (struct iag *) hbp->data;
				/* iagnum is copied as stored, without
				 * byte-order conversion
				 */
				hiagp->inofreeback = iagp->iagnum;
				iagp->inofreefwd = cpu_to_le32(head);
				iagp->inofreeback = cpu_to_le32(-1);
				write_metapage(hbp);
			}

			imap->im_agctl[n].inofree =
			    le32_to_cpu(iagp->iagnum);

			/* merge AG backed free inodes */
			imap->im_agctl[n].numfree +=
			    le32_to_cpu(iagp->nfreeinos);
			xnumfree += le32_to_cpu(iagp->nfreeinos);
		}

		/* if any free extents, insert at AG free extent list */
		if (le32_to_cpu(iagp->nfreeexts) > 0) {
			if ((head = imap->im_agctl[n].extfree) == -1) {
				iagp->extfreefwd = cpu_to_le32(-1);
				iagp->extfreeback = cpu_to_le32(-1);
			} else {
				if ((rc = diIAGRead(imap, head, &hbp))) {
					rcx = rc;
					goto nextiag;
				}
				hiagp = (struct iag *) hbp->data;
				hiagp->extfreeback = iagp->iagnum;
				iagp->extfreefwd = cpu_to_le32(head);
				iagp->extfreeback = cpu_to_le32(-1);
				write_metapage(hbp);
			}

			imap->im_agctl[n].extfree =
			    le32_to_cpu(iagp->iagnum);
		}

	      nextiag:
		write_metapage(bp);
	}

	/* the totals recomputed above must match the map-wide counters */
	if (xnuminos != atomic_read(&imap->im_numinos) ||
	    xnumfree != atomic_read(&imap->im_numfree)) {
		jfs_error(ipimap->i_sb, "numinos or numfree incorrect\n");
		return -EIO;
	}

	return rcx;
}


/*
 *	duplicateIXtree()
 *
 * serialization: IWRITE_LOCK held on entry/exit
 *
 * note: shadow page with regular inode (rel.2);
 *
 * Inserts the extent (blkno, xlen, *xaddr) into the special inode
 * obtained with diReadSpecial(sb, FILESYSTEM_I, 1) and grows that inode
 * by one page.  Nothing is reported to the caller; when the inode cannot
 * be read or the insert fails, JFS_BAD_SAIT is set in the in-memory
 * mount flags so that later calls return immediately.
 */
static void duplicateIXtree(struct super_block *sb, s64 blkno,
			    int xlen, s64 *xaddr)
{
	struct jfs_superblock *j_sb;
	struct buffer_head *bh;
	struct inode *ip;
	tid_t tid;

	/* if AIT2 ipmap2 is bad, do not try to update it */
	if (JFS_SBI(sb)->mntflag & JFS_BAD_SAIT)	/* s_flag */
		return;
	ip = diReadSpecial(sb, FILESYSTEM_I, 1);
	if (ip == NULL) {
		/* record the failure in memory and, if the superblock
		 * can be read, in its s_flag as well (written out
		 * synchronously).
		 */
		JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT;
		if (readSuper(sb, &bh))
			return;
		j_sb = (struct jfs_superblock *)bh->b_data;
		j_sb->s_flag |= cpu_to_le32(JFS_BAD_SAIT);

		mark_buffer_dirty(bh);
		sync_dirty_buffer(bh);
		brelse(bh);
		return;
	}

	/* start transaction */
	tid = txBegin(sb, COMMIT_FORCE);
	/* update the inode map addressing structure to point to it */
	if (xtInsert(tid, ip, 0, blkno, xlen, xaddr, 0)) {
		JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT;
		txAbort(tid, 1);
		goto cleanup;

	}
	/* update the inode map's inode to reflect the extension */
	ip->i_size += PSIZE;
	inode_add_bytes(ip, PSIZE);
	/* NOTE(review): the result of txCommit() is not checked here */
	txCommit(tid, 1, &ip, COMMIT_FORCE);
      cleanup:
	txEnd(tid);
	diFreeSpecial(ip);
}

/*
 * NAME:	copy_from_dinode()
 *
 * FUNCTION:	Copies inode info from disk inode to in-memory inode
 *
 * RETURN VALUES:
 *	0	- success
 *	-ENOMEM	- insufficient memory
 *
 * NOTE(review): the body below only ever returns 0; -ENOMEM is not
 * produced by the code shown here.
 */
static int copy_from_dinode(struct dinode * dip, struct inode *ip)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);

	jfs_ip->fileset = le32_to_cpu(dip->di_fileset);
	/* mode2 keeps the full 32-bit on-disk mode; i_mode gets only the
	 * low 16 bits.
	 */
	jfs_ip->mode2 = le32_to_cpu(dip->di_mode);
	jfs_set_inode_flags(ip);

	ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff;
	/* a umask other than -1 replaces the on-disk permission bits */
	if (sbi->umask != -1) {
		ip->i_mode = (ip->i_mode & ~0777) | (0777 & ~sbi->umask);
		/* For directories, add x permission if r is allowed by umask */
		if (S_ISDIR(ip->i_mode)) {
			if (ip->i_mode & 0400)
				ip->i_mode |= 0100;
			if (ip->i_mode & 0040)
				ip->i_mode |= 0010;
			if (ip->i_mode & 0004)
				ip->i_mode |= 0001;
		}
	}
	set_nlink(ip, le32_to_cpu(dip->di_nlink));

	/* the on-disk owner is always remembered in saved_uid/saved_gid;
	 * a valid sbi->uid / sbi->gid overrides what the VFS inode shows.
	 */
	jfs_ip->saved_uid = make_kuid(&init_user_ns, le32_to_cpu(dip->di_uid));
	if (!uid_valid(sbi->uid))
		ip->i_uid = jfs_ip->saved_uid;
	else {
		ip->i_uid = sbi->uid;
	}

	jfs_ip->saved_gid = make_kgid(&init_user_ns, le32_to_cpu(dip->di_gid));
	if (!gid_valid(sbi->gid))
		ip->i_gid = jfs_ip->saved_gid;
	else {
		ip->i_gid = sbi->gid;
	}

	ip->i_size = le64_to_cpu(dip->di_size);
	ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec);
	ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec);
	ip->i_mtime.tv_sec = le32_to_cpu(dip->di_mtime.tv_sec);
	ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec);
	ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec);
	ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec);
	ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks));
	ip->i_generation = le32_to_cpu(dip->di_gen);

	jfs_ip->ixpxd = dip->di_ixpxd;	/* in-memory pxd's are little-endian */
	jfs_ip->acl = dip->di_acl;	/* as are dxd's */
	jfs_ip->ea = dip->di_ea;
	jfs_ip->next_index = le32_to_cpu(dip->di_next_index);
	jfs_ip->otime = le32_to_cpu(dip->di_otime.tv_sec);
	jfs_ip->acltype = le32_to_cpu(dip->di_acltype);

	if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) {
		jfs_ip->dev = le32_to_cpu(dip->di_rdev);
		ip->i_rdev = new_decode_dev(jfs_ip->dev);
	}

	/* copy the type-specific inline area.
	 * NOTE(review): 384/288/128 are literal byte counts; presumably
	 * the sizes of the respective on-disk areas -- confirm against
	 * struct dinode.
	 */
	if (S_ISDIR(ip->i_mode)) {
		memcpy(&jfs_ip->i_dirtable, &dip->di_dirtable, 384);
	} else if (S_ISREG(ip->i_mode) || S_ISLNK(ip->i_mode)) {
		memcpy(&jfs_ip->i_xtroot, &dip->di_xtroot, 288);
	} else
		memcpy(&jfs_ip->i_inline_ea, &dip->di_inlineea, 128);

	/* Zero the in-memory-only stuff */
	jfs_ip->cflag = 0;
	jfs_ip->btindex = 0;
	jfs_ip->btorder = 0;
	jfs_ip->bxflag = 0;
	jfs_ip->blid = 0;
	jfs_ip->atlhead = 0;
	jfs_ip->atltail = 0;
	jfs_ip->xtlid = 0;
	return (0);
}

/*
 * NAME:	copy_to_dinode()
 *
 * FUNCTION:	Copies inode info from in-memory inode to disk inode
 *
 * Multi-byte fields are converted to little-endian on the way out.  The
 * type-specific inline area (directory table / xtree root / inline EA)
 * is not written by this function.
 */
static void copy_to_dinode(struct dinode * dip, struct inode *ip)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);

	dip->di_fileset = cpu_to_le32(jfs_ip->fileset);
	dip->di_inostamp = cpu_to_le32(sbi->inostamp);
	dip->di_number = cpu_to_le32(ip->i_ino);
	dip->di_gen = cpu_to_le32(ip->i_generation);
	dip->di_size = cpu_to_le64(ip->i_size);
	dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks));
	dip->di_nlink = cpu_to_le32(ip->i_nlink);
	/* when sbi->uid / sbi->gid is valid, write the saved owner rather
	 * than the one shown in the VFS inode.
	 */
	if (!uid_valid(sbi->uid))
		dip->di_uid = cpu_to_le32(i_uid_read(ip));
	else
		dip->di_uid =cpu_to_le32(from_kuid(&init_user_ns,
						   jfs_ip->saved_uid));
	if (!gid_valid(sbi->gid))
		dip->di_gid = cpu_to_le32(i_gid_read(ip));
	else
		dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns,
						    jfs_ip->saved_gid));
	jfs_get_inode_flags(jfs_ip);
	/*
	 * mode2 is only needed for storing the higher order bits.
	 * Trust i_mode for the lower order ones
	 */
	if (sbi->umask == -1)
		dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) |
					   ip->i_mode);
	else /* Leave the original permissions alone */
		dip->di_mode = cpu_to_le32(jfs_ip->mode2);

	dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime.tv_sec);
	dip->di_atime.tv_nsec = cpu_to_le32(ip->i_atime.tv_nsec);
	dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime.tv_sec);
	dip->di_ctime.tv_nsec = cpu_to_le32(ip->i_ctime.tv_nsec);
	dip->di_mtime.tv_sec = cpu_to_le32(ip->i_mtime.tv_sec);
	dip->di_mtime.tv_nsec = cpu_to_le32(ip->i_mtime.tv_nsec);
	dip->di_ixpxd = jfs_ip->ixpxd;	/* in-memory pxd's are little-endian */
	dip->di_acl = jfs_ip->acl;	/* as are dxd's */
	dip->di_ea = jfs_ip->ea;
	dip->di_next_index = cpu_to_le32(jfs_ip->next_index);
	dip->di_otime.tv_sec = cpu_to_le32(jfs_ip->otime);
	dip->di_otime.tv_nsec = 0;
	dip->di_acltype = cpu_to_le32(jfs_ip->acltype);
	if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode))
		dip->di_rdev = cpu_to_le32(jfs_ip->dev);
}