/*
 * Copyright (C) International Business Machines Corp., 2000-2004
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
 * the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 * jfs_imap.c: inode allocation map manager
 *
 * Serialization:
 * Each AG has a simple lock which is used to control the serialization of
 * the AG level lists. This lock should be taken first whenever an AG
 * level list will be modified or accessed.
 *
 * Each IAG is locked by obtaining the buffer for the IAG page.
 *
 * There is also an inode lock for the inode map inode. A read lock needs to
 * be taken whenever an IAG is read from the map or the global level
 * information is read. A write lock needs to be taken whenever the global
 * level information is modified or an atomic operation needs to be used.
 *
 * If more than one IAG is read at one time, the read lock may not
 * be given up until all of the IAG's are read. Otherwise, a deadlock
 * may occur when trying to obtain the read lock while another thread
 * holding the read lock is waiting on the IAG already being held.
 *
 * The control page of the inode map is read into memory by diMount().
 * Thereafter it should only be modified in memory and then it will be
 * written out when the filesystem is unmounted by diUnmount().
42 */ 43 44 #include <linux/fs.h> 45 #include <linux/buffer_head.h> 46 #include <linux/pagemap.h> 47 #include <linux/quotaops.h> 48 #include <linux/slab.h> 49 50 #include "jfs_incore.h" 51 #include "jfs_inode.h" 52 #include "jfs_filsys.h" 53 #include "jfs_dinode.h" 54 #include "jfs_dmap.h" 55 #include "jfs_imap.h" 56 #include "jfs_metapage.h" 57 #include "jfs_superblock.h" 58 #include "jfs_debug.h" 59 60 /* 61 * imap locks 62 */ 63 /* iag free list lock */ 64 #define IAGFREE_LOCK_INIT(imap) mutex_init(&imap->im_freelock) 65 #define IAGFREE_LOCK(imap) mutex_lock(&imap->im_freelock) 66 #define IAGFREE_UNLOCK(imap) mutex_unlock(&imap->im_freelock) 67 68 /* per ag iag list locks */ 69 #define AG_LOCK_INIT(imap,index) mutex_init(&(imap->im_aglock[index])) 70 #define AG_LOCK(imap,agno) mutex_lock(&imap->im_aglock[agno]) 71 #define AG_UNLOCK(imap,agno) mutex_unlock(&imap->im_aglock[agno]) 72 73 /* 74 * forward references 75 */ 76 static int diAllocAG(struct inomap *, int, bool, struct inode *); 77 static int diAllocAny(struct inomap *, int, bool, struct inode *); 78 static int diAllocBit(struct inomap *, struct iag *, int); 79 static int diAllocExt(struct inomap *, int, struct inode *); 80 static int diAllocIno(struct inomap *, int, struct inode *); 81 static int diFindFree(u32, int); 82 static int diNewExt(struct inomap *, struct iag *, int); 83 static int diNewIAG(struct inomap *, int *, int, struct metapage **); 84 static void duplicateIXtree(struct super_block *, s64, int, s64 *); 85 86 static int diIAGRead(struct inomap * imap, int, struct metapage **); 87 static int copy_from_dinode(struct dinode *, struct inode *); 88 static void copy_to_dinode(struct dinode *, struct inode *); 89 90 /* 91 * NAME: diMount() 92 * 93 * FUNCTION: initialize the incore inode map control structures for 94 * a fileset or aggregate init time. 95 * 96 * the inode map's control structure (dinomap) is 97 * brought in from disk and placed in virtual memory. 
98 * 99 * PARAMETERS: 100 * ipimap - pointer to inode map inode for the aggregate or fileset. 101 * 102 * RETURN VALUES: 103 * 0 - success 104 * -ENOMEM - insufficient free virtual memory. 105 * -EIO - i/o error. 106 */ 107 int diMount(struct inode *ipimap) 108 { 109 struct inomap *imap; 110 struct metapage *mp; 111 int index; 112 struct dinomap_disk *dinom_le; 113 114 /* 115 * allocate/initialize the in-memory inode map control structure 116 */ 117 /* allocate the in-memory inode map control structure. */ 118 imap = kmalloc(sizeof(struct inomap), GFP_KERNEL); 119 if (imap == NULL) { 120 jfs_err("diMount: kmalloc returned NULL!"); 121 return -ENOMEM; 122 } 123 124 /* read the on-disk inode map control structure. */ 125 126 mp = read_metapage(ipimap, 127 IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage, 128 PSIZE, 0); 129 if (mp == NULL) { 130 kfree(imap); 131 return -EIO; 132 } 133 134 /* copy the on-disk version to the in-memory version. */ 135 dinom_le = (struct dinomap_disk *) mp->data; 136 imap->im_freeiag = le32_to_cpu(dinom_le->in_freeiag); 137 imap->im_nextiag = le32_to_cpu(dinom_le->in_nextiag); 138 atomic_set(&imap->im_numinos, le32_to_cpu(dinom_le->in_numinos)); 139 atomic_set(&imap->im_numfree, le32_to_cpu(dinom_le->in_numfree)); 140 imap->im_nbperiext = le32_to_cpu(dinom_le->in_nbperiext); 141 imap->im_l2nbperiext = le32_to_cpu(dinom_le->in_l2nbperiext); 142 for (index = 0; index < MAXAG; index++) { 143 imap->im_agctl[index].inofree = 144 le32_to_cpu(dinom_le->in_agctl[index].inofree); 145 imap->im_agctl[index].extfree = 146 le32_to_cpu(dinom_le->in_agctl[index].extfree); 147 imap->im_agctl[index].numinos = 148 le32_to_cpu(dinom_le->in_agctl[index].numinos); 149 imap->im_agctl[index].numfree = 150 le32_to_cpu(dinom_le->in_agctl[index].numfree); 151 } 152 153 /* release the buffer. 
*/ 154 release_metapage(mp); 155 156 /* 157 * allocate/initialize inode allocation map locks 158 */ 159 /* allocate and init iag free list lock */ 160 IAGFREE_LOCK_INIT(imap); 161 162 /* allocate and init ag list locks */ 163 for (index = 0; index < MAXAG; index++) { 164 AG_LOCK_INIT(imap, index); 165 } 166 167 /* bind the inode map inode and inode map control structure 168 * to each other. 169 */ 170 imap->im_ipimap = ipimap; 171 JFS_IP(ipimap)->i_imap = imap; 172 173 return (0); 174 } 175 176 177 /* 178 * NAME: diUnmount() 179 * 180 * FUNCTION: write to disk the incore inode map control structures for 181 * a fileset or aggregate at unmount time. 182 * 183 * PARAMETERS: 184 * ipimap - pointer to inode map inode for the aggregate or fileset. 185 * 186 * RETURN VALUES: 187 * 0 - success 188 * -ENOMEM - insufficient free virtual memory. 189 * -EIO - i/o error. 190 */ 191 int diUnmount(struct inode *ipimap, int mounterror) 192 { 193 struct inomap *imap = JFS_IP(ipimap)->i_imap; 194 195 /* 196 * update the on-disk inode map control structure 197 */ 198 199 if (!(mounterror || isReadOnly(ipimap))) 200 diSync(ipimap); 201 202 /* 203 * Invalidate the page cache buffers 204 */ 205 truncate_inode_pages(ipimap->i_mapping, 0); 206 207 /* 208 * free in-memory control structure 209 */ 210 kfree(imap); 211 212 return (0); 213 } 214 215 216 /* 217 * diSync() 218 */ 219 int diSync(struct inode *ipimap) 220 { 221 struct dinomap_disk *dinom_le; 222 struct inomap *imp = JFS_IP(ipimap)->i_imap; 223 struct metapage *mp; 224 int index; 225 226 /* 227 * write imap global conrol page 228 */ 229 /* read the on-disk inode map control structure */ 230 mp = get_metapage(ipimap, 231 IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage, 232 PSIZE, 0); 233 if (mp == NULL) { 234 jfs_err("diSync: get_metapage failed!"); 235 return -EIO; 236 } 237 238 /* copy the in-memory version to the on-disk version */ 239 dinom_le = (struct dinomap_disk *) mp->data; 240 dinom_le->in_freeiag = 
cpu_to_le32(imp->im_freeiag); 241 dinom_le->in_nextiag = cpu_to_le32(imp->im_nextiag); 242 dinom_le->in_numinos = cpu_to_le32(atomic_read(&imp->im_numinos)); 243 dinom_le->in_numfree = cpu_to_le32(atomic_read(&imp->im_numfree)); 244 dinom_le->in_nbperiext = cpu_to_le32(imp->im_nbperiext); 245 dinom_le->in_l2nbperiext = cpu_to_le32(imp->im_l2nbperiext); 246 for (index = 0; index < MAXAG; index++) { 247 dinom_le->in_agctl[index].inofree = 248 cpu_to_le32(imp->im_agctl[index].inofree); 249 dinom_le->in_agctl[index].extfree = 250 cpu_to_le32(imp->im_agctl[index].extfree); 251 dinom_le->in_agctl[index].numinos = 252 cpu_to_le32(imp->im_agctl[index].numinos); 253 dinom_le->in_agctl[index].numfree = 254 cpu_to_le32(imp->im_agctl[index].numfree); 255 } 256 257 /* write out the control structure */ 258 write_metapage(mp); 259 260 /* 261 * write out dirty pages of imap 262 */ 263 filemap_write_and_wait(ipimap->i_mapping); 264 265 diWriteSpecial(ipimap, 0); 266 267 return (0); 268 } 269 270 271 /* 272 * NAME: diRead() 273 * 274 * FUNCTION: initialize an incore inode from disk. 275 * 276 * on entry, the specifed incore inode should itself 277 * specify the disk inode number corresponding to the 278 * incore inode (i.e. i_number should be initialized). 279 * 280 * this routine handles incore inode initialization for 281 * both "special" and "regular" inodes. special inodes 282 * are those required early in the mount process and 283 * require special handling since much of the file system 284 * is not yet initialized. these "special" inodes are 285 * identified by a NULL inode map inode pointer and are 286 * actually initialized by a call to diReadSpecial(). 287 * 288 * for regular inodes, the iag describing the disk inode 289 * is read from disk to determine the inode extent address 290 * for the disk inode. with the inode extent address in 291 * hand, the page of the extent that contains the disk 292 * inode is read and the disk inode is copied to the 293 * incore inode. 
294 * 295 * PARAMETERS: 296 * ip - pointer to incore inode to be initialized from disk. 297 * 298 * RETURN VALUES: 299 * 0 - success 300 * -EIO - i/o error. 301 * -ENOMEM - insufficient memory 302 * 303 */ 304 int diRead(struct inode *ip) 305 { 306 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 307 int iagno, ino, extno, rc; 308 struct inode *ipimap; 309 struct dinode *dp; 310 struct iag *iagp; 311 struct metapage *mp; 312 s64 blkno, agstart; 313 struct inomap *imap; 314 int block_offset; 315 int inodes_left; 316 unsigned long pageno; 317 int rel_inode; 318 319 jfs_info("diRead: ino = %ld", ip->i_ino); 320 321 ipimap = sbi->ipimap; 322 JFS_IP(ip)->ipimap = ipimap; 323 324 /* determine the iag number for this inode (number) */ 325 iagno = INOTOIAG(ip->i_ino); 326 327 /* read the iag */ 328 imap = JFS_IP(ipimap)->i_imap; 329 IREAD_LOCK(ipimap, RDWRLOCK_IMAP); 330 rc = diIAGRead(imap, iagno, &mp); 331 IREAD_UNLOCK(ipimap); 332 if (rc) { 333 jfs_err("diRead: diIAGRead returned %d", rc); 334 return (rc); 335 } 336 337 iagp = (struct iag *) mp->data; 338 339 /* determine inode extent that holds the disk inode */ 340 ino = ip->i_ino & (INOSPERIAG - 1); 341 extno = ino >> L2INOSPEREXT; 342 343 if ((lengthPXD(&iagp->inoext[extno]) != imap->im_nbperiext) || 344 (addressPXD(&iagp->inoext[extno]) == 0)) { 345 release_metapage(mp); 346 return -ESTALE; 347 } 348 349 /* get disk block number of the page within the inode extent 350 * that holds the disk inode. 
351 */ 352 blkno = INOPBLK(&iagp->inoext[extno], ino, sbi->l2nbperpage); 353 354 /* get the ag for the iag */ 355 agstart = le64_to_cpu(iagp->agstart); 356 357 release_metapage(mp); 358 359 rel_inode = (ino & (INOSPERPAGE - 1)); 360 pageno = blkno >> sbi->l2nbperpage; 361 362 if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) { 363 /* 364 * OS/2 didn't always align inode extents on page boundaries 365 */ 366 inodes_left = 367 (sbi->nbperpage - block_offset) << sbi->l2niperblk; 368 369 if (rel_inode < inodes_left) 370 rel_inode += block_offset << sbi->l2niperblk; 371 else { 372 pageno += 1; 373 rel_inode -= inodes_left; 374 } 375 } 376 377 /* read the page of disk inode */ 378 mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1); 379 if (!mp) { 380 jfs_err("diRead: read_metapage failed"); 381 return -EIO; 382 } 383 384 /* locate the disk inode requested */ 385 dp = (struct dinode *) mp->data; 386 dp += rel_inode; 387 388 if (ip->i_ino != le32_to_cpu(dp->di_number)) { 389 jfs_error(ip->i_sb, "i_ino != di_number\n"); 390 rc = -EIO; 391 } else if (le32_to_cpu(dp->di_nlink) == 0) 392 rc = -ESTALE; 393 else 394 /* copy the disk inode to the in-memory inode */ 395 rc = copy_from_dinode(dp, ip); 396 397 release_metapage(mp); 398 399 /* set the ag for the inode */ 400 JFS_IP(ip)->agstart = agstart; 401 JFS_IP(ip)->active_ag = -1; 402 403 return (rc); 404 } 405 406 407 /* 408 * NAME: diReadSpecial() 409 * 410 * FUNCTION: initialize a 'special' inode from disk. 411 * 412 * this routines handles aggregate level inodes. The 413 * inode cache cannot differentiate between the 414 * aggregate inodes and the filesystem inodes, so we 415 * handle these here. We don't actually use the aggregate 416 * inode map, since these inodes are at a fixed location 417 * and in some cases the aggregate inode map isn't initialized 418 * yet. 
419 * 420 * PARAMETERS: 421 * sb - filesystem superblock 422 * inum - aggregate inode number 423 * secondary - 1 if secondary aggregate inode table 424 * 425 * RETURN VALUES: 426 * new inode - success 427 * NULL - i/o error. 428 */ 429 struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) 430 { 431 struct jfs_sb_info *sbi = JFS_SBI(sb); 432 uint address; 433 struct dinode *dp; 434 struct inode *ip; 435 struct metapage *mp; 436 437 ip = new_inode(sb); 438 if (ip == NULL) { 439 jfs_err("diReadSpecial: new_inode returned NULL!"); 440 return ip; 441 } 442 443 if (secondary) { 444 address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage; 445 JFS_IP(ip)->ipimap = sbi->ipaimap2; 446 } else { 447 address = AITBL_OFF >> L2PSIZE; 448 JFS_IP(ip)->ipimap = sbi->ipaimap; 449 } 450 451 ASSERT(inum < INOSPEREXT); 452 453 ip->i_ino = inum; 454 455 address += inum >> 3; /* 8 inodes per 4K page */ 456 457 /* read the page of fixed disk inode (AIT) in raw mode */ 458 mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1); 459 if (mp == NULL) { 460 set_nlink(ip, 1); /* Don't want iput() deleting it */ 461 iput(ip); 462 return (NULL); 463 } 464 465 /* get the pointer to the disk inode of interest */ 466 dp = (struct dinode *) (mp->data); 467 dp += inum % 8; /* 8 inodes per 4K page */ 468 469 /* copy on-disk inode to in-memory inode */ 470 if ((copy_from_dinode(dp, ip)) != 0) { 471 /* handle bad return by returning NULL for ip */ 472 set_nlink(ip, 1); /* Don't want iput() deleting it */ 473 iput(ip); 474 /* release the page */ 475 release_metapage(mp); 476 return (NULL); 477 478 } 479 480 ip->i_mapping->a_ops = &jfs_metapage_aops; 481 mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS); 482 483 /* Allocations to metadata inodes should not affect quotas */ 484 ip->i_flags |= S_NOQUOTA; 485 486 if ((inum == FILESYSTEM_I) && (JFS_IP(ip)->ipimap == sbi->ipaimap)) { 487 sbi->gengen = le32_to_cpu(dp->di_gengen); 488 sbi->inostamp = le32_to_cpu(dp->di_inostamp); 489 } 490 
491 /* release the page */ 492 release_metapage(mp); 493 494 inode_fake_hash(ip); 495 496 return (ip); 497 } 498 499 /* 500 * NAME: diWriteSpecial() 501 * 502 * FUNCTION: Write the special inode to disk 503 * 504 * PARAMETERS: 505 * ip - special inode 506 * secondary - 1 if secondary aggregate inode table 507 * 508 * RETURN VALUES: none 509 */ 510 511 void diWriteSpecial(struct inode *ip, int secondary) 512 { 513 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 514 uint address; 515 struct dinode *dp; 516 ino_t inum = ip->i_ino; 517 struct metapage *mp; 518 519 if (secondary) 520 address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage; 521 else 522 address = AITBL_OFF >> L2PSIZE; 523 524 ASSERT(inum < INOSPEREXT); 525 526 address += inum >> 3; /* 8 inodes per 4K page */ 527 528 /* read the page of fixed disk inode (AIT) in raw mode */ 529 mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1); 530 if (mp == NULL) { 531 jfs_err("diWriteSpecial: failed to read aggregate inode extent!"); 532 return; 533 } 534 535 /* get the pointer to the disk inode of interest */ 536 dp = (struct dinode *) (mp->data); 537 dp += inum % 8; /* 8 inodes per 4K page */ 538 539 /* copy on-disk inode to in-memory inode */ 540 copy_to_dinode(dp, ip); 541 memcpy(&dp->di_xtroot, &JFS_IP(ip)->i_xtroot, 288); 542 543 if (inum == FILESYSTEM_I) 544 dp->di_gengen = cpu_to_le32(sbi->gengen); 545 546 /* write the page */ 547 write_metapage(mp); 548 } 549 550 /* 551 * NAME: diFreeSpecial() 552 * 553 * FUNCTION: Free allocated space for special inode 554 */ 555 void diFreeSpecial(struct inode *ip) 556 { 557 if (ip == NULL) { 558 jfs_err("diFreeSpecial called with NULL ip!"); 559 return; 560 } 561 filemap_write_and_wait(ip->i_mapping); 562 truncate_inode_pages(ip->i_mapping, 0); 563 iput(ip); 564 } 565 566 567 568 /* 569 * NAME: diWrite() 570 * 571 * FUNCTION: write the on-disk inode portion of the in-memory inode 572 * to its corresponding on-disk inode. 
573 * 574 * on entry, the specifed incore inode should itself 575 * specify the disk inode number corresponding to the 576 * incore inode (i.e. i_number should be initialized). 577 * 578 * the inode contains the inode extent address for the disk 579 * inode. with the inode extent address in hand, the 580 * page of the extent that contains the disk inode is 581 * read and the disk inode portion of the incore inode 582 * is copied to the disk inode. 583 * 584 * PARAMETERS: 585 * tid - transacation id 586 * ip - pointer to incore inode to be written to the inode extent. 587 * 588 * RETURN VALUES: 589 * 0 - success 590 * -EIO - i/o error. 591 */ 592 int diWrite(tid_t tid, struct inode *ip) 593 { 594 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 595 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 596 int rc = 0; 597 s32 ino; 598 struct dinode *dp; 599 s64 blkno; 600 int block_offset; 601 int inodes_left; 602 struct metapage *mp; 603 unsigned long pageno; 604 int rel_inode; 605 int dioffset; 606 struct inode *ipimap; 607 uint type; 608 lid_t lid; 609 struct tlock *ditlck, *tlck; 610 struct linelock *dilinelock, *ilinelock; 611 struct lv *lv; 612 int n; 613 614 ipimap = jfs_ip->ipimap; 615 616 ino = ip->i_ino & (INOSPERIAG - 1); 617 618 if (!addressPXD(&(jfs_ip->ixpxd)) || 619 (lengthPXD(&(jfs_ip->ixpxd)) != 620 JFS_IP(ipimap)->i_imap->im_nbperiext)) { 621 jfs_error(ip->i_sb, "ixpxd invalid\n"); 622 return -EIO; 623 } 624 625 /* 626 * read the page of disk inode containing the specified inode: 627 */ 628 /* compute the block address of the page */ 629 blkno = INOPBLK(&(jfs_ip->ixpxd), ino, sbi->l2nbperpage); 630 631 rel_inode = (ino & (INOSPERPAGE - 1)); 632 pageno = blkno >> sbi->l2nbperpage; 633 634 if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) { 635 /* 636 * OS/2 didn't always align inode extents on page boundaries 637 */ 638 inodes_left = 639 (sbi->nbperpage - block_offset) << sbi->l2niperblk; 640 641 if (rel_inode < inodes_left) 642 rel_inode += block_offset 
<< sbi->l2niperblk; 643 else { 644 pageno += 1; 645 rel_inode -= inodes_left; 646 } 647 } 648 /* read the page of disk inode */ 649 retry: 650 mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1); 651 if (!mp) 652 return -EIO; 653 654 /* get the pointer to the disk inode */ 655 dp = (struct dinode *) mp->data; 656 dp += rel_inode; 657 658 dioffset = (ino & (INOSPERPAGE - 1)) << L2DISIZE; 659 660 /* 661 * acquire transaction lock on the on-disk inode; 662 * N.B. tlock is acquired on ipimap not ip; 663 */ 664 if ((ditlck = 665 txLock(tid, ipimap, mp, tlckINODE | tlckENTRY)) == NULL) 666 goto retry; 667 dilinelock = (struct linelock *) & ditlck->lock; 668 669 /* 670 * copy btree root from in-memory inode to on-disk inode 671 * 672 * (tlock is taken from inline B+-tree root in in-memory 673 * inode when the B+-tree root is updated, which is pointed 674 * by jfs_ip->blid as well as being on tx tlock list) 675 * 676 * further processing of btree root is based on the copy 677 * in in-memory inode, where txLog() will log from, and, 678 * for xtree root, txUpdateMap() will update map and reset 679 * XAD_NEW bit; 680 */ 681 682 if (S_ISDIR(ip->i_mode) && (lid = jfs_ip->xtlid)) { 683 /* 684 * This is the special xtree inside the directory for storing 685 * the directory table 686 */ 687 xtpage_t *p, *xp; 688 xad_t *xad; 689 690 jfs_ip->xtlid = 0; 691 tlck = lid_to_tlock(lid); 692 assert(tlck->type & tlckXTREE); 693 tlck->type |= tlckBTROOT; 694 tlck->mp = mp; 695 ilinelock = (struct linelock *) & tlck->lock; 696 697 /* 698 * copy xtree root from inode to dinode: 699 */ 700 p = &jfs_ip->i_xtroot; 701 xp = (xtpage_t *) &dp->di_dirtable; 702 lv = ilinelock->lv; 703 for (n = 0; n < ilinelock->index; n++, lv++) { 704 memcpy(&xp->xad[lv->offset], &p->xad[lv->offset], 705 lv->length << L2XTSLOTSIZE); 706 } 707 708 /* reset on-disk (metadata page) xtree XAD_NEW bit */ 709 xad = &xp->xad[XTENTRYSTART]; 710 for (n = XTENTRYSTART; 711 n < le16_to_cpu(xp->header.nextindex); 
n++, xad++) 712 if (xad->flag & (XAD_NEW | XAD_EXTENDED)) 713 xad->flag &= ~(XAD_NEW | XAD_EXTENDED); 714 } 715 716 if ((lid = jfs_ip->blid) == 0) 717 goto inlineData; 718 jfs_ip->blid = 0; 719 720 tlck = lid_to_tlock(lid); 721 type = tlck->type; 722 tlck->type |= tlckBTROOT; 723 tlck->mp = mp; 724 ilinelock = (struct linelock *) & tlck->lock; 725 726 /* 727 * regular file: 16 byte (XAD slot) granularity 728 */ 729 if (type & tlckXTREE) { 730 xtpage_t *p, *xp; 731 xad_t *xad; 732 733 /* 734 * copy xtree root from inode to dinode: 735 */ 736 p = &jfs_ip->i_xtroot; 737 xp = &dp->di_xtroot; 738 lv = ilinelock->lv; 739 for (n = 0; n < ilinelock->index; n++, lv++) { 740 memcpy(&xp->xad[lv->offset], &p->xad[lv->offset], 741 lv->length << L2XTSLOTSIZE); 742 } 743 744 /* reset on-disk (metadata page) xtree XAD_NEW bit */ 745 xad = &xp->xad[XTENTRYSTART]; 746 for (n = XTENTRYSTART; 747 n < le16_to_cpu(xp->header.nextindex); n++, xad++) 748 if (xad->flag & (XAD_NEW | XAD_EXTENDED)) 749 xad->flag &= ~(XAD_NEW | XAD_EXTENDED); 750 } 751 /* 752 * directory: 32 byte (directory entry slot) granularity 753 */ 754 else if (type & tlckDTREE) { 755 dtpage_t *p, *xp; 756 757 /* 758 * copy dtree root from inode to dinode: 759 */ 760 p = (dtpage_t *) &jfs_ip->i_dtroot; 761 xp = (dtpage_t *) & dp->di_dtroot; 762 lv = ilinelock->lv; 763 for (n = 0; n < ilinelock->index; n++, lv++) { 764 memcpy(&xp->slot[lv->offset], &p->slot[lv->offset], 765 lv->length << L2DTSLOTSIZE); 766 } 767 } else { 768 jfs_err("diWrite: UFO tlock"); 769 } 770 771 inlineData: 772 /* 773 * copy inline symlink from in-memory inode to on-disk inode 774 */ 775 if (S_ISLNK(ip->i_mode) && ip->i_size < IDATASIZE) { 776 lv = & dilinelock->lv[dilinelock->index]; 777 lv->offset = (dioffset + 2 * 128) >> L2INODESLOTSIZE; 778 lv->length = 2; 779 memcpy(&dp->di_fastsymlink, jfs_ip->i_inline, IDATASIZE); 780 dilinelock->index++; 781 } 782 /* 783 * copy inline data from in-memory inode to on-disk inode: 784 * 128 byte slot 
granularity 785 */ 786 if (test_cflag(COMMIT_Inlineea, ip)) { 787 lv = & dilinelock->lv[dilinelock->index]; 788 lv->offset = (dioffset + 3 * 128) >> L2INODESLOTSIZE; 789 lv->length = 1; 790 memcpy(&dp->di_inlineea, jfs_ip->i_inline_ea, INODESLOTSIZE); 791 dilinelock->index++; 792 793 clear_cflag(COMMIT_Inlineea, ip); 794 } 795 796 /* 797 * lock/copy inode base: 128 byte slot granularity 798 */ 799 lv = & dilinelock->lv[dilinelock->index]; 800 lv->offset = dioffset >> L2INODESLOTSIZE; 801 copy_to_dinode(dp, ip); 802 if (test_and_clear_cflag(COMMIT_Dirtable, ip)) { 803 lv->length = 2; 804 memcpy(&dp->di_dirtable, &jfs_ip->i_dirtable, 96); 805 } else 806 lv->length = 1; 807 dilinelock->index++; 808 809 /* release the buffer holding the updated on-disk inode. 810 * the buffer will be later written by commit processing. 811 */ 812 write_metapage(mp); 813 814 return (rc); 815 } 816 817 818 /* 819 * NAME: diFree(ip) 820 * 821 * FUNCTION: free a specified inode from the inode working map 822 * for a fileset or aggregate. 823 * 824 * if the inode to be freed represents the first (only) 825 * free inode within the iag, the iag will be placed on 826 * the ag free inode list. 827 * 828 * freeing the inode will cause the inode extent to be 829 * freed if the inode is the only allocated inode within 830 * the extent. in this case all the disk resource backing 831 * up the inode extent will be freed. in addition, the iag 832 * will be placed on the ag extent free list if the extent 833 * is the first free extent in the iag. if freeing the 834 * extent also means that no free inodes will exist for 835 * the iag, the iag will also be removed from the ag free 836 * inode list. 837 * 838 * the iag describing the inode will be freed if the extent 839 * is to be freed and it is the only backed extent within 840 * the iag. in this case, the iag will be removed from the 841 * ag free extent list and ag free inode list and placed on 842 * the inode map's free iag list. 
843 * 844 * a careful update approach is used to provide consistency 845 * in the face of updates to multiple buffers. under this 846 * approach, all required buffers are obtained before making 847 * any updates and are held until all updates are complete. 848 * 849 * PARAMETERS: 850 * ip - inode to be freed. 851 * 852 * RETURN VALUES: 853 * 0 - success 854 * -EIO - i/o error. 855 */ 856 int diFree(struct inode *ip) 857 { 858 int rc; 859 ino_t inum = ip->i_ino; 860 struct iag *iagp, *aiagp, *biagp, *ciagp, *diagp; 861 struct metapage *mp, *amp, *bmp, *cmp, *dmp; 862 int iagno, ino, extno, bitno, sword, agno; 863 int back, fwd; 864 u32 bitmap, mask; 865 struct inode *ipimap = JFS_SBI(ip->i_sb)->ipimap; 866 struct inomap *imap = JFS_IP(ipimap)->i_imap; 867 pxd_t freepxd; 868 tid_t tid; 869 struct inode *iplist[3]; 870 struct tlock *tlck; 871 struct pxd_lock *pxdlock; 872 873 /* 874 * This is just to suppress compiler warnings. The same logic that 875 * references these variables is used to initialize them. 876 */ 877 aiagp = biagp = ciagp = diagp = NULL; 878 879 /* get the iag number containing the inode. 880 */ 881 iagno = INOTOIAG(inum); 882 883 /* make sure that the iag is contained within 884 * the map. 885 */ 886 if (iagno >= imap->im_nextiag) { 887 print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4, 888 imap, 32, 0); 889 jfs_error(ip->i_sb, "inum = %d, iagno = %d, nextiag = %d\n", 890 (uint) inum, iagno, imap->im_nextiag); 891 return -EIO; 892 } 893 894 /* get the allocation group for this ino. 895 */ 896 agno = BLKTOAG(JFS_IP(ip)->agstart, JFS_SBI(ip->i_sb)); 897 898 /* Lock the AG specific inode map information 899 */ 900 AG_LOCK(imap, agno); 901 902 /* Obtain read lock in imap inode. Don't release it until we have 903 * read all of the IAG's that we are going to. 904 */ 905 IREAD_LOCK(ipimap, RDWRLOCK_IMAP); 906 907 /* read the iag. 
908 */ 909 if ((rc = diIAGRead(imap, iagno, &mp))) { 910 IREAD_UNLOCK(ipimap); 911 AG_UNLOCK(imap, agno); 912 return (rc); 913 } 914 iagp = (struct iag *) mp->data; 915 916 /* get the inode number and extent number of the inode within 917 * the iag and the inode number within the extent. 918 */ 919 ino = inum & (INOSPERIAG - 1); 920 extno = ino >> L2INOSPEREXT; 921 bitno = ino & (INOSPEREXT - 1); 922 mask = HIGHORDER >> bitno; 923 924 if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { 925 jfs_error(ip->i_sb, "wmap shows inode already free\n"); 926 } 927 928 if (!addressPXD(&iagp->inoext[extno])) { 929 release_metapage(mp); 930 IREAD_UNLOCK(ipimap); 931 AG_UNLOCK(imap, agno); 932 jfs_error(ip->i_sb, "invalid inoext\n"); 933 return -EIO; 934 } 935 936 /* compute the bitmap for the extent reflecting the freed inode. 937 */ 938 bitmap = le32_to_cpu(iagp->wmap[extno]) & ~mask; 939 940 if (imap->im_agctl[agno].numfree > imap->im_agctl[agno].numinos) { 941 release_metapage(mp); 942 IREAD_UNLOCK(ipimap); 943 AG_UNLOCK(imap, agno); 944 jfs_error(ip->i_sb, "numfree > numinos\n"); 945 return -EIO; 946 } 947 /* 948 * inode extent still has some inodes or below low water mark: 949 * keep the inode extent; 950 */ 951 if (bitmap || 952 imap->im_agctl[agno].numfree < 96 || 953 (imap->im_agctl[agno].numfree < 288 && 954 (((imap->im_agctl[agno].numfree * 100) / 955 imap->im_agctl[agno].numinos) <= 25))) { 956 /* if the iag currently has no free inodes (i.e., 957 * the inode being freed is the first free inode of iag), 958 * insert the iag at head of the inode free list for the ag. 959 */ 960 if (iagp->nfreeinos == 0) { 961 /* check if there are any iags on the ag inode 962 * free list. if so, read the first one so that 963 * we can link the current iag onto the list at 964 * the head. 965 */ 966 if ((fwd = imap->im_agctl[agno].inofree) >= 0) { 967 /* read the iag that currently is the head 968 * of the list. 
969 */ 970 if ((rc = diIAGRead(imap, fwd, &))) { 971 IREAD_UNLOCK(ipimap); 972 AG_UNLOCK(imap, agno); 973 release_metapage(mp); 974 return (rc); 975 } 976 aiagp = (struct iag *) amp->data; 977 978 /* make current head point back to the iag. 979 */ 980 aiagp->inofreeback = cpu_to_le32(iagno); 981 982 write_metapage(amp); 983 } 984 985 /* iag points forward to current head and iag 986 * becomes the new head of the list. 987 */ 988 iagp->inofreefwd = 989 cpu_to_le32(imap->im_agctl[agno].inofree); 990 iagp->inofreeback = cpu_to_le32(-1); 991 imap->im_agctl[agno].inofree = iagno; 992 } 993 IREAD_UNLOCK(ipimap); 994 995 /* update the free inode summary map for the extent if 996 * freeing the inode means the extent will now have free 997 * inodes (i.e., the inode being freed is the first free 998 * inode of extent), 999 */ 1000 if (iagp->wmap[extno] == cpu_to_le32(ONES)) { 1001 sword = extno >> L2EXTSPERSUM; 1002 bitno = extno & (EXTSPERSUM - 1); 1003 iagp->inosmap[sword] &= 1004 cpu_to_le32(~(HIGHORDER >> bitno)); 1005 } 1006 1007 /* update the bitmap. 1008 */ 1009 iagp->wmap[extno] = cpu_to_le32(bitmap); 1010 1011 /* update the free inode counts at the iag, ag and 1012 * map level. 1013 */ 1014 le32_add_cpu(&iagp->nfreeinos, 1); 1015 imap->im_agctl[agno].numfree += 1; 1016 atomic_inc(&imap->im_numfree); 1017 1018 /* release the AG inode map lock 1019 */ 1020 AG_UNLOCK(imap, agno); 1021 1022 /* write the iag */ 1023 write_metapage(mp); 1024 1025 return (0); 1026 } 1027 1028 1029 /* 1030 * inode extent has become free and above low water mark: 1031 * free the inode extent; 1032 */ 1033 1034 /* 1035 * prepare to update iag list(s) (careful update step 1) 1036 */ 1037 amp = bmp = cmp = dmp = NULL; 1038 fwd = back = -1; 1039 1040 /* check if the iag currently has no free extents. if so, 1041 * it will be placed on the head of the ag extent free list. 1042 */ 1043 if (iagp->nfreeexts == 0) { 1044 /* check if the ag extent free list has any iags. 
1045 * if so, read the iag at the head of the list now. 1046 * this (head) iag will be updated later to reflect 1047 * the addition of the current iag at the head of 1048 * the list. 1049 */ 1050 if ((fwd = imap->im_agctl[agno].extfree) >= 0) { 1051 if ((rc = diIAGRead(imap, fwd, &))) 1052 goto error_out; 1053 aiagp = (struct iag *) amp->data; 1054 } 1055 } else { 1056 /* iag has free extents. check if the addition of a free 1057 * extent will cause all extents to be free within this 1058 * iag. if so, the iag will be removed from the ag extent 1059 * free list and placed on the inode map's free iag list. 1060 */ 1061 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) { 1062 /* in preparation for removing the iag from the 1063 * ag extent free list, read the iags preceding 1064 * and following the iag on the ag extent free 1065 * list. 1066 */ 1067 if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) { 1068 if ((rc = diIAGRead(imap, fwd, &))) 1069 goto error_out; 1070 aiagp = (struct iag *) amp->data; 1071 } 1072 1073 if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) { 1074 if ((rc = diIAGRead(imap, back, &bmp))) 1075 goto error_out; 1076 biagp = (struct iag *) bmp->data; 1077 } 1078 } 1079 } 1080 1081 /* remove the iag from the ag inode free list if freeing 1082 * this extent cause the iag to have no free inodes. 1083 */ 1084 if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) { 1085 int inofreeback = le32_to_cpu(iagp->inofreeback); 1086 int inofreefwd = le32_to_cpu(iagp->inofreefwd); 1087 1088 /* in preparation for removing the iag from the 1089 * ag inode free list, read the iags preceding 1090 * and following the iag on the ag inode free 1091 * list. before reading these iags, we must make 1092 * sure that we already don't have them in hand 1093 * from up above, since re-reading an iag (buffer) 1094 * we are currently holding would cause a deadlock. 
1095 */ 1096 if (inofreefwd >= 0) { 1097 1098 if (inofreefwd == fwd) 1099 ciagp = (struct iag *) amp->data; 1100 else if (inofreefwd == back) 1101 ciagp = (struct iag *) bmp->data; 1102 else { 1103 if ((rc = 1104 diIAGRead(imap, inofreefwd, &cmp))) 1105 goto error_out; 1106 ciagp = (struct iag *) cmp->data; 1107 } 1108 assert(ciagp != NULL); 1109 } 1110 1111 if (inofreeback >= 0) { 1112 if (inofreeback == fwd) 1113 diagp = (struct iag *) amp->data; 1114 else if (inofreeback == back) 1115 diagp = (struct iag *) bmp->data; 1116 else { 1117 if ((rc = 1118 diIAGRead(imap, inofreeback, &dmp))) 1119 goto error_out; 1120 diagp = (struct iag *) dmp->data; 1121 } 1122 assert(diagp != NULL); 1123 } 1124 } 1125 1126 IREAD_UNLOCK(ipimap); 1127 1128 /* 1129 * invalidate any page of the inode extent freed from buffer cache; 1130 */ 1131 freepxd = iagp->inoext[extno]; 1132 invalidate_pxd_metapages(ip, freepxd); 1133 1134 /* 1135 * update iag list(s) (careful update step 2) 1136 */ 1137 /* add the iag to the ag extent free list if this is the 1138 * first free extent for the iag. 1139 */ 1140 if (iagp->nfreeexts == 0) { 1141 if (fwd >= 0) 1142 aiagp->extfreeback = cpu_to_le32(iagno); 1143 1144 iagp->extfreefwd = 1145 cpu_to_le32(imap->im_agctl[agno].extfree); 1146 iagp->extfreeback = cpu_to_le32(-1); 1147 imap->im_agctl[agno].extfree = iagno; 1148 } else { 1149 /* remove the iag from the ag extent list if all extents 1150 * are now free and place it on the inode map iag free list. 
1151 */ 1152 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) { 1153 if (fwd >= 0) 1154 aiagp->extfreeback = iagp->extfreeback; 1155 1156 if (back >= 0) 1157 biagp->extfreefwd = iagp->extfreefwd; 1158 else 1159 imap->im_agctl[agno].extfree = 1160 le32_to_cpu(iagp->extfreefwd); 1161 1162 iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); 1163 1164 IAGFREE_LOCK(imap); 1165 iagp->iagfree = cpu_to_le32(imap->im_freeiag); 1166 imap->im_freeiag = iagno; 1167 IAGFREE_UNLOCK(imap); 1168 } 1169 } 1170 1171 /* remove the iag from the ag inode free list if freeing 1172 * this extent causes the iag to have no free inodes. 1173 */ 1174 if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) { 1175 if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) 1176 ciagp->inofreeback = iagp->inofreeback; 1177 1178 if ((int) le32_to_cpu(iagp->inofreeback) >= 0) 1179 diagp->inofreefwd = iagp->inofreefwd; 1180 else 1181 imap->im_agctl[agno].inofree = 1182 le32_to_cpu(iagp->inofreefwd); 1183 1184 iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); 1185 } 1186 1187 /* update the inode extent address and working map 1188 * to reflect the free extent. 1189 * the permanent map should have been updated already 1190 * for the inode being freed. 1191 */ 1192 if (iagp->pmap[extno] != 0) { 1193 jfs_error(ip->i_sb, "the pmap does not show inode free\n"); 1194 } 1195 iagp->wmap[extno] = 0; 1196 PXDlength(&iagp->inoext[extno], 0); 1197 PXDaddress(&iagp->inoext[extno], 0); 1198 1199 /* update the free extent and free inode summary maps 1200 * to reflect the freed extent. 1201 * the inode summary map is marked to indicate no inodes 1202 * available for the freed extent. 1203 */ 1204 sword = extno >> L2EXTSPERSUM; 1205 bitno = extno & (EXTSPERSUM - 1); 1206 mask = HIGHORDER >> bitno; 1207 iagp->inosmap[sword] |= cpu_to_le32(mask); 1208 iagp->extsmap[sword] &= cpu_to_le32(~mask); 1209 1210 /* update the number of free inodes and number of free extents 1211 * for the iag. 
1212 */ 1213 le32_add_cpu(&iagp->nfreeinos, -(INOSPEREXT - 1)); 1214 le32_add_cpu(&iagp->nfreeexts, 1); 1215 1216 /* update the number of free inodes and backed inodes 1217 * at the ag and inode map level. 1218 */ 1219 imap->im_agctl[agno].numfree -= (INOSPEREXT - 1); 1220 imap->im_agctl[agno].numinos -= INOSPEREXT; 1221 atomic_sub(INOSPEREXT - 1, &imap->im_numfree); 1222 atomic_sub(INOSPEREXT, &imap->im_numinos); 1223 1224 if (amp) 1225 write_metapage(amp); 1226 if (bmp) 1227 write_metapage(bmp); 1228 if (cmp) 1229 write_metapage(cmp); 1230 if (dmp) 1231 write_metapage(dmp); 1232 1233 /* 1234 * start transaction to update block allocation map 1235 * for the inode extent freed; 1236 * 1237 * N.B. AG_LOCK is released and iag will be released below, and 1238 * other thread may allocate inode from/reusing the ixad freed 1239 * BUT with new/different backing inode extent from the extent 1240 * to be freed by the transaction; 1241 */ 1242 tid = txBegin(ipimap->i_sb, COMMIT_FORCE); 1243 mutex_lock(&JFS_IP(ipimap)->commit_mutex); 1244 1245 /* acquire tlock of the iag page of the freed ixad 1246 * to force the page NOHOMEOK (even though no data is 1247 * logged from the iag page) until NOREDOPAGE|FREEXTENT log 1248 * for the free of the extent is committed; 1249 * write FREEXTENT|NOREDOPAGE log record 1250 * N.B. linelock is overlaid as freed extent descriptor; 1251 */ 1252 tlck = txLock(tid, ipimap, mp, tlckINODE | tlckFREE); 1253 pxdlock = (struct pxd_lock *) & tlck->lock; 1254 pxdlock->flag = mlckFREEPXD; 1255 pxdlock->pxd = freepxd; 1256 pxdlock->index = 1; 1257 1258 write_metapage(mp); 1259 1260 iplist[0] = ipimap; 1261 1262 /* 1263 * logredo needs the IAG number and IAG extent index in order 1264 * to ensure that the IMap is consistent. The least disruptive 1265 * way to pass these values through to the transaction manager 1266 * is in the iplist array. 1267 * 1268 * It's not pretty, but it works. 
1269 */ 1270 iplist[1] = (struct inode *) (size_t)iagno; 1271 iplist[2] = (struct inode *) (size_t)extno; 1272 1273 rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); 1274 1275 txEnd(tid); 1276 mutex_unlock(&JFS_IP(ipimap)->commit_mutex); 1277 1278 /* unlock the AG inode map information */ 1279 AG_UNLOCK(imap, agno); 1280 1281 return (0); 1282 1283 error_out: 1284 IREAD_UNLOCK(ipimap); 1285 1286 if (amp) 1287 release_metapage(amp); 1288 if (bmp) 1289 release_metapage(bmp); 1290 if (cmp) 1291 release_metapage(cmp); 1292 if (dmp) 1293 release_metapage(dmp); 1294 1295 AG_UNLOCK(imap, agno); 1296 1297 release_metapage(mp); 1298 1299 return (rc); 1300 } 1301 1302 /* 1303 * There are several places in the diAlloc* routines where we initialize 1304 * the inode. 1305 */ 1306 static inline void 1307 diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp) 1308 { 1309 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 1310 1311 ip->i_ino = (iagno << L2INOSPERIAG) + ino; 1312 jfs_ip->ixpxd = iagp->inoext[extno]; 1313 jfs_ip->agstart = le64_to_cpu(iagp->agstart); 1314 jfs_ip->active_ag = -1; 1315 } 1316 1317 1318 /* 1319 * NAME: diAlloc(pip,dir,ip) 1320 * 1321 * FUNCTION: allocate a disk inode from the inode working map 1322 * for a fileset or aggregate. 1323 * 1324 * PARAMETERS: 1325 * pip - pointer to incore inode for the parent inode. 1326 * dir - 'true' if the new disk inode is for a directory. 1327 * ip - pointer to a new inode 1328 * 1329 * RETURN VALUES: 1330 * 0 - success. 1331 * -ENOSPC - insufficient disk resources. 1332 * -EIO - i/o error. 
1333 */ 1334 int diAlloc(struct inode *pip, bool dir, struct inode *ip) 1335 { 1336 int rc, ino, iagno, addext, extno, bitno, sword; 1337 int nwords, rem, i, agno; 1338 u32 mask, inosmap, extsmap; 1339 struct inode *ipimap; 1340 struct metapage *mp; 1341 ino_t inum; 1342 struct iag *iagp; 1343 struct inomap *imap; 1344 1345 /* get the pointers to the inode map inode and the 1346 * corresponding imap control structure. 1347 */ 1348 ipimap = JFS_SBI(pip->i_sb)->ipimap; 1349 imap = JFS_IP(ipimap)->i_imap; 1350 JFS_IP(ip)->ipimap = ipimap; 1351 JFS_IP(ip)->fileset = FILESYSTEM_I; 1352 1353 /* for a directory, the allocation policy is to start 1354 * at the ag level using the preferred ag. 1355 */ 1356 if (dir) { 1357 agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap); 1358 AG_LOCK(imap, agno); 1359 goto tryag; 1360 } 1361 1362 /* for files, the policy starts off by trying to allocate from 1363 * the same iag containing the parent disk inode: 1364 * try to allocate the new disk inode close to the parent disk 1365 * inode, using parent disk inode number + 1 as the allocation 1366 * hint. (we use a left-to-right policy to attempt to avoid 1367 * moving backward on the disk.) compute the hint within the 1368 * file system and the iag. 1369 */ 1370 1371 /* get the ag number of this iag */ 1372 agno = BLKTOAG(JFS_IP(pip)->agstart, JFS_SBI(pip->i_sb)); 1373 1374 if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) { 1375 /* 1376 * There is an open file actively growing. We want to 1377 * allocate new inodes from a different ag to avoid 1378 * fragmentation problems. 
1379 */ 1380 agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap); 1381 AG_LOCK(imap, agno); 1382 goto tryag; 1383 } 1384 1385 inum = pip->i_ino + 1; 1386 ino = inum & (INOSPERIAG - 1); 1387 1388 /* back off the hint if it is outside of the iag */ 1389 if (ino == 0) 1390 inum = pip->i_ino; 1391 1392 /* lock the AG inode map information */ 1393 AG_LOCK(imap, agno); 1394 1395 /* Get read lock on imap inode */ 1396 IREAD_LOCK(ipimap, RDWRLOCK_IMAP); 1397 1398 /* get the iag number and read the iag */ 1399 iagno = INOTOIAG(inum); 1400 if ((rc = diIAGRead(imap, iagno, &mp))) { 1401 IREAD_UNLOCK(ipimap); 1402 AG_UNLOCK(imap, agno); 1403 return (rc); 1404 } 1405 iagp = (struct iag *) mp->data; 1406 1407 /* determine if new inode extent is allowed to be added to the iag. 1408 * new inode extent can be added to the iag if the ag 1409 * has less than 32 free disk inodes and the iag has free extents. 1410 */ 1411 addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts); 1412 1413 /* 1414 * try to allocate from the IAG 1415 */ 1416 /* check if the inode may be allocated from the iag 1417 * (i.e. the inode has free inodes or new extent can be added). 1418 */ 1419 if (iagp->nfreeinos || addext) { 1420 /* determine the extent number of the hint. 1421 */ 1422 extno = ino >> L2INOSPEREXT; 1423 1424 /* check if the extent containing the hint has backed 1425 * inodes. if so, try to allocate within this extent. 1426 */ 1427 if (addressPXD(&iagp->inoext[extno])) { 1428 bitno = ino & (INOSPEREXT - 1); 1429 if ((bitno = 1430 diFindFree(le32_to_cpu(iagp->wmap[extno]), 1431 bitno)) 1432 < INOSPEREXT) { 1433 ino = (extno << L2INOSPEREXT) + bitno; 1434 1435 /* a free inode (bit) was found within this 1436 * extent, so allocate it. 1437 */ 1438 rc = diAllocBit(imap, iagp, ino); 1439 IREAD_UNLOCK(ipimap); 1440 if (rc) { 1441 assert(rc == -EIO); 1442 } else { 1443 /* set the results of the allocation 1444 * and write the iag. 
1445 */ 1446 diInitInode(ip, iagno, ino, extno, 1447 iagp); 1448 mark_metapage_dirty(mp); 1449 } 1450 release_metapage(mp); 1451 1452 /* free the AG lock and return. 1453 */ 1454 AG_UNLOCK(imap, agno); 1455 return (rc); 1456 } 1457 1458 if (!addext) 1459 extno = 1460 (extno == 1461 EXTSPERIAG - 1) ? 0 : extno + 1; 1462 } 1463 1464 /* 1465 * no free inodes within the extent containing the hint. 1466 * 1467 * try to allocate from the backed extents following 1468 * hint or, if appropriate (i.e. addext is true), allocate 1469 * an extent of free inodes at or following the extent 1470 * containing the hint. 1471 * 1472 * the free inode and free extent summary maps are used 1473 * here, so determine the starting summary map position 1474 * and the number of words we'll have to examine. again, 1475 * the approach is to allocate following the hint, so we 1476 * might have to initially ignore prior bits of the summary 1477 * map that represent extents prior to the extent containing 1478 * the hint and later revisit these bits. 1479 */ 1480 bitno = extno & (EXTSPERSUM - 1); 1481 nwords = (bitno == 0) ? SMAPSZ : SMAPSZ + 1; 1482 sword = extno >> L2EXTSPERSUM; 1483 1484 /* mask any prior bits for the starting words of the 1485 * summary map. 1486 */ 1487 mask = (bitno == 0) ? 0 : (ONES << (EXTSPERSUM - bitno)); 1488 inosmap = le32_to_cpu(iagp->inosmap[sword]) | mask; 1489 extsmap = le32_to_cpu(iagp->extsmap[sword]) | mask; 1490 1491 /* scan the free inode and free extent summary maps for 1492 * free resources. 1493 */ 1494 for (i = 0; i < nwords; i++) { 1495 /* check if this word of the free inode summary 1496 * map describes an extent with free inodes. 1497 */ 1498 if (~inosmap) { 1499 /* an extent with free inodes has been 1500 * found. determine the extent number 1501 * and the inode number within the extent. 
1502 */ 1503 rem = diFindFree(inosmap, 0); 1504 extno = (sword << L2EXTSPERSUM) + rem; 1505 rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 1506 0); 1507 if (rem >= INOSPEREXT) { 1508 IREAD_UNLOCK(ipimap); 1509 release_metapage(mp); 1510 AG_UNLOCK(imap, agno); 1511 jfs_error(ip->i_sb, 1512 "can't find free bit in wmap\n"); 1513 return -EIO; 1514 } 1515 1516 /* determine the inode number within the 1517 * iag and allocate the inode from the 1518 * map. 1519 */ 1520 ino = (extno << L2INOSPEREXT) + rem; 1521 rc = diAllocBit(imap, iagp, ino); 1522 IREAD_UNLOCK(ipimap); 1523 if (rc) 1524 assert(rc == -EIO); 1525 else { 1526 /* set the results of the allocation 1527 * and write the iag. 1528 */ 1529 diInitInode(ip, iagno, ino, extno, 1530 iagp); 1531 mark_metapage_dirty(mp); 1532 } 1533 release_metapage(mp); 1534 1535 /* free the AG lock and return. 1536 */ 1537 AG_UNLOCK(imap, agno); 1538 return (rc); 1539 1540 } 1541 1542 /* check if we may allocate an extent of free 1543 * inodes and whether this word of the free 1544 * extents summary map describes a free extent. 1545 */ 1546 if (addext && ~extsmap) { 1547 /* a free extent has been found. determine 1548 * the extent number. 1549 */ 1550 rem = diFindFree(extsmap, 0); 1551 extno = (sword << L2EXTSPERSUM) + rem; 1552 1553 /* allocate an extent of free inodes. 1554 */ 1555 if ((rc = diNewExt(imap, iagp, extno))) { 1556 /* if there is no disk space for a 1557 * new extent, try to allocate the 1558 * disk inode from somewhere else. 1559 */ 1560 if (rc == -ENOSPC) 1561 break; 1562 1563 assert(rc == -EIO); 1564 } else { 1565 /* set the results of the allocation 1566 * and write the iag. 1567 */ 1568 diInitInode(ip, iagno, 1569 extno << L2INOSPEREXT, 1570 extno, iagp); 1571 mark_metapage_dirty(mp); 1572 } 1573 release_metapage(mp); 1574 /* free the imap inode & the AG lock & return. 
1575 */ 1576 IREAD_UNLOCK(ipimap); 1577 AG_UNLOCK(imap, agno); 1578 return (rc); 1579 } 1580 1581 /* move on to the next set of summary map words. 1582 */ 1583 sword = (sword == SMAPSZ - 1) ? 0 : sword + 1; 1584 inosmap = le32_to_cpu(iagp->inosmap[sword]); 1585 extsmap = le32_to_cpu(iagp->extsmap[sword]); 1586 } 1587 } 1588 /* unlock imap inode */ 1589 IREAD_UNLOCK(ipimap); 1590 1591 /* nothing doing in this iag, so release it. */ 1592 release_metapage(mp); 1593 1594 tryag: 1595 /* 1596 * try to allocate anywhere within the same AG as the parent inode. 1597 */ 1598 rc = diAllocAG(imap, agno, dir, ip); 1599 1600 AG_UNLOCK(imap, agno); 1601 1602 if (rc != -ENOSPC) 1603 return (rc); 1604 1605 /* 1606 * try to allocate in any AG. 1607 */ 1608 return (diAllocAny(imap, agno, dir, ip)); 1609 } 1610 1611 1612 /* 1613 * NAME: diAllocAG(imap,agno,dir,ip) 1614 * 1615 * FUNCTION: allocate a disk inode from the allocation group. 1616 * 1617 * this routine first determines if a new extent of free 1618 * inodes should be added for the allocation group, with 1619 * the current request satisfied from this extent. if this 1620 * is the case, an attempt will be made to do just that. if 1621 * this attempt fails or it has been determined that a new 1622 * extent should not be added, an attempt is made to satisfy 1623 * the request by allocating an existing (backed) free inode 1624 * from the allocation group. 1625 * 1626 * PRE CONDITION: Already have the AG lock for this AG. 1627 * 1628 * PARAMETERS: 1629 * imap - pointer to inode map control structure. 1630 * agno - allocation group to allocate from. 1631 * dir - 'true' if the new disk inode is for a directory. 1632 * ip - pointer to the new inode to be filled in on successful return 1633 * with the disk inode number allocated, its extent address 1634 * and the start of the ag. 1635 * 1636 * RETURN VALUES: 1637 * 0 - success. 1638 * -ENOSPC - insufficient disk resources. 1639 * -EIO - i/o error. 
1640 */ 1641 static int 1642 diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) 1643 { 1644 int rc, addext, numfree, numinos; 1645 1646 /* get the number of free and the number of backed disk 1647 * inodes currently within the ag. 1648 */ 1649 numfree = imap->im_agctl[agno].numfree; 1650 numinos = imap->im_agctl[agno].numinos; 1651 1652 if (numfree > numinos) { 1653 jfs_error(ip->i_sb, "numfree > numinos\n"); 1654 return -EIO; 1655 } 1656 1657 /* determine if we should allocate a new extent of free inodes 1658 * within the ag: for directory inodes, add a new extent 1659 * if there are a small number of free inodes or number of free 1660 * inodes is a small percentage of the number of backed inodes. 1661 */ 1662 if (dir) 1663 addext = (numfree < 64 || 1664 (numfree < 256 1665 && ((numfree * 100) / numinos) <= 20)); 1666 else 1667 addext = (numfree == 0); 1668 1669 /* 1670 * try to allocate a new extent of free inodes. 1671 */ 1672 if (addext) { 1673 /* if free space is not available for this new extent, try 1674 * below to allocate a free and existing (already backed) 1675 * inode from the ag. 1676 */ 1677 if ((rc = diAllocExt(imap, agno, ip)) != -ENOSPC) 1678 return (rc); 1679 } 1680 1681 /* 1682 * try to allocate an existing free inode from the ag. 1683 */ 1684 return (diAllocIno(imap, agno, ip)); 1685 } 1686 1687 1688 /* 1689 * NAME: diAllocAny(imap,agno,dir,iap) 1690 * 1691 * FUNCTION: allocate a disk inode from any other allocation group. 1692 * 1693 * this routine is called when an allocation attempt within 1694 * the primary allocation group has failed. if attempts to 1695 * allocate an inode from any allocation group other than the 1696 * specified primary group. 1697 * 1698 * PARAMETERS: 1699 * imap - pointer to inode map control structure. 1700 * agno - primary allocation group (to avoid). 1701 * dir - 'true' if the new disk inode is for a directory. 
1702 * ip - pointer to a new inode to be filled in on successful return 1703 * with the disk inode number allocated, its extent address 1704 * and the start of the ag. 1705 * 1706 * RETURN VALUES: 1707 * 0 - success. 1708 * -ENOSPC - insufficient disk resources. 1709 * -EIO - i/o error. 1710 */ 1711 static int 1712 diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip) 1713 { 1714 int ag, rc; 1715 int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag; 1716 1717 1718 /* try to allocate from the ags following agno up to 1719 * the maximum ag number. 1720 */ 1721 for (ag = agno + 1; ag <= maxag; ag++) { 1722 AG_LOCK(imap, ag); 1723 1724 rc = diAllocAG(imap, ag, dir, ip); 1725 1726 AG_UNLOCK(imap, ag); 1727 1728 if (rc != -ENOSPC) 1729 return (rc); 1730 } 1731 1732 /* try to allocate from the ags in front of agno. 1733 */ 1734 for (ag = 0; ag < agno; ag++) { 1735 AG_LOCK(imap, ag); 1736 1737 rc = diAllocAG(imap, ag, dir, ip); 1738 1739 AG_UNLOCK(imap, ag); 1740 1741 if (rc != -ENOSPC) 1742 return (rc); 1743 } 1744 1745 /* no free disk inodes. 1746 */ 1747 return -ENOSPC; 1748 } 1749 1750 1751 /* 1752 * NAME: diAllocIno(imap,agno,ip) 1753 * 1754 * FUNCTION: allocate a disk inode from the allocation group's free 1755 * inode list, returning an error if this free list is 1756 * empty (i.e. no iags on the list). 1757 * 1758 * allocation occurs from the first iag on the list using 1759 * the iag's free inode summary map to find the leftmost 1760 * free inode in the iag. 1761 * 1762 * PRE CONDITION: Already have AG lock for this AG. 1763 * 1764 * PARAMETERS: 1765 * imap - pointer to inode map control structure. 1766 * agno - allocation group. 1767 * ip - pointer to new inode to be filled in on successful return 1768 * with the disk inode number allocated, its extent address 1769 * and the start of the ag. 1770 * 1771 * RETURN VALUES: 1772 * 0 - success. 1773 * -ENOSPC - insufficient disk resources. 1774 * -EIO - i/o error. 
1775 */ 1776 static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) 1777 { 1778 int iagno, ino, rc, rem, extno, sword; 1779 struct metapage *mp; 1780 struct iag *iagp; 1781 1782 /* check if there are iags on the ag's free inode list. 1783 */ 1784 if ((iagno = imap->im_agctl[agno].inofree) < 0) 1785 return -ENOSPC; 1786 1787 /* obtain read lock on imap inode */ 1788 IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); 1789 1790 /* read the iag at the head of the list. 1791 */ 1792 if ((rc = diIAGRead(imap, iagno, &mp))) { 1793 IREAD_UNLOCK(imap->im_ipimap); 1794 return (rc); 1795 } 1796 iagp = (struct iag *) mp->data; 1797 1798 /* better be free inodes in this iag if it is on the 1799 * list. 1800 */ 1801 if (!iagp->nfreeinos) { 1802 IREAD_UNLOCK(imap->im_ipimap); 1803 release_metapage(mp); 1804 jfs_error(ip->i_sb, "nfreeinos = 0, but iag on freelist\n"); 1805 return -EIO; 1806 } 1807 1808 /* scan the free inode summary map to find an extent 1809 * with free inodes. 1810 */ 1811 for (sword = 0;; sword++) { 1812 if (sword >= SMAPSZ) { 1813 IREAD_UNLOCK(imap->im_ipimap); 1814 release_metapage(mp); 1815 jfs_error(ip->i_sb, 1816 "free inode not found in summary map\n"); 1817 return -EIO; 1818 } 1819 1820 if (~iagp->inosmap[sword]) 1821 break; 1822 } 1823 1824 /* found a extent with free inodes. determine 1825 * the extent number. 1826 */ 1827 rem = diFindFree(le32_to_cpu(iagp->inosmap[sword]), 0); 1828 if (rem >= EXTSPERSUM) { 1829 IREAD_UNLOCK(imap->im_ipimap); 1830 release_metapage(mp); 1831 jfs_error(ip->i_sb, "no free extent found\n"); 1832 return -EIO; 1833 } 1834 extno = (sword << L2EXTSPERSUM) + rem; 1835 1836 /* find the first free inode in the extent. 1837 */ 1838 rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 0); 1839 if (rem >= INOSPEREXT) { 1840 IREAD_UNLOCK(imap->im_ipimap); 1841 release_metapage(mp); 1842 jfs_error(ip->i_sb, "free inode not found\n"); 1843 return -EIO; 1844 } 1845 1846 /* compute the inode number within the iag. 
1847 */ 1848 ino = (extno << L2INOSPEREXT) + rem; 1849 1850 /* allocate the inode. 1851 */ 1852 rc = diAllocBit(imap, iagp, ino); 1853 IREAD_UNLOCK(imap->im_ipimap); 1854 if (rc) { 1855 release_metapage(mp); 1856 return (rc); 1857 } 1858 1859 /* set the results of the allocation and write the iag. 1860 */ 1861 diInitInode(ip, iagno, ino, extno, iagp); 1862 write_metapage(mp); 1863 1864 return (0); 1865 } 1866 1867 1868 /* 1869 * NAME: diAllocExt(imap,agno,ip) 1870 * 1871 * FUNCTION: add a new extent of free inodes to an iag, allocating 1872 * an inode from this extent to satisfy the current allocation 1873 * request. 1874 * 1875 * this routine first tries to find an existing iag with free 1876 * extents through the ag free extent list. if list is not 1877 * empty, the head of the list will be selected as the home 1878 * of the new extent of free inodes. otherwise (the list is 1879 * empty), a new iag will be allocated for the ag to contain 1880 * the extent. 1881 * 1882 * once an iag has been selected, the free extent summary map 1883 * is used to locate a free extent within the iag and diNewExt() 1884 * is called to initialize the extent, with initialization 1885 * including the allocation of the first inode of the extent 1886 * for the purpose of satisfying this request. 1887 * 1888 * PARAMETERS: 1889 * imap - pointer to inode map control structure. 1890 * agno - allocation group number. 1891 * ip - pointer to new inode to be filled in on successful return 1892 * with the disk inode number allocated, its extent address 1893 * and the start of the ag. 1894 * 1895 * RETURN VALUES: 1896 * 0 - success. 1897 * -ENOSPC - insufficient disk resources. 1898 * -EIO - i/o error. 1899 */ 1900 static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) 1901 { 1902 int rem, iagno, sword, extno, rc; 1903 struct metapage *mp; 1904 struct iag *iagp; 1905 1906 /* check if the ag has any iags with free extents. if not, 1907 * allocate a new iag for the ag. 
1908 */ 1909 if ((iagno = imap->im_agctl[agno].extfree) < 0) { 1910 /* If successful, diNewIAG will obtain the read lock on the 1911 * imap inode. 1912 */ 1913 if ((rc = diNewIAG(imap, &iagno, agno, &mp))) { 1914 return (rc); 1915 } 1916 iagp = (struct iag *) mp->data; 1917 1918 /* set the ag number if this a brand new iag 1919 */ 1920 iagp->agstart = 1921 cpu_to_le64(AGTOBLK(agno, imap->im_ipimap)); 1922 } else { 1923 /* read the iag. 1924 */ 1925 IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); 1926 if ((rc = diIAGRead(imap, iagno, &mp))) { 1927 IREAD_UNLOCK(imap->im_ipimap); 1928 jfs_error(ip->i_sb, "error reading iag\n"); 1929 return rc; 1930 } 1931 iagp = (struct iag *) mp->data; 1932 } 1933 1934 /* using the free extent summary map, find a free extent. 1935 */ 1936 for (sword = 0;; sword++) { 1937 if (sword >= SMAPSZ) { 1938 release_metapage(mp); 1939 IREAD_UNLOCK(imap->im_ipimap); 1940 jfs_error(ip->i_sb, "free ext summary map not found\n"); 1941 return -EIO; 1942 } 1943 if (~iagp->extsmap[sword]) 1944 break; 1945 } 1946 1947 /* determine the extent number of the free extent. 1948 */ 1949 rem = diFindFree(le32_to_cpu(iagp->extsmap[sword]), 0); 1950 if (rem >= EXTSPERSUM) { 1951 release_metapage(mp); 1952 IREAD_UNLOCK(imap->im_ipimap); 1953 jfs_error(ip->i_sb, "free extent not found\n"); 1954 return -EIO; 1955 } 1956 extno = (sword << L2EXTSPERSUM) + rem; 1957 1958 /* initialize the new extent. 1959 */ 1960 rc = diNewExt(imap, iagp, extno); 1961 IREAD_UNLOCK(imap->im_ipimap); 1962 if (rc) { 1963 /* something bad happened. if a new iag was allocated, 1964 * place it back on the inode map's iag free list, and 1965 * clear the ag number information. 1966 */ 1967 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { 1968 IAGFREE_LOCK(imap); 1969 iagp->iagfree = cpu_to_le32(imap->im_freeiag); 1970 imap->im_freeiag = iagno; 1971 IAGFREE_UNLOCK(imap); 1972 } 1973 write_metapage(mp); 1974 return (rc); 1975 } 1976 1977 /* set the results of the allocation and write the iag. 
1978 */ 1979 diInitInode(ip, iagno, extno << L2INOSPEREXT, extno, iagp); 1980 1981 write_metapage(mp); 1982 1983 return (0); 1984 } 1985 1986 1987 /* 1988 * NAME: diAllocBit(imap,iagp,ino) 1989 * 1990 * FUNCTION: allocate a backed inode from an iag. 1991 * 1992 * this routine performs the mechanics of allocating a 1993 * specified inode from a backed extent. 1994 * 1995 * if the inode to be allocated represents the last free 1996 * inode within the iag, the iag will be removed from the 1997 * ag free inode list. 1998 * 1999 * a careful update approach is used to provide consistency 2000 * in the face of updates to multiple buffers. under this 2001 * approach, all required buffers are obtained before making 2002 * any updates and are held all are updates are complete. 2003 * 2004 * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on 2005 * this AG. Must have read lock on imap inode. 2006 * 2007 * PARAMETERS: 2008 * imap - pointer to inode map control structure. 2009 * iagp - pointer to iag. 2010 * ino - inode number to be allocated within the iag. 2011 * 2012 * RETURN VALUES: 2013 * 0 - success. 2014 * -ENOSPC - insufficient disk resources. 2015 * -EIO - i/o error. 2016 */ 2017 static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) 2018 { 2019 int extno, bitno, agno, sword, rc; 2020 struct metapage *amp = NULL, *bmp = NULL; 2021 struct iag *aiagp = NULL, *biagp = NULL; 2022 u32 mask; 2023 2024 /* check if this is the last free inode within the iag. 2025 * if so, it will have to be removed from the ag free 2026 * inode list, so get the iags preceding and following 2027 * it on the list. 
2028 */ 2029 if (iagp->nfreeinos == cpu_to_le32(1)) { 2030 if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) { 2031 if ((rc = 2032 diIAGRead(imap, le32_to_cpu(iagp->inofreefwd), 2033 &))) 2034 return (rc); 2035 aiagp = (struct iag *) amp->data; 2036 } 2037 2038 if ((int) le32_to_cpu(iagp->inofreeback) >= 0) { 2039 if ((rc = 2040 diIAGRead(imap, 2041 le32_to_cpu(iagp->inofreeback), 2042 &bmp))) { 2043 if (amp) 2044 release_metapage(amp); 2045 return (rc); 2046 } 2047 biagp = (struct iag *) bmp->data; 2048 } 2049 } 2050 2051 /* get the ag number, extent number, inode number within 2052 * the extent. 2053 */ 2054 agno = BLKTOAG(le64_to_cpu(iagp->agstart), JFS_SBI(imap->im_ipimap->i_sb)); 2055 extno = ino >> L2INOSPEREXT; 2056 bitno = ino & (INOSPEREXT - 1); 2057 2058 /* compute the mask for setting the map. 2059 */ 2060 mask = HIGHORDER >> bitno; 2061 2062 /* the inode should be free and backed. 2063 */ 2064 if (((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) || 2065 ((le32_to_cpu(iagp->wmap[extno]) & mask) != 0) || 2066 (addressPXD(&iagp->inoext[extno]) == 0)) { 2067 if (amp) 2068 release_metapage(amp); 2069 if (bmp) 2070 release_metapage(bmp); 2071 2072 jfs_error(imap->im_ipimap->i_sb, "iag inconsistent\n"); 2073 return -EIO; 2074 } 2075 2076 /* mark the inode as allocated in the working map. 2077 */ 2078 iagp->wmap[extno] |= cpu_to_le32(mask); 2079 2080 /* check if all inodes within the extent are now 2081 * allocated. if so, update the free inode summary 2082 * map to reflect this. 2083 */ 2084 if (iagp->wmap[extno] == cpu_to_le32(ONES)) { 2085 sword = extno >> L2EXTSPERSUM; 2086 bitno = extno & (EXTSPERSUM - 1); 2087 iagp->inosmap[sword] |= cpu_to_le32(HIGHORDER >> bitno); 2088 } 2089 2090 /* if this was the last free inode in the iag, remove the 2091 * iag from the ag free inode list. 
2092 */ 2093 if (iagp->nfreeinos == cpu_to_le32(1)) { 2094 if (amp) { 2095 aiagp->inofreeback = iagp->inofreeback; 2096 write_metapage(amp); 2097 } 2098 2099 if (bmp) { 2100 biagp->inofreefwd = iagp->inofreefwd; 2101 write_metapage(bmp); 2102 } else { 2103 imap->im_agctl[agno].inofree = 2104 le32_to_cpu(iagp->inofreefwd); 2105 } 2106 iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); 2107 } 2108 2109 /* update the free inode count at the iag, ag, inode 2110 * map levels. 2111 */ 2112 le32_add_cpu(&iagp->nfreeinos, -1); 2113 imap->im_agctl[agno].numfree -= 1; 2114 atomic_dec(&imap->im_numfree); 2115 2116 return (0); 2117 } 2118 2119 2120 /* 2121 * NAME: diNewExt(imap,iagp,extno) 2122 * 2123 * FUNCTION: initialize a new extent of inodes for an iag, allocating 2124 * the first inode of the extent for use for the current 2125 * allocation request. 2126 * 2127 * disk resources are allocated for the new extent of inodes 2128 * and the inodes themselves are initialized to reflect their 2129 * existence within the extent (i.e. their inode numbers and 2130 * inode extent addresses are set) and their initial state 2131 * (mode and link count are set to zero). 2132 * 2133 * if the iag is new, it is not yet on an ag extent free list 2134 * but will now be placed on this list. 2135 * 2136 * if the allocation of the new extent causes the iag to 2137 * have no free extent, the iag will be removed from the 2138 * ag extent free list. 2139 * 2140 * if the iag has no free backed inodes, it will be placed 2141 * on the ag free inode list, since the addition of the new 2142 * extent will now cause it to have free inodes. 2143 * 2144 * a careful update approach is used to provide consistency 2145 * (i.e. list consistency) in the face of updates to multiple 2146 * buffers. under this approach, all required buffers are 2147 * obtained before making any updates and are held until all 2148 * updates are complete. 2149 * 2150 * PRE CONDITION: Already have buffer lock on iagp. 
Already have AG lock on 2151 * this AG. Must have read lock on imap inode. 2152 * 2153 * PARAMETERS: 2154 * imap - pointer to inode map control structure. 2155 * iagp - pointer to iag. 2156 * extno - extent number. 2157 * 2158 * RETURN VALUES: 2159 * 0 - success. 2160 * -ENOSPC - insufficient disk resources. 2161 * -EIO - i/o error. 2162 */ 2163 static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) 2164 { 2165 int agno, iagno, fwd, back, freei = 0, sword, rc; 2166 struct iag *aiagp = NULL, *biagp = NULL, *ciagp = NULL; 2167 struct metapage *amp, *bmp, *cmp, *dmp; 2168 struct inode *ipimap; 2169 s64 blkno, hint; 2170 int i, j; 2171 u32 mask; 2172 ino_t ino; 2173 struct dinode *dp; 2174 struct jfs_sb_info *sbi; 2175 2176 /* better have free extents. 2177 */ 2178 if (!iagp->nfreeexts) { 2179 jfs_error(imap->im_ipimap->i_sb, "no free extents\n"); 2180 return -EIO; 2181 } 2182 2183 /* get the inode map inode. 2184 */ 2185 ipimap = imap->im_ipimap; 2186 sbi = JFS_SBI(ipimap->i_sb); 2187 2188 amp = bmp = cmp = NULL; 2189 2190 /* get the ag and iag numbers for this iag. 2191 */ 2192 agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi); 2193 iagno = le32_to_cpu(iagp->iagnum); 2194 2195 /* check if this is the last free extent within the 2196 * iag. if so, the iag must be removed from the ag 2197 * free extent list, so get the iags preceding and 2198 * following the iag on this list. 2199 */ 2200 if (iagp->nfreeexts == cpu_to_le32(1)) { 2201 if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) { 2202 if ((rc = diIAGRead(imap, fwd, &))) 2203 return (rc); 2204 aiagp = (struct iag *) amp->data; 2205 } 2206 2207 if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) { 2208 if ((rc = diIAGRead(imap, back, &bmp))) 2209 goto error_out; 2210 biagp = (struct iag *) bmp->data; 2211 } 2212 } else { 2213 /* the iag has free extents. 
if all extents are free 2214 * (as is the case for a newly allocated iag), the iag 2215 * must be added to the ag free extent list, so get 2216 * the iag at the head of the list in preparation for 2217 * adding this iag to this list. 2218 */ 2219 fwd = back = -1; 2220 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { 2221 if ((fwd = imap->im_agctl[agno].extfree) >= 0) { 2222 if ((rc = diIAGRead(imap, fwd, &))) 2223 goto error_out; 2224 aiagp = (struct iag *) amp->data; 2225 } 2226 } 2227 } 2228 2229 /* check if the iag has no free inodes. if so, the iag 2230 * will have to be added to the ag free inode list, so get 2231 * the iag at the head of the list in preparation for 2232 * adding this iag to this list. in doing this, we must 2233 * check if we already have the iag at the head of 2234 * the list in hand. 2235 */ 2236 if (iagp->nfreeinos == 0) { 2237 freei = imap->im_agctl[agno].inofree; 2238 2239 if (freei >= 0) { 2240 if (freei == fwd) { 2241 ciagp = aiagp; 2242 } else if (freei == back) { 2243 ciagp = biagp; 2244 } else { 2245 if ((rc = diIAGRead(imap, freei, &cmp))) 2246 goto error_out; 2247 ciagp = (struct iag *) cmp->data; 2248 } 2249 if (ciagp == NULL) { 2250 jfs_error(imap->im_ipimap->i_sb, 2251 "ciagp == NULL\n"); 2252 rc = -EIO; 2253 goto error_out; 2254 } 2255 } 2256 } 2257 2258 /* allocate disk space for the inode extent. 2259 */ 2260 if ((extno == 0) || (addressPXD(&iagp->inoext[extno - 1]) == 0)) 2261 hint = ((s64) agno << sbi->bmap->db_agl2size) - 1; 2262 else 2263 hint = addressPXD(&iagp->inoext[extno - 1]) + 2264 lengthPXD(&iagp->inoext[extno - 1]) - 1; 2265 2266 if ((rc = dbAlloc(ipimap, hint, (s64) imap->im_nbperiext, &blkno))) 2267 goto error_out; 2268 2269 /* compute the inode number of the first inode within the 2270 * extent. 2271 */ 2272 ino = (iagno << L2INOSPERIAG) + (extno << L2INOSPEREXT); 2273 2274 /* initialize the inodes within the newly allocated extent a 2275 * page at a time. 
2276 */ 2277 for (i = 0; i < imap->im_nbperiext; i += sbi->nbperpage) { 2278 /* get a buffer for this page of disk inodes. 2279 */ 2280 dmp = get_metapage(ipimap, blkno + i, PSIZE, 1); 2281 if (dmp == NULL) { 2282 rc = -EIO; 2283 goto error_out; 2284 } 2285 dp = (struct dinode *) dmp->data; 2286 2287 /* initialize the inode number, mode, link count and 2288 * inode extent address. 2289 */ 2290 for (j = 0; j < INOSPERPAGE; j++, dp++, ino++) { 2291 dp->di_inostamp = cpu_to_le32(sbi->inostamp); 2292 dp->di_number = cpu_to_le32(ino); 2293 dp->di_fileset = cpu_to_le32(FILESYSTEM_I); 2294 dp->di_mode = 0; 2295 dp->di_nlink = 0; 2296 PXDaddress(&(dp->di_ixpxd), blkno); 2297 PXDlength(&(dp->di_ixpxd), imap->im_nbperiext); 2298 } 2299 write_metapage(dmp); 2300 } 2301 2302 /* if this is the last free extent within the iag, remove the 2303 * iag from the ag free extent list. 2304 */ 2305 if (iagp->nfreeexts == cpu_to_le32(1)) { 2306 if (fwd >= 0) 2307 aiagp->extfreeback = iagp->extfreeback; 2308 2309 if (back >= 0) 2310 biagp->extfreefwd = iagp->extfreefwd; 2311 else 2312 imap->im_agctl[agno].extfree = 2313 le32_to_cpu(iagp->extfreefwd); 2314 2315 iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1); 2316 } else { 2317 /* if the iag has all free extents (newly allocated iag), 2318 * add the iag to the ag free extent list. 2319 */ 2320 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { 2321 if (fwd >= 0) 2322 aiagp->extfreeback = cpu_to_le32(iagno); 2323 2324 iagp->extfreefwd = cpu_to_le32(fwd); 2325 iagp->extfreeback = cpu_to_le32(-1); 2326 imap->im_agctl[agno].extfree = iagno; 2327 } 2328 } 2329 2330 /* if the iag has no free inodes, add the iag to the 2331 * ag free inode list. 
2332 */ 2333 if (iagp->nfreeinos == 0) { 2334 if (freei >= 0) 2335 ciagp->inofreeback = cpu_to_le32(iagno); 2336 2337 iagp->inofreefwd = 2338 cpu_to_le32(imap->im_agctl[agno].inofree); 2339 iagp->inofreeback = cpu_to_le32(-1); 2340 imap->im_agctl[agno].inofree = iagno; 2341 } 2342 2343 /* initialize the extent descriptor of the extent. */ 2344 PXDlength(&iagp->inoext[extno], imap->im_nbperiext); 2345 PXDaddress(&iagp->inoext[extno], blkno); 2346 2347 /* initialize the working and persistent map of the extent. 2348 * the working map will be initialized such that 2349 * it indicates the first inode of the extent is allocated. 2350 */ 2351 iagp->wmap[extno] = cpu_to_le32(HIGHORDER); 2352 iagp->pmap[extno] = 0; 2353 2354 /* update the free inode and free extent summary maps 2355 * for the extent to indicate the extent has free inodes 2356 * and no longer represents a free extent. 2357 */ 2358 sword = extno >> L2EXTSPERSUM; 2359 mask = HIGHORDER >> (extno & (EXTSPERSUM - 1)); 2360 iagp->extsmap[sword] |= cpu_to_le32(mask); 2361 iagp->inosmap[sword] &= cpu_to_le32(~mask); 2362 2363 /* update the free inode and free extent counts for the 2364 * iag. 2365 */ 2366 le32_add_cpu(&iagp->nfreeinos, (INOSPEREXT - 1)); 2367 le32_add_cpu(&iagp->nfreeexts, -1); 2368 2369 /* update the free and backed inode counts for the ag. 2370 */ 2371 imap->im_agctl[agno].numfree += (INOSPEREXT - 1); 2372 imap->im_agctl[agno].numinos += INOSPEREXT; 2373 2374 /* update the free and backed inode counts for the inode map. 2375 */ 2376 atomic_add(INOSPEREXT - 1, &imap->im_numfree); 2377 atomic_add(INOSPEREXT, &imap->im_numinos); 2378 2379 /* write the iags. 2380 */ 2381 if (amp) 2382 write_metapage(amp); 2383 if (bmp) 2384 write_metapage(bmp); 2385 if (cmp) 2386 write_metapage(cmp); 2387 2388 return (0); 2389 2390 error_out: 2391 2392 /* release the iags. 
2393 */ 2394 if (amp) 2395 release_metapage(amp); 2396 if (bmp) 2397 release_metapage(bmp); 2398 if (cmp) 2399 release_metapage(cmp); 2400 2401 return (rc); 2402 } 2403 2404 2405 /* 2406 * NAME: diNewIAG(imap,iagnop,agno) 2407 * 2408 * FUNCTION: allocate a new iag for an allocation group. 2409 * 2410 * first tries to allocate the iag from the inode map 2411 * iagfree list: 2412 * if the list has free iags, the head of the list is removed 2413 * and returned to satisfy the request. 2414 * if the inode map's iag free list is empty, the inode map 2415 * is extended to hold a new iag. this new iag is initialized 2416 * and returned to satisfy the request. 2417 * 2418 * PARAMETERS: 2419 * imap - pointer to inode map control structure. 2420 * iagnop - pointer to an iag number set with the number of the 2421 * newly allocated iag upon successful return. 2422 * agno - allocation group number. 2423 * bpp - Buffer pointer to be filled in with new IAG's buffer 2424 * 2425 * RETURN VALUES: 2426 * 0 - success. 2427 * -ENOSPC - insufficient disk resources. 2428 * -EIO - i/o error. 2429 * 2430 * serialization: 2431 * AG lock held on entry/exit; 2432 * write lock on the map is held inside; 2433 * read lock on the map is held on successful completion; 2434 * 2435 * note: new iag transaction: 2436 * . synchronously write iag; 2437 * . write log of xtree and inode of imap; 2438 * . commit; 2439 * . synchronous write of xtree (right to left, bottom to top); 2440 * . at start of logredo(): init in-memory imap with one additional iag page; 2441 * . 
at end of logredo(): re-read imap inode to determine
 *	new imap size;
 */
static int
diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
{
	int rc;
	int iagno, i, xlen;
	struct inode *ipimap;
	struct super_block *sb;
	struct jfs_sb_info *sbi;
	struct metapage *mp;
	struct iag *iagp;
	s64 xaddr = 0;
	s64 blkno;
	tid_t tid;
	struct inode *iplist[1];

	/* pick up pointers to the inode map and mount inodes */
	ipimap = imap->im_ipimap;
	sb = ipimap->i_sb;
	sbi = JFS_SBI(sb);

	/* acquire the free iag lock; it is dropped at the "out" label
	 * (or explicitly on the one early-return path below).
	 */
	IAGFREE_LOCK(imap);

	/* if there are any iags on the inode map free iag list,
	 * allocate the iag from the head of the list.
	 */
	if (imap->im_freeiag >= 0) {
		/* pick up the iag number at the head of the list */
		iagno = imap->im_freeiag;

		/* determine the logical block number of the iag
		 * (not used again on this path: diIAGRead() below
		 * recomputes the block number from iagno).
		 */
		blkno = IAGTOLBLK(iagno, sbi->l2nbperpage);
	} else {
		/* no free iags. the inode map will have to be extended
		 * to include a new iag.
		 */

		/* acquire inode map lock */
		IWRITE_LOCK(ipimap, RDWRLOCK_IMAP);

		/* sanity check: the map size in pages must equal
		 * im_nextiag + 1.  both locks are released here, so this
		 * path returns directly instead of going through "out".
		 */
		if (ipimap->i_size >> L2PSIZE != imap->im_nextiag + 1) {
			IWRITE_UNLOCK(ipimap);
			IAGFREE_UNLOCK(imap);
			jfs_error(imap->im_ipimap->i_sb,
				  "ipimap->i_size is wrong\n");
			return -EIO;
		}


		/* get the next available iag number */
		iagno = imap->im_nextiag;

		/* make sure that we have not exceeded the maximum inode
		 * number limit.
		 */
		if (iagno > (MAXIAGS - 1)) {
			/* release the inode map lock */
			IWRITE_UNLOCK(ipimap);

			rc = -ENOSPC;
			goto out;
		}

		/*
		 * synchronously append new iag page.
		 */
		/* determine the logical address of iag page to append */
		blkno = IAGTOLBLK(iagno, sbi->l2nbperpage);

		/* Allocate extent for new iag page */
		xlen = sbi->nbperpage;
		if ((rc = dbAlloc(ipimap, 0, (s64) xlen, &xaddr))) {
			/* release the inode map lock */
			IWRITE_UNLOCK(ipimap);

			goto out;
		}

		/*
		 * start transaction of update of the inode map
		 * addressing structure pointing to the new iag page;
		 */
		tid = txBegin(sb, COMMIT_FORCE);
		mutex_lock(&JFS_IP(ipimap)->commit_mutex);

		/* update the inode map addressing structure to point to it */
		if ((rc =
		     xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) {
			txEnd(tid);
			mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
			/* Free the blocks allocated for the iag since it was
			 * not successfully added to the inode map
			 */
			dbFree(ipimap, xaddr, (s64) xlen);

			/* release the inode map lock */
			IWRITE_UNLOCK(ipimap);

			goto out;
		}

		/* update the inode map's inode to reflect the extension */
		ipimap->i_size += PSIZE;
		inode_add_bytes(ipimap, PSIZE);

		/* assign a buffer for the page */
		mp = get_metapage(ipimap, blkno, PSIZE, 0);
		if (!mp) {
			/*
			 * This is very unlikely since we just created the
			 * extent, but let's try to handle it correctly
			 */
			xtTruncate(tid, ipimap, ipimap->i_size - PSIZE,
				   COMMIT_PWMAP);

			txAbort(tid, 0);
			txEnd(tid);
			mutex_unlock(&JFS_IP(ipimap)->commit_mutex);

			/* release the inode map lock */
			IWRITE_UNLOCK(ipimap);

			rc = -EIO;
			goto out;
		}
		iagp = (struct iag *) mp->data;

		/* init the iag: zero it, then mark it as belonging to no
		 * list (-1 links), with no backed inodes and every extent
		 * free.
		 */
		memset(iagp, 0, sizeof(struct iag));
		iagp->iagnum = cpu_to_le32(iagno);
		iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
		iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
		iagp->iagfree = cpu_to_le32(-1);
		iagp->nfreeinos = 0;
		iagp->nfreeexts = cpu_to_le32(EXTSPERIAG);

		/* initialize the free inode summary map (the free extent
		 * summary map was already zeroed by the memset above).
		 */
		for (i = 0; i < SMAPSZ; i++)
			iagp->inosmap[i] = cpu_to_le32(ONES);

		/*
		 * Write and sync the metapage
		 */
		flush_metapage(mp);

		/*
		 * txCommit(COMMIT_FORCE) will synchronously write address
		 * index pages and inode after commit in careful update order
		 * of address index pages (right to left, bottom up);
		 *
		 * NOTE(review): rc from txCommit() is never examined; it is
		 * overwritten by the diIAGRead() call further down.
		 */
		iplist[0] = ipimap;
		rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);

		txEnd(tid);
		mutex_unlock(&JFS_IP(ipimap)->commit_mutex);

		duplicateIXtree(sb, blkno, xlen, &xaddr);

		/* update the next available iag number */
		imap->im_nextiag += 1;

		/* Add the iag to the iag free list so we don't lose the iag
		 * if a failure happens now.
		 */
		imap->im_freeiag = iagno;

		/* Until we have logredo working, we want the imap inode &
		 * control page to be up to date.
		 */
		diSync(ipimap);

		/* release the inode map lock */
		IWRITE_UNLOCK(ipimap);
	}

	/* obtain read lock on map; it is still held when this function
	 * returns successfully and is dropped only on the failure path
	 * just below.
	 */
	IREAD_LOCK(ipimap, RDWRLOCK_IMAP);

	/* read the iag */
	if ((rc = diIAGRead(imap, iagno, &mp))) {
		IREAD_UNLOCK(ipimap);
		rc = -EIO;
		goto out;
	}
	iagp = (struct iag *) mp->data;

	/* remove the iag from the iag free list */
	imap->im_freeiag = le32_to_cpu(iagp->iagfree);
	iagp->iagfree = cpu_to_le32(-1);

	/* set the return iag number and buffer pointer */
	*iagnop = iagno;
	*mpp = mp;

      out:
	/* release the iag free lock */
	IAGFREE_UNLOCK(imap);

	return (rc);
}

/*
 * NAME:	diIAGRead()
 *
 * FUNCTION:	get the buffer for the specified iag within a fileset
 *		or aggregate inode map.
2652 * 2653 * PARAMETERS: 2654 * imap - pointer to inode map control structure. 2655 * iagno - iag number. 2656 * bpp - point to buffer pointer to be filled in on successful 2657 * exit. 2658 * 2659 * SERIALIZATION: 2660 * must have read lock on imap inode 2661 * (When called by diExtendFS, the filesystem is quiesced, therefore 2662 * the read lock is unnecessary.) 2663 * 2664 * RETURN VALUES: 2665 * 0 - success. 2666 * -EIO - i/o error. 2667 */ 2668 static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp) 2669 { 2670 struct inode *ipimap = imap->im_ipimap; 2671 s64 blkno; 2672 2673 /* compute the logical block number of the iag. */ 2674 blkno = IAGTOLBLK(iagno, JFS_SBI(ipimap->i_sb)->l2nbperpage); 2675 2676 /* read the iag. */ 2677 *mpp = read_metapage(ipimap, blkno, PSIZE, 0); 2678 if (*mpp == NULL) { 2679 return -EIO; 2680 } 2681 2682 return (0); 2683 } 2684 2685 /* 2686 * NAME: diFindFree() 2687 * 2688 * FUNCTION: find the first free bit in a word starting at 2689 * the specified bit position. 2690 * 2691 * PARAMETERS: 2692 * word - word to be examined. 2693 * start - starting bit position. 2694 * 2695 * RETURN VALUES: 2696 * bit position of first free bit in the word or 32 if 2697 * no free bits were found. 2698 */ 2699 static int diFindFree(u32 word, int start) 2700 { 2701 int bitno; 2702 assert(start < 32); 2703 /* scan the word for the first free bit. */ 2704 for (word <<= start, bitno = start; bitno < 32; 2705 bitno++, word <<= 1) { 2706 if ((word & HIGHORDER) == 0) 2707 break; 2708 } 2709 return (bitno); 2710 } 2711 2712 /* 2713 * NAME: diUpdatePMap() 2714 * 2715 * FUNCTION: Update the persistent map in an IAG for the allocation or 2716 * freeing of the specified inode. 2717 * 2718 * PRE CONDITIONS: Working map has already been updated for allocate. 
2719 * 2720 * PARAMETERS: 2721 * ipimap - Incore inode map inode 2722 * inum - Number of inode to mark in permanent map 2723 * is_free - If 'true' indicates inode should be marked freed, otherwise 2724 * indicates inode should be marked allocated. 2725 * 2726 * RETURN VALUES: 2727 * 0 for success 2728 */ 2729 int 2730 diUpdatePMap(struct inode *ipimap, 2731 unsigned long inum, bool is_free, struct tblock * tblk) 2732 { 2733 int rc; 2734 struct iag *iagp; 2735 struct metapage *mp; 2736 int iagno, ino, extno, bitno; 2737 struct inomap *imap; 2738 u32 mask; 2739 struct jfs_log *log; 2740 int lsn, difft, diffp; 2741 unsigned long flags; 2742 2743 imap = JFS_IP(ipimap)->i_imap; 2744 /* get the iag number containing the inode */ 2745 iagno = INOTOIAG(inum); 2746 /* make sure that the iag is contained within the map */ 2747 if (iagno >= imap->im_nextiag) { 2748 jfs_error(ipimap->i_sb, "the iag is outside the map\n"); 2749 return -EIO; 2750 } 2751 /* read the iag */ 2752 IREAD_LOCK(ipimap, RDWRLOCK_IMAP); 2753 rc = diIAGRead(imap, iagno, &mp); 2754 IREAD_UNLOCK(ipimap); 2755 if (rc) 2756 return (rc); 2757 metapage_wait_for_io(mp); 2758 iagp = (struct iag *) mp->data; 2759 /* get the inode number and extent number of the inode within 2760 * the iag and the inode number within the extent. 
2761 */ 2762 ino = inum & (INOSPERIAG - 1); 2763 extno = ino >> L2INOSPEREXT; 2764 bitno = ino & (INOSPEREXT - 1); 2765 mask = HIGHORDER >> bitno; 2766 /* 2767 * mark the inode free in persistent map: 2768 */ 2769 if (is_free) { 2770 /* The inode should have been allocated both in working 2771 * map and in persistent map; 2772 * the inode will be freed from working map at the release 2773 * of last reference release; 2774 */ 2775 if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { 2776 jfs_error(ipimap->i_sb, 2777 "inode %ld not marked as allocated in wmap!\n", 2778 inum); 2779 } 2780 if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) { 2781 jfs_error(ipimap->i_sb, 2782 "inode %ld not marked as allocated in pmap!\n", 2783 inum); 2784 } 2785 /* update the bitmap for the extent of the freed inode */ 2786 iagp->pmap[extno] &= cpu_to_le32(~mask); 2787 } 2788 /* 2789 * mark the inode allocated in persistent map: 2790 */ 2791 else { 2792 /* The inode should be already allocated in the working map 2793 * and should be free in persistent map; 2794 */ 2795 if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { 2796 release_metapage(mp); 2797 jfs_error(ipimap->i_sb, 2798 "the inode is not allocated in the working map\n"); 2799 return -EIO; 2800 } 2801 if ((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) { 2802 release_metapage(mp); 2803 jfs_error(ipimap->i_sb, 2804 "the inode is not free in the persistent map\n"); 2805 return -EIO; 2806 } 2807 /* update the bitmap for the extent of the allocated inode */ 2808 iagp->pmap[extno] |= cpu_to_le32(mask); 2809 } 2810 /* 2811 * update iag lsn 2812 */ 2813 lsn = tblk->lsn; 2814 log = JFS_SBI(tblk->sb)->log; 2815 LOGSYNC_LOCK(log, flags); 2816 if (mp->lsn != 0) { 2817 /* inherit older/smaller lsn */ 2818 logdiff(difft, lsn, log); 2819 logdiff(diffp, mp->lsn, log); 2820 if (difft < diffp) { 2821 mp->lsn = lsn; 2822 /* move mp after tblock in logsync list */ 2823 list_move(&mp->synclist, &tblk->synclist); 2824 } 2825 /* inherit younger/larger clsn */ 2826 
assert(mp->clsn); 2827 logdiff(difft, tblk->clsn, log); 2828 logdiff(diffp, mp->clsn, log); 2829 if (difft > diffp) 2830 mp->clsn = tblk->clsn; 2831 } else { 2832 mp->log = log; 2833 mp->lsn = lsn; 2834 /* insert mp after tblock in logsync list */ 2835 log->count++; 2836 list_add(&mp->synclist, &tblk->synclist); 2837 mp->clsn = tblk->clsn; 2838 } 2839 LOGSYNC_UNLOCK(log, flags); 2840 write_metapage(mp); 2841 return (0); 2842 } 2843 2844 /* 2845 * diExtendFS() 2846 * 2847 * function: update imap for extendfs(); 2848 * 2849 * note: AG size has been increased s.t. each k old contiguous AGs are 2850 * coalesced into a new AG; 2851 */ 2852 int diExtendFS(struct inode *ipimap, struct inode *ipbmap) 2853 { 2854 int rc, rcx = 0; 2855 struct inomap *imap = JFS_IP(ipimap)->i_imap; 2856 struct iag *iagp = NULL, *hiagp = NULL; 2857 struct bmap *mp = JFS_SBI(ipbmap->i_sb)->bmap; 2858 struct metapage *bp, *hbp; 2859 int i, n, head; 2860 int numinos, xnuminos = 0, xnumfree = 0; 2861 s64 agstart; 2862 2863 jfs_info("diExtendFS: nextiag:%d numinos:%d numfree:%d", 2864 imap->im_nextiag, atomic_read(&imap->im_numinos), 2865 atomic_read(&imap->im_numfree)); 2866 2867 /* 2868 * reconstruct imap 2869 * 2870 * coalesce contiguous k (newAGSize/oldAGSize) AGs; 2871 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn; 2872 * note: new AG size = old AG size * (2**x). 2873 */ 2874 2875 /* init per AG control information im_agctl[] */ 2876 for (i = 0; i < MAXAG; i++) { 2877 imap->im_agctl[i].inofree = -1; 2878 imap->im_agctl[i].extfree = -1; 2879 imap->im_agctl[i].numinos = 0; /* number of backed inodes */ 2880 imap->im_agctl[i].numfree = 0; /* number of free backed inodes */ 2881 } 2882 2883 /* 2884 * process each iag page of the map. 
2885 * 2886 * rebuild AG Free Inode List, AG Free Inode Extent List; 2887 */ 2888 for (i = 0; i < imap->im_nextiag; i++) { 2889 if ((rc = diIAGRead(imap, i, &bp))) { 2890 rcx = rc; 2891 continue; 2892 } 2893 iagp = (struct iag *) bp->data; 2894 if (le32_to_cpu(iagp->iagnum) != i) { 2895 release_metapage(bp); 2896 jfs_error(ipimap->i_sb, "unexpected value of iagnum\n"); 2897 return -EIO; 2898 } 2899 2900 /* leave free iag in the free iag list */ 2901 if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { 2902 release_metapage(bp); 2903 continue; 2904 } 2905 2906 agstart = le64_to_cpu(iagp->agstart); 2907 n = agstart >> mp->db_agl2size; 2908 iagp->agstart = cpu_to_le64((s64)n << mp->db_agl2size); 2909 2910 /* compute backed inodes */ 2911 numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts)) 2912 << L2INOSPEREXT; 2913 if (numinos > 0) { 2914 /* merge AG backed inodes */ 2915 imap->im_agctl[n].numinos += numinos; 2916 xnuminos += numinos; 2917 } 2918 2919 /* if any backed free inodes, insert at AG free inode list */ 2920 if ((int) le32_to_cpu(iagp->nfreeinos) > 0) { 2921 if ((head = imap->im_agctl[n].inofree) == -1) { 2922 iagp->inofreefwd = cpu_to_le32(-1); 2923 iagp->inofreeback = cpu_to_le32(-1); 2924 } else { 2925 if ((rc = diIAGRead(imap, head, &hbp))) { 2926 rcx = rc; 2927 goto nextiag; 2928 } 2929 hiagp = (struct iag *) hbp->data; 2930 hiagp->inofreeback = iagp->iagnum; 2931 iagp->inofreefwd = cpu_to_le32(head); 2932 iagp->inofreeback = cpu_to_le32(-1); 2933 write_metapage(hbp); 2934 } 2935 2936 imap->im_agctl[n].inofree = 2937 le32_to_cpu(iagp->iagnum); 2938 2939 /* merge AG backed free inodes */ 2940 imap->im_agctl[n].numfree += 2941 le32_to_cpu(iagp->nfreeinos); 2942 xnumfree += le32_to_cpu(iagp->nfreeinos); 2943 } 2944 2945 /* if any free extents, insert at AG free extent list */ 2946 if (le32_to_cpu(iagp->nfreeexts) > 0) { 2947 if ((head = imap->im_agctl[n].extfree) == -1) { 2948 iagp->extfreefwd = cpu_to_le32(-1); 2949 iagp->extfreeback = cpu_to_le32(-1); 2950 
} else { 2951 if ((rc = diIAGRead(imap, head, &hbp))) { 2952 rcx = rc; 2953 goto nextiag; 2954 } 2955 hiagp = (struct iag *) hbp->data; 2956 hiagp->extfreeback = iagp->iagnum; 2957 iagp->extfreefwd = cpu_to_le32(head); 2958 iagp->extfreeback = cpu_to_le32(-1); 2959 write_metapage(hbp); 2960 } 2961 2962 imap->im_agctl[n].extfree = 2963 le32_to_cpu(iagp->iagnum); 2964 } 2965 2966 nextiag: 2967 write_metapage(bp); 2968 } 2969 2970 if (xnuminos != atomic_read(&imap->im_numinos) || 2971 xnumfree != atomic_read(&imap->im_numfree)) { 2972 jfs_error(ipimap->i_sb, "numinos or numfree incorrect\n"); 2973 return -EIO; 2974 } 2975 2976 return rcx; 2977 } 2978 2979 2980 /* 2981 * duplicateIXtree() 2982 * 2983 * serialization: IWRITE_LOCK held on entry/exit 2984 * 2985 * note: shadow page with regular inode (rel.2); 2986 */ 2987 static void duplicateIXtree(struct super_block *sb, s64 blkno, 2988 int xlen, s64 *xaddr) 2989 { 2990 struct jfs_superblock *j_sb; 2991 struct buffer_head *bh; 2992 struct inode *ip; 2993 tid_t tid; 2994 2995 /* if AIT2 ipmap2 is bad, do not try to update it */ 2996 if (JFS_SBI(sb)->mntflag & JFS_BAD_SAIT) /* s_flag */ 2997 return; 2998 ip = diReadSpecial(sb, FILESYSTEM_I, 1); 2999 if (ip == NULL) { 3000 JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT; 3001 if (readSuper(sb, &bh)) 3002 return; 3003 j_sb = (struct jfs_superblock *)bh->b_data; 3004 j_sb->s_flag |= cpu_to_le32(JFS_BAD_SAIT); 3005 3006 mark_buffer_dirty(bh); 3007 sync_dirty_buffer(bh); 3008 brelse(bh); 3009 return; 3010 } 3011 3012 /* start transaction */ 3013 tid = txBegin(sb, COMMIT_FORCE); 3014 /* update the inode map addressing structure to point to it */ 3015 if (xtInsert(tid, ip, 0, blkno, xlen, xaddr, 0)) { 3016 JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT; 3017 txAbort(tid, 1); 3018 goto cleanup; 3019 3020 } 3021 /* update the inode map's inode to reflect the extension */ 3022 ip->i_size += PSIZE; 3023 inode_add_bytes(ip, PSIZE); 3024 txCommit(tid, 1, &ip, COMMIT_FORCE); 3025 cleanup: 3026 txEnd(tid); 
3027 diFreeSpecial(ip); 3028 } 3029 3030 /* 3031 * NAME: copy_from_dinode() 3032 * 3033 * FUNCTION: Copies inode info from disk inode to in-memory inode 3034 * 3035 * RETURN VALUES: 3036 * 0 - success 3037 * -ENOMEM - insufficient memory 3038 */ 3039 static int copy_from_dinode(struct dinode * dip, struct inode *ip) 3040 { 3041 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 3042 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 3043 3044 jfs_ip->fileset = le32_to_cpu(dip->di_fileset); 3045 jfs_ip->mode2 = le32_to_cpu(dip->di_mode); 3046 jfs_set_inode_flags(ip); 3047 3048 ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff; 3049 if (sbi->umask != -1) { 3050 ip->i_mode = (ip->i_mode & ~0777) | (0777 & ~sbi->umask); 3051 /* For directories, add x permission if r is allowed by umask */ 3052 if (S_ISDIR(ip->i_mode)) { 3053 if (ip->i_mode & 0400) 3054 ip->i_mode |= 0100; 3055 if (ip->i_mode & 0040) 3056 ip->i_mode |= 0010; 3057 if (ip->i_mode & 0004) 3058 ip->i_mode |= 0001; 3059 } 3060 } 3061 set_nlink(ip, le32_to_cpu(dip->di_nlink)); 3062 3063 jfs_ip->saved_uid = make_kuid(&init_user_ns, le32_to_cpu(dip->di_uid)); 3064 if (!uid_valid(sbi->uid)) 3065 ip->i_uid = jfs_ip->saved_uid; 3066 else { 3067 ip->i_uid = sbi->uid; 3068 } 3069 3070 jfs_ip->saved_gid = make_kgid(&init_user_ns, le32_to_cpu(dip->di_gid)); 3071 if (!gid_valid(sbi->gid)) 3072 ip->i_gid = jfs_ip->saved_gid; 3073 else { 3074 ip->i_gid = sbi->gid; 3075 } 3076 3077 ip->i_size = le64_to_cpu(dip->di_size); 3078 ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec); 3079 ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec); 3080 ip->i_mtime.tv_sec = le32_to_cpu(dip->di_mtime.tv_sec); 3081 ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec); 3082 ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec); 3083 ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec); 3084 ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks)); 3085 ip->i_generation = le32_to_cpu(dip->di_gen); 3086 3087 jfs_ip->ixpxd = 
dip->di_ixpxd; /* in-memory pxd's are little-endian */ 3088 jfs_ip->acl = dip->di_acl; /* as are dxd's */ 3089 jfs_ip->ea = dip->di_ea; 3090 jfs_ip->next_index = le32_to_cpu(dip->di_next_index); 3091 jfs_ip->otime = le32_to_cpu(dip->di_otime.tv_sec); 3092 jfs_ip->acltype = le32_to_cpu(dip->di_acltype); 3093 3094 if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) { 3095 jfs_ip->dev = le32_to_cpu(dip->di_rdev); 3096 ip->i_rdev = new_decode_dev(jfs_ip->dev); 3097 } 3098 3099 if (S_ISDIR(ip->i_mode)) { 3100 memcpy(&jfs_ip->i_dirtable, &dip->di_dirtable, 384); 3101 } else if (S_ISREG(ip->i_mode) || S_ISLNK(ip->i_mode)) { 3102 memcpy(&jfs_ip->i_xtroot, &dip->di_xtroot, 288); 3103 } else 3104 memcpy(&jfs_ip->i_inline_ea, &dip->di_inlineea, 128); 3105 3106 /* Zero the in-memory-only stuff */ 3107 jfs_ip->cflag = 0; 3108 jfs_ip->btindex = 0; 3109 jfs_ip->btorder = 0; 3110 jfs_ip->bxflag = 0; 3111 jfs_ip->blid = 0; 3112 jfs_ip->atlhead = 0; 3113 jfs_ip->atltail = 0; 3114 jfs_ip->xtlid = 0; 3115 return (0); 3116 } 3117 3118 /* 3119 * NAME: copy_to_dinode() 3120 * 3121 * FUNCTION: Copies inode info from in-memory inode to disk inode 3122 */ 3123 static void copy_to_dinode(struct dinode * dip, struct inode *ip) 3124 { 3125 struct jfs_inode_info *jfs_ip = JFS_IP(ip); 3126 struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 3127 3128 dip->di_fileset = cpu_to_le32(jfs_ip->fileset); 3129 dip->di_inostamp = cpu_to_le32(sbi->inostamp); 3130 dip->di_number = cpu_to_le32(ip->i_ino); 3131 dip->di_gen = cpu_to_le32(ip->i_generation); 3132 dip->di_size = cpu_to_le64(ip->i_size); 3133 dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); 3134 dip->di_nlink = cpu_to_le32(ip->i_nlink); 3135 if (!uid_valid(sbi->uid)) 3136 dip->di_uid = cpu_to_le32(i_uid_read(ip)); 3137 else 3138 dip->di_uid =cpu_to_le32(from_kuid(&init_user_ns, 3139 jfs_ip->saved_uid)); 3140 if (!gid_valid(sbi->gid)) 3141 dip->di_gid = cpu_to_le32(i_gid_read(ip)); 3142 else 3143 dip->di_gid = 
cpu_to_le32(from_kgid(&init_user_ns, 3144 jfs_ip->saved_gid)); 3145 /* 3146 * mode2 is only needed for storing the higher order bits. 3147 * Trust i_mode for the lower order ones 3148 */ 3149 if (sbi->umask == -1) 3150 dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) | 3151 ip->i_mode); 3152 else /* Leave the original permissions alone */ 3153 dip->di_mode = cpu_to_le32(jfs_ip->mode2); 3154 3155 dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime.tv_sec); 3156 dip->di_atime.tv_nsec = cpu_to_le32(ip->i_atime.tv_nsec); 3157 dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime.tv_sec); 3158 dip->di_ctime.tv_nsec = cpu_to_le32(ip->i_ctime.tv_nsec); 3159 dip->di_mtime.tv_sec = cpu_to_le32(ip->i_mtime.tv_sec); 3160 dip->di_mtime.tv_nsec = cpu_to_le32(ip->i_mtime.tv_nsec); 3161 dip->di_ixpxd = jfs_ip->ixpxd; /* in-memory pxd's are little-endian */ 3162 dip->di_acl = jfs_ip->acl; /* as are dxd's */ 3163 dip->di_ea = jfs_ip->ea; 3164 dip->di_next_index = cpu_to_le32(jfs_ip->next_index); 3165 dip->di_otime.tv_sec = cpu_to_le32(jfs_ip->otime); 3166 dip->di_otime.tv_nsec = 0; 3167 dip->di_acltype = cpu_to_le32(jfs_ip->acltype); 3168 if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) 3169 dip->di_rdev = cpu_to_le32(jfs_ip->dev); 3170 } 3171