/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_dir2_sf.h"
#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
#include "xfs_rw.h"
#include "xfs_quota.h"
#include "xfs_fsops.h"

STATIC void	xfs_mount_log_sbunit(xfs_mount_t *, __int64_t);
STATIC int	xfs_uuid_mount(xfs_mount_t *);
STATIC void	xfs_uuid_unmount(xfs_mount_t *mp);
STATIC void	xfs_unmountfs_wait(xfs_mount_t *);


#ifdef HAVE_PERCPU_SB
STATIC void	xfs_icsb_destroy_counters(xfs_mount_t *);
STATIC void	xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
						int, int);
STATIC void	xfs_icsb_sync_counters(xfs_mount_t *);
STATIC int	xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
						int64_t, int);
STATIC int	xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);

#else

#define xfs_icsb_destroy_counters(mp)			do { } while (0)
#define xfs_icsb_balance_counter(mp, a, b, c)		do { } while (0)
#define xfs_icsb_sync_counters(mp)			do { } while (0)
#define xfs_icsb_modify_counters(mp, a, b, c)		do { } while (0)

#endif
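/*
 * Table mapping each superblock field to its offset and type.
 * xfs_xlatesb() walks this table to translate fields between the
 * on-disk and in-core superblock one field at a time, so the entries
 * must appear in on-disk field order.  The trailing
 * { sizeof(xfs_sb_t), 0 } sentinel exists only so the size of the last
 * real field can be computed as the difference of adjacent offsets.
 */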
static const struct {
	short offset;
	short type;	/* 0 = integer
			 * 1 = binary / string (no translation)
			 */
} xfs_sb_info[] = {
	{ offsetof(xfs_sb_t, sb_magicnum),	0 },
	{ offsetof(xfs_sb_t, sb_blocksize),	0 },
	{ offsetof(xfs_sb_t, sb_dblocks),	0 },
	{ offsetof(xfs_sb_t, sb_rblocks),	0 },
	{ offsetof(xfs_sb_t, sb_rextents),	0 },
	{ offsetof(xfs_sb_t, sb_uuid),		1 },
	{ offsetof(xfs_sb_t, sb_logstart),	0 },
	{ offsetof(xfs_sb_t, sb_rootino),	0 },
	{ offsetof(xfs_sb_t, sb_rbmino),	0 },
	{ offsetof(xfs_sb_t, sb_rsumino),	0 },
	{ offsetof(xfs_sb_t, sb_rextsize),	0 },
	{ offsetof(xfs_sb_t, sb_agblocks),	0 },
	{ offsetof(xfs_sb_t, sb_agcount),	0 },
	{ offsetof(xfs_sb_t, sb_rbmblocks),	0 },
	{ offsetof(xfs_sb_t, sb_logblocks),	0 },
	{ offsetof(xfs_sb_t, sb_versionnum),	0 },
	{ offsetof(xfs_sb_t, sb_sectsize),	0 },
	{ offsetof(xfs_sb_t, sb_inodesize),	0 },
	{ offsetof(xfs_sb_t, sb_inopblock),	0 },
	{ offsetof(xfs_sb_t, sb_fname[0]),	1 },
	{ offsetof(xfs_sb_t, sb_blocklog),	0 },
	{ offsetof(xfs_sb_t, sb_sectlog),	0 },
	{ offsetof(xfs_sb_t, sb_inodelog),	0 },
	{ offsetof(xfs_sb_t, sb_inopblog),	0 },
	{ offsetof(xfs_sb_t, sb_agblklog),	0 },
	{ offsetof(xfs_sb_t, sb_rextslog),	0 },
	{ offsetof(xfs_sb_t, sb_inprogress),	0 },
	{ offsetof(xfs_sb_t, sb_imax_pct),	0 },
	{ offsetof(xfs_sb_t, sb_icount),	0 },
	{ offsetof(xfs_sb_t, sb_ifree),		0 },
	{ offsetof(xfs_sb_t, sb_fdblocks),	0 },
	{ offsetof(xfs_sb_t, sb_frextents),	0 },
	{ offsetof(xfs_sb_t, sb_uquotino),	0 },
	{ offsetof(xfs_sb_t, sb_gquotino),	0 },
	{ offsetof(xfs_sb_t, sb_qflags),	0 },
	{ offsetof(xfs_sb_t, sb_flags),		0 },
	{ offsetof(xfs_sb_t, sb_shared_vn),	0 },
	{ offsetof(xfs_sb_t, sb_inoalignmt),	0 },
	{ offsetof(xfs_sb_t, sb_unit),		0 },
	{ offsetof(xfs_sb_t, sb_width),		0 },
	{ offsetof(xfs_sb_t, sb_dirblklog),	0 },
	{ offsetof(xfs_sb_t, sb_logsectlog),	0 },
	{ offsetof(xfs_sb_t, sb_logsectsize),	0 },
	{ offsetof(xfs_sb_t, sb_logsunit),	0 },
	{ offsetof(xfs_sb_t, sb_features2),	0 },
	{ sizeof(xfs_sb_t),			0 }
};

/*
 * Return a pointer to an initialized xfs_mount structure.
 */
xfs_mount_t *
xfs_mount_init(void)
{
	xfs_mount_t *mp;

	mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP);

	if (xfs_icsb_init_counters(mp)) {
		mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
	}

	AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail");
	spinlock_init(&mp->m_sb_lock, "xfs_sb");
	mutex_init(&mp->m_ilock);
	initnsema(&mp->m_growlock, 1, "xfs_grow");
	/*
	 * Initialize the AIL.
	 */
	xfs_trans_ail_init(mp);

	atomic_set(&mp->m_active_trans, 0);

	return mp;
}

/*
 * Free up the resources associated with a mount structure.  Assume that
 * the structure was initially zeroed, so we can tell which fields got
 * initialized.
 */
void
xfs_mount_free(
	xfs_mount_t	*mp,
	int		remove_bhv)
{
	if (mp->m_ihash)
		xfs_ihash_free(mp);
	if (mp->m_chash)
		xfs_chash_free(mp);

	if (mp->m_perag) {
		int	agno;

		for (agno = 0; agno < mp->m_maxagi; agno++)
			if (mp->m_perag[agno].pagb_list)
				kmem_free(mp->m_perag[agno].pagb_list,
					  sizeof(xfs_perag_busy_t) *
						XFS_PAGB_NUM_SLOTS);
		kmem_free(mp->m_perag,
			  sizeof(xfs_perag_t) * mp->m_sb.sb_agcount);
	}

	AIL_LOCK_DESTROY(&mp->m_ail_lock);
	spinlock_destroy(&mp->m_sb_lock);
	mutex_destroy(&mp->m_ilock);
	freesema(&mp->m_growlock);
	if (mp->m_quotainfo)
		XFS_QM_DONE(mp);

	if (mp->m_fsname != NULL)
		kmem_free(mp->m_fsname, mp->m_fsname_len);
	if (mp->m_rtname != NULL)
		kmem_free(mp->m_rtname, strlen(mp->m_rtname) + 1);
	if (mp->m_logname != NULL)
		kmem_free(mp->m_logname, strlen(mp->m_logname) + 1);

	if (remove_bhv) {
		struct bhv_vfs	*vfsp = XFS_MTOVFS(mp);

		bhv_remove_all_vfsops(vfsp, 0);
		VFS_REMOVEBHV(vfsp, &mp->m_bhv);
	}

	xfs_icsb_destroy_counters(mp);
	kmem_free(mp, sizeof(xfs_mount_t));
}


/*
 * Check the validity of the SB found.
 */
STATIC int
xfs_mount_validate_sb(
	xfs_mount_t	*mp,
	xfs_sb_t	*sbp,
	int		flags)
{
	/*
	 * If the log device and data device have the
	 * same device number, the log is internal.
	 * Consequently, the sb_logstart should be non-zero.  If
	 * we have a zero sb_logstart in this case, we may be trying to mount
	 * a volume filesystem in a non-volume manner.
	 */
	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
		xfs_fs_mount_cmn_err(flags, "bad magic number");
		return XFS_ERROR(EWRONGFS);
	}

	if (!XFS_SB_GOOD_VERSION(sbp)) {
		xfs_fs_mount_cmn_err(flags, "bad version");
		return XFS_ERROR(EWRONGFS);
	}

	if (unlikely(
	    sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
		xfs_fs_mount_cmn_err(flags,
			"filesystem is marked as having an external log; "
			"specify logdev on the\nmount command line.");
		return XFS_ERROR(EINVAL);
	}

	if (unlikely(
	    sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) {
		xfs_fs_mount_cmn_err(flags,
			"filesystem is marked as having an internal log; "
			"do not specify logdev on\nthe mount command line.");
		return XFS_ERROR(EINVAL);
	}

	/*
	 * More sanity checking.  These were stolen directly from
	 * xfs_repair.
	 */
	if (unlikely(
	    sbp->sb_agcount <= 0 ||
	    sbp->sb_sectsize < XFS_MIN_SECTORSIZE ||
	    sbp->sb_sectsize > XFS_MAX_SECTORSIZE ||
	    sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG ||
	    sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG ||
	    sbp->sb_blocksize < XFS_MIN_BLOCKSIZE ||
	    sbp->sb_blocksize > XFS_MAX_BLOCKSIZE ||
	    sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG ||
	    sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG ||
	    sbp->sb_inodesize < XFS_DINODE_MIN_SIZE ||
	    sbp->sb_inodesize > XFS_DINODE_MAX_SIZE ||
	    sbp->sb_inodelog < XFS_DINODE_MIN_LOG ||
	    sbp->sb_inodelog > XFS_DINODE_MAX_LOG ||
	    (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) ||
	    (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) ||
	    (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) ||
	    (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) {
		xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed");
		return XFS_ERROR(EFSCORRUPTED);
	}

	/*
	 * Sanity check AG count, size fields against data size field
	 */
	if (unlikely(
	    sbp->sb_dblocks == 0 ||
	    sbp->sb_dblocks >
	     (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks ||
	    sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) *
			      sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) {
		xfs_fs_mount_cmn_err(flags, "SB sanity check 2 failed");
		return XFS_ERROR(EFSCORRUPTED);
	}

	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
	ASSERT(sbp->sb_blocklog >= BBSHIFT);

#if XFS_BIG_BLKNOS	/* Limited by ULONG_MAX of page cache index */
	if (unlikely(
	    (sbp->sb_dblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX ||
	    (sbp->sb_rblocks >> (PAGE_SHIFT - sbp->sb_blocklog)) > ULONG_MAX)) {
#else			/* Limited by UINT_MAX of sectors */
	if (unlikely(
	    (sbp->sb_dblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX ||
	    (sbp->sb_rblocks << (sbp->sb_blocklog - BBSHIFT)) > UINT_MAX)) {
#endif
		xfs_fs_mount_cmn_err(flags,
			"file system too large to be mounted on this system.");
		return XFS_ERROR(E2BIG);
	}

	if (unlikely(sbp->sb_inprogress)) {
		xfs_fs_mount_cmn_err(flags, "file system busy");
		return XFS_ERROR(EFSCORRUPTED);
	}

	/*
	 * Version 1 directory format has never worked on Linux.
	 */
	if (unlikely(!XFS_SB_VERSION_HASDIRV2(sbp))) {
		xfs_fs_mount_cmn_err(flags,
			"file system using version 1 directory format");
		return XFS_ERROR(ENOSYS);
	}

	/*
	 * Until this is fixed only page-sized or smaller data blocks work.
	 */
	if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
		xfs_fs_mount_cmn_err(flags,
			"file system with blocksize %d bytes",
			sbp->sb_blocksize);
		xfs_fs_mount_cmn_err(flags,
			"only pagesize (%ld) or less will currently work.",
			PAGE_SIZE);
		return XFS_ERROR(ENOSYS);
	}

	return 0;
}
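/*
 * Mark which allocation groups may hold inodes.  If the highest
 * possible inode number would overflow 32 bits and the caller asked
 * for 32 bit inodes (VFS_32BITINODES), only the low AGs whose inode
 * numbers stay below XFS_MAXINUMBER_32 get pagi_inodeok set, and the
 * first max_metadata of those are preferred for metadata; otherwise
 * every AG is marked inode-capable.
 */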
xfs_agnumber_t
xfs_initialize_perag(
	bhv_vfs_t	*vfs,
	xfs_mount_t	*mp,
	xfs_agnumber_t	agcount)
{
	xfs_agnumber_t	index, max_metadata;
	xfs_perag_t	*pag;
	xfs_agino_t	agino;
	xfs_ino_t	ino;
	xfs_sb_t	*sbp = &mp->m_sb;
	xfs_ino_t	max_inum = XFS_MAXINUMBER_32;

	/* Check to see if the filesystem can overflow 32 bit inodes */
	agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);

	/* Clear the mount flag if no inode can overflow 32 bits
	 * on this filesystem, or if specifically requested..
	 */
	if ((vfs->vfs_flag & VFS_32BITINODES) && ino > max_inum) {
		mp->m_flags |= XFS_MOUNT_32BITINODES;
	} else {
		mp->m_flags &= ~XFS_MOUNT_32BITINODES;
	}

	/* If we can overflow then setup the ag headers accordingly */
	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
		/* Calculate how much should be reserved for inodes to
		 * meet the max inode percentage.
		 */
		if (mp->m_maxicount) {
			__uint64_t icount;

			icount = sbp->sb_dblocks * sbp->sb_imax_pct;
			do_div(icount, 100);
			icount += sbp->sb_agblocks - 1;
			do_div(icount, sbp->sb_agblocks);
			max_metadata = icount;
		} else {
			max_metadata = agcount;
		}
		for (index = 0; index < agcount; index++) {
			ino = XFS_AGINO_TO_INO(mp, index, agino);
			if (ino > max_inum) {
				index++;
				break;
			}

			/* This ag is preferred for inodes */
			pag = &mp->m_perag[index];
			pag->pagi_inodeok = 1;
			if (index < max_metadata)
				pag->pagf_metadata = 1;
		}
	} else {
		/* Setup default behavior for smaller filesystems */
		for (index = 0; index < agcount; index++) {
			pag = &mp->m_perag[index];
			pag->pagi_inodeok = 1;
		}
	}
	return index;
}

/*
 * xfs_xlatesb
 *
 *	data	- on disk version of sb
 *	sb	- a superblock
 *	dir	- conversion direction: <0 - convert sb to buf
 *					>0 - convert buf to sb
 *	fields	- which fields to copy (bitmask)
 */
void
xfs_xlatesb(
	void		*data,
	xfs_sb_t	*sb,
	int		dir,
	__int64_t	fields)
{
	xfs_caddr_t	buf_ptr;
	xfs_caddr_t	mem_ptr;
	xfs_sb_field_t	f;
	int		first;
	int		size;

	ASSERT(dir);
	ASSERT(fields);

	if (!fields)
		return;

	buf_ptr = (xfs_caddr_t)data;
	mem_ptr = (xfs_caddr_t)sb;

	while (fields) {
		f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
		first = xfs_sb_info[f].offset;
		size = xfs_sb_info[f + 1].offset - first;

		ASSERT(xfs_sb_info[f].type == 0 || xfs_sb_info[f].type == 1);

		if (size == 1 || xfs_sb_info[f].type == 1) {
			if (dir > 0) {
				memcpy(mem_ptr + first, buf_ptr + first, size);
			} else {
				memcpy(buf_ptr + first, mem_ptr + first, size);
			}
		} else {
			switch (size) {
			case 2:
				INT_XLATE(*(__uint16_t*)(buf_ptr+first),
					  *(__uint16_t*)(mem_ptr+first),
					  dir, ARCH_CONVERT);
				break;
			case 4:
				INT_XLATE(*(__uint32_t*)(buf_ptr+first),
					  *(__uint32_t*)(mem_ptr+first),
					  dir, ARCH_CONVERT);
				break;
			case 8:
				INT_XLATE(*(__uint64_t*)(buf_ptr+first),
					  *(__uint64_t*)(mem_ptr+first),
					  dir, ARCH_CONVERT);
				break;
			default:
				ASSERT(0);
			}
		}

		fields &= ~(1LL << f);
	}
}
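/*
 * For example, xfs_readsb() below pulls every field out of the
 * just-read buffer with:
 *
 *	xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), 1, XFS_SB_ALL_BITS);
 *
 * while xfs_mod_sb() uses dir == -1 and a bitmask of modified fields
 * to push only those fields back into the buffer before logging it.
 */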
/*
 * xfs_readsb
 *
 * Does the initial read of the superblock.
 */
int
xfs_readsb(xfs_mount_t *mp, int flags)
{
	unsigned int	sector_size;
	unsigned int	extra_flags;
	xfs_buf_t	*bp;
	xfs_sb_t	*sbp;
	int		error;

	ASSERT(mp->m_sb_bp == NULL);
	ASSERT(mp->m_ddev_targp != NULL);

	/*
	 * Allocate a (locked) buffer to hold the superblock.
	 * This will be kept around at all times to optimize
	 * access to the superblock.
	 */
	sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
	extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED;

	bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
				BTOBB(sector_size), extra_flags);
	if (!bp || XFS_BUF_ISERROR(bp)) {
		xfs_fs_mount_cmn_err(flags, "SB read failed");
		error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
		goto fail;
	}
	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);

	/*
	 * Initialize the mount structure from the superblock.
	 * But first do some basic consistency checking.
	 */
	sbp = XFS_BUF_TO_SBP(bp);
	xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), 1, XFS_SB_ALL_BITS);

	error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags);
	if (error) {
		xfs_fs_mount_cmn_err(flags, "SB validate failed");
		goto fail;
	}

	/*
	 * We must be able to do sector-sized and sector-aligned IO.
	 */
	if (sector_size > mp->m_sb.sb_sectsize) {
		xfs_fs_mount_cmn_err(flags,
			"device supports only %u byte sectors (not %u)",
			sector_size, mp->m_sb.sb_sectsize);
		error = ENOSYS;
		goto fail;
	}

	/*
	 * If device sector size is smaller than the superblock size,
	 * re-read the superblock so the buffer is correctly sized.
	 */
	if (sector_size < mp->m_sb.sb_sectsize) {
		XFS_BUF_UNMANAGE(bp);
		xfs_buf_relse(bp);
		sector_size = mp->m_sb.sb_sectsize;
		bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
					BTOBB(sector_size), extra_flags);
		if (!bp || XFS_BUF_ISERROR(bp)) {
			xfs_fs_mount_cmn_err(flags, "SB re-read failed");
			error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
			goto fail;
		}
		ASSERT(XFS_BUF_ISBUSY(bp));
		ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
	}

	/* Initialize per-cpu counters */
	xfs_icsb_reinit_counters(mp);

	mp->m_sb_bp = bp;
	xfs_buf_relse(bp);
	ASSERT(XFS_BUF_VALUSEMA(bp) > 0);
	return 0;

 fail:
	if (bp) {
		XFS_BUF_UNMANAGE(bp);
		xfs_buf_relse(bp);
	}
	return error;
}


/*
 * xfs_mount_common
 *
 * Mount initialization code establishing various mount
 * fields from the superblock associated with the given
 * mount structure
 */
STATIC void
xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
{
	int	i;

	mp->m_agfrotor = mp->m_agirotor = 0;
	spinlock_init(&mp->m_agirotor_lock, "m_agirotor_lock");
	mp->m_maxagi = mp->m_sb.sb_agcount;
	mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
	mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
	mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
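	/*
	 * (illustrative) For the common 4096 byte block size, sb_blocklog
	 * is 12, so m_blkbit_log above is 12 + XFS_NBBYLOG (3) = 15 and
	 * m_blkbb_log is 12 - BBSHIFT (9) = 3: each filesystem block
	 * holds 2^15 bits and 2^3 512-byte basic blocks.
	 */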
	mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
	mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
	mp->m_litino = sbp->sb_inodesize -
		((uint)sizeof(xfs_dinode_core_t) + (uint)sizeof(xfs_agino_t));
	mp->m_blockmask = sbp->sb_blocksize - 1;
	mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
	mp->m_blockwmask = mp->m_blockwsize - 1;
	INIT_LIST_HEAD(&mp->m_del_inodes);

	/*
	 * Setup for attributes, in case they get created.
	 * This value is for inodes getting attributes for the first time,
	 * the per-inode value is for old attribute values.
	 */
	ASSERT(sbp->sb_inodesize >= 256 && sbp->sb_inodesize <= 2048);
	switch (sbp->sb_inodesize) {
	case 256:
		mp->m_attroffset = XFS_LITINO(mp) -
				   XFS_BMDR_SPACE_CALC(MINABTPTRS);
		break;
	case 512:
	case 1024:
	case 2048:
		mp->m_attroffset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
		break;
	default:
		ASSERT(0);
	}
	ASSERT(mp->m_attroffset < XFS_LITINO(mp));

	for (i = 0; i < 2; i++) {
		mp->m_alloc_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
			xfs_alloc, i == 0);
		mp->m_alloc_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
			xfs_alloc, i == 0);
	}
	for (i = 0; i < 2; i++) {
		mp->m_bmap_dmxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
			xfs_bmbt, i == 0);
		mp->m_bmap_dmnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
			xfs_bmbt, i == 0);
	}
	for (i = 0; i < 2; i++) {
		mp->m_inobt_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
			xfs_inobt, i == 0);
		mp->m_inobt_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
			xfs_inobt, i == 0);
	}

	mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
	mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
					sbp->sb_inopblock);
	mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
}
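/*
 * (illustrative) With 256 byte inodes and 4096 byte blocks,
 * sb_inopblock is 16 and sb_inopblog is 4, so m_ialloc_inos above is
 * MAX(XFS_INODES_PER_CHUNK (64), 16) = 64 and m_ialloc_blks is
 * 64 >> 4 = 4: inodes are allocated 64 at a time in 4 block chunks.
 */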
/*
 * xfs_mountfs
 *
 * This function does the following on an initial mount of a file system:
 *	- reads the superblock from disk and init the mount struct
 *	- if we're a 32-bit kernel, do a size check on the superblock
 *	  so we don't mount terabyte filesystems
 *	- init mount struct realtime fields
 *	- allocate inode hash table for fs
 *	- init directory manager
 *	- perform recovery and init the log manager
 */
int
xfs_mountfs(
	bhv_vfs_t	*vfsp,
	xfs_mount_t	*mp,
	int		mfsi_flags)
{
	xfs_buf_t	*bp;
	xfs_sb_t	*sbp = &(mp->m_sb);
	xfs_inode_t	*rip;
	bhv_vnode_t	*rvp = NULL;
	int		readio_log, writeio_log;
	xfs_daddr_t	d;
	__uint64_t	ret64;
	__int64_t	update_flags;
	uint		quotamount, quotaflags;
	int		agno;
	int		uuid_mounted = 0;
	int		error = 0;

	if (mp->m_sb_bp == NULL) {
		if ((error = xfs_readsb(mp, mfsi_flags))) {
			return error;
		}
	}
	xfs_mount_common(mp, sbp);

	/*
	 * Check if sb_agblocks is aligned at stripe boundary
	 * If sb_agblocks is NOT aligned turn off m_dalign since
	 * allocator alignment is within an ag, therefore ag has
	 * to be aligned at stripe boundary.
	 */
	update_flags = 0LL;
	if (mp->m_dalign && !(mfsi_flags & XFS_MFSI_SECOND)) {
		/*
		 * If stripe unit and stripe width are not multiples
		 * of the fs blocksize turn off alignment.
		 */
		if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
		    (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
			if (mp->m_flags & XFS_MOUNT_RETERR) {
				cmn_err(CE_WARN,
					"XFS: alignment check 1 failed");
				error = XFS_ERROR(EINVAL);
				goto error1;
			}
			mp->m_dalign = mp->m_swidth = 0;
		} else {
			/*
			 * Convert the stripe unit and width to FSBs.
			 */
			mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
			if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
				if (mp->m_flags & XFS_MOUNT_RETERR) {
					error = XFS_ERROR(EINVAL);
					goto error1;
				}
				xfs_fs_cmn_err(CE_WARN, mp,
"stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)",
					mp->m_dalign, mp->m_swidth,
					sbp->sb_agblocks);

				mp->m_dalign = 0;
				mp->m_swidth = 0;
			} else if (mp->m_dalign) {
				mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
			} else {
				if (mp->m_flags & XFS_MOUNT_RETERR) {
					xfs_fs_cmn_err(CE_WARN, mp,
"stripe alignment turned off: sunit(%d) less than bsize(%d)",
						mp->m_dalign,
						mp->m_blockmask + 1);
					error = XFS_ERROR(EINVAL);
					goto error1;
				}
				mp->m_swidth = 0;
			}
		}

		/*
		 * Update superblock with new values
		 * and log changes
		 */
		if (XFS_SB_VERSION_HASDALIGN(sbp)) {
			if (sbp->sb_unit != mp->m_dalign) {
				sbp->sb_unit = mp->m_dalign;
				update_flags |= XFS_SB_UNIT;
			}
			if (sbp->sb_width != mp->m_swidth) {
				sbp->sb_width = mp->m_swidth;
				update_flags |= XFS_SB_WIDTH;
			}
		}
	} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
		    XFS_SB_VERSION_HASDALIGN(&mp->m_sb)) {
		mp->m_dalign = sbp->sb_unit;
		mp->m_swidth = sbp->sb_width;
	}

	xfs_alloc_compute_maxlevels(mp);
	xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
	xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
	xfs_ialloc_compute_maxlevels(mp);

	if (sbp->sb_imax_pct) {
		__uint64_t icount;

		/* Make sure the maximum inode count is a multiple of the
		 * units we allocate inodes in.
		 */

		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
		do_div(icount, 100);
		do_div(icount, mp->m_ialloc_blks);
		mp->m_maxicount = (icount * mp->m_ialloc_blks) <<
				   sbp->sb_inopblog;
	} else
		mp->m_maxicount = 0;
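	/*
	 * (illustrative) For sb_dblocks = 1000000, sb_imax_pct = 25,
	 * m_ialloc_blks = 4 and sb_inopblog = 4, the calculation above
	 * gives icount = 250000 blocks = 62500 chunks, so m_maxicount
	 * is (62500 * 4) << 4 = 4000000 inodes, a whole number of
	 * inode allocation chunks.
	 */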
	mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog);

	/*
	 * XFS uses the uuid from the superblock as the unique
	 * identifier for fsid.  We can not use the uuid from the volume
	 * since a single partition filesystem is identical to a single
	 * partition volume/filesystem.
	 */
	if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
	    (mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
		if (xfs_uuid_mount(mp)) {
			error = XFS_ERROR(EINVAL);
			goto error1;
		}
		uuid_mounted = 1;
		ret64 = uuid_hash64(&sbp->sb_uuid);
		memcpy(&vfsp->vfs_fsid, &ret64, sizeof(ret64));
	}

	/*
	 * Set the default minimum read and write sizes unless
	 * already specified in a mount option.
	 * We use smaller I/O sizes when the file system
	 * is being used for NFS service (wsync mount option).
	 */
	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
		if (mp->m_flags & XFS_MOUNT_WSYNC) {
			readio_log = XFS_WSYNC_READIO_LOG;
			writeio_log = XFS_WSYNC_WRITEIO_LOG;
		} else {
			readio_log = XFS_READIO_LOG_LARGE;
			writeio_log = XFS_WRITEIO_LOG_LARGE;
		}
	} else {
		readio_log = mp->m_readio_log;
		writeio_log = mp->m_writeio_log;
	}

	/*
	 * Set the number of readahead buffers to use based on
	 * physical memory size.
	 */
	if (xfs_physmem <= 4096)		/* <= 16MB */
		mp->m_nreadaheads = XFS_RW_NREADAHEAD_16MB;
	else if (xfs_physmem <= 8192)		/* <= 32MB */
		mp->m_nreadaheads = XFS_RW_NREADAHEAD_32MB;
	else
		mp->m_nreadaheads = XFS_RW_NREADAHEAD_K32;
	if (sbp->sb_blocklog > readio_log) {
		mp->m_readio_log = sbp->sb_blocklog;
	} else {
		mp->m_readio_log = readio_log;
	}
	mp->m_readio_blocks = 1 << (mp->m_readio_log - sbp->sb_blocklog);
	if (sbp->sb_blocklog > writeio_log) {
		mp->m_writeio_log = sbp->sb_blocklog;
	} else {
		mp->m_writeio_log = writeio_log;
	}
	mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog);

	/*
	 * Set the inode cluster size based on the physical memory
	 * size.  This may still be overridden by the file system
	 * block size if it is larger than the chosen cluster size.
	 */
	if (xfs_physmem <= btoc(32 * 1024 * 1024)) { /* <= 32 MB */
		mp->m_inode_cluster_size = XFS_INODE_SMALL_CLUSTER_SIZE;
	} else {
		mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
	}
	/*
	 * Set whether we're using inode alignment.
	 */
	if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) &&
	    mp->m_sb.sb_inoalignmt >=
	    XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
		mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
	else
		mp->m_inoalign_mask = 0;
	/*
	 * If we are using stripe alignment, check whether
	 * the stripe unit is a multiple of the inode alignment
	 */
	if (mp->m_dalign && mp->m_inoalign_mask &&
	    !(mp->m_dalign & mp->m_inoalign_mask))
		mp->m_sinoalign = mp->m_dalign;
	else
		mp->m_sinoalign = 0;
	/*
	 * Check that the data (and log if separate) are an ok size.
	 */
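	/*
	 * Each check below verifies that the block count is representable
	 * in basic blocks and then reads the last sector of the device,
	 * so a device smaller than the filesystem claims to be is caught
	 * here rather than by a failed write long after mounting.
	 */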
	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
		cmn_err(CE_WARN, "XFS: size check 1 failed");
		error = XFS_ERROR(E2BIG);
		goto error1;
	}
	error = xfs_read_buf(mp, mp->m_ddev_targp,
			     d - XFS_FSS_TO_BB(mp, 1),
			     XFS_FSS_TO_BB(mp, 1), 0, &bp);
	if (!error) {
		xfs_buf_relse(bp);
	} else {
		cmn_err(CE_WARN, "XFS: size check 2 failed");
		if (error == ENOSPC) {
			error = XFS_ERROR(E2BIG);
		}
		goto error1;
	}

	if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) &&
	    mp->m_logdev_targp != mp->m_ddev_targp) {
		d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
		if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
			cmn_err(CE_WARN, "XFS: size check 3 failed");
			error = XFS_ERROR(E2BIG);
			goto error1;
		}
		error = xfs_read_buf(mp, mp->m_logdev_targp,
				     d - XFS_FSB_TO_BB(mp, 1),
				     XFS_FSB_TO_BB(mp, 1), 0, &bp);
		if (!error) {
			xfs_buf_relse(bp);
		} else {
			cmn_err(CE_WARN, "XFS: size check 3 failed");
			if (error == ENOSPC) {
				error = XFS_ERROR(E2BIG);
			}
			goto error1;
		}
	}

	/*
	 * Initialize realtime fields in the mount structure
	 */
	if ((error = xfs_rtmount_init(mp))) {
		cmn_err(CE_WARN, "XFS: RT mount failed");
		goto error1;
	}

	/*
	 * For client case we are done now
	 */
	if (mfsi_flags & XFS_MFSI_CLIENT) {
		return 0;
	}

	/*
	 * Copies the low order bits of the timestamp and the randomly
	 * set "sequence" number out of a UUID.
	 */
	uuid_getnodeuniq(&sbp->sb_uuid, mp->m_fixedfsid);

	/*
	 * The vfs structure needs to have a file system independent
	 * way of checking for the invariant file system ID.  Since it
	 * can't look at mount structures it has a pointer to the data
	 * in the mount structure.
	 *
	 * File systems that don't support user level file handles (i.e.
	 * all of them except for XFS) will leave vfs_altfsid as NULL.
	 */
	vfsp->vfs_altfsid = (xfs_fsid_t *)mp->m_fixedfsid;
	mp->m_dmevmask = 0;	/* not persistent; set after each mount */

	xfs_dir_mount(mp);

	/*
	 * Initialize the attribute manager's entries.
	 */
	mp->m_attr_magicpct = (mp->m_sb.sb_blocksize * 37) / 100;

	/*
	 * Initialize the precomputed transaction reservations values.
	 */
	xfs_trans_init(mp);

	/*
	 * Allocate and initialize the inode hash table for this
	 * file system.
	 */
	xfs_ihash_init(mp);
	xfs_chash_init(mp);

	/*
	 * Allocate and initialize the per-ag data.
	 */
	init_rwsem(&mp->m_peraglock);
	mp->m_perag =
		kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), KM_SLEEP);

	mp->m_maxagi = xfs_initialize_perag(vfsp, mp, sbp->sb_agcount);

	/*
	 * log's mount-time initialization. Perform 1st part recovery if needed
	 */
	if (likely(sbp->sb_logblocks > 0)) {	/* check for volume case */
		error = xfs_log_mount(mp, mp->m_logdev_targp,
				      XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
				      XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
		if (error) {
			cmn_err(CE_WARN, "XFS: log mount failed");
			goto error2;
		}
	} else {	/* No log has been defined */
		cmn_err(CE_WARN, "XFS: no log defined");
		XFS_ERROR_REPORT("xfs_mountfs_int(1)", XFS_ERRLEVEL_LOW, mp);
		error = XFS_ERROR(EFSCORRUPTED);
		goto error2;
	}
	/*
	 * Get and sanity-check the root inode.
	 * Save the pointer to it in the mount structure.
	 */
	error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0);
	if (error) {
		cmn_err(CE_WARN, "XFS: failed to read root inode");
		goto error3;
	}

	ASSERT(rip != NULL);
	rvp = XFS_ITOV(rip);

	if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) {
		cmn_err(CE_WARN, "XFS: corrupted root inode");
		cmn_err(CE_WARN, "Device %s - root %llu is not a directory",
			XFS_BUFTARG_NAME(mp->m_ddev_targp),
			(unsigned long long)rip->i_ino);
		xfs_iunlock(rip, XFS_ILOCK_EXCL);
		XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
				 mp);
		error = XFS_ERROR(EFSCORRUPTED);
		goto error4;
	}
	mp->m_rootip = rip;	/* save it */

	xfs_iunlock(rip, XFS_ILOCK_EXCL);

	/*
	 * Initialize realtime inode pointers in the mount structure
	 */
	if ((error = xfs_rtmount_inodes(mp))) {
		/*
		 * Free up the root inode.
		 */
		cmn_err(CE_WARN, "XFS: failed to read RT inodes");
		goto error4;
	}

	/*
	 * If fs is not mounted readonly, then update the superblock
	 * unit and width changes.
	 */
	if (update_flags && !(vfsp->vfs_flag & VFS_RDONLY))
		xfs_mount_log_sbunit(mp, update_flags);

	/*
	 * Initialise the XFS quota management subsystem for this mount
	 */
	if ((error = XFS_QM_INIT(mp, &quotamount, &quotaflags)))
		goto error4;

	/*
	 * Finish recovering the file system.  This part needed to be
	 * delayed until after the root and real-time bitmap inodes
	 * were consistently read in.
	 */
	error = xfs_log_mount_finish(mp, mfsi_flags);
	if (error) {
		cmn_err(CE_WARN, "XFS: log mount finish failed");
		goto error4;
	}

	/*
	 * Complete the quota initialisation, post-log-replay component.
	 */
	if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags)))
		goto error4;

	return 0;

 error4:
	/*
	 * Free up the root inode.
	 */
	VN_RELE(rvp);
 error3:
	xfs_log_unmount_dealloc(mp);
 error2:
	xfs_ihash_free(mp);
	xfs_chash_free(mp);
	for (agno = 0; agno < sbp->sb_agcount; agno++)
		if (mp->m_perag[agno].pagb_list)
			kmem_free(mp->m_perag[agno].pagb_list,
				  sizeof(xfs_perag_busy_t) *
					XFS_PAGB_NUM_SLOTS);
	kmem_free(mp->m_perag, sbp->sb_agcount * sizeof(xfs_perag_t));
	mp->m_perag = NULL;
	/* FALLTHROUGH */
 error1:
	if (uuid_mounted)
		xfs_uuid_unmount(mp);
	xfs_freesb(mp);
	return error;
}
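/*
 * Note the error label convention in xfs_mountfs() above: error4
 * through error1 unwind progressively less state (root inode, log,
 * per-ag data, UUID registration and superblock), so a failure at any
 * point jumps to the label that tears down exactly what had been set
 * up by then.
 */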
/*
 * xfs_unmountfs
 *
 * This flushes out the inodes, dquots and the superblock, unmounts the
 * log and makes sure that incore structures are freed.
 */
int
xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
{
	struct bhv_vfs	*vfsp = XFS_MTOVFS(mp);
#if defined(DEBUG) || defined(INDUCE_IO_ERROR)
	int64_t		fsid;
#endif

	xfs_iflush_all(mp);

	XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);

	/*
	 * Flush out the log synchronously so that we know for sure
	 * that nothing is pinned.  This is important because bflush()
	 * will skip pinned buffers.
	 */
	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);

	xfs_binval(mp->m_ddev_targp);
	if (mp->m_rtdev_targp) {
		xfs_binval(mp->m_rtdev_targp);
	}

	xfs_unmountfs_writesb(mp);

	xfs_unmountfs_wait(mp);			/* wait for async bufs */

	xfs_log_unmount(mp);			/* Done! No more fs ops. */

	xfs_freesb(mp);

	/*
	 * All inodes from this mount point should be freed.
	 */
	ASSERT(mp->m_inodes == NULL);

	xfs_unmountfs_close(mp, cr);
	if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
		xfs_uuid_unmount(mp);

#if defined(DEBUG) || defined(INDUCE_IO_ERROR)
	/*
	 * clear all error tags on this filesystem
	 */
	memcpy(&fsid, &vfsp->vfs_fsid, sizeof(int64_t));
	xfs_errortag_clearall_umount(fsid, mp->m_fsname, 0);
#endif
	XFS_IODONE(vfsp);
	xfs_mount_free(mp, 1);
	return 0;
}

void
xfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr)
{
	if (mp->m_logdev_targp != mp->m_ddev_targp)
		xfs_free_buftarg(mp->m_logdev_targp, 1);
	if (mp->m_rtdev_targp)
		xfs_free_buftarg(mp->m_rtdev_targp, 1);
	xfs_free_buftarg(mp->m_ddev_targp, 0);
}

STATIC void
xfs_unmountfs_wait(xfs_mount_t *mp)
{
	if (mp->m_logdev_targp != mp->m_ddev_targp)
		xfs_wait_buftarg(mp->m_logdev_targp);
	if (mp->m_rtdev_targp)
		xfs_wait_buftarg(mp->m_rtdev_targp);
	xfs_wait_buftarg(mp->m_ddev_targp);
}

int
xfs_unmountfs_writesb(xfs_mount_t *mp)
{
	xfs_buf_t	*sbp;
	xfs_sb_t	*sb;
	int		error = 0;

	/*
	 * skip superblock write if fs is read-only, or
	 * if we are doing a forced umount.
	 */
	sbp = xfs_getsb(mp, 0);
	if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY ||
		XFS_FORCED_SHUTDOWN(mp))) {

		xfs_icsb_sync_counters(mp);

		/*
		 * mark shared-readonly if desired
		 */
		sb = XFS_BUF_TO_SBP(sbp);
		if (mp->m_mk_sharedro) {
			if (!(sb->sb_flags & XFS_SBF_READONLY))
				sb->sb_flags |= XFS_SBF_READONLY;
			if (!XFS_SB_VERSION_HASSHARED(sb))
				XFS_SB_VERSION_ADDSHARED(sb);
			xfs_fs_cmn_err(CE_NOTE, mp,
				"Unmounting, marking shared read-only");
		}
		XFS_BUF_UNDONE(sbp);
		XFS_BUF_UNREAD(sbp);
		XFS_BUF_UNDELAYWRITE(sbp);
		XFS_BUF_WRITE(sbp);
		XFS_BUF_UNASYNC(sbp);
		ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp);
		xfsbdstrat(mp, sbp);
		/* Nevermind errors we might get here. */
		error = xfs_iowait(sbp);
		if (error)
			xfs_ioerror_alert("xfs_unmountfs_writesb",
					  mp, sbp, XFS_BUF_ADDR(sbp));
		if (error && mp->m_mk_sharedro)
			xfs_fs_cmn_err(CE_ALERT, mp,
"Superblock write error detected while unmounting.  Filesystem may not be marked shared readonly");
	}
	xfs_buf_relse(sbp);
	return error;
}

/*
 * xfs_mod_sb() can be used to copy arbitrary changes to the
 * in-core superblock into the superblock buffer to be logged.
 * It does not provide the higher level of locking that is
 * needed to protect the in-core superblock from concurrent
 * access.
 */
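/*
 * Typical usage (see xfs_mount_log_sbunit() later in this file):
 * allocate a transaction, reserve log space, then pass the bitmask of
 * changed fields so that only the modified byte range of the
 * superblock buffer is logged.
 */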
void
xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
{
	xfs_buf_t	*bp;
	int		first;
	int		last;
	xfs_mount_t	*mp;
	xfs_sb_t	*sbp;
	xfs_sb_field_t	f;

	ASSERT(fields);
	if (!fields)
		return;
	mp = tp->t_mountp;
	bp = xfs_trans_getsb(tp, mp, 0);
	sbp = XFS_BUF_TO_SBP(bp);
	first = sizeof(xfs_sb_t);
	last = 0;

	/* translate/copy */

	xfs_xlatesb(XFS_BUF_PTR(bp), &(mp->m_sb), -1, fields);

	/* find modified range */

	f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
	first = xfs_sb_info[f].offset;

	f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
	last = xfs_sb_info[f + 1].offset - 1;

	xfs_trans_log_buf(tp, bp, first, last);
}


/*
 * xfs_mod_incore_sb_unlocked() is a utility routine commonly used to apply
 * a delta to a specified field in the in-core superblock.  Simply
 * switch on the field indicated and apply the delta to that field.
 * Fields are not allowed to dip below zero, so if the delta would
 * do this do not apply it and return EINVAL.
 *
 * The SB_LOCK must be held when this routine is called.
 */
int
xfs_mod_incore_sb_unlocked(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field,
	int64_t		delta,
	int		rsvd)
{
	int		scounter;	/* short counter for 32 bit fields */
	long long	lcounter;	/* long counter for 64 bit fields */
	long long	res_used, rem;

	/*
	 * With the in-core superblock spin lock held, switch
	 * on the indicated field.  Apply the delta to the
	 * proper field.  If the field's value would dip below
	 * 0, then do not apply the delta and return EINVAL.
	 */
	switch (field) {
	case XFS_SBS_ICOUNT:
		lcounter = (long long)mp->m_sb.sb_icount;
		lcounter += delta;
		if (lcounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_icount = lcounter;
		return 0;
	case XFS_SBS_IFREE:
		lcounter = (long long)mp->m_sb.sb_ifree;
		lcounter += delta;
		if (lcounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_ifree = lcounter;
		return 0;
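	/*
	 * (illustrative) The FDBLOCKS case below also maintains the
	 * reserved block pool: with m_resblks = 1024 and m_resblks_avail
	 * = 1000, a delta of +100 refills the pool with 24 blocks and
	 * adds the remaining 76 to the free count, while a negative
	 * delta that would drive the free count below zero dips into
	 * m_resblks_avail when rsvd is set and fails with ENOSPC
	 * otherwise.
	 */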
1309 */ 1310 1311 if (lcounter < 0) { 1312 if (rsvd) { 1313 lcounter = (long long)mp->m_resblks_avail + delta; 1314 if (lcounter < 0) { 1315 return XFS_ERROR(ENOSPC); 1316 } 1317 mp->m_resblks_avail = lcounter; 1318 return 0; 1319 } else { /* not reserved */ 1320 return XFS_ERROR(ENOSPC); 1321 } 1322 } 1323 } 1324 1325 mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); 1326 return 0; 1327 case XFS_SBS_FREXTENTS: 1328 lcounter = (long long)mp->m_sb.sb_frextents; 1329 lcounter += delta; 1330 if (lcounter < 0) { 1331 return XFS_ERROR(ENOSPC); 1332 } 1333 mp->m_sb.sb_frextents = lcounter; 1334 return 0; 1335 case XFS_SBS_DBLOCKS: 1336 lcounter = (long long)mp->m_sb.sb_dblocks; 1337 lcounter += delta; 1338 if (lcounter < 0) { 1339 ASSERT(0); 1340 return XFS_ERROR(EINVAL); 1341 } 1342 mp->m_sb.sb_dblocks = lcounter; 1343 return 0; 1344 case XFS_SBS_AGCOUNT: 1345 scounter = mp->m_sb.sb_agcount; 1346 scounter += delta; 1347 if (scounter < 0) { 1348 ASSERT(0); 1349 return XFS_ERROR(EINVAL); 1350 } 1351 mp->m_sb.sb_agcount = scounter; 1352 return 0; 1353 case XFS_SBS_IMAX_PCT: 1354 scounter = mp->m_sb.sb_imax_pct; 1355 scounter += delta; 1356 if (scounter < 0) { 1357 ASSERT(0); 1358 return XFS_ERROR(EINVAL); 1359 } 1360 mp->m_sb.sb_imax_pct = scounter; 1361 return 0; 1362 case XFS_SBS_REXTSIZE: 1363 scounter = mp->m_sb.sb_rextsize; 1364 scounter += delta; 1365 if (scounter < 0) { 1366 ASSERT(0); 1367 return XFS_ERROR(EINVAL); 1368 } 1369 mp->m_sb.sb_rextsize = scounter; 1370 return 0; 1371 case XFS_SBS_RBMBLOCKS: 1372 scounter = mp->m_sb.sb_rbmblocks; 1373 scounter += delta; 1374 if (scounter < 0) { 1375 ASSERT(0); 1376 return XFS_ERROR(EINVAL); 1377 } 1378 mp->m_sb.sb_rbmblocks = scounter; 1379 return 0; 1380 case XFS_SBS_RBLOCKS: 1381 lcounter = (long long)mp->m_sb.sb_rblocks; 1382 lcounter += delta; 1383 if (lcounter < 0) { 1384 ASSERT(0); 1385 return XFS_ERROR(EINVAL); 1386 } 1387 mp->m_sb.sb_rblocks = lcounter; 1388 return 0; 1389 case XFS_SBS_REXTENTS: 1390 lcounter = (long long)mp->m_sb.sb_rextents; 1391 lcounter += delta; 1392 if (lcounter < 0) { 1393 ASSERT(0); 1394 return XFS_ERROR(EINVAL); 1395 } 1396 mp->m_sb.sb_rextents = lcounter; 1397 return 0; 1398 case XFS_SBS_REXTSLOG: 1399 scounter = mp->m_sb.sb_rextslog; 1400 scounter += delta; 1401 if (scounter < 0) { 1402 ASSERT(0); 1403 return XFS_ERROR(EINVAL); 1404 } 1405 mp->m_sb.sb_rextslog = scounter; 1406 return 0; 1407 default: 1408 ASSERT(0); 1409 return XFS_ERROR(EINVAL); 1410 } 1411 } 1412 1413 /* 1414 * xfs_mod_incore_sb() is used to change a field in the in-core 1415 * superblock structure by the specified delta. This modification 1416 * is protected by the SB_LOCK. Just use the xfs_mod_incore_sb_unlocked() 1417 * routine to do the work. 
/*
 * xfs_mod_incore_sb() is used to change a field in the in-core
 * superblock structure by the specified delta.  This modification
 * is protected by the SB_LOCK.  Just use the xfs_mod_incore_sb_unlocked()
 * routine to do the work.
 */
int
xfs_mod_incore_sb(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field,
	int64_t		delta,
	int		rsvd)
{
	unsigned long	s;
	int		status;

	/* check for per-cpu counters */
	switch (field) {
#ifdef HAVE_PERCPU_SB
	case XFS_SBS_ICOUNT:
	case XFS_SBS_IFREE:
	case XFS_SBS_FDBLOCKS:
		if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
			status = xfs_icsb_modify_counters(mp, field,
							delta, rsvd);
			break;
		}
		/* FALLTHROUGH */
#endif
	default:
		s = XFS_SB_LOCK(mp);
		status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
		XFS_SB_UNLOCK(mp, s);
		break;
	}

	return status;
}
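/*
 * (illustrative) A caller allocating 16 blocks to a file would account
 * for them with:
 *
 *	error = xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, -16, rsvd);
 *
 * and put them back with a delta of +16 if the allocation is backed
 * out.
 */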
/*
 * xfs_mod_incore_sb_batch() is used to change more than one field
 * in the in-core superblock structure at a time.  This modification
 * is protected by a lock internal to this module.  The fields and
 * changes to those fields are specified in the array of xfs_mod_sb
 * structures passed in.
 *
 * Either all of the specified deltas will be applied or none of
 * them will.  If any modified field dips below 0, then all modifications
 * will be backed out and EINVAL will be returned.
 */
int
xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
{
	unsigned long	s;
	int		status = 0;
	xfs_mod_sb_t	*msbp;

	/*
	 * Loop through the array of mod structures and apply each
	 * individually.  If any fail, then back out all those
	 * which have already been applied.  Do all of this within
	 * the scope of the SB_LOCK so that all of the changes will
	 * be atomic.
	 */
	s = XFS_SB_LOCK(mp);
	msbp = &msb[0];
	for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) {
		/*
		 * Apply the delta at index n.  If it fails, break
		 * from the loop so we'll fall into the undo loop
		 * below.
		 */
		switch (msbp->msb_field) {
#ifdef HAVE_PERCPU_SB
		case XFS_SBS_ICOUNT:
		case XFS_SBS_IFREE:
		case XFS_SBS_FDBLOCKS:
			if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
				XFS_SB_UNLOCK(mp, s);
				status = xfs_icsb_modify_counters(mp,
							msbp->msb_field,
							msbp->msb_delta, rsvd);
				s = XFS_SB_LOCK(mp);
				break;
			}
			/* FALLTHROUGH */
#endif
		default:
			status = xfs_mod_incore_sb_unlocked(mp,
						msbp->msb_field,
						msbp->msb_delta, rsvd);
			break;
		}

		if (status != 0) {
			break;
		}
	}

	/*
	 * If we didn't complete the loop above, then back out
	 * any changes made to the superblock.  If you add code
	 * between the loop above and here, make sure that you
	 * preserve the value of status. Loop back until
	 * we step below the beginning of the array.  Make sure
	 * we don't touch anything back there.
	 */
	if (status != 0) {
		msbp--;
		while (msbp >= msb) {
			switch (msbp->msb_field) {
#ifdef HAVE_PERCPU_SB
			case XFS_SBS_ICOUNT:
			case XFS_SBS_IFREE:
			case XFS_SBS_FDBLOCKS:
				if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
					XFS_SB_UNLOCK(mp, s);
					status = xfs_icsb_modify_counters(mp,
							msbp->msb_field,
							-(msbp->msb_delta),
							rsvd);
					s = XFS_SB_LOCK(mp);
					break;
				}
				/* FALLTHROUGH */
#endif
			default:
				status = xfs_mod_incore_sb_unlocked(mp,
							msbp->msb_field,
							-(msbp->msb_delta),
							rsvd);
				break;
			}
			ASSERT(status == 0);
			msbp--;
		}
	}
	XFS_SB_UNLOCK(mp, s);
	return status;
}

/*
 * xfs_getsb() is called to obtain the buffer for the superblock.
 * The buffer is returned locked and read in from disk.
 * The buffer should be released with a call to xfs_brelse().
 *
 * If the flags parameter is BUF_TRYLOCK, then we'll only return
 * the superblock buffer if it can be locked without sleeping.
 * If it can't then we'll return NULL.
 */
xfs_buf_t *
xfs_getsb(
	xfs_mount_t	*mp,
	int		flags)
{
	xfs_buf_t	*bp;

	ASSERT(mp->m_sb_bp != NULL);
	bp = mp->m_sb_bp;
	if (flags & XFS_BUF_TRYLOCK) {
		if (!XFS_BUF_CPSEMA(bp)) {
			return NULL;
		}
	} else {
		XFS_BUF_PSEMA(bp, PRIBIO);
	}
	XFS_BUF_HOLD(bp);
	ASSERT(XFS_BUF_ISDONE(bp));
	return bp;
}

/*
 * Used to free the superblock along various error paths.
 */
void
xfs_freesb(
	xfs_mount_t	*mp)
{
	xfs_buf_t	*bp;

	/*
	 * Use xfs_getsb() so that the buffer will be locked
	 * when we call xfs_buf_relse().
	 */
	bp = xfs_getsb(mp, 0);
	XFS_BUF_UNMANAGE(bp);
	xfs_buf_relse(bp);
	mp->m_sb_bp = NULL;
}

/*
 * See if the UUID is unique among mounted XFS filesystems.
 * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
 */
STATIC int
xfs_uuid_mount(
	xfs_mount_t	*mp)
{
	if (uuid_is_nil(&mp->m_sb.sb_uuid)) {
		cmn_err(CE_WARN,
			"XFS: Filesystem %s has nil UUID - can't mount",
			mp->m_fsname);
		return -1;
	}
	if (!uuid_table_insert(&mp->m_sb.sb_uuid)) {
		cmn_err(CE_WARN,
			"XFS: Filesystem %s has duplicate UUID - can't mount",
			mp->m_fsname);
		return -1;
	}
	return 0;
}

/*
 * Remove filesystem from the UUID table.
 */
STATIC void
xfs_uuid_unmount(
	xfs_mount_t	*mp)
{
	uuid_table_remove(&mp->m_sb.sb_uuid);
}

/*
 * Used to log changes to the superblock unit and width fields which could
 * be altered by the mount options. Only the first superblock is updated.
 */
STATIC void
xfs_mount_log_sbunit(
	xfs_mount_t	*mp,
	__int64_t	fields)
{
	xfs_trans_t	*tp;

	ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID));

	tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
	if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
				XFS_DEFAULT_LOG_COUNT)) {
		xfs_trans_cancel(tp, 0);
		return;
	}
	xfs_mod_sb(tp, fields);
	xfs_trans_commit(tp, 0);
}


#ifdef HAVE_PERCPU_SB
/*
 * Per-cpu incore superblock counters
 *
 * Simple concept, difficult implementation
 *
 * Basically, replace the incore superblock counters with a distributed per cpu
 * counter for contended fields (e.g. free block count).
 *
 * Difficulties arise in that the incore sb is used for ENOSPC checking, and
 * hence needs to be accurately read when we are running low on space. Hence
 * there is a method to enable and disable the per-cpu counters based on how
 * much "stuff" is available in them.
 *
 * Basically, a counter is enabled if there is enough free resource to justify
 * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local
 * ENOSPC), then we disable the counters to synchronise all callers and
 * re-distribute the available resources.
 *
 * If, once we redistributed the available resources, we still get a failure,
 * we disable the per-cpu counter and go through the slow path.
 *
 * The slow path is the current xfs_mod_incore_sb() function.  This means that
 * when we disable a per-cpu counter, we need to drain its resources back to
 * the global superblock. We do this after disabling the counter to prevent
 * more threads from queueing up on the counter.
 *
 * Essentially, this means that we still need a lock in the fast path to enable
 * synchronisation between the global counters and the per-cpu counters. This
 * is not a problem because the lock will be local to a CPU almost all the time
 * and have little contention except when we get to ENOSPC conditions.
 *
 * Basically, this lock becomes a barrier that enables us to lock out the fast
 * path while we do things like enabling and disabling counters and
 * synchronising the counters.
 *
 * Locking rules:
 *
 *	1. XFS_SB_LOCK() before picking up per-cpu locks
 *	2. per-cpu locks always picked up via for_each_online_cpu() order
 *	3. accurate counter sync requires XFS_SB_LOCK + per cpu locks
 *	4. modifying per-cpu counters requires holding per-cpu lock
 *	5. modifying global counters requires holding XFS_SB_LOCK
 *	6. enabling or disabling a counter requires holding the XFS_SB_LOCK
 *	   and _none_ of the per-cpu locks.
 *
 * Disabled counters are only ever re-enabled by a balance operation
 * that results in more free resources per CPU than a given threshold.
 * To ensure counters don't remain disabled, they are rebalanced when
 * the global resource goes above a higher threshold (i.e. some hysteresis
 * is present to prevent thrashing).
 */

#ifdef CONFIG_HOTPLUG_CPU
/*
 * hot-plug CPU notifier support.
 *
 * We need a notifier per filesystem as we need to be able to identify
 * the filesystem to balance the counters out. This is achieved by
 * having a notifier block embedded in the xfs_mount_t and doing pointer
 * magic to get the mount pointer from the notifier block address.
 */
STATIC int
xfs_icsb_cpu_notify(
	struct notifier_block *nfb,
	unsigned long action,
	void *hcpu)
{
	xfs_icsb_cnts_t *cntp;
	xfs_mount_t	*mp;
	int		s;

	mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier);
	cntp = (xfs_icsb_cnts_t *)
			per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu);
	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		/* Easy Case - initialize the area and locks, and
		 * then rebalance when online does everything else for us. */
		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
		break;
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		xfs_icsb_lock(mp);
		xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0);
		xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0);
		xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0);
		xfs_icsb_unlock(mp);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		/* Disable all the counters, then fold the dead cpu's
		 * count into the total on the global superblock and
		 * re-enable the counters. */
		xfs_icsb_lock(mp);
		s = XFS_SB_LOCK(mp);
		xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
		xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
		xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS);

		mp->m_sb.sb_icount += cntp->icsb_icount;
		mp->m_sb.sb_ifree += cntp->icsb_ifree;
		mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;

		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));

		xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT,
					 XFS_ICSB_SB_LOCKED, 0);
		xfs_icsb_balance_counter(mp, XFS_SBS_IFREE,
					 XFS_ICSB_SB_LOCKED, 0);
		xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS,
					 XFS_ICSB_SB_LOCKED, 0);
		XFS_SB_UNLOCK(mp, s);
		xfs_icsb_unlock(mp);
		break;
	}

	return NOTIFY_OK;
}
#endif /* CONFIG_HOTPLUG_CPU */

int
xfs_icsb_init_counters(
	xfs_mount_t	*mp)
{
	xfs_icsb_cnts_t *cntp;
	int		i;

	mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
	if (mp->m_sb_cnts == NULL)
		return -ENOMEM;

#ifdef CONFIG_HOTPLUG_CPU
	mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
	mp->m_icsb_notifier.priority = 0;
	register_hotcpu_notifier(&mp->m_icsb_notifier);
#endif /* CONFIG_HOTPLUG_CPU */

	for_each_online_cpu(i) {
		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
	}

	mutex_init(&mp->m_icsb_mutex);

	/*
	 * start with all counters disabled so that the
	 * initial balance kicks us off correctly
	 */
	mp->m_icsb_counters = -1;
	return 0;
}

void
xfs_icsb_reinit_counters(
	xfs_mount_t	*mp)
{
	xfs_icsb_lock(mp);
	/*
	 * start with all counters disabled so that the
	 * initial balance kicks us off correctly
	 */
	mp->m_icsb_counters = -1;
	xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0);
	xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0);
	xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0);
	xfs_icsb_unlock(mp);
}

STATIC void
xfs_icsb_destroy_counters(
	xfs_mount_t	*mp)
{
	if (mp->m_sb_cnts) {
		unregister_hotcpu_notifier(&mp->m_icsb_notifier);
		free_percpu(mp->m_sb_cnts);
	}
	mutex_destroy(&mp->m_icsb_mutex);
}
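/*
 * The helpers below implement a tiny bit-spinlock for each per-cpu
 * counter structure: xfs_icsb_lock_cntr() spins, backing off for a
 * microsecond per attempt, on an atomic test-and-set of
 * XFS_ICSB_FLAG_LOCK in icsb_flags, and xfs_icsb_unlock_cntr() simply
 * clears the bit again.
 */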
STATIC_INLINE void
xfs_icsb_lock_cntr(
	xfs_icsb_cnts_t	*icsbp)
{
	while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) {
		ndelay(1000);
	}
}

STATIC_INLINE void
xfs_icsb_unlock_cntr(
	xfs_icsb_cnts_t	*icsbp)
{
	clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags);
}


STATIC_INLINE void
xfs_icsb_lock_all_counters(
	xfs_mount_t	*mp)
{
	xfs_icsb_cnts_t *cntp;
	int		i;

	for_each_online_cpu(i) {
		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
		xfs_icsb_lock_cntr(cntp);
	}
}

STATIC_INLINE void
xfs_icsb_unlock_all_counters(
	xfs_mount_t	*mp)
{
	xfs_icsb_cnts_t *cntp;
	int		i;

	for_each_online_cpu(i) {
		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
		xfs_icsb_unlock_cntr(cntp);
	}
}

STATIC void
xfs_icsb_count(
	xfs_mount_t	*mp,
	xfs_icsb_cnts_t	*cnt,
	int		flags)
{
	xfs_icsb_cnts_t *cntp;
	int		i;

	memset(cnt, 0, sizeof(xfs_icsb_cnts_t));

	if (!(flags & XFS_ICSB_LAZY_COUNT))
		xfs_icsb_lock_all_counters(mp);

	for_each_online_cpu(i) {
		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
		cnt->icsb_icount += cntp->icsb_icount;
		cnt->icsb_ifree += cntp->icsb_ifree;
		cnt->icsb_fdblocks += cntp->icsb_fdblocks;
	}

	if (!(flags & XFS_ICSB_LAZY_COUNT))
		xfs_icsb_unlock_all_counters(mp);
}

STATIC int
xfs_icsb_counter_disabled(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field)
{
	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
	return test_bit(field, &mp->m_icsb_counters);
}

STATIC int
xfs_icsb_disable_counter(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field)
{
	xfs_icsb_cnts_t	cnt;

	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));

	/*
	 * If we are already disabled, then there is nothing to do
	 * here. We check before locking all the counters to avoid
	 * the expensive lock operation when being called in the
	 * slow path and the counter is already disabled. This is
	 * safe because the only time we set or clear this state is under
	 * the m_icsb_mutex.
	 */
	if (xfs_icsb_counter_disabled(mp, field))
		return 0;

	xfs_icsb_lock_all_counters(mp);
	if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
		/* drain back to superblock */

		xfs_icsb_count(mp, &cnt, XFS_ICSB_SB_LOCKED|XFS_ICSB_LAZY_COUNT);
		switch (field) {
		case XFS_SBS_ICOUNT:
			mp->m_sb.sb_icount = cnt.icsb_icount;
			break;
		case XFS_SBS_IFREE:
			mp->m_sb.sb_ifree = cnt.icsb_ifree;
			break;
		case XFS_SBS_FDBLOCKS:
			mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
			break;
		default:
			BUG();
		}
	}

	xfs_icsb_unlock_all_counters(mp);

	return 0;
}

STATIC void
xfs_icsb_enable_counter(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field,
	uint64_t	count,
	uint64_t	resid)
{
	xfs_icsb_cnts_t	*cntp;
	int		i;

	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));

	xfs_icsb_lock_all_counters(mp);
	for_each_online_cpu(i) {
		cntp = per_cpu_ptr(mp->m_sb_cnts, i);
		switch (field) {
		case XFS_SBS_ICOUNT:
			cntp->icsb_icount = count + resid;
			break;
		case XFS_SBS_IFREE:
			cntp->icsb_ifree = count + resid;
			break;
		case XFS_SBS_FDBLOCKS:
			cntp->icsb_fdblocks = count + resid;
			break;
		default:
			BUG();
			break;
		}
		resid = 0;
	}
	clear_bit(field, &mp->m_icsb_counters);
	xfs_icsb_unlock_all_counters(mp);
}

void
xfs_icsb_sync_counters_flags(
	xfs_mount_t	*mp,
	int		flags)
{
	xfs_icsb_cnts_t	cnt;
	int		s;

	/* Pass 1: lock all counters and total them up */
	if ((flags & XFS_ICSB_SB_LOCKED) == 0)
		s = XFS_SB_LOCK(mp);

	xfs_icsb_count(mp, &cnt, flags);

	/* Pass 2: update mp->m_sb fields */
	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
		mp->m_sb.sb_icount = cnt.icsb_icount;
	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
		mp->m_sb.sb_ifree = cnt.icsb_ifree;
	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
		mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;

	if ((flags & XFS_ICSB_SB_LOCKED) == 0)
		XFS_SB_UNLOCK(mp, s);
}

/*
 * Accurate update of per-cpu counters to incore superblock
 */
STATIC void
xfs_icsb_sync_counters(
	xfs_mount_t	*mp)
{
	xfs_icsb_sync_counters_flags(mp, 0);
}

/*
 * Balance and enable/disable counters as necessary.
 *
 * Thresholds for re-enabling counters are somewhat magic.  Inode counts are
 * chosen to be the same number as a single on-disk inode allocation chunk
 * per CPU, and the free block count is kept far enough from zero that we
 * aren't going to thrash when we get near ENOSPC.  We also need to supply a
 * minimum we require per CPU to prevent looping endlessly when the space
 * allocator asks for more than will be distributed to a single CPU but each
 * CPU has enough blocks to be reenabled.
 *
 * Note that we can be called when counters are already disabled.
 * xfs_icsb_disable_counter() optimises the counter locking in this case to
 * prevent locking every per-cpu counter needlessly.
 */
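/*
 * (illustrative) With 4 online CPUs and sb_fdblocks = 10000,
 * xfs_icsb_balance_counter() below hands each CPU 2500 free blocks
 * (the first CPU also receives the residual).  2500 exceeds
 * XFS_ICSB_FDBLK_CNTR_REENABLE, so the counter is re-enabled; had the
 * per-cpu share fallen below that threshold (or below min_per_cpu)
 * the counter would stay disabled and all updates would take the
 * slow path.
 */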
/*
 * Balance and enable/disable counters as necessary.
 *
 * Thresholds for re-enabling counters are somewhat magic. Inode counts are
 * chosen to be the same number as a single on-disk allocation chunk per CPU,
 * and free blocks have to be far enough from zero that we don't thrash when
 * we get near ENOSPC. We also need to supply a minimum we require per cpu to
 * prevent looping endlessly when xfs_alloc_space asks for more than will
 * be distributed to a single CPU but each CPU has enough blocks to be
 * reenabled.
 *
 * Note that we can be called when counters are already disabled.
 * xfs_icsb_disable_counter() optimises the counter locking in this case to
 * prevent locking every per-cpu counter needlessly.
 */

#define XFS_ICSB_INO_CNTR_REENABLE	(uint64_t)64
#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
		(uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp))
STATIC void
xfs_icsb_balance_counter(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field,
	int		flags,
	int		min_per_cpu)
{
	uint64_t	count, resid;
	int		weight = num_online_cpus();
	int		s;
	uint64_t	min = (uint64_t)min_per_cpu;

	if (!(flags & XFS_ICSB_SB_LOCKED))
		s = XFS_SB_LOCK(mp);

	/* disable counter and sync counter */
	xfs_icsb_disable_counter(mp, field);

	/* update counters - first CPU gets the residual */
	switch (field) {
	case XFS_SBS_ICOUNT:
		count = mp->m_sb.sb_icount;
		resid = do_div(count, weight);
		if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
			goto out;
		break;
	case XFS_SBS_IFREE:
		count = mp->m_sb.sb_ifree;
		resid = do_div(count, weight);
		if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
			goto out;
		break;
	case XFS_SBS_FDBLOCKS:
		count = mp->m_sb.sb_fdblocks;
		resid = do_div(count, weight);
		if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp)))
			goto out;
		break;
	default:
		BUG();
		count = resid = 0;	/* quiet, gcc */
		break;
	}

	xfs_icsb_enable_counter(mp, field, count, resid);
out:
	if (!(flags & XFS_ICSB_SB_LOCKED))
		XFS_SB_UNLOCK(mp, s);
}
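/*
 * Worked example (hypothetical numbers): with 4 online CPUs and
 * mp->m_sb.sb_icount == 259, do_div() leaves count == 64 and resid == 3.
 * count is not below XFS_ICSB_INO_CNTR_REENABLE (64), so the counter is
 * re-enabled with the first online CPU seeded with 67 and the remaining
 * CPUs with 64 each.  Had sb_icount been 255, count would be 63 and the
 * counter would stay disabled, leaving mp->m_sb.sb_icount authoritative.
 */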
int
xfs_icsb_modify_counters(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field,
	int64_t		delta,
	int		rsvd)
{
	xfs_icsb_cnts_t	*icsbp;
	long long	lcounter;	/* long counter for 64 bit fields */
	int		cpu, ret = 0, s;

	might_sleep();
again:
	cpu = get_cpu();
	icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu);

	/*
	 * if the counter is disabled, go to the slow path
	 */
	if (unlikely(xfs_icsb_counter_disabled(mp, field)))
		goto slow_path;
	xfs_icsb_lock_cntr(icsbp);
	if (unlikely(xfs_icsb_counter_disabled(mp, field))) {
		xfs_icsb_unlock_cntr(icsbp);
		goto slow_path;
	}

	switch (field) {
	case XFS_SBS_ICOUNT:
		lcounter = icsbp->icsb_icount;
		lcounter += delta;
		if (unlikely(lcounter < 0))
			goto balance_counter;
		icsbp->icsb_icount = lcounter;
		break;

	case XFS_SBS_IFREE:
		lcounter = icsbp->icsb_ifree;
		lcounter += delta;
		if (unlikely(lcounter < 0))
			goto balance_counter;
		icsbp->icsb_ifree = lcounter;
		break;

	case XFS_SBS_FDBLOCKS:
		BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);

		lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
		lcounter += delta;
		if (unlikely(lcounter < 0))
			goto balance_counter;
		icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
		break;
	default:
		BUG();
		break;
	}
	xfs_icsb_unlock_cntr(icsbp);
	put_cpu();
	return 0;

slow_path:
	put_cpu();

	/*
	 * serialise with a mutex so we don't burn lots of cpu on
	 * the superblock lock. We still need to hold the superblock
	 * lock, however, when we modify the global structures.
	 */
	xfs_icsb_lock(mp);

	/*
	 * Now running atomically.
	 *
	 * If the counter is enabled, someone has beaten us to rebalancing.
	 * Drop the lock and try again in the fast path....
	 */
	if (!(xfs_icsb_counter_disabled(mp, field))) {
		xfs_icsb_unlock(mp);
		goto again;
	}

	/*
	 * The counter is currently disabled. Because we are
	 * running atomically here, we know a rebalance cannot
	 * be in progress. Hence we can go straight to operating
	 * on the global superblock. We do not call xfs_mod_incore_sb()
	 * here even though we need to get the SB_LOCK. Doing so
	 * will cause us to re-enter this function and deadlock.
	 * Hence we get the SB_LOCK ourselves and then call
	 * xfs_mod_incore_sb_unlocked() as the unlocked path operates
	 * directly on the global counters.
	 */
	s = XFS_SB_LOCK(mp);
	ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
	XFS_SB_UNLOCK(mp, s);

	/*
	 * Now that we've modified the global superblock, we
	 * may be able to re-enable the distributed counters
	 * (e.g. lots of space just got freed). After that
	 * we are done.
	 */
	if (ret != ENOSPC)
		xfs_icsb_balance_counter(mp, field, 0, 0);
	xfs_icsb_unlock(mp);
	return ret;

balance_counter:
	xfs_icsb_unlock_cntr(icsbp);
	put_cpu();

	/*
	 * We may have multiple threads here if multiple per-cpu
	 * counters run dry at the same time. This will mean we can
	 * do more balances than strictly necessary but it is not
	 * the common slowpath case.
	 */
	xfs_icsb_lock(mp);

	/*
	 * Now running atomically.
	 *
	 * This will leave the counter in the correct state for future
	 * accesses. After the rebalance, we simply try again and our retry
	 * will either succeed through the fast path or slow path without
	 * another balance operation being required.
	 */
	xfs_icsb_balance_counter(mp, field, 0, delta);
	xfs_icsb_unlock(mp);
	goto again;
}

#endif
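/*
 * Illustrative flow (hypothetical numbers) for xfs_icsb_modify_counters():
 * freeing 100 blocks on CPU 2 takes only that CPU's counter lock and adds
 * 100 to its icsb_fdblocks.  An allocation that would drive the local
 * counter negative jumps to balance_counter, which folds all per-cpu
 * values back into the superblock and redistributes them before retrying.
 * Only when the filesystem as a whole cannot satisfy the request does the
 * slow path apply the delta to the global superblock and return ENOSPC.
 */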