/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_dir2_sf.h"
#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
#include "xfs_rw.h"
#include "xfs_quota.h"
#include "xfs_fsops.h"

STATIC void	xfs_mount_log_sb(xfs_mount_t *, __int64_t);
STATIC int	xfs_uuid_mount(xfs_mount_t *);
STATIC void	xfs_uuid_unmount(xfs_mount_t *mp);
STATIC void	xfs_unmountfs_wait(xfs_mount_t *);


#ifdef HAVE_PERCPU_SB
STATIC void	xfs_icsb_destroy_counters(xfs_mount_t *);
STATIC void	xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
						int, int);
STATIC void	xfs_icsb_sync_counters(xfs_mount_t *);
STATIC int	xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
						int64_t, int);
STATIC int	xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);

#else

#define xfs_icsb_destroy_counters(mp)			do { } while (0)
#define xfs_icsb_balance_counter(mp, a, b, c)		do { } while (0)
#define xfs_icsb_sync_counters(mp)			do { } while (0)
#define xfs_icsb_modify_counters(mp, a, b, c)		do { } while (0)

#endif

static const struct {
	short offset;
	short type;	/* 0 = integer
			 * 1 = binary / string (no translation)
			 */
} xfs_sb_info[] = {
	{ offsetof(xfs_sb_t, sb_magicnum), 0 },
	{ offsetof(xfs_sb_t, sb_blocksize), 0 },
	{ offsetof(xfs_sb_t, sb_dblocks), 0 },
	{ offsetof(xfs_sb_t, sb_rblocks), 0 },
	{ offsetof(xfs_sb_t, sb_rextents), 0 },
	{ offsetof(xfs_sb_t, sb_uuid), 1 },
	{ offsetof(xfs_sb_t, sb_logstart), 0 },
	{ offsetof(xfs_sb_t, sb_rootino), 0 },
	{ offsetof(xfs_sb_t, sb_rbmino), 0 },
	{ offsetof(xfs_sb_t, sb_rsumino), 0 },
	{ offsetof(xfs_sb_t, sb_rextsize), 0 },
	{ offsetof(xfs_sb_t, sb_agblocks), 0 },
	{ offsetof(xfs_sb_t, sb_agcount), 0 },
	{ offsetof(xfs_sb_t, sb_rbmblocks), 0 },
	{ offsetof(xfs_sb_t, sb_logblocks), 0 },
	{ offsetof(xfs_sb_t, sb_versionnum), 0 },
	{ offsetof(xfs_sb_t, sb_sectsize), 0 },
	{ offsetof(xfs_sb_t, sb_inodesize), 0 },
	{ offsetof(xfs_sb_t, sb_inopblock), 0 },
	{ offsetof(xfs_sb_t, sb_fname[0]), 1 },
	{ offsetof(xfs_sb_t, sb_blocklog), 0 },
	{ offsetof(xfs_sb_t, sb_sectlog), 0 },
	{ offsetof(xfs_sb_t, sb_inodelog), 0 },
	{ offsetof(xfs_sb_t, sb_inopblog), 0 },
	{ offsetof(xfs_sb_t, sb_agblklog), 0 },
	{ offsetof(xfs_sb_t, sb_rextslog), 0 },
	{ offsetof(xfs_sb_t, sb_inprogress), 0 },
	{ offsetof(xfs_sb_t, sb_imax_pct), 0 },
	{ offsetof(xfs_sb_t, sb_icount), 0 },
	{ offsetof(xfs_sb_t, sb_ifree), 0 },
	{ offsetof(xfs_sb_t, sb_fdblocks), 0 },
	{ offsetof(xfs_sb_t, sb_frextents), 0 },
	{ offsetof(xfs_sb_t, sb_uquotino), 0 },
	{ offsetof(xfs_sb_t, sb_gquotino), 0 },
	{ offsetof(xfs_sb_t, sb_qflags), 0 },
	{ offsetof(xfs_sb_t, sb_flags), 0 },
	{ offsetof(xfs_sb_t, sb_shared_vn), 0 },
	{ offsetof(xfs_sb_t, sb_inoalignmt), 0 },
	{ offsetof(xfs_sb_t, sb_unit), 0 },
	{ offsetof(xfs_sb_t, sb_width), 0 },
	{ offsetof(xfs_sb_t, sb_dirblklog), 0 },
	{ offsetof(xfs_sb_t, sb_logsectlog), 0 },
	{ offsetof(xfs_sb_t, sb_logsectsize), 0 },
	{ offsetof(xfs_sb_t, sb_logsunit), 0 },
	{ offsetof(xfs_sb_t, sb_features2), 0 },
	{ offsetof(xfs_sb_t, sb_bad_features2), 0 },
	{ sizeof(xfs_sb_t), 0 }
};

/*
 * Return a pointer to an initialized xfs_mount structure.
 */
xfs_mount_t *
xfs_mount_init(void)
{
	xfs_mount_t *mp;

	mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP);

	if (xfs_icsb_init_counters(mp)) {
		mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
	}

	spin_lock_init(&mp->m_sb_lock);
	mutex_init(&mp->m_ilock);
	mutex_init(&mp->m_growlock);
	atomic_set(&mp->m_active_trans, 0);

	return mp;
}

/*
 * Free up the resources associated with a mount structure.  Assume that
 * the structure was initially zeroed, so we can tell which fields got
 * initialized.
 */
void
xfs_mount_free(
	xfs_mount_t	*mp)
{
	if (mp->m_perag) {
		int	agno;

		for (agno = 0; agno < mp->m_maxagi; agno++)
			if (mp->m_perag[agno].pagb_list)
				kmem_free(mp->m_perag[agno].pagb_list,
					  sizeof(xfs_perag_busy_t) *
						XFS_PAGB_NUM_SLOTS);
		kmem_free(mp->m_perag,
			  sizeof(xfs_perag_t) * mp->m_sb.sb_agcount);
	}

	spinlock_destroy(&mp->m_ail_lock);
	spinlock_destroy(&mp->m_sb_lock);
	mutex_destroy(&mp->m_ilock);
	mutex_destroy(&mp->m_growlock);
	if (mp->m_quotainfo)
		XFS_QM_DONE(mp);

	if (mp->m_fsname != NULL)
		kmem_free(mp->m_fsname, mp->m_fsname_len);
	if (mp->m_rtname != NULL)
		kmem_free(mp->m_rtname, strlen(mp->m_rtname) + 1);
	if (mp->m_logname != NULL)
		kmem_free(mp->m_logname, strlen(mp->m_logname) + 1);

	xfs_icsb_destroy_counters(mp);
}

/*
 * Check size of device based on the (data/realtime) block count.
 * Note: this check is used by the growfs code as well as mount.
 */
int
xfs_sb_validate_fsb_count(
	xfs_sb_t	*sbp,
	__uint64_t	nblocks)
{
	ASSERT(PAGE_SHIFT >= sbp->sb_blocklog);
	ASSERT(sbp->sb_blocklog >= BBSHIFT);

#if XFS_BIG_BLKNOS	/* Limited by ULONG_MAX of page cache index */
	if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
		return E2BIG;
#else			/* Limited by UINT_MAX of sectors */
	if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX)
		return E2BIG;
#endif
	return 0;
}

/*
 * Check the validity of the SB found.
 */
STATIC int
xfs_mount_validate_sb(
	xfs_mount_t	*mp,
	xfs_sb_t	*sbp,
	int		flags)
{
	/*
	 * If the log device and data device have the
	 * same device number, the log is internal.
	 * Consequently, the sb_logstart should be non-zero.
	 * If we have a zero sb_logstart in this case, we may be trying to mount
	 * a volume filesystem in a non-volume manner.
	 */
	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
		xfs_fs_mount_cmn_err(flags, "bad magic number");
		return XFS_ERROR(EWRONGFS);
	}

	if (!xfs_sb_good_version(sbp)) {
		xfs_fs_mount_cmn_err(flags, "bad version");
		return XFS_ERROR(EWRONGFS);
	}

	if (unlikely(
	    sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
		xfs_fs_mount_cmn_err(flags,
			"filesystem is marked as having an external log; "
			"specify logdev on the\nmount command line.");
		return XFS_ERROR(EINVAL);
	}

	if (unlikely(
	    sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) {
		xfs_fs_mount_cmn_err(flags,
			"filesystem is marked as having an internal log; "
			"do not specify logdev on\nthe mount command line.");
		return XFS_ERROR(EINVAL);
	}

	/*
	 * More sanity checking.  These were stolen directly from
	 * xfs_repair.
	 */
	if (unlikely(
	    sbp->sb_agcount <= 0 ||
	    sbp->sb_sectsize < XFS_MIN_SECTORSIZE ||
	    sbp->sb_sectsize > XFS_MAX_SECTORSIZE ||
	    sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG ||
	    sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG ||
	    sbp->sb_blocksize < XFS_MIN_BLOCKSIZE ||
	    sbp->sb_blocksize > XFS_MAX_BLOCKSIZE ||
	    sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG ||
	    sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG ||
	    sbp->sb_inodesize < XFS_DINODE_MIN_SIZE ||
	    sbp->sb_inodesize > XFS_DINODE_MAX_SIZE ||
	    sbp->sb_inodelog < XFS_DINODE_MIN_LOG ||
	    sbp->sb_inodelog > XFS_DINODE_MAX_LOG ||
	    (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) ||
	    (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) ||
	    (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) ||
	    (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) {
		xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed");
		return XFS_ERROR(EFSCORRUPTED);
	}

	/*
	 * Sanity check AG count, size fields against data size field
	 */
	if (unlikely(
	    sbp->sb_dblocks == 0 ||
	    sbp->sb_dblocks >
	     (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks ||
	    sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) *
			      sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) {
		xfs_fs_mount_cmn_err(flags, "SB sanity check 2 failed");
		return XFS_ERROR(EFSCORRUPTED);
	}

	if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
	    xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
		xfs_fs_mount_cmn_err(flags,
			"file system too large to be mounted on this system.");
		return XFS_ERROR(E2BIG);
	}

	if (unlikely(sbp->sb_inprogress)) {
		xfs_fs_mount_cmn_err(flags, "file system busy");
		return XFS_ERROR(EFSCORRUPTED);
	}

	/*
	 * Version 1 directory format has never worked on Linux.
	 */
	if (unlikely(!xfs_sb_version_hasdirv2(sbp))) {
		xfs_fs_mount_cmn_err(flags,
			"file system using version 1 directory format");
		return XFS_ERROR(ENOSYS);
	}

	/*
	 * Until this is fixed only page-sized or smaller data blocks work.
	 */
	if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
		xfs_fs_mount_cmn_err(flags,
			"file system with blocksize %d bytes",
			sbp->sb_blocksize);
		xfs_fs_mount_cmn_err(flags,
			"only pagesize (%ld) or less will currently work.",
			PAGE_SIZE);
		return XFS_ERROR(ENOSYS);
	}

	return 0;
}

STATIC void
xfs_initialize_perag_icache(
	xfs_perag_t	*pag)
{
	if (!pag->pag_ici_init) {
		rwlock_init(&pag->pag_ici_lock);
		INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
		pag->pag_ici_init = 1;
	}
}

xfs_agnumber_t
xfs_initialize_perag(
	xfs_mount_t	*mp,
	xfs_agnumber_t	agcount)
{
	xfs_agnumber_t	index, max_metadata;
	xfs_perag_t	*pag;
	xfs_agino_t	agino;
	xfs_ino_t	ino;
	xfs_sb_t	*sbp = &mp->m_sb;
	xfs_ino_t	max_inum = XFS_MAXINUMBER_32;

	/* Check to see if the filesystem can overflow 32 bit inodes */
	agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);

	/* Clear the mount flag if no inode can overflow 32 bits
	 * on this filesystem, or if specifically requested..
	 */
	if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > max_inum) {
		mp->m_flags |= XFS_MOUNT_32BITINODES;
	} else {
		mp->m_flags &= ~XFS_MOUNT_32BITINODES;
	}

	/* If we can overflow then setup the ag headers accordingly */
	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
		/* Calculate how much should be reserved for inodes to
		 * meet the max inode percentage.
		 */
		if (mp->m_maxicount) {
			__uint64_t icount;

			icount = sbp->sb_dblocks * sbp->sb_imax_pct;
			do_div(icount, 100);
			icount += sbp->sb_agblocks - 1;
			do_div(icount, sbp->sb_agblocks);
			max_metadata = icount;
		} else {
			max_metadata = agcount;
		}
		for (index = 0; index < agcount; index++) {
			ino = XFS_AGINO_TO_INO(mp, index, agino);
			if (ino > max_inum) {
				index++;
				break;
			}

			/* This ag is preferred for inodes */
			pag = &mp->m_perag[index];
			pag->pagi_inodeok = 1;
			if (index < max_metadata)
				pag->pagf_metadata = 1;
			xfs_initialize_perag_icache(pag);
		}
	} else {
		/* Setup default behavior for smaller filesystems */
		for (index = 0; index < agcount; index++) {
			pag = &mp->m_perag[index];
			pag->pagi_inodeok = 1;
			xfs_initialize_perag_icache(pag);
		}
	}
	return index;
}

void
xfs_sb_from_disk(
	xfs_sb_t	*to,
	xfs_dsb_t	*from)
{
	to->sb_magicnum = be32_to_cpu(from->sb_magicnum);
	to->sb_blocksize = be32_to_cpu(from->sb_blocksize);
	to->sb_dblocks = be64_to_cpu(from->sb_dblocks);
	to->sb_rblocks = be64_to_cpu(from->sb_rblocks);
	to->sb_rextents = be64_to_cpu(from->sb_rextents);
	memcpy(&to->sb_uuid, &from->sb_uuid, sizeof(to->sb_uuid));
	to->sb_logstart = be64_to_cpu(from->sb_logstart);
	to->sb_rootino = be64_to_cpu(from->sb_rootino);
	to->sb_rbmino = be64_to_cpu(from->sb_rbmino);
	to->sb_rsumino = be64_to_cpu(from->sb_rsumino);
	to->sb_rextsize = be32_to_cpu(from->sb_rextsize);
	to->sb_agblocks = be32_to_cpu(from->sb_agblocks);
	to->sb_agcount = be32_to_cpu(from->sb_agcount);
	to->sb_rbmblocks = be32_to_cpu(from->sb_rbmblocks);
	to->sb_logblocks = be32_to_cpu(from->sb_logblocks);
	to->sb_versionnum = be16_to_cpu(from->sb_versionnum);
	to->sb_sectsize = be16_to_cpu(from->sb_sectsize);
	to->sb_inodesize = be16_to_cpu(from->sb_inodesize);
	to->sb_inopblock = be16_to_cpu(from->sb_inopblock);
	memcpy(&to->sb_fname, &from->sb_fname, sizeof(to->sb_fname));
	to->sb_blocklog = from->sb_blocklog;
	to->sb_sectlog = from->sb_sectlog;
	to->sb_inodelog = from->sb_inodelog;
	to->sb_inopblog = from->sb_inopblog;
	to->sb_agblklog = from->sb_agblklog;
	to->sb_rextslog = from->sb_rextslog;
	to->sb_inprogress = from->sb_inprogress;
	to->sb_imax_pct = from->sb_imax_pct;
	to->sb_icount = be64_to_cpu(from->sb_icount);
	to->sb_ifree = be64_to_cpu(from->sb_ifree);
	to->sb_fdblocks = be64_to_cpu(from->sb_fdblocks);
	to->sb_frextents = be64_to_cpu(from->sb_frextents);
	to->sb_uquotino = be64_to_cpu(from->sb_uquotino);
	to->sb_gquotino = be64_to_cpu(from->sb_gquotino);
	to->sb_qflags = be16_to_cpu(from->sb_qflags);
	to->sb_flags = from->sb_flags;
	to->sb_shared_vn = from->sb_shared_vn;
	to->sb_inoalignmt = be32_to_cpu(from->sb_inoalignmt);
	to->sb_unit = be32_to_cpu(from->sb_unit);
	to->sb_width = be32_to_cpu(from->sb_width);
	to->sb_dirblklog = from->sb_dirblklog;
	to->sb_logsectlog = from->sb_logsectlog;
	to->sb_logsectsize = be16_to_cpu(from->sb_logsectsize);
	to->sb_logsunit = be32_to_cpu(from->sb_logsunit);
	to->sb_features2 = be32_to_cpu(from->sb_features2);
	to->sb_bad_features2 = be32_to_cpu(from->sb_bad_features2);
}

/*
 * Copy in core superblock to ondisk one.
 *
 * The fields argument is mask of superblock fields to copy.
 */
void
xfs_sb_to_disk(
	xfs_dsb_t	*to,
	xfs_sb_t	*from,
	__int64_t	fields)
{
	xfs_caddr_t	to_ptr = (xfs_caddr_t)to;
	xfs_caddr_t	from_ptr = (xfs_caddr_t)from;
	xfs_sb_field_t	f;
	int		first;
	int		size;

	ASSERT(fields);
	if (!fields)
		return;

	while (fields) {
		f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
		first = xfs_sb_info[f].offset;
		size = xfs_sb_info[f + 1].offset - first;

		ASSERT(xfs_sb_info[f].type == 0 || xfs_sb_info[f].type == 1);

		if (size == 1 || xfs_sb_info[f].type == 1) {
			memcpy(to_ptr + first, from_ptr + first, size);
		} else {
			switch (size) {
			case 2:
				*(__be16 *)(to_ptr + first) =
					cpu_to_be16(*(__u16 *)(from_ptr + first));
				break;
			case 4:
				*(__be32 *)(to_ptr + first) =
					cpu_to_be32(*(__u32 *)(from_ptr + first));
				break;
			case 8:
				*(__be64 *)(to_ptr + first) =
					cpu_to_be64(*(__u64 *)(from_ptr + first));
				break;
			default:
				ASSERT(0);
			}
		}

		fields &= ~(1LL << f);
	}
}

/*
 * xfs_readsb
 *
 * Does the initial read of the superblock.
 */
int
xfs_readsb(xfs_mount_t *mp, int flags)
{
	unsigned int	sector_size;
	unsigned int	extra_flags;
	xfs_buf_t	*bp;
	int		error;

	ASSERT(mp->m_sb_bp == NULL);
	ASSERT(mp->m_ddev_targp != NULL);

	/*
	 * Allocate a (locked) buffer to hold the superblock.
	 * This will be kept around at all times to optimize
	 * access to the superblock.
	 */
	sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
	extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED;

	bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
				BTOBB(sector_size), extra_flags);
	if (!bp || XFS_BUF_ISERROR(bp)) {
		xfs_fs_mount_cmn_err(flags, "SB read failed");
		error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
		goto fail;
	}
	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);

	/*
	 * Initialize the mount structure from the superblock.
	 * But first do some basic consistency checking.
	 */
	xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp));

	error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags);
	if (error) {
		xfs_fs_mount_cmn_err(flags, "SB validate failed");
		goto fail;
	}

	/*
	 * We must be able to do sector-sized and sector-aligned IO.
	 */
	if (sector_size > mp->m_sb.sb_sectsize) {
		xfs_fs_mount_cmn_err(flags,
			"device supports only %u byte sectors (not %u)",
			sector_size, mp->m_sb.sb_sectsize);
		error = ENOSYS;
		goto fail;
	}

	/*
	 * If device sector size is smaller than the superblock size,
	 * re-read the superblock so the buffer is correctly sized.
	 */
	if (sector_size < mp->m_sb.sb_sectsize) {
		XFS_BUF_UNMANAGE(bp);
		xfs_buf_relse(bp);
		sector_size = mp->m_sb.sb_sectsize;
		bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR,
					BTOBB(sector_size), extra_flags);
		if (!bp || XFS_BUF_ISERROR(bp)) {
			xfs_fs_mount_cmn_err(flags, "SB re-read failed");
			error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
			goto fail;
		}
		ASSERT(XFS_BUF_ISBUSY(bp));
		ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
	}

	/* Initialize per-cpu counters */
	xfs_icsb_reinit_counters(mp);

	mp->m_sb_bp = bp;
	xfs_buf_relse(bp);
	ASSERT(XFS_BUF_VALUSEMA(bp) > 0);
	return 0;

fail:
	if (bp) {
		XFS_BUF_UNMANAGE(bp);
		xfs_buf_relse(bp);
	}
	return error;
}


/*
 * xfs_mount_common
 *
 * Mount initialization code establishing various mount
 * fields from the superblock associated with the given
 * mount structure
 */
STATIC void
xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
{
	int	i;

	mp->m_agfrotor = mp->m_agirotor = 0;
	spin_lock_init(&mp->m_agirotor_lock);
	mp->m_maxagi = mp->m_sb.sb_agcount;
	mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
	mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
	mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
	mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
	mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
	mp->m_litino = sbp->sb_inodesize -
		((uint)sizeof(xfs_dinode_core_t) + (uint)sizeof(xfs_agino_t));
	mp->m_blockmask = sbp->sb_blocksize - 1;
	mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
	mp->m_blockwmask = mp->m_blockwsize - 1;
	INIT_LIST_HEAD(&mp->m_del_inodes);

	/*
	 * Setup for attributes, in case they get created.
	 * This value is for inodes getting attributes for the first time,
	 * the per-inode value is for old attribute values.
	 */
	ASSERT(sbp->sb_inodesize >= 256 && sbp->sb_inodesize <= 2048);
	switch (sbp->sb_inodesize) {
	case 256:
		mp->m_attroffset = XFS_LITINO(mp) -
				   XFS_BMDR_SPACE_CALC(MINABTPTRS);
		break;
	case 512:
	case 1024:
	case 2048:
		mp->m_attroffset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
		break;
	default:
		ASSERT(0);
	}
	ASSERT(mp->m_attroffset < XFS_LITINO(mp));

	for (i = 0; i < 2; i++) {
		mp->m_alloc_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
			xfs_alloc, i == 0);
		mp->m_alloc_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
			xfs_alloc, i == 0);
	}
	for (i = 0; i < 2; i++) {
		mp->m_bmap_dmxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
			xfs_bmbt, i == 0);
		mp->m_bmap_dmnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
			xfs_bmbt, i == 0);
	}
	for (i = 0; i < 2; i++) {
		mp->m_inobt_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize,
			xfs_inobt, i == 0);
		mp->m_inobt_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize,
			xfs_inobt, i == 0);
	}

	mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
	mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
					sbp->sb_inopblock);
	mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
}

/*
 * xfs_initialize_perag_data
 *
 * Read in each per-ag structure so we can count up the number of
 * allocated inodes, free inodes and used filesystem blocks as this
 * information is no longer persistent in the superblock. Once we have
 * this information, write it into the in-core superblock structure.
 */
STATIC int
xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
{
	xfs_agnumber_t	index;
	xfs_perag_t	*pag;
	xfs_sb_t	*sbp = &mp->m_sb;
	uint64_t	ifree = 0;
	uint64_t	ialloc = 0;
	uint64_t	bfree = 0;
	uint64_t	bfreelst = 0;
	uint64_t	btree = 0;
	int		error;

	for (index = 0; index < agcount; index++) {
		/*
		 * read the agf, then the agi. This gets us
		 * all the information we need and populates the
		 * per-ag structures for us.
		 */
		error = xfs_alloc_pagf_init(mp, NULL, index, 0);
		if (error)
			return error;

		error = xfs_ialloc_pagi_init(mp, NULL, index);
		if (error)
			return error;
		pag = &mp->m_perag[index];
		ifree += pag->pagi_freecount;
		ialloc += pag->pagi_count;
		bfree += pag->pagf_freeblks;
		bfreelst += pag->pagf_flcount;
		btree += pag->pagf_btreeblks;
	}
	/*
	 * Overwrite incore superblock counters with just-read data
	 */
	spin_lock(&mp->m_sb_lock);
	sbp->sb_ifree = ifree;
	sbp->sb_icount = ialloc;
	sbp->sb_fdblocks = bfree + bfreelst + btree;
	spin_unlock(&mp->m_sb_lock);

	/* Fixup the per-cpu counters as well. */
	xfs_icsb_reinit_counters(mp);

	return 0;
}

/*
 * Update alignment values based on mount options and sb values
 */
STATIC int
xfs_update_alignment(xfs_mount_t *mp, int mfsi_flags, __uint64_t *update_flags)
{
	xfs_sb_t	*sbp = &(mp->m_sb);

	if (mp->m_dalign && !(mfsi_flags & XFS_MFSI_SECOND)) {
		/*
		 * If stripe unit and stripe width are not multiples
		 * of the fs blocksize turn off alignment.
		 */
		if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
		    (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
			if (mp->m_flags & XFS_MOUNT_RETERR) {
				cmn_err(CE_WARN,
					"XFS: alignment check 1 failed");
				return XFS_ERROR(EINVAL);
			}
			mp->m_dalign = mp->m_swidth = 0;
		} else {
			/*
			 * Convert the stripe unit and width to FSBs.
			 */
			mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
			if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
				if (mp->m_flags & XFS_MOUNT_RETERR) {
					return XFS_ERROR(EINVAL);
				}
				xfs_fs_cmn_err(CE_WARN, mp,
					"stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)",
					mp->m_dalign, mp->m_swidth,
					sbp->sb_agblocks);

				mp->m_dalign = 0;
				mp->m_swidth = 0;
			} else if (mp->m_dalign) {
				mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
			} else {
				if (mp->m_flags & XFS_MOUNT_RETERR) {
					xfs_fs_cmn_err(CE_WARN, mp,
						"stripe alignment turned off: sunit(%d) less than bsize(%d)",
						mp->m_dalign,
						mp->m_blockmask + 1);
					return XFS_ERROR(EINVAL);
				}
				mp->m_swidth = 0;
			}
		}

		/*
		 * Update superblock with new values
		 * and log changes
		 */
		if (xfs_sb_version_hasdalign(sbp)) {
			if (sbp->sb_unit != mp->m_dalign) {
				sbp->sb_unit = mp->m_dalign;
				*update_flags |= XFS_SB_UNIT;
			}
			if (sbp->sb_width != mp->m_swidth) {
				sbp->sb_width = mp->m_swidth;
				*update_flags |= XFS_SB_WIDTH;
			}
		}
	} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
		    xfs_sb_version_hasdalign(&mp->m_sb)) {
		mp->m_dalign = sbp->sb_unit;
		mp->m_swidth = sbp->sb_width;
	}

	return 0;
}

/*
 * Set the maximum inode count for this filesystem
 */
STATIC void
xfs_set_maxicount(xfs_mount_t *mp)
{
	xfs_sb_t	*sbp = &(mp->m_sb);
	__uint64_t	icount;

	if (sbp->sb_imax_pct) {
		/*
		 * Make sure the maximum inode count is a multiple
		 * of the units we allocate inodes in.
		 */
		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
		do_div(icount, 100);
		do_div(icount, mp->m_ialloc_blks);
		mp->m_maxicount = (icount * mp->m_ialloc_blks) <<
				   sbp->sb_inopblog;
	} else {
		mp->m_maxicount = 0;
	}
}

/*
 * Set the default minimum read and write sizes unless
 * already specified in a mount option.
 * We use smaller I/O sizes when the file system
 * is being used for NFS service (wsync mount option).
 */
STATIC void
xfs_set_rw_sizes(xfs_mount_t *mp)
{
	xfs_sb_t	*sbp = &(mp->m_sb);
	int		readio_log, writeio_log;

	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
		if (mp->m_flags & XFS_MOUNT_WSYNC) {
			readio_log = XFS_WSYNC_READIO_LOG;
			writeio_log = XFS_WSYNC_WRITEIO_LOG;
		} else {
			readio_log = XFS_READIO_LOG_LARGE;
			writeio_log = XFS_WRITEIO_LOG_LARGE;
		}
	} else {
		readio_log = mp->m_readio_log;
		writeio_log = mp->m_writeio_log;
	}

	if (sbp->sb_blocklog > readio_log) {
		mp->m_readio_log = sbp->sb_blocklog;
	} else {
		mp->m_readio_log = readio_log;
	}
	mp->m_readio_blocks = 1 << (mp->m_readio_log - sbp->sb_blocklog);
	if (sbp->sb_blocklog > writeio_log) {
		mp->m_writeio_log = sbp->sb_blocklog;
	} else {
		mp->m_writeio_log = writeio_log;
	}
	mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog);
}

/*
 * Set whether we're using inode alignment.
 */
STATIC void
xfs_set_inoalignment(xfs_mount_t *mp)
{
	if (xfs_sb_version_hasalign(&mp->m_sb) &&
	    mp->m_sb.sb_inoalignmt >=
	    XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
		mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
	else
		mp->m_inoalign_mask = 0;
	/*
	 * If we are using stripe alignment, check whether
	 * the stripe unit is a multiple of the inode alignment
	 */
	if (mp->m_dalign && mp->m_inoalign_mask &&
	    !(mp->m_dalign & mp->m_inoalign_mask))
		mp->m_sinoalign = mp->m_dalign;
	else
		mp->m_sinoalign = 0;
}

/*
 * Check that the data (and log if separate) are an ok size.
 */
STATIC int
xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags)
{
	xfs_buf_t	*bp;
	xfs_daddr_t	d;
	int		error;

	d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
	if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
		cmn_err(CE_WARN, "XFS: size check 1 failed");
		return XFS_ERROR(E2BIG);
	}
	error = xfs_read_buf(mp, mp->m_ddev_targp,
			     d - XFS_FSS_TO_BB(mp, 1),
			     XFS_FSS_TO_BB(mp, 1), 0, &bp);
	if (!error) {
		xfs_buf_relse(bp);
	} else {
		cmn_err(CE_WARN, "XFS: size check 2 failed");
		if (error == ENOSPC)
			error = XFS_ERROR(E2BIG);
		return error;
	}

	if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) &&
	    mp->m_logdev_targp != mp->m_ddev_targp) {
		d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
		if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
			cmn_err(CE_WARN, "XFS: size check 3 failed");
			return XFS_ERROR(E2BIG);
		}
		error = xfs_read_buf(mp, mp->m_logdev_targp,
				     d - XFS_FSB_TO_BB(mp, 1),
				     XFS_FSB_TO_BB(mp, 1), 0, &bp);
		if (!error) {
			xfs_buf_relse(bp);
		} else {
			cmn_err(CE_WARN, "XFS: size check 3 failed");
			if (error == ENOSPC)
				error = XFS_ERROR(E2BIG);
			return error;
		}
	}
	return 0;
}

/*
 * xfs_mountfs
 *
 * This function does the following on an initial mount of a file system:
 *	- reads the superblock from disk and init the mount struct
 *	- if we're a 32-bit kernel, do a size check on the superblock
 *		so we don't mount terabyte filesystems
 *	- init mount struct realtime fields
 *	- allocate inode hash table for fs
 *	- init directory manager
 *	- perform recovery and init the log manager
 */
int
xfs_mountfs(
	xfs_mount_t	*mp,
	int		mfsi_flags)
{
	xfs_sb_t	*sbp = &(mp->m_sb);
	xfs_inode_t	*rip;
	bhv_vnode_t	*rvp = NULL;
	__uint64_t	resblks;
	__int64_t	update_flags = 0LL;
	uint		quotamount, quotaflags;
	int		agno;
	int		uuid_mounted = 0;
	int		error = 0;

	if (mp->m_sb_bp == NULL) {
		error = xfs_readsb(mp, mfsi_flags);
		if (error)
			return error;
	}
	xfs_mount_common(mp, sbp);

	/*
	 * Check for a mismatched features2 values. Older kernels
	 * read & wrote into the wrong sb offset for sb_features2
	 * on some platforms due to xfs_sb_t not being 64bit size aligned
	 * when sb_features2 was added, which made older superblock
	 * reading/writing routines swap it as a 64-bit value.
	 *
	 * For backwards compatibility, we make both slots equal.
	 *
	 * If we detect a mismatched field, we OR the set bits into the
	 * existing features2 field in case it has already been modified; we
	 * don't want to lose any features.
	 * We then update the bad location with the ORed value so that older
	 * kernels will see any features2 flags, and mark the two fields as
	 * needing updates once the transaction subsystem is online.
	 */
	if (xfs_sb_has_mismatched_features2(sbp)) {
		cmn_err(CE_WARN,
			"XFS: correcting sb_features alignment problem");
		sbp->sb_features2 |= sbp->sb_bad_features2;
		sbp->sb_bad_features2 = sbp->sb_features2;
		update_flags |= XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2;

		/*
		 * Re-check for ATTR2 in case it was found in bad_features2
		 * slot.
		 */
		if (xfs_sb_version_hasattr2(&mp->m_sb))
			mp->m_flags |= XFS_MOUNT_ATTR2;

	}

	/*
	 * Check if sb_agblocks is aligned at stripe boundary
	 * If sb_agblocks is NOT aligned turn off m_dalign since
	 * allocator alignment is within an ag, therefore ag has
	 * to be aligned at stripe boundary.
	 */
	error = xfs_update_alignment(mp, mfsi_flags, &update_flags);
	if (error)
		goto error1;

	xfs_alloc_compute_maxlevels(mp);
	xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
	xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
	xfs_ialloc_compute_maxlevels(mp);

	xfs_set_maxicount(mp);

	mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog);

	/*
	 * XFS uses the uuid from the superblock as the unique
	 * identifier for fsid.  We can not use the uuid from the volume
	 * since a single partition filesystem is identical to a single
	 * partition volume/filesystem.
	 */
	if ((mfsi_flags & XFS_MFSI_SECOND) == 0 &&
	    (mp->m_flags & XFS_MOUNT_NOUUID) == 0) {
		if (xfs_uuid_mount(mp)) {
			error = XFS_ERROR(EINVAL);
			goto error1;
		}
		uuid_mounted = 1;
	}

	/*
	 * Set the minimum read and write sizes
	 */
	xfs_set_rw_sizes(mp);

	/*
	 * Set the inode cluster size.
	 * This may still be overridden by the file system
	 * block size if it is larger than the chosen cluster size.
	 */
	mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;

	/*
	 * Set inode alignment fields
	 */
	xfs_set_inoalignment(mp);

	/*
	 * Check that the data (and log if separate) are an ok size.
	 */
	error = xfs_check_sizes(mp, mfsi_flags);
	if (error)
		goto error1;

	/*
	 * Initialize realtime fields in the mount structure
	 */
	error = xfs_rtmount_init(mp);
	if (error) {
		cmn_err(CE_WARN, "XFS: RT mount failed");
		goto error1;
	}

	/*
	 * For client case we are done now
	 */
	if (mfsi_flags & XFS_MFSI_CLIENT) {
		return 0;
	}

	/*
	 * Copies the low order bits of the timestamp and the randomly
	 * set "sequence" number out of a UUID.
	 */
	uuid_getnodeuniq(&sbp->sb_uuid, mp->m_fixedfsid);

	mp->m_dmevmask = 0;	/* not persistent; set after each mount */

	xfs_dir_mount(mp);

	/*
	 * Initialize the attribute manager's entries.
	 */
	mp->m_attr_magicpct = (mp->m_sb.sb_blocksize * 37) / 100;

	/*
	 * Initialize the precomputed transaction reservations values.
	 */
	xfs_trans_init(mp);

	/*
	 * Allocate and initialize the per-ag data.
	 */
	init_rwsem(&mp->m_peraglock);
	mp->m_perag =
		kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), KM_SLEEP);

	mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount);

	/*
	 * log's mount-time initialization.
	 * Perform 1st part recovery if needed.
	 */
	if (likely(sbp->sb_logblocks > 0)) {	/* check for volume case */
		error = xfs_log_mount(mp, mp->m_logdev_targp,
				      XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
				      XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
		if (error) {
			cmn_err(CE_WARN, "XFS: log mount failed");
			goto error2;
		}
	} else {	/* No log has been defined */
		cmn_err(CE_WARN, "XFS: no log defined");
		XFS_ERROR_REPORT("xfs_mountfs_int(1)", XFS_ERRLEVEL_LOW, mp);
		error = XFS_ERROR(EFSCORRUPTED);
		goto error2;
	}

	/*
	 * Now the log is mounted, we know if it was an unclean shutdown or
	 * not. If it was, then once the first phase of recovery has completed,
	 * we have consistent AG blocks on disk. We have not recovered EFIs yet,
	 * but they are recovered transactionally in the second recovery phase
	 * later.
	 *
	 * Hence we can safely re-initialise incore superblock counters from
	 * the per-ag data. These may not be correct if the filesystem was not
	 * cleanly unmounted, so we need to wait for recovery to finish before
	 * doing this.
	 *
	 * If the filesystem was cleanly unmounted, then we can trust the
	 * values in the superblock to be correct and we don't need to do
	 * anything here.
	 *
	 * If we are currently making the filesystem, the initialisation will
	 * fail as the perag data is in an undefined state.
	 */

	if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
	    !XFS_LAST_UNMOUNT_WAS_CLEAN(mp) &&
	     !mp->m_sb.sb_inprogress) {
		error = xfs_initialize_perag_data(mp, sbp->sb_agcount);
		if (error) {
			goto error2;
		}
	}
	/*
	 * Get and sanity-check the root inode.
	 * Save the pointer to it in the mount structure.
	 */
	error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0);
	if (error) {
		cmn_err(CE_WARN, "XFS: failed to read root inode");
		goto error3;
	}

	ASSERT(rip != NULL);
	rvp = XFS_ITOV(rip);

	if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) {
		cmn_err(CE_WARN, "XFS: corrupted root inode");
		cmn_err(CE_WARN, "Device %s - root %llu is not a directory",
			XFS_BUFTARG_NAME(mp->m_ddev_targp),
			(unsigned long long)rip->i_ino);
		xfs_iunlock(rip, XFS_ILOCK_EXCL);
		XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
				 mp);
		error = XFS_ERROR(EFSCORRUPTED);
		goto error4;
	}
	mp->m_rootip = rip;	/* save it */

	xfs_iunlock(rip, XFS_ILOCK_EXCL);

	/*
	 * Initialize realtime inode pointers in the mount structure
	 */
	error = xfs_rtmount_inodes(mp);
	if (error) {
		/*
		 * Free up the root inode.
		 */
		cmn_err(CE_WARN, "XFS: failed to read RT inodes");
		goto error4;
	}

	/*
	 * If fs is not mounted readonly, then update the superblock changes.
	 */
	if (update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY))
		xfs_mount_log_sb(mp, update_flags);

	/*
	 * Initialise the XFS quota management subsystem for this mount
	 */
	error = XFS_QM_INIT(mp, &quotamount, &quotaflags);
	if (error)
		goto error4;

	/*
	 * Finish recovering the file system.  This part needed to be
	 * delayed until after the root and real-time bitmap inodes
	 * were consistently read in.
	 */
	error = xfs_log_mount_finish(mp, mfsi_flags);
	if (error) {
		cmn_err(CE_WARN, "XFS: log mount finish failed");
		goto error4;
	}

	/*
	 * Complete the quota initialisation, post-log-replay component.
	 */
	error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags);
	if (error)
		goto error4;

	/*
	 * Now we are mounted, reserve a small amount of unused space for
	 * privileged transactions. This is needed so that transaction
	 * space required for critical operations can dip into this pool
	 * when at ENOSPC. This is needed for operations like create with
	 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
	 * are not allowed to use this reserved space.
	 *
	 * We default to 5% or 1024 fsbs of space reserved, whichever is
	 * smaller. This may drive us straight to ENOSPC on mount, but that
	 * implies we were already there on the last unmount.
	 */
	resblks = mp->m_sb.sb_dblocks;
	do_div(resblks, 20);
	resblks = min_t(__uint64_t, resblks, 1024);
	xfs_reserve_blocks(mp, &resblks, NULL);

	return 0;

 error4:
	/*
	 * Free up the root inode.
	 */
	VN_RELE(rvp);
 error3:
	xfs_log_unmount_dealloc(mp);
 error2:
	for (agno = 0; agno < sbp->sb_agcount; agno++)
		if (mp->m_perag[agno].pagb_list)
			kmem_free(mp->m_perag[agno].pagb_list,
			  sizeof(xfs_perag_busy_t) * XFS_PAGB_NUM_SLOTS);
	kmem_free(mp->m_perag, sbp->sb_agcount * sizeof(xfs_perag_t));
	mp->m_perag = NULL;
	/* FALLTHROUGH */
 error1:
	if (uuid_mounted)
		xfs_uuid_unmount(mp);
	xfs_freesb(mp);
	return error;
}

/*
 * xfs_unmountfs
 *
 * This flushes out the inodes, dquots and the superblock, unmounts the
 * log and makes sure that incore structures are freed.
 */
int
xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
{
	__uint64_t	resblks;

	/*
	 * We can potentially deadlock here if we have an inode cluster
	 * that has been freed but still has its buffer pinned in memory
	 * because the transaction is still sitting in an iclog. The stale
	 * inodes on that buffer will have their flush locks held until the
	 * transaction hits the disk and the callbacks run. The inode
	 * flush takes the flush lock unconditionally and with nothing to
	 * push out the iclog we will never get that unlocked. Hence we
	 * need to force the log first.
	 */
	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
	xfs_iflush_all(mp);

	XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);

	/*
	 * Flush out the log synchronously so that we know for sure
	 * that nothing is pinned.  This is important because bflush()
	 * will skip pinned buffers.
	 */
	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);

	xfs_binval(mp->m_ddev_targp);
	if (mp->m_rtdev_targp) {
		xfs_binval(mp->m_rtdev_targp);
	}

	/*
	 * Unreserve any blocks we have so that when we unmount we don't
	 * account the reserved free space as used. This is really only
	 * necessary for lazy superblock counting because it trusts the
	 * incore superblock counters to be absolutely correct on clean
	 * unmount.
	 *
	 * We don't bother correcting this elsewhere for lazy superblock
	 * counting because on mount of an unclean filesystem we reconstruct
	 * the correct counter value and this is irrelevant.
	 *
	 * For non-lazy counter filesystems, this doesn't matter at all because
	 * we only ever apply deltas to the superblock and hence the incore
	 * value does not matter....
	 */
	resblks = 0;
	xfs_reserve_blocks(mp, &resblks, NULL);

	xfs_log_sbcount(mp, 1);
	xfs_unmountfs_writesb(mp);
	xfs_unmountfs_wait(mp);			/* wait for async bufs */
	xfs_log_unmount(mp);			/* Done! No more fs ops. */

	xfs_freesb(mp);

	/*
	 * All inodes from this mount point should be freed.
	 */
	ASSERT(mp->m_inodes == NULL);

	xfs_unmountfs_close(mp, cr);
	if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
		xfs_uuid_unmount(mp);

#if defined(DEBUG) || defined(INDUCE_IO_ERROR)
	xfs_errortag_clearall(mp, 0);
#endif
	xfs_mount_free(mp);
	return 0;
}

void
xfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr)
{
	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
		xfs_free_buftarg(mp->m_logdev_targp, 1);
	if (mp->m_rtdev_targp)
		xfs_free_buftarg(mp->m_rtdev_targp, 1);
	xfs_free_buftarg(mp->m_ddev_targp, 0);
}

STATIC void
xfs_unmountfs_wait(xfs_mount_t *mp)
{
	if (mp->m_logdev_targp != mp->m_ddev_targp)
		xfs_wait_buftarg(mp->m_logdev_targp);
	if (mp->m_rtdev_targp)
		xfs_wait_buftarg(mp->m_rtdev_targp);
	xfs_wait_buftarg(mp->m_ddev_targp);
}

int
xfs_fs_writable(xfs_mount_t *mp)
{
	return !(xfs_test_for_freeze(mp) || XFS_FORCED_SHUTDOWN(mp) ||
		(mp->m_flags & XFS_MOUNT_RDONLY));
}

/*
 * xfs_log_sbcount
 *
 * Called either periodically to keep the on disk superblock values
 * roughly up to date or from unmount to make sure the values are
 * correct on a clean unmount.
 *
 * Note this code can be called during the process of freezing, so
 * we may need to use the transaction allocator which does not
 * block when the transaction subsystem is in its frozen state.
 */
int
xfs_log_sbcount(
	xfs_mount_t	*mp,
	uint		sync)
{
	xfs_trans_t	*tp;
	int		error;

	if (!xfs_fs_writable(mp))
		return 0;

	xfs_icsb_sync_counters(mp);

	/*
	 * we don't need to do this if we are updating the superblock
	 * counters on every modification.
	 */
	if (!xfs_sb_version_haslazysbcount(&mp->m_sb))
		return 0;

	tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT);
	error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
					XFS_DEFAULT_LOG_COUNT);
	if (error) {
		xfs_trans_cancel(tp, 0);
		return error;
	}

	xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS);
	if (sync)
		xfs_trans_set_sync(tp);
	xfs_trans_commit(tp, 0);

	return 0;
}

STATIC void
xfs_mark_shared_ro(
	xfs_mount_t	*mp,
	xfs_buf_t	*bp)
{
	xfs_dsb_t	*sb = XFS_BUF_TO_SBP(bp);
	__uint16_t	version;

	if (!(sb->sb_flags & XFS_SBF_READONLY))
		sb->sb_flags |= XFS_SBF_READONLY;

	version = be16_to_cpu(sb->sb_versionnum);
	if ((version & XFS_SB_VERSION_NUMBITS) != XFS_SB_VERSION_4 ||
	    !(version & XFS_SB_VERSION_SHAREDBIT))
		version |= XFS_SB_VERSION_SHAREDBIT;
	sb->sb_versionnum = cpu_to_be16(version);
}

int
xfs_unmountfs_writesb(xfs_mount_t *mp)
{
	xfs_buf_t	*sbp;
	int		error = 0;

	/*
	 * skip superblock write if fs is read-only, or
	 * if we are doing a forced umount.
	 */
	if (!((mp->m_flags & XFS_MOUNT_RDONLY) ||
		XFS_FORCED_SHUTDOWN(mp))) {

		sbp = xfs_getsb(mp, 0);

		/*
		 * mark shared-readonly if desired
		 */
		if (mp->m_mk_sharedro)
			xfs_mark_shared_ro(mp, sbp);

		XFS_BUF_UNDONE(sbp);
		XFS_BUF_UNREAD(sbp);
		XFS_BUF_UNDELAYWRITE(sbp);
		XFS_BUF_WRITE(sbp);
		XFS_BUF_UNASYNC(sbp);
		ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp);
		xfsbdstrat(mp, sbp);
		/* Never mind errors we might get here. */
		error = xfs_iowait(sbp);
		if (error)
			xfs_ioerror_alert("xfs_unmountfs_writesb",
					  mp, sbp, XFS_BUF_ADDR(sbp));
		if (error && mp->m_mk_sharedro)
			xfs_fs_cmn_err(CE_ALERT, mp,
				"Superblock write error detected while unmounting. Filesystem may not be marked shared readonly");
		xfs_buf_relse(sbp);
	}
	return error;
}

/*
 * xfs_mod_sb() can be used to copy arbitrary changes to the
 * in-core superblock into the superblock buffer to be logged.
 * It does not provide the higher level of locking that is
 * needed to protect the in-core superblock from concurrent
 * access.
 */
void
xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
{
	xfs_buf_t	*bp;
	int		first;
	int		last;
	xfs_mount_t	*mp;
	xfs_sb_field_t	f;

	ASSERT(fields);
	if (!fields)
		return;
	mp = tp->t_mountp;
	bp = xfs_trans_getsb(tp, mp, 0);
	first = sizeof(xfs_sb_t);
	last = 0;

	/* translate/copy */

	xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields);

	/* find modified range */

	f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
	first = xfs_sb_info[f].offset;

	f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
	last = xfs_sb_info[f + 1].offset - 1;

	xfs_trans_log_buf(tp, bp, first, last);
}


/*
 * xfs_mod_incore_sb_unlocked() is a utility routine commonly used to apply
 * a delta to a specified field in the in-core superblock.  Simply
 * switch on the field indicated and apply the delta to that field.
 * Fields are not allowed to dip below zero, so if the delta would
 * do this do not apply it and return EINVAL.
 *
 * The m_sb_lock must be held when this routine is called.
 */
int
xfs_mod_incore_sb_unlocked(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field,
	int64_t		delta,
	int		rsvd)
{
	int		scounter;	/* short counter for 32 bit fields */
	long long	lcounter;	/* long counter for 64 bit fields */
	long long	res_used, rem;

	/*
	 * With the in-core superblock spin lock held, switch
	 * on the indicated field.  Apply the delta to the
	 * proper field.  If the fields value would dip below
	 * 0, then do not apply the delta and return EINVAL.
	 */
	switch (field) {
	case XFS_SBS_ICOUNT:
		lcounter = (long long)mp->m_sb.sb_icount;
		lcounter += delta;
		if (lcounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_icount = lcounter;
		return 0;
	case XFS_SBS_IFREE:
		lcounter = (long long)mp->m_sb.sb_ifree;
		lcounter += delta;
		if (lcounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_ifree = lcounter;
		return 0;
	case XFS_SBS_FDBLOCKS:
		lcounter = (long long)
			mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
		res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);

		if (delta > 0) {		/* Putting blocks back */
			if (res_used > delta) {
				mp->m_resblks_avail += delta;
			} else {
				rem = delta - res_used;
				mp->m_resblks_avail = mp->m_resblks;
				lcounter += rem;
			}
		} else {			/* Taking blocks away */

			lcounter += delta;

			/*
			 * If we're out of blocks, use any available reserved
			 * blocks if we're allowed to.
			 */

			if (lcounter < 0) {
				if (rsvd) {
					lcounter = (long long)mp->m_resblks_avail + delta;
					if (lcounter < 0) {
						return XFS_ERROR(ENOSPC);
					}
					mp->m_resblks_avail = lcounter;
					return 0;
				} else {	/* not reserved */
					return XFS_ERROR(ENOSPC);
				}
			}
		}

		mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
		return 0;
	case XFS_SBS_FREXTENTS:
		lcounter = (long long)mp->m_sb.sb_frextents;
		lcounter += delta;
		if (lcounter < 0) {
			return XFS_ERROR(ENOSPC);
		}
		mp->m_sb.sb_frextents = lcounter;
		return 0;
	case XFS_SBS_DBLOCKS:
		lcounter = (long long)mp->m_sb.sb_dblocks;
		lcounter += delta;
		if (lcounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_dblocks = lcounter;
		return 0;
	case XFS_SBS_AGCOUNT:
		scounter = mp->m_sb.sb_agcount;
		scounter += delta;
		if (scounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_agcount = scounter;
		return 0;
	case XFS_SBS_IMAX_PCT:
		scounter = mp->m_sb.sb_imax_pct;
		scounter += delta;
		if (scounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_imax_pct = scounter;
		return 0;
	case XFS_SBS_REXTSIZE:
		scounter = mp->m_sb.sb_rextsize;
		scounter += delta;
		if (scounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_rextsize = scounter;
		return 0;
	case XFS_SBS_RBMBLOCKS:
		scounter = mp->m_sb.sb_rbmblocks;
		scounter += delta;
		if (scounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_rbmblocks = scounter;
		return 0;
	case XFS_SBS_RBLOCKS:
		lcounter = (long long)mp->m_sb.sb_rblocks;
		lcounter += delta;
		if (lcounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_rblocks = lcounter;
		return 0;
	case XFS_SBS_REXTENTS:
		lcounter = (long long)mp->m_sb.sb_rextents;
		lcounter += delta;
		if (lcounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_rextents = lcounter;
		return 0;
	case XFS_SBS_REXTSLOG:
		scounter = mp->m_sb.sb_rextslog;
		scounter += delta;
		if (scounter < 0) {
			ASSERT(0);
			return XFS_ERROR(EINVAL);
		}
		mp->m_sb.sb_rextslog = scounter;
		return 0;
	default:
		ASSERT(0);
		return XFS_ERROR(EINVAL);
	}
}

/*
 * xfs_mod_incore_sb() is used to change a field in the in-core
 * superblock structure by the specified delta.  This modification
 * is protected by the m_sb_lock.  Just use the xfs_mod_incore_sb_unlocked()
 * routine to do the work.
 */
int
xfs_mod_incore_sb(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field,
	int64_t		delta,
	int		rsvd)
{
	int	status;

	/* check for per-cpu counters */
	switch (field) {
#ifdef HAVE_PERCPU_SB
	case XFS_SBS_ICOUNT:
	case XFS_SBS_IFREE:
	case XFS_SBS_FDBLOCKS:
		if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
			status = xfs_icsb_modify_counters(mp, field,
							delta, rsvd);
			break;
		}
		/* FALLTHROUGH */
#endif
	default:
		spin_lock(&mp->m_sb_lock);
		status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
		spin_unlock(&mp->m_sb_lock);
		break;
	}

	return status;
}

/*
 * xfs_mod_incore_sb_batch() is used to change more than one field
 * in the in-core superblock structure at a time.  This modification
 * is protected by a lock internal to this module.  The fields and
 * changes to those fields are specified in the array of xfs_mod_sb
 * structures passed in.
 *
 * Either all of the specified deltas will be applied or none of
 * them will.  If any modified field dips below 0, then all modifications
 * will be backed out and EINVAL will be returned.
 */
int
xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
{
	int		status = 0;
	xfs_mod_sb_t	*msbp;

	/*
	 * Loop through the array of mod structures and apply each
	 * individually.  If any fail, then back out all those
	 * which have already been applied.  Do all of this within
	 * the scope of the m_sb_lock so that all of the changes will
	 * be atomic.
	 */
	spin_lock(&mp->m_sb_lock);
	msbp = &msb[0];
	for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) {
		/*
		 * Apply the delta at index n.  If it fails, break
		 * from the loop so we'll fall into the undo loop
		 * below.
		 */
		switch (msbp->msb_field) {
#ifdef HAVE_PERCPU_SB
		case XFS_SBS_ICOUNT:
		case XFS_SBS_IFREE:
		case XFS_SBS_FDBLOCKS:
			if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
				spin_unlock(&mp->m_sb_lock);
				status = xfs_icsb_modify_counters(mp,
							msbp->msb_field,
							msbp->msb_delta, rsvd);
				spin_lock(&mp->m_sb_lock);
				break;
			}
			/* FALLTHROUGH */
#endif
		default:
			status = xfs_mod_incore_sb_unlocked(mp,
							msbp->msb_field,
							msbp->msb_delta, rsvd);
			break;
		}

		if (status != 0) {
			break;
		}
	}

	/*
	 * If we didn't complete the loop above, then back out
	 * any changes made to the superblock.
	 * If you add code between the loop above and here, make sure that
	 * you preserve the value of status. Loop back until we step below
	 * the beginning of the array.  Make sure we don't touch anything
	 * back there.
	 */
	if (status != 0) {
		msbp--;
		while (msbp >= msb) {
			switch (msbp->msb_field) {
#ifdef HAVE_PERCPU_SB
			case XFS_SBS_ICOUNT:
			case XFS_SBS_IFREE:
			case XFS_SBS_FDBLOCKS:
				if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
					spin_unlock(&mp->m_sb_lock);
					status = xfs_icsb_modify_counters(mp,
							msbp->msb_field,
							-(msbp->msb_delta),
							rsvd);
					spin_lock(&mp->m_sb_lock);
					break;
				}
				/* FALLTHROUGH */
#endif
			default:
				status = xfs_mod_incore_sb_unlocked(mp,
							msbp->msb_field,
							-(msbp->msb_delta),
							rsvd);
				break;
			}
			ASSERT(status == 0);
			msbp--;
		}
	}
	spin_unlock(&mp->m_sb_lock);
	return status;
}

/*
 * xfs_getsb() is called to obtain the buffer for the superblock.
 * The buffer is returned locked and read in from disk.
 * The buffer should be released with a call to xfs_brelse().
 *
 * If the flags parameter is BUF_TRYLOCK, then we'll only return
 * the superblock buffer if it can be locked without sleeping.
 * If it can't then we'll return NULL.
 */
xfs_buf_t *
xfs_getsb(
	xfs_mount_t	*mp,
	int		flags)
{
	xfs_buf_t	*bp;

	ASSERT(mp->m_sb_bp != NULL);
	bp = mp->m_sb_bp;
	if (flags & XFS_BUF_TRYLOCK) {
		if (!XFS_BUF_CPSEMA(bp)) {
			return NULL;
		}
	} else {
		XFS_BUF_PSEMA(bp, PRIBIO);
	}
	XFS_BUF_HOLD(bp);
	ASSERT(XFS_BUF_ISDONE(bp));
	return bp;
}

/*
 * Used to free the superblock along various error paths.
 */
void
xfs_freesb(
	xfs_mount_t	*mp)
{
	xfs_buf_t	*bp;

	/*
	 * Use xfs_getsb() so that the buffer will be locked
	 * when we call xfs_buf_relse().
	 */
	bp = xfs_getsb(mp, 0);
	XFS_BUF_UNMANAGE(bp);
	xfs_buf_relse(bp);
	mp->m_sb_bp = NULL;
}

/*
 * See if the UUID is unique among mounted XFS filesystems.
 * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
 */
STATIC int
xfs_uuid_mount(
	xfs_mount_t	*mp)
{
	if (uuid_is_nil(&mp->m_sb.sb_uuid)) {
		cmn_err(CE_WARN,
			"XFS: Filesystem %s has nil UUID - can't mount",
			mp->m_fsname);
		return -1;
	}
	if (!uuid_table_insert(&mp->m_sb.sb_uuid)) {
		cmn_err(CE_WARN,
			"XFS: Filesystem %s has duplicate UUID - can't mount",
			mp->m_fsname);
		return -1;
	}
	return 0;
}

/*
 * Remove filesystem from the UUID table.
 */
STATIC void
xfs_uuid_unmount(
	xfs_mount_t	*mp)
{
	uuid_table_remove(&mp->m_sb.sb_uuid);
}

/*
 * Used to log changes to the superblock unit and width fields which could
 * be altered by the mount options, as well as any potential sb_features2
 * fixup. Only the first superblock is updated.
 */
STATIC void
xfs_mount_log_sb(
	xfs_mount_t	*mp,
	__int64_t	fields)
{
	xfs_trans_t	*tp;

	ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID |
			 XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2));

	tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
	if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
				XFS_DEFAULT_LOG_COUNT)) {
		xfs_trans_cancel(tp, 0);
		return;
	}
	xfs_mod_sb(tp, fields);
	xfs_trans_commit(tp, 0);
}


#ifdef HAVE_PERCPU_SB
/*
 * Per-cpu incore superblock counters
 *
 * Simple concept, difficult implementation
 *
 * Basically, replace the incore superblock counters with a distributed per cpu
 * counter for contended fields (e.g. free block count).
 *
 * Difficulties arise in that the incore sb is used for ENOSPC checking, and
 * hence needs to be accurately read when we are running low on space. Hence
 * there is a method to enable and disable the per-cpu counters based on how
 * much "stuff" is available in them.
 *
 * Basically, a counter is enabled if there is enough free resource to justify
 * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local
 * ENOSPC), then we disable the counters to synchronise all callers and
 * re-distribute the available resources.
 *
 * If, once we redistributed the available resources, we still get a failure,
 * we disable the per-cpu counter and go through the slow path.
 *
 * The slow path is the current xfs_mod_incore_sb() function.  This means that
 * when we disable a per-cpu counter, we need to drain its resources back to
 * the global superblock. We do this after disabling the counter to prevent
 * more threads from queueing up on the counter.
 *
 * Essentially, this means that we still need a lock in the fast path to enable
 * synchronisation between the global counters and the per-cpu counters. This
 * is not a problem because the lock will be local to a CPU almost all the time
 * and have little contention except when we get to ENOSPC conditions.
 *
 * Basically, this lock becomes a barrier that enables us to lock out the fast
 * path while we do things like enabling and disabling counters and
 * synchronising the counters.
 *
 * Locking rules:
 *
 *	1. m_sb_lock before picking up per-cpu locks
 *	2. per-cpu locks always picked up via for_each_online_cpu() order
 *	3. accurate counter sync requires m_sb_lock + per cpu locks
 *	4. modifying per-cpu counters requires holding per-cpu lock
 *	5. modifying global counters requires holding m_sb_lock
 *	6. enabling or disabling a counter requires holding the m_sb_lock
 *	   and _none_ of the per-cpu locks.
 *
 * Disabled counters are only ever re-enabled by a balance operation
 * that results in more free resources per CPU than a given threshold.
 * To ensure counters don't remain disabled, they are rebalanced when
 * the global resource goes above a higher threshold (i.e. some hysteresis
 * is present to prevent thrashing).
 */

#ifdef CONFIG_HOTPLUG_CPU
/*
 * hot-plug CPU notifier support.
 *
 * We need a notifier per filesystem as we need to be able to identify
 * the filesystem to balance the counters out.
#ifdef CONFIG_HOTPLUG_CPU
/*
 * hot-plug CPU notifier support.
 *
 * We need a notifier per filesystem as we need to be able to identify
 * the filesystem to balance the counters out. This is achieved by
 * having a notifier block embedded in the xfs_mount_t and doing pointer
 * magic to get the mount pointer from the notifier block address.
 */
STATIC int
xfs_icsb_cpu_notify(
	struct notifier_block *nfb,
	unsigned long action,
	void *hcpu)
{
	xfs_icsb_cnts_t *cntp;
	xfs_mount_t	*mp;

	mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier);
	cntp = (xfs_icsb_cnts_t *)
			per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu);
	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		/* Easy Case - initialize the area and locks, and
		 * then rebalance when online does everything else for us. */
		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
		break;
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		xfs_icsb_lock(mp);
		xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0);
		xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0);
		xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0);
		xfs_icsb_unlock(mp);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		/* Disable all the counters, then fold the dead cpu's
		 * count into the total on the global superblock and
		 * re-enable the counters. */
		xfs_icsb_lock(mp);
		spin_lock(&mp->m_sb_lock);
		xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
		xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
		xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS);

		mp->m_sb.sb_icount += cntp->icsb_icount;
		mp->m_sb.sb_ifree += cntp->icsb_ifree;
		mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;

		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));

		xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT,
					 XFS_ICSB_SB_LOCKED, 0);
		xfs_icsb_balance_counter(mp, XFS_SBS_IFREE,
					 XFS_ICSB_SB_LOCKED, 0);
		xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS,
					 XFS_ICSB_SB_LOCKED, 0);
		spin_unlock(&mp->m_sb_lock);
		xfs_icsb_unlock(mp);
		break;
	}

	return NOTIFY_OK;
}
#endif /* CONFIG_HOTPLUG_CPU */

int
xfs_icsb_init_counters(
	xfs_mount_t	*mp)
{
	xfs_icsb_cnts_t *cntp;
	int		i;

	mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
	if (mp->m_sb_cnts == NULL)
		return -ENOMEM;

#ifdef CONFIG_HOTPLUG_CPU
	mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
	mp->m_icsb_notifier.priority = 0;
	register_hotcpu_notifier(&mp->m_icsb_notifier);
#endif /* CONFIG_HOTPLUG_CPU */

	for_each_online_cpu(i) {
		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
	}

	mutex_init(&mp->m_icsb_mutex);

	/*
	 * start with all counters disabled so that the
	 * initial balance kicks us off correctly
	 */
	mp->m_icsb_counters = -1;
	return 0;
}

void
xfs_icsb_reinit_counters(
	xfs_mount_t	*mp)
{
	xfs_icsb_lock(mp);
	/*
	 * start with all counters disabled so that the
	 * initial balance kicks us off correctly
	 */
	mp->m_icsb_counters = -1;
	xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0);
	xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0);
	xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0);
	xfs_icsb_unlock(mp);
}

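/*
 * Illustrative sketch (not built): how the counter set-up routines above
 * are meant to be paired over the life of a mount. The surrounding function
 * and the elided steps are made up for the example; only the xfs_icsb_*
 * calls are real.
 */
#if 0
STATIC int
example_counter_lifecycle(
	xfs_mount_t	*mp)
{
	int		error;

	/* allocate the per-cpu area and register the hotplug notifier */
	error = xfs_icsb_init_counters(mp);
	if (error)
		return error;

	/* ... read the on-disk superblock into mp->m_sb ... */

	/*
	 * Now that mp->m_sb holds real values, force every counter through
	 * an initial balance so the per-cpu copies reflect the disk state.
	 */
	xfs_icsb_reinit_counters(mp);

	/* ... filesystem lives here ... */

	/* tear down: unregister the notifier and free the per-cpu area */
	xfs_icsb_destroy_counters(mp);
	return 0;
}
#endif
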
STATIC void
xfs_icsb_destroy_counters(
	xfs_mount_t	*mp)
{
	if (mp->m_sb_cnts) {
		unregister_hotcpu_notifier(&mp->m_icsb_notifier);
		free_percpu(mp->m_sb_cnts);
	}
	mutex_destroy(&mp->m_icsb_mutex);
}

STATIC_INLINE void
xfs_icsb_lock_cntr(
	xfs_icsb_cnts_t	*icsbp)
{
	while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) {
		ndelay(1000);
	}
}

STATIC_INLINE void
xfs_icsb_unlock_cntr(
	xfs_icsb_cnts_t	*icsbp)
{
	clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags);
}


STATIC_INLINE void
xfs_icsb_lock_all_counters(
	xfs_mount_t	*mp)
{
	xfs_icsb_cnts_t *cntp;
	int		i;

	for_each_online_cpu(i) {
		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
		xfs_icsb_lock_cntr(cntp);
	}
}

STATIC_INLINE void
xfs_icsb_unlock_all_counters(
	xfs_mount_t	*mp)
{
	xfs_icsb_cnts_t *cntp;
	int		i;

	for_each_online_cpu(i) {
		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
		xfs_icsb_unlock_cntr(cntp);
	}
}

STATIC void
xfs_icsb_count(
	xfs_mount_t	*mp,
	xfs_icsb_cnts_t	*cnt,
	int		flags)
{
	xfs_icsb_cnts_t *cntp;
	int		i;

	memset(cnt, 0, sizeof(xfs_icsb_cnts_t));

	if (!(flags & XFS_ICSB_LAZY_COUNT))
		xfs_icsb_lock_all_counters(mp);

	for_each_online_cpu(i) {
		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
		cnt->icsb_icount += cntp->icsb_icount;
		cnt->icsb_ifree += cntp->icsb_ifree;
		cnt->icsb_fdblocks += cntp->icsb_fdblocks;
	}

	if (!(flags & XFS_ICSB_LAZY_COUNT))
		xfs_icsb_unlock_all_counters(mp);
}

STATIC int
xfs_icsb_counter_disabled(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field)
{
	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
	return test_bit(field, &mp->m_icsb_counters);
}

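/*
 * Illustrative sketch (not built): the difference the XFS_ICSB_LAZY_COUNT
 * flag makes to xfs_icsb_count() above. The function name is made up; the
 * flag and helpers are the real ones.
 */
#if 0
STATIC void
example_read_counters(
	xfs_mount_t	*mp)
{
	xfs_icsb_cnts_t	cnt;

	/*
	 * Approximate totals: sum the per-cpu copies without taking any
	 * per-cpu lock. Cheap, and good enough for statistics-style output.
	 */
	xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT);

	/*
	 * Accurate totals: every per-cpu counter is locked for the duration
	 * of the sum, so nothing can change underneath us. This is what the
	 * ENOSPC and counter enable/disable paths need.
	 */
	xfs_icsb_count(mp, &cnt, 0);
}
#endif
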
STATIC int
xfs_icsb_disable_counter(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field)
{
	xfs_icsb_cnts_t	cnt;

	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));

	/*
	 * If we are already disabled, then there is nothing to do
	 * here. We check before locking all the counters to avoid
	 * the expensive lock operation when being called in the
	 * slow path and the counter is already disabled. This is
	 * safe because the only time we set or clear this state is under
	 * the m_icsb_mutex.
	 */
	if (xfs_icsb_counter_disabled(mp, field))
		return 0;

	xfs_icsb_lock_all_counters(mp);
	if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
		/* drain back to superblock */

		xfs_icsb_count(mp, &cnt, XFS_ICSB_SB_LOCKED|XFS_ICSB_LAZY_COUNT);
		switch(field) {
		case XFS_SBS_ICOUNT:
			mp->m_sb.sb_icount = cnt.icsb_icount;
			break;
		case XFS_SBS_IFREE:
			mp->m_sb.sb_ifree = cnt.icsb_ifree;
			break;
		case XFS_SBS_FDBLOCKS:
			mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
			break;
		default:
			BUG();
		}
	}

	xfs_icsb_unlock_all_counters(mp);

	return 0;
}

STATIC void
xfs_icsb_enable_counter(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field,
	uint64_t	count,
	uint64_t	resid)
{
	xfs_icsb_cnts_t	*cntp;
	int		i;

	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));

	xfs_icsb_lock_all_counters(mp);
	for_each_online_cpu(i) {
		cntp = per_cpu_ptr(mp->m_sb_cnts, i);
		switch (field) {
		case XFS_SBS_ICOUNT:
			cntp->icsb_icount = count + resid;
			break;
		case XFS_SBS_IFREE:
			cntp->icsb_ifree = count + resid;
			break;
		case XFS_SBS_FDBLOCKS:
			cntp->icsb_fdblocks = count + resid;
			break;
		default:
			BUG();
			break;
		}
		resid = 0;
	}
	clear_bit(field, &mp->m_icsb_counters);
	xfs_icsb_unlock_all_counters(mp);
}

void
xfs_icsb_sync_counters_flags(
	xfs_mount_t	*mp,
	int		flags)
{
	xfs_icsb_cnts_t	cnt;

	/* take the global lock unless the caller already holds it */
	if ((flags & XFS_ICSB_SB_LOCKED) == 0)
		spin_lock(&mp->m_sb_lock);

	xfs_icsb_count(mp, &cnt, flags);

	/* update mp->m_sb fields for counters that are still enabled */
	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
		mp->m_sb.sb_icount = cnt.icsb_icount;
	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
		mp->m_sb.sb_ifree = cnt.icsb_ifree;
	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
		mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;

	if ((flags & XFS_ICSB_SB_LOCKED) == 0)
		spin_unlock(&mp->m_sb_lock);
}

/*
 * Accurate update of per-cpu counters to incore superblock
 */
STATIC void
xfs_icsb_sync_counters(
	xfs_mount_t	*mp)
{
	xfs_icsb_sync_counters_flags(mp, 0);
}

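/*
 * Illustrative sketch (not built): a hypothetical caller that needs the
 * incore superblock to be up to date - for example before it is logged or
 * written to disk - folds the per-cpu counters back in first. The function
 * name and the write step are made up; only the sync call is real.
 */
#if 0
STATIC void
example_prepare_sb_for_write(
	xfs_mount_t	*mp)
{
	/*
	 * Accurate sync: takes m_sb_lock and every per-cpu lock so that
	 * mp->m_sb reflects all per-cpu contributions.
	 */
	xfs_icsb_sync_counters(mp);

	/* ... now mp->m_sb can be copied to the superblock buffer ... */
}
#endif
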
/*
 * Balance and enable/disable counters as necessary.
 *
 * Thresholds for re-enabling counters are somewhat magic. The inode count
 * is chosen to match a single on-disk allocation chunk per CPU, and the
 * free block count is something far enough from zero that we aren't going
 * to thrash when we get near ENOSPC. We also need to supply a minimum we
 * require per cpu to prevent looping endlessly when xfs_alloc_space asks
 * for more than will be distributed to a single CPU but each CPU has enough
 * blocks to be reenabled.
 *
 * Note that we can be called when counters are already disabled.
 * xfs_icsb_disable_counter() optimises the counter locking in this case to
 * prevent locking every per-cpu counter needlessly.
 */

#define XFS_ICSB_INO_CNTR_REENABLE	(uint64_t)64
#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
		(uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp))
STATIC void
xfs_icsb_balance_counter(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field,
	int		flags,
	int		min_per_cpu)
{
	uint64_t	count, resid;
	int		weight = num_online_cpus();
	uint64_t	min = (uint64_t)min_per_cpu;

	if (!(flags & XFS_ICSB_SB_LOCKED))
		spin_lock(&mp->m_sb_lock);

	/* disable counter and sync counter */
	xfs_icsb_disable_counter(mp, field);

	/* update counters - first CPU gets residual */
	switch (field) {
	case XFS_SBS_ICOUNT:
		count = mp->m_sb.sb_icount;
		resid = do_div(count, weight);
		if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
			goto out;
		break;
	case XFS_SBS_IFREE:
		count = mp->m_sb.sb_ifree;
		resid = do_div(count, weight);
		if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
			goto out;
		break;
	case XFS_SBS_FDBLOCKS:
		count = mp->m_sb.sb_fdblocks;
		resid = do_div(count, weight);
		if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp)))
			goto out;
		break;
	default:
		BUG();
		count = resid = 0;	/* quiet, gcc */
		break;
	}

	xfs_icsb_enable_counter(mp, field, count, resid);
out:
	if (!(flags & XFS_ICSB_SB_LOCKED))
		spin_unlock(&mp->m_sb_lock);
}

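/*
 * Worked example (illustrative only) of the threshold logic above: with
 * 4 CPUs online and sb_fdblocks = 2080, do_div() leaves count = 520 and
 * resid = 0. If XFS_ALLOC_SET_ASIDE(mp) were 4, the re-enable threshold
 * would be 512 + 4 = 516, so 520 >= 516 and the counter is re-enabled with
 * 520 blocks per CPU (the first CPU also receiving the residual, here 0).
 * With sb_fdblocks = 2000, count would be 500 < 516 and the counter would
 * stay disabled, leaving all callers on the slow path until more space is
 * freed.
 */
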
STATIC int
xfs_icsb_modify_counters(
	xfs_mount_t	*mp,
	xfs_sb_field_t	field,
	int64_t		delta,
	int		rsvd)
{
	xfs_icsb_cnts_t	*icsbp;
	long long	lcounter;	/* long counter for 64 bit fields */
	int		cpu, ret = 0;

	might_sleep();
again:
	cpu = get_cpu();
	icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu);

	/*
	 * if the counter is disabled, go to slow path
	 */
	if (unlikely(xfs_icsb_counter_disabled(mp, field)))
		goto slow_path;
	xfs_icsb_lock_cntr(icsbp);
	if (unlikely(xfs_icsb_counter_disabled(mp, field))) {
		xfs_icsb_unlock_cntr(icsbp);
		goto slow_path;
	}

	switch (field) {
	case XFS_SBS_ICOUNT:
		lcounter = icsbp->icsb_icount;
		lcounter += delta;
		if (unlikely(lcounter < 0))
			goto balance_counter;
		icsbp->icsb_icount = lcounter;
		break;

	case XFS_SBS_IFREE:
		lcounter = icsbp->icsb_ifree;
		lcounter += delta;
		if (unlikely(lcounter < 0))
			goto balance_counter;
		icsbp->icsb_ifree = lcounter;
		break;

	case XFS_SBS_FDBLOCKS:
		BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);

		lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
		lcounter += delta;
		if (unlikely(lcounter < 0))
			goto balance_counter;
		icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
		break;
	default:
		BUG();
		break;
	}
	xfs_icsb_unlock_cntr(icsbp);
	put_cpu();
	return 0;

slow_path:
	put_cpu();

	/*
	 * serialise with a mutex so we don't burn lots of cpu on
	 * the superblock lock. We still need to hold the superblock
	 * lock, however, when we modify the global structures.
	 */
	xfs_icsb_lock(mp);

	/*
	 * Now running atomically.
	 *
	 * If the counter is enabled, someone has beaten us to rebalancing.
	 * Drop the lock and try again in the fast path....
	 */
	if (!(xfs_icsb_counter_disabled(mp, field))) {
		xfs_icsb_unlock(mp);
		goto again;
	}

	/*
	 * The counter is currently disabled. Because we are
	 * running atomically here, we know a rebalance cannot
	 * be in progress. Hence we can go straight to operating
	 * on the global superblock. We do not call xfs_mod_incore_sb()
	 * here even though we need to get the m_sb_lock. Doing so
	 * will cause us to re-enter this function and deadlock.
	 * Hence we get the m_sb_lock ourselves and then call
	 * xfs_mod_incore_sb_unlocked() as the unlocked path operates
	 * directly on the global counters.
	 */
	spin_lock(&mp->m_sb_lock);
	ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
	spin_unlock(&mp->m_sb_lock);

	/*
	 * Now that we've modified the global superblock, we
	 * may be able to re-enable the distributed counters
	 * (e.g. lots of space just got freed). After that
	 * we are done.
	 */
	if (ret != ENOSPC)
		xfs_icsb_balance_counter(mp, field, 0, 0);
	xfs_icsb_unlock(mp);
	return ret;

balance_counter:
	xfs_icsb_unlock_cntr(icsbp);
	put_cpu();

	/*
	 * We may have multiple threads here if multiple per-cpu
	 * counters run dry at the same time. This will mean we can
	 * do more balances than strictly necessary but it is not
	 * the common slowpath case.
	 */
	xfs_icsb_lock(mp);

	/*
	 * running atomically.
	 *
	 * This will leave the counter in the correct state for future
	 * accesses. After the rebalance, we simply try again and our retry
	 * will either succeed through the fast path or slow path without
	 * another balance operation being required.
	 */
	xfs_icsb_balance_counter(mp, field, 0, delta);
	xfs_icsb_unlock(mp);
	goto again;
}

#endif
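
/*
 * Illustrative sketch (not built): the contract a caller of the counter
 * modification path above sees for free blocks. A negative delta consumes
 * blocks, a positive delta returns them, and running out shows up as
 * ENOSPC; 'rsvd' is the existing flag that lets the caller draw on the
 * reserved block pool. The function name and the reservation/release
 * pairing are made up for the example; xfs_icsb_modify_counters() is the
 * real fast-path entry point.
 */
#if 0
STATIC int
example_reserve_blocks(
	xfs_mount_t	*mp,
	int64_t		nblocks,
	int		rsvd)
{
	int		error;

	/* reserve: fails with ENOSPC if the blocks are not available */
	error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
					 -nblocks, rsvd);
	if (error)
		return error;

	/* ... use the reservation ... */

	/* release the reservation again */
	xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, nblocks, rsvd);
	return 0;
}
#endif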