1 /* 2 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 #include "xfs.h" 19 #include "xfs_fs.h" 20 #include "xfs_types.h" 21 #include "xfs_bit.h" 22 #include "xfs_log.h" 23 #include "xfs_inum.h" 24 #include "xfs_trans.h" 25 #include "xfs_sb.h" 26 #include "xfs_ag.h" 27 #include "xfs_dir2.h" 28 #include "xfs_dmapi.h" 29 #include "xfs_mount.h" 30 #include "xfs_bmap_btree.h" 31 #include "xfs_alloc_btree.h" 32 #include "xfs_ialloc_btree.h" 33 #include "xfs_dir2_sf.h" 34 #include "xfs_attr_sf.h" 35 #include "xfs_dinode.h" 36 #include "xfs_inode.h" 37 #include "xfs_btree.h" 38 #include "xfs_ialloc.h" 39 #include "xfs_alloc.h" 40 #include "xfs_rtalloc.h" 41 #include "xfs_bmap.h" 42 #include "xfs_error.h" 43 #include "xfs_rw.h" 44 #include "xfs_quota.h" 45 #include "xfs_fsops.h" 46 47 STATIC void xfs_mount_log_sbunit(xfs_mount_t *, __int64_t); 48 STATIC int xfs_uuid_mount(xfs_mount_t *); 49 STATIC void xfs_uuid_unmount(xfs_mount_t *mp); 50 STATIC void xfs_unmountfs_wait(xfs_mount_t *); 51 52 53 #ifdef HAVE_PERCPU_SB 54 STATIC void xfs_icsb_destroy_counters(xfs_mount_t *); 55 STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, 56 int, int); 57 STATIC void xfs_icsb_sync_counters(xfs_mount_t *); 58 STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t, 59 int64_t, int); 60 STATIC int xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); 61 62 #else 63 64 #define xfs_icsb_destroy_counters(mp) do { } while (0) 65 #define xfs_icsb_balance_counter(mp, a, b, c) do { } while (0) 66 #define xfs_icsb_sync_counters(mp) do { } while (0) 67 #define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0) 68 69 #endif 70 71 static const struct { 72 short offset; 73 short type; /* 0 = integer 74 * 1 = binary / string (no translation) 75 */ 76 } xfs_sb_info[] = { 77 { offsetof(xfs_sb_t, sb_magicnum), 0 }, 78 { offsetof(xfs_sb_t, sb_blocksize), 0 }, 79 { offsetof(xfs_sb_t, sb_dblocks), 0 }, 80 { offsetof(xfs_sb_t, sb_rblocks), 0 }, 81 { offsetof(xfs_sb_t, sb_rextents), 0 }, 82 { offsetof(xfs_sb_t, sb_uuid), 1 }, 83 { offsetof(xfs_sb_t, sb_logstart), 0 }, 84 { offsetof(xfs_sb_t, sb_rootino), 0 }, 85 { offsetof(xfs_sb_t, sb_rbmino), 0 }, 86 { offsetof(xfs_sb_t, sb_rsumino), 0 }, 87 { offsetof(xfs_sb_t, sb_rextsize), 0 }, 88 { offsetof(xfs_sb_t, sb_agblocks), 0 }, 89 { offsetof(xfs_sb_t, sb_agcount), 0 }, 90 { offsetof(xfs_sb_t, sb_rbmblocks), 0 }, 91 { offsetof(xfs_sb_t, sb_logblocks), 0 }, 92 { offsetof(xfs_sb_t, sb_versionnum), 0 }, 93 { offsetof(xfs_sb_t, sb_sectsize), 0 }, 94 { offsetof(xfs_sb_t, sb_inodesize), 0 }, 95 { offsetof(xfs_sb_t, sb_inopblock), 0 }, 96 { offsetof(xfs_sb_t, sb_fname[0]), 1 }, 97 { offsetof(xfs_sb_t, sb_blocklog), 0 }, 98 { offsetof(xfs_sb_t, sb_sectlog), 0 }, 99 { offsetof(xfs_sb_t, sb_inodelog), 0 }, 100 { offsetof(xfs_sb_t, sb_inopblog), 0 }, 101 { offsetof(xfs_sb_t, sb_agblklog), 0 }, 102 { offsetof(xfs_sb_t, sb_rextslog), 0 }, 103 { offsetof(xfs_sb_t, sb_inprogress), 0 }, 104 { offsetof(xfs_sb_t, sb_imax_pct), 0 }, 105 { offsetof(xfs_sb_t, sb_icount), 0 }, 106 { offsetof(xfs_sb_t, sb_ifree), 0 }, 107 { offsetof(xfs_sb_t, sb_fdblocks), 0 }, 108 { offsetof(xfs_sb_t, sb_frextents), 0 }, 109 { offsetof(xfs_sb_t, sb_uquotino), 0 }, 110 { offsetof(xfs_sb_t, sb_gquotino), 0 }, 111 { offsetof(xfs_sb_t, sb_qflags), 0 }, 112 { offsetof(xfs_sb_t, sb_flags), 0 }, 113 { offsetof(xfs_sb_t, sb_shared_vn), 0 }, 114 { offsetof(xfs_sb_t, sb_inoalignmt), 0 }, 115 { offsetof(xfs_sb_t, sb_unit), 0 }, 116 { offsetof(xfs_sb_t, sb_width), 0 }, 117 { offsetof(xfs_sb_t, sb_dirblklog), 0 }, 118 { offsetof(xfs_sb_t, sb_logsectlog), 0 }, 119 { offsetof(xfs_sb_t, sb_logsectsize),0 }, 120 { offsetof(xfs_sb_t, sb_logsunit), 0 }, 121 { offsetof(xfs_sb_t, sb_features2), 0 }, 122 { sizeof(xfs_sb_t), 0 } 123 }; 124 125 /* 126 * Return a pointer to an initialized xfs_mount structure. 127 */ 128 xfs_mount_t * 129 xfs_mount_init(void) 130 { 131 xfs_mount_t *mp; 132 133 mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP); 134 135 if (xfs_icsb_init_counters(mp)) { 136 mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; 137 } 138 139 AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail"); 140 spinlock_init(&mp->m_sb_lock, "xfs_sb"); 141 mutex_init(&mp->m_ilock); 142 mutex_init(&mp->m_growlock); 143 /* 144 * Initialize the AIL. 145 */ 146 xfs_trans_ail_init(mp); 147 148 atomic_set(&mp->m_active_trans, 0); 149 150 return mp; 151 } 152 153 /* 154 * Free up the resources associated with a mount structure. Assume that 155 * the structure was initially zeroed, so we can tell which fields got 156 * initialized. 157 */ 158 void 159 xfs_mount_free( 160 xfs_mount_t *mp) 161 { 162 if (mp->m_perag) { 163 int agno; 164 165 for (agno = 0; agno < mp->m_maxagi; agno++) 166 if (mp->m_perag[agno].pagb_list) 167 kmem_free(mp->m_perag[agno].pagb_list, 168 sizeof(xfs_perag_busy_t) * 169 XFS_PAGB_NUM_SLOTS); 170 kmem_free(mp->m_perag, 171 sizeof(xfs_perag_t) * mp->m_sb.sb_agcount); 172 } 173 174 AIL_LOCK_DESTROY(&mp->m_ail_lock); 175 spinlock_destroy(&mp->m_sb_lock); 176 mutex_destroy(&mp->m_ilock); 177 mutex_destroy(&mp->m_growlock); 178 if (mp->m_quotainfo) 179 XFS_QM_DONE(mp); 180 181 if (mp->m_fsname != NULL) 182 kmem_free(mp->m_fsname, mp->m_fsname_len); 183 if (mp->m_rtname != NULL) 184 kmem_free(mp->m_rtname, strlen(mp->m_rtname) + 1); 185 if (mp->m_logname != NULL) 186 kmem_free(mp->m_logname, strlen(mp->m_logname) + 1); 187 188 xfs_icsb_destroy_counters(mp); 189 } 190 191 /* 192 * Check size of device based on the (data/realtime) block count. 193 * Note: this check is used by the growfs code as well as mount. 194 */ 195 int 196 xfs_sb_validate_fsb_count( 197 xfs_sb_t *sbp, 198 __uint64_t nblocks) 199 { 200 ASSERT(PAGE_SHIFT >= sbp->sb_blocklog); 201 ASSERT(sbp->sb_blocklog >= BBSHIFT); 202 203 #if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */ 204 if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX) 205 return E2BIG; 206 #else /* Limited by UINT_MAX of sectors */ 207 if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX) 208 return E2BIG; 209 #endif 210 return 0; 211 } 212 213 /* 214 * Check the validity of the SB found. 215 */ 216 STATIC int 217 xfs_mount_validate_sb( 218 xfs_mount_t *mp, 219 xfs_sb_t *sbp, 220 int flags) 221 { 222 /* 223 * If the log device and data device have the 224 * same device number, the log is internal. 225 * Consequently, the sb_logstart should be non-zero. If 226 * we have a zero sb_logstart in this case, we may be trying to mount 227 * a volume filesystem in a non-volume manner. 228 */ 229 if (sbp->sb_magicnum != XFS_SB_MAGIC) { 230 xfs_fs_mount_cmn_err(flags, "bad magic number"); 231 return XFS_ERROR(EWRONGFS); 232 } 233 234 if (!XFS_SB_GOOD_VERSION(sbp)) { 235 xfs_fs_mount_cmn_err(flags, "bad version"); 236 return XFS_ERROR(EWRONGFS); 237 } 238 239 if (unlikely( 240 sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { 241 xfs_fs_mount_cmn_err(flags, 242 "filesystem is marked as having an external log; " 243 "specify logdev on the\nmount command line."); 244 return XFS_ERROR(EINVAL); 245 } 246 247 if (unlikely( 248 sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) { 249 xfs_fs_mount_cmn_err(flags, 250 "filesystem is marked as having an internal log; " 251 "do not specify logdev on\nthe mount command line."); 252 return XFS_ERROR(EINVAL); 253 } 254 255 /* 256 * More sanity checking. These were stolen directly from 257 * xfs_repair. 258 */ 259 if (unlikely( 260 sbp->sb_agcount <= 0 || 261 sbp->sb_sectsize < XFS_MIN_SECTORSIZE || 262 sbp->sb_sectsize > XFS_MAX_SECTORSIZE || 263 sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG || 264 sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG || 265 sbp->sb_blocksize < XFS_MIN_BLOCKSIZE || 266 sbp->sb_blocksize > XFS_MAX_BLOCKSIZE || 267 sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || 268 sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || 269 sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || 270 sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || 271 sbp->sb_inodelog < XFS_DINODE_MIN_LOG || 272 sbp->sb_inodelog > XFS_DINODE_MAX_LOG || 273 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || 274 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || 275 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || 276 (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) { 277 xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed"); 278 return XFS_ERROR(EFSCORRUPTED); 279 } 280 281 /* 282 * Sanity check AG count, size fields against data size field 283 */ 284 if (unlikely( 285 sbp->sb_dblocks == 0 || 286 sbp->sb_dblocks > 287 (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks || 288 sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) * 289 sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) { 290 xfs_fs_mount_cmn_err(flags, "SB sanity check 2 failed"); 291 return XFS_ERROR(EFSCORRUPTED); 292 } 293 294 if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || 295 xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { 296 xfs_fs_mount_cmn_err(flags, 297 "file system too large to be mounted on this system."); 298 return XFS_ERROR(E2BIG); 299 } 300 301 if (unlikely(sbp->sb_inprogress)) { 302 xfs_fs_mount_cmn_err(flags, "file system busy"); 303 return XFS_ERROR(EFSCORRUPTED); 304 } 305 306 /* 307 * Version 1 directory format has never worked on Linux. 308 */ 309 if (unlikely(!XFS_SB_VERSION_HASDIRV2(sbp))) { 310 xfs_fs_mount_cmn_err(flags, 311 "file system using version 1 directory format"); 312 return XFS_ERROR(ENOSYS); 313 } 314 315 /* 316 * Until this is fixed only page-sized or smaller data blocks work. 317 */ 318 if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) { 319 xfs_fs_mount_cmn_err(flags, 320 "file system with blocksize %d bytes", 321 sbp->sb_blocksize); 322 xfs_fs_mount_cmn_err(flags, 323 "only pagesize (%ld) or less will currently work.", 324 PAGE_SIZE); 325 return XFS_ERROR(ENOSYS); 326 } 327 328 return 0; 329 } 330 331 STATIC void 332 xfs_initialize_perag_icache( 333 xfs_perag_t *pag) 334 { 335 if (!pag->pag_ici_init) { 336 rwlock_init(&pag->pag_ici_lock); 337 INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); 338 pag->pag_ici_init = 1; 339 } 340 } 341 342 xfs_agnumber_t 343 xfs_initialize_perag( 344 xfs_mount_t *mp, 345 xfs_agnumber_t agcount) 346 { 347 xfs_agnumber_t index, max_metadata; 348 xfs_perag_t *pag; 349 xfs_agino_t agino; 350 xfs_ino_t ino; 351 xfs_sb_t *sbp = &mp->m_sb; 352 xfs_ino_t max_inum = XFS_MAXINUMBER_32; 353 354 /* Check to see if the filesystem can overflow 32 bit inodes */ 355 agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0); 356 ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino); 357 358 /* Clear the mount flag if no inode can overflow 32 bits 359 * on this filesystem, or if specifically requested.. 360 */ 361 if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > max_inum) { 362 mp->m_flags |= XFS_MOUNT_32BITINODES; 363 } else { 364 mp->m_flags &= ~XFS_MOUNT_32BITINODES; 365 } 366 367 /* If we can overflow then setup the ag headers accordingly */ 368 if (mp->m_flags & XFS_MOUNT_32BITINODES) { 369 /* Calculate how much should be reserved for inodes to 370 * meet the max inode percentage. 371 */ 372 if (mp->m_maxicount) { 373 __uint64_t icount; 374 375 icount = sbp->sb_dblocks * sbp->sb_imax_pct; 376 do_div(icount, 100); 377 icount += sbp->sb_agblocks - 1; 378 do_div(icount, sbp->sb_agblocks); 379 max_metadata = icount; 380 } else { 381 max_metadata = agcount; 382 } 383 for (index = 0; index < agcount; index++) { 384 ino = XFS_AGINO_TO_INO(mp, index, agino); 385 if (ino > max_inum) { 386 index++; 387 break; 388 } 389 390 /* This ag is preferred for inodes */ 391 pag = &mp->m_perag[index]; 392 pag->pagi_inodeok = 1; 393 if (index < max_metadata) 394 pag->pagf_metadata = 1; 395 xfs_initialize_perag_icache(pag); 396 } 397 } else { 398 /* Setup default behavior for smaller filesystems */ 399 for (index = 0; index < agcount; index++) { 400 pag = &mp->m_perag[index]; 401 pag->pagi_inodeok = 1; 402 xfs_initialize_perag_icache(pag); 403 } 404 } 405 return index; 406 } 407 408 void 409 xfs_sb_from_disk( 410 xfs_sb_t *to, 411 xfs_dsb_t *from) 412 { 413 to->sb_magicnum = be32_to_cpu(from->sb_magicnum); 414 to->sb_blocksize = be32_to_cpu(from->sb_blocksize); 415 to->sb_dblocks = be64_to_cpu(from->sb_dblocks); 416 to->sb_rblocks = be64_to_cpu(from->sb_rblocks); 417 to->sb_rextents = be64_to_cpu(from->sb_rextents); 418 memcpy(&to->sb_uuid, &from->sb_uuid, sizeof(to->sb_uuid)); 419 to->sb_logstart = be64_to_cpu(from->sb_logstart); 420 to->sb_rootino = be64_to_cpu(from->sb_rootino); 421 to->sb_rbmino = be64_to_cpu(from->sb_rbmino); 422 to->sb_rsumino = be64_to_cpu(from->sb_rsumino); 423 to->sb_rextsize = be32_to_cpu(from->sb_rextsize); 424 to->sb_agblocks = be32_to_cpu(from->sb_agblocks); 425 to->sb_agcount = be32_to_cpu(from->sb_agcount); 426 to->sb_rbmblocks = be32_to_cpu(from->sb_rbmblocks); 427 to->sb_logblocks = be32_to_cpu(from->sb_logblocks); 428 to->sb_versionnum = be16_to_cpu(from->sb_versionnum); 429 to->sb_sectsize = be16_to_cpu(from->sb_sectsize); 430 to->sb_inodesize = be16_to_cpu(from->sb_inodesize); 431 to->sb_inopblock = be16_to_cpu(from->sb_inopblock); 432 memcpy(&to->sb_fname, &from->sb_fname, sizeof(to->sb_fname)); 433 to->sb_blocklog = from->sb_blocklog; 434 to->sb_sectlog = from->sb_sectlog; 435 to->sb_inodelog = from->sb_inodelog; 436 to->sb_inopblog = from->sb_inopblog; 437 to->sb_agblklog = from->sb_agblklog; 438 to->sb_rextslog = from->sb_rextslog; 439 to->sb_inprogress = from->sb_inprogress; 440 to->sb_imax_pct = from->sb_imax_pct; 441 to->sb_icount = be64_to_cpu(from->sb_icount); 442 to->sb_ifree = be64_to_cpu(from->sb_ifree); 443 to->sb_fdblocks = be64_to_cpu(from->sb_fdblocks); 444 to->sb_frextents = be64_to_cpu(from->sb_frextents); 445 to->sb_uquotino = be64_to_cpu(from->sb_uquotino); 446 to->sb_gquotino = be64_to_cpu(from->sb_gquotino); 447 to->sb_qflags = be16_to_cpu(from->sb_qflags); 448 to->sb_flags = from->sb_flags; 449 to->sb_shared_vn = from->sb_shared_vn; 450 to->sb_inoalignmt = be32_to_cpu(from->sb_inoalignmt); 451 to->sb_unit = be32_to_cpu(from->sb_unit); 452 to->sb_width = be32_to_cpu(from->sb_width); 453 to->sb_dirblklog = from->sb_dirblklog; 454 to->sb_logsectlog = from->sb_logsectlog; 455 to->sb_logsectsize = be16_to_cpu(from->sb_logsectsize); 456 to->sb_logsunit = be32_to_cpu(from->sb_logsunit); 457 to->sb_features2 = be32_to_cpu(from->sb_features2); 458 } 459 460 /* 461 * Copy in core superblock to ondisk one. 462 * 463 * The fields argument is mask of superblock fields to copy. 464 */ 465 void 466 xfs_sb_to_disk( 467 xfs_dsb_t *to, 468 xfs_sb_t *from, 469 __int64_t fields) 470 { 471 xfs_caddr_t to_ptr = (xfs_caddr_t)to; 472 xfs_caddr_t from_ptr = (xfs_caddr_t)from; 473 xfs_sb_field_t f; 474 int first; 475 int size; 476 477 ASSERT(fields); 478 if (!fields) 479 return; 480 481 while (fields) { 482 f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); 483 first = xfs_sb_info[f].offset; 484 size = xfs_sb_info[f + 1].offset - first; 485 486 ASSERT(xfs_sb_info[f].type == 0 || xfs_sb_info[f].type == 1); 487 488 if (size == 1 || xfs_sb_info[f].type == 1) { 489 memcpy(to_ptr + first, from_ptr + first, size); 490 } else { 491 switch (size) { 492 case 2: 493 *(__be16 *)(to_ptr + first) = 494 cpu_to_be16(*(__u16 *)(from_ptr + first)); 495 break; 496 case 4: 497 *(__be32 *)(to_ptr + first) = 498 cpu_to_be32(*(__u32 *)(from_ptr + first)); 499 break; 500 case 8: 501 *(__be64 *)(to_ptr + first) = 502 cpu_to_be64(*(__u64 *)(from_ptr + first)); 503 break; 504 default: 505 ASSERT(0); 506 } 507 } 508 509 fields &= ~(1LL << f); 510 } 511 } 512 513 /* 514 * xfs_readsb 515 * 516 * Does the initial read of the superblock. 517 */ 518 int 519 xfs_readsb(xfs_mount_t *mp, int flags) 520 { 521 unsigned int sector_size; 522 unsigned int extra_flags; 523 xfs_buf_t *bp; 524 int error; 525 526 ASSERT(mp->m_sb_bp == NULL); 527 ASSERT(mp->m_ddev_targp != NULL); 528 529 /* 530 * Allocate a (locked) buffer to hold the superblock. 531 * This will be kept around at all times to optimize 532 * access to the superblock. 533 */ 534 sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); 535 extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED; 536 537 bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR, 538 BTOBB(sector_size), extra_flags); 539 if (!bp || XFS_BUF_ISERROR(bp)) { 540 xfs_fs_mount_cmn_err(flags, "SB read failed"); 541 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; 542 goto fail; 543 } 544 ASSERT(XFS_BUF_ISBUSY(bp)); 545 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 546 547 /* 548 * Initialize the mount structure from the superblock. 549 * But first do some basic consistency checking. 550 */ 551 xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); 552 553 error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); 554 if (error) { 555 xfs_fs_mount_cmn_err(flags, "SB validate failed"); 556 goto fail; 557 } 558 559 /* 560 * We must be able to do sector-sized and sector-aligned IO. 561 */ 562 if (sector_size > mp->m_sb.sb_sectsize) { 563 xfs_fs_mount_cmn_err(flags, 564 "device supports only %u byte sectors (not %u)", 565 sector_size, mp->m_sb.sb_sectsize); 566 error = ENOSYS; 567 goto fail; 568 } 569 570 /* 571 * If device sector size is smaller than the superblock size, 572 * re-read the superblock so the buffer is correctly sized. 573 */ 574 if (sector_size < mp->m_sb.sb_sectsize) { 575 XFS_BUF_UNMANAGE(bp); 576 xfs_buf_relse(bp); 577 sector_size = mp->m_sb.sb_sectsize; 578 bp = xfs_buf_read_flags(mp->m_ddev_targp, XFS_SB_DADDR, 579 BTOBB(sector_size), extra_flags); 580 if (!bp || XFS_BUF_ISERROR(bp)) { 581 xfs_fs_mount_cmn_err(flags, "SB re-read failed"); 582 error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; 583 goto fail; 584 } 585 ASSERT(XFS_BUF_ISBUSY(bp)); 586 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 587 } 588 589 /* Initialize per-cpu counters */ 590 xfs_icsb_reinit_counters(mp); 591 592 mp->m_sb_bp = bp; 593 xfs_buf_relse(bp); 594 ASSERT(XFS_BUF_VALUSEMA(bp) > 0); 595 return 0; 596 597 fail: 598 if (bp) { 599 XFS_BUF_UNMANAGE(bp); 600 xfs_buf_relse(bp); 601 } 602 return error; 603 } 604 605 606 /* 607 * xfs_mount_common 608 * 609 * Mount initialization code establishing various mount 610 * fields from the superblock associated with the given 611 * mount structure 612 */ 613 STATIC void 614 xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp) 615 { 616 int i; 617 618 mp->m_agfrotor = mp->m_agirotor = 0; 619 spinlock_init(&mp->m_agirotor_lock, "m_agirotor_lock"); 620 mp->m_maxagi = mp->m_sb.sb_agcount; 621 mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG; 622 mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT; 623 mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT; 624 mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1; 625 mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog; 626 mp->m_litino = sbp->sb_inodesize - 627 ((uint)sizeof(xfs_dinode_core_t) + (uint)sizeof(xfs_agino_t)); 628 mp->m_blockmask = sbp->sb_blocksize - 1; 629 mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; 630 mp->m_blockwmask = mp->m_blockwsize - 1; 631 INIT_LIST_HEAD(&mp->m_del_inodes); 632 633 /* 634 * Setup for attributes, in case they get created. 635 * This value is for inodes getting attributes for the first time, 636 * the per-inode value is for old attribute values. 637 */ 638 ASSERT(sbp->sb_inodesize >= 256 && sbp->sb_inodesize <= 2048); 639 switch (sbp->sb_inodesize) { 640 case 256: 641 mp->m_attroffset = XFS_LITINO(mp) - 642 XFS_BMDR_SPACE_CALC(MINABTPTRS); 643 break; 644 case 512: 645 case 1024: 646 case 2048: 647 mp->m_attroffset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS); 648 break; 649 default: 650 ASSERT(0); 651 } 652 ASSERT(mp->m_attroffset < XFS_LITINO(mp)); 653 654 for (i = 0; i < 2; i++) { 655 mp->m_alloc_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, 656 xfs_alloc, i == 0); 657 mp->m_alloc_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize, 658 xfs_alloc, i == 0); 659 } 660 for (i = 0; i < 2; i++) { 661 mp->m_bmap_dmxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, 662 xfs_bmbt, i == 0); 663 mp->m_bmap_dmnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize, 664 xfs_bmbt, i == 0); 665 } 666 for (i = 0; i < 2; i++) { 667 mp->m_inobt_mxr[i] = XFS_BTREE_BLOCK_MAXRECS(sbp->sb_blocksize, 668 xfs_inobt, i == 0); 669 mp->m_inobt_mnr[i] = XFS_BTREE_BLOCK_MINRECS(sbp->sb_blocksize, 670 xfs_inobt, i == 0); 671 } 672 673 mp->m_bsize = XFS_FSB_TO_BB(mp, 1); 674 mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK, 675 sbp->sb_inopblock); 676 mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog; 677 } 678 679 /* 680 * xfs_initialize_perag_data 681 * 682 * Read in each per-ag structure so we can count up the number of 683 * allocated inodes, free inodes and used filesystem blocks as this 684 * information is no longer persistent in the superblock. Once we have 685 * this information, write it into the in-core superblock structure. 686 */ 687 STATIC int 688 xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount) 689 { 690 xfs_agnumber_t index; 691 xfs_perag_t *pag; 692 xfs_sb_t *sbp = &mp->m_sb; 693 uint64_t ifree = 0; 694 uint64_t ialloc = 0; 695 uint64_t bfree = 0; 696 uint64_t bfreelst = 0; 697 uint64_t btree = 0; 698 int error; 699 int s; 700 701 for (index = 0; index < agcount; index++) { 702 /* 703 * read the agf, then the agi. This gets us 704 * all the inforamtion we need and populates the 705 * per-ag structures for us. 706 */ 707 error = xfs_alloc_pagf_init(mp, NULL, index, 0); 708 if (error) 709 return error; 710 711 error = xfs_ialloc_pagi_init(mp, NULL, index); 712 if (error) 713 return error; 714 pag = &mp->m_perag[index]; 715 ifree += pag->pagi_freecount; 716 ialloc += pag->pagi_count; 717 bfree += pag->pagf_freeblks; 718 bfreelst += pag->pagf_flcount; 719 btree += pag->pagf_btreeblks; 720 } 721 /* 722 * Overwrite incore superblock counters with just-read data 723 */ 724 s = XFS_SB_LOCK(mp); 725 sbp->sb_ifree = ifree; 726 sbp->sb_icount = ialloc; 727 sbp->sb_fdblocks = bfree + bfreelst + btree; 728 XFS_SB_UNLOCK(mp, s); 729 730 /* Fixup the per-cpu counters as well. */ 731 xfs_icsb_reinit_counters(mp); 732 733 return 0; 734 } 735 736 /* 737 * xfs_mountfs 738 * 739 * This function does the following on an initial mount of a file system: 740 * - reads the superblock from disk and init the mount struct 741 * - if we're a 32-bit kernel, do a size check on the superblock 742 * so we don't mount terabyte filesystems 743 * - init mount struct realtime fields 744 * - allocate inode hash table for fs 745 * - init directory manager 746 * - perform recovery and init the log manager 747 */ 748 int 749 xfs_mountfs( 750 xfs_mount_t *mp, 751 int mfsi_flags) 752 { 753 xfs_buf_t *bp; 754 xfs_sb_t *sbp = &(mp->m_sb); 755 xfs_inode_t *rip; 756 bhv_vnode_t *rvp = NULL; 757 int readio_log, writeio_log; 758 xfs_daddr_t d; 759 __uint64_t resblks; 760 __int64_t update_flags; 761 uint quotamount, quotaflags; 762 int agno; 763 int uuid_mounted = 0; 764 int error = 0; 765 766 if (mp->m_sb_bp == NULL) { 767 if ((error = xfs_readsb(mp, mfsi_flags))) { 768 return error; 769 } 770 } 771 xfs_mount_common(mp, sbp); 772 773 /* 774 * Check if sb_agblocks is aligned at stripe boundary 775 * If sb_agblocks is NOT aligned turn off m_dalign since 776 * allocator alignment is within an ag, therefore ag has 777 * to be aligned at stripe boundary. 778 */ 779 update_flags = 0LL; 780 if (mp->m_dalign && !(mfsi_flags & XFS_MFSI_SECOND)) { 781 /* 782 * If stripe unit and stripe width are not multiples 783 * of the fs blocksize turn off alignment. 784 */ 785 if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || 786 (BBTOB(mp->m_swidth) & mp->m_blockmask)) { 787 if (mp->m_flags & XFS_MOUNT_RETERR) { 788 cmn_err(CE_WARN, 789 "XFS: alignment check 1 failed"); 790 error = XFS_ERROR(EINVAL); 791 goto error1; 792 } 793 mp->m_dalign = mp->m_swidth = 0; 794 } else { 795 /* 796 * Convert the stripe unit and width to FSBs. 797 */ 798 mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign); 799 if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) { 800 if (mp->m_flags & XFS_MOUNT_RETERR) { 801 error = XFS_ERROR(EINVAL); 802 goto error1; 803 } 804 xfs_fs_cmn_err(CE_WARN, mp, 805 "stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)", 806 mp->m_dalign, mp->m_swidth, 807 sbp->sb_agblocks); 808 809 mp->m_dalign = 0; 810 mp->m_swidth = 0; 811 } else if (mp->m_dalign) { 812 mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); 813 } else { 814 if (mp->m_flags & XFS_MOUNT_RETERR) { 815 xfs_fs_cmn_err(CE_WARN, mp, 816 "stripe alignment turned off: sunit(%d) less than bsize(%d)", 817 mp->m_dalign, 818 mp->m_blockmask +1); 819 error = XFS_ERROR(EINVAL); 820 goto error1; 821 } 822 mp->m_swidth = 0; 823 } 824 } 825 826 /* 827 * Update superblock with new values 828 * and log changes 829 */ 830 if (XFS_SB_VERSION_HASDALIGN(sbp)) { 831 if (sbp->sb_unit != mp->m_dalign) { 832 sbp->sb_unit = mp->m_dalign; 833 update_flags |= XFS_SB_UNIT; 834 } 835 if (sbp->sb_width != mp->m_swidth) { 836 sbp->sb_width = mp->m_swidth; 837 update_flags |= XFS_SB_WIDTH; 838 } 839 } 840 } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN && 841 XFS_SB_VERSION_HASDALIGN(&mp->m_sb)) { 842 mp->m_dalign = sbp->sb_unit; 843 mp->m_swidth = sbp->sb_width; 844 } 845 846 xfs_alloc_compute_maxlevels(mp); 847 xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK); 848 xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK); 849 xfs_ialloc_compute_maxlevels(mp); 850 851 if (sbp->sb_imax_pct) { 852 __uint64_t icount; 853 854 /* Make sure the maximum inode count is a multiple of the 855 * units we allocate inodes in. 856 */ 857 858 icount = sbp->sb_dblocks * sbp->sb_imax_pct; 859 do_div(icount, 100); 860 do_div(icount, mp->m_ialloc_blks); 861 mp->m_maxicount = (icount * mp->m_ialloc_blks) << 862 sbp->sb_inopblog; 863 } else 864 mp->m_maxicount = 0; 865 866 mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog); 867 868 /* 869 * XFS uses the uuid from the superblock as the unique 870 * identifier for fsid. We can not use the uuid from the volume 871 * since a single partition filesystem is identical to a single 872 * partition volume/filesystem. 873 */ 874 if ((mfsi_flags & XFS_MFSI_SECOND) == 0 && 875 (mp->m_flags & XFS_MOUNT_NOUUID) == 0) { 876 if (xfs_uuid_mount(mp)) { 877 error = XFS_ERROR(EINVAL); 878 goto error1; 879 } 880 uuid_mounted=1; 881 } 882 883 /* 884 * Set the default minimum read and write sizes unless 885 * already specified in a mount option. 886 * We use smaller I/O sizes when the file system 887 * is being used for NFS service (wsync mount option). 888 */ 889 if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) { 890 if (mp->m_flags & XFS_MOUNT_WSYNC) { 891 readio_log = XFS_WSYNC_READIO_LOG; 892 writeio_log = XFS_WSYNC_WRITEIO_LOG; 893 } else { 894 readio_log = XFS_READIO_LOG_LARGE; 895 writeio_log = XFS_WRITEIO_LOG_LARGE; 896 } 897 } else { 898 readio_log = mp->m_readio_log; 899 writeio_log = mp->m_writeio_log; 900 } 901 902 if (sbp->sb_blocklog > readio_log) { 903 mp->m_readio_log = sbp->sb_blocklog; 904 } else { 905 mp->m_readio_log = readio_log; 906 } 907 mp->m_readio_blocks = 1 << (mp->m_readio_log - sbp->sb_blocklog); 908 if (sbp->sb_blocklog > writeio_log) { 909 mp->m_writeio_log = sbp->sb_blocklog; 910 } else { 911 mp->m_writeio_log = writeio_log; 912 } 913 mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog); 914 915 /* 916 * Set the inode cluster size. 917 * This may still be overridden by the file system 918 * block size if it is larger than the chosen cluster size. 919 */ 920 mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; 921 922 /* 923 * Set whether we're using inode alignment. 924 */ 925 if (XFS_SB_VERSION_HASALIGN(&mp->m_sb) && 926 mp->m_sb.sb_inoalignmt >= 927 XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) 928 mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1; 929 else 930 mp->m_inoalign_mask = 0; 931 /* 932 * If we are using stripe alignment, check whether 933 * the stripe unit is a multiple of the inode alignment 934 */ 935 if (mp->m_dalign && mp->m_inoalign_mask && 936 !(mp->m_dalign & mp->m_inoalign_mask)) 937 mp->m_sinoalign = mp->m_dalign; 938 else 939 mp->m_sinoalign = 0; 940 /* 941 * Check that the data (and log if separate) are an ok size. 942 */ 943 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); 944 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { 945 cmn_err(CE_WARN, "XFS: size check 1 failed"); 946 error = XFS_ERROR(E2BIG); 947 goto error1; 948 } 949 error = xfs_read_buf(mp, mp->m_ddev_targp, 950 d - XFS_FSS_TO_BB(mp, 1), 951 XFS_FSS_TO_BB(mp, 1), 0, &bp); 952 if (!error) { 953 xfs_buf_relse(bp); 954 } else { 955 cmn_err(CE_WARN, "XFS: size check 2 failed"); 956 if (error == ENOSPC) { 957 error = XFS_ERROR(E2BIG); 958 } 959 goto error1; 960 } 961 962 if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) && 963 mp->m_logdev_targp != mp->m_ddev_targp) { 964 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); 965 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { 966 cmn_err(CE_WARN, "XFS: size check 3 failed"); 967 error = XFS_ERROR(E2BIG); 968 goto error1; 969 } 970 error = xfs_read_buf(mp, mp->m_logdev_targp, 971 d - XFS_FSB_TO_BB(mp, 1), 972 XFS_FSB_TO_BB(mp, 1), 0, &bp); 973 if (!error) { 974 xfs_buf_relse(bp); 975 } else { 976 cmn_err(CE_WARN, "XFS: size check 3 failed"); 977 if (error == ENOSPC) { 978 error = XFS_ERROR(E2BIG); 979 } 980 goto error1; 981 } 982 } 983 984 /* 985 * Initialize realtime fields in the mount structure 986 */ 987 if ((error = xfs_rtmount_init(mp))) { 988 cmn_err(CE_WARN, "XFS: RT mount failed"); 989 goto error1; 990 } 991 992 /* 993 * For client case we are done now 994 */ 995 if (mfsi_flags & XFS_MFSI_CLIENT) { 996 return 0; 997 } 998 999 /* 1000 * Copies the low order bits of the timestamp and the randomly 1001 * set "sequence" number out of a UUID. 1002 */ 1003 uuid_getnodeuniq(&sbp->sb_uuid, mp->m_fixedfsid); 1004 1005 mp->m_dmevmask = 0; /* not persistent; set after each mount */ 1006 1007 xfs_dir_mount(mp); 1008 1009 /* 1010 * Initialize the attribute manager's entries. 1011 */ 1012 mp->m_attr_magicpct = (mp->m_sb.sb_blocksize * 37) / 100; 1013 1014 /* 1015 * Initialize the precomputed transaction reservations values. 1016 */ 1017 xfs_trans_init(mp); 1018 1019 /* 1020 * Allocate and initialize the per-ag data. 1021 */ 1022 init_rwsem(&mp->m_peraglock); 1023 mp->m_perag = 1024 kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), KM_SLEEP); 1025 1026 mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount); 1027 1028 /* 1029 * log's mount-time initialization. Perform 1st part recovery if needed 1030 */ 1031 if (likely(sbp->sb_logblocks > 0)) { /* check for volume case */ 1032 error = xfs_log_mount(mp, mp->m_logdev_targp, 1033 XFS_FSB_TO_DADDR(mp, sbp->sb_logstart), 1034 XFS_FSB_TO_BB(mp, sbp->sb_logblocks)); 1035 if (error) { 1036 cmn_err(CE_WARN, "XFS: log mount failed"); 1037 goto error2; 1038 } 1039 } else { /* No log has been defined */ 1040 cmn_err(CE_WARN, "XFS: no log defined"); 1041 XFS_ERROR_REPORT("xfs_mountfs_int(1)", XFS_ERRLEVEL_LOW, mp); 1042 error = XFS_ERROR(EFSCORRUPTED); 1043 goto error2; 1044 } 1045 1046 /* 1047 * Now the log is mounted, we know if it was an unclean shutdown or 1048 * not. If it was, with the first phase of recovery has completed, we 1049 * have consistent AG blocks on disk. We have not recovered EFIs yet, 1050 * but they are recovered transactionally in the second recovery phase 1051 * later. 1052 * 1053 * Hence we can safely re-initialise incore superblock counters from 1054 * the per-ag data. These may not be correct if the filesystem was not 1055 * cleanly unmounted, so we need to wait for recovery to finish before 1056 * doing this. 1057 * 1058 * If the filesystem was cleanly unmounted, then we can trust the 1059 * values in the superblock to be correct and we don't need to do 1060 * anything here. 1061 * 1062 * If we are currently making the filesystem, the initialisation will 1063 * fail as the perag data is in an undefined state. 1064 */ 1065 1066 if (xfs_sb_version_haslazysbcount(&mp->m_sb) && 1067 !XFS_LAST_UNMOUNT_WAS_CLEAN(mp) && 1068 !mp->m_sb.sb_inprogress) { 1069 error = xfs_initialize_perag_data(mp, sbp->sb_agcount); 1070 if (error) { 1071 goto error2; 1072 } 1073 } 1074 /* 1075 * Get and sanity-check the root inode. 1076 * Save the pointer to it in the mount structure. 1077 */ 1078 error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0); 1079 if (error) { 1080 cmn_err(CE_WARN, "XFS: failed to read root inode"); 1081 goto error3; 1082 } 1083 1084 ASSERT(rip != NULL); 1085 rvp = XFS_ITOV(rip); 1086 1087 if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) { 1088 cmn_err(CE_WARN, "XFS: corrupted root inode"); 1089 cmn_err(CE_WARN, "Device %s - root %llu is not a directory", 1090 XFS_BUFTARG_NAME(mp->m_ddev_targp), 1091 (unsigned long long)rip->i_ino); 1092 xfs_iunlock(rip, XFS_ILOCK_EXCL); 1093 XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW, 1094 mp); 1095 error = XFS_ERROR(EFSCORRUPTED); 1096 goto error4; 1097 } 1098 mp->m_rootip = rip; /* save it */ 1099 1100 xfs_iunlock(rip, XFS_ILOCK_EXCL); 1101 1102 /* 1103 * Initialize realtime inode pointers in the mount structure 1104 */ 1105 if ((error = xfs_rtmount_inodes(mp))) { 1106 /* 1107 * Free up the root inode. 1108 */ 1109 cmn_err(CE_WARN, "XFS: failed to read RT inodes"); 1110 goto error4; 1111 } 1112 1113 /* 1114 * If fs is not mounted readonly, then update the superblock 1115 * unit and width changes. 1116 */ 1117 if (update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) 1118 xfs_mount_log_sbunit(mp, update_flags); 1119 1120 /* 1121 * Initialise the XFS quota management subsystem for this mount 1122 */ 1123 if ((error = XFS_QM_INIT(mp, "amount, "aflags))) 1124 goto error4; 1125 1126 /* 1127 * Finish recovering the file system. This part needed to be 1128 * delayed until after the root and real-time bitmap inodes 1129 * were consistently read in. 1130 */ 1131 error = xfs_log_mount_finish(mp, mfsi_flags); 1132 if (error) { 1133 cmn_err(CE_WARN, "XFS: log mount finish failed"); 1134 goto error4; 1135 } 1136 1137 /* 1138 * Complete the quota initialisation, post-log-replay component. 1139 */ 1140 if ((error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags))) 1141 goto error4; 1142 1143 /* 1144 * Now we are mounted, reserve a small amount of unused space for 1145 * privileged transactions. This is needed so that transaction 1146 * space required for critical operations can dip into this pool 1147 * when at ENOSPC. This is needed for operations like create with 1148 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations 1149 * are not allowed to use this reserved space. 1150 * 1151 * We default to 5% or 1024 fsbs of space reserved, whichever is smaller. 1152 * This may drive us straight to ENOSPC on mount, but that implies 1153 * we were already there on the last unmount. 1154 */ 1155 resblks = mp->m_sb.sb_dblocks; 1156 do_div(resblks, 20); 1157 resblks = min_t(__uint64_t, resblks, 1024); 1158 xfs_reserve_blocks(mp, &resblks, NULL); 1159 1160 return 0; 1161 1162 error4: 1163 /* 1164 * Free up the root inode. 1165 */ 1166 VN_RELE(rvp); 1167 error3: 1168 xfs_log_unmount_dealloc(mp); 1169 error2: 1170 for (agno = 0; agno < sbp->sb_agcount; agno++) 1171 if (mp->m_perag[agno].pagb_list) 1172 kmem_free(mp->m_perag[agno].pagb_list, 1173 sizeof(xfs_perag_busy_t) * XFS_PAGB_NUM_SLOTS); 1174 kmem_free(mp->m_perag, sbp->sb_agcount * sizeof(xfs_perag_t)); 1175 mp->m_perag = NULL; 1176 /* FALLTHROUGH */ 1177 error1: 1178 if (uuid_mounted) 1179 xfs_uuid_unmount(mp); 1180 xfs_freesb(mp); 1181 return error; 1182 } 1183 1184 /* 1185 * xfs_unmountfs 1186 * 1187 * This flushes out the inodes,dquots and the superblock, unmounts the 1188 * log and makes sure that incore structures are freed. 1189 */ 1190 int 1191 xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) 1192 { 1193 __uint64_t resblks; 1194 1195 /* 1196 * We can potentially deadlock here if we have an inode cluster 1197 * that has been freed has it's buffer still pinned in memory because 1198 * the transaction is still sitting in a iclog. The stale inodes 1199 * on that buffer will have their flush locks held until the 1200 * transaction hits the disk and the callbacks run. the inode 1201 * flush takes the flush lock unconditionally and with nothing to 1202 * push out the iclog we will never get that unlocked. hence we 1203 * need to force the log first. 1204 */ 1205 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); 1206 xfs_iflush_all(mp); 1207 1208 XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING); 1209 1210 /* 1211 * Flush out the log synchronously so that we know for sure 1212 * that nothing is pinned. This is important because bflush() 1213 * will skip pinned buffers. 1214 */ 1215 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); 1216 1217 xfs_binval(mp->m_ddev_targp); 1218 if (mp->m_rtdev_targp) { 1219 xfs_binval(mp->m_rtdev_targp); 1220 } 1221 1222 /* 1223 * Unreserve any blocks we have so that when we unmount we don't account 1224 * the reserved free space as used. This is really only necessary for 1225 * lazy superblock counting because it trusts the incore superblock 1226 * counters to be aboslutely correct on clean unmount. 1227 * 1228 * We don't bother correcting this elsewhere for lazy superblock 1229 * counting because on mount of an unclean filesystem we reconstruct the 1230 * correct counter value and this is irrelevant. 1231 * 1232 * For non-lazy counter filesystems, this doesn't matter at all because 1233 * we only every apply deltas to the superblock and hence the incore 1234 * value does not matter.... 1235 */ 1236 resblks = 0; 1237 xfs_reserve_blocks(mp, &resblks, NULL); 1238 1239 xfs_log_sbcount(mp, 1); 1240 xfs_unmountfs_writesb(mp); 1241 xfs_unmountfs_wait(mp); /* wait for async bufs */ 1242 xfs_log_unmount(mp); /* Done! No more fs ops. */ 1243 1244 xfs_freesb(mp); 1245 1246 /* 1247 * All inodes from this mount point should be freed. 1248 */ 1249 ASSERT(mp->m_inodes == NULL); 1250 1251 xfs_unmountfs_close(mp, cr); 1252 if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) 1253 xfs_uuid_unmount(mp); 1254 1255 #if defined(DEBUG) || defined(INDUCE_IO_ERROR) 1256 xfs_errortag_clearall(mp, 0); 1257 #endif 1258 XFS_IODONE(mp); 1259 xfs_mount_free(mp); 1260 return 0; 1261 } 1262 1263 void 1264 xfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr) 1265 { 1266 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) 1267 xfs_free_buftarg(mp->m_logdev_targp, 1); 1268 if (mp->m_rtdev_targp) 1269 xfs_free_buftarg(mp->m_rtdev_targp, 1); 1270 xfs_free_buftarg(mp->m_ddev_targp, 0); 1271 } 1272 1273 STATIC void 1274 xfs_unmountfs_wait(xfs_mount_t *mp) 1275 { 1276 if (mp->m_logdev_targp != mp->m_ddev_targp) 1277 xfs_wait_buftarg(mp->m_logdev_targp); 1278 if (mp->m_rtdev_targp) 1279 xfs_wait_buftarg(mp->m_rtdev_targp); 1280 xfs_wait_buftarg(mp->m_ddev_targp); 1281 } 1282 1283 int 1284 xfs_fs_writable(xfs_mount_t *mp) 1285 { 1286 return !(xfs_test_for_freeze(mp) || XFS_FORCED_SHUTDOWN(mp) || 1287 (mp->m_flags & XFS_MOUNT_RDONLY)); 1288 } 1289 1290 /* 1291 * xfs_log_sbcount 1292 * 1293 * Called either periodically to keep the on disk superblock values 1294 * roughly up to date or from unmount to make sure the values are 1295 * correct on a clean unmount. 1296 * 1297 * Note this code can be called during the process of freezing, so 1298 * we may need to use the transaction allocator which does not not 1299 * block when the transaction subsystem is in its frozen state. 1300 */ 1301 int 1302 xfs_log_sbcount( 1303 xfs_mount_t *mp, 1304 uint sync) 1305 { 1306 xfs_trans_t *tp; 1307 int error; 1308 1309 if (!xfs_fs_writable(mp)) 1310 return 0; 1311 1312 xfs_icsb_sync_counters(mp); 1313 1314 /* 1315 * we don't need to do this if we are updating the superblock 1316 * counters on every modification. 1317 */ 1318 if (!xfs_sb_version_haslazysbcount(&mp->m_sb)) 1319 return 0; 1320 1321 tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT); 1322 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1323 XFS_DEFAULT_LOG_COUNT); 1324 if (error) { 1325 xfs_trans_cancel(tp, 0); 1326 return error; 1327 } 1328 1329 xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS); 1330 if (sync) 1331 xfs_trans_set_sync(tp); 1332 xfs_trans_commit(tp, 0); 1333 1334 return 0; 1335 } 1336 1337 STATIC void 1338 xfs_mark_shared_ro( 1339 xfs_mount_t *mp, 1340 xfs_buf_t *bp) 1341 { 1342 xfs_dsb_t *sb = XFS_BUF_TO_SBP(bp); 1343 __uint16_t version; 1344 1345 if (!(sb->sb_flags & XFS_SBF_READONLY)) 1346 sb->sb_flags |= XFS_SBF_READONLY; 1347 1348 version = be16_to_cpu(sb->sb_versionnum); 1349 if ((version & XFS_SB_VERSION_NUMBITS) != XFS_SB_VERSION_4 || 1350 !(version & XFS_SB_VERSION_SHAREDBIT)) 1351 version |= XFS_SB_VERSION_SHAREDBIT; 1352 sb->sb_versionnum = cpu_to_be16(version); 1353 } 1354 1355 int 1356 xfs_unmountfs_writesb(xfs_mount_t *mp) 1357 { 1358 xfs_buf_t *sbp; 1359 int error = 0; 1360 1361 /* 1362 * skip superblock write if fs is read-only, or 1363 * if we are doing a forced umount. 1364 */ 1365 if (!((mp->m_flags & XFS_MOUNT_RDONLY) || 1366 XFS_FORCED_SHUTDOWN(mp))) { 1367 1368 sbp = xfs_getsb(mp, 0); 1369 1370 /* 1371 * mark shared-readonly if desired 1372 */ 1373 if (mp->m_mk_sharedro) 1374 xfs_mark_shared_ro(mp, sbp); 1375 1376 XFS_BUF_UNDONE(sbp); 1377 XFS_BUF_UNREAD(sbp); 1378 XFS_BUF_UNDELAYWRITE(sbp); 1379 XFS_BUF_WRITE(sbp); 1380 XFS_BUF_UNASYNC(sbp); 1381 ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp); 1382 xfsbdstrat(mp, sbp); 1383 /* Nevermind errors we might get here. */ 1384 error = xfs_iowait(sbp); 1385 if (error) 1386 xfs_ioerror_alert("xfs_unmountfs_writesb", 1387 mp, sbp, XFS_BUF_ADDR(sbp)); 1388 if (error && mp->m_mk_sharedro) 1389 xfs_fs_cmn_err(CE_ALERT, mp, "Superblock write error detected while unmounting. Filesystem may not be marked shared readonly"); 1390 xfs_buf_relse(sbp); 1391 } 1392 return error; 1393 } 1394 1395 /* 1396 * xfs_mod_sb() can be used to copy arbitrary changes to the 1397 * in-core superblock into the superblock buffer to be logged. 1398 * It does not provide the higher level of locking that is 1399 * needed to protect the in-core superblock from concurrent 1400 * access. 1401 */ 1402 void 1403 xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) 1404 { 1405 xfs_buf_t *bp; 1406 int first; 1407 int last; 1408 xfs_mount_t *mp; 1409 xfs_sb_field_t f; 1410 1411 ASSERT(fields); 1412 if (!fields) 1413 return; 1414 mp = tp->t_mountp; 1415 bp = xfs_trans_getsb(tp, mp, 0); 1416 first = sizeof(xfs_sb_t); 1417 last = 0; 1418 1419 /* translate/copy */ 1420 1421 xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields); 1422 1423 /* find modified range */ 1424 1425 f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); 1426 ASSERT((1LL << f) & XFS_SB_MOD_BITS); 1427 first = xfs_sb_info[f].offset; 1428 1429 f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields); 1430 ASSERT((1LL << f) & XFS_SB_MOD_BITS); 1431 last = xfs_sb_info[f + 1].offset - 1; 1432 1433 xfs_trans_log_buf(tp, bp, first, last); 1434 } 1435 1436 1437 /* 1438 * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply 1439 * a delta to a specified field in the in-core superblock. Simply 1440 * switch on the field indicated and apply the delta to that field. 1441 * Fields are not allowed to dip below zero, so if the delta would 1442 * do this do not apply it and return EINVAL. 1443 * 1444 * The SB_LOCK must be held when this routine is called. 1445 */ 1446 int 1447 xfs_mod_incore_sb_unlocked( 1448 xfs_mount_t *mp, 1449 xfs_sb_field_t field, 1450 int64_t delta, 1451 int rsvd) 1452 { 1453 int scounter; /* short counter for 32 bit fields */ 1454 long long lcounter; /* long counter for 64 bit fields */ 1455 long long res_used, rem; 1456 1457 /* 1458 * With the in-core superblock spin lock held, switch 1459 * on the indicated field. Apply the delta to the 1460 * proper field. If the fields value would dip below 1461 * 0, then do not apply the delta and return EINVAL. 1462 */ 1463 switch (field) { 1464 case XFS_SBS_ICOUNT: 1465 lcounter = (long long)mp->m_sb.sb_icount; 1466 lcounter += delta; 1467 if (lcounter < 0) { 1468 ASSERT(0); 1469 return XFS_ERROR(EINVAL); 1470 } 1471 mp->m_sb.sb_icount = lcounter; 1472 return 0; 1473 case XFS_SBS_IFREE: 1474 lcounter = (long long)mp->m_sb.sb_ifree; 1475 lcounter += delta; 1476 if (lcounter < 0) { 1477 ASSERT(0); 1478 return XFS_ERROR(EINVAL); 1479 } 1480 mp->m_sb.sb_ifree = lcounter; 1481 return 0; 1482 case XFS_SBS_FDBLOCKS: 1483 lcounter = (long long) 1484 mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); 1485 res_used = (long long)(mp->m_resblks - mp->m_resblks_avail); 1486 1487 if (delta > 0) { /* Putting blocks back */ 1488 if (res_used > delta) { 1489 mp->m_resblks_avail += delta; 1490 } else { 1491 rem = delta - res_used; 1492 mp->m_resblks_avail = mp->m_resblks; 1493 lcounter += rem; 1494 } 1495 } else { /* Taking blocks away */ 1496 1497 lcounter += delta; 1498 1499 /* 1500 * If were out of blocks, use any available reserved blocks if 1501 * were allowed to. 1502 */ 1503 1504 if (lcounter < 0) { 1505 if (rsvd) { 1506 lcounter = (long long)mp->m_resblks_avail + delta; 1507 if (lcounter < 0) { 1508 return XFS_ERROR(ENOSPC); 1509 } 1510 mp->m_resblks_avail = lcounter; 1511 return 0; 1512 } else { /* not reserved */ 1513 return XFS_ERROR(ENOSPC); 1514 } 1515 } 1516 } 1517 1518 mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); 1519 return 0; 1520 case XFS_SBS_FREXTENTS: 1521 lcounter = (long long)mp->m_sb.sb_frextents; 1522 lcounter += delta; 1523 if (lcounter < 0) { 1524 return XFS_ERROR(ENOSPC); 1525 } 1526 mp->m_sb.sb_frextents = lcounter; 1527 return 0; 1528 case XFS_SBS_DBLOCKS: 1529 lcounter = (long long)mp->m_sb.sb_dblocks; 1530 lcounter += delta; 1531 if (lcounter < 0) { 1532 ASSERT(0); 1533 return XFS_ERROR(EINVAL); 1534 } 1535 mp->m_sb.sb_dblocks = lcounter; 1536 return 0; 1537 case XFS_SBS_AGCOUNT: 1538 scounter = mp->m_sb.sb_agcount; 1539 scounter += delta; 1540 if (scounter < 0) { 1541 ASSERT(0); 1542 return XFS_ERROR(EINVAL); 1543 } 1544 mp->m_sb.sb_agcount = scounter; 1545 return 0; 1546 case XFS_SBS_IMAX_PCT: 1547 scounter = mp->m_sb.sb_imax_pct; 1548 scounter += delta; 1549 if (scounter < 0) { 1550 ASSERT(0); 1551 return XFS_ERROR(EINVAL); 1552 } 1553 mp->m_sb.sb_imax_pct = scounter; 1554 return 0; 1555 case XFS_SBS_REXTSIZE: 1556 scounter = mp->m_sb.sb_rextsize; 1557 scounter += delta; 1558 if (scounter < 0) { 1559 ASSERT(0); 1560 return XFS_ERROR(EINVAL); 1561 } 1562 mp->m_sb.sb_rextsize = scounter; 1563 return 0; 1564 case XFS_SBS_RBMBLOCKS: 1565 scounter = mp->m_sb.sb_rbmblocks; 1566 scounter += delta; 1567 if (scounter < 0) { 1568 ASSERT(0); 1569 return XFS_ERROR(EINVAL); 1570 } 1571 mp->m_sb.sb_rbmblocks = scounter; 1572 return 0; 1573 case XFS_SBS_RBLOCKS: 1574 lcounter = (long long)mp->m_sb.sb_rblocks; 1575 lcounter += delta; 1576 if (lcounter < 0) { 1577 ASSERT(0); 1578 return XFS_ERROR(EINVAL); 1579 } 1580 mp->m_sb.sb_rblocks = lcounter; 1581 return 0; 1582 case XFS_SBS_REXTENTS: 1583 lcounter = (long long)mp->m_sb.sb_rextents; 1584 lcounter += delta; 1585 if (lcounter < 0) { 1586 ASSERT(0); 1587 return XFS_ERROR(EINVAL); 1588 } 1589 mp->m_sb.sb_rextents = lcounter; 1590 return 0; 1591 case XFS_SBS_REXTSLOG: 1592 scounter = mp->m_sb.sb_rextslog; 1593 scounter += delta; 1594 if (scounter < 0) { 1595 ASSERT(0); 1596 return XFS_ERROR(EINVAL); 1597 } 1598 mp->m_sb.sb_rextslog = scounter; 1599 return 0; 1600 default: 1601 ASSERT(0); 1602 return XFS_ERROR(EINVAL); 1603 } 1604 } 1605 1606 /* 1607 * xfs_mod_incore_sb() is used to change a field in the in-core 1608 * superblock structure by the specified delta. This modification 1609 * is protected by the SB_LOCK. Just use the xfs_mod_incore_sb_unlocked() 1610 * routine to do the work. 1611 */ 1612 int 1613 xfs_mod_incore_sb( 1614 xfs_mount_t *mp, 1615 xfs_sb_field_t field, 1616 int64_t delta, 1617 int rsvd) 1618 { 1619 unsigned long s; 1620 int status; 1621 1622 /* check for per-cpu counters */ 1623 switch (field) { 1624 #ifdef HAVE_PERCPU_SB 1625 case XFS_SBS_ICOUNT: 1626 case XFS_SBS_IFREE: 1627 case XFS_SBS_FDBLOCKS: 1628 if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { 1629 status = xfs_icsb_modify_counters(mp, field, 1630 delta, rsvd); 1631 break; 1632 } 1633 /* FALLTHROUGH */ 1634 #endif 1635 default: 1636 s = XFS_SB_LOCK(mp); 1637 status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); 1638 XFS_SB_UNLOCK(mp, s); 1639 break; 1640 } 1641 1642 return status; 1643 } 1644 1645 /* 1646 * xfs_mod_incore_sb_batch() is used to change more than one field 1647 * in the in-core superblock structure at a time. This modification 1648 * is protected by a lock internal to this module. The fields and 1649 * changes to those fields are specified in the array of xfs_mod_sb 1650 * structures passed in. 1651 * 1652 * Either all of the specified deltas will be applied or none of 1653 * them will. If any modified field dips below 0, then all modifications 1654 * will be backed out and EINVAL will be returned. 1655 */ 1656 int 1657 xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd) 1658 { 1659 unsigned long s; 1660 int status=0; 1661 xfs_mod_sb_t *msbp; 1662 1663 /* 1664 * Loop through the array of mod structures and apply each 1665 * individually. If any fail, then back out all those 1666 * which have already been applied. Do all of this within 1667 * the scope of the SB_LOCK so that all of the changes will 1668 * be atomic. 1669 */ 1670 s = XFS_SB_LOCK(mp); 1671 msbp = &msb[0]; 1672 for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) { 1673 /* 1674 * Apply the delta at index n. If it fails, break 1675 * from the loop so we'll fall into the undo loop 1676 * below. 1677 */ 1678 switch (msbp->msb_field) { 1679 #ifdef HAVE_PERCPU_SB 1680 case XFS_SBS_ICOUNT: 1681 case XFS_SBS_IFREE: 1682 case XFS_SBS_FDBLOCKS: 1683 if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { 1684 XFS_SB_UNLOCK(mp, s); 1685 status = xfs_icsb_modify_counters(mp, 1686 msbp->msb_field, 1687 msbp->msb_delta, rsvd); 1688 s = XFS_SB_LOCK(mp); 1689 break; 1690 } 1691 /* FALLTHROUGH */ 1692 #endif 1693 default: 1694 status = xfs_mod_incore_sb_unlocked(mp, 1695 msbp->msb_field, 1696 msbp->msb_delta, rsvd); 1697 break; 1698 } 1699 1700 if (status != 0) { 1701 break; 1702 } 1703 } 1704 1705 /* 1706 * If we didn't complete the loop above, then back out 1707 * any changes made to the superblock. If you add code 1708 * between the loop above and here, make sure that you 1709 * preserve the value of status. Loop back until 1710 * we step below the beginning of the array. Make sure 1711 * we don't touch anything back there. 1712 */ 1713 if (status != 0) { 1714 msbp--; 1715 while (msbp >= msb) { 1716 switch (msbp->msb_field) { 1717 #ifdef HAVE_PERCPU_SB 1718 case XFS_SBS_ICOUNT: 1719 case XFS_SBS_IFREE: 1720 case XFS_SBS_FDBLOCKS: 1721 if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { 1722 XFS_SB_UNLOCK(mp, s); 1723 status = xfs_icsb_modify_counters(mp, 1724 msbp->msb_field, 1725 -(msbp->msb_delta), 1726 rsvd); 1727 s = XFS_SB_LOCK(mp); 1728 break; 1729 } 1730 /* FALLTHROUGH */ 1731 #endif 1732 default: 1733 status = xfs_mod_incore_sb_unlocked(mp, 1734 msbp->msb_field, 1735 -(msbp->msb_delta), 1736 rsvd); 1737 break; 1738 } 1739 ASSERT(status == 0); 1740 msbp--; 1741 } 1742 } 1743 XFS_SB_UNLOCK(mp, s); 1744 return status; 1745 } 1746 1747 /* 1748 * xfs_getsb() is called to obtain the buffer for the superblock. 1749 * The buffer is returned locked and read in from disk. 1750 * The buffer should be released with a call to xfs_brelse(). 1751 * 1752 * If the flags parameter is BUF_TRYLOCK, then we'll only return 1753 * the superblock buffer if it can be locked without sleeping. 1754 * If it can't then we'll return NULL. 1755 */ 1756 xfs_buf_t * 1757 xfs_getsb( 1758 xfs_mount_t *mp, 1759 int flags) 1760 { 1761 xfs_buf_t *bp; 1762 1763 ASSERT(mp->m_sb_bp != NULL); 1764 bp = mp->m_sb_bp; 1765 if (flags & XFS_BUF_TRYLOCK) { 1766 if (!XFS_BUF_CPSEMA(bp)) { 1767 return NULL; 1768 } 1769 } else { 1770 XFS_BUF_PSEMA(bp, PRIBIO); 1771 } 1772 XFS_BUF_HOLD(bp); 1773 ASSERT(XFS_BUF_ISDONE(bp)); 1774 return bp; 1775 } 1776 1777 /* 1778 * Used to free the superblock along various error paths. 1779 */ 1780 void 1781 xfs_freesb( 1782 xfs_mount_t *mp) 1783 { 1784 xfs_buf_t *bp; 1785 1786 /* 1787 * Use xfs_getsb() so that the buffer will be locked 1788 * when we call xfs_buf_relse(). 1789 */ 1790 bp = xfs_getsb(mp, 0); 1791 XFS_BUF_UNMANAGE(bp); 1792 xfs_buf_relse(bp); 1793 mp->m_sb_bp = NULL; 1794 } 1795 1796 /* 1797 * See if the UUID is unique among mounted XFS filesystems. 1798 * Mount fails if UUID is nil or a FS with the same UUID is already mounted. 1799 */ 1800 STATIC int 1801 xfs_uuid_mount( 1802 xfs_mount_t *mp) 1803 { 1804 if (uuid_is_nil(&mp->m_sb.sb_uuid)) { 1805 cmn_err(CE_WARN, 1806 "XFS: Filesystem %s has nil UUID - can't mount", 1807 mp->m_fsname); 1808 return -1; 1809 } 1810 if (!uuid_table_insert(&mp->m_sb.sb_uuid)) { 1811 cmn_err(CE_WARN, 1812 "XFS: Filesystem %s has duplicate UUID - can't mount", 1813 mp->m_fsname); 1814 return -1; 1815 } 1816 return 0; 1817 } 1818 1819 /* 1820 * Remove filesystem from the UUID table. 1821 */ 1822 STATIC void 1823 xfs_uuid_unmount( 1824 xfs_mount_t *mp) 1825 { 1826 uuid_table_remove(&mp->m_sb.sb_uuid); 1827 } 1828 1829 /* 1830 * Used to log changes to the superblock unit and width fields which could 1831 * be altered by the mount options. Only the first superblock is updated. 1832 */ 1833 STATIC void 1834 xfs_mount_log_sbunit( 1835 xfs_mount_t *mp, 1836 __int64_t fields) 1837 { 1838 xfs_trans_t *tp; 1839 1840 ASSERT(fields & (XFS_SB_UNIT|XFS_SB_WIDTH|XFS_SB_UUID)); 1841 1842 tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); 1843 if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1844 XFS_DEFAULT_LOG_COUNT)) { 1845 xfs_trans_cancel(tp, 0); 1846 return; 1847 } 1848 xfs_mod_sb(tp, fields); 1849 xfs_trans_commit(tp, 0); 1850 } 1851 1852 1853 #ifdef HAVE_PERCPU_SB 1854 /* 1855 * Per-cpu incore superblock counters 1856 * 1857 * Simple concept, difficult implementation 1858 * 1859 * Basically, replace the incore superblock counters with a distributed per cpu 1860 * counter for contended fields (e.g. free block count). 1861 * 1862 * Difficulties arise in that the incore sb is used for ENOSPC checking, and 1863 * hence needs to be accurately read when we are running low on space. Hence 1864 * there is a method to enable and disable the per-cpu counters based on how 1865 * much "stuff" is available in them. 1866 * 1867 * Basically, a counter is enabled if there is enough free resource to justify 1868 * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local 1869 * ENOSPC), then we disable the counters to synchronise all callers and 1870 * re-distribute the available resources. 1871 * 1872 * If, once we redistributed the available resources, we still get a failure, 1873 * we disable the per-cpu counter and go through the slow path. 1874 * 1875 * The slow path is the current xfs_mod_incore_sb() function. This means that 1876 * when we disable a per-cpu counter, we need to drain it's resources back to 1877 * the global superblock. We do this after disabling the counter to prevent 1878 * more threads from queueing up on the counter. 1879 * 1880 * Essentially, this means that we still need a lock in the fast path to enable 1881 * synchronisation between the global counters and the per-cpu counters. This 1882 * is not a problem because the lock will be local to a CPU almost all the time 1883 * and have little contention except when we get to ENOSPC conditions. 1884 * 1885 * Basically, this lock becomes a barrier that enables us to lock out the fast 1886 * path while we do things like enabling and disabling counters and 1887 * synchronising the counters. 1888 * 1889 * Locking rules: 1890 * 1891 * 1. XFS_SB_LOCK() before picking up per-cpu locks 1892 * 2. per-cpu locks always picked up via for_each_online_cpu() order 1893 * 3. accurate counter sync requires XFS_SB_LOCK + per cpu locks 1894 * 4. modifying per-cpu counters requires holding per-cpu lock 1895 * 5. modifying global counters requires holding XFS_SB_LOCK 1896 * 6. enabling or disabling a counter requires holding the XFS_SB_LOCK 1897 * and _none_ of the per-cpu locks. 1898 * 1899 * Disabled counters are only ever re-enabled by a balance operation 1900 * that results in more free resources per CPU than a given threshold. 1901 * To ensure counters don't remain disabled, they are rebalanced when 1902 * the global resource goes above a higher threshold (i.e. some hysteresis 1903 * is present to prevent thrashing). 1904 */ 1905 1906 #ifdef CONFIG_HOTPLUG_CPU 1907 /* 1908 * hot-plug CPU notifier support. 1909 * 1910 * We need a notifier per filesystem as we need to be able to identify 1911 * the filesystem to balance the counters out. This is achieved by 1912 * having a notifier block embedded in the xfs_mount_t and doing pointer 1913 * magic to get the mount pointer from the notifier block address. 1914 */ 1915 STATIC int 1916 xfs_icsb_cpu_notify( 1917 struct notifier_block *nfb, 1918 unsigned long action, 1919 void *hcpu) 1920 { 1921 xfs_icsb_cnts_t *cntp; 1922 xfs_mount_t *mp; 1923 int s; 1924 1925 mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier); 1926 cntp = (xfs_icsb_cnts_t *) 1927 per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu); 1928 switch (action) { 1929 case CPU_UP_PREPARE: 1930 case CPU_UP_PREPARE_FROZEN: 1931 /* Easy Case - initialize the area and locks, and 1932 * then rebalance when online does everything else for us. */ 1933 memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); 1934 break; 1935 case CPU_ONLINE: 1936 case CPU_ONLINE_FROZEN: 1937 xfs_icsb_lock(mp); 1938 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0); 1939 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0); 1940 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0); 1941 xfs_icsb_unlock(mp); 1942 break; 1943 case CPU_DEAD: 1944 case CPU_DEAD_FROZEN: 1945 /* Disable all the counters, then fold the dead cpu's 1946 * count into the total on the global superblock and 1947 * re-enable the counters. */ 1948 xfs_icsb_lock(mp); 1949 s = XFS_SB_LOCK(mp); 1950 xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT); 1951 xfs_icsb_disable_counter(mp, XFS_SBS_IFREE); 1952 xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS); 1953 1954 mp->m_sb.sb_icount += cntp->icsb_icount; 1955 mp->m_sb.sb_ifree += cntp->icsb_ifree; 1956 mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks; 1957 1958 memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); 1959 1960 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 1961 XFS_ICSB_SB_LOCKED, 0); 1962 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 1963 XFS_ICSB_SB_LOCKED, 0); 1964 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 1965 XFS_ICSB_SB_LOCKED, 0); 1966 XFS_SB_UNLOCK(mp, s); 1967 xfs_icsb_unlock(mp); 1968 break; 1969 } 1970 1971 return NOTIFY_OK; 1972 } 1973 #endif /* CONFIG_HOTPLUG_CPU */ 1974 1975 int 1976 xfs_icsb_init_counters( 1977 xfs_mount_t *mp) 1978 { 1979 xfs_icsb_cnts_t *cntp; 1980 int i; 1981 1982 mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t); 1983 if (mp->m_sb_cnts == NULL) 1984 return -ENOMEM; 1985 1986 #ifdef CONFIG_HOTPLUG_CPU 1987 mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify; 1988 mp->m_icsb_notifier.priority = 0; 1989 register_hotcpu_notifier(&mp->m_icsb_notifier); 1990 #endif /* CONFIG_HOTPLUG_CPU */ 1991 1992 for_each_online_cpu(i) { 1993 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); 1994 memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); 1995 } 1996 1997 mutex_init(&mp->m_icsb_mutex); 1998 1999 /* 2000 * start with all counters disabled so that the 2001 * initial balance kicks us off correctly 2002 */ 2003 mp->m_icsb_counters = -1; 2004 return 0; 2005 } 2006 2007 void 2008 xfs_icsb_reinit_counters( 2009 xfs_mount_t *mp) 2010 { 2011 xfs_icsb_lock(mp); 2012 /* 2013 * start with all counters disabled so that the 2014 * initial balance kicks us off correctly 2015 */ 2016 mp->m_icsb_counters = -1; 2017 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0); 2018 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0); 2019 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0); 2020 xfs_icsb_unlock(mp); 2021 } 2022 2023 STATIC void 2024 xfs_icsb_destroy_counters( 2025 xfs_mount_t *mp) 2026 { 2027 if (mp->m_sb_cnts) { 2028 unregister_hotcpu_notifier(&mp->m_icsb_notifier); 2029 free_percpu(mp->m_sb_cnts); 2030 } 2031 mutex_destroy(&mp->m_icsb_mutex); 2032 } 2033 2034 STATIC_INLINE void 2035 xfs_icsb_lock_cntr( 2036 xfs_icsb_cnts_t *icsbp) 2037 { 2038 while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) { 2039 ndelay(1000); 2040 } 2041 } 2042 2043 STATIC_INLINE void 2044 xfs_icsb_unlock_cntr( 2045 xfs_icsb_cnts_t *icsbp) 2046 { 2047 clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags); 2048 } 2049 2050 2051 STATIC_INLINE void 2052 xfs_icsb_lock_all_counters( 2053 xfs_mount_t *mp) 2054 { 2055 xfs_icsb_cnts_t *cntp; 2056 int i; 2057 2058 for_each_online_cpu(i) { 2059 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); 2060 xfs_icsb_lock_cntr(cntp); 2061 } 2062 } 2063 2064 STATIC_INLINE void 2065 xfs_icsb_unlock_all_counters( 2066 xfs_mount_t *mp) 2067 { 2068 xfs_icsb_cnts_t *cntp; 2069 int i; 2070 2071 for_each_online_cpu(i) { 2072 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); 2073 xfs_icsb_unlock_cntr(cntp); 2074 } 2075 } 2076 2077 STATIC void 2078 xfs_icsb_count( 2079 xfs_mount_t *mp, 2080 xfs_icsb_cnts_t *cnt, 2081 int flags) 2082 { 2083 xfs_icsb_cnts_t *cntp; 2084 int i; 2085 2086 memset(cnt, 0, sizeof(xfs_icsb_cnts_t)); 2087 2088 if (!(flags & XFS_ICSB_LAZY_COUNT)) 2089 xfs_icsb_lock_all_counters(mp); 2090 2091 for_each_online_cpu(i) { 2092 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i); 2093 cnt->icsb_icount += cntp->icsb_icount; 2094 cnt->icsb_ifree += cntp->icsb_ifree; 2095 cnt->icsb_fdblocks += cntp->icsb_fdblocks; 2096 } 2097 2098 if (!(flags & XFS_ICSB_LAZY_COUNT)) 2099 xfs_icsb_unlock_all_counters(mp); 2100 } 2101 2102 STATIC int 2103 xfs_icsb_counter_disabled( 2104 xfs_mount_t *mp, 2105 xfs_sb_field_t field) 2106 { 2107 ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); 2108 return test_bit(field, &mp->m_icsb_counters); 2109 } 2110 2111 STATIC int 2112 xfs_icsb_disable_counter( 2113 xfs_mount_t *mp, 2114 xfs_sb_field_t field) 2115 { 2116 xfs_icsb_cnts_t cnt; 2117 2118 ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); 2119 2120 /* 2121 * If we are already disabled, then there is nothing to do 2122 * here. We check before locking all the counters to avoid 2123 * the expensive lock operation when being called in the 2124 * slow path and the counter is already disabled. This is 2125 * safe because the only time we set or clear this state is under 2126 * the m_icsb_mutex. 2127 */ 2128 if (xfs_icsb_counter_disabled(mp, field)) 2129 return 0; 2130 2131 xfs_icsb_lock_all_counters(mp); 2132 if (!test_and_set_bit(field, &mp->m_icsb_counters)) { 2133 /* drain back to superblock */ 2134 2135 xfs_icsb_count(mp, &cnt, XFS_ICSB_SB_LOCKED|XFS_ICSB_LAZY_COUNT); 2136 switch(field) { 2137 case XFS_SBS_ICOUNT: 2138 mp->m_sb.sb_icount = cnt.icsb_icount; 2139 break; 2140 case XFS_SBS_IFREE: 2141 mp->m_sb.sb_ifree = cnt.icsb_ifree; 2142 break; 2143 case XFS_SBS_FDBLOCKS: 2144 mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks; 2145 break; 2146 default: 2147 BUG(); 2148 } 2149 } 2150 2151 xfs_icsb_unlock_all_counters(mp); 2152 2153 return 0; 2154 } 2155 2156 STATIC void 2157 xfs_icsb_enable_counter( 2158 xfs_mount_t *mp, 2159 xfs_sb_field_t field, 2160 uint64_t count, 2161 uint64_t resid) 2162 { 2163 xfs_icsb_cnts_t *cntp; 2164 int i; 2165 2166 ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS)); 2167 2168 xfs_icsb_lock_all_counters(mp); 2169 for_each_online_cpu(i) { 2170 cntp = per_cpu_ptr(mp->m_sb_cnts, i); 2171 switch (field) { 2172 case XFS_SBS_ICOUNT: 2173 cntp->icsb_icount = count + resid; 2174 break; 2175 case XFS_SBS_IFREE: 2176 cntp->icsb_ifree = count + resid; 2177 break; 2178 case XFS_SBS_FDBLOCKS: 2179 cntp->icsb_fdblocks = count + resid; 2180 break; 2181 default: 2182 BUG(); 2183 break; 2184 } 2185 resid = 0; 2186 } 2187 clear_bit(field, &mp->m_icsb_counters); 2188 xfs_icsb_unlock_all_counters(mp); 2189 } 2190 2191 void 2192 xfs_icsb_sync_counters_flags( 2193 xfs_mount_t *mp, 2194 int flags) 2195 { 2196 xfs_icsb_cnts_t cnt; 2197 int s; 2198 2199 /* Pass 1: lock all counters */ 2200 if ((flags & XFS_ICSB_SB_LOCKED) == 0) 2201 s = XFS_SB_LOCK(mp); 2202 2203 xfs_icsb_count(mp, &cnt, flags); 2204 2205 /* Step 3: update mp->m_sb fields */ 2206 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT)) 2207 mp->m_sb.sb_icount = cnt.icsb_icount; 2208 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE)) 2209 mp->m_sb.sb_ifree = cnt.icsb_ifree; 2210 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS)) 2211 mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks; 2212 2213 if ((flags & XFS_ICSB_SB_LOCKED) == 0) 2214 XFS_SB_UNLOCK(mp, s); 2215 } 2216 2217 /* 2218 * Accurate update of per-cpu counters to incore superblock 2219 */ 2220 STATIC void 2221 xfs_icsb_sync_counters( 2222 xfs_mount_t *mp) 2223 { 2224 xfs_icsb_sync_counters_flags(mp, 0); 2225 } 2226 2227 /* 2228 * Balance and enable/disable counters as necessary. 2229 * 2230 * Thresholds for re-enabling counters are somewhat magic. inode counts are 2231 * chosen to be the same number as single on disk allocation chunk per CPU, and 2232 * free blocks is something far enough zero that we aren't going thrash when we 2233 * get near ENOSPC. We also need to supply a minimum we require per cpu to 2234 * prevent looping endlessly when xfs_alloc_space asks for more than will 2235 * be distributed to a single CPU but each CPU has enough blocks to be 2236 * reenabled. 2237 * 2238 * Note that we can be called when counters are already disabled. 2239 * xfs_icsb_disable_counter() optimises the counter locking in this case to 2240 * prevent locking every per-cpu counter needlessly. 2241 */ 2242 2243 #define XFS_ICSB_INO_CNTR_REENABLE (uint64_t)64 2244 #define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \ 2245 (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp)) 2246 STATIC void 2247 xfs_icsb_balance_counter( 2248 xfs_mount_t *mp, 2249 xfs_sb_field_t field, 2250 int flags, 2251 int min_per_cpu) 2252 { 2253 uint64_t count, resid; 2254 int weight = num_online_cpus(); 2255 int s; 2256 uint64_t min = (uint64_t)min_per_cpu; 2257 2258 if (!(flags & XFS_ICSB_SB_LOCKED)) 2259 s = XFS_SB_LOCK(mp); 2260 2261 /* disable counter and sync counter */ 2262 xfs_icsb_disable_counter(mp, field); 2263 2264 /* update counters - first CPU gets residual*/ 2265 switch (field) { 2266 case XFS_SBS_ICOUNT: 2267 count = mp->m_sb.sb_icount; 2268 resid = do_div(count, weight); 2269 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE)) 2270 goto out; 2271 break; 2272 case XFS_SBS_IFREE: 2273 count = mp->m_sb.sb_ifree; 2274 resid = do_div(count, weight); 2275 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE)) 2276 goto out; 2277 break; 2278 case XFS_SBS_FDBLOCKS: 2279 count = mp->m_sb.sb_fdblocks; 2280 resid = do_div(count, weight); 2281 if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp))) 2282 goto out; 2283 break; 2284 default: 2285 BUG(); 2286 count = resid = 0; /* quiet, gcc */ 2287 break; 2288 } 2289 2290 xfs_icsb_enable_counter(mp, field, count, resid); 2291 out: 2292 if (!(flags & XFS_ICSB_SB_LOCKED)) 2293 XFS_SB_UNLOCK(mp, s); 2294 } 2295 2296 int 2297 xfs_icsb_modify_counters( 2298 xfs_mount_t *mp, 2299 xfs_sb_field_t field, 2300 int64_t delta, 2301 int rsvd) 2302 { 2303 xfs_icsb_cnts_t *icsbp; 2304 long long lcounter; /* long counter for 64 bit fields */ 2305 int cpu, ret = 0, s; 2306 2307 might_sleep(); 2308 again: 2309 cpu = get_cpu(); 2310 icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu); 2311 2312 /* 2313 * if the counter is disabled, go to slow path 2314 */ 2315 if (unlikely(xfs_icsb_counter_disabled(mp, field))) 2316 goto slow_path; 2317 xfs_icsb_lock_cntr(icsbp); 2318 if (unlikely(xfs_icsb_counter_disabled(mp, field))) { 2319 xfs_icsb_unlock_cntr(icsbp); 2320 goto slow_path; 2321 } 2322 2323 switch (field) { 2324 case XFS_SBS_ICOUNT: 2325 lcounter = icsbp->icsb_icount; 2326 lcounter += delta; 2327 if (unlikely(lcounter < 0)) 2328 goto balance_counter; 2329 icsbp->icsb_icount = lcounter; 2330 break; 2331 2332 case XFS_SBS_IFREE: 2333 lcounter = icsbp->icsb_ifree; 2334 lcounter += delta; 2335 if (unlikely(lcounter < 0)) 2336 goto balance_counter; 2337 icsbp->icsb_ifree = lcounter; 2338 break; 2339 2340 case XFS_SBS_FDBLOCKS: 2341 BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0); 2342 2343 lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); 2344 lcounter += delta; 2345 if (unlikely(lcounter < 0)) 2346 goto balance_counter; 2347 icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); 2348 break; 2349 default: 2350 BUG(); 2351 break; 2352 } 2353 xfs_icsb_unlock_cntr(icsbp); 2354 put_cpu(); 2355 return 0; 2356 2357 slow_path: 2358 put_cpu(); 2359 2360 /* 2361 * serialise with a mutex so we don't burn lots of cpu on 2362 * the superblock lock. We still need to hold the superblock 2363 * lock, however, when we modify the global structures. 2364 */ 2365 xfs_icsb_lock(mp); 2366 2367 /* 2368 * Now running atomically. 2369 * 2370 * If the counter is enabled, someone has beaten us to rebalancing. 2371 * Drop the lock and try again in the fast path.... 2372 */ 2373 if (!(xfs_icsb_counter_disabled(mp, field))) { 2374 xfs_icsb_unlock(mp); 2375 goto again; 2376 } 2377 2378 /* 2379 * The counter is currently disabled. Because we are 2380 * running atomically here, we know a rebalance cannot 2381 * be in progress. Hence we can go straight to operating 2382 * on the global superblock. We do not call xfs_mod_incore_sb() 2383 * here even though we need to get the SB_LOCK. Doing so 2384 * will cause us to re-enter this function and deadlock. 2385 * Hence we get the SB_LOCK ourselves and then call 2386 * xfs_mod_incore_sb_unlocked() as the unlocked path operates 2387 * directly on the global counters. 2388 */ 2389 s = XFS_SB_LOCK(mp); 2390 ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); 2391 XFS_SB_UNLOCK(mp, s); 2392 2393 /* 2394 * Now that we've modified the global superblock, we 2395 * may be able to re-enable the distributed counters 2396 * (e.g. lots of space just got freed). After that 2397 * we are done. 2398 */ 2399 if (ret != ENOSPC) 2400 xfs_icsb_balance_counter(mp, field, 0, 0); 2401 xfs_icsb_unlock(mp); 2402 return ret; 2403 2404 balance_counter: 2405 xfs_icsb_unlock_cntr(icsbp); 2406 put_cpu(); 2407 2408 /* 2409 * We may have multiple threads here if multiple per-cpu 2410 * counters run dry at the same time. This will mean we can 2411 * do more balances than strictly necessary but it is not 2412 * the common slowpath case. 2413 */ 2414 xfs_icsb_lock(mp); 2415 2416 /* 2417 * running atomically. 2418 * 2419 * This will leave the counter in the correct state for future 2420 * accesses. After the rebalance, we simply try again and our retry 2421 * will either succeed through the fast path or slow path without 2422 * another balance operation being required. 2423 */ 2424 xfs_icsb_balance_counter(mp, field, 0, delta); 2425 xfs_icsb_unlock(mp); 2426 goto again; 2427 } 2428 2429 #endif 2430