// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */

#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_alloc.h"
#include "xfs_fsops.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_dir2.h"
#include "xfs_extfree_item.h"
#include "xfs_mru_cache.h"
#include "xfs_inode_item.h"
#include "xfs_icache.h"
#include "xfs_trace.h"
#include "xfs_icreate_item.h"
#include "xfs_filestream.h"
#include "xfs_quota.h"
#include "xfs_sysfs.h"
#include "xfs_ondisk.h"
#include "xfs_rmap_item.h"
#include "xfs_refcount_item.h"
#include "xfs_bmap_item.h"
#include "xfs_reflink.h"
#include "xfs_pwork.h"
#include "xfs_ag.h"
#include "xfs_defer.h"
#include "xfs_attr_item.h"
#include "xfs_xattr.h"

#include <linux/magic.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>

static const struct super_operations xfs_super_operations;

static struct kset *xfs_kset;		/* top-level xfs sysfs dir */
#ifdef DEBUG
static struct xfs_kobj xfs_dbg_kobj;	/* global debug sysfs attrs */
#endif

#ifdef CONFIG_HOTPLUG_CPU
static LIST_HEAD(xfs_mount_list);
static DEFINE_SPINLOCK(xfs_mount_list_lock);

static inline void xfs_mount_list_add(struct xfs_mount *mp)
{
	spin_lock(&xfs_mount_list_lock);
	list_add(&mp->m_mount_list, &xfs_mount_list);
	spin_unlock(&xfs_mount_list_lock);
}

static inline void xfs_mount_list_del(struct xfs_mount *mp)
{
	spin_lock(&xfs_mount_list_lock);
	list_del(&mp->m_mount_list);
	spin_unlock(&xfs_mount_list_lock);
}
#else /* !CONFIG_HOTPLUG_CPU */
static inline void xfs_mount_list_add(struct xfs_mount *mp) {}
static inline void xfs_mount_list_del(struct xfs_mount *mp) {}
#endif

enum xfs_dax_mode {
	XFS_DAX_INODE = 0,
	XFS_DAX_ALWAYS = 1,
	XFS_DAX_NEVER = 2,
};

static void
xfs_mount_set_dax_mode(
	struct xfs_mount	*mp,
	enum xfs_dax_mode	mode)
{
	switch (mode) {
	case XFS_DAX_INODE:
		mp->m_features &= ~(XFS_FEAT_DAX_ALWAYS | XFS_FEAT_DAX_NEVER);
		break;
	case XFS_DAX_ALWAYS:
		mp->m_features |= XFS_FEAT_DAX_ALWAYS;
		mp->m_features &= ~XFS_FEAT_DAX_NEVER;
		break;
	case XFS_DAX_NEVER:
		mp->m_features |= XFS_FEAT_DAX_NEVER;
		mp->m_features &= ~XFS_FEAT_DAX_ALWAYS;
		break;
	}
}

static const struct constant_table dax_param_enums[] = {
	{"inode",	XFS_DAX_INODE },
	{"always",	XFS_DAX_ALWAYS },
	{"never",	XFS_DAX_NEVER },
	{}
};

/*
 * Table driven mount option parser.
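 * Each option string handed to us by the VFS (e.g. "logbufs=8" or
 * "allocsize=64k") is matched against the xfs_fs_parameters table below
 * and then applied to the mount in xfs_fs_parse_param().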
 */
enum {
	Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev,
	Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
	Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
	Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, Opt_ikeep,
	Opt_noikeep, Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2,
	Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
	Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
	Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
	Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum,
};

static const struct fs_parameter_spec xfs_fs_parameters[] = {
	fsparam_u32("logbufs",		Opt_logbufs),
	fsparam_string("logbsize",	Opt_logbsize),
	fsparam_string("logdev",	Opt_logdev),
	fsparam_string("rtdev",		Opt_rtdev),
	fsparam_flag("wsync",		Opt_wsync),
	fsparam_flag("noalign",		Opt_noalign),
	fsparam_flag("swalloc",		Opt_swalloc),
	fsparam_u32("sunit",		Opt_sunit),
	fsparam_u32("swidth",		Opt_swidth),
	fsparam_flag("nouuid",		Opt_nouuid),
	fsparam_flag("grpid",		Opt_grpid),
	fsparam_flag("nogrpid",		Opt_nogrpid),
	fsparam_flag("bsdgroups",	Opt_bsdgroups),
	fsparam_flag("sysvgroups",	Opt_sysvgroups),
	fsparam_string("allocsize",	Opt_allocsize),
	fsparam_flag("norecovery",	Opt_norecovery),
	fsparam_flag("inode64",		Opt_inode64),
	fsparam_flag("inode32",		Opt_inode32),
	fsparam_flag("ikeep",		Opt_ikeep),
	fsparam_flag("noikeep",		Opt_noikeep),
	fsparam_flag("largeio",		Opt_largeio),
	fsparam_flag("nolargeio",	Opt_nolargeio),
	fsparam_flag("attr2",		Opt_attr2),
	fsparam_flag("noattr2",		Opt_noattr2),
	fsparam_flag("filestreams",	Opt_filestreams),
	fsparam_flag("quota",		Opt_quota),
	fsparam_flag("noquota",		Opt_noquota),
	fsparam_flag("usrquota",	Opt_usrquota),
	fsparam_flag("grpquota",	Opt_grpquota),
	fsparam_flag("prjquota",	Opt_prjquota),
	fsparam_flag("uquota",		Opt_uquota),
	fsparam_flag("gquota",		Opt_gquota),
	fsparam_flag("pquota",		Opt_pquota),
	fsparam_flag("uqnoenforce",	Opt_uqnoenforce),
	fsparam_flag("gqnoenforce",	Opt_gqnoenforce),
	fsparam_flag("pqnoenforce",	Opt_pqnoenforce),
	fsparam_flag("qnoenforce",	Opt_qnoenforce),
	fsparam_flag("discard",		Opt_discard),
	fsparam_flag("nodiscard",	Opt_nodiscard),
	fsparam_flag("dax",		Opt_dax),
	fsparam_enum("dax",		Opt_dax_enum, dax_param_enums),
	{}
};

struct proc_xfs_info {
	uint64_t	flag;
	char		*str;
};

static int
xfs_fs_show_options(
	struct seq_file		*m,
	struct dentry		*root)
{
	static struct proc_xfs_info xfs_info_set[] = {
		/* the few simple ones we can get from the mount struct */
		{ XFS_FEAT_IKEEP,		",ikeep" },
		{ XFS_FEAT_WSYNC,		",wsync" },
		{ XFS_FEAT_NOALIGN,		",noalign" },
		{ XFS_FEAT_SWALLOC,		",swalloc" },
		{ XFS_FEAT_NOUUID,		",nouuid" },
		{ XFS_FEAT_NORECOVERY,		",norecovery" },
		{ XFS_FEAT_ATTR2,		",attr2" },
		{ XFS_FEAT_FILESTREAMS,		",filestreams" },
		{ XFS_FEAT_GRPID,		",grpid" },
		{ XFS_FEAT_DISCARD,		",discard" },
		{ XFS_FEAT_LARGE_IOSIZE,	",largeio" },
		{ XFS_FEAT_DAX_ALWAYS,		",dax=always" },
		{ XFS_FEAT_DAX_NEVER,		",dax=never" },
		{ 0, NULL }
	};
	struct xfs_mount	*mp = XFS_M(root->d_sb);
	struct proc_xfs_info	*xfs_infop;

	for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
		if (mp->m_features & xfs_infop->flag)
			seq_puts(m, xfs_infop->str);
	}

	seq_printf(m, ",inode%d",
		   xfs_has_small_inums(mp) ? 32 : 64);

	if (xfs_has_allocsize(mp))
		seq_printf(m, ",allocsize=%dk",
			   (1 << mp->m_allocsize_log) >> 10);

	if (mp->m_logbufs > 0)
		seq_printf(m, ",logbufs=%d", mp->m_logbufs);
	if (mp->m_logbsize > 0)
		seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10);

	if (mp->m_logname)
		seq_show_option(m, "logdev", mp->m_logname);
	if (mp->m_rtname)
		seq_show_option(m, "rtdev", mp->m_rtname);

	if (mp->m_dalign > 0)
		seq_printf(m, ",sunit=%d",
			   (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
	if (mp->m_swidth > 0)
		seq_printf(m, ",swidth=%d",
			   (int)XFS_FSB_TO_BB(mp, mp->m_swidth));

	if (mp->m_qflags & XFS_UQUOTA_ENFD)
		seq_puts(m, ",usrquota");
	else if (mp->m_qflags & XFS_UQUOTA_ACCT)
		seq_puts(m, ",uqnoenforce");

	if (mp->m_qflags & XFS_PQUOTA_ENFD)
		seq_puts(m, ",prjquota");
	else if (mp->m_qflags & XFS_PQUOTA_ACCT)
		seq_puts(m, ",pqnoenforce");

	if (mp->m_qflags & XFS_GQUOTA_ENFD)
		seq_puts(m, ",grpquota");
	else if (mp->m_qflags & XFS_GQUOTA_ACCT)
		seq_puts(m, ",gqnoenforce");

	if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
		seq_puts(m, ",noquota");

	return 0;
}

/*
 * Set parameters for inode allocation heuristics, taking into account
 * filesystem size and inode32/inode64 mount options; i.e. specifically
 * whether or not XFS_FEAT_SMALL_INUMS is set.
 *
 * Inode allocation patterns are altered only if inode32 is requested
 * (XFS_FEAT_SMALL_INUMS), and the filesystem is sufficiently large.
 * If altered, XFS_OPSTATE_INODE32 is set as well.
 *
 * An agcount independent of that in the mount structure is provided
 * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
 * to the potentially higher ag count.
 *
 * Returns the maximum AG index which may contain inodes.
 */
xfs_agnumber_t
xfs_set_inode_alloc(
	struct xfs_mount *mp,
	xfs_agnumber_t	agcount)
{
	xfs_agnumber_t	index;
	xfs_agnumber_t	maxagi = 0;
	xfs_sb_t	*sbp = &mp->m_sb;
	xfs_agnumber_t	max_metadata;
	xfs_agino_t	agino;
	xfs_ino_t	ino;

	/*
	 * Calculate how much should be reserved for inodes to meet
	 * the max inode percentage.  Used only for inode32.
	 */
	if (M_IGEO(mp)->maxicount) {
		uint64_t	icount;

		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
		do_div(icount, 100);
		icount += sbp->sb_agblocks - 1;
		do_div(icount, sbp->sb_agblocks);
		max_metadata = icount;
	} else {
		max_metadata = agcount;
	}

	/* Get the last possible inode in the filesystem */
	agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1);
	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);

	/*
	 * If user asked for no more than 32-bit inodes, and the fs is
	 * sufficiently large, set XFS_OPSTATE_INODE32 if we must alter
	 * the allocator to accommodate the request.
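	 * (i.e. the highest possible inode number would not fit in 32 bits).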
	 */
	if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32)
		set_bit(XFS_OPSTATE_INODE32, &mp->m_opstate);
	else
		clear_bit(XFS_OPSTATE_INODE32, &mp->m_opstate);

	for (index = 0; index < agcount; index++) {
		struct xfs_perag	*pag;

		ino = XFS_AGINO_TO_INO(mp, index, agino);

		pag = xfs_perag_get(mp, index);

		if (xfs_is_inode32(mp)) {
			if (ino > XFS_MAXINUMBER_32) {
				pag->pagi_inodeok = 0;
				pag->pagf_metadata = 0;
			} else {
				pag->pagi_inodeok = 1;
				maxagi++;
				if (index < max_metadata)
					pag->pagf_metadata = 1;
				else
					pag->pagf_metadata = 0;
			}
		} else {
			pag->pagi_inodeok = 1;
			pag->pagf_metadata = 0;
		}

		xfs_perag_put(pag);
	}

	return xfs_is_inode32(mp) ? maxagi : agcount;
}

static int
xfs_setup_dax_always(
	struct xfs_mount	*mp)
{
	if (!mp->m_ddev_targp->bt_daxdev &&
	    (!mp->m_rtdev_targp || !mp->m_rtdev_targp->bt_daxdev)) {
		xfs_alert(mp,
			"DAX unsupported by block device. Turning off DAX.");
		goto disable_dax;
	}

	if (mp->m_super->s_blocksize != PAGE_SIZE) {
		xfs_alert(mp,
			"DAX not supported for blocksize. Turning off DAX.");
		goto disable_dax;
	}

	if (xfs_has_reflink(mp)) {
		xfs_alert(mp, "DAX and reflink cannot be used together!");
		return -EINVAL;
	}

	xfs_warn(mp, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
	return 0;

disable_dax:
	xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
	return 0;
}

STATIC int
xfs_blkdev_get(
	xfs_mount_t		*mp,
	const char		*name,
	struct block_device	**bdevp)
{
	int			error = 0;

	*bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
				    mp);
	if (IS_ERR(*bdevp)) {
		error = PTR_ERR(*bdevp);
		xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
	}

	return error;
}

STATIC void
xfs_blkdev_put(
	struct block_device	*bdev)
{
	if (bdev)
		blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}

STATIC void
xfs_close_devices(
	struct xfs_mount	*mp)
{
	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
		struct block_device *logdev = mp->m_logdev_targp->bt_bdev;

		xfs_free_buftarg(mp->m_logdev_targp);
		xfs_blkdev_put(logdev);
	}
	if (mp->m_rtdev_targp) {
		struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;

		xfs_free_buftarg(mp->m_rtdev_targp);
		xfs_blkdev_put(rtdev);
	}
	xfs_free_buftarg(mp->m_ddev_targp);
}

/*
 * The file system configurations are:
 *	(1) device (partition) with data and internal log
 *	(2) logical volume with data and log subvolumes.
 *	(3) logical volume with data, log, and realtime subvolumes.
 *
 * We only have to handle opening the log and realtime volumes here if
 * they are present.  The data subvolume has already been opened by
 * get_sb_bdev() and is stored in sb->s_bdev.
 */
STATIC int
xfs_open_devices(
	struct xfs_mount	*mp)
{
	struct block_device	*ddev = mp->m_super->s_bdev;
	struct block_device	*logdev = NULL, *rtdev = NULL;
	int			error;

	/*
	 * Open real time and log devices - order is important.
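	 * The log device is opened first, then the realtime device; the
	 * data device was already opened by the VFS when it set up the
	 * superblock.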
	 */
	if (mp->m_logname) {
		error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
		if (error)
			return error;
	}

	if (mp->m_rtname) {
		error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
		if (error)
			goto out_close_logdev;

		if (rtdev == ddev || rtdev == logdev) {
			xfs_warn(mp,
	"Cannot mount filesystem with identical rtdev and ddev/logdev.");
			error = -EINVAL;
			goto out_close_rtdev;
		}
	}

	/*
	 * Setup xfs_mount buffer target pointers
	 */
	error = -ENOMEM;
	mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev);
	if (!mp->m_ddev_targp)
		goto out_close_rtdev;

	if (rtdev) {
		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev);
		if (!mp->m_rtdev_targp)
			goto out_free_ddev_targ;
	}

	if (logdev && logdev != ddev) {
		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev);
		if (!mp->m_logdev_targp)
			goto out_free_rtdev_targ;
	} else {
		mp->m_logdev_targp = mp->m_ddev_targp;
	}

	return 0;

 out_free_rtdev_targ:
	if (mp->m_rtdev_targp)
		xfs_free_buftarg(mp->m_rtdev_targp);
 out_free_ddev_targ:
	xfs_free_buftarg(mp->m_ddev_targp);
 out_close_rtdev:
	xfs_blkdev_put(rtdev);
 out_close_logdev:
	if (logdev && logdev != ddev)
		xfs_blkdev_put(logdev);
	return error;
}

/*
 * Setup xfs_mount buffer target pointers based on superblock
 */
STATIC int
xfs_setup_devices(
	struct xfs_mount	*mp)
{
	int			error;

	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
	if (error)
		return error;

	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
		unsigned int	log_sector_size = BBSIZE;

		if (xfs_has_sector(mp))
			log_sector_size = mp->m_sb.sb_logsectsize;
		error = xfs_setsize_buftarg(mp->m_logdev_targp,
					    log_sector_size);
		if (error)
			return error;
	}
	if (mp->m_rtdev_targp) {
		error = xfs_setsize_buftarg(mp->m_rtdev_targp,
					    mp->m_sb.sb_sectsize);
		if (error)
			return error;
	}

	return 0;
}

STATIC int
xfs_init_mount_workqueues(
	struct xfs_mount	*mp)
{
	mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
		XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
		1, mp->m_super->s_id);
	if (!mp->m_buf_workqueue)
		goto out;

	mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
		XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
		0, mp->m_super->s_id);
	if (!mp->m_unwritten_workqueue)
		goto out_destroy_buf;

	mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
		XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
		0, mp->m_super->s_id);
	if (!mp->m_reclaim_workqueue)
		goto out_destroy_unwritten;

	mp->m_blockgc_wq = alloc_workqueue("xfs-blockgc/%s",
		XFS_WQFLAGS(WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM),
		0, mp->m_super->s_id);
	if (!mp->m_blockgc_wq)
		goto out_destroy_reclaim;

	mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s",
		XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
		1, mp->m_super->s_id);
	if (!mp->m_inodegc_wq)
		goto out_destroy_blockgc;

	mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s",
		XFS_WQFLAGS(WQ_FREEZABLE), 0, mp->m_super->s_id);
	if (!mp->m_sync_workqueue)
		goto out_destroy_inodegc;

	return 0;

out_destroy_inodegc:
	destroy_workqueue(mp->m_inodegc_wq);
out_destroy_blockgc:
	destroy_workqueue(mp->m_blockgc_wq);
out_destroy_reclaim:
	destroy_workqueue(mp->m_reclaim_workqueue);
out_destroy_unwritten:
	destroy_workqueue(mp->m_unwritten_workqueue);
out_destroy_buf:
	destroy_workqueue(mp->m_buf_workqueue);
out:
	return -ENOMEM;
}

STATIC void
xfs_destroy_mount_workqueues(
	struct xfs_mount	*mp)
{
	destroy_workqueue(mp->m_sync_workqueue);
	destroy_workqueue(mp->m_blockgc_wq);
	destroy_workqueue(mp->m_inodegc_wq);
	destroy_workqueue(mp->m_reclaim_workqueue);
	destroy_workqueue(mp->m_unwritten_workqueue);
	destroy_workqueue(mp->m_buf_workqueue);
}

static void
xfs_flush_inodes_worker(
	struct work_struct	*work)
{
	struct xfs_mount	*mp = container_of(work, struct xfs_mount,
						   m_flush_inodes_work);
	struct super_block	*sb = mp->m_super;

	if (down_read_trylock(&sb->s_umount)) {
		sync_inodes_sb(sb);
		up_read(&sb->s_umount);
	}
}

/*
 * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
 * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting
 * for IO to complete so that we effectively throttle multiple callers to the
 * rate at which IO is completing.
 */
void
xfs_flush_inodes(
	struct xfs_mount	*mp)
{
	/*
	 * If flush_work() returns true then that means we waited for a flush
	 * which was already in progress.  Don't bother running another scan.
	 */
	if (flush_work(&mp->m_flush_inodes_work))
		return;

	queue_work(mp->m_sync_workqueue, &mp->m_flush_inodes_work);
	flush_work(&mp->m_flush_inodes_work);
}

/* Catch misguided souls that try to use this interface on XFS */
STATIC struct inode *
xfs_fs_alloc_inode(
	struct super_block	*sb)
{
	BUG();
	return NULL;
}

/*
 * Now that the generic code is guaranteed not to be accessing
 * the linux inode, we can inactivate and reclaim the inode.
 */
STATIC void
xfs_fs_destroy_inode(
	struct inode		*inode)
{
	struct xfs_inode	*ip = XFS_I(inode);

	trace_xfs_destroy_inode(ip);

	ASSERT(!rwsem_is_locked(&inode->i_rwsem));
	XFS_STATS_INC(ip->i_mount, vn_rele);
	XFS_STATS_INC(ip->i_mount, vn_remove);
	xfs_inode_mark_reclaimable(ip);
}

static void
xfs_fs_dirty_inode(
	struct inode		*inode,
	int			flag)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;

	if (!(inode->i_sb->s_flags & SB_LAZYTIME))
		return;
	if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME))
		return;

	if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
		return;
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
	xfs_trans_commit(tp);
}

/*
 * Slab object creation initialisation for the XFS inode.
 * This covers only the idempotent fields in the XFS inode;
 * all other fields need to be initialised on allocation
 * from the slab. This avoids the need to repeatedly initialise
 * fields in the xfs inode that are left in the initialised state
 * when freeing the inode.
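 * (For example, i_pincount, i_flags_lock and i_lock initialised below are
 * always back in this state by the time an inode is freed, so the
 * constructor never needs to run again for a reused object.)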
 */
STATIC void
xfs_fs_inode_init_once(
	void			*inode)
{
	struct xfs_inode	*ip = inode;

	memset(ip, 0, sizeof(struct xfs_inode));

	/* vfs inode */
	inode_init_once(VFS_I(ip));

	/* xfs inode */
	atomic_set(&ip->i_pincount, 0);
	spin_lock_init(&ip->i_flags_lock);

	mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
		    "xfsino", ip->i_ino);
}

/*
 * We do an unlocked check for XFS_IDONTCACHE here because we are already
 * serialised against cache hits here via the inode->i_lock and igrab() in
 * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be
 * racing with us, and it avoids needing to grab a spinlock here for every inode
 * we drop the final reference on.
 */
STATIC int
xfs_fs_drop_inode(
	struct inode		*inode)
{
	struct xfs_inode	*ip = XFS_I(inode);

	/*
	 * If this unlinked inode is in the middle of recovery, don't
	 * drop the inode just yet; log recovery will take care of
	 * that.  See the comment for this inode flag.
	 */
	if (ip->i_flags & XFS_IRECOVERY) {
		ASSERT(xlog_recovery_needed(ip->i_mount->m_log));
		return 0;
	}

	return generic_drop_inode(inode);
}

static void
xfs_mount_free(
	struct xfs_mount	*mp)
{
	kfree(mp->m_rtname);
	kfree(mp->m_logname);
	kmem_free(mp);
}

STATIC int
xfs_fs_sync_fs(
	struct super_block	*sb,
	int			wait)
{
	struct xfs_mount	*mp = XFS_M(sb);
	int			error;

	trace_xfs_fs_sync_fs(mp, __return_address);

	/*
	 * Doing anything during the async pass would be counterproductive.
	 */
	if (!wait)
		return 0;

	error = xfs_log_force(mp, XFS_LOG_SYNC);
	if (error)
		return error;

	if (laptop_mode) {
		/*
		 * The disk must be active because we're syncing.
		 * We schedule log work now (now that the disk is
		 * active) instead of later (when it might not be).
		 */
		flush_delayed_work(&mp->m_log->l_work);
	}

	/*
	 * If we are called with page faults frozen out, it means we are about
	 * to freeze the transaction subsystem. Take the opportunity to shut
	 * down inodegc because once SB_FREEZE_FS is set it's too late to
	 * prevent inactivation races with freeze. The fs doesn't get called
	 * again by the freezing process until after SB_FREEZE_FS has been set,
	 * so it's now or never.  Same logic applies to speculative allocation
	 * garbage collection.
	 *
	 * We don't care if this is a normal syncfs call that does this or
	 * freeze that does this - we can run this multiple times without issue
	 * and we won't race with a restart because a restart can only occur
	 * when the state is either SB_FREEZE_FS or SB_FREEZE_COMPLETE.
	 */
	if (sb->s_writers.frozen == SB_FREEZE_PAGEFAULT) {
		xfs_inodegc_stop(mp);
		xfs_blockgc_stop(mp);
	}

	return 0;
}

STATIC int
xfs_fs_statfs(
	struct dentry		*dentry,
	struct kstatfs		*statp)
{
	struct xfs_mount	*mp = XFS_M(dentry->d_sb);
	xfs_sb_t		*sbp = &mp->m_sb;
	struct xfs_inode	*ip = XFS_I(d_inode(dentry));
	uint64_t		fakeinos, id;
	uint64_t		icount;
	uint64_t		ifree;
	uint64_t		fdblocks;
	xfs_extlen_t		lsize;
	int64_t			ffree;

	/*
	 * Expedite background inodegc but don't wait. We do not want to block
	 * here waiting hours for a billion extent file to be truncated.
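	 * xfs_inodegc_push() only kicks the per-cpu workers so they run as
	 * soon as possible; it does not wait for them to finish.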
	 */
	xfs_inodegc_push(mp);

	statp->f_type = XFS_SUPER_MAGIC;
	statp->f_namelen = MAXNAMELEN - 1;

	id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
	statp->f_fsid = u64_to_fsid(id);

	icount = percpu_counter_sum(&mp->m_icount);
	ifree = percpu_counter_sum(&mp->m_ifree);
	fdblocks = percpu_counter_sum(&mp->m_fdblocks);

	spin_lock(&mp->m_sb_lock);
	statp->f_bsize = sbp->sb_blocksize;
	lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
	statp->f_blocks = sbp->sb_dblocks - lsize;
	spin_unlock(&mp->m_sb_lock);

	/* make sure statp->f_bfree does not underflow */
	statp->f_bfree = max_t(int64_t, 0,
				fdblocks - xfs_fdblocks_unavailable(mp));
	statp->f_bavail = statp->f_bfree;

	fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
	statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
	if (M_IGEO(mp)->maxicount)
		statp->f_files = min_t(typeof(statp->f_files),
					statp->f_files,
					M_IGEO(mp)->maxicount);

	/* If sb_icount overshot maxicount, report actual allocation */
	statp->f_files = max_t(typeof(statp->f_files),
					statp->f_files,
					sbp->sb_icount);

	/* make sure statp->f_ffree does not underflow */
	ffree = statp->f_files - (icount - ifree);
	statp->f_ffree = max_t(int64_t, ffree, 0);


	if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
	    ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
			      (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
		xfs_qm_statvfs(ip, statp);

	if (XFS_IS_REALTIME_MOUNT(mp) &&
	    (ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) {
		s64	freertx;

		statp->f_blocks = sbp->sb_rblocks;
		freertx = percpu_counter_sum_positive(&mp->m_frextents);
		statp->f_bavail = statp->f_bfree = freertx * sbp->sb_rextsize;
	}

	return 0;
}

STATIC void
xfs_save_resvblks(struct xfs_mount *mp)
{
	uint64_t resblks = 0;

	mp->m_resblks_save = mp->m_resblks;
	xfs_reserve_blocks(mp, &resblks, NULL);
}

STATIC void
xfs_restore_resvblks(struct xfs_mount *mp)
{
	uint64_t resblks;

	if (mp->m_resblks_save) {
		resblks = mp->m_resblks_save;
		mp->m_resblks_save = 0;
	} else
		resblks = xfs_default_resblks(mp);

	xfs_reserve_blocks(mp, &resblks, NULL);
}

/*
 * Second stage of a freeze. The data is already frozen so we only
 * need to take care of the metadata. Once that's done sync the superblock
 * to the log to dirty it in case of a crash while frozen. This ensures that we
 * will recover the unlinked inode lists on the next mount.
 */
STATIC int
xfs_fs_freeze(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);
	unsigned int		flags;
	int			ret;

	/*
	 * The filesystem is now frozen far enough that memory reclaim
	 * cannot safely operate on the filesystem. Hence we need to
	 * set a GFP_NOFS context here to avoid recursion deadlocks.
	 */
	flags = memalloc_nofs_save();
	xfs_save_resvblks(mp);
	ret = xfs_log_quiesce(mp);
	memalloc_nofs_restore(flags);

	/*
	 * For read-write filesystems, we need to restart the inodegc on error
	 * because we stopped it at SB_FREEZE_PAGEFAULT level and a thaw is not
	 * going to be run to restart it now.  We are at SB_FREEZE_FS level
	 * here, so we can restart safely without racing with a stop in
	 * xfs_fs_sync_fs().
	 */
	if (ret && !xfs_is_readonly(mp)) {
		xfs_blockgc_start(mp);
		xfs_inodegc_start(mp);
	}

	return ret;
}

STATIC int
xfs_fs_unfreeze(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);

	xfs_restore_resvblks(mp);
	xfs_log_work_queue(mp);

	/*
	 * Don't reactivate the inodegc worker on a readonly filesystem because
	 * inodes are sent directly to reclaim. Don't reactivate the blockgc
	 * worker because there are no speculative preallocations on a readonly
	 * filesystem.
	 */
	if (!xfs_is_readonly(mp)) {
		xfs_blockgc_start(mp);
		xfs_inodegc_start(mp);
	}

	return 0;
}

/*
 * This function fills in xfs_mount_t fields based on mount args.
 * Note: the superblock _has_ now been read in.
 */
STATIC int
xfs_finish_flags(
	struct xfs_mount	*mp)
{
	/* Fail a mount where the logbuf is smaller than the log stripe */
	if (xfs_has_logv2(mp)) {
		if (mp->m_logbsize <= 0 &&
		    mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
			mp->m_logbsize = mp->m_sb.sb_logsunit;
		} else if (mp->m_logbsize > 0 &&
			   mp->m_logbsize < mp->m_sb.sb_logsunit) {
			xfs_warn(mp,
		"logbuf size must be greater than or equal to log stripe size");
			return -EINVAL;
		}
	} else {
		/* Fail a mount if the logbuf is larger than 32K */
		if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
			xfs_warn(mp,
		"logbuf size for version 1 logs must be 16K or 32K");
			return -EINVAL;
		}
	}

	/*
	 * V5 filesystems always use attr2 format for attributes.
	 */
	if (xfs_has_crc(mp) && xfs_has_noattr2(mp)) {
		xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
			     "attr2 is always enabled for V5 filesystems.");
		return -EINVAL;
	}

	/*
	 * prohibit r/w mounts of read-only filesystems
	 */
	if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !xfs_is_readonly(mp)) {
		xfs_warn(mp,
			"cannot mount a read-only filesystem as read-write");
		return -EROFS;
	}

	if ((mp->m_qflags & XFS_GQUOTA_ACCT) &&
	    (mp->m_qflags & XFS_PQUOTA_ACCT) &&
	    !xfs_has_pquotino(mp)) {
		xfs_warn(mp,
		  "Super block does not support project and group quota together");
		return -EINVAL;
	}

	return 0;
}

static int
xfs_init_percpu_counters(
	struct xfs_mount	*mp)
{
	int			error;

	error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
	if (error)
		return -ENOMEM;

	error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
	if (error)
		goto free_icount;

	error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
	if (error)
		goto free_ifree;

	error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL);
	if (error)
		goto free_fdblocks;

	error = percpu_counter_init(&mp->m_frextents, 0, GFP_KERNEL);
	if (error)
		goto free_delalloc;

	return 0;

free_delalloc:
	percpu_counter_destroy(&mp->m_delalloc_blks);
free_fdblocks:
	percpu_counter_destroy(&mp->m_fdblocks);
free_ifree:
	percpu_counter_destroy(&mp->m_ifree);
free_icount:
	percpu_counter_destroy(&mp->m_icount);
	return -ENOMEM;
}

void
xfs_reinit_percpu_counters(
	struct xfs_mount	*mp)
{
	percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
	percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
	percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
	percpu_counter_set(&mp->m_frextents, mp->m_sb.sb_frextents);
}

static void
xfs_destroy_percpu_counters(
	struct xfs_mount	*mp)
{
	percpu_counter_destroy(&mp->m_icount);
	percpu_counter_destroy(&mp->m_ifree);
	percpu_counter_destroy(&mp->m_fdblocks);
	ASSERT(xfs_is_shutdown(mp) ||
	       percpu_counter_sum(&mp->m_delalloc_blks) == 0);
	percpu_counter_destroy(&mp->m_delalloc_blks);
	percpu_counter_destroy(&mp->m_frextents);
}

static int
xfs_inodegc_init_percpu(
	struct xfs_mount	*mp)
{
	struct xfs_inodegc	*gc;
	int			cpu;

	mp->m_inodegc = alloc_percpu(struct xfs_inodegc);
	if (!mp->m_inodegc)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		gc = per_cpu_ptr(mp->m_inodegc, cpu);
		init_llist_head(&gc->list);
		gc->items = 0;
		INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);
	}
	return 0;
}

static void
xfs_inodegc_free_percpu(
	struct xfs_mount	*mp)
{
	if (!mp->m_inodegc)
		return;
	free_percpu(mp->m_inodegc);
}

static void
xfs_fs_put_super(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);

	/* if ->fill_super failed, we have no mount to tear down */
	if (!sb->s_fs_info)
		return;

	xfs_notice(mp, "Unmounting Filesystem");
	xfs_filestream_unmount(mp);
	xfs_unmountfs(mp);

	xfs_freesb(mp);
	free_percpu(mp->m_stats.xs_stats);
	xfs_mount_list_del(mp);
	xfs_inodegc_free_percpu(mp);
	xfs_destroy_percpu_counters(mp);
	xfs_destroy_mount_workqueues(mp);
	xfs_close_devices(mp);

	sb->s_fs_info = NULL;
	xfs_mount_free(mp);
}

static long
xfs_fs_nr_cached_objects(
	struct super_block	*sb,
	struct shrink_control	*sc)
{
	/* Paranoia: catch incorrect calls during mount setup or teardown */
	if (WARN_ON_ONCE(!sb->s_fs_info))
		return 0;
	return xfs_reclaim_inodes_count(XFS_M(sb));
}

static long
xfs_fs_free_cached_objects(
	struct super_block	*sb,
	struct shrink_control	*sc)
{
	return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan);
}

static const struct super_operations xfs_super_operations = {
	.alloc_inode		= xfs_fs_alloc_inode,
	.destroy_inode		= xfs_fs_destroy_inode,
	.dirty_inode		= xfs_fs_dirty_inode,
	.drop_inode		= xfs_fs_drop_inode,
	.put_super		= xfs_fs_put_super,
	.sync_fs		= xfs_fs_sync_fs,
	.freeze_fs		= xfs_fs_freeze,
	.unfreeze_fs		= xfs_fs_unfreeze,
	.statfs			= xfs_fs_statfs,
	.show_options		= xfs_fs_show_options,
	.nr_cached_objects	= xfs_fs_nr_cached_objects,
	.free_cached_objects	= xfs_fs_free_cached_objects,
};

static int
suffix_kstrtoint(
	const char	*s,
	unsigned int	base,
	int		*res)
{
	int		last, shift_left_factor = 0, _res;
	char		*value;
	int		ret = 0;

	value = kstrdup(s, GFP_KERNEL);
	if (!value)
		return -ENOMEM;

	last = strlen(value) - 1;
	if (value[last] == 'K' || value[last] == 'k') {
		shift_left_factor = 10;
		value[last] = '\0';
	}
	if (value[last] == 'M' || value[last] == 'm') {
		shift_left_factor = 20;
		value[last] = '\0';
	}
	if (value[last] == 'G' || value[last] == 'g') {
		shift_left_factor = 30;
		value[last] = '\0';
	}

	if (kstrtoint(value, base, &_res))
		ret = -EINVAL;
	kfree(value);
	*res = _res << shift_left_factor;
	return ret;
}

static inline void
xfs_fs_warn_deprecated(
	struct fs_context	*fc,
	struct fs_parameter	*param,
	uint64_t		flag,
	bool			value)
{
	/* Don't print the warning if reconfiguring and current mount point
	 * already had the flag set
	 */
	if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) &&
	    !!(XFS_M(fc->root->d_sb)->m_features & flag) == value)
		return;
	xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key);
}

/*
 * Set mount state from a mount option.
 *
 * NOTE: mp->m_super is NULL here!
 */
static int
xfs_fs_parse_param(
	struct fs_context	*fc,
	struct fs_parameter	*param)
{
	struct xfs_mount	*parsing_mp = fc->s_fs_info;
	struct fs_parse_result	result;
	int			size = 0;
	int			opt;

	opt = fs_parse(fc, xfs_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case Opt_logbufs:
		parsing_mp->m_logbufs = result.uint_32;
		return 0;
	case Opt_logbsize:
		if (suffix_kstrtoint(param->string, 10, &parsing_mp->m_logbsize))
			return -EINVAL;
		return 0;
	case Opt_logdev:
		kfree(parsing_mp->m_logname);
		parsing_mp->m_logname = kstrdup(param->string, GFP_KERNEL);
		if (!parsing_mp->m_logname)
			return -ENOMEM;
		return 0;
	case Opt_rtdev:
		kfree(parsing_mp->m_rtname);
		parsing_mp->m_rtname = kstrdup(param->string, GFP_KERNEL);
		if (!parsing_mp->m_rtname)
			return -ENOMEM;
		return 0;
	case Opt_allocsize:
		if (suffix_kstrtoint(param->string, 10, &size))
			return -EINVAL;
		parsing_mp->m_allocsize_log = ffs(size) - 1;
		parsing_mp->m_features |= XFS_FEAT_ALLOCSIZE;
		return 0;
	case Opt_grpid:
	case Opt_bsdgroups:
		parsing_mp->m_features |= XFS_FEAT_GRPID;
		return 0;
	case Opt_nogrpid:
	case Opt_sysvgroups:
		parsing_mp->m_features &= ~XFS_FEAT_GRPID;
		return 0;
	case Opt_wsync:
		parsing_mp->m_features |= XFS_FEAT_WSYNC;
		return 0;
	case Opt_norecovery:
		parsing_mp->m_features |= XFS_FEAT_NORECOVERY;
		return 0;
	case Opt_noalign:
		parsing_mp->m_features |= XFS_FEAT_NOALIGN;
		return 0;
	case Opt_swalloc:
		parsing_mp->m_features |= XFS_FEAT_SWALLOC;
		return 0;
	case Opt_sunit:
		parsing_mp->m_dalign = result.uint_32;
		return 0;
	case Opt_swidth:
		parsing_mp->m_swidth = result.uint_32;
		return 0;
	case Opt_inode32:
		parsing_mp->m_features |= XFS_FEAT_SMALL_INUMS;
		return 0;
	case Opt_inode64:
		parsing_mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
		return 0;
	case Opt_nouuid:
		parsing_mp->m_features |= XFS_FEAT_NOUUID;
		return 0;
	case Opt_largeio:
		parsing_mp->m_features |= XFS_FEAT_LARGE_IOSIZE;
		return 0;
	case Opt_nolargeio:
		parsing_mp->m_features &= ~XFS_FEAT_LARGE_IOSIZE;
		return 0;
	case Opt_filestreams:
		parsing_mp->m_features |= XFS_FEAT_FILESTREAMS;
		return 0;
	case Opt_noquota:
		parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
		parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
		return 0;
	case Opt_quota:
	case Opt_uquota:
	case Opt_usrquota:
		parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD);
		return 0;
	case Opt_qnoenforce:
	case Opt_uqnoenforce:
		parsing_mp->m_qflags |= XFS_UQUOTA_ACCT;
		parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD;
		return 0;
	case Opt_pquota:
	case Opt_prjquota:
		parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD);
		return 0;
	case Opt_pqnoenforce:
		parsing_mp->m_qflags |= XFS_PQUOTA_ACCT;
		parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD;
		return 0;
	case Opt_gquota:
	case Opt_grpquota:
		parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD);
		return 0;
	case Opt_gqnoenforce:
		parsing_mp->m_qflags |= XFS_GQUOTA_ACCT;
		parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD;
		return 0;
	case Opt_discard:
		parsing_mp->m_features |= XFS_FEAT_DISCARD;
		return 0;
	case Opt_nodiscard:
		parsing_mp->m_features &= ~XFS_FEAT_DISCARD;
		return 0;
#ifdef CONFIG_FS_DAX
	case Opt_dax:
		xfs_mount_set_dax_mode(parsing_mp, XFS_DAX_ALWAYS);
		return 0;
	case Opt_dax_enum:
		xfs_mount_set_dax_mode(parsing_mp, result.uint_32);
		return 0;
#endif
	/* Following mount options will be removed in September 2025 */
	case Opt_ikeep:
		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, true);
		parsing_mp->m_features |= XFS_FEAT_IKEEP;
		return 0;
	case Opt_noikeep:
		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, false);
		parsing_mp->m_features &= ~XFS_FEAT_IKEEP;
		return 0;
	case Opt_attr2:
		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_ATTR2, true);
		parsing_mp->m_features |= XFS_FEAT_ATTR2;
		return 0;
	case Opt_noattr2:
		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_NOATTR2, true);
		parsing_mp->m_features |= XFS_FEAT_NOATTR2;
		return 0;
	default:
		xfs_warn(parsing_mp, "unknown mount option [%s].", param->key);
		return -EINVAL;
	}

	return 0;
}

static int
xfs_fs_validate_params(
	struct xfs_mount	*mp)
{
	/* No recovery flag requires a read-only mount */
	if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) {
		xfs_warn(mp, "no-recovery mounts must be read-only.");
		return -EINVAL;
	}

	/*
	 * We have not read the superblock at this point, so only the attr2
	 * mount option can set the attr2 feature by this stage.
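	 * Seeing both the attr2 and noattr2 features set here therefore means
	 * the user passed conflicting options.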
	 */
	if (xfs_has_attr2(mp) && xfs_has_noattr2(mp)) {
		xfs_warn(mp, "attr2 and noattr2 cannot both be specified.");
		return -EINVAL;
	}


	if (xfs_has_noalign(mp) && (mp->m_dalign || mp->m_swidth)) {
		xfs_warn(mp,
	"sunit and swidth options incompatible with the noalign option");
		return -EINVAL;
	}

	if (!IS_ENABLED(CONFIG_XFS_QUOTA) && mp->m_qflags != 0) {
		xfs_warn(mp, "quota support not available in this kernel.");
		return -EINVAL;
	}

	if ((mp->m_dalign && !mp->m_swidth) ||
	    (!mp->m_dalign && mp->m_swidth)) {
		xfs_warn(mp, "sunit and swidth must be specified together");
		return -EINVAL;
	}

	if (mp->m_dalign && (mp->m_swidth % mp->m_dalign != 0)) {
		xfs_warn(mp,
	"stripe width (%d) must be a multiple of the stripe unit (%d)",
			mp->m_swidth, mp->m_dalign);
		return -EINVAL;
	}

	if (mp->m_logbufs != -1 &&
	    mp->m_logbufs != 0 &&
	    (mp->m_logbufs < XLOG_MIN_ICLOGS ||
	     mp->m_logbufs > XLOG_MAX_ICLOGS)) {
		xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
			mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
		return -EINVAL;
	}

	if (mp->m_logbsize != -1 &&
	    mp->m_logbsize != 0 &&
	    (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
	     mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
	     !is_power_of_2(mp->m_logbsize))) {
		xfs_warn(mp,
			"invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
			mp->m_logbsize);
		return -EINVAL;
	}

	if (xfs_has_allocsize(mp) &&
	    (mp->m_allocsize_log > XFS_MAX_IO_LOG ||
	     mp->m_allocsize_log < XFS_MIN_IO_LOG)) {
		xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
			mp->m_allocsize_log, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG);
		return -EINVAL;
	}

	return 0;
}

static int
xfs_fs_fill_super(
	struct super_block	*sb,
	struct fs_context	*fc)
{
	struct xfs_mount	*mp = sb->s_fs_info;
	struct inode		*root;
	int			flags = 0, error;

	mp->m_super = sb;

	error = xfs_fs_validate_params(mp);
	if (error)
		goto out_free_names;

	sb_min_blocksize(sb, BBSIZE);
	sb->s_xattr = xfs_xattr_handlers;
	sb->s_export_op = &xfs_export_operations;
#ifdef CONFIG_XFS_QUOTA
	sb->s_qcop = &xfs_quotactl_operations;
	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
	sb->s_op = &xfs_super_operations;

	/*
	 * Delay mount work if the debug hook is set. This is debug
	 * instrumentation to coordinate simulation of xfs mount failures with
	 * VFS superblock operations
	 */
	if (xfs_globals.mount_delay) {
		xfs_notice(mp, "Delaying mount for %d seconds.",
			xfs_globals.mount_delay);
		msleep(xfs_globals.mount_delay * 1000);
	}

	if (fc->sb_flags & SB_SILENT)
		flags |= XFS_MFSI_QUIET;

	error = xfs_open_devices(mp);
	if (error)
		goto out_free_names;

	error = xfs_init_mount_workqueues(mp);
	if (error)
		goto out_close_devices;

	error = xfs_init_percpu_counters(mp);
	if (error)
		goto out_destroy_workqueues;

	error = xfs_inodegc_init_percpu(mp);
	if (error)
		goto out_destroy_counters;

	/*
	 * All percpu data structures requiring cleanup when a cpu goes offline
	 * must be allocated before adding this @mp to the cpu-dead handler's
	 * mount list.
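	 * Otherwise xfs_cpu_dead() could walk the mount list and touch
	 * per-cpu state that has not been set up yet.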
	 */
	xfs_mount_list_add(mp);

	/* Allocate stats memory before we do operations that might use it */
	mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
	if (!mp->m_stats.xs_stats) {
		error = -ENOMEM;
		goto out_destroy_inodegc;
	}

	error = xfs_readsb(mp, flags);
	if (error)
		goto out_free_stats;

	error = xfs_finish_flags(mp);
	if (error)
		goto out_free_sb;

	error = xfs_setup_devices(mp);
	if (error)
		goto out_free_sb;

	/* V4 support is undergoing deprecation. */
	if (!xfs_has_crc(mp)) {
#ifdef CONFIG_XFS_SUPPORT_V4
		xfs_warn_once(mp,
	"Deprecated V4 format (crc=0) will not be supported after September 2030.");
#else
		xfs_warn(mp,
	"Deprecated V4 format (crc=0) not supported by kernel.");
		error = -EINVAL;
		goto out_free_sb;
#endif
	}

	/* Filesystem claims it needs repair, so refuse the mount. */
	if (xfs_has_needsrepair(mp)) {
		xfs_warn(mp, "Filesystem needs repair.  Please run xfs_repair.");
		error = -EFSCORRUPTED;
		goto out_free_sb;
	}

	/*
	 * Don't touch the filesystem if a user tool thinks it owns the primary
	 * superblock.  mkfs doesn't clear the flag from secondary supers, so
	 * we don't check them at all.
	 */
	if (mp->m_sb.sb_inprogress) {
		xfs_warn(mp, "Offline file system operation in progress!");
		error = -EFSCORRUPTED;
		goto out_free_sb;
	}

	/*
	 * Until this is fixed only page-sized or smaller data blocks work.
	 */
	if (mp->m_sb.sb_blocksize > PAGE_SIZE) {
		xfs_warn(mp,
		"File system with blocksize %d bytes. "
		"Only pagesize (%ld) or less will currently work.",
				mp->m_sb.sb_blocksize, PAGE_SIZE);
		error = -ENOSYS;
		goto out_free_sb;
	}

	/* Ensure this filesystem fits in the page cache limits */
	if (xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_dblocks) ||
	    xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_rblocks)) {
		xfs_warn(mp,
		"file system too large to be mounted on this system.");
		error = -EFBIG;
		goto out_free_sb;
	}

	/*
	 * XFS block mappings use 54 bits to store the logical block offset.
	 * This should suffice to handle the maximum file size that the VFS
	 * supports (currently 2^63 bytes on 64-bit and ULONG_MAX << PAGE_SHIFT
	 * bytes on 32-bit), but as XFS and VFS have gotten the s_maxbytes
	 * calculation wrong on 32-bit kernels in the past, we'll add a WARN_ON
	 * to check this assertion.
	 *
	 * Avoid integer overflow by comparing the maximum bmbt offset to the
	 * maximum pagecache offset in units of fs blocks.
	 */
	if (!xfs_verify_fileoff(mp, XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE))) {
		xfs_warn(mp,
"MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!",
			 XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE),
			 XFS_MAX_FILEOFF);
		error = -EINVAL;
		goto out_free_sb;
	}

	error = xfs_filestream_mount(mp);
	if (error)
		goto out_free_sb;

	/*
	 * we must configure the block size in the superblock before we run the
	 * full mount process as the mount process can lookup and cache inodes.
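	 * (s_blocksize_bits below is simply log2 of the block size, computed
	 * with ffs() since the block size is always a power of two.)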
	 */
	sb->s_magic = XFS_SUPER_MAGIC;
	sb->s_blocksize = mp->m_sb.sb_blocksize;
	sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_max_links = XFS_MAXLINK;
	sb->s_time_gran = 1;
	if (xfs_has_bigtime(mp)) {
		sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN);
		sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX);
	} else {
		sb->s_time_min = XFS_LEGACY_TIME_MIN;
		sb->s_time_max = XFS_LEGACY_TIME_MAX;
	}
	trace_xfs_inode_timestamp_range(mp, sb->s_time_min, sb->s_time_max);
	sb->s_iflags |= SB_I_CGROUPWB;

	set_posix_acl_flag(sb);

	/* version 5 superblocks support inode version counters. */
	if (xfs_has_crc(mp))
		sb->s_flags |= SB_I_VERSION;

	if (xfs_has_dax_always(mp)) {
		error = xfs_setup_dax_always(mp);
		if (error)
			goto out_filestream_unmount;
	}

	if (xfs_has_discard(mp) && !bdev_max_discard_sectors(sb->s_bdev)) {
		xfs_warn(mp,
	"mounting with \"discard\" option, but the device does not support discard");
		mp->m_features &= ~XFS_FEAT_DISCARD;
	}

	if (xfs_has_reflink(mp)) {
		if (mp->m_sb.sb_rblocks) {
			xfs_alert(mp,
	"reflink not compatible with realtime device!");
			error = -EINVAL;
			goto out_filestream_unmount;
		}

		if (xfs_globals.always_cow) {
			xfs_info(mp, "using DEBUG-only always_cow mode.");
			mp->m_always_cow = true;
		}
	}

	if (xfs_has_rmapbt(mp) && mp->m_sb.sb_rblocks) {
		xfs_alert(mp,
	"reverse mapping btree not compatible with realtime device!");
		error = -EINVAL;
		goto out_filestream_unmount;
	}

	if (xfs_has_large_extent_counts(mp))
		xfs_warn(mp,
	"EXPERIMENTAL Large extent counts feature in use. Use at your own risk!");

	error = xfs_mountfs(mp);
	if (error)
		goto out_filestream_unmount;

	root = igrab(VFS_I(mp->m_rootip));
	if (!root) {
		error = -ENOENT;
		goto out_unmount;
	}
	sb->s_root = d_make_root(root);
	if (!sb->s_root) {
		error = -ENOMEM;
		goto out_unmount;
	}

	return 0;

 out_filestream_unmount:
	xfs_filestream_unmount(mp);
 out_free_sb:
	xfs_freesb(mp);
 out_free_stats:
	free_percpu(mp->m_stats.xs_stats);
 out_destroy_inodegc:
	xfs_mount_list_del(mp);
	xfs_inodegc_free_percpu(mp);
 out_destroy_counters:
	xfs_destroy_percpu_counters(mp);
 out_destroy_workqueues:
	xfs_destroy_mount_workqueues(mp);
 out_close_devices:
	xfs_close_devices(mp);
 out_free_names:
	sb->s_fs_info = NULL;
	xfs_mount_free(mp);
	return error;

 out_unmount:
	xfs_filestream_unmount(mp);
	xfs_unmountfs(mp);
	goto out_free_sb;
}

static int
xfs_fs_get_tree(
	struct fs_context	*fc)
{
	return get_tree_bdev(fc, xfs_fs_fill_super);
}

static int
xfs_remount_rw(
	struct xfs_mount	*mp)
{
	struct xfs_sb		*sbp = &mp->m_sb;
	int			error;

	if (xfs_has_norecovery(mp)) {
		xfs_warn(mp,
			"ro->rw transition prohibited on norecovery mount");
		return -EINVAL;
	}

	if (xfs_sb_is_v5(sbp) &&
	    xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
		xfs_warn(mp,
	"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
			(sbp->sb_features_ro_compat &
				XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
		return -EINVAL;
	}

	clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);

	/*
	 * If this is the first remount to writeable state we might have some
	 * superblock changes to update.
	 */
	if (mp->m_update_sb) {
		error = xfs_sync_sb(mp, false);
		if (error) {
			xfs_warn(mp, "failed to write sb changes");
			return error;
		}
		mp->m_update_sb = false;
	}

	/*
	 * Fill out the reserve pool if it is empty. Use the stashed value if
	 * it is non-zero, otherwise go with the default.
	 */
	xfs_restore_resvblks(mp);
	xfs_log_work_queue(mp);
	xfs_blockgc_start(mp);

	/* Create the per-AG metadata reservation pool. */
	error = xfs_fs_reserve_ag_blocks(mp);
	if (error && error != -ENOSPC)
		return error;

	/* Re-enable the background inode inactivation worker. */
	xfs_inodegc_start(mp);

	return 0;
}

static int
xfs_remount_ro(
	struct xfs_mount	*mp)
{
	struct xfs_icwalk	icw = {
		.icw_flags	= XFS_ICWALK_FLAG_SYNC,
	};
	int			error;

	/* Flush all the dirty data to disk. */
	error = sync_filesystem(mp->m_super);
	if (error)
		return error;

	/*
	 * Cancel background eofb scanning so it cannot race with the final
	 * log force+buftarg wait and deadlock the remount.
	 */
	xfs_blockgc_stop(mp);

	/*
	 * Clear out all remaining COW staging extents and speculative post-EOF
	 * preallocations so that we don't leave inodes requiring inactivation
	 * cleanups during reclaim on a read-only mount.  We must process every
	 * cached inode, so this requires a synchronous cache scan.
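	 * (XFS_ICWALK_FLAG_SYNC in the icwalk control structure above is what
	 * makes this scan synchronous.)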
	 */
	error = xfs_blockgc_free_space(mp, &icw);
	if (error) {
		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
		return error;
	}

	/*
	 * Stop the inodegc background worker. xfs_fs_reconfigure already
	 * flushed all pending inodegc work when it sync'd the filesystem.
	 * The VFS holds s_umount, so we know that inodes cannot enter
	 * xfs_fs_destroy_inode during a remount operation. In readonly mode
	 * we send inodes straight to reclaim, so no inodes will be queued.
	 */
	xfs_inodegc_stop(mp);

	/* Free the per-AG metadata reservation pool. */
	error = xfs_fs_unreserve_ag_blocks(mp);
	if (error) {
		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
		return error;
	}

	/*
	 * Before we sync the metadata, we need to free up the reserve block
	 * pool so that the used block count in the superblock on disk is
	 * correct at the end of the remount. Stash the current reserve pool
	 * size so that if we get remounted rw, we can return it to the same
	 * size.
	 */
	xfs_save_resvblks(mp);

	xfs_log_clean(mp);
	set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);

	return 0;
}

/*
 * Logically we would return an error here to prevent users from believing
 * they might have changed mount options using remount which can't be changed.
 *
 * But unfortunately mount(8) adds all options from mtab and fstab to the mount
 * arguments in some cases so we can't blindly reject options, but have to
 * check for each specified option if it actually differs from the currently
 * set option and only reject it if that's the case.
 *
 * Until that is implemented we return success for every remount request, and
 * silently ignore all options that we can't actually change.
 */
static int
xfs_fs_reconfigure(
	struct fs_context *fc)
{
	struct xfs_mount	*mp = XFS_M(fc->root->d_sb);
	struct xfs_mount	*new_mp = fc->s_fs_info;
	int			flags = fc->sb_flags;
	int			error;

	/* version 5 superblocks always support version counters. */
	if (xfs_has_crc(mp))
		fc->sb_flags |= SB_I_VERSION;

	error = xfs_fs_validate_params(new_mp);
	if (error)
		return error;

	/* inode32 -> inode64 */
	if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) {
		mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
		mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
	}

	/* inode64 -> inode32 */
	if (!xfs_has_small_inums(mp) && xfs_has_small_inums(new_mp)) {
		mp->m_features |= XFS_FEAT_SMALL_INUMS;
		mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
	}

	/* ro -> rw */
	if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) {
		error = xfs_remount_rw(mp);
		if (error)
			return error;
	}

	/* rw -> ro */
	if (!xfs_is_readonly(mp) && (flags & SB_RDONLY)) {
		error = xfs_remount_ro(mp);
		if (error)
			return error;
	}

	return 0;
}

static void xfs_fs_free(
	struct fs_context	*fc)
{
	struct xfs_mount	*mp = fc->s_fs_info;

	/*
	 * mp is stored in the fs_context when it is initialized.
	 * mp is transferred to the superblock on a successful mount,
	 * but if an error occurs before the transfer we have to free
	 * it here.
	 */
	if (mp)
		xfs_mount_free(mp);
}

static const struct fs_context_operations xfs_context_ops = {
	.parse_param = xfs_fs_parse_param,
	.get_tree    = xfs_fs_get_tree,
	.reconfigure = xfs_fs_reconfigure,
	.free        = xfs_fs_free,
};

static int xfs_init_fs_context(
	struct fs_context	*fc)
{
	struct xfs_mount	*mp;

	mp = kmem_alloc(sizeof(struct xfs_mount), KM_ZERO);
	if (!mp)
		return -ENOMEM;

	spin_lock_init(&mp->m_sb_lock);
	spin_lock_init(&mp->m_agirotor_lock);
	INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
	spin_lock_init(&mp->m_perag_lock);
	mutex_init(&mp->m_growlock);
	INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
	mp->m_kobj.kobject.kset = xfs_kset;
	/*
	 * We don't create the finobt per-ag space reservation until after log
	 * recovery, so we must set this to true so that an ifree transaction
	 * started during log recovery will not depend on space reservations
	 * for finobt expansion.
	 */
	mp->m_finobt_nores = true;

	/*
	 * These can be overridden by the mount option parsing.
	 */
	mp->m_logbufs = -1;
	mp->m_logbsize = -1;
	mp->m_allocsize_log = 16; /* 64k */

	/*
	 * Copy binary VFS mount flags we are interested in.
	 */
	if (fc->sb_flags & SB_RDONLY)
		set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate);
	if (fc->sb_flags & SB_DIRSYNC)
		mp->m_features |= XFS_FEAT_DIRSYNC;
	if (fc->sb_flags & SB_SYNCHRONOUS)
		mp->m_features |= XFS_FEAT_WSYNC;

	fc->s_fs_info = mp;
	fc->ops = &xfs_context_ops;

	return 0;
}

static struct file_system_type xfs_fs_type = {
	.owner			= THIS_MODULE,
	.name			= "xfs",
	.init_fs_context	= xfs_init_fs_context,
	.parameters		= xfs_fs_parameters,
	.kill_sb		= kill_block_super,
	.fs_flags		= FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("xfs");

STATIC int __init
xfs_init_caches(void)
{
	int		error;

	xfs_log_ticket_cache = kmem_cache_create("xfs_log_ticket",
						sizeof(struct xlog_ticket),
						0, 0, NULL);
	if (!xfs_log_ticket_cache)
		goto out;

	error = xfs_btree_init_cur_caches();
	if (error)
		goto out_destroy_log_ticket_cache;

	error = xfs_defer_init_item_caches();
	if (error)
		goto out_destroy_btree_cur_cache;

	xfs_da_state_cache = kmem_cache_create("xfs_da_state",
					       sizeof(struct xfs_da_state),
					       0, 0, NULL);
	if (!xfs_da_state_cache)
		goto out_destroy_defer_item_cache;

	xfs_ifork_cache = kmem_cache_create("xfs_ifork",
					    sizeof(struct xfs_ifork),
					    0, 0, NULL);
	if (!xfs_ifork_cache)
		goto out_destroy_da_state_cache;

	xfs_trans_cache = kmem_cache_create("xfs_trans",
					    sizeof(struct xfs_trans),
					    0, 0, NULL);
	if (!xfs_trans_cache)
		goto out_destroy_ifork_cache;


	/*
	 * The size of the cache-allocated buf log item is the maximum
	 * size possible under XFS.  This wastes a little bit of memory,
	 * but it is much faster.
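	 * (A single fixed-size slab serves every buffer, whatever its size.)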
	/*
	 * The size of the cache-allocated buf log item is the maximum
	 * size possible under XFS. This wastes a little bit of memory,
	 * but it is much faster.
	 */
	xfs_buf_item_cache = kmem_cache_create("xfs_buf_item",
					       sizeof(struct xfs_buf_log_item),
					       0, 0, NULL);
	if (!xfs_buf_item_cache)
		goto out_destroy_trans_cache;

	xfs_efd_cache = kmem_cache_create("xfs_efd_item",
					(sizeof(struct xfs_efd_log_item) +
					(XFS_EFD_MAX_FAST_EXTENTS - 1) *
					sizeof(struct xfs_extent)),
					0, 0, NULL);
	if (!xfs_efd_cache)
		goto out_destroy_buf_item_cache;

	xfs_efi_cache = kmem_cache_create("xfs_efi_item",
					(sizeof(struct xfs_efi_log_item) +
					(XFS_EFI_MAX_FAST_EXTENTS - 1) *
					sizeof(struct xfs_extent)),
					0, 0, NULL);
	if (!xfs_efi_cache)
		goto out_destroy_efd_cache;

	xfs_inode_cache = kmem_cache_create("xfs_inode",
					    sizeof(struct xfs_inode), 0,
					    (SLAB_HWCACHE_ALIGN |
					     SLAB_RECLAIM_ACCOUNT |
					     SLAB_MEM_SPREAD | SLAB_ACCOUNT),
					    xfs_fs_inode_init_once);
	if (!xfs_inode_cache)
		goto out_destroy_efi_cache;

	xfs_ili_cache = kmem_cache_create("xfs_ili",
					  sizeof(struct xfs_inode_log_item), 0,
					  SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
					  NULL);
	if (!xfs_ili_cache)
		goto out_destroy_inode_cache;

	xfs_icreate_cache = kmem_cache_create("xfs_icr",
					      sizeof(struct xfs_icreate_item),
					      0, 0, NULL);
	if (!xfs_icreate_cache)
		goto out_destroy_ili_cache;

	xfs_rud_cache = kmem_cache_create("xfs_rud_item",
					  sizeof(struct xfs_rud_log_item),
					  0, 0, NULL);
	if (!xfs_rud_cache)
		goto out_destroy_icreate_cache;

	xfs_rui_cache = kmem_cache_create("xfs_rui_item",
			xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS),
			0, 0, NULL);
	if (!xfs_rui_cache)
		goto out_destroy_rud_cache;

	xfs_cud_cache = kmem_cache_create("xfs_cud_item",
					  sizeof(struct xfs_cud_log_item),
					  0, 0, NULL);
	if (!xfs_cud_cache)
		goto out_destroy_rui_cache;

	xfs_cui_cache = kmem_cache_create("xfs_cui_item",
			xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS),
			0, 0, NULL);
	if (!xfs_cui_cache)
		goto out_destroy_cud_cache;

	xfs_bud_cache = kmem_cache_create("xfs_bud_item",
					  sizeof(struct xfs_bud_log_item),
					  0, 0, NULL);
	if (!xfs_bud_cache)
		goto out_destroy_cui_cache;

	xfs_bui_cache = kmem_cache_create("xfs_bui_item",
			xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS),
			0, 0, NULL);
	if (!xfs_bui_cache)
		goto out_destroy_bud_cache;

	xfs_attrd_cache = kmem_cache_create("xfs_attrd_item",
					    sizeof(struct xfs_attrd_log_item),
					    0, 0, NULL);
	if (!xfs_attrd_cache)
		goto out_destroy_bui_cache;

	xfs_attri_cache = kmem_cache_create("xfs_attri_item",
					    sizeof(struct xfs_attri_log_item),
					    0, 0, NULL);
	if (!xfs_attri_cache)
		goto out_destroy_attrd_cache;

	return 0;
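	/*
	 * Error unwind (editor-added comment, not original code): each label
	 * below destroys everything created before the failing allocation,
	 * in reverse order of creation, so a failure at any point above
	 * releases exactly the caches that were already set up.
	 */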
 out_destroy_attrd_cache:
	kmem_cache_destroy(xfs_attrd_cache);
 out_destroy_bui_cache:
	kmem_cache_destroy(xfs_bui_cache);
 out_destroy_bud_cache:
	kmem_cache_destroy(xfs_bud_cache);
 out_destroy_cui_cache:
	kmem_cache_destroy(xfs_cui_cache);
 out_destroy_cud_cache:
	kmem_cache_destroy(xfs_cud_cache);
 out_destroy_rui_cache:
	kmem_cache_destroy(xfs_rui_cache);
 out_destroy_rud_cache:
	kmem_cache_destroy(xfs_rud_cache);
 out_destroy_icreate_cache:
	kmem_cache_destroy(xfs_icreate_cache);
 out_destroy_ili_cache:
	kmem_cache_destroy(xfs_ili_cache);
 out_destroy_inode_cache:
	kmem_cache_destroy(xfs_inode_cache);
 out_destroy_efi_cache:
	kmem_cache_destroy(xfs_efi_cache);
 out_destroy_efd_cache:
	kmem_cache_destroy(xfs_efd_cache);
 out_destroy_buf_item_cache:
	kmem_cache_destroy(xfs_buf_item_cache);
 out_destroy_trans_cache:
	kmem_cache_destroy(xfs_trans_cache);
 out_destroy_ifork_cache:
	kmem_cache_destroy(xfs_ifork_cache);
 out_destroy_da_state_cache:
	kmem_cache_destroy(xfs_da_state_cache);
 out_destroy_defer_item_cache:
	xfs_defer_destroy_item_caches();
 out_destroy_btree_cur_cache:
	xfs_btree_destroy_cur_caches();
 out_destroy_log_ticket_cache:
	kmem_cache_destroy(xfs_log_ticket_cache);
 out:
	return -ENOMEM;
}

STATIC void
xfs_destroy_caches(void)
{
	/*
	 * Make sure all delayed rcu free are flushed before we
	 * destroy caches.
	 */
	rcu_barrier();
	kmem_cache_destroy(xfs_attri_cache);
	kmem_cache_destroy(xfs_attrd_cache);
	kmem_cache_destroy(xfs_bui_cache);
	kmem_cache_destroy(xfs_bud_cache);
	kmem_cache_destroy(xfs_cui_cache);
	kmem_cache_destroy(xfs_cud_cache);
	kmem_cache_destroy(xfs_rui_cache);
	kmem_cache_destroy(xfs_rud_cache);
	kmem_cache_destroy(xfs_icreate_cache);
	kmem_cache_destroy(xfs_ili_cache);
	kmem_cache_destroy(xfs_inode_cache);
	kmem_cache_destroy(xfs_efi_cache);
	kmem_cache_destroy(xfs_efd_cache);
	kmem_cache_destroy(xfs_buf_item_cache);
	kmem_cache_destroy(xfs_trans_cache);
	kmem_cache_destroy(xfs_ifork_cache);
	kmem_cache_destroy(xfs_da_state_cache);
	xfs_defer_destroy_item_caches();
	xfs_btree_destroy_cur_caches();
	kmem_cache_destroy(xfs_log_ticket_cache);
}

STATIC int __init
xfs_init_workqueues(void)
{
	/*
	 * The allocation workqueue can be used in memory reclaim situations
	 * (writepage path), and parallelism is only limited by the number of
	 * AGs in all the filesystems mounted. Hence use the default large
	 * max_active value for this workqueue.
	 */
	xfs_alloc_wq = alloc_workqueue("xfsalloc",
			XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE), 0);
	if (!xfs_alloc_wq)
		return -ENOMEM;

	xfs_discard_wq = alloc_workqueue("xfsdiscard", XFS_WQFLAGS(WQ_UNBOUND),
			0);
	if (!xfs_discard_wq)
		goto out_free_alloc_wq;

	return 0;
out_free_alloc_wq:
	destroy_workqueue(xfs_alloc_wq);
	return -ENOMEM;
}

STATIC void
xfs_destroy_workqueues(void)
{
	destroy_workqueue(xfs_discard_wq);
	destroy_workqueue(xfs_alloc_wq);
}

#ifdef CONFIG_HOTPLUG_CPU
static int
xfs_cpu_dead(
	unsigned int		cpu)
{
	struct xfs_mount	*mp, *n;

	spin_lock(&xfs_mount_list_lock);
	list_for_each_entry_safe(mp, n, &xfs_mount_list, m_mount_list) {
		spin_unlock(&xfs_mount_list_lock);
		xfs_inodegc_cpu_dead(mp, cpu);
		spin_lock(&xfs_mount_list_lock);
	}
	spin_unlock(&xfs_mount_list_lock);
	return 0;
}
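/*
 * Illustrative note (editor-added comment, not original code): the _nocalls
 * variant below installs the callbacks without invoking them for CPUs
 * already in the target state, so xfs_cpu_dead() only runs for CPUs that
 * go offline after registration.
 */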
static int __init
xfs_cpu_hotplug_init(void)
{
	int	error;

	error = cpuhp_setup_state_nocalls(CPUHP_XFS_DEAD, "xfs:dead", NULL,
			xfs_cpu_dead);
	if (error < 0)
		xfs_alert(NULL,
"Failed to initialise CPU hotplug, error %d. XFS is non-functional.",
			error);
	return error;
}

static void
xfs_cpu_hotplug_destroy(void)
{
	cpuhp_remove_state_nocalls(CPUHP_XFS_DEAD);
}

#else /* !CONFIG_HOTPLUG_CPU */
static inline int xfs_cpu_hotplug_init(void) { return 0; }
static inline void xfs_cpu_hotplug_destroy(void) {}
#endif

STATIC int __init
init_xfs_fs(void)
{
	int			error;

	xfs_check_ondisk_structs();

	printk(KERN_INFO XFS_VERSION_STRING " with "
			 XFS_BUILD_OPTIONS " enabled\n");

	xfs_dir_startup();

	error = xfs_cpu_hotplug_init();
	if (error)
		goto out;

	error = xfs_init_caches();
	if (error)
		goto out_destroy_hp;

	error = xfs_init_workqueues();
	if (error)
		goto out_destroy_caches;

	error = xfs_mru_cache_init();
	if (error)
		goto out_destroy_wq;

	error = xfs_buf_init();
	if (error)
		goto out_mru_cache_uninit;

	error = xfs_init_procfs();
	if (error)
		goto out_buf_terminate;

	error = xfs_sysctl_register();
	if (error)
		goto out_cleanup_procfs;

	xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
	if (!xfs_kset) {
		error = -ENOMEM;
		goto out_sysctl_unregister;
	}

	xfsstats.xs_kobj.kobject.kset = xfs_kset;

	xfsstats.xs_stats = alloc_percpu(struct xfsstats);
	if (!xfsstats.xs_stats) {
		error = -ENOMEM;
		goto out_kset_unregister;
	}

	error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL,
			       "stats");
	if (error)
		goto out_free_stats;

#ifdef DEBUG
	xfs_dbg_kobj.kobject.kset = xfs_kset;
	error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
	if (error)
		goto out_remove_stats_kobj;
#endif

	error = xfs_qm_init();
	if (error)
		goto out_remove_dbg_kobj;

	error = register_filesystem(&xfs_fs_type);
	if (error)
		goto out_qm_exit;
	return 0;

 out_qm_exit:
	xfs_qm_exit();
 out_remove_dbg_kobj:
#ifdef DEBUG
	xfs_sysfs_del(&xfs_dbg_kobj);
 out_remove_stats_kobj:
#endif
	xfs_sysfs_del(&xfsstats.xs_kobj);
 out_free_stats:
	free_percpu(xfsstats.xs_stats);
 out_kset_unregister:
	kset_unregister(xfs_kset);
 out_sysctl_unregister:
	xfs_sysctl_unregister();
 out_cleanup_procfs:
	xfs_cleanup_procfs();
 out_buf_terminate:
	xfs_buf_terminate();
 out_mru_cache_uninit:
	xfs_mru_cache_uninit();
 out_destroy_wq:
	xfs_destroy_workqueues();
 out_destroy_caches:
	xfs_destroy_caches();
 out_destroy_hp:
	xfs_cpu_hotplug_destroy();
 out:
	return error;
}

STATIC void __exit
exit_xfs_fs(void)
{
	xfs_qm_exit();
	unregister_filesystem(&xfs_fs_type);
#ifdef DEBUG
	xfs_sysfs_del(&xfs_dbg_kobj);
#endif
	xfs_sysfs_del(&xfsstats.xs_kobj);
	free_percpu(xfsstats.xs_stats);
	kset_unregister(xfs_kset);
	xfs_sysctl_unregister();
	xfs_cleanup_procfs();
	xfs_buf_terminate();
	xfs_mru_cache_uninit();
	xfs_destroy_workqueues();
	xfs_destroy_caches();
	xfs_uuid_table_free();
	xfs_cpu_hotplug_destroy();
}

module_init(init_xfs_fs);
module_exit(exit_xfs_fs);

MODULE_AUTHOR("Silicon Graphics, Inc.");
MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
MODULE_LICENSE("GPL");
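/*
 * Illustrative usage (editor-added comment, not original code):
 * MODULE_ALIAS_FS("xfs") earlier in this file lets the VFS autoload this
 * module on demand, so
 *
 *	# mount -t xfs /dev/sdb1 /mnt
 *
 * works without an explicit modprobe when the module is installed.
 */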