1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 7 #include "xfs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_sb.h" 13 #include "xfs_mount.h" 14 #include "xfs_inode.h" 15 #include "xfs_btree.h" 16 #include "xfs_bmap.h" 17 #include "xfs_alloc.h" 18 #include "xfs_fsops.h" 19 #include "xfs_trans.h" 20 #include "xfs_buf_item.h" 21 #include "xfs_log.h" 22 #include "xfs_log_priv.h" 23 #include "xfs_dir2.h" 24 #include "xfs_extfree_item.h" 25 #include "xfs_mru_cache.h" 26 #include "xfs_inode_item.h" 27 #include "xfs_icache.h" 28 #include "xfs_trace.h" 29 #include "xfs_icreate_item.h" 30 #include "xfs_filestream.h" 31 #include "xfs_quota.h" 32 #include "xfs_sysfs.h" 33 #include "xfs_ondisk.h" 34 #include "xfs_rmap_item.h" 35 #include "xfs_refcount_item.h" 36 #include "xfs_bmap_item.h" 37 #include "xfs_reflink.h" 38 #include "xfs_pwork.h" 39 #include "xfs_ag.h" 40 #include "xfs_defer.h" 41 #include "xfs_attr_item.h" 42 #include "xfs_xattr.h" 43 44 #include <linux/magic.h> 45 #include <linux/fs_context.h> 46 #include <linux/fs_parser.h> 47 48 static const struct super_operations xfs_super_operations; 49 50 static struct kset *xfs_kset; /* top-level xfs sysfs dir */ 51 #ifdef DEBUG 52 static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */ 53 #endif 54 55 #ifdef CONFIG_HOTPLUG_CPU 56 static LIST_HEAD(xfs_mount_list); 57 static DEFINE_SPINLOCK(xfs_mount_list_lock); 58 59 static inline void xfs_mount_list_add(struct xfs_mount *mp) 60 { 61 spin_lock(&xfs_mount_list_lock); 62 list_add(&mp->m_mount_list, &xfs_mount_list); 63 spin_unlock(&xfs_mount_list_lock); 64 } 65 66 static inline void xfs_mount_list_del(struct xfs_mount *mp) 67 { 68 spin_lock(&xfs_mount_list_lock); 69 list_del(&mp->m_mount_list); 70 spin_unlock(&xfs_mount_list_lock); 71 } 72 #else /* !CONFIG_HOTPLUG_CPU */ 73 static inline void xfs_mount_list_add(struct xfs_mount *mp) {} 74 static inline void xfs_mount_list_del(struct xfs_mount *mp) {} 75 #endif 76 77 enum xfs_dax_mode { 78 XFS_DAX_INODE = 0, 79 XFS_DAX_ALWAYS = 1, 80 XFS_DAX_NEVER = 2, 81 }; 82 83 static void 84 xfs_mount_set_dax_mode( 85 struct xfs_mount *mp, 86 enum xfs_dax_mode mode) 87 { 88 switch (mode) { 89 case XFS_DAX_INODE: 90 mp->m_features &= ~(XFS_FEAT_DAX_ALWAYS | XFS_FEAT_DAX_NEVER); 91 break; 92 case XFS_DAX_ALWAYS: 93 mp->m_features |= XFS_FEAT_DAX_ALWAYS; 94 mp->m_features &= ~XFS_FEAT_DAX_NEVER; 95 break; 96 case XFS_DAX_NEVER: 97 mp->m_features |= XFS_FEAT_DAX_NEVER; 98 mp->m_features &= ~XFS_FEAT_DAX_ALWAYS; 99 break; 100 } 101 } 102 103 static const struct constant_table dax_param_enums[] = { 104 {"inode", XFS_DAX_INODE }, 105 {"always", XFS_DAX_ALWAYS }, 106 {"never", XFS_DAX_NEVER }, 107 {} 108 }; 109 110 /* 111 * Table driven mount option parser. 112 */ 113 enum { 114 Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, 115 Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid, 116 Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups, 117 Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, Opt_ikeep, 118 Opt_noikeep, Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2, 119 Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota, 120 Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota, 121 Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce, 122 Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum, 123 }; 124 125 static const struct fs_parameter_spec xfs_fs_parameters[] = { 126 fsparam_u32("logbufs", Opt_logbufs), 127 fsparam_string("logbsize", Opt_logbsize), 128 fsparam_string("logdev", Opt_logdev), 129 fsparam_string("rtdev", Opt_rtdev), 130 fsparam_flag("wsync", Opt_wsync), 131 fsparam_flag("noalign", Opt_noalign), 132 fsparam_flag("swalloc", Opt_swalloc), 133 fsparam_u32("sunit", Opt_sunit), 134 fsparam_u32("swidth", Opt_swidth), 135 fsparam_flag("nouuid", Opt_nouuid), 136 fsparam_flag("grpid", Opt_grpid), 137 fsparam_flag("nogrpid", Opt_nogrpid), 138 fsparam_flag("bsdgroups", Opt_bsdgroups), 139 fsparam_flag("sysvgroups", Opt_sysvgroups), 140 fsparam_string("allocsize", Opt_allocsize), 141 fsparam_flag("norecovery", Opt_norecovery), 142 fsparam_flag("inode64", Opt_inode64), 143 fsparam_flag("inode32", Opt_inode32), 144 fsparam_flag("ikeep", Opt_ikeep), 145 fsparam_flag("noikeep", Opt_noikeep), 146 fsparam_flag("largeio", Opt_largeio), 147 fsparam_flag("nolargeio", Opt_nolargeio), 148 fsparam_flag("attr2", Opt_attr2), 149 fsparam_flag("noattr2", Opt_noattr2), 150 fsparam_flag("filestreams", Opt_filestreams), 151 fsparam_flag("quota", Opt_quota), 152 fsparam_flag("noquota", Opt_noquota), 153 fsparam_flag("usrquota", Opt_usrquota), 154 fsparam_flag("grpquota", Opt_grpquota), 155 fsparam_flag("prjquota", Opt_prjquota), 156 fsparam_flag("uquota", Opt_uquota), 157 fsparam_flag("gquota", Opt_gquota), 158 fsparam_flag("pquota", Opt_pquota), 159 fsparam_flag("uqnoenforce", Opt_uqnoenforce), 160 fsparam_flag("gqnoenforce", Opt_gqnoenforce), 161 fsparam_flag("pqnoenforce", Opt_pqnoenforce), 162 fsparam_flag("qnoenforce", Opt_qnoenforce), 163 fsparam_flag("discard", Opt_discard), 164 fsparam_flag("nodiscard", Opt_nodiscard), 165 fsparam_flag("dax", Opt_dax), 166 fsparam_enum("dax", Opt_dax_enum, dax_param_enums), 167 {} 168 }; 169 170 struct proc_xfs_info { 171 uint64_t flag; 172 char *str; 173 }; 174 175 static int 176 xfs_fs_show_options( 177 struct seq_file *m, 178 struct dentry *root) 179 { 180 static struct proc_xfs_info xfs_info_set[] = { 181 /* the few simple ones we can get from the mount struct */ 182 { XFS_FEAT_IKEEP, ",ikeep" }, 183 { XFS_FEAT_WSYNC, ",wsync" }, 184 { XFS_FEAT_NOALIGN, ",noalign" }, 185 { XFS_FEAT_SWALLOC, ",swalloc" }, 186 { XFS_FEAT_NOUUID, ",nouuid" }, 187 { XFS_FEAT_NORECOVERY, ",norecovery" }, 188 { XFS_FEAT_ATTR2, ",attr2" }, 189 { XFS_FEAT_FILESTREAMS, ",filestreams" }, 190 { XFS_FEAT_GRPID, ",grpid" }, 191 { XFS_FEAT_DISCARD, ",discard" }, 192 { XFS_FEAT_LARGE_IOSIZE, ",largeio" }, 193 { XFS_FEAT_DAX_ALWAYS, ",dax=always" }, 194 { XFS_FEAT_DAX_NEVER, ",dax=never" }, 195 { 0, NULL } 196 }; 197 struct xfs_mount *mp = XFS_M(root->d_sb); 198 struct proc_xfs_info *xfs_infop; 199 200 for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) { 201 if (mp->m_features & xfs_infop->flag) 202 seq_puts(m, xfs_infop->str); 203 } 204 205 seq_printf(m, ",inode%d", xfs_has_small_inums(mp) ? 32 : 64); 206 207 if (xfs_has_allocsize(mp)) 208 seq_printf(m, ",allocsize=%dk", 209 (1 << mp->m_allocsize_log) >> 10); 210 211 if (mp->m_logbufs > 0) 212 seq_printf(m, ",logbufs=%d", mp->m_logbufs); 213 if (mp->m_logbsize > 0) 214 seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10); 215 216 if (mp->m_logname) 217 seq_show_option(m, "logdev", mp->m_logname); 218 if (mp->m_rtname) 219 seq_show_option(m, "rtdev", mp->m_rtname); 220 221 if (mp->m_dalign > 0) 222 seq_printf(m, ",sunit=%d", 223 (int)XFS_FSB_TO_BB(mp, mp->m_dalign)); 224 if (mp->m_swidth > 0) 225 seq_printf(m, ",swidth=%d", 226 (int)XFS_FSB_TO_BB(mp, mp->m_swidth)); 227 228 if (mp->m_qflags & XFS_UQUOTA_ENFD) 229 seq_puts(m, ",usrquota"); 230 else if (mp->m_qflags & XFS_UQUOTA_ACCT) 231 seq_puts(m, ",uqnoenforce"); 232 233 if (mp->m_qflags & XFS_PQUOTA_ENFD) 234 seq_puts(m, ",prjquota"); 235 else if (mp->m_qflags & XFS_PQUOTA_ACCT) 236 seq_puts(m, ",pqnoenforce"); 237 238 if (mp->m_qflags & XFS_GQUOTA_ENFD) 239 seq_puts(m, ",grpquota"); 240 else if (mp->m_qflags & XFS_GQUOTA_ACCT) 241 seq_puts(m, ",gqnoenforce"); 242 243 if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) 244 seq_puts(m, ",noquota"); 245 246 return 0; 247 } 248 249 /* 250 * Set parameters for inode allocation heuristics, taking into account 251 * filesystem size and inode32/inode64 mount options; i.e. specifically 252 * whether or not XFS_FEAT_SMALL_INUMS is set. 253 * 254 * Inode allocation patterns are altered only if inode32 is requested 255 * (XFS_FEAT_SMALL_INUMS), and the filesystem is sufficiently large. 256 * If altered, XFS_OPSTATE_INODE32 is set as well. 257 * 258 * An agcount independent of that in the mount structure is provided 259 * because in the growfs case, mp->m_sb.sb_agcount is not yet updated 260 * to the potentially higher ag count. 261 * 262 * Returns the maximum AG index which may contain inodes. 263 */ 264 xfs_agnumber_t 265 xfs_set_inode_alloc( 266 struct xfs_mount *mp, 267 xfs_agnumber_t agcount) 268 { 269 xfs_agnumber_t index; 270 xfs_agnumber_t maxagi = 0; 271 xfs_sb_t *sbp = &mp->m_sb; 272 xfs_agnumber_t max_metadata; 273 xfs_agino_t agino; 274 xfs_ino_t ino; 275 276 /* 277 * Calculate how much should be reserved for inodes to meet 278 * the max inode percentage. Used only for inode32. 279 */ 280 if (M_IGEO(mp)->maxicount) { 281 uint64_t icount; 282 283 icount = sbp->sb_dblocks * sbp->sb_imax_pct; 284 do_div(icount, 100); 285 icount += sbp->sb_agblocks - 1; 286 do_div(icount, sbp->sb_agblocks); 287 max_metadata = icount; 288 } else { 289 max_metadata = agcount; 290 } 291 292 /* Get the last possible inode in the filesystem */ 293 agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1); 294 ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino); 295 296 /* 297 * If user asked for no more than 32-bit inodes, and the fs is 298 * sufficiently large, set XFS_OPSTATE_INODE32 if we must alter 299 * the allocator to accommodate the request. 300 */ 301 if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32) 302 set_bit(XFS_OPSTATE_INODE32, &mp->m_opstate); 303 else 304 clear_bit(XFS_OPSTATE_INODE32, &mp->m_opstate); 305 306 for (index = 0; index < agcount; index++) { 307 struct xfs_perag *pag; 308 309 ino = XFS_AGINO_TO_INO(mp, index, agino); 310 311 pag = xfs_perag_get(mp, index); 312 313 if (xfs_is_inode32(mp)) { 314 if (ino > XFS_MAXINUMBER_32) { 315 pag->pagi_inodeok = 0; 316 pag->pagf_metadata = 0; 317 } else { 318 pag->pagi_inodeok = 1; 319 maxagi++; 320 if (index < max_metadata) 321 pag->pagf_metadata = 1; 322 else 323 pag->pagf_metadata = 0; 324 } 325 } else { 326 pag->pagi_inodeok = 1; 327 pag->pagf_metadata = 0; 328 } 329 330 xfs_perag_put(pag); 331 } 332 333 return xfs_is_inode32(mp) ? maxagi : agcount; 334 } 335 336 static int 337 xfs_setup_dax_always( 338 struct xfs_mount *mp) 339 { 340 if (!mp->m_ddev_targp->bt_daxdev && 341 (!mp->m_rtdev_targp || !mp->m_rtdev_targp->bt_daxdev)) { 342 xfs_alert(mp, 343 "DAX unsupported by block device. Turning off DAX."); 344 goto disable_dax; 345 } 346 347 if (mp->m_super->s_blocksize != PAGE_SIZE) { 348 xfs_alert(mp, 349 "DAX not supported for blocksize. Turning off DAX."); 350 goto disable_dax; 351 } 352 353 if (xfs_has_reflink(mp)) { 354 xfs_alert(mp, "DAX and reflink cannot be used together!"); 355 return -EINVAL; 356 } 357 358 xfs_warn(mp, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); 359 return 0; 360 361 disable_dax: 362 xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER); 363 return 0; 364 } 365 366 STATIC int 367 xfs_blkdev_get( 368 xfs_mount_t *mp, 369 const char *name, 370 struct block_device **bdevp) 371 { 372 int error = 0; 373 374 *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL, 375 mp); 376 if (IS_ERR(*bdevp)) { 377 error = PTR_ERR(*bdevp); 378 xfs_warn(mp, "Invalid device [%s], error=%d", name, error); 379 } 380 381 return error; 382 } 383 384 STATIC void 385 xfs_blkdev_put( 386 struct block_device *bdev) 387 { 388 if (bdev) 389 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 390 } 391 392 STATIC void 393 xfs_close_devices( 394 struct xfs_mount *mp) 395 { 396 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { 397 struct block_device *logdev = mp->m_logdev_targp->bt_bdev; 398 399 xfs_free_buftarg(mp->m_logdev_targp); 400 xfs_blkdev_put(logdev); 401 } 402 if (mp->m_rtdev_targp) { 403 struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev; 404 405 xfs_free_buftarg(mp->m_rtdev_targp); 406 xfs_blkdev_put(rtdev); 407 } 408 xfs_free_buftarg(mp->m_ddev_targp); 409 } 410 411 /* 412 * The file system configurations are: 413 * (1) device (partition) with data and internal log 414 * (2) logical volume with data and log subvolumes. 415 * (3) logical volume with data, log, and realtime subvolumes. 416 * 417 * We only have to handle opening the log and realtime volumes here if 418 * they are present. The data subvolume has already been opened by 419 * get_sb_bdev() and is stored in sb->s_bdev. 420 */ 421 STATIC int 422 xfs_open_devices( 423 struct xfs_mount *mp) 424 { 425 struct block_device *ddev = mp->m_super->s_bdev; 426 struct block_device *logdev = NULL, *rtdev = NULL; 427 int error; 428 429 /* 430 * Open real time and log devices - order is important. 431 */ 432 if (mp->m_logname) { 433 error = xfs_blkdev_get(mp, mp->m_logname, &logdev); 434 if (error) 435 return error; 436 } 437 438 if (mp->m_rtname) { 439 error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev); 440 if (error) 441 goto out_close_logdev; 442 443 if (rtdev == ddev || rtdev == logdev) { 444 xfs_warn(mp, 445 "Cannot mount filesystem with identical rtdev and ddev/logdev."); 446 error = -EINVAL; 447 goto out_close_rtdev; 448 } 449 } 450 451 /* 452 * Setup xfs_mount buffer target pointers 453 */ 454 error = -ENOMEM; 455 mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev); 456 if (!mp->m_ddev_targp) 457 goto out_close_rtdev; 458 459 if (rtdev) { 460 mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev); 461 if (!mp->m_rtdev_targp) 462 goto out_free_ddev_targ; 463 } 464 465 if (logdev && logdev != ddev) { 466 mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev); 467 if (!mp->m_logdev_targp) 468 goto out_free_rtdev_targ; 469 } else { 470 mp->m_logdev_targp = mp->m_ddev_targp; 471 } 472 473 return 0; 474 475 out_free_rtdev_targ: 476 if (mp->m_rtdev_targp) 477 xfs_free_buftarg(mp->m_rtdev_targp); 478 out_free_ddev_targ: 479 xfs_free_buftarg(mp->m_ddev_targp); 480 out_close_rtdev: 481 xfs_blkdev_put(rtdev); 482 out_close_logdev: 483 if (logdev && logdev != ddev) 484 xfs_blkdev_put(logdev); 485 return error; 486 } 487 488 /* 489 * Setup xfs_mount buffer target pointers based on superblock 490 */ 491 STATIC int 492 xfs_setup_devices( 493 struct xfs_mount *mp) 494 { 495 int error; 496 497 error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize); 498 if (error) 499 return error; 500 501 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { 502 unsigned int log_sector_size = BBSIZE; 503 504 if (xfs_has_sector(mp)) 505 log_sector_size = mp->m_sb.sb_logsectsize; 506 error = xfs_setsize_buftarg(mp->m_logdev_targp, 507 log_sector_size); 508 if (error) 509 return error; 510 } 511 if (mp->m_rtdev_targp) { 512 error = xfs_setsize_buftarg(mp->m_rtdev_targp, 513 mp->m_sb.sb_sectsize); 514 if (error) 515 return error; 516 } 517 518 return 0; 519 } 520 521 STATIC int 522 xfs_init_mount_workqueues( 523 struct xfs_mount *mp) 524 { 525 mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s", 526 XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM), 527 1, mp->m_super->s_id); 528 if (!mp->m_buf_workqueue) 529 goto out; 530 531 mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s", 532 XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM), 533 0, mp->m_super->s_id); 534 if (!mp->m_unwritten_workqueue) 535 goto out_destroy_buf; 536 537 mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s", 538 XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM), 539 0, mp->m_super->s_id); 540 if (!mp->m_reclaim_workqueue) 541 goto out_destroy_unwritten; 542 543 mp->m_blockgc_wq = alloc_workqueue("xfs-blockgc/%s", 544 XFS_WQFLAGS(WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM), 545 0, mp->m_super->s_id); 546 if (!mp->m_blockgc_wq) 547 goto out_destroy_reclaim; 548 549 mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s", 550 XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM), 551 1, mp->m_super->s_id); 552 if (!mp->m_inodegc_wq) 553 goto out_destroy_blockgc; 554 555 mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", 556 XFS_WQFLAGS(WQ_FREEZABLE), 0, mp->m_super->s_id); 557 if (!mp->m_sync_workqueue) 558 goto out_destroy_inodegc; 559 560 return 0; 561 562 out_destroy_inodegc: 563 destroy_workqueue(mp->m_inodegc_wq); 564 out_destroy_blockgc: 565 destroy_workqueue(mp->m_blockgc_wq); 566 out_destroy_reclaim: 567 destroy_workqueue(mp->m_reclaim_workqueue); 568 out_destroy_unwritten: 569 destroy_workqueue(mp->m_unwritten_workqueue); 570 out_destroy_buf: 571 destroy_workqueue(mp->m_buf_workqueue); 572 out: 573 return -ENOMEM; 574 } 575 576 STATIC void 577 xfs_destroy_mount_workqueues( 578 struct xfs_mount *mp) 579 { 580 destroy_workqueue(mp->m_sync_workqueue); 581 destroy_workqueue(mp->m_blockgc_wq); 582 destroy_workqueue(mp->m_inodegc_wq); 583 destroy_workqueue(mp->m_reclaim_workqueue); 584 destroy_workqueue(mp->m_unwritten_workqueue); 585 destroy_workqueue(mp->m_buf_workqueue); 586 } 587 588 static void 589 xfs_flush_inodes_worker( 590 struct work_struct *work) 591 { 592 struct xfs_mount *mp = container_of(work, struct xfs_mount, 593 m_flush_inodes_work); 594 struct super_block *sb = mp->m_super; 595 596 if (down_read_trylock(&sb->s_umount)) { 597 sync_inodes_sb(sb); 598 up_read(&sb->s_umount); 599 } 600 } 601 602 /* 603 * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK 604 * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting 605 * for IO to complete so that we effectively throttle multiple callers to the 606 * rate at which IO is completing. 607 */ 608 void 609 xfs_flush_inodes( 610 struct xfs_mount *mp) 611 { 612 /* 613 * If flush_work() returns true then that means we waited for a flush 614 * which was already in progress. Don't bother running another scan. 615 */ 616 if (flush_work(&mp->m_flush_inodes_work)) 617 return; 618 619 queue_work(mp->m_sync_workqueue, &mp->m_flush_inodes_work); 620 flush_work(&mp->m_flush_inodes_work); 621 } 622 623 /* Catch misguided souls that try to use this interface on XFS */ 624 STATIC struct inode * 625 xfs_fs_alloc_inode( 626 struct super_block *sb) 627 { 628 BUG(); 629 return NULL; 630 } 631 632 /* 633 * Now that the generic code is guaranteed not to be accessing 634 * the linux inode, we can inactivate and reclaim the inode. 635 */ 636 STATIC void 637 xfs_fs_destroy_inode( 638 struct inode *inode) 639 { 640 struct xfs_inode *ip = XFS_I(inode); 641 642 trace_xfs_destroy_inode(ip); 643 644 ASSERT(!rwsem_is_locked(&inode->i_rwsem)); 645 XFS_STATS_INC(ip->i_mount, vn_rele); 646 XFS_STATS_INC(ip->i_mount, vn_remove); 647 xfs_inode_mark_reclaimable(ip); 648 } 649 650 static void 651 xfs_fs_dirty_inode( 652 struct inode *inode, 653 int flag) 654 { 655 struct xfs_inode *ip = XFS_I(inode); 656 struct xfs_mount *mp = ip->i_mount; 657 struct xfs_trans *tp; 658 659 if (!(inode->i_sb->s_flags & SB_LAZYTIME)) 660 return; 661 if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME)) 662 return; 663 664 if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp)) 665 return; 666 xfs_ilock(ip, XFS_ILOCK_EXCL); 667 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 668 xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP); 669 xfs_trans_commit(tp); 670 } 671 672 /* 673 * Slab object creation initialisation for the XFS inode. 674 * This covers only the idempotent fields in the XFS inode; 675 * all other fields need to be initialised on allocation 676 * from the slab. This avoids the need to repeatedly initialise 677 * fields in the xfs inode that left in the initialise state 678 * when freeing the inode. 679 */ 680 STATIC void 681 xfs_fs_inode_init_once( 682 void *inode) 683 { 684 struct xfs_inode *ip = inode; 685 686 memset(ip, 0, sizeof(struct xfs_inode)); 687 688 /* vfs inode */ 689 inode_init_once(VFS_I(ip)); 690 691 /* xfs inode */ 692 atomic_set(&ip->i_pincount, 0); 693 spin_lock_init(&ip->i_flags_lock); 694 695 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, 696 "xfsino", ip->i_ino); 697 } 698 699 /* 700 * We do an unlocked check for XFS_IDONTCACHE here because we are already 701 * serialised against cache hits here via the inode->i_lock and igrab() in 702 * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be 703 * racing with us, and it avoids needing to grab a spinlock here for every inode 704 * we drop the final reference on. 705 */ 706 STATIC int 707 xfs_fs_drop_inode( 708 struct inode *inode) 709 { 710 struct xfs_inode *ip = XFS_I(inode); 711 712 /* 713 * If this unlinked inode is in the middle of recovery, don't 714 * drop the inode just yet; log recovery will take care of 715 * that. See the comment for this inode flag. 716 */ 717 if (ip->i_flags & XFS_IRECOVERY) { 718 ASSERT(xlog_recovery_needed(ip->i_mount->m_log)); 719 return 0; 720 } 721 722 return generic_drop_inode(inode); 723 } 724 725 static void 726 xfs_mount_free( 727 struct xfs_mount *mp) 728 { 729 kfree(mp->m_rtname); 730 kfree(mp->m_logname); 731 kmem_free(mp); 732 } 733 734 STATIC int 735 xfs_fs_sync_fs( 736 struct super_block *sb, 737 int wait) 738 { 739 struct xfs_mount *mp = XFS_M(sb); 740 int error; 741 742 trace_xfs_fs_sync_fs(mp, __return_address); 743 744 /* 745 * Doing anything during the async pass would be counterproductive. 746 */ 747 if (!wait) 748 return 0; 749 750 error = xfs_log_force(mp, XFS_LOG_SYNC); 751 if (error) 752 return error; 753 754 if (laptop_mode) { 755 /* 756 * The disk must be active because we're syncing. 757 * We schedule log work now (now that the disk is 758 * active) instead of later (when it might not be). 759 */ 760 flush_delayed_work(&mp->m_log->l_work); 761 } 762 763 /* 764 * If we are called with page faults frozen out, it means we are about 765 * to freeze the transaction subsystem. Take the opportunity to shut 766 * down inodegc because once SB_FREEZE_FS is set it's too late to 767 * prevent inactivation races with freeze. The fs doesn't get called 768 * again by the freezing process until after SB_FREEZE_FS has been set, 769 * so it's now or never. Same logic applies to speculative allocation 770 * garbage collection. 771 * 772 * We don't care if this is a normal syncfs call that does this or 773 * freeze that does this - we can run this multiple times without issue 774 * and we won't race with a restart because a restart can only occur 775 * when the state is either SB_FREEZE_FS or SB_FREEZE_COMPLETE. 776 */ 777 if (sb->s_writers.frozen == SB_FREEZE_PAGEFAULT) { 778 xfs_inodegc_stop(mp); 779 xfs_blockgc_stop(mp); 780 } 781 782 return 0; 783 } 784 785 STATIC int 786 xfs_fs_statfs( 787 struct dentry *dentry, 788 struct kstatfs *statp) 789 { 790 struct xfs_mount *mp = XFS_M(dentry->d_sb); 791 xfs_sb_t *sbp = &mp->m_sb; 792 struct xfs_inode *ip = XFS_I(d_inode(dentry)); 793 uint64_t fakeinos, id; 794 uint64_t icount; 795 uint64_t ifree; 796 uint64_t fdblocks; 797 xfs_extlen_t lsize; 798 int64_t ffree; 799 800 /* Wait for whatever inactivations are in progress. */ 801 xfs_inodegc_flush(mp); 802 803 statp->f_type = XFS_SUPER_MAGIC; 804 statp->f_namelen = MAXNAMELEN - 1; 805 806 id = huge_encode_dev(mp->m_ddev_targp->bt_dev); 807 statp->f_fsid = u64_to_fsid(id); 808 809 icount = percpu_counter_sum(&mp->m_icount); 810 ifree = percpu_counter_sum(&mp->m_ifree); 811 fdblocks = percpu_counter_sum(&mp->m_fdblocks); 812 813 spin_lock(&mp->m_sb_lock); 814 statp->f_bsize = sbp->sb_blocksize; 815 lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; 816 statp->f_blocks = sbp->sb_dblocks - lsize; 817 spin_unlock(&mp->m_sb_lock); 818 819 /* make sure statp->f_bfree does not underflow */ 820 statp->f_bfree = max_t(int64_t, 0, 821 fdblocks - xfs_fdblocks_unavailable(mp)); 822 statp->f_bavail = statp->f_bfree; 823 824 fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree); 825 statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER); 826 if (M_IGEO(mp)->maxicount) 827 statp->f_files = min_t(typeof(statp->f_files), 828 statp->f_files, 829 M_IGEO(mp)->maxicount); 830 831 /* If sb_icount overshot maxicount, report actual allocation */ 832 statp->f_files = max_t(typeof(statp->f_files), 833 statp->f_files, 834 sbp->sb_icount); 835 836 /* make sure statp->f_ffree does not underflow */ 837 ffree = statp->f_files - (icount - ifree); 838 statp->f_ffree = max_t(int64_t, ffree, 0); 839 840 841 if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) && 842 ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) == 843 (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD)) 844 xfs_qm_statvfs(ip, statp); 845 846 if (XFS_IS_REALTIME_MOUNT(mp) && 847 (ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) { 848 s64 freertx; 849 850 statp->f_blocks = sbp->sb_rblocks; 851 freertx = percpu_counter_sum_positive(&mp->m_frextents); 852 statp->f_bavail = statp->f_bfree = freertx * sbp->sb_rextsize; 853 } 854 855 return 0; 856 } 857 858 STATIC void 859 xfs_save_resvblks(struct xfs_mount *mp) 860 { 861 uint64_t resblks = 0; 862 863 mp->m_resblks_save = mp->m_resblks; 864 xfs_reserve_blocks(mp, &resblks, NULL); 865 } 866 867 STATIC void 868 xfs_restore_resvblks(struct xfs_mount *mp) 869 { 870 uint64_t resblks; 871 872 if (mp->m_resblks_save) { 873 resblks = mp->m_resblks_save; 874 mp->m_resblks_save = 0; 875 } else 876 resblks = xfs_default_resblks(mp); 877 878 xfs_reserve_blocks(mp, &resblks, NULL); 879 } 880 881 /* 882 * Second stage of a freeze. The data is already frozen so we only 883 * need to take care of the metadata. Once that's done sync the superblock 884 * to the log to dirty it in case of a crash while frozen. This ensures that we 885 * will recover the unlinked inode lists on the next mount. 886 */ 887 STATIC int 888 xfs_fs_freeze( 889 struct super_block *sb) 890 { 891 struct xfs_mount *mp = XFS_M(sb); 892 unsigned int flags; 893 int ret; 894 895 /* 896 * The filesystem is now frozen far enough that memory reclaim 897 * cannot safely operate on the filesystem. Hence we need to 898 * set a GFP_NOFS context here to avoid recursion deadlocks. 899 */ 900 flags = memalloc_nofs_save(); 901 xfs_save_resvblks(mp); 902 ret = xfs_log_quiesce(mp); 903 memalloc_nofs_restore(flags); 904 905 /* 906 * For read-write filesystems, we need to restart the inodegc on error 907 * because we stopped it at SB_FREEZE_PAGEFAULT level and a thaw is not 908 * going to be run to restart it now. We are at SB_FREEZE_FS level 909 * here, so we can restart safely without racing with a stop in 910 * xfs_fs_sync_fs(). 911 */ 912 if (ret && !xfs_is_readonly(mp)) { 913 xfs_blockgc_start(mp); 914 xfs_inodegc_start(mp); 915 } 916 917 return ret; 918 } 919 920 STATIC int 921 xfs_fs_unfreeze( 922 struct super_block *sb) 923 { 924 struct xfs_mount *mp = XFS_M(sb); 925 926 xfs_restore_resvblks(mp); 927 xfs_log_work_queue(mp); 928 929 /* 930 * Don't reactivate the inodegc worker on a readonly filesystem because 931 * inodes are sent directly to reclaim. Don't reactivate the blockgc 932 * worker because there are no speculative preallocations on a readonly 933 * filesystem. 934 */ 935 if (!xfs_is_readonly(mp)) { 936 xfs_blockgc_start(mp); 937 xfs_inodegc_start(mp); 938 } 939 940 return 0; 941 } 942 943 /* 944 * This function fills in xfs_mount_t fields based on mount args. 945 * Note: the superblock _has_ now been read in. 946 */ 947 STATIC int 948 xfs_finish_flags( 949 struct xfs_mount *mp) 950 { 951 /* Fail a mount where the logbuf is smaller than the log stripe */ 952 if (xfs_has_logv2(mp)) { 953 if (mp->m_logbsize <= 0 && 954 mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) { 955 mp->m_logbsize = mp->m_sb.sb_logsunit; 956 } else if (mp->m_logbsize > 0 && 957 mp->m_logbsize < mp->m_sb.sb_logsunit) { 958 xfs_warn(mp, 959 "logbuf size must be greater than or equal to log stripe size"); 960 return -EINVAL; 961 } 962 } else { 963 /* Fail a mount if the logbuf is larger than 32K */ 964 if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { 965 xfs_warn(mp, 966 "logbuf size for version 1 logs must be 16K or 32K"); 967 return -EINVAL; 968 } 969 } 970 971 /* 972 * V5 filesystems always use attr2 format for attributes. 973 */ 974 if (xfs_has_crc(mp) && xfs_has_noattr2(mp)) { 975 xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. " 976 "attr2 is always enabled for V5 filesystems."); 977 return -EINVAL; 978 } 979 980 /* 981 * prohibit r/w mounts of read-only filesystems 982 */ 983 if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !xfs_is_readonly(mp)) { 984 xfs_warn(mp, 985 "cannot mount a read-only filesystem as read-write"); 986 return -EROFS; 987 } 988 989 if ((mp->m_qflags & XFS_GQUOTA_ACCT) && 990 (mp->m_qflags & XFS_PQUOTA_ACCT) && 991 !xfs_has_pquotino(mp)) { 992 xfs_warn(mp, 993 "Super block does not support project and group quota together"); 994 return -EINVAL; 995 } 996 997 return 0; 998 } 999 1000 static int 1001 xfs_init_percpu_counters( 1002 struct xfs_mount *mp) 1003 { 1004 int error; 1005 1006 error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL); 1007 if (error) 1008 return -ENOMEM; 1009 1010 error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL); 1011 if (error) 1012 goto free_icount; 1013 1014 error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL); 1015 if (error) 1016 goto free_ifree; 1017 1018 error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL); 1019 if (error) 1020 goto free_fdblocks; 1021 1022 error = percpu_counter_init(&mp->m_frextents, 0, GFP_KERNEL); 1023 if (error) 1024 goto free_delalloc; 1025 1026 return 0; 1027 1028 free_delalloc: 1029 percpu_counter_destroy(&mp->m_delalloc_blks); 1030 free_fdblocks: 1031 percpu_counter_destroy(&mp->m_fdblocks); 1032 free_ifree: 1033 percpu_counter_destroy(&mp->m_ifree); 1034 free_icount: 1035 percpu_counter_destroy(&mp->m_icount); 1036 return -ENOMEM; 1037 } 1038 1039 void 1040 xfs_reinit_percpu_counters( 1041 struct xfs_mount *mp) 1042 { 1043 percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount); 1044 percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree); 1045 percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks); 1046 percpu_counter_set(&mp->m_frextents, mp->m_sb.sb_frextents); 1047 } 1048 1049 static void 1050 xfs_destroy_percpu_counters( 1051 struct xfs_mount *mp) 1052 { 1053 percpu_counter_destroy(&mp->m_icount); 1054 percpu_counter_destroy(&mp->m_ifree); 1055 percpu_counter_destroy(&mp->m_fdblocks); 1056 ASSERT(xfs_is_shutdown(mp) || 1057 percpu_counter_sum(&mp->m_delalloc_blks) == 0); 1058 percpu_counter_destroy(&mp->m_delalloc_blks); 1059 percpu_counter_destroy(&mp->m_frextents); 1060 } 1061 1062 static int 1063 xfs_inodegc_init_percpu( 1064 struct xfs_mount *mp) 1065 { 1066 struct xfs_inodegc *gc; 1067 int cpu; 1068 1069 mp->m_inodegc = alloc_percpu(struct xfs_inodegc); 1070 if (!mp->m_inodegc) 1071 return -ENOMEM; 1072 1073 for_each_possible_cpu(cpu) { 1074 gc = per_cpu_ptr(mp->m_inodegc, cpu); 1075 init_llist_head(&gc->list); 1076 gc->items = 0; 1077 INIT_WORK(&gc->work, xfs_inodegc_worker); 1078 } 1079 return 0; 1080 } 1081 1082 static void 1083 xfs_inodegc_free_percpu( 1084 struct xfs_mount *mp) 1085 { 1086 if (!mp->m_inodegc) 1087 return; 1088 free_percpu(mp->m_inodegc); 1089 } 1090 1091 static void 1092 xfs_fs_put_super( 1093 struct super_block *sb) 1094 { 1095 struct xfs_mount *mp = XFS_M(sb); 1096 1097 /* if ->fill_super failed, we have no mount to tear down */ 1098 if (!sb->s_fs_info) 1099 return; 1100 1101 xfs_notice(mp, "Unmounting Filesystem"); 1102 xfs_filestream_unmount(mp); 1103 xfs_unmountfs(mp); 1104 1105 xfs_freesb(mp); 1106 free_percpu(mp->m_stats.xs_stats); 1107 xfs_mount_list_del(mp); 1108 xfs_inodegc_free_percpu(mp); 1109 xfs_destroy_percpu_counters(mp); 1110 xfs_destroy_mount_workqueues(mp); 1111 xfs_close_devices(mp); 1112 1113 sb->s_fs_info = NULL; 1114 xfs_mount_free(mp); 1115 } 1116 1117 static long 1118 xfs_fs_nr_cached_objects( 1119 struct super_block *sb, 1120 struct shrink_control *sc) 1121 { 1122 /* Paranoia: catch incorrect calls during mount setup or teardown */ 1123 if (WARN_ON_ONCE(!sb->s_fs_info)) 1124 return 0; 1125 return xfs_reclaim_inodes_count(XFS_M(sb)); 1126 } 1127 1128 static long 1129 xfs_fs_free_cached_objects( 1130 struct super_block *sb, 1131 struct shrink_control *sc) 1132 { 1133 return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan); 1134 } 1135 1136 static const struct super_operations xfs_super_operations = { 1137 .alloc_inode = xfs_fs_alloc_inode, 1138 .destroy_inode = xfs_fs_destroy_inode, 1139 .dirty_inode = xfs_fs_dirty_inode, 1140 .drop_inode = xfs_fs_drop_inode, 1141 .put_super = xfs_fs_put_super, 1142 .sync_fs = xfs_fs_sync_fs, 1143 .freeze_fs = xfs_fs_freeze, 1144 .unfreeze_fs = xfs_fs_unfreeze, 1145 .statfs = xfs_fs_statfs, 1146 .show_options = xfs_fs_show_options, 1147 .nr_cached_objects = xfs_fs_nr_cached_objects, 1148 .free_cached_objects = xfs_fs_free_cached_objects, 1149 }; 1150 1151 static int 1152 suffix_kstrtoint( 1153 const char *s, 1154 unsigned int base, 1155 int *res) 1156 { 1157 int last, shift_left_factor = 0, _res; 1158 char *value; 1159 int ret = 0; 1160 1161 value = kstrdup(s, GFP_KERNEL); 1162 if (!value) 1163 return -ENOMEM; 1164 1165 last = strlen(value) - 1; 1166 if (value[last] == 'K' || value[last] == 'k') { 1167 shift_left_factor = 10; 1168 value[last] = '\0'; 1169 } 1170 if (value[last] == 'M' || value[last] == 'm') { 1171 shift_left_factor = 20; 1172 value[last] = '\0'; 1173 } 1174 if (value[last] == 'G' || value[last] == 'g') { 1175 shift_left_factor = 30; 1176 value[last] = '\0'; 1177 } 1178 1179 if (kstrtoint(value, base, &_res)) 1180 ret = -EINVAL; 1181 kfree(value); 1182 *res = _res << shift_left_factor; 1183 return ret; 1184 } 1185 1186 static inline void 1187 xfs_fs_warn_deprecated( 1188 struct fs_context *fc, 1189 struct fs_parameter *param, 1190 uint64_t flag, 1191 bool value) 1192 { 1193 /* Don't print the warning if reconfiguring and current mount point 1194 * already had the flag set 1195 */ 1196 if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) && 1197 !!(XFS_M(fc->root->d_sb)->m_features & flag) == value) 1198 return; 1199 xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key); 1200 } 1201 1202 /* 1203 * Set mount state from a mount option. 1204 * 1205 * NOTE: mp->m_super is NULL here! 1206 */ 1207 static int 1208 xfs_fs_parse_param( 1209 struct fs_context *fc, 1210 struct fs_parameter *param) 1211 { 1212 struct xfs_mount *parsing_mp = fc->s_fs_info; 1213 struct fs_parse_result result; 1214 int size = 0; 1215 int opt; 1216 1217 opt = fs_parse(fc, xfs_fs_parameters, param, &result); 1218 if (opt < 0) 1219 return opt; 1220 1221 switch (opt) { 1222 case Opt_logbufs: 1223 parsing_mp->m_logbufs = result.uint_32; 1224 return 0; 1225 case Opt_logbsize: 1226 if (suffix_kstrtoint(param->string, 10, &parsing_mp->m_logbsize)) 1227 return -EINVAL; 1228 return 0; 1229 case Opt_logdev: 1230 kfree(parsing_mp->m_logname); 1231 parsing_mp->m_logname = kstrdup(param->string, GFP_KERNEL); 1232 if (!parsing_mp->m_logname) 1233 return -ENOMEM; 1234 return 0; 1235 case Opt_rtdev: 1236 kfree(parsing_mp->m_rtname); 1237 parsing_mp->m_rtname = kstrdup(param->string, GFP_KERNEL); 1238 if (!parsing_mp->m_rtname) 1239 return -ENOMEM; 1240 return 0; 1241 case Opt_allocsize: 1242 if (suffix_kstrtoint(param->string, 10, &size)) 1243 return -EINVAL; 1244 parsing_mp->m_allocsize_log = ffs(size) - 1; 1245 parsing_mp->m_features |= XFS_FEAT_ALLOCSIZE; 1246 return 0; 1247 case Opt_grpid: 1248 case Opt_bsdgroups: 1249 parsing_mp->m_features |= XFS_FEAT_GRPID; 1250 return 0; 1251 case Opt_nogrpid: 1252 case Opt_sysvgroups: 1253 parsing_mp->m_features &= ~XFS_FEAT_GRPID; 1254 return 0; 1255 case Opt_wsync: 1256 parsing_mp->m_features |= XFS_FEAT_WSYNC; 1257 return 0; 1258 case Opt_norecovery: 1259 parsing_mp->m_features |= XFS_FEAT_NORECOVERY; 1260 return 0; 1261 case Opt_noalign: 1262 parsing_mp->m_features |= XFS_FEAT_NOALIGN; 1263 return 0; 1264 case Opt_swalloc: 1265 parsing_mp->m_features |= XFS_FEAT_SWALLOC; 1266 return 0; 1267 case Opt_sunit: 1268 parsing_mp->m_dalign = result.uint_32; 1269 return 0; 1270 case Opt_swidth: 1271 parsing_mp->m_swidth = result.uint_32; 1272 return 0; 1273 case Opt_inode32: 1274 parsing_mp->m_features |= XFS_FEAT_SMALL_INUMS; 1275 return 0; 1276 case Opt_inode64: 1277 parsing_mp->m_features &= ~XFS_FEAT_SMALL_INUMS; 1278 return 0; 1279 case Opt_nouuid: 1280 parsing_mp->m_features |= XFS_FEAT_NOUUID; 1281 return 0; 1282 case Opt_largeio: 1283 parsing_mp->m_features |= XFS_FEAT_LARGE_IOSIZE; 1284 return 0; 1285 case Opt_nolargeio: 1286 parsing_mp->m_features &= ~XFS_FEAT_LARGE_IOSIZE; 1287 return 0; 1288 case Opt_filestreams: 1289 parsing_mp->m_features |= XFS_FEAT_FILESTREAMS; 1290 return 0; 1291 case Opt_noquota: 1292 parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT; 1293 parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD; 1294 return 0; 1295 case Opt_quota: 1296 case Opt_uquota: 1297 case Opt_usrquota: 1298 parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD); 1299 return 0; 1300 case Opt_qnoenforce: 1301 case Opt_uqnoenforce: 1302 parsing_mp->m_qflags |= XFS_UQUOTA_ACCT; 1303 parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD; 1304 return 0; 1305 case Opt_pquota: 1306 case Opt_prjquota: 1307 parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD); 1308 return 0; 1309 case Opt_pqnoenforce: 1310 parsing_mp->m_qflags |= XFS_PQUOTA_ACCT; 1311 parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD; 1312 return 0; 1313 case Opt_gquota: 1314 case Opt_grpquota: 1315 parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD); 1316 return 0; 1317 case Opt_gqnoenforce: 1318 parsing_mp->m_qflags |= XFS_GQUOTA_ACCT; 1319 parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD; 1320 return 0; 1321 case Opt_discard: 1322 parsing_mp->m_features |= XFS_FEAT_DISCARD; 1323 return 0; 1324 case Opt_nodiscard: 1325 parsing_mp->m_features &= ~XFS_FEAT_DISCARD; 1326 return 0; 1327 #ifdef CONFIG_FS_DAX 1328 case Opt_dax: 1329 xfs_mount_set_dax_mode(parsing_mp, XFS_DAX_ALWAYS); 1330 return 0; 1331 case Opt_dax_enum: 1332 xfs_mount_set_dax_mode(parsing_mp, result.uint_32); 1333 return 0; 1334 #endif 1335 /* Following mount options will be removed in September 2025 */ 1336 case Opt_ikeep: 1337 xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, true); 1338 parsing_mp->m_features |= XFS_FEAT_IKEEP; 1339 return 0; 1340 case Opt_noikeep: 1341 xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, false); 1342 parsing_mp->m_features &= ~XFS_FEAT_IKEEP; 1343 return 0; 1344 case Opt_attr2: 1345 xfs_fs_warn_deprecated(fc, param, XFS_FEAT_ATTR2, true); 1346 parsing_mp->m_features |= XFS_FEAT_ATTR2; 1347 return 0; 1348 case Opt_noattr2: 1349 xfs_fs_warn_deprecated(fc, param, XFS_FEAT_NOATTR2, true); 1350 parsing_mp->m_features |= XFS_FEAT_NOATTR2; 1351 return 0; 1352 default: 1353 xfs_warn(parsing_mp, "unknown mount option [%s].", param->key); 1354 return -EINVAL; 1355 } 1356 1357 return 0; 1358 } 1359 1360 static int 1361 xfs_fs_validate_params( 1362 struct xfs_mount *mp) 1363 { 1364 /* No recovery flag requires a read-only mount */ 1365 if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) { 1366 xfs_warn(mp, "no-recovery mounts must be read-only."); 1367 return -EINVAL; 1368 } 1369 1370 /* 1371 * We have not read the superblock at this point, so only the attr2 1372 * mount option can set the attr2 feature by this stage. 1373 */ 1374 if (xfs_has_attr2(mp) && xfs_has_noattr2(mp)) { 1375 xfs_warn(mp, "attr2 and noattr2 cannot both be specified."); 1376 return -EINVAL; 1377 } 1378 1379 1380 if (xfs_has_noalign(mp) && (mp->m_dalign || mp->m_swidth)) { 1381 xfs_warn(mp, 1382 "sunit and swidth options incompatible with the noalign option"); 1383 return -EINVAL; 1384 } 1385 1386 if (!IS_ENABLED(CONFIG_XFS_QUOTA) && mp->m_qflags != 0) { 1387 xfs_warn(mp, "quota support not available in this kernel."); 1388 return -EINVAL; 1389 } 1390 1391 if ((mp->m_dalign && !mp->m_swidth) || 1392 (!mp->m_dalign && mp->m_swidth)) { 1393 xfs_warn(mp, "sunit and swidth must be specified together"); 1394 return -EINVAL; 1395 } 1396 1397 if (mp->m_dalign && (mp->m_swidth % mp->m_dalign != 0)) { 1398 xfs_warn(mp, 1399 "stripe width (%d) must be a multiple of the stripe unit (%d)", 1400 mp->m_swidth, mp->m_dalign); 1401 return -EINVAL; 1402 } 1403 1404 if (mp->m_logbufs != -1 && 1405 mp->m_logbufs != 0 && 1406 (mp->m_logbufs < XLOG_MIN_ICLOGS || 1407 mp->m_logbufs > XLOG_MAX_ICLOGS)) { 1408 xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]", 1409 mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); 1410 return -EINVAL; 1411 } 1412 1413 if (mp->m_logbsize != -1 && 1414 mp->m_logbsize != 0 && 1415 (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || 1416 mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || 1417 !is_power_of_2(mp->m_logbsize))) { 1418 xfs_warn(mp, 1419 "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", 1420 mp->m_logbsize); 1421 return -EINVAL; 1422 } 1423 1424 if (xfs_has_allocsize(mp) && 1425 (mp->m_allocsize_log > XFS_MAX_IO_LOG || 1426 mp->m_allocsize_log < XFS_MIN_IO_LOG)) { 1427 xfs_warn(mp, "invalid log iosize: %d [not %d-%d]", 1428 mp->m_allocsize_log, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG); 1429 return -EINVAL; 1430 } 1431 1432 return 0; 1433 } 1434 1435 static int 1436 xfs_fs_fill_super( 1437 struct super_block *sb, 1438 struct fs_context *fc) 1439 { 1440 struct xfs_mount *mp = sb->s_fs_info; 1441 struct inode *root; 1442 int flags = 0, error; 1443 1444 mp->m_super = sb; 1445 1446 error = xfs_fs_validate_params(mp); 1447 if (error) 1448 goto out_free_names; 1449 1450 sb_min_blocksize(sb, BBSIZE); 1451 sb->s_xattr = xfs_xattr_handlers; 1452 sb->s_export_op = &xfs_export_operations; 1453 #ifdef CONFIG_XFS_QUOTA 1454 sb->s_qcop = &xfs_quotactl_operations; 1455 sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; 1456 #endif 1457 sb->s_op = &xfs_super_operations; 1458 1459 /* 1460 * Delay mount work if the debug hook is set. This is debug 1461 * instrumention to coordinate simulation of xfs mount failures with 1462 * VFS superblock operations 1463 */ 1464 if (xfs_globals.mount_delay) { 1465 xfs_notice(mp, "Delaying mount for %d seconds.", 1466 xfs_globals.mount_delay); 1467 msleep(xfs_globals.mount_delay * 1000); 1468 } 1469 1470 if (fc->sb_flags & SB_SILENT) 1471 flags |= XFS_MFSI_QUIET; 1472 1473 error = xfs_open_devices(mp); 1474 if (error) 1475 goto out_free_names; 1476 1477 error = xfs_init_mount_workqueues(mp); 1478 if (error) 1479 goto out_close_devices; 1480 1481 error = xfs_init_percpu_counters(mp); 1482 if (error) 1483 goto out_destroy_workqueues; 1484 1485 error = xfs_inodegc_init_percpu(mp); 1486 if (error) 1487 goto out_destroy_counters; 1488 1489 /* 1490 * All percpu data structures requiring cleanup when a cpu goes offline 1491 * must be allocated before adding this @mp to the cpu-dead handler's 1492 * mount list. 1493 */ 1494 xfs_mount_list_add(mp); 1495 1496 /* Allocate stats memory before we do operations that might use it */ 1497 mp->m_stats.xs_stats = alloc_percpu(struct xfsstats); 1498 if (!mp->m_stats.xs_stats) { 1499 error = -ENOMEM; 1500 goto out_destroy_inodegc; 1501 } 1502 1503 error = xfs_readsb(mp, flags); 1504 if (error) 1505 goto out_free_stats; 1506 1507 error = xfs_finish_flags(mp); 1508 if (error) 1509 goto out_free_sb; 1510 1511 error = xfs_setup_devices(mp); 1512 if (error) 1513 goto out_free_sb; 1514 1515 /* V4 support is undergoing deprecation. */ 1516 if (!xfs_has_crc(mp)) { 1517 #ifdef CONFIG_XFS_SUPPORT_V4 1518 xfs_warn_once(mp, 1519 "Deprecated V4 format (crc=0) will not be supported after September 2030."); 1520 #else 1521 xfs_warn(mp, 1522 "Deprecated V4 format (crc=0) not supported by kernel."); 1523 error = -EINVAL; 1524 goto out_free_sb; 1525 #endif 1526 } 1527 1528 /* Filesystem claims it needs repair, so refuse the mount. */ 1529 if (xfs_has_needsrepair(mp)) { 1530 xfs_warn(mp, "Filesystem needs repair. Please run xfs_repair."); 1531 error = -EFSCORRUPTED; 1532 goto out_free_sb; 1533 } 1534 1535 /* 1536 * Don't touch the filesystem if a user tool thinks it owns the primary 1537 * superblock. mkfs doesn't clear the flag from secondary supers, so 1538 * we don't check them at all. 1539 */ 1540 if (mp->m_sb.sb_inprogress) { 1541 xfs_warn(mp, "Offline file system operation in progress!"); 1542 error = -EFSCORRUPTED; 1543 goto out_free_sb; 1544 } 1545 1546 /* 1547 * Until this is fixed only page-sized or smaller data blocks work. 1548 */ 1549 if (mp->m_sb.sb_blocksize > PAGE_SIZE) { 1550 xfs_warn(mp, 1551 "File system with blocksize %d bytes. " 1552 "Only pagesize (%ld) or less will currently work.", 1553 mp->m_sb.sb_blocksize, PAGE_SIZE); 1554 error = -ENOSYS; 1555 goto out_free_sb; 1556 } 1557 1558 /* Ensure this filesystem fits in the page cache limits */ 1559 if (xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_dblocks) || 1560 xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_rblocks)) { 1561 xfs_warn(mp, 1562 "file system too large to be mounted on this system."); 1563 error = -EFBIG; 1564 goto out_free_sb; 1565 } 1566 1567 /* 1568 * XFS block mappings use 54 bits to store the logical block offset. 1569 * This should suffice to handle the maximum file size that the VFS 1570 * supports (currently 2^63 bytes on 64-bit and ULONG_MAX << PAGE_SHIFT 1571 * bytes on 32-bit), but as XFS and VFS have gotten the s_maxbytes 1572 * calculation wrong on 32-bit kernels in the past, we'll add a WARN_ON 1573 * to check this assertion. 1574 * 1575 * Avoid integer overflow by comparing the maximum bmbt offset to the 1576 * maximum pagecache offset in units of fs blocks. 1577 */ 1578 if (!xfs_verify_fileoff(mp, XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE))) { 1579 xfs_warn(mp, 1580 "MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!", 1581 XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE), 1582 XFS_MAX_FILEOFF); 1583 error = -EINVAL; 1584 goto out_free_sb; 1585 } 1586 1587 error = xfs_filestream_mount(mp); 1588 if (error) 1589 goto out_free_sb; 1590 1591 /* 1592 * we must configure the block size in the superblock before we run the 1593 * full mount process as the mount process can lookup and cache inodes. 1594 */ 1595 sb->s_magic = XFS_SUPER_MAGIC; 1596 sb->s_blocksize = mp->m_sb.sb_blocksize; 1597 sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; 1598 sb->s_maxbytes = MAX_LFS_FILESIZE; 1599 sb->s_max_links = XFS_MAXLINK; 1600 sb->s_time_gran = 1; 1601 if (xfs_has_bigtime(mp)) { 1602 sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN); 1603 sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX); 1604 } else { 1605 sb->s_time_min = XFS_LEGACY_TIME_MIN; 1606 sb->s_time_max = XFS_LEGACY_TIME_MAX; 1607 } 1608 trace_xfs_inode_timestamp_range(mp, sb->s_time_min, sb->s_time_max); 1609 sb->s_iflags |= SB_I_CGROUPWB; 1610 1611 set_posix_acl_flag(sb); 1612 1613 /* version 5 superblocks support inode version counters. */ 1614 if (xfs_has_crc(mp)) 1615 sb->s_flags |= SB_I_VERSION; 1616 1617 if (xfs_has_dax_always(mp)) { 1618 error = xfs_setup_dax_always(mp); 1619 if (error) 1620 goto out_filestream_unmount; 1621 } 1622 1623 if (xfs_has_discard(mp) && !bdev_max_discard_sectors(sb->s_bdev)) { 1624 xfs_warn(mp, 1625 "mounting with \"discard\" option, but the device does not support discard"); 1626 mp->m_features &= ~XFS_FEAT_DISCARD; 1627 } 1628 1629 if (xfs_has_reflink(mp)) { 1630 if (mp->m_sb.sb_rblocks) { 1631 xfs_alert(mp, 1632 "reflink not compatible with realtime device!"); 1633 error = -EINVAL; 1634 goto out_filestream_unmount; 1635 } 1636 1637 if (xfs_globals.always_cow) { 1638 xfs_info(mp, "using DEBUG-only always_cow mode."); 1639 mp->m_always_cow = true; 1640 } 1641 } 1642 1643 if (xfs_has_rmapbt(mp) && mp->m_sb.sb_rblocks) { 1644 xfs_alert(mp, 1645 "reverse mapping btree not compatible with realtime device!"); 1646 error = -EINVAL; 1647 goto out_filestream_unmount; 1648 } 1649 1650 if (xfs_has_large_extent_counts(mp)) 1651 xfs_warn(mp, 1652 "EXPERIMENTAL Large extent counts feature in use. Use at your own risk!"); 1653 1654 error = xfs_mountfs(mp); 1655 if (error) 1656 goto out_filestream_unmount; 1657 1658 root = igrab(VFS_I(mp->m_rootip)); 1659 if (!root) { 1660 error = -ENOENT; 1661 goto out_unmount; 1662 } 1663 sb->s_root = d_make_root(root); 1664 if (!sb->s_root) { 1665 error = -ENOMEM; 1666 goto out_unmount; 1667 } 1668 1669 return 0; 1670 1671 out_filestream_unmount: 1672 xfs_filestream_unmount(mp); 1673 out_free_sb: 1674 xfs_freesb(mp); 1675 out_free_stats: 1676 free_percpu(mp->m_stats.xs_stats); 1677 out_destroy_inodegc: 1678 xfs_mount_list_del(mp); 1679 xfs_inodegc_free_percpu(mp); 1680 out_destroy_counters: 1681 xfs_destroy_percpu_counters(mp); 1682 out_destroy_workqueues: 1683 xfs_destroy_mount_workqueues(mp); 1684 out_close_devices: 1685 xfs_close_devices(mp); 1686 out_free_names: 1687 sb->s_fs_info = NULL; 1688 xfs_mount_free(mp); 1689 return error; 1690 1691 out_unmount: 1692 xfs_filestream_unmount(mp); 1693 xfs_unmountfs(mp); 1694 goto out_free_sb; 1695 } 1696 1697 static int 1698 xfs_fs_get_tree( 1699 struct fs_context *fc) 1700 { 1701 return get_tree_bdev(fc, xfs_fs_fill_super); 1702 } 1703 1704 static int 1705 xfs_remount_rw( 1706 struct xfs_mount *mp) 1707 { 1708 struct xfs_sb *sbp = &mp->m_sb; 1709 int error; 1710 1711 if (xfs_has_norecovery(mp)) { 1712 xfs_warn(mp, 1713 "ro->rw transition prohibited on norecovery mount"); 1714 return -EINVAL; 1715 } 1716 1717 if (xfs_sb_is_v5(sbp) && 1718 xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { 1719 xfs_warn(mp, 1720 "ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem", 1721 (sbp->sb_features_ro_compat & 1722 XFS_SB_FEAT_RO_COMPAT_UNKNOWN)); 1723 return -EINVAL; 1724 } 1725 1726 clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); 1727 1728 /* 1729 * If this is the first remount to writeable state we might have some 1730 * superblock changes to update. 1731 */ 1732 if (mp->m_update_sb) { 1733 error = xfs_sync_sb(mp, false); 1734 if (error) { 1735 xfs_warn(mp, "failed to write sb changes"); 1736 return error; 1737 } 1738 mp->m_update_sb = false; 1739 } 1740 1741 /* 1742 * Fill out the reserve pool if it is empty. Use the stashed value if 1743 * it is non-zero, otherwise go with the default. 1744 */ 1745 xfs_restore_resvblks(mp); 1746 xfs_log_work_queue(mp); 1747 xfs_blockgc_start(mp); 1748 1749 /* Create the per-AG metadata reservation pool .*/ 1750 error = xfs_fs_reserve_ag_blocks(mp); 1751 if (error && error != -ENOSPC) 1752 return error; 1753 1754 /* Re-enable the background inode inactivation worker. */ 1755 xfs_inodegc_start(mp); 1756 1757 return 0; 1758 } 1759 1760 static int 1761 xfs_remount_ro( 1762 struct xfs_mount *mp) 1763 { 1764 struct xfs_icwalk icw = { 1765 .icw_flags = XFS_ICWALK_FLAG_SYNC, 1766 }; 1767 int error; 1768 1769 /* Flush all the dirty data to disk. */ 1770 error = sync_filesystem(mp->m_super); 1771 if (error) 1772 return error; 1773 1774 /* 1775 * Cancel background eofb scanning so it cannot race with the final 1776 * log force+buftarg wait and deadlock the remount. 1777 */ 1778 xfs_blockgc_stop(mp); 1779 1780 /* 1781 * Clear out all remaining COW staging extents and speculative post-EOF 1782 * preallocations so that we don't leave inodes requiring inactivation 1783 * cleanups during reclaim on a read-only mount. We must process every 1784 * cached inode, so this requires a synchronous cache scan. 1785 */ 1786 error = xfs_blockgc_free_space(mp, &icw); 1787 if (error) { 1788 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1789 return error; 1790 } 1791 1792 /* 1793 * Stop the inodegc background worker. xfs_fs_reconfigure already 1794 * flushed all pending inodegc work when it sync'd the filesystem. 1795 * The VFS holds s_umount, so we know that inodes cannot enter 1796 * xfs_fs_destroy_inode during a remount operation. In readonly mode 1797 * we send inodes straight to reclaim, so no inodes will be queued. 1798 */ 1799 xfs_inodegc_stop(mp); 1800 1801 /* Free the per-AG metadata reservation pool. */ 1802 error = xfs_fs_unreserve_ag_blocks(mp); 1803 if (error) { 1804 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1805 return error; 1806 } 1807 1808 /* 1809 * Before we sync the metadata, we need to free up the reserve block 1810 * pool so that the used block count in the superblock on disk is 1811 * correct at the end of the remount. Stash the current* reserve pool 1812 * size so that if we get remounted rw, we can return it to the same 1813 * size. 1814 */ 1815 xfs_save_resvblks(mp); 1816 1817 xfs_log_clean(mp); 1818 set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); 1819 1820 return 0; 1821 } 1822 1823 /* 1824 * Logically we would return an error here to prevent users from believing 1825 * they might have changed mount options using remount which can't be changed. 1826 * 1827 * But unfortunately mount(8) adds all options from mtab and fstab to the mount 1828 * arguments in some cases so we can't blindly reject options, but have to 1829 * check for each specified option if it actually differs from the currently 1830 * set option and only reject it if that's the case. 1831 * 1832 * Until that is implemented we return success for every remount request, and 1833 * silently ignore all options that we can't actually change. 1834 */ 1835 static int 1836 xfs_fs_reconfigure( 1837 struct fs_context *fc) 1838 { 1839 struct xfs_mount *mp = XFS_M(fc->root->d_sb); 1840 struct xfs_mount *new_mp = fc->s_fs_info; 1841 int flags = fc->sb_flags; 1842 int error; 1843 1844 /* version 5 superblocks always support version counters. */ 1845 if (xfs_has_crc(mp)) 1846 fc->sb_flags |= SB_I_VERSION; 1847 1848 error = xfs_fs_validate_params(new_mp); 1849 if (error) 1850 return error; 1851 1852 /* inode32 -> inode64 */ 1853 if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) { 1854 mp->m_features &= ~XFS_FEAT_SMALL_INUMS; 1855 mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount); 1856 } 1857 1858 /* inode64 -> inode32 */ 1859 if (!xfs_has_small_inums(mp) && xfs_has_small_inums(new_mp)) { 1860 mp->m_features |= XFS_FEAT_SMALL_INUMS; 1861 mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount); 1862 } 1863 1864 /* ro -> rw */ 1865 if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) { 1866 error = xfs_remount_rw(mp); 1867 if (error) 1868 return error; 1869 } 1870 1871 /* rw -> ro */ 1872 if (!xfs_is_readonly(mp) && (flags & SB_RDONLY)) { 1873 error = xfs_remount_ro(mp); 1874 if (error) 1875 return error; 1876 } 1877 1878 return 0; 1879 } 1880 1881 static void xfs_fs_free( 1882 struct fs_context *fc) 1883 { 1884 struct xfs_mount *mp = fc->s_fs_info; 1885 1886 /* 1887 * mp is stored in the fs_context when it is initialized. 1888 * mp is transferred to the superblock on a successful mount, 1889 * but if an error occurs before the transfer we have to free 1890 * it here. 1891 */ 1892 if (mp) 1893 xfs_mount_free(mp); 1894 } 1895 1896 static const struct fs_context_operations xfs_context_ops = { 1897 .parse_param = xfs_fs_parse_param, 1898 .get_tree = xfs_fs_get_tree, 1899 .reconfigure = xfs_fs_reconfigure, 1900 .free = xfs_fs_free, 1901 }; 1902 1903 static int xfs_init_fs_context( 1904 struct fs_context *fc) 1905 { 1906 struct xfs_mount *mp; 1907 1908 mp = kmem_alloc(sizeof(struct xfs_mount), KM_ZERO); 1909 if (!mp) 1910 return -ENOMEM; 1911 1912 spin_lock_init(&mp->m_sb_lock); 1913 spin_lock_init(&mp->m_agirotor_lock); 1914 INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); 1915 spin_lock_init(&mp->m_perag_lock); 1916 mutex_init(&mp->m_growlock); 1917 INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker); 1918 INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); 1919 mp->m_kobj.kobject.kset = xfs_kset; 1920 /* 1921 * We don't create the finobt per-ag space reservation until after log 1922 * recovery, so we must set this to true so that an ifree transaction 1923 * started during log recovery will not depend on space reservations 1924 * for finobt expansion. 1925 */ 1926 mp->m_finobt_nores = true; 1927 1928 /* 1929 * These can be overridden by the mount option parsing. 1930 */ 1931 mp->m_logbufs = -1; 1932 mp->m_logbsize = -1; 1933 mp->m_allocsize_log = 16; /* 64k */ 1934 1935 /* 1936 * Copy binary VFS mount flags we are interested in. 1937 */ 1938 if (fc->sb_flags & SB_RDONLY) 1939 set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); 1940 if (fc->sb_flags & SB_DIRSYNC) 1941 mp->m_features |= XFS_FEAT_DIRSYNC; 1942 if (fc->sb_flags & SB_SYNCHRONOUS) 1943 mp->m_features |= XFS_FEAT_WSYNC; 1944 1945 fc->s_fs_info = mp; 1946 fc->ops = &xfs_context_ops; 1947 1948 return 0; 1949 } 1950 1951 static struct file_system_type xfs_fs_type = { 1952 .owner = THIS_MODULE, 1953 .name = "xfs", 1954 .init_fs_context = xfs_init_fs_context, 1955 .parameters = xfs_fs_parameters, 1956 .kill_sb = kill_block_super, 1957 .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, 1958 }; 1959 MODULE_ALIAS_FS("xfs"); 1960 1961 STATIC int __init 1962 xfs_init_caches(void) 1963 { 1964 int error; 1965 1966 xfs_log_ticket_cache = kmem_cache_create("xfs_log_ticket", 1967 sizeof(struct xlog_ticket), 1968 0, 0, NULL); 1969 if (!xfs_log_ticket_cache) 1970 goto out; 1971 1972 error = xfs_btree_init_cur_caches(); 1973 if (error) 1974 goto out_destroy_log_ticket_cache; 1975 1976 error = xfs_defer_init_item_caches(); 1977 if (error) 1978 goto out_destroy_btree_cur_cache; 1979 1980 xfs_da_state_cache = kmem_cache_create("xfs_da_state", 1981 sizeof(struct xfs_da_state), 1982 0, 0, NULL); 1983 if (!xfs_da_state_cache) 1984 goto out_destroy_defer_item_cache; 1985 1986 xfs_ifork_cache = kmem_cache_create("xfs_ifork", 1987 sizeof(struct xfs_ifork), 1988 0, 0, NULL); 1989 if (!xfs_ifork_cache) 1990 goto out_destroy_da_state_cache; 1991 1992 xfs_trans_cache = kmem_cache_create("xfs_trans", 1993 sizeof(struct xfs_trans), 1994 0, 0, NULL); 1995 if (!xfs_trans_cache) 1996 goto out_destroy_ifork_cache; 1997 1998 1999 /* 2000 * The size of the cache-allocated buf log item is the maximum 2001 * size possible under XFS. This wastes a little bit of memory, 2002 * but it is much faster. 2003 */ 2004 xfs_buf_item_cache = kmem_cache_create("xfs_buf_item", 2005 sizeof(struct xfs_buf_log_item), 2006 0, 0, NULL); 2007 if (!xfs_buf_item_cache) 2008 goto out_destroy_trans_cache; 2009 2010 xfs_efd_cache = kmem_cache_create("xfs_efd_item", 2011 (sizeof(struct xfs_efd_log_item) + 2012 (XFS_EFD_MAX_FAST_EXTENTS - 1) * 2013 sizeof(struct xfs_extent)), 2014 0, 0, NULL); 2015 if (!xfs_efd_cache) 2016 goto out_destroy_buf_item_cache; 2017 2018 xfs_efi_cache = kmem_cache_create("xfs_efi_item", 2019 (sizeof(struct xfs_efi_log_item) + 2020 (XFS_EFI_MAX_FAST_EXTENTS - 1) * 2021 sizeof(struct xfs_extent)), 2022 0, 0, NULL); 2023 if (!xfs_efi_cache) 2024 goto out_destroy_efd_cache; 2025 2026 xfs_inode_cache = kmem_cache_create("xfs_inode", 2027 sizeof(struct xfs_inode), 0, 2028 (SLAB_HWCACHE_ALIGN | 2029 SLAB_RECLAIM_ACCOUNT | 2030 SLAB_MEM_SPREAD | SLAB_ACCOUNT), 2031 xfs_fs_inode_init_once); 2032 if (!xfs_inode_cache) 2033 goto out_destroy_efi_cache; 2034 2035 xfs_ili_cache = kmem_cache_create("xfs_ili", 2036 sizeof(struct xfs_inode_log_item), 0, 2037 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, 2038 NULL); 2039 if (!xfs_ili_cache) 2040 goto out_destroy_inode_cache; 2041 2042 xfs_icreate_cache = kmem_cache_create("xfs_icr", 2043 sizeof(struct xfs_icreate_item), 2044 0, 0, NULL); 2045 if (!xfs_icreate_cache) 2046 goto out_destroy_ili_cache; 2047 2048 xfs_rud_cache = kmem_cache_create("xfs_rud_item", 2049 sizeof(struct xfs_rud_log_item), 2050 0, 0, NULL); 2051 if (!xfs_rud_cache) 2052 goto out_destroy_icreate_cache; 2053 2054 xfs_rui_cache = kmem_cache_create("xfs_rui_item", 2055 xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS), 2056 0, 0, NULL); 2057 if (!xfs_rui_cache) 2058 goto out_destroy_rud_cache; 2059 2060 xfs_cud_cache = kmem_cache_create("xfs_cud_item", 2061 sizeof(struct xfs_cud_log_item), 2062 0, 0, NULL); 2063 if (!xfs_cud_cache) 2064 goto out_destroy_rui_cache; 2065 2066 xfs_cui_cache = kmem_cache_create("xfs_cui_item", 2067 xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS), 2068 0, 0, NULL); 2069 if (!xfs_cui_cache) 2070 goto out_destroy_cud_cache; 2071 2072 xfs_bud_cache = kmem_cache_create("xfs_bud_item", 2073 sizeof(struct xfs_bud_log_item), 2074 0, 0, NULL); 2075 if (!xfs_bud_cache) 2076 goto out_destroy_cui_cache; 2077 2078 xfs_bui_cache = kmem_cache_create("xfs_bui_item", 2079 xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS), 2080 0, 0, NULL); 2081 if (!xfs_bui_cache) 2082 goto out_destroy_bud_cache; 2083 2084 xfs_attrd_cache = kmem_cache_create("xfs_attrd_item", 2085 sizeof(struct xfs_attrd_log_item), 2086 0, 0, NULL); 2087 if (!xfs_attrd_cache) 2088 goto out_destroy_bui_cache; 2089 2090 xfs_attri_cache = kmem_cache_create("xfs_attri_item", 2091 sizeof(struct xfs_attri_log_item), 2092 0, 0, NULL); 2093 if (!xfs_attri_cache) 2094 goto out_destroy_attrd_cache; 2095 2096 return 0; 2097 2098 out_destroy_attrd_cache: 2099 kmem_cache_destroy(xfs_attrd_cache); 2100 out_destroy_bui_cache: 2101 kmem_cache_destroy(xfs_bui_cache); 2102 out_destroy_bud_cache: 2103 kmem_cache_destroy(xfs_bud_cache); 2104 out_destroy_cui_cache: 2105 kmem_cache_destroy(xfs_cui_cache); 2106 out_destroy_cud_cache: 2107 kmem_cache_destroy(xfs_cud_cache); 2108 out_destroy_rui_cache: 2109 kmem_cache_destroy(xfs_rui_cache); 2110 out_destroy_rud_cache: 2111 kmem_cache_destroy(xfs_rud_cache); 2112 out_destroy_icreate_cache: 2113 kmem_cache_destroy(xfs_icreate_cache); 2114 out_destroy_ili_cache: 2115 kmem_cache_destroy(xfs_ili_cache); 2116 out_destroy_inode_cache: 2117 kmem_cache_destroy(xfs_inode_cache); 2118 out_destroy_efi_cache: 2119 kmem_cache_destroy(xfs_efi_cache); 2120 out_destroy_efd_cache: 2121 kmem_cache_destroy(xfs_efd_cache); 2122 out_destroy_buf_item_cache: 2123 kmem_cache_destroy(xfs_buf_item_cache); 2124 out_destroy_trans_cache: 2125 kmem_cache_destroy(xfs_trans_cache); 2126 out_destroy_ifork_cache: 2127 kmem_cache_destroy(xfs_ifork_cache); 2128 out_destroy_da_state_cache: 2129 kmem_cache_destroy(xfs_da_state_cache); 2130 out_destroy_defer_item_cache: 2131 xfs_defer_destroy_item_caches(); 2132 out_destroy_btree_cur_cache: 2133 xfs_btree_destroy_cur_caches(); 2134 out_destroy_log_ticket_cache: 2135 kmem_cache_destroy(xfs_log_ticket_cache); 2136 out: 2137 return -ENOMEM; 2138 } 2139 2140 STATIC void 2141 xfs_destroy_caches(void) 2142 { 2143 /* 2144 * Make sure all delayed rcu free are flushed before we 2145 * destroy caches. 2146 */ 2147 rcu_barrier(); 2148 kmem_cache_destroy(xfs_attri_cache); 2149 kmem_cache_destroy(xfs_attrd_cache); 2150 kmem_cache_destroy(xfs_bui_cache); 2151 kmem_cache_destroy(xfs_bud_cache); 2152 kmem_cache_destroy(xfs_cui_cache); 2153 kmem_cache_destroy(xfs_cud_cache); 2154 kmem_cache_destroy(xfs_rui_cache); 2155 kmem_cache_destroy(xfs_rud_cache); 2156 kmem_cache_destroy(xfs_icreate_cache); 2157 kmem_cache_destroy(xfs_ili_cache); 2158 kmem_cache_destroy(xfs_inode_cache); 2159 kmem_cache_destroy(xfs_efi_cache); 2160 kmem_cache_destroy(xfs_efd_cache); 2161 kmem_cache_destroy(xfs_buf_item_cache); 2162 kmem_cache_destroy(xfs_trans_cache); 2163 kmem_cache_destroy(xfs_ifork_cache); 2164 kmem_cache_destroy(xfs_da_state_cache); 2165 xfs_defer_destroy_item_caches(); 2166 xfs_btree_destroy_cur_caches(); 2167 kmem_cache_destroy(xfs_log_ticket_cache); 2168 } 2169 2170 STATIC int __init 2171 xfs_init_workqueues(void) 2172 { 2173 /* 2174 * The allocation workqueue can be used in memory reclaim situations 2175 * (writepage path), and parallelism is only limited by the number of 2176 * AGs in all the filesystems mounted. Hence use the default large 2177 * max_active value for this workqueue. 2178 */ 2179 xfs_alloc_wq = alloc_workqueue("xfsalloc", 2180 XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE), 0); 2181 if (!xfs_alloc_wq) 2182 return -ENOMEM; 2183 2184 xfs_discard_wq = alloc_workqueue("xfsdiscard", XFS_WQFLAGS(WQ_UNBOUND), 2185 0); 2186 if (!xfs_discard_wq) 2187 goto out_free_alloc_wq; 2188 2189 return 0; 2190 out_free_alloc_wq: 2191 destroy_workqueue(xfs_alloc_wq); 2192 return -ENOMEM; 2193 } 2194 2195 STATIC void 2196 xfs_destroy_workqueues(void) 2197 { 2198 destroy_workqueue(xfs_discard_wq); 2199 destroy_workqueue(xfs_alloc_wq); 2200 } 2201 2202 #ifdef CONFIG_HOTPLUG_CPU 2203 static int 2204 xfs_cpu_dead( 2205 unsigned int cpu) 2206 { 2207 struct xfs_mount *mp, *n; 2208 2209 spin_lock(&xfs_mount_list_lock); 2210 list_for_each_entry_safe(mp, n, &xfs_mount_list, m_mount_list) { 2211 spin_unlock(&xfs_mount_list_lock); 2212 xfs_inodegc_cpu_dead(mp, cpu); 2213 spin_lock(&xfs_mount_list_lock); 2214 } 2215 spin_unlock(&xfs_mount_list_lock); 2216 return 0; 2217 } 2218 2219 static int __init 2220 xfs_cpu_hotplug_init(void) 2221 { 2222 int error; 2223 2224 error = cpuhp_setup_state_nocalls(CPUHP_XFS_DEAD, "xfs:dead", NULL, 2225 xfs_cpu_dead); 2226 if (error < 0) 2227 xfs_alert(NULL, 2228 "Failed to initialise CPU hotplug, error %d. XFS is non-functional.", 2229 error); 2230 return error; 2231 } 2232 2233 static void 2234 xfs_cpu_hotplug_destroy(void) 2235 { 2236 cpuhp_remove_state_nocalls(CPUHP_XFS_DEAD); 2237 } 2238 2239 #else /* !CONFIG_HOTPLUG_CPU */ 2240 static inline int xfs_cpu_hotplug_init(void) { return 0; } 2241 static inline void xfs_cpu_hotplug_destroy(void) {} 2242 #endif 2243 2244 STATIC int __init 2245 init_xfs_fs(void) 2246 { 2247 int error; 2248 2249 xfs_check_ondisk_structs(); 2250 2251 printk(KERN_INFO XFS_VERSION_STRING " with " 2252 XFS_BUILD_OPTIONS " enabled\n"); 2253 2254 xfs_dir_startup(); 2255 2256 error = xfs_cpu_hotplug_init(); 2257 if (error) 2258 goto out; 2259 2260 error = xfs_init_caches(); 2261 if (error) 2262 goto out_destroy_hp; 2263 2264 error = xfs_init_workqueues(); 2265 if (error) 2266 goto out_destroy_caches; 2267 2268 error = xfs_mru_cache_init(); 2269 if (error) 2270 goto out_destroy_wq; 2271 2272 error = xfs_buf_init(); 2273 if (error) 2274 goto out_mru_cache_uninit; 2275 2276 error = xfs_init_procfs(); 2277 if (error) 2278 goto out_buf_terminate; 2279 2280 error = xfs_sysctl_register(); 2281 if (error) 2282 goto out_cleanup_procfs; 2283 2284 xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj); 2285 if (!xfs_kset) { 2286 error = -ENOMEM; 2287 goto out_sysctl_unregister; 2288 } 2289 2290 xfsstats.xs_kobj.kobject.kset = xfs_kset; 2291 2292 xfsstats.xs_stats = alloc_percpu(struct xfsstats); 2293 if (!xfsstats.xs_stats) { 2294 error = -ENOMEM; 2295 goto out_kset_unregister; 2296 } 2297 2298 error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL, 2299 "stats"); 2300 if (error) 2301 goto out_free_stats; 2302 2303 #ifdef DEBUG 2304 xfs_dbg_kobj.kobject.kset = xfs_kset; 2305 error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug"); 2306 if (error) 2307 goto out_remove_stats_kobj; 2308 #endif 2309 2310 error = xfs_qm_init(); 2311 if (error) 2312 goto out_remove_dbg_kobj; 2313 2314 error = register_filesystem(&xfs_fs_type); 2315 if (error) 2316 goto out_qm_exit; 2317 return 0; 2318 2319 out_qm_exit: 2320 xfs_qm_exit(); 2321 out_remove_dbg_kobj: 2322 #ifdef DEBUG 2323 xfs_sysfs_del(&xfs_dbg_kobj); 2324 out_remove_stats_kobj: 2325 #endif 2326 xfs_sysfs_del(&xfsstats.xs_kobj); 2327 out_free_stats: 2328 free_percpu(xfsstats.xs_stats); 2329 out_kset_unregister: 2330 kset_unregister(xfs_kset); 2331 out_sysctl_unregister: 2332 xfs_sysctl_unregister(); 2333 out_cleanup_procfs: 2334 xfs_cleanup_procfs(); 2335 out_buf_terminate: 2336 xfs_buf_terminate(); 2337 out_mru_cache_uninit: 2338 xfs_mru_cache_uninit(); 2339 out_destroy_wq: 2340 xfs_destroy_workqueues(); 2341 out_destroy_caches: 2342 xfs_destroy_caches(); 2343 out_destroy_hp: 2344 xfs_cpu_hotplug_destroy(); 2345 out: 2346 return error; 2347 } 2348 2349 STATIC void __exit 2350 exit_xfs_fs(void) 2351 { 2352 xfs_qm_exit(); 2353 unregister_filesystem(&xfs_fs_type); 2354 #ifdef DEBUG 2355 xfs_sysfs_del(&xfs_dbg_kobj); 2356 #endif 2357 xfs_sysfs_del(&xfsstats.xs_kobj); 2358 free_percpu(xfsstats.xs_stats); 2359 kset_unregister(xfs_kset); 2360 xfs_sysctl_unregister(); 2361 xfs_cleanup_procfs(); 2362 xfs_buf_terminate(); 2363 xfs_mru_cache_uninit(); 2364 xfs_destroy_workqueues(); 2365 xfs_destroy_caches(); 2366 xfs_uuid_table_free(); 2367 xfs_cpu_hotplug_destroy(); 2368 } 2369 2370 module_init(init_xfs_fs); 2371 module_exit(exit_xfs_fs); 2372 2373 MODULE_AUTHOR("Silicon Graphics, Inc."); 2374 MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled"); 2375 MODULE_LICENSE("GPL"); 2376