10b61f8a4SDave Chinner // SPDX-License-Identifier: GPL-2.0 21da177e4SLinus Torvalds /* 37b718769SNathan Scott * Copyright (c) 2000-2005 Silicon Graphics, Inc. 47b718769SNathan Scott * All Rights Reserved. 51da177e4SLinus Torvalds */ 61da177e4SLinus Torvalds #include "xfs.h" 7a844f451SNathan Scott #include "xfs_fs.h" 870a9883cSDave Chinner #include "xfs_shared.h" 9239880efSDave Chinner #include "xfs_format.h" 10a4fbe6abSDave Chinner #include "xfs_log_format.h" 11239880efSDave Chinner #include "xfs_trans_resv.h" 121da177e4SLinus Torvalds #include "xfs_sb.h" 131da177e4SLinus Torvalds #include "xfs_mount.h" 14239880efSDave Chinner #include "xfs_trans.h" 151da177e4SLinus Torvalds #include "xfs_error.h" 161da177e4SLinus Torvalds #include "xfs_alloc.h" 171da177e4SLinus Torvalds #include "xfs_fsops.h" 181da177e4SLinus Torvalds #include "xfs_trans_space.h" 19239880efSDave Chinner #include "xfs_log.h" 2041e63621SDave Chinner #include "xfs_log_priv.h" 21b16817b6SDave Chinner #include "xfs_ag.h" 2284d69619SDarrick J. Wong #include "xfs_ag_resv.h" 237f89c838SDarrick J. Wong #include "xfs_trace.h" 241da177e4SLinus Torvalds 251da177e4SLinus Torvalds /* 26c789c83cSGao Xiang * Write new AG headers to disk. Non-transactional, but need to be 27c789c83cSGao Xiang * written and completed prior to the growfs transaction being logged. 28c789c83cSGao Xiang * To do this, we use a delayed write buffer list and wait for 29c789c83cSGao Xiang * submission and IO completion of the list as a whole. This allows the 30c789c83cSGao Xiang * IO subsystem to merge all the AG headers in a single AG into a single 31c789c83cSGao Xiang * IO and hide most of the latency of the IO from us. 32c789c83cSGao Xiang * 33c789c83cSGao Xiang * This also means that if we get an error whilst building the buffer 34c789c83cSGao Xiang * list to write, we can cancel the entire list without having written 35c789c83cSGao Xiang * anything. 36c789c83cSGao Xiang */ 37c789c83cSGao Xiang static int 38c789c83cSGao Xiang xfs_resizefs_init_new_ags( 39c789c83cSGao Xiang struct xfs_trans *tp, 40c789c83cSGao Xiang struct aghdr_init_data *id, 41c789c83cSGao Xiang xfs_agnumber_t oagcount, 42c789c83cSGao Xiang xfs_agnumber_t nagcount, 43c789c83cSGao Xiang xfs_rfsblock_t delta, 44*c6aee248SDave Chinner struct xfs_perag *last_pag, 45c789c83cSGao Xiang bool *lastag_extended) 46c789c83cSGao Xiang { 47c789c83cSGao Xiang struct xfs_mount *mp = tp->t_mountp; 48c789c83cSGao Xiang xfs_rfsblock_t nb = mp->m_sb.sb_dblocks + delta; 49c789c83cSGao Xiang int error; 50c789c83cSGao Xiang 51c789c83cSGao Xiang *lastag_extended = false; 52c789c83cSGao Xiang 53c789c83cSGao Xiang INIT_LIST_HEAD(&id->buffer_list); 54c789c83cSGao Xiang for (id->agno = nagcount - 1; 55c789c83cSGao Xiang id->agno >= oagcount; 56c789c83cSGao Xiang id->agno--, delta -= id->agsize) { 57c789c83cSGao Xiang 58c789c83cSGao Xiang if (id->agno == nagcount - 1) 59c789c83cSGao Xiang id->agsize = nb - (id->agno * 60c789c83cSGao Xiang (xfs_rfsblock_t)mp->m_sb.sb_agblocks); 61c789c83cSGao Xiang else 62c789c83cSGao Xiang id->agsize = mp->m_sb.sb_agblocks; 63c789c83cSGao Xiang 64c789c83cSGao Xiang error = xfs_ag_init_headers(mp, id); 65c789c83cSGao Xiang if (error) { 66c789c83cSGao Xiang xfs_buf_delwri_cancel(&id->buffer_list); 67c789c83cSGao Xiang return error; 68c789c83cSGao Xiang } 69c789c83cSGao Xiang } 70c789c83cSGao Xiang 71c789c83cSGao Xiang error = xfs_buf_delwri_submit(&id->buffer_list); 72c789c83cSGao Xiang if (error) 73c789c83cSGao Xiang return error; 74c789c83cSGao Xiang 75c789c83cSGao Xiang if (delta) { 76c789c83cSGao Xiang *lastag_extended = true; 77*c6aee248SDave Chinner error = xfs_ag_extend_space(last_pag, tp, delta); 78c789c83cSGao Xiang } 79c789c83cSGao Xiang return error; 80c789c83cSGao Xiang } 81c789c83cSGao Xiang 82c789c83cSGao Xiang /* 83b16817b6SDave Chinner * growfs operations 841da177e4SLinus Torvalds */ 851da177e4SLinus Torvalds static int 861da177e4SLinus Torvalds xfs_growfs_data_private( 8707aabd9cSGao Xiang struct xfs_mount *mp, /* mount point for filesystem */ 8807aabd9cSGao Xiang struct xfs_growfs_data *in) /* growfs data input struct */ 891da177e4SLinus Torvalds { 90e8222613SDave Chinner struct xfs_buf *bp; 9183a7f86eSDave Chinner int error; 921da177e4SLinus Torvalds xfs_agnumber_t nagcount; 931da177e4SLinus Torvalds xfs_agnumber_t nagimax = 0; 94ce5e1062SGao Xiang xfs_rfsblock_t nb, nb_div, nb_mod; 95fb2fc172SGao Xiang int64_t delta; 96c789c83cSGao Xiang bool lastag_extended; 971da177e4SLinus Torvalds xfs_agnumber_t oagcount; 9807aabd9cSGao Xiang struct xfs_trans *tp; 990410c3bbSDave Chinner struct aghdr_init_data id = {}; 100*c6aee248SDave Chinner struct xfs_perag *last_pag; 1011da177e4SLinus Torvalds 1021da177e4SLinus Torvalds nb = in->newblocks; 103fb2fc172SGao Xiang error = xfs_sb_validate_fsb_count(&mp->m_sb, nb); 104fb2fc172SGao Xiang if (error) 1054cc929eeSNathan Scott return error; 106fb2fc172SGao Xiang 107fb2fc172SGao Xiang if (nb > mp->m_sb.sb_dblocks) { 108ba372674SDave Chinner error = xfs_buf_read_uncached(mp->m_ddev_targp, 1091da177e4SLinus Torvalds XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), 110ba372674SDave Chinner XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL); 111ba372674SDave Chinner if (error) 112eab4e633SDave Chinner return error; 1131da177e4SLinus Torvalds xfs_buf_relse(bp); 114fb2fc172SGao Xiang } 1151da177e4SLinus Torvalds 116ce5e1062SGao Xiang nb_div = nb; 117ce5e1062SGao Xiang nb_mod = do_div(nb_div, mp->m_sb.sb_agblocks); 118ce5e1062SGao Xiang nagcount = nb_div + (nb_mod != 0); 1191da177e4SLinus Torvalds if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) { 1201da177e4SLinus Torvalds nagcount--; 121e6da7c9fSEric Sandeen nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks; 1221da177e4SLinus Torvalds } 123ce5e1062SGao Xiang delta = nb - mp->m_sb.sb_dblocks; 124fb2fc172SGao Xiang /* 125fb2fc172SGao Xiang * Reject filesystems with a single AG because they are not 126fb2fc172SGao Xiang * supported, and reject a shrink operation that would cause a 127fb2fc172SGao Xiang * filesystem to become unsupported. 128fb2fc172SGao Xiang */ 129fb2fc172SGao Xiang if (delta < 0 && nagcount < 2) 130fb2fc172SGao Xiang return -EINVAL; 131fb2fc172SGao Xiang 1321da177e4SLinus Torvalds oagcount = mp->m_sb.sb_agcount; 1331c1c6ebcSDave Chinner /* allocate the new per-ag structures */ 1341da177e4SLinus Torvalds if (nagcount > oagcount) { 1351c1c6ebcSDave Chinner error = xfs_initialize_perag(mp, nagcount, &nagimax); 1361c1c6ebcSDave Chinner if (error) 1371c1c6ebcSDave Chinner return error; 138fb2fc172SGao Xiang } else if (nagcount < oagcount) { 139fb2fc172SGao Xiang /* TODO: shrinking the entire AGs hasn't yet completed */ 140fb2fc172SGao Xiang return -EINVAL; 1411da177e4SLinus Torvalds } 1421c1c6ebcSDave Chinner 143253f4911SChristoph Hellwig error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, 144fb2fc172SGao Xiang (delta > 0 ? XFS_GROWFS_SPACE_RES(mp) : -delta), 0, 145fb2fc172SGao Xiang XFS_TRANS_RESERVE, &tp); 146253f4911SChristoph Hellwig if (error) 1471da177e4SLinus Torvalds return error; 1481da177e4SLinus Torvalds 149*c6aee248SDave Chinner last_pag = xfs_perag_get(mp, oagcount - 1); 150fb2fc172SGao Xiang if (delta > 0) { 151c789c83cSGao Xiang error = xfs_resizefs_init_new_ags(tp, &id, oagcount, nagcount, 152*c6aee248SDave Chinner delta, last_pag, &lastag_extended); 153fb2fc172SGao Xiang } else { 154df5660cfSDarrick J. Wong xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SHRINK, 155fb2fc172SGao Xiang "EXPERIMENTAL online shrink feature in use. Use at your own risk!"); 156fb2fc172SGao Xiang 157*c6aee248SDave Chinner error = xfs_ag_shrink_space(last_pag, &tp, -delta); 158fb2fc172SGao Xiang } 159*c6aee248SDave Chinner xfs_perag_put(last_pag); 1609aebe805SDave Chinner if (error) 16183a7f86eSDave Chinner goto out_trans_cancel; 1629aebe805SDave Chinner 1631c1c6ebcSDave Chinner /* 1641c1c6ebcSDave Chinner * Update changed superblock fields transactionally. These are not 1651c1c6ebcSDave Chinner * seen by the rest of the world until the transaction commit applies 1661c1c6ebcSDave Chinner * them atomically to the superblock. 1671c1c6ebcSDave Chinner */ 1681da177e4SLinus Torvalds if (nagcount > oagcount) 1691da177e4SLinus Torvalds xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount); 170c789c83cSGao Xiang if (delta) 171c789c83cSGao Xiang xfs_trans_mod_sb(tp, XFS_TRANS_SB_DBLOCKS, delta); 1720410c3bbSDave Chinner if (id.nfree) 1730410c3bbSDave Chinner xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, id.nfree); 174014695c0SGao Xiang 175014695c0SGao Xiang /* 176014695c0SGao Xiang * Sync sb counters now to reflect the updated values. This is 177014695c0SGao Xiang * particularly important for shrink because the write verifier 178014695c0SGao Xiang * will fail if sb_fdblocks is ever larger than sb_dblocks. 179014695c0SGao Xiang */ 18038c26bfdSDave Chinner if (xfs_has_lazysbcount(mp)) 181014695c0SGao Xiang xfs_log_sb(tp); 182014695c0SGao Xiang 183f8079b85SChristoph Hellwig xfs_trans_set_sync(tp); 18470393313SChristoph Hellwig error = xfs_trans_commit(tp); 1851c1c6ebcSDave Chinner if (error) 1861da177e4SLinus Torvalds return error; 1871c1c6ebcSDave Chinner 1881da177e4SLinus Torvalds /* New allocation groups fully initialized, so update mount struct */ 1891da177e4SLinus Torvalds if (nagimax) 1901da177e4SLinus Torvalds mp->m_maxagi = nagimax; 191055388a3SDave Chinner xfs_set_low_space_thresholds(mp); 19252548852SDarrick J. Wong mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); 1931c1c6ebcSDave Chinner 194fb2fc172SGao Xiang if (delta > 0) { 19520e73b00SDarrick J. Wong /* 19620e73b00SDarrick J. Wong * If we expanded the last AG, free the per-AG reservation 19720e73b00SDarrick J. Wong * so we can reinitialize it with the new size. 19820e73b00SDarrick J. Wong */ 199c789c83cSGao Xiang if (lastag_extended) { 20020e73b00SDarrick J. Wong struct xfs_perag *pag; 20120e73b00SDarrick J. Wong 2020410c3bbSDave Chinner pag = xfs_perag_get(mp, id.agno); 20320e73b00SDarrick J. Wong error = xfs_ag_resv_free(pag); 20420e73b00SDarrick J. Wong xfs_perag_put(pag); 20520e73b00SDarrick J. Wong if (error) 20683a7f86eSDave Chinner return error; 20720e73b00SDarrick J. Wong } 20883a7f86eSDave Chinner /* 209fb2fc172SGao Xiang * Reserve AG metadata blocks. ENOSPC here does not mean there 210fb2fc172SGao Xiang * was a growfs failure, just that there still isn't space for 211fb2fc172SGao Xiang * new user data after the grow has been run. 21283a7f86eSDave Chinner */ 21384d69619SDarrick J. Wong error = xfs_fs_reserve_ag_blocks(mp); 21483a7f86eSDave Chinner if (error == -ENOSPC) 21583a7f86eSDave Chinner error = 0; 216fb2fc172SGao Xiang } 21783a7f86eSDave Chinner return error; 21883a7f86eSDave Chinner 21983a7f86eSDave Chinner out_trans_cancel: 22083a7f86eSDave Chinner xfs_trans_cancel(tp); 22183a7f86eSDave Chinner return error; 22283a7f86eSDave Chinner } 22383a7f86eSDave Chinner 22483a7f86eSDave Chinner static int 22583a7f86eSDave Chinner xfs_growfs_log_private( 22607aabd9cSGao Xiang struct xfs_mount *mp, /* mount point for filesystem */ 22707aabd9cSGao Xiang struct xfs_growfs_log *in) /* growfs log input struct */ 22883a7f86eSDave Chinner { 22983a7f86eSDave Chinner xfs_extlen_t nb; 23083a7f86eSDave Chinner 23183a7f86eSDave Chinner nb = in->newblocks; 23283a7f86eSDave Chinner if (nb < XFS_MIN_LOG_BLOCKS || nb < XFS_B_TO_FSB(mp, XFS_MIN_LOG_BYTES)) 23383a7f86eSDave Chinner return -EINVAL; 23483a7f86eSDave Chinner if (nb == mp->m_sb.sb_logblocks && 23583a7f86eSDave Chinner in->isint == (mp->m_sb.sb_logstart != 0)) 23683a7f86eSDave Chinner return -EINVAL; 23783a7f86eSDave Chinner /* 23883a7f86eSDave Chinner * Moving the log is hard, need new interfaces to sync 23983a7f86eSDave Chinner * the log first, hold off all activity while moving it. 24083a7f86eSDave Chinner * Can have shorter or longer log in the same space, 24183a7f86eSDave Chinner * or transform internal to external log or vice versa. 24283a7f86eSDave Chinner */ 24383a7f86eSDave Chinner return -ENOSYS; 24483a7f86eSDave Chinner } 24583a7f86eSDave Chinner 24683a7f86eSDave Chinner static int 24783a7f86eSDave Chinner xfs_growfs_imaxpct( 24883a7f86eSDave Chinner struct xfs_mount *mp, 24983a7f86eSDave Chinner __u32 imaxpct) 25083a7f86eSDave Chinner { 25183a7f86eSDave Chinner struct xfs_trans *tp; 25283a7f86eSDave Chinner int dpct; 25383a7f86eSDave Chinner int error; 25483a7f86eSDave Chinner 25583a7f86eSDave Chinner if (imaxpct > 100) 25683a7f86eSDave Chinner return -EINVAL; 25783a7f86eSDave Chinner 25883a7f86eSDave Chinner error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, 25983a7f86eSDave Chinner XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp); 26083a7f86eSDave Chinner if (error) 26183a7f86eSDave Chinner return error; 26283a7f86eSDave Chinner 26383a7f86eSDave Chinner dpct = imaxpct - mp->m_sb.sb_imax_pct; 26483a7f86eSDave Chinner xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct); 26583a7f86eSDave Chinner xfs_trans_set_sync(tp); 26683a7f86eSDave Chinner return xfs_trans_commit(tp); 26783a7f86eSDave Chinner } 26883a7f86eSDave Chinner 26983a7f86eSDave Chinner /* 2701da177e4SLinus Torvalds * protected versions of growfs function acquire and release locks on the mount 2711da177e4SLinus Torvalds * point - exported through ioctls: XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG, 2721da177e4SLinus Torvalds * XFS_IOC_FSGROWFSRT 2731da177e4SLinus Torvalds */ 2741da177e4SLinus Torvalds int 2751da177e4SLinus Torvalds xfs_growfs_data( 27687444b8cSDave Chinner struct xfs_mount *mp, 27787444b8cSDave Chinner struct xfs_growfs_data *in) 2781da177e4SLinus Torvalds { 27987444b8cSDave Chinner int error = 0; 280743bb465Ssandeen@sandeen.net 281743bb465Ssandeen@sandeen.net if (!capable(CAP_SYS_ADMIN)) 2822451337dSDave Chinner return -EPERM; 283cc92e7acSChristoph Hellwig if (!mutex_trylock(&mp->m_growlock)) 2842451337dSDave Chinner return -EWOULDBLOCK; 28587444b8cSDave Chinner 28687444b8cSDave Chinner /* update imaxpct separately to the physical grow of the filesystem */ 28787444b8cSDave Chinner if (in->imaxpct != mp->m_sb.sb_imax_pct) { 28887444b8cSDave Chinner error = xfs_growfs_imaxpct(mp, in->imaxpct); 28987444b8cSDave Chinner if (error) 29087444b8cSDave Chinner goto out_error; 29187444b8cSDave Chinner } 29287444b8cSDave Chinner 29387444b8cSDave Chinner if (in->newblocks != mp->m_sb.sb_dblocks) { 2941da177e4SLinus Torvalds error = xfs_growfs_data_private(mp, in); 29587444b8cSDave Chinner if (error) 29687444b8cSDave Chinner goto out_error; 29787444b8cSDave Chinner } 29887444b8cSDave Chinner 29987444b8cSDave Chinner /* Post growfs calculations needed to reflect new state in operations */ 30087444b8cSDave Chinner if (mp->m_sb.sb_imax_pct) { 30187444b8cSDave Chinner uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct; 30287444b8cSDave Chinner do_div(icount, 100); 303ef325959SDarrick J. Wong M_IGEO(mp)->maxicount = XFS_FSB_TO_INO(mp, icount); 30487444b8cSDave Chinner } else 305ef325959SDarrick J. Wong M_IGEO(mp)->maxicount = 0; 30687444b8cSDave Chinner 30783a7f86eSDave Chinner /* Update secondary superblocks now the physical grow has completed */ 308b16817b6SDave Chinner error = xfs_update_secondary_sbs(mp); 30983a7f86eSDave Chinner 31087444b8cSDave Chinner out_error: 31152785112SChristoph Hellwig /* 31252785112SChristoph Hellwig * Increment the generation unconditionally, the error could be from 31352785112SChristoph Hellwig * updating the secondary superblocks, in which case the new size 31452785112SChristoph Hellwig * is live already. 31552785112SChristoph Hellwig */ 31652785112SChristoph Hellwig mp->m_generation++; 317cc92e7acSChristoph Hellwig mutex_unlock(&mp->m_growlock); 3181da177e4SLinus Torvalds return error; 3191da177e4SLinus Torvalds } 3201da177e4SLinus Torvalds 3211da177e4SLinus Torvalds int 3221da177e4SLinus Torvalds xfs_growfs_log( 3231da177e4SLinus Torvalds xfs_mount_t *mp, 32407aabd9cSGao Xiang struct xfs_growfs_log *in) 3251da177e4SLinus Torvalds { 3261da177e4SLinus Torvalds int error; 327743bb465Ssandeen@sandeen.net 328743bb465Ssandeen@sandeen.net if (!capable(CAP_SYS_ADMIN)) 3292451337dSDave Chinner return -EPERM; 330cc92e7acSChristoph Hellwig if (!mutex_trylock(&mp->m_growlock)) 3312451337dSDave Chinner return -EWOULDBLOCK; 3321da177e4SLinus Torvalds error = xfs_growfs_log_private(mp, in); 333cc92e7acSChristoph Hellwig mutex_unlock(&mp->m_growlock); 3341da177e4SLinus Torvalds return error; 3351da177e4SLinus Torvalds } 3361da177e4SLinus Torvalds 3371da177e4SLinus Torvalds /* 3381da177e4SLinus Torvalds * exported through ioctl XFS_IOC_FSCOUNTS 3391da177e4SLinus Torvalds */ 3401da177e4SLinus Torvalds 34191083269SEric Sandeen void 3421da177e4SLinus Torvalds xfs_fs_counts( 3431da177e4SLinus Torvalds xfs_mount_t *mp, 3441da177e4SLinus Torvalds xfs_fsop_counts_t *cnt) 3451da177e4SLinus Torvalds { 346501ab323SDave Chinner cnt->allocino = percpu_counter_read_positive(&mp->m_icount); 347e88b64eaSDave Chinner cnt->freeino = percpu_counter_read_positive(&mp->m_ifree); 3480d485adaSDave Chinner cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) - 34985bcfa26SDarrick J. Wong xfs_fdblocks_unavailable(mp); 3502229276cSDarrick J. Wong cnt->freertx = percpu_counter_read_positive(&mp->m_frextents); 3511da177e4SLinus Torvalds } 3521da177e4SLinus Torvalds 3531da177e4SLinus Torvalds /* 3541da177e4SLinus Torvalds * exported through ioctl XFS_IOC_SET_RESBLKS & XFS_IOC_GET_RESBLKS 3551da177e4SLinus Torvalds * 3561da177e4SLinus Torvalds * xfs_reserve_blocks is called to set m_resblks 3571da177e4SLinus Torvalds * in the in-core mount table. The number of unused reserved blocks 358c41564b5SNathan Scott * is kept in m_resblks_avail. 3591da177e4SLinus Torvalds * 3601da177e4SLinus Torvalds * Reserve the requested number of blocks if available. Otherwise return 3611da177e4SLinus Torvalds * as many as possible to satisfy the request. The actual number 3621da177e4SLinus Torvalds * reserved are returned in outval 3631da177e4SLinus Torvalds * 3641da177e4SLinus Torvalds * A null inval pointer indicates that only the current reserved blocks 3651da177e4SLinus Torvalds * available should be returned no settings are changed. 3661da177e4SLinus Torvalds */ 3671da177e4SLinus Torvalds 3681da177e4SLinus Torvalds int 3691da177e4SLinus Torvalds xfs_reserve_blocks( 3701da177e4SLinus Torvalds xfs_mount_t *mp, 371c8ce540dSDarrick J. Wong uint64_t *inval, 3721da177e4SLinus Torvalds xfs_fsop_resblks_t *outval) 3731da177e4SLinus Torvalds { 374c8ce540dSDarrick J. Wong int64_t lcounter, delta; 375c8ce540dSDarrick J. Wong int64_t fdblks_delta = 0; 376c8ce540dSDarrick J. Wong uint64_t request; 377c8ce540dSDarrick J. Wong int64_t free; 378408fd484SBrian Foster int error = 0; 3791da177e4SLinus Torvalds 3801da177e4SLinus Torvalds /* If inval is null, report current values and return */ 381c8ce540dSDarrick J. Wong if (inval == (uint64_t *)NULL) { 38284e1e99fSDavid Chinner if (!outval) 3832451337dSDave Chinner return -EINVAL; 3841da177e4SLinus Torvalds outval->resblks = mp->m_resblks; 3851da177e4SLinus Torvalds outval->resblks_avail = mp->m_resblks_avail; 386014c2544SJesper Juhl return 0; 3871da177e4SLinus Torvalds } 3881da177e4SLinus Torvalds 3891da177e4SLinus Torvalds request = *inval; 390dbcabad1SDavid Chinner 391dbcabad1SDavid Chinner /* 392408fd484SBrian Foster * With per-cpu counters, this becomes an interesting problem. we need 393408fd484SBrian Foster * to work out if we are freeing or allocation blocks first, then we can 394408fd484SBrian Foster * do the modification as necessary. 395dbcabad1SDavid Chinner * 396408fd484SBrian Foster * We do this under the m_sb_lock so that if we are near ENOSPC, we will 397408fd484SBrian Foster * hold out any changes while we work out what to do. This means that 398408fd484SBrian Foster * the amount of free space can change while we do this, so we need to 399408fd484SBrian Foster * retry if we end up trying to reserve more space than is available. 400dbcabad1SDavid Chinner */ 4013685c2a1SEric Sandeen spin_lock(&mp->m_sb_lock); 4021da177e4SLinus Torvalds 4031da177e4SLinus Torvalds /* 4041da177e4SLinus Torvalds * If our previous reservation was larger than the current value, 405408fd484SBrian Foster * then move any unused blocks back to the free pool. Modify the resblks 406408fd484SBrian Foster * counters directly since we shouldn't have any problems unreserving 407408fd484SBrian Foster * space. 4081da177e4SLinus Torvalds */ 4091da177e4SLinus Torvalds if (mp->m_resblks > request) { 4101da177e4SLinus Torvalds lcounter = mp->m_resblks_avail - request; 4111da177e4SLinus Torvalds if (lcounter > 0) { /* release unused blocks */ 412dbcabad1SDavid Chinner fdblks_delta = lcounter; 4131da177e4SLinus Torvalds mp->m_resblks_avail -= lcounter; 4141da177e4SLinus Torvalds } 4151da177e4SLinus Torvalds mp->m_resblks = request; 416408fd484SBrian Foster if (fdblks_delta) { 417408fd484SBrian Foster spin_unlock(&mp->m_sb_lock); 418408fd484SBrian Foster error = xfs_mod_fdblocks(mp, fdblks_delta, 0); 419408fd484SBrian Foster spin_lock(&mp->m_sb_lock); 420408fd484SBrian Foster } 4214be536deSDavid Chinner 422408fd484SBrian Foster goto out; 423408fd484SBrian Foster } 424408fd484SBrian Foster 425408fd484SBrian Foster /* 426408fd484SBrian Foster * If the request is larger than the current reservation, reserve the 427408fd484SBrian Foster * blocks before we update the reserve counters. Sample m_fdblocks and 428408fd484SBrian Foster * perform a partial reservation if the request exceeds free space. 42915f04fdcSDarrick J. Wong * 43015f04fdcSDarrick J. Wong * The code below estimates how many blocks it can request from 43115f04fdcSDarrick J. Wong * fdblocks to stash in the reserve pool. This is a classic TOCTOU 43215f04fdcSDarrick J. Wong * race since fdblocks updates are not always coordinated via 4330baa2657SDarrick J. Wong * m_sb_lock. Set the reserve size even if there's not enough free 4340baa2657SDarrick J. Wong * space to fill it because mod_fdblocks will refill an undersized 4350baa2657SDarrick J. Wong * reserve when it can. 436408fd484SBrian Foster */ 4370d485adaSDave Chinner free = percpu_counter_sum(&mp->m_fdblocks) - 438c8c56825SDarrick J. Wong xfs_fdblocks_unavailable(mp); 4391da177e4SLinus Torvalds delta = request - mp->m_resblks; 4400baa2657SDarrick J. Wong mp->m_resblks = request; 44115f04fdcSDarrick J. Wong if (delta > 0 && free > 0) { 442408fd484SBrian Foster /* 443408fd484SBrian Foster * We'll either succeed in getting space from the free block 44415f04fdcSDarrick J. Wong * count or we'll get an ENOSPC. Don't set the reserved flag 44515f04fdcSDarrick J. Wong * here - we don't want to reserve the extra reserve blocks 44615f04fdcSDarrick J. Wong * from the reserve. 44782be38bcSDarrick J. Wong * 44882be38bcSDarrick J. Wong * The desired reserve size can change after we drop the lock. 44982be38bcSDarrick J. Wong * Use mod_fdblocks to put the space into the reserve or into 45082be38bcSDarrick J. Wong * fdblocks as appropriate. 451408fd484SBrian Foster */ 45215f04fdcSDarrick J. Wong fdblks_delta = min(free, delta); 453408fd484SBrian Foster spin_unlock(&mp->m_sb_lock); 454408fd484SBrian Foster error = xfs_mod_fdblocks(mp, -fdblks_delta, 0); 4550baa2657SDarrick J. Wong if (!error) 45682be38bcSDarrick J. Wong xfs_mod_fdblocks(mp, fdblks_delta, 0); 45782be38bcSDarrick J. Wong spin_lock(&mp->m_sb_lock); 4581da177e4SLinus Torvalds } 459dbcabad1SDavid Chinner out: 46084e1e99fSDavid Chinner if (outval) { 4611da177e4SLinus Torvalds outval->resblks = mp->m_resblks; 4621da177e4SLinus Torvalds outval->resblks_avail = mp->m_resblks_avail; 46384e1e99fSDavid Chinner } 464dbcabad1SDavid Chinner 465408fd484SBrian Foster spin_unlock(&mp->m_sb_lock); 466408fd484SBrian Foster return error; 4671da177e4SLinus Torvalds } 4681da177e4SLinus Torvalds 4691da177e4SLinus Torvalds int 4701da177e4SLinus Torvalds xfs_fs_goingdown( 4711da177e4SLinus Torvalds xfs_mount_t *mp, 472c8ce540dSDarrick J. Wong uint32_t inflags) 4731da177e4SLinus Torvalds { 4741da177e4SLinus Torvalds switch (inflags) { 4751da177e4SLinus Torvalds case XFS_FSOP_GOING_FLAGS_DEFAULT: { 476040f04bdSChristoph Hellwig if (!freeze_bdev(mp->m_super->s_bdev)) { 4777d04a335SNathan Scott xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT); 478040f04bdSChristoph Hellwig thaw_bdev(mp->m_super->s_bdev); 4791da177e4SLinus Torvalds } 4801da177e4SLinus Torvalds break; 4811da177e4SLinus Torvalds } 4821da177e4SLinus Torvalds case XFS_FSOP_GOING_FLAGS_LOGFLUSH: 4837d04a335SNathan Scott xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT); 4841da177e4SLinus Torvalds break; 4851da177e4SLinus Torvalds case XFS_FSOP_GOING_FLAGS_NOLOGFLUSH: 4867d04a335SNathan Scott xfs_force_shutdown(mp, 4877d04a335SNathan Scott SHUTDOWN_FORCE_UMOUNT | SHUTDOWN_LOG_IO_ERROR); 4881da177e4SLinus Torvalds break; 4891da177e4SLinus Torvalds default: 4902451337dSDave Chinner return -EINVAL; 4911da177e4SLinus Torvalds } 4921da177e4SLinus Torvalds 4931da177e4SLinus Torvalds return 0; 4941da177e4SLinus Torvalds } 4952af51f3aSDave Chinner 4962af51f3aSDave Chinner /* 4972af51f3aSDave Chinner * Force a shutdown of the filesystem instantly while keeping the filesystem 4982af51f3aSDave Chinner * consistent. We don't do an unmount here; just shutdown the shop, make sure 4992af51f3aSDave Chinner * that absolutely nothing persistent happens to this filesystem after this 5002af51f3aSDave Chinner * point. 501b36d4651SDave Chinner * 502b36d4651SDave Chinner * The shutdown state change is atomic, resulting in the first and only the 503b36d4651SDave Chinner * first shutdown call processing the shutdown. This means we only shutdown the 504b36d4651SDave Chinner * log once as it requires, and we don't spam the logs when multiple concurrent 505b36d4651SDave Chinner * shutdowns race to set the shutdown flags. 5062af51f3aSDave Chinner */ 5072af51f3aSDave Chinner void 5082af51f3aSDave Chinner xfs_do_force_shutdown( 50956668a5cSDave Chinner struct xfs_mount *mp, 5102eb7550dSDave Chinner uint32_t flags, 5112af51f3aSDave Chinner char *fname, 5122af51f3aSDave Chinner int lnnum) 5132af51f3aSDave Chinner { 514b36d4651SDave Chinner int tag; 515b36d4651SDave Chinner const char *why; 5162af51f3aSDave Chinner 51741e63621SDave Chinner 51841e63621SDave Chinner if (test_and_set_bit(XFS_OPSTATE_SHUTDOWN, &mp->m_opstate)) { 51941e63621SDave Chinner xlog_shutdown_wait(mp->m_log); 52056668a5cSDave Chinner return; 52141e63621SDave Chinner } 522b36d4651SDave Chinner if (mp->m_sb_bp) 523b36d4651SDave Chinner mp->m_sb_bp->b_flags |= XBF_DONE; 52456668a5cSDave Chinner 525b36d4651SDave Chinner if (flags & SHUTDOWN_FORCE_UMOUNT) 526b36d4651SDave Chinner xfs_alert(mp, "User initiated shutdown received."); 527b36d4651SDave Chinner 528b36d4651SDave Chinner if (xlog_force_shutdown(mp->m_log, flags)) { 529b36d4651SDave Chinner tag = XFS_PTAG_SHUTDOWN_LOGERROR; 530b36d4651SDave Chinner why = "Log I/O Error"; 531b36d4651SDave Chinner } else if (flags & SHUTDOWN_CORRUPT_INCORE) { 532b36d4651SDave Chinner tag = XFS_PTAG_SHUTDOWN_CORRUPT; 533b36d4651SDave Chinner why = "Corruption of in-memory data"; 53428d84620SBrian Foster } else { 535b36d4651SDave Chinner tag = XFS_PTAG_SHUTDOWN_IOERROR; 536b36d4651SDave Chinner why = "Metadata I/O Error"; 5372af51f3aSDave Chinner } 53856668a5cSDave Chinner 5397f89c838SDarrick J. Wong trace_xfs_force_shutdown(mp, tag, flags, fname, lnnum); 5407f89c838SDarrick J. Wong 541b36d4651SDave Chinner xfs_alert_tag(mp, tag, 542b36d4651SDave Chinner "%s (0x%x) detected at %pS (%s:%d). Shutting down filesystem.", 543b36d4651SDave Chinner why, flags, __return_address, fname, lnnum); 5442af51f3aSDave Chinner xfs_alert(mp, 54556668a5cSDave Chinner "Please unmount the filesystem and rectify the problem(s)"); 546b36d4651SDave Chinner if (xfs_error_level >= XFS_ERRLEVEL_HIGH) 547b36d4651SDave Chinner xfs_stack_trace(); 5482af51f3aSDave Chinner } 54984d69619SDarrick J. Wong 55084d69619SDarrick J. Wong /* 55184d69619SDarrick J. Wong * Reserve free space for per-AG metadata. 55284d69619SDarrick J. Wong */ 55384d69619SDarrick J. Wong int 55484d69619SDarrick J. Wong xfs_fs_reserve_ag_blocks( 55584d69619SDarrick J. Wong struct xfs_mount *mp) 55684d69619SDarrick J. Wong { 55784d69619SDarrick J. Wong xfs_agnumber_t agno; 55884d69619SDarrick J. Wong struct xfs_perag *pag; 55984d69619SDarrick J. Wong int error = 0; 56084d69619SDarrick J. Wong int err2; 56184d69619SDarrick J. Wong 56215a268d9SDarrick J. Wong mp->m_finobt_nores = false; 563f250eedcSDave Chinner for_each_perag(mp, agno, pag) { 564ebcbef3aSDarrick J. Wong err2 = xfs_ag_resv_init(pag, NULL); 56584d69619SDarrick J. Wong if (err2 && !error) 56684d69619SDarrick J. Wong error = err2; 56784d69619SDarrick J. Wong } 56884d69619SDarrick J. Wong 56984d69619SDarrick J. Wong if (error && error != -ENOSPC) { 57084d69619SDarrick J. Wong xfs_warn(mp, 57184d69619SDarrick J. Wong "Error %d reserving per-AG metadata reserve pool.", error); 57284d69619SDarrick J. Wong xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 57384d69619SDarrick J. Wong } 57484d69619SDarrick J. Wong 57584d69619SDarrick J. Wong return error; 57684d69619SDarrick J. Wong } 57784d69619SDarrick J. Wong 57884d69619SDarrick J. Wong /* 57984d69619SDarrick J. Wong * Free space reserved for per-AG metadata. 58084d69619SDarrick J. Wong */ 58184d69619SDarrick J. Wong int 58284d69619SDarrick J. Wong xfs_fs_unreserve_ag_blocks( 58384d69619SDarrick J. Wong struct xfs_mount *mp) 58484d69619SDarrick J. Wong { 58584d69619SDarrick J. Wong xfs_agnumber_t agno; 58684d69619SDarrick J. Wong struct xfs_perag *pag; 58784d69619SDarrick J. Wong int error = 0; 58884d69619SDarrick J. Wong int err2; 58984d69619SDarrick J. Wong 590f250eedcSDave Chinner for_each_perag(mp, agno, pag) { 59184d69619SDarrick J. Wong err2 = xfs_ag_resv_free(pag); 59284d69619SDarrick J. Wong if (err2 && !error) 59384d69619SDarrick J. Wong error = err2; 59484d69619SDarrick J. Wong } 59584d69619SDarrick J. Wong 59684d69619SDarrick J. Wong if (error) 59784d69619SDarrick J. Wong xfs_warn(mp, 59884d69619SDarrick J. Wong "Error %d freeing per-AG metadata reserve pool.", error); 59984d69619SDarrick J. Wong 60084d69619SDarrick J. Wong return error; 60184d69619SDarrick J. Wong } 602