// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_trans.h"
#include "xfs_error.h"
#include "xfs_alloc.h"
#include "xfs_fsops.h"
#include "xfs_trans_space.h"
#include "xfs_log.h"
#include "xfs_ag.h"
#include "xfs_ag_resv.h"

/*
 * Write new AG headers to disk. Non-transactional, but need to be
 * written and completed prior to the growfs transaction being logged.
 * To do this, we use a delayed write buffer list and wait for
 * submission and IO completion of the list as a whole. This allows the
 * IO subsystem to merge all the AG headers in a single AG into a single
 * IO and hide most of the latency of the IO from us.
 *
 * This also means that if we get an error whilst building the buffer
 * list to write, we can cancel the entire list without having written
 * anything.
 */
static int
xfs_resizefs_init_new_ags(
	struct xfs_trans	*tp,
	struct aghdr_init_data	*id,
	xfs_agnumber_t		oagcount,
	xfs_agnumber_t		nagcount,
	xfs_rfsblock_t		delta,
	bool			*lastag_extended)
{
	struct xfs_mount	*mp = tp->t_mountp;
	xfs_rfsblock_t		nb = mp->m_sb.sb_dblocks + delta;
	int			error;

	*lastag_extended = false;

	INIT_LIST_HEAD(&id->buffer_list);
	for (id->agno = nagcount - 1;
	     id->agno >= oagcount;
	     id->agno--, delta -= id->agsize) {

		if (id->agno == nagcount - 1)
			id->agsize = nb - (id->agno *
					(xfs_rfsblock_t)mp->m_sb.sb_agblocks);
		else
			id->agsize = mp->m_sb.sb_agblocks;

		error = xfs_ag_init_headers(mp, id);
		if (error) {
			xfs_buf_delwri_cancel(&id->buffer_list);
			return error;
		}
	}

	error = xfs_buf_delwri_submit(&id->buffer_list);
	if (error)
		return error;

	xfs_trans_agblocks_delta(tp, id->nfree);

	if (delta) {
		*lastag_extended = true;
		error = xfs_ag_extend_space(mp, tp, id, delta);
	}
	return error;
}

/*
 * growfs operations
 */
static int
xfs_growfs_data_private(
	struct xfs_mount	*mp,		/* mount point for filesystem */
	struct xfs_growfs_data	*in)		/* growfs data input struct */
{
	struct xfs_buf		*bp;
	int			error;
	xfs_agnumber_t		nagcount;
	xfs_agnumber_t		nagimax = 0;
	xfs_rfsblock_t		nb, nb_div, nb_mod;
	int64_t			delta;
	bool			lastag_extended;
	xfs_agnumber_t		oagcount;
	struct xfs_trans	*tp;
	struct aghdr_init_data	id = {};

	nb = in->newblocks;
	error = xfs_sb_validate_fsb_count(&mp->m_sb, nb);
	if (error)
		return error;

	if (nb > mp->m_sb.sb_dblocks) {
		error = xfs_buf_read_uncached(mp->m_ddev_targp,
				XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
				XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
		if (error)
			return error;
		xfs_buf_relse(bp);
	}

	nb_div = nb;
	nb_mod = do_div(nb_div, mp->m_sb.sb_agblocks);
	nagcount = nb_div + (nb_mod != 0);
	if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) {
		nagcount--;
		nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks;
	}
	delta = nb - mp->m_sb.sb_dblocks;
	/*
	 * Reject filesystems with a single AG because they are not
	 * supported, and reject a shrink operation that would cause a
	 * filesystem to become unsupported.
	 */
	if (delta < 0 && nagcount < 2)
		return -EINVAL;

	oagcount = mp->m_sb.sb_agcount;

	/* allocate the new per-ag structures */
	if (nagcount > oagcount) {
		error = xfs_initialize_perag(mp, nagcount, &nagimax);
		if (error)
			return error;
	} else if (nagcount < oagcount) {
		/* TODO: shrinking away entire AGs is not yet supported */
		return -EINVAL;
	}

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata,
			(delta > 0 ? XFS_GROWFS_SPACE_RES(mp) : -delta), 0,
			XFS_TRANS_RESERVE, &tp);
	if (error)
		return error;

	if (delta > 0) {
		error = xfs_resizefs_init_new_ags(tp, &id, oagcount, nagcount,
						  delta, &lastag_extended);
	} else {
		static struct ratelimit_state shrink_warning =
			RATELIMIT_STATE_INIT("shrink_warning", 86400 * HZ, 1);
		ratelimit_set_flags(&shrink_warning, RATELIMIT_MSG_ON_RELEASE);

		if (__ratelimit(&shrink_warning))
			xfs_alert(mp,
	"EXPERIMENTAL online shrink feature in use. Use at your own risk!");

		error = xfs_ag_shrink_space(mp, &tp, nagcount - 1, -delta);
	}
	if (error)
		goto out_trans_cancel;

	/*
	 * Update changed superblock fields transactionally. These are not
	 * seen by the rest of the world until the transaction commit applies
	 * them atomically to the superblock.
	 */
	if (nagcount > oagcount)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount);
	if (delta)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_DBLOCKS, delta);
	if (id.nfree)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, id.nfree);

	/*
	 * Sync sb counters now to reflect the updated values. This is
	 * particularly important for shrink because the write verifier
	 * will fail if sb_fdblocks is ever larger than sb_dblocks.
	 */
	if (xfs_sb_version_haslazysbcount(&mp->m_sb))
		xfs_log_sb(tp);

	xfs_trans_set_sync(tp);
	error = xfs_trans_commit(tp);
	if (error)
		return error;

	/* New allocation groups fully initialized, so update mount struct */
	if (nagimax)
		mp->m_maxagi = nagimax;
	xfs_set_low_space_thresholds(mp);
	mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);

	if (delta > 0) {
		/*
		 * If we expanded the last AG, free the per-AG reservation
		 * so we can reinitialize it with the new size.
		 */
		if (lastag_extended) {
			struct xfs_perag	*pag;

			pag = xfs_perag_get(mp, id.agno);
			error = xfs_ag_resv_free(pag);
			xfs_perag_put(pag);
			if (error)
				return error;
		}
		/*
		 * Reserve AG metadata blocks. ENOSPC here does not mean there
		 * was a growfs failure, just that there still isn't space for
		 * new user data after the grow has been run.
		 */
		error = xfs_fs_reserve_ag_blocks(mp);
		if (error == -ENOSPC)
			error = 0;
	}
	return error;

out_trans_cancel:
	xfs_trans_cancel(tp);
	return error;
}

static int
xfs_growfs_log_private(
	struct xfs_mount	*mp,	/* mount point for filesystem */
	struct xfs_growfs_log	*in)	/* growfs log input struct */
{
	xfs_extlen_t		nb;

	nb = in->newblocks;
	if (nb < XFS_MIN_LOG_BLOCKS || nb < XFS_B_TO_FSB(mp, XFS_MIN_LOG_BYTES))
		return -EINVAL;
	if (nb == mp->m_sb.sb_logblocks &&
	    in->isint == (mp->m_sb.sb_logstart != 0))
		return -EINVAL;
	/*
	 * Moving the log is hard, need new interfaces to sync
	 * the log first, hold off all activity while moving it.
	 * Can have shorter or longer log in the same space,
	 * or transform internal to external log or vice versa.
	 */
	return -ENOSYS;
}

static int
xfs_growfs_imaxpct(
	struct xfs_mount	*mp,
	__u32			imaxpct)
{
	struct xfs_trans	*tp;
	int			dpct;
	int			error;

	if (imaxpct > 100)
		return -EINVAL;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata,
			XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp);
	if (error)
		return error;

	dpct = imaxpct - mp->m_sb.sb_imax_pct;
	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct);
	xfs_trans_set_sync(tp);
	return xfs_trans_commit(tp);
}

/*
 * protected versions of growfs function acquire and release locks on the mount
 * point - exported through ioctls: XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG,
 * XFS_IOC_FSGROWFSRT
 */
int
xfs_growfs_data(
	struct xfs_mount	*mp,
	struct xfs_growfs_data	*in)
{
	int			error = 0;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (!mutex_trylock(&mp->m_growlock))
		return -EWOULDBLOCK;

	/* update imaxpct separately to the physical grow of the filesystem */
	if (in->imaxpct != mp->m_sb.sb_imax_pct) {
		error = xfs_growfs_imaxpct(mp, in->imaxpct);
		if (error)
			goto out_error;
	}

	if (in->newblocks != mp->m_sb.sb_dblocks) {
		error = xfs_growfs_data_private(mp, in);
		if (error)
			goto out_error;
	}

	/* Post growfs calculations needed to reflect new state in operations */
	if (mp->m_sb.sb_imax_pct) {
		uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct;
		do_div(icount, 100);
		M_IGEO(mp)->maxicount = XFS_FSB_TO_INO(mp, icount);
	} else
		M_IGEO(mp)->maxicount = 0;

	/* Update secondary superblocks now the physical grow has completed */
	error = xfs_update_secondary_sbs(mp);

out_error:
	/*
	 * Increment the generation unconditionally, the error could be from
	 * updating the secondary superblocks, in which case the new size
	 * is live already.
	 */
	mp->m_generation++;
	mutex_unlock(&mp->m_growlock);
	return error;
}

int
xfs_growfs_log(
	xfs_mount_t		*mp,
	struct xfs_growfs_log	*in)
{
	int			error;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (!mutex_trylock(&mp->m_growlock))
		return -EWOULDBLOCK;
	error = xfs_growfs_log_private(mp, in);
	mutex_unlock(&mp->m_growlock);
	return error;
}

/*
 * exported through ioctl XFS_IOC_FSCOUNTS
 */

void
xfs_fs_counts(
	xfs_mount_t		*mp,
	xfs_fsop_counts_t	*cnt)
{
	cnt->allocino = percpu_counter_read_positive(&mp->m_icount);
	cnt->freeino = percpu_counter_read_positive(&mp->m_ifree);
	cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
						mp->m_alloc_set_aside;

	spin_lock(&mp->m_sb_lock);
	cnt->freertx = mp->m_sb.sb_frextents;
	spin_unlock(&mp->m_sb_lock);
}

/*
 * exported through ioctl XFS_IOC_SET_RESBLKS & XFS_IOC_GET_RESBLKS
 *
 * xfs_reserve_blocks is called to set m_resblks
 * in the in-core mount table. The number of unused reserved blocks
 * is kept in m_resblks_avail.
 *
 * Reserve the requested number of blocks if available. Otherwise return
 * as many as possible to satisfy the request. The actual number
 * reserved is returned in outval.
 *
 * A null inval pointer indicates that only the current reserved blocks
 * available should be returned; no settings are changed.
 */

int
xfs_reserve_blocks(
	xfs_mount_t		*mp,
	uint64_t		*inval,
	xfs_fsop_resblks_t	*outval)
{
	int64_t			lcounter, delta;
	int64_t			fdblks_delta = 0;
	uint64_t		request;
	int64_t			free;
	int			error = 0;

	/* If inval is null, report current values and return */
	if (inval == (uint64_t *)NULL) {
		if (!outval)
			return -EINVAL;
		outval->resblks = mp->m_resblks;
		outval->resblks_avail = mp->m_resblks_avail;
		return 0;
	}

	request = *inval;

	/*
	 * With per-cpu counters, this becomes an interesting problem. We need
	 * to work out if we are freeing or allocating blocks first, then we
	 * can do the modification as necessary.
	 *
	 * We do this under the m_sb_lock so that if we are near ENOSPC, we will
	 * hold out any changes while we work out what to do. This means that
	 * the amount of free space can change while we do this, so we need to
	 * retry if we end up trying to reserve more space than is available.
	 */
	spin_lock(&mp->m_sb_lock);

	/*
	 * If our previous reservation was larger than the current value,
	 * then move any unused blocks back to the free pool. Modify the resblks
	 * counters directly since we shouldn't have any problems unreserving
	 * space.
	 */
	if (mp->m_resblks > request) {
		lcounter = mp->m_resblks_avail - request;
		if (lcounter > 0) {		/* release unused blocks */
			fdblks_delta = lcounter;
			mp->m_resblks_avail -= lcounter;
		}
		mp->m_resblks = request;
		if (fdblks_delta) {
			spin_unlock(&mp->m_sb_lock);
			error = xfs_mod_fdblocks(mp, fdblks_delta, 0);
			spin_lock(&mp->m_sb_lock);
		}

		goto out;
	}

	/*
	 * If the request is larger than the current reservation, reserve the
	 * blocks before we update the reserve counters. Sample m_fdblocks and
	 * perform a partial reservation if the request exceeds free space.
	 */
	error = -ENOSPC;
	do {
		free = percpu_counter_sum(&mp->m_fdblocks) -
						mp->m_alloc_set_aside;
		if (free <= 0)
			break;

		delta = request - mp->m_resblks;
		lcounter = free - delta;
		if (lcounter < 0)
			/* We can't satisfy the request, just get what we can */
			fdblks_delta = free;
		else
			fdblks_delta = delta;

		/*
		 * We'll either succeed in getting space from the free block
		 * count or we'll get an ENOSPC. If we get an ENOSPC, it means
		 * things changed while we were calculating fdblks_delta and so
		 * we should try again to see if there is anything left to
		 * reserve.
		 *
		 * Don't set the reserved flag here - we don't want to reserve
		 * the extra reserve blocks from the reserve.....
		 */
		spin_unlock(&mp->m_sb_lock);
		error = xfs_mod_fdblocks(mp, -fdblks_delta, 0);
		spin_lock(&mp->m_sb_lock);
	} while (error == -ENOSPC);

	/*
	 * Update the reserve counters if blocks have been successfully
	 * allocated.
	 */
	if (!error && fdblks_delta) {
		mp->m_resblks += fdblks_delta;
		mp->m_resblks_avail += fdblks_delta;
	}

out:
	if (outval) {
		outval->resblks = mp->m_resblks;
		outval->resblks_avail = mp->m_resblks_avail;
	}

	spin_unlock(&mp->m_sb_lock);
	return error;
}

int
xfs_fs_goingdown(
	xfs_mount_t		*mp,
	uint32_t		inflags)
{
	switch (inflags) {
	case XFS_FSOP_GOING_FLAGS_DEFAULT: {
		if (!freeze_bdev(mp->m_super->s_bdev)) {
			xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
			thaw_bdev(mp->m_super->s_bdev);
		}
		break;
	}
	case XFS_FSOP_GOING_FLAGS_LOGFLUSH:
		xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
		break;
	case XFS_FSOP_GOING_FLAGS_NOLOGFLUSH:
		xfs_force_shutdown(mp,
				SHUTDOWN_FORCE_UMOUNT | SHUTDOWN_LOG_IO_ERROR);
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

/*
 * Force a shutdown of the filesystem instantly while keeping the filesystem
 * consistent. We don't do an unmount here; just shutdown the shop, make sure
 * that absolutely nothing persistent happens to this filesystem after this
 * point.
 */
void
xfs_do_force_shutdown(
	struct xfs_mount *mp,
	int		flags,
	char		*fname,
	int		lnnum)
{
	bool logerror = flags & SHUTDOWN_LOG_IO_ERROR;

	/*
	 * No need to duplicate efforts.
	 */
	if (XFS_FORCED_SHUTDOWN(mp) && !logerror)
		return;

	/*
	 * This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't
	 * queue up anybody new on the log reservations, and wakes up
	 * everybody who's sleeping on log reservations to tell them
	 * the bad news.
	 */
	if (xfs_log_force_umount(mp, logerror))
		return;

	if (flags & SHUTDOWN_FORCE_UMOUNT) {
		xfs_alert(mp,
"User initiated shutdown received. Shutting down filesystem");
		return;
	}

	xfs_notice(mp,
"%s(0x%x) called from line %d of file %s. Return address = "PTR_FMT,
		__func__, flags, lnnum, fname, __return_address);

	if (flags & SHUTDOWN_CORRUPT_INCORE) {
		xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT,
"Corruption of in-memory data detected. Shutting down filesystem");
		if (XFS_ERRLEVEL_HIGH <= xfs_error_level)
			xfs_stack_trace();
	} else if (logerror) {
		xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR,
			"Log I/O Error Detected. Shutting down filesystem");
	} else {
		xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
			"I/O Error Detected. Shutting down filesystem");
	}

	xfs_alert(mp,
		"Please unmount the filesystem and rectify the problem(s)");
}

/*
 * Reserve free space for per-AG metadata.
 */
int
xfs_fs_reserve_ag_blocks(
	struct xfs_mount	*mp)
{
	xfs_agnumber_t		agno;
	struct xfs_perag	*pag;
	int			error = 0;
	int			err2;

	mp->m_finobt_nores = false;
	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
		pag = xfs_perag_get(mp, agno);
		err2 = xfs_ag_resv_init(pag, NULL);
		xfs_perag_put(pag);
		if (err2 && !error)
			error = err2;
	}

	if (error && error != -ENOSPC) {
		xfs_warn(mp,
	"Error %d reserving per-AG metadata reserve pool.", error);
		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
	}

	return error;
}

/*
 * Free space reserved for per-AG metadata.
 */
int
xfs_fs_unreserve_ag_blocks(
	struct xfs_mount	*mp)
{
	xfs_agnumber_t		agno;
	struct xfs_perag	*pag;
	int			error = 0;
	int			err2;

	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
		pag = xfs_perag_get(mp, agno);
		err2 = xfs_ag_resv_free(pag);
		xfs_perag_put(pag);
		if (err2 && !error)
			error = err2;
	}

	if (error)
		xfs_warn(mp,
	"Error %d freeing per-AG metadata reserve pool.", error);

	return error;
}