10b61f8a4SDave Chinner // SPDX-License-Identifier: GPL-2.0 21da177e4SLinus Torvalds /* 37b718769SNathan Scott * Copyright (c) 2000-2005 Silicon Graphics, Inc. 47b718769SNathan Scott * All Rights Reserved. 51da177e4SLinus Torvalds */ 61da177e4SLinus Torvalds #include "xfs.h" 7a844f451SNathan Scott #include "xfs_fs.h" 870a9883cSDave Chinner #include "xfs_shared.h" 9239880efSDave Chinner #include "xfs_format.h" 10a4fbe6abSDave Chinner #include "xfs_log_format.h" 11239880efSDave Chinner #include "xfs_trans_resv.h" 121da177e4SLinus Torvalds #include "xfs_sb.h" 131da177e4SLinus Torvalds #include "xfs_mount.h" 14239880efSDave Chinner #include "xfs_trans.h" 151da177e4SLinus Torvalds #include "xfs_error.h" 161da177e4SLinus Torvalds #include "xfs_alloc.h" 171da177e4SLinus Torvalds #include "xfs_fsops.h" 181da177e4SLinus Torvalds #include "xfs_trans_space.h" 19239880efSDave Chinner #include "xfs_log.h" 20b16817b6SDave Chinner #include "xfs_ag.h" 2184d69619SDarrick J. Wong #include "xfs_ag_resv.h" 227f89c838SDarrick J. Wong #include "xfs_trace.h" 231da177e4SLinus Torvalds 241da177e4SLinus Torvalds /* 25c789c83cSGao Xiang * Write new AG headers to disk. Non-transactional, but need to be 26c789c83cSGao Xiang * written and completed prior to the growfs transaction being logged. 27c789c83cSGao Xiang * To do this, we use a delayed write buffer list and wait for 28c789c83cSGao Xiang * submission and IO completion of the list as a whole. This allows the 29c789c83cSGao Xiang * IO subsystem to merge all the AG headers in a single AG into a single 30c789c83cSGao Xiang * IO and hide most of the latency of the IO from us. 31c789c83cSGao Xiang * 32c789c83cSGao Xiang * This also means that if we get an error whilst building the buffer 33c789c83cSGao Xiang * list to write, we can cancel the entire list without having written 34c789c83cSGao Xiang * anything. 35c789c83cSGao Xiang */ 36c789c83cSGao Xiang static int 37c789c83cSGao Xiang xfs_resizefs_init_new_ags( 38c789c83cSGao Xiang struct xfs_trans *tp, 39c789c83cSGao Xiang struct aghdr_init_data *id, 40c789c83cSGao Xiang xfs_agnumber_t oagcount, 41c789c83cSGao Xiang xfs_agnumber_t nagcount, 42c789c83cSGao Xiang xfs_rfsblock_t delta, 43c789c83cSGao Xiang bool *lastag_extended) 44c789c83cSGao Xiang { 45c789c83cSGao Xiang struct xfs_mount *mp = tp->t_mountp; 46c789c83cSGao Xiang xfs_rfsblock_t nb = mp->m_sb.sb_dblocks + delta; 47c789c83cSGao Xiang int error; 48c789c83cSGao Xiang 49c789c83cSGao Xiang *lastag_extended = false; 50c789c83cSGao Xiang 51c789c83cSGao Xiang INIT_LIST_HEAD(&id->buffer_list); 52c789c83cSGao Xiang for (id->agno = nagcount - 1; 53c789c83cSGao Xiang id->agno >= oagcount; 54c789c83cSGao Xiang id->agno--, delta -= id->agsize) { 55c789c83cSGao Xiang 56c789c83cSGao Xiang if (id->agno == nagcount - 1) 57c789c83cSGao Xiang id->agsize = nb - (id->agno * 58c789c83cSGao Xiang (xfs_rfsblock_t)mp->m_sb.sb_agblocks); 59c789c83cSGao Xiang else 60c789c83cSGao Xiang id->agsize = mp->m_sb.sb_agblocks; 61c789c83cSGao Xiang 62c789c83cSGao Xiang error = xfs_ag_init_headers(mp, id); 63c789c83cSGao Xiang if (error) { 64c789c83cSGao Xiang xfs_buf_delwri_cancel(&id->buffer_list); 65c789c83cSGao Xiang return error; 66c789c83cSGao Xiang } 67c789c83cSGao Xiang } 68c789c83cSGao Xiang 69c789c83cSGao Xiang error = xfs_buf_delwri_submit(&id->buffer_list); 70c789c83cSGao Xiang if (error) 71c789c83cSGao Xiang return error; 72c789c83cSGao Xiang 73c789c83cSGao Xiang if (delta) { 74c789c83cSGao Xiang *lastag_extended = true; 75c789c83cSGao Xiang error = xfs_ag_extend_space(mp, tp, id, delta); 76c789c83cSGao Xiang } 77c789c83cSGao Xiang return error; 78c789c83cSGao Xiang } 79c789c83cSGao Xiang 80c789c83cSGao Xiang /* 81b16817b6SDave Chinner * growfs operations 821da177e4SLinus Torvalds */ 831da177e4SLinus Torvalds static int 841da177e4SLinus Torvalds xfs_growfs_data_private( 8507aabd9cSGao Xiang struct xfs_mount *mp, /* mount point for filesystem */ 8607aabd9cSGao Xiang struct xfs_growfs_data *in) /* growfs data input struct */ 871da177e4SLinus Torvalds { 88e8222613SDave Chinner struct xfs_buf *bp; 8983a7f86eSDave Chinner int error; 901da177e4SLinus Torvalds xfs_agnumber_t nagcount; 911da177e4SLinus Torvalds xfs_agnumber_t nagimax = 0; 92ce5e1062SGao Xiang xfs_rfsblock_t nb, nb_div, nb_mod; 93fb2fc172SGao Xiang int64_t delta; 94c789c83cSGao Xiang bool lastag_extended; 951da177e4SLinus Torvalds xfs_agnumber_t oagcount; 9607aabd9cSGao Xiang struct xfs_trans *tp; 970410c3bbSDave Chinner struct aghdr_init_data id = {}; 981da177e4SLinus Torvalds 991da177e4SLinus Torvalds nb = in->newblocks; 100fb2fc172SGao Xiang error = xfs_sb_validate_fsb_count(&mp->m_sb, nb); 101fb2fc172SGao Xiang if (error) 1024cc929eeSNathan Scott return error; 103fb2fc172SGao Xiang 104fb2fc172SGao Xiang if (nb > mp->m_sb.sb_dblocks) { 105ba372674SDave Chinner error = xfs_buf_read_uncached(mp->m_ddev_targp, 1061da177e4SLinus Torvalds XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), 107ba372674SDave Chinner XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL); 108ba372674SDave Chinner if (error) 109eab4e633SDave Chinner return error; 1101da177e4SLinus Torvalds xfs_buf_relse(bp); 111fb2fc172SGao Xiang } 1121da177e4SLinus Torvalds 113ce5e1062SGao Xiang nb_div = nb; 114ce5e1062SGao Xiang nb_mod = do_div(nb_div, mp->m_sb.sb_agblocks); 115ce5e1062SGao Xiang nagcount = nb_div + (nb_mod != 0); 1161da177e4SLinus Torvalds if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) { 1171da177e4SLinus Torvalds nagcount--; 118e6da7c9fSEric Sandeen nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks; 1191da177e4SLinus Torvalds } 120ce5e1062SGao Xiang delta = nb - mp->m_sb.sb_dblocks; 121fb2fc172SGao Xiang /* 122fb2fc172SGao Xiang * Reject filesystems with a single AG because they are not 123fb2fc172SGao Xiang * supported, and reject a shrink operation that would cause a 124fb2fc172SGao Xiang * filesystem to become unsupported. 125fb2fc172SGao Xiang */ 126fb2fc172SGao Xiang if (delta < 0 && nagcount < 2) 127fb2fc172SGao Xiang return -EINVAL; 128fb2fc172SGao Xiang 1291da177e4SLinus Torvalds oagcount = mp->m_sb.sb_agcount; 1301c1c6ebcSDave Chinner 1311c1c6ebcSDave Chinner /* allocate the new per-ag structures */ 1321da177e4SLinus Torvalds if (nagcount > oagcount) { 1331c1c6ebcSDave Chinner error = xfs_initialize_perag(mp, nagcount, &nagimax); 1341c1c6ebcSDave Chinner if (error) 1351c1c6ebcSDave Chinner return error; 136fb2fc172SGao Xiang } else if (nagcount < oagcount) { 137fb2fc172SGao Xiang /* TODO: shrinking the entire AGs hasn't yet completed */ 138fb2fc172SGao Xiang return -EINVAL; 1391da177e4SLinus Torvalds } 1401c1c6ebcSDave Chinner 141253f4911SChristoph Hellwig error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, 142fb2fc172SGao Xiang (delta > 0 ? XFS_GROWFS_SPACE_RES(mp) : -delta), 0, 143fb2fc172SGao Xiang XFS_TRANS_RESERVE, &tp); 144253f4911SChristoph Hellwig if (error) 1451da177e4SLinus Torvalds return error; 1461da177e4SLinus Torvalds 147fb2fc172SGao Xiang if (delta > 0) { 148c789c83cSGao Xiang error = xfs_resizefs_init_new_ags(tp, &id, oagcount, nagcount, 149c789c83cSGao Xiang delta, &lastag_extended); 150fb2fc172SGao Xiang } else { 151fb2fc172SGao Xiang static struct ratelimit_state shrink_warning = \ 152fb2fc172SGao Xiang RATELIMIT_STATE_INIT("shrink_warning", 86400 * HZ, 1); 153fb2fc172SGao Xiang ratelimit_set_flags(&shrink_warning, RATELIMIT_MSG_ON_RELEASE); 154fb2fc172SGao Xiang 155fb2fc172SGao Xiang if (__ratelimit(&shrink_warning)) 156fb2fc172SGao Xiang xfs_alert(mp, 157fb2fc172SGao Xiang "EXPERIMENTAL online shrink feature in use. Use at your own risk!"); 158fb2fc172SGao Xiang 159fb2fc172SGao Xiang error = xfs_ag_shrink_space(mp, &tp, nagcount - 1, -delta); 160fb2fc172SGao Xiang } 1619aebe805SDave Chinner if (error) 16283a7f86eSDave Chinner goto out_trans_cancel; 1639aebe805SDave Chinner 1641c1c6ebcSDave Chinner /* 1651c1c6ebcSDave Chinner * Update changed superblock fields transactionally. These are not 1661c1c6ebcSDave Chinner * seen by the rest of the world until the transaction commit applies 1671c1c6ebcSDave Chinner * them atomically to the superblock. 1681c1c6ebcSDave Chinner */ 1691da177e4SLinus Torvalds if (nagcount > oagcount) 1701da177e4SLinus Torvalds xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount); 171c789c83cSGao Xiang if (delta) 172c789c83cSGao Xiang xfs_trans_mod_sb(tp, XFS_TRANS_SB_DBLOCKS, delta); 1730410c3bbSDave Chinner if (id.nfree) 1740410c3bbSDave Chinner xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, id.nfree); 175014695c0SGao Xiang 176014695c0SGao Xiang /* 177014695c0SGao Xiang * Sync sb counters now to reflect the updated values. This is 178014695c0SGao Xiang * particularly important for shrink because the write verifier 179014695c0SGao Xiang * will fail if sb_fdblocks is ever larger than sb_dblocks. 180014695c0SGao Xiang */ 181*38c26bfdSDave Chinner if (xfs_has_lazysbcount(mp)) 182014695c0SGao Xiang xfs_log_sb(tp); 183014695c0SGao Xiang 184f8079b85SChristoph Hellwig xfs_trans_set_sync(tp); 18570393313SChristoph Hellwig error = xfs_trans_commit(tp); 1861c1c6ebcSDave Chinner if (error) 1871da177e4SLinus Torvalds return error; 1881c1c6ebcSDave Chinner 1891da177e4SLinus Torvalds /* New allocation groups fully initialized, so update mount struct */ 1901da177e4SLinus Torvalds if (nagimax) 1911da177e4SLinus Torvalds mp->m_maxagi = nagimax; 192055388a3SDave Chinner xfs_set_low_space_thresholds(mp); 19352548852SDarrick J. Wong mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); 1941c1c6ebcSDave Chinner 195fb2fc172SGao Xiang if (delta > 0) { 19620e73b00SDarrick J. Wong /* 19720e73b00SDarrick J. Wong * If we expanded the last AG, free the per-AG reservation 19820e73b00SDarrick J. Wong * so we can reinitialize it with the new size. 19920e73b00SDarrick J. Wong */ 200c789c83cSGao Xiang if (lastag_extended) { 20120e73b00SDarrick J. Wong struct xfs_perag *pag; 20220e73b00SDarrick J. Wong 2030410c3bbSDave Chinner pag = xfs_perag_get(mp, id.agno); 20420e73b00SDarrick J. Wong error = xfs_ag_resv_free(pag); 20520e73b00SDarrick J. Wong xfs_perag_put(pag); 20620e73b00SDarrick J. Wong if (error) 20783a7f86eSDave Chinner return error; 20820e73b00SDarrick J. Wong } 20983a7f86eSDave Chinner /* 210fb2fc172SGao Xiang * Reserve AG metadata blocks. ENOSPC here does not mean there 211fb2fc172SGao Xiang * was a growfs failure, just that there still isn't space for 212fb2fc172SGao Xiang * new user data after the grow has been run. 21383a7f86eSDave Chinner */ 21484d69619SDarrick J. Wong error = xfs_fs_reserve_ag_blocks(mp); 21583a7f86eSDave Chinner if (error == -ENOSPC) 21683a7f86eSDave Chinner error = 0; 217fb2fc172SGao Xiang } 21883a7f86eSDave Chinner return error; 21983a7f86eSDave Chinner 22083a7f86eSDave Chinner out_trans_cancel: 22183a7f86eSDave Chinner xfs_trans_cancel(tp); 22283a7f86eSDave Chinner return error; 22383a7f86eSDave Chinner } 22483a7f86eSDave Chinner 22583a7f86eSDave Chinner static int 22683a7f86eSDave Chinner xfs_growfs_log_private( 22707aabd9cSGao Xiang struct xfs_mount *mp, /* mount point for filesystem */ 22807aabd9cSGao Xiang struct xfs_growfs_log *in) /* growfs log input struct */ 22983a7f86eSDave Chinner { 23083a7f86eSDave Chinner xfs_extlen_t nb; 23183a7f86eSDave Chinner 23283a7f86eSDave Chinner nb = in->newblocks; 23383a7f86eSDave Chinner if (nb < XFS_MIN_LOG_BLOCKS || nb < XFS_B_TO_FSB(mp, XFS_MIN_LOG_BYTES)) 23483a7f86eSDave Chinner return -EINVAL; 23583a7f86eSDave Chinner if (nb == mp->m_sb.sb_logblocks && 23683a7f86eSDave Chinner in->isint == (mp->m_sb.sb_logstart != 0)) 23783a7f86eSDave Chinner return -EINVAL; 23883a7f86eSDave Chinner /* 23983a7f86eSDave Chinner * Moving the log is hard, need new interfaces to sync 24083a7f86eSDave Chinner * the log first, hold off all activity while moving it. 24183a7f86eSDave Chinner * Can have shorter or longer log in the same space, 24283a7f86eSDave Chinner * or transform internal to external log or vice versa. 24383a7f86eSDave Chinner */ 24483a7f86eSDave Chinner return -ENOSYS; 24583a7f86eSDave Chinner } 24683a7f86eSDave Chinner 24783a7f86eSDave Chinner static int 24883a7f86eSDave Chinner xfs_growfs_imaxpct( 24983a7f86eSDave Chinner struct xfs_mount *mp, 25083a7f86eSDave Chinner __u32 imaxpct) 25183a7f86eSDave Chinner { 25283a7f86eSDave Chinner struct xfs_trans *tp; 25383a7f86eSDave Chinner int dpct; 25483a7f86eSDave Chinner int error; 25583a7f86eSDave Chinner 25683a7f86eSDave Chinner if (imaxpct > 100) 25783a7f86eSDave Chinner return -EINVAL; 25883a7f86eSDave Chinner 25983a7f86eSDave Chinner error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, 26083a7f86eSDave Chinner XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp); 26183a7f86eSDave Chinner if (error) 26283a7f86eSDave Chinner return error; 26383a7f86eSDave Chinner 26483a7f86eSDave Chinner dpct = imaxpct - mp->m_sb.sb_imax_pct; 26583a7f86eSDave Chinner xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct); 26683a7f86eSDave Chinner xfs_trans_set_sync(tp); 26783a7f86eSDave Chinner return xfs_trans_commit(tp); 26883a7f86eSDave Chinner } 26983a7f86eSDave Chinner 27083a7f86eSDave Chinner /* 2711da177e4SLinus Torvalds * protected versions of growfs function acquire and release locks on the mount 2721da177e4SLinus Torvalds * point - exported through ioctls: XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG, 2731da177e4SLinus Torvalds * XFS_IOC_FSGROWFSRT 2741da177e4SLinus Torvalds */ 2751da177e4SLinus Torvalds int 2761da177e4SLinus Torvalds xfs_growfs_data( 27787444b8cSDave Chinner struct xfs_mount *mp, 27887444b8cSDave Chinner struct xfs_growfs_data *in) 2791da177e4SLinus Torvalds { 28087444b8cSDave Chinner int error = 0; 281743bb465Ssandeen@sandeen.net 282743bb465Ssandeen@sandeen.net if (!capable(CAP_SYS_ADMIN)) 2832451337dSDave Chinner return -EPERM; 284cc92e7acSChristoph Hellwig if (!mutex_trylock(&mp->m_growlock)) 2852451337dSDave Chinner return -EWOULDBLOCK; 28687444b8cSDave Chinner 28787444b8cSDave Chinner /* update imaxpct separately to the physical grow of the filesystem */ 28887444b8cSDave Chinner if (in->imaxpct != mp->m_sb.sb_imax_pct) { 28987444b8cSDave Chinner error = xfs_growfs_imaxpct(mp, in->imaxpct); 29087444b8cSDave Chinner if (error) 29187444b8cSDave Chinner goto out_error; 29287444b8cSDave Chinner } 29387444b8cSDave Chinner 29487444b8cSDave Chinner if (in->newblocks != mp->m_sb.sb_dblocks) { 2951da177e4SLinus Torvalds error = xfs_growfs_data_private(mp, in); 29687444b8cSDave Chinner if (error) 29787444b8cSDave Chinner goto out_error; 29887444b8cSDave Chinner } 29987444b8cSDave Chinner 30087444b8cSDave Chinner /* Post growfs calculations needed to reflect new state in operations */ 30187444b8cSDave Chinner if (mp->m_sb.sb_imax_pct) { 30287444b8cSDave Chinner uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct; 30387444b8cSDave Chinner do_div(icount, 100); 304ef325959SDarrick J. Wong M_IGEO(mp)->maxicount = XFS_FSB_TO_INO(mp, icount); 30587444b8cSDave Chinner } else 306ef325959SDarrick J. Wong M_IGEO(mp)->maxicount = 0; 30787444b8cSDave Chinner 30883a7f86eSDave Chinner /* Update secondary superblocks now the physical grow has completed */ 309b16817b6SDave Chinner error = xfs_update_secondary_sbs(mp); 31083a7f86eSDave Chinner 31187444b8cSDave Chinner out_error: 31252785112SChristoph Hellwig /* 31352785112SChristoph Hellwig * Increment the generation unconditionally, the error could be from 31452785112SChristoph Hellwig * updating the secondary superblocks, in which case the new size 31552785112SChristoph Hellwig * is live already. 31652785112SChristoph Hellwig */ 31752785112SChristoph Hellwig mp->m_generation++; 318cc92e7acSChristoph Hellwig mutex_unlock(&mp->m_growlock); 3191da177e4SLinus Torvalds return error; 3201da177e4SLinus Torvalds } 3211da177e4SLinus Torvalds 3221da177e4SLinus Torvalds int 3231da177e4SLinus Torvalds xfs_growfs_log( 3241da177e4SLinus Torvalds xfs_mount_t *mp, 32507aabd9cSGao Xiang struct xfs_growfs_log *in) 3261da177e4SLinus Torvalds { 3271da177e4SLinus Torvalds int error; 328743bb465Ssandeen@sandeen.net 329743bb465Ssandeen@sandeen.net if (!capable(CAP_SYS_ADMIN)) 3302451337dSDave Chinner return -EPERM; 331cc92e7acSChristoph Hellwig if (!mutex_trylock(&mp->m_growlock)) 3322451337dSDave Chinner return -EWOULDBLOCK; 3331da177e4SLinus Torvalds error = xfs_growfs_log_private(mp, in); 334cc92e7acSChristoph Hellwig mutex_unlock(&mp->m_growlock); 3351da177e4SLinus Torvalds return error; 3361da177e4SLinus Torvalds } 3371da177e4SLinus Torvalds 3381da177e4SLinus Torvalds /* 3391da177e4SLinus Torvalds * exported through ioctl XFS_IOC_FSCOUNTS 3401da177e4SLinus Torvalds */ 3411da177e4SLinus Torvalds 34291083269SEric Sandeen void 3431da177e4SLinus Torvalds xfs_fs_counts( 3441da177e4SLinus Torvalds xfs_mount_t *mp, 3451da177e4SLinus Torvalds xfs_fsop_counts_t *cnt) 3461da177e4SLinus Torvalds { 347501ab323SDave Chinner cnt->allocino = percpu_counter_read_positive(&mp->m_icount); 348e88b64eaSDave Chinner cnt->freeino = percpu_counter_read_positive(&mp->m_ifree); 3490d485adaSDave Chinner cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) - 35052548852SDarrick J. Wong mp->m_alloc_set_aside; 351501ab323SDave Chinner 3523685c2a1SEric Sandeen spin_lock(&mp->m_sb_lock); 3531da177e4SLinus Torvalds cnt->freertx = mp->m_sb.sb_frextents; 3543685c2a1SEric Sandeen spin_unlock(&mp->m_sb_lock); 3551da177e4SLinus Torvalds } 3561da177e4SLinus Torvalds 3571da177e4SLinus Torvalds /* 3581da177e4SLinus Torvalds * exported through ioctl XFS_IOC_SET_RESBLKS & XFS_IOC_GET_RESBLKS 3591da177e4SLinus Torvalds * 3601da177e4SLinus Torvalds * xfs_reserve_blocks is called to set m_resblks 3611da177e4SLinus Torvalds * in the in-core mount table. The number of unused reserved blocks 362c41564b5SNathan Scott * is kept in m_resblks_avail. 3631da177e4SLinus Torvalds * 3641da177e4SLinus Torvalds * Reserve the requested number of blocks if available. Otherwise return 3651da177e4SLinus Torvalds * as many as possible to satisfy the request. The actual number 3661da177e4SLinus Torvalds * reserved are returned in outval 3671da177e4SLinus Torvalds * 3681da177e4SLinus Torvalds * A null inval pointer indicates that only the current reserved blocks 3691da177e4SLinus Torvalds * available should be returned no settings are changed. 3701da177e4SLinus Torvalds */ 3711da177e4SLinus Torvalds 3721da177e4SLinus Torvalds int 3731da177e4SLinus Torvalds xfs_reserve_blocks( 3741da177e4SLinus Torvalds xfs_mount_t *mp, 375c8ce540dSDarrick J. Wong uint64_t *inval, 3761da177e4SLinus Torvalds xfs_fsop_resblks_t *outval) 3771da177e4SLinus Torvalds { 378c8ce540dSDarrick J. Wong int64_t lcounter, delta; 379c8ce540dSDarrick J. Wong int64_t fdblks_delta = 0; 380c8ce540dSDarrick J. Wong uint64_t request; 381c8ce540dSDarrick J. Wong int64_t free; 382408fd484SBrian Foster int error = 0; 3831da177e4SLinus Torvalds 3841da177e4SLinus Torvalds /* If inval is null, report current values and return */ 385c8ce540dSDarrick J. Wong if (inval == (uint64_t *)NULL) { 38684e1e99fSDavid Chinner if (!outval) 3872451337dSDave Chinner return -EINVAL; 3881da177e4SLinus Torvalds outval->resblks = mp->m_resblks; 3891da177e4SLinus Torvalds outval->resblks_avail = mp->m_resblks_avail; 390014c2544SJesper Juhl return 0; 3911da177e4SLinus Torvalds } 3921da177e4SLinus Torvalds 3931da177e4SLinus Torvalds request = *inval; 394dbcabad1SDavid Chinner 395dbcabad1SDavid Chinner /* 396408fd484SBrian Foster * With per-cpu counters, this becomes an interesting problem. we need 397408fd484SBrian Foster * to work out if we are freeing or allocation blocks first, then we can 398408fd484SBrian Foster * do the modification as necessary. 399dbcabad1SDavid Chinner * 400408fd484SBrian Foster * We do this under the m_sb_lock so that if we are near ENOSPC, we will 401408fd484SBrian Foster * hold out any changes while we work out what to do. This means that 402408fd484SBrian Foster * the amount of free space can change while we do this, so we need to 403408fd484SBrian Foster * retry if we end up trying to reserve more space than is available. 404dbcabad1SDavid Chinner */ 4053685c2a1SEric Sandeen spin_lock(&mp->m_sb_lock); 4061da177e4SLinus Torvalds 4071da177e4SLinus Torvalds /* 4081da177e4SLinus Torvalds * If our previous reservation was larger than the current value, 409408fd484SBrian Foster * then move any unused blocks back to the free pool. Modify the resblks 410408fd484SBrian Foster * counters directly since we shouldn't have any problems unreserving 411408fd484SBrian Foster * space. 4121da177e4SLinus Torvalds */ 4131da177e4SLinus Torvalds if (mp->m_resblks > request) { 4141da177e4SLinus Torvalds lcounter = mp->m_resblks_avail - request; 4151da177e4SLinus Torvalds if (lcounter > 0) { /* release unused blocks */ 416dbcabad1SDavid Chinner fdblks_delta = lcounter; 4171da177e4SLinus Torvalds mp->m_resblks_avail -= lcounter; 4181da177e4SLinus Torvalds } 4191da177e4SLinus Torvalds mp->m_resblks = request; 420408fd484SBrian Foster if (fdblks_delta) { 421408fd484SBrian Foster spin_unlock(&mp->m_sb_lock); 422408fd484SBrian Foster error = xfs_mod_fdblocks(mp, fdblks_delta, 0); 423408fd484SBrian Foster spin_lock(&mp->m_sb_lock); 424408fd484SBrian Foster } 4254be536deSDavid Chinner 426408fd484SBrian Foster goto out; 427408fd484SBrian Foster } 428408fd484SBrian Foster 429408fd484SBrian Foster /* 430408fd484SBrian Foster * If the request is larger than the current reservation, reserve the 431408fd484SBrian Foster * blocks before we update the reserve counters. Sample m_fdblocks and 432408fd484SBrian Foster * perform a partial reservation if the request exceeds free space. 433408fd484SBrian Foster */ 434408fd484SBrian Foster error = -ENOSPC; 435408fd484SBrian Foster do { 4360d485adaSDave Chinner free = percpu_counter_sum(&mp->m_fdblocks) - 43752548852SDarrick J. Wong mp->m_alloc_set_aside; 438aafe12ceSDarrick J. Wong if (free <= 0) 439408fd484SBrian Foster break; 440dbcabad1SDavid Chinner 4411da177e4SLinus Torvalds delta = request - mp->m_resblks; 4424be536deSDavid Chinner lcounter = free - delta; 443408fd484SBrian Foster if (lcounter < 0) 4441da177e4SLinus Torvalds /* We can't satisfy the request, just get what we can */ 445408fd484SBrian Foster fdblks_delta = free; 446408fd484SBrian Foster else 447408fd484SBrian Foster fdblks_delta = delta; 448408fd484SBrian Foster 449408fd484SBrian Foster /* 450408fd484SBrian Foster * We'll either succeed in getting space from the free block 451408fd484SBrian Foster * count or we'll get an ENOSPC. If we get a ENOSPC, it means 452408fd484SBrian Foster * things changed while we were calculating fdblks_delta and so 453408fd484SBrian Foster * we should try again to see if there is anything left to 454408fd484SBrian Foster * reserve. 455408fd484SBrian Foster * 456408fd484SBrian Foster * Don't set the reserved flag here - we don't want to reserve 457408fd484SBrian Foster * the extra reserve blocks from the reserve..... 458408fd484SBrian Foster */ 459408fd484SBrian Foster spin_unlock(&mp->m_sb_lock); 460408fd484SBrian Foster error = xfs_mod_fdblocks(mp, -fdblks_delta, 0); 461408fd484SBrian Foster spin_lock(&mp->m_sb_lock); 462408fd484SBrian Foster } while (error == -ENOSPC); 463408fd484SBrian Foster 464408fd484SBrian Foster /* 465408fd484SBrian Foster * Update the reserve counters if blocks have been successfully 466408fd484SBrian Foster * allocated. 467408fd484SBrian Foster */ 468408fd484SBrian Foster if (!error && fdblks_delta) { 469408fd484SBrian Foster mp->m_resblks += fdblks_delta; 470408fd484SBrian Foster mp->m_resblks_avail += fdblks_delta; 4711da177e4SLinus Torvalds } 472408fd484SBrian Foster 473dbcabad1SDavid Chinner out: 47484e1e99fSDavid Chinner if (outval) { 4751da177e4SLinus Torvalds outval->resblks = mp->m_resblks; 4761da177e4SLinus Torvalds outval->resblks_avail = mp->m_resblks_avail; 47784e1e99fSDavid Chinner } 478dbcabad1SDavid Chinner 479408fd484SBrian Foster spin_unlock(&mp->m_sb_lock); 480408fd484SBrian Foster return error; 4811da177e4SLinus Torvalds } 4821da177e4SLinus Torvalds 4831da177e4SLinus Torvalds int 4841da177e4SLinus Torvalds xfs_fs_goingdown( 4851da177e4SLinus Torvalds xfs_mount_t *mp, 486c8ce540dSDarrick J. Wong uint32_t inflags) 4871da177e4SLinus Torvalds { 4881da177e4SLinus Torvalds switch (inflags) { 4891da177e4SLinus Torvalds case XFS_FSOP_GOING_FLAGS_DEFAULT: { 490040f04bdSChristoph Hellwig if (!freeze_bdev(mp->m_super->s_bdev)) { 4917d04a335SNathan Scott xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT); 492040f04bdSChristoph Hellwig thaw_bdev(mp->m_super->s_bdev); 4931da177e4SLinus Torvalds } 4941da177e4SLinus Torvalds break; 4951da177e4SLinus Torvalds } 4961da177e4SLinus Torvalds case XFS_FSOP_GOING_FLAGS_LOGFLUSH: 4977d04a335SNathan Scott xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT); 4981da177e4SLinus Torvalds break; 4991da177e4SLinus Torvalds case XFS_FSOP_GOING_FLAGS_NOLOGFLUSH: 5007d04a335SNathan Scott xfs_force_shutdown(mp, 5017d04a335SNathan Scott SHUTDOWN_FORCE_UMOUNT | SHUTDOWN_LOG_IO_ERROR); 5021da177e4SLinus Torvalds break; 5031da177e4SLinus Torvalds default: 5042451337dSDave Chinner return -EINVAL; 5051da177e4SLinus Torvalds } 5061da177e4SLinus Torvalds 5071da177e4SLinus Torvalds return 0; 5081da177e4SLinus Torvalds } 5092af51f3aSDave Chinner 5102af51f3aSDave Chinner /* 5112af51f3aSDave Chinner * Force a shutdown of the filesystem instantly while keeping the filesystem 5122af51f3aSDave Chinner * consistent. We don't do an unmount here; just shutdown the shop, make sure 5132af51f3aSDave Chinner * that absolutely nothing persistent happens to this filesystem after this 5142af51f3aSDave Chinner * point. 515b36d4651SDave Chinner * 516b36d4651SDave Chinner * The shutdown state change is atomic, resulting in the first and only the 517b36d4651SDave Chinner * first shutdown call processing the shutdown. This means we only shutdown the 518b36d4651SDave Chinner * log once as it requires, and we don't spam the logs when multiple concurrent 519b36d4651SDave Chinner * shutdowns race to set the shutdown flags. 5202af51f3aSDave Chinner */ 5212af51f3aSDave Chinner void 5222af51f3aSDave Chinner xfs_do_force_shutdown( 52356668a5cSDave Chinner struct xfs_mount *mp, 5242af51f3aSDave Chinner int flags, 5252af51f3aSDave Chinner char *fname, 5262af51f3aSDave Chinner int lnnum) 5272af51f3aSDave Chinner { 528b36d4651SDave Chinner int tag; 529b36d4651SDave Chinner const char *why; 5302af51f3aSDave Chinner 531b36d4651SDave Chinner spin_lock(&mp->m_sb_lock); 532b36d4651SDave Chinner if (XFS_FORCED_SHUTDOWN(mp)) { 533b36d4651SDave Chinner spin_unlock(&mp->m_sb_lock); 53456668a5cSDave Chinner return; 53556668a5cSDave Chinner } 536b36d4651SDave Chinner mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN; 537b36d4651SDave Chinner if (mp->m_sb_bp) 538b36d4651SDave Chinner mp->m_sb_bp->b_flags |= XBF_DONE; 539b36d4651SDave Chinner spin_unlock(&mp->m_sb_lock); 54056668a5cSDave Chinner 541b36d4651SDave Chinner if (flags & SHUTDOWN_FORCE_UMOUNT) 542b36d4651SDave Chinner xfs_alert(mp, "User initiated shutdown received."); 543b36d4651SDave Chinner 544b36d4651SDave Chinner if (xlog_force_shutdown(mp->m_log, flags)) { 545b36d4651SDave Chinner tag = XFS_PTAG_SHUTDOWN_LOGERROR; 546b36d4651SDave Chinner why = "Log I/O Error"; 547b36d4651SDave Chinner } else if (flags & SHUTDOWN_CORRUPT_INCORE) { 548b36d4651SDave Chinner tag = XFS_PTAG_SHUTDOWN_CORRUPT; 549b36d4651SDave Chinner why = "Corruption of in-memory data"; 55028d84620SBrian Foster } else { 551b36d4651SDave Chinner tag = XFS_PTAG_SHUTDOWN_IOERROR; 552b36d4651SDave Chinner why = "Metadata I/O Error"; 5532af51f3aSDave Chinner } 55456668a5cSDave Chinner 5557f89c838SDarrick J. Wong trace_xfs_force_shutdown(mp, tag, flags, fname, lnnum); 5567f89c838SDarrick J. Wong 557b36d4651SDave Chinner xfs_alert_tag(mp, tag, 558b36d4651SDave Chinner "%s (0x%x) detected at %pS (%s:%d). Shutting down filesystem.", 559b36d4651SDave Chinner why, flags, __return_address, fname, lnnum); 5602af51f3aSDave Chinner xfs_alert(mp, 56156668a5cSDave Chinner "Please unmount the filesystem and rectify the problem(s)"); 562b36d4651SDave Chinner if (xfs_error_level >= XFS_ERRLEVEL_HIGH) 563b36d4651SDave Chinner xfs_stack_trace(); 5642af51f3aSDave Chinner } 56584d69619SDarrick J. Wong 56684d69619SDarrick J. Wong /* 56784d69619SDarrick J. Wong * Reserve free space for per-AG metadata. 56884d69619SDarrick J. Wong */ 56984d69619SDarrick J. Wong int 57084d69619SDarrick J. Wong xfs_fs_reserve_ag_blocks( 57184d69619SDarrick J. Wong struct xfs_mount *mp) 57284d69619SDarrick J. Wong { 57384d69619SDarrick J. Wong xfs_agnumber_t agno; 57484d69619SDarrick J. Wong struct xfs_perag *pag; 57584d69619SDarrick J. Wong int error = 0; 57684d69619SDarrick J. Wong int err2; 57784d69619SDarrick J. Wong 57815a268d9SDarrick J. Wong mp->m_finobt_nores = false; 579f250eedcSDave Chinner for_each_perag(mp, agno, pag) { 580ebcbef3aSDarrick J. Wong err2 = xfs_ag_resv_init(pag, NULL); 58184d69619SDarrick J. Wong if (err2 && !error) 58284d69619SDarrick J. Wong error = err2; 58384d69619SDarrick J. Wong } 58484d69619SDarrick J. Wong 58584d69619SDarrick J. Wong if (error && error != -ENOSPC) { 58684d69619SDarrick J. Wong xfs_warn(mp, 58784d69619SDarrick J. Wong "Error %d reserving per-AG metadata reserve pool.", error); 58884d69619SDarrick J. Wong xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 58984d69619SDarrick J. Wong } 59084d69619SDarrick J. Wong 59184d69619SDarrick J. Wong return error; 59284d69619SDarrick J. Wong } 59384d69619SDarrick J. Wong 59484d69619SDarrick J. Wong /* 59584d69619SDarrick J. Wong * Free space reserved for per-AG metadata. 59684d69619SDarrick J. Wong */ 59784d69619SDarrick J. Wong int 59884d69619SDarrick J. Wong xfs_fs_unreserve_ag_blocks( 59984d69619SDarrick J. Wong struct xfs_mount *mp) 60084d69619SDarrick J. Wong { 60184d69619SDarrick J. Wong xfs_agnumber_t agno; 60284d69619SDarrick J. Wong struct xfs_perag *pag; 60384d69619SDarrick J. Wong int error = 0; 60484d69619SDarrick J. Wong int err2; 60584d69619SDarrick J. Wong 606f250eedcSDave Chinner for_each_perag(mp, agno, pag) { 60784d69619SDarrick J. Wong err2 = xfs_ag_resv_free(pag); 60884d69619SDarrick J. Wong if (err2 && !error) 60984d69619SDarrick J. Wong error = err2; 61084d69619SDarrick J. Wong } 61184d69619SDarrick J. Wong 61284d69619SDarrick J. Wong if (error) 61384d69619SDarrick J. Wong xfs_warn(mp, 61484d69619SDarrick J. Wong "Error %d freeing per-AG metadata reserve pool.", error); 61584d69619SDarrick J. Wong 61684d69619SDarrick J. Wong return error; 61784d69619SDarrick J. Wong } 618