1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Copyright (C) 2016 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_sb.h" 13 #include "xfs_mount.h" 14 #include "xfs_defer.h" 15 #include "xfs_alloc.h" 16 #include "xfs_errortag.h" 17 #include "xfs_error.h" 18 #include "xfs_trace.h" 19 #include "xfs_cksum.h" 20 #include "xfs_trans.h" 21 #include "xfs_bit.h" 22 #include "xfs_bmap.h" 23 #include "xfs_bmap_btree.h" 24 #include "xfs_ag_resv.h" 25 #include "xfs_trans_space.h" 26 #include "xfs_rmap_btree.h" 27 #include "xfs_btree.h" 28 #include "xfs_refcount_btree.h" 29 #include "xfs_ialloc_btree.h" 30 31 /* 32 * Per-AG Block Reservations 33 * 34 * For some kinds of allocation group metadata structures, it is advantageous 35 * to reserve a small number of blocks in each AG so that future expansions of 36 * that data structure do not encounter ENOSPC because errors during a btree 37 * split cause the filesystem to go offline. 38 * 39 * Prior to the introduction of reflink, this wasn't an issue because the free 40 * space btrees maintain a reserve of space (the AGFL) to handle any expansion 41 * that may be necessary; and allocations of other metadata (inodes, BMBT, 42 * dir/attr) aren't restricted to a single AG. However, with reflink it is 43 * possible to allocate all the space in an AG, have subsequent reflink/CoW 44 * activity expand the refcount btree, and discover that there's no space left 45 * to handle that expansion. Since we can calculate the maximum size of the 46 * refcount btree, we can reserve space for it and avoid ENOSPC. 
 *
 * Handling per-AG reservations consists of four changes to the allocator's
 * behavior:  First, because these reservations are always needed, we decrease
 * the ag_max_usable counter to reflect the size of the AG after the reserved
 * blocks are taken.  Second, the reservations must be reflected in the
 * fdblocks count to maintain proper accounting.  Third, each AG must maintain
 * its own reserved block counter so that we can calculate the amount of space
 * that must remain free to maintain the reservations.  Fourth, the "remaining
 * reserved blocks" count must be used when calculating the length of the
 * longest free extent in an AG and to clamp maxlen in the per-AG allocation
 * functions.  In other words, we maintain a virtual allocation via in-core
 * accounting tricks so that we don't have to clean up after a crash. :)
 *
 * Reserved blocks can be managed by passing one of the enum xfs_ag_resv_type
 * values via struct xfs_alloc_arg or directly to the xfs_free_extent
 * function.  It might seem a little funny to maintain a reservoir of blocks
 * to feed another reservoir, but the AGFL only holds enough blocks to get
 * through the next transaction.  The per-AG reservation is to ensure (we
 * hope) that each AG never runs out of blocks.  Each data structure wanting
 * to use the reservation system should update ask/used in xfs_ag_resv_init.
 */

/*
 * Are we critically low on blocks?  For now we'll define that as the number
 * of blocks we can get our hands on being less than 10% of what we reserved
 * or less than some arbitrary number (maximum btree height).
73 */ 74 bool 75 xfs_ag_resv_critical( 76 struct xfs_perag *pag, 77 enum xfs_ag_resv_type type) 78 { 79 xfs_extlen_t avail; 80 xfs_extlen_t orig; 81 82 switch (type) { 83 case XFS_AG_RESV_METADATA: 84 avail = pag->pagf_freeblks - pag->pag_rmapbt_resv.ar_reserved; 85 orig = pag->pag_meta_resv.ar_asked; 86 break; 87 case XFS_AG_RESV_RMAPBT: 88 avail = pag->pagf_freeblks + pag->pagf_flcount - 89 pag->pag_meta_resv.ar_reserved; 90 orig = pag->pag_rmapbt_resv.ar_asked; 91 break; 92 default: 93 ASSERT(0); 94 return false; 95 } 96 97 trace_xfs_ag_resv_critical(pag, type, avail); 98 99 /* Critically low if less than 10% or max btree height remains. */ 100 return XFS_TEST_ERROR(avail < orig / 10 || avail < XFS_BTREE_MAXLEVELS, 101 pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL); 102 } 103 104 /* 105 * How many blocks are reserved but not used, and therefore must not be 106 * allocated away? 107 */ 108 xfs_extlen_t 109 xfs_ag_resv_needed( 110 struct xfs_perag *pag, 111 enum xfs_ag_resv_type type) 112 { 113 xfs_extlen_t len; 114 115 len = pag->pag_meta_resv.ar_reserved + pag->pag_rmapbt_resv.ar_reserved; 116 switch (type) { 117 case XFS_AG_RESV_METADATA: 118 case XFS_AG_RESV_RMAPBT: 119 len -= xfs_perag_resv(pag, type)->ar_reserved; 120 break; 121 case XFS_AG_RESV_NONE: 122 /* empty */ 123 break; 124 default: 125 ASSERT(0); 126 } 127 128 trace_xfs_ag_resv_needed(pag, type, len); 129 130 return len; 131 } 132 133 /* Clean out a reservation */ 134 static int 135 __xfs_ag_resv_free( 136 struct xfs_perag *pag, 137 enum xfs_ag_resv_type type) 138 { 139 struct xfs_ag_resv *resv; 140 xfs_extlen_t oldresv; 141 int error; 142 143 trace_xfs_ag_resv_free(pag, type, 0); 144 145 resv = xfs_perag_resv(pag, type); 146 if (pag->pag_agno == 0) 147 pag->pag_mount->m_ag_max_usable += resv->ar_asked; 148 /* 149 * RMAPBT blocks come from the AGFL and AGFL blocks are always 150 * considered "free", so whatever was reserved at mount time must be 151 * given back at umount. 
152 */ 153 if (type == XFS_AG_RESV_RMAPBT) 154 oldresv = resv->ar_orig_reserved; 155 else 156 oldresv = resv->ar_reserved; 157 error = xfs_mod_fdblocks(pag->pag_mount, oldresv, true); 158 resv->ar_reserved = 0; 159 resv->ar_asked = 0; 160 161 if (error) 162 trace_xfs_ag_resv_free_error(pag->pag_mount, pag->pag_agno, 163 error, _RET_IP_); 164 return error; 165 } 166 167 /* Free a per-AG reservation. */ 168 int 169 xfs_ag_resv_free( 170 struct xfs_perag *pag) 171 { 172 int error; 173 int err2; 174 175 error = __xfs_ag_resv_free(pag, XFS_AG_RESV_RMAPBT); 176 err2 = __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA); 177 if (err2 && !error) 178 error = err2; 179 return error; 180 } 181 182 static int 183 __xfs_ag_resv_init( 184 struct xfs_perag *pag, 185 enum xfs_ag_resv_type type, 186 xfs_extlen_t ask, 187 xfs_extlen_t used) 188 { 189 struct xfs_mount *mp = pag->pag_mount; 190 struct xfs_ag_resv *resv; 191 int error; 192 xfs_extlen_t reserved; 193 194 if (used > ask) 195 ask = used; 196 reserved = ask - used; 197 198 error = xfs_mod_fdblocks(mp, -(int64_t)reserved, true); 199 if (error) { 200 trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno, 201 error, _RET_IP_); 202 xfs_warn(mp, 203 "Per-AG reservation for AG %u failed. Filesystem may run out of space.", 204 pag->pag_agno); 205 return error; 206 } 207 208 /* 209 * Reduce the maximum per-AG allocation length by however much we're 210 * trying to reserve for an AG. Since this is a filesystem-wide 211 * counter, we only make the adjustment for AG 0. This assumes that 212 * there aren't any AGs hungrier for per-AG reservation than AG 0. 213 */ 214 if (pag->pag_agno == 0) 215 mp->m_ag_max_usable -= ask; 216 217 resv = xfs_perag_resv(pag, type); 218 resv->ar_asked = ask; 219 resv->ar_reserved = resv->ar_orig_reserved = reserved; 220 221 trace_xfs_ag_resv_init(pag, type, ask); 222 return 0; 223 } 224 225 /* Create a per-AG block reservation. 
*/ 226 int 227 xfs_ag_resv_init( 228 struct xfs_perag *pag) 229 { 230 struct xfs_mount *mp = pag->pag_mount; 231 xfs_agnumber_t agno = pag->pag_agno; 232 xfs_extlen_t ask; 233 xfs_extlen_t used; 234 int error = 0; 235 236 /* Create the metadata reservation. */ 237 if (pag->pag_meta_resv.ar_asked == 0) { 238 ask = used = 0; 239 240 error = xfs_refcountbt_calc_reserves(mp, agno, &ask, &used); 241 if (error) 242 goto out; 243 244 error = xfs_finobt_calc_reserves(mp, agno, &ask, &used); 245 if (error) 246 goto out; 247 248 error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, 249 ask, used); 250 if (error) { 251 /* 252 * Because we didn't have per-AG reservations when the 253 * finobt feature was added we might not be able to 254 * reserve all needed blocks. Warn and fall back to the 255 * old and potentially buggy code in that case, but 256 * ensure we do have the reservation for the refcountbt. 257 */ 258 ask = used = 0; 259 260 mp->m_inotbt_nores = true; 261 262 error = xfs_refcountbt_calc_reserves(mp, agno, &ask, 263 &used); 264 if (error) 265 goto out; 266 267 error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, 268 ask, used); 269 if (error) 270 goto out; 271 } 272 } 273 274 /* Create the RMAPBT metadata reservation */ 275 if (pag->pag_rmapbt_resv.ar_asked == 0) { 276 ask = used = 0; 277 278 error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used); 279 if (error) 280 goto out; 281 282 error = __xfs_ag_resv_init(pag, XFS_AG_RESV_RMAPBT, ask, used); 283 if (error) 284 goto out; 285 } 286 287 #ifdef DEBUG 288 /* need to read in the AGF for the ASSERT below to work */ 289 error = xfs_alloc_pagf_init(pag->pag_mount, NULL, pag->pag_agno, 0); 290 if (error) 291 return error; 292 293 ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + 294 xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved <= 295 pag->pagf_freeblks + pag->pagf_flcount); 296 #endif 297 out: 298 return error; 299 } 300 301 /* Allocate a block from the reservation. 
*/
/*
 * Accounts for the args->len blocks just allocated on behalf of @type:
 * AGFL allocations are ignored, unreserved allocations are charged straight
 * to the transaction's superblock deltas, and reservation-backed allocations
 * first draw down the reservation's ar_reserved counter.
 */
void
xfs_ag_resv_alloc_extent(
	struct xfs_perag		*pag,
	enum xfs_ag_resv_type		type,
	struct xfs_alloc_arg		*args)
{
	struct xfs_ag_resv		*resv;
	xfs_extlen_t			from_resv;
	int64_t				beyond_resv;
	uint				field;

	trace_xfs_ag_resv_alloc_extent(pag, type, args->len);

	/* Nothing to account for free list allocations. */
	if (type == XFS_AG_RESV_AGFL)
		return;

	if (type != XFS_AG_RESV_METADATA && type != XFS_AG_RESV_RMAPBT) {
		/* Unknown types are a bug, but are treated like NONE. */
		if (type != XFS_AG_RESV_NONE) {
			ASSERT(0);
		}

		if (args->wasdel)
			field = XFS_TRANS_SB_RES_FDBLOCKS;
		else
			field = XFS_TRANS_SB_FDBLOCKS;
		xfs_trans_mod_sb(args->tp, field, -(int64_t)args->len);
		return;
	}

	/* Take as much of the extent as possible out of the reservation. */
	resv = xfs_perag_resv(pag, type);
	from_resv = min_t(xfs_extlen_t, args->len, resv->ar_reserved);
	resv->ar_reserved -= from_resv;

	/* The rmapbt reservation makes no superblock counter updates here. */
	if (type == XFS_AG_RESV_RMAPBT)
		return;

	/* Allocations of reserved blocks only need on-disk sb updates... */
	xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS,
			-(int64_t)from_resv);

	/* ...but non-reserved blocks need in-core and on-disk updates. */
	if (args->len > from_resv) {
		beyond_resv = (int64_t)args->len - from_resv;
		xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_FDBLOCKS,
				-beyond_resv);
	}
}

/* Free a block to the reservation.
*/
/*
 * Accounts for @len blocks being freed on behalf of @type: AGFL frees are
 * ignored, unreserved frees are credited straight to the transaction's
 * free block delta, and reservation-backed frees first refill the
 * reservation's ar_reserved counter up to ar_asked.
 */
void
xfs_ag_resv_free_extent(
	struct xfs_perag		*pag,
	enum xfs_ag_resv_type		type,
	struct xfs_trans		*tp,
	xfs_extlen_t			len)
{
	struct xfs_ag_resv		*resv;
	xfs_extlen_t			refill;

	trace_xfs_ag_resv_free_extent(pag, type, len);

	/* Nothing to account for blocks freed on behalf of the AGFL. */
	if (type == XFS_AG_RESV_AGFL)
		return;

	if (type != XFS_AG_RESV_METADATA && type != XFS_AG_RESV_RMAPBT) {
		/* Unknown types are a bug, but are treated like NONE. */
		if (type != XFS_AG_RESV_NONE) {
			ASSERT(0);
		}

		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (int64_t)len);
		return;
	}

	/* Top the reservation back up, but never past what was asked for. */
	resv = xfs_perag_resv(pag, type);
	refill = min_t(xfs_extlen_t, len, resv->ar_asked - resv->ar_reserved);
	resv->ar_reserved += refill;

	/* The rmapbt reservation makes no superblock counter updates here. */
	if (type == XFS_AG_RESV_RMAPBT)
		return;

	/*
	 * Freeing into the reserved pool only requires on-disk update...
	 *
	 * NOTE(review): the full @len is posted here, whereas the allocation
	 * path posts only the part taken from the reservation; confirm that
	 * this asymmetry is intended.
	 */
	xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len);

	/* ...but freeing beyond that requires in-core and on-disk update. */
	if (len > refill)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len - refill);
}