1 /* 2 * Copyright (C) 2016 Oracle. All Rights Reserved. 3 * 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 2 9 * of the License, or (at your option) any later version. 10 * 11 * This program is distributed in the hope that it would be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 #include "xfs.h" 21 #include "xfs_fs.h" 22 #include "xfs_shared.h" 23 #include "xfs_format.h" 24 #include "xfs_log_format.h" 25 #include "xfs_trans_resv.h" 26 #include "xfs_sb.h" 27 #include "xfs_mount.h" 28 #include "xfs_defer.h" 29 #include "xfs_alloc.h" 30 #include "xfs_errortag.h" 31 #include "xfs_error.h" 32 #include "xfs_trace.h" 33 #include "xfs_cksum.h" 34 #include "xfs_trans.h" 35 #include "xfs_bit.h" 36 #include "xfs_bmap.h" 37 #include "xfs_bmap_btree.h" 38 #include "xfs_ag_resv.h" 39 #include "xfs_trans_space.h" 40 #include "xfs_rmap_btree.h" 41 #include "xfs_btree.h" 42 #include "xfs_refcount_btree.h" 43 #include "xfs_ialloc_btree.h" 44 45 /* 46 * Per-AG Block Reservations 47 * 48 * For some kinds of allocation group metadata structures, it is advantageous 49 * to reserve a small number of blocks in each AG so that future expansions of 50 * that data structure do not encounter ENOSPC because errors during a btree 51 * split cause the filesystem to go offline. 
 *
 * Prior to the introduction of reflink, this wasn't an issue because the free
 * space btrees maintain a reserve of space (the AGFL) to handle any expansion
 * that may be necessary; and allocations of other metadata (inodes, BMBT,
 * dir/attr) aren't restricted to a single AG. However, with reflink it is
 * possible to allocate all the space in an AG, have subsequent reflink/CoW
 * activity expand the refcount btree, and discover that there's no space left
 * to handle that expansion. Since we can calculate the maximum size of the
 * refcount btree, we can reserve space for it and avoid ENOSPC.
 *
 * Handling per-AG reservations consists of four changes to the allocator's
 * behavior: First, because these reservations are always needed, we decrease
 * the ag_max_usable counter to reflect the size of the AG after the reserved
 * blocks are taken. Second, the reservations must be reflected in the
 * fdblocks count to maintain proper accounting. Third, each AG must maintain
 * its own reserved block counter so that we can calculate the amount of space
 * that must remain free to maintain the reservations. Fourth, the "remaining
 * reserved blocks" count must be used when calculating the length of the
 * longest free extent in an AG and to clamp maxlen in the per-AG allocation
 * functions. In other words, we maintain a virtual allocation via in-core
 * accounting tricks so that we don't have to clean up after a crash. :)
 *
 * Reserved blocks can be managed by passing one of the enum xfs_ag_resv_type
 * values via struct xfs_alloc_arg or directly to the xfs_free_extent
 * function. It might seem a little funny to maintain a reservoir of blocks
 * to feed another reservoir, but the AGFL only holds enough blocks to get
 * through the next transaction. The per-AG reservation is to ensure (we
 * hope) that each AG never runs out of blocks.
Each data structure wanting 80 * to use the reservation system should update ask/used in xfs_ag_resv_init. 81 */ 82 83 /* 84 * Are we critically low on blocks? For now we'll define that as the number 85 * of blocks we can get our hands on being less than 10% of what we reserved 86 * or less than some arbitrary number (maximum btree height). 87 */ 88 bool 89 xfs_ag_resv_critical( 90 struct xfs_perag *pag, 91 enum xfs_ag_resv_type type) 92 { 93 xfs_extlen_t avail; 94 xfs_extlen_t orig; 95 96 switch (type) { 97 case XFS_AG_RESV_METADATA: 98 avail = pag->pagf_freeblks - pag->pag_agfl_resv.ar_reserved; 99 orig = pag->pag_meta_resv.ar_asked; 100 break; 101 case XFS_AG_RESV_AGFL: 102 avail = pag->pagf_freeblks + pag->pagf_flcount - 103 pag->pag_meta_resv.ar_reserved; 104 orig = pag->pag_agfl_resv.ar_asked; 105 break; 106 default: 107 ASSERT(0); 108 return false; 109 } 110 111 trace_xfs_ag_resv_critical(pag, type, avail); 112 113 /* Critically low if less than 10% or max btree height remains. */ 114 return XFS_TEST_ERROR(avail < orig / 10 || avail < XFS_BTREE_MAXLEVELS, 115 pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL); 116 } 117 118 /* 119 * How many blocks are reserved but not used, and therefore must not be 120 * allocated away? 
121 */ 122 xfs_extlen_t 123 xfs_ag_resv_needed( 124 struct xfs_perag *pag, 125 enum xfs_ag_resv_type type) 126 { 127 xfs_extlen_t len; 128 129 len = pag->pag_meta_resv.ar_reserved + pag->pag_agfl_resv.ar_reserved; 130 switch (type) { 131 case XFS_AG_RESV_METADATA: 132 case XFS_AG_RESV_AGFL: 133 len -= xfs_perag_resv(pag, type)->ar_reserved; 134 break; 135 case XFS_AG_RESV_NONE: 136 /* empty */ 137 break; 138 default: 139 ASSERT(0); 140 } 141 142 trace_xfs_ag_resv_needed(pag, type, len); 143 144 return len; 145 } 146 147 /* Clean out a reservation */ 148 static int 149 __xfs_ag_resv_free( 150 struct xfs_perag *pag, 151 enum xfs_ag_resv_type type) 152 { 153 struct xfs_ag_resv *resv; 154 xfs_extlen_t oldresv; 155 int error; 156 157 trace_xfs_ag_resv_free(pag, type, 0); 158 159 resv = xfs_perag_resv(pag, type); 160 if (pag->pag_agno == 0) 161 pag->pag_mount->m_ag_max_usable += resv->ar_asked; 162 /* 163 * AGFL blocks are always considered "free", so whatever 164 * was reserved at mount time must be given back at umount. 165 */ 166 if (type == XFS_AG_RESV_AGFL) 167 oldresv = resv->ar_orig_reserved; 168 else 169 oldresv = resv->ar_reserved; 170 error = xfs_mod_fdblocks(pag->pag_mount, oldresv, true); 171 resv->ar_reserved = 0; 172 resv->ar_asked = 0; 173 174 if (error) 175 trace_xfs_ag_resv_free_error(pag->pag_mount, pag->pag_agno, 176 error, _RET_IP_); 177 return error; 178 } 179 180 /* Free a per-AG reservation. 
*/ 181 int 182 xfs_ag_resv_free( 183 struct xfs_perag *pag) 184 { 185 int error; 186 int err2; 187 188 error = __xfs_ag_resv_free(pag, XFS_AG_RESV_AGFL); 189 err2 = __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA); 190 if (err2 && !error) 191 error = err2; 192 return error; 193 } 194 195 static int 196 __xfs_ag_resv_init( 197 struct xfs_perag *pag, 198 enum xfs_ag_resv_type type, 199 xfs_extlen_t ask, 200 xfs_extlen_t used) 201 { 202 struct xfs_mount *mp = pag->pag_mount; 203 struct xfs_ag_resv *resv; 204 int error; 205 xfs_extlen_t reserved; 206 207 if (used > ask) 208 ask = used; 209 reserved = ask - used; 210 211 error = xfs_mod_fdblocks(mp, -(int64_t)reserved, true); 212 if (error) { 213 trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno, 214 error, _RET_IP_); 215 xfs_warn(mp, 216 "Per-AG reservation for AG %u failed. Filesystem may run out of space.", 217 pag->pag_agno); 218 return error; 219 } 220 221 /* 222 * Reduce the maximum per-AG allocation length by however much we're 223 * trying to reserve for an AG. Since this is a filesystem-wide 224 * counter, we only make the adjustment for AG 0. This assumes that 225 * there aren't any AGs hungrier for per-AG reservation than AG 0. 226 */ 227 if (pag->pag_agno == 0) 228 mp->m_ag_max_usable -= ask; 229 230 resv = xfs_perag_resv(pag, type); 231 resv->ar_asked = ask; 232 resv->ar_reserved = resv->ar_orig_reserved = reserved; 233 234 trace_xfs_ag_resv_init(pag, type, ask); 235 return 0; 236 } 237 238 /* Create a per-AG block reservation. */ 239 int 240 xfs_ag_resv_init( 241 struct xfs_perag *pag) 242 { 243 struct xfs_mount *mp = pag->pag_mount; 244 xfs_agnumber_t agno = pag->pag_agno; 245 xfs_extlen_t ask; 246 xfs_extlen_t used; 247 int error = 0; 248 249 /* Create the metadata reservation. 
*/ 250 if (pag->pag_meta_resv.ar_asked == 0) { 251 ask = used = 0; 252 253 error = xfs_refcountbt_calc_reserves(mp, agno, &ask, &used); 254 if (error) 255 goto out; 256 257 error = xfs_finobt_calc_reserves(mp, agno, &ask, &used); 258 if (error) 259 goto out; 260 261 error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, 262 ask, used); 263 if (error) { 264 /* 265 * Because we didn't have per-AG reservations when the 266 * finobt feature was added we might not be able to 267 * reserve all needed blocks. Warn and fall back to the 268 * old and potentially buggy code in that case, but 269 * ensure we do have the reservation for the refcountbt. 270 */ 271 ask = used = 0; 272 273 mp->m_inotbt_nores = true; 274 275 error = xfs_refcountbt_calc_reserves(mp, agno, &ask, 276 &used); 277 if (error) 278 goto out; 279 280 error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, 281 ask, used); 282 if (error) 283 goto out; 284 } 285 } 286 287 /* Create the AGFL metadata reservation */ 288 if (pag->pag_agfl_resv.ar_asked == 0) { 289 ask = used = 0; 290 291 error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used); 292 if (error) 293 goto out; 294 295 error = __xfs_ag_resv_init(pag, XFS_AG_RESV_AGFL, ask, used); 296 if (error) 297 goto out; 298 } 299 300 #ifdef DEBUG 301 /* need to read in the AGF for the ASSERT below to work */ 302 error = xfs_alloc_pagf_init(pag->pag_mount, NULL, pag->pag_agno, 0); 303 if (error) 304 return error; 305 306 ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + 307 xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <= 308 pag->pagf_freeblks + pag->pagf_flcount); 309 #endif 310 out: 311 return error; 312 } 313 314 /* Allocate a block from the reservation. 
*/ 315 void 316 xfs_ag_resv_alloc_extent( 317 struct xfs_perag *pag, 318 enum xfs_ag_resv_type type, 319 struct xfs_alloc_arg *args) 320 { 321 struct xfs_ag_resv *resv; 322 xfs_extlen_t len; 323 uint field; 324 325 trace_xfs_ag_resv_alloc_extent(pag, type, args->len); 326 327 switch (type) { 328 case XFS_AG_RESV_METADATA: 329 case XFS_AG_RESV_AGFL: 330 resv = xfs_perag_resv(pag, type); 331 break; 332 default: 333 ASSERT(0); 334 /* fall through */ 335 case XFS_AG_RESV_NONE: 336 field = args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS : 337 XFS_TRANS_SB_FDBLOCKS; 338 xfs_trans_mod_sb(args->tp, field, -(int64_t)args->len); 339 return; 340 } 341 342 len = min_t(xfs_extlen_t, args->len, resv->ar_reserved); 343 resv->ar_reserved -= len; 344 if (type == XFS_AG_RESV_AGFL) 345 return; 346 /* Allocations of reserved blocks only need on-disk sb updates... */ 347 xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS, -(int64_t)len); 348 /* ...but non-reserved blocks need in-core and on-disk updates. */ 349 if (args->len > len) 350 xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_FDBLOCKS, 351 -((int64_t)args->len - len)); 352 } 353 354 /* Free a block to the reservation. */ 355 void 356 xfs_ag_resv_free_extent( 357 struct xfs_perag *pag, 358 enum xfs_ag_resv_type type, 359 struct xfs_trans *tp, 360 xfs_extlen_t len) 361 { 362 xfs_extlen_t leftover; 363 struct xfs_ag_resv *resv; 364 365 trace_xfs_ag_resv_free_extent(pag, type, len); 366 367 switch (type) { 368 case XFS_AG_RESV_METADATA: 369 case XFS_AG_RESV_AGFL: 370 resv = xfs_perag_resv(pag, type); 371 break; 372 default: 373 ASSERT(0); 374 /* fall through */ 375 case XFS_AG_RESV_NONE: 376 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (int64_t)len); 377 return; 378 } 379 380 leftover = min_t(xfs_extlen_t, len, resv->ar_asked - resv->ar_reserved); 381 resv->ar_reserved += leftover; 382 if (type == XFS_AG_RESV_AGFL) 383 return; 384 /* Freeing into the reserved pool only requires on-disk update... 
*/ 385 xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len); 386 /* ...but freeing beyond that requires in-core and on-disk update. */ 387 if (len > leftover) 388 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len - leftover); 389 } 390