1 /* 2 * Copyright (C) 2016 Oracle. All Rights Reserved. 3 * 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 2 9 * of the License, or (at your option) any later version. 10 * 11 * This program is distributed in the hope that it would be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 #include "xfs.h" 21 #include "xfs_fs.h" 22 #include "xfs_shared.h" 23 #include "xfs_format.h" 24 #include "xfs_log_format.h" 25 #include "xfs_trans_resv.h" 26 #include "xfs_sb.h" 27 #include "xfs_mount.h" 28 #include "xfs_defer.h" 29 #include "xfs_alloc.h" 30 #include "xfs_error.h" 31 #include "xfs_trace.h" 32 #include "xfs_cksum.h" 33 #include "xfs_trans.h" 34 #include "xfs_bit.h" 35 #include "xfs_bmap.h" 36 #include "xfs_bmap_btree.h" 37 #include "xfs_ag_resv.h" 38 #include "xfs_trans_space.h" 39 #include "xfs_rmap_btree.h" 40 #include "xfs_btree.h" 41 #include "xfs_refcount_btree.h" 42 43 /* 44 * Per-AG Block Reservations 45 * 46 * For some kinds of allocation group metadata structures, it is advantageous 47 * to reserve a small number of blocks in each AG so that future expansions of 48 * that data structure do not encounter ENOSPC because errors during a btree 49 * split cause the filesystem to go offline. 
 *
 * Prior to the introduction of reflink, this wasn't an issue because the free
 * space btrees maintain a reserve of space (the AGFL) to handle any expansion
 * that may be necessary; and allocations of other metadata (inodes, BMBT,
 * dir/attr) aren't restricted to a single AG.  However, with reflink it is
 * possible to allocate all the space in an AG, have subsequent reflink/CoW
 * activity expand the refcount btree, and discover that there's no space left
 * to handle that expansion.  Since we can calculate the maximum size of the
 * refcount btree, we can reserve space for it and avoid ENOSPC.
 *
 * Handling per-AG reservations consists of four changes to the allocator's
 * behavior:  First, because these reservations are always needed, we decrease
 * the ag_max_usable counter to reflect the size of the AG after the reserved
 * blocks are taken.  Second, the reservations must be reflected in the
 * fdblocks count to maintain proper accounting.  Third, each AG must maintain
 * its own reserved block counter so that we can calculate the amount of space
 * that must remain free to maintain the reservations.  Fourth, the "remaining
 * reserved blocks" count must be used when calculating the length of the
 * longest free extent in an AG and to clamp maxlen in the per-AG allocation
 * functions.  In other words, we maintain a virtual allocation via in-core
 * accounting tricks so that we don't have to clean up after a crash. :)
 *
 * Reserved blocks can be managed by passing one of the enum xfs_ag_resv_type
 * values via struct xfs_alloc_arg or directly to the xfs_free_extent
 * function.  It might seem a little funny to maintain a reservoir of blocks
 * to feed another reservoir, but the AGFL only holds enough blocks to get
 * through the next transaction.  The per-AG reservation is to ensure (we
 * hope) that each AG never runs out of blocks.
Each data structure wanting
 * to use the reservation system should update ask/used in xfs_ag_resv_init.
 */

/*
 * Report whether the reservation of the given type is critically short of
 * blocks.  For now "critical" means that the number of blocks we can still
 * get our hands on is below 10% of what was originally asked for, or below
 * an arbitrary floor (the maximum btree height, XFS_BTREE_MAXLEVELS).
 *
 * An unrecognised type trips an ASSERT and is reported as not critical.
 */
bool
xfs_ag_resv_critical(
	struct xfs_perag		*pag,
	enum xfs_ag_resv_type		type)
{
	struct xfs_mount		*mp = pag->pag_mount;
	xfs_extlen_t			avail;
	xfs_extlen_t			orig;

	if (type == XFS_AG_RESV_METADATA) {
		/* pagf_freeblks, less what the AGFL reservation still holds. */
		orig = pag->pag_meta_resv.ar_asked;
		avail = pag->pagf_freeblks;
		avail -= pag->pag_agfl_resv.ar_reserved;
	} else if (type == XFS_AG_RESV_AGFL) {
		/*
		 * pagf_freeblks plus pagf_flcount, less what the metadata
		 * reservation still holds.
		 */
		orig = pag->pag_agfl_resv.ar_asked;
		avail = pag->pagf_freeblks + pag->pagf_flcount;
		avail -= pag->pag_meta_resv.ar_reserved;
	} else {
		ASSERT(0);
		return false;
	}

	trace_xfs_ag_resv_critical(pag, type, avail);

	/* Critically low if less than 10% or max btree height remains. */
	return XFS_TEST_ERROR(avail < orig / 10 || avail < XFS_BTREE_MAXLEVELS,
			mp, XFS_ERRTAG_AG_RESV_CRITICAL,
			XFS_RANDOM_AG_RESV_CRITICAL);
}

/*
 * How many blocks are reserved but not used, and therefore must not be
 * allocated away?
120 */ 121 xfs_extlen_t 122 xfs_ag_resv_needed( 123 struct xfs_perag *pag, 124 enum xfs_ag_resv_type type) 125 { 126 xfs_extlen_t len; 127 128 len = pag->pag_meta_resv.ar_reserved + pag->pag_agfl_resv.ar_reserved; 129 switch (type) { 130 case XFS_AG_RESV_METADATA: 131 case XFS_AG_RESV_AGFL: 132 len -= xfs_perag_resv(pag, type)->ar_reserved; 133 break; 134 case XFS_AG_RESV_NONE: 135 /* empty */ 136 break; 137 default: 138 ASSERT(0); 139 } 140 141 trace_xfs_ag_resv_needed(pag, type, len); 142 143 return len; 144 } 145 146 /* Clean out a reservation */ 147 static int 148 __xfs_ag_resv_free( 149 struct xfs_perag *pag, 150 enum xfs_ag_resv_type type) 151 { 152 struct xfs_ag_resv *resv; 153 xfs_extlen_t oldresv; 154 int error; 155 156 trace_xfs_ag_resv_free(pag, type, 0); 157 158 resv = xfs_perag_resv(pag, type); 159 pag->pag_mount->m_ag_max_usable += resv->ar_asked; 160 /* 161 * AGFL blocks are always considered "free", so whatever 162 * was reserved at mount time must be given back at umount. 163 */ 164 if (type == XFS_AG_RESV_AGFL) 165 oldresv = resv->ar_orig_reserved; 166 else 167 oldresv = resv->ar_reserved; 168 error = xfs_mod_fdblocks(pag->pag_mount, oldresv, true); 169 resv->ar_reserved = 0; 170 resv->ar_asked = 0; 171 172 if (error) 173 trace_xfs_ag_resv_free_error(pag->pag_mount, pag->pag_agno, 174 error, _RET_IP_); 175 return error; 176 } 177 178 /* Free a per-AG reservation. 
*/ 179 int 180 xfs_ag_resv_free( 181 struct xfs_perag *pag) 182 { 183 int error; 184 int err2; 185 186 error = __xfs_ag_resv_free(pag, XFS_AG_RESV_AGFL); 187 err2 = __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA); 188 if (err2 && !error) 189 error = err2; 190 return error; 191 } 192 193 static int 194 __xfs_ag_resv_init( 195 struct xfs_perag *pag, 196 enum xfs_ag_resv_type type, 197 xfs_extlen_t ask, 198 xfs_extlen_t used) 199 { 200 struct xfs_mount *mp = pag->pag_mount; 201 struct xfs_ag_resv *resv; 202 int error; 203 xfs_extlen_t reserved; 204 205 if (used > ask) 206 ask = used; 207 reserved = ask - used; 208 209 error = xfs_mod_fdblocks(mp, -(int64_t)reserved, true); 210 if (error) { 211 trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno, 212 error, _RET_IP_); 213 return error; 214 } 215 216 mp->m_ag_max_usable -= ask; 217 218 resv = xfs_perag_resv(pag, type); 219 resv->ar_asked = ask; 220 resv->ar_reserved = resv->ar_orig_reserved = reserved; 221 222 trace_xfs_ag_resv_init(pag, type, ask); 223 return 0; 224 } 225 226 /* Create a per-AG block reservation. */ 227 int 228 xfs_ag_resv_init( 229 struct xfs_perag *pag) 230 { 231 xfs_extlen_t ask; 232 xfs_extlen_t used; 233 int error = 0; 234 235 /* Create the metadata reservation. 
*/ 236 if (pag->pag_meta_resv.ar_asked == 0) { 237 ask = used = 0; 238 239 error = xfs_refcountbt_calc_reserves(pag->pag_mount, 240 pag->pag_agno, &ask, &used); 241 if (error) 242 goto out; 243 244 error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, 245 ask, used); 246 if (error) 247 goto out; 248 } 249 250 /* Create the AGFL metadata reservation */ 251 if (pag->pag_agfl_resv.ar_asked == 0) { 252 ask = used = 0; 253 254 error = xfs_rmapbt_calc_reserves(pag->pag_mount, pag->pag_agno, 255 &ask, &used); 256 if (error) 257 goto out; 258 259 error = __xfs_ag_resv_init(pag, XFS_AG_RESV_AGFL, ask, used); 260 if (error) 261 goto out; 262 } 263 264 ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + 265 xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <= 266 pag->pagf_freeblks + pag->pagf_flcount); 267 out: 268 return error; 269 } 270 271 /* Allocate a block from the reservation. */ 272 void 273 xfs_ag_resv_alloc_extent( 274 struct xfs_perag *pag, 275 enum xfs_ag_resv_type type, 276 struct xfs_alloc_arg *args) 277 { 278 struct xfs_ag_resv *resv; 279 xfs_extlen_t len; 280 uint field; 281 282 trace_xfs_ag_resv_alloc_extent(pag, type, args->len); 283 284 switch (type) { 285 case XFS_AG_RESV_METADATA: 286 case XFS_AG_RESV_AGFL: 287 resv = xfs_perag_resv(pag, type); 288 break; 289 default: 290 ASSERT(0); 291 /* fall through */ 292 case XFS_AG_RESV_NONE: 293 field = args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS : 294 XFS_TRANS_SB_FDBLOCKS; 295 xfs_trans_mod_sb(args->tp, field, -(int64_t)args->len); 296 return; 297 } 298 299 len = min_t(xfs_extlen_t, args->len, resv->ar_reserved); 300 resv->ar_reserved -= len; 301 if (type == XFS_AG_RESV_AGFL) 302 return; 303 /* Allocations of reserved blocks only need on-disk sb updates... */ 304 xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS, -(int64_t)len); 305 /* ...but non-reserved blocks need in-core and on-disk updates. 
*/ 306 if (args->len > len) 307 xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_FDBLOCKS, 308 -((int64_t)args->len - len)); 309 } 310 311 /* Free a block to the reservation. */ 312 void 313 xfs_ag_resv_free_extent( 314 struct xfs_perag *pag, 315 enum xfs_ag_resv_type type, 316 struct xfs_trans *tp, 317 xfs_extlen_t len) 318 { 319 xfs_extlen_t leftover; 320 struct xfs_ag_resv *resv; 321 322 trace_xfs_ag_resv_free_extent(pag, type, len); 323 324 switch (type) { 325 case XFS_AG_RESV_METADATA: 326 case XFS_AG_RESV_AGFL: 327 resv = xfs_perag_resv(pag, type); 328 break; 329 default: 330 ASSERT(0); 331 /* fall through */ 332 case XFS_AG_RESV_NONE: 333 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (int64_t)len); 334 return; 335 } 336 337 leftover = min_t(xfs_extlen_t, len, resv->ar_asked - resv->ar_reserved); 338 resv->ar_reserved += leftover; 339 if (type == XFS_AG_RESV_AGFL) 340 return; 341 /* Freeing into the reserved pool only requires on-disk update... */ 342 xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len); 343 /* ...but freeing beyond that requires in-core and on-disk update. */ 344 if (len > leftover) 345 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len - leftover); 346 } 347