// SPDX-License-Identifier: GPL-2.0

#include "ctree.h"
#include "space-info.h"
#include "sysfs.h"
#include "volumes.h"
#include "free-space-cache.h"

/*
 * Sum of all bytes currently accounted against @s_info: used, reserved,
 * pinned and readonly, plus bytes_may_use when @may_use_included is true.
 *
 * Callers that want "space actually consumed on disk" pass false; callers
 * computing headroom against total_bytes pass true.  Reads the counters
 * unlocked — callers needing a consistent snapshot must hold s_info->lock.
 */
u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
			  bool may_use_included)
{
	ASSERT(s_info);
	return s_info->bytes_used + s_info->bytes_reserved +
		s_info->bytes_pinned + s_info->bytes_readonly +
		(may_use_included ? s_info->bytes_may_use : 0);
}

/*
 * after adding space to the filesystem, we need to clear the full flags
 * on all the space infos.
 */
void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	/* Writers add to this list under RCU; a plain flag store is safe here. */
	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list)
		found->full = 0;
	rcu_read_unlock();
}

/*
 * Map a block group type mask to the name used for the sysfs directory of
 * the corresponding space_info (see create_space_info below).
 *
 * Only the four valid type combinations are expected; anything else warns
 * and returns a placeholder string.
 */
static const char *alloc_name(u64 flags)
{
	switch (flags) {
	case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
		return "mixed";
	case BTRFS_BLOCK_GROUP_METADATA:
		return "metadata";
	case BTRFS_BLOCK_GROUP_DATA:
		return "data";
	case BTRFS_BLOCK_GROUP_SYSTEM:
		return "system";
	default:
		WARN_ON(1);
		return "invalid-combination";
	};
}

/*
 * Allocate and initialize a btrfs_space_info for the block group type in
 * @flags, register its sysfs kobject, and publish it on info->space_info.
 *
 * On success the DATA space_info is also cached in info->data_sinfo.
 * Returns 0 or a negative errno; on kobject failure the kobject_put()
 * drops the reference (the release callback is expected to free the
 * structure — NOTE(review): percpu counter cleanup presumably happens
 * there too, confirm against space_info_ktype).
 */
static int create_space_info(struct btrfs_fs_info *info, u64 flags)
{

	struct btrfs_space_info *space_info;
	int i;
	int ret;

	space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
	if (!space_info)
		return -ENOMEM;

	ret = percpu_counter_init(&space_info->total_bytes_pinned, 0,
				  GFP_KERNEL);
	if (ret) {
		kfree(space_info);
		return ret;
	}

	/* One block group list per RAID profile (raid0/1/10/5/6/dup/single). */
	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
		INIT_LIST_HEAD(&space_info->block_groups[i]);
	init_rwsem(&space_info->groups_sem);
	spin_lock_init(&space_info->lock);
	/* Keep only the type bits; profile bits are tracked per block group. */
	space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
	space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
	init_waitqueue_head(&space_info->wait);
	INIT_LIST_HEAD(&space_info->ro_bgs);
	INIT_LIST_HEAD(&space_info->tickets);
	INIT_LIST_HEAD(&space_info->priority_tickets);

	ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype,
				    info->space_info_kobj, "%s",
				    alloc_name(space_info->flags));
	if (ret) {
		kobject_put(&space_info->kobj);
		return ret;
	}

	list_add_rcu(&space_info->list, &info->space_info);
	if (flags & BTRFS_BLOCK_GROUP_DATA)
		info->data_sinfo = space_info;

	return ret;
}

/*
 * Create the initial set of space_infos at mount time: SYSTEM always,
 * then either a single mixed METADATA|DATA space_info (when the
 * MIXED_GROUPS incompat feature is set) or separate METADATA and DATA
 * ones.
 *
 * Returns 0 on success or the first create_space_info() error.
 */
int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
{
	struct btrfs_super_block *disk_super;
	u64 features;
	u64 flags;
	int mixed = 0;
	int ret;

	disk_super = fs_info->super_copy;
	if (!btrfs_super_root(disk_super))
		return -EINVAL;

	features = btrfs_super_incompat_flags(disk_super);
	if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
		mixed = 1;

	flags = BTRFS_BLOCK_GROUP_SYSTEM;
	ret = create_space_info(fs_info, flags);
	if (ret)
		goto out;

	if (mixed) {
		flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
		ret = create_space_info(fs_info, flags);
	} else {
		flags = BTRFS_BLOCK_GROUP_METADATA;
		ret = create_space_info(fs_info, flags);
		if (ret)
			goto out;

		flags = BTRFS_BLOCK_GROUP_DATA;
		ret = create_space_info(fs_info, flags);
	}
out:
	return ret;
}

/*
 * Account a newly added block group's space into the matching space_info.
 *
 * @total_bytes/@bytes_used/@bytes_readonly are the block group's logical
 * sizes; disk_total/disk_used are scaled by the RAID factor of @flags.
 * Adding any space clears the "full" flag, and the freshly available
 * bytes (total - used - readonly) are offered to waiting reservation
 * tickets.  The found space_info is returned through @space_info.
 */
void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
			     u64 total_bytes, u64 bytes_used,
			     u64 bytes_readonly,
			     struct btrfs_space_info **space_info)
{
	struct btrfs_space_info *found;
	int factor;

	factor = btrfs_bg_type_to_factor(flags);

	found = btrfs_find_space_info(info, flags);
	ASSERT(found);
	spin_lock(&found->lock);
	found->total_bytes += total_bytes;
	found->disk_total += total_bytes * factor;
	found->bytes_used += bytes_used;
	found->disk_used += bytes_used * factor;
	found->bytes_readonly += bytes_readonly;
	if (total_bytes > 0)
		found->full = 0;
	btrfs_space_info_add_new_bytes(info, found,
				       total_bytes - bytes_used -
				       bytes_readonly);
	spin_unlock(&found->lock);
	*space_info = found;
}

/*
 * Look up the space_info whose type bits intersect @flags (profile bits
 * are masked off first).  Returns NULL if none exists.
 *
 * Note this matches on "&", not equality, so a mixed METADATA|DATA
 * space_info is found by either METADATA or DATA queries.
 */
struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
					       u64 flags)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list) {
		if (found->flags & flags) {
			rcu_read_unlock();
			return found;
		}
	}
	rcu_read_unlock();
	return NULL;
}

/* Space to keep free for the global reserve: twice its current size. */
static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
{
	return (global->size << 1);
}

/*
 * Decide whether a metadata (or system, when @system_chunk) reservation of
 * @bytes may overcommit, i.e. be granted beyond what is currently
 * allocated to chunks, on the theory that a new chunk can be allocated
 * from free device space later.
 *
 * Returns 1 if the reservation may proceed, 0 otherwise.  Data
 * reservations never overcommit.  The fraction of free device space we
 * are willing to count depends on @flush: the more we can flush, the
 * more conservative we are (1/8 with FLUSH_ALL, 1/2 otherwise).
 */
int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
			 struct btrfs_space_info *space_info, u64 bytes,
			 enum btrfs_reserve_flush_enum flush,
			 bool system_chunk)
{
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	u64 profile;
	u64 space_size;
	u64 avail;
	u64 used;
	int factor;

	/* Don't overcommit when in mixed mode. */
	if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
		return 0;

	if (system_chunk)
		profile = btrfs_system_alloc_profile(fs_info);
	else
		profile = btrfs_metadata_alloc_profile(fs_info);

	/* Exclude bytes_may_use for now; it is added back below. */
	used = btrfs_space_info_used(space_info, false);

	/*
	 * We only want to allow over committing if we have lots of actual space
	 * free, but if we don't have enough space to handle the global reserve
	 * space then we could end up having a real enospc problem when trying
	 * to allocate a chunk or some other such important allocation.
	 */
	spin_lock(&global_rsv->lock);
	space_size = calc_global_rsv_need_space(global_rsv);
	spin_unlock(&global_rsv->lock);
	if (used + space_size >= space_info->total_bytes)
		return 0;

	used += space_info->bytes_may_use;

	avail = atomic64_read(&fs_info->free_chunk_space);

	/*
	 * If we have dup, raid1 or raid10 then only half of the free
	 * space is actually usable.  For raid56, the space info used
	 * doesn't include the parity drive, so we don't have to
	 * change the math
	 */
	factor = btrfs_bg_type_to_factor(profile);
	avail = div_u64(avail, factor);

	/*
	 * If we aren't flushing all things, let us overcommit up to
	 * 1/2th of the space. If we can flush, don't let us overcommit
	 * too much, let it overcommit up to 1/8 of the space.
	 */
	if (flush == BTRFS_RESERVE_FLUSH_ALL)
		avail >>= 3;
	else
		avail >>= 1;

	if (used + bytes < space_info->total_bytes + avail)
		return 1;
	return 0;
}

/*
 * This is for space we already have accounted in space_info->bytes_may_use, so
 * basically when we're returning space from block_rsv's.
 *
 * The returned @num_bytes is first handed to waiting tickets (priority
 * tickets before regular ones); a ticket whose request is fully satisfied
 * is removed and woken.  Whatever is left over is subtracted from
 * bytes_may_use.  If the space_info was already over its limit, tickets
 * are only satisfied while overcommit still succeeds.
 */
void btrfs_space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
				    struct btrfs_space_info *space_info,
				    u64 num_bytes)
{
	struct reserve_ticket *ticket;
	struct list_head *head;
	u64 used;
	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
	bool check_overcommit = false;

	spin_lock(&space_info->lock);
	head = &space_info->priority_tickets;

	/*
	 * If we are over our limit then we need to check and see if we can
	 * overcommit, and if we can't then we just need to free up our space
	 * and not satisfy any requests.
	 */
	used = btrfs_space_info_used(space_info, true);
	if (used - num_bytes >= space_info->total_bytes)
		check_overcommit = true;
again:
	while (!list_empty(head) && num_bytes) {
		ticket = list_first_entry(head, struct reserve_ticket,
					  list);
		/*
		 * We use 0 bytes because this space is already reserved, so
		 * adding the ticket space would be a double count.
		 */
		if (check_overcommit &&
		    !btrfs_can_overcommit(fs_info, space_info, 0, flush,
					  false))
			break;
		if (num_bytes >= ticket->bytes) {
			/* Ticket fully satisfied: remove and wake the waiter. */
			list_del_init(&ticket->list);
			num_bytes -= ticket->bytes;
			ticket->bytes = 0;
			space_info->tickets_id++;
			wake_up(&ticket->wait);
		} else {
			/* Partial fill: shrink the ticket, keep it queued. */
			ticket->bytes -= num_bytes;
			num_bytes = 0;
		}
	}

	/* Priority tickets done; retry the regular queue with full flushing. */
	if (num_bytes && head == &space_info->priority_tickets) {
		head = &space_info->tickets;
		flush = BTRFS_RESERVE_FLUSH_ALL;
		goto again;
	}
	/* Leftover space is no longer "may_use"; note -num_bytes wraps to a
	 * u64 decrement by design of the update helper. */
	btrfs_space_info_update_bytes_may_use(fs_info, space_info, -num_bytes);
	trace_btrfs_space_reservation(fs_info, "space_info",
				      space_info->flags, num_bytes, 0);
	spin_unlock(&space_info->lock);
}

/*
 * This is for newly allocated space that isn't accounted in
 * space_info->bytes_may_use yet.  So if we allocate a chunk or unpin an extent
 * we use this helper.
 *
 * Unlike the _old_bytes variant, space given to a ticket here is newly
 * added to bytes_may_use (the ticket holder now owns a reservation).
 * Callers hold space_info->lock (see btrfs_update_space_info).
 */
void btrfs_space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
				    struct btrfs_space_info *space_info,
				    u64 num_bytes)
{
	struct reserve_ticket *ticket;
	struct list_head *head = &space_info->priority_tickets;

again:
	while (!list_empty(head) && num_bytes) {
		ticket = list_first_entry(head, struct reserve_ticket,
					  list);
		if (num_bytes >= ticket->bytes) {
			trace_btrfs_space_reservation(fs_info, "space_info",
						      space_info->flags,
						      ticket->bytes, 1);
			list_del_init(&ticket->list);
			num_bytes -= ticket->bytes;
			btrfs_space_info_update_bytes_may_use(fs_info,
							      space_info,
							      ticket->bytes);
			ticket->bytes = 0;
			space_info->tickets_id++;
			wake_up(&ticket->wait);
		} else {
			trace_btrfs_space_reservation(fs_info, "space_info",
						      space_info->flags,
						      num_bytes, 1);
			btrfs_space_info_update_bytes_may_use(fs_info,
							      space_info,
							      num_bytes);
			ticket->bytes -= num_bytes;
			num_bytes = 0;
		}
	}

	/* Priority queue drained; move on to the regular ticket queue. */
	if (num_bytes && head == &space_info->priority_tickets) {
		head = &space_info->tickets;
		goto again;
	}
}

/* Log one named block_rsv's size/reserved under its own lock. */
#define DUMP_BLOCK_RSV(fs_info, rsv_name)				\
do {									\
	struct btrfs_block_rsv *__rsv = &(fs_info)->rsv_name;		\
	spin_lock(&__rsv->lock);					\
	btrfs_info(fs_info, #rsv_name ": size %llu reserved %llu",	\
		   __rsv->size, __rsv->reserved);			\
	spin_unlock(&__rsv->lock);					\
} while (0)

/*
 * Dump the state of @info (counters, the global reserves and, when
 * @dump_block_groups is set, every block group in every RAID list) to the
 * kernel log.  @bytes is forwarded to btrfs_dump_free_space() as the
 * size of interest.  Diagnostic aid for ENOSPC debugging.
 */
void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
			   struct btrfs_space_info *info, u64 bytes,
			   int dump_block_groups)
{
	struct btrfs_block_group_cache *cache;
	int index = 0;

	spin_lock(&info->lock);
	btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull",
		   info->flags,
		   info->total_bytes - btrfs_space_info_used(info, true),
		   info->full ? "" : "not ");
	btrfs_info(fs_info,
		"space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu",
		   info->total_bytes, info->bytes_used, info->bytes_pinned,
		   info->bytes_reserved, info->bytes_may_use,
		   info->bytes_readonly);
	spin_unlock(&info->lock);

	DUMP_BLOCK_RSV(fs_info, global_block_rsv);
	DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
	DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
	DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
	DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);

	if (!dump_block_groups)
		return;

	down_read(&info->groups_sem);
again:
	list_for_each_entry(cache, &info->block_groups[index], list) {
		spin_lock(&cache->lock);
		btrfs_info(fs_info,
			"block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s",
			cache->key.objectid, cache->key.offset,
			btrfs_block_group_used(&cache->item), cache->pinned,
			cache->reserved, cache->ro ? "[readonly]" : "");
		btrfs_dump_free_space(cache, bytes);
		spin_unlock(&cache->lock);
	}
	if (++index < BTRFS_NR_RAID_TYPES)
		goto again;
	up_read(&info->groups_sem);
}