xref: /openbmc/linux/fs/btrfs/space-info.c (revision b338b013)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #include "ctree.h"
4 #include "space-info.h"
5 #include "sysfs.h"
6 #include "volumes.h"
7 
8 u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
9 			  bool may_use_included)
10 {
11 	ASSERT(s_info);
12 	return s_info->bytes_used + s_info->bytes_reserved +
13 		s_info->bytes_pinned + s_info->bytes_readonly +
14 		(may_use_included ? s_info->bytes_may_use : 0);
15 }
16 
17 /*
18  * after adding space to the filesystem, we need to clear the full flags
19  * on all the space infos.
20  */
21 void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
22 {
23 	struct list_head *head = &info->space_info;
24 	struct btrfs_space_info *found;
25 
26 	rcu_read_lock();
27 	list_for_each_entry_rcu(found, head, list)
28 		found->full = 0;
29 	rcu_read_unlock();
30 }
31 
32 static const char *alloc_name(u64 flags)
33 {
34 	switch (flags) {
35 	case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA:
36 		return "mixed";
37 	case BTRFS_BLOCK_GROUP_METADATA:
38 		return "metadata";
39 	case BTRFS_BLOCK_GROUP_DATA:
40 		return "data";
41 	case BTRFS_BLOCK_GROUP_SYSTEM:
42 		return "system";
43 	default:
44 		WARN_ON(1);
45 		return "invalid-combination";
46 	};
47 }
48 
49 static int create_space_info(struct btrfs_fs_info *info, u64 flags)
50 {
51 
52 	struct btrfs_space_info *space_info;
53 	int i;
54 	int ret;
55 
56 	space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
57 	if (!space_info)
58 		return -ENOMEM;
59 
60 	ret = percpu_counter_init(&space_info->total_bytes_pinned, 0,
61 				 GFP_KERNEL);
62 	if (ret) {
63 		kfree(space_info);
64 		return ret;
65 	}
66 
67 	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
68 		INIT_LIST_HEAD(&space_info->block_groups[i]);
69 	init_rwsem(&space_info->groups_sem);
70 	spin_lock_init(&space_info->lock);
71 	space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
72 	space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
73 	init_waitqueue_head(&space_info->wait);
74 	INIT_LIST_HEAD(&space_info->ro_bgs);
75 	INIT_LIST_HEAD(&space_info->tickets);
76 	INIT_LIST_HEAD(&space_info->priority_tickets);
77 
78 	ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype,
79 				    info->space_info_kobj, "%s",
80 				    alloc_name(space_info->flags));
81 	if (ret) {
82 		kobject_put(&space_info->kobj);
83 		return ret;
84 	}
85 
86 	list_add_rcu(&space_info->list, &info->space_info);
87 	if (flags & BTRFS_BLOCK_GROUP_DATA)
88 		info->data_sinfo = space_info;
89 
90 	return ret;
91 }
92 
93 int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
94 {
95 	struct btrfs_super_block *disk_super;
96 	u64 features;
97 	u64 flags;
98 	int mixed = 0;
99 	int ret;
100 
101 	disk_super = fs_info->super_copy;
102 	if (!btrfs_super_root(disk_super))
103 		return -EINVAL;
104 
105 	features = btrfs_super_incompat_flags(disk_super);
106 	if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
107 		mixed = 1;
108 
109 	flags = BTRFS_BLOCK_GROUP_SYSTEM;
110 	ret = create_space_info(fs_info, flags);
111 	if (ret)
112 		goto out;
113 
114 	if (mixed) {
115 		flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
116 		ret = create_space_info(fs_info, flags);
117 	} else {
118 		flags = BTRFS_BLOCK_GROUP_METADATA;
119 		ret = create_space_info(fs_info, flags);
120 		if (ret)
121 			goto out;
122 
123 		flags = BTRFS_BLOCK_GROUP_DATA;
124 		ret = create_space_info(fs_info, flags);
125 	}
126 out:
127 	return ret;
128 }
129 
130 void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
131 			     u64 total_bytes, u64 bytes_used,
132 			     u64 bytes_readonly,
133 			     struct btrfs_space_info **space_info)
134 {
135 	struct btrfs_space_info *found;
136 	int factor;
137 
138 	factor = btrfs_bg_type_to_factor(flags);
139 
140 	found = btrfs_find_space_info(info, flags);
141 	ASSERT(found);
142 	spin_lock(&found->lock);
143 	found->total_bytes += total_bytes;
144 	found->disk_total += total_bytes * factor;
145 	found->bytes_used += bytes_used;
146 	found->disk_used += bytes_used * factor;
147 	found->bytes_readonly += bytes_readonly;
148 	if (total_bytes > 0)
149 		found->full = 0;
150 	btrfs_space_info_add_new_bytes(info, found,
151 				       total_bytes - bytes_used -
152 				       bytes_readonly);
153 	spin_unlock(&found->lock);
154 	*space_info = found;
155 }
156 
157 struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
158 					       u64 flags)
159 {
160 	struct list_head *head = &info->space_info;
161 	struct btrfs_space_info *found;
162 
163 	flags &= BTRFS_BLOCK_GROUP_TYPE_MASK;
164 
165 	rcu_read_lock();
166 	list_for_each_entry_rcu(found, head, list) {
167 		if (found->flags & flags) {
168 			rcu_read_unlock();
169 			return found;
170 		}
171 	}
172 	rcu_read_unlock();
173 	return NULL;
174 }
175 
176 static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
177 {
178 	return (global->size << 1);
179 }
180 
181 int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
182 			 struct btrfs_space_info *space_info, u64 bytes,
183 			 enum btrfs_reserve_flush_enum flush,
184 			 bool system_chunk)
185 {
186 	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
187 	u64 profile;
188 	u64 space_size;
189 	u64 avail;
190 	u64 used;
191 	int factor;
192 
193 	/* Don't overcommit when in mixed mode. */
194 	if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
195 		return 0;
196 
197 	if (system_chunk)
198 		profile = btrfs_system_alloc_profile(fs_info);
199 	else
200 		profile = btrfs_metadata_alloc_profile(fs_info);
201 
202 	used = btrfs_space_info_used(space_info, false);
203 
204 	/*
205 	 * We only want to allow over committing if we have lots of actual space
206 	 * free, but if we don't have enough space to handle the global reserve
207 	 * space then we could end up having a real enospc problem when trying
208 	 * to allocate a chunk or some other such important allocation.
209 	 */
210 	spin_lock(&global_rsv->lock);
211 	space_size = calc_global_rsv_need_space(global_rsv);
212 	spin_unlock(&global_rsv->lock);
213 	if (used + space_size >= space_info->total_bytes)
214 		return 0;
215 
216 	used += space_info->bytes_may_use;
217 
218 	avail = atomic64_read(&fs_info->free_chunk_space);
219 
220 	/*
221 	 * If we have dup, raid1 or raid10 then only half of the free
222 	 * space is actually usable.  For raid56, the space info used
223 	 * doesn't include the parity drive, so we don't have to
224 	 * change the math
225 	 */
226 	factor = btrfs_bg_type_to_factor(profile);
227 	avail = div_u64(avail, factor);
228 
229 	/*
230 	 * If we aren't flushing all things, let us overcommit up to
231 	 * 1/2th of the space. If we can flush, don't let us overcommit
232 	 * too much, let it overcommit up to 1/8 of the space.
233 	 */
234 	if (flush == BTRFS_RESERVE_FLUSH_ALL)
235 		avail >>= 3;
236 	else
237 		avail >>= 1;
238 
239 	if (used + bytes < space_info->total_bytes + avail)
240 		return 1;
241 	return 0;
242 }
243 
244 /*
245  * This is for space we already have accounted in space_info->bytes_may_use, so
246  * basically when we're returning space from block_rsv's.
247  */
248 void btrfs_space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
249 				    struct btrfs_space_info *space_info,
250 				    u64 num_bytes)
251 {
252 	struct reserve_ticket *ticket;
253 	struct list_head *head;
254 	u64 used;
255 	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
256 	bool check_overcommit = false;
257 
258 	spin_lock(&space_info->lock);
259 	head = &space_info->priority_tickets;
260 
261 	/*
262 	 * If we are over our limit then we need to check and see if we can
263 	 * overcommit, and if we can't then we just need to free up our space
264 	 * and not satisfy any requests.
265 	 */
266 	used = btrfs_space_info_used(space_info, true);
267 	if (used - num_bytes >= space_info->total_bytes)
268 		check_overcommit = true;
269 again:
270 	while (!list_empty(head) && num_bytes) {
271 		ticket = list_first_entry(head, struct reserve_ticket,
272 					  list);
273 		/*
274 		 * We use 0 bytes because this space is already reserved, so
275 		 * adding the ticket space would be a double count.
276 		 */
277 		if (check_overcommit &&
278 		    !btrfs_can_overcommit(fs_info, space_info, 0, flush,
279 					  false))
280 			break;
281 		if (num_bytes >= ticket->bytes) {
282 			list_del_init(&ticket->list);
283 			num_bytes -= ticket->bytes;
284 			ticket->bytes = 0;
285 			space_info->tickets_id++;
286 			wake_up(&ticket->wait);
287 		} else {
288 			ticket->bytes -= num_bytes;
289 			num_bytes = 0;
290 		}
291 	}
292 
293 	if (num_bytes && head == &space_info->priority_tickets) {
294 		head = &space_info->tickets;
295 		flush = BTRFS_RESERVE_FLUSH_ALL;
296 		goto again;
297 	}
298 	btrfs_space_info_update_bytes_may_use(fs_info, space_info, -num_bytes);
299 	trace_btrfs_space_reservation(fs_info, "space_info",
300 				      space_info->flags, num_bytes, 0);
301 	spin_unlock(&space_info->lock);
302 }
303 
304 /*
305  * This is for newly allocated space that isn't accounted in
306  * space_info->bytes_may_use yet.  So if we allocate a chunk or unpin an extent
307  * we use this helper.
308  */
309 void btrfs_space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
310 				    struct btrfs_space_info *space_info,
311 				    u64 num_bytes)
312 {
313 	struct reserve_ticket *ticket;
314 	struct list_head *head = &space_info->priority_tickets;
315 
316 again:
317 	while (!list_empty(head) && num_bytes) {
318 		ticket = list_first_entry(head, struct reserve_ticket,
319 					  list);
320 		if (num_bytes >= ticket->bytes) {
321 			trace_btrfs_space_reservation(fs_info, "space_info",
322 						      space_info->flags,
323 						      ticket->bytes, 1);
324 			list_del_init(&ticket->list);
325 			num_bytes -= ticket->bytes;
326 			btrfs_space_info_update_bytes_may_use(fs_info,
327 							      space_info,
328 							      ticket->bytes);
329 			ticket->bytes = 0;
330 			space_info->tickets_id++;
331 			wake_up(&ticket->wait);
332 		} else {
333 			trace_btrfs_space_reservation(fs_info, "space_info",
334 						      space_info->flags,
335 						      num_bytes, 1);
336 			btrfs_space_info_update_bytes_may_use(fs_info,
337 							      space_info,
338 							      num_bytes);
339 			ticket->bytes -= num_bytes;
340 			num_bytes = 0;
341 		}
342 	}
343 
344 	if (num_bytes && head == &space_info->priority_tickets) {
345 		head = &space_info->tickets;
346 		goto again;
347 	}
348 }
349