xref: /openbmc/linux/fs/btrfs/qgroup.h (revision d0e22329)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * Copyright (C) 2014 Facebook.  All rights reserved.
4  */
5 
6 #ifndef BTRFS_QGROUP_H
7 #define BTRFS_QGROUP_H
8 
9 #include "ulist.h"
10 #include "delayed-ref.h"
11 
/*
 * Btrfs qgroup overview
 *
 * Btrfs qgroup is split into 3 main parts:
 * 1) Reserve
 *    Reserve metadata/data space for incoming operations.
 *    This affects how the qgroup limit works.
 *
 * 2) Trace
 *    Tell btrfs qgroup to trace dirty extents.
 *
 *    Dirty extents include:
 *    - Newly allocated extents
 *    - Extents going to be deleted (in this trans)
 *    - Extents whose owner is going to be modified
 *
 *    This is the main part that affects whether qgroup numbers will stay
 *    consistent.
 *    Btrfs qgroup can trace clean extents without causing any problem,
 *    but doing so consumes extra CPU time, so it should be avoided if
 *    possible.
 *
 * 3) Account
 *    Btrfs qgroup will update its numbers, based on the dirty extents traced
 *    in the previous step.
 *
 *    This normally happens at qgroup rescan and transaction commit time.
 */
39 
40 /*
41  * Record a dirty extent, and info qgroup to update quota on it
42  * TODO: Use kmem cache to alloc it.
43  */
44 struct btrfs_qgroup_extent_record {
45 	struct rb_node node;
46 	u64 bytenr;
47 	u64 num_bytes;
48 	struct ulist *old_roots;
49 };
50 
/*
 * Qgroup reservation types:
 *
 * DATA:
 *	Space reserved for data.
 *
 * META_PERTRANS:
 *	Space reserved for metadata (per-transaction).
 *	Due to the fact that qgroup data is only updated at transaction commit
 *	time, reserved space for metadata must be kept until the transaction
 *	commits.
 *	Any metadata reservation that is used in btrfs_start_transaction()
 *	should be of this type.
 *
 * META_PREALLOC:
 *	There are cases where metadata space is reserved before starting a
 *	transaction, and then btrfs_join_transaction() is used to get a trans
 *	handle.
 *	Any metadata reserved for such usage should be of this type.
 *	After join_transaction(), part (or all) of such a reservation should
 *	be converted into META_PERTRANS.
 *
 * BTRFS_QGROUP_RSV_LAST is not a reservation type; it is the number of types
 * and must stay last.
 */
enum btrfs_qgroup_rsv_type {
	BTRFS_QGROUP_RSV_DATA,
	BTRFS_QGROUP_RSV_META_PERTRANS,
	BTRFS_QGROUP_RSV_META_PREALLOC,
	BTRFS_QGROUP_RSV_LAST,
};
78 
79 /*
80  * Represents how many bytes we have reserved for this qgroup.
81  *
82  * Each type should have different reservation behavior.
83  * E.g, data follows its io_tree flag modification, while
84  * *currently* meta is just reserve-and-clear during transaction.
85  *
86  * TODO: Add new type for reservation which can survive transaction commit.
87  * Current metadata reservation behavior is not suitable for such case.
88  */
89 struct btrfs_qgroup_rsv {
90 	u64 values[BTRFS_QGROUP_RSV_LAST];
91 };
92 
93 /*
94  * one struct for each qgroup, organized in fs_info->qgroup_tree.
95  */
96 struct btrfs_qgroup {
97 	u64 qgroupid;
98 
99 	/*
100 	 * state
101 	 */
102 	u64 rfer;	/* referenced */
103 	u64 rfer_cmpr;	/* referenced compressed */
104 	u64 excl;	/* exclusive */
105 	u64 excl_cmpr;	/* exclusive compressed */
106 
107 	/*
108 	 * limits
109 	 */
110 	u64 lim_flags;	/* which limits are set */
111 	u64 max_rfer;
112 	u64 max_excl;
113 	u64 rsv_rfer;
114 	u64 rsv_excl;
115 
116 	/*
117 	 * reservation tracking
118 	 */
119 	struct btrfs_qgroup_rsv rsv;
120 
121 	/*
122 	 * lists
123 	 */
124 	struct list_head groups;  /* groups this group is member of */
125 	struct list_head members; /* groups that are members of this group */
126 	struct list_head dirty;   /* dirty groups */
127 	struct rb_node node;	  /* tree of qgroups */
128 
129 	/*
130 	 * temp variables for accounting operations
131 	 * Refer to qgroup_shared_accounting() for details.
132 	 */
133 	u64 old_refcnt;
134 	u64 new_refcnt;
135 };
136 
/*
 * For qgroup event trace points only.
 *
 * Each value is a distinct single bit.
 */
#define QGROUP_RESERVE		(1 << 0)
#define QGROUP_RELEASE		(1 << 1)
#define QGROUP_FREE		(1 << 2)
143 
144 int btrfs_quota_enable(struct btrfs_fs_info *fs_info);
145 int btrfs_quota_disable(struct btrfs_fs_info *fs_info);
146 int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
147 void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
148 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
149 				     bool interruptible);
150 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
151 			      u64 dst);
152 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
153 			      u64 dst);
154 int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid);
155 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid);
156 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
157 		       struct btrfs_qgroup_limit *limit);
158 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
159 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
160 struct btrfs_delayed_extent_op;
161 
/*
 * Inform qgroup to trace one dirty extent, its info is recorded in @record.
 * So qgroup can account it at transaction committing time.
 *
 * No lock version, caller must acquire delayed ref lock and allocate memory,
 * then call btrfs_qgroup_trace_extent_post() after exiting lock context.
 *
 * Return 0 for successful insert
 * Return >0 for existing record, caller can free @record safely.
 * Error is not possible
 */
int btrfs_qgroup_trace_extent_nolock(
		struct btrfs_fs_info *fs_info,
		struct btrfs_delayed_ref_root *delayed_refs,
		struct btrfs_qgroup_extent_record *record);
177 
/*
 * Post handler after qgroup_trace_extent_nolock().
 *
 * NOTE: Currently qgroup does the expensive backref walk at transaction
 * committing time with TRANS_STATE_COMMIT_DOING, which blocks incoming
 * new transactions.
 * This is designed to allow btrfs_find_all_roots() to get the correct
 * new_roots result.
 *
 * However for old_roots there is no need to do the backref walk at that time,
 * since we search commit roots to walk backrefs and the result will always be
 * correct.
 *
 * Due to the nature of the no lock version, we can't do the backref walk
 * there. So we must call btrfs_qgroup_trace_extent_post() after exiting
 * spinlock context.
 *
 * TODO: If we can fix and prove btrfs_find_all_roots() can get correct result
 * using current root, then we can move all expensive backref walk out of
 * transaction committing, but not now as qgroup accounting will be wrong again.
 */
int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
				   struct btrfs_qgroup_extent_record *qrecord);
201 
202 /*
203  * Inform qgroup to trace one dirty extent, specified by @bytenr and
204  * @num_bytes.
205  * So qgroup can account it at commit trans time.
206  *
207  * Better encapsulated version, with memory allocation and backref walk for
208  * commit roots.
209  * So this can sleep.
210  *
211  * Return 0 if the operation is done.
212  * Return <0 for error, like memory allocation failure or invalid parameter
213  * (NULL trans)
214  */
215 int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
216 			      u64 num_bytes, gfp_t gfp_flag);
217 
/*
 * Inform qgroup to trace all leaf items of data
 *
 * Return 0 for success
 * Return <0 for error (ENOMEM)
 */
int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
				  struct extent_buffer *eb);
226 /*
227  * Inform qgroup to trace a whole subtree, including all its child tree
228  * blocks and data.
229  * The root tree block is specified by @root_eb.
230  *
231  * Normally used by relocation(tree block swap) and subvolume deletion.
232  *
233  * Return 0 for success
234  * Return <0 for error(ENOMEM or tree search error)
235  */
236 int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
237 			       struct extent_buffer *root_eb,
238 			       u64 root_gen, int root_level);
239 
240 int btrfs_qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
241 				struct btrfs_block_group_cache *bg_cache,
242 				struct extent_buffer *src_parent, int src_slot,
243 				struct extent_buffer *dst_parent, int dst_slot,
244 				u64 last_snapshot);
245 int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
246 				u64 num_bytes, struct ulist *old_roots,
247 				struct ulist *new_roots);
248 int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans);
249 int btrfs_run_qgroups(struct btrfs_trans_handle *trans);
250 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
251 			 u64 objectid, struct btrfs_qgroup_inherit *inherit);
252 void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
253 			       u64 ref_root, u64 num_bytes,
254 			       enum btrfs_qgroup_rsv_type type);
255 static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info,
256 						 u64 ref_root, u64 num_bytes)
257 {
258 	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
259 		return;
260 	trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes);
261 	btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes,
262 				  BTRFS_QGROUP_RSV_DATA);
263 }
264 
/* Only declared when the btrfs sanity tests are built in. */
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
			       u64 rfer, u64 excl);
#endif
269 
270 /* New io_tree based accurate qgroup reserve API */
271 int btrfs_qgroup_reserve_data(struct inode *inode,
272 			struct extent_changeset **reserved, u64 start, u64 len);
273 int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len);
274 int btrfs_qgroup_free_data(struct inode *inode,
275 			struct extent_changeset *reserved, u64 start, u64 len);
276 
277 int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
278 				enum btrfs_qgroup_rsv_type type, bool enforce);
279 /* Reserve metadata space for pertrans and prealloc type */
280 static inline int btrfs_qgroup_reserve_meta_pertrans(struct btrfs_root *root,
281 				int num_bytes, bool enforce)
282 {
283 	return __btrfs_qgroup_reserve_meta(root, num_bytes,
284 			BTRFS_QGROUP_RSV_META_PERTRANS, enforce);
285 }
286 static inline int btrfs_qgroup_reserve_meta_prealloc(struct btrfs_root *root,
287 				int num_bytes, bool enforce)
288 {
289 	return __btrfs_qgroup_reserve_meta(root, num_bytes,
290 			BTRFS_QGROUP_RSV_META_PREALLOC, enforce);
291 }
292 
/*
 * Free @num_bytes of metadata reservation of the given @type for @root.
 *
 * The inline helpers btrfs_qgroup_free_meta_pertrans() and
 * btrfs_qgroup_free_meta_prealloc() call this with a fixed @type.
 */
void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
			     enum btrfs_qgroup_rsv_type type);
295 
296 /* Free per-transaction meta reservation for error handling */
297 static inline void btrfs_qgroup_free_meta_pertrans(struct btrfs_root *root,
298 						   int num_bytes)
299 {
300 	__btrfs_qgroup_free_meta(root, num_bytes,
301 			BTRFS_QGROUP_RSV_META_PERTRANS);
302 }
303 
304 /* Pre-allocated meta reservation can be freed at need */
305 static inline void btrfs_qgroup_free_meta_prealloc(struct btrfs_root *root,
306 						   int num_bytes)
307 {
308 	__btrfs_qgroup_free_meta(root, num_bytes,
309 			BTRFS_QGROUP_RSV_META_PREALLOC);
310 }
311 
/*
 * Per-transaction meta reservation should all be freed at transaction commit
 * time
 */
void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root);
317 
/*
 * Convert @num_bytes of META_PREALLOC reservation to META_PERTRANS.
 *
 * This is called when preallocated meta reservation needs to be used.
 * Normally after btrfs_join_transaction() call.
 */
void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes);
325 
/* NOTE(review): presumably reports qgroup reservations still held by @inode - confirm */
void btrfs_qgroup_check_reserved_leak(struct inode *inode);
327 
328 #endif
329