xref: /openbmc/linux/fs/btrfs/qgroup.h (revision 4cff79e9)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * Copyright (C) 2014 Facebook.  All rights reserved.
4  */
5 
6 #ifndef BTRFS_QGROUP_H
7 #define BTRFS_QGROUP_H
8 
9 #include "ulist.h"
10 #include "delayed-ref.h"
11 
12 /*
13  * Btrfs qgroup overview
14  *
15  * Btrfs qgroup splits into 3 main parts:
16  * 1) Reserve
17  *    Reserve metadata/data space for incoming operations
18  *    Affect how qgroup limit works
19  *
20  * 2) Trace
21  *    Tell btrfs qgroup to trace dirty extents.
22  *
23  *    Dirty extents include:
24  *    - Newly allocated extents
25  *    - Extents going to be deleted (in this trans)
26  *    - Extents whose owner is going to be modified
27  *
28  *    This is the main part that affects whether qgroup numbers will stay
29  *    consistent.
30  *    Btrfs qgroup can trace clean extents without causing any problem,
31  *    but doing so consumes extra CPU time, so it should be avoided if possible.
32  *
33  * 3) Account
34  *    Btrfs qgroup will update its numbers, based on dirty extents traced
35  *    in the previous step.
36  *
37  *    Normally at qgroup rescan and transaction commit time.
38  */
39 
40 /*
41  * Record a dirty extent, and info qgroup to update quota on it
42  * TODO: Use kmem cache to alloc it.
43  */
44 struct btrfs_qgroup_extent_record {
45 	struct rb_node node;
46 	u64 bytenr;
47 	u64 num_bytes;
48 	struct ulist *old_roots;
49 };
50 
/*
 * Qgroup reservation types.
 *
 * Each enumerator is documented in place below. BTRFS_QGROUP_RSV_LAST is
 * not a reservation type; it is the number of types and is used to size
 * per-type arrays.
 */
enum btrfs_qgroup_rsv_type {
	/* DATA: space reserved for data. */
	BTRFS_QGROUP_RSV_DATA = 0,

	/*
	 * META_PERTRANS: space reserved for metadata (per-transaction).
	 *
	 * Due to the fact that qgroup data is only updated at transaction
	 * commit time, reserved space for metadata must be kept until the
	 * transaction commits.
	 * Any metadata reserved that is used in btrfs_start_transaction()
	 * should be of this type.
	 */
	BTRFS_QGROUP_RSV_META_PERTRANS,

	/*
	 * META_PREALLOC: there are cases where metadata space is reserved
	 * before starting a transaction, and then btrfs_join_transaction()
	 * is used to get a trans handle.
	 * Any metadata reserved for such usage should be of this type.
	 * After join_transaction(), part (or all) of such a reservation
	 * should be converted into META_PERTRANS.
	 */
	BTRFS_QGROUP_RSV_META_PREALLOC,

	/* Keep last: number of reservation types above. */
	BTRFS_QGROUP_RSV_LAST,
};
78 
79 /*
80  * Represents how many bytes we have reserved for this qgroup.
81  *
82  * Each type should have different reservation behavior.
83  * E.g, data follows its io_tree flag modification, while
84  * *currently* meta is just reserve-and-clear during transcation.
85  *
86  * TODO: Add new type for reservation which can survive transaction commit.
87  * Currect metadata reservation behavior is not suitable for such case.
88  */
89 struct btrfs_qgroup_rsv {
90 	u64 values[BTRFS_QGROUP_RSV_LAST];
91 };
92 
93 /*
94  * one struct for each qgroup, organized in fs_info->qgroup_tree.
95  */
96 struct btrfs_qgroup {
97 	u64 qgroupid;
98 
99 	/*
100 	 * state
101 	 */
102 	u64 rfer;	/* referenced */
103 	u64 rfer_cmpr;	/* referenced compressed */
104 	u64 excl;	/* exclusive */
105 	u64 excl_cmpr;	/* exclusive compressed */
106 
107 	/*
108 	 * limits
109 	 */
110 	u64 lim_flags;	/* which limits are set */
111 	u64 max_rfer;
112 	u64 max_excl;
113 	u64 rsv_rfer;
114 	u64 rsv_excl;
115 
116 	/*
117 	 * reservation tracking
118 	 */
119 	struct btrfs_qgroup_rsv rsv;
120 
121 	/*
122 	 * lists
123 	 */
124 	struct list_head groups;  /* groups this group is member of */
125 	struct list_head members; /* groups that are members of this group */
126 	struct list_head dirty;   /* dirty groups */
127 	struct rb_node node;	  /* tree of qgroups */
128 
129 	/*
130 	 * temp variables for accounting operations
131 	 * Refer to qgroup_shared_accounting() for details.
132 	 */
133 	u64 old_refcnt;
134 	u64 new_refcnt;
135 };
136 
/*
 * Event kinds, one bit each.
 * For qgroup event trace points only.
 */
#define QGROUP_RESERVE		(1 << 0)
#define QGROUP_RELEASE		(1 << 1)
#define QGROUP_FREE		(1 << 2)
143 
144 int btrfs_quota_enable(struct btrfs_trans_handle *trans,
145 		       struct btrfs_fs_info *fs_info);
146 int btrfs_quota_disable(struct btrfs_trans_handle *trans,
147 			struct btrfs_fs_info *fs_info);
148 int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
149 void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
150 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
151 				     bool interruptible);
152 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
153 			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
154 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
155 			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
156 int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
157 			struct btrfs_fs_info *fs_info, u64 qgroupid);
158 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
159 			      struct btrfs_fs_info *fs_info, u64 qgroupid);
160 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
161 		       struct btrfs_fs_info *fs_info, u64 qgroupid,
162 		       struct btrfs_qgroup_limit *limit);
163 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
164 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
165 struct btrfs_delayed_extent_op;
166 
/*
 * Inform qgroup to trace one dirty extent whose info is recorded in @record,
 * so that qgroup can account it at transaction commit time.
 *
 * This is the no-lock version: the caller must acquire the delayed ref lock
 * and allocate the memory itself, then call
 * btrfs_qgroup_trace_extent_post() after exiting the lock context.
 *
 * Return 0 if the record was inserted successfully.
 * Return >0 if a record already exists; the caller can free @record safely.
 * Error is not possible.
 */
int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
				     struct btrfs_delayed_ref_root *delayed_refs,
				     struct btrfs_qgroup_extent_record *record);
182 
/*
 * Post handler to be called after btrfs_qgroup_trace_extent_nolock().
 *
 * NOTE: Currently qgroup does the expensive backref walk at transaction
 * commit time, in TRANS_STATE_COMMIT_DOING, which blocks incoming new
 * transactions.
 * This is designed to allow btrfs_find_all_roots() to get a correct
 * new_roots result.
 *
 * For old_roots, however, there is no need to do the backref walk at that
 * time: commit roots are searched for the backref walk, so the result will
 * always be correct.
 *
 * Due to the nature of the no-lock version, the backref walk can't be done
 * there, so btrfs_qgroup_trace_extent_post() must be called after exiting
 * the spinlock context.
 *
 * TODO: If we can fix and prove that btrfs_find_all_roots() gets a correct
 * result using the current root, then all expensive backref walks can be
 * moved out of transaction commit, but not now, as qgroup accounting would
 * be wrong again.
 */
int btrfs_qgroup_trace_extent_post(
		struct btrfs_fs_info *fs_info,
		struct btrfs_qgroup_extent_record *qrecord);
206 
207 /*
208  * Inform qgroup to trace one dirty extent, specified by @bytenr and
209  * @num_bytes.
210  * So qgroup can account it at commit trans time.
211  *
212  * Better encapsulated version, with memory allocation and backref walk for
213  * commit roots.
214  * So this can sleep.
215  *
216  * Return 0 if the operation is done.
217  * Return <0 for error, like memory allocation failure or invalid parameter
218  * (NULL trans)
219  */
220 int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
221 		struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
222 		gfp_t gfp_flag);
223 
/*
 * Inform qgroup to trace all leaf items of data in @eb.
 *
 * Return 0 for success.
 * Return <0 for error (ENOMEM).
 */
int btrfs_qgroup_trace_leaf_items(
		struct btrfs_trans_handle *trans,
		struct btrfs_fs_info *fs_info,
		struct extent_buffer *eb);
233 /*
234  * Inform qgroup to trace a whole subtree, including all its child tree
235  * blocks and data.
236  * The root tree block is specified by @root_eb.
237  *
238  * Normally used by relocation(tree block swap) and subvolume deletion.
239  *
240  * Return 0 for success
241  * Return <0 for error(ENOMEM or tree search error)
242  */
243 int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
244 			       struct btrfs_root *root,
245 			       struct extent_buffer *root_eb,
246 			       u64 root_gen, int root_level);
247 int
248 btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
249 			    struct btrfs_fs_info *fs_info,
250 			    u64 bytenr, u64 num_bytes,
251 			    struct ulist *old_roots, struct ulist *new_roots);
252 int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans);
253 int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
254 		      struct btrfs_fs_info *fs_info);
255 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
256 			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
257 			 struct btrfs_qgroup_inherit *inherit);
258 void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
259 			       u64 ref_root, u64 num_bytes,
260 			       enum btrfs_qgroup_rsv_type type);
261 static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info,
262 						 u64 ref_root, u64 num_bytes)
263 {
264 	trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes);
265 	btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes,
266 				  BTRFS_QGROUP_RSV_DATA);
267 }
268 
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/* Only declared when CONFIG_BTRFS_FS_RUN_SANITY_TESTS is defined. */
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info,
			       u64 qgroupid, u64 rfer, u64 excl);
#endif
273 
274 /* New io_tree based accurate qgroup reserve API */
275 int btrfs_qgroup_reserve_data(struct inode *inode,
276 			struct extent_changeset **reserved, u64 start, u64 len);
277 int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len);
278 int btrfs_qgroup_free_data(struct inode *inode,
279 			struct extent_changeset *reserved, u64 start, u64 len);
280 
281 int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
282 				enum btrfs_qgroup_rsv_type type, bool enforce);
283 /* Reserve metadata space for pertrans and prealloc type */
284 static inline int btrfs_qgroup_reserve_meta_pertrans(struct btrfs_root *root,
285 				int num_bytes, bool enforce)
286 {
287 	return __btrfs_qgroup_reserve_meta(root, num_bytes,
288 			BTRFS_QGROUP_RSV_META_PERTRANS, enforce);
289 }
290 static inline int btrfs_qgroup_reserve_meta_prealloc(struct btrfs_root *root,
291 				int num_bytes, bool enforce)
292 {
293 	return __btrfs_qgroup_reserve_meta(root, num_bytes,
294 			BTRFS_QGROUP_RSV_META_PREALLOC, enforce);
295 }
296 
/*
 * Common metadata reservation free routine taking the reservation @type
 * explicitly. The typed inline wrappers in this header call it with a
 * fixed type.
 */
void __btrfs_qgroup_free_meta(struct btrfs_root *root,
			      int num_bytes,
			      enum btrfs_qgroup_rsv_type type);
299 
300 /* Free per-transaction meta reservation for error handling */
301 static inline void btrfs_qgroup_free_meta_pertrans(struct btrfs_root *root,
302 						   int num_bytes)
303 {
304 	__btrfs_qgroup_free_meta(root, num_bytes,
305 			BTRFS_QGROUP_RSV_META_PERTRANS);
306 }
307 
308 /* Pre-allocated meta reservation can be freed at need */
309 static inline void btrfs_qgroup_free_meta_prealloc(struct btrfs_root *root,
310 						   int num_bytes)
311 {
312 	__btrfs_qgroup_free_meta(root, num_bytes,
313 			BTRFS_QGROUP_RSV_META_PREALLOC);
314 }
315 
/*
 * Free all per-transaction meta reservation of @root.
 *
 * Per-transaction meta reservation should be all freed at transaction
 * commit time.
 */
void btrfs_qgroup_free_meta_all_pertrans(
		struct btrfs_root *root);
321 
/*
 * Convert @num_bytes of META_PREALLOC reservation to META_PERTRANS.
 *
 * This is called when pre-allocated meta reservation needs to be used,
 * normally after a btrfs_join_transaction() call.
 */
void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root,
					int num_bytes);

/*
 * NOTE(review): judging by the name only, this checks @inode for leaked
 * reservations -- confirm against the definition.
 */
void btrfs_qgroup_check_reserved_leak(struct inode *inode);
331 
332 #endif
333