xref: /openbmc/linux/fs/btrfs/qgroup.h (revision 39616c27)
19888c340SDavid Sterba /* SPDX-License-Identifier: GPL-2.0 */
2fcebe456SJosef Bacik /*
3fcebe456SJosef Bacik  * Copyright (C) 2014 Facebook.  All rights reserved.
4fcebe456SJosef Bacik  */
5fcebe456SJosef Bacik 
69888c340SDavid Sterba #ifndef BTRFS_QGROUP_H
79888c340SDavid Sterba #define BTRFS_QGROUP_H
8fcebe456SJosef Bacik 
93368d001SQu Wenruo #include "ulist.h"
103368d001SQu Wenruo #include "delayed-ref.h"
113368d001SQu Wenruo 
/*
 * Btrfs qgroup overview
 *
 * Btrfs qgroup is split into 3 main parts:
 * 1) Reserve
 *    Reserve metadata/data space for incoming operations
 *    Affects how qgroup limit works
 *
 * 2) Trace
 *    Tell btrfs qgroup to trace dirty extents.
 *
 *    Dirty extents include:
 *    - Newly allocated extents
 *    - Extents going to be deleted (in this trans)
 *    - Extents whose owner is going to be modified
 *
 *    This is the main part that affects whether qgroup numbers will stay
 *    consistent.
 *    Btrfs qgroup can trace clean extents without causing any problem,
 *    but doing so consumes extra CPU time, so it should be avoided if
 *    possible.
 *
 * 3) Account
 *    Btrfs qgroup will update its numbers, based on the dirty extents traced
 *    in the previous step.
 *
 *    Normally at qgroup rescan and transaction commit time.
 */
391d2beaa9SQu Wenruo 
401d2beaa9SQu Wenruo /*
413368d001SQu Wenruo  * Record a dirty extent, and info qgroup to update quota on it
423368d001SQu Wenruo  * TODO: Use kmem cache to alloc it.
433368d001SQu Wenruo  */
443368d001SQu Wenruo struct btrfs_qgroup_extent_record {
453368d001SQu Wenruo 	struct rb_node node;
463368d001SQu Wenruo 	u64 bytenr;
473368d001SQu Wenruo 	u64 num_bytes;
483368d001SQu Wenruo 	struct ulist *old_roots;
493368d001SQu Wenruo };
503368d001SQu Wenruo 
/*
 * Qgroup reservation types:
 *
 * DATA:
 *	Space reserved for data
 *
 * META_PERTRANS:
 *	Space reserved for metadata (per-transaction)
 *	Due to the fact that qgroup data is only updated at transaction commit
 *	time, reserved space for metadata must be kept until transaction
 *	commits.
 *	Any metadata reservation that is used in btrfs_start_transaction()
 *	should be of this type.
 *
 * META_PREALLOC:
 *	There are cases where metadata space is reserved before starting
 *	transaction, and then btrfs_join_transaction() to get a trans handle.
 *	Any metadata reserved for such usage should be of this type.
 *	And after join_transaction() part (or all) of such reservation should
 *	be converted into META_PERTRANS.
 *
 * LAST is not a reservation type; it is the number of types and is used to
 * size per-type arrays.
 */
enum btrfs_qgroup_rsv_type {
	BTRFS_QGROUP_RSV_DATA = 0,
	BTRFS_QGROUP_RSV_META_PERTRANS,
	BTRFS_QGROUP_RSV_META_PREALLOC,
	BTRFS_QGROUP_RSV_LAST,
};
78d4e5c920SQu Wenruo 
79d4e5c920SQu Wenruo /*
80d4e5c920SQu Wenruo  * Represents how many bytes we have reserved for this qgroup.
81d4e5c920SQu Wenruo  *
82d4e5c920SQu Wenruo  * Each type should have different reservation behavior.
83d4e5c920SQu Wenruo  * E.g, data follows its io_tree flag modification, while
84d4e5c920SQu Wenruo  * *currently* meta is just reserve-and-clear during transcation.
85d4e5c920SQu Wenruo  *
86d4e5c920SQu Wenruo  * TODO: Add new type for reservation which can survive transaction commit.
87d4e5c920SQu Wenruo  * Currect metadata reservation behavior is not suitable for such case.
88d4e5c920SQu Wenruo  */
89d4e5c920SQu Wenruo struct btrfs_qgroup_rsv {
90d4e5c920SQu Wenruo 	u64 values[BTRFS_QGROUP_RSV_LAST];
91d4e5c920SQu Wenruo };
92d4e5c920SQu Wenruo 
9381fb6f77SQu Wenruo /*
943159fe7bSQu Wenruo  * one struct for each qgroup, organized in fs_info->qgroup_tree.
953159fe7bSQu Wenruo  */
963159fe7bSQu Wenruo struct btrfs_qgroup {
973159fe7bSQu Wenruo 	u64 qgroupid;
983159fe7bSQu Wenruo 
993159fe7bSQu Wenruo 	/*
1003159fe7bSQu Wenruo 	 * state
1013159fe7bSQu Wenruo 	 */
1023159fe7bSQu Wenruo 	u64 rfer;	/* referenced */
1033159fe7bSQu Wenruo 	u64 rfer_cmpr;	/* referenced compressed */
1043159fe7bSQu Wenruo 	u64 excl;	/* exclusive */
1053159fe7bSQu Wenruo 	u64 excl_cmpr;	/* exclusive compressed */
1063159fe7bSQu Wenruo 
1073159fe7bSQu Wenruo 	/*
1083159fe7bSQu Wenruo 	 * limits
1093159fe7bSQu Wenruo 	 */
1103159fe7bSQu Wenruo 	u64 lim_flags;	/* which limits are set */
1113159fe7bSQu Wenruo 	u64 max_rfer;
1123159fe7bSQu Wenruo 	u64 max_excl;
1133159fe7bSQu Wenruo 	u64 rsv_rfer;
1143159fe7bSQu Wenruo 	u64 rsv_excl;
1153159fe7bSQu Wenruo 
1163159fe7bSQu Wenruo 	/*
1173159fe7bSQu Wenruo 	 * reservation tracking
1183159fe7bSQu Wenruo 	 */
119d4e5c920SQu Wenruo 	struct btrfs_qgroup_rsv rsv;
1203159fe7bSQu Wenruo 
1213159fe7bSQu Wenruo 	/*
1223159fe7bSQu Wenruo 	 * lists
1233159fe7bSQu Wenruo 	 */
1243159fe7bSQu Wenruo 	struct list_head groups;  /* groups this group is member of */
1253159fe7bSQu Wenruo 	struct list_head members; /* groups that are members of this group */
1263159fe7bSQu Wenruo 	struct list_head dirty;   /* dirty groups */
1273159fe7bSQu Wenruo 	struct rb_node node;	  /* tree of qgroups */
1283159fe7bSQu Wenruo 
1293159fe7bSQu Wenruo 	/*
1303159fe7bSQu Wenruo 	 * temp variables for accounting operations
1313159fe7bSQu Wenruo 	 * Refer to qgroup_shared_accounting() for details.
1323159fe7bSQu Wenruo 	 */
1333159fe7bSQu Wenruo 	u64 old_refcnt;
1343159fe7bSQu Wenruo 	u64 new_refcnt;
1353159fe7bSQu Wenruo };
1363159fe7bSQu Wenruo 
/*
 * For qgroup event trace points only.
 * Each event is a distinct bit.
 */
#define QGROUP_RESERVE		(1 << 0)
#define QGROUP_RELEASE		(1 << 1)
#define QGROUP_FREE		(1 << 2)
14381fb6f77SQu Wenruo 
144340f1aa2SNikolay Borisov int btrfs_quota_enable(struct btrfs_fs_info *fs_info);
145340f1aa2SNikolay Borisov int btrfs_quota_disable(struct btrfs_fs_info *fs_info);
146fcebe456SJosef Bacik int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
147fcebe456SJosef Bacik void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
148d06f23d6SJeff Mahoney int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
149d06f23d6SJeff Mahoney 				     bool interruptible);
1509f8a6ce6SLu Fengqi int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
1519f8a6ce6SLu Fengqi 			      u64 dst);
15239616c27SLu Fengqi int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
15339616c27SLu Fengqi 			      u64 dst);
154fcebe456SJosef Bacik int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
1554087cf24SDongsheng Yang 			struct btrfs_fs_info *fs_info, u64 qgroupid);
156fcebe456SJosef Bacik int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
157fcebe456SJosef Bacik 			      struct btrfs_fs_info *fs_info, u64 qgroupid);
158fcebe456SJosef Bacik int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
159fcebe456SJosef Bacik 		       struct btrfs_fs_info *fs_info, u64 qgroupid,
160fcebe456SJosef Bacik 		       struct btrfs_qgroup_limit *limit);
161fcebe456SJosef Bacik int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
162fcebe456SJosef Bacik void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
163fcebe456SJosef Bacik struct btrfs_delayed_extent_op;
164d1b8b94aSQu Wenruo 
/*
 * Inform qgroup to trace one dirty extent, its info is recorded in @record.
 * So qgroup can account it at transaction committing time.
 *
 * No lock version, caller must acquire delayed ref lock and allocate memory,
 * then call btrfs_qgroup_trace_extent_post() after exiting lock context.
 *
 * Return 0 for successful insert
 * Return >0 for existing record, caller can free @record safely.
 * Error is not possible
 */
int btrfs_qgroup_trace_extent_nolock(
		struct btrfs_fs_info *fs_info,
		struct btrfs_delayed_ref_root *delayed_refs,
		struct btrfs_qgroup_extent_record *record);
180cb93b52cSQu Wenruo 
/*
 * Post handler after btrfs_qgroup_trace_extent_nolock().
 *
 * NOTE: Current qgroup does the expensive backref walk at transaction
 * committing time with TRANS_STATE_COMMIT_DOING, this blocks incoming
 * new transaction.
 * This is designed to allow btrfs_find_all_roots() to get correct new_roots
 * result.
 *
 * However for old_roots there is no need to do backref walk at that time,
 * since we search commit roots to walk backref and result will always be
 * correct.
 *
 * Due to the nature of no lock version, we can't do backref there.
 * So we must call btrfs_qgroup_trace_extent_post() after exiting
 * spinlock context.
 *
 * TODO: If we can fix and prove btrfs_find_all_roots() can get correct result
 * using current root, then we can move all expensive backref walk out of
 * transaction committing, but not now as qgroup accounting will be wrong again.
 */
int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
				   struct btrfs_qgroup_extent_record *qrecord);
204fb235dc0SQu Wenruo 
205fb235dc0SQu Wenruo /*
20650b3e040SQu Wenruo  * Inform qgroup to trace one dirty extent, specified by @bytenr and
20750b3e040SQu Wenruo  * @num_bytes.
20850b3e040SQu Wenruo  * So qgroup can account it at commit trans time.
209cb93b52cSQu Wenruo  *
210fb235dc0SQu Wenruo  * Better encapsulated version, with memory allocation and backref walk for
211fb235dc0SQu Wenruo  * commit roots.
212fb235dc0SQu Wenruo  * So this can sleep.
213cb93b52cSQu Wenruo  *
214cb93b52cSQu Wenruo  * Return 0 if the operation is done.
215cb93b52cSQu Wenruo  * Return <0 for error, like memory allocation failure or invalid parameter
216cb93b52cSQu Wenruo  * (NULL trans)
217cb93b52cSQu Wenruo  */
21850b3e040SQu Wenruo int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
219cb93b52cSQu Wenruo 		struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
220cb93b52cSQu Wenruo 		gfp_t gfp_flag);
221cb93b52cSQu Wenruo 
/*
 * Inform qgroup to trace all leaf items of data
 *
 * Return 0 for success
 * Return <0 for error(ENOMEM)
 */
int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
				  struct btrfs_fs_info *fs_info,
				  struct extent_buffer *eb);
23133d1f05cSQu Wenruo /*
23233d1f05cSQu Wenruo  * Inform qgroup to trace a whole subtree, including all its child tree
23333d1f05cSQu Wenruo  * blocks and data.
23433d1f05cSQu Wenruo  * The root tree block is specified by @root_eb.
23533d1f05cSQu Wenruo  *
23633d1f05cSQu Wenruo  * Normally used by relocation(tree block swap) and subvolume deletion.
23733d1f05cSQu Wenruo  *
23833d1f05cSQu Wenruo  * Return 0 for success
23933d1f05cSQu Wenruo  * Return <0 for error(ENOMEM or tree search error)
24033d1f05cSQu Wenruo  */
24133d1f05cSQu Wenruo int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
24233d1f05cSQu Wenruo 			       struct btrfs_root *root,
24333d1f05cSQu Wenruo 			       struct extent_buffer *root_eb,
24433d1f05cSQu Wenruo 			       u64 root_gen, int root_level);
245442244c9SQu Wenruo int
246442244c9SQu Wenruo btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
247442244c9SQu Wenruo 			    struct btrfs_fs_info *fs_info,
248442244c9SQu Wenruo 			    u64 bytenr, u64 num_bytes,
249442244c9SQu Wenruo 			    struct ulist *old_roots, struct ulist *new_roots);
250460fb20aSNikolay Borisov int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans);
251fcebe456SJosef Bacik int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
252fcebe456SJosef Bacik 		      struct btrfs_fs_info *fs_info);
253fcebe456SJosef Bacik int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
254fcebe456SJosef Bacik 			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
255fcebe456SJosef Bacik 			 struct btrfs_qgroup_inherit *inherit);
256297d750bSQu Wenruo void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
257d4e5c920SQu Wenruo 			       u64 ref_root, u64 num_bytes,
258d4e5c920SQu Wenruo 			       enum btrfs_qgroup_rsv_type type);
259297d750bSQu Wenruo static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info,
260297d750bSQu Wenruo 						 u64 ref_root, u64 num_bytes)
261297d750bSQu Wenruo {
262bc074524SJeff Mahoney 	trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes);
263d4e5c920SQu Wenruo 	btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes,
264d4e5c920SQu Wenruo 				  BTRFS_QGROUP_RSV_DATA);
265297d750bSQu Wenruo }
266fcebe456SJosef Bacik 
/* Only declared when the btrfs sanity tests are built in */
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
			       u64 rfer, u64 excl);
#endif
271fcebe456SJosef Bacik 
27252472553SQu Wenruo /* New io_tree based accurate qgroup reserve API */
273364ecf36SQu Wenruo int btrfs_qgroup_reserve_data(struct inode *inode,
274364ecf36SQu Wenruo 			struct extent_changeset **reserved, u64 start, u64 len);
275f695fdceSQu Wenruo int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len);
276bc42bda2SQu Wenruo int btrfs_qgroup_free_data(struct inode *inode,
277bc42bda2SQu Wenruo 			struct extent_changeset *reserved, u64 start, u64 len);
27855eeaf05SQu Wenruo 
279733e03a0SQu Wenruo int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
280733e03a0SQu Wenruo 				enum btrfs_qgroup_rsv_type type, bool enforce);
281733e03a0SQu Wenruo /* Reserve metadata space for pertrans and prealloc type */
282733e03a0SQu Wenruo static inline int btrfs_qgroup_reserve_meta_pertrans(struct btrfs_root *root,
283733e03a0SQu Wenruo 				int num_bytes, bool enforce)
284733e03a0SQu Wenruo {
285733e03a0SQu Wenruo 	return __btrfs_qgroup_reserve_meta(root, num_bytes,
286733e03a0SQu Wenruo 			BTRFS_QGROUP_RSV_META_PERTRANS, enforce);
287733e03a0SQu Wenruo }
288733e03a0SQu Wenruo static inline int btrfs_qgroup_reserve_meta_prealloc(struct btrfs_root *root,
289733e03a0SQu Wenruo 				int num_bytes, bool enforce)
290733e03a0SQu Wenruo {
291733e03a0SQu Wenruo 	return __btrfs_qgroup_reserve_meta(root, num_bytes,
292733e03a0SQu Wenruo 			BTRFS_QGROUP_RSV_META_PREALLOC, enforce);
293733e03a0SQu Wenruo }
294733e03a0SQu Wenruo 
/*
 * Free @num_bytes of metadata reservation of type @type for @root.
 * Callers normally use the _pertrans/_prealloc wrappers instead of calling
 * this directly.
 */
void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
			      enum btrfs_qgroup_rsv_type type);
297733e03a0SQu Wenruo 
298733e03a0SQu Wenruo /* Free per-transaction meta reservation for error handling */
299733e03a0SQu Wenruo static inline void btrfs_qgroup_free_meta_pertrans(struct btrfs_root *root,
300733e03a0SQu Wenruo 						   int num_bytes)
301733e03a0SQu Wenruo {
302733e03a0SQu Wenruo 	__btrfs_qgroup_free_meta(root, num_bytes,
303733e03a0SQu Wenruo 			BTRFS_QGROUP_RSV_META_PERTRANS);
304733e03a0SQu Wenruo }
305733e03a0SQu Wenruo 
306733e03a0SQu Wenruo /* Pre-allocated meta reservation can be freed at need */
307733e03a0SQu Wenruo static inline void btrfs_qgroup_free_meta_prealloc(struct btrfs_root *root,
308733e03a0SQu Wenruo 						   int num_bytes)
309733e03a0SQu Wenruo {
310733e03a0SQu Wenruo 	__btrfs_qgroup_free_meta(root, num_bytes,
311733e03a0SQu Wenruo 			BTRFS_QGROUP_RSV_META_PREALLOC);
312733e03a0SQu Wenruo }
313733e03a0SQu Wenruo 
/*
 * Per-transaction meta reservation should be all freed at transaction commit
 * time
 */
void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root);
319733e03a0SQu Wenruo 
/*
 * Convert @num_bytes of META_PREALLOC reservation to META_PERTRANS.
 *
 * This is called when preallocated meta reservation needs to be used.
 * Normally after btrfs_join_transaction() call.
 */
void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes);
32764cfaef6SQu Wenruo 
/*
 * NOTE(review): by its name this checks @inode for leaked qgroup
 * reservations; the exact behavior is not visible from this header -- confirm.
 */
void btrfs_qgroup_check_reserved_leak(struct inode *inode);
3299888c340SDavid Sterba 
3309888c340SDavid Sterba #endif
331