xref: /openbmc/linux/fs/btrfs/qgroup.h (revision 9888c340)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2014 Facebook.  All rights reserved.
 */
5fcebe456SJosef Bacik 
69888c340SDavid Sterba #ifndef BTRFS_QGROUP_H
79888c340SDavid Sterba #define BTRFS_QGROUP_H
8fcebe456SJosef Bacik 
93368d001SQu Wenruo #include "ulist.h"
103368d001SQu Wenruo #include "delayed-ref.h"
113368d001SQu Wenruo 
/*
 * Btrfs qgroup overview
 *
 * Btrfs qgroup splits into 3 main parts:
 * 1) Reserve
 *    Reserve metadata/data space for incoming operations.
 *    Affects how the qgroup limit works.
 *
 * 2) Trace
 *    Tell btrfs qgroup to trace dirty extents.
 *
 *    Dirty extents include:
 *    - Newly allocated extents
 *    - Extents going to be deleted (in this trans)
 *    - Extents whose owner is going to be modified
 *
 *    This is the main part that affects whether qgroup numbers will stay
 *    consistent.
 *    Btrfs qgroup can trace clean extents without causing any problem,
 *    but doing so consumes extra CPU time, so it should be avoided if
 *    possible.
 *
 * 3) Account
 *    Btrfs qgroup will update its numbers, based on the dirty extents
 *    traced in the previous step.
 *
 *    Normally done at qgroup rescan and transaction commit time.
 */
391d2beaa9SQu Wenruo 
401d2beaa9SQu Wenruo /*
413368d001SQu Wenruo  * Record a dirty extent, and info qgroup to update quota on it
423368d001SQu Wenruo  * TODO: Use kmem cache to alloc it.
433368d001SQu Wenruo  */
443368d001SQu Wenruo struct btrfs_qgroup_extent_record {
453368d001SQu Wenruo 	struct rb_node node;
463368d001SQu Wenruo 	u64 bytenr;
473368d001SQu Wenruo 	u64 num_bytes;
483368d001SQu Wenruo 	struct ulist *old_roots;
493368d001SQu Wenruo };
503368d001SQu Wenruo 
/*
 * Qgroup reservation types:
 *
 * DATA:
 *	Space reserved for data.
 *
 * META_PERTRANS:
 *	Space reserved for metadata (per-transaction).
 *	Due to the fact that qgroup data is only updated at transaction commit
 *	time, reserved space for metadata must be kept until the transaction
 *	commits.
 *	Any metadata reservation that is used in btrfs_start_transaction()
 *	should be of this type.
 *
 * META_PREALLOC:
 *	There are cases where metadata space is reserved before starting a
 *	transaction, and then btrfs_join_transaction() is used to get a trans
 *	handle.
 *	Any metadata reserved for such usage should be of this type.
 *	After join_transaction(), part (or all) of such a reservation should
 *	be converted into META_PERTRANS.
 *
 * BTRFS_QGROUP_RSV_LAST is not a reservation type; it is the number of
 * types and is used to size per-type arrays.
 */
enum btrfs_qgroup_rsv_type {
	BTRFS_QGROUP_RSV_DATA = 0,
	BTRFS_QGROUP_RSV_META_PERTRANS,
	BTRFS_QGROUP_RSV_META_PREALLOC,
	BTRFS_QGROUP_RSV_LAST,
};
78d4e5c920SQu Wenruo 
79d4e5c920SQu Wenruo /*
80d4e5c920SQu Wenruo  * Represents how many bytes we have reserved for this qgroup.
81d4e5c920SQu Wenruo  *
82d4e5c920SQu Wenruo  * Each type should have different reservation behavior.
83d4e5c920SQu Wenruo  * E.g, data follows its io_tree flag modification, while
84d4e5c920SQu Wenruo  * *currently* meta is just reserve-and-clear during transcation.
85d4e5c920SQu Wenruo  *
86d4e5c920SQu Wenruo  * TODO: Add new type for reservation which can survive transaction commit.
87d4e5c920SQu Wenruo  * Currect metadata reservation behavior is not suitable for such case.
88d4e5c920SQu Wenruo  */
89d4e5c920SQu Wenruo struct btrfs_qgroup_rsv {
90d4e5c920SQu Wenruo 	u64 values[BTRFS_QGROUP_RSV_LAST];
91d4e5c920SQu Wenruo };
92d4e5c920SQu Wenruo 
9381fb6f77SQu Wenruo /*
943159fe7bSQu Wenruo  * one struct for each qgroup, organized in fs_info->qgroup_tree.
953159fe7bSQu Wenruo  */
963159fe7bSQu Wenruo struct btrfs_qgroup {
973159fe7bSQu Wenruo 	u64 qgroupid;
983159fe7bSQu Wenruo 
993159fe7bSQu Wenruo 	/*
1003159fe7bSQu Wenruo 	 * state
1013159fe7bSQu Wenruo 	 */
1023159fe7bSQu Wenruo 	u64 rfer;	/* referenced */
1033159fe7bSQu Wenruo 	u64 rfer_cmpr;	/* referenced compressed */
1043159fe7bSQu Wenruo 	u64 excl;	/* exclusive */
1053159fe7bSQu Wenruo 	u64 excl_cmpr;	/* exclusive compressed */
1063159fe7bSQu Wenruo 
1073159fe7bSQu Wenruo 	/*
1083159fe7bSQu Wenruo 	 * limits
1093159fe7bSQu Wenruo 	 */
1103159fe7bSQu Wenruo 	u64 lim_flags;	/* which limits are set */
1113159fe7bSQu Wenruo 	u64 max_rfer;
1123159fe7bSQu Wenruo 	u64 max_excl;
1133159fe7bSQu Wenruo 	u64 rsv_rfer;
1143159fe7bSQu Wenruo 	u64 rsv_excl;
1153159fe7bSQu Wenruo 
1163159fe7bSQu Wenruo 	/*
1173159fe7bSQu Wenruo 	 * reservation tracking
1183159fe7bSQu Wenruo 	 */
119d4e5c920SQu Wenruo 	struct btrfs_qgroup_rsv rsv;
1203159fe7bSQu Wenruo 
1213159fe7bSQu Wenruo 	/*
1223159fe7bSQu Wenruo 	 * lists
1233159fe7bSQu Wenruo 	 */
1243159fe7bSQu Wenruo 	struct list_head groups;  /* groups this group is member of */
1253159fe7bSQu Wenruo 	struct list_head members; /* groups that are members of this group */
1263159fe7bSQu Wenruo 	struct list_head dirty;   /* dirty groups */
1273159fe7bSQu Wenruo 	struct rb_node node;	  /* tree of qgroups */
1283159fe7bSQu Wenruo 
1293159fe7bSQu Wenruo 	/*
1303159fe7bSQu Wenruo 	 * temp variables for accounting operations
1313159fe7bSQu Wenruo 	 * Refer to qgroup_shared_accounting() for details.
1323159fe7bSQu Wenruo 	 */
1333159fe7bSQu Wenruo 	u64 old_refcnt;
1343159fe7bSQu Wenruo 	u64 new_refcnt;
1353159fe7bSQu Wenruo };
1363159fe7bSQu Wenruo 
/*
 * For qgroup event trace points only.
 *
 * Each value is a distinct single bit, so the flags can be told apart (or
 * combined) in a bitmask.
 */
#define QGROUP_RESERVE		(1<<0)
#define QGROUP_RELEASE		(1<<1)
#define QGROUP_FREE		(1<<2)
14381fb6f77SQu Wenruo 
144fcebe456SJosef Bacik int btrfs_quota_enable(struct btrfs_trans_handle *trans,
145fcebe456SJosef Bacik 		       struct btrfs_fs_info *fs_info);
146fcebe456SJosef Bacik int btrfs_quota_disable(struct btrfs_trans_handle *trans,
147fcebe456SJosef Bacik 			struct btrfs_fs_info *fs_info);
148fcebe456SJosef Bacik int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
149fcebe456SJosef Bacik void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
150d06f23d6SJeff Mahoney int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
151d06f23d6SJeff Mahoney 				     bool interruptible);
152fcebe456SJosef Bacik int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
153fcebe456SJosef Bacik 			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
154fcebe456SJosef Bacik int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
155fcebe456SJosef Bacik 			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
156fcebe456SJosef Bacik int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
1574087cf24SDongsheng Yang 			struct btrfs_fs_info *fs_info, u64 qgroupid);
158fcebe456SJosef Bacik int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
159fcebe456SJosef Bacik 			      struct btrfs_fs_info *fs_info, u64 qgroupid);
160fcebe456SJosef Bacik int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
161fcebe456SJosef Bacik 		       struct btrfs_fs_info *fs_info, u64 qgroupid,
162fcebe456SJosef Bacik 		       struct btrfs_qgroup_limit *limit);
163fcebe456SJosef Bacik int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
164fcebe456SJosef Bacik void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
165fcebe456SJosef Bacik struct btrfs_delayed_extent_op;
166d1b8b94aSQu Wenruo 
/*
 * Inform qgroup to trace one dirty extent; its info is recorded in @record,
 * so qgroup can account it at transaction commit time.
 *
 * No-lock version: the caller must hold the delayed ref lock and must have
 * allocated @record beforehand, then call btrfs_qgroup_trace_extent_post()
 * after leaving the lock context.
 *
 * Return 0 for a successful insert.
 * Return >0 for an existing record; the caller can free @record safely.
 * Error is not possible.
 */
int btrfs_qgroup_trace_extent_nolock(
		struct btrfs_fs_info *fs_info,
		struct btrfs_delayed_ref_root *delayed_refs,
		struct btrfs_qgroup_extent_record *record);
182cb93b52cSQu Wenruo 
/*
 * Post handler after btrfs_qgroup_trace_extent_nolock().
 *
 * NOTE: Currently qgroup does the expensive backref walk at transaction
 * commit time with TRANS_STATE_COMMIT_DOING, which blocks incoming new
 * transactions.
 * This is designed to allow btrfs_find_all_roots() to get the correct
 * new_roots result.
 *
 * However, for old_roots there is no need to do the backref walk at that
 * time, since we search commit roots to walk backrefs and the result will
 * always be correct.
 *
 * Due to the nature of the no-lock version, we can't do the backref walk
 * there, so btrfs_qgroup_trace_extent_post() must be called after leaving
 * the spinlock context.
 *
 * TODO: If we can fix and prove btrfs_find_all_roots() can get the correct
 * result using the current root, then we can move all expensive backref
 * walks out of transaction commit, but not now as qgroup accounting will be
 * wrong again.
 */
int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
				   struct btrfs_qgroup_extent_record *qrecord);
206fb235dc0SQu Wenruo 
207fb235dc0SQu Wenruo /*
20850b3e040SQu Wenruo  * Inform qgroup to trace one dirty extent, specified by @bytenr and
20950b3e040SQu Wenruo  * @num_bytes.
21050b3e040SQu Wenruo  * So qgroup can account it at commit trans time.
211cb93b52cSQu Wenruo  *
212fb235dc0SQu Wenruo  * Better encapsulated version, with memory allocation and backref walk for
213fb235dc0SQu Wenruo  * commit roots.
214fb235dc0SQu Wenruo  * So this can sleep.
215cb93b52cSQu Wenruo  *
216cb93b52cSQu Wenruo  * Return 0 if the operation is done.
217cb93b52cSQu Wenruo  * Return <0 for error, like memory allocation failure or invalid parameter
218cb93b52cSQu Wenruo  * (NULL trans)
219cb93b52cSQu Wenruo  */
22050b3e040SQu Wenruo int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
221cb93b52cSQu Wenruo 		struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
222cb93b52cSQu Wenruo 		gfp_t gfp_flag);
223cb93b52cSQu Wenruo 
/*
 * Inform qgroup to trace all leaf items of data.
 *
 * NOTE(review): @eb is presumably the leaf whose items are traced -- confirm
 * against the implementation.
 *
 * Return 0 for success.
 * Return <0 for error (ENOMEM).
 */
int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
				  struct btrfs_fs_info *fs_info,
				  struct extent_buffer *eb);
23333d1f05cSQu Wenruo /*
23433d1f05cSQu Wenruo  * Inform qgroup to trace a whole subtree, including all its child tree
23533d1f05cSQu Wenruo  * blocks and data.
23633d1f05cSQu Wenruo  * The root tree block is specified by @root_eb.
23733d1f05cSQu Wenruo  *
23833d1f05cSQu Wenruo  * Normally used by relocation(tree block swap) and subvolume deletion.
23933d1f05cSQu Wenruo  *
24033d1f05cSQu Wenruo  * Return 0 for success
24133d1f05cSQu Wenruo  * Return <0 for error(ENOMEM or tree search error)
24233d1f05cSQu Wenruo  */
24333d1f05cSQu Wenruo int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
24433d1f05cSQu Wenruo 			       struct btrfs_root *root,
24533d1f05cSQu Wenruo 			       struct extent_buffer *root_eb,
24633d1f05cSQu Wenruo 			       u64 root_gen, int root_level);
247442244c9SQu Wenruo int
248442244c9SQu Wenruo btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
249442244c9SQu Wenruo 			    struct btrfs_fs_info *fs_info,
250442244c9SQu Wenruo 			    u64 bytenr, u64 num_bytes,
251442244c9SQu Wenruo 			    struct ulist *old_roots, struct ulist *new_roots);
252460fb20aSNikolay Borisov int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans);
253fcebe456SJosef Bacik int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
254fcebe456SJosef Bacik 		      struct btrfs_fs_info *fs_info);
255fcebe456SJosef Bacik int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
256fcebe456SJosef Bacik 			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
257fcebe456SJosef Bacik 			 struct btrfs_qgroup_inherit *inherit);
258297d750bSQu Wenruo void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
259d4e5c920SQu Wenruo 			       u64 ref_root, u64 num_bytes,
260d4e5c920SQu Wenruo 			       enum btrfs_qgroup_rsv_type type);
261297d750bSQu Wenruo static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info,
262297d750bSQu Wenruo 						 u64 ref_root, u64 num_bytes)
263297d750bSQu Wenruo {
264bc074524SJeff Mahoney 	trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes);
265d4e5c920SQu Wenruo 	btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes,
266d4e5c920SQu Wenruo 				  BTRFS_QGROUP_RSV_DATA);
267297d750bSQu Wenruo }
268fcebe456SJosef Bacik 
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
 * Only declared when the btrfs sanity tests are built in.
 * NOTE(review): presumably compares the qgroup's referenced/exclusive
 * counters against @rfer and @excl -- confirm against the implementation.
 */
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
			       u64 rfer, u64 excl);
#endif
273fcebe456SJosef Bacik 
27452472553SQu Wenruo /* New io_tree based accurate qgroup reserve API */
275364ecf36SQu Wenruo int btrfs_qgroup_reserve_data(struct inode *inode,
276364ecf36SQu Wenruo 			struct extent_changeset **reserved, u64 start, u64 len);
277f695fdceSQu Wenruo int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len);
278bc42bda2SQu Wenruo int btrfs_qgroup_free_data(struct inode *inode,
279bc42bda2SQu Wenruo 			struct extent_changeset *reserved, u64 start, u64 len);
28055eeaf05SQu Wenruo 
281733e03a0SQu Wenruo int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
282733e03a0SQu Wenruo 				enum btrfs_qgroup_rsv_type type, bool enforce);
283733e03a0SQu Wenruo /* Reserve metadata space for pertrans and prealloc type */
284733e03a0SQu Wenruo static inline int btrfs_qgroup_reserve_meta_pertrans(struct btrfs_root *root,
285733e03a0SQu Wenruo 				int num_bytes, bool enforce)
286733e03a0SQu Wenruo {
287733e03a0SQu Wenruo 	return __btrfs_qgroup_reserve_meta(root, num_bytes,
288733e03a0SQu Wenruo 			BTRFS_QGROUP_RSV_META_PERTRANS, enforce);
289733e03a0SQu Wenruo }
290733e03a0SQu Wenruo static inline int btrfs_qgroup_reserve_meta_prealloc(struct btrfs_root *root,
291733e03a0SQu Wenruo 				int num_bytes, bool enforce)
292733e03a0SQu Wenruo {
293733e03a0SQu Wenruo 	return __btrfs_qgroup_reserve_meta(root, num_bytes,
294733e03a0SQu Wenruo 			BTRFS_QGROUP_RSV_META_PREALLOC, enforce);
295733e03a0SQu Wenruo }
296733e03a0SQu Wenruo 
297733e03a0SQu Wenruo void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
298733e03a0SQu Wenruo 			     enum btrfs_qgroup_rsv_type type);
299733e03a0SQu Wenruo 
300733e03a0SQu Wenruo /* Free per-transaction meta reservation for error handling */
301733e03a0SQu Wenruo static inline void btrfs_qgroup_free_meta_pertrans(struct btrfs_root *root,
302733e03a0SQu Wenruo 						   int num_bytes)
303733e03a0SQu Wenruo {
304733e03a0SQu Wenruo 	__btrfs_qgroup_free_meta(root, num_bytes,
305733e03a0SQu Wenruo 			BTRFS_QGROUP_RSV_META_PERTRANS);
306733e03a0SQu Wenruo }
307733e03a0SQu Wenruo 
308733e03a0SQu Wenruo /* Pre-allocated meta reservation can be freed at need */
309733e03a0SQu Wenruo static inline void btrfs_qgroup_free_meta_prealloc(struct btrfs_root *root,
310733e03a0SQu Wenruo 						   int num_bytes)
311733e03a0SQu Wenruo {
312733e03a0SQu Wenruo 	__btrfs_qgroup_free_meta(root, num_bytes,
313733e03a0SQu Wenruo 			BTRFS_QGROUP_RSV_META_PREALLOC);
314733e03a0SQu Wenruo }
315733e03a0SQu Wenruo 
/*
 * Free all per-transaction (META_PERTRANS) meta reservation of @root.
 *
 * Per-transaction meta reservation should be entirely freed at transaction
 * commit time.
 */
void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root);
321733e03a0SQu Wenruo 
/*
 * Convert @num_bytes of META_PREALLOC reservation to META_PERTRANS.
 *
 * This is called when a preallocated meta reservation needs to be used,
 * normally after a btrfs_join_transaction() call.
 */
void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes);
32964cfaef6SQu Wenruo 
/*
 * NOTE(review): judging by the name, checks @inode for qgroup reservations
 * that were never released or freed; only the prototype is visible here --
 * confirm against the implementation.
 */
void btrfs_qgroup_check_reserved_leak(struct inode *inode);
3319888c340SDavid Sterba 
3329888c340SDavid Sterba #endif
333