xref: /openbmc/linux/fs/btrfs/delayed-ref.h (revision cac405a3bfa21a6e17089ae2f355f34594bfb543)
1  /* SPDX-License-Identifier: GPL-2.0 */
2  /*
3   * Copyright (C) 2008 Oracle.  All rights reserved.
4   */
5  
6  #ifndef BTRFS_DELAYED_REF_H
7  #define BTRFS_DELAYED_REF_H
8  
9  #include <linux/refcount.h>
10  
11  /* these are the possible values of struct btrfs_delayed_ref_node->action */
12  #define BTRFS_ADD_DELAYED_REF    1 /* add one backref to the tree */
13  #define BTRFS_DROP_DELAYED_REF   2 /* delete one backref from the tree */
14  #define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
15  #define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */
16  
17  struct btrfs_delayed_ref_node {
18  	struct rb_node ref_node;
19  	/*
20  	 * If action is BTRFS_ADD_DELAYED_REF, also link this node to
21  	 * ref_head->ref_add_list, then we do not need to iterate the
22  	 * whole ref_head->ref_list to find BTRFS_ADD_DELAYED_REF nodes.
23  	 */
24  	struct list_head add_list;
25  
26  	/* the starting bytenr of the extent */
27  	u64 bytenr;
28  
29  	/* the size of the extent */
30  	u64 num_bytes;
31  
32  	/* seq number to keep track of insertion order */
33  	u64 seq;
34  
35  	/* ref count on this data structure */
36  	refcount_t refs;
37  
38  	/*
39  	 * how many refs is this entry adding or deleting.  For
40  	 * head refs, this may be a negative number because it is keeping
41  	 * track of the total mods done to the reference count.
42  	 * For individual refs, this will always be a positive number
43  	 *
44  	 * It may be more than one, since it is possible for a single
45  	 * parent to have more than one ref on an extent
46  	 */
47  	int ref_mod;
48  
49  	unsigned int action:8;
50  	unsigned int type:8;
51  };
52  
53  struct btrfs_delayed_extent_op {
54  	struct btrfs_disk_key key;
55  	u8 level;
56  	bool update_key;
57  	bool update_flags;
58  	u64 flags_to_set;
59  };
60  
61  /*
62   * the head refs are used to hold a lock on a given extent, which allows us
63   * to make sure that only one process is running the delayed refs
64   * at a time for a single extent.  They also store the sum of all the
65   * reference count modifications we've queued up.
66   */
67  struct btrfs_delayed_ref_head {
68  	u64 bytenr;
69  	u64 num_bytes;
70  	/*
71  	 * For insertion into struct btrfs_delayed_ref_root::href_root.
72  	 * Keep it in the same cache line as 'bytenr' for more efficient
73  	 * searches in the rbtree.
74  	 */
75  	struct rb_node href_node;
76  	/*
77  	 * the mutex is held while running the refs, and it is also
78  	 * held when checking the sum of reference modifications.
79  	 */
80  	struct mutex mutex;
81  
82  	refcount_t refs;
83  
84  	/* Protects 'ref_tree' and 'ref_add_list'. */
85  	spinlock_t lock;
86  	struct rb_root_cached ref_tree;
87  	/* accumulate add BTRFS_ADD_DELAYED_REF nodes to this ref_add_list. */
88  	struct list_head ref_add_list;
89  
90  	struct btrfs_delayed_extent_op *extent_op;
91  
92  	/*
93  	 * This is used to track the final ref_mod from all the refs associated
94  	 * with this head ref, this is not adjusted as delayed refs are run,
95  	 * this is meant to track if we need to do the csum accounting or not.
96  	 */
97  	int total_ref_mod;
98  
99  	/*
100  	 * This is the current outstanding mod references for this bytenr.  This
101  	 * is used with lookup_extent_info to get an accurate reference count
102  	 * for a bytenr, so it is adjusted as delayed refs are run so that any
103  	 * on disk reference count + ref_mod is accurate.
104  	 */
105  	int ref_mod;
106  
107  	/*
108  	 * when a new extent is allocated, it is just reserved in memory
109  	 * The actual extent isn't inserted into the extent allocation tree
110  	 * until the delayed ref is processed.  must_insert_reserved is
111  	 * used to flag a delayed ref so the accounting can be updated
112  	 * when a full insert is done.
113  	 *
114  	 * It is possible the extent will be freed before it is ever
115  	 * inserted into the extent allocation tree.  In this case
116  	 * we need to update the in ram accounting to properly reflect
117  	 * the free has happened.
118  	 */
119  	bool must_insert_reserved;
120  	bool is_data;
121  	bool is_system;
122  	bool processing;
123  };
124  
125  struct btrfs_delayed_tree_ref {
126  	struct btrfs_delayed_ref_node node;
127  	u64 root;
128  	u64 parent;
129  	int level;
130  };
131  
132  struct btrfs_delayed_data_ref {
133  	struct btrfs_delayed_ref_node node;
134  	u64 root;
135  	u64 parent;
136  	u64 objectid;
137  	u64 offset;
138  };
139  
140  enum btrfs_delayed_ref_flags {
141  	/* Indicate that we are flushing delayed refs for the commit */
142  	BTRFS_DELAYED_REFS_FLUSHING,
143  };
144  
145  struct btrfs_delayed_ref_root {
146  	/* head ref rbtree */
147  	struct rb_root_cached href_root;
148  
149  	/* dirty extent records */
150  	struct rb_root dirty_extent_root;
151  
152  	/* this spin lock protects the rbtree and the entries inside */
153  	spinlock_t lock;
154  
155  	/* how many delayed ref updates we've queued, used by the
156  	 * throttling code
157  	 */
158  	atomic_t num_entries;
159  
160  	/* total number of head nodes in tree */
161  	unsigned long num_heads;
162  
163  	/* total number of head nodes ready for processing */
164  	unsigned long num_heads_ready;
165  
166  	u64 pending_csums;
167  
168  	unsigned long flags;
169  
170  	u64 run_delayed_start;
171  
172  	/*
173  	 * To make qgroup to skip given root.
174  	 * This is for snapshot, as btrfs_qgroup_inherit() will manually
175  	 * modify counters for snapshot and its source, so we should skip
176  	 * the snapshot in new_root/old_roots or it will get calculated twice
177  	 */
178  	u64 qgroup_to_skip;
179  };
180  
181  enum btrfs_ref_type {
182  	BTRFS_REF_NOT_SET,
183  	BTRFS_REF_DATA,
184  	BTRFS_REF_METADATA,
185  	BTRFS_REF_LAST,
186  };
187  
188  struct btrfs_data_ref {
189  	/* For EXTENT_DATA_REF */
190  
191  	/* Original root this data extent belongs to */
192  	u64 owning_root;
193  
194  	/* Inode which refers to this data extent */
195  	u64 ino;
196  
197  	/*
198  	 * file_offset - extent_offset
199  	 *
200  	 * file_offset is the key.offset of the EXTENT_DATA key.
201  	 * extent_offset is btrfs_file_extent_offset() of the EXTENT_DATA data.
202  	 */
203  	u64 offset;
204  };
205  
206  struct btrfs_tree_ref {
207  	/*
208  	 * Level of this tree block
209  	 *
210  	 * Shared for skinny (TREE_BLOCK_REF) and normal tree ref.
211  	 */
212  	int level;
213  
214  	/*
215  	 * Root which owns this tree block.
216  	 *
217  	 * For TREE_BLOCK_REF (skinny metadata, either inline or keyed)
218  	 */
219  	u64 owning_root;
220  
221  	/* For non-skinny metadata, no special member needed */
222  };
223  
224  struct btrfs_ref {
225  	enum btrfs_ref_type type;
226  	int action;
227  
228  	/*
229  	 * Whether this extent should go through qgroup record.
230  	 *
231  	 * Normally false, but for certain cases like delayed subtree scan,
232  	 * setting this flag can hugely reduce qgroup overhead.
233  	 */
234  	bool skip_qgroup;
235  
236  #ifdef CONFIG_BTRFS_FS_REF_VERIFY
237  	/* Through which root is this modification. */
238  	u64 real_root;
239  #endif
240  	u64 bytenr;
241  	u64 len;
242  
243  	/* Bytenr of the parent tree block */
244  	u64 parent;
245  	union {
246  		struct btrfs_data_ref data_ref;
247  		struct btrfs_tree_ref tree_ref;
248  	};
249  };
250  
251  extern struct kmem_cache *btrfs_delayed_ref_head_cachep;
252  extern struct kmem_cache *btrfs_delayed_tree_ref_cachep;
253  extern struct kmem_cache *btrfs_delayed_data_ref_cachep;
254  extern struct kmem_cache *btrfs_delayed_extent_op_cachep;
255  
256  int __init btrfs_delayed_ref_init(void);
257  void __cold btrfs_delayed_ref_exit(void);
258  
btrfs_calc_delayed_ref_bytes(const struct btrfs_fs_info * fs_info,int num_delayed_refs)259  static inline u64 btrfs_calc_delayed_ref_bytes(const struct btrfs_fs_info *fs_info,
260  					       int num_delayed_refs)
261  {
262  	u64 num_bytes;
263  
264  	num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_delayed_refs);
265  
266  	/*
267  	 * We have to check the mount option here because we could be enabling
268  	 * the free space tree for the first time and don't have the compat_ro
269  	 * option set yet.
270  	 *
271  	 * We need extra reservations if we have the free space tree because
272  	 * we'll have to modify that tree as well.
273  	 */
274  	if (btrfs_test_opt(fs_info, FREE_SPACE_TREE))
275  		num_bytes *= 2;
276  
277  	return num_bytes;
278  }
279  
btrfs_init_generic_ref(struct btrfs_ref * generic_ref,int action,u64 bytenr,u64 len,u64 parent)280  static inline void btrfs_init_generic_ref(struct btrfs_ref *generic_ref,
281  				int action, u64 bytenr, u64 len, u64 parent)
282  {
283  	generic_ref->action = action;
284  	generic_ref->bytenr = bytenr;
285  	generic_ref->len = len;
286  	generic_ref->parent = parent;
287  }
288  
btrfs_init_tree_ref(struct btrfs_ref * generic_ref,int level,u64 root,u64 mod_root,bool skip_qgroup)289  static inline void btrfs_init_tree_ref(struct btrfs_ref *generic_ref,
290  				int level, u64 root, u64 mod_root, bool skip_qgroup)
291  {
292  #ifdef CONFIG_BTRFS_FS_REF_VERIFY
293  	/* If @real_root not set, use @root as fallback */
294  	generic_ref->real_root = mod_root ?: root;
295  #endif
296  	generic_ref->tree_ref.level = level;
297  	generic_ref->tree_ref.owning_root = root;
298  	generic_ref->type = BTRFS_REF_METADATA;
299  	if (skip_qgroup || !(is_fstree(root) &&
300  			     (!mod_root || is_fstree(mod_root))))
301  		generic_ref->skip_qgroup = true;
302  	else
303  		generic_ref->skip_qgroup = false;
304  
305  }
306  
btrfs_init_data_ref(struct btrfs_ref * generic_ref,u64 ref_root,u64 ino,u64 offset,u64 mod_root,bool skip_qgroup)307  static inline void btrfs_init_data_ref(struct btrfs_ref *generic_ref,
308  				u64 ref_root, u64 ino, u64 offset, u64 mod_root,
309  				bool skip_qgroup)
310  {
311  #ifdef CONFIG_BTRFS_FS_REF_VERIFY
312  	/* If @real_root not set, use @root as fallback */
313  	generic_ref->real_root = mod_root ?: ref_root;
314  #endif
315  	generic_ref->data_ref.owning_root = ref_root;
316  	generic_ref->data_ref.ino = ino;
317  	generic_ref->data_ref.offset = offset;
318  	generic_ref->type = BTRFS_REF_DATA;
319  	if (skip_qgroup || !(is_fstree(ref_root) &&
320  			     (!mod_root || is_fstree(mod_root))))
321  		generic_ref->skip_qgroup = true;
322  	else
323  		generic_ref->skip_qgroup = false;
324  }
325  
326  static inline struct btrfs_delayed_extent_op *
btrfs_alloc_delayed_extent_op(void)327  btrfs_alloc_delayed_extent_op(void)
328  {
329  	return kmem_cache_alloc(btrfs_delayed_extent_op_cachep, GFP_NOFS);
330  }
331  
332  static inline void
btrfs_free_delayed_extent_op(struct btrfs_delayed_extent_op * op)333  btrfs_free_delayed_extent_op(struct btrfs_delayed_extent_op *op)
334  {
335  	if (op)
336  		kmem_cache_free(btrfs_delayed_extent_op_cachep, op);
337  }
338  
btrfs_put_delayed_ref(struct btrfs_delayed_ref_node * ref)339  static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
340  {
341  	WARN_ON(refcount_read(&ref->refs) == 0);
342  	if (refcount_dec_and_test(&ref->refs)) {
343  		WARN_ON(!RB_EMPTY_NODE(&ref->ref_node));
344  		switch (ref->type) {
345  		case BTRFS_TREE_BLOCK_REF_KEY:
346  		case BTRFS_SHARED_BLOCK_REF_KEY:
347  			kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
348  			break;
349  		case BTRFS_EXTENT_DATA_REF_KEY:
350  		case BTRFS_SHARED_DATA_REF_KEY:
351  			kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
352  			break;
353  		default:
354  			BUG();
355  		}
356  	}
357  }
358  
btrfs_ref_head_to_space_flags(struct btrfs_delayed_ref_head * head_ref)359  static inline u64 btrfs_ref_head_to_space_flags(
360  				struct btrfs_delayed_ref_head *head_ref)
361  {
362  	if (head_ref->is_data)
363  		return BTRFS_BLOCK_GROUP_DATA;
364  	else if (head_ref->is_system)
365  		return BTRFS_BLOCK_GROUP_SYSTEM;
366  	return BTRFS_BLOCK_GROUP_METADATA;
367  }
368  
btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head * head)369  static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *head)
370  {
371  	if (refcount_dec_and_test(&head->refs))
372  		kmem_cache_free(btrfs_delayed_ref_head_cachep, head);
373  }
374  
375  int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
376  			       struct btrfs_ref *generic_ref,
377  			       struct btrfs_delayed_extent_op *extent_op);
378  int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
379  			       struct btrfs_ref *generic_ref,
380  			       u64 reserved);
381  int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
382  				u64 bytenr, u64 num_bytes,
383  				struct btrfs_delayed_extent_op *extent_op);
384  void btrfs_merge_delayed_refs(struct btrfs_fs_info *fs_info,
385  			      struct btrfs_delayed_ref_root *delayed_refs,
386  			      struct btrfs_delayed_ref_head *head);
387  
388  struct btrfs_delayed_ref_head *
389  btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
390  			    u64 bytenr);
391  int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs,
392  			   struct btrfs_delayed_ref_head *head);
btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head * head)393  static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head)
394  {
395  	mutex_unlock(&head->mutex);
396  }
397  void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
398  			   struct btrfs_delayed_ref_head *head);
399  
400  struct btrfs_delayed_ref_head *btrfs_select_ref_head(
401  		struct btrfs_delayed_ref_root *delayed_refs);
402  
403  int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq);
404  
405  void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr);
406  void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans);
407  int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
408  				  enum btrfs_reserve_flush_enum flush);
409  void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
410  				       u64 num_bytes);
411  bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info);
412  
413  /*
414   * helper functions to cast a node into its container
415   */
416  static inline struct btrfs_delayed_tree_ref *
btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node * node)417  btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node *node)
418  {
419  	return container_of(node, struct btrfs_delayed_tree_ref, node);
420  }
421  
422  static inline struct btrfs_delayed_data_ref *
btrfs_delayed_node_to_data_ref(struct btrfs_delayed_ref_node * node)423  btrfs_delayed_node_to_data_ref(struct btrfs_delayed_ref_node *node)
424  {
425  	return container_of(node, struct btrfs_delayed_data_ref, node);
426  }
427  
428  #endif
429