/*
 * Copyright (C) 2014 Facebook. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#ifndef __BTRFS_QGROUP__
#define __BTRFS_QGROUP__

#include "ulist.h"
#include "delayed-ref.h"

/*
 * Btrfs qgroup overview
 *
 * Btrfs qgroup splits into 3 main parts:
 * 1) Reserve
 *    Reserve metadata/data space for incoming operations.
 *    Affects how the qgroup limit works.
 *
 * 2) Trace
 *    Tell btrfs qgroup to trace dirty extents.
 *
 *    Dirty extents include:
 *    - Newly allocated extents
 *    - Extents going to be deleted (in this trans)
 *    - Extents whose owner is going to be modified
 *
 *    This is the main part that affects whether qgroup numbers will stay
 *    consistent.
 *    Btrfs qgroup can trace clean extents without causing any problem,
 *    but it consumes extra CPU time, so it should be avoided if possible.
 *
 * 3) Account
 *    Btrfs qgroup updates its numbers based on the dirty extents traced
 *    in the previous step.
 *
 *    Normally done at qgroup rescan and transaction commit time.
 */

/*
 * Record a dirty extent, and inform qgroup to update quota on it.
 * TODO: Use kmem cache to allocate it.
 */
struct btrfs_qgroup_extent_record {
	struct rb_node node;
	u64 bytenr;
	u64 num_bytes;
	struct ulist *old_roots;
};

/*
 * Qgroup reservation types:
 *
 * DATA:
 *	Space reserved for data.
 *
 * META_PERTRANS:
 *	Space reserved for metadata (per-transaction).
 *	Because qgroup data is only updated at transaction commit time,
 *	reserved metadata space must be kept until the transaction commits.
 *	Any metadata reservation that is used by btrfs_start_transaction()
 *	should be of this type.
 *
 * META_PREALLOC:
 *	There are cases where metadata space is reserved before starting a
 *	transaction, and btrfs_join_transaction() is then called to get a
 *	trans handle.
 *	Any metadata reserved for such usage should be of this type.
 *	After join_transaction(), part (or all) of such a reservation should
 *	be converted into META_PERTRANS.
 */
enum btrfs_qgroup_rsv_type {
	BTRFS_QGROUP_RSV_DATA = 0,
	BTRFS_QGROUP_RSV_META_PERTRANS,
	BTRFS_QGROUP_RSV_META_PREALLOC,
	BTRFS_QGROUP_RSV_LAST,
};

/*
 * Represents how many bytes we have reserved for this qgroup.
 *
 * Each type should have different reservation behavior.
 * E.g. data follows its io_tree flag modification, while *currently*
 * meta is just reserve-and-clear during a transaction.
 *
 * TODO: Add a new type for reservations which can survive a transaction
 * commit. The current metadata reservation behavior is not suitable for
 * such a case.
 */
struct btrfs_qgroup_rsv {
	u64 values[BTRFS_QGROUP_RSV_LAST];
};
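/*
 * A minimal sketch of how the per-type counters above are meant to be
 * manipulated: each reservation type gets its own independent byte
 * counter in values[]. These helpers are illustrative only and are not
 * part of this header; the real accounting lives in qgroup.c and must be
 * done under the appropriate qgroup locks.
 */
static inline void qgroup_rsv_sketch_add(struct btrfs_qgroup_rsv *rsv,
					 u64 num_bytes,
					 enum btrfs_qgroup_rsv_type type)
{
	rsv->values[type] += num_bytes;
}

static inline void qgroup_rsv_sketch_release(struct btrfs_qgroup_rsv *rsv,
					     u64 num_bytes,
					     enum btrfs_qgroup_rsv_type type)
{
	/* Clamp to zero rather than underflowing on over-release. */
	if (rsv->values[type] < num_bytes)
		rsv->values[type] = 0;
	else
		rsv->values[type] -= num_bytes;
}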
/*
 * One struct for each qgroup, organized in fs_info->qgroup_tree.
 */
struct btrfs_qgroup {
	u64 qgroupid;

	/*
	 * state
	 */
	u64 rfer;	/* referenced */
	u64 rfer_cmpr;	/* referenced compressed */
	u64 excl;	/* exclusive */
	u64 excl_cmpr;	/* exclusive compressed */

	/*
	 * limits
	 */
	u64 lim_flags;	/* which limits are set */
	u64 max_rfer;
	u64 max_excl;
	u64 rsv_rfer;
	u64 rsv_excl;

	/*
	 * reservation tracking
	 */
	struct btrfs_qgroup_rsv rsv;

	/*
	 * lists
	 */
	struct list_head groups;  /* groups this group is member of */
	struct list_head members; /* groups that are members of this group */
	struct list_head dirty;   /* dirty groups */
	struct rb_node node;	  /* tree of qgroups */

	/*
	 * temp variables for accounting operations
	 * Refer to qgroup_shared_accounting() for details.
	 */
	u64 old_refcnt;
	u64 new_refcnt;
};

/*
 * For qgroup event trace points only
 */
#define QGROUP_RESERVE		(1<<0)
#define QGROUP_RELEASE		(1<<1)
#define QGROUP_FREE		(1<<2)

int btrfs_quota_enable(struct btrfs_trans_handle *trans,
		       struct btrfs_fs_info *fs_info);
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info);
int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
				     bool interruptible);
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info, u64 qgroupid);
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info, u64 qgroupid);
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
		       struct btrfs_fs_info *fs_info, u64 qgroupid,
		       struct btrfs_qgroup_limit *limit);
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
struct btrfs_delayed_extent_op;

/*
 * Inform qgroup to trace one dirty extent, whose info is recorded in
 * @record, so qgroup can account for it at transaction commit time.
 *
 * No-lock version: the caller must hold the delayed ref lock and have
 * allocated the memory, then call btrfs_qgroup_trace_extent_post()
 * after exiting the lock context.
 *
 * Return 0 on successful insertion.
 * Return >0 if the record already exists; the caller can free @record safely.
 * Error is not possible.
 */
int btrfs_qgroup_trace_extent_nolock(
		struct btrfs_fs_info *fs_info,
		struct btrfs_delayed_ref_root *delayed_refs,
		struct btrfs_qgroup_extent_record *record);
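/*
 * A hedged sketch of the nolock/post calling pattern documented above:
 * allocate outside the lock, insert under the delayed ref spinlock, then
 * do the (possibly sleeping) backref walk afterwards. Illustrative only;
 * it assumes <linux/slab.h> is in scope and repeats the _post()
 * declaration ahead of its documented declaration below so the sketch is
 * self-contained.
 */
int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
				   struct btrfs_qgroup_extent_record *qrecord);

static inline int qgroup_trace_extent_sketch(
		struct btrfs_fs_info *fs_info,
		struct btrfs_delayed_ref_root *delayed_refs,
		u64 bytenr, u64 num_bytes)
{
	struct btrfs_qgroup_extent_record *record;
	int ret;

	/* Allocate outside the spinlock; the nolock variant must not sleep. */
	record = kzalloc(sizeof(*record), GFP_NOFS);
	if (!record)
		return -ENOMEM;
	record->bytenr = bytenr;
	record->num_bytes = num_bytes;
	/* kzalloc() already left record->old_roots NULL for _post() to fill. */

	spin_lock(&delayed_refs->lock);
	ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
	spin_unlock(&delayed_refs->lock);

	if (ret > 0) {
		/* A record for this extent already existed; free ours. */
		kfree(record);
		return 0;
	}
	/* Commit-root backref walk, done after exiting the lock context. */
	return btrfs_qgroup_trace_extent_post(fs_info, record);
}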
/*
 * Post handler after qgroup_trace_extent_nolock().
 *
 * NOTE: Current qgroup does the expensive backref walk at transaction
 * commit time with TRANS_STATE_COMMIT_DOING, which blocks incoming
 * new transactions.
 * This is designed to allow btrfs_find_all_roots() to get a correct
 * new_roots result.
 *
 * However, for old_roots there is no need to do the backref walk at that
 * time, since we search commit roots to walk backrefs and the result will
 * always be correct.
 *
 * Due to the nature of the no-lock version, we can't do the backref walk
 * there, so we must call btrfs_qgroup_trace_extent_post() after exiting
 * the spinlock context.
 *
 * TODO: If we can fix and prove that btrfs_find_all_roots() can get a
 * correct result using the current root, then we can move all the
 * expensive backref walks out of transaction commit, but not now, as
 * qgroup accounting would be wrong again.
 */
int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
				   struct btrfs_qgroup_extent_record *qrecord);

/*
 * Inform qgroup to trace one dirty extent, specified by @bytenr and
 * @num_bytes, so qgroup can account for it at commit trans time.
 *
 * Better encapsulated version, with memory allocation and backref walk
 * for commit roots, so this can sleep.
 *
 * Return 0 if the operation is done.
 * Return <0 for error, like memory allocation failure or invalid parameter
 * (NULL trans).
 */
int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans,
		struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
		gfp_t gfp_flag);

/*
 * Inform qgroup to trace all leaf items of data.
 *
 * Return 0 for success.
 * Return <0 for error (ENOMEM).
 */
int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
				  struct btrfs_fs_info *fs_info,
				  struct extent_buffer *eb);
/*
 * Inform qgroup to trace a whole subtree, including all its child tree
 * blocks and data.
 * The root tree block is specified by @root_eb.
 *
 * Normally used by relocation (tree block swap) and subvolume deletion.
 *
 * Return 0 for success.
 * Return <0 for error (ENOMEM or tree search error).
 */
int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct extent_buffer *root_eb,
			       u64 root_gen, int root_level);
int
btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
			    struct btrfs_fs_info *fs_info,
			    u64 bytenr, u64 num_bytes,
			    struct ulist *old_roots, struct ulist *new_roots);
int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans);
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
		      struct btrfs_fs_info *fs_info);
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
			 struct btrfs_qgroup_inherit *inherit);
void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
			       u64 ref_root, u64 num_bytes,
			       enum btrfs_qgroup_rsv_type type);
static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info,
						 u64 ref_root, u64 num_bytes)
{
	trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes);
	btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes,
				  BTRFS_QGROUP_RSV_DATA);
}

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
			       u64 rfer, u64 excl);
#endif

/* New io_tree based accurate qgroup reserve API */
int btrfs_qgroup_reserve_data(struct inode *inode,
			struct extent_changeset **reserved, u64 start, u64 len);
int btrfs_qgroup_release_data(struct inode *inode, u64 start, u64 len);
int btrfs_qgroup_free_data(struct inode *inode,
			struct extent_changeset *reserved, u64 start, u64 len);

int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
				enum btrfs_qgroup_rsv_type type, bool enforce);
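/*
 * A hedged sketch of the META_PREALLOC flow described in the reservation
 * type comments above: reserve before a transaction handle exists, then
 * convert to META_PERTRANS once btrfs_join_transaction() has succeeded.
 * Illustrative only; it assumes "transaction.h" (for
 * btrfs_join_transaction()/btrfs_end_transaction()) is in scope, and
 * repeats two declarations ahead of their documented declarations below
 * so the sketch is self-contained.
 */
void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
			      enum btrfs_qgroup_rsv_type type);
void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes);

static inline int qgroup_meta_prealloc_sketch(struct btrfs_root *root,
					      int num_bytes)
{
	struct btrfs_trans_handle *trans;
	int ret;

	/* Reserve as PREALLOC while no transaction handle is held yet. */
	ret = __btrfs_qgroup_reserve_meta(root, num_bytes,
					  BTRFS_QGROUP_RSV_META_PREALLOC, true);
	if (ret < 0)
		return ret;

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		/* Joining failed: hand the PREALLOC reservation back. */
		__btrfs_qgroup_free_meta(root, num_bytes,
					 BTRFS_QGROUP_RSV_META_PREALLOC);
		return PTR_ERR(trans);
	}

	/* The reservation now belongs to this transaction. */
	btrfs_qgroup_convert_reserved_meta(root, num_bytes);
	return btrfs_end_transaction(trans);
}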
/* Reserve metadata space for pertrans and prealloc type */
static inline int btrfs_qgroup_reserve_meta_pertrans(struct btrfs_root *root,
						     int num_bytes, bool enforce)
{
	return __btrfs_qgroup_reserve_meta(root, num_bytes,
					   BTRFS_QGROUP_RSV_META_PERTRANS,
					   enforce);
}
static inline int btrfs_qgroup_reserve_meta_prealloc(struct btrfs_root *root,
						     int num_bytes, bool enforce)
{
	return __btrfs_qgroup_reserve_meta(root, num_bytes,
					   BTRFS_QGROUP_RSV_META_PREALLOC,
					   enforce);
}

void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
			      enum btrfs_qgroup_rsv_type type);

/* Free per-transaction meta reservation for error handling */
static inline void btrfs_qgroup_free_meta_pertrans(struct btrfs_root *root,
						   int num_bytes)
{
	__btrfs_qgroup_free_meta(root, num_bytes,
				 BTRFS_QGROUP_RSV_META_PERTRANS);
}

/* Pre-allocated meta reservation can be freed as needed */
static inline void btrfs_qgroup_free_meta_prealloc(struct btrfs_root *root,
						   int num_bytes)
{
	__btrfs_qgroup_free_meta(root, num_bytes,
				 BTRFS_QGROUP_RSV_META_PREALLOC);
}

/*
 * Per-transaction meta reservations should all be freed at transaction
 * commit time.
 */
void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root);

/*
 * Convert @num_bytes of META_PREALLOC reservation to META_PERTRANS.
 *
 * This is called when a preallocated meta reservation needs to be used,
 * normally after a btrfs_join_transaction() call.
 */
void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes);

void btrfs_qgroup_check_reserved_leak(struct inode *inode);
#endif /* __BTRFS_QGROUP__ */