xref: /openbmc/linux/fs/ext4/mballoc.h (revision fd589a8f)
1 /*
2  *  fs/ext4/mballoc.h
3  *
4  *  Written by: Alex Tomas <alex@clusterfs.com>
5  *
6  */
7 #ifndef _EXT4_MBALLOC_H
8 #define _EXT4_MBALLOC_H
9 
10 #include <linux/time.h>
11 #include <linux/fs.h>
12 #include <linux/namei.h>
13 #include <linux/quotaops.h>
14 #include <linux/buffer_head.h>
15 #include <linux/module.h>
16 #include <linux/swap.h>
17 #include <linux/proc_fs.h>
18 #include <linux/pagemap.h>
19 #include <linux/seq_file.h>
20 #include <linux/version.h>
21 #include <linux/blkdev.h>
22 #include <linux/mutex.h>
23 #include "ext4_jbd2.h"
24 #include "ext4.h"
25 
/*
 * With AGGRESSIVE_CHECK the allocator runs consistency checks over
 * structures.  These checks slow things down a lot.
 * NOTE(review): the trailing "__" means this does NOT define
 * AGGRESSIVE_CHECK itself — presumably a quick way to keep the checks
 * compiled out; rename to AGGRESSIVE_CHECK to enable them.
 */
#define AGGRESSIVE_CHECK__

/*
 * With DOUBLE_CHECK defined mballoc creates persistent in-core
 * bitmaps, maintains and uses them to check for double allocations.
 * Disabled the same way as above: rename to DOUBLE_CHECK to turn it on.
 */
#define DOUBLE_CHECK__
37 
/*
 * Debugging support: mb_debug(n, fmt, ...) prints only when
 * CONFIG_EXT4_DEBUG is set and the message level n is at or below the
 * runtime knob mb_enable_debug.
 */
#ifdef CONFIG_EXT4_DEBUG
extern u8 mb_enable_debug;

#define mb_debug(n, fmt, a...)	                                        \
	do {								\
		if ((n) <= mb_enable_debug) {		        	\
			printk(KERN_DEBUG "(%s, %d): %s: ",		\
			       __FILE__, __LINE__, __func__);		\
			printk(fmt, ## a);				\
		}							\
	} while (0)
#else
/* Debugging compiled out: expands to nothing. */
#define mb_debug(n, fmt, a...)
#endif
54 
/*
 * With EXT4_MB_HISTORY mballoc stores the last N allocations in memory
 * and you can monitor it in /proc/fs/ext4/<dev>/mb_history
 */
#define EXT4_MB_HISTORY
/* Event types recorded in the history (bit flags): */
#define EXT4_MB_HISTORY_ALLOC		1	/* allocation */
#define EXT4_MB_HISTORY_PREALLOC	2	/* preallocated blocks used */
#define EXT4_MB_HISTORY_DISCARD		4	/* preallocation discarded */
#define EXT4_MB_HISTORY_FREE		8	/* free */

/* Default set of events to record: allocations and preallocation hits. */
#define EXT4_MB_HISTORY_DEFAULT		(EXT4_MB_HISTORY_ALLOC | \
					 EXT4_MB_HISTORY_PREALLOC)
67 
/*
 * How long mballoc can look for a best extent (in found extents)
 */
#define MB_DEFAULT_MAX_TO_SCAN		200

/*
 * How long mballoc must look for a best extent
 */
#define MB_DEFAULT_MIN_TO_SCAN		10

/*
 * How many groups mballoc will scan looking for the best chunk
 */
#define MB_DEFAULT_MAX_GROUPS_TO_SCAN	5

/*
 * With 'ext4_mb_stats' the allocator will collect stats that will be
 * shown at umount.  The collecting costs though!
 */
#define MB_DEFAULT_STATS		1

/*
 * Files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
 * by the stream allocator, whose purpose is to pack requests
 * as close to each other as possible to produce smooth I/O traffic.
 * We use locality-group prealloc space for stream requests.
 * This can be tuned via /proc/fs/ext4/<partition>/stream_req
 */
#define MB_DEFAULT_STREAM_THRESHOLD	16	/* 64K */

/*
 * For which requests to use a 2^N (buddy order) search
 */
#define MB_DEFAULT_ORDER2_REQS		2

/*
 * Default group prealloc size: 512 blocks
 */
#define MB_DEFAULT_GROUP_PREALLOC	512
107 
108 
/*
 * One freed block extent, tagged with the transaction that freed it
 * (t_tid); linked both into its group's rb-tree and a per-sb list.
 */
struct ext4_free_data {
	/* this links the free block information from group_info */
	struct rb_node node;

	/* this links the free block information from ext4_sb_info */
	struct list_head list;

	/* group which the free block extent belongs to */
	ext4_group_t group;

	/* free block extent: group-relative start block and length */
	ext4_grpblk_t start_blk;
	ext4_grpblk_t count;

	/* transaction which freed this extent */
	tid_t	t_tid;
};
126 
/*
 * A preallocated block range, owned either by an inode or by a
 * locality group — see pa_type (MB_INODE_PA / MB_GROUP_PA).
 */
struct ext4_prealloc_space {
	struct list_head	pa_inode_list;	/* linkage on the owner's list */
	struct list_head	pa_group_list;	/* linkage on the group's list */
	union {
		struct list_head pa_tmp_list;	/* transient scratch linkage */
		struct rcu_head	pa_rcu;		/* for RCU-deferred freeing */
	} u;
	spinlock_t		pa_lock;	/* protects this pa's fields */
	atomic_t		pa_count;	/* reference count */
	unsigned		pa_deleted;	/* nonzero once marked deleted */
	ext4_fsblk_t		pa_pstart;	/* phys. block */
	ext4_lblk_t		pa_lstart;	/* log. block */
	ext4_grpblk_t		pa_len;		/* len of preallocated chunk */
	ext4_grpblk_t		pa_free;	/* how many blocks are free */
	unsigned short		pa_type;	/* pa type. inode or group */
	spinlock_t		*pa_obj_lock;	/* lock of the owning object */
	struct inode		*pa_inode;	/* hack, for history only */
};
145 
/* Values for ext4_prealloc_space.pa_type */
enum {
	MB_INODE_PA = 0,	/* preallocation owned by an inode */
	MB_GROUP_PA = 1		/* preallocation owned by a locality group */
};
150 
/*
 * An extent described both in file-logical terms and in
 * (group, group-relative start, length) physical terms.
 */
struct ext4_free_extent {
	ext4_lblk_t fe_logical;		/* logical (file-relative) block */
	ext4_grpblk_t fe_start;		/* first block within the group */
	ext4_group_t fe_group;		/* block group number */
	ext4_grpblk_t fe_len;		/* number of blocks */
};
157 
/*
 * Locality group:
 *   we try to group all related changes together
 *   so that writeback can flush/allocate them together as well.
 *   Size of the lg_prealloc_list hash is determined by
 *   MB_DEFAULT_GROUP_PREALLOC (512).  We store prealloc space into the
 *   hash based on the pa_free block order value, i.e. fls(pa_free) - 1.
 */
#define PREALLOC_TB_SIZE 10
struct ext4_locality_group {
	/* for allocator */
	/* to serialize allocates */
	struct mutex		lg_mutex;
	/* list of preallocations, hashed by free-block order */
	struct list_head	lg_prealloc_list[PREALLOC_TB_SIZE];
	/* protects lg_prealloc_list */
	spinlock_t		lg_prealloc_lock;
};
175 
/*
 * State of one allocation request as it moves through the allocator:
 * the original request, its normalized goal, the best extent found so
 * far, plus search statistics and hints.
 */
struct ext4_allocation_context {
	struct inode *ac_inode;		/* inode the allocation is for */
	struct super_block *ac_sb;	/* filesystem being allocated from */

	/* original request */
	struct ext4_free_extent ac_o_ex;

	/* goal request (after normalization) */
	struct ext4_free_extent ac_g_ex;

	/* the best found extent */
	struct ext4_free_extent ac_b_ex;

	/* copy of the best found extent taken before preallocation efforts */
	struct ext4_free_extent ac_f_ex;

	/* number of iterations done. we have to track to limit searching */
	unsigned long ac_ex_scanned;
	__u16 ac_groups_scanned;	/* groups examined so far */
	__u16 ac_found;			/* extents found so far */
	__u16 ac_tail;
	__u16 ac_buddy;
	__u16 ac_flags;		/* allocation hints */
	__u8 ac_status;		/* AC_STATUS_* scan state */
	__u8 ac_criteria;	/* current scan phase (cf. mb_history.cr) */
	__u8 ac_repeats;
	__u8 ac_2order;		/* if request is to allocate 2^N blocks and
				 * N > 0, the field stores N, otherwise 0 */
	__u8 ac_op;		/* operation, for history only */
	struct page *ac_bitmap_page;	/* page holding the block bitmap */
	struct page *ac_buddy_page;	/* page holding the buddy data */
	/*
	 * pointer to the held semaphore upon successful
	 * block allocation
	 */
	struct rw_semaphore *alloc_semp;
	struct ext4_prealloc_space *ac_pa;	/* preallocation in use, if any */
	struct ext4_locality_group *ac_lg;	/* locality group, if used */
};
215 
/* ac_status values */
#define AC_STATUS_CONTINUE	1	/* keep scanning */
#define AC_STATUS_FOUND		2	/* acceptable extent found */
#define AC_STATUS_BREAK		3	/* stop scanning */
219 
/*
 * One record in the mb_history log: a snapshot of a single
 * allocation's request, goal and result (see EXT4_MB_HISTORY).
 */
struct ext4_mb_history {
	struct ext4_free_extent orig;	/* orig allocation */
	struct ext4_free_extent goal;	/* goal allocation */
	struct ext4_free_extent result;	/* result allocation */
	unsigned pid;	/* pid of the allocating task */
	unsigned ino;	/* inode number the allocation was for */
	__u16 found;	/* how many extents have been found */
	__u16 groups;	/* how many groups have been scanned */
	__u16 tail;	/* what tail broke some buddy */
	__u16 buddy;	/* buddy the tail ^^^ broke */
	__u16 flags;	/* allocation hints (ac_flags) */
	__u8 cr:3;	/* which phase the result extent was found at */
	__u8 op:4;	/* operation type (cf. EXT4_MB_HISTORY_*) */
	__u8 merged:1;
};
235 
/*
 * Handle for one group's loaded allocation data: the pages backing the
 * block bitmap and the buddy data, plus the group they describe.
 */
struct ext4_buddy {
	struct page *bd_buddy_page;	/* page containing the buddy data */
	void *bd_buddy;			/* buddy data */
	struct page *bd_bitmap_page;	/* page containing the block bitmap */
	void *bd_bitmap;		/* block bitmap */
	struct ext4_group_info *bd_info;	/* in-core group info */
	struct super_block *bd_sb;	/* owning filesystem */
	__u16 bd_blkbits;		/* block size bits */
	ext4_group_t bd_group;		/* group this buddy describes */
	struct rw_semaphore *alloc_semp;	/* see ext4_allocation_context */
};
/* Convenience accessors for the bitmap/buddy areas */
#define EXT4_MB_BITMAP(e4b)	((e4b)->bd_bitmap)
#define EXT4_MB_BUDDY(e4b)	((e4b)->bd_buddy)
249 
#ifndef EXT4_MB_HISTORY
/* History tracking disabled: no-op stub so callers need no #ifdefs. */
static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
{
}
#endif
256 
/* True when b lies in [first, first + len - 1].  Note: b and first are
 * evaluated twice — do not pass expressions with side effects. */
#define in_range(b, first, len)	((b) >= (first) && (b) <= (first) + (len) - 1)
258 
259 static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
260 					struct ext4_free_extent *fex)
261 {
262 	ext4_fsblk_t block;
263 
264 	block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
265 			+ fex->fe_start
266 			+ le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
267 	return block;
268 }
269 #endif
270